diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -3,246703 +3,247046 @@ "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, - "global_step": 35240, + "global_step": 35289, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 2.837684449489217e-05, + "epoch": 2.833744226246139e-05, "grad_norm": 0.0, - "learning_rate": 1.890359168241966e-08, - "loss": 1.577, + "learning_rate": 1.8885741265344666e-08, + "loss": 1.9338, "step": 1 }, { - "epoch": 5.675368898978434e-05, + "epoch": 5.667488452492278e-05, "grad_norm": 0.0, - "learning_rate": 3.780718336483932e-08, - "loss": 1.5748, + "learning_rate": 3.777148253068933e-08, + "loss": 1.8518, "step": 2 }, { - "epoch": 8.51305334846765e-05, + "epoch": 8.501232678738417e-05, "grad_norm": 0.0, - "learning_rate": 5.671077504725899e-08, - "loss": 1.6437, + "learning_rate": 5.6657223796034004e-08, + "loss": 1.916, "step": 3 }, { - "epoch": 0.00011350737797956867, + "epoch": 0.00011334976904984556, "grad_norm": 0.0, - "learning_rate": 7.561436672967865e-08, - "loss": 1.5787, + "learning_rate": 7.554296506137866e-08, + "loss": 1.8867, "step": 4 }, { - "epoch": 0.00014188422247446084, + "epoch": 0.00014168721131230694, "grad_norm": 0.0, - "learning_rate": 9.45179584120983e-08, - "loss": 1.6143, + "learning_rate": 9.442870632672333e-08, + "loss": 1.8817, "step": 5 }, { - "epoch": 0.000170261066969353, + "epoch": 0.00017002465357476835, "grad_norm": 0.0, - "learning_rate": 1.1342155009451797e-07, - "loss": 1.6899, + "learning_rate": 1.1331444759206801e-07, + "loss": 1.9254, "step": 6 }, { - "epoch": 0.0001986379114642452, + "epoch": 0.00019836209583722972, "grad_norm": 0.0, - "learning_rate": 1.3232514177693763e-07, - "loss": 1.597, + "learning_rate": 1.3220018885741267e-07, + "loss": 2.0063, "step": 7 }, { - "epoch": 0.00022701475595913735, + "epoch": 0.00022669953809969113, "grad_norm": 0.0, - "learning_rate": 1.512287334593573e-07, - "loss": 1.6447, + "learning_rate": 1.5108593012275733e-07, + "loss": 1.7539, "step": 8 }, { - "epoch": 0.00025539160045402954, + "epoch": 0.0002550369803621525, "grad_norm": 0.0, - "learning_rate": 1.7013232514177695e-07, - "loss": 1.6228, + "learning_rate": 1.69971671388102e-07, + "loss": 1.948, "step": 9 }, { - "epoch": 0.00028376844494892167, + "epoch": 0.0002833744226246139, "grad_norm": 0.0, - "learning_rate": 1.890359168241966e-07, - "loss": 1.4889, + "learning_rate": 1.8885741265344666e-07, + "loss": 1.9763, "step": 10 }, { - "epoch": 0.00031214528944381386, + "epoch": 0.0003117118648870753, "grad_norm": 0.0, - "learning_rate": 2.079395085066163e-07, - "loss": 1.5586, + "learning_rate": 2.0774315391879134e-07, + "loss": 1.8718, "step": 11 }, { - "epoch": 0.000340522133938706, + "epoch": 0.0003400493071495367, "grad_norm": 0.0, - "learning_rate": 2.2684310018903595e-07, - "loss": 1.5734, + "learning_rate": 2.2662889518413602e-07, + "loss": 1.9429, "step": 12 }, { - "epoch": 0.0003688989784335982, + "epoch": 0.00036838674941199807, "grad_norm": 0.0, - "learning_rate": 2.457466918714556e-07, - "loss": 1.5389, + "learning_rate": 2.4551463644948064e-07, + "loss": 1.7384, "step": 13 }, { - "epoch": 0.0003972758229284904, + "epoch": 0.00039672419167445944, "grad_norm": 0.0, - "learning_rate": 2.6465028355387527e-07, - "loss": 1.635, + "learning_rate": 2.6440037771482535e-07, + "loss": 1.8365, "step": 14 }, { - "epoch": 0.0004256526674233825, + "epoch": 0.0004250616339369209, "grad_norm": 0.0, - "learning_rate": 2.835538752362949e-07, - "loss": 1.5775, + "learning_rate": 2.8328611898017e-07, + "loss": 1.807, "step": 15 }, { - "epoch": 0.0004540295119182747, + "epoch": 0.00045339907619938225, "grad_norm": 0.0, - "learning_rate": 3.024574669187146e-07, - "loss": 1.5248, + "learning_rate": 3.0217186024551465e-07, + "loss": 1.7406, "step": 16 }, { - "epoch": 0.00048240635641316683, + "epoch": 0.00048173651846184363, "grad_norm": 0.0, - "learning_rate": 3.2136105860113424e-07, - "loss": 1.5082, + "learning_rate": 3.2105760151085936e-07, + "loss": 1.7758, "step": 17 }, { - "epoch": 0.0005107832009080591, + "epoch": 0.000510073960724305, "grad_norm": 0.0, - "learning_rate": 3.402646502835539e-07, - "loss": 1.3573, + "learning_rate": 3.39943342776204e-07, + "loss": 1.9236, "step": 18 }, { - "epoch": 0.0005391600454029512, + "epoch": 0.0005384114029867664, "grad_norm": 0.0, - "learning_rate": 3.591682419659736e-07, - "loss": 1.5769, + "learning_rate": 3.588290840415486e-07, + "loss": 1.7365, "step": 19 }, { - "epoch": 0.0005675368898978433, + "epoch": 0.0005667488452492278, "grad_norm": 0.0, - "learning_rate": 3.780718336483932e-07, - "loss": 1.4963, + "learning_rate": 3.777148253068933e-07, + "loss": 1.6179, "step": 20 }, { - "epoch": 0.0005959137343927355, + "epoch": 0.0005950862875116892, "grad_norm": 0.0, - "learning_rate": 3.9697542533081287e-07, - "loss": 1.6018, + "learning_rate": 3.9660056657223797e-07, + "loss": 1.7025, "step": 21 }, { - "epoch": 0.0006242905788876277, + "epoch": 0.0006234237297741506, "grad_norm": 0.0, - "learning_rate": 4.158790170132326e-07, - "loss": 1.5967, + "learning_rate": 4.154863078375827e-07, + "loss": 1.7085, "step": 22 }, { - "epoch": 0.0006526674233825199, + "epoch": 0.000651761172036612, "grad_norm": 0.0, - "learning_rate": 4.347826086956522e-07, - "loss": 1.4023, + "learning_rate": 4.3437204910292733e-07, + "loss": 1.6545, "step": 23 }, { - "epoch": 0.000681044267877412, + "epoch": 0.0006800986142990734, "grad_norm": 0.0, - "learning_rate": 4.536862003780719e-07, - "loss": 1.4445, + "learning_rate": 4.5325779036827203e-07, + "loss": 1.5475, "step": 24 }, { - "epoch": 0.0007094211123723042, + "epoch": 0.0007084360565615348, "grad_norm": 0.0, - "learning_rate": 4.7258979206049156e-07, - "loss": 1.3446, + "learning_rate": 4.721435316336167e-07, + "loss": 1.6116, "step": 25 }, { - "epoch": 0.0007377979568671964, + "epoch": 0.0007367734988239961, "grad_norm": 0.0, - "learning_rate": 4.914933837429112e-07, - "loss": 1.4777, + "learning_rate": 4.910292728989613e-07, + "loss": 1.5748, "step": 26 }, { - "epoch": 0.0007661748013620886, + "epoch": 0.0007651109410864575, "grad_norm": 0.0, - "learning_rate": 5.103969754253309e-07, - "loss": 1.3499, + "learning_rate": 5.09915014164306e-07, + "loss": 1.6433, "step": 27 }, { - "epoch": 0.0007945516458569807, + "epoch": 0.0007934483833489189, "grad_norm": 0.0, - "learning_rate": 5.293005671077505e-07, - "loss": 1.2587, + "learning_rate": 5.288007554296507e-07, + "loss": 1.6883, "step": 28 }, { - "epoch": 0.0008229284903518728, + "epoch": 0.0008217858256113803, "grad_norm": 0.0, - "learning_rate": 5.482041587901702e-07, - "loss": 1.3668, + "learning_rate": 5.476864966949953e-07, + "loss": 1.6438, "step": 29 }, { - "epoch": 0.000851305334846765, + "epoch": 0.0008501232678738418, "grad_norm": 0.0, - "learning_rate": 5.671077504725898e-07, - "loss": 1.4567, + "learning_rate": 5.6657223796034e-07, + "loss": 1.6052, "step": 30 }, { - "epoch": 0.0008796821793416572, + "epoch": 0.0008784607101363031, "grad_norm": 0.0, - "learning_rate": 5.860113421550095e-07, - "loss": 1.4492, + "learning_rate": 5.854579792256847e-07, + "loss": 1.744, "step": 31 }, { - "epoch": 0.0009080590238365494, + "epoch": 0.0009067981523987645, "grad_norm": 0.0, - "learning_rate": 6.049149338374292e-07, - "loss": 1.3674, + "learning_rate": 6.043437204910293e-07, + "loss": 1.5133, "step": 32 }, { - "epoch": 0.0009364358683314416, + "epoch": 0.0009351355946612259, "grad_norm": 0.0, - "learning_rate": 6.238185255198488e-07, - "loss": 1.3464, + "learning_rate": 6.232294617563739e-07, + "loss": 1.493, "step": 33 }, { - "epoch": 0.0009648127128263337, + "epoch": 0.0009634730369236873, "grad_norm": 0.0, - "learning_rate": 6.427221172022685e-07, - "loss": 1.2973, + "learning_rate": 6.421152030217187e-07, + "loss": 1.4501, "step": 34 }, { - "epoch": 0.0009931895573212259, + "epoch": 0.0009918104791861487, "grad_norm": 0.0, - "learning_rate": 6.61625708884688e-07, - "loss": 1.4635, + "learning_rate": 6.610009442870634e-07, + "loss": 1.4948, "step": 35 }, { - "epoch": 0.0010215664018161181, + "epoch": 0.00102014792144861, "grad_norm": 0.0, - "learning_rate": 6.805293005671078e-07, - "loss": 1.2485, + "learning_rate": 6.79886685552408e-07, + "loss": 1.3794, "step": 36 }, { - "epoch": 0.0010499432463110102, + "epoch": 0.0010484853637110715, "grad_norm": 0.0, - "learning_rate": 6.994328922495275e-07, - "loss": 1.357, + "learning_rate": 6.987724268177526e-07, + "loss": 1.4128, "step": 37 }, { - "epoch": 0.0010783200908059023, + "epoch": 0.0010768228059735328, "grad_norm": 0.0, - "learning_rate": 7.183364839319472e-07, - "loss": 1.4143, + "learning_rate": 7.176581680830972e-07, + "loss": 1.4231, "step": 38 }, { - "epoch": 0.0011066969353007946, + "epoch": 0.0011051602482359943, "grad_norm": 0.0, - "learning_rate": 7.372400756143668e-07, - "loss": 1.305, + "learning_rate": 7.365439093484419e-07, + "loss": 1.7042, "step": 39 }, { - "epoch": 0.0011350737797956867, + "epoch": 0.0011334976904984555, "grad_norm": 0.0, - "learning_rate": 7.561436672967864e-07, - "loss": 1.2737, + "learning_rate": 7.554296506137866e-07, + "loss": 1.4255, "step": 40 }, { - "epoch": 0.001163450624290579, + "epoch": 0.001161835132760917, "grad_norm": 0.0, - "learning_rate": 7.750472589792062e-07, - "loss": 1.2682, + "learning_rate": 7.743153918791313e-07, + "loss": 1.5361, "step": 41 }, { - "epoch": 0.001191827468785471, + "epoch": 0.0011901725750233785, "grad_norm": 0.0, - "learning_rate": 7.939508506616257e-07, - "loss": 1.3037, + "learning_rate": 7.932011331444759e-07, + "loss": 1.5909, "step": 42 }, { - "epoch": 0.0012202043132803631, + "epoch": 0.0012185100172858398, "grad_norm": 0.0, - "learning_rate": 8.128544423440454e-07, - "loss": 1.3223, + "learning_rate": 8.120868744098206e-07, + "loss": 1.5424, "step": 43 }, { - "epoch": 0.0012485811577752554, + "epoch": 0.0012468474595483013, "grad_norm": 0.0, - "learning_rate": 8.317580340264652e-07, - "loss": 1.2573, + "learning_rate": 8.309726156751653e-07, + "loss": 1.5165, "step": 44 }, { - "epoch": 0.0012769580022701475, + "epoch": 0.0012751849018107625, "grad_norm": 0.0, - "learning_rate": 8.506616257088847e-07, - "loss": 1.3235, + "learning_rate": 8.4985835694051e-07, + "loss": 1.3527, "step": 45 }, { - "epoch": 0.0013053348467650398, + "epoch": 0.001303522344073224, "grad_norm": 0.0, - "learning_rate": 8.695652173913044e-07, - "loss": 1.0813, + "learning_rate": 8.687440982058547e-07, + "loss": 1.4314, "step": 46 }, { - "epoch": 0.001333711691259932, + "epoch": 0.0013318597863356853, "grad_norm": 0.0, - "learning_rate": 8.884688090737241e-07, - "loss": 1.4175, + "learning_rate": 8.876298394711994e-07, + "loss": 1.4909, "step": 47 }, { - "epoch": 0.001362088535754824, + "epoch": 0.0013601972285981468, "grad_norm": 0.0, - "learning_rate": 9.073724007561438e-07, - "loss": 1.1378, + "learning_rate": 9.065155807365441e-07, + "loss": 1.4704, "step": 48 }, { - "epoch": 0.0013904653802497163, + "epoch": 0.001388534670860608, "grad_norm": 0.0, - "learning_rate": 9.262759924385634e-07, - "loss": 1.3513, + "learning_rate": 9.254013220018887e-07, + "loss": 1.5158, "step": 49 }, { - "epoch": 0.0014188422247446084, + "epoch": 0.0014168721131230695, "grad_norm": 0.0, - "learning_rate": 9.451795841209831e-07, - "loss": 1.1996, + "learning_rate": 9.442870632672334e-07, + "loss": 1.4045, "step": 50 }, { - "epoch": 0.0014472190692395007, + "epoch": 0.001445209555385531, "grad_norm": 0.0, - "learning_rate": 9.640831758034028e-07, - "loss": 1.1925, + "learning_rate": 9.631728045325779e-07, + "loss": 1.5609, "step": 51 }, { - "epoch": 0.0014755959137343927, + "epoch": 0.0014735469976479923, "grad_norm": 0.0, - "learning_rate": 9.829867674858224e-07, - "loss": 1.158, + "learning_rate": 9.820585457979226e-07, + "loss": 1.446, "step": 52 }, { - "epoch": 0.0015039727582292848, + "epoch": 0.0015018844399104538, "grad_norm": 0.0, - "learning_rate": 1.001890359168242e-06, - "loss": 1.2653, + "learning_rate": 1.0009442870632673e-06, + "loss": 1.2448, "step": 53 }, { - "epoch": 0.0015323496027241771, + "epoch": 0.001530221882172915, "grad_norm": 0.0, - "learning_rate": 1.0207939508506617e-06, - "loss": 1.3096, + "learning_rate": 1.019830028328612e-06, + "loss": 1.4854, "step": 54 }, { - "epoch": 0.0015607264472190692, + "epoch": 0.0015585593244353765, "grad_norm": 0.0, - "learning_rate": 1.0396975425330814e-06, - "loss": 1.3491, + "learning_rate": 1.0387157695939567e-06, + "loss": 1.4353, "step": 55 }, { - "epoch": 0.0015891032917139615, + "epoch": 0.0015868967666978378, "grad_norm": 0.0, - "learning_rate": 1.058601134215501e-06, - "loss": 1.2775, + "learning_rate": 1.0576015108593014e-06, + "loss": 1.4887, "step": 56 }, { - "epoch": 0.0016174801362088536, + "epoch": 0.0016152342089602993, "grad_norm": 0.0, - "learning_rate": 1.0775047258979207e-06, - "loss": 1.3036, + "learning_rate": 1.0764872521246459e-06, + "loss": 1.4382, "step": 57 }, { - "epoch": 0.0016458569807037457, + "epoch": 0.0016435716512227605, "grad_norm": 0.0, - "learning_rate": 1.0964083175803404e-06, - "loss": 1.2415, + "learning_rate": 1.0953729933899906e-06, + "loss": 1.4327, "step": 58 }, { - "epoch": 0.001674233825198638, + "epoch": 0.001671909093485222, "grad_norm": 0.0, - "learning_rate": 1.11531190926276e-06, - "loss": 1.2456, + "learning_rate": 1.1142587346553353e-06, + "loss": 1.5363, "step": 59 }, { - "epoch": 0.00170261066969353, + "epoch": 0.0017002465357476835, "grad_norm": 0.0, - "learning_rate": 1.1342155009451797e-06, - "loss": 1.2328, + "learning_rate": 1.13314447592068e-06, + "loss": 1.3883, "step": 60 }, { - "epoch": 0.0017309875141884223, + "epoch": 0.0017285839780101448, "grad_norm": 0.0, - "learning_rate": 1.1531190926275994e-06, - "loss": 1.1286, + "learning_rate": 1.1520302171860247e-06, + "loss": 1.4332, "step": 61 }, { - "epoch": 0.0017593643586833144, + "epoch": 0.0017569214202726063, "grad_norm": 0.0, - "learning_rate": 1.172022684310019e-06, - "loss": 1.3732, + "learning_rate": 1.1709159584513694e-06, + "loss": 1.2873, "step": 62 }, { - "epoch": 0.0017877412031782065, + "epoch": 0.0017852588625350675, "grad_norm": 0.0, - "learning_rate": 1.1909262759924387e-06, - "loss": 1.1947, + "learning_rate": 1.1898016997167141e-06, + "loss": 1.4282, "step": 63 }, { - "epoch": 0.0018161180476730988, + "epoch": 0.001813596304797529, "grad_norm": 0.0, - "learning_rate": 1.2098298676748583e-06, - "loss": 1.161, + "learning_rate": 1.2086874409820586e-06, + "loss": 1.2802, "step": 64 }, { - "epoch": 0.0018444948921679909, + "epoch": 0.0018419337470599903, "grad_norm": 0.0, - "learning_rate": 1.228733459357278e-06, - "loss": 1.1613, + "learning_rate": 1.2275731822474033e-06, + "loss": 1.4505, "step": 65 }, { - "epoch": 0.0018728717366628832, + "epoch": 0.0018702711893224518, "grad_norm": 0.0, - "learning_rate": 1.2476370510396976e-06, - "loss": 1.1978, + "learning_rate": 1.2464589235127478e-06, + "loss": 1.376, "step": 66 }, { - "epoch": 0.0019012485811577752, + "epoch": 0.001898608631584913, "grad_norm": 0.0, - "learning_rate": 1.266540642722117e-06, - "loss": 1.2115, + "learning_rate": 1.2653446647780925e-06, + "loss": 1.3548, "step": 67 }, { - "epoch": 0.0019296254256526673, + "epoch": 0.0019269460738473745, "grad_norm": 0.0, - "learning_rate": 1.285444234404537e-06, - "loss": 1.1695, + "learning_rate": 1.2842304060434374e-06, + "loss": 1.2231, "step": 68 }, { - "epoch": 0.0019580022701475596, + "epoch": 0.001955283516109836, "grad_norm": 0.0, - "learning_rate": 1.3043478260869566e-06, - "loss": 1.3129, + "learning_rate": 1.303116147308782e-06, + "loss": 1.3114, "step": 69 }, { - "epoch": 0.0019863791146424517, + "epoch": 0.0019836209583722975, "grad_norm": 0.0, - "learning_rate": 1.323251417769376e-06, - "loss": 1.1815, + "learning_rate": 1.3220018885741268e-06, + "loss": 1.2469, "step": 70 }, { - "epoch": 0.002014755959137344, + "epoch": 0.0020119584006347588, "grad_norm": 0.0, - "learning_rate": 1.342155009451796e-06, - "loss": 1.2826, + "learning_rate": 1.3408876298394713e-06, + "loss": 1.3226, "step": 71 }, { - "epoch": 0.0020431328036322363, + "epoch": 0.00204029584289722, "grad_norm": 0.0, - "learning_rate": 1.3610586011342156e-06, - "loss": 1.2458, + "learning_rate": 1.359773371104816e-06, + "loss": 1.3387, "step": 72 }, { - "epoch": 0.0020715096481271284, + "epoch": 0.0020686332851596813, "grad_norm": 0.0, - "learning_rate": 1.3799621928166355e-06, - "loss": 1.0872, + "learning_rate": 1.3786591123701607e-06, + "loss": 1.2803, "step": 73 }, { - "epoch": 0.0020998864926220205, + "epoch": 0.002096970727422143, "grad_norm": 0.0, - "learning_rate": 1.398865784499055e-06, - "loss": 1.2012, + "learning_rate": 1.3975448536355052e-06, + "loss": 1.3319, "step": 74 }, { - "epoch": 0.0021282633371169125, + "epoch": 0.0021253081696846043, "grad_norm": 0.0, - "learning_rate": 1.4177693761814746e-06, - "loss": 1.1517, + "learning_rate": 1.41643059490085e-06, + "loss": 1.4272, "step": 75 }, { - "epoch": 0.0021566401816118046, + "epoch": 0.0021536456119470655, "grad_norm": 0.0, - "learning_rate": 1.4366729678638944e-06, - "loss": 1.1891, + "learning_rate": 1.4353163361661944e-06, + "loss": 1.3019, "step": 76 }, { - "epoch": 0.002185017026106697, + "epoch": 0.0021819830542095272, "grad_norm": 0.0, - "learning_rate": 1.4555765595463139e-06, - "loss": 1.226, + "learning_rate": 1.4542020774315394e-06, + "loss": 1.3168, "step": 77 }, { - "epoch": 0.002213393870601589, + "epoch": 0.0022103204964719885, "grad_norm": 0.0, - "learning_rate": 1.4744801512287335e-06, - "loss": 1.1941, + "learning_rate": 1.4730878186968839e-06, + "loss": 1.4648, "step": 78 }, { - "epoch": 0.0022417707150964813, + "epoch": 0.0022386579387344498, "grad_norm": 0.0, - "learning_rate": 1.4933837429111534e-06, - "loss": 1.2591, + "learning_rate": 1.4919735599622288e-06, + "loss": 1.2441, "step": 79 }, { - "epoch": 0.0022701475595913734, + "epoch": 0.002266995380996911, "grad_norm": 0.0, - "learning_rate": 1.5122873345935729e-06, - "loss": 1.301, + "learning_rate": 1.5108593012275733e-06, + "loss": 1.2635, "step": 80 }, { - "epoch": 0.0022985244040862655, + "epoch": 0.0022953328232593728, "grad_norm": 0.0, - "learning_rate": 1.5311909262759925e-06, - "loss": 1.174, + "learning_rate": 1.529745042492918e-06, + "loss": 1.3317, "step": 81 }, { - "epoch": 0.002326901248581158, + "epoch": 0.002323670265521834, "grad_norm": 0.0, - "learning_rate": 1.5500945179584124e-06, - "loss": 1.1715, + "learning_rate": 1.5486307837582627e-06, + "loss": 1.3459, "step": 82 }, { - "epoch": 0.00235527809307605, + "epoch": 0.0023520077077842953, "grad_norm": 0.0, - "learning_rate": 1.5689981096408318e-06, - "loss": 1.13, + "learning_rate": 1.5675165250236074e-06, + "loss": 1.4001, "step": 83 }, { - "epoch": 0.002383654937570942, + "epoch": 0.002380345150046757, "grad_norm": 0.0, - "learning_rate": 1.5879017013232515e-06, - "loss": 1.2639, + "learning_rate": 1.5864022662889519e-06, + "loss": 1.3108, "step": 84 }, { - "epoch": 0.002412031782065834, + "epoch": 0.0024086825923092183, "grad_norm": 0.0, - "learning_rate": 1.6068052930056714e-06, - "loss": 1.2665, + "learning_rate": 1.6052880075542968e-06, + "loss": 1.299, "step": 85 }, { - "epoch": 0.0024404086265607263, + "epoch": 0.0024370200345716795, "grad_norm": 0.0, - "learning_rate": 1.6257088846880908e-06, - "loss": 1.2371, + "learning_rate": 1.6241737488196413e-06, + "loss": 1.2575, "step": 86 }, { - "epoch": 0.002468785471055619, + "epoch": 0.002465357476834141, "grad_norm": 0.0, - "learning_rate": 1.6446124763705105e-06, - "loss": 1.1932, + "learning_rate": 1.6430594900849862e-06, + "loss": 1.3018, "step": 87 }, { - "epoch": 0.002497162315550511, + "epoch": 0.0024936949190966025, "grad_norm": 0.0, - "learning_rate": 1.6635160680529303e-06, - "loss": 1.1747, + "learning_rate": 1.6619452313503307e-06, + "loss": 1.2533, "step": 88 }, { - "epoch": 0.002525539160045403, + "epoch": 0.0025220323613590638, "grad_norm": 0.0, - "learning_rate": 1.6824196597353498e-06, - "loss": 1.2115, + "learning_rate": 1.6808309726156752e-06, + "loss": 1.373, "step": 89 }, { - "epoch": 0.002553916004540295, + "epoch": 0.002550369803621525, "grad_norm": 0.0, - "learning_rate": 1.7013232514177694e-06, - "loss": 1.1421, + "learning_rate": 1.69971671388102e-06, + "loss": 1.3206, "step": 90 }, { - "epoch": 0.002582292849035187, + "epoch": 0.0025787072458839863, "grad_norm": 0.0, - "learning_rate": 1.7202268431001893e-06, - "loss": 1.0763, + "learning_rate": 1.7186024551463646e-06, + "loss": 1.2732, "step": 91 }, { - "epoch": 0.0026106696935300796, + "epoch": 0.002607044688146448, "grad_norm": 0.0, - "learning_rate": 1.7391304347826088e-06, - "loss": 1.237, + "learning_rate": 1.7374881964117093e-06, + "loss": 1.3606, "step": 92 }, { - "epoch": 0.0026390465380249717, + "epoch": 0.0026353821304089093, "grad_norm": 0.0, - "learning_rate": 1.7580340264650286e-06, - "loss": 1.2073, + "learning_rate": 1.7563739376770538e-06, + "loss": 1.2191, "step": 93 }, { - "epoch": 0.002667423382519864, + "epoch": 0.0026637195726713706, "grad_norm": 0.0, - "learning_rate": 1.7769376181474483e-06, - "loss": 1.1453, + "learning_rate": 1.7752596789423987e-06, + "loss": 1.4163, "step": 94 }, { - "epoch": 0.002695800227014756, + "epoch": 0.0026920570149338323, "grad_norm": 0.0, - "learning_rate": 1.7958412098298677e-06, - "loss": 1.1752, + "learning_rate": 1.7941454202077432e-06, + "loss": 1.432, "step": 95 }, { - "epoch": 0.002724177071509648, + "epoch": 0.0027203944571962935, "grad_norm": 0.0, - "learning_rate": 1.8147448015122876e-06, - "loss": 1.2231, + "learning_rate": 1.8130311614730881e-06, + "loss": 1.2738, "step": 96 }, { - "epoch": 0.0027525539160045405, + "epoch": 0.002748731899458755, "grad_norm": 0.0, - "learning_rate": 1.8336483931947073e-06, - "loss": 1.0829, + "learning_rate": 1.8319169027384326e-06, + "loss": 1.2629, "step": 97 }, { - "epoch": 0.0027809307604994326, + "epoch": 0.002777069341721216, "grad_norm": 0.0, - "learning_rate": 1.8525519848771267e-06, - "loss": 1.2483, + "learning_rate": 1.8508026440037773e-06, + "loss": 1.2672, "step": 98 }, { - "epoch": 0.0028093076049943246, + "epoch": 0.0028054067839836778, "grad_norm": 0.0, - "learning_rate": 1.8714555765595466e-06, - "loss": 1.1451, + "learning_rate": 1.869688385269122e-06, + "loss": 1.5001, "step": 99 }, { - "epoch": 0.0028376844494892167, + "epoch": 0.002833744226246139, "grad_norm": 0.0, - "learning_rate": 1.8903591682419662e-06, - "loss": 1.1886, + "learning_rate": 1.8885741265344667e-06, + "loss": 1.272, "step": 100 }, { - "epoch": 0.002866061293984109, + "epoch": 0.0028620816685086003, "grad_norm": 0.0, - "learning_rate": 1.909262759924386e-06, - "loss": 1.0508, + "learning_rate": 1.9074598677998114e-06, + "loss": 1.3843, "step": 101 }, { - "epoch": 0.0028944381384790013, + "epoch": 0.002890419110771062, "grad_norm": 0.0, - "learning_rate": 1.9281663516068055e-06, - "loss": 1.0985, + "learning_rate": 1.9263456090651557e-06, + "loss": 1.2614, "step": 102 }, { - "epoch": 0.0029228149829738934, + "epoch": 0.0029187565530335233, "grad_norm": 0.0, - "learning_rate": 1.947069943289225e-06, - "loss": 1.2544, + "learning_rate": 1.945231350330501e-06, + "loss": 1.1707, "step": 103 }, { - "epoch": 0.0029511918274687855, + "epoch": 0.0029470939952959845, "grad_norm": 0.0, - "learning_rate": 1.965973534971645e-06, - "loss": 1.2699, + "learning_rate": 1.964117091595845e-06, + "loss": 1.3327, "step": 104 }, { - "epoch": 0.0029795686719636776, + "epoch": 0.002975431437558446, "grad_norm": 0.0, - "learning_rate": 1.9848771266540645e-06, - "loss": 1.261, + "learning_rate": 1.98300283286119e-06, + "loss": 1.3974, "step": 105 }, { - "epoch": 0.0030079455164585696, + "epoch": 0.0030037688798209075, "grad_norm": 0.0, - "learning_rate": 2.003780718336484e-06, - "loss": 1.1757, + "learning_rate": 2.0018885741265345e-06, + "loss": 1.4677, "step": 106 }, { - "epoch": 0.003036322360953462, + "epoch": 0.0030321063220833688, "grad_norm": 0.0, - "learning_rate": 2.022684310018904e-06, - "loss": 1.0599, + "learning_rate": 2.0207743153918793e-06, + "loss": 1.2856, "step": 107 }, { - "epoch": 0.0030646992054483542, + "epoch": 0.00306044376434583, "grad_norm": 0.0, - "learning_rate": 2.0415879017013235e-06, - "loss": 1.1872, + "learning_rate": 2.039660056657224e-06, + "loss": 1.2867, "step": 108 }, { - "epoch": 0.0030930760499432463, + "epoch": 0.0030887812066082918, "grad_norm": 0.0, - "learning_rate": 2.060491493383743e-06, - "loss": 1.1823, + "learning_rate": 2.0585457979225687e-06, + "loss": 1.2441, "step": 109 }, { - "epoch": 0.0031214528944381384, + "epoch": 0.003117118648870753, "grad_norm": 0.0, - "learning_rate": 2.079395085066163e-06, - "loss": 1.1038, + "learning_rate": 2.0774315391879134e-06, + "loss": 1.1903, "step": 110 }, { - "epoch": 0.0031498297389330305, + "epoch": 0.0031454560911332143, "grad_norm": 0.0, - "learning_rate": 2.0982986767485825e-06, - "loss": 1.1498, + "learning_rate": 2.096317280453258e-06, + "loss": 1.1802, "step": 111 }, { - "epoch": 0.003178206583427923, + "epoch": 0.0031737935333956756, "grad_norm": 0.0, - "learning_rate": 2.117202268431002e-06, - "loss": 1.2322, + "learning_rate": 2.1152030217186028e-06, + "loss": 1.2908, "step": 112 }, { - "epoch": 0.003206583427922815, + "epoch": 0.0032021309756581373, "grad_norm": 0.0, - "learning_rate": 2.1361058601134218e-06, - "loss": 1.0839, + "learning_rate": 2.1340887629839475e-06, + "loss": 1.2318, "step": 113 }, { - "epoch": 0.003234960272417707, + "epoch": 0.0032304684179205985, "grad_norm": 0.0, - "learning_rate": 2.1550094517958414e-06, - "loss": 1.0966, + "learning_rate": 2.1529745042492918e-06, + "loss": 1.2753, "step": 114 }, { - "epoch": 0.0032633371169125992, + "epoch": 0.00325880586018306, "grad_norm": 0.0, - "learning_rate": 2.173913043478261e-06, - "loss": 1.1529, + "learning_rate": 2.171860245514637e-06, + "loss": 1.3102, "step": 115 }, { - "epoch": 0.0032917139614074913, + "epoch": 0.003287143302445521, "grad_norm": 0.0, - "learning_rate": 2.1928166351606808e-06, - "loss": 1.1382, + "learning_rate": 2.190745986779981e-06, + "loss": 1.2073, "step": 116 }, { - "epoch": 0.003320090805902384, + "epoch": 0.0033154807447079828, "grad_norm": 0.0, - "learning_rate": 2.2117202268431004e-06, - "loss": 1.0868, + "learning_rate": 2.209631728045326e-06, + "loss": 1.362, "step": 117 }, { - "epoch": 0.003348467650397276, + "epoch": 0.003343818186970444, "grad_norm": 0.0, - "learning_rate": 2.23062381852552e-06, - "loss": 1.1593, + "learning_rate": 2.2285174693106706e-06, + "loss": 1.3385, "step": 118 }, { - "epoch": 0.003376844494892168, + "epoch": 0.0033721556292329053, "grad_norm": 0.0, - "learning_rate": 2.2495274102079397e-06, - "loss": 1.0725, + "learning_rate": 2.2474032105760153e-06, + "loss": 1.2264, "step": 119 }, { - "epoch": 0.00340522133938706, + "epoch": 0.003400493071495367, "grad_norm": 0.0, - "learning_rate": 2.2684310018903594e-06, - "loss": 1.0975, + "learning_rate": 2.26628895184136e-06, + "loss": 1.3238, "step": 120 }, { - "epoch": 0.003433598183881952, + "epoch": 0.0034288305137578283, "grad_norm": 0.0, - "learning_rate": 2.287334593572779e-06, - "loss": 1.0643, + "learning_rate": 2.2851746931067047e-06, + "loss": 1.2363, "step": 121 }, { - "epoch": 0.0034619750283768447, + "epoch": 0.0034571679560202895, "grad_norm": 0.0, - "learning_rate": 2.3062381852551987e-06, - "loss": 1.2552, + "learning_rate": 2.3040604343720494e-06, + "loss": 1.2355, "step": 122 }, { - "epoch": 0.0034903518728717367, + "epoch": 0.003485505398282751, "grad_norm": 0.0, - "learning_rate": 2.3251417769376184e-06, - "loss": 1.2235, + "learning_rate": 2.3229461756373937e-06, + "loss": 1.2152, "step": 123 }, { - "epoch": 0.003518728717366629, + "epoch": 0.0035138428405452125, "grad_norm": 0.0, - "learning_rate": 2.344045368620038e-06, - "loss": 1.3259, + "learning_rate": 2.341831916902739e-06, + "loss": 1.3018, "step": 124 }, { - "epoch": 0.003547105561861521, + "epoch": 0.003542180282807674, "grad_norm": 0.0, - "learning_rate": 2.3629489603024577e-06, - "loss": 1.1089, + "learning_rate": 2.360717658168083e-06, + "loss": 1.4257, "step": 125 }, { - "epoch": 0.003575482406356413, + "epoch": 0.003570517725070135, "grad_norm": 0.0, - "learning_rate": 2.3818525519848773e-06, - "loss": 1.092, + "learning_rate": 2.3796033994334282e-06, + "loss": 1.3066, "step": 126 }, { - "epoch": 0.0036038592508513055, + "epoch": 0.0035988551673325968, "grad_norm": 0.0, - "learning_rate": 2.400756143667297e-06, - "loss": 1.2257, + "learning_rate": 2.3984891406987725e-06, + "loss": 1.2933, "step": 127 }, { - "epoch": 0.0036322360953461976, + "epoch": 0.003627192609595058, "grad_norm": 0.0, - "learning_rate": 2.4196597353497167e-06, - "loss": 1.1103, + "learning_rate": 2.4173748819641172e-06, + "loss": 1.2506, "step": 128 }, { - "epoch": 0.0036606129398410897, + "epoch": 0.0036555300518575193, "grad_norm": 0.0, - "learning_rate": 2.4385633270321363e-06, - "loss": 1.0021, + "learning_rate": 2.436260623229462e-06, + "loss": 1.2862, "step": 129 }, { - "epoch": 0.0036889897843359817, + "epoch": 0.0036838674941199806, "grad_norm": 0.0, - "learning_rate": 2.457466918714556e-06, - "loss": 1.2441, + "learning_rate": 2.4551463644948066e-06, + "loss": 1.3924, "step": 130 }, { - "epoch": 0.003717366628830874, + "epoch": 0.0037122049363824423, "grad_norm": 0.0, - "learning_rate": 2.4763705103969756e-06, - "loss": 1.1216, + "learning_rate": 2.4740321057601513e-06, + "loss": 1.1953, "step": 131 }, { - "epoch": 0.0037457434733257663, + "epoch": 0.0037405423786449035, "grad_norm": 0.0, - "learning_rate": 2.4952741020793953e-06, - "loss": 1.09, + "learning_rate": 2.4929178470254956e-06, + "loss": 1.39, "step": 132 }, { - "epoch": 0.0037741203178206584, + "epoch": 0.003768879820907365, "grad_norm": 0.0, - "learning_rate": 2.514177693761815e-06, - "loss": 1.2769, + "learning_rate": 2.5118035882908403e-06, + "loss": 1.2482, "step": 133 }, { - "epoch": 0.0038024971623155505, + "epoch": 0.003797217263169826, "grad_norm": 0.0, - "learning_rate": 2.533081285444234e-06, - "loss": 1.1239, + "learning_rate": 2.530689329556185e-06, + "loss": 1.3558, "step": 134 }, { - "epoch": 0.0038308740068104426, + "epoch": 0.0038255547054322878, "grad_norm": 0.0, - "learning_rate": 2.5519848771266543e-06, - "loss": 1.1168, + "learning_rate": 2.54957507082153e-06, + "loss": 1.3318, "step": 135 }, { - "epoch": 0.0038592508513053347, + "epoch": 0.003853892147694749, "grad_norm": 0.0, - "learning_rate": 2.570888468809074e-06, - "loss": 1.1944, + "learning_rate": 2.568460812086875e-06, + "loss": 1.2079, "step": 136 }, { - "epoch": 0.003887627695800227, + "epoch": 0.0038822295899572103, "grad_norm": 0.0, - "learning_rate": 2.589792060491493e-06, - "loss": 1.181, + "learning_rate": 2.587346553352219e-06, + "loss": 1.1694, "step": 137 }, { - "epoch": 0.003916004540295119, + "epoch": 0.003910567032219672, "grad_norm": 0.0, - "learning_rate": 2.6086956521739132e-06, - "loss": 1.1676, + "learning_rate": 2.606232294617564e-06, + "loss": 1.304, "step": 138 }, { - "epoch": 0.003944381384790012, + "epoch": 0.003938904474482133, "grad_norm": 0.0, - "learning_rate": 2.627599243856333e-06, - "loss": 1.1534, + "learning_rate": 2.6251180358829086e-06, + "loss": 1.3052, "step": 139 }, { - "epoch": 0.003972758229284903, + "epoch": 0.003967241916744595, "grad_norm": 0.0, - "learning_rate": 2.646502835538752e-06, - "loss": 1.1686, + "learning_rate": 2.6440037771482537e-06, + "loss": 1.3549, "step": 140 }, { - "epoch": 0.004001135073779796, + "epoch": 0.003995579359007056, "grad_norm": 0.0, - "learning_rate": 2.6654064272211722e-06, - "loss": 1.1894, + "learning_rate": 2.662889518413598e-06, + "loss": 1.2641, "step": 141 }, { - "epoch": 0.004029511918274688, + "epoch": 0.0040239168012695175, "grad_norm": 0.0, - "learning_rate": 2.684310018903592e-06, - "loss": 1.1708, + "learning_rate": 2.6817752596789427e-06, + "loss": 1.2177, "step": 142 }, { - "epoch": 0.00405788876276958, + "epoch": 0.004052254243531979, "grad_norm": 0.0, - "learning_rate": 2.703213610586012e-06, - "loss": 1.2236, + "learning_rate": 2.7006610009442874e-06, + "loss": 1.2983, "step": 143 }, { - "epoch": 0.004086265607264473, + "epoch": 0.00408059168579444, "grad_norm": 0.0, - "learning_rate": 2.722117202268431e-06, - "loss": 1.084, + "learning_rate": 2.719546742209632e-06, + "loss": 1.2385, "step": 144 }, { - "epoch": 0.004114642451759364, + "epoch": 0.004108929128056902, "grad_norm": 0.0, - "learning_rate": 2.741020793950851e-06, - "loss": 1.0896, + "learning_rate": 2.7384324834749764e-06, + "loss": 1.295, "step": 145 }, { - "epoch": 0.004143019296254257, + "epoch": 0.004137266570319363, "grad_norm": 0.0, - "learning_rate": 2.759924385633271e-06, - "loss": 1.2076, + "learning_rate": 2.7573182247403215e-06, + "loss": 1.306, "step": 146 }, { - "epoch": 0.004171396140749148, + "epoch": 0.004165604012581824, "grad_norm": 0.0, - "learning_rate": 2.77882797731569e-06, - "loss": 1.0709, + "learning_rate": 2.776203966005666e-06, + "loss": 1.26, "step": 147 }, { - "epoch": 0.004199772985244041, + "epoch": 0.004193941454844286, "grad_norm": 0.0, - "learning_rate": 2.79773156899811e-06, - "loss": 1.2851, + "learning_rate": 2.7950897072710105e-06, + "loss": 1.2722, "step": 148 }, { - "epoch": 0.0042281498297389334, + "epoch": 0.004222278897106747, "grad_norm": 0.0, - "learning_rate": 2.81663516068053e-06, - "loss": 1.1168, + "learning_rate": 2.813975448536355e-06, + "loss": 1.329, "step": 149 }, { - "epoch": 0.004256526674233825, + "epoch": 0.0042506163393692085, "grad_norm": 0.0, - "learning_rate": 2.835538752362949e-06, - "loss": 1.1165, + "learning_rate": 2.8328611898017e-06, + "loss": 1.3113, "step": 150 }, { - "epoch": 0.004284903518728718, + "epoch": 0.00427895378163167, "grad_norm": 0.0, - "learning_rate": 2.854442344045369e-06, - "loss": 1.2343, + "learning_rate": 2.8517469310670446e-06, + "loss": 1.2202, "step": 151 }, { - "epoch": 0.004313280363223609, + "epoch": 0.004307291223894131, "grad_norm": 0.0, - "learning_rate": 2.873345935727789e-06, - "loss": 1.1205, + "learning_rate": 2.870632672332389e-06, + "loss": 1.3385, "step": 152 }, { - "epoch": 0.004341657207718502, + "epoch": 0.004335628666156593, "grad_norm": 0.0, - "learning_rate": 2.892249527410208e-06, - "loss": 1.1872, + "learning_rate": 2.889518413597734e-06, + "loss": 1.2448, "step": 153 }, { - "epoch": 0.004370034052213394, + "epoch": 0.0043639661084190545, "grad_norm": 0.0, - "learning_rate": 2.9111531190926278e-06, - "loss": 1.0697, + "learning_rate": 2.9084041548630787e-06, + "loss": 1.3074, "step": 154 }, { - "epoch": 0.004398410896708286, + "epoch": 0.004392303550681515, "grad_norm": 0.0, - "learning_rate": 2.930056710775048e-06, - "loss": 1.1793, + "learning_rate": 2.9272898961284234e-06, + "loss": 1.2231, "step": 155 }, { - "epoch": 0.004426787741203178, + "epoch": 0.004420640992943977, "grad_norm": 0.0, - "learning_rate": 2.948960302457467e-06, - "loss": 1.1258, + "learning_rate": 2.9461756373937677e-06, + "loss": 1.24, "step": 156 }, { - "epoch": 0.00445516458569807, + "epoch": 0.004448978435206438, "grad_norm": 0.0, - "learning_rate": 2.9678638941398867e-06, - "loss": 1.1127, + "learning_rate": 2.9650613786591124e-06, + "loss": 1.2204, "step": 157 }, { - "epoch": 0.004483541430192963, + "epoch": 0.0044773158774688996, "grad_norm": 0.0, - "learning_rate": 2.986767485822307e-06, - "loss": 1.1584, + "learning_rate": 2.9839471199244575e-06, + "loss": 1.2688, "step": 158 }, { - "epoch": 0.004511918274687855, + "epoch": 0.004505653319731361, "grad_norm": 0.0, - "learning_rate": 3.005671077504726e-06, - "loss": 1.0905, + "learning_rate": 3.0028328611898022e-06, + "loss": 1.1301, "step": 159 }, { - "epoch": 0.004540295119182747, + "epoch": 0.004533990761993822, "grad_norm": 0.0, - "learning_rate": 3.0245746691871457e-06, - "loss": 1.2025, + "learning_rate": 3.0217186024551465e-06, + "loss": 1.2487, "step": 160 }, { - "epoch": 0.004568671963677639, + "epoch": 0.004562328204256284, "grad_norm": 0.0, - "learning_rate": 3.043478260869566e-06, - "loss": 1.2253, + "learning_rate": 3.0406043437204912e-06, + "loss": 1.1977, "step": 161 }, { - "epoch": 0.004597048808172531, + "epoch": 0.0045906656465187455, "grad_norm": 0.0, - "learning_rate": 3.062381852551985e-06, - "loss": 1.2033, + "learning_rate": 3.059490084985836e-06, + "loss": 1.1379, "step": 162 }, { - "epoch": 0.004625425652667423, + "epoch": 0.004619003088781206, "grad_norm": 0.0, - "learning_rate": 3.0812854442344047e-06, - "loss": 1.1696, + "learning_rate": 3.0783758262511802e-06, + "loss": 1.2034, "step": 163 }, { - "epoch": 0.004653802497162316, + "epoch": 0.004647340531043668, "grad_norm": 0.0, - "learning_rate": 3.1001890359168248e-06, - "loss": 1.1219, + "learning_rate": 3.0972615675165254e-06, + "loss": 1.2661, "step": 164 }, { - "epoch": 0.004682179341657208, + "epoch": 0.00467567797330613, "grad_norm": 0.0, - "learning_rate": 3.119092627599244e-06, - "loss": 1.0379, + "learning_rate": 3.11614730878187e-06, + "loss": 1.1943, "step": 165 }, { - "epoch": 0.0047105561861521, + "epoch": 0.004704015415568591, "grad_norm": 0.0, - "learning_rate": 3.1379962192816637e-06, - "loss": 1.1075, + "learning_rate": 3.1350330500472148e-06, + "loss": 1.2672, "step": 166 }, { - "epoch": 0.004738933030646992, + "epoch": 0.004732352857831052, "grad_norm": 0.0, - "learning_rate": 3.1568998109640837e-06, - "loss": 1.0141, + "learning_rate": 3.153918791312559e-06, + "loss": 1.1424, "step": 167 }, { - "epoch": 0.004767309875141884, + "epoch": 0.004760690300093514, "grad_norm": 0.0, - "learning_rate": 3.175803402646503e-06, - "loss": 1.169, + "learning_rate": 3.1728045325779038e-06, + "loss": 1.2279, "step": 168 }, { - "epoch": 0.004795686719636777, + "epoch": 0.004789027742355975, "grad_norm": 0.0, - "learning_rate": 3.1947069943289226e-06, - "loss": 1.0287, + "learning_rate": 3.191690273843249e-06, + "loss": 1.1577, "step": 169 }, { - "epoch": 0.004824063564131668, + "epoch": 0.0048173651846184365, "grad_norm": 0.0, - "learning_rate": 3.2136105860113427e-06, - "loss": 1.2131, + "learning_rate": 3.2105760151085936e-06, + "loss": 1.1403, "step": 170 }, { - "epoch": 0.004852440408626561, + "epoch": 0.004845702626880897, "grad_norm": 0.0, - "learning_rate": 3.232514177693762e-06, - "loss": 1.093, + "learning_rate": 3.229461756373938e-06, + "loss": 1.1198, "step": 171 }, { - "epoch": 0.004880817253121453, + "epoch": 0.004874040069143359, "grad_norm": 0.0, - "learning_rate": 3.2514177693761816e-06, - "loss": 1.0597, + "learning_rate": 3.2483474976392826e-06, + "loss": 1.4328, "step": 172 }, { - "epoch": 0.004909194097616345, + "epoch": 0.004902377511405821, "grad_norm": 0.0, - "learning_rate": 3.2703213610586017e-06, - "loss": 1.137, + "learning_rate": 3.2672332389046273e-06, + "loss": 1.1816, "step": 173 }, { - "epoch": 0.004937570942111238, + "epoch": 0.004930714953668282, "grad_norm": 0.0, - "learning_rate": 3.289224952741021e-06, - "loss": 1.1445, + "learning_rate": 3.2861189801699724e-06, + "loss": 1.2372, "step": 174 }, { - "epoch": 0.004965947786606129, + "epoch": 0.004959052395930743, "grad_norm": 0.0, - "learning_rate": 3.3081285444234406e-06, - "loss": 1.2112, + "learning_rate": 3.3050047214353163e-06, + "loss": 1.2335, "step": 175 }, { - "epoch": 0.004994324631101022, + "epoch": 0.004987389838193205, "grad_norm": 0.0, - "learning_rate": 3.3270321361058607e-06, - "loss": 1.2593, + "learning_rate": 3.3238904627006614e-06, + "loss": 1.1978, "step": 176 }, { - "epoch": 0.005022701475595913, + "epoch": 0.005015727280455666, "grad_norm": 0.0, - "learning_rate": 3.34593572778828e-06, - "loss": 1.1005, + "learning_rate": 3.342776203966006e-06, + "loss": 1.2584, "step": 177 }, { - "epoch": 0.005051078320090806, + "epoch": 0.0050440647227181275, "grad_norm": 0.0, - "learning_rate": 3.3648393194706996e-06, - "loss": 1.1233, + "learning_rate": 3.3616619452313504e-06, + "loss": 1.2833, "step": 178 }, { - "epoch": 0.0050794551645856984, + "epoch": 0.005072402164980589, "grad_norm": 0.0, - "learning_rate": 3.3837429111531196e-06, - "loss": 1.2104, + "learning_rate": 3.380547686496695e-06, + "loss": 1.2027, "step": 179 }, { - "epoch": 0.00510783200908059, + "epoch": 0.00510073960724305, "grad_norm": 0.0, - "learning_rate": 3.402646502835539e-06, - "loss": 1.1046, + "learning_rate": 3.39943342776204e-06, + "loss": 1.1873, "step": 180 }, { - "epoch": 0.005136208853575483, + "epoch": 0.005129077049505512, "grad_norm": 0.0, - "learning_rate": 3.4215500945179585e-06, - "loss": 1.0977, + "learning_rate": 3.418319169027385e-06, + "loss": 1.1542, "step": 181 }, { - "epoch": 0.005164585698070374, + "epoch": 0.005157414491767973, "grad_norm": 0.0, - "learning_rate": 3.4404536862003786e-06, - "loss": 1.1314, + "learning_rate": 3.437204910292729e-06, + "loss": 1.2029, "step": 182 }, { - "epoch": 0.005192962542565267, + "epoch": 0.005185751934030434, "grad_norm": 0.0, - "learning_rate": 3.459357277882798e-06, - "loss": 1.2799, + "learning_rate": 3.456090651558074e-06, + "loss": 1.1537, "step": 183 }, { - "epoch": 0.005221339387060159, + "epoch": 0.005214089376292896, "grad_norm": 0.0, - "learning_rate": 3.4782608695652175e-06, - "loss": 1.1058, + "learning_rate": 3.4749763928234186e-06, + "loss": 1.2198, "step": 184 }, { - "epoch": 0.005249716231555051, + "epoch": 0.005242426818555357, "grad_norm": 0.0, - "learning_rate": 3.4971644612476376e-06, - "loss": 1.1795, + "learning_rate": 3.4938621340887633e-06, + "loss": 1.1828, "step": 185 }, { - "epoch": 0.0052780930760499434, + "epoch": 0.0052707642608178186, "grad_norm": 0.0, - "learning_rate": 3.5160680529300572e-06, - "loss": 1.0912, + "learning_rate": 3.5127478753541076e-06, + "loss": 1.207, "step": 186 }, { - "epoch": 0.005306469920544835, + "epoch": 0.00529910170308028, "grad_norm": 0.0, - "learning_rate": 3.5349716446124765e-06, - "loss": 1.1478, + "learning_rate": 3.5316336166194527e-06, + "loss": 1.1825, "step": 187 }, { - "epoch": 0.005334846765039728, + "epoch": 0.005327439145342741, "grad_norm": 0.0, - "learning_rate": 3.5538752362948966e-06, - "loss": 1.0885, + "learning_rate": 3.5505193578847974e-06, + "loss": 1.1684, "step": 188 }, { - "epoch": 0.00536322360953462, + "epoch": 0.005355776587605203, "grad_norm": 0.0, - "learning_rate": 3.5727788279773162e-06, - "loss": 1.027, + "learning_rate": 3.5694050991501417e-06, + "loss": 1.2487, "step": 189 }, { - "epoch": 0.005391600454029512, + "epoch": 0.0053841140298676645, "grad_norm": 0.0, - "learning_rate": 3.5916824196597355e-06, - "loss": 1.0976, + "learning_rate": 3.5882908404154864e-06, + "loss": 1.1942, "step": 190 }, { - "epoch": 0.005419977298524404, + "epoch": 0.005412451472130125, "grad_norm": 0.0, - "learning_rate": 3.6105860113421555e-06, - "loss": 1.2166, + "learning_rate": 3.607176581680831e-06, + "loss": 1.2144, "step": 191 }, { - "epoch": 0.005448354143019296, + "epoch": 0.005440788914392587, "grad_norm": 0.0, - "learning_rate": 3.629489603024575e-06, - "loss": 1.1633, + "learning_rate": 3.6260623229461763e-06, + "loss": 1.4025, "step": 192 }, { - "epoch": 0.0054767309875141884, + "epoch": 0.005469126356655049, "grad_norm": 0.0, - "learning_rate": 3.6483931947069944e-06, - "loss": 1.1325, + "learning_rate": 3.6449480642115205e-06, + "loss": 1.08, "step": 193 }, { - "epoch": 0.005505107832009081, + "epoch": 0.00549746379891751, "grad_norm": 0.0, - "learning_rate": 3.6672967863894145e-06, - "loss": 1.1855, + "learning_rate": 3.6638338054768652e-06, + "loss": 1.1788, "step": 194 }, { - "epoch": 0.005533484676503973, + "epoch": 0.005525801241179971, "grad_norm": 0.0, - "learning_rate": 3.686200378071834e-06, - "loss": 1.0189, + "learning_rate": 3.68271954674221e-06, + "loss": 1.2086, "step": 195 }, { - "epoch": 0.005561861520998865, + "epoch": 0.005554138683442432, "grad_norm": 0.0, - "learning_rate": 3.7051039697542534e-06, - "loss": 1.1936, + "learning_rate": 3.7016052880075547e-06, + "loss": 1.2828, "step": 196 }, { - "epoch": 0.005590238365493757, + "epoch": 0.005582476125704894, "grad_norm": 0.0, - "learning_rate": 3.7240075614366735e-06, - "loss": 1.0137, + "learning_rate": 3.720491029272899e-06, + "loss": 1.1159, "step": 197 }, { - "epoch": 0.005618615209988649, + "epoch": 0.0056108135679673555, "grad_norm": 0.0, - "learning_rate": 3.742911153119093e-06, - "loss": 1.1463, + "learning_rate": 3.739376770538244e-06, + "loss": 1.1252, "step": 198 }, { - "epoch": 0.005646992054483542, + "epoch": 0.005639151010229816, "grad_norm": 0.0, - "learning_rate": 3.7618147448015124e-06, - "loss": 1.1313, + "learning_rate": 3.7582625118035888e-06, + "loss": 1.1085, "step": 199 }, { - "epoch": 0.0056753688989784334, + "epoch": 0.005667488452492278, "grad_norm": 0.0, - "learning_rate": 3.7807183364839325e-06, - "loss": 1.2205, + "learning_rate": 3.7771482530689335e-06, + "loss": 1.2355, "step": 200 }, { - "epoch": 0.005703745743473326, + "epoch": 0.00569582589475474, "grad_norm": 0.0, - "learning_rate": 3.799621928166352e-06, - "loss": 1.1478, + "learning_rate": 3.7960339943342778e-06, + "loss": 1.2592, "step": 201 }, { - "epoch": 0.005732122587968218, + "epoch": 0.005724163337017201, "grad_norm": 0.0, - "learning_rate": 3.818525519848772e-06, - "loss": 1.0767, + "learning_rate": 3.814919735599623e-06, + "loss": 1.16, "step": 202 }, { - "epoch": 0.00576049943246311, + "epoch": 0.005752500779279662, "grad_norm": 0.0, - "learning_rate": 3.8374291115311914e-06, - "loss": 0.9917, + "learning_rate": 3.833805476864968e-06, + "loss": 1.3342, "step": 203 }, { - "epoch": 0.005788876276958003, + "epoch": 0.005780838221542124, "grad_norm": 0.0, - "learning_rate": 3.856332703213611e-06, - "loss": 1.151, + "learning_rate": 3.8526912181303115e-06, + "loss": 1.2954, "step": 204 }, { - "epoch": 0.005817253121452894, + "epoch": 0.005809175663804585, "grad_norm": 0.0, - "learning_rate": 3.875236294896031e-06, - "loss": 1.1743, + "learning_rate": 3.871576959395656e-06, + "loss": 1.2168, "step": 205 }, { - "epoch": 0.005845629965947787, + "epoch": 0.0058375131060670465, "grad_norm": 0.0, - "learning_rate": 3.89413988657845e-06, - "loss": 1.1749, + "learning_rate": 3.890462700661002e-06, + "loss": 1.1747, "step": 206 }, { - "epoch": 0.0058740068104426784, + "epoch": 0.005865850548329507, "grad_norm": 0.0, - "learning_rate": 3.91304347826087e-06, - "loss": 1.1226, + "learning_rate": 3.909348441926346e-06, + "loss": 1.2069, "step": 207 }, { - "epoch": 0.005902383654937571, + "epoch": 0.005894187990591969, "grad_norm": 0.0, - "learning_rate": 3.93194706994329e-06, - "loss": 1.0245, + "learning_rate": 3.92823418319169e-06, + "loss": 1.2817, "step": 208 }, { - "epoch": 0.0059307604994324635, + "epoch": 0.005922525432854431, "grad_norm": 0.0, - "learning_rate": 3.950850661625709e-06, - "loss": 1.0116, + "learning_rate": 3.947119924457035e-06, + "loss": 1.3235, "step": 209 }, { - "epoch": 0.005959137343927355, + "epoch": 0.005950862875116892, "grad_norm": 0.0, - "learning_rate": 3.969754253308129e-06, - "loss": 1.0735, + "learning_rate": 3.96600566572238e-06, + "loss": 1.2228, "step": 210 }, { - "epoch": 0.005987514188422248, + "epoch": 0.005979200317379353, "grad_norm": 0.0, - "learning_rate": 3.988657844990549e-06, - "loss": 1.1202, + "learning_rate": 3.984891406987724e-06, + "loss": 1.2421, "step": 211 }, { - "epoch": 0.006015891032917139, + "epoch": 0.006007537759641815, "grad_norm": 0.0, - "learning_rate": 4.007561436672968e-06, - "loss": 1.0577, + "learning_rate": 4.003777148253069e-06, + "loss": 1.2018, "step": 212 }, { - "epoch": 0.006044267877412032, + "epoch": 0.006035875201904276, "grad_norm": 0.0, - "learning_rate": 4.026465028355388e-06, - "loss": 1.0854, + "learning_rate": 4.022662889518414e-06, + "loss": 1.2424, "step": 213 }, { - "epoch": 0.006072644721906924, + "epoch": 0.0060642126441667376, "grad_norm": 0.0, - "learning_rate": 4.045368620037808e-06, - "loss": 1.087, + "learning_rate": 4.0415486307837585e-06, + "loss": 1.2189, "step": 214 }, { - "epoch": 0.006101021566401816, + "epoch": 0.006092550086429199, "grad_norm": 0.0, - "learning_rate": 4.064272211720227e-06, - "loss": 1.0675, + "learning_rate": 4.060434372049103e-06, + "loss": 1.2545, "step": 215 }, { - "epoch": 0.0061293984108967085, + "epoch": 0.00612088752869166, "grad_norm": 0.0, - "learning_rate": 4.083175803402647e-06, - "loss": 1.0993, + "learning_rate": 4.079320113314448e-06, + "loss": 1.2996, "step": 216 }, { - "epoch": 0.0061577752553916, + "epoch": 0.006149224970954122, "grad_norm": 0.0, - "learning_rate": 4.102079395085067e-06, - "loss": 1.1538, + "learning_rate": 4.098205854579793e-06, + "loss": 1.2469, "step": 217 }, { - "epoch": 0.006186152099886493, + "epoch": 0.0061775624132165835, "grad_norm": 0.0, - "learning_rate": 4.120982986767486e-06, - "loss": 1.0317, + "learning_rate": 4.117091595845137e-06, + "loss": 1.3011, "step": 218 }, { - "epoch": 0.006214528944381385, + "epoch": 0.006205899855479044, "grad_norm": 0.0, - "learning_rate": 4.139886578449906e-06, - "loss": 1.1135, + "learning_rate": 4.135977337110482e-06, + "loss": 1.386, "step": 219 }, { - "epoch": 0.006242905788876277, + "epoch": 0.006234237297741506, "grad_norm": 0.0, - "learning_rate": 4.158790170132326e-06, - "loss": 1.108, + "learning_rate": 4.154863078375827e-06, + "loss": 1.3004, "step": 220 }, { - "epoch": 0.006271282633371169, + "epoch": 0.006262574740003967, "grad_norm": 0.0, - "learning_rate": 4.177693761814745e-06, - "loss": 1.1342, + "learning_rate": 4.1737488196411714e-06, + "loss": 1.1944, "step": 221 }, { - "epoch": 0.006299659477866061, + "epoch": 0.006290912182266429, "grad_norm": 0.0, - "learning_rate": 4.196597353497165e-06, - "loss": 1.1353, + "learning_rate": 4.192634560906516e-06, + "loss": 1.3088, "step": 222 }, { - "epoch": 0.0063280363223609535, + "epoch": 0.00631924962452889, "grad_norm": 0.0, - "learning_rate": 4.215500945179585e-06, - "loss": 1.0895, + "learning_rate": 4.21152030217186e-06, + "loss": 1.2352, "step": 223 }, { - "epoch": 0.006356413166855846, + "epoch": 0.006347587066791351, "grad_norm": 0.0, - "learning_rate": 4.234404536862004e-06, - "loss": 1.1093, + "learning_rate": 4.2304060434372056e-06, + "loss": 1.2207, "step": 224 }, { - "epoch": 0.006384790011350738, + "epoch": 0.006375924509053813, "grad_norm": 0.0, - "learning_rate": 4.253308128544424e-06, - "loss": 1.0825, + "learning_rate": 4.24929178470255e-06, + "loss": 1.2494, "step": 225 }, { - "epoch": 0.00641316685584563, + "epoch": 0.0064042619513162745, "grad_norm": 0.0, - "learning_rate": 4.2722117202268436e-06, - "loss": 1.1407, + "learning_rate": 4.268177525967895e-06, + "loss": 1.3145, "step": 226 }, { - "epoch": 0.006441543700340522, + "epoch": 0.006432599393578735, "grad_norm": 0.0, - "learning_rate": 4.291115311909263e-06, - "loss": 1.1452, + "learning_rate": 4.287063267233239e-06, + "loss": 1.1792, "step": 227 }, { - "epoch": 0.006469920544835414, + "epoch": 0.006460936835841197, "grad_norm": 0.0, - "learning_rate": 4.310018903591683e-06, - "loss": 1.1328, + "learning_rate": 4.3059490084985835e-06, + "loss": 1.1788, "step": 228 }, { - "epoch": 0.006498297389330307, + "epoch": 0.006489274278103659, "grad_norm": 0.0, - "learning_rate": 4.3289224952741025e-06, - "loss": 1.0636, + "learning_rate": 4.324834749763929e-06, + "loss": 1.1329, "step": 229 }, { - "epoch": 0.0065266742338251985, + "epoch": 0.00651761172036612, "grad_norm": 0.0, - "learning_rate": 4.347826086956522e-06, - "loss": 1.1859, + "learning_rate": 4.343720491029274e-06, + "loss": 1.1944, "step": 230 }, { - "epoch": 0.006555051078320091, + "epoch": 0.006545949162628581, "grad_norm": 0.0, - "learning_rate": 4.366729678638942e-06, - "loss": 1.122, + "learning_rate": 4.362606232294618e-06, + "loss": 1.2713, "step": 231 }, { - "epoch": 0.006583427922814983, + "epoch": 0.006574286604891042, "grad_norm": 0.0, - "learning_rate": 4.3856332703213615e-06, - "loss": 1.0177, + "learning_rate": 4.381491973559962e-06, + "loss": 1.257, "step": 232 }, { - "epoch": 0.006611804767309875, + "epoch": 0.006602624047153504, "grad_norm": 0.0, - "learning_rate": 4.404536862003781e-06, - "loss": 1.0872, + "learning_rate": 4.400377714825307e-06, + "loss": 1.2758, "step": 233 }, { - "epoch": 0.006640181611804768, + "epoch": 0.0066309614894159655, "grad_norm": 0.0, - "learning_rate": 4.423440453686201e-06, - "loss": 1.056, + "learning_rate": 4.419263456090652e-06, + "loss": 1.1614, "step": 234 }, { - "epoch": 0.006668558456299659, + "epoch": 0.006659298931678426, "grad_norm": 0.0, - "learning_rate": 4.4423440453686205e-06, - "loss": 1.1278, + "learning_rate": 4.4381491973559965e-06, + "loss": 1.2531, "step": 235 }, { - "epoch": 0.006696935300794552, + "epoch": 0.006687636373940888, "grad_norm": 0.0, - "learning_rate": 4.46124763705104e-06, - "loss": 1.0988, + "learning_rate": 4.457034938621341e-06, + "loss": 1.2192, "step": 236 }, { - "epoch": 0.0067253121452894435, + "epoch": 0.00671597381620335, "grad_norm": 0.0, - "learning_rate": 4.48015122873346e-06, - "loss": 0.9534, + "learning_rate": 4.475920679886686e-06, + "loss": 1.0932, "step": 237 }, { - "epoch": 0.006753688989784336, + "epoch": 0.006744311258465811, "grad_norm": 0.0, - "learning_rate": 4.4990548204158795e-06, - "loss": 1.065, + "learning_rate": 4.494806421152031e-06, + "loss": 1.2228, "step": 238 }, { - "epoch": 0.0067820658342792285, + "epoch": 0.006772648700728272, "grad_norm": 0.0, - "learning_rate": 4.517958412098299e-06, - "loss": 1.0014, + "learning_rate": 4.513692162417375e-06, + "loss": 1.1165, "step": 239 }, { - "epoch": 0.00681044267877412, + "epoch": 0.006800986142990734, "grad_norm": 0.0, - "learning_rate": 4.536862003780719e-06, - "loss": 1.1644, + "learning_rate": 4.53257790368272e-06, + "loss": 1.267, "step": 240 }, { - "epoch": 0.006838819523269013, + "epoch": 0.006829323585253195, "grad_norm": 0.0, - "learning_rate": 4.5557655954631384e-06, - "loss": 1.1082, + "learning_rate": 4.551463644948065e-06, + "loss": 1.2385, "step": 241 }, { - "epoch": 0.006867196367763904, + "epoch": 0.0068576610275156566, "grad_norm": 0.0, - "learning_rate": 4.574669187145558e-06, - "loss": 1.15, + "learning_rate": 4.570349386213409e-06, + "loss": 1.2845, "step": 242 }, { - "epoch": 0.006895573212258797, + "epoch": 0.006885998469778117, "grad_norm": 0.0, - "learning_rate": 4.593572778827978e-06, - "loss": 1.1852, + "learning_rate": 4.589235127478754e-06, + "loss": 1.1191, "step": 243 }, { - "epoch": 0.006923950056753689, + "epoch": 0.006914335912040579, "grad_norm": 0.0, - "learning_rate": 4.612476370510397e-06, - "loss": 1.0718, + "learning_rate": 4.608120868744099e-06, + "loss": 1.2372, "step": 244 }, { - "epoch": 0.006952326901248581, + "epoch": 0.006942673354303041, "grad_norm": 0.0, - "learning_rate": 4.631379962192817e-06, - "loss": 1.0342, + "learning_rate": 4.6270066100094435e-06, + "loss": 1.0528, "step": 245 }, { - "epoch": 0.0069807037457434735, + "epoch": 0.006971010796565502, "grad_norm": 0.0, - "learning_rate": 4.650283553875237e-06, - "loss": 1.1017, + "learning_rate": 4.645892351274787e-06, + "loss": 1.2126, "step": 246 }, { - "epoch": 0.007009080590238365, + "epoch": 0.006999348238827963, "grad_norm": 0.0, - "learning_rate": 4.669187145557656e-06, - "loss": 1.0341, + "learning_rate": 4.664778092540133e-06, + "loss": 1.3069, "step": 247 }, { - "epoch": 0.007037457434733258, + "epoch": 0.007027685681090425, "grad_norm": 0.0, - "learning_rate": 4.688090737240076e-06, - "loss": 1.1276, + "learning_rate": 4.683663833805478e-06, + "loss": 1.2067, "step": 248 }, { - "epoch": 0.00706583427922815, + "epoch": 0.007056023123352886, "grad_norm": 0.0, - "learning_rate": 4.706994328922496e-06, - "loss": 1.0712, + "learning_rate": 4.7025495750708215e-06, + "loss": 1.2478, "step": 249 }, { - "epoch": 0.007094211123723042, + "epoch": 0.007084360565615348, "grad_norm": 0.0, - "learning_rate": 4.725897920604915e-06, - "loss": 1.1603, + "learning_rate": 4.721435316336166e-06, + "loss": 1.2856, "step": 250 }, { - "epoch": 0.007122587968217934, + "epoch": 0.007112698007877809, "grad_norm": 0.0, - "learning_rate": 4.744801512287335e-06, - "loss": 1.0171, + "learning_rate": 4.740321057601511e-06, + "loss": 1.2213, "step": 251 }, { - "epoch": 0.007150964812712826, + "epoch": 0.00714103545014027, "grad_norm": 0.0, - "learning_rate": 4.763705103969755e-06, - "loss": 1.1587, + "learning_rate": 4.7592067988668565e-06, + "loss": 1.2152, "step": 252 }, { - "epoch": 0.0071793416572077185, + "epoch": 0.007169372892402732, "grad_norm": 0.0, - "learning_rate": 4.782608695652174e-06, - "loss": 1.0914, + "learning_rate": 4.7780925401322e-06, + "loss": 1.1938, "step": 253 }, { - "epoch": 0.007207718501702611, + "epoch": 0.0071977103346651935, "grad_norm": 0.0, - "learning_rate": 4.801512287334594e-06, - "loss": 1.1912, + "learning_rate": 4.796978281397545e-06, + "loss": 1.2687, "step": 254 }, { - "epoch": 0.007236095346197503, + "epoch": 0.007226047776927654, "grad_norm": 0.0, - "learning_rate": 4.820415879017014e-06, - "loss": 1.0142, + "learning_rate": 4.81586402266289e-06, + "loss": 1.2059, "step": 255 }, { - "epoch": 0.007264472190692395, + "epoch": 0.007254385219190116, "grad_norm": 0.0, - "learning_rate": 4.839319470699433e-06, - "loss": 1.0599, + "learning_rate": 4.8347497639282344e-06, + "loss": 1.19, "step": 256 }, { - "epoch": 0.007292849035187287, + "epoch": 0.007282722661452577, "grad_norm": 0.0, - "learning_rate": 4.858223062381853e-06, - "loss": 1.136, + "learning_rate": 4.853635505193579e-06, + "loss": 1.1724, "step": 257 }, { - "epoch": 0.007321225879682179, + "epoch": 0.007311060103715039, "grad_norm": 0.0, - "learning_rate": 4.877126654064273e-06, - "loss": 1.0036, + "learning_rate": 4.872521246458924e-06, + "loss": 1.1832, "step": 258 }, { - "epoch": 0.007349602724177072, + "epoch": 0.0073393975459775, "grad_norm": 0.0, - "learning_rate": 4.896030245746692e-06, - "loss": 1.0686, + "learning_rate": 4.8914069877242686e-06, + "loss": 1.2066, "step": 259 }, { - "epoch": 0.0073779795686719635, + "epoch": 0.007367734988239961, "grad_norm": 0.0, - "learning_rate": 4.914933837429112e-06, - "loss": 1.1184, + "learning_rate": 4.910292728989613e-06, + "loss": 1.1475, "step": 260 }, { - "epoch": 0.007406356413166856, + "epoch": 0.007396072430502423, "grad_norm": 0.0, - "learning_rate": 4.933837429111532e-06, - "loss": 1.0451, + "learning_rate": 4.929178470254958e-06, + "loss": 1.3201, "step": 261 }, { - "epoch": 0.007434733257661748, + "epoch": 0.0074244098727648845, "grad_norm": 0.0, - "learning_rate": 4.952741020793951e-06, - "loss": 1.1512, + "learning_rate": 4.948064211520303e-06, + "loss": 1.223, "step": 262 }, { - "epoch": 0.00746311010215664, + "epoch": 0.007452747315027345, "grad_norm": 0.0, - "learning_rate": 4.971644612476371e-06, - "loss": 1.0852, + "learning_rate": 4.966949952785647e-06, + "loss": 1.2578, "step": 263 }, { - "epoch": 0.007491486946651533, + "epoch": 0.007481084757289807, "grad_norm": 0.0, - "learning_rate": 4.990548204158791e-06, - "loss": 1.1717, + "learning_rate": 4.985835694050991e-06, + "loss": 1.1475, "step": 264 }, { - "epoch": 0.007519863791146424, + "epoch": 0.007509422199552269, "grad_norm": 0.0, - "learning_rate": 5.00945179584121e-06, - "loss": 1.066, + "learning_rate": 5.004721435316337e-06, + "loss": 1.2665, "step": 265 }, { - "epoch": 0.007548240635641317, + "epoch": 0.00753775964181473, "grad_norm": 0.0, - "learning_rate": 5.02835538752363e-06, - "loss": 1.0351, + "learning_rate": 5.023607176581681e-06, + "loss": 1.2566, "step": 266 }, { - "epoch": 0.0075766174801362085, + "epoch": 0.007566097084077191, "grad_norm": 0.0, - "learning_rate": 5.04725897920605e-06, - "loss": 1.0977, + "learning_rate": 5.042492917847026e-06, + "loss": 1.1874, "step": 267 }, { - "epoch": 0.007604994324631101, + "epoch": 0.007594434526339652, "grad_norm": 0.0, - "learning_rate": 5.066162570888468e-06, - "loss": 1.1569, + "learning_rate": 5.06137865911237e-06, + "loss": 1.1515, "step": 268 }, { - "epoch": 0.0076333711691259935, + "epoch": 0.007622771968602114, "grad_norm": 0.0, - "learning_rate": 5.085066162570889e-06, - "loss": 1.0821, + "learning_rate": 5.080264400377716e-06, + "loss": 1.0916, "step": 269 }, { - "epoch": 0.007661748013620885, + "epoch": 0.0076511094108645756, "grad_norm": 0.0, - "learning_rate": 5.1039697542533085e-06, - "loss": 1.0617, + "learning_rate": 5.09915014164306e-06, + "loss": 1.2374, "step": 270 }, { - "epoch": 0.007690124858115778, + "epoch": 0.007679446853127036, "grad_norm": 0.0, - "learning_rate": 5.122873345935728e-06, - "loss": 1.1791, + "learning_rate": 5.118035882908404e-06, + "loss": 1.2041, "step": 271 }, { - "epoch": 0.007718501702610669, + "epoch": 0.007707784295389498, "grad_norm": 0.0, - "learning_rate": 5.141776937618148e-06, - "loss": 1.049, + "learning_rate": 5.13692162417375e-06, + "loss": 1.2884, "step": 272 }, { - "epoch": 0.007746878547105562, + "epoch": 0.00773612173765196, "grad_norm": 0.0, - "learning_rate": 5.160680529300568e-06, - "loss": 1.0999, + "learning_rate": 5.155807365439094e-06, + "loss": 1.2892, "step": 273 }, { - "epoch": 0.007775255391600454, + "epoch": 0.007764459179914421, "grad_norm": 0.0, - "learning_rate": 5.179584120982986e-06, - "loss": 1.1115, + "learning_rate": 5.174693106704438e-06, + "loss": 1.3876, "step": 274 }, { - "epoch": 0.007803632236095346, + "epoch": 0.007792796622176882, "grad_norm": 0.0, - "learning_rate": 5.198487712665407e-06, - "loss": 1.1638, + "learning_rate": 5.193578847969784e-06, + "loss": 1.21, "step": 275 }, { - "epoch": 0.007832009080590238, + "epoch": 0.007821134064439343, "grad_norm": 0.0, - "learning_rate": 5.2173913043478265e-06, - "loss": 1.0799, + "learning_rate": 5.212464589235128e-06, + "loss": 1.0363, "step": 276 }, { - "epoch": 0.007860385925085131, + "epoch": 0.007849471506701806, "grad_norm": 0.0, - "learning_rate": 5.236294896030246e-06, - "loss": 1.0193, + "learning_rate": 5.231350330500472e-06, + "loss": 1.2465, "step": 277 }, { - "epoch": 0.007888762769580024, + "epoch": 0.007877808948964267, "grad_norm": 0.0, - "learning_rate": 5.255198487712666e-06, - "loss": 1.1257, + "learning_rate": 5.250236071765817e-06, + "loss": 1.2111, "step": 278 }, { - "epoch": 0.007917139614074914, + "epoch": 0.007906146391226727, "grad_norm": 0.0, - "learning_rate": 5.274102079395086e-06, - "loss": 1.1393, + "learning_rate": 5.269121813031162e-06, + "loss": 1.2418, "step": 279 }, { - "epoch": 0.007945516458569807, + "epoch": 0.00793448383348919, "grad_norm": 0.0, - "learning_rate": 5.293005671077504e-06, - "loss": 1.0826, + "learning_rate": 5.288007554296507e-06, + "loss": 1.0929, "step": 280 }, { - "epoch": 0.0079738933030647, + "epoch": 0.00796282127575165, "grad_norm": 0.0, - "learning_rate": 5.311909262759925e-06, - "loss": 1.0242, + "learning_rate": 5.306893295561851e-06, + "loss": 1.2004, "step": 281 }, { - "epoch": 0.008002270147559592, + "epoch": 0.007991158718014112, "grad_norm": 0.0, - "learning_rate": 5.3308128544423444e-06, - "loss": 1.0492, + "learning_rate": 5.325779036827196e-06, + "loss": 1.202, "step": 282 }, { - "epoch": 0.008030646992054484, + "epoch": 0.008019496160276574, "grad_norm": 0.0, - "learning_rate": 5.349716446124764e-06, - "loss": 1.0583, + "learning_rate": 5.344664778092541e-06, + "loss": 1.2267, "step": 283 }, { - "epoch": 0.008059023836549375, + "epoch": 0.008047833602539035, "grad_norm": 0.0, - "learning_rate": 5.368620037807184e-06, - "loss": 1.0333, + "learning_rate": 5.363550519357885e-06, + "loss": 1.2863, "step": 284 }, { - "epoch": 0.008087400681044268, + "epoch": 0.008076171044801496, "grad_norm": 0.0, - "learning_rate": 5.387523629489604e-06, - "loss": 1.1055, + "learning_rate": 5.382436260623229e-06, + "loss": 1.1041, "step": 285 }, { - "epoch": 0.00811577752553916, + "epoch": 0.008104508487063958, "grad_norm": 0.0, - "learning_rate": 5.406427221172024e-06, - "loss": 1.1657, + "learning_rate": 5.401322001888575e-06, + "loss": 1.1439, "step": 286 }, { - "epoch": 0.008144154370034053, + "epoch": 0.00813284592932642, "grad_norm": 0.0, - "learning_rate": 5.425330812854443e-06, - "loss": 1.0406, + "learning_rate": 5.4202077431539195e-06, + "loss": 1.2829, "step": 287 }, { - "epoch": 0.008172531214528945, + "epoch": 0.00816118337158888, "grad_norm": 0.0, - "learning_rate": 5.444234404536862e-06, - "loss": 1.2001, + "learning_rate": 5.439093484419264e-06, + "loss": 1.2609, "step": 288 }, { - "epoch": 0.008200908059023836, + "epoch": 0.008189520813851341, "grad_norm": 0.0, - "learning_rate": 5.463137996219282e-06, - "loss": 1.1033, + "learning_rate": 5.457979225684609e-06, + "loss": 1.277, "step": 289 }, { - "epoch": 0.008229284903518728, + "epoch": 0.008217858256113804, "grad_norm": 0.0, - "learning_rate": 5.482041587901702e-06, - "loss": 1.126, + "learning_rate": 5.476864966949953e-06, + "loss": 1.1736, "step": 290 }, { - "epoch": 0.008257661748013621, + "epoch": 0.008246195698376264, "grad_norm": 0.0, - "learning_rate": 5.500945179584122e-06, - "loss": 1.1374, + "learning_rate": 5.495750708215298e-06, + "loss": 1.2495, "step": 291 }, { - "epoch": 0.008286038592508514, + "epoch": 0.008274533140638725, "grad_norm": 0.0, - "learning_rate": 5.519848771266542e-06, - "loss": 1.0359, + "learning_rate": 5.514636449480643e-06, + "loss": 1.2483, "step": 292 }, { - "epoch": 0.008314415437003406, + "epoch": 0.008302870582901188, "grad_norm": 0.0, - "learning_rate": 5.538752362948961e-06, - "loss": 1.1772, + "learning_rate": 5.533522190745987e-06, + "loss": 1.1586, "step": 293 }, { - "epoch": 0.008342792281498297, + "epoch": 0.008331208025163649, "grad_norm": 0.0, - "learning_rate": 5.55765595463138e-06, - "loss": 1.09, + "learning_rate": 5.552407932011332e-06, + "loss": 1.2206, "step": 294 }, { - "epoch": 0.00837116912599319, + "epoch": 0.00835954546742611, "grad_norm": 0.0, - "learning_rate": 5.5765595463138e-06, - "loss": 1.0872, + "learning_rate": 5.571293673276676e-06, + "loss": 1.1808, "step": 295 }, { - "epoch": 0.008399545970488082, + "epoch": 0.008387882909688572, "grad_norm": 0.0, - "learning_rate": 5.59546313799622e-06, - "loss": 1.107, + "learning_rate": 5.590179414542021e-06, + "loss": 1.1801, "step": 296 }, { - "epoch": 0.008427922814982974, + "epoch": 0.008416220351951033, "grad_norm": 0.0, - "learning_rate": 5.614366729678639e-06, - "loss": 1.0838, + "learning_rate": 5.609065155807366e-06, + "loss": 1.2302, "step": 297 }, { - "epoch": 0.008456299659477867, + "epoch": 0.008444557794213494, "grad_norm": 0.0, - "learning_rate": 5.63327032136106e-06, - "loss": 1.0784, + "learning_rate": 5.62795089707271e-06, + "loss": 1.1304, "step": 298 }, { - "epoch": 0.008484676503972758, + "epoch": 0.008472895236475956, "grad_norm": 0.0, - "learning_rate": 5.652173913043479e-06, - "loss": 1.0838, + "learning_rate": 5.646836638338056e-06, + "loss": 1.2464, "step": 299 }, { - "epoch": 0.00851305334846765, + "epoch": 0.008501232678738417, "grad_norm": 0.0, - "learning_rate": 5.671077504725898e-06, - "loss": 1.0481, + "learning_rate": 5.6657223796034e-06, + "loss": 1.2396, "step": 300 }, { - "epoch": 0.008541430192962543, + "epoch": 0.008529570121000878, "grad_norm": 0.0, - "learning_rate": 5.689981096408318e-06, - "loss": 1.0033, + "learning_rate": 5.6846081208687445e-06, + "loss": 1.0963, "step": 301 }, { - "epoch": 0.008569807037457435, + "epoch": 0.00855790756326334, "grad_norm": 0.0, - "learning_rate": 5.708884688090738e-06, - "loss": 0.9697, + "learning_rate": 5.703493862134089e-06, + "loss": 1.2228, "step": 302 }, { - "epoch": 0.008598183881952328, + "epoch": 0.008586245005525801, "grad_norm": 0.0, - "learning_rate": 5.727788279773157e-06, - "loss": 1.1099, + "learning_rate": 5.722379603399434e-06, + "loss": 1.2766, "step": 303 }, { - "epoch": 0.008626560726447218, + "epoch": 0.008614582447788262, "grad_norm": 0.0, - "learning_rate": 5.746691871455578e-06, - "loss": 1.0736, + "learning_rate": 5.741265344664778e-06, + "loss": 1.1713, "step": 304 }, { - "epoch": 0.008654937570942111, + "epoch": 0.008642919890050725, "grad_norm": 0.0, - "learning_rate": 5.7655954631379966e-06, - "loss": 1.1558, + "learning_rate": 5.760151085930123e-06, + "loss": 1.2592, "step": 305 }, { - "epoch": 0.008683314415437004, + "epoch": 0.008671257332313186, "grad_norm": 0.0, - "learning_rate": 5.784499054820416e-06, - "loss": 1.1485, + "learning_rate": 5.779036827195468e-06, + "loss": 1.2694, "step": 306 }, { - "epoch": 0.008711691259931896, + "epoch": 0.008699594774575646, "grad_norm": 0.0, - "learning_rate": 5.803402646502836e-06, - "loss": 1.0186, + "learning_rate": 5.797922568460812e-06, + "loss": 1.2725, "step": 307 }, { - "epoch": 0.008740068104426789, + "epoch": 0.008727932216838109, "grad_norm": 0.0, - "learning_rate": 5.8223062381852555e-06, - "loss": 1.083, + "learning_rate": 5.8168083097261574e-06, + "loss": 1.1114, "step": 308 }, { - "epoch": 0.00876844494892168, + "epoch": 0.00875626965910057, "grad_norm": 0.0, - "learning_rate": 5.841209829867675e-06, - "loss": 1.0646, + "learning_rate": 5.835694050991501e-06, + "loss": 1.1193, "step": 309 }, { - "epoch": 0.008796821793416572, + "epoch": 0.00878460710136303, "grad_norm": 0.0, - "learning_rate": 5.860113421550096e-06, - "loss": 1.1739, + "learning_rate": 5.854579792256847e-06, + "loss": 1.1685, "step": 310 }, { - "epoch": 0.008825198637911464, + "epoch": 0.008812944543625493, "grad_norm": 0.0, - "learning_rate": 5.8790170132325145e-06, - "loss": 1.0208, + "learning_rate": 5.8734655335221916e-06, + "loss": 1.1501, "step": 311 }, { - "epoch": 0.008853575482406357, + "epoch": 0.008841281985887954, "grad_norm": 0.0, - "learning_rate": 5.897920604914934e-06, - "loss": 1.1558, + "learning_rate": 5.892351274787535e-06, + "loss": 1.1719, "step": 312 }, { - "epoch": 0.00888195232690125, + "epoch": 0.008869619428150415, "grad_norm": 0.0, - "learning_rate": 5.916824196597354e-06, - "loss": 1.0154, + "learning_rate": 5.911237016052881e-06, + "loss": 1.0672, "step": 313 }, { - "epoch": 0.00891032917139614, + "epoch": 0.008897956870412876, "grad_norm": 0.0, - "learning_rate": 5.9357277882797735e-06, - "loss": 1.1487, + "learning_rate": 5.930122757318225e-06, + "loss": 1.2072, "step": 314 }, { - "epoch": 0.008938706015891033, + "epoch": 0.008926294312675338, "grad_norm": 0.0, - "learning_rate": 5.954631379962193e-06, - "loss": 1.1968, + "learning_rate": 5.9490084985835695e-06, + "loss": 1.2041, "step": 315 }, { - "epoch": 0.008967082860385925, + "epoch": 0.008954631754937799, "grad_norm": 0.0, - "learning_rate": 5.973534971644614e-06, - "loss": 1.0932, + "learning_rate": 5.967894239848915e-06, + "loss": 1.1723, "step": 316 }, { - "epoch": 0.008995459704880818, + "epoch": 0.00898296919720026, "grad_norm": 0.0, - "learning_rate": 5.9924385633270325e-06, - "loss": 1.0035, + "learning_rate": 5.986779981114259e-06, + "loss": 1.0968, "step": 317 }, { - "epoch": 0.00902383654937571, + "epoch": 0.009011306639462723, "grad_norm": 0.0, - "learning_rate": 6.011342155009452e-06, - "loss": 1.1041, + "learning_rate": 6.0056657223796045e-06, + "loss": 1.2642, "step": 318 }, { - "epoch": 0.009052213393870601, + "epoch": 0.009039644081725183, "grad_norm": 0.0, - "learning_rate": 6.030245746691872e-06, - "loss": 1.0483, + "learning_rate": 6.024551463644948e-06, + "loss": 1.2031, "step": 319 }, { - "epoch": 0.009080590238365494, + "epoch": 0.009067981523987644, "grad_norm": 0.0, - "learning_rate": 6.0491493383742914e-06, - "loss": 1.109, + "learning_rate": 6.043437204910293e-06, + "loss": 1.2073, "step": 320 }, { - "epoch": 0.009108967082860386, + "epoch": 0.009096318966250107, "grad_norm": 0.0, - "learning_rate": 6.068052930056711e-06, - "loss": 1.1079, + "learning_rate": 6.062322946175639e-06, + "loss": 1.2341, "step": 321 }, { - "epoch": 0.009137343927355279, + "epoch": 0.009124656408512568, "grad_norm": 0.0, - "learning_rate": 6.086956521739132e-06, - "loss": 1.0806, + "learning_rate": 6.0812086874409825e-06, + "loss": 1.2157, "step": 322 }, { - "epoch": 0.009165720771850171, + "epoch": 0.009152993850775028, "grad_norm": 0.0, - "learning_rate": 6.10586011342155e-06, - "loss": 1.0574, + "learning_rate": 6.100094428706327e-06, + "loss": 1.2114, "step": 323 }, { - "epoch": 0.009194097616345062, + "epoch": 0.009181331293037491, "grad_norm": 0.0, - "learning_rate": 6.12476370510397e-06, - "loss": 1.0582, + "learning_rate": 6.118980169971672e-06, + "loss": 1.2593, "step": 324 }, { - "epoch": 0.009222474460839954, + "epoch": 0.009209668735299952, "grad_norm": 0.0, - "learning_rate": 6.14366729678639e-06, - "loss": 1.0743, + "learning_rate": 6.137865911237017e-06, + "loss": 1.1956, "step": 325 }, { - "epoch": 0.009250851305334847, + "epoch": 0.009238006177562413, "grad_norm": 0.0, - "learning_rate": 6.162570888468809e-06, - "loss": 1.0721, + "learning_rate": 6.1567516525023604e-06, + "loss": 1.144, "step": 326 }, { - "epoch": 0.00927922814982974, + "epoch": 0.009266343619824875, "grad_norm": 0.0, - "learning_rate": 6.181474480151229e-06, - "loss": 1.0522, + "learning_rate": 6.175637393767706e-06, + "loss": 1.2298, "step": 327 }, { - "epoch": 0.009307604994324632, + "epoch": 0.009294681062087336, "grad_norm": 0.0, - "learning_rate": 6.2003780718336495e-06, - "loss": 1.1191, + "learning_rate": 6.194523135033051e-06, + "loss": 1.2527, "step": 328 }, { - "epoch": 0.009335981838819523, + "epoch": 0.009323018504349797, "grad_norm": 0.0, - "learning_rate": 6.219281663516069e-06, - "loss": 1.1987, + "learning_rate": 6.213408876298395e-06, + "loss": 1.1456, "step": 329 }, { - "epoch": 0.009364358683314415, + "epoch": 0.00935135594661226, "grad_norm": 0.0, - "learning_rate": 6.238185255198488e-06, - "loss": 1.1167, + "learning_rate": 6.23229461756374e-06, + "loss": 1.0806, "step": 330 }, { - "epoch": 0.009392735527809308, + "epoch": 0.00937969338887472, "grad_norm": 0.0, - "learning_rate": 6.257088846880908e-06, - "loss": 1.0319, + "learning_rate": 6.251180358829084e-06, + "loss": 1.1435, "step": 331 }, { - "epoch": 0.0094211123723042, + "epoch": 0.009408030831137181, "grad_norm": 0.0, - "learning_rate": 6.275992438563327e-06, - "loss": 1.0634, + "learning_rate": 6.2700661000944295e-06, + "loss": 1.1483, "step": 332 }, { - "epoch": 0.009449489216799093, + "epoch": 0.009436368273399644, "grad_norm": 0.0, - "learning_rate": 6.294896030245747e-06, - "loss": 1.1193, + "learning_rate": 6.288951841359774e-06, + "loss": 1.233, "step": 333 }, { - "epoch": 0.009477866061293983, + "epoch": 0.009464705715662105, "grad_norm": 0.0, - "learning_rate": 6.3137996219281675e-06, - "loss": 1.0862, + "learning_rate": 6.307837582625118e-06, + "loss": 1.2014, "step": 334 }, { - "epoch": 0.009506242905788876, + "epoch": 0.009493043157924565, "grad_norm": 0.0, - "learning_rate": 6.332703213610587e-06, - "loss": 1.1658, + "learning_rate": 6.326723323890464e-06, + "loss": 1.2161, "step": 335 }, { - "epoch": 0.009534619750283769, + "epoch": 0.009521380600187028, "grad_norm": 0.0, - "learning_rate": 6.351606805293006e-06, - "loss": 1.189, + "learning_rate": 6.3456090651558075e-06, + "loss": 1.1541, "step": 336 }, { - "epoch": 0.009562996594778661, + "epoch": 0.009549718042449489, "grad_norm": 0.0, - "learning_rate": 6.370510396975426e-06, - "loss": 1.0882, + "learning_rate": 6.364494806421152e-06, + "loss": 1.1417, "step": 337 }, { - "epoch": 0.009591373439273554, + "epoch": 0.00957805548471195, "grad_norm": 0.0, - "learning_rate": 6.389413988657845e-06, - "loss": 1.1045, + "learning_rate": 6.383380547686498e-06, + "loss": 1.2574, "step": 338 }, { - "epoch": 0.009619750283768444, + "epoch": 0.00960639292697441, "grad_norm": 0.0, - "learning_rate": 6.408317580340265e-06, - "loss": 1.1162, + "learning_rate": 6.402266288951842e-06, + "loss": 1.2659, "step": 339 }, { - "epoch": 0.009648127128263337, + "epoch": 0.009634730369236873, "grad_norm": 0.0, - "learning_rate": 6.4272211720226854e-06, - "loss": 1.0182, + "learning_rate": 6.421152030217187e-06, + "loss": 1.1527, "step": 340 }, { - "epoch": 0.00967650397275823, + "epoch": 0.009663067811499334, "grad_norm": 0.0, - "learning_rate": 6.446124763705105e-06, - "loss": 1.0938, + "learning_rate": 6.440037771482531e-06, + "loss": 1.1712, "step": 341 }, { - "epoch": 0.009704880817253122, + "epoch": 0.009691405253761795, "grad_norm": 0.0, - "learning_rate": 6.465028355387524e-06, - "loss": 1.1387, + "learning_rate": 6.458923512747876e-06, + "loss": 1.0903, "step": 342 }, { - "epoch": 0.009733257661748014, + "epoch": 0.009719742696024257, "grad_norm": 0.0, - "learning_rate": 6.4839319470699436e-06, - "loss": 1.0679, + "learning_rate": 6.477809254013221e-06, + "loss": 1.1907, "step": 343 }, { - "epoch": 0.009761634506242905, + "epoch": 0.009748080138286718, "grad_norm": 0.0, - "learning_rate": 6.502835538752363e-06, - "loss": 1.0858, + "learning_rate": 6.496694995278565e-06, + "loss": 1.1501, "step": 344 }, { - "epoch": 0.009790011350737798, + "epoch": 0.009776417580549179, "grad_norm": 0.0, - "learning_rate": 6.521739130434783e-06, - "loss": 1.0499, + "learning_rate": 6.51558073654391e-06, + "loss": 1.1877, "step": 345 }, { - "epoch": 0.00981838819523269, + "epoch": 0.009804755022811642, "grad_norm": 0.0, - "learning_rate": 6.540642722117203e-06, - "loss": 0.9648, + "learning_rate": 6.5344664778092546e-06, + "loss": 1.1413, "step": 346 }, { - "epoch": 0.009846765039727583, + "epoch": 0.009833092465074102, "grad_norm": 0.0, - "learning_rate": 6.559546313799623e-06, - "loss": 1.1292, + "learning_rate": 6.553352219074599e-06, + "loss": 1.1768, "step": 347 }, { - "epoch": 0.009875141884222475, + "epoch": 0.009861429907336563, "grad_norm": 0.0, - "learning_rate": 6.578449905482042e-06, - "loss": 1.0131, + "learning_rate": 6.572237960339945e-06, + "loss": 1.255, "step": 348 }, { - "epoch": 0.009903518728717366, + "epoch": 0.009889767349599026, "grad_norm": 0.0, - "learning_rate": 6.5973534971644615e-06, - "loss": 1.1511, + "learning_rate": 6.591123701605289e-06, + "loss": 1.167, "step": 349 }, { - "epoch": 0.009931895573212259, + "epoch": 0.009918104791861487, "grad_norm": 0.0, - "learning_rate": 6.616257088846881e-06, - "loss": 1.1293, + "learning_rate": 6.6100094428706325e-06, + "loss": 1.2211, "step": 350 }, { - "epoch": 0.009960272417707151, + "epoch": 0.009946442234123947, "grad_norm": 0.0, - "learning_rate": 6.635160680529301e-06, - "loss": 1.1228, + "learning_rate": 6.628895184135978e-06, + "loss": 1.1656, "step": 351 }, { - "epoch": 0.009988649262202044, + "epoch": 0.00997477967638641, "grad_norm": 0.0, - "learning_rate": 6.654064272211721e-06, - "loss": 1.0753, + "learning_rate": 6.647780925401323e-06, + "loss": 1.1995, "step": 352 }, { - "epoch": 0.010017026106696936, + "epoch": 0.01000311711864887, "grad_norm": 0.0, - "learning_rate": 6.672967863894141e-06, - "loss": 1.2051, + "learning_rate": 6.666666666666667e-06, + "loss": 1.1136, "step": 353 }, { - "epoch": 0.010045402951191827, + "epoch": 0.010031454560911332, "grad_norm": 0.0, - "learning_rate": 6.69187145557656e-06, - "loss": 0.9972, + "learning_rate": 6.685552407932012e-06, + "loss": 1.1492, "step": 354 }, { - "epoch": 0.01007377979568672, + "epoch": 0.010059792003173794, "grad_norm": 0.0, - "learning_rate": 6.7107750472589795e-06, - "loss": 1.0515, + "learning_rate": 6.704438149197356e-06, + "loss": 1.1657, "step": 355 }, { - "epoch": 0.010102156640181612, + "epoch": 0.010088129445436255, "grad_norm": 0.0, - "learning_rate": 6.729678638941399e-06, - "loss": 1.1081, + "learning_rate": 6.723323890462701e-06, + "loss": 1.2016, "step": 356 }, { - "epoch": 0.010130533484676504, + "epoch": 0.010116466887698716, "grad_norm": 0.0, - "learning_rate": 6.748582230623819e-06, - "loss": 1.1013, + "learning_rate": 6.742209631728046e-06, + "loss": 1.2551, "step": 357 }, { - "epoch": 0.010158910329171397, + "epoch": 0.010144804329961178, "grad_norm": 0.0, - "learning_rate": 6.767485822306239e-06, - "loss": 0.9883, + "learning_rate": 6.76109537299339e-06, + "loss": 1.1411, "step": 358 }, { - "epoch": 0.010187287173666288, + "epoch": 0.01017314177222364, "grad_norm": 0.0, - "learning_rate": 6.786389413988659e-06, - "loss": 1.0848, + "learning_rate": 6.779981114258736e-06, + "loss": 1.0932, "step": 359 }, { - "epoch": 0.01021566401816118, + "epoch": 0.0102014792144861, "grad_norm": 0.0, - "learning_rate": 6.805293005671078e-06, - "loss": 1.1311, + "learning_rate": 6.79886685552408e-06, + "loss": 1.2139, "step": 360 }, { - "epoch": 0.010244040862656073, + "epoch": 0.010229816656748563, "grad_norm": 0.0, - "learning_rate": 6.824196597353497e-06, - "loss": 1.182, + "learning_rate": 6.817752596789424e-06, + "loss": 1.1497, "step": 361 }, { - "epoch": 0.010272417707150965, + "epoch": 0.010258154099011024, "grad_norm": 0.0, - "learning_rate": 6.843100189035917e-06, - "loss": 1.115, + "learning_rate": 6.83663833805477e-06, + "loss": 1.1794, "step": 362 }, { - "epoch": 0.010300794551645858, + "epoch": 0.010286491541273484, "grad_norm": 0.0, - "learning_rate": 6.862003780718337e-06, - "loss": 1.1503, + "learning_rate": 6.855524079320114e-06, + "loss": 1.1115, "step": 363 }, { - "epoch": 0.010329171396140749, + "epoch": 0.010314828983535945, "grad_norm": 0.0, - "learning_rate": 6.880907372400757e-06, - "loss": 1.1635, + "learning_rate": 6.874409820585458e-06, + "loss": 1.2964, "step": 364 }, { - "epoch": 0.010357548240635641, + "epoch": 0.010343166425798408, "grad_norm": 0.0, - "learning_rate": 6.899810964083177e-06, - "loss": 1.1221, + "learning_rate": 6.893295561850803e-06, + "loss": 1.1651, "step": 365 }, { - "epoch": 0.010385925085130534, + "epoch": 0.010371503868060869, "grad_norm": 0.0, - "learning_rate": 6.918714555765596e-06, - "loss": 1.083, + "learning_rate": 6.912181303116148e-06, + "loss": 1.3035, "step": 366 }, { - "epoch": 0.010414301929625426, + "epoch": 0.01039984131032333, "grad_norm": 0.0, - "learning_rate": 6.937618147448015e-06, - "loss": 0.9808, + "learning_rate": 6.931067044381492e-06, + "loss": 1.1905, "step": 367 }, { - "epoch": 0.010442678774120319, + "epoch": 0.010428178752585792, "grad_norm": 0.0, - "learning_rate": 6.956521739130435e-06, - "loss": 1.0874, + "learning_rate": 6.949952785646837e-06, + "loss": 1.188, "step": 368 }, { - "epoch": 0.01047105561861521, + "epoch": 0.010456516194848253, "grad_norm": 0.0, - "learning_rate": 6.975425330812855e-06, - "loss": 1.0268, + "learning_rate": 6.968838526912182e-06, + "loss": 1.073, "step": 369 }, { - "epoch": 0.010499432463110102, + "epoch": 0.010484853637110714, "grad_norm": 0.0, - "learning_rate": 6.994328922495275e-06, - "loss": 1.0733, + "learning_rate": 6.987724268177527e-06, + "loss": 1.155, "step": 370 }, { - "epoch": 0.010527809307604994, + "epoch": 0.010513191079373176, "grad_norm": 0.0, - "learning_rate": 7.013232514177695e-06, - "loss": 1.1044, + "learning_rate": 7.006610009442871e-06, + "loss": 1.2005, "step": 371 }, { - "epoch": 0.010556186152099887, + "epoch": 0.010541528521635637, "grad_norm": 0.0, - "learning_rate": 7.0321361058601145e-06, - "loss": 1.1287, + "learning_rate": 7.025495750708215e-06, + "loss": 1.14, "step": 372 }, { - "epoch": 0.01058456299659478, + "epoch": 0.010569865963898098, "grad_norm": 0.0, - "learning_rate": 7.051039697542533e-06, - "loss": 1.0375, + "learning_rate": 7.044381491973561e-06, + "loss": 1.2338, "step": 373 }, { - "epoch": 0.01061293984108967, + "epoch": 0.01059820340616056, "grad_norm": 0.0, - "learning_rate": 7.069943289224953e-06, - "loss": 1.1217, + "learning_rate": 7.0632672332389055e-06, + "loss": 1.1942, "step": 374 }, { - "epoch": 0.010641316685584563, + "epoch": 0.010626540848423021, "grad_norm": 0.0, - "learning_rate": 7.088846880907373e-06, - "loss": 1.1165, + "learning_rate": 7.082152974504249e-06, + "loss": 1.2418, "step": 375 }, { - "epoch": 0.010669693530079455, + "epoch": 0.010654878290685482, "grad_norm": 0.0, - "learning_rate": 7.107750472589793e-06, - "loss": 1.0785, + "learning_rate": 7.101038715769595e-06, + "loss": 1.2633, "step": 376 }, { - "epoch": 0.010698070374574348, + "epoch": 0.010683215732947945, "grad_norm": 0.0, - "learning_rate": 7.126654064272213e-06, - "loss": 0.9635, + "learning_rate": 7.119924457034939e-06, + "loss": 1.2393, "step": 377 }, { - "epoch": 0.01072644721906924, + "epoch": 0.010711553175210406, "grad_norm": 0.0, - "learning_rate": 7.1455576559546324e-06, - "loss": 1.1927, + "learning_rate": 7.1388101983002834e-06, + "loss": 1.2675, "step": 378 }, { - "epoch": 0.010754824063564131, + "epoch": 0.010739890617472866, "grad_norm": 0.0, - "learning_rate": 7.164461247637051e-06, - "loss": 1.0847, + "learning_rate": 7.157695939565629e-06, + "loss": 1.138, "step": 379 }, { - "epoch": 0.010783200908059024, + "epoch": 0.010768228059735329, "grad_norm": 0.0, - "learning_rate": 7.183364839319471e-06, - "loss": 1.101, + "learning_rate": 7.176581680830973e-06, + "loss": 1.126, "step": 380 }, { - "epoch": 0.010811577752553916, + "epoch": 0.01079656550199779, "grad_norm": 0.0, - "learning_rate": 7.2022684310018906e-06, - "loss": 1.1551, + "learning_rate": 7.195467422096318e-06, + "loss": 1.0874, "step": 381 }, { - "epoch": 0.010839954597048809, + "epoch": 0.01082490294426025, "grad_norm": 0.0, - "learning_rate": 7.221172022684311e-06, - "loss": 1.1528, + "learning_rate": 7.214353163361662e-06, + "loss": 1.2328, "step": 382 }, { - "epoch": 0.010868331441543701, + "epoch": 0.010853240386522713, "grad_norm": 0.0, - "learning_rate": 7.240075614366731e-06, - "loss": 1.1289, + "learning_rate": 7.233238904627007e-06, + "loss": 1.1919, "step": 383 }, { - "epoch": 0.010896708286038592, + "epoch": 0.010881577828785174, "grad_norm": 0.0, - "learning_rate": 7.25897920604915e-06, - "loss": 1.0753, + "learning_rate": 7.2521246458923525e-06, + "loss": 1.2048, "step": 384 }, { - "epoch": 0.010925085130533484, + "epoch": 0.010909915271047635, "grad_norm": 0.0, - "learning_rate": 7.277882797731569e-06, - "loss": 1.02, + "learning_rate": 7.271010387157696e-06, + "loss": 1.1932, "step": 385 }, { - "epoch": 0.010953461975028377, + "epoch": 0.010938252713310097, "grad_norm": 0.0, - "learning_rate": 7.296786389413989e-06, - "loss": 1.0767, + "learning_rate": 7.289896128423041e-06, + "loss": 1.1366, "step": 386 }, { - "epoch": 0.01098183881952327, + "epoch": 0.010966590155572558, "grad_norm": 0.0, - "learning_rate": 7.3156899810964085e-06, - "loss": 1.037, + "learning_rate": 7.308781869688386e-06, + "loss": 1.0938, "step": 387 }, { - "epoch": 0.011010215664018162, + "epoch": 0.01099492759783502, "grad_norm": 0.0, - "learning_rate": 7.334593572778829e-06, - "loss": 1.2678, + "learning_rate": 7.3276676109537305e-06, + "loss": 1.1037, "step": 388 }, { - "epoch": 0.011038592508513053, + "epoch": 0.01102326504009748, "grad_norm": 0.0, - "learning_rate": 7.353497164461249e-06, - "loss": 1.0829, + "learning_rate": 7.346553352219076e-06, + "loss": 1.0821, "step": 389 }, { - "epoch": 0.011066969353007945, + "epoch": 0.011051602482359943, "grad_norm": 0.0, - "learning_rate": 7.372400756143668e-06, - "loss": 1.1423, + "learning_rate": 7.36543909348442e-06, + "loss": 1.1518, "step": 390 }, { - "epoch": 0.011095346197502838, + "epoch": 0.011079939924622403, "grad_norm": 0.0, - "learning_rate": 7.391304347826087e-06, - "loss": 1.0005, + "learning_rate": 7.384324834749765e-06, + "loss": 1.2915, "step": 391 }, { - "epoch": 0.01112372304199773, + "epoch": 0.011108277366884864, "grad_norm": 0.0, - "learning_rate": 7.410207939508507e-06, - "loss": 1.0382, + "learning_rate": 7.403210576015109e-06, + "loss": 1.2028, "step": 392 }, { - "epoch": 0.011152099886492623, + "epoch": 0.011136614809147327, "grad_norm": 0.0, - "learning_rate": 7.4291115311909265e-06, - "loss": 1.0858, + "learning_rate": 7.422096317280454e-06, + "loss": 1.2163, "step": 393 }, { - "epoch": 0.011180476730987514, + "epoch": 0.011164952251409788, "grad_norm": 0.0, - "learning_rate": 7.448015122873347e-06, - "loss": 1.0151, + "learning_rate": 7.440982058545798e-06, + "loss": 1.1524, "step": 394 }, { - "epoch": 0.011208853575482406, + "epoch": 0.011193289693672248, "grad_norm": 0.0, - "learning_rate": 7.466918714555767e-06, - "loss": 1.1294, + "learning_rate": 7.4598677998111434e-06, + "loss": 1.1852, "step": 395 }, { - "epoch": 0.011237230419977299, + "epoch": 0.011221627135934711, "grad_norm": 0.0, - "learning_rate": 7.485822306238186e-06, - "loss": 1.1019, + "learning_rate": 7.478753541076488e-06, + "loss": 1.0468, "step": 396 }, { - "epoch": 0.011265607264472191, + "epoch": 0.011249964578197172, "grad_norm": 0.0, - "learning_rate": 7.504725897920605e-06, - "loss": 1.0646, + "learning_rate": 7.497639282341832e-06, + "loss": 1.1896, "step": 397 }, { - "epoch": 0.011293984108967084, + "epoch": 0.011278302020459633, "grad_norm": 0.0, - "learning_rate": 7.523629489603025e-06, - "loss": 1.1392, + "learning_rate": 7.5165250236071775e-06, + "loss": 1.231, "step": 398 }, { - "epoch": 0.011322360953461974, + "epoch": 0.011306639462722095, "grad_norm": 0.0, - "learning_rate": 7.542533081285444e-06, - "loss": 1.01, + "learning_rate": 7.535410764872521e-06, + "loss": 1.1369, "step": 399 }, { - "epoch": 0.011350737797956867, + "epoch": 0.011334976904984556, "grad_norm": 0.0, - "learning_rate": 7.561436672967865e-06, - "loss": 1.1423, + "learning_rate": 7.554296506137867e-06, + "loss": 1.2664, "step": 400 }, { - "epoch": 0.01137911464245176, + "epoch": 0.011363314347247017, "grad_norm": 0.0, - "learning_rate": 7.580340264650285e-06, - "loss": 0.9815, + "learning_rate": 7.573182247403212e-06, + "loss": 1.2499, "step": 401 }, { - "epoch": 0.011407491486946652, + "epoch": 0.01139165178950948, "grad_norm": 0.0, - "learning_rate": 7.599243856332704e-06, - "loss": 1.1646, + "learning_rate": 7.5920679886685555e-06, + "loss": 1.1228, "step": 402 }, { - "epoch": 0.011435868331441544, + "epoch": 0.01141998923177194, "grad_norm": 0.0, - "learning_rate": 7.618147448015123e-06, - "loss": 1.092, + "learning_rate": 7.610953729933901e-06, + "loss": 1.2271, "step": 403 }, { - "epoch": 0.011464245175936435, + "epoch": 0.011448326674034401, "grad_norm": 0.0, - "learning_rate": 7.637051039697544e-06, - "loss": 1.1157, + "learning_rate": 7.629839471199246e-06, + "loss": 1.2048, "step": 404 }, { - "epoch": 0.011492622020431328, + "epoch": 0.011476664116296864, "grad_norm": 0.0, - "learning_rate": 7.655954631379963e-06, - "loss": 1.081, + "learning_rate": 7.64872521246459e-06, + "loss": 1.2071, "step": 405 }, { - "epoch": 0.01152099886492622, + "epoch": 0.011505001558559325, "grad_norm": 0.0, - "learning_rate": 7.674858223062383e-06, - "loss": 1.0903, + "learning_rate": 7.667610953729935e-06, + "loss": 1.2441, "step": 406 }, { - "epoch": 0.011549375709421113, + "epoch": 0.011533339000821785, "grad_norm": 0.0, - "learning_rate": 7.693761814744803e-06, - "loss": 1.1231, + "learning_rate": 7.686496694995279e-06, + "loss": 1.1868, "step": 407 }, { - "epoch": 0.011577752553916005, + "epoch": 0.011561676443084248, "grad_norm": 0.0, - "learning_rate": 7.712665406427222e-06, - "loss": 1.0812, + "learning_rate": 7.705382436260623e-06, + "loss": 1.2624, "step": 408 }, { - "epoch": 0.011606129398410896, + "epoch": 0.011590013885346709, "grad_norm": 0.0, - "learning_rate": 7.731568998109642e-06, - "loss": 1.1128, + "learning_rate": 7.724268177525968e-06, + "loss": 1.3582, "step": 409 }, { - "epoch": 0.011634506242905789, + "epoch": 0.01161835132760917, "grad_norm": 0.0, - "learning_rate": 7.750472589792062e-06, - "loss": 1.0083, + "learning_rate": 7.743153918791312e-06, + "loss": 1.2122, "step": 410 }, { - "epoch": 0.011662883087400681, + "epoch": 0.011646688769871632, "grad_norm": 0.0, - "learning_rate": 7.769376181474481e-06, - "loss": 1.0358, + "learning_rate": 7.762039660056658e-06, + "loss": 1.2421, "step": 411 }, { - "epoch": 0.011691259931895574, + "epoch": 0.011675026212134093, "grad_norm": 0.0, - "learning_rate": 7.7882797731569e-06, - "loss": 1.1496, + "learning_rate": 7.780925401322003e-06, + "loss": 1.1799, "step": 412 }, { - "epoch": 0.011719636776390466, + "epoch": 0.011703363654396554, "grad_norm": 0.0, - "learning_rate": 7.80718336483932e-06, - "loss": 1.1403, + "learning_rate": 7.799811142587347e-06, + "loss": 1.1546, "step": 413 }, { - "epoch": 0.011748013620885357, + "epoch": 0.011731701096659015, "grad_norm": 0.0, - "learning_rate": 7.82608695652174e-06, - "loss": 1.0926, + "learning_rate": 7.818696883852693e-06, + "loss": 1.0656, "step": 414 }, { - "epoch": 0.01177639046538025, + "epoch": 0.011760038538921477, "grad_norm": 0.0, - "learning_rate": 7.84499054820416e-06, - "loss": 1.1217, + "learning_rate": 7.837582625118037e-06, + "loss": 1.2553, "step": 415 }, { - "epoch": 0.011804767309875142, + "epoch": 0.011788375981183938, "grad_norm": 0.0, - "learning_rate": 7.86389413988658e-06, - "loss": 1.0541, + "learning_rate": 7.85646836638338e-06, + "loss": 1.194, "step": 416 }, { - "epoch": 0.011833144154370034, + "epoch": 0.011816713423446399, "grad_norm": 0.0, - "learning_rate": 7.882797731568999e-06, - "loss": 1.1663, + "learning_rate": 7.875354107648726e-06, + "loss": 1.1502, "step": 417 }, { - "epoch": 0.011861520998864927, + "epoch": 0.011845050865708862, "grad_norm": 0.0, - "learning_rate": 7.901701323251419e-06, - "loss": 1.1205, + "learning_rate": 7.89423984891407e-06, + "loss": 1.1394, "step": 418 }, { - "epoch": 0.011889897843359818, + "epoch": 0.011873388307971322, "grad_norm": 0.0, - "learning_rate": 7.920604914933838e-06, - "loss": 1.0402, + "learning_rate": 7.913125590179416e-06, + "loss": 1.2711, "step": 419 }, { - "epoch": 0.01191827468785471, + "epoch": 0.011901725750233783, "grad_norm": 0.0, - "learning_rate": 7.939508506616258e-06, - "loss": 0.9959, + "learning_rate": 7.93201133144476e-06, + "loss": 1.3054, "step": 420 }, { - "epoch": 0.011946651532349603, + "epoch": 0.011930063192496246, "grad_norm": 0.0, - "learning_rate": 7.958412098298678e-06, - "loss": 0.9762, + "learning_rate": 7.950897072710105e-06, + "loss": 1.2083, "step": 421 }, { - "epoch": 0.011975028376844495, + "epoch": 0.011958400634758707, "grad_norm": 0.0, - "learning_rate": 7.977315689981097e-06, - "loss": 1.0894, + "learning_rate": 7.969782813975449e-06, + "loss": 1.223, "step": 422 }, { - "epoch": 0.012003405221339388, + "epoch": 0.011986738077021167, "grad_norm": 0.0, - "learning_rate": 7.996219281663517e-06, - "loss": 1.1104, + "learning_rate": 7.988668555240794e-06, + "loss": 1.0549, "step": 423 }, { - "epoch": 0.012031782065834279, + "epoch": 0.01201507551928363, "grad_norm": 0.0, - "learning_rate": 8.015122873345937e-06, - "loss": 1.1134, + "learning_rate": 8.007554296506138e-06, + "loss": 1.2508, "step": 424 }, { - "epoch": 0.012060158910329171, + "epoch": 0.012043412961546091, "grad_norm": 0.0, - "learning_rate": 8.034026465028356e-06, - "loss": 1.1134, + "learning_rate": 8.026440037771484e-06, + "loss": 1.0093, "step": 425 }, { - "epoch": 0.012088535754824064, + "epoch": 0.012071750403808552, "grad_norm": 0.0, - "learning_rate": 8.052930056710776e-06, - "loss": 1.1705, + "learning_rate": 8.045325779036828e-06, + "loss": 1.2766, "step": 426 }, { - "epoch": 0.012116912599318956, + "epoch": 0.012100087846071014, "grad_norm": 0.0, - "learning_rate": 8.071833648393196e-06, - "loss": 1.0048, + "learning_rate": 8.064211520302171e-06, + "loss": 1.0249, "step": 427 }, { - "epoch": 0.012145289443813849, + "epoch": 0.012128425288333475, "grad_norm": 0.0, - "learning_rate": 8.090737240075615e-06, - "loss": 1.0726, + "learning_rate": 8.083097261567517e-06, + "loss": 1.2846, "step": 428 }, { - "epoch": 0.01217366628830874, + "epoch": 0.012156762730595936, "grad_norm": 0.0, - "learning_rate": 8.109640831758035e-06, - "loss": 1.1926, + "learning_rate": 8.101983002832861e-06, + "loss": 1.2424, "step": 429 }, { - "epoch": 0.012202043132803632, + "epoch": 0.012185100172858399, "grad_norm": 0.0, - "learning_rate": 8.128544423440455e-06, - "loss": 1.0779, + "learning_rate": 8.120868744098206e-06, + "loss": 1.1846, "step": 430 }, { - "epoch": 0.012230419977298524, + "epoch": 0.01221343761512086, "grad_norm": 0.0, - "learning_rate": 8.147448015122874e-06, - "loss": 1.1087, + "learning_rate": 8.139754485363552e-06, + "loss": 1.1193, "step": 431 }, { - "epoch": 0.012258796821793417, + "epoch": 0.01224177505738332, "grad_norm": 0.0, - "learning_rate": 8.166351606805294e-06, - "loss": 1.1183, + "learning_rate": 8.158640226628896e-06, + "loss": 1.1547, "step": 432 }, { - "epoch": 0.01228717366628831, + "epoch": 0.012270112499645783, "grad_norm": 0.0, - "learning_rate": 8.185255198487714e-06, - "loss": 1.2776, + "learning_rate": 8.177525967894241e-06, + "loss": 1.0895, "step": 433 }, { - "epoch": 0.0123155505107832, + "epoch": 0.012298449941908244, "grad_norm": 0.0, - "learning_rate": 8.204158790170133e-06, - "loss": 0.9655, + "learning_rate": 8.196411709159585e-06, + "loss": 1.0972, "step": 434 }, { - "epoch": 0.012343927355278093, + "epoch": 0.012326787384170704, "grad_norm": 0.0, - "learning_rate": 8.223062381852553e-06, - "loss": 1.0998, + "learning_rate": 8.215297450424929e-06, + "loss": 1.1428, "step": 435 }, { - "epoch": 0.012372304199772985, + "epoch": 0.012355124826433167, "grad_norm": 0.0, - "learning_rate": 8.241965973534973e-06, - "loss": 1.1248, + "learning_rate": 8.234183191690275e-06, + "loss": 1.1477, "step": 436 }, { - "epoch": 0.012400681044267878, + "epoch": 0.012383462268695628, "grad_norm": 0.0, - "learning_rate": 8.260869565217392e-06, - "loss": 1.1497, + "learning_rate": 8.253068932955619e-06, + "loss": 1.2009, "step": 437 }, { - "epoch": 0.01242905788876277, + "epoch": 0.012411799710958089, "grad_norm": 0.0, - "learning_rate": 8.279773156899812e-06, - "loss": 1.0568, + "learning_rate": 8.271954674220964e-06, + "loss": 1.2119, "step": 438 }, { - "epoch": 0.012457434733257661, + "epoch": 0.01244013715322055, "grad_norm": 0.0, - "learning_rate": 8.298676748582232e-06, - "loss": 1.0396, + "learning_rate": 8.290840415486308e-06, + "loss": 1.21, "step": 439 }, { - "epoch": 0.012485811577752554, + "epoch": 0.012468474595483012, "grad_norm": 0.0, - "learning_rate": 8.317580340264651e-06, - "loss": 1.096, + "learning_rate": 8.309726156751653e-06, + "loss": 1.1614, "step": 440 }, { - "epoch": 0.012514188422247446, + "epoch": 0.012496812037745473, "grad_norm": 0.0, - "learning_rate": 8.336483931947071e-06, - "loss": 1.109, + "learning_rate": 8.328611898016999e-06, + "loss": 1.3113, "step": 441 }, { - "epoch": 0.012542565266742339, + "epoch": 0.012525149480007934, "grad_norm": 0.0, - "learning_rate": 8.35538752362949e-06, - "loss": 1.0496, + "learning_rate": 8.347497639282343e-06, + "loss": 1.1668, "step": 442 }, { - "epoch": 0.012570942111237231, + "epoch": 0.012553486922270396, "grad_norm": 0.0, - "learning_rate": 8.37429111531191e-06, - "loss": 1.0344, + "learning_rate": 8.366383380547687e-06, + "loss": 1.0338, "step": 443 }, { - "epoch": 0.012599318955732122, + "epoch": 0.012581824364532857, "grad_norm": 0.0, - "learning_rate": 8.39319470699433e-06, - "loss": 0.9294, + "learning_rate": 8.385269121813032e-06, + "loss": 1.1761, "step": 444 }, { - "epoch": 0.012627695800227014, + "epoch": 0.012610161806795318, "grad_norm": 0.0, - "learning_rate": 8.41209829867675e-06, - "loss": 1.0494, + "learning_rate": 8.404154863078376e-06, + "loss": 1.1943, "step": 445 }, { - "epoch": 0.012656072644721907, + "epoch": 0.01263849924905778, "grad_norm": 0.0, - "learning_rate": 8.43100189035917e-06, - "loss": 0.9863, + "learning_rate": 8.42304060434372e-06, + "loss": 1.2905, "step": 446 }, { - "epoch": 0.0126844494892168, + "epoch": 0.012666836691320241, "grad_norm": 0.0, - "learning_rate": 8.449905482041589e-06, - "loss": 1.136, + "learning_rate": 8.441926345609066e-06, + "loss": 1.1436, "step": 447 }, { - "epoch": 0.012712826333711692, + "epoch": 0.012695174133582702, "grad_norm": 0.0, - "learning_rate": 8.468809073724009e-06, - "loss": 1.1558, + "learning_rate": 8.460812086874411e-06, + "loss": 1.1173, "step": 448 }, { - "epoch": 0.012741203178206583, + "epoch": 0.012723511575845165, "grad_norm": 0.0, - "learning_rate": 8.487712665406428e-06, - "loss": 1.1218, + "learning_rate": 8.479697828139755e-06, + "loss": 1.2465, "step": 449 }, { - "epoch": 0.012769580022701475, + "epoch": 0.012751849018107626, "grad_norm": 0.0, - "learning_rate": 8.506616257088848e-06, - "loss": 1.202, + "learning_rate": 8.4985835694051e-06, + "loss": 1.1374, "step": 450 }, { - "epoch": 0.012797956867196368, + "epoch": 0.012780186460370086, "grad_norm": 0.0, - "learning_rate": 8.525519848771267e-06, - "loss": 1.0699, + "learning_rate": 8.517469310670444e-06, + "loss": 1.2466, "step": 451 }, { - "epoch": 0.01282633371169126, + "epoch": 0.012808523902632549, "grad_norm": 0.0, - "learning_rate": 8.544423440453687e-06, - "loss": 0.9576, + "learning_rate": 8.53635505193579e-06, + "loss": 1.1504, "step": 452 }, { - "epoch": 0.012854710556186153, + "epoch": 0.01283686134489501, "grad_norm": 0.0, - "learning_rate": 8.563327032136107e-06, - "loss": 1.1714, + "learning_rate": 8.555240793201134e-06, + "loss": 1.1081, "step": 453 }, { - "epoch": 0.012883087400681044, + "epoch": 0.01286519878715747, "grad_norm": 0.0, - "learning_rate": 8.582230623818526e-06, - "loss": 1.0603, + "learning_rate": 8.574126534466478e-06, + "loss": 1.233, "step": 454 }, { - "epoch": 0.012911464245175936, + "epoch": 0.012893536229419933, "grad_norm": 0.0, - "learning_rate": 8.601134215500946e-06, - "loss": 1.1562, + "learning_rate": 8.593012275731823e-06, + "loss": 1.155, "step": 455 }, { - "epoch": 0.012939841089670829, + "epoch": 0.012921873671682394, "grad_norm": 0.0, - "learning_rate": 8.620037807183366e-06, - "loss": 1.1459, + "learning_rate": 8.611898016997167e-06, + "loss": 1.2705, "step": 456 }, { - "epoch": 0.012968217934165721, + "epoch": 0.012950211113944855, "grad_norm": 0.0, - "learning_rate": 8.638941398865785e-06, - "loss": 1.029, + "learning_rate": 8.630783758262513e-06, + "loss": 1.1406, "step": 457 }, { - "epoch": 0.012996594778660614, + "epoch": 0.012978548556207318, "grad_norm": 0.0, - "learning_rate": 8.657844990548205e-06, - "loss": 1.1277, + "learning_rate": 8.649669499527858e-06, + "loss": 1.1962, "step": 458 }, { - "epoch": 0.013024971623155504, + "epoch": 0.013006885998469778, "grad_norm": 0.0, - "learning_rate": 8.676748582230625e-06, - "loss": 1.1028, + "learning_rate": 8.668555240793202e-06, + "loss": 1.0623, "step": 459 }, { - "epoch": 0.013053348467650397, + "epoch": 0.01303522344073224, "grad_norm": 0.0, - "learning_rate": 8.695652173913044e-06, - "loss": 1.0356, + "learning_rate": 8.687440982058548e-06, + "loss": 1.1702, "step": 460 }, { - "epoch": 0.01308172531214529, + "epoch": 0.013063560882994702, "grad_norm": 0.0, - "learning_rate": 8.714555765595464e-06, - "loss": 1.1004, + "learning_rate": 8.706326723323891e-06, + "loss": 1.1767, "step": 461 }, { - "epoch": 0.013110102156640182, + "epoch": 0.013091898325257163, "grad_norm": 0.0, - "learning_rate": 8.733459357277884e-06, - "loss": 1.1775, + "learning_rate": 8.725212464589235e-06, + "loss": 1.101, "step": 462 }, { - "epoch": 0.013138479001135074, + "epoch": 0.013120235767519623, "grad_norm": 0.0, - "learning_rate": 8.752362948960303e-06, - "loss": 0.9222, + "learning_rate": 8.744098205854581e-06, + "loss": 1.2732, "step": 463 }, { - "epoch": 0.013166855845629965, + "epoch": 0.013148573209782084, "grad_norm": 0.0, - "learning_rate": 8.771266540642723e-06, - "loss": 1.1071, + "learning_rate": 8.762983947119925e-06, + "loss": 1.1124, "step": 464 }, { - "epoch": 0.013195232690124858, + "epoch": 0.013176910652044547, "grad_norm": 0.0, - "learning_rate": 8.790170132325143e-06, - "loss": 1.0911, + "learning_rate": 8.78186968838527e-06, + "loss": 1.1329, "step": 465 }, { - "epoch": 0.01322360953461975, + "epoch": 0.013205248094307008, "grad_norm": 0.0, - "learning_rate": 8.809073724007562e-06, - "loss": 1.1357, + "learning_rate": 8.800755429650614e-06, + "loss": 1.1687, "step": 466 }, { - "epoch": 0.013251986379114643, + "epoch": 0.013233585536569469, "grad_norm": 0.0, - "learning_rate": 8.827977315689982e-06, - "loss": 1.1003, + "learning_rate": 8.81964117091596e-06, + "loss": 1.2106, "step": 467 }, { - "epoch": 0.013280363223609535, + "epoch": 0.013261922978831931, "grad_norm": 0.0, - "learning_rate": 8.846880907372402e-06, - "loss": 1.1507, + "learning_rate": 8.838526912181304e-06, + "loss": 1.0714, "step": 468 }, { - "epoch": 0.013308740068104426, + "epoch": 0.013290260421094392, "grad_norm": 0.0, - "learning_rate": 8.865784499054821e-06, - "loss": 1.1572, + "learning_rate": 8.857412653446649e-06, + "loss": 1.217, "step": 469 }, { - "epoch": 0.013337116912599319, + "epoch": 0.013318597863356853, "grad_norm": 0.0, - "learning_rate": 8.884688090737241e-06, - "loss": 1.0898, + "learning_rate": 8.876298394711993e-06, + "loss": 1.1148, "step": 470 }, { - "epoch": 0.013365493757094211, + "epoch": 0.013346935305619315, "grad_norm": 0.0, - "learning_rate": 8.90359168241966e-06, - "loss": 1.0504, + "learning_rate": 8.895184135977339e-06, + "loss": 1.1125, "step": 471 }, { - "epoch": 0.013393870601589104, + "epoch": 0.013375272747881776, "grad_norm": 0.0, - "learning_rate": 8.92249527410208e-06, - "loss": 1.0406, + "learning_rate": 8.914069877242682e-06, + "loss": 1.2425, "step": 472 }, { - "epoch": 0.013422247446083996, + "epoch": 0.013403610190144237, "grad_norm": 0.0, - "learning_rate": 8.9413988657845e-06, - "loss": 0.9498, + "learning_rate": 8.932955618508026e-06, + "loss": 1.1992, "step": 473 }, { - "epoch": 0.013450624290578887, + "epoch": 0.0134319476324067, "grad_norm": 0.0, - "learning_rate": 8.96030245746692e-06, - "loss": 1.0526, + "learning_rate": 8.951841359773372e-06, + "loss": 1.2588, "step": 474 }, { - "epoch": 0.01347900113507378, + "epoch": 0.01346028507466916, "grad_norm": 0.0, - "learning_rate": 8.97920604914934e-06, - "loss": 1.0418, + "learning_rate": 8.970727101038716e-06, + "loss": 1.1115, "step": 475 }, { - "epoch": 0.013507377979568672, + "epoch": 0.013488622516931621, "grad_norm": 0.0, - "learning_rate": 8.998109640831759e-06, - "loss": 1.1428, + "learning_rate": 8.989612842304061e-06, + "loss": 1.0824, "step": 476 }, { - "epoch": 0.013535754824063564, + "epoch": 0.013516959959194084, "grad_norm": 0.0, - "learning_rate": 9.017013232514179e-06, - "loss": 1.0874, + "learning_rate": 9.008498583569407e-06, + "loss": 1.2454, "step": 477 }, { - "epoch": 0.013564131668558457, + "epoch": 0.013545297401456545, "grad_norm": 0.0, - "learning_rate": 9.035916824196598e-06, - "loss": 1.1464, + "learning_rate": 9.02738432483475e-06, + "loss": 1.1915, "step": 478 }, { - "epoch": 0.013592508513053348, + "epoch": 0.013573634843719005, "grad_norm": 0.0, - "learning_rate": 9.054820415879018e-06, - "loss": 1.1772, + "learning_rate": 9.046270066100094e-06, + "loss": 1.2052, "step": 479 }, { - "epoch": 0.01362088535754824, + "epoch": 0.013601972285981468, "grad_norm": 0.0, - "learning_rate": 9.073724007561438e-06, - "loss": 1.1157, + "learning_rate": 9.06515580736544e-06, + "loss": 1.1483, "step": 480 }, { - "epoch": 0.013649262202043133, + "epoch": 0.013630309728243929, "grad_norm": 0.0, - "learning_rate": 9.092627599243857e-06, - "loss": 1.0257, + "learning_rate": 9.084041548630784e-06, + "loss": 1.2174, "step": 481 }, { - "epoch": 0.013677639046538025, + "epoch": 0.01365864717050639, "grad_norm": 0.0, - "learning_rate": 9.111531190926277e-06, - "loss": 1.2037, + "learning_rate": 9.10292728989613e-06, + "loss": 1.1077, "step": 482 }, { - "epoch": 0.013706015891032918, + "epoch": 0.013686984612768852, "grad_norm": 0.0, - "learning_rate": 9.130434782608697e-06, - "loss": 1.0027, + "learning_rate": 9.121813031161473e-06, + "loss": 1.2228, "step": 483 }, { - "epoch": 0.013734392735527809, + "epoch": 0.013715322055031313, "grad_norm": 0.0, - "learning_rate": 9.149338374291116e-06, - "loss": 1.0844, + "learning_rate": 9.140698772426819e-06, + "loss": 1.1239, "step": 484 }, { - "epoch": 0.013762769580022701, + "epoch": 0.013743659497293774, "grad_norm": 0.0, - "learning_rate": 9.168241965973536e-06, - "loss": 1.0396, + "learning_rate": 9.159584513692163e-06, + "loss": 1.2779, "step": 485 }, { - "epoch": 0.013791146424517594, + "epoch": 0.013771996939556235, "grad_norm": 0.0, - "learning_rate": 9.187145557655956e-06, - "loss": 1.1398, + "learning_rate": 9.178470254957508e-06, + "loss": 1.1861, "step": 486 }, { - "epoch": 0.013819523269012486, + "epoch": 0.013800334381818697, "grad_norm": 0.0, - "learning_rate": 9.206049149338375e-06, - "loss": 1.08, + "learning_rate": 9.197355996222852e-06, + "loss": 1.1501, "step": 487 }, { - "epoch": 0.013847900113507379, + "epoch": 0.013828671824081158, "grad_norm": 0.0, - "learning_rate": 9.224952741020795e-06, - "loss": 1.0933, + "learning_rate": 9.216241737488198e-06, + "loss": 1.1544, "step": 488 }, { - "epoch": 0.01387627695800227, + "epoch": 0.013857009266343619, "grad_norm": 0.0, - "learning_rate": 9.243856332703214e-06, - "loss": 1.1354, + "learning_rate": 9.235127478753542e-06, + "loss": 1.2762, "step": 489 }, { - "epoch": 0.013904653802497162, + "epoch": 0.013885346708606082, "grad_norm": 0.0, - "learning_rate": 9.262759924385634e-06, - "loss": 0.9667, + "learning_rate": 9.254013220018887e-06, + "loss": 1.1516, "step": 490 }, { - "epoch": 0.013933030646992054, + "epoch": 0.013913684150868542, "grad_norm": 0.0, - "learning_rate": 9.281663516068054e-06, - "loss": 0.9978, + "learning_rate": 9.272898961284231e-06, + "loss": 1.178, "step": 491 }, { - "epoch": 0.013961407491486947, + "epoch": 0.013942021593131003, "grad_norm": 0.0, - "learning_rate": 9.300567107750473e-06, - "loss": 1.0295, + "learning_rate": 9.291784702549575e-06, + "loss": 1.3495, "step": 492 }, { - "epoch": 0.01398978433598184, + "epoch": 0.013970359035393466, "grad_norm": 0.0, - "learning_rate": 9.319470699432893e-06, - "loss": 1.045, + "learning_rate": 9.31067044381492e-06, + "loss": 1.1719, "step": 493 }, { - "epoch": 0.01401816118047673, + "epoch": 0.013998696477655927, "grad_norm": 0.0, - "learning_rate": 9.338374291115313e-06, - "loss": 1.1342, + "learning_rate": 9.329556185080266e-06, + "loss": 1.2208, "step": 494 }, { - "epoch": 0.014046538024971623, + "epoch": 0.014027033919918388, "grad_norm": 0.0, - "learning_rate": 9.357277882797732e-06, - "loss": 1.0482, + "learning_rate": 9.34844192634561e-06, + "loss": 1.2852, "step": 495 }, { - "epoch": 0.014074914869466515, + "epoch": 0.01405537136218085, "grad_norm": 0.0, - "learning_rate": 9.376181474480152e-06, - "loss": 1.0491, + "learning_rate": 9.367327667610955e-06, + "loss": 1.0266, "step": 496 }, { - "epoch": 0.014103291713961408, + "epoch": 0.014083708804443311, "grad_norm": 0.0, - "learning_rate": 9.395085066162572e-06, - "loss": 1.0694, + "learning_rate": 9.3862134088763e-06, + "loss": 1.1207, "step": 497 }, { - "epoch": 0.0141316685584563, + "epoch": 0.014112046246705772, "grad_norm": 0.0, - "learning_rate": 9.413988657844991e-06, - "loss": 1.1411, + "learning_rate": 9.405099150141643e-06, + "loss": 1.1588, "step": 498 }, { - "epoch": 0.014160045402951191, + "epoch": 0.014140383688968234, "grad_norm": 0.0, - "learning_rate": 9.432892249527411e-06, - "loss": 1.0182, + "learning_rate": 9.423984891406989e-06, + "loss": 1.1815, "step": 499 }, { - "epoch": 0.014188422247446084, + "epoch": 0.014168721131230695, "grad_norm": 0.0, - "learning_rate": 9.45179584120983e-06, - "loss": 1.0368, + "learning_rate": 9.442870632672332e-06, + "loss": 1.2175, "step": 500 }, { - "epoch": 0.014216799091940976, + "epoch": 0.014197058573493156, "grad_norm": 0.0, - "learning_rate": 9.47069943289225e-06, - "loss": 1.0369, + "learning_rate": 9.461756373937678e-06, + "loss": 1.091, "step": 501 }, { - "epoch": 0.014245175936435869, + "epoch": 0.014225396015755619, "grad_norm": 0.0, - "learning_rate": 9.48960302457467e-06, - "loss": 1.0325, + "learning_rate": 9.480642115203022e-06, + "loss": 1.1046, "step": 502 }, { - "epoch": 0.014273552780930761, + "epoch": 0.01425373345801808, "grad_norm": 0.0, - "learning_rate": 9.50850661625709e-06, - "loss": 1.0409, + "learning_rate": 9.499527856468367e-06, + "loss": 1.1068, "step": 503 }, { - "epoch": 0.014301929625425652, + "epoch": 0.01428207090028054, "grad_norm": 0.0, - "learning_rate": 9.52741020793951e-06, - "loss": 1.0643, + "learning_rate": 9.518413597733713e-06, + "loss": 1.1844, "step": 504 }, { - "epoch": 0.014330306469920544, + "epoch": 0.014310408342543003, "grad_norm": 0.0, - "learning_rate": 9.546313799621929e-06, - "loss": 1.1072, + "learning_rate": 9.537299338999057e-06, + "loss": 1.2375, "step": 505 }, { - "epoch": 0.014358683314415437, + "epoch": 0.014338745784805464, "grad_norm": 0.0, - "learning_rate": 9.565217391304349e-06, - "loss": 1.0961, + "learning_rate": 9.5561850802644e-06, + "loss": 1.2876, "step": 506 }, { - "epoch": 0.01438706015891033, + "epoch": 0.014367083227067924, "grad_norm": 0.0, - "learning_rate": 9.584120982986768e-06, - "loss": 1.0848, + "learning_rate": 9.575070821529746e-06, + "loss": 1.1637, "step": 507 }, { - "epoch": 0.014415437003405222, + "epoch": 0.014395420669330387, "grad_norm": 0.0, - "learning_rate": 9.603024574669188e-06, - "loss": 1.1181, + "learning_rate": 9.59395656279509e-06, + "loss": 1.1768, "step": 508 }, { - "epoch": 0.014443813847900113, + "epoch": 0.014423758111592848, "grad_norm": 0.0, - "learning_rate": 9.621928166351608e-06, - "loss": 1.0671, + "learning_rate": 9.612842304060434e-06, + "loss": 1.1523, "step": 509 }, { - "epoch": 0.014472190692395005, + "epoch": 0.014452095553855309, "grad_norm": 0.0, - "learning_rate": 9.640831758034027e-06, - "loss": 1.0238, + "learning_rate": 9.63172804532578e-06, + "loss": 1.161, "step": 510 }, { - "epoch": 0.014500567536889898, + "epoch": 0.01448043299611777, "grad_norm": 0.0, - "learning_rate": 9.659735349716447e-06, - "loss": 1.0671, + "learning_rate": 9.650613786591125e-06, + "loss": 1.0583, "step": 511 }, { - "epoch": 0.01452894438138479, + "epoch": 0.014508770438380232, "grad_norm": 0.0, - "learning_rate": 9.678638941398867e-06, - "loss": 1.0871, + "learning_rate": 9.669499527856469e-06, + "loss": 1.1178, "step": 512 }, { - "epoch": 0.014557321225879683, + "epoch": 0.014537107880642693, "grad_norm": 0.0, - "learning_rate": 9.697542533081286e-06, - "loss": 1.0355, + "learning_rate": 9.688385269121814e-06, + "loss": 1.1462, "step": 513 }, { - "epoch": 0.014585698070374574, + "epoch": 0.014565445322905154, "grad_norm": 0.0, - "learning_rate": 9.716446124763706e-06, - "loss": 1.0803, + "learning_rate": 9.707271010387158e-06, + "loss": 1.0579, "step": 514 }, { - "epoch": 0.014614074914869466, + "epoch": 0.014593782765167616, "grad_norm": 0.0, - "learning_rate": 9.735349716446126e-06, - "loss": 1.0284, + "learning_rate": 9.726156751652504e-06, + "loss": 1.2625, "step": 515 }, { - "epoch": 0.014642451759364359, + "epoch": 0.014622120207430077, "grad_norm": 0.0, - "learning_rate": 9.754253308128545e-06, - "loss": 1.0406, + "learning_rate": 9.745042492917848e-06, + "loss": 1.2162, "step": 516 }, { - "epoch": 0.014670828603859251, + "epoch": 0.014650457649692538, "grad_norm": 0.0, - "learning_rate": 9.773156899810965e-06, - "loss": 1.053, + "learning_rate": 9.763928234183192e-06, + "loss": 1.1657, "step": 517 }, { - "epoch": 0.014699205448354144, + "epoch": 0.014678795091955, "grad_norm": 0.0, - "learning_rate": 9.792060491493385e-06, - "loss": 1.1042, + "learning_rate": 9.782813975448537e-06, + "loss": 1.1826, "step": 518 }, { - "epoch": 0.014727582292849034, + "epoch": 0.014707132534217461, "grad_norm": 0.0, - "learning_rate": 9.810964083175804e-06, - "loss": 1.0751, + "learning_rate": 9.801699716713881e-06, + "loss": 1.2114, "step": 519 }, { - "epoch": 0.014755959137343927, + "epoch": 0.014735469976479922, "grad_norm": 0.0, - "learning_rate": 9.829867674858224e-06, - "loss": 0.9958, + "learning_rate": 9.820585457979227e-06, + "loss": 1.267, "step": 520 }, { - "epoch": 0.01478433598183882, + "epoch": 0.014763807418742385, "grad_norm": 0.0, - "learning_rate": 9.848771266540644e-06, - "loss": 1.1558, + "learning_rate": 9.839471199244572e-06, + "loss": 1.1984, "step": 521 }, { - "epoch": 0.014812712826333712, + "epoch": 0.014792144861004846, "grad_norm": 0.0, - "learning_rate": 9.867674858223063e-06, - "loss": 0.9994, + "learning_rate": 9.858356940509916e-06, + "loss": 1.1805, "step": 522 }, { - "epoch": 0.014841089670828604, + "epoch": 0.014820482303267307, "grad_norm": 0.0, - "learning_rate": 9.886578449905483e-06, - "loss": 1.0357, + "learning_rate": 9.877242681775262e-06, + "loss": 1.2211, "step": 523 }, { - "epoch": 0.014869466515323495, + "epoch": 0.014848819745529769, "grad_norm": 0.0, - "learning_rate": 9.905482041587903e-06, - "loss": 1.0869, + "learning_rate": 9.896128423040605e-06, + "loss": 1.1942, "step": 524 }, { - "epoch": 0.014897843359818388, + "epoch": 0.01487715718779223, "grad_norm": 0.0, - "learning_rate": 9.924385633270322e-06, - "loss": 1.0451, + "learning_rate": 9.91501416430595e-06, + "loss": 1.2608, "step": 525 }, { - "epoch": 0.01492622020431328, + "epoch": 0.01490549463005469, "grad_norm": 0.0, - "learning_rate": 9.943289224952742e-06, - "loss": 1.1299, + "learning_rate": 9.933899905571295e-06, + "loss": 1.1016, "step": 526 }, { - "epoch": 0.014954597048808173, + "epoch": 0.014933832072317153, "grad_norm": 0.0, - "learning_rate": 9.962192816635162e-06, - "loss": 1.2252, + "learning_rate": 9.952785646836639e-06, + "loss": 1.134, "step": 527 }, { - "epoch": 0.014982973893303065, + "epoch": 0.014962169514579614, "grad_norm": 0.0, - "learning_rate": 9.981096408317581e-06, - "loss": 1.0944, + "learning_rate": 9.971671388101982e-06, + "loss": 1.2915, "step": 528 }, { - "epoch": 0.015011350737797956, + "epoch": 0.014990506956842075, "grad_norm": 0.0, - "learning_rate": 1e-05, - "loss": 1.1097, + "learning_rate": 9.990557129367328e-06, + "loss": 1.0709, "step": 529 }, { - "epoch": 0.015039727582292849, + "epoch": 0.015018844399104538, "grad_norm": 0.0, - "learning_rate": 1.001890359168242e-05, - "loss": 1.1234, + "learning_rate": 1.0009442870632674e-05, + "loss": 1.118, "step": 530 }, { - "epoch": 0.015068104426787741, + "epoch": 0.015047181841366998, "grad_norm": 0.0, - "learning_rate": 1.003780718336484e-05, - "loss": 1.121, + "learning_rate": 1.0028328611898017e-05, + "loss": 1.0549, "step": 531 }, { - "epoch": 0.015096481271282634, + "epoch": 0.01507551928362946, "grad_norm": 0.0, - "learning_rate": 1.005671077504726e-05, - "loss": 1.1518, + "learning_rate": 1.0047214353163361e-05, + "loss": 1.307, "step": 532 }, { - "epoch": 0.015124858115777526, + "epoch": 0.015103856725891922, "grad_norm": 0.0, - "learning_rate": 1.007561436672968e-05, - "loss": 1.0138, + "learning_rate": 1.0066100094428709e-05, + "loss": 1.1893, "step": 533 }, { - "epoch": 0.015153234960272417, + "epoch": 0.015132194168154383, "grad_norm": 0.0, - "learning_rate": 1.00945179584121e-05, - "loss": 1.1559, + "learning_rate": 1.0084985835694052e-05, + "loss": 1.1422, "step": 534 }, { - "epoch": 0.01518161180476731, + "epoch": 0.015160531610416843, "grad_norm": 0.0, - "learning_rate": 1.011342155009452e-05, - "loss": 1.0013, + "learning_rate": 1.0103871576959396e-05, + "loss": 1.2535, "step": 535 }, { - "epoch": 0.015209988649262202, + "epoch": 0.015188869052679304, "grad_norm": 0.0, - "learning_rate": 1.0132325141776937e-05, - "loss": 1.1763, + "learning_rate": 1.012275731822474e-05, + "loss": 1.1636, "step": 536 }, { - "epoch": 0.015238365493757094, + "epoch": 0.015217206494941767, "grad_norm": 0.0, - "learning_rate": 1.0151228733459358e-05, - "loss": 0.9456, + "learning_rate": 1.0141643059490086e-05, + "loss": 1.1426, "step": 537 }, { - "epoch": 0.015266742338251987, + "epoch": 0.015245543937204228, "grad_norm": 0.0, - "learning_rate": 1.0170132325141778e-05, - "loss": 1.0938, + "learning_rate": 1.0160528800755431e-05, + "loss": 1.1871, "step": 538 }, { - "epoch": 0.015295119182746878, + "epoch": 0.015273881379466689, "grad_norm": 0.0, - "learning_rate": 1.0189035916824197e-05, - "loss": 1.0457, + "learning_rate": 1.0179414542020775e-05, + "loss": 1.1648, "step": 539 }, { - "epoch": 0.01532349602724177, + "epoch": 0.015302218821729151, "grad_norm": 0.0, - "learning_rate": 1.0207939508506617e-05, - "loss": 0.976, + "learning_rate": 1.019830028328612e-05, + "loss": 1.1185, "step": 540 }, { - "epoch": 0.015351872871736663, + "epoch": 0.015330556263991612, "grad_norm": 0.0, - "learning_rate": 1.0226843100189037e-05, - "loss": 1.2176, + "learning_rate": 1.0217186024551465e-05, + "loss": 1.0786, "step": 541 }, { - "epoch": 0.015380249716231555, + "epoch": 0.015358893706254073, "grad_norm": 0.0, - "learning_rate": 1.0245746691871456e-05, - "loss": 1.0466, + "learning_rate": 1.0236071765816808e-05, + "loss": 1.1891, "step": 542 }, { - "epoch": 0.015408626560726448, + "epoch": 0.015387231148516535, "grad_norm": 0.0, - "learning_rate": 1.0264650283553876e-05, - "loss": 1.0492, + "learning_rate": 1.0254957507082152e-05, + "loss": 1.1305, "step": 543 }, { - "epoch": 0.015437003405221339, + "epoch": 0.015415568590778996, "grad_norm": 0.0, - "learning_rate": 1.0283553875236296e-05, - "loss": 1.1672, + "learning_rate": 1.02738432483475e-05, + "loss": 1.1986, "step": 544 }, { - "epoch": 0.015465380249716231, + "epoch": 0.015443906033041457, "grad_norm": 0.0, - "learning_rate": 1.0302457466918715e-05, - "loss": 1.0318, + "learning_rate": 1.0292728989612843e-05, + "loss": 1.2179, "step": 545 }, { - "epoch": 0.015493757094211124, + "epoch": 0.01547224347530392, "grad_norm": 0.0, - "learning_rate": 1.0321361058601137e-05, - "loss": 1.1222, + "learning_rate": 1.0311614730878187e-05, + "loss": 1.1525, "step": 546 }, { - "epoch": 0.015522133938706016, + "epoch": 0.01550058091756638, "grad_norm": 0.0, - "learning_rate": 1.0340264650283556e-05, - "loss": 1.1519, + "learning_rate": 1.0330500472143533e-05, + "loss": 1.1863, "step": 547 }, { - "epoch": 0.015550510783200909, + "epoch": 0.015528918359828841, "grad_norm": 0.0, - "learning_rate": 1.0359168241965973e-05, - "loss": 1.0659, + "learning_rate": 1.0349386213408877e-05, + "loss": 1.1245, "step": 548 }, { - "epoch": 0.0155788876276958, + "epoch": 0.015557255802091304, "grad_norm": 0.0, - "learning_rate": 1.0378071833648394e-05, - "loss": 1.1028, + "learning_rate": 1.0368271954674222e-05, + "loss": 1.1768, "step": 549 }, { - "epoch": 0.015607264472190692, + "epoch": 0.015585593244353765, "grad_norm": 0.0, - "learning_rate": 1.0396975425330814e-05, - "loss": 1.1011, + "learning_rate": 1.0387157695939568e-05, + "loss": 1.1517, "step": 550 }, { - "epoch": 0.015635641316685586, + "epoch": 0.015613930686616226, "grad_norm": 0.0, - "learning_rate": 1.0415879017013233e-05, - "loss": 1.0847, + "learning_rate": 1.0406043437204912e-05, + "loss": 1.2001, "step": 551 }, { - "epoch": 0.015664018161180477, + "epoch": 0.015642268128878686, "grad_norm": 0.0, - "learning_rate": 1.0434782608695653e-05, - "loss": 1.1486, + "learning_rate": 1.0424929178470255e-05, + "loss": 1.0851, "step": 552 }, { - "epoch": 0.015692395005675368, + "epoch": 0.01567060557114115, "grad_norm": 0.0, - "learning_rate": 1.0453686200378073e-05, - "loss": 1.1779, + "learning_rate": 1.04438149197356e-05, + "loss": 1.0633, "step": 553 }, { - "epoch": 0.015720771850170262, + "epoch": 0.01569894301340361, "grad_norm": 0.0, - "learning_rate": 1.0472589792060492e-05, - "loss": 1.076, + "learning_rate": 1.0462700661000945e-05, + "loss": 1.1875, "step": 554 }, { - "epoch": 0.015749148694665153, + "epoch": 0.01572728045566607, "grad_norm": 0.0, - "learning_rate": 1.0491493383742912e-05, - "loss": 1.0219, + "learning_rate": 1.048158640226629e-05, + "loss": 1.2048, "step": 555 }, { - "epoch": 0.015777525539160047, + "epoch": 0.015755617897928533, "grad_norm": 0.0, - "learning_rate": 1.0510396975425332e-05, - "loss": 1.0999, + "learning_rate": 1.0500472143531634e-05, + "loss": 1.0852, "step": 556 }, { - "epoch": 0.015805902383654938, + "epoch": 0.015783955340190996, "grad_norm": 0.0, - "learning_rate": 1.0529300567107751e-05, - "loss": 1.2273, + "learning_rate": 1.051935788479698e-05, + "loss": 1.1334, "step": 557 }, { - "epoch": 0.01583427922814983, + "epoch": 0.015812292782453455, "grad_norm": 0.0, - "learning_rate": 1.0548204158790173e-05, - "loss": 1.0422, + "learning_rate": 1.0538243626062324e-05, + "loss": 1.198, "step": 558 }, { - "epoch": 0.015862656072644723, + "epoch": 0.015840630224715917, "grad_norm": 0.0, - "learning_rate": 1.0567107750472592e-05, - "loss": 1.0613, + "learning_rate": 1.0557129367327668e-05, + "loss": 1.2544, "step": 559 }, { - "epoch": 0.015891032917139614, + "epoch": 0.01586896766697838, "grad_norm": 0.0, - "learning_rate": 1.0586011342155009e-05, - "loss": 0.9979, + "learning_rate": 1.0576015108593015e-05, + "loss": 1.1927, "step": 560 }, { - "epoch": 0.015919409761634508, + "epoch": 0.01589730510924084, "grad_norm": 0.0, - "learning_rate": 1.060491493383743e-05, - "loss": 1.0312, + "learning_rate": 1.0594900849858359e-05, + "loss": 1.1464, "step": 561 }, { - "epoch": 0.0159477866061294, + "epoch": 0.0159256425515033, "grad_norm": 0.0, - "learning_rate": 1.062381852551985e-05, - "loss": 1.112, + "learning_rate": 1.0613786591123702e-05, + "loss": 1.113, "step": 562 }, { - "epoch": 0.01597616345062429, + "epoch": 0.015953979993765764, "grad_norm": 0.0, - "learning_rate": 1.064272211720227e-05, - "loss": 0.9374, + "learning_rate": 1.0632672332389046e-05, + "loss": 1.085, "step": 563 }, { - "epoch": 0.016004540295119184, + "epoch": 0.015982317436028223, "grad_norm": 0.0, - "learning_rate": 1.0661625708884689e-05, - "loss": 1.089, + "learning_rate": 1.0651558073654392e-05, + "loss": 1.2026, "step": 564 }, { - "epoch": 0.016032917139614074, + "epoch": 0.016010654878290686, "grad_norm": 0.0, - "learning_rate": 1.0680529300567109e-05, - "loss": 1.1803, + "learning_rate": 1.0670443814919737e-05, + "loss": 1.1462, "step": 565 }, { - "epoch": 0.01606129398410897, + "epoch": 0.01603899232055315, "grad_norm": 0.0, - "learning_rate": 1.0699432892249528e-05, - "loss": 0.989, + "learning_rate": 1.0689329556185081e-05, + "loss": 1.1565, "step": 566 }, { - "epoch": 0.01608967082860386, + "epoch": 0.016067329762815608, "grad_norm": 0.0, - "learning_rate": 1.0718336483931948e-05, - "loss": 1.1544, + "learning_rate": 1.0708215297450427e-05, + "loss": 1.1434, "step": 567 }, { - "epoch": 0.01611804767309875, + "epoch": 0.01609566720507807, "grad_norm": 0.0, - "learning_rate": 1.0737240075614367e-05, - "loss": 1.0525, + "learning_rate": 1.072710103871577e-05, + "loss": 1.1495, "step": 568 }, { - "epoch": 0.016146424517593645, + "epoch": 0.016124004647340533, "grad_norm": 0.0, - "learning_rate": 1.0756143667296787e-05, - "loss": 1.1099, + "learning_rate": 1.0745986779981115e-05, + "loss": 1.2269, "step": 569 }, { - "epoch": 0.016174801362088535, + "epoch": 0.016152342089602992, "grad_norm": 0.0, - "learning_rate": 1.0775047258979208e-05, - "loss": 1.0101, + "learning_rate": 1.0764872521246458e-05, + "loss": 1.2084, "step": 570 }, { - "epoch": 0.01620317820658343, + "epoch": 0.016180679531865454, "grad_norm": 0.0, - "learning_rate": 1.0793950850661628e-05, - "loss": 1.0596, + "learning_rate": 1.0783758262511806e-05, + "loss": 1.2275, "step": 571 }, { - "epoch": 0.01623155505107832, + "epoch": 0.016209016974127917, "grad_norm": 0.0, - "learning_rate": 1.0812854442344048e-05, - "loss": 1.0419, + "learning_rate": 1.080264400377715e-05, + "loss": 1.2118, "step": 572 }, { - "epoch": 0.01625993189557321, + "epoch": 0.016237354416390376, "grad_norm": 0.0, - "learning_rate": 1.0831758034026466e-05, - "loss": 1.2113, + "learning_rate": 1.0821529745042493e-05, + "loss": 1.3405, "step": 573 }, { - "epoch": 0.016288308740068105, + "epoch": 0.01626569185865284, "grad_norm": 0.0, - "learning_rate": 1.0850661625708885e-05, - "loss": 0.9956, + "learning_rate": 1.0840415486307839e-05, + "loss": 1.1417, "step": 574 }, { - "epoch": 0.016316685584562996, + "epoch": 0.016294029300915298, "grad_norm": 0.0, - "learning_rate": 1.0869565217391305e-05, - "loss": 1.054, + "learning_rate": 1.0859301227573183e-05, + "loss": 1.1895, "step": 575 }, { - "epoch": 0.01634506242905789, + "epoch": 0.01632236674317776, "grad_norm": 0.0, - "learning_rate": 1.0888468809073725e-05, - "loss": 1.1067, + "learning_rate": 1.0878186968838528e-05, + "loss": 1.1922, "step": 576 }, { - "epoch": 0.01637343927355278, + "epoch": 0.016350704185440223, "grad_norm": 0.0, - "learning_rate": 1.0907372400756144e-05, - "loss": 1.0895, + "learning_rate": 1.0897072710103874e-05, + "loss": 1.1641, "step": 577 }, { - "epoch": 0.016401816118047672, + "epoch": 0.016379041627702682, "grad_norm": 0.0, - "learning_rate": 1.0926275992438564e-05, - "loss": 1.011, + "learning_rate": 1.0915958451369218e-05, + "loss": 1.0761, "step": 578 }, { - "epoch": 0.016430192962542566, + "epoch": 0.016407379069965145, "grad_norm": 0.0, - "learning_rate": 1.0945179584120984e-05, - "loss": 1.0725, + "learning_rate": 1.0934844192634562e-05, + "loss": 1.2097, "step": 579 }, { - "epoch": 0.016458569807037457, + "epoch": 0.016435716512227607, "grad_norm": 0.0, - "learning_rate": 1.0964083175803403e-05, - "loss": 1.1512, + "learning_rate": 1.0953729933899905e-05, + "loss": 1.1974, "step": 580 }, { - "epoch": 0.01648694665153235, + "epoch": 0.016464053954490066, "grad_norm": 0.0, - "learning_rate": 1.0982986767485823e-05, - "loss": 1.0124, + "learning_rate": 1.097261567516525e-05, + "loss": 1.1616, "step": 581 }, { - "epoch": 0.016515323496027242, + "epoch": 0.01649239139675253, "grad_norm": 0.0, - "learning_rate": 1.1001890359168244e-05, - "loss": 1.0335, + "learning_rate": 1.0991501416430597e-05, + "loss": 1.1666, "step": 582 }, { - "epoch": 0.016543700340522133, + "epoch": 0.01652072883901499, "grad_norm": 0.0, - "learning_rate": 1.1020793950850664e-05, - "loss": 1.1806, + "learning_rate": 1.101038715769594e-05, + "loss": 1.1404, "step": 583 }, { - "epoch": 0.016572077185017027, + "epoch": 0.01654906628127745, "grad_norm": 0.0, - "learning_rate": 1.1039697542533084e-05, - "loss": 1.1337, + "learning_rate": 1.1029272898961286e-05, + "loss": 1.1824, "step": 584 }, { - "epoch": 0.016600454029511918, + "epoch": 0.016577403723539913, "grad_norm": 0.0, - "learning_rate": 1.1058601134215502e-05, - "loss": 1.1096, + "learning_rate": 1.104815864022663e-05, + "loss": 1.1137, "step": 585 }, { - "epoch": 0.016628830874006812, + "epoch": 0.016605741165802376, "grad_norm": 0.0, - "learning_rate": 1.1077504725897921e-05, - "loss": 0.9867, + "learning_rate": 1.1067044381491974e-05, + "loss": 1.2334, "step": 586 }, { - "epoch": 0.016657207718501703, + "epoch": 0.016634078608064835, "grad_norm": 0.0, - "learning_rate": 1.1096408317580341e-05, - "loss": 1.1279, + "learning_rate": 1.1085930122757321e-05, + "loss": 1.1588, "step": 587 }, { - "epoch": 0.016685584562996594, + "epoch": 0.016662416050327297, "grad_norm": 0.0, - "learning_rate": 1.111531190926276e-05, - "loss": 1.1595, + "learning_rate": 1.1104815864022665e-05, + "loss": 1.1977, "step": 588 }, { - "epoch": 0.016713961407491488, + "epoch": 0.01669075349258976, "grad_norm": 0.0, - "learning_rate": 1.113421550094518e-05, - "loss": 1.1272, + "learning_rate": 1.1123701605288009e-05, + "loss": 1.129, "step": 589 }, { - "epoch": 0.01674233825198638, + "epoch": 0.01671909093485222, "grad_norm": 0.0, - "learning_rate": 1.11531190926276e-05, - "loss": 0.9953, + "learning_rate": 1.1142587346553353e-05, + "loss": 1.0936, "step": 590 }, { - "epoch": 0.016770715096481273, + "epoch": 0.01674742837711468, "grad_norm": 0.0, - "learning_rate": 1.117202268431002e-05, - "loss": 1.0692, + "learning_rate": 1.1161473087818696e-05, + "loss": 1.2652, "step": 591 }, { - "epoch": 0.016799091940976164, + "epoch": 0.016775765819377144, "grad_norm": 0.0, - "learning_rate": 1.119092627599244e-05, - "loss": 1.0696, + "learning_rate": 1.1180358829084042e-05, + "loss": 1.1779, "step": 592 }, { - "epoch": 0.016827468785471054, + "epoch": 0.016804103261639603, "grad_norm": 0.0, - "learning_rate": 1.1209829867674859e-05, - "loss": 1.0686, + "learning_rate": 1.1199244570349388e-05, + "loss": 1.2056, "step": 593 }, { - "epoch": 0.01685584562996595, + "epoch": 0.016832440703902066, "grad_norm": 0.0, - "learning_rate": 1.1228733459357279e-05, - "loss": 0.9491, + "learning_rate": 1.1218130311614731e-05, + "loss": 1.1441, "step": 594 }, { - "epoch": 0.01688422247446084, + "epoch": 0.01686077814616453, "grad_norm": 0.0, - "learning_rate": 1.12476370510397e-05, - "loss": 1.0397, + "learning_rate": 1.1237016052880077e-05, + "loss": 1.1894, "step": 595 }, { - "epoch": 0.016912599318955734, + "epoch": 0.016889115588426987, "grad_norm": 0.0, - "learning_rate": 1.126654064272212e-05, - "loss": 1.1132, + "learning_rate": 1.125590179414542e-05, + "loss": 1.2842, "step": 596 }, { - "epoch": 0.016940976163450625, + "epoch": 0.01691745303068945, "grad_norm": 0.0, - "learning_rate": 1.1285444234404538e-05, - "loss": 1.0736, + "learning_rate": 1.1274787535410765e-05, + "loss": 1.1052, "step": 597 }, { - "epoch": 0.016969353007945515, + "epoch": 0.016945790472951913, "grad_norm": 0.0, - "learning_rate": 1.1304347826086957e-05, - "loss": 1.1422, + "learning_rate": 1.1293673276676112e-05, + "loss": 1.0623, "step": 598 }, { - "epoch": 0.01699772985244041, + "epoch": 0.01697412791521437, "grad_norm": 0.0, - "learning_rate": 1.1323251417769377e-05, - "loss": 1.1134, + "learning_rate": 1.1312559017941456e-05, + "loss": 1.1542, "step": 599 }, { - "epoch": 0.0170261066969353, + "epoch": 0.017002465357476834, "grad_norm": 0.0, - "learning_rate": 1.1342155009451797e-05, - "loss": 1.1223, + "learning_rate": 1.13314447592068e-05, + "loss": 1.196, "step": 600 }, { - "epoch": 0.017054483541430195, + "epoch": 0.017030802799739297, "grad_norm": 0.0, - "learning_rate": 1.1361058601134216e-05, - "loss": 1.0376, + "learning_rate": 1.1350330500472143e-05, + "loss": 1.1997, "step": 601 }, { - "epoch": 0.017082860385925085, + "epoch": 0.017059140242001756, "grad_norm": 0.0, - "learning_rate": 1.1379962192816636e-05, - "loss": 1.114, + "learning_rate": 1.1369216241737489e-05, + "loss": 1.1639, "step": 602 }, { - "epoch": 0.017111237230419976, + "epoch": 0.01708747768426422, "grad_norm": 0.0, - "learning_rate": 1.1398865784499056e-05, - "loss": 1.0723, + "learning_rate": 1.1388101983002833e-05, + "loss": 1.1685, "step": 603 }, { - "epoch": 0.01713961407491487, + "epoch": 0.01711581512652668, "grad_norm": 0.0, - "learning_rate": 1.1417769376181475e-05, - "loss": 1.1404, + "learning_rate": 1.1406987724268178e-05, + "loss": 1.1928, "step": 604 }, { - "epoch": 0.01716799091940976, + "epoch": 0.01714415256878914, "grad_norm": 0.0, - "learning_rate": 1.1436672967863895e-05, - "loss": 0.9975, + "learning_rate": 1.1425873465533524e-05, + "loss": 1.1805, "step": 605 }, { - "epoch": 0.017196367763904655, + "epoch": 0.017172490011051603, "grad_norm": 0.0, - "learning_rate": 1.1455576559546314e-05, - "loss": 1.0676, + "learning_rate": 1.1444759206798868e-05, + "loss": 1.1537, "step": 606 }, { - "epoch": 0.017224744608399546, + "epoch": 0.017200827453314065, "grad_norm": 0.0, - "learning_rate": 1.1474480151228736e-05, - "loss": 1.1318, + "learning_rate": 1.1463644948064212e-05, + "loss": 1.0925, "step": 607 }, { - "epoch": 0.017253121452894437, + "epoch": 0.017229164895576524, "grad_norm": 0.0, - "learning_rate": 1.1493383742911156e-05, - "loss": 1.0956, + "learning_rate": 1.1482530689329556e-05, + "loss": 1.2629, "step": 608 }, { - "epoch": 0.01728149829738933, + "epoch": 0.017257502337838987, "grad_norm": 0.0, - "learning_rate": 1.1512287334593572e-05, - "loss": 1.1684, + "learning_rate": 1.1501416430594903e-05, + "loss": 1.138, "step": 609 }, { - "epoch": 0.017309875141884222, + "epoch": 0.01728583978010145, "grad_norm": 0.0, - "learning_rate": 1.1531190926275993e-05, - "loss": 1.1311, + "learning_rate": 1.1520302171860247e-05, + "loss": 1.1708, "step": 610 }, { - "epoch": 0.017338251986379116, + "epoch": 0.01731417722236391, "grad_norm": 0.0, - "learning_rate": 1.1550094517958413e-05, - "loss": 1.0422, + "learning_rate": 1.153918791312559e-05, + "loss": 1.0527, "step": 611 }, { - "epoch": 0.017366628830874007, + "epoch": 0.01734251466462637, "grad_norm": 0.0, - "learning_rate": 1.1568998109640832e-05, - "loss": 1.1046, + "learning_rate": 1.1558073654390936e-05, + "loss": 1.1119, "step": 612 }, { - "epoch": 0.017395005675368898, + "epoch": 0.017370852106888834, "grad_norm": 0.0, - "learning_rate": 1.1587901701323252e-05, - "loss": 1.007, + "learning_rate": 1.157695939565628e-05, + "loss": 1.0544, "step": 613 }, { - "epoch": 0.017423382519863792, + "epoch": 0.017399189549151293, "grad_norm": 0.0, - "learning_rate": 1.1606805293005672e-05, - "loss": 1.0799, + "learning_rate": 1.1595845136921624e-05, + "loss": 1.1486, "step": 614 }, { - "epoch": 0.017451759364358683, + "epoch": 0.017427526991413755, "grad_norm": 0.0, - "learning_rate": 1.1625708884688091e-05, - "loss": 1.0965, + "learning_rate": 1.1614730878186971e-05, + "loss": 1.2757, "step": 615 }, { - "epoch": 0.017480136208853577, + "epoch": 0.017455864433676218, "grad_norm": 0.0, - "learning_rate": 1.1644612476370511e-05, - "loss": 1.0836, + "learning_rate": 1.1633616619452315e-05, + "loss": 1.1553, "step": 616 }, { - "epoch": 0.017508513053348468, + "epoch": 0.017484201875938677, "grad_norm": 0.0, - "learning_rate": 1.166351606805293e-05, - "loss": 0.9422, + "learning_rate": 1.1652502360717659e-05, + "loss": 1.1233, "step": 617 }, { - "epoch": 0.01753688989784336, + "epoch": 0.01751253931820114, "grad_norm": 0.0, - "learning_rate": 1.168241965973535e-05, - "loss": 1.0988, + "learning_rate": 1.1671388101983003e-05, + "loss": 1.0777, "step": 618 }, { - "epoch": 0.017565266742338253, + "epoch": 0.017540876760463602, "grad_norm": 0.0, - "learning_rate": 1.1701323251417772e-05, - "loss": 1.127, + "learning_rate": 1.1690273843248348e-05, + "loss": 1.1747, "step": 619 }, { - "epoch": 0.017593643586833144, + "epoch": 0.01756921420272606, "grad_norm": 0.0, - "learning_rate": 1.1720226843100191e-05, - "loss": 1.0518, + "learning_rate": 1.1709159584513694e-05, + "loss": 1.1588, "step": 620 }, { - "epoch": 0.017622020431328038, + "epoch": 0.017597551644988524, "grad_norm": 0.0, - "learning_rate": 1.1739130434782611e-05, - "loss": 1.0561, + "learning_rate": 1.1728045325779038e-05, + "loss": 1.1856, "step": 621 }, { - "epoch": 0.01765039727582293, + "epoch": 0.017625889087250986, "grad_norm": 0.0, - "learning_rate": 1.1758034026465029e-05, - "loss": 1.0829, + "learning_rate": 1.1746931067044383e-05, + "loss": 1.139, "step": 622 }, { - "epoch": 0.01767877412031782, + "epoch": 0.017654226529513446, "grad_norm": 0.0, - "learning_rate": 1.1776937618147449e-05, - "loss": 1.0552, + "learning_rate": 1.1765816808309727e-05, + "loss": 1.0848, "step": 623 }, { - "epoch": 0.017707150964812714, + "epoch": 0.017682563971775908, "grad_norm": 0.0, - "learning_rate": 1.1795841209829868e-05, - "loss": 1.1144, + "learning_rate": 1.178470254957507e-05, + "loss": 1.2071, "step": 624 }, { - "epoch": 0.017735527809307605, + "epoch": 0.017710901414038367, "grad_norm": 0.0, - "learning_rate": 1.1814744801512288e-05, - "loss": 1.1992, + "learning_rate": 1.1803588290840415e-05, + "loss": 1.165, "step": 625 }, { - "epoch": 0.0177639046538025, + "epoch": 0.01773923885630083, "grad_norm": 0.0, - "learning_rate": 1.1833648393194708e-05, - "loss": 1.0677, + "learning_rate": 1.1822474032105762e-05, + "loss": 1.1826, "step": 626 }, { - "epoch": 0.01779228149829739, + "epoch": 0.017767576298563292, "grad_norm": 0.0, - "learning_rate": 1.1852551984877127e-05, - "loss": 1.1686, + "learning_rate": 1.1841359773371106e-05, + "loss": 1.1346, "step": 627 }, { - "epoch": 0.01782065834279228, + "epoch": 0.01779591374082575, "grad_norm": 0.0, - "learning_rate": 1.1871455576559547e-05, - "loss": 1.049, + "learning_rate": 1.186024551463645e-05, + "loss": 1.197, "step": 628 }, { - "epoch": 0.017849035187287175, + "epoch": 0.017824251183088214, "grad_norm": 0.0, - "learning_rate": 1.1890359168241967e-05, - "loss": 1.1206, + "learning_rate": 1.1879131255901795e-05, + "loss": 1.1392, "step": 629 }, { - "epoch": 0.017877412031782065, + "epoch": 0.017852588625350677, "grad_norm": 0.0, - "learning_rate": 1.1909262759924386e-05, - "loss": 1.023, + "learning_rate": 1.1898016997167139e-05, + "loss": 1.0493, "step": 630 }, { - "epoch": 0.01790578887627696, + "epoch": 0.017880926067613136, "grad_norm": 0.0, - "learning_rate": 1.1928166351606808e-05, - "loss": 0.9014, + "learning_rate": 1.1916902738432485e-05, + "loss": 1.1349, "step": 631 }, { - "epoch": 0.01793416572077185, + "epoch": 0.017909263509875598, "grad_norm": 0.0, - "learning_rate": 1.1947069943289227e-05, - "loss": 1.0382, + "learning_rate": 1.193578847969783e-05, + "loss": 1.1688, "step": 632 }, { - "epoch": 0.01796254256526674, + "epoch": 0.01793760095213806, "grad_norm": 0.0, - "learning_rate": 1.1965973534971647e-05, - "loss": 1.0891, + "learning_rate": 1.1954674220963174e-05, + "loss": 1.1362, "step": 633 }, { - "epoch": 0.017990919409761635, + "epoch": 0.01796593839440052, "grad_norm": 0.0, - "learning_rate": 1.1984877126654065e-05, - "loss": 1.0846, + "learning_rate": 1.1973559962228518e-05, + "loss": 1.256, "step": 634 }, { - "epoch": 0.018019296254256526, + "epoch": 0.017994275836662982, "grad_norm": 0.0, - "learning_rate": 1.2003780718336485e-05, - "loss": 1.05, + "learning_rate": 1.1992445703493862e-05, + "loss": 1.1974, "step": 635 }, { - "epoch": 0.01804767309875142, + "epoch": 0.018022613278925445, "grad_norm": 0.0, - "learning_rate": 1.2022684310018904e-05, - "loss": 1.0553, + "learning_rate": 1.2011331444759209e-05, + "loss": 1.2349, "step": 636 }, { - "epoch": 0.01807604994324631, + "epoch": 0.018050950721187904, "grad_norm": 0.0, - "learning_rate": 1.2041587901701324e-05, - "loss": 1.0789, + "learning_rate": 1.2030217186024553e-05, + "loss": 1.1238, "step": 637 }, { - "epoch": 0.018104426787741202, + "epoch": 0.018079288163450367, "grad_norm": 0.0, - "learning_rate": 1.2060491493383744e-05, - "loss": 1.1219, + "learning_rate": 1.2049102927289897e-05, + "loss": 1.2486, "step": 638 }, { - "epoch": 0.018132803632236096, + "epoch": 0.01810762560571283, "grad_norm": 0.0, - "learning_rate": 1.2079395085066163e-05, - "loss": 1.1429, + "learning_rate": 1.2067988668555242e-05, + "loss": 1.1445, "step": 639 }, { - "epoch": 0.018161180476730987, + "epoch": 0.01813596304797529, "grad_norm": 0.0, - "learning_rate": 1.2098298676748583e-05, - "loss": 1.2378, + "learning_rate": 1.2086874409820586e-05, + "loss": 1.1856, "step": 640 }, { - "epoch": 0.01818955732122588, + "epoch": 0.01816430049023775, "grad_norm": 0.0, - "learning_rate": 1.2117202268431003e-05, - "loss": 1.1236, + "learning_rate": 1.210576015108593e-05, + "loss": 1.0592, "step": 641 }, { - "epoch": 0.018217934165720772, + "epoch": 0.018192637932500214, "grad_norm": 0.0, - "learning_rate": 1.2136105860113422e-05, - "loss": 1.2139, + "learning_rate": 1.2124645892351277e-05, + "loss": 1.1702, "step": 642 }, { - "epoch": 0.018246311010215663, + "epoch": 0.018220975374762673, "grad_norm": 0.0, - "learning_rate": 1.2155009451795844e-05, - "loss": 1.191, + "learning_rate": 1.2143531633616621e-05, + "loss": 1.1489, "step": 643 }, { - "epoch": 0.018274687854710557, + "epoch": 0.018249312817025135, "grad_norm": 0.0, - "learning_rate": 1.2173913043478263e-05, - "loss": 1.1034, + "learning_rate": 1.2162417374881965e-05, + "loss": 1.1703, "step": 644 }, { - "epoch": 0.018303064699205448, + "epoch": 0.018277650259287598, "grad_norm": 0.0, - "learning_rate": 1.2192816635160683e-05, - "loss": 1.0853, + "learning_rate": 1.2181303116147309e-05, + "loss": 1.2722, "step": 645 }, { - "epoch": 0.018331441543700342, + "epoch": 0.018305987701550057, "grad_norm": 0.0, - "learning_rate": 1.22117202268431e-05, - "loss": 1.124, + "learning_rate": 1.2200188857412654e-05, + "loss": 1.1605, "step": 646 }, { - "epoch": 0.018359818388195233, + "epoch": 0.01833432514381252, "grad_norm": 0.0, - "learning_rate": 1.223062381852552e-05, - "loss": 1.0785, + "learning_rate": 1.2219074598678e-05, + "loss": 1.1337, "step": 647 }, { - "epoch": 0.018388195232690124, + "epoch": 0.018362662586074982, "grad_norm": 0.0, - "learning_rate": 1.224952741020794e-05, - "loss": 1.0229, + "learning_rate": 1.2237960339943344e-05, + "loss": 0.936, "step": 648 }, { - "epoch": 0.018416572077185018, + "epoch": 0.01839100002833744, "grad_norm": 0.0, - "learning_rate": 1.226843100189036e-05, - "loss": 1.0643, + "learning_rate": 1.225684608120869e-05, + "loss": 1.2885, "step": 649 }, { - "epoch": 0.01844494892167991, + "epoch": 0.018419337470599904, "grad_norm": 0.0, - "learning_rate": 1.228733459357278e-05, - "loss": 1.2037, + "learning_rate": 1.2275731822474033e-05, + "loss": 1.0853, "step": 650 }, { - "epoch": 0.018473325766174803, + "epoch": 0.018447674912862366, "grad_norm": 0.0, - "learning_rate": 1.2306238185255199e-05, - "loss": 0.9857, + "learning_rate": 1.2294617563739377e-05, + "loss": 1.1705, "step": 651 }, { - "epoch": 0.018501702610669694, + "epoch": 0.018476012355124825, "grad_norm": 0.0, - "learning_rate": 1.2325141776937619e-05, - "loss": 0.9998, + "learning_rate": 1.2313503305004721e-05, + "loss": 1.2724, "step": 652 }, { - "epoch": 0.018530079455164584, + "epoch": 0.018504349797387288, "grad_norm": 0.0, - "learning_rate": 1.2344045368620038e-05, - "loss": 1.1326, + "learning_rate": 1.2332389046270068e-05, + "loss": 1.3173, "step": 653 }, { - "epoch": 0.01855845629965948, + "epoch": 0.01853268723964975, "grad_norm": 0.0, - "learning_rate": 1.2362948960302458e-05, - "loss": 1.0415, + "learning_rate": 1.2351274787535412e-05, + "loss": 1.2118, "step": 654 }, { - "epoch": 0.01858683314415437, + "epoch": 0.01856102468191221, "grad_norm": 0.0, - "learning_rate": 1.238185255198488e-05, - "loss": 1.176, + "learning_rate": 1.2370160528800756e-05, + "loss": 1.123, "step": 655 }, { - "epoch": 0.018615209988649264, + "epoch": 0.018589362124174672, "grad_norm": 0.0, - "learning_rate": 1.2400756143667299e-05, - "loss": 1.1318, + "learning_rate": 1.2389046270066101e-05, + "loss": 1.0534, "step": 656 }, { - "epoch": 0.018643586833144155, + "epoch": 0.018617699566437135, "grad_norm": 0.0, - "learning_rate": 1.2419659735349719e-05, - "loss": 1.1105, + "learning_rate": 1.2407932011331445e-05, + "loss": 1.2331, "step": 657 }, { - "epoch": 0.018671963677639045, + "epoch": 0.018646037008699594, "grad_norm": 0.0, - "learning_rate": 1.2438563327032138e-05, - "loss": 1.1023, + "learning_rate": 1.242681775259679e-05, + "loss": 1.1987, "step": 658 }, { - "epoch": 0.01870034052213394, + "epoch": 0.018674374450962056, "grad_norm": 0.0, - "learning_rate": 1.2457466918714556e-05, - "loss": 1.2043, + "learning_rate": 1.2445703493862136e-05, + "loss": 1.1526, "step": 659 }, { - "epoch": 0.01872871736662883, + "epoch": 0.01870271189322452, "grad_norm": 0.0, - "learning_rate": 1.2476370510396976e-05, - "loss": 1.1423, + "learning_rate": 1.246458923512748e-05, + "loss": 1.2217, "step": 660 }, { - "epoch": 0.018757094211123725, + "epoch": 0.018731049335486978, "grad_norm": 0.0, - "learning_rate": 1.2495274102079396e-05, - "loss": 1.1603, + "learning_rate": 1.2483474976392824e-05, + "loss": 1.1844, "step": 661 }, { - "epoch": 0.018785471055618615, + "epoch": 0.01875938677774944, "grad_norm": 0.0, - "learning_rate": 1.2514177693761815e-05, - "loss": 1.1532, + "learning_rate": 1.2502360717658168e-05, + "loss": 1.2399, "step": 662 }, { - "epoch": 0.018813847900113506, + "epoch": 0.018787724220011903, "grad_norm": 0.0, - "learning_rate": 1.2533081285444235e-05, - "loss": 1.083, + "learning_rate": 1.2521246458923513e-05, + "loss": 1.1196, "step": 663 }, { - "epoch": 0.0188422247446084, + "epoch": 0.018816061662274362, "grad_norm": 0.0, - "learning_rate": 1.2551984877126655e-05, - "loss": 1.116, + "learning_rate": 1.2540132200188859e-05, + "loss": 1.1953, "step": 664 }, { - "epoch": 0.01887060158910329, + "epoch": 0.018844399104536825, "grad_norm": 0.0, - "learning_rate": 1.2570888468809074e-05, - "loss": 1.0292, + "learning_rate": 1.2559017941454203e-05, + "loss": 1.2, "step": 665 }, { - "epoch": 0.018898978433598185, + "epoch": 0.018872736546799287, "grad_norm": 0.0, - "learning_rate": 1.2589792060491494e-05, - "loss": 1.0607, + "learning_rate": 1.2577903682719548e-05, + "loss": 1.2404, "step": 666 }, { - "epoch": 0.018927355278093076, + "epoch": 0.018901073989061747, "grad_norm": 0.0, - "learning_rate": 1.2608695652173915e-05, - "loss": 1.0036, + "learning_rate": 1.2596789423984892e-05, + "loss": 1.1239, "step": 667 }, { - "epoch": 0.018955732122587967, + "epoch": 0.01892941143132421, "grad_norm": 0.0, - "learning_rate": 1.2627599243856335e-05, - "loss": 1.1885, + "learning_rate": 1.2615675165250236e-05, + "loss": 1.181, "step": 668 }, { - "epoch": 0.01898410896708286, + "epoch": 0.01895774887358667, "grad_norm": 0.0, - "learning_rate": 1.2646502835538755e-05, - "loss": 1.215, + "learning_rate": 1.2634560906515583e-05, + "loss": 1.1092, "step": 669 }, { - "epoch": 0.019012485811577752, + "epoch": 0.01898608631584913, "grad_norm": 0.0, - "learning_rate": 1.2665406427221174e-05, - "loss": 1.1202, + "learning_rate": 1.2653446647780927e-05, + "loss": 1.2159, "step": 670 }, { - "epoch": 0.019040862656072646, + "epoch": 0.019014423758111593, "grad_norm": 0.0, - "learning_rate": 1.2684310018903592e-05, - "loss": 1.1649, + "learning_rate": 1.2672332389046271e-05, + "loss": 1.1341, "step": 671 }, { - "epoch": 0.019069239500567537, + "epoch": 0.019042761200374056, "grad_norm": 0.0, - "learning_rate": 1.2703213610586012e-05, - "loss": 0.9935, + "learning_rate": 1.2691218130311615e-05, + "loss": 1.0938, "step": 672 }, { - "epoch": 0.019097616345062428, + "epoch": 0.019071098642636515, "grad_norm": 0.0, - "learning_rate": 1.2722117202268432e-05, - "loss": 1.1705, + "learning_rate": 1.271010387157696e-05, + "loss": 1.1205, "step": 673 }, { - "epoch": 0.019125993189557322, + "epoch": 0.019099436084898978, "grad_norm": 0.0, - "learning_rate": 1.2741020793950851e-05, - "loss": 1.0602, + "learning_rate": 1.2728989612842304e-05, + "loss": 1.1949, "step": 674 }, { - "epoch": 0.019154370034052213, + "epoch": 0.019127773527161437, "grad_norm": 0.0, - "learning_rate": 1.2759924385633271e-05, - "loss": 0.9731, + "learning_rate": 1.274787535410765e-05, + "loss": 1.1982, "step": 675 }, { - "epoch": 0.019182746878547107, + "epoch": 0.0191561109694239, "grad_norm": 0.0, - "learning_rate": 1.277882797731569e-05, - "loss": 1.0178, + "learning_rate": 1.2766761095372996e-05, + "loss": 1.118, "step": 676 }, { - "epoch": 0.019211123723041998, + "epoch": 0.019184448411686362, "grad_norm": 0.0, - "learning_rate": 1.279773156899811e-05, - "loss": 1.0421, + "learning_rate": 1.278564683663834e-05, + "loss": 1.1493, "step": 677 }, { - "epoch": 0.01923950056753689, + "epoch": 0.01921278585394882, "grad_norm": 0.0, - "learning_rate": 1.281663516068053e-05, - "loss": 1.1722, + "learning_rate": 1.2804532577903683e-05, + "loss": 1.2017, "step": 678 }, { - "epoch": 0.019267877412031783, + "epoch": 0.019241123296211284, "grad_norm": 0.0, - "learning_rate": 1.2835538752362951e-05, - "loss": 1.0453, + "learning_rate": 1.2823418319169027e-05, + "loss": 1.2307, "step": 679 }, { - "epoch": 0.019296254256526674, + "epoch": 0.019269460738473746, "grad_norm": 0.0, - "learning_rate": 1.2854442344045371e-05, - "loss": 1.0212, + "learning_rate": 1.2842304060434374e-05, + "loss": 1.1378, "step": 680 }, { - "epoch": 0.019324631101021568, + "epoch": 0.019297798180736205, "grad_norm": 0.0, - "learning_rate": 1.287334593572779e-05, - "loss": 1.072, + "learning_rate": 1.2861189801699718e-05, + "loss": 1.137, "step": 681 }, { - "epoch": 0.01935300794551646, + "epoch": 0.019326135622998668, "grad_norm": 0.0, - "learning_rate": 1.289224952741021e-05, - "loss": 1.0455, + "learning_rate": 1.2880075542965062e-05, + "loss": 1.191, "step": 682 }, { - "epoch": 0.01938138479001135, + "epoch": 0.01935447306526113, "grad_norm": 0.0, - "learning_rate": 1.2911153119092628e-05, - "loss": 1.1078, + "learning_rate": 1.2898961284230408e-05, + "loss": 1.2636, "step": 683 }, { - "epoch": 0.019409761634506244, + "epoch": 0.01938281050752359, "grad_norm": 0.0, - "learning_rate": 1.2930056710775048e-05, - "loss": 0.9597, + "learning_rate": 1.2917847025495751e-05, + "loss": 1.0973, "step": 684 }, { - "epoch": 0.019438138479001135, + "epoch": 0.019411147949786052, "grad_norm": 0.0, - "learning_rate": 1.2948960302457467e-05, - "loss": 0.9636, + "learning_rate": 1.2936732766761095e-05, + "loss": 1.1726, "step": 685 }, { - "epoch": 0.01946651532349603, + "epoch": 0.019439485392048515, "grad_norm": 0.0, - "learning_rate": 1.2967863894139887e-05, - "loss": 1.0811, + "learning_rate": 1.2955618508026443e-05, + "loss": 1.2561, "step": 686 }, { - "epoch": 0.01949489216799092, + "epoch": 0.019467822834310974, "grad_norm": 0.0, - "learning_rate": 1.2986767485822307e-05, - "loss": 1.0591, + "learning_rate": 1.2974504249291786e-05, + "loss": 1.0698, "step": 687 }, { - "epoch": 0.01952326901248581, + "epoch": 0.019496160276573436, "grad_norm": 0.0, - "learning_rate": 1.3005671077504726e-05, - "loss": 1.0716, + "learning_rate": 1.299338999055713e-05, + "loss": 1.1329, "step": 688 }, { - "epoch": 0.019551645856980705, + "epoch": 0.0195244977188359, "grad_norm": 0.0, - "learning_rate": 1.3024574669187146e-05, - "loss": 0.9895, + "learning_rate": 1.3012275731822474e-05, + "loss": 1.2476, "step": 689 }, { - "epoch": 0.019580022701475595, + "epoch": 0.019552835161098358, "grad_norm": 0.0, - "learning_rate": 1.3043478260869566e-05, - "loss": 1.0777, + "learning_rate": 1.303116147308782e-05, + "loss": 1.1007, "step": 690 }, { - "epoch": 0.01960839954597049, + "epoch": 0.01958117260336082, "grad_norm": 0.0, - "learning_rate": 1.3062381852551987e-05, - "loss": 1.1207, + "learning_rate": 1.3050047214353165e-05, + "loss": 1.1335, "step": 691 }, { - "epoch": 0.01963677639046538, + "epoch": 0.019609510045623283, "grad_norm": 0.0, - "learning_rate": 1.3081285444234407e-05, - "loss": 1.0518, + "learning_rate": 1.3068932955618509e-05, + "loss": 1.1176, "step": 692 }, { - "epoch": 0.01966515323496027, + "epoch": 0.019637847487885742, "grad_norm": 0.0, - "learning_rate": 1.3100189035916826e-05, - "loss": 1.1628, + "learning_rate": 1.3087818696883855e-05, + "loss": 1.1359, "step": 693 }, { - "epoch": 0.019693530079455165, + "epoch": 0.019666184930148205, "grad_norm": 0.0, - "learning_rate": 1.3119092627599246e-05, - "loss": 0.9706, + "learning_rate": 1.3106704438149199e-05, + "loss": 0.993, "step": 694 }, { - "epoch": 0.019721906923950056, + "epoch": 0.019694522372410667, "grad_norm": 0.0, - "learning_rate": 1.3137996219281666e-05, - "loss": 1.1305, + "learning_rate": 1.3125590179414542e-05, + "loss": 1.0792, "step": 695 }, { - "epoch": 0.01975028376844495, + "epoch": 0.019722859814673126, "grad_norm": 0.0, - "learning_rate": 1.3156899810964084e-05, - "loss": 1.1601, + "learning_rate": 1.314447592067989e-05, + "loss": 1.2115, "step": 696 }, { - "epoch": 0.01977866061293984, + "epoch": 0.01975119725693559, "grad_norm": 0.0, - "learning_rate": 1.3175803402646503e-05, - "loss": 1.0982, + "learning_rate": 1.3163361661945233e-05, + "loss": 1.1992, "step": 697 }, { - "epoch": 0.019807037457434732, + "epoch": 0.01977953469919805, "grad_norm": 0.0, - "learning_rate": 1.3194706994328923e-05, - "loss": 1.1296, + "learning_rate": 1.3182247403210577e-05, + "loss": 1.2098, "step": 698 }, { - "epoch": 0.019835414301929626, + "epoch": 0.01980787214146051, "grad_norm": 0.0, - "learning_rate": 1.3213610586011343e-05, - "loss": 1.0998, + "learning_rate": 1.3201133144475921e-05, + "loss": 1.1838, "step": 699 }, { - "epoch": 0.019863791146424517, + "epoch": 0.019836209583722973, "grad_norm": 0.0, - "learning_rate": 1.3232514177693762e-05, - "loss": 1.0375, + "learning_rate": 1.3220018885741265e-05, + "loss": 1.1792, "step": 700 }, { - "epoch": 0.01989216799091941, + "epoch": 0.019864547025985436, "grad_norm": 0.0, - "learning_rate": 1.3251417769376182e-05, - "loss": 1.0855, + "learning_rate": 1.323890462700661e-05, + "loss": 1.2637, "step": 701 }, { - "epoch": 0.019920544835414302, + "epoch": 0.019892884468247895, "grad_norm": 0.0, - "learning_rate": 1.3270321361058602e-05, - "loss": 1.0593, + "learning_rate": 1.3257790368271956e-05, + "loss": 1.1417, "step": 702 }, { - "epoch": 0.019948921679909193, + "epoch": 0.019921221910510357, "grad_norm": 0.0, - "learning_rate": 1.3289224952741023e-05, - "loss": 1.1246, + "learning_rate": 1.3276676109537302e-05, + "loss": 1.3282, "step": 703 }, { - "epoch": 0.019977298524404087, + "epoch": 0.01994955935277282, "grad_norm": 0.0, - "learning_rate": 1.3308128544423443e-05, - "loss": 1.1074, + "learning_rate": 1.3295561850802646e-05, + "loss": 1.1534, "step": 704 }, { - "epoch": 0.020005675368898978, + "epoch": 0.01997789679503528, "grad_norm": 0.0, - "learning_rate": 1.3327032136105862e-05, - "loss": 1.1425, + "learning_rate": 1.331444759206799e-05, + "loss": 1.2514, "step": 705 }, { - "epoch": 0.020034052213393872, + "epoch": 0.02000623423729774, "grad_norm": 0.0, - "learning_rate": 1.3345935727788282e-05, - "loss": 1.0214, + "learning_rate": 1.3333333333333333e-05, + "loss": 1.1548, "step": 706 }, { - "epoch": 0.020062429057888763, + "epoch": 0.020034571679560204, "grad_norm": 0.0, - "learning_rate": 1.3364839319470702e-05, - "loss": 0.9668, + "learning_rate": 1.335221907459868e-05, + "loss": 1.1824, "step": 707 }, { - "epoch": 0.020090805902383654, + "epoch": 0.020062909121822663, "grad_norm": 0.0, - "learning_rate": 1.338374291115312e-05, - "loss": 1.0623, + "learning_rate": 1.3371104815864024e-05, + "loss": 1.1727, "step": 708 }, { - "epoch": 0.020119182746878548, + "epoch": 0.020091246564085126, "grad_norm": 0.0, - "learning_rate": 1.340264650283554e-05, - "loss": 1.1541, + "learning_rate": 1.3389990557129368e-05, + "loss": 1.1498, "step": 709 }, { - "epoch": 0.02014755959137344, + "epoch": 0.02011958400634759, "grad_norm": 0.0, - "learning_rate": 1.3421550094517959e-05, - "loss": 1.0668, + "learning_rate": 1.3408876298394712e-05, + "loss": 1.2328, "step": 710 }, { - "epoch": 0.020175936435868333, + "epoch": 0.020147921448610048, "grad_norm": 0.0, - "learning_rate": 1.3440453686200379e-05, - "loss": 1.133, + "learning_rate": 1.3427762039660058e-05, + "loss": 1.1937, "step": 711 }, { - "epoch": 0.020204313280363224, + "epoch": 0.02017625889087251, "grad_norm": 0.0, - "learning_rate": 1.3459357277882798e-05, - "loss": 1.0334, + "learning_rate": 1.3446647780925402e-05, + "loss": 1.0904, "step": 712 }, { - "epoch": 0.020232690124858115, + "epoch": 0.020204596333134973, "grad_norm": 0.0, - "learning_rate": 1.3478260869565218e-05, - "loss": 1.1704, + "learning_rate": 1.3465533522190749e-05, + "loss": 1.0846, "step": 713 }, { - "epoch": 0.02026106696935301, + "epoch": 0.020232933775397432, "grad_norm": 0.0, - "learning_rate": 1.3497164461247638e-05, - "loss": 1.0205, + "learning_rate": 1.3484419263456093e-05, + "loss": 1.0255, "step": 714 }, { - "epoch": 0.0202894438138479, + "epoch": 0.020261271217659894, "grad_norm": 0.0, - "learning_rate": 1.3516068052930059e-05, - "loss": 1.1558, + "learning_rate": 1.3503305004721436e-05, + "loss": 1.1442, "step": 715 }, { - "epoch": 0.020317820658342794, + "epoch": 0.020289608659922357, "grad_norm": 0.0, - "learning_rate": 1.3534971644612479e-05, - "loss": 1.1239, + "learning_rate": 1.352219074598678e-05, + "loss": 1.1349, "step": 716 }, { - "epoch": 0.020346197502837685, + "epoch": 0.020317946102184816, "grad_norm": 0.0, - "learning_rate": 1.3553875236294898e-05, - "loss": 1.0981, + "learning_rate": 1.3541076487252124e-05, + "loss": 1.1148, "step": 717 }, { - "epoch": 0.020374574347332575, + "epoch": 0.02034628354444728, "grad_norm": 0.0, - "learning_rate": 1.3572778827977318e-05, - "loss": 1.1115, + "learning_rate": 1.3559962228517471e-05, + "loss": 1.1137, "step": 718 }, { - "epoch": 0.02040295119182747, + "epoch": 0.02037462098670974, "grad_norm": 0.0, - "learning_rate": 1.3591682419659738e-05, - "loss": 1.102, + "learning_rate": 1.3578847969782815e-05, + "loss": 1.1217, "step": 719 }, { - "epoch": 0.02043132803632236, + "epoch": 0.0204029584289722, "grad_norm": 0.0, - "learning_rate": 1.3610586011342156e-05, - "loss": 1.0573, + "learning_rate": 1.359773371104816e-05, + "loss": 1.2358, "step": 720 }, { - "epoch": 0.020459704880817255, + "epoch": 0.020431295871234663, "grad_norm": 0.0, - "learning_rate": 1.3629489603024575e-05, - "loss": 1.1352, + "learning_rate": 1.3616619452313505e-05, + "loss": 1.2145, "step": 721 }, { - "epoch": 0.020488081725312145, + "epoch": 0.020459633313497125, "grad_norm": 0.0, - "learning_rate": 1.3648393194706995e-05, - "loss": 1.0493, + "learning_rate": 1.3635505193578849e-05, + "loss": 1.2072, "step": 722 }, { - "epoch": 0.020516458569807036, + "epoch": 0.020487970755759585, "grad_norm": 0.0, - "learning_rate": 1.3667296786389414e-05, - "loss": 1.1145, + "learning_rate": 1.3654390934844192e-05, + "loss": 1.2495, "step": 723 }, { - "epoch": 0.02054483541430193, + "epoch": 0.020516308198022047, "grad_norm": 0.0, - "learning_rate": 1.3686200378071834e-05, - "loss": 0.9449, + "learning_rate": 1.367327667610954e-05, + "loss": 1.1351, "step": 724 }, { - "epoch": 0.02057321225879682, + "epoch": 0.020544645640284506, "grad_norm": 0.0, - "learning_rate": 1.3705103969754254e-05, - "loss": 1.0547, + "learning_rate": 1.3692162417374884e-05, + "loss": 1.123, "step": 725 }, { - "epoch": 0.020601589103291715, + "epoch": 0.02057298308254697, "grad_norm": 0.0, - "learning_rate": 1.3724007561436673e-05, - "loss": 1.1419, + "learning_rate": 1.3711048158640227e-05, + "loss": 1.1348, "step": 726 }, { - "epoch": 0.020629965947786606, + "epoch": 0.02060132052480943, "grad_norm": 0.0, - "learning_rate": 1.3742911153119093e-05, - "loss": 1.104, + "learning_rate": 1.3729933899905571e-05, + "loss": 1.2349, "step": 727 }, { - "epoch": 0.020658342792281497, + "epoch": 0.02062965796707189, "grad_norm": 0.0, - "learning_rate": 1.3761814744801514e-05, - "loss": 1.0639, + "learning_rate": 1.3748819641170917e-05, + "loss": 1.1243, "step": 728 }, { - "epoch": 0.02068671963677639, + "epoch": 0.020657995409334353, "grad_norm": 0.0, - "learning_rate": 1.3780718336483934e-05, - "loss": 1.1784, + "learning_rate": 1.3767705382436262e-05, + "loss": 1.2044, "step": 729 }, { - "epoch": 0.020715096481271282, + "epoch": 0.020686332851596816, "grad_norm": 0.0, - "learning_rate": 1.3799621928166354e-05, - "loss": 1.1049, + "learning_rate": 1.3786591123701606e-05, + "loss": 1.1366, "step": 730 }, { - "epoch": 0.020743473325766176, + "epoch": 0.020714670293859275, "grad_norm": 0.0, - "learning_rate": 1.3818525519848773e-05, - "loss": 1.1318, + "learning_rate": 1.3805476864966952e-05, + "loss": 1.2708, "step": 731 }, { - "epoch": 0.020771850170261067, + "epoch": 0.020743007736121737, "grad_norm": 0.0, - "learning_rate": 1.3837429111531191e-05, - "loss": 1.0363, + "learning_rate": 1.3824362606232296e-05, + "loss": 1.1847, "step": 732 }, { - "epoch": 0.020800227014755958, + "epoch": 0.0207713451783842, "grad_norm": 0.0, - "learning_rate": 1.3856332703213611e-05, - "loss": 1.0326, + "learning_rate": 1.384324834749764e-05, + "loss": 1.1143, "step": 733 }, { - "epoch": 0.020828603859250852, + "epoch": 0.02079968262064666, "grad_norm": 0.0, - "learning_rate": 1.387523629489603e-05, - "loss": 1.2156, + "learning_rate": 1.3862134088762983e-05, + "loss": 1.0666, "step": 734 }, { - "epoch": 0.020856980703745743, + "epoch": 0.02082802006290912, "grad_norm": 0.0, - "learning_rate": 1.389413988657845e-05, - "loss": 1.0814, + "learning_rate": 1.388101983002833e-05, + "loss": 1.2192, "step": 735 }, { - "epoch": 0.020885357548240637, + "epoch": 0.020856357505171584, "grad_norm": 0.0, - "learning_rate": 1.391304347826087e-05, - "loss": 1.0659, + "learning_rate": 1.3899905571293674e-05, + "loss": 1.1173, "step": 736 }, { - "epoch": 0.020913734392735528, + "epoch": 0.020884694947434043, "grad_norm": 0.0, - "learning_rate": 1.393194706994329e-05, - "loss": 0.9708, + "learning_rate": 1.3918791312559018e-05, + "loss": 1.1929, "step": 737 }, { - "epoch": 0.02094211123723042, + "epoch": 0.020913032389696506, "grad_norm": 0.0, - "learning_rate": 1.395085066162571e-05, - "loss": 1.0581, + "learning_rate": 1.3937677053824364e-05, + "loss": 1.1838, "step": 738 }, { - "epoch": 0.020970488081725313, + "epoch": 0.02094136983195897, "grad_norm": 0.0, - "learning_rate": 1.3969754253308129e-05, - "loss": 1.0936, + "learning_rate": 1.3956562795089708e-05, + "loss": 1.1719, "step": 739 }, { - "epoch": 0.020998864926220204, + "epoch": 0.020969707274221427, "grad_norm": 0.0, - "learning_rate": 1.398865784499055e-05, - "loss": 1.0244, + "learning_rate": 1.3975448536355053e-05, + "loss": 1.1893, "step": 740 }, { - "epoch": 0.021027241770715098, + "epoch": 0.02099804471648389, "grad_norm": 0.0, - "learning_rate": 1.400756143667297e-05, - "loss": 1.1244, + "learning_rate": 1.3994334277620399e-05, + "loss": 1.1397, "step": 741 }, { - "epoch": 0.02105561861520999, + "epoch": 0.021026382158746353, "grad_norm": 0.0, - "learning_rate": 1.402646502835539e-05, - "loss": 1.1279, + "learning_rate": 1.4013220018885743e-05, + "loss": 1.0652, "step": 742 }, { - "epoch": 0.02108399545970488, + "epoch": 0.02105471960100881, "grad_norm": 0.0, - "learning_rate": 1.404536862003781e-05, - "loss": 1.0732, + "learning_rate": 1.4032105760151087e-05, + "loss": 1.1364, "step": 743 }, { - "epoch": 0.021112372304199774, + "epoch": 0.021083057043271274, "grad_norm": 0.0, - "learning_rate": 1.4064272211720229e-05, - "loss": 1.1562, + "learning_rate": 1.405099150141643e-05, + "loss": 1.0389, "step": 744 }, { - "epoch": 0.021140749148694665, + "epoch": 0.021111394485533737, "grad_norm": 0.0, - "learning_rate": 1.4083175803402647e-05, - "loss": 1.0708, + "learning_rate": 1.4069877242681776e-05, + "loss": 1.1844, "step": 745 }, { - "epoch": 0.02116912599318956, + "epoch": 0.021139731927796196, "grad_norm": 0.0, - "learning_rate": 1.4102079395085067e-05, - "loss": 1.0504, + "learning_rate": 1.4088762983947122e-05, + "loss": 1.16, "step": 746 }, { - "epoch": 0.02119750283768445, + "epoch": 0.02116806937005866, "grad_norm": 0.0, - "learning_rate": 1.4120982986767486e-05, - "loss": 1.059, + "learning_rate": 1.4107648725212465e-05, + "loss": 1.2575, "step": 747 }, { - "epoch": 0.02122587968217934, + "epoch": 0.02119640681232112, "grad_norm": 0.0, - "learning_rate": 1.4139886578449906e-05, - "loss": 1.0971, + "learning_rate": 1.4126534466477811e-05, + "loss": 1.2132, "step": 748 }, { - "epoch": 0.021254256526674235, + "epoch": 0.02122474425458358, "grad_norm": 0.0, - "learning_rate": 1.4158790170132326e-05, - "loss": 1.094, + "learning_rate": 1.4145420207743155e-05, + "loss": 1.2802, "step": 749 }, { - "epoch": 0.021282633371169125, + "epoch": 0.021253081696846043, "grad_norm": 0.0, - "learning_rate": 1.4177693761814745e-05, - "loss": 1.0623, + "learning_rate": 1.4164305949008499e-05, + "loss": 1.1954, "step": 750 }, { - "epoch": 0.02131101021566402, + "epoch": 0.021281419139108505, "grad_norm": 0.0, - "learning_rate": 1.4196597353497165e-05, - "loss": 1.1278, + "learning_rate": 1.4183191690273846e-05, + "loss": 1.137, "step": 751 }, { - "epoch": 0.02133938706015891, + "epoch": 0.021309756581370964, "grad_norm": 0.0, - "learning_rate": 1.4215500945179586e-05, - "loss": 0.9839, + "learning_rate": 1.420207743153919e-05, + "loss": 1.141, "step": 752 }, { - "epoch": 0.0213677639046538, + "epoch": 0.021338094023633427, "grad_norm": 0.0, - "learning_rate": 1.4234404536862006e-05, - "loss": 1.1566, + "learning_rate": 1.4220963172804534e-05, + "loss": 1.1968, "step": 753 }, { - "epoch": 0.021396140749148695, + "epoch": 0.02136643146589589, "grad_norm": 0.0, - "learning_rate": 1.4253308128544426e-05, - "loss": 1.2277, + "learning_rate": 1.4239848914069877e-05, + "loss": 1.2523, "step": 754 }, { - "epoch": 0.021424517593643586, + "epoch": 0.02139476890815835, "grad_norm": 0.0, - "learning_rate": 1.4272211720226845e-05, - "loss": 1.0525, + "learning_rate": 1.4258734655335223e-05, + "loss": 1.139, "step": 755 }, { - "epoch": 0.02145289443813848, + "epoch": 0.02142310635042081, "grad_norm": 0.0, - "learning_rate": 1.4291115311909265e-05, - "loss": 0.9971, + "learning_rate": 1.4277620396600567e-05, + "loss": 1.119, "step": 756 }, { - "epoch": 0.02148127128263337, + "epoch": 0.021451443792683274, "grad_norm": 0.0, - "learning_rate": 1.4310018903591683e-05, - "loss": 1.0791, + "learning_rate": 1.4296506137865912e-05, + "loss": 1.0361, "step": 757 }, { - "epoch": 0.021509648127128262, + "epoch": 0.021479781234945733, "grad_norm": 0.0, - "learning_rate": 1.4328922495274103e-05, - "loss": 1.0686, + "learning_rate": 1.4315391879131258e-05, + "loss": 1.0984, "step": 758 }, { - "epoch": 0.021538024971623156, + "epoch": 0.021508118677208195, "grad_norm": 0.0, - "learning_rate": 1.4347826086956522e-05, - "loss": 1.0901, + "learning_rate": 1.4334277620396602e-05, + "loss": 1.1778, "step": 759 }, { - "epoch": 0.021566401816118047, + "epoch": 0.021536456119470658, "grad_norm": 0.0, - "learning_rate": 1.4366729678638942e-05, - "loss": 1.0773, + "learning_rate": 1.4353163361661946e-05, + "loss": 1.1036, "step": 760 }, { - "epoch": 0.02159477866061294, + "epoch": 0.021564793561733117, "grad_norm": 0.0, - "learning_rate": 1.4385633270321361e-05, - "loss": 1.1132, + "learning_rate": 1.437204910292729e-05, + "loss": 1.0767, "step": 761 }, { - "epoch": 0.021623155505107832, + "epoch": 0.02159313100399558, "grad_norm": 0.0, - "learning_rate": 1.4404536862003781e-05, - "loss": 1.0583, + "learning_rate": 1.4390934844192637e-05, + "loss": 1.1501, "step": 762 }, { - "epoch": 0.021651532349602723, + "epoch": 0.021621468446258042, "grad_norm": 0.0, - "learning_rate": 1.44234404536862e-05, - "loss": 1.2404, + "learning_rate": 1.440982058545798e-05, + "loss": 1.1225, "step": 763 }, { - "epoch": 0.021679909194097617, + "epoch": 0.0216498058885205, "grad_norm": 0.0, - "learning_rate": 1.4442344045368622e-05, - "loss": 1.0255, + "learning_rate": 1.4428706326723325e-05, + "loss": 1.1545, "step": 764 }, { - "epoch": 0.021708286038592508, + "epoch": 0.021678143330782964, "grad_norm": 0.0, - "learning_rate": 1.4461247637051042e-05, - "loss": 1.065, + "learning_rate": 1.444759206798867e-05, + "loss": 1.0387, "step": 765 }, { - "epoch": 0.021736662883087402, + "epoch": 0.021706480773045427, "grad_norm": 0.0, - "learning_rate": 1.4480151228733461e-05, - "loss": 1.1645, + "learning_rate": 1.4466477809254014e-05, + "loss": 1.1342, "step": 766 }, { - "epoch": 0.021765039727582293, + "epoch": 0.021734818215307886, "grad_norm": 0.0, - "learning_rate": 1.4499054820415881e-05, - "loss": 1.0922, + "learning_rate": 1.448536355051936e-05, + "loss": 1.2411, "step": 767 }, { - "epoch": 0.021793416572077184, + "epoch": 0.021763155657570348, "grad_norm": 0.0, - "learning_rate": 1.45179584120983e-05, - "loss": 1.1274, + "learning_rate": 1.4504249291784705e-05, + "loss": 1.2343, "step": 768 }, { - "epoch": 0.021821793416572078, + "epoch": 0.02179149309983281, "grad_norm": 0.0, - "learning_rate": 1.4536862003780719e-05, - "loss": 1.0566, + "learning_rate": 1.4523135033050049e-05, + "loss": 1.0298, "step": 769 }, { - "epoch": 0.02185017026106697, + "epoch": 0.02181983054209527, "grad_norm": 0.0, - "learning_rate": 1.4555765595463138e-05, - "loss": 1.0489, + "learning_rate": 1.4542020774315393e-05, + "loss": 1.1876, "step": 770 }, { - "epoch": 0.021878547105561863, + "epoch": 0.021848167984357732, "grad_norm": 0.0, - "learning_rate": 1.4574669187145558e-05, - "loss": 1.1254, + "learning_rate": 1.4560906515580737e-05, + "loss": 1.257, "step": 771 }, { - "epoch": 0.021906923950056754, + "epoch": 0.021876505426620195, "grad_norm": 0.0, - "learning_rate": 1.4593572778827978e-05, - "loss": 1.0549, + "learning_rate": 1.4579792256846082e-05, + "loss": 1.1286, "step": 772 }, { - "epoch": 0.021935300794551645, + "epoch": 0.021904842868882654, "grad_norm": 0.0, - "learning_rate": 1.4612476370510397e-05, - "loss": 1.0265, + "learning_rate": 1.4598677998111428e-05, + "loss": 1.2214, "step": 773 }, { - "epoch": 0.02196367763904654, + "epoch": 0.021933180311145117, "grad_norm": 0.0, - "learning_rate": 1.4631379962192817e-05, - "loss": 1.0092, + "learning_rate": 1.4617563739376772e-05, + "loss": 1.2448, "step": 774 }, { - "epoch": 0.02199205448354143, + "epoch": 0.021961517753407576, "grad_norm": 0.0, - "learning_rate": 1.4650283553875237e-05, - "loss": 1.1013, + "learning_rate": 1.4636449480642117e-05, + "loss": 1.2062, "step": 775 }, { - "epoch": 0.022020431328036324, + "epoch": 0.02198985519567004, "grad_norm": 0.0, - "learning_rate": 1.4669187145557658e-05, - "loss": 1.0743, + "learning_rate": 1.4655335221907461e-05, + "loss": 1.1511, "step": 776 }, { - "epoch": 0.022048808172531215, + "epoch": 0.0220181926379325, "grad_norm": 0.0, - "learning_rate": 1.4688090737240078e-05, - "loss": 1.0157, + "learning_rate": 1.4674220963172805e-05, + "loss": 1.2255, "step": 777 }, { - "epoch": 0.022077185017026105, + "epoch": 0.02204653008019496, "grad_norm": 0.0, - "learning_rate": 1.4706994328922497e-05, - "loss": 1.1353, + "learning_rate": 1.4693106704438152e-05, + "loss": 1.186, "step": 778 }, { - "epoch": 0.022105561861521, + "epoch": 0.022074867522457423, "grad_norm": 0.0, - "learning_rate": 1.4725897920604917e-05, - "loss": 1.12, + "learning_rate": 1.4711992445703496e-05, + "loss": 1.1645, "step": 779 }, { - "epoch": 0.02213393870601589, + "epoch": 0.022103204964719885, "grad_norm": 0.0, - "learning_rate": 1.4744801512287337e-05, - "loss": 1.2262, + "learning_rate": 1.473087818696884e-05, + "loss": 1.1131, "step": 780 }, { - "epoch": 0.022162315550510785, + "epoch": 0.022131542406982344, "grad_norm": 0.0, - "learning_rate": 1.4763705103969756e-05, - "loss": 1.1503, + "learning_rate": 1.4749763928234184e-05, + "loss": 0.9958, "step": 781 }, { - "epoch": 0.022190692395005675, + "epoch": 0.022159879849244807, "grad_norm": 0.0, - "learning_rate": 1.4782608695652174e-05, - "loss": 1.2505, + "learning_rate": 1.476864966949953e-05, + "loss": 1.1723, "step": 782 }, { - "epoch": 0.022219069239500566, + "epoch": 0.02218821729150727, "grad_norm": 0.0, - "learning_rate": 1.4801512287334594e-05, - "loss": 1.0793, + "learning_rate": 1.4787535410764873e-05, + "loss": 1.0923, "step": 783 }, { - "epoch": 0.02224744608399546, + "epoch": 0.02221655473376973, "grad_norm": 0.0, - "learning_rate": 1.4820415879017014e-05, - "loss": 1.1154, + "learning_rate": 1.4806421152030219e-05, + "loss": 1.1617, "step": 784 }, { - "epoch": 0.02227582292849035, + "epoch": 0.02224489217603219, "grad_norm": 0.0, - "learning_rate": 1.4839319470699433e-05, - "loss": 1.0829, + "learning_rate": 1.4825306893295564e-05, + "loss": 1.193, "step": 785 }, { - "epoch": 0.022304199772985246, + "epoch": 0.022273229618294654, "grad_norm": 0.0, - "learning_rate": 1.4858223062381853e-05, - "loss": 1.0953, + "learning_rate": 1.4844192634560908e-05, + "loss": 1.136, "step": 786 }, { - "epoch": 0.022332576617480136, + "epoch": 0.022301567060557113, "grad_norm": 0.0, - "learning_rate": 1.4877126654064273e-05, - "loss": 1.1858, + "learning_rate": 1.4863078375826252e-05, + "loss": 1.1424, "step": 787 }, { - "epoch": 0.022360953461975027, + "epoch": 0.022329904502819575, "grad_norm": 0.0, - "learning_rate": 1.4896030245746694e-05, - "loss": 1.084, + "learning_rate": 1.4881964117091596e-05, + "loss": 1.1222, "step": 788 }, { - "epoch": 0.02238933030646992, + "epoch": 0.022358241945082038, "grad_norm": 0.0, - "learning_rate": 1.4914933837429114e-05, - "loss": 1.1877, + "learning_rate": 1.4900849858356943e-05, + "loss": 1.0813, "step": 789 }, { - "epoch": 0.022417707150964812, + "epoch": 0.022386579387344497, "grad_norm": 0.0, - "learning_rate": 1.4933837429111533e-05, - "loss": 1.1587, + "learning_rate": 1.4919735599622287e-05, + "loss": 1.1384, "step": 790 }, { - "epoch": 0.022446083995459706, + "epoch": 0.02241491682960696, "grad_norm": 0.0, - "learning_rate": 1.4952741020793953e-05, - "loss": 0.9821, + "learning_rate": 1.493862134088763e-05, + "loss": 1.1887, "step": 791 }, { - "epoch": 0.022474460839954597, + "epoch": 0.022443254271869422, "grad_norm": 0.0, - "learning_rate": 1.4971644612476373e-05, - "loss": 1.1376, + "learning_rate": 1.4957507082152976e-05, + "loss": 1.2347, "step": 792 }, { - "epoch": 0.022502837684449488, + "epoch": 0.02247159171413188, "grad_norm": 0.0, - "learning_rate": 1.4990548204158792e-05, - "loss": 1.0818, + "learning_rate": 1.497639282341832e-05, + "loss": 1.1316, "step": 793 }, { - "epoch": 0.022531214528944382, + "epoch": 0.022499929156394344, "grad_norm": 0.0, - "learning_rate": 1.500945179584121e-05, - "loss": 1.0822, + "learning_rate": 1.4995278564683664e-05, + "loss": 1.3047, "step": 794 }, { - "epoch": 0.022559591373439273, + "epoch": 0.022528266598656806, "grad_norm": 0.0, - "learning_rate": 1.502835538752363e-05, - "loss": 1.1562, + "learning_rate": 1.5014164305949011e-05, + "loss": 1.1646, "step": 795 }, { - "epoch": 0.022587968217934167, + "epoch": 0.022556604040919265, "grad_norm": 0.0, - "learning_rate": 1.504725897920605e-05, - "loss": 1.1499, + "learning_rate": 1.5033050047214355e-05, + "loss": 1.2541, "step": 796 }, { - "epoch": 0.022616345062429058, + "epoch": 0.022584941483181728, "grad_norm": 0.0, - "learning_rate": 1.506616257088847e-05, - "loss": 1.1045, + "learning_rate": 1.5051935788479699e-05, + "loss": 1.1222, "step": 797 }, { - "epoch": 0.02264472190692395, + "epoch": 0.02261327892544419, "grad_norm": 0.0, - "learning_rate": 1.5085066162570889e-05, - "loss": 1.0016, + "learning_rate": 1.5070821529745043e-05, + "loss": 1.2504, "step": 798 }, { - "epoch": 0.022673098751418843, + "epoch": 0.02264161636770665, "grad_norm": 0.0, - "learning_rate": 1.5103969754253308e-05, - "loss": 1.0579, + "learning_rate": 1.5089707271010388e-05, + "loss": 1.2826, "step": 799 }, { - "epoch": 0.022701475595913734, + "epoch": 0.022669953809969112, "grad_norm": 0.0, - "learning_rate": 1.512287334593573e-05, - "loss": 1.1352, + "learning_rate": 1.5108593012275734e-05, + "loss": 1.2552, "step": 800 }, { - "epoch": 0.022729852440408628, + "epoch": 0.022698291252231575, "grad_norm": 0.0, - "learning_rate": 1.514177693761815e-05, - "loss": 1.1414, + "learning_rate": 1.5127478753541078e-05, + "loss": 1.1766, "step": 801 }, { - "epoch": 0.02275822928490352, + "epoch": 0.022726628694494034, "grad_norm": 0.0, - "learning_rate": 1.516068052930057e-05, - "loss": 1.1441, + "learning_rate": 1.5146364494806423e-05, + "loss": 1.2169, "step": 802 }, { - "epoch": 0.02278660612939841, + "epoch": 0.022754966136756496, "grad_norm": 0.0, - "learning_rate": 1.5179584120982989e-05, - "loss": 1.1287, + "learning_rate": 1.5165250236071767e-05, + "loss": 1.0581, "step": 803 }, { - "epoch": 0.022814982973893304, + "epoch": 0.02278330357901896, "grad_norm": 0.0, - "learning_rate": 1.5198487712665408e-05, - "loss": 1.0449, + "learning_rate": 1.5184135977337111e-05, + "loss": 1.1857, "step": 804 }, { - "epoch": 0.022843359818388195, + "epoch": 0.022811641021281418, "grad_norm": 0.0, - "learning_rate": 1.5217391304347828e-05, - "loss": 1.1455, + "learning_rate": 1.5203021718602455e-05, + "loss": 1.2317, "step": 805 }, { - "epoch": 0.02287173666288309, + "epoch": 0.02283997846354388, "grad_norm": 0.0, - "learning_rate": 1.5236294896030246e-05, - "loss": 1.1479, + "learning_rate": 1.5221907459867802e-05, + "loss": 1.2722, "step": 806 }, { - "epoch": 0.02290011350737798, + "epoch": 0.022868315905806343, "grad_norm": 0.0, - "learning_rate": 1.5255198487712666e-05, - "loss": 1.0531, + "learning_rate": 1.5240793201133146e-05, + "loss": 1.1079, "step": 807 }, { - "epoch": 0.02292849035187287, + "epoch": 0.022896653348068802, "grad_norm": 0.0, - "learning_rate": 1.5274102079395087e-05, - "loss": 1.025, + "learning_rate": 1.525967894239849e-05, + "loss": 1.2094, "step": 808 }, { - "epoch": 0.022956867196367765, + "epoch": 0.022924990790331265, "grad_norm": 0.0, - "learning_rate": 1.5293005671077507e-05, - "loss": 1.1536, + "learning_rate": 1.5278564683663834e-05, + "loss": 1.1315, "step": 809 }, { - "epoch": 0.022985244040862655, + "epoch": 0.022953328232593728, "grad_norm": 0.0, - "learning_rate": 1.5311909262759926e-05, - "loss": 1.1478, + "learning_rate": 1.529745042492918e-05, + "loss": 1.1677, "step": 810 }, { - "epoch": 0.02301362088535755, + "epoch": 0.022981665674856187, "grad_norm": 0.0, - "learning_rate": 1.5330812854442346e-05, - "loss": 1.0617, + "learning_rate": 1.5316336166194525e-05, + "loss": 1.2019, "step": 811 }, { - "epoch": 0.02304199772985244, + "epoch": 0.02301000311711865, "grad_norm": 0.0, - "learning_rate": 1.5349716446124766e-05, - "loss": 1.0875, + "learning_rate": 1.533522190745987e-05, + "loss": 1.2004, "step": 812 }, { - "epoch": 0.02307037457434733, + "epoch": 0.023038340559381112, "grad_norm": 0.0, - "learning_rate": 1.5368620037807185e-05, - "loss": 0.9966, + "learning_rate": 1.5354107648725213e-05, + "loss": 1.1124, "step": 813 }, { - "epoch": 0.023098751418842225, + "epoch": 0.02306667800164357, "grad_norm": 0.0, - "learning_rate": 1.5387523629489605e-05, - "loss": 1.0558, + "learning_rate": 1.5372993389990558e-05, + "loss": 1.2642, "step": 814 }, { - "epoch": 0.023127128263337116, + "epoch": 0.023095015443906033, "grad_norm": 0.0, - "learning_rate": 1.5406427221172025e-05, - "loss": 1.1823, + "learning_rate": 1.5391879131255904e-05, + "loss": 1.1183, "step": 815 }, { - "epoch": 0.02315550510783201, + "epoch": 0.023123352886168496, "grad_norm": 0.0, - "learning_rate": 1.5425330812854444e-05, - "loss": 1.1734, + "learning_rate": 1.5410764872521246e-05, + "loss": 1.025, "step": 816 }, { - "epoch": 0.0231838819523269, + "epoch": 0.023151690328430955, "grad_norm": 0.0, - "learning_rate": 1.5444234404536864e-05, - "loss": 1.0455, + "learning_rate": 1.5429650613786595e-05, + "loss": 1.1365, "step": 817 }, { - "epoch": 0.023212258796821792, + "epoch": 0.023180027770693418, "grad_norm": 0.0, - "learning_rate": 1.5463137996219284e-05, - "loss": 1.0964, + "learning_rate": 1.5448536355051937e-05, + "loss": 1.1144, "step": 818 }, { - "epoch": 0.023240635641316686, + "epoch": 0.02320836521295588, "grad_norm": 0.0, - "learning_rate": 1.54820415879017e-05, - "loss": 1.06, + "learning_rate": 1.5467422096317282e-05, + "loss": 1.0889, "step": 819 }, { - "epoch": 0.023269012485811577, + "epoch": 0.02323670265521834, "grad_norm": 0.0, - "learning_rate": 1.5500945179584123e-05, - "loss": 1.1407, + "learning_rate": 1.5486307837582625e-05, + "loss": 1.1187, "step": 820 }, { - "epoch": 0.02329738933030647, + "epoch": 0.023265040097480802, "grad_norm": 0.0, - "learning_rate": 1.5519848771266543e-05, - "loss": 1.0937, + "learning_rate": 1.550519357884797e-05, + "loss": 1.1475, "step": 821 }, { - "epoch": 0.023325766174801362, + "epoch": 0.023293377539743264, "grad_norm": 0.0, - "learning_rate": 1.5538752362948962e-05, - "loss": 1.0789, + "learning_rate": 1.5524079320113316e-05, + "loss": 1.2467, "step": 822 }, { - "epoch": 0.023354143019296253, + "epoch": 0.023321714982005724, "grad_norm": 0.0, - "learning_rate": 1.5557655954631382e-05, - "loss": 1.0791, + "learning_rate": 1.554296506137866e-05, + "loss": 1.1599, "step": 823 }, { - "epoch": 0.023382519863791147, + "epoch": 0.023350052424268186, "grad_norm": 0.0, - "learning_rate": 1.55765595463138e-05, - "loss": 1.2011, + "learning_rate": 1.5561850802644007e-05, + "loss": 1.1513, "step": 824 }, { - "epoch": 0.023410896708286038, + "epoch": 0.023378389866530645, "grad_norm": 0.0, - "learning_rate": 1.559546313799622e-05, - "loss": 1.0826, + "learning_rate": 1.558073654390935e-05, + "loss": 1.1563, "step": 825 }, { - "epoch": 0.023439273552780932, + "epoch": 0.023406727308793108, "grad_norm": 0.0, - "learning_rate": 1.561436672967864e-05, - "loss": 1.0533, + "learning_rate": 1.5599622285174695e-05, + "loss": 1.1157, "step": 826 }, { - "epoch": 0.023467650397275823, + "epoch": 0.02343506475105557, "grad_norm": 0.0, - "learning_rate": 1.563327032136106e-05, - "loss": 1.2472, + "learning_rate": 1.5618508026440037e-05, + "loss": 1.031, "step": 827 }, { - "epoch": 0.023496027241770714, + "epoch": 0.02346340219331803, "grad_norm": 0.0, - "learning_rate": 1.565217391304348e-05, - "loss": 1.0664, + "learning_rate": 1.5637393767705386e-05, + "loss": 1.0976, "step": 828 }, { - "epoch": 0.023524404086265608, + "epoch": 0.023491739635580492, "grad_norm": 0.0, - "learning_rate": 1.56710775047259e-05, - "loss": 1.0267, + "learning_rate": 1.5656279508970728e-05, + "loss": 1.1357, "step": 829 }, { - "epoch": 0.0235527809307605, + "epoch": 0.023520077077842955, "grad_norm": 0.0, - "learning_rate": 1.568998109640832e-05, - "loss": 1.0615, + "learning_rate": 1.5675165250236073e-05, + "loss": 1.2262, "step": 830 }, { - "epoch": 0.023581157775255393, + "epoch": 0.023548414520105414, "grad_norm": 0.0, - "learning_rate": 1.5708884688090736e-05, - "loss": 1.0719, + "learning_rate": 1.5694050991501416e-05, + "loss": 1.2255, "step": 831 }, { - "epoch": 0.023609534619750284, + "epoch": 0.023576751962367876, "grad_norm": 0.0, - "learning_rate": 1.572778827977316e-05, - "loss": 1.081, + "learning_rate": 1.571293673276676e-05, + "loss": 1.1171, "step": 832 }, { - "epoch": 0.023637911464245175, + "epoch": 0.02360508940463034, "grad_norm": 0.0, - "learning_rate": 1.574669187145558e-05, - "loss": 1.1225, + "learning_rate": 1.5731822474032107e-05, + "loss": 1.21, "step": 833 }, { - "epoch": 0.02366628830874007, + "epoch": 0.023633426846892798, "grad_norm": 0.0, - "learning_rate": 1.5765595463137998e-05, - "loss": 1.0608, + "learning_rate": 1.5750708215297452e-05, + "loss": 1.2825, "step": 834 }, { - "epoch": 0.02369466515323496, + "epoch": 0.02366176428915526, "grad_norm": 0.0, - "learning_rate": 1.5784499054820418e-05, - "loss": 1.0762, + "learning_rate": 1.5769593956562798e-05, + "loss": 1.155, "step": 835 }, { - "epoch": 0.023723041997729854, + "epoch": 0.023690101731417723, "grad_norm": 0.0, - "learning_rate": 1.5803402646502838e-05, - "loss": 1.108, + "learning_rate": 1.578847969782814e-05, + "loss": 1.1092, "step": 836 }, { - "epoch": 0.023751418842224745, + "epoch": 0.023718439173680182, "grad_norm": 0.0, - "learning_rate": 1.5822306238185257e-05, - "loss": 1.1208, + "learning_rate": 1.5807365439093485e-05, + "loss": 1.0843, "step": 837 }, { - "epoch": 0.023779795686719635, + "epoch": 0.023746776615942645, "grad_norm": 0.0, - "learning_rate": 1.5841209829867677e-05, - "loss": 1.1525, + "learning_rate": 1.582625118035883e-05, + "loss": 1.2233, "step": 838 }, { - "epoch": 0.02380817253121453, + "epoch": 0.023775114058205107, "grad_norm": 0.0, - "learning_rate": 1.5860113421550097e-05, - "loss": 0.9816, + "learning_rate": 1.5845136921624177e-05, + "loss": 1.186, "step": 839 }, { - "epoch": 0.02383654937570942, + "epoch": 0.023803451500467566, "grad_norm": 0.0, - "learning_rate": 1.5879017013232516e-05, - "loss": 1.1038, + "learning_rate": 1.586402266288952e-05, + "loss": 1.288, "step": 840 }, { - "epoch": 0.023864926220204315, + "epoch": 0.02383178894273003, "grad_norm": 0.0, - "learning_rate": 1.5897920604914936e-05, - "loss": 1.1779, + "learning_rate": 1.5882908404154864e-05, + "loss": 1.2964, "step": 841 }, { - "epoch": 0.023893303064699205, + "epoch": 0.02386012638499249, "grad_norm": 0.0, - "learning_rate": 1.5916824196597355e-05, - "loss": 1.1103, + "learning_rate": 1.590179414542021e-05, + "loss": 1.156, "step": 842 }, { - "epoch": 0.023921679909194096, + "epoch": 0.02388846382725495, "grad_norm": 0.0, - "learning_rate": 1.5935727788279772e-05, - "loss": 1.1468, + "learning_rate": 1.5920679886685552e-05, + "loss": 1.2202, "step": 843 }, { - "epoch": 0.02395005675368899, + "epoch": 0.023916801269517413, "grad_norm": 0.0, - "learning_rate": 1.5954631379962195e-05, - "loss": 1.1675, + "learning_rate": 1.5939565627950898e-05, + "loss": 1.3331, "step": 844 }, { - "epoch": 0.02397843359818388, + "epoch": 0.023945138711779876, "grad_norm": 0.0, - "learning_rate": 1.5973534971644614e-05, - "loss": 1.1182, + "learning_rate": 1.5958451369216243e-05, + "loss": 1.1904, "step": 845 }, { - "epoch": 0.024006810442678776, + "epoch": 0.023973476154042335, "grad_norm": 0.0, - "learning_rate": 1.5992438563327034e-05, - "loss": 1.0066, + "learning_rate": 1.597733711048159e-05, + "loss": 1.1658, "step": 846 }, { - "epoch": 0.024035187287173666, + "epoch": 0.024001813596304798, "grad_norm": 0.0, - "learning_rate": 1.6011342155009454e-05, - "loss": 1.1363, + "learning_rate": 1.599622285174693e-05, + "loss": 1.1521, "step": 847 }, { - "epoch": 0.024063564131668557, + "epoch": 0.02403015103856726, "grad_norm": 0.0, - "learning_rate": 1.6030245746691873e-05, - "loss": 1.1112, + "learning_rate": 1.6015108593012276e-05, + "loss": 1.1029, "step": 848 }, { - "epoch": 0.02409194097616345, + "epoch": 0.02405848848082972, "grad_norm": 0.0, - "learning_rate": 1.6049149338374293e-05, - "loss": 1.0151, + "learning_rate": 1.6033994334277622e-05, + "loss": 1.1145, "step": 849 }, { - "epoch": 0.024120317820658342, + "epoch": 0.024086825923092182, "grad_norm": 0.0, - "learning_rate": 1.6068052930056713e-05, - "loss": 1.1466, + "learning_rate": 1.6052880075542968e-05, + "loss": 1.0924, "step": 850 }, { - "epoch": 0.024148694665153236, + "epoch": 0.024115163365354644, "grad_norm": 0.0, - "learning_rate": 1.6086956521739132e-05, - "loss": 1.0952, + "learning_rate": 1.607176581680831e-05, + "loss": 1.2271, "step": 851 }, { - "epoch": 0.024177071509648127, + "epoch": 0.024143500807617103, "grad_norm": 0.0, - "learning_rate": 1.6105860113421552e-05, - "loss": 1.1207, + "learning_rate": 1.6090651558073655e-05, + "loss": 1.0907, "step": 852 }, { - "epoch": 0.024205448354143018, + "epoch": 0.024171838249879566, "grad_norm": 0.0, - "learning_rate": 1.6124763705103972e-05, - "loss": 1.1338, + "learning_rate": 1.6109537299339e-05, + "loss": 1.1396, "step": 853 }, { - "epoch": 0.024233825198637912, + "epoch": 0.02420017569214203, "grad_norm": 0.0, - "learning_rate": 1.614366729678639e-05, - "loss": 1.0369, + "learning_rate": 1.6128423040604343e-05, + "loss": 1.2059, "step": 854 }, { - "epoch": 0.024262202043132803, + "epoch": 0.024228513134404488, "grad_norm": 0.0, - "learning_rate": 1.616257088846881e-05, - "loss": 1.051, + "learning_rate": 1.6147308781869692e-05, + "loss": 1.2788, "step": 855 }, { - "epoch": 0.024290578887627697, + "epoch": 0.02425685057666695, "grad_norm": 0.0, - "learning_rate": 1.618147448015123e-05, - "loss": 1.0521, + "learning_rate": 1.6166194523135034e-05, + "loss": 1.0941, "step": 856 }, { - "epoch": 0.024318955732122588, + "epoch": 0.024285188018929413, "grad_norm": 0.0, - "learning_rate": 1.620037807183365e-05, - "loss": 1.051, + "learning_rate": 1.618508026440038e-05, + "loss": 1.1942, "step": 857 }, { - "epoch": 0.02434733257661748, + "epoch": 0.024313525461191872, "grad_norm": 0.0, - "learning_rate": 1.621928166351607e-05, - "loss": 1.1226, + "learning_rate": 1.6203966005665722e-05, + "loss": 1.2222, "step": 858 }, { - "epoch": 0.024375709421112373, + "epoch": 0.024341862903454334, "grad_norm": 0.0, - "learning_rate": 1.623818525519849e-05, - "loss": 1.0548, + "learning_rate": 1.6222851746931067e-05, + "loss": 1.14, "step": 859 }, { - "epoch": 0.024404086265607264, + "epoch": 0.024370200345716797, "grad_norm": 0.0, - "learning_rate": 1.625708884688091e-05, - "loss": 1.0794, + "learning_rate": 1.6241737488196413e-05, + "loss": 1.207, "step": 860 }, { - "epoch": 0.024432463110102158, + "epoch": 0.024398537787979256, "grad_norm": 0.0, - "learning_rate": 1.627599243856333e-05, - "loss": 1.0048, + "learning_rate": 1.626062322946176e-05, + "loss": 1.2058, "step": 861 }, { - "epoch": 0.02446083995459705, + "epoch": 0.02442687523024172, "grad_norm": 0.0, - "learning_rate": 1.629489603024575e-05, - "loss": 0.9879, + "learning_rate": 1.6279508970727104e-05, + "loss": 1.1768, "step": 862 }, { - "epoch": 0.02448921679909194, + "epoch": 0.02445521267250418, "grad_norm": 0.0, - "learning_rate": 1.631379962192817e-05, - "loss": 1.1754, + "learning_rate": 1.6298394711992446e-05, + "loss": 1.1348, "step": 863 }, { - "epoch": 0.024517593643586834, + "epoch": 0.02448355011476664, "grad_norm": 0.0, - "learning_rate": 1.6332703213610588e-05, - "loss": 1.141, + "learning_rate": 1.631728045325779e-05, + "loss": 1.208, "step": 864 }, { - "epoch": 0.024545970488081725, + "epoch": 0.024511887557029103, "grad_norm": 0.0, - "learning_rate": 1.6351606805293008e-05, - "loss": 1.0932, + "learning_rate": 1.6336166194523134e-05, + "loss": 1.2534, "step": 865 }, { - "epoch": 0.02457434733257662, + "epoch": 0.024540224999291566, "grad_norm": 0.0, - "learning_rate": 1.6370510396975427e-05, - "loss": 1.0335, + "learning_rate": 1.6355051935788483e-05, + "loss": 1.1918, "step": 866 }, { - "epoch": 0.02460272417707151, + "epoch": 0.024568562441554025, "grad_norm": 0.0, - "learning_rate": 1.6389413988657847e-05, - "loss": 1.0093, + "learning_rate": 1.6373937677053825e-05, + "loss": 1.22, "step": 867 }, { - "epoch": 0.0246311010215664, + "epoch": 0.024596899883816487, "grad_norm": 0.0, - "learning_rate": 1.6408317580340267e-05, - "loss": 1.0605, + "learning_rate": 1.639282341831917e-05, + "loss": 1.1084, "step": 868 }, { - "epoch": 0.024659477866061295, + "epoch": 0.02462523732607895, "grad_norm": 0.0, - "learning_rate": 1.6427221172022686e-05, - "loss": 1.1274, + "learning_rate": 1.6411709159584516e-05, + "loss": 1.2494, "step": 869 }, { - "epoch": 0.024687854710556185, + "epoch": 0.02465357476834141, "grad_norm": 0.0, - "learning_rate": 1.6446124763705106e-05, - "loss": 1.1568, + "learning_rate": 1.6430594900849858e-05, + "loss": 0.9888, "step": 870 }, { - "epoch": 0.02471623155505108, + "epoch": 0.02468191221060387, "grad_norm": 0.0, - "learning_rate": 1.6465028355387526e-05, - "loss": 1.0097, + "learning_rate": 1.6449480642115204e-05, + "loss": 1.1365, "step": 871 }, { - "epoch": 0.02474460839954597, + "epoch": 0.024710249652866334, "grad_norm": 0.0, - "learning_rate": 1.6483931947069945e-05, - "loss": 1.114, + "learning_rate": 1.646836638338055e-05, + "loss": 1.119, "step": 872 }, { - "epoch": 0.02477298524404086, + "epoch": 0.024738587095128793, "grad_norm": 0.0, - "learning_rate": 1.6502835538752365e-05, - "loss": 1.0433, + "learning_rate": 1.6487252124645895e-05, + "loss": 1.0938, "step": 873 }, { - "epoch": 0.024801362088535756, + "epoch": 0.024766924537391256, "grad_norm": 0.0, - "learning_rate": 1.6521739130434785e-05, - "loss": 1.1086, + "learning_rate": 1.6506137865911237e-05, + "loss": 1.2347, "step": 874 }, { - "epoch": 0.024829738933030646, + "epoch": 0.024795261979653715, "grad_norm": 0.0, - "learning_rate": 1.6540642722117204e-05, - "loss": 1.1088, + "learning_rate": 1.6525023607176583e-05, + "loss": 1.1958, "step": 875 }, { - "epoch": 0.02485811577752554, + "epoch": 0.024823599421916177, "grad_norm": 0.0, - "learning_rate": 1.6559546313799624e-05, - "loss": 1.1254, + "learning_rate": 1.6543909348441928e-05, + "loss": 1.2559, "step": 876 }, { - "epoch": 0.02488649262202043, + "epoch": 0.02485193686417864, "grad_norm": 0.0, - "learning_rate": 1.6578449905482044e-05, - "loss": 1.0885, + "learning_rate": 1.6562795089707274e-05, + "loss": 1.1729, "step": 877 }, { - "epoch": 0.024914869466515322, + "epoch": 0.0248802743064411, "grad_norm": 0.0, - "learning_rate": 1.6597353497164463e-05, - "loss": 1.0163, + "learning_rate": 1.6581680830972616e-05, + "loss": 0.998, "step": 878 }, { - "epoch": 0.024943246311010216, + "epoch": 0.02490861174870356, "grad_norm": 0.0, - "learning_rate": 1.6616257088846883e-05, - "loss": 1.1274, + "learning_rate": 1.660056657223796e-05, + "loss": 1.1483, "step": 879 }, { - "epoch": 0.024971623155505107, + "epoch": 0.024936949190966024, "grad_norm": 0.0, - "learning_rate": 1.6635160680529302e-05, - "loss": 1.0764, + "learning_rate": 1.6619452313503307e-05, + "loss": 1.0743, "step": 880 }, { - "epoch": 0.025, + "epoch": 0.024965286633228483, "grad_norm": 0.0, - "learning_rate": 1.6654064272211722e-05, - "loss": 1.1566, + "learning_rate": 1.663833805476865e-05, + "loss": 1.1937, "step": 881 }, { - "epoch": 0.025028376844494892, + "epoch": 0.024993624075490946, "grad_norm": 0.0, - "learning_rate": 1.6672967863894142e-05, - "loss": 1.0825, + "learning_rate": 1.6657223796033998e-05, + "loss": 1.1681, "step": 882 }, { - "epoch": 0.025056753688989783, + "epoch": 0.02502196151775341, "grad_norm": 0.0, - "learning_rate": 1.669187145557656e-05, - "loss": 1.1797, + "learning_rate": 1.667610953729934e-05, + "loss": 1.1093, "step": 883 }, { - "epoch": 0.025085130533484677, + "epoch": 0.025050298960015868, "grad_norm": 0.0, - "learning_rate": 1.671077504725898e-05, - "loss": 1.1177, + "learning_rate": 1.6694995278564686e-05, + "loss": 1.1331, "step": 884 }, { - "epoch": 0.025113507377979568, + "epoch": 0.02507863640227833, "grad_norm": 0.0, - "learning_rate": 1.67296786389414e-05, - "loss": 1.0751, + "learning_rate": 1.6713881019830028e-05, + "loss": 1.1682, "step": 885 }, { - "epoch": 0.025141884222474462, + "epoch": 0.025106973844540793, "grad_norm": 0.0, - "learning_rate": 1.674858223062382e-05, - "loss": 1.0567, + "learning_rate": 1.6732766761095374e-05, + "loss": 1.1375, "step": 886 }, { - "epoch": 0.025170261066969353, + "epoch": 0.025135311286803252, "grad_norm": 0.0, - "learning_rate": 1.676748582230624e-05, - "loss": 1.1445, + "learning_rate": 1.675165250236072e-05, + "loss": 1.0139, "step": 887 }, { - "epoch": 0.025198637911464244, + "epoch": 0.025163648729065714, "grad_norm": 0.0, - "learning_rate": 1.678638941398866e-05, - "loss": 1.1209, + "learning_rate": 1.6770538243626065e-05, + "loss": 1.2227, "step": 888 }, { - "epoch": 0.025227014755959138, + "epoch": 0.025191986171328177, "grad_norm": 0.0, - "learning_rate": 1.680529300567108e-05, - "loss": 1.0367, + "learning_rate": 1.678942398489141e-05, + "loss": 1.2067, "step": 889 }, { - "epoch": 0.02525539160045403, + "epoch": 0.025220323613590636, "grad_norm": 0.0, - "learning_rate": 1.68241965973535e-05, - "loss": 1.1688, + "learning_rate": 1.6808309726156752e-05, + "loss": 1.0951, "step": 890 }, { - "epoch": 0.025283768444948923, + "epoch": 0.0252486610558531, "grad_norm": 0.0, - "learning_rate": 1.684310018903592e-05, - "loss": 1.0974, + "learning_rate": 1.6827195467422098e-05, + "loss": 1.2504, "step": 891 }, { - "epoch": 0.025312145289443814, + "epoch": 0.02527699849811556, "grad_norm": 0.0, - "learning_rate": 1.686200378071834e-05, - "loss": 1.176, + "learning_rate": 1.684608120868744e-05, + "loss": 0.9901, "step": 892 }, { - "epoch": 0.025340522133938705, + "epoch": 0.02530533594037802, "grad_norm": 0.0, - "learning_rate": 1.6880907372400758e-05, - "loss": 1.079, + "learning_rate": 1.686496694995279e-05, + "loss": 1.2271, "step": 893 }, { - "epoch": 0.0253688989784336, + "epoch": 0.025333673382640483, "grad_norm": 0.0, - "learning_rate": 1.6899810964083178e-05, - "loss": 1.1097, + "learning_rate": 1.688385269121813e-05, + "loss": 1.0905, "step": 894 }, { - "epoch": 0.02539727582292849, + "epoch": 0.025362010824902945, "grad_norm": 0.0, - "learning_rate": 1.6918714555765597e-05, - "loss": 1.0207, + "learning_rate": 1.6902738432483477e-05, + "loss": 1.1918, "step": 895 }, { - "epoch": 0.025425652667423384, + "epoch": 0.025390348267165404, "grad_norm": 0.0, - "learning_rate": 1.6937618147448017e-05, - "loss": 1.1157, + "learning_rate": 1.6921624173748822e-05, + "loss": 1.1195, "step": 896 }, { - "epoch": 0.025454029511918275, + "epoch": 0.025418685709427867, "grad_norm": 0.0, - "learning_rate": 1.6956521739130437e-05, - "loss": 1.1028, + "learning_rate": 1.6940509915014164e-05, + "loss": 1.0983, "step": 897 }, { - "epoch": 0.025482406356413165, + "epoch": 0.02544702315169033, "grad_norm": 0.0, - "learning_rate": 1.6975425330812856e-05, - "loss": 1.1507, + "learning_rate": 1.695939565627951e-05, + "loss": 1.1206, "step": 898 }, { - "epoch": 0.02551078320090806, + "epoch": 0.02547536059395279, "grad_norm": 0.0, - "learning_rate": 1.6994328922495276e-05, - "loss": 1.0549, + "learning_rate": 1.6978281397544856e-05, + "loss": 1.2123, "step": 899 }, { - "epoch": 0.02553916004540295, + "epoch": 0.02550369803621525, "grad_norm": 0.0, - "learning_rate": 1.7013232514177696e-05, - "loss": 1.1238, + "learning_rate": 1.69971671388102e-05, + "loss": 1.1556, "step": 900 }, { - "epoch": 0.025567536889897845, + "epoch": 0.025532035478477714, "grad_norm": 0.0, - "learning_rate": 1.7032136105860115e-05, - "loss": 1.177, + "learning_rate": 1.7016052880075543e-05, + "loss": 1.1118, "step": 901 }, { - "epoch": 0.025595913734392736, + "epoch": 0.025560372920740173, "grad_norm": 0.0, - "learning_rate": 1.7051039697542535e-05, - "loss": 1.1144, + "learning_rate": 1.703493862134089e-05, + "loss": 1.2187, "step": 902 }, { - "epoch": 0.025624290578887626, + "epoch": 0.025588710363002636, "grad_norm": 0.0, - "learning_rate": 1.7069943289224955e-05, - "loss": 1.1054, + "learning_rate": 1.7053824362606234e-05, + "loss": 1.1808, "step": 903 }, { - "epoch": 0.02565266742338252, + "epoch": 0.025617047805265098, "grad_norm": 0.0, - "learning_rate": 1.7088846880907374e-05, - "loss": 1.0129, + "learning_rate": 1.707271010387158e-05, + "loss": 1.1165, "step": 904 }, { - "epoch": 0.02568104426787741, + "epoch": 0.025645385247527557, "grad_norm": 0.0, - "learning_rate": 1.7107750472589794e-05, - "loss": 1.1375, + "learning_rate": 1.7091595845136922e-05, + "loss": 1.0584, "step": 905 }, { - "epoch": 0.025709421112372306, + "epoch": 0.02567372268979002, "grad_norm": 0.0, - "learning_rate": 1.7126654064272214e-05, - "loss": 1.189, + "learning_rate": 1.7110481586402268e-05, + "loss": 1.0872, "step": 906 }, { - "epoch": 0.025737797956867196, + "epoch": 0.025702060132052482, "grad_norm": 0.0, - "learning_rate": 1.7145557655954633e-05, - "loss": 1.0265, + "learning_rate": 1.7129367327667613e-05, + "loss": 1.1257, "step": 907 }, { - "epoch": 0.025766174801362087, + "epoch": 0.02573039757431494, "grad_norm": 0.0, - "learning_rate": 1.7164461247637053e-05, - "loss": 1.1796, + "learning_rate": 1.7148253068932955e-05, + "loss": 1.251, "step": 908 }, { - "epoch": 0.02579455164585698, + "epoch": 0.025758735016577404, "grad_norm": 0.0, - "learning_rate": 1.7183364839319473e-05, - "loss": 1.0213, + "learning_rate": 1.7167138810198304e-05, + "loss": 1.1633, "step": 909 }, { - "epoch": 0.025822928490351872, + "epoch": 0.025787072458839867, "grad_norm": 0.0, - "learning_rate": 1.7202268431001892e-05, - "loss": 1.0744, + "learning_rate": 1.7186024551463646e-05, + "loss": 1.1605, "step": 910 }, { - "epoch": 0.025851305334846766, + "epoch": 0.025815409901102326, "grad_norm": 0.0, - "learning_rate": 1.7221172022684312e-05, - "loss": 1.0767, + "learning_rate": 1.7204910292728992e-05, + "loss": 1.2009, "step": 911 }, { - "epoch": 0.025879682179341657, + "epoch": 0.025843747343364788, "grad_norm": 0.0, - "learning_rate": 1.724007561436673e-05, - "loss": 1.087, + "learning_rate": 1.7223796033994334e-05, + "loss": 1.0993, "step": 912 }, { - "epoch": 0.025908059023836548, + "epoch": 0.02587208478562725, "grad_norm": 0.0, - "learning_rate": 1.725897920604915e-05, - "loss": 1.1349, + "learning_rate": 1.724268177525968e-05, + "loss": 1.1467, "step": 913 }, { - "epoch": 0.025936435868331442, + "epoch": 0.02590042222788971, "grad_norm": 0.0, - "learning_rate": 1.727788279773157e-05, - "loss": 1.0972, + "learning_rate": 1.7261567516525025e-05, + "loss": 1.2084, "step": 914 }, { - "epoch": 0.025964812712826333, + "epoch": 0.025928759670152172, "grad_norm": 0.0, - "learning_rate": 1.729678638941399e-05, - "loss": 1.1146, + "learning_rate": 1.728045325779037e-05, + "loss": 1.1201, "step": 915 }, { - "epoch": 0.025993189557321227, + "epoch": 0.025957097112414635, "grad_norm": 0.0, - "learning_rate": 1.731568998109641e-05, - "loss": 0.9762, + "learning_rate": 1.7299338999055716e-05, + "loss": 1.1795, "step": 916 }, { - "epoch": 0.026021566401816118, + "epoch": 0.025985434554677094, "grad_norm": 0.0, - "learning_rate": 1.733459357277883e-05, - "loss": 1.1717, + "learning_rate": 1.731822474032106e-05, + "loss": 1.1232, "step": 917 }, { - "epoch": 0.02604994324631101, + "epoch": 0.026013771996939557, "grad_norm": 0.0, - "learning_rate": 1.735349716446125e-05, - "loss": 1.0688, + "learning_rate": 1.7337110481586404e-05, + "loss": 1.1712, "step": 918 }, { - "epoch": 0.026078320090805903, + "epoch": 0.02604210943920202, "grad_norm": 0.0, - "learning_rate": 1.737240075614367e-05, - "loss": 1.0612, + "learning_rate": 1.7355996222851746e-05, + "loss": 1.1458, "step": 919 }, { - "epoch": 0.026106696935300794, + "epoch": 0.02607044688146448, "grad_norm": 0.0, - "learning_rate": 1.739130434782609e-05, - "loss": 1.0578, + "learning_rate": 1.7374881964117095e-05, + "loss": 1.1636, "step": 920 }, { - "epoch": 0.026135073779795688, + "epoch": 0.02609878432372694, "grad_norm": 0.0, - "learning_rate": 1.741020793950851e-05, - "loss": 0.9924, + "learning_rate": 1.7393767705382437e-05, + "loss": 1.2953, "step": 921 }, { - "epoch": 0.02616345062429058, + "epoch": 0.026127121765989404, "grad_norm": 0.0, - "learning_rate": 1.7429111531190928e-05, - "loss": 1.1282, + "learning_rate": 1.7412653446647783e-05, + "loss": 1.1941, "step": 922 }, { - "epoch": 0.02619182746878547, + "epoch": 0.026155459208251863, "grad_norm": 0.0, - "learning_rate": 1.7448015122873348e-05, - "loss": 1.0682, + "learning_rate": 1.743153918791313e-05, + "loss": 1.1356, "step": 923 }, { - "epoch": 0.026220204313280364, + "epoch": 0.026183796650514325, "grad_norm": 0.0, - "learning_rate": 1.7466918714555767e-05, - "loss": 1.1993, + "learning_rate": 1.745042492917847e-05, + "loss": 1.2153, "step": 924 }, { - "epoch": 0.026248581157775255, + "epoch": 0.026212134092776784, "grad_norm": 0.0, - "learning_rate": 1.7485822306238187e-05, - "loss": 1.0298, + "learning_rate": 1.7469310670443816e-05, + "loss": 1.2854, "step": 925 }, { - "epoch": 0.02627695800227015, + "epoch": 0.026240471535039247, "grad_norm": 0.0, - "learning_rate": 1.7504725897920607e-05, - "loss": 1.0383, + "learning_rate": 1.7488196411709162e-05, + "loss": 1.1179, "step": 926 }, { - "epoch": 0.02630533484676504, + "epoch": 0.02626880897730171, "grad_norm": 0.0, - "learning_rate": 1.7523629489603026e-05, - "loss": 1.0659, + "learning_rate": 1.7507082152974507e-05, + "loss": 1.171, "step": 927 }, { - "epoch": 0.02633371169125993, + "epoch": 0.02629714641956417, "grad_norm": 0.0, - "learning_rate": 1.7542533081285446e-05, - "loss": 1.0561, + "learning_rate": 1.752596789423985e-05, + "loss": 1.2213, "step": 928 }, { - "epoch": 0.026362088535754825, + "epoch": 0.02632548386182663, "grad_norm": 0.0, - "learning_rate": 1.7561436672967866e-05, - "loss": 1.004, + "learning_rate": 1.7544853635505195e-05, + "loss": 1.1704, "step": 929 }, { - "epoch": 0.026390465380249715, + "epoch": 0.026353821304089094, "grad_norm": 0.0, - "learning_rate": 1.7580340264650285e-05, - "loss": 1.0173, + "learning_rate": 1.756373937677054e-05, + "loss": 1.0771, "step": 930 }, { - "epoch": 0.02641884222474461, + "epoch": 0.026382158746351553, "grad_norm": 0.0, - "learning_rate": 1.7599243856332705e-05, - "loss": 1.0912, + "learning_rate": 1.7582625118035886e-05, + "loss": 1.2738, "step": 931 }, { - "epoch": 0.0264472190692395, + "epoch": 0.026410496188614015, "grad_norm": 0.0, - "learning_rate": 1.7618147448015125e-05, - "loss": 0.9771, + "learning_rate": 1.7601510859301228e-05, + "loss": 1.0872, "step": 932 }, { - "epoch": 0.02647559591373439, + "epoch": 0.026438833630876478, "grad_norm": 0.0, - "learning_rate": 1.7637051039697544e-05, - "loss": 0.9924, + "learning_rate": 1.7620396600566574e-05, + "loss": 1.1313, "step": 933 }, { - "epoch": 0.026503972758229286, + "epoch": 0.026467171073138937, "grad_norm": 0.0, - "learning_rate": 1.7655954631379964e-05, - "loss": 1.0828, + "learning_rate": 1.763928234183192e-05, + "loss": 1.1945, "step": 934 }, { - "epoch": 0.026532349602724176, + "epoch": 0.0264955085154014, "grad_norm": 0.0, - "learning_rate": 1.7674858223062384e-05, - "loss": 1.1503, + "learning_rate": 1.765816808309726e-05, + "loss": 1.2142, "step": 935 }, { - "epoch": 0.02656072644721907, + "epoch": 0.026523845957663862, "grad_norm": 0.0, - "learning_rate": 1.7693761814744803e-05, - "loss": 1.1491, + "learning_rate": 1.7677053824362607e-05, + "loss": 1.06, "step": 936 }, { - "epoch": 0.02658910329171396, + "epoch": 0.02655218339992632, "grad_norm": 0.0, - "learning_rate": 1.7712665406427223e-05, - "loss": 1.1957, + "learning_rate": 1.7695939565627953e-05, + "loss": 1.0525, "step": 937 }, { - "epoch": 0.026617480136208852, + "epoch": 0.026580520842188784, "grad_norm": 0.0, - "learning_rate": 1.7731568998109643e-05, - "loss": 1.0767, + "learning_rate": 1.7714825306893298e-05, + "loss": 1.0756, "step": 938 }, { - "epoch": 0.026645856980703746, + "epoch": 0.026608858284451246, "grad_norm": 0.0, - "learning_rate": 1.7750472589792062e-05, - "loss": 1.1028, + "learning_rate": 1.773371104815864e-05, + "loss": 1.2549, "step": 939 }, { - "epoch": 0.026674233825198637, + "epoch": 0.026637195726713706, "grad_norm": 0.0, - "learning_rate": 1.7769376181474482e-05, - "loss": 1.0693, + "learning_rate": 1.7752596789423986e-05, + "loss": 1.1296, "step": 940 }, { - "epoch": 0.02670261066969353, + "epoch": 0.026665533168976168, "grad_norm": 0.0, - "learning_rate": 1.77882797731569e-05, - "loss": 1.0373, + "learning_rate": 1.777148253068933e-05, + "loss": 1.012, "step": 941 }, { - "epoch": 0.026730987514188422, + "epoch": 0.02669387061123863, "grad_norm": 0.0, - "learning_rate": 1.780718336483932e-05, - "loss": 1.2275, + "learning_rate": 1.7790368271954677e-05, + "loss": 1.1195, "step": 942 }, { - "epoch": 0.026759364358683313, + "epoch": 0.02672220805350109, "grad_norm": 0.0, - "learning_rate": 1.782608695652174e-05, - "loss": 1.1487, + "learning_rate": 1.7809254013220023e-05, + "loss": 1.1443, "step": 943 }, { - "epoch": 0.026787741203178207, + "epoch": 0.026750545495763552, "grad_norm": 0.0, - "learning_rate": 1.784499054820416e-05, - "loss": 1.2285, + "learning_rate": 1.7828139754485365e-05, + "loss": 1.1257, "step": 944 }, { - "epoch": 0.026816118047673098, + "epoch": 0.026778882938026015, "grad_norm": 0.0, - "learning_rate": 1.786389413988658e-05, - "loss": 1.0129, + "learning_rate": 1.784702549575071e-05, + "loss": 1.0789, "step": 945 }, { - "epoch": 0.026844494892167992, + "epoch": 0.026807220380288474, "grad_norm": 0.0, - "learning_rate": 1.7882797731569e-05, - "loss": 1.2041, + "learning_rate": 1.7865911237016052e-05, + "loss": 1.2325, "step": 946 }, { - "epoch": 0.026872871736662883, + "epoch": 0.026835557822550937, "grad_norm": 0.0, - "learning_rate": 1.790170132325142e-05, - "loss": 1.0586, + "learning_rate": 1.7884796978281398e-05, + "loss": 1.1865, "step": 947 }, { - "epoch": 0.026901248581157774, + "epoch": 0.0268638952648134, "grad_norm": 0.0, - "learning_rate": 1.792060491493384e-05, - "loss": 1.142, + "learning_rate": 1.7903682719546744e-05, + "loss": 1.2458, "step": 948 }, { - "epoch": 0.026929625425652668, + "epoch": 0.026892232707075858, "grad_norm": 0.0, - "learning_rate": 1.793950850661626e-05, - "loss": 1.1145, + "learning_rate": 1.792256846081209e-05, + "loss": 1.4362, "step": 949 }, { - "epoch": 0.02695800227014756, + "epoch": 0.02692057014933832, "grad_norm": 0.0, - "learning_rate": 1.795841209829868e-05, - "loss": 1.0354, + "learning_rate": 1.794145420207743e-05, + "loss": 1.1303, "step": 950 }, { - "epoch": 0.026986379114642453, + "epoch": 0.026948907591600783, "grad_norm": 0.0, - "learning_rate": 1.7977315689981098e-05, - "loss": 1.1082, + "learning_rate": 1.7960339943342777e-05, + "loss": 1.1712, "step": 951 }, { - "epoch": 0.027014755959137344, + "epoch": 0.026977245033863242, "grad_norm": 0.0, - "learning_rate": 1.7996219281663518e-05, - "loss": 1.1336, + "learning_rate": 1.7979225684608122e-05, + "loss": 1.1671, "step": 952 }, { - "epoch": 0.027043132803632235, + "epoch": 0.027005582476125705, "grad_norm": 0.0, - "learning_rate": 1.8015122873345938e-05, - "loss": 1.1254, + "learning_rate": 1.7998111425873468e-05, + "loss": 1.1608, "step": 953 }, { - "epoch": 0.02707150964812713, + "epoch": 0.027033919918388168, "grad_norm": 0.0, - "learning_rate": 1.8034026465028357e-05, - "loss": 1.106, + "learning_rate": 1.8016997167138813e-05, + "loss": 1.178, "step": 954 }, { - "epoch": 0.02709988649262202, + "epoch": 0.027062257360650627, "grad_norm": 0.0, - "learning_rate": 1.8052930056710777e-05, - "loss": 1.0362, + "learning_rate": 1.8035882908404156e-05, + "loss": 1.3507, "step": 955 }, { - "epoch": 0.027128263337116914, + "epoch": 0.02709059480291309, "grad_norm": 0.0, - "learning_rate": 1.8071833648393197e-05, - "loss": 1.1838, + "learning_rate": 1.80547686496695e-05, + "loss": 1.0958, "step": 956 }, { - "epoch": 0.027156640181611805, + "epoch": 0.027118932245175552, "grad_norm": 0.0, - "learning_rate": 1.8090737240075616e-05, - "loss": 1.1169, + "learning_rate": 1.8073654390934843e-05, + "loss": 1.2291, "step": 957 }, { - "epoch": 0.027185017026106695, + "epoch": 0.02714726968743801, "grad_norm": 0.0, - "learning_rate": 1.8109640831758036e-05, - "loss": 1.0865, + "learning_rate": 1.809254013220019e-05, + "loss": 1.0882, "step": 958 }, { - "epoch": 0.02721339387060159, + "epoch": 0.027175607129700474, "grad_norm": 0.0, - "learning_rate": 1.8128544423440455e-05, - "loss": 1.2031, + "learning_rate": 1.8111425873465534e-05, + "loss": 1.1105, "step": 959 }, { - "epoch": 0.02724177071509648, + "epoch": 0.027203944571962936, "grad_norm": 0.0, - "learning_rate": 1.8147448015122875e-05, - "loss": 1.0189, + "learning_rate": 1.813031161473088e-05, + "loss": 1.2769, "step": 960 }, { - "epoch": 0.027270147559591375, + "epoch": 0.027232282014225395, "grad_norm": 0.0, - "learning_rate": 1.8166351606805295e-05, - "loss": 1.0428, + "learning_rate": 1.8149197355996226e-05, + "loss": 1.1504, "step": 961 }, { - "epoch": 0.027298524404086266, + "epoch": 0.027260619456487858, "grad_norm": 0.0, - "learning_rate": 1.8185255198487714e-05, - "loss": 1.1097, + "learning_rate": 1.8168083097261568e-05, + "loss": 1.1042, "step": 962 }, { - "epoch": 0.027326901248581156, + "epoch": 0.02728895689875032, "grad_norm": 0.0, - "learning_rate": 1.8204158790170134e-05, - "loss": 1.0812, + "learning_rate": 1.8186968838526913e-05, + "loss": 1.1164, "step": 963 }, { - "epoch": 0.02735527809307605, + "epoch": 0.02731729434101278, "grad_norm": 0.0, - "learning_rate": 1.8223062381852554e-05, - "loss": 1.1042, + "learning_rate": 1.820585457979226e-05, + "loss": 1.1423, "step": 964 }, { - "epoch": 0.02738365493757094, + "epoch": 0.027345631783275242, "grad_norm": 0.0, - "learning_rate": 1.8241965973534973e-05, - "loss": 1.2125, + "learning_rate": 1.8224740321057604e-05, + "loss": 1.2349, "step": 965 }, { - "epoch": 0.027412031782065836, + "epoch": 0.027373969225537705, "grad_norm": 0.0, - "learning_rate": 1.8260869565217393e-05, - "loss": 1.1216, + "learning_rate": 1.8243626062322947e-05, + "loss": 1.1776, "step": 966 }, { - "epoch": 0.027440408626560726, + "epoch": 0.027402306667800164, "grad_norm": 0.0, - "learning_rate": 1.8279773156899813e-05, - "loss": 1.1035, + "learning_rate": 1.8262511803588292e-05, + "loss": 1.2228, "step": 967 }, { - "epoch": 0.027468785471055617, + "epoch": 0.027430644110062626, "grad_norm": 0.0, - "learning_rate": 1.8298676748582232e-05, - "loss": 1.1222, + "learning_rate": 1.8281397544853638e-05, + "loss": 1.0574, "step": 968 }, { - "epoch": 0.02749716231555051, + "epoch": 0.02745898155232509, "grad_norm": 0.0, - "learning_rate": 1.8317580340264652e-05, - "loss": 1.1665, + "learning_rate": 1.8300283286118983e-05, + "loss": 1.1566, "step": 969 }, { - "epoch": 0.027525539160045402, + "epoch": 0.027487318994587548, "grad_norm": 0.0, - "learning_rate": 1.8336483931947072e-05, - "loss": 1.2081, + "learning_rate": 1.8319169027384325e-05, + "loss": 1.1717, "step": 970 }, { - "epoch": 0.027553916004540296, + "epoch": 0.02751565643685001, "grad_norm": 0.0, - "learning_rate": 1.835538752362949e-05, - "loss": 1.1211, + "learning_rate": 1.833805476864967e-05, + "loss": 1.1215, "step": 971 }, { - "epoch": 0.027582292849035187, + "epoch": 0.02754399387911247, "grad_norm": 0.0, - "learning_rate": 1.837429111531191e-05, - "loss": 1.1106, + "learning_rate": 1.8356940509915016e-05, + "loss": 1.2098, "step": 972 }, { - "epoch": 0.027610669693530078, + "epoch": 0.027572331321374932, "grad_norm": 0.0, - "learning_rate": 1.839319470699433e-05, - "loss": 1.1011, + "learning_rate": 1.837582625118036e-05, + "loss": 1.2334, "step": 973 }, { - "epoch": 0.027639046538024972, + "epoch": 0.027600668763637395, "grad_norm": 0.0, - "learning_rate": 1.841209829867675e-05, - "loss": 1.162, + "learning_rate": 1.8394711992445704e-05, + "loss": 1.111, "step": 974 }, { - "epoch": 0.027667423382519863, + "epoch": 0.027629006205899854, "grad_norm": 0.0, - "learning_rate": 1.843100189035917e-05, - "loss": 1.0762, + "learning_rate": 1.841359773371105e-05, + "loss": 1.186, "step": 975 }, { - "epoch": 0.027695800227014757, + "epoch": 0.027657343648162316, "grad_norm": 0.0, - "learning_rate": 1.844990548204159e-05, - "loss": 1.0404, + "learning_rate": 1.8432483474976395e-05, + "loss": 1.1011, "step": 976 }, { - "epoch": 0.027724177071509648, + "epoch": 0.02768568109042478, "grad_norm": 0.0, - "learning_rate": 1.846880907372401e-05, - "loss": 1.0825, + "learning_rate": 1.8451369216241737e-05, + "loss": 1.1928, "step": 977 }, { - "epoch": 0.02775255391600454, + "epoch": 0.027714018532687238, "grad_norm": 0.0, - "learning_rate": 1.848771266540643e-05, - "loss": 1.077, + "learning_rate": 1.8470254957507083e-05, + "loss": 1.0721, "step": 978 }, { - "epoch": 0.027780930760499433, + "epoch": 0.0277423559749497, "grad_norm": 0.0, - "learning_rate": 1.850661625708885e-05, - "loss": 1.1219, + "learning_rate": 1.848914069877243e-05, + "loss": 1.1285, "step": 979 }, { - "epoch": 0.027809307604994324, + "epoch": 0.027770693417212163, "grad_norm": 0.0, - "learning_rate": 1.8525519848771268e-05, - "loss": 1.14, + "learning_rate": 1.8508026440037774e-05, + "loss": 1.1862, "step": 980 }, { - "epoch": 0.027837684449489218, + "epoch": 0.027799030859474622, "grad_norm": 0.0, - "learning_rate": 1.8544423440453688e-05, - "loss": 1.1385, + "learning_rate": 1.852691218130312e-05, + "loss": 1.1775, "step": 981 }, { - "epoch": 0.02786606129398411, + "epoch": 0.027827368301737085, "grad_norm": 0.0, - "learning_rate": 1.8563327032136108e-05, - "loss": 1.1255, + "learning_rate": 1.8545797922568462e-05, + "loss": 1.233, "step": 982 }, { - "epoch": 0.027894438138479, + "epoch": 0.027855705743999547, "grad_norm": 0.0, - "learning_rate": 1.8582230623818527e-05, - "loss": 1.1345, + "learning_rate": 1.8564683663833807e-05, + "loss": 1.1717, "step": 983 }, { - "epoch": 0.027922814982973894, + "epoch": 0.027884043186262007, "grad_norm": 0.0, - "learning_rate": 1.8601134215500947e-05, - "loss": 1.1709, + "learning_rate": 1.858356940509915e-05, + "loss": 1.1349, "step": 984 }, { - "epoch": 0.027951191827468785, + "epoch": 0.02791238062852447, "grad_norm": 0.0, - "learning_rate": 1.8620037807183367e-05, - "loss": 1.0066, + "learning_rate": 1.8602455146364495e-05, + "loss": 1.2029, "step": 985 }, { - "epoch": 0.02797956867196368, + "epoch": 0.02794071807078693, "grad_norm": 0.0, - "learning_rate": 1.8638941398865786e-05, - "loss": 1.1105, + "learning_rate": 1.862134088762984e-05, + "loss": 1.0713, "step": 986 }, { - "epoch": 0.02800794551645857, + "epoch": 0.02796905551304939, "grad_norm": 0.0, - "learning_rate": 1.8657844990548206e-05, - "loss": 1.1282, + "learning_rate": 1.8640226628895186e-05, + "loss": 1.2452, "step": 987 }, { - "epoch": 0.02803632236095346, + "epoch": 0.027997392955311853, "grad_norm": 0.0, - "learning_rate": 1.8676748582230626e-05, - "loss": 1.0744, + "learning_rate": 1.8659112370160532e-05, + "loss": 1.17, "step": 988 }, { - "epoch": 0.028064699205448355, + "epoch": 0.028025730397574316, "grad_norm": 0.0, - "learning_rate": 1.8695652173913045e-05, - "loss": 1.0988, + "learning_rate": 1.8677998111425874e-05, + "loss": 1.2217, "step": 989 }, { - "epoch": 0.028093076049943246, + "epoch": 0.028054067839836775, "grad_norm": 0.0, - "learning_rate": 1.8714555765595465e-05, - "loss": 1.0661, + "learning_rate": 1.869688385269122e-05, + "loss": 1.1859, "step": 990 }, { - "epoch": 0.02812145289443814, + "epoch": 0.028082405282099238, "grad_norm": 0.0, - "learning_rate": 1.8733459357277885e-05, - "loss": 1.1689, + "learning_rate": 1.8715769593956565e-05, + "loss": 1.143, "step": 991 }, { - "epoch": 0.02814982973893303, + "epoch": 0.0281107427243617, "grad_norm": 0.0, - "learning_rate": 1.8752362948960304e-05, - "loss": 1.1183, + "learning_rate": 1.873465533522191e-05, + "loss": 1.1924, "step": 992 }, { - "epoch": 0.02817820658342792, + "epoch": 0.02813908016662416, "grad_norm": 0.0, - "learning_rate": 1.8771266540642724e-05, - "loss": 0.959, + "learning_rate": 1.8753541076487253e-05, + "loss": 1.197, "step": 993 }, { - "epoch": 0.028206583427922816, + "epoch": 0.028167417608886622, "grad_norm": 0.0, - "learning_rate": 1.8790170132325144e-05, - "loss": 1.0887, + "learning_rate": 1.87724268177526e-05, + "loss": 1.0934, "step": 994 }, { - "epoch": 0.028234960272417706, + "epoch": 0.028195755051149084, "grad_norm": 0.0, - "learning_rate": 1.8809073724007563e-05, - "loss": 1.1091, + "learning_rate": 1.8791312559017944e-05, + "loss": 1.1078, "step": 995 }, { - "epoch": 0.0282633371169126, + "epoch": 0.028224092493411543, "grad_norm": 0.0, - "learning_rate": 1.8827977315689983e-05, - "loss": 1.0734, + "learning_rate": 1.8810198300283286e-05, + "loss": 1.2384, "step": 996 }, { - "epoch": 0.02829171396140749, + "epoch": 0.028252429935674006, "grad_norm": 0.0, - "learning_rate": 1.8846880907372402e-05, - "loss": 1.1206, + "learning_rate": 1.882908404154863e-05, + "loss": 1.1484, "step": 997 }, { - "epoch": 0.028320090805902382, + "epoch": 0.02828076737793647, "grad_norm": 0.0, - "learning_rate": 1.8865784499054822e-05, - "loss": 1.2136, + "learning_rate": 1.8847969782813977e-05, + "loss": 1.2137, "step": 998 }, { - "epoch": 0.028348467650397276, + "epoch": 0.028309104820198928, "grad_norm": 0.0, - "learning_rate": 1.8884688090737242e-05, - "loss": 1.08, + "learning_rate": 1.8866855524079323e-05, + "loss": 1.0782, "step": 999 }, { - "epoch": 0.028376844494892167, + "epoch": 0.02833744226246139, "grad_norm": 0.0, - "learning_rate": 1.890359168241966e-05, - "loss": 1.0858, + "learning_rate": 1.8885741265344665e-05, + "loss": 1.137, "step": 1000 }, { - "epoch": 0.02840522133938706, + "epoch": 0.028365779704723853, "grad_norm": 0.0, - "learning_rate": 1.892249527410208e-05, - "loss": 1.1063, + "learning_rate": 1.890462700661001e-05, + "loss": 1.194, "step": 1001 }, { - "epoch": 0.028433598183881952, + "epoch": 0.028394117146986312, "grad_norm": 0.0, - "learning_rate": 1.89413988657845e-05, - "loss": 1.0768, + "learning_rate": 1.8923512747875356e-05, + "loss": 1.0541, "step": 1002 }, { - "epoch": 0.028461975028376843, + "epoch": 0.028422454589248775, "grad_norm": 0.0, - "learning_rate": 1.896030245746692e-05, - "loss": 1.159, + "learning_rate": 1.89423984891407e-05, + "loss": 1.2079, "step": 1003 }, { - "epoch": 0.028490351872871737, + "epoch": 0.028450792031511237, "grad_norm": 0.0, - "learning_rate": 1.897920604914934e-05, - "loss": 1.2452, + "learning_rate": 1.8961284230406044e-05, + "loss": 1.187, "step": 1004 }, { - "epoch": 0.028518728717366628, + "epoch": 0.028479129473773696, "grad_norm": 0.0, - "learning_rate": 1.899810964083176e-05, - "loss": 1.1211, + "learning_rate": 1.898016997167139e-05, + "loss": 1.1599, "step": 1005 }, { - "epoch": 0.028547105561861522, + "epoch": 0.02850746691603616, "grad_norm": 0.0, - "learning_rate": 1.901701323251418e-05, - "loss": 1.0608, + "learning_rate": 1.8999055712936735e-05, + "loss": 1.1718, "step": 1006 }, { - "epoch": 0.028575482406356413, + "epoch": 0.02853580435829862, "grad_norm": 0.0, - "learning_rate": 1.90359168241966e-05, - "loss": 1.0038, + "learning_rate": 1.9017941454202077e-05, + "loss": 1.1938, "step": 1007 }, { - "epoch": 0.028603859250851304, + "epoch": 0.02856414180056108, "grad_norm": 0.0, - "learning_rate": 1.905482041587902e-05, - "loss": 1.1209, + "learning_rate": 1.9036827195467426e-05, + "loss": 1.2468, "step": 1008 }, { - "epoch": 0.028632236095346198, + "epoch": 0.028592479242823543, "grad_norm": 0.0, - "learning_rate": 1.907372400756144e-05, - "loss": 1.1105, + "learning_rate": 1.9055712936732768e-05, + "loss": 1.1701, "step": 1009 }, { - "epoch": 0.02866061293984109, + "epoch": 0.028620816685086006, "grad_norm": 0.0, - "learning_rate": 1.9092627599243858e-05, - "loss": 1.0688, + "learning_rate": 1.9074598677998114e-05, + "loss": 1.1414, "step": 1010 }, { - "epoch": 0.028688989784335983, + "epoch": 0.028649154127348465, "grad_norm": 0.0, - "learning_rate": 1.9111531190926278e-05, - "loss": 1.1584, + "learning_rate": 1.9093484419263456e-05, + "loss": 1.1783, "step": 1011 }, { - "epoch": 0.028717366628830874, + "epoch": 0.028677491569610927, "grad_norm": 0.0, - "learning_rate": 1.9130434782608697e-05, - "loss": 0.9859, + "learning_rate": 1.91123701605288e-05, + "loss": 1.2122, "step": 1012 }, { - "epoch": 0.028745743473325765, + "epoch": 0.02870582901187339, "grad_norm": 0.0, - "learning_rate": 1.9149338374291117e-05, - "loss": 1.0729, + "learning_rate": 1.9131255901794147e-05, + "loss": 1.2389, "step": 1013 }, { - "epoch": 0.02877412031782066, + "epoch": 0.02873416645413585, "grad_norm": 0.0, - "learning_rate": 1.9168241965973537e-05, - "loss": 1.1955, + "learning_rate": 1.9150141643059492e-05, + "loss": 1.1016, "step": 1014 }, { - "epoch": 0.02880249716231555, + "epoch": 0.02876250389639831, "grad_norm": 0.0, - "learning_rate": 1.9187145557655956e-05, - "loss": 1.0184, + "learning_rate": 1.9169027384324838e-05, + "loss": 1.0969, "step": 1015 }, { - "epoch": 0.028830874006810444, + "epoch": 0.028790841338660774, "grad_norm": 0.0, - "learning_rate": 1.9206049149338376e-05, - "loss": 1.0976, + "learning_rate": 1.918791312559018e-05, + "loss": 1.1701, "step": 1016 }, { - "epoch": 0.028859250851305335, + "epoch": 0.028819178780923233, "grad_norm": 0.0, - "learning_rate": 1.9224952741020796e-05, - "loss": 0.9903, + "learning_rate": 1.9206798866855526e-05, + "loss": 1.2404, "step": 1017 }, { - "epoch": 0.028887627695800226, + "epoch": 0.028847516223185696, "grad_norm": 0.0, - "learning_rate": 1.9243856332703215e-05, - "loss": 1.0305, + "learning_rate": 1.9225684608120868e-05, + "loss": 1.1728, "step": 1018 }, { - "epoch": 0.02891600454029512, + "epoch": 0.02887585366544816, "grad_norm": 0.0, - "learning_rate": 1.9262759924385635e-05, - "loss": 1.0703, + "learning_rate": 1.9244570349386217e-05, + "loss": 1.1865, "step": 1019 }, { - "epoch": 0.02894438138479001, + "epoch": 0.028904191107710617, "grad_norm": 0.0, - "learning_rate": 1.9281663516068055e-05, - "loss": 1.1443, + "learning_rate": 1.926345609065156e-05, + "loss": 1.1162, "step": 1020 }, { - "epoch": 0.028972758229284905, + "epoch": 0.02893252854997308, "grad_norm": 0.0, - "learning_rate": 1.9300567107750474e-05, - "loss": 1.2634, + "learning_rate": 1.9282341831916905e-05, + "loss": 1.1747, "step": 1021 }, { - "epoch": 0.029001135073779796, + "epoch": 0.02896086599223554, "grad_norm": 0.0, - "learning_rate": 1.9319470699432894e-05, - "loss": 1.1668, + "learning_rate": 1.930122757318225e-05, + "loss": 1.2077, "step": 1022 }, { - "epoch": 0.029029511918274686, + "epoch": 0.028989203434498, "grad_norm": 0.0, - "learning_rate": 1.9338374291115314e-05, - "loss": 1.1374, + "learning_rate": 1.9320113314447592e-05, + "loss": 1.1709, "step": 1023 }, { - "epoch": 0.02905788876276958, + "epoch": 0.029017540876760464, "grad_norm": 0.0, - "learning_rate": 1.9357277882797733e-05, - "loss": 1.2186, + "learning_rate": 1.9338999055712938e-05, + "loss": 1.2383, "step": 1024 }, { - "epoch": 0.02908626560726447, + "epoch": 0.029045878319022923, "grad_norm": 0.0, - "learning_rate": 1.9376181474480153e-05, - "loss": 1.0564, + "learning_rate": 1.9357884796978283e-05, + "loss": 1.1296, "step": 1025 }, { - "epoch": 0.029114642451759366, + "epoch": 0.029074215761285386, "grad_norm": 0.0, - "learning_rate": 1.9395085066162573e-05, - "loss": 1.171, + "learning_rate": 1.937677053824363e-05, + "loss": 1.1473, "step": 1026 }, { - "epoch": 0.029143019296254256, + "epoch": 0.02910255320354785, "grad_norm": 0.0, - "learning_rate": 1.9413988657844992e-05, - "loss": 1.0893, + "learning_rate": 1.939565627950897e-05, + "loss": 1.2365, "step": 1027 }, { - "epoch": 0.029171396140749147, + "epoch": 0.029130890645810308, "grad_norm": 0.0, - "learning_rate": 1.9432892249527412e-05, - "loss": 1.1413, + "learning_rate": 1.9414542020774317e-05, + "loss": 1.2007, "step": 1028 }, { - "epoch": 0.02919977298524404, + "epoch": 0.02915922808807277, "grad_norm": 0.0, - "learning_rate": 1.945179584120983e-05, - "loss": 1.1973, + "learning_rate": 1.9433427762039662e-05, + "loss": 1.1995, "step": 1029 }, { - "epoch": 0.029228149829738932, + "epoch": 0.029187565530335233, "grad_norm": 0.0, - "learning_rate": 1.947069943289225e-05, - "loss": 1.0253, + "learning_rate": 1.9452313503305008e-05, + "loss": 1.164, "step": 1030 }, { - "epoch": 0.029256526674233826, + "epoch": 0.029215902972597692, "grad_norm": 0.0, - "learning_rate": 1.948960302457467e-05, - "loss": 1.1682, + "learning_rate": 1.947119924457035e-05, + "loss": 1.2151, "step": 1031 }, { - "epoch": 0.029284903518728717, + "epoch": 0.029244240414860154, "grad_norm": 0.0, - "learning_rate": 1.950850661625709e-05, - "loss": 1.1101, + "learning_rate": 1.9490084985835695e-05, + "loss": 1.1885, "step": 1032 }, { - "epoch": 0.029313280363223608, + "epoch": 0.029272577857122617, "grad_norm": 0.0, - "learning_rate": 1.952741020793951e-05, - "loss": 1.0394, + "learning_rate": 1.950897072710104e-05, + "loss": 1.2051, "step": 1033 }, { - "epoch": 0.029341657207718502, + "epoch": 0.029300915299385076, "grad_norm": 0.0, - "learning_rate": 1.954631379962193e-05, - "loss": 1.1735, + "learning_rate": 1.9527856468366383e-05, + "loss": 1.1603, "step": 1034 }, { - "epoch": 0.029370034052213393, + "epoch": 0.02932925274164754, "grad_norm": 0.0, - "learning_rate": 1.956521739130435e-05, - "loss": 1.1027, + "learning_rate": 1.9546742209631732e-05, + "loss": 1.2018, "step": 1035 }, { - "epoch": 0.029398410896708287, + "epoch": 0.02935759018391, "grad_norm": 0.0, - "learning_rate": 1.958412098298677e-05, - "loss": 1.103, + "learning_rate": 1.9565627950897074e-05, + "loss": 1.1445, "step": 1036 }, { - "epoch": 0.029426787741203178, + "epoch": 0.02938592762617246, "grad_norm": 0.0, - "learning_rate": 1.960302457466919e-05, - "loss": 1.0724, + "learning_rate": 1.958451369216242e-05, + "loss": 1.2405, "step": 1037 }, { - "epoch": 0.02945516458569807, + "epoch": 0.029414265068434923, "grad_norm": 0.0, - "learning_rate": 1.962192816635161e-05, - "loss": 1.0233, + "learning_rate": 1.9603399433427762e-05, + "loss": 1.1291, "step": 1038 }, { - "epoch": 0.029483541430192963, + "epoch": 0.029442602510697385, "grad_norm": 0.0, - "learning_rate": 1.9640831758034028e-05, - "loss": 1.1242, + "learning_rate": 1.9622285174693108e-05, + "loss": 1.1038, "step": 1039 }, { - "epoch": 0.029511918274687854, + "epoch": 0.029470939952959845, "grad_norm": 0.0, - "learning_rate": 1.9659735349716448e-05, - "loss": 1.0401, + "learning_rate": 1.9641170915958453e-05, + "loss": 1.1838, "step": 1040 }, { - "epoch": 0.029540295119182748, + "epoch": 0.029499277395222307, "grad_norm": 0.0, - "learning_rate": 1.9678638941398867e-05, - "loss": 1.0078, + "learning_rate": 1.96600566572238e-05, + "loss": 1.1879, "step": 1041 }, { - "epoch": 0.02956867196367764, + "epoch": 0.02952761483748477, "grad_norm": 0.0, - "learning_rate": 1.9697542533081287e-05, - "loss": 1.0826, + "learning_rate": 1.9678942398489144e-05, + "loss": 1.2451, "step": 1042 }, { - "epoch": 0.02959704880817253, + "epoch": 0.02955595227974723, "grad_norm": 0.0, - "learning_rate": 1.9716446124763707e-05, - "loss": 1.1392, + "learning_rate": 1.9697828139754486e-05, + "loss": 1.1383, "step": 1043 }, { - "epoch": 0.029625425652667424, + "epoch": 0.02958428972200969, "grad_norm": 0.0, - "learning_rate": 1.9735349716446126e-05, - "loss": 0.906, + "learning_rate": 1.9716713881019832e-05, + "loss": 1.1562, "step": 1044 }, { - "epoch": 0.029653802497162315, + "epoch": 0.029612627164272154, "grad_norm": 0.0, - "learning_rate": 1.9754253308128546e-05, - "loss": 1.1176, + "learning_rate": 1.9735599622285174e-05, + "loss": 1.1584, "step": 1045 }, { - "epoch": 0.02968217934165721, + "epoch": 0.029640964606534613, "grad_norm": 0.0, - "learning_rate": 1.9773156899810966e-05, - "loss": 1.1241, + "learning_rate": 1.9754485363550523e-05, + "loss": 1.1212, "step": 1046 }, { - "epoch": 0.0297105561861521, + "epoch": 0.029669302048797076, "grad_norm": 0.0, - "learning_rate": 1.9792060491493385e-05, - "loss": 1.1982, + "learning_rate": 1.9773371104815865e-05, + "loss": 1.2668, "step": 1047 }, { - "epoch": 0.02973893303064699, + "epoch": 0.029697639491059538, "grad_norm": 0.0, - "learning_rate": 1.9810964083175805e-05, - "loss": 1.0876, + "learning_rate": 1.979225684608121e-05, + "loss": 1.1325, "step": 1048 }, { - "epoch": 0.029767309875141885, + "epoch": 0.029725976933321997, "grad_norm": 0.0, - "learning_rate": 1.9829867674858225e-05, - "loss": 1.1379, + "learning_rate": 1.9811142587346556e-05, + "loss": 1.2148, "step": 1049 }, { - "epoch": 0.029795686719636776, + "epoch": 0.02975431437558446, "grad_norm": 0.0, - "learning_rate": 1.9848771266540644e-05, - "loss": 1.0798, + "learning_rate": 1.98300283286119e-05, + "loss": 1.1513, "step": 1050 }, { - "epoch": 0.02982406356413167, + "epoch": 0.029782651817846922, "grad_norm": 0.0, - "learning_rate": 1.9867674858223064e-05, - "loss": 1.1047, + "learning_rate": 1.9848914069877244e-05, + "loss": 1.1302, "step": 1051 }, { - "epoch": 0.02985244040862656, + "epoch": 0.02981098926010938, "grad_norm": 0.0, - "learning_rate": 1.9886578449905484e-05, - "loss": 1.0774, + "learning_rate": 1.986779981114259e-05, + "loss": 1.0855, "step": 1052 }, { - "epoch": 0.02988081725312145, + "epoch": 0.029839326702371844, "grad_norm": 0.0, - "learning_rate": 1.9905482041587903e-05, - "loss": 1.1715, + "learning_rate": 1.9886685552407935e-05, + "loss": 1.195, "step": 1053 }, { - "epoch": 0.029909194097616346, + "epoch": 0.029867664144634307, "grad_norm": 0.0, - "learning_rate": 1.9924385633270323e-05, - "loss": 1.1429, + "learning_rate": 1.9905571293673277e-05, + "loss": 1.1566, "step": 1054 }, { - "epoch": 0.029937570942111236, + "epoch": 0.029896001586896766, "grad_norm": 0.0, - "learning_rate": 1.9943289224952743e-05, - "loss": 1.1236, + "learning_rate": 1.9924457034938623e-05, + "loss": 1.1047, "step": 1055 }, { - "epoch": 0.02996594778660613, + "epoch": 0.02992433902915923, "grad_norm": 0.0, - "learning_rate": 1.9962192816635162e-05, - "loss": 1.0279, + "learning_rate": 1.9943342776203965e-05, + "loss": 1.1091, "step": 1056 }, { - "epoch": 0.02999432463110102, + "epoch": 0.02995267647142169, "grad_norm": 0.0, - "learning_rate": 1.9981096408317582e-05, - "loss": 1.1338, + "learning_rate": 1.9962228517469314e-05, + "loss": 1.0957, "step": 1057 }, { - "epoch": 0.030022701475595912, + "epoch": 0.02998101391368415, "grad_norm": 0.0, - "learning_rate": 2e-05, - "loss": 1.3414, + "learning_rate": 1.9981114258734656e-05, + "loss": 1.1157, "step": 1058 }, { - "epoch": 0.030051078320090806, + "epoch": 0.030009351355946613, "grad_norm": 0.0, - "learning_rate": 1.9999999957764777e-05, - "loss": 1.0999, + "learning_rate": 2e-05, + "loss": 1.2177, "step": 1059 }, { - "epoch": 0.030079455164585697, + "epoch": 0.030037688798209075, "grad_norm": 0.0, - "learning_rate": 1.9999999831059104e-05, - "loss": 1.0542, + "learning_rate": 1.9999999957883145e-05, + "loss": 1.1057, "step": 1060 }, { - "epoch": 0.03010783200908059, + "epoch": 0.030066026240471534, "grad_norm": 0.0, - "learning_rate": 1.999999961988298e-05, - "loss": 1.1465, + "learning_rate": 1.9999999831532575e-05, + "loss": 1.1277, "step": 1061 }, { - "epoch": 0.030136208853575482, + "epoch": 0.030094363682733997, "grad_norm": 0.0, - "learning_rate": 1.999999932423641e-05, - "loss": 1.0553, + "learning_rate": 1.9999999620948292e-05, + "loss": 1.1524, "step": 1062 }, { - "epoch": 0.030164585698070373, + "epoch": 0.03012270112499646, "grad_norm": 0.0, - "learning_rate": 1.9999998944119402e-05, - "loss": 1.0979, + "learning_rate": 1.9999999326130303e-05, + "loss": 1.2402, "step": 1063 }, { - "epoch": 0.030192962542565267, + "epoch": 0.03015103856725892, "grad_norm": 0.0, - "learning_rate": 1.9999998479531948e-05, - "loss": 1.1504, + "learning_rate": 1.9999998947078603e-05, + "loss": 1.1557, "step": 1064 }, { - "epoch": 0.030221339387060158, + "epoch": 0.03017937600952138, "grad_norm": 0.0, - "learning_rate": 1.9999997930474058e-05, - "loss": 1.1111, + "learning_rate": 1.9999998483793198e-05, + "loss": 1.1688, "step": 1065 }, { - "epoch": 0.030249716231555052, + "epoch": 0.030207713451783844, "grad_norm": 0.0, - "learning_rate": 1.9999997296945736e-05, - "loss": 1.1091, + "learning_rate": 1.9999997936274092e-05, + "loss": 1.123, "step": 1066 }, { - "epoch": 0.030278093076049943, + "epoch": 0.030236050894046303, "grad_norm": 0.0, - "learning_rate": 1.9999996578946986e-05, - "loss": 0.903, + "learning_rate": 1.999999730452129e-05, + "loss": 1.0443, "step": 1067 }, { - "epoch": 0.030306469920544834, + "epoch": 0.030264388336308765, "grad_norm": 0.0, - "learning_rate": 1.999999577647782e-05, - "loss": 1.084, + "learning_rate": 1.99999965885348e-05, + "loss": 1.0908, "step": 1068 }, { - "epoch": 0.030334846765039728, + "epoch": 0.030292725778571228, "grad_norm": 0.0, - "learning_rate": 1.9999994889538238e-05, - "loss": 1.1923, + "learning_rate": 1.9999995788314622e-05, + "loss": 1.2873, "step": 1069 }, { - "epoch": 0.03036322360953462, + "epoch": 0.030321063220833687, "grad_norm": 0.0, - "learning_rate": 1.999999391812825e-05, - "loss": 1.0813, + "learning_rate": 1.9999994903860772e-05, + "loss": 1.201, "step": 1070 }, { - "epoch": 0.030391600454029513, + "epoch": 0.03034940066309615, "grad_norm": 0.0, - "learning_rate": 1.999999286224786e-05, - "loss": 1.0852, + "learning_rate": 1.9999993935173247e-05, + "loss": 1.1473, "step": 1071 }, { - "epoch": 0.030419977298524404, + "epoch": 0.03037773810535861, "grad_norm": 0.0, - "learning_rate": 1.9999991721897084e-05, - "loss": 0.9843, + "learning_rate": 1.999999288225206e-05, + "loss": 1.1736, "step": 1072 }, { - "epoch": 0.030448354143019295, + "epoch": 0.03040607554762107, "grad_norm": 0.0, - "learning_rate": 1.9999990497075926e-05, - "loss": 1.2191, + "learning_rate": 1.9999991745097218e-05, + "loss": 1.1696, "step": 1073 }, { - "epoch": 0.03047673098751419, + "epoch": 0.030434412989883534, "grad_norm": 0.0, - "learning_rate": 1.9999989187784404e-05, - "loss": 1.1279, + "learning_rate": 1.9999990523708736e-05, + "loss": 1.0495, "step": 1074 }, { - "epoch": 0.03050510783200908, + "epoch": 0.030462750432145993, "grad_norm": 0.0, - "learning_rate": 1.9999987794022518e-05, - "loss": 1.0517, + "learning_rate": 1.9999989218086615e-05, + "loss": 1.1643, "step": 1075 }, { - "epoch": 0.030533484676503974, + "epoch": 0.030491087874408455, "grad_norm": 0.0, - "learning_rate": 1.9999986315790288e-05, - "loss": 1.0317, + "learning_rate": 1.9999987828230875e-05, + "loss": 1.1394, "step": 1076 }, { - "epoch": 0.030561861520998865, + "epoch": 0.030519425316670918, "grad_norm": 0.0, - "learning_rate": 1.9999984753087725e-05, - "loss": 1.1552, + "learning_rate": 1.9999986354141524e-05, + "loss": 1.1703, "step": 1077 }, { - "epoch": 0.030590238365493756, + "epoch": 0.030547762758933377, "grad_norm": 0.0, - "learning_rate": 1.999998310591484e-05, - "loss": 1.0956, + "learning_rate": 1.9999984795818572e-05, + "loss": 1.1324, "step": 1078 }, { - "epoch": 0.03061861520998865, + "epoch": 0.03057610020119584, "grad_norm": 0.0, - "learning_rate": 1.9999981374271648e-05, - "loss": 1.1677, + "learning_rate": 1.9999983153262038e-05, + "loss": 1.1898, "step": 1079 }, { - "epoch": 0.03064699205448354, + "epoch": 0.030604437643458302, "grad_norm": 0.0, - "learning_rate": 1.9999979558158165e-05, - "loss": 1.0638, + "learning_rate": 1.999998142647193e-05, + "loss": 1.2068, "step": 1080 }, { - "epoch": 0.030675368898978435, + "epoch": 0.03063277508572076, "grad_norm": 0.0, - "learning_rate": 1.9999977657574403e-05, - "loss": 1.1348, + "learning_rate": 1.999997961544827e-05, + "loss": 1.2494, "step": 1081 }, { - "epoch": 0.030703745743473326, + "epoch": 0.030661112527983224, "grad_norm": 0.0, - "learning_rate": 1.9999975672520385e-05, - "loss": 1.1708, + "learning_rate": 1.9999977720191063e-05, + "loss": 1.2412, "step": 1082 }, { - "epoch": 0.030732122587968216, + "epoch": 0.030689449970245686, "grad_norm": 0.0, - "learning_rate": 1.9999973602996116e-05, - "loss": 1.0725, + "learning_rate": 1.999997574070033e-05, + "loss": 1.228, "step": 1083 }, { - "epoch": 0.03076049943246311, + "epoch": 0.030717787412508146, "grad_norm": 0.0, - "learning_rate": 1.9999971449001627e-05, - "loss": 1.0288, + "learning_rate": 1.999997367697609e-05, + "loss": 1.2275, "step": 1084 }, { - "epoch": 0.030788876276958, + "epoch": 0.030746124854770608, "grad_norm": 0.0, - "learning_rate": 1.9999969210536924e-05, - "loss": 1.145, + "learning_rate": 1.999997152901836e-05, + "loss": 1.071, "step": 1085 }, { - "epoch": 0.030817253121452896, + "epoch": 0.03077446229703307, "grad_norm": 0.0, - "learning_rate": 1.9999966887602036e-05, - "loss": 1.239, + "learning_rate": 1.9999969296827152e-05, + "loss": 1.2005, "step": 1086 }, { - "epoch": 0.030845629965947786, + "epoch": 0.03080279973929553, "grad_norm": 0.0, - "learning_rate": 1.9999964480196977e-05, - "loss": 1.0939, + "learning_rate": 1.9999966980402495e-05, + "loss": 1.1568, "step": 1087 }, { - "epoch": 0.030874006810442677, + "epoch": 0.030831137181557992, "grad_norm": 0.0, - "learning_rate": 1.9999961988321765e-05, - "loss": 1.1566, + "learning_rate": 1.99999645797444e-05, + "loss": 1.0773, "step": 1088 }, { - "epoch": 0.03090238365493757, + "epoch": 0.030859474623820455, "grad_norm": 0.0, - "learning_rate": 1.999995941197643e-05, - "loss": 1.1091, + "learning_rate": 1.999996209485289e-05, + "loss": 1.1944, "step": 1089 }, { - "epoch": 0.030930760499432462, + "epoch": 0.030887812066082914, "grad_norm": 0.0, - "learning_rate": 1.9999956751160986e-05, - "loss": 1.1646, + "learning_rate": 1.9999959525727983e-05, + "loss": 1.2032, "step": 1090 }, { - "epoch": 0.030959137343927357, + "epoch": 0.030916149508345377, "grad_norm": 0.0, - "learning_rate": 1.9999954005875457e-05, - "loss": 1.1113, + "learning_rate": 1.9999956872369706e-05, + "loss": 1.2258, "step": 1091 }, { - "epoch": 0.030987514188422247, + "epoch": 0.03094448695060784, "grad_norm": 0.0, - "learning_rate": 1.999995117611987e-05, - "loss": 1.1141, + "learning_rate": 1.999995413477808e-05, + "loss": 1.1927, "step": 1092 }, { - "epoch": 0.031015891032917138, + "epoch": 0.0309728243928703, "grad_norm": 0.0, - "learning_rate": 1.999994826189424e-05, - "loss": 1.1011, + "learning_rate": 1.999995131295312e-05, + "loss": 1.2244, "step": 1093 }, { - "epoch": 0.031044267877412032, + "epoch": 0.03100116183513276, "grad_norm": 0.0, - "learning_rate": 1.99999452631986e-05, - "loss": 1.1009, + "learning_rate": 1.9999948406894868e-05, + "loss": 1.1487, "step": 1094 }, { - "epoch": 0.031072644721906923, + "epoch": 0.031029499277395223, "grad_norm": 0.0, - "learning_rate": 1.9999942180032973e-05, - "loss": 1.1047, + "learning_rate": 1.999994541660333e-05, + "loss": 1.1634, "step": 1095 }, { - "epoch": 0.031101021566401817, + "epoch": 0.031057836719657683, "grad_norm": 0.0, - "learning_rate": 1.999993901239739e-05, - "loss": 1.0235, + "learning_rate": 1.999994234207854e-05, + "loss": 1.2123, "step": 1096 }, { - "epoch": 0.031129398410896708, + "epoch": 0.031086174161920145, "grad_norm": 0.0, - "learning_rate": 1.9999935760291868e-05, - "loss": 0.9928, + "learning_rate": 1.9999939183320523e-05, + "loss": 1.1459, "step": 1097 }, { - "epoch": 0.0311577752553916, + "epoch": 0.031114511604182608, "grad_norm": 0.0, - "learning_rate": 1.9999932423716437e-05, - "loss": 1.189, + "learning_rate": 1.9999935940329304e-05, + "loss": 1.2354, "step": 1098 }, { - "epoch": 0.031186152099886493, + "epoch": 0.031142849046445067, "grad_norm": 0.0, - "learning_rate": 1.999992900267113e-05, - "loss": 1.012, + "learning_rate": 1.999993261310491e-05, + "loss": 1.0645, "step": 1099 }, { - "epoch": 0.031214528944381384, + "epoch": 0.03117118648870753, "grad_norm": 0.0, - "learning_rate": 1.9999925497155973e-05, - "loss": 1.0475, + "learning_rate": 1.999992920164737e-05, + "loss": 1.1756, "step": 1100 }, { - "epoch": 0.031242905788876278, + "epoch": 0.031199523930969992, "grad_norm": 0.0, - "learning_rate": 1.9999921907170994e-05, - "loss": 1.0188, + "learning_rate": 1.9999925705956716e-05, + "loss": 1.1328, "step": 1101 }, { - "epoch": 0.03127128263337117, + "epoch": 0.03122786137323245, "grad_norm": 0.0, - "learning_rate": 1.9999918232716228e-05, - "loss": 0.9334, + "learning_rate": 1.9999922126032975e-05, + "loss": 1.1094, "step": 1102 }, { - "epoch": 0.03129965947786606, + "epoch": 0.031256198815494914, "grad_norm": 0.0, - "learning_rate": 1.9999914473791704e-05, - "loss": 1.1162, + "learning_rate": 1.9999918461876174e-05, + "loss": 1.0765, "step": 1103 }, { - "epoch": 0.031328036322360954, + "epoch": 0.03128453625775737, "grad_norm": 0.0, - "learning_rate": 1.999991063039745e-05, - "loss": 1.1131, + "learning_rate": 1.9999914713486344e-05, + "loss": 1.1599, "step": 1104 }, { - "epoch": 0.03135641316685585, + "epoch": 0.03131287370001984, "grad_norm": 0.0, - "learning_rate": 1.99999067025335e-05, - "loss": 0.9573, + "learning_rate": 1.9999910880863523e-05, + "loss": 1.1106, "step": 1105 }, { - "epoch": 0.031384790011350736, + "epoch": 0.0313412111422823, "grad_norm": 0.0, - "learning_rate": 1.999990269019989e-05, - "loss": 1.2264, + "learning_rate": 1.9999906964007738e-05, + "loss": 1.178, "step": 1106 }, { - "epoch": 0.03141316685584563, + "epoch": 0.03136954858454476, "grad_norm": 0.0, - "learning_rate": 1.9999898593396655e-05, - "loss": 1.0248, + "learning_rate": 1.999990296291902e-05, + "loss": 1.1269, "step": 1107 }, { - "epoch": 0.031441543700340524, + "epoch": 0.03139788602680722, "grad_norm": 0.0, - "learning_rate": 1.9999894412123825e-05, - "loss": 1.1758, + "learning_rate": 1.9999898877597412e-05, + "loss": 1.1661, "step": 1108 }, { - "epoch": 0.03146992054483541, + "epoch": 0.03142622346906968, "grad_norm": 0.0, - "learning_rate": 1.999989014638144e-05, - "loss": 1.159, + "learning_rate": 1.9999894708042943e-05, + "loss": 1.1697, "step": 1109 }, { - "epoch": 0.031498297389330306, + "epoch": 0.03145456091133214, "grad_norm": 0.0, - "learning_rate": 1.999988579616953e-05, - "loss": 1.1033, + "learning_rate": 1.9999890454255642e-05, + "loss": 1.2137, "step": 1110 }, { - "epoch": 0.0315266742338252, + "epoch": 0.03148289835359461, "grad_norm": 0.0, - "learning_rate": 1.9999881361488135e-05, - "loss": 1.1097, + "learning_rate": 1.9999886116235553e-05, + "loss": 1.2826, "step": 1111 }, { - "epoch": 0.031555051078320094, + "epoch": 0.031511235795857066, "grad_norm": 0.0, - "learning_rate": 1.9999876842337294e-05, - "loss": 1.1604, + "learning_rate": 1.999988169398271e-05, + "loss": 1.1034, "step": 1112 }, { - "epoch": 0.03158342792281498, + "epoch": 0.031539573238119525, "grad_norm": 0.0, - "learning_rate": 1.9999872238717044e-05, - "loss": 1.2469, + "learning_rate": 1.9999877187497148e-05, + "loss": 1.0012, "step": 1113 }, { - "epoch": 0.031611804767309876, + "epoch": 0.03156791068038199, "grad_norm": 0.0, - "learning_rate": 1.9999867550627426e-05, - "loss": 1.0451, + "learning_rate": 1.9999872596778908e-05, + "loss": 1.2349, "step": 1114 }, { - "epoch": 0.03164018161180477, + "epoch": 0.03159624812264445, "grad_norm": 0.0, - "learning_rate": 1.9999862778068474e-05, - "loss": 1.1688, + "learning_rate": 1.9999867921828028e-05, + "loss": 1.1709, "step": 1115 }, { - "epoch": 0.03166855845629966, + "epoch": 0.03162458556490691, "grad_norm": 0.0, - "learning_rate": 1.9999857921040232e-05, - "loss": 0.9243, + "learning_rate": 1.999986316264455e-05, + "loss": 1.2156, "step": 1116 }, { - "epoch": 0.03169693530079455, + "epoch": 0.031652923007169376, "grad_norm": 0.0, - "learning_rate": 1.999985297954274e-05, - "loss": 1.1182, + "learning_rate": 1.999985831922851e-05, + "loss": 1.1354, "step": 1117 }, { - "epoch": 0.031725312145289446, + "epoch": 0.031681260449431835, "grad_norm": 0.0, - "learning_rate": 1.9999847953576038e-05, - "loss": 1.1164, + "learning_rate": 1.9999853391579946e-05, + "loss": 1.2399, "step": 1118 }, { - "epoch": 0.03175368898978433, + "epoch": 0.031709597891694294, "grad_norm": 0.0, - "learning_rate": 1.999984284314018e-05, - "loss": 1.1098, + "learning_rate": 1.9999848379698906e-05, + "loss": 1.3628, "step": 1119 }, { - "epoch": 0.03178206583427923, + "epoch": 0.03173793533395676, "grad_norm": 0.0, - "learning_rate": 1.999983764823519e-05, - "loss": 1.0537, + "learning_rate": 1.999984328358543e-05, + "loss": 1.1736, "step": 1120 }, { - "epoch": 0.03181044267877412, + "epoch": 0.03176627277621922, "grad_norm": 0.0, - "learning_rate": 1.9999832368861126e-05, - "loss": 1.1184, + "learning_rate": 1.999983810323956e-05, + "loss": 1.1956, "step": 1121 }, { - "epoch": 0.031838819523269016, + "epoch": 0.03179461021848168, "grad_norm": 0.0, - "learning_rate": 1.9999827005018028e-05, - "loss": 1.1411, + "learning_rate": 1.9999832838661343e-05, + "loss": 1.0685, "step": 1122 }, { - "epoch": 0.0318671963677639, + "epoch": 0.031822947660744144, "grad_norm": 0.0, - "learning_rate": 1.9999821556705942e-05, - "loss": 1.2577, + "learning_rate": 1.9999827489850817e-05, + "loss": 1.1263, "step": 1123 }, { - "epoch": 0.0318955732122588, + "epoch": 0.0318512851030066, "grad_norm": 0.0, - "learning_rate": 1.9999816023924915e-05, - "loss": 1.0672, + "learning_rate": 1.9999822056808035e-05, + "loss": 1.2598, "step": 1124 }, { - "epoch": 0.03192395005675369, + "epoch": 0.03187962254526906, "grad_norm": 0.0, - "learning_rate": 1.999981040667499e-05, - "loss": 1.0743, + "learning_rate": 1.9999816539533033e-05, + "loss": 1.3718, "step": 1125 }, { - "epoch": 0.03195232690124858, + "epoch": 0.03190795998753153, "grad_norm": 0.0, - "learning_rate": 1.999980470495622e-05, - "loss": 1.0168, + "learning_rate": 1.9999810938025867e-05, + "loss": 1.1281, "step": 1126 }, { - "epoch": 0.03198070374574347, + "epoch": 0.03193629742979399, "grad_norm": 0.0, - "learning_rate": 1.9999798918768648e-05, - "loss": 1.1617, + "learning_rate": 1.999980525228658e-05, + "loss": 1.1145, "step": 1127 }, { - "epoch": 0.03200908059023837, + "epoch": 0.03196463487205645, "grad_norm": 0.0, - "learning_rate": 1.9999793048112327e-05, - "loss": 1.1187, + "learning_rate": 1.9999799482315216e-05, + "loss": 1.2564, "step": 1128 }, { - "epoch": 0.032037457434733255, + "epoch": 0.03199297231431891, "grad_norm": 0.0, - "learning_rate": 1.9999787092987303e-05, - "loss": 1.1457, + "learning_rate": 1.9999793628111833e-05, + "loss": 1.0937, "step": 1129 }, { - "epoch": 0.03206583427922815, + "epoch": 0.03202130975658137, "grad_norm": 0.0, - "learning_rate": 1.9999781053393626e-05, - "loss": 1.1346, + "learning_rate": 1.999978768967647e-05, + "loss": 1.1761, "step": 1130 }, { - "epoch": 0.03209421112372304, + "epoch": 0.03204964719884383, "grad_norm": 0.0, - "learning_rate": 1.999977492933135e-05, - "loss": 1.0232, + "learning_rate": 1.9999781667009185e-05, + "loss": 1.0614, "step": 1131 }, { - "epoch": 0.03212258796821794, + "epoch": 0.0320779846411063, "grad_norm": 0.0, - "learning_rate": 1.999976872080053e-05, - "loss": 1.1667, + "learning_rate": 1.9999775560110026e-05, + "loss": 1.2065, "step": 1132 }, { - "epoch": 0.032150964812712825, + "epoch": 0.032106322083368756, "grad_norm": 0.0, - "learning_rate": 1.9999762427801207e-05, - "loss": 1.0385, + "learning_rate": 1.9999769368979044e-05, + "loss": 1.1468, "step": 1133 }, { - "epoch": 0.03217934165720772, + "epoch": 0.032134659525631215, "grad_norm": 0.0, - "learning_rate": 1.9999756050333446e-05, - "loss": 1.114, + "learning_rate": 1.999976309361629e-05, + "loss": 1.2514, "step": 1134 }, { - "epoch": 0.03220771850170261, + "epoch": 0.03216299696789368, "grad_norm": 0.0, - "learning_rate": 1.9999749588397293e-05, - "loss": 1.0872, + "learning_rate": 1.999975673402182e-05, + "loss": 1.2632, "step": 1135 }, { - "epoch": 0.0322360953461975, + "epoch": 0.03219133441015614, "grad_norm": 0.0, - "learning_rate": 1.999974304199281e-05, - "loss": 1.103, + "learning_rate": 1.9999750290195684e-05, + "loss": 1.2292, "step": 1136 }, { - "epoch": 0.032264472190692395, + "epoch": 0.0322196718524186, "grad_norm": 0.0, - "learning_rate": 1.9999736411120044e-05, - "loss": 1.1421, + "learning_rate": 1.9999743762137937e-05, + "loss": 1.2195, "step": 1137 }, { - "epoch": 0.03229284903518729, + "epoch": 0.032248009294681065, "grad_norm": 0.0, - "learning_rate": 1.9999729695779057e-05, - "loss": 1.052, + "learning_rate": 1.9999737149848638e-05, + "loss": 1.1015, "step": 1138 }, { - "epoch": 0.032321225879682176, + "epoch": 0.032276346736943524, "grad_norm": 0.0, - "learning_rate": 1.9999722895969904e-05, - "loss": 0.9908, + "learning_rate": 1.999973045332784e-05, + "loss": 1.279, "step": 1139 }, { - "epoch": 0.03234960272417707, + "epoch": 0.032304684179205984, "grad_norm": 0.0, - "learning_rate": 1.9999716011692644e-05, - "loss": 1.133, + "learning_rate": 1.9999723672575592e-05, + "loss": 1.1362, "step": 1140 }, { - "epoch": 0.032377979568671965, + "epoch": 0.03233302162146845, "grad_norm": 0.0, - "learning_rate": 1.9999709042947327e-05, - "loss": 1.1512, + "learning_rate": 1.9999716807591967e-05, + "loss": 1.0932, "step": 1141 }, { - "epoch": 0.03240635641316686, + "epoch": 0.03236135906373091, "grad_norm": 0.0, - "learning_rate": 1.9999701989734025e-05, - "loss": 1.1963, + "learning_rate": 1.9999709858377008e-05, + "loss": 0.9578, "step": 1142 }, { - "epoch": 0.032434733257661746, + "epoch": 0.03238969650599337, "grad_norm": 0.0, - "learning_rate": 1.9999694852052788e-05, - "loss": 1.2105, + "learning_rate": 1.999970282493078e-05, + "loss": 1.2375, "step": 1143 }, { - "epoch": 0.03246311010215664, + "epoch": 0.032418033948255834, "grad_norm": 0.0, - "learning_rate": 1.999968762990368e-05, - "loss": 1.1259, + "learning_rate": 1.9999695707253345e-05, + "loss": 1.0653, "step": 1144 }, { - "epoch": 0.032491486946651535, + "epoch": 0.03244637139051829, "grad_norm": 0.0, - "learning_rate": 1.999968032328676e-05, - "loss": 1.099, + "learning_rate": 1.9999688505344757e-05, + "loss": 1.2114, "step": 1145 }, { - "epoch": 0.03251986379114642, + "epoch": 0.03247470883278075, "grad_norm": 0.0, - "learning_rate": 1.999967293220209e-05, - "loss": 1.1671, + "learning_rate": 1.9999681219205085e-05, + "loss": 1.2282, "step": 1146 }, { - "epoch": 0.032548240635641316, + "epoch": 0.03250304627504321, "grad_norm": 0.0, - "learning_rate": 1.999966545664973e-05, - "loss": 1.1042, + "learning_rate": 1.999967384883438e-05, + "loss": 1.0339, "step": 1147 }, { - "epoch": 0.03257661748013621, + "epoch": 0.03253138371730568, "grad_norm": 0.0, - "learning_rate": 1.9999657896629752e-05, - "loss": 1.1061, + "learning_rate": 1.999966639423271e-05, + "loss": 1.1278, "step": 1148 }, { - "epoch": 0.0326049943246311, + "epoch": 0.032559721159568136, "grad_norm": 0.0, - "learning_rate": 1.9999650252142214e-05, - "loss": 1.082, + "learning_rate": 1.9999658855400135e-05, + "loss": 1.1992, "step": 1149 }, { - "epoch": 0.03263337116912599, + "epoch": 0.032588058601830595, "grad_norm": 0.0, - "learning_rate": 1.9999642523187178e-05, - "loss": 1.0002, + "learning_rate": 1.9999651232336723e-05, + "loss": 1.2797, "step": 1150 }, { - "epoch": 0.03266174801362089, + "epoch": 0.03261639604409306, "grad_norm": 0.0, - "learning_rate": 1.9999634709764716e-05, - "loss": 1.0108, + "learning_rate": 1.9999643525042532e-05, + "loss": 1.1696, "step": 1151 }, { - "epoch": 0.03269012485811578, + "epoch": 0.03264473348635552, "grad_norm": 0.0, - "learning_rate": 1.9999626811874885e-05, - "loss": 1.125, + "learning_rate": 1.9999635733517634e-05, + "loss": 1.0923, "step": 1152 }, { - "epoch": 0.03271850170261067, + "epoch": 0.03267307092861798, "grad_norm": 0.0, - "learning_rate": 1.9999618829517763e-05, - "loss": 1.1801, + "learning_rate": 1.9999627857762088e-05, + "loss": 1.1005, "step": 1153 }, { - "epoch": 0.03274687854710556, + "epoch": 0.032701408370880446, "grad_norm": 0.0, - "learning_rate": 1.9999610762693407e-05, - "loss": 1.1085, + "learning_rate": 1.9999619897775963e-05, + "loss": 1.1058, "step": 1154 }, { - "epoch": 0.03277525539160046, + "epoch": 0.032729745813142905, "grad_norm": 0.0, - "learning_rate": 1.999960261140189e-05, - "loss": 1.1584, + "learning_rate": 1.999961185355933e-05, + "loss": 1.2092, "step": 1155 }, { - "epoch": 0.032803632236095344, + "epoch": 0.032758083255405364, "grad_norm": 0.0, - "learning_rate": 1.9999594375643277e-05, - "loss": 1.0705, + "learning_rate": 1.999960372511225e-05, + "loss": 1.1573, "step": 1156 }, { - "epoch": 0.03283200908059024, + "epoch": 0.03278642069766783, "grad_norm": 0.0, - "learning_rate": 1.9999586055417645e-05, - "loss": 1.075, + "learning_rate": 1.9999595512434794e-05, + "loss": 1.2426, "step": 1157 }, { - "epoch": 0.03286038592508513, + "epoch": 0.03281475813993029, "grad_norm": 0.0, - "learning_rate": 1.999957765072506e-05, - "loss": 1.0788, + "learning_rate": 1.9999587215527034e-05, + "loss": 1.1599, "step": 1158 }, { - "epoch": 0.03288876276958002, + "epoch": 0.03284309558219275, "grad_norm": 0.0, - "learning_rate": 1.9999569161565595e-05, - "loss": 1.0356, + "learning_rate": 1.9999578834389036e-05, + "loss": 1.337, "step": 1159 }, { - "epoch": 0.032917139614074914, + "epoch": 0.032871433024455214, "grad_norm": 0.0, - "learning_rate": 1.9999560587939316e-05, - "loss": 1.0715, + "learning_rate": 1.9999570369020876e-05, + "loss": 1.1805, "step": 1160 }, { - "epoch": 0.03294551645856981, + "epoch": 0.03289977046671767, "grad_norm": 0.0, - "learning_rate": 1.9999551929846298e-05, - "loss": 0.994, + "learning_rate": 1.9999561819422615e-05, + "loss": 1.2177, "step": 1161 }, { - "epoch": 0.0329738933030647, + "epoch": 0.03292810790898013, "grad_norm": 0.0, - "learning_rate": 1.999954318728662e-05, - "loss": 1.0629, + "learning_rate": 1.9999553185594337e-05, + "loss": 1.1292, "step": 1162 }, { - "epoch": 0.03300227014755959, + "epoch": 0.0329564453512426, "grad_norm": 0.0, - "learning_rate": 1.9999534360260348e-05, - "loss": 1.2315, + "learning_rate": 1.9999544467536106e-05, + "loss": 1.1381, "step": 1163 }, { - "epoch": 0.033030646992054484, + "epoch": 0.03298478279350506, "grad_norm": 0.0, - "learning_rate": 1.9999525448767558e-05, - "loss": 1.1747, + "learning_rate": 1.9999535665248e-05, + "loss": 1.1651, "step": 1164 }, { - "epoch": 0.03305902383654938, + "epoch": 0.03301312023576752, "grad_norm": 0.0, - "learning_rate": 1.999951645280833e-05, - "loss": 1.0809, + "learning_rate": 1.9999526778730092e-05, + "loss": 1.2767, "step": 1165 }, { - "epoch": 0.033087400681044266, + "epoch": 0.03304145767802998, "grad_norm": 0.0, - "learning_rate": 1.9999507372382738e-05, - "loss": 0.9746, + "learning_rate": 1.9999517807982455e-05, + "loss": 1.1718, "step": 1166 }, { - "epoch": 0.03311577752553916, + "epoch": 0.03306979512029244, "grad_norm": 0.0, - "learning_rate": 1.9999498207490856e-05, - "loss": 1.1134, + "learning_rate": 1.999950875300517e-05, + "loss": 1.217, "step": 1167 }, { - "epoch": 0.033144154370034054, + "epoch": 0.0330981325625549, "grad_norm": 0.0, - "learning_rate": 1.9999488958132764e-05, - "loss": 1.1071, + "learning_rate": 1.9999499613798306e-05, + "loss": 1.1891, "step": 1168 }, { - "epoch": 0.03317253121452894, + "epoch": 0.03312647000481737, "grad_norm": 0.0, - "learning_rate": 1.9999479624308537e-05, - "loss": 1.1554, + "learning_rate": 1.9999490390361947e-05, + "loss": 1.1319, "step": 1169 }, { - "epoch": 0.033200908059023836, + "epoch": 0.033154807447079826, "grad_norm": 0.0, - "learning_rate": 1.999947020601826e-05, - "loss": 1.064, + "learning_rate": 1.9999481082696164e-05, + "loss": 1.216, "step": 1170 }, { - "epoch": 0.03322928490351873, + "epoch": 0.033183144889342285, "grad_norm": 0.0, - "learning_rate": 1.9999460703262004e-05, - "loss": 1.2153, + "learning_rate": 1.999947169080104e-05, + "loss": 1.2047, "step": 1171 }, { - "epoch": 0.033257661748013624, + "epoch": 0.03321148233160475, "grad_norm": 0.0, - "learning_rate": 1.9999451116039858e-05, - "loss": 1.0988, + "learning_rate": 1.999946221467665e-05, + "loss": 1.0793, "step": 1172 }, { - "epoch": 0.03328603859250851, + "epoch": 0.03323981977386721, "grad_norm": 0.0, - "learning_rate": 1.9999441444351898e-05, - "loss": 1.1158, + "learning_rate": 1.999945265432308e-05, + "loss": 1.204, "step": 1173 }, { - "epoch": 0.033314415437003406, + "epoch": 0.03326815721612967, "grad_norm": 0.0, - "learning_rate": 1.9999431688198205e-05, - "loss": 1.0896, + "learning_rate": 1.9999443009740406e-05, + "loss": 1.1729, "step": 1174 }, { - "epoch": 0.0333427922814983, + "epoch": 0.033296494658392135, "grad_norm": 0.0, - "learning_rate": 1.9999421847578864e-05, - "loss": 1.0946, + "learning_rate": 1.9999433280928713e-05, + "loss": 1.0794, "step": 1175 }, { - "epoch": 0.03337116912599319, + "epoch": 0.033324832100654594, "grad_norm": 0.0, - "learning_rate": 1.9999411922493958e-05, - "loss": 1.16, + "learning_rate": 1.9999423467888078e-05, + "loss": 1.1641, "step": 1176 }, { - "epoch": 0.03339954597048808, + "epoch": 0.033353169542917054, "grad_norm": 0.0, - "learning_rate": 1.999940191294357e-05, - "loss": 1.1924, + "learning_rate": 1.9999413570618588e-05, + "loss": 1.1555, "step": 1177 }, { - "epoch": 0.033427922814982976, + "epoch": 0.03338150698517952, "grad_norm": 0.0, - "learning_rate": 1.9999391818927783e-05, - "loss": 1.0836, + "learning_rate": 1.9999403589120317e-05, + "loss": 1.13, "step": 1178 }, { - "epoch": 0.03345629965947786, + "epoch": 0.03340984442744198, "grad_norm": 0.0, - "learning_rate": 1.9999381640446682e-05, - "loss": 1.1586, + "learning_rate": 1.9999393523393365e-05, + "loss": 1.1579, "step": 1179 }, { - "epoch": 0.03348467650397276, + "epoch": 0.03343818186970444, "grad_norm": 0.0, - "learning_rate": 1.9999371377500356e-05, - "loss": 1.194, + "learning_rate": 1.9999383373437803e-05, + "loss": 1.1213, "step": 1180 }, { - "epoch": 0.03351305334846765, + "epoch": 0.033466519311966904, "grad_norm": 0.0, - "learning_rate": 1.9999361030088894e-05, - "loss": 1.136, + "learning_rate": 1.9999373139253724e-05, + "loss": 1.2017, "step": 1181 }, { - "epoch": 0.033541430192962546, + "epoch": 0.03349485675422936, "grad_norm": 0.0, - "learning_rate": 1.9999350598212377e-05, - "loss": 1.0001, + "learning_rate": 1.999936282084121e-05, + "loss": 1.2267, "step": 1182 }, { - "epoch": 0.03356980703745743, + "epoch": 0.03352319419649182, "grad_norm": 0.0, - "learning_rate": 1.9999340081870894e-05, - "loss": 1.054, + "learning_rate": 1.999935241820035e-05, + "loss": 1.2183, "step": 1183 }, { - "epoch": 0.03359818388195233, + "epoch": 0.03355153163875429, "grad_norm": 0.0, - "learning_rate": 1.999932948106454e-05, - "loss": 1.1672, + "learning_rate": 1.9999341931331234e-05, + "loss": 1.1418, "step": 1184 }, { - "epoch": 0.03362656072644722, + "epoch": 0.03357986908101675, "grad_norm": 0.0, - "learning_rate": 1.99993187957934e-05, - "loss": 1.0803, + "learning_rate": 1.9999331360233946e-05, + "loss": 0.979, "step": 1185 }, { - "epoch": 0.03365493757094211, + "epoch": 0.033608206523279206, "grad_norm": 0.0, - "learning_rate": 1.999930802605756e-05, - "loss": 1.1684, + "learning_rate": 1.9999320704908576e-05, + "loss": 1.2149, "step": 1186 }, { - "epoch": 0.033683314415437, + "epoch": 0.03363654396554167, "grad_norm": 0.0, - "learning_rate": 1.9999297171857122e-05, - "loss": 1.1723, + "learning_rate": 1.9999309965355215e-05, + "loss": 1.1176, "step": 1187 }, { - "epoch": 0.0337116912599319, + "epoch": 0.03366488140780413, "grad_norm": 0.0, - "learning_rate": 1.9999286233192167e-05, - "loss": 1.1069, + "learning_rate": 1.9999299141573955e-05, + "loss": 1.3741, "step": 1188 }, { - "epoch": 0.033740068104426785, + "epoch": 0.03369321885006659, "grad_norm": 0.0, - "learning_rate": 1.999927521006279e-05, - "loss": 1.0953, + "learning_rate": 1.999928823356488e-05, + "loss": 1.1312, "step": 1189 }, { - "epoch": 0.03376844494892168, + "epoch": 0.03372155629232906, "grad_norm": 0.0, - "learning_rate": 1.9999264102469094e-05, - "loss": 1.1483, + "learning_rate": 1.9999277241328093e-05, + "loss": 1.136, "step": 1190 }, { - "epoch": 0.03379682179341657, + "epoch": 0.033749893734591516, "grad_norm": 0.0, - "learning_rate": 1.9999252910411156e-05, - "loss": 1.1584, + "learning_rate": 1.9999266164863678e-05, + "loss": 0.9818, "step": 1191 }, { - "epoch": 0.03382519863791147, + "epoch": 0.033778231176853975, "grad_norm": 0.0, - "learning_rate": 1.9999241633889085e-05, - "loss": 1.2137, + "learning_rate": 1.9999255004171732e-05, + "loss": 1.1506, "step": 1192 }, { - "epoch": 0.033853575482406355, + "epoch": 0.03380656861911644, "grad_norm": 0.0, - "learning_rate": 1.999923027290297e-05, - "loss": 1.0601, + "learning_rate": 1.9999243759252345e-05, + "loss": 1.1532, "step": 1193 }, { - "epoch": 0.03388195232690125, + "epoch": 0.0338349060613789, "grad_norm": 0.0, - "learning_rate": 1.9999218827452902e-05, - "loss": 1.066, + "learning_rate": 1.9999232430105618e-05, + "loss": 1.0527, "step": 1194 }, { - "epoch": 0.03391032917139614, + "epoch": 0.03386324350364136, "grad_norm": 0.0, - "learning_rate": 1.9999207297538987e-05, - "loss": 1.0919, + "learning_rate": 1.9999221016731646e-05, + "loss": 1.179, "step": 1195 }, { - "epoch": 0.03393870601589103, + "epoch": 0.033891580945903825, "grad_norm": 0.0, - "learning_rate": 1.9999195683161318e-05, - "loss": 1.0858, + "learning_rate": 1.9999209519130516e-05, + "loss": 1.1043, "step": 1196 }, { - "epoch": 0.033967082860385925, + "epoch": 0.033919918388166284, "grad_norm": 0.0, - "learning_rate": 1.9999183984319993e-05, - "loss": 1.142, + "learning_rate": 1.999919793730233e-05, + "loss": 1.1042, "step": 1197 }, { - "epoch": 0.03399545970488082, + "epoch": 0.03394825583042874, "grad_norm": 0.0, - "learning_rate": 1.999917220101511e-05, - "loss": 1.0593, + "learning_rate": 1.9999186271247192e-05, + "loss": 1.1712, "step": 1198 }, { - "epoch": 0.034023836549375706, + "epoch": 0.03397659327269121, "grad_norm": 0.0, - "learning_rate": 1.9999160333246774e-05, - "loss": 1.1547, + "learning_rate": 1.9999174520965194e-05, + "loss": 1.16, "step": 1199 }, { - "epoch": 0.0340522133938706, + "epoch": 0.03400493071495367, "grad_norm": 0.0, - "learning_rate": 1.999914838101508e-05, - "loss": 1.1496, + "learning_rate": 1.9999162686456435e-05, + "loss": 1.0082, "step": 1200 }, { - "epoch": 0.034080590238365495, + "epoch": 0.03403326815721613, "grad_norm": 0.0, - "learning_rate": 1.9999136344320128e-05, - "loss": 1.0646, + "learning_rate": 1.9999150767721014e-05, + "loss": 1.1883, "step": 1201 }, { - "epoch": 0.03410896708286039, + "epoch": 0.034061605599478594, "grad_norm": 0.0, - "learning_rate": 1.9999124223162023e-05, - "loss": 1.0381, + "learning_rate": 1.9999138764759035e-05, + "loss": 1.2209, "step": 1202 }, { - "epoch": 0.034137343927355276, + "epoch": 0.03408994304174105, "grad_norm": 0.0, - "learning_rate": 1.9999112017540866e-05, - "loss": 1.1329, + "learning_rate": 1.9999126677570597e-05, + "loss": 1.082, "step": 1203 }, { - "epoch": 0.03416572077185017, + "epoch": 0.03411828048400351, "grad_norm": 0.0, - "learning_rate": 1.9999099727456757e-05, - "loss": 1.2656, + "learning_rate": 1.99991145061558e-05, + "loss": 1.1062, "step": 1204 }, { - "epoch": 0.034194097616345065, + "epoch": 0.03414661792626598, "grad_norm": 0.0, - "learning_rate": 1.9999087352909807e-05, - "loss": 1.1876, + "learning_rate": 1.9999102250514753e-05, + "loss": 1.1429, "step": 1205 }, { - "epoch": 0.03422247446083995, + "epoch": 0.03417495536852844, "grad_norm": 0.0, - "learning_rate": 1.9999074893900116e-05, - "loss": 1.0812, + "learning_rate": 1.999908991064755e-05, + "loss": 1.222, "step": 1206 }, { - "epoch": 0.034250851305334847, + "epoch": 0.034203292810790896, "grad_norm": 0.0, - "learning_rate": 1.9999062350427792e-05, - "loss": 1.1744, + "learning_rate": 1.9999077486554302e-05, + "loss": 1.1794, "step": 1207 }, { - "epoch": 0.03427922814982974, + "epoch": 0.03423163025305336, "grad_norm": 0.0, - "learning_rate": 1.9999049722492934e-05, - "loss": 1.1959, + "learning_rate": 1.999906497823511e-05, + "loss": 1.1129, "step": 1208 }, { - "epoch": 0.03430760499432463, + "epoch": 0.03425996769531582, "grad_norm": 0.0, - "learning_rate": 1.9999037010095658e-05, - "loss": 1.0103, + "learning_rate": 1.999905238569008e-05, + "loss": 1.2514, "step": 1209 }, { - "epoch": 0.03433598183881952, + "epoch": 0.03428830513757828, "grad_norm": 0.0, - "learning_rate": 1.9999024213236064e-05, - "loss": 1.1115, + "learning_rate": 1.9999039708919322e-05, + "loss": 1.2388, "step": 1210 }, { - "epoch": 0.03436435868331442, + "epoch": 0.034316642579840746, "grad_norm": 0.0, - "learning_rate": 1.9999011331914264e-05, - "loss": 1.0889, + "learning_rate": 1.9999026947922937e-05, + "loss": 1.1348, "step": 1211 }, { - "epoch": 0.03439273552780931, + "epoch": 0.034344980022103205, "grad_norm": 0.0, - "learning_rate": 1.9998998366130368e-05, - "loss": 1.2025, + "learning_rate": 1.999901410270103e-05, + "loss": 1.2103, "step": 1212 }, { - "epoch": 0.0344211123723042, + "epoch": 0.034373317464365664, "grad_norm": 0.0, - "learning_rate": 1.999898531588448e-05, - "loss": 1.0353, + "learning_rate": 1.9999001173253724e-05, + "loss": 1.1261, "step": 1213 }, { - "epoch": 0.03444948921679909, + "epoch": 0.03440165490662813, "grad_norm": 0.0, - "learning_rate": 1.9998972181176715e-05, - "loss": 1.0947, + "learning_rate": 1.9998988159581116e-05, + "loss": 1.0942, "step": 1214 }, { - "epoch": 0.03447786606129399, + "epoch": 0.03442999234889059, "grad_norm": 0.0, - "learning_rate": 1.999895896200718e-05, - "loss": 1.2683, + "learning_rate": 1.9998975061683312e-05, + "loss": 1.1217, "step": 1215 }, { - "epoch": 0.034506242905788874, + "epoch": 0.03445832979115305, "grad_norm": 0.0, - "learning_rate": 1.9998945658375995e-05, - "loss": 1.0974, + "learning_rate": 1.999896187956043e-05, + "loss": 1.0616, "step": 1216 }, { - "epoch": 0.03453461975028377, + "epoch": 0.034486667233415515, "grad_norm": 0.0, - "learning_rate": 1.9998932270283264e-05, - "loss": 1.1192, + "learning_rate": 1.9998948613212583e-05, + "loss": 1.158, "step": 1217 }, { - "epoch": 0.03456299659477866, + "epoch": 0.034515004675677974, "grad_norm": 0.0, - "learning_rate": 1.9998918797729103e-05, - "loss": 1.1417, + "learning_rate": 1.9998935262639877e-05, + "loss": 1.1301, "step": 1218 }, { - "epoch": 0.03459137343927355, + "epoch": 0.03454334211794043, "grad_norm": 0.0, - "learning_rate": 1.999890524071362e-05, - "loss": 1.1412, + "learning_rate": 1.9998921827842423e-05, + "loss": 1.2439, "step": 1219 }, { - "epoch": 0.034619750283768444, + "epoch": 0.0345716795602029, "grad_norm": 0.0, - "learning_rate": 1.999889159923694e-05, - "loss": 1.1199, + "learning_rate": 1.9998908308820343e-05, + "loss": 1.171, "step": 1220 }, { - "epoch": 0.03464812712826334, + "epoch": 0.03460001700246536, "grad_norm": 0.0, - "learning_rate": 1.999887787329917e-05, - "loss": 1.1506, + "learning_rate": 1.999889470557374e-05, + "loss": 1.1764, "step": 1221 }, { - "epoch": 0.03467650397275823, + "epoch": 0.03462835444472782, "grad_norm": 0.0, - "learning_rate": 1.9998864062900434e-05, - "loss": 1.1284, + "learning_rate": 1.9998881018102735e-05, + "loss": 1.0937, "step": 1222 }, { - "epoch": 0.03470488081725312, + "epoch": 0.03465669188699028, "grad_norm": 0.0, - "learning_rate": 1.999885016804084e-05, - "loss": 1.1406, + "learning_rate": 1.9998867246407447e-05, + "loss": 1.2155, "step": 1223 }, { - "epoch": 0.034733257661748014, + "epoch": 0.03468502932925274, "grad_norm": 0.0, - "learning_rate": 1.999883618872051e-05, - "loss": 1.0665, + "learning_rate": 1.999885339048798e-05, + "loss": 1.2572, "step": 1224 }, { - "epoch": 0.03476163450624291, + "epoch": 0.0347133667715152, "grad_norm": 0.0, - "learning_rate": 1.9998822124939565e-05, - "loss": 1.1335, + "learning_rate": 1.999883945034446e-05, + "loss": 1.192, "step": 1225 }, { - "epoch": 0.034790011350737796, + "epoch": 0.03474170421377767, "grad_norm": 0.0, - "learning_rate": 1.9998807976698114e-05, - "loss": 1.172, + "learning_rate": 1.9998825425977007e-05, + "loss": 1.1452, "step": 1226 }, { - "epoch": 0.03481838819523269, + "epoch": 0.03477004165604013, "grad_norm": 0.0, - "learning_rate": 1.9998793743996285e-05, - "loss": 1.1661, + "learning_rate": 1.9998811317385728e-05, + "loss": 1.0504, "step": 1227 }, { - "epoch": 0.034846765039727584, + "epoch": 0.034798379098302586, "grad_norm": 0.0, - "learning_rate": 1.9998779426834195e-05, - "loss": 1.0439, + "learning_rate": 1.999879712457075e-05, + "loss": 1.181, "step": 1228 }, { - "epoch": 0.03487514188422247, + "epoch": 0.03482671654056505, "grad_norm": 0.0, - "learning_rate": 1.999876502521197e-05, - "loss": 1.0295, + "learning_rate": 1.9998782847532195e-05, + "loss": 1.2238, "step": 1229 }, { - "epoch": 0.034903518728717366, + "epoch": 0.03485505398282751, "grad_norm": 0.0, - "learning_rate": 1.999875053912972e-05, - "loss": 1.1784, + "learning_rate": 1.9998768486270178e-05, + "loss": 1.1603, "step": 1230 }, { - "epoch": 0.03493189557321226, + "epoch": 0.03488339142508997, "grad_norm": 0.0, - "learning_rate": 1.999873596858758e-05, - "loss": 1.1077, + "learning_rate": 1.999875404078482e-05, + "loss": 1.1111, "step": 1231 }, { - "epoch": 0.034960272417707154, + "epoch": 0.034911728867352436, "grad_norm": 0.0, - "learning_rate": 1.9998721313585668e-05, - "loss": 0.9946, + "learning_rate": 1.999873951107624e-05, + "loss": 1.1611, "step": 1232 }, { - "epoch": 0.03498864926220204, + "epoch": 0.034940066309614895, "grad_norm": 0.0, - "learning_rate": 1.9998706574124104e-05, - "loss": 1.1368, + "learning_rate": 1.999872489714457e-05, + "loss": 1.0623, "step": 1233 }, { - "epoch": 0.035017026106696936, + "epoch": 0.034968403751877354, "grad_norm": 0.0, - "learning_rate": 1.9998691750203018e-05, - "loss": 1.0347, + "learning_rate": 1.9998710198989923e-05, + "loss": 1.2147, "step": 1234 }, { - "epoch": 0.03504540295119183, + "epoch": 0.03499674119413982, "grad_norm": 0.0, - "learning_rate": 1.9998676841822532e-05, - "loss": 1.0566, + "learning_rate": 1.9998695416612432e-05, + "loss": 1.1675, "step": 1235 }, { - "epoch": 0.03507377979568672, + "epoch": 0.03502507863640228, "grad_norm": 0.0, - "learning_rate": 1.9998661848982775e-05, - "loss": 1.1119, + "learning_rate": 1.9998680550012212e-05, + "loss": 1.2388, "step": 1236 }, { - "epoch": 0.03510215664018161, + "epoch": 0.03505341607866474, "grad_norm": 0.0, - "learning_rate": 1.9998646771683876e-05, - "loss": 1.1189, + "learning_rate": 1.9998665599189392e-05, + "loss": 1.2023, "step": 1237 }, { - "epoch": 0.035130533484676506, + "epoch": 0.035081753520927204, "grad_norm": 0.0, - "learning_rate": 1.999863160992595e-05, - "loss": 1.1071, + "learning_rate": 1.99986505641441e-05, + "loss": 1.219, "step": 1238 }, { - "epoch": 0.03515891032917139, + "epoch": 0.035110090963189663, "grad_norm": 0.0, - "learning_rate": 1.9998616363709136e-05, - "loss": 1.0993, + "learning_rate": 1.999863544487646e-05, + "loss": 1.1605, "step": 1239 }, { - "epoch": 0.03518728717366629, + "epoch": 0.03513842840545212, "grad_norm": 0.0, - "learning_rate": 1.999860103303356e-05, - "loss": 1.0732, + "learning_rate": 1.9998620241386606e-05, + "loss": 1.1453, "step": 1240 }, { - "epoch": 0.03521566401816118, + "epoch": 0.03516676584771459, "grad_norm": 0.0, - "learning_rate": 1.999858561789936e-05, - "loss": 1.0735, + "learning_rate": 1.999860495367466e-05, + "loss": 1.2155, "step": 1241 }, { - "epoch": 0.035244040862656076, + "epoch": 0.03519510328997705, "grad_norm": 0.0, - "learning_rate": 1.9998570118306647e-05, - "loss": 1.0306, + "learning_rate": 1.9998589581740746e-05, + "loss": 1.0904, "step": 1242 }, { - "epoch": 0.03527241770715096, + "epoch": 0.03522344073223951, "grad_norm": 0.0, - "learning_rate": 1.9998554534255565e-05, - "loss": 1.1354, + "learning_rate": 1.9998574125585005e-05, + "loss": 1.1765, "step": 1243 }, { - "epoch": 0.03530079455164586, + "epoch": 0.03525177817450197, "grad_norm": 0.0, - "learning_rate": 1.9998538865746245e-05, - "loss": 1.135, + "learning_rate": 1.9998558585207556e-05, + "loss": 1.1675, "step": 1244 }, { - "epoch": 0.03532917139614075, + "epoch": 0.03528011561676443, "grad_norm": 0.0, - "learning_rate": 1.999852311277882e-05, - "loss": 1.2171, + "learning_rate": 1.999854296060854e-05, + "loss": 1.229, "step": 1245 }, { - "epoch": 0.03535754824063564, + "epoch": 0.03530845305902689, "grad_norm": 0.0, - "learning_rate": 1.9998507275353417e-05, - "loss": 1.1097, + "learning_rate": 1.999852725178808e-05, + "loss": 1.2728, "step": 1246 }, { - "epoch": 0.03538592508513053, + "epoch": 0.03533679050128935, "grad_norm": 0.0, - "learning_rate": 1.9998491353470176e-05, - "loss": 1.0301, + "learning_rate": 1.9998511458746314e-05, + "loss": 1.2739, "step": 1247 }, { - "epoch": 0.03541430192962543, + "epoch": 0.035365127943551816, "grad_norm": 0.0, - "learning_rate": 1.9998475347129226e-05, - "loss": 1.1252, + "learning_rate": 1.9998495581483373e-05, + "loss": 1.1815, "step": 1248 }, { - "epoch": 0.035442678774120315, + "epoch": 0.035393465385814275, "grad_norm": 0.0, - "learning_rate": 1.999845925633071e-05, - "loss": 1.0934, + "learning_rate": 1.999847961999939e-05, + "loss": 1.1095, "step": 1249 }, { - "epoch": 0.03547105561861521, + "epoch": 0.035421802828076734, "grad_norm": 0.0, - "learning_rate": 1.9998443081074754e-05, - "loss": 1.1041, + "learning_rate": 1.9998463574294505e-05, + "loss": 1.1174, "step": 1250 }, { - "epoch": 0.0354994324631101, + "epoch": 0.0354501402703392, "grad_norm": 0.0, - "learning_rate": 1.9998426821361502e-05, - "loss": 1.196, + "learning_rate": 1.9998447444368843e-05, + "loss": 1.1722, "step": 1251 }, { - "epoch": 0.035527809307605, + "epoch": 0.03547847771260166, "grad_norm": 0.0, - "learning_rate": 1.9998410477191087e-05, - "loss": 1.1141, + "learning_rate": 1.9998431230222545e-05, + "loss": 1.172, "step": 1252 }, { - "epoch": 0.035556186152099885, + "epoch": 0.03550681515486412, "grad_norm": 0.0, - "learning_rate": 1.9998394048563653e-05, - "loss": 1.0136, + "learning_rate": 1.999841493185575e-05, + "loss": 1.1559, "step": 1253 }, { - "epoch": 0.03558456299659478, + "epoch": 0.035535152597126585, "grad_norm": 0.0, - "learning_rate": 1.9998377535479334e-05, - "loss": 1.0909, + "learning_rate": 1.9998398549268594e-05, + "loss": 1.0451, "step": 1254 }, { - "epoch": 0.03561293984108967, + "epoch": 0.035563490039389044, "grad_norm": 0.0, - "learning_rate": 1.9998360937938274e-05, - "loss": 1.1531, + "learning_rate": 1.9998382082461214e-05, + "loss": 1.166, "step": 1255 }, { - "epoch": 0.03564131668558456, + "epoch": 0.0355918274816515, "grad_norm": 0.0, - "learning_rate": 1.9998344255940602e-05, - "loss": 1.0243, + "learning_rate": 1.999836553143375e-05, + "loss": 1.0435, "step": 1256 }, { - "epoch": 0.035669693530079455, + "epoch": 0.03562016492391397, "grad_norm": 0.0, - "learning_rate": 1.9998327489486474e-05, - "loss": 1.1557, + "learning_rate": 1.9998348896186336e-05, + "loss": 1.1311, "step": 1257 }, { - "epoch": 0.03569807037457435, + "epoch": 0.03564850236617643, "grad_norm": 0.0, - "learning_rate": 1.999831063857602e-05, - "loss": 1.0771, + "learning_rate": 1.999833217671912e-05, + "loss": 1.1125, "step": 1258 }, { - "epoch": 0.035726447219069236, + "epoch": 0.03567683980843889, "grad_norm": 0.0, - "learning_rate": 1.9998293703209388e-05, - "loss": 1.0991, + "learning_rate": 1.9998315373032238e-05, + "loss": 1.1952, "step": 1259 }, { - "epoch": 0.03575482406356413, + "epoch": 0.03570517725070135, "grad_norm": 0.0, - "learning_rate": 1.9998276683386716e-05, - "loss": 1.0704, + "learning_rate": 1.999829848512583e-05, + "loss": 1.1578, "step": 1260 }, { - "epoch": 0.035783200908059025, + "epoch": 0.03573351469296381, "grad_norm": 0.0, - "learning_rate": 1.9998259579108155e-05, - "loss": 1.1071, + "learning_rate": 1.9998281513000046e-05, + "loss": 1.1906, "step": 1261 }, { - "epoch": 0.03581157775255392, + "epoch": 0.03576185213522627, "grad_norm": 0.0, - "learning_rate": 1.9998242390373846e-05, - "loss": 1.0708, + "learning_rate": 1.999826445665502e-05, + "loss": 1.2062, "step": 1262 }, { - "epoch": 0.035839954597048806, + "epoch": 0.03579018957748874, "grad_norm": 0.0, - "learning_rate": 1.999822511718393e-05, - "loss": 0.9999, + "learning_rate": 1.9998247316090903e-05, + "loss": 1.1735, "step": 1263 }, { - "epoch": 0.0358683314415437, + "epoch": 0.035818527019751197, "grad_norm": 0.0, - "learning_rate": 1.999820775953856e-05, - "loss": 1.0459, + "learning_rate": 1.9998230091307834e-05, + "loss": 1.16, "step": 1264 }, { - "epoch": 0.035896708286038595, + "epoch": 0.035846864462013656, "grad_norm": 0.0, - "learning_rate": 1.9998190317437883e-05, - "loss": 1.1022, + "learning_rate": 1.9998212782305963e-05, + "loss": 1.1991, "step": 1265 }, { - "epoch": 0.03592508513053348, + "epoch": 0.03587520190427612, "grad_norm": 0.0, - "learning_rate": 1.999817279088204e-05, - "loss": 1.1146, + "learning_rate": 1.9998195389085432e-05, + "loss": 1.1449, "step": 1266 }, { - "epoch": 0.03595346197502838, + "epoch": 0.03590353934653858, "grad_norm": 0.0, - "learning_rate": 1.999815517987118e-05, - "loss": 1.176, + "learning_rate": 1.9998177911646387e-05, + "loss": 1.1, "step": 1267 }, { - "epoch": 0.03598183881952327, + "epoch": 0.03593187678880104, "grad_norm": 0.0, - "learning_rate": 1.9998137484405456e-05, - "loss": 1.1653, + "learning_rate": 1.9998160349988977e-05, + "loss": 1.1306, "step": 1268 }, { - "epoch": 0.03601021566401816, + "epoch": 0.035960214231063506, "grad_norm": 0.0, - "learning_rate": 1.9998119704485016e-05, - "loss": 0.9491, + "learning_rate": 1.999814270411335e-05, + "loss": 1.1344, "step": 1269 }, { - "epoch": 0.03603859250851305, + "epoch": 0.035988551673325965, "grad_norm": 0.0, - "learning_rate": 1.999810184011001e-05, - "loss": 1.1568, + "learning_rate": 1.9998124974019656e-05, + "loss": 1.1772, "step": 1270 }, { - "epoch": 0.03606696935300795, + "epoch": 0.036016889115588424, "grad_norm": 0.0, - "learning_rate": 1.9998083891280584e-05, - "loss": 1.1309, + "learning_rate": 1.999810715970804e-05, + "loss": 1.1909, "step": 1271 }, { - "epoch": 0.03609534619750284, + "epoch": 0.03604522655785089, "grad_norm": 0.0, - "learning_rate": 1.99980658579969e-05, - "loss": 1.0787, + "learning_rate": 1.9998089261178656e-05, + "loss": 1.0425, "step": 1272 }, { - "epoch": 0.03612372304199773, + "epoch": 0.03607356400011335, "grad_norm": 0.0, - "learning_rate": 1.99980477402591e-05, - "loss": 1.1589, + "learning_rate": 1.999807127843165e-05, + "loss": 1.1695, "step": 1273 }, { - "epoch": 0.03615209988649262, + "epoch": 0.03610190144237581, "grad_norm": 0.0, - "learning_rate": 1.999802953806734e-05, - "loss": 1.2008, + "learning_rate": 1.999805321146718e-05, + "loss": 1.21, "step": 1274 }, { - "epoch": 0.03618047673098752, + "epoch": 0.036130238884638274, "grad_norm": 0.0, - "learning_rate": 1.9998011251421775e-05, - "loss": 1.0872, + "learning_rate": 1.9998035060285398e-05, + "loss": 1.1601, "step": 1275 }, { - "epoch": 0.036208853575482404, + "epoch": 0.036158576326900733, "grad_norm": 0.0, - "learning_rate": 1.9997992880322564e-05, - "loss": 1.1526, + "learning_rate": 1.9998016824886452e-05, + "loss": 1.2114, "step": 1276 }, { - "epoch": 0.0362372304199773, + "epoch": 0.03618691376916319, "grad_norm": 0.0, - "learning_rate": 1.9997974424769855e-05, - "loss": 1.0873, + "learning_rate": 1.9997998505270496e-05, + "loss": 1.0878, "step": 1277 }, { - "epoch": 0.03626560726447219, + "epoch": 0.03621525121142566, "grad_norm": 0.0, - "learning_rate": 1.9997955884763804e-05, - "loss": 1.1399, + "learning_rate": 1.9997980101437687e-05, + "loss": 1.1833, "step": 1278 }, { - "epoch": 0.03629398410896708, + "epoch": 0.03624358865368812, "grad_norm": 0.0, - "learning_rate": 1.9997937260304577e-05, - "loss": 1.0861, + "learning_rate": 1.9997961613388176e-05, + "loss": 1.0976, "step": 1279 }, { - "epoch": 0.036322360953461974, + "epoch": 0.03627192609595058, "grad_norm": 0.0, - "learning_rate": 1.999791855139232e-05, - "loss": 1.1161, + "learning_rate": 1.9997943041122126e-05, + "loss": 1.1192, "step": 1280 }, { - "epoch": 0.03635073779795687, + "epoch": 0.03630026353821304, "grad_norm": 0.0, - "learning_rate": 1.9997899758027194e-05, - "loss": 1.0652, + "learning_rate": 1.9997924384639687e-05, + "loss": 1.0392, "step": 1281 }, { - "epoch": 0.03637911464245176, + "epoch": 0.0363286009804755, "grad_norm": 0.0, - "learning_rate": 1.999788088020936e-05, - "loss": 1.0107, + "learning_rate": 1.9997905643941017e-05, + "loss": 1.071, "step": 1282 }, { - "epoch": 0.03640749148694665, + "epoch": 0.03635693842273796, "grad_norm": 0.0, - "learning_rate": 1.999786191793898e-05, - "loss": 1.0565, + "learning_rate": 1.999788681902628e-05, + "loss": 1.1653, "step": 1283 }, { - "epoch": 0.036435868331441544, + "epoch": 0.03638527586500043, "grad_norm": 0.0, - "learning_rate": 1.9997842871216207e-05, - "loss": 1.0727, + "learning_rate": 1.9997867909895626e-05, + "loss": 1.0753, "step": 1284 }, { - "epoch": 0.03646424517593644, + "epoch": 0.036413613307262886, "grad_norm": 0.0, - "learning_rate": 1.999782374004121e-05, - "loss": 1.1263, + "learning_rate": 1.9997848916549217e-05, + "loss": 1.231, "step": 1285 }, { - "epoch": 0.036492622020431326, + "epoch": 0.036441950749525345, "grad_norm": 0.0, - "learning_rate": 1.9997804524414145e-05, - "loss": 1.1734, + "learning_rate": 1.9997829838987215e-05, + "loss": 1.1675, "step": 1286 }, { - "epoch": 0.03652099886492622, + "epoch": 0.03647028819178781, "grad_norm": 0.0, - "learning_rate": 1.9997785224335174e-05, - "loss": 1.0879, + "learning_rate": 1.999781067720978e-05, + "loss": 1.3116, "step": 1287 }, { - "epoch": 0.036549375709421114, + "epoch": 0.03649862563405027, "grad_norm": 0.0, - "learning_rate": 1.9997765839804464e-05, - "loss": 1.1596, + "learning_rate": 1.9997791431217073e-05, + "loss": 1.1049, "step": 1288 }, { - "epoch": 0.036577752553916, + "epoch": 0.03652696307631273, "grad_norm": 0.0, - "learning_rate": 1.9997746370822174e-05, - "loss": 1.2098, + "learning_rate": 1.9997772101009255e-05, + "loss": 1.1519, "step": 1289 }, { - "epoch": 0.036606129398410896, + "epoch": 0.036555300518575196, "grad_norm": 0.0, - "learning_rate": 1.9997726817388476e-05, - "loss": 1.1445, + "learning_rate": 1.9997752686586495e-05, + "loss": 1.1761, "step": 1290 }, { - "epoch": 0.03663450624290579, + "epoch": 0.036583637960837655, "grad_norm": 0.0, - "learning_rate": 1.9997707179503526e-05, - "loss": 1.015, + "learning_rate": 1.9997733187948947e-05, + "loss": 1.162, "step": 1291 }, { - "epoch": 0.036662883087400684, + "epoch": 0.036611975403100114, "grad_norm": 0.0, - "learning_rate": 1.9997687457167495e-05, - "loss": 1.0678, + "learning_rate": 1.9997713605096782e-05, + "loss": 1.2352, "step": 1292 }, { - "epoch": 0.03669125993189557, + "epoch": 0.03664031284536258, "grad_norm": 0.0, - "learning_rate": 1.9997667650380547e-05, - "loss": 1.0296, + "learning_rate": 1.9997693938030162e-05, + "loss": 0.9322, "step": 1293 }, { - "epoch": 0.036719636776390466, + "epoch": 0.03666865028762504, "grad_norm": 0.0, - "learning_rate": 1.9997647759142852e-05, - "loss": 1.0941, + "learning_rate": 1.999767418674925e-05, + "loss": 1.1376, "step": 1294 }, { - "epoch": 0.03674801362088536, + "epoch": 0.0366969877298875, "grad_norm": 0.0, - "learning_rate": 1.9997627783454577e-05, - "loss": 1.0073, + "learning_rate": 1.999765435125422e-05, + "loss": 1.1626, "step": 1295 }, { - "epoch": 0.03677639046538025, + "epoch": 0.036725325172149964, "grad_norm": 0.0, - "learning_rate": 1.999760772331589e-05, - "loss": 1.1444, + "learning_rate": 1.999763443154523e-05, + "loss": 1.0471, "step": 1296 }, { - "epoch": 0.03680476730987514, + "epoch": 0.03675366261441242, "grad_norm": 0.0, - "learning_rate": 1.999758757872696e-05, - "loss": 1.0406, + "learning_rate": 1.999761442762246e-05, + "loss": 1.2251, "step": 1297 }, { - "epoch": 0.036833144154370036, + "epoch": 0.03678200005667488, "grad_norm": 0.0, - "learning_rate": 1.9997567349687963e-05, - "loss": 1.0349, + "learning_rate": 1.9997594339486065e-05, + "loss": 1.1693, "step": 1298 }, { - "epoch": 0.03686152099886492, + "epoch": 0.03681033749893735, "grad_norm": 0.0, - "learning_rate": 1.9997547036199062e-05, - "loss": 1.1228, + "learning_rate": 1.9997574167136225e-05, + "loss": 1.0763, "step": 1299 }, { - "epoch": 0.03688989784335982, + "epoch": 0.03683867494119981, "grad_norm": 0.0, - "learning_rate": 1.999752663826043e-05, - "loss": 1.0758, + "learning_rate": 1.99975539105731e-05, + "loss": 1.0429, "step": 1300 }, { - "epoch": 0.03691827468785471, + "epoch": 0.036867012383462267, "grad_norm": 0.0, - "learning_rate": 1.9997506155872246e-05, - "loss": 1.0991, + "learning_rate": 1.999753356979687e-05, + "loss": 1.2119, "step": 1301 }, { - "epoch": 0.036946651532349606, + "epoch": 0.03689534982572473, "grad_norm": 0.0, - "learning_rate": 1.9997485589034676e-05, - "loss": 1.1101, + "learning_rate": 1.9997513144807704e-05, + "loss": 1.0796, "step": 1302 }, { - "epoch": 0.03697502837684449, + "epoch": 0.03692368726798719, "grad_norm": 0.0, - "learning_rate": 1.9997464937747893e-05, - "loss": 1.0897, + "learning_rate": 1.9997492635605766e-05, + "loss": 1.1086, "step": 1303 }, { - "epoch": 0.03700340522133939, + "epoch": 0.03695202471024965, "grad_norm": 0.0, - "learning_rate": 1.999744420201208e-05, - "loss": 1.1467, + "learning_rate": 1.9997472042191243e-05, + "loss": 1.1471, "step": 1304 }, { - "epoch": 0.03703178206583428, + "epoch": 0.03698036215251212, "grad_norm": 0.0, - "learning_rate": 1.99974233818274e-05, - "loss": 1.096, + "learning_rate": 1.9997451364564294e-05, + "loss": 1.0806, "step": 1305 }, { - "epoch": 0.03706015891032917, + "epoch": 0.037008699594774576, "grad_norm": 0.0, - "learning_rate": 1.999740247719404e-05, - "loss": 1.0614, + "learning_rate": 1.99974306027251e-05, + "loss": 1.1058, "step": 1306 }, { - "epoch": 0.03708853575482406, + "epoch": 0.037037037037037035, "grad_norm": 0.0, - "learning_rate": 1.9997381488112166e-05, - "loss": 1.1507, + "learning_rate": 1.9997409756673838e-05, + "loss": 1.2297, "step": 1307 }, { - "epoch": 0.03711691259931896, + "epoch": 0.0370653744792995, "grad_norm": 0.0, - "learning_rate": 1.9997360414581967e-05, - "loss": 1.1402, + "learning_rate": 1.9997388826410683e-05, + "loss": 1.2601, "step": 1308 }, { - "epoch": 0.037145289443813845, + "epoch": 0.03709371192156196, "grad_norm": 0.0, - "learning_rate": 1.9997339256603613e-05, - "loss": 1.1121, + "learning_rate": 1.9997367811935807e-05, + "loss": 1.1926, "step": 1309 }, { - "epoch": 0.03717366628830874, + "epoch": 0.03712204936382442, "grad_norm": 0.0, - "learning_rate": 1.9997318014177283e-05, - "loss": 1.1083, + "learning_rate": 1.9997346713249387e-05, + "loss": 1.2147, "step": 1310 }, { - "epoch": 0.03720204313280363, + "epoch": 0.037150386806086885, "grad_norm": 0.0, - "learning_rate": 1.9997296687303162e-05, - "loss": 1.0427, + "learning_rate": 1.9997325530351605e-05, + "loss": 1.0473, "step": 1311 }, { - "epoch": 0.03723041997729853, + "epoch": 0.037178724248349344, "grad_norm": 0.0, - "learning_rate": 1.9997275275981428e-05, - "loss": 1.0449, + "learning_rate": 1.9997304263242638e-05, + "loss": 1.2541, "step": 1312 }, { - "epoch": 0.037258796821793415, + "epoch": 0.0372070616906118, "grad_norm": 0.0, - "learning_rate": 1.9997253780212253e-05, - "loss": 1.0616, + "learning_rate": 1.9997282911922667e-05, + "loss": 1.1086, "step": 1313 }, { - "epoch": 0.03728717366628831, + "epoch": 0.03723539913287427, "grad_norm": 0.0, - "learning_rate": 1.9997232199995832e-05, - "loss": 1.1141, + "learning_rate": 1.9997261476391867e-05, + "loss": 1.1202, "step": 1314 }, { - "epoch": 0.0373155505107832, + "epoch": 0.03726373657513673, "grad_norm": 0.0, - "learning_rate": 1.9997210535332337e-05, - "loss": 1.0897, + "learning_rate": 1.999723995665042e-05, + "loss": 1.2344, "step": 1315 }, { - "epoch": 0.03734392735527809, + "epoch": 0.03729207401739919, "grad_norm": 0.0, - "learning_rate": 1.9997188786221958e-05, - "loss": 1.0572, + "learning_rate": 1.9997218352698514e-05, + "loss": 1.1859, "step": 1316 }, { - "epoch": 0.037372304199772985, + "epoch": 0.037320411459661654, "grad_norm": 0.0, - "learning_rate": 1.9997166952664877e-05, - "loss": 1.141, + "learning_rate": 1.999719666453632e-05, + "loss": 1.3773, "step": 1317 }, { - "epoch": 0.03740068104426788, + "epoch": 0.03734874890192411, "grad_norm": 0.0, - "learning_rate": 1.9997145034661274e-05, - "loss": 1.1018, + "learning_rate": 1.9997174892164026e-05, + "loss": 1.1663, "step": 1318 }, { - "epoch": 0.037429057888762766, + "epoch": 0.03737708634418657, "grad_norm": 0.0, - "learning_rate": 1.9997123032211343e-05, - "loss": 0.9648, + "learning_rate": 1.999715303558182e-05, + "loss": 1.1291, "step": 1319 }, { - "epoch": 0.03745743473325766, + "epoch": 0.03740542378644904, "grad_norm": 0.0, - "learning_rate": 1.9997100945315256e-05, - "loss": 1.0932, + "learning_rate": 1.9997131094789875e-05, + "loss": 1.1042, "step": 1320 }, { - "epoch": 0.037485811577752555, + "epoch": 0.0374337612287115, "grad_norm": 0.0, - "learning_rate": 1.9997078773973217e-05, - "loss": 1.152, + "learning_rate": 1.999710906978839e-05, + "loss": 1.104, "step": 1321 }, { - "epoch": 0.03751418842224745, + "epoch": 0.037462098670973956, "grad_norm": 0.0, - "learning_rate": 1.9997056518185397e-05, - "loss": 1.1836, + "learning_rate": 1.9997086960577538e-05, + "loss": 1.1544, "step": 1322 }, { - "epoch": 0.037542565266742337, + "epoch": 0.03749043611323642, "grad_norm": 0.0, - "learning_rate": 1.9997034177951995e-05, - "loss": 1.0556, + "learning_rate": 1.9997064767157514e-05, + "loss": 1.1846, "step": 1323 }, { - "epoch": 0.03757094211123723, + "epoch": 0.03751877355549888, "grad_norm": 0.0, - "learning_rate": 1.9997011753273195e-05, - "loss": 1.04, + "learning_rate": 1.99970424895285e-05, + "loss": 1.1973, "step": 1324 }, { - "epoch": 0.037599318955732125, + "epoch": 0.03754711099776134, "grad_norm": 0.0, - "learning_rate": 1.999698924414919e-05, - "loss": 1.0475, + "learning_rate": 1.999702012769068e-05, + "loss": 1.1953, "step": 1325 }, { - "epoch": 0.03762769580022701, + "epoch": 0.037575448440023806, "grad_norm": 0.0, - "learning_rate": 1.999696665058016e-05, - "loss": 1.0991, + "learning_rate": 1.9996997681644252e-05, + "loss": 1.0909, "step": 1326 }, { - "epoch": 0.03765607264472191, + "epoch": 0.037603785882286266, "grad_norm": 0.0, - "learning_rate": 1.999694397256631e-05, - "loss": 1.1665, + "learning_rate": 1.99969751513894e-05, + "loss": 1.1836, "step": 1327 }, { - "epoch": 0.0376844494892168, + "epoch": 0.037632123324548725, "grad_norm": 0.0, - "learning_rate": 1.9996921210107823e-05, - "loss": 1.1594, + "learning_rate": 1.9996952536926312e-05, + "loss": 1.3152, "step": 1328 }, { - "epoch": 0.03771282633371169, + "epoch": 0.03766046076681119, "grad_norm": 0.0, - "learning_rate": 1.999689836320489e-05, - "loss": 1.1004, + "learning_rate": 1.999692983825518e-05, + "loss": 1.2467, "step": 1329 }, { - "epoch": 0.03774120317820658, + "epoch": 0.03768879820907365, "grad_norm": 0.0, - "learning_rate": 1.9996875431857715e-05, - "loss": 1.1137, + "learning_rate": 1.99969070553762e-05, + "loss": 1.2765, "step": 1330 }, { - "epoch": 0.03776958002270148, + "epoch": 0.03771713565133611, "grad_norm": 0.0, - "learning_rate": 1.9996852416066477e-05, - "loss": 1.1244, + "learning_rate": 1.9996884188289557e-05, + "loss": 1.1455, "step": 1331 }, { - "epoch": 0.03779795686719637, + "epoch": 0.037745473093598575, "grad_norm": 0.0, - "learning_rate": 1.999682931583138e-05, - "loss": 1.1667, + "learning_rate": 1.9996861236995443e-05, + "loss": 1.2548, "step": 1332 }, { - "epoch": 0.03782633371169126, + "epoch": 0.037773810535861034, "grad_norm": 0.0, - "learning_rate": 1.9996806131152614e-05, - "loss": 1.0464, + "learning_rate": 1.999683820149406e-05, + "loss": 1.1906, "step": 1333 }, { - "epoch": 0.03785471055618615, + "epoch": 0.03780214797812349, "grad_norm": 0.0, - "learning_rate": 1.999678286203038e-05, - "loss": 1.0611, + "learning_rate": 1.999681508178559e-05, + "loss": 1.1898, "step": 1334 }, { - "epoch": 0.03788308740068105, + "epoch": 0.03783048542038596, "grad_norm": 0.0, - "learning_rate": 1.999675950846487e-05, - "loss": 1.0367, + "learning_rate": 1.9996791877870242e-05, + "loss": 1.1996, "step": 1335 }, { - "epoch": 0.037911464245175934, + "epoch": 0.03785882286264842, "grad_norm": 0.0, - "learning_rate": 1.999673607045628e-05, - "loss": 1.2136, + "learning_rate": 1.9996768589748198e-05, + "loss": 1.2151, "step": 1336 }, { - "epoch": 0.03793984108967083, + "epoch": 0.03788716030491088, "grad_norm": 0.0, - "learning_rate": 1.999671254800482e-05, - "loss": 1.1363, + "learning_rate": 1.9996745217419664e-05, + "loss": 1.2276, "step": 1337 }, { - "epoch": 0.03796821793416572, + "epoch": 0.03791549774717334, "grad_norm": 0.0, - "learning_rate": 1.9996688941110672e-05, - "loss": 1.0368, + "learning_rate": 1.9996721760884833e-05, + "loss": 1.1868, "step": 1338 }, { - "epoch": 0.03799659477866061, + "epoch": 0.0379438351894358, "grad_norm": 0.0, - "learning_rate": 1.9996665249774047e-05, - "loss": 1.1336, + "learning_rate": 1.99966982201439e-05, + "loss": 1.1559, "step": 1339 }, { - "epoch": 0.038024971623155504, + "epoch": 0.03797217263169826, "grad_norm": 0.0, - "learning_rate": 1.9996641473995138e-05, - "loss": 1.0932, + "learning_rate": 1.9996674595197065e-05, + "loss": 1.178, "step": 1340 }, { - "epoch": 0.0380533484676504, + "epoch": 0.03800051007396073, "grad_norm": 0.0, - "learning_rate": 1.9996617613774152e-05, - "loss": 1.0707, + "learning_rate": 1.999665088604453e-05, + "loss": 1.227, "step": 1341 }, { - "epoch": 0.03808172531214529, + "epoch": 0.03802884751622319, "grad_norm": 0.0, - "learning_rate": 1.9996593669111286e-05, - "loss": 1.0793, + "learning_rate": 1.999662709268649e-05, + "loss": 1.1806, "step": 1342 }, { - "epoch": 0.03811010215664018, + "epoch": 0.038057184958485646, "grad_norm": 0.0, - "learning_rate": 1.9996569640006744e-05, - "loss": 1.0394, + "learning_rate": 1.9996603215123152e-05, + "loss": 1.1605, "step": 1343 }, { - "epoch": 0.038138479001135074, + "epoch": 0.03808552240074811, "grad_norm": 0.0, - "learning_rate": 1.9996545526460732e-05, - "loss": 1.1033, + "learning_rate": 1.999657925335471e-05, + "loss": 1.1041, "step": 1344 }, { - "epoch": 0.03816685584562997, + "epoch": 0.03811385984301057, "grad_norm": 0.0, - "learning_rate": 1.9996521328473446e-05, - "loss": 1.1003, + "learning_rate": 1.9996555207381368e-05, + "loss": 1.0201, "step": 1345 }, { - "epoch": 0.038195232690124856, + "epoch": 0.03814219728527303, "grad_norm": 0.0, - "learning_rate": 1.9996497046045096e-05, - "loss": 1.1318, + "learning_rate": 1.999653107720333e-05, + "loss": 1.0124, "step": 1346 }, { - "epoch": 0.03822360953461975, + "epoch": 0.03817053472753549, "grad_norm": 0.0, - "learning_rate": 1.9996472679175887e-05, - "loss": 1.0308, + "learning_rate": 1.9996506862820798e-05, + "loss": 1.1854, "step": 1347 }, { - "epoch": 0.038251986379114644, + "epoch": 0.038198872169797955, "grad_norm": 0.0, - "learning_rate": 1.9996448227866024e-05, - "loss": 1.1315, + "learning_rate": 1.9996482564233977e-05, + "loss": 1.1107, "step": 1348 }, { - "epoch": 0.03828036322360953, + "epoch": 0.038227209612060414, "grad_norm": 0.0, - "learning_rate": 1.9996423692115714e-05, - "loss": 1.0494, + "learning_rate": 1.9996458181443072e-05, + "loss": 1.0607, "step": 1349 }, { - "epoch": 0.038308740068104426, + "epoch": 0.03825554705432287, "grad_norm": 0.0, - "learning_rate": 1.999639907192516e-05, - "loss": 1.0369, + "learning_rate": 1.999643371444829e-05, + "loss": 1.1161, "step": 1350 }, { - "epoch": 0.03833711691259932, + "epoch": 0.03828388449658534, "grad_norm": 0.0, - "learning_rate": 1.999637436729458e-05, - "loss": 1.0959, + "learning_rate": 1.999640916324983e-05, + "loss": 1.2166, "step": 1351 }, { - "epoch": 0.038365493757094214, + "epoch": 0.0383122219388478, "grad_norm": 0.0, - "learning_rate": 1.999634957822417e-05, - "loss": 1.2135, + "learning_rate": 1.9996384527847907e-05, + "loss": 1.1802, "step": 1352 }, { - "epoch": 0.0383938706015891, + "epoch": 0.03834055938111026, "grad_norm": 0.0, - "learning_rate": 1.999632470471415e-05, - "loss": 1.1397, + "learning_rate": 1.9996359808242723e-05, + "loss": 1.1202, "step": 1353 }, { - "epoch": 0.038422247446083996, + "epoch": 0.038368896823372724, "grad_norm": 0.0, - "learning_rate": 1.9996299746764725e-05, - "loss": 1.1454, + "learning_rate": 1.9996335004434488e-05, + "loss": 1.1287, "step": 1354 }, { - "epoch": 0.03845062429057889, + "epoch": 0.03839723426563518, "grad_norm": 0.0, - "learning_rate": 1.9996274704376105e-05, - "loss": 1.0349, + "learning_rate": 1.9996310116423412e-05, + "loss": 1.1328, "step": 1355 }, { - "epoch": 0.03847900113507378, + "epoch": 0.03842557170789764, "grad_norm": 0.0, - "learning_rate": 1.9996249577548504e-05, - "loss": 1.0767, + "learning_rate": 1.9996285144209706e-05, + "loss": 1.1721, "step": 1356 }, { - "epoch": 0.03850737797956867, + "epoch": 0.03845390915016011, "grad_norm": 0.0, - "learning_rate": 1.9996224366282135e-05, - "loss": 1.0404, + "learning_rate": 1.9996260087793577e-05, + "loss": 1.173, "step": 1357 }, { - "epoch": 0.038535754824063566, + "epoch": 0.03848224659242257, "grad_norm": 0.0, - "learning_rate": 1.999619907057721e-05, - "loss": 1.032, + "learning_rate": 1.9996234947175237e-05, + "loss": 1.1288, "step": 1358 }, { - "epoch": 0.03856413166855845, + "epoch": 0.038510584034685026, "grad_norm": 0.0, - "learning_rate": 1.999617369043394e-05, - "loss": 1.1699, + "learning_rate": 1.9996209722354896e-05, + "loss": 1.171, "step": 1359 }, { - "epoch": 0.03859250851305335, + "epoch": 0.03853892147694749, "grad_norm": 0.0, - "learning_rate": 1.999614822585254e-05, - "loss": 1.2491, + "learning_rate": 1.9996184413332772e-05, + "loss": 1.1878, "step": 1360 }, { - "epoch": 0.03862088535754824, + "epoch": 0.03856725891920995, "grad_norm": 0.0, - "learning_rate": 1.9996122676833227e-05, - "loss": 1.2008, + "learning_rate": 1.999615902010907e-05, + "loss": 1.0476, "step": 1361 }, { - "epoch": 0.038649262202043136, + "epoch": 0.03859559636147241, "grad_norm": 0.0, - "learning_rate": 1.999609704337622e-05, - "loss": 1.071, + "learning_rate": 1.9996133542684014e-05, + "loss": 1.2512, "step": 1362 }, { - "epoch": 0.03867763904653802, + "epoch": 0.038623933803734876, "grad_norm": 0.0, - "learning_rate": 1.9996071325481728e-05, - "loss": 1.0821, + "learning_rate": 1.9996107981057807e-05, + "loss": 1.1218, "step": 1363 }, { - "epoch": 0.03870601589103292, + "epoch": 0.038652271245997336, "grad_norm": 0.0, - "learning_rate": 1.9996045523149977e-05, - "loss": 1.2499, + "learning_rate": 1.9996082335230674e-05, + "loss": 1.2286, "step": 1364 }, { - "epoch": 0.03873439273552781, + "epoch": 0.038680608688259795, "grad_norm": 0.0, - "learning_rate": 1.9996019636381178e-05, - "loss": 1.0806, + "learning_rate": 1.9996056605202828e-05, + "loss": 1.1038, "step": 1365 }, { - "epoch": 0.0387627695800227, + "epoch": 0.03870894613052226, "grad_norm": 0.0, - "learning_rate": 1.9995993665175547e-05, - "loss": 1.0557, + "learning_rate": 1.999603079097449e-05, + "loss": 1.1014, "step": 1366 }, { - "epoch": 0.03879114642451759, + "epoch": 0.03873728357278472, "grad_norm": 0.0, - "learning_rate": 1.9995967609533314e-05, - "loss": 1.1001, + "learning_rate": 1.9996004892545864e-05, + "loss": 1.1469, "step": 1367 }, { - "epoch": 0.03881952326901249, + "epoch": 0.03876562101504718, "grad_norm": 0.0, - "learning_rate": 1.999594146945469e-05, - "loss": 1.0077, + "learning_rate": 1.999597890991718e-05, + "loss": 1.3159, "step": 1368 }, { - "epoch": 0.038847900113507375, + "epoch": 0.038793958457309645, "grad_norm": 0.0, - "learning_rate": 1.99959152449399e-05, - "loss": 1.115, + "learning_rate": 1.999595284308866e-05, + "loss": 1.2518, "step": 1369 }, { - "epoch": 0.03887627695800227, + "epoch": 0.038822295899572104, "grad_norm": 0.0, - "learning_rate": 1.9995888935989164e-05, - "loss": 1.108, + "learning_rate": 1.9995926692060508e-05, + "loss": 1.2407, "step": 1370 }, { - "epoch": 0.03890465380249716, + "epoch": 0.03885063334183456, "grad_norm": 0.0, - "learning_rate": 1.9995862542602708e-05, - "loss": 1.1609, + "learning_rate": 1.999590045683296e-05, + "loss": 1.1633, "step": 1371 }, { - "epoch": 0.03893303064699206, + "epoch": 0.03887897078409703, "grad_norm": 0.0, - "learning_rate": 1.999583606478075e-05, - "loss": 1.0772, + "learning_rate": 1.9995874137406225e-05, + "loss": 1.0846, "step": 1372 }, { - "epoch": 0.038961407491486945, + "epoch": 0.03890730822635949, "grad_norm": 0.0, - "learning_rate": 1.9995809502523513e-05, - "loss": 1.2271, + "learning_rate": 1.9995847733780534e-05, + "loss": 1.0664, "step": 1373 }, { - "epoch": 0.03898978433598184, + "epoch": 0.03893564566862195, "grad_norm": 0.0, - "learning_rate": 1.9995782855831224e-05, - "loss": 0.9818, + "learning_rate": 1.99958212459561e-05, + "loss": 1.1512, "step": 1374 }, { - "epoch": 0.03901816118047673, + "epoch": 0.03896398311088441, "grad_norm": 0.0, - "learning_rate": 1.999575612470411e-05, - "loss": 1.0999, + "learning_rate": 1.999579467393316e-05, + "loss": 1.2939, "step": 1375 }, { - "epoch": 0.03904653802497162, + "epoch": 0.03899232055314687, "grad_norm": 0.0, - "learning_rate": 1.9995729309142396e-05, - "loss": 1.2104, + "learning_rate": 1.9995768017711925e-05, + "loss": 1.1418, "step": 1376 }, { - "epoch": 0.039074914869466515, + "epoch": 0.03902065799540933, "grad_norm": 0.0, - "learning_rate": 1.9995702409146303e-05, - "loss": 1.0274, + "learning_rate": 1.9995741277292625e-05, + "loss": 1.1754, "step": 1377 }, { - "epoch": 0.03910329171396141, + "epoch": 0.0390489954376718, "grad_norm": 0.0, - "learning_rate": 1.9995675424716066e-05, - "loss": 1.1594, + "learning_rate": 1.9995714452675485e-05, + "loss": 1.1089, "step": 1378 }, { - "epoch": 0.039131668558456296, + "epoch": 0.03907733287993426, "grad_norm": 0.0, - "learning_rate": 1.9995648355851907e-05, - "loss": 1.0819, + "learning_rate": 1.999568754386073e-05, + "loss": 1.1553, "step": 1379 }, { - "epoch": 0.03916004540295119, + "epoch": 0.039105670322196716, "grad_norm": 0.0, - "learning_rate": 1.9995621202554058e-05, - "loss": 1.0876, + "learning_rate": 1.9995660550848588e-05, + "loss": 1.0953, "step": 1380 }, { - "epoch": 0.039188422247446085, + "epoch": 0.03913400776445918, "grad_norm": 0.0, - "learning_rate": 1.9995593964822748e-05, - "loss": 1.1374, + "learning_rate": 1.9995633473639285e-05, + "loss": 1.2136, "step": 1381 }, { - "epoch": 0.03921679909194098, + "epoch": 0.03916234520672164, "grad_norm": 0.0, - "learning_rate": 1.9995566642658208e-05, - "loss": 1.0517, + "learning_rate": 1.9995606312233046e-05, + "loss": 1.213, "step": 1382 }, { - "epoch": 0.03924517593643587, + "epoch": 0.0391906826489841, "grad_norm": 0.0, - "learning_rate": 1.9995539236060664e-05, - "loss": 1.057, + "learning_rate": 1.999557906663011e-05, + "loss": 1.2153, "step": 1383 }, { - "epoch": 0.03927355278093076, + "epoch": 0.039219020091246566, "grad_norm": 0.0, - "learning_rate": 1.9995511745030352e-05, - "loss": 1.1052, + "learning_rate": 1.9995551736830693e-05, + "loss": 1.1929, "step": 1384 }, { - "epoch": 0.039301929625425655, + "epoch": 0.039247357533509025, "grad_norm": 0.0, - "learning_rate": 1.9995484169567503e-05, - "loss": 1.0792, + "learning_rate": 1.9995524322835035e-05, + "loss": 1.0591, "step": 1385 }, { - "epoch": 0.03933030646992054, + "epoch": 0.039275694975771484, "grad_norm": 0.0, - "learning_rate": 1.999545650967235e-05, - "loss": 1.1333, + "learning_rate": 1.9995496824643364e-05, + "loss": 1.1785, "step": 1386 }, { - "epoch": 0.03935868331441544, + "epoch": 0.03930403241803395, "grad_norm": 0.0, - "learning_rate": 1.9995428765345125e-05, - "loss": 1.124, + "learning_rate": 1.9995469242255913e-05, + "loss": 1.1962, "step": 1387 }, { - "epoch": 0.03938706015891033, + "epoch": 0.03933236986029641, "grad_norm": 0.0, - "learning_rate": 1.9995400936586063e-05, - "loss": 1.1782, + "learning_rate": 1.999544157567291e-05, + "loss": 1.143, "step": 1388 }, { - "epoch": 0.03941543700340522, + "epoch": 0.03936070730255887, "grad_norm": 0.0, - "learning_rate": 1.9995373023395404e-05, - "loss": 1.0876, + "learning_rate": 1.9995413824894593e-05, + "loss": 1.1651, "step": 1389 }, { - "epoch": 0.03944381384790011, + "epoch": 0.039389044744821335, "grad_norm": 0.0, - "learning_rate": 1.9995345025773377e-05, - "loss": 1.1339, + "learning_rate": 1.999538598992119e-05, + "loss": 1.1373, "step": 1390 }, { - "epoch": 0.03947219069239501, + "epoch": 0.039417382187083794, "grad_norm": 0.0, - "learning_rate": 1.9995316943720225e-05, - "loss": 1.0193, + "learning_rate": 1.999535807075294e-05, + "loss": 1.2197, "step": 1391 }, { - "epoch": 0.0395005675368899, + "epoch": 0.03944571962934625, "grad_norm": 0.0, - "learning_rate": 1.9995288777236177e-05, - "loss": 1.097, + "learning_rate": 1.999533006739008e-05, + "loss": 1.2374, "step": 1392 }, { - "epoch": 0.03952894438138479, + "epoch": 0.03947405707160872, "grad_norm": 0.0, - "learning_rate": 1.9995260526321474e-05, - "loss": 1.158, + "learning_rate": 1.999530197983284e-05, + "loss": 1.1947, "step": 1393 }, { - "epoch": 0.03955732122587968, + "epoch": 0.03950239451387118, "grad_norm": 0.0, - "learning_rate": 1.999523219097636e-05, - "loss": 1.1865, + "learning_rate": 1.9995273808081467e-05, + "loss": 1.163, "step": 1394 }, { - "epoch": 0.03958569807037458, + "epoch": 0.03953073195613364, "grad_norm": 0.0, - "learning_rate": 1.9995203771201072e-05, - "loss": 1.1821, + "learning_rate": 1.9995245552136185e-05, + "loss": 1.2045, "step": 1395 }, { - "epoch": 0.039614074914869464, + "epoch": 0.0395590693983961, "grad_norm": 0.0, - "learning_rate": 1.9995175266995847e-05, - "loss": 1.1078, + "learning_rate": 1.9995217211997237e-05, + "loss": 1.0729, "step": 1396 }, { - "epoch": 0.03964245175936436, + "epoch": 0.03958740684065856, "grad_norm": 0.0, - "learning_rate": 1.9995146678360925e-05, - "loss": 1.2579, + "learning_rate": 1.9995188787664864e-05, + "loss": 1.2337, "step": 1397 }, { - "epoch": 0.03967082860385925, + "epoch": 0.03961574428292102, "grad_norm": 0.0, - "learning_rate": 1.999511800529655e-05, - "loss": 1.14, + "learning_rate": 1.9995160279139306e-05, + "loss": 1.0849, "step": 1398 }, { - "epoch": 0.03969920544835414, + "epoch": 0.03964408172518349, "grad_norm": 0.0, - "learning_rate": 1.9995089247802966e-05, - "loss": 1.0497, + "learning_rate": 1.9995131686420802e-05, + "loss": 1.1712, "step": 1399 }, { - "epoch": 0.039727582292849034, + "epoch": 0.039672419167445946, "grad_norm": 0.0, - "learning_rate": 1.9995060405880412e-05, - "loss": 1.0768, + "learning_rate": 1.9995103009509587e-05, + "loss": 1.07, "step": 1400 }, { - "epoch": 0.03975595913734393, + "epoch": 0.039700756609708406, "grad_norm": 0.0, - "learning_rate": 1.999503147952913e-05, - "loss": 1.0794, + "learning_rate": 1.9995074248405913e-05, + "loss": 1.3056, "step": 1401 }, { - "epoch": 0.03978433598183882, + "epoch": 0.03972909405197087, "grad_norm": 0.0, - "learning_rate": 1.9995002468749374e-05, - "loss": 1.0789, + "learning_rate": 1.9995045403110017e-05, + "loss": 1.1272, "step": 1402 }, { - "epoch": 0.03981271282633371, + "epoch": 0.03975743149423333, "grad_norm": 0.0, - "learning_rate": 1.999497337354138e-05, - "loss": 1.0711, + "learning_rate": 1.999501647362214e-05, + "loss": 1.1729, "step": 1403 }, { - "epoch": 0.039841089670828604, + "epoch": 0.03978576893649579, "grad_norm": 0.0, - "learning_rate": 1.9994944193905396e-05, - "loss": 1.0861, + "learning_rate": 1.9994987459942528e-05, + "loss": 1.3098, "step": 1404 }, { - "epoch": 0.0398694665153235, + "epoch": 0.039814106378758256, "grad_norm": 0.0, - "learning_rate": 1.9994914929841672e-05, - "loss": 1.121, + "learning_rate": 1.9994958362071422e-05, + "loss": 0.9509, "step": 1405 }, { - "epoch": 0.039897843359818386, + "epoch": 0.039842443821020715, "grad_norm": 0.0, - "learning_rate": 1.9994885581350448e-05, - "loss": 1.1877, + "learning_rate": 1.9994929180009074e-05, + "loss": 1.1913, "step": 1406 }, { - "epoch": 0.03992622020431328, + "epoch": 0.039870781263283174, "grad_norm": 0.0, - "learning_rate": 1.999485614843198e-05, - "loss": 1.24, + "learning_rate": 1.9994899913755726e-05, + "loss": 1.1403, "step": 1407 }, { - "epoch": 0.039954597048808174, + "epoch": 0.03989911870554564, "grad_norm": 0.0, - "learning_rate": 1.9994826631086514e-05, - "loss": 1.0684, + "learning_rate": 1.9994870563311627e-05, + "loss": 1.1866, "step": 1408 }, { - "epoch": 0.03998297389330306, + "epoch": 0.0399274561478081, "grad_norm": 0.0, - "learning_rate": 1.999479702931429e-05, - "loss": 1.0558, + "learning_rate": 1.999484112867702e-05, + "loss": 1.29, "step": 1409 }, { - "epoch": 0.040011350737797956, + "epoch": 0.03995579359007056, "grad_norm": 0.0, - "learning_rate": 1.9994767343115574e-05, - "loss": 1.2299, + "learning_rate": 1.999481160985215e-05, + "loss": 1.057, "step": 1410 }, { - "epoch": 0.04003972758229285, + "epoch": 0.039984131032333024, "grad_norm": 0.0, - "learning_rate": 1.9994737572490607e-05, - "loss": 1.0783, + "learning_rate": 1.9994782006837275e-05, + "loss": 1.1576, "step": 1411 }, { - "epoch": 0.040068104426787744, + "epoch": 0.04001246847459548, "grad_norm": 0.0, - "learning_rate": 1.9994707717439643e-05, - "loss": 1.0994, + "learning_rate": 1.999475231963264e-05, + "loss": 1.1844, "step": 1412 }, { - "epoch": 0.04009648127128263, + "epoch": 0.04004080591685794, "grad_norm": 0.0, - "learning_rate": 1.9994677777962934e-05, - "loss": 1.1059, + "learning_rate": 1.999472254823849e-05, + "loss": 1.239, "step": 1413 }, { - "epoch": 0.040124858115777526, + "epoch": 0.04006914335912041, "grad_norm": 0.0, - "learning_rate": 1.999464775406073e-05, - "loss": 1.0369, + "learning_rate": 1.9994692692655082e-05, + "loss": 1.0931, "step": 1414 }, { - "epoch": 0.04015323496027242, + "epoch": 0.04009748080138287, "grad_norm": 0.0, - "learning_rate": 1.999461764573329e-05, - "loss": 1.0925, + "learning_rate": 1.999466275288267e-05, + "loss": 1.2036, "step": 1415 }, { - "epoch": 0.04018161180476731, + "epoch": 0.04012581824364533, "grad_norm": 0.0, - "learning_rate": 1.999458745298086e-05, - "loss": 1.0956, + "learning_rate": 1.9994632728921498e-05, + "loss": 1.214, "step": 1416 }, { - "epoch": 0.0402099886492622, + "epoch": 0.04015415568590779, "grad_norm": 0.0, - "learning_rate": 1.9994557175803707e-05, - "loss": 1.1477, + "learning_rate": 1.9994602620771825e-05, + "loss": 1.1485, "step": 1417 }, { - "epoch": 0.040238365493757096, + "epoch": 0.04018249312817025, "grad_norm": 0.0, - "learning_rate": 1.9994526814202076e-05, - "loss": 1.1672, + "learning_rate": 1.9994572428433906e-05, + "loss": 1.2238, "step": 1418 }, { - "epoch": 0.04026674233825198, + "epoch": 0.04021083057043271, "grad_norm": 0.0, - "learning_rate": 1.9994496368176232e-05, - "loss": 1.0673, + "learning_rate": 1.9994542151907988e-05, + "loss": 1.1336, "step": 1419 }, { - "epoch": 0.04029511918274688, + "epoch": 0.04023916801269518, "grad_norm": 0.0, - "learning_rate": 1.999446583772642e-05, - "loss": 1.0695, + "learning_rate": 1.999451179119433e-05, + "loss": 1.2018, "step": 1420 }, { - "epoch": 0.04032349602724177, + "epoch": 0.040267505454957636, "grad_norm": 0.0, - "learning_rate": 1.9994435222852913e-05, - "loss": 0.9402, + "learning_rate": 1.9994481346293186e-05, + "loss": 1.2988, "step": 1421 }, { - "epoch": 0.040351872871736666, + "epoch": 0.040295842897220095, "grad_norm": 0.0, - "learning_rate": 1.999440452355596e-05, - "loss": 1.131, + "learning_rate": 1.9994450817204817e-05, + "loss": 1.2561, "step": 1422 }, { - "epoch": 0.04038024971623155, + "epoch": 0.04032418033948256, "grad_norm": 0.0, - "learning_rate": 1.9994373739835824e-05, - "loss": 1.1447, + "learning_rate": 1.9994420203929478e-05, + "loss": 1.093, "step": 1423 }, { - "epoch": 0.04040862656072645, + "epoch": 0.04035251778174502, "grad_norm": 0.0, - "learning_rate": 1.9994342871692764e-05, - "loss": 0.9223, + "learning_rate": 1.9994389506467423e-05, + "loss": 1.1607, "step": 1424 }, { - "epoch": 0.04043700340522134, + "epoch": 0.04038085522400748, "grad_norm": 0.0, - "learning_rate": 1.999431191912704e-05, - "loss": 0.9412, + "learning_rate": 1.999435872481892e-05, + "loss": 1.0986, "step": 1425 }, { - "epoch": 0.04046538024971623, + "epoch": 0.040409192666269945, "grad_norm": 0.0, - "learning_rate": 1.9994280882138913e-05, - "loss": 1.1995, + "learning_rate": 1.9994327858984216e-05, + "loss": 1.2865, "step": 1426 }, { - "epoch": 0.04049375709421112, + "epoch": 0.040437530108532405, "grad_norm": 0.0, - "learning_rate": 1.9994249760728645e-05, - "loss": 1.1981, + "learning_rate": 1.9994296908963583e-05, + "loss": 1.1758, "step": 1427 }, { - "epoch": 0.04052213393870602, + "epoch": 0.040465867550794864, "grad_norm": 0.0, - "learning_rate": 1.9994218554896503e-05, - "loss": 1.0422, + "learning_rate": 1.999426587475727e-05, + "loss": 1.1516, "step": 1428 }, { - "epoch": 0.040550510783200905, + "epoch": 0.04049420499305733, "grad_norm": 0.0, - "learning_rate": 1.9994187264642746e-05, - "loss": 1.0927, + "learning_rate": 1.9994234756365547e-05, + "loss": 1.1385, "step": 1429 }, { - "epoch": 0.0405788876276958, + "epoch": 0.04052254243531979, "grad_norm": 0.0, - "learning_rate": 1.999415588996764e-05, - "loss": 1.0881, + "learning_rate": 1.9994203553788675e-05, + "loss": 1.0487, "step": 1430 }, { - "epoch": 0.04060726447219069, + "epoch": 0.04055087987758225, "grad_norm": 0.0, - "learning_rate": 1.9994124430871448e-05, - "loss": 1.0957, + "learning_rate": 1.9994172267026912e-05, + "loss": 1.1918, "step": 1431 }, { - "epoch": 0.04063564131668559, + "epoch": 0.040579217319844714, "grad_norm": 0.0, - "learning_rate": 1.999409288735444e-05, - "loss": 1.0766, + "learning_rate": 1.9994140896080524e-05, + "loss": 1.196, "step": 1432 }, { - "epoch": 0.040664018161180475, + "epoch": 0.04060755476210717, "grad_norm": 0.0, - "learning_rate": 1.999406125941688e-05, - "loss": 1.085, + "learning_rate": 1.999410944094978e-05, + "loss": 1.0945, "step": 1433 }, { - "epoch": 0.04069239500567537, + "epoch": 0.04063589220436963, "grad_norm": 0.0, - "learning_rate": 1.9994029547059036e-05, - "loss": 1.0342, + "learning_rate": 1.999407790163494e-05, + "loss": 1.1753, "step": 1434 }, { - "epoch": 0.04072077185017026, + "epoch": 0.0406642296466321, "grad_norm": 0.0, - "learning_rate": 1.9993997750281172e-05, - "loss": 0.9865, + "learning_rate": 1.9994046278136266e-05, + "loss": 1.2095, "step": 1435 }, { - "epoch": 0.04074914869466515, + "epoch": 0.04069256708889456, "grad_norm": 0.0, - "learning_rate": 1.9993965869083562e-05, - "loss": 1.1448, + "learning_rate": 1.9994014570454034e-05, + "loss": 1.2234, "step": 1436 }, { - "epoch": 0.040777525539160045, + "epoch": 0.040720904531157016, "grad_norm": 0.0, - "learning_rate": 1.9993933903466472e-05, - "loss": 1.0605, + "learning_rate": 1.9993982778588507e-05, + "loss": 1.1647, "step": 1437 }, { - "epoch": 0.04080590238365494, + "epoch": 0.04074924197341948, "grad_norm": 0.0, - "learning_rate": 1.9993901853430175e-05, - "loss": 1.0706, + "learning_rate": 1.999395090253995e-05, + "loss": 1.1169, "step": 1438 }, { - "epoch": 0.040834279228149827, + "epoch": 0.04077757941568194, "grad_norm": 0.0, - "learning_rate": 1.999386971897494e-05, - "loss": 0.9794, + "learning_rate": 1.999391894230863e-05, + "loss": 1.1719, "step": 1439 }, { - "epoch": 0.04086265607264472, + "epoch": 0.0408059168579444, "grad_norm": 0.0, - "learning_rate": 1.9993837500101035e-05, - "loss": 1.1321, + "learning_rate": 1.9993886897894823e-05, + "loss": 1.2498, "step": 1440 }, { - "epoch": 0.040891032917139615, + "epoch": 0.04083425430020687, "grad_norm": 0.0, - "learning_rate": 1.999380519680874e-05, - "loss": 0.9838, + "learning_rate": 1.9993854769298795e-05, + "loss": 1.2118, "step": 1441 }, { - "epoch": 0.04091940976163451, + "epoch": 0.040862591742469326, "grad_norm": 0.0, - "learning_rate": 1.999377280909832e-05, - "loss": 1.0408, + "learning_rate": 1.9993822556520818e-05, + "loss": 1.1168, "step": 1442 }, { - "epoch": 0.0409477866061294, + "epoch": 0.040890929184731785, "grad_norm": 0.0, - "learning_rate": 1.999374033697005e-05, - "loss": 1.0914, + "learning_rate": 1.9993790259561162e-05, + "loss": 1.2049, "step": 1443 }, { - "epoch": 0.04097616345062429, + "epoch": 0.04091926662699425, "grad_norm": 0.0, - "learning_rate": 1.9993707780424208e-05, - "loss": 1.0272, + "learning_rate": 1.99937578784201e-05, + "loss": 1.2181, "step": 1444 }, { - "epoch": 0.041004540295119185, + "epoch": 0.04094760406925671, "grad_norm": 0.0, - "learning_rate": 1.999367513946107e-05, - "loss": 1.0509, + "learning_rate": 1.99937254130979e-05, + "loss": 1.1192, "step": 1445 }, { - "epoch": 0.04103291713961407, + "epoch": 0.04097594151151917, "grad_norm": 0.0, - "learning_rate": 1.9993642414080905e-05, - "loss": 1.0273, + "learning_rate": 1.9993692863594846e-05, + "loss": 1.1827, "step": 1446 }, { - "epoch": 0.04106129398410897, + "epoch": 0.04100427895378163, "grad_norm": 0.0, - "learning_rate": 1.9993609604283993e-05, - "loss": 1.2256, + "learning_rate": 1.9993660229911205e-05, + "loss": 1.1492, "step": 1447 }, { - "epoch": 0.04108967082860386, + "epoch": 0.041032616396044094, "grad_norm": 0.0, - "learning_rate": 1.9993576710070615e-05, - "loss": 1.1097, + "learning_rate": 1.9993627512047248e-05, + "loss": 1.1906, "step": 1448 }, { - "epoch": 0.04111804767309875, + "epoch": 0.04106095383830655, "grad_norm": 0.0, - "learning_rate": 1.9993543731441046e-05, - "loss": 1.1451, + "learning_rate": 1.9993594710003262e-05, + "loss": 1.1278, "step": 1449 }, { - "epoch": 0.04114642451759364, + "epoch": 0.04108929128056901, "grad_norm": 0.0, - "learning_rate": 1.999351066839556e-05, - "loss": 1.079, + "learning_rate": 1.9993561823779512e-05, + "loss": 0.9848, "step": 1450 }, { - "epoch": 0.04117480136208854, + "epoch": 0.04111762872283148, "grad_norm": 0.0, - "learning_rate": 1.9993477520934442e-05, - "loss": 1.1017, + "learning_rate": 1.999352885337628e-05, + "loss": 1.1902, "step": 1451 }, { - "epoch": 0.04120317820658343, + "epoch": 0.04114596616509394, "grad_norm": 0.0, - "learning_rate": 1.999344428905797e-05, - "loss": 1.0412, + "learning_rate": 1.9993495798793846e-05, + "loss": 0.9908, "step": 1452 }, { - "epoch": 0.04123155505107832, + "epoch": 0.0411743036073564, "grad_norm": 0.0, - "learning_rate": 1.999341097276642e-05, - "loss": 1.1742, + "learning_rate": 1.9993462660032484e-05, + "loss": 1.1605, "step": 1453 }, { - "epoch": 0.04125993189557321, + "epoch": 0.04120264104961886, "grad_norm": 0.0, - "learning_rate": 1.9993377572060084e-05, - "loss": 1.1304, + "learning_rate": 1.9993429437092474e-05, + "loss": 1.2059, "step": 1454 }, { - "epoch": 0.04128830874006811, + "epoch": 0.04123097849188132, "grad_norm": 0.0, - "learning_rate": 1.999334408693924e-05, - "loss": 1.1185, + "learning_rate": 1.99933961299741e-05, + "loss": 1.1671, "step": 1455 }, { - "epoch": 0.041316685584562994, + "epoch": 0.04125931593414378, "grad_norm": 0.0, - "learning_rate": 1.9993310517404165e-05, - "loss": 1.0686, + "learning_rate": 1.9993362738677637e-05, + "loss": 1.1593, "step": 1456 }, { - "epoch": 0.04134506242905789, + "epoch": 0.04128765337640625, "grad_norm": 0.0, - "learning_rate": 1.999327686345515e-05, - "loss": 1.1513, + "learning_rate": 1.999332926320337e-05, + "loss": 1.1607, "step": 1457 }, { - "epoch": 0.04137343927355278, + "epoch": 0.041315990818668706, "grad_norm": 0.0, - "learning_rate": 1.9993243125092473e-05, - "loss": 1.0223, + "learning_rate": 1.999329570355158e-05, + "loss": 1.1681, "step": 1458 }, { - "epoch": 0.04140181611804767, + "epoch": 0.041344328260931165, "grad_norm": 0.0, - "learning_rate": 1.9993209302316425e-05, - "loss": 1.1275, + "learning_rate": 1.9993262059722548e-05, + "loss": 1.1999, "step": 1459 }, { - "epoch": 0.041430192962542564, + "epoch": 0.04137266570319363, "grad_norm": 0.0, - "learning_rate": 1.9993175395127285e-05, - "loss": 1.278, + "learning_rate": 1.9993228331716558e-05, + "loss": 1.0933, "step": 1460 }, { - "epoch": 0.04145856980703746, + "epoch": 0.04140100314545609, "grad_norm": 0.0, - "learning_rate": 1.9993141403525346e-05, - "loss": 1.0834, + "learning_rate": 1.9993194519533896e-05, + "loss": 1.1418, "step": 1461 }, { - "epoch": 0.04148694665153235, + "epoch": 0.04142934058771855, "grad_norm": 0.0, - "learning_rate": 1.9993107327510892e-05, - "loss": 1.0311, + "learning_rate": 1.9993160623174846e-05, + "loss": 1.1238, "step": 1462 }, { - "epoch": 0.04151532349602724, + "epoch": 0.041457678029981015, "grad_norm": 0.0, - "learning_rate": 1.999307316708421e-05, - "loss": 1.1417, + "learning_rate": 1.9993126642639694e-05, + "loss": 1.1756, "step": 1463 }, { - "epoch": 0.041543700340522134, + "epoch": 0.041486015472243475, "grad_norm": 0.0, - "learning_rate": 1.999303892224559e-05, - "loss": 1.0139, + "learning_rate": 1.9993092577928725e-05, + "loss": 1.1047, "step": 1464 }, { - "epoch": 0.04157207718501703, + "epoch": 0.041514352914505934, "grad_norm": 0.0, - "learning_rate": 1.9993004592995323e-05, - "loss": 1.0518, + "learning_rate": 1.9993058429042224e-05, + "loss": 1.197, "step": 1465 }, { - "epoch": 0.041600454029511916, + "epoch": 0.0415426903567684, "grad_norm": 0.0, - "learning_rate": 1.9992970179333695e-05, - "loss": 0.9776, + "learning_rate": 1.9993024195980485e-05, + "loss": 1.2997, "step": 1466 }, { - "epoch": 0.04162883087400681, + "epoch": 0.04157102779903086, "grad_norm": 0.0, - "learning_rate": 1.9992935681261e-05, - "loss": 1.1852, + "learning_rate": 1.999298987874379e-05, + "loss": 1.0721, "step": 1467 }, { - "epoch": 0.041657207718501704, + "epoch": 0.04159936524129332, "grad_norm": 0.0, - "learning_rate": 1.9992901098777525e-05, - "loss": 1.0026, + "learning_rate": 1.999295547733243e-05, + "loss": 1.1686, "step": 1468 }, { - "epoch": 0.04168558456299659, + "epoch": 0.041627702683555784, "grad_norm": 0.0, - "learning_rate": 1.999286643188357e-05, - "loss": 1.0977, + "learning_rate": 1.9992920991746694e-05, + "loss": 1.1622, "step": 1469 }, { - "epoch": 0.041713961407491486, + "epoch": 0.04165604012581824, "grad_norm": 0.0, - "learning_rate": 1.999283168057942e-05, - "loss": 1.0851, + "learning_rate": 1.9992886421986876e-05, + "loss": 1.2737, "step": 1470 }, { - "epoch": 0.04174233825198638, + "epoch": 0.0416843775680807, "grad_norm": 0.0, - "learning_rate": 1.9992796844865375e-05, - "loss": 1.0076, + "learning_rate": 1.9992851768053267e-05, + "loss": 1.1769, "step": 1471 }, { - "epoch": 0.041770715096481274, + "epoch": 0.04171271501034317, "grad_norm": 0.0, - "learning_rate": 1.9992761924741718e-05, - "loss": 1.1319, + "learning_rate": 1.999281702994615e-05, + "loss": 1.1957, "step": 1472 }, { - "epoch": 0.04179909194097616, + "epoch": 0.04174105245260563, "grad_norm": 0.0, - "learning_rate": 1.9992726920208757e-05, - "loss": 1.1639, + "learning_rate": 1.9992782207665835e-05, + "loss": 1.2108, "step": 1473 }, { - "epoch": 0.041827468785471056, + "epoch": 0.041769389894868086, "grad_norm": 0.0, - "learning_rate": 1.9992691831266785e-05, - "loss": 1.0253, + "learning_rate": 1.9992747301212595e-05, + "loss": 1.1319, "step": 1474 }, { - "epoch": 0.04185584562996595, + "epoch": 0.04179772733713055, "grad_norm": 0.0, - "learning_rate": 1.9992656657916093e-05, - "loss": 1.0916, + "learning_rate": 1.9992712310586736e-05, + "loss": 1.245, "step": 1475 }, { - "epoch": 0.04188422247446084, + "epoch": 0.04182606477939301, "grad_norm": 0.0, - "learning_rate": 1.9992621400156982e-05, - "loss": 1.1104, + "learning_rate": 1.9992677235788554e-05, + "loss": 1.1249, "step": 1476 }, { - "epoch": 0.04191259931895573, + "epoch": 0.04185440222165547, "grad_norm": 0.0, - "learning_rate": 1.999258605798975e-05, - "loss": 1.0718, + "learning_rate": 1.9992642076818337e-05, + "loss": 1.2102, "step": 1477 }, { - "epoch": 0.041940976163450626, + "epoch": 0.04188273966391794, "grad_norm": 0.0, - "learning_rate": 1.999255063141469e-05, - "loss": 1.0734, + "learning_rate": 1.999260683367639e-05, + "loss": 1.0872, "step": 1478 }, { - "epoch": 0.04196935300794551, + "epoch": 0.041911077106180396, "grad_norm": 0.0, - "learning_rate": 1.9992515120432106e-05, - "loss": 1.1507, + "learning_rate": 1.9992571506362997e-05, + "loss": 1.0854, "step": 1479 }, { - "epoch": 0.04199772985244041, + "epoch": 0.041939414548442855, "grad_norm": 0.0, - "learning_rate": 1.9992479525042305e-05, - "loss": 1.1052, + "learning_rate": 1.999253609487847e-05, + "loss": 1.166, "step": 1480 }, { - "epoch": 0.0420261066969353, + "epoch": 0.04196775199070532, "grad_norm": 0.0, - "learning_rate": 1.9992443845245572e-05, - "loss": 1.136, + "learning_rate": 1.99925005992231e-05, + "loss": 1.0839, "step": 1481 }, { - "epoch": 0.042054483541430196, + "epoch": 0.04199608943296778, "grad_norm": 0.0, - "learning_rate": 1.999240808104222e-05, - "loss": 1.1523, + "learning_rate": 1.9992465019397186e-05, + "loss": 1.1321, "step": 1482 }, { - "epoch": 0.04208286038592508, + "epoch": 0.04202442687523024, "grad_norm": 0.0, - "learning_rate": 1.9992372232432548e-05, - "loss": 1.054, + "learning_rate": 1.9992429355401026e-05, + "loss": 1.1354, "step": 1483 }, { - "epoch": 0.04211123723041998, + "epoch": 0.042052764317492705, "grad_norm": 0.0, - "learning_rate": 1.9992336299416857e-05, - "loss": 1.0933, + "learning_rate": 1.9992393607234928e-05, + "loss": 1.2089, "step": 1484 }, { - "epoch": 0.04213961407491487, + "epoch": 0.042081101759755164, "grad_norm": 0.0, - "learning_rate": 1.9992300281995454e-05, - "loss": 1.0848, + "learning_rate": 1.9992357774899185e-05, + "loss": 1.1997, "step": 1485 }, { - "epoch": 0.04216799091940976, + "epoch": 0.04210943920201762, "grad_norm": 0.0, - "learning_rate": 1.999226418016864e-05, - "loss": 1.1591, + "learning_rate": 1.9992321858394104e-05, + "loss": 1.1533, "step": 1486 }, { - "epoch": 0.04219636776390465, + "epoch": 0.04213777664428009, "grad_norm": 0.0, - "learning_rate": 1.9992227993936723e-05, - "loss": 1.1549, + "learning_rate": 1.9992285857719983e-05, + "loss": 1.0807, "step": 1487 }, { - "epoch": 0.04222474460839955, + "epoch": 0.04216611408654255, "grad_norm": 0.0, - "learning_rate": 1.9992191723300004e-05, - "loss": 1.1093, + "learning_rate": 1.999224977287713e-05, + "loss": 1.1669, "step": 1488 }, { - "epoch": 0.042253121452894435, + "epoch": 0.04219445152880501, "grad_norm": 0.0, - "learning_rate": 1.9992155368258794e-05, - "loss": 1.2156, + "learning_rate": 1.999221360386584e-05, + "loss": 1.2362, "step": 1489 }, { - "epoch": 0.04228149829738933, + "epoch": 0.042222788971067474, "grad_norm": 0.0, - "learning_rate": 1.99921189288134e-05, - "loss": 1.121, + "learning_rate": 1.999217735068643e-05, + "loss": 1.1141, "step": 1490 }, { - "epoch": 0.04230987514188422, + "epoch": 0.04225112641332993, "grad_norm": 0.0, - "learning_rate": 1.9992082404964126e-05, - "loss": 1.282, + "learning_rate": 1.99921410133392e-05, + "loss": 1.0372, "step": 1491 }, { - "epoch": 0.04233825198637912, + "epoch": 0.04227946385559239, "grad_norm": 0.0, - "learning_rate": 1.9992045796711283e-05, - "loss": 1.1324, + "learning_rate": 1.9992104591824457e-05, + "loss": 1.1208, "step": 1492 }, { - "epoch": 0.042366628830874005, + "epoch": 0.04230780129785486, "grad_norm": 0.0, - "learning_rate": 1.9992009104055178e-05, - "loss": 1.1132, + "learning_rate": 1.9992068086142506e-05, + "loss": 1.2494, "step": 1493 }, { - "epoch": 0.0423950056753689, + "epoch": 0.04233613874011732, "grad_norm": 0.0, - "learning_rate": 1.999197232699613e-05, - "loss": 1.0533, + "learning_rate": 1.9992031496293652e-05, + "loss": 1.0466, "step": 1494 }, { - "epoch": 0.04242338251986379, + "epoch": 0.042364476182379776, "grad_norm": 0.0, - "learning_rate": 1.9991935465534434e-05, - "loss": 1.1573, + "learning_rate": 1.9991994822278207e-05, + "loss": 1.1244, "step": 1495 }, { - "epoch": 0.04245175936435868, + "epoch": 0.04239281362464224, "grad_norm": 0.0, - "learning_rate": 1.9991898519670412e-05, - "loss": 1.1808, + "learning_rate": 1.999195806409648e-05, + "loss": 1.1599, "step": 1496 }, { - "epoch": 0.042480136208853575, + "epoch": 0.0424211510669047, "grad_norm": 0.0, - "learning_rate": 1.999186148940438e-05, - "loss": 1.0348, + "learning_rate": 1.999192122174878e-05, + "loss": 1.0945, "step": 1497 }, { - "epoch": 0.04250851305334847, + "epoch": 0.04244948850916716, "grad_norm": 0.0, - "learning_rate": 1.999182437473664e-05, - "loss": 1.1705, + "learning_rate": 1.9991884295235414e-05, + "loss": 1.1151, "step": 1498 }, { - "epoch": 0.04253688989784336, + "epoch": 0.042477825951429626, "grad_norm": 0.0, - "learning_rate": 1.999178717566751e-05, - "loss": 1.0907, + "learning_rate": 1.9991847284556703e-05, + "loss": 1.2072, "step": 1499 }, { - "epoch": 0.04256526674233825, + "epoch": 0.042506163393692085, "grad_norm": 0.0, - "learning_rate": 1.9991749892197304e-05, - "loss": 1.1386, + "learning_rate": 1.9991810189712945e-05, + "loss": 1.0923, "step": 1500 }, { - "epoch": 0.042593643586833145, + "epoch": 0.042534500835954545, "grad_norm": 0.0, - "learning_rate": 1.999171252432634e-05, - "loss": 1.1598, + "learning_rate": 1.9991773010704464e-05, + "loss": 1.123, "step": 1501 }, { - "epoch": 0.04262202043132804, + "epoch": 0.04256283827821701, "grad_norm": 0.0, - "learning_rate": 1.9991675072054933e-05, - "loss": 0.9728, + "learning_rate": 1.9991735747531566e-05, + "loss": 1.1725, "step": 1502 }, { - "epoch": 0.04265039727582293, + "epoch": 0.04259117572047947, "grad_norm": 0.0, - "learning_rate": 1.999163753538339e-05, - "loss": 1.0206, + "learning_rate": 1.999169840019457e-05, + "loss": 1.2181, "step": 1503 }, { - "epoch": 0.04267877412031782, + "epoch": 0.04261951316274193, "grad_norm": 0.0, - "learning_rate": 1.9991599914312043e-05, - "loss": 1.1821, + "learning_rate": 1.9991660968693788e-05, + "loss": 1.0854, "step": 1504 }, { - "epoch": 0.042707150964812715, + "epoch": 0.042647850605004395, "grad_norm": 0.0, - "learning_rate": 1.99915622088412e-05, - "loss": 1.1884, + "learning_rate": 1.999162345302953e-05, + "loss": 1.21, "step": 1505 }, { - "epoch": 0.0427355278093076, + "epoch": 0.042676188047266854, "grad_norm": 0.0, - "learning_rate": 1.9991524418971182e-05, - "loss": 1.1673, + "learning_rate": 1.999158585320212e-05, + "loss": 1.1608, "step": 1506 }, { - "epoch": 0.0427639046538025, + "epoch": 0.04270452548952931, "grad_norm": 0.0, - "learning_rate": 1.9991486544702306e-05, - "loss": 1.1766, + "learning_rate": 1.9991548169211875e-05, + "loss": 1.2053, "step": 1507 }, { - "epoch": 0.04279228149829739, + "epoch": 0.04273286293179178, "grad_norm": 0.0, - "learning_rate": 1.9991448586034897e-05, - "loss": 1.1561, + "learning_rate": 1.999151040105911e-05, + "loss": 1.1307, "step": 1508 }, { - "epoch": 0.04282065834279228, + "epoch": 0.04276120037405424, "grad_norm": 0.0, - "learning_rate": 1.999141054296927e-05, - "loss": 0.9215, + "learning_rate": 1.999147254874414e-05, + "loss": 1.1392, "step": 1509 }, { - "epoch": 0.04284903518728717, + "epoch": 0.0427895378163167, "grad_norm": 0.0, - "learning_rate": 1.999137241550575e-05, - "loss": 1.0902, + "learning_rate": 1.9991434612267286e-05, + "loss": 1.0836, "step": 1510 }, { - "epoch": 0.04287741203178207, + "epoch": 0.04281787525857916, "grad_norm": 0.0, - "learning_rate": 1.999133420364466e-05, - "loss": 1.1257, + "learning_rate": 1.999139659162887e-05, + "loss": 1.1379, "step": 1511 }, { - "epoch": 0.04290578887627696, + "epoch": 0.04284621270084162, "grad_norm": 0.0, - "learning_rate": 1.9991295907386317e-05, - "loss": 1.0868, + "learning_rate": 1.999135848682921e-05, + "loss": 1.1965, "step": 1512 }, { - "epoch": 0.04293416572077185, + "epoch": 0.04287455014310408, "grad_norm": 0.0, - "learning_rate": 1.9991257526731052e-05, - "loss": 1.0951, + "learning_rate": 1.9991320297868624e-05, + "loss": 1.2512, "step": 1513 }, { - "epoch": 0.04296254256526674, + "epoch": 0.04290288758536655, "grad_norm": 0.0, - "learning_rate": 1.9991219061679182e-05, - "loss": 1.0966, + "learning_rate": 1.999128202474744e-05, + "loss": 1.1153, "step": 1514 }, { - "epoch": 0.04299091940976164, + "epoch": 0.04293122502762901, "grad_norm": 0.0, - "learning_rate": 1.999118051223104e-05, - "loss": 1.1363, + "learning_rate": 1.9991243667465978e-05, + "loss": 1.1956, "step": 1515 }, { - "epoch": 0.043019296254256524, + "epoch": 0.042959562469891466, "grad_norm": 0.0, - "learning_rate": 1.9991141878386944e-05, - "loss": 0.9785, + "learning_rate": 1.9991205226024558e-05, + "loss": 1.2061, "step": 1516 }, { - "epoch": 0.04304767309875142, + "epoch": 0.04298789991215393, "grad_norm": 0.0, - "learning_rate": 1.9991103160147226e-05, - "loss": 1.2314, + "learning_rate": 1.9991166700423504e-05, + "loss": 1.1572, "step": 1517 }, { - "epoch": 0.04307604994324631, + "epoch": 0.04301623735441639, "grad_norm": 0.0, - "learning_rate": 1.9991064357512208e-05, - "loss": 1.1276, + "learning_rate": 1.9991128090663146e-05, + "loss": 1.2078, "step": 1518 }, { - "epoch": 0.0431044267877412, + "epoch": 0.04304457479667885, "grad_norm": 0.0, - "learning_rate": 1.999102547048222e-05, - "loss": 1.0728, + "learning_rate": 1.9991089396743808e-05, + "loss": 1.2017, "step": 1519 }, { - "epoch": 0.043132803632236094, + "epoch": 0.043072912238941316, "grad_norm": 0.0, - "learning_rate": 1.9990986499057593e-05, - "loss": 1.1172, + "learning_rate": 1.9991050618665813e-05, + "loss": 1.2557, "step": 1520 }, { - "epoch": 0.04316118047673099, + "epoch": 0.043101249681203775, "grad_norm": 0.0, - "learning_rate": 1.9990947443238647e-05, - "loss": 1.0302, + "learning_rate": 1.9991011756429488e-05, + "loss": 1.1407, "step": 1521 }, { - "epoch": 0.04318955732122588, + "epoch": 0.043129587123466234, "grad_norm": 0.0, - "learning_rate": 1.999090830302573e-05, - "loss": 0.9705, + "learning_rate": 1.999097281003516e-05, + "loss": 1.121, "step": 1522 }, { - "epoch": 0.04321793416572077, + "epoch": 0.0431579245657287, "grad_norm": 0.0, - "learning_rate": 1.9990869078419154e-05, - "loss": 1.0174, + "learning_rate": 1.9990933779483156e-05, + "loss": 1.0251, "step": 1523 }, { - "epoch": 0.043246311010215664, + "epoch": 0.04318626200799116, "grad_norm": 0.0, - "learning_rate": 1.999082976941926e-05, - "loss": 1.0326, + "learning_rate": 1.999089466477381e-05, + "loss": 1.2241, "step": 1524 }, { - "epoch": 0.04327468785471056, + "epoch": 0.04321459945025362, "grad_norm": 0.0, - "learning_rate": 1.9990790376026377e-05, - "loss": 1.0992, + "learning_rate": 1.999085546590745e-05, + "loss": 1.1525, "step": 1525 }, { - "epoch": 0.043303064699205446, + "epoch": 0.043242936892516085, "grad_norm": 0.0, - "learning_rate": 1.9990750898240837e-05, - "loss": 1.0937, + "learning_rate": 1.99908161828844e-05, + "loss": 1.1146, "step": 1526 }, { - "epoch": 0.04333144154370034, + "epoch": 0.043271274334778544, "grad_norm": 0.0, - "learning_rate": 1.9990711336062976e-05, - "loss": 1.0822, + "learning_rate": 1.9990776815704996e-05, + "loss": 1.1461, "step": 1527 }, { - "epoch": 0.043359818388195234, + "epoch": 0.043299611777041, "grad_norm": 0.0, - "learning_rate": 1.9990671689493132e-05, - "loss": 1.0337, + "learning_rate": 1.9990737364369572e-05, + "loss": 1.0974, "step": 1528 }, { - "epoch": 0.04338819523269012, + "epoch": 0.04332794921930347, "grad_norm": 0.0, - "learning_rate": 1.9990631958531627e-05, - "loss": 1.1381, + "learning_rate": 1.9990697828878453e-05, + "loss": 1.1629, "step": 1529 }, { - "epoch": 0.043416572077185016, + "epoch": 0.04335628666156593, "grad_norm": 0.0, - "learning_rate": 1.999059214317881e-05, - "loss": 1.1149, + "learning_rate": 1.999065820923198e-05, + "loss": 1.1964, "step": 1530 }, { - "epoch": 0.04344494892167991, + "epoch": 0.04338462410382839, "grad_norm": 0.0, - "learning_rate": 1.999055224343501e-05, - "loss": 1.0766, + "learning_rate": 1.9990618505430483e-05, + "loss": 1.1736, "step": 1531 }, { - "epoch": 0.043473325766174804, + "epoch": 0.04341296154609085, "grad_norm": 0.0, - "learning_rate": 1.9990512259300567e-05, - "loss": 1.0404, + "learning_rate": 1.9990578717474292e-05, + "loss": 1.0162, "step": 1532 }, { - "epoch": 0.04350170261066969, + "epoch": 0.04344129898835331, "grad_norm": 0.0, - "learning_rate": 1.999047219077582e-05, - "loss": 1.0703, + "learning_rate": 1.9990538845363752e-05, + "loss": 1.2, "step": 1533 }, { - "epoch": 0.043530079455164586, + "epoch": 0.04346963643061577, "grad_norm": 0.0, - "learning_rate": 1.9990432037861104e-05, - "loss": 1.0418, + "learning_rate": 1.999049888909919e-05, + "loss": 1.1454, "step": 1534 }, { - "epoch": 0.04355845629965948, + "epoch": 0.04349797387287824, "grad_norm": 0.0, - "learning_rate": 1.9990391800556757e-05, - "loss": 1.193, + "learning_rate": 1.9990458848680948e-05, + "loss": 1.3031, "step": 1535 }, { - "epoch": 0.04358683314415437, + "epoch": 0.043526311315140696, "grad_norm": 0.0, - "learning_rate": 1.9990351478863124e-05, - "loss": 1.0692, + "learning_rate": 1.9990418724109358e-05, + "loss": 1.1825, "step": 1536 }, { - "epoch": 0.04361520998864926, + "epoch": 0.043554648757403155, "grad_norm": 0.0, - "learning_rate": 1.999031107278054e-05, - "loss": 1.039, + "learning_rate": 1.9990378515384763e-05, + "loss": 1.2813, "step": 1537 }, { - "epoch": 0.043643586833144156, + "epoch": 0.04358298619966562, "grad_norm": 0.0, - "learning_rate": 1.9990270582309353e-05, - "loss": 1.1281, + "learning_rate": 1.9990338222507503e-05, + "loss": 1.0866, "step": 1538 }, { - "epoch": 0.04367196367763904, + "epoch": 0.04361132364192808, "grad_norm": 0.0, - "learning_rate": 1.9990230007449898e-05, - "loss": 1.0673, + "learning_rate": 1.999029784547791e-05, + "loss": 1.1573, "step": 1539 }, { - "epoch": 0.04370034052213394, + "epoch": 0.04363966108419054, "grad_norm": 0.0, - "learning_rate": 1.9990189348202526e-05, - "loss": 1.1174, + "learning_rate": 1.999025738429633e-05, + "loss": 1.0753, "step": 1540 }, { - "epoch": 0.04372871736662883, + "epoch": 0.043667998526453006, "grad_norm": 0.0, - "learning_rate": 1.999014860456757e-05, - "loss": 0.9782, + "learning_rate": 1.9990216838963103e-05, + "loss": 1.1795, "step": 1541 }, { - "epoch": 0.043757094211123726, + "epoch": 0.043696335968715465, "grad_norm": 0.0, - "learning_rate": 1.999010777654538e-05, - "loss": 1.1076, + "learning_rate": 1.9990176209478568e-05, + "loss": 1.0508, "step": 1542 }, { - "epoch": 0.04378547105561861, + "epoch": 0.043724673410977924, "grad_norm": 0.0, - "learning_rate": 1.9990066864136305e-05, - "loss": 1.0124, + "learning_rate": 1.9990135495843068e-05, + "loss": 1.2252, "step": 1543 }, { - "epoch": 0.04381384790011351, + "epoch": 0.04375301085324039, "grad_norm": 0.0, - "learning_rate": 1.9990025867340683e-05, - "loss": 1.1193, + "learning_rate": 1.999009469805695e-05, + "loss": 1.2156, "step": 1544 }, { - "epoch": 0.0438422247446084, + "epoch": 0.04378134829550285, "grad_norm": 0.0, - "learning_rate": 1.9989984786158864e-05, - "loss": 1.0733, + "learning_rate": 1.9990053816120553e-05, + "loss": 1.127, "step": 1545 }, { - "epoch": 0.04387060158910329, + "epoch": 0.04380968573776531, "grad_norm": 0.0, - "learning_rate": 1.9989943620591195e-05, - "loss": 1.0951, + "learning_rate": 1.9990012850034225e-05, + "loss": 1.1925, "step": 1546 }, { - "epoch": 0.04389897843359818, + "epoch": 0.04383802318002777, "grad_norm": 0.0, - "learning_rate": 1.998990237063802e-05, - "loss": 1.0034, + "learning_rate": 1.9989971799798308e-05, + "loss": 1.0796, "step": 1547 }, { - "epoch": 0.04392735527809308, + "epoch": 0.04386636062229023, "grad_norm": 0.0, - "learning_rate": 1.9989861036299695e-05, - "loss": 1.1717, + "learning_rate": 1.9989930665413148e-05, + "loss": 1.1499, "step": 1548 }, { - "epoch": 0.043955732122587965, + "epoch": 0.04389469806455269, "grad_norm": 0.0, - "learning_rate": 1.9989819617576562e-05, - "loss": 1.0789, + "learning_rate": 1.9989889446879092e-05, + "loss": 1.0745, "step": 1549 }, { - "epoch": 0.04398410896708286, + "epoch": 0.04392303550681515, "grad_norm": 0.0, - "learning_rate": 1.9989778114468974e-05, - "loss": 1.1419, + "learning_rate": 1.9989848144196488e-05, + "loss": 1.0678, "step": 1550 }, { - "epoch": 0.04401248581157775, + "epoch": 0.04395137294907762, "grad_norm": 0.0, - "learning_rate": 1.998973652697728e-05, - "loss": 1.0998, + "learning_rate": 1.9989806757365686e-05, + "loss": 1.1985, "step": 1551 }, { - "epoch": 0.04404086265607265, + "epoch": 0.04397971039134008, "grad_norm": 0.0, - "learning_rate": 1.9989694855101837e-05, - "loss": 1.1829, + "learning_rate": 1.998976528638703e-05, + "loss": 1.1352, "step": 1552 }, { - "epoch": 0.044069239500567535, + "epoch": 0.044008047833602536, "grad_norm": 0.0, - "learning_rate": 1.998965309884299e-05, - "loss": 1.1104, + "learning_rate": 1.998972373126087e-05, + "loss": 1.1768, "step": 1553 }, { - "epoch": 0.04409761634506243, + "epoch": 0.044036385275865, "grad_norm": 0.0, - "learning_rate": 1.998961125820109e-05, - "loss": 1.1509, + "learning_rate": 1.9989682091987558e-05, + "loss": 1.1711, "step": 1554 }, { - "epoch": 0.044125993189557323, + "epoch": 0.04406472271812746, "grad_norm": 0.0, - "learning_rate": 1.99895693331765e-05, - "loss": 1.1085, + "learning_rate": 1.9989640368567446e-05, + "loss": 1.104, "step": 1555 }, { - "epoch": 0.04415437003405221, + "epoch": 0.04409306016038992, "grad_norm": 0.0, - "learning_rate": 1.9989527323769565e-05, - "loss": 1.1931, + "learning_rate": 1.9989598561000882e-05, + "loss": 1.0659, "step": 1556 }, { - "epoch": 0.044182746878547105, + "epoch": 0.044121397602652386, "grad_norm": 0.0, - "learning_rate": 1.9989485229980648e-05, - "loss": 1.1035, + "learning_rate": 1.9989556669288222e-05, + "loss": 1.0741, "step": 1557 }, { - "epoch": 0.044211123723042, + "epoch": 0.044149735044914845, "grad_norm": 0.0, - "learning_rate": 1.9989443051810098e-05, - "loss": 1.0297, + "learning_rate": 1.9989514693429815e-05, + "loss": 1.1014, "step": 1558 }, { - "epoch": 0.04423950056753689, + "epoch": 0.044178072487177304, "grad_norm": 0.0, - "learning_rate": 1.9989400789258272e-05, - "loss": 1.0768, + "learning_rate": 1.9989472633426016e-05, + "loss": 1.2155, "step": 1559 }, { - "epoch": 0.04426787741203178, + "epoch": 0.04420640992943977, "grad_norm": 0.0, - "learning_rate": 1.9989358442325532e-05, - "loss": 1.0315, + "learning_rate": 1.998943048927718e-05, + "loss": 1.2013, "step": 1560 }, { - "epoch": 0.044296254256526675, + "epoch": 0.04423474737170223, "grad_norm": 0.0, - "learning_rate": 1.998931601101223e-05, - "loss": 1.1033, + "learning_rate": 1.998938826098366e-05, + "loss": 1.159, "step": 1561 }, { - "epoch": 0.04432463110102157, + "epoch": 0.04426308481396469, "grad_norm": 0.0, - "learning_rate": 1.9989273495318726e-05, - "loss": 1.1325, + "learning_rate": 1.9989345948545816e-05, + "loss": 0.9875, "step": 1562 }, { - "epoch": 0.04435300794551646, + "epoch": 0.044291422256227155, "grad_norm": 0.0, - "learning_rate": 1.9989230895245382e-05, - "loss": 1.201, + "learning_rate": 1.9989303551963996e-05, + "loss": 1.1832, "step": 1563 }, { - "epoch": 0.04438138479001135, + "epoch": 0.044319759698489614, "grad_norm": 0.0, - "learning_rate": 1.998918821079255e-05, - "loss": 1.1086, + "learning_rate": 1.998926107123857e-05, + "loss": 1.1692, "step": 1564 }, { - "epoch": 0.044409761634506245, + "epoch": 0.04434809714075207, "grad_norm": 0.0, - "learning_rate": 1.9989145441960604e-05, - "loss": 1.1896, + "learning_rate": 1.9989218506369883e-05, + "loss": 1.1447, "step": 1565 }, { - "epoch": 0.04443813847900113, + "epoch": 0.04437643458301454, "grad_norm": 0.0, - "learning_rate": 1.998910258874989e-05, - "loss": 1.0022, + "learning_rate": 1.9989175857358303e-05, + "loss": 1.2132, "step": 1566 }, { - "epoch": 0.04446651532349603, + "epoch": 0.044404772025277, "grad_norm": 0.0, - "learning_rate": 1.9989059651160786e-05, - "loss": 1.1567, + "learning_rate": 1.9989133124204183e-05, + "loss": 1.1251, "step": 1567 }, { - "epoch": 0.04449489216799092, + "epoch": 0.04443310946753946, "grad_norm": 0.0, - "learning_rate": 1.998901662919364e-05, - "loss": 0.9951, + "learning_rate": 1.9989090306907885e-05, + "loss": 1.1332, "step": 1568 }, { - "epoch": 0.04452326901248581, + "epoch": 0.04446144690980192, "grad_norm": 0.0, - "learning_rate": 1.998897352284882e-05, - "loss": 1.1557, + "learning_rate": 1.9989047405469772e-05, + "loss": 1.0839, "step": 1569 }, { - "epoch": 0.0445516458569807, + "epoch": 0.04448978435206438, "grad_norm": 0.0, - "learning_rate": 1.9988930332126694e-05, - "loss": 1.0872, + "learning_rate": 1.9989004419890202e-05, + "loss": 1.167, "step": 1570 }, { - "epoch": 0.0445800227014756, + "epoch": 0.04451812179432684, "grad_norm": 0.0, - "learning_rate": 1.9988887057027627e-05, - "loss": 1.1135, + "learning_rate": 1.998896135016954e-05, + "loss": 1.0933, "step": 1571 }, { - "epoch": 0.04460839954597049, + "epoch": 0.04454645923658931, "grad_norm": 0.0, - "learning_rate": 1.998884369755198e-05, - "loss": 1.1525, + "learning_rate": 1.9988918196308146e-05, + "loss": 1.2792, "step": 1572 }, { - "epoch": 0.04463677639046538, + "epoch": 0.044574796678851766, "grad_norm": 0.0, - "learning_rate": 1.9988800253700118e-05, - "loss": 1.0186, + "learning_rate": 1.9988874958306385e-05, + "loss": 1.2734, "step": 1573 }, { - "epoch": 0.04466515323496027, + "epoch": 0.044603134121114225, "grad_norm": 0.0, - "learning_rate": 1.9988756725472417e-05, - "loss": 0.9871, + "learning_rate": 1.998883163616462e-05, + "loss": 1.113, "step": 1574 }, { - "epoch": 0.04469353007945517, + "epoch": 0.04463147156337669, "grad_norm": 0.0, - "learning_rate": 1.9988713112869234e-05, - "loss": 1.1436, + "learning_rate": 1.9988788229883215e-05, + "loss": 1.141, "step": 1575 }, { - "epoch": 0.044721906923950054, + "epoch": 0.04465980900563915, "grad_norm": 0.0, - "learning_rate": 1.9988669415890943e-05, - "loss": 1.025, + "learning_rate": 1.9988744739462542e-05, + "loss": 1.1528, "step": 1576 }, { - "epoch": 0.04475028376844495, + "epoch": 0.04468814644790161, "grad_norm": 0.0, - "learning_rate": 1.9988625634537916e-05, - "loss": 1.1335, + "learning_rate": 1.9988701164902963e-05, + "loss": 1.1722, "step": 1577 }, { - "epoch": 0.04477866061293984, + "epoch": 0.044716483890164076, "grad_norm": 0.0, - "learning_rate": 1.9988581768810517e-05, - "loss": 1.155, + "learning_rate": 1.998865750620484e-05, + "loss": 1.1647, "step": 1578 }, { - "epoch": 0.04480703745743473, + "epoch": 0.044744821332426535, "grad_norm": 0.0, - "learning_rate": 1.998853781870912e-05, - "loss": 1.1158, + "learning_rate": 1.9988613763368548e-05, + "loss": 1.1084, "step": 1579 }, { - "epoch": 0.044835414301929624, + "epoch": 0.044773158774688994, "grad_norm": 0.0, - "learning_rate": 1.998849378423409e-05, - "loss": 1.155, + "learning_rate": 1.9988569936394454e-05, + "loss": 1.1717, "step": 1580 }, { - "epoch": 0.04486379114642452, + "epoch": 0.04480149621695146, "grad_norm": 0.0, - "learning_rate": 1.998844966538581e-05, - "loss": 1.0762, + "learning_rate": 1.9988526025282925e-05, + "loss": 1.192, "step": 1581 }, { - "epoch": 0.04489216799091941, + "epoch": 0.04482983365921392, "grad_norm": 0.0, - "learning_rate": 1.9988405462164642e-05, - "loss": 0.9684, + "learning_rate": 1.9988482030034336e-05, + "loss": 1.1396, "step": 1582 }, { - "epoch": 0.0449205448354143, + "epoch": 0.04485817110147638, "grad_norm": 0.0, - "learning_rate": 1.998836117457097e-05, - "loss": 1.0786, + "learning_rate": 1.9988437950649052e-05, + "loss": 1.0624, "step": 1583 }, { - "epoch": 0.044948921679909194, + "epoch": 0.044886508543738844, "grad_norm": 0.0, - "learning_rate": 1.9988316802605158e-05, - "loss": 1.0676, + "learning_rate": 1.9988393787127444e-05, + "loss": 1.0321, "step": 1584 }, { - "epoch": 0.04497729852440409, + "epoch": 0.0449148459860013, "grad_norm": 0.0, - "learning_rate": 1.9988272346267587e-05, - "loss": 1.086, + "learning_rate": 1.9988349539469887e-05, + "loss": 1.1952, "step": 1585 }, { - "epoch": 0.045005675368898976, + "epoch": 0.04494318342826376, "grad_norm": 0.0, - "learning_rate": 1.998822780555863e-05, - "loss": 1.0784, + "learning_rate": 1.998830520767675e-05, + "loss": 1.0956, "step": 1586 }, { - "epoch": 0.04503405221339387, + "epoch": 0.04497152087052623, "grad_norm": 0.0, - "learning_rate": 1.9988183180478658e-05, - "loss": 1.0782, + "learning_rate": 1.998826079174841e-05, + "loss": 1.044, "step": 1587 }, { - "epoch": 0.045062429057888764, + "epoch": 0.04499985831278869, "grad_norm": 0.0, - "learning_rate": 1.998813847102806e-05, - "loss": 1.1075, + "learning_rate": 1.9988216291685245e-05, + "loss": 1.1883, "step": 1588 }, { - "epoch": 0.04509080590238365, + "epoch": 0.04502819575505115, "grad_norm": 0.0, - "learning_rate": 1.998809367720721e-05, - "loss": 1.1141, + "learning_rate": 1.9988171707487624e-05, + "loss": 1.1112, "step": 1589 }, { - "epoch": 0.045119182746878546, + "epoch": 0.04505653319731361, "grad_norm": 0.0, - "learning_rate": 1.9988048799016476e-05, - "loss": 1.1046, + "learning_rate": 1.998812703915592e-05, + "loss": 1.2099, "step": 1590 }, { - "epoch": 0.04514755959137344, + "epoch": 0.04508487063957607, "grad_norm": 0.0, - "learning_rate": 1.9988003836456247e-05, - "loss": 1.0073, + "learning_rate": 1.9988082286690512e-05, + "loss": 1.2394, "step": 1591 }, { - "epoch": 0.045175936435868334, + "epoch": 0.04511320808183853, "grad_norm": 0.0, - "learning_rate": 1.9987958789526902e-05, - "loss": 1.0355, + "learning_rate": 1.998803745009178e-05, + "loss": 1.0596, "step": 1592 }, { - "epoch": 0.04520431328036322, + "epoch": 0.045141545524101, "grad_norm": 0.0, - "learning_rate": 1.998791365822882e-05, - "loss": 1.0837, + "learning_rate": 1.9987992529360105e-05, + "loss": 1.1722, "step": 1593 }, { - "epoch": 0.045232690124858116, + "epoch": 0.045169882966363456, "grad_norm": 0.0, - "learning_rate": 1.9987868442562383e-05, - "loss": 1.0924, + "learning_rate": 1.9987947524495853e-05, + "loss": 1.0988, "step": 1594 }, { - "epoch": 0.04526106696935301, + "epoch": 0.045198220408625915, "grad_norm": 0.0, - "learning_rate": 1.9987823142527972e-05, - "loss": 1.0542, + "learning_rate": 1.9987902435499412e-05, + "loss": 1.1262, "step": 1595 }, { - "epoch": 0.0452894438138479, + "epoch": 0.04522655785088838, "grad_norm": 0.0, - "learning_rate": 1.998777775812597e-05, - "loss": 1.0951, + "learning_rate": 1.998785726237116e-05, + "loss": 1.1632, "step": 1596 }, { - "epoch": 0.04531782065834279, + "epoch": 0.04525489529315084, "grad_norm": 0.0, - "learning_rate": 1.9987732289356757e-05, - "loss": 1.1334, + "learning_rate": 1.998781200511148e-05, + "loss": 1.1863, "step": 1597 }, { - "epoch": 0.045346197502837686, + "epoch": 0.0452832327354133, "grad_norm": 0.0, - "learning_rate": 1.9987686736220726e-05, - "loss": 1.1186, + "learning_rate": 1.9987766663720746e-05, + "loss": 1.1689, "step": 1598 }, { - "epoch": 0.04537457434733257, + "epoch": 0.045311570177675765, "grad_norm": 0.0, - "learning_rate": 1.9987641098718255e-05, - "loss": 1.0501, + "learning_rate": 1.9987721238199345e-05, + "loss": 1.2169, "step": 1599 }, { - "epoch": 0.04540295119182747, + "epoch": 0.045339907619938224, "grad_norm": 0.0, - "learning_rate": 1.998759537684973e-05, - "loss": 1.0245, + "learning_rate": 1.9987675728547665e-05, + "loss": 1.0249, "step": 1600 }, { - "epoch": 0.04543132803632236, + "epoch": 0.045368245062200684, "grad_norm": 0.0, - "learning_rate": 1.9987549570615534e-05, - "loss": 1.0498, + "learning_rate": 1.998763013476608e-05, + "loss": 1.1408, "step": 1601 }, { - "epoch": 0.045459704880817256, + "epoch": 0.04539658250446315, "grad_norm": 0.0, - "learning_rate": 1.998750368001606e-05, - "loss": 1.088, + "learning_rate": 1.9987584456854977e-05, + "loss": 1.1243, "step": 1602 }, { - "epoch": 0.04548808172531214, + "epoch": 0.04542491994672561, "grad_norm": 0.0, - "learning_rate": 1.9987457705051696e-05, - "loss": 1.0878, + "learning_rate": 1.998753869481474e-05, + "loss": 1.0693, "step": 1603 }, { - "epoch": 0.04551645856980704, + "epoch": 0.04545325738898807, "grad_norm": 0.0, - "learning_rate": 1.9987411645722826e-05, - "loss": 1.1068, + "learning_rate": 1.998749284864576e-05, + "loss": 1.056, "step": 1604 }, { - "epoch": 0.04554483541430193, + "epoch": 0.045481594831250534, "grad_norm": 0.0, - "learning_rate": 1.998736550202984e-05, - "loss": 1.0068, + "learning_rate": 1.9987446918348412e-05, + "loss": 1.1372, "step": 1605 }, { - "epoch": 0.04557321225879682, + "epoch": 0.04550993227351299, "grad_norm": 0.0, - "learning_rate": 1.998731927397313e-05, - "loss": 1.1708, + "learning_rate": 1.9987400903923096e-05, + "loss": 1.029, "step": 1606 }, { - "epoch": 0.04560158910329171, + "epoch": 0.04553826971577545, "grad_norm": 0.0, - "learning_rate": 1.9987272961553087e-05, - "loss": 1.1256, + "learning_rate": 1.998735480537019e-05, + "loss": 1.2014, "step": 1607 }, { - "epoch": 0.04562996594778661, + "epoch": 0.04556660715803792, "grad_norm": 0.0, - "learning_rate": 1.9987226564770095e-05, - "loss": 1.0287, + "learning_rate": 1.9987308622690087e-05, + "loss": 1.1394, "step": 1608 }, { - "epoch": 0.045658342792281495, + "epoch": 0.04559494460030038, "grad_norm": 0.0, - "learning_rate": 1.9987180083624557e-05, - "loss": 1.114, + "learning_rate": 1.9987262355883173e-05, + "loss": 1.1237, "step": 1609 }, { - "epoch": 0.04568671963677639, + "epoch": 0.045623282042562836, "grad_norm": 0.0, - "learning_rate": 1.9987133518116858e-05, - "loss": 1.0789, + "learning_rate": 1.9987216004949845e-05, + "loss": 1.1449, "step": 1610 }, { - "epoch": 0.04571509648127128, + "epoch": 0.0456516194848253, "grad_norm": 0.0, - "learning_rate": 1.998708686824739e-05, - "loss": 1.0861, + "learning_rate": 1.998716956989048e-05, + "loss": 1.1588, "step": 1611 }, { - "epoch": 0.04574347332576618, + "epoch": 0.04567995692708776, "grad_norm": 0.0, - "learning_rate": 1.998704013401655e-05, - "loss": 1.1345, + "learning_rate": 1.9987123050705483e-05, + "loss": 1.2648, "step": 1612 }, { - "epoch": 0.045771850170261065, + "epoch": 0.04570829436935022, "grad_norm": 0.0, - "learning_rate": 1.998699331542474e-05, - "loss": 1.0867, + "learning_rate": 1.998707644739524e-05, + "loss": 1.068, "step": 1613 }, { - "epoch": 0.04580022701475596, + "epoch": 0.04573663181161269, "grad_norm": 0.0, - "learning_rate": 1.9986946412472342e-05, - "loss": 1.192, + "learning_rate": 1.9987029759960142e-05, + "loss": 1.092, "step": 1614 }, { - "epoch": 0.045828603859250854, + "epoch": 0.045764969253875146, "grad_norm": 0.0, - "learning_rate": 1.9986899425159762e-05, - "loss": 0.9879, + "learning_rate": 1.9986982988400582e-05, + "loss": 1.1199, "step": 1615 }, { - "epoch": 0.04585698070374574, + "epoch": 0.045793306696137605, "grad_norm": 0.0, - "learning_rate": 1.998685235348739e-05, - "loss": 1.1135, + "learning_rate": 1.9986936132716956e-05, + "loss": 1.0711, "step": 1616 }, { - "epoch": 0.045885357548240635, + "epoch": 0.04582164413840007, "grad_norm": 0.0, - "learning_rate": 1.9986805197455628e-05, - "loss": 1.1932, + "learning_rate": 1.9986889192909663e-05, + "loss": 1.1003, "step": 1617 }, { - "epoch": 0.04591373439273553, + "epoch": 0.04584998158066253, "grad_norm": 0.0, - "learning_rate": 1.998675795706488e-05, - "loss": 1.0075, + "learning_rate": 1.9986842168979087e-05, + "loss": 1.1393, "step": 1618 }, { - "epoch": 0.04594211123723042, + "epoch": 0.04587831902292499, "grad_norm": 0.0, - "learning_rate": 1.9986710632315533e-05, - "loss": 1.1204, + "learning_rate": 1.9986795060925636e-05, + "loss": 1.1565, "step": 1619 }, { - "epoch": 0.04597048808172531, + "epoch": 0.045906656465187455, "grad_norm": 0.0, - "learning_rate": 1.9986663223207993e-05, - "loss": 1.056, + "learning_rate": 1.99867478687497e-05, + "loss": 1.1716, "step": 1620 }, { - "epoch": 0.045998864926220205, + "epoch": 0.045934993907449914, "grad_norm": 0.0, - "learning_rate": 1.998661572974266e-05, - "loss": 1.0387, + "learning_rate": 1.998670059245168e-05, + "loss": 1.0574, "step": 1621 }, { - "epoch": 0.0460272417707151, + "epoch": 0.04596333134971237, "grad_norm": 0.0, - "learning_rate": 1.9986568151919935e-05, - "loss": 1.1643, + "learning_rate": 1.9986653232031968e-05, + "loss": 1.1005, "step": 1622 }, { - "epoch": 0.04605561861520999, + "epoch": 0.04599166879197484, "grad_norm": 0.0, - "learning_rate": 1.998652048974022e-05, - "loss": 1.0826, + "learning_rate": 1.998660578749097e-05, + "loss": 1.1426, "step": 1623 }, { - "epoch": 0.04608399545970488, + "epoch": 0.0460200062342373, "grad_norm": 0.0, - "learning_rate": 1.998647274320392e-05, - "loss": 1.0145, + "learning_rate": 1.9986558258829082e-05, + "loss": 1.0476, "step": 1624 }, { - "epoch": 0.046112372304199775, + "epoch": 0.04604834367649976, "grad_norm": 0.0, - "learning_rate": 1.998642491231143e-05, - "loss": 1.2176, + "learning_rate": 1.998651064604671e-05, + "loss": 1.1653, "step": 1625 }, { - "epoch": 0.04614074914869466, + "epoch": 0.046076681118762224, "grad_norm": 0.0, - "learning_rate": 1.9986376997063163e-05, - "loss": 1.0624, + "learning_rate": 1.9986462949144245e-05, + "loss": 1.0946, "step": 1626 }, { - "epoch": 0.04616912599318956, + "epoch": 0.04610501856102468, "grad_norm": 0.0, - "learning_rate": 1.9986328997459523e-05, - "loss": 1.0152, + "learning_rate": 1.9986415168122094e-05, + "loss": 1.2324, "step": 1627 }, { - "epoch": 0.04619750283768445, + "epoch": 0.04613335600328714, "grad_norm": 0.0, - "learning_rate": 1.9986280913500907e-05, - "loss": 1.0501, + "learning_rate": 1.998636730298066e-05, + "loss": 1.1582, "step": 1628 }, { - "epoch": 0.04622587968217934, + "epoch": 0.04616169344554961, "grad_norm": 0.0, - "learning_rate": 1.998623274518773e-05, - "loss": 1.091, + "learning_rate": 1.9986319353720353e-05, + "loss": 1.1306, "step": 1629 }, { - "epoch": 0.04625425652667423, + "epoch": 0.04619003088781207, "grad_norm": 0.0, - "learning_rate": 1.99861844925204e-05, - "loss": 1.1566, + "learning_rate": 1.998627132034156e-05, + "loss": 1.1763, "step": 1630 }, { - "epoch": 0.04628263337116913, + "epoch": 0.046218368330074526, "grad_norm": 0.0, - "learning_rate": 1.9986136155499318e-05, - "loss": 1.0364, + "learning_rate": 1.99862232028447e-05, + "loss": 1.1631, "step": 1631 }, { - "epoch": 0.04631101021566402, + "epoch": 0.04624670577233699, "grad_norm": 0.0, - "learning_rate": 1.9986087734124895e-05, - "loss": 1.132, + "learning_rate": 1.9986175001230173e-05, + "loss": 1.0751, "step": 1632 }, { - "epoch": 0.04633938706015891, + "epoch": 0.04627504321459945, "grad_norm": 0.0, - "learning_rate": 1.9986039228397536e-05, - "loss": 1.0508, + "learning_rate": 1.9986126715498386e-05, + "loss": 1.1131, "step": 1633 }, { - "epoch": 0.0463677639046538, + "epoch": 0.04630338065686191, "grad_norm": 0.0, - "learning_rate": 1.9985990638317663e-05, - "loss": 1.1039, + "learning_rate": 1.998607834564975e-05, + "loss": 1.1818, "step": 1634 }, { - "epoch": 0.0463961407491487, + "epoch": 0.046331718099124376, "grad_norm": 0.0, - "learning_rate": 1.9985941963885672e-05, - "loss": 1.0591, + "learning_rate": 1.998602989168466e-05, + "loss": 1.1834, "step": 1635 }, { - "epoch": 0.046424517593643584, + "epoch": 0.046360055541386835, "grad_norm": 0.0, - "learning_rate": 1.998589320510198e-05, - "loss": 1.0598, + "learning_rate": 1.9985981353603536e-05, + "loss": 1.1015, "step": 1636 }, { - "epoch": 0.04645289443813848, + "epoch": 0.046388392983649294, "grad_norm": 0.0, - "learning_rate": 1.9985844361967005e-05, - "loss": 1.0894, + "learning_rate": 1.9985932731406782e-05, + "loss": 1.1154, "step": 1637 }, { - "epoch": 0.04648127128263337, + "epoch": 0.04641673042591176, "grad_norm": 0.0, - "learning_rate": 1.9985795434481146e-05, - "loss": 1.0312, + "learning_rate": 1.998588402509481e-05, + "loss": 1.132, "step": 1638 }, { - "epoch": 0.04650964812712826, + "epoch": 0.04644506786817422, "grad_norm": 0.0, - "learning_rate": 1.9985746422644833e-05, - "loss": 1.1414, + "learning_rate": 1.9985835234668025e-05, + "loss": 1.1398, "step": 1639 }, { - "epoch": 0.046538024971623154, + "epoch": 0.04647340531043668, "grad_norm": 0.0, - "learning_rate": 1.998569732645846e-05, - "loss": 1.2029, + "learning_rate": 1.9985786360126845e-05, + "loss": 1.0533, "step": 1640 }, { - "epoch": 0.04656640181611805, + "epoch": 0.046501742752699145, "grad_norm": 0.0, - "learning_rate": 1.9985648145922464e-05, - "loss": 0.9942, + "learning_rate": 1.9985737401471677e-05, + "loss": 1.1557, "step": 1641 }, { - "epoch": 0.04659477866061294, + "epoch": 0.046530080194961604, "grad_norm": 0.0, - "learning_rate": 1.9985598881037243e-05, - "loss": 1.138, + "learning_rate": 1.9985688358702933e-05, + "loss": 1.0483, "step": 1642 }, { - "epoch": 0.04662315550510783, + "epoch": 0.04655841763722406, "grad_norm": 0.0, - "learning_rate": 1.998554953180322e-05, - "loss": 1.0823, + "learning_rate": 1.9985639231821033e-05, + "loss": 1.1147, "step": 1643 }, { - "epoch": 0.046651532349602724, + "epoch": 0.04658675507948653, "grad_norm": 0.0, - "learning_rate": 1.9985500098220813e-05, - "loss": 0.9982, + "learning_rate": 1.9985590020826382e-05, + "loss": 1.0967, "step": 1644 }, { - "epoch": 0.04667990919409762, + "epoch": 0.04661509252174899, "grad_norm": 0.0, - "learning_rate": 1.9985450580290434e-05, - "loss": 1.1318, + "learning_rate": 1.9985540725719398e-05, + "loss": 1.1373, "step": 1645 }, { - "epoch": 0.046708286038592506, + "epoch": 0.04664342996401145, "grad_norm": 0.0, - "learning_rate": 1.9985400978012508e-05, - "loss": 1.1391, + "learning_rate": 1.9985491346500497e-05, + "loss": 1.1015, "step": 1646 }, { - "epoch": 0.0467366628830874, + "epoch": 0.046671767406273906, "grad_norm": 0.0, - "learning_rate": 1.998535129138745e-05, - "loss": 1.0183, + "learning_rate": 1.9985441883170096e-05, + "loss": 1.1761, "step": 1647 }, { - "epoch": 0.046765039727582294, + "epoch": 0.04670010484853637, "grad_norm": 0.0, - "learning_rate": 1.998530152041568e-05, - "loss": 1.1011, + "learning_rate": 1.998539233572861e-05, + "loss": 1.1247, "step": 1648 }, { - "epoch": 0.04679341657207718, + "epoch": 0.04672844229079883, "grad_norm": 0.0, - "learning_rate": 1.998525166509762e-05, - "loss": 1.0677, + "learning_rate": 1.998534270417645e-05, + "loss": 1.1121, "step": 1649 }, { - "epoch": 0.046821793416572076, + "epoch": 0.04675677973306129, "grad_norm": 0.0, - "learning_rate": 1.9985201725433693e-05, - "loss": 1.018, + "learning_rate": 1.9985292988514048e-05, + "loss": 1.065, "step": 1650 }, { - "epoch": 0.04685017026106697, + "epoch": 0.04678511717532376, "grad_norm": 0.0, - "learning_rate": 1.9985151701424317e-05, - "loss": 1.1255, + "learning_rate": 1.998524318874181e-05, + "loss": 1.1384, "step": 1651 }, { - "epoch": 0.046878547105561864, + "epoch": 0.046813454617586216, "grad_norm": 0.0, - "learning_rate": 1.9985101593069914e-05, - "loss": 1.0742, + "learning_rate": 1.9985193304860165e-05, + "loss": 1.1734, "step": 1652 }, { - "epoch": 0.04690692395005675, + "epoch": 0.046841792059848675, "grad_norm": 0.0, - "learning_rate": 1.998505140037091e-05, - "loss": 1.0602, + "learning_rate": 1.9985143336869527e-05, + "loss": 1.222, "step": 1653 }, { - "epoch": 0.046935300794551646, + "epoch": 0.04687012950211114, "grad_norm": 0.0, - "learning_rate": 1.9985001123327728e-05, - "loss": 1.0327, + "learning_rate": 1.998509328477032e-05, + "loss": 1.2401, "step": 1654 }, { - "epoch": 0.04696367763904654, + "epoch": 0.0468984669443736, "grad_norm": 0.0, - "learning_rate": 1.9984950761940792e-05, - "loss": 1.1322, + "learning_rate": 1.9985043148562962e-05, + "loss": 1.1404, "step": 1655 }, { - "epoch": 0.04699205448354143, + "epoch": 0.04692680438663606, "grad_norm": 0.0, - "learning_rate": 1.998490031621053e-05, - "loss": 1.1163, + "learning_rate": 1.998499292824788e-05, + "loss": 1.1152, "step": 1656 }, { - "epoch": 0.04702043132803632, + "epoch": 0.046955141828898525, "grad_norm": 0.0, - "learning_rate": 1.9984849786137363e-05, - "loss": 1.1312, + "learning_rate": 1.9984942623825495e-05, + "loss": 1.23, "step": 1657 }, { - "epoch": 0.047048808172531216, + "epoch": 0.046983479271160984, "grad_norm": 0.0, - "learning_rate": 1.9984799171721723e-05, - "loss": 1.1323, + "learning_rate": 1.9984892235296226e-05, + "loss": 1.0758, "step": 1658 }, { - "epoch": 0.0470771850170261, + "epoch": 0.04701181671342344, "grad_norm": 0.0, - "learning_rate": 1.9984748472964034e-05, - "loss": 0.9733, + "learning_rate": 1.9984841762660508e-05, + "loss": 1.1927, "step": 1659 }, { - "epoch": 0.047105561861521, + "epoch": 0.04704015415568591, "grad_norm": 0.0, - "learning_rate": 1.998469768986473e-05, - "loss": 0.97, + "learning_rate": 1.9984791205918755e-05, + "loss": 1.145, "step": 1660 }, { - "epoch": 0.04713393870601589, + "epoch": 0.04706849159794837, "grad_norm": 0.0, - "learning_rate": 1.998464682242423e-05, - "loss": 1.1909, + "learning_rate": 1.9984740565071405e-05, + "loss": 1.1656, "step": 1661 }, { - "epoch": 0.047162315550510786, + "epoch": 0.04709682904021083, "grad_norm": 0.0, - "learning_rate": 1.9984595870642972e-05, - "loss": 1.1464, + "learning_rate": 1.998468984011887e-05, + "loss": 1.2784, "step": 1662 }, { - "epoch": 0.04719069239500567, + "epoch": 0.047125166482473294, "grad_norm": 0.0, - "learning_rate": 1.9984544834521385e-05, - "loss": 1.0805, + "learning_rate": 1.9984639031061584e-05, + "loss": 1.1433, "step": 1663 }, { - "epoch": 0.04721906923950057, + "epoch": 0.04715350392473575, "grad_norm": 0.0, - "learning_rate": 1.9984493714059897e-05, - "loss": 1.0629, + "learning_rate": 1.998458813789998e-05, + "loss": 1.0957, "step": 1664 }, { - "epoch": 0.04724744608399546, + "epoch": 0.04718184136699821, "grad_norm": 0.0, - "learning_rate": 1.9984442509258942e-05, - "loss": 1.0893, + "learning_rate": 1.9984537160634482e-05, + "loss": 1.1377, "step": 1665 }, { - "epoch": 0.04727582292849035, + "epoch": 0.04721017880926068, "grad_norm": 0.0, - "learning_rate": 1.9984391220118953e-05, - "loss": 1.0031, + "learning_rate": 1.9984486099265522e-05, + "loss": 1.141, "step": 1666 }, { - "epoch": 0.04730419977298524, + "epoch": 0.04723851625152314, "grad_norm": 0.0, - "learning_rate": 1.9984339846640362e-05, - "loss": 1.0901, + "learning_rate": 1.9984434953793523e-05, + "loss": 1.0675, "step": 1667 }, { - "epoch": 0.04733257661748014, + "epoch": 0.047266853693785596, "grad_norm": 0.0, - "learning_rate": 1.9984288388823608e-05, - "loss": 1.0601, + "learning_rate": 1.9984383724218924e-05, + "loss": 1.0986, "step": 1668 }, { - "epoch": 0.047360953461975025, + "epoch": 0.04729519113604806, "grad_norm": 0.0, - "learning_rate": 1.9984236846669115e-05, - "loss": 1.1797, + "learning_rate": 1.9984332410542153e-05, + "loss": 1.1381, "step": 1669 }, { - "epoch": 0.04738933030646992, + "epoch": 0.04732352857831052, "grad_norm": 0.0, - "learning_rate": 1.9984185220177327e-05, - "loss": 1.1362, + "learning_rate": 1.998428101276364e-05, + "loss": 1.1016, "step": 1670 }, { - "epoch": 0.047417707150964813, + "epoch": 0.04735186602057298, "grad_norm": 0.0, - "learning_rate": 1.9984133509348678e-05, - "loss": 1.0934, + "learning_rate": 1.9984229530883822e-05, + "loss": 1.1666, "step": 1671 }, { - "epoch": 0.04744608399545971, + "epoch": 0.047380203462835446, "grad_norm": 0.0, - "learning_rate": 1.9984081714183604e-05, - "loss": 1.0096, + "learning_rate": 1.9984177964903133e-05, + "loss": 1.1497, "step": 1672 }, { - "epoch": 0.047474460839954595, + "epoch": 0.047408540905097905, "grad_norm": 0.0, - "learning_rate": 1.9984029834682543e-05, - "loss": 0.9875, + "learning_rate": 1.9984126314822002e-05, + "loss": 1.2125, "step": 1673 }, { - "epoch": 0.04750283768444949, + "epoch": 0.047436878347360364, "grad_norm": 0.0, - "learning_rate": 1.998397787084593e-05, - "loss": 1.0656, + "learning_rate": 1.998407458064087e-05, + "loss": 1.067, "step": 1674 }, { - "epoch": 0.047531214528944384, + "epoch": 0.04746521578962283, "grad_norm": 0.0, - "learning_rate": 1.998392582267421e-05, - "loss": 1.1882, + "learning_rate": 1.998402276236017e-05, + "loss": 1.1461, "step": 1675 }, { - "epoch": 0.04755959137343927, + "epoch": 0.04749355323188529, "grad_norm": 0.0, - "learning_rate": 1.9983873690167822e-05, - "loss": 1.0566, + "learning_rate": 1.998397085998034e-05, + "loss": 1.1654, "step": 1676 }, { - "epoch": 0.047587968217934165, + "epoch": 0.04752189067414775, "grad_norm": 0.0, - "learning_rate": 1.9983821473327202e-05, - "loss": 1.1548, + "learning_rate": 1.998391887350181e-05, + "loss": 1.1968, "step": 1677 }, { - "epoch": 0.04761634506242906, + "epoch": 0.047550228116410215, "grad_norm": 0.0, - "learning_rate": 1.9983769172152794e-05, - "loss": 1.1414, + "learning_rate": 1.9983866802925035e-05, + "loss": 1.2508, "step": 1678 }, { - "epoch": 0.04764472190692395, + "epoch": 0.047578565558672674, "grad_norm": 0.0, - "learning_rate": 1.9983716786645035e-05, - "loss": 1.0154, + "learning_rate": 1.9983814648250434e-05, + "loss": 1.1032, "step": 1679 }, { - "epoch": 0.04767309875141884, + "epoch": 0.04760690300093513, "grad_norm": 0.0, - "learning_rate": 1.9983664316804378e-05, - "loss": 1.0992, + "learning_rate": 1.9983762409478457e-05, + "loss": 1.0652, "step": 1680 }, { - "epoch": 0.047701475595913735, + "epoch": 0.0476352404431976, "grad_norm": 0.0, - "learning_rate": 1.9983611762631254e-05, - "loss": 1.0854, + "learning_rate": 1.998371008660954e-05, + "loss": 1.1792, "step": 1681 }, { - "epoch": 0.04772985244040863, + "epoch": 0.04766357788546006, "grad_norm": 0.0, - "learning_rate": 1.9983559124126116e-05, - "loss": 1.099, + "learning_rate": 1.9983657679644128e-05, + "loss": 1.1492, "step": 1682 }, { - "epoch": 0.04775822928490352, + "epoch": 0.04769191532772252, "grad_norm": 0.0, - "learning_rate": 1.9983506401289403e-05, - "loss": 1.0267, + "learning_rate": 1.9983605188582656e-05, + "loss": 1.1469, "step": 1683 }, { - "epoch": 0.04778660612939841, + "epoch": 0.04772025276998498, "grad_norm": 0.0, - "learning_rate": 1.9983453594121565e-05, - "loss": 1.0981, + "learning_rate": 1.9983552613425575e-05, + "loss": 1.0813, "step": 1684 }, { - "epoch": 0.047814982973893305, + "epoch": 0.04774859021224744, "grad_norm": 0.0, - "learning_rate": 1.9983400702623045e-05, - "loss": 0.9428, + "learning_rate": 1.998349995417332e-05, + "loss": 1.1306, "step": 1685 }, { - "epoch": 0.04784335981838819, + "epoch": 0.0477769276545099, "grad_norm": 0.0, - "learning_rate": 1.998334772679429e-05, - "loss": 1.1086, + "learning_rate": 1.9983447210826342e-05, + "loss": 1.2669, "step": 1686 }, { - "epoch": 0.04787173666288309, + "epoch": 0.04780526509677237, "grad_norm": 0.0, - "learning_rate": 1.9983294666635745e-05, - "loss": 1.1893, + "learning_rate": 1.998339438338508e-05, + "loss": 1.1578, "step": 1687 }, { - "epoch": 0.04790011350737798, + "epoch": 0.04783360253903483, "grad_norm": 0.0, - "learning_rate": 1.9983241522147864e-05, - "loss": 1.1312, + "learning_rate": 1.9983341471849974e-05, + "loss": 1.0731, "step": 1688 }, { - "epoch": 0.04792849035187287, + "epoch": 0.047861939981297286, "grad_norm": 0.0, - "learning_rate": 1.9983188293331097e-05, - "loss": 1.1901, + "learning_rate": 1.9983288476221482e-05, + "loss": 1.1475, "step": 1689 }, { - "epoch": 0.04795686719636776, + "epoch": 0.04789027742355975, "grad_norm": 0.0, - "learning_rate": 1.9983134980185884e-05, - "loss": 1.0831, + "learning_rate": 1.998323539650004e-05, + "loss": 1.062, "step": 1690 }, { - "epoch": 0.04798524404086266, + "epoch": 0.04791861486582221, "grad_norm": 0.0, - "learning_rate": 1.9983081582712684e-05, - "loss": 1.0482, + "learning_rate": 1.99831822326861e-05, + "loss": 1.1561, "step": 1691 }, { - "epoch": 0.04801362088535755, + "epoch": 0.04794695230808467, "grad_norm": 0.0, - "learning_rate": 1.9983028100911946e-05, - "loss": 1.0884, + "learning_rate": 1.998312898478011e-05, + "loss": 1.0126, "step": 1692 }, { - "epoch": 0.04804199772985244, + "epoch": 0.047975289750347136, "grad_norm": 0.0, - "learning_rate": 1.9982974534784118e-05, - "loss": 1.0925, + "learning_rate": 1.9983075652782516e-05, + "loss": 1.1917, "step": 1693 }, { - "epoch": 0.04807037457434733, + "epoch": 0.048003627192609595, "grad_norm": 0.0, - "learning_rate": 1.9982920884329654e-05, - "loss": 0.9237, + "learning_rate": 1.9983022236693767e-05, + "loss": 1.1014, "step": 1694 }, { - "epoch": 0.04809875141884223, + "epoch": 0.048031964634872054, "grad_norm": 0.0, - "learning_rate": 1.998286714954901e-05, - "loss": 1.0304, + "learning_rate": 1.998296873651432e-05, + "loss": 1.2932, "step": 1695 }, { - "epoch": 0.048127128263337114, + "epoch": 0.04806030207713452, "grad_norm": 0.0, - "learning_rate": 1.9982813330442644e-05, - "loss": 1.1534, + "learning_rate": 1.9982915152244617e-05, + "loss": 1.1955, "step": 1696 }, { - "epoch": 0.04815550510783201, + "epoch": 0.04808863951939698, "grad_norm": 0.0, - "learning_rate": 1.9982759427011e-05, - "loss": 1.1363, + "learning_rate": 1.9982861483885113e-05, + "loss": 1.1049, "step": 1697 }, { - "epoch": 0.0481838819523269, + "epoch": 0.04811697696165944, "grad_norm": 0.0, - "learning_rate": 1.9982705439254533e-05, - "loss": 1.0915, + "learning_rate": 1.9982807731436257e-05, + "loss": 1.0198, "step": 1698 }, { - "epoch": 0.04821225879682179, + "epoch": 0.048145314403921904, "grad_norm": 0.0, - "learning_rate": 1.9982651367173712e-05, - "loss": 0.9594, + "learning_rate": 1.9982753894898507e-05, + "loss": 1.013, "step": 1699 }, { - "epoch": 0.048240635641316684, + "epoch": 0.048173651846184364, "grad_norm": 0.0, - "learning_rate": 1.9982597210768986e-05, - "loss": 1.0999, + "learning_rate": 1.9982699974272314e-05, + "loss": 1.018, "step": 1700 }, { - "epoch": 0.04826901248581158, + "epoch": 0.04820198928844682, "grad_norm": 0.0, - "learning_rate": 1.998254297004081e-05, - "loss": 1.045, + "learning_rate": 1.998264596955813e-05, + "loss": 1.1485, "step": 1701 }, { - "epoch": 0.04829738933030647, + "epoch": 0.04823032673070929, "grad_norm": 0.0, - "learning_rate": 1.9982488644989647e-05, - "loss": 1.028, + "learning_rate": 1.9982591880756418e-05, + "loss": 1.1277, "step": 1702 }, { - "epoch": 0.04832576617480136, + "epoch": 0.04825866417297175, "grad_norm": 0.0, - "learning_rate": 1.9982434235615953e-05, - "loss": 1.1301, + "learning_rate": 1.998253770786762e-05, + "loss": 1.1297, "step": 1703 }, { - "epoch": 0.048354143019296254, + "epoch": 0.04828700161523421, "grad_norm": 0.0, - "learning_rate": 1.9982379741920187e-05, - "loss": 1.1125, + "learning_rate": 1.9982483450892206e-05, + "loss": 1.135, "step": 1704 }, { - "epoch": 0.04838251986379115, + "epoch": 0.04831533905749667, "grad_norm": 0.0, - "learning_rate": 1.9982325163902812e-05, - "loss": 1.014, + "learning_rate": 1.9982429109830625e-05, + "loss": 1.1642, "step": 1705 }, { - "epoch": 0.048410896708286036, + "epoch": 0.04834367649975913, "grad_norm": 0.0, - "learning_rate": 1.9982270501564286e-05, - "loss": 1.1206, + "learning_rate": 1.9982374684683337e-05, + "loss": 1.0823, "step": 1706 }, { - "epoch": 0.04843927355278093, + "epoch": 0.04837201394202159, "grad_norm": 0.0, - "learning_rate": 1.998221575490507e-05, - "loss": 1.0394, + "learning_rate": 1.9982320175450798e-05, + "loss": 1.1767, "step": 1707 }, { - "epoch": 0.048467650397275824, + "epoch": 0.04840035138428406, "grad_norm": 0.0, - "learning_rate": 1.9982160923925633e-05, - "loss": 1.0919, + "learning_rate": 1.998226558213347e-05, + "loss": 1.1535, "step": 1708 }, { - "epoch": 0.04849602724177071, + "epoch": 0.048428688826546516, "grad_norm": 0.0, - "learning_rate": 1.9982106008626434e-05, - "loss": 1.0678, + "learning_rate": 1.9982210904731812e-05, + "loss": 1.2077, "step": 1709 }, { - "epoch": 0.048524404086265606, + "epoch": 0.048457026268808975, "grad_norm": 0.0, - "learning_rate": 1.9982051009007937e-05, - "loss": 1.0738, + "learning_rate": 1.9982156143246288e-05, + "loss": 1.0996, "step": 1710 }, { - "epoch": 0.0485527809307605, + "epoch": 0.04848536371107144, "grad_norm": 0.0, - "learning_rate": 1.9981995925070607e-05, - "loss": 1.1398, + "learning_rate": 1.998210129767735e-05, + "loss": 1.0905, "step": 1711 }, { - "epoch": 0.048581157775255394, + "epoch": 0.0485137011533339, "grad_norm": 0.0, - "learning_rate": 1.9981940756814904e-05, - "loss": 1.1462, + "learning_rate": 1.998204636802547e-05, + "loss": 1.0293, "step": 1712 }, { - "epoch": 0.04860953461975028, + "epoch": 0.04854203859559636, "grad_norm": 0.0, - "learning_rate": 1.99818855042413e-05, - "loss": 1.154, + "learning_rate": 1.998199135429111e-05, + "loss": 1.1018, "step": 1713 }, { - "epoch": 0.048637911464245176, + "epoch": 0.048570376037858826, "grad_norm": 0.0, - "learning_rate": 1.9981830167350262e-05, - "loss": 1.1026, + "learning_rate": 1.9981936256474727e-05, + "loss": 1.1817, "step": 1714 }, { - "epoch": 0.04866628830874007, + "epoch": 0.048598713480121285, "grad_norm": 0.0, - "learning_rate": 1.9981774746142254e-05, - "loss": 1.2369, + "learning_rate": 1.9981881074576786e-05, + "loss": 1.1575, "step": 1715 }, { - "epoch": 0.04869466515323496, + "epoch": 0.048627050922383744, "grad_norm": 0.0, - "learning_rate": 1.998171924061775e-05, - "loss": 1.0884, + "learning_rate": 1.9981825808597757e-05, + "loss": 1.1098, "step": 1716 }, { - "epoch": 0.04872304199772985, + "epoch": 0.04865538836464621, "grad_norm": 0.0, - "learning_rate": 1.9981663650777213e-05, - "loss": 1.0955, + "learning_rate": 1.99817704585381e-05, + "loss": 1.1764, "step": 1717 }, { - "epoch": 0.048751418842224746, + "epoch": 0.04868372580690867, "grad_norm": 0.0, - "learning_rate": 1.9981607976621115e-05, - "loss": 1.0678, + "learning_rate": 1.9981715024398286e-05, + "loss": 1.1432, "step": 1718 }, { - "epoch": 0.04877979568671963, + "epoch": 0.04871206324917113, "grad_norm": 0.0, - "learning_rate": 1.9981552218149924e-05, - "loss": 1.0985, + "learning_rate": 1.9981659506178778e-05, + "loss": 1.1265, "step": 1719 }, { - "epoch": 0.04880817253121453, + "epoch": 0.048740400691433594, "grad_norm": 0.0, - "learning_rate": 1.9981496375364115e-05, - "loss": 1.047, + "learning_rate": 1.9981603903880046e-05, + "loss": 1.1039, "step": 1720 }, { - "epoch": 0.04883654937570942, + "epoch": 0.04876873813369605, "grad_norm": 0.0, - "learning_rate": 1.9981440448264155e-05, - "loss": 1.0356, + "learning_rate": 1.998154821750256e-05, + "loss": 0.9821, "step": 1721 }, { - "epoch": 0.048864926220204316, + "epoch": 0.04879707557595851, "grad_norm": 0.0, - "learning_rate": 1.9981384436850518e-05, - "loss": 0.9744, + "learning_rate": 1.998149244704678e-05, + "loss": 1.151, "step": 1722 }, { - "epoch": 0.0488933030646992, + "epoch": 0.04882541301822098, "grad_norm": 0.0, - "learning_rate": 1.998132834112368e-05, - "loss": 1.1263, + "learning_rate": 1.9981436592513194e-05, + "loss": 1.0789, "step": 1723 }, { - "epoch": 0.0489216799091941, + "epoch": 0.04885375046048344, "grad_norm": 0.0, - "learning_rate": 1.9981272161084113e-05, - "loss": 0.9855, + "learning_rate": 1.9981380653902253e-05, + "loss": 1.2288, "step": 1724 }, { - "epoch": 0.04895005675368899, + "epoch": 0.0488820879027459, "grad_norm": 0.0, - "learning_rate": 1.9981215896732292e-05, - "loss": 1.1915, + "learning_rate": 1.9981324631214435e-05, + "loss": 1.1392, "step": 1725 }, { - "epoch": 0.04897843359818388, + "epoch": 0.04891042534500836, "grad_norm": 0.0, - "learning_rate": 1.998115954806869e-05, - "loss": 1.1288, + "learning_rate": 1.9981268524450218e-05, + "loss": 1.0779, "step": 1726 }, { - "epoch": 0.04900681044267877, + "epoch": 0.04893876278727082, "grad_norm": 0.0, - "learning_rate": 1.9981103115093786e-05, - "loss": 1.1508, + "learning_rate": 1.9981212333610066e-05, + "loss": 1.1175, "step": 1727 }, { - "epoch": 0.04903518728717367, + "epoch": 0.04896710022953328, "grad_norm": 0.0, - "learning_rate": 1.9981046597808054e-05, - "loss": 1.0825, + "learning_rate": 1.9981156058694458e-05, + "loss": 1.0591, "step": 1728 }, { - "epoch": 0.049063564131668555, + "epoch": 0.04899543767179575, "grad_norm": 0.0, - "learning_rate": 1.9980989996211974e-05, - "loss": 0.9093, + "learning_rate": 1.9981099699703866e-05, + "loss": 1.0387, "step": 1729 }, { - "epoch": 0.04909194097616345, + "epoch": 0.049023775114058206, "grad_norm": 0.0, - "learning_rate": 1.998093331030602e-05, - "loss": 1.1195, + "learning_rate": 1.9981043256638767e-05, + "loss": 1.0138, "step": 1730 }, { - "epoch": 0.049120317820658344, + "epoch": 0.049052112556320665, "grad_norm": 0.0, - "learning_rate": 1.9980876540090675e-05, - "loss": 1.2018, + "learning_rate": 1.998098672949963e-05, + "loss": 1.0895, "step": 1731 }, { - "epoch": 0.04914869466515324, + "epoch": 0.04908044999858313, "grad_norm": 0.0, - "learning_rate": 1.9980819685566416e-05, - "loss": 1.0881, + "learning_rate": 1.9980930118286937e-05, + "loss": 1.1282, "step": 1732 }, { - "epoch": 0.049177071509648125, + "epoch": 0.04910878744084559, "grad_norm": 0.0, - "learning_rate": 1.998076274673373e-05, - "loss": 1.0943, + "learning_rate": 1.9980873423001162e-05, + "loss": 1.1985, "step": 1733 }, { - "epoch": 0.04920544835414302, + "epoch": 0.04913712488310805, "grad_norm": 0.0, - "learning_rate": 1.9980705723593088e-05, - "loss": 1.0335, + "learning_rate": 1.9980816643642787e-05, + "loss": 0.9883, "step": 1734 }, { - "epoch": 0.049233825198637914, + "epoch": 0.049165462325370515, "grad_norm": 0.0, - "learning_rate": 1.9980648616144974e-05, - "loss": 1.1083, + "learning_rate": 1.9980759780212288e-05, + "loss": 1.0941, "step": 1735 }, { - "epoch": 0.0492622020431328, + "epoch": 0.049193799767632974, "grad_norm": 0.0, - "learning_rate": 1.9980591424389878e-05, - "loss": 1.0764, + "learning_rate": 1.9980702832710137e-05, + "loss": 1.1881, "step": 1736 }, { - "epoch": 0.049290578887627695, + "epoch": 0.049222137209895434, "grad_norm": 0.0, - "learning_rate": 1.9980534148328275e-05, - "loss": 1.0922, + "learning_rate": 1.9980645801136826e-05, + "loss": 1.0761, "step": 1737 }, { - "epoch": 0.04931895573212259, + "epoch": 0.0492504746521579, "grad_norm": 0.0, - "learning_rate": 1.998047678796065e-05, - "loss": 1.1416, + "learning_rate": 1.9980588685492824e-05, + "loss": 1.0646, "step": 1738 }, { - "epoch": 0.04934733257661748, + "epoch": 0.04927881209442036, "grad_norm": 0.0, - "learning_rate": 1.998041934328749e-05, - "loss": 1.0356, + "learning_rate": 1.9980531485778624e-05, + "loss": 1.1684, "step": 1739 }, { - "epoch": 0.04937570942111237, + "epoch": 0.04930714953668282, "grad_norm": 0.0, - "learning_rate": 1.9980361814309282e-05, - "loss": 1.016, + "learning_rate": 1.9980474201994693e-05, + "loss": 0.9842, "step": 1740 }, { - "epoch": 0.049404086265607265, + "epoch": 0.049335486978945284, "grad_norm": 0.0, - "learning_rate": 1.9980304201026506e-05, - "loss": 1.0849, + "learning_rate": 1.9980416834141526e-05, + "loss": 1.1026, "step": 1741 }, { - "epoch": 0.04943246311010216, + "epoch": 0.04936382442120774, "grad_norm": 0.0, - "learning_rate": 1.998024650343965e-05, - "loss": 1.0447, + "learning_rate": 1.9980359382219603e-05, + "loss": 1.1242, "step": 1742 }, { - "epoch": 0.04946083995459705, + "epoch": 0.0493921618634702, "grad_norm": 0.0, - "learning_rate": 1.9980188721549207e-05, - "loss": 1.1741, + "learning_rate": 1.9980301846229406e-05, + "loss": 0.9794, "step": 1743 }, { - "epoch": 0.04948921679909194, + "epoch": 0.04942049930573267, "grad_norm": 0.0, - "learning_rate": 1.9980130855355657e-05, - "loss": 1.1127, + "learning_rate": 1.998024422617142e-05, + "loss": 1.1716, "step": 1744 }, { - "epoch": 0.049517593643586835, + "epoch": 0.04944883674799513, "grad_norm": 0.0, - "learning_rate": 1.9980072904859496e-05, - "loss": 1.0802, + "learning_rate": 1.998018652204613e-05, + "loss": 1.1234, "step": 1745 }, { - "epoch": 0.04954597048808172, + "epoch": 0.049477174190257586, "grad_norm": 0.0, - "learning_rate": 1.9980014870061212e-05, - "loss": 1.1255, + "learning_rate": 1.9980128733854026e-05, + "loss": 1.1419, "step": 1746 }, { - "epoch": 0.04957434733257662, + "epoch": 0.049505511632520045, "grad_norm": 0.0, - "learning_rate": 1.9979956750961293e-05, - "loss": 1.0358, + "learning_rate": 1.9980070861595585e-05, + "loss": 1.1511, "step": 1747 }, { - "epoch": 0.04960272417707151, + "epoch": 0.04953384907478251, "grad_norm": 0.0, - "learning_rate": 1.9979898547560227e-05, - "loss": 0.9149, + "learning_rate": 1.9980012905271305e-05, + "loss": 1.1447, "step": 1748 }, { - "epoch": 0.0496311010215664, + "epoch": 0.04956218651704497, "grad_norm": 0.0, - "learning_rate": 1.997984025985851e-05, - "loss": 1.0429, + "learning_rate": 1.9979954864881672e-05, + "loss": 1.1773, "step": 1749 }, { - "epoch": 0.04965947786606129, + "epoch": 0.04959052395930743, "grad_norm": 0.0, - "learning_rate": 1.9979781887856638e-05, - "loss": 1.1014, + "learning_rate": 1.9979896740427173e-05, + "loss": 1.0789, "step": 1750 }, { - "epoch": 0.04968785471055619, + "epoch": 0.049618861401569896, "grad_norm": 0.0, - "learning_rate": 1.9979723431555098e-05, - "loss": 1.1538, + "learning_rate": 1.9979838531908297e-05, + "loss": 1.1811, "step": 1751 }, { - "epoch": 0.04971623155505108, + "epoch": 0.049647198843832355, "grad_norm": 0.0, - "learning_rate": 1.9979664890954383e-05, - "loss": 1.0909, + "learning_rate": 1.9979780239325534e-05, + "loss": 1.032, "step": 1752 }, { - "epoch": 0.04974460839954597, + "epoch": 0.049675536286094814, "grad_norm": 0.0, - "learning_rate": 1.997960626605499e-05, - "loss": 1.0879, + "learning_rate": 1.9979721862679376e-05, + "loss": 1.1744, "step": 1753 }, { - "epoch": 0.04977298524404086, + "epoch": 0.04970387372835728, "grad_norm": 0.0, - "learning_rate": 1.997954755685742e-05, - "loss": 0.9962, + "learning_rate": 1.9979663401970317e-05, + "loss": 1.1508, "step": 1754 }, { - "epoch": 0.04980136208853576, + "epoch": 0.04973221117061974, "grad_norm": 0.0, - "learning_rate": 1.997948876336216e-05, - "loss": 1.0319, + "learning_rate": 1.9979604857198845e-05, + "loss": 1.0338, "step": 1755 }, { - "epoch": 0.049829738933030644, + "epoch": 0.0497605486128822, "grad_norm": 0.0, - "learning_rate": 1.9979429885569707e-05, - "loss": 1.0741, + "learning_rate": 1.9979546228365456e-05, + "loss": 1.1589, "step": 1756 }, { - "epoch": 0.04985811577752554, + "epoch": 0.049788886055144664, "grad_norm": 0.0, - "learning_rate": 1.9979370923480563e-05, - "loss": 1.1923, + "learning_rate": 1.9979487515470647e-05, + "loss": 1.0867, "step": 1757 }, { - "epoch": 0.04988649262202043, + "epoch": 0.04981722349740712, "grad_norm": 0.0, - "learning_rate": 1.9979311877095228e-05, - "loss": 1.171, + "learning_rate": 1.9979428718514905e-05, + "loss": 1.0386, "step": 1758 }, { - "epoch": 0.04991486946651532, + "epoch": 0.04984556093966958, "grad_norm": 0.0, - "learning_rate": 1.997925274641419e-05, - "loss": 1.1318, + "learning_rate": 1.997936983749873e-05, + "loss": 1.1689, "step": 1759 }, { - "epoch": 0.049943246311010214, + "epoch": 0.04987389838193205, "grad_norm": 0.0, - "learning_rate": 1.9979193531437962e-05, - "loss": 1.1082, + "learning_rate": 1.9979310872422615e-05, + "loss": 1.125, "step": 1760 }, { - "epoch": 0.04997162315550511, + "epoch": 0.04990223582419451, "grad_norm": 0.0, - "learning_rate": 1.997913423216703e-05, - "loss": 1.1227, + "learning_rate": 1.997925182328706e-05, + "loss": 1.0927, "step": 1761 }, { - "epoch": 0.05, + "epoch": 0.049930573266456967, "grad_norm": 0.0, - "learning_rate": 1.997907484860191e-05, - "loss": 1.1214, + "learning_rate": 1.9979192690092563e-05, + "loss": 1.079, "step": 1762 }, { - "epoch": 0.05002837684449489, + "epoch": 0.04995891070871943, "grad_norm": 0.0, - "learning_rate": 1.9979015380743093e-05, - "loss": 1.1306, + "learning_rate": 1.997913347283962e-05, + "loss": 1.1627, "step": 1763 }, { - "epoch": 0.050056753688989784, + "epoch": 0.04998724815098189, "grad_norm": 0.0, - "learning_rate": 1.9978955828591085e-05, - "loss": 1.1901, + "learning_rate": 1.997907417152873e-05, + "loss": 1.2574, "step": 1764 }, { - "epoch": 0.05008513053348468, + "epoch": 0.05001558559324435, "grad_norm": 0.0, - "learning_rate": 1.997889619214639e-05, - "loss": 0.9865, + "learning_rate": 1.997901478616039e-05, + "loss": 1.1503, "step": 1765 }, { - "epoch": 0.050113507377979566, + "epoch": 0.05004392303550682, "grad_norm": 0.0, - "learning_rate": 1.9978836471409506e-05, - "loss": 1.1825, + "learning_rate": 1.9978955316735106e-05, + "loss": 1.1519, "step": 1766 }, { - "epoch": 0.05014188422247446, + "epoch": 0.050072260477769276, "grad_norm": 0.0, - "learning_rate": 1.9978776666380945e-05, - "loss": 0.9525, + "learning_rate": 1.9978895763253375e-05, + "loss": 1.1204, "step": 1767 }, { - "epoch": 0.050170261066969354, + "epoch": 0.050100597920031735, "grad_norm": 0.0, - "learning_rate": 1.997871677706121e-05, - "loss": 0.9607, + "learning_rate": 1.99788361257157e-05, + "loss": 1.0301, "step": 1768 }, { - "epoch": 0.05019863791146425, + "epoch": 0.0501289353622942, "grad_norm": 0.0, - "learning_rate": 1.9978656803450804e-05, - "loss": 0.9821, + "learning_rate": 1.997877640412258e-05, + "loss": 1.1121, "step": 1769 }, { - "epoch": 0.050227014755959136, + "epoch": 0.05015727280455666, "grad_norm": 0.0, - "learning_rate": 1.9978596745550236e-05, - "loss": 1.1352, + "learning_rate": 1.9978716598474523e-05, + "loss": 1.0607, "step": 1770 }, { - "epoch": 0.05025539160045403, + "epoch": 0.05018561024681912, "grad_norm": 0.0, - "learning_rate": 1.997853660336001e-05, - "loss": 1.1225, + "learning_rate": 1.9978656708772032e-05, + "loss": 1.0575, "step": 1771 }, { - "epoch": 0.050283768444948924, + "epoch": 0.050213947689081585, "grad_norm": 0.0, - "learning_rate": 1.997847637688064e-05, - "loss": 1.0872, + "learning_rate": 1.9978596735015606e-05, + "loss": 1.1725, "step": 1772 }, { - "epoch": 0.05031214528944381, + "epoch": 0.050242285131344044, "grad_norm": 0.0, - "learning_rate": 1.9978416066112628e-05, - "loss": 1.1976, + "learning_rate": 1.9978536677205756e-05, + "loss": 1.1834, "step": 1773 }, { - "epoch": 0.050340522133938706, + "epoch": 0.050270622573606503, "grad_norm": 0.0, - "learning_rate": 1.997835567105649e-05, - "loss": 1.1021, + "learning_rate": 1.9978476535342986e-05, + "loss": 1.1149, "step": 1774 }, { - "epoch": 0.0503688989784336, + "epoch": 0.05029896001586897, "grad_norm": 0.0, - "learning_rate": 1.9978295191712736e-05, - "loss": 1.115, + "learning_rate": 1.9978416309427806e-05, + "loss": 1.1801, "step": 1775 }, { - "epoch": 0.05039727582292849, + "epoch": 0.05032729745813143, "grad_norm": 0.0, - "learning_rate": 1.997823462808187e-05, - "loss": 1.0061, + "learning_rate": 1.9978355999460716e-05, + "loss": 1.1351, "step": 1776 }, { - "epoch": 0.05042565266742338, + "epoch": 0.05035563490039389, "grad_norm": 0.0, - "learning_rate": 1.997817398016441e-05, - "loss": 1.1981, + "learning_rate": 1.997829560544223e-05, + "loss": 1.1759, "step": 1777 }, { - "epoch": 0.050454029511918276, + "epoch": 0.050383972342656354, "grad_norm": 0.0, - "learning_rate": 1.9978113247960862e-05, - "loss": 1.1353, + "learning_rate": 1.9978235127372854e-05, + "loss": 1.1715, "step": 1778 }, { - "epoch": 0.05048240635641317, + "epoch": 0.05041230978491881, "grad_norm": 0.0, - "learning_rate": 1.9978052431471742e-05, - "loss": 1.1035, + "learning_rate": 1.9978174565253096e-05, + "loss": 1.157, "step": 1779 }, { - "epoch": 0.05051078320090806, + "epoch": 0.05044064722718127, "grad_norm": 0.0, - "learning_rate": 1.9977991530697566e-05, - "loss": 1.0378, + "learning_rate": 1.9978113919083474e-05, + "loss": 1.2176, "step": 1780 }, { - "epoch": 0.05053916004540295, + "epoch": 0.05046898466944374, "grad_norm": 0.0, - "learning_rate": 1.997793054563885e-05, - "loss": 1.0845, + "learning_rate": 1.997805318886449e-05, + "loss": 1.1417, "step": 1781 }, { - "epoch": 0.050567536889897846, + "epoch": 0.0504973221117062, "grad_norm": 0.0, - "learning_rate": 1.9977869476296105e-05, - "loss": 0.9818, + "learning_rate": 1.9977992374596657e-05, + "loss": 1.1234, "step": 1782 }, { - "epoch": 0.05059591373439273, + "epoch": 0.050525659553968656, "grad_norm": 0.0, - "learning_rate": 1.9977808322669848e-05, - "loss": 1.0648, + "learning_rate": 1.9977931476280492e-05, + "loss": 1.1135, "step": 1783 }, { - "epoch": 0.05062429057888763, + "epoch": 0.05055399699623112, "grad_norm": 0.0, - "learning_rate": 1.9977747084760594e-05, - "loss": 1.0585, + "learning_rate": 1.99778704939165e-05, + "loss": 1.1416, "step": 1784 }, { - "epoch": 0.05065266742338252, + "epoch": 0.05058233443849358, "grad_norm": 0.0, - "learning_rate": 1.997768576256886e-05, - "loss": 1.0356, + "learning_rate": 1.9977809427505204e-05, + "loss": 1.2223, "step": 1785 }, { - "epoch": 0.05068104426787741, + "epoch": 0.05061067188075604, "grad_norm": 0.0, - "learning_rate": 1.997762435609517e-05, - "loss": 1.0701, + "learning_rate": 1.9977748277047114e-05, + "loss": 1.217, "step": 1786 }, { - "epoch": 0.050709421112372303, + "epoch": 0.050639009323018506, "grad_norm": 0.0, - "learning_rate": 1.997756286534004e-05, - "loss": 1.143, + "learning_rate": 1.9977687042542743e-05, + "loss": 1.0527, "step": 1787 }, { - "epoch": 0.0507377979568672, + "epoch": 0.050667346765280966, "grad_norm": 0.0, - "learning_rate": 1.9977501290303984e-05, - "loss": 1.0034, + "learning_rate": 1.9977625723992614e-05, + "loss": 1.1157, "step": 1788 }, { - "epoch": 0.05076617480136209, + "epoch": 0.050695684207543425, "grad_norm": 0.0, - "learning_rate": 1.9977439630987527e-05, - "loss": 1.172, + "learning_rate": 1.9977564321397234e-05, + "loss": 1.0002, "step": 1789 }, { - "epoch": 0.05079455164585698, + "epoch": 0.05072402164980589, "grad_norm": 0.0, - "learning_rate": 1.997737788739119e-05, - "loss": 1.0299, + "learning_rate": 1.9977502834757124e-05, + "loss": 1.2269, "step": 1790 }, { - "epoch": 0.050822928490351874, + "epoch": 0.05075235909206835, "grad_norm": 0.0, - "learning_rate": 1.997731605951549e-05, - "loss": 1.0405, + "learning_rate": 1.9977441264072803e-05, + "loss": 1.1353, "step": 1791 }, { - "epoch": 0.05085130533484677, + "epoch": 0.05078069653433081, "grad_norm": 0.0, - "learning_rate": 1.9977254147360958e-05, - "loss": 1.1224, + "learning_rate": 1.997737960934479e-05, + "loss": 1.1912, "step": 1792 }, { - "epoch": 0.050879682179341655, + "epoch": 0.050809033976593275, "grad_norm": 0.0, - "learning_rate": 1.997719215092811e-05, - "loss": 1.1185, + "learning_rate": 1.9977317870573605e-05, + "loss": 1.2416, "step": 1793 }, { - "epoch": 0.05090805902383655, + "epoch": 0.050837371418855734, "grad_norm": 0.0, - "learning_rate": 1.997713007021747e-05, - "loss": 1.0739, + "learning_rate": 1.9977256047759765e-05, + "loss": 1.0607, "step": 1794 }, { - "epoch": 0.050936435868331444, + "epoch": 0.05086570886111819, "grad_norm": 0.0, - "learning_rate": 1.997706790522956e-05, - "loss": 1.133, + "learning_rate": 1.997719414090379e-05, + "loss": 1.176, "step": 1795 }, { - "epoch": 0.05096481271282633, + "epoch": 0.05089404630338066, "grad_norm": 0.0, - "learning_rate": 1.9977005655964916e-05, - "loss": 1.1054, + "learning_rate": 1.997713215000621e-05, + "loss": 1.0828, "step": 1796 }, { - "epoch": 0.050993189557321225, + "epoch": 0.05092238374564312, "grad_norm": 0.0, - "learning_rate": 1.9976943322424053e-05, - "loss": 1.0506, + "learning_rate": 1.9977070075067536e-05, + "loss": 1.1515, "step": 1797 }, { - "epoch": 0.05102156640181612, + "epoch": 0.05095072118790558, "grad_norm": 0.0, - "learning_rate": 1.99768809046075e-05, - "loss": 1.0236, + "learning_rate": 1.99770079160883e-05, + "loss": 1.1667, "step": 1798 }, { - "epoch": 0.051049943246311014, + "epoch": 0.05097905863016804, "grad_norm": 0.0, - "learning_rate": 1.997681840251579e-05, - "loss": 1.1409, + "learning_rate": 1.9976945673069017e-05, + "loss": 1.0558, "step": 1799 }, { - "epoch": 0.0510783200908059, + "epoch": 0.0510073960724305, "grad_norm": 0.0, - "learning_rate": 1.9976755816149443e-05, - "loss": 1.1895, + "learning_rate": 1.997688334601022e-05, + "loss": 1.0997, "step": 1800 }, { - "epoch": 0.051106696935300795, + "epoch": 0.05103573351469296, "grad_norm": 0.0, - "learning_rate": 1.9976693145508994e-05, - "loss": 1.0565, + "learning_rate": 1.9976820934912425e-05, + "loss": 1.2069, "step": 1801 }, { - "epoch": 0.05113507377979569, + "epoch": 0.05106407095695543, "grad_norm": 0.0, - "learning_rate": 1.9976630390594966e-05, - "loss": 1.0639, + "learning_rate": 1.9976758439776166e-05, + "loss": 1.1555, "step": 1802 }, { - "epoch": 0.05116345062429058, + "epoch": 0.05109240839921789, "grad_norm": 0.0, - "learning_rate": 1.9976567551407896e-05, - "loss": 1.1689, + "learning_rate": 1.9976695860601962e-05, + "loss": 1.1417, "step": 1803 }, { - "epoch": 0.05119182746878547, + "epoch": 0.051120745841480346, "grad_norm": 0.0, - "learning_rate": 1.9976504627948308e-05, - "loss": 1.0856, + "learning_rate": 1.9976633197390347e-05, + "loss": 1.0876, "step": 1804 }, { - "epoch": 0.051220204313280365, + "epoch": 0.05114908328374281, "grad_norm": 0.0, - "learning_rate": 1.997644162021674e-05, - "loss": 0.9677, + "learning_rate": 1.9976570450141845e-05, + "loss": 1.0375, "step": 1805 }, { - "epoch": 0.05124858115777525, + "epoch": 0.05117742072600527, "grad_norm": 0.0, - "learning_rate": 1.997637852821372e-05, - "loss": 0.9794, + "learning_rate": 1.9976507618856986e-05, + "loss": 1.1278, "step": 1806 }, { - "epoch": 0.05127695800227015, + "epoch": 0.05120575816826773, "grad_norm": 0.0, - "learning_rate": 1.9976315351939782e-05, - "loss": 1.1109, + "learning_rate": 1.9976444703536297e-05, + "loss": 1.1155, "step": 1807 }, { - "epoch": 0.05130533484676504, + "epoch": 0.051234095610530196, "grad_norm": 0.0, - "learning_rate": 1.997625209139546e-05, - "loss": 1.097, + "learning_rate": 1.997638170418031e-05, + "loss": 1.091, "step": 1808 }, { - "epoch": 0.051333711691259935, + "epoch": 0.051262433052792655, "grad_norm": 0.0, - "learning_rate": 1.9976188746581288e-05, - "loss": 1.2153, + "learning_rate": 1.997631862078956e-05, + "loss": 1.103, "step": 1809 }, { - "epoch": 0.05136208853575482, + "epoch": 0.051290770495055114, "grad_norm": 0.0, - "learning_rate": 1.99761253174978e-05, - "loss": 1.0952, + "learning_rate": 1.9976255453364567e-05, + "loss": 1.1281, "step": 1810 }, { - "epoch": 0.05139046538024972, + "epoch": 0.05131910793731758, "grad_norm": 0.0, - "learning_rate": 1.9976061804145535e-05, - "loss": 1.0575, + "learning_rate": 1.9976192201905877e-05, + "loss": 1.0823, "step": 1811 }, { - "epoch": 0.05141884222474461, + "epoch": 0.05134744537958004, "grad_norm": 0.0, - "learning_rate": 1.9975998206525024e-05, - "loss": 1.1812, + "learning_rate": 1.997612886641401e-05, + "loss": 1.0674, "step": 1812 }, { - "epoch": 0.0514472190692395, + "epoch": 0.0513757828218425, "grad_norm": 0.0, - "learning_rate": 1.997593452463681e-05, - "loss": 1.1626, + "learning_rate": 1.9976065446889505e-05, + "loss": 1.2554, "step": 1813 }, { - "epoch": 0.05147559591373439, + "epoch": 0.051404120264104965, "grad_norm": 0.0, - "learning_rate": 1.997587075848143e-05, - "loss": 1.0648, + "learning_rate": 1.9976001943332898e-05, + "loss": 1.1699, "step": 1814 }, { - "epoch": 0.05150397275822929, + "epoch": 0.051432457706367424, "grad_norm": 0.0, - "learning_rate": 1.997580690805942e-05, - "loss": 0.9951, + "learning_rate": 1.997593835574472e-05, + "loss": 1.14, "step": 1815 }, { - "epoch": 0.051532349602724174, + "epoch": 0.05146079514862988, "grad_norm": 0.0, - "learning_rate": 1.997574297337132e-05, - "loss": 1.0155, + "learning_rate": 1.997587468412551e-05, + "loss": 1.1084, "step": 1816 }, { - "epoch": 0.05156072644721907, + "epoch": 0.05148913259089235, "grad_norm": 0.0, - "learning_rate": 1.9975678954417672e-05, - "loss": 1.0792, + "learning_rate": 1.9975810928475806e-05, + "loss": 1.0619, "step": 1817 }, { - "epoch": 0.05158910329171396, + "epoch": 0.05151747003315481, "grad_norm": 0.0, - "learning_rate": 1.9975614851199015e-05, - "loss": 1.056, + "learning_rate": 1.997574708879614e-05, + "loss": 1.1281, "step": 1818 }, { - "epoch": 0.05161748013620886, + "epoch": 0.05154580747541727, "grad_norm": 0.0, - "learning_rate": 1.997555066371589e-05, - "loss": 1.0553, + "learning_rate": 1.997568316508705e-05, + "loss": 1.1306, "step": 1819 }, { - "epoch": 0.051645856980703744, + "epoch": 0.05157414491767973, "grad_norm": 0.0, - "learning_rate": 1.997548639196884e-05, - "loss": 1.2516, + "learning_rate": 1.9975619157349076e-05, + "loss": 1.1702, "step": 1820 }, { - "epoch": 0.05167423382519864, + "epoch": 0.05160248235994219, "grad_norm": 0.0, - "learning_rate": 1.997542203595841e-05, - "loss": 1.1085, + "learning_rate": 1.9975555065582762e-05, + "loss": 1.1324, "step": 1821 }, { - "epoch": 0.05170261066969353, + "epoch": 0.05163081980220465, "grad_norm": 0.0, - "learning_rate": 1.997535759568514e-05, - "loss": 1.1097, + "learning_rate": 1.9975490889788638e-05, + "loss": 1.2453, "step": 1822 }, { - "epoch": 0.05173098751418842, + "epoch": 0.05165915724446712, "grad_norm": 0.0, - "learning_rate": 1.9975293071149577e-05, - "loss": 0.9525, + "learning_rate": 1.9975426629967252e-05, + "loss": 1.0948, "step": 1823 }, { - "epoch": 0.051759364358683314, + "epoch": 0.051687494686729576, "grad_norm": 0.0, - "learning_rate": 1.9975228462352265e-05, - "loss": 1.0793, + "learning_rate": 1.9975362286119145e-05, + "loss": 1.2132, "step": 1824 }, { - "epoch": 0.05178774120317821, + "epoch": 0.051715832128992036, "grad_norm": 0.0, - "learning_rate": 1.9975163769293748e-05, - "loss": 1.0842, + "learning_rate": 1.9975297858244858e-05, + "loss": 1.0815, "step": 1825 }, { - "epoch": 0.051816118047673096, + "epoch": 0.0517441695712545, "grad_norm": 0.0, - "learning_rate": 1.9975098991974575e-05, - "loss": 1.1093, + "learning_rate": 1.997523334634493e-05, + "loss": 1.0534, "step": 1826 }, { - "epoch": 0.05184449489216799, + "epoch": 0.05177250701351696, "grad_norm": 0.0, - "learning_rate": 1.9975034130395295e-05, - "loss": 1.0504, + "learning_rate": 1.9975168750419906e-05, + "loss": 1.2018, "step": 1827 }, { - "epoch": 0.051872871736662884, + "epoch": 0.05180084445577942, "grad_norm": 0.0, - "learning_rate": 1.997496918455645e-05, - "loss": 1.1671, + "learning_rate": 1.9975104070470335e-05, + "loss": 1.1486, "step": 1828 }, { - "epoch": 0.05190124858115778, + "epoch": 0.051829181898041886, "grad_norm": 0.0, - "learning_rate": 1.997490415445859e-05, - "loss": 1.0347, + "learning_rate": 1.997503930649676e-05, + "loss": 1.1404, "step": 1829 }, { - "epoch": 0.051929625425652666, + "epoch": 0.051857519340304345, "grad_norm": 0.0, - "learning_rate": 1.997483904010227e-05, - "loss": 1.0821, + "learning_rate": 1.9974974458499717e-05, + "loss": 1.1775, "step": 1830 }, { - "epoch": 0.05195800227014756, + "epoch": 0.051885856782566804, "grad_norm": 0.0, - "learning_rate": 1.9974773841488036e-05, - "loss": 1.0982, + "learning_rate": 1.9974909526479768e-05, + "loss": 1.0654, "step": 1831 }, { - "epoch": 0.051986379114642454, + "epoch": 0.05191419422482927, "grad_norm": 0.0, - "learning_rate": 1.997470855861644e-05, - "loss": 1.0218, + "learning_rate": 1.9974844510437444e-05, + "loss": 1.1645, "step": 1832 }, { - "epoch": 0.05201475595913734, + "epoch": 0.05194253166709173, "grad_norm": 0.0, - "learning_rate": 1.997464319148803e-05, - "loss": 1.0032, + "learning_rate": 1.9974779410373307e-05, + "loss": 1.2009, "step": 1833 }, { - "epoch": 0.052043132803632236, + "epoch": 0.05197086910935419, "grad_norm": 0.0, - "learning_rate": 1.9974577740103363e-05, - "loss": 0.9853, + "learning_rate": 1.9974714226287896e-05, + "loss": 1.1679, "step": 1834 }, { - "epoch": 0.05207150964812713, + "epoch": 0.051999206551616654, "grad_norm": 0.0, - "learning_rate": 1.9974512204462985e-05, - "loss": 1.0292, + "learning_rate": 1.9974648958181767e-05, + "loss": 1.1725, "step": 1835 }, { - "epoch": 0.05209988649262202, + "epoch": 0.05202754399387911, "grad_norm": 0.0, - "learning_rate": 1.9974446584567458e-05, - "loss": 1.1791, + "learning_rate": 1.997458360605546e-05, + "loss": 0.9829, "step": 1836 }, { - "epoch": 0.05212826333711691, + "epoch": 0.05205588143614157, "grad_norm": 0.0, - "learning_rate": 1.997438088041733e-05, - "loss": 1.0132, + "learning_rate": 1.9974518169909536e-05, + "loss": 1.1014, "step": 1837 }, { - "epoch": 0.052156640181611806, + "epoch": 0.05208421887840404, "grad_norm": 0.0, - "learning_rate": 1.9974315092013162e-05, - "loss": 1.2145, + "learning_rate": 1.997445264974454e-05, + "loss": 1.1302, "step": 1838 }, { - "epoch": 0.0521850170261067, + "epoch": 0.0521125563206665, "grad_norm": 0.0, - "learning_rate": 1.9974249219355502e-05, - "loss": 1.1343, + "learning_rate": 1.9974387045561022e-05, + "loss": 1.0835, "step": 1839 }, { - "epoch": 0.05221339387060159, + "epoch": 0.05214089376292896, "grad_norm": 0.0, - "learning_rate": 1.997418326244491e-05, - "loss": 0.9472, + "learning_rate": 1.9974321357359545e-05, + "loss": 1.152, "step": 1840 }, { - "epoch": 0.05224177071509648, + "epoch": 0.05216923120519142, "grad_norm": 0.0, - "learning_rate": 1.997411722128195e-05, - "loss": 1.0993, + "learning_rate": 1.9974255585140653e-05, + "loss": 1.1074, "step": 1841 }, { - "epoch": 0.052270147559591376, + "epoch": 0.05219756864745388, "grad_norm": 0.0, - "learning_rate": 1.9974051095867167e-05, - "loss": 1.1379, + "learning_rate": 1.9974189728904898e-05, + "loss": 1.1082, "step": 1842 }, { - "epoch": 0.05229852440408626, + "epoch": 0.05222590608971634, "grad_norm": 0.0, - "learning_rate": 1.9973984886201128e-05, - "loss": 1.0552, + "learning_rate": 1.9974123788652843e-05, + "loss": 0.9775, "step": 1843 }, { - "epoch": 0.05232690124858116, + "epoch": 0.05225424353197881, "grad_norm": 0.0, - "learning_rate": 1.997391859228439e-05, - "loss": 1.1167, + "learning_rate": 1.997405776438504e-05, + "loss": 1.2061, "step": 1844 }, { - "epoch": 0.05235527809307605, + "epoch": 0.052282580974241266, "grad_norm": 0.0, - "learning_rate": 1.9973852214117512e-05, - "loss": 1.0447, + "learning_rate": 1.9973991656102042e-05, + "loss": 1.2722, "step": 1845 }, { - "epoch": 0.05238365493757094, + "epoch": 0.052310918416503725, "grad_norm": 0.0, - "learning_rate": 1.9973785751701057e-05, - "loss": 0.9611, + "learning_rate": 1.997392546380441e-05, + "loss": 1.0663, "step": 1846 }, { - "epoch": 0.052412031782065834, + "epoch": 0.052339255858766184, "grad_norm": 0.0, - "learning_rate": 1.9973719205035587e-05, - "loss": 1.073, + "learning_rate": 1.9973859187492698e-05, + "loss": 1.1721, "step": 1847 }, { - "epoch": 0.05244040862656073, + "epoch": 0.05236759330102865, "grad_norm": 0.0, - "learning_rate": 1.9973652574121663e-05, - "loss": 1.1274, + "learning_rate": 1.997379282716747e-05, + "loss": 1.1248, "step": 1848 }, { - "epoch": 0.05246878547105562, + "epoch": 0.05239593074329111, "grad_norm": 0.0, - "learning_rate": 1.9973585858959846e-05, - "loss": 1.1375, + "learning_rate": 1.997372638282928e-05, + "loss": 1.0971, "step": 1849 }, { - "epoch": 0.05249716231555051, + "epoch": 0.05242426818555357, "grad_norm": 0.0, - "learning_rate": 1.99735190595507e-05, - "loss": 1.1826, + "learning_rate": 1.9973659854478685e-05, + "loss": 1.1805, "step": 1850 }, { - "epoch": 0.052525539160045404, + "epoch": 0.052452605627816035, "grad_norm": 0.0, - "learning_rate": 1.997345217589479e-05, - "loss": 1.0445, + "learning_rate": 1.9973593242116256e-05, + "loss": 1.1869, "step": 1851 }, { - "epoch": 0.0525539160045403, + "epoch": 0.052480943070078494, "grad_norm": 0.0, - "learning_rate": 1.9973385207992684e-05, - "loss": 1.1722, + "learning_rate": 1.9973526545742544e-05, + "loss": 1.278, "step": 1852 }, { - "epoch": 0.052582292849035185, + "epoch": 0.05250928051234095, "grad_norm": 0.0, - "learning_rate": 1.997331815584494e-05, - "loss": 0.9827, + "learning_rate": 1.9973459765358116e-05, + "loss": 1.1877, "step": 1853 }, { - "epoch": 0.05261066969353008, + "epoch": 0.05253761795460342, "grad_norm": 0.0, - "learning_rate": 1.9973251019452132e-05, - "loss": 1.1126, + "learning_rate": 1.997339290096353e-05, + "loss": 1.1408, "step": 1854 }, { - "epoch": 0.052639046538024974, + "epoch": 0.05256595539686588, "grad_norm": 0.0, - "learning_rate": 1.9973183798814828e-05, - "loss": 1.1521, + "learning_rate": 1.9973325952559353e-05, + "loss": 1.0456, "step": 1855 }, { - "epoch": 0.05266742338251986, + "epoch": 0.05259429283912834, "grad_norm": 0.0, - "learning_rate": 1.9973116493933586e-05, - "loss": 1.0545, + "learning_rate": 1.997325892014615e-05, + "loss": 1.1369, "step": 1856 }, { - "epoch": 0.052695800227014755, + "epoch": 0.0526226302813908, "grad_norm": 0.0, - "learning_rate": 1.9973049104808985e-05, - "loss": 1.1232, + "learning_rate": 1.9973191803724484e-05, + "loss": 1.0458, "step": 1857 }, { - "epoch": 0.05272417707150965, + "epoch": 0.05265096772365326, "grad_norm": 0.0, - "learning_rate": 1.997298163144159e-05, - "loss": 1.0005, + "learning_rate": 1.9973124603294916e-05, + "loss": 1.1751, "step": 1858 }, { - "epoch": 0.052752553916004544, + "epoch": 0.05267930516591572, "grad_norm": 0.0, - "learning_rate": 1.9972914073831967e-05, - "loss": 1.2048, + "learning_rate": 1.997305731885802e-05, + "loss": 1.2021, "step": 1859 }, { - "epoch": 0.05278093076049943, + "epoch": 0.05270764260817819, "grad_norm": 0.0, - "learning_rate": 1.9972846431980693e-05, - "loss": 1.0792, + "learning_rate": 1.9972989950414355e-05, + "loss": 1.1383, "step": 1860 }, { - "epoch": 0.052809307604994325, + "epoch": 0.052735980050440646, "grad_norm": 0.0, - "learning_rate": 1.9972778705888337e-05, - "loss": 1.0759, + "learning_rate": 1.9972922497964497e-05, + "loss": 1.1292, "step": 1861 }, { - "epoch": 0.05283768444948922, + "epoch": 0.052764317492703106, "grad_norm": 0.0, - "learning_rate": 1.997271089555547e-05, - "loss": 1.0875, + "learning_rate": 1.9972854961509007e-05, + "loss": 1.1114, "step": 1862 }, { - "epoch": 0.05286606129398411, + "epoch": 0.05279265493496557, "grad_norm": 0.0, - "learning_rate": 1.9972643000982667e-05, - "loss": 0.9955, + "learning_rate": 1.9972787341048456e-05, + "loss": 1.1264, "step": 1863 }, { - "epoch": 0.052894438138479, + "epoch": 0.05282099237722803, "grad_norm": 0.0, - "learning_rate": 1.99725750221705e-05, - "loss": 1.1052, + "learning_rate": 1.997271963658341e-05, + "loss": 1.0153, "step": 1864 }, { - "epoch": 0.052922814982973895, + "epoch": 0.05284932981949049, "grad_norm": 0.0, - "learning_rate": 1.997250695911954e-05, - "loss": 1.132, + "learning_rate": 1.997265184811445e-05, + "loss": 1.1483, "step": 1865 }, { - "epoch": 0.05295119182746878, + "epoch": 0.052877667261752956, "grad_norm": 0.0, - "learning_rate": 1.997243881183037e-05, - "loss": 1.0418, + "learning_rate": 1.997258397564214e-05, + "loss": 1.0812, "step": 1866 }, { - "epoch": 0.05297956867196368, + "epoch": 0.052906004704015415, "grad_norm": 0.0, - "learning_rate": 1.9972370580303556e-05, - "loss": 1.018, + "learning_rate": 1.997251601916705e-05, + "loss": 1.245, "step": 1867 }, { - "epoch": 0.05300794551645857, + "epoch": 0.052934342146277874, "grad_norm": 0.0, - "learning_rate": 1.9972302264539686e-05, - "loss": 1.1015, + "learning_rate": 1.9972447978689752e-05, + "loss": 1.0658, "step": 1868 }, { - "epoch": 0.053036322360953465, + "epoch": 0.05296267958854034, "grad_norm": 0.0, - "learning_rate": 1.9972233864539325e-05, - "loss": 1.0466, + "learning_rate": 1.9972379854210824e-05, + "loss": 1.2059, "step": 1869 }, { - "epoch": 0.05306469920544835, + "epoch": 0.0529910170308028, "grad_norm": 0.0, - "learning_rate": 1.9972165380303058e-05, - "loss": 1.1041, + "learning_rate": 1.9972311645730836e-05, + "loss": 1.2262, "step": 1870 }, { - "epoch": 0.05309307604994325, + "epoch": 0.05301935447306526, "grad_norm": 0.0, - "learning_rate": 1.997209681183146e-05, - "loss": 1.0406, + "learning_rate": 1.9972243353250363e-05, + "loss": 0.9525, "step": 1871 }, { - "epoch": 0.05312145289443814, + "epoch": 0.053047691915327724, "grad_norm": 0.0, - "learning_rate": 1.9972028159125115e-05, - "loss": 1.1823, + "learning_rate": 1.9972174976769986e-05, + "loss": 1.1363, "step": 1872 }, { - "epoch": 0.05314982973893303, + "epoch": 0.05307602935759018, "grad_norm": 0.0, - "learning_rate": 1.99719594221846e-05, - "loss": 1.0578, + "learning_rate": 1.9972106516290272e-05, + "loss": 1.0641, "step": 1873 }, { - "epoch": 0.05317820658342792, + "epoch": 0.05310436679985264, "grad_norm": 0.0, - "learning_rate": 1.9971890601010497e-05, - "loss": 1.0512, + "learning_rate": 1.9972037971811802e-05, + "loss": 1.1533, "step": 1874 }, { - "epoch": 0.05320658342792282, + "epoch": 0.05313270424211511, "grad_norm": 0.0, - "learning_rate": 1.9971821695603383e-05, - "loss": 1.0874, + "learning_rate": 1.9971969343335152e-05, + "loss": 1.0649, "step": 1875 }, { - "epoch": 0.053234960272417704, + "epoch": 0.05316104168437757, "grad_norm": 0.0, - "learning_rate": 1.997175270596384e-05, - "loss": 1.0391, + "learning_rate": 1.9971900630860904e-05, + "loss": 1.0726, "step": 1876 }, { - "epoch": 0.0532633371169126, + "epoch": 0.05318937912664003, "grad_norm": 0.0, - "learning_rate": 1.997168363209246e-05, - "loss": 0.9897, + "learning_rate": 1.9971831834389634e-05, + "loss": 1.1701, "step": 1877 }, { - "epoch": 0.05329171396140749, + "epoch": 0.05321771656890249, "grad_norm": 0.0, - "learning_rate": 1.9971614473989817e-05, - "loss": 1.1074, + "learning_rate": 1.9971762953921922e-05, + "loss": 1.2333, "step": 1878 }, { - "epoch": 0.05332009080590239, + "epoch": 0.05324605401116495, "grad_norm": 0.0, - "learning_rate": 1.9971545231656498e-05, - "loss": 1.1343, + "learning_rate": 1.9971693989458347e-05, + "loss": 1.3014, "step": 1879 }, { - "epoch": 0.053348467650397274, + "epoch": 0.05327439145342741, "grad_norm": 0.0, - "learning_rate": 1.9971475905093087e-05, - "loss": 1.1169, + "learning_rate": 1.997162494099949e-05, + "loss": 1.0148, "step": 1880 }, { - "epoch": 0.05337684449489217, + "epoch": 0.05330272889568988, "grad_norm": 0.0, - "learning_rate": 1.9971406494300174e-05, - "loss": 1.1955, + "learning_rate": 1.9971555808545932e-05, + "loss": 1.1745, "step": 1881 }, { - "epoch": 0.05340522133938706, + "epoch": 0.053331066337952336, "grad_norm": 0.0, - "learning_rate": 1.997133699927834e-05, - "loss": 1.0346, + "learning_rate": 1.9971486592098258e-05, + "loss": 1.1155, "step": 1882 }, { - "epoch": 0.05343359818388195, + "epoch": 0.053359403780214795, "grad_norm": 0.0, - "learning_rate": 1.9971267420028175e-05, - "loss": 1.2126, + "learning_rate": 1.997141729165705e-05, + "loss": 1.0367, "step": 1883 }, { - "epoch": 0.053461975028376844, + "epoch": 0.05338774122247726, "grad_norm": 0.0, - "learning_rate": 1.9971197756550263e-05, - "loss": 1.1011, + "learning_rate": 1.997134790722289e-05, + "loss": 1.2091, "step": 1884 }, { - "epoch": 0.05349035187287174, + "epoch": 0.05341607866473972, "grad_norm": 0.0, - "learning_rate": 1.99711280088452e-05, - "loss": 1.0734, + "learning_rate": 1.9971278438796365e-05, + "loss": 1.042, "step": 1885 }, { - "epoch": 0.053518728717366626, + "epoch": 0.05344441610700218, "grad_norm": 0.0, - "learning_rate": 1.9971058176913566e-05, - "loss": 1.0799, + "learning_rate": 1.9971208886378056e-05, + "loss": 1.1306, "step": 1886 }, { - "epoch": 0.05354710556186152, + "epoch": 0.053472753549264646, "grad_norm": 0.0, - "learning_rate": 1.997098826075596e-05, - "loss": 1.1577, + "learning_rate": 1.9971139249968556e-05, + "loss": 1.0661, "step": 1887 }, { - "epoch": 0.053575482406356414, + "epoch": 0.053501090991527105, "grad_norm": 0.0, - "learning_rate": 1.9970918260372962e-05, - "loss": 1.0904, + "learning_rate": 1.9971069529568446e-05, + "loss": 1.1177, "step": 1888 }, { - "epoch": 0.05360385925085131, + "epoch": 0.053529428433789564, "grad_norm": 0.0, - "learning_rate": 1.9970848175765174e-05, - "loss": 1.0442, + "learning_rate": 1.9970999725178313e-05, + "loss": 1.125, "step": 1889 }, { - "epoch": 0.053632236095346196, + "epoch": 0.05355776587605203, "grad_norm": 0.0, - "learning_rate": 1.9970778006933183e-05, - "loss": 1.1646, + "learning_rate": 1.9970929836798748e-05, + "loss": 1.0324, "step": 1890 }, { - "epoch": 0.05366061293984109, + "epoch": 0.05358610331831449, "grad_norm": 0.0, - "learning_rate": 1.997070775387758e-05, - "loss": 1.078, + "learning_rate": 1.997085986443034e-05, + "loss": 1.1618, "step": 1891 }, { - "epoch": 0.053688989784335985, + "epoch": 0.05361444076057695, "grad_norm": 0.0, - "learning_rate": 1.997063741659896e-05, - "loss": 1.0853, + "learning_rate": 1.9970789808073676e-05, + "loss": 1.2082, "step": 1892 }, { - "epoch": 0.05371736662883087, + "epoch": 0.053642778202839414, "grad_norm": 0.0, - "learning_rate": 1.997056699509792e-05, - "loss": 1.1241, + "learning_rate": 1.9970719667729344e-05, + "loss": 1.1814, "step": 1893 }, { - "epoch": 0.053745743473325766, + "epoch": 0.05367111564510187, "grad_norm": 0.0, - "learning_rate": 1.997049648937505e-05, - "loss": 1.0231, + "learning_rate": 1.9970649443397942e-05, + "loss": 1.0203, "step": 1894 }, { - "epoch": 0.05377412031782066, + "epoch": 0.05369945308736433, "grad_norm": 0.0, - "learning_rate": 1.997042589943095e-05, - "loss": 1.06, + "learning_rate": 1.997057913508005e-05, + "loss": 1.1511, "step": 1895 }, { - "epoch": 0.05380249716231555, + "epoch": 0.0537277905296268, "grad_norm": 0.0, - "learning_rate": 1.997035522526621e-05, - "loss": 1.1386, + "learning_rate": 1.9970508742776276e-05, + "loss": 1.0481, "step": 1896 }, { - "epoch": 0.05383087400681044, + "epoch": 0.05375612797188926, "grad_norm": 0.0, - "learning_rate": 1.9970284466881437e-05, - "loss": 1.0753, + "learning_rate": 1.99704382664872e-05, + "loss": 1.0831, "step": 1897 }, { - "epoch": 0.053859250851305336, + "epoch": 0.053784465414151716, "grad_norm": 0.0, - "learning_rate": 1.997021362427722e-05, - "loss": 1.0108, + "learning_rate": 1.9970367706213422e-05, + "loss": 1.1632, "step": 1898 }, { - "epoch": 0.05388762769580023, + "epoch": 0.05381280285641418, "grad_norm": 0.0, - "learning_rate": 1.997014269745416e-05, - "loss": 1.1526, + "learning_rate": 1.9970297061955533e-05, + "loss": 1.1097, "step": 1899 }, { - "epoch": 0.05391600454029512, + "epoch": 0.05384114029867664, "grad_norm": 0.0, - "learning_rate": 1.997007168641286e-05, - "loss": 1.1998, + "learning_rate": 1.997022633371413e-05, + "loss": 1.2398, "step": 1900 }, { - "epoch": 0.05394438138479001, + "epoch": 0.0538694777409391, "grad_norm": 0.0, - "learning_rate": 1.9970000591153913e-05, - "loss": 1.172, + "learning_rate": 1.9970155521489808e-05, + "loss": 1.0594, "step": 1901 }, { - "epoch": 0.053972758229284906, + "epoch": 0.05389781518320157, "grad_norm": 0.0, - "learning_rate": 1.996992941167792e-05, - "loss": 0.9428, + "learning_rate": 1.9970084625283164e-05, + "loss": 1.1589, "step": 1902 }, { - "epoch": 0.054001135073779793, + "epoch": 0.053926152625464026, "grad_norm": 0.0, - "learning_rate": 1.996985814798549e-05, - "loss": 1.1326, + "learning_rate": 1.9970013645094796e-05, + "loss": 1.1047, "step": 1903 }, { - "epoch": 0.05402951191827469, + "epoch": 0.053954490067726485, "grad_norm": 0.0, - "learning_rate": 1.9969786800077217e-05, - "loss": 1.0875, + "learning_rate": 1.99699425809253e-05, + "loss": 1.2534, "step": 1904 }, { - "epoch": 0.05405788876276958, + "epoch": 0.05398282750998895, "grad_norm": 0.0, - "learning_rate": 1.996971536795371e-05, - "loss": 1.1203, + "learning_rate": 1.9969871432775273e-05, + "loss": 1.0549, "step": 1905 }, { - "epoch": 0.05408626560726447, + "epoch": 0.05401116495225141, "grad_norm": 0.0, - "learning_rate": 1.996964385161557e-05, - "loss": 1.087, + "learning_rate": 1.996980020064532e-05, + "loss": 1.1117, "step": 1906 }, { - "epoch": 0.054114642451759364, + "epoch": 0.05403950239451387, "grad_norm": 0.0, - "learning_rate": 1.9969572251063397e-05, - "loss": 1.1569, + "learning_rate": 1.9969728884536035e-05, + "loss": 1.1588, "step": 1907 }, { - "epoch": 0.05414301929625426, + "epoch": 0.054067839836776335, "grad_norm": 0.0, - "learning_rate": 1.9969500566297798e-05, - "loss": 0.9488, + "learning_rate": 1.996965748444802e-05, + "loss": 1.0671, "step": 1908 }, { - "epoch": 0.05417139614074915, + "epoch": 0.054096177279038794, "grad_norm": 0.0, - "learning_rate": 1.996942879731938e-05, - "loss": 1.0688, + "learning_rate": 1.9969586000381884e-05, + "loss": 1.1024, "step": 1909 }, { - "epoch": 0.05419977298524404, + "epoch": 0.05412451472130125, "grad_norm": 0.0, - "learning_rate": 1.996935694412875e-05, - "loss": 1.0853, + "learning_rate": 1.996951443233822e-05, + "loss": 1.0492, "step": 1910 }, { - "epoch": 0.054228149829738934, + "epoch": 0.05415285216356372, "grad_norm": 0.0, - "learning_rate": 1.9969285006726513e-05, - "loss": 1.1064, + "learning_rate": 1.996944278031763e-05, + "loss": 1.0605, "step": 1911 }, { - "epoch": 0.05425652667423383, + "epoch": 0.05418118960582618, "grad_norm": 0.0, - "learning_rate": 1.9969212985113278e-05, - "loss": 1.0593, + "learning_rate": 1.9969371044320728e-05, + "loss": 1.1163, "step": 1912 }, { - "epoch": 0.054284903518728715, + "epoch": 0.05420952704808864, "grad_norm": 0.0, - "learning_rate": 1.9969140879289654e-05, - "loss": 1.0373, + "learning_rate": 1.9969299224348107e-05, + "loss": 1.2451, "step": 1913 }, { - "epoch": 0.05431328036322361, + "epoch": 0.054237864490351104, "grad_norm": 0.0, - "learning_rate": 1.9969068689256246e-05, - "loss": 1.1328, + "learning_rate": 1.996922732040038e-05, + "loss": 1.0366, "step": 1914 }, { - "epoch": 0.054341657207718504, + "epoch": 0.05426620193261356, "grad_norm": 0.0, - "learning_rate": 1.996899641501367e-05, - "loss": 0.9223, + "learning_rate": 1.9969155332478144e-05, + "loss": 1.1458, "step": 1915 }, { - "epoch": 0.05437003405221339, + "epoch": 0.05429453937487602, "grad_norm": 0.0, - "learning_rate": 1.996892405656253e-05, - "loss": 1.0717, + "learning_rate": 1.9969083260582017e-05, + "loss": 1.2632, "step": 1916 }, { - "epoch": 0.054398410896708285, + "epoch": 0.05432287681713849, "grad_norm": 0.0, - "learning_rate": 1.9968851613903443e-05, - "loss": 1.0536, + "learning_rate": 1.9969011104712596e-05, + "loss": 1.1838, "step": 1917 }, { - "epoch": 0.05442678774120318, + "epoch": 0.05435121425940095, "grad_norm": 0.0, - "learning_rate": 1.9968779087037016e-05, - "loss": 1.0187, + "learning_rate": 1.9968938864870494e-05, + "loss": 1.1986, "step": 1918 }, { - "epoch": 0.054455164585698074, + "epoch": 0.054379551701663406, "grad_norm": 0.0, - "learning_rate": 1.9968706475963863e-05, - "loss": 1.0691, + "learning_rate": 1.9968866541056317e-05, + "loss": 1.1715, "step": 1919 }, { - "epoch": 0.05448354143019296, + "epoch": 0.05440788914392587, "grad_norm": 0.0, - "learning_rate": 1.99686337806846e-05, - "loss": 0.923, + "learning_rate": 1.9968794133270678e-05, + "loss": 1.1875, "step": 1920 }, { - "epoch": 0.054511918274687855, + "epoch": 0.05443622658618833, "grad_norm": 0.0, - "learning_rate": 1.9968561001199843e-05, - "loss": 1.0846, + "learning_rate": 1.996872164151418e-05, + "loss": 1.0745, "step": 1921 }, { - "epoch": 0.05454029511918275, + "epoch": 0.05446456402845079, "grad_norm": 0.0, - "learning_rate": 1.99684881375102e-05, - "loss": 1.1037, + "learning_rate": 1.996864906578744e-05, + "loss": 1.2325, "step": 1922 }, { - "epoch": 0.05456867196367764, + "epoch": 0.054492901470713256, "grad_norm": 0.0, - "learning_rate": 1.996841518961629e-05, - "loss": 0.9933, + "learning_rate": 1.9968576406091066e-05, + "loss": 1.2031, "step": 1923 }, { - "epoch": 0.05459704880817253, + "epoch": 0.054521238912975716, "grad_norm": 0.0, - "learning_rate": 1.9968342157518724e-05, - "loss": 1.0575, + "learning_rate": 1.9968503662425672e-05, + "loss": 1.1322, "step": 1924 }, { - "epoch": 0.054625425652667425, + "epoch": 0.054549576355238175, "grad_norm": 0.0, - "learning_rate": 1.996826904121813e-05, - "loss": 1.1027, + "learning_rate": 1.996843083479187e-05, + "loss": 1.2078, "step": 1925 }, { - "epoch": 0.05465380249716231, + "epoch": 0.05457791379750064, "grad_norm": 0.0, - "learning_rate": 1.9968195840715118e-05, - "loss": 1.0408, + "learning_rate": 1.9968357923190275e-05, + "loss": 1.0382, "step": 1926 }, { - "epoch": 0.05468217934165721, + "epoch": 0.0546062512397631, "grad_norm": 0.0, - "learning_rate": 1.9968122556010305e-05, - "loss": 1.0275, + "learning_rate": 1.9968284927621498e-05, + "loss": 1.1956, "step": 1927 }, { - "epoch": 0.0547105561861521, + "epoch": 0.05463458868202556, "grad_norm": 0.0, - "learning_rate": 1.9968049187104316e-05, - "loss": 1.0276, + "learning_rate": 1.9968211848086155e-05, + "loss": 1.1644, "step": 1928 }, { - "epoch": 0.054738933030646995, + "epoch": 0.054662926124288025, "grad_norm": 0.0, - "learning_rate": 1.9967975733997763e-05, - "loss": 1.1469, + "learning_rate": 1.9968138684584862e-05, + "loss": 1.1375, "step": 1929 }, { - "epoch": 0.05476730987514188, + "epoch": 0.054691263566550484, "grad_norm": 0.0, - "learning_rate": 1.9967902196691274e-05, - "loss": 0.9847, + "learning_rate": 1.9968065437118238e-05, + "loss": 1.1125, "step": 1930 }, { - "epoch": 0.05479568671963678, + "epoch": 0.05471960100881294, "grad_norm": 0.0, - "learning_rate": 1.9967828575185467e-05, - "loss": 1.1055, + "learning_rate": 1.9967992105686893e-05, + "loss": 1.1808, "step": 1931 }, { - "epoch": 0.05482406356413167, + "epoch": 0.05474793845107541, "grad_norm": 0.0, - "learning_rate": 1.9967754869480963e-05, - "loss": 1.226, + "learning_rate": 1.9967918690291454e-05, + "loss": 1.1215, "step": 1932 }, { - "epoch": 0.05485244040862656, + "epoch": 0.05477627589333787, "grad_norm": 0.0, - "learning_rate": 1.9967681079578386e-05, - "loss": 1.0755, + "learning_rate": 1.9967845190932528e-05, + "loss": 1.0481, "step": 1933 }, { - "epoch": 0.05488081725312145, + "epoch": 0.05480461333560033, "grad_norm": 0.0, - "learning_rate": 1.996760720547836e-05, - "loss": 1.1952, + "learning_rate": 1.9967771607610746e-05, + "loss": 1.1567, "step": 1934 }, { - "epoch": 0.05490919409761635, + "epoch": 0.05483295077786279, "grad_norm": 0.0, - "learning_rate": 1.996753324718151e-05, - "loss": 1.0931, + "learning_rate": 1.996769794032672e-05, + "loss": 1.0885, "step": 1935 }, { - "epoch": 0.054937570942111234, + "epoch": 0.05486128822012525, "grad_norm": 0.0, - "learning_rate": 1.9967459204688452e-05, - "loss": 1.0486, + "learning_rate": 1.996762418908107e-05, + "loss": 1.1683, "step": 1936 }, { - "epoch": 0.05496594778660613, + "epoch": 0.05488962566238771, "grad_norm": 0.0, - "learning_rate": 1.9967385077999822e-05, - "loss": 1.0103, + "learning_rate": 1.9967550353874426e-05, + "loss": 1.1141, "step": 1937 }, { - "epoch": 0.05499432463110102, + "epoch": 0.05491796310465018, "grad_norm": 0.0, - "learning_rate": 1.996731086711624e-05, - "loss": 1.0655, + "learning_rate": 1.99674764347074e-05, + "loss": 1.2018, "step": 1938 }, { - "epoch": 0.05502270147559592, + "epoch": 0.05494630054691264, "grad_norm": 0.0, - "learning_rate": 1.9967236572038337e-05, - "loss": 1.0971, + "learning_rate": 1.996740243158062e-05, + "loss": 1.167, "step": 1939 }, { - "epoch": 0.055051078320090804, + "epoch": 0.054974637989175096, "grad_norm": 0.0, - "learning_rate": 1.996716219276674e-05, - "loss": 1.09, + "learning_rate": 1.9967328344494708e-05, + "loss": 1.1054, "step": 1940 }, { - "epoch": 0.0550794551645857, + "epoch": 0.05500297543143756, "grad_norm": 0.0, - "learning_rate": 1.996708772930207e-05, - "loss": 1.0802, + "learning_rate": 1.996725417345029e-05, + "loss": 1.1057, "step": 1941 }, { - "epoch": 0.05510783200908059, + "epoch": 0.05503131287370002, "grad_norm": 0.0, - "learning_rate": 1.996701318164497e-05, - "loss": 1.0537, + "learning_rate": 1.9967179918447982e-05, + "loss": 1.1088, "step": 1942 }, { - "epoch": 0.05513620885357548, + "epoch": 0.05505965031596248, "grad_norm": 0.0, - "learning_rate": 1.9966938549796056e-05, - "loss": 1.0274, + "learning_rate": 1.996710557948842e-05, + "loss": 1.1828, "step": 1943 }, { - "epoch": 0.055164585698070374, + "epoch": 0.05508798775822494, "grad_norm": 0.0, - "learning_rate": 1.9966863833755966e-05, - "loss": 1.0435, + "learning_rate": 1.9967031156572233e-05, + "loss": 1.1163, "step": 1944 }, { - "epoch": 0.05519296254256527, + "epoch": 0.055116325200487405, "grad_norm": 0.0, - "learning_rate": 1.996678903352533e-05, - "loss": 0.9958, + "learning_rate": 1.9966956649700034e-05, + "loss": 1.1067, "step": 1945 }, { - "epoch": 0.055221339387060156, + "epoch": 0.055144662642749864, "grad_norm": 0.0, - "learning_rate": 1.9966714149104776e-05, - "loss": 1.03, + "learning_rate": 1.996688205887246e-05, + "loss": 1.08, "step": 1946 }, { - "epoch": 0.05524971623155505, + "epoch": 0.05517300008501232, "grad_norm": 0.0, - "learning_rate": 1.996663918049494e-05, - "loss": 1.0184, + "learning_rate": 1.9966807384090135e-05, + "loss": 1.0103, "step": 1947 }, { - "epoch": 0.055278093076049944, + "epoch": 0.05520133752727479, "grad_norm": 0.0, - "learning_rate": 1.996656412769646e-05, - "loss": 1.1021, + "learning_rate": 1.9966732625353695e-05, + "loss": 1.036, "step": 1948 }, { - "epoch": 0.05530646992054484, + "epoch": 0.05522967496953725, "grad_norm": 0.0, - "learning_rate": 1.996648899070996e-05, - "loss": 1.0828, + "learning_rate": 1.996665778266376e-05, + "loss": 1.1416, "step": 1949 }, { - "epoch": 0.055334846765039726, + "epoch": 0.05525801241179971, "grad_norm": 0.0, - "learning_rate": 1.9966413769536078e-05, - "loss": 1.1503, + "learning_rate": 1.996658285602097e-05, + "loss": 1.1298, "step": 1950 }, { - "epoch": 0.05536322360953462, + "epoch": 0.055286349854062174, "grad_norm": 0.0, - "learning_rate": 1.9966338464175457e-05, - "loss": 1.028, + "learning_rate": 1.9966507845425948e-05, + "loss": 1.1344, "step": 1951 }, { - "epoch": 0.055391600454029515, + "epoch": 0.05531468729632463, "grad_norm": 0.0, - "learning_rate": 1.9966263074628723e-05, - "loss": 1.2029, + "learning_rate": 1.9966432750879332e-05, + "loss": 1.1267, "step": 1952 }, { - "epoch": 0.0554199772985244, + "epoch": 0.05534302473858709, "grad_norm": 0.0, - "learning_rate": 1.9966187600896516e-05, - "loss": 1.0735, + "learning_rate": 1.996635757238175e-05, + "loss": 1.1283, "step": 1953 }, { - "epoch": 0.055448354143019296, + "epoch": 0.05537136218084956, "grad_norm": 0.0, - "learning_rate": 1.9966112042979476e-05, - "loss": 1.1306, + "learning_rate": 1.996628230993384e-05, + "loss": 1.061, "step": 1954 }, { - "epoch": 0.05547673098751419, + "epoch": 0.05539969962311202, "grad_norm": 0.0, - "learning_rate": 1.9966036400878237e-05, - "loss": 0.967, + "learning_rate": 1.996620696353623e-05, + "loss": 1.0035, "step": 1955 }, { - "epoch": 0.05550510783200908, + "epoch": 0.055428037065374476, "grad_norm": 0.0, - "learning_rate": 1.9965960674593443e-05, - "loss": 1.1264, + "learning_rate": 1.996613153318956e-05, + "loss": 1.1704, "step": 1956 }, { - "epoch": 0.05553348467650397, + "epoch": 0.05545637450763694, "grad_norm": 0.0, - "learning_rate": 1.996588486412573e-05, - "loss": 1.1111, + "learning_rate": 1.9966056018894464e-05, + "loss": 1.1116, "step": 1957 }, { - "epoch": 0.055561861520998866, + "epoch": 0.0554847119498994, "grad_norm": 0.0, - "learning_rate": 1.996580896947574e-05, - "loss": 1.175, + "learning_rate": 1.9965980420651575e-05, + "loss": 0.9919, "step": 1958 }, { - "epoch": 0.05559023836549376, + "epoch": 0.05551304939216186, "grad_norm": 0.0, - "learning_rate": 1.9965732990644116e-05, - "loss": 1.0022, + "learning_rate": 1.9965904738461534e-05, + "loss": 1.037, "step": 1959 }, { - "epoch": 0.05561861520998865, + "epoch": 0.055541386834424326, "grad_norm": 0.0, - "learning_rate": 1.9965656927631497e-05, - "loss": 1.1198, + "learning_rate": 1.9965828972324974e-05, + "loss": 1.0784, "step": 1960 }, { - "epoch": 0.05564699205448354, + "epoch": 0.055569724276686785, "grad_norm": 0.0, - "learning_rate": 1.9965580780438522e-05, - "loss": 1.0251, + "learning_rate": 1.996575312224254e-05, + "loss": 1.1694, "step": 1961 }, { - "epoch": 0.055675368898978436, + "epoch": 0.055598061718949245, "grad_norm": 0.0, - "learning_rate": 1.9965504549065843e-05, - "loss": 1.1548, + "learning_rate": 1.9965677188214863e-05, + "loss": 1.1564, "step": 1962 }, { - "epoch": 0.055703745743473324, + "epoch": 0.05562639916121171, "grad_norm": 0.0, - "learning_rate": 1.9965428233514092e-05, - "loss": 1.085, + "learning_rate": 1.996560117024259e-05, + "loss": 1.102, "step": 1963 }, { - "epoch": 0.05573212258796822, + "epoch": 0.05565473660347417, "grad_norm": 0.0, - "learning_rate": 1.996535183378393e-05, - "loss": 1.0967, + "learning_rate": 1.9965525068326355e-05, + "loss": 1.0561, "step": 1964 }, { - "epoch": 0.05576049943246311, + "epoch": 0.05568307404573663, "grad_norm": 0.0, - "learning_rate": 1.9965275349875987e-05, - "loss": 1.0532, + "learning_rate": 1.99654488824668e-05, + "loss": 1.1088, "step": 1965 }, { - "epoch": 0.055788876276958, + "epoch": 0.055711411487999095, "grad_norm": 0.0, - "learning_rate": 1.9965198781790917e-05, - "loss": 0.9056, + "learning_rate": 1.9965372612664572e-05, + "loss": 1.0643, "step": 1966 }, { - "epoch": 0.055817253121452894, + "epoch": 0.055739748930261554, "grad_norm": 0.0, - "learning_rate": 1.9965122129529362e-05, - "loss": 1.115, + "learning_rate": 1.996529625892031e-05, + "loss": 1.1185, "step": 1967 }, { - "epoch": 0.05584562996594779, + "epoch": 0.05576808637252401, "grad_norm": 0.0, - "learning_rate": 1.9965045393091974e-05, - "loss": 1.0776, + "learning_rate": 1.9965219821234653e-05, + "loss": 1.1049, "step": 1968 }, { - "epoch": 0.05587400681044268, + "epoch": 0.05579642381478648, "grad_norm": 0.0, - "learning_rate": 1.9964968572479403e-05, - "loss": 1.0346, + "learning_rate": 1.9965143299608253e-05, + "loss": 1.054, "step": 1969 }, { - "epoch": 0.05590238365493757, + "epoch": 0.05582476125704894, "grad_norm": 0.0, - "learning_rate": 1.9964891667692292e-05, - "loss": 1.1388, + "learning_rate": 1.996506669404175e-05, + "loss": 1.1921, "step": 1970 }, { - "epoch": 0.055930760499432464, + "epoch": 0.0558530986993114, "grad_norm": 0.0, - "learning_rate": 1.996481467873129e-05, - "loss": 1.0622, + "learning_rate": 1.996499000453579e-05, + "loss": 1.1811, "step": 1971 }, { - "epoch": 0.05595913734392736, + "epoch": 0.05588143614157386, "grad_norm": 0.0, - "learning_rate": 1.9964737605597054e-05, - "loss": 1.0449, + "learning_rate": 1.9964913231091017e-05, + "loss": 1.1104, "step": 1972 }, { - "epoch": 0.055987514188422245, + "epoch": 0.05590977358383632, "grad_norm": 0.0, - "learning_rate": 1.9964660448290232e-05, - "loss": 1.1296, + "learning_rate": 1.9964836373708078e-05, + "loss": 1.312, "step": 1973 }, { - "epoch": 0.05601589103291714, + "epoch": 0.05593811102609878, "grad_norm": 0.0, - "learning_rate": 1.9964583206811475e-05, - "loss": 1.059, + "learning_rate": 1.9964759432387626e-05, + "loss": 1.1518, "step": 1974 }, { - "epoch": 0.056044267877412034, + "epoch": 0.05596644846836125, "grad_norm": 0.0, - "learning_rate": 1.9964505881161435e-05, - "loss": 1.0673, + "learning_rate": 1.9964682407130302e-05, + "loss": 0.987, "step": 1975 }, { - "epoch": 0.05607264472190692, + "epoch": 0.05599478591062371, "grad_norm": 0.0, - "learning_rate": 1.9964428471340765e-05, - "loss": 1.0173, + "learning_rate": 1.996460529793676e-05, + "loss": 1.0355, "step": 1976 }, { - "epoch": 0.056101021566401815, + "epoch": 0.056023123352886166, "grad_norm": 0.0, - "learning_rate": 1.9964350977350123e-05, - "loss": 1.1125, + "learning_rate": 1.9964528104807647e-05, + "loss": 1.2065, "step": 1977 }, { - "epoch": 0.05612939841089671, + "epoch": 0.05605146079514863, "grad_norm": 0.0, - "learning_rate": 1.9964273399190156e-05, - "loss": 1.0588, + "learning_rate": 1.9964450827743613e-05, + "loss": 1.1846, "step": 1978 }, { - "epoch": 0.056157775255391604, + "epoch": 0.05607979823741109, "grad_norm": 0.0, - "learning_rate": 1.9964195736861524e-05, - "loss": 1.0306, + "learning_rate": 1.996437346674531e-05, + "loss": 1.0977, "step": 1979 }, { - "epoch": 0.05618615209988649, + "epoch": 0.05610813567967355, "grad_norm": 0.0, - "learning_rate": 1.9964117990364885e-05, - "loss": 1.0672, + "learning_rate": 1.996429602181339e-05, + "loss": 1.0652, "step": 1980 }, { - "epoch": 0.056214528944381385, + "epoch": 0.056136473121936016, "grad_norm": 0.0, - "learning_rate": 1.9964040159700896e-05, - "loss": 1.1533, + "learning_rate": 1.99642184929485e-05, + "loss": 1.0375, "step": 1981 }, { - "epoch": 0.05624290578887628, + "epoch": 0.056164810564198475, "grad_norm": 0.0, - "learning_rate": 1.9963962244870205e-05, - "loss": 0.9707, + "learning_rate": 1.9964140880151302e-05, + "loss": 1.2057, "step": 1982 }, { - "epoch": 0.05627128263337117, + "epoch": 0.056193148006460934, "grad_norm": 0.0, - "learning_rate": 1.996388424587348e-05, - "loss": 1.1298, + "learning_rate": 1.9964063183422443e-05, + "loss": 1.1166, "step": 1983 }, { - "epoch": 0.05629965947786606, + "epoch": 0.0562214854487234, "grad_norm": 0.0, - "learning_rate": 1.996380616271138e-05, - "loss": 1.0885, + "learning_rate": 1.9963985402762577e-05, + "loss": 1.11, "step": 1984 }, { - "epoch": 0.056328036322360955, + "epoch": 0.05624982289098586, "grad_norm": 0.0, - "learning_rate": 1.9963727995384558e-05, - "loss": 1.0892, + "learning_rate": 1.9963907538172367e-05, + "loss": 1.0883, "step": 1985 }, { - "epoch": 0.05635641316685584, + "epoch": 0.05627816033324832, "grad_norm": 0.0, - "learning_rate": 1.996364974389368e-05, - "loss": 1.1026, + "learning_rate": 1.9963829589652462e-05, + "loss": 1.0476, "step": 1986 }, { - "epoch": 0.05638479001135074, + "epoch": 0.056306497775510785, "grad_norm": 0.0, - "learning_rate": 1.9963571408239405e-05, - "loss": 1.1049, + "learning_rate": 1.996375155720352e-05, + "loss": 1.1479, "step": 1987 }, { - "epoch": 0.05641316685584563, + "epoch": 0.056334835217773244, "grad_norm": 0.0, - "learning_rate": 1.996349298842239e-05, - "loss": 1.1036, + "learning_rate": 1.99636734408262e-05, + "loss": 1.0582, "step": 1988 }, { - "epoch": 0.056441543700340525, + "epoch": 0.0563631726600357, "grad_norm": 0.0, - "learning_rate": 1.9963414484443304e-05, - "loss": 1.0108, + "learning_rate": 1.9963595240521158e-05, + "loss": 1.1201, "step": 1989 }, { - "epoch": 0.05646992054483541, + "epoch": 0.05639151010229817, "grad_norm": 0.0, - "learning_rate": 1.996333589630281e-05, - "loss": 1.0296, + "learning_rate": 1.9963516956289054e-05, + "loss": 1.0836, "step": 1990 }, { - "epoch": 0.05649829738933031, + "epoch": 0.05641984754456063, "grad_norm": 0.0, - "learning_rate": 1.9963257224001568e-05, - "loss": 1.049, + "learning_rate": 1.9963438588130547e-05, + "loss": 1.0437, "step": 1991 }, { - "epoch": 0.0565266742338252, + "epoch": 0.05644818498682309, "grad_norm": 0.0, - "learning_rate": 1.9963178467540244e-05, - "loss": 1.1864, + "learning_rate": 1.9963360136046295e-05, + "loss": 1.2092, "step": 1992 }, { - "epoch": 0.05655505107832009, + "epoch": 0.05647652242908555, "grad_norm": 0.0, - "learning_rate": 1.9963099626919506e-05, - "loss": 0.9172, + "learning_rate": 1.996328160003696e-05, + "loss": 1.2022, "step": 1993 }, { - "epoch": 0.05658342792281498, + "epoch": 0.05650485987134801, "grad_norm": 0.0, - "learning_rate": 1.9963020702140016e-05, - "loss": 1.0971, + "learning_rate": 1.996320298010321e-05, + "loss": 1.068, "step": 1994 }, { - "epoch": 0.05661180476730988, + "epoch": 0.05653319731361047, "grad_norm": 0.0, - "learning_rate": 1.996294169320244e-05, - "loss": 1.08, + "learning_rate": 1.9963124276245695e-05, + "loss": 1.2255, "step": 1995 }, { - "epoch": 0.056640181611804764, + "epoch": 0.05656153475587294, "grad_norm": 0.0, - "learning_rate": 1.9962862600107452e-05, - "loss": 1.1657, + "learning_rate": 1.9963045488465088e-05, + "loss": 0.9931, "step": 1996 }, { - "epoch": 0.05666855845629966, + "epoch": 0.056589872198135396, "grad_norm": 0.0, - "learning_rate": 1.9962783422855715e-05, - "loss": 1.0987, + "learning_rate": 1.996296661676205e-05, + "loss": 1.1163, "step": 1997 }, { - "epoch": 0.05669693530079455, + "epoch": 0.056618209640397855, "grad_norm": 0.0, - "learning_rate": 1.9962704161447894e-05, - "loss": 1.0039, + "learning_rate": 1.996288766113724e-05, + "loss": 1.1258, "step": 1998 }, { - "epoch": 0.05672531214528945, + "epoch": 0.05664654708266032, "grad_norm": 0.0, - "learning_rate": 1.996262481588467e-05, - "loss": 0.9565, + "learning_rate": 1.9962808621591334e-05, + "loss": 1.1813, "step": 1999 }, { - "epoch": 0.056753688989784334, + "epoch": 0.05667488452492278, "grad_norm": 0.0, - "learning_rate": 1.9962545386166702e-05, - "loss": 1.076, + "learning_rate": 1.9962729498124983e-05, + "loss": 1.1348, "step": 2000 }, { - "epoch": 0.05678206583427923, + "epoch": 0.05670322196718524, "grad_norm": 0.0, - "learning_rate": 1.9962465872294663e-05, - "loss": 1.055, + "learning_rate": 1.9962650290738866e-05, + "loss": 1.1097, "step": 2001 }, { - "epoch": 0.05681044267877412, + "epoch": 0.056731559409447706, "grad_norm": 0.0, - "learning_rate": 1.996238627426923e-05, - "loss": 1.0233, + "learning_rate": 1.9962570999433648e-05, + "loss": 1.0868, "step": 2002 }, { - "epoch": 0.05683881952326901, + "epoch": 0.056759896851710165, "grad_norm": 0.0, - "learning_rate": 1.9962306592091073e-05, - "loss": 1.0026, + "learning_rate": 1.9962491624209996e-05, + "loss": 1.1245, "step": 2003 }, { - "epoch": 0.056867196367763904, + "epoch": 0.056788234293972624, "grad_norm": 0.0, - "learning_rate": 1.9962226825760865e-05, - "loss": 1.0051, + "learning_rate": 1.9962412165068575e-05, + "loss": 1.19, "step": 2004 }, { - "epoch": 0.0568955732122588, + "epoch": 0.05681657173623509, "grad_norm": 0.0, - "learning_rate": 1.9962146975279274e-05, - "loss": 1.0939, + "learning_rate": 1.9962332622010057e-05, + "loss": 0.9895, "step": 2005 }, { - "epoch": 0.056923950056753686, + "epoch": 0.05684490917849755, "grad_norm": 0.0, - "learning_rate": 1.9962067040646983e-05, - "loss": 0.9321, + "learning_rate": 1.996225299503511e-05, + "loss": 1.1247, "step": 2006 }, { - "epoch": 0.05695232690124858, + "epoch": 0.05687324662076001, "grad_norm": 0.0, - "learning_rate": 1.9961987021864666e-05, - "loss": 1.1409, + "learning_rate": 1.996217328414441e-05, + "loss": 1.0665, "step": 2007 }, { - "epoch": 0.056980703745743475, + "epoch": 0.056901584063022474, "grad_norm": 0.0, - "learning_rate": 1.9961906918932996e-05, - "loss": 1.0884, + "learning_rate": 1.9962093489338622e-05, + "loss": 1.1043, "step": 2008 }, { - "epoch": 0.05700908059023837, + "epoch": 0.05692992150528493, "grad_norm": 0.0, - "learning_rate": 1.996182673185265e-05, - "loss": 1.0526, + "learning_rate": 1.9962013610618423e-05, + "loss": 1.2517, "step": 2009 }, { - "epoch": 0.057037457434733256, + "epoch": 0.05695825894754739, "grad_norm": 0.0, - "learning_rate": 1.9961746460624304e-05, - "loss": 0.9333, + "learning_rate": 1.9961933647984487e-05, + "loss": 1.1782, "step": 2010 }, { - "epoch": 0.05706583427922815, + "epoch": 0.05698659638980986, "grad_norm": 0.0, - "learning_rate": 1.9961666105248637e-05, - "loss": 1.0242, + "learning_rate": 1.996185360143748e-05, + "loss": 1.171, "step": 2011 }, { - "epoch": 0.057094211123723045, + "epoch": 0.05701493383207232, "grad_norm": 0.0, - "learning_rate": 1.9961585665726332e-05, - "loss": 1.1112, + "learning_rate": 1.9961773470978083e-05, + "loss": 1.2876, "step": 2012 }, { - "epoch": 0.05712258796821793, + "epoch": 0.05704327127433478, "grad_norm": 0.0, - "learning_rate": 1.9961505142058063e-05, - "loss": 1.1519, + "learning_rate": 1.9961693256606968e-05, + "loss": 1.0604, "step": 2013 }, { - "epoch": 0.057150964812712826, + "epoch": 0.05707160871659724, "grad_norm": 0.0, - "learning_rate": 1.9961424534244514e-05, - "loss": 1.1085, + "learning_rate": 1.996161295832481e-05, + "loss": 1.1537, "step": 2014 }, { - "epoch": 0.05717934165720772, + "epoch": 0.0570999461588597, "grad_norm": 0.0, - "learning_rate": 1.9961343842286363e-05, - "loss": 1.09, + "learning_rate": 1.996153257613229e-05, + "loss": 1.1883, "step": 2015 }, { - "epoch": 0.05720771850170261, + "epoch": 0.05712828360112216, "grad_norm": 0.0, - "learning_rate": 1.9961263066184292e-05, - "loss": 1.099, + "learning_rate": 1.9961452110030082e-05, + "loss": 1.2714, "step": 2016 }, { - "epoch": 0.0572360953461975, + "epoch": 0.05715662104338463, "grad_norm": 0.0, - "learning_rate": 1.9961182205938984e-05, - "loss": 1.1071, + "learning_rate": 1.9961371560018864e-05, + "loss": 1.0589, "step": 2017 }, { - "epoch": 0.057264472190692396, + "epoch": 0.057184958485647086, "grad_norm": 0.0, - "learning_rate": 1.9961101261551127e-05, - "loss": 1.1096, + "learning_rate": 1.9961290926099313e-05, + "loss": 1.1746, "step": 2018 }, { - "epoch": 0.05729284903518729, + "epoch": 0.057213295927909545, "grad_norm": 0.0, - "learning_rate": 1.9961020233021395e-05, - "loss": 1.0361, + "learning_rate": 1.996121020827211e-05, + "loss": 1.1079, "step": 2019 }, { - "epoch": 0.05732122587968218, + "epoch": 0.05724163337017201, "grad_norm": 0.0, - "learning_rate": 1.9960939120350477e-05, - "loss": 0.9811, + "learning_rate": 1.9961129406537932e-05, + "loss": 1.1394, "step": 2020 }, { - "epoch": 0.05734960272417707, + "epoch": 0.05726997081243447, "grad_norm": 0.0, - "learning_rate": 1.996085792353906e-05, - "loss": 1.1366, + "learning_rate": 1.9961048520897465e-05, + "loss": 1.0392, "step": 2021 }, { - "epoch": 0.057377979568671966, + "epoch": 0.05729830825469693, "grad_norm": 0.0, - "learning_rate": 1.9960776642587828e-05, - "loss": 1.0561, + "learning_rate": 1.9960967551351385e-05, + "loss": 1.18, "step": 2022 }, { - "epoch": 0.057406356413166854, + "epoch": 0.057326645696959395, "grad_norm": 0.0, - "learning_rate": 1.996069527749747e-05, - "loss": 1.1013, + "learning_rate": 1.996088649790038e-05, + "loss": 1.1443, "step": 2023 }, { - "epoch": 0.05743473325766175, + "epoch": 0.057354983139221855, "grad_norm": 0.0, - "learning_rate": 1.996061382826867e-05, - "loss": 1.0924, + "learning_rate": 1.9960805360545124e-05, + "loss": 1.0931, "step": 2024 }, { - "epoch": 0.05746311010215664, + "epoch": 0.057383320581484314, "grad_norm": 0.0, - "learning_rate": 1.9960532294902114e-05, - "loss": 1.047, + "learning_rate": 1.996072413928631e-05, + "loss": 1.1556, "step": 2025 }, { - "epoch": 0.05749148694665153, + "epoch": 0.05741165802374678, "grad_norm": 0.0, - "learning_rate": 1.9960450677398495e-05, - "loss": 1.1148, + "learning_rate": 1.9960642834124614e-05, + "loss": 1.1006, "step": 2026 }, { - "epoch": 0.057519863791146424, + "epoch": 0.05743999546600924, "grad_norm": 0.0, - "learning_rate": 1.9960368975758505e-05, - "loss": 1.0682, + "learning_rate": 1.9960561445060726e-05, + "loss": 1.0974, "step": 2027 }, { - "epoch": 0.05754824063564132, + "epoch": 0.0574683329082717, "grad_norm": 0.0, - "learning_rate": 1.9960287189982826e-05, - "loss": 0.9956, + "learning_rate": 1.996047997209533e-05, + "loss": 1.1049, "step": 2028 }, { - "epoch": 0.05757661748013621, + "epoch": 0.057496670350534164, "grad_norm": 0.0, - "learning_rate": 1.9960205320072158e-05, - "loss": 0.9899, + "learning_rate": 1.9960398415229114e-05, + "loss": 1.09, "step": 2029 }, { - "epoch": 0.0576049943246311, + "epoch": 0.05752500779279662, "grad_norm": 0.0, - "learning_rate": 1.9960123366027187e-05, - "loss": 1.0283, + "learning_rate": 1.9960316774462766e-05, + "loss": 1.2575, "step": 2030 }, { - "epoch": 0.057633371169125994, + "epoch": 0.05755334523505908, "grad_norm": 0.0, - "learning_rate": 1.9960041327848602e-05, - "loss": 1.1094, + "learning_rate": 1.9960235049796967e-05, + "loss": 1.1513, "step": 2031 }, { - "epoch": 0.05766174801362089, + "epoch": 0.05758168267732155, "grad_norm": 0.0, - "learning_rate": 1.9959959205537105e-05, - "loss": 1.0514, + "learning_rate": 1.996015324123241e-05, + "loss": 1.0776, "step": 2032 }, { - "epoch": 0.057690124858115775, + "epoch": 0.05761002011958401, "grad_norm": 0.0, - "learning_rate": 1.9959876999093382e-05, - "loss": 1.0329, + "learning_rate": 1.9960071348769783e-05, + "loss": 1.1188, "step": 2033 }, { - "epoch": 0.05771850170261067, + "epoch": 0.057638357561846466, "grad_norm": 0.0, - "learning_rate": 1.995979470851813e-05, - "loss": 1.1328, + "learning_rate": 1.9959989372409777e-05, + "loss": 1.1252, "step": 2034 }, { - "epoch": 0.057746878547105564, + "epoch": 0.05766669500410893, "grad_norm": 0.0, - "learning_rate": 1.9959712333812047e-05, - "loss": 0.9457, + "learning_rate": 1.9959907312153085e-05, + "loss": 1.1537, "step": 2035 }, { - "epoch": 0.05777525539160045, + "epoch": 0.05769503244637139, "grad_norm": 0.0, - "learning_rate": 1.9959629874975823e-05, - "loss": 1.0764, + "learning_rate": 1.995982516800039e-05, + "loss": 1.1602, "step": 2036 }, { - "epoch": 0.057803632236095345, + "epoch": 0.05772336988863385, "grad_norm": 0.0, - "learning_rate": 1.995954733201016e-05, - "loss": 1.009, + "learning_rate": 1.9959742939952393e-05, + "loss": 1.0836, "step": 2037 }, { - "epoch": 0.05783200908059024, + "epoch": 0.05775170733089632, "grad_norm": 0.0, - "learning_rate": 1.9959464704915753e-05, - "loss": 1.0139, + "learning_rate": 1.9959660628009782e-05, + "loss": 1.1567, "step": 2038 }, { - "epoch": 0.057860385925085134, + "epoch": 0.057780044773158776, "grad_norm": 0.0, - "learning_rate": 1.99593819936933e-05, - "loss": 1.0575, + "learning_rate": 1.995957823217325e-05, + "loss": 1.0687, "step": 2039 }, { - "epoch": 0.05788876276958002, + "epoch": 0.057808382215421235, "grad_norm": 0.0, - "learning_rate": 1.99592991983435e-05, - "loss": 1.1348, + "learning_rate": 1.995949575244349e-05, + "loss": 1.2011, "step": 2040 }, { - "epoch": 0.057917139614074915, + "epoch": 0.0578367196576837, "grad_norm": 0.0, - "learning_rate": 1.995921631886705e-05, - "loss": 1.0089, + "learning_rate": 1.9959413188821204e-05, + "loss": 1.1225, "step": 2041 }, { - "epoch": 0.05794551645856981, + "epoch": 0.05786505709994616, "grad_norm": 0.0, - "learning_rate": 1.9959133355264653e-05, - "loss": 1.1407, + "learning_rate": 1.995933054130708e-05, + "loss": 1.1398, "step": 2042 }, { - "epoch": 0.0579738933030647, + "epoch": 0.05789339454220862, "grad_norm": 0.0, - "learning_rate": 1.9959050307537006e-05, - "loss": 1.0033, + "learning_rate": 1.9959247809901817e-05, + "loss": 1.067, "step": 2043 }, { - "epoch": 0.05800227014755959, + "epoch": 0.05792173198447108, "grad_norm": 0.0, - "learning_rate": 1.9958967175684818e-05, - "loss": 1.0869, + "learning_rate": 1.995916499460611e-05, + "loss": 1.1537, "step": 2044 }, { - "epoch": 0.058030646992054485, + "epoch": 0.057950069426733544, "grad_norm": 0.0, - "learning_rate": 1.9958883959708783e-05, - "loss": 1.0931, + "learning_rate": 1.9959082095420658e-05, + "loss": 1.0695, "step": 2045 }, { - "epoch": 0.05805902383654937, + "epoch": 0.057978406868996, "grad_norm": 0.0, - "learning_rate": 1.995880065960961e-05, - "loss": 1.1234, + "learning_rate": 1.995899911234616e-05, + "loss": 1.0679, "step": 2046 }, { - "epoch": 0.05808740068104427, + "epoch": 0.05800674431125846, "grad_norm": 0.0, - "learning_rate": 1.9958717275388e-05, - "loss": 1.0978, + "learning_rate": 1.9958916045383317e-05, + "loss": 1.1238, "step": 2047 }, { - "epoch": 0.05811577752553916, + "epoch": 0.05803508175352093, "grad_norm": 0.0, - "learning_rate": 1.9958633807044657e-05, - "loss": 1.062, + "learning_rate": 1.9958832894532824e-05, + "loss": 1.076, "step": 2048 }, { - "epoch": 0.058144154370034055, + "epoch": 0.05806341919578339, "grad_norm": 0.0, - "learning_rate": 1.9958550254580284e-05, - "loss": 1.0973, + "learning_rate": 1.9958749659795382e-05, + "loss": 1.0526, "step": 2049 }, { - "epoch": 0.05817253121452894, + "epoch": 0.05809175663804585, "grad_norm": 0.0, - "learning_rate": 1.9958466617995593e-05, - "loss": 1.1095, + "learning_rate": 1.9958666341171694e-05, + "loss": 1.0263, "step": 2050 }, { - "epoch": 0.05820090805902384, + "epoch": 0.05812009408030831, "grad_norm": 0.0, - "learning_rate": 1.9958382897291287e-05, - "loss": 1.1229, + "learning_rate": 1.9958582938662464e-05, + "loss": 1.0834, "step": 2051 }, { - "epoch": 0.05822928490351873, + "epoch": 0.05814843152257077, "grad_norm": 0.0, - "learning_rate": 1.995829909246807e-05, - "loss": 1.0529, + "learning_rate": 1.995849945226839e-05, + "loss": 1.1623, "step": 2052 }, { - "epoch": 0.05825766174801362, + "epoch": 0.05817676896483323, "grad_norm": 0.0, - "learning_rate": 1.9958215203526655e-05, - "loss": 1.0524, + "learning_rate": 1.9958415881990176e-05, + "loss": 1.1835, "step": 2053 }, { - "epoch": 0.05828603859250851, + "epoch": 0.0582051064070957, "grad_norm": 0.0, - "learning_rate": 1.9958131230467747e-05, - "loss": 0.9191, + "learning_rate": 1.9958332227828525e-05, + "loss": 1.125, "step": 2054 }, { - "epoch": 0.05831441543700341, + "epoch": 0.058233443849358156, "grad_norm": 0.0, - "learning_rate": 1.9958047173292058e-05, - "loss": 1.1382, + "learning_rate": 1.995824848978415e-05, + "loss": 1.0357, "step": 2055 }, { - "epoch": 0.058342792281498294, + "epoch": 0.058261781291620615, "grad_norm": 0.0, - "learning_rate": 1.9957963032000298e-05, - "loss": 1.0986, + "learning_rate": 1.9958164667857747e-05, + "loss": 1.0747, "step": 2056 }, { - "epoch": 0.05837116912599319, + "epoch": 0.05829011873388308, "grad_norm": 0.0, - "learning_rate": 1.9957878806593176e-05, - "loss": 1.0134, + "learning_rate": 1.9958080762050023e-05, + "loss": 1.1406, "step": 2057 }, { - "epoch": 0.05839954597048808, + "epoch": 0.05831845617614554, "grad_norm": 0.0, - "learning_rate": 1.9957794497071403e-05, - "loss": 1.0594, + "learning_rate": 1.995799677236169e-05, + "loss": 1.1326, "step": 2058 }, { - "epoch": 0.05842792281498298, + "epoch": 0.058346793618408, "grad_norm": 0.0, - "learning_rate": 1.9957710103435694e-05, - "loss": 1.0177, + "learning_rate": 1.995791269879345e-05, + "loss": 1.1656, "step": 2059 }, { - "epoch": 0.058456299659477864, + "epoch": 0.058375131060670465, "grad_norm": 0.0, - "learning_rate": 1.9957625625686757e-05, - "loss": 1.1846, + "learning_rate": 1.9957828541346014e-05, + "loss": 1.0488, "step": 2060 }, { - "epoch": 0.05848467650397276, + "epoch": 0.058403468502932925, "grad_norm": 0.0, - "learning_rate": 1.9957541063825314e-05, - "loss": 1.1044, + "learning_rate": 1.995774430002009e-05, + "loss": 1.069, "step": 2061 }, { - "epoch": 0.05851305334846765, + "epoch": 0.058431805945195384, "grad_norm": 0.0, - "learning_rate": 1.995745641785207e-05, - "loss": 1.0801, + "learning_rate": 1.9957659974816393e-05, + "loss": 1.0923, "step": 2062 }, { - "epoch": 0.05854143019296254, + "epoch": 0.05846014338745785, "grad_norm": 0.0, - "learning_rate": 1.995737168776774e-05, - "loss": 1.0537, + "learning_rate": 1.995757556573562e-05, + "loss": 1.1646, "step": 2063 }, { - "epoch": 0.058569807037457434, + "epoch": 0.05848848082972031, "grad_norm": 0.0, - "learning_rate": 1.995728687357305e-05, - "loss": 1.1534, + "learning_rate": 1.99574910727785e-05, + "loss": 1.1666, "step": 2064 }, { - "epoch": 0.05859818388195233, + "epoch": 0.05851681827198277, "grad_norm": 0.0, - "learning_rate": 1.995720197526871e-05, - "loss": 1.0161, + "learning_rate": 1.9957406495945725e-05, + "loss": 1.1722, "step": 2065 }, { - "epoch": 0.058626560726447216, + "epoch": 0.058545155714245234, "grad_norm": 0.0, - "learning_rate": 1.9957116992855435e-05, - "loss": 1.0196, + "learning_rate": 1.9957321835238024e-05, + "loss": 1.1044, "step": 2066 }, { - "epoch": 0.05865493757094211, + "epoch": 0.05857349315650769, "grad_norm": 0.0, - "learning_rate": 1.9957031926333944e-05, - "loss": 1.1523, + "learning_rate": 1.9957237090656104e-05, + "loss": 1.1116, "step": 2067 }, { - "epoch": 0.058683314415437005, + "epoch": 0.05860183059877015, "grad_norm": 0.0, - "learning_rate": 1.995694677570496e-05, - "loss": 1.0767, + "learning_rate": 1.9957152262200673e-05, + "loss": 1.0358, "step": 2068 }, { - "epoch": 0.0587116912599319, + "epoch": 0.05863016804103262, "grad_norm": 0.0, - "learning_rate": 1.9956861540969198e-05, - "loss": 1.1447, + "learning_rate": 1.9957067349872457e-05, + "loss": 1.083, "step": 2069 }, { - "epoch": 0.058740068104426786, + "epoch": 0.05865850548329508, "grad_norm": 0.0, - "learning_rate": 1.9956776222127376e-05, - "loss": 0.9709, + "learning_rate": 1.9956982353672163e-05, + "loss": 1.0949, "step": 2070 }, { - "epoch": 0.05876844494892168, + "epoch": 0.058686842925557536, "grad_norm": 0.0, - "learning_rate": 1.995669081918022e-05, - "loss": 1.0032, + "learning_rate": 1.995689727360051e-05, + "loss": 1.0919, "step": 2071 }, { - "epoch": 0.058796821793416575, + "epoch": 0.05871518036782, "grad_norm": 0.0, - "learning_rate": 1.995660533212845e-05, - "loss": 1.0803, + "learning_rate": 1.995681210965821e-05, + "loss": 1.1145, "step": 2072 }, { - "epoch": 0.05882519863791146, + "epoch": 0.05874351781008246, "grad_norm": 0.0, - "learning_rate": 1.9956519760972787e-05, - "loss": 1.0918, + "learning_rate": 1.995672686184599e-05, + "loss": 1.1235, "step": 2073 }, { - "epoch": 0.058853575482406356, + "epoch": 0.05877185525234492, "grad_norm": 0.0, - "learning_rate": 1.9956434105713954e-05, - "loss": 1.0907, + "learning_rate": 1.995664153016456e-05, + "loss": 1.0961, "step": 2074 }, { - "epoch": 0.05888195232690125, + "epoch": 0.05880019269460739, "grad_norm": 0.0, - "learning_rate": 1.9956348366352675e-05, - "loss": 1.0871, + "learning_rate": 1.9956556114614638e-05, + "loss": 1.2047, "step": 2075 }, { - "epoch": 0.05891032917139614, + "epoch": 0.058828530136869846, "grad_norm": 0.0, - "learning_rate": 1.9956262542889674e-05, - "loss": 1.0328, + "learning_rate": 1.995647061519695e-05, + "loss": 1.2156, "step": 2076 }, { - "epoch": 0.05893870601589103, + "epoch": 0.058856867579132305, "grad_norm": 0.0, - "learning_rate": 1.9956176635325676e-05, - "loss": 1.0143, + "learning_rate": 1.995638503191221e-05, + "loss": 1.0861, "step": 2077 }, { - "epoch": 0.058967082860385926, + "epoch": 0.05888520502139477, "grad_norm": 0.0, - "learning_rate": 1.99560906436614e-05, - "loss": 0.9993, + "learning_rate": 1.9956299364761143e-05, + "loss": 1.1419, "step": 2078 }, { - "epoch": 0.05899545970488082, + "epoch": 0.05891354246365723, "grad_norm": 0.0, - "learning_rate": 1.9956004567897587e-05, - "loss": 1.0403, + "learning_rate": 1.995621361374447e-05, + "loss": 1.0717, "step": 2079 }, { - "epoch": 0.05902383654937571, + "epoch": 0.05894187990591969, "grad_norm": 0.0, - "learning_rate": 1.995591840803495e-05, - "loss": 0.9684, + "learning_rate": 1.9956127778862917e-05, + "loss": 1.0584, "step": 2080 }, { - "epoch": 0.0590522133938706, + "epoch": 0.058970217348182155, "grad_norm": 0.0, - "learning_rate": 1.9955832164074226e-05, - "loss": 1.08, + "learning_rate": 1.9956041860117196e-05, + "loss": 1.1074, "step": 2081 }, { - "epoch": 0.059080590238365496, + "epoch": 0.058998554790444614, "grad_norm": 0.0, - "learning_rate": 1.995574583601614e-05, - "loss": 1.0548, + "learning_rate": 1.9955955857508038e-05, + "loss": 1.0676, "step": 2082 }, { - "epoch": 0.059108967082860384, + "epoch": 0.05902689223270707, "grad_norm": 0.0, - "learning_rate": 1.9955659423861423e-05, - "loss": 0.9439, + "learning_rate": 1.9955869771036167e-05, + "loss": 1.1502, "step": 2083 }, { - "epoch": 0.05913734392735528, + "epoch": 0.05905522967496954, "grad_norm": 0.0, - "learning_rate": 1.9955572927610798e-05, - "loss": 1.0931, + "learning_rate": 1.9955783600702308e-05, + "loss": 1.1626, "step": 2084 }, { - "epoch": 0.05916572077185017, + "epoch": 0.059083567117232, "grad_norm": 0.0, - "learning_rate": 1.9955486347265006e-05, - "loss": 1.0887, + "learning_rate": 1.9955697346507187e-05, + "loss": 1.1803, "step": 2085 }, { - "epoch": 0.05919409761634506, + "epoch": 0.05911190455949446, "grad_norm": 0.0, - "learning_rate": 1.995539968282477e-05, - "loss": 1.0127, + "learning_rate": 1.995561100845153e-05, + "loss": 1.0986, "step": 2086 }, { - "epoch": 0.059222474460839954, + "epoch": 0.059140242001756924, "grad_norm": 0.0, - "learning_rate": 1.9955312934290827e-05, - "loss": 1.1313, + "learning_rate": 1.9955524586536067e-05, + "loss": 1.1199, "step": 2087 }, { - "epoch": 0.05925085130533485, + "epoch": 0.05916857944401938, "grad_norm": 0.0, - "learning_rate": 1.9955226101663906e-05, - "loss": 1.0292, + "learning_rate": 1.9955438080761525e-05, + "loss": 1.0354, "step": 2088 }, { - "epoch": 0.05927922814982974, + "epoch": 0.05919691688628184, "grad_norm": 0.0, - "learning_rate": 1.9955139184944746e-05, - "loss": 1.1541, + "learning_rate": 1.9955351491128624e-05, + "loss": 0.9964, "step": 2089 }, { - "epoch": 0.05930760499432463, + "epoch": 0.05922525432854431, "grad_norm": 0.0, - "learning_rate": 1.9955052184134077e-05, - "loss": 1.0131, + "learning_rate": 1.9955264817638105e-05, + "loss": 1.1279, "step": 2090 }, { - "epoch": 0.059335981838819524, + "epoch": 0.05925359177080677, "grad_norm": 0.0, - "learning_rate": 1.9954965099232636e-05, - "loss": 1.1861, + "learning_rate": 1.9955178060290695e-05, + "loss": 1.0486, "step": 2091 }, { - "epoch": 0.05936435868331442, + "epoch": 0.059281929213069226, "grad_norm": 0.0, - "learning_rate": 1.9954877930241152e-05, - "loss": 1.0425, + "learning_rate": 1.9955091219087125e-05, + "loss": 1.3294, "step": 2092 }, { - "epoch": 0.059392735527809305, + "epoch": 0.05931026665533169, "grad_norm": 0.0, - "learning_rate": 1.9954790677160368e-05, - "loss": 1.1117, + "learning_rate": 1.995500429402812e-05, + "loss": 1.0923, "step": 2093 }, { - "epoch": 0.0594211123723042, + "epoch": 0.05933860409759415, "grad_norm": 0.0, - "learning_rate": 1.9954703339991024e-05, - "loss": 1.0251, + "learning_rate": 1.9954917285114418e-05, + "loss": 1.2206, "step": 2094 }, { - "epoch": 0.059449489216799094, + "epoch": 0.05936694153985661, "grad_norm": 0.0, - "learning_rate": 1.995461591873385e-05, - "loss": 1.1614, + "learning_rate": 1.9954830192346752e-05, + "loss": 1.1447, "step": 2095 }, { - "epoch": 0.05947786606129398, + "epoch": 0.059395278982119076, "grad_norm": 0.0, - "learning_rate": 1.995452841338959e-05, - "loss": 1.1038, + "learning_rate": 1.9954743015725856e-05, + "loss": 1.0134, "step": 2096 }, { - "epoch": 0.059506242905788875, + "epoch": 0.059423616424381535, "grad_norm": 0.0, - "learning_rate": 1.995444082395898e-05, - "loss": 1.0569, + "learning_rate": 1.9954655755252463e-05, + "loss": 1.1626, "step": 2097 }, { - "epoch": 0.05953461975028377, + "epoch": 0.059451953866643995, "grad_norm": 0.0, - "learning_rate": 1.995435315044276e-05, - "loss": 1.1608, + "learning_rate": 1.99545684109273e-05, + "loss": 1.1118, "step": 2098 }, { - "epoch": 0.059562996594778664, + "epoch": 0.05948029130890646, "grad_norm": 0.0, - "learning_rate": 1.995426539284167e-05, - "loss": 1.0686, + "learning_rate": 1.995448098275112e-05, + "loss": 0.9279, "step": 2099 }, { - "epoch": 0.05959137343927355, + "epoch": 0.05950862875116892, "grad_norm": 0.0, - "learning_rate": 1.9954177551156454e-05, - "loss": 1.1168, + "learning_rate": 1.995439347072465e-05, + "loss": 1.0775, "step": 2100 }, { - "epoch": 0.059619750283768445, + "epoch": 0.05953696619343138, "grad_norm": 0.0, - "learning_rate": 1.9954089625387856e-05, - "loss": 1.0912, + "learning_rate": 1.995430587484862e-05, + "loss": 1.1198, "step": 2101 }, { - "epoch": 0.05964812712826334, + "epoch": 0.059565303635693845, "grad_norm": 0.0, - "learning_rate": 1.9954001615536612e-05, - "loss": 1.1555, + "learning_rate": 1.9954218195123782e-05, + "loss": 1.11, "step": 2102 }, { - "epoch": 0.05967650397275823, + "epoch": 0.059593641077956304, "grad_norm": 0.0, - "learning_rate": 1.995391352160347e-05, - "loss": 1.0819, + "learning_rate": 1.9954130431550867e-05, + "loss": 1.2081, "step": 2103 }, { - "epoch": 0.05970488081725312, + "epoch": 0.05962197852021876, "grad_norm": 0.0, - "learning_rate": 1.9953825343589175e-05, - "loss": 1.012, + "learning_rate": 1.9954042584130614e-05, + "loss": 1.002, "step": 2104 }, { - "epoch": 0.059733257661748015, + "epoch": 0.05965031596248123, "grad_norm": 0.0, - "learning_rate": 1.9953737081494466e-05, - "loss": 1.0555, + "learning_rate": 1.9953954652863763e-05, + "loss": 1.0647, "step": 2105 }, { - "epoch": 0.0597616345062429, + "epoch": 0.05967865340474369, "grad_norm": 0.0, - "learning_rate": 1.9953648735320096e-05, - "loss": 1.1178, + "learning_rate": 1.9953866637751054e-05, + "loss": 1.0983, "step": 2106 }, { - "epoch": 0.0597900113507378, + "epoch": 0.05970699084700615, "grad_norm": 0.0, - "learning_rate": 1.9953560305066806e-05, - "loss": 1.0199, + "learning_rate": 1.9953778538793235e-05, + "loss": 1.1225, "step": 2107 }, { - "epoch": 0.05981838819523269, + "epoch": 0.05973532828926861, "grad_norm": 0.0, - "learning_rate": 1.9953471790735345e-05, - "loss": 1.0753, + "learning_rate": 1.995369035599104e-05, + "loss": 1.0398, "step": 2108 }, { - "epoch": 0.059846765039727585, + "epoch": 0.05976366573153107, "grad_norm": 0.0, - "learning_rate": 1.995338319232646e-05, - "loss": 1.0781, + "learning_rate": 1.9953602089345215e-05, + "loss": 1.2196, "step": 2109 }, { - "epoch": 0.05987514188422247, + "epoch": 0.05979200317379353, "grad_norm": 0.0, - "learning_rate": 1.9953294509840896e-05, - "loss": 1.1143, + "learning_rate": 1.9953513738856506e-05, + "loss": 1.1114, "step": 2110 }, { - "epoch": 0.05990351872871737, + "epoch": 0.059820340616056, "grad_norm": 0.0, - "learning_rate": 1.995320574327941e-05, - "loss": 1.014, + "learning_rate": 1.995342530452565e-05, + "loss": 1.1993, "step": 2111 }, { - "epoch": 0.05993189557321226, + "epoch": 0.05984867805831846, "grad_norm": 0.0, - "learning_rate": 1.995311689264275e-05, - "loss": 1.11, + "learning_rate": 1.99533367863534e-05, + "loss": 1.2133, "step": 2112 }, { - "epoch": 0.05996027241770715, + "epoch": 0.059877015500580916, "grad_norm": 0.0, - "learning_rate": 1.9953027957931658e-05, - "loss": 1.0388, + "learning_rate": 1.9953248184340497e-05, + "loss": 1.1438, "step": 2113 }, { - "epoch": 0.05998864926220204, + "epoch": 0.05990535294284338, "grad_norm": 0.0, - "learning_rate": 1.9952938939146897e-05, - "loss": 1.0557, + "learning_rate": 1.995315949848769e-05, + "loss": 1.1371, "step": 2114 }, { - "epoch": 0.06001702610669694, + "epoch": 0.05993369038510584, "grad_norm": 0.0, - "learning_rate": 1.995284983628921e-05, - "loss": 1.1618, + "learning_rate": 1.995307072879572e-05, + "loss": 1.1832, "step": 2115 }, { - "epoch": 0.060045402951191824, + "epoch": 0.0599620278273683, "grad_norm": 0.0, - "learning_rate": 1.9952760649359354e-05, - "loss": 1.1568, + "learning_rate": 1.9952981875265346e-05, + "loss": 1.1483, "step": 2116 }, { - "epoch": 0.06007377979568672, + "epoch": 0.059990365269630766, "grad_norm": 0.0, - "learning_rate": 1.9952671378358085e-05, - "loss": 1.1805, + "learning_rate": 1.9952892937897304e-05, + "loss": 1.1371, "step": 2117 }, { - "epoch": 0.06010215664018161, + "epoch": 0.060018702711893225, "grad_norm": 0.0, - "learning_rate": 1.9952582023286145e-05, - "loss": 1.1037, + "learning_rate": 1.9952803916692352e-05, + "loss": 1.1276, "step": 2118 }, { - "epoch": 0.06013053348467651, + "epoch": 0.060047040154155684, "grad_norm": 0.0, - "learning_rate": 1.9952492584144302e-05, - "loss": 0.9972, + "learning_rate": 1.9952714811651234e-05, + "loss": 1.1456, "step": 2119 }, { - "epoch": 0.060158910329171394, + "epoch": 0.06007537759641815, "grad_norm": 0.0, - "learning_rate": 1.995240306093331e-05, - "loss": 1.0449, + "learning_rate": 1.9952625622774708e-05, + "loss": 1.2197, "step": 2120 }, { - "epoch": 0.06018728717366629, + "epoch": 0.06010371503868061, "grad_norm": 0.0, - "learning_rate": 1.995231345365392e-05, - "loss": 1.0334, + "learning_rate": 1.9952536350063516e-05, + "loss": 1.0703, "step": 2121 }, { - "epoch": 0.06021566401816118, + "epoch": 0.06013205248094307, "grad_norm": 0.0, - "learning_rate": 1.995222376230689e-05, - "loss": 1.1099, + "learning_rate": 1.9952446993518417e-05, + "loss": 1.1731, "step": 2122 }, { - "epoch": 0.06024404086265607, + "epoch": 0.060160389923205534, "grad_norm": 0.0, - "learning_rate": 1.9952133986892976e-05, - "loss": 1.0894, + "learning_rate": 1.995235755314016e-05, + "loss": 1.1009, "step": 2123 }, { - "epoch": 0.060272417707150965, + "epoch": 0.060188727365467994, "grad_norm": 0.0, - "learning_rate": 1.995204412741294e-05, - "loss": 1.009, + "learning_rate": 1.9952268028929497e-05, + "loss": 1.1136, "step": 2124 }, { - "epoch": 0.06030079455164586, + "epoch": 0.06021706480773045, "grad_norm": 0.0, - "learning_rate": 1.995195418386754e-05, - "loss": 1.1465, + "learning_rate": 1.995217842088719e-05, + "loss": 1.0654, "step": 2125 }, { - "epoch": 0.060329171396140746, + "epoch": 0.06024540224999292, "grad_norm": 0.0, - "learning_rate": 1.9951864156257537e-05, - "loss": 1.0267, + "learning_rate": 1.9952088729013985e-05, + "loss": 1.1465, "step": 2126 }, { - "epoch": 0.06035754824063564, + "epoch": 0.06027373969225538, "grad_norm": 0.0, - "learning_rate": 1.995177404458369e-05, - "loss": 0.9958, + "learning_rate": 1.995199895331064e-05, + "loss": 1.0484, "step": 2127 }, { - "epoch": 0.060385925085130535, + "epoch": 0.06030207713451784, "grad_norm": 0.0, - "learning_rate": 1.9951683848846764e-05, - "loss": 1.0534, + "learning_rate": 1.9951909093777917e-05, + "loss": 1.0954, "step": 2128 }, { - "epoch": 0.06041430192962543, + "epoch": 0.0603304145767803, "grad_norm": 0.0, - "learning_rate": 1.9951593569047513e-05, - "loss": 1.0239, + "learning_rate": 1.9951819150416564e-05, + "loss": 1.1063, "step": 2129 }, { - "epoch": 0.060442678774120316, + "epoch": 0.06035875201904276, "grad_norm": 0.0, - "learning_rate": 1.9951503205186703e-05, - "loss": 1.1071, + "learning_rate": 1.9951729123227346e-05, + "loss": 1.1536, "step": 2130 }, { - "epoch": 0.06047105561861521, + "epoch": 0.06038708946130522, "grad_norm": 0.0, - "learning_rate": 1.99514127572651e-05, - "loss": 1.0476, + "learning_rate": 1.9951639012211017e-05, + "loss": 1.1517, "step": 2131 }, { - "epoch": 0.060499432463110105, + "epoch": 0.06041542690356769, "grad_norm": 0.0, - "learning_rate": 1.9951322225283466e-05, - "loss": 0.9771, + "learning_rate": 1.9951548817368337e-05, + "loss": 1.2477, "step": 2132 }, { - "epoch": 0.06052780930760499, + "epoch": 0.060443764345830146, "grad_norm": 0.0, - "learning_rate": 1.9951231609242563e-05, - "loss": 1.0548, + "learning_rate": 1.9951458538700065e-05, + "loss": 1.1092, "step": 2133 }, { - "epoch": 0.060556186152099886, + "epoch": 0.060472101788092605, "grad_norm": 0.0, - "learning_rate": 1.995114090914316e-05, - "loss": 0.9857, + "learning_rate": 1.9951368176206962e-05, + "loss": 1.121, "step": 2134 }, { - "epoch": 0.06058456299659478, + "epoch": 0.06050043923035507, "grad_norm": 0.0, - "learning_rate": 1.9951050124986023e-05, - "loss": 0.9822, + "learning_rate": 1.9951277729889792e-05, + "loss": 1.0853, "step": 2135 }, { - "epoch": 0.06061293984108967, + "epoch": 0.06052877667261753, "grad_norm": 0.0, - "learning_rate": 1.995095925677192e-05, - "loss": 1.1638, + "learning_rate": 1.9951187199749313e-05, + "loss": 1.1932, "step": 2136 }, { - "epoch": 0.06064131668558456, + "epoch": 0.06055711411487999, "grad_norm": 0.0, - "learning_rate": 1.9950868304501617e-05, - "loss": 1.035, + "learning_rate": 1.9951096585786287e-05, + "loss": 1.0425, "step": 2137 }, { - "epoch": 0.060669693530079456, + "epoch": 0.060585451557142456, "grad_norm": 0.0, - "learning_rate": 1.995077726817588e-05, - "loss": 1.077, + "learning_rate": 1.995100588800148e-05, + "loss": 1.1474, "step": 2138 }, { - "epoch": 0.06069807037457435, + "epoch": 0.060613788999404915, "grad_norm": 0.0, - "learning_rate": 1.9950686147795483e-05, - "loss": 1.1177, + "learning_rate": 1.995091510639566e-05, + "loss": 1.2123, "step": 2139 }, { - "epoch": 0.06072644721906924, + "epoch": 0.060642126441667374, "grad_norm": 0.0, - "learning_rate": 1.995059494336119e-05, - "loss": 1.0376, + "learning_rate": 1.9950824240969582e-05, + "loss": 1.0498, "step": 2140 }, { - "epoch": 0.06075482406356413, + "epoch": 0.06067046388392984, "grad_norm": 0.0, - "learning_rate": 1.9950503654873775e-05, - "loss": 1.1316, + "learning_rate": 1.9950733291724018e-05, + "loss": 1.1389, "step": 2141 }, { - "epoch": 0.060783200908059026, + "epoch": 0.0606988013261923, "grad_norm": 0.0, - "learning_rate": 1.9950412282334005e-05, - "loss": 1.0558, + "learning_rate": 1.995064225865973e-05, + "loss": 1.0969, "step": 2142 }, { - "epoch": 0.060811577752553914, + "epoch": 0.06072713876845476, "grad_norm": 0.0, - "learning_rate": 1.9950320825742658e-05, - "loss": 0.9715, + "learning_rate": 1.9950551141777487e-05, + "loss": 1.0574, "step": 2143 }, { - "epoch": 0.06083995459704881, + "epoch": 0.06075547621071722, "grad_norm": 0.0, - "learning_rate": 1.9950229285100506e-05, - "loss": 1.1074, + "learning_rate": 1.995045994107806e-05, + "loss": 1.1234, "step": 2144 }, { - "epoch": 0.0608683314415437, + "epoch": 0.06078381365297968, "grad_norm": 0.0, - "learning_rate": 1.9950137660408318e-05, - "loss": 1.1356, + "learning_rate": 1.995036865656221e-05, + "loss": 1.0447, "step": 2145 }, { - "epoch": 0.06089670828603859, + "epoch": 0.06081215109524214, "grad_norm": 0.0, - "learning_rate": 1.995004595166687e-05, - "loss": 1.1609, + "learning_rate": 1.9950277288230714e-05, + "loss": 1.0986, "step": 2146 }, { - "epoch": 0.060925085130533484, + "epoch": 0.0608404885375046, "grad_norm": 0.0, - "learning_rate": 1.9949954158876938e-05, - "loss": 1.1341, + "learning_rate": 1.9950185836084338e-05, + "loss": 1.251, "step": 2147 }, { - "epoch": 0.06095346197502838, + "epoch": 0.06086882597976707, "grad_norm": 0.0, - "learning_rate": 1.994986228203929e-05, - "loss": 1.0507, + "learning_rate": 1.9950094300123845e-05, + "loss": 1.1264, "step": 2148 }, { - "epoch": 0.06098183881952327, + "epoch": 0.06089716342202953, "grad_norm": 0.0, - "learning_rate": 1.9949770321154715e-05, - "loss": 1.0827, + "learning_rate": 1.995000268035002e-05, + "loss": 1.2418, "step": 2149 }, { - "epoch": 0.06101021566401816, + "epoch": 0.060925500864291986, "grad_norm": 0.0, - "learning_rate": 1.994967827622398e-05, - "loss": 1.0328, + "learning_rate": 1.9949910976763623e-05, + "loss": 1.1882, "step": 2150 }, { - "epoch": 0.061038592508513054, + "epoch": 0.06095383830655445, "grad_norm": 0.0, - "learning_rate": 1.9949586147247867e-05, - "loss": 1.1063, + "learning_rate": 1.9949819189365432e-05, + "loss": 1.1331, "step": 2151 }, { - "epoch": 0.06106696935300795, + "epoch": 0.06098217574881691, "grad_norm": 0.0, - "learning_rate": 1.9949493934227153e-05, - "loss": 1.0448, + "learning_rate": 1.994972731815622e-05, + "loss": 1.1281, "step": 2152 }, { - "epoch": 0.061095346197502835, + "epoch": 0.06101051319107937, "grad_norm": 0.0, - "learning_rate": 1.994940163716261e-05, - "loss": 0.9604, + "learning_rate": 1.9949635363136762e-05, + "loss": 1.1967, "step": 2153 }, { - "epoch": 0.06112372304199773, + "epoch": 0.061038850633341836, "grad_norm": 0.0, - "learning_rate": 1.9949309256055028e-05, - "loss": 1.0205, + "learning_rate": 1.9949543324307828e-05, + "loss": 1.1319, "step": 2154 }, { - "epoch": 0.061152099886492624, + "epoch": 0.061067188075604295, "grad_norm": 0.0, - "learning_rate": 1.9949216790905186e-05, - "loss": 0.9872, + "learning_rate": 1.99494512016702e-05, + "loss": 1.2044, "step": 2155 }, { - "epoch": 0.06118047673098751, + "epoch": 0.061095525517866754, "grad_norm": 0.0, - "learning_rate": 1.994912424171386e-05, - "loss": 1.0592, + "learning_rate": 1.9949358995224645e-05, + "loss": 1.1752, "step": 2156 }, { - "epoch": 0.061208853575482405, + "epoch": 0.06112386296012922, "grad_norm": 0.0, - "learning_rate": 1.994903160848184e-05, - "loss": 1.2107, + "learning_rate": 1.9949266704971945e-05, + "loss": 1.1295, "step": 2157 }, { - "epoch": 0.0612372304199773, + "epoch": 0.06115220040239168, "grad_norm": 0.0, - "learning_rate": 1.994893889120989e-05, - "loss": 1.0016, + "learning_rate": 1.994917433091288e-05, + "loss": 1.1369, "step": 2158 }, { - "epoch": 0.061265607264472194, + "epoch": 0.06118053784465414, "grad_norm": 0.0, - "learning_rate": 1.9948846089898816e-05, - "loss": 1.1381, + "learning_rate": 1.9949081873048222e-05, + "loss": 1.1305, "step": 2159 }, { - "epoch": 0.06129398410896708, + "epoch": 0.061208875286916604, "grad_norm": 0.0, - "learning_rate": 1.9948753204549393e-05, - "loss": 1.0269, + "learning_rate": 1.9948989331378755e-05, + "loss": 1.2303, "step": 2160 }, { - "epoch": 0.061322360953461975, + "epoch": 0.061237212729179064, "grad_norm": 0.0, - "learning_rate": 1.99486602351624e-05, - "loss": 1.0975, + "learning_rate": 1.9948896705905255e-05, + "loss": 1.1487, "step": 2161 }, { - "epoch": 0.06135073779795687, + "epoch": 0.06126555017144152, "grad_norm": 0.0, - "learning_rate": 1.9948567181738627e-05, - "loss": 1.0718, + "learning_rate": 1.9948803996628503e-05, + "loss": 1.1034, "step": 2162 }, { - "epoch": 0.06137911464245176, + "epoch": 0.06129388761370399, "grad_norm": 0.0, - "learning_rate": 1.994847404427886e-05, - "loss": 1.0911, + "learning_rate": 1.9948711203549282e-05, + "loss": 1.1289, "step": 2163 }, { - "epoch": 0.06140749148694665, + "epoch": 0.06132222505596645, "grad_norm": 0.0, - "learning_rate": 1.9948380822783887e-05, - "loss": 1.0464, + "learning_rate": 1.9948618326668373e-05, + "loss": 1.1278, "step": 2164 }, { - "epoch": 0.061435868331441545, + "epoch": 0.06135056249822891, "grad_norm": 0.0, - "learning_rate": 1.994828751725449e-05, - "loss": 0.9786, + "learning_rate": 1.9948525365986554e-05, + "loss": 1.1317, "step": 2165 }, { - "epoch": 0.06146424517593643, + "epoch": 0.06137889994049137, "grad_norm": 0.0, - "learning_rate": 1.9948194127691467e-05, - "loss": 1.0974, + "learning_rate": 1.9948432321504617e-05, + "loss": 1.1111, "step": 2166 }, { - "epoch": 0.06149262202043133, + "epoch": 0.06140723738275383, "grad_norm": 0.0, - "learning_rate": 1.9948100654095596e-05, - "loss": 1.0373, + "learning_rate": 1.9948339193223333e-05, + "loss": 1.057, "step": 2167 }, { - "epoch": 0.06152099886492622, + "epoch": 0.06143557482501629, "grad_norm": 0.0, - "learning_rate": 1.9948007096467673e-05, - "loss": 1.0705, + "learning_rate": 1.99482459811435e-05, + "loss": 1.171, "step": 2168 }, { - "epoch": 0.061549375709421116, + "epoch": 0.06146391226727876, "grad_norm": 0.0, - "learning_rate": 1.994791345480849e-05, - "loss": 1.0131, + "learning_rate": 1.9948152685265896e-05, + "loss": 1.1791, "step": 2169 }, { - "epoch": 0.061577752553916, + "epoch": 0.061492249709541216, "grad_norm": 0.0, - "learning_rate": 1.9947819729118833e-05, - "loss": 1.1085, + "learning_rate": 1.9948059305591304e-05, + "loss": 1.1042, "step": 2170 }, { - "epoch": 0.0616061293984109, + "epoch": 0.061520587151803675, "grad_norm": 0.0, - "learning_rate": 1.9947725919399496e-05, - "loss": 0.997, + "learning_rate": 1.994796584212052e-05, + "loss": 1.0222, "step": 2171 }, { - "epoch": 0.06163450624290579, + "epoch": 0.06154892459406614, "grad_norm": 0.0, - "learning_rate": 1.9947632025651267e-05, - "loss": 1.0521, + "learning_rate": 1.994787229485432e-05, + "loss": 1.207, "step": 2172 }, { - "epoch": 0.06166288308740068, + "epoch": 0.0615772620363286, "grad_norm": 0.0, - "learning_rate": 1.9947538047874948e-05, - "loss": 1.0536, + "learning_rate": 1.9947778663793502e-05, + "loss": 1.0902, "step": 2173 }, { - "epoch": 0.06169125993189557, + "epoch": 0.06160559947859106, "grad_norm": 0.0, - "learning_rate": 1.9947443986071328e-05, - "loss": 0.9862, + "learning_rate": 1.994768494893885e-05, + "loss": 1.0511, "step": 2174 }, { - "epoch": 0.06171963677639047, + "epoch": 0.061633936920853526, "grad_norm": 0.0, - "learning_rate": 1.99473498402412e-05, - "loss": 1.2814, + "learning_rate": 1.994759115029115e-05, + "loss": 1.1349, "step": 2175 }, { - "epoch": 0.061748013620885354, + "epoch": 0.061662274363115985, "grad_norm": 0.0, - "learning_rate": 1.9947255610385357e-05, - "loss": 1.0329, + "learning_rate": 1.9947497267851198e-05, + "loss": 1.1862, "step": 2176 }, { - "epoch": 0.06177639046538025, + "epoch": 0.061690611805378444, "grad_norm": 0.0, - "learning_rate": 1.9947161296504603e-05, - "loss": 1.0471, + "learning_rate": 1.9947403301619782e-05, + "loss": 1.0393, "step": 2177 }, { - "epoch": 0.06180476730987514, + "epoch": 0.06171894924764091, "grad_norm": 0.0, - "learning_rate": 1.9947066898599728e-05, - "loss": 0.9783, + "learning_rate": 1.9947309251597695e-05, + "loss": 1.2064, "step": 2178 }, { - "epoch": 0.06183314415437004, + "epoch": 0.06174728668990337, "grad_norm": 0.0, - "learning_rate": 1.9946972416671533e-05, - "loss": 1.149, + "learning_rate": 1.9947215117785727e-05, + "loss": 1.1678, "step": 2179 }, { - "epoch": 0.061861520998864925, + "epoch": 0.06177562413216583, "grad_norm": 0.0, - "learning_rate": 1.9946877850720818e-05, - "loss": 1.0159, + "learning_rate": 1.9947120900184674e-05, + "loss": 1.0056, "step": 2180 }, { - "epoch": 0.06188989784335982, + "epoch": 0.061803961574428294, "grad_norm": 0.0, - "learning_rate": 1.9946783200748374e-05, - "loss": 1.0466, + "learning_rate": 1.9947026598795327e-05, + "loss": 1.1119, "step": 2181 }, { - "epoch": 0.06191827468785471, + "epoch": 0.06183229901669075, "grad_norm": 0.0, - "learning_rate": 1.9946688466755006e-05, - "loss": 1.1643, + "learning_rate": 1.994693221361848e-05, + "loss": 1.0211, "step": 2182 }, { - "epoch": 0.0619466515323496, + "epoch": 0.06186063645895321, "grad_norm": 0.0, - "learning_rate": 1.9946593648741515e-05, - "loss": 1.1557, + "learning_rate": 1.9946837744654933e-05, + "loss": 1.1151, "step": 2183 }, { - "epoch": 0.061975028376844495, + "epoch": 0.06188897390121568, "grad_norm": 0.0, - "learning_rate": 1.99464987467087e-05, - "loss": 1.0425, + "learning_rate": 1.9946743191905473e-05, + "loss": 1.1855, "step": 2184 }, { - "epoch": 0.06200340522133939, + "epoch": 0.06191731134347814, "grad_norm": 0.0, - "learning_rate": 1.994640376065736e-05, - "loss": 1.1028, + "learning_rate": 1.9946648555370905e-05, + "loss": 1.1223, "step": 2185 }, { - "epoch": 0.062031782065834276, + "epoch": 0.0619456487857406, "grad_norm": 0.0, - "learning_rate": 1.9946308690588304e-05, - "loss": 1.1209, + "learning_rate": 1.9946553835052023e-05, + "loss": 1.1472, "step": 2186 }, { - "epoch": 0.06206015891032917, + "epoch": 0.06197398622800306, "grad_norm": 0.0, - "learning_rate": 1.994621353650233e-05, - "loss": 1.0532, + "learning_rate": 1.9946459030949622e-05, + "loss": 1.1352, "step": 2187 }, { - "epoch": 0.062088535754824065, + "epoch": 0.06200232367026552, "grad_norm": 0.0, - "learning_rate": 1.9946118298400242e-05, - "loss": 0.9855, + "learning_rate": 1.9946364143064506e-05, + "loss": 1.0556, "step": 2188 }, { - "epoch": 0.06211691259931896, + "epoch": 0.06203066111252798, "grad_norm": 0.0, - "learning_rate": 1.994602297628285e-05, - "loss": 1.149, + "learning_rate": 1.9946269171397467e-05, + "loss": 1.2574, "step": 2189 }, { - "epoch": 0.062145289443813846, + "epoch": 0.06205899855479045, "grad_norm": 0.0, - "learning_rate": 1.994592757015095e-05, - "loss": 1.2413, + "learning_rate": 1.9946174115949315e-05, + "loss": 1.1415, "step": 2190 }, { - "epoch": 0.06217366628830874, + "epoch": 0.062087335997052906, "grad_norm": 0.0, - "learning_rate": 1.9945832080005354e-05, - "loss": 1.1023, + "learning_rate": 1.9946078976720842e-05, + "loss": 1.1939, "step": 2191 }, { - "epoch": 0.062202043132803635, + "epoch": 0.062115673439315365, "grad_norm": 0.0, - "learning_rate": 1.9945736505846866e-05, - "loss": 1.0183, + "learning_rate": 1.9945983753712853e-05, + "loss": 1.0247, "step": 2192 }, { - "epoch": 0.06223041997729852, + "epoch": 0.06214401088157783, "grad_norm": 0.0, - "learning_rate": 1.9945640847676296e-05, - "loss": 1.0215, + "learning_rate": 1.9945888446926146e-05, + "loss": 1.1019, "step": 2193 }, { - "epoch": 0.062258796821793416, + "epoch": 0.06217234832384029, "grad_norm": 0.0, - "learning_rate": 1.9945545105494454e-05, - "loss": 0.9984, + "learning_rate": 1.9945793056361527e-05, + "loss": 1.1456, "step": 2194 }, { - "epoch": 0.06228717366628831, + "epoch": 0.06220068576610275, "grad_norm": 0.0, - "learning_rate": 1.994544927930214e-05, - "loss": 1.199, + "learning_rate": 1.9945697582019807e-05, + "loss": 1.1074, "step": 2195 }, { - "epoch": 0.0623155505107832, + "epoch": 0.062229023208365215, "grad_norm": 0.0, - "learning_rate": 1.9945353369100172e-05, - "loss": 1.0598, + "learning_rate": 1.994560202390178e-05, + "loss": 1.1806, "step": 2196 }, { - "epoch": 0.06234392735527809, + "epoch": 0.062257360650627674, "grad_norm": 0.0, - "learning_rate": 1.994525737488936e-05, - "loss": 1.0331, + "learning_rate": 1.994550638200825e-05, + "loss": 1.0583, "step": 2197 }, { - "epoch": 0.062372304199772986, + "epoch": 0.062285698092890134, "grad_norm": 0.0, - "learning_rate": 1.9945161296670505e-05, - "loss": 1.0437, + "learning_rate": 1.994541065634003e-05, + "loss": 1.1735, "step": 2198 }, { - "epoch": 0.06240068104426788, + "epoch": 0.0623140355351526, "grad_norm": 0.0, - "learning_rate": 1.994506513444443e-05, - "loss": 0.9817, + "learning_rate": 1.9945314846897922e-05, + "loss": 1.1033, "step": 2199 }, { - "epoch": 0.06242905788876277, + "epoch": 0.06234237297741506, "grad_norm": 0.0, - "learning_rate": 1.994496888821194e-05, - "loss": 1.0573, + "learning_rate": 1.9945218953682736e-05, + "loss": 1.1378, "step": 2200 }, { - "epoch": 0.06245743473325766, + "epoch": 0.06237071041967752, "grad_norm": 0.0, - "learning_rate": 1.994487255797385e-05, - "loss": 1.1385, + "learning_rate": 1.9945122976695274e-05, + "loss": 1.1703, "step": 2201 }, { - "epoch": 0.062485811577752556, + "epoch": 0.062399047861939984, "grad_norm": 0.0, - "learning_rate": 1.9944776143730978e-05, - "loss": 1.1289, + "learning_rate": 1.994502691593635e-05, + "loss": 1.0831, "step": 2202 }, { - "epoch": 0.06251418842224744, + "epoch": 0.06242738530420244, "grad_norm": 0.0, - "learning_rate": 1.9944679645484136e-05, - "loss": 1.2634, + "learning_rate": 1.994493077140677e-05, + "loss": 1.1604, "step": 2203 }, { - "epoch": 0.06254256526674234, + "epoch": 0.0624557227464649, "grad_norm": 0.0, - "learning_rate": 1.9944583063234132e-05, - "loss": 1.0071, + "learning_rate": 1.9944834543107347e-05, + "loss": 1.1613, "step": 2204 }, { - "epoch": 0.06257094211123723, + "epoch": 0.06248406018872737, "grad_norm": 0.0, - "learning_rate": 1.994448639698179e-05, - "loss": 1.1777, + "learning_rate": 1.994473823103889e-05, + "loss": 1.1571, "step": 2205 }, { - "epoch": 0.06259931895573212, + "epoch": 0.06251239763098983, "grad_norm": 0.0, - "learning_rate": 1.994438964672793e-05, - "loss": 1.2089, + "learning_rate": 1.9944641835202212e-05, + "loss": 1.1514, "step": 2206 }, { - "epoch": 0.06262769580022702, + "epoch": 0.0625407350732523, "grad_norm": 0.0, - "learning_rate": 1.994429281247336e-05, - "loss": 0.9887, + "learning_rate": 1.994454535559812e-05, + "loss": 1.1915, "step": 2207 }, { - "epoch": 0.06265607264472191, + "epoch": 0.06256907251551475, "grad_norm": 0.0, - "learning_rate": 1.9944195894218897e-05, - "loss": 1.0681, + "learning_rate": 1.994444879222743e-05, + "loss": 1.0662, "step": 2208 }, { - "epoch": 0.0626844494892168, + "epoch": 0.06259740995777721, "grad_norm": 0.0, - "learning_rate": 1.994409889196537e-05, - "loss": 0.9717, + "learning_rate": 1.9944352145090954e-05, + "loss": 1.0993, "step": 2209 }, { - "epoch": 0.0627128263337117, + "epoch": 0.06262574740003968, "grad_norm": 0.0, - "learning_rate": 1.994400180571359e-05, - "loss": 1.0571, + "learning_rate": 1.9944255414189508e-05, + "loss": 0.9759, "step": 2210 }, { - "epoch": 0.06274120317820658, + "epoch": 0.06265408484230213, "grad_norm": 0.0, - "learning_rate": 1.9943904635464383e-05, - "loss": 1.0356, + "learning_rate": 1.9944158599523902e-05, + "loss": 1.1164, "step": 2211 }, { - "epoch": 0.06276958002270147, + "epoch": 0.0626824222845646, "grad_norm": 0.0, - "learning_rate": 1.994380738121856e-05, - "loss": 1.0749, + "learning_rate": 1.9944061701094962e-05, + "loss": 0.8985, "step": 2212 }, { - "epoch": 0.06279795686719637, + "epoch": 0.06271075972682706, "grad_norm": 0.0, - "learning_rate": 1.9943710042976955e-05, - "loss": 1.1425, + "learning_rate": 1.9943964718903495e-05, + "loss": 1.1307, "step": 2213 }, { - "epoch": 0.06282633371169126, + "epoch": 0.06273909716908951, "grad_norm": 0.0, - "learning_rate": 1.9943612620740383e-05, - "loss": 1.1008, + "learning_rate": 1.9943867652950323e-05, + "loss": 1.0702, "step": 2214 }, { - "epoch": 0.06285471055618615, + "epoch": 0.06276743461135198, "grad_norm": 0.0, - "learning_rate": 1.9943515114509667e-05, - "loss": 0.9453, + "learning_rate": 1.994377050323626e-05, + "loss": 1.0714, "step": 2215 }, { - "epoch": 0.06288308740068105, + "epoch": 0.06279577205361445, "grad_norm": 0.0, - "learning_rate": 1.994341752428563e-05, - "loss": 1.0688, + "learning_rate": 1.994367326976212e-05, + "loss": 1.0877, "step": 2216 }, { - "epoch": 0.06291146424517594, + "epoch": 0.0628241094958769, "grad_norm": 0.0, - "learning_rate": 1.9943319850069104e-05, - "loss": 1.0365, + "learning_rate": 1.9943575952528734e-05, + "loss": 1.0834, "step": 2217 }, { - "epoch": 0.06293984108967082, + "epoch": 0.06285244693813936, "grad_norm": 0.0, - "learning_rate": 1.9943222091860902e-05, - "loss": 1.0635, + "learning_rate": 1.9943478551536914e-05, + "loss": 1.0209, "step": 2218 }, { - "epoch": 0.06296821793416572, + "epoch": 0.06288078438040183, "grad_norm": 0.0, - "learning_rate": 1.994312424966186e-05, - "loss": 0.9835, + "learning_rate": 1.994338106678748e-05, + "loss": 1.1868, "step": 2219 }, { - "epoch": 0.06299659477866061, + "epoch": 0.06290912182266428, "grad_norm": 0.0, - "learning_rate": 1.99430263234728e-05, - "loss": 1.0384, + "learning_rate": 1.9943283498281256e-05, + "loss": 1.113, "step": 2220 }, { - "epoch": 0.0630249716231555, + "epoch": 0.06293745926492675, "grad_norm": 0.0, - "learning_rate": 1.9942928313294548e-05, - "loss": 1.0839, + "learning_rate": 1.9943185846019064e-05, + "loss": 0.9675, "step": 2221 }, { - "epoch": 0.0630533484676504, + "epoch": 0.06296579670718921, "grad_norm": 0.0, - "learning_rate": 1.9942830219127935e-05, - "loss": 0.986, + "learning_rate": 1.9943088110001722e-05, + "loss": 1.1119, "step": 2222 }, { - "epoch": 0.06308172531214529, + "epoch": 0.06299413414945167, "grad_norm": 0.0, - "learning_rate": 1.9942732040973788e-05, - "loss": 0.9996, + "learning_rate": 1.9942990290230057e-05, + "loss": 1.0558, "step": 2223 }, { - "epoch": 0.06311010215664019, + "epoch": 0.06302247159171413, "grad_norm": 0.0, - "learning_rate": 1.9942633778832936e-05, - "loss": 1.0829, + "learning_rate": 1.9942892386704896e-05, + "loss": 1.1546, "step": 2224 }, { - "epoch": 0.06313847900113508, + "epoch": 0.0630508090339766, "grad_norm": 0.0, - "learning_rate": 1.9942535432706212e-05, - "loss": 1.0043, + "learning_rate": 1.994279439942706e-05, + "loss": 1.1823, "step": 2225 }, { - "epoch": 0.06316685584562996, + "epoch": 0.06307914647623905, "grad_norm": 0.0, - "learning_rate": 1.994243700259444e-05, - "loss": 1.1107, + "learning_rate": 1.994269632839737e-05, + "loss": 1.154, "step": 2226 }, { - "epoch": 0.06319523269012486, + "epoch": 0.06310748391850152, "grad_norm": 0.0, - "learning_rate": 1.9942338488498457e-05, - "loss": 0.9619, + "learning_rate": 1.994259817361666e-05, + "loss": 1.2231, "step": 2227 }, { - "epoch": 0.06322360953461975, + "epoch": 0.06313582136076398, "grad_norm": 0.0, - "learning_rate": 1.9942239890419097e-05, - "loss": 1.0009, + "learning_rate": 1.9942499935085754e-05, + "loss": 1.2347, "step": 2228 }, { - "epoch": 0.06325198637911464, + "epoch": 0.06316415880302644, "grad_norm": 0.0, - "learning_rate": 1.9942141208357185e-05, - "loss": 1.0313, + "learning_rate": 1.9942401612805478e-05, + "loss": 1.1562, "step": 2229 }, { - "epoch": 0.06328036322360954, + "epoch": 0.0631924962452889, "grad_norm": 0.0, - "learning_rate": 1.994204244231356e-05, - "loss": 1.0413, + "learning_rate": 1.9942303206776662e-05, + "loss": 1.0302, "step": 2230 }, { - "epoch": 0.06330874006810443, + "epoch": 0.06322083368755137, "grad_norm": 0.0, - "learning_rate": 1.994194359228906e-05, - "loss": 1.0188, + "learning_rate": 1.9942204717000133e-05, + "loss": 1.1305, "step": 2231 }, { - "epoch": 0.06333711691259931, + "epoch": 0.06324917112981382, "grad_norm": 0.0, - "learning_rate": 1.9941844658284513e-05, - "loss": 1.0146, + "learning_rate": 1.9942106143476722e-05, + "loss": 1.1193, "step": 2232 }, { - "epoch": 0.06336549375709422, + "epoch": 0.06327750857207629, "grad_norm": 0.0, - "learning_rate": 1.9941745640300756e-05, - "loss": 1.1964, + "learning_rate": 1.9942007486207258e-05, + "loss": 1.0275, "step": 2233 }, { - "epoch": 0.0633938706015891, + "epoch": 0.06330584601433875, "grad_norm": 0.0, - "learning_rate": 1.994164653833863e-05, - "loss": 0.8035, + "learning_rate": 1.9941908745192575e-05, + "loss": 1.1331, "step": 2234 }, { - "epoch": 0.06342224744608399, + "epoch": 0.0633341834566012, "grad_norm": 0.0, - "learning_rate": 1.994154735239896e-05, - "loss": 1.0314, + "learning_rate": 1.9941809920433503e-05, + "loss": 1.0607, "step": 2235 }, { - "epoch": 0.06345062429057889, + "epoch": 0.06336252089886367, "grad_norm": 0.0, - "learning_rate": 1.99414480824826e-05, - "loss": 1.0891, + "learning_rate": 1.994171101193087e-05, + "loss": 1.095, "step": 2236 }, { - "epoch": 0.06347900113507378, + "epoch": 0.06339085834112614, "grad_norm": 0.0, - "learning_rate": 1.9941348728590385e-05, - "loss": 0.9944, + "learning_rate": 1.994161201968552e-05, + "loss": 0.9699, "step": 2237 }, { - "epoch": 0.06350737797956867, + "epoch": 0.06341919578338859, "grad_norm": 0.0, - "learning_rate": 1.9941249290723143e-05, - "loss": 1.1032, + "learning_rate": 1.9941512943698277e-05, + "loss": 1.1581, "step": 2238 }, { - "epoch": 0.06353575482406357, + "epoch": 0.06344753322565105, "grad_norm": 0.0, - "learning_rate": 1.9941149768881725e-05, - "loss": 1.0668, + "learning_rate": 1.994141378396998e-05, + "loss": 1.1995, "step": 2239 }, { - "epoch": 0.06356413166855845, + "epoch": 0.06347587066791352, "grad_norm": 0.0, - "learning_rate": 1.9941050163066962e-05, - "loss": 0.9964, + "learning_rate": 1.994131454050146e-05, + "loss": 1.0669, "step": 2240 }, { - "epoch": 0.06359250851305334, + "epoch": 0.06350420811017597, "grad_norm": 0.0, - "learning_rate": 1.994095047327971e-05, - "loss": 1.0717, + "learning_rate": 1.9941215213293558e-05, + "loss": 1.2537, "step": 2241 }, { - "epoch": 0.06362088535754824, + "epoch": 0.06353254555243844, "grad_norm": 0.0, - "learning_rate": 1.9940850699520798e-05, - "loss": 0.9899, + "learning_rate": 1.9941115802347106e-05, + "loss": 1.0339, "step": 2242 }, { - "epoch": 0.06364926220204313, + "epoch": 0.0635608829947009, "grad_norm": 0.0, - "learning_rate": 1.9940750841791074e-05, - "loss": 1.0942, + "learning_rate": 1.9941016307662947e-05, + "loss": 1.1796, "step": 2243 }, { - "epoch": 0.06367763904653803, + "epoch": 0.06358922043696336, "grad_norm": 0.0, - "learning_rate": 1.994065090009138e-05, - "loss": 0.9886, + "learning_rate": 1.9940916729241918e-05, + "loss": 1.1446, "step": 2244 }, { - "epoch": 0.06370601589103292, + "epoch": 0.06361755787922582, "grad_norm": 0.0, - "learning_rate": 1.994055087442256e-05, - "loss": 1.0993, + "learning_rate": 1.994081706708485e-05, + "loss": 1.0132, "step": 2245 }, { - "epoch": 0.0637343927355278, + "epoch": 0.06364589532148829, "grad_norm": 0.0, - "learning_rate": 1.9940450764785465e-05, - "loss": 1.1165, + "learning_rate": 1.9940717321192593e-05, + "loss": 1.0606, "step": 2246 }, { - "epoch": 0.06376276958002271, + "epoch": 0.06367423276375074, "grad_norm": 0.0, - "learning_rate": 1.9940350571180933e-05, - "loss": 1.0253, + "learning_rate": 1.9940617491565982e-05, + "loss": 1.1016, "step": 2247 }, { - "epoch": 0.0637911464245176, + "epoch": 0.0637025702060132, "grad_norm": 0.0, - "learning_rate": 1.994025029360981e-05, - "loss": 0.9656, + "learning_rate": 1.994051757820586e-05, + "loss": 1.2218, "step": 2248 }, { - "epoch": 0.06381952326901248, + "epoch": 0.06373090764827567, "grad_norm": 0.0, - "learning_rate": 1.9940149932072947e-05, - "loss": 1.0098, + "learning_rate": 1.9940417581113062e-05, + "loss": 1.0999, "step": 2249 }, { - "epoch": 0.06384790011350738, + "epoch": 0.06375924509053812, "grad_norm": 0.0, - "learning_rate": 1.9940049486571195e-05, - "loss": 1.0304, + "learning_rate": 1.994031750028844e-05, + "loss": 1.0579, "step": 2250 }, { - "epoch": 0.06387627695800227, + "epoch": 0.06378758253280059, "grad_norm": 0.0, - "learning_rate": 1.9939948957105394e-05, - "loss": 1.0224, + "learning_rate": 1.994021733573283e-05, + "loss": 1.1116, "step": 2251 }, { - "epoch": 0.06390465380249716, + "epoch": 0.06381591997506306, "grad_norm": 0.0, - "learning_rate": 1.9939848343676398e-05, - "loss": 1.1219, + "learning_rate": 1.994011708744708e-05, + "loss": 1.0636, "step": 2252 }, { - "epoch": 0.06393303064699206, + "epoch": 0.06384425741732551, "grad_norm": 0.0, - "learning_rate": 1.9939747646285058e-05, - "loss": 1.0747, + "learning_rate": 1.9940016755432032e-05, + "loss": 1.0879, "step": 2253 }, { - "epoch": 0.06396140749148695, + "epoch": 0.06387259485958798, "grad_norm": 0.0, - "learning_rate": 1.9939646864932218e-05, - "loss": 0.9686, + "learning_rate": 1.993991633968853e-05, + "loss": 1.0974, "step": 2254 }, { - "epoch": 0.06398978433598183, + "epoch": 0.06390093230185044, "grad_norm": 0.0, - "learning_rate": 1.9939545999618737e-05, - "loss": 0.9386, + "learning_rate": 1.9939815840217425e-05, + "loss": 1.0279, "step": 2255 }, { - "epoch": 0.06401816118047673, + "epoch": 0.0639292697441129, "grad_norm": 0.0, - "learning_rate": 1.9939445050345464e-05, - "loss": 1.0728, + "learning_rate": 1.9939715257019557e-05, + "loss": 0.9973, "step": 2256 }, { - "epoch": 0.06404653802497162, + "epoch": 0.06395760718637536, "grad_norm": 0.0, - "learning_rate": 1.9939344017113255e-05, - "loss": 1.1245, + "learning_rate": 1.993961459009578e-05, + "loss": 1.0595, "step": 2257 }, { - "epoch": 0.06407491486946651, + "epoch": 0.06398594462863783, "grad_norm": 0.0, - "learning_rate": 1.9939242899922954e-05, - "loss": 1.0152, + "learning_rate": 1.9939513839446934e-05, + "loss": 1.1546, "step": 2258 }, { - "epoch": 0.06410329171396141, + "epoch": 0.06401428207090028, "grad_norm": 0.0, - "learning_rate": 1.9939141698775424e-05, - "loss": 1.0815, + "learning_rate": 1.9939413005073873e-05, + "loss": 1.1158, "step": 2259 }, { - "epoch": 0.0641316685584563, + "epoch": 0.06404261951316274, "grad_norm": 0.0, - "learning_rate": 1.993904041367152e-05, - "loss": 0.9763, + "learning_rate": 1.9939312086977446e-05, + "loss": 1.0166, "step": 2260 }, { - "epoch": 0.06416004540295119, + "epoch": 0.06407095695542521, "grad_norm": 0.0, - "learning_rate": 1.9938939044612097e-05, - "loss": 1.1401, + "learning_rate": 1.9939211085158504e-05, + "loss": 1.0737, "step": 2261 }, { - "epoch": 0.06418842224744609, + "epoch": 0.06409929439768766, "grad_norm": 0.0, - "learning_rate": 1.9938837591598003e-05, - "loss": 0.9922, + "learning_rate": 1.9939109999617894e-05, + "loss": 1.075, "step": 2262 }, { - "epoch": 0.06421679909194097, + "epoch": 0.06412763183995013, "grad_norm": 0.0, - "learning_rate": 1.9938736054630103e-05, - "loss": 1.0805, + "learning_rate": 1.993900883035647e-05, + "loss": 1.1959, "step": 2263 }, { - "epoch": 0.06424517593643587, + "epoch": 0.0641559692822126, "grad_norm": 0.0, - "learning_rate": 1.9938634433709254e-05, - "loss": 1.0129, + "learning_rate": 1.9938907577375084e-05, + "loss": 1.062, "step": 2264 }, { - "epoch": 0.06427355278093076, + "epoch": 0.06418430672447505, "grad_norm": 0.0, - "learning_rate": 1.993853272883631e-05, - "loss": 0.929, + "learning_rate": 1.9938806240674592e-05, + "loss": 1.2193, "step": 2265 }, { - "epoch": 0.06430192962542565, + "epoch": 0.06421264416673751, "grad_norm": 0.0, - "learning_rate": 1.9938430940012138e-05, - "loss": 1.0608, + "learning_rate": 1.9938704820255837e-05, + "loss": 1.0648, "step": 2266 }, { - "epoch": 0.06433030646992055, + "epoch": 0.06424098160899998, "grad_norm": 0.0, - "learning_rate": 1.993832906723759e-05, - "loss": 1.0079, + "learning_rate": 1.993860331611969e-05, + "loss": 1.0619, "step": 2267 }, { - "epoch": 0.06435868331441544, + "epoch": 0.06426931905126243, "grad_norm": 0.0, - "learning_rate": 1.993822711051353e-05, - "loss": 1.0424, + "learning_rate": 1.993850172826699e-05, + "loss": 0.9873, "step": 2268 }, { - "epoch": 0.06438706015891033, + "epoch": 0.0642976564935249, "grad_norm": 0.0, - "learning_rate": 1.9938125069840817e-05, - "loss": 1.0535, + "learning_rate": 1.99384000566986e-05, + "loss": 1.1358, "step": 2269 }, { - "epoch": 0.06441543700340523, + "epoch": 0.06432599393578736, "grad_norm": 0.0, - "learning_rate": 1.993802294522032e-05, - "loss": 1.1017, + "learning_rate": 1.9938298301415376e-05, + "loss": 1.1101, "step": 2270 }, { - "epoch": 0.06444381384790011, + "epoch": 0.06435433137804981, "grad_norm": 0.0, - "learning_rate": 1.993792073665289e-05, - "loss": 1.1283, + "learning_rate": 1.9938196462418177e-05, + "loss": 1.0958, "step": 2271 }, { - "epoch": 0.064472190692395, + "epoch": 0.06438266882031228, "grad_norm": 0.0, - "learning_rate": 1.99378184441394e-05, - "loss": 0.9802, + "learning_rate": 1.9938094539707857e-05, + "loss": 1.058, "step": 2272 }, { - "epoch": 0.0645005675368899, + "epoch": 0.06441100626257475, "grad_norm": 0.0, - "learning_rate": 1.9937716067680712e-05, - "loss": 1.149, + "learning_rate": 1.993799253328528e-05, + "loss": 1.1458, "step": 2273 }, { - "epoch": 0.06452894438138479, + "epoch": 0.0644393437048372, "grad_norm": 0.0, - "learning_rate": 1.993761360727769e-05, - "loss": 1.1481, + "learning_rate": 1.9937890443151294e-05, + "loss": 1.088, "step": 2274 }, { - "epoch": 0.06455732122587968, + "epoch": 0.06446768114709966, "grad_norm": 0.0, - "learning_rate": 1.9937511062931197e-05, - "loss": 1.2144, + "learning_rate": 1.993778826930677e-05, + "loss": 1.1125, "step": 2275 }, { - "epoch": 0.06458569807037458, + "epoch": 0.06449601858936213, "grad_norm": 0.0, - "learning_rate": 1.99374084346421e-05, - "loss": 1.0313, + "learning_rate": 1.9937686011752567e-05, + "loss": 1.1838, "step": 2276 }, { - "epoch": 0.06461407491486947, + "epoch": 0.06452435603162458, "grad_norm": 0.0, - "learning_rate": 1.993730572241127e-05, - "loss": 1.0728, + "learning_rate": 1.9937583670489547e-05, + "loss": 1.0788, "step": 2277 }, { - "epoch": 0.06464245175936435, + "epoch": 0.06455269347388705, "grad_norm": 0.0, - "learning_rate": 1.993720292623957e-05, - "loss": 1.1033, + "learning_rate": 1.9937481245518563e-05, + "loss": 1.0874, "step": 2278 }, { - "epoch": 0.06467082860385925, + "epoch": 0.06458103091614951, "grad_norm": 0.0, - "learning_rate": 1.9937100046127872e-05, - "loss": 0.957, + "learning_rate": 1.9937378736840486e-05, + "loss": 1.1125, "step": 2279 }, { - "epoch": 0.06469920544835414, + "epoch": 0.06460936835841197, "grad_norm": 0.0, - "learning_rate": 1.9936997082077043e-05, - "loss": 1.1226, + "learning_rate": 1.993727614445618e-05, + "loss": 1.0747, "step": 2280 }, { - "epoch": 0.06472758229284903, + "epoch": 0.06463770580067443, "grad_norm": 0.0, - "learning_rate": 1.9936894034087952e-05, - "loss": 1.1047, + "learning_rate": 1.9937173468366508e-05, + "loss": 1.1601, "step": 2281 }, { - "epoch": 0.06475595913734393, + "epoch": 0.0646660432429369, "grad_norm": 0.0, - "learning_rate": 1.993679090216147e-05, - "loss": 0.9836, + "learning_rate": 1.993707070857233e-05, + "loss": 1.1155, "step": 2282 }, { - "epoch": 0.06478433598183882, + "epoch": 0.06469438068519935, "grad_norm": 0.0, - "learning_rate": 1.993668768629847e-05, - "loss": 1.1187, + "learning_rate": 1.9936967865074517e-05, + "loss": 1.0369, "step": 2283 }, { - "epoch": 0.06481271282633372, + "epoch": 0.06472271812746182, "grad_norm": 0.0, - "learning_rate": 1.9936584386499824e-05, - "loss": 1.076, + "learning_rate": 1.9936864937873935e-05, + "loss": 1.1487, "step": 2284 }, { - "epoch": 0.0648410896708286, + "epoch": 0.06475105556972428, "grad_norm": 0.0, - "learning_rate": 1.99364810027664e-05, - "loss": 1.0547, + "learning_rate": 1.993676192697145e-05, + "loss": 1.1892, "step": 2285 }, { - "epoch": 0.06486946651532349, + "epoch": 0.06477939301198674, "grad_norm": 0.0, - "learning_rate": 1.9936377535099073e-05, - "loss": 1.1398, + "learning_rate": 1.9936658832367927e-05, + "loss": 1.131, "step": 2286 }, { - "epoch": 0.0648978433598184, + "epoch": 0.0648077304542492, "grad_norm": 0.0, - "learning_rate": 1.993627398349872e-05, - "loss": 1.0319, + "learning_rate": 1.9936555654064237e-05, + "loss": 1.1184, "step": 2287 }, { - "epoch": 0.06492622020431328, + "epoch": 0.06483606789651167, "grad_norm": 0.0, - "learning_rate": 1.9936170347966215e-05, - "loss": 1.0098, + "learning_rate": 1.9936452392061248e-05, + "loss": 1.0859, "step": 2288 }, { - "epoch": 0.06495459704880817, + "epoch": 0.06486440533877412, "grad_norm": 0.0, - "learning_rate": 1.9936066628502432e-05, - "loss": 1.0332, + "learning_rate": 1.9936349046359833e-05, + "loss": 1.1078, "step": 2289 }, { - "epoch": 0.06498297389330307, + "epoch": 0.06489274278103659, "grad_norm": 0.0, - "learning_rate": 1.9935962825108248e-05, - "loss": 1.058, + "learning_rate": 1.993624561696086e-05, + "loss": 1.1253, "step": 2290 }, { - "epoch": 0.06501135073779796, + "epoch": 0.06492108022329905, "grad_norm": 0.0, - "learning_rate": 1.9935858937784537e-05, - "loss": 1.0574, + "learning_rate": 1.9936142103865198e-05, + "loss": 1.1153, "step": 2291 }, { - "epoch": 0.06503972758229284, + "epoch": 0.0649494176655615, "grad_norm": 0.0, - "learning_rate": 1.993575496653218e-05, - "loss": 1.0221, + "learning_rate": 1.9936038507073723e-05, + "loss": 1.2475, "step": 2292 }, { - "epoch": 0.06506810442678775, + "epoch": 0.06497775510782397, "grad_norm": 0.0, - "learning_rate": 1.9935650911352055e-05, + "learning_rate": 1.9935934826587306e-05, "loss": 1.0027, "step": 2293 }, { - "epoch": 0.06509648127128263, + "epoch": 0.06500609255008642, "grad_norm": 0.0, - "learning_rate": 1.9935546772245042e-05, - "loss": 1.1157, + "learning_rate": 1.993583106240682e-05, + "loss": 1.1178, "step": 2294 }, { - "epoch": 0.06512485811577752, + "epoch": 0.06503442999234889, "grad_norm": 0.0, - "learning_rate": 1.9935442549212016e-05, - "loss": 1.0892, + "learning_rate": 1.993572721453314e-05, + "loss": 1.0519, "step": 2295 }, { - "epoch": 0.06515323496027242, + "epoch": 0.06506276743461135, "grad_norm": 0.0, - "learning_rate": 1.993533824225386e-05, - "loss": 1.0455, + "learning_rate": 1.993562328296714e-05, + "loss": 1.0649, "step": 2296 }, { - "epoch": 0.06518161180476731, + "epoch": 0.0650911048768738, "grad_norm": 0.0, - "learning_rate": 1.9935233851371456e-05, - "loss": 1.0944, + "learning_rate": 1.9935519267709694e-05, + "loss": 1.1605, "step": 2297 }, { - "epoch": 0.0652099886492622, + "epoch": 0.06511944231913627, "grad_norm": 0.0, - "learning_rate": 1.9935129376565686e-05, - "loss": 1.0828, + "learning_rate": 1.9935415168761682e-05, + "loss": 1.1432, "step": 2298 }, { - "epoch": 0.0652383654937571, + "epoch": 0.06514777976139874, "grad_norm": 0.0, - "learning_rate": 1.9935024817837433e-05, - "loss": 1.1168, + "learning_rate": 1.993531098612398e-05, + "loss": 1.1731, "step": 2299 }, { - "epoch": 0.06526674233825198, + "epoch": 0.06517611720366119, "grad_norm": 0.0, - "learning_rate": 1.993492017518757e-05, - "loss": 1.05, + "learning_rate": 1.993520671979746e-05, + "loss": 1.0399, "step": 2300 }, { - "epoch": 0.06529511918274687, + "epoch": 0.06520445464592366, "grad_norm": 0.0, - "learning_rate": 1.9934815448617e-05, - "loss": 1.1275, + "learning_rate": 1.9935102369783008e-05, + "loss": 1.0989, "step": 2301 }, { - "epoch": 0.06532349602724177, + "epoch": 0.06523279208818612, "grad_norm": 0.0, - "learning_rate": 1.9934710638126594e-05, - "loss": 1.0964, + "learning_rate": 1.9934997936081497e-05, + "loss": 1.0885, "step": 2302 }, { - "epoch": 0.06535187287173666, + "epoch": 0.06526112953044858, "grad_norm": 0.0, - "learning_rate": 1.993460574371724e-05, - "loss": 1.0863, + "learning_rate": 1.993489341869381e-05, + "loss": 0.9802, "step": 2303 }, { - "epoch": 0.06538024971623156, + "epoch": 0.06528946697271104, "grad_norm": 0.0, - "learning_rate": 1.9934500765389824e-05, - "loss": 1.0931, + "learning_rate": 1.9934788817620827e-05, + "loss": 1.0713, "step": 2304 }, { - "epoch": 0.06540862656072645, + "epoch": 0.06531780441497351, "grad_norm": 0.0, - "learning_rate": 1.9934395703145234e-05, - "loss": 1.1202, + "learning_rate": 1.9934684132863427e-05, + "loss": 1.1595, "step": 2305 }, { - "epoch": 0.06543700340522134, + "epoch": 0.06534614185723596, "grad_norm": 0.0, - "learning_rate": 1.9934290556984356e-05, - "loss": 1.01, + "learning_rate": 1.9934579364422495e-05, + "loss": 1.004, "step": 2306 }, { - "epoch": 0.06546538024971624, + "epoch": 0.06537447929949843, "grad_norm": 0.0, - "learning_rate": 1.9934185326908082e-05, - "loss": 1.0324, + "learning_rate": 1.9934474512298912e-05, + "loss": 1.1614, "step": 2307 }, { - "epoch": 0.06549375709421112, + "epoch": 0.06540281674176089, "grad_norm": 0.0, - "learning_rate": 1.9934080012917295e-05, - "loss": 1.1113, + "learning_rate": 1.993436957649356e-05, + "loss": 1.219, "step": 2308 }, { - "epoch": 0.06552213393870601, + "epoch": 0.06543115418402334, "grad_norm": 0.0, - "learning_rate": 1.9933974615012888e-05, - "loss": 1.0459, + "learning_rate": 1.9934264557007323e-05, + "loss": 1.0677, "step": 2309 }, { - "epoch": 0.06555051078320091, + "epoch": 0.06545949162628581, "grad_norm": 0.0, - "learning_rate": 1.9933869133195752e-05, - "loss": 1.0397, + "learning_rate": 1.993415945384109e-05, + "loss": 1.0383, "step": 2310 }, { - "epoch": 0.0655788876276958, + "epoch": 0.06548782906854828, "grad_norm": 0.0, - "learning_rate": 1.9933763567466776e-05, - "loss": 0.9309, + "learning_rate": 1.9934054266995742e-05, + "loss": 1.0, "step": 2311 }, { - "epoch": 0.06560726447219069, + "epoch": 0.06551616651081073, "grad_norm": 0.0, - "learning_rate": 1.993365791782685e-05, - "loss": 1.063, + "learning_rate": 1.9933948996472162e-05, + "loss": 1.0974, "step": 2312 }, { - "epoch": 0.06563564131668559, + "epoch": 0.0655445039530732, "grad_norm": 0.0, - "learning_rate": 1.9933552184276873e-05, - "loss": 0.9634, + "learning_rate": 1.9933843642271243e-05, + "loss": 1.0512, "step": 2313 }, { - "epoch": 0.06566401816118048, + "epoch": 0.06557284139533566, "grad_norm": 0.0, - "learning_rate": 1.9933446366817735e-05, - "loss": 1.1072, + "learning_rate": 1.9933738204393874e-05, + "loss": 1.2226, "step": 2314 }, { - "epoch": 0.06569239500567536, + "epoch": 0.06560117883759811, "grad_norm": 0.0, - "learning_rate": 1.9933340465450328e-05, - "loss": 1.0513, + "learning_rate": 1.993363268284094e-05, + "loss": 0.9012, "step": 2315 }, { - "epoch": 0.06572077185017026, + "epoch": 0.06562951627986058, "grad_norm": 0.0, - "learning_rate": 1.9933234480175545e-05, - "loss": 1.0477, + "learning_rate": 1.9933527077613323e-05, + "loss": 1.0672, "step": 2316 }, { - "epoch": 0.06574914869466515, + "epoch": 0.06565785372212304, "grad_norm": 0.0, - "learning_rate": 1.9933128410994288e-05, - "loss": 1.0329, + "learning_rate": 1.993342138871192e-05, + "loss": 1.0753, "step": 2317 }, { - "epoch": 0.06577752553916004, + "epoch": 0.0656861911643855, "grad_norm": 0.0, - "learning_rate": 1.9933022257907445e-05, - "loss": 1.062, + "learning_rate": 1.993331561613762e-05, + "loss": 1.1455, "step": 2318 }, { - "epoch": 0.06580590238365494, + "epoch": 0.06571452860664796, "grad_norm": 0.0, - "learning_rate": 1.9932916020915917e-05, - "loss": 1.0421, + "learning_rate": 1.9933209759891318e-05, + "loss": 0.9811, "step": 2319 }, { - "epoch": 0.06583427922814983, + "epoch": 0.06574286604891043, "grad_norm": 0.0, - "learning_rate": 1.9932809700020602e-05, - "loss": 1.0038, + "learning_rate": 1.9933103819973896e-05, + "loss": 1.0836, "step": 2320 }, { - "epoch": 0.06586265607264472, + "epoch": 0.06577120349117288, "grad_norm": 0.0, - "learning_rate": 1.9932703295222398e-05, - "loss": 1.0353, + "learning_rate": 1.9932997796386254e-05, + "loss": 1.2329, "step": 2321 }, { - "epoch": 0.06589103291713962, + "epoch": 0.06579954093343535, "grad_norm": 0.0, - "learning_rate": 1.99325968065222e-05, - "loss": 1.081, + "learning_rate": 1.9932891689129284e-05, + "loss": 1.0504, "step": 2322 }, { - "epoch": 0.0659194097616345, + "epoch": 0.06582787837569781, "grad_norm": 0.0, - "learning_rate": 1.993249023392091e-05, - "loss": 0.9538, + "learning_rate": 1.993278549820388e-05, + "loss": 1.1073, "step": 2323 }, { - "epoch": 0.0659477866061294, + "epoch": 0.06585621581796026, "grad_norm": 0.0, - "learning_rate": 1.9932383577419432e-05, - "loss": 1.1124, + "learning_rate": 1.9932679223610934e-05, + "loss": 1.08, "step": 2324 }, { - "epoch": 0.06597616345062429, + "epoch": 0.06588455326022273, "grad_norm": 0.0, - "learning_rate": 1.993227683701866e-05, - "loss": 1.1124, + "learning_rate": 1.9932572865351342e-05, + "loss": 1.1455, "step": 2325 }, { - "epoch": 0.06600454029511918, + "epoch": 0.0659128907024852, "grad_norm": 0.0, - "learning_rate": 1.9932170012719504e-05, - "loss": 1.0739, + "learning_rate": 1.9932466423425997e-05, + "loss": 1.0344, "step": 2326 }, { - "epoch": 0.06603291713961408, + "epoch": 0.06594122814474765, "grad_norm": 0.0, - "learning_rate": 1.9932063104522857e-05, - "loss": 1.0601, + "learning_rate": 1.9932359897835805e-05, + "loss": 1.029, "step": 2327 }, { - "epoch": 0.06606129398410897, + "epoch": 0.06596956558701011, "grad_norm": 0.0, - "learning_rate": 1.9931956112429625e-05, - "loss": 1.0764, + "learning_rate": 1.9932253288581656e-05, + "loss": 1.0478, "step": 2328 }, { - "epoch": 0.06608967082860386, + "epoch": 0.06599790302927258, "grad_norm": 0.0, - "learning_rate": 1.9931849036440715e-05, - "loss": 1.0587, + "learning_rate": 1.993214659566445e-05, + "loss": 1.1882, "step": 2329 }, { - "epoch": 0.06611804767309876, + "epoch": 0.06602624047153503, "grad_norm": 0.0, - "learning_rate": 1.9931741876557033e-05, - "loss": 1.0231, + "learning_rate": 1.993203981908508e-05, + "loss": 1.1348, "step": 2330 }, { - "epoch": 0.06614642451759364, + "epoch": 0.0660545779137975, "grad_norm": 0.0, - "learning_rate": 1.9931634632779477e-05, - "loss": 1.1528, + "learning_rate": 1.9931932958844453e-05, + "loss": 0.9928, "step": 2331 }, { - "epoch": 0.06617480136208853, + "epoch": 0.06608291535605997, "grad_norm": 0.0, - "learning_rate": 1.9931527305108956e-05, - "loss": 1.0458, + "learning_rate": 1.993182601494347e-05, + "loss": 1.1611, "step": 2332 }, { - "epoch": 0.06620317820658343, + "epoch": 0.06611125279832242, "grad_norm": 0.0, - "learning_rate": 1.993141989354638e-05, - "loss": 0.9999, + "learning_rate": 1.9931718987383024e-05, + "loss": 0.9771, "step": 2333 }, { - "epoch": 0.06623155505107832, + "epoch": 0.06613959024058488, "grad_norm": 0.0, - "learning_rate": 1.9931312398092654e-05, - "loss": 1.1028, + "learning_rate": 1.9931611876164024e-05, + "loss": 1.0283, "step": 2334 }, { - "epoch": 0.06625993189557321, + "epoch": 0.06616792768284735, "grad_norm": 0.0, - "learning_rate": 1.9931204818748685e-05, - "loss": 1.0627, + "learning_rate": 1.9931504681287364e-05, + "loss": 1.1536, "step": 2335 }, { - "epoch": 0.06628830874006811, + "epoch": 0.0661962651251098, "grad_norm": 0.0, - "learning_rate": 1.9931097155515383e-05, - "loss": 1.0614, + "learning_rate": 1.9931397402753957e-05, + "loss": 1.1496, "step": 2336 }, { - "epoch": 0.066316685584563, + "epoch": 0.06622460256737227, "grad_norm": 0.0, - "learning_rate": 1.993098940839366e-05, - "loss": 0.9848, + "learning_rate": 1.9931290040564702e-05, + "loss": 1.0712, "step": 2337 }, { - "epoch": 0.06634506242905788, + "epoch": 0.06625294000963473, "grad_norm": 0.0, - "learning_rate": 1.9930881577384417e-05, - "loss": 1.0674, + "learning_rate": 1.99311825947205e-05, + "loss": 1.1574, "step": 2338 }, { - "epoch": 0.06637343927355278, + "epoch": 0.06628127745189719, "grad_norm": 0.0, - "learning_rate": 1.9930773662488573e-05, - "loss": 1.0789, + "learning_rate": 1.993107506522226e-05, + "loss": 1.1206, "step": 2339 }, { - "epoch": 0.06640181611804767, + "epoch": 0.06630961489415965, "grad_norm": 0.0, - "learning_rate": 1.9930665663707035e-05, - "loss": 0.9802, + "learning_rate": 1.993096745207089e-05, + "loss": 1.1196, "step": 2340 }, { - "epoch": 0.06643019296254256, + "epoch": 0.06633795233642212, "grad_norm": 0.0, - "learning_rate": 1.993055758104072e-05, - "loss": 1.1653, + "learning_rate": 1.993085975526729e-05, + "loss": 1.149, "step": 2341 }, { - "epoch": 0.06645856980703746, + "epoch": 0.06636628977868457, "grad_norm": 0.0, - "learning_rate": 1.9930449414490544e-05, - "loss": 1.1218, + "learning_rate": 1.993075197481237e-05, + "loss": 1.009, "step": 2342 }, { - "epoch": 0.06648694665153235, + "epoch": 0.06639462722094704, "grad_norm": 0.0, - "learning_rate": 1.993034116405741e-05, - "loss": 1.1315, + "learning_rate": 1.9930644110707042e-05, + "loss": 1.1116, "step": 2343 }, { - "epoch": 0.06651532349602725, + "epoch": 0.0664229646632095, "grad_norm": 0.0, - "learning_rate": 1.993023282974224e-05, - "loss": 1.0801, + "learning_rate": 1.993053616295221e-05, + "loss": 1.1918, "step": 2344 }, { - "epoch": 0.06654370034052214, + "epoch": 0.06645130210547195, "grad_norm": 0.0, - "learning_rate": 1.9930124411545943e-05, - "loss": 1.0185, + "learning_rate": 1.9930428131548782e-05, + "loss": 1.1996, "step": 2345 }, { - "epoch": 0.06657207718501702, + "epoch": 0.06647963954773442, "grad_norm": 0.0, - "learning_rate": 1.993001590946944e-05, - "loss": 1.0932, + "learning_rate": 1.993032001649767e-05, + "loss": 1.2266, "step": 2346 }, { - "epoch": 0.06660045402951192, + "epoch": 0.06650797698999689, "grad_norm": 0.0, - "learning_rate": 1.992990732351365e-05, - "loss": 1.0713, + "learning_rate": 1.9930211817799788e-05, + "loss": 1.1833, "step": 2347 }, { - "epoch": 0.06662883087400681, + "epoch": 0.06653631443225934, "grad_norm": 0.0, - "learning_rate": 1.992979865367948e-05, - "loss": 1.1374, + "learning_rate": 1.9930103535456044e-05, + "loss": 1.2622, "step": 2348 }, { - "epoch": 0.0666572077185017, + "epoch": 0.0665646518745218, "grad_norm": 0.0, - "learning_rate": 1.9929689899967858e-05, - "loss": 1.0532, + "learning_rate": 1.9929995169467346e-05, + "loss": 1.0689, "step": 2349 }, { - "epoch": 0.0666855845629966, + "epoch": 0.06659298931678427, "grad_norm": 0.0, - "learning_rate": 1.99295810623797e-05, - "loss": 1.0518, + "learning_rate": 1.9929886719834615e-05, + "loss": 1.0932, "step": 2350 }, { - "epoch": 0.06671396140749149, + "epoch": 0.06662132675904672, "grad_norm": 0.0, - "learning_rate": 1.992947214091592e-05, - "loss": 0.9565, + "learning_rate": 1.9929778186558763e-05, + "loss": 1.0738, "step": 2351 }, { - "epoch": 0.06674233825198637, + "epoch": 0.06664966420130919, "grad_norm": 0.0, - "learning_rate": 1.992936313557745e-05, - "loss": 1.0167, + "learning_rate": 1.9929669569640697e-05, + "loss": 1.1044, "step": 2352 }, { - "epoch": 0.06677071509648128, + "epoch": 0.06667800164357165, "grad_norm": 0.0, - "learning_rate": 1.9929254046365198e-05, - "loss": 1.1486, + "learning_rate": 1.992956086908134e-05, + "loss": 1.1367, "step": 2353 }, { - "epoch": 0.06679909194097616, + "epoch": 0.06670633908583411, "grad_norm": 0.0, - "learning_rate": 1.9929144873280092e-05, - "loss": 0.9929, + "learning_rate": 1.9929452084881604e-05, + "loss": 1.0648, "step": 2354 }, { - "epoch": 0.06682746878547105, + "epoch": 0.06673467652809657, "grad_norm": 0.0, - "learning_rate": 1.9929035616323054e-05, - "loss": 1.0189, + "learning_rate": 1.9929343217042404e-05, + "loss": 1.1247, "step": 2355 }, { - "epoch": 0.06685584562996595, + "epoch": 0.06676301397035904, "grad_norm": 0.0, - "learning_rate": 1.9928926275495006e-05, - "loss": 1.1032, + "learning_rate": 1.992923426556466e-05, + "loss": 1.0271, "step": 2356 }, { - "epoch": 0.06688422247446084, + "epoch": 0.06679135141262149, "grad_norm": 0.0, - "learning_rate": 1.9928816850796865e-05, - "loss": 1.0807, + "learning_rate": 1.992912523044929e-05, + "loss": 1.0633, "step": 2357 }, { - "epoch": 0.06691259931895573, + "epoch": 0.06681968885488396, "grad_norm": 0.0, - "learning_rate": 1.9928707342229568e-05, - "loss": 1.0085, + "learning_rate": 1.992901611169721e-05, + "loss": 1.1111, "step": 2358 }, { - "epoch": 0.06694097616345063, + "epoch": 0.06684802629714642, "grad_norm": 0.0, - "learning_rate": 1.9928597749794035e-05, - "loss": 1.1438, + "learning_rate": 1.9928906909309342e-05, + "loss": 1.076, "step": 2359 }, { - "epoch": 0.06696935300794551, + "epoch": 0.06687636373940888, "grad_norm": 0.0, - "learning_rate": 1.992848807349119e-05, - "loss": 1.1235, + "learning_rate": 1.9928797623286602e-05, + "loss": 1.0611, "step": 2360 }, { - "epoch": 0.0669977298524404, + "epoch": 0.06690470118167134, "grad_norm": 0.0, - "learning_rate": 1.9928378313321953e-05, - "loss": 1.0916, + "learning_rate": 1.9928688253629916e-05, + "loss": 1.1221, "step": 2361 }, { - "epoch": 0.0670261066969353, + "epoch": 0.06693303862393381, "grad_norm": 0.0, - "learning_rate": 1.9928268469287264e-05, - "loss": 0.962, + "learning_rate": 1.99285788003402e-05, + "loss": 1.1129, "step": 2362 }, { - "epoch": 0.06705448354143019, + "epoch": 0.06696137606619626, "grad_norm": 0.0, - "learning_rate": 1.9928158541388046e-05, - "loss": 1.0493, + "learning_rate": 1.9928469263418376e-05, + "loss": 1.1818, "step": 2363 }, { - "epoch": 0.06708286038592509, + "epoch": 0.06698971350845873, "grad_norm": 0.0, - "learning_rate": 1.992804852962522e-05, - "loss": 1.0789, + "learning_rate": 1.992835964286537e-05, + "loss": 1.1475, "step": 2364 }, { - "epoch": 0.06711123723041998, + "epoch": 0.06701805095072119, "grad_norm": 0.0, - "learning_rate": 1.992793843399973e-05, - "loss": 1.1963, + "learning_rate": 1.9928249938682103e-05, + "loss": 1.0209, "step": 2365 }, { - "epoch": 0.06713961407491487, + "epoch": 0.06704638839298364, "grad_norm": 0.0, - "learning_rate": 1.9927828254512495e-05, - "loss": 0.9551, + "learning_rate": 1.99281401508695e-05, + "loss": 1.0138, "step": 2366 }, { - "epoch": 0.06716799091940977, + "epoch": 0.06707472583524611, "grad_norm": 0.0, - "learning_rate": 1.9927717991164445e-05, - "loss": 1.0491, + "learning_rate": 1.9928030279428487e-05, + "loss": 1.164, "step": 2367 }, { - "epoch": 0.06719636776390465, + "epoch": 0.06710306327750858, "grad_norm": 0.0, - "learning_rate": 1.9927607643956518e-05, - "loss": 1.0743, + "learning_rate": 1.9927920324359985e-05, + "loss": 1.1115, "step": 2368 }, { - "epoch": 0.06722474460839954, + "epoch": 0.06713140071977103, "grad_norm": 0.0, - "learning_rate": 1.9927497212889643e-05, - "loss": 1.092, + "learning_rate": 1.9927810285664928e-05, + "loss": 1.186, "step": 2369 }, { - "epoch": 0.06725312145289444, + "epoch": 0.0671597381620335, "grad_norm": 0.0, - "learning_rate": 1.992738669796475e-05, - "loss": 1.0544, + "learning_rate": 1.9927700163344238e-05, + "loss": 1.0709, "step": 2370 }, { - "epoch": 0.06728149829738933, + "epoch": 0.06718807560429596, "grad_norm": 0.0, - "learning_rate": 1.9927276099182777e-05, - "loss": 1.0327, + "learning_rate": 1.992758995739884e-05, + "loss": 1.0367, "step": 2371 }, { - "epoch": 0.06730987514188422, + "epoch": 0.06721641304655841, "grad_norm": 0.0, - "learning_rate": 1.9927165416544656e-05, - "loss": 1.0555, + "learning_rate": 1.9927479667829667e-05, + "loss": 1.1045, "step": 2372 }, { - "epoch": 0.06733825198637912, + "epoch": 0.06724475048882088, "grad_norm": 0.0, - "learning_rate": 1.9927054650051324e-05, - "loss": 1.0949, + "learning_rate": 1.9927369294637646e-05, + "loss": 1.0785, "step": 2373 }, { - "epoch": 0.067366628830874, + "epoch": 0.06727308793108334, "grad_norm": 0.0, - "learning_rate": 1.9926943799703716e-05, - "loss": 1.1088, + "learning_rate": 1.9927258837823707e-05, + "loss": 1.1686, "step": 2374 }, { - "epoch": 0.0673950056753689, + "epoch": 0.0673014253733458, "grad_norm": 0.0, - "learning_rate": 1.9926832865502766e-05, - "loss": 1.0378, + "learning_rate": 1.992714829738878e-05, + "loss": 1.1664, "step": 2375 }, { - "epoch": 0.0674233825198638, + "epoch": 0.06732976281560826, "grad_norm": 0.0, - "learning_rate": 1.992672184744941e-05, - "loss": 1.0398, + "learning_rate": 1.9927037673333797e-05, + "loss": 1.1123, "step": 2376 }, { - "epoch": 0.06745175936435868, + "epoch": 0.06735810025787073, "grad_norm": 0.0, - "learning_rate": 1.992661074554459e-05, - "loss": 1.0589, + "learning_rate": 1.992692696565969e-05, + "loss": 1.0549, "step": 2377 }, { - "epoch": 0.06748013620885357, + "epoch": 0.06738643770013318, "grad_norm": 0.0, - "learning_rate": 1.9926499559789245e-05, - "loss": 1.1829, + "learning_rate": 1.9926816174367388e-05, + "loss": 1.1912, "step": 2378 }, { - "epoch": 0.06750851305334847, + "epoch": 0.06741477514239565, "grad_norm": 0.0, - "learning_rate": 1.992638829018431e-05, - "loss": 0.9743, + "learning_rate": 1.992670529945783e-05, + "loss": 1.1256, "step": 2379 }, { - "epoch": 0.06753688989784336, + "epoch": 0.06744311258465811, "grad_norm": 0.0, - "learning_rate": 1.9926276936730728e-05, - "loss": 0.9507, + "learning_rate": 1.9926594340931947e-05, + "loss": 1.0616, "step": 2380 }, { - "epoch": 0.06756526674233825, + "epoch": 0.06747145002692057, "grad_norm": 0.0, - "learning_rate": 1.9926165499429437e-05, - "loss": 1.0154, + "learning_rate": 1.9926483298790672e-05, + "loss": 1.1398, "step": 2381 }, { - "epoch": 0.06759364358683315, + "epoch": 0.06749978746918303, "grad_norm": 0.0, - "learning_rate": 1.9926053978281384e-05, - "loss": 0.99, + "learning_rate": 1.9926372173034946e-05, + "loss": 1.0969, "step": 2382 }, { - "epoch": 0.06762202043132803, + "epoch": 0.0675281249114455, "grad_norm": 0.0, - "learning_rate": 1.99259423732875e-05, - "loss": 1.0809, + "learning_rate": 1.9926260963665694e-05, + "loss": 1.0591, "step": 2383 }, { - "epoch": 0.06765039727582293, + "epoch": 0.06755646235370795, "grad_norm": 0.0, - "learning_rate": 1.992583068444874e-05, - "loss": 1.042, + "learning_rate": 1.992614967068387e-05, + "loss": 1.0712, "step": 2384 }, { - "epoch": 0.06767877412031782, + "epoch": 0.06758479979597042, "grad_norm": 0.0, - "learning_rate": 1.992571891176604e-05, - "loss": 1.0907, + "learning_rate": 1.9926038294090394e-05, + "loss": 1.1298, "step": 2385 }, { - "epoch": 0.06770715096481271, + "epoch": 0.06761313723823288, "grad_norm": 0.0, - "learning_rate": 1.9925607055240345e-05, - "loss": 0.9768, + "learning_rate": 1.992592683388621e-05, + "loss": 1.0937, "step": 2386 }, { - "epoch": 0.06773552780930761, + "epoch": 0.06764147468049533, "grad_norm": 0.0, - "learning_rate": 1.9925495114872603e-05, - "loss": 1.0216, + "learning_rate": 1.9925815290072263e-05, + "loss": 1.0515, "step": 2387 }, { - "epoch": 0.0677639046538025, + "epoch": 0.0676698121227578, "grad_norm": 0.0, - "learning_rate": 1.9925383090663758e-05, - "loss": 1.0032, + "learning_rate": 1.9925703662649483e-05, + "loss": 1.0517, "step": 2388 }, { - "epoch": 0.06779228149829739, + "epoch": 0.06769814956502027, "grad_norm": 0.0, - "learning_rate": 1.9925270982614755e-05, - "loss": 1.0734, + "learning_rate": 1.9925591951618822e-05, + "loss": 1.0324, "step": 2389 }, { - "epoch": 0.06782065834279229, + "epoch": 0.06772648700728272, "grad_norm": 0.0, - "learning_rate": 1.992515879072654e-05, - "loss": 0.986, + "learning_rate": 1.992548015698121e-05, + "loss": 1.1754, "step": 2390 }, { - "epoch": 0.06784903518728717, + "epoch": 0.06775482444954518, "grad_norm": 0.0, - "learning_rate": 1.9925046515000063e-05, - "loss": 1.0822, + "learning_rate": 1.9925368278737594e-05, + "loss": 1.0702, "step": 2391 }, { - "epoch": 0.06787741203178206, + "epoch": 0.06778316189180765, "grad_norm": 0.0, - "learning_rate": 1.9924934155436274e-05, - "loss": 1.1248, + "learning_rate": 1.9925256316888917e-05, + "loss": 1.0881, "step": 2392 }, { - "epoch": 0.06790578887627696, + "epoch": 0.0678114993340701, "grad_norm": 0.0, - "learning_rate": 1.9924821712036116e-05, - "loss": 1.0187, + "learning_rate": 1.9925144271436116e-05, + "loss": 1.1709, "step": 2393 }, { - "epoch": 0.06793416572077185, + "epoch": 0.06783983677633257, "grad_norm": 0.0, - "learning_rate": 1.9924709184800546e-05, - "loss": 1.1668, + "learning_rate": 1.9925032142380144e-05, + "loss": 1.0284, "step": 2394 }, { - "epoch": 0.06796254256526674, + "epoch": 0.06786817421859503, "grad_norm": 0.0, - "learning_rate": 1.9924596573730514e-05, - "loss": 1.0547, + "learning_rate": 1.9924919929721938e-05, + "loss": 1.1756, "step": 2395 }, { - "epoch": 0.06799091940976164, + "epoch": 0.06789651166085749, "grad_norm": 0.0, - "learning_rate": 1.9924483878826965e-05, - "loss": 1.017, + "learning_rate": 1.9924807633462445e-05, + "loss": 1.1696, "step": 2396 }, { - "epoch": 0.06801929625425653, + "epoch": 0.06792484910311995, "grad_norm": 0.0, - "learning_rate": 1.9924371100090857e-05, - "loss": 1.0293, + "learning_rate": 1.9924695253602612e-05, + "loss": 1.0963, "step": 2397 }, { - "epoch": 0.06804767309875141, + "epoch": 0.06795318654538242, "grad_norm": 0.0, - "learning_rate": 1.992425823752314e-05, - "loss": 1.1043, + "learning_rate": 1.992458279014339e-05, + "loss": 1.1144, "step": 2398 }, { - "epoch": 0.06807604994324631, + "epoch": 0.06798152398764487, "grad_norm": 0.0, - "learning_rate": 1.9924145291124764e-05, - "loss": 0.9837, + "learning_rate": 1.9924470243085716e-05, + "loss": 1.138, "step": 2399 }, { - "epoch": 0.0681044267877412, + "epoch": 0.06800986142990734, "grad_norm": 0.0, - "learning_rate": 1.992403226089669e-05, - "loss": 1.0342, + "learning_rate": 1.9924357612430544e-05, + "loss": 1.1039, "step": 2400 }, { - "epoch": 0.06813280363223609, + "epoch": 0.0680381988721698, "grad_norm": 0.0, - "learning_rate": 1.9923919146839866e-05, - "loss": 1.0648, + "learning_rate": 1.9924244898178825e-05, + "loss": 1.1077, "step": 2401 }, { - "epoch": 0.06816118047673099, + "epoch": 0.06806653631443225, "grad_norm": 0.0, - "learning_rate": 1.9923805948955254e-05, - "loss": 1.025, + "learning_rate": 1.9924132100331505e-05, + "loss": 1.0972, "step": 2402 }, { - "epoch": 0.06818955732122588, + "epoch": 0.06809487375669472, "grad_norm": 0.0, - "learning_rate": 1.9923692667243806e-05, - "loss": 1.1037, + "learning_rate": 1.9924019218889536e-05, + "loss": 1.0897, "step": 2403 }, { - "epoch": 0.06821793416572078, + "epoch": 0.06812321119895719, "grad_norm": 0.0, - "learning_rate": 1.992357930170648e-05, - "loss": 1.1838, + "learning_rate": 1.9923906253853867e-05, + "loss": 1.1765, "step": 2404 }, { - "epoch": 0.06824631101021567, + "epoch": 0.06815154864121964, "grad_norm": 0.0, - "learning_rate": 1.992346585234423e-05, - "loss": 1.0368, + "learning_rate": 1.9923793205225453e-05, + "loss": 1.1544, "step": 2405 }, { - "epoch": 0.06827468785471055, + "epoch": 0.0681798860834821, "grad_norm": 0.0, - "learning_rate": 1.9923352319158025e-05, - "loss": 1.1508, + "learning_rate": 1.9923680073005244e-05, + "loss": 1.0639, "step": 2406 }, { - "epoch": 0.06830306469920545, + "epoch": 0.06820822352574457, "grad_norm": 0.0, - "learning_rate": 1.9923238702148812e-05, - "loss": 0.9807, + "learning_rate": 1.992356685719419e-05, + "loss": 1.1114, "step": 2407 }, { - "epoch": 0.06833144154370034, + "epoch": 0.06823656096800702, "grad_norm": 0.0, - "learning_rate": 1.9923125001317557e-05, - "loss": 0.9876, + "learning_rate": 1.9923453557793247e-05, + "loss": 1.0855, "step": 2408 }, { - "epoch": 0.06835981838819523, + "epoch": 0.06826489841026949, "grad_norm": 0.0, - "learning_rate": 1.992301121666522e-05, - "loss": 1.085, + "learning_rate": 1.992334017480337e-05, + "loss": 1.1768, "step": 2409 }, { - "epoch": 0.06838819523269013, + "epoch": 0.06829323585253196, "grad_norm": 0.0, - "learning_rate": 1.9922897348192758e-05, - "loss": 1.0325, + "learning_rate": 1.992322670822551e-05, + "loss": 1.042, "step": 2410 }, { - "epoch": 0.06841657207718502, + "epoch": 0.06832157329479441, "grad_norm": 0.0, - "learning_rate": 1.9922783395901138e-05, - "loss": 1.0321, + "learning_rate": 1.9923113158060632e-05, + "loss": 1.0388, "step": 2411 }, { - "epoch": 0.0684449489216799, + "epoch": 0.06834991073705687, "grad_norm": 0.0, - "learning_rate": 1.9922669359791322e-05, - "loss": 1.156, + "learning_rate": 1.9922999524309684e-05, + "loss": 1.2375, "step": 2412 }, { - "epoch": 0.0684733257661748, + "epoch": 0.06837824817931934, "grad_norm": 0.0, - "learning_rate": 1.992255523986427e-05, - "loss": 1.05, + "learning_rate": 1.992288580697363e-05, + "loss": 1.1494, "step": 2413 }, { - "epoch": 0.06850170261066969, + "epoch": 0.06840658562158179, "grad_norm": 0.0, - "learning_rate": 1.992244103612095e-05, - "loss": 1.1171, + "learning_rate": 1.9922772006053424e-05, + "loss": 0.9964, "step": 2414 }, { - "epoch": 0.06853007945516458, + "epoch": 0.06843492306384426, "grad_norm": 0.0, - "learning_rate": 1.9922326748562323e-05, - "loss": 1.1129, + "learning_rate": 1.9922658121550024e-05, + "loss": 1.036, "step": 2415 }, { - "epoch": 0.06855845629965948, + "epoch": 0.06846326050610672, "grad_norm": 0.0, - "learning_rate": 1.9922212377189355e-05, - "loss": 0.9969, + "learning_rate": 1.9922544153464387e-05, + "loss": 1.0998, "step": 2416 }, { - "epoch": 0.06858683314415437, + "epoch": 0.06849159794836918, "grad_norm": 0.0, - "learning_rate": 1.9922097922003016e-05, - "loss": 1.0946, + "learning_rate": 1.9922430101797476e-05, + "loss": 1.069, "step": 2417 }, { - "epoch": 0.06861520998864926, + "epoch": 0.06851993539063164, "grad_norm": 0.0, - "learning_rate": 1.9921983383004267e-05, - "loss": 1.0554, + "learning_rate": 1.9922315966550253e-05, + "loss": 1.1255, "step": 2418 }, { - "epoch": 0.06864358683314416, + "epoch": 0.06854827283289411, "grad_norm": 0.0, - "learning_rate": 1.9921868760194075e-05, - "loss": 1.127, + "learning_rate": 1.992220174772368e-05, + "loss": 1.1455, "step": 2419 }, { - "epoch": 0.06867196367763904, + "epoch": 0.06857661027515656, "grad_norm": 0.0, - "learning_rate": 1.9921754053573418e-05, - "loss": 1.051, + "learning_rate": 1.9922087445318713e-05, + "loss": 1.227, "step": 2420 }, { - "epoch": 0.06870034052213393, + "epoch": 0.06860494771741903, "grad_norm": 0.0, - "learning_rate": 1.9921639263143256e-05, - "loss": 1.0953, + "learning_rate": 1.9921973059336324e-05, + "loss": 1.2068, "step": 2421 }, { - "epoch": 0.06872871736662883, + "epoch": 0.06863328515968149, "grad_norm": 0.0, - "learning_rate": 1.992152438890456e-05, - "loss": 1.0477, + "learning_rate": 1.9921858589777466e-05, + "loss": 1.1687, "step": 2422 }, { - "epoch": 0.06875709421112372, + "epoch": 0.06866162260194394, "grad_norm": 0.0, - "learning_rate": 1.9921409430858304e-05, - "loss": 1.0334, + "learning_rate": 1.9921744036643113e-05, + "loss": 1.0588, "step": 2423 }, { - "epoch": 0.06878547105561862, + "epoch": 0.06868996004420641, "grad_norm": 0.0, - "learning_rate": 1.9921294389005453e-05, - "loss": 1.0635, + "learning_rate": 1.9921629399934224e-05, + "loss": 1.1398, "step": 2424 }, { - "epoch": 0.06881384790011351, + "epoch": 0.06871829748646888, "grad_norm": 0.0, - "learning_rate": 1.9921179263346986e-05, - "loss": 0.9706, + "learning_rate": 1.9921514679651767e-05, + "loss": 1.0794, "step": 2425 }, { - "epoch": 0.0688422247446084, + "epoch": 0.06874663492873133, "grad_norm": 0.0, - "learning_rate": 1.992106405388387e-05, - "loss": 1.2063, + "learning_rate": 1.9921399875796705e-05, + "loss": 1.1241, "step": 2426 }, { - "epoch": 0.0688706015891033, + "epoch": 0.0687749723709938, "grad_norm": 0.0, - "learning_rate": 1.992094876061708e-05, - "loss": 1.0378, + "learning_rate": 1.992128498837001e-05, + "loss": 1.1485, "step": 2427 }, { - "epoch": 0.06889897843359818, + "epoch": 0.06880330981325626, "grad_norm": 0.0, - "learning_rate": 1.9920833383547592e-05, - "loss": 1.084, + "learning_rate": 1.9921170017372645e-05, + "loss": 1.0587, "step": 2428 }, { - "epoch": 0.06892735527809307, + "epoch": 0.06883164725551871, "grad_norm": 0.0, - "learning_rate": 1.9920717922676375e-05, - "loss": 1.0638, + "learning_rate": 1.9921054962805586e-05, + "loss": 1.1708, "step": 2429 }, { - "epoch": 0.06895573212258797, + "epoch": 0.06885998469778118, "grad_norm": 0.0, - "learning_rate": 1.9920602378004407e-05, - "loss": 1.0687, + "learning_rate": 1.992093982466979e-05, + "loss": 1.0154, "step": 2430 }, { - "epoch": 0.06898410896708286, + "epoch": 0.06888832214004365, "grad_norm": 0.0, - "learning_rate": 1.9920486749532667e-05, - "loss": 1.0461, + "learning_rate": 1.992082460296624e-05, + "loss": 1.0878, "step": 2431 }, { - "epoch": 0.06901248581157775, + "epoch": 0.0689166595823061, "grad_norm": 0.0, - "learning_rate": 1.9920371037262132e-05, - "loss": 1.0089, + "learning_rate": 1.99207092976959e-05, + "loss": 1.1061, "step": 2432 }, { - "epoch": 0.06904086265607265, + "epoch": 0.06894499702456856, "grad_norm": 0.0, - "learning_rate": 1.9920255241193773e-05, - "loss": 1.1061, + "learning_rate": 1.9920593908859737e-05, + "loss": 1.1554, "step": 2433 }, { - "epoch": 0.06906923950056754, + "epoch": 0.06897333446683103, "grad_norm": 0.0, - "learning_rate": 1.9920139361328574e-05, - "loss": 1.1046, + "learning_rate": 1.9920478436458734e-05, + "loss": 1.1511, "step": 2434 }, { - "epoch": 0.06909761634506242, + "epoch": 0.06900167190909348, "grad_norm": 0.0, - "learning_rate": 1.992002339766751e-05, - "loss": 1.0121, + "learning_rate": 1.992036288049385e-05, + "loss": 1.1885, "step": 2435 }, { - "epoch": 0.06912599318955732, + "epoch": 0.06903000935135595, "grad_norm": 0.0, - "learning_rate": 1.9919907350211564e-05, - "loss": 1.1346, + "learning_rate": 1.9920247240966072e-05, + "loss": 1.0139, "step": 2436 }, { - "epoch": 0.06915437003405221, + "epoch": 0.06905834679361841, "grad_norm": 0.0, - "learning_rate": 1.9919791218961714e-05, - "loss": 1.1152, + "learning_rate": 1.992013151787636e-05, + "loss": 1.1512, "step": 2437 }, { - "epoch": 0.0691827468785471, + "epoch": 0.06908668423588087, "grad_norm": 0.0, - "learning_rate": 1.9919675003918944e-05, - "loss": 0.9783, + "learning_rate": 1.9920015711225705e-05, + "loss": 1.1169, "step": 2438 }, { - "epoch": 0.069211123723042, + "epoch": 0.06911502167814333, "grad_norm": 0.0, - "learning_rate": 1.9919558705084232e-05, - "loss": 1.0765, + "learning_rate": 1.9919899821015066e-05, + "loss": 1.0951, "step": 2439 }, { - "epoch": 0.06923950056753689, + "epoch": 0.0691433591204058, "grad_norm": 0.0, - "learning_rate": 1.991944232245856e-05, - "loss": 1.1439, + "learning_rate": 1.9919783847245436e-05, + "loss": 1.14, "step": 2440 }, { - "epoch": 0.06926787741203178, + "epoch": 0.06917169656266825, "grad_norm": 0.0, - "learning_rate": 1.9919325856042914e-05, - "loss": 1.0473, + "learning_rate": 1.9919667789917775e-05, + "loss": 1.1148, "step": 2441 }, { - "epoch": 0.06929625425652668, + "epoch": 0.06920003400493072, "grad_norm": 0.0, - "learning_rate": 1.9919209305838273e-05, - "loss": 0.983, + "learning_rate": 1.9919551649033074e-05, + "loss": 1.0568, "step": 2442 }, { - "epoch": 0.06932463110102156, + "epoch": 0.06922837144719318, "grad_norm": 0.0, - "learning_rate": 1.991909267184563e-05, - "loss": 1.0829, + "learning_rate": 1.99194354245923e-05, + "loss": 1.0253, "step": 2443 }, { - "epoch": 0.06935300794551646, + "epoch": 0.06925670888945563, "grad_norm": 0.0, - "learning_rate": 1.9918975954065964e-05, - "loss": 1.1662, + "learning_rate": 1.9919319116596446e-05, + "loss": 1.1598, "step": 2444 }, { - "epoch": 0.06938138479001135, + "epoch": 0.0692850463317181, "grad_norm": 0.0, - "learning_rate": 1.9918859152500264e-05, - "loss": 1.1079, + "learning_rate": 1.9919202725046477e-05, + "loss": 1.1743, "step": 2445 }, { - "epoch": 0.06940976163450624, + "epoch": 0.06931338377398057, "grad_norm": 0.0, - "learning_rate": 1.991874226714951e-05, - "loss": 1.1272, + "learning_rate": 1.9919086249943382e-05, + "loss": 1.1424, "step": 2446 }, { - "epoch": 0.06943813847900114, + "epoch": 0.06934172121624302, "grad_norm": 0.0, - "learning_rate": 1.9918625298014696e-05, - "loss": 1.0447, + "learning_rate": 1.991896969128814e-05, + "loss": 1.0466, "step": 2447 }, { - "epoch": 0.06946651532349603, + "epoch": 0.06937005865850548, "grad_norm": 0.0, - "learning_rate": 1.991850824509681e-05, - "loss": 1.1203, + "learning_rate": 1.9918853049081736e-05, + "loss": 1.0323, "step": 2448 }, { - "epoch": 0.06949489216799092, + "epoch": 0.06939839610076795, "grad_norm": 0.0, - "learning_rate": 1.991839110839684e-05, - "loss": 1.1371, + "learning_rate": 1.9918736323325146e-05, + "loss": 0.9834, "step": 2449 }, { - "epoch": 0.06952326901248582, + "epoch": 0.0694267335430304, "grad_norm": 0.0, - "learning_rate": 1.9918273887915773e-05, - "loss": 1.1733, + "learning_rate": 1.9918619514019357e-05, + "loss": 1.0334, "step": 2450 }, { - "epoch": 0.0695516458569807, + "epoch": 0.06945507098529287, "grad_norm": 0.0, - "learning_rate": 1.99181565836546e-05, - "loss": 1.0855, + "learning_rate": 1.9918502621165355e-05, + "loss": 1.238, "step": 2451 }, { - "epoch": 0.06958002270147559, + "epoch": 0.06948340842755533, "grad_norm": 0.0, - "learning_rate": 1.9918039195614315e-05, - "loss": 1.0376, + "learning_rate": 1.991838564476412e-05, + "loss": 1.1186, "step": 2452 }, { - "epoch": 0.06960839954597049, + "epoch": 0.06951174586981779, "grad_norm": 0.0, - "learning_rate": 1.9917921723795903e-05, - "loss": 0.9678, + "learning_rate": 1.9918268584816644e-05, + "loss": 1.1108, "step": 2453 }, { - "epoch": 0.06963677639046538, + "epoch": 0.06954008331208025, "grad_norm": 0.0, - "learning_rate": 1.9917804168200362e-05, - "loss": 1.1274, + "learning_rate": 1.99181514413239e-05, + "loss": 1.0081, "step": 2454 }, { - "epoch": 0.06966515323496027, + "epoch": 0.06956842075434272, "grad_norm": 0.0, - "learning_rate": 1.9917686528828682e-05, - "loss": 1.0511, + "learning_rate": 1.991803421428689e-05, + "loss": 1.0707, "step": 2455 }, { - "epoch": 0.06969353007945517, + "epoch": 0.06959675819660517, "grad_norm": 0.0, - "learning_rate": 1.9917568805681862e-05, - "loss": 1.0992, + "learning_rate": 1.9917916903706592e-05, + "loss": 1.1066, "step": 2456 }, { - "epoch": 0.06972190692395006, + "epoch": 0.06962509563886764, "grad_norm": 0.0, - "learning_rate": 1.991745099876089e-05, - "loss": 1.0132, + "learning_rate": 1.9917799509583998e-05, + "loss": 1.0096, "step": 2457 }, { - "epoch": 0.06975028376844494, + "epoch": 0.0696534330811301, "grad_norm": 0.0, - "learning_rate": 1.9917333108066764e-05, - "loss": 1.0661, + "learning_rate": 1.9917682031920096e-05, + "loss": 0.9724, "step": 2458 }, { - "epoch": 0.06977866061293984, + "epoch": 0.06968177052339256, "grad_norm": 0.0, - "learning_rate": 1.9917215133600478e-05, - "loss": 1.0488, + "learning_rate": 1.9917564470715876e-05, + "loss": 1.097, "step": 2459 }, { - "epoch": 0.06980703745743473, + "epoch": 0.06971010796565502, "grad_norm": 0.0, - "learning_rate": 1.9917097075363033e-05, - "loss": 1.0998, + "learning_rate": 1.991744682597233e-05, + "loss": 1.1008, "step": 2460 }, { - "epoch": 0.06983541430192962, + "epoch": 0.06973844540791749, "grad_norm": 0.0, - "learning_rate": 1.991697893335542e-05, - "loss": 1.1353, + "learning_rate": 1.991732909769044e-05, + "loss": 1.0598, "step": 2461 }, { - "epoch": 0.06986379114642452, + "epoch": 0.06976678285017994, "grad_norm": 0.0, - "learning_rate": 1.9916860707578642e-05, - "loss": 0.9968, + "learning_rate": 1.991721128587121e-05, + "loss": 1.098, "step": 2462 }, { - "epoch": 0.06989216799091941, + "epoch": 0.0697951202924424, "grad_norm": 0.0, - "learning_rate": 1.9916742398033694e-05, - "loss": 1.0874, + "learning_rate": 1.9917093390515626e-05, + "loss": 1.1189, "step": 2463 }, { - "epoch": 0.06992054483541431, + "epoch": 0.06982345773470487, "grad_norm": 0.0, - "learning_rate": 1.991662400472158e-05, - "loss": 0.994, + "learning_rate": 1.991697541162468e-05, + "loss": 1.0738, "step": 2464 }, { - "epoch": 0.0699489216799092, + "epoch": 0.06985179517696732, "grad_norm": 0.0, - "learning_rate": 1.9916505527643296e-05, - "loss": 1.0849, + "learning_rate": 1.9916857349199366e-05, + "loss": 1.2111, "step": 2465 }, { - "epoch": 0.06997729852440408, + "epoch": 0.06988013261922979, "grad_norm": 0.0, - "learning_rate": 1.991638696679984e-05, - "loss": 1.0941, + "learning_rate": 1.9916739203240682e-05, + "loss": 1.1232, "step": 2466 }, { - "epoch": 0.07000567536889898, + "epoch": 0.06990847006149226, "grad_norm": 0.0, - "learning_rate": 1.9916268322192224e-05, - "loss": 1.0577, + "learning_rate": 1.991662097374962e-05, + "loss": 1.1147, "step": 2467 }, { - "epoch": 0.07003405221339387, + "epoch": 0.06993680750375471, "grad_norm": 0.0, - "learning_rate": 1.991614959382144e-05, - "loss": 1.0192, + "learning_rate": 1.991650266072718e-05, + "loss": 1.0475, "step": 2468 }, { - "epoch": 0.07006242905788876, + "epoch": 0.06996514494601717, "grad_norm": 0.0, - "learning_rate": 1.9916030781688495e-05, - "loss": 1.158, + "learning_rate": 1.9916384264174354e-05, + "loss": 1.1089, "step": 2469 }, { - "epoch": 0.07009080590238366, + "epoch": 0.06999348238827964, "grad_norm": 0.0, - "learning_rate": 1.9915911885794395e-05, - "loss": 1.1432, + "learning_rate": 1.991626578409214e-05, + "loss": 0.9518, "step": 2470 }, { - "epoch": 0.07011918274687855, + "epoch": 0.07002181983054209, "grad_norm": 0.0, - "learning_rate": 1.9915792906140138e-05, - "loss": 1.0707, + "learning_rate": 1.991614722048154e-05, + "loss": 1.141, "step": 2471 }, { - "epoch": 0.07014755959137343, + "epoch": 0.07005015727280456, "grad_norm": 0.0, - "learning_rate": 1.9915673842726733e-05, - "loss": 1.1093, + "learning_rate": 1.9916028573343548e-05, + "loss": 1.0788, "step": 2472 }, { - "epoch": 0.07017593643586834, + "epoch": 0.07007849471506702, "grad_norm": 0.0, - "learning_rate": 1.9915554695555186e-05, - "loss": 0.9374, + "learning_rate": 1.9915909842679166e-05, + "loss": 1.0674, "step": 2473 }, { - "epoch": 0.07020431328036322, + "epoch": 0.07010683215732948, "grad_norm": 0.0, - "learning_rate": 1.9915435464626504e-05, - "loss": 1.0326, + "learning_rate": 1.991579102848939e-05, + "loss": 1.2218, "step": 2474 }, { - "epoch": 0.07023269012485811, + "epoch": 0.07013516959959194, "grad_norm": 0.0, - "learning_rate": 1.9915316149941694e-05, - "loss": 1.0177, + "learning_rate": 1.9915672130775226e-05, + "loss": 1.2065, "step": 2475 }, { - "epoch": 0.07026106696935301, + "epoch": 0.07016350704185441, "grad_norm": 0.0, - "learning_rate": 1.9915196751501757e-05, - "loss": 1.1109, + "learning_rate": 1.991555314953767e-05, + "loss": 1.2054, "step": 2476 }, { - "epoch": 0.0702894438138479, + "epoch": 0.07019184448411686, "grad_norm": 0.0, - "learning_rate": 1.991507726930771e-05, - "loss": 1.0547, + "learning_rate": 1.9915434084777738e-05, + "loss": 1.1859, "step": 2477 }, { - "epoch": 0.07031782065834279, + "epoch": 0.07022018192637933, "grad_norm": 0.0, - "learning_rate": 1.9914957703360564e-05, - "loss": 1.0151, + "learning_rate": 1.9915314936496412e-05, + "loss": 1.0495, "step": 2478 }, { - "epoch": 0.07034619750283769, + "epoch": 0.07024851936864179, "grad_norm": 0.0, - "learning_rate": 1.991483805366132e-05, - "loss": 1.0739, + "learning_rate": 1.9915195704694714e-05, + "loss": 1.014, "step": 2479 }, { - "epoch": 0.07037457434733257, + "epoch": 0.07027685681090425, "grad_norm": 0.0, - "learning_rate": 1.9914718320210994e-05, - "loss": 1.0057, + "learning_rate": 1.9915076389373635e-05, + "loss": 1.1375, "step": 2480 }, { - "epoch": 0.07040295119182746, + "epoch": 0.07030519425316671, "grad_norm": 0.0, - "learning_rate": 1.99145985030106e-05, - "loss": 1.1395, + "learning_rate": 1.9914956990534187e-05, + "loss": 1.0633, "step": 2481 }, { - "epoch": 0.07043132803632236, + "epoch": 0.07033353169542918, "grad_norm": 0.0, - "learning_rate": 1.991447860206114e-05, - "loss": 1.065, + "learning_rate": 1.9914837508177375e-05, + "loss": 1.0493, "step": 2482 }, { - "epoch": 0.07045970488081725, + "epoch": 0.07036186913769163, "grad_norm": 0.0, - "learning_rate": 1.991435861736364e-05, - "loss": 1.0942, + "learning_rate": 1.9914717942304205e-05, + "loss": 1.0796, "step": 2483 }, { - "epoch": 0.07048808172531215, + "epoch": 0.0703902065799541, "grad_norm": 0.0, - "learning_rate": 1.9914238548919106e-05, - "loss": 1.0609, + "learning_rate": 1.9914598292915684e-05, + "loss": 1.126, "step": 2484 }, { - "epoch": 0.07051645856980704, + "epoch": 0.07041854402221656, "grad_norm": 0.0, - "learning_rate": 1.9914118396728547e-05, - "loss": 1.0809, + "learning_rate": 1.991447856001282e-05, + "loss": 1.0625, "step": 2485 }, { - "epoch": 0.07054483541430193, + "epoch": 0.07044688146447901, "grad_norm": 0.0, - "learning_rate": 1.991399816079299e-05, - "loss": 0.9764, + "learning_rate": 1.9914358743596623e-05, + "loss": 1.0393, "step": 2486 }, { - "epoch": 0.07057321225879683, + "epoch": 0.07047521890674148, "grad_norm": 0.0, - "learning_rate": 1.9913877841113444e-05, - "loss": 1.2016, + "learning_rate": 1.9914238843668096e-05, + "loss": 1.2194, "step": 2487 }, { - "epoch": 0.07060158910329171, + "epoch": 0.07050355634900395, "grad_norm": 0.0, - "learning_rate": 1.9913757437690928e-05, - "loss": 0.9975, + "learning_rate": 1.991411886022826e-05, + "loss": 1.0561, "step": 2488 }, { - "epoch": 0.0706299659477866, + "epoch": 0.0705318937912664, "grad_norm": 0.0, - "learning_rate": 1.9913636950526455e-05, - "loss": 1.099, + "learning_rate": 1.9913998793278116e-05, + "loss": 1.065, "step": 2489 }, { - "epoch": 0.0706583427922815, + "epoch": 0.07056023123352886, "grad_norm": 0.0, - "learning_rate": 1.9913516379621046e-05, - "loss": 1.1277, + "learning_rate": 1.9913878642818676e-05, + "loss": 1.1102, "step": 2490 }, { - "epoch": 0.07068671963677639, + "epoch": 0.07058856867579133, "grad_norm": 0.0, - "learning_rate": 1.9913395724975718e-05, - "loss": 1.0233, + "learning_rate": 1.991375840885096e-05, + "loss": 0.9936, "step": 2491 }, { - "epoch": 0.07071509648127128, + "epoch": 0.07061690611805378, "grad_norm": 0.0, - "learning_rate": 1.9913274986591488e-05, - "loss": 1.0486, + "learning_rate": 1.9913638091375972e-05, + "loss": 1.0798, "step": 2492 }, { - "epoch": 0.07074347332576618, + "epoch": 0.07064524356031625, "grad_norm": 0.0, - "learning_rate": 1.9913154164469382e-05, - "loss": 1.0503, + "learning_rate": 1.991351769039473e-05, + "loss": 1.1465, "step": 2493 }, { - "epoch": 0.07077185017026107, + "epoch": 0.0706735810025787, "grad_norm": 0.0, - "learning_rate": 1.991303325861042e-05, - "loss": 1.0247, + "learning_rate": 1.9913397205908248e-05, + "loss": 1.0569, "step": 2494 }, { - "epoch": 0.07080022701475595, + "epoch": 0.07070191844484117, "grad_norm": 0.0, - "learning_rate": 1.991291226901561e-05, - "loss": 0.9715, + "learning_rate": 1.9913276637917537e-05, + "loss": 1.071, "step": 2495 }, { - "epoch": 0.07082860385925085, + "epoch": 0.07073025588710363, "grad_norm": 0.0, - "learning_rate": 1.9912791195685992e-05, - "loss": 1.0267, + "learning_rate": 1.9913155986423618e-05, + "loss": 1.106, "step": 2496 }, { - "epoch": 0.07085698070374574, + "epoch": 0.07075859332936608, "grad_norm": 0.0, - "learning_rate": 1.9912670038622583e-05, - "loss": 1.006, + "learning_rate": 1.9913035251427507e-05, + "loss": 1.1087, "step": 2497 }, { - "epoch": 0.07088535754824063, + "epoch": 0.07078693077162855, "grad_norm": 0.0, - "learning_rate": 1.9912548797826402e-05, - "loss": 1.0259, + "learning_rate": 1.9912914432930213e-05, + "loss": 1.1509, "step": 2498 }, { - "epoch": 0.07091373439273553, + "epoch": 0.07081526821389102, "grad_norm": 0.0, - "learning_rate": 1.9912427473298473e-05, - "loss": 1.0486, + "learning_rate": 1.9912793530932765e-05, + "loss": 0.9951, "step": 2499 }, { - "epoch": 0.07094211123723042, + "epoch": 0.07084360565615347, "grad_norm": 0.0, - "learning_rate": 1.9912306065039827e-05, - "loss": 1.0762, + "learning_rate": 1.9912672545436177e-05, + "loss": 1.1615, "step": 2500 }, { - "epoch": 0.0709704880817253, + "epoch": 0.07087194309841593, "grad_norm": 0.0, - "learning_rate": 1.9912184573051486e-05, - "loss": 1.073, + "learning_rate": 1.9912551476441463e-05, + "loss": 1.1492, "step": 2501 }, { - "epoch": 0.0709988649262202, + "epoch": 0.0709002805406784, "grad_norm": 0.0, - "learning_rate": 1.9912062997334476e-05, - "loss": 1.0759, + "learning_rate": 1.991243032394965e-05, + "loss": 1.1332, "step": 2502 }, { - "epoch": 0.0710272417707151, + "epoch": 0.07092861798294085, "grad_norm": 0.0, - "learning_rate": 1.9911941337889825e-05, - "loss": 1.0722, + "learning_rate": 1.9912309087961753e-05, + "loss": 1.2188, "step": 2503 }, { - "epoch": 0.07105561861521, + "epoch": 0.07095695542520332, "grad_norm": 0.0, - "learning_rate": 1.9911819594718556e-05, - "loss": 0.9524, + "learning_rate": 1.99121877684788e-05, + "loss": 1.2138, "step": 2504 }, { - "epoch": 0.07108399545970488, + "epoch": 0.07098529286746579, "grad_norm": 0.0, - "learning_rate": 1.9911697767821705e-05, - "loss": 1.0877, + "learning_rate": 1.9912066365501804e-05, + "loss": 1.0815, "step": 2505 }, { - "epoch": 0.07111237230419977, + "epoch": 0.07101363030972824, "grad_norm": 0.0, - "learning_rate": 1.9911575857200296e-05, - "loss": 1.1547, + "learning_rate": 1.9911944879031794e-05, + "loss": 1.2, "step": 2506 }, { - "epoch": 0.07114074914869467, + "epoch": 0.0710419677519907, "grad_norm": 0.0, - "learning_rate": 1.9911453862855362e-05, - "loss": 1.1913, + "learning_rate": 1.991182330906979e-05, + "loss": 1.145, "step": 2507 }, { - "epoch": 0.07116912599318956, + "epoch": 0.07107030519425317, "grad_norm": 0.0, - "learning_rate": 1.991133178478793e-05, - "loss": 1.039, + "learning_rate": 1.991170165561682e-05, + "loss": 1.2362, "step": 2508 }, { - "epoch": 0.07119750283768445, + "epoch": 0.07109864263651562, "grad_norm": 0.0, - "learning_rate": 1.991120962299903e-05, - "loss": 1.0932, + "learning_rate": 1.9911579918673903e-05, + "loss": 1.0967, "step": 2509 }, { - "epoch": 0.07122587968217935, + "epoch": 0.07112698007877809, "grad_norm": 0.0, - "learning_rate": 1.99110873774897e-05, - "loss": 1.1093, + "learning_rate": 1.9911458098242072e-05, + "loss": 1.0776, "step": 2510 }, { - "epoch": 0.07125425652667423, + "epoch": 0.07115531752104055, "grad_norm": 0.0, - "learning_rate": 1.991096504826097e-05, - "loss": 1.0519, + "learning_rate": 1.9911336194322347e-05, + "loss": 1.0296, "step": 2511 }, { - "epoch": 0.07128263337116912, + "epoch": 0.071183654963303, "grad_norm": 0.0, - "learning_rate": 1.9910842635313875e-05, - "loss": 1.1132, + "learning_rate": 1.9911214206915758e-05, + "loss": 1.0834, "step": 2512 }, { - "epoch": 0.07131101021566402, + "epoch": 0.07121199240556547, "grad_norm": 0.0, - "learning_rate": 1.991072013864944e-05, - "loss": 1.0199, + "learning_rate": 1.991109213602333e-05, + "loss": 1.0704, "step": 2513 }, { - "epoch": 0.07133938706015891, + "epoch": 0.07124032984782794, "grad_norm": 0.0, - "learning_rate": 1.9910597558268713e-05, - "loss": 0.9458, + "learning_rate": 1.991096998164609e-05, + "loss": 1.1282, "step": 2514 }, { - "epoch": 0.0713677639046538, + "epoch": 0.07126866729009039, "grad_norm": 0.0, - "learning_rate": 1.9910474894172717e-05, - "loss": 1.0148, + "learning_rate": 1.9910847743785077e-05, + "loss": 1.1039, "step": 2515 }, { - "epoch": 0.0713961407491487, + "epoch": 0.07129700473235286, "grad_norm": 0.0, - "learning_rate": 1.99103521463625e-05, - "loss": 1.1014, + "learning_rate": 1.9910725422441305e-05, + "loss": 1.0017, "step": 2516 }, { - "epoch": 0.07142451759364359, + "epoch": 0.07132534217461532, "grad_norm": 0.0, - "learning_rate": 1.9910229314839088e-05, - "loss": 1.0468, + "learning_rate": 1.9910603017615816e-05, + "loss": 1.1631, "step": 2517 }, { - "epoch": 0.07145289443813847, + "epoch": 0.07135367961687777, "grad_norm": 0.0, - "learning_rate": 1.9910106399603525e-05, - "loss": 0.9307, + "learning_rate": 1.991048052930964e-05, + "loss": 1.1009, "step": 2518 }, { - "epoch": 0.07148127128263337, + "epoch": 0.07138201705914024, "grad_norm": 0.0, - "learning_rate": 1.9909983400656848e-05, - "loss": 1.104, + "learning_rate": 1.99103579575238e-05, + "loss": 1.0869, "step": 2519 }, { - "epoch": 0.07150964812712826, + "epoch": 0.0714103545014027, "grad_norm": 0.0, - "learning_rate": 1.9909860318000097e-05, - "loss": 1.0524, + "learning_rate": 1.9910235302259344e-05, + "loss": 1.1244, "step": 2520 }, { - "epoch": 0.07153802497162315, + "epoch": 0.07143869194366516, "grad_norm": 0.0, - "learning_rate": 1.990973715163431e-05, - "loss": 1.0929, + "learning_rate": 1.9910112563517288e-05, + "loss": 1.1914, "step": 2521 }, { - "epoch": 0.07156640181611805, + "epoch": 0.07146702938592762, "grad_norm": 0.0, - "learning_rate": 1.990961390156053e-05, - "loss": 1.1543, + "learning_rate": 1.9909989741298676e-05, + "loss": 1.0518, "step": 2522 }, { - "epoch": 0.07159477866061294, + "epoch": 0.07149536682819009, "grad_norm": 0.0, - "learning_rate": 1.990949056777979e-05, - "loss": 1.1684, + "learning_rate": 1.9909866835604542e-05, + "loss": 1.0879, "step": 2523 }, { - "epoch": 0.07162315550510784, + "epoch": 0.07152370427045254, "grad_norm": 0.0, - "learning_rate": 1.990936715029314e-05, - "loss": 1.0423, + "learning_rate": 1.9909743846435916e-05, + "loss": 1.132, "step": 2524 }, { - "epoch": 0.07165153234960273, + "epoch": 0.07155204171271501, "grad_norm": 0.0, - "learning_rate": 1.9909243649101625e-05, - "loss": 1.0888, + "learning_rate": 1.990962077379384e-05, + "loss": 1.0859, "step": 2525 }, { - "epoch": 0.07167990919409761, + "epoch": 0.07158037915497747, "grad_norm": 0.0, - "learning_rate": 1.9909120064206277e-05, - "loss": 1.1314, + "learning_rate": 1.990949761767935e-05, + "loss": 1.1337, "step": 2526 }, { - "epoch": 0.07170828603859251, + "epoch": 0.07160871659723993, "grad_norm": 0.0, - "learning_rate": 1.990899639560815e-05, - "loss": 0.9606, + "learning_rate": 1.990937437809348e-05, + "loss": 1.1406, "step": 2527 }, { - "epoch": 0.0717366628830874, + "epoch": 0.07163705403950239, "grad_norm": 0.0, - "learning_rate": 1.9908872643308284e-05, - "loss": 1.0649, + "learning_rate": 1.9909251055037272e-05, + "loss": 1.1811, "step": 2528 }, { - "epoch": 0.07176503972758229, + "epoch": 0.07166539148176486, "grad_norm": 0.0, - "learning_rate": 1.9908748807307725e-05, - "loss": 0.9916, + "learning_rate": 1.9909127648511758e-05, + "loss": 1.0289, "step": 2529 }, { - "epoch": 0.07179341657207719, + "epoch": 0.07169372892402731, "grad_norm": 0.0, - "learning_rate": 1.990862488760752e-05, - "loss": 0.9976, + "learning_rate": 1.9909004158517984e-05, + "loss": 1.0702, "step": 2530 }, { - "epoch": 0.07182179341657208, + "epoch": 0.07172206636628978, "grad_norm": 0.0, - "learning_rate": 1.9908500884208716e-05, - "loss": 1.0401, + "learning_rate": 1.990888058505699e-05, + "loss": 1.1262, "step": 2531 }, { - "epoch": 0.07185017026106696, + "epoch": 0.07175040380855224, "grad_norm": 0.0, - "learning_rate": 1.9908376797112357e-05, - "loss": 1.0511, + "learning_rate": 1.9908756928129814e-05, + "loss": 0.9265, "step": 2532 }, { - "epoch": 0.07187854710556187, + "epoch": 0.0717787412508147, "grad_norm": 0.0, - "learning_rate": 1.9908252626319494e-05, - "loss": 0.9583, + "learning_rate": 1.9908633187737502e-05, + "loss": 1.2058, "step": 2533 }, { - "epoch": 0.07190692395005675, + "epoch": 0.07180707869307716, "grad_norm": 0.0, - "learning_rate": 1.9908128371831177e-05, - "loss": 1.1569, + "learning_rate": 1.990850936388109e-05, + "loss": 1.1541, "step": 2534 }, { - "epoch": 0.07193530079455164, + "epoch": 0.07183541613533963, "grad_norm": 0.0, - "learning_rate": 1.9908004033648452e-05, - "loss": 0.9966, + "learning_rate": 1.9908385456561624e-05, + "loss": 0.992, "step": 2535 }, { - "epoch": 0.07196367763904654, + "epoch": 0.07186375357760208, "grad_norm": 0.0, - "learning_rate": 1.9907879611772377e-05, - "loss": 1.1358, + "learning_rate": 1.990826146578015e-05, + "loss": 1.1069, "step": 2536 }, { - "epoch": 0.07199205448354143, + "epoch": 0.07189209101986455, "grad_norm": 0.0, - "learning_rate": 1.9907755106203992e-05, - "loss": 1.1798, + "learning_rate": 1.9908137391537708e-05, + "loss": 1.0753, "step": 2537 }, { - "epoch": 0.07202043132803632, + "epoch": 0.07192042846212701, "grad_norm": 0.0, - "learning_rate": 1.9907630516944358e-05, - "loss": 1.1101, + "learning_rate": 1.9908013233835346e-05, + "loss": 1.0791, "step": 2538 }, { - "epoch": 0.07204880817253122, + "epoch": 0.07194876590438946, "grad_norm": 0.0, - "learning_rate": 1.9907505843994524e-05, - "loss": 1.1682, + "learning_rate": 1.990788899267411e-05, + "loss": 1.0737, "step": 2539 }, { - "epoch": 0.0720771850170261, + "epoch": 0.07197710334665193, "grad_norm": 0.0, - "learning_rate": 1.990738108735554e-05, - "loss": 1.0648, + "learning_rate": 1.9907764668055046e-05, + "loss": 1.0934, "step": 2540 }, { - "epoch": 0.07210556186152099, + "epoch": 0.0720054407889144, "grad_norm": 0.0, - "learning_rate": 1.9907256247028463e-05, - "loss": 1.0364, + "learning_rate": 1.99076402599792e-05, + "loss": 1.0193, "step": 2541 }, { - "epoch": 0.0721339387060159, + "epoch": 0.07203377823117685, "grad_norm": 0.0, - "learning_rate": 1.9907131323014347e-05, - "loss": 1.0339, + "learning_rate": 1.9907515768447622e-05, + "loss": 1.1414, "step": 2542 }, { - "epoch": 0.07216231555051078, + "epoch": 0.07206211567343931, "grad_norm": 0.0, - "learning_rate": 1.990700631531425e-05, - "loss": 1.0399, + "learning_rate": 1.9907391193461357e-05, + "loss": 1.1377, "step": 2543 }, { - "epoch": 0.07219069239500568, + "epoch": 0.07209045311570178, "grad_norm": 0.0, - "learning_rate": 1.990688122392922e-05, - "loss": 1.0823, + "learning_rate": 1.9907266535021465e-05, + "loss": 1.0948, "step": 2544 }, { - "epoch": 0.07221906923950057, + "epoch": 0.07211879055796423, "grad_norm": 0.0, - "learning_rate": 1.9906756048860323e-05, - "loss": 1.077, + "learning_rate": 1.990714179312898e-05, + "loss": 1.0475, "step": 2545 }, { - "epoch": 0.07224744608399546, + "epoch": 0.0721471280002267, "grad_norm": 0.0, - "learning_rate": 1.9906630790108612e-05, - "loss": 1.011, + "learning_rate": 1.9907016967784963e-05, + "loss": 1.0776, "step": 2546 }, { - "epoch": 0.07227582292849036, + "epoch": 0.07217546544248916, "grad_norm": 0.0, - "learning_rate": 1.9906505447675144e-05, - "loss": 1.1248, + "learning_rate": 1.990689205899046e-05, + "loss": 1.0515, "step": 2547 }, { - "epoch": 0.07230419977298524, + "epoch": 0.07220380288475162, "grad_norm": 0.0, - "learning_rate": 1.990638002156098e-05, - "loss": 0.9417, + "learning_rate": 1.9906767066746532e-05, + "loss": 1.0063, "step": 2548 }, { - "epoch": 0.07233257661748013, + "epoch": 0.07223214032701408, "grad_norm": 0.0, - "learning_rate": 1.990625451176718e-05, - "loss": 1.0848, + "learning_rate": 1.9906641991054222e-05, + "loss": 1.1796, "step": 2549 }, { - "epoch": 0.07236095346197503, + "epoch": 0.07226047776927655, "grad_norm": 0.0, - "learning_rate": 1.99061289182948e-05, - "loss": 1.0529, + "learning_rate": 1.9906516831914592e-05, + "loss": 1.1831, "step": 2550 }, { - "epoch": 0.07238933030646992, + "epoch": 0.072288815211539, "grad_norm": 0.0, - "learning_rate": 1.9906003241144907e-05, - "loss": 0.9844, + "learning_rate": 1.9906391589328687e-05, + "loss": 1.0424, "step": 2551 }, { - "epoch": 0.07241770715096481, + "epoch": 0.07231715265380147, "grad_norm": 0.0, - "learning_rate": 1.9905877480318556e-05, - "loss": 1.0559, + "learning_rate": 1.9906266263297572e-05, + "loss": 1.0894, "step": 2552 }, { - "epoch": 0.07244608399545971, + "epoch": 0.07234549009606393, "grad_norm": 0.0, - "learning_rate": 1.9905751635816816e-05, - "loss": 1.1498, + "learning_rate": 1.9906140853822294e-05, + "loss": 1.0374, "step": 2553 }, { - "epoch": 0.0724744608399546, + "epoch": 0.07237382753832639, "grad_norm": 0.0, - "learning_rate": 1.990562570764074e-05, - "loss": 1.0977, + "learning_rate": 1.9906015360903913e-05, + "loss": 0.9755, "step": 2554 }, { - "epoch": 0.07250283768444948, + "epoch": 0.07240216498058885, "grad_norm": 0.0, - "learning_rate": 1.9905499695791405e-05, - "loss": 1.0185, + "learning_rate": 1.990588978454349e-05, + "loss": 1.1503, "step": 2555 }, { - "epoch": 0.07253121452894438, + "epoch": 0.07243050242285132, "grad_norm": 0.0, - "learning_rate": 1.9905373600269868e-05, - "loss": 1.0683, + "learning_rate": 1.9905764124742074e-05, + "loss": 1.0421, "step": 2556 }, { - "epoch": 0.07255959137343927, + "epoch": 0.07245883986511377, "grad_norm": 0.0, - "learning_rate": 1.990524742107719e-05, - "loss": 0.9836, + "learning_rate": 1.990563838150073e-05, + "loss": 1.1569, "step": 2557 }, { - "epoch": 0.07258796821793416, + "epoch": 0.07248717730737624, "grad_norm": 0.0, - "learning_rate": 1.990512115821445e-05, - "loss": 0.9332, + "learning_rate": 1.9905512554820516e-05, + "loss": 1.0815, "step": 2558 }, { - "epoch": 0.07261634506242906, + "epoch": 0.0725155147496387, "grad_norm": 0.0, - "learning_rate": 1.9904994811682702e-05, - "loss": 1.1359, + "learning_rate": 1.9905386644702495e-05, + "loss": 1.0945, "step": 2559 }, { - "epoch": 0.07264472190692395, + "epoch": 0.07254385219190115, "grad_norm": 0.0, - "learning_rate": 1.9904868381483015e-05, - "loss": 1.075, + "learning_rate": 1.990526065114772e-05, + "loss": 1.0645, "step": 2560 }, { - "epoch": 0.07267309875141884, + "epoch": 0.07257218963416362, "grad_norm": 0.0, - "learning_rate": 1.9904741867616463e-05, - "loss": 1.0912, + "learning_rate": 1.9905134574157255e-05, + "loss": 1.1198, "step": 2561 }, { - "epoch": 0.07270147559591374, + "epoch": 0.07260052707642609, "grad_norm": 0.0, - "learning_rate": 1.9904615270084113e-05, - "loss": 1.0843, + "learning_rate": 1.9905008413732164e-05, + "loss": 1.0877, "step": 2562 }, { - "epoch": 0.07272985244040862, + "epoch": 0.07262886451868854, "grad_norm": 0.0, - "learning_rate": 1.990448858888703e-05, - "loss": 1.0455, + "learning_rate": 1.9904882169873512e-05, + "loss": 1.0368, "step": 2563 }, { - "epoch": 0.07275822928490353, + "epoch": 0.072657201960951, "grad_norm": 0.0, - "learning_rate": 1.9904361824026284e-05, - "loss": 1.0604, + "learning_rate": 1.9904755842582358e-05, + "loss": 1.2388, "step": 2564 }, { - "epoch": 0.07278660612939841, + "epoch": 0.07268553940321347, "grad_norm": 0.0, - "learning_rate": 1.990423497550295e-05, - "loss": 0.9955, + "learning_rate": 1.9904629431859767e-05, + "loss": 1.0848, "step": 2565 }, { - "epoch": 0.0728149829738933, + "epoch": 0.07271387684547592, "grad_norm": 0.0, - "learning_rate": 1.99041080433181e-05, - "loss": 1.08, + "learning_rate": 1.9904502937706806e-05, + "loss": 1.0349, "step": 2566 }, { - "epoch": 0.0728433598183882, + "epoch": 0.07274221428773839, "grad_norm": 0.0, - "learning_rate": 1.99039810274728e-05, - "loss": 1.1174, + "learning_rate": 1.9904376360124534e-05, + "loss": 1.1479, "step": 2567 }, { - "epoch": 0.07287173666288309, + "epoch": 0.07277055173000085, "grad_norm": 0.0, - "learning_rate": 1.9903853927968134e-05, - "loss": 1.0415, + "learning_rate": 1.9904249699114027e-05, + "loss": 1.1365, "step": 2568 }, { - "epoch": 0.07290011350737798, + "epoch": 0.0727988891722633, "grad_norm": 0.0, - "learning_rate": 1.9903726744805163e-05, - "loss": 0.994, + "learning_rate": 1.9904122954676345e-05, + "loss": 1.084, "step": 2569 }, { - "epoch": 0.07292849035187288, + "epoch": 0.07282722661452577, "grad_norm": 0.0, - "learning_rate": 1.990359947798497e-05, - "loss": 1.0046, + "learning_rate": 1.9903996126812555e-05, + "loss": 0.9718, "step": 2570 }, { - "epoch": 0.07295686719636776, + "epoch": 0.07285556405678824, "grad_norm": 0.0, - "learning_rate": 1.9903472127508623e-05, - "loss": 1.1033, + "learning_rate": 1.990386921552373e-05, + "loss": 1.1841, "step": 2571 }, { - "epoch": 0.07298524404086265, + "epoch": 0.07288390149905069, "grad_norm": 0.0, - "learning_rate": 1.9903344693377203e-05, - "loss": 1.1935, + "learning_rate": 1.990374222081094e-05, + "loss": 1.1064, "step": 2572 }, { - "epoch": 0.07301362088535755, + "epoch": 0.07291223894131316, "grad_norm": 0.0, - "learning_rate": 1.9903217175591786e-05, - "loss": 1.1702, + "learning_rate": 1.9903615142675247e-05, + "loss": 1.0711, "step": 2573 }, { - "epoch": 0.07304199772985244, + "epoch": 0.07294057638357562, "grad_norm": 0.0, - "learning_rate": 1.9903089574153452e-05, - "loss": 1.1328, + "learning_rate": 1.9903487981117732e-05, + "loss": 1.0527, "step": 2574 }, { - "epoch": 0.07307037457434733, + "epoch": 0.07296891382583807, "grad_norm": 0.0, - "learning_rate": 1.990296188906327e-05, - "loss": 1.019, + "learning_rate": 1.9903360736139455e-05, + "loss": 1.1222, "step": 2575 }, { - "epoch": 0.07309875141884223, + "epoch": 0.07299725126810054, "grad_norm": 0.0, - "learning_rate": 1.9902834120322326e-05, - "loss": 1.0583, + "learning_rate": 1.9903233407741494e-05, + "loss": 1.1266, "step": 2576 }, { - "epoch": 0.07312712826333712, + "epoch": 0.073025588710363, "grad_norm": 0.0, - "learning_rate": 1.9902706267931698e-05, - "loss": 1.0551, + "learning_rate": 1.990310599592492e-05, + "loss": 1.0409, "step": 2577 }, { - "epoch": 0.073155505107832, + "epoch": 0.07305392615262546, "grad_norm": 0.0, - "learning_rate": 1.9902578331892464e-05, - "loss": 1.1555, + "learning_rate": 1.990297850069081e-05, + "loss": 1.1448, "step": 2578 }, { - "epoch": 0.0731838819523269, + "epoch": 0.07308226359488793, "grad_norm": 0.0, - "learning_rate": 1.9902450312205707e-05, - "loss": 1.2054, + "learning_rate": 1.9902850922040227e-05, + "loss": 1.0356, "step": 2579 }, { - "epoch": 0.07321225879682179, + "epoch": 0.07311060103715039, "grad_norm": 0.0, - "learning_rate": 1.99023222088725e-05, - "loss": 1.0443, + "learning_rate": 1.990272325997426e-05, + "loss": 1.0259, "step": 2580 }, { - "epoch": 0.07324063564131668, + "epoch": 0.07313893847941284, "grad_norm": 0.0, - "learning_rate": 1.9902194021893943e-05, - "loss": 1.0368, + "learning_rate": 1.990259551449398e-05, + "loss": 1.0751, "step": 2581 }, { - "epoch": 0.07326901248581158, + "epoch": 0.07316727592167531, "grad_norm": 0.0, - "learning_rate": 1.9902065751271103e-05, - "loss": 1.032, + "learning_rate": 1.9902467685600456e-05, + "loss": 1.0505, "step": 2582 }, { - "epoch": 0.07329738933030647, + "epoch": 0.07319561336393778, "grad_norm": 0.0, - "learning_rate": 1.9901937397005065e-05, - "loss": 1.1155, + "learning_rate": 1.990233977329477e-05, + "loss": 1.1739, "step": 2583 }, { - "epoch": 0.07332576617480137, + "epoch": 0.07322395080620023, "grad_norm": 0.0, - "learning_rate": 1.990180895909692e-05, - "loss": 1.0077, + "learning_rate": 1.9902211777577998e-05, + "loss": 1.1284, "step": 2584 }, { - "epoch": 0.07335414301929626, + "epoch": 0.0732522882484627, "grad_norm": 0.0, - "learning_rate": 1.990168043754775e-05, - "loss": 1.0623, + "learning_rate": 1.9902083698451222e-05, + "loss": 1.0204, "step": 2585 }, { - "epoch": 0.07338251986379114, + "epoch": 0.07328062569072516, "grad_norm": 0.0, - "learning_rate": 1.9901551832358642e-05, - "loss": 1.2307, + "learning_rate": 1.9901955535915517e-05, + "loss": 1.0604, "step": 2586 }, { - "epoch": 0.07341089670828604, + "epoch": 0.07330896313298761, "grad_norm": 0.0, - "learning_rate": 1.9901423143530677e-05, - "loss": 1.023, + "learning_rate": 1.9901827289971962e-05, + "loss": 1.1032, "step": 2587 }, { - "epoch": 0.07343927355278093, + "epoch": 0.07333730057525008, "grad_norm": 0.0, - "learning_rate": 1.9901294371064947e-05, - "loss": 1.1315, + "learning_rate": 1.9901698960621642e-05, + "loss": 1.0025, "step": 2588 }, { - "epoch": 0.07346765039727582, + "epoch": 0.07336563801751254, "grad_norm": 0.0, - "learning_rate": 1.9901165514962536e-05, - "loss": 1.0136, + "learning_rate": 1.990157054786563e-05, + "loss": 1.0899, "step": 2589 }, { - "epoch": 0.07349602724177072, + "epoch": 0.073393975459775, "grad_norm": 0.0, - "learning_rate": 1.990103657522454e-05, - "loss": 1.1786, + "learning_rate": 1.9901442051705016e-05, + "loss": 1.0463, "step": 2590 }, { - "epoch": 0.07352440408626561, + "epoch": 0.07342231290203746, "grad_norm": 0.0, - "learning_rate": 1.990090755185204e-05, - "loss": 1.255, + "learning_rate": 1.990131347214088e-05, + "loss": 1.085, "step": 2591 }, { - "epoch": 0.0735527809307605, + "epoch": 0.07345065034429993, "grad_norm": 0.0, - "learning_rate": 1.9900778444846126e-05, - "loss": 1.1486, + "learning_rate": 1.99011848091743e-05, + "loss": 1.1575, "step": 2592 }, { - "epoch": 0.0735811577752554, + "epoch": 0.07347898778656238, "grad_norm": 0.0, - "learning_rate": 1.9900649254207894e-05, - "loss": 0.9801, + "learning_rate": 1.9901056062806363e-05, + "loss": 1.1707, "step": 2593 }, { - "epoch": 0.07360953461975028, + "epoch": 0.07350732522882485, "grad_norm": 0.0, - "learning_rate": 1.9900519979938436e-05, - "loss": 1.0435, + "learning_rate": 1.990092723303816e-05, + "loss": 1.0328, "step": 2594 }, { - "epoch": 0.07363791146424517, + "epoch": 0.07353566267108731, "grad_norm": 0.0, - "learning_rate": 1.9900390622038834e-05, - "loss": 1.0346, + "learning_rate": 1.9900798319870763e-05, + "loss": 1.0881, "step": 2595 }, { - "epoch": 0.07366628830874007, + "epoch": 0.07356400011334976, "grad_norm": 0.0, - "learning_rate": 1.9900261180510192e-05, - "loss": 1.082, + "learning_rate": 1.9900669323305272e-05, + "loss": 1.0588, "step": 2596 }, { - "epoch": 0.07369466515323496, + "epoch": 0.07359233755561223, "grad_norm": 0.0, - "learning_rate": 1.9900131655353597e-05, - "loss": 1.1472, + "learning_rate": 1.9900540243342764e-05, + "loss": 1.1279, "step": 2597 }, { - "epoch": 0.07372304199772985, + "epoch": 0.0736206749978747, "grad_norm": 0.0, - "learning_rate": 1.9900002046570147e-05, - "loss": 1.0288, + "learning_rate": 1.990041107998433e-05, + "loss": 1.1071, "step": 2598 }, { - "epoch": 0.07375141884222475, + "epoch": 0.07364901244013715, "grad_norm": 0.0, - "learning_rate": 1.989987235416093e-05, - "loss": 1.1273, + "learning_rate": 1.990028183323105e-05, + "loss": 1.0656, "step": 2599 }, { - "epoch": 0.07377979568671963, + "epoch": 0.07367734988239961, "grad_norm": 0.0, - "learning_rate": 1.989974257812705e-05, - "loss": 1.0779, + "learning_rate": 1.9900152503084026e-05, + "loss": 1.0815, "step": 2600 }, { - "epoch": 0.07380817253121452, + "epoch": 0.07370568732466208, "grad_norm": 0.0, - "learning_rate": 1.9899612718469603e-05, - "loss": 0.9949, + "learning_rate": 1.990002308954434e-05, + "loss": 1.0487, "step": 2601 }, { - "epoch": 0.07383654937570942, + "epoch": 0.07373402476692453, "grad_norm": 0.0, - "learning_rate": 1.9899482775189675e-05, - "loss": 1.0545, + "learning_rate": 1.9899893592613086e-05, + "loss": 1.1263, "step": 2602 }, { - "epoch": 0.07386492622020431, + "epoch": 0.073762362209187, "grad_norm": 0.0, - "learning_rate": 1.9899352748288377e-05, - "loss": 1.1253, + "learning_rate": 1.989976401229135e-05, + "loss": 1.1362, "step": 2603 }, { - "epoch": 0.07389330306469921, + "epoch": 0.07379069965144947, "grad_norm": 0.0, - "learning_rate": 1.9899222637766798e-05, - "loss": 1.02, + "learning_rate": 1.9899634348580226e-05, + "loss": 1.1052, "step": 2604 }, { - "epoch": 0.0739216799091941, + "epoch": 0.07381903709371192, "grad_norm": 0.0, - "learning_rate": 1.989909244362604e-05, - "loss": 0.9231, + "learning_rate": 1.9899504601480805e-05, + "loss": 1.0272, "step": 2605 }, { - "epoch": 0.07395005675368899, + "epoch": 0.07384737453597438, "grad_norm": 0.0, - "learning_rate": 1.9898962165867204e-05, - "loss": 1.017, + "learning_rate": 1.9899374770994183e-05, + "loss": 1.1248, "step": 2606 }, { - "epoch": 0.07397843359818389, + "epoch": 0.07387571197823685, "grad_norm": 0.0, - "learning_rate": 1.989883180449139e-05, - "loss": 1.0338, + "learning_rate": 1.9899244857121446e-05, + "loss": 1.0934, "step": 2607 }, { - "epoch": 0.07400681044267877, + "epoch": 0.0739040494204993, "grad_norm": 0.0, - "learning_rate": 1.9898701359499698e-05, - "loss": 1.0941, + "learning_rate": 1.9899114859863696e-05, + "loss": 1.1363, "step": 2608 }, { - "epoch": 0.07403518728717366, + "epoch": 0.07393238686276177, "grad_norm": 0.0, - "learning_rate": 1.989857083089323e-05, - "loss": 1.0181, + "learning_rate": 1.9898984779222027e-05, + "loss": 1.0716, "step": 2609 }, { - "epoch": 0.07406356413166856, + "epoch": 0.07396072430502423, "grad_norm": 0.0, - "learning_rate": 1.989844021867309e-05, - "loss": 0.9684, + "learning_rate": 1.9898854615197534e-05, + "loss": 1.1161, "step": 2610 }, { - "epoch": 0.07409194097616345, + "epoch": 0.07398906174728669, "grad_norm": 0.0, - "learning_rate": 1.989830952284038e-05, - "loss": 1.0087, + "learning_rate": 1.989872436779131e-05, + "loss": 1.1558, "step": 2611 }, { - "epoch": 0.07412031782065834, + "epoch": 0.07401739918954915, "grad_norm": 0.0, - "learning_rate": 1.9898178743396208e-05, - "loss": 1.1486, + "learning_rate": 1.9898594037004457e-05, + "loss": 1.1013, "step": 2612 }, { - "epoch": 0.07414869466515324, + "epoch": 0.07404573663181162, "grad_norm": 0.0, - "learning_rate": 1.9898047880341674e-05, - "loss": 1.1004, + "learning_rate": 1.9898463622838073e-05, + "loss": 1.0933, "step": 2613 }, { - "epoch": 0.07417707150964813, + "epoch": 0.07407407407407407, "grad_norm": 0.0, - "learning_rate": 1.989791693367788e-05, - "loss": 1.1404, + "learning_rate": 1.9898333125293248e-05, + "loss": 1.2441, "step": 2614 }, { - "epoch": 0.07420544835414301, + "epoch": 0.07410241151633654, "grad_norm": 0.0, - "learning_rate": 1.989778590340594e-05, - "loss": 1.1234, + "learning_rate": 1.989820254437109e-05, + "loss": 0.9972, "step": 2615 }, { - "epoch": 0.07423382519863791, + "epoch": 0.074130748958599, "grad_norm": 0.0, - "learning_rate": 1.9897654789526957e-05, - "loss": 1.0663, + "learning_rate": 1.9898071880072696e-05, + "loss": 1.0027, "step": 2616 }, { - "epoch": 0.0742622020431328, + "epoch": 0.07415908640086145, "grad_norm": 0.0, - "learning_rate": 1.989752359204204e-05, - "loss": 1.0382, + "learning_rate": 1.989794113239917e-05, + "loss": 1.1868, "step": 2617 }, { - "epoch": 0.07429057888762769, + "epoch": 0.07418742384312392, "grad_norm": 0.0, - "learning_rate": 1.9897392310952295e-05, - "loss": 1.0348, + "learning_rate": 1.9897810301351607e-05, + "loss": 1.0598, "step": 2618 }, { - "epoch": 0.07431895573212259, + "epoch": 0.07421576128538639, "grad_norm": 0.0, - "learning_rate": 1.9897260946258833e-05, - "loss": 1.2169, + "learning_rate": 1.9897679386931115e-05, + "loss": 0.9171, "step": 2619 }, { - "epoch": 0.07434733257661748, + "epoch": 0.07424409872764884, "grad_norm": 0.0, - "learning_rate": 1.9897129497962764e-05, - "loss": 1.002, + "learning_rate": 1.989754838913879e-05, + "loss": 1.0286, "step": 2620 }, { - "epoch": 0.07437570942111237, + "epoch": 0.0742724361699113, "grad_norm": 0.0, - "learning_rate": 1.9896997966065194e-05, - "loss": 1.0812, + "learning_rate": 1.9897417307975742e-05, + "loss": 1.167, "step": 2621 }, { - "epoch": 0.07440408626560727, + "epoch": 0.07430077361217377, "grad_norm": 0.0, - "learning_rate": 1.989686635056724e-05, - "loss": 1.0351, + "learning_rate": 1.9897286143443076e-05, + "loss": 1.0674, "step": 2622 }, { - "epoch": 0.07443246311010215, + "epoch": 0.07432911105443622, "grad_norm": 0.0, - "learning_rate": 1.9896734651470013e-05, - "loss": 1.0816, + "learning_rate": 1.9897154895541888e-05, + "loss": 1.0511, "step": 2623 }, { - "epoch": 0.07446083995459706, + "epoch": 0.07435744849669869, "grad_norm": 0.0, - "learning_rate": 1.9896602868774617e-05, - "loss": 1.0122, + "learning_rate": 1.989702356427329e-05, + "loss": 1.0806, "step": 2624 }, { - "epoch": 0.07448921679909194, + "epoch": 0.07438578593896115, "grad_norm": 0.0, - "learning_rate": 1.9896471002482175e-05, - "loss": 1.0958, + "learning_rate": 1.9896892149638393e-05, + "loss": 1.1736, "step": 2625 }, { - "epoch": 0.07451759364358683, + "epoch": 0.0744141233812236, "grad_norm": 0.0, - "learning_rate": 1.98963390525938e-05, - "loss": 1.162, + "learning_rate": 1.9896760651638292e-05, + "loss": 1.1153, "step": 2626 }, { - "epoch": 0.07454597048808173, + "epoch": 0.07444246082348607, "grad_norm": 0.0, - "learning_rate": 1.98962070191106e-05, - "loss": 1.0746, + "learning_rate": 1.9896629070274103e-05, + "loss": 0.9971, "step": 2627 }, { - "epoch": 0.07457434733257662, + "epoch": 0.07447079826574854, "grad_norm": 0.0, - "learning_rate": 1.98960749020337e-05, - "loss": 1.1483, + "learning_rate": 1.9896497405546934e-05, + "loss": 1.1486, "step": 2628 }, { - "epoch": 0.0746027241770715, + "epoch": 0.07449913570801099, "grad_norm": 0.0, - "learning_rate": 1.9895942701364207e-05, - "loss": 1.0732, + "learning_rate": 1.989636565745789e-05, + "loss": 1.1877, "step": 2629 }, { - "epoch": 0.0746311010215664, + "epoch": 0.07452747315027346, "grad_norm": 0.0, - "learning_rate": 1.989581041710324e-05, - "loss": 1.0281, + "learning_rate": 1.9896233826008083e-05, + "loss": 1.2034, "step": 2630 }, { - "epoch": 0.0746594778660613, + "epoch": 0.07455581059253592, "grad_norm": 0.0, - "learning_rate": 1.989567804925192e-05, - "loss": 1.1464, + "learning_rate": 1.9896101911198624e-05, + "loss": 1.0739, "step": 2631 }, { - "epoch": 0.07468785471055618, + "epoch": 0.07458414803479838, "grad_norm": 0.0, - "learning_rate": 1.989554559781136e-05, - "loss": 0.9626, + "learning_rate": 1.9895969913030626e-05, + "loss": 1.0031, "step": 2632 }, { - "epoch": 0.07471623155505108, + "epoch": 0.07461248547706084, "grad_norm": 0.0, - "learning_rate": 1.9895413062782683e-05, - "loss": 0.9904, + "learning_rate": 1.9895837831505195e-05, + "loss": 1.1129, "step": 2633 }, { - "epoch": 0.07474460839954597, + "epoch": 0.07464082291932331, "grad_norm": 0.0, - "learning_rate": 1.9895280444167006e-05, - "loss": 1.0397, + "learning_rate": 1.989570566662345e-05, + "loss": 1.0268, "step": 2634 }, { - "epoch": 0.07477298524404086, + "epoch": 0.07466916036158576, "grad_norm": 0.0, - "learning_rate": 1.9895147741965453e-05, - "loss": 1.1033, + "learning_rate": 1.98955734183865e-05, + "loss": 1.1015, "step": 2635 }, { - "epoch": 0.07480136208853576, + "epoch": 0.07469749780384823, "grad_norm": 0.0, - "learning_rate": 1.9895014956179143e-05, - "loss": 1.0931, + "learning_rate": 1.989544108679546e-05, + "loss": 1.0172, "step": 2636 }, { - "epoch": 0.07482973893303065, + "epoch": 0.07472583524611069, "grad_norm": 0.0, - "learning_rate": 1.989488208680919e-05, - "loss": 0.9296, + "learning_rate": 1.989530867185145e-05, + "loss": 0.9604, "step": 2637 }, { - "epoch": 0.07485811577752553, + "epoch": 0.07475417268837314, "grad_norm": 0.0, - "learning_rate": 1.989474913385673e-05, - "loss": 1.0757, + "learning_rate": 1.9895176173555574e-05, + "loss": 1.1029, "step": 2638 }, { - "epoch": 0.07488649262202043, + "epoch": 0.07478251013063561, "grad_norm": 0.0, - "learning_rate": 1.9894616097322877e-05, - "loss": 0.9433, + "learning_rate": 1.989504359190896e-05, + "loss": 1.036, "step": 2639 }, { - "epoch": 0.07491486946651532, + "epoch": 0.07481084757289808, "grad_norm": 0.0, - "learning_rate": 1.9894482977208757e-05, - "loss": 0.9733, + "learning_rate": 1.9894910926912713e-05, + "loss": 1.0609, "step": 2640 }, { - "epoch": 0.07494324631101021, + "epoch": 0.07483918501516053, "grad_norm": 0.0, - "learning_rate": 1.9894349773515493e-05, - "loss": 1.0098, + "learning_rate": 1.989477817856796e-05, + "loss": 1.1414, "step": 2641 }, { - "epoch": 0.07497162315550511, + "epoch": 0.074867522457423, "grad_norm": 0.0, - "learning_rate": 1.9894216486244214e-05, - "loss": 0.9911, + "learning_rate": 1.989464534687582e-05, + "loss": 1.3318, "step": 2642 }, { - "epoch": 0.075, + "epoch": 0.07489585989968546, "grad_norm": 0.0, - "learning_rate": 1.9894083115396042e-05, - "loss": 1.1122, + "learning_rate": 1.9894512431837404e-05, + "loss": 1.1566, "step": 2643 }, { - "epoch": 0.0750283768444949, + "epoch": 0.07492419734194791, "grad_norm": 0.0, - "learning_rate": 1.9893949660972105e-05, - "loss": 1.0543, + "learning_rate": 1.9894379433453837e-05, + "loss": 1.0005, "step": 2644 }, { - "epoch": 0.07505675368898979, + "epoch": 0.07495253478421038, "grad_norm": 0.0, - "learning_rate": 1.9893816122973534e-05, - "loss": 1.0416, + "learning_rate": 1.9894246351726236e-05, + "loss": 1.0466, "step": 2645 }, { - "epoch": 0.07508513053348467, + "epoch": 0.07498087222647284, "grad_norm": 0.0, - "learning_rate": 1.9893682501401448e-05, - "loss": 1.0443, + "learning_rate": 1.9894113186655726e-05, + "loss": 0.9971, "step": 2646 }, { - "epoch": 0.07511350737797957, + "epoch": 0.0750092096687353, "grad_norm": 0.0, - "learning_rate": 1.9893548796256984e-05, - "loss": 1.0253, + "learning_rate": 1.9893979938243422e-05, + "loss": 1.1265, "step": 2647 }, { - "epoch": 0.07514188422247446, + "epoch": 0.07503754711099776, "grad_norm": 0.0, - "learning_rate": 1.989341500754127e-05, - "loss": 1.0598, + "learning_rate": 1.9893846606490456e-05, + "loss": 1.1168, "step": 2648 }, { - "epoch": 0.07517026106696935, + "epoch": 0.07506588455326023, "grad_norm": 0.0, - "learning_rate": 1.989328113525543e-05, - "loss": 1.0975, + "learning_rate": 1.9893713191397944e-05, + "loss": 1.0296, "step": 2649 }, { - "epoch": 0.07519863791146425, + "epoch": 0.07509422199552268, "grad_norm": 0.0, - "learning_rate": 1.9893147179400602e-05, - "loss": 1.106, + "learning_rate": 1.9893579692967013e-05, + "loss": 1.0948, "step": 2650 }, { - "epoch": 0.07522701475595914, + "epoch": 0.07512255943778515, "grad_norm": 0.0, - "learning_rate": 1.9893013139977917e-05, - "loss": 1.0759, + "learning_rate": 1.9893446111198788e-05, + "loss": 1.0868, "step": 2651 }, { - "epoch": 0.07525539160045402, + "epoch": 0.07515089688004761, "grad_norm": 0.0, - "learning_rate": 1.9892879016988506e-05, - "loss": 1.0382, + "learning_rate": 1.9893312446094392e-05, + "loss": 1.1094, "step": 2652 }, { - "epoch": 0.07528376844494893, + "epoch": 0.07517923432231007, "grad_norm": 0.0, - "learning_rate": 1.9892744810433498e-05, - "loss": 1.0516, + "learning_rate": 1.9893178697654948e-05, + "loss": 1.2118, "step": 2653 }, { - "epoch": 0.07531214528944381, + "epoch": 0.07520757176457253, "grad_norm": 0.0, - "learning_rate": 1.989261052031403e-05, - "loss": 0.9838, + "learning_rate": 1.9893044865881586e-05, + "loss": 1.0815, "step": 2654 }, { - "epoch": 0.0753405221339387, + "epoch": 0.075235909206835, "grad_norm": 0.0, - "learning_rate": 1.989247614663124e-05, - "loss": 1.0884, + "learning_rate": 1.9892910950775433e-05, + "loss": 1.096, "step": 2655 }, { - "epoch": 0.0753688989784336, + "epoch": 0.07526424664909745, "grad_norm": 0.0, - "learning_rate": 1.9892341689386253e-05, - "loss": 1.0046, + "learning_rate": 1.9892776952337623e-05, + "loss": 1.0316, "step": 2656 }, { - "epoch": 0.07539727582292849, + "epoch": 0.07529258409135992, "grad_norm": 0.0, - "learning_rate": 1.9892207148580216e-05, - "loss": 0.9816, + "learning_rate": 1.9892642870569277e-05, + "loss": 1.1467, "step": 2657 }, { - "epoch": 0.07542565266742338, + "epoch": 0.07532092153362238, "grad_norm": 0.0, - "learning_rate": 1.989207252421426e-05, - "loss": 1.0579, + "learning_rate": 1.9892508705471524e-05, + "loss": 1.2167, "step": 2658 }, { - "epoch": 0.07545402951191828, + "epoch": 0.07534925897588483, "grad_norm": 0.0, - "learning_rate": 1.989193781628952e-05, - "loss": 1.0722, + "learning_rate": 1.98923744570455e-05, + "loss": 1.1184, "step": 2659 }, { - "epoch": 0.07548240635641316, + "epoch": 0.0753775964181473, "grad_norm": 0.0, - "learning_rate": 1.989180302480714e-05, - "loss": 0.921, + "learning_rate": 1.989224012529233e-05, + "loss": 1.0102, "step": 2660 }, { - "epoch": 0.07551078320090807, + "epoch": 0.07540593386040977, "grad_norm": 0.0, - "learning_rate": 1.9891668149768253e-05, - "loss": 1.052, + "learning_rate": 1.9892105710213148e-05, + "loss": 1.1088, "step": 2661 }, { - "epoch": 0.07553916004540295, + "epoch": 0.07543427130267222, "grad_norm": 0.0, - "learning_rate": 1.9891533191174002e-05, - "loss": 1.0237, + "learning_rate": 1.9891971211809085e-05, + "loss": 1.1181, "step": 2662 }, { - "epoch": 0.07556753688989784, + "epoch": 0.07546260874493468, "grad_norm": 0.0, - "learning_rate": 1.9891398149025522e-05, - "loss": 1.0414, + "learning_rate": 1.9891836630081277e-05, + "loss": 1.0883, "step": 2663 }, { - "epoch": 0.07559591373439274, + "epoch": 0.07549094618719715, "grad_norm": 0.0, - "learning_rate": 1.9891263023323963e-05, - "loss": 1.165, + "learning_rate": 1.9891701965030855e-05, + "loss": 1.1366, "step": 2664 }, { - "epoch": 0.07562429057888763, + "epoch": 0.0755192836294596, "grad_norm": 0.0, - "learning_rate": 1.9891127814070454e-05, - "loss": 1.0303, + "learning_rate": 1.9891567216658957e-05, + "loss": 1.0333, "step": 2665 }, { - "epoch": 0.07565266742338252, + "epoch": 0.07554762107172207, "grad_norm": 0.0, - "learning_rate": 1.9890992521266147e-05, - "loss": 0.9995, + "learning_rate": 1.9891432384966715e-05, + "loss": 1.0488, "step": 2666 }, { - "epoch": 0.07568104426787742, + "epoch": 0.07557595851398453, "grad_norm": 0.0, - "learning_rate": 1.989085714491218e-05, - "loss": 1.0732, + "learning_rate": 1.989129746995526e-05, + "loss": 1.116, "step": 2667 }, { - "epoch": 0.0757094211123723, + "epoch": 0.07560429595624699, "grad_norm": 0.0, - "learning_rate": 1.9890721685009703e-05, - "loss": 1.0743, + "learning_rate": 1.989116247162574e-05, + "loss": 1.1175, "step": 2668 }, { - "epoch": 0.07573779795686719, + "epoch": 0.07563263339850945, "grad_norm": 0.0, - "learning_rate": 1.989058614155985e-05, - "loss": 1.0353, + "learning_rate": 1.989102738997928e-05, + "loss": 1.1709, "step": 2669 }, { - "epoch": 0.0757661748013621, + "epoch": 0.07566097084077192, "grad_norm": 0.0, - "learning_rate": 1.9890450514563776e-05, - "loss": 1.0167, + "learning_rate": 1.9890892225017024e-05, + "loss": 1.1595, "step": 2670 }, { - "epoch": 0.07579455164585698, + "epoch": 0.07568930828303437, "grad_norm": 0.0, - "learning_rate": 1.989031480402262e-05, - "loss": 1.1784, + "learning_rate": 1.9890756976740115e-05, + "loss": 1.0566, "step": 2671 }, { - "epoch": 0.07582292849035187, + "epoch": 0.07571764572529684, "grad_norm": 0.0, - "learning_rate": 1.9890179009937527e-05, - "loss": 1.0027, + "learning_rate": 1.989062164514968e-05, + "loss": 0.977, "step": 2672 }, { - "epoch": 0.07585130533484677, + "epoch": 0.0757459831675593, "grad_norm": 0.0, - "learning_rate": 1.989004313230965e-05, - "loss": 1.0757, + "learning_rate": 1.989048623024687e-05, + "loss": 1.1085, "step": 2673 }, { - "epoch": 0.07587968217934166, + "epoch": 0.07577432060982175, "grad_norm": 0.0, - "learning_rate": 1.9889907171140134e-05, - "loss": 1.0609, + "learning_rate": 1.989035073203282e-05, + "loss": 1.0717, "step": 2674 }, { - "epoch": 0.07590805902383654, + "epoch": 0.07580265805208422, "grad_norm": 0.0, - "learning_rate": 1.9889771126430128e-05, - "loss": 0.9979, + "learning_rate": 1.9890215150508677e-05, + "loss": 1.1456, "step": 2675 }, { - "epoch": 0.07593643586833144, + "epoch": 0.07583099549434669, "grad_norm": 0.0, - "learning_rate": 1.988963499818078e-05, - "loss": 0.9901, + "learning_rate": 1.9890079485675577e-05, + "loss": 1.0835, "step": 2676 }, { - "epoch": 0.07596481271282633, + "epoch": 0.07585933293660914, "grad_norm": 0.0, - "learning_rate": 1.988949878639324e-05, - "loss": 1.045, + "learning_rate": 1.9889943737534662e-05, + "loss": 1.0467, "step": 2677 }, { - "epoch": 0.07599318955732122, + "epoch": 0.0758876703788716, "grad_norm": 0.0, - "learning_rate": 1.9889362491068658e-05, - "loss": 1.0239, + "learning_rate": 1.988980790608708e-05, + "loss": 1.1644, "step": 2678 }, { - "epoch": 0.07602156640181612, + "epoch": 0.07591600782113407, "grad_norm": 0.0, - "learning_rate": 1.988922611220819e-05, - "loss": 1.011, + "learning_rate": 1.9889671991333976e-05, + "loss": 1.2069, "step": 2679 }, { - "epoch": 0.07604994324631101, + "epoch": 0.07594434526339652, "grad_norm": 0.0, - "learning_rate": 1.988908964981298e-05, - "loss": 1.1345, + "learning_rate": 1.988953599327649e-05, + "loss": 1.0942, "step": 2680 }, { - "epoch": 0.07607832009080591, + "epoch": 0.07597268270565899, "grad_norm": 0.0, - "learning_rate": 1.988895310388419e-05, - "loss": 1.1384, + "learning_rate": 1.988939991191577e-05, + "loss": 1.1376, "step": 2681 }, { - "epoch": 0.0761066969353008, + "epoch": 0.07600102014792146, "grad_norm": 0.0, - "learning_rate": 1.9888816474422966e-05, - "loss": 1.1786, + "learning_rate": 1.9889263747252962e-05, + "loss": 1.196, "step": 2682 }, { - "epoch": 0.07613507377979568, + "epoch": 0.07602935759018391, "grad_norm": 0.0, - "learning_rate": 1.9888679761430463e-05, - "loss": 1.0778, + "learning_rate": 1.9889127499289215e-05, + "loss": 1.0136, "step": 2683 }, { - "epoch": 0.07616345062429059, + "epoch": 0.07605769503244637, "grad_norm": 0.0, - "learning_rate": 1.988854296490784e-05, - "loss": 1.0721, + "learning_rate": 1.9888991168025673e-05, + "loss": 1.1094, "step": 2684 }, { - "epoch": 0.07619182746878547, + "epoch": 0.07608603247470884, "grad_norm": 0.0, - "learning_rate": 1.9888406084856253e-05, - "loss": 1.0942, + "learning_rate": 1.9888854753463487e-05, + "loss": 0.9808, "step": 2685 }, { - "epoch": 0.07622020431328036, + "epoch": 0.07611436991697129, "grad_norm": 0.0, - "learning_rate": 1.9888269121276852e-05, - "loss": 1.0947, + "learning_rate": 1.9888718255603804e-05, + "loss": 1.2129, "step": 2686 }, { - "epoch": 0.07624858115777526, + "epoch": 0.07614270735923376, "grad_norm": 0.0, - "learning_rate": 1.9888132074170797e-05, - "loss": 1.0805, + "learning_rate": 1.9888581674447778e-05, + "loss": 1.0743, "step": 2687 }, { - "epoch": 0.07627695800227015, + "epoch": 0.07617104480149622, "grad_norm": 0.0, - "learning_rate": 1.9887994943539247e-05, - "loss": 1.0779, + "learning_rate": 1.9888445009996555e-05, + "loss": 0.9278, "step": 2688 }, { - "epoch": 0.07630533484676504, + "epoch": 0.07619938224375868, "grad_norm": 0.0, - "learning_rate": 1.9887857729383356e-05, - "loss": 1.1391, + "learning_rate": 1.9888308262251286e-05, + "loss": 1.052, "step": 2689 }, { - "epoch": 0.07633371169125994, + "epoch": 0.07622771968602114, "grad_norm": 0.0, - "learning_rate": 1.9887720431704288e-05, - "loss": 1.0648, + "learning_rate": 1.9888171431213128e-05, + "loss": 1.1953, "step": 2690 }, { - "epoch": 0.07636208853575482, + "epoch": 0.07625605712828361, "grad_norm": 0.0, - "learning_rate": 1.9887583050503204e-05, - "loss": 1.0063, + "learning_rate": 1.9888034516883228e-05, + "loss": 1.1812, "step": 2691 }, { - "epoch": 0.07639046538024971, + "epoch": 0.07628439457054606, "grad_norm": 0.0, - "learning_rate": 1.988744558578126e-05, - "loss": 1.0508, + "learning_rate": 1.988789751926274e-05, + "loss": 1.0788, "step": 2692 }, { - "epoch": 0.07641884222474461, + "epoch": 0.07631273201280853, "grad_norm": 0.0, - "learning_rate": 1.9887308037539616e-05, - "loss": 0.9489, + "learning_rate": 1.988776043835282e-05, + "loss": 1.0387, "step": 2693 }, { - "epoch": 0.0764472190692395, + "epoch": 0.07634106945507098, "grad_norm": 0.0, - "learning_rate": 1.988717040577944e-05, - "loss": 1.2203, + "learning_rate": 1.9887623274154623e-05, + "loss": 1.0157, "step": 2694 }, { - "epoch": 0.07647559591373439, + "epoch": 0.07636940689733344, "grad_norm": 0.0, - "learning_rate": 1.9887032690501887e-05, - "loss": 1.0726, + "learning_rate": 1.9887486026669304e-05, + "loss": 1.0316, "step": 2695 }, { - "epoch": 0.07650397275822929, + "epoch": 0.07639774433959591, "grad_norm": 0.0, - "learning_rate": 1.988689489170813e-05, - "loss": 1.0831, + "learning_rate": 1.988734869589802e-05, + "loss": 1.095, "step": 2696 }, { - "epoch": 0.07653234960272418, + "epoch": 0.07642608178185836, "grad_norm": 0.0, - "learning_rate": 1.988675700939932e-05, - "loss": 0.9997, + "learning_rate": 1.9887211281841924e-05, + "loss": 1.0666, "step": 2697 }, { - "epoch": 0.07656072644721906, + "epoch": 0.07645441922412083, "grad_norm": 0.0, - "learning_rate": 1.9886619043576636e-05, - "loss": 1.0041, + "learning_rate": 1.9887073784502177e-05, + "loss": 1.1067, "step": 2698 }, { - "epoch": 0.07658910329171396, + "epoch": 0.0764827566663833, "grad_norm": 0.0, - "learning_rate": 1.9886480994241235e-05, - "loss": 0.9855, + "learning_rate": 1.9886936203879935e-05, + "loss": 1.0352, "step": 2699 }, { - "epoch": 0.07661748013620885, + "epoch": 0.07651109410864575, "grad_norm": 0.0, - "learning_rate": 1.988634286139428e-05, - "loss": 1.0165, + "learning_rate": 1.9886798539976357e-05, + "loss": 1.1385, "step": 2700 }, { - "epoch": 0.07664585698070375, + "epoch": 0.07653943155090821, "grad_norm": 0.0, - "learning_rate": 1.988620464503695e-05, - "loss": 1.0619, + "learning_rate": 1.9886660792792607e-05, + "loss": 1.0626, "step": 2701 }, { - "epoch": 0.07667423382519864, + "epoch": 0.07656776899317068, "grad_norm": 0.0, - "learning_rate": 1.9886066345170398e-05, - "loss": 1.1176, + "learning_rate": 1.988652296232984e-05, + "loss": 1.1956, "step": 2702 }, { - "epoch": 0.07670261066969353, + "epoch": 0.07659610643543313, "grad_norm": 0.0, - "learning_rate": 1.9885927961795804e-05, - "loss": 1.1003, + "learning_rate": 1.9886385048589217e-05, + "loss": 1.1272, "step": 2703 }, { - "epoch": 0.07673098751418843, + "epoch": 0.0766244438776956, "grad_norm": 0.0, - "learning_rate": 1.988578949491433e-05, - "loss": 1.0561, + "learning_rate": 1.9886247051571904e-05, + "loss": 1.1613, "step": 2704 }, { - "epoch": 0.07675936435868332, + "epoch": 0.07665278131995806, "grad_norm": 0.0, - "learning_rate": 1.9885650944527146e-05, - "loss": 0.9846, + "learning_rate": 1.9886108971279058e-05, + "loss": 1.0966, "step": 2705 }, { - "epoch": 0.0767877412031782, + "epoch": 0.07668111876222052, "grad_norm": 0.0, - "learning_rate": 1.988551231063543e-05, - "loss": 0.9136, + "learning_rate": 1.988597080771185e-05, + "loss": 1.0156, "step": 2706 }, { - "epoch": 0.0768161180476731, + "epoch": 0.07670945620448298, "grad_norm": 0.0, - "learning_rate": 1.9885373593240342e-05, - "loss": 1.045, + "learning_rate": 1.9885832560871434e-05, + "loss": 1.1205, "step": 2707 }, { - "epoch": 0.07684449489216799, + "epoch": 0.07673779364674545, "grad_norm": 0.0, - "learning_rate": 1.988523479234306e-05, - "loss": 0.9653, + "learning_rate": 1.988569423075898e-05, + "loss": 1.2141, "step": 2708 }, { - "epoch": 0.07687287173666288, + "epoch": 0.0767661310890079, "grad_norm": 0.0, - "learning_rate": 1.9885095907944754e-05, - "loss": 1.0573, + "learning_rate": 1.9885555817375656e-05, + "loss": 0.9988, "step": 2709 }, { - "epoch": 0.07690124858115778, + "epoch": 0.07679446853127037, "grad_norm": 0.0, - "learning_rate": 1.98849569400466e-05, - "loss": 1.0053, + "learning_rate": 1.9885417320722623e-05, + "loss": 1.0173, "step": 2710 }, { - "epoch": 0.07692962542565267, + "epoch": 0.07682280597353283, "grad_norm": 0.0, - "learning_rate": 1.988481788864977e-05, - "loss": 1.0949, + "learning_rate": 1.9885278740801047e-05, + "loss": 1.1372, "step": 2711 }, { - "epoch": 0.07695800227014755, + "epoch": 0.07685114341579528, "grad_norm": 0.0, - "learning_rate": 1.988467875375544e-05, - "loss": 1.0005, + "learning_rate": 1.98851400776121e-05, + "loss": 1.0104, "step": 2712 }, { - "epoch": 0.07698637911464246, + "epoch": 0.07687948085805775, "grad_norm": 0.0, - "learning_rate": 1.988453953536478e-05, - "loss": 1.06, + "learning_rate": 1.9885001331156943e-05, + "loss": 1.1355, "step": 2713 }, { - "epoch": 0.07701475595913734, + "epoch": 0.07690781830032022, "grad_norm": 0.0, - "learning_rate": 1.9884400233478976e-05, - "loss": 1.024, + "learning_rate": 1.988486250143675e-05, + "loss": 0.9501, "step": 2714 }, { - "epoch": 0.07704313280363223, + "epoch": 0.07693615574258267, "grad_norm": 0.0, - "learning_rate": 1.98842608480992e-05, - "loss": 1.037, + "learning_rate": 1.9884723588452693e-05, + "loss": 1.1429, "step": 2715 }, { - "epoch": 0.07707150964812713, + "epoch": 0.07696449318484513, "grad_norm": 0.0, - "learning_rate": 1.988412137922662e-05, - "loss": 1.1903, + "learning_rate": 1.9884584592205937e-05, + "loss": 1.165, "step": 2716 }, { - "epoch": 0.07709988649262202, + "epoch": 0.0769928306271076, "grad_norm": 0.0, - "learning_rate": 1.988398182686243e-05, - "loss": 1.0625, + "learning_rate": 1.9884445512697657e-05, + "loss": 1.2158, "step": 2717 }, { - "epoch": 0.0771282633371169, + "epoch": 0.07702116806937005, "grad_norm": 0.0, - "learning_rate": 1.9883842191007796e-05, - "loss": 1.069, + "learning_rate": 1.9884306349929018e-05, + "loss": 1.1305, "step": 2718 }, { - "epoch": 0.07715664018161181, + "epoch": 0.07704950551163252, "grad_norm": 0.0, - "learning_rate": 1.9883702471663906e-05, - "loss": 0.9618, + "learning_rate": 1.9884167103901196e-05, + "loss": 1.1347, "step": 2719 }, { - "epoch": 0.0771850170261067, + "epoch": 0.07707784295389498, "grad_norm": 0.0, - "learning_rate": 1.9883562668831936e-05, - "loss": 1.033, + "learning_rate": 1.988402777461537e-05, + "loss": 1.0845, "step": 2720 }, { - "epoch": 0.0772133938706016, + "epoch": 0.07710618039615744, "grad_norm": 0.0, - "learning_rate": 1.988342278251307e-05, - "loss": 1.1261, + "learning_rate": 1.9883888362072702e-05, + "loss": 1.0671, "step": 2721 }, { - "epoch": 0.07724177071509648, + "epoch": 0.0771345178384199, "grad_norm": 0.0, - "learning_rate": 1.9883282812708483e-05, - "loss": 1.0018, + "learning_rate": 1.9883748866274373e-05, + "loss": 1.0572, "step": 2722 }, { - "epoch": 0.07727014755959137, + "epoch": 0.07716285528068237, "grad_norm": 0.0, - "learning_rate": 1.9883142759419364e-05, - "loss": 1.1218, + "learning_rate": 1.988360928722156e-05, + "loss": 1.1049, "step": 2723 }, { - "epoch": 0.07729852440408627, + "epoch": 0.07719119272294482, "grad_norm": 0.0, - "learning_rate": 1.9883002622646894e-05, - "loss": 1.0644, + "learning_rate": 1.988346962491543e-05, + "loss": 1.0665, "step": 2724 }, { - "epoch": 0.07732690124858116, + "epoch": 0.07721953016520729, "grad_norm": 0.0, - "learning_rate": 1.9882862402392256e-05, - "loss": 1.0234, + "learning_rate": 1.988332987935717e-05, + "loss": 1.1854, "step": 2725 }, { - "epoch": 0.07735527809307605, + "epoch": 0.07724786760746975, "grad_norm": 0.0, - "learning_rate": 1.9882722098656637e-05, - "loss": 0.9679, + "learning_rate": 1.988319005054795e-05, + "loss": 1.0888, "step": 2726 }, { - "epoch": 0.07738365493757095, + "epoch": 0.0772762050497322, "grad_norm": 0.0, - "learning_rate": 1.9882581711441217e-05, - "loss": 1.1383, + "learning_rate": 1.988305013848895e-05, + "loss": 1.0159, "step": 2727 }, { - "epoch": 0.07741203178206583, + "epoch": 0.07730454249199467, "grad_norm": 0.0, - "learning_rate": 1.9882441240747184e-05, - "loss": 1.1344, + "learning_rate": 1.988291014318135e-05, + "loss": 1.2017, "step": 2728 }, { - "epoch": 0.07744040862656072, + "epoch": 0.07733287993425714, "grad_norm": 0.0, - "learning_rate": 1.988230068657573e-05, - "loss": 0.9771, + "learning_rate": 1.988277006462633e-05, + "loss": 1.0145, "step": 2729 }, { - "epoch": 0.07746878547105562, + "epoch": 0.07736121737651959, "grad_norm": 0.0, - "learning_rate": 1.9882160048928036e-05, - "loss": 1.0359, + "learning_rate": 1.9882629902825067e-05, + "loss": 1.1687, "step": 2730 }, { - "epoch": 0.07749716231555051, + "epoch": 0.07738955481878206, "grad_norm": 0.0, - "learning_rate": 1.9882019327805294e-05, - "loss": 1.0734, + "learning_rate": 1.9882489657778737e-05, + "loss": 1.1924, "step": 2731 }, { - "epoch": 0.0775255391600454, + "epoch": 0.07741789226104452, "grad_norm": 0.0, - "learning_rate": 1.9881878523208687e-05, - "loss": 1.1139, + "learning_rate": 1.9882349329488534e-05, + "loss": 1.0663, "step": 2732 }, { - "epoch": 0.0775539160045403, + "epoch": 0.07744622970330697, "grad_norm": 0.0, - "learning_rate": 1.988173763513941e-05, - "loss": 1.0589, + "learning_rate": 1.988220891795563e-05, + "loss": 1.0445, "step": 2733 }, { - "epoch": 0.07758229284903519, + "epoch": 0.07747456714556944, "grad_norm": 0.0, - "learning_rate": 1.988159666359865e-05, - "loss": 1.0533, + "learning_rate": 1.9882068423181208e-05, + "loss": 1.1123, "step": 2734 }, { - "epoch": 0.07761066969353007, + "epoch": 0.0775029045878319, "grad_norm": 0.0, - "learning_rate": 1.98814556085876e-05, - "loss": 1.0115, + "learning_rate": 1.9881927845166457e-05, + "loss": 1.1082, "step": 2735 }, { - "epoch": 0.07763904653802498, + "epoch": 0.07753124203009436, "grad_norm": 0.0, - "learning_rate": 1.988131447010745e-05, - "loss": 1.1069, + "learning_rate": 1.9881787183912558e-05, + "loss": 1.0369, "step": 2736 }, { - "epoch": 0.07766742338251986, + "epoch": 0.07755957947235682, "grad_norm": 0.0, - "learning_rate": 1.988117324815939e-05, - "loss": 1.1563, + "learning_rate": 1.9881646439420695e-05, + "loss": 1.0709, "step": 2737 }, { - "epoch": 0.07769580022701475, + "epoch": 0.07758791691461929, "grad_norm": 0.0, - "learning_rate": 1.9881031942744617e-05, - "loss": 1.0208, + "learning_rate": 1.9881505611692056e-05, + "loss": 1.0193, "step": 2738 }, { - "epoch": 0.07772417707150965, + "epoch": 0.07761625435688174, "grad_norm": 0.0, - "learning_rate": 1.9880890553864325e-05, - "loss": 1.0632, + "learning_rate": 1.9881364700727827e-05, + "loss": 0.9699, "step": 2739 }, { - "epoch": 0.07775255391600454, + "epoch": 0.07764459179914421, "grad_norm": 0.0, - "learning_rate": 1.9880749081519703e-05, - "loss": 1.061, + "learning_rate": 1.988122370652919e-05, + "loss": 1.0925, "step": 2740 }, { - "epoch": 0.07778093076049944, + "epoch": 0.07767292924140667, "grad_norm": 0.0, - "learning_rate": 1.9880607525711954e-05, - "loss": 1.0445, + "learning_rate": 1.9881082629097336e-05, + "loss": 1.0935, "step": 2741 }, { - "epoch": 0.07780930760499433, + "epoch": 0.07770126668366913, "grad_norm": 0.0, - "learning_rate": 1.9880465886442264e-05, - "loss": 1.1056, + "learning_rate": 1.988094146843346e-05, + "loss": 1.0979, "step": 2742 }, { - "epoch": 0.07783768444948921, + "epoch": 0.07772960412593159, "grad_norm": 0.0, - "learning_rate": 1.9880324163711834e-05, - "loss": 1.0189, + "learning_rate": 1.988080022453874e-05, + "loss": 1.0413, "step": 2743 }, { - "epoch": 0.07786606129398412, + "epoch": 0.07775794156819406, "grad_norm": 0.0, - "learning_rate": 1.9880182357521867e-05, - "loss": 1.1119, + "learning_rate": 1.9880658897414368e-05, + "loss": 1.209, "step": 2744 }, { - "epoch": 0.077894438138479, + "epoch": 0.07778627901045651, "grad_norm": 0.0, - "learning_rate": 1.988004046787355e-05, - "loss": 1.049, + "learning_rate": 1.9880517487061543e-05, + "loss": 1.0603, "step": 2745 }, { - "epoch": 0.07792281498297389, + "epoch": 0.07781461645271898, "grad_norm": 0.0, - "learning_rate": 1.9879898494768093e-05, - "loss": 1.0715, + "learning_rate": 1.9880375993481446e-05, + "loss": 1.1876, "step": 2746 }, { - "epoch": 0.07795119182746879, + "epoch": 0.07784295389498144, "grad_norm": 0.0, - "learning_rate": 1.9879756438206687e-05, - "loss": 0.9899, + "learning_rate": 1.9880234416675276e-05, + "loss": 1.0509, "step": 2747 }, { - "epoch": 0.07797956867196368, + "epoch": 0.0778712913372439, "grad_norm": 0.0, - "learning_rate": 1.987961429819053e-05, - "loss": 1.0576, + "learning_rate": 1.988009275664422e-05, + "loss": 1.1627, "step": 2748 }, { - "epoch": 0.07800794551645857, + "epoch": 0.07789962877950636, "grad_norm": 0.0, - "learning_rate": 1.987947207472083e-05, - "loss": 0.8771, + "learning_rate": 1.9879951013389475e-05, + "loss": 1.1063, "step": 2749 }, { - "epoch": 0.07803632236095347, + "epoch": 0.07792796622176883, "grad_norm": 0.0, - "learning_rate": 1.987932976779879e-05, - "loss": 0.9766, + "learning_rate": 1.9879809186912237e-05, + "loss": 1.0658, "step": 2750 }, { - "epoch": 0.07806469920544835, + "epoch": 0.07795630366403128, "grad_norm": 0.0, - "learning_rate": 1.9879187377425598e-05, - "loss": 1.0651, + "learning_rate": 1.9879667277213692e-05, + "loss": 1.1374, "step": 2751 }, { - "epoch": 0.07809307604994324, + "epoch": 0.07798464110629375, "grad_norm": 0.0, - "learning_rate": 1.987904490360247e-05, - "loss": 1.0876, + "learning_rate": 1.9879525284295042e-05, + "loss": 1.2266, "step": 2752 }, { - "epoch": 0.07812145289443814, + "epoch": 0.07801297854855621, "grad_norm": 0.0, - "learning_rate": 1.987890234633061e-05, - "loss": 1.1172, + "learning_rate": 1.9879383208157484e-05, + "loss": 0.9927, "step": 2753 }, { - "epoch": 0.07814982973893303, + "epoch": 0.07804131599081866, "grad_norm": 0.0, - "learning_rate": 1.9878759705611213e-05, - "loss": 1.0282, + "learning_rate": 1.9879241048802213e-05, + "loss": 1.2076, "step": 2754 }, { - "epoch": 0.07817820658342792, + "epoch": 0.07806965343308113, "grad_norm": 0.0, - "learning_rate": 1.987861698144549e-05, - "loss": 0.926, + "learning_rate": 1.9879098806230424e-05, + "loss": 1.135, "step": 2755 }, { - "epoch": 0.07820658342792282, + "epoch": 0.0780979908753436, "grad_norm": 0.0, - "learning_rate": 1.9878474173834642e-05, - "loss": 0.9254, + "learning_rate": 1.9878956480443315e-05, + "loss": 1.0484, "step": 2756 }, { - "epoch": 0.0782349602724177, + "epoch": 0.07812632831760605, "grad_norm": 0.0, - "learning_rate": 1.9878331282779883e-05, - "loss": 0.9851, + "learning_rate": 1.987881407144209e-05, + "loss": 1.1097, "step": 2757 }, { - "epoch": 0.07826333711691259, + "epoch": 0.07815466575986851, "grad_norm": 0.0, - "learning_rate": 1.9878188308282414e-05, - "loss": 1.0843, + "learning_rate": 1.9878671579227947e-05, + "loss": 1.0672, "step": 2758 }, { - "epoch": 0.0782917139614075, + "epoch": 0.07818300320213098, "grad_norm": 0.0, - "learning_rate": 1.9878045250343445e-05, - "loss": 1.064, + "learning_rate": 1.9878529003802086e-05, + "loss": 1.1445, "step": 2759 }, { - "epoch": 0.07832009080590238, + "epoch": 0.07821134064439343, "grad_norm": 0.0, - "learning_rate": 1.9877902108964182e-05, - "loss": 1.0652, + "learning_rate": 1.98783863451657e-05, + "loss": 1.0228, "step": 2760 }, { - "epoch": 0.07834846765039728, + "epoch": 0.0782396780866559, "grad_norm": 0.0, - "learning_rate": 1.9877758884145838e-05, - "loss": 1.0373, + "learning_rate": 1.9878243603320003e-05, + "loss": 1.023, "step": 2761 }, { - "epoch": 0.07837684449489217, + "epoch": 0.07826801552891836, "grad_norm": 0.0, - "learning_rate": 1.9877615575889622e-05, - "loss": 1.0829, + "learning_rate": 1.9878100778266193e-05, + "loss": 1.0992, "step": 2762 }, { - "epoch": 0.07840522133938706, + "epoch": 0.07829635297118082, "grad_norm": 0.0, - "learning_rate": 1.987747218419674e-05, - "loss": 1.1512, + "learning_rate": 1.9877957870005468e-05, + "loss": 1.0496, "step": 2763 }, { - "epoch": 0.07843359818388196, + "epoch": 0.07832469041344328, "grad_norm": 0.0, - "learning_rate": 1.9877328709068407e-05, - "loss": 1.0511, + "learning_rate": 1.987781487853904e-05, + "loss": 1.1851, "step": 2764 }, { - "epoch": 0.07846197502837685, + "epoch": 0.07835302785570575, "grad_norm": 0.0, - "learning_rate": 1.9877185150505834e-05, - "loss": 0.9342, + "learning_rate": 1.9877671803868106e-05, + "loss": 1.1094, "step": 2765 }, { - "epoch": 0.07849035187287173, + "epoch": 0.0783813652979682, "grad_norm": 0.0, - "learning_rate": 1.9877041508510238e-05, - "loss": 1.1913, + "learning_rate": 1.9877528645993876e-05, + "loss": 1.1273, "step": 2766 }, { - "epoch": 0.07851872871736663, + "epoch": 0.07840970274023067, "grad_norm": 0.0, - "learning_rate": 1.9876897783082825e-05, - "loss": 1.1114, + "learning_rate": 1.9877385404917554e-05, + "loss": 1.1935, "step": 2767 }, { - "epoch": 0.07854710556186152, + "epoch": 0.07843804018249313, "grad_norm": 0.0, - "learning_rate": 1.987675397422481e-05, - "loss": 1.0608, + "learning_rate": 1.9877242080640347e-05, + "loss": 1.1176, "step": 2768 }, { - "epoch": 0.07857548240635641, + "epoch": 0.07846637762475558, "grad_norm": 0.0, - "learning_rate": 1.9876610081937416e-05, - "loss": 1.0657, + "learning_rate": 1.987709867316346e-05, + "loss": 1.1185, "step": 2769 }, { - "epoch": 0.07860385925085131, + "epoch": 0.07849471506701805, "grad_norm": 0.0, - "learning_rate": 1.9876466106221847e-05, - "loss": 1.2003, + "learning_rate": 1.9876955182488105e-05, + "loss": 1.0594, "step": 2770 }, { - "epoch": 0.0786322360953462, + "epoch": 0.07852305250928052, "grad_norm": 0.0, - "learning_rate": 1.9876322047079326e-05, - "loss": 1.1002, + "learning_rate": 1.987681160861549e-05, + "loss": 1.1109, "step": 2771 }, { - "epoch": 0.07866061293984108, + "epoch": 0.07855138995154297, "grad_norm": 0.0, - "learning_rate": 1.987617790451107e-05, - "loss": 1.0434, + "learning_rate": 1.9876667951546823e-05, + "loss": 1.1156, "step": 2772 }, { - "epoch": 0.07868898978433599, + "epoch": 0.07857972739380543, "grad_norm": 0.0, - "learning_rate": 1.9876033678518295e-05, - "loss": 1.0399, + "learning_rate": 1.987652421128331e-05, + "loss": 1.0738, "step": 2773 }, { - "epoch": 0.07871736662883087, + "epoch": 0.0786080648360679, "grad_norm": 0.0, - "learning_rate": 1.987588936910222e-05, - "loss": 0.976, + "learning_rate": 1.9876380387826168e-05, + "loss": 1.1967, "step": 2774 }, { - "epoch": 0.07874574347332576, + "epoch": 0.07863640227833035, "grad_norm": 0.0, - "learning_rate": 1.9875744976264063e-05, - "loss": 1.1287, + "learning_rate": 1.9876236481176608e-05, + "loss": 1.2526, "step": 2775 }, { - "epoch": 0.07877412031782066, + "epoch": 0.07866473972059282, "grad_norm": 0.0, - "learning_rate": 1.9875600500005046e-05, - "loss": 1.0782, + "learning_rate": 1.987609249133584e-05, + "loss": 1.0559, "step": 2776 }, { - "epoch": 0.07880249716231555, + "epoch": 0.07869307716285528, "grad_norm": 0.0, - "learning_rate": 1.9875455940326386e-05, - "loss": 1.0809, + "learning_rate": 1.9875948418305078e-05, + "loss": 1.0648, "step": 2777 }, { - "epoch": 0.07883087400681044, + "epoch": 0.07872141460511774, "grad_norm": 0.0, - "learning_rate": 1.987531129722931e-05, - "loss": 1.1218, + "learning_rate": 1.9875804262085534e-05, + "loss": 1.1708, "step": 2778 }, { - "epoch": 0.07885925085130534, + "epoch": 0.0787497520473802, "grad_norm": 0.0, - "learning_rate": 1.987516657071503e-05, - "loss": 1.126, + "learning_rate": 1.9875660022678427e-05, + "loss": 1.1151, "step": 2779 }, { - "epoch": 0.07888762769580022, + "epoch": 0.07877808948964267, "grad_norm": 0.0, - "learning_rate": 1.9875021760784776e-05, - "loss": 1.1321, + "learning_rate": 1.9875515700084963e-05, + "loss": 1.1115, "step": 2780 }, { - "epoch": 0.07891600454029513, + "epoch": 0.07880642693190512, "grad_norm": 0.0, - "learning_rate": 1.9874876867439766e-05, - "loss": 1.0215, + "learning_rate": 1.9875371294306368e-05, + "loss": 1.0802, "step": 2781 }, { - "epoch": 0.07894438138479001, + "epoch": 0.07883476437416759, "grad_norm": 0.0, - "learning_rate": 1.9874731890681232e-05, - "loss": 0.9843, + "learning_rate": 1.987522680534385e-05, + "loss": 1.1699, "step": 2782 }, { - "epoch": 0.0789727582292849, + "epoch": 0.07886310181643005, "grad_norm": 0.0, - "learning_rate": 1.9874586830510393e-05, - "loss": 0.9521, + "learning_rate": 1.987508223319863e-05, + "loss": 1.0207, "step": 2783 }, { - "epoch": 0.0790011350737798, + "epoch": 0.0788914392586925, "grad_norm": 0.0, - "learning_rate": 1.9874441686928473e-05, - "loss": 1.0736, + "learning_rate": 1.9874937577871928e-05, + "loss": 1.1191, "step": 2784 }, { - "epoch": 0.07902951191827469, + "epoch": 0.07891977670095497, "grad_norm": 0.0, - "learning_rate": 1.98742964599367e-05, - "loss": 1.0786, + "learning_rate": 1.9874792839364958e-05, + "loss": 1.1202, "step": 2785 }, { - "epoch": 0.07905788876276958, + "epoch": 0.07894811414321744, "grad_norm": 0.0, - "learning_rate": 1.9874151149536304e-05, - "loss": 1.0254, + "learning_rate": 1.987464801767894e-05, + "loss": 1.0967, "step": 2786 }, { - "epoch": 0.07908626560726448, + "epoch": 0.07897645158547989, "grad_norm": 0.0, - "learning_rate": 1.9874005755728505e-05, - "loss": 1.0779, + "learning_rate": 1.9874503112815094e-05, + "loss": 1.1931, "step": 2787 }, { - "epoch": 0.07911464245175936, + "epoch": 0.07900478902774236, "grad_norm": 0.0, - "learning_rate": 1.987386027851454e-05, - "loss": 1.029, + "learning_rate": 1.9874358124774644e-05, + "loss": 1.0545, "step": 2788 }, { - "epoch": 0.07914301929625425, + "epoch": 0.07903312647000482, "grad_norm": 0.0, - "learning_rate": 1.987371471789563e-05, - "loss": 1.1727, + "learning_rate": 1.9874213053558807e-05, + "loss": 1.0585, "step": 2789 }, { - "epoch": 0.07917139614074915, + "epoch": 0.07906146391226727, "grad_norm": 0.0, - "learning_rate": 1.987356907387301e-05, - "loss": 1.0892, + "learning_rate": 1.987406789916881e-05, + "loss": 1.087, "step": 2790 }, { - "epoch": 0.07919977298524404, + "epoch": 0.07908980135452974, "grad_norm": 0.0, - "learning_rate": 1.9873423346447905e-05, - "loss": 0.9356, + "learning_rate": 1.9873922661605868e-05, + "loss": 1.1425, "step": 2791 }, { - "epoch": 0.07922814982973893, + "epoch": 0.0791181387967922, "grad_norm": 0.0, - "learning_rate": 1.9873277535621557e-05, - "loss": 1.0252, + "learning_rate": 1.987377734087121e-05, + "loss": 1.0472, "step": 2792 }, { - "epoch": 0.07925652667423383, + "epoch": 0.07914647623905466, "grad_norm": 0.0, - "learning_rate": 1.9873131641395183e-05, - "loss": 1.003, + "learning_rate": 1.987363193696606e-05, + "loss": 0.9835, "step": 2793 }, { - "epoch": 0.07928490351872872, + "epoch": 0.07917481368131712, "grad_norm": 0.0, - "learning_rate": 1.9872985663770024e-05, - "loss": 1.067, + "learning_rate": 1.9873486449891643e-05, + "loss": 1.1221, "step": 2794 }, { - "epoch": 0.0793132803632236, + "epoch": 0.07920315112357959, "grad_norm": 0.0, - "learning_rate": 1.9872839602747308e-05, - "loss": 1.0594, + "learning_rate": 1.9873340879649182e-05, + "loss": 1.0598, "step": 2795 }, { - "epoch": 0.0793416572077185, + "epoch": 0.07923148856584204, "grad_norm": 0.0, - "learning_rate": 1.987269345832828e-05, - "loss": 1.046, + "learning_rate": 1.9873195226239903e-05, + "loss": 1.0457, "step": 2796 }, { - "epoch": 0.07937003405221339, + "epoch": 0.07925982600810451, "grad_norm": 0.0, - "learning_rate": 1.987254723051416e-05, - "loss": 1.0573, + "learning_rate": 1.9873049489665036e-05, + "loss": 1.2837, "step": 2797 }, { - "epoch": 0.07939841089670828, + "epoch": 0.07928816345036697, "grad_norm": 0.0, - "learning_rate": 1.987240091930619e-05, - "loss": 0.9778, + "learning_rate": 1.9872903669925807e-05, + "loss": 1.1016, "step": 2798 }, { - "epoch": 0.07942678774120318, + "epoch": 0.07931650089262943, "grad_norm": 0.0, - "learning_rate": 1.9872254524705608e-05, - "loss": 1.087, + "learning_rate": 1.9872757767023445e-05, + "loss": 1.0023, "step": 2799 }, { - "epoch": 0.07945516458569807, + "epoch": 0.07934483833489189, "grad_norm": 0.0, - "learning_rate": 1.9872108046713644e-05, - "loss": 1.0157, + "learning_rate": 1.9872611780959173e-05, + "loss": 1.0913, "step": 2800 }, { - "epoch": 0.07948354143019297, + "epoch": 0.07937317577715436, "grad_norm": 0.0, - "learning_rate": 1.9871961485331544e-05, - "loss": 1.0942, + "learning_rate": 1.987246571173423e-05, + "loss": 1.0265, "step": 2801 }, { - "epoch": 0.07951191827468786, + "epoch": 0.07940151321941681, "grad_norm": 0.0, - "learning_rate": 1.987181484056054e-05, - "loss": 1.0822, + "learning_rate": 1.9872319559349843e-05, + "loss": 1.1411, "step": 2802 }, { - "epoch": 0.07954029511918274, + "epoch": 0.07942985066167928, "grad_norm": 0.0, - "learning_rate": 1.9871668112401872e-05, - "loss": 0.9392, + "learning_rate": 1.9872173323807244e-05, + "loss": 1.0501, "step": 2803 }, { - "epoch": 0.07956867196367765, + "epoch": 0.07945818810394174, "grad_norm": 0.0, - "learning_rate": 1.987152130085678e-05, - "loss": 1.0742, + "learning_rate": 1.987202700510766e-05, + "loss": 1.1715, "step": 2804 }, { - "epoch": 0.07959704880817253, + "epoch": 0.0794865255462042, "grad_norm": 0.0, - "learning_rate": 1.9871374405926506e-05, - "loss": 1.0646, + "learning_rate": 1.9871880603252326e-05, + "loss": 1.2684, "step": 2805 }, { - "epoch": 0.07962542565266742, + "epoch": 0.07951486298846666, "grad_norm": 0.0, - "learning_rate": 1.9871227427612285e-05, - "loss": 0.9622, + "learning_rate": 1.9871734118242477e-05, + "loss": 1.0096, "step": 2806 }, { - "epoch": 0.07965380249716232, + "epoch": 0.07954320043072913, "grad_norm": 0.0, - "learning_rate": 1.9871080365915366e-05, - "loss": 0.9899, + "learning_rate": 1.9871587550079346e-05, + "loss": 1.1006, "step": 2807 }, { - "epoch": 0.07968217934165721, + "epoch": 0.07957153787299158, "grad_norm": 0.0, - "learning_rate": 1.9870933220836983e-05, - "loss": 1.0186, + "learning_rate": 1.9871440898764163e-05, + "loss": 1.1165, "step": 2808 }, { - "epoch": 0.0797105561861521, + "epoch": 0.07959987531525405, "grad_norm": 0.0, - "learning_rate": 1.987078599237839e-05, - "loss": 1.0703, + "learning_rate": 1.9871294164298175e-05, + "loss": 0.9776, "step": 2809 }, { - "epoch": 0.079738933030647, + "epoch": 0.07962821275751651, "grad_norm": 0.0, - "learning_rate": 1.987063868054082e-05, - "loss": 1.1122, + "learning_rate": 1.9871147346682605e-05, + "loss": 1.067, "step": 2810 }, { - "epoch": 0.07976730987514188, + "epoch": 0.07965655019977896, "grad_norm": 0.0, - "learning_rate": 1.9870491285325516e-05, - "loss": 1.0978, + "learning_rate": 1.98710004459187e-05, + "loss": 1.1079, "step": 2811 }, { - "epoch": 0.07979568671963677, + "epoch": 0.07968488764204143, "grad_norm": 0.0, - "learning_rate": 1.9870343806733735e-05, - "loss": 1.02, + "learning_rate": 1.9870853462007688e-05, + "loss": 1.1216, "step": 2812 }, { - "epoch": 0.07982406356413167, + "epoch": 0.0797132250843039, "grad_norm": 0.0, - "learning_rate": 1.9870196244766715e-05, - "loss": 1.1178, + "learning_rate": 1.9870706394950815e-05, + "loss": 1.1304, "step": 2813 }, { - "epoch": 0.07985244040862656, + "epoch": 0.07974156252656635, "grad_norm": 0.0, - "learning_rate": 1.9870048599425704e-05, - "loss": 1.0224, + "learning_rate": 1.9870559244749317e-05, + "loss": 1.0374, "step": 2814 }, { - "epoch": 0.07988081725312145, + "epoch": 0.07976989996882881, "grad_norm": 0.0, - "learning_rate": 1.9869900870711947e-05, - "loss": 1.0956, + "learning_rate": 1.987041201140443e-05, + "loss": 1.032, "step": 2815 }, { - "epoch": 0.07990919409761635, + "epoch": 0.07979823741109128, "grad_norm": 0.0, - "learning_rate": 1.9869753058626696e-05, - "loss": 0.9984, + "learning_rate": 1.98702646949174e-05, + "loss": 1.1454, "step": 2816 }, { - "epoch": 0.07993757094211124, + "epoch": 0.07982657485335373, "grad_norm": 0.0, - "learning_rate": 1.9869605163171194e-05, - "loss": 1.0709, + "learning_rate": 1.9870117295289463e-05, + "loss": 1.2065, "step": 2817 }, { - "epoch": 0.07996594778660612, + "epoch": 0.0798549122956162, "grad_norm": 0.0, - "learning_rate": 1.9869457184346698e-05, - "loss": 1.1053, + "learning_rate": 1.9869969812521867e-05, + "loss": 1.1763, "step": 2818 }, { - "epoch": 0.07999432463110102, + "epoch": 0.07988324973787866, "grad_norm": 0.0, - "learning_rate": 1.9869309122154447e-05, - "loss": 1.038, + "learning_rate": 1.9869822246615846e-05, + "loss": 1.0221, "step": 2819 }, { - "epoch": 0.08002270147559591, + "epoch": 0.07991158718014112, "grad_norm": 0.0, - "learning_rate": 1.9869160976595703e-05, - "loss": 1.0919, + "learning_rate": 1.9869674597572647e-05, + "loss": 1.1331, "step": 2820 }, { - "epoch": 0.08005107832009081, + "epoch": 0.07993992462240358, "grad_norm": 0.0, - "learning_rate": 1.9869012747671715e-05, - "loss": 1.0854, + "learning_rate": 1.986952686539352e-05, + "loss": 1.117, "step": 2821 }, { - "epoch": 0.0800794551645857, + "epoch": 0.07996826206466605, "grad_norm": 0.0, - "learning_rate": 1.9868864435383724e-05, - "loss": 1.124, + "learning_rate": 1.9869379050079697e-05, + "loss": 1.1081, "step": 2822 }, { - "epoch": 0.08010783200908059, + "epoch": 0.0799965995069285, "grad_norm": 0.0, - "learning_rate": 1.9868716039733e-05, - "loss": 1.0162, + "learning_rate": 1.986923115163243e-05, + "loss": 1.0714, "step": 2823 }, { - "epoch": 0.08013620885357549, + "epoch": 0.08002493694919097, "grad_norm": 0.0, - "learning_rate": 1.9868567560720783e-05, - "loss": 1.0468, + "learning_rate": 1.9869083170052966e-05, + "loss": 1.0837, "step": 2824 }, { - "epoch": 0.08016458569807038, + "epoch": 0.08005327439145343, "grad_norm": 0.0, - "learning_rate": 1.9868418998348336e-05, - "loss": 1.0735, + "learning_rate": 1.9868935105342548e-05, + "loss": 0.9729, "step": 2825 }, { - "epoch": 0.08019296254256526, + "epoch": 0.08008161183371588, "grad_norm": 0.0, - "learning_rate": 1.986827035261691e-05, - "loss": 1.0823, + "learning_rate": 1.9868786957502425e-05, + "loss": 1.1455, "step": 2826 }, { - "epoch": 0.08022133938706016, + "epoch": 0.08010994927597835, "grad_norm": 0.0, - "learning_rate": 1.9868121623527757e-05, - "loss": 1.0967, + "learning_rate": 1.9868638726533846e-05, + "loss": 1.1014, "step": 2827 }, { - "epoch": 0.08024971623155505, + "epoch": 0.08013828671824082, "grad_norm": 0.0, - "learning_rate": 1.986797281108214e-05, - "loss": 1.0806, + "learning_rate": 1.9868490412438056e-05, + "loss": 1.1126, "step": 2828 }, { - "epoch": 0.08027809307604994, + "epoch": 0.08016662416050327, "grad_norm": 0.0, - "learning_rate": 1.9867823915281312e-05, - "loss": 1.0942, + "learning_rate": 1.9868342015216312e-05, + "loss": 1.0302, "step": 2829 }, { - "epoch": 0.08030646992054484, + "epoch": 0.08019496160276574, "grad_norm": 0.0, - "learning_rate": 1.986767493612653e-05, - "loss": 0.9627, + "learning_rate": 1.9868193534869852e-05, + "loss": 0.9951, "step": 2830 }, { - "epoch": 0.08033484676503973, + "epoch": 0.0802232990450282, "grad_norm": 0.0, - "learning_rate": 1.9867525873619056e-05, - "loss": 1.1732, + "learning_rate": 1.986804497139994e-05, + "loss": 0.9446, "step": 2831 }, { - "epoch": 0.08036322360953461, + "epoch": 0.08025163648729065, "grad_norm": 0.0, - "learning_rate": 1.9867376727760147e-05, - "loss": 1.0535, + "learning_rate": 1.9867896324807817e-05, + "loss": 1.1667, "step": 2832 }, { - "epoch": 0.08039160045402952, + "epoch": 0.08027997392955312, "grad_norm": 0.0, - "learning_rate": 1.9867227498551066e-05, - "loss": 1.0287, + "learning_rate": 1.986774759509474e-05, + "loss": 1.1367, "step": 2833 }, { - "epoch": 0.0804199772985244, + "epoch": 0.08030831137181559, "grad_norm": 0.0, - "learning_rate": 1.9867078185993068e-05, - "loss": 0.9642, + "learning_rate": 1.9867598782261958e-05, + "loss": 1.1104, "step": 2834 }, { - "epoch": 0.08044835414301929, + "epoch": 0.08033664881407804, "grad_norm": 0.0, - "learning_rate": 1.9866928790087418e-05, - "loss": 1.0695, + "learning_rate": 1.986744988631073e-05, + "loss": 1.0681, "step": 2835 }, { - "epoch": 0.08047673098751419, + "epoch": 0.0803649862563405, "grad_norm": 0.0, - "learning_rate": 1.9866779310835372e-05, - "loss": 1.0102, + "learning_rate": 1.9867300907242308e-05, + "loss": 1.1126, "step": 2836 }, { - "epoch": 0.08050510783200908, + "epoch": 0.08039332369860297, "grad_norm": 0.0, - "learning_rate": 1.9866629748238206e-05, - "loss": 1.0191, + "learning_rate": 1.9867151845057942e-05, + "loss": 1.1549, "step": 2837 }, { - "epoch": 0.08053348467650397, + "epoch": 0.08042166114086542, "grad_norm": 0.0, - "learning_rate": 1.986648010229717e-05, - "loss": 0.9096, + "learning_rate": 1.98670026997589e-05, + "loss": 1.093, "step": 2838 }, { - "epoch": 0.08056186152099887, + "epoch": 0.08044999858312789, "grad_norm": 0.0, - "learning_rate": 1.9866330373013532e-05, - "loss": 1.1255, + "learning_rate": 1.986685347134642e-05, + "loss": 1.068, "step": 2839 }, { - "epoch": 0.08059023836549375, + "epoch": 0.08047833602539035, "grad_norm": 0.0, - "learning_rate": 1.986618056038856e-05, - "loss": 0.9983, + "learning_rate": 1.9866704159821776e-05, + "loss": 1.1722, "step": 2840 }, { - "epoch": 0.08061861520998866, + "epoch": 0.0805066734676528, "grad_norm": 0.0, - "learning_rate": 1.9866030664423518e-05, - "loss": 1.1166, + "learning_rate": 1.9866554765186218e-05, + "loss": 1.1248, "step": 2841 }, { - "epoch": 0.08064699205448354, + "epoch": 0.08053501090991527, "grad_norm": 0.0, - "learning_rate": 1.9865880685119673e-05, - "loss": 1.0067, + "learning_rate": 1.9866405287441004e-05, + "loss": 1.0738, "step": 2842 }, { - "epoch": 0.08067536889897843, + "epoch": 0.08056334835217774, "grad_norm": 0.0, - "learning_rate": 1.9865730622478286e-05, - "loss": 1.0742, + "learning_rate": 1.9866255726587397e-05, + "loss": 0.9881, "step": 2843 }, { - "epoch": 0.08070374574347333, + "epoch": 0.08059168579444019, "grad_norm": 0.0, - "learning_rate": 1.986558047650063e-05, - "loss": 1.0395, + "learning_rate": 1.986610608262665e-05, + "loss": 1.0092, "step": 2844 }, { - "epoch": 0.08073212258796822, + "epoch": 0.08062002323670266, "grad_norm": 0.0, - "learning_rate": 1.986543024718797e-05, - "loss": 1.0714, + "learning_rate": 1.986595635556003e-05, + "loss": 1.1278, "step": 2845 }, { - "epoch": 0.0807604994324631, + "epoch": 0.08064836067896512, "grad_norm": 0.0, - "learning_rate": 1.9865279934541585e-05, - "loss": 1.0587, + "learning_rate": 1.9865806545388796e-05, + "loss": 1.0429, "step": 2846 }, { - "epoch": 0.08078887627695801, + "epoch": 0.08067669812122757, "grad_norm": 0.0, - "learning_rate": 1.986512953856273e-05, - "loss": 1.0413, + "learning_rate": 1.986565665211421e-05, + "loss": 1.0396, "step": 2847 }, { - "epoch": 0.0808172531214529, + "epoch": 0.08070503556349004, "grad_norm": 0.0, - "learning_rate": 1.9864979059252685e-05, - "loss": 1.0122, + "learning_rate": 1.9865506675737537e-05, + "loss": 1.0293, "step": 2848 }, { - "epoch": 0.08084562996594778, + "epoch": 0.0807333730057525, "grad_norm": 0.0, - "learning_rate": 1.9864828496612717e-05, - "loss": 1.051, + "learning_rate": 1.9865356616260035e-05, + "loss": 1.0558, "step": 2849 }, { - "epoch": 0.08087400681044268, + "epoch": 0.08076171044801496, "grad_norm": 0.0, - "learning_rate": 1.98646778506441e-05, - "loss": 1.1117, + "learning_rate": 1.986520647368297e-05, + "loss": 1.0516, "step": 2850 }, { - "epoch": 0.08090238365493757, + "epoch": 0.08079004789027742, "grad_norm": 0.0, - "learning_rate": 1.9864527121348108e-05, - "loss": 1.0403, + "learning_rate": 1.986505624800761e-05, + "loss": 1.0892, "step": 2851 }, { - "epoch": 0.08093076049943246, + "epoch": 0.08081838533253989, "grad_norm": 0.0, - "learning_rate": 1.9864376308726006e-05, - "loss": 1.1978, + "learning_rate": 1.9864905939235215e-05, + "loss": 1.052, "step": 2852 }, { - "epoch": 0.08095913734392736, + "epoch": 0.08084672277480234, "grad_norm": 0.0, - "learning_rate": 1.986422541277908e-05, - "loss": 1.0442, + "learning_rate": 1.9864755547367056e-05, + "loss": 1.0351, "step": 2853 }, { - "epoch": 0.08098751418842225, + "epoch": 0.08087506021706481, "grad_norm": 0.0, - "learning_rate": 1.9864074433508597e-05, - "loss": 1.0206, + "learning_rate": 1.9864605072404397e-05, + "loss": 1.0693, "step": 2854 }, { - "epoch": 0.08101589103291713, + "epoch": 0.08090339765932728, "grad_norm": 0.0, - "learning_rate": 1.9863923370915834e-05, - "loss": 1.1049, + "learning_rate": 1.986445451434851e-05, + "loss": 1.1118, "step": 2855 }, { - "epoch": 0.08104426787741204, + "epoch": 0.08093173510158973, "grad_norm": 0.0, - "learning_rate": 1.9863772225002066e-05, - "loss": 1.1604, + "learning_rate": 1.9864303873200655e-05, + "loss": 1.1924, "step": 2856 }, { - "epoch": 0.08107264472190692, + "epoch": 0.0809600725438522, "grad_norm": 0.0, - "learning_rate": 1.986362099576857e-05, - "loss": 1.0176, + "learning_rate": 1.9864153148962108e-05, + "loss": 1.1679, "step": 2857 }, { - "epoch": 0.08110102156640181, + "epoch": 0.08098840998611466, "grad_norm": 0.0, - "learning_rate": 1.9863469683216622e-05, - "loss": 1.1833, + "learning_rate": 1.9864002341634134e-05, + "loss": 1.0466, "step": 2858 }, { - "epoch": 0.08112939841089671, + "epoch": 0.08101674742837711, "grad_norm": 0.0, - "learning_rate": 1.9863318287347507e-05, - "loss": 1.0378, + "learning_rate": 1.9863851451218006e-05, + "loss": 1.0168, "step": 2859 }, { - "epoch": 0.0811577752553916, + "epoch": 0.08104508487063958, "grad_norm": 0.0, - "learning_rate": 1.9863166808162497e-05, - "loss": 1.1114, + "learning_rate": 1.9863700477714992e-05, + "loss": 1.0823, "step": 2860 }, { - "epoch": 0.0811861520998865, + "epoch": 0.08107342231290204, "grad_norm": 0.0, - "learning_rate": 1.9863015245662872e-05, - "loss": 1.1239, + "learning_rate": 1.986354942112637e-05, + "loss": 1.0919, "step": 2861 }, { - "epoch": 0.08121452894438139, + "epoch": 0.0811017597551645, "grad_norm": 0.0, - "learning_rate": 1.9862863599849917e-05, - "loss": 0.9142, + "learning_rate": 1.986339828145341e-05, + "loss": 1.0728, "step": 2862 }, { - "epoch": 0.08124290578887627, + "epoch": 0.08113009719742696, "grad_norm": 0.0, - "learning_rate": 1.9862711870724908e-05, - "loss": 1.05, + "learning_rate": 1.9863247058697383e-05, + "loss": 1.0567, "step": 2863 }, { - "epoch": 0.08127128263337118, + "epoch": 0.08115843463968943, "grad_norm": 0.0, - "learning_rate": 1.9862560058289127e-05, - "loss": 1.168, + "learning_rate": 1.986309575285956e-05, + "loss": 1.0154, "step": 2864 }, { - "epoch": 0.08129965947786606, + "epoch": 0.08118677208195188, "grad_norm": 0.0, - "learning_rate": 1.986240816254386e-05, - "loss": 0.9996, + "learning_rate": 1.986294436394122e-05, + "loss": 1.115, "step": 2865 }, { - "epoch": 0.08132803632236095, + "epoch": 0.08121510952421435, "grad_norm": 0.0, - "learning_rate": 1.9862256183490387e-05, - "loss": 0.9531, + "learning_rate": 1.9862792891943634e-05, + "loss": 1.0879, "step": 2866 }, { - "epoch": 0.08135641316685585, + "epoch": 0.08124344696647681, "grad_norm": 0.0, - "learning_rate": 1.9862104121129992e-05, - "loss": 1.0161, + "learning_rate": 1.9862641336868087e-05, + "loss": 0.9942, "step": 2867 }, { - "epoch": 0.08138479001135074, + "epoch": 0.08127178440873926, "grad_norm": 0.0, - "learning_rate": 1.9861951975463962e-05, - "loss": 0.9814, + "learning_rate": 1.9862489698715847e-05, + "loss": 1.1025, "step": 2868 }, { - "epoch": 0.08141316685584563, + "epoch": 0.08130012185100173, "grad_norm": 0.0, - "learning_rate": 1.986179974649358e-05, - "loss": 1.0613, + "learning_rate": 1.9862337977488194e-05, + "loss": 1.0286, "step": 2869 }, { - "epoch": 0.08144154370034053, + "epoch": 0.0813284592932642, "grad_norm": 0.0, - "learning_rate": 1.986164743422013e-05, - "loss": 1.0678, + "learning_rate": 1.9862186173186406e-05, + "loss": 1.1483, "step": 2870 }, { - "epoch": 0.08146992054483541, + "epoch": 0.08135679673552665, "grad_norm": 0.0, - "learning_rate": 1.9861495038644905e-05, - "loss": 1.0635, + "learning_rate": 1.986203428581176e-05, + "loss": 1.1105, "step": 2871 }, { - "epoch": 0.0814982973893303, + "epoch": 0.08138513417778911, "grad_norm": 0.0, - "learning_rate": 1.9861342559769184e-05, - "loss": 1.0859, + "learning_rate": 1.986188231536554e-05, + "loss": 1.1034, "step": 2872 }, { - "epoch": 0.0815266742338252, + "epoch": 0.08141347162005158, "grad_norm": 0.0, - "learning_rate": 1.986118999759426e-05, - "loss": 0.9062, + "learning_rate": 1.986173026184902e-05, + "loss": 1.0736, "step": 2873 }, { - "epoch": 0.08155505107832009, + "epoch": 0.08144180906231403, "grad_norm": 0.0, - "learning_rate": 1.986103735212142e-05, - "loss": 0.9502, + "learning_rate": 1.9861578125263484e-05, + "loss": 1.0719, "step": 2874 }, { - "epoch": 0.08158342792281498, + "epoch": 0.0814701465045765, "grad_norm": 0.0, - "learning_rate": 1.9860884623351957e-05, - "loss": 1.0077, + "learning_rate": 1.9861425905610216e-05, + "loss": 1.2083, "step": 2875 }, { - "epoch": 0.08161180476730988, + "epoch": 0.08149848394683896, "grad_norm": 0.0, - "learning_rate": 1.9860731811287157e-05, - "loss": 1.03, + "learning_rate": 1.9861273602890495e-05, + "loss": 1.0404, "step": 2876 }, { - "epoch": 0.08164018161180477, + "epoch": 0.08152682138910142, "grad_norm": 0.0, - "learning_rate": 1.986057891592831e-05, - "loss": 1.0106, + "learning_rate": 1.9861121217105603e-05, + "loss": 1.0992, "step": 2877 }, { - "epoch": 0.08166855845629965, + "epoch": 0.08155515883136388, "grad_norm": 0.0, - "learning_rate": 1.9860425937276714e-05, - "loss": 1.0676, + "learning_rate": 1.9860968748256828e-05, + "loss": 1.1324, "step": 2878 }, { - "epoch": 0.08169693530079455, + "epoch": 0.08158349627362635, "grad_norm": 0.0, - "learning_rate": 1.9860272875333653e-05, - "loss": 1.0466, + "learning_rate": 1.986081619634545e-05, + "loss": 1.0368, "step": 2879 }, { - "epoch": 0.08172531214528944, + "epoch": 0.0816118337158888, "grad_norm": 0.0, - "learning_rate": 1.986011973010043e-05, - "loss": 1.0519, + "learning_rate": 1.9860663561372756e-05, + "loss": 0.9447, "step": 2880 }, { - "epoch": 0.08175368898978434, + "epoch": 0.08164017115815127, "grad_norm": 0.0, - "learning_rate": 1.9859966501578325e-05, - "loss": 1.0861, + "learning_rate": 1.9860510843340027e-05, + "loss": 1.1104, "step": 2881 }, { - "epoch": 0.08178206583427923, + "epoch": 0.08166850860041373, "grad_norm": 0.0, - "learning_rate": 1.9859813189768643e-05, - "loss": 1.0951, + "learning_rate": 1.986035804224856e-05, + "loss": 1.0815, "step": 2882 }, { - "epoch": 0.08181044267877412, + "epoch": 0.08169684604267619, "grad_norm": 0.0, - "learning_rate": 1.9859659794672677e-05, - "loss": 1.0648, + "learning_rate": 1.9860205158099632e-05, + "loss": 1.0205, "step": 2883 }, { - "epoch": 0.08183881952326902, + "epoch": 0.08172518348493865, "grad_norm": 0.0, - "learning_rate": 1.9859506316291716e-05, - "loss": 1.1274, + "learning_rate": 1.9860052190894533e-05, + "loss": 1.1959, "step": 2884 }, { - "epoch": 0.0818671963677639, + "epoch": 0.08175352092720112, "grad_norm": 0.0, - "learning_rate": 1.9859352754627067e-05, - "loss": 1.0288, + "learning_rate": 1.9859899140634557e-05, + "loss": 1.0907, "step": 2885 }, { - "epoch": 0.0818955732122588, + "epoch": 0.08178185836946357, "grad_norm": 0.0, - "learning_rate": 1.985919910968002e-05, - "loss": 1.1359, + "learning_rate": 1.9859746007320985e-05, + "loss": 1.1218, "step": 2886 }, { - "epoch": 0.0819239500567537, + "epoch": 0.08181019581172604, "grad_norm": 0.0, - "learning_rate": 1.985904538145188e-05, - "loss": 1.089, + "learning_rate": 1.9859592790955114e-05, + "loss": 1.1169, "step": 2887 }, { - "epoch": 0.08195232690124858, + "epoch": 0.0818385332539885, "grad_norm": 0.0, - "learning_rate": 1.9858891569943936e-05, - "loss": 1.0921, + "learning_rate": 1.9859439491538232e-05, + "loss": 1.0685, "step": 2888 }, { - "epoch": 0.08198070374574347, + "epoch": 0.08186687069625095, "grad_norm": 0.0, - "learning_rate": 1.985873767515749e-05, - "loss": 1.0232, + "learning_rate": 1.9859286109071626e-05, + "loss": 1.1708, "step": 2889 }, { - "epoch": 0.08200908059023837, + "epoch": 0.08189520813851342, "grad_norm": 0.0, - "learning_rate": 1.9858583697093845e-05, - "loss": 0.9455, + "learning_rate": 1.9859132643556597e-05, + "loss": 1.0984, "step": 2890 }, { - "epoch": 0.08203745743473326, + "epoch": 0.08192354558077587, "grad_norm": 0.0, - "learning_rate": 1.9858429635754303e-05, - "loss": 1.0395, + "learning_rate": 1.985897909499443e-05, + "loss": 1.1219, "step": 2891 }, { - "epoch": 0.08206583427922814, + "epoch": 0.08195188302303834, "grad_norm": 0.0, - "learning_rate": 1.9858275491140156e-05, - "loss": 1.0055, + "learning_rate": 1.985882546338642e-05, + "loss": 1.1277, "step": 2892 }, { - "epoch": 0.08209421112372305, + "epoch": 0.0819802204653008, "grad_norm": 0.0, - "learning_rate": 1.985812126325272e-05, - "loss": 1.1758, + "learning_rate": 1.9858671748733863e-05, + "loss": 1.0952, "step": 2893 }, { - "epoch": 0.08212258796821793, + "epoch": 0.08200855790756326, "grad_norm": 0.0, - "learning_rate": 1.9857966952093285e-05, - "loss": 0.9824, + "learning_rate": 1.9858517951038056e-05, + "loss": 1.0421, "step": 2894 }, { - "epoch": 0.08215096481271282, + "epoch": 0.08203689534982572, "grad_norm": 0.0, - "learning_rate": 1.9857812557663166e-05, - "loss": 1.1331, + "learning_rate": 1.985836407030029e-05, + "loss": 1.0503, "step": 2895 }, { - "epoch": 0.08217934165720772, + "epoch": 0.08206523279208819, "grad_norm": 0.0, - "learning_rate": 1.9857658079963657e-05, - "loss": 1.0859, + "learning_rate": 1.985821010652186e-05, + "loss": 1.1386, "step": 2896 }, { - "epoch": 0.08220771850170261, + "epoch": 0.08209357023435064, "grad_norm": 0.0, - "learning_rate": 1.985750351899607e-05, - "loss": 1.012, + "learning_rate": 1.9858056059704068e-05, + "loss": 1.127, "step": 2897 }, { - "epoch": 0.0822360953461975, + "epoch": 0.0821219076766131, "grad_norm": 0.0, - "learning_rate": 1.9857348874761704e-05, - "loss": 1.1076, + "learning_rate": 1.985790192984821e-05, + "loss": 0.9929, "step": 2898 }, { - "epoch": 0.0822644721906924, + "epoch": 0.08215024511887557, "grad_norm": 0.0, - "learning_rate": 1.985719414726187e-05, - "loss": 1.0446, + "learning_rate": 1.985774771695558e-05, + "loss": 0.9807, "step": 2899 }, { - "epoch": 0.08229284903518728, + "epoch": 0.08217858256113802, "grad_norm": 0.0, - "learning_rate": 1.9857039336497875e-05, - "loss": 1.0458, + "learning_rate": 1.9857593421027483e-05, + "loss": 1.071, "step": 2900 }, { - "epoch": 0.08232122587968219, + "epoch": 0.08220692000340049, "grad_norm": 0.0, - "learning_rate": 1.9856884442471024e-05, - "loss": 0.9798, + "learning_rate": 1.9857439042065215e-05, + "loss": 1.1364, "step": 2901 }, { - "epoch": 0.08234960272417707, + "epoch": 0.08223525744566296, "grad_norm": 0.0, - "learning_rate": 1.985672946518263e-05, - "loss": 1.017, + "learning_rate": 1.9857284580070077e-05, + "loss": 1.0837, "step": 2902 }, { - "epoch": 0.08237797956867196, + "epoch": 0.08226359488792541, "grad_norm": 0.0, - "learning_rate": 1.9856574404633996e-05, - "loss": 1.0648, + "learning_rate": 1.985713003504337e-05, + "loss": 1.1477, "step": 2903 }, { - "epoch": 0.08240635641316686, + "epoch": 0.08229193233018788, "grad_norm": 0.0, - "learning_rate": 1.985641926082644e-05, - "loss": 0.9646, + "learning_rate": 1.98569754069864e-05, + "loss": 1.0889, "step": 2904 }, { - "epoch": 0.08243473325766175, + "epoch": 0.08232026977245034, "grad_norm": 0.0, - "learning_rate": 1.9856264033761263e-05, - "loss": 1.0797, + "learning_rate": 1.985682069590046e-05, + "loss": 1.0888, "step": 2905 }, { - "epoch": 0.08246311010215664, + "epoch": 0.0823486072147128, "grad_norm": 0.0, - "learning_rate": 1.985610872343978e-05, - "loss": 0.9978, + "learning_rate": 1.985666590178686e-05, + "loss": 1.0876, "step": 2906 }, { - "epoch": 0.08249148694665154, + "epoch": 0.08237694465697526, "grad_norm": 0.0, - "learning_rate": 1.9855953329863302e-05, - "loss": 1.0783, + "learning_rate": 1.985651102464691e-05, + "loss": 1.0665, "step": 2907 }, { - "epoch": 0.08251986379114643, + "epoch": 0.08240528209923773, "grad_norm": 0.0, - "learning_rate": 1.9855797853033148e-05, - "loss": 0.985, + "learning_rate": 1.9856356064481902e-05, + "loss": 1.048, "step": 2908 }, { - "epoch": 0.08254824063564131, + "epoch": 0.08243361954150018, "grad_norm": 0.0, - "learning_rate": 1.9855642292950622e-05, - "loss": 1.1302, + "learning_rate": 1.985620102129315e-05, + "loss": 1.1371, "step": 2909 }, { - "epoch": 0.08257661748013621, + "epoch": 0.08246195698376264, "grad_norm": 0.0, - "learning_rate": 1.9855486649617047e-05, - "loss": 1.0178, + "learning_rate": 1.9856045895081952e-05, + "loss": 1.1315, "step": 2910 }, { - "epoch": 0.0826049943246311, + "epoch": 0.08249029442602511, "grad_norm": 0.0, - "learning_rate": 1.9855330923033732e-05, - "loss": 1.0968, + "learning_rate": 1.985589068584962e-05, + "loss": 1.0065, "step": 2911 }, { - "epoch": 0.08263337116912599, + "epoch": 0.08251863186828756, "grad_norm": 0.0, - "learning_rate": 1.9855175113201994e-05, - "loss": 1.1755, + "learning_rate": 1.9855735393597463e-05, + "loss": 1.0386, "step": 2912 }, { - "epoch": 0.08266174801362089, + "epoch": 0.08254696931055003, "grad_norm": 0.0, - "learning_rate": 1.9855019220123145e-05, - "loss": 1.0068, + "learning_rate": 1.9855580018326786e-05, + "loss": 1.1522, "step": 2913 }, { - "epoch": 0.08269012485811578, + "epoch": 0.0825753067528125, "grad_norm": 0.0, - "learning_rate": 1.985486324379851e-05, - "loss": 1.0953, + "learning_rate": 1.98554245600389e-05, + "loss": 1.133, "step": 2914 }, { - "epoch": 0.08271850170261066, + "epoch": 0.08260364419507495, "grad_norm": 0.0, - "learning_rate": 1.98547071842294e-05, - "loss": 1.017, + "learning_rate": 1.9855269018735113e-05, + "loss": 1.0239, "step": 2915 }, { - "epoch": 0.08274687854710557, + "epoch": 0.08263198163733741, "grad_norm": 0.0, - "learning_rate": 1.9854551041417137e-05, - "loss": 1.0985, + "learning_rate": 1.9855113394416736e-05, + "loss": 1.0728, "step": 2916 }, { - "epoch": 0.08277525539160045, + "epoch": 0.08266031907959988, "grad_norm": 0.0, - "learning_rate": 1.9854394815363037e-05, - "loss": 1.1315, + "learning_rate": 1.9854957687085078e-05, + "loss": 1.0256, "step": 2917 }, { - "epoch": 0.08280363223609534, + "epoch": 0.08268865652186233, "grad_norm": 0.0, - "learning_rate": 1.985423850606842e-05, - "loss": 0.993, + "learning_rate": 1.9854801896741453e-05, + "loss": 1.1686, "step": 2918 }, { - "epoch": 0.08283200908059024, + "epoch": 0.0827169939641248, "grad_norm": 0.0, - "learning_rate": 1.985408211353461e-05, - "loss": 1.1005, + "learning_rate": 1.9854646023387173e-05, + "loss": 1.1617, "step": 2919 }, { - "epoch": 0.08286038592508513, + "epoch": 0.08274533140638726, "grad_norm": 0.0, - "learning_rate": 1.9853925637762925e-05, - "loss": 1.1917, + "learning_rate": 1.9854490067023546e-05, + "loss": 1.0114, "step": 2920 }, { - "epoch": 0.08288876276958003, + "epoch": 0.08277366884864971, "grad_norm": 0.0, - "learning_rate": 1.9853769078754685e-05, - "loss": 1.106, + "learning_rate": 1.9854334027651892e-05, + "loss": 1.0827, "step": 2921 }, { - "epoch": 0.08291713961407492, + "epoch": 0.08280200629091218, "grad_norm": 0.0, - "learning_rate": 1.9853612436511215e-05, - "loss": 1.0167, + "learning_rate": 1.9854177905273524e-05, + "loss": 1.1406, "step": 2922 }, { - "epoch": 0.0829455164585698, + "epoch": 0.08283034373317465, "grad_norm": 0.0, - "learning_rate": 1.985345571103384e-05, - "loss": 1.0828, + "learning_rate": 1.9854021699889756e-05, + "loss": 1.1166, "step": 2923 }, { - "epoch": 0.0829738933030647, + "epoch": 0.0828586811754371, "grad_norm": 0.0, - "learning_rate": 1.985329890232388e-05, - "loss": 1.0757, + "learning_rate": 1.98538654115019e-05, + "loss": 1.0906, "step": 2924 }, { - "epoch": 0.08300227014755959, + "epoch": 0.08288701861769956, "grad_norm": 0.0, - "learning_rate": 1.9853142010382662e-05, - "loss": 1.2318, + "learning_rate": 1.9853709040111283e-05, + "loss": 1.1975, "step": 2925 }, { - "epoch": 0.08303064699205448, + "epoch": 0.08291535605996203, "grad_norm": 0.0, - "learning_rate": 1.985298503521151e-05, - "loss": 1.0027, + "learning_rate": 1.985355258571921e-05, + "loss": 1.007, "step": 2926 }, { - "epoch": 0.08305902383654938, + "epoch": 0.08294369350222448, "grad_norm": 0.0, - "learning_rate": 1.9852827976811752e-05, - "loss": 1.116, + "learning_rate": 1.985339604832701e-05, + "loss": 1.1346, "step": 2927 }, { - "epoch": 0.08308740068104427, + "epoch": 0.08297203094448695, "grad_norm": 0.0, - "learning_rate": 1.985267083518471e-05, - "loss": 1.0129, + "learning_rate": 1.985323942793599e-05, + "loss": 1.1358, "step": 2928 }, { - "epoch": 0.08311577752553916, + "epoch": 0.08300036838674942, "grad_norm": 0.0, - "learning_rate": 1.9852513610331717e-05, - "loss": 1.1226, + "learning_rate": 1.9853082724547482e-05, + "loss": 1.0165, "step": 2929 }, { - "epoch": 0.08314415437003406, + "epoch": 0.08302870582901187, "grad_norm": 0.0, - "learning_rate": 1.9852356302254098e-05, - "loss": 1.0136, + "learning_rate": 1.9852925938162795e-05, + "loss": 1.1275, "step": 2930 }, { - "epoch": 0.08317253121452894, + "epoch": 0.08305704327127433, "grad_norm": 0.0, - "learning_rate": 1.985219891095318e-05, - "loss": 1.1228, + "learning_rate": 1.985276906878326e-05, + "loss": 1.1423, "step": 2931 }, { - "epoch": 0.08320090805902383, + "epoch": 0.0830853807135368, "grad_norm": 0.0, - "learning_rate": 1.9852041436430302e-05, - "loss": 1.1567, + "learning_rate": 1.9852612116410184e-05, + "loss": 1.0939, "step": 2932 }, { - "epoch": 0.08322928490351873, + "epoch": 0.08311371815579925, "grad_norm": 0.0, - "learning_rate": 1.985188387868678e-05, - "loss": 1.0062, + "learning_rate": 1.9852455081044902e-05, + "loss": 1.0115, "step": 2933 }, { - "epoch": 0.08325766174801362, + "epoch": 0.08314205559806172, "grad_norm": 0.0, - "learning_rate": 1.9851726237723953e-05, - "loss": 1.0765, + "learning_rate": 1.985229796268873e-05, + "loss": 1.0919, "step": 2934 }, { - "epoch": 0.08328603859250851, + "epoch": 0.08317039304032418, "grad_norm": 0.0, - "learning_rate": 1.9851568513543154e-05, - "loss": 1.0142, + "learning_rate": 1.9852140761342997e-05, + "loss": 1.0871, "step": 2935 }, { - "epoch": 0.08331441543700341, + "epoch": 0.08319873048258664, "grad_norm": 0.0, - "learning_rate": 1.985141070614571e-05, - "loss": 1.1237, + "learning_rate": 1.985198347700902e-05, + "loss": 1.1105, "step": 2936 }, { - "epoch": 0.0833427922814983, + "epoch": 0.0832270679248491, "grad_norm": 0.0, - "learning_rate": 1.985125281553296e-05, - "loss": 1.1594, + "learning_rate": 1.985182610968813e-05, + "loss": 0.9424, "step": 2937 }, { - "epoch": 0.08337116912599318, + "epoch": 0.08325540536711157, "grad_norm": 0.0, - "learning_rate": 1.9851094841706232e-05, - "loss": 1.1067, + "learning_rate": 1.985166865938165e-05, + "loss": 1.0992, "step": 2938 }, { - "epoch": 0.08339954597048808, + "epoch": 0.08328374280937402, "grad_norm": 0.0, - "learning_rate": 1.9850936784666865e-05, - "loss": 1.0757, + "learning_rate": 1.9851511126090908e-05, + "loss": 1.0424, "step": 2939 }, { - "epoch": 0.08342792281498297, + "epoch": 0.08331208025163649, "grad_norm": 0.0, - "learning_rate": 1.9850778644416194e-05, - "loss": 1.0395, + "learning_rate": 1.9851353509817228e-05, + "loss": 1.2083, "step": 2940 }, { - "epoch": 0.08345629965947787, + "epoch": 0.08334041769389895, "grad_norm": 0.0, - "learning_rate": 1.9850620420955548e-05, - "loss": 1.0716, + "learning_rate": 1.9851195810561935e-05, + "loss": 1.105, "step": 2941 }, { - "epoch": 0.08348467650397276, + "epoch": 0.0833687551361614, "grad_norm": 0.0, - "learning_rate": 1.9850462114286275e-05, - "loss": 1.0537, + "learning_rate": 1.9851038028326368e-05, + "loss": 1.0077, "step": 2942 }, { - "epoch": 0.08351305334846765, + "epoch": 0.08339709257842387, "grad_norm": 0.0, - "learning_rate": 1.98503037244097e-05, - "loss": 1.0148, + "learning_rate": 1.985088016311185e-05, + "loss": 1.203, "step": 2943 }, { - "epoch": 0.08354143019296255, + "epoch": 0.08342543002068634, "grad_norm": 0.0, - "learning_rate": 1.985014525132717e-05, - "loss": 1.0786, + "learning_rate": 1.9850722214919706e-05, + "loss": 1.0995, "step": 2944 }, { - "epoch": 0.08356980703745744, + "epoch": 0.08345376746294879, "grad_norm": 0.0, - "learning_rate": 1.9849986695040017e-05, - "loss": 1.0854, + "learning_rate": 1.985056418375127e-05, + "loss": 1.1053, "step": 2945 }, { - "epoch": 0.08359818388195232, + "epoch": 0.08348210490521125, "grad_norm": 0.0, - "learning_rate": 1.9849828055549584e-05, - "loss": 1.0581, + "learning_rate": 1.985040606960788e-05, + "loss": 1.1217, "step": 2946 }, { - "epoch": 0.08362656072644722, + "epoch": 0.08351044234747372, "grad_norm": 0.0, - "learning_rate": 1.9849669332857212e-05, - "loss": 1.055, + "learning_rate": 1.9850247872490855e-05, + "loss": 1.0989, "step": 2947 }, { - "epoch": 0.08365493757094211, + "epoch": 0.08353877978973617, "grad_norm": 0.0, - "learning_rate": 1.9849510526964243e-05, - "loss": 1.0903, + "learning_rate": 1.985008959240154e-05, + "loss": 1.001, "step": 2948 }, { - "epoch": 0.083683314415437, + "epoch": 0.08356711723199864, "grad_norm": 0.0, - "learning_rate": 1.984935163787201e-05, - "loss": 1.0459, + "learning_rate": 1.9849931229341258e-05, + "loss": 1.1493, "step": 2949 }, { - "epoch": 0.0837116912599319, + "epoch": 0.0835954546742611, "grad_norm": 0.0, - "learning_rate": 1.9849192665581865e-05, - "loss": 1.0738, + "learning_rate": 1.984977278331135e-05, + "loss": 1.1741, "step": 2950 }, { - "epoch": 0.08374006810442679, + "epoch": 0.08362379211652356, "grad_norm": 0.0, - "learning_rate": 1.9849033610095144e-05, - "loss": 0.9703, + "learning_rate": 1.9849614254313147e-05, + "loss": 1.1618, "step": 2951 }, { - "epoch": 0.08376844494892167, + "epoch": 0.08365212955878602, "grad_norm": 0.0, - "learning_rate": 1.9848874471413196e-05, - "loss": 1.0571, + "learning_rate": 1.984945564234799e-05, + "loss": 1.0872, "step": 2952 }, { - "epoch": 0.08379682179341658, + "epoch": 0.08368046700104849, "grad_norm": 0.0, - "learning_rate": 1.9848715249537363e-05, - "loss": 0.9756, + "learning_rate": 1.9849296947417206e-05, + "loss": 1.1373, "step": 2953 }, { - "epoch": 0.08382519863791146, + "epoch": 0.08370880444331094, "grad_norm": 0.0, - "learning_rate": 1.984855594446899e-05, - "loss": 1.1504, + "learning_rate": 1.984913816952214e-05, + "loss": 1.02, "step": 2954 }, { - "epoch": 0.08385357548240635, + "epoch": 0.08373714188557341, "grad_norm": 0.0, - "learning_rate": 1.984839655620942e-05, - "loss": 1.0977, + "learning_rate": 1.984897930866412e-05, + "loss": 1.0572, "step": 2955 }, { - "epoch": 0.08388195232690125, + "epoch": 0.08376547932783587, "grad_norm": 0.0, - "learning_rate": 1.9848237084760004e-05, - "loss": 1.1891, + "learning_rate": 1.9848820364844494e-05, + "loss": 1.1358, "step": 2956 }, { - "epoch": 0.08391032917139614, + "epoch": 0.08379381677009833, "grad_norm": 0.0, - "learning_rate": 1.9848077530122083e-05, - "loss": 0.9978, + "learning_rate": 1.9848661338064597e-05, + "loss": 1.04, "step": 2957 }, { - "epoch": 0.08393870601589103, + "epoch": 0.08382215421236079, "grad_norm": 0.0, - "learning_rate": 1.984791789229701e-05, - "loss": 1.004, + "learning_rate": 1.9848502228325764e-05, + "loss": 1.0859, "step": 2958 }, { - "epoch": 0.08396708286038593, + "epoch": 0.08385049165462326, "grad_norm": 0.0, - "learning_rate": 1.984775817128613e-05, - "loss": 1.1451, + "learning_rate": 1.9848343035629345e-05, + "loss": 0.9892, "step": 2959 }, { - "epoch": 0.08399545970488081, + "epoch": 0.08387882909688571, "grad_norm": 0.0, - "learning_rate": 1.98475983670908e-05, - "loss": 1.0538, + "learning_rate": 1.9848183759976674e-05, + "loss": 0.9995, "step": 2960 }, { - "epoch": 0.08402383654937572, + "epoch": 0.08390716653914818, "grad_norm": 0.0, - "learning_rate": 1.984743847971236e-05, - "loss": 1.0885, + "learning_rate": 1.9848024401369092e-05, + "loss": 1.0896, "step": 2961 }, { - "epoch": 0.0840522133938706, + "epoch": 0.08393550398141064, "grad_norm": 0.0, - "learning_rate": 1.984727850915216e-05, - "loss": 0.9894, + "learning_rate": 1.9847864959807946e-05, + "loss": 1.0728, "step": 2962 }, { - "epoch": 0.08408059023836549, + "epoch": 0.0839638414236731, "grad_norm": 0.0, - "learning_rate": 1.984711845541156e-05, - "loss": 1.1683, + "learning_rate": 1.9847705435294575e-05, + "loss": 1.0055, "step": 2963 }, { - "epoch": 0.08410896708286039, + "epoch": 0.08399217886593556, "grad_norm": 0.0, - "learning_rate": 1.984695831849191e-05, - "loss": 1.1648, + "learning_rate": 1.9847545827830327e-05, + "loss": 1.0578, "step": 2964 }, { - "epoch": 0.08413734392735528, + "epoch": 0.08402051630819803, "grad_norm": 0.0, - "learning_rate": 1.9846798098394558e-05, - "loss": 1.0745, + "learning_rate": 1.984738613741654e-05, + "loss": 1.0571, "step": 2965 }, { - "epoch": 0.08416572077185017, + "epoch": 0.08404885375046048, "grad_norm": 0.0, - "learning_rate": 1.984663779512086e-05, - "loss": 1.0828, + "learning_rate": 1.9847226364054567e-05, + "loss": 1.0249, "step": 2966 }, { - "epoch": 0.08419409761634507, + "epoch": 0.08407719119272294, "grad_norm": 0.0, - "learning_rate": 1.984647740867217e-05, - "loss": 1.0204, + "learning_rate": 1.9847066507745744e-05, + "loss": 1.1222, "step": 2967 }, { - "epoch": 0.08422247446083996, + "epoch": 0.08410552863498541, "grad_norm": 0.0, - "learning_rate": 1.9846316939049847e-05, - "loss": 1.1292, + "learning_rate": 1.984690656849143e-05, + "loss": 1.0893, "step": 2968 }, { - "epoch": 0.08425085130533484, + "epoch": 0.08413386607724786, "grad_norm": 0.0, - "learning_rate": 1.9846156386255236e-05, - "loss": 1.1005, + "learning_rate": 1.9846746546292958e-05, + "loss": 1.032, "step": 2969 }, { - "epoch": 0.08427922814982974, + "epoch": 0.08416220351951033, "grad_norm": 0.0, - "learning_rate": 1.98459957502897e-05, - "loss": 1.0501, + "learning_rate": 1.984658644115169e-05, + "loss": 1.0661, "step": 2970 }, { - "epoch": 0.08430760499432463, + "epoch": 0.0841905409617728, "grad_norm": 0.0, - "learning_rate": 1.9845835031154598e-05, - "loss": 1.1031, + "learning_rate": 1.9846426253068963e-05, + "loss": 1.1204, "step": 2971 }, { - "epoch": 0.08433598183881952, + "epoch": 0.08421887840403525, "grad_norm": 0.0, - "learning_rate": 1.984567422885128e-05, - "loss": 0.962, + "learning_rate": 1.9846265982046134e-05, + "loss": 0.9626, "step": 2972 }, { - "epoch": 0.08436435868331442, + "epoch": 0.08424721584629771, "grad_norm": 0.0, - "learning_rate": 1.9845513343381116e-05, - "loss": 1.0873, + "learning_rate": 1.9846105628084553e-05, + "loss": 1.1655, "step": 2973 }, { - "epoch": 0.0843927355278093, + "epoch": 0.08427555328856018, "grad_norm": 0.0, - "learning_rate": 1.9845352374745455e-05, - "loss": 1.0949, + "learning_rate": 1.9845945191185564e-05, + "loss": 1.0271, "step": 2974 }, { - "epoch": 0.0844211123723042, + "epoch": 0.08430389073082263, "grad_norm": 0.0, - "learning_rate": 1.984519132294566e-05, - "loss": 1.1229, + "learning_rate": 1.984578467135052e-05, + "loss": 1.152, "step": 2975 }, { - "epoch": 0.0844494892167991, + "epoch": 0.0843322281730851, "grad_norm": 0.0, - "learning_rate": 1.984503018798309e-05, - "loss": 1.0181, + "learning_rate": 1.984562406858078e-05, + "loss": 1.0253, "step": 2976 }, { - "epoch": 0.08447786606129398, + "epoch": 0.08436056561534756, "grad_norm": 0.0, - "learning_rate": 1.984486896985911e-05, - "loss": 0.9611, + "learning_rate": 1.984546338287769e-05, + "loss": 1.1982, "step": 2977 }, { - "epoch": 0.08450624290578887, + "epoch": 0.08438890305761002, "grad_norm": 0.0, - "learning_rate": 1.9844707668575078e-05, - "loss": 0.9576, + "learning_rate": 1.9845302614242608e-05, + "loss": 1.0363, "step": 2978 }, { - "epoch": 0.08453461975028377, + "epoch": 0.08441724049987248, "grad_norm": 0.0, - "learning_rate": 1.984454628413236e-05, - "loss": 1.1555, + "learning_rate": 1.9845141762676885e-05, + "loss": 1.0858, "step": 2979 }, { - "epoch": 0.08456299659477866, + "epoch": 0.08444557794213495, "grad_norm": 0.0, - "learning_rate": 1.9844384816532313e-05, - "loss": 1.1057, + "learning_rate": 1.9844980828181876e-05, + "loss": 1.1335, "step": 2980 }, { - "epoch": 0.08459137343927356, + "epoch": 0.0844739153843974, "grad_norm": 0.0, - "learning_rate": 1.9844223265776305e-05, - "loss": 1.1341, + "learning_rate": 1.984481981075894e-05, + "loss": 1.199, "step": 2981 }, { - "epoch": 0.08461975028376845, + "epoch": 0.08450225282665987, "grad_norm": 0.0, - "learning_rate": 1.9844061631865703e-05, - "loss": 1.041, + "learning_rate": 1.9844658710409428e-05, + "loss": 1.0612, "step": 2982 }, { - "epoch": 0.08464812712826333, + "epoch": 0.08453059026892233, "grad_norm": 0.0, - "learning_rate": 1.9843899914801867e-05, - "loss": 0.9773, + "learning_rate": 1.9844497527134703e-05, + "loss": 1.0569, "step": 2983 }, { - "epoch": 0.08467650397275824, + "epoch": 0.08455892771118478, "grad_norm": 0.0, - "learning_rate": 1.984373811458617e-05, - "loss": 1.0862, + "learning_rate": 1.984433626093612e-05, + "loss": 1.1121, "step": 2984 }, { - "epoch": 0.08470488081725312, + "epoch": 0.08458726515344725, "grad_norm": 0.0, - "learning_rate": 1.9843576231219973e-05, - "loss": 1.0594, + "learning_rate": 1.9844174911815034e-05, + "loss": 1.1471, "step": 2985 }, { - "epoch": 0.08473325766174801, + "epoch": 0.08461560259570972, "grad_norm": 0.0, - "learning_rate": 1.9843414264704646e-05, - "loss": 1.0238, + "learning_rate": 1.9844013479772808e-05, + "loss": 1.0953, "step": 2986 }, { - "epoch": 0.08476163450624291, + "epoch": 0.08464394003797217, "grad_norm": 0.0, - "learning_rate": 1.9843252215041552e-05, - "loss": 1.0253, + "learning_rate": 1.98438519648108e-05, + "loss": 1.0705, "step": 2987 }, { - "epoch": 0.0847900113507378, + "epoch": 0.08467227748023463, "grad_norm": 0.0, - "learning_rate": 1.984309008223207e-05, - "loss": 1.0424, + "learning_rate": 1.9843690366930374e-05, + "loss": 1.0753, "step": 2988 }, { - "epoch": 0.08481838819523269, + "epoch": 0.0847006149224971, "grad_norm": 0.0, - "learning_rate": 1.984292786627756e-05, - "loss": 0.9973, + "learning_rate": 1.984352868613289e-05, + "loss": 1.1442, "step": 2989 }, { - "epoch": 0.08484676503972759, + "epoch": 0.08472895236475955, "grad_norm": 0.0, - "learning_rate": 1.9842765567179394e-05, - "loss": 1.0931, + "learning_rate": 1.9843366922419704e-05, + "loss": 0.9807, "step": 2990 }, { - "epoch": 0.08487514188422247, + "epoch": 0.08475728980702202, "grad_norm": 0.0, - "learning_rate": 1.9842603184938948e-05, - "loss": 1.0396, + "learning_rate": 1.9843205075792187e-05, + "loss": 1.094, "step": 2991 }, { - "epoch": 0.08490351872871736, + "epoch": 0.08478562724928448, "grad_norm": 0.0, - "learning_rate": 1.984244071955759e-05, - "loss": 1.0175, + "learning_rate": 1.9843043146251698e-05, + "loss": 1.0728, "step": 2992 }, { - "epoch": 0.08493189557321226, + "epoch": 0.08481396469154694, "grad_norm": 0.0, - "learning_rate": 1.9842278171036692e-05, - "loss": 1.1586, + "learning_rate": 1.98428811337996e-05, + "loss": 1.0352, "step": 2993 }, { - "epoch": 0.08496027241770715, + "epoch": 0.0848423021338094, "grad_norm": 0.0, - "learning_rate": 1.984211553937763e-05, - "loss": 1.1019, + "learning_rate": 1.984271903843726e-05, + "loss": 1.0453, "step": 2994 }, { - "epoch": 0.08498864926220204, + "epoch": 0.08487063957607187, "grad_norm": 0.0, - "learning_rate": 1.9841952824581773e-05, - "loss": 1.2157, + "learning_rate": 1.984255686016604e-05, + "loss": 0.979, "step": 2995 }, { - "epoch": 0.08501702610669694, + "epoch": 0.08489897701833432, "grad_norm": 0.0, - "learning_rate": 1.98417900266505e-05, - "loss": 1.0819, + "learning_rate": 1.9842394598987313e-05, + "loss": 1.1074, "step": 2996 }, { - "epoch": 0.08504540295119183, + "epoch": 0.08492731446059679, "grad_norm": 0.0, - "learning_rate": 1.9841627145585184e-05, - "loss": 1.1494, + "learning_rate": 1.9842232254902435e-05, + "loss": 1.1525, "step": 2997 }, { - "epoch": 0.08507377979568671, + "epoch": 0.08495565190285925, "grad_norm": 0.0, - "learning_rate": 1.9841464181387197e-05, - "loss": 1.0197, + "learning_rate": 1.9842069827912787e-05, + "loss": 1.1325, "step": 2998 }, { - "epoch": 0.08510215664018161, + "epoch": 0.0849839893451217, "grad_norm": 0.0, - "learning_rate": 1.9841301134057925e-05, - "loss": 1.0233, + "learning_rate": 1.9841907318019726e-05, + "loss": 1.1185, "step": 2999 }, { - "epoch": 0.0851305334846765, + "epoch": 0.08501232678738417, "grad_norm": 0.0, - "learning_rate": 1.9841138003598737e-05, - "loss": 1.0634, + "learning_rate": 1.9841744725224626e-05, + "loss": 0.9764, "step": 3000 }, { - "epoch": 0.0851589103291714, + "epoch": 0.08504066422964664, "grad_norm": 0.0, - "learning_rate": 1.9840974790011015e-05, - "loss": 1.0414, + "learning_rate": 1.9841582049528856e-05, + "loss": 1.1576, "step": 3001 }, { - "epoch": 0.08518728717366629, + "epoch": 0.08506900167190909, "grad_norm": 0.0, - "learning_rate": 1.9840811493296134e-05, - "loss": 1.0423, + "learning_rate": 1.9841419290933786e-05, + "loss": 1.1029, "step": 3002 }, { - "epoch": 0.08521566401816118, + "epoch": 0.08509733911417156, "grad_norm": 0.0, - "learning_rate": 1.9840648113455478e-05, - "loss": 1.1871, + "learning_rate": 1.9841256449440783e-05, + "loss": 1.0123, "step": 3003 }, { - "epoch": 0.08524404086265608, + "epoch": 0.08512567655643402, "grad_norm": 0.0, - "learning_rate": 1.9840484650490424e-05, - "loss": 1.0109, + "learning_rate": 1.9841093525051227e-05, + "loss": 1.1049, "step": 3004 }, { - "epoch": 0.08527241770715097, + "epoch": 0.08515401399869647, "grad_norm": 0.0, - "learning_rate": 1.984032110440236e-05, - "loss": 0.9941, + "learning_rate": 1.9840930517766484e-05, + "loss": 1.0391, "step": 3005 }, { - "epoch": 0.08530079455164585, + "epoch": 0.08518235144095894, "grad_norm": 0.0, - "learning_rate": 1.9840157475192655e-05, - "loss": 1.1481, + "learning_rate": 1.9840767427587932e-05, + "loss": 1.0722, "step": 3006 }, { - "epoch": 0.08532917139614075, + "epoch": 0.0852106888832214, "grad_norm": 0.0, - "learning_rate": 1.9839993762862698e-05, - "loss": 0.9224, + "learning_rate": 1.984060425451694e-05, + "loss": 1.0992, "step": 3007 }, { - "epoch": 0.08535754824063564, + "epoch": 0.08523902632548386, "grad_norm": 0.0, - "learning_rate": 1.983982996741387e-05, - "loss": 1.0468, + "learning_rate": 1.9840440998554883e-05, + "loss": 0.9975, "step": 3008 }, { - "epoch": 0.08538592508513053, + "epoch": 0.08526736376774632, "grad_norm": 0.0, - "learning_rate": 1.983966608884756e-05, - "loss": 1.1698, + "learning_rate": 1.9840277659703138e-05, + "loss": 1.1404, "step": 3009 }, { - "epoch": 0.08541430192962543, + "epoch": 0.08529570121000879, "grad_norm": 0.0, - "learning_rate": 1.9839502127165145e-05, - "loss": 1.0002, + "learning_rate": 1.9840114237963076e-05, + "loss": 1.107, "step": 3010 }, { - "epoch": 0.08544267877412032, + "epoch": 0.08532403865227124, "grad_norm": 0.0, - "learning_rate": 1.9839338082368017e-05, - "loss": 1.0838, + "learning_rate": 1.9839950733336084e-05, + "loss": 1.0539, "step": 3011 }, { - "epoch": 0.0854710556186152, + "epoch": 0.08535237609453371, "grad_norm": 0.0, - "learning_rate": 1.9839173954457553e-05, - "loss": 1.0893, + "learning_rate": 1.983978714582353e-05, + "loss": 1.1037, "step": 3012 }, { - "epoch": 0.0854994324631101, + "epoch": 0.08538071353679617, "grad_norm": 0.0, - "learning_rate": 1.983900974343515e-05, - "loss": 1.0081, + "learning_rate": 1.9839623475426795e-05, + "loss": 1.0165, "step": 3013 }, { - "epoch": 0.085527809307605, + "epoch": 0.08540905097905863, "grad_norm": 0.0, - "learning_rate": 1.9838845449302183e-05, - "loss": 1.0851, + "learning_rate": 1.983945972214726e-05, + "loss": 1.172, "step": 3014 }, { - "epoch": 0.08555618615209988, + "epoch": 0.08543738842132109, "grad_norm": 0.0, - "learning_rate": 1.983868107206005e-05, - "loss": 0.9441, + "learning_rate": 1.98392958859863e-05, + "loss": 1.0317, "step": 3015 }, { - "epoch": 0.08558456299659478, + "epoch": 0.08546572586358356, "grad_norm": 0.0, - "learning_rate": 1.9838516611710136e-05, - "loss": 1.0245, + "learning_rate": 1.9839131966945297e-05, + "loss": 1.1346, "step": 3016 }, { - "epoch": 0.08561293984108967, + "epoch": 0.08549406330584601, "grad_norm": 0.0, - "learning_rate": 1.983835206825383e-05, - "loss": 0.9027, + "learning_rate": 1.9838967965025627e-05, + "loss": 1.1731, "step": 3017 }, { - "epoch": 0.08564131668558456, + "epoch": 0.08552240074810848, "grad_norm": 0.0, - "learning_rate": 1.9838187441692517e-05, - "loss": 1.0741, + "learning_rate": 1.9838803880228682e-05, + "loss": 1.0384, "step": 3018 }, { - "epoch": 0.08566969353007946, + "epoch": 0.08555073819037094, "grad_norm": 0.0, - "learning_rate": 1.9838022732027597e-05, - "loss": 1.0633, + "learning_rate": 1.9838639712555842e-05, + "loss": 1.1313, "step": 3019 }, { - "epoch": 0.08569807037457434, + "epoch": 0.0855790756326334, "grad_norm": 0.0, - "learning_rate": 1.9837857939260457e-05, - "loss": 1.0803, + "learning_rate": 1.9838475462008478e-05, + "loss": 1.02, "step": 3020 }, { - "epoch": 0.08572644721906925, + "epoch": 0.08560741307489586, "grad_norm": 0.0, - "learning_rate": 1.9837693063392487e-05, - "loss": 1.0415, + "learning_rate": 1.9838311128587987e-05, + "loss": 1.0097, "step": 3021 }, { - "epoch": 0.08575482406356413, + "epoch": 0.08563575051715833, "grad_norm": 0.0, - "learning_rate": 1.9837528104425082e-05, - "loss": 1.0489, + "learning_rate": 1.9838146712295747e-05, + "loss": 1.0354, "step": 3022 }, { - "epoch": 0.08578320090805902, + "epoch": 0.08566408795942078, "grad_norm": 0.0, - "learning_rate": 1.9837363062359632e-05, - "loss": 1.1132, + "learning_rate": 1.9837982213133144e-05, + "loss": 0.9756, "step": 3023 }, { - "epoch": 0.08581157775255392, + "epoch": 0.08569242540168324, "grad_norm": 0.0, - "learning_rate": 1.983719793719754e-05, - "loss": 0.9943, + "learning_rate": 1.983781763110156e-05, + "loss": 1.0657, "step": 3024 }, { - "epoch": 0.08583995459704881, + "epoch": 0.08572076284394571, "grad_norm": 0.0, - "learning_rate": 1.9837032728940194e-05, - "loss": 0.9725, + "learning_rate": 1.983765296620239e-05, + "loss": 1.0364, "step": 3025 }, { - "epoch": 0.0858683314415437, + "epoch": 0.08574910028620816, "grad_norm": 0.0, - "learning_rate": 1.9836867437588987e-05, - "loss": 1.1106, + "learning_rate": 1.9837488218437012e-05, + "loss": 1.0583, "step": 3026 }, { - "epoch": 0.0858967082860386, + "epoch": 0.08577743772847063, "grad_norm": 0.0, - "learning_rate": 1.983670206314532e-05, - "loss": 1.0229, + "learning_rate": 1.983732338780682e-05, + "loss": 1.0968, "step": 3027 }, { - "epoch": 0.08592508513053349, + "epoch": 0.0858057751707331, "grad_norm": 0.0, - "learning_rate": 1.9836536605610587e-05, - "loss": 1.0389, + "learning_rate": 1.9837158474313198e-05, + "loss": 1.1124, "step": 3028 }, { - "epoch": 0.08595346197502837, + "epoch": 0.08583411261299555, "grad_norm": 0.0, - "learning_rate": 1.983637106498619e-05, - "loss": 1.1334, + "learning_rate": 1.983699347795754e-05, + "loss": 1.0727, "step": 3029 }, { - "epoch": 0.08598183881952327, + "epoch": 0.08586245005525801, "grad_norm": 0.0, - "learning_rate": 1.983620544127352e-05, - "loss": 1.1211, + "learning_rate": 1.9836828398741234e-05, + "loss": 1.0477, "step": 3030 }, { - "epoch": 0.08601021566401816, + "epoch": 0.08589078749752048, "grad_norm": 0.0, - "learning_rate": 1.9836039734473983e-05, - "loss": 1.0032, + "learning_rate": 1.9836663236665666e-05, + "loss": 1.1523, "step": 3031 }, { - "epoch": 0.08603859250851305, + "epoch": 0.08591912493978293, "grad_norm": 0.0, - "learning_rate": 1.9835873944588978e-05, - "loss": 1.0383, + "learning_rate": 1.983649799173223e-05, + "loss": 1.0283, "step": 3032 }, { - "epoch": 0.08606696935300795, + "epoch": 0.0859474623820454, "grad_norm": 0.0, - "learning_rate": 1.98357080716199e-05, - "loss": 0.9944, + "learning_rate": 1.9836332663942323e-05, + "loss": 1.2161, "step": 3033 }, { - "epoch": 0.08609534619750284, + "epoch": 0.08597579982430786, "grad_norm": 0.0, - "learning_rate": 1.9835542115568157e-05, - "loss": 1.0566, + "learning_rate": 1.983616725329733e-05, + "loss": 1.047, "step": 3034 }, { - "epoch": 0.08612372304199772, + "epoch": 0.08600413726657032, "grad_norm": 0.0, - "learning_rate": 1.9835376076435146e-05, - "loss": 1.0078, + "learning_rate": 1.9836001759798647e-05, + "loss": 1.1454, "step": 3035 }, { - "epoch": 0.08615209988649263, + "epoch": 0.08603247470883278, "grad_norm": 0.0, - "learning_rate": 1.983520995422227e-05, - "loss": 0.9987, + "learning_rate": 1.983583618344767e-05, + "loss": 1.0008, "step": 3036 }, { - "epoch": 0.08618047673098751, + "epoch": 0.08606081215109525, "grad_norm": 0.0, - "learning_rate": 1.9835043748930936e-05, - "loss": 1.0574, + "learning_rate": 1.9835670524245793e-05, + "loss": 1.1606, "step": 3037 }, { - "epoch": 0.0862088535754824, + "epoch": 0.0860891495933577, "grad_norm": 0.0, - "learning_rate": 1.9834877460562546e-05, - "loss": 1.0175, + "learning_rate": 1.9835504782194413e-05, + "loss": 1.0931, "step": 3038 }, { - "epoch": 0.0862372304199773, + "epoch": 0.08611748703562017, "grad_norm": 0.0, - "learning_rate": 1.98347110891185e-05, - "loss": 1.0207, + "learning_rate": 1.983533895729492e-05, + "loss": 0.9839, "step": 3039 }, { - "epoch": 0.08626560726447219, + "epoch": 0.08614582447788263, "grad_norm": 0.0, - "learning_rate": 1.983454463460021e-05, - "loss": 1.0212, + "learning_rate": 1.9835173049548716e-05, + "loss": 1.176, "step": 3040 }, { - "epoch": 0.08629398410896709, + "epoch": 0.08617416192014508, "grad_norm": 0.0, - "learning_rate": 1.983437809700908e-05, - "loss": 1.0435, + "learning_rate": 1.98350070589572e-05, + "loss": 1.0373, "step": 3041 }, { - "epoch": 0.08632236095346198, + "epoch": 0.08620249936240755, "grad_norm": 0.0, - "learning_rate": 1.9834211476346516e-05, - "loss": 1.0912, + "learning_rate": 1.9834840985521765e-05, + "loss": 1.0749, "step": 3042 }, { - "epoch": 0.08635073779795686, + "epoch": 0.08623083680467002, "grad_norm": 0.0, - "learning_rate": 1.9834044772613924e-05, - "loss": 1.0043, + "learning_rate": 1.9834674829243813e-05, + "loss": 1.1319, "step": 3043 }, { - "epoch": 0.08637911464245177, + "epoch": 0.08625917424693247, "grad_norm": 0.0, - "learning_rate": 1.9833877985812716e-05, - "loss": 1.0407, + "learning_rate": 1.983450859012474e-05, + "loss": 1.1582, "step": 3044 }, { - "epoch": 0.08640749148694665, + "epoch": 0.08628751168919493, "grad_norm": 0.0, - "learning_rate": 1.9833711115944295e-05, - "loss": 1.0564, + "learning_rate": 1.983434226816595e-05, + "loss": 1.1161, "step": 3045 }, { - "epoch": 0.08643586833144154, + "epoch": 0.0863158491314574, "grad_norm": 0.0, - "learning_rate": 1.9833544163010074e-05, - "loss": 1.0326, + "learning_rate": 1.9834175863368847e-05, + "loss": 1.047, "step": 3046 }, { - "epoch": 0.08646424517593644, + "epoch": 0.08634418657371985, "grad_norm": 0.0, - "learning_rate": 1.9833377127011468e-05, - "loss": 1.0674, + "learning_rate": 1.9834009375734825e-05, + "loss": 0.9932, "step": 3047 }, { - "epoch": 0.08649262202043133, + "epoch": 0.08637252401598232, "grad_norm": 0.0, - "learning_rate": 1.9833210007949878e-05, - "loss": 1.0298, + "learning_rate": 1.9833842805265293e-05, + "loss": 0.9427, "step": 3048 }, { - "epoch": 0.08652099886492622, + "epoch": 0.08640086145824478, "grad_norm": 0.0, - "learning_rate": 1.9833042805826725e-05, - "loss": 1.0465, + "learning_rate": 1.9833676151961648e-05, + "loss": 1.0727, "step": 3049 }, { - "epoch": 0.08654937570942112, + "epoch": 0.08642919890050724, "grad_norm": 0.0, - "learning_rate": 1.9832875520643417e-05, - "loss": 0.9803, + "learning_rate": 1.98335094158253e-05, + "loss": 1.0862, "step": 3050 }, { - "epoch": 0.086577752553916, + "epoch": 0.0864575363427697, "grad_norm": 0.0, - "learning_rate": 1.9832708152401366e-05, - "loss": 1.0693, + "learning_rate": 1.983334259685765e-05, + "loss": 1.0276, "step": 3051 }, { - "epoch": 0.08660612939841089, + "epoch": 0.08648587378503217, "grad_norm": 0.0, - "learning_rate": 1.9832540701101987e-05, - "loss": 1.0912, + "learning_rate": 1.9833175695060102e-05, + "loss": 1.1306, "step": 3052 }, { - "epoch": 0.08663450624290579, + "epoch": 0.08651421122729462, "grad_norm": 0.0, - "learning_rate": 1.9832373166746695e-05, - "loss": 1.05, + "learning_rate": 1.9833008710434068e-05, + "loss": 1.1186, "step": 3053 }, { - "epoch": 0.08666288308740068, + "epoch": 0.08654254866955709, "grad_norm": 0.0, - "learning_rate": 1.983220554933691e-05, - "loss": 1.1015, + "learning_rate": 1.9832841642980948e-05, + "loss": 1.0201, "step": 3054 }, { - "epoch": 0.08669125993189557, + "epoch": 0.08657088611181955, "grad_norm": 0.0, - "learning_rate": 1.9832037848874038e-05, - "loss": 1.0365, + "learning_rate": 1.9832674492702148e-05, + "loss": 1.1393, "step": 3055 }, { - "epoch": 0.08671963677639047, + "epoch": 0.086599223554082, "grad_norm": 0.0, - "learning_rate": 1.98318700653595e-05, - "loss": 0.9373, + "learning_rate": 1.9832507259599084e-05, + "loss": 1.0459, "step": 3056 }, { - "epoch": 0.08674801362088536, + "epoch": 0.08662756099634447, "grad_norm": 0.0, - "learning_rate": 1.9831702198794713e-05, - "loss": 1.1041, + "learning_rate": 1.983233994367316e-05, + "loss": 1.1187, "step": 3057 }, { - "epoch": 0.08677639046538024, + "epoch": 0.08665589843860694, "grad_norm": 0.0, - "learning_rate": 1.9831534249181095e-05, - "loss": 1.1612, + "learning_rate": 1.983217254492578e-05, + "loss": 1.0966, "step": 3058 }, { - "epoch": 0.08680476730987514, + "epoch": 0.08668423588086939, "grad_norm": 0.0, - "learning_rate": 1.9831366216520068e-05, - "loss": 1.1271, + "learning_rate": 1.9832005063358366e-05, + "loss": 1.0113, "step": 3059 }, { - "epoch": 0.08683314415437003, + "epoch": 0.08671257332313186, "grad_norm": 0.0, - "learning_rate": 1.9831198100813047e-05, - "loss": 1.0277, + "learning_rate": 1.983183749897232e-05, + "loss": 0.9979, "step": 3060 }, { - "epoch": 0.08686152099886493, + "epoch": 0.08674091076539432, "grad_norm": 0.0, - "learning_rate": 1.9831029902061455e-05, - "loss": 1.1796, + "learning_rate": 1.9831669851769054e-05, + "loss": 1.0941, "step": 3061 }, { - "epoch": 0.08688989784335982, + "epoch": 0.08676924820765677, "grad_norm": 0.0, - "learning_rate": 1.983086162026671e-05, - "loss": 1.0175, + "learning_rate": 1.9831502121749985e-05, + "loss": 1.0813, "step": 3062 }, { - "epoch": 0.08691827468785471, + "epoch": 0.08679758564991924, "grad_norm": 0.0, - "learning_rate": 1.9830693255430236e-05, - "loss": 1.1006, + "learning_rate": 1.9831334308916518e-05, + "loss": 1.0978, "step": 3063 }, { - "epoch": 0.08694665153234961, + "epoch": 0.0868259230921817, "grad_norm": 0.0, - "learning_rate": 1.9830524807553455e-05, - "loss": 1.0496, + "learning_rate": 1.9831166413270076e-05, + "loss": 1.1431, "step": 3064 }, { - "epoch": 0.0869750283768445, + "epoch": 0.08685426053444416, "grad_norm": 0.0, - "learning_rate": 1.9830356276637788e-05, - "loss": 1.0273, + "learning_rate": 1.983099843481207e-05, + "loss": 0.961, "step": 3065 }, { - "epoch": 0.08700340522133938, + "epoch": 0.08688259797670662, "grad_norm": 0.0, - "learning_rate": 1.983018766268466e-05, - "loss": 1.0015, + "learning_rate": 1.983083037354391e-05, + "loss": 1.103, "step": 3066 }, { - "epoch": 0.08703178206583428, + "epoch": 0.08691093541896909, "grad_norm": 0.0, - "learning_rate": 1.9830018965695494e-05, - "loss": 1.0775, + "learning_rate": 1.9830662229467015e-05, + "loss": 1.1378, "step": 3067 }, { - "epoch": 0.08706015891032917, + "epoch": 0.08693927286123154, "grad_norm": 0.0, - "learning_rate": 1.9829850185671717e-05, - "loss": 1.0767, + "learning_rate": 1.9830494002582803e-05, + "loss": 1.1418, "step": 3068 }, { - "epoch": 0.08708853575482406, + "epoch": 0.08696761030349401, "grad_norm": 0.0, - "learning_rate": 1.9829681322614754e-05, - "loss": 1.0396, + "learning_rate": 1.983032569289269e-05, + "loss": 1.0572, "step": 3069 }, { - "epoch": 0.08711691259931896, + "epoch": 0.08699594774575647, "grad_norm": 0.0, - "learning_rate": 1.9829512376526027e-05, - "loss": 1.0515, + "learning_rate": 1.983015730039809e-05, + "loss": 1.1629, "step": 3070 }, { - "epoch": 0.08714528944381385, + "epoch": 0.08702428518801893, "grad_norm": 0.0, - "learning_rate": 1.982934334740697e-05, - "loss": 1.0743, + "learning_rate": 1.9829988825100427e-05, + "loss": 1.1714, "step": 3071 }, { - "epoch": 0.08717366628830873, + "epoch": 0.08705262263028139, "grad_norm": 0.0, - "learning_rate": 1.982917423525901e-05, - "loss": 1.099, + "learning_rate": 1.9829820267001118e-05, + "loss": 1.0148, "step": 3072 }, { - "epoch": 0.08720204313280364, + "epoch": 0.08708096007254386, "grad_norm": 0.0, - "learning_rate": 1.982900504008357e-05, - "loss": 0.9913, + "learning_rate": 1.9829651626101584e-05, + "loss": 1.0718, "step": 3073 }, { - "epoch": 0.08723041997729852, + "epoch": 0.08710929751480631, "grad_norm": 0.0, - "learning_rate": 1.9828835761882085e-05, - "loss": 1.0457, + "learning_rate": 1.982948290240324e-05, + "loss": 1.0874, "step": 3074 }, { - "epoch": 0.08725879682179341, + "epoch": 0.08713763495706878, "grad_norm": 0.0, - "learning_rate": 1.9828666400655982e-05, - "loss": 0.9859, + "learning_rate": 1.9829314095907516e-05, + "loss": 1.1415, "step": 3075 }, { - "epoch": 0.08728717366628831, + "epoch": 0.08716597239933124, "grad_norm": 0.0, - "learning_rate": 1.982849695640669e-05, - "loss": 1.1194, + "learning_rate": 1.9829145206615828e-05, + "loss": 1.0512, "step": 3076 }, { - "epoch": 0.0873155505107832, + "epoch": 0.0871943098415937, "grad_norm": 0.0, - "learning_rate": 1.9828327429135645e-05, - "loss": 1.02, + "learning_rate": 1.98289762345296e-05, + "loss": 1.0142, "step": 3077 }, { - "epoch": 0.08734392735527809, + "epoch": 0.08722264728385616, "grad_norm": 0.0, - "learning_rate": 1.9828157818844277e-05, - "loss": 1.064, + "learning_rate": 1.9828807179650255e-05, + "loss": 0.9702, "step": 3078 }, { - "epoch": 0.08737230419977299, + "epoch": 0.08725098472611863, "grad_norm": 0.0, - "learning_rate": 1.9827988125534014e-05, - "loss": 1.0123, + "learning_rate": 1.9828638041979216e-05, + "loss": 1.1006, "step": 3079 }, { - "epoch": 0.08740068104426787, + "epoch": 0.08727932216838108, "grad_norm": 0.0, - "learning_rate": 1.9827818349206296e-05, - "loss": 1.0573, + "learning_rate": 1.9828468821517913e-05, + "loss": 1.0176, "step": 3080 }, { - "epoch": 0.08742905788876278, + "epoch": 0.08730765961064355, "grad_norm": 0.0, - "learning_rate": 1.9827648489862553e-05, - "loss": 1.0428, + "learning_rate": 1.9828299518267763e-05, + "loss": 0.9862, "step": 3081 }, { - "epoch": 0.08745743473325766, + "epoch": 0.08733599705290601, "grad_norm": 0.0, - "learning_rate": 1.9827478547504224e-05, - "loss": 1.0088, + "learning_rate": 1.9828130132230198e-05, + "loss": 1.0573, "step": 3082 }, { - "epoch": 0.08748581157775255, + "epoch": 0.08736433449516846, "grad_norm": 0.0, - "learning_rate": 1.982730852213274e-05, - "loss": 0.9504, + "learning_rate": 1.982796066340664e-05, + "loss": 1.1034, "step": 3083 }, { - "epoch": 0.08751418842224745, + "epoch": 0.08739267193743093, "grad_norm": 0.0, - "learning_rate": 1.982713841374954e-05, - "loss": 1.1702, + "learning_rate": 1.9827791111798526e-05, + "loss": 0.9754, "step": 3084 }, { - "epoch": 0.08754256526674234, + "epoch": 0.0874210093796934, "grad_norm": 0.0, - "learning_rate": 1.982696822235606e-05, - "loss": 1.0767, + "learning_rate": 1.9827621477407275e-05, + "loss": 1.1594, "step": 3085 }, { - "epoch": 0.08757094211123723, + "epoch": 0.08744934682195585, "grad_norm": 0.0, - "learning_rate": 1.9826797947953738e-05, - "loss": 1.0703, + "learning_rate": 1.982745176023432e-05, + "loss": 1.2016, "step": 3086 }, { - "epoch": 0.08759931895573213, + "epoch": 0.08747768426421831, "grad_norm": 0.0, - "learning_rate": 1.9826627590544013e-05, - "loss": 1.0164, + "learning_rate": 1.982728196028109e-05, + "loss": 1.1492, "step": 3087 }, { - "epoch": 0.08762769580022702, + "epoch": 0.08750602170648078, "grad_norm": 0.0, - "learning_rate": 1.982645715012832e-05, - "loss": 1.0264, + "learning_rate": 1.9827112077549012e-05, + "loss": 1.1118, "step": 3088 }, { - "epoch": 0.0876560726447219, + "epoch": 0.08753435914874323, "grad_norm": 0.0, - "learning_rate": 1.9826286626708106e-05, - "loss": 1.0847, + "learning_rate": 1.982694211203952e-05, + "loss": 1.0243, "step": 3089 }, { - "epoch": 0.0876844494892168, + "epoch": 0.0875626965910057, "grad_norm": 0.0, - "learning_rate": 1.9826116020284803e-05, - "loss": 0.9575, + "learning_rate": 1.9826772063754047e-05, + "loss": 1.1502, "step": 3090 }, { - "epoch": 0.08771282633371169, + "epoch": 0.08759103403326815, "grad_norm": 0.0, - "learning_rate": 1.9825945330859857e-05, - "loss": 0.9956, + "learning_rate": 1.9826601932694023e-05, + "loss": 1.063, "step": 3091 }, { - "epoch": 0.08774120317820658, + "epoch": 0.08761937147553062, "grad_norm": 0.0, - "learning_rate": 1.9825774558434712e-05, - "loss": 1.0322, + "learning_rate": 1.9826431718860882e-05, + "loss": 1.1215, "step": 3092 }, { - "epoch": 0.08776958002270148, + "epoch": 0.08764770891779308, "grad_norm": 0.0, - "learning_rate": 1.9825603703010804e-05, - "loss": 1.1379, + "learning_rate": 1.982626142225606e-05, + "loss": 1.0782, "step": 3093 }, { - "epoch": 0.08779795686719637, + "epoch": 0.08767604636005553, "grad_norm": 0.0, - "learning_rate": 1.982543276458958e-05, - "loss": 1.2012, + "learning_rate": 1.9826091042880984e-05, + "loss": 1.181, "step": 3094 }, { - "epoch": 0.08782633371169125, + "epoch": 0.087704383802318, "grad_norm": 0.0, - "learning_rate": 1.9825261743172486e-05, - "loss": 1.0625, + "learning_rate": 1.98259205807371e-05, + "loss": 1.0228, "step": 3095 }, { - "epoch": 0.08785471055618616, + "epoch": 0.08773272124458047, "grad_norm": 0.0, - "learning_rate": 1.9825090638760963e-05, - "loss": 0.9854, + "learning_rate": 1.9825750035825834e-05, + "loss": 1.0806, "step": 3096 }, { - "epoch": 0.08788308740068104, + "epoch": 0.08776105868684292, "grad_norm": 0.0, - "learning_rate": 1.982491945135646e-05, - "loss": 1.0372, + "learning_rate": 1.982557940814863e-05, + "loss": 1.1021, "step": 3097 }, { - "epoch": 0.08791146424517593, + "epoch": 0.08778939612910538, "grad_norm": 0.0, - "learning_rate": 1.9824748180960415e-05, - "loss": 1.0804, + "learning_rate": 1.9825408697706917e-05, + "loss": 1.2054, "step": 3098 }, { - "epoch": 0.08793984108967083, + "epoch": 0.08781773357136785, "grad_norm": 0.0, - "learning_rate": 1.9824576827574287e-05, - "loss": 1.0005, + "learning_rate": 1.9825237904502143e-05, + "loss": 1.0743, "step": 3099 }, { - "epoch": 0.08796821793416572, + "epoch": 0.0878460710136303, "grad_norm": 0.0, - "learning_rate": 1.9824405391199514e-05, - "loss": 0.8981, + "learning_rate": 1.982506702853574e-05, + "loss": 1.1223, "step": 3100 }, { - "epoch": 0.08799659477866062, + "epoch": 0.08787440845589277, "grad_norm": 0.0, - "learning_rate": 1.9824233871837547e-05, - "loss": 0.9965, + "learning_rate": 1.9824896069809148e-05, + "loss": 1.1786, "step": 3101 }, { - "epoch": 0.0880249716231555, + "epoch": 0.08790274589815524, "grad_norm": 0.0, - "learning_rate": 1.9824062269489836e-05, - "loss": 1.0287, + "learning_rate": 1.9824725028323808e-05, + "loss": 1.092, "step": 3102 }, { - "epoch": 0.0880533484676504, + "epoch": 0.08793108334041769, "grad_norm": 0.0, - "learning_rate": 1.9823890584157828e-05, - "loss": 0.9384, + "learning_rate": 1.9824553904081163e-05, + "loss": 1.1059, "step": 3103 }, { - "epoch": 0.0880817253121453, + "epoch": 0.08795942078268015, "grad_norm": 0.0, - "learning_rate": 1.9823718815842975e-05, - "loss": 1.0393, + "learning_rate": 1.982438269708265e-05, + "loss": 1.06, "step": 3104 }, { - "epoch": 0.08811010215664018, + "epoch": 0.08798775822494262, "grad_norm": 0.0, - "learning_rate": 1.9823546964546727e-05, - "loss": 1.0001, + "learning_rate": 1.9824211407329717e-05, + "loss": 1.1001, "step": 3105 }, { - "epoch": 0.08813847900113507, + "epoch": 0.08801609566720507, "grad_norm": 0.0, - "learning_rate": 1.9823375030270537e-05, - "loss": 1.0969, + "learning_rate": 1.9824040034823796e-05, + "loss": 1.1433, "step": 3106 }, { - "epoch": 0.08816685584562997, + "epoch": 0.08804443310946754, "grad_norm": 0.0, - "learning_rate": 1.9823203013015856e-05, - "loss": 0.9513, + "learning_rate": 1.9823868579566344e-05, + "loss": 1.0677, "step": 3107 }, { - "epoch": 0.08819523269012486, + "epoch": 0.08807277055173, "grad_norm": 0.0, - "learning_rate": 1.982303091278414e-05, - "loss": 1.0456, + "learning_rate": 1.9823697041558797e-05, + "loss": 1.0771, "step": 3108 }, { - "epoch": 0.08822360953461975, + "epoch": 0.08810110799399246, "grad_norm": 0.0, - "learning_rate": 1.9822858729576838e-05, - "loss": 1.0796, + "learning_rate": 1.9823525420802603e-05, + "loss": 1.0831, "step": 3109 }, { - "epoch": 0.08825198637911465, + "epoch": 0.08812944543625492, "grad_norm": 0.0, - "learning_rate": 1.9822686463395406e-05, - "loss": 1.0314, + "learning_rate": 1.9823353717299205e-05, + "loss": 1.0201, "step": 3110 }, { - "epoch": 0.08828036322360953, + "epoch": 0.08815778287851739, "grad_norm": 0.0, - "learning_rate": 1.9822514114241302e-05, - "loss": 1.0544, + "learning_rate": 1.9823181931050052e-05, + "loss": 1.0529, "step": 3111 }, { - "epoch": 0.08830874006810442, + "epoch": 0.08818612032077984, "grad_norm": 0.0, - "learning_rate": 1.982234168211598e-05, - "loss": 0.9525, + "learning_rate": 1.982301006205659e-05, + "loss": 1.1139, "step": 3112 }, { - "epoch": 0.08833711691259932, + "epoch": 0.0882144577630423, "grad_norm": 0.0, - "learning_rate": 1.9822169167020894e-05, - "loss": 1.0435, + "learning_rate": 1.9822838110320265e-05, + "loss": 1.1873, "step": 3113 }, { - "epoch": 0.08836549375709421, + "epoch": 0.08824279520530477, "grad_norm": 0.0, - "learning_rate": 1.9821996568957506e-05, - "loss": 1.115, + "learning_rate": 1.9822666075842527e-05, + "loss": 1.0791, "step": 3114 }, { - "epoch": 0.0883938706015891, + "epoch": 0.08827113264756722, "grad_norm": 0.0, - "learning_rate": 1.9821823887927264e-05, - "loss": 1.0748, + "learning_rate": 1.9822493958624825e-05, + "loss": 1.1232, "step": 3115 }, { - "epoch": 0.088422247446084, + "epoch": 0.08829947008982969, "grad_norm": 0.0, - "learning_rate": 1.9821651123931643e-05, - "loss": 0.9722, + "learning_rate": 1.982232175866861e-05, + "loss": 1.1282, "step": 3116 }, { - "epoch": 0.08845062429057889, + "epoch": 0.08832780753209216, "grad_norm": 0.0, - "learning_rate": 1.982147827697209e-05, - "loss": 1.0799, + "learning_rate": 1.982214947597533e-05, + "loss": 1.0048, "step": 3117 }, { - "epoch": 0.08847900113507377, + "epoch": 0.08835614497435461, "grad_norm": 0.0, - "learning_rate": 1.982130534705007e-05, - "loss": 1.0217, + "learning_rate": 1.982197711054644e-05, + "loss": 1.0263, "step": 3118 }, { - "epoch": 0.08850737797956867, + "epoch": 0.08838448241661707, "grad_norm": 0.0, - "learning_rate": 1.982113233416704e-05, - "loss": 1.0721, + "learning_rate": 1.9821804662383388e-05, + "loss": 1.0527, "step": 3119 }, { - "epoch": 0.08853575482406356, + "epoch": 0.08841281985887954, "grad_norm": 0.0, - "learning_rate": 1.9820959238324463e-05, - "loss": 1.0549, + "learning_rate": 1.9821632131487626e-05, + "loss": 1.068, "step": 3120 }, { - "epoch": 0.08856413166855846, + "epoch": 0.08844115730114199, "grad_norm": 0.0, - "learning_rate": 1.9820786059523804e-05, - "loss": 0.9958, + "learning_rate": 1.9821459517860614e-05, + "loss": 1.1339, "step": 3121 }, { - "epoch": 0.08859250851305335, + "epoch": 0.08846949474340446, "grad_norm": 0.0, - "learning_rate": 1.9820612797766527e-05, - "loss": 1.2394, + "learning_rate": 1.98212868215038e-05, + "loss": 1.0897, "step": 3122 }, { - "epoch": 0.08862088535754824, + "epoch": 0.08849783218566692, "grad_norm": 0.0, - "learning_rate": 1.9820439453054085e-05, - "loss": 1.0591, + "learning_rate": 1.9821114042418638e-05, + "loss": 1.105, "step": 3123 }, { - "epoch": 0.08864926220204314, + "epoch": 0.08852616962792938, "grad_norm": 0.0, - "learning_rate": 1.9820266025387953e-05, - "loss": 0.9767, + "learning_rate": 1.982094118060659e-05, + "loss": 1.1649, "step": 3124 }, { - "epoch": 0.08867763904653803, + "epoch": 0.08855450707019184, "grad_norm": 0.0, - "learning_rate": 1.9820092514769595e-05, - "loss": 0.9785, + "learning_rate": 1.98207682360691e-05, + "loss": 1.0689, "step": 3125 }, { - "epoch": 0.08870601589103291, + "epoch": 0.08858284451245431, "grad_norm": 0.0, - "learning_rate": 1.981991892120047e-05, - "loss": 1.1031, + "learning_rate": 1.9820595208807636e-05, + "loss": 1.0516, "step": 3126 }, { - "epoch": 0.08873439273552781, + "epoch": 0.08861118195471676, "grad_norm": 0.0, - "learning_rate": 1.981974524468205e-05, - "loss": 1.0479, + "learning_rate": 1.9820422098823653e-05, + "loss": 1.066, "step": 3127 }, { - "epoch": 0.0887627695800227, + "epoch": 0.08863951939697923, "grad_norm": 0.0, - "learning_rate": 1.98195714852158e-05, - "loss": 1.0386, + "learning_rate": 1.9820248906118606e-05, + "loss": 1.107, "step": 3128 }, { - "epoch": 0.08879114642451759, + "epoch": 0.0886678568392417, "grad_norm": 0.0, - "learning_rate": 1.9819397642803187e-05, - "loss": 1.1217, + "learning_rate": 1.9820075630693955e-05, + "loss": 1.0604, "step": 3129 }, { - "epoch": 0.08881952326901249, + "epoch": 0.08869619428150415, "grad_norm": 0.0, - "learning_rate": 1.9819223717445686e-05, - "loss": 1.1127, + "learning_rate": 1.9819902272551162e-05, + "loss": 1.0695, "step": 3130 }, { - "epoch": 0.08884790011350738, + "epoch": 0.08872453172376661, "grad_norm": 0.0, - "learning_rate": 1.9819049709144754e-05, - "loss": 1.1587, + "learning_rate": 1.981972883169169e-05, + "loss": 1.0245, "step": 3131 }, { - "epoch": 0.08887627695800226, + "epoch": 0.08875286916602908, "grad_norm": 0.0, - "learning_rate": 1.9818875617901874e-05, - "loss": 0.9863, + "learning_rate": 1.981955530811699e-05, + "loss": 1.1393, "step": 3132 }, { - "epoch": 0.08890465380249717, + "epoch": 0.08878120660829153, "grad_norm": 0.0, - "learning_rate": 1.9818701443718504e-05, - "loss": 1.0681, + "learning_rate": 1.9819381701828532e-05, + "loss": 1.102, "step": 3133 }, { - "epoch": 0.08893303064699205, + "epoch": 0.088809544050554, "grad_norm": 0.0, - "learning_rate": 1.9818527186596128e-05, - "loss": 1.0673, + "learning_rate": 1.9819208012827772e-05, + "loss": 1.1015, "step": 3134 }, { - "epoch": 0.08896140749148694, + "epoch": 0.08883788149281646, "grad_norm": 0.0, - "learning_rate": 1.9818352846536205e-05, - "loss": 1.0964, + "learning_rate": 1.981903424111618e-05, + "loss": 1.1519, "step": 3135 }, { - "epoch": 0.08898978433598184, + "epoch": 0.08886621893507891, "grad_norm": 0.0, - "learning_rate": 1.9818178423540217e-05, - "loss": 1.0964, + "learning_rate": 1.9818860386695214e-05, + "loss": 1.09, "step": 3136 }, { - "epoch": 0.08901816118047673, + "epoch": 0.08889455637734138, "grad_norm": 0.0, - "learning_rate": 1.9818003917609637e-05, - "loss": 0.9451, + "learning_rate": 1.981868644956634e-05, + "loss": 1.0935, "step": 3137 }, { - "epoch": 0.08904653802497162, + "epoch": 0.08892289381960385, "grad_norm": 0.0, - "learning_rate": 1.9817829328745936e-05, - "loss": 1.0577, + "learning_rate": 1.981851242973103e-05, + "loss": 0.9823, "step": 3138 }, { - "epoch": 0.08907491486946652, + "epoch": 0.0889512312618663, "grad_norm": 0.0, - "learning_rate": 1.9817654656950584e-05, - "loss": 1.0256, + "learning_rate": 1.981833832719074e-05, + "loss": 1.0794, "step": 3139 }, { - "epoch": 0.0891032917139614, + "epoch": 0.08897956870412876, "grad_norm": 0.0, - "learning_rate": 1.981747990222507e-05, - "loss": 1.0408, + "learning_rate": 1.9818164141946938e-05, + "loss": 0.9609, "step": 3140 }, { - "epoch": 0.0891316685584563, + "epoch": 0.08900790614639123, "grad_norm": 0.0, - "learning_rate": 1.9817305064570854e-05, - "loss": 1.149, + "learning_rate": 1.9817989874001096e-05, + "loss": 1.0413, "step": 3141 }, { - "epoch": 0.0891600454029512, + "epoch": 0.08903624358865368, "grad_norm": 0.0, - "learning_rate": 1.9817130143989424e-05, - "loss": 0.9412, + "learning_rate": 1.981781552335468e-05, + "loss": 1.1547, "step": 3142 }, { - "epoch": 0.08918842224744608, + "epoch": 0.08906458103091615, "grad_norm": 0.0, - "learning_rate": 1.9816955140482258e-05, - "loss": 1.0599, + "learning_rate": 1.9817641090009157e-05, + "loss": 1.1032, "step": 3143 }, { - "epoch": 0.08921679909194098, + "epoch": 0.08909291847317861, "grad_norm": 0.0, - "learning_rate": 1.9816780054050824e-05, - "loss": 0.9804, + "learning_rate": 1.9817466573965996e-05, + "loss": 1.0793, "step": 3144 }, { - "epoch": 0.08924517593643587, + "epoch": 0.08912125591544107, "grad_norm": 0.0, - "learning_rate": 1.981660488469661e-05, - "loss": 1.0039, + "learning_rate": 1.981729197522667e-05, + "loss": 1.0169, "step": 3145 }, { - "epoch": 0.08927355278093076, + "epoch": 0.08914959335770353, "grad_norm": 0.0, - "learning_rate": 1.9816429632421095e-05, - "loss": 0.9846, + "learning_rate": 1.981711729379265e-05, + "loss": 1.129, "step": 3146 }, { - "epoch": 0.08930192962542566, + "epoch": 0.089177930799966, "grad_norm": 0.0, - "learning_rate": 1.9816254297225758e-05, - "loss": 1.2161, + "learning_rate": 1.9816942529665404e-05, + "loss": 1.0429, "step": 3147 }, { - "epoch": 0.08933030646992055, + "epoch": 0.08920626824222845, "grad_norm": 0.0, - "learning_rate": 1.981607887911208e-05, - "loss": 1.0774, + "learning_rate": 1.9816767682846404e-05, + "loss": 1.1974, "step": 3148 }, { - "epoch": 0.08935868331441543, + "epoch": 0.08923460568449092, "grad_norm": 0.0, - "learning_rate": 1.9815903378081538e-05, - "loss": 0.9139, + "learning_rate": 1.9816592753337125e-05, + "loss": 1.027, "step": 3149 }, { - "epoch": 0.08938706015891033, + "epoch": 0.08926294312675338, "grad_norm": 0.0, - "learning_rate": 1.9815727794135622e-05, - "loss": 0.9292, + "learning_rate": 1.9816417741139042e-05, + "loss": 1.0033, "step": 3150 }, { - "epoch": 0.08941543700340522, + "epoch": 0.08929128056901584, "grad_norm": 0.0, - "learning_rate": 1.9815552127275814e-05, - "loss": 1.116, + "learning_rate": 1.9816242646253626e-05, + "loss": 1.208, "step": 3151 }, { - "epoch": 0.08944381384790011, + "epoch": 0.0893196180112783, "grad_norm": 0.0, - "learning_rate": 1.9815376377503592e-05, - "loss": 0.9881, + "learning_rate": 1.9816067468682352e-05, + "loss": 1.1665, "step": 3152 }, { - "epoch": 0.08947219069239501, + "epoch": 0.08934795545354077, "grad_norm": 0.0, - "learning_rate": 1.9815200544820444e-05, - "loss": 1.1057, + "learning_rate": 1.9815892208426696e-05, + "loss": 1.1225, "step": 3153 }, { - "epoch": 0.0895005675368899, + "epoch": 0.08937629289580322, "grad_norm": 0.0, - "learning_rate": 1.981502462922786e-05, - "loss": 1.0333, + "learning_rate": 1.9815716865488136e-05, + "loss": 1.0728, "step": 3154 }, { - "epoch": 0.08952894438138478, + "epoch": 0.08940463033806569, "grad_norm": 0.0, - "learning_rate": 1.9814848630727323e-05, - "loss": 1.1231, + "learning_rate": 1.9815541439868152e-05, + "loss": 0.9888, "step": 3155 }, { - "epoch": 0.08955732122587969, + "epoch": 0.08943296778032815, "grad_norm": 0.0, - "learning_rate": 1.981467254932031e-05, - "loss": 0.989, + "learning_rate": 1.9815365931568212e-05, + "loss": 1.0914, "step": 3156 }, { - "epoch": 0.08958569807037457, + "epoch": 0.0894613052225906, "grad_norm": 0.0, - "learning_rate": 1.9814496385008323e-05, - "loss": 0.971, + "learning_rate": 1.9815190340589802e-05, + "loss": 1.0446, "step": 3157 }, { - "epoch": 0.08961407491486946, + "epoch": 0.08948964266485307, "grad_norm": 0.0, - "learning_rate": 1.9814320137792846e-05, - "loss": 1.0616, + "learning_rate": 1.9815014666934397e-05, + "loss": 1.027, "step": 3158 }, { - "epoch": 0.08964245175936436, + "epoch": 0.08951798010711554, "grad_norm": 0.0, - "learning_rate": 1.981414380767536e-05, - "loss": 0.9978, + "learning_rate": 1.981483891060348e-05, + "loss": 1.0981, "step": 3159 }, { - "epoch": 0.08967082860385925, + "epoch": 0.08954631754937799, "grad_norm": 0.0, - "learning_rate": 1.9813967394657363e-05, - "loss": 1.0588, + "learning_rate": 1.9814663071598534e-05, + "loss": 1.211, "step": 3160 }, { - "epoch": 0.08969920544835415, + "epoch": 0.08957465499164045, "grad_norm": 0.0, - "learning_rate": 1.981379089874034e-05, - "loss": 0.9516, + "learning_rate": 1.9814487149921033e-05, + "loss": 1.0152, "step": 3161 }, { - "epoch": 0.08972758229284904, + "epoch": 0.08960299243390292, "grad_norm": 0.0, - "learning_rate": 1.9813614319925785e-05, - "loss": 1.0363, + "learning_rate": 1.981431114557246e-05, + "loss": 0.9996, "step": 3162 }, { - "epoch": 0.08975595913734392, + "epoch": 0.08963132987616537, "grad_norm": 0.0, - "learning_rate": 1.9813437658215192e-05, - "loss": 0.9661, + "learning_rate": 1.9814135058554303e-05, + "loss": 1.0118, "step": 3163 }, { - "epoch": 0.08978433598183883, + "epoch": 0.08965966731842784, "grad_norm": 0.0, - "learning_rate": 1.9813260913610048e-05, - "loss": 1.0234, + "learning_rate": 1.981395888886804e-05, + "loss": 1.0508, "step": 3164 }, { - "epoch": 0.08981271282633371, + "epoch": 0.0896880047606903, "grad_norm": 0.0, - "learning_rate": 1.9813084086111847e-05, - "loss": 0.998, + "learning_rate": 1.9813782636515157e-05, + "loss": 0.9637, "step": 3165 }, { - "epoch": 0.0898410896708286, + "epoch": 0.08971634220295276, "grad_norm": 0.0, - "learning_rate": 1.9812907175722085e-05, - "loss": 0.9939, + "learning_rate": 1.9813606301497138e-05, + "loss": 1.0316, "step": 3166 }, { - "epoch": 0.0898694665153235, + "epoch": 0.08974467964521522, "grad_norm": 0.0, - "learning_rate": 1.9812730182442253e-05, - "loss": 1.0566, + "learning_rate": 1.981342988381547e-05, + "loss": 1.0972, "step": 3167 }, { - "epoch": 0.08989784335981839, + "epoch": 0.08977301708747769, "grad_norm": 0.0, - "learning_rate": 1.9812553106273848e-05, - "loss": 1.0298, + "learning_rate": 1.9813253383471643e-05, + "loss": 1.0892, "step": 3168 }, { - "epoch": 0.08992622020431328, + "epoch": 0.08980135452974014, "grad_norm": 0.0, - "learning_rate": 1.9812375947218366e-05, - "loss": 1.013, + "learning_rate": 1.9813076800467134e-05, + "loss": 1.0829, "step": 3169 }, { - "epoch": 0.08995459704880818, + "epoch": 0.0898296919720026, "grad_norm": 0.0, - "learning_rate": 1.9812198705277306e-05, - "loss": 1.0811, + "learning_rate": 1.981290013480343e-05, + "loss": 1.2329, "step": 3170 }, { - "epoch": 0.08998297389330306, + "epoch": 0.08985802941426507, "grad_norm": 0.0, - "learning_rate": 1.981202138045216e-05, - "loss": 1.0925, + "learning_rate": 1.9812723386482032e-05, + "loss": 1.0954, "step": 3171 }, { - "epoch": 0.09001135073779795, + "epoch": 0.08988636685652752, "grad_norm": 0.0, - "learning_rate": 1.981184397274443e-05, - "loss": 0.9519, + "learning_rate": 1.981254655550442e-05, + "loss": 0.9765, "step": 3172 }, { - "epoch": 0.09003972758229285, + "epoch": 0.08991470429878999, "grad_norm": 0.0, - "learning_rate": 1.9811666482155608e-05, - "loss": 0.9943, + "learning_rate": 1.9812369641872083e-05, + "loss": 0.9642, "step": 3173 }, { - "epoch": 0.09006810442678774, + "epoch": 0.08994304174105246, "grad_norm": 0.0, - "learning_rate": 1.9811488908687204e-05, - "loss": 1.1011, + "learning_rate": 1.981219264558651e-05, + "loss": 1.0875, "step": 3174 }, { - "epoch": 0.09009648127128263, + "epoch": 0.08997137918331491, "grad_norm": 0.0, - "learning_rate": 1.981131125234071e-05, - "loss": 1.041, + "learning_rate": 1.98120155666492e-05, + "loss": 0.9621, "step": 3175 }, { - "epoch": 0.09012485811577753, + "epoch": 0.08999971662557738, "grad_norm": 0.0, - "learning_rate": 1.981113351311763e-05, - "loss": 1.013, + "learning_rate": 1.9811838405061638e-05, + "loss": 1.0229, "step": 3176 }, { - "epoch": 0.09015323496027242, + "epoch": 0.09002805406783984, "grad_norm": 0.0, - "learning_rate": 1.981095569101946e-05, - "loss": 0.9621, + "learning_rate": 1.9811661160825314e-05, + "loss": 1.0797, "step": 3177 }, { - "epoch": 0.0901816118047673, + "epoch": 0.0900563915101023, "grad_norm": 0.0, - "learning_rate": 1.981077778604771e-05, - "loss": 1.1559, + "learning_rate": 1.9811483833941726e-05, + "loss": 1.0211, "step": 3178 }, { - "epoch": 0.0902099886492622, + "epoch": 0.09008472895236476, "grad_norm": 0.0, - "learning_rate": 1.981059979820388e-05, - "loss": 1.0059, + "learning_rate": 1.9811306424412368e-05, + "loss": 1.1318, "step": 3179 }, { - "epoch": 0.09023836549375709, + "epoch": 0.09011306639462723, "grad_norm": 0.0, - "learning_rate": 1.981042172748947e-05, - "loss": 0.959, + "learning_rate": 1.9811128932238733e-05, + "loss": 1.1936, "step": 3180 }, { - "epoch": 0.09026674233825199, + "epoch": 0.09014140383688968, "grad_norm": 0.0, - "learning_rate": 1.9810243573905987e-05, - "loss": 0.9784, + "learning_rate": 1.9810951357422313e-05, + "loss": 1.1418, "step": 3181 }, { - "epoch": 0.09029511918274688, + "epoch": 0.09016974127915214, "grad_norm": 0.0, - "learning_rate": 1.9810065337454934e-05, - "loss": 1.1113, + "learning_rate": 1.981077369996461e-05, + "loss": 1.133, "step": 3182 }, { - "epoch": 0.09032349602724177, + "epoch": 0.09019807872141461, "grad_norm": 0.0, - "learning_rate": 1.9809887018137824e-05, - "loss": 0.9549, + "learning_rate": 1.9810595959867114e-05, + "loss": 1.1461, "step": 3183 }, { - "epoch": 0.09035187287173667, + "epoch": 0.09022641616367706, "grad_norm": 0.0, - "learning_rate": 1.9809708615956152e-05, - "loss": 1.056, + "learning_rate": 1.9810418137131326e-05, + "loss": 1.1681, "step": 3184 }, { - "epoch": 0.09038024971623156, + "epoch": 0.09025475360593953, "grad_norm": 0.0, - "learning_rate": 1.980953013091143e-05, - "loss": 1.1133, + "learning_rate": 1.9810240231758743e-05, + "loss": 1.1524, "step": 3185 }, { - "epoch": 0.09040862656072644, + "epoch": 0.090283091048202, "grad_norm": 0.0, - "learning_rate": 1.9809351563005168e-05, - "loss": 1.1522, + "learning_rate": 1.9810062243750866e-05, + "loss": 1.1247, "step": 3186 }, { - "epoch": 0.09043700340522134, + "epoch": 0.09031142849046445, "grad_norm": 0.0, - "learning_rate": 1.9809172912238872e-05, - "loss": 1.0492, + "learning_rate": 1.980988417310919e-05, + "loss": 1.112, "step": 3187 }, { - "epoch": 0.09046538024971623, + "epoch": 0.09033976593272691, "grad_norm": 0.0, - "learning_rate": 1.9808994178614054e-05, - "loss": 1.1964, + "learning_rate": 1.9809706019835215e-05, + "loss": 1.0931, "step": 3188 }, { - "epoch": 0.09049375709421112, + "epoch": 0.09036810337498938, "grad_norm": 0.0, - "learning_rate": 1.9808815362132216e-05, - "loss": 1.0738, + "learning_rate": 1.9809527783930444e-05, + "loss": 1.2107, "step": 3189 }, { - "epoch": 0.09052213393870602, + "epoch": 0.09039644081725183, "grad_norm": 0.0, - "learning_rate": 1.980863646279488e-05, - "loss": 1.028, + "learning_rate": 1.980934946539638e-05, + "loss": 1.1019, "step": 3190 }, { - "epoch": 0.09055051078320091, + "epoch": 0.0904247782595143, "grad_norm": 0.0, - "learning_rate": 1.9808457480603547e-05, - "loss": 1.0062, + "learning_rate": 1.9809171064234524e-05, + "loss": 1.0751, "step": 3191 }, { - "epoch": 0.0905788876276958, + "epoch": 0.09045311570177676, "grad_norm": 0.0, - "learning_rate": 1.9808278415559732e-05, - "loss": 1.0396, + "learning_rate": 1.9808992580446374e-05, + "loss": 1.1934, "step": 3192 }, { - "epoch": 0.0906072644721907, + "epoch": 0.09048145314403921, "grad_norm": 0.0, - "learning_rate": 1.980809926766495e-05, - "loss": 1.0601, + "learning_rate": 1.9808814014033436e-05, + "loss": 0.9684, "step": 3193 }, { - "epoch": 0.09063564131668558, + "epoch": 0.09050979058630168, "grad_norm": 0.0, - "learning_rate": 1.980792003692071e-05, - "loss": 0.9476, + "learning_rate": 1.9808635364997218e-05, + "loss": 1.14, "step": 3194 }, { - "epoch": 0.09066401816118047, + "epoch": 0.09053812802856415, "grad_norm": 0.0, - "learning_rate": 1.980774072332853e-05, - "loss": 1.0639, + "learning_rate": 1.980845663333922e-05, + "loss": 0.9881, "step": 3195 }, { - "epoch": 0.09069239500567537, + "epoch": 0.0905664654708266, "grad_norm": 0.0, - "learning_rate": 1.9807561326889926e-05, - "loss": 1.1099, + "learning_rate": 1.9808277819060953e-05, + "loss": 1.0559, "step": 3196 }, { - "epoch": 0.09072077185017026, + "epoch": 0.09059480291308906, "grad_norm": 0.0, - "learning_rate": 1.980738184760641e-05, - "loss": 1.0213, + "learning_rate": 1.9808098922163918e-05, + "loss": 1.0278, "step": 3197 }, { - "epoch": 0.09074914869466515, + "epoch": 0.09062314035535153, "grad_norm": 0.0, - "learning_rate": 1.9807202285479493e-05, - "loss": 1.0658, + "learning_rate": 1.980791994264962e-05, + "loss": 1.1498, "step": 3198 }, { - "epoch": 0.09077752553916005, + "epoch": 0.09065147779761398, "grad_norm": 0.0, - "learning_rate": 1.9807022640510704e-05, - "loss": 1.1291, + "learning_rate": 1.9807740880519574e-05, + "loss": 1.1366, "step": 3199 }, { - "epoch": 0.09080590238365494, + "epoch": 0.09067981523987645, "grad_norm": 0.0, - "learning_rate": 1.980684291270155e-05, - "loss": 1.0424, + "learning_rate": 1.9807561735775285e-05, + "loss": 1.1052, "step": 3200 }, { - "epoch": 0.09083427922814984, + "epoch": 0.09070815268213892, "grad_norm": 0.0, - "learning_rate": 1.9806663102053555e-05, - "loss": 1.0102, + "learning_rate": 1.980738250841826e-05, + "loss": 1.0977, "step": 3201 }, { - "epoch": 0.09086265607264472, + "epoch": 0.09073649012440137, "grad_norm": 0.0, - "learning_rate": 1.9806483208568234e-05, - "loss": 1.0331, + "learning_rate": 1.980720319845001e-05, + "loss": 1.113, "step": 3202 }, { - "epoch": 0.09089103291713961, + "epoch": 0.09076482756666383, "grad_norm": 0.0, - "learning_rate": 1.9806303232247112e-05, - "loss": 0.9719, + "learning_rate": 1.9807023805872047e-05, + "loss": 1.0482, "step": 3203 }, { - "epoch": 0.09091940976163451, + "epoch": 0.0907931650089263, "grad_norm": 0.0, - "learning_rate": 1.9806123173091704e-05, - "loss": 1.0991, + "learning_rate": 1.980684433068588e-05, + "loss": 1.204, "step": 3204 }, { - "epoch": 0.0909477866061294, + "epoch": 0.09082150245118875, "grad_norm": 0.0, - "learning_rate": 1.980594303110353e-05, - "loss": 1.0121, + "learning_rate": 1.9806664772893024e-05, + "loss": 1.0219, "step": 3205 }, { - "epoch": 0.09097616345062429, + "epoch": 0.09084983989345122, "grad_norm": 0.0, - "learning_rate": 1.980576280628412e-05, - "loss": 0.9644, + "learning_rate": 1.9806485132494984e-05, + "loss": 1.0367, "step": 3206 }, { - "epoch": 0.09100454029511919, + "epoch": 0.09087817733571368, "grad_norm": 0.0, - "learning_rate": 1.9805582498634987e-05, - "loss": 1.0246, + "learning_rate": 1.980630540949328e-05, + "loss": 1.12, "step": 3207 }, { - "epoch": 0.09103291713961408, + "epoch": 0.09090651477797614, "grad_norm": 0.0, - "learning_rate": 1.9805402108157658e-05, - "loss": 0.9504, + "learning_rate": 1.9806125603889424e-05, + "loss": 0.9457, "step": 3208 }, { - "epoch": 0.09106129398410896, + "epoch": 0.0909348522202386, "grad_norm": 0.0, - "learning_rate": 1.9805221634853656e-05, - "loss": 1.1105, + "learning_rate": 1.9805945715684933e-05, + "loss": 1.182, "step": 3209 }, { - "epoch": 0.09108967082860386, + "epoch": 0.09096318966250107, "grad_norm": 0.0, - "learning_rate": 1.980504107872451e-05, - "loss": 1.0808, + "learning_rate": 1.980576574488132e-05, + "loss": 1.138, "step": 3210 }, { - "epoch": 0.09111804767309875, + "epoch": 0.09099152710476352, "grad_norm": 0.0, - "learning_rate": 1.9804860439771735e-05, - "loss": 1.1361, + "learning_rate": 1.9805585691480098e-05, + "loss": 1.0334, "step": 3211 }, { - "epoch": 0.09114642451759364, + "epoch": 0.09101986454702599, "grad_norm": 0.0, - "learning_rate": 1.9804679717996866e-05, - "loss": 1.1819, + "learning_rate": 1.9805405555482786e-05, + "loss": 1.048, "step": 3212 }, { - "epoch": 0.09117480136208854, + "epoch": 0.09104820198928845, "grad_norm": 0.0, - "learning_rate": 1.980449891340143e-05, - "loss": 1.0068, + "learning_rate": 1.9805225336890905e-05, + "loss": 1.054, "step": 3213 }, { - "epoch": 0.09120317820658343, + "epoch": 0.0910765394315509, "grad_norm": 0.0, - "learning_rate": 1.9804318025986946e-05, - "loss": 1.012, + "learning_rate": 1.9805045035705966e-05, + "loss": 1.0702, "step": 3214 }, { - "epoch": 0.09123155505107831, + "epoch": 0.09110487687381337, "grad_norm": 0.0, - "learning_rate": 1.980413705575495e-05, - "loss": 1.0943, + "learning_rate": 1.9804864651929495e-05, + "loss": 1.1541, "step": 3215 }, { - "epoch": 0.09125993189557322, + "epoch": 0.09113321431607584, "grad_norm": 0.0, - "learning_rate": 1.9803956002706967e-05, - "loss": 1.0037, + "learning_rate": 1.980468418556301e-05, + "loss": 1.2114, "step": 3216 }, { - "epoch": 0.0912883087400681, + "epoch": 0.09116155175833829, "grad_norm": 0.0, - "learning_rate": 1.9803774866844527e-05, - "loss": 1.0651, + "learning_rate": 1.9804503636608026e-05, + "loss": 1.1546, "step": 3217 }, { - "epoch": 0.09131668558456299, + "epoch": 0.09118988920060075, "grad_norm": 0.0, - "learning_rate": 1.980359364816916e-05, - "loss": 1.0483, + "learning_rate": 1.9804323005066066e-05, + "loss": 1.0983, "step": 3218 }, { - "epoch": 0.09134506242905789, + "epoch": 0.09121822664286322, "grad_norm": 0.0, - "learning_rate": 1.9803412346682393e-05, - "loss": 1.0516, + "learning_rate": 1.9804142290938654e-05, + "loss": 1.131, "step": 3219 }, { - "epoch": 0.09137343927355278, + "epoch": 0.09124656408512567, "grad_norm": 0.0, - "learning_rate": 1.9803230962385766e-05, - "loss": 1.0642, + "learning_rate": 1.980396149422731e-05, + "loss": 1.1437, "step": 3220 }, { - "epoch": 0.09140181611804768, + "epoch": 0.09127490152738814, "grad_norm": 0.0, - "learning_rate": 1.9803049495280805e-05, - "loss": 1.0385, + "learning_rate": 1.980378061493356e-05, + "loss": 0.9281, "step": 3221 }, { - "epoch": 0.09143019296254257, + "epoch": 0.0913032389696506, "grad_norm": 0.0, - "learning_rate": 1.9802867945369044e-05, - "loss": 1.1652, + "learning_rate": 1.980359965305892e-05, + "loss": 1.0454, "step": 3222 }, { - "epoch": 0.09145856980703745, + "epoch": 0.09133157641191306, "grad_norm": 0.0, - "learning_rate": 1.980268631265202e-05, - "loss": 1.0547, + "learning_rate": 1.9803418608604926e-05, + "loss": 1.1277, "step": 3223 }, { - "epoch": 0.09148694665153236, + "epoch": 0.09135991385417552, "grad_norm": 0.0, - "learning_rate": 1.9802504597131258e-05, - "loss": 1.058, + "learning_rate": 1.9803237481573096e-05, + "loss": 1.091, "step": 3224 }, { - "epoch": 0.09151532349602724, + "epoch": 0.09138825129643799, "grad_norm": 0.0, - "learning_rate": 1.98023227988083e-05, - "loss": 1.0549, + "learning_rate": 1.9803056271964952e-05, + "loss": 1.1185, "step": 3225 }, { - "epoch": 0.09154370034052213, + "epoch": 0.09141658873870044, "grad_norm": 0.0, - "learning_rate": 1.980214091768468e-05, - "loss": 1.0141, + "learning_rate": 1.980287497978203e-05, + "loss": 1.0383, "step": 3226 }, { - "epoch": 0.09157207718501703, + "epoch": 0.09144492618096291, "grad_norm": 0.0, - "learning_rate": 1.980195895376194e-05, - "loss": 1.0279, + "learning_rate": 1.980269360502585e-05, + "loss": 1.1241, "step": 3227 }, { - "epoch": 0.09160045402951192, + "epoch": 0.09147326362322537, "grad_norm": 0.0, - "learning_rate": 1.980177690704161e-05, - "loss": 0.989, + "learning_rate": 1.980251214769794e-05, + "loss": 1.0319, "step": 3228 }, { - "epoch": 0.0916288308740068, + "epoch": 0.09150160106548783, "grad_norm": 0.0, - "learning_rate": 1.980159477752523e-05, - "loss": 1.0735, + "learning_rate": 1.9802330607799832e-05, + "loss": 1.0914, "step": 3229 }, { - "epoch": 0.09165720771850171, + "epoch": 0.09152993850775029, "grad_norm": 0.0, - "learning_rate": 1.9801412565214338e-05, - "loss": 0.9756, + "learning_rate": 1.980214898533305e-05, + "loss": 1.1389, "step": 3230 }, { - "epoch": 0.0916855845629966, + "epoch": 0.09155827595001276, "grad_norm": 0.0, - "learning_rate": 1.9801230270110474e-05, - "loss": 1.0358, + "learning_rate": 1.980196728029913e-05, + "loss": 1.0461, "step": 3231 }, { - "epoch": 0.09171396140749148, + "epoch": 0.09158661339227521, "grad_norm": 0.0, - "learning_rate": 1.9801047892215176e-05, - "loss": 1.0862, + "learning_rate": 1.98017854926996e-05, + "loss": 1.1457, "step": 3232 }, { - "epoch": 0.09174233825198638, + "epoch": 0.09161495083453768, "grad_norm": 0.0, - "learning_rate": 1.9800865431529988e-05, - "loss": 1.0848, + "learning_rate": 1.980160362253599e-05, + "loss": 1.0156, "step": 3233 }, { - "epoch": 0.09177071509648127, + "epoch": 0.09164328827680014, "grad_norm": 0.0, - "learning_rate": 1.9800682888056446e-05, - "loss": 0.9367, + "learning_rate": 1.9801421669809833e-05, + "loss": 0.9429, "step": 3234 }, { - "epoch": 0.09179909194097616, + "epoch": 0.0916716257190626, "grad_norm": 0.0, - "learning_rate": 1.9800500261796096e-05, - "loss": 1.074, + "learning_rate": 1.980123963452266e-05, + "loss": 1.0626, "step": 3235 }, { - "epoch": 0.09182746878547106, + "epoch": 0.09169996316132506, "grad_norm": 0.0, - "learning_rate": 1.980031755275048e-05, - "loss": 1.1093, + "learning_rate": 1.9801057516676008e-05, + "loss": 1.068, "step": 3236 }, { - "epoch": 0.09185584562996595, + "epoch": 0.09172830060358753, "grad_norm": 0.0, - "learning_rate": 1.9800134760921145e-05, - "loss": 1.0247, + "learning_rate": 1.980087531627141e-05, + "loss": 1.1352, "step": 3237 }, { - "epoch": 0.09188422247446083, + "epoch": 0.09175663804584998, "grad_norm": 0.0, - "learning_rate": 1.9799951886309624e-05, - "loss": 1.0486, + "learning_rate": 1.98006930333104e-05, + "loss": 1.0424, "step": 3238 }, { - "epoch": 0.09191259931895573, + "epoch": 0.09178497548811244, "grad_norm": 0.0, - "learning_rate": 1.9799768928917474e-05, - "loss": 1.036, + "learning_rate": 1.980051066779451e-05, + "loss": 1.1624, "step": 3239 }, { - "epoch": 0.09194097616345062, + "epoch": 0.09181331293037491, "grad_norm": 0.0, - "learning_rate": 1.9799585888746235e-05, - "loss": 1.2516, + "learning_rate": 1.980032821972528e-05, + "loss": 1.061, "step": 3240 }, { - "epoch": 0.09196935300794552, + "epoch": 0.09184165037263736, "grad_norm": 0.0, - "learning_rate": 1.979940276579745e-05, - "loss": 1.0283, + "learning_rate": 1.9800145689104247e-05, + "loss": 1.0277, "step": 3241 }, { - "epoch": 0.09199772985244041, + "epoch": 0.09186998781489983, "grad_norm": 0.0, - "learning_rate": 1.9799219560072673e-05, - "loss": 1.0818, + "learning_rate": 1.979996307593295e-05, + "loss": 1.1172, "step": 3242 }, { - "epoch": 0.0920261066969353, + "epoch": 0.0918983252571623, "grad_norm": 0.0, - "learning_rate": 1.9799036271573447e-05, - "loss": 1.1303, + "learning_rate": 1.9799780380212922e-05, + "loss": 1.1345, "step": 3243 }, { - "epoch": 0.0920544835414302, + "epoch": 0.09192666269942475, "grad_norm": 0.0, - "learning_rate": 1.979885290030132e-05, - "loss": 1.0066, + "learning_rate": 1.97995976019457e-05, + "loss": 1.1045, "step": 3244 }, { - "epoch": 0.09208286038592509, + "epoch": 0.09195500014168721, "grad_norm": 0.0, - "learning_rate": 1.9798669446257844e-05, - "loss": 0.9967, + "learning_rate": 1.9799414741132836e-05, + "loss": 1.1283, "step": 3245 }, { - "epoch": 0.09211123723041997, + "epoch": 0.09198333758394968, "grad_norm": 0.0, - "learning_rate": 1.9798485909444563e-05, - "loss": 1.0948, + "learning_rate": 1.979923179777586e-05, + "loss": 0.9911, "step": 3246 }, { - "epoch": 0.09213961407491487, + "epoch": 0.09201167502621213, "grad_norm": 0.0, - "learning_rate": 1.9798302289863035e-05, - "loss": 1.0991, + "learning_rate": 1.9799048771876316e-05, + "loss": 1.1567, "step": 3247 }, { - "epoch": 0.09216799091940976, + "epoch": 0.0920400124684746, "grad_norm": 0.0, - "learning_rate": 1.9798118587514804e-05, - "loss": 1.0377, + "learning_rate": 1.9798865663435746e-05, + "loss": 1.1348, "step": 3248 }, { - "epoch": 0.09219636776390465, + "epoch": 0.09206834991073706, "grad_norm": 0.0, - "learning_rate": 1.9797934802401425e-05, - "loss": 1.0662, + "learning_rate": 1.9798682472455694e-05, + "loss": 0.9442, "step": 3249 }, { - "epoch": 0.09222474460839955, + "epoch": 0.09209668735299952, "grad_norm": 0.0, - "learning_rate": 1.9797750934524453e-05, - "loss": 0.9948, + "learning_rate": 1.9798499198937696e-05, + "loss": 1.1203, "step": 3250 }, { - "epoch": 0.09225312145289444, + "epoch": 0.09212502479526198, "grad_norm": 0.0, - "learning_rate": 1.9797566983885437e-05, - "loss": 1.0279, + "learning_rate": 1.9798315842883303e-05, + "loss": 1.0555, "step": 3251 }, { - "epoch": 0.09228149829738932, + "epoch": 0.09215336223752445, "grad_norm": 0.0, - "learning_rate": 1.979738295048593e-05, - "loss": 1.028, + "learning_rate": 1.9798132404294057e-05, + "loss": 1.1581, "step": 3252 }, { - "epoch": 0.09230987514188423, + "epoch": 0.0921816996797869, "grad_norm": 0.0, - "learning_rate": 1.9797198834327492e-05, - "loss": 1.099, + "learning_rate": 1.9797948883171503e-05, + "loss": 1.0685, "step": 3253 }, { - "epoch": 0.09233825198637911, + "epoch": 0.09221003712204937, "grad_norm": 0.0, - "learning_rate": 1.9797014635411674e-05, - "loss": 1.0753, + "learning_rate": 1.9797765279517186e-05, + "loss": 1.1183, "step": 3254 }, { - "epoch": 0.092366628830874, + "epoch": 0.09223837456431183, "grad_norm": 0.0, - "learning_rate": 1.9796830353740036e-05, - "loss": 1.086, + "learning_rate": 1.9797581593332657e-05, + "loss": 1.0393, "step": 3255 }, { - "epoch": 0.0923950056753689, + "epoch": 0.09226671200657428, "grad_norm": 0.0, - "learning_rate": 1.9796645989314126e-05, - "loss": 0.9627, + "learning_rate": 1.979739782461946e-05, + "loss": 1.1105, "step": 3256 }, { - "epoch": 0.09242338251986379, + "epoch": 0.09229504944883675, "grad_norm": 0.0, - "learning_rate": 1.979646154213551e-05, - "loss": 1.0598, + "learning_rate": 1.979721397337914e-05, + "loss": 1.0468, "step": 3257 }, { - "epoch": 0.09245175936435868, + "epoch": 0.09232338689109922, "grad_norm": 0.0, - "learning_rate": 1.9796277012205744e-05, - "loss": 1.0856, + "learning_rate": 1.979703003961325e-05, + "loss": 1.0193, "step": 3258 }, { - "epoch": 0.09248013620885358, + "epoch": 0.09235172433336167, "grad_norm": 0.0, - "learning_rate": 1.9796092399526383e-05, - "loss": 1.0626, + "learning_rate": 1.9796846023323336e-05, + "loss": 1.1248, "step": 3259 }, { - "epoch": 0.09250851305334847, + "epoch": 0.09238006177562413, "grad_norm": 0.0, - "learning_rate": 1.979590770409899e-05, - "loss": 0.9382, + "learning_rate": 1.9796661924510952e-05, + "loss": 1.0962, "step": 3260 }, { - "epoch": 0.09253688989784337, + "epoch": 0.0924083992178866, "grad_norm": 0.0, - "learning_rate": 1.9795722925925126e-05, - "loss": 0.9819, + "learning_rate": 1.9796477743177648e-05, + "loss": 1.1765, "step": 3261 }, { - "epoch": 0.09256526674233825, + "epoch": 0.09243673666014905, "grad_norm": 0.0, - "learning_rate": 1.9795538065006348e-05, - "loss": 0.9449, + "learning_rate": 1.9796293479324974e-05, + "loss": 0.9516, "step": 3262 }, { - "epoch": 0.09259364358683314, + "epoch": 0.09246507410241152, "grad_norm": 0.0, - "learning_rate": 1.979535312134422e-05, - "loss": 1.0308, + "learning_rate": 1.979610913295448e-05, + "loss": 1.0213, "step": 3263 }, { - "epoch": 0.09262202043132804, + "epoch": 0.09249341154467398, "grad_norm": 0.0, - "learning_rate": 1.9795168094940303e-05, - "loss": 1.077, + "learning_rate": 1.979592470406772e-05, + "loss": 1.0694, "step": 3264 }, { - "epoch": 0.09265039727582293, + "epoch": 0.09252174898693644, "grad_norm": 0.0, - "learning_rate": 1.979498298579616e-05, - "loss": 1.1018, + "learning_rate": 1.979574019266625e-05, + "loss": 1.1627, "step": 3265 }, { - "epoch": 0.09267877412031782, + "epoch": 0.0925500864291989, "grad_norm": 0.0, - "learning_rate": 1.979479779391336e-05, - "loss": 1.1416, + "learning_rate": 1.9795555598751623e-05, + "loss": 1.0976, "step": 3266 }, { - "epoch": 0.09270715096481272, + "epoch": 0.09257842387146137, "grad_norm": 0.0, - "learning_rate": 1.9794612519293462e-05, - "loss": 1.0382, + "learning_rate": 1.9795370922325396e-05, + "loss": 1.006, "step": 3267 }, { - "epoch": 0.0927355278093076, + "epoch": 0.09260676131372382, "grad_norm": 0.0, - "learning_rate": 1.9794427161938027e-05, - "loss": 1.0165, + "learning_rate": 1.979518616338912e-05, + "loss": 0.9158, "step": 3268 }, { - "epoch": 0.09276390465380249, + "epoch": 0.09263509875598629, "grad_norm": 0.0, - "learning_rate": 1.979424172184863e-05, - "loss": 0.9869, + "learning_rate": 1.9795001321944354e-05, + "loss": 0.9979, "step": 3269 }, { - "epoch": 0.0927922814982974, + "epoch": 0.09266343619824875, "grad_norm": 0.0, - "learning_rate": 1.979405619902683e-05, - "loss": 0.9706, + "learning_rate": 1.9794816397992656e-05, + "loss": 1.1813, "step": 3270 }, { - "epoch": 0.09282065834279228, + "epoch": 0.0926917736405112, "grad_norm": 0.0, - "learning_rate": 1.9793870593474202e-05, - "loss": 1.0291, + "learning_rate": 1.9794631391535576e-05, + "loss": 1.1033, "step": 3271 }, { - "epoch": 0.09284903518728717, + "epoch": 0.09272011108277367, "grad_norm": 0.0, - "learning_rate": 1.9793684905192302e-05, - "loss": 0.9843, + "learning_rate": 1.9794446302574687e-05, + "loss": 1.0755, "step": 3272 }, { - "epoch": 0.09287741203178207, + "epoch": 0.09274844852503614, "grad_norm": 0.0, - "learning_rate": 1.979349913418271e-05, - "loss": 0.9858, + "learning_rate": 1.9794261131111537e-05, + "loss": 1.0145, "step": 3273 }, { - "epoch": 0.09290578887627696, + "epoch": 0.09277678596729859, "grad_norm": 0.0, - "learning_rate": 1.979331328044699e-05, - "loss": 1.2099, + "learning_rate": 1.9794075877147688e-05, + "loss": 1.0116, "step": 3274 }, { - "epoch": 0.09293416572077184, + "epoch": 0.09280512340956105, "grad_norm": 0.0, - "learning_rate": 1.979312734398671e-05, - "loss": 1.172, + "learning_rate": 1.9793890540684698e-05, + "loss": 0.9694, "step": 3275 }, { - "epoch": 0.09296254256526675, + "epoch": 0.09283346085182352, "grad_norm": 0.0, - "learning_rate": 1.9792941324803444e-05, - "loss": 0.9655, + "learning_rate": 1.9793705121724134e-05, + "loss": 1.1041, "step": 3276 }, { - "epoch": 0.09299091940976163, + "epoch": 0.09286179829408597, "grad_norm": 0.0, - "learning_rate": 1.979275522289876e-05, - "loss": 1.0391, + "learning_rate": 1.9793519620267555e-05, + "loss": 1.2185, "step": 3277 }, { - "epoch": 0.09301929625425652, + "epoch": 0.09289013573634844, "grad_norm": 0.0, - "learning_rate": 1.979256903827424e-05, - "loss": 1.1383, + "learning_rate": 1.9793334036316523e-05, + "loss": 1.1447, "step": 3278 }, { - "epoch": 0.09304767309875142, + "epoch": 0.0929184731786109, "grad_norm": 0.0, - "learning_rate": 1.9792382770931443e-05, - "loss": 1.0054, + "learning_rate": 1.97931483698726e-05, + "loss": 1.0361, "step": 3279 }, { - "epoch": 0.09307604994324631, + "epoch": 0.09294681062087336, "grad_norm": 0.0, - "learning_rate": 1.979219642087195e-05, - "loss": 1.0399, + "learning_rate": 1.9792962620937354e-05, + "loss": 1.0216, "step": 3280 }, { - "epoch": 0.09310442678774121, + "epoch": 0.09297514806313582, "grad_norm": 0.0, - "learning_rate": 1.9792009988097334e-05, - "loss": 1.1169, + "learning_rate": 1.9792776789512348e-05, + "loss": 1.0876, "step": 3281 }, { - "epoch": 0.0931328036322361, + "epoch": 0.09300348550539829, "grad_norm": 0.0, - "learning_rate": 1.9791823472609167e-05, - "loss": 1.0315, + "learning_rate": 1.9792590875599144e-05, + "loss": 1.0812, "step": 3282 }, { - "epoch": 0.09316118047673098, + "epoch": 0.09303182294766074, "grad_norm": 0.0, - "learning_rate": 1.9791636874409032e-05, - "loss": 1.0413, + "learning_rate": 1.9792404879199313e-05, + "loss": 0.9881, "step": 3283 }, { - "epoch": 0.09318955732122589, + "epoch": 0.09306016038992321, "grad_norm": 0.0, - "learning_rate": 1.97914501934985e-05, - "loss": 1.0582, + "learning_rate": 1.979221880031442e-05, + "loss": 0.9501, "step": 3284 }, { - "epoch": 0.09321793416572077, + "epoch": 0.09308849783218567, "grad_norm": 0.0, - "learning_rate": 1.979126342987914e-05, - "loss": 1.0698, + "learning_rate": 1.979203263894603e-05, + "loss": 1.1689, "step": 3285 }, { - "epoch": 0.09324631101021566, + "epoch": 0.09311683527444813, "grad_norm": 0.0, - "learning_rate": 1.9791076583552543e-05, - "loss": 1.1348, + "learning_rate": 1.9791846395095715e-05, + "loss": 1.1178, "step": 3286 }, { - "epoch": 0.09327468785471056, + "epoch": 0.09314517271671059, "grad_norm": 0.0, - "learning_rate": 1.9790889654520282e-05, - "loss": 1.1465, + "learning_rate": 1.9791660068765038e-05, + "loss": 1.0817, "step": 3287 }, { - "epoch": 0.09330306469920545, + "epoch": 0.09317351015897306, "grad_norm": 0.0, - "learning_rate": 1.9790702642783938e-05, - "loss": 1.1039, + "learning_rate": 1.9791473659955575e-05, + "loss": 1.0757, "step": 3288 }, { - "epoch": 0.09333144154370034, + "epoch": 0.09320184760123551, "grad_norm": 0.0, - "learning_rate": 1.9790515548345085e-05, - "loss": 1.0408, + "learning_rate": 1.979128716866889e-05, + "loss": 1.004, "step": 3289 }, { - "epoch": 0.09335981838819524, + "epoch": 0.09323018504349798, "grad_norm": 0.0, - "learning_rate": 1.979032837120531e-05, - "loss": 1.1668, + "learning_rate": 1.979110059490656e-05, + "loss": 1.0155, "step": 3290 }, { - "epoch": 0.09338819523269012, + "epoch": 0.09325852248576043, "grad_norm": 0.0, - "learning_rate": 1.979014111136619e-05, - "loss": 1.1219, + "learning_rate": 1.9790913938670155e-05, + "loss": 1.1328, "step": 3291 }, { - "epoch": 0.09341657207718501, + "epoch": 0.0932868599280229, "grad_norm": 0.0, - "learning_rate": 1.9789953768829306e-05, - "loss": 1.1169, + "learning_rate": 1.9790727199961244e-05, + "loss": 0.9713, "step": 3292 }, { - "epoch": 0.09344494892167991, + "epoch": 0.09331519737028536, "grad_norm": 0.0, - "learning_rate": 1.9789766343596244e-05, - "loss": 1.0452, + "learning_rate": 1.9790540378781403e-05, + "loss": 1.0642, "step": 3293 }, { - "epoch": 0.0934733257661748, + "epoch": 0.09334353481254781, "grad_norm": 0.0, - "learning_rate": 1.978957883566859e-05, - "loss": 0.9305, + "learning_rate": 1.9790353475132206e-05, + "loss": 1.1281, "step": 3294 }, { - "epoch": 0.09350170261066969, + "epoch": 0.09337187225481028, "grad_norm": 0.0, - "learning_rate": 1.9789391245047915e-05, - "loss": 1.0405, + "learning_rate": 1.9790166489015223e-05, + "loss": 1.0574, "step": 3295 }, { - "epoch": 0.09353007945516459, + "epoch": 0.09340020969707274, "grad_norm": 0.0, - "learning_rate": 1.9789203571735816e-05, - "loss": 1.0965, + "learning_rate": 1.978997942043203e-05, + "loss": 0.9748, "step": 3296 }, { - "epoch": 0.09355845629965948, + "epoch": 0.0934285471393352, "grad_norm": 0.0, - "learning_rate": 1.9789015815733878e-05, - "loss": 1.0203, + "learning_rate": 1.9789792269384212e-05, + "loss": 1.1106, "step": 3297 }, { - "epoch": 0.09358683314415436, + "epoch": 0.09345688458159766, "grad_norm": 0.0, - "learning_rate": 1.978882797704368e-05, - "loss": 1.0753, + "learning_rate": 1.9789605035873338e-05, + "loss": 1.0826, "step": 3298 }, { - "epoch": 0.09361520998864926, + "epoch": 0.09348522202386013, "grad_norm": 0.0, - "learning_rate": 1.978864005566681e-05, - "loss": 1.1902, + "learning_rate": 1.978941771990098e-05, + "loss": 1.0788, "step": 3299 }, { - "epoch": 0.09364358683314415, + "epoch": 0.09351355946612258, "grad_norm": 0.0, - "learning_rate": 1.978845205160486e-05, - "loss": 1.1082, + "learning_rate": 1.9789230321468725e-05, + "loss": 1.1229, "step": 3300 }, { - "epoch": 0.09367196367763905, + "epoch": 0.09354189690838505, "grad_norm": 0.0, - "learning_rate": 1.9788263964859415e-05, - "loss": 1.0112, + "learning_rate": 1.9789042840578148e-05, + "loss": 1.0075, "step": 3301 }, { - "epoch": 0.09370034052213394, + "epoch": 0.09357023435064751, "grad_norm": 0.0, - "learning_rate": 1.9788075795432065e-05, - "loss": 0.9495, + "learning_rate": 1.9788855277230824e-05, + "loss": 1.15, "step": 3302 }, { - "epoch": 0.09372871736662883, + "epoch": 0.09359857179290997, "grad_norm": 0.0, - "learning_rate": 1.9787887543324397e-05, - "loss": 1.0324, + "learning_rate": 1.978866763142834e-05, + "loss": 0.9116, "step": 3303 }, { - "epoch": 0.09375709421112373, + "epoch": 0.09362690923517243, "grad_norm": 0.0, - "learning_rate": 1.9787699208538e-05, - "loss": 1.1414, + "learning_rate": 1.9788479903172276e-05, + "loss": 1.0754, "step": 3304 }, { - "epoch": 0.09378547105561862, + "epoch": 0.0936552466774349, "grad_norm": 0.0, - "learning_rate": 1.9787510791074475e-05, - "loss": 1.1165, + "learning_rate": 1.9788292092464207e-05, + "loss": 1.1451, "step": 3305 }, { - "epoch": 0.0938138479001135, + "epoch": 0.09368358411969735, "grad_norm": 0.0, - "learning_rate": 1.9787322290935403e-05, - "loss": 1.0354, + "learning_rate": 1.978810419930572e-05, + "loss": 0.8193, "step": 3306 }, { - "epoch": 0.0938422247446084, + "epoch": 0.09371192156195982, "grad_norm": 0.0, - "learning_rate": 1.978713370812238e-05, - "loss": 1.1451, + "learning_rate": 1.9787916223698397e-05, + "loss": 1.0987, "step": 3307 }, { - "epoch": 0.09387060158910329, + "epoch": 0.09374025900422228, "grad_norm": 0.0, - "learning_rate": 1.9786945042636998e-05, - "loss": 1.0158, + "learning_rate": 1.9787728165643822e-05, + "loss": 1.0575, "step": 3308 }, { - "epoch": 0.09389897843359818, + "epoch": 0.09376859644648473, "grad_norm": 0.0, - "learning_rate": 1.9786756294480855e-05, - "loss": 1.0743, + "learning_rate": 1.9787540025143576e-05, + "loss": 1.0443, "step": 3309 }, { - "epoch": 0.09392735527809308, + "epoch": 0.0937969338887472, "grad_norm": 0.0, - "learning_rate": 1.9786567463655536e-05, - "loss": 1.0896, + "learning_rate": 1.978735180219925e-05, + "loss": 1.1397, "step": 3310 }, { - "epoch": 0.09395573212258797, + "epoch": 0.09382527133100967, "grad_norm": 0.0, - "learning_rate": 1.9786378550162645e-05, - "loss": 1.0375, + "learning_rate": 1.978716349681242e-05, + "loss": 1.1447, "step": 3311 }, { - "epoch": 0.09398410896708286, + "epoch": 0.09385360877327212, "grad_norm": 0.0, - "learning_rate": 1.9786189554003773e-05, - "loss": 1.0253, + "learning_rate": 1.978697510898468e-05, + "loss": 1.0484, "step": 3312 }, { - "epoch": 0.09401248581157776, + "epoch": 0.09388194621553458, "grad_norm": 0.0, - "learning_rate": 1.9786000475180516e-05, - "loss": 1.0197, + "learning_rate": 1.9786786638717615e-05, + "loss": 0.987, "step": 3313 }, { - "epoch": 0.09404086265607264, + "epoch": 0.09391028365779705, "grad_norm": 0.0, - "learning_rate": 1.9785811313694477e-05, - "loss": 1.0563, + "learning_rate": 1.978659808601281e-05, + "loss": 0.9798, "step": 3314 }, { - "epoch": 0.09406923950056753, + "epoch": 0.0939386211000595, "grad_norm": 0.0, - "learning_rate": 1.978562206954725e-05, - "loss": 1.0118, + "learning_rate": 1.9786409450871855e-05, + "loss": 1.0741, "step": 3315 }, { - "epoch": 0.09409761634506243, + "epoch": 0.09396695854232197, "grad_norm": 0.0, - "learning_rate": 1.978543274274043e-05, - "loss": 1.083, + "learning_rate": 1.9786220733296343e-05, + "loss": 1.0911, "step": 3316 }, { - "epoch": 0.09412599318955732, + "epoch": 0.09399529598458443, "grad_norm": 0.0, - "learning_rate": 1.9785243333275622e-05, - "loss": 0.9782, + "learning_rate": 1.9786031933287855e-05, + "loss": 1.0642, "step": 3317 }, { - "epoch": 0.0941543700340522, + "epoch": 0.09402363342684689, "grad_norm": 0.0, - "learning_rate": 1.9785053841154426e-05, - "loss": 1.0338, + "learning_rate": 1.9785843050847988e-05, + "loss": 1.0886, "step": 3318 }, { - "epoch": 0.09418274687854711, + "epoch": 0.09405197086910935, "grad_norm": 0.0, - "learning_rate": 1.9784864266378434e-05, - "loss": 1.0688, + "learning_rate": 1.9785654085978334e-05, + "loss": 1.1423, "step": 3319 }, { - "epoch": 0.094211123723042, + "epoch": 0.09408030831137182, "grad_norm": 0.0, - "learning_rate": 1.9784674608949258e-05, - "loss": 0.9292, + "learning_rate": 1.9785465038680474e-05, + "loss": 0.9769, "step": 3320 }, { - "epoch": 0.0942395005675369, + "epoch": 0.09410864575363427, "grad_norm": 0.0, - "learning_rate": 1.9784484868868494e-05, - "loss": 0.9317, + "learning_rate": 1.9785275908956016e-05, + "loss": 1.0896, "step": 3321 }, { - "epoch": 0.09426787741203178, + "epoch": 0.09413698319589674, "grad_norm": 0.0, - "learning_rate": 1.978429504613775e-05, - "loss": 1.0899, + "learning_rate": 1.9785086696806544e-05, + "loss": 0.9945, "step": 3322 }, { - "epoch": 0.09429625425652667, + "epoch": 0.0941653206381592, "grad_norm": 0.0, - "learning_rate": 1.9784105140758623e-05, - "loss": 1.1006, + "learning_rate": 1.9784897402233652e-05, + "loss": 1.0322, "step": 3323 }, { - "epoch": 0.09432463110102157, + "epoch": 0.09419365808042165, "grad_norm": 0.0, - "learning_rate": 1.978391515273272e-05, - "loss": 1.2171, + "learning_rate": 1.9784708025238935e-05, + "loss": 1.068, "step": 3324 }, { - "epoch": 0.09435300794551646, + "epoch": 0.09422199552268412, "grad_norm": 0.0, - "learning_rate": 1.9783725082061646e-05, - "loss": 1.0463, + "learning_rate": 1.9784518565823988e-05, + "loss": 1.0007, "step": 3325 }, { - "epoch": 0.09438138479001135, + "epoch": 0.09425033296494659, "grad_norm": 0.0, - "learning_rate": 1.9783534928747006e-05, - "loss": 0.958, + "learning_rate": 1.9784329023990408e-05, + "loss": 1.0566, "step": 3326 }, { - "epoch": 0.09440976163450625, + "epoch": 0.09427867040720904, "grad_norm": 0.0, - "learning_rate": 1.9783344692790407e-05, - "loss": 0.9923, + "learning_rate": 1.9784139399739794e-05, + "loss": 1.0726, "step": 3327 }, { - "epoch": 0.09443813847900114, + "epoch": 0.0943070078494715, "grad_norm": 0.0, - "learning_rate": 1.9783154374193455e-05, - "loss": 1.1509, + "learning_rate": 1.9783949693073738e-05, + "loss": 0.9866, "step": 3328 }, { - "epoch": 0.09446651532349602, + "epoch": 0.09433534529173397, "grad_norm": 0.0, - "learning_rate": 1.978296397295776e-05, - "loss": 1.0559, + "learning_rate": 1.9783759903993843e-05, + "loss": 1.0289, "step": 3329 }, { - "epoch": 0.09449489216799092, + "epoch": 0.09436368273399642, "grad_norm": 0.0, - "learning_rate": 1.9782773489084927e-05, - "loss": 0.9695, + "learning_rate": 1.97835700325017e-05, + "loss": 1.0341, "step": 3330 }, { - "epoch": 0.09452326901248581, + "epoch": 0.09439202017625889, "grad_norm": 0.0, - "learning_rate": 1.9782582922576567e-05, - "loss": 1.0674, + "learning_rate": 1.978338007859892e-05, + "loss": 1.0627, "step": 3331 }, { - "epoch": 0.0945516458569807, + "epoch": 0.09442035761852136, "grad_norm": 0.0, - "learning_rate": 1.978239227343429e-05, - "loss": 1.019, + "learning_rate": 1.9783190042287093e-05, + "loss": 1.0961, "step": 3332 }, { - "epoch": 0.0945800227014756, + "epoch": 0.09444869506078381, "grad_norm": 0.0, - "learning_rate": 1.9782201541659705e-05, - "loss": 1.0071, + "learning_rate": 1.9782999923567826e-05, + "loss": 0.9663, "step": 3333 }, { - "epoch": 0.09460839954597049, + "epoch": 0.09447703250304627, "grad_norm": 0.0, - "learning_rate": 1.9782010727254427e-05, - "loss": 1.0502, + "learning_rate": 1.9782809722442713e-05, + "loss": 1.0592, "step": 3334 }, { - "epoch": 0.09463677639046537, + "epoch": 0.09450536994530874, "grad_norm": 0.0, - "learning_rate": 1.9781819830220058e-05, - "loss": 1.0178, + "learning_rate": 1.9782619438913365e-05, + "loss": 1.0327, "step": 3335 }, { - "epoch": 0.09466515323496028, + "epoch": 0.09453370738757119, "grad_norm": 0.0, - "learning_rate": 1.9781628850558224e-05, - "loss": 1.0819, + "learning_rate": 1.978242907298138e-05, + "loss": 1.0303, "step": 3336 }, { - "epoch": 0.09469353007945516, + "epoch": 0.09456204482983366, "grad_norm": 0.0, - "learning_rate": 1.978143778827053e-05, - "loss": 1.0248, + "learning_rate": 1.9782238624648363e-05, + "loss": 1.0515, "step": 3337 }, { - "epoch": 0.09472190692395005, + "epoch": 0.09459038227209612, "grad_norm": 0.0, - "learning_rate": 1.978124664335859e-05, - "loss": 0.953, + "learning_rate": 1.9782048093915916e-05, + "loss": 1.136, "step": 3338 }, { - "epoch": 0.09475028376844495, + "epoch": 0.09461871971435858, "grad_norm": 0.0, - "learning_rate": 1.9781055415824015e-05, - "loss": 1.014, + "learning_rate": 1.9781857480785645e-05, + "loss": 1.0947, "step": 3339 }, { - "epoch": 0.09477866061293984, + "epoch": 0.09464705715662104, "grad_norm": 0.0, - "learning_rate": 1.978086410566843e-05, - "loss": 1.0883, + "learning_rate": 1.978166678525916e-05, + "loss": 1.0109, "step": 3340 }, { - "epoch": 0.09480703745743474, + "epoch": 0.09467539459888351, "grad_norm": 0.0, - "learning_rate": 1.9780672712893447e-05, - "loss": 1.0946, + "learning_rate": 1.9781476007338058e-05, + "loss": 1.1552, "step": 3341 }, { - "epoch": 0.09483541430192963, + "epoch": 0.09470373204114596, "grad_norm": 0.0, - "learning_rate": 1.9780481237500682e-05, - "loss": 1.2756, + "learning_rate": 1.9781285147023953e-05, + "loss": 1.0166, "step": 3342 }, { - "epoch": 0.09486379114642451, + "epoch": 0.09473206948340843, "grad_norm": 0.0, - "learning_rate": 1.9780289679491752e-05, - "loss": 0.902, + "learning_rate": 1.9781094204318455e-05, + "loss": 1.0925, "step": 3343 }, { - "epoch": 0.09489216799091942, + "epoch": 0.09476040692567089, "grad_norm": 0.0, - "learning_rate": 1.978009803886827e-05, - "loss": 1.1265, + "learning_rate": 1.978090317922316e-05, + "loss": 1.106, "step": 3344 }, { - "epoch": 0.0949205448354143, + "epoch": 0.09478874436793334, "grad_norm": 0.0, - "learning_rate": 1.977990631563187e-05, - "loss": 0.9805, + "learning_rate": 1.978071207173969e-05, + "loss": 0.9142, "step": 3345 }, { - "epoch": 0.09494892167990919, + "epoch": 0.09481708181019581, "grad_norm": 0.0, - "learning_rate": 1.9779714509784155e-05, - "loss": 0.927, + "learning_rate": 1.9780520881869653e-05, + "loss": 1.0904, "step": 3346 }, { - "epoch": 0.09497729852440409, + "epoch": 0.09484541925245828, "grad_norm": 0.0, - "learning_rate": 1.9779522621326754e-05, - "loss": 1.0476, + "learning_rate": 1.9780329609614654e-05, + "loss": 1.1256, "step": 3347 }, { - "epoch": 0.09500567536889898, + "epoch": 0.09487375669472073, "grad_norm": 0.0, - "learning_rate": 1.9779330650261282e-05, - "loss": 1.0313, + "learning_rate": 1.978013825497631e-05, + "loss": 1.1051, "step": 3348 }, { - "epoch": 0.09503405221339387, + "epoch": 0.0949020941369832, "grad_norm": 0.0, - "learning_rate": 1.977913859658937e-05, - "loss": 1.016, + "learning_rate": 1.9779946817956227e-05, + "loss": 1.1282, "step": 3349 }, { - "epoch": 0.09506242905788877, + "epoch": 0.09493043157924566, "grad_norm": 0.0, - "learning_rate": 1.9778946460312632e-05, - "loss": 1.0275, + "learning_rate": 1.977975529855602e-05, + "loss": 1.0733, "step": 3350 }, { - "epoch": 0.09509080590238365, + "epoch": 0.09495876902150811, "grad_norm": 0.0, - "learning_rate": 1.9778754241432696e-05, - "loss": 1.0253, + "learning_rate": 1.9779563696777303e-05, + "loss": 1.0797, "step": 3351 }, { - "epoch": 0.09511918274687854, + "epoch": 0.09498710646377058, "grad_norm": 0.0, - "learning_rate": 1.9778561939951178e-05, - "loss": 1.0728, + "learning_rate": 1.9779372012621688e-05, + "loss": 1.0021, "step": 3352 }, { - "epoch": 0.09514755959137344, + "epoch": 0.09501544390603305, "grad_norm": 0.0, - "learning_rate": 1.9778369555869714e-05, - "loss": 1.138, + "learning_rate": 1.9779180246090793e-05, + "loss": 1.2443, "step": 3353 }, { - "epoch": 0.09517593643586833, + "epoch": 0.0950437813482955, "grad_norm": 0.0, - "learning_rate": 1.9778177089189917e-05, - "loss": 1.1155, + "learning_rate": 1.977898839718623e-05, + "loss": 1.1248, "step": 3354 }, { - "epoch": 0.09520431328036322, + "epoch": 0.09507211879055796, "grad_norm": 0.0, - "learning_rate": 1.9777984539913423e-05, - "loss": 1.1815, + "learning_rate": 1.977879646590962e-05, + "loss": 1.1069, "step": 3355 }, { - "epoch": 0.09523269012485812, + "epoch": 0.09510045623282043, "grad_norm": 0.0, - "learning_rate": 1.977779190804185e-05, - "loss": 1.1064, + "learning_rate": 1.977860445226257e-05, + "loss": 1.0163, "step": 3356 }, { - "epoch": 0.095261066969353, + "epoch": 0.09512879367508288, "grad_norm": 0.0, - "learning_rate": 1.9777599193576833e-05, - "loss": 1.129, + "learning_rate": 1.977841235624671e-05, + "loss": 1.1124, "step": 3357 }, { - "epoch": 0.0952894438138479, + "epoch": 0.09515713111734535, "grad_norm": 0.0, - "learning_rate": 1.9777406396519997e-05, - "loss": 1.0394, + "learning_rate": 1.977822017786365e-05, + "loss": 1.0461, "step": 3358 }, { - "epoch": 0.0953178206583428, + "epoch": 0.09518546855960781, "grad_norm": 0.0, - "learning_rate": 1.9777213516872965e-05, - "loss": 1.1003, + "learning_rate": 1.9778027917115006e-05, + "loss": 0.9961, "step": 3359 }, { - "epoch": 0.09534619750283768, + "epoch": 0.09521380600187027, "grad_norm": 0.0, - "learning_rate": 1.9777020554637376e-05, - "loss": 1.0303, + "learning_rate": 1.9777835574002405e-05, + "loss": 1.1132, "step": 3360 }, { - "epoch": 0.09537457434733258, + "epoch": 0.09524214344413273, "grad_norm": 0.0, - "learning_rate": 1.977682750981485e-05, - "loss": 0.9908, + "learning_rate": 1.9777643148527464e-05, + "loss": 1.1483, "step": 3361 }, { - "epoch": 0.09540295119182747, + "epoch": 0.0952704808863952, "grad_norm": 0.0, - "learning_rate": 1.9776634382407027e-05, - "loss": 1.0547, + "learning_rate": 1.97774506406918e-05, + "loss": 0.954, "step": 3362 }, { - "epoch": 0.09543132803632236, + "epoch": 0.09529881832865765, "grad_norm": 0.0, - "learning_rate": 1.977644117241553e-05, - "loss": 1.0215, + "learning_rate": 1.9777258050497044e-05, + "loss": 1.0103, "step": 3363 }, { - "epoch": 0.09545970488081726, + "epoch": 0.09532715577092012, "grad_norm": 0.0, - "learning_rate": 1.9776247879841997e-05, - "loss": 1.0409, + "learning_rate": 1.9777065377944812e-05, + "loss": 1.0309, "step": 3364 }, { - "epoch": 0.09548808172531215, + "epoch": 0.09535549321318258, "grad_norm": 0.0, - "learning_rate": 1.9776054504688057e-05, - "loss": 1.0162, + "learning_rate": 1.9776872623036725e-05, + "loss": 1.0269, "step": 3365 }, { - "epoch": 0.09551645856980703, + "epoch": 0.09538383065544503, "grad_norm": 0.0, - "learning_rate": 1.9775861046955347e-05, - "loss": 1.1134, + "learning_rate": 1.9776679785774412e-05, + "loss": 1.0365, "step": 3366 }, { - "epoch": 0.09554483541430193, + "epoch": 0.0954121680977075, "grad_norm": 0.0, - "learning_rate": 1.9775667506645497e-05, - "loss": 1.2161, + "learning_rate": 1.977648686615949e-05, + "loss": 0.9487, "step": 3367 }, { - "epoch": 0.09557321225879682, + "epoch": 0.09544050553996997, "grad_norm": 0.0, - "learning_rate": 1.9775473883760147e-05, - "loss": 1.0745, + "learning_rate": 1.9776293864193594e-05, + "loss": 1.053, "step": 3368 }, { - "epoch": 0.09560158910329171, + "epoch": 0.09546884298223242, "grad_norm": 0.0, - "learning_rate": 1.977528017830093e-05, - "loss": 1.039, + "learning_rate": 1.9776100779878344e-05, + "loss": 1.0973, "step": 3369 }, { - "epoch": 0.09562996594778661, + "epoch": 0.09549718042449488, "grad_norm": 0.0, - "learning_rate": 1.9775086390269476e-05, - "loss": 1.0899, + "learning_rate": 1.9775907613215364e-05, + "loss": 1.111, "step": 3370 }, { - "epoch": 0.0956583427922815, + "epoch": 0.09552551786675735, "grad_norm": 0.0, - "learning_rate": 1.9774892519667433e-05, - "loss": 1.045, + "learning_rate": 1.9775714364206288e-05, + "loss": 1.0477, "step": 3371 }, { - "epoch": 0.09568671963677639, + "epoch": 0.0955538553090198, "grad_norm": 0.0, - "learning_rate": 1.9774698566496435e-05, - "loss": 1.0668, + "learning_rate": 1.9775521032852737e-05, + "loss": 1.1122, "step": 3372 }, { - "epoch": 0.09571509648127129, + "epoch": 0.09558219275128227, "grad_norm": 0.0, - "learning_rate": 1.9774504530758115e-05, - "loss": 1.0602, + "learning_rate": 1.977532761915634e-05, + "loss": 1.144, "step": 3373 }, { - "epoch": 0.09574347332576617, + "epoch": 0.09561053019354473, "grad_norm": 0.0, - "learning_rate": 1.9774310412454117e-05, - "loss": 1.0526, + "learning_rate": 1.9775134123118732e-05, + "loss": 1.0691, "step": 3374 }, { - "epoch": 0.09577185017026106, + "epoch": 0.09563886763580719, "grad_norm": 0.0, - "learning_rate": 1.977411621158608e-05, - "loss": 1.0491, + "learning_rate": 1.9774940544741537e-05, + "loss": 1.0052, "step": 3375 }, { - "epoch": 0.09580022701475596, + "epoch": 0.09566720507806965, "grad_norm": 0.0, - "learning_rate": 1.9773921928155645e-05, - "loss": 0.9114, + "learning_rate": 1.977474688402639e-05, + "loss": 1.1351, "step": 3376 }, { - "epoch": 0.09582860385925085, + "epoch": 0.09569554252033212, "grad_norm": 0.0, - "learning_rate": 1.977372756216445e-05, - "loss": 1.1138, + "learning_rate": 1.9774553140974922e-05, + "loss": 1.0968, "step": 3377 }, { - "epoch": 0.09585698070374574, + "epoch": 0.09572387996259457, "grad_norm": 0.0, - "learning_rate": 1.9773533113614142e-05, - "loss": 1.0186, + "learning_rate": 1.9774359315588758e-05, + "loss": 1.0869, "step": 3378 }, { - "epoch": 0.09588535754824064, + "epoch": 0.09575221740485704, "grad_norm": 0.0, - "learning_rate": 1.9773338582506357e-05, - "loss": 1.0389, + "learning_rate": 1.9774165407869538e-05, + "loss": 1.1705, "step": 3379 }, { - "epoch": 0.09591373439273553, + "epoch": 0.0957805548471195, "grad_norm": 0.0, - "learning_rate": 1.9773143968842743e-05, - "loss": 0.9442, + "learning_rate": 1.9773971417818894e-05, + "loss": 1.0578, "step": 3380 }, { - "epoch": 0.09594211123723043, + "epoch": 0.09580889228938196, "grad_norm": 0.0, - "learning_rate": 1.9772949272624943e-05, - "loss": 0.8875, + "learning_rate": 1.977377734543846e-05, + "loss": 1.0942, "step": 3381 }, { - "epoch": 0.09597048808172531, + "epoch": 0.09583722973164442, "grad_norm": 0.0, - "learning_rate": 1.97727544938546e-05, - "loss": 0.9076, + "learning_rate": 1.977358319072987e-05, + "loss": 1.1325, "step": 3382 }, { - "epoch": 0.0959988649262202, + "epoch": 0.09586556717390689, "grad_norm": 0.0, - "learning_rate": 1.9772559632533362e-05, - "loss": 1.0511, + "learning_rate": 1.9773388953694758e-05, + "loss": 1.1034, "step": 3383 }, { - "epoch": 0.0960272417707151, + "epoch": 0.09589390461616934, "grad_norm": 0.0, - "learning_rate": 1.9772364688662874e-05, - "loss": 0.9625, + "learning_rate": 1.9773194634334764e-05, + "loss": 0.907, "step": 3384 }, { - "epoch": 0.09605561861520999, + "epoch": 0.0959222420584318, "grad_norm": 0.0, - "learning_rate": 1.9772169662244777e-05, - "loss": 1.1696, + "learning_rate": 1.977300023265152e-05, + "loss": 1.077, "step": 3385 }, { - "epoch": 0.09608399545970488, + "epoch": 0.09595057950069427, "grad_norm": 0.0, - "learning_rate": 1.9771974553280727e-05, - "loss": 1.1187, + "learning_rate": 1.9772805748646667e-05, + "loss": 1.2027, "step": 3386 }, { - "epoch": 0.09611237230419978, + "epoch": 0.09597891694295672, "grad_norm": 0.0, - "learning_rate": 1.9771779361772368e-05, - "loss": 0.9477, + "learning_rate": 1.977261118232184e-05, + "loss": 1.0629, "step": 3387 }, { - "epoch": 0.09614074914869467, + "epoch": 0.09600725438521919, "grad_norm": 0.0, - "learning_rate": 1.9771584087721353e-05, - "loss": 1.0238, + "learning_rate": 1.9772416533678683e-05, + "loss": 1.0634, "step": 3388 }, { - "epoch": 0.09616912599318955, + "epoch": 0.09603559182748166, "grad_norm": 0.0, - "learning_rate": 1.9771388731129325e-05, - "loss": 1.0061, + "learning_rate": 1.977222180271883e-05, + "loss": 0.9976, "step": 3389 }, { - "epoch": 0.09619750283768445, + "epoch": 0.09606392926974411, "grad_norm": 0.0, - "learning_rate": 1.9771193291997938e-05, - "loss": 1.0017, + "learning_rate": 1.977202698944393e-05, + "loss": 1.0624, "step": 3390 }, { - "epoch": 0.09622587968217934, + "epoch": 0.09609226671200657, "grad_norm": 0.0, - "learning_rate": 1.977099777032884e-05, - "loss": 1.1331, + "learning_rate": 1.977183209385561e-05, + "loss": 1.0057, "step": 3391 }, { - "epoch": 0.09625425652667423, + "epoch": 0.09612060415426904, "grad_norm": 0.0, - "learning_rate": 1.977080216612369e-05, - "loss": 0.9851, + "learning_rate": 1.9771637115955524e-05, + "loss": 1.0427, "step": 3392 }, { - "epoch": 0.09628263337116913, + "epoch": 0.09614894159653149, "grad_norm": 0.0, - "learning_rate": 1.977060647938413e-05, - "loss": 1.1373, + "learning_rate": 1.977144205574531e-05, + "loss": 1.1241, "step": 3393 }, { - "epoch": 0.09631101021566402, + "epoch": 0.09617727903879396, "grad_norm": 0.0, - "learning_rate": 1.9770410710111817e-05, - "loss": 1.0918, + "learning_rate": 1.977124691322661e-05, + "loss": 1.1173, "step": 3394 }, { - "epoch": 0.0963393870601589, + "epoch": 0.09620561648105642, "grad_norm": 0.0, - "learning_rate": 1.9770214858308407e-05, - "loss": 1.0576, + "learning_rate": 1.9771051688401065e-05, + "loss": 1.0773, "step": 3395 }, { - "epoch": 0.0963677639046538, + "epoch": 0.09623395392331888, "grad_norm": 0.0, - "learning_rate": 1.9770018923975555e-05, - "loss": 1.137, + "learning_rate": 1.977085638127033e-05, + "loss": 0.9946, "step": 3396 }, { - "epoch": 0.09639614074914869, + "epoch": 0.09626229136558134, "grad_norm": 0.0, - "learning_rate": 1.976982290711491e-05, - "loss": 1.11, + "learning_rate": 1.977066099183604e-05, + "loss": 1.0383, "step": 3397 }, { - "epoch": 0.09642451759364358, + "epoch": 0.09629062880784381, "grad_norm": 0.0, - "learning_rate": 1.9769626807728134e-05, - "loss": 1.0186, + "learning_rate": 1.9770465520099843e-05, + "loss": 1.2223, "step": 3398 }, { - "epoch": 0.09645289443813848, + "epoch": 0.09631896625010626, "grad_norm": 0.0, - "learning_rate": 1.9769430625816882e-05, - "loss": 1.1018, + "learning_rate": 1.977026996606339e-05, + "loss": 1.0887, "step": 3399 }, { - "epoch": 0.09648127128263337, + "epoch": 0.09634730369236873, "grad_norm": 0.0, - "learning_rate": 1.976923436138281e-05, - "loss": 1.0255, + "learning_rate": 1.977007432972832e-05, + "loss": 1.0717, "step": 3400 }, { - "epoch": 0.09650964812712827, + "epoch": 0.0963756411346312, "grad_norm": 0.0, - "learning_rate": 1.9769038014427574e-05, - "loss": 1.067, + "learning_rate": 1.976987861109629e-05, + "loss": 0.9318, "step": 3401 }, { - "epoch": 0.09653802497162316, + "epoch": 0.09640397857689365, "grad_norm": 0.0, - "learning_rate": 1.9768841584952837e-05, - "loss": 0.9745, + "learning_rate": 1.9769682810168944e-05, + "loss": 0.9876, "step": 3402 }, { - "epoch": 0.09656640181611804, + "epoch": 0.09643231601915611, "grad_norm": 0.0, - "learning_rate": 1.9768645072960256e-05, - "loss": 1.1588, + "learning_rate": 1.976948692694793e-05, + "loss": 1.0123, "step": 3403 }, { - "epoch": 0.09659477866061295, + "epoch": 0.09646065346141858, "grad_norm": 0.0, - "learning_rate": 1.9768448478451493e-05, - "loss": 1.1032, + "learning_rate": 1.9769290961434904e-05, + "loss": 1.1154, "step": 3404 }, { - "epoch": 0.09662315550510783, + "epoch": 0.09648899090368103, "grad_norm": 0.0, - "learning_rate": 1.9768251801428203e-05, - "loss": 1.006, + "learning_rate": 1.976909491363151e-05, + "loss": 1.0998, "step": 3405 }, { - "epoch": 0.09665153234960272, + "epoch": 0.0965173283459435, "grad_norm": 0.0, - "learning_rate": 1.976805504189205e-05, - "loss": 1.0493, + "learning_rate": 1.97688987835394e-05, + "loss": 1.0666, "step": 3406 }, { - "epoch": 0.09667990919409762, + "epoch": 0.09654566578820596, "grad_norm": 0.0, - "learning_rate": 1.9767858199844697e-05, - "loss": 1.0494, + "learning_rate": 1.976870257116023e-05, + "loss": 1.0893, "step": 3407 }, { - "epoch": 0.09670828603859251, + "epoch": 0.09657400323046841, "grad_norm": 0.0, - "learning_rate": 1.976766127528781e-05, - "loss": 0.9875, + "learning_rate": 1.9768506276495652e-05, + "loss": 1.114, "step": 3408 }, { - "epoch": 0.0967366628830874, + "epoch": 0.09660234067273088, "grad_norm": 0.0, - "learning_rate": 1.9767464268223047e-05, - "loss": 1.0834, + "learning_rate": 1.9768309899547313e-05, + "loss": 1.1288, "step": 3409 }, { - "epoch": 0.0967650397275823, + "epoch": 0.09663067811499335, "grad_norm": 0.0, - "learning_rate": 1.9767267178652075e-05, - "loss": 1.0919, + "learning_rate": 1.9768113440316878e-05, + "loss": 1.0777, "step": 3410 }, { - "epoch": 0.09679341657207718, + "epoch": 0.0966590155572558, "grad_norm": 0.0, - "learning_rate": 1.9767070006576557e-05, - "loss": 1.1204, + "learning_rate": 1.9767916898805994e-05, + "loss": 1.0658, "step": 3411 }, { - "epoch": 0.09682179341657207, + "epoch": 0.09668735299951826, "grad_norm": 0.0, - "learning_rate": 1.9766872751998163e-05, - "loss": 1.0404, + "learning_rate": 1.976772027501632e-05, + "loss": 1.0935, "step": 3412 }, { - "epoch": 0.09685017026106697, + "epoch": 0.09671569044178073, "grad_norm": 0.0, - "learning_rate": 1.9766675414918554e-05, - "loss": 1.0547, + "learning_rate": 1.9767523568949506e-05, + "loss": 1.116, "step": 3413 }, { - "epoch": 0.09687854710556186, + "epoch": 0.09674402788404318, "grad_norm": 0.0, - "learning_rate": 1.9766477995339395e-05, - "loss": 0.905, + "learning_rate": 1.9767326780607218e-05, + "loss": 0.9515, "step": 3414 }, { - "epoch": 0.09690692395005675, + "epoch": 0.09677236532630565, "grad_norm": 0.0, - "learning_rate": 1.9766280493262362e-05, - "loss": 0.9047, + "learning_rate": 1.976712990999111e-05, + "loss": 0.9176, "step": 3415 }, { - "epoch": 0.09693530079455165, + "epoch": 0.09680070276856811, "grad_norm": 0.0, - "learning_rate": 1.9766082908689117e-05, - "loss": 1.0473, + "learning_rate": 1.976693295710284e-05, + "loss": 1.0984, "step": 3416 }, { - "epoch": 0.09696367763904654, + "epoch": 0.09682904021083057, "grad_norm": 0.0, - "learning_rate": 1.976588524162133e-05, - "loss": 0.9252, + "learning_rate": 1.9766735921944064e-05, + "loss": 1.0618, "step": 3417 }, { - "epoch": 0.09699205448354142, + "epoch": 0.09685737765309303, "grad_norm": 0.0, - "learning_rate": 1.9765687492060674e-05, - "loss": 0.9709, + "learning_rate": 1.976653880451645e-05, + "loss": 1.125, "step": 3418 }, { - "epoch": 0.09702043132803632, + "epoch": 0.0968857150953555, "grad_norm": 0.0, - "learning_rate": 1.9765489660008813e-05, - "loss": 1.0221, + "learning_rate": 1.9766341604821646e-05, + "loss": 0.9579, "step": 3419 }, { - "epoch": 0.09704880817253121, + "epoch": 0.09691405253761795, "grad_norm": 0.0, - "learning_rate": 1.9765291745467423e-05, - "loss": 1.0105, + "learning_rate": 1.9766144322861323e-05, + "loss": 1.093, "step": 3420 }, { - "epoch": 0.09707718501702611, + "epoch": 0.09694238997988042, "grad_norm": 0.0, - "learning_rate": 1.9765093748438176e-05, - "loss": 1.0939, + "learning_rate": 1.976594695863714e-05, + "loss": 1.109, "step": 3421 }, { - "epoch": 0.097105561861521, + "epoch": 0.09697072742214288, "grad_norm": 0.0, - "learning_rate": 1.9764895668922743e-05, - "loss": 0.9964, + "learning_rate": 1.976574951215076e-05, + "loss": 1.017, "step": 3422 }, { - "epoch": 0.09713393870601589, + "epoch": 0.09699906486440533, "grad_norm": 0.0, - "learning_rate": 1.9764697506922797e-05, - "loss": 1.0641, + "learning_rate": 1.9765551983403844e-05, + "loss": 0.965, "step": 3423 }, { - "epoch": 0.09716231555051079, + "epoch": 0.0970274023066678, "grad_norm": 0.0, - "learning_rate": 1.9764499262440014e-05, - "loss": 1.1073, + "learning_rate": 1.976535437239806e-05, + "loss": 1.0113, "step": 3424 }, { - "epoch": 0.09719069239500568, + "epoch": 0.09705573974893027, "grad_norm": 0.0, - "learning_rate": 1.9764300935476065e-05, - "loss": 1.0741, + "learning_rate": 1.9765156679135067e-05, + "loss": 1.1984, "step": 3425 }, { - "epoch": 0.09721906923950056, + "epoch": 0.09708407719119272, "grad_norm": 0.0, - "learning_rate": 1.9764102526032625e-05, - "loss": 0.8891, + "learning_rate": 1.9764958903616533e-05, + "loss": 1.0045, "step": 3426 }, { - "epoch": 0.09724744608399546, + "epoch": 0.09711241463345519, "grad_norm": 0.0, - "learning_rate": 1.9763904034111375e-05, - "loss": 1.0419, + "learning_rate": 1.9764761045844128e-05, + "loss": 0.9674, "step": 3427 }, { - "epoch": 0.09727582292849035, + "epoch": 0.09714075207571765, "grad_norm": 0.0, - "learning_rate": 1.9763705459713985e-05, - "loss": 1.0368, + "learning_rate": 1.976456310581951e-05, + "loss": 0.9355, "step": 3428 }, { - "epoch": 0.09730419977298524, + "epoch": 0.0971690895179801, "grad_norm": 0.0, - "learning_rate": 1.9763506802842142e-05, - "loss": 0.9238, + "learning_rate": 1.976436508354435e-05, + "loss": 1.1005, "step": 3429 }, { - "epoch": 0.09733257661748014, + "epoch": 0.09719742696024257, "grad_norm": 0.0, - "learning_rate": 1.976330806349751e-05, - "loss": 0.9627, + "learning_rate": 1.976416697902032e-05, + "loss": 1.1038, "step": 3430 }, { - "epoch": 0.09736095346197503, + "epoch": 0.09722576440250504, "grad_norm": 0.0, - "learning_rate": 1.9763109241681785e-05, - "loss": 1.0133, + "learning_rate": 1.9763968792249087e-05, + "loss": 1.1368, "step": 3431 }, { - "epoch": 0.09738933030646992, + "epoch": 0.09725410184476749, "grad_norm": 0.0, - "learning_rate": 1.976291033739663e-05, - "loss": 1.0398, + "learning_rate": 1.976377052323232e-05, + "loss": 1.1147, "step": 3432 }, { - "epoch": 0.09741770715096482, + "epoch": 0.09728243928702995, "grad_norm": 0.0, - "learning_rate": 1.976271135064374e-05, - "loss": 1.067, + "learning_rate": 1.9763572171971685e-05, + "loss": 1.1428, "step": 3433 }, { - "epoch": 0.0974460839954597, + "epoch": 0.09731077672929242, "grad_norm": 0.0, - "learning_rate": 1.9762512281424778e-05, - "loss": 1.0749, + "learning_rate": 1.9763373738468857e-05, + "loss": 1.0717, "step": 3434 }, { - "epoch": 0.09747446083995459, + "epoch": 0.09733911417155487, "grad_norm": 0.0, - "learning_rate": 1.9762313129741444e-05, - "loss": 1.0556, + "learning_rate": 1.9763175222725507e-05, + "loss": 1.066, "step": 3435 }, { - "epoch": 0.09750283768444949, + "epoch": 0.09736745161381734, "grad_norm": 0.0, - "learning_rate": 1.976211389559541e-05, - "loss": 0.9664, + "learning_rate": 1.9762976624743304e-05, + "loss": 1.1079, "step": 3436 }, { - "epoch": 0.09753121452894438, + "epoch": 0.0973957890560798, "grad_norm": 0.0, - "learning_rate": 1.9761914578988358e-05, - "loss": 0.9712, + "learning_rate": 1.9762777944523926e-05, + "loss": 1.0825, "step": 3437 }, { - "epoch": 0.09755959137343927, + "epoch": 0.09742412649834226, "grad_norm": 0.0, - "learning_rate": 1.976171517992198e-05, - "loss": 1.053, + "learning_rate": 1.9762579182069043e-05, + "loss": 1.0645, "step": 3438 }, { - "epoch": 0.09758796821793417, + "epoch": 0.09745246394060472, "grad_norm": 0.0, - "learning_rate": 1.9761515698397952e-05, - "loss": 1.0278, + "learning_rate": 1.976238033738033e-05, + "loss": 1.1491, "step": 3439 }, { - "epoch": 0.09761634506242906, + "epoch": 0.09748080138286719, "grad_norm": 0.0, - "learning_rate": 1.9761316134417963e-05, - "loss": 1.0391, + "learning_rate": 1.976218141045946e-05, + "loss": 0.9743, "step": 3440 }, { - "epoch": 0.09764472190692396, + "epoch": 0.09750913882512964, "grad_norm": 0.0, - "learning_rate": 1.9761116487983697e-05, - "loss": 1.0753, + "learning_rate": 1.9761982401308116e-05, + "loss": 1.0506, "step": 3441 }, { - "epoch": 0.09767309875141884, + "epoch": 0.0975374762673921, "grad_norm": 0.0, - "learning_rate": 1.9760916759096846e-05, - "loss": 1.1611, + "learning_rate": 1.9761783309927968e-05, + "loss": 1.0476, "step": 3442 }, { - "epoch": 0.09770147559591373, + "epoch": 0.09756581370965457, "grad_norm": 0.0, - "learning_rate": 1.9760716947759086e-05, - "loss": 0.9018, + "learning_rate": 1.976158413632069e-05, + "loss": 1.1582, "step": 3443 }, { - "epoch": 0.09772985244040863, + "epoch": 0.09759415115191702, "grad_norm": 0.0, - "learning_rate": 1.9760517053972114e-05, - "loss": 1.0439, + "learning_rate": 1.9761384880487967e-05, + "loss": 1.0475, "step": 3444 }, { - "epoch": 0.09775822928490352, + "epoch": 0.09762248859417949, "grad_norm": 0.0, - "learning_rate": 1.9760317077737615e-05, - "loss": 1.0625, + "learning_rate": 1.9761185542431475e-05, + "loss": 1.0494, "step": 3445 }, { - "epoch": 0.0977866061293984, + "epoch": 0.09765082603644196, "grad_norm": 0.0, - "learning_rate": 1.976011701905728e-05, - "loss": 0.9885, + "learning_rate": 1.9760986122152896e-05, + "loss": 1.0636, "step": 3446 }, { - "epoch": 0.09781498297389331, + "epoch": 0.09767916347870441, "grad_norm": 0.0, - "learning_rate": 1.9759916877932795e-05, - "loss": 1.0078, + "learning_rate": 1.97607866196539e-05, + "loss": 1.11, "step": 3447 }, { - "epoch": 0.0978433598183882, + "epoch": 0.09770750092096687, "grad_norm": 0.0, - "learning_rate": 1.9759716654365854e-05, - "loss": 1.0072, + "learning_rate": 1.9760587034936175e-05, + "loss": 1.1228, "step": 3448 }, { - "epoch": 0.09787173666288308, + "epoch": 0.09773583836322934, "grad_norm": 0.0, - "learning_rate": 1.975951634835815e-05, - "loss": 1.1088, + "learning_rate": 1.97603873680014e-05, + "loss": 1.1406, "step": 3449 }, { - "epoch": 0.09790011350737798, + "epoch": 0.0977641758054918, "grad_norm": 0.0, - "learning_rate": 1.975931595991137e-05, - "loss": 1.1278, + "learning_rate": 1.9760187618851262e-05, + "loss": 1.1306, "step": 3450 }, { - "epoch": 0.09792849035187287, + "epoch": 0.09779251324775426, "grad_norm": 0.0, - "learning_rate": 1.975911548902721e-05, - "loss": 1.1053, + "learning_rate": 1.9759987787487437e-05, + "loss": 1.1852, "step": 3451 }, { - "epoch": 0.09795686719636776, + "epoch": 0.09782085069001673, "grad_norm": 0.0, - "learning_rate": 1.9758914935707366e-05, - "loss": 1.0845, + "learning_rate": 1.9759787873911608e-05, + "loss": 0.9895, "step": 3452 }, { - "epoch": 0.09798524404086266, + "epoch": 0.09784918813227918, "grad_norm": 0.0, - "learning_rate": 1.9758714299953526e-05, - "loss": 1.0907, + "learning_rate": 1.9759587878125468e-05, + "loss": 1.1215, "step": 3453 }, { - "epoch": 0.09801362088535755, + "epoch": 0.09787752557454164, "grad_norm": 0.0, - "learning_rate": 1.9758513581767388e-05, - "loss": 1.0812, + "learning_rate": 1.975938780013069e-05, + "loss": 1.0945, "step": 3454 }, { - "epoch": 0.09804199772985243, + "epoch": 0.09790586301680411, "grad_norm": 0.0, - "learning_rate": 1.975831278115065e-05, - "loss": 0.8931, + "learning_rate": 1.9759187639928967e-05, + "loss": 1.166, "step": 3455 }, { - "epoch": 0.09807037457434734, + "epoch": 0.09793420045906656, "grad_norm": 0.0, - "learning_rate": 1.9758111898105002e-05, - "loss": 0.9612, + "learning_rate": 1.9758987397521977e-05, + "loss": 1.0244, "step": 3456 }, { - "epoch": 0.09809875141884222, + "epoch": 0.09796253790132903, "grad_norm": 0.0, - "learning_rate": 1.9757910932632142e-05, - "loss": 1.1064, + "learning_rate": 1.975878707291142e-05, + "loss": 1.0898, "step": 3457 }, { - "epoch": 0.09812712826333711, + "epoch": 0.0979908753435915, "grad_norm": 0.0, - "learning_rate": 1.9757709884733774e-05, - "loss": 0.992, + "learning_rate": 1.975858666609897e-05, + "loss": 1.061, "step": 3458 }, { - "epoch": 0.09815550510783201, + "epoch": 0.09801921278585395, "grad_norm": 0.0, - "learning_rate": 1.975750875441159e-05, - "loss": 1.1125, + "learning_rate": 1.9758386177086324e-05, + "loss": 1.0441, "step": 3459 }, { - "epoch": 0.0981838819523269, + "epoch": 0.09804755022811641, "grad_norm": 0.0, - "learning_rate": 1.975730754166729e-05, - "loss": 1.1681, + "learning_rate": 1.9758185605875165e-05, + "loss": 1.1163, "step": 3460 }, { - "epoch": 0.0982122587968218, + "epoch": 0.09807588767037888, "grad_norm": 0.0, - "learning_rate": 1.9757106246502577e-05, - "loss": 0.9614, + "learning_rate": 1.9757984952467186e-05, + "loss": 1.1481, "step": 3461 }, { - "epoch": 0.09824063564131669, + "epoch": 0.09810422511264133, "grad_norm": 0.0, - "learning_rate": 1.9756904868919145e-05, - "loss": 1.0956, + "learning_rate": 1.975778421686408e-05, + "loss": 1.0272, "step": 3462 }, { - "epoch": 0.09826901248581157, + "epoch": 0.0981325625549038, "grad_norm": 0.0, - "learning_rate": 1.9756703408918706e-05, - "loss": 1.1205, + "learning_rate": 1.975758339906753e-05, + "loss": 1.0663, "step": 3463 }, { - "epoch": 0.09829738933030648, + "epoch": 0.09816089999716626, "grad_norm": 0.0, - "learning_rate": 1.9756501866502947e-05, - "loss": 1.1531, + "learning_rate": 1.975738249907923e-05, + "loss": 1.101, "step": 3464 }, { - "epoch": 0.09832576617480136, + "epoch": 0.09818923743942871, "grad_norm": 0.0, - "learning_rate": 1.9756300241673583e-05, - "loss": 1.0279, + "learning_rate": 1.975718151690088e-05, + "loss": 1.0046, "step": 3465 }, { - "epoch": 0.09835414301929625, + "epoch": 0.09821757488169118, "grad_norm": 0.0, - "learning_rate": 1.9756098534432314e-05, - "loss": 0.9157, + "learning_rate": 1.975698045253416e-05, + "loss": 1.1042, "step": 3466 }, { - "epoch": 0.09838251986379115, + "epoch": 0.09824591232395365, "grad_norm": 0.0, - "learning_rate": 1.9755896744780835e-05, - "loss": 1.0191, + "learning_rate": 1.9756779305980775e-05, + "loss": 1.0837, "step": 3467 }, { - "epoch": 0.09841089670828604, + "epoch": 0.0982742497662161, "grad_norm": 0.0, - "learning_rate": 1.9755694872720863e-05, - "loss": 1.0406, + "learning_rate": 1.975657807724241e-05, + "loss": 0.9861, "step": 3468 }, { - "epoch": 0.09843927355278093, + "epoch": 0.09830258720847856, "grad_norm": 0.0, - "learning_rate": 1.97554929182541e-05, - "loss": 1.0674, + "learning_rate": 1.975637676632077e-05, + "loss": 1.0969, "step": 3469 }, { - "epoch": 0.09846765039727583, + "epoch": 0.09833092465074103, "grad_norm": 0.0, - "learning_rate": 1.9755290881382244e-05, - "loss": 1.0881, + "learning_rate": 1.9756175373217547e-05, + "loss": 1.0774, "step": 3470 }, { - "epoch": 0.09849602724177071, + "epoch": 0.09835926209300348, "grad_norm": 0.0, - "learning_rate": 1.975508876210701e-05, - "loss": 1.0353, + "learning_rate": 1.975597389793443e-05, + "loss": 1.077, "step": 3471 }, { - "epoch": 0.0985244040862656, + "epoch": 0.09838759953526595, "grad_norm": 0.0, - "learning_rate": 1.97548865604301e-05, - "loss": 1.111, + "learning_rate": 1.9755772340473124e-05, + "loss": 1.0545, "step": 3472 }, { - "epoch": 0.0985527809307605, + "epoch": 0.09841593697752841, "grad_norm": 0.0, - "learning_rate": 1.9754684276353226e-05, - "loss": 1.0345, + "learning_rate": 1.9755570700835327e-05, + "loss": 1.132, "step": 3473 }, { - "epoch": 0.09858115777525539, + "epoch": 0.09844427441979087, "grad_norm": 0.0, - "learning_rate": 1.97544819098781e-05, - "loss": 1.0751, + "learning_rate": 1.9755368979022734e-05, + "loss": 1.0358, "step": 3474 }, { - "epoch": 0.09860953461975028, + "epoch": 0.09847261186205333, "grad_norm": 0.0, - "learning_rate": 1.9754279461006424e-05, - "loss": 1.0522, + "learning_rate": 1.9755167175037044e-05, + "loss": 1.0256, "step": 3475 }, { - "epoch": 0.09863791146424518, + "epoch": 0.0985009493043158, "grad_norm": 0.0, - "learning_rate": 1.9754076929739906e-05, - "loss": 1.1185, + "learning_rate": 1.975496528887996e-05, + "loss": 1.1766, "step": 3476 }, { - "epoch": 0.09866628830874007, + "epoch": 0.09852928674657825, "grad_norm": 0.0, - "learning_rate": 1.9753874316080268e-05, - "loss": 1.031, + "learning_rate": 1.975476332055318e-05, + "loss": 1.1709, "step": 3477 }, { - "epoch": 0.09869466515323495, + "epoch": 0.09855762418884072, "grad_norm": 0.0, - "learning_rate": 1.9753671620029213e-05, - "loss": 0.9507, + "learning_rate": 1.975456127005841e-05, + "loss": 1.0787, "step": 3478 }, { - "epoch": 0.09872304199772985, + "epoch": 0.09858596163110318, "grad_norm": 0.0, - "learning_rate": 1.9753468841588456e-05, - "loss": 1.0375, + "learning_rate": 1.9754359137397343e-05, + "loss": 1.0323, "step": 3479 }, { - "epoch": 0.09875141884222474, + "epoch": 0.09861429907336564, "grad_norm": 0.0, - "learning_rate": 1.9753265980759707e-05, - "loss": 1.019, + "learning_rate": 1.975415692257169e-05, + "loss": 1.0739, "step": 3480 }, { - "epoch": 0.09877979568671964, + "epoch": 0.0986426365156281, "grad_norm": 0.0, - "learning_rate": 1.9753063037544686e-05, - "loss": 1.1734, + "learning_rate": 1.975395462558315e-05, + "loss": 1.101, "step": 3481 }, { - "epoch": 0.09880817253121453, + "epoch": 0.09867097395789057, "grad_norm": 0.0, - "learning_rate": 1.9752860011945098e-05, - "loss": 1.1365, + "learning_rate": 1.9753752246433427e-05, + "loss": 1.1934, "step": 3482 }, { - "epoch": 0.09883654937570942, + "epoch": 0.09869931140015302, "grad_norm": 0.0, - "learning_rate": 1.9752656903962666e-05, - "loss": 1.034, + "learning_rate": 1.975354978512423e-05, + "loss": 1.1924, "step": 3483 }, { - "epoch": 0.09886492622020432, + "epoch": 0.09872764884241549, "grad_norm": 0.0, - "learning_rate": 1.97524537135991e-05, - "loss": 0.9448, + "learning_rate": 1.9753347241657258e-05, + "loss": 1.2427, "step": 3484 }, { - "epoch": 0.0988933030646992, + "epoch": 0.09875598628467795, "grad_norm": 0.0, - "learning_rate": 1.9752250440856123e-05, - "loss": 1.0301, + "learning_rate": 1.9753144616034224e-05, + "loss": 1.0785, "step": 3485 }, { - "epoch": 0.0989216799091941, + "epoch": 0.0987843237269404, "grad_norm": 0.0, - "learning_rate": 1.9752047085735447e-05, - "loss": 1.0322, + "learning_rate": 1.9752941908256828e-05, + "loss": 1.0203, "step": 3486 }, { - "epoch": 0.098950056753689, + "epoch": 0.09881266116920287, "grad_norm": 0.0, - "learning_rate": 1.975184364823879e-05, - "loss": 1.0555, + "learning_rate": 1.975273911832678e-05, + "loss": 0.9946, "step": 3487 }, { - "epoch": 0.09897843359818388, + "epoch": 0.09884099861146534, "grad_norm": 0.0, - "learning_rate": 1.9751640128367872e-05, - "loss": 1.071, + "learning_rate": 1.9752536246245793e-05, + "loss": 1.1077, "step": 3488 }, { - "epoch": 0.09900681044267877, + "epoch": 0.09886933605372779, "grad_norm": 0.0, - "learning_rate": 1.9751436526124412e-05, - "loss": 1.1312, + "learning_rate": 1.9752333292015565e-05, + "loss": 1.0039, "step": 3489 }, { - "epoch": 0.09903518728717367, + "epoch": 0.09889767349599025, "grad_norm": 0.0, - "learning_rate": 1.975123284151013e-05, - "loss": 1.0601, + "learning_rate": 1.975213025563782e-05, + "loss": 0.9673, "step": 3490 }, { - "epoch": 0.09906356413166856, + "epoch": 0.0989260109382527, "grad_norm": 0.0, - "learning_rate": 1.9751029074526743e-05, - "loss": 1.0803, + "learning_rate": 1.9751927137114255e-05, + "loss": 1.0639, "step": 3491 }, { - "epoch": 0.09909194097616345, + "epoch": 0.09895434838051517, "grad_norm": 0.0, - "learning_rate": 1.9750825225175976e-05, - "loss": 1.0916, + "learning_rate": 1.975172393644659e-05, + "loss": 0.9836, "step": 3492 }, { - "epoch": 0.09912031782065835, + "epoch": 0.09898268582277764, "grad_norm": 0.0, - "learning_rate": 1.975062129345955e-05, - "loss": 1.1354, + "learning_rate": 1.9751520653636528e-05, + "loss": 1.0858, "step": 3493 }, { - "epoch": 0.09914869466515323, + "epoch": 0.09901102326504009, "grad_norm": 0.0, - "learning_rate": 1.975041727937919e-05, - "loss": 0.7789, + "learning_rate": 1.975131728868579e-05, + "loss": 1.0818, "step": 3494 }, { - "epoch": 0.09917707150964812, + "epoch": 0.09903936070730256, "grad_norm": 0.0, - "learning_rate": 1.9750213182936614e-05, - "loss": 0.9893, + "learning_rate": 1.9751113841596087e-05, + "loss": 1.034, "step": 3495 }, { - "epoch": 0.09920544835414302, + "epoch": 0.09906769814956502, "grad_norm": 0.0, - "learning_rate": 1.975000900413355e-05, - "loss": 0.9857, + "learning_rate": 1.975091031236913e-05, + "loss": 1.0846, "step": 3496 }, { - "epoch": 0.09923382519863791, + "epoch": 0.09909603559182747, "grad_norm": 0.0, - "learning_rate": 1.974980474297172e-05, - "loss": 1.0864, + "learning_rate": 1.9750706701006632e-05, + "loss": 1.0581, "step": 3497 }, { - "epoch": 0.0992622020431328, + "epoch": 0.09912437303408994, "grad_norm": 0.0, - "learning_rate": 1.9749600399452852e-05, - "loss": 0.9478, + "learning_rate": 1.9750503007510314e-05, + "loss": 1.1159, "step": 3498 }, { - "epoch": 0.0992905788876277, + "epoch": 0.09915271047635241, "grad_norm": 0.0, - "learning_rate": 1.9749395973578674e-05, - "loss": 1.1232, + "learning_rate": 1.9750299231881887e-05, + "loss": 1.0616, "step": 3499 }, { - "epoch": 0.09931895573212259, + "epoch": 0.09918104791861486, "grad_norm": 0.0, - "learning_rate": 1.974919146535091e-05, - "loss": 1.0239, + "learning_rate": 1.9750095374123067e-05, + "loss": 0.9562, "step": 3500 }, { - "epoch": 0.09934733257661749, + "epoch": 0.09920938536087733, "grad_norm": 0.0, - "learning_rate": 1.9748986874771283e-05, - "loss": 1.0718, + "learning_rate": 1.9749891434235575e-05, + "loss": 1.0468, "step": 3501 }, { - "epoch": 0.09937570942111237, + "epoch": 0.09923772280313979, "grad_norm": 0.0, - "learning_rate": 1.9748782201841528e-05, - "loss": 1.0713, + "learning_rate": 1.974968741222113e-05, + "loss": 1.0079, "step": 3502 }, { - "epoch": 0.09940408626560726, + "epoch": 0.09926606024540224, "grad_norm": 0.0, - "learning_rate": 1.974857744656337e-05, - "loss": 0.9443, + "learning_rate": 1.974948330808144e-05, + "loss": 1.0041, "step": 3503 }, { - "epoch": 0.09943246311010216, + "epoch": 0.09929439768766471, "grad_norm": 0.0, - "learning_rate": 1.974837260893854e-05, - "loss": 1.0094, + "learning_rate": 1.9749279121818235e-05, + "loss": 1.0911, "step": 3504 }, { - "epoch": 0.09946083995459705, + "epoch": 0.09932273512992718, "grad_norm": 0.0, - "learning_rate": 1.974816768896877e-05, - "loss": 1.0887, + "learning_rate": 1.9749074853433236e-05, + "loss": 1.0849, "step": 3505 }, { - "epoch": 0.09948921679909194, + "epoch": 0.09935107257218963, "grad_norm": 0.0, - "learning_rate": 1.974796268665579e-05, - "loss": 1.0668, + "learning_rate": 1.9748870502928155e-05, + "loss": 1.1301, "step": 3506 }, { - "epoch": 0.09951759364358684, + "epoch": 0.0993794100144521, "grad_norm": 0.0, - "learning_rate": 1.974775760200133e-05, - "loss": 1.004, + "learning_rate": 1.9748666070304717e-05, + "loss": 0.9775, "step": 3507 }, { - "epoch": 0.09954597048808173, + "epoch": 0.09940774745671456, "grad_norm": 0.0, - "learning_rate": 1.9747552435007125e-05, - "loss": 0.9909, + "learning_rate": 1.9748461555564647e-05, + "loss": 0.9924, "step": 3508 }, { - "epoch": 0.09957434733257661, + "epoch": 0.09943608489897701, "grad_norm": 0.0, - "learning_rate": 1.9747347185674902e-05, - "loss": 0.975, + "learning_rate": 1.9748256958709666e-05, + "loss": 1.0114, "step": 3509 }, { - "epoch": 0.09960272417707151, + "epoch": 0.09946442234123948, "grad_norm": 0.0, - "learning_rate": 1.9747141854006405e-05, - "loss": 1.0869, + "learning_rate": 1.9748052279741494e-05, + "loss": 1.1147, "step": 3510 }, { - "epoch": 0.0996311010215664, + "epoch": 0.09949275978350194, "grad_norm": 0.0, - "learning_rate": 1.9746936440003357e-05, - "loss": 0.972, + "learning_rate": 1.974784751866186e-05, + "loss": 1.1669, "step": 3511 }, { - "epoch": 0.09965947786606129, + "epoch": 0.0995210972257644, "grad_norm": 0.0, - "learning_rate": 1.97467309436675e-05, - "loss": 1.1942, + "learning_rate": 1.9747642675472484e-05, + "loss": 1.0164, "step": 3512 }, { - "epoch": 0.09968785471055619, + "epoch": 0.09954943466802686, "grad_norm": 0.0, - "learning_rate": 1.9746525365000574e-05, - "loss": 1.0796, + "learning_rate": 1.9747437750175097e-05, + "loss": 1.2054, "step": 3513 }, { - "epoch": 0.09971623155505108, + "epoch": 0.09957777211028933, "grad_norm": 0.0, - "learning_rate": 1.9746319704004305e-05, - "loss": 1.0573, + "learning_rate": 1.974723274277142e-05, + "loss": 1.0868, "step": 3514 }, { - "epoch": 0.09974460839954596, + "epoch": 0.09960610955255178, "grad_norm": 0.0, - "learning_rate": 1.9746113960680435e-05, - "loss": 1.1512, + "learning_rate": 1.9747027653263183e-05, + "loss": 1.1428, "step": 3515 }, { - "epoch": 0.09977298524404087, + "epoch": 0.09963444699481425, "grad_norm": 0.0, - "learning_rate": 1.9745908135030705e-05, - "loss": 1.0711, + "learning_rate": 1.974682248165211e-05, + "loss": 1.1701, "step": 3516 }, { - "epoch": 0.09980136208853575, + "epoch": 0.09966278443707671, "grad_norm": 0.0, - "learning_rate": 1.974570222705685e-05, - "loss": 1.0122, + "learning_rate": 1.9746617227939935e-05, + "loss": 0.983, "step": 3517 }, { - "epoch": 0.09982973893303064, + "epoch": 0.09969112187933916, "grad_norm": 0.0, - "learning_rate": 1.9745496236760608e-05, - "loss": 1.0543, + "learning_rate": 1.9746411892128383e-05, + "loss": 0.972, "step": 3518 }, { - "epoch": 0.09985811577752554, + "epoch": 0.09971945932160163, "grad_norm": 0.0, - "learning_rate": 1.974529016414372e-05, - "loss": 1.1036, + "learning_rate": 1.9746206474219182e-05, + "loss": 1.0782, "step": 3519 }, { - "epoch": 0.09988649262202043, + "epoch": 0.0997477967638641, "grad_norm": 0.0, - "learning_rate": 1.9745084009207934e-05, - "loss": 1.0399, + "learning_rate": 1.9746000974214067e-05, + "loss": 1.0247, "step": 3520 }, { - "epoch": 0.09991486946651533, + "epoch": 0.09977613420612655, "grad_norm": 0.0, - "learning_rate": 1.974487777195498e-05, - "loss": 1.0736, + "learning_rate": 1.974579539211477e-05, + "loss": 1.05, "step": 3521 }, { - "epoch": 0.09994324631101022, + "epoch": 0.09980447164838901, "grad_norm": 0.0, - "learning_rate": 1.974467145238661e-05, - "loss": 1.0114, + "learning_rate": 1.9745589727923014e-05, + "loss": 1.0891, "step": 3522 }, { - "epoch": 0.0999716231555051, + "epoch": 0.09983280909065148, "grad_norm": 0.0, - "learning_rate": 1.9744465050504558e-05, - "loss": 1.247, + "learning_rate": 1.974538398164054e-05, + "loss": 1.0952, "step": 3523 }, { - "epoch": 0.1, + "epoch": 0.09986114653291393, "grad_norm": 0.0, - "learning_rate": 1.9744258566310574e-05, - "loss": 0.9433, + "learning_rate": 1.9745178153269075e-05, + "loss": 1.0352, "step": 3524 }, { - "epoch": 0.10002837684449489, + "epoch": 0.0998894839751764, "grad_norm": 0.0, - "learning_rate": 1.9744051999806397e-05, - "loss": 0.9902, + "learning_rate": 1.974497224281036e-05, + "loss": 1.1188, "step": 3525 }, { - "epoch": 0.10005675368898978, + "epoch": 0.09991782141743887, "grad_norm": 0.0, - "learning_rate": 1.974384535099378e-05, - "loss": 1.0911, + "learning_rate": 1.974476625026612e-05, + "loss": 1.1099, "step": 3526 }, { - "epoch": 0.10008513053348468, + "epoch": 0.09994615885970132, "grad_norm": 0.0, - "learning_rate": 1.9743638619874458e-05, - "loss": 1.06, + "learning_rate": 1.97445601756381e-05, + "loss": 1.0125, "step": 3527 }, { - "epoch": 0.10011350737797957, + "epoch": 0.09997449630196378, "grad_norm": 0.0, - "learning_rate": 1.9743431806450185e-05, - "loss": 1.0044, + "learning_rate": 1.974435401892803e-05, + "loss": 1.0477, "step": 3528 }, { - "epoch": 0.10014188422247446, + "epoch": 0.10000283374422625, "grad_norm": 0.0, - "learning_rate": 1.9743224910722706e-05, - "loss": 1.1276, + "learning_rate": 1.9744147780137644e-05, + "loss": 1.103, "step": 3529 }, { - "epoch": 0.10017026106696936, + "epoch": 0.1000311711864887, "grad_norm": 0.0, - "learning_rate": 1.9743017932693766e-05, - "loss": 1.101, + "learning_rate": 1.974394145926869e-05, + "loss": 1.2067, "step": 3530 }, { - "epoch": 0.10019863791146424, + "epoch": 0.10005950862875117, "grad_norm": 0.0, - "learning_rate": 1.974281087236512e-05, - "loss": 1.0162, + "learning_rate": 1.974373505632289e-05, + "loss": 1.1214, "step": 3531 }, { - "epoch": 0.10022701475595913, + "epoch": 0.10008784607101363, "grad_norm": 0.0, - "learning_rate": 1.9742603729738506e-05, - "loss": 1.086, + "learning_rate": 1.9743528571301996e-05, + "loss": 1.0717, "step": 3532 }, { - "epoch": 0.10025539160045403, + "epoch": 0.10011618351327609, "grad_norm": 0.0, - "learning_rate": 1.9742396504815687e-05, - "loss": 0.9487, + "learning_rate": 1.9743322004207743e-05, + "loss": 0.999, "step": 3533 }, { - "epoch": 0.10028376844494892, + "epoch": 0.10014452095553855, "grad_norm": 0.0, - "learning_rate": 1.97421891975984e-05, - "loss": 1.1104, + "learning_rate": 1.9743115355041868e-05, + "loss": 1.1105, "step": 3534 }, { - "epoch": 0.10031214528944381, + "epoch": 0.10017285839780102, "grad_norm": 0.0, - "learning_rate": 1.974198180808841e-05, - "loss": 1.0964, + "learning_rate": 1.9742908623806117e-05, + "loss": 1.0576, "step": 3535 }, { - "epoch": 0.10034052213393871, + "epoch": 0.10020119584006347, "grad_norm": 0.0, - "learning_rate": 1.9741774336287456e-05, - "loss": 1.0555, + "learning_rate": 1.9742701810502228e-05, + "loss": 1.1117, "step": 3536 }, { - "epoch": 0.1003688989784336, + "epoch": 0.10022953328232594, "grad_norm": 0.0, - "learning_rate": 1.97415667821973e-05, - "loss": 1.0655, + "learning_rate": 1.9742494915131943e-05, + "loss": 1.0069, "step": 3537 }, { - "epoch": 0.1003972758229285, + "epoch": 0.1002578707245884, "grad_norm": 0.0, - "learning_rate": 1.9741359145819688e-05, - "loss": 0.9799, + "learning_rate": 1.9742287937697006e-05, + "loss": 1.0966, "step": 3538 }, { - "epoch": 0.10042565266742338, + "epoch": 0.10028620816685085, "grad_norm": 0.0, - "learning_rate": 1.974115142715638e-05, - "loss": 1.1064, + "learning_rate": 1.974208087819916e-05, + "loss": 1.0849, "step": 3539 }, { - "epoch": 0.10045402951191827, + "epoch": 0.10031454560911332, "grad_norm": 0.0, - "learning_rate": 1.9740943626209127e-05, - "loss": 1.0018, + "learning_rate": 1.974187373664015e-05, + "loss": 1.1066, "step": 3540 }, { - "epoch": 0.10048240635641317, + "epoch": 0.10034288305137579, "grad_norm": 0.0, - "learning_rate": 1.9740735742979688e-05, - "loss": 1.0405, + "learning_rate": 1.9741666513021718e-05, + "loss": 1.0194, "step": 3541 }, { - "epoch": 0.10051078320090806, + "epoch": 0.10037122049363824, "grad_norm": 0.0, - "learning_rate": 1.974052777746981e-05, - "loss": 0.932, + "learning_rate": 1.9741459207345614e-05, + "loss": 1.1118, "step": 3542 }, { - "epoch": 0.10053916004540295, + "epoch": 0.1003995579359007, "grad_norm": 0.0, - "learning_rate": 1.9740319729681262e-05, - "loss": 1.0559, + "learning_rate": 1.974125181961358e-05, + "loss": 1.0517, "step": 3543 }, { - "epoch": 0.10056753688989785, + "epoch": 0.10042789537816317, "grad_norm": 0.0, - "learning_rate": 1.9740111599615793e-05, - "loss": 0.9965, + "learning_rate": 1.9741044349827365e-05, + "loss": 1.138, "step": 3544 }, { - "epoch": 0.10059591373439274, + "epoch": 0.10045623282042562, "grad_norm": 0.0, - "learning_rate": 1.9739903387275165e-05, - "loss": 0.9683, + "learning_rate": 1.9740836797988717e-05, + "loss": 1.0591, "step": 3545 }, { - "epoch": 0.10062429057888762, + "epoch": 0.10048457026268809, "grad_norm": 0.0, - "learning_rate": 1.9739695092661132e-05, - "loss": 1.0754, + "learning_rate": 1.9740629164099384e-05, + "loss": 1.0843, "step": 3546 }, { - "epoch": 0.10065266742338252, + "epoch": 0.10051290770495055, "grad_norm": 0.0, - "learning_rate": 1.9739486715775463e-05, - "loss": 1.0304, + "learning_rate": 1.974042144816111e-05, + "loss": 1.062, "step": 3547 }, { - "epoch": 0.10068104426787741, + "epoch": 0.10054124514721301, "grad_norm": 0.0, - "learning_rate": 1.9739278256619906e-05, - "loss": 1.0106, + "learning_rate": 1.9740213650175653e-05, + "loss": 1.0266, "step": 3548 }, { - "epoch": 0.1007094211123723, + "epoch": 0.10056958258947547, "grad_norm": 0.0, - "learning_rate": 1.9739069715196227e-05, - "loss": 1.1338, + "learning_rate": 1.9740005770144762e-05, + "loss": 1.2122, "step": 3549 }, { - "epoch": 0.1007377979568672, + "epoch": 0.10059792003173794, "grad_norm": 0.0, - "learning_rate": 1.973886109150619e-05, - "loss": 0.9789, + "learning_rate": 1.973979780807018e-05, + "loss": 1.0826, "step": 3550 }, { - "epoch": 0.10076617480136209, + "epoch": 0.10062625747400039, "grad_norm": 0.0, - "learning_rate": 1.973865238555156e-05, - "loss": 1.0524, + "learning_rate": 1.9739589763953665e-05, + "loss": 1.0234, "step": 3551 }, { - "epoch": 0.10079455164585698, + "epoch": 0.10065459491626286, "grad_norm": 0.0, - "learning_rate": 1.973844359733409e-05, - "loss": 1.0058, + "learning_rate": 1.9739381637796972e-05, + "loss": 1.0022, "step": 3552 }, { - "epoch": 0.10082292849035188, + "epoch": 0.10068293235852532, "grad_norm": 0.0, - "learning_rate": 1.9738234726855553e-05, - "loss": 0.9272, + "learning_rate": 1.973917342960185e-05, + "loss": 1.0731, "step": 3553 }, { - "epoch": 0.10085130533484676, + "epoch": 0.10071126980078778, "grad_norm": 0.0, - "learning_rate": 1.9738025774117707e-05, - "loss": 1.0956, + "learning_rate": 1.9738965139370052e-05, + "loss": 1.0465, "step": 3554 }, { - "epoch": 0.10087968217934165, + "epoch": 0.10073960724305024, "grad_norm": 0.0, - "learning_rate": 1.973781673912232e-05, - "loss": 1.0414, + "learning_rate": 1.9738756767103335e-05, + "loss": 1.0931, "step": 3555 }, { - "epoch": 0.10090805902383655, + "epoch": 0.10076794468531271, "grad_norm": 0.0, - "learning_rate": 1.9737607621871157e-05, - "loss": 1.1408, + "learning_rate": 1.9738548312803452e-05, + "loss": 1.0911, "step": 3556 }, { - "epoch": 0.10093643586833144, + "epoch": 0.10079628212757516, "grad_norm": 0.0, - "learning_rate": 1.9737398422365986e-05, - "loss": 1.0978, + "learning_rate": 1.9738339776472162e-05, + "loss": 1.1167, "step": 3557 }, { - "epoch": 0.10096481271282634, + "epoch": 0.10082461956983763, "grad_norm": 0.0, - "learning_rate": 1.973718914060857e-05, - "loss": 1.0461, + "learning_rate": 1.973813115811122e-05, + "loss": 1.1978, "step": 3558 }, { - "epoch": 0.10099318955732123, + "epoch": 0.10085295701210009, "grad_norm": 0.0, - "learning_rate": 1.973697977660068e-05, - "loss": 0.9981, + "learning_rate": 1.973792245772238e-05, + "loss": 0.9921, "step": 3559 }, { - "epoch": 0.10102156640181612, + "epoch": 0.10088129445436254, "grad_norm": 0.0, - "learning_rate": 1.9736770330344084e-05, - "loss": 0.958, + "learning_rate": 1.973771367530741e-05, + "loss": 1.117, "step": 3560 }, { - "epoch": 0.10104994324631102, + "epoch": 0.10090963189662501, "grad_norm": 0.0, - "learning_rate": 1.9736560801840555e-05, - "loss": 1.1153, + "learning_rate": 1.9737504810868055e-05, + "loss": 1.046, "step": 3561 }, { - "epoch": 0.1010783200908059, + "epoch": 0.10093796933888748, "grad_norm": 0.0, - "learning_rate": 1.973635119109186e-05, - "loss": 1.0241, + "learning_rate": 1.9737295864406082e-05, + "loss": 1.1212, "step": 3562 }, { - "epoch": 0.10110669693530079, + "epoch": 0.10096630678114993, "grad_norm": 0.0, - "learning_rate": 1.9736141498099765e-05, - "loss": 0.9738, + "learning_rate": 1.9737086835923252e-05, + "loss": 1.0531, "step": 3563 }, { - "epoch": 0.10113507377979569, + "epoch": 0.1009946442234124, "grad_norm": 0.0, - "learning_rate": 1.9735931722866045e-05, - "loss": 1.1551, + "learning_rate": 1.9736877725421325e-05, + "loss": 1.0668, "step": 3564 }, { - "epoch": 0.10116345062429058, + "epoch": 0.10102298166567486, "grad_norm": 0.0, - "learning_rate": 1.9735721865392473e-05, - "loss": 1.1132, + "learning_rate": 1.973666853290206e-05, + "loss": 1.0375, "step": 3565 }, { - "epoch": 0.10119182746878547, + "epoch": 0.10105131910793731, "grad_norm": 0.0, - "learning_rate": 1.973551192568082e-05, - "loss": 1.0854, + "learning_rate": 1.973645925836722e-05, + "loss": 1.0373, "step": 3566 }, { - "epoch": 0.10122020431328037, + "epoch": 0.10107965655019978, "grad_norm": 0.0, - "learning_rate": 1.9735301903732862e-05, - "loss": 1.0569, + "learning_rate": 1.9736249901818567e-05, + "loss": 1.0182, "step": 3567 }, { - "epoch": 0.10124858115777526, + "epoch": 0.10110799399246224, "grad_norm": 0.0, - "learning_rate": 1.973509179955037e-05, - "loss": 1.0623, + "learning_rate": 1.9736040463257865e-05, + "loss": 1.0484, "step": 3568 }, { - "epoch": 0.10127695800227014, + "epoch": 0.1011363314347247, "grad_norm": 0.0, - "learning_rate": 1.973488161313512e-05, - "loss": 1.1047, + "learning_rate": 1.973583094268688e-05, + "loss": 1.0696, "step": 3569 }, { - "epoch": 0.10130533484676504, + "epoch": 0.10116466887698716, "grad_norm": 0.0, - "learning_rate": 1.9734671344488886e-05, - "loss": 1.137, + "learning_rate": 1.9735621340107376e-05, + "loss": 1.1539, "step": 3570 }, { - "epoch": 0.10133371169125993, + "epoch": 0.10119300631924963, "grad_norm": 0.0, - "learning_rate": 1.973446099361345e-05, - "loss": 0.9609, + "learning_rate": 1.9735411655521116e-05, + "loss": 1.1487, "step": 3571 }, { - "epoch": 0.10136208853575482, + "epoch": 0.10122134376151208, "grad_norm": 0.0, - "learning_rate": 1.973425056051058e-05, - "loss": 1.1196, + "learning_rate": 1.9735201888929873e-05, + "loss": 0.9521, "step": 3572 }, { - "epoch": 0.10139046538024972, + "epoch": 0.10124968120377455, "grad_norm": 0.0, - "learning_rate": 1.973404004518206e-05, - "loss": 1.0605, + "learning_rate": 1.973499204033541e-05, + "loss": 1.0187, "step": 3573 }, { - "epoch": 0.10141884222474461, + "epoch": 0.10127801864603701, "grad_norm": 0.0, - "learning_rate": 1.9733829447629665e-05, - "loss": 1.1011, + "learning_rate": 1.9734782109739488e-05, + "loss": 1.0448, "step": 3574 }, { - "epoch": 0.1014472190692395, + "epoch": 0.10130635608829947, "grad_norm": 0.0, - "learning_rate": 1.9733618767855173e-05, - "loss": 1.0525, + "learning_rate": 1.9734572097143884e-05, + "loss": 1.0934, "step": 3575 }, { - "epoch": 0.1014755959137344, + "epoch": 0.10133469353056193, "grad_norm": 0.0, - "learning_rate": 1.9733408005860372e-05, - "loss": 1.044, + "learning_rate": 1.9734362002550363e-05, + "loss": 0.9802, "step": 3576 }, { - "epoch": 0.10150397275822928, + "epoch": 0.1013630309728244, "grad_norm": 0.0, - "learning_rate": 1.973319716164703e-05, - "loss": 1.0375, + "learning_rate": 1.97341518259607e-05, + "loss": 1.0839, "step": 3577 }, { - "epoch": 0.10153234960272418, + "epoch": 0.10139136841508685, "grad_norm": 0.0, - "learning_rate": 1.9732986235216937e-05, - "loss": 1.0272, + "learning_rate": 1.973394156737666e-05, + "loss": 1.1013, "step": 3578 }, { - "epoch": 0.10156072644721907, + "epoch": 0.10141970585734932, "grad_norm": 0.0, - "learning_rate": 1.973277522657187e-05, - "loss": 1.1305, + "learning_rate": 1.9733731226800016e-05, + "loss": 1.0564, "step": 3579 }, { - "epoch": 0.10158910329171396, + "epoch": 0.10144804329961178, "grad_norm": 0.0, - "learning_rate": 1.9732564135713614e-05, - "loss": 1.0924, + "learning_rate": 1.9733520804232536e-05, + "loss": 1.1415, "step": 3580 }, { - "epoch": 0.10161748013620886, + "epoch": 0.10147638074187423, "grad_norm": 0.0, - "learning_rate": 1.9732352962643952e-05, - "loss": 1.1166, + "learning_rate": 1.9733310299675995e-05, + "loss": 1.0121, "step": 3581 }, { - "epoch": 0.10164585698070375, + "epoch": 0.1015047181841367, "grad_norm": 0.0, - "learning_rate": 1.9732141707364667e-05, - "loss": 1.1133, + "learning_rate": 1.973309971313217e-05, + "loss": 1.0988, "step": 3582 }, { - "epoch": 0.10167423382519863, + "epoch": 0.10153305562639917, "grad_norm": 0.0, - "learning_rate": 1.973193036987754e-05, - "loss": 1.0737, + "learning_rate": 1.9732889044602836e-05, + "loss": 1.1119, "step": 3583 }, { - "epoch": 0.10170261066969354, + "epoch": 0.10156139306866162, "grad_norm": 0.0, - "learning_rate": 1.9731718950184368e-05, - "loss": 1.0994, + "learning_rate": 1.9732678294089756e-05, + "loss": 1.1064, "step": 3584 }, { - "epoch": 0.10173098751418842, + "epoch": 0.10158973051092408, "grad_norm": 0.0, - "learning_rate": 1.973150744828692e-05, - "loss": 1.0409, + "learning_rate": 1.973246746159472e-05, + "loss": 1.1348, "step": 3585 }, { - "epoch": 0.10175936435868331, + "epoch": 0.10161806795318655, "grad_norm": 0.0, - "learning_rate": 1.9731295864186996e-05, - "loss": 0.9987, + "learning_rate": 1.973225654711949e-05, + "loss": 1.0573, "step": 3586 }, { - "epoch": 0.10178774120317821, + "epoch": 0.101646405395449, "grad_norm": 0.0, - "learning_rate": 1.9731084197886376e-05, - "loss": 0.9942, + "learning_rate": 1.9732045550665853e-05, + "loss": 1.1024, "step": 3587 }, { - "epoch": 0.1018161180476731, + "epoch": 0.10167474283771147, "grad_norm": 0.0, - "learning_rate": 1.973087244938685e-05, - "loss": 1.1995, + "learning_rate": 1.973183447223558e-05, + "loss": 1.0648, "step": 3588 }, { - "epoch": 0.10184449489216799, + "epoch": 0.10170308027997393, "grad_norm": 0.0, - "learning_rate": 1.973066061869021e-05, - "loss": 1.1969, + "learning_rate": 1.973162331183045e-05, + "loss": 1.0344, "step": 3589 }, { - "epoch": 0.10187287173666289, + "epoch": 0.10173141772223639, "grad_norm": 0.0, - "learning_rate": 1.973044870579824e-05, - "loss": 1.0117, + "learning_rate": 1.9731412069452248e-05, + "loss": 1.1367, "step": 3590 }, { - "epoch": 0.10190124858115777, + "epoch": 0.10175975516449885, "grad_norm": 0.0, - "learning_rate": 1.9730236710712733e-05, - "loss": 1.0494, + "learning_rate": 1.9731200745102742e-05, + "loss": 0.9938, "step": 3591 }, { - "epoch": 0.10192962542565266, + "epoch": 0.10178809260676132, "grad_norm": 0.0, - "learning_rate": 1.9730024633435478e-05, - "loss": 1.1045, + "learning_rate": 1.9730989338783724e-05, + "loss": 1.0586, "step": 3592 }, { - "epoch": 0.10195800227014756, + "epoch": 0.10181643004902377, "grad_norm": 0.0, - "learning_rate": 1.972981247396827e-05, - "loss": 1.0735, + "learning_rate": 1.9730777850496968e-05, + "loss": 0.972, "step": 3593 }, { - "epoch": 0.10198637911464245, + "epoch": 0.10184476749128624, "grad_norm": 0.0, - "learning_rate": 1.9729600232312896e-05, - "loss": 1.0153, + "learning_rate": 1.9730566280244256e-05, + "loss": 0.9592, "step": 3594 }, { - "epoch": 0.10201475595913734, + "epoch": 0.1018731049335487, "grad_norm": 0.0, - "learning_rate": 1.9729387908471153e-05, - "loss": 1.0352, + "learning_rate": 1.9730354628027372e-05, + "loss": 1.1676, "step": 3595 }, { - "epoch": 0.10204313280363224, + "epoch": 0.10190144237581115, "grad_norm": 0.0, - "learning_rate": 1.9729175502444832e-05, - "loss": 1.0937, + "learning_rate": 1.9730142893848097e-05, + "loss": 1.1302, "step": 3596 }, { - "epoch": 0.10207150964812713, + "epoch": 0.10192977981807362, "grad_norm": 0.0, - "learning_rate": 1.972896301423573e-05, - "loss": 1.1312, + "learning_rate": 1.9729931077708216e-05, + "loss": 1.0425, "step": 3597 }, { - "epoch": 0.10209988649262203, + "epoch": 0.10195811726033609, "grad_norm": 0.0, - "learning_rate": 1.972875044384564e-05, - "loss": 1.0407, + "learning_rate": 1.972971917960951e-05, + "loss": 1.0483, "step": 3598 }, { - "epoch": 0.10212826333711691, + "epoch": 0.10198645470259854, "grad_norm": 0.0, - "learning_rate": 1.9728537791276353e-05, - "loss": 1.0355, + "learning_rate": 1.9729507199553767e-05, + "loss": 1.1092, "step": 3599 }, { - "epoch": 0.1021566401816118, + "epoch": 0.102014792144861, "grad_norm": 0.0, - "learning_rate": 1.9728325056529675e-05, - "loss": 0.9993, + "learning_rate": 1.9729295137542773e-05, + "loss": 1.0671, "step": 3600 }, { - "epoch": 0.1021850170261067, + "epoch": 0.10204312958712347, "grad_norm": 0.0, - "learning_rate": 1.9728112239607394e-05, - "loss": 1.0873, + "learning_rate": 1.9729082993578315e-05, + "loss": 1.1135, "step": 3601 }, { - "epoch": 0.10221339387060159, + "epoch": 0.10207146702938592, "grad_norm": 0.0, - "learning_rate": 1.9727899340511312e-05, - "loss": 1.143, + "learning_rate": 1.9728870767662177e-05, + "loss": 1.0196, "step": 3602 }, { - "epoch": 0.10224177071509648, + "epoch": 0.10209980447164839, "grad_norm": 0.0, - "learning_rate": 1.9727686359243233e-05, - "loss": 0.9604, + "learning_rate": 1.9728658459796143e-05, + "loss": 1.1172, "step": 3603 }, { - "epoch": 0.10227014755959138, + "epoch": 0.10212814191391086, "grad_norm": 0.0, - "learning_rate": 1.9727473295804945e-05, - "loss": 1.0585, + "learning_rate": 1.972844606998201e-05, + "loss": 1.0339, "step": 3604 }, { - "epoch": 0.10229852440408627, + "epoch": 0.10215647935617331, "grad_norm": 0.0, - "learning_rate": 1.9727260150198254e-05, - "loss": 0.9715, + "learning_rate": 1.9728233598221565e-05, + "loss": 1.0825, "step": 3605 }, { - "epoch": 0.10232690124858115, + "epoch": 0.10218481679843577, "grad_norm": 0.0, - "learning_rate": 1.9727046922424956e-05, - "loss": 0.9969, + "learning_rate": 1.972802104451659e-05, + "loss": 1.0265, "step": 3606 }, { - "epoch": 0.10235527809307605, + "epoch": 0.10221315424069824, "grad_norm": 0.0, - "learning_rate": 1.9726833612486864e-05, - "loss": 1.1148, + "learning_rate": 1.972780840886889e-05, + "loss": 1.0859, "step": 3607 }, { - "epoch": 0.10238365493757094, + "epoch": 0.10224149168296069, "grad_norm": 0.0, - "learning_rate": 1.9726620220385762e-05, - "loss": 1.1417, + "learning_rate": 1.972759569128024e-05, + "loss": 1.1656, "step": 3608 }, { - "epoch": 0.10241203178206583, + "epoch": 0.10226982912522316, "grad_norm": 0.0, - "learning_rate": 1.9726406746123464e-05, - "loss": 1.0886, + "learning_rate": 1.9727382891752446e-05, + "loss": 1.0925, "step": 3609 }, { - "epoch": 0.10244040862656073, + "epoch": 0.10229816656748562, "grad_norm": 0.0, - "learning_rate": 1.9726193189701775e-05, - "loss": 1.1069, + "learning_rate": 1.9727170010287287e-05, + "loss": 1.202, "step": 3610 }, { - "epoch": 0.10246878547105562, + "epoch": 0.10232650400974808, "grad_norm": 0.0, - "learning_rate": 1.972597955112249e-05, - "loss": 1.0205, + "learning_rate": 1.9726957046886565e-05, + "loss": 1.0642, "step": 3611 }, { - "epoch": 0.1024971623155505, + "epoch": 0.10235484145201054, "grad_norm": 0.0, - "learning_rate": 1.972576583038742e-05, - "loss": 1.0976, + "learning_rate": 1.9726744001552075e-05, + "loss": 1.0254, "step": 3612 }, { - "epoch": 0.1025255391600454, + "epoch": 0.10238317889427301, "grad_norm": 0.0, - "learning_rate": 1.972555202749837e-05, - "loss": 0.9735, + "learning_rate": 1.9726530874285602e-05, + "loss": 0.9982, "step": 3613 }, { - "epoch": 0.1025539160045403, + "epoch": 0.10241151633653546, "grad_norm": 0.0, - "learning_rate": 1.9725338142457145e-05, - "loss": 1.0559, + "learning_rate": 1.9726317665088953e-05, + "loss": 1.062, "step": 3614 }, { - "epoch": 0.10258229284903518, + "epoch": 0.10243985377879793, "grad_norm": 0.0, - "learning_rate": 1.972512417526555e-05, - "loss": 1.0803, + "learning_rate": 1.9726104373963916e-05, + "loss": 0.9914, "step": 3615 }, { - "epoch": 0.10261066969353008, + "epoch": 0.10246819122106039, "grad_norm": 0.0, - "learning_rate": 1.9724910125925395e-05, - "loss": 1.0328, + "learning_rate": 1.9725891000912294e-05, + "loss": 1.1086, "step": 3616 }, { - "epoch": 0.10263904653802497, + "epoch": 0.10249652866332284, "grad_norm": 0.0, - "learning_rate": 1.9724695994438487e-05, - "loss": 1.0627, + "learning_rate": 1.9725677545935876e-05, + "loss": 1.1443, "step": 3617 }, { - "epoch": 0.10266742338251987, + "epoch": 0.10252486610558531, "grad_norm": 0.0, - "learning_rate": 1.9724481780806635e-05, - "loss": 1.055, + "learning_rate": 1.9725464009036467e-05, + "loss": 1.0017, "step": 3618 }, { - "epoch": 0.10269580022701476, + "epoch": 0.10255320354784778, "grad_norm": 0.0, - "learning_rate": 1.972426748503165e-05, - "loss": 0.9743, + "learning_rate": 1.9725250390215863e-05, + "loss": 0.964, "step": 3619 }, { - "epoch": 0.10272417707150965, + "epoch": 0.10258154099011023, "grad_norm": 0.0, - "learning_rate": 1.9724053107115337e-05, - "loss": 0.9743, + "learning_rate": 1.972503668947586e-05, + "loss": 0.9787, "step": 3620 }, { - "epoch": 0.10275255391600455, + "epoch": 0.1026098784323727, "grad_norm": 0.0, - "learning_rate": 1.972383864705951e-05, - "loss": 1.0058, + "learning_rate": 1.9724822906818265e-05, + "loss": 1.1158, "step": 3621 }, { - "epoch": 0.10278093076049943, + "epoch": 0.10263821587463516, "grad_norm": 0.0, - "learning_rate": 1.972362410486598e-05, - "loss": 1.0513, + "learning_rate": 1.9724609042244876e-05, + "loss": 1.1319, "step": 3622 }, { - "epoch": 0.10280930760499432, + "epoch": 0.10266655331689761, "grad_norm": 0.0, - "learning_rate": 1.9723409480536566e-05, - "loss": 1.0341, + "learning_rate": 1.9724395095757495e-05, + "loss": 1.0278, "step": 3623 }, { - "epoch": 0.10283768444948922, + "epoch": 0.10269489075916008, "grad_norm": 0.0, - "learning_rate": 1.9723194774073068e-05, - "loss": 1.0746, + "learning_rate": 1.9724181067357918e-05, + "loss": 1.0162, "step": 3624 }, { - "epoch": 0.10286606129398411, + "epoch": 0.10272322820142255, "grad_norm": 0.0, - "learning_rate": 1.9722979985477315e-05, - "loss": 1.0501, + "learning_rate": 1.9723966957047955e-05, + "loss": 1.0199, "step": 3625 }, { - "epoch": 0.102894438138479, + "epoch": 0.102751565643685, "grad_norm": 0.0, - "learning_rate": 1.9722765114751105e-05, - "loss": 1.0183, + "learning_rate": 1.972375276482941e-05, + "loss": 1.0909, "step": 3626 }, { - "epoch": 0.1029228149829739, + "epoch": 0.10277990308594746, "grad_norm": 0.0, - "learning_rate": 1.9722550161896263e-05, - "loss": 1.0383, + "learning_rate": 1.972353849070408e-05, + "loss": 1.0604, "step": 3627 }, { - "epoch": 0.10295119182746879, + "epoch": 0.10280824052820993, "grad_norm": 0.0, - "learning_rate": 1.9722335126914607e-05, - "loss": 1.0457, + "learning_rate": 1.972332413467378e-05, + "loss": 0.9317, "step": 3628 }, { - "epoch": 0.10297956867196367, + "epoch": 0.10283657797047238, "grad_norm": 0.0, - "learning_rate": 1.9722120009807947e-05, - "loss": 1.0242, + "learning_rate": 1.972310969674031e-05, + "loss": 1.0945, "step": 3629 }, { - "epoch": 0.10300794551645857, + "epoch": 0.10286491541273485, "grad_norm": 0.0, - "learning_rate": 1.97219048105781e-05, - "loss": 0.9679, + "learning_rate": 1.972289517690547e-05, + "loss": 1.052, "step": 3630 }, { - "epoch": 0.10303632236095346, + "epoch": 0.10289325285499731, "grad_norm": 0.0, - "learning_rate": 1.9721689529226885e-05, - "loss": 1.0583, + "learning_rate": 1.9722680575171077e-05, + "loss": 1.1219, "step": 3631 }, { - "epoch": 0.10306469920544835, + "epoch": 0.10292159029725977, "grad_norm": 0.0, - "learning_rate": 1.9721474165756124e-05, - "loss": 1.0414, + "learning_rate": 1.9722465891538935e-05, + "loss": 1.16, "step": 3632 }, { - "epoch": 0.10309307604994325, + "epoch": 0.10294992773952223, "grad_norm": 0.0, - "learning_rate": 1.9721258720167634e-05, - "loss": 1.0932, + "learning_rate": 1.9722251126010854e-05, + "loss": 1.1703, "step": 3633 }, { - "epoch": 0.10312145289443814, + "epoch": 0.1029782651817847, "grad_norm": 0.0, - "learning_rate": 1.9721043192463234e-05, - "loss": 0.9842, + "learning_rate": 1.9722036278588642e-05, + "loss": 1.0697, "step": 3634 }, { - "epoch": 0.10314982973893302, + "epoch": 0.10300660262404715, "grad_norm": 0.0, - "learning_rate": 1.9720827582644745e-05, - "loss": 1.0837, + "learning_rate": 1.9721821349274102e-05, + "loss": 1.0873, "step": 3635 }, { - "epoch": 0.10317820658342793, + "epoch": 0.10303494006630962, "grad_norm": 0.0, - "learning_rate": 1.9720611890713988e-05, - "loss": 0.9911, + "learning_rate": 1.9721606338069058e-05, + "loss": 1.1411, "step": 3636 }, { - "epoch": 0.10320658342792281, + "epoch": 0.10306327750857208, "grad_norm": 0.0, - "learning_rate": 1.9720396116672786e-05, - "loss": 0.9745, + "learning_rate": 1.972139124497531e-05, + "loss": 1.1768, "step": 3637 }, { - "epoch": 0.10323496027241771, + "epoch": 0.10309161495083453, "grad_norm": 0.0, - "learning_rate": 1.9720180260522964e-05, - "loss": 1.149, + "learning_rate": 1.9721176069994677e-05, + "loss": 1.1022, "step": 3638 }, { - "epoch": 0.1032633371169126, + "epoch": 0.103119952393097, "grad_norm": 0.0, - "learning_rate": 1.9719964322266335e-05, - "loss": 1.1184, + "learning_rate": 1.9720960813128966e-05, + "loss": 1.0727, "step": 3639 }, { - "epoch": 0.10329171396140749, + "epoch": 0.10314828983535947, "grad_norm": 0.0, - "learning_rate": 1.9719748301904736e-05, - "loss": 1.0411, + "learning_rate": 1.972074547437999e-05, + "loss": 1.0517, "step": 3640 }, { - "epoch": 0.10332009080590239, + "epoch": 0.10317662727762192, "grad_norm": 0.0, - "learning_rate": 1.9719532199439986e-05, - "loss": 1.1326, + "learning_rate": 1.972053005374957e-05, + "loss": 1.0936, "step": 3641 }, { - "epoch": 0.10334846765039728, + "epoch": 0.10320496471988438, "grad_norm": 0.0, - "learning_rate": 1.971931601487391e-05, - "loss": 1.0947, + "learning_rate": 1.972031455123951e-05, + "loss": 1.0961, "step": 3642 }, { - "epoch": 0.10337684449489216, + "epoch": 0.10323330216214685, "grad_norm": 0.0, - "learning_rate": 1.9719099748208332e-05, - "loss": 1.0917, + "learning_rate": 1.9720098966851635e-05, + "loss": 1.032, "step": 3643 }, { - "epoch": 0.10340522133938707, + "epoch": 0.1032616396044093, "grad_norm": 0.0, - "learning_rate": 1.9718883399445087e-05, - "loss": 0.9757, + "learning_rate": 1.9719883300587755e-05, + "loss": 1.0193, "step": 3644 }, { - "epoch": 0.10343359818388195, + "epoch": 0.10328997704667177, "grad_norm": 0.0, - "learning_rate": 1.9718666968585992e-05, - "loss": 1.0706, + "learning_rate": 1.971966755244969e-05, + "loss": 1.0495, "step": 3645 }, { - "epoch": 0.10346197502837684, + "epoch": 0.10331831448893423, "grad_norm": 0.0, - "learning_rate": 1.9718450455632884e-05, - "loss": 1.0762, + "learning_rate": 1.9719451722439255e-05, + "loss": 0.9925, "step": 3646 }, { - "epoch": 0.10349035187287174, + "epoch": 0.10334665193119669, "grad_norm": 0.0, - "learning_rate": 1.9718233860587586e-05, - "loss": 1.0131, + "learning_rate": 1.971923581055827e-05, + "loss": 1.1706, "step": 3647 }, { - "epoch": 0.10351872871736663, + "epoch": 0.10337498937345915, "grad_norm": 0.0, - "learning_rate": 1.971801718345193e-05, - "loss": 1.1465, + "learning_rate": 1.9719019816808553e-05, + "loss": 1.0163, "step": 3648 }, { - "epoch": 0.10354710556186152, + "epoch": 0.10340332681572162, "grad_norm": 0.0, - "learning_rate": 1.9717800424227744e-05, - "loss": 0.9613, + "learning_rate": 1.9718803741191918e-05, + "loss": 1.0653, "step": 3649 }, { - "epoch": 0.10357548240635642, + "epoch": 0.10343166425798407, "grad_norm": 0.0, - "learning_rate": 1.9717583582916862e-05, - "loss": 1.0522, + "learning_rate": 1.9718587583710196e-05, + "loss": 1.0804, "step": 3650 }, { - "epoch": 0.1036038592508513, + "epoch": 0.10346000170024654, "grad_norm": 0.0, - "learning_rate": 1.971736665952112e-05, - "loss": 0.9089, + "learning_rate": 1.97183713443652e-05, + "loss": 1.0573, "step": 3651 }, { - "epoch": 0.10363223609534619, + "epoch": 0.103488339142509, "grad_norm": 0.0, - "learning_rate": 1.9717149654042337e-05, - "loss": 0.994, + "learning_rate": 1.9718155023158752e-05, + "loss": 0.9924, "step": 3652 }, { - "epoch": 0.10366061293984109, + "epoch": 0.10351667658477146, "grad_norm": 0.0, - "learning_rate": 1.971693256648236e-05, - "loss": 1.1401, + "learning_rate": 1.971793862009268e-05, + "loss": 1.066, "step": 3653 }, { - "epoch": 0.10368898978433598, + "epoch": 0.10354501402703392, "grad_norm": 0.0, - "learning_rate": 1.971671539684301e-05, - "loss": 1.0274, + "learning_rate": 1.9717722135168796e-05, + "loss": 1.1921, "step": 3654 }, { - "epoch": 0.10371736662883087, + "epoch": 0.10357335146929639, "grad_norm": 0.0, - "learning_rate": 1.9716498145126134e-05, - "loss": 1.0338, + "learning_rate": 1.9717505568388936e-05, + "loss": 1.1383, "step": 3655 }, { - "epoch": 0.10374574347332577, + "epoch": 0.10360168891155884, "grad_norm": 0.0, - "learning_rate": 1.9716280811333563e-05, - "loss": 0.9206, + "learning_rate": 1.9717288919754912e-05, + "loss": 1.0003, "step": 3656 }, { - "epoch": 0.10377412031782066, + "epoch": 0.1036300263538213, "grad_norm": 0.0, - "learning_rate": 1.9716063395467128e-05, - "loss": 1.1053, + "learning_rate": 1.9717072189268558e-05, + "loss": 1.1339, "step": 3657 }, { - "epoch": 0.10380249716231556, + "epoch": 0.10365836379608377, "grad_norm": 0.0, - "learning_rate": 1.9715845897528666e-05, - "loss": 0.9131, + "learning_rate": 1.9716855376931696e-05, + "loss": 1.0485, "step": 3658 }, { - "epoch": 0.10383087400681044, + "epoch": 0.10368670123834622, "grad_norm": 0.0, - "learning_rate": 1.9715628317520022e-05, - "loss": 1.0494, + "learning_rate": 1.9716638482746155e-05, + "loss": 0.9912, "step": 3659 }, { - "epoch": 0.10385925085130533, + "epoch": 0.10371503868060869, "grad_norm": 0.0, - "learning_rate": 1.9715410655443027e-05, - "loss": 1.0027, + "learning_rate": 1.9716421506713758e-05, + "loss": 1.1128, "step": 3660 }, { - "epoch": 0.10388762769580023, + "epoch": 0.10374337612287116, "grad_norm": 0.0, - "learning_rate": 1.971519291129952e-05, - "loss": 0.998, + "learning_rate": 1.9716204448836335e-05, + "loss": 1.0812, "step": 3661 }, { - "epoch": 0.10391600454029512, + "epoch": 0.10377171356513361, "grad_norm": 0.0, - "learning_rate": 1.9714975085091343e-05, - "loss": 1.1465, + "learning_rate": 1.9715987309115713e-05, + "loss": 1.0742, "step": 3662 }, { - "epoch": 0.10394438138479001, + "epoch": 0.10380005100739607, "grad_norm": 0.0, - "learning_rate": 1.9714757176820336e-05, - "loss": 1.0056, + "learning_rate": 1.9715770087553723e-05, + "loss": 1.1678, "step": 3663 }, { - "epoch": 0.10397275822928491, + "epoch": 0.10382838844965854, "grad_norm": 0.0, - "learning_rate": 1.9714539186488336e-05, - "loss": 0.9564, + "learning_rate": 1.9715552784152193e-05, + "loss": 1.0779, "step": 3664 }, { - "epoch": 0.1040011350737798, + "epoch": 0.10385672589192099, "grad_norm": 0.0, - "learning_rate": 1.9714321114097187e-05, - "loss": 1.0884, + "learning_rate": 1.9715335398912955e-05, + "loss": 1.0699, "step": 3665 }, { - "epoch": 0.10402951191827468, + "epoch": 0.10388506333418346, "grad_norm": 0.0, - "learning_rate": 1.9714102959648732e-05, - "loss": 1.0174, + "learning_rate": 1.971511793183784e-05, + "loss": 1.0783, "step": 3666 }, { - "epoch": 0.10405788876276958, + "epoch": 0.10391340077644592, "grad_norm": 0.0, - "learning_rate": 1.971388472314481e-05, - "loss": 0.9662, + "learning_rate": 1.9714900382928674e-05, + "loss": 1.0643, "step": 3667 }, { - "epoch": 0.10408626560726447, + "epoch": 0.10394173821870838, "grad_norm": 0.0, - "learning_rate": 1.9713666404587274e-05, - "loss": 1.0676, + "learning_rate": 1.97146827521873e-05, + "loss": 1.0194, "step": 3668 }, { - "epoch": 0.10411464245175936, + "epoch": 0.10397007566097084, "grad_norm": 0.0, - "learning_rate": 1.9713448003977956e-05, - "loss": 1.0526, + "learning_rate": 1.9714465039615545e-05, + "loss": 1.2052, "step": 3669 }, { - "epoch": 0.10414301929625426, + "epoch": 0.10399841310323331, "grad_norm": 0.0, - "learning_rate": 1.9713229521318705e-05, - "loss": 1.0454, + "learning_rate": 1.9714247245215242e-05, + "loss": 1.158, "step": 3670 }, { - "epoch": 0.10417139614074915, + "epoch": 0.10402675054549576, "grad_norm": 0.0, - "learning_rate": 1.971301095661137e-05, - "loss": 0.9981, + "learning_rate": 1.9714029368988227e-05, + "loss": 1.0572, "step": 3671 }, { - "epoch": 0.10419977298524404, + "epoch": 0.10405508798775823, "grad_norm": 0.0, - "learning_rate": 1.9712792309857795e-05, - "loss": 1.0312, + "learning_rate": 1.9713811410936338e-05, + "loss": 1.0448, "step": 3672 }, { - "epoch": 0.10422814982973894, + "epoch": 0.10408342543002069, "grad_norm": 0.0, - "learning_rate": 1.9712573581059826e-05, - "loss": 1.1616, + "learning_rate": 1.9713593371061405e-05, + "loss": 1.1029, "step": 3673 }, { - "epoch": 0.10425652667423382, + "epoch": 0.10411176287228315, "grad_norm": 0.0, - "learning_rate": 1.971235477021931e-05, - "loss": 1.0757, + "learning_rate": 1.9713375249365268e-05, + "loss": 1.1209, "step": 3674 }, { - "epoch": 0.10428490351872871, + "epoch": 0.10414010031454561, "grad_norm": 0.0, - "learning_rate": 1.97121358773381e-05, - "loss": 1.0664, + "learning_rate": 1.971315704584977e-05, + "loss": 0.8932, "step": 3675 }, { - "epoch": 0.10431328036322361, + "epoch": 0.10416843775680808, "grad_norm": 0.0, - "learning_rate": 1.9711916902418037e-05, - "loss": 1.0455, + "learning_rate": 1.9712938760516737e-05, + "loss": 1.032, "step": 3676 }, { - "epoch": 0.1043416572077185, + "epoch": 0.10419677519907053, "grad_norm": 0.0, - "learning_rate": 1.9711697845460978e-05, - "loss": 0.973, + "learning_rate": 1.9712720393368013e-05, + "loss": 1.1116, "step": 3677 }, { - "epoch": 0.1043700340522134, + "epoch": 0.104225112641333, "grad_norm": 0.0, - "learning_rate": 1.971147870646877e-05, - "loss": 0.9734, + "learning_rate": 1.9712501944405443e-05, + "loss": 1.1773, "step": 3678 }, { - "epoch": 0.10439841089670829, + "epoch": 0.10425345008359546, "grad_norm": 0.0, - "learning_rate": 1.9711259485443266e-05, - "loss": 0.962, + "learning_rate": 1.9712283413630865e-05, + "loss": 1.0518, "step": 3679 }, { - "epoch": 0.10442678774120318, + "epoch": 0.10428178752585791, "grad_norm": 0.0, - "learning_rate": 1.9711040182386317e-05, - "loss": 1.0018, + "learning_rate": 1.9712064801046108e-05, + "loss": 1.0656, "step": 3680 }, { - "epoch": 0.10445516458569808, + "epoch": 0.10431012496812038, "grad_norm": 0.0, - "learning_rate": 1.9710820797299773e-05, - "loss": 1.1083, + "learning_rate": 1.971184610665303e-05, + "loss": 1.0214, "step": 3681 }, { - "epoch": 0.10448354143019296, + "epoch": 0.10433846241038285, "grad_norm": 0.0, - "learning_rate": 1.971060133018549e-05, - "loss": 0.9137, + "learning_rate": 1.9711627330453465e-05, + "loss": 1.0892, "step": 3682 }, { - "epoch": 0.10451191827468785, + "epoch": 0.1043667998526453, "grad_norm": 0.0, - "learning_rate": 1.971038178104532e-05, - "loss": 1.1232, + "learning_rate": 1.9711408472449256e-05, + "loss": 1.0183, "step": 3683 }, { - "epoch": 0.10454029511918275, + "epoch": 0.10439513729490776, "grad_norm": 0.0, - "learning_rate": 1.971016214988112e-05, - "loss": 0.9523, + "learning_rate": 1.9711189532642244e-05, + "loss": 0.9626, "step": 3684 }, { - "epoch": 0.10456867196367764, + "epoch": 0.10442347473717023, "grad_norm": 0.0, - "learning_rate": 1.9709942436694745e-05, - "loss": 1.0433, + "learning_rate": 1.9710970511034282e-05, + "loss": 1.2065, "step": 3685 }, { - "epoch": 0.10459704880817253, + "epoch": 0.10445181217943268, "grad_norm": 0.0, - "learning_rate": 1.970972264148805e-05, - "loss": 1.0799, + "learning_rate": 1.9710751407627203e-05, + "loss": 1.0839, "step": 3686 }, { - "epoch": 0.10462542565266743, + "epoch": 0.10448014962169515, "grad_norm": 0.0, - "learning_rate": 1.970950276426289e-05, - "loss": 0.9716, + "learning_rate": 1.9710532222422864e-05, + "loss": 1.0734, "step": 3687 }, { - "epoch": 0.10465380249716232, + "epoch": 0.10450848706395761, "grad_norm": 0.0, - "learning_rate": 1.970928280502112e-05, - "loss": 0.9983, + "learning_rate": 1.9710312955423104e-05, + "loss": 1.1335, "step": 3688 }, { - "epoch": 0.1046821793416572, + "epoch": 0.10453682450622007, "grad_norm": 0.0, - "learning_rate": 1.9709062763764604e-05, - "loss": 1.0112, + "learning_rate": 1.9710093606629774e-05, + "loss": 1.0705, "step": 3689 }, { - "epoch": 0.1047105561861521, + "epoch": 0.10456516194848253, "grad_norm": 0.0, - "learning_rate": 1.97088426404952e-05, - "loss": 1.0959, + "learning_rate": 1.9709874176044717e-05, + "loss": 1.0575, "step": 3690 }, { - "epoch": 0.10473893303064699, + "epoch": 0.10459349939074498, "grad_norm": 0.0, - "learning_rate": 1.9708622435214768e-05, - "loss": 1.1113, + "learning_rate": 1.9709654663669785e-05, + "loss": 1.1032, "step": 3691 }, { - "epoch": 0.10476730987514188, + "epoch": 0.10462183683300745, "grad_norm": 0.0, - "learning_rate": 1.9708402147925164e-05, - "loss": 1.0699, + "learning_rate": 1.970943506950683e-05, + "loss": 1.0441, "step": 3692 }, { - "epoch": 0.10479568671963678, + "epoch": 0.10465017427526992, "grad_norm": 0.0, - "learning_rate": 1.9708181778628247e-05, - "loss": 0.9507, + "learning_rate": 1.9709215393557693e-05, + "loss": 1.006, "step": 3693 }, { - "epoch": 0.10482406356413167, + "epoch": 0.10467851171753237, "grad_norm": 0.0, - "learning_rate": 1.9707961327325886e-05, - "loss": 1.1241, + "learning_rate": 1.970899563582423e-05, + "loss": 0.9965, "step": 3694 }, { - "epoch": 0.10485244040862655, + "epoch": 0.10470684915979483, "grad_norm": 0.0, - "learning_rate": 1.9707740794019937e-05, - "loss": 1.0333, + "learning_rate": 1.9708775796308293e-05, + "loss": 1.0967, "step": 3695 }, { - "epoch": 0.10488081725312146, + "epoch": 0.1047351866020573, "grad_norm": 0.0, - "learning_rate": 1.9707520178712268e-05, - "loss": 1.0883, + "learning_rate": 1.970855587501173e-05, + "loss": 0.968, "step": 3696 }, { - "epoch": 0.10490919409761634, + "epoch": 0.10476352404431975, "grad_norm": 0.0, - "learning_rate": 1.970729948140474e-05, - "loss": 1.1978, + "learning_rate": 1.97083358719364e-05, + "loss": 1.0287, "step": 3697 }, { - "epoch": 0.10493757094211124, + "epoch": 0.10479186148658222, "grad_norm": 0.0, - "learning_rate": 1.9707078702099213e-05, - "loss": 0.9967, + "learning_rate": 1.970811578708415e-05, + "loss": 1.0994, "step": 3698 }, { - "epoch": 0.10496594778660613, + "epoch": 0.10482019892884469, "grad_norm": 0.0, - "learning_rate": 1.9706857840797557e-05, - "loss": 0.965, + "learning_rate": 1.9707895620456832e-05, + "loss": 1.1656, "step": 3699 }, { - "epoch": 0.10499432463110102, + "epoch": 0.10484853637110714, "grad_norm": 0.0, - "learning_rate": 1.9706636897501637e-05, - "loss": 1.0683, + "learning_rate": 1.9707675372056308e-05, + "loss": 1.0966, "step": 3700 }, { - "epoch": 0.10502270147559592, + "epoch": 0.1048768738133696, "grad_norm": 0.0, - "learning_rate": 1.970641587221332e-05, - "loss": 0.934, + "learning_rate": 1.9707455041884428e-05, + "loss": 1.0413, "step": 3701 }, { - "epoch": 0.10505107832009081, + "epoch": 0.10490521125563207, "grad_norm": 0.0, - "learning_rate": 1.970619476493447e-05, - "loss": 1.1224, + "learning_rate": 1.9707234629943048e-05, + "loss": 1.0631, "step": 3702 }, { - "epoch": 0.1050794551645857, + "epoch": 0.10493354869789452, "grad_norm": 0.0, - "learning_rate": 1.9705973575666956e-05, - "loss": 1.1058, + "learning_rate": 1.970701413623403e-05, + "loss": 1.0288, "step": 3703 }, { - "epoch": 0.1051078320090806, + "epoch": 0.10496188614015699, "grad_norm": 0.0, - "learning_rate": 1.970575230441265e-05, - "loss": 0.9666, + "learning_rate": 1.9706793560759223e-05, + "loss": 0.9742, "step": 3704 }, { - "epoch": 0.10513620885357548, + "epoch": 0.10499022358241945, "grad_norm": 0.0, - "learning_rate": 1.9705530951173414e-05, - "loss": 1.1479, + "learning_rate": 1.9706572903520492e-05, + "loss": 1.113, "step": 3705 }, { - "epoch": 0.10516458569807037, + "epoch": 0.1050185610246819, "grad_norm": 0.0, - "learning_rate": 1.9705309515951123e-05, - "loss": 1.0548, + "learning_rate": 1.9706352164519694e-05, + "loss": 1.0184, "step": 3706 }, { - "epoch": 0.10519296254256527, + "epoch": 0.10504689846694437, "grad_norm": 0.0, - "learning_rate": 1.970508799874765e-05, - "loss": 0.9705, + "learning_rate": 1.9706131343758685e-05, + "loss": 0.962, "step": 3707 }, { - "epoch": 0.10522133938706016, + "epoch": 0.10507523590920684, "grad_norm": 0.0, - "learning_rate": 1.970486639956486e-05, - "loss": 1.031, + "learning_rate": 1.9705910441239328e-05, + "loss": 1.0219, "step": 3708 }, { - "epoch": 0.10524971623155505, + "epoch": 0.10510357335146929, "grad_norm": 0.0, - "learning_rate": 1.9704644718404626e-05, - "loss": 0.9939, + "learning_rate": 1.9705689456963484e-05, + "loss": 1.0188, "step": 3709 }, { - "epoch": 0.10527809307604995, + "epoch": 0.10513191079373176, "grad_norm": 0.0, - "learning_rate": 1.970442295526882e-05, - "loss": 1.0391, + "learning_rate": 1.9705468390933012e-05, + "loss": 1.013, "step": 3710 }, { - "epoch": 0.10530646992054483, + "epoch": 0.10516024823599422, "grad_norm": 0.0, - "learning_rate": 1.970420111015932e-05, - "loss": 0.9757, + "learning_rate": 1.9705247243149777e-05, + "loss": 1.0734, "step": 3711 }, { - "epoch": 0.10533484676503972, + "epoch": 0.10518858567825667, "grad_norm": 0.0, - "learning_rate": 1.9703979183078e-05, - "loss": 1.0735, + "learning_rate": 1.970502601361564e-05, + "loss": 1.0881, "step": 3712 }, { - "epoch": 0.10536322360953462, + "epoch": 0.10521692312051914, "grad_norm": 0.0, - "learning_rate": 1.9703757174026726e-05, - "loss": 1.0389, + "learning_rate": 1.9704804702332465e-05, + "loss": 1.1523, "step": 3713 }, { - "epoch": 0.10539160045402951, + "epoch": 0.1052452605627816, "grad_norm": 0.0, - "learning_rate": 1.970353508300738e-05, - "loss": 1.0256, + "learning_rate": 1.9704583309302115e-05, + "loss": 1.0443, "step": 3714 }, { - "epoch": 0.1054199772985244, + "epoch": 0.10527359800504406, "grad_norm": 0.0, - "learning_rate": 1.970331291002184e-05, - "loss": 1.0171, + "learning_rate": 1.970436183452646e-05, + "loss": 1.1552, "step": 3715 }, { - "epoch": 0.1054483541430193, + "epoch": 0.10530193544730652, "grad_norm": 0.0, - "learning_rate": 1.9703090655071978e-05, - "loss": 1.1111, + "learning_rate": 1.970414027800736e-05, + "loss": 1.0198, "step": 3716 }, { - "epoch": 0.10547673098751419, + "epoch": 0.10533027288956899, "grad_norm": 0.0, - "learning_rate": 1.9702868318159673e-05, - "loss": 1.1331, + "learning_rate": 1.970391863974668e-05, + "loss": 1.1173, "step": 3717 }, { - "epoch": 0.10550510783200909, + "epoch": 0.10535861033183144, "grad_norm": 0.0, - "learning_rate": 1.97026458992868e-05, - "loss": 0.9752, + "learning_rate": 1.9703696919746292e-05, + "loss": 1.0869, "step": 3718 }, { - "epoch": 0.10553348467650397, + "epoch": 0.10538694777409391, "grad_norm": 0.0, - "learning_rate": 1.9702423398455245e-05, - "loss": 1.1581, + "learning_rate": 1.970347511800806e-05, + "loss": 1.0507, "step": 3719 }, { - "epoch": 0.10556186152099886, + "epoch": 0.10541528521635637, "grad_norm": 0.0, - "learning_rate": 1.970220081566688e-05, - "loss": 0.992, + "learning_rate": 1.9703253234533858e-05, + "loss": 1.0537, "step": 3720 }, { - "epoch": 0.10559023836549376, + "epoch": 0.10544362265861883, "grad_norm": 0.0, - "learning_rate": 1.9701978150923594e-05, - "loss": 1.0598, + "learning_rate": 1.9703031269325546e-05, + "loss": 0.9951, "step": 3721 }, { - "epoch": 0.10561861520998865, + "epoch": 0.10547196010088129, "grad_norm": 0.0, - "learning_rate": 1.970175540422726e-05, - "loss": 1.1051, + "learning_rate": 1.9702809222385002e-05, + "loss": 1.0143, "step": 3722 }, { - "epoch": 0.10564699205448354, + "epoch": 0.10550029754314376, "grad_norm": 0.0, - "learning_rate": 1.9701532575579757e-05, - "loss": 1.0385, + "learning_rate": 1.9702587093714093e-05, + "loss": 1.2041, "step": 3723 }, { - "epoch": 0.10567536889897844, + "epoch": 0.10552863498540621, "grad_norm": 0.0, - "learning_rate": 1.970130966498298e-05, - "loss": 0.9106, + "learning_rate": 1.9702364883314687e-05, + "loss": 1.0989, "step": 3724 }, { - "epoch": 0.10570374574347333, + "epoch": 0.10555697242766868, "grad_norm": 0.0, - "learning_rate": 1.97010866724388e-05, - "loss": 1.1364, + "learning_rate": 1.970214259118866e-05, + "loss": 1.1199, "step": 3725 }, { - "epoch": 0.10573212258796821, + "epoch": 0.10558530986993114, "grad_norm": 0.0, - "learning_rate": 1.9700863597949104e-05, - "loss": 1.0276, + "learning_rate": 1.9701920217337883e-05, + "loss": 1.0898, "step": 3726 }, { - "epoch": 0.10576049943246311, + "epoch": 0.1056136473121936, "grad_norm": 0.0, - "learning_rate": 1.970064044151578e-05, - "loss": 1.0707, + "learning_rate": 1.9701697761764227e-05, + "loss": 1.1368, "step": 3727 }, { - "epoch": 0.105788876276958, + "epoch": 0.10564198475445606, "grad_norm": 0.0, - "learning_rate": 1.970041720314071e-05, - "loss": 1.0216, + "learning_rate": 1.970147522446957e-05, + "loss": 0.9589, "step": 3728 }, { - "epoch": 0.10581725312145289, + "epoch": 0.10567032219671853, "grad_norm": 0.0, - "learning_rate": 1.9700193882825777e-05, - "loss": 1.0145, + "learning_rate": 1.970125260545579e-05, + "loss": 1.1004, "step": 3729 }, { - "epoch": 0.10584562996594779, + "epoch": 0.10569865963898098, "grad_norm": 0.0, - "learning_rate": 1.969997048057287e-05, - "loss": 1.0427, + "learning_rate": 1.970102990472475e-05, + "loss": 1.129, "step": 3730 }, { - "epoch": 0.10587400681044268, + "epoch": 0.10572699708124345, "grad_norm": 0.0, - "learning_rate": 1.969974699638388e-05, - "loss": 1.0398, + "learning_rate": 1.9700807122278336e-05, + "loss": 1.0615, "step": 3731 }, { - "epoch": 0.10590238365493757, + "epoch": 0.10575533452350591, "grad_norm": 0.0, - "learning_rate": 1.969952343026069e-05, - "loss": 1.0881, + "learning_rate": 1.970058425811842e-05, + "loss": 1.031, "step": 3732 }, { - "epoch": 0.10593076049943247, + "epoch": 0.10578367196576836, "grad_norm": 0.0, - "learning_rate": 1.9699299782205186e-05, - "loss": 1.1079, + "learning_rate": 1.9700361312246877e-05, + "loss": 1.0915, "step": 3733 }, { - "epoch": 0.10595913734392735, + "epoch": 0.10581200940803083, "grad_norm": 0.0, - "learning_rate": 1.9699076052219263e-05, - "loss": 1.0644, + "learning_rate": 1.9700138284665593e-05, + "loss": 0.9732, "step": 3734 }, { - "epoch": 0.10598751418842224, + "epoch": 0.1058403468502933, "grad_norm": 0.0, - "learning_rate": 1.969885224030481e-05, - "loss": 1.011, + "learning_rate": 1.9699915175376442e-05, + "loss": 1.1599, "step": 3735 }, { - "epoch": 0.10601589103291714, + "epoch": 0.10586868429255575, "grad_norm": 0.0, - "learning_rate": 1.9698628346463714e-05, - "loss": 1.0438, + "learning_rate": 1.9699691984381304e-05, + "loss": 1.0504, "step": 3736 }, { - "epoch": 0.10604426787741203, + "epoch": 0.10589702173481821, "grad_norm": 0.0, - "learning_rate": 1.969840437069787e-05, - "loss": 1.02, + "learning_rate": 1.9699468711682055e-05, + "loss": 1.1015, "step": 3737 }, { - "epoch": 0.10607264472190693, + "epoch": 0.10592535917708068, "grad_norm": 0.0, - "learning_rate": 1.9698180313009166e-05, - "loss": 1.0275, + "learning_rate": 1.9699245357280583e-05, + "loss": 1.012, "step": 3738 }, { - "epoch": 0.10610102156640182, + "epoch": 0.10595369661934313, "grad_norm": 0.0, - "learning_rate": 1.96979561733995e-05, - "loss": 1.1321, + "learning_rate": 1.9699021921178762e-05, + "loss": 0.9917, "step": 3739 }, { - "epoch": 0.1061293984108967, + "epoch": 0.1059820340616056, "grad_norm": 0.0, - "learning_rate": 1.969773195187076e-05, - "loss": 1.0251, + "learning_rate": 1.9698798403378482e-05, + "loss": 1.1673, "step": 3740 }, { - "epoch": 0.1061577752553916, + "epoch": 0.10601037150386806, "grad_norm": 0.0, - "learning_rate": 1.9697507648424844e-05, - "loss": 1.1226, + "learning_rate": 1.969857480388162e-05, + "loss": 1.1025, "step": 3741 }, { - "epoch": 0.1061861520998865, + "epoch": 0.10603870894613052, "grad_norm": 0.0, - "learning_rate": 1.9697283263063645e-05, - "loss": 1.0585, + "learning_rate": 1.969835112269006e-05, + "loss": 0.9856, "step": 3742 }, { - "epoch": 0.10621452894438138, + "epoch": 0.10606704638839298, "grad_norm": 0.0, - "learning_rate": 1.9697058795789056e-05, - "loss": 1.0884, + "learning_rate": 1.969812735980569e-05, + "loss": 0.9996, "step": 3743 }, { - "epoch": 0.10624290578887628, + "epoch": 0.10609538383065545, "grad_norm": 0.0, - "learning_rate": 1.9696834246602977e-05, - "loss": 1.0474, + "learning_rate": 1.9697903515230387e-05, + "loss": 0.9551, "step": 3744 }, { - "epoch": 0.10627128263337117, + "epoch": 0.1061237212729179, "grad_norm": 0.0, - "learning_rate": 1.9696609615507304e-05, - "loss": 1.0028, + "learning_rate": 1.9697679588966042e-05, + "loss": 1.0164, "step": 3745 }, { - "epoch": 0.10629965947786606, + "epoch": 0.10615205871518037, "grad_norm": 0.0, - "learning_rate": 1.9696384902503932e-05, - "loss": 0.9473, + "learning_rate": 1.969745558101454e-05, + "loss": 1.0359, "step": 3746 }, { - "epoch": 0.10632803632236096, + "epoch": 0.10618039615744283, "grad_norm": 0.0, - "learning_rate": 1.969616010759476e-05, - "loss": 1.0796, + "learning_rate": 1.9697231491377775e-05, + "loss": 1.0122, "step": 3747 }, { - "epoch": 0.10635641316685585, + "epoch": 0.10620873359970529, "grad_norm": 0.0, - "learning_rate": 1.969593523078169e-05, - "loss": 1.0498, + "learning_rate": 1.9697007320057624e-05, + "loss": 0.9777, "step": 3748 }, { - "epoch": 0.10638479001135073, + "epoch": 0.10623707104196775, "grad_norm": 0.0, - "learning_rate": 1.9695710272066623e-05, - "loss": 1.0355, + "learning_rate": 1.969678306705598e-05, + "loss": 1.0326, "step": 3749 }, { - "epoch": 0.10641316685584563, + "epoch": 0.10626540848423022, "grad_norm": 0.0, - "learning_rate": 1.9695485231451448e-05, - "loss": 1.045, + "learning_rate": 1.969655873237473e-05, + "loss": 1.0152, "step": 3750 }, { - "epoch": 0.10644154370034052, + "epoch": 0.10629374592649267, "grad_norm": 0.0, - "learning_rate": 1.9695260108938078e-05, - "loss": 0.9935, + "learning_rate": 1.9696334316015768e-05, + "loss": 1.127, "step": 3751 }, { - "epoch": 0.10646992054483541, + "epoch": 0.10632208336875514, "grad_norm": 0.0, - "learning_rate": 1.969503490452841e-05, - "loss": 0.965, + "learning_rate": 1.9696109817980978e-05, + "loss": 1.0287, "step": 3752 }, { - "epoch": 0.10649829738933031, + "epoch": 0.1063504208110176, "grad_norm": 0.0, - "learning_rate": 1.969480961822434e-05, - "loss": 0.9568, + "learning_rate": 1.9695885238272256e-05, + "loss": 0.986, "step": 3753 }, { - "epoch": 0.1065266742338252, + "epoch": 0.10637875825328005, "grad_norm": 0.0, - "learning_rate": 1.9694584250027784e-05, - "loss": 1.1124, + "learning_rate": 1.969566057689149e-05, + "loss": 1.0052, "step": 3754 }, { - "epoch": 0.10655505107832008, + "epoch": 0.10640709569554252, "grad_norm": 0.0, - "learning_rate": 1.9694358799940637e-05, - "loss": 1.0518, + "learning_rate": 1.969543583384058e-05, + "loss": 1.141, "step": 3755 }, { - "epoch": 0.10658342792281499, + "epoch": 0.10643543313780499, "grad_norm": 0.0, - "learning_rate": 1.9694133267964804e-05, - "loss": 1.0455, + "learning_rate": 1.9695211009121407e-05, + "loss": 1.0683, "step": 3756 }, { - "epoch": 0.10661180476730987, + "epoch": 0.10646377058006744, "grad_norm": 0.0, - "learning_rate": 1.9693907654102197e-05, - "loss": 1.0206, + "learning_rate": 1.9694986102735876e-05, + "loss": 0.9666, "step": 3757 }, { - "epoch": 0.10664018161180477, + "epoch": 0.1064921080223299, "grad_norm": 0.0, - "learning_rate": 1.969368195835471e-05, - "loss": 1.0918, + "learning_rate": 1.9694761114685876e-05, + "loss": 1.0638, "step": 3758 }, { - "epoch": 0.10666855845629966, + "epoch": 0.10652044546459237, "grad_norm": 0.0, - "learning_rate": 1.969345618072426e-05, - "loss": 1.1094, + "learning_rate": 1.9694536044973303e-05, + "loss": 1.0857, "step": 3759 }, { - "epoch": 0.10669693530079455, + "epoch": 0.10654878290685482, "grad_norm": 0.0, - "learning_rate": 1.9693230321212748e-05, - "loss": 1.0937, + "learning_rate": 1.9694310893600053e-05, + "loss": 1.011, "step": 3760 }, { - "epoch": 0.10672531214528945, + "epoch": 0.10657712034911729, "grad_norm": 0.0, - "learning_rate": 1.9693004379822083e-05, - "loss": 1.0017, + "learning_rate": 1.9694085660568023e-05, + "loss": 1.1065, "step": 3761 }, { - "epoch": 0.10675368898978434, + "epoch": 0.10660545779137975, "grad_norm": 0.0, - "learning_rate": 1.9692778356554174e-05, - "loss": 0.9722, + "learning_rate": 1.969386034587911e-05, + "loss": 1.0303, "step": 3762 }, { - "epoch": 0.10678206583427922, + "epoch": 0.1066337952336422, "grad_norm": 0.0, - "learning_rate": 1.969255225141093e-05, - "loss": 1.0777, + "learning_rate": 1.969363494953521e-05, + "loss": 1.0221, "step": 3763 }, { - "epoch": 0.10681044267877413, + "epoch": 0.10666213267590467, "grad_norm": 0.0, - "learning_rate": 1.9692326064394266e-05, - "loss": 1.0916, + "learning_rate": 1.9693409471538222e-05, + "loss": 0.9822, "step": 3764 }, { - "epoch": 0.10683881952326901, + "epoch": 0.10669047011816714, "grad_norm": 0.0, - "learning_rate": 1.9692099795506084e-05, - "loss": 1.1364, + "learning_rate": 1.9693183911890047e-05, + "loss": 1.1811, "step": 3765 }, { - "epoch": 0.1068671963677639, + "epoch": 0.10671880756042959, "grad_norm": 0.0, - "learning_rate": 1.96918734447483e-05, - "loss": 1.0923, + "learning_rate": 1.9692958270592587e-05, + "loss": 1.1432, "step": 3766 }, { - "epoch": 0.1068955732122588, + "epoch": 0.10674714500269206, "grad_norm": 0.0, - "learning_rate": 1.9691647012122825e-05, - "loss": 1.0422, + "learning_rate": 1.969273254764774e-05, + "loss": 1.0012, "step": 3767 }, { - "epoch": 0.10692395005675369, + "epoch": 0.10677548244495452, "grad_norm": 0.0, - "learning_rate": 1.9691420497631576e-05, - "loss": 1.0167, + "learning_rate": 1.9692506743057405e-05, + "loss": 1.1021, "step": 3768 }, { - "epoch": 0.10695232690124858, + "epoch": 0.10680381988721697, "grad_norm": 0.0, - "learning_rate": 1.9691193901276456e-05, - "loss": 0.9993, + "learning_rate": 1.9692280856823486e-05, + "loss": 1.0934, "step": 3769 }, { - "epoch": 0.10698070374574348, + "epoch": 0.10683215732947944, "grad_norm": 0.0, - "learning_rate": 1.9690967223059386e-05, - "loss": 0.9253, + "learning_rate": 1.969205488894789e-05, + "loss": 1.069, "step": 3770 }, { - "epoch": 0.10700908059023836, + "epoch": 0.1068604947717419, "grad_norm": 0.0, - "learning_rate": 1.9690740462982284e-05, - "loss": 1.2001, + "learning_rate": 1.969182883943251e-05, + "loss": 1.1023, "step": 3771 }, { - "epoch": 0.10703745743473325, + "epoch": 0.10688883221400436, "grad_norm": 0.0, - "learning_rate": 1.9690513621047064e-05, - "loss": 0.9581, + "learning_rate": 1.9691602708279266e-05, + "loss": 1.0707, "step": 3772 }, { - "epoch": 0.10706583427922815, + "epoch": 0.10691716965626683, "grad_norm": 0.0, - "learning_rate": 1.9690286697255633e-05, - "loss": 1.0869, + "learning_rate": 1.9691376495490046e-05, + "loss": 1.1901, "step": 3773 }, { - "epoch": 0.10709421112372304, + "epoch": 0.10694550709852929, "grad_norm": 0.0, - "learning_rate": 1.969005969160992e-05, - "loss": 1.0121, + "learning_rate": 1.9691150201066765e-05, + "loss": 1.0557, "step": 3774 }, { - "epoch": 0.10712258796821793, + "epoch": 0.10697384454079174, "grad_norm": 0.0, - "learning_rate": 1.9689832604111836e-05, - "loss": 1.059, + "learning_rate": 1.969092382501133e-05, + "loss": 0.9284, "step": 3775 }, { - "epoch": 0.10715096481271283, + "epoch": 0.10700218198305421, "grad_norm": 0.0, - "learning_rate": 1.9689605434763298e-05, - "loss": 1.0981, + "learning_rate": 1.9690697367325642e-05, + "loss": 1.0519, "step": 3776 }, { - "epoch": 0.10717934165720772, + "epoch": 0.10703051942531668, "grad_norm": 0.0, - "learning_rate": 1.968937818356623e-05, - "loss": 1.0545, + "learning_rate": 1.969047082801161e-05, + "loss": 1.1081, "step": 3777 }, { - "epoch": 0.10720771850170262, + "epoch": 0.10705885686757913, "grad_norm": 0.0, - "learning_rate": 1.9689150850522548e-05, - "loss": 1.0156, + "learning_rate": 1.9690244207071146e-05, + "loss": 1.1674, "step": 3778 }, { - "epoch": 0.1072360953461975, + "epoch": 0.1070871943098416, "grad_norm": 0.0, - "learning_rate": 1.9688923435634174e-05, - "loss": 1.0841, + "learning_rate": 1.969001750450616e-05, + "loss": 1.0648, "step": 3779 }, { - "epoch": 0.10726447219069239, + "epoch": 0.10711553175210406, "grad_norm": 0.0, - "learning_rate": 1.9688695938903025e-05, - "loss": 0.9929, + "learning_rate": 1.9689790720318555e-05, + "loss": 0.9986, "step": 3780 }, { - "epoch": 0.1072928490351873, + "epoch": 0.10714386919436651, "grad_norm": 0.0, - "learning_rate": 1.9688468360331026e-05, - "loss": 1.0351, + "learning_rate": 1.9689563854510243e-05, + "loss": 1.0518, "step": 3781 }, { - "epoch": 0.10732122587968218, + "epoch": 0.10717220663662898, "grad_norm": 0.0, - "learning_rate": 1.9688240699920104e-05, - "loss": 0.9815, + "learning_rate": 1.9689336907083138e-05, + "loss": 1.0427, "step": 3782 }, { - "epoch": 0.10734960272417707, + "epoch": 0.10720054407889144, "grad_norm": 0.0, - "learning_rate": 1.9688012957672172e-05, - "loss": 1.2264, + "learning_rate": 1.968910987803915e-05, + "loss": 1.1095, "step": 3783 }, { - "epoch": 0.10737797956867197, + "epoch": 0.1072288815211539, "grad_norm": 0.0, - "learning_rate": 1.9687785133589165e-05, - "loss": 0.9223, + "learning_rate": 1.968888276738019e-05, + "loss": 1.1271, "step": 3784 }, { - "epoch": 0.10740635641316686, + "epoch": 0.10725721896341636, "grad_norm": 0.0, - "learning_rate": 1.9687557227672997e-05, - "loss": 1.038, + "learning_rate": 1.9688655575108175e-05, + "loss": 1.1136, "step": 3785 }, { - "epoch": 0.10743473325766174, + "epoch": 0.10728555640567883, "grad_norm": 0.0, - "learning_rate": 1.9687329239925595e-05, - "loss": 1.0379, + "learning_rate": 1.9688428301225017e-05, + "loss": 1.1278, "step": 3786 }, { - "epoch": 0.10746311010215664, + "epoch": 0.10731389384794128, "grad_norm": 0.0, - "learning_rate": 1.9687101170348893e-05, - "loss": 1.0182, + "learning_rate": 1.9688200945732627e-05, + "loss": 1.0349, "step": 3787 }, { - "epoch": 0.10749148694665153, + "epoch": 0.10734223129020375, "grad_norm": 0.0, - "learning_rate": 1.9686873018944812e-05, - "loss": 1.0819, + "learning_rate": 1.9687973508632925e-05, + "loss": 1.0635, "step": 3788 }, { - "epoch": 0.10751986379114642, + "epoch": 0.10737056873246621, "grad_norm": 0.0, - "learning_rate": 1.9686644785715276e-05, - "loss": 0.982, + "learning_rate": 1.9687745989927823e-05, + "loss": 1.0506, "step": 3789 }, { - "epoch": 0.10754824063564132, + "epoch": 0.10739890617472866, "grad_norm": 0.0, - "learning_rate": 1.9686416470662218e-05, - "loss": 0.9872, + "learning_rate": 1.9687518389619242e-05, + "loss": 1.0949, "step": 3790 }, { - "epoch": 0.10757661748013621, + "epoch": 0.10742724361699113, "grad_norm": 0.0, - "learning_rate": 1.9686188073787563e-05, - "loss": 1.0275, + "learning_rate": 1.9687290707709092e-05, + "loss": 1.1547, "step": 3791 }, { - "epoch": 0.1076049943246311, + "epoch": 0.1074555810592536, "grad_norm": 0.0, - "learning_rate": 1.968595959509324e-05, - "loss": 0.9137, + "learning_rate": 1.9687062944199294e-05, + "loss": 1.106, "step": 3792 }, { - "epoch": 0.107633371169126, + "epoch": 0.10748391850151605, "grad_norm": 0.0, - "learning_rate": 1.968573103458119e-05, - "loss": 1.0807, + "learning_rate": 1.968683509909177e-05, + "loss": 1.0335, "step": 3793 }, { - "epoch": 0.10766174801362088, + "epoch": 0.10751225594377851, "grad_norm": 0.0, - "learning_rate": 1.9685502392253326e-05, - "loss": 1.0134, + "learning_rate": 1.968660717238844e-05, + "loss": 1.0708, "step": 3794 }, { - "epoch": 0.10769012485811577, + "epoch": 0.10754059338604098, "grad_norm": 0.0, - "learning_rate": 1.9685273668111588e-05, - "loss": 0.9907, + "learning_rate": 1.9686379164091218e-05, + "loss": 1.1041, "step": 3795 }, { - "epoch": 0.10771850170261067, + "epoch": 0.10756893082830343, "grad_norm": 0.0, - "learning_rate": 1.9685044862157912e-05, - "loss": 1.0128, + "learning_rate": 1.9686151074202028e-05, + "loss": 1.0373, "step": 3796 }, { - "epoch": 0.10774687854710556, + "epoch": 0.1075972682705659, "grad_norm": 0.0, - "learning_rate": 1.9684815974394223e-05, - "loss": 1.005, + "learning_rate": 1.968592290272279e-05, + "loss": 1.0667, "step": 3797 }, { - "epoch": 0.10777525539160046, + "epoch": 0.10762560571282836, "grad_norm": 0.0, - "learning_rate": 1.9684587004822462e-05, - "loss": 1.0106, + "learning_rate": 1.9685694649655426e-05, + "loss": 1.064, "step": 3798 }, { - "epoch": 0.10780363223609535, + "epoch": 0.10765394315509082, "grad_norm": 0.0, - "learning_rate": 1.9684357953444558e-05, - "loss": 1.0547, + "learning_rate": 1.9685466315001863e-05, + "loss": 1.0619, "step": 3799 }, { - "epoch": 0.10783200908059024, + "epoch": 0.10768228059735328, "grad_norm": 0.0, - "learning_rate": 1.9684128820262444e-05, - "loss": 1.0667, + "learning_rate": 1.9685237898764014e-05, + "loss": 1.0029, "step": 3800 }, { - "epoch": 0.10786038592508514, + "epoch": 0.10771061803961575, "grad_norm": 0.0, - "learning_rate": 1.9683899605278062e-05, - "loss": 0.9875, + "learning_rate": 1.9685009400943815e-05, + "loss": 1.1421, "step": 3801 }, { - "epoch": 0.10788876276958002, + "epoch": 0.1077389554818782, "grad_norm": 0.0, - "learning_rate": 1.968367030849334e-05, - "loss": 1.012, + "learning_rate": 1.9684780821543185e-05, + "loss": 1.2068, "step": 3802 }, { - "epoch": 0.10791713961407491, + "epoch": 0.10776729292414067, "grad_norm": 0.0, - "learning_rate": 1.9683440929910223e-05, - "loss": 1.0896, + "learning_rate": 1.9684552160564047e-05, + "loss": 0.9427, "step": 3803 }, { - "epoch": 0.10794551645856981, + "epoch": 0.10779563036640313, "grad_norm": 0.0, - "learning_rate": 1.9683211469530646e-05, - "loss": 0.9877, + "learning_rate": 1.968432341800833e-05, + "loss": 0.9499, "step": 3804 }, { - "epoch": 0.1079738933030647, + "epoch": 0.10782396780866559, "grad_norm": 0.0, - "learning_rate": 1.9682981927356545e-05, - "loss": 1.0458, + "learning_rate": 1.9684094593877964e-05, + "loss": 1.1516, "step": 3805 }, { - "epoch": 0.10800227014755959, + "epoch": 0.10785230525092805, "grad_norm": 0.0, - "learning_rate": 1.9682752303389864e-05, - "loss": 0.9866, + "learning_rate": 1.9683865688174873e-05, + "loss": 1.1173, "step": 3806 }, { - "epoch": 0.10803064699205449, + "epoch": 0.10788064269319052, "grad_norm": 0.0, - "learning_rate": 1.9682522597632533e-05, - "loss": 1.011, + "learning_rate": 1.9683636700900984e-05, + "loss": 1.1164, "step": 3807 }, { - "epoch": 0.10805902383654938, + "epoch": 0.10790898013545297, "grad_norm": 0.0, - "learning_rate": 1.9682292810086503e-05, - "loss": 1.0513, + "learning_rate": 1.9683407632058226e-05, + "loss": 0.9258, "step": 3808 }, { - "epoch": 0.10808740068104426, + "epoch": 0.10793731757771544, "grad_norm": 0.0, - "learning_rate": 1.968206294075371e-05, - "loss": 0.9928, + "learning_rate": 1.968317848164853e-05, + "loss": 1.0388, "step": 3809 }, { - "epoch": 0.10811577752553916, + "epoch": 0.1079656550199779, "grad_norm": 0.0, - "learning_rate": 1.9681832989636093e-05, - "loss": 0.9918, + "learning_rate": 1.9682949249673825e-05, + "loss": 1.0627, "step": 3810 }, { - "epoch": 0.10814415437003405, + "epoch": 0.10799399246224035, "grad_norm": 0.0, - "learning_rate": 1.96816029567356e-05, - "loss": 1.0676, + "learning_rate": 1.9682719936136045e-05, + "loss": 1.0964, "step": 3811 }, { - "epoch": 0.10817253121452894, + "epoch": 0.10802232990450282, "grad_norm": 0.0, - "learning_rate": 1.968137284205417e-05, - "loss": 0.9949, + "learning_rate": 1.9682490541037117e-05, + "loss": 1.014, "step": 3812 }, { - "epoch": 0.10820090805902384, + "epoch": 0.10805066734676529, "grad_norm": 0.0, - "learning_rate": 1.968114264559375e-05, - "loss": 0.9839, + "learning_rate": 1.968226106437898e-05, + "loss": 0.9158, "step": 3813 }, { - "epoch": 0.10822928490351873, + "epoch": 0.10807900478902774, "grad_norm": 0.0, - "learning_rate": 1.968091236735628e-05, - "loss": 1.1088, + "learning_rate": 1.9682031506163556e-05, + "loss": 1.0662, "step": 3814 }, { - "epoch": 0.10825766174801361, + "epoch": 0.1081073422312902, "grad_norm": 0.0, - "learning_rate": 1.968068200734371e-05, - "loss": 1.0597, + "learning_rate": 1.968180186639279e-05, + "loss": 1.1237, "step": 3815 }, { - "epoch": 0.10828603859250852, + "epoch": 0.10813567967355267, "grad_norm": 0.0, - "learning_rate": 1.9680451565557985e-05, - "loss": 1.0405, + "learning_rate": 1.968157214506861e-05, + "loss": 0.9884, "step": 3816 }, { - "epoch": 0.1083144154370034, + "epoch": 0.10816401711581512, "grad_norm": 0.0, - "learning_rate": 1.968022104200105e-05, - "loss": 1.0875, + "learning_rate": 1.968134234219295e-05, + "loss": 1.0782, "step": 3817 }, { - "epoch": 0.1083427922814983, + "epoch": 0.10819235455807759, "grad_norm": 0.0, - "learning_rate": 1.9679990436674853e-05, - "loss": 1.0832, + "learning_rate": 1.9681112457767755e-05, + "loss": 1.0177, "step": 3818 }, { - "epoch": 0.10837116912599319, + "epoch": 0.10822069200034005, "grad_norm": 0.0, - "learning_rate": 1.967975974958134e-05, - "loss": 1.0407, + "learning_rate": 1.9680882491794953e-05, + "loss": 1.0002, "step": 3819 }, { - "epoch": 0.10839954597048808, + "epoch": 0.1082490294426025, "grad_norm": 0.0, - "learning_rate": 1.967952898072246e-05, - "loss": 1.0824, + "learning_rate": 1.968065244427648e-05, + "loss": 0.9186, "step": 3820 }, { - "epoch": 0.10842792281498298, + "epoch": 0.10827736688486497, "grad_norm": 0.0, - "learning_rate": 1.9679298130100165e-05, - "loss": 1.0775, + "learning_rate": 1.9680422315214278e-05, + "loss": 1.0743, "step": 3821 }, { - "epoch": 0.10845629965947787, + "epoch": 0.10830570432712744, "grad_norm": 0.0, - "learning_rate": 1.9679067197716408e-05, - "loss": 1.0666, + "learning_rate": 1.9680192104610283e-05, + "loss": 0.9785, "step": 3822 }, { - "epoch": 0.10848467650397275, + "epoch": 0.10833404176938989, "grad_norm": 0.0, - "learning_rate": 1.967883618357313e-05, - "loss": 0.9252, + "learning_rate": 1.967996181246644e-05, + "loss": 1.1013, "step": 3823 }, { - "epoch": 0.10851305334846766, + "epoch": 0.10836237921165236, "grad_norm": 0.0, - "learning_rate": 1.967860508767229e-05, - "loss": 1.0374, + "learning_rate": 1.967973143878468e-05, + "loss": 1.0764, "step": 3824 }, { - "epoch": 0.10854143019296254, + "epoch": 0.10839071665391482, "grad_norm": 0.0, - "learning_rate": 1.9678373910015834e-05, - "loss": 1.0803, + "learning_rate": 1.9679500983566947e-05, + "loss": 1.0648, "step": 3825 }, { - "epoch": 0.10856980703745743, + "epoch": 0.10841905409617728, "grad_norm": 0.0, - "learning_rate": 1.9678142650605724e-05, - "loss": 0.9329, + "learning_rate": 1.9679270446815183e-05, + "loss": 1.0867, "step": 3826 }, { - "epoch": 0.10859818388195233, + "epoch": 0.10844739153843974, "grad_norm": 0.0, - "learning_rate": 1.9677911309443907e-05, - "loss": 1.0905, + "learning_rate": 1.967903982853133e-05, + "loss": 1.0099, "step": 3827 }, { - "epoch": 0.10862656072644722, + "epoch": 0.10847572898070221, "grad_norm": 0.0, - "learning_rate": 1.9677679886532335e-05, - "loss": 1.084, + "learning_rate": 1.9678809128717332e-05, + "loss": 1.083, "step": 3828 }, { - "epoch": 0.1086549375709421, + "epoch": 0.10850406642296466, "grad_norm": 0.0, - "learning_rate": 1.9677448381872966e-05, - "loss": 1.0524, + "learning_rate": 1.967857834737513e-05, + "loss": 1.0731, "step": 3829 }, { - "epoch": 0.10868331441543701, + "epoch": 0.10853240386522713, "grad_norm": 0.0, - "learning_rate": 1.967721679546776e-05, - "loss": 1.1514, + "learning_rate": 1.9678347484506667e-05, + "loss": 1.1437, "step": 3830 }, { - "epoch": 0.1087116912599319, + "epoch": 0.10856074130748959, "grad_norm": 0.0, - "learning_rate": 1.9676985127318665e-05, - "loss": 1.1299, + "learning_rate": 1.967811654011389e-05, + "loss": 0.9666, "step": 3831 }, { - "epoch": 0.10874006810442678, + "epoch": 0.10858907874975204, "grad_norm": 0.0, - "learning_rate": 1.967675337742764e-05, - "loss": 1.0794, + "learning_rate": 1.9677885514198746e-05, + "loss": 1.0704, "step": 3832 }, { - "epoch": 0.10876844494892168, + "epoch": 0.10861741619201451, "grad_norm": 0.0, - "learning_rate": 1.9676521545796648e-05, - "loss": 1.0464, + "learning_rate": 1.9677654406763172e-05, + "loss": 1.1684, "step": 3833 }, { - "epoch": 0.10879682179341657, + "epoch": 0.10864575363427698, "grad_norm": 0.0, - "learning_rate": 1.9676289632427643e-05, - "loss": 1.0044, + "learning_rate": 1.9677423217809127e-05, + "loss": 1.0497, "step": 3834 }, { - "epoch": 0.10882519863791146, + "epoch": 0.10867409107653943, "grad_norm": 0.0, - "learning_rate": 1.9676057637322584e-05, - "loss": 1.1542, + "learning_rate": 1.9677191947338552e-05, + "loss": 1.0291, "step": 3835 }, { - "epoch": 0.10885357548240636, + "epoch": 0.1087024285188019, "grad_norm": 0.0, - "learning_rate": 1.967582556048343e-05, - "loss": 1.0635, + "learning_rate": 1.9676960595353392e-05, + "loss": 1.1045, "step": 3836 }, { - "epoch": 0.10888195232690125, + "epoch": 0.10873076596106436, "grad_norm": 0.0, - "learning_rate": 1.9675593401912142e-05, - "loss": 1.0304, + "learning_rate": 1.9676729161855603e-05, + "loss": 1.1013, "step": 3837 }, { - "epoch": 0.10891032917139615, + "epoch": 0.10875910340332681, "grad_norm": 0.0, - "learning_rate": 1.9675361161610686e-05, - "loss": 1.0347, + "learning_rate": 1.967649764684713e-05, + "loss": 1.0714, "step": 3838 }, { - "epoch": 0.10893870601589103, + "epoch": 0.10878744084558928, "grad_norm": 0.0, - "learning_rate": 1.9675128839581013e-05, - "loss": 0.9639, + "learning_rate": 1.9676266050329926e-05, + "loss": 1.0748, "step": 3839 }, { - "epoch": 0.10896708286038592, + "epoch": 0.10881577828785174, "grad_norm": 0.0, - "learning_rate": 1.96748964358251e-05, - "loss": 0.8531, + "learning_rate": 1.9676034372305938e-05, + "loss": 1.0925, "step": 3840 }, { - "epoch": 0.10899545970488082, + "epoch": 0.1088441157301142, "grad_norm": 0.0, - "learning_rate": 1.9674663950344894e-05, - "loss": 1.1161, + "learning_rate": 1.9675802612777117e-05, + "loss": 1.1287, "step": 3841 }, { - "epoch": 0.10902383654937571, + "epoch": 0.10887245317237666, "grad_norm": 0.0, - "learning_rate": 1.967443138314237e-05, - "loss": 1.1257, + "learning_rate": 1.967557077174542e-05, + "loss": 0.9717, "step": 3842 }, { - "epoch": 0.1090522133938706, + "epoch": 0.10890079061463913, "grad_norm": 0.0, - "learning_rate": 1.967419873421949e-05, - "loss": 1.0345, + "learning_rate": 1.96753388492128e-05, + "loss": 1.1369, "step": 3843 }, { - "epoch": 0.1090805902383655, + "epoch": 0.10892912805690158, "grad_norm": 0.0, - "learning_rate": 1.9673966003578215e-05, - "loss": 0.9323, + "learning_rate": 1.9675106845181205e-05, + "loss": 1.0757, "step": 3844 }, { - "epoch": 0.10910896708286039, + "epoch": 0.10895746549916405, "grad_norm": 0.0, - "learning_rate": 1.9673733191220515e-05, - "loss": 1.0724, + "learning_rate": 1.967487475965259e-05, + "loss": 1.1063, "step": 3845 }, { - "epoch": 0.10913734392735527, + "epoch": 0.10898580294142651, "grad_norm": 0.0, - "learning_rate": 1.9673500297148357e-05, - "loss": 1.081, + "learning_rate": 1.9674642592628913e-05, + "loss": 1.0832, "step": 3846 }, { - "epoch": 0.10916572077185017, + "epoch": 0.10901414038368896, "grad_norm": 0.0, - "learning_rate": 1.9673267321363707e-05, - "loss": 1.1114, + "learning_rate": 1.9674410344112132e-05, + "loss": 1.0984, "step": 3847 }, { - "epoch": 0.10919409761634506, + "epoch": 0.10904247782595143, "grad_norm": 0.0, - "learning_rate": 1.9673034263868533e-05, - "loss": 1.0005, + "learning_rate": 1.9674178014104198e-05, + "loss": 1.0336, "step": 3848 }, { - "epoch": 0.10922247446083995, + "epoch": 0.1090708152682139, "grad_norm": 0.0, - "learning_rate": 1.9672801124664807e-05, - "loss": 1.1236, + "learning_rate": 1.9673945602607073e-05, + "loss": 1.0465, "step": 3849 }, { - "epoch": 0.10925085130533485, + "epoch": 0.10909915271047635, "grad_norm": 0.0, - "learning_rate": 1.967256790375449e-05, - "loss": 1.1028, + "learning_rate": 1.967371310962271e-05, + "loss": 1.1512, "step": 3850 }, { - "epoch": 0.10927922814982974, + "epoch": 0.10912749015273882, "grad_norm": 0.0, - "learning_rate": 1.967233460113956e-05, - "loss": 1.0385, + "learning_rate": 1.9673480535153067e-05, + "loss": 1.0382, "step": 3851 }, { - "epoch": 0.10930760499432463, + "epoch": 0.10915582759500128, "grad_norm": 0.0, - "learning_rate": 1.9672101216821983e-05, - "loss": 1.0285, + "learning_rate": 1.967324787920011e-05, + "loss": 1.0207, "step": 3852 }, { - "epoch": 0.10933598183881953, + "epoch": 0.10918416503726373, "grad_norm": 0.0, - "learning_rate": 1.9671867750803737e-05, - "loss": 1.0535, + "learning_rate": 1.967301514176579e-05, + "loss": 0.9619, "step": 3853 }, { - "epoch": 0.10936435868331441, + "epoch": 0.1092125024795262, "grad_norm": 0.0, - "learning_rate": 1.9671634203086784e-05, - "loss": 1.1032, + "learning_rate": 1.9672782322852073e-05, + "loss": 1.0927, "step": 3854 }, { - "epoch": 0.1093927355278093, + "epoch": 0.10924083992178867, "grad_norm": 0.0, - "learning_rate": 1.9671400573673104e-05, - "loss": 1.0309, + "learning_rate": 1.967254942246092e-05, + "loss": 1.03, "step": 3855 }, { - "epoch": 0.1094211123723042, + "epoch": 0.10926917736405112, "grad_norm": 0.0, - "learning_rate": 1.9671166862564668e-05, - "loss": 1.0415, + "learning_rate": 1.967231644059429e-05, + "loss": 0.9898, "step": 3856 }, { - "epoch": 0.10944948921679909, + "epoch": 0.10929751480631358, "grad_norm": 0.0, - "learning_rate": 1.967093306976345e-05, - "loss": 0.9711, + "learning_rate": 1.9672083377254144e-05, + "loss": 1.0674, "step": 3857 }, { - "epoch": 0.10947786606129399, + "epoch": 0.10932585224857605, "grad_norm": 0.0, - "learning_rate": 1.967069919527143e-05, - "loss": 0.9654, + "learning_rate": 1.9671850232442453e-05, + "loss": 1.0373, "step": 3858 }, { - "epoch": 0.10950624290578888, + "epoch": 0.1093541896908385, "grad_norm": 0.0, - "learning_rate": 1.9670465239090576e-05, - "loss": 1.0178, + "learning_rate": 1.967161700616117e-05, + "loss": 0.9546, "step": 3859 }, { - "epoch": 0.10953461975028377, + "epoch": 0.10938252713310097, "grad_norm": 0.0, - "learning_rate": 1.9670231201222866e-05, - "loss": 0.9866, + "learning_rate": 1.9671383698412275e-05, + "loss": 1.1603, "step": 3860 }, { - "epoch": 0.10956299659477867, + "epoch": 0.10941086457536343, "grad_norm": 0.0, - "learning_rate": 1.966999708167028e-05, - "loss": 1.0566, + "learning_rate": 1.9671150309197714e-05, + "loss": 0.9996, "step": 3861 }, { - "epoch": 0.10959137343927355, + "epoch": 0.10943920201762589, "grad_norm": 0.0, - "learning_rate": 1.9669762880434796e-05, - "loss": 1.0374, + "learning_rate": 1.967091683851947e-05, + "loss": 0.9416, "step": 3862 }, { - "epoch": 0.10961975028376844, + "epoch": 0.10946753945988835, "grad_norm": 0.0, - "learning_rate": 1.9669528597518388e-05, - "loss": 1.0616, + "learning_rate": 1.9670683286379496e-05, + "loss": 1.0798, "step": 3863 }, { - "epoch": 0.10964812712826334, + "epoch": 0.10949587690215082, "grad_norm": 0.0, - "learning_rate": 1.9669294232923034e-05, - "loss": 1.055, + "learning_rate": 1.967044965277977e-05, + "loss": 0.9496, "step": 3864 }, { - "epoch": 0.10967650397275823, + "epoch": 0.10952421434441327, "grad_norm": 0.0, - "learning_rate": 1.9669059786650722e-05, - "loss": 1.0702, + "learning_rate": 1.9670215937722256e-05, + "loss": 0.9979, "step": 3865 }, { - "epoch": 0.10970488081725312, + "epoch": 0.10955255178667574, "grad_norm": 0.0, - "learning_rate": 1.9668825258703424e-05, - "loss": 1.0611, + "learning_rate": 1.9669982141208917e-05, + "loss": 1.0492, "step": 3866 }, { - "epoch": 0.10973325766174802, + "epoch": 0.1095808892289382, "grad_norm": 0.0, - "learning_rate": 1.966859064908313e-05, - "loss": 1.1299, + "learning_rate": 1.9669748263241733e-05, + "loss": 1.1324, "step": 3867 }, { - "epoch": 0.1097616345062429, + "epoch": 0.10960922667120065, "grad_norm": 0.0, - "learning_rate": 1.966835595779181e-05, - "loss": 1.097, + "learning_rate": 1.9669514303822665e-05, + "loss": 1.0161, "step": 3868 }, { - "epoch": 0.10979001135073779, + "epoch": 0.10963756411346312, "grad_norm": 0.0, - "learning_rate": 1.9668121184831456e-05, - "loss": 1.092, + "learning_rate": 1.966928026295369e-05, + "loss": 1.1542, "step": 3869 }, { - "epoch": 0.1098183881952327, + "epoch": 0.10966590155572559, "grad_norm": 0.0, - "learning_rate": 1.9667886330204045e-05, - "loss": 1.0119, + "learning_rate": 1.9669046140636773e-05, + "loss": 1.1465, "step": 3870 }, { - "epoch": 0.10984676503972758, + "epoch": 0.10969423899798804, "grad_norm": 0.0, - "learning_rate": 1.9667651393911565e-05, - "loss": 0.998, + "learning_rate": 1.9668811936873894e-05, + "loss": 1.0824, "step": 3871 }, { - "epoch": 0.10987514188422247, + "epoch": 0.1097225764402505, "grad_norm": 0.0, - "learning_rate": 1.9667416375956e-05, - "loss": 1.0754, + "learning_rate": 1.966857765166702e-05, + "loss": 1.0033, "step": 3872 }, { - "epoch": 0.10990351872871737, + "epoch": 0.10975091388251297, "grad_norm": 0.0, - "learning_rate": 1.966718127633933e-05, - "loss": 1.1221, + "learning_rate": 1.9668343285018127e-05, + "loss": 1.0851, "step": 3873 }, { - "epoch": 0.10993189557321226, + "epoch": 0.10977925132477542, "grad_norm": 0.0, - "learning_rate": 1.9666946095063553e-05, - "loss": 1.0263, + "learning_rate": 1.9668108836929187e-05, + "loss": 1.071, "step": 3874 }, { - "epoch": 0.10996027241770714, + "epoch": 0.10980758876703789, "grad_norm": 0.0, - "learning_rate": 1.966671083213064e-05, - "loss": 1.0847, + "learning_rate": 1.9667874307402176e-05, + "loss": 1.2281, "step": 3875 }, { - "epoch": 0.10998864926220205, + "epoch": 0.10983592620930036, "grad_norm": 0.0, - "learning_rate": 1.966647548754259e-05, - "loss": 1.1119, + "learning_rate": 1.966763969643907e-05, + "loss": 1.0918, "step": 3876 }, { - "epoch": 0.11001702610669693, + "epoch": 0.10986426365156281, "grad_norm": 0.0, - "learning_rate": 1.9666240061301388e-05, - "loss": 1.0319, + "learning_rate": 1.9667405004041846e-05, + "loss": 1.1279, "step": 3877 }, { - "epoch": 0.11004540295119183, + "epoch": 0.10989260109382527, "grad_norm": 0.0, - "learning_rate": 1.966600455340902e-05, - "loss": 0.9365, + "learning_rate": 1.9667170230212478e-05, + "loss": 1.0705, "step": 3878 }, { - "epoch": 0.11007377979568672, + "epoch": 0.10992093853608774, "grad_norm": 0.0, - "learning_rate": 1.9665768963867474e-05, - "loss": 1.1236, + "learning_rate": 1.9666935374952946e-05, + "loss": 0.917, "step": 3879 }, { - "epoch": 0.11010215664018161, + "epoch": 0.10994927597835019, "grad_norm": 0.0, - "learning_rate": 1.966553329267875e-05, - "loss": 0.9409, + "learning_rate": 1.9666700438265227e-05, + "loss": 1.06, "step": 3880 }, { - "epoch": 0.11013053348467651, + "epoch": 0.10997761342061266, "grad_norm": 0.0, - "learning_rate": 1.9665297539844826e-05, - "loss": 1.067, + "learning_rate": 1.9666465420151303e-05, + "loss": 1.075, "step": 3881 }, { - "epoch": 0.1101589103291714, + "epoch": 0.11000595086287512, "grad_norm": 0.0, - "learning_rate": 1.96650617053677e-05, - "loss": 1.0491, + "learning_rate": 1.9666230320613148e-05, + "loss": 1.0129, "step": 3882 }, { - "epoch": 0.11018728717366628, + "epoch": 0.11003428830513758, "grad_norm": 0.0, - "learning_rate": 1.9664825789249366e-05, - "loss": 0.9978, + "learning_rate": 1.966599513965275e-05, + "loss": 0.9809, "step": 3883 }, { - "epoch": 0.11021566401816119, + "epoch": 0.11006262574740004, "grad_norm": 0.0, - "learning_rate": 1.9664589791491812e-05, - "loss": 1.0654, + "learning_rate": 1.966575987727208e-05, + "loss": 1.0945, "step": 3884 }, { - "epoch": 0.11024404086265607, + "epoch": 0.11009096318966251, "grad_norm": 0.0, - "learning_rate": 1.9664353712097033e-05, - "loss": 0.9473, + "learning_rate": 1.966552453347313e-05, + "loss": 1.1167, "step": 3885 }, { - "epoch": 0.11027241770715096, + "epoch": 0.11011930063192496, "grad_norm": 0.0, - "learning_rate": 1.9664117551067025e-05, - "loss": 1.0544, + "learning_rate": 1.966528910825787e-05, + "loss": 1.0105, "step": 3886 }, { - "epoch": 0.11030079455164586, + "epoch": 0.11014763807418743, "grad_norm": 0.0, - "learning_rate": 1.9663881308403786e-05, - "loss": 0.9942, + "learning_rate": 1.96650536016283e-05, + "loss": 0.9327, "step": 3887 }, { - "epoch": 0.11032917139614075, + "epoch": 0.11017597551644988, "grad_norm": 0.0, - "learning_rate": 1.9663644984109302e-05, - "loss": 1.1034, + "learning_rate": 1.966481801358639e-05, + "loss": 1.0777, "step": 3888 }, { - "epoch": 0.11035754824063564, + "epoch": 0.11020431295871234, "grad_norm": 0.0, - "learning_rate": 1.9663408578185575e-05, - "loss": 1.0482, + "learning_rate": 1.966458234413413e-05, + "loss": 1.1112, "step": 3889 }, { - "epoch": 0.11038592508513054, + "epoch": 0.11023265040097481, "grad_norm": 0.0, - "learning_rate": 1.96631720906346e-05, - "loss": 1.0196, + "learning_rate": 1.96643465932735e-05, + "loss": 1.0327, "step": 3890 }, { - "epoch": 0.11041430192962542, + "epoch": 0.11026098784323726, "grad_norm": 0.0, - "learning_rate": 1.966293552145838e-05, - "loss": 1.0534, + "learning_rate": 1.9664110761006497e-05, + "loss": 1.0424, "step": 3891 }, { - "epoch": 0.11044267877412031, + "epoch": 0.11028932528549973, "grad_norm": 0.0, - "learning_rate": 1.9662698870658904e-05, - "loss": 1.1731, + "learning_rate": 1.9663874847335096e-05, + "loss": 1.0366, "step": 3892 }, { - "epoch": 0.11047105561861521, + "epoch": 0.1103176627277622, "grad_norm": 0.0, - "learning_rate": 1.966246213823818e-05, - "loss": 1.0567, + "learning_rate": 1.966363885226129e-05, + "loss": 0.9711, "step": 3893 }, { - "epoch": 0.1104994324631101, + "epoch": 0.11034600017002465, "grad_norm": 0.0, - "learning_rate": 1.96622253241982e-05, - "loss": 1.0325, + "learning_rate": 1.9663402775787066e-05, + "loss": 1.115, "step": 3894 }, { - "epoch": 0.11052780930760499, + "epoch": 0.11037433761228711, "grad_norm": 0.0, - "learning_rate": 1.966198842854097e-05, - "loss": 1.0061, + "learning_rate": 1.966316661791441e-05, + "loss": 1.0555, "step": 3895 }, { - "epoch": 0.11055618615209989, + "epoch": 0.11040267505454958, "grad_norm": 0.0, - "learning_rate": 1.9661751451268493e-05, - "loss": 1.1212, + "learning_rate": 1.9662930378645313e-05, + "loss": 1.1126, "step": 3896 }, { - "epoch": 0.11058456299659478, + "epoch": 0.11043101249681203, "grad_norm": 0.0, - "learning_rate": 1.9661514392382764e-05, - "loss": 1.04, + "learning_rate": 1.9662694057981768e-05, + "loss": 0.9509, "step": 3897 }, { - "epoch": 0.11061293984108968, + "epoch": 0.1104593499390745, "grad_norm": 0.0, - "learning_rate": 1.966127725188579e-05, - "loss": 1.0472, + "learning_rate": 1.966245765592576e-05, + "loss": 1.0611, "step": 3898 }, { - "epoch": 0.11064131668558456, + "epoch": 0.11048768738133696, "grad_norm": 0.0, - "learning_rate": 1.966104002977957e-05, - "loss": 1.1032, + "learning_rate": 1.9662221172479287e-05, + "loss": 1.0869, "step": 3899 }, { - "epoch": 0.11066969353007945, + "epoch": 0.11051602482359942, "grad_norm": 0.0, - "learning_rate": 1.9660802726066114e-05, - "loss": 1.1075, + "learning_rate": 1.9661984607644332e-05, + "loss": 1.1232, "step": 3900 }, { - "epoch": 0.11069807037457435, + "epoch": 0.11054436226586188, "grad_norm": 0.0, - "learning_rate": 1.9660565340747423e-05, - "loss": 0.9967, + "learning_rate": 1.9661747961422894e-05, + "loss": 1.0048, "step": 3901 }, { - "epoch": 0.11072644721906924, + "epoch": 0.11057269970812435, "grad_norm": 0.0, - "learning_rate": 1.9660327873825503e-05, - "loss": 1.0114, + "learning_rate": 1.9661511233816965e-05, + "loss": 1.0612, "step": 3902 }, { - "epoch": 0.11075482406356413, + "epoch": 0.1106010371503868, "grad_norm": 0.0, - "learning_rate": 1.9660090325302357e-05, - "loss": 1.0151, + "learning_rate": 1.966127442482854e-05, + "loss": 1.0187, "step": 3903 }, { - "epoch": 0.11078320090805903, + "epoch": 0.11062937459264927, "grad_norm": 0.0, - "learning_rate": 1.9659852695179994e-05, - "loss": 1.1564, + "learning_rate": 1.9661037534459614e-05, + "loss": 0.9816, "step": 3904 }, { - "epoch": 0.11081157775255392, + "epoch": 0.11065771203491173, "grad_norm": 0.0, - "learning_rate": 1.965961498346042e-05, - "loss": 1.0523, + "learning_rate": 1.9660800562712183e-05, + "loss": 0.9462, "step": 3905 }, { - "epoch": 0.1108399545970488, + "epoch": 0.11068604947717418, "grad_norm": 0.0, - "learning_rate": 1.9659377190145646e-05, - "loss": 1.0898, + "learning_rate": 1.9660563509588236e-05, + "loss": 0.9537, "step": 3906 }, { - "epoch": 0.1108683314415437, + "epoch": 0.11071438691943665, "grad_norm": 0.0, - "learning_rate": 1.9659139315237677e-05, - "loss": 1.1128, + "learning_rate": 1.966032637508978e-05, + "loss": 1.1274, "step": 3907 }, { - "epoch": 0.11089670828603859, + "epoch": 0.11074272436169912, "grad_norm": 0.0, - "learning_rate": 1.9658901358738526e-05, - "loss": 1.0497, + "learning_rate": 1.9660089159218807e-05, + "loss": 1.0406, "step": 3908 }, { - "epoch": 0.11092508513053348, + "epoch": 0.11077106180396157, "grad_norm": 0.0, - "learning_rate": 1.9658663320650198e-05, - "loss": 1.0126, + "learning_rate": 1.9659851861977316e-05, + "loss": 1.1923, "step": 3909 }, { - "epoch": 0.11095346197502838, + "epoch": 0.11079939924622403, "grad_norm": 0.0, - "learning_rate": 1.9658425200974708e-05, - "loss": 1.0396, + "learning_rate": 1.9659614483367302e-05, + "loss": 1.0758, "step": 3910 }, { - "epoch": 0.11098183881952327, + "epoch": 0.1108277366884865, "grad_norm": 0.0, - "learning_rate": 1.9658186999714067e-05, - "loss": 0.9486, + "learning_rate": 1.965937702339077e-05, + "loss": 1.1283, "step": 3911 }, { - "epoch": 0.11101021566401816, + "epoch": 0.11085607413074895, "grad_norm": 0.0, - "learning_rate": 1.9657948716870287e-05, - "loss": 0.9679, + "learning_rate": 1.965913948204972e-05, + "loss": 1.0097, "step": 3912 }, { - "epoch": 0.11103859250851306, + "epoch": 0.11088441157301142, "grad_norm": 0.0, - "learning_rate": 1.965771035244538e-05, - "loss": 1.0301, + "learning_rate": 1.965890185934615e-05, + "loss": 0.9638, "step": 3913 }, { - "epoch": 0.11106696935300794, + "epoch": 0.11091274901527388, "grad_norm": 0.0, - "learning_rate": 1.9657471906441358e-05, - "loss": 1.129, + "learning_rate": 1.9658664155282065e-05, + "loss": 1.0465, "step": 3914 }, { - "epoch": 0.11109534619750283, + "epoch": 0.11094108645753634, "grad_norm": 0.0, - "learning_rate": 1.9657233378860235e-05, - "loss": 1.1477, + "learning_rate": 1.965842636985946e-05, + "loss": 0.9667, "step": 3915 }, { - "epoch": 0.11112372304199773, + "epoch": 0.1109694238997988, "grad_norm": 0.0, - "learning_rate": 1.9656994769704032e-05, - "loss": 1.1165, + "learning_rate": 1.9658188503080347e-05, + "loss": 1.0955, "step": 3916 }, { - "epoch": 0.11115209988649262, + "epoch": 0.11099776134206127, "grad_norm": 0.0, - "learning_rate": 1.9656756078974758e-05, - "loss": 0.9768, + "learning_rate": 1.9657950554946724e-05, + "loss": 1.0849, "step": 3917 }, { - "epoch": 0.11118047673098752, + "epoch": 0.11102609878432372, "grad_norm": 0.0, - "learning_rate": 1.965651730667443e-05, - "loss": 1.0361, + "learning_rate": 1.9657712525460598e-05, + "loss": 1.0488, "step": 3918 }, { - "epoch": 0.11120885357548241, + "epoch": 0.11105443622658619, "grad_norm": 0.0, - "learning_rate": 1.9656278452805067e-05, - "loss": 1.0682, + "learning_rate": 1.9657474414623974e-05, + "loss": 1.0172, "step": 3919 }, { - "epoch": 0.1112372304199773, + "epoch": 0.11108277366884865, "grad_norm": 0.0, - "learning_rate": 1.9656039517368686e-05, - "loss": 1.0405, + "learning_rate": 1.9657236222438855e-05, + "loss": 0.9377, "step": 3920 }, { - "epoch": 0.1112656072644722, + "epoch": 0.1111111111111111, "grad_norm": 0.0, - "learning_rate": 1.9655800500367304e-05, - "loss": 1.0437, + "learning_rate": 1.9656997948907253e-05, + "loss": 0.962, "step": 3921 }, { - "epoch": 0.11129398410896708, + "epoch": 0.11113944855337357, "grad_norm": 0.0, - "learning_rate": 1.9655561401802945e-05, - "loss": 1.0963, + "learning_rate": 1.9656759594031168e-05, + "loss": 1.1068, "step": 3922 }, { - "epoch": 0.11132236095346197, + "epoch": 0.11116778599563604, "grad_norm": 0.0, - "learning_rate": 1.965532222167762e-05, - "loss": 1.0634, + "learning_rate": 1.965652115781261e-05, + "loss": 0.9587, "step": 3923 }, { - "epoch": 0.11135073779795687, + "epoch": 0.11119612343789849, "grad_norm": 0.0, - "learning_rate": 1.9655082959993358e-05, - "loss": 0.9977, + "learning_rate": 1.965628264025359e-05, + "loss": 1.05, "step": 3924 }, { - "epoch": 0.11137911464245176, + "epoch": 0.11122446088016096, "grad_norm": 0.0, - "learning_rate": 1.965484361675217e-05, - "loss": 1.1841, + "learning_rate": 1.9656044041356116e-05, + "loss": 1.0749, "step": 3925 }, { - "epoch": 0.11140749148694665, + "epoch": 0.11125279832242342, "grad_norm": 0.0, - "learning_rate": 1.965460419195609e-05, - "loss": 1.1149, + "learning_rate": 1.9655805361122197e-05, + "loss": 1.1262, "step": 3926 }, { - "epoch": 0.11143586833144155, + "epoch": 0.11128113576468587, "grad_norm": 0.0, - "learning_rate": 1.9654364685607134e-05, - "loss": 1.0471, + "learning_rate": 1.9655566599553846e-05, + "loss": 1.0028, "step": 3927 }, { - "epoch": 0.11146424517593644, + "epoch": 0.11130947320694834, "grad_norm": 0.0, - "learning_rate": 1.9654125097707324e-05, - "loss": 1.13, + "learning_rate": 1.965532775665307e-05, + "loss": 1.085, "step": 3928 }, { - "epoch": 0.11149262202043132, + "epoch": 0.1113378106492108, "grad_norm": 0.0, - "learning_rate": 1.9653885428258686e-05, - "loss": 0.9367, + "learning_rate": 1.965508883242188e-05, + "loss": 1.1808, "step": 3929 }, { - "epoch": 0.11152099886492622, + "epoch": 0.11136614809147326, "grad_norm": 0.0, - "learning_rate": 1.9653645677263246e-05, - "loss": 1.0815, + "learning_rate": 1.9654849826862295e-05, + "loss": 0.9226, "step": 3930 }, { - "epoch": 0.11154937570942111, + "epoch": 0.11139448553373572, "grad_norm": 0.0, - "learning_rate": 1.965340584472302e-05, - "loss": 1.1346, + "learning_rate": 1.9654610739976325e-05, + "loss": 1.0503, "step": 3931 }, { - "epoch": 0.111577752553916, + "epoch": 0.11142282297599819, "grad_norm": 0.0, - "learning_rate": 1.9653165930640047e-05, - "loss": 1.0823, + "learning_rate": 1.9654371571765983e-05, + "loss": 1.0806, "step": 3932 }, { - "epoch": 0.1116061293984109, + "epoch": 0.11145116041826064, "grad_norm": 0.0, - "learning_rate": 1.9652925935016344e-05, - "loss": 1.0073, + "learning_rate": 1.965413232223328e-05, + "loss": 1.0605, "step": 3933 }, { - "epoch": 0.11163450624290579, + "epoch": 0.11147949786052311, "grad_norm": 0.0, - "learning_rate": 1.9652685857853947e-05, - "loss": 1.0493, + "learning_rate": 1.965389299138024e-05, + "loss": 1.0646, "step": 3934 }, { - "epoch": 0.11166288308740067, + "epoch": 0.11150783530278557, "grad_norm": 0.0, - "learning_rate": 1.9652445699154874e-05, - "loss": 1.0998, + "learning_rate": 1.9653653579208877e-05, + "loss": 0.9535, "step": 3935 }, { - "epoch": 0.11169125993189558, + "epoch": 0.11153617274504803, "grad_norm": 0.0, - "learning_rate": 1.965220545892116e-05, - "loss": 1.0231, + "learning_rate": 1.96534140857212e-05, + "loss": 1.1627, "step": 3936 }, { - "epoch": 0.11171963677639046, + "epoch": 0.11156451018731049, "grad_norm": 0.0, - "learning_rate": 1.9651965137154833e-05, - "loss": 0.953, + "learning_rate": 1.9653174510919234e-05, + "loss": 1.0692, "step": 3937 }, { - "epoch": 0.11174801362088536, + "epoch": 0.11159284762957296, "grad_norm": 0.0, - "learning_rate": 1.965172473385792e-05, - "loss": 1.0634, + "learning_rate": 1.965293485480499e-05, + "loss": 1.0664, "step": 3938 }, { - "epoch": 0.11177639046538025, + "epoch": 0.11162118507183541, "grad_norm": 0.0, - "learning_rate": 1.9651484249032456e-05, - "loss": 1.0216, + "learning_rate": 1.9652695117380496e-05, + "loss": 1.1005, "step": 3939 }, { - "epoch": 0.11180476730987514, + "epoch": 0.11164952251409788, "grad_norm": 0.0, - "learning_rate": 1.965124368268047e-05, - "loss": 0.9788, + "learning_rate": 1.9652455298647766e-05, + "loss": 1.0341, "step": 3940 }, { - "epoch": 0.11183314415437004, + "epoch": 0.11167785995636034, "grad_norm": 0.0, - "learning_rate": 1.9651003034803998e-05, - "loss": 1.0262, + "learning_rate": 1.9652215398608818e-05, + "loss": 1.1476, "step": 3941 }, { - "epoch": 0.11186152099886493, + "epoch": 0.1117061973986228, "grad_norm": 0.0, - "learning_rate": 1.9650762305405065e-05, - "loss": 1.0673, + "learning_rate": 1.9651975417265678e-05, + "loss": 1.0666, "step": 3942 }, { - "epoch": 0.11188989784335981, + "epoch": 0.11173453484088526, "grad_norm": 0.0, - "learning_rate": 1.965052149448571e-05, - "loss": 0.9691, + "learning_rate": 1.965173535462036e-05, + "loss": 0.9252, "step": 3943 }, { - "epoch": 0.11191827468785472, + "epoch": 0.11176287228314773, "grad_norm": 0.0, - "learning_rate": 1.9650280602047965e-05, - "loss": 0.9687, + "learning_rate": 1.96514952106749e-05, + "loss": 0.9138, "step": 3944 }, { - "epoch": 0.1119466515323496, + "epoch": 0.11179120972541018, "grad_norm": 0.0, - "learning_rate": 1.965003962809387e-05, - "loss": 1.0421, + "learning_rate": 1.9651254985431304e-05, + "loss": 1.1385, "step": 3945 }, { - "epoch": 0.11197502837684449, + "epoch": 0.11181954716767264, "grad_norm": 0.0, - "learning_rate": 1.9649798572625457e-05, - "loss": 1.1017, + "learning_rate": 1.965101467889161e-05, + "loss": 1.0026, "step": 3946 }, { - "epoch": 0.11200340522133939, + "epoch": 0.11184788460993511, "grad_norm": 0.0, - "learning_rate": 1.9649557435644757e-05, - "loss": 1.0643, + "learning_rate": 1.965077429105783e-05, + "loss": 1.0312, "step": 3947 }, { - "epoch": 0.11203178206583428, + "epoch": 0.11187622205219756, "grad_norm": 0.0, - "learning_rate": 1.9649316217153812e-05, - "loss": 1.0882, + "learning_rate": 1.9650533821931998e-05, + "loss": 1.0749, "step": 3948 }, { - "epoch": 0.11206015891032917, + "epoch": 0.11190455949446003, "grad_norm": 0.0, - "learning_rate": 1.964907491715466e-05, - "loss": 0.9493, + "learning_rate": 1.9650293271516135e-05, + "loss": 1.076, "step": 3949 }, { - "epoch": 0.11208853575482407, + "epoch": 0.1119328969367225, "grad_norm": 0.0, - "learning_rate": 1.9648833535649342e-05, - "loss": 0.9595, + "learning_rate": 1.9650052639812268e-05, + "loss": 1.1203, "step": 3950 }, { - "epoch": 0.11211691259931895, + "epoch": 0.11196123437898495, "grad_norm": 0.0, - "learning_rate": 1.964859207263989e-05, - "loss": 1.0338, + "learning_rate": 1.964981192682242e-05, + "loss": 1.1374, "step": 3951 }, { - "epoch": 0.11214528944381384, + "epoch": 0.11198957182124741, "grad_norm": 0.0, - "learning_rate": 1.9648350528128346e-05, - "loss": 1.0324, + "learning_rate": 1.964957113254863e-05, + "loss": 1.1822, "step": 3952 }, { - "epoch": 0.11217366628830874, + "epoch": 0.11201790926350988, "grad_norm": 0.0, - "learning_rate": 1.9648108902116754e-05, - "loss": 1.0728, + "learning_rate": 1.9649330256992917e-05, + "loss": 1.0614, "step": 3953 }, { - "epoch": 0.11220204313280363, + "epoch": 0.11204624670577233, "grad_norm": 0.0, - "learning_rate": 1.964786719460715e-05, - "loss": 1.0371, + "learning_rate": 1.9649089300157307e-05, + "loss": 1.0721, "step": 3954 }, { - "epoch": 0.11223041997729852, + "epoch": 0.1120745841480348, "grad_norm": 0.0, - "learning_rate": 1.964762540560158e-05, - "loss": 1.0735, + "learning_rate": 1.9648848262043837e-05, + "loss": 1.0522, "step": 3955 }, { - "epoch": 0.11225879682179342, + "epoch": 0.11210292159029726, "grad_norm": 0.0, - "learning_rate": 1.9647383535102085e-05, - "loss": 1.0755, + "learning_rate": 1.9648607142654537e-05, + "loss": 1.139, "step": 3956 }, { - "epoch": 0.1122871736662883, + "epoch": 0.11213125903255972, "grad_norm": 0.0, - "learning_rate": 1.9647141583110704e-05, - "loss": 0.9772, + "learning_rate": 1.964836594199144e-05, + "loss": 0.9546, "step": 3957 }, { - "epoch": 0.11231555051078321, + "epoch": 0.11215959647482218, "grad_norm": 0.0, - "learning_rate": 1.964689954962949e-05, - "loss": 0.9538, + "learning_rate": 1.964812466005657e-05, + "loss": 1.0715, "step": 3958 }, { - "epoch": 0.1123439273552781, + "epoch": 0.11218793391708465, "grad_norm": 0.0, - "learning_rate": 1.9646657434660478e-05, - "loss": 1.0361, + "learning_rate": 1.964788329685196e-05, + "loss": 0.953, "step": 3959 }, { - "epoch": 0.11237230419977298, + "epoch": 0.1122162713593471, "grad_norm": 0.0, - "learning_rate": 1.964641523820572e-05, - "loss": 0.989, + "learning_rate": 1.964764185237965e-05, + "loss": 1.2307, "step": 3960 }, { - "epoch": 0.11240068104426788, + "epoch": 0.11224460880160957, "grad_norm": 0.0, - "learning_rate": 1.9646172960267254e-05, - "loss": 0.9778, + "learning_rate": 1.964740032664167e-05, + "loss": 1.0446, "step": 3961 }, { - "epoch": 0.11242905788876277, + "epoch": 0.11227294624387203, "grad_norm": 0.0, - "learning_rate": 1.9645930600847134e-05, - "loss": 1.1116, + "learning_rate": 1.964715871964005e-05, + "loss": 1.1701, "step": 3962 }, { - "epoch": 0.11245743473325766, + "epoch": 0.11230128368613448, "grad_norm": 0.0, - "learning_rate": 1.96456881599474e-05, - "loss": 1.0504, + "learning_rate": 1.9646917031376834e-05, + "loss": 0.9512, "step": 3963 }, { - "epoch": 0.11248581157775256, + "epoch": 0.11232962112839695, "grad_norm": 0.0, - "learning_rate": 1.9645445637570112e-05, - "loss": 1.0173, + "learning_rate": 1.9646675261854053e-05, + "loss": 0.972, "step": 3964 }, { - "epoch": 0.11251418842224745, + "epoch": 0.11235795857065942, "grad_norm": 0.0, - "learning_rate": 1.964520303371731e-05, - "loss": 1.1243, + "learning_rate": 1.9646433411073745e-05, + "loss": 1.1536, "step": 3965 }, { - "epoch": 0.11254256526674233, + "epoch": 0.11238629601292187, "grad_norm": 0.0, - "learning_rate": 1.964496034839104e-05, - "loss": 1.0309, + "learning_rate": 1.9646191479037946e-05, + "loss": 1.1202, "step": 3966 }, { - "epoch": 0.11257094211123723, + "epoch": 0.11241463345518433, "grad_norm": 0.0, - "learning_rate": 1.964471758159336e-05, - "loss": 1.0316, + "learning_rate": 1.964594946574869e-05, + "loss": 1.0866, "step": 3967 }, { - "epoch": 0.11259931895573212, + "epoch": 0.1124429708974468, "grad_norm": 0.0, - "learning_rate": 1.9644474733326316e-05, - "loss": 0.9843, + "learning_rate": 1.9645707371208025e-05, + "loss": 1.0669, "step": 3968 }, { - "epoch": 0.11262769580022701, + "epoch": 0.11247130833970925, "grad_norm": 0.0, - "learning_rate": 1.9644231803591964e-05, - "loss": 1.0639, + "learning_rate": 1.9645465195417986e-05, + "loss": 1.0544, "step": 3969 }, { - "epoch": 0.11265607264472191, + "epoch": 0.11249964578197172, "grad_norm": 0.0, - "learning_rate": 1.9643988792392347e-05, - "loss": 1.0083, + "learning_rate": 1.964522293838061e-05, + "loss": 1.0602, "step": 3970 }, { - "epoch": 0.1126844494892168, + "epoch": 0.11252798322423418, "grad_norm": 0.0, - "learning_rate": 1.9643745699729523e-05, - "loss": 1.0775, + "learning_rate": 1.9644980600097935e-05, + "loss": 1.0404, "step": 3971 }, { - "epoch": 0.11271282633371169, + "epoch": 0.11255632066649664, "grad_norm": 0.0, - "learning_rate": 1.9643502525605548e-05, - "loss": 1.0143, + "learning_rate": 1.964473818057201e-05, + "loss": 1.0007, "step": 3972 }, { - "epoch": 0.11274120317820659, + "epoch": 0.1125846581087591, "grad_norm": 0.0, - "learning_rate": 1.9643259270022472e-05, - "loss": 1.0037, + "learning_rate": 1.9644495679804873e-05, + "loss": 0.9317, "step": 3973 }, { - "epoch": 0.11276958002270147, + "epoch": 0.11261299555102157, "grad_norm": 0.0, - "learning_rate": 1.9643015932982355e-05, - "loss": 1.1753, + "learning_rate": 1.9644253097798572e-05, + "loss": 1.1327, "step": 3974 }, { - "epoch": 0.11279795686719636, + "epoch": 0.11264133299328402, "grad_norm": 0.0, - "learning_rate": 1.9642772514487244e-05, - "loss": 1.1593, + "learning_rate": 1.964401043455514e-05, + "loss": 1.058, "step": 3975 }, { - "epoch": 0.11282633371169126, + "epoch": 0.11266967043554649, "grad_norm": 0.0, - "learning_rate": 1.96425290145392e-05, - "loss": 1.0141, + "learning_rate": 1.964376769007663e-05, + "loss": 1.0316, "step": 3976 }, { - "epoch": 0.11285471055618615, + "epoch": 0.11269800787780895, "grad_norm": 0.0, - "learning_rate": 1.9642285433140285e-05, - "loss": 0.9432, + "learning_rate": 1.9643524864365086e-05, + "loss": 1.1106, "step": 3977 }, { - "epoch": 0.11288308740068105, + "epoch": 0.1127263453200714, "grad_norm": 0.0, - "learning_rate": 1.964204177029255e-05, - "loss": 1.0115, + "learning_rate": 1.9643281957422547e-05, + "loss": 1.1522, "step": 3978 }, { - "epoch": 0.11291146424517594, + "epoch": 0.11275468276233387, "grad_norm": 0.0, - "learning_rate": 1.9641798025998057e-05, - "loss": 1.08, + "learning_rate": 1.9643038969251066e-05, + "loss": 1.01, "step": 3979 }, { - "epoch": 0.11293984108967083, + "epoch": 0.11278302020459634, "grad_norm": 0.0, - "learning_rate": 1.9641554200258856e-05, - "loss": 1.0492, + "learning_rate": 1.9642795899852682e-05, + "loss": 1.0763, "step": 3980 }, { - "epoch": 0.11296821793416573, + "epoch": 0.11281135764685879, "grad_norm": 0.0, - "learning_rate": 1.9641310293077017e-05, - "loss": 0.9889, + "learning_rate": 1.964255274922945e-05, + "loss": 1.0755, "step": 3981 }, { - "epoch": 0.11299659477866061, + "epoch": 0.11283969508912126, "grad_norm": 0.0, - "learning_rate": 1.9641066304454596e-05, - "loss": 0.8692, + "learning_rate": 1.964230951738342e-05, + "loss": 0.9864, "step": 3982 }, { - "epoch": 0.1130249716231555, + "epoch": 0.11286803253138372, "grad_norm": 0.0, - "learning_rate": 1.9640822234393654e-05, - "loss": 0.9804, + "learning_rate": 1.964206620431663e-05, + "loss": 1.131, "step": 3983 }, { - "epoch": 0.1130533484676504, + "epoch": 0.11289636997364617, "grad_norm": 0.0, - "learning_rate": 1.9640578082896254e-05, - "loss": 0.9531, + "learning_rate": 1.9641822810031135e-05, + "loss": 1.0988, "step": 3984 }, { - "epoch": 0.11308172531214529, + "epoch": 0.11292470741590864, "grad_norm": 0.0, - "learning_rate": 1.9640333849964456e-05, - "loss": 0.9972, + "learning_rate": 1.9641579334528992e-05, + "loss": 1.1697, "step": 3985 }, { - "epoch": 0.11311010215664018, + "epoch": 0.1129530448581711, "grad_norm": 0.0, - "learning_rate": 1.9640089535600327e-05, - "loss": 1.0086, + "learning_rate": 1.9641335777812243e-05, + "loss": 1.0087, "step": 3986 }, { - "epoch": 0.11313847900113508, + "epoch": 0.11298138230043356, "grad_norm": 0.0, - "learning_rate": 1.963984513980593e-05, - "loss": 1.0038, + "learning_rate": 1.9641092139882943e-05, + "loss": 1.1896, "step": 3987 }, { - "epoch": 0.11316685584562997, + "epoch": 0.11300971974269602, "grad_norm": 0.0, - "learning_rate": 1.9639600662583325e-05, - "loss": 1.1813, + "learning_rate": 1.9640848420743143e-05, + "loss": 1.1455, "step": 3988 }, { - "epoch": 0.11319523269012485, + "epoch": 0.11303805718495849, "grad_norm": 0.0, - "learning_rate": 1.9639356103934578e-05, - "loss": 1.0617, + "learning_rate": 1.96406046203949e-05, + "loss": 1.1293, "step": 3989 }, { - "epoch": 0.11322360953461975, + "epoch": 0.11306639462722094, "grad_norm": 0.0, - "learning_rate": 1.963911146386176e-05, - "loss": 1.0091, + "learning_rate": 1.964036073884026e-05, + "loss": 1.0776, "step": 3990 }, { - "epoch": 0.11325198637911464, + "epoch": 0.11309473206948341, "grad_norm": 0.0, - "learning_rate": 1.9638866742366932e-05, - "loss": 1.0696, + "learning_rate": 1.9640116776081282e-05, + "loss": 1.1054, "step": 3991 }, { - "epoch": 0.11328036322360953, + "epoch": 0.11312306951174587, "grad_norm": 0.0, - "learning_rate": 1.9638621939452166e-05, - "loss": 1.0374, + "learning_rate": 1.963987273212002e-05, + "loss": 1.1144, "step": 3992 }, { - "epoch": 0.11330874006810443, + "epoch": 0.11315140695400833, "grad_norm": 0.0, - "learning_rate": 1.9638377055119522e-05, - "loss": 1.0124, + "learning_rate": 1.9639628606958535e-05, + "loss": 1.0769, "step": 3993 }, { - "epoch": 0.11333711691259932, + "epoch": 0.11317974439627079, "grad_norm": 0.0, - "learning_rate": 1.9638132089371078e-05, - "loss": 1.0608, + "learning_rate": 1.9639384400598876e-05, + "loss": 0.9577, "step": 3994 }, { - "epoch": 0.1133654937570942, + "epoch": 0.11320808183853326, "grad_norm": 0.0, - "learning_rate": 1.96378870422089e-05, - "loss": 0.9642, + "learning_rate": 1.9639140113043102e-05, + "loss": 1.0865, "step": 3995 }, { - "epoch": 0.1133938706015891, + "epoch": 0.11323641928079571, "grad_norm": 0.0, - "learning_rate": 1.9637641913635055e-05, - "loss": 1.0489, + "learning_rate": 1.963889574429327e-05, + "loss": 1.0339, "step": 3996 }, { - "epoch": 0.11342224744608399, + "epoch": 0.11326475672305818, "grad_norm": 0.0, - "learning_rate": 1.9637396703651616e-05, - "loss": 0.9766, + "learning_rate": 1.9638651294351442e-05, + "loss": 1.1285, "step": 3997 }, { - "epoch": 0.1134506242905789, + "epoch": 0.11329309416532064, "grad_norm": 0.0, - "learning_rate": 1.9637151412260654e-05, - "loss": 1.0166, + "learning_rate": 1.963840676321968e-05, + "loss": 1.0587, "step": 3998 }, { - "epoch": 0.11347900113507378, + "epoch": 0.1133214316075831, "grad_norm": 0.0, - "learning_rate": 1.963690603946424e-05, - "loss": 0.9995, + "learning_rate": 1.9638162150900028e-05, + "loss": 1.1453, "step": 3999 }, { - "epoch": 0.11350737797956867, + "epoch": 0.11334976904984556, "grad_norm": 0.0, - "learning_rate": 1.9636660585264447e-05, - "loss": 1.1224, + "learning_rate": 1.9637917457394563e-05, + "loss": 1.173, "step": 4000 }, { - "epoch": 0.11353575482406357, + "epoch": 0.11337810649210803, "grad_norm": 0.0, - "learning_rate": 1.963641504966335e-05, - "loss": 0.9715, + "learning_rate": 1.9637672682705344e-05, + "loss": 1.0817, "step": 4001 }, { - "epoch": 0.11356413166855846, + "epoch": 0.11340644393437048, "grad_norm": 0.0, - "learning_rate": 1.9636169432663022e-05, - "loss": 1.0498, + "learning_rate": 1.963742782683442e-05, + "loss": 1.0807, "step": 4002 }, { - "epoch": 0.11359250851305334, + "epoch": 0.11343478137663295, "grad_norm": 0.0, - "learning_rate": 1.9635923734265536e-05, - "loss": 0.9585, + "learning_rate": 1.963718288978387e-05, + "loss": 1.0661, "step": 4003 }, { - "epoch": 0.11362088535754825, + "epoch": 0.11346311881889541, "grad_norm": 0.0, - "learning_rate": 1.963567795447297e-05, - "loss": 0.9693, + "learning_rate": 1.9636937871555747e-05, + "loss": 1.0477, "step": 4004 }, { - "epoch": 0.11364926220204313, + "epoch": 0.11349145626115786, "grad_norm": 0.0, - "learning_rate": 1.96354320932874e-05, - "loss": 1.1066, + "learning_rate": 1.9636692772152117e-05, + "loss": 1.0582, "step": 4005 }, { - "epoch": 0.11367763904653802, + "epoch": 0.11351979370342033, "grad_norm": 0.0, - "learning_rate": 1.9635186150710898e-05, - "loss": 0.9887, + "learning_rate": 1.9636447591575047e-05, + "loss": 1.1155, "step": 4006 }, { - "epoch": 0.11370601589103292, + "epoch": 0.1135481311456828, "grad_norm": 0.0, - "learning_rate": 1.9634940126745548e-05, - "loss": 1.0702, + "learning_rate": 1.9636202329826602e-05, + "loss": 1.0857, "step": 4007 }, { - "epoch": 0.11373439273552781, + "epoch": 0.11357646858794525, "grad_norm": 0.0, - "learning_rate": 1.9634694021393427e-05, - "loss": 1.0962, + "learning_rate": 1.963595698690884e-05, + "loss": 1.0361, "step": 4008 }, { - "epoch": 0.1137627695800227, + "epoch": 0.11360480603020771, "grad_norm": 0.0, - "learning_rate": 1.9634447834656615e-05, - "loss": 0.9379, + "learning_rate": 1.963571156282384e-05, + "loss": 1.0185, "step": 4009 }, { - "epoch": 0.1137911464245176, + "epoch": 0.11363314347247018, "grad_norm": 0.0, - "learning_rate": 1.9634201566537182e-05, - "loss": 0.9187, + "learning_rate": 1.9635466057573662e-05, + "loss": 1.0672, "step": 4010 }, { - "epoch": 0.11381952326901248, + "epoch": 0.11366148091473263, "grad_norm": 0.0, - "learning_rate": 1.963395521703722e-05, - "loss": 0.98, + "learning_rate": 1.9635220471160375e-05, + "loss": 1.1373, "step": 4011 }, { - "epoch": 0.11384790011350737, + "epoch": 0.1136898183569951, "grad_norm": 0.0, - "learning_rate": 1.9633708786158803e-05, - "loss": 1.0034, + "learning_rate": 1.963497480358605e-05, + "loss": 1.0185, "step": 4012 }, { - "epoch": 0.11387627695800227, + "epoch": 0.11371815579925756, "grad_norm": 0.0, - "learning_rate": 1.9633462273904018e-05, - "loss": 1.0912, + "learning_rate": 1.9634729054852752e-05, + "loss": 1.0482, "step": 4013 }, { - "epoch": 0.11390465380249716, + "epoch": 0.11374649324152002, "grad_norm": 0.0, - "learning_rate": 1.9633215680274943e-05, - "loss": 1.1396, + "learning_rate": 1.9634483224962555e-05, + "loss": 1.0915, "step": 4014 }, { - "epoch": 0.11393303064699205, + "epoch": 0.11377483068378248, "grad_norm": 0.0, - "learning_rate": 1.963296900527366e-05, - "loss": 0.9824, + "learning_rate": 1.963423731391753e-05, + "loss": 1.0652, "step": 4015 }, { - "epoch": 0.11396140749148695, + "epoch": 0.11380316812604495, "grad_norm": 0.0, - "learning_rate": 1.9632722248902257e-05, - "loss": 1.0099, + "learning_rate": 1.963399132171974e-05, + "loss": 1.0959, "step": 4016 }, { - "epoch": 0.11398978433598184, + "epoch": 0.1138315055683074, "grad_norm": 0.0, - "learning_rate": 1.9632475411162814e-05, - "loss": 0.9951, + "learning_rate": 1.9633745248371268e-05, + "loss": 1.0526, "step": 4017 }, { - "epoch": 0.11401816118047674, + "epoch": 0.11385984301056987, "grad_norm": 0.0, - "learning_rate": 1.963222849205742e-05, - "loss": 1.1091, + "learning_rate": 1.9633499093874183e-05, + "loss": 1.0917, "step": 4018 }, { - "epoch": 0.11404653802497162, + "epoch": 0.11388818045283233, "grad_norm": 0.0, - "learning_rate": 1.963198149158816e-05, - "loss": 1.0598, + "learning_rate": 1.9633252858230553e-05, + "loss": 0.9306, "step": 4019 }, { - "epoch": 0.11407491486946651, + "epoch": 0.11391651789509478, "grad_norm": 0.0, - "learning_rate": 1.9631734409757115e-05, - "loss": 1.0353, + "learning_rate": 1.9633006541442464e-05, + "loss": 1.1345, "step": 4020 }, { - "epoch": 0.11410329171396141, + "epoch": 0.11394485533735725, "grad_norm": 0.0, - "learning_rate": 1.963148724656638e-05, - "loss": 1.0403, + "learning_rate": 1.9632760143511976e-05, + "loss": 1.009, "step": 4021 }, { - "epoch": 0.1141316685584563, + "epoch": 0.11397319277961972, "grad_norm": 0.0, - "learning_rate": 1.9631240002018038e-05, - "loss": 1.0114, + "learning_rate": 1.963251366444118e-05, + "loss": 1.2195, "step": 4022 }, { - "epoch": 0.11416004540295119, + "epoch": 0.11400153022188217, "grad_norm": 0.0, - "learning_rate": 1.963099267611418e-05, - "loss": 1.1235, + "learning_rate": 1.963226710423214e-05, + "loss": 1.184, "step": 4023 }, { - "epoch": 0.11418842224744609, + "epoch": 0.11402986766414464, "grad_norm": 0.0, - "learning_rate": 1.9630745268856892e-05, - "loss": 1.0745, + "learning_rate": 1.9632020462886937e-05, + "loss": 1.1285, "step": 4024 }, { - "epoch": 0.11421679909194098, + "epoch": 0.1140582051064071, "grad_norm": 0.0, - "learning_rate": 1.9630497780248268e-05, - "loss": 0.9385, + "learning_rate": 1.963177374040765e-05, + "loss": 0.9666, "step": 4025 }, { - "epoch": 0.11424517593643586, + "epoch": 0.11408654254866955, "grad_norm": 0.0, - "learning_rate": 1.9630250210290397e-05, - "loss": 1.0494, + "learning_rate": 1.9631526936796357e-05, + "loss": 1.0893, "step": 4026 }, { - "epoch": 0.11427355278093076, + "epoch": 0.11411487999093202, "grad_norm": 0.0, - "learning_rate": 1.9630002558985366e-05, - "loss": 0.9693, + "learning_rate": 1.9631280052055136e-05, + "loss": 1.1316, "step": 4027 }, { - "epoch": 0.11430192962542565, + "epoch": 0.11414321743319449, "grad_norm": 0.0, - "learning_rate": 1.962975482633527e-05, - "loss": 1.023, + "learning_rate": 1.9631033086186066e-05, + "loss": 1.0245, "step": 4028 }, { - "epoch": 0.11433030646992054, + "epoch": 0.11417155487545694, "grad_norm": 0.0, - "learning_rate": 1.9629507012342207e-05, - "loss": 1.0156, + "learning_rate": 1.963078603919123e-05, + "loss": 1.0496, "step": 4029 }, { - "epoch": 0.11435868331441544, + "epoch": 0.1141998923177194, "grad_norm": 0.0, - "learning_rate": 1.9629259117008264e-05, - "loss": 1.0849, + "learning_rate": 1.9630538911072702e-05, + "loss": 1.0946, "step": 4030 }, { - "epoch": 0.11438706015891033, + "epoch": 0.11422822975998187, "grad_norm": 0.0, - "learning_rate": 1.9629011140335536e-05, - "loss": 1.103, + "learning_rate": 1.963029170183257e-05, + "loss": 1.074, "step": 4031 }, { - "epoch": 0.11441543700340522, + "epoch": 0.11425656720224432, "grad_norm": 0.0, - "learning_rate": 1.9628763082326113e-05, - "loss": 1.0548, + "learning_rate": 1.963004441147292e-05, + "loss": 1.1071, "step": 4032 }, { - "epoch": 0.11444381384790012, + "epoch": 0.11428490464450679, "grad_norm": 0.0, - "learning_rate": 1.96285149429821e-05, - "loss": 1.0284, + "learning_rate": 1.9629797039995823e-05, + "loss": 1.1352, "step": 4033 }, { - "epoch": 0.114472190692395, + "epoch": 0.11431324208676925, "grad_norm": 0.0, - "learning_rate": 1.9628266722305587e-05, - "loss": 0.9993, + "learning_rate": 1.9629549587403373e-05, + "loss": 1.1239, "step": 4034 }, { - "epoch": 0.11450056753688989, + "epoch": 0.1143415795290317, "grad_norm": 0.0, - "learning_rate": 1.9628018420298675e-05, - "loss": 1.0354, + "learning_rate": 1.962930205369765e-05, + "loss": 1.0708, "step": 4035 }, { - "epoch": 0.11452894438138479, + "epoch": 0.11436991697129417, "grad_norm": 0.0, - "learning_rate": 1.9627770036963457e-05, - "loss": 0.9359, + "learning_rate": 1.9629054438880742e-05, + "loss": 1.0578, "step": 4036 }, { - "epoch": 0.11455732122587968, + "epoch": 0.11439825441355664, "grad_norm": 0.0, - "learning_rate": 1.9627521572302035e-05, - "loss": 0.9583, + "learning_rate": 1.962880674295473e-05, + "loss": 1.0218, "step": 4037 }, { - "epoch": 0.11458569807037458, + "epoch": 0.11442659185581909, "grad_norm": 0.0, - "learning_rate": 1.96272730263165e-05, - "loss": 0.9571, + "learning_rate": 1.9628558965921708e-05, + "loss": 1.1422, "step": 4038 }, { - "epoch": 0.11461407491486947, + "epoch": 0.11445492929808156, "grad_norm": 0.0, - "learning_rate": 1.962702439900896e-05, - "loss": 0.8769, + "learning_rate": 1.9628311107783753e-05, + "loss": 1.0855, "step": 4039 }, { - "epoch": 0.11464245175936436, + "epoch": 0.11448326674034402, "grad_norm": 0.0, - "learning_rate": 1.9626775690381513e-05, - "loss": 1.0424, + "learning_rate": 1.962806316854296e-05, + "loss": 1.036, "step": 4040 }, { - "epoch": 0.11467082860385926, + "epoch": 0.11451160418260647, "grad_norm": 0.0, - "learning_rate": 1.9626526900436257e-05, - "loss": 1.0158, + "learning_rate": 1.9627815148201417e-05, + "loss": 1.0996, "step": 4041 }, { - "epoch": 0.11469920544835414, + "epoch": 0.11453994162486894, "grad_norm": 0.0, - "learning_rate": 1.9626278029175296e-05, - "loss": 0.9816, + "learning_rate": 1.9627567046761207e-05, + "loss": 1.0572, "step": 4042 }, { - "epoch": 0.11472758229284903, + "epoch": 0.1145682790671314, "grad_norm": 0.0, - "learning_rate": 1.9626029076600734e-05, - "loss": 1.0655, + "learning_rate": 1.9627318864224433e-05, + "loss": 0.9373, "step": 4043 }, { - "epoch": 0.11475595913734393, + "epoch": 0.11459661650939386, "grad_norm": 0.0, - "learning_rate": 1.9625780042714672e-05, - "loss": 1.082, + "learning_rate": 1.9627070600593172e-05, + "loss": 1.0891, "step": 4044 }, { - "epoch": 0.11478433598183882, + "epoch": 0.11462495395165632, "grad_norm": 0.0, - "learning_rate": 1.9625530927519206e-05, - "loss": 1.1532, + "learning_rate": 1.962682225586952e-05, + "loss": 1.0108, "step": 4045 }, { - "epoch": 0.11481271282633371, + "epoch": 0.11465329139391879, "grad_norm": 0.0, - "learning_rate": 1.9625281731016455e-05, - "loss": 0.8811, + "learning_rate": 1.962657383005557e-05, + "loss": 1.1385, "step": 4046 }, { - "epoch": 0.11484108967082861, + "epoch": 0.11468162883618124, "grad_norm": 0.0, - "learning_rate": 1.9625032453208516e-05, - "loss": 0.9869, + "learning_rate": 1.9626325323153414e-05, + "loss": 1.0674, "step": 4047 }, { - "epoch": 0.1148694665153235, + "epoch": 0.11470996627844371, "grad_norm": 0.0, - "learning_rate": 1.962478309409749e-05, - "loss": 1.0527, + "learning_rate": 1.9626076735165146e-05, + "loss": 1.2121, "step": 4048 }, { - "epoch": 0.11489784335981838, + "epoch": 0.11473830372070618, "grad_norm": 0.0, - "learning_rate": 1.9624533653685495e-05, - "loss": 0.9981, + "learning_rate": 1.9625828066092857e-05, + "loss": 0.997, "step": 4049 }, { - "epoch": 0.11492622020431328, + "epoch": 0.11476664116296863, "grad_norm": 0.0, - "learning_rate": 1.9624284131974625e-05, - "loss": 1.1624, + "learning_rate": 1.9625579315938644e-05, + "loss": 1.1561, "step": 4050 }, { - "epoch": 0.11495459704880817, + "epoch": 0.1147949786052311, "grad_norm": 0.0, - "learning_rate": 1.9624034528967e-05, - "loss": 1.0918, + "learning_rate": 1.9625330484704604e-05, + "loss": 1.0475, "step": 4051 }, { - "epoch": 0.11498297389330306, + "epoch": 0.11482331604749356, "grad_norm": 0.0, - "learning_rate": 1.962378484466472e-05, - "loss": 1.0639, + "learning_rate": 1.9625081572392832e-05, + "loss": 1.1797, "step": 4052 }, { - "epoch": 0.11501135073779796, + "epoch": 0.11485165348975601, "grad_norm": 0.0, - "learning_rate": 1.9623535079069897e-05, - "loss": 1.0518, + "learning_rate": 1.962483257900542e-05, + "loss": 0.9171, "step": 4053 }, { - "epoch": 0.11503972758229285, + "epoch": 0.11487999093201848, "grad_norm": 0.0, - "learning_rate": 1.962328523218464e-05, - "loss": 0.9777, + "learning_rate": 1.962458350454447e-05, + "loss": 1.0263, "step": 4054 }, { - "epoch": 0.11506810442678773, + "epoch": 0.11490832837428094, "grad_norm": 0.0, - "learning_rate": 1.9623035304011062e-05, - "loss": 1.0412, + "learning_rate": 1.9624334349012083e-05, + "loss": 1.0508, "step": 4055 }, { - "epoch": 0.11509648127128264, + "epoch": 0.1149366658165434, "grad_norm": 0.0, - "learning_rate": 1.9622785294551273e-05, - "loss": 1.0355, + "learning_rate": 1.962408511241035e-05, + "loss": 1.1013, "step": 4056 }, { - "epoch": 0.11512485811577752, + "epoch": 0.11496500325880586, "grad_norm": 0.0, - "learning_rate": 1.9622535203807382e-05, - "loss": 0.9867, + "learning_rate": 1.9623835794741374e-05, + "loss": 1.1262, "step": 4057 }, { - "epoch": 0.11515323496027242, + "epoch": 0.11499334070106833, "grad_norm": 0.0, - "learning_rate": 1.9622285031781505e-05, - "loss": 1.1063, + "learning_rate": 1.962358639600726e-05, + "loss": 1.1195, "step": 4058 }, { - "epoch": 0.11518161180476731, + "epoch": 0.11502167814333078, "grad_norm": 0.0, - "learning_rate": 1.9622034778475753e-05, - "loss": 0.9658, + "learning_rate": 1.96233369162101e-05, + "loss": 0.9871, "step": 4059 }, { - "epoch": 0.1152099886492622, + "epoch": 0.11505001558559325, "grad_norm": 0.0, - "learning_rate": 1.962178444389224e-05, - "loss": 1.01, + "learning_rate": 1.9623087355351998e-05, + "loss": 1.0822, "step": 4060 }, { - "epoch": 0.1152383654937571, + "epoch": 0.11507835302785571, "grad_norm": 0.0, - "learning_rate": 1.962153402803308e-05, - "loss": 1.1501, + "learning_rate": 1.962283771343506e-05, + "loss": 1.0845, "step": 4061 }, { - "epoch": 0.11526674233825199, + "epoch": 0.11510669047011816, "grad_norm": 0.0, - "learning_rate": 1.962128353090039e-05, - "loss": 1.0704, + "learning_rate": 1.9622587990461387e-05, + "loss": 1.0824, "step": 4062 }, { - "epoch": 0.11529511918274687, + "epoch": 0.11513502791238063, "grad_norm": 0.0, - "learning_rate": 1.962103295249629e-05, - "loss": 1.2034, + "learning_rate": 1.9622338186433084e-05, + "loss": 1.098, "step": 4063 }, { - "epoch": 0.11532349602724178, + "epoch": 0.1151633653546431, "grad_norm": 0.0, - "learning_rate": 1.962078229282289e-05, - "loss": 1.1685, + "learning_rate": 1.9622088301352253e-05, + "loss": 1.0461, "step": 4064 }, { - "epoch": 0.11535187287173666, + "epoch": 0.11519170279690555, "grad_norm": 0.0, - "learning_rate": 1.9620531551882305e-05, - "loss": 0.8994, + "learning_rate": 1.9621838335220997e-05, + "loss": 1.0172, "step": 4065 }, { - "epoch": 0.11538024971623155, + "epoch": 0.11522004023916801, "grad_norm": 0.0, - "learning_rate": 1.9620280729676662e-05, - "loss": 1.0763, + "learning_rate": 1.9621588288041423e-05, + "loss": 0.9963, "step": 4066 }, { - "epoch": 0.11540862656072645, + "epoch": 0.11524837768143048, "grad_norm": 0.0, - "learning_rate": 1.9620029826208074e-05, - "loss": 0.9896, + "learning_rate": 1.962133815981564e-05, + "loss": 1.0089, "step": 4067 }, { - "epoch": 0.11543700340522134, + "epoch": 0.11527671512369293, "grad_norm": 0.0, - "learning_rate": 1.9619778841478662e-05, - "loss": 1.03, + "learning_rate": 1.962108795054575e-05, + "loss": 1.0443, "step": 4068 }, { - "epoch": 0.11546538024971623, + "epoch": 0.1153050525659554, "grad_norm": 0.0, - "learning_rate": 1.9619527775490543e-05, - "loss": 1.1281, + "learning_rate": 1.9620837660233866e-05, + "loss": 1.0892, "step": 4069 }, { - "epoch": 0.11549375709421113, + "epoch": 0.11533339000821786, "grad_norm": 0.0, - "learning_rate": 1.9619276628245846e-05, - "loss": 1.0923, + "learning_rate": 1.9620587288882095e-05, + "loss": 1.1014, "step": 4070 }, { - "epoch": 0.11552213393870601, + "epoch": 0.11536172745048032, "grad_norm": 0.0, - "learning_rate": 1.961902539974668e-05, - "loss": 1.2062, + "learning_rate": 1.962033683649254e-05, + "loss": 1.1119, "step": 4071 }, { - "epoch": 0.1155505107832009, + "epoch": 0.11539006489274278, "grad_norm": 0.0, - "learning_rate": 1.961877408999518e-05, - "loss": 1.0784, + "learning_rate": 1.962008630306732e-05, + "loss": 0.8849, "step": 4072 }, { - "epoch": 0.1155788876276958, + "epoch": 0.11541840233500525, "grad_norm": 0.0, - "learning_rate": 1.961852269899346e-05, - "loss": 1.0352, + "learning_rate": 1.9619835688608537e-05, + "loss": 0.9921, "step": 4073 }, { - "epoch": 0.11560726447219069, + "epoch": 0.1154467397772677, "grad_norm": 0.0, - "learning_rate": 1.9618271226743644e-05, - "loss": 1.0471, + "learning_rate": 1.9619584993118308e-05, + "loss": 1.1702, "step": 4074 }, { - "epoch": 0.11563564131668558, + "epoch": 0.11547507721953017, "grad_norm": 0.0, - "learning_rate": 1.9618019673247864e-05, - "loss": 1.0102, + "learning_rate": 1.9619334216598746e-05, + "loss": 1.0501, "step": 4075 }, { - "epoch": 0.11566401816118048, + "epoch": 0.11550341466179263, "grad_norm": 0.0, - "learning_rate": 1.961776803850824e-05, - "loss": 1.0044, + "learning_rate": 1.961908335905195e-05, + "loss": 1.0195, "step": 4076 }, { - "epoch": 0.11569239500567537, + "epoch": 0.11553175210405509, "grad_norm": 0.0, - "learning_rate": 1.961751632252689e-05, - "loss": 1.0907, + "learning_rate": 1.961883242048005e-05, + "loss": 1.0481, "step": 4077 }, { - "epoch": 0.11572077185017027, + "epoch": 0.11556008954631755, "grad_norm": 0.0, - "learning_rate": 1.9617264525305955e-05, - "loss": 0.9081, + "learning_rate": 1.9618581400885156e-05, + "loss": 0.9608, "step": 4078 }, { - "epoch": 0.11574914869466515, + "epoch": 0.11558842698858002, "grad_norm": 0.0, - "learning_rate": 1.9617012646847547e-05, - "loss": 1.0083, + "learning_rate": 1.9618330300269372e-05, + "loss": 1.1041, "step": 4079 }, { - "epoch": 0.11577752553916004, + "epoch": 0.11561676443084247, "grad_norm": 0.0, - "learning_rate": 1.9616760687153807e-05, - "loss": 0.9765, + "learning_rate": 1.961807911863482e-05, + "loss": 1.1039, "step": 4080 }, { - "epoch": 0.11580590238365494, + "epoch": 0.11564510187310494, "grad_norm": 0.0, - "learning_rate": 1.9616508646226853e-05, - "loss": 1.0609, + "learning_rate": 1.9617827855983623e-05, + "loss": 1.0827, "step": 4081 }, { - "epoch": 0.11583427922814983, + "epoch": 0.1156734393153674, "grad_norm": 0.0, - "learning_rate": 1.9616256524068822e-05, - "loss": 0.9904, + "learning_rate": 1.9617576512317888e-05, + "loss": 1.0258, "step": 4082 }, { - "epoch": 0.11586265607264472, + "epoch": 0.11570177675762985, "grad_norm": 0.0, - "learning_rate": 1.9616004320681836e-05, - "loss": 1.0531, + "learning_rate": 1.961732508763973e-05, + "loss": 1.021, "step": 4083 }, { - "epoch": 0.11589103291713962, + "epoch": 0.11573011419989232, "grad_norm": 0.0, - "learning_rate": 1.961575203606803e-05, - "loss": 1.0703, + "learning_rate": 1.9617073581951274e-05, + "loss": 1.1032, "step": 4084 }, { - "epoch": 0.1159194097616345, + "epoch": 0.11575845164215479, "grad_norm": 0.0, - "learning_rate": 1.9615499670229537e-05, - "loss": 1.0513, + "learning_rate": 1.961682199525464e-05, + "loss": 0.9996, "step": 4085 }, { - "epoch": 0.1159477866061294, + "epoch": 0.11578678908441724, "grad_norm": 0.0, - "learning_rate": 1.9615247223168482e-05, - "loss": 0.9217, + "learning_rate": 1.961657032755194e-05, + "loss": 1.0795, "step": 4086 }, { - "epoch": 0.1159761634506243, + "epoch": 0.1158151265266797, "grad_norm": 0.0, - "learning_rate": 1.9614994694887003e-05, - "loss": 1.1132, + "learning_rate": 1.9616318578845294e-05, + "loss": 1.0575, "step": 4087 }, { - "epoch": 0.11600454029511918, + "epoch": 0.11584346396894216, "grad_norm": 0.0, - "learning_rate": 1.9614742085387232e-05, - "loss": 1.0988, + "learning_rate": 1.961606674913683e-05, + "loss": 1.0825, "step": 4088 }, { - "epoch": 0.11603291713961407, + "epoch": 0.11587180141120462, "grad_norm": 0.0, - "learning_rate": 1.9614489394671305e-05, - "loss": 1.011, + "learning_rate": 1.9615814838428662e-05, + "loss": 1.0261, "step": 4089 }, { - "epoch": 0.11606129398410897, + "epoch": 0.11590013885346709, "grad_norm": 0.0, - "learning_rate": 1.961423662274135e-05, - "loss": 1.0355, + "learning_rate": 1.9615562846722915e-05, + "loss": 1.1195, "step": 4090 }, { - "epoch": 0.11608967082860386, + "epoch": 0.11592847629572954, "grad_norm": 0.0, - "learning_rate": 1.961398376959951e-05, - "loss": 1.0412, + "learning_rate": 1.9615310774021715e-05, + "loss": 1.0634, "step": 4091 }, { - "epoch": 0.11611804767309875, + "epoch": 0.115956813737992, "grad_norm": 0.0, - "learning_rate": 1.9613730835247914e-05, - "loss": 1.0043, + "learning_rate": 1.961505862032718e-05, + "loss": 1.0347, "step": 4092 }, { - "epoch": 0.11614642451759365, + "epoch": 0.11598515118025447, "grad_norm": 0.0, - "learning_rate": 1.9613477819688703e-05, - "loss": 1.1013, + "learning_rate": 1.9614806385641433e-05, + "loss": 1.0684, "step": 4093 }, { - "epoch": 0.11617480136208853, + "epoch": 0.11601348862251692, "grad_norm": 0.0, - "learning_rate": 1.961322472292401e-05, - "loss": 0.9677, + "learning_rate": 1.9614554069966606e-05, + "loss": 1.1111, "step": 4094 }, { - "epoch": 0.11620317820658342, + "epoch": 0.11604182606477939, "grad_norm": 0.0, - "learning_rate": 1.9612971544955978e-05, - "loss": 1.0467, + "learning_rate": 1.9614301673304815e-05, + "loss": 0.9959, "step": 4095 }, { - "epoch": 0.11623155505107832, + "epoch": 0.11607016350704186, "grad_norm": 0.0, - "learning_rate": 1.9612718285786743e-05, - "loss": 1.069, + "learning_rate": 1.9614049195658197e-05, + "loss": 1.0498, "step": 4096 }, { - "epoch": 0.11625993189557321, + "epoch": 0.11609850094930431, "grad_norm": 0.0, - "learning_rate": 1.9612464945418444e-05, - "loss": 1.0786, + "learning_rate": 1.961379663702887e-05, + "loss": 1.0192, "step": 4097 }, { - "epoch": 0.11628830874006811, + "epoch": 0.11612683839156678, "grad_norm": 0.0, - "learning_rate": 1.961221152385322e-05, - "loss": 1.1109, + "learning_rate": 1.9613543997418963e-05, + "loss": 1.0269, "step": 4098 }, { - "epoch": 0.116316685584563, + "epoch": 0.11615517583382924, "grad_norm": 0.0, - "learning_rate": 1.9611958021093214e-05, - "loss": 1.0944, + "learning_rate": 1.9613291276830604e-05, + "loss": 1.092, "step": 4099 }, { - "epoch": 0.11634506242905789, + "epoch": 0.1161835132760917, "grad_norm": 0.0, - "learning_rate": 1.961170443714057e-05, - "loss": 1.07, + "learning_rate": 1.9613038475265922e-05, + "loss": 1.1691, "step": 4100 }, { - "epoch": 0.11637343927355279, + "epoch": 0.11621185071835416, "grad_norm": 0.0, - "learning_rate": 1.9611450771997422e-05, - "loss": 1.1453, + "learning_rate": 1.9612785592727048e-05, + "loss": 1.0387, "step": 4101 }, { - "epoch": 0.11640181611804767, + "epoch": 0.11624018816061663, "grad_norm": 0.0, - "learning_rate": 1.9611197025665916e-05, - "loss": 1.0072, + "learning_rate": 1.9612532629216114e-05, + "loss": 0.9898, "step": 4102 }, { - "epoch": 0.11643019296254256, + "epoch": 0.11626852560287908, "grad_norm": 0.0, - "learning_rate": 1.9610943198148203e-05, - "loss": 1.0079, + "learning_rate": 1.9612279584735247e-05, + "loss": 1.0219, "step": 4103 }, { - "epoch": 0.11645856980703746, + "epoch": 0.11629686304514154, "grad_norm": 0.0, - "learning_rate": 1.961068928944642e-05, - "loss": 1.0444, + "learning_rate": 1.961202645928658e-05, + "loss": 1.1041, "step": 4104 }, { - "epoch": 0.11648694665153235, + "epoch": 0.11632520048740401, "grad_norm": 0.0, - "learning_rate": 1.9610435299562706e-05, - "loss": 0.9949, + "learning_rate": 1.961177325287224e-05, + "loss": 1.1206, "step": 4105 }, { - "epoch": 0.11651532349602724, + "epoch": 0.11635353792966646, "grad_norm": 0.0, - "learning_rate": 1.961018122849922e-05, - "loss": 1.0096, + "learning_rate": 1.961151996549437e-05, + "loss": 1.024, "step": 4106 }, { - "epoch": 0.11654370034052214, + "epoch": 0.11638187537192893, "grad_norm": 0.0, - "learning_rate": 1.96099270762581e-05, - "loss": 0.8753, + "learning_rate": 1.9611266597155097e-05, + "loss": 1.1184, "step": 4107 }, { - "epoch": 0.11657207718501703, + "epoch": 0.1164102128141914, "grad_norm": 0.0, - "learning_rate": 1.960967284284149e-05, - "loss": 1.0131, + "learning_rate": 1.9611013147856558e-05, + "loss": 1.1423, "step": 4108 }, { - "epoch": 0.11660045402951191, + "epoch": 0.11643855025645385, "grad_norm": 0.0, - "learning_rate": 1.9609418528251543e-05, - "loss": 1.0196, + "learning_rate": 1.9610759617600883e-05, + "loss": 1.0784, "step": 4109 }, { - "epoch": 0.11662883087400681, + "epoch": 0.11646688769871631, "grad_norm": 0.0, - "learning_rate": 1.9609164132490407e-05, - "loss": 0.9666, + "learning_rate": 1.961050600639021e-05, + "loss": 1.0687, "step": 4110 }, { - "epoch": 0.1166572077185017, + "epoch": 0.11649522514097878, "grad_norm": 0.0, - "learning_rate": 1.9608909655560227e-05, - "loss": 0.9917, + "learning_rate": 1.9610252314226682e-05, + "loss": 1.1033, "step": 4111 }, { - "epoch": 0.11668558456299659, + "epoch": 0.11652356258324123, "grad_norm": 0.0, - "learning_rate": 1.9608655097463156e-05, - "loss": 1.0291, + "learning_rate": 1.9609998541112424e-05, + "loss": 1.051, "step": 4112 }, { - "epoch": 0.11671396140749149, + "epoch": 0.1165519000255037, "grad_norm": 0.0, - "learning_rate": 1.9608400458201348e-05, - "loss": 1.0295, + "learning_rate": 1.9609744687049582e-05, + "loss": 1.0123, "step": 4113 }, { - "epoch": 0.11674233825198638, + "epoch": 0.11658023746776616, "grad_norm": 0.0, - "learning_rate": 1.9608145737776945e-05, - "loss": 1.0032, + "learning_rate": 1.960949075204029e-05, + "loss": 1.1591, "step": 4114 }, { - "epoch": 0.11677071509648126, + "epoch": 0.11660857491002861, "grad_norm": 0.0, - "learning_rate": 1.9607890936192105e-05, - "loss": 1.1292, + "learning_rate": 1.9609236736086695e-05, + "loss": 1.0113, "step": 4115 }, { - "epoch": 0.11679909194097617, + "epoch": 0.11663691235229108, "grad_norm": 0.0, - "learning_rate": 1.9607636053448975e-05, - "loss": 1.0858, + "learning_rate": 1.9608982639190925e-05, + "loss": 1.0366, "step": 4116 }, { - "epoch": 0.11682746878547105, + "epoch": 0.11666524979455355, "grad_norm": 0.0, - "learning_rate": 1.9607381089549715e-05, - "loss": 1.0394, + "learning_rate": 1.960872846135513e-05, + "loss": 1.0426, "step": 4117 }, { - "epoch": 0.11685584562996595, + "epoch": 0.116693587236816, "grad_norm": 0.0, - "learning_rate": 1.9607126044496476e-05, - "loss": 1.0952, + "learning_rate": 1.9608474202581444e-05, + "loss": 0.9921, "step": 4118 }, { - "epoch": 0.11688422247446084, + "epoch": 0.11672192467907846, "grad_norm": 0.0, - "learning_rate": 1.9606870918291405e-05, - "loss": 1.0227, + "learning_rate": 1.960821986287201e-05, + "loss": 0.9918, "step": 4119 }, { - "epoch": 0.11691259931895573, + "epoch": 0.11675026212134093, "grad_norm": 0.0, - "learning_rate": 1.9606615710936668e-05, - "loss": 0.9898, + "learning_rate": 1.9607965442228977e-05, + "loss": 0.9914, "step": 4120 }, { - "epoch": 0.11694097616345063, + "epoch": 0.11677859956360338, "grad_norm": 0.0, - "learning_rate": 1.960636042243442e-05, - "loss": 0.9616, + "learning_rate": 1.9607710940654482e-05, + "loss": 1.1129, "step": 4121 }, { - "epoch": 0.11696935300794552, + "epoch": 0.11680693700586585, "grad_norm": 0.0, - "learning_rate": 1.9606105052786808e-05, - "loss": 1.1262, + "learning_rate": 1.9607456358150668e-05, + "loss": 1.0615, "step": 4122 }, { - "epoch": 0.1169977298524404, + "epoch": 0.11683527444812832, "grad_norm": 0.0, - "learning_rate": 1.9605849601995994e-05, - "loss": 1.1374, + "learning_rate": 1.9607201694719683e-05, + "loss": 1.0159, "step": 4123 }, { - "epoch": 0.1170261066969353, + "epoch": 0.11686361189039077, "grad_norm": 0.0, - "learning_rate": 1.960559407006414e-05, - "loss": 1.0926, + "learning_rate": 1.960694695036367e-05, + "loss": 1.112, "step": 4124 }, { - "epoch": 0.1170544835414302, + "epoch": 0.11689194933265323, "grad_norm": 0.0, - "learning_rate": 1.96053384569934e-05, - "loss": 1.0398, + "learning_rate": 1.9606692125084775e-05, + "loss": 1.005, "step": 4125 }, { - "epoch": 0.11708286038592508, + "epoch": 0.1169202867749157, "grad_norm": 0.0, - "learning_rate": 1.9605082762785933e-05, - "loss": 1.0651, + "learning_rate": 1.9606437218885145e-05, + "loss": 1.0249, "step": 4126 }, { - "epoch": 0.11711123723041998, + "epoch": 0.11694862421717815, "grad_norm": 0.0, - "learning_rate": 1.9604826987443905e-05, - "loss": 1.0579, + "learning_rate": 1.960618223176693e-05, + "loss": 1.0597, "step": 4127 }, { - "epoch": 0.11713961407491487, + "epoch": 0.11697696165944062, "grad_norm": 0.0, - "learning_rate": 1.9604571130969467e-05, - "loss": 0.9307, + "learning_rate": 1.9605927163732274e-05, + "loss": 1.0108, "step": 4128 }, { - "epoch": 0.11716799091940976, + "epoch": 0.11700529910170308, "grad_norm": 0.0, - "learning_rate": 1.9604315193364787e-05, - "loss": 1.1095, + "learning_rate": 1.960567201478332e-05, + "loss": 1.1323, "step": 4129 }, { - "epoch": 0.11719636776390466, + "epoch": 0.11703363654396554, "grad_norm": 0.0, - "learning_rate": 1.9604059174632023e-05, - "loss": 0.9648, + "learning_rate": 1.960541678492223e-05, + "loss": 1.0828, "step": 4130 }, { - "epoch": 0.11722474460839954, + "epoch": 0.117061973986228, "grad_norm": 0.0, - "learning_rate": 1.960380307477334e-05, - "loss": 1.0556, + "learning_rate": 1.9605161474151148e-05, + "loss": 1.0895, "step": 4131 }, { - "epoch": 0.11725312145289443, + "epoch": 0.11709031142849047, "grad_norm": 0.0, - "learning_rate": 1.9603546893790903e-05, - "loss": 1.1156, + "learning_rate": 1.9604906082472223e-05, + "loss": 0.9819, "step": 4132 }, { - "epoch": 0.11728149829738933, + "epoch": 0.11711864887075292, "grad_norm": 0.0, - "learning_rate": 1.960329063168687e-05, - "loss": 1.1877, + "learning_rate": 1.9604650609887604e-05, + "loss": 1.0667, "step": 4133 }, { - "epoch": 0.11730987514188422, + "epoch": 0.11714698631301539, "grad_norm": 0.0, - "learning_rate": 1.960303428846341e-05, - "loss": 1.1138, + "learning_rate": 1.960439505639945e-05, + "loss": 1.0174, "step": 4134 }, { - "epoch": 0.11733825198637911, + "epoch": 0.11717532375527785, "grad_norm": 0.0, - "learning_rate": 1.960277786412269e-05, - "loss": 1.0115, + "learning_rate": 1.9604139422009908e-05, + "loss": 0.9725, "step": 4135 }, { - "epoch": 0.11736662883087401, + "epoch": 0.1172036611975403, "grad_norm": 0.0, - "learning_rate": 1.960252135866687e-05, - "loss": 1.1026, + "learning_rate": 1.960388370672113e-05, + "loss": 1.0257, "step": 4136 }, { - "epoch": 0.1173950056753689, + "epoch": 0.11723199863980277, "grad_norm": 0.0, - "learning_rate": 1.9602264772098123e-05, - "loss": 1.122, + "learning_rate": 1.9603627910535282e-05, + "loss": 0.9785, "step": 4137 }, { - "epoch": 0.1174233825198638, + "epoch": 0.11726033608206524, "grad_norm": 0.0, - "learning_rate": 1.9602008104418615e-05, - "loss": 1.1437, + "learning_rate": 1.9603372033454504e-05, + "loss": 1.2129, "step": 4138 }, { - "epoch": 0.11745175936435868, + "epoch": 0.11728867352432769, "grad_norm": 0.0, - "learning_rate": 1.9601751355630512e-05, - "loss": 1.1245, + "learning_rate": 1.960311607548096e-05, + "loss": 1.0883, "step": 4139 }, { - "epoch": 0.11748013620885357, + "epoch": 0.11731701096659015, "grad_norm": 0.0, - "learning_rate": 1.960149452573598e-05, - "loss": 0.9928, + "learning_rate": 1.96028600366168e-05, + "loss": 1.025, "step": 4140 }, { - "epoch": 0.11750851305334847, + "epoch": 0.11734534840885262, "grad_norm": 0.0, - "learning_rate": 1.9601237614737196e-05, - "loss": 1.0892, + "learning_rate": 1.9602603916864186e-05, + "loss": 1.0925, "step": 4141 }, { - "epoch": 0.11753688989784336, + "epoch": 0.11737368585111507, "grad_norm": 0.0, - "learning_rate": 1.9600980622636328e-05, - "loss": 1.0927, + "learning_rate": 1.9602347716225272e-05, + "loss": 1.1383, "step": 4142 }, { - "epoch": 0.11756526674233825, + "epoch": 0.11740202329337754, "grad_norm": 0.0, - "learning_rate": 1.960072354943554e-05, - "loss": 1.075, + "learning_rate": 1.9602091434702217e-05, + "loss": 1.0051, "step": 4143 }, { - "epoch": 0.11759364358683315, + "epoch": 0.11743036073564, "grad_norm": 0.0, - "learning_rate": 1.9600466395137014e-05, - "loss": 1.0562, + "learning_rate": 1.960183507229718e-05, + "loss": 1.0997, "step": 4144 }, { - "epoch": 0.11762202043132804, + "epoch": 0.11745869817790246, "grad_norm": 0.0, - "learning_rate": 1.9600209159742913e-05, - "loss": 0.9906, + "learning_rate": 1.9601578629012327e-05, + "loss": 1.1526, "step": 4145 }, { - "epoch": 0.11765039727582292, + "epoch": 0.11748703562016492, "grad_norm": 0.0, - "learning_rate": 1.9599951843255415e-05, - "loss": 1.0276, + "learning_rate": 1.9601322104849806e-05, + "loss": 1.1073, "step": 4146 }, { - "epoch": 0.11767877412031783, + "epoch": 0.11751537306242739, "grad_norm": 0.0, - "learning_rate": 1.959969444567669e-05, - "loss": 1.0477, + "learning_rate": 1.9601065499811783e-05, + "loss": 0.9791, "step": 4147 }, { - "epoch": 0.11770715096481271, + "epoch": 0.11754371050468984, "grad_norm": 0.0, - "learning_rate": 1.959943696700892e-05, - "loss": 0.9979, + "learning_rate": 1.960080881390042e-05, + "loss": 1.1336, "step": 4148 }, { - "epoch": 0.1177355278093076, + "epoch": 0.11757204794695231, "grad_norm": 0.0, - "learning_rate": 1.959917940725427e-05, - "loss": 1.175, + "learning_rate": 1.9600552047117883e-05, + "loss": 1.0738, "step": 4149 }, { - "epoch": 0.1177639046538025, + "epoch": 0.11760038538921477, "grad_norm": 0.0, - "learning_rate": 1.9598921766414923e-05, - "loss": 0.9969, + "learning_rate": 1.9600295199466327e-05, + "loss": 1.109, "step": 4150 }, { - "epoch": 0.11779228149829739, + "epoch": 0.11762872283147723, "grad_norm": 0.0, - "learning_rate": 1.959866404449305e-05, - "loss": 1.0733, + "learning_rate": 1.9600038270947923e-05, + "loss": 0.9592, "step": 4151 }, { - "epoch": 0.11782065834279228, + "epoch": 0.11765706027373969, "grad_norm": 0.0, - "learning_rate": 1.959840624149083e-05, - "loss": 1.1105, + "learning_rate": 1.9599781261564827e-05, + "loss": 1.0819, "step": 4152 }, { - "epoch": 0.11784903518728718, + "epoch": 0.11768539771600216, "grad_norm": 0.0, - "learning_rate": 1.959814835741044e-05, - "loss": 1.0872, + "learning_rate": 1.9599524171319214e-05, + "loss": 1.0542, "step": 4153 }, { - "epoch": 0.11787741203178206, + "epoch": 0.11771373515826461, "grad_norm": 0.0, - "learning_rate": 1.959789039225406e-05, - "loss": 1.1017, + "learning_rate": 1.959926700021324e-05, + "loss": 1.1537, "step": 4154 }, { - "epoch": 0.11790578887627695, + "epoch": 0.11774207260052708, "grad_norm": 0.0, - "learning_rate": 1.9597632346023873e-05, - "loss": 1.0811, + "learning_rate": 1.959900974824908e-05, + "loss": 1.005, "step": 4155 }, { - "epoch": 0.11793416572077185, + "epoch": 0.11777041004278954, "grad_norm": 0.0, - "learning_rate": 1.959737421872205e-05, - "loss": 1.1561, + "learning_rate": 1.9598752415428893e-05, + "loss": 1.0484, "step": 4156 }, { - "epoch": 0.11796254256526674, + "epoch": 0.117798747485052, "grad_norm": 0.0, - "learning_rate": 1.9597116010350773e-05, - "loss": 1.0168, + "learning_rate": 1.959849500175485e-05, + "loss": 1.0725, "step": 4157 }, { - "epoch": 0.11799091940976164, + "epoch": 0.11782708492731446, "grad_norm": 0.0, - "learning_rate": 1.959685772091223e-05, - "loss": 1.0173, + "learning_rate": 1.9598237507229122e-05, + "loss": 1.2118, "step": 4158 }, { - "epoch": 0.11801929625425653, + "epoch": 0.11785542236957693, "grad_norm": 0.0, - "learning_rate": 1.9596599350408594e-05, - "loss": 1.0641, + "learning_rate": 1.959797993185387e-05, + "loss": 0.9785, "step": 4159 }, { - "epoch": 0.11804767309875142, + "epoch": 0.11788375981183938, "grad_norm": 0.0, - "learning_rate": 1.959634089884206e-05, - "loss": 0.9816, + "learning_rate": 1.959772227563127e-05, + "loss": 1.0134, "step": 4160 }, { - "epoch": 0.11807604994324632, + "epoch": 0.11791209725410184, "grad_norm": 0.0, - "learning_rate": 1.9596082366214797e-05, - "loss": 0.9308, + "learning_rate": 1.9597464538563495e-05, + "loss": 0.9993, "step": 4161 }, { - "epoch": 0.1181044267877412, + "epoch": 0.11794043469636431, "grad_norm": 0.0, - "learning_rate": 1.9595823752528993e-05, - "loss": 1.0388, + "learning_rate": 1.959720672065271e-05, + "loss": 1.1471, "step": 4162 }, { - "epoch": 0.11813280363223609, + "epoch": 0.11796877213862676, "grad_norm": 0.0, - "learning_rate": 1.9595565057786838e-05, - "loss": 1.0391, + "learning_rate": 1.959694882190109e-05, + "loss": 1.1329, "step": 4163 }, { - "epoch": 0.11816118047673099, + "epoch": 0.11799710958088923, "grad_norm": 0.0, - "learning_rate": 1.9595306281990514e-05, - "loss": 1.0997, + "learning_rate": 1.9596690842310807e-05, + "loss": 1.0154, "step": 4164 }, { - "epoch": 0.11818955732122588, + "epoch": 0.1180254470231517, "grad_norm": 0.0, - "learning_rate": 1.9595047425142204e-05, - "loss": 1.1123, + "learning_rate": 1.9596432781884033e-05, + "loss": 1.0413, "step": 4165 }, { - "epoch": 0.11821793416572077, + "epoch": 0.11805378446541415, "grad_norm": 0.0, - "learning_rate": 1.95947884872441e-05, - "loss": 0.985, + "learning_rate": 1.9596174640622944e-05, + "loss": 0.9599, "step": 4166 }, { - "epoch": 0.11824631101021567, + "epoch": 0.11808212190767661, "grad_norm": 0.0, - "learning_rate": 1.9594529468298383e-05, - "loss": 1.028, + "learning_rate": 1.9595916418529706e-05, + "loss": 0.9699, "step": 4167 }, { - "epoch": 0.11827468785471056, + "epoch": 0.11811045934993908, "grad_norm": 0.0, - "learning_rate": 1.9594270368307247e-05, - "loss": 1.0864, + "learning_rate": 1.9595658115606507e-05, + "loss": 0.9862, "step": 4168 }, { - "epoch": 0.11830306469920544, + "epoch": 0.11813879679220153, "grad_norm": 0.0, - "learning_rate": 1.9594011187272875e-05, - "loss": 1.0398, + "learning_rate": 1.9595399731855514e-05, + "loss": 0.9853, "step": 4169 }, { - "epoch": 0.11833144154370034, + "epoch": 0.118167134234464, "grad_norm": 0.0, - "learning_rate": 1.959375192519746e-05, - "loss": 0.9711, + "learning_rate": 1.9595141267278906e-05, + "loss": 1.1548, "step": 4170 }, { - "epoch": 0.11835981838819523, + "epoch": 0.11819547167672646, "grad_norm": 0.0, - "learning_rate": 1.9593492582083196e-05, - "loss": 1.0677, + "learning_rate": 1.959488272187886e-05, + "loss": 1.081, "step": 4171 }, { - "epoch": 0.11838819523269012, + "epoch": 0.11822380911898892, "grad_norm": 0.0, - "learning_rate": 1.9593233157932265e-05, - "loss": 1.0674, + "learning_rate": 1.9594624095657554e-05, + "loss": 0.9264, "step": 4172 }, { - "epoch": 0.11841657207718502, + "epoch": 0.11825214656125138, "grad_norm": 0.0, - "learning_rate": 1.9592973652746862e-05, - "loss": 1.1241, + "learning_rate": 1.9594365388617165e-05, + "loss": 1.0841, "step": 4173 }, { - "epoch": 0.11844494892167991, + "epoch": 0.11828048400351385, "grad_norm": 0.0, - "learning_rate": 1.959271406652918e-05, - "loss": 1.0658, + "learning_rate": 1.9594106600759875e-05, + "loss": 0.968, "step": 4174 }, { - "epoch": 0.1184733257661748, + "epoch": 0.1183088214457763, "grad_norm": 0.0, - "learning_rate": 1.9592454399281414e-05, - "loss": 1.0431, + "learning_rate": 1.9593847732087863e-05, + "loss": 0.9964, "step": 4175 }, { - "epoch": 0.1185017026106697, + "epoch": 0.11833715888803877, "grad_norm": 0.0, - "learning_rate": 1.959219465100575e-05, - "loss": 1.1276, + "learning_rate": 1.9593588782603307e-05, + "loss": 1.0685, "step": 4176 }, { - "epoch": 0.11853007945516458, + "epoch": 0.11836549633030123, "grad_norm": 0.0, - "learning_rate": 1.959193482170439e-05, - "loss": 1.0502, + "learning_rate": 1.9593329752308392e-05, + "loss": 0.9598, "step": 4177 }, { - "epoch": 0.11855845629965948, + "epoch": 0.11839383377256368, "grad_norm": 0.0, - "learning_rate": 1.9591674911379523e-05, - "loss": 1.0696, + "learning_rate": 1.9593070641205298e-05, + "loss": 1.0769, "step": 4178 }, { - "epoch": 0.11858683314415437, + "epoch": 0.11842217121482615, "grad_norm": 0.0, - "learning_rate": 1.9591414920033345e-05, - "loss": 0.8493, + "learning_rate": 1.9592811449296206e-05, + "loss": 1.0693, "step": 4179 }, { - "epoch": 0.11861520998864926, + "epoch": 0.11845050865708862, "grad_norm": 0.0, - "learning_rate": 1.9591154847668057e-05, - "loss": 1.0138, + "learning_rate": 1.9592552176583305e-05, + "loss": 1.09, "step": 4180 }, { - "epoch": 0.11864358683314416, + "epoch": 0.11847884609935107, "grad_norm": 0.0, - "learning_rate": 1.9590894694285852e-05, - "loss": 1.0174, + "learning_rate": 1.959229282306877e-05, + "loss": 1.2339, "step": 4181 }, { - "epoch": 0.11867196367763905, + "epoch": 0.11850718354161353, "grad_norm": 0.0, - "learning_rate": 1.9590634459888927e-05, - "loss": 1.0338, + "learning_rate": 1.9592033388754792e-05, + "loss": 1.0872, "step": 4182 }, { - "epoch": 0.11870034052213393, + "epoch": 0.118535520983876, "grad_norm": 0.0, - "learning_rate": 1.9590374144479484e-05, - "loss": 1.0905, + "learning_rate": 1.9591773873643555e-05, + "loss": 1.038, "step": 4183 }, { - "epoch": 0.11872871736662884, + "epoch": 0.11856385842613845, "grad_norm": 0.0, - "learning_rate": 1.959011374805972e-05, - "loss": 1.0157, + "learning_rate": 1.9591514277737243e-05, + "loss": 1.0357, "step": 4184 }, { - "epoch": 0.11875709421112372, + "epoch": 0.11859219586840092, "grad_norm": 0.0, - "learning_rate": 1.9589853270631833e-05, - "loss": 1.0246, + "learning_rate": 1.959125460103805e-05, + "loss": 0.996, "step": 4185 }, { - "epoch": 0.11878547105561861, + "epoch": 0.11862053331066338, "grad_norm": 0.0, - "learning_rate": 1.958959271219802e-05, - "loss": 1.0075, + "learning_rate": 1.9590994843548155e-05, + "loss": 1.136, "step": 4186 }, { - "epoch": 0.11881384790011351, + "epoch": 0.11864887075292584, "grad_norm": 0.0, - "learning_rate": 1.9589332072760493e-05, - "loss": 1.0144, + "learning_rate": 1.959073500526975e-05, + "loss": 1.0598, "step": 4187 }, { - "epoch": 0.1188422247446084, + "epoch": 0.1186772081951883, "grad_norm": 0.0, - "learning_rate": 1.9589071352321443e-05, - "loss": 1.0042, + "learning_rate": 1.959047508620502e-05, + "loss": 1.0997, "step": 4188 }, { - "epoch": 0.11887060158910329, + "epoch": 0.11870554563745077, "grad_norm": 0.0, - "learning_rate": 1.958881055088308e-05, - "loss": 1.0262, + "learning_rate": 1.959021508635616e-05, + "loss": 1.1249, "step": 4189 }, { - "epoch": 0.11889897843359819, + "epoch": 0.11873388307971322, "grad_norm": 0.0, - "learning_rate": 1.9588549668447595e-05, - "loss": 0.9699, + "learning_rate": 1.9589955005725354e-05, + "loss": 0.9071, "step": 4190 }, { - "epoch": 0.11892735527809307, + "epoch": 0.11876222052197569, "grad_norm": 0.0, - "learning_rate": 1.9588288705017205e-05, - "loss": 0.9053, + "learning_rate": 1.9589694844314798e-05, + "loss": 1.0572, "step": 4191 }, { - "epoch": 0.11895573212258796, + "epoch": 0.11879055796423815, "grad_norm": 0.0, - "learning_rate": 1.9588027660594114e-05, - "loss": 0.9716, + "learning_rate": 1.9589434602126678e-05, + "loss": 0.9386, "step": 4192 }, { - "epoch": 0.11898410896708286, + "epoch": 0.1188188954065006, "grad_norm": 0.0, - "learning_rate": 1.9587766535180518e-05, - "loss": 1.1319, + "learning_rate": 1.9589174279163192e-05, + "loss": 1.0992, "step": 4193 }, { - "epoch": 0.11901248581157775, + "epoch": 0.11884723284876307, "grad_norm": 0.0, - "learning_rate": 1.9587505328778626e-05, - "loss": 1.0848, + "learning_rate": 1.9588913875426532e-05, + "loss": 0.9205, "step": 4194 }, { - "epoch": 0.11904086265607264, + "epoch": 0.11887557029102554, "grad_norm": 0.0, - "learning_rate": 1.9587244041390646e-05, - "loss": 0.9986, + "learning_rate": 1.9588653390918887e-05, + "loss": 1.0013, "step": 4195 }, { - "epoch": 0.11906923950056754, + "epoch": 0.11890390773328799, "grad_norm": 0.0, - "learning_rate": 1.9586982673018785e-05, - "loss": 1.1578, + "learning_rate": 1.9588392825642453e-05, + "loss": 1.0682, "step": 4196 }, { - "epoch": 0.11909761634506243, + "epoch": 0.11893224517555046, "grad_norm": 0.0, - "learning_rate": 1.958672122366525e-05, - "loss": 1.0323, + "learning_rate": 1.958813217959943e-05, + "loss": 1.0526, "step": 4197 }, { - "epoch": 0.11912599318955733, + "epoch": 0.11896058261781292, "grad_norm": 0.0, - "learning_rate": 1.958645969333225e-05, - "loss": 1.0347, + "learning_rate": 1.9587871452792006e-05, + "loss": 1.1177, "step": 4198 }, { - "epoch": 0.11915437003405221, + "epoch": 0.11898892006007537, "grad_norm": 0.0, - "learning_rate": 1.9586198082021995e-05, - "loss": 1.1457, + "learning_rate": 1.958761064522238e-05, + "loss": 0.9989, "step": 4199 }, { - "epoch": 0.1191827468785471, + "epoch": 0.11901725750233784, "grad_norm": 0.0, - "learning_rate": 1.9585936389736695e-05, - "loss": 1.1045, + "learning_rate": 1.958734975689275e-05, + "loss": 1.0394, "step": 4200 }, { - "epoch": 0.119211123723042, + "epoch": 0.1190455949446003, "grad_norm": 0.0, - "learning_rate": 1.9585674616478558e-05, - "loss": 1.189, + "learning_rate": 1.958708878780531e-05, + "loss": 1.0621, "step": 4201 }, { - "epoch": 0.11923950056753689, + "epoch": 0.11907393238686276, "grad_norm": 0.0, - "learning_rate": 1.9585412762249798e-05, - "loss": 1.181, + "learning_rate": 1.9586827737962264e-05, + "loss": 1.0398, "step": 4202 }, { - "epoch": 0.11926787741203178, + "epoch": 0.11910226982912522, "grad_norm": 0.0, - "learning_rate": 1.9585150827052622e-05, - "loss": 0.964, + "learning_rate": 1.958656660736581e-05, + "loss": 1.0603, "step": 4203 }, { - "epoch": 0.11929625425652668, + "epoch": 0.11913060727138769, "grad_norm": 0.0, - "learning_rate": 1.958488881088925e-05, - "loss": 1.0262, + "learning_rate": 1.9586305396018145e-05, + "loss": 1.0001, "step": 4204 }, { - "epoch": 0.11932463110102157, + "epoch": 0.11915894471365014, "grad_norm": 0.0, - "learning_rate": 1.958462671376189e-05, - "loss": 1.0238, + "learning_rate": 1.9586044103921468e-05, + "loss": 1.1084, "step": 4205 }, { - "epoch": 0.11935300794551645, + "epoch": 0.11918728215591261, "grad_norm": 0.0, - "learning_rate": 1.9584364535672753e-05, - "loss": 0.9445, + "learning_rate": 1.958578273107798e-05, + "loss": 1.0641, "step": 4206 }, { - "epoch": 0.11938138479001136, + "epoch": 0.11921561959817507, "grad_norm": 0.0, - "learning_rate": 1.958410227662406e-05, - "loss": 1.08, + "learning_rate": 1.9585521277489886e-05, + "loss": 1.0673, "step": 4207 }, { - "epoch": 0.11940976163450624, + "epoch": 0.11924395704043753, "grad_norm": 0.0, - "learning_rate": 1.958383993661803e-05, - "loss": 1.084, + "learning_rate": 1.9585259743159388e-05, + "loss": 1.1565, "step": 4208 }, { - "epoch": 0.11943813847900113, + "epoch": 0.11927229448269999, "grad_norm": 0.0, - "learning_rate": 1.9583577515656867e-05, - "loss": 0.9503, + "learning_rate": 1.9584998128088686e-05, + "loss": 1.0882, "step": 4209 }, { - "epoch": 0.11946651532349603, + "epoch": 0.11930063192496246, "grad_norm": 0.0, - "learning_rate": 1.9583315013742798e-05, - "loss": 1.1407, + "learning_rate": 1.9584736432279986e-05, + "loss": 1.0242, "step": 4210 }, { - "epoch": 0.11949489216799092, + "epoch": 0.11932896936722491, "grad_norm": 0.0, - "learning_rate": 1.9583052430878035e-05, - "loss": 1.1064, + "learning_rate": 1.9584474655735493e-05, + "loss": 1.0747, "step": 4211 }, { - "epoch": 0.1195232690124858, + "epoch": 0.11935730680948738, "grad_norm": 0.0, - "learning_rate": 1.9582789767064795e-05, - "loss": 1.0126, + "learning_rate": 1.958421279845741e-05, + "loss": 1.1447, "step": 4212 }, { - "epoch": 0.1195516458569807, + "epoch": 0.11938564425174984, "grad_norm": 0.0, - "learning_rate": 1.9582527022305303e-05, - "loss": 1.0219, + "learning_rate": 1.9583950860447944e-05, + "loss": 1.0988, "step": 4213 }, { - "epoch": 0.1195800227014756, + "epoch": 0.1194139816940123, "grad_norm": 0.0, - "learning_rate": 1.958226419660177e-05, - "loss": 1.0579, + "learning_rate": 1.9583688841709302e-05, + "loss": 1.1292, "step": 4214 }, { - "epoch": 0.11960839954597048, + "epoch": 0.11944231913627476, "grad_norm": 0.0, - "learning_rate": 1.9582001289956423e-05, - "loss": 0.9217, + "learning_rate": 1.958342674224369e-05, + "loss": 1.131, "step": 4215 }, { - "epoch": 0.11963677639046538, + "epoch": 0.11947065657853723, "grad_norm": 0.0, - "learning_rate": 1.958173830237148e-05, - "loss": 1.0652, + "learning_rate": 1.9583164562053314e-05, + "loss": 1.1292, "step": 4216 }, { - "epoch": 0.11966515323496027, + "epoch": 0.11949899402079968, "grad_norm": 0.0, - "learning_rate": 1.9581475233849165e-05, - "loss": 1.0363, + "learning_rate": 1.9582902301140386e-05, + "loss": 1.0299, "step": 4217 }, { - "epoch": 0.11969353007945517, + "epoch": 0.11952733146306214, "grad_norm": 0.0, - "learning_rate": 1.9581212084391693e-05, - "loss": 1.0451, + "learning_rate": 1.9582639959507113e-05, + "loss": 1.0387, "step": 4218 }, { - "epoch": 0.11972190692395006, + "epoch": 0.11955566890532461, "grad_norm": 0.0, - "learning_rate": 1.9580948854001293e-05, - "loss": 1.0936, + "learning_rate": 1.9582377537155703e-05, + "loss": 1.0843, "step": 4219 }, { - "epoch": 0.11975028376844495, + "epoch": 0.11958400634758706, "grad_norm": 0.0, - "learning_rate": 1.9580685542680192e-05, - "loss": 0.8717, + "learning_rate": 1.958211503408837e-05, + "loss": 1.0, "step": 4220 }, { - "epoch": 0.11977866061293985, + "epoch": 0.11961234378984953, "grad_norm": 0.0, - "learning_rate": 1.958042215043061e-05, - "loss": 0.972, + "learning_rate": 1.9581852450307324e-05, + "loss": 0.9897, "step": 4221 }, { - "epoch": 0.11980703745743473, + "epoch": 0.119640681232112, "grad_norm": 0.0, - "learning_rate": 1.9580158677254762e-05, - "loss": 1.0518, + "learning_rate": 1.958158978581478e-05, + "loss": 1.1543, "step": 4222 }, { - "epoch": 0.11983541430192962, + "epoch": 0.11966901867437445, "grad_norm": 0.0, - "learning_rate": 1.957989512315489e-05, - "loss": 0.9891, + "learning_rate": 1.9581327040612942e-05, + "loss": 1.0612, "step": 4223 }, { - "epoch": 0.11986379114642452, + "epoch": 0.11969735611663691, "grad_norm": 0.0, - "learning_rate": 1.957963148813321e-05, - "loss": 1.0074, + "learning_rate": 1.958106421470403e-05, + "loss": 1.0738, "step": 4224 }, { - "epoch": 0.11989216799091941, + "epoch": 0.11972569355889938, "grad_norm": 0.0, - "learning_rate": 1.9579367772191956e-05, - "loss": 1.0855, + "learning_rate": 1.9580801308090257e-05, + "loss": 1.0492, "step": 4225 }, { - "epoch": 0.1199205448354143, + "epoch": 0.11975403100116183, "grad_norm": 0.0, - "learning_rate": 1.957910397533335e-05, - "loss": 1.0192, + "learning_rate": 1.9580538320773838e-05, + "loss": 0.9818, "step": 4226 }, { - "epoch": 0.1199489216799092, + "epoch": 0.1197823684434243, "grad_norm": 0.0, - "learning_rate": 1.9578840097559624e-05, - "loss": 1.0047, + "learning_rate": 1.9580275252756987e-05, + "loss": 1.071, "step": 4227 }, { - "epoch": 0.11997729852440409, + "epoch": 0.11981070588568676, "grad_norm": 0.0, - "learning_rate": 1.9578576138873003e-05, - "loss": 1.1127, + "learning_rate": 1.9580012104041916e-05, + "loss": 1.1495, "step": 4228 }, { - "epoch": 0.12000567536889897, + "epoch": 0.11983904332794922, "grad_norm": 0.0, - "learning_rate": 1.957831209927572e-05, - "loss": 1.0288, + "learning_rate": 1.957974887463085e-05, + "loss": 1.0062, "step": 4229 }, { - "epoch": 0.12003405221339387, + "epoch": 0.11986738077021168, "grad_norm": 0.0, - "learning_rate": 1.9578047978770003e-05, - "loss": 1.0513, + "learning_rate": 1.9579485564526e-05, + "loss": 1.0847, "step": 4230 }, { - "epoch": 0.12006242905788876, + "epoch": 0.11989571821247415, "grad_norm": 0.0, - "learning_rate": 1.9577783777358085e-05, - "loss": 1.0339, + "learning_rate": 1.9579222173729585e-05, + "loss": 1.1503, "step": 4231 }, { - "epoch": 0.12009080590238365, + "epoch": 0.1199240556547366, "grad_norm": 0.0, - "learning_rate": 1.9577519495042194e-05, - "loss": 1.1266, + "learning_rate": 1.9578958702243827e-05, + "loss": 1.0822, "step": 4232 }, { - "epoch": 0.12011918274687855, + "epoch": 0.11995239309699907, "grad_norm": 0.0, - "learning_rate": 1.9577255131824568e-05, - "loss": 1.0908, + "learning_rate": 1.957869515007094e-05, + "loss": 1.0073, "step": 4233 }, { - "epoch": 0.12014755959137344, + "epoch": 0.11998073053926153, "grad_norm": 0.0, - "learning_rate": 1.9576990687707437e-05, - "loss": 1.055, + "learning_rate": 1.9578431517213148e-05, + "loss": 1.0863, "step": 4234 }, { - "epoch": 0.12017593643586832, + "epoch": 0.12000906798152398, "grad_norm": 0.0, - "learning_rate": 1.9576726162693036e-05, - "loss": 1.103, + "learning_rate": 1.9578167803672668e-05, + "loss": 1.1052, "step": 4235 }, { - "epoch": 0.12020431328036323, + "epoch": 0.12003740542378645, "grad_norm": 0.0, - "learning_rate": 1.9576461556783597e-05, - "loss": 1.0001, + "learning_rate": 1.9577904009451728e-05, + "loss": 1.0472, "step": 4236 }, { - "epoch": 0.12023269012485811, + "epoch": 0.12006574286604892, "grad_norm": 0.0, - "learning_rate": 1.957619686998136e-05, - "loss": 0.929, + "learning_rate": 1.9577640134552546e-05, + "loss": 1.1091, "step": 4237 }, { - "epoch": 0.12026106696935301, + "epoch": 0.12009408030831137, "grad_norm": 0.0, - "learning_rate": 1.9575932102288553e-05, - "loss": 1.0137, + "learning_rate": 1.957737617897734e-05, + "loss": 0.9411, "step": 4238 }, { - "epoch": 0.1202894438138479, + "epoch": 0.12012241775057383, "grad_norm": 0.0, - "learning_rate": 1.957566725370742e-05, - "loss": 1.0313, + "learning_rate": 1.957711214272834e-05, + "loss": 1.1331, "step": 4239 }, { - "epoch": 0.12031782065834279, + "epoch": 0.1201507551928363, "grad_norm": 0.0, - "learning_rate": 1.9575402324240195e-05, - "loss": 1.0002, + "learning_rate": 1.9576848025807772e-05, + "loss": 1.0207, "step": 4240 }, { - "epoch": 0.12034619750283769, + "epoch": 0.12017909263509875, "grad_norm": 0.0, - "learning_rate": 1.9575137313889115e-05, - "loss": 1.032, + "learning_rate": 1.9576583828217854e-05, + "loss": 0.9914, "step": 4241 }, { - "epoch": 0.12037457434733258, + "epoch": 0.12020743007736122, "grad_norm": 0.0, - "learning_rate": 1.957487222265642e-05, - "loss": 1.0192, + "learning_rate": 1.9576319549960813e-05, + "loss": 0.953, "step": 4242 }, { - "epoch": 0.12040295119182746, + "epoch": 0.12023576751962368, "grad_norm": 0.0, - "learning_rate": 1.9574607050544346e-05, - "loss": 1.1024, + "learning_rate": 1.9576055191038877e-05, + "loss": 1.1521, "step": 4243 }, { - "epoch": 0.12043132803632237, + "epoch": 0.12026410496188614, "grad_norm": 0.0, - "learning_rate": 1.9574341797555143e-05, - "loss": 0.9782, + "learning_rate": 1.9575790751454274e-05, + "loss": 0.9651, "step": 4244 }, { - "epoch": 0.12045970488081725, + "epoch": 0.1202924424041486, "grad_norm": 0.0, - "learning_rate": 1.957407646369104e-05, - "loss": 1.1007, + "learning_rate": 1.9575526231209224e-05, + "loss": 1.0564, "step": 4245 }, { - "epoch": 0.12048808172531214, + "epoch": 0.12032077984641107, "grad_norm": 0.0, - "learning_rate": 1.957381104895428e-05, - "loss": 1.028, + "learning_rate": 1.9575261630305966e-05, + "loss": 0.9615, "step": 4246 }, { - "epoch": 0.12051645856980704, + "epoch": 0.12034911728867352, "grad_norm": 0.0, - "learning_rate": 1.9573545553347114e-05, - "loss": 0.9325, + "learning_rate": 1.9574996948746724e-05, + "loss": 1.0353, "step": 4247 }, { - "epoch": 0.12054483541430193, + "epoch": 0.12037745473093599, "grad_norm": 0.0, - "learning_rate": 1.9573279976871772e-05, - "loss": 1.0269, + "learning_rate": 1.9574732186533723e-05, + "loss": 1.0507, "step": 4248 }, { - "epoch": 0.12057321225879682, + "epoch": 0.12040579217319845, "grad_norm": 0.0, - "learning_rate": 1.957301431953051e-05, - "loss": 1.0361, + "learning_rate": 1.9574467343669202e-05, + "loss": 0.9071, "step": 4249 }, { - "epoch": 0.12060158910329172, + "epoch": 0.1204341296154609, "grad_norm": 0.0, - "learning_rate": 1.9572748581325564e-05, - "loss": 1.1081, + "learning_rate": 1.9574202420155384e-05, + "loss": 1.1193, "step": 4250 }, { - "epoch": 0.1206299659477866, + "epoch": 0.12046246705772337, "grad_norm": 0.0, - "learning_rate": 1.9572482762259176e-05, - "loss": 1.0004, + "learning_rate": 1.9573937415994506e-05, + "loss": 1.1124, "step": 4251 }, { - "epoch": 0.12065834279228149, + "epoch": 0.12049080449998584, "grad_norm": 0.0, - "learning_rate": 1.9572216862333597e-05, - "loss": 1.0369, + "learning_rate": 1.9573672331188797e-05, + "loss": 0.9905, "step": 4252 }, { - "epoch": 0.1206867196367764, + "epoch": 0.12051914194224829, "grad_norm": 0.0, - "learning_rate": 1.9571950881551073e-05, - "loss": 1.1804, + "learning_rate": 1.957340716574049e-05, + "loss": 1.0326, "step": 4253 }, { - "epoch": 0.12071509648127128, + "epoch": 0.12054747938451076, "grad_norm": 0.0, - "learning_rate": 1.9571684819913853e-05, - "loss": 1.0, + "learning_rate": 1.957314191965182e-05, + "loss": 1.0075, "step": 4254 }, { - "epoch": 0.12074347332576617, + "epoch": 0.12057581682677322, "grad_norm": 0.0, - "learning_rate": 1.9571418677424177e-05, - "loss": 0.9698, + "learning_rate": 1.957287659292502e-05, + "loss": 1.0006, "step": 4255 }, { - "epoch": 0.12077185017026107, + "epoch": 0.12060415426903567, "grad_norm": 0.0, - "learning_rate": 1.95711524540843e-05, - "loss": 1.0908, + "learning_rate": 1.9572611185562323e-05, + "loss": 1.0345, "step": 4256 }, { - "epoch": 0.12080022701475596, + "epoch": 0.12063249171129814, "grad_norm": 0.0, - "learning_rate": 1.9570886149896467e-05, - "loss": 0.9986, + "learning_rate": 1.9572345697565973e-05, + "loss": 1.0372, "step": 4257 }, { - "epoch": 0.12082860385925086, + "epoch": 0.1206608291535606, "grad_norm": 0.0, - "learning_rate": 1.9570619764862925e-05, - "loss": 1.0992, + "learning_rate": 1.95720801289382e-05, + "loss": 1.0657, "step": 4258 }, { - "epoch": 0.12085698070374574, + "epoch": 0.12068916659582306, "grad_norm": 0.0, - "learning_rate": 1.957035329898593e-05, - "loss": 1.0654, + "learning_rate": 1.9571814479681236e-05, + "loss": 1.0475, "step": 4259 }, { - "epoch": 0.12088535754824063, + "epoch": 0.12071750403808552, "grad_norm": 0.0, - "learning_rate": 1.9570086752267734e-05, - "loss": 0.9616, + "learning_rate": 1.9571548749797328e-05, + "loss": 1.0995, "step": 4260 }, { - "epoch": 0.12091373439273553, + "epoch": 0.12074584148034799, "grad_norm": 0.0, - "learning_rate": 1.956982012471058e-05, - "loss": 1.0689, + "learning_rate": 1.957128293928871e-05, + "loss": 1.0169, "step": 4261 }, { - "epoch": 0.12094211123723042, + "epoch": 0.12077417892261044, "grad_norm": 0.0, - "learning_rate": 1.9569553416316725e-05, - "loss": 0.9921, + "learning_rate": 1.9571017048157627e-05, + "loss": 1.0509, "step": 4262 }, { - "epoch": 0.12097048808172531, + "epoch": 0.12080251636487291, "grad_norm": 0.0, - "learning_rate": 1.9569286627088426e-05, - "loss": 1.0349, + "learning_rate": 1.9570751076406304e-05, + "loss": 0.99, "step": 4263 }, { - "epoch": 0.12099886492622021, + "epoch": 0.12083085380713537, "grad_norm": 0.0, - "learning_rate": 1.956901975702793e-05, - "loss": 1.0918, + "learning_rate": 1.9570485024036997e-05, + "loss": 1.0786, "step": 4264 }, { - "epoch": 0.1210272417707151, + "epoch": 0.12085919124939783, "grad_norm": 0.0, - "learning_rate": 1.9568752806137495e-05, - "loss": 0.9783, + "learning_rate": 1.957021889105194e-05, + "loss": 1.0226, "step": 4265 }, { - "epoch": 0.12105561861520998, + "epoch": 0.12088752869166029, "grad_norm": 0.0, - "learning_rate": 1.9568485774419377e-05, - "loss": 1.0912, + "learning_rate": 1.9569952677453373e-05, + "loss": 1.0514, "step": 4266 }, { - "epoch": 0.12108399545970489, + "epoch": 0.12091586613392276, "grad_norm": 0.0, - "learning_rate": 1.9568218661875827e-05, - "loss": 0.9618, + "learning_rate": 1.9569686383243542e-05, + "loss": 1.1209, "step": 4267 }, { - "epoch": 0.12111237230419977, + "epoch": 0.12094420357618521, "grad_norm": 0.0, - "learning_rate": 1.9567951468509104e-05, - "loss": 1.1501, + "learning_rate": 1.956942000842469e-05, + "loss": 1.0201, "step": 4268 }, { - "epoch": 0.12114074914869466, + "epoch": 0.12097254101844768, "grad_norm": 0.0, - "learning_rate": 1.9567684194321462e-05, - "loss": 0.95, + "learning_rate": 1.9569153552999057e-05, + "loss": 1.0098, "step": 4269 }, { - "epoch": 0.12116912599318956, + "epoch": 0.12100087846071014, "grad_norm": 0.0, - "learning_rate": 1.9567416839315168e-05, - "loss": 0.9897, + "learning_rate": 1.9568887016968895e-05, + "loss": 1.0786, "step": 4270 }, { - "epoch": 0.12119750283768445, + "epoch": 0.1210292159029726, "grad_norm": 0.0, - "learning_rate": 1.9567149403492467e-05, - "loss": 1.0158, + "learning_rate": 1.9568620400336437e-05, + "loss": 1.0023, "step": 4271 }, { - "epoch": 0.12122587968217934, + "epoch": 0.12105755334523506, "grad_norm": 0.0, - "learning_rate": 1.956688188685563e-05, - "loss": 1.0293, + "learning_rate": 1.9568353703103945e-05, + "loss": 0.9704, "step": 4272 }, { - "epoch": 0.12125425652667424, + "epoch": 0.12108589078749753, "grad_norm": 0.0, - "learning_rate": 1.956661428940691e-05, - "loss": 1.1032, + "learning_rate": 1.956808692527365e-05, + "loss": 0.966, "step": 4273 }, { - "epoch": 0.12128263337116912, + "epoch": 0.12111422822975998, "grad_norm": 0.0, - "learning_rate": 1.956634661114857e-05, - "loss": 1.0709, + "learning_rate": 1.956782006684781e-05, + "loss": 1.0891, "step": 4274 }, { - "epoch": 0.12131101021566401, + "epoch": 0.12114256567202245, "grad_norm": 0.0, - "learning_rate": 1.956607885208287e-05, - "loss": 1.0605, + "learning_rate": 1.9567553127828663e-05, + "loss": 1.034, "step": 4275 }, { - "epoch": 0.12133938706015891, + "epoch": 0.12117090311428491, "grad_norm": 0.0, - "learning_rate": 1.956581101221207e-05, - "loss": 1.1083, + "learning_rate": 1.9567286108218467e-05, + "loss": 1.1286, "step": 4276 }, { - "epoch": 0.1213677639046538, + "epoch": 0.12119924055654736, "grad_norm": 0.0, - "learning_rate": 1.9565543091538436e-05, - "loss": 1.1079, + "learning_rate": 1.9567019008019467e-05, + "loss": 1.0532, "step": 4277 }, { - "epoch": 0.1213961407491487, + "epoch": 0.12122757799880983, "grad_norm": 0.0, - "learning_rate": 1.956527509006423e-05, - "loss": 1.0455, + "learning_rate": 1.9566751827233914e-05, + "loss": 1.0286, "step": 4278 }, { - "epoch": 0.12142451759364359, + "epoch": 0.1212559154410723, "grad_norm": 0.0, - "learning_rate": 1.9565007007791713e-05, - "loss": 1.029, + "learning_rate": 1.9566484565864056e-05, + "loss": 1.0945, "step": 4279 }, { - "epoch": 0.12145289443813848, + "epoch": 0.12128425288333475, "grad_norm": 0.0, - "learning_rate": 1.9564738844723153e-05, - "loss": 1.0315, + "learning_rate": 1.956621722391215e-05, + "loss": 1.006, "step": 4280 }, { - "epoch": 0.12148127128263338, + "epoch": 0.12131259032559721, "grad_norm": 0.0, - "learning_rate": 1.9564470600860816e-05, - "loss": 1.0883, + "learning_rate": 1.9565949801380442e-05, + "loss": 1.1116, "step": 4281 }, { - "epoch": 0.12150964812712826, + "epoch": 0.12134092776785968, "grad_norm": 0.0, - "learning_rate": 1.956420227620696e-05, - "loss": 0.927, + "learning_rate": 1.9565682298271186e-05, + "loss": 1.1235, "step": 4282 }, { - "epoch": 0.12153802497162315, + "epoch": 0.12136926521012213, "grad_norm": 0.0, - "learning_rate": 1.9563933870763864e-05, - "loss": 1.0752, + "learning_rate": 1.9565414714586636e-05, + "loss": 1.1007, "step": 4283 }, { - "epoch": 0.12156640181611805, + "epoch": 0.1213976026523846, "grad_norm": 0.0, - "learning_rate": 1.9563665384533786e-05, - "loss": 1.2196, + "learning_rate": 1.9565147050329046e-05, + "loss": 1.133, "step": 4284 }, { - "epoch": 0.12159477866061294, + "epoch": 0.12142594009464706, "grad_norm": 0.0, - "learning_rate": 1.9563396817518994e-05, - "loss": 1.0078, + "learning_rate": 1.956487930550067e-05, + "loss": 1.0222, "step": 4285 }, { - "epoch": 0.12162315550510783, + "epoch": 0.12145427753690952, "grad_norm": 0.0, - "learning_rate": 1.9563128169721763e-05, - "loss": 0.9704, + "learning_rate": 1.9564611480103762e-05, + "loss": 0.9623, "step": 4286 }, { - "epoch": 0.12165153234960273, + "epoch": 0.12148261497917198, "grad_norm": 0.0, - "learning_rate": 1.9562859441144357e-05, - "loss": 0.9243, + "learning_rate": 1.9564343574140585e-05, + "loss": 1.0948, "step": 4287 }, { - "epoch": 0.12167990919409762, + "epoch": 0.12151095242143443, "grad_norm": 0.0, - "learning_rate": 1.9562590631789045e-05, - "loss": 1.108, + "learning_rate": 1.9564075587613386e-05, + "loss": 1.1006, "step": 4288 }, { - "epoch": 0.1217082860385925, + "epoch": 0.1215392898636969, "grad_norm": 0.0, - "learning_rate": 1.9562321741658106e-05, - "loss": 0.9806, + "learning_rate": 1.9563807520524426e-05, + "loss": 1.0616, "step": 4289 }, { - "epoch": 0.1217366628830874, + "epoch": 0.12156762730595937, "grad_norm": 0.0, - "learning_rate": 1.9562052770753804e-05, - "loss": 1.063, + "learning_rate": 1.9563539372875965e-05, + "loss": 1.0571, "step": 4290 }, { - "epoch": 0.12176503972758229, + "epoch": 0.12159596474822182, "grad_norm": 0.0, - "learning_rate": 1.956178371907841e-05, - "loss": 1.0398, + "learning_rate": 1.9563271144670263e-05, + "loss": 1.0563, "step": 4291 }, { - "epoch": 0.12179341657207718, + "epoch": 0.12162430219048428, "grad_norm": 0.0, - "learning_rate": 1.95615145866342e-05, - "loss": 1.0381, + "learning_rate": 1.9563002835909576e-05, + "loss": 1.092, "step": 4292 }, { - "epoch": 0.12182179341657208, + "epoch": 0.12165263963274675, "grad_norm": 0.0, - "learning_rate": 1.9561245373423445e-05, - "loss": 0.9782, + "learning_rate": 1.9562734446596164e-05, + "loss": 1.0033, "step": 4293 }, { - "epoch": 0.12185017026106697, + "epoch": 0.1216809770750092, "grad_norm": 0.0, - "learning_rate": 1.956097607944842e-05, - "loss": 1.0728, + "learning_rate": 1.9562465976732288e-05, + "loss": 1.0633, "step": 4294 }, { - "epoch": 0.12187854710556185, + "epoch": 0.12170931451727167, "grad_norm": 0.0, - "learning_rate": 1.9560706704711402e-05, - "loss": 1.006, + "learning_rate": 1.956219742632021e-05, + "loss": 1.0688, "step": 4295 }, { - "epoch": 0.12190692395005676, + "epoch": 0.12173765195953413, "grad_norm": 0.0, - "learning_rate": 1.9560437249214665e-05, - "loss": 1.0505, + "learning_rate": 1.9561928795362192e-05, + "loss": 0.9957, "step": 4296 }, { - "epoch": 0.12193530079455164, + "epoch": 0.12176598940179659, "grad_norm": 0.0, - "learning_rate": 1.9560167712960486e-05, - "loss": 1.0984, + "learning_rate": 1.95616600838605e-05, + "loss": 0.9962, "step": 4297 }, { - "epoch": 0.12196367763904654, + "epoch": 0.12179432684405905, "grad_norm": 0.0, - "learning_rate": 1.9559898095951137e-05, - "loss": 0.9584, + "learning_rate": 1.9561391291817393e-05, + "loss": 1.0172, "step": 4298 }, { - "epoch": 0.12199205448354143, + "epoch": 0.12182266428632152, "grad_norm": 0.0, - "learning_rate": 1.9559628398188903e-05, - "loss": 1.0835, + "learning_rate": 1.9561122419235137e-05, + "loss": 1.0538, "step": 4299 }, { - "epoch": 0.12202043132803632, + "epoch": 0.12185100172858397, "grad_norm": 0.0, - "learning_rate": 1.9559358619676053e-05, - "loss": 1.0414, + "learning_rate": 1.9560853466115996e-05, + "loss": 1.0224, "step": 4300 }, { - "epoch": 0.12204880817253122, + "epoch": 0.12187933917084644, "grad_norm": 0.0, - "learning_rate": 1.9559088760414875e-05, - "loss": 1.0281, + "learning_rate": 1.9560584432462236e-05, + "loss": 0.9797, "step": 4301 }, { - "epoch": 0.12207718501702611, + "epoch": 0.1219076766131089, "grad_norm": 0.0, - "learning_rate": 1.9558818820407645e-05, - "loss": 1.0573, + "learning_rate": 1.956031531827612e-05, + "loss": 1.0812, "step": 4302 }, { - "epoch": 0.122105561861521, + "epoch": 0.12193601405537136, "grad_norm": 0.0, - "learning_rate": 1.955854879965664e-05, - "loss": 1.0529, + "learning_rate": 1.9560046123559923e-05, + "loss": 1.0459, "step": 4303 }, { - "epoch": 0.1221339387060159, + "epoch": 0.12196435149763382, "grad_norm": 0.0, - "learning_rate": 1.9558278698164147e-05, - "loss": 1.144, + "learning_rate": 1.9559776848315907e-05, + "loss": 0.958, "step": 4304 }, { - "epoch": 0.12216231555051078, + "epoch": 0.12199268893989629, "grad_norm": 0.0, - "learning_rate": 1.955800851593244e-05, - "loss": 0.9736, + "learning_rate": 1.9559507492546336e-05, + "loss": 1.0086, "step": 4305 }, { - "epoch": 0.12219069239500567, + "epoch": 0.12202102638215874, "grad_norm": 0.0, - "learning_rate": 1.955773825296381e-05, - "loss": 1.0709, + "learning_rate": 1.9559238056253485e-05, + "loss": 1.114, "step": 4306 }, { - "epoch": 0.12221906923950057, + "epoch": 0.1220493638244212, "grad_norm": 0.0, - "learning_rate": 1.955746790926053e-05, - "loss": 1.1087, + "learning_rate": 1.9558968539439627e-05, + "loss": 1.0049, "step": 4307 }, { - "epoch": 0.12224744608399546, + "epoch": 0.12207770126668367, "grad_norm": 0.0, - "learning_rate": 1.9557197484824893e-05, - "loss": 1.0559, + "learning_rate": 1.9558698942107023e-05, + "loss": 1.0605, "step": 4308 }, { - "epoch": 0.12227582292849035, + "epoch": 0.12210603870894612, "grad_norm": 0.0, - "learning_rate": 1.9556926979659178e-05, - "loss": 0.9951, + "learning_rate": 1.955842926425795e-05, + "loss": 1.1237, "step": 4309 }, { - "epoch": 0.12230419977298525, + "epoch": 0.12213437615120859, "grad_norm": 0.0, - "learning_rate": 1.9556656393765673e-05, - "loss": 1.0044, + "learning_rate": 1.9558159505894676e-05, + "loss": 0.9602, "step": 4310 }, { - "epoch": 0.12233257661748013, + "epoch": 0.12216271359347106, "grad_norm": 0.0, - "learning_rate": 1.955638572714666e-05, - "loss": 0.9946, + "learning_rate": 1.9557889667019477e-05, + "loss": 1.076, "step": 4311 }, { - "epoch": 0.12236095346197502, + "epoch": 0.12219105103573351, "grad_norm": 0.0, - "learning_rate": 1.9556114979804426e-05, - "loss": 1.1227, + "learning_rate": 1.9557619747634623e-05, + "loss": 1.153, "step": 4312 }, { - "epoch": 0.12238933030646992, + "epoch": 0.12221938847799597, "grad_norm": 0.0, - "learning_rate": 1.955584415174126e-05, - "loss": 1.0947, + "learning_rate": 1.955734974774239e-05, + "loss": 1.2156, "step": 4313 }, { - "epoch": 0.12241770715096481, + "epoch": 0.12224772592025844, "grad_norm": 0.0, - "learning_rate": 1.955557324295945e-05, - "loss": 1.034, + "learning_rate": 1.955707966734505e-05, + "loss": 0.9401, "step": 4314 }, { - "epoch": 0.1224460839954597, + "epoch": 0.12227606336252089, "grad_norm": 0.0, - "learning_rate": 1.955530225346128e-05, - "loss": 1.0192, + "learning_rate": 1.955680950644488e-05, + "loss": 1.04, "step": 4315 }, { - "epoch": 0.1224744608399546, + "epoch": 0.12230440080478336, "grad_norm": 0.0, - "learning_rate": 1.9555031183249046e-05, - "loss": 1.1046, + "learning_rate": 1.9556539265044153e-05, + "loss": 0.9478, "step": 4316 }, { - "epoch": 0.12250283768444949, + "epoch": 0.12233273824704582, "grad_norm": 0.0, - "learning_rate": 1.9554760032325032e-05, - "loss": 1.1022, + "learning_rate": 1.9556268943145146e-05, + "loss": 1.1146, "step": 4317 }, { - "epoch": 0.12253121452894439, + "epoch": 0.12236107568930828, "grad_norm": 0.0, - "learning_rate": 1.9554488800691532e-05, - "loss": 1.1385, + "learning_rate": 1.955599854075014e-05, + "loss": 1.1389, "step": 4318 }, { - "epoch": 0.12255959137343928, + "epoch": 0.12238941313157074, "grad_norm": 0.0, - "learning_rate": 1.955421748835084e-05, - "loss": 0.9344, + "learning_rate": 1.955572805786141e-05, + "loss": 1.0105, "step": 4319 }, { - "epoch": 0.12258796821793416, + "epoch": 0.12241775057383321, "grad_norm": 0.0, - "learning_rate": 1.9553946095305235e-05, - "loss": 1.0627, + "learning_rate": 1.955545749448123e-05, + "loss": 1.059, "step": 4320 }, { - "epoch": 0.12261634506242906, + "epoch": 0.12244608801609566, "grad_norm": 0.0, - "learning_rate": 1.9553674621557023e-05, - "loss": 0.9851, + "learning_rate": 1.955518685061189e-05, + "loss": 1.0322, "step": 4321 }, { - "epoch": 0.12264472190692395, + "epoch": 0.12247442545835813, "grad_norm": 0.0, - "learning_rate": 1.955340306710849e-05, - "loss": 1.0997, + "learning_rate": 1.9554916126255657e-05, + "loss": 1.1997, "step": 4322 }, { - "epoch": 0.12267309875141884, + "epoch": 0.1225027629006206, "grad_norm": 0.0, - "learning_rate": 1.955313143196193e-05, - "loss": 1.0444, + "learning_rate": 1.9554645321414818e-05, + "loss": 0.9837, "step": 4323 }, { - "epoch": 0.12270147559591374, + "epoch": 0.12253110034288305, "grad_norm": 0.0, - "learning_rate": 1.955285971611964e-05, - "loss": 1.0107, + "learning_rate": 1.9554374436091653e-05, + "loss": 1.0022, "step": 4324 }, { - "epoch": 0.12272985244040863, + "epoch": 0.12255943778514551, "grad_norm": 0.0, - "learning_rate": 1.9552587919583915e-05, - "loss": 1.0318, + "learning_rate": 1.9554103470288452e-05, + "loss": 1.0347, "step": 4325 }, { - "epoch": 0.12275822928490351, + "epoch": 0.12258777522740798, "grad_norm": 0.0, - "learning_rate": 1.9552316042357054e-05, - "loss": 0.9616, + "learning_rate": 1.9553832424007478e-05, + "loss": 1.1321, "step": 4326 }, { - "epoch": 0.12278660612939842, + "epoch": 0.12261611266967043, "grad_norm": 0.0, - "learning_rate": 1.9552044084441347e-05, - "loss": 1.0266, + "learning_rate": 1.9553561297251033e-05, + "loss": 0.9622, "step": 4327 }, { - "epoch": 0.1228149829738933, + "epoch": 0.1226444501119329, "grad_norm": 0.0, - "learning_rate": 1.9551772045839094e-05, - "loss": 1.0522, + "learning_rate": 1.9553290090021392e-05, + "loss": 1.1105, "step": 4328 }, { - "epoch": 0.12284335981838819, + "epoch": 0.12267278755419536, "grad_norm": 0.0, - "learning_rate": 1.9551499926552595e-05, - "loss": 1.1049, + "learning_rate": 1.9553018802320843e-05, + "loss": 1.1274, "step": 4329 }, { - "epoch": 0.12287173666288309, + "epoch": 0.12270112499645781, "grad_norm": 0.0, - "learning_rate": 1.9551227726584145e-05, - "loss": 1.0912, + "learning_rate": 1.9552747434151665e-05, + "loss": 1.0273, "step": 4330 }, { - "epoch": 0.12290011350737798, + "epoch": 0.12272946243872028, "grad_norm": 0.0, - "learning_rate": 1.9550955445936047e-05, - "loss": 1.0573, + "learning_rate": 1.955247598551615e-05, + "loss": 1.0858, "step": 4331 }, { - "epoch": 0.12292849035187287, + "epoch": 0.12275779988098275, "grad_norm": 0.0, - "learning_rate": 1.9550683084610598e-05, - "loss": 1.0483, + "learning_rate": 1.9552204456416585e-05, + "loss": 0.9539, "step": 4332 }, { - "epoch": 0.12295686719636777, + "epoch": 0.1227861373232452, "grad_norm": 0.0, - "learning_rate": 1.95504106426101e-05, - "loss": 1.0861, + "learning_rate": 1.955193284685525e-05, + "loss": 0.9835, "step": 4333 }, { - "epoch": 0.12298524404086265, + "epoch": 0.12281447476550766, "grad_norm": 0.0, - "learning_rate": 1.955013811993685e-05, - "loss": 1.1733, + "learning_rate": 1.955166115683444e-05, + "loss": 1.1348, "step": 4334 }, { - "epoch": 0.12301362088535754, + "epoch": 0.12284281220777013, "grad_norm": 0.0, - "learning_rate": 1.9549865516593158e-05, - "loss": 1.0495, + "learning_rate": 1.9551389386356444e-05, + "loss": 0.9284, "step": 4335 }, { - "epoch": 0.12304199772985244, + "epoch": 0.12287114965003258, "grad_norm": 0.0, - "learning_rate": 1.9549592832581323e-05, - "loss": 1.0343, + "learning_rate": 1.9551117535423546e-05, + "loss": 1.1009, "step": 4336 }, { - "epoch": 0.12307037457434733, + "epoch": 0.12289948709229505, "grad_norm": 0.0, - "learning_rate": 1.9549320067903644e-05, - "loss": 1.0581, + "learning_rate": 1.9550845604038035e-05, + "loss": 0.9857, "step": 4337 }, { - "epoch": 0.12309875141884223, + "epoch": 0.12292782453455751, "grad_norm": 0.0, - "learning_rate": 1.9549047222562434e-05, - "loss": 1.1263, + "learning_rate": 1.955057359220221e-05, + "loss": 1.0808, "step": 4338 }, { - "epoch": 0.12312712826333712, + "epoch": 0.12295616197681997, "grad_norm": 0.0, - "learning_rate": 1.954877429655999e-05, - "loss": 1.1585, + "learning_rate": 1.9550301499918355e-05, + "loss": 1.1275, "step": 4339 }, { - "epoch": 0.123155505107832, + "epoch": 0.12298449941908243, "grad_norm": 0.0, - "learning_rate": 1.954850128989862e-05, - "loss": 0.9437, + "learning_rate": 1.9550029327188763e-05, + "loss": 1.0529, "step": 4340 }, { - "epoch": 0.1231838819523269, + "epoch": 0.1230128368613449, "grad_norm": 0.0, - "learning_rate": 1.9548228202580635e-05, - "loss": 1.1284, + "learning_rate": 1.9549757074015727e-05, + "loss": 1.0574, "step": 4341 }, { - "epoch": 0.1232122587968218, + "epoch": 0.12304117430360735, "grad_norm": 0.0, - "learning_rate": 1.9547955034608333e-05, - "loss": 1.0905, + "learning_rate": 1.9549484740401544e-05, + "loss": 1.0168, "step": 4342 }, { - "epoch": 0.12324063564131668, + "epoch": 0.12306951174586982, "grad_norm": 0.0, - "learning_rate": 1.9547681785984024e-05, - "loss": 1.0514, + "learning_rate": 1.9549212326348505e-05, + "loss": 1.0681, "step": 4343 }, { - "epoch": 0.12326901248581158, + "epoch": 0.12309784918813228, "grad_norm": 0.0, - "learning_rate": 1.954740845671002e-05, - "loss": 1.0458, + "learning_rate": 1.95489398318589e-05, + "loss": 1.1624, "step": 4344 }, { - "epoch": 0.12329738933030647, + "epoch": 0.12312618663039473, "grad_norm": 0.0, - "learning_rate": 1.9547135046788627e-05, - "loss": 1.1177, + "learning_rate": 1.954866725693503e-05, + "loss": 0.9582, "step": 4345 }, { - "epoch": 0.12332576617480136, + "epoch": 0.1231545240726572, "grad_norm": 0.0, - "learning_rate": 1.954686155622216e-05, - "loss": 1.0179, + "learning_rate": 1.9548394601579192e-05, + "loss": 1.0088, "step": 4346 }, { - "epoch": 0.12335414301929626, + "epoch": 0.12318286151491967, "grad_norm": 0.0, - "learning_rate": 1.9546587985012918e-05, - "loss": 1.0141, + "learning_rate": 1.954812186579368e-05, + "loss": 0.9567, "step": 4347 }, { - "epoch": 0.12338251986379115, + "epoch": 0.12321119895718212, "grad_norm": 0.0, - "learning_rate": 1.954631433316322e-05, - "loss": 1.1134, + "learning_rate": 1.9547849049580792e-05, + "loss": 1.0817, "step": 4348 }, { - "epoch": 0.12341089670828603, + "epoch": 0.12323953639944459, "grad_norm": 0.0, - "learning_rate": 1.9546040600675375e-05, - "loss": 1.138, + "learning_rate": 1.954757615294283e-05, + "loss": 1.0155, "step": 4349 }, { - "epoch": 0.12343927355278093, + "epoch": 0.12326787384170705, "grad_norm": 0.0, - "learning_rate": 1.9545766787551697e-05, - "loss": 1.0737, + "learning_rate": 1.9547303175882085e-05, + "loss": 1.0637, "step": 4350 }, { - "epoch": 0.12346765039727582, + "epoch": 0.1232962112839695, "grad_norm": 0.0, - "learning_rate": 1.95454928937945e-05, - "loss": 1.1171, + "learning_rate": 1.9547030118400857e-05, + "loss": 1.1226, "step": 4351 }, { - "epoch": 0.12349602724177071, + "epoch": 0.12332454872623197, "grad_norm": 0.0, - "learning_rate": 1.954521891940609e-05, - "loss": 1.0168, + "learning_rate": 1.9546756980501454e-05, + "loss": 1.0872, "step": 4352 }, { - "epoch": 0.12352440408626561, + "epoch": 0.12335288616849444, "grad_norm": 0.0, - "learning_rate": 1.954494486438879e-05, - "loss": 1.0589, + "learning_rate": 1.954648376218617e-05, + "loss": 1.0201, "step": 4353 }, { - "epoch": 0.1235527809307605, + "epoch": 0.12338122361075689, "grad_norm": 0.0, - "learning_rate": 1.9544670728744912e-05, - "loss": 1.0789, + "learning_rate": 1.9546210463457306e-05, + "loss": 1.0634, "step": 4354 }, { - "epoch": 0.12358115777525538, + "epoch": 0.12340956105301935, "grad_norm": 0.0, - "learning_rate": 1.954439651247677e-05, - "loss": 1.0273, + "learning_rate": 1.954593708431717e-05, + "loss": 0.9336, "step": 4355 }, { - "epoch": 0.12360953461975029, + "epoch": 0.12343789849528182, "grad_norm": 0.0, - "learning_rate": 1.9544122215586682e-05, - "loss": 1.0322, + "learning_rate": 1.954566362476806e-05, + "loss": 0.9696, "step": 4356 }, { - "epoch": 0.12363791146424517, + "epoch": 0.12346623593754427, "grad_norm": 0.0, - "learning_rate": 1.9543847838076966e-05, - "loss": 1.0545, + "learning_rate": 1.9545390084812278e-05, + "loss": 1.1631, "step": 4357 }, { - "epoch": 0.12366628830874007, + "epoch": 0.12349457337980674, "grad_norm": 0.0, - "learning_rate": 1.9543573379949937e-05, - "loss": 1.044, + "learning_rate": 1.9545116464452133e-05, + "loss": 1.0892, "step": 4358 }, { - "epoch": 0.12369466515323496, + "epoch": 0.1235229108220692, "grad_norm": 0.0, - "learning_rate": 1.9543298841207913e-05, - "loss": 1.0353, + "learning_rate": 1.9544842763689928e-05, + "loss": 1.0468, "step": 4359 }, { - "epoch": 0.12372304199772985, + "epoch": 0.12355124826433166, "grad_norm": 0.0, - "learning_rate": 1.9543024221853218e-05, - "loss": 1.0447, + "learning_rate": 1.9544568982527968e-05, + "loss": 1.0581, "step": 4360 }, { - "epoch": 0.12375141884222475, + "epoch": 0.12357958570659412, "grad_norm": 0.0, - "learning_rate": 1.9542749521888165e-05, - "loss": 1.0099, + "learning_rate": 1.9544295120968558e-05, + "loss": 1.053, "step": 4361 }, { - "epoch": 0.12377979568671964, + "epoch": 0.12360792314885659, "grad_norm": 0.0, - "learning_rate": 1.954247474131508e-05, - "loss": 0.9728, + "learning_rate": 1.954402117901401e-05, + "loss": 0.9168, "step": 4362 }, { - "epoch": 0.12380817253121452, + "epoch": 0.12363626059111904, "grad_norm": 0.0, - "learning_rate": 1.9542199880136284e-05, - "loss": 1.1204, + "learning_rate": 1.954374715666662e-05, + "loss": 1.072, "step": 4363 }, { - "epoch": 0.12383654937570943, + "epoch": 0.1236645980333815, "grad_norm": 0.0, - "learning_rate": 1.9541924938354096e-05, - "loss": 1.0256, + "learning_rate": 1.954347305392871e-05, + "loss": 0.9448, "step": 4364 }, { - "epoch": 0.12386492622020431, + "epoch": 0.12369293547564397, "grad_norm": 0.0, - "learning_rate": 1.9541649915970835e-05, - "loss": 1.0178, + "learning_rate": 1.954319887080258e-05, + "loss": 1.1364, "step": 4365 }, { - "epoch": 0.1238933030646992, + "epoch": 0.12372127291790642, "grad_norm": 0.0, - "learning_rate": 1.954137481298883e-05, - "loss": 1.091, + "learning_rate": 1.9542924607290542e-05, + "loss": 0.9094, "step": 4366 }, { - "epoch": 0.1239216799091941, + "epoch": 0.12374961036016889, "grad_norm": 0.0, - "learning_rate": 1.9541099629410405e-05, - "loss": 1.0563, + "learning_rate": 1.9542650263394905e-05, + "loss": 1.0017, "step": 4367 }, { - "epoch": 0.12395005675368899, + "epoch": 0.12377794780243136, "grad_norm": 0.0, - "learning_rate": 1.954082436523788e-05, - "loss": 0.9957, + "learning_rate": 1.9542375839117986e-05, + "loss": 1.0598, "step": 4368 }, { - "epoch": 0.12397843359818388, + "epoch": 0.12380628524469381, "grad_norm": 0.0, - "learning_rate": 1.9540549020473584e-05, - "loss": 1.0125, + "learning_rate": 1.954210133446209e-05, + "loss": 0.9596, "step": 4369 }, { - "epoch": 0.12400681044267878, + "epoch": 0.12383462268695627, "grad_norm": 0.0, - "learning_rate": 1.9540273595119842e-05, - "loss": 1.1509, + "learning_rate": 1.9541826749429528e-05, + "loss": 1.0294, "step": 4370 }, { - "epoch": 0.12403518728717366, + "epoch": 0.12386296012921874, "grad_norm": 0.0, - "learning_rate": 1.953999808917898e-05, - "loss": 1.0643, + "learning_rate": 1.954155208402262e-05, + "loss": 1.0665, "step": 4371 }, { - "epoch": 0.12406356413166855, + "epoch": 0.1238912975714812, "grad_norm": 0.0, - "learning_rate": 1.9539722502653324e-05, - "loss": 1.0358, + "learning_rate": 1.9541277338243674e-05, + "loss": 1.0164, "step": 4372 }, { - "epoch": 0.12409194097616345, + "epoch": 0.12391963501374366, "grad_norm": 0.0, - "learning_rate": 1.9539446835545202e-05, - "loss": 1.0133, + "learning_rate": 1.9541002512095005e-05, + "loss": 0.9555, "step": 4373 }, { - "epoch": 0.12412031782065834, + "epoch": 0.12394797245600613, "grad_norm": 0.0, - "learning_rate": 1.953917108785695e-05, - "loss": 1.0349, + "learning_rate": 1.954072760557893e-05, + "loss": 1.0597, "step": 4374 }, { - "epoch": 0.12414869466515323, + "epoch": 0.12397630989826858, "grad_norm": 0.0, - "learning_rate": 1.9538895259590885e-05, - "loss": 1.0717, + "learning_rate": 1.9540452618697763e-05, + "loss": 1.0051, "step": 4375 }, { - "epoch": 0.12417707150964813, + "epoch": 0.12400464734053104, "grad_norm": 0.0, - "learning_rate": 1.9538619350749345e-05, - "loss": 1.1575, + "learning_rate": 1.954017755145382e-05, + "loss": 1.0593, "step": 4376 }, { - "epoch": 0.12420544835414302, + "epoch": 0.12403298478279351, "grad_norm": 0.0, - "learning_rate": 1.9538343361334657e-05, - "loss": 1.0524, + "learning_rate": 1.953990240384942e-05, + "loss": 0.9648, "step": 4377 }, { - "epoch": 0.12423382519863792, + "epoch": 0.12406132222505596, "grad_norm": 0.0, - "learning_rate": 1.9538067291349154e-05, - "loss": 0.9786, + "learning_rate": 1.953962717588688e-05, + "loss": 0.9456, "step": 4378 }, { - "epoch": 0.1242622020431328, + "epoch": 0.12408965966731843, "grad_norm": 0.0, - "learning_rate": 1.953779114079517e-05, - "loss": 1.0057, + "learning_rate": 1.9539351867568516e-05, + "loss": 1.0707, "step": 4379 }, { - "epoch": 0.12429057888762769, + "epoch": 0.1241179971095809, "grad_norm": 0.0, - "learning_rate": 1.9537514909675034e-05, - "loss": 1.0663, + "learning_rate": 1.953907647889665e-05, + "loss": 0.9787, "step": 4380 }, { - "epoch": 0.1243189557321226, + "epoch": 0.12414633455184335, "grad_norm": 0.0, - "learning_rate": 1.9537238597991082e-05, - "loss": 1.1024, + "learning_rate": 1.95388010098736e-05, + "loss": 1.1275, "step": 4381 }, { - "epoch": 0.12434733257661748, + "epoch": 0.12417467199410581, "grad_norm": 0.0, - "learning_rate": 1.9536962205745647e-05, - "loss": 1.029, + "learning_rate": 1.9538525460501687e-05, + "loss": 1.0367, "step": 4382 }, { - "epoch": 0.12437570942111237, + "epoch": 0.12420300943636828, "grad_norm": 0.0, - "learning_rate": 1.953668573294106e-05, - "loss": 1.104, + "learning_rate": 1.9538249830783233e-05, + "loss": 1.0437, "step": 4383 }, { - "epoch": 0.12440408626560727, + "epoch": 0.12423134687863073, "grad_norm": 0.0, - "learning_rate": 1.953640917957966e-05, - "loss": 1.1447, + "learning_rate": 1.953797412072056e-05, + "loss": 1.0373, "step": 4384 }, { - "epoch": 0.12443246311010216, + "epoch": 0.1242596843208932, "grad_norm": 0.0, - "learning_rate": 1.953613254566379e-05, - "loss": 0.9972, + "learning_rate": 1.9537698330315987e-05, + "loss": 1.1073, "step": 4385 }, { - "epoch": 0.12446083995459704, + "epoch": 0.12428802176315566, "grad_norm": 0.0, - "learning_rate": 1.9535855831195776e-05, - "loss": 1.043, + "learning_rate": 1.953742245957184e-05, + "loss": 0.9616, "step": 4386 }, { - "epoch": 0.12448921679909195, + "epoch": 0.12431635920541811, "grad_norm": 0.0, - "learning_rate": 1.9535579036177955e-05, - "loss": 1.0994, + "learning_rate": 1.953714650849044e-05, + "loss": 0.9033, "step": 4387 }, { - "epoch": 0.12451759364358683, + "epoch": 0.12434469664768058, "grad_norm": 0.0, - "learning_rate": 1.9535302160612674e-05, - "loss": 1.0052, + "learning_rate": 1.9536870477074113e-05, + "loss": 1.0858, "step": 4388 }, { - "epoch": 0.12454597048808172, + "epoch": 0.12437303408994305, "grad_norm": 0.0, - "learning_rate": 1.9535025204502265e-05, - "loss": 1.0405, + "learning_rate": 1.953659436532519e-05, + "loss": 0.9793, "step": 4389 }, { - "epoch": 0.12457434733257662, + "epoch": 0.1244013715322055, "grad_norm": 0.0, - "learning_rate": 1.9534748167849068e-05, - "loss": 1.122, + "learning_rate": 1.9536318173245987e-05, + "loss": 1.0472, "step": 4390 }, { - "epoch": 0.12460272417707151, + "epoch": 0.12442970897446796, "grad_norm": 0.0, - "learning_rate": 1.9534471050655428e-05, - "loss": 0.9954, + "learning_rate": 1.9536041900838834e-05, + "loss": 1.1064, "step": 4391 }, { - "epoch": 0.1246311010215664, + "epoch": 0.12445804641673043, "grad_norm": 0.0, - "learning_rate": 1.9534193852923683e-05, - "loss": 0.931, + "learning_rate": 1.9535765548106063e-05, + "loss": 1.0766, "step": 4392 }, { - "epoch": 0.1246594778660613, + "epoch": 0.12448638385899288, "grad_norm": 0.0, - "learning_rate": 1.953391657465617e-05, - "loss": 1.14, + "learning_rate": 1.9535489115049994e-05, + "loss": 1.0405, "step": 4393 }, { - "epoch": 0.12468785471055618, + "epoch": 0.12451472130125535, "grad_norm": 0.0, - "learning_rate": 1.953363921585524e-05, - "loss": 1.0948, + "learning_rate": 1.9535212601672963e-05, + "loss": 0.9424, "step": 4394 }, { - "epoch": 0.12471623155505107, + "epoch": 0.12454305874351781, "grad_norm": 0.0, - "learning_rate": 1.9533361776523226e-05, - "loss": 1.0461, + "learning_rate": 1.9534936007977292e-05, + "loss": 1.1571, "step": 4395 }, { - "epoch": 0.12474460839954597, + "epoch": 0.12457139618578027, "grad_norm": 0.0, - "learning_rate": 1.953308425666248e-05, - "loss": 1.0425, + "learning_rate": 1.9534659333965317e-05, + "loss": 0.9901, "step": 4396 }, { - "epoch": 0.12477298524404086, + "epoch": 0.12459973362804273, "grad_norm": 0.0, - "learning_rate": 1.953280665627534e-05, - "loss": 1.1243, + "learning_rate": 1.9534382579639365e-05, + "loss": 1.0647, "step": 4397 }, { - "epoch": 0.12480136208853576, + "epoch": 0.1246280710703052, "grad_norm": 0.0, - "learning_rate": 1.9532528975364154e-05, - "loss": 1.056, + "learning_rate": 1.9534105745001764e-05, + "loss": 1.1598, "step": 4398 }, { - "epoch": 0.12482973893303065, + "epoch": 0.12465640851256765, "grad_norm": 0.0, - "learning_rate": 1.953225121393127e-05, - "loss": 1.0702, + "learning_rate": 1.9533828830054852e-05, + "loss": 1.0691, "step": 4399 }, { - "epoch": 0.12485811577752554, + "epoch": 0.12468474595483012, "grad_norm": 0.0, - "learning_rate": 1.953197337197903e-05, - "loss": 1.0561, + "learning_rate": 1.9533551834800963e-05, + "loss": 1.0956, "step": 4400 }, { - "epoch": 0.12488649262202044, + "epoch": 0.12471308339709258, "grad_norm": 0.0, - "learning_rate": 1.953169544950978e-05, - "loss": 1.0368, + "learning_rate": 1.9533274759242424e-05, + "loss": 1.0365, "step": 4401 }, { - "epoch": 0.12491486946651532, + "epoch": 0.12474142083935504, "grad_norm": 0.0, - "learning_rate": 1.9531417446525873e-05, - "loss": 0.8981, + "learning_rate": 1.953299760338157e-05, + "loss": 1.0961, "step": 4402 }, { - "epoch": 0.12494324631101021, + "epoch": 0.1247697582816175, "grad_norm": 0.0, - "learning_rate": 1.953113936302965e-05, - "loss": 1.1171, + "learning_rate": 1.953272036722074e-05, + "loss": 0.9999, "step": 4403 }, { - "epoch": 0.12497162315550511, + "epoch": 0.12479809572387997, "grad_norm": 0.0, - "learning_rate": 1.9530861199023472e-05, - "loss": 1.0072, + "learning_rate": 1.9532443050762265e-05, + "loss": 1.0242, "step": 4404 }, { - "epoch": 0.125, + "epoch": 0.12482643316614242, "grad_norm": 0.0, - "learning_rate": 1.9530582954509675e-05, - "loss": 0.9988, + "learning_rate": 1.9532165654008484e-05, + "loss": 1.1601, "step": 4405 }, { - "epoch": 0.1250283768444949, + "epoch": 0.12485477060840489, "grad_norm": 0.0, - "learning_rate": 1.953030462949062e-05, - "loss": 0.9577, + "learning_rate": 1.953188817696173e-05, + "loss": 1.0236, "step": 4406 }, { - "epoch": 0.12505675368898977, + "epoch": 0.12488310805066735, "grad_norm": 0.0, - "learning_rate": 1.953002622396865e-05, - "loss": 1.0273, + "learning_rate": 1.9531610619624345e-05, + "loss": 1.1, "step": 4407 }, { - "epoch": 0.1250851305334847, + "epoch": 0.1249114454929298, "grad_norm": 0.0, - "learning_rate": 1.9529747737946117e-05, - "loss": 1.1085, + "learning_rate": 1.953133298199866e-05, + "loss": 1.0885, "step": 4408 }, { - "epoch": 0.12511350737797958, + "epoch": 0.12493978293519227, "grad_norm": 0.0, - "learning_rate": 1.9529469171425383e-05, - "loss": 1.1099, + "learning_rate": 1.9531055264087025e-05, + "loss": 1.1245, "step": 4409 }, { - "epoch": 0.12514188422247446, + "epoch": 0.12496812037745474, "grad_norm": 0.0, - "learning_rate": 1.952919052440879e-05, - "loss": 1.0776, + "learning_rate": 1.9530777465891767e-05, + "loss": 0.9862, "step": 4410 }, { - "epoch": 0.12517026106696935, + "epoch": 0.12499645781971719, "grad_norm": 0.0, - "learning_rate": 1.95289117968987e-05, - "loss": 1.0391, + "learning_rate": 1.953049958741523e-05, + "loss": 0.9769, "step": 4411 }, { - "epoch": 0.12519863791146424, + "epoch": 0.12502479526197965, "grad_norm": 0.0, - "learning_rate": 1.952863298889746e-05, - "loss": 1.0793, + "learning_rate": 1.9530221628659758e-05, + "loss": 0.8865, "step": 4412 }, { - "epoch": 0.12522701475595913, + "epoch": 0.12505313270424212, "grad_norm": 0.0, - "learning_rate": 1.9528354100407433e-05, - "loss": 0.8961, + "learning_rate": 1.952994358962769e-05, + "loss": 1.2537, "step": 4413 }, { - "epoch": 0.12525539160045404, + "epoch": 0.1250814701465046, "grad_norm": 0.0, - "learning_rate": 1.9528075131430967e-05, - "loss": 1.1005, + "learning_rate": 1.9529665470321368e-05, + "loss": 1.0131, "step": 4414 }, { - "epoch": 0.12528376844494893, + "epoch": 0.12510980758876702, "grad_norm": 0.0, - "learning_rate": 1.9527796081970423e-05, - "loss": 1.135, + "learning_rate": 1.9529387270743135e-05, + "loss": 1.1429, "step": 4415 }, { - "epoch": 0.12531214528944382, + "epoch": 0.1251381450310295, "grad_norm": 0.0, - "learning_rate": 1.9527516952028156e-05, - "loss": 1.0354, + "learning_rate": 1.9529108990895335e-05, + "loss": 1.0844, "step": 4416 }, { - "epoch": 0.1253405221339387, + "epoch": 0.12516648247329196, "grad_norm": 0.0, - "learning_rate": 1.9527237741606527e-05, - "loss": 1.0938, + "learning_rate": 1.9528830630780313e-05, + "loss": 1.0756, "step": 4417 }, { - "epoch": 0.1253688989784336, + "epoch": 0.12519481991555442, "grad_norm": 0.0, - "learning_rate": 1.952695845070789e-05, - "loss": 1.0822, + "learning_rate": 1.952855219040041e-05, + "loss": 1.0741, "step": 4418 }, { - "epoch": 0.12539727582292848, + "epoch": 0.1252231573578169, "grad_norm": 0.0, - "learning_rate": 1.952667907933461e-05, - "loss": 1.0072, + "learning_rate": 1.9528273669757974e-05, + "loss": 1.0858, "step": 4419 }, { - "epoch": 0.1254256526674234, + "epoch": 0.12525149480007935, "grad_norm": 0.0, - "learning_rate": 1.952639962748904e-05, - "loss": 0.9518, + "learning_rate": 1.952799506885535e-05, + "loss": 1.067, "step": 4420 }, { - "epoch": 0.12545402951191828, + "epoch": 0.1252798322423418, "grad_norm": 0.0, - "learning_rate": 1.9526120095173548e-05, - "loss": 1.1264, + "learning_rate": 1.9527716387694888e-05, + "loss": 1.0264, "step": 4421 }, { - "epoch": 0.12548240635641317, + "epoch": 0.12530816968460426, "grad_norm": 0.0, - "learning_rate": 1.952584048239049e-05, - "loss": 1.0065, + "learning_rate": 1.952743762627893e-05, + "loss": 0.9845, "step": 4422 }, { - "epoch": 0.12551078320090805, + "epoch": 0.12533650712686673, "grad_norm": 0.0, - "learning_rate": 1.952556078914223e-05, - "loss": 1.1197, + "learning_rate": 1.9527158784609828e-05, + "loss": 1.1068, "step": 4423 }, { - "epoch": 0.12553916004540294, + "epoch": 0.1253648445691292, "grad_norm": 0.0, - "learning_rate": 1.9525281015431127e-05, - "loss": 1.0114, + "learning_rate": 1.9526879862689928e-05, + "loss": 1.0898, "step": 4424 }, { - "epoch": 0.12556753688989786, + "epoch": 0.12539318201139166, "grad_norm": 0.0, - "learning_rate": 1.952500116125955e-05, - "loss": 1.0599, + "learning_rate": 1.952660086052158e-05, + "loss": 0.9305, "step": 4425 }, { - "epoch": 0.12559591373439274, + "epoch": 0.12542151945365412, "grad_norm": 0.0, - "learning_rate": 1.9524721226629856e-05, - "loss": 1.0851, + "learning_rate": 1.9526321778107143e-05, + "loss": 0.9955, "step": 4426 }, { - "epoch": 0.12562429057888763, + "epoch": 0.12544985689591656, "grad_norm": 0.0, - "learning_rate": 1.9524441211544416e-05, - "loss": 0.949, + "learning_rate": 1.9526042615448953e-05, + "loss": 1.0534, "step": 4427 }, { - "epoch": 0.12565266742338252, + "epoch": 0.12547819433817903, "grad_norm": 0.0, - "learning_rate": 1.9524161116005596e-05, - "loss": 0.9689, + "learning_rate": 1.952576337254937e-05, + "loss": 0.9487, "step": 4428 }, { - "epoch": 0.1256810442678774, + "epoch": 0.1255065317804415, "grad_norm": 0.0, - "learning_rate": 1.9523880940015755e-05, - "loss": 1.1024, + "learning_rate": 1.9525484049410746e-05, + "loss": 1.0438, "step": 4429 }, { - "epoch": 0.1257094211123723, + "epoch": 0.12553486922270396, "grad_norm": 0.0, - "learning_rate": 1.9523600683577263e-05, - "loss": 1.0253, + "learning_rate": 1.9525204646035432e-05, + "loss": 1.0465, "step": 4430 }, { - "epoch": 0.1257377979568672, + "epoch": 0.12556320666496643, "grad_norm": 0.0, - "learning_rate": 1.952332034669249e-05, - "loss": 0.9807, + "learning_rate": 1.9524925162425783e-05, + "loss": 1.0084, "step": 4431 }, { - "epoch": 0.1257661748013621, + "epoch": 0.1255915441072289, "grad_norm": 0.0, - "learning_rate": 1.95230399293638e-05, - "loss": 1.0428, + "learning_rate": 1.9524645598584153e-05, + "loss": 0.9922, "step": 4432 }, { - "epoch": 0.12579455164585698, + "epoch": 0.12561988154949133, "grad_norm": 0.0, - "learning_rate": 1.9522759431593566e-05, - "loss": 0.999, + "learning_rate": 1.9524365954512893e-05, + "loss": 1.0709, "step": 4433 }, { - "epoch": 0.12582292849035187, + "epoch": 0.1256482189917538, "grad_norm": 0.0, - "learning_rate": 1.9522478853384154e-05, - "loss": 1.0584, + "learning_rate": 1.9524086230214366e-05, + "loss": 1.0356, "step": 4434 }, { - "epoch": 0.12585130533484676, + "epoch": 0.12567655643401626, "grad_norm": 0.0, - "learning_rate": 1.9522198194737935e-05, - "loss": 1.0218, + "learning_rate": 1.952380642569092e-05, + "loss": 1.1378, "step": 4435 }, { - "epoch": 0.12587968217934165, + "epoch": 0.12570489387627873, "grad_norm": 0.0, - "learning_rate": 1.952191745565728e-05, - "loss": 1.0748, + "learning_rate": 1.9523526540944918e-05, + "loss": 1.0973, "step": 4436 }, { - "epoch": 0.12590805902383656, + "epoch": 0.1257332313185412, "grad_norm": 0.0, - "learning_rate": 1.952163663614456e-05, - "loss": 1.0215, + "learning_rate": 1.9523246575978713e-05, + "loss": 1.0725, "step": 4437 }, { - "epoch": 0.12593643586833145, + "epoch": 0.12576156876080366, "grad_norm": 0.0, - "learning_rate": 1.9521355736202148e-05, - "loss": 1.0691, + "learning_rate": 1.952296653079467e-05, + "loss": 1.0305, "step": 4438 }, { - "epoch": 0.12596481271282634, + "epoch": 0.1257899062030661, "grad_norm": 0.0, - "learning_rate": 1.9521074755832416e-05, - "loss": 1.0283, + "learning_rate": 1.9522686405395143e-05, + "loss": 1.1002, "step": 4439 }, { - "epoch": 0.12599318955732122, + "epoch": 0.12581824364532856, "grad_norm": 0.0, - "learning_rate": 1.952079369503774e-05, - "loss": 1.0756, + "learning_rate": 1.952240619978249e-05, + "loss": 1.0342, "step": 4440 }, { - "epoch": 0.1260215664018161, + "epoch": 0.12584658108759103, "grad_norm": 0.0, - "learning_rate": 1.9520512553820485e-05, - "loss": 1.1709, + "learning_rate": 1.9522125913959073e-05, + "loss": 0.9961, "step": 4441 }, { - "epoch": 0.126049943246311, + "epoch": 0.1258749185298535, "grad_norm": 0.0, - "learning_rate": 1.9520231332183037e-05, - "loss": 1.1193, + "learning_rate": 1.9521845547927256e-05, + "loss": 1.1303, "step": 4442 }, { - "epoch": 0.1260783200908059, + "epoch": 0.12590325597211596, "grad_norm": 0.0, - "learning_rate": 1.9519950030127765e-05, - "loss": 0.8883, + "learning_rate": 1.9521565101689396e-05, + "loss": 1.1036, "step": 4443 }, { - "epoch": 0.1261066969353008, + "epoch": 0.12593159341437843, "grad_norm": 0.0, - "learning_rate": 1.951966864765705e-05, - "loss": 0.9745, + "learning_rate": 1.952128457524786e-05, + "loss": 1.068, "step": 4444 }, { - "epoch": 0.1261350737797957, + "epoch": 0.12595993085664087, "grad_norm": 0.0, - "learning_rate": 1.9519387184773264e-05, - "loss": 0.9839, + "learning_rate": 1.9521003968605004e-05, + "loss": 1.0619, "step": 4445 }, { - "epoch": 0.12616345062429057, + "epoch": 0.12598826829890333, "grad_norm": 0.0, - "learning_rate": 1.9519105641478788e-05, - "loss": 1.0721, + "learning_rate": 1.95207232817632e-05, + "loss": 1.0762, "step": 4446 }, { - "epoch": 0.12619182746878546, + "epoch": 0.1260166057411658, "grad_norm": 0.0, - "learning_rate": 1.9518824017775995e-05, - "loss": 0.9609, + "learning_rate": 1.9520442514724807e-05, + "loss": 0.9556, "step": 4447 }, { - "epoch": 0.12622020431328038, + "epoch": 0.12604494318342827, "grad_norm": 0.0, - "learning_rate": 1.9518542313667272e-05, - "loss": 1.0299, + "learning_rate": 1.952016166749219e-05, + "loss": 1.0985, "step": 4448 }, { - "epoch": 0.12624858115777526, + "epoch": 0.12607328062569073, "grad_norm": 0.0, - "learning_rate": 1.9518260529154994e-05, - "loss": 1.1074, + "learning_rate": 1.951988074006772e-05, + "loss": 1.0236, "step": 4449 }, { - "epoch": 0.12627695800227015, + "epoch": 0.1261016180679532, "grad_norm": 0.0, - "learning_rate": 1.9517978664241538e-05, - "loss": 1.0185, + "learning_rate": 1.9519599732453756e-05, + "loss": 1.0961, "step": 4450 }, { - "epoch": 0.12630533484676504, + "epoch": 0.12612995551021564, "grad_norm": 0.0, - "learning_rate": 1.951769671892929e-05, - "loss": 1.0052, + "learning_rate": 1.951931864465267e-05, + "loss": 1.1011, "step": 4451 }, { - "epoch": 0.12633371169125993, + "epoch": 0.1261582929524781, "grad_norm": 0.0, - "learning_rate": 1.951741469322063e-05, - "loss": 0.9365, + "learning_rate": 1.9519037476666827e-05, + "loss": 1.1328, "step": 4452 }, { - "epoch": 0.1263620885357548, + "epoch": 0.12618663039474057, "grad_norm": 0.0, - "learning_rate": 1.951713258711794e-05, - "loss": 1.0705, + "learning_rate": 1.9518756228498596e-05, + "loss": 1.0188, "step": 4453 }, { - "epoch": 0.12639046538024973, + "epoch": 0.12621496783700303, "grad_norm": 0.0, - "learning_rate": 1.9516850400623605e-05, - "loss": 0.995, + "learning_rate": 1.9518474900150348e-05, + "loss": 0.9207, "step": 4454 }, { - "epoch": 0.12641884222474462, + "epoch": 0.1262433052792655, "grad_norm": 0.0, - "learning_rate": 1.9516568133740005e-05, - "loss": 1.105, + "learning_rate": 1.951819349162445e-05, + "loss": 1.0002, "step": 4455 }, { - "epoch": 0.1264472190692395, + "epoch": 0.12627164272152797, "grad_norm": 0.0, - "learning_rate": 1.9516285786469526e-05, - "loss": 0.8887, + "learning_rate": 1.9517912002923274e-05, + "loss": 1.0344, "step": 4456 }, { - "epoch": 0.1264755959137344, + "epoch": 0.1262999801637904, "grad_norm": 0.0, - "learning_rate": 1.9516003358814553e-05, - "loss": 1.1491, + "learning_rate": 1.9517630434049192e-05, + "loss": 0.9601, "step": 4457 }, { - "epoch": 0.12650397275822928, + "epoch": 0.12632831760605287, "grad_norm": 0.0, - "learning_rate": 1.9515720850777475e-05, - "loss": 0.9475, + "learning_rate": 1.9517348785004574e-05, + "loss": 1.1409, "step": 4458 }, { - "epoch": 0.12653234960272416, + "epoch": 0.12635665504831534, "grad_norm": 0.0, - "learning_rate": 1.9515438262360673e-05, - "loss": 0.9865, + "learning_rate": 1.9517067055791793e-05, + "loss": 1.0319, "step": 4459 }, { - "epoch": 0.12656072644721908, + "epoch": 0.1263849924905778, "grad_norm": 0.0, - "learning_rate": 1.9515155593566536e-05, - "loss": 1.0779, + "learning_rate": 1.9516785246413223e-05, + "loss": 1.0321, "step": 4460 }, { - "epoch": 0.12658910329171397, + "epoch": 0.12641332993284027, "grad_norm": 0.0, - "learning_rate": 1.951487284439745e-05, - "loss": 0.9311, + "learning_rate": 1.9516503356871234e-05, + "loss": 0.9095, "step": 4461 }, { - "epoch": 0.12661748013620885, + "epoch": 0.12644166737510273, "grad_norm": 0.0, - "learning_rate": 1.951459001485581e-05, - "loss": 1.0508, + "learning_rate": 1.9516221387168208e-05, + "loss": 1.0684, "step": 4462 }, { - "epoch": 0.12664585698070374, + "epoch": 0.12647000481736517, "grad_norm": 0.0, - "learning_rate": 1.9514307104943997e-05, - "loss": 0.8262, + "learning_rate": 1.951593933730651e-05, + "loss": 1.1306, "step": 4463 }, { - "epoch": 0.12667423382519863, + "epoch": 0.12649834225962764, "grad_norm": 0.0, - "learning_rate": 1.9514024114664404e-05, - "loss": 1.0121, + "learning_rate": 1.9515657207288528e-05, + "loss": 1.1523, "step": 4464 }, { - "epoch": 0.12670261066969354, + "epoch": 0.1265266797018901, "grad_norm": 0.0, - "learning_rate": 1.9513741044019423e-05, - "loss": 1.1328, + "learning_rate": 1.9515374997116626e-05, + "loss": 1.0114, "step": 4465 }, { - "epoch": 0.12673098751418843, + "epoch": 0.12655501714415257, "grad_norm": 0.0, - "learning_rate": 1.9513457893011445e-05, - "loss": 1.031, + "learning_rate": 1.951509270679319e-05, + "loss": 0.9854, "step": 4466 }, { - "epoch": 0.12675936435868332, + "epoch": 0.12658335458641504, "grad_norm": 0.0, - "learning_rate": 1.9513174661642857e-05, - "loss": 1.0106, + "learning_rate": 1.9514810336320594e-05, + "loss": 0.9987, "step": 4467 }, { - "epoch": 0.1267877412031782, + "epoch": 0.1266116920286775, "grad_norm": 0.0, - "learning_rate": 1.9512891349916056e-05, - "loss": 0.8683, + "learning_rate": 1.9514527885701216e-05, + "loss": 1.0687, "step": 4468 }, { - "epoch": 0.1268161180476731, + "epoch": 0.12664002947093994, "grad_norm": 0.0, - "learning_rate": 1.9512607957833438e-05, - "loss": 1.0208, + "learning_rate": 1.9514245354937437e-05, + "loss": 1.0567, "step": 4469 }, { - "epoch": 0.12684449489216798, + "epoch": 0.1266683669132024, "grad_norm": 0.0, - "learning_rate": 1.951232448539739e-05, - "loss": 1.1266, + "learning_rate": 1.951396274403164e-05, + "loss": 1.0525, "step": 4470 }, { - "epoch": 0.1268728717366629, + "epoch": 0.12669670435546487, "grad_norm": 0.0, - "learning_rate": 1.9512040932610307e-05, - "loss": 1.0003, + "learning_rate": 1.9513680052986195e-05, + "loss": 0.9528, "step": 4471 }, { - "epoch": 0.12690124858115778, + "epoch": 0.12672504179772734, "grad_norm": 0.0, - "learning_rate": 1.951175729947459e-05, - "loss": 0.9303, + "learning_rate": 1.9513397281803495e-05, + "loss": 1.1627, "step": 4472 }, { - "epoch": 0.12692962542565267, + "epoch": 0.1267533792399898, "grad_norm": 0.0, - "learning_rate": 1.951147358599263e-05, - "loss": 1.1216, + "learning_rate": 1.9513114430485914e-05, + "loss": 1.0556, "step": 4473 }, { - "epoch": 0.12695800227014756, + "epoch": 0.12678171668225227, "grad_norm": 0.0, - "learning_rate": 1.9511189792166826e-05, - "loss": 0.9809, + "learning_rate": 1.951283149903584e-05, + "loss": 1.0919, "step": 4474 }, { - "epoch": 0.12698637911464244, + "epoch": 0.1268100541245147, "grad_norm": 0.0, - "learning_rate": 1.9510905917999573e-05, - "loss": 0.973, + "learning_rate": 1.9512548487455655e-05, + "loss": 1.0822, "step": 4475 }, { - "epoch": 0.12701475595913733, + "epoch": 0.12683839156677718, "grad_norm": 0.0, - "learning_rate": 1.9510621963493272e-05, - "loss": 0.9947, + "learning_rate": 1.951226539574774e-05, + "loss": 1.09, "step": 4476 }, { - "epoch": 0.12704313280363225, + "epoch": 0.12686672900903964, "grad_norm": 0.0, - "learning_rate": 1.9510337928650318e-05, - "loss": 1.0152, + "learning_rate": 1.9511982223914477e-05, + "loss": 1.065, "step": 4477 }, { - "epoch": 0.12707150964812713, + "epoch": 0.1268950664513021, "grad_norm": 0.0, - "learning_rate": 1.9510053813473115e-05, - "loss": 1.1266, + "learning_rate": 1.9511698971958265e-05, + "loss": 1.1078, "step": 4478 }, { - "epoch": 0.12709988649262202, + "epoch": 0.12692340389356457, "grad_norm": 0.0, - "learning_rate": 1.9509769617964057e-05, - "loss": 1.0918, + "learning_rate": 1.9511415639881474e-05, + "loss": 1.1195, "step": 4479 }, { - "epoch": 0.1271282633371169, + "epoch": 0.12695174133582704, "grad_norm": 0.0, - "learning_rate": 1.9509485342125548e-05, - "loss": 1.0941, + "learning_rate": 1.95111322276865e-05, + "loss": 0.8486, "step": 4480 }, { - "epoch": 0.1271566401816118, + "epoch": 0.12698007877808948, "grad_norm": 0.0, - "learning_rate": 1.950920098595999e-05, - "loss": 0.9063, + "learning_rate": 1.9510848735375723e-05, + "loss": 1.0122, "step": 4481 }, { - "epoch": 0.12718501702610668, + "epoch": 0.12700841622035194, "grad_norm": 0.0, - "learning_rate": 1.9508916549469784e-05, - "loss": 1.0302, + "learning_rate": 1.9510565162951538e-05, + "loss": 1.06, "step": 4482 }, { - "epoch": 0.1272133938706016, + "epoch": 0.1270367536626144, "grad_norm": 0.0, - "learning_rate": 1.9508632032657327e-05, - "loss": 1.1306, + "learning_rate": 1.951028151041633e-05, + "loss": 1.0542, "step": 4483 }, { - "epoch": 0.12724177071509649, + "epoch": 0.12706509110487688, "grad_norm": 0.0, - "learning_rate": 1.9508347435525035e-05, - "loss": 1.0192, + "learning_rate": 1.950999777777249e-05, + "loss": 0.9537, "step": 4484 }, { - "epoch": 0.12727014755959137, + "epoch": 0.12709342854713934, "grad_norm": 0.0, - "learning_rate": 1.9508062758075304e-05, - "loss": 0.9943, + "learning_rate": 1.9509713965022407e-05, + "loss": 1.1105, "step": 4485 }, { - "epoch": 0.12729852440408626, + "epoch": 0.1271217659894018, "grad_norm": 0.0, - "learning_rate": 1.9507778000310535e-05, - "loss": 1.0387, + "learning_rate": 1.9509430072168473e-05, + "loss": 1.0636, "step": 4486 }, { - "epoch": 0.12732690124858115, + "epoch": 0.12715010343166425, "grad_norm": 0.0, - "learning_rate": 1.950749316223314e-05, - "loss": 1.0338, + "learning_rate": 1.950914609921308e-05, + "loss": 1.1702, "step": 4487 }, { - "epoch": 0.12735527809307606, + "epoch": 0.1271784408739267, "grad_norm": 0.0, - "learning_rate": 1.9507208243845522e-05, - "loss": 0.9158, + "learning_rate": 1.9508862046158615e-05, + "loss": 1.0099, "step": 4488 }, { - "epoch": 0.12738365493757095, + "epoch": 0.12720677831618918, "grad_norm": 0.0, - "learning_rate": 1.9506923245150092e-05, - "loss": 0.9373, + "learning_rate": 1.9508577913007475e-05, + "loss": 0.996, "step": 4489 }, { - "epoch": 0.12741203178206584, + "epoch": 0.12723511575845164, "grad_norm": 0.0, - "learning_rate": 1.950663816614925e-05, - "loss": 1.188, + "learning_rate": 1.950829369976205e-05, + "loss": 1.0102, "step": 4490 }, { - "epoch": 0.12744040862656072, + "epoch": 0.1272634532007141, "grad_norm": 0.0, - "learning_rate": 1.950635300684541e-05, - "loss": 0.9934, + "learning_rate": 1.950800940642474e-05, + "loss": 1.0744, "step": 4491 }, { - "epoch": 0.1274687854710556, + "epoch": 0.12729179064297658, "grad_norm": 0.0, - "learning_rate": 1.950606776724098e-05, - "loss": 0.9985, + "learning_rate": 1.950772503299793e-05, + "loss": 1.0699, "step": 4492 }, { - "epoch": 0.1274971623155505, + "epoch": 0.12732012808523901, "grad_norm": 0.0, - "learning_rate": 1.9505782447338367e-05, - "loss": 1.0064, + "learning_rate": 1.950744057948403e-05, + "loss": 1.128, "step": 4493 }, { - "epoch": 0.12752553916004541, + "epoch": 0.12734846552750148, "grad_norm": 0.0, - "learning_rate": 1.9505497047139982e-05, - "loss": 1.1032, + "learning_rate": 1.9507156045885423e-05, + "loss": 0.9782, "step": 4494 }, { - "epoch": 0.1275539160045403, + "epoch": 0.12737680296976395, "grad_norm": 0.0, - "learning_rate": 1.9505211566648238e-05, - "loss": 1.0414, + "learning_rate": 1.950687143220451e-05, + "loss": 0.9579, "step": 4495 }, { - "epoch": 0.1275822928490352, + "epoch": 0.1274051404120264, "grad_norm": 0.0, - "learning_rate": 1.9504926005865542e-05, - "loss": 0.9835, + "learning_rate": 1.950658673844369e-05, + "loss": 1.2316, "step": 4496 }, { - "epoch": 0.12761066969353008, + "epoch": 0.12743347785428888, "grad_norm": 0.0, - "learning_rate": 1.950464036479431e-05, - "loss": 1.0311, + "learning_rate": 1.9506301964605358e-05, + "loss": 1.1133, "step": 4497 }, { - "epoch": 0.12763904653802496, + "epoch": 0.12746181529655135, "grad_norm": 0.0, - "learning_rate": 1.950435464343695e-05, - "loss": 1.0563, + "learning_rate": 1.9506017110691918e-05, + "loss": 1.0392, "step": 4498 }, { - "epoch": 0.12766742338251985, + "epoch": 0.12749015273881378, "grad_norm": 0.0, - "learning_rate": 1.9504068841795886e-05, - "loss": 1.0758, + "learning_rate": 1.9505732176705763e-05, + "loss": 1.0527, "step": 4499 }, { - "epoch": 0.12769580022701477, + "epoch": 0.12751849018107625, "grad_norm": 0.0, - "learning_rate": 1.950378295987352e-05, - "loss": 0.9931, + "learning_rate": 1.9505447162649296e-05, + "loss": 1.1156, "step": 4500 }, { - "epoch": 0.12772417707150965, + "epoch": 0.12754682762333872, "grad_norm": 0.0, - "learning_rate": 1.9503496997672275e-05, - "loss": 1.0852, + "learning_rate": 1.9505162068524915e-05, + "loss": 0.9157, "step": 4501 }, { - "epoch": 0.12775255391600454, + "epoch": 0.12757516506560118, "grad_norm": 0.0, - "learning_rate": 1.950321095519456e-05, - "loss": 1.1292, + "learning_rate": 1.9504876894335033e-05, + "loss": 0.9828, "step": 4502 }, { - "epoch": 0.12778093076049943, + "epoch": 0.12760350250786365, "grad_norm": 0.0, - "learning_rate": 1.9502924832442796e-05, - "loss": 1.0229, + "learning_rate": 1.9504591640082034e-05, + "loss": 1.0687, "step": 4503 }, { - "epoch": 0.12780930760499432, + "epoch": 0.1276318399501261, "grad_norm": 0.0, - "learning_rate": 1.95026386294194e-05, - "loss": 1.1027, + "learning_rate": 1.9504306305768335e-05, + "loss": 1.1111, "step": 4504 }, { - "epoch": 0.12783768444948923, + "epoch": 0.12766017739238855, "grad_norm": 0.0, - "learning_rate": 1.950235234612679e-05, - "loss": 1.1139, + "learning_rate": 1.9504020891396335e-05, + "loss": 1.0517, "step": 4505 }, { - "epoch": 0.12786606129398412, + "epoch": 0.12768851483465102, "grad_norm": 0.0, - "learning_rate": 1.9502065982567382e-05, - "loss": 1.0578, + "learning_rate": 1.9503735396968435e-05, + "loss": 1.0095, "step": 4506 }, { - "epoch": 0.127894438138479, + "epoch": 0.12771685227691348, "grad_norm": 0.0, - "learning_rate": 1.9501779538743593e-05, - "loss": 1.0697, + "learning_rate": 1.9503449822487045e-05, + "loss": 1.1511, "step": 4507 }, { - "epoch": 0.1279228149829739, + "epoch": 0.12774518971917595, "grad_norm": 0.0, - "learning_rate": 1.9501493014657848e-05, - "loss": 1.0737, + "learning_rate": 1.9503164167954566e-05, + "loss": 1.0551, "step": 4508 }, { - "epoch": 0.12795119182746878, + "epoch": 0.12777352716143842, "grad_norm": 0.0, - "learning_rate": 1.950120641031256e-05, - "loss": 1.1315, + "learning_rate": 1.950287843337341e-05, + "loss": 1.0288, "step": 4509 }, { - "epoch": 0.12797956867196367, + "epoch": 0.12780186460370088, "grad_norm": 0.0, - "learning_rate": 1.950091972571016e-05, - "loss": 1.0094, + "learning_rate": 1.9502592618745977e-05, + "loss": 0.9376, "step": 4510 }, { - "epoch": 0.12800794551645858, + "epoch": 0.12783020204596332, "grad_norm": 0.0, - "learning_rate": 1.950063296085306e-05, - "loss": 1.1406, + "learning_rate": 1.9502306724074678e-05, + "loss": 1.0973, "step": 4511 }, { - "epoch": 0.12803632236095347, + "epoch": 0.1278585394882258, "grad_norm": 0.0, - "learning_rate": 1.9500346115743687e-05, - "loss": 0.895, + "learning_rate": 1.9502020749361922e-05, + "loss": 1.0674, "step": 4512 }, { - "epoch": 0.12806469920544836, + "epoch": 0.12788687693048825, "grad_norm": 0.0, - "learning_rate": 1.9500059190384463e-05, - "loss": 0.9243, + "learning_rate": 1.9501734694610113e-05, + "loss": 1.0331, "step": 4513 }, { - "epoch": 0.12809307604994324, + "epoch": 0.12791521437275072, "grad_norm": 0.0, - "learning_rate": 1.9499772184777814e-05, - "loss": 0.9912, + "learning_rate": 1.950144855982167e-05, + "loss": 1.077, "step": 4514 }, { - "epoch": 0.12812145289443813, + "epoch": 0.12794355181501318, "grad_norm": 0.0, - "learning_rate": 1.949948509892616e-05, - "loss": 1.0574, + "learning_rate": 1.9501162344998994e-05, + "loss": 0.9508, "step": 4515 }, { - "epoch": 0.12814982973893302, + "epoch": 0.12797188925727565, "grad_norm": 0.0, - "learning_rate": 1.949919793283193e-05, - "loss": 0.9457, + "learning_rate": 1.95008760501445e-05, + "loss": 1.0273, "step": 4516 }, { - "epoch": 0.12817820658342793, + "epoch": 0.1280002266995381, "grad_norm": 0.0, - "learning_rate": 1.949891068649755e-05, - "loss": 0.9875, + "learning_rate": 1.9500589675260596e-05, + "loss": 0.9225, "step": 4517 }, { - "epoch": 0.12820658342792282, + "epoch": 0.12802856414180055, "grad_norm": 0.0, - "learning_rate": 1.949862335992544e-05, - "loss": 1.0781, + "learning_rate": 1.9500303220349704e-05, + "loss": 1.0229, "step": 4518 }, { - "epoch": 0.1282349602724177, + "epoch": 0.12805690158406302, "grad_norm": 0.0, - "learning_rate": 1.9498335953118032e-05, - "loss": 1.1153, + "learning_rate": 1.9500016685414226e-05, + "loss": 0.9283, "step": 4519 }, { - "epoch": 0.1282633371169126, + "epoch": 0.1280852390263255, "grad_norm": 0.0, - "learning_rate": 1.9498048466077755e-05, - "loss": 0.9688, + "learning_rate": 1.949973007045658e-05, + "loss": 0.9649, "step": 4520 }, { - "epoch": 0.12829171396140748, + "epoch": 0.12811357646858795, "grad_norm": 0.0, - "learning_rate": 1.9497760898807035e-05, - "loss": 1.0112, + "learning_rate": 1.949944337547918e-05, + "loss": 0.9264, "step": 4521 }, { - "epoch": 0.12832009080590237, + "epoch": 0.12814191391085042, "grad_norm": 0.0, - "learning_rate": 1.9497473251308303e-05, - "loss": 0.9839, + "learning_rate": 1.9499156600484442e-05, + "loss": 0.9994, "step": 4522 }, { - "epoch": 0.12834846765039729, + "epoch": 0.12817025135311286, "grad_norm": 0.0, - "learning_rate": 1.9497185523583986e-05, - "loss": 1.0644, + "learning_rate": 1.9498869745474777e-05, + "loss": 0.9974, "step": 4523 }, { - "epoch": 0.12837684449489217, + "epoch": 0.12819858879537532, "grad_norm": 0.0, - "learning_rate": 1.9496897715636516e-05, - "loss": 1.0759, + "learning_rate": 1.9498582810452607e-05, + "loss": 0.9706, "step": 4524 }, { - "epoch": 0.12840522133938706, + "epoch": 0.1282269262376378, "grad_norm": 0.0, - "learning_rate": 1.9496609827468323e-05, - "loss": 1.0115, + "learning_rate": 1.949829579542035e-05, + "loss": 1.1072, "step": 4525 }, { - "epoch": 0.12843359818388195, + "epoch": 0.12825526367990026, "grad_norm": 0.0, - "learning_rate": 1.9496321859081844e-05, - "loss": 1.051, + "learning_rate": 1.9498008700380414e-05, + "loss": 1.0061, "step": 4526 }, { - "epoch": 0.12846197502837683, + "epoch": 0.12828360112216272, "grad_norm": 0.0, - "learning_rate": 1.9496033810479506e-05, - "loss": 1.0855, + "learning_rate": 1.949772152533523e-05, + "loss": 1.0603, "step": 4527 }, { - "epoch": 0.12849035187287175, + "epoch": 0.1283119385644252, "grad_norm": 0.0, - "learning_rate": 1.9495745681663737e-05, - "loss": 0.989, + "learning_rate": 1.9497434270287208e-05, + "loss": 1.04, "step": 4528 }, { - "epoch": 0.12851872871736664, + "epoch": 0.12834027600668763, "grad_norm": 0.0, - "learning_rate": 1.9495457472636984e-05, - "loss": 0.9203, + "learning_rate": 1.949714693523877e-05, + "loss": 1.0161, "step": 4529 }, { - "epoch": 0.12854710556186152, + "epoch": 0.1283686134489501, "grad_norm": 0.0, - "learning_rate": 1.9495169183401676e-05, - "loss": 0.9284, + "learning_rate": 1.9496859520192336e-05, + "loss": 1.1608, "step": 4530 }, { - "epoch": 0.1285754824063564, + "epoch": 0.12839695089121256, "grad_norm": 0.0, - "learning_rate": 1.9494880813960245e-05, - "loss": 1.1149, + "learning_rate": 1.9496572025150332e-05, + "loss": 1.05, "step": 4531 }, { - "epoch": 0.1286038592508513, + "epoch": 0.12842528833347502, "grad_norm": 0.0, - "learning_rate": 1.9494592364315125e-05, - "loss": 1.0198, + "learning_rate": 1.949628445011517e-05, + "loss": 0.9913, "step": 4532 }, { - "epoch": 0.1286322360953462, + "epoch": 0.1284536257757375, "grad_norm": 0.0, - "learning_rate": 1.949430383446876e-05, - "loss": 1.079, + "learning_rate": 1.949599679508928e-05, + "loss": 0.9306, "step": 4533 }, { - "epoch": 0.1286606129398411, + "epoch": 0.12848196321799996, "grad_norm": 0.0, - "learning_rate": 1.9494015224423583e-05, - "loss": 1.0387, + "learning_rate": 1.949570906007508e-05, + "loss": 1.0245, "step": 4534 }, { - "epoch": 0.128688989784336, + "epoch": 0.1285103006602624, "grad_norm": 0.0, - "learning_rate": 1.9493726534182037e-05, - "loss": 0.9759, + "learning_rate": 1.9495421245075e-05, + "loss": 1.0982, "step": 4535 }, { - "epoch": 0.12871736662883088, + "epoch": 0.12853863810252486, "grad_norm": 0.0, - "learning_rate": 1.949343776374655e-05, - "loss": 0.9684, + "learning_rate": 1.9495133350091463e-05, + "loss": 1.0351, "step": 4536 }, { - "epoch": 0.12874574347332576, + "epoch": 0.12856697554478733, "grad_norm": 0.0, - "learning_rate": 1.9493148913119572e-05, - "loss": 1.0074, + "learning_rate": 1.949484537512689e-05, + "loss": 1.0529, "step": 4537 }, { - "epoch": 0.12877412031782065, + "epoch": 0.1285953129870498, "grad_norm": 0.0, - "learning_rate": 1.9492859982303535e-05, - "loss": 1.131, + "learning_rate": 1.9494557320183706e-05, + "loss": 0.8805, "step": 4538 }, { - "epoch": 0.12880249716231554, + "epoch": 0.12862365042931226, "grad_norm": 0.0, - "learning_rate": 1.9492570971300885e-05, - "loss": 1.0976, + "learning_rate": 1.949426918526434e-05, + "loss": 1.061, "step": 4539 }, { - "epoch": 0.12883087400681045, + "epoch": 0.12865198787157472, "grad_norm": 0.0, - "learning_rate": 1.949228188011406e-05, - "loss": 1.0475, + "learning_rate": 1.949398097037122e-05, + "loss": 1.0483, "step": 4540 }, { - "epoch": 0.12885925085130534, + "epoch": 0.12868032531383716, "grad_norm": 0.0, - "learning_rate": 1.9491992708745502e-05, - "loss": 1.075, + "learning_rate": 1.9493692675506774e-05, + "loss": 1.0183, "step": 4541 }, { - "epoch": 0.12888762769580023, + "epoch": 0.12870866275609963, "grad_norm": 0.0, - "learning_rate": 1.9491703457197658e-05, - "loss": 0.9607, + "learning_rate": 1.9493404300673426e-05, + "loss": 1.009, "step": 4542 }, { - "epoch": 0.12891600454029511, + "epoch": 0.1287370001983621, "grad_norm": 0.0, - "learning_rate": 1.949141412547297e-05, - "loss": 1.0831, + "learning_rate": 1.9493115845873612e-05, + "loss": 1.1425, "step": 4543 }, { - "epoch": 0.12894438138479, + "epoch": 0.12876533764062456, "grad_norm": 0.0, - "learning_rate": 1.9491124713573873e-05, - "loss": 1.0429, + "learning_rate": 1.9492827311109758e-05, + "loss": 1.0509, "step": 4544 }, { - "epoch": 0.12897275822928492, + "epoch": 0.12879367508288703, "grad_norm": 0.0, - "learning_rate": 1.9490835221502825e-05, - "loss": 1.0658, + "learning_rate": 1.949253869638429e-05, + "loss": 1.0232, "step": 4545 }, { - "epoch": 0.1290011350737798, + "epoch": 0.1288220125251495, "grad_norm": 0.0, - "learning_rate": 1.9490545649262262e-05, - "loss": 0.9835, + "learning_rate": 1.949225000169965e-05, + "loss": 1.0918, "step": 4546 }, { - "epoch": 0.1290295119182747, + "epoch": 0.12885034996741193, "grad_norm": 0.0, - "learning_rate": 1.9490255996854633e-05, - "loss": 1.0013, + "learning_rate": 1.949196122705826e-05, + "loss": 1.0953, "step": 4547 }, { - "epoch": 0.12905788876276958, + "epoch": 0.1288786874096744, "grad_norm": 0.0, - "learning_rate": 1.948996626428239e-05, - "loss": 1.0755, + "learning_rate": 1.9491672372462554e-05, + "loss": 0.9294, "step": 4548 }, { - "epoch": 0.12908626560726447, + "epoch": 0.12890702485193686, "grad_norm": 0.0, - "learning_rate": 1.948967645154797e-05, - "loss": 1.1118, + "learning_rate": 1.9491383437914968e-05, + "loss": 1.0046, "step": 4549 }, { - "epoch": 0.12911464245175935, + "epoch": 0.12893536229419933, "grad_norm": 0.0, - "learning_rate": 1.948938655865383e-05, - "loss": 1.0358, + "learning_rate": 1.9491094423417934e-05, + "loss": 0.9948, "step": 4550 }, { - "epoch": 0.12914301929625427, + "epoch": 0.1289636997364618, "grad_norm": 0.0, - "learning_rate": 1.9489096585602413e-05, - "loss": 1.0435, + "learning_rate": 1.949080532897389e-05, + "loss": 1.0031, "step": 4551 }, { - "epoch": 0.12917139614074916, + "epoch": 0.12899203717872426, "grad_norm": 0.0, - "learning_rate": 1.948880653239617e-05, - "loss": 1.057, + "learning_rate": 1.9490516154585268e-05, + "loss": 0.9948, "step": 4552 }, { - "epoch": 0.12919977298524404, + "epoch": 0.1290203746209867, "grad_norm": 0.0, - "learning_rate": 1.9488516399037552e-05, - "loss": 1.0685, + "learning_rate": 1.9490226900254504e-05, + "loss": 1.0824, "step": 4553 }, { - "epoch": 0.12922814982973893, + "epoch": 0.12904871206324917, "grad_norm": 0.0, - "learning_rate": 1.948822618552901e-05, - "loss": 1.0414, + "learning_rate": 1.9489937565984033e-05, + "loss": 0.9629, "step": 4554 }, { - "epoch": 0.12925652667423382, + "epoch": 0.12907704950551163, "grad_norm": 0.0, - "learning_rate": 1.9487935891872997e-05, - "loss": 0.9803, + "learning_rate": 1.94896481517763e-05, + "loss": 1.0003, "step": 4555 }, { - "epoch": 0.1292849035187287, + "epoch": 0.1291053869477741, "grad_norm": 0.0, - "learning_rate": 1.948764551807196e-05, - "loss": 1.0276, + "learning_rate": 1.948935865763373e-05, + "loss": 0.9133, "step": 4556 }, { - "epoch": 0.12931328036322362, + "epoch": 0.12913372439003656, "grad_norm": 0.0, - "learning_rate": 1.9487355064128356e-05, - "loss": 1.0033, + "learning_rate": 1.9489069083558768e-05, + "loss": 0.977, "step": 4557 }, { - "epoch": 0.1293416572077185, + "epoch": 0.12916206183229903, "grad_norm": 0.0, - "learning_rate": 1.9487064530044636e-05, - "loss": 1.0996, + "learning_rate": 1.9488779429553855e-05, + "loss": 1.002, "step": 4558 }, { - "epoch": 0.1293700340522134, + "epoch": 0.12919039927456147, "grad_norm": 0.0, - "learning_rate": 1.9486773915823258e-05, - "loss": 1.0008, + "learning_rate": 1.9488489695621432e-05, + "loss": 1.1081, "step": 4559 }, { - "epoch": 0.12939841089670828, + "epoch": 0.12921873671682393, "grad_norm": 0.0, - "learning_rate": 1.948648322146667e-05, - "loss": 1.0949, + "learning_rate": 1.9488199881763932e-05, + "loss": 1.0138, "step": 4560 }, { - "epoch": 0.12942678774120317, + "epoch": 0.1292470741590864, "grad_norm": 0.0, - "learning_rate": 1.9486192446977336e-05, - "loss": 0.9406, + "learning_rate": 1.9487909987983805e-05, + "loss": 1.0312, "step": 4561 }, { - "epoch": 0.12945516458569806, + "epoch": 0.12927541160134887, "grad_norm": 0.0, - "learning_rate": 1.9485901592357708e-05, - "loss": 0.9581, + "learning_rate": 1.9487620014283487e-05, + "loss": 0.9723, "step": 4562 }, { - "epoch": 0.12948354143019297, + "epoch": 0.12930374904361133, "grad_norm": 0.0, - "learning_rate": 1.948561065761024e-05, - "loss": 1.0338, + "learning_rate": 1.9487329960665424e-05, + "loss": 1.0182, "step": 4563 }, { - "epoch": 0.12951191827468786, + "epoch": 0.1293320864858738, "grad_norm": 0.0, - "learning_rate": 1.9485319642737396e-05, - "loss": 0.9832, + "learning_rate": 1.9487039827132056e-05, + "loss": 1.0638, "step": 4564 }, { - "epoch": 0.12954029511918275, + "epoch": 0.12936042392813624, "grad_norm": 0.0, - "learning_rate": 1.9485028547741628e-05, - "loss": 1.1264, + "learning_rate": 1.9486749613685828e-05, + "loss": 1.1133, "step": 4565 }, { - "epoch": 0.12956867196367763, + "epoch": 0.1293887613703987, "grad_norm": 0.0, - "learning_rate": 1.9484737372625394e-05, - "loss": 1.0071, + "learning_rate": 1.9486459320329187e-05, + "loss": 1.1229, "step": 4566 }, { - "epoch": 0.12959704880817252, + "epoch": 0.12941709881266117, "grad_norm": 0.0, - "learning_rate": 1.9484446117391162e-05, - "loss": 1.1017, + "learning_rate": 1.9486168947064576e-05, + "loss": 1.2549, "step": 4567 }, { - "epoch": 0.12962542565266744, + "epoch": 0.12944543625492363, "grad_norm": 0.0, - "learning_rate": 1.948415478204139e-05, - "loss": 1.0107, + "learning_rate": 1.948587849389444e-05, + "loss": 1.1534, "step": 4568 }, { - "epoch": 0.12965380249716232, + "epoch": 0.1294737736971861, "grad_norm": 0.0, - "learning_rate": 1.948386336657853e-05, - "loss": 1.0441, + "learning_rate": 1.948558796082123e-05, + "loss": 1.0073, "step": 4569 }, { - "epoch": 0.1296821793416572, + "epoch": 0.12950211113944857, "grad_norm": 0.0, - "learning_rate": 1.9483571871005052e-05, - "loss": 1.0261, + "learning_rate": 1.9485297347847388e-05, + "loss": 1.0334, "step": 4570 }, { - "epoch": 0.1297105561861521, + "epoch": 0.129530448581711, "grad_norm": 0.0, - "learning_rate": 1.9483280295323416e-05, - "loss": 1.1154, + "learning_rate": 1.9485006654975366e-05, + "loss": 0.9386, "step": 4571 }, { - "epoch": 0.12973893303064699, + "epoch": 0.12955878602397347, "grad_norm": 0.0, - "learning_rate": 1.9482988639536086e-05, - "loss": 0.9446, + "learning_rate": 1.9484715882207608e-05, + "loss": 1.0567, "step": 4572 }, { - "epoch": 0.12976730987514187, + "epoch": 0.12958712346623594, "grad_norm": 0.0, - "learning_rate": 1.9482696903645524e-05, - "loss": 1.069, + "learning_rate": 1.9484425029546567e-05, + "loss": 1.0632, "step": 4573 }, { - "epoch": 0.1297956867196368, + "epoch": 0.1296154609084984, "grad_norm": 0.0, - "learning_rate": 1.9482405087654194e-05, - "loss": 1.0801, + "learning_rate": 1.9484134096994693e-05, + "loss": 1.1605, "step": 4574 }, { - "epoch": 0.12982406356413168, + "epoch": 0.12964379835076087, "grad_norm": 0.0, - "learning_rate": 1.948211319156456e-05, - "loss": 1.0996, + "learning_rate": 1.9483843084554436e-05, + "loss": 1.1536, "step": 4575 }, { - "epoch": 0.12985244040862656, + "epoch": 0.12967213579302334, "grad_norm": 0.0, - "learning_rate": 1.9481821215379097e-05, - "loss": 1.003, + "learning_rate": 1.9483551992228245e-05, + "loss": 1.0774, "step": 4576 }, { - "epoch": 0.12988081725312145, + "epoch": 0.12970047323528577, "grad_norm": 0.0, - "learning_rate": 1.9481529159100258e-05, - "loss": 1.0534, + "learning_rate": 1.9483260820018577e-05, + "loss": 1.0396, "step": 4577 }, { - "epoch": 0.12990919409761634, + "epoch": 0.12972881067754824, "grad_norm": 0.0, - "learning_rate": 1.9481237022730516e-05, - "loss": 0.9887, + "learning_rate": 1.9482969567927878e-05, + "loss": 1.098, "step": 4578 }, { - "epoch": 0.12993757094211122, + "epoch": 0.1297571481198107, "grad_norm": 0.0, - "learning_rate": 1.9480944806272345e-05, - "loss": 1.0877, + "learning_rate": 1.9482678235958604e-05, + "loss": 1.0426, "step": 4579 }, { - "epoch": 0.12996594778660614, + "epoch": 0.12978548556207317, "grad_norm": 0.0, - "learning_rate": 1.94806525097282e-05, - "loss": 1.0644, + "learning_rate": 1.9482386824113215e-05, + "loss": 1.0706, "step": 4580 }, { - "epoch": 0.12999432463110103, + "epoch": 0.12981382300433564, "grad_norm": 0.0, - "learning_rate": 1.948036013310056e-05, - "loss": 1.0763, + "learning_rate": 1.9482095332394157e-05, + "loss": 0.9672, "step": 4581 }, { - "epoch": 0.13002270147559591, + "epoch": 0.1298421604465981, "grad_norm": 0.0, - "learning_rate": 1.948006767639189e-05, - "loss": 1.0391, + "learning_rate": 1.948180376080389e-05, + "loss": 0.9828, "step": 4582 }, { - "epoch": 0.1300510783200908, + "epoch": 0.12987049788886054, "grad_norm": 0.0, - "learning_rate": 1.947977513960466e-05, - "loss": 0.891, + "learning_rate": 1.948151210934487e-05, + "loss": 1.0029, "step": 4583 }, { - "epoch": 0.1300794551645857, + "epoch": 0.129898835331123, "grad_norm": 0.0, - "learning_rate": 1.9479482522741348e-05, - "loss": 0.9867, + "learning_rate": 1.9481220378019553e-05, + "loss": 1.1037, "step": 4584 }, { - "epoch": 0.1301078320090806, + "epoch": 0.12992717277338547, "grad_norm": 0.0, - "learning_rate": 1.9479189825804423e-05, - "loss": 0.9827, + "learning_rate": 1.948092856683039e-05, + "loss": 1.075, "step": 4585 }, { - "epoch": 0.1301362088535755, + "epoch": 0.12995551021564794, "grad_norm": 0.0, - "learning_rate": 1.947889704879635e-05, - "loss": 1.0865, + "learning_rate": 1.9480636675779853e-05, + "loss": 1.0903, "step": 4586 }, { - "epoch": 0.13016458569807038, + "epoch": 0.1299838476579104, "grad_norm": 0.0, - "learning_rate": 1.9478604191719607e-05, - "loss": 1.0073, + "learning_rate": 1.9480344704870387e-05, + "loss": 1.1167, "step": 4587 }, { - "epoch": 0.13019296254256527, + "epoch": 0.13001218510017284, "grad_norm": 0.0, - "learning_rate": 1.9478311254576673e-05, - "loss": 1.0308, + "learning_rate": 1.9480052654104458e-05, + "loss": 1.0914, "step": 4588 }, { - "epoch": 0.13022133938706015, + "epoch": 0.1300405225424353, "grad_norm": 0.0, - "learning_rate": 1.9478018237370014e-05, - "loss": 0.9808, + "learning_rate": 1.947976052348453e-05, + "loss": 1.0388, "step": 4589 }, { - "epoch": 0.13024971623155504, + "epoch": 0.13006885998469778, "grad_norm": 0.0, - "learning_rate": 1.947772514010211e-05, - "loss": 1.0699, + "learning_rate": 1.9479468313013055e-05, + "loss": 1.0937, "step": 4590 }, { - "epoch": 0.13027809307604996, + "epoch": 0.13009719742696024, "grad_norm": 0.0, - "learning_rate": 1.9477431962775438e-05, - "loss": 1.0444, + "learning_rate": 1.94791760226925e-05, + "loss": 1.0296, "step": 4591 }, { - "epoch": 0.13030646992054484, + "epoch": 0.1301255348692227, "grad_norm": 0.0, - "learning_rate": 1.9477138705392468e-05, - "loss": 0.9916, + "learning_rate": 1.9478883652525323e-05, + "loss": 1.0507, "step": 4592 }, { - "epoch": 0.13033484676503973, + "epoch": 0.13015387231148517, "grad_norm": 0.0, - "learning_rate": 1.9476845367955686e-05, - "loss": 1.0069, + "learning_rate": 1.947859120251399e-05, + "loss": 1.0308, "step": 4593 }, { - "epoch": 0.13036322360953462, + "epoch": 0.1301822097537476, "grad_norm": 0.0, - "learning_rate": 1.947655195046756e-05, - "loss": 1.0457, + "learning_rate": 1.9478298672660963e-05, + "loss": 1.0605, "step": 4594 }, { - "epoch": 0.1303916004540295, + "epoch": 0.13021054719601008, "grad_norm": 0.0, - "learning_rate": 1.9476258452930577e-05, - "loss": 1.0172, + "learning_rate": 1.947800606296871e-05, + "loss": 1.0507, "step": 4595 }, { - "epoch": 0.1304199772985244, + "epoch": 0.13023888463827255, "grad_norm": 0.0, - "learning_rate": 1.947596487534721e-05, - "loss": 0.9276, + "learning_rate": 1.947771337343969e-05, + "loss": 1.1042, "step": 4596 }, { - "epoch": 0.1304483541430193, + "epoch": 0.130267222080535, "grad_norm": 0.0, - "learning_rate": 1.9475671217719947e-05, - "loss": 1.0588, + "learning_rate": 1.947742060407637e-05, + "loss": 0.9458, "step": 4597 }, { - "epoch": 0.1304767309875142, + "epoch": 0.13029555952279748, "grad_norm": 0.0, - "learning_rate": 1.947537748005126e-05, - "loss": 1.0237, + "learning_rate": 1.9477127754881215e-05, + "loss": 1.096, "step": 4598 }, { - "epoch": 0.13050510783200908, + "epoch": 0.13032389696505994, "grad_norm": 0.0, - "learning_rate": 1.9475083662343633e-05, - "loss": 1.1265, + "learning_rate": 1.9476834825856696e-05, + "loss": 0.9566, "step": 4599 }, { - "epoch": 0.13053348467650397, + "epoch": 0.13035223440732238, "grad_norm": 0.0, - "learning_rate": 1.947478976459955e-05, - "loss": 1.1464, + "learning_rate": 1.9476541817005278e-05, + "loss": 1.0083, "step": 4600 }, { - "epoch": 0.13056186152099886, + "epoch": 0.13038057184958485, "grad_norm": 0.0, - "learning_rate": 1.9474495786821493e-05, - "loss": 1.0089, + "learning_rate": 1.947624872832943e-05, + "loss": 1.0954, "step": 4601 }, { - "epoch": 0.13059023836549374, + "epoch": 0.1304089092918473, "grad_norm": 0.0, - "learning_rate": 1.947420172901194e-05, - "loss": 0.917, + "learning_rate": 1.9475955559831622e-05, + "loss": 1.0762, "step": 4602 }, { - "epoch": 0.13061861520998866, + "epoch": 0.13043724673410978, "grad_norm": 0.0, - "learning_rate": 1.947390759117338e-05, - "loss": 1.0709, + "learning_rate": 1.9475662311514317e-05, + "loss": 0.9703, "step": 4603 }, { - "epoch": 0.13064699205448355, + "epoch": 0.13046558417637225, "grad_norm": 0.0, - "learning_rate": 1.94736133733083e-05, - "loss": 1.141, + "learning_rate": 1.9475368983379992e-05, + "loss": 1.0342, "step": 4604 }, { - "epoch": 0.13067536889897843, + "epoch": 0.1304939216186347, "grad_norm": 0.0, - "learning_rate": 1.947331907541918e-05, - "loss": 1.0096, + "learning_rate": 1.9475075575431112e-05, + "loss": 0.9788, "step": 4605 }, { - "epoch": 0.13070374574347332, + "epoch": 0.13052225906089715, "grad_norm": 0.0, - "learning_rate": 1.947302469750851e-05, - "loss": 1.0785, + "learning_rate": 1.9474782087670156e-05, + "loss": 1.1095, "step": 4606 }, { - "epoch": 0.1307321225879682, + "epoch": 0.13055059650315962, "grad_norm": 0.0, - "learning_rate": 1.947273023957877e-05, - "loss": 1.0078, + "learning_rate": 1.9474488520099594e-05, + "loss": 1.0567, "step": 4607 }, { - "epoch": 0.13076049943246312, + "epoch": 0.13057893394542208, "grad_norm": 0.0, - "learning_rate": 1.9472435701632457e-05, - "loss": 1.008, + "learning_rate": 1.9474194872721892e-05, + "loss": 1.1384, "step": 4608 }, { - "epoch": 0.130788876276958, + "epoch": 0.13060727138768455, "grad_norm": 0.0, - "learning_rate": 1.947214108367205e-05, - "loss": 1.1033, + "learning_rate": 1.947390114553953e-05, + "loss": 1.0814, "step": 4609 }, { - "epoch": 0.1308172531214529, + "epoch": 0.13063560882994701, "grad_norm": 0.0, - "learning_rate": 1.9471846385700042e-05, - "loss": 0.9841, + "learning_rate": 1.947360733855498e-05, + "loss": 1.104, "step": 4610 }, { - "epoch": 0.13084562996594779, + "epoch": 0.13066394627220948, "grad_norm": 0.0, - "learning_rate": 1.947155160771892e-05, - "loss": 0.9753, + "learning_rate": 1.9473313451770722e-05, + "loss": 1.0585, "step": 4611 }, { - "epoch": 0.13087400681044267, + "epoch": 0.13069228371447192, "grad_norm": 0.0, - "learning_rate": 1.9471256749731177e-05, - "loss": 1.0661, + "learning_rate": 1.947301948518922e-05, + "loss": 1.079, "step": 4612 }, { - "epoch": 0.13090238365493756, + "epoch": 0.13072062115673438, "grad_norm": 0.0, - "learning_rate": 1.9470961811739304e-05, - "loss": 1.1617, + "learning_rate": 1.9472725438812963e-05, + "loss": 1.0851, "step": 4613 }, { - "epoch": 0.13093076049943247, + "epoch": 0.13074895859899685, "grad_norm": 0.0, - "learning_rate": 1.9470666793745793e-05, - "loss": 0.9553, + "learning_rate": 1.947243131264442e-05, + "loss": 1.0772, "step": 4614 }, { - "epoch": 0.13095913734392736, + "epoch": 0.13077729604125932, "grad_norm": 0.0, - "learning_rate": 1.947037169575313e-05, - "loss": 1.0348, + "learning_rate": 1.9472137106686067e-05, + "loss": 1.0546, "step": 4615 }, { - "epoch": 0.13098751418842225, + "epoch": 0.13080563348352178, "grad_norm": 0.0, - "learning_rate": 1.9470076517763813e-05, - "loss": 0.9897, + "learning_rate": 1.947184282094039e-05, + "loss": 1.0691, "step": 4616 }, { - "epoch": 0.13101589103291714, + "epoch": 0.13083397092578425, "grad_norm": 0.0, - "learning_rate": 1.9469781259780332e-05, - "loss": 1.0145, + "learning_rate": 1.9471548455409866e-05, + "loss": 0.9499, "step": 4617 }, { - "epoch": 0.13104426787741202, + "epoch": 0.1308623083680467, "grad_norm": 0.0, - "learning_rate": 1.9469485921805183e-05, - "loss": 1.0746, + "learning_rate": 1.9471254010096967e-05, + "loss": 1.1187, "step": 4618 }, { - "epoch": 0.1310726447219069, + "epoch": 0.13089064581030915, "grad_norm": 0.0, - "learning_rate": 1.9469190503840868e-05, - "loss": 1.1158, + "learning_rate": 1.947095948500418e-05, + "loss": 1.1072, "step": 4619 }, { - "epoch": 0.13110102156640183, + "epoch": 0.13091898325257162, "grad_norm": 0.0, - "learning_rate": 1.9468895005889866e-05, - "loss": 1.0975, + "learning_rate": 1.9470664880133986e-05, + "loss": 0.9824, "step": 4620 }, { - "epoch": 0.1311293984108967, + "epoch": 0.13094732069483409, "grad_norm": 0.0, - "learning_rate": 1.9468599427954685e-05, - "loss": 0.8598, + "learning_rate": 1.9470370195488862e-05, + "loss": 0.9617, "step": 4621 }, { - "epoch": 0.1311577752553916, + "epoch": 0.13097565813709655, "grad_norm": 0.0, - "learning_rate": 1.946830377003782e-05, - "loss": 1.0443, + "learning_rate": 1.9470075431071293e-05, + "loss": 1.0363, "step": 4622 }, { - "epoch": 0.1311861520998865, + "epoch": 0.13100399557935902, "grad_norm": 0.0, - "learning_rate": 1.946800803214177e-05, - "loss": 1.0811, + "learning_rate": 1.9469780586883765e-05, + "loss": 1.0337, "step": 4623 }, { - "epoch": 0.13121452894438138, + "epoch": 0.13103233302162146, "grad_norm": 0.0, - "learning_rate": 1.9467712214269028e-05, - "loss": 1.1818, + "learning_rate": 1.9469485662928757e-05, + "loss": 0.9731, "step": 4624 }, { - "epoch": 0.1312429057888763, + "epoch": 0.13106067046388392, "grad_norm": 0.0, - "learning_rate": 1.9467416316422093e-05, - "loss": 1.0259, + "learning_rate": 1.9469190659208754e-05, + "loss": 1.1471, "step": 4625 }, { - "epoch": 0.13127128263337118, + "epoch": 0.1310890079061464, "grad_norm": 0.0, - "learning_rate": 1.946712033860347e-05, - "loss": 1.03, + "learning_rate": 1.9468895575726243e-05, + "loss": 1.0154, "step": 4626 }, { - "epoch": 0.13129965947786607, + "epoch": 0.13111734534840885, "grad_norm": 0.0, - "learning_rate": 1.9466824280815654e-05, - "loss": 1.2318, + "learning_rate": 1.946860041248371e-05, + "loss": 1.0814, "step": 4627 }, { - "epoch": 0.13132803632236095, + "epoch": 0.13114568279067132, "grad_norm": 0.0, - "learning_rate": 1.9466528143061148e-05, - "loss": 1.0018, + "learning_rate": 1.9468305169483637e-05, + "loss": 1.1037, "step": 4628 }, { - "epoch": 0.13135641316685584, + "epoch": 0.13117402023293379, "grad_norm": 0.0, - "learning_rate": 1.9466231925342453e-05, - "loss": 1.0174, + "learning_rate": 1.9468009846728515e-05, + "loss": 0.9931, "step": 4629 }, { - "epoch": 0.13138479001135073, + "epoch": 0.13120235767519622, "grad_norm": 0.0, - "learning_rate": 1.9465935627662075e-05, - "loss": 1.0393, + "learning_rate": 1.946771444422083e-05, + "loss": 1.0208, "step": 4630 }, { - "epoch": 0.13141316685584564, + "epoch": 0.1312306951174587, "grad_norm": 0.0, - "learning_rate": 1.946563925002251e-05, - "loss": 0.9847, + "learning_rate": 1.946741896196307e-05, + "loss": 1.0169, "step": 4631 }, { - "epoch": 0.13144154370034053, + "epoch": 0.13125903255972116, "grad_norm": 0.0, - "learning_rate": 1.9465342792426264e-05, - "loss": 0.9718, + "learning_rate": 1.9467123399957724e-05, + "loss": 1.0142, "step": 4632 }, { - "epoch": 0.13146992054483542, + "epoch": 0.13128737000198362, "grad_norm": 0.0, - "learning_rate": 1.9465046254875842e-05, - "loss": 0.9832, + "learning_rate": 1.9466827758207284e-05, + "loss": 1.0432, "step": 4633 }, { - "epoch": 0.1314982973893303, + "epoch": 0.1313157074442461, "grad_norm": 0.0, - "learning_rate": 1.946474963737375e-05, - "loss": 0.931, + "learning_rate": 1.9466532036714235e-05, + "loss": 1.1512, "step": 4634 }, { - "epoch": 0.1315266742338252, + "epoch": 0.13134404488650855, "grad_norm": 0.0, - "learning_rate": 1.9464452939922492e-05, - "loss": 1.0666, + "learning_rate": 1.9466236235481074e-05, + "loss": 1.0091, "step": 4635 }, { - "epoch": 0.13155505107832008, + "epoch": 0.131372382328771, "grad_norm": 0.0, - "learning_rate": 1.9464156162524578e-05, - "loss": 1.0729, + "learning_rate": 1.9465940354510287e-05, + "loss": 1.0558, "step": 4636 }, { - "epoch": 0.131583427922815, + "epoch": 0.13140071977103346, "grad_norm": 0.0, - "learning_rate": 1.9463859305182508e-05, - "loss": 1.0114, + "learning_rate": 1.9465644393804373e-05, + "loss": 1.0006, "step": 4637 }, { - "epoch": 0.13161180476730988, + "epoch": 0.13142905721329592, "grad_norm": 0.0, - "learning_rate": 1.9463562367898793e-05, - "loss": 1.027, + "learning_rate": 1.946534835336582e-05, + "loss": 1.0156, "step": 4638 }, { - "epoch": 0.13164018161180477, + "epoch": 0.1314573946555584, "grad_norm": 0.0, - "learning_rate": 1.946326535067594e-05, - "loss": 1.017, + "learning_rate": 1.9465052233197125e-05, + "loss": 1.0824, "step": 4639 }, { - "epoch": 0.13166855845629966, + "epoch": 0.13148573209782086, "grad_norm": 0.0, - "learning_rate": 1.9462968253516462e-05, - "loss": 1.023, + "learning_rate": 1.9464756033300775e-05, + "loss": 1.0379, "step": 4640 }, { - "epoch": 0.13169693530079454, + "epoch": 0.13151406954008332, "grad_norm": 0.0, - "learning_rate": 1.9462671076422863e-05, - "loss": 0.9801, + "learning_rate": 1.9464459753679272e-05, + "loss": 0.9449, "step": 4641 }, { - "epoch": 0.13172531214528943, + "epoch": 0.13154240698234576, "grad_norm": 0.0, - "learning_rate": 1.946237381939766e-05, - "loss": 0.967, + "learning_rate": 1.9464163394335112e-05, + "loss": 1.0157, "step": 4642 }, { - "epoch": 0.13175368898978435, + "epoch": 0.13157074442460823, "grad_norm": 0.0, - "learning_rate": 1.9462076482443355e-05, - "loss": 0.9576, + "learning_rate": 1.946386695527079e-05, + "loss": 1.0304, "step": 4643 }, { - "epoch": 0.13178206583427923, + "epoch": 0.1315990818668707, "grad_norm": 0.0, - "learning_rate": 1.9461779065562467e-05, - "loss": 1.0659, + "learning_rate": 1.9463570436488803e-05, + "loss": 0.9855, "step": 4644 }, { - "epoch": 0.13181044267877412, + "epoch": 0.13162741930913316, "grad_norm": 0.0, - "learning_rate": 1.946148156875751e-05, - "loss": 1.0311, + "learning_rate": 1.9463273837991643e-05, + "loss": 0.9715, "step": 4645 }, { - "epoch": 0.131838819523269, + "epoch": 0.13165575675139563, "grad_norm": 0.0, - "learning_rate": 1.9461183992030984e-05, - "loss": 1.0096, + "learning_rate": 1.946297715978182e-05, + "loss": 0.9922, "step": 4646 }, { - "epoch": 0.1318671963677639, + "epoch": 0.1316840941936581, "grad_norm": 0.0, - "learning_rate": 1.946088633538542e-05, - "loss": 1.0939, + "learning_rate": 1.946268040186182e-05, + "loss": 1.0871, "step": 4647 }, { - "epoch": 0.1318955732122588, + "epoch": 0.13171243163592053, "grad_norm": 0.0, - "learning_rate": 1.9460588598823317e-05, - "loss": 0.9298, + "learning_rate": 1.946238356423415e-05, + "loss": 1.1496, "step": 4648 }, { - "epoch": 0.1319239500567537, + "epoch": 0.131740769078183, "grad_norm": 0.0, - "learning_rate": 1.94602907823472e-05, - "loss": 1.0662, + "learning_rate": 1.946208664690131e-05, + "loss": 0.9843, "step": 4649 }, { - "epoch": 0.13195232690124858, + "epoch": 0.13176910652044546, "grad_norm": 0.0, - "learning_rate": 1.945999288595958e-05, - "loss": 1.0439, + "learning_rate": 1.9461789649865802e-05, + "loss": 1.0144, "step": 4650 }, { - "epoch": 0.13198070374574347, + "epoch": 0.13179744396270793, "grad_norm": 0.0, - "learning_rate": 1.945969490966298e-05, - "loss": 1.0263, + "learning_rate": 1.946149257313013e-05, + "loss": 1.1156, "step": 4651 }, { - "epoch": 0.13200908059023836, + "epoch": 0.1318257814049704, "grad_norm": 0.0, - "learning_rate": 1.9459396853459906e-05, - "loss": 1.1933, + "learning_rate": 1.9461195416696787e-05, + "loss": 0.9905, "step": 4652 }, { - "epoch": 0.13203745743473325, + "epoch": 0.13185411884723286, "grad_norm": 0.0, - "learning_rate": 1.9459098717352883e-05, - "loss": 1.0708, + "learning_rate": 1.9460898180568285e-05, + "loss": 1.0906, "step": 4653 }, { - "epoch": 0.13206583427922816, + "epoch": 0.1318824562894953, "grad_norm": 0.0, - "learning_rate": 1.9458800501344426e-05, - "loss": 0.9729, + "learning_rate": 1.946060086474712e-05, + "loss": 1.1064, "step": 4654 }, { - "epoch": 0.13209421112372305, + "epoch": 0.13191079373175776, "grad_norm": 0.0, - "learning_rate": 1.9458502205437063e-05, - "loss": 1.0594, + "learning_rate": 1.9460303469235808e-05, + "loss": 1.0056, "step": 4655 }, { - "epoch": 0.13212258796821794, + "epoch": 0.13193913117402023, "grad_norm": 0.0, - "learning_rate": 1.94582038296333e-05, - "loss": 0.9234, + "learning_rate": 1.946000599403684e-05, + "loss": 1.1154, "step": 4656 }, { - "epoch": 0.13215096481271282, + "epoch": 0.1319674686162827, "grad_norm": 0.0, - "learning_rate": 1.9457905373935667e-05, - "loss": 0.9799, + "learning_rate": 1.945970843915273e-05, + "loss": 1.0304, "step": 4657 }, { - "epoch": 0.1321793416572077, + "epoch": 0.13199580605854516, "grad_norm": 0.0, - "learning_rate": 1.945760683834668e-05, - "loss": 1.0086, + "learning_rate": 1.9459410804585984e-05, + "loss": 0.9067, "step": 4658 }, { - "epoch": 0.1322077185017026, + "epoch": 0.13202414350080763, "grad_norm": 0.0, - "learning_rate": 1.9457308222868866e-05, - "loss": 1.0815, + "learning_rate": 1.9459113090339107e-05, + "loss": 1.1121, "step": 4659 }, { - "epoch": 0.1322360953461975, + "epoch": 0.13205248094307007, "grad_norm": 0.0, - "learning_rate": 1.945700952750474e-05, - "loss": 0.979, + "learning_rate": 1.945881529641461e-05, + "loss": 1.0396, "step": 4660 }, { - "epoch": 0.1322644721906924, + "epoch": 0.13208081838533253, "grad_norm": 0.0, - "learning_rate": 1.9456710752256835e-05, - "loss": 0.9948, + "learning_rate": 1.9458517422814998e-05, + "loss": 1.0558, "step": 4661 }, { - "epoch": 0.1322928490351873, + "epoch": 0.132109155827595, "grad_norm": 0.0, - "learning_rate": 1.9456411897127667e-05, - "loss": 1.0494, + "learning_rate": 1.9458219469542782e-05, + "loss": 0.9856, "step": 4662 }, { - "epoch": 0.13232122587968217, + "epoch": 0.13213749326985746, "grad_norm": 0.0, - "learning_rate": 1.9456112962119762e-05, - "loss": 1.0773, + "learning_rate": 1.9457921436600473e-05, + "loss": 1.044, "step": 4663 }, { - "epoch": 0.13234960272417706, + "epoch": 0.13216583071211993, "grad_norm": 0.0, - "learning_rate": 1.9455813947235644e-05, - "loss": 1.0629, + "learning_rate": 1.9457623323990574e-05, + "loss": 1.0454, "step": 4664 }, { - "epoch": 0.13237797956867198, + "epoch": 0.1321941681543824, "grad_norm": 0.0, - "learning_rate": 1.9455514852477843e-05, - "loss": 1.0214, + "learning_rate": 1.9457325131715608e-05, + "loss": 1.078, "step": 4665 }, { - "epoch": 0.13240635641316686, + "epoch": 0.13222250559664483, "grad_norm": 0.0, - "learning_rate": 1.945521567784888e-05, - "loss": 1.0297, + "learning_rate": 1.9457026859778077e-05, + "loss": 0.9882, "step": 4666 }, { - "epoch": 0.13243473325766175, + "epoch": 0.1322508430389073, "grad_norm": 0.0, - "learning_rate": 1.9454916423351288e-05, - "loss": 0.9964, + "learning_rate": 1.94567285081805e-05, + "loss": 0.9367, "step": 4667 }, { - "epoch": 0.13246311010215664, + "epoch": 0.13227918048116977, "grad_norm": 0.0, - "learning_rate": 1.9454617088987592e-05, - "loss": 1.0146, + "learning_rate": 1.9456430076925382e-05, + "loss": 1.0106, "step": 4668 }, { - "epoch": 0.13249148694665153, + "epoch": 0.13230751792343223, "grad_norm": 0.0, - "learning_rate": 1.9454317674760318e-05, - "loss": 0.9777, + "learning_rate": 1.9456131566015245e-05, + "loss": 1.1138, "step": 4669 }, { - "epoch": 0.13251986379114641, + "epoch": 0.1323358553656947, "grad_norm": 0.0, - "learning_rate": 1.9454018180672002e-05, - "loss": 1.0457, + "learning_rate": 1.9455832975452604e-05, + "loss": 1.0162, "step": 4670 }, { - "epoch": 0.13254824063564133, + "epoch": 0.13236419280795717, "grad_norm": 0.0, - "learning_rate": 1.9453718606725166e-05, - "loss": 1.0579, + "learning_rate": 1.9455534305239964e-05, + "loss": 0.9405, "step": 4671 }, { - "epoch": 0.13257661748013622, + "epoch": 0.1323925302502196, "grad_norm": 0.0, - "learning_rate": 1.9453418952922344e-05, - "loss": 1.0236, + "learning_rate": 1.945523555537985e-05, + "loss": 0.9561, "step": 4672 }, { - "epoch": 0.1326049943246311, + "epoch": 0.13242086769248207, "grad_norm": 0.0, - "learning_rate": 1.9453119219266066e-05, - "loss": 1.038, + "learning_rate": 1.9454936725874775e-05, + "loss": 1.019, "step": 4673 }, { - "epoch": 0.132633371169126, + "epoch": 0.13244920513474454, "grad_norm": 0.0, - "learning_rate": 1.9452819405758868e-05, - "loss": 1.0503, + "learning_rate": 1.945463781672726e-05, + "loss": 1.1854, "step": 4674 }, { - "epoch": 0.13266174801362088, + "epoch": 0.132477542577007, "grad_norm": 0.0, - "learning_rate": 1.9452519512403277e-05, - "loss": 0.9994, + "learning_rate": 1.9454338827939817e-05, + "loss": 1.0361, "step": 4675 }, { - "epoch": 0.13269012485811577, + "epoch": 0.13250588001926947, "grad_norm": 0.0, - "learning_rate": 1.945221953920183e-05, - "loss": 1.0273, + "learning_rate": 1.945403975951497e-05, + "loss": 1.073, "step": 4676 }, { - "epoch": 0.13271850170261068, + "epoch": 0.13253421746153193, "grad_norm": 0.0, - "learning_rate": 1.9451919486157062e-05, - "loss": 1.0464, + "learning_rate": 1.9453740611455232e-05, + "loss": 1.0313, "step": 4677 }, { - "epoch": 0.13274687854710557, + "epoch": 0.13256255490379437, "grad_norm": 0.0, - "learning_rate": 1.9451619353271503e-05, - "loss": 1.108, + "learning_rate": 1.9453441383763128e-05, + "loss": 0.9621, "step": 4678 }, { - "epoch": 0.13277525539160046, + "epoch": 0.13259089234605684, "grad_norm": 0.0, - "learning_rate": 1.9451319140547692e-05, - "loss": 1.0107, + "learning_rate": 1.9453142076441173e-05, + "loss": 1.0381, "step": 4679 }, { - "epoch": 0.13280363223609534, + "epoch": 0.1326192297883193, "grad_norm": 0.0, - "learning_rate": 1.945101884798816e-05, - "loss": 1.0576, + "learning_rate": 1.9452842689491896e-05, + "loss": 1.1113, "step": 4680 }, { - "epoch": 0.13283200908059023, + "epoch": 0.13264756723058177, "grad_norm": 0.0, - "learning_rate": 1.945071847559545e-05, - "loss": 0.9905, + "learning_rate": 1.9452543222917816e-05, + "loss": 1.0222, "step": 4681 }, { - "epoch": 0.13286038592508512, + "epoch": 0.13267590467284424, "grad_norm": 0.0, - "learning_rate": 1.9450418023372094e-05, - "loss": 1.13, + "learning_rate": 1.945224367672145e-05, + "loss": 1.1239, "step": 4682 }, { - "epoch": 0.13288876276958003, + "epoch": 0.1327042421151067, "grad_norm": 0.0, - "learning_rate": 1.9450117491320633e-05, - "loss": 0.9693, + "learning_rate": 1.9451944050905328e-05, + "loss": 1.0332, "step": 4683 }, { - "epoch": 0.13291713961407492, + "epoch": 0.13273257955736914, "grad_norm": 0.0, - "learning_rate": 1.9449816879443604e-05, - "loss": 0.9962, + "learning_rate": 1.945164434547197e-05, + "loss": 1.0343, "step": 4684 }, { - "epoch": 0.1329455164585698, + "epoch": 0.1327609169996316, "grad_norm": 0.0, - "learning_rate": 1.9449516187743546e-05, - "loss": 1.0366, + "learning_rate": 1.9451344560423905e-05, + "loss": 1.0853, "step": 4685 }, { - "epoch": 0.1329738933030647, + "epoch": 0.13278925444189407, "grad_norm": 0.0, - "learning_rate": 1.9449215416223003e-05, - "loss": 0.9914, + "learning_rate": 1.945104469576365e-05, + "loss": 0.9388, "step": 4686 }, { - "epoch": 0.13300227014755958, + "epoch": 0.13281759188415654, "grad_norm": 0.0, - "learning_rate": 1.9448914564884515e-05, - "loss": 1.0449, + "learning_rate": 1.9450744751493743e-05, + "loss": 0.9674, "step": 4687 }, { - "epoch": 0.1330306469920545, + "epoch": 0.132845929326419, "grad_norm": 0.0, - "learning_rate": 1.9448613633730614e-05, - "loss": 1.0851, + "learning_rate": 1.94504447276167e-05, + "loss": 1.1224, "step": 4688 }, { - "epoch": 0.13305902383654938, + "epoch": 0.13287426676868147, "grad_norm": 0.0, - "learning_rate": 1.9448312622763852e-05, - "loss": 0.9816, + "learning_rate": 1.945014462413505e-05, + "loss": 0.8661, "step": 4689 }, { - "epoch": 0.13308740068104427, + "epoch": 0.1329026042109439, "grad_norm": 0.0, - "learning_rate": 1.9448011531986772e-05, - "loss": 1.0301, + "learning_rate": 1.9449844441051328e-05, + "loss": 1.0561, "step": 4690 }, { - "epoch": 0.13311577752553916, + "epoch": 0.13293094165320637, "grad_norm": 0.0, - "learning_rate": 1.944771036140191e-05, - "loss": 1.0715, + "learning_rate": 1.944954417836805e-05, + "loss": 1.0734, "step": 4691 }, { - "epoch": 0.13314415437003405, + "epoch": 0.13295927909546884, "grad_norm": 0.0, - "learning_rate": 1.9447409111011814e-05, - "loss": 1.0583, + "learning_rate": 1.9449243836087758e-05, + "loss": 1.0469, "step": 4692 }, { - "epoch": 0.13317253121452893, + "epoch": 0.1329876165377313, "grad_norm": 0.0, - "learning_rate": 1.9447107780819028e-05, - "loss": 1.0811, + "learning_rate": 1.9448943414212972e-05, + "loss": 1.105, "step": 4693 }, { - "epoch": 0.13320090805902385, + "epoch": 0.13301595397999377, "grad_norm": 0.0, - "learning_rate": 1.94468063708261e-05, - "loss": 1.0612, + "learning_rate": 1.944864291274623e-05, + "loss": 1.1712, "step": 4694 }, { - "epoch": 0.13322928490351874, + "epoch": 0.13304429142225624, "grad_norm": 0.0, - "learning_rate": 1.944650488103557e-05, - "loss": 1.0114, + "learning_rate": 1.944834233169006e-05, + "loss": 1.1055, "step": 4695 }, { - "epoch": 0.13325766174801362, + "epoch": 0.13307262886451868, "grad_norm": 0.0, - "learning_rate": 1.944620331144999e-05, - "loss": 1.0122, + "learning_rate": 1.9448041671046992e-05, + "loss": 0.8981, "step": 4696 }, { - "epoch": 0.1332860385925085, + "epoch": 0.13310096630678114, "grad_norm": 0.0, - "learning_rate": 1.9445901662071908e-05, - "loss": 1.158, + "learning_rate": 1.944774093081956e-05, + "loss": 0.9945, "step": 4697 }, { - "epoch": 0.1333144154370034, + "epoch": 0.1331293037490436, "grad_norm": 0.0, - "learning_rate": 1.944559993290387e-05, - "loss": 1.0326, + "learning_rate": 1.94474401110103e-05, + "loss": 0.9541, "step": 4698 }, { - "epoch": 0.13334279228149828, + "epoch": 0.13315764119130608, "grad_norm": 0.0, - "learning_rate": 1.9445298123948425e-05, - "loss": 0.9444, + "learning_rate": 1.944713921162174e-05, + "loss": 1.0107, "step": 4699 }, { - "epoch": 0.1333711691259932, + "epoch": 0.13318597863356854, "grad_norm": 0.0, - "learning_rate": 1.944499623520812e-05, - "loss": 1.1225, + "learning_rate": 1.9446838232656426e-05, + "loss": 1.0943, "step": 4700 }, { - "epoch": 0.1333995459704881, + "epoch": 0.133214316075831, "grad_norm": 0.0, - "learning_rate": 1.944469426668551e-05, - "loss": 1.0154, + "learning_rate": 1.9446537174116877e-05, + "loss": 0.9699, "step": 4701 }, { - "epoch": 0.13342792281498297, + "epoch": 0.13324265351809345, "grad_norm": 0.0, - "learning_rate": 1.944439221838314e-05, - "loss": 1.0539, + "learning_rate": 1.9446236036005645e-05, + "loss": 1.0504, "step": 4702 }, { - "epoch": 0.13345629965947786, + "epoch": 0.1332709909603559, "grad_norm": 0.0, - "learning_rate": 1.9444090090303567e-05, - "loss": 0.9347, + "learning_rate": 1.9445934818325255e-05, + "loss": 1.094, "step": 4703 }, { - "epoch": 0.13348467650397275, + "epoch": 0.13329932840261838, "grad_norm": 0.0, - "learning_rate": 1.944378788244934e-05, - "loss": 1.0651, + "learning_rate": 1.9445633521078246e-05, + "loss": 1.0255, "step": 4704 }, { - "epoch": 0.13351305334846766, + "epoch": 0.13332766584488084, "grad_norm": 0.0, - "learning_rate": 1.9443485594823012e-05, - "loss": 0.9453, + "learning_rate": 1.9445332144267162e-05, + "loss": 1.0834, "step": 4705 }, { - "epoch": 0.13354143019296255, + "epoch": 0.1333560032871433, "grad_norm": 0.0, - "learning_rate": 1.9443183227427137e-05, - "loss": 1.0273, + "learning_rate": 1.9445030687894535e-05, + "loss": 1.1628, "step": 4706 }, { - "epoch": 0.13356980703745744, + "epoch": 0.13338434072940578, "grad_norm": 0.0, - "learning_rate": 1.9442880780264266e-05, - "loss": 1.0954, + "learning_rate": 1.944472915196291e-05, + "loss": 1.0743, "step": 4707 }, { - "epoch": 0.13359818388195233, + "epoch": 0.13341267817166821, "grad_norm": 0.0, - "learning_rate": 1.944257825333696e-05, - "loss": 0.9572, + "learning_rate": 1.9444427536474823e-05, + "loss": 1.0686, "step": 4708 }, { - "epoch": 0.1336265607264472, + "epoch": 0.13344101561393068, "grad_norm": 0.0, - "learning_rate": 1.9442275646647768e-05, - "loss": 1.0363, + "learning_rate": 1.9444125841432817e-05, + "loss": 1.0804, "step": 4709 }, { - "epoch": 0.1336549375709421, + "epoch": 0.13346935305619315, "grad_norm": 0.0, - "learning_rate": 1.944197296019925e-05, - "loss": 0.9055, + "learning_rate": 1.944382406683943e-05, + "loss": 1.0238, "step": 4710 }, { - "epoch": 0.13368331441543702, + "epoch": 0.1334976904984556, "grad_norm": 0.0, - "learning_rate": 1.9441670193993962e-05, - "loss": 1.0806, + "learning_rate": 1.9443522212697208e-05, + "loss": 1.0595, "step": 4711 }, { - "epoch": 0.1337116912599319, + "epoch": 0.13352602794071808, "grad_norm": 0.0, - "learning_rate": 1.9441367348034464e-05, - "loss": 1.01, + "learning_rate": 1.944322027900869e-05, + "loss": 1.0039, "step": 4712 }, { - "epoch": 0.1337400681044268, + "epoch": 0.13355436538298054, "grad_norm": 0.0, - "learning_rate": 1.944106442232331e-05, - "loss": 1.076, + "learning_rate": 1.9442918265776424e-05, + "loss": 1.0212, "step": 4713 }, { - "epoch": 0.13376844494892168, + "epoch": 0.13358270282524298, "grad_norm": 0.0, - "learning_rate": 1.944076141686306e-05, - "loss": 1.0958, + "learning_rate": 1.9442616173002945e-05, + "loss": 1.0783, "step": 4714 }, { - "epoch": 0.13379682179341656, + "epoch": 0.13361104026750545, "grad_norm": 0.0, - "learning_rate": 1.9440458331656272e-05, - "loss": 0.938, + "learning_rate": 1.944231400069081e-05, + "loss": 1.0482, "step": 4715 }, { - "epoch": 0.13382519863791145, + "epoch": 0.13363937770976791, "grad_norm": 0.0, - "learning_rate": 1.944015516670551e-05, - "loss": 1.0461, + "learning_rate": 1.944201174884255e-05, + "loss": 1.0509, "step": 4716 }, { - "epoch": 0.13385357548240637, + "epoch": 0.13366771515203038, "grad_norm": 0.0, - "learning_rate": 1.9439851922013333e-05, - "loss": 1.0711, + "learning_rate": 1.944170941746073e-05, + "loss": 1.1552, "step": 4717 }, { - "epoch": 0.13388195232690125, + "epoch": 0.13369605259429285, "grad_norm": 0.0, - "learning_rate": 1.9439548597582304e-05, - "loss": 0.9865, + "learning_rate": 1.9441407006547875e-05, + "loss": 1.1037, "step": 4718 }, { - "epoch": 0.13391032917139614, + "epoch": 0.1337243900365553, "grad_norm": 0.0, - "learning_rate": 1.9439245193414984e-05, - "loss": 0.9833, + "learning_rate": 1.944110451610655e-05, + "loss": 1.0044, "step": 4719 }, { - "epoch": 0.13393870601589103, + "epoch": 0.13375272747881775, "grad_norm": 0.0, - "learning_rate": 1.9438941709513933e-05, - "loss": 1.0398, + "learning_rate": 1.9440801946139293e-05, + "loss": 0.9925, "step": 4720 }, { - "epoch": 0.13396708286038592, + "epoch": 0.13378106492108022, "grad_norm": 0.0, - "learning_rate": 1.9438638145881718e-05, - "loss": 1.052, + "learning_rate": 1.9440499296648653e-05, + "loss": 1.0305, "step": 4721 }, { - "epoch": 0.1339954597048808, + "epoch": 0.13380940236334268, "grad_norm": 0.0, - "learning_rate": 1.94383345025209e-05, - "loss": 1.0258, + "learning_rate": 1.9440196567637188e-05, + "loss": 1.1067, "step": 4722 }, { - "epoch": 0.13402383654937572, + "epoch": 0.13383773980560515, "grad_norm": 0.0, - "learning_rate": 1.943803077943405e-05, - "loss": 0.9958, + "learning_rate": 1.9439893759107435e-05, + "loss": 1.0142, "step": 4723 }, { - "epoch": 0.1340522133938706, + "epoch": 0.13386607724786762, "grad_norm": 0.0, - "learning_rate": 1.9437726976623726e-05, - "loss": 1.0387, + "learning_rate": 1.9439590871061956e-05, + "loss": 0.9807, "step": 4724 }, { - "epoch": 0.1340805902383655, + "epoch": 0.13389441469013008, "grad_norm": 0.0, - "learning_rate": 1.9437423094092503e-05, - "loss": 0.998, + "learning_rate": 1.9439287903503295e-05, + "loss": 0.9465, "step": 4725 }, { - "epoch": 0.13410896708286038, + "epoch": 0.13392275213239252, "grad_norm": 0.0, - "learning_rate": 1.9437119131842937e-05, - "loss": 1.1107, + "learning_rate": 1.9438984856434008e-05, + "loss": 1.0223, "step": 4726 }, { - "epoch": 0.13413734392735527, + "epoch": 0.13395108957465499, "grad_norm": 0.0, - "learning_rate": 1.9436815089877607e-05, - "loss": 1.0124, + "learning_rate": 1.9438681729856648e-05, + "loss": 1.0444, "step": 4727 }, { - "epoch": 0.13416572077185018, + "epoch": 0.13397942701691745, "grad_norm": 0.0, - "learning_rate": 1.9436510968199072e-05, - "loss": 1.0897, + "learning_rate": 1.9438378523773763e-05, + "loss": 0.9922, "step": 4728 }, { - "epoch": 0.13419409761634507, + "epoch": 0.13400776445917992, "grad_norm": 0.0, - "learning_rate": 1.9436206766809906e-05, - "loss": 1.1339, + "learning_rate": 1.9438075238187916e-05, + "loss": 1.1235, "step": 4729 }, { - "epoch": 0.13422247446083996, + "epoch": 0.13403610190144238, "grad_norm": 0.0, - "learning_rate": 1.9435902485712677e-05, - "loss": 1.0872, + "learning_rate": 1.9437771873101653e-05, + "loss": 1.0863, "step": 4730 }, { - "epoch": 0.13425085130533485, + "epoch": 0.13406443934370485, "grad_norm": 0.0, - "learning_rate": 1.9435598124909955e-05, - "loss": 1.0993, + "learning_rate": 1.9437468428517533e-05, + "loss": 1.0628, "step": 4731 }, { - "epoch": 0.13427922814982973, + "epoch": 0.1340927767859673, "grad_norm": 0.0, - "learning_rate": 1.943529368440431e-05, - "loss": 1.0969, + "learning_rate": 1.9437164904438114e-05, + "loss": 1.0248, "step": 4732 }, { - "epoch": 0.13430760499432462, + "epoch": 0.13412111422822975, "grad_norm": 0.0, - "learning_rate": 1.943498916419832e-05, - "loss": 1.0626, + "learning_rate": 1.9436861300865947e-05, + "loss": 1.1146, "step": 4733 }, { - "epoch": 0.13433598183881953, + "epoch": 0.13414945167049222, "grad_norm": 0.0, - "learning_rate": 1.9434684564294547e-05, - "loss": 1.0873, + "learning_rate": 1.9436557617803594e-05, + "loss": 1.1315, "step": 4734 }, { - "epoch": 0.13436435868331442, + "epoch": 0.1341777891127547, "grad_norm": 0.0, - "learning_rate": 1.9434379884695573e-05, - "loss": 1.0288, + "learning_rate": 1.9436253855253612e-05, + "loss": 1.0934, "step": 4735 }, { - "epoch": 0.1343927355278093, + "epoch": 0.13420612655501715, "grad_norm": 0.0, - "learning_rate": 1.9434075125403966e-05, - "loss": 1.1635, + "learning_rate": 1.9435950013218564e-05, + "loss": 1.0674, "step": 4736 }, { - "epoch": 0.1344211123723042, + "epoch": 0.13423446399727962, "grad_norm": 0.0, - "learning_rate": 1.9433770286422305e-05, - "loss": 1.0417, + "learning_rate": 1.9435646091701e-05, + "loss": 1.0822, "step": 4737 }, { - "epoch": 0.13444948921679908, + "epoch": 0.13426280143954206, "grad_norm": 0.0, - "learning_rate": 1.9433465367753158e-05, - "loss": 0.8746, + "learning_rate": 1.9435342090703485e-05, + "loss": 0.981, "step": 4738 }, { - "epoch": 0.13447786606129397, + "epoch": 0.13429113888180452, "grad_norm": 0.0, - "learning_rate": 1.9433160369399108e-05, - "loss": 0.9302, + "learning_rate": 1.9435038010228584e-05, + "loss": 1.0672, "step": 4739 }, { - "epoch": 0.1345062429057889, + "epoch": 0.134319476324067, "grad_norm": 0.0, - "learning_rate": 1.943285529136273e-05, - "loss": 1.0304, + "learning_rate": 1.9434733850278854e-05, + "loss": 1.1739, "step": 4740 }, { - "epoch": 0.13453461975028377, + "epoch": 0.13434781376632945, "grad_norm": 0.0, - "learning_rate": 1.9432550133646594e-05, - "loss": 1.0749, + "learning_rate": 1.9434429610856852e-05, + "loss": 1.077, "step": 4741 }, { - "epoch": 0.13456299659477866, + "epoch": 0.13437615120859192, "grad_norm": 0.0, - "learning_rate": 1.9432244896253287e-05, - "loss": 0.9208, + "learning_rate": 1.943412529196515e-05, + "loss": 1.0451, "step": 4742 }, { - "epoch": 0.13459137343927355, + "epoch": 0.1344044886508544, "grad_norm": 0.0, - "learning_rate": 1.9431939579185384e-05, - "loss": 1.0308, + "learning_rate": 1.9433820893606307e-05, + "loss": 0.9973, "step": 4743 }, { - "epoch": 0.13461975028376844, + "epoch": 0.13443282609311683, "grad_norm": 0.0, - "learning_rate": 1.943163418244546e-05, - "loss": 0.9863, + "learning_rate": 1.9433516415782887e-05, + "loss": 1.0385, "step": 4744 }, { - "epoch": 0.13464812712826335, + "epoch": 0.1344611635353793, "grad_norm": 0.0, - "learning_rate": 1.9431328706036102e-05, - "loss": 1.1182, + "learning_rate": 1.9433211858497456e-05, + "loss": 1.1069, "step": 4745 }, { - "epoch": 0.13467650397275824, + "epoch": 0.13448950097764176, "grad_norm": 0.0, - "learning_rate": 1.9431023149959882e-05, - "loss": 0.9562, + "learning_rate": 1.9432907221752576e-05, + "loss": 1.0607, "step": 4746 }, { - "epoch": 0.13470488081725313, + "epoch": 0.13451783841990422, "grad_norm": 0.0, - "learning_rate": 1.9430717514219387e-05, - "loss": 1.0922, + "learning_rate": 1.9432602505550818e-05, + "loss": 1.088, "step": 4747 }, { - "epoch": 0.134733257661748, + "epoch": 0.1345461758621667, "grad_norm": 0.0, - "learning_rate": 1.94304117988172e-05, - "loss": 1.116, + "learning_rate": 1.9432297709894747e-05, + "loss": 0.9692, "step": 4748 }, { - "epoch": 0.1347616345062429, + "epoch": 0.13457451330442916, "grad_norm": 0.0, - "learning_rate": 1.94301060037559e-05, - "loss": 0.9255, + "learning_rate": 1.943199283478693e-05, + "loss": 0.9708, "step": 4749 }, { - "epoch": 0.1347900113507378, + "epoch": 0.1346028507466916, "grad_norm": 0.0, - "learning_rate": 1.9429800129038068e-05, - "loss": 0.9765, + "learning_rate": 1.9431687880229934e-05, + "loss": 1.0364, "step": 4750 }, { - "epoch": 0.1348183881952327, + "epoch": 0.13463118818895406, "grad_norm": 0.0, - "learning_rate": 1.942949417466629e-05, - "loss": 1.1655, + "learning_rate": 1.9431382846226327e-05, + "loss": 1.1621, "step": 4751 }, { - "epoch": 0.1348467650397276, + "epoch": 0.13465952563121653, "grad_norm": 0.0, - "learning_rate": 1.9429188140643154e-05, - "loss": 1.0732, + "learning_rate": 1.943107773277868e-05, + "loss": 1.0831, "step": 4752 }, { - "epoch": 0.13487514188422248, + "epoch": 0.134687863073479, "grad_norm": 0.0, - "learning_rate": 1.942888202697124e-05, - "loss": 0.9914, + "learning_rate": 1.9430772539889565e-05, + "loss": 1.0425, "step": 4753 }, { - "epoch": 0.13490351872871736, + "epoch": 0.13471620051574146, "grad_norm": 0.0, - "learning_rate": 1.9428575833653137e-05, - "loss": 0.9616, + "learning_rate": 1.943046726756155e-05, + "loss": 0.9913, "step": 4754 }, { - "epoch": 0.13493189557321225, + "epoch": 0.13474453795800392, "grad_norm": 0.0, - "learning_rate": 1.942826956069143e-05, - "loss": 1.0185, + "learning_rate": 1.943016191579721e-05, + "loss": 1.0685, "step": 4755 }, { - "epoch": 0.13496027241770714, + "epoch": 0.13477287540026636, "grad_norm": 0.0, - "learning_rate": 1.9427963208088704e-05, - "loss": 0.8133, + "learning_rate": 1.9429856484599107e-05, + "loss": 1.1194, "step": 4756 }, { - "epoch": 0.13498864926220205, + "epoch": 0.13480121284252883, "grad_norm": 0.0, - "learning_rate": 1.9427656775847553e-05, - "loss": 1.0311, + "learning_rate": 1.9429550973969828e-05, + "loss": 1.0394, "step": 4757 }, { - "epoch": 0.13501702610669694, + "epoch": 0.1348295502847913, "grad_norm": 0.0, - "learning_rate": 1.9427350263970557e-05, - "loss": 0.9747, + "learning_rate": 1.9429245383911937e-05, + "loss": 1.0578, "step": 4758 }, { - "epoch": 0.13504540295119183, + "epoch": 0.13485788772705376, "grad_norm": 0.0, - "learning_rate": 1.942704367246031e-05, - "loss": 1.0197, + "learning_rate": 1.942893971442801e-05, + "loss": 0.9493, "step": 4759 }, { - "epoch": 0.13507377979568672, + "epoch": 0.13488622516931623, "grad_norm": 0.0, - "learning_rate": 1.9426737001319402e-05, - "loss": 0.9315, + "learning_rate": 1.9428633965520625e-05, + "loss": 1.1262, "step": 4760 }, { - "epoch": 0.1351021566401816, + "epoch": 0.1349145626115787, "grad_norm": 0.0, - "learning_rate": 1.9426430250550424e-05, - "loss": 0.9854, + "learning_rate": 1.9428328137192353e-05, + "loss": 1.06, "step": 4761 }, { - "epoch": 0.1351305334846765, + "epoch": 0.13494290005384113, "grad_norm": 0.0, - "learning_rate": 1.9426123420155965e-05, - "loss": 1.0383, + "learning_rate": 1.942802222944577e-05, + "loss": 1.0501, "step": 4762 }, { - "epoch": 0.1351589103291714, + "epoch": 0.1349712374961036, "grad_norm": 0.0, - "learning_rate": 1.942581651013862e-05, - "loss": 0.9684, + "learning_rate": 1.9427716242283462e-05, + "loss": 1.1493, "step": 4763 }, { - "epoch": 0.1351872871736663, + "epoch": 0.13499957493836606, "grad_norm": 0.0, - "learning_rate": 1.9425509520500977e-05, - "loss": 1.1064, + "learning_rate": 1.9427410175707993e-05, + "loss": 1.0988, "step": 4764 }, { - "epoch": 0.13521566401816118, + "epoch": 0.13502791238062853, "grad_norm": 0.0, - "learning_rate": 1.942520245124563e-05, - "loss": 1.009, + "learning_rate": 1.942710402972195e-05, + "loss": 0.9501, "step": 4765 }, { - "epoch": 0.13524404086265607, + "epoch": 0.135056249822891, "grad_norm": 0.0, - "learning_rate": 1.9424895302375177e-05, - "loss": 0.9044, + "learning_rate": 1.9426797804327904e-05, + "loss": 1.0347, "step": 4766 }, { - "epoch": 0.13527241770715095, + "epoch": 0.13508458726515346, "grad_norm": 0.0, - "learning_rate": 1.9424588073892207e-05, - "loss": 1.0396, + "learning_rate": 1.9426491499528444e-05, + "loss": 1.0133, "step": 4767 }, { - "epoch": 0.13530079455164587, + "epoch": 0.1351129247074159, "grad_norm": 0.0, - "learning_rate": 1.942428076579932e-05, - "loss": 1.0208, + "learning_rate": 1.9426185115326147e-05, + "loss": 0.9282, "step": 4768 }, { - "epoch": 0.13532917139614076, + "epoch": 0.13514126214967837, "grad_norm": 0.0, - "learning_rate": 1.942397337809911e-05, - "loss": 0.9939, + "learning_rate": 1.942587865172359e-05, + "loss": 0.9998, "step": 4769 }, { - "epoch": 0.13535754824063564, + "epoch": 0.13516959959194083, "grad_norm": 0.0, - "learning_rate": 1.9423665910794175e-05, - "loss": 1.0417, + "learning_rate": 1.9425572108723356e-05, + "loss": 1.0591, "step": 4770 }, { - "epoch": 0.13538592508513053, + "epoch": 0.1351979370342033, "grad_norm": 0.0, - "learning_rate": 1.9423358363887105e-05, - "loss": 0.9853, + "learning_rate": 1.942526548632803e-05, + "loss": 1.0075, "step": 4771 }, { - "epoch": 0.13541430192962542, + "epoch": 0.13522627447646576, "grad_norm": 0.0, - "learning_rate": 1.9423050737380507e-05, - "loss": 0.9926, + "learning_rate": 1.942495878454019e-05, + "loss": 1.0555, "step": 4772 }, { - "epoch": 0.1354426787741203, + "epoch": 0.13525461191872823, "grad_norm": 0.0, - "learning_rate": 1.9422743031276977e-05, - "loss": 1.0235, + "learning_rate": 1.942465200336243e-05, + "loss": 1.0147, "step": 4773 }, { - "epoch": 0.13547105561861522, + "epoch": 0.13528294936099067, "grad_norm": 0.0, - "learning_rate": 1.942243524557911e-05, - "loss": 1.024, + "learning_rate": 1.942434514279732e-05, + "loss": 1.1539, "step": 4774 }, { - "epoch": 0.1354994324631101, + "epoch": 0.13531128680325313, "grad_norm": 0.0, - "learning_rate": 1.9422127380289508e-05, - "loss": 1.0119, + "learning_rate": 1.942403820284745e-05, + "loss": 1.1456, "step": 4775 }, { - "epoch": 0.135527809307605, + "epoch": 0.1353396242455156, "grad_norm": 0.0, - "learning_rate": 1.9421819435410778e-05, - "loss": 0.9802, + "learning_rate": 1.9423731183515407e-05, + "loss": 1.1013, "step": 4776 }, { - "epoch": 0.13555618615209988, + "epoch": 0.13536796168777807, "grad_norm": 0.0, - "learning_rate": 1.942151141094551e-05, - "loss": 1.0577, + "learning_rate": 1.942342408480378e-05, + "loss": 1.1077, "step": 4777 }, { - "epoch": 0.13558456299659477, + "epoch": 0.13539629913004053, "grad_norm": 0.0, - "learning_rate": 1.942120330689631e-05, - "loss": 1.043, + "learning_rate": 1.942311690671515e-05, + "loss": 1.0925, "step": 4778 }, { - "epoch": 0.13561293984108966, + "epoch": 0.135424636572303, "grad_norm": 0.0, - "learning_rate": 1.942089512326579e-05, - "loss": 1.13, + "learning_rate": 1.942280964925211e-05, + "loss": 0.9397, "step": 4779 }, { - "epoch": 0.13564131668558457, + "epoch": 0.13545297401456544, "grad_norm": 0.0, - "learning_rate": 1.942058686005654e-05, - "loss": 0.9643, + "learning_rate": 1.9422502312417245e-05, + "loss": 0.9995, "step": 4780 }, { - "epoch": 0.13566969353007946, + "epoch": 0.1354813114568279, "grad_norm": 0.0, - "learning_rate": 1.942027851727117e-05, - "loss": 0.95, + "learning_rate": 1.942219489621314e-05, + "loss": 1.0461, "step": 4781 }, { - "epoch": 0.13569807037457435, + "epoch": 0.13550964889909037, "grad_norm": 0.0, - "learning_rate": 1.9419970094912283e-05, - "loss": 1.0923, + "learning_rate": 1.9421887400642392e-05, + "loss": 0.977, "step": 4782 }, { - "epoch": 0.13572644721906924, + "epoch": 0.13553798634135283, "grad_norm": 0.0, - "learning_rate": 1.9419661592982487e-05, - "loss": 1.1141, + "learning_rate": 1.9421579825707585e-05, + "loss": 1.0617, "step": 4783 }, { - "epoch": 0.13575482406356412, + "epoch": 0.1355663237836153, "grad_norm": 0.0, - "learning_rate": 1.941935301148439e-05, - "loss": 1.061, + "learning_rate": 1.9421272171411316e-05, + "loss": 1.0804, "step": 4784 }, { - "epoch": 0.13578320090805904, + "epoch": 0.13559466122587774, "grad_norm": 0.0, - "learning_rate": 1.9419044350420585e-05, - "loss": 0.9884, + "learning_rate": 1.9420964437756172e-05, + "loss": 1.0134, "step": 4785 }, { - "epoch": 0.13581157775255392, + "epoch": 0.1356229986681402, "grad_norm": 0.0, - "learning_rate": 1.9418735609793696e-05, - "loss": 1.0316, + "learning_rate": 1.9420656624744744e-05, + "loss": 1.0878, "step": 4786 }, { - "epoch": 0.1358399545970488, + "epoch": 0.13565133611040267, "grad_norm": 0.0, - "learning_rate": 1.941842678960632e-05, - "loss": 0.8896, + "learning_rate": 1.942034873237963e-05, + "loss": 1.0418, "step": 4787 }, { - "epoch": 0.1358683314415437, + "epoch": 0.13567967355266514, "grad_norm": 0.0, - "learning_rate": 1.9418117889861074e-05, - "loss": 1.1373, + "learning_rate": 1.942004076066342e-05, + "loss": 1.0193, "step": 4788 }, { - "epoch": 0.1358967082860386, + "epoch": 0.1357080109949276, "grad_norm": 0.0, - "learning_rate": 1.941780891056056e-05, - "loss": 1.0101, + "learning_rate": 1.9419732709598708e-05, + "loss": 1.0612, "step": 4789 }, { - "epoch": 0.13592508513053347, + "epoch": 0.13573634843719007, "grad_norm": 0.0, - "learning_rate": 1.9417499851707392e-05, - "loss": 0.9325, + "learning_rate": 1.941942457918809e-05, + "loss": 1.0593, "step": 4790 }, { - "epoch": 0.1359534619750284, + "epoch": 0.1357646858794525, "grad_norm": 0.0, - "learning_rate": 1.941719071330418e-05, - "loss": 1.0112, + "learning_rate": 1.9419116369434157e-05, + "loss": 1.1146, "step": 4791 }, { - "epoch": 0.13598183881952328, + "epoch": 0.13579302332171497, "grad_norm": 0.0, - "learning_rate": 1.9416881495353533e-05, - "loss": 0.9467, + "learning_rate": 1.9418808080339513e-05, + "loss": 1.0151, "step": 4792 }, { - "epoch": 0.13601021566401816, + "epoch": 0.13582136076397744, "grad_norm": 0.0, - "learning_rate": 1.9416572197858063e-05, - "loss": 1.1531, + "learning_rate": 1.941849971190675e-05, + "loss": 1.0741, "step": 4793 }, { - "epoch": 0.13603859250851305, + "epoch": 0.1358496982062399, "grad_norm": 0.0, - "learning_rate": 1.9416262820820386e-05, - "loss": 0.9598, + "learning_rate": 1.9418191264138468e-05, + "loss": 1.0916, "step": 4794 }, { - "epoch": 0.13606696935300794, + "epoch": 0.13587803564850237, "grad_norm": 0.0, - "learning_rate": 1.9415953364243117e-05, - "loss": 1.0335, + "learning_rate": 1.9417882737037262e-05, + "loss": 1.0094, "step": 4795 }, { - "epoch": 0.13609534619750283, + "epoch": 0.13590637309076484, "grad_norm": 0.0, - "learning_rate": 1.941564382812886e-05, - "loss": 1.0066, + "learning_rate": 1.9417574130605732e-05, + "loss": 1.0872, "step": 4796 }, { - "epoch": 0.13612372304199774, + "epoch": 0.13593471053302728, "grad_norm": 0.0, - "learning_rate": 1.941533421248024e-05, - "loss": 1.0315, + "learning_rate": 1.9417265444846476e-05, + "loss": 0.9607, "step": 4797 }, { - "epoch": 0.13615209988649263, + "epoch": 0.13596304797528974, "grad_norm": 0.0, - "learning_rate": 1.9415024517299872e-05, - "loss": 1.0112, + "learning_rate": 1.94169566797621e-05, + "loss": 0.9902, "step": 4798 }, { - "epoch": 0.13618047673098752, + "epoch": 0.1359913854175522, "grad_norm": 0.0, - "learning_rate": 1.9414714742590363e-05, - "loss": 1.044, + "learning_rate": 1.94166478353552e-05, + "loss": 1.0333, "step": 4799 }, { - "epoch": 0.1362088535754824, + "epoch": 0.13601972285981467, "grad_norm": 0.0, - "learning_rate": 1.9414404888354338e-05, - "loss": 0.8898, + "learning_rate": 1.9416338911628377e-05, + "loss": 0.9603, "step": 4800 }, { - "epoch": 0.1362372304199773, + "epoch": 0.13604806030207714, "grad_norm": 0.0, - "learning_rate": 1.9414094954594413e-05, - "loss": 1.001, + "learning_rate": 1.941602990858424e-05, + "loss": 1.0323, "step": 4801 }, { - "epoch": 0.13626560726447218, + "epoch": 0.1360763977443396, "grad_norm": 0.0, - "learning_rate": 1.9413784941313203e-05, - "loss": 1.0451, + "learning_rate": 1.9415720826225382e-05, + "loss": 1.0159, "step": 4802 }, { - "epoch": 0.1362939841089671, + "epoch": 0.13610473518660204, "grad_norm": 0.0, - "learning_rate": 1.9413474848513326e-05, - "loss": 1.0649, + "learning_rate": 1.941541166455441e-05, + "loss": 1.0603, "step": 4803 }, { - "epoch": 0.13632236095346198, + "epoch": 0.1361330726288645, "grad_norm": 0.0, - "learning_rate": 1.941316467619741e-05, - "loss": 1.078, + "learning_rate": 1.941510242357393e-05, + "loss": 1.0528, "step": 4804 }, { - "epoch": 0.13635073779795687, + "epoch": 0.13616141007112698, "grad_norm": 0.0, - "learning_rate": 1.9412854424368063e-05, - "loss": 1.0681, + "learning_rate": 1.9414793103286547e-05, + "loss": 1.2096, "step": 4805 }, { - "epoch": 0.13637911464245175, + "epoch": 0.13618974751338944, "grad_norm": 0.0, - "learning_rate": 1.9412544093027915e-05, - "loss": 1.0967, + "learning_rate": 1.9414483703694866e-05, + "loss": 1.1096, "step": 4806 }, { - "epoch": 0.13640749148694664, + "epoch": 0.1362180849556519, "grad_norm": 0.0, - "learning_rate": 1.941223368217958e-05, - "loss": 0.9809, + "learning_rate": 1.941417422480149e-05, + "loss": 1.0992, "step": 4807 }, { - "epoch": 0.13643586833144156, + "epoch": 0.13624642239791437, "grad_norm": 0.0, - "learning_rate": 1.9411923191825687e-05, - "loss": 0.8885, + "learning_rate": 1.9413864666609036e-05, + "loss": 1.0316, "step": 4808 }, { - "epoch": 0.13646424517593644, + "epoch": 0.1362747598401768, "grad_norm": 0.0, - "learning_rate": 1.9411612621968855e-05, - "loss": 0.985, + "learning_rate": 1.94135550291201e-05, + "loss": 0.9832, "step": 4809 }, { - "epoch": 0.13649262202043133, + "epoch": 0.13630309728243928, "grad_norm": 0.0, - "learning_rate": 1.941130197261171e-05, - "loss": 0.9578, + "learning_rate": 1.941324531233729e-05, + "loss": 1.0151, "step": 4810 }, { - "epoch": 0.13652099886492622, + "epoch": 0.13633143472470174, "grad_norm": 0.0, - "learning_rate": 1.941099124375687e-05, - "loss": 0.9689, + "learning_rate": 1.941293551626322e-05, + "loss": 1.1024, "step": 4811 }, { - "epoch": 0.1365493757094211, + "epoch": 0.1363597721669642, "grad_norm": 0.0, - "learning_rate": 1.941068043540697e-05, - "loss": 1.0014, + "learning_rate": 1.9412625640900503e-05, + "loss": 0.9457, "step": 4812 }, { - "epoch": 0.136577752553916, + "epoch": 0.13638810960922668, "grad_norm": 0.0, - "learning_rate": 1.9410369547564623e-05, - "loss": 1.0307, + "learning_rate": 1.9412315686251743e-05, + "loss": 1.0588, "step": 4813 }, { - "epoch": 0.1366061293984109, + "epoch": 0.13641644705148914, "grad_norm": 0.0, - "learning_rate": 1.9410058580232464e-05, - "loss": 1.1096, + "learning_rate": 1.9412005652319555e-05, + "loss": 1.0482, "step": 4814 }, { - "epoch": 0.1366345062429058, + "epoch": 0.13644478449375158, "grad_norm": 0.0, - "learning_rate": 1.940974753341312e-05, - "loss": 1.1026, + "learning_rate": 1.9411695539106546e-05, + "loss": 1.1101, "step": 4815 }, { - "epoch": 0.13666288308740068, + "epoch": 0.13647312193601405, "grad_norm": 0.0, - "learning_rate": 1.940943640710921e-05, - "loss": 0.992, + "learning_rate": 1.941138534661533e-05, + "loss": 1.0817, "step": 4816 }, { - "epoch": 0.13669125993189557, + "epoch": 0.1365014593782765, "grad_norm": 0.0, - "learning_rate": 1.9409125201323372e-05, - "loss": 1.0599, + "learning_rate": 1.9411075074848523e-05, + "loss": 0.9899, "step": 4817 }, { - "epoch": 0.13671963677639046, + "epoch": 0.13652979682053898, "grad_norm": 0.0, - "learning_rate": 1.940881391605823e-05, - "loss": 1.1032, + "learning_rate": 1.941076472380873e-05, + "loss": 1.0656, "step": 4818 }, { - "epoch": 0.13674801362088534, + "epoch": 0.13655813426280144, "grad_norm": 0.0, - "learning_rate": 1.9408502551316413e-05, - "loss": 1.0869, + "learning_rate": 1.9410454293498577e-05, + "loss": 1.1714, "step": 4819 }, { - "epoch": 0.13677639046538026, + "epoch": 0.1365864717050639, "grad_norm": 0.0, - "learning_rate": 1.9408191107100554e-05, - "loss": 1.0371, + "learning_rate": 1.941014378392067e-05, + "loss": 0.9513, "step": 4820 }, { - "epoch": 0.13680476730987515, + "epoch": 0.13661480914732635, "grad_norm": 0.0, - "learning_rate": 1.940787958341328e-05, - "loss": 1.0125, + "learning_rate": 1.9409833195077633e-05, + "loss": 1.02, "step": 4821 }, { - "epoch": 0.13683314415437003, + "epoch": 0.13664314658958882, "grad_norm": 0.0, - "learning_rate": 1.940756798025722e-05, - "loss": 1.1152, + "learning_rate": 1.940952252697207e-05, + "loss": 1.036, "step": 4822 }, { - "epoch": 0.13686152099886492, + "epoch": 0.13667148403185128, "grad_norm": 0.0, - "learning_rate": 1.9407256297635017e-05, - "loss": 0.9961, + "learning_rate": 1.9409211779606608e-05, + "loss": 1.0063, "step": 4823 }, { - "epoch": 0.1368898978433598, + "epoch": 0.13669982147411375, "grad_norm": 0.0, - "learning_rate": 1.9406944535549296e-05, - "loss": 0.9787, + "learning_rate": 1.940890095298386e-05, + "loss": 0.9593, "step": 4824 }, { - "epoch": 0.13691827468785472, + "epoch": 0.1367281589163762, "grad_norm": 0.0, - "learning_rate": 1.940663269400269e-05, - "loss": 0.9865, + "learning_rate": 1.9408590047106445e-05, + "loss": 1.0089, "step": 4825 }, { - "epoch": 0.1369466515323496, + "epoch": 0.13675649635863868, "grad_norm": 0.0, - "learning_rate": 1.940632077299783e-05, - "loss": 0.9302, + "learning_rate": 1.9408279061976985e-05, + "loss": 1.1095, "step": 4826 }, { - "epoch": 0.1369750283768445, + "epoch": 0.13678483380090112, "grad_norm": 0.0, - "learning_rate": 1.9406008772537364e-05, - "loss": 0.9564, + "learning_rate": 1.9407967997598093e-05, + "loss": 1.0142, "step": 4827 }, { - "epoch": 0.13700340522133939, + "epoch": 0.13681317124316358, "grad_norm": 0.0, - "learning_rate": 1.9405696692623915e-05, - "loss": 1.0092, + "learning_rate": 1.9407656853972394e-05, + "loss": 1.0803, "step": 4828 }, { - "epoch": 0.13703178206583427, + "epoch": 0.13684150868542605, "grad_norm": 0.0, - "learning_rate": 1.9405384533260124e-05, - "loss": 0.9861, + "learning_rate": 1.940734563110251e-05, + "loss": 1.0058, "step": 4829 }, { - "epoch": 0.13706015891032916, + "epoch": 0.13686984612768852, "grad_norm": 0.0, - "learning_rate": 1.9405072294448627e-05, - "loss": 1.0842, + "learning_rate": 1.9407034328991058e-05, + "loss": 1.2018, "step": 4830 }, { - "epoch": 0.13708853575482408, + "epoch": 0.13689818356995098, "grad_norm": 0.0, - "learning_rate": 1.9404759976192064e-05, - "loss": 1.0547, + "learning_rate": 1.9406722947640663e-05, + "loss": 1.0748, "step": 4831 }, { - "epoch": 0.13711691259931896, + "epoch": 0.13692652101221345, "grad_norm": 0.0, - "learning_rate": 1.9404447578493063e-05, - "loss": 1.1016, + "learning_rate": 1.940641148705395e-05, + "loss": 0.9988, "step": 4832 }, { - "epoch": 0.13714528944381385, + "epoch": 0.1369548584544759, "grad_norm": 0.0, - "learning_rate": 1.9404135101354277e-05, - "loss": 0.9909, + "learning_rate": 1.9406099947233537e-05, + "loss": 1.0222, "step": 4833 }, { - "epoch": 0.13717366628830874, + "epoch": 0.13698319589673835, "grad_norm": 0.0, - "learning_rate": 1.940382254477834e-05, - "loss": 1.1479, + "learning_rate": 1.9405788328182052e-05, + "loss": 0.9873, "step": 4834 }, { - "epoch": 0.13720204313280362, + "epoch": 0.13701153333900082, "grad_norm": 0.0, - "learning_rate": 1.940350990876789e-05, - "loss": 0.9308, + "learning_rate": 1.9405476629902123e-05, + "loss": 1.027, "step": 4835 }, { - "epoch": 0.1372304199772985, + "epoch": 0.13703987078126328, "grad_norm": 0.0, - "learning_rate": 1.9403197193325566e-05, - "loss": 1.0094, + "learning_rate": 1.9405164852396367e-05, + "loss": 0.9516, "step": 4836 }, { - "epoch": 0.13725879682179343, + "epoch": 0.13706820822352575, "grad_norm": 0.0, - "learning_rate": 1.9402884398454015e-05, - "loss": 1.0241, + "learning_rate": 1.9404852995667416e-05, + "loss": 1.0952, "step": 4837 }, { - "epoch": 0.13728717366628831, + "epoch": 0.13709654566578822, "grad_norm": 0.0, - "learning_rate": 1.9402571524155877e-05, - "loss": 1.018, + "learning_rate": 1.9404541059717895e-05, + "loss": 0.9631, "step": 4838 }, { - "epoch": 0.1373155505107832, + "epoch": 0.13712488310805065, "grad_norm": 0.0, - "learning_rate": 1.9402258570433794e-05, - "loss": 1.0446, + "learning_rate": 1.9404229044550432e-05, + "loss": 1.1146, "step": 4839 }, { - "epoch": 0.1373439273552781, + "epoch": 0.13715322055031312, "grad_norm": 0.0, - "learning_rate": 1.940194553729041e-05, - "loss": 0.9459, + "learning_rate": 1.940391695016766e-05, + "loss": 1.1016, "step": 4840 }, { - "epoch": 0.13737230419977298, + "epoch": 0.1371815579925756, "grad_norm": 0.0, - "learning_rate": 1.940163242472837e-05, - "loss": 0.9656, + "learning_rate": 1.94036047765722e-05, + "loss": 1.1556, "step": 4841 }, { - "epoch": 0.13740068104426786, + "epoch": 0.13720989543483805, "grad_norm": 0.0, - "learning_rate": 1.9401319232750317e-05, - "loss": 0.9288, + "learning_rate": 1.9403292523766685e-05, + "loss": 1.0116, "step": 4842 }, { - "epoch": 0.13742905788876278, + "epoch": 0.13723823287710052, "grad_norm": 0.0, - "learning_rate": 1.9401005961358898e-05, - "loss": 1.0375, + "learning_rate": 1.9402980191753747e-05, + "loss": 1.0494, "step": 4843 }, { - "epoch": 0.13745743473325767, + "epoch": 0.13726657031936298, "grad_norm": 0.0, - "learning_rate": 1.940069261055676e-05, - "loss": 1.1296, + "learning_rate": 1.9402667780536012e-05, + "loss": 1.0347, "step": 4844 }, { - "epoch": 0.13748581157775255, + "epoch": 0.13729490776162542, "grad_norm": 0.0, - "learning_rate": 1.940037918034655e-05, - "loss": 1.0042, + "learning_rate": 1.9402355290116116e-05, + "loss": 1.0217, "step": 4845 }, { - "epoch": 0.13751418842224744, + "epoch": 0.1373232452038879, "grad_norm": 0.0, - "learning_rate": 1.940006567073091e-05, - "loss": 1.0621, + "learning_rate": 1.940204272049669e-05, + "loss": 1.1601, "step": 4846 }, { - "epoch": 0.13754256526674233, + "epoch": 0.13735158264615036, "grad_norm": 0.0, - "learning_rate": 1.9399752081712498e-05, - "loss": 1.0551, + "learning_rate": 1.940173007168037e-05, + "loss": 0.9704, "step": 4847 }, { - "epoch": 0.13757094211123724, + "epoch": 0.13737992008841282, "grad_norm": 0.0, - "learning_rate": 1.9399438413293955e-05, - "loss": 0.9393, + "learning_rate": 1.940141734366978e-05, + "loss": 0.9824, "step": 4848 }, { - "epoch": 0.13759931895573213, + "epoch": 0.1374082575306753, "grad_norm": 0.0, - "learning_rate": 1.9399124665477934e-05, - "loss": 1.0757, + "learning_rate": 1.9401104536467566e-05, + "loss": 1.0363, "step": 4849 }, { - "epoch": 0.13762769580022702, + "epoch": 0.13743659497293775, "grad_norm": 0.0, - "learning_rate": 1.9398810838267087e-05, - "loss": 1.0601, + "learning_rate": 1.9400791650076355e-05, + "loss": 1.0278, "step": 4850 }, { - "epoch": 0.1376560726447219, + "epoch": 0.1374649324152002, "grad_norm": 0.0, - "learning_rate": 1.9398496931664058e-05, - "loss": 1.0182, + "learning_rate": 1.9400478684498788e-05, + "loss": 1.0362, "step": 4851 }, { - "epoch": 0.1376844494892168, + "epoch": 0.13749326985746266, "grad_norm": 0.0, - "learning_rate": 1.9398182945671507e-05, - "loss": 1.1577, + "learning_rate": 1.9400165639737495e-05, + "loss": 1.0326, "step": 4852 }, { - "epoch": 0.13771282633371168, + "epoch": 0.13752160729972512, "grad_norm": 0.0, - "learning_rate": 1.9397868880292077e-05, - "loss": 1.0511, + "learning_rate": 1.9399852515795115e-05, + "loss": 0.9615, "step": 4853 }, { - "epoch": 0.1377412031782066, + "epoch": 0.1375499447419876, "grad_norm": 0.0, - "learning_rate": 1.939755473552843e-05, - "loss": 1.0151, + "learning_rate": 1.939953931267429e-05, + "loss": 1.1088, "step": 4854 }, { - "epoch": 0.13776958002270148, + "epoch": 0.13757828218425006, "grad_norm": 0.0, - "learning_rate": 1.9397240511383213e-05, - "loss": 0.9753, + "learning_rate": 1.9399226030377654e-05, + "loss": 1.0537, "step": 4855 }, { - "epoch": 0.13779795686719637, + "epoch": 0.13760661962651252, "grad_norm": 0.0, - "learning_rate": 1.9396926207859085e-05, - "loss": 1.0366, + "learning_rate": 1.939891266890785e-05, + "loss": 0.9786, "step": 4856 }, { - "epoch": 0.13782633371169126, + "epoch": 0.13763495706877496, "grad_norm": 0.0, - "learning_rate": 1.9396611824958696e-05, - "loss": 1.0108, + "learning_rate": 1.939859922826751e-05, + "loss": 0.8695, "step": 4857 }, { - "epoch": 0.13785471055618614, + "epoch": 0.13766329451103743, "grad_norm": 0.0, - "learning_rate": 1.939629736268471e-05, - "loss": 0.978, + "learning_rate": 1.9398285708459278e-05, + "loss": 1.0629, "step": 4858 }, { - "epoch": 0.13788308740068103, + "epoch": 0.1376916319532999, "grad_norm": 0.0, - "learning_rate": 1.9395982821039772e-05, + "learning_rate": 1.9397972109485798e-05, "loss": 1.0273, "step": 4859 }, { - "epoch": 0.13791146424517595, + "epoch": 0.13771996939556236, "grad_norm": 0.0, - "learning_rate": 1.939566820002655e-05, - "loss": 1.1221, + "learning_rate": 1.939765843134971e-05, + "loss": 1.0376, "step": 4860 }, { - "epoch": 0.13793984108967083, + "epoch": 0.13774830683782482, "grad_norm": 0.0, - "learning_rate": 1.9395353499647692e-05, - "loss": 1.0351, + "learning_rate": 1.9397344674053653e-05, + "loss": 0.9173, "step": 4861 }, { - "epoch": 0.13796821793416572, + "epoch": 0.1377766442800873, "grad_norm": 0.0, - "learning_rate": 1.9395038719905862e-05, - "loss": 1.0343, + "learning_rate": 1.9397030837600273e-05, + "loss": 1.0525, "step": 4862 }, { - "epoch": 0.1379965947786606, + "epoch": 0.13780498172234973, "grad_norm": 0.0, - "learning_rate": 1.939472386080372e-05, - "loss": 1.11, + "learning_rate": 1.9396716921992213e-05, + "loss": 1.1209, "step": 4863 }, { - "epoch": 0.1380249716231555, + "epoch": 0.1378333191646122, "grad_norm": 0.0, - "learning_rate": 1.939440892234392e-05, - "loss": 1.0529, + "learning_rate": 1.9396402927232115e-05, + "loss": 0.9268, "step": 4864 }, { - "epoch": 0.1380533484676504, + "epoch": 0.13786165660687466, "grad_norm": 0.0, - "learning_rate": 1.939409390452913e-05, - "loss": 1.1329, + "learning_rate": 1.9396088853322627e-05, + "loss": 1.066, "step": 4865 }, { - "epoch": 0.1380817253121453, + "epoch": 0.13788999404913713, "grad_norm": 0.0, - "learning_rate": 1.9393778807362e-05, - "loss": 0.9935, + "learning_rate": 1.9395774700266394e-05, + "loss": 0.9684, "step": 4866 }, { - "epoch": 0.13811010215664019, + "epoch": 0.1379183314913996, "grad_norm": 0.0, - "learning_rate": 1.93934636308452e-05, - "loss": 1.025, + "learning_rate": 1.939546046806606e-05, + "loss": 0.9358, "step": 4867 }, { - "epoch": 0.13813847900113507, + "epoch": 0.13794666893366206, "grad_norm": 0.0, - "learning_rate": 1.9393148374981395e-05, - "loss": 1.0944, + "learning_rate": 1.9395146156724276e-05, + "loss": 1.1264, "step": 4868 }, { - "epoch": 0.13816685584562996, + "epoch": 0.1379750063759245, "grad_norm": 0.0, - "learning_rate": 1.939283303977324e-05, - "loss": 0.903, + "learning_rate": 1.9394831766243688e-05, + "loss": 1.0161, "step": 4869 }, { - "epoch": 0.13819523269012485, + "epoch": 0.13800334381818696, "grad_norm": 0.0, - "learning_rate": 1.93925176252234e-05, - "loss": 1.1352, + "learning_rate": 1.939451729662694e-05, + "loss": 1.0804, "step": 4870 }, { - "epoch": 0.13822360953461976, + "epoch": 0.13803168126044943, "grad_norm": 0.0, - "learning_rate": 1.9392202131334545e-05, - "loss": 1.0302, + "learning_rate": 1.9394202747876686e-05, + "loss": 1.0587, "step": 4871 }, { - "epoch": 0.13825198637911465, + "epoch": 0.1380600187027119, "grad_norm": 0.0, - "learning_rate": 1.9391886558109333e-05, - "loss": 1.0933, + "learning_rate": 1.939388811999557e-05, + "loss": 1.0404, "step": 4872 }, { - "epoch": 0.13828036322360954, + "epoch": 0.13808835614497436, "grad_norm": 0.0, - "learning_rate": 1.939157090555044e-05, - "loss": 1.1042, + "learning_rate": 1.9393573412986254e-05, + "loss": 1.0667, "step": 4873 }, { - "epoch": 0.13830874006810442, + "epoch": 0.13811669358723683, "grad_norm": 0.0, - "learning_rate": 1.9391255173660516e-05, - "loss": 1.0828, + "learning_rate": 1.9393258626851376e-05, + "loss": 1.0203, "step": 4874 }, { - "epoch": 0.1383371169125993, + "epoch": 0.13814503102949927, "grad_norm": 0.0, - "learning_rate": 1.939093936244224e-05, - "loss": 0.9267, + "learning_rate": 1.939294376159359e-05, + "loss": 0.9898, "step": 4875 }, { - "epoch": 0.1383654937570942, + "epoch": 0.13817336847176173, "grad_norm": 0.0, - "learning_rate": 1.939062347189828e-05, - "loss": 1.0731, + "learning_rate": 1.9392628817215556e-05, + "loss": 1.1429, "step": 4876 }, { - "epoch": 0.13839387060158911, + "epoch": 0.1382017059140242, "grad_norm": 0.0, - "learning_rate": 1.9390307502031304e-05, - "loss": 1.1428, + "learning_rate": 1.939231379371992e-05, + "loss": 1.0543, "step": 4877 }, { - "epoch": 0.138422247446084, + "epoch": 0.13823004335628666, "grad_norm": 0.0, - "learning_rate": 1.9389991452843974e-05, - "loss": 1.0053, + "learning_rate": 1.9391998691109335e-05, + "loss": 1.0308, "step": 4878 }, { - "epoch": 0.1384506242905789, + "epoch": 0.13825838079854913, "grad_norm": 0.0, - "learning_rate": 1.9389675324338965e-05, - "loss": 1.05, + "learning_rate": 1.9391683509386457e-05, + "loss": 0.9559, "step": 4879 }, { - "epoch": 0.13847900113507378, + "epoch": 0.1382867182408116, "grad_norm": 0.0, - "learning_rate": 1.9389359116518945e-05, - "loss": 1.0345, + "learning_rate": 1.9391368248553946e-05, + "loss": 1.0622, "step": 4880 }, { - "epoch": 0.13850737797956866, + "epoch": 0.13831505568307403, "grad_norm": 0.0, - "learning_rate": 1.938904282938659e-05, - "loss": 0.9975, + "learning_rate": 1.9391052908614448e-05, + "loss": 1.0405, "step": 4881 }, { - "epoch": 0.13853575482406355, + "epoch": 0.1383433931253365, "grad_norm": 0.0, - "learning_rate": 1.9388726462944564e-05, - "loss": 1.0972, + "learning_rate": 1.939073748957063e-05, + "loss": 1.0959, "step": 4882 }, { - "epoch": 0.13856413166855847, + "epoch": 0.13837173056759897, "grad_norm": 0.0, - "learning_rate": 1.9388410017195544e-05, - "loss": 0.9411, + "learning_rate": 1.9390421991425137e-05, + "loss": 0.9074, "step": 4883 }, { - "epoch": 0.13859250851305335, + "epoch": 0.13840006800986143, "grad_norm": 0.0, - "learning_rate": 1.9388093492142205e-05, - "loss": 1.0124, + "learning_rate": 1.9390106414180635e-05, + "loss": 0.9335, "step": 4884 }, { - "epoch": 0.13862088535754824, + "epoch": 0.1384284054521239, "grad_norm": 0.0, - "learning_rate": 1.9387776887787218e-05, - "loss": 1.0184, + "learning_rate": 1.9389790757839776e-05, + "loss": 1.1161, "step": 4885 }, { - "epoch": 0.13864926220204313, + "epoch": 0.13845674289438636, "grad_norm": 0.0, - "learning_rate": 1.9387460204133254e-05, - "loss": 1.0117, + "learning_rate": 1.9389475022405227e-05, + "loss": 0.9531, "step": 4886 }, { - "epoch": 0.13867763904653801, + "epoch": 0.1384850803366488, "grad_norm": 0.0, - "learning_rate": 1.9387143441183e-05, - "loss": 1.0162, + "learning_rate": 1.9389159207879644e-05, + "loss": 0.9588, "step": 4887 }, { - "epoch": 0.13870601589103293, + "epoch": 0.13851341777891127, "grad_norm": 0.0, - "learning_rate": 1.9386826598939114e-05, - "loss": 1.0798, + "learning_rate": 1.9388843314265684e-05, + "loss": 0.9849, "step": 4888 }, { - "epoch": 0.13873439273552782, + "epoch": 0.13854175522117373, "grad_norm": 0.0, - "learning_rate": 1.9386509677404286e-05, - "loss": 1.0046, + "learning_rate": 1.9388527341566012e-05, + "loss": 1.0717, "step": 4889 }, { - "epoch": 0.1387627695800227, + "epoch": 0.1385700926634362, "grad_norm": 0.0, - "learning_rate": 1.938619267658119e-05, - "loss": 1.0789, + "learning_rate": 1.9388211289783285e-05, + "loss": 1.0554, "step": 4890 }, { - "epoch": 0.1387911464245176, + "epoch": 0.13859843010569867, "grad_norm": 0.0, - "learning_rate": 1.9385875596472502e-05, - "loss": 1.0068, + "learning_rate": 1.938789515892017e-05, + "loss": 1.0407, "step": 4891 }, { - "epoch": 0.13881952326901248, + "epoch": 0.13862676754796113, "grad_norm": 0.0, - "learning_rate": 1.9385558437080898e-05, - "loss": 1.0023, + "learning_rate": 1.9387578948979326e-05, + "loss": 0.9103, "step": 4892 }, { - "epoch": 0.13884790011350737, + "epoch": 0.13865510499022357, "grad_norm": 0.0, - "learning_rate": 1.938524119840906e-05, - "loss": 0.9032, + "learning_rate": 1.9387262659963423e-05, + "loss": 1.099, "step": 4893 }, { - "epoch": 0.13887627695800228, + "epoch": 0.13868344243248604, "grad_norm": 0.0, - "learning_rate": 1.938492388045967e-05, - "loss": 1.0584, + "learning_rate": 1.9386946291875117e-05, + "loss": 0.9285, "step": 4894 }, { - "epoch": 0.13890465380249717, + "epoch": 0.1387117798747485, "grad_norm": 0.0, - "learning_rate": 1.9384606483235407e-05, - "loss": 0.9827, + "learning_rate": 1.938662984471708e-05, + "loss": 1.1174, "step": 4895 }, { - "epoch": 0.13893303064699206, + "epoch": 0.13874011731701097, "grad_norm": 0.0, - "learning_rate": 1.9384289006738947e-05, - "loss": 1.096, + "learning_rate": 1.938631331849197e-05, + "loss": 1.0594, "step": 4896 }, { - "epoch": 0.13896140749148694, + "epoch": 0.13876845475927344, "grad_norm": 0.0, - "learning_rate": 1.938397145097298e-05, - "loss": 0.9829, + "learning_rate": 1.9385996713202456e-05, + "loss": 1.106, "step": 4897 }, { - "epoch": 0.13898978433598183, + "epoch": 0.1387967922015359, "grad_norm": 0.0, - "learning_rate": 1.9383653815940184e-05, - "loss": 1.1701, + "learning_rate": 1.938568002885121e-05, + "loss": 1.0217, "step": 4898 }, { - "epoch": 0.13901816118047672, + "epoch": 0.13882512964379834, "grad_norm": 0.0, - "learning_rate": 1.9383336101643242e-05, - "loss": 1.0979, + "learning_rate": 1.9385363265440896e-05, + "loss": 1.079, "step": 4899 }, { - "epoch": 0.13904653802497163, + "epoch": 0.1388534670860608, "grad_norm": 0.0, - "learning_rate": 1.9383018308084836e-05, - "loss": 1.056, + "learning_rate": 1.938504642297418e-05, + "loss": 1.0664, "step": 4900 }, { - "epoch": 0.13907491486946652, + "epoch": 0.13888180452832327, "grad_norm": 0.0, - "learning_rate": 1.9382700435267653e-05, - "loss": 1.0877, + "learning_rate": 1.9384729501453737e-05, + "loss": 0.9895, "step": 4901 }, { - "epoch": 0.1391032917139614, + "epoch": 0.13891014197058574, "grad_norm": 0.0, - "learning_rate": 1.938238248319438e-05, - "loss": 0.9977, + "learning_rate": 1.9384412500882227e-05, + "loss": 0.9659, "step": 4902 }, { - "epoch": 0.1391316685584563, + "epoch": 0.1389384794128482, "grad_norm": 0.0, - "learning_rate": 1.93820644518677e-05, - "loss": 1.0111, + "learning_rate": 1.938409542126233e-05, + "loss": 0.9787, "step": 4903 }, { - "epoch": 0.13916004540295118, + "epoch": 0.13896681685511067, "grad_norm": 0.0, - "learning_rate": 1.93817463412903e-05, - "loss": 0.9663, + "learning_rate": 1.938377826259671e-05, + "loss": 1.1391, "step": 4904 }, { - "epoch": 0.1391884222474461, + "epoch": 0.1389951542973731, "grad_norm": 0.0, - "learning_rate": 1.9381428151464863e-05, - "loss": 1.098, + "learning_rate": 1.9383461024888046e-05, + "loss": 0.9174, "step": 4905 }, { - "epoch": 0.13921679909194098, + "epoch": 0.13902349173963557, "grad_norm": 0.0, - "learning_rate": 1.9381109882394084e-05, - "loss": 1.0654, + "learning_rate": 1.9383143708138997e-05, + "loss": 1.0374, "step": 4906 }, { - "epoch": 0.13924517593643587, + "epoch": 0.13905182918189804, "grad_norm": 0.0, - "learning_rate": 1.9380791534080648e-05, - "loss": 0.8942, + "learning_rate": 1.938282631235225e-05, + "loss": 1.0577, "step": 4907 }, { - "epoch": 0.13927355278093076, + "epoch": 0.1390801666241605, "grad_norm": 0.0, - "learning_rate": 1.9380473106527242e-05, - "loss": 1.0557, + "learning_rate": 1.9382508837530472e-05, + "loss": 0.9452, "step": 4908 }, { - "epoch": 0.13930192962542565, + "epoch": 0.13910850406642297, "grad_norm": 0.0, - "learning_rate": 1.938015459973656e-05, - "loss": 1.0863, + "learning_rate": 1.9382191283676336e-05, + "loss": 1.037, "step": 4909 }, { - "epoch": 0.13933030646992053, + "epoch": 0.13913684150868544, "grad_norm": 0.0, - "learning_rate": 1.937983601371129e-05, - "loss": 1.1867, + "learning_rate": 1.938187365079252e-05, + "loss": 1.0239, "step": 4910 }, { - "epoch": 0.13935868331441545, + "epoch": 0.13916517895094788, "grad_norm": 0.0, - "learning_rate": 1.9379517348454128e-05, - "loss": 1.0483, + "learning_rate": 1.93815559388817e-05, + "loss": 1.1036, "step": 4911 }, { - "epoch": 0.13938706015891034, + "epoch": 0.13919351639321034, "grad_norm": 0.0, - "learning_rate": 1.9379198603967756e-05, - "loss": 1.0158, + "learning_rate": 1.938123814794655e-05, + "loss": 0.9662, "step": 4912 }, { - "epoch": 0.13941543700340522, + "epoch": 0.1392218538354728, "grad_norm": 0.0, - "learning_rate": 1.937887978025487e-05, - "loss": 1.0973, + "learning_rate": 1.9380920277989746e-05, + "loss": 1.0307, "step": 4913 }, { - "epoch": 0.1394438138479001, + "epoch": 0.13925019127773527, "grad_norm": 0.0, - "learning_rate": 1.9378560877318168e-05, - "loss": 1.0049, + "learning_rate": 1.9380602329013967e-05, + "loss": 1.021, "step": 4914 }, { - "epoch": 0.139472190692395, + "epoch": 0.13927852871999774, "grad_norm": 0.0, - "learning_rate": 1.9378241895160342e-05, - "loss": 1.1872, + "learning_rate": 1.9380284301021894e-05, + "loss": 1.0137, "step": 4915 }, { - "epoch": 0.13950056753688989, + "epoch": 0.1393068661622602, "grad_norm": 0.0, - "learning_rate": 1.9377922833784085e-05, - "loss": 1.0562, + "learning_rate": 1.93799661940162e-05, + "loss": 1.0266, "step": 4916 }, { - "epoch": 0.1395289443813848, + "epoch": 0.13933520360452264, "grad_norm": 0.0, - "learning_rate": 1.937760369319209e-05, - "loss": 1.0096, + "learning_rate": 1.9379648007999567e-05, + "loss": 1.015, "step": 4917 }, { - "epoch": 0.1395573212258797, + "epoch": 0.1393635410467851, "grad_norm": 0.0, - "learning_rate": 1.9377284473387054e-05, - "loss": 0.9842, + "learning_rate": 1.9379329742974677e-05, + "loss": 0.973, "step": 4918 }, { - "epoch": 0.13958569807037458, + "epoch": 0.13939187848904758, "grad_norm": 0.0, - "learning_rate": 1.9376965174371678e-05, - "loss": 0.9394, + "learning_rate": 1.937901139894421e-05, + "loss": 1.0498, "step": 4919 }, { - "epoch": 0.13961407491486946, + "epoch": 0.13942021593131004, "grad_norm": 0.0, - "learning_rate": 1.9376645796148656e-05, - "loss": 1.0061, + "learning_rate": 1.9378692975910846e-05, + "loss": 1.0745, "step": 4920 }, { - "epoch": 0.13964245175936435, + "epoch": 0.1394485533735725, "grad_norm": 0.0, - "learning_rate": 1.937632633872068e-05, - "loss": 1.0774, + "learning_rate": 1.937837447387727e-05, + "loss": 1.0009, "step": 4921 }, { - "epoch": 0.13967082860385924, + "epoch": 0.13947689081583498, "grad_norm": 0.0, - "learning_rate": 1.937600680209046e-05, - "loss": 1.011, + "learning_rate": 1.9378055892846164e-05, + "loss": 1.1076, "step": 4922 }, { - "epoch": 0.13969920544835415, + "epoch": 0.1395052282580974, "grad_norm": 0.0, - "learning_rate": 1.9375687186260684e-05, - "loss": 0.8471, + "learning_rate": 1.937773723282021e-05, + "loss": 1.062, "step": 4923 }, { - "epoch": 0.13972758229284904, + "epoch": 0.13953356570035988, "grad_norm": 0.0, - "learning_rate": 1.9375367491234058e-05, - "loss": 0.8737, + "learning_rate": 1.937741849380209e-05, + "loss": 0.9747, "step": 4924 }, { - "epoch": 0.13975595913734393, + "epoch": 0.13956190314262235, "grad_norm": 0.0, - "learning_rate": 1.9375047717013283e-05, - "loss": 1.1132, + "learning_rate": 1.9377099675794495e-05, + "loss": 1.0366, "step": 4925 }, { - "epoch": 0.13978433598183881, + "epoch": 0.1395902405848848, "grad_norm": 0.0, - "learning_rate": 1.937472786360106e-05, - "loss": 1.0482, + "learning_rate": 1.9376780778800106e-05, + "loss": 1.0718, "step": 4926 }, { - "epoch": 0.1398127128263337, + "epoch": 0.13961857802714728, "grad_norm": 0.0, - "learning_rate": 1.9374407931000087e-05, - "loss": 1.036, + "learning_rate": 1.937646180282161e-05, + "loss": 1.0565, "step": 4927 }, { - "epoch": 0.13984108967082862, + "epoch": 0.13964691546940974, "grad_norm": 0.0, - "learning_rate": 1.937408791921307e-05, - "loss": 0.9308, + "learning_rate": 1.9376142747861693e-05, + "loss": 1.1297, "step": 4928 }, { - "epoch": 0.1398694665153235, + "epoch": 0.13967525291167218, "grad_norm": 0.0, - "learning_rate": 1.9373767828242708e-05, - "loss": 1.0672, + "learning_rate": 1.937582361392305e-05, + "loss": 1.1423, "step": 4929 }, { - "epoch": 0.1398978433598184, + "epoch": 0.13970359035393465, "grad_norm": 0.0, - "learning_rate": 1.9373447658091713e-05, - "loss": 0.9675, + "learning_rate": 1.9375504401008357e-05, + "loss": 1.0558, "step": 4930 }, { - "epoch": 0.13992622020431328, + "epoch": 0.13973192779619711, "grad_norm": 0.0, - "learning_rate": 1.937312740876278e-05, - "loss": 0.9927, + "learning_rate": 1.9375185109120313e-05, + "loss": 1.0933, "step": 4931 }, { - "epoch": 0.13995459704880817, + "epoch": 0.13976026523845958, "grad_norm": 0.0, - "learning_rate": 1.937280708025862e-05, - "loss": 0.9878, + "learning_rate": 1.93748657382616e-05, + "loss": 1.009, "step": 4932 }, { - "epoch": 0.13998297389330305, + "epoch": 0.13978860268072205, "grad_norm": 0.0, - "learning_rate": 1.9372486672581937e-05, - "loss": 1.0555, + "learning_rate": 1.9374546288434913e-05, + "loss": 0.9305, "step": 4933 }, { - "epoch": 0.14001135073779797, + "epoch": 0.1398169401229845, "grad_norm": 0.0, - "learning_rate": 1.9372166185735437e-05, - "loss": 1.087, + "learning_rate": 1.937422675964294e-05, + "loss": 0.9058, "step": 4934 }, { - "epoch": 0.14003972758229286, + "epoch": 0.13984527756524695, "grad_norm": 0.0, - "learning_rate": 1.937184561972183e-05, - "loss": 0.9567, + "learning_rate": 1.937390715188838e-05, + "loss": 1.005, "step": 4935 }, { - "epoch": 0.14006810442678774, + "epoch": 0.13987361500750942, "grad_norm": 0.0, - "learning_rate": 1.937152497454382e-05, - "loss": 0.963, + "learning_rate": 1.9373587465173915e-05, + "loss": 1.0244, "step": 4936 }, { - "epoch": 0.14009648127128263, + "epoch": 0.13990195244977188, "grad_norm": 0.0, - "learning_rate": 1.9371204250204116e-05, - "loss": 0.942, + "learning_rate": 1.937326769950224e-05, + "loss": 1.1044, "step": 4937 }, { - "epoch": 0.14012485811577752, + "epoch": 0.13993028989203435, "grad_norm": 0.0, - "learning_rate": 1.937088344670543e-05, - "loss": 1.0681, + "learning_rate": 1.9372947854876053e-05, + "loss": 0.9014, "step": 4938 }, { - "epoch": 0.1401532349602724, + "epoch": 0.13995862733429681, "grad_norm": 0.0, - "learning_rate": 1.937056256405047e-05, - "loss": 0.9935, + "learning_rate": 1.9372627931298043e-05, + "loss": 0.9499, "step": 4939 }, { - "epoch": 0.14018161180476732, + "epoch": 0.13998696477655928, "grad_norm": 0.0, - "learning_rate": 1.9370241602241952e-05, - "loss": 1.006, + "learning_rate": 1.9372307928770912e-05, + "loss": 1.1589, "step": 4940 }, { - "epoch": 0.1402099886492622, + "epoch": 0.14001530221882172, "grad_norm": 0.0, - "learning_rate": 1.9369920561282575e-05, - "loss": 0.9484, + "learning_rate": 1.9371987847297348e-05, + "loss": 0.9515, "step": 4941 }, { - "epoch": 0.1402383654937571, + "epoch": 0.14004363966108418, "grad_norm": 0.0, - "learning_rate": 1.936959944117506e-05, - "loss": 0.9738, + "learning_rate": 1.937166768688005e-05, + "loss": 1.0111, "step": 4942 }, { - "epoch": 0.14026674233825198, + "epoch": 0.14007197710334665, "grad_norm": 0.0, - "learning_rate": 1.9369278241922114e-05, - "loss": 0.9468, + "learning_rate": 1.9371347447521717e-05, + "loss": 1.0436, "step": 4943 }, { - "epoch": 0.14029511918274687, + "epoch": 0.14010031454560912, "grad_norm": 0.0, - "learning_rate": 1.9368956963526456e-05, - "loss": 1.0227, + "learning_rate": 1.9371027129225042e-05, + "loss": 1.0895, "step": 4944 }, { - "epoch": 0.14032349602724178, + "epoch": 0.14012865198787158, "grad_norm": 0.0, - "learning_rate": 1.93686356059908e-05, - "loss": 1.0968, + "learning_rate": 1.9370706731992724e-05, + "loss": 0.9957, "step": 4945 }, { - "epoch": 0.14035187287173667, + "epoch": 0.14015698943013405, "grad_norm": 0.0, - "learning_rate": 1.9368314169317858e-05, - "loss": 1.046, + "learning_rate": 1.937038625582747e-05, + "loss": 1.0206, "step": 4946 }, { - "epoch": 0.14038024971623156, + "epoch": 0.1401853268723965, "grad_norm": 0.0, - "learning_rate": 1.9367992653510344e-05, - "loss": 0.945, + "learning_rate": 1.9370065700731967e-05, + "loss": 1.0182, "step": 4947 }, { - "epoch": 0.14040862656072645, + "epoch": 0.14021366431465895, "grad_norm": 0.0, - "learning_rate": 1.936767105857097e-05, - "loss": 1.0165, + "learning_rate": 1.9369745066708922e-05, + "loss": 1.0513, "step": 4948 }, { - "epoch": 0.14043700340522133, + "epoch": 0.14024200175692142, "grad_norm": 0.0, - "learning_rate": 1.9367349384502462e-05, - "loss": 0.9402, + "learning_rate": 1.9369424353761037e-05, + "loss": 1.0627, "step": 4949 }, { - "epoch": 0.14046538024971622, + "epoch": 0.14027033919918389, "grad_norm": 0.0, - "learning_rate": 1.9367027631307533e-05, - "loss": 1.0114, + "learning_rate": 1.936910356189101e-05, + "loss": 1.0938, "step": 4950 }, { - "epoch": 0.14049375709421114, + "epoch": 0.14029867664144635, "grad_norm": 0.0, - "learning_rate": 1.9366705798988902e-05, - "loss": 1.0457, + "learning_rate": 1.9368782691101545e-05, + "loss": 1.0641, "step": 4951 }, { - "epoch": 0.14052213393870602, + "epoch": 0.14032701408370882, "grad_norm": 0.0, - "learning_rate": 1.9366383887549283e-05, - "loss": 1.0184, + "learning_rate": 1.9368461741395344e-05, + "loss": 0.9559, "step": 4952 }, { - "epoch": 0.1405505107832009, + "epoch": 0.14035535152597126, "grad_norm": 0.0, - "learning_rate": 1.93660618969914e-05, - "loss": 1.1269, + "learning_rate": 1.9368140712775113e-05, + "loss": 1.0709, "step": 4953 }, { - "epoch": 0.1405788876276958, + "epoch": 0.14038368896823372, "grad_norm": 0.0, - "learning_rate": 1.9365739827317967e-05, - "loss": 1.1476, + "learning_rate": 1.9367819605243553e-05, + "loss": 1.0268, "step": 4954 }, { - "epoch": 0.14060726447219069, + "epoch": 0.1404120264104962, "grad_norm": 0.0, - "learning_rate": 1.9365417678531713e-05, - "loss": 0.978, + "learning_rate": 1.936749841880337e-05, + "loss": 0.9638, "step": 4955 }, { - "epoch": 0.14063564131668557, + "epoch": 0.14044036385275865, "grad_norm": 0.0, - "learning_rate": 1.9365095450635353e-05, - "loss": 1.0804, + "learning_rate": 1.9367177153457268e-05, + "loss": 0.9385, "step": 4956 }, { - "epoch": 0.1406640181611805, + "epoch": 0.14046870129502112, "grad_norm": 0.0, - "learning_rate": 1.936477314363161e-05, - "loss": 1.0419, + "learning_rate": 1.9366855809207955e-05, + "loss": 1.0925, "step": 4957 }, { - "epoch": 0.14069239500567537, + "epoch": 0.14049703873728359, "grad_norm": 0.0, - "learning_rate": 1.936445075752321e-05, - "loss": 0.9638, + "learning_rate": 1.9366534386058137e-05, + "loss": 1.0483, "step": 4958 }, { - "epoch": 0.14072077185017026, + "epoch": 0.14052537617954602, "grad_norm": 0.0, - "learning_rate": 1.9364128292312872e-05, - "loss": 0.9981, + "learning_rate": 1.9366212884010523e-05, + "loss": 1.149, "step": 4959 }, { - "epoch": 0.14074914869466515, + "epoch": 0.1405537136218085, "grad_norm": 0.0, - "learning_rate": 1.9363805748003323e-05, - "loss": 1.0549, + "learning_rate": 1.9365891303067815e-05, + "loss": 0.986, "step": 4960 }, { - "epoch": 0.14077752553916004, + "epoch": 0.14058205106407096, "grad_norm": 0.0, - "learning_rate": 1.936348312459728e-05, - "loss": 0.9509, + "learning_rate": 1.9365569643232732e-05, + "loss": 1.0217, "step": 4961 }, { - "epoch": 0.14080590238365492, + "epoch": 0.14061038850633342, "grad_norm": 0.0, - "learning_rate": 1.936316042209748e-05, - "loss": 1.0727, + "learning_rate": 1.936524790450798e-05, + "loss": 1.0563, "step": 4962 }, { - "epoch": 0.14083427922814984, + "epoch": 0.1406387259485959, "grad_norm": 0.0, - "learning_rate": 1.936283764050664e-05, - "loss": 0.9037, + "learning_rate": 1.936492608689626e-05, + "loss": 0.9452, "step": 4963 }, { - "epoch": 0.14086265607264473, + "epoch": 0.14066706339085835, "grad_norm": 0.0, - "learning_rate": 1.9362514779827494e-05, - "loss": 1.0487, + "learning_rate": 1.9364604190400293e-05, + "loss": 1.0178, "step": 4964 }, { - "epoch": 0.1408910329171396, + "epoch": 0.1406954008331208, "grad_norm": 0.0, - "learning_rate": 1.936219184006276e-05, - "loss": 0.9592, + "learning_rate": 1.9364282215022788e-05, + "loss": 1.0687, "step": 4965 }, { - "epoch": 0.1409194097616345, + "epoch": 0.14072373827538326, "grad_norm": 0.0, - "learning_rate": 1.9361868821215174e-05, - "loss": 0.9466, + "learning_rate": 1.9363960160766457e-05, + "loss": 1.1012, "step": 4966 }, { - "epoch": 0.1409477866061294, + "epoch": 0.14075207571764572, "grad_norm": 0.0, - "learning_rate": 1.9361545723287462e-05, - "loss": 1.0604, + "learning_rate": 1.9363638027634012e-05, + "loss": 1.0314, "step": 4967 }, { - "epoch": 0.1409761634506243, + "epoch": 0.1407804131599082, "grad_norm": 0.0, - "learning_rate": 1.9361222546282346e-05, - "loss": 1.0846, + "learning_rate": 1.9363315815628166e-05, + "loss": 0.8916, "step": 4968 }, { - "epoch": 0.1410045402951192, + "epoch": 0.14080875060217066, "grad_norm": 0.0, - "learning_rate": 1.9360899290202568e-05, - "loss": 0.8837, + "learning_rate": 1.9362993524751634e-05, + "loss": 0.9559, "step": 4969 }, { - "epoch": 0.14103291713961408, + "epoch": 0.14083708804443312, "grad_norm": 0.0, - "learning_rate": 1.936057595505085e-05, - "loss": 0.9858, + "learning_rate": 1.9362671155007132e-05, + "loss": 1.1125, "step": 4970 }, { - "epoch": 0.14106129398410897, + "epoch": 0.14086542548669556, "grad_norm": 0.0, - "learning_rate": 1.936025254082993e-05, - "loss": 0.9212, + "learning_rate": 1.9362348706397374e-05, + "loss": 1.0007, "step": 4971 }, { - "epoch": 0.14108967082860385, + "epoch": 0.14089376292895803, "grad_norm": 0.0, - "learning_rate": 1.9359929047542533e-05, - "loss": 1.0105, + "learning_rate": 1.9362026178925074e-05, + "loss": 1.0279, "step": 4972 }, { - "epoch": 0.14111804767309874, + "epoch": 0.1409221003712205, "grad_norm": 0.0, - "learning_rate": 1.9359605475191396e-05, - "loss": 1.0915, + "learning_rate": 1.9361703572592954e-05, + "loss": 1.1118, "step": 4973 }, { - "epoch": 0.14114642451759365, + "epoch": 0.14095043781348296, "grad_norm": 0.0, - "learning_rate": 1.935928182377925e-05, - "loss": 1.0461, + "learning_rate": 1.9361380887403726e-05, + "loss": 1.0617, "step": 4974 }, { - "epoch": 0.14117480136208854, + "epoch": 0.14097877525574543, "grad_norm": 0.0, - "learning_rate": 1.9358958093308828e-05, - "loss": 1.0335, + "learning_rate": 1.936105812336011e-05, + "loss": 1.0382, "step": 4975 }, { - "epoch": 0.14120317820658343, + "epoch": 0.1410071126980079, "grad_norm": 0.0, - "learning_rate": 1.935863428378287e-05, - "loss": 1.0894, + "learning_rate": 1.936073528046483e-05, + "loss": 1.1312, "step": 4976 }, { - "epoch": 0.14123155505107832, + "epoch": 0.14103545014027033, "grad_norm": 0.0, - "learning_rate": 1.9358310395204104e-05, - "loss": 0.9928, + "learning_rate": 1.9360412358720596e-05, + "loss": 1.0011, "step": 4977 }, { - "epoch": 0.1412599318955732, + "epoch": 0.1410637875825328, "grad_norm": 0.0, - "learning_rate": 1.935798642757527e-05, - "loss": 0.9822, + "learning_rate": 1.9360089358130136e-05, + "loss": 1.0137, "step": 4978 }, { - "epoch": 0.1412883087400681, + "epoch": 0.14109212502479526, "grad_norm": 0.0, - "learning_rate": 1.9357662380899107e-05, - "loss": 0.9913, + "learning_rate": 1.9359766278696165e-05, + "loss": 1.0344, "step": 4979 }, { - "epoch": 0.141316685584563, + "epoch": 0.14112046246705773, "grad_norm": 0.0, - "learning_rate": 1.9357338255178348e-05, - "loss": 0.9787, + "learning_rate": 1.9359443120421408e-05, + "loss": 1.0462, "step": 4980 }, { - "epoch": 0.1413450624290579, + "epoch": 0.1411487999093202, "grad_norm": 0.0, - "learning_rate": 1.935701405041573e-05, - "loss": 1.0289, + "learning_rate": 1.9359119883308584e-05, + "loss": 0.858, "step": 4981 }, { - "epoch": 0.14137343927355278, + "epoch": 0.14117713735158266, "grad_norm": 0.0, - "learning_rate": 1.9356689766613993e-05, - "loss": 1.0381, + "learning_rate": 1.935879656736042e-05, + "loss": 0.9902, "step": 4982 }, { - "epoch": 0.14140181611804767, + "epoch": 0.1412054747938451, "grad_norm": 0.0, - "learning_rate": 1.9356365403775877e-05, - "loss": 1.0742, + "learning_rate": 1.935847317257964e-05, + "loss": 1.1787, "step": 4983 }, { - "epoch": 0.14143019296254256, + "epoch": 0.14123381223610756, "grad_norm": 0.0, - "learning_rate": 1.935604096190413e-05, - "loss": 0.9475, + "learning_rate": 1.935814969896896e-05, + "loss": 1.1081, "step": 4984 }, { - "epoch": 0.14145856980703747, + "epoch": 0.14126214967837003, "grad_norm": 0.0, - "learning_rate": 1.9355716441001475e-05, - "loss": 1.0412, + "learning_rate": 1.9357826146531114e-05, + "loss": 0.9622, "step": 4985 }, { - "epoch": 0.14148694665153236, + "epoch": 0.1412904871206325, "grad_norm": 0.0, - "learning_rate": 1.9355391841070665e-05, - "loss": 1.0302, + "learning_rate": 1.935750251526882e-05, + "loss": 1.0115, "step": 4986 }, { - "epoch": 0.14151532349602725, + "epoch": 0.14131882456289496, "grad_norm": 0.0, - "learning_rate": 1.935506716211444e-05, - "loss": 1.0808, + "learning_rate": 1.9357178805184808e-05, + "loss": 0.984, "step": 4987 }, { - "epoch": 0.14154370034052213, + "epoch": 0.1413471620051574, "grad_norm": 0.0, - "learning_rate": 1.935474240413554e-05, - "loss": 1.0117, + "learning_rate": 1.9356855016281807e-05, + "loss": 1.0161, "step": 4988 }, { - "epoch": 0.14157207718501702, + "epoch": 0.14137549944741987, "grad_norm": 0.0, - "learning_rate": 1.9354417567136716e-05, - "loss": 1.0195, + "learning_rate": 1.935653114856254e-05, + "loss": 1.0779, "step": 4989 }, { - "epoch": 0.1416004540295119, + "epoch": 0.14140383688968233, "grad_norm": 0.0, - "learning_rate": 1.93540926511207e-05, - "loss": 0.9507, + "learning_rate": 1.9356207202029737e-05, + "loss": 1.129, "step": 4990 }, { - "epoch": 0.14162883087400682, + "epoch": 0.1414321743319448, "grad_norm": 0.0, - "learning_rate": 1.935376765609025e-05, - "loss": 1.0184, + "learning_rate": 1.9355883176686125e-05, + "loss": 1.0579, "step": 4991 }, { - "epoch": 0.1416572077185017, + "epoch": 0.14146051177420726, "grad_norm": 0.0, - "learning_rate": 1.93534425820481e-05, - "loss": 1.1026, + "learning_rate": 1.9355559072534436e-05, + "loss": 0.9916, "step": 4992 }, { - "epoch": 0.1416855845629966, + "epoch": 0.14148884921646973, "grad_norm": 0.0, - "learning_rate": 1.9353117428997e-05, - "loss": 1.037, + "learning_rate": 1.93552348895774e-05, + "loss": 0.9326, "step": 4993 }, { - "epoch": 0.14171396140749148, + "epoch": 0.14151718665873217, "grad_norm": 0.0, - "learning_rate": 1.9352792196939697e-05, - "loss": 1.0868, + "learning_rate": 1.9354910627817745e-05, + "loss": 1.0701, "step": 4994 }, { - "epoch": 0.14174233825198637, + "epoch": 0.14154552410099464, "grad_norm": 0.0, - "learning_rate": 1.9352466885878937e-05, - "loss": 1.0258, + "learning_rate": 1.9354586287258205e-05, + "loss": 1.0681, "step": 4995 }, { - "epoch": 0.14177071509648126, + "epoch": 0.1415738615432571, "grad_norm": 0.0, - "learning_rate": 1.935214149581747e-05, - "loss": 0.9849, + "learning_rate": 1.9354261867901507e-05, + "loss": 1.0433, "step": 4996 }, { - "epoch": 0.14179909194097617, + "epoch": 0.14160219898551957, "grad_norm": 0.0, - "learning_rate": 1.9351816026758042e-05, - "loss": 1.0383, + "learning_rate": 1.9353937369750392e-05, + "loss": 1.054, "step": 4997 }, { - "epoch": 0.14182746878547106, + "epoch": 0.14163053642778203, "grad_norm": 0.0, - "learning_rate": 1.9351490478703403e-05, - "loss": 0.8959, + "learning_rate": 1.935361279280759e-05, + "loss": 1.0787, "step": 4998 }, { - "epoch": 0.14185584562996595, + "epoch": 0.1416588738700445, "grad_norm": 0.0, - "learning_rate": 1.9351164851656304e-05, - "loss": 1.0773, + "learning_rate": 1.935328813707583e-05, + "loss": 1.152, "step": 4999 }, { - "epoch": 0.14188422247446084, + "epoch": 0.14168721131230694, "grad_norm": 0.0, - "learning_rate": 1.9350839145619496e-05, - "loss": 1.0483, + "learning_rate": 1.935296340255785e-05, + "loss": 1.0898, "step": 5000 }, { - "epoch": 0.14191259931895572, + "epoch": 0.1417155487545694, "grad_norm": 0.0, - "learning_rate": 1.935051336059573e-05, - "loss": 0.996, + "learning_rate": 1.9352638589256392e-05, + "loss": 1.1025, "step": 5001 }, { - "epoch": 0.1419409761634506, + "epoch": 0.14174388619683187, "grad_norm": 0.0, - "learning_rate": 1.9350187496587757e-05, - "loss": 0.9728, + "learning_rate": 1.935231369717418e-05, + "loss": 1.2367, "step": 5002 }, { - "epoch": 0.14196935300794553, + "epoch": 0.14177222363909434, "grad_norm": 0.0, - "learning_rate": 1.934986155359833e-05, - "loss": 0.932, + "learning_rate": 1.935198872631396e-05, + "loss": 1.0208, "step": 5003 }, { - "epoch": 0.1419977298524404, + "epoch": 0.1418005610813568, "grad_norm": 0.0, - "learning_rate": 1.93495355316302e-05, - "loss": 1.0408, + "learning_rate": 1.9351663676678465e-05, + "loss": 1.0433, "step": 5004 }, { - "epoch": 0.1420261066969353, + "epoch": 0.14182889852361927, "grad_norm": 0.0, - "learning_rate": 1.9349209430686125e-05, - "loss": 1.0301, + "learning_rate": 1.9351338548270437e-05, + "loss": 1.028, "step": 5005 }, { - "epoch": 0.1420544835414302, + "epoch": 0.1418572359658817, "grad_norm": 0.0, - "learning_rate": 1.9348883250768858e-05, - "loss": 1.0464, + "learning_rate": 1.935101334109261e-05, + "loss": 1.0619, "step": 5006 }, { - "epoch": 0.14208286038592507, + "epoch": 0.14188557340814417, "grad_norm": 0.0, - "learning_rate": 1.9348556991881154e-05, - "loss": 1.1004, + "learning_rate": 1.9350688055147725e-05, + "loss": 1.0682, "step": 5007 }, { - "epoch": 0.14211123723042, + "epoch": 0.14191391085040664, "grad_norm": 0.0, - "learning_rate": 1.934823065402577e-05, - "loss": 0.9401, + "learning_rate": 1.9350362690438524e-05, + "loss": 1.1476, "step": 5008 }, { - "epoch": 0.14213961407491488, + "epoch": 0.1419422482926691, "grad_norm": 0.0, - "learning_rate": 1.934790423720546e-05, - "loss": 0.9591, + "learning_rate": 1.9350037246967744e-05, + "loss": 1.0928, "step": 5009 }, { - "epoch": 0.14216799091940976, + "epoch": 0.14197058573493157, "grad_norm": 0.0, - "learning_rate": 1.9347577741422983e-05, - "loss": 0.9517, + "learning_rate": 1.9349711724738126e-05, + "loss": 0.9533, "step": 5010 }, { - "epoch": 0.14219636776390465, + "epoch": 0.14199892317719404, "grad_norm": 0.0, - "learning_rate": 1.9347251166681094e-05, - "loss": 0.99, + "learning_rate": 1.9349386123752418e-05, + "loss": 1.0667, "step": 5011 }, { - "epoch": 0.14222474460839954, + "epoch": 0.14202726061945647, "grad_norm": 0.0, - "learning_rate": 1.9346924512982555e-05, - "loss": 1.1144, + "learning_rate": 1.9349060444013358e-05, + "loss": 1.0368, "step": 5012 }, { - "epoch": 0.14225312145289443, + "epoch": 0.14205559806171894, "grad_norm": 0.0, - "learning_rate": 1.9346597780330126e-05, - "loss": 1.0928, + "learning_rate": 1.9348734685523693e-05, + "loss": 1.0836, "step": 5013 }, { - "epoch": 0.14228149829738934, + "epoch": 0.1420839355039814, "grad_norm": 0.0, - "learning_rate": 1.9346270968726567e-05, - "loss": 0.9828, + "learning_rate": 1.9348408848286157e-05, + "loss": 1.0088, "step": 5014 }, { - "epoch": 0.14230987514188423, + "epoch": 0.14211227294624387, "grad_norm": 0.0, - "learning_rate": 1.9345944078174637e-05, - "loss": 0.9921, + "learning_rate": 1.9348082932303506e-05, + "loss": 1.0046, "step": 5015 }, { - "epoch": 0.14233825198637912, + "epoch": 0.14214061038850634, "grad_norm": 0.0, - "learning_rate": 1.934561710867709e-05, - "loss": 1.0625, + "learning_rate": 1.934775693757848e-05, + "loss": 1.0002, "step": 5016 }, { - "epoch": 0.142366628830874, + "epoch": 0.1421689478307688, "grad_norm": 0.0, - "learning_rate": 1.9345290060236706e-05, - "loss": 0.8986, + "learning_rate": 1.9347430864113827e-05, + "loss": 0.9787, "step": 5017 }, { - "epoch": 0.1423950056753689, + "epoch": 0.14219728527303124, "grad_norm": 0.0, - "learning_rate": 1.9344962932856228e-05, - "loss": 1.0826, + "learning_rate": 1.9347104711912294e-05, + "loss": 1.1114, "step": 5018 }, { - "epoch": 0.14242338251986378, + "epoch": 0.1422256227152937, "grad_norm": 0.0, - "learning_rate": 1.934463572653843e-05, - "loss": 0.9622, + "learning_rate": 1.9346778480976626e-05, + "loss": 1.1155, "step": 5019 }, { - "epoch": 0.1424517593643587, + "epoch": 0.14225396015755618, "grad_norm": 0.0, - "learning_rate": 1.934430844128608e-05, - "loss": 1.0456, + "learning_rate": 1.934645217130957e-05, + "loss": 1.1321, "step": 5020 }, { - "epoch": 0.14248013620885358, + "epoch": 0.14228229759981864, "grad_norm": 0.0, - "learning_rate": 1.934398107710193e-05, - "loss": 1.0391, + "learning_rate": 1.934612578291388e-05, + "loss": 0.9609, "step": 5021 }, { - "epoch": 0.14250851305334847, + "epoch": 0.1423106350420811, "grad_norm": 0.0, - "learning_rate": 1.9343653633988752e-05, - "loss": 1.0834, + "learning_rate": 1.93457993157923e-05, + "loss": 1.0507, "step": 5022 }, { - "epoch": 0.14253688989784336, + "epoch": 0.14233897248434357, "grad_norm": 0.0, - "learning_rate": 1.9343326111949315e-05, - "loss": 0.9883, + "learning_rate": 1.9345472769947582e-05, + "loss": 1.0177, "step": 5023 }, { - "epoch": 0.14256526674233824, + "epoch": 0.142367309926606, "grad_norm": 0.0, - "learning_rate": 1.934299851098638e-05, - "loss": 1.0815, + "learning_rate": 1.934514614538248e-05, + "loss": 1.139, "step": 5024 }, { - "epoch": 0.14259364358683316, + "epoch": 0.14239564736886848, "grad_norm": 0.0, - "learning_rate": 1.9342670831102716e-05, - "loss": 0.9861, + "learning_rate": 1.934481944209974e-05, + "loss": 1.1845, "step": 5025 }, { - "epoch": 0.14262202043132804, + "epoch": 0.14242398481113094, "grad_norm": 0.0, - "learning_rate": 1.9342343072301096e-05, - "loss": 1.0551, + "learning_rate": 1.9344492660102112e-05, + "loss": 0.9887, "step": 5026 }, { - "epoch": 0.14265039727582293, + "epoch": 0.1424523222533934, "grad_norm": 0.0, - "learning_rate": 1.9342015234584277e-05, - "loss": 0.9713, + "learning_rate": 1.9344165799392353e-05, + "loss": 1.0306, "step": 5027 }, { - "epoch": 0.14267877412031782, + "epoch": 0.14248065969565588, "grad_norm": 0.0, - "learning_rate": 1.934168731795504e-05, - "loss": 0.9762, + "learning_rate": 1.934383885997322e-05, + "loss": 1.0428, "step": 5028 }, { - "epoch": 0.1427071509648127, + "epoch": 0.14250899713791834, "grad_norm": 0.0, - "learning_rate": 1.934135932241615e-05, - "loss": 1.0309, + "learning_rate": 1.9343511841847458e-05, + "loss": 1.0525, "step": 5029 }, { - "epoch": 0.1427355278093076, + "epoch": 0.14253733458018078, "grad_norm": 0.0, - "learning_rate": 1.9341031247970377e-05, - "loss": 1.0364, + "learning_rate": 1.9343184745017828e-05, + "loss": 1.0434, "step": 5030 }, { - "epoch": 0.1427639046538025, + "epoch": 0.14256567202244325, "grad_norm": 0.0, - "learning_rate": 1.9340703094620493e-05, - "loss": 0.9403, + "learning_rate": 1.9342857569487084e-05, + "loss": 1.1819, "step": 5031 }, { - "epoch": 0.1427922814982974, + "epoch": 0.1425940094647057, "grad_norm": 0.0, - "learning_rate": 1.9340374862369273e-05, - "loss": 0.9427, + "learning_rate": 1.934253031525798e-05, + "loss": 0.9885, "step": 5032 }, { - "epoch": 0.14282065834279228, + "epoch": 0.14262234690696818, "grad_norm": 0.0, - "learning_rate": 1.934004655121948e-05, - "loss": 1.1016, + "learning_rate": 1.9342202982333272e-05, + "loss": 0.9866, "step": 5033 }, { - "epoch": 0.14284903518728717, + "epoch": 0.14265068434923064, "grad_norm": 0.0, - "learning_rate": 1.93397181611739e-05, - "loss": 0.9761, + "learning_rate": 1.9341875570715723e-05, + "loss": 1.0819, "step": 5034 }, { - "epoch": 0.14287741203178206, + "epoch": 0.1426790217914931, "grad_norm": 0.0, - "learning_rate": 1.93393896922353e-05, - "loss": 1.0286, + "learning_rate": 1.9341548080408085e-05, + "loss": 1.1271, "step": 5035 }, { - "epoch": 0.14290578887627695, + "epoch": 0.14270735923375555, "grad_norm": 0.0, - "learning_rate": 1.9339061144406455e-05, - "loss": 0.9762, + "learning_rate": 1.934122051141312e-05, + "loss": 0.9518, "step": 5036 }, { - "epoch": 0.14293416572077186, + "epoch": 0.14273569667601801, "grad_norm": 0.0, - "learning_rate": 1.9338732517690138e-05, - "loss": 1.02, + "learning_rate": 1.9340892863733585e-05, + "loss": 1.0082, "step": 5037 }, { - "epoch": 0.14296254256526675, + "epoch": 0.14276403411828048, "grad_norm": 0.0, - "learning_rate": 1.933840381208913e-05, - "loss": 1.128, + "learning_rate": 1.934056513737224e-05, + "loss": 1.0202, "step": 5038 }, { - "epoch": 0.14299091940976164, + "epoch": 0.14279237156054295, "grad_norm": 0.0, - "learning_rate": 1.933807502760621e-05, - "loss": 1.0542, + "learning_rate": 1.9340237332331848e-05, + "loss": 1.1248, "step": 5039 }, { - "epoch": 0.14301929625425652, + "epoch": 0.1428207090028054, "grad_norm": 0.0, - "learning_rate": 1.933774616424414e-05, - "loss": 1.0672, + "learning_rate": 1.933990944861517e-05, + "loss": 0.9853, "step": 5040 }, { - "epoch": 0.1430476730987514, + "epoch": 0.14284904644506788, "grad_norm": 0.0, - "learning_rate": 1.9337417222005715e-05, - "loss": 1.0054, + "learning_rate": 1.9339581486224964e-05, + "loss": 0.9731, "step": 5041 }, { - "epoch": 0.1430760499432463, + "epoch": 0.14287738388733032, "grad_norm": 0.0, - "learning_rate": 1.9337088200893706e-05, - "loss": 1.0143, + "learning_rate": 1.9339253445163994e-05, + "loss": 0.9987, "step": 5042 }, { - "epoch": 0.1431044267877412, + "epoch": 0.14290572132959278, "grad_norm": 0.0, - "learning_rate": 1.9336759100910893e-05, - "loss": 1.0265, + "learning_rate": 1.9338925325435026e-05, + "loss": 1.0623, "step": 5043 }, { - "epoch": 0.1431328036322361, + "epoch": 0.14293405877185525, "grad_norm": 0.0, - "learning_rate": 1.9336429922060055e-05, - "loss": 1.0917, + "learning_rate": 1.933859712704082e-05, + "loss": 0.8969, "step": 5044 }, { - "epoch": 0.143161180476731, + "epoch": 0.14296239621411772, "grad_norm": 0.0, - "learning_rate": 1.9336100664343976e-05, - "loss": 1.0579, + "learning_rate": 1.9338268849984146e-05, + "loss": 1.0466, "step": 5045 }, { - "epoch": 0.14318955732122587, + "epoch": 0.14299073365638018, "grad_norm": 0.0, - "learning_rate": 1.9335771327765433e-05, - "loss": 1.0947, + "learning_rate": 1.9337940494267763e-05, + "loss": 1.0476, "step": 5046 }, { - "epoch": 0.14321793416572076, + "epoch": 0.14301907109864265, "grad_norm": 0.0, - "learning_rate": 1.933544191232721e-05, - "loss": 0.9694, + "learning_rate": 1.933761205989444e-05, + "loss": 1.1043, "step": 5047 }, { - "epoch": 0.14324631101021568, + "epoch": 0.14304740854090509, "grad_norm": 0.0, - "learning_rate": 1.933511241803209e-05, - "loss": 0.9744, + "learning_rate": 1.9337283546866943e-05, + "loss": 1.056, "step": 5048 }, { - "epoch": 0.14327468785471056, + "epoch": 0.14307574598316755, "grad_norm": 0.0, - "learning_rate": 1.9334782844882856e-05, - "loss": 0.9749, + "learning_rate": 1.9336954955188042e-05, + "loss": 1.015, "step": 5049 }, { - "epoch": 0.14330306469920545, + "epoch": 0.14310408342543002, "grad_norm": 0.0, - "learning_rate": 1.933445319288229e-05, - "loss": 0.98, + "learning_rate": 1.93366262848605e-05, + "loss": 0.9691, "step": 5050 }, { - "epoch": 0.14333144154370034, + "epoch": 0.14313242086769248, "grad_norm": 0.0, - "learning_rate": 1.9334123462033184e-05, - "loss": 1.0035, + "learning_rate": 1.9336297535887088e-05, + "loss": 1.0723, "step": 5051 }, { - "epoch": 0.14335981838819523, + "epoch": 0.14316075830995495, "grad_norm": 0.0, - "learning_rate": 1.9333793652338313e-05, - "loss": 1.0417, + "learning_rate": 1.9335968708270575e-05, + "loss": 0.9494, "step": 5052 }, { - "epoch": 0.1433881952326901, + "epoch": 0.14318909575221742, "grad_norm": 0.0, - "learning_rate": 1.9333463763800467e-05, - "loss": 1.0192, + "learning_rate": 1.933563980201373e-05, + "loss": 1.1152, "step": 5053 }, { - "epoch": 0.14341657207718503, + "epoch": 0.14321743319447985, "grad_norm": 0.0, - "learning_rate": 1.9333133796422437e-05, - "loss": 1.0464, + "learning_rate": 1.9335310817119323e-05, + "loss": 0.9953, "step": 5054 }, { - "epoch": 0.14344494892167992, + "epoch": 0.14324577063674232, "grad_norm": 0.0, - "learning_rate": 1.9332803750207002e-05, - "loss": 0.9957, + "learning_rate": 1.933498175359013e-05, + "loss": 1.1834, "step": 5055 }, { - "epoch": 0.1434733257661748, + "epoch": 0.14327410807900479, "grad_norm": 0.0, - "learning_rate": 1.9332473625156957e-05, - "loss": 1.0678, + "learning_rate": 1.9334652611428915e-05, + "loss": 1.0, "step": 5056 }, { - "epoch": 0.1435017026106697, + "epoch": 0.14330244552126725, "grad_norm": 0.0, - "learning_rate": 1.9332143421275084e-05, - "loss": 0.9483, + "learning_rate": 1.9334323390638458e-05, + "loss": 1.0226, "step": 5057 }, { - "epoch": 0.14353007945516458, + "epoch": 0.14333078296352972, "grad_norm": 0.0, - "learning_rate": 1.9331813138564177e-05, - "loss": 0.9503, + "learning_rate": 1.9333994091221527e-05, + "loss": 1.033, "step": 5058 }, { - "epoch": 0.14355845629965946, + "epoch": 0.14335912040579218, "grad_norm": 0.0, - "learning_rate": 1.933148277702703e-05, - "loss": 1.0764, + "learning_rate": 1.93336647131809e-05, + "loss": 1.0246, "step": 5059 }, { - "epoch": 0.14358683314415438, + "epoch": 0.14338745784805462, "grad_norm": 0.0, - "learning_rate": 1.9331152336666422e-05, - "loss": 0.9857, + "learning_rate": 1.9333335256519346e-05, + "loss": 1.0487, "step": 5060 }, { - "epoch": 0.14361520998864927, + "epoch": 0.1434157952903171, "grad_norm": 0.0, - "learning_rate": 1.9330821817485154e-05, - "loss": 0.9607, + "learning_rate": 1.9333005721239647e-05, + "loss": 0.9613, "step": 5061 }, { - "epoch": 0.14364358683314415, + "epoch": 0.14344413273257955, "grad_norm": 0.0, - "learning_rate": 1.9330491219486013e-05, - "loss": 0.9927, + "learning_rate": 1.9332676107344573e-05, + "loss": 1.0637, "step": 5062 }, { - "epoch": 0.14367196367763904, + "epoch": 0.14347247017484202, "grad_norm": 0.0, - "learning_rate": 1.9330160542671794e-05, - "loss": 1.1264, + "learning_rate": 1.9332346414836904e-05, + "loss": 1.0806, "step": 5063 }, { - "epoch": 0.14370034052213393, + "epoch": 0.1435008076171045, "grad_norm": 0.0, - "learning_rate": 1.932982978704529e-05, - "loss": 1.0888, + "learning_rate": 1.9332016643719413e-05, + "loss": 1.1983, "step": 5064 }, { - "epoch": 0.14372871736662884, + "epoch": 0.14352914505936695, "grad_norm": 0.0, - "learning_rate": 1.9329498952609295e-05, - "loss": 0.9701, + "learning_rate": 1.9331686793994883e-05, + "loss": 0.9409, "step": 5065 }, { - "epoch": 0.14375709421112373, + "epoch": 0.1435574825016294, "grad_norm": 0.0, - "learning_rate": 1.9329168039366602e-05, - "loss": 0.9874, + "learning_rate": 1.933135686566609e-05, + "loss": 0.9726, "step": 5066 }, { - "epoch": 0.14378547105561862, + "epoch": 0.14358581994389186, "grad_norm": 0.0, - "learning_rate": 1.932883704732001e-05, - "loss": 1.0935, + "learning_rate": 1.933102685873581e-05, + "loss": 1.1324, "step": 5067 }, { - "epoch": 0.1438138479001135, + "epoch": 0.14361415738615432, "grad_norm": 0.0, - "learning_rate": 1.9328505976472307e-05, - "loss": 1.1889, + "learning_rate": 1.9330696773206826e-05, + "loss": 1.0843, "step": 5068 }, { - "epoch": 0.1438422247446084, + "epoch": 0.1436424948284168, "grad_norm": 0.0, - "learning_rate": 1.9328174826826297e-05, - "loss": 0.9474, + "learning_rate": 1.933036660908192e-05, + "loss": 1.0474, "step": 5069 }, { - "epoch": 0.14387060158910328, + "epoch": 0.14367083227067926, "grad_norm": 0.0, - "learning_rate": 1.9327843598384775e-05, - "loss": 1.0393, + "learning_rate": 1.9330036366363872e-05, + "loss": 1.0878, "step": 5070 }, { - "epoch": 0.1438989784335982, + "epoch": 0.14369916971294172, "grad_norm": 0.0, - "learning_rate": 1.9327512291150537e-05, - "loss": 1.028, + "learning_rate": 1.9329706045055463e-05, + "loss": 1.0675, "step": 5071 }, { - "epoch": 0.14392735527809308, + "epoch": 0.14372750715520416, "grad_norm": 0.0, - "learning_rate": 1.9327180905126386e-05, - "loss": 1.0393, + "learning_rate": 1.9329375645159473e-05, + "loss": 0.9851, "step": 5072 }, { - "epoch": 0.14395573212258797, + "epoch": 0.14375584459746663, "grad_norm": 0.0, - "learning_rate": 1.932684944031512e-05, - "loss": 1.1185, + "learning_rate": 1.932904516667869e-05, + "loss": 1.0748, "step": 5073 }, { - "epoch": 0.14398410896708286, + "epoch": 0.1437841820397291, "grad_norm": 0.0, - "learning_rate": 1.9326517896719533e-05, - "loss": 0.9554, + "learning_rate": 1.9328714609615896e-05, + "loss": 1.048, "step": 5074 }, { - "epoch": 0.14401248581157775, + "epoch": 0.14381251948199156, "grad_norm": 0.0, - "learning_rate": 1.932618627434243e-05, - "loss": 1.021, + "learning_rate": 1.932838397397387e-05, + "loss": 0.9675, "step": 5075 }, { - "epoch": 0.14404086265607263, + "epoch": 0.14384085692425402, "grad_norm": 0.0, - "learning_rate": 1.9325854573186618e-05, - "loss": 0.8838, + "learning_rate": 1.9328053259755406e-05, + "loss": 1.0243, "step": 5076 }, { - "epoch": 0.14406923950056755, + "epoch": 0.1438691943665165, "grad_norm": 0.0, - "learning_rate": 1.932552279325489e-05, - "loss": 1.054, + "learning_rate": 1.9327722466963285e-05, + "loss": 0.9883, "step": 5077 }, { - "epoch": 0.14409761634506243, + "epoch": 0.14389753180877893, "grad_norm": 0.0, - "learning_rate": 1.932519093455005e-05, - "loss": 0.9839, + "learning_rate": 1.9327391595600296e-05, + "loss": 0.9968, "step": 5078 }, { - "epoch": 0.14412599318955732, + "epoch": 0.1439258692510414, "grad_norm": 0.0, - "learning_rate": 1.9324858997074904e-05, - "loss": 1.0461, + "learning_rate": 1.932706064566922e-05, + "loss": 1.0788, "step": 5079 }, { - "epoch": 0.1441543700340522, + "epoch": 0.14395420669330386, "grad_norm": 0.0, - "learning_rate": 1.9324526980832256e-05, - "loss": 0.9269, + "learning_rate": 1.932672961717285e-05, + "loss": 1.0965, "step": 5080 }, { - "epoch": 0.1441827468785471, + "epoch": 0.14398254413556633, "grad_norm": 0.0, - "learning_rate": 1.932419488582491e-05, - "loss": 1.0455, + "learning_rate": 1.9326398510113974e-05, + "loss": 1.0922, "step": 5081 }, { - "epoch": 0.14421112372304198, + "epoch": 0.1440108815778288, "grad_norm": 0.0, - "learning_rate": 1.9323862712055668e-05, - "loss": 1.0242, + "learning_rate": 1.932606732449538e-05, + "loss": 0.9897, "step": 5082 }, { - "epoch": 0.1442395005675369, + "epoch": 0.14403921902009126, "grad_norm": 0.0, - "learning_rate": 1.9323530459527342e-05, - "loss": 1.0342, + "learning_rate": 1.9325736060319854e-05, + "loss": 1.009, "step": 5083 }, { - "epoch": 0.1442678774120318, + "epoch": 0.1440675564623537, "grad_norm": 0.0, - "learning_rate": 1.932319812824273e-05, - "loss": 0.9563, + "learning_rate": 1.9325404717590196e-05, + "loss": 0.9534, "step": 5084 }, { - "epoch": 0.14429625425652667, + "epoch": 0.14409589390461616, "grad_norm": 0.0, - "learning_rate": 1.932286571820465e-05, - "loss": 0.9883, + "learning_rate": 1.9325073296309186e-05, + "loss": 1.0941, "step": 5085 }, { - "epoch": 0.14432463110102156, + "epoch": 0.14412423134687863, "grad_norm": 0.0, - "learning_rate": 1.93225332294159e-05, - "loss": 0.9598, + "learning_rate": 1.9324741796479626e-05, + "loss": 1.0465, "step": 5086 }, { - "epoch": 0.14435300794551645, + "epoch": 0.1441525687891411, "grad_norm": 0.0, - "learning_rate": 1.9322200661879296e-05, - "loss": 0.964, + "learning_rate": 1.9324410218104297e-05, + "loss": 0.9838, "step": 5087 }, { - "epoch": 0.14438138479001136, + "epoch": 0.14418090623140356, "grad_norm": 0.0, - "learning_rate": 1.9321868015597642e-05, - "loss": 0.9982, + "learning_rate": 1.9324078561186002e-05, + "loss": 1.0158, "step": 5088 }, { - "epoch": 0.14440976163450625, + "epoch": 0.14420924367366603, "grad_norm": 0.0, - "learning_rate": 1.9321535290573748e-05, - "loss": 1.0737, + "learning_rate": 1.932374682572753e-05, + "loss": 1.1004, "step": 5089 }, { - "epoch": 0.14443813847900114, + "epoch": 0.14423758111592846, "grad_norm": 0.0, - "learning_rate": 1.9321202486810427e-05, - "loss": 0.9344, + "learning_rate": 1.9323415011731677e-05, + "loss": 1.1366, "step": 5090 }, { - "epoch": 0.14446651532349603, + "epoch": 0.14426591855819093, "grad_norm": 0.0, - "learning_rate": 1.9320869604310495e-05, - "loss": 0.8775, + "learning_rate": 1.9323083119201237e-05, + "loss": 1.0843, "step": 5091 }, { - "epoch": 0.1444948921679909, + "epoch": 0.1442942560004534, "grad_norm": 0.0, - "learning_rate": 1.9320536643076756e-05, - "loss": 0.9296, + "learning_rate": 1.9322751148139005e-05, + "loss": 1.079, "step": 5092 }, { - "epoch": 0.1445232690124858, + "epoch": 0.14432259344271586, "grad_norm": 0.0, - "learning_rate": 1.932020360311202e-05, - "loss": 1.1159, + "learning_rate": 1.932241909854778e-05, + "loss": 0.9724, "step": 5093 }, { - "epoch": 0.14455164585698071, + "epoch": 0.14435093088497833, "grad_norm": 0.0, - "learning_rate": 1.931987048441911e-05, - "loss": 1.0958, + "learning_rate": 1.9322086970430355e-05, + "loss": 1.0335, "step": 5094 }, { - "epoch": 0.1445800227014756, + "epoch": 0.1443792683272408, "grad_norm": 0.0, - "learning_rate": 1.9319537287000832e-05, - "loss": 0.9436, + "learning_rate": 1.932175476378953e-05, + "loss": 1.0292, "step": 5095 }, { - "epoch": 0.1446083995459705, + "epoch": 0.14440760576950323, "grad_norm": 0.0, - "learning_rate": 1.9319204010860007e-05, - "loss": 1.0235, + "learning_rate": 1.93214224786281e-05, + "loss": 1.0502, "step": 5096 }, { - "epoch": 0.14463677639046538, + "epoch": 0.1444359432117657, "grad_norm": 0.0, - "learning_rate": 1.9318870655999445e-05, - "loss": 1.1387, + "learning_rate": 1.9321090114948876e-05, + "loss": 1.0666, "step": 5097 }, { - "epoch": 0.14466515323496026, + "epoch": 0.14446428065402817, "grad_norm": 0.0, - "learning_rate": 1.9318537222421964e-05, - "loss": 1.0562, + "learning_rate": 1.9320757672754643e-05, + "loss": 1.08, "step": 5098 }, { - "epoch": 0.14469353007945515, + "epoch": 0.14449261809629063, "grad_norm": 0.0, - "learning_rate": 1.931820371013038e-05, - "loss": 0.9832, + "learning_rate": 1.9320425152048205e-05, + "loss": 0.9866, "step": 5099 }, { - "epoch": 0.14472190692395007, + "epoch": 0.1445209555385531, "grad_norm": 0.0, - "learning_rate": 1.9317870119127508e-05, - "loss": 1.0508, + "learning_rate": 1.9320092552832367e-05, + "loss": 1.1024, "step": 5100 }, { - "epoch": 0.14475028376844495, + "epoch": 0.14454929298081556, "grad_norm": 0.0, - "learning_rate": 1.931753644941617e-05, - "loss": 1.0927, + "learning_rate": 1.931975987510993e-05, + "loss": 1.0296, "step": 5101 }, { - "epoch": 0.14477866061293984, + "epoch": 0.144577630423078, "grad_norm": 0.0, - "learning_rate": 1.9317202700999186e-05, - "loss": 0.9924, + "learning_rate": 1.9319427118883694e-05, + "loss": 0.9473, "step": 5102 }, { - "epoch": 0.14480703745743473, + "epoch": 0.14460596786534047, "grad_norm": 0.0, - "learning_rate": 1.9316868873879372e-05, - "loss": 0.9238, + "learning_rate": 1.931909428415646e-05, + "loss": 1.1321, "step": 5103 }, { - "epoch": 0.14483541430192962, + "epoch": 0.14463430530760293, "grad_norm": 0.0, - "learning_rate": 1.9316534968059542e-05, - "loss": 1.0301, + "learning_rate": 1.9318761370931037e-05, + "loss": 1.0755, "step": 5104 }, { - "epoch": 0.14486379114642453, + "epoch": 0.1446626427498654, "grad_norm": 0.0, - "learning_rate": 1.9316200983542525e-05, - "loss": 1.0003, + "learning_rate": 1.9318428379210224e-05, + "loss": 1.0693, "step": 5105 }, { - "epoch": 0.14489216799091942, + "epoch": 0.14469098019212787, "grad_norm": 0.0, - "learning_rate": 1.9315866920331143e-05, - "loss": 0.9599, + "learning_rate": 1.9318095308996833e-05, + "loss": 0.9922, "step": 5106 }, { - "epoch": 0.1449205448354143, + "epoch": 0.14471931763439033, "grad_norm": 0.0, - "learning_rate": 1.931553277842821e-05, - "loss": 0.975, + "learning_rate": 1.9317762160293663e-05, + "loss": 0.9857, "step": 5107 }, { - "epoch": 0.1449489216799092, + "epoch": 0.14474765507665277, "grad_norm": 0.0, - "learning_rate": 1.9315198557836555e-05, - "loss": 0.9938, + "learning_rate": 1.931742893310352e-05, + "loss": 1.1522, "step": 5108 }, { - "epoch": 0.14497729852440408, + "epoch": 0.14477599251891524, "grad_norm": 0.0, - "learning_rate": 1.9314864258558998e-05, - "loss": 0.9626, + "learning_rate": 1.9317095627429215e-05, + "loss": 1.0617, "step": 5109 }, { - "epoch": 0.14500567536889897, + "epoch": 0.1448043299611777, "grad_norm": 0.0, - "learning_rate": 1.931452988059836e-05, - "loss": 0.9448, + "learning_rate": 1.9316762243273555e-05, + "loss": 1.2104, "step": 5110 }, { - "epoch": 0.14503405221339388, + "epoch": 0.14483266740344017, "grad_norm": 0.0, - "learning_rate": 1.9314195423957475e-05, - "loss": 1.0648, + "learning_rate": 1.9316428780639347e-05, + "loss": 0.9292, "step": 5111 }, { - "epoch": 0.14506242905788877, + "epoch": 0.14486100484570263, "grad_norm": 0.0, - "learning_rate": 1.9313860888639164e-05, - "loss": 0.9891, + "learning_rate": 1.9316095239529397e-05, + "loss": 1.0341, "step": 5112 }, { - "epoch": 0.14509080590238366, + "epoch": 0.1448893422879651, "grad_norm": 0.0, - "learning_rate": 1.9313526274646247e-05, - "loss": 1.0041, + "learning_rate": 1.9315761619946517e-05, + "loss": 0.9943, "step": 5113 }, { - "epoch": 0.14511918274687854, + "epoch": 0.14491767973022754, "grad_norm": 0.0, - "learning_rate": 1.9313191581981554e-05, - "loss": 1.0038, + "learning_rate": 1.9315427921893518e-05, + "loss": 1.0541, "step": 5114 }, { - "epoch": 0.14514755959137343, + "epoch": 0.14494601717249, "grad_norm": 0.0, - "learning_rate": 1.9312856810647918e-05, - "loss": 1.0034, + "learning_rate": 1.9315094145373213e-05, + "loss": 1.0287, "step": 5115 }, { - "epoch": 0.14517593643586832, + "epoch": 0.14497435461475247, "grad_norm": 0.0, - "learning_rate": 1.931252196064816e-05, - "loss": 0.9598, + "learning_rate": 1.931476029038841e-05, + "loss": 1.1627, "step": 5116 }, { - "epoch": 0.14520431328036323, + "epoch": 0.14500269205701494, "grad_norm": 0.0, - "learning_rate": 1.9312187031985105e-05, - "loss": 1.0912, + "learning_rate": 1.931442635694192e-05, + "loss": 1.0297, "step": 5117 }, { - "epoch": 0.14523269012485812, + "epoch": 0.1450310294992774, "grad_norm": 0.0, - "learning_rate": 1.9311852024661594e-05, - "loss": 1.1048, + "learning_rate": 1.931409234503656e-05, + "loss": 1.0254, "step": 5118 }, { - "epoch": 0.145261066969353, + "epoch": 0.14505936694153987, "grad_norm": 0.0, - "learning_rate": 1.931151693868045e-05, - "loss": 0.8706, + "learning_rate": 1.9313758254675143e-05, + "loss": 0.9158, "step": 5119 }, { - "epoch": 0.1452894438138479, + "epoch": 0.1450877043838023, "grad_norm": 0.0, - "learning_rate": 1.93111817740445e-05, - "loss": 1.0604, + "learning_rate": 1.931342408586048e-05, + "loss": 1.0472, "step": 5120 }, { - "epoch": 0.14531782065834278, + "epoch": 0.14511604182606477, "grad_norm": 0.0, - "learning_rate": 1.931084653075658e-05, - "loss": 0.9897, + "learning_rate": 1.9313089838595387e-05, + "loss": 1.0114, "step": 5121 }, { - "epoch": 0.14534619750283767, + "epoch": 0.14514437926832724, "grad_norm": 0.0, - "learning_rate": 1.931051120881952e-05, - "loss": 0.9938, + "learning_rate": 1.931275551288268e-05, + "loss": 1.0093, "step": 5122 }, { - "epoch": 0.14537457434733259, + "epoch": 0.1451727167105897, "grad_norm": 0.0, - "learning_rate": 1.9310175808236157e-05, - "loss": 0.9562, + "learning_rate": 1.9312421108725175e-05, + "loss": 1.1423, "step": 5123 }, { - "epoch": 0.14540295119182747, + "epoch": 0.14520105415285217, "grad_norm": 0.0, - "learning_rate": 1.930984032900932e-05, - "loss": 0.9933, + "learning_rate": 1.931208662612569e-05, + "loss": 1.0486, "step": 5124 }, { - "epoch": 0.14543132803632236, + "epoch": 0.14522939159511464, "grad_norm": 0.0, - "learning_rate": 1.9309504771141844e-05, - "loss": 1.0159, + "learning_rate": 1.931175206508704e-05, + "loss": 1.0709, "step": 5125 }, { - "epoch": 0.14545970488081725, + "epoch": 0.14525772903737708, "grad_norm": 0.0, - "learning_rate": 1.9309169134636558e-05, - "loss": 0.9857, + "learning_rate": 1.9311417425612046e-05, + "loss": 0.9812, "step": 5126 }, { - "epoch": 0.14548808172531214, + "epoch": 0.14528606647963954, "grad_norm": 0.0, - "learning_rate": 1.9308833419496307e-05, - "loss": 0.8425, + "learning_rate": 1.9311082707703525e-05, + "loss": 0.9792, "step": 5127 }, { - "epoch": 0.14551645856980705, + "epoch": 0.145314403921902, "grad_norm": 0.0, - "learning_rate": 1.9308497625723918e-05, - "loss": 1.0358, + "learning_rate": 1.9310747911364296e-05, + "loss": 1.0642, "step": 5128 }, { - "epoch": 0.14554483541430194, + "epoch": 0.14534274136416447, "grad_norm": 0.0, - "learning_rate": 1.9308161753322232e-05, - "loss": 1.0035, + "learning_rate": 1.931041303659718e-05, + "loss": 1.0377, "step": 5129 }, { - "epoch": 0.14557321225879682, + "epoch": 0.14537107880642694, "grad_norm": 0.0, - "learning_rate": 1.9307825802294083e-05, - "loss": 1.0593, + "learning_rate": 1.9310078083404996e-05, + "loss": 0.9911, "step": 5130 }, { - "epoch": 0.1456015891032917, + "epoch": 0.1453994162486894, "grad_norm": 0.0, - "learning_rate": 1.9307489772642317e-05, - "loss": 0.9749, + "learning_rate": 1.930974305179057e-05, + "loss": 1.0757, "step": 5131 }, { - "epoch": 0.1456299659477866, + "epoch": 0.14542775369095184, "grad_norm": 0.0, - "learning_rate": 1.9307153664369762e-05, - "loss": 1.007, + "learning_rate": 1.930940794175672e-05, + "loss": 1.0221, "step": 5132 }, { - "epoch": 0.1456583427922815, + "epoch": 0.1454560911332143, "grad_norm": 0.0, - "learning_rate": 1.930681747747926e-05, - "loss": 0.9774, + "learning_rate": 1.930907275330627e-05, + "loss": 0.9821, "step": 5133 }, { - "epoch": 0.1456867196367764, + "epoch": 0.14548442857547678, "grad_norm": 0.0, - "learning_rate": 1.9306481211973658e-05, - "loss": 1.0429, + "learning_rate": 1.9308737486442045e-05, + "loss": 1.0501, "step": 5134 }, { - "epoch": 0.1457150964812713, + "epoch": 0.14551276601773924, "grad_norm": 0.0, - "learning_rate": 1.930614486785579e-05, - "loss": 1.0511, + "learning_rate": 1.9308402141166865e-05, + "loss": 1.0628, "step": 5135 }, { - "epoch": 0.14574347332576618, + "epoch": 0.1455411034600017, "grad_norm": 0.0, - "learning_rate": 1.9305808445128495e-05, - "loss": 1.0343, + "learning_rate": 1.9308066717483557e-05, + "loss": 1.0022, "step": 5136 }, { - "epoch": 0.14577185017026106, + "epoch": 0.14556944090226417, "grad_norm": 0.0, - "learning_rate": 1.9305471943794617e-05, - "loss": 1.0117, + "learning_rate": 1.930773121539495e-05, + "loss": 1.0623, "step": 5137 }, { - "epoch": 0.14580022701475595, + "epoch": 0.1455977783445266, "grad_norm": 0.0, - "learning_rate": 1.9305135363857e-05, - "loss": 1.0472, + "learning_rate": 1.9307395634903863e-05, + "loss": 0.9938, "step": 5138 }, { - "epoch": 0.14582860385925084, + "epoch": 0.14562611578678908, "grad_norm": 0.0, - "learning_rate": 1.9304798705318487e-05, - "loss": 0.9442, + "learning_rate": 1.930705997601313e-05, + "loss": 1.0502, "step": 5139 }, { - "epoch": 0.14585698070374575, + "epoch": 0.14565445322905154, "grad_norm": 0.0, - "learning_rate": 1.9304461968181923e-05, - "loss": 1.0387, + "learning_rate": 1.930672423872557e-05, + "loss": 1.0925, "step": 5140 }, { - "epoch": 0.14588535754824064, + "epoch": 0.145682790671314, "grad_norm": 0.0, - "learning_rate": 1.9304125152450147e-05, - "loss": 0.9883, + "learning_rate": 1.9306388423044018e-05, + "loss": 1.0063, "step": 5141 }, { - "epoch": 0.14591373439273553, + "epoch": 0.14571112811357648, "grad_norm": 0.0, - "learning_rate": 1.930378825812601e-05, - "loss": 1.0001, + "learning_rate": 1.93060525289713e-05, + "loss": 1.1428, "step": 5142 }, { - "epoch": 0.14594211123723042, + "epoch": 0.14573946555583894, "grad_norm": 0.0, - "learning_rate": 1.9303451285212356e-05, - "loss": 1.1179, + "learning_rate": 1.930571655651025e-05, + "loss": 1.0933, "step": 5143 }, { - "epoch": 0.1459704880817253, + "epoch": 0.14576780299810138, "grad_norm": 0.0, - "learning_rate": 1.930311423371203e-05, - "loss": 0.9242, + "learning_rate": 1.9305380505663686e-05, + "loss": 1.0083, "step": 5144 }, { - "epoch": 0.14599886492622022, + "epoch": 0.14579614044036385, "grad_norm": 0.0, - "learning_rate": 1.9302777103627876e-05, - "loss": 1.0788, + "learning_rate": 1.930504437643445e-05, + "loss": 0.9161, "step": 5145 }, { - "epoch": 0.1460272417707151, + "epoch": 0.1458244778826263, "grad_norm": 0.0, - "learning_rate": 1.930243989496275e-05, - "loss": 1.0625, + "learning_rate": 1.9304708168825373e-05, + "loss": 1.0296, "step": 5146 }, { - "epoch": 0.14605561861521, + "epoch": 0.14585281532488878, "grad_norm": 0.0, - "learning_rate": 1.9302102607719494e-05, - "loss": 1.0856, + "learning_rate": 1.930437188283928e-05, + "loss": 0.8614, "step": 5147 }, { - "epoch": 0.14608399545970488, + "epoch": 0.14588115276715125, "grad_norm": 0.0, - "learning_rate": 1.930176524190096e-05, - "loss": 1.0537, + "learning_rate": 1.930403551847901e-05, + "loss": 1.03, "step": 5148 }, { - "epoch": 0.14611237230419977, + "epoch": 0.1459094902094137, "grad_norm": 0.0, - "learning_rate": 1.930142779751e-05, - "loss": 1.0195, + "learning_rate": 1.9303699075747392e-05, + "loss": 1.0811, "step": 5149 }, { - "epoch": 0.14614074914869465, + "epoch": 0.14593782765167615, "grad_norm": 0.0, - "learning_rate": 1.9301090274549453e-05, - "loss": 1.0358, + "learning_rate": 1.9303362554647262e-05, + "loss": 1.0387, "step": 5150 }, { - "epoch": 0.14616912599318957, + "epoch": 0.14596616509393862, "grad_norm": 0.0, - "learning_rate": 1.9300752673022187e-05, - "loss": 0.9857, + "learning_rate": 1.9303025955181456e-05, + "loss": 1.0952, "step": 5151 }, { - "epoch": 0.14619750283768446, + "epoch": 0.14599450253620108, "grad_norm": 0.0, - "learning_rate": 1.930041499293104e-05, - "loss": 1.0426, + "learning_rate": 1.9302689277352808e-05, + "loss": 1.0975, "step": 5152 }, { - "epoch": 0.14622587968217934, + "epoch": 0.14602283997846355, "grad_norm": 0.0, - "learning_rate": 1.930007723427887e-05, - "loss": 1.001, + "learning_rate": 1.930235252116415e-05, + "loss": 1.0776, "step": 5153 }, { - "epoch": 0.14625425652667423, + "epoch": 0.146051177420726, "grad_norm": 0.0, - "learning_rate": 1.9299739397068535e-05, - "loss": 1.0639, + "learning_rate": 1.9302015686618328e-05, + "loss": 1.1394, "step": 5154 }, { - "epoch": 0.14628263337116912, + "epoch": 0.14607951486298848, "grad_norm": 0.0, - "learning_rate": 1.9299401481302874e-05, - "loss": 1.0866, + "learning_rate": 1.930167877371817e-05, + "loss": 1.1206, "step": 5155 }, { - "epoch": 0.146311010215664, + "epoch": 0.14610785230525092, "grad_norm": 0.0, - "learning_rate": 1.9299063486984756e-05, - "loss": 1.0325, + "learning_rate": 1.930134178246652e-05, + "loss": 1.1226, "step": 5156 }, { - "epoch": 0.14633938706015892, + "epoch": 0.14613618974751338, "grad_norm": 0.0, - "learning_rate": 1.929872541411703e-05, - "loss": 1.1312, + "learning_rate": 1.930100471286621e-05, + "loss": 1.0291, "step": 5157 }, { - "epoch": 0.1463677639046538, + "epoch": 0.14616452718977585, "grad_norm": 0.0, - "learning_rate": 1.9298387262702555e-05, - "loss": 0.9934, + "learning_rate": 1.9300667564920086e-05, + "loss": 1.098, "step": 5158 }, { - "epoch": 0.1463961407491487, + "epoch": 0.14619286463203832, "grad_norm": 0.0, - "learning_rate": 1.929804903274418e-05, - "loss": 1.014, + "learning_rate": 1.9300330338630986e-05, + "loss": 0.962, "step": 5159 }, { - "epoch": 0.14642451759364358, + "epoch": 0.14622120207430078, "grad_norm": 0.0, - "learning_rate": 1.9297710724244768e-05, - "loss": 1.0228, + "learning_rate": 1.9299993034001748e-05, + "loss": 1.0629, "step": 5160 }, { - "epoch": 0.14645289443813847, + "epoch": 0.14624953951656325, "grad_norm": 0.0, - "learning_rate": 1.929737233720718e-05, - "loss": 1.1325, + "learning_rate": 1.9299655651035216e-05, + "loss": 1.0534, "step": 5161 }, { - "epoch": 0.14648127128263336, + "epoch": 0.1462778769588257, "grad_norm": 0.0, - "learning_rate": 1.9297033871634266e-05, - "loss": 1.0166, + "learning_rate": 1.9299318189734234e-05, + "loss": 1.123, "step": 5162 }, { - "epoch": 0.14650964812712827, + "epoch": 0.14630621440108815, "grad_norm": 0.0, - "learning_rate": 1.9296695327528888e-05, - "loss": 1.0214, + "learning_rate": 1.9298980650101637e-05, + "loss": 0.9302, "step": 5163 }, { - "epoch": 0.14653802497162316, + "epoch": 0.14633455184335062, "grad_norm": 0.0, - "learning_rate": 1.929635670489391e-05, - "loss": 1.0709, + "learning_rate": 1.9298643032140275e-05, + "loss": 1.1119, "step": 5164 }, { - "epoch": 0.14656640181611805, + "epoch": 0.14636288928561308, "grad_norm": 0.0, - "learning_rate": 1.9296018003732185e-05, - "loss": 1.0356, + "learning_rate": 1.929830533585299e-05, + "loss": 1.0373, "step": 5165 }, { - "epoch": 0.14659477866061293, + "epoch": 0.14639122672787555, "grad_norm": 0.0, - "learning_rate": 1.9295679224046582e-05, - "loss": 0.9978, + "learning_rate": 1.929796756124263e-05, + "loss": 0.9384, "step": 5166 }, { - "epoch": 0.14662315550510782, + "epoch": 0.14641956417013802, "grad_norm": 0.0, - "learning_rate": 1.9295340365839955e-05, - "loss": 1.0266, + "learning_rate": 1.9297629708312033e-05, + "loss": 1.1293, "step": 5167 }, { - "epoch": 0.14665153234960274, + "epoch": 0.14644790161240046, "grad_norm": 0.0, - "learning_rate": 1.9295001429115175e-05, - "loss": 0.9261, + "learning_rate": 1.9297291777064046e-05, + "loss": 0.8747, "step": 5168 }, { - "epoch": 0.14667990919409762, + "epoch": 0.14647623905466292, "grad_norm": 0.0, - "learning_rate": 1.9294662413875097e-05, - "loss": 0.9827, + "learning_rate": 1.929695376750152e-05, + "loss": 1.0037, "step": 5169 }, { - "epoch": 0.1467082860385925, + "epoch": 0.1465045764969254, "grad_norm": 0.0, - "learning_rate": 1.9294323320122586e-05, - "loss": 1.0001, + "learning_rate": 1.9296615679627302e-05, + "loss": 1.0549, "step": 5170 }, { - "epoch": 0.1467366628830874, + "epoch": 0.14653291393918785, "grad_norm": 0.0, - "learning_rate": 1.929398414786051e-05, - "loss": 0.9579, + "learning_rate": 1.9296277513444234e-05, + "loss": 1.0757, "step": 5171 }, { - "epoch": 0.14676503972758229, + "epoch": 0.14656125138145032, "grad_norm": 0.0, - "learning_rate": 1.9293644897091735e-05, - "loss": 1.1112, + "learning_rate": 1.929593926895517e-05, + "loss": 1.075, "step": 5172 }, { - "epoch": 0.14679341657207717, + "epoch": 0.14658958882371279, "grad_norm": 0.0, - "learning_rate": 1.9293305567819118e-05, - "loss": 0.9832, + "learning_rate": 1.9295600946162957e-05, + "loss": 1.0769, "step": 5173 }, { - "epoch": 0.1468217934165721, + "epoch": 0.14661792626597522, "grad_norm": 0.0, - "learning_rate": 1.9292966160045537e-05, - "loss": 1.1111, + "learning_rate": 1.929526254507045e-05, + "loss": 1.1351, "step": 5174 }, { - "epoch": 0.14685017026106698, + "epoch": 0.1466462637082377, "grad_norm": 0.0, - "learning_rate": 1.9292626673773845e-05, - "loss": 0.9146, + "learning_rate": 1.929492406568049e-05, + "loss": 1.0921, "step": 5175 }, { - "epoch": 0.14687854710556186, + "epoch": 0.14667460115050016, "grad_norm": 0.0, - "learning_rate": 1.9292287109006926e-05, - "loss": 1.0672, + "learning_rate": 1.9294585507995937e-05, + "loss": 0.9626, "step": 5176 }, { - "epoch": 0.14690692395005675, + "epoch": 0.14670293859276262, "grad_norm": 0.0, - "learning_rate": 1.929194746574764e-05, - "loss": 1.1445, + "learning_rate": 1.929424687201963e-05, + "loss": 1.0929, "step": 5177 }, { - "epoch": 0.14693530079455164, + "epoch": 0.1467312760350251, "grad_norm": 0.0, - "learning_rate": 1.929160774399885e-05, - "loss": 1.1132, + "learning_rate": 1.929390815775444e-05, + "loss": 0.9055, "step": 5178 }, { - "epoch": 0.14696367763904652, + "epoch": 0.14675961347728755, "grad_norm": 0.0, - "learning_rate": 1.9291267943763435e-05, - "loss": 1.051, + "learning_rate": 1.9293569365203205e-05, + "loss": 0.9868, "step": 5179 }, { - "epoch": 0.14699205448354144, + "epoch": 0.14678795091955, "grad_norm": 0.0, - "learning_rate": 1.929092806504426e-05, - "loss": 1.0509, + "learning_rate": 1.929323049436879e-05, + "loss": 0.9831, "step": 5180 }, { - "epoch": 0.14702043132803633, + "epoch": 0.14681628836181246, "grad_norm": 0.0, - "learning_rate": 1.9290588107844203e-05, - "loss": 1.0723, + "learning_rate": 1.9292891545254036e-05, + "loss": 1.0956, "step": 5181 }, { - "epoch": 0.14704880817253121, + "epoch": 0.14684462580407492, "grad_norm": 0.0, - "learning_rate": 1.9290248072166125e-05, - "loss": 1.0098, + "learning_rate": 1.929255251786181e-05, + "loss": 0.9792, "step": 5182 }, { - "epoch": 0.1470771850170261, + "epoch": 0.1468729632463374, "grad_norm": 0.0, - "learning_rate": 1.9289907958012907e-05, - "loss": 1.1193, + "learning_rate": 1.9292213412194964e-05, + "loss": 1.0648, "step": 5183 }, { - "epoch": 0.147105561861521, + "epoch": 0.14690130068859986, "grad_norm": 0.0, - "learning_rate": 1.9289567765387417e-05, - "loss": 1.1517, + "learning_rate": 1.9291874228256355e-05, + "loss": 0.9282, "step": 5184 }, { - "epoch": 0.1471339387060159, + "epoch": 0.1469296381308623, "grad_norm": 0.0, - "learning_rate": 1.928922749429253e-05, - "loss": 1.0365, + "learning_rate": 1.929153496604883e-05, + "loss": 0.9371, "step": 5185 }, { - "epoch": 0.1471623155505108, + "epoch": 0.14695797557312476, "grad_norm": 0.0, - "learning_rate": 1.9288887144731126e-05, - "loss": 1.0754, + "learning_rate": 1.9291195625575264e-05, + "loss": 1.0822, "step": 5186 }, { - "epoch": 0.14719069239500568, + "epoch": 0.14698631301538723, "grad_norm": 0.0, - "learning_rate": 1.928854671670607e-05, - "loss": 1.0329, + "learning_rate": 1.9290856206838505e-05, + "loss": 1.0179, "step": 5187 }, { - "epoch": 0.14721906923950057, + "epoch": 0.1470146504576497, "grad_norm": 0.0, - "learning_rate": 1.928820621022024e-05, - "loss": 1.097, + "learning_rate": 1.9290516709841414e-05, + "loss": 1.002, "step": 5188 }, { - "epoch": 0.14724744608399545, + "epoch": 0.14704298789991216, "grad_norm": 0.0, - "learning_rate": 1.928786562527652e-05, - "loss": 0.9505, + "learning_rate": 1.929017713458685e-05, + "loss": 1.0169, "step": 5189 }, { - "epoch": 0.14727582292849034, + "epoch": 0.14707132534217462, "grad_norm": 0.0, - "learning_rate": 1.928752496187778e-05, - "loss": 1.0678, + "learning_rate": 1.928983748107768e-05, + "loss": 1.1104, "step": 5190 }, { - "epoch": 0.14730419977298526, + "epoch": 0.14709966278443706, "grad_norm": 0.0, - "learning_rate": 1.9287184220026898e-05, - "loss": 1.1303, + "learning_rate": 1.9289497749316754e-05, + "loss": 1.0301, "step": 5191 }, { - "epoch": 0.14733257661748014, + "epoch": 0.14712800022669953, "grad_norm": 0.0, - "learning_rate": 1.9286843399726754e-05, - "loss": 1.0931, + "learning_rate": 1.928915793930694e-05, + "loss": 0.9225, "step": 5192 }, { - "epoch": 0.14736095346197503, + "epoch": 0.147156337668962, "grad_norm": 0.0, - "learning_rate": 1.9286502500980226e-05, - "loss": 1.0196, + "learning_rate": 1.9288818051051096e-05, + "loss": 1.0587, "step": 5193 }, { - "epoch": 0.14738933030646992, + "epoch": 0.14718467511122446, "grad_norm": 0.0, - "learning_rate": 1.9286161523790197e-05, - "loss": 1.0154, + "learning_rate": 1.9288478084552092e-05, + "loss": 0.9847, "step": 5194 }, { - "epoch": 0.1474177071509648, + "epoch": 0.14721301255348693, "grad_norm": 0.0, - "learning_rate": 1.928582046815954e-05, - "loss": 1.0045, + "learning_rate": 1.9288138039812784e-05, + "loss": 0.9653, "step": 5195 }, { - "epoch": 0.1474460839954597, + "epoch": 0.1472413499957494, "grad_norm": 0.0, - "learning_rate": 1.9285479334091142e-05, - "loss": 0.9854, + "learning_rate": 1.9287797916836045e-05, + "loss": 0.9429, "step": 5196 }, { - "epoch": 0.1474744608399546, + "epoch": 0.14726968743801183, "grad_norm": 0.0, - "learning_rate": 1.928513812158788e-05, - "loss": 1.0277, + "learning_rate": 1.9287457715624734e-05, + "loss": 0.9357, "step": 5197 }, { - "epoch": 0.1475028376844495, + "epoch": 0.1472980248802743, "grad_norm": 0.0, - "learning_rate": 1.9284796830652642e-05, - "loss": 0.9273, + "learning_rate": 1.9287117436181714e-05, + "loss": 0.9865, "step": 5198 }, { - "epoch": 0.14753121452894438, + "epoch": 0.14732636232253676, "grad_norm": 0.0, - "learning_rate": 1.9284455461288307e-05, - "loss": 1.1288, + "learning_rate": 1.928677707850986e-05, + "loss": 1.0485, "step": 5199 }, { - "epoch": 0.14755959137343927, + "epoch": 0.14735469976479923, "grad_norm": 0.0, - "learning_rate": 1.928411401349776e-05, - "loss": 0.9738, + "learning_rate": 1.9286436642612032e-05, + "loss": 1.0276, "step": 5200 }, { - "epoch": 0.14758796821793416, + "epoch": 0.1473830372070617, "grad_norm": 0.0, - "learning_rate": 1.9283772487283885e-05, - "loss": 1.0229, + "learning_rate": 1.92860961284911e-05, + "loss": 1.02, "step": 5201 }, { - "epoch": 0.14761634506242904, + "epoch": 0.14741137464932416, "grad_norm": 0.0, - "learning_rate": 1.9283430882649562e-05, - "loss": 0.9583, + "learning_rate": 1.928575553614993e-05, + "loss": 1.1007, "step": 5202 }, { - "epoch": 0.14764472190692396, + "epoch": 0.1474397120915866, "grad_norm": 0.0, - "learning_rate": 1.9283089199597688e-05, - "loss": 0.9956, + "learning_rate": 1.9285414865591397e-05, + "loss": 1.0864, "step": 5203 }, { - "epoch": 0.14767309875141885, + "epoch": 0.14746804953384907, "grad_norm": 0.0, - "learning_rate": 1.928274743813114e-05, - "loss": 1.0528, + "learning_rate": 1.9285074116818364e-05, + "loss": 1.0054, "step": 5204 }, { - "epoch": 0.14770147559591373, + "epoch": 0.14749638697611153, "grad_norm": 0.0, - "learning_rate": 1.92824055982528e-05, - "loss": 1.0279, + "learning_rate": 1.9284733289833704e-05, + "loss": 1.0312, "step": 5205 }, { - "epoch": 0.14772985244040862, + "epoch": 0.147524724418374, "grad_norm": 0.0, - "learning_rate": 1.9282063679965573e-05, - "loss": 1.0109, + "learning_rate": 1.9284392384640286e-05, + "loss": 1.0549, "step": 5206 }, { - "epoch": 0.1477582292849035, + "epoch": 0.14755306186063646, "grad_norm": 0.0, - "learning_rate": 1.928172168327233e-05, - "loss": 1.072, + "learning_rate": 1.9284051401240988e-05, + "loss": 1.108, "step": 5207 }, { - "epoch": 0.14778660612939842, + "epoch": 0.14758139930289893, "grad_norm": 0.0, - "learning_rate": 1.928137960817597e-05, - "loss": 0.9688, + "learning_rate": 1.9283710339638674e-05, + "loss": 1.1874, "step": 5208 }, { - "epoch": 0.1478149829738933, + "epoch": 0.14760973674516137, "grad_norm": 0.0, - "learning_rate": 1.9281037454679377e-05, - "loss": 0.9887, + "learning_rate": 1.9283369199836222e-05, + "loss": 1.1778, "step": 5209 }, { - "epoch": 0.1478433598183882, + "epoch": 0.14763807418742383, "grad_norm": 0.0, - "learning_rate": 1.9280695222785443e-05, - "loss": 1.055, + "learning_rate": 1.92830279818365e-05, + "loss": 1.0814, "step": 5210 }, { - "epoch": 0.14787173666288309, + "epoch": 0.1476664116296863, "grad_norm": 0.0, - "learning_rate": 1.9280352912497062e-05, + "learning_rate": 1.9282686685642392e-05, "loss": 0.9918, "step": 5211 }, { - "epoch": 0.14790011350737797, + "epoch": 0.14769474907194877, "grad_norm": 0.0, - "learning_rate": 1.928001052381712e-05, - "loss": 1.0181, + "learning_rate": 1.9282345311256764e-05, + "loss": 1.1157, "step": 5212 }, { - "epoch": 0.14792849035187286, + "epoch": 0.14772308651421123, "grad_norm": 0.0, - "learning_rate": 1.9279668056748514e-05, - "loss": 1.0592, + "learning_rate": 1.9282003858682494e-05, + "loss": 1.0583, "step": 5213 }, { - "epoch": 0.14795686719636778, + "epoch": 0.1477514239564737, "grad_norm": 0.0, - "learning_rate": 1.9279325511294137e-05, - "loss": 1.1319, + "learning_rate": 1.9281662327922458e-05, + "loss": 1.0309, "step": 5214 }, { - "epoch": 0.14798524404086266, + "epoch": 0.14777976139873614, "grad_norm": 0.0, - "learning_rate": 1.9278982887456877e-05, - "loss": 0.9644, + "learning_rate": 1.9281320718979536e-05, + "loss": 1.0507, "step": 5215 }, { - "epoch": 0.14801362088535755, + "epoch": 0.1478080988409986, "grad_norm": 0.0, - "learning_rate": 1.927864018523963e-05, - "loss": 1.1096, + "learning_rate": 1.9280979031856604e-05, + "loss": 1.0001, "step": 5216 }, { - "epoch": 0.14804199772985244, + "epoch": 0.14783643628326107, "grad_norm": 0.0, - "learning_rate": 1.9278297404645294e-05, - "loss": 0.9535, + "learning_rate": 1.9280637266556533e-05, + "loss": 1.1559, "step": 5217 }, { - "epoch": 0.14807037457434732, + "epoch": 0.14786477372552354, "grad_norm": 0.0, - "learning_rate": 1.9277954545676763e-05, - "loss": 1.0555, + "learning_rate": 1.9280295423082215e-05, + "loss": 1.0253, "step": 5218 }, { - "epoch": 0.1480987514188422, + "epoch": 0.147893111167786, "grad_norm": 0.0, - "learning_rate": 1.9277611608336935e-05, - "loss": 0.9209, + "learning_rate": 1.9279953501436518e-05, + "loss": 0.9654, "step": 5219 }, { - "epoch": 0.14812712826333713, + "epoch": 0.14792144861004847, "grad_norm": 0.0, - "learning_rate": 1.92772685926287e-05, - "loss": 1.0107, + "learning_rate": 1.9279611501622328e-05, + "loss": 1.1602, "step": 5220 }, { - "epoch": 0.14815550510783201, + "epoch": 0.1479497860523109, "grad_norm": 0.0, - "learning_rate": 1.9276925498554963e-05, - "loss": 1.0768, + "learning_rate": 1.9279269423642528e-05, + "loss": 1.0548, "step": 5221 }, { - "epoch": 0.1481838819523269, + "epoch": 0.14797812349457337, "grad_norm": 0.0, - "learning_rate": 1.927658232611862e-05, - "loss": 0.9861, + "learning_rate": 1.927892726749999e-05, + "loss": 1.11, "step": 5222 }, { - "epoch": 0.1482122587968218, + "epoch": 0.14800646093683584, "grad_norm": 0.0, - "learning_rate": 1.9276239075322568e-05, - "loss": 1.018, + "learning_rate": 1.9278585033197604e-05, + "loss": 0.9725, "step": 5223 }, { - "epoch": 0.14824063564131668, + "epoch": 0.1480347983790983, "grad_norm": 0.0, - "learning_rate": 1.927589574616971e-05, - "loss": 0.9665, + "learning_rate": 1.9278242720738254e-05, + "loss": 1.0844, "step": 5224 }, { - "epoch": 0.1482690124858116, + "epoch": 0.14806313582136077, "grad_norm": 0.0, - "learning_rate": 1.927555233866294e-05, - "loss": 0.9908, + "learning_rate": 1.9277900330124817e-05, + "loss": 1.0513, "step": 5225 }, { - "epoch": 0.14829738933030648, + "epoch": 0.14809147326362324, "grad_norm": 0.0, - "learning_rate": 1.927520885280516e-05, - "loss": 1.0481, + "learning_rate": 1.927755786136018e-05, + "loss": 0.9662, "step": 5226 }, { - "epoch": 0.14832576617480137, + "epoch": 0.14811981070588567, "grad_norm": 0.0, - "learning_rate": 1.9274865288599282e-05, - "loss": 1.0257, + "learning_rate": 1.927721531444723e-05, + "loss": 1.0916, "step": 5227 }, { - "epoch": 0.14835414301929625, + "epoch": 0.14814814814814814, "grad_norm": 0.0, - "learning_rate": 1.927452164604819e-05, - "loss": 0.9236, + "learning_rate": 1.927687268938885e-05, + "loss": 0.9832, "step": 5228 }, { - "epoch": 0.14838251986379114, + "epoch": 0.1481764855904106, "grad_norm": 0.0, - "learning_rate": 1.9274177925154806e-05, - "loss": 0.9915, + "learning_rate": 1.9276529986187925e-05, + "loss": 0.9731, "step": 5229 }, { - "epoch": 0.14841089670828603, + "epoch": 0.14820482303267307, "grad_norm": 0.0, - "learning_rate": 1.9273834125922017e-05, - "loss": 1.0778, + "learning_rate": 1.9276187204847344e-05, + "loss": 1.0659, "step": 5230 }, { - "epoch": 0.14843927355278094, + "epoch": 0.14823316047493554, "grad_norm": 0.0, - "learning_rate": 1.927349024835274e-05, - "loss": 0.9812, + "learning_rate": 1.9275844345369993e-05, + "loss": 1.0667, "step": 5231 }, { - "epoch": 0.14846765039727583, + "epoch": 0.148261497917198, "grad_norm": 0.0, - "learning_rate": 1.927314629244987e-05, - "loss": 1.0294, + "learning_rate": 1.9275501407758762e-05, + "loss": 1.0327, "step": 5232 }, { - "epoch": 0.14849602724177072, + "epoch": 0.14828983535946044, "grad_norm": 0.0, - "learning_rate": 1.927280225821632e-05, - "loss": 0.9571, + "learning_rate": 1.9275158392016534e-05, + "loss": 1.1037, "step": 5233 }, { - "epoch": 0.1485244040862656, + "epoch": 0.1483181728017229, "grad_norm": 0.0, - "learning_rate": 1.927245814565499e-05, - "loss": 0.9497, + "learning_rate": 1.9274815298146207e-05, + "loss": 1.0331, "step": 5234 }, { - "epoch": 0.1485527809307605, + "epoch": 0.14834651024398537, "grad_norm": 0.0, - "learning_rate": 1.9272113954768786e-05, - "loss": 1.0724, + "learning_rate": 1.9274472126150667e-05, + "loss": 0.9513, "step": 5235 }, { - "epoch": 0.14858115777525538, + "epoch": 0.14837484768624784, "grad_norm": 0.0, - "learning_rate": 1.9271769685560623e-05, - "loss": 1.058, + "learning_rate": 1.9274128876032803e-05, + "loss": 0.9612, "step": 5236 }, { - "epoch": 0.1486095346197503, + "epoch": 0.1484031851285103, "grad_norm": 0.0, - "learning_rate": 1.927142533803341e-05, - "loss": 1.1655, + "learning_rate": 1.9273785547795506e-05, + "loss": 0.9229, "step": 5237 }, { - "epoch": 0.14863791146424518, + "epoch": 0.14843152257077277, "grad_norm": 0.0, - "learning_rate": 1.927108091219004e-05, - "loss": 0.9962, + "learning_rate": 1.927344214144167e-05, + "loss": 1.0988, "step": 5238 }, { - "epoch": 0.14866628830874007, + "epoch": 0.1484598600130352, "grad_norm": 0.0, - "learning_rate": 1.927073640803344e-05, - "loss": 1.0688, + "learning_rate": 1.9273098656974188e-05, + "loss": 0.9547, "step": 5239 }, { - "epoch": 0.14869466515323496, + "epoch": 0.14848819745529768, "grad_norm": 0.0, - "learning_rate": 1.9270391825566506e-05, - "loss": 0.9183, + "learning_rate": 1.9272755094395953e-05, + "loss": 1.0651, "step": 5240 }, { - "epoch": 0.14872304199772984, + "epoch": 0.14851653489756014, "grad_norm": 0.0, - "learning_rate": 1.9270047164792163e-05, - "loss": 1.0336, + "learning_rate": 1.927241145370986e-05, + "loss": 1.0239, "step": 5241 }, { - "epoch": 0.14875141884222473, + "epoch": 0.1485448723398226, "grad_norm": 0.0, - "learning_rate": 1.926970242571331e-05, - "loss": 0.9821, + "learning_rate": 1.92720677349188e-05, + "loss": 1.0344, "step": 5242 }, { - "epoch": 0.14877979568671965, + "epoch": 0.14857320978208508, "grad_norm": 0.0, - "learning_rate": 1.9269357608332867e-05, - "loss": 0.9, + "learning_rate": 1.9271723938025672e-05, + "loss": 1.0499, "step": 5243 }, { - "epoch": 0.14880817253121453, + "epoch": 0.14860154722434754, "grad_norm": 0.0, - "learning_rate": 1.926901271265374e-05, - "loss": 0.8723, + "learning_rate": 1.9271380063033368e-05, + "loss": 1.1839, "step": 5244 }, { - "epoch": 0.14883654937570942, + "epoch": 0.14862988466660998, "grad_norm": 0.0, - "learning_rate": 1.9268667738678846e-05, - "loss": 1.0503, + "learning_rate": 1.9271036109944786e-05, + "loss": 1.1143, "step": 5245 }, { - "epoch": 0.1488649262202043, + "epoch": 0.14865822210887245, "grad_norm": 0.0, - "learning_rate": 1.92683226864111e-05, - "loss": 1.0022, + "learning_rate": 1.9270692078762825e-05, + "loss": 1.0966, "step": 5246 }, { - "epoch": 0.1488933030646992, + "epoch": 0.1486865595511349, "grad_norm": 0.0, - "learning_rate": 1.9267977555853418e-05, - "loss": 0.9291, + "learning_rate": 1.9270347969490384e-05, + "loss": 0.9098, "step": 5247 }, { - "epoch": 0.1489216799091941, + "epoch": 0.14871489699339738, "grad_norm": 0.0, - "learning_rate": 1.926763234700871e-05, - "loss": 1.1389, + "learning_rate": 1.927000378213036e-05, + "loss": 0.9424, "step": 5248 }, { - "epoch": 0.148950056753689, + "epoch": 0.14874323443565984, "grad_norm": 0.0, - "learning_rate": 1.92672870598799e-05, - "loss": 0.9551, + "learning_rate": 1.926965951668565e-05, + "loss": 0.9794, "step": 5249 }, { - "epoch": 0.14897843359818388, + "epoch": 0.1487715718779223, "grad_norm": 0.0, - "learning_rate": 1.9266941694469895e-05, - "loss": 0.9963, + "learning_rate": 1.926931517315916e-05, + "loss": 0.993, "step": 5250 }, { - "epoch": 0.14900681044267877, + "epoch": 0.14879990932018475, "grad_norm": 0.0, - "learning_rate": 1.9266596250781616e-05, - "loss": 0.974, + "learning_rate": 1.926897075155378e-05, + "loss": 1.1175, "step": 5251 }, { - "epoch": 0.14903518728717366, + "epoch": 0.1488282467624472, "grad_norm": 0.0, - "learning_rate": 1.9266250728817985e-05, - "loss": 0.9374, + "learning_rate": 1.926862625187242e-05, + "loss": 0.9291, "step": 5252 }, { - "epoch": 0.14906356413166855, + "epoch": 0.14885658420470968, "grad_norm": 0.0, - "learning_rate": 1.9265905128581917e-05, - "loss": 1.0415, + "learning_rate": 1.926828167411798e-05, + "loss": 1.1128, "step": 5253 }, { - "epoch": 0.14909194097616346, + "epoch": 0.14888492164697215, "grad_norm": 0.0, - "learning_rate": 1.926555945007633e-05, - "loss": 1.0622, + "learning_rate": 1.9267937018293357e-05, + "loss": 1.0891, "step": 5254 }, { - "epoch": 0.14912031782065835, + "epoch": 0.1489132590892346, "grad_norm": 0.0, - "learning_rate": 1.9265213693304147e-05, - "loss": 0.9156, + "learning_rate": 1.9267592284401463e-05, + "loss": 1.02, "step": 5255 }, { - "epoch": 0.14914869466515324, + "epoch": 0.14894159653149708, "grad_norm": 0.0, - "learning_rate": 1.926486785826829e-05, - "loss": 0.9388, + "learning_rate": 1.92672474724452e-05, + "loss": 1.109, "step": 5256 }, { - "epoch": 0.14917707150964812, + "epoch": 0.14896993397375952, "grad_norm": 0.0, - "learning_rate": 1.9264521944971673e-05, - "loss": 1.1249, + "learning_rate": 1.9266902582427467e-05, + "loss": 0.9575, "step": 5257 }, { - "epoch": 0.149205448354143, + "epoch": 0.14899827141602198, "grad_norm": 0.0, - "learning_rate": 1.9264175953417222e-05, - "loss": 1.0027, + "learning_rate": 1.9266557614351172e-05, + "loss": 1.0404, "step": 5258 }, { - "epoch": 0.1492338251986379, + "epoch": 0.14902660885828445, "grad_norm": 0.0, - "learning_rate": 1.9263829883607867e-05, - "loss": 0.9301, + "learning_rate": 1.9266212568219223e-05, + "loss": 0.883, "step": 5259 }, { - "epoch": 0.1492622020431328, + "epoch": 0.14905494630054691, "grad_norm": 0.0, - "learning_rate": 1.9263483735546518e-05, - "loss": 1.0985, + "learning_rate": 1.9265867444034523e-05, + "loss": 1.0392, "step": 5260 }, { - "epoch": 0.1492905788876277, + "epoch": 0.14908328374280938, "grad_norm": 0.0, - "learning_rate": 1.926313750923611e-05, - "loss": 1.1348, + "learning_rate": 1.9265522241799982e-05, + "loss": 0.9426, "step": 5261 }, { - "epoch": 0.1493189557321226, + "epoch": 0.14911162118507185, "grad_norm": 0.0, - "learning_rate": 1.926279120467956e-05, - "loss": 1.0101, + "learning_rate": 1.9265176961518507e-05, + "loss": 0.9774, "step": 5262 }, { - "epoch": 0.14934733257661748, + "epoch": 0.14913995862733428, "grad_norm": 0.0, - "learning_rate": 1.9262444821879796e-05, - "loss": 1.0244, + "learning_rate": 1.9264831603193003e-05, + "loss": 0.9554, "step": 5263 }, { - "epoch": 0.14937570942111236, + "epoch": 0.14916829606959675, "grad_norm": 0.0, - "learning_rate": 1.9262098360839747e-05, - "loss": 1.0991, + "learning_rate": 1.9264486166826385e-05, + "loss": 1.0155, "step": 5264 }, { - "epoch": 0.14940408626560728, + "epoch": 0.14919663351185922, "grad_norm": 0.0, - "learning_rate": 1.9261751821562336e-05, - "loss": 1.0307, + "learning_rate": 1.9264140652421558e-05, + "loss": 1.0124, "step": 5265 }, { - "epoch": 0.14943246311010216, + "epoch": 0.14922497095412168, "grad_norm": 0.0, - "learning_rate": 1.926140520405049e-05, - "loss": 1.0335, + "learning_rate": 1.9263795059981435e-05, + "loss": 0.9594, "step": 5266 }, { - "epoch": 0.14946083995459705, + "epoch": 0.14925330839638415, "grad_norm": 0.0, - "learning_rate": 1.9261058508307137e-05, - "loss": 0.9919, + "learning_rate": 1.9263449389508927e-05, + "loss": 1.1641, "step": 5267 }, { - "epoch": 0.14948921679909194, + "epoch": 0.14928164583864661, "grad_norm": 0.0, - "learning_rate": 1.926071173433521e-05, - "loss": 0.9973, + "learning_rate": 1.9263103641006945e-05, + "loss": 1.0697, "step": 5268 }, { - "epoch": 0.14951759364358683, + "epoch": 0.14930998328090905, "grad_norm": 0.0, - "learning_rate": 1.926036488213763e-05, - "loss": 1.0249, + "learning_rate": 1.92627578144784e-05, + "loss": 1.1202, "step": 5269 }, { - "epoch": 0.14954597048808171, + "epoch": 0.14933832072317152, "grad_norm": 0.0, - "learning_rate": 1.9260017951717334e-05, - "loss": 0.9984, + "learning_rate": 1.9262411909926208e-05, + "loss": 1.0054, "step": 5270 }, { - "epoch": 0.14957434733257663, + "epoch": 0.14936665816543399, "grad_norm": 0.0, - "learning_rate": 1.925967094307725e-05, - "loss": 0.889, + "learning_rate": 1.9262065927353277e-05, + "loss": 1.0481, "step": 5271 }, { - "epoch": 0.14960272417707152, + "epoch": 0.14939499560769645, "grad_norm": 0.0, - "learning_rate": 1.925932385622031e-05, - "loss": 1.1302, + "learning_rate": 1.9261719866762527e-05, + "loss": 0.988, "step": 5272 }, { - "epoch": 0.1496311010215664, + "epoch": 0.14942333304995892, "grad_norm": 0.0, - "learning_rate": 1.9258976691149446e-05, - "loss": 1.0418, + "learning_rate": 1.9261373728156872e-05, + "loss": 1.0928, "step": 5273 }, { - "epoch": 0.1496594778660613, + "epoch": 0.14945167049222138, "grad_norm": 0.0, - "learning_rate": 1.9258629447867588e-05, - "loss": 1.0199, + "learning_rate": 1.9261027511539227e-05, + "loss": 1.1065, "step": 5274 }, { - "epoch": 0.14968785471055618, + "epoch": 0.14948000793448382, "grad_norm": 0.0, - "learning_rate": 1.925828212637767e-05, - "loss": 1.0109, + "learning_rate": 1.926068121691251e-05, + "loss": 0.9579, "step": 5275 }, { - "epoch": 0.14971623155505107, + "epoch": 0.1495083453767463, "grad_norm": 0.0, - "learning_rate": 1.9257934726682627e-05, - "loss": 1.0767, + "learning_rate": 1.9260334844279635e-05, + "loss": 1.0217, "step": 5276 }, { - "epoch": 0.14974460839954598, + "epoch": 0.14953668281900875, "grad_norm": 0.0, - "learning_rate": 1.9257587248785396e-05, - "loss": 0.9549, + "learning_rate": 1.9259988393643518e-05, + "loss": 1.0446, "step": 5277 }, { - "epoch": 0.14977298524404087, + "epoch": 0.14956502026127122, "grad_norm": 0.0, - "learning_rate": 1.9257239692688907e-05, - "loss": 0.9442, + "learning_rate": 1.9259641865007085e-05, + "loss": 1.23, "step": 5278 }, { - "epoch": 0.14980136208853576, + "epoch": 0.14959335770353369, "grad_norm": 0.0, - "learning_rate": 1.9256892058396098e-05, - "loss": 1.0399, + "learning_rate": 1.9259295258373245e-05, + "loss": 1.1063, "step": 5279 }, { - "epoch": 0.14982973893303064, + "epoch": 0.14962169514579615, "grad_norm": 0.0, - "learning_rate": 1.9256544345909904e-05, - "loss": 1.0143, + "learning_rate": 1.9258948573744927e-05, + "loss": 1.0261, "step": 5280 }, { - "epoch": 0.14985811577752553, + "epoch": 0.1496500325880586, "grad_norm": 0.0, - "learning_rate": 1.9256196555233268e-05, - "loss": 0.8882, + "learning_rate": 1.9258601811125044e-05, + "loss": 0.8167, "step": 5281 }, { - "epoch": 0.14988649262202042, + "epoch": 0.14967837003032106, "grad_norm": 0.0, - "learning_rate": 1.925584868636912e-05, - "loss": 0.9648, + "learning_rate": 1.925825497051652e-05, + "loss": 1.0702, "step": 5282 }, { - "epoch": 0.14991486946651533, + "epoch": 0.14970670747258352, "grad_norm": 0.0, - "learning_rate": 1.9255500739320405e-05, - "loss": 1.1407, + "learning_rate": 1.925790805192228e-05, + "loss": 1.0055, "step": 5283 }, { - "epoch": 0.14994324631101022, + "epoch": 0.149735044914846, "grad_norm": 0.0, - "learning_rate": 1.9255152714090056e-05, - "loss": 0.9287, + "learning_rate": 1.925756105534524e-05, + "loss": 1.0053, "step": 5284 }, { - "epoch": 0.1499716231555051, + "epoch": 0.14976338235710845, "grad_norm": 0.0, - "learning_rate": 1.925480461068102e-05, - "loss": 0.9946, + "learning_rate": 1.9257213980788325e-05, + "loss": 0.9784, "step": 5285 }, { - "epoch": 0.15, + "epoch": 0.14979171979937092, "grad_norm": 0.0, - "learning_rate": 1.925445642909623e-05, - "loss": 0.9367, + "learning_rate": 1.925686682825446e-05, + "loss": 0.969, "step": 5286 }, { - "epoch": 0.15002837684449488, + "epoch": 0.14982005724163336, "grad_norm": 0.0, - "learning_rate": 1.925410816933863e-05, - "loss": 1.0344, + "learning_rate": 1.9256519597746566e-05, + "loss": 1.0144, "step": 5287 }, { - "epoch": 0.1500567536889898, + "epoch": 0.14984839468389582, "grad_norm": 0.0, - "learning_rate": 1.9253759831411166e-05, - "loss": 1.0266, + "learning_rate": 1.9256172289267573e-05, + "loss": 1.0756, "step": 5288 }, { - "epoch": 0.15008513053348468, + "epoch": 0.1498767321261583, "grad_norm": 0.0, - "learning_rate": 1.9253411415316772e-05, - "loss": 0.9715, + "learning_rate": 1.9255824902820403e-05, + "loss": 1.0112, "step": 5289 }, { - "epoch": 0.15011350737797957, + "epoch": 0.14990506956842076, "grad_norm": 0.0, - "learning_rate": 1.9253062921058402e-05, - "loss": 0.9268, + "learning_rate": 1.9255477438407983e-05, + "loss": 1.1233, "step": 5290 }, { - "epoch": 0.15014188422247446, + "epoch": 0.14993340701068322, "grad_norm": 0.0, - "learning_rate": 1.925271434863899e-05, - "loss": 1.0503, + "learning_rate": 1.9255129896033238e-05, + "loss": 1.0126, "step": 5291 }, { - "epoch": 0.15017026106696935, + "epoch": 0.1499617444529457, "grad_norm": 0.0, - "learning_rate": 1.925236569806148e-05, - "loss": 0.9456, + "learning_rate": 1.92547822756991e-05, + "loss": 0.9232, "step": 5292 }, { - "epoch": 0.15019863791146423, + "epoch": 0.14999008189520813, "grad_norm": 0.0, - "learning_rate": 1.9252016969328826e-05, - "loss": 1.0093, + "learning_rate": 1.9254434577408492e-05, + "loss": 1.0997, "step": 5293 }, { - "epoch": 0.15022701475595915, + "epoch": 0.1500184193374706, "grad_norm": 0.0, - "learning_rate": 1.925166816244397e-05, - "loss": 1.0473, + "learning_rate": 1.9254086801164345e-05, + "loss": 1.0846, "step": 5294 }, { - "epoch": 0.15025539160045404, + "epoch": 0.15004675677973306, "grad_norm": 0.0, - "learning_rate": 1.9251319277409855e-05, - "loss": 1.1249, + "learning_rate": 1.925373894696959e-05, + "loss": 1.0702, "step": 5295 }, { - "epoch": 0.15028376844494892, + "epoch": 0.15007509422199553, "grad_norm": 0.0, - "learning_rate": 1.925097031422943e-05, - "loss": 0.8892, + "learning_rate": 1.9253391014827156e-05, + "loss": 0.9791, "step": 5296 }, { - "epoch": 0.1503121452894438, + "epoch": 0.150103431664258, "grad_norm": 0.0, - "learning_rate": 1.9250621272905643e-05, - "loss": 1.162, + "learning_rate": 1.9253043004739967e-05, + "loss": 0.9451, "step": 5297 }, { - "epoch": 0.1503405221339387, + "epoch": 0.15013176910652046, "grad_norm": 0.0, - "learning_rate": 1.925027215344144e-05, - "loss": 0.8555, + "learning_rate": 1.9252694916710965e-05, + "loss": 0.9556, "step": 5298 }, { - "epoch": 0.15036889897843358, + "epoch": 0.1501601065487829, "grad_norm": 0.0, - "learning_rate": 1.9249922955839774e-05, - "loss": 1.0005, + "learning_rate": 1.925234675074308e-05, + "loss": 1.0275, "step": 5299 }, { - "epoch": 0.1503972758229285, + "epoch": 0.15018844399104536, "grad_norm": 0.0, - "learning_rate": 1.9249573680103596e-05, - "loss": 0.9896, + "learning_rate": 1.925199850683924e-05, + "loss": 1.0872, "step": 5300 }, { - "epoch": 0.1504256526674234, + "epoch": 0.15021678143330783, "grad_norm": 0.0, - "learning_rate": 1.9249224326235852e-05, - "loss": 1.113, + "learning_rate": 1.925165018500238e-05, + "loss": 0.8752, "step": 5301 }, { - "epoch": 0.15045402951191827, + "epoch": 0.1502451188755703, "grad_norm": 0.0, - "learning_rate": 1.924887489423949e-05, - "loss": 1.0276, + "learning_rate": 1.925130178523544e-05, + "loss": 1.0572, "step": 5302 }, { - "epoch": 0.15048240635641316, + "epoch": 0.15027345631783276, "grad_norm": 0.0, - "learning_rate": 1.924852538411747e-05, - "loss": 1.1002, + "learning_rate": 1.9250953307541347e-05, + "loss": 0.9779, "step": 5303 }, { - "epoch": 0.15051078320090805, + "epoch": 0.15030179376009523, "grad_norm": 0.0, - "learning_rate": 1.9248175795872738e-05, - "loss": 1.0797, + "learning_rate": 1.9250604751923035e-05, + "loss": 1.0674, "step": 5304 }, { - "epoch": 0.15053916004540296, + "epoch": 0.15033013120235766, "grad_norm": 0.0, - "learning_rate": 1.9247826129508254e-05, - "loss": 0.9502, + "learning_rate": 1.925025611838345e-05, + "loss": 0.9418, "step": 5305 }, { - "epoch": 0.15056753688989785, + "epoch": 0.15035846864462013, "grad_norm": 0.0, - "learning_rate": 1.924747638502696e-05, - "loss": 0.94, + "learning_rate": 1.924990740692552e-05, + "loss": 0.9993, "step": 5306 }, { - "epoch": 0.15059591373439274, + "epoch": 0.1503868060868826, "grad_norm": 0.0, - "learning_rate": 1.9247126562431824e-05, - "loss": 1.0094, + "learning_rate": 1.9249558617552187e-05, + "loss": 0.9476, "step": 5307 }, { - "epoch": 0.15062429057888763, + "epoch": 0.15041514352914506, "grad_norm": 0.0, - "learning_rate": 1.924677666172579e-05, - "loss": 1.0161, + "learning_rate": 1.9249209750266385e-05, + "loss": 1.1124, "step": 5308 }, { - "epoch": 0.1506526674233825, + "epoch": 0.15044348097140753, "grad_norm": 0.0, - "learning_rate": 1.924642668291182e-05, - "loss": 0.9973, + "learning_rate": 1.9248860805071056e-05, + "loss": 1.0731, "step": 5309 }, { - "epoch": 0.1506810442678774, + "epoch": 0.15047181841367, "grad_norm": 0.0, - "learning_rate": 1.9246076625992865e-05, - "loss": 1.0193, + "learning_rate": 1.9248511781969135e-05, + "loss": 1.0334, "step": 5310 }, { - "epoch": 0.15070942111237232, + "epoch": 0.15050015585593243, "grad_norm": 0.0, - "learning_rate": 1.924572649097189e-05, - "loss": 1.0251, + "learning_rate": 1.924816268096357e-05, + "loss": 1.0759, "step": 5311 }, { - "epoch": 0.1507377979568672, + "epoch": 0.1505284932981949, "grad_norm": 0.0, - "learning_rate": 1.9245376277851846e-05, - "loss": 1.0231, + "learning_rate": 1.924781350205729e-05, + "loss": 1.0407, "step": 5312 }, { - "epoch": 0.1507661748013621, + "epoch": 0.15055683074045736, "grad_norm": 0.0, - "learning_rate": 1.924502598663569e-05, - "loss": 1.0141, + "learning_rate": 1.9247464245253246e-05, + "loss": 1.0833, "step": 5313 }, { - "epoch": 0.15079455164585698, + "epoch": 0.15058516818271983, "grad_norm": 0.0, - "learning_rate": 1.9244675617326388e-05, - "loss": 1.1013, + "learning_rate": 1.9247114910554376e-05, + "loss": 1.0208, "step": 5314 }, { - "epoch": 0.15082292849035187, + "epoch": 0.1506135056249823, "grad_norm": 0.0, - "learning_rate": 1.924432516992689e-05, - "loss": 0.8914, + "learning_rate": 1.9246765497963623e-05, + "loss": 1.0484, "step": 5315 }, { - "epoch": 0.15085130533484675, + "epoch": 0.15064184306724476, "grad_norm": 0.0, - "learning_rate": 1.924397464444017e-05, - "loss": 0.9453, + "learning_rate": 1.9246416007483932e-05, + "loss": 1.0516, "step": 5316 }, { - "epoch": 0.15087968217934167, + "epoch": 0.1506701805095072, "grad_norm": 0.0, - "learning_rate": 1.9243624040869173e-05, - "loss": 1.0838, + "learning_rate": 1.9246066439118247e-05, + "loss": 1.0844, "step": 5317 }, { - "epoch": 0.15090805902383655, + "epoch": 0.15069851795176967, "grad_norm": 0.0, - "learning_rate": 1.9243273359216873e-05, - "loss": 1.0783, + "learning_rate": 1.9245716792869505e-05, + "loss": 1.0308, "step": 5318 }, { - "epoch": 0.15093643586833144, + "epoch": 0.15072685539403213, "grad_norm": 0.0, - "learning_rate": 1.9242922599486225e-05, - "loss": 1.0113, + "learning_rate": 1.9245367068740664e-05, + "loss": 1.0306, "step": 5319 }, { - "epoch": 0.15096481271282633, + "epoch": 0.1507551928362946, "grad_norm": 0.0, - "learning_rate": 1.9242571761680193e-05, - "loss": 0.9737, + "learning_rate": 1.924501726673466e-05, + "loss": 1.0759, "step": 5320 }, { - "epoch": 0.15099318955732122, + "epoch": 0.15078353027855707, "grad_norm": 0.0, - "learning_rate": 1.9242220845801746e-05, - "loss": 1.0413, + "learning_rate": 1.9244667386854443e-05, + "loss": 1.0395, "step": 5321 }, { - "epoch": 0.15102156640181613, + "epoch": 0.15081186772081953, "grad_norm": 0.0, - "learning_rate": 1.924186985185384e-05, - "loss": 1.0203, + "learning_rate": 1.924431742910296e-05, + "loss": 0.9441, "step": 5322 }, { - "epoch": 0.15104994324631102, + "epoch": 0.15084020516308197, "grad_norm": 0.0, - "learning_rate": 1.9241518779839444e-05, - "loss": 1.0415, + "learning_rate": 1.9243967393483156e-05, + "loss": 1.0807, "step": 5323 }, { - "epoch": 0.1510783200908059, + "epoch": 0.15086854260534444, "grad_norm": 0.0, - "learning_rate": 1.9241167629761527e-05, - "loss": 1.0372, + "learning_rate": 1.9243617279997986e-05, + "loss": 1.0075, "step": 5324 }, { - "epoch": 0.1511066969353008, + "epoch": 0.1508968800476069, "grad_norm": 0.0, - "learning_rate": 1.924081640162305e-05, - "loss": 0.9978, + "learning_rate": 1.924326708865039e-05, + "loss": 1.0056, "step": 5325 }, { - "epoch": 0.15113507377979568, + "epoch": 0.15092521748986937, "grad_norm": 0.0, - "learning_rate": 1.924046509542698e-05, - "loss": 1.069, + "learning_rate": 1.9242916819443328e-05, + "loss": 1.062, "step": 5326 }, { - "epoch": 0.15116345062429057, + "epoch": 0.15095355493213183, "grad_norm": 0.0, - "learning_rate": 1.924011371117629e-05, - "loss": 0.9997, + "learning_rate": 1.9242566472379742e-05, + "loss": 0.9969, "step": 5327 }, { - "epoch": 0.15119182746878548, + "epoch": 0.1509818923743943, "grad_norm": 0.0, - "learning_rate": 1.9239762248873938e-05, - "loss": 0.9886, + "learning_rate": 1.9242216047462587e-05, + "loss": 0.9579, "step": 5328 }, { - "epoch": 0.15122020431328037, + "epoch": 0.15101022981665674, "grad_norm": 0.0, - "learning_rate": 1.9239410708522903e-05, - "loss": 1.004, + "learning_rate": 1.9241865544694817e-05, + "loss": 1.1995, "step": 5329 }, { - "epoch": 0.15124858115777526, + "epoch": 0.1510385672589192, "grad_norm": 0.0, - "learning_rate": 1.923905909012615e-05, - "loss": 1.1161, + "learning_rate": 1.924151496407938e-05, + "loss": 0.9552, "step": 5330 }, { - "epoch": 0.15127695800227015, + "epoch": 0.15106690470118167, "grad_norm": 0.0, - "learning_rate": 1.9238707393686648e-05, - "loss": 1.0331, + "learning_rate": 1.9241164305619228e-05, + "loss": 0.9434, "step": 5331 }, { - "epoch": 0.15130533484676503, + "epoch": 0.15109524214344414, "grad_norm": 0.0, - "learning_rate": 1.9238355619207372e-05, - "loss": 0.9795, + "learning_rate": 1.924081356931732e-05, + "loss": 1.1095, "step": 5332 }, { - "epoch": 0.15133371169125992, + "epoch": 0.1511235795857066, "grad_norm": 0.0, - "learning_rate": 1.923800376669129e-05, - "loss": 1.0485, + "learning_rate": 1.9240462755176604e-05, + "loss": 1.0505, "step": 5333 }, { - "epoch": 0.15136208853575484, + "epoch": 0.15115191702796907, "grad_norm": 0.0, - "learning_rate": 1.9237651836141374e-05, - "loss": 1.1508, + "learning_rate": 1.9240111863200047e-05, + "loss": 1.0067, "step": 5334 }, { - "epoch": 0.15139046538024972, + "epoch": 0.1511802544702315, "grad_norm": 0.0, - "learning_rate": 1.9237299827560595e-05, - "loss": 0.9393, + "learning_rate": 1.9239760893390592e-05, + "loss": 1.0935, "step": 5335 }, { - "epoch": 0.1514188422247446, + "epoch": 0.15120859191249397, "grad_norm": 0.0, - "learning_rate": 1.923694774095193e-05, - "loss": 1.0027, + "learning_rate": 1.92394098457512e-05, + "loss": 1.0358, "step": 5336 }, { - "epoch": 0.1514472190692395, + "epoch": 0.15123692935475644, "grad_norm": 0.0, - "learning_rate": 1.923659557631836e-05, - "loss": 1.0564, + "learning_rate": 1.9239058720284823e-05, + "loss": 1.0332, "step": 5337 }, { - "epoch": 0.15147559591373438, + "epoch": 0.1512652667970189, "grad_norm": 0.0, - "learning_rate": 1.9236243333662844e-05, - "loss": 0.9406, + "learning_rate": 1.9238707516994432e-05, + "loss": 1.0549, "step": 5338 }, { - "epoch": 0.15150397275822927, + "epoch": 0.15129360423928137, "grad_norm": 0.0, - "learning_rate": 1.9235891012988365e-05, - "loss": 1.0219, + "learning_rate": 1.923835623588297e-05, + "loss": 0.8948, "step": 5339 }, { - "epoch": 0.1515323496027242, + "epoch": 0.15132194168154384, "grad_norm": 0.0, - "learning_rate": 1.9235538614297905e-05, - "loss": 1.0407, + "learning_rate": 1.9238004876953406e-05, + "loss": 0.9906, "step": 5340 }, { - "epoch": 0.15156072644721907, + "epoch": 0.15135027912380628, "grad_norm": 0.0, - "learning_rate": 1.923518613759443e-05, - "loss": 1.0289, + "learning_rate": 1.9237653440208696e-05, + "loss": 1.1693, "step": 5341 }, { - "epoch": 0.15158910329171396, + "epoch": 0.15137861656606874, "grad_norm": 0.0, - "learning_rate": 1.9234833582880923e-05, - "loss": 1.0149, + "learning_rate": 1.9237301925651803e-05, + "loss": 1.0739, "step": 5342 }, { - "epoch": 0.15161748013620885, + "epoch": 0.1514069540083312, "grad_norm": 0.0, - "learning_rate": 1.9234480950160366e-05, - "loss": 1.1766, + "learning_rate": 1.9236950333285685e-05, + "loss": 1.0728, "step": 5343 }, { - "epoch": 0.15164585698070374, + "epoch": 0.15143529145059367, "grad_norm": 0.0, - "learning_rate": 1.923412823943573e-05, - "loss": 0.8758, + "learning_rate": 1.92365986631133e-05, + "loss": 1.0453, "step": 5344 }, { - "epoch": 0.15167423382519865, + "epoch": 0.15146362889285614, "grad_norm": 0.0, - "learning_rate": 1.9233775450709996e-05, - "loss": 1.1714, + "learning_rate": 1.9236246915137618e-05, + "loss": 0.9763, "step": 5345 }, { - "epoch": 0.15170261066969354, + "epoch": 0.1514919663351186, "grad_norm": 0.0, - "learning_rate": 1.923342258398615e-05, - "loss": 1.0573, + "learning_rate": 1.92358950893616e-05, + "loss": 1.0194, "step": 5346 }, { - "epoch": 0.15173098751418843, + "epoch": 0.15152030377738104, "grad_norm": 0.0, - "learning_rate": 1.9233069639267165e-05, - "loss": 1.0227, + "learning_rate": 1.9235543185788207e-05, + "loss": 1.1258, "step": 5347 }, { - "epoch": 0.1517593643586833, + "epoch": 0.1515486412196435, "grad_norm": 0.0, - "learning_rate": 1.9232716616556028e-05, - "loss": 0.9568, + "learning_rate": 1.92351912044204e-05, + "loss": 1.0688, "step": 5348 }, { - "epoch": 0.1517877412031782, + "epoch": 0.15157697866190598, "grad_norm": 0.0, - "learning_rate": 1.9232363515855717e-05, - "loss": 1.0904, + "learning_rate": 1.9234839145261154e-05, + "loss": 0.9801, "step": 5349 }, { - "epoch": 0.1518161180476731, + "epoch": 0.15160531610416844, "grad_norm": 0.0, - "learning_rate": 1.9232010337169216e-05, - "loss": 1.0116, + "learning_rate": 1.9234487008313426e-05, + "loss": 1.0463, "step": 5350 }, { - "epoch": 0.151844494892168, + "epoch": 0.1516336535464309, "grad_norm": 0.0, - "learning_rate": 1.9231657080499507e-05, - "loss": 1.0298, + "learning_rate": 1.9234134793580183e-05, + "loss": 1.1568, "step": 5351 }, { - "epoch": 0.1518728717366629, + "epoch": 0.15166199098869337, "grad_norm": 0.0, - "learning_rate": 1.9231303745849578e-05, - "loss": 1.1021, + "learning_rate": 1.9233782501064396e-05, + "loss": 0.8671, "step": 5352 }, { - "epoch": 0.15190124858115778, + "epoch": 0.1516903284309558, "grad_norm": 0.0, - "learning_rate": 1.9230950333222415e-05, - "loss": 0.88, + "learning_rate": 1.923343013076903e-05, + "loss": 1.0039, "step": 5353 }, { - "epoch": 0.15192962542565266, + "epoch": 0.15171866587321828, "grad_norm": 0.0, - "learning_rate": 1.9230596842620994e-05, - "loss": 0.9983, + "learning_rate": 1.9233077682697054e-05, + "loss": 0.9788, "step": 5354 }, { - "epoch": 0.15195800227014755, + "epoch": 0.15174700331548074, "grad_norm": 0.0, - "learning_rate": 1.9230243274048306e-05, - "loss": 0.9238, + "learning_rate": 1.923272515685143e-05, + "loss": 1.0721, "step": 5355 }, { - "epoch": 0.15198637911464244, + "epoch": 0.1517753407577432, "grad_norm": 0.0, - "learning_rate": 1.9229889627507344e-05, - "loss": 0.9879, + "learning_rate": 1.923237255323514e-05, + "loss": 0.966, "step": 5356 }, { - "epoch": 0.15201475595913735, + "epoch": 0.15180367820000568, "grad_norm": 0.0, - "learning_rate": 1.9229535903001085e-05, - "loss": 1.0186, + "learning_rate": 1.9232019871851145e-05, + "loss": 1.0082, "step": 5357 }, { - "epoch": 0.15204313280363224, + "epoch": 0.15183201564226814, "grad_norm": 0.0, - "learning_rate": 1.9229182100532524e-05, - "loss": 1.0183, + "learning_rate": 1.923166711270242e-05, + "loss": 1.0035, "step": 5358 }, { - "epoch": 0.15207150964812713, + "epoch": 0.15186035308453058, "grad_norm": 0.0, - "learning_rate": 1.9228828220104647e-05, - "loss": 0.861, + "learning_rate": 1.9231314275791934e-05, + "loss": 1.1139, "step": 5359 }, { - "epoch": 0.15209988649262202, + "epoch": 0.15188869052679305, "grad_norm": 0.0, - "learning_rate": 1.922847426172044e-05, - "loss": 1.1565, + "learning_rate": 1.923096136112266e-05, + "loss": 0.9791, "step": 5360 }, { - "epoch": 0.1521282633371169, + "epoch": 0.1519170279690555, "grad_norm": 0.0, - "learning_rate": 1.9228120225382895e-05, - "loss": 1.0591, + "learning_rate": 1.9230608368697572e-05, + "loss": 1.1144, "step": 5361 }, { - "epoch": 0.15215664018161182, + "epoch": 0.15194536541131798, "grad_norm": 0.0, - "learning_rate": 1.9227766111095008e-05, - "loss": 1.0219, + "learning_rate": 1.923025529851964e-05, + "loss": 1.0527, "step": 5362 }, { - "epoch": 0.1521850170261067, + "epoch": 0.15197370285358044, "grad_norm": 0.0, - "learning_rate": 1.9227411918859764e-05, - "loss": 1.0776, + "learning_rate": 1.922990215059184e-05, + "loss": 1.0193, "step": 5363 }, { - "epoch": 0.1522133938706016, + "epoch": 0.1520020402958429, "grad_norm": 0.0, - "learning_rate": 1.9227057648680155e-05, - "loss": 1.0651, + "learning_rate": 1.9229548924917146e-05, + "loss": 1.0773, "step": 5364 }, { - "epoch": 0.15224177071509648, + "epoch": 0.15203037773810535, "grad_norm": 0.0, - "learning_rate": 1.922670330055918e-05, - "loss": 0.9659, + "learning_rate": 1.9229195621498538e-05, + "loss": 1.0269, "step": 5365 }, { - "epoch": 0.15227014755959137, + "epoch": 0.15205871518036781, "grad_norm": 0.0, - "learning_rate": 1.9226348874499824e-05, - "loss": 1.0714, + "learning_rate": 1.9228842240338985e-05, + "loss": 1.0449, "step": 5366 }, { - "epoch": 0.15229852440408626, + "epoch": 0.15208705262263028, "grad_norm": 0.0, - "learning_rate": 1.9225994370505085e-05, - "loss": 1.1549, + "learning_rate": 1.9228488781441468e-05, + "loss": 0.8657, "step": 5367 }, { - "epoch": 0.15232690124858117, + "epoch": 0.15211539006489275, "grad_norm": 0.0, - "learning_rate": 1.9225639788577958e-05, - "loss": 0.8909, + "learning_rate": 1.9228135244808963e-05, + "loss": 1.026, "step": 5368 }, { - "epoch": 0.15235527809307606, + "epoch": 0.1521437275071552, "grad_norm": 0.0, - "learning_rate": 1.9225285128721435e-05, - "loss": 1.0487, + "learning_rate": 1.9227781630444448e-05, + "loss": 0.9687, "step": 5369 }, { - "epoch": 0.15238365493757094, + "epoch": 0.15217206494941768, "grad_norm": 0.0, - "learning_rate": 1.922493039093851e-05, - "loss": 0.9223, + "learning_rate": 1.92274279383509e-05, + "loss": 0.9401, "step": 5370 }, { - "epoch": 0.15241203178206583, + "epoch": 0.15220040239168012, "grad_norm": 0.0, - "learning_rate": 1.922457557523219e-05, - "loss": 0.9903, + "learning_rate": 1.9227074168531303e-05, + "loss": 0.9589, "step": 5371 }, { - "epoch": 0.15244040862656072, + "epoch": 0.15222873983394258, "grad_norm": 0.0, - "learning_rate": 1.9224220681605464e-05, - "loss": 0.9133, + "learning_rate": 1.922672032098863e-05, + "loss": 1.0332, "step": 5372 }, { - "epoch": 0.1524687854710556, + "epoch": 0.15225707727620505, "grad_norm": 0.0, - "learning_rate": 1.9223865710061328e-05, - "loss": 0.8601, + "learning_rate": 1.9226366395725868e-05, + "loss": 0.9277, "step": 5373 }, { - "epoch": 0.15249716231555052, + "epoch": 0.15228541471846752, "grad_norm": 0.0, - "learning_rate": 1.9223510660602785e-05, - "loss": 0.9028, + "learning_rate": 1.9226012392745994e-05, + "loss": 0.9771, "step": 5374 }, { - "epoch": 0.1525255391600454, + "epoch": 0.15231375216072998, "grad_norm": 0.0, - "learning_rate": 1.9223155533232837e-05, - "loss": 0.9727, + "learning_rate": 1.9225658312051993e-05, + "loss": 1.0256, "step": 5375 }, { - "epoch": 0.1525539160045403, + "epoch": 0.15234208960299245, "grad_norm": 0.0, - "learning_rate": 1.9222800327954476e-05, - "loss": 1.0124, + "learning_rate": 1.9225304153646845e-05, + "loss": 1.0983, "step": 5376 }, { - "epoch": 0.15258229284903518, + "epoch": 0.15237042704525489, "grad_norm": 0.0, - "learning_rate": 1.9222445044770706e-05, - "loss": 0.8979, + "learning_rate": 1.9224949917533536e-05, + "loss": 0.9704, "step": 5377 }, { - "epoch": 0.15261066969353007, + "epoch": 0.15239876448751735, "grad_norm": 0.0, - "learning_rate": 1.9222089683684527e-05, - "loss": 1.0922, + "learning_rate": 1.9224595603715047e-05, + "loss": 1.1756, "step": 5378 }, { - "epoch": 0.15263904653802496, + "epoch": 0.15242710192977982, "grad_norm": 0.0, - "learning_rate": 1.9221734244698944e-05, - "loss": 1.073, + "learning_rate": 1.9224241212194364e-05, + "loss": 1.0408, "step": 5379 }, { - "epoch": 0.15266742338251987, + "epoch": 0.15245543937204228, "grad_norm": 0.0, - "learning_rate": 1.9221378727816958e-05, - "loss": 0.9775, + "learning_rate": 1.9223886742974474e-05, + "loss": 0.9622, "step": 5380 }, { - "epoch": 0.15269580022701476, + "epoch": 0.15248377681430475, "grad_norm": 0.0, - "learning_rate": 1.922102313304157e-05, - "loss": 0.9754, + "learning_rate": 1.922353219605836e-05, + "loss": 1.0819, "step": 5381 }, { - "epoch": 0.15272417707150965, + "epoch": 0.15251211425656722, "grad_norm": 0.0, - "learning_rate": 1.9220667460375787e-05, - "loss": 1.148, + "learning_rate": 1.922317757144901e-05, + "loss": 1.0006, "step": 5382 }, { - "epoch": 0.15275255391600454, + "epoch": 0.15254045169882965, "grad_norm": 0.0, - "learning_rate": 1.922031170982261e-05, - "loss": 1.0168, + "learning_rate": 1.9222822869149406e-05, + "loss": 0.9516, "step": 5383 }, { - "epoch": 0.15278093076049942, + "epoch": 0.15256878914109212, "grad_norm": 0.0, - "learning_rate": 1.9219955881385042e-05, - "loss": 1.209, + "learning_rate": 1.9222468089162544e-05, + "loss": 0.9136, "step": 5384 }, { - "epoch": 0.15280930760499434, + "epoch": 0.1525971265833546, "grad_norm": 0.0, - "learning_rate": 1.9219599975066096e-05, - "loss": 0.9443, + "learning_rate": 1.922211323149141e-05, + "loss": 1.0663, "step": 5385 }, { - "epoch": 0.15283768444948923, + "epoch": 0.15262546402561705, "grad_norm": 0.0, - "learning_rate": 1.9219243990868776e-05, - "loss": 0.9756, + "learning_rate": 1.9221758296138986e-05, + "loss": 1.0076, "step": 5386 }, { - "epoch": 0.1528660612939841, + "epoch": 0.15265380146787952, "grad_norm": 0.0, - "learning_rate": 1.9218887928796083e-05, - "loss": 1.0079, + "learning_rate": 1.922140328310827e-05, + "loss": 1.0428, "step": 5387 }, { - "epoch": 0.152894438138479, + "epoch": 0.15268213891014196, "grad_norm": 0.0, - "learning_rate": 1.9218531788851034e-05, - "loss": 1.0302, + "learning_rate": 1.9221048192402252e-05, + "loss": 1.1583, "step": 5388 }, { - "epoch": 0.1529228149829739, + "epoch": 0.15271047635240442, "grad_norm": 0.0, - "learning_rate": 1.921817557103663e-05, - "loss": 1.0624, + "learning_rate": 1.922069302402392e-05, + "loss": 1.1094, "step": 5389 }, { - "epoch": 0.15295119182746877, + "epoch": 0.1527388137946669, "grad_norm": 0.0, - "learning_rate": 1.9217819275355882e-05, - "loss": 1.0759, + "learning_rate": 1.9220337777976263e-05, + "loss": 1.1432, "step": 5390 }, { - "epoch": 0.1529795686719637, + "epoch": 0.15276715123692935, "grad_norm": 0.0, - "learning_rate": 1.92174629018118e-05, - "loss": 1.0445, + "learning_rate": 1.9219982454262282e-05, + "loss": 1.0802, "step": 5391 }, { - "epoch": 0.15300794551645858, + "epoch": 0.15279548867919182, "grad_norm": 0.0, - "learning_rate": 1.9217106450407397e-05, - "loss": 0.9506, + "learning_rate": 1.9219627052884965e-05, + "loss": 1.0251, "step": 5392 }, { - "epoch": 0.15303632236095346, + "epoch": 0.1528238261214543, "grad_norm": 0.0, - "learning_rate": 1.921674992114568e-05, - "loss": 0.9984, + "learning_rate": 1.92192715738473e-05, + "loss": 1.057, "step": 5393 }, { - "epoch": 0.15306469920544835, + "epoch": 0.15285216356371673, "grad_norm": 0.0, - "learning_rate": 1.921639331402966e-05, - "loss": 1.1188, + "learning_rate": 1.9218916017152292e-05, + "loss": 0.9666, "step": 5394 }, { - "epoch": 0.15309307604994324, + "epoch": 0.1528805010059792, "grad_norm": 0.0, - "learning_rate": 1.9216036629062356e-05, - "loss": 1.0276, + "learning_rate": 1.921856038280293e-05, + "loss": 1.0613, "step": 5395 }, { - "epoch": 0.15312145289443813, + "epoch": 0.15290883844824166, "grad_norm": 0.0, - "learning_rate": 1.9215679866246774e-05, - "loss": 1.0539, + "learning_rate": 1.9218204670802212e-05, + "loss": 1.1712, "step": 5396 }, { - "epoch": 0.15314982973893304, + "epoch": 0.15293717589050412, "grad_norm": 0.0, - "learning_rate": 1.921532302558593e-05, - "loss": 1.1873, + "learning_rate": 1.921784888115313e-05, + "loss": 1.1611, "step": 5397 }, { - "epoch": 0.15317820658342793, + "epoch": 0.1529655133327666, "grad_norm": 0.0, - "learning_rate": 1.9214966107082835e-05, - "loss": 0.9886, + "learning_rate": 1.9217493013858687e-05, + "loss": 1.1115, "step": 5398 }, { - "epoch": 0.15320658342792282, + "epoch": 0.15299385077502906, "grad_norm": 0.0, - "learning_rate": 1.9214609110740514e-05, - "loss": 1.1812, + "learning_rate": 1.9217137068921875e-05, + "loss": 0.8784, "step": 5399 }, { - "epoch": 0.1532349602724177, + "epoch": 0.1530221882172915, "grad_norm": 0.0, - "learning_rate": 1.9214252036561973e-05, - "loss": 1.0403, + "learning_rate": 1.9216781046345696e-05, + "loss": 0.9809, "step": 5400 }, { - "epoch": 0.1532633371169126, + "epoch": 0.15305052565955396, "grad_norm": 0.0, - "learning_rate": 1.9213894884550225e-05, - "loss": 0.9954, + "learning_rate": 1.9216424946133146e-05, + "loss": 0.9957, "step": 5401 }, { - "epoch": 0.1532917139614075, + "epoch": 0.15307886310181643, "grad_norm": 0.0, - "learning_rate": 1.92135376547083e-05, - "loss": 0.956, + "learning_rate": 1.9216068768287228e-05, + "loss": 1.0648, "step": 5402 }, { - "epoch": 0.1533200908059024, + "epoch": 0.1531072005440789, "grad_norm": 0.0, - "learning_rate": 1.9213180347039203e-05, - "loss": 1.1311, + "learning_rate": 1.921571251281094e-05, + "loss": 0.9193, "step": 5403 }, { - "epoch": 0.15334846765039728, + "epoch": 0.15313553798634136, "grad_norm": 0.0, - "learning_rate": 1.921282296154596e-05, - "loss": 1.0209, + "learning_rate": 1.9215356179707285e-05, + "loss": 1.0183, "step": 5404 }, { - "epoch": 0.15337684449489217, + "epoch": 0.15316387542860382, "grad_norm": 0.0, - "learning_rate": 1.9212465498231587e-05, - "loss": 1.102, + "learning_rate": 1.921499976897926e-05, + "loss": 1.0724, "step": 5405 }, { - "epoch": 0.15340522133938705, + "epoch": 0.15319221287086626, "grad_norm": 0.0, - "learning_rate": 1.9212107957099103e-05, - "loss": 1.0712, + "learning_rate": 1.921464328062987e-05, + "loss": 0.9265, "step": 5406 }, { - "epoch": 0.15343359818388194, + "epoch": 0.15322055031312873, "grad_norm": 0.0, - "learning_rate": 1.9211750338151532e-05, - "loss": 0.9357, + "learning_rate": 1.921428671466212e-05, + "loss": 1.0841, "step": 5407 }, { - "epoch": 0.15346197502837686, + "epoch": 0.1532488877553912, "grad_norm": 0.0, - "learning_rate": 1.921139264139189e-05, - "loss": 1.0224, + "learning_rate": 1.921393007107901e-05, + "loss": 1.0184, "step": 5408 }, { - "epoch": 0.15349035187287174, + "epoch": 0.15327722519765366, "grad_norm": 0.0, - "learning_rate": 1.92110348668232e-05, - "loss": 0.9309, + "learning_rate": 1.9213573349883545e-05, + "loss": 0.9654, "step": 5409 }, { - "epoch": 0.15351872871736663, + "epoch": 0.15330556263991613, "grad_norm": 0.0, - "learning_rate": 1.9210677014448484e-05, - "loss": 0.8807, + "learning_rate": 1.9213216551078732e-05, + "loss": 1.1358, "step": 5410 }, { - "epoch": 0.15354710556186152, + "epoch": 0.1533339000821786, "grad_norm": 0.0, - "learning_rate": 1.9210319084270766e-05, - "loss": 0.9591, + "learning_rate": 1.9212859674667575e-05, + "loss": 1.0541, "step": 5411 }, { - "epoch": 0.1535754824063564, + "epoch": 0.15336223752444103, "grad_norm": 0.0, - "learning_rate": 1.9209961076293068e-05, - "loss": 0.9703, + "learning_rate": 1.921250272065308e-05, + "loss": 1.0967, "step": 5412 }, { - "epoch": 0.1536038592508513, + "epoch": 0.1533905749667035, "grad_norm": 0.0, - "learning_rate": 1.9209602990518413e-05, - "loss": 0.9623, + "learning_rate": 1.921214568903825e-05, + "loss": 1.1958, "step": 5413 }, { - "epoch": 0.1536322360953462, + "epoch": 0.15341891240896596, "grad_norm": 0.0, - "learning_rate": 1.9209244826949833e-05, - "loss": 0.9738, + "learning_rate": 1.92117885798261e-05, + "loss": 1.0955, "step": 5414 }, { - "epoch": 0.1536606129398411, + "epoch": 0.15344724985122843, "grad_norm": 0.0, - "learning_rate": 1.9208886585590343e-05, - "loss": 1.1146, + "learning_rate": 1.9211431393019634e-05, + "loss": 1.0726, "step": 5415 }, { - "epoch": 0.15368898978433598, + "epoch": 0.1534755872934909, "grad_norm": 0.0, - "learning_rate": 1.9208528266442976e-05, - "loss": 1.142, + "learning_rate": 1.9211074128621857e-05, + "loss": 0.9403, "step": 5416 }, { - "epoch": 0.15371736662883087, + "epoch": 0.15350392473575336, "grad_norm": 0.0, - "learning_rate": 1.9208169869510754e-05, - "loss": 0.9124, + "learning_rate": 1.9210716786635787e-05, + "loss": 1.1376, "step": 5417 }, { - "epoch": 0.15374574347332576, + "epoch": 0.1535322621780158, "grad_norm": 0.0, - "learning_rate": 1.920781139479671e-05, - "loss": 0.925, + "learning_rate": 1.9210359367064427e-05, + "loss": 1.0538, "step": 5418 }, { - "epoch": 0.15377412031782065, + "epoch": 0.15356059962027827, "grad_norm": 0.0, - "learning_rate": 1.920745284230387e-05, - "loss": 0.9803, + "learning_rate": 1.921000186991079e-05, + "loss": 1.0605, "step": 5419 }, { - "epoch": 0.15380249716231556, + "epoch": 0.15358893706254073, "grad_norm": 0.0, - "learning_rate": 1.920709421203526e-05, - "loss": 1.0526, + "learning_rate": 1.9209644295177884e-05, + "loss": 1.0123, "step": 5420 }, { - "epoch": 0.15383087400681045, + "epoch": 0.1536172745048032, "grad_norm": 0.0, - "learning_rate": 1.9206735503993914e-05, - "loss": 1.0437, + "learning_rate": 1.9209286642868728e-05, + "loss": 1.1016, "step": 5421 }, { - "epoch": 0.15385925085130533, + "epoch": 0.15364561194706566, "grad_norm": 0.0, - "learning_rate": 1.9206376718182855e-05, - "loss": 1.0349, + "learning_rate": 1.9208928912986332e-05, + "loss": 1.0557, "step": 5422 }, { - "epoch": 0.15388762769580022, + "epoch": 0.15367394938932813, "grad_norm": 0.0, - "learning_rate": 1.9206017854605122e-05, - "loss": 1.0032, + "learning_rate": 1.9208571105533703e-05, + "loss": 0.9802, "step": 5423 }, { - "epoch": 0.1539160045402951, + "epoch": 0.15370228683159057, "grad_norm": 0.0, - "learning_rate": 1.920565891326374e-05, - "loss": 0.9675, + "learning_rate": 1.9208213220513866e-05, + "loss": 0.9862, "step": 5424 }, { - "epoch": 0.15394438138479002, + "epoch": 0.15373062427385303, "grad_norm": 0.0, - "learning_rate": 1.9205299894161743e-05, - "loss": 1.0228, + "learning_rate": 1.9207855257929826e-05, + "loss": 0.9855, "step": 5425 }, { - "epoch": 0.1539727582292849, + "epoch": 0.1537589617161155, "grad_norm": 0.0, - "learning_rate": 1.9204940797302165e-05, - "loss": 1.0701, + "learning_rate": 1.9207497217784602e-05, + "loss": 0.9899, "step": 5426 }, { - "epoch": 0.1540011350737798, + "epoch": 0.15378729915837797, "grad_norm": 0.0, - "learning_rate": 1.920458162268804e-05, - "loss": 1.043, + "learning_rate": 1.9207139100081213e-05, + "loss": 1.0414, "step": 5427 }, { - "epoch": 0.1540295119182747, + "epoch": 0.15381563660064043, "grad_norm": 0.0, - "learning_rate": 1.92042223703224e-05, - "loss": 1.0027, + "learning_rate": 1.9206780904822667e-05, + "loss": 0.9465, "step": 5428 }, { - "epoch": 0.15405788876276957, + "epoch": 0.1538439740429029, "grad_norm": 0.0, - "learning_rate": 1.9203863040208274e-05, - "loss": 1.0861, + "learning_rate": 1.920642263201199e-05, + "loss": 1.0266, "step": 5429 }, { - "epoch": 0.15408626560726446, + "epoch": 0.15387231148516534, "grad_norm": 0.0, - "learning_rate": 1.920350363234871e-05, - "loss": 1.0389, + "learning_rate": 1.9206064281652194e-05, + "loss": 1.1734, "step": 5430 }, { - "epoch": 0.15411464245175938, + "epoch": 0.1539006489274278, "grad_norm": 0.0, - "learning_rate": 1.9203144146746736e-05, - "loss": 1.0598, + "learning_rate": 1.9205705853746305e-05, + "loss": 1.0392, "step": 5431 }, { - "epoch": 0.15414301929625426, + "epoch": 0.15392898636969027, "grad_norm": 0.0, - "learning_rate": 1.9202784583405386e-05, - "loss": 0.9086, + "learning_rate": 1.920534734829733e-05, + "loss": 1.133, "step": 5432 }, { - "epoch": 0.15417139614074915, + "epoch": 0.15395732381195273, "grad_norm": 0.0, - "learning_rate": 1.9202424942327705e-05, - "loss": 1.0026, + "learning_rate": 1.9204988765308302e-05, + "loss": 1.0321, "step": 5433 }, { - "epoch": 0.15419977298524404, + "epoch": 0.1539856612542152, "grad_norm": 0.0, - "learning_rate": 1.9202065223516722e-05, - "loss": 0.9148, + "learning_rate": 1.9204630104782232e-05, + "loss": 1.1128, "step": 5434 }, { - "epoch": 0.15422814982973893, + "epoch": 0.15401399869647767, "grad_norm": 0.0, - "learning_rate": 1.9201705426975485e-05, - "loss": 0.9375, + "learning_rate": 1.9204271366722148e-05, + "loss": 1.0178, "step": 5435 }, { - "epoch": 0.1542565266742338, + "epoch": 0.1540423361387401, "grad_norm": 0.0, - "learning_rate": 1.9201345552707028e-05, - "loss": 1.0808, + "learning_rate": 1.9203912551131064e-05, + "loss": 0.9191, "step": 5436 }, { - "epoch": 0.15428490351872873, + "epoch": 0.15407067358100257, "grad_norm": 0.0, - "learning_rate": 1.920098560071439e-05, - "loss": 1.0565, + "learning_rate": 1.920355365801201e-05, + "loss": 1.0386, "step": 5437 }, { - "epoch": 0.15431328036322361, + "epoch": 0.15409901102326504, "grad_norm": 0.0, - "learning_rate": 1.9200625571000616e-05, - "loss": 1.0612, + "learning_rate": 1.9203194687368005e-05, + "loss": 0.9871, "step": 5438 }, { - "epoch": 0.1543416572077185, + "epoch": 0.1541273484655275, "grad_norm": 0.0, - "learning_rate": 1.9200265463568742e-05, - "loss": 0.9051, + "learning_rate": 1.9202835639202075e-05, + "loss": 1.0822, "step": 5439 }, { - "epoch": 0.1543700340522134, + "epoch": 0.15415568590778997, "grad_norm": 0.0, - "learning_rate": 1.919990527842181e-05, - "loss": 1.1047, + "learning_rate": 1.920247651351724e-05, + "loss": 1.0708, "step": 5440 }, { - "epoch": 0.15439841089670828, + "epoch": 0.15418402335005243, "grad_norm": 0.0, - "learning_rate": 1.9199545015562866e-05, - "loss": 0.9906, + "learning_rate": 1.920211731031653e-05, + "loss": 0.9813, "step": 5441 }, { - "epoch": 0.1544267877412032, + "epoch": 0.15421236079231487, "grad_norm": 0.0, - "learning_rate": 1.9199184674994952e-05, - "loss": 0.9565, + "learning_rate": 1.920175802960297e-05, + "loss": 1.0366, "step": 5442 }, { - "epoch": 0.15445516458569808, + "epoch": 0.15424069823457734, "grad_norm": 0.0, - "learning_rate": 1.9198824256721113e-05, - "loss": 1.0612, + "learning_rate": 1.9201398671379585e-05, + "loss": 1.1654, "step": 5443 }, { - "epoch": 0.15448354143019297, + "epoch": 0.1542690356768398, "grad_norm": 0.0, - "learning_rate": 1.919846376074439e-05, - "loss": 1.0489, + "learning_rate": 1.92010392356494e-05, + "loss": 1.0215, "step": 5444 }, { - "epoch": 0.15451191827468785, + "epoch": 0.15429737311910227, "grad_norm": 0.0, - "learning_rate": 1.919810318706783e-05, - "loss": 1.0202, + "learning_rate": 1.9200679722415444e-05, + "loss": 1.0978, "step": 5445 }, { - "epoch": 0.15454029511918274, + "epoch": 0.15432571056136474, "grad_norm": 0.0, - "learning_rate": 1.919774253569448e-05, - "loss": 0.9666, + "learning_rate": 1.9200320131680746e-05, + "loss": 0.9949, "step": 5446 }, { - "epoch": 0.15456867196367763, + "epoch": 0.1543540480036272, "grad_norm": 0.0, - "learning_rate": 1.9197381806627383e-05, - "loss": 1.0822, + "learning_rate": 1.9199960463448337e-05, + "loss": 0.9692, "step": 5447 }, { - "epoch": 0.15459704880817254, + "epoch": 0.15438238544588964, "grad_norm": 0.0, - "learning_rate": 1.9197020999869594e-05, - "loss": 1.0344, + "learning_rate": 1.9199600717721247e-05, + "loss": 0.9975, "step": 5448 }, { - "epoch": 0.15462542565266743, + "epoch": 0.1544107228881521, "grad_norm": 0.0, - "learning_rate": 1.919666011542415e-05, - "loss": 1.0183, + "learning_rate": 1.91992408945025e-05, + "loss": 0.8808, "step": 5449 }, { - "epoch": 0.15465380249716232, + "epoch": 0.15443906033041457, "grad_norm": 0.0, - "learning_rate": 1.9196299153294107e-05, - "loss": 0.9817, + "learning_rate": 1.919888099379513e-05, + "loss": 0.9665, "step": 5450 }, { - "epoch": 0.1546821793416572, + "epoch": 0.15446739777267704, "grad_norm": 0.0, - "learning_rate": 1.919593811348251e-05, - "loss": 1.0802, + "learning_rate": 1.9198521015602174e-05, + "loss": 1.049, "step": 5451 }, { - "epoch": 0.1547105561861521, + "epoch": 0.1544957352149395, "grad_norm": 0.0, - "learning_rate": 1.919557699599241e-05, - "loss": 1.0194, + "learning_rate": 1.9198160959926656e-05, + "loss": 1.006, "step": 5452 }, { - "epoch": 0.15473893303064698, + "epoch": 0.15452407265720197, "grad_norm": 0.0, - "learning_rate": 1.919521580082686e-05, - "loss": 0.9781, + "learning_rate": 1.9197800826771615e-05, + "loss": 1.1211, "step": 5453 }, { - "epoch": 0.1547673098751419, + "epoch": 0.1545524100994644, "grad_norm": 0.0, - "learning_rate": 1.919485452798891e-05, - "loss": 0.9692, + "learning_rate": 1.919744061614008e-05, + "loss": 1.0723, "step": 5454 }, { - "epoch": 0.15479568671963678, + "epoch": 0.15458074754172688, "grad_norm": 0.0, - "learning_rate": 1.9194493177481607e-05, - "loss": 0.9359, + "learning_rate": 1.919708032803509e-05, + "loss": 0.983, "step": 5455 }, { - "epoch": 0.15482406356413167, + "epoch": 0.15460908498398934, "grad_norm": 0.0, - "learning_rate": 1.9194131749308006e-05, - "loss": 1.0516, + "learning_rate": 1.9196719962459673e-05, + "loss": 0.8029, "step": 5456 }, { - "epoch": 0.15485244040862656, + "epoch": 0.1546374224262518, "grad_norm": 0.0, - "learning_rate": 1.9193770243471164e-05, - "loss": 1.0618, + "learning_rate": 1.9196359519416872e-05, + "loss": 1.0929, "step": 5457 }, { - "epoch": 0.15488081725312144, + "epoch": 0.15466575986851427, "grad_norm": 0.0, - "learning_rate": 1.919340865997413e-05, - "loss": 0.8846, + "learning_rate": 1.919599899890972e-05, + "loss": 0.971, "step": 5458 }, { - "epoch": 0.15490919409761633, + "epoch": 0.15469409731077674, "grad_norm": 0.0, - "learning_rate": 1.919304699881996e-05, - "loss": 0.9635, + "learning_rate": 1.9195638400941254e-05, + "loss": 1.0446, "step": 5459 }, { - "epoch": 0.15493757094211125, + "epoch": 0.15472243475303918, "grad_norm": 0.0, - "learning_rate": 1.919268526001171e-05, - "loss": 1.0362, + "learning_rate": 1.919527772551451e-05, + "loss": 1.0303, "step": 5460 }, { - "epoch": 0.15496594778660613, + "epoch": 0.15475077219530164, "grad_norm": 0.0, - "learning_rate": 1.919232344355243e-05, - "loss": 1.0284, + "learning_rate": 1.9194916972632526e-05, + "loss": 1.0448, "step": 5461 }, { - "epoch": 0.15499432463110102, + "epoch": 0.1547791096375641, "grad_norm": 0.0, - "learning_rate": 1.9191961549445186e-05, - "loss": 1.0541, + "learning_rate": 1.919455614229834e-05, + "loss": 1.0995, "step": 5462 }, { - "epoch": 0.1550227014755959, + "epoch": 0.15480744707982658, "grad_norm": 0.0, - "learning_rate": 1.9191599577693026e-05, - "loss": 1.0839, + "learning_rate": 1.9194195234514996e-05, + "loss": 0.9959, "step": 5463 }, { - "epoch": 0.1550510783200908, + "epoch": 0.15483578452208904, "grad_norm": 0.0, - "learning_rate": 1.9191237528299014e-05, - "loss": 1.0335, + "learning_rate": 1.9193834249285532e-05, + "loss": 1.1154, "step": 5464 }, { - "epoch": 0.1550794551645857, + "epoch": 0.1548641219643515, "grad_norm": 0.0, - "learning_rate": 1.9190875401266203e-05, - "loss": 0.9605, + "learning_rate": 1.9193473186612988e-05, + "loss": 1.0529, "step": 5465 }, { - "epoch": 0.1551078320090806, + "epoch": 0.15489245940661395, "grad_norm": 0.0, - "learning_rate": 1.9190513196597656e-05, - "loss": 1.1068, + "learning_rate": 1.9193112046500405e-05, + "loss": 1.0073, "step": 5466 }, { - "epoch": 0.15513620885357549, + "epoch": 0.1549207968488764, "grad_norm": 0.0, - "learning_rate": 1.919015091429643e-05, - "loss": 1.0993, + "learning_rate": 1.9192750828950823e-05, + "loss": 1.165, "step": 5467 }, { - "epoch": 0.15516458569807037, + "epoch": 0.15494913429113888, "grad_norm": 0.0, - "learning_rate": 1.9189788554365586e-05, - "loss": 1.0101, + "learning_rate": 1.9192389533967292e-05, + "loss": 1.0884, "step": 5468 }, { - "epoch": 0.15519296254256526, + "epoch": 0.15497747173340135, "grad_norm": 0.0, - "learning_rate": 1.9189426116808185e-05, - "loss": 1.0135, + "learning_rate": 1.9192028161552848e-05, + "loss": 0.9756, "step": 5469 }, { - "epoch": 0.15522133938706015, + "epoch": 0.1550058091756638, "grad_norm": 0.0, - "learning_rate": 1.918906360162729e-05, - "loss": 1.0291, + "learning_rate": 1.9191666711710538e-05, + "loss": 0.9936, "step": 5470 }, { - "epoch": 0.15524971623155506, + "epoch": 0.15503414661792628, "grad_norm": 0.0, - "learning_rate": 1.9188701008825962e-05, - "loss": 0.9921, + "learning_rate": 1.9191305184443404e-05, + "loss": 1.0948, "step": 5471 }, { - "epoch": 0.15527809307604995, + "epoch": 0.15506248406018872, "grad_norm": 0.0, - "learning_rate": 1.918833833840726e-05, - "loss": 1.0966, + "learning_rate": 1.9190943579754493e-05, + "loss": 0.966, "step": 5472 }, { - "epoch": 0.15530646992054484, + "epoch": 0.15509082150245118, "grad_norm": 0.0, - "learning_rate": 1.9187975590374254e-05, - "loss": 1.043, + "learning_rate": 1.9190581897646852e-05, + "loss": 0.9611, "step": 5473 }, { - "epoch": 0.15533484676503972, + "epoch": 0.15511915894471365, "grad_norm": 0.0, - "learning_rate": 1.9187612764730004e-05, - "loss": 0.9752, + "learning_rate": 1.919022013812353e-05, + "loss": 0.9972, "step": 5474 }, { - "epoch": 0.1553632236095346, + "epoch": 0.1551474963869761, "grad_norm": 0.0, - "learning_rate": 1.9187249861477575e-05, - "loss": 1.038, + "learning_rate": 1.9189858301187568e-05, + "loss": 0.9455, "step": 5475 }, { - "epoch": 0.1553916004540295, + "epoch": 0.15517583382923858, "grad_norm": 0.0, - "learning_rate": 1.9186886880620036e-05, - "loss": 1.1429, + "learning_rate": 1.9189496386842016e-05, + "loss": 1.0348, "step": 5476 }, { - "epoch": 0.15541997729852441, + "epoch": 0.15520417127150105, "grad_norm": 0.0, - "learning_rate": 1.918652382216045e-05, - "loss": 0.9842, + "learning_rate": 1.9189134395089928e-05, + "loss": 1.0724, "step": 5477 }, { - "epoch": 0.1554483541430193, + "epoch": 0.15523250871376348, "grad_norm": 0.0, - "learning_rate": 1.918616068610188e-05, - "loss": 1.0947, + "learning_rate": 1.9188772325934346e-05, + "loss": 1.0491, "step": 5478 }, { - "epoch": 0.1554767309875142, + "epoch": 0.15526084615602595, "grad_norm": 0.0, - "learning_rate": 1.9185797472447402e-05, - "loss": 0.9774, + "learning_rate": 1.9188410179378324e-05, + "loss": 0.9927, "step": 5479 }, { - "epoch": 0.15550510783200908, + "epoch": 0.15528918359828842, "grad_norm": 0.0, - "learning_rate": 1.918543418120008e-05, + "learning_rate": 1.918804795542491e-05, "loss": 0.9594, "step": 5480 }, { - "epoch": 0.15553348467650396, + "epoch": 0.15531752104055088, "grad_norm": 0.0, - "learning_rate": 1.9185070812362982e-05, - "loss": 1.138, + "learning_rate": 1.9187685654077153e-05, + "loss": 1.0494, "step": 5481 }, { - "epoch": 0.15556186152099888, + "epoch": 0.15534585848281335, "grad_norm": 0.0, - "learning_rate": 1.9184707365939177e-05, - "loss": 0.9925, + "learning_rate": 1.918732327533811e-05, + "loss": 1.0279, "step": 5482 }, { - "epoch": 0.15559023836549377, + "epoch": 0.15537419592507581, "grad_norm": 0.0, - "learning_rate": 1.9184343841931735e-05, - "loss": 0.9534, + "learning_rate": 1.918696081921083e-05, + "loss": 1.0206, "step": 5483 }, { - "epoch": 0.15561861520998865, + "epoch": 0.15540253336733825, "grad_norm": 0.0, - "learning_rate": 1.9183980240343732e-05, - "loss": 1.1293, + "learning_rate": 1.9186598285698373e-05, + "loss": 1.0735, "step": 5484 }, { - "epoch": 0.15564699205448354, + "epoch": 0.15543087080960072, "grad_norm": 0.0, - "learning_rate": 1.9183616561178227e-05, - "loss": 0.9725, + "learning_rate": 1.918623567480378e-05, + "loss": 0.9427, "step": 5485 }, { - "epoch": 0.15567536889897843, + "epoch": 0.15545920825186318, "grad_norm": 0.0, - "learning_rate": 1.918325280443831e-05, - "loss": 1.1056, + "learning_rate": 1.9185872986530118e-05, + "loss": 0.9503, "step": 5486 }, { - "epoch": 0.15570374574347332, + "epoch": 0.15548754569412565, "grad_norm": 0.0, - "learning_rate": 1.9182888970127037e-05, - "loss": 0.9981, + "learning_rate": 1.9185510220880438e-05, + "loss": 1.0816, "step": 5487 }, { - "epoch": 0.15573212258796823, + "epoch": 0.15551588313638812, "grad_norm": 0.0, - "learning_rate": 1.918252505824749e-05, - "loss": 1.023, + "learning_rate": 1.9185147377857788e-05, + "loss": 1.0654, "step": 5488 }, { - "epoch": 0.15576049943246312, + "epoch": 0.15554422057865058, "grad_norm": 0.0, - "learning_rate": 1.9182161068802742e-05, - "loss": 0.975, + "learning_rate": 1.9184784457465238e-05, + "loss": 0.9161, "step": 5489 }, { - "epoch": 0.155788876276958, + "epoch": 0.15557255802091302, "grad_norm": 0.0, - "learning_rate": 1.9181797001795864e-05, - "loss": 1.0256, + "learning_rate": 1.9184421459705834e-05, + "loss": 1.076, "step": 5490 }, { - "epoch": 0.1558172531214529, + "epoch": 0.1556008954631755, "grad_norm": 0.0, - "learning_rate": 1.9181432857229936e-05, - "loss": 1.0709, + "learning_rate": 1.9184058384582638e-05, + "loss": 0.9947, "step": 5491 }, { - "epoch": 0.15584562996594778, + "epoch": 0.15562923290543795, "grad_norm": 0.0, - "learning_rate": 1.9181068635108032e-05, - "loss": 1.0067, + "learning_rate": 1.9183695232098707e-05, + "loss": 1.1385, "step": 5492 }, { - "epoch": 0.15587400681044267, + "epoch": 0.15565757034770042, "grad_norm": 0.0, - "learning_rate": 1.9180704335433228e-05, - "loss": 1.0697, + "learning_rate": 1.91833320022571e-05, + "loss": 0.9647, "step": 5493 }, { - "epoch": 0.15590238365493758, + "epoch": 0.15568590778996289, "grad_norm": 0.0, - "learning_rate": 1.9180339958208603e-05, - "loss": 1.0497, + "learning_rate": 1.918296869506088e-05, + "loss": 1.2305, "step": 5494 }, { - "epoch": 0.15593076049943247, + "epoch": 0.15571424523222535, "grad_norm": 0.0, - "learning_rate": 1.9179975503437235e-05, - "loss": 0.9691, + "learning_rate": 1.9182605310513102e-05, + "loss": 1.001, "step": 5495 }, { - "epoch": 0.15595913734392736, + "epoch": 0.1557425826744878, "grad_norm": 0.0, - "learning_rate": 1.91796109711222e-05, - "loss": 1.0974, + "learning_rate": 1.9182241848616834e-05, + "loss": 1.1171, "step": 5496 }, { - "epoch": 0.15598751418842224, + "epoch": 0.15577092011675026, "grad_norm": 0.0, - "learning_rate": 1.9179246361266576e-05, - "loss": 0.9753, + "learning_rate": 1.9181878309375128e-05, + "loss": 1.0679, "step": 5497 }, { - "epoch": 0.15601589103291713, + "epoch": 0.15579925755901272, "grad_norm": 0.0, - "learning_rate": 1.9178881673873448e-05, - "loss": 1.0832, + "learning_rate": 1.9181514692791054e-05, + "loss": 0.9892, "step": 5498 }, { - "epoch": 0.15604426787741202, + "epoch": 0.1558275950012752, "grad_norm": 0.0, - "learning_rate": 1.917851690894589e-05, - "loss": 1.0217, + "learning_rate": 1.9181150998867674e-05, + "loss": 1.0279, "step": 5499 }, { - "epoch": 0.15607264472190693, + "epoch": 0.15585593244353765, "grad_norm": 0.0, - "learning_rate": 1.9178152066486988e-05, - "loss": 0.9703, + "learning_rate": 1.9180787227608045e-05, + "loss": 1.0259, "step": 5500 }, { - "epoch": 0.15610102156640182, + "epoch": 0.15588426988580012, "grad_norm": 0.0, - "learning_rate": 1.9177787146499826e-05, - "loss": 1.0653, + "learning_rate": 1.918042337901524e-05, + "loss": 1.0509, "step": 5501 }, { - "epoch": 0.1561293984108967, + "epoch": 0.15591260732806256, "grad_norm": 0.0, - "learning_rate": 1.9177422148987482e-05, - "loss": 1.0728, + "learning_rate": 1.918005945309232e-05, + "loss": 0.9924, "step": 5502 }, { - "epoch": 0.1561577752553916, + "epoch": 0.15594094477032502, "grad_norm": 0.0, - "learning_rate": 1.917705707395304e-05, - "loss": 1.0308, + "learning_rate": 1.9179695449842347e-05, + "loss": 1.021, "step": 5503 }, { - "epoch": 0.15618615209988648, + "epoch": 0.1559692822125875, "grad_norm": 0.0, - "learning_rate": 1.9176691921399585e-05, - "loss": 1.0987, + "learning_rate": 1.9179331369268393e-05, + "loss": 1.0956, "step": 5504 }, { - "epoch": 0.1562145289443814, + "epoch": 0.15599761965484996, "grad_norm": 0.0, - "learning_rate": 1.91763266913302e-05, - "loss": 0.9064, + "learning_rate": 1.917896721137352e-05, + "loss": 1.0513, "step": 5505 }, { - "epoch": 0.15624290578887629, + "epoch": 0.15602595709711242, "grad_norm": 0.0, - "learning_rate": 1.9175961383747973e-05, - "loss": 1.1, + "learning_rate": 1.91786029761608e-05, + "loss": 1.1511, "step": 5506 }, { - "epoch": 0.15627128263337117, + "epoch": 0.1560542945393749, "grad_norm": 0.0, - "learning_rate": 1.9175595998655988e-05, - "loss": 0.9667, + "learning_rate": 1.91782386636333e-05, + "loss": 1.0559, "step": 5507 }, { - "epoch": 0.15629965947786606, + "epoch": 0.15608263198163733, "grad_norm": 0.0, - "learning_rate": 1.917523053605733e-05, - "loss": 1.1573, + "learning_rate": 1.9177874273794083e-05, + "loss": 1.1598, "step": 5508 }, { - "epoch": 0.15632803632236095, + "epoch": 0.1561109694238998, "grad_norm": 0.0, - "learning_rate": 1.9174864995955085e-05, - "loss": 1.2079, + "learning_rate": 1.9177509806646225e-05, + "loss": 1.0471, "step": 5509 }, { - "epoch": 0.15635641316685583, + "epoch": 0.15613930686616226, "grad_norm": 0.0, - "learning_rate": 1.9174499378352346e-05, - "loss": 1.061, + "learning_rate": 1.9177145262192797e-05, + "loss": 1.0448, "step": 5510 }, { - "epoch": 0.15638479001135075, + "epoch": 0.15616764430842472, "grad_norm": 0.0, - "learning_rate": 1.9174133683252195e-05, - "loss": 0.9802, + "learning_rate": 1.917678064043686e-05, + "loss": 0.9521, "step": 5511 }, { - "epoch": 0.15641316685584564, + "epoch": 0.1561959817506872, "grad_norm": 0.0, - "learning_rate": 1.9173767910657724e-05, - "loss": 0.9934, + "learning_rate": 1.9176415941381497e-05, + "loss": 1.0328, "step": 5512 }, { - "epoch": 0.15644154370034052, + "epoch": 0.15622431919294966, "grad_norm": 0.0, - "learning_rate": 1.9173402060572028e-05, - "loss": 0.9729, + "learning_rate": 1.9176051165029774e-05, + "loss": 1.1244, "step": 5513 }, { - "epoch": 0.1564699205448354, + "epoch": 0.1562526566352121, "grad_norm": 0.0, - "learning_rate": 1.9173036132998192e-05, - "loss": 1.0411, + "learning_rate": 1.9175686311384763e-05, + "loss": 1.082, "step": 5514 }, { - "epoch": 0.1564982973893303, + "epoch": 0.15628099407747456, "grad_norm": 0.0, - "learning_rate": 1.9172670127939304e-05, - "loss": 0.9336, + "learning_rate": 1.917532138044954e-05, + "loss": 1.0008, "step": 5515 }, { - "epoch": 0.15652667423382519, + "epoch": 0.15630933151973703, "grad_norm": 0.0, - "learning_rate": 1.917230404539846e-05, - "loss": 1.005, + "learning_rate": 1.917495637222718e-05, + "loss": 0.999, "step": 5516 }, { - "epoch": 0.1565550510783201, + "epoch": 0.1563376689619995, "grad_norm": 0.0, - "learning_rate": 1.9171937885378752e-05, - "loss": 0.9917, + "learning_rate": 1.9174591286720754e-05, + "loss": 1.0096, "step": 5517 }, { - "epoch": 0.156583427922815, + "epoch": 0.15636600640426196, "grad_norm": 0.0, - "learning_rate": 1.9171571647883272e-05, - "loss": 1.0356, + "learning_rate": 1.9174226123933336e-05, + "loss": 1.0409, "step": 5518 }, { - "epoch": 0.15661180476730988, + "epoch": 0.15639434384652443, "grad_norm": 0.0, - "learning_rate": 1.9171205332915113e-05, - "loss": 0.9081, + "learning_rate": 1.9173860883868008e-05, + "loss": 1.0271, "step": 5519 }, { - "epoch": 0.15664018161180476, + "epoch": 0.15642268128878686, "grad_norm": 0.0, - "learning_rate": 1.917083894047737e-05, - "loss": 0.9413, + "learning_rate": 1.917349556652784e-05, + "loss": 1.025, "step": 5520 }, { - "epoch": 0.15666855845629965, + "epoch": 0.15645101873104933, "grad_norm": 0.0, - "learning_rate": 1.917047247057314e-05, - "loss": 1.0912, + "learning_rate": 1.9173130171915914e-05, + "loss": 0.9559, "step": 5521 }, { - "epoch": 0.15669693530079457, + "epoch": 0.1564793561733118, "grad_norm": 0.0, - "learning_rate": 1.9170105923205517e-05, - "loss": 1.1143, + "learning_rate": 1.9172764700035308e-05, + "loss": 0.9542, "step": 5522 }, { - "epoch": 0.15672531214528945, + "epoch": 0.15650769361557426, "grad_norm": 0.0, - "learning_rate": 1.91697392983776e-05, - "loss": 1.0658, + "learning_rate": 1.9172399150889098e-05, + "loss": 0.9872, "step": 5523 }, { - "epoch": 0.15675368898978434, + "epoch": 0.15653603105783673, "grad_norm": 0.0, - "learning_rate": 1.9169372596092477e-05, - "loss": 1.201, + "learning_rate": 1.9172033524480364e-05, + "loss": 1.0348, "step": 5524 }, { - "epoch": 0.15678206583427923, + "epoch": 0.1565643685000992, "grad_norm": 0.0, - "learning_rate": 1.9169005816353254e-05, - "loss": 0.9645, + "learning_rate": 1.9171667820812183e-05, + "loss": 0.9706, "step": 5525 }, { - "epoch": 0.15681044267877411, + "epoch": 0.15659270594236163, "grad_norm": 0.0, - "learning_rate": 1.916863895916303e-05, - "loss": 0.8652, + "learning_rate": 1.917130203988764e-05, + "loss": 1.0136, "step": 5526 }, { - "epoch": 0.156838819523269, + "epoch": 0.1566210433846241, "grad_norm": 0.0, - "learning_rate": 1.9168272024524895e-05, - "loss": 0.9798, + "learning_rate": 1.9170936181709812e-05, + "loss": 0.9193, "step": 5527 }, { - "epoch": 0.15686719636776392, + "epoch": 0.15664938082688656, "grad_norm": 0.0, - "learning_rate": 1.9167905012441955e-05, - "loss": 1.027, + "learning_rate": 1.9170570246281786e-05, + "loss": 0.9467, "step": 5528 }, { - "epoch": 0.1568955732122588, + "epoch": 0.15667771826914903, "grad_norm": 0.0, - "learning_rate": 1.916753792291731e-05, - "loss": 1.0089, + "learning_rate": 1.917020423360664e-05, + "loss": 1.0235, "step": 5529 }, { - "epoch": 0.1569239500567537, + "epoch": 0.1567060557114115, "grad_norm": 0.0, - "learning_rate": 1.9167170755954062e-05, - "loss": 0.9726, + "learning_rate": 1.9169838143687462e-05, + "loss": 1.1071, "step": 5530 }, { - "epoch": 0.15695232690124858, + "epoch": 0.15673439315367396, "grad_norm": 0.0, - "learning_rate": 1.916680351155531e-05, - "loss": 0.9689, + "learning_rate": 1.9169471976527325e-05, + "loss": 1.0166, "step": 5531 }, { - "epoch": 0.15698070374574347, + "epoch": 0.1567627305959364, "grad_norm": 0.0, - "learning_rate": 1.9166436189724154e-05, - "loss": 1.0567, + "learning_rate": 1.9169105732129326e-05, + "loss": 1.0003, "step": 5532 }, { - "epoch": 0.15700908059023835, + "epoch": 0.15679106803819887, "grad_norm": 0.0, - "learning_rate": 1.91660687904637e-05, - "loss": 0.984, + "learning_rate": 1.9168739410496546e-05, + "loss": 1.011, "step": 5533 }, { - "epoch": 0.15703745743473327, + "epoch": 0.15681940548046133, "grad_norm": 0.0, - "learning_rate": 1.9165701313777055e-05, - "loss": 1.0547, + "learning_rate": 1.9168373011632063e-05, + "loss": 0.9784, "step": 5534 }, { - "epoch": 0.15706583427922816, + "epoch": 0.1568477429227238, "grad_norm": 0.0, - "learning_rate": 1.9165333759667314e-05, - "loss": 0.9479, + "learning_rate": 1.9168006535538973e-05, + "loss": 1.0699, "step": 5535 }, { - "epoch": 0.15709421112372304, + "epoch": 0.15687608036498626, "grad_norm": 0.0, - "learning_rate": 1.916496612813759e-05, - "loss": 1.0095, + "learning_rate": 1.916763998222036e-05, + "loss": 1.073, "step": 5536 }, { - "epoch": 0.15712258796821793, + "epoch": 0.15690441780724873, "grad_norm": 0.0, - "learning_rate": 1.9164598419190982e-05, - "loss": 1.0941, + "learning_rate": 1.9167273351679313e-05, + "loss": 1.0061, "step": 5537 }, { - "epoch": 0.15715096481271282, + "epoch": 0.15693275524951117, "grad_norm": 0.0, - "learning_rate": 1.9164230632830604e-05, - "loss": 1.0235, + "learning_rate": 1.9166906643918913e-05, + "loss": 1.0499, "step": 5538 }, { - "epoch": 0.1571793416572077, + "epoch": 0.15696109269177363, "grad_norm": 0.0, - "learning_rate": 1.9163862769059554e-05, - "loss": 0.9923, + "learning_rate": 1.9166539858942258e-05, + "loss": 1.0955, "step": 5539 }, { - "epoch": 0.15720771850170262, + "epoch": 0.1569894301340361, "grad_norm": 0.0, - "learning_rate": 1.9163494827880943e-05, - "loss": 0.9975, + "learning_rate": 1.9166172996752434e-05, + "loss": 1.0278, "step": 5540 }, { - "epoch": 0.1572360953461975, + "epoch": 0.15701776757629857, "grad_norm": 0.0, - "learning_rate": 1.916312680929788e-05, - "loss": 0.9266, + "learning_rate": 1.9165806057352528e-05, + "loss": 0.9838, "step": 5541 }, { - "epoch": 0.1572644721906924, + "epoch": 0.15704610501856103, "grad_norm": 0.0, - "learning_rate": 1.9162758713313473e-05, - "loss": 1.0372, + "learning_rate": 1.9165439040745637e-05, + "loss": 1.0762, "step": 5542 }, { - "epoch": 0.15729284903518728, + "epoch": 0.1570744424608235, "grad_norm": 0.0, - "learning_rate": 1.916239053993083e-05, - "loss": 1.0197, + "learning_rate": 1.9165071946934847e-05, + "loss": 0.9809, "step": 5543 }, { - "epoch": 0.15732122587968217, + "epoch": 0.15710277990308594, "grad_norm": 0.0, - "learning_rate": 1.9162022289153068e-05, - "loss": 0.9893, + "learning_rate": 1.9164704775923258e-05, + "loss": 1.0854, "step": 5544 }, { - "epoch": 0.15734960272417708, + "epoch": 0.1571311173453484, "grad_norm": 0.0, - "learning_rate": 1.9161653960983285e-05, - "loss": 1.0234, + "learning_rate": 1.916433752771395e-05, + "loss": 0.9601, "step": 5545 }, { - "epoch": 0.15737797956867197, + "epoch": 0.15715945478761087, "grad_norm": 0.0, - "learning_rate": 1.9161285555424604e-05, - "loss": 0.891, + "learning_rate": 1.916397020231003e-05, + "loss": 0.926, "step": 5546 }, { - "epoch": 0.15740635641316686, + "epoch": 0.15718779222987334, "grad_norm": 0.0, - "learning_rate": 1.916091707248013e-05, - "loss": 1.0404, + "learning_rate": 1.9163602799714583e-05, + "loss": 1.0041, "step": 5547 }, { - "epoch": 0.15743473325766175, + "epoch": 0.1572161296721358, "grad_norm": 0.0, - "learning_rate": 1.9160548512152975e-05, - "loss": 1.0096, + "learning_rate": 1.9163235319930706e-05, + "loss": 1.1034, "step": 5548 }, { - "epoch": 0.15746311010215663, + "epoch": 0.15724446711439827, "grad_norm": 0.0, - "learning_rate": 1.916017987444626e-05, - "loss": 1.0446, + "learning_rate": 1.9162867762961497e-05, + "loss": 1.1148, "step": 5549 }, { - "epoch": 0.15749148694665152, + "epoch": 0.1572728045566607, "grad_norm": 0.0, - "learning_rate": 1.9159811159363092e-05, - "loss": 1.0182, + "learning_rate": 1.916250012881005e-05, + "loss": 1.1121, "step": 5550 }, { - "epoch": 0.15751986379114644, + "epoch": 0.15730114199892317, "grad_norm": 0.0, - "learning_rate": 1.9159442366906585e-05, - "loss": 0.9917, + "learning_rate": 1.916213241747946e-05, + "loss": 1.0874, "step": 5551 }, { - "epoch": 0.15754824063564132, + "epoch": 0.15732947944118564, "grad_norm": 0.0, - "learning_rate": 1.9159073497079856e-05, - "loss": 0.8899, + "learning_rate": 1.916176462897283e-05, + "loss": 0.9816, "step": 5552 }, { - "epoch": 0.1575766174801362, + "epoch": 0.1573578168834481, "grad_norm": 0.0, - "learning_rate": 1.9158704549886025e-05, - "loss": 1.0366, + "learning_rate": 1.9161396763293252e-05, + "loss": 0.9885, "step": 5553 }, { - "epoch": 0.1576049943246311, + "epoch": 0.15738615432571057, "grad_norm": 0.0, - "learning_rate": 1.9158335525328206e-05, - "loss": 0.9791, + "learning_rate": 1.916102882044383e-05, + "loss": 0.9675, "step": 5554 }, { - "epoch": 0.15763337116912599, + "epoch": 0.15741449176797304, "grad_norm": 0.0, - "learning_rate": 1.915796642340951e-05, - "loss": 1.0125, + "learning_rate": 1.9160660800427658e-05, + "loss": 0.9852, "step": 5555 }, { - "epoch": 0.15766174801362087, + "epoch": 0.15744282921023547, "grad_norm": 0.0, - "learning_rate": 1.9157597244133065e-05, - "loss": 1.0732, + "learning_rate": 1.9160292703247836e-05, + "loss": 1.0324, "step": 5556 }, { - "epoch": 0.1576901248581158, + "epoch": 0.15747116665249794, "grad_norm": 0.0, - "learning_rate": 1.915722798750198e-05, - "loss": 1.0345, + "learning_rate": 1.915992452890747e-05, + "loss": 1.0515, "step": 5557 }, { - "epoch": 0.15771850170261068, + "epoch": 0.1574995040947604, "grad_norm": 0.0, - "learning_rate": 1.915685865351938e-05, - "loss": 1.1241, + "learning_rate": 1.9159556277409658e-05, + "loss": 0.9077, "step": 5558 }, { - "epoch": 0.15774687854710556, + "epoch": 0.15752784153702287, "grad_norm": 0.0, - "learning_rate": 1.9156489242188385e-05, - "loss": 1.038, + "learning_rate": 1.9159187948757503e-05, + "loss": 0.9853, "step": 5559 }, { - "epoch": 0.15777525539160045, + "epoch": 0.15755617897928534, "grad_norm": 0.0, - "learning_rate": 1.9156119753512114e-05, - "loss": 1.1698, + "learning_rate": 1.9158819542954105e-05, + "loss": 1.0255, "step": 5560 }, { - "epoch": 0.15780363223609534, + "epoch": 0.1575845164215478, "grad_norm": 0.0, - "learning_rate": 1.9155750187493684e-05, - "loss": 0.9265, + "learning_rate": 1.9158451060002566e-05, + "loss": 0.853, "step": 5561 }, { - "epoch": 0.15783200908059025, + "epoch": 0.15761285386381024, "grad_norm": 0.0, - "learning_rate": 1.9155380544136223e-05, - "loss": 0.98, + "learning_rate": 1.9158082499906e-05, + "loss": 0.9817, "step": 5562 }, { - "epoch": 0.15786038592508514, + "epoch": 0.1576411913060727, "grad_norm": 0.0, - "learning_rate": 1.9155010823442852e-05, - "loss": 0.9771, + "learning_rate": 1.91577138626675e-05, + "loss": 1.026, "step": 5563 }, { - "epoch": 0.15788876276958003, + "epoch": 0.15766952874833517, "grad_norm": 0.0, - "learning_rate": 1.9154641025416694e-05, - "loss": 0.9859, + "learning_rate": 1.9157345148290173e-05, + "loss": 1.0953, "step": 5564 }, { - "epoch": 0.15791713961407491, + "epoch": 0.15769786619059764, "grad_norm": 0.0, - "learning_rate": 1.915427115006087e-05, - "loss": 0.9267, + "learning_rate": 1.9156976356777132e-05, + "loss": 0.9912, "step": 5565 }, { - "epoch": 0.1579455164585698, + "epoch": 0.1577262036328601, "grad_norm": 0.0, - "learning_rate": 1.9153901197378507e-05, - "loss": 1.0977, + "learning_rate": 1.915660748813148e-05, + "loss": 0.9261, "step": 5566 }, { - "epoch": 0.1579738933030647, + "epoch": 0.15775454107512257, "grad_norm": 0.0, - "learning_rate": 1.9153531167372725e-05, - "loss": 1.0752, + "learning_rate": 1.915623854235632e-05, + "loss": 1.1058, "step": 5567 }, { - "epoch": 0.1580022701475596, + "epoch": 0.157782878517385, "grad_norm": 0.0, - "learning_rate": 1.915316106004666e-05, - "loss": 0.9445, + "learning_rate": 1.9155869519454762e-05, + "loss": 1.0601, "step": 5568 }, { - "epoch": 0.1580306469920545, + "epoch": 0.15781121595964748, "grad_norm": 0.0, - "learning_rate": 1.9152790875403428e-05, - "loss": 1.1137, + "learning_rate": 1.9155500419429916e-05, + "loss": 1.0006, "step": 5569 }, { - "epoch": 0.15805902383654938, + "epoch": 0.15783955340190994, "grad_norm": 0.0, - "learning_rate": 1.915242061344616e-05, - "loss": 1.0955, + "learning_rate": 1.9155131242284888e-05, + "loss": 1.0842, "step": 5570 }, { - "epoch": 0.15808740068104427, + "epoch": 0.1578678908441724, "grad_norm": 0.0, - "learning_rate": 1.9152050274177988e-05, - "loss": 1.0126, + "learning_rate": 1.9154761988022793e-05, + "loss": 1.1093, "step": 5571 }, { - "epoch": 0.15811577752553915, + "epoch": 0.15789622828643488, "grad_norm": 0.0, - "learning_rate": 1.915167985760203e-05, - "loss": 1.0206, + "learning_rate": 1.9154392656646736e-05, + "loss": 1.0647, "step": 5572 }, { - "epoch": 0.15814415437003404, + "epoch": 0.15792456572869734, "grad_norm": 0.0, - "learning_rate": 1.915130936372142e-05, - "loss": 0.9755, + "learning_rate": 1.9154023248159833e-05, + "loss": 1.0285, "step": 5573 }, { - "epoch": 0.15817253121452896, + "epoch": 0.15795290317095978, "grad_norm": 0.0, - "learning_rate": 1.9150938792539297e-05, - "loss": 1.0434, + "learning_rate": 1.915365376256519e-05, + "loss": 1.1139, "step": 5574 }, { - "epoch": 0.15820090805902384, + "epoch": 0.15798124061322225, "grad_norm": 0.0, - "learning_rate": 1.9150568144058774e-05, - "loss": 0.9305, + "learning_rate": 1.9153284199865926e-05, + "loss": 1.0099, "step": 5575 }, { - "epoch": 0.15822928490351873, + "epoch": 0.1580095780554847, "grad_norm": 0.0, - "learning_rate": 1.9150197418282993e-05, - "loss": 1.1519, + "learning_rate": 1.9152914560065146e-05, + "loss": 1.0744, "step": 5576 }, { - "epoch": 0.15825766174801362, + "epoch": 0.15803791549774718, "grad_norm": 0.0, - "learning_rate": 1.9149826615215085e-05, - "loss": 1.0011, + "learning_rate": 1.9152544843165968e-05, + "loss": 1.0511, "step": 5577 }, { - "epoch": 0.1582860385925085, + "epoch": 0.15806625294000964, "grad_norm": 0.0, - "learning_rate": 1.914945573485818e-05, - "loss": 0.9897, + "learning_rate": 1.9152175049171507e-05, + "loss": 0.9361, "step": 5578 }, { - "epoch": 0.1583144154370034, + "epoch": 0.1580945903822721, "grad_norm": 0.0, - "learning_rate": 1.9149084777215412e-05, - "loss": 0.8718, + "learning_rate": 1.9151805178084878e-05, + "loss": 0.9895, "step": 5579 }, { - "epoch": 0.1583427922814983, + "epoch": 0.15812292782453455, "grad_norm": 0.0, - "learning_rate": 1.9148713742289914e-05, - "loss": 1.0128, + "learning_rate": 1.9151435229909197e-05, + "loss": 0.987, "step": 5580 }, { - "epoch": 0.1583711691259932, + "epoch": 0.15815126526679701, "grad_norm": 0.0, - "learning_rate": 1.9148342630084817e-05, - "loss": 0.9705, + "learning_rate": 1.9151065204647576e-05, + "loss": 1.1356, "step": 5581 }, { - "epoch": 0.15839954597048808, + "epoch": 0.15817960270905948, "grad_norm": 0.0, - "learning_rate": 1.9147971440603257e-05, - "loss": 0.9387, + "learning_rate": 1.9150695102303138e-05, + "loss": 0.908, "step": 5582 }, { - "epoch": 0.15842792281498297, + "epoch": 0.15820794015132195, "grad_norm": 0.0, - "learning_rate": 1.9147600173848375e-05, - "loss": 1.0447, + "learning_rate": 1.9150324922878992e-05, + "loss": 1.1281, "step": 5583 }, { - "epoch": 0.15845629965947786, + "epoch": 0.1582362775935844, "grad_norm": 0.0, - "learning_rate": 1.91472288298233e-05, - "loss": 0.9703, + "learning_rate": 1.9149954666378264e-05, + "loss": 1.0842, "step": 5584 }, { - "epoch": 0.15848467650397277, + "epoch": 0.15826461503584685, "grad_norm": 0.0, - "learning_rate": 1.9146857408531174e-05, - "loss": 1.0214, + "learning_rate": 1.914958433280407e-05, + "loss": 1.0615, "step": 5585 }, { - "epoch": 0.15851305334846766, + "epoch": 0.15829295247810932, "grad_norm": 0.0, - "learning_rate": 1.914648590997513e-05, - "loss": 0.9598, + "learning_rate": 1.914921392215953e-05, + "loss": 1.0398, "step": 5586 }, { - "epoch": 0.15854143019296255, + "epoch": 0.15832128992037178, "grad_norm": 0.0, - "learning_rate": 1.9146114334158314e-05, - "loss": 1.0342, + "learning_rate": 1.9148843434447762e-05, + "loss": 0.95, "step": 5587 }, { - "epoch": 0.15856980703745743, + "epoch": 0.15834962736263425, "grad_norm": 0.0, - "learning_rate": 1.914574268108385e-05, - "loss": 1.0658, + "learning_rate": 1.914847286967189e-05, + "loss": 1.0889, "step": 5588 }, { - "epoch": 0.15859818388195232, + "epoch": 0.15837796480489671, "grad_norm": 0.0, - "learning_rate": 1.9145370950754896e-05, - "loss": 1.0171, + "learning_rate": 1.9148102227835033e-05, + "loss": 1.0088, "step": 5589 }, { - "epoch": 0.1586265607264472, + "epoch": 0.15840630224715918, "grad_norm": 0.0, - "learning_rate": 1.9144999143174576e-05, - "loss": 1.0626, + "learning_rate": 1.9147731508940313e-05, + "loss": 1.0147, "step": 5590 }, { - "epoch": 0.15865493757094212, + "epoch": 0.15843463968942162, "grad_norm": 0.0, - "learning_rate": 1.9144627258346042e-05, - "loss": 1.0273, + "learning_rate": 1.9147360712990857e-05, + "loss": 1.1193, "step": 5591 }, { - "epoch": 0.158683314415437, + "epoch": 0.15846297713168409, "grad_norm": 0.0, - "learning_rate": 1.9144255296272426e-05, - "loss": 1.0193, + "learning_rate": 1.9146989839989785e-05, + "loss": 1.0368, "step": 5592 }, { - "epoch": 0.1587116912599319, + "epoch": 0.15849131457394655, "grad_norm": 0.0, - "learning_rate": 1.9143883256956875e-05, - "loss": 1.0134, + "learning_rate": 1.9146618889940218e-05, + "loss": 0.9675, "step": 5593 }, { - "epoch": 0.15874006810442678, + "epoch": 0.15851965201620902, "grad_norm": 0.0, - "learning_rate": 1.9143511140402532e-05, - "loss": 1.1464, + "learning_rate": 1.9146247862845282e-05, + "loss": 1.0591, "step": 5594 }, { - "epoch": 0.15876844494892167, + "epoch": 0.15854798945847148, "grad_norm": 0.0, - "learning_rate": 1.914313894661254e-05, - "loss": 1.0806, + "learning_rate": 1.9145876758708106e-05, + "loss": 0.9498, "step": 5595 }, { - "epoch": 0.15879682179341656, + "epoch": 0.15857632690073395, "grad_norm": 0.0, - "learning_rate": 1.9142766675590043e-05, - "loss": 0.8908, + "learning_rate": 1.9145505577531816e-05, + "loss": 0.915, "step": 5596 }, { - "epoch": 0.15882519863791147, + "epoch": 0.1586046643429964, "grad_norm": 0.0, - "learning_rate": 1.914239432733818e-05, - "loss": 0.9206, + "learning_rate": 1.9145134319319533e-05, + "loss": 1.0729, "step": 5597 }, { - "epoch": 0.15885357548240636, + "epoch": 0.15863300178525885, "grad_norm": 0.0, - "learning_rate": 1.9142021901860107e-05, - "loss": 1.031, + "learning_rate": 1.914476298407439e-05, + "loss": 1.0662, "step": 5598 }, { - "epoch": 0.15888195232690125, + "epoch": 0.15866133922752132, "grad_norm": 0.0, - "learning_rate": 1.914164939915896e-05, - "loss": 1.0123, + "learning_rate": 1.9144391571799513e-05, + "loss": 0.9565, "step": 5599 }, { - "epoch": 0.15891032917139614, + "epoch": 0.15868967666978379, "grad_norm": 0.0, - "learning_rate": 1.9141276819237893e-05, - "loss": 0.9437, + "learning_rate": 1.9144020082498027e-05, + "loss": 0.963, "step": 5600 }, { - "epoch": 0.15893870601589102, + "epoch": 0.15871801411204625, "grad_norm": 0.0, - "learning_rate": 1.914090416210005e-05, - "loss": 1.082, + "learning_rate": 1.9143648516173064e-05, + "loss": 1.0202, "step": 5601 }, { - "epoch": 0.15896708286038594, + "epoch": 0.15874635155430872, "grad_norm": 0.0, - "learning_rate": 1.9140531427748574e-05, - "loss": 1.12, + "learning_rate": 1.9143276872827758e-05, + "loss": 0.9678, "step": 5602 }, { - "epoch": 0.15899545970488083, + "epoch": 0.15877468899657116, "grad_norm": 0.0, - "learning_rate": 1.9140158616186625e-05, - "loss": 1.0705, + "learning_rate": 1.9142905152465236e-05, + "loss": 1.0597, "step": 5603 }, { - "epoch": 0.1590238365493757, + "epoch": 0.15880302643883362, "grad_norm": 0.0, - "learning_rate": 1.9139785727417338e-05, - "loss": 1.0289, + "learning_rate": 1.9142533355088628e-05, + "loss": 1.0564, "step": 5604 }, { - "epoch": 0.1590522133938706, + "epoch": 0.1588313638810961, "grad_norm": 0.0, - "learning_rate": 1.9139412761443874e-05, - "loss": 0.9597, + "learning_rate": 1.914216148070106e-05, + "loss": 1.0712, "step": 5605 }, { - "epoch": 0.1590805902383655, + "epoch": 0.15885970132335855, "grad_norm": 0.0, - "learning_rate": 1.9139039718269378e-05, - "loss": 0.9722, + "learning_rate": 1.9141789529305678e-05, + "loss": 0.9436, "step": 5606 }, { - "epoch": 0.15910896708286038, + "epoch": 0.15888803876562102, "grad_norm": 0.0, - "learning_rate": 1.9138666597897004e-05, - "loss": 1.0728, + "learning_rate": 1.9141417500905604e-05, + "loss": 1.0045, "step": 5607 }, { - "epoch": 0.1591373439273553, + "epoch": 0.1589163762078835, "grad_norm": 0.0, - "learning_rate": 1.9138293400329902e-05, - "loss": 1.0102, + "learning_rate": 1.9141045395503978e-05, + "loss": 1.0218, "step": 5608 }, { - "epoch": 0.15916572077185018, + "epoch": 0.15894471365014592, "grad_norm": 0.0, - "learning_rate": 1.9137920125571225e-05, - "loss": 0.9041, + "learning_rate": 1.9140673213103932e-05, + "loss": 0.9695, "step": 5609 }, { - "epoch": 0.15919409761634506, + "epoch": 0.1589730510924084, "grad_norm": 0.0, - "learning_rate": 1.9137546773624126e-05, - "loss": 1.0009, + "learning_rate": 1.9140300953708602e-05, + "loss": 1.0079, "step": 5610 }, { - "epoch": 0.15922247446083995, + "epoch": 0.15900138853467086, "grad_norm": 0.0, - "learning_rate": 1.913717334449176e-05, - "loss": 1.0451, + "learning_rate": 1.9139928617321125e-05, + "loss": 1.0396, "step": 5611 }, { - "epoch": 0.15925085130533484, + "epoch": 0.15902972597693332, "grad_norm": 0.0, - "learning_rate": 1.913679983817728e-05, - "loss": 0.9232, + "learning_rate": 1.913955620394463e-05, + "loss": 1.0216, "step": 5612 }, { - "epoch": 0.15927922814982973, + "epoch": 0.1590580634191958, "grad_norm": 0.0, - "learning_rate": 1.9136426254683837e-05, - "loss": 0.9788, + "learning_rate": 1.9139183713582264e-05, + "loss": 0.9861, "step": 5613 }, { - "epoch": 0.15930760499432464, + "epoch": 0.15908640086145825, "grad_norm": 0.0, - "learning_rate": 1.9136052594014594e-05, - "loss": 1.0347, + "learning_rate": 1.9138811146237156e-05, + "loss": 1.0739, "step": 5614 }, { - "epoch": 0.15933598183881953, + "epoch": 0.1591147383037207, "grad_norm": 0.0, - "learning_rate": 1.9135678856172704e-05, - "loss": 0.994, + "learning_rate": 1.913843850191245e-05, + "loss": 1.0687, "step": 5615 }, { - "epoch": 0.15936435868331442, + "epoch": 0.15914307574598316, "grad_norm": 0.0, - "learning_rate": 1.9135305041161322e-05, - "loss": 0.9903, + "learning_rate": 1.9138065780611283e-05, + "loss": 1.0704, "step": 5616 }, { - "epoch": 0.1593927355278093, + "epoch": 0.15917141318824563, "grad_norm": 0.0, - "learning_rate": 1.913493114898361e-05, - "loss": 1.0737, + "learning_rate": 1.9137692982336794e-05, + "loss": 0.955, "step": 5617 }, { - "epoch": 0.1594211123723042, + "epoch": 0.1591997506305081, "grad_norm": 0.0, - "learning_rate": 1.9134557179642722e-05, - "loss": 1.0553, + "learning_rate": 1.9137320107092122e-05, + "loss": 1.0774, "step": 5618 }, { - "epoch": 0.15944948921679908, + "epoch": 0.15922808807277056, "grad_norm": 0.0, - "learning_rate": 1.913418313314182e-05, - "loss": 1.0131, + "learning_rate": 1.9136947154880413e-05, + "loss": 0.9783, "step": 5619 }, { - "epoch": 0.159477866061294, + "epoch": 0.15925642551503302, "grad_norm": 0.0, - "learning_rate": 1.9133809009484063e-05, - "loss": 1.0977, + "learning_rate": 1.9136574125704807e-05, + "loss": 1.013, "step": 5620 }, { - "epoch": 0.15950624290578888, + "epoch": 0.15928476295729546, "grad_norm": 0.0, - "learning_rate": 1.913343480867261e-05, - "loss": 0.8984, + "learning_rate": 1.9136201019568437e-05, + "loss": 1.0444, "step": 5621 }, { - "epoch": 0.15953461975028377, + "epoch": 0.15931310039955793, "grad_norm": 0.0, - "learning_rate": 1.913306053071062e-05, - "loss": 1.0982, + "learning_rate": 1.9135827836474463e-05, + "loss": 1.109, "step": 5622 }, { - "epoch": 0.15956299659477866, + "epoch": 0.1593414378418204, "grad_norm": 0.0, - "learning_rate": 1.913268617560126e-05, - "loss": 0.929, + "learning_rate": 1.913545457642601e-05, + "loss": 1.0259, "step": 5623 }, { - "epoch": 0.15959137343927354, + "epoch": 0.15936977528408286, "grad_norm": 0.0, - "learning_rate": 1.913231174334769e-05, - "loss": 0.9746, + "learning_rate": 1.9135081239426233e-05, + "loss": 0.8654, "step": 5624 }, { - "epoch": 0.15961975028376846, + "epoch": 0.15939811272634533, "grad_norm": 0.0, - "learning_rate": 1.9131937233953072e-05, - "loss": 1.0604, + "learning_rate": 1.9134707825478277e-05, + "loss": 1.0417, "step": 5625 }, { - "epoch": 0.15964812712826335, + "epoch": 0.1594264501686078, "grad_norm": 0.0, - "learning_rate": 1.9131562647420568e-05, - "loss": 0.9603, + "learning_rate": 1.9134334334585282e-05, + "loss": 0.967, "step": 5626 }, { - "epoch": 0.15967650397275823, + "epoch": 0.15945478761087023, "grad_norm": 0.0, - "learning_rate": 1.9131187983753344e-05, - "loss": 1.0482, + "learning_rate": 1.91339607667504e-05, + "loss": 1.1558, "step": 5627 }, { - "epoch": 0.15970488081725312, + "epoch": 0.1594831250531327, "grad_norm": 0.0, - "learning_rate": 1.9130813242954564e-05, - "loss": 0.9981, + "learning_rate": 1.913358712197677e-05, + "loss": 1.0399, "step": 5628 }, { - "epoch": 0.159733257661748, + "epoch": 0.15951146249539516, "grad_norm": 0.0, - "learning_rate": 1.9130438425027397e-05, - "loss": 1.0447, + "learning_rate": 1.9133213400267548e-05, + "loss": 0.9448, "step": 5629 }, { - "epoch": 0.1597616345062429, + "epoch": 0.15953979993765763, "grad_norm": 0.0, - "learning_rate": 1.9130063529975004e-05, - "loss": 0.909, + "learning_rate": 1.9132839601625877e-05, + "loss": 1.1023, "step": 5630 }, { - "epoch": 0.1597900113507378, + "epoch": 0.1595681373799201, "grad_norm": 0.0, - "learning_rate": 1.9129688557800555e-05, - "loss": 0.9181, + "learning_rate": 1.913246572605491e-05, + "loss": 1.0424, "step": 5631 }, { - "epoch": 0.1598183881952327, + "epoch": 0.15959647482218256, "grad_norm": 0.0, - "learning_rate": 1.9129313508507213e-05, - "loss": 0.9089, + "learning_rate": 1.9132091773557787e-05, + "loss": 1.0316, "step": 5632 }, { - "epoch": 0.15984676503972758, + "epoch": 0.159624812264445, "grad_norm": 0.0, - "learning_rate": 1.9128938382098155e-05, - "loss": 1.0451, + "learning_rate": 1.9131717744137667e-05, + "loss": 1.0589, "step": 5633 }, { - "epoch": 0.15987514188422247, + "epoch": 0.15965314970670746, "grad_norm": 0.0, - "learning_rate": 1.912856317857654e-05, - "loss": 1.0278, + "learning_rate": 1.9131343637797695e-05, + "loss": 1.0898, "step": 5634 }, { - "epoch": 0.15990351872871736, + "epoch": 0.15968148714896993, "grad_norm": 0.0, - "learning_rate": 1.9128187897945545e-05, - "loss": 0.9689, + "learning_rate": 1.9130969454541026e-05, + "loss": 1.1714, "step": 5635 }, { - "epoch": 0.15993189557321225, + "epoch": 0.1597098245912324, "grad_norm": 0.0, - "learning_rate": 1.9127812540208333e-05, - "loss": 1.1801, + "learning_rate": 1.913059519437081e-05, + "loss": 1.0185, "step": 5636 }, { - "epoch": 0.15996027241770716, + "epoch": 0.15973816203349486, "grad_norm": 0.0, - "learning_rate": 1.912743710536808e-05, - "loss": 1.0443, + "learning_rate": 1.91302208572902e-05, + "loss": 1.0181, "step": 5637 }, { - "epoch": 0.15998864926220205, + "epoch": 0.15976649947575733, "grad_norm": 0.0, - "learning_rate": 1.9127061593427954e-05, - "loss": 0.9183, + "learning_rate": 1.9129846443302354e-05, + "loss": 1.0666, "step": 5638 }, { - "epoch": 0.16001702610669694, + "epoch": 0.15979483691801977, "grad_norm": 0.0, - "learning_rate": 1.912668600439113e-05, - "loss": 0.9731, + "learning_rate": 1.9129471952410417e-05, + "loss": 0.9843, "step": 5639 }, { - "epoch": 0.16004540295119182, + "epoch": 0.15982317436028223, "grad_norm": 0.0, - "learning_rate": 1.912631033826078e-05, - "loss": 1.0636, + "learning_rate": 1.912909738461755e-05, + "loss": 1.0715, "step": 5640 }, { - "epoch": 0.1600737797956867, + "epoch": 0.1598515118025447, "grad_norm": 0.0, - "learning_rate": 1.9125934595040072e-05, - "loss": 1.0396, + "learning_rate": 1.91287227399269e-05, + "loss": 1.1478, "step": 5641 }, { - "epoch": 0.16010215664018163, + "epoch": 0.15987984924480717, "grad_norm": 0.0, - "learning_rate": 1.912555877473219e-05, - "loss": 0.9003, + "learning_rate": 1.9128348018341634e-05, + "loss": 1.1121, "step": 5642 }, { - "epoch": 0.1601305334846765, + "epoch": 0.15990818668706963, "grad_norm": 0.0, - "learning_rate": 1.91251828773403e-05, - "loss": 1.0492, + "learning_rate": 1.91279732198649e-05, + "loss": 0.9633, "step": 5643 }, { - "epoch": 0.1601589103291714, + "epoch": 0.1599365241293321, "grad_norm": 0.0, - "learning_rate": 1.9124806902867577e-05, - "loss": 0.9931, + "learning_rate": 1.912759834449986e-05, + "loss": 1.0357, "step": 5644 }, { - "epoch": 0.1601872871736663, + "epoch": 0.15996486157159454, "grad_norm": 0.0, - "learning_rate": 1.9124430851317204e-05, - "loss": 1.0289, + "learning_rate": 1.912722339224967e-05, + "loss": 1.0605, "step": 5645 }, { - "epoch": 0.16021566401816117, + "epoch": 0.159993199013857, "grad_norm": 0.0, - "learning_rate": 1.9124054722692352e-05, - "loss": 1.0815, + "learning_rate": 1.9126848363117487e-05, + "loss": 0.9487, "step": 5646 }, { - "epoch": 0.16024404086265606, + "epoch": 0.16002153645611947, "grad_norm": 0.0, - "learning_rate": 1.91236785169962e-05, - "loss": 1.0264, + "learning_rate": 1.9126473257106473e-05, + "loss": 0.9675, "step": 5647 }, { - "epoch": 0.16027241770715098, + "epoch": 0.16004987389838193, "grad_norm": 0.0, - "learning_rate": 1.9123302234231924e-05, - "loss": 0.9789, + "learning_rate": 1.9126098074219782e-05, + "loss": 1.0866, "step": 5648 }, { - "epoch": 0.16030079455164586, + "epoch": 0.1600782113406444, "grad_norm": 0.0, - "learning_rate": 1.9122925874402707e-05, - "loss": 0.9834, + "learning_rate": 1.9125722814460582e-05, + "loss": 1.0653, "step": 5649 }, { - "epoch": 0.16032917139614075, + "epoch": 0.16010654878290687, "grad_norm": 0.0, - "learning_rate": 1.912254943751172e-05, - "loss": 1.0338, + "learning_rate": 1.9125347477832024e-05, + "loss": 0.9451, "step": 5650 }, { - "epoch": 0.16035754824063564, + "epoch": 0.1601348862251693, "grad_norm": 0.0, - "learning_rate": 1.9122172923562152e-05, - "loss": 1.0242, + "learning_rate": 1.9124972064337283e-05, + "loss": 1.0361, "step": 5651 }, { - "epoch": 0.16038592508513053, + "epoch": 0.16016322366743177, "grad_norm": 0.0, - "learning_rate": 1.912179633255718e-05, - "loss": 0.8877, + "learning_rate": 1.912459657397951e-05, + "loss": 1.0349, "step": 5652 }, { - "epoch": 0.1604143019296254, + "epoch": 0.16019156110969424, "grad_norm": 0.0, - "learning_rate": 1.912141966449998e-05, - "loss": 1.0226, + "learning_rate": 1.912422100676187e-05, + "loss": 0.9184, "step": 5653 }, { - "epoch": 0.16044267877412033, + "epoch": 0.1602198985519567, "grad_norm": 0.0, - "learning_rate": 1.9121042919393742e-05, - "loss": 1.0276, + "learning_rate": 1.912384536268753e-05, + "loss": 0.9487, "step": 5654 }, { - "epoch": 0.16047105561861522, + "epoch": 0.16024823599421917, "grad_norm": 0.0, - "learning_rate": 1.9120666097241644e-05, - "loss": 1.019, + "learning_rate": 1.912346964175965e-05, + "loss": 1.0714, "step": 5655 }, { - "epoch": 0.1604994324631101, + "epoch": 0.16027657343648163, "grad_norm": 0.0, - "learning_rate": 1.9120289198046868e-05, - "loss": 1.1377, + "learning_rate": 1.9123093843981403e-05, + "loss": 0.8867, "step": 5656 }, { - "epoch": 0.160527809307605, + "epoch": 0.16030491087874407, "grad_norm": 0.0, - "learning_rate": 1.91199122218126e-05, - "loss": 0.9869, + "learning_rate": 1.9122717969355945e-05, + "loss": 1.0857, "step": 5657 }, { - "epoch": 0.16055618615209988, + "epoch": 0.16033324832100654, "grad_norm": 0.0, - "learning_rate": 1.9119535168542024e-05, - "loss": 0.9179, + "learning_rate": 1.912234201788645e-05, + "loss": 0.9776, "step": 5658 }, { - "epoch": 0.16058456299659477, + "epoch": 0.160361585763269, "grad_norm": 0.0, - "learning_rate": 1.911915803823832e-05, - "loss": 1.0453, + "learning_rate": 1.9121965989576075e-05, + "loss": 1.0491, "step": 5659 }, { - "epoch": 0.16061293984108968, + "epoch": 0.16038992320553147, "grad_norm": 0.0, - "learning_rate": 1.911878083090468e-05, - "loss": 0.8893, + "learning_rate": 1.9121589884427995e-05, + "loss": 1.161, "step": 5660 }, { - "epoch": 0.16064131668558457, + "epoch": 0.16041826064779394, "grad_norm": 0.0, - "learning_rate": 1.9118403546544294e-05, - "loss": 1.1146, + "learning_rate": 1.9121213702445377e-05, + "loss": 1.0666, "step": 5661 }, { - "epoch": 0.16066969353007945, + "epoch": 0.1604465980900564, "grad_norm": 0.0, - "learning_rate": 1.911802618516034e-05, - "loss": 1.1563, + "learning_rate": 1.9120837443631388e-05, + "loss": 1.0264, "step": 5662 }, { - "epoch": 0.16069807037457434, + "epoch": 0.16047493553231884, "grad_norm": 0.0, - "learning_rate": 1.911764874675601e-05, - "loss": 0.9517, + "learning_rate": 1.91204611079892e-05, + "loss": 1.1456, "step": 5663 }, { - "epoch": 0.16072644721906923, + "epoch": 0.1605032729745813, "grad_norm": 0.0, - "learning_rate": 1.9117271231334488e-05, - "loss": 1.066, + "learning_rate": 1.912008469552198e-05, + "loss": 1.0226, "step": 5664 }, { - "epoch": 0.16075482406356414, + "epoch": 0.16053161041684377, "grad_norm": 0.0, - "learning_rate": 1.9116893638898965e-05, - "loss": 0.9962, + "learning_rate": 1.91197082062329e-05, + "loss": 1.0437, "step": 5665 }, { - "epoch": 0.16078320090805903, + "epoch": 0.16055994785910624, "grad_norm": 0.0, - "learning_rate": 1.9116515969452637e-05, - "loss": 1.0047, + "learning_rate": 1.911933164012513e-05, + "loss": 1.0547, "step": 5666 }, { - "epoch": 0.16081157775255392, + "epoch": 0.1605882853013687, "grad_norm": 0.0, - "learning_rate": 1.9116138222998685e-05, - "loss": 1.0599, + "learning_rate": 1.9118954997201845e-05, + "loss": 1.1693, "step": 5667 }, { - "epoch": 0.1608399545970488, + "epoch": 0.16061662274363117, "grad_norm": 0.0, - "learning_rate": 1.9115760399540303e-05, - "loss": 0.9026, + "learning_rate": 1.9118578277466215e-05, + "loss": 1.0314, "step": 5668 }, { - "epoch": 0.1608683314415437, + "epoch": 0.1606449601858936, "grad_norm": 0.0, - "learning_rate": 1.9115382499080687e-05, - "loss": 0.987, + "learning_rate": 1.9118201480921413e-05, + "loss": 1.061, "step": 5669 }, { - "epoch": 0.16089670828603858, + "epoch": 0.16067329762815608, "grad_norm": 0.0, - "learning_rate": 1.911500452162302e-05, - "loss": 0.9966, + "learning_rate": 1.9117824607570615e-05, + "loss": 1.0734, "step": 5670 }, { - "epoch": 0.1609250851305335, + "epoch": 0.16070163507041854, "grad_norm": 0.0, - "learning_rate": 1.9114626467170505e-05, - "loss": 1.0113, + "learning_rate": 1.9117447657416995e-05, + "loss": 1.0465, "step": 5671 }, { - "epoch": 0.16095346197502838, + "epoch": 0.160729972512681, "grad_norm": 0.0, - "learning_rate": 1.911424833572633e-05, - "loss": 1.053, + "learning_rate": 1.911707063046373e-05, + "loss": 1.04, "step": 5672 }, { - "epoch": 0.16098183881952327, + "epoch": 0.16075830995494347, "grad_norm": 0.0, - "learning_rate": 1.9113870127293688e-05, - "loss": 0.9873, + "learning_rate": 1.911669352671399e-05, + "loss": 1.0667, "step": 5673 }, { - "epoch": 0.16101021566401816, + "epoch": 0.16078664739720594, "grad_norm": 0.0, - "learning_rate": 1.911349184187577e-05, - "loss": 0.992, + "learning_rate": 1.9116316346170957e-05, + "loss": 1.0908, "step": 5674 }, { - "epoch": 0.16103859250851305, + "epoch": 0.16081498483946838, "grad_norm": 0.0, - "learning_rate": 1.9113113479475784e-05, - "loss": 0.9713, + "learning_rate": 1.9115939088837806e-05, + "loss": 1.022, "step": 5675 }, { - "epoch": 0.16106696935300793, + "epoch": 0.16084332228173084, "grad_norm": 0.0, - "learning_rate": 1.911273504009692e-05, - "loss": 0.9906, + "learning_rate": 1.9115561754717713e-05, + "loss": 1.0347, "step": 5676 }, { - "epoch": 0.16109534619750285, + "epoch": 0.1608716597239933, "grad_norm": 0.0, - "learning_rate": 1.911235652374237e-05, - "loss": 0.9621, + "learning_rate": 1.911518434381386e-05, + "loss": 1.1365, "step": 5677 }, { - "epoch": 0.16112372304199774, + "epoch": 0.16089999716625578, "grad_norm": 0.0, - "learning_rate": 1.9111977930415334e-05, - "loss": 0.9903, + "learning_rate": 1.9114806856129422e-05, + "loss": 1.0944, "step": 5678 }, { - "epoch": 0.16115209988649262, + "epoch": 0.16092833460851824, "grad_norm": 0.0, - "learning_rate": 1.9111599260119013e-05, - "loss": 0.9901, + "learning_rate": 1.9114429291667583e-05, + "loss": 1.0819, "step": 5679 }, { - "epoch": 0.1611804767309875, + "epoch": 0.1609566720507807, "grad_norm": 0.0, - "learning_rate": 1.9111220512856602e-05, - "loss": 1.0062, + "learning_rate": 1.9114051650431525e-05, + "loss": 0.9855, "step": 5680 }, { - "epoch": 0.1612088535754824, + "epoch": 0.16098500949304315, "grad_norm": 0.0, - "learning_rate": 1.9110841688631303e-05, - "loss": 0.9371, + "learning_rate": 1.911367393242442e-05, + "loss": 1.0491, "step": 5681 }, { - "epoch": 0.1612372304199773, + "epoch": 0.1610133469353056, "grad_norm": 0.0, - "learning_rate": 1.9110462787446313e-05, - "loss": 0.9251, + "learning_rate": 1.911329613764946e-05, + "loss": 0.9655, "step": 5682 }, { - "epoch": 0.1612656072644722, + "epoch": 0.16104168437756808, "grad_norm": 0.0, - "learning_rate": 1.9110083809304835e-05, - "loss": 0.9934, + "learning_rate": 1.9112918266109817e-05, + "loss": 1.1333, "step": 5683 }, { - "epoch": 0.1612939841089671, + "epoch": 0.16107002181983054, "grad_norm": 0.0, - "learning_rate": 1.910970475421007e-05, - "loss": 1.0958, + "learning_rate": 1.9112540317808683e-05, + "loss": 0.9182, "step": 5684 }, { - "epoch": 0.16132236095346197, + "epoch": 0.161098359262093, "grad_norm": 0.0, - "learning_rate": 1.910932562216522e-05, - "loss": 1.0046, + "learning_rate": 1.9112162292749236e-05, + "loss": 1.1277, "step": 5685 }, { - "epoch": 0.16135073779795686, + "epoch": 0.16112669670435548, "grad_norm": 0.0, - "learning_rate": 1.9108946413173485e-05, - "loss": 1.0488, + "learning_rate": 1.9111784190934665e-05, + "loss": 0.9809, "step": 5686 }, { - "epoch": 0.16137911464245175, + "epoch": 0.16115503414661791, "grad_norm": 0.0, - "learning_rate": 1.9108567127238072e-05, - "loss": 0.9313, + "learning_rate": 1.9111406012368153e-05, + "loss": 1.0106, "step": 5687 }, { - "epoch": 0.16140749148694666, + "epoch": 0.16118337158888038, "grad_norm": 0.0, - "learning_rate": 1.9108187764362185e-05, - "loss": 0.9604, + "learning_rate": 1.9111027757052882e-05, + "loss": 0.9635, "step": 5688 }, { - "epoch": 0.16143586833144155, + "epoch": 0.16121170903114285, "grad_norm": 0.0, - "learning_rate": 1.910780832454902e-05, - "loss": 1.0008, + "learning_rate": 1.911064942499204e-05, + "loss": 1.0365, "step": 5689 }, { - "epoch": 0.16146424517593644, + "epoch": 0.1612400464734053, "grad_norm": 0.0, - "learning_rate": 1.9107428807801796e-05, - "loss": 0.9893, + "learning_rate": 1.911027101618882e-05, + "loss": 1.0824, "step": 5690 }, { - "epoch": 0.16149262202043133, + "epoch": 0.16126838391566778, "grad_norm": 0.0, - "learning_rate": 1.9107049214123704e-05, - "loss": 1.0449, + "learning_rate": 1.91098925306464e-05, + "loss": 0.8353, "step": 5691 }, { - "epoch": 0.1615209988649262, + "epoch": 0.16129672135793025, "grad_norm": 0.0, - "learning_rate": 1.9106669543517965e-05, - "loss": 0.9731, + "learning_rate": 1.910951396836797e-05, + "loss": 1.0613, "step": 5692 }, { - "epoch": 0.1615493757094211, + "epoch": 0.16132505880019268, "grad_norm": 0.0, - "learning_rate": 1.9106289795987774e-05, - "loss": 0.9041, + "learning_rate": 1.9109135329356722e-05, + "loss": 1.0703, "step": 5693 }, { - "epoch": 0.16157775255391602, + "epoch": 0.16135339624245515, "grad_norm": 0.0, - "learning_rate": 1.9105909971536346e-05, - "loss": 1.1672, + "learning_rate": 1.9108756613615846e-05, + "loss": 0.9441, "step": 5694 }, { - "epoch": 0.1616061293984109, + "epoch": 0.16138173368471762, "grad_norm": 0.0, - "learning_rate": 1.910553007016689e-05, - "loss": 1.0721, + "learning_rate": 1.9108377821148534e-05, + "loss": 1.0667, "step": 5695 }, { - "epoch": 0.1616345062429058, + "epoch": 0.16141007112698008, "grad_norm": 0.0, - "learning_rate": 1.9105150091882608e-05, - "loss": 0.9245, + "learning_rate": 1.910799895195797e-05, + "loss": 1.0403, "step": 5696 }, { - "epoch": 0.16166288308740068, + "epoch": 0.16143840856924255, "grad_norm": 0.0, - "learning_rate": 1.9104770036686716e-05, - "loss": 0.9045, + "learning_rate": 1.9107620006047346e-05, + "loss": 1.0214, "step": 5697 }, { - "epoch": 0.16169125993189556, + "epoch": 0.161466746011505, "grad_norm": 0.0, - "learning_rate": 1.910438990458242e-05, - "loss": 0.9825, + "learning_rate": 1.910724098341986e-05, + "loss": 1.0137, "step": 5698 }, { - "epoch": 0.16171963677639045, + "epoch": 0.16149508345376745, "grad_norm": 0.0, - "learning_rate": 1.9104009695572933e-05, - "loss": 0.9807, + "learning_rate": 1.9106861884078704e-05, + "loss": 1.1287, "step": 5699 }, { - "epoch": 0.16174801362088537, + "epoch": 0.16152342089602992, "grad_norm": 0.0, - "learning_rate": 1.9103629409661468e-05, - "loss": 1.157, + "learning_rate": 1.9106482708027063e-05, + "loss": 1.1938, "step": 5700 }, { - "epoch": 0.16177639046538025, + "epoch": 0.16155175833829238, "grad_norm": 0.0, - "learning_rate": 1.9103249046851237e-05, - "loss": 1.0272, + "learning_rate": 1.9106103455268142e-05, + "loss": 0.9625, "step": 5701 }, { - "epoch": 0.16180476730987514, + "epoch": 0.16158009578055485, "grad_norm": 0.0, - "learning_rate": 1.910286860714545e-05, - "loss": 1.0145, + "learning_rate": 1.9105724125805126e-05, + "loss": 0.9896, "step": 5702 }, { - "epoch": 0.16183314415437003, + "epoch": 0.16160843322281732, "grad_norm": 0.0, - "learning_rate": 1.9102488090547323e-05, - "loss": 1.0505, + "learning_rate": 1.9105344719641213e-05, + "loss": 1.0095, "step": 5703 }, { - "epoch": 0.16186152099886492, + "epoch": 0.16163677066507978, "grad_norm": 0.0, - "learning_rate": 1.9102107497060072e-05, - "loss": 0.9296, + "learning_rate": 1.9104965236779605e-05, + "loss": 0.9303, "step": 5704 }, { - "epoch": 0.16188989784335983, + "epoch": 0.16166510810734222, "grad_norm": 0.0, - "learning_rate": 1.910172682668691e-05, - "loss": 1.0712, + "learning_rate": 1.910458567722349e-05, + "loss": 0.9945, "step": 5705 }, { - "epoch": 0.16191827468785472, + "epoch": 0.1616934455496047, "grad_norm": 0.0, - "learning_rate": 1.9101346079431052e-05, - "loss": 1.1414, + "learning_rate": 1.910420604097607e-05, + "loss": 0.9974, "step": 5706 }, { - "epoch": 0.1619466515323496, + "epoch": 0.16172178299186715, "grad_norm": 0.0, - "learning_rate": 1.910096525529571e-05, - "loss": 1.0324, + "learning_rate": 1.9103826328040546e-05, + "loss": 1.0854, "step": 5707 }, { - "epoch": 0.1619750283768445, + "epoch": 0.16175012043412962, "grad_norm": 0.0, - "learning_rate": 1.910058435428411e-05, - "loss": 0.9553, + "learning_rate": 1.9103446538420108e-05, + "loss": 1.0719, "step": 5708 }, { - "epoch": 0.16200340522133938, + "epoch": 0.16177845787639208, "grad_norm": 0.0, - "learning_rate": 1.9100203376399465e-05, - "loss": 0.9121, + "learning_rate": 1.9103066672117957e-05, + "loss": 1.0802, "step": 5709 }, { - "epoch": 0.16203178206583427, + "epoch": 0.16180679531865455, "grad_norm": 0.0, - "learning_rate": 1.9099822321644993e-05, - "loss": 0.9136, + "learning_rate": 1.91026867291373e-05, + "loss": 1.0615, "step": 5710 }, { - "epoch": 0.16206015891032918, + "epoch": 0.161835132760917, "grad_norm": 0.0, - "learning_rate": 1.909944119002391e-05, - "loss": 1.0497, + "learning_rate": 1.9102306709481327e-05, + "loss": 1.0723, "step": 5711 }, { - "epoch": 0.16208853575482407, + "epoch": 0.16186347020317945, "grad_norm": 0.0, - "learning_rate": 1.9099059981539437e-05, - "loss": 0.9849, + "learning_rate": 1.910192661315325e-05, + "loss": 1.076, "step": 5712 }, { - "epoch": 0.16211691259931896, + "epoch": 0.16189180764544192, "grad_norm": 0.0, - "learning_rate": 1.90986786961948e-05, - "loss": 1.0691, + "learning_rate": 1.9101546440156262e-05, + "loss": 0.9098, "step": 5713 }, { - "epoch": 0.16214528944381384, + "epoch": 0.1619201450877044, "grad_norm": 0.0, - "learning_rate": 1.9098297333993214e-05, - "loss": 0.9991, + "learning_rate": 1.9101166190493573e-05, + "loss": 1.0233, "step": 5714 }, { - "epoch": 0.16217366628830873, + "epoch": 0.16194848252996685, "grad_norm": 0.0, - "learning_rate": 1.9097915894937902e-05, - "loss": 1.1378, + "learning_rate": 1.9100785864168377e-05, + "loss": 1.0782, "step": 5715 }, { - "epoch": 0.16220204313280362, + "epoch": 0.16197681997222932, "grad_norm": 0.0, - "learning_rate": 1.9097534379032083e-05, - "loss": 0.9415, + "learning_rate": 1.910040546118388e-05, + "loss": 1.1053, "step": 5716 }, { - "epoch": 0.16223041997729853, + "epoch": 0.16200515741449176, "grad_norm": 0.0, - "learning_rate": 1.9097152786278983e-05, - "loss": 0.9821, + "learning_rate": 1.9100024981543296e-05, + "loss": 1.0745, "step": 5717 }, { - "epoch": 0.16225879682179342, + "epoch": 0.16203349485675422, "grad_norm": 0.0, - "learning_rate": 1.9096771116681825e-05, - "loss": 1.0465, + "learning_rate": 1.9099644425249818e-05, + "loss": 1.0763, "step": 5718 }, { - "epoch": 0.1622871736662883, + "epoch": 0.1620618322990167, "grad_norm": 0.0, - "learning_rate": 1.9096389370243833e-05, - "loss": 0.9514, + "learning_rate": 1.9099263792306654e-05, + "loss": 1.0172, "step": 5719 }, { - "epoch": 0.1623155505107832, + "epoch": 0.16209016974127916, "grad_norm": 0.0, - "learning_rate": 1.909600754696823e-05, - "loss": 1.0088, + "learning_rate": 1.9098883082717014e-05, + "loss": 0.9768, "step": 5720 }, { - "epoch": 0.16234392735527808, + "epoch": 0.16211850718354162, "grad_norm": 0.0, - "learning_rate": 1.909562564685824e-05, - "loss": 0.9846, + "learning_rate": 1.9098502296484102e-05, + "loss": 1.1107, "step": 5721 }, { - "epoch": 0.162372304199773, + "epoch": 0.1621468446258041, "grad_norm": 0.0, - "learning_rate": 1.9095243669917095e-05, - "loss": 0.9573, + "learning_rate": 1.909812143361113e-05, + "loss": 1.0774, "step": 5722 }, { - "epoch": 0.16240068104426789, + "epoch": 0.16217518206806653, "grad_norm": 0.0, - "learning_rate": 1.9094861616148018e-05, - "loss": 1.2346, + "learning_rate": 1.90977404941013e-05, + "loss": 0.9197, "step": 5723 }, { - "epoch": 0.16242905788876277, + "epoch": 0.162203519510329, "grad_norm": 0.0, - "learning_rate": 1.9094479485554235e-05, - "loss": 0.9609, + "learning_rate": 1.9097359477957825e-05, + "loss": 0.9169, "step": 5724 }, { - "epoch": 0.16245743473325766, + "epoch": 0.16223185695259146, "grad_norm": 0.0, - "learning_rate": 1.9094097278138975e-05, - "loss": 0.8745, + "learning_rate": 1.909697838518391e-05, + "loss": 0.9547, "step": 5725 }, { - "epoch": 0.16248581157775255, + "epoch": 0.16226019439485392, "grad_norm": 0.0, - "learning_rate": 1.9093714993905467e-05, - "loss": 0.9808, + "learning_rate": 1.909659721578277e-05, + "loss": 0.9746, "step": 5726 }, { - "epoch": 0.16251418842224744, + "epoch": 0.1622885318371164, "grad_norm": 0.0, - "learning_rate": 1.9093332632856936e-05, - "loss": 0.9641, + "learning_rate": 1.9096215969757616e-05, + "loss": 1.0766, "step": 5727 }, { - "epoch": 0.16254256526674235, + "epoch": 0.16231686927937886, "grad_norm": 0.0, - "learning_rate": 1.909295019499662e-05, - "loss": 0.9659, + "learning_rate": 1.9095834647111654e-05, + "loss": 1.0219, "step": 5728 }, { - "epoch": 0.16257094211123724, + "epoch": 0.1623452067216413, "grad_norm": 0.0, - "learning_rate": 1.909256768032774e-05, - "loss": 1.0023, + "learning_rate": 1.90954532478481e-05, + "loss": 0.9398, "step": 5729 }, { - "epoch": 0.16259931895573213, + "epoch": 0.16237354416390376, "grad_norm": 0.0, - "learning_rate": 1.9092185088853535e-05, - "loss": 1.0073, + "learning_rate": 1.9095071771970165e-05, + "loss": 1.0554, "step": 5730 }, { - "epoch": 0.162627695800227, + "epoch": 0.16240188160616623, "grad_norm": 0.0, - "learning_rate": 1.9091802420577237e-05, - "loss": 0.9997, + "learning_rate": 1.909469021948106e-05, + "loss": 0.9999, "step": 5731 }, { - "epoch": 0.1626560726447219, + "epoch": 0.1624302190484287, "grad_norm": 0.0, - "learning_rate": 1.909141967550207e-05, - "loss": 0.8861, + "learning_rate": 1.9094308590384007e-05, + "loss": 1.0199, "step": 5732 }, { - "epoch": 0.1626844494892168, + "epoch": 0.16245855649069116, "grad_norm": 0.0, - "learning_rate": 1.909103685363128e-05, - "loss": 1.0823, + "learning_rate": 1.9093926884682215e-05, + "loss": 1.0564, "step": 5733 }, { - "epoch": 0.1627128263337117, + "epoch": 0.16248689393295362, "grad_norm": 0.0, - "learning_rate": 1.9090653954968084e-05, - "loss": 1.1039, + "learning_rate": 1.90935451023789e-05, + "loss": 1.1443, "step": 5734 }, { - "epoch": 0.1627412031782066, + "epoch": 0.16251523137521606, "grad_norm": 0.0, - "learning_rate": 1.9090270979515728e-05, - "loss": 1.0264, + "learning_rate": 1.9093163243477274e-05, + "loss": 0.9503, "step": 5735 }, { - "epoch": 0.16276958002270148, + "epoch": 0.16254356881747853, "grad_norm": 0.0, - "learning_rate": 1.9089887927277446e-05, - "loss": 1.0197, + "learning_rate": 1.9092781307980562e-05, + "loss": 1.1012, "step": 5736 }, { - "epoch": 0.16279795686719636, + "epoch": 0.162571906259741, "grad_norm": 0.0, - "learning_rate": 1.9089504798256472e-05, - "loss": 0.8589, + "learning_rate": 1.9092399295891974e-05, + "loss": 0.9695, "step": 5737 }, { - "epoch": 0.16282633371169125, + "epoch": 0.16260024370200346, "grad_norm": 0.0, - "learning_rate": 1.9089121592456044e-05, - "loss": 0.9785, + "learning_rate": 1.909201720721473e-05, + "loss": 1.0096, "step": 5738 }, { - "epoch": 0.16285471055618614, + "epoch": 0.16262858114426593, "grad_norm": 0.0, - "learning_rate": 1.9088738309879394e-05, - "loss": 0.8833, + "learning_rate": 1.9091635041952052e-05, + "loss": 1.0304, "step": 5739 }, { - "epoch": 0.16288308740068105, + "epoch": 0.1626569185865284, "grad_norm": 0.0, - "learning_rate": 1.9088354950529764e-05, - "loss": 1.0081, + "learning_rate": 1.9091252800107153e-05, + "loss": 1.1286, "step": 5740 }, { - "epoch": 0.16291146424517594, + "epoch": 0.16268525602879083, "grad_norm": 0.0, - "learning_rate": 1.908797151441039e-05, - "loss": 1.073, + "learning_rate": 1.9090870481683258e-05, + "loss": 1.0615, "step": 5741 }, { - "epoch": 0.16293984108967083, + "epoch": 0.1627135934710533, "grad_norm": 0.0, - "learning_rate": 1.9087588001524514e-05, - "loss": 0.9679, + "learning_rate": 1.9090488086683587e-05, + "loss": 1.0232, "step": 5742 }, { - "epoch": 0.16296821793416572, + "epoch": 0.16274193091331576, "grad_norm": 0.0, - "learning_rate": 1.9087204411875375e-05, - "loss": 0.9732, + "learning_rate": 1.9090105615111354e-05, + "loss": 0.9504, "step": 5743 }, { - "epoch": 0.1629965947786606, + "epoch": 0.16277026835557823, "grad_norm": 0.0, - "learning_rate": 1.908682074546621e-05, - "loss": 1.0574, + "learning_rate": 1.9089723066969787e-05, + "loss": 1.0145, "step": 5744 }, { - "epoch": 0.16302497162315552, + "epoch": 0.1627986057978407, "grad_norm": 0.0, - "learning_rate": 1.9086437002300262e-05, - "loss": 0.9574, + "learning_rate": 1.908934044226211e-05, + "loss": 1.0727, "step": 5745 }, { - "epoch": 0.1630533484676504, + "epoch": 0.16282694324010316, "grad_norm": 0.0, - "learning_rate": 1.908605318238077e-05, - "loss": 0.945, + "learning_rate": 1.908895774099154e-05, + "loss": 1.0461, "step": 5746 }, { - "epoch": 0.1630817253121453, + "epoch": 0.1628552806823656, "grad_norm": 0.0, - "learning_rate": 1.908566928571098e-05, - "loss": 1.0203, + "learning_rate": 1.9088574963161304e-05, + "loss": 1.03, "step": 5747 }, { - "epoch": 0.16311010215664018, + "epoch": 0.16288361812462807, "grad_norm": 0.0, - "learning_rate": 1.9085285312294137e-05, - "loss": 1.1006, + "learning_rate": 1.9088192108774625e-05, + "loss": 1.129, "step": 5748 }, { - "epoch": 0.16313847900113507, + "epoch": 0.16291195556689053, "grad_norm": 0.0, - "learning_rate": 1.9084901262133475e-05, - "loss": 0.9881, + "learning_rate": 1.908780917783473e-05, + "loss": 1.0276, "step": 5749 }, { - "epoch": 0.16316685584562995, + "epoch": 0.162940293009153, "grad_norm": 0.0, - "learning_rate": 1.9084517135232245e-05, - "loss": 1.0513, + "learning_rate": 1.908742617034485e-05, + "loss": 1.0428, "step": 5750 }, { - "epoch": 0.16319523269012487, + "epoch": 0.16296863045141546, "grad_norm": 0.0, - "learning_rate": 1.908413293159369e-05, - "loss": 1.0079, + "learning_rate": 1.9087043086308198e-05, + "loss": 0.9616, "step": 5751 }, { - "epoch": 0.16322360953461976, + "epoch": 0.16299696789367793, "grad_norm": 0.0, - "learning_rate": 1.9083748651221057e-05, - "loss": 1.0126, + "learning_rate": 1.908665992572801e-05, + "loss": 1.0302, "step": 5752 }, { - "epoch": 0.16325198637911464, + "epoch": 0.16302530533594037, "grad_norm": 0.0, - "learning_rate": 1.908336429411759e-05, - "loss": 1.0098, + "learning_rate": 1.908627668860751e-05, + "loss": 1.0502, "step": 5753 }, { - "epoch": 0.16328036322360953, + "epoch": 0.16305364277820283, "grad_norm": 0.0, - "learning_rate": 1.9082979860286535e-05, - "loss": 1.0681, + "learning_rate": 1.9085893374949926e-05, + "loss": 1.0624, "step": 5754 }, { - "epoch": 0.16330874006810442, + "epoch": 0.1630819802204653, "grad_norm": 0.0, - "learning_rate": 1.9082595349731144e-05, - "loss": 0.9638, + "learning_rate": 1.9085509984758492e-05, + "loss": 1.0424, "step": 5755 }, { - "epoch": 0.1633371169125993, + "epoch": 0.16311031766272777, "grad_norm": 0.0, - "learning_rate": 1.908221076245466e-05, - "loss": 1.0506, + "learning_rate": 1.9085126518036432e-05, + "loss": 1.1076, "step": 5756 }, { - "epoch": 0.16336549375709422, + "epoch": 0.16313865510499023, "grad_norm": 0.0, - "learning_rate": 1.9081826098460335e-05, - "loss": 1.0455, + "learning_rate": 1.9084742974786978e-05, + "loss": 1.0501, "step": 5757 }, { - "epoch": 0.1633938706015891, + "epoch": 0.1631669925472527, "grad_norm": 0.0, - "learning_rate": 1.9081441357751415e-05, - "loss": 0.9822, + "learning_rate": 1.908435935501336e-05, + "loss": 0.9244, "step": 5758 }, { - "epoch": 0.163422247446084, + "epoch": 0.16319532998951514, "grad_norm": 0.0, - "learning_rate": 1.9081056540331152e-05, - "loss": 0.9767, + "learning_rate": 1.9083975658718808e-05, + "loss": 0.9862, "step": 5759 }, { - "epoch": 0.16345062429057888, + "epoch": 0.1632236674317776, "grad_norm": 0.0, - "learning_rate": 1.9080671646202797e-05, - "loss": 1.0299, + "learning_rate": 1.9083591885906555e-05, + "loss": 1.1595, "step": 5760 }, { - "epoch": 0.16347900113507377, + "epoch": 0.16325200487404007, "grad_norm": 0.0, - "learning_rate": 1.90802866753696e-05, - "loss": 1.0127, + "learning_rate": 1.908320803657984e-05, + "loss": 1.0224, "step": 5761 }, { - "epoch": 0.16350737797956869, + "epoch": 0.16328034231630253, "grad_norm": 0.0, - "learning_rate": 1.9079901627834813e-05, - "loss": 0.9851, + "learning_rate": 1.9082824110741886e-05, + "loss": 1.045, "step": 5762 }, { - "epoch": 0.16353575482406357, + "epoch": 0.163308679758565, "grad_norm": 0.0, - "learning_rate": 1.9079516503601687e-05, - "loss": 1.0888, + "learning_rate": 1.9082440108395933e-05, + "loss": 1.0613, "step": 5763 }, { - "epoch": 0.16356413166855846, + "epoch": 0.16333701720082747, "grad_norm": 0.0, - "learning_rate": 1.9079131302673478e-05, - "loss": 0.8767, + "learning_rate": 1.9082056029545215e-05, + "loss": 1.1109, "step": 5764 }, { - "epoch": 0.16359250851305335, + "epoch": 0.1633653546430899, "grad_norm": 0.0, - "learning_rate": 1.9078746025053442e-05, - "loss": 1.0595, + "learning_rate": 1.9081671874192965e-05, + "loss": 1.0507, "step": 5765 }, { - "epoch": 0.16362088535754823, + "epoch": 0.16339369208535237, "grad_norm": 0.0, - "learning_rate": 1.9078360670744826e-05, - "loss": 1.0094, + "learning_rate": 1.9081287642342422e-05, + "loss": 1.0123, "step": 5766 }, { - "epoch": 0.16364926220204312, + "epoch": 0.16342202952761484, "grad_norm": 0.0, - "learning_rate": 1.907797523975089e-05, - "loss": 0.8962, + "learning_rate": 1.908090333399682e-05, + "loss": 1.042, "step": 5767 }, { - "epoch": 0.16367763904653804, + "epoch": 0.1634503669698773, "grad_norm": 0.0, - "learning_rate": 1.907758973207489e-05, - "loss": 1.0996, + "learning_rate": 1.9080518949159395e-05, + "loss": 0.9236, "step": 5768 }, { - "epoch": 0.16370601589103292, + "epoch": 0.16347870441213977, "grad_norm": 0.0, - "learning_rate": 1.9077204147720085e-05, - "loss": 1.1318, + "learning_rate": 1.9080134487833393e-05, + "loss": 0.9734, "step": 5769 }, { - "epoch": 0.1637343927355278, + "epoch": 0.16350704185440224, "grad_norm": 0.0, - "learning_rate": 1.9076818486689725e-05, - "loss": 0.9662, + "learning_rate": 1.907974995002204e-05, + "loss": 0.9211, "step": 5770 }, { - "epoch": 0.1637627695800227, + "epoch": 0.16353537929666467, "grad_norm": 0.0, - "learning_rate": 1.907643274898707e-05, - "loss": 1.0413, + "learning_rate": 1.9079365335728586e-05, + "loss": 1.0338, "step": 5771 }, { - "epoch": 0.1637911464245176, + "epoch": 0.16356371673892714, "grad_norm": 0.0, - "learning_rate": 1.9076046934615382e-05, - "loss": 1.0273, + "learning_rate": 1.9078980644956263e-05, + "loss": 1.0625, "step": 5772 }, { - "epoch": 0.16381952326901247, + "epoch": 0.1635920541811896, "grad_norm": 0.0, - "learning_rate": 1.9075661043577915e-05, - "loss": 1.0425, + "learning_rate": 1.907859587770832e-05, + "loss": 1.0701, "step": 5773 }, { - "epoch": 0.1638479001135074, + "epoch": 0.16362039162345207, "grad_norm": 0.0, - "learning_rate": 1.907527507587793e-05, - "loss": 1.0277, + "learning_rate": 1.9078211033987986e-05, + "loss": 1.0724, "step": 5774 }, { - "epoch": 0.16387627695800228, + "epoch": 0.16364872906571454, "grad_norm": 0.0, - "learning_rate": 1.907488903151869e-05, - "loss": 1.0171, + "learning_rate": 1.907782611379851e-05, + "loss": 1.0789, "step": 5775 }, { - "epoch": 0.16390465380249716, + "epoch": 0.163677066507977, "grad_norm": 0.0, - "learning_rate": 1.907450291050346e-05, - "loss": 1.0803, + "learning_rate": 1.907744111714314e-05, + "loss": 1.0683, "step": 5776 }, { - "epoch": 0.16393303064699205, + "epoch": 0.16370540395023944, "grad_norm": 0.0, - "learning_rate": 1.907411671283549e-05, - "loss": 1.0982, + "learning_rate": 1.9077056044025107e-05, + "loss": 1.0527, "step": 5777 }, { - "epoch": 0.16396140749148694, + "epoch": 0.1637337413925019, "grad_norm": 0.0, - "learning_rate": 1.907373043851805e-05, - "loss": 1.0479, + "learning_rate": 1.907667089444766e-05, + "loss": 1.1691, "step": 5778 }, { - "epoch": 0.16398978433598183, + "epoch": 0.16376207883476437, "grad_norm": 0.0, - "learning_rate": 1.90733440875544e-05, - "loss": 0.9821, + "learning_rate": 1.9076285668414046e-05, + "loss": 1.0699, "step": 5779 }, { - "epoch": 0.16401816118047674, + "epoch": 0.16379041627702684, "grad_norm": 0.0, - "learning_rate": 1.9072957659947805e-05, - "loss": 0.9402, + "learning_rate": 1.9075900365927508e-05, + "loss": 1.0275, "step": 5780 }, { - "epoch": 0.16404653802497163, + "epoch": 0.1638187537192893, "grad_norm": 0.0, - "learning_rate": 1.907257115570153e-05, - "loss": 0.9699, + "learning_rate": 1.907551498699129e-05, + "loss": 0.9212, "step": 5781 }, { - "epoch": 0.16407491486946651, + "epoch": 0.16384709116155174, "grad_norm": 0.0, - "learning_rate": 1.9072184574818838e-05, - "loss": 0.9464, + "learning_rate": 1.9075129531608637e-05, + "loss": 0.9709, "step": 5782 }, { - "epoch": 0.1641032917139614, + "epoch": 0.1638754286038142, "grad_norm": 0.0, - "learning_rate": 1.9071797917302995e-05, - "loss": 0.987, + "learning_rate": 1.90747439997828e-05, + "loss": 0.9633, "step": 5783 }, { - "epoch": 0.1641316685584563, + "epoch": 0.16390376604607668, "grad_norm": 0.0, - "learning_rate": 1.907141118315727e-05, - "loss": 0.9969, + "learning_rate": 1.9074358391517026e-05, + "loss": 1.0322, "step": 5784 }, { - "epoch": 0.1641600454029512, + "epoch": 0.16393210348833914, "grad_norm": 0.0, - "learning_rate": 1.9071024372384923e-05, - "loss": 1.0508, + "learning_rate": 1.9073972706814555e-05, + "loss": 0.8944, "step": 5785 }, { - "epoch": 0.1641884222474461, + "epoch": 0.1639604409306016, "grad_norm": 0.0, - "learning_rate": 1.9070637484989224e-05, - "loss": 1.0612, + "learning_rate": 1.907358694567865e-05, + "loss": 0.8769, "step": 5786 }, { - "epoch": 0.16421679909194098, + "epoch": 0.16398877837286407, "grad_norm": 0.0, - "learning_rate": 1.9070250520973444e-05, - "loss": 1.0124, + "learning_rate": 1.907320110811255e-05, + "loss": 1.041, "step": 5787 }, { - "epoch": 0.16424517593643587, + "epoch": 0.1640171158151265, "grad_norm": 0.0, - "learning_rate": 1.9069863480340852e-05, - "loss": 1.0582, + "learning_rate": 1.9072815194119507e-05, + "loss": 1.1707, "step": 5788 }, { - "epoch": 0.16427355278093075, + "epoch": 0.16404545325738898, "grad_norm": 0.0, - "learning_rate": 1.9069476363094715e-05, - "loss": 0.9652, + "learning_rate": 1.9072429203702774e-05, + "loss": 1.0159, "step": 5789 }, { - "epoch": 0.16430192962542564, + "epoch": 0.16407379069965145, "grad_norm": 0.0, - "learning_rate": 1.90690891692383e-05, - "loss": 1.0672, + "learning_rate": 1.90720431368656e-05, + "loss": 0.9714, "step": 5790 }, { - "epoch": 0.16433030646992056, + "epoch": 0.1641021281419139, "grad_norm": 0.0, - "learning_rate": 1.9068701898774885e-05, - "loss": 0.998, + "learning_rate": 1.907165699361124e-05, + "loss": 1.0685, "step": 5791 }, { - "epoch": 0.16435868331441544, + "epoch": 0.16413046558417638, "grad_norm": 0.0, - "learning_rate": 1.9068314551707738e-05, - "loss": 1.0033, + "learning_rate": 1.907127077394294e-05, + "loss": 1.0793, "step": 5792 }, { - "epoch": 0.16438706015891033, + "epoch": 0.16415880302643884, "grad_norm": 0.0, - "learning_rate": 1.9067927128040127e-05, - "loss": 1.0796, + "learning_rate": 1.907088447786396e-05, + "loss": 0.9068, "step": 5793 }, { - "epoch": 0.16441543700340522, + "epoch": 0.16418714046870128, "grad_norm": 0.0, - "learning_rate": 1.906753962777533e-05, - "loss": 1.0453, + "learning_rate": 1.907049810537755e-05, + "loss": 0.9588, "step": 5794 }, { - "epoch": 0.1644438138479001, + "epoch": 0.16421547791096375, "grad_norm": 0.0, - "learning_rate": 1.906715205091662e-05, - "loss": 0.9471, + "learning_rate": 1.907011165648697e-05, + "loss": 1.0206, "step": 5795 }, { - "epoch": 0.164472190692395, + "epoch": 0.1642438153532262, "grad_norm": 0.0, - "learning_rate": 1.9066764397467266e-05, - "loss": 1.1074, + "learning_rate": 1.906972513119547e-05, + "loss": 1.046, "step": 5796 }, { - "epoch": 0.1645005675368899, + "epoch": 0.16427215279548868, "grad_norm": 0.0, - "learning_rate": 1.9066376667430546e-05, - "loss": 0.9034, + "learning_rate": 1.9069338529506307e-05, + "loss": 0.9155, "step": 5797 }, { - "epoch": 0.1645289443813848, + "epoch": 0.16430049023775115, "grad_norm": 0.0, - "learning_rate": 1.9065988860809735e-05, - "loss": 1.0042, + "learning_rate": 1.9068951851422735e-05, + "loss": 0.9237, "step": 5798 }, { - "epoch": 0.16455732122587968, + "epoch": 0.1643288276800136, "grad_norm": 0.0, - "learning_rate": 1.9065600977608107e-05, - "loss": 1.0441, + "learning_rate": 1.9068565096948017e-05, + "loss": 1.0386, "step": 5799 }, { - "epoch": 0.16458569807037457, + "epoch": 0.16435716512227605, "grad_norm": 0.0, - "learning_rate": 1.906521301782894e-05, - "loss": 1.1101, + "learning_rate": 1.9068178266085407e-05, + "loss": 0.9597, "step": 5800 }, { - "epoch": 0.16461407491486946, + "epoch": 0.16438550256453852, "grad_norm": 0.0, - "learning_rate": 1.9064824981475514e-05, - "loss": 1.0155, + "learning_rate": 1.9067791358838165e-05, + "loss": 0.9852, "step": 5801 }, { - "epoch": 0.16464245175936437, + "epoch": 0.16441384000680098, "grad_norm": 0.0, - "learning_rate": 1.9064436868551103e-05, - "loss": 1.0239, + "learning_rate": 1.906740437520955e-05, + "loss": 0.9818, "step": 5802 }, { - "epoch": 0.16467082860385926, + "epoch": 0.16444217744906345, "grad_norm": 0.0, - "learning_rate": 1.9064048679058986e-05, - "loss": 1.1002, + "learning_rate": 1.9067017315202815e-05, + "loss": 1.15, "step": 5803 }, { - "epoch": 0.16469920544835415, + "epoch": 0.16447051489132591, "grad_norm": 0.0, - "learning_rate": 1.9063660413002442e-05, - "loss": 1.0409, + "learning_rate": 1.9066630178821232e-05, + "loss": 0.9743, "step": 5804 }, { - "epoch": 0.16472758229284903, + "epoch": 0.16449885233358838, "grad_norm": 0.0, - "learning_rate": 1.906327207038475e-05, - "loss": 0.9577, + "learning_rate": 1.9066242966068055e-05, + "loss": 0.9338, "step": 5805 }, { - "epoch": 0.16475595913734392, + "epoch": 0.16452718977585082, "grad_norm": 0.0, - "learning_rate": 1.9062883651209193e-05, - "loss": 0.9881, + "learning_rate": 1.9065855676946546e-05, + "loss": 1.0026, "step": 5806 }, { - "epoch": 0.1647843359818388, + "epoch": 0.16455552721811328, "grad_norm": 0.0, - "learning_rate": 1.9062495155479053e-05, - "loss": 1.0004, + "learning_rate": 1.9065468311459966e-05, + "loss": 1.0236, "step": 5807 }, { - "epoch": 0.16481271282633372, + "epoch": 0.16458386466037575, "grad_norm": 0.0, - "learning_rate": 1.9062106583197605e-05, - "loss": 1.0702, + "learning_rate": 1.9065080869611586e-05, + "loss": 0.9831, "step": 5808 }, { - "epoch": 0.1648410896708286, + "epoch": 0.16461220210263822, "grad_norm": 0.0, - "learning_rate": 1.9061717934368137e-05, - "loss": 0.926, + "learning_rate": 1.9064693351404657e-05, + "loss": 1.0572, "step": 5809 }, { - "epoch": 0.1648694665153235, + "epoch": 0.16464053954490068, "grad_norm": 0.0, - "learning_rate": 1.9061329208993927e-05, - "loss": 0.9864, + "learning_rate": 1.9064305756842455e-05, + "loss": 0.9714, "step": 5810 }, { - "epoch": 0.16489784335981839, + "epoch": 0.16466887698716315, "grad_norm": 0.0, - "learning_rate": 1.9060940407078266e-05, - "loss": 1.0761, + "learning_rate": 1.9063918085928238e-05, + "loss": 0.9727, "step": 5811 }, { - "epoch": 0.16492622020431327, + "epoch": 0.1646972144294256, "grad_norm": 0.0, - "learning_rate": 1.906055152862443e-05, - "loss": 1.1371, + "learning_rate": 1.9063530338665273e-05, + "loss": 1.0609, "step": 5812 }, { - "epoch": 0.16495459704880816, + "epoch": 0.16472555187168805, "grad_norm": 0.0, - "learning_rate": 1.9060162573635713e-05, - "loss": 1.0498, + "learning_rate": 1.9063142515056824e-05, + "loss": 0.9949, "step": 5813 }, { - "epoch": 0.16498297389330308, + "epoch": 0.16475388931395052, "grad_norm": 0.0, - "learning_rate": 1.9059773542115394e-05, - "loss": 1.0021, + "learning_rate": 1.9062754615106162e-05, + "loss": 1.093, "step": 5814 }, { - "epoch": 0.16501135073779796, + "epoch": 0.16478222675621298, "grad_norm": 0.0, - "learning_rate": 1.905938443406676e-05, - "loss": 1.0931, + "learning_rate": 1.906236663881655e-05, + "loss": 1.0214, "step": 5815 }, { - "epoch": 0.16503972758229285, + "epoch": 0.16481056419847545, "grad_norm": 0.0, - "learning_rate": 1.90589952494931e-05, - "loss": 1.0201, + "learning_rate": 1.9061978586191263e-05, + "loss": 0.9495, "step": 5816 }, { - "epoch": 0.16506810442678774, + "epoch": 0.16483890164073792, "grad_norm": 0.0, - "learning_rate": 1.9058605988397692e-05, - "loss": 1.0536, + "learning_rate": 1.9061590457233562e-05, + "loss": 0.9486, "step": 5817 }, { - "epoch": 0.16509648127128262, + "epoch": 0.16486723908300036, "grad_norm": 0.0, - "learning_rate": 1.905821665078384e-05, - "loss": 0.9903, + "learning_rate": 1.9061202251946723e-05, + "loss": 0.9595, "step": 5818 }, { - "epoch": 0.1651248581157775, + "epoch": 0.16489557652526282, "grad_norm": 0.0, - "learning_rate": 1.905782723665482e-05, - "loss": 0.997, + "learning_rate": 1.906081397033401e-05, + "loss": 1.0942, "step": 5819 }, { - "epoch": 0.16515323496027243, + "epoch": 0.1649239139675253, "grad_norm": 0.0, - "learning_rate": 1.905743774601393e-05, - "loss": 0.9667, + "learning_rate": 1.9060425612398695e-05, + "loss": 0.9945, "step": 5820 }, { - "epoch": 0.16518161180476731, + "epoch": 0.16495225140978775, "grad_norm": 0.0, - "learning_rate": 1.9057048178864456e-05, - "loss": 1.128, + "learning_rate": 1.9060037178144054e-05, + "loss": 0.9986, "step": 5821 }, { - "epoch": 0.1652099886492622, + "epoch": 0.16498058885205022, "grad_norm": 0.0, - "learning_rate": 1.905665853520969e-05, - "loss": 1.1763, + "learning_rate": 1.9059648667573355e-05, + "loss": 0.9861, "step": 5822 }, { - "epoch": 0.1652383654937571, + "epoch": 0.16500892629431269, "grad_norm": 0.0, - "learning_rate": 1.905626881505292e-05, - "loss": 1.1056, + "learning_rate": 1.905926008068987e-05, + "loss": 1.0504, "step": 5823 }, { - "epoch": 0.16526674233825198, + "epoch": 0.16503726373657512, "grad_norm": 0.0, - "learning_rate": 1.9055879018397438e-05, - "loss": 0.9867, + "learning_rate": 1.905887141749687e-05, + "loss": 1.0435, "step": 5824 }, { - "epoch": 0.1652951191827469, + "epoch": 0.1650656011788376, "grad_norm": 0.0, - "learning_rate": 1.905548914524654e-05, - "loss": 1.0419, + "learning_rate": 1.9058482677997637e-05, + "loss": 1.0716, "step": 5825 }, { - "epoch": 0.16532349602724178, + "epoch": 0.16509393862110006, "grad_norm": 0.0, - "learning_rate": 1.905509919560352e-05, - "loss": 0.9425, + "learning_rate": 1.905809386219544e-05, + "loss": 1.0217, "step": 5826 }, { - "epoch": 0.16535187287173667, + "epoch": 0.16512227606336252, "grad_norm": 0.0, - "learning_rate": 1.905470916947167e-05, - "loss": 1.0196, + "learning_rate": 1.9057704970093556e-05, + "loss": 1.0393, "step": 5827 }, { - "epoch": 0.16538024971623155, + "epoch": 0.165150613505625, "grad_norm": 0.0, - "learning_rate": 1.9054319066854285e-05, - "loss": 1.1003, + "learning_rate": 1.9057316001695257e-05, + "loss": 0.9022, "step": 5828 }, { - "epoch": 0.16540862656072644, + "epoch": 0.16517895094788745, "grad_norm": 0.0, - "learning_rate": 1.9053928887754656e-05, - "loss": 0.9435, + "learning_rate": 1.905692695700382e-05, + "loss": 1.0486, "step": 5829 }, { - "epoch": 0.16543700340522133, + "epoch": 0.1652072883901499, "grad_norm": 0.0, - "learning_rate": 1.9053538632176088e-05, - "loss": 1.0128, + "learning_rate": 1.9056537836022526e-05, + "loss": 1.0616, "step": 5830 }, { - "epoch": 0.16546538024971624, + "epoch": 0.16523562583241236, "grad_norm": 0.0, - "learning_rate": 1.905314830012187e-05, - "loss": 0.9286, + "learning_rate": 1.9056148638754654e-05, + "loss": 1.0565, "step": 5831 }, { - "epoch": 0.16549375709421113, + "epoch": 0.16526396327467482, "grad_norm": 0.0, - "learning_rate": 1.90527578915953e-05, - "loss": 1.0312, + "learning_rate": 1.9055759365203476e-05, + "loss": 1.0018, "step": 5832 }, { - "epoch": 0.16552213393870602, + "epoch": 0.1652923007169373, "grad_norm": 0.0, - "learning_rate": 1.9052367406599678e-05, - "loss": 1.1654, + "learning_rate": 1.905537001537227e-05, + "loss": 0.9854, "step": 5833 }, { - "epoch": 0.1655505107832009, + "epoch": 0.16532063815919976, "grad_norm": 0.0, - "learning_rate": 1.90519768451383e-05, - "loss": 1.0048, + "learning_rate": 1.9054980589264326e-05, + "loss": 0.988, "step": 5834 }, { - "epoch": 0.1655788876276958, + "epoch": 0.16534897560146222, "grad_norm": 0.0, - "learning_rate": 1.9051586207214468e-05, - "loss": 1.0502, + "learning_rate": 1.9054591086882918e-05, + "loss": 1.0427, "step": 5835 }, { - "epoch": 0.16560726447219068, + "epoch": 0.16537731304372466, "grad_norm": 0.0, - "learning_rate": 1.9051195492831485e-05, - "loss": 0.9796, + "learning_rate": 1.9054201508231323e-05, + "loss": 0.9106, "step": 5836 }, { - "epoch": 0.1656356413166856, + "epoch": 0.16540565048598713, "grad_norm": 0.0, - "learning_rate": 1.905080470199264e-05, - "loss": 0.9851, + "learning_rate": 1.905381185331283e-05, + "loss": 1.1528, "step": 5837 }, { - "epoch": 0.16566401816118048, + "epoch": 0.1654339879282496, "grad_norm": 0.0, - "learning_rate": 1.9050413834701244e-05, - "loss": 0.9369, + "learning_rate": 1.9053422122130712e-05, + "loss": 0.9674, "step": 5838 }, { - "epoch": 0.16569239500567537, + "epoch": 0.16546232537051206, "grad_norm": 0.0, - "learning_rate": 1.9050022890960597e-05, - "loss": 1.103, + "learning_rate": 1.9053032314688264e-05, + "loss": 1.1941, "step": 5839 }, { - "epoch": 0.16572077185017026, + "epoch": 0.16549066281277452, "grad_norm": 0.0, - "learning_rate": 1.9049631870773996e-05, - "loss": 1.0114, + "learning_rate": 1.905264243098876e-05, + "loss": 0.9551, "step": 5840 }, { - "epoch": 0.16574914869466514, + "epoch": 0.165519000255037, "grad_norm": 0.0, - "learning_rate": 1.904924077414475e-05, - "loss": 1.0319, + "learning_rate": 1.9052252471035492e-05, + "loss": 1.0362, "step": 5841 }, { - "epoch": 0.16577752553916006, + "epoch": 0.16554733769729943, "grad_norm": 0.0, - "learning_rate": 1.904884960107616e-05, - "loss": 0.9857, + "learning_rate": 1.9051862434831735e-05, + "loss": 0.9627, "step": 5842 }, { - "epoch": 0.16580590238365495, + "epoch": 0.1655756751395619, "grad_norm": 0.0, - "learning_rate": 1.904845835157153e-05, - "loss": 1.0626, + "learning_rate": 1.9051472322380776e-05, + "loss": 0.9761, "step": 5843 }, { - "epoch": 0.16583427922814983, + "epoch": 0.16560401258182436, "grad_norm": 0.0, - "learning_rate": 1.9048067025634166e-05, - "loss": 0.9158, + "learning_rate": 1.905108213368591e-05, + "loss": 0.9775, "step": 5844 }, { - "epoch": 0.16586265607264472, + "epoch": 0.16563235002408683, "grad_norm": 0.0, - "learning_rate": 1.9047675623267374e-05, - "loss": 1.0538, + "learning_rate": 1.905069186875042e-05, + "loss": 1.0384, "step": 5845 }, { - "epoch": 0.1658910329171396, + "epoch": 0.1656606874663493, "grad_norm": 0.0, - "learning_rate": 1.904728414447446e-05, - "loss": 1.0393, + "learning_rate": 1.9050301527577587e-05, + "loss": 1.1178, "step": 5846 }, { - "epoch": 0.1659194097616345, + "epoch": 0.16568902490861176, "grad_norm": 0.0, - "learning_rate": 1.9046892589258728e-05, - "loss": 1.1502, + "learning_rate": 1.9049911110170704e-05, + "loss": 1.0635, "step": 5847 }, { - "epoch": 0.1659477866061294, + "epoch": 0.1657173623508742, "grad_norm": 0.0, - "learning_rate": 1.9046500957623485e-05, - "loss": 0.946, + "learning_rate": 1.904952061653306e-05, + "loss": 1.0682, "step": 5848 }, { - "epoch": 0.1659761634506243, + "epoch": 0.16574569979313666, "grad_norm": 0.0, - "learning_rate": 1.9046109249572046e-05, - "loss": 1.0047, + "learning_rate": 1.9049130046667943e-05, + "loss": 0.9882, "step": 5849 }, { - "epoch": 0.16600454029511919, + "epoch": 0.16577403723539913, "grad_norm": 0.0, - "learning_rate": 1.9045717465107715e-05, - "loss": 1.0874, + "learning_rate": 1.9048739400578645e-05, + "loss": 1.0767, "step": 5850 }, { - "epoch": 0.16603291713961407, + "epoch": 0.1658023746776616, "grad_norm": 0.0, - "learning_rate": 1.90453256042338e-05, - "loss": 1.101, + "learning_rate": 1.9048348678268453e-05, + "loss": 1.0708, "step": 5851 }, { - "epoch": 0.16606129398410896, + "epoch": 0.16583071211992406, "grad_norm": 0.0, - "learning_rate": 1.9044933666953615e-05, - "loss": 1.1093, + "learning_rate": 1.904795787974066e-05, + "loss": 0.9433, "step": 5852 }, { - "epoch": 0.16608967082860385, + "epoch": 0.16585904956218653, "grad_norm": 0.0, - "learning_rate": 1.9044541653270467e-05, - "loss": 1.0895, + "learning_rate": 1.904756700499856e-05, + "loss": 0.9648, "step": 5853 }, { - "epoch": 0.16611804767309876, + "epoch": 0.16588738700444897, "grad_norm": 0.0, - "learning_rate": 1.904414956318767e-05, - "loss": 0.8993, + "learning_rate": 1.9047176054045443e-05, + "loss": 1.0615, "step": 5854 }, { - "epoch": 0.16614642451759365, + "epoch": 0.16591572444671143, "grad_norm": 0.0, - "learning_rate": 1.9043757396708536e-05, - "loss": 1.0568, + "learning_rate": 1.90467850268846e-05, + "loss": 1.0994, "step": 5855 }, { - "epoch": 0.16617480136208854, + "epoch": 0.1659440618889739, "grad_norm": 0.0, - "learning_rate": 1.9043365153836378e-05, - "loss": 0.9394, + "learning_rate": 1.904639392351933e-05, + "loss": 1.0241, "step": 5856 }, { - "epoch": 0.16620317820658342, + "epoch": 0.16597239933123636, "grad_norm": 0.0, - "learning_rate": 1.9042972834574506e-05, - "loss": 1.0204, + "learning_rate": 1.904600274395292e-05, + "loss": 1.0147, "step": 5857 }, { - "epoch": 0.1662315550510783, + "epoch": 0.16600073677349883, "grad_norm": 0.0, - "learning_rate": 1.9042580438926234e-05, - "loss": 1.0286, + "learning_rate": 1.9045611488188672e-05, + "loss": 0.9757, "step": 5858 }, { - "epoch": 0.1662599318955732, + "epoch": 0.1660290742157613, "grad_norm": 0.0, - "learning_rate": 1.9042187966894882e-05, - "loss": 1.0114, + "learning_rate": 1.904522015622988e-05, + "loss": 1.0439, "step": 5859 }, { - "epoch": 0.1662883087400681, + "epoch": 0.16605741165802373, "grad_norm": 0.0, - "learning_rate": 1.904179541848376e-05, - "loss": 1.0518, + "learning_rate": 1.9044828748079842e-05, + "loss": 1.0198, "step": 5860 }, { - "epoch": 0.166316685584563, + "epoch": 0.1660857491002862, "grad_norm": 0.0, - "learning_rate": 1.9041402793696186e-05, - "loss": 1.0491, + "learning_rate": 1.904443726374185e-05, + "loss": 0.91, "step": 5861 }, { - "epoch": 0.1663450624290579, + "epoch": 0.16611408654254867, "grad_norm": 0.0, - "learning_rate": 1.9041010092535477e-05, - "loss": 0.9589, + "learning_rate": 1.9044045703219207e-05, + "loss": 1.0222, "step": 5862 }, { - "epoch": 0.16637343927355278, + "epoch": 0.16614242398481113, "grad_norm": 0.0, - "learning_rate": 1.9040617315004945e-05, - "loss": 1.0249, + "learning_rate": 1.9043654066515206e-05, + "loss": 1.1337, "step": 5863 }, { - "epoch": 0.16640181611804766, + "epoch": 0.1661707614270736, "grad_norm": 0.0, - "learning_rate": 1.9040224461107918e-05, - "loss": 0.9607, + "learning_rate": 1.904326235363315e-05, + "loss": 0.9615, "step": 5864 }, { - "epoch": 0.16643019296254258, + "epoch": 0.16619909886933606, "grad_norm": 0.0, - "learning_rate": 1.9039831530847706e-05, - "loss": 0.9552, + "learning_rate": 1.9042870564576338e-05, + "loss": 0.8545, "step": 5865 }, { - "epoch": 0.16645856980703747, + "epoch": 0.1662274363115985, "grad_norm": 0.0, - "learning_rate": 1.903943852422763e-05, - "loss": 1.0603, + "learning_rate": 1.904247869934807e-05, + "loss": 1.0506, "step": 5866 }, { - "epoch": 0.16648694665153235, + "epoch": 0.16625577375386097, "grad_norm": 0.0, - "learning_rate": 1.903904544125101e-05, - "loss": 1.0385, + "learning_rate": 1.904208675795164e-05, + "loss": 0.9833, "step": 5867 }, { - "epoch": 0.16651532349602724, + "epoch": 0.16628411119612344, "grad_norm": 0.0, - "learning_rate": 1.903865228192117e-05, - "loss": 1.0562, + "learning_rate": 1.9041694740390362e-05, + "loss": 1.0392, "step": 5868 }, { - "epoch": 0.16654370034052213, + "epoch": 0.1663124486383859, "grad_norm": 0.0, - "learning_rate": 1.9038259046241424e-05, - "loss": 0.9934, + "learning_rate": 1.9041302646667526e-05, + "loss": 1.0969, "step": 5869 }, { - "epoch": 0.16657207718501701, + "epoch": 0.16634078608064837, "grad_norm": 0.0, - "learning_rate": 1.90378657342151e-05, - "loss": 0.9786, + "learning_rate": 1.9040910476786443e-05, + "loss": 1.0488, "step": 5870 }, { - "epoch": 0.16660045402951193, + "epoch": 0.16636912352291083, "grad_norm": 0.0, - "learning_rate": 1.903747234584552e-05, - "loss": 1.0144, + "learning_rate": 1.9040518230750414e-05, + "loss": 0.9957, "step": 5871 }, { - "epoch": 0.16662883087400682, + "epoch": 0.16639746096517327, "grad_norm": 0.0, - "learning_rate": 1.9037078881136e-05, - "loss": 1.1326, + "learning_rate": 1.9040125908562745e-05, + "loss": 0.9948, "step": 5872 }, { - "epoch": 0.1666572077185017, + "epoch": 0.16642579840743574, "grad_norm": 0.0, - "learning_rate": 1.903668534008987e-05, - "loss": 0.8886, + "learning_rate": 1.9039733510226735e-05, + "loss": 0.9941, "step": 5873 }, { - "epoch": 0.1666855845629966, + "epoch": 0.1664541358496982, "grad_norm": 0.0, - "learning_rate": 1.9036291722710454e-05, - "loss": 1.1085, + "learning_rate": 1.9039341035745696e-05, + "loss": 1.0053, "step": 5874 }, { - "epoch": 0.16671396140749148, + "epoch": 0.16648247329196067, "grad_norm": 0.0, - "learning_rate": 1.9035898029001076e-05, - "loss": 1.0781, + "learning_rate": 1.903894848512293e-05, + "loss": 1.0703, "step": 5875 }, { - "epoch": 0.16674233825198637, + "epoch": 0.16651081073422314, "grad_norm": 0.0, - "learning_rate": 1.903550425896506e-05, - "loss": 0.9869, + "learning_rate": 1.9038555858361743e-05, + "loss": 1.0202, "step": 5876 }, { - "epoch": 0.16677071509648128, + "epoch": 0.1665391481764856, "grad_norm": 0.0, - "learning_rate": 1.903511041260573e-05, - "loss": 0.9988, + "learning_rate": 1.9038163155465446e-05, + "loss": 0.9717, "step": 5877 }, { - "epoch": 0.16679909194097617, + "epoch": 0.16656748561874804, "grad_norm": 0.0, - "learning_rate": 1.903471648992642e-05, - "loss": 0.9881, + "learning_rate": 1.9037770376437343e-05, + "loss": 0.9303, "step": 5878 }, { - "epoch": 0.16682746878547106, + "epoch": 0.1665958230610105, "grad_norm": 0.0, - "learning_rate": 1.9034322490930456e-05, - "loss": 0.9726, + "learning_rate": 1.903737752128074e-05, + "loss": 1.014, "step": 5879 }, { - "epoch": 0.16685584562996594, + "epoch": 0.16662416050327297, "grad_norm": 0.0, - "learning_rate": 1.903392841562116e-05, - "loss": 1.0274, + "learning_rate": 1.9036984589998953e-05, + "loss": 0.9191, "step": 5880 }, { - "epoch": 0.16688422247446083, + "epoch": 0.16665249794553544, "grad_norm": 0.0, - "learning_rate": 1.9033534264001867e-05, - "loss": 1.0284, + "learning_rate": 1.9036591582595294e-05, + "loss": 1.1168, "step": 5881 }, { - "epoch": 0.16691259931895575, + "epoch": 0.1666808353877979, "grad_norm": 0.0, - "learning_rate": 1.90331400360759e-05, - "loss": 0.9948, + "learning_rate": 1.9036198499073063e-05, + "loss": 1.0826, "step": 5882 }, { - "epoch": 0.16694097616345063, + "epoch": 0.16670917283006037, "grad_norm": 0.0, - "learning_rate": 1.9032745731846598e-05, - "loss": 1.0823, + "learning_rate": 1.9035805339435577e-05, + "loss": 1.0355, "step": 5883 }, { - "epoch": 0.16696935300794552, + "epoch": 0.1667375102723228, "grad_norm": 0.0, - "learning_rate": 1.9032351351317283e-05, - "loss": 0.9273, + "learning_rate": 1.9035412103686147e-05, + "loss": 1.0608, "step": 5884 }, { - "epoch": 0.1669977298524404, + "epoch": 0.16676584771458527, "grad_norm": 0.0, - "learning_rate": 1.903195689449129e-05, - "loss": 1.0632, + "learning_rate": 1.903501879182809e-05, + "loss": 1.0827, "step": 5885 }, { - "epoch": 0.1670261066969353, + "epoch": 0.16679418515684774, "grad_norm": 0.0, - "learning_rate": 1.9031562361371955e-05, - "loss": 1.0034, + "learning_rate": 1.903462540386471e-05, + "loss": 1.0425, "step": 5886 }, { - "epoch": 0.16705448354143018, + "epoch": 0.1668225225991102, "grad_norm": 0.0, - "learning_rate": 1.9031167751962604e-05, - "loss": 0.9451, + "learning_rate": 1.9034231939799328e-05, + "loss": 1.0857, "step": 5887 }, { - "epoch": 0.1670828603859251, + "epoch": 0.16685086004137267, "grad_norm": 0.0, - "learning_rate": 1.9030773066266573e-05, - "loss": 0.9405, + "learning_rate": 1.9033838399635255e-05, + "loss": 0.9145, "step": 5888 }, { - "epoch": 0.16711123723041998, + "epoch": 0.16687919748363514, "grad_norm": 0.0, - "learning_rate": 1.9030378304287198e-05, - "loss": 0.9706, + "learning_rate": 1.9033444783375806e-05, + "loss": 1.072, "step": 5889 }, { - "epoch": 0.16713961407491487, + "epoch": 0.16690753492589758, "grad_norm": 0.0, - "learning_rate": 1.902998346602781e-05, - "loss": 1.0038, + "learning_rate": 1.9033051091024293e-05, + "loss": 0.9998, "step": 5890 }, { - "epoch": 0.16716799091940976, + "epoch": 0.16693587236816004, "grad_norm": 0.0, - "learning_rate": 1.9029588551491743e-05, - "loss": 0.9496, + "learning_rate": 1.9032657322584045e-05, + "loss": 0.9802, "step": 5891 }, { - "epoch": 0.16719636776390465, + "epoch": 0.1669642098104225, "grad_norm": 0.0, - "learning_rate": 1.902919356068234e-05, - "loss": 0.9821, + "learning_rate": 1.9032263478058365e-05, + "loss": 0.9733, "step": 5892 }, { - "epoch": 0.16722474460839953, + "epoch": 0.16699254725268498, "grad_norm": 0.0, - "learning_rate": 1.902879849360293e-05, - "loss": 0.9883, + "learning_rate": 1.9031869557450575e-05, + "loss": 0.9995, "step": 5893 }, { - "epoch": 0.16725312145289445, + "epoch": 0.16702088469494744, "grad_norm": 0.0, - "learning_rate": 1.9028403350256855e-05, - "loss": 1.1369, + "learning_rate": 1.9031475560763994e-05, + "loss": 0.9671, "step": 5894 }, { - "epoch": 0.16728149829738934, + "epoch": 0.1670492221372099, "grad_norm": 0.0, - "learning_rate": 1.902800813064745e-05, - "loss": 1.082, + "learning_rate": 1.9031081488001942e-05, + "loss": 1.0191, "step": 5895 }, { - "epoch": 0.16730987514188422, + "epoch": 0.16707755957947235, "grad_norm": 0.0, - "learning_rate": 1.9027612834778058e-05, - "loss": 0.9657, + "learning_rate": 1.903068733916774e-05, + "loss": 0.9758, "step": 5896 }, { - "epoch": 0.1673382519863791, + "epoch": 0.1671058970217348, "grad_norm": 0.0, - "learning_rate": 1.902721746265201e-05, - "loss": 1.0138, + "learning_rate": 1.9030293114264698e-05, + "loss": 0.9978, "step": 5897 }, { - "epoch": 0.167366628830874, + "epoch": 0.16713423446399728, "grad_norm": 0.0, - "learning_rate": 1.9026822014272653e-05, - "loss": 0.9352, + "learning_rate": 1.9029898813296147e-05, + "loss": 1.0212, "step": 5898 }, { - "epoch": 0.16739500567536889, + "epoch": 0.16716257190625974, "grad_norm": 0.0, - "learning_rate": 1.9026426489643325e-05, - "loss": 0.9442, + "learning_rate": 1.9029504436265406e-05, + "loss": 1.0004, "step": 5899 }, { - "epoch": 0.1674233825198638, + "epoch": 0.1671909093485222, "grad_norm": 0.0, - "learning_rate": 1.9026030888767364e-05, - "loss": 1.0444, + "learning_rate": 1.90291099831758e-05, + "loss": 1.0071, "step": 5900 }, { - "epoch": 0.1674517593643587, + "epoch": 0.16721924679078468, "grad_norm": 0.0, - "learning_rate": 1.902563521164811e-05, - "loss": 1.0089, + "learning_rate": 1.902871545403064e-05, + "loss": 1.0898, "step": 5901 }, { - "epoch": 0.16748013620885357, + "epoch": 0.16724758423304711, "grad_norm": 0.0, - "learning_rate": 1.9025239458288916e-05, - "loss": 1.0294, + "learning_rate": 1.902832084883326e-05, + "loss": 1.0101, "step": 5902 }, { - "epoch": 0.16750851305334846, + "epoch": 0.16727592167530958, "grad_norm": 0.0, - "learning_rate": 1.902484362869312e-05, - "loss": 0.9901, + "learning_rate": 1.9027926167586982e-05, + "loss": 1.0773, "step": 5903 }, { - "epoch": 0.16753688989784335, + "epoch": 0.16730425911757205, "grad_norm": 0.0, - "learning_rate": 1.9024447722864062e-05, - "loss": 1.0133, + "learning_rate": 1.9027531410295128e-05, + "loss": 1.0385, "step": 5904 }, { - "epoch": 0.16756526674233826, + "epoch": 0.1673325965598345, "grad_norm": 0.0, - "learning_rate": 1.902405174080509e-05, - "loss": 1.1036, + "learning_rate": 1.9027136576961027e-05, + "loss": 1.0431, "step": 5905 }, { - "epoch": 0.16759364358683315, + "epoch": 0.16736093400209698, "grad_norm": 0.0, - "learning_rate": 1.9023655682519548e-05, - "loss": 1.0472, + "learning_rate": 1.9026741667588002e-05, + "loss": 1.0411, "step": 5906 }, { - "epoch": 0.16762202043132804, + "epoch": 0.16738927144435944, "grad_norm": 0.0, - "learning_rate": 1.9023259548010777e-05, - "loss": 1.0041, + "learning_rate": 1.9026346682179374e-05, + "loss": 1.1446, "step": 5907 }, { - "epoch": 0.16765039727582293, + "epoch": 0.16741760888662188, "grad_norm": 0.0, - "learning_rate": 1.9022863337282125e-05, - "loss": 0.9723, + "learning_rate": 1.9025951620738483e-05, + "loss": 0.9794, "step": 5908 }, { - "epoch": 0.16767877412031781, + "epoch": 0.16744594632888435, "grad_norm": 0.0, - "learning_rate": 1.9022467050336945e-05, - "loss": 1.0122, + "learning_rate": 1.9025556483268647e-05, + "loss": 1.0736, "step": 5909 }, { - "epoch": 0.1677071509648127, + "epoch": 0.16747428377114681, "grad_norm": 0.0, - "learning_rate": 1.902207068717858e-05, - "loss": 1.0241, + "learning_rate": 1.90251612697732e-05, + "loss": 0.9707, "step": 5910 }, { - "epoch": 0.16773552780930762, + "epoch": 0.16750262121340928, "grad_norm": 0.0, - "learning_rate": 1.902167424781038e-05, - "loss": 0.9487, + "learning_rate": 1.9024765980255467e-05, + "loss": 0.9968, "step": 5911 }, { - "epoch": 0.1677639046538025, + "epoch": 0.16753095865567175, "grad_norm": 0.0, - "learning_rate": 1.9021277732235687e-05, - "loss": 1.0908, + "learning_rate": 1.9024370614718774e-05, + "loss": 1.0614, "step": 5912 }, { - "epoch": 0.1677922814982974, + "epoch": 0.1675592960979342, "grad_norm": 0.0, - "learning_rate": 1.9020881140457857e-05, - "loss": 0.9404, + "learning_rate": 1.902397517316646e-05, + "loss": 1.0603, "step": 5913 }, { - "epoch": 0.16782065834279228, + "epoch": 0.16758763354019665, "grad_norm": 0.0, - "learning_rate": 1.902048447248024e-05, - "loss": 1.0182, + "learning_rate": 1.9023579655601852e-05, + "loss": 0.8893, "step": 5914 }, { - "epoch": 0.16784903518728717, + "epoch": 0.16761597098245912, "grad_norm": 0.0, - "learning_rate": 1.9020087728306188e-05, - "loss": 0.9866, + "learning_rate": 1.9023184062028284e-05, + "loss": 1.0201, "step": 5915 }, { - "epoch": 0.16787741203178205, + "epoch": 0.16764430842472158, "grad_norm": 0.0, - "learning_rate": 1.9019690907939047e-05, - "loss": 1.0641, + "learning_rate": 1.9022788392449085e-05, + "loss": 1.0064, "step": 5916 }, { - "epoch": 0.16790578887627697, + "epoch": 0.16767264586698405, "grad_norm": 0.0, - "learning_rate": 1.901929401138217e-05, - "loss": 1.0016, + "learning_rate": 1.9022392646867587e-05, + "loss": 0.9963, "step": 5917 }, { - "epoch": 0.16793416572077186, + "epoch": 0.16770098330924652, "grad_norm": 0.0, - "learning_rate": 1.901889703863891e-05, - "loss": 0.9825, + "learning_rate": 1.9021996825287126e-05, + "loss": 1.0194, "step": 5918 }, { - "epoch": 0.16796254256526674, + "epoch": 0.16772932075150898, "grad_norm": 0.0, - "learning_rate": 1.9018499989712627e-05, - "loss": 0.9775, + "learning_rate": 1.9021600927711037e-05, + "loss": 1.0379, "step": 5919 }, { - "epoch": 0.16799091940976163, + "epoch": 0.16775765819377142, "grad_norm": 0.0, - "learning_rate": 1.9018102864606664e-05, - "loss": 0.9767, + "learning_rate": 1.902120495414265e-05, + "loss": 0.9777, "step": 5920 }, { - "epoch": 0.16801929625425652, + "epoch": 0.16778599563603389, "grad_norm": 0.0, - "learning_rate": 1.9017705663324382e-05, - "loss": 0.9908, + "learning_rate": 1.9020808904585307e-05, + "loss": 1.1043, "step": 5921 }, { - "epoch": 0.16804767309875143, + "epoch": 0.16781433307829635, "grad_norm": 0.0, - "learning_rate": 1.9017308385869136e-05, - "loss": 1.0011, + "learning_rate": 1.902041277904234e-05, + "loss": 1.0379, "step": 5922 }, { - "epoch": 0.16807604994324632, + "epoch": 0.16784267052055882, "grad_norm": 0.0, - "learning_rate": 1.901691103224428e-05, - "loss": 1.0515, + "learning_rate": 1.9020016577517087e-05, + "loss": 1.0491, "step": 5923 }, { - "epoch": 0.1681044267877412, + "epoch": 0.16787100796282128, "grad_norm": 0.0, - "learning_rate": 1.9016513602453172e-05, - "loss": 1.1144, + "learning_rate": 1.901962030001288e-05, + "loss": 1.0624, "step": 5924 }, { - "epoch": 0.1681328036322361, + "epoch": 0.16789934540508375, "grad_norm": 0.0, - "learning_rate": 1.9016116096499163e-05, - "loss": 0.9018, + "learning_rate": 1.9019223946533066e-05, + "loss": 1.1165, "step": 5925 }, { - "epoch": 0.16816118047673098, + "epoch": 0.1679276828473462, "grad_norm": 0.0, - "learning_rate": 1.901571851438562e-05, - "loss": 0.9913, + "learning_rate": 1.9018827517080982e-05, + "loss": 0.9444, "step": 5926 }, { - "epoch": 0.16818955732122587, + "epoch": 0.16795602028960865, "grad_norm": 0.0, - "learning_rate": 1.90153208561159e-05, - "loss": 0.9883, + "learning_rate": 1.901843101165996e-05, + "loss": 1.0858, "step": 5927 }, { - "epoch": 0.16821793416572078, + "epoch": 0.16798435773187112, "grad_norm": 0.0, - "learning_rate": 1.9014923121693355e-05, - "loss": 1.104, + "learning_rate": 1.9018034430273346e-05, + "loss": 1.0598, "step": 5928 }, { - "epoch": 0.16824631101021567, + "epoch": 0.16801269517413359, "grad_norm": 0.0, - "learning_rate": 1.901452531112135e-05, - "loss": 1.0536, + "learning_rate": 1.9017637772924483e-05, + "loss": 1.0851, "step": 5929 }, { - "epoch": 0.16827468785471056, + "epoch": 0.16804103261639605, "grad_norm": 0.0, - "learning_rate": 1.9014127424403248e-05, - "loss": 1.0265, + "learning_rate": 1.9017241039616704e-05, + "loss": 1.0625, "step": 5930 }, { - "epoch": 0.16830306469920545, + "epoch": 0.16806937005865852, "grad_norm": 0.0, - "learning_rate": 1.90137294615424e-05, - "loss": 0.9207, + "learning_rate": 1.901684423035336e-05, + "loss": 1.0963, "step": 5931 }, { - "epoch": 0.16833144154370033, + "epoch": 0.16809770750092096, "grad_norm": 0.0, - "learning_rate": 1.9013331422542184e-05, - "loss": 0.9147, + "learning_rate": 1.901644734513778e-05, + "loss": 1.1015, "step": 5932 }, { - "epoch": 0.16835981838819522, + "epoch": 0.16812604494318342, "grad_norm": 0.0, - "learning_rate": 1.9012933307405945e-05, - "loss": 1.0406, + "learning_rate": 1.9016050383973322e-05, + "loss": 1.0666, "step": 5933 }, { - "epoch": 0.16838819523269014, + "epoch": 0.1681543823854459, "grad_norm": 0.0, - "learning_rate": 1.9012535116137054e-05, - "loss": 1.0515, + "learning_rate": 1.9015653346863322e-05, + "loss": 1.1084, "step": 5934 }, { - "epoch": 0.16841657207718502, + "epoch": 0.16818271982770835, "grad_norm": 0.0, - "learning_rate": 1.9012136848738874e-05, - "loss": 1.0546, + "learning_rate": 1.901525623381113e-05, + "loss": 1.0202, "step": 5935 }, { - "epoch": 0.1684449489216799, + "epoch": 0.16821105726997082, "grad_norm": 0.0, - "learning_rate": 1.901173850521477e-05, - "loss": 1.0135, + "learning_rate": 1.9014859044820082e-05, + "loss": 1.0495, "step": 5936 }, { - "epoch": 0.1684733257661748, + "epoch": 0.1682393947122333, "grad_norm": 0.0, - "learning_rate": 1.9011340085568103e-05, - "loss": 1.0218, + "learning_rate": 1.901446177989353e-05, + "loss": 1.0182, "step": 5937 }, { - "epoch": 0.16850170261066968, + "epoch": 0.16826773215449572, "grad_norm": 0.0, - "learning_rate": 1.9010941589802245e-05, - "loss": 1.0179, + "learning_rate": 1.9014064439034818e-05, + "loss": 1.0817, "step": 5938 }, { - "epoch": 0.16853007945516457, + "epoch": 0.1682960695967582, "grad_norm": 0.0, - "learning_rate": 1.901054301792056e-05, - "loss": 1.048, + "learning_rate": 1.9013667022247297e-05, + "loss": 1.0165, "step": 5939 }, { - "epoch": 0.1685584562996595, + "epoch": 0.16832440703902066, "grad_norm": 0.0, - "learning_rate": 1.9010144369926407e-05, - "loss": 1.0368, + "learning_rate": 1.901326952953431e-05, + "loss": 0.9936, "step": 5940 }, { - "epoch": 0.16858683314415437, + "epoch": 0.16835274448128312, "grad_norm": 0.0, - "learning_rate": 1.9009745645823162e-05, - "loss": 0.8713, + "learning_rate": 1.9012871960899206e-05, + "loss": 1.0155, "step": 5941 }, { - "epoch": 0.16861520998864926, + "epoch": 0.1683810819235456, "grad_norm": 0.0, - "learning_rate": 1.900934684561419e-05, - "loss": 0.9048, + "learning_rate": 1.9012474316345335e-05, + "loss": 0.9671, "step": 5942 }, { - "epoch": 0.16864358683314415, + "epoch": 0.16840941936580806, "grad_norm": 0.0, - "learning_rate": 1.900894796930286e-05, - "loss": 0.9547, + "learning_rate": 1.9012076595876045e-05, + "loss": 0.9495, "step": 5943 }, { - "epoch": 0.16867196367763904, + "epoch": 0.1684377568080705, "grad_norm": 0.0, - "learning_rate": 1.9008549016892544e-05, - "loss": 1.0149, + "learning_rate": 1.901167879949469e-05, + "loss": 1.0383, "step": 5944 }, { - "epoch": 0.16870034052213395, + "epoch": 0.16846609425033296, "grad_norm": 0.0, - "learning_rate": 1.9008149988386608e-05, - "loss": 0.9929, + "learning_rate": 1.9011280927204616e-05, + "loss": 1.0613, "step": 5945 }, { - "epoch": 0.16872871736662884, + "epoch": 0.16849443169259543, "grad_norm": 0.0, - "learning_rate": 1.9007750883788422e-05, - "loss": 1.1247, + "learning_rate": 1.901088297900918e-05, + "loss": 1.1078, "step": 5946 }, { - "epoch": 0.16875709421112373, + "epoch": 0.1685227691348579, "grad_norm": 0.0, - "learning_rate": 1.900735170310136e-05, - "loss": 0.9759, + "learning_rate": 1.9010484954911724e-05, + "loss": 0.9988, "step": 5947 }, { - "epoch": 0.1687854710556186, + "epoch": 0.16855110657712036, "grad_norm": 0.0, - "learning_rate": 1.9006952446328797e-05, - "loss": 1.0185, + "learning_rate": 1.9010086854915613e-05, + "loss": 1.0281, "step": 5948 }, { - "epoch": 0.1688138479001135, + "epoch": 0.16857944401938282, "grad_norm": 0.0, - "learning_rate": 1.90065531134741e-05, - "loss": 0.9539, + "learning_rate": 1.900968867902419e-05, + "loss": 0.9888, "step": 5949 }, { - "epoch": 0.1688422247446084, + "epoch": 0.16860778146164526, "grad_norm": 0.0, - "learning_rate": 1.9006153704540643e-05, - "loss": 1.0099, + "learning_rate": 1.9009290427240817e-05, + "loss": 0.9199, "step": 5950 }, { - "epoch": 0.1688706015891033, + "epoch": 0.16863611890390773, "grad_norm": 0.0, - "learning_rate": 1.90057542195318e-05, - "loss": 1.0758, + "learning_rate": 1.9008892099568847e-05, + "loss": 0.9199, "step": 5951 }, { - "epoch": 0.1688989784335982, + "epoch": 0.1686644563461702, "grad_norm": 0.0, - "learning_rate": 1.9005354658450948e-05, - "loss": 0.9981, + "learning_rate": 1.900849369601163e-05, + "loss": 0.8648, "step": 5952 }, { - "epoch": 0.16892735527809308, + "epoch": 0.16869279378843266, "grad_norm": 0.0, - "learning_rate": 1.9004955021301458e-05, - "loss": 0.9448, + "learning_rate": 1.9008095216572526e-05, + "loss": 0.8949, "step": 5953 }, { - "epoch": 0.16895573212258796, + "epoch": 0.16872113123069513, "grad_norm": 0.0, - "learning_rate": 1.900455530808671e-05, - "loss": 1.0793, + "learning_rate": 1.900769666125489e-05, + "loss": 1.0537, "step": 5954 }, { - "epoch": 0.16898410896708285, + "epoch": 0.1687494686729576, "grad_norm": 0.0, - "learning_rate": 1.900415551881008e-05, - "loss": 1.0434, + "learning_rate": 1.9007298030062085e-05, + "loss": 1.0094, "step": 5955 }, { - "epoch": 0.16901248581157774, + "epoch": 0.16877780611522003, "grad_norm": 0.0, - "learning_rate": 1.900375565347494e-05, - "loss": 1.0352, + "learning_rate": 1.9006899322997457e-05, + "loss": 1.016, "step": 5956 }, { - "epoch": 0.16904086265607265, + "epoch": 0.1688061435574825, "grad_norm": 0.0, - "learning_rate": 1.9003355712084673e-05, - "loss": 0.9385, + "learning_rate": 1.9006500540064377e-05, + "loss": 1.0535, "step": 5957 }, { - "epoch": 0.16906923950056754, + "epoch": 0.16883448099974496, "grad_norm": 0.0, - "learning_rate": 1.900295569464266e-05, - "loss": 0.9962, + "learning_rate": 1.9006101681266194e-05, + "loss": 0.9818, "step": 5958 }, { - "epoch": 0.16909761634506243, + "epoch": 0.16886281844200743, "grad_norm": 0.0, - "learning_rate": 1.900255560115227e-05, - "loss": 0.9182, + "learning_rate": 1.9005702746606274e-05, + "loss": 1.0285, "step": 5959 }, { - "epoch": 0.16912599318955732, + "epoch": 0.1688911558842699, "grad_norm": 0.0, - "learning_rate": 1.900215543161689e-05, - "loss": 1.0722, + "learning_rate": 1.9005303736087976e-05, + "loss": 1.0633, "step": 5960 }, { - "epoch": 0.1691543700340522, + "epoch": 0.16891949332653236, "grad_norm": 0.0, - "learning_rate": 1.9001755186039896e-05, - "loss": 0.9645, + "learning_rate": 1.9004904649714663e-05, + "loss": 1.0764, "step": 5961 }, { - "epoch": 0.16918274687854712, + "epoch": 0.1689478307687948, "grad_norm": 0.0, - "learning_rate": 1.9001354864424673e-05, - "loss": 1.0315, + "learning_rate": 1.9004505487489692e-05, + "loss": 1.0731, "step": 5962 }, { - "epoch": 0.169211123723042, + "epoch": 0.16897616821105726, "grad_norm": 0.0, - "learning_rate": 1.9000954466774603e-05, - "loss": 1.0486, + "learning_rate": 1.9004106249416427e-05, + "loss": 1.0661, "step": 5963 }, { - "epoch": 0.1692395005675369, + "epoch": 0.16900450565331973, "grad_norm": 0.0, - "learning_rate": 1.9000553993093063e-05, - "loss": 0.9088, + "learning_rate": 1.9003706935498233e-05, + "loss": 0.9479, "step": 5964 }, { - "epoch": 0.16926787741203178, + "epoch": 0.1690328430955822, "grad_norm": 0.0, - "learning_rate": 1.900015344338344e-05, - "loss": 1.007, + "learning_rate": 1.900330754573847e-05, + "loss": 1.0359, "step": 5965 }, { - "epoch": 0.16929625425652667, + "epoch": 0.16906118053784466, "grad_norm": 0.0, - "learning_rate": 1.8999752817649116e-05, - "loss": 1.1231, + "learning_rate": 1.9002908080140504e-05, + "loss": 1.0353, "step": 5966 }, { - "epoch": 0.16932463110102156, + "epoch": 0.16908951798010713, "grad_norm": 0.0, - "learning_rate": 1.8999352115893476e-05, - "loss": 1.0846, + "learning_rate": 1.90025085387077e-05, + "loss": 0.9718, "step": 5967 }, { - "epoch": 0.16935300794551647, + "epoch": 0.16911785542236957, "grad_norm": 0.0, - "learning_rate": 1.8998951338119904e-05, - "loss": 1.0361, + "learning_rate": 1.9002108921443424e-05, + "loss": 1.0427, "step": 5968 }, { - "epoch": 0.16938138479001136, + "epoch": 0.16914619286463203, "grad_norm": 0.0, - "learning_rate": 1.8998550484331787e-05, - "loss": 0.9924, + "learning_rate": 1.9001709228351042e-05, + "loss": 1.0933, "step": 5969 }, { - "epoch": 0.16940976163450625, + "epoch": 0.1691745303068945, "grad_norm": 0.0, - "learning_rate": 1.8998149554532505e-05, - "loss": 1.0581, + "learning_rate": 1.900130945943392e-05, + "loss": 1.0303, "step": 5970 }, { - "epoch": 0.16943813847900113, + "epoch": 0.16920286774915697, "grad_norm": 0.0, - "learning_rate": 1.899774854872545e-05, - "loss": 0.992, + "learning_rate": 1.900090961469543e-05, + "loss": 0.9095, "step": 5971 }, { - "epoch": 0.16946651532349602, + "epoch": 0.16923120519141943, "grad_norm": 0.0, - "learning_rate": 1.8997347466914014e-05, - "loss": 1.1344, + "learning_rate": 1.900050969413893e-05, + "loss": 1.0622, "step": 5972 }, { - "epoch": 0.1694948921679909, + "epoch": 0.1692595426336819, "grad_norm": 0.0, - "learning_rate": 1.8996946309101574e-05, - "loss": 1.0264, + "learning_rate": 1.9000109697767798e-05, + "loss": 0.975, "step": 5973 }, { - "epoch": 0.16952326901248582, + "epoch": 0.16928788007594434, "grad_norm": 0.0, - "learning_rate": 1.8996545075291525e-05, - "loss": 0.9631, + "learning_rate": 1.8999709625585397e-05, + "loss": 0.9593, "step": 5974 }, { - "epoch": 0.1695516458569807, + "epoch": 0.1693162175182068, "grad_norm": 0.0, - "learning_rate": 1.899614376548726e-05, - "loss": 0.9991, + "learning_rate": 1.8999309477595103e-05, + "loss": 0.9509, "step": 5975 }, { - "epoch": 0.1695800227014756, + "epoch": 0.16934455496046927, "grad_norm": 0.0, - "learning_rate": 1.8995742379692158e-05, - "loss": 1.0496, + "learning_rate": 1.8998909253800283e-05, + "loss": 1.0133, "step": 5976 }, { - "epoch": 0.16960839954597048, + "epoch": 0.16937289240273173, "grad_norm": 0.0, - "learning_rate": 1.899534091790962e-05, - "loss": 1.0117, + "learning_rate": 1.899850895420431e-05, + "loss": 0.999, "step": 5977 }, { - "epoch": 0.16963677639046537, + "epoch": 0.1694012298449942, "grad_norm": 0.0, - "learning_rate": 1.899493938014303e-05, - "loss": 1.0491, + "learning_rate": 1.899810857881055e-05, + "loss": 1.0561, "step": 5978 }, { - "epoch": 0.16966515323496026, + "epoch": 0.16942956728725667, "grad_norm": 0.0, - "learning_rate": 1.8994537766395784e-05, - "loss": 0.9967, + "learning_rate": 1.8997708127622384e-05, + "loss": 1.0912, "step": 5979 }, { - "epoch": 0.16969353007945517, + "epoch": 0.1694579047295191, "grad_norm": 0.0, - "learning_rate": 1.899413607667127e-05, - "loss": 1.0491, + "learning_rate": 1.899730760064318e-05, + "loss": 1.0467, "step": 5980 }, { - "epoch": 0.16972190692395006, + "epoch": 0.16948624217178157, "grad_norm": 0.0, - "learning_rate": 1.899373431097289e-05, - "loss": 0.8124, + "learning_rate": 1.8996906997876312e-05, + "loss": 0.8781, "step": 5981 }, { - "epoch": 0.16975028376844495, + "epoch": 0.16951457961404404, "grad_norm": 0.0, - "learning_rate": 1.899333246930403e-05, - "loss": 1.0354, + "learning_rate": 1.8996506319325156e-05, + "loss": 0.9037, "step": 5982 }, { - "epoch": 0.16977866061293984, + "epoch": 0.1695429170563065, "grad_norm": 0.0, - "learning_rate": 1.8992930551668084e-05, - "loss": 0.95, + "learning_rate": 1.899610556499309e-05, + "loss": 1.0034, "step": 5983 }, { - "epoch": 0.16980703745743472, + "epoch": 0.16957125449856897, "grad_norm": 0.0, - "learning_rate": 1.899252855806845e-05, - "loss": 0.9635, + "learning_rate": 1.8995704734883484e-05, + "loss": 1.0683, "step": 5984 }, { - "epoch": 0.16983541430192964, + "epoch": 0.1695995919408314, "grad_norm": 0.0, - "learning_rate": 1.8992126488508527e-05, - "loss": 0.9964, + "learning_rate": 1.8995303828999713e-05, + "loss": 1.0225, "step": 5985 }, { - "epoch": 0.16986379114642453, + "epoch": 0.16962792938309387, "grad_norm": 0.0, - "learning_rate": 1.8991724342991703e-05, - "loss": 1.0557, + "learning_rate": 1.8994902847345162e-05, + "loss": 0.9875, "step": 5986 }, { - "epoch": 0.1698921679909194, + "epoch": 0.16965626682535634, "grad_norm": 0.0, - "learning_rate": 1.899132212152138e-05, - "loss": 0.9405, + "learning_rate": 1.89945017899232e-05, + "loss": 0.9965, "step": 5987 }, { - "epoch": 0.1699205448354143, + "epoch": 0.1696846042676188, "grad_norm": 0.0, - "learning_rate": 1.8990919824100957e-05, - "loss": 0.9391, + "learning_rate": 1.8994100656737212e-05, + "loss": 1.1023, "step": 5988 }, { - "epoch": 0.1699489216799092, + "epoch": 0.16971294170988127, "grad_norm": 0.0, - "learning_rate": 1.8990517450733828e-05, - "loss": 1.0823, + "learning_rate": 1.8993699447790576e-05, + "loss": 0.9201, "step": 5989 }, { - "epoch": 0.16997729852440407, + "epoch": 0.16974127915214374, "grad_norm": 0.0, - "learning_rate": 1.8990115001423396e-05, - "loss": 1.0067, + "learning_rate": 1.8993298163086668e-05, + "loss": 1.0447, "step": 5990 }, { - "epoch": 0.170005675368899, + "epoch": 0.16976961659440618, "grad_norm": 0.0, - "learning_rate": 1.8989712476173054e-05, - "loss": 1.085, + "learning_rate": 1.899289680262887e-05, + "loss": 1.0748, "step": 5991 }, { - "epoch": 0.17003405221339388, + "epoch": 0.16979795403666864, "grad_norm": 0.0, - "learning_rate": 1.898930987498621e-05, - "loss": 1.0416, + "learning_rate": 1.899249536642056e-05, + "loss": 0.9645, "step": 5992 }, { - "epoch": 0.17006242905788876, + "epoch": 0.1698262914789311, "grad_norm": 0.0, - "learning_rate": 1.8988907197866262e-05, - "loss": 0.9643, + "learning_rate": 1.8992093854465124e-05, + "loss": 1.0259, "step": 5993 }, { - "epoch": 0.17009080590238365, + "epoch": 0.16985462892119357, "grad_norm": 0.0, - "learning_rate": 1.898850444481661e-05, - "loss": 0.9039, + "learning_rate": 1.8991692266765947e-05, + "loss": 1.1338, "step": 5994 }, { - "epoch": 0.17011918274687854, + "epoch": 0.16988296636345604, "grad_norm": 0.0, - "learning_rate": 1.8988101615840656e-05, - "loss": 0.9786, + "learning_rate": 1.89912906033264e-05, + "loss": 1.052, "step": 5995 }, { - "epoch": 0.17014755959137343, + "epoch": 0.1699113038057185, "grad_norm": 0.0, - "learning_rate": 1.8987698710941802e-05, - "loss": 1.0805, + "learning_rate": 1.8990888864149876e-05, + "loss": 1.0522, "step": 5996 }, { - "epoch": 0.17017593643586834, + "epoch": 0.16993964124798094, "grad_norm": 0.0, - "learning_rate": 1.8987295730123457e-05, - "loss": 1.0943, + "learning_rate": 1.8990487049239758e-05, + "loss": 1.0704, "step": 5997 }, { - "epoch": 0.17020431328036323, + "epoch": 0.1699679786902434, "grad_norm": 0.0, - "learning_rate": 1.898689267338902e-05, - "loss": 1.1125, + "learning_rate": 1.899008515859943e-05, + "loss": 0.9956, "step": 5998 }, { - "epoch": 0.17023269012485812, + "epoch": 0.16999631613250588, "grad_norm": 0.0, - "learning_rate": 1.8986489540741895e-05, - "loss": 1.0146, + "learning_rate": 1.8989683192232276e-05, + "loss": 1.0888, "step": 5999 }, { - "epoch": 0.170261066969353, + "epoch": 0.17002465357476834, "grad_norm": 0.0, - "learning_rate": 1.8986086332185488e-05, - "loss": 1.0696, + "learning_rate": 1.8989281150141678e-05, + "loss": 0.9794, "step": 6000 }, { - "epoch": 0.1702894438138479, + "epoch": 0.1700529910170308, "grad_norm": 0.0, - "learning_rate": 1.898568304772321e-05, - "loss": 0.9616, + "learning_rate": 1.898887903233103e-05, + "loss": 1.0159, "step": 6001 }, { - "epoch": 0.1703178206583428, + "epoch": 0.17008132845929327, "grad_norm": 0.0, - "learning_rate": 1.898527968735846e-05, - "loss": 1.0242, + "learning_rate": 1.8988476838803714e-05, + "loss": 1.0347, "step": 6002 }, { - "epoch": 0.1703461975028377, + "epoch": 0.1701096659015557, "grad_norm": 0.0, - "learning_rate": 1.898487625109465e-05, - "loss": 1.0523, + "learning_rate": 1.8988074569563122e-05, + "loss": 1.1392, "step": 6003 }, { - "epoch": 0.17037457434733258, + "epoch": 0.17013800334381818, "grad_norm": 0.0, - "learning_rate": 1.8984472738935183e-05, - "loss": 1.0043, + "learning_rate": 1.898767222461264e-05, + "loss": 0.9631, "step": 6004 }, { - "epoch": 0.17040295119182747, + "epoch": 0.17016634078608064, "grad_norm": 0.0, - "learning_rate": 1.8984069150883475e-05, - "loss": 1.0152, + "learning_rate": 1.8987269803955656e-05, + "loss": 1.0094, "step": 6005 }, { - "epoch": 0.17043132803632235, + "epoch": 0.1701946782283431, "grad_norm": 0.0, - "learning_rate": 1.898366548694293e-05, - "loss": 1.0394, + "learning_rate": 1.8986867307595562e-05, + "loss": 0.9868, "step": 6006 }, { - "epoch": 0.17045970488081724, + "epoch": 0.17022301567060558, "grad_norm": 0.0, - "learning_rate": 1.898326174711696e-05, - "loss": 1.0821, + "learning_rate": 1.898646473553575e-05, + "loss": 1.0305, "step": 6007 }, { - "epoch": 0.17048808172531216, + "epoch": 0.17025135311286804, "grad_norm": 0.0, - "learning_rate": 1.8982857931408972e-05, - "loss": 1.1725, + "learning_rate": 1.8986062087779604e-05, + "loss": 0.9196, "step": 6008 }, { - "epoch": 0.17051645856980704, + "epoch": 0.17027969055513048, "grad_norm": 0.0, - "learning_rate": 1.8982454039822377e-05, - "loss": 1.0322, + "learning_rate": 1.8985659364330522e-05, + "loss": 1.0282, "step": 6009 }, { - "epoch": 0.17054483541430193, + "epoch": 0.17030802799739295, "grad_norm": 0.0, - "learning_rate": 1.8982050072360594e-05, - "loss": 0.9412, + "learning_rate": 1.8985256565191898e-05, + "loss": 0.9752, "step": 6010 }, { - "epoch": 0.17057321225879682, + "epoch": 0.1703363654396554, "grad_norm": 0.0, - "learning_rate": 1.8981646029027027e-05, - "loss": 0.9984, + "learning_rate": 1.8984853690367116e-05, + "loss": 1.0945, "step": 6011 }, { - "epoch": 0.1706015891032917, + "epoch": 0.17036470288191788, "grad_norm": 0.0, - "learning_rate": 1.898124190982509e-05, - "loss": 1.0714, + "learning_rate": 1.8984450739859577e-05, + "loss": 0.9914, "step": 6012 }, { - "epoch": 0.1706299659477866, + "epoch": 0.17039304032418034, "grad_norm": 0.0, - "learning_rate": 1.8980837714758206e-05, - "loss": 1.0334, + "learning_rate": 1.8984047713672673e-05, + "loss": 1.0068, "step": 6013 }, { - "epoch": 0.1706583427922815, + "epoch": 0.1704213777664428, "grad_norm": 0.0, - "learning_rate": 1.8980433443829776e-05, - "loss": 1.0396, + "learning_rate": 1.8983644611809796e-05, + "loss": 1.1082, "step": 6014 }, { - "epoch": 0.1706867196367764, + "epoch": 0.17044971520870525, "grad_norm": 0.0, - "learning_rate": 1.8980029097043227e-05, - "loss": 0.9555, + "learning_rate": 1.898324143427435e-05, + "loss": 1.073, "step": 6015 }, { - "epoch": 0.17071509648127128, + "epoch": 0.17047805265096772, "grad_norm": 0.0, - "learning_rate": 1.897962467440196e-05, - "loss": 1.0235, + "learning_rate": 1.8982838181069723e-05, + "loss": 1.047, "step": 6016 }, { - "epoch": 0.17074347332576617, + "epoch": 0.17050639009323018, "grad_norm": 0.0, - "learning_rate": 1.8979220175909408e-05, - "loss": 1.0656, + "learning_rate": 1.8982434852199316e-05, + "loss": 0.9219, "step": 6017 }, { - "epoch": 0.17077185017026106, + "epoch": 0.17053472753549265, "grad_norm": 0.0, - "learning_rate": 1.8978815601568975e-05, - "loss": 0.997, + "learning_rate": 1.8982031447666522e-05, + "loss": 1.047, "step": 6018 }, { - "epoch": 0.17080022701475595, + "epoch": 0.1705630649777551, "grad_norm": 0.0, - "learning_rate": 1.8978410951384084e-05, - "loss": 1.0907, + "learning_rate": 1.898162796747474e-05, + "loss": 1.0054, "step": 6019 }, { - "epoch": 0.17082860385925086, + "epoch": 0.17059140242001758, "grad_norm": 0.0, - "learning_rate": 1.8978006225358154e-05, - "loss": 1.0828, + "learning_rate": 1.8981224411627376e-05, + "loss": 0.9711, "step": 6020 }, { - "epoch": 0.17085698070374575, + "epoch": 0.17061973986228002, "grad_norm": 0.0, - "learning_rate": 1.89776014234946e-05, - "loss": 1.022, + "learning_rate": 1.8980820780127818e-05, + "loss": 0.9227, "step": 6021 }, { - "epoch": 0.17088535754824064, + "epoch": 0.17064807730454248, "grad_norm": 0.0, - "learning_rate": 1.8977196545796844e-05, - "loss": 1.0431, + "learning_rate": 1.8980417072979476e-05, + "loss": 0.9468, "step": 6022 }, { - "epoch": 0.17091373439273552, + "epoch": 0.17067641474680495, "grad_norm": 0.0, - "learning_rate": 1.8976791592268303e-05, - "loss": 1.0537, + "learning_rate": 1.8980013290185743e-05, + "loss": 1.1199, "step": 6023 }, { - "epoch": 0.1709421112372304, + "epoch": 0.17070475218906742, "grad_norm": 0.0, - "learning_rate": 1.89763865629124e-05, - "loss": 1.0791, + "learning_rate": 1.8979609431750025e-05, + "loss": 0.8233, "step": 6024 }, { - "epoch": 0.17097048808172532, + "epoch": 0.17073308963132988, "grad_norm": 0.0, - "learning_rate": 1.8975981457732557e-05, - "loss": 0.9958, + "learning_rate": 1.8979205497675722e-05, + "loss": 0.8857, "step": 6025 }, { - "epoch": 0.1709988649262202, + "epoch": 0.17076142707359235, "grad_norm": 0.0, - "learning_rate": 1.8975576276732196e-05, - "loss": 0.9428, + "learning_rate": 1.897880148796624e-05, + "loss": 1.0196, "step": 6026 }, { - "epoch": 0.1710272417707151, + "epoch": 0.1707897645158548, "grad_norm": 0.0, - "learning_rate": 1.897517101991474e-05, - "loss": 1.1288, + "learning_rate": 1.8978397402624975e-05, + "loss": 1.1489, "step": 6027 }, { - "epoch": 0.17105561861521, + "epoch": 0.17081810195811725, "grad_norm": 0.0, - "learning_rate": 1.897476568728361e-05, - "loss": 1.0599, + "learning_rate": 1.8977993241655333e-05, + "loss": 1.1172, "step": 6028 }, { - "epoch": 0.17108399545970487, + "epoch": 0.17084643940037972, "grad_norm": 0.0, - "learning_rate": 1.8974360278842226e-05, - "loss": 1.1563, + "learning_rate": 1.8977589005060723e-05, + "loss": 1.02, "step": 6029 }, { - "epoch": 0.17111237230419976, + "epoch": 0.17087477684264218, "grad_norm": 0.0, - "learning_rate": 1.8973954794594024e-05, - "loss": 1.0239, + "learning_rate": 1.897718469284455e-05, + "loss": 1.0946, "step": 6030 }, { - "epoch": 0.17114074914869468, + "epoch": 0.17090311428490465, "grad_norm": 0.0, - "learning_rate": 1.8973549234542418e-05, - "loss": 0.9961, + "learning_rate": 1.897678030501021e-05, + "loss": 0.9935, "step": 6031 }, { - "epoch": 0.17116912599318956, + "epoch": 0.17093145172716712, "grad_norm": 0.0, - "learning_rate": 1.8973143598690843e-05, - "loss": 1.0029, + "learning_rate": 1.897637584156112e-05, + "loss": 0.9959, "step": 6032 }, { - "epoch": 0.17119750283768445, + "epoch": 0.17095978916942955, "grad_norm": 0.0, - "learning_rate": 1.8972737887042715e-05, - "loss": 1.0745, + "learning_rate": 1.8975971302500683e-05, + "loss": 0.9927, "step": 6033 }, { - "epoch": 0.17122587968217934, + "epoch": 0.17098812661169202, "grad_norm": 0.0, - "learning_rate": 1.897233209960147e-05, - "loss": 1.0445, + "learning_rate": 1.8975566687832304e-05, + "loss": 1.0092, "step": 6034 }, { - "epoch": 0.17125425652667423, + "epoch": 0.1710164640539545, "grad_norm": 0.0, - "learning_rate": 1.8971926236370532e-05, - "loss": 1.0122, + "learning_rate": 1.8975161997559393e-05, + "loss": 1.0666, "step": 6035 }, { - "epoch": 0.1712826333711691, + "epoch": 0.17104480149621695, "grad_norm": 0.0, - "learning_rate": 1.8971520297353326e-05, - "loss": 1.097, + "learning_rate": 1.8974757231685367e-05, + "loss": 1.0859, "step": 6036 }, { - "epoch": 0.17131101021566403, + "epoch": 0.17107313893847942, "grad_norm": 0.0, - "learning_rate": 1.897111428255329e-05, - "loss": 1.0404, + "learning_rate": 1.8974352390213623e-05, + "loss": 0.9899, "step": 6037 }, { - "epoch": 0.17133938706015892, + "epoch": 0.17110147638074188, "grad_norm": 0.0, - "learning_rate": 1.8970708191973847e-05, - "loss": 1.1627, + "learning_rate": 1.8973947473147576e-05, + "loss": 1.014, "step": 6038 }, { - "epoch": 0.1713677639046538, + "epoch": 0.17112981382300432, "grad_norm": 0.0, - "learning_rate": 1.897030202561843e-05, - "loss": 1.0227, + "learning_rate": 1.8973542480490636e-05, + "loss": 0.9724, "step": 6039 }, { - "epoch": 0.1713961407491487, + "epoch": 0.1711581512652668, "grad_norm": 0.0, - "learning_rate": 1.8969895783490466e-05, - "loss": 0.9823, + "learning_rate": 1.897313741224622e-05, + "loss": 1.027, "step": 6040 }, { - "epoch": 0.17142451759364358, + "epoch": 0.17118648870752926, "grad_norm": 0.0, - "learning_rate": 1.8969489465593393e-05, - "loss": 0.9386, + "learning_rate": 1.8972732268417734e-05, + "loss": 1.0072, "step": 6041 }, { - "epoch": 0.1714528944381385, + "epoch": 0.17121482614979172, "grad_norm": 0.0, - "learning_rate": 1.8969083071930635e-05, - "loss": 1.0158, + "learning_rate": 1.897232704900859e-05, + "loss": 0.9926, "step": 6042 }, { - "epoch": 0.17148127128263338, + "epoch": 0.1712431635920542, "grad_norm": 0.0, - "learning_rate": 1.8968676602505626e-05, - "loss": 0.988, + "learning_rate": 1.8971921754022202e-05, + "loss": 1.1282, "step": 6043 }, { - "epoch": 0.17150964812712827, + "epoch": 0.17127150103431665, "grad_norm": 0.0, - "learning_rate": 1.896827005732181e-05, - "loss": 0.9766, + "learning_rate": 1.8971516383461986e-05, + "loss": 0.9774, "step": 6044 }, { - "epoch": 0.17153802497162315, + "epoch": 0.1712998384765791, "grad_norm": 0.0, - "learning_rate": 1.896786343638261e-05, - "loss": 1.0317, + "learning_rate": 1.897111093733136e-05, + "loss": 0.9272, "step": 6045 }, { - "epoch": 0.17156640181611804, + "epoch": 0.17132817591884156, "grad_norm": 0.0, - "learning_rate": 1.8967456739691464e-05, - "loss": 0.9296, + "learning_rate": 1.8970705415633732e-05, + "loss": 0.9738, "step": 6046 }, { - "epoch": 0.17159477866061293, + "epoch": 0.17135651336110402, "grad_norm": 0.0, - "learning_rate": 1.896704996725181e-05, - "loss": 0.9877, + "learning_rate": 1.8970299818372525e-05, + "loss": 1.0475, "step": 6047 }, { - "epoch": 0.17162315550510784, + "epoch": 0.1713848508033665, "grad_norm": 0.0, - "learning_rate": 1.896664311906708e-05, - "loss": 0.9673, + "learning_rate": 1.896989414555115e-05, + "loss": 1.0127, "step": 6048 }, { - "epoch": 0.17165153234960273, + "epoch": 0.17141318824562896, "grad_norm": 0.0, - "learning_rate": 1.8966236195140714e-05, - "loss": 1.1117, + "learning_rate": 1.8969488397173023e-05, + "loss": 1.1199, "step": 6049 }, { - "epoch": 0.17167990919409762, + "epoch": 0.17144152568789142, "grad_norm": 0.0, - "learning_rate": 1.8965829195476146e-05, - "loss": 0.9757, + "learning_rate": 1.8969082573241567e-05, + "loss": 0.9813, "step": 6050 }, { - "epoch": 0.1717082860385925, + "epoch": 0.17146986313015386, "grad_norm": 0.0, - "learning_rate": 1.8965422120076814e-05, - "loss": 0.9824, + "learning_rate": 1.8968676673760192e-05, + "loss": 1.0002, "step": 6051 }, { - "epoch": 0.1717366628830874, + "epoch": 0.17149820057241633, "grad_norm": 0.0, - "learning_rate": 1.8965014968946166e-05, - "loss": 0.9655, + "learning_rate": 1.896827069873233e-05, + "loss": 0.9487, "step": 6052 }, { - "epoch": 0.17176503972758228, + "epoch": 0.1715265380146788, "grad_norm": 0.0, - "learning_rate": 1.8964607742087626e-05, - "loss": 0.8744, + "learning_rate": 1.8967864648161392e-05, + "loss": 0.9831, "step": 6053 }, { - "epoch": 0.1717934165720772, + "epoch": 0.17155487545694126, "grad_norm": 0.0, - "learning_rate": 1.8964200439504644e-05, - "loss": 1.0587, + "learning_rate": 1.89674585220508e-05, + "loss": 0.8971, "step": 6054 }, { - "epoch": 0.17182179341657208, + "epoch": 0.17158321289920372, "grad_norm": 0.0, - "learning_rate": 1.896379306120066e-05, - "loss": 1.054, + "learning_rate": 1.8967052320403974e-05, + "loss": 1.0004, "step": 6055 }, { - "epoch": 0.17185017026106697, + "epoch": 0.1716115503414662, "grad_norm": 0.0, - "learning_rate": 1.896338560717911e-05, - "loss": 1.0566, + "learning_rate": 1.8966646043224333e-05, + "loss": 1.0225, "step": 6056 }, { - "epoch": 0.17187854710556186, + "epoch": 0.17163988778372863, "grad_norm": 0.0, - "learning_rate": 1.896297807744344e-05, - "loss": 1.0709, + "learning_rate": 1.896623969051531e-05, + "loss": 1.0205, "step": 6057 }, { - "epoch": 0.17190692395005674, + "epoch": 0.1716682252259911, "grad_norm": 0.0, - "learning_rate": 1.896257047199709e-05, - "loss": 0.9067, + "learning_rate": 1.8965833262280314e-05, + "loss": 1.0837, "step": 6058 }, { - "epoch": 0.17193530079455163, + "epoch": 0.17169656266825356, "grad_norm": 0.0, - "learning_rate": 1.8962162790843504e-05, - "loss": 0.9309, + "learning_rate": 1.896542675852278e-05, + "loss": 0.9576, "step": 6059 }, { - "epoch": 0.17196367763904655, + "epoch": 0.17172490011051603, "grad_norm": 0.0, - "learning_rate": 1.896175503398613e-05, - "loss": 1.0152, + "learning_rate": 1.8965020179246124e-05, + "loss": 1.0775, "step": 6060 }, { - "epoch": 0.17199205448354143, + "epoch": 0.1717532375527785, "grad_norm": 0.0, - "learning_rate": 1.8961347201428405e-05, - "loss": 0.9874, + "learning_rate": 1.8964613524453775e-05, + "loss": 1.1407, "step": 6061 }, { - "epoch": 0.17202043132803632, + "epoch": 0.17178157499504096, "grad_norm": 0.0, - "learning_rate": 1.8960939293173777e-05, - "loss": 0.9601, + "learning_rate": 1.8964206794149157e-05, + "loss": 1.0098, "step": 6062 }, { - "epoch": 0.1720488081725312, + "epoch": 0.1718099124373034, "grad_norm": 0.0, - "learning_rate": 1.8960531309225695e-05, - "loss": 1.0641, + "learning_rate": 1.89637999883357e-05, + "loss": 1.0668, "step": 6063 }, { - "epoch": 0.1720771850170261, + "epoch": 0.17183824987956586, "grad_norm": 0.0, - "learning_rate": 1.89601232495876e-05, - "loss": 0.9454, + "learning_rate": 1.8963393107016822e-05, + "loss": 1.0169, "step": 6064 }, { - "epoch": 0.172105561861521, + "epoch": 0.17186658732182833, "grad_norm": 0.0, - "learning_rate": 1.895971511426294e-05, - "loss": 0.9394, + "learning_rate": 1.8962986150195956e-05, + "loss": 1.0911, "step": 6065 }, { - "epoch": 0.1721339387060159, + "epoch": 0.1718949247640908, "grad_norm": 0.0, - "learning_rate": 1.8959306903255162e-05, - "loss": 0.9216, + "learning_rate": 1.8962579117876533e-05, + "loss": 1.118, "step": 6066 }, { - "epoch": 0.17216231555051079, + "epoch": 0.17192326220635326, "grad_norm": 0.0, - "learning_rate": 1.8958898616567724e-05, - "loss": 1.0641, + "learning_rate": 1.8962172010061975e-05, + "loss": 1.1144, "step": 6067 }, { - "epoch": 0.17219069239500567, + "epoch": 0.17195159964861573, "grad_norm": 0.0, - "learning_rate": 1.895849025420406e-05, - "loss": 1.1009, + "learning_rate": 1.8961764826755714e-05, + "loss": 1.1092, "step": 6068 }, { - "epoch": 0.17221906923950056, + "epoch": 0.17197993709087817, "grad_norm": 0.0, - "learning_rate": 1.8958081816167628e-05, - "loss": 1.0573, + "learning_rate": 1.8961357567961182e-05, + "loss": 1.0811, "step": 6069 }, { - "epoch": 0.17224744608399545, + "epoch": 0.17200827453314063, "grad_norm": 0.0, - "learning_rate": 1.8957673302461875e-05, - "loss": 1.0599, + "learning_rate": 1.8960950233681804e-05, + "loss": 1.1359, "step": 6070 }, { - "epoch": 0.17227582292849036, + "epoch": 0.1720366119754031, "grad_norm": 0.0, - "learning_rate": 1.8957264713090256e-05, - "loss": 1.1403, + "learning_rate": 1.8960542823921017e-05, + "loss": 0.9455, "step": 6071 }, { - "epoch": 0.17230419977298525, + "epoch": 0.17206494941766556, "grad_norm": 0.0, - "learning_rate": 1.8956856048056215e-05, - "loss": 0.9539, + "learning_rate": 1.8960135338682252e-05, + "loss": 1.0821, "step": 6072 }, { - "epoch": 0.17233257661748014, + "epoch": 0.17209328685992803, "grad_norm": 0.0, - "learning_rate": 1.895644730736321e-05, - "loss": 0.9782, + "learning_rate": 1.8959727777968938e-05, + "loss": 0.9236, "step": 6073 }, { - "epoch": 0.17236095346197502, + "epoch": 0.1721216243021905, "grad_norm": 0.0, - "learning_rate": 1.8956038491014693e-05, - "loss": 1.0813, + "learning_rate": 1.8959320141784508e-05, + "loss": 1.0208, "step": 6074 }, { - "epoch": 0.1723893303064699, + "epoch": 0.17214996174445293, "grad_norm": 0.0, - "learning_rate": 1.8955629599014116e-05, - "loss": 1.0124, + "learning_rate": 1.8958912430132403e-05, + "loss": 1.0169, "step": 6075 }, { - "epoch": 0.1724177071509648, + "epoch": 0.1721782991867154, "grad_norm": 0.0, - "learning_rate": 1.895522063136493e-05, - "loss": 1.0657, + "learning_rate": 1.8958504643016045e-05, + "loss": 0.9544, "step": 6076 }, { - "epoch": 0.17244608399545971, + "epoch": 0.17220663662897787, "grad_norm": 0.0, - "learning_rate": 1.8954811588070598e-05, - "loss": 1.1488, + "learning_rate": 1.8958096780438878e-05, + "loss": 0.9562, "step": 6077 }, { - "epoch": 0.1724744608399546, + "epoch": 0.17223497407124033, "grad_norm": 0.0, - "learning_rate": 1.8954402469134566e-05, - "loss": 1.1279, + "learning_rate": 1.895768884240434e-05, + "loss": 1.0867, "step": 6078 }, { - "epoch": 0.1725028376844495, + "epoch": 0.1722633115135028, "grad_norm": 0.0, - "learning_rate": 1.8953993274560298e-05, - "loss": 1.0081, + "learning_rate": 1.8957280828915855e-05, + "loss": 0.9927, "step": 6079 }, { - "epoch": 0.17253121452894438, + "epoch": 0.17229164895576526, "grad_norm": 0.0, - "learning_rate": 1.8953584004351243e-05, - "loss": 0.9474, + "learning_rate": 1.895687273997687e-05, + "loss": 0.9833, "step": 6080 }, { - "epoch": 0.17255959137343926, + "epoch": 0.1723199863980277, "grad_norm": 0.0, - "learning_rate": 1.8953174658510863e-05, - "loss": 1.0272, + "learning_rate": 1.8956464575590823e-05, + "loss": 1.0132, "step": 6081 }, { - "epoch": 0.17258796821793418, + "epoch": 0.17234832384029017, "grad_norm": 0.0, - "learning_rate": 1.895276523704261e-05, - "loss": 1.0277, + "learning_rate": 1.8956056335761146e-05, + "loss": 0.9624, "step": 6082 }, { - "epoch": 0.17261634506242907, + "epoch": 0.17237666128255263, "grad_norm": 0.0, - "learning_rate": 1.8952355739949953e-05, - "loss": 0.9905, + "learning_rate": 1.8955648020491282e-05, + "loss": 1.0638, "step": 6083 }, { - "epoch": 0.17264472190692395, + "epoch": 0.1724049987248151, "grad_norm": 0.0, - "learning_rate": 1.8951946167236342e-05, - "loss": 1.051, + "learning_rate": 1.8955239629784667e-05, + "loss": 1.1519, "step": 6084 }, { - "epoch": 0.17267309875141884, + "epoch": 0.17243333616707757, "grad_norm": 0.0, - "learning_rate": 1.895153651890524e-05, - "loss": 0.9937, + "learning_rate": 1.8954831163644748e-05, + "loss": 1.0014, "step": 6085 }, { - "epoch": 0.17270147559591373, + "epoch": 0.17246167360934003, "grad_norm": 0.0, - "learning_rate": 1.8951126794960103e-05, - "loss": 1.0082, + "learning_rate": 1.8954422622074955e-05, + "loss": 1.0658, "step": 6086 }, { - "epoch": 0.17272985244040862, + "epoch": 0.17249001105160247, "grad_norm": 0.0, - "learning_rate": 1.8950716995404397e-05, - "loss": 0.837, + "learning_rate": 1.895401400507874e-05, + "loss": 1.0819, "step": 6087 }, { - "epoch": 0.17275822928490353, + "epoch": 0.17251834849386494, "grad_norm": 0.0, - "learning_rate": 1.8950307120241585e-05, - "loss": 0.9932, + "learning_rate": 1.8953605312659537e-05, + "loss": 0.9287, "step": 6088 }, { - "epoch": 0.17278660612939842, + "epoch": 0.1725466859361274, "grad_norm": 0.0, - "learning_rate": 1.8949897169475126e-05, - "loss": 1.0025, + "learning_rate": 1.895319654482079e-05, + "loss": 1.0555, "step": 6089 }, { - "epoch": 0.1728149829738933, + "epoch": 0.17257502337838987, "grad_norm": 0.0, - "learning_rate": 1.894948714310848e-05, - "loss": 0.9431, + "learning_rate": 1.895278770156595e-05, + "loss": 0.9366, "step": 6090 }, { - "epoch": 0.1728433598183882, + "epoch": 0.17260336082065234, "grad_norm": 0.0, - "learning_rate": 1.8949077041145115e-05, - "loss": 1.0247, + "learning_rate": 1.895237878289845e-05, + "loss": 1.042, "step": 6091 }, { - "epoch": 0.17287173666288308, + "epoch": 0.1726316982629148, "grad_norm": 0.0, - "learning_rate": 1.8948666863588495e-05, - "loss": 1.0205, + "learning_rate": 1.895196978882174e-05, + "loss": 0.9603, "step": 6092 }, { - "epoch": 0.17290011350737797, + "epoch": 0.17266003570517724, "grad_norm": 0.0, - "learning_rate": 1.8948256610442085e-05, - "loss": 0.9798, + "learning_rate": 1.8951560719339265e-05, + "loss": 0.9583, "step": 6093 }, { - "epoch": 0.17292849035187288, + "epoch": 0.1726883731474397, "grad_norm": 0.0, - "learning_rate": 1.8947846281709346e-05, - "loss": 0.9106, + "learning_rate": 1.8951151574454467e-05, + "loss": 1.1373, "step": 6094 }, { - "epoch": 0.17295686719636777, + "epoch": 0.17271671058970217, "grad_norm": 0.0, - "learning_rate": 1.894743587739375e-05, - "loss": 1.036, + "learning_rate": 1.89507423541708e-05, + "loss": 1.0441, "step": 6095 }, { - "epoch": 0.17298524404086266, + "epoch": 0.17274504803196464, "grad_norm": 0.0, - "learning_rate": 1.8947025397498757e-05, - "loss": 0.9513, + "learning_rate": 1.8950333058491702e-05, + "loss": 0.9899, "step": 6096 }, { - "epoch": 0.17301362088535754, + "epoch": 0.1727733854742271, "grad_norm": 0.0, - "learning_rate": 1.8946614842027844e-05, - "loss": 0.8803, + "learning_rate": 1.8949923687420626e-05, + "loss": 0.9349, "step": 6097 }, { - "epoch": 0.17304199772985243, + "epoch": 0.17280172291648957, "grad_norm": 0.0, - "learning_rate": 1.8946204210984468e-05, - "loss": 1.0052, + "learning_rate": 1.8949514240961023e-05, + "loss": 0.9711, "step": 6098 }, { - "epoch": 0.17307037457434732, + "epoch": 0.172830060358752, "grad_norm": 0.0, - "learning_rate": 1.8945793504372106e-05, - "loss": 0.8879, + "learning_rate": 1.8949104719116334e-05, + "loss": 0.9083, "step": 6099 }, { - "epoch": 0.17309875141884223, + "epoch": 0.17285839780101447, "grad_norm": 0.0, - "learning_rate": 1.8945382722194224e-05, - "loss": 1.0776, + "learning_rate": 1.8948695121890016e-05, + "loss": 1.0797, "step": 6100 }, { - "epoch": 0.17312712826333712, + "epoch": 0.17288673524327694, "grad_norm": 0.0, - "learning_rate": 1.894497186445429e-05, - "loss": 0.9666, + "learning_rate": 1.894828544928551e-05, + "loss": 1.0997, "step": 6101 }, { - "epoch": 0.173155505107832, + "epoch": 0.1729150726855394, "grad_norm": 0.0, - "learning_rate": 1.894456093115578e-05, - "loss": 0.9585, + "learning_rate": 1.894787570130628e-05, + "loss": 1.0773, "step": 6102 }, { - "epoch": 0.1731838819523269, + "epoch": 0.17294341012780187, "grad_norm": 0.0, - "learning_rate": 1.8944149922302156e-05, - "loss": 1.0223, + "learning_rate": 1.8947465877955767e-05, + "loss": 1.1133, "step": 6103 }, { - "epoch": 0.17321225879682178, + "epoch": 0.17297174757006434, "grad_norm": 0.0, - "learning_rate": 1.89437388378969e-05, - "loss": 1.0061, + "learning_rate": 1.8947055979237427e-05, + "loss": 1.0107, "step": 6104 }, { - "epoch": 0.1732406356413167, + "epoch": 0.17300008501232678, "grad_norm": 0.0, - "learning_rate": 1.8943327677943483e-05, - "loss": 1.0687, + "learning_rate": 1.8946646005154712e-05, + "loss": 1.0187, "step": 6105 }, { - "epoch": 0.17326901248581159, + "epoch": 0.17302842245458924, "grad_norm": 0.0, - "learning_rate": 1.8942916442445368e-05, - "loss": 0.9596, + "learning_rate": 1.8946235955711073e-05, + "loss": 1.0373, "step": 6106 }, { - "epoch": 0.17329738933030647, + "epoch": 0.1730567598968517, "grad_norm": 0.0, - "learning_rate": 1.894250513140604e-05, - "loss": 1.0085, + "learning_rate": 1.8945825830909972e-05, + "loss": 0.9565, "step": 6107 }, { - "epoch": 0.17332576617480136, + "epoch": 0.17308509733911417, "grad_norm": 0.0, - "learning_rate": 1.894209374482897e-05, - "loss": 0.8933, + "learning_rate": 1.8945415630754852e-05, + "loss": 1.0325, "step": 6108 }, { - "epoch": 0.17335414301929625, + "epoch": 0.17311343478137664, "grad_norm": 0.0, - "learning_rate": 1.8941682282717625e-05, - "loss": 1.0448, + "learning_rate": 1.894500535524918e-05, + "loss": 1.045, "step": 6109 }, { - "epoch": 0.17338251986379113, + "epoch": 0.1731417722236391, "grad_norm": 0.0, - "learning_rate": 1.89412707450755e-05, - "loss": 0.9727, + "learning_rate": 1.8944595004396404e-05, + "loss": 0.9997, "step": 6110 }, { - "epoch": 0.17341089670828605, + "epoch": 0.17317010966590154, "grad_norm": 0.0, - "learning_rate": 1.8940859131906047e-05, - "loss": 0.9091, + "learning_rate": 1.894418457819998e-05, + "loss": 0.9861, "step": 6111 }, { - "epoch": 0.17343927355278094, + "epoch": 0.173198447108164, "grad_norm": 0.0, - "learning_rate": 1.894044744321276e-05, - "loss": 1.0644, + "learning_rate": 1.8943774076663372e-05, + "loss": 1.0098, "step": 6112 }, { - "epoch": 0.17346765039727582, + "epoch": 0.17322678455042648, "grad_norm": 0.0, - "learning_rate": 1.8940035678999114e-05, - "loss": 0.9865, + "learning_rate": 1.894336349979003e-05, + "loss": 0.9706, "step": 6113 }, { - "epoch": 0.1734960272417707, + "epoch": 0.17325512199268894, "grad_norm": 0.0, - "learning_rate": 1.8939623839268583e-05, - "loss": 0.9559, + "learning_rate": 1.8942952847583417e-05, + "loss": 0.9655, "step": 6114 }, { - "epoch": 0.1735244040862656, + "epoch": 0.1732834594349514, "grad_norm": 0.0, - "learning_rate": 1.8939211924024644e-05, - "loss": 0.9358, + "learning_rate": 1.8942542120046993e-05, + "loss": 1.0446, "step": 6115 }, { - "epoch": 0.1735527809307605, + "epoch": 0.17331179687721388, "grad_norm": 0.0, - "learning_rate": 1.8938799933270784e-05, - "loss": 0.9356, + "learning_rate": 1.8942131317184214e-05, + "loss": 0.9198, "step": 6116 }, { - "epoch": 0.1735811577752554, + "epoch": 0.1733401343194763, "grad_norm": 0.0, - "learning_rate": 1.8938387867010476e-05, - "loss": 1.0628, + "learning_rate": 1.8941720438998545e-05, + "loss": 0.9947, "step": 6117 }, { - "epoch": 0.1736095346197503, + "epoch": 0.17336847176173878, "grad_norm": 0.0, - "learning_rate": 1.893797572524721e-05, - "loss": 1.0325, + "learning_rate": 1.8941309485493443e-05, + "loss": 1.1242, "step": 6118 }, { - "epoch": 0.17363791146424518, + "epoch": 0.17339680920400125, "grad_norm": 0.0, - "learning_rate": 1.8937563507984454e-05, - "loss": 1.0102, + "learning_rate": 1.894089845667237e-05, + "loss": 0.9945, "step": 6119 }, { - "epoch": 0.17366628830874006, + "epoch": 0.1734251466462637, "grad_norm": 0.0, - "learning_rate": 1.8937151215225703e-05, - "loss": 1.1099, + "learning_rate": 1.894048735253879e-05, + "loss": 1.0777, "step": 6120 }, { - "epoch": 0.17369466515323495, + "epoch": 0.17345348408852618, "grad_norm": 0.0, - "learning_rate": 1.8936738846974433e-05, - "loss": 1.0567, + "learning_rate": 1.8940076173096166e-05, + "loss": 0.9564, "step": 6121 }, { - "epoch": 0.17372304199772987, + "epoch": 0.17348182153078864, "grad_norm": 0.0, - "learning_rate": 1.8936326403234125e-05, - "loss": 1.0084, + "learning_rate": 1.893966491834796e-05, + "loss": 1.0689, "step": 6122 }, { - "epoch": 0.17375141884222475, + "epoch": 0.17351015897305108, "grad_norm": 0.0, - "learning_rate": 1.8935913884008265e-05, - "loss": 1.0388, + "learning_rate": 1.8939253588297638e-05, + "loss": 1.0034, "step": 6123 }, { - "epoch": 0.17377979568671964, + "epoch": 0.17353849641531355, "grad_norm": 0.0, - "learning_rate": 1.8935501289300344e-05, - "loss": 1.0626, + "learning_rate": 1.893884218294866e-05, + "loss": 1.0168, "step": 6124 }, { - "epoch": 0.17380817253121453, + "epoch": 0.17356683385757601, "grad_norm": 0.0, - "learning_rate": 1.893508861911384e-05, - "loss": 0.9721, + "learning_rate": 1.8938430702304497e-05, + "loss": 0.96, "step": 6125 }, { - "epoch": 0.17383654937570941, + "epoch": 0.17359517129983848, "grad_norm": 0.0, - "learning_rate": 1.893467587345224e-05, - "loss": 0.8867, + "learning_rate": 1.8938019146368614e-05, + "loss": 1.0489, "step": 6126 }, { - "epoch": 0.1738649262202043, + "epoch": 0.17362350874210095, "grad_norm": 0.0, - "learning_rate": 1.893426305231903e-05, - "loss": 0.8752, + "learning_rate": 1.8937607515144475e-05, + "loss": 1.0078, "step": 6127 }, { - "epoch": 0.17389330306469922, + "epoch": 0.1736518461843634, "grad_norm": 0.0, - "learning_rate": 1.8933850155717702e-05, - "loss": 0.968, + "learning_rate": 1.893719580863555e-05, + "loss": 0.9184, "step": 6128 }, { - "epoch": 0.1739216799091941, + "epoch": 0.17368018362662585, "grad_norm": 0.0, - "learning_rate": 1.8933437183651737e-05, - "loss": 1.1057, + "learning_rate": 1.8936784026845304e-05, + "loss": 1.0539, "step": 6129 }, { - "epoch": 0.173950056753689, + "epoch": 0.17370852106888832, "grad_norm": 0.0, - "learning_rate": 1.893302413612463e-05, - "loss": 1.0276, + "learning_rate": 1.8936372169777208e-05, + "loss": 0.9832, "step": 6130 }, { - "epoch": 0.17397843359818388, + "epoch": 0.17373685851115078, "grad_norm": 0.0, - "learning_rate": 1.8932611013139862e-05, - "loss": 1.0238, + "learning_rate": 1.8935960237434733e-05, + "loss": 1.1805, "step": 6131 }, { - "epoch": 0.17400681044267877, + "epoch": 0.17376519595341325, "grad_norm": 0.0, - "learning_rate": 1.893219781470093e-05, - "loss": 0.9627, + "learning_rate": 1.8935548229821346e-05, + "loss": 1.1436, "step": 6132 }, { - "epoch": 0.17403518728717365, + "epoch": 0.17379353339567571, "grad_norm": 0.0, - "learning_rate": 1.893178454081132e-05, - "loss": 1.0583, + "learning_rate": 1.8935136146940517e-05, + "loss": 1.0745, "step": 6133 }, { - "epoch": 0.17406356413166857, + "epoch": 0.17382187083793818, "grad_norm": 0.0, - "learning_rate": 1.8931371191474526e-05, - "loss": 1.0414, + "learning_rate": 1.893472398879572e-05, + "loss": 1.0245, "step": 6134 }, { - "epoch": 0.17409194097616346, + "epoch": 0.17385020828020062, "grad_norm": 0.0, - "learning_rate": 1.893095776669404e-05, - "loss": 1.066, + "learning_rate": 1.8934311755390423e-05, + "loss": 1.0196, "step": 6135 }, { - "epoch": 0.17412031782065834, + "epoch": 0.17387854572246308, "grad_norm": 0.0, - "learning_rate": 1.8930544266473348e-05, - "loss": 1.0276, + "learning_rate": 1.8933899446728103e-05, + "loss": 0.883, "step": 6136 }, { - "epoch": 0.17414869466515323, + "epoch": 0.17390688316472555, "grad_norm": 0.0, - "learning_rate": 1.8930130690815947e-05, - "loss": 1.0654, + "learning_rate": 1.8933487062812225e-05, + "loss": 0.9399, "step": 6137 }, { - "epoch": 0.17417707150964812, + "epoch": 0.17393522060698802, "grad_norm": 0.0, - "learning_rate": 1.8929717039725333e-05, - "loss": 1.0956, + "learning_rate": 1.8933074603646275e-05, + "loss": 0.9329, "step": 6138 }, { - "epoch": 0.174205448354143, + "epoch": 0.17396355804925048, "grad_norm": 0.0, - "learning_rate": 1.8929303313204998e-05, - "loss": 1.0548, + "learning_rate": 1.8932662069233717e-05, + "loss": 0.8999, "step": 6139 }, { - "epoch": 0.17423382519863792, + "epoch": 0.17399189549151295, "grad_norm": 0.0, - "learning_rate": 1.8928889511258432e-05, - "loss": 0.9446, + "learning_rate": 1.893224945957803e-05, + "loss": 0.9375, "step": 6140 }, { - "epoch": 0.1742622020431328, + "epoch": 0.1740202329337754, "grad_norm": 0.0, - "learning_rate": 1.892847563388914e-05, - "loss": 1.069, + "learning_rate": 1.893183677468269e-05, + "loss": 1.0278, "step": 6141 }, { - "epoch": 0.1742905788876277, + "epoch": 0.17404857037603785, "grad_norm": 0.0, - "learning_rate": 1.8928061681100615e-05, - "loss": 1.0217, + "learning_rate": 1.8931424014551168e-05, + "loss": 0.9402, "step": 6142 }, { - "epoch": 0.17431895573212258, + "epoch": 0.17407690781830032, "grad_norm": 0.0, - "learning_rate": 1.8927647652896347e-05, - "loss": 0.9617, + "learning_rate": 1.8931011179186946e-05, + "loss": 1.0562, "step": 6143 }, { - "epoch": 0.17434733257661747, + "epoch": 0.17410524526056279, "grad_norm": 0.0, - "learning_rate": 1.8927233549279837e-05, - "loss": 0.9677, + "learning_rate": 1.8930598268593503e-05, + "loss": 1.184, "step": 6144 }, { - "epoch": 0.17437570942111238, + "epoch": 0.17413358270282525, "grad_norm": 0.0, - "learning_rate": 1.8926819370254586e-05, - "loss": 1.0262, + "learning_rate": 1.8930185282774315e-05, + "loss": 1.0266, "step": 6145 }, { - "epoch": 0.17440408626560727, + "epoch": 0.17416192014508772, "grad_norm": 0.0, - "learning_rate": 1.892640511582409e-05, - "loss": 1.0459, + "learning_rate": 1.8929772221732856e-05, + "loss": 0.8712, "step": 6146 }, { - "epoch": 0.17443246311010216, + "epoch": 0.17419025758735016, "grad_norm": 0.0, - "learning_rate": 1.892599078599185e-05, - "loss": 1.0418, + "learning_rate": 1.892935908547261e-05, + "loss": 1.0506, "step": 6147 }, { - "epoch": 0.17446083995459705, + "epoch": 0.17421859502961262, "grad_norm": 0.0, - "learning_rate": 1.8925576380761366e-05, - "loss": 1.0609, + "learning_rate": 1.892894587399706e-05, + "loss": 1.1258, "step": 6148 }, { - "epoch": 0.17448921679909193, + "epoch": 0.1742469324718751, "grad_norm": 0.0, - "learning_rate": 1.8925161900136133e-05, - "loss": 1.1014, + "learning_rate": 1.8928532587309682e-05, + "loss": 0.9807, "step": 6149 }, { - "epoch": 0.17451759364358682, + "epoch": 0.17427526991413755, "grad_norm": 0.0, - "learning_rate": 1.8924747344119658e-05, - "loss": 0.9979, + "learning_rate": 1.8928119225413958e-05, + "loss": 0.8546, "step": 6150 }, { - "epoch": 0.17454597048808174, + "epoch": 0.17430360735640002, "grad_norm": 0.0, - "learning_rate": 1.892433271271544e-05, - "loss": 1.0802, + "learning_rate": 1.8927705788313373e-05, + "loss": 1.0531, "step": 6151 }, { - "epoch": 0.17457434733257662, + "epoch": 0.17433194479866249, "grad_norm": 0.0, - "learning_rate": 1.8923918005926984e-05, - "loss": 0.9784, + "learning_rate": 1.8927292276011404e-05, + "loss": 0.9689, "step": 6152 }, { - "epoch": 0.1746027241770715, + "epoch": 0.17436028224092492, "grad_norm": 0.0, - "learning_rate": 1.892350322375779e-05, - "loss": 1.0888, + "learning_rate": 1.8926878688511537e-05, + "loss": 1.0637, "step": 6153 }, { - "epoch": 0.1746311010215664, + "epoch": 0.1743886196831874, "grad_norm": 0.0, - "learning_rate": 1.8923088366211367e-05, - "loss": 0.9413, + "learning_rate": 1.892646502581726e-05, + "loss": 1.0099, "step": 6154 }, { - "epoch": 0.17465947786606129, + "epoch": 0.17441695712544986, "grad_norm": 0.0, - "learning_rate": 1.892267343329121e-05, - "loss": 0.8551, + "learning_rate": 1.892605128793205e-05, + "loss": 0.9724, "step": 6155 }, { - "epoch": 0.17468785471055617, + "epoch": 0.17444529456771232, "grad_norm": 0.0, - "learning_rate": 1.8922258425000833e-05, - "loss": 1.0883, + "learning_rate": 1.8925637474859394e-05, + "loss": 1.1151, "step": 6156 }, { - "epoch": 0.1747162315550511, + "epoch": 0.1744736320099748, "grad_norm": 0.0, - "learning_rate": 1.8921843341343737e-05, - "loss": 1.0022, + "learning_rate": 1.892522358660278e-05, + "loss": 1.012, "step": 6157 }, { - "epoch": 0.17474460839954598, + "epoch": 0.17450196945223725, "grad_norm": 0.0, - "learning_rate": 1.892142818232343e-05, - "loss": 1.0099, + "learning_rate": 1.8924809623165694e-05, + "loss": 0.9341, "step": 6158 }, { - "epoch": 0.17477298524404086, + "epoch": 0.1745303068944997, "grad_norm": 0.0, - "learning_rate": 1.8921012947943418e-05, - "loss": 0.8884, + "learning_rate": 1.8924395584551624e-05, + "loss": 1.0812, "step": 6159 }, { - "epoch": 0.17480136208853575, + "epoch": 0.17455864433676216, "grad_norm": 0.0, - "learning_rate": 1.8920597638207208e-05, - "loss": 1.0276, + "learning_rate": 1.892398147076405e-05, + "loss": 1.0784, "step": 6160 }, { - "epoch": 0.17482973893303064, + "epoch": 0.17458698177902462, "grad_norm": 0.0, - "learning_rate": 1.892018225311831e-05, - "loss": 1.042, + "learning_rate": 1.8923567281806475e-05, + "loss": 1.0951, "step": 6161 }, { - "epoch": 0.17485811577752555, + "epoch": 0.1746153192212871, "grad_norm": 0.0, - "learning_rate": 1.8919766792680227e-05, - "loss": 1.004, + "learning_rate": 1.8923153017682372e-05, + "loss": 0.9915, "step": 6162 }, { - "epoch": 0.17488649262202044, + "epoch": 0.17464365666354956, "grad_norm": 0.0, - "learning_rate": 1.8919351256896477e-05, - "loss": 1.0297, + "learning_rate": 1.892273867839524e-05, + "loss": 1.0015, "step": 6163 }, { - "epoch": 0.17491486946651533, + "epoch": 0.17467199410581202, "grad_norm": 0.0, - "learning_rate": 1.8918935645770565e-05, - "loss": 1.146, + "learning_rate": 1.8922324263948567e-05, + "loss": 1.1499, "step": 6164 }, { - "epoch": 0.17494324631101021, + "epoch": 0.17470033154807446, "grad_norm": 0.0, - "learning_rate": 1.8918519959306003e-05, - "loss": 1.0668, + "learning_rate": 1.8921909774345842e-05, + "loss": 1.0958, "step": 6165 }, { - "epoch": 0.1749716231555051, + "epoch": 0.17472866899033693, "grad_norm": 0.0, - "learning_rate": 1.89181041975063e-05, - "loss": 0.963, + "learning_rate": 1.8921495209590562e-05, + "loss": 0.8745, "step": 6166 }, { - "epoch": 0.175, + "epoch": 0.1747570064325994, "grad_norm": 0.0, - "learning_rate": 1.891768836037497e-05, - "loss": 1.0323, + "learning_rate": 1.892108056968621e-05, + "loss": 0.9745, "step": 6167 }, { - "epoch": 0.1750283768444949, + "epoch": 0.17478534387486186, "grad_norm": 0.0, - "learning_rate": 1.8917272447915528e-05, - "loss": 0.9962, + "learning_rate": 1.8920665854636285e-05, + "loss": 1.0028, "step": 6168 }, { - "epoch": 0.1750567536889898, + "epoch": 0.17481368131712433, "grad_norm": 0.0, - "learning_rate": 1.8916856460131487e-05, - "loss": 0.9892, + "learning_rate": 1.8920251064444284e-05, + "loss": 1.0307, "step": 6169 }, { - "epoch": 0.17508513053348468, + "epoch": 0.1748420187593868, "grad_norm": 0.0, - "learning_rate": 1.8916440397026355e-05, - "loss": 1.0098, + "learning_rate": 1.891983619911369e-05, + "loss": 1.0023, "step": 6170 }, { - "epoch": 0.17511350737797957, + "epoch": 0.17487035620164923, "grad_norm": 0.0, - "learning_rate": 1.891602425860365e-05, - "loss": 0.9894, + "learning_rate": 1.8919421258648007e-05, + "loss": 0.9633, "step": 6171 }, { - "epoch": 0.17514188422247445, + "epoch": 0.1748986936439117, "grad_norm": 0.0, - "learning_rate": 1.8915608044866885e-05, - "loss": 0.9725, + "learning_rate": 1.8919006243050723e-05, + "loss": 1.0524, "step": 6172 }, { - "epoch": 0.17517026106696934, + "epoch": 0.17492703108617416, "grad_norm": 0.0, - "learning_rate": 1.8915191755819584e-05, - "loss": 0.9588, + "learning_rate": 1.891859115232534e-05, + "loss": 0.9257, "step": 6173 }, { - "epoch": 0.17519863791146426, + "epoch": 0.17495536852843663, "grad_norm": 0.0, - "learning_rate": 1.8914775391465254e-05, - "loss": 0.9913, + "learning_rate": 1.891817598647535e-05, + "loss": 1.0338, "step": 6174 }, { - "epoch": 0.17522701475595914, + "epoch": 0.1749837059706991, "grad_norm": 0.0, - "learning_rate": 1.8914358951807418e-05, - "loss": 0.9375, + "learning_rate": 1.8917760745504252e-05, + "loss": 1.0754, "step": 6175 }, { - "epoch": 0.17525539160045403, + "epoch": 0.17501204341296156, "grad_norm": 0.0, - "learning_rate": 1.891394243684959e-05, - "loss": 1.004, + "learning_rate": 1.8917345429415546e-05, + "loss": 1.0795, "step": 6176 }, { - "epoch": 0.17528376844494892, + "epoch": 0.175040380855224, "grad_norm": 0.0, - "learning_rate": 1.8913525846595285e-05, - "loss": 1.1022, + "learning_rate": 1.8916930038212726e-05, + "loss": 0.9788, "step": 6177 }, { - "epoch": 0.1753121452894438, + "epoch": 0.17506871829748646, "grad_norm": 0.0, - "learning_rate": 1.8913109181048032e-05, - "loss": 1.015, + "learning_rate": 1.8916514571899295e-05, + "loss": 1.1468, "step": 6178 }, { - "epoch": 0.17534052213393872, + "epoch": 0.17509705573974893, "grad_norm": 0.0, - "learning_rate": 1.8912692440211345e-05, - "loss": 1.0165, + "learning_rate": 1.8916099030478747e-05, + "loss": 0.9597, "step": 6179 }, { - "epoch": 0.1753688989784336, + "epoch": 0.1751253931820114, "grad_norm": 0.0, - "learning_rate": 1.891227562408874e-05, - "loss": 1.0199, + "learning_rate": 1.8915683413954592e-05, + "loss": 1.1072, "step": 6180 }, { - "epoch": 0.1753972758229285, + "epoch": 0.17515373062427386, "grad_norm": 0.0, - "learning_rate": 1.8911858732683746e-05, - "loss": 1.0703, + "learning_rate": 1.891526772233032e-05, + "loss": 1.0195, "step": 6181 }, { - "epoch": 0.17542565266742338, + "epoch": 0.1751820680665363, "grad_norm": 0.0, - "learning_rate": 1.891144176599988e-05, - "loss": 1.0002, + "learning_rate": 1.891485195560944e-05, + "loss": 0.9778, "step": 6182 }, { - "epoch": 0.17545402951191827, + "epoch": 0.17521040550879877, "grad_norm": 0.0, - "learning_rate": 1.8911024724040665e-05, - "loss": 0.9532, + "learning_rate": 1.891443611379545e-05, + "loss": 1.1291, "step": 6183 }, { - "epoch": 0.17548240635641316, + "epoch": 0.17523874295106123, "grad_norm": 0.0, - "learning_rate": 1.891060760680962e-05, - "loss": 1.1691, + "learning_rate": 1.8914020196891853e-05, + "loss": 1.0578, "step": 6184 }, { - "epoch": 0.17551078320090807, + "epoch": 0.1752670803933237, "grad_norm": 0.0, - "learning_rate": 1.8910190414310276e-05, - "loss": 1.0064, + "learning_rate": 1.8913604204902155e-05, + "loss": 0.9157, "step": 6185 }, { - "epoch": 0.17553916004540296, + "epoch": 0.17529541783558616, "grad_norm": 0.0, - "learning_rate": 1.8909773146546152e-05, - "loss": 0.9478, + "learning_rate": 1.891318813782986e-05, + "loss": 1.0634, "step": 6186 }, { - "epoch": 0.17556753688989785, + "epoch": 0.17532375527784863, "grad_norm": 0.0, - "learning_rate": 1.8909355803520776e-05, - "loss": 0.9826, + "learning_rate": 1.8912771995678468e-05, + "loss": 1.0522, "step": 6187 }, { - "epoch": 0.17559591373439273, + "epoch": 0.17535209272011107, "grad_norm": 0.0, - "learning_rate": 1.8908938385237667e-05, - "loss": 1.0161, + "learning_rate": 1.8912355778451494e-05, + "loss": 0.9561, "step": 6188 }, { - "epoch": 0.17562429057888762, + "epoch": 0.17538043016237354, "grad_norm": 0.0, - "learning_rate": 1.8908520891700357e-05, - "loss": 0.9844, + "learning_rate": 1.8911939486152433e-05, + "loss": 1.0065, "step": 6189 }, { - "epoch": 0.1756526674233825, + "epoch": 0.175408767604636, "grad_norm": 0.0, - "learning_rate": 1.890810332291237e-05, - "loss": 1.0104, + "learning_rate": 1.8911523118784797e-05, + "loss": 1.0782, "step": 6190 }, { - "epoch": 0.17568104426787742, + "epoch": 0.17543710504689847, "grad_norm": 0.0, - "learning_rate": 1.8907685678877233e-05, - "loss": 1.0316, + "learning_rate": 1.8911106676352094e-05, + "loss": 1.0451, "step": 6191 }, { - "epoch": 0.1757094211123723, + "epoch": 0.17546544248916093, "grad_norm": 0.0, - "learning_rate": 1.8907267959598473e-05, - "loss": 1.0681, + "learning_rate": 1.891069015885783e-05, + "loss": 0.8689, "step": 6192 }, { - "epoch": 0.1757377979568672, + "epoch": 0.1754937799314234, "grad_norm": 0.0, - "learning_rate": 1.8906850165079627e-05, - "loss": 1.0576, + "learning_rate": 1.8910273566305514e-05, + "loss": 0.9781, "step": 6193 }, { - "epoch": 0.17576617480136209, + "epoch": 0.17552211737368584, "grad_norm": 0.0, - "learning_rate": 1.890643229532421e-05, - "loss": 1.073, + "learning_rate": 1.890985689869865e-05, + "loss": 1.0224, "step": 6194 }, { - "epoch": 0.17579455164585697, + "epoch": 0.1755504548159483, "grad_norm": 0.0, - "learning_rate": 1.890601435033576e-05, - "loss": 1.0051, + "learning_rate": 1.890944015604076e-05, + "loss": 1.0384, "step": 6195 }, { - "epoch": 0.17582292849035186, + "epoch": 0.17557879225821077, "grad_norm": 0.0, - "learning_rate": 1.890559633011781e-05, - "loss": 0.9583, + "learning_rate": 1.8909023338335345e-05, + "loss": 0.9748, "step": 6196 }, { - "epoch": 0.17585130533484677, + "epoch": 0.17560712970047324, "grad_norm": 0.0, - "learning_rate": 1.8905178234673886e-05, - "loss": 0.8911, + "learning_rate": 1.8908606445585914e-05, + "loss": 0.9293, "step": 6197 }, { - "epoch": 0.17587968217934166, + "epoch": 0.1756354671427357, "grad_norm": 0.0, - "learning_rate": 1.890476006400752e-05, - "loss": 0.9493, + "learning_rate": 1.8908189477795988e-05, + "loss": 0.9414, "step": 6198 }, { - "epoch": 0.17590805902383655, + "epoch": 0.17566380458499817, "grad_norm": 0.0, - "learning_rate": 1.890434181812225e-05, - "loss": 1.0546, + "learning_rate": 1.8907772434969073e-05, + "loss": 0.9461, "step": 6199 }, { - "epoch": 0.17593643586833144, + "epoch": 0.1756921420272606, "grad_norm": 0.0, - "learning_rate": 1.89039234970216e-05, - "loss": 1.0072, + "learning_rate": 1.8907355317108683e-05, + "loss": 1.0424, "step": 6200 }, { - "epoch": 0.17596481271282632, + "epoch": 0.17572047946952307, "grad_norm": 0.0, - "learning_rate": 1.8903505100709107e-05, - "loss": 1.0251, + "learning_rate": 1.8906938124218328e-05, + "loss": 0.9978, "step": 6201 }, { - "epoch": 0.17599318955732124, + "epoch": 0.17574881691178554, "grad_norm": 0.0, - "learning_rate": 1.8903086629188308e-05, - "loss": 1.042, + "learning_rate": 1.8906520856301528e-05, + "loss": 1.1376, "step": 6202 }, { - "epoch": 0.17602156640181613, + "epoch": 0.175777154354048, "grad_norm": 0.0, - "learning_rate": 1.8902668082462737e-05, - "loss": 1.0182, + "learning_rate": 1.8906103513361797e-05, + "loss": 1.0225, "step": 6203 }, { - "epoch": 0.176049943246311, + "epoch": 0.17580549179631047, "grad_norm": 0.0, - "learning_rate": 1.8902249460535927e-05, - "loss": 0.9849, + "learning_rate": 1.8905686095402648e-05, + "loss": 1.0396, "step": 6204 }, { - "epoch": 0.1760783200908059, + "epoch": 0.17583382923857294, "grad_norm": 0.0, - "learning_rate": 1.8901830763411417e-05, - "loss": 0.9788, + "learning_rate": 1.89052686024276e-05, + "loss": 1.0667, "step": 6205 }, { - "epoch": 0.1761066969353008, + "epoch": 0.17586216668083537, "grad_norm": 0.0, - "learning_rate": 1.890141199109274e-05, - "loss": 1.0586, + "learning_rate": 1.890485103444016e-05, + "loss": 1.0156, "step": 6206 }, { - "epoch": 0.17613507377979568, + "epoch": 0.17589050412309784, "grad_norm": 0.0, - "learning_rate": 1.890099314358344e-05, - "loss": 1.0198, + "learning_rate": 1.890443339144386e-05, + "loss": 0.9691, "step": 6207 }, { - "epoch": 0.1761634506242906, + "epoch": 0.1759188415653603, "grad_norm": 0.0, - "learning_rate": 1.890057422088705e-05, - "loss": 1.1155, + "learning_rate": 1.890401567344221e-05, + "loss": 0.9131, "step": 6208 }, { - "epoch": 0.17619182746878548, + "epoch": 0.17594717900762277, "grad_norm": 0.0, - "learning_rate": 1.8900155223007105e-05, - "loss": 1.0142, + "learning_rate": 1.890359788043873e-05, + "loss": 1.0581, "step": 6209 }, { - "epoch": 0.17622020431328037, + "epoch": 0.17597551644988524, "grad_norm": 0.0, - "learning_rate": 1.8899736149947152e-05, - "loss": 1.068, + "learning_rate": 1.8903180012436935e-05, + "loss": 0.9968, "step": 6210 }, { - "epoch": 0.17624858115777525, + "epoch": 0.1760038538921477, "grad_norm": 0.0, - "learning_rate": 1.8899317001710726e-05, - "loss": 1.0121, + "learning_rate": 1.890276206944035e-05, + "loss": 0.9142, "step": 6211 }, { - "epoch": 0.17627695800227014, + "epoch": 0.17603219133441014, "grad_norm": 0.0, - "learning_rate": 1.889889777830137e-05, - "loss": 1.055, + "learning_rate": 1.89023440514525e-05, + "loss": 1.0385, "step": 6212 }, { - "epoch": 0.17630533484676503, + "epoch": 0.1760605287766726, "grad_norm": 0.0, - "learning_rate": 1.8898478479722624e-05, - "loss": 1.0463, + "learning_rate": 1.8901925958476894e-05, + "loss": 1.1002, "step": 6213 }, { - "epoch": 0.17633371169125994, + "epoch": 0.17608886621893508, "grad_norm": 0.0, - "learning_rate": 1.889805910597803e-05, - "loss": 0.9542, + "learning_rate": 1.8901507790517064e-05, + "loss": 0.9914, "step": 6214 }, { - "epoch": 0.17636208853575483, + "epoch": 0.17611720366119754, "grad_norm": 0.0, - "learning_rate": 1.8897639657071135e-05, - "loss": 1.0124, + "learning_rate": 1.890108954757652e-05, + "loss": 1.0356, "step": 6215 }, { - "epoch": 0.17639046538024972, + "epoch": 0.17614554110346, "grad_norm": 0.0, - "learning_rate": 1.8897220133005475e-05, - "loss": 1.0329, + "learning_rate": 1.8900671229658802e-05, + "loss": 0.9799, "step": 6216 }, { - "epoch": 0.1764188422247446, + "epoch": 0.17617387854572247, "grad_norm": 0.0, - "learning_rate": 1.8896800533784593e-05, - "loss": 1.0152, + "learning_rate": 1.8900252836767424e-05, + "loss": 0.9586, "step": 6217 }, { - "epoch": 0.1764472190692395, + "epoch": 0.1762022159879849, "grad_norm": 0.0, - "learning_rate": 1.889638085941204e-05, - "loss": 1.0588, + "learning_rate": 1.889983436890591e-05, + "loss": 1.0081, "step": 6218 }, { - "epoch": 0.1764755959137344, + "epoch": 0.17623055343024738, "grad_norm": 0.0, - "learning_rate": 1.8895961109891358e-05, - "loss": 1.0178, + "learning_rate": 1.8899415826077784e-05, + "loss": 0.9443, "step": 6219 }, { - "epoch": 0.1765039727582293, + "epoch": 0.17625889087250984, "grad_norm": 0.0, - "learning_rate": 1.8895541285226095e-05, - "loss": 0.9151, + "learning_rate": 1.8898997208286576e-05, + "loss": 1.0546, "step": 6220 }, { - "epoch": 0.17653234960272418, + "epoch": 0.1762872283147723, "grad_norm": 0.0, - "learning_rate": 1.889512138541979e-05, - "loss": 1.0054, + "learning_rate": 1.889857851553581e-05, + "loss": 0.9347, "step": 6221 }, { - "epoch": 0.17656072644721907, + "epoch": 0.17631556575703478, "grad_norm": 0.0, - "learning_rate": 1.8894701410475998e-05, - "loss": 0.9709, + "learning_rate": 1.8898159747829014e-05, + "loss": 0.9735, "step": 6222 }, { - "epoch": 0.17658910329171396, + "epoch": 0.17634390319929724, "grad_norm": 0.0, - "learning_rate": 1.8894281360398263e-05, - "loss": 1.0278, + "learning_rate": 1.889774090516971e-05, + "loss": 1.0284, "step": 6223 }, { - "epoch": 0.17661748013620884, + "epoch": 0.17637224064155968, "grad_norm": 0.0, - "learning_rate": 1.889386123519013e-05, - "loss": 1.0596, + "learning_rate": 1.8897321987561436e-05, + "loss": 1.0054, "step": 6224 }, { - "epoch": 0.17664585698070376, + "epoch": 0.17640057808382215, "grad_norm": 0.0, - "learning_rate": 1.889344103485516e-05, - "loss": 1.0366, + "learning_rate": 1.889690299500771e-05, + "loss": 0.9988, "step": 6225 }, { - "epoch": 0.17667423382519865, + "epoch": 0.1764289155260846, "grad_norm": 0.0, - "learning_rate": 1.8893020759396884e-05, - "loss": 1.0742, + "learning_rate": 1.889648392751207e-05, + "loss": 0.9978, "step": 6226 }, { - "epoch": 0.17670261066969353, + "epoch": 0.17645725296834708, "grad_norm": 0.0, - "learning_rate": 1.8892600408818866e-05, - "loss": 1.0915, + "learning_rate": 1.889606478507804e-05, + "loss": 1.1082, "step": 6227 }, { - "epoch": 0.17673098751418842, + "epoch": 0.17648559041060954, "grad_norm": 0.0, - "learning_rate": 1.889217998312465e-05, - "loss": 0.906, + "learning_rate": 1.8895645567709154e-05, + "loss": 1.1267, "step": 6228 }, { - "epoch": 0.1767593643586833, + "epoch": 0.176513927852872, "grad_norm": 0.0, - "learning_rate": 1.8891759482317798e-05, - "loss": 0.9787, + "learning_rate": 1.889522627540894e-05, + "loss": 1.0355, "step": 6229 }, { - "epoch": 0.1767877412031782, + "epoch": 0.17654226529513445, "grad_norm": 0.0, - "learning_rate": 1.8891338906401846e-05, - "loss": 0.9276, + "learning_rate": 1.8894806908180934e-05, + "loss": 0.8982, "step": 6230 }, { - "epoch": 0.1768161180476731, + "epoch": 0.17657060273739691, "grad_norm": 0.0, - "learning_rate": 1.889091825538036e-05, - "loss": 0.9793, + "learning_rate": 1.8894387466028665e-05, + "loss": 1.0621, "step": 6231 }, { - "epoch": 0.176844494892168, + "epoch": 0.17659894017965938, "grad_norm": 0.0, - "learning_rate": 1.8890497529256882e-05, - "loss": 1.0713, + "learning_rate": 1.889396794895567e-05, + "loss": 0.9981, "step": 6232 }, { - "epoch": 0.17687287173666288, + "epoch": 0.17662727762192185, "grad_norm": 0.0, - "learning_rate": 1.8890076728034974e-05, - "loss": 1.0063, + "learning_rate": 1.8893548356965477e-05, + "loss": 1.0874, "step": 6233 }, { - "epoch": 0.17690124858115777, + "epoch": 0.1766556150641843, "grad_norm": 0.0, - "learning_rate": 1.8889655851718188e-05, - "loss": 0.9727, + "learning_rate": 1.8893128690061625e-05, + "loss": 0.97, "step": 6234 }, { - "epoch": 0.17692962542565266, + "epoch": 0.17668395250644678, "grad_norm": 0.0, - "learning_rate": 1.888923490031008e-05, - "loss": 1.067, + "learning_rate": 1.889270894824765e-05, + "loss": 0.9919, "step": 6235 }, { - "epoch": 0.17695800227014755, + "epoch": 0.17671228994870922, "grad_norm": 0.0, - "learning_rate": 1.8888813873814208e-05, - "loss": 0.9386, + "learning_rate": 1.8892289131527078e-05, + "loss": 0.951, "step": 6236 }, { - "epoch": 0.17698637911464246, + "epoch": 0.17674062739097168, "grad_norm": 0.0, - "learning_rate": 1.8888392772234122e-05, - "loss": 1.1674, + "learning_rate": 1.889186923990346e-05, + "loss": 1.122, "step": 6237 }, { - "epoch": 0.17701475595913735, + "epoch": 0.17676896483323415, "grad_norm": 0.0, - "learning_rate": 1.8887971595573385e-05, - "loss": 0.9949, + "learning_rate": 1.889144927338032e-05, + "loss": 0.9855, "step": 6238 }, { - "epoch": 0.17704313280363224, + "epoch": 0.17679730227549662, "grad_norm": 0.0, - "learning_rate": 1.888755034383555e-05, - "loss": 1.1348, + "learning_rate": 1.8891029231961208e-05, + "loss": 1.0639, "step": 6239 }, { - "epoch": 0.17707150964812712, + "epoch": 0.17682563971775908, "grad_norm": 0.0, - "learning_rate": 1.8887129017024183e-05, - "loss": 0.9881, + "learning_rate": 1.8890609115649653e-05, + "loss": 0.9774, "step": 6240 }, { - "epoch": 0.177099886492622, + "epoch": 0.17685397716002155, "grad_norm": 0.0, - "learning_rate": 1.888670761514283e-05, - "loss": 1.0163, + "learning_rate": 1.8890188924449192e-05, + "loss": 0.9486, "step": 6241 }, { - "epoch": 0.17712826333711693, + "epoch": 0.17688231460228399, "grad_norm": 0.0, - "learning_rate": 1.8886286138195063e-05, - "loss": 0.9533, + "learning_rate": 1.888976865836337e-05, + "loss": 1.0388, "step": 6242 }, { - "epoch": 0.1771566401816118, + "epoch": 0.17691065204454645, "grad_norm": 0.0, - "learning_rate": 1.8885864586184435e-05, - "loss": 1.0103, + "learning_rate": 1.8889348317395727e-05, + "loss": 1.0485, "step": 6243 }, { - "epoch": 0.1771850170261067, + "epoch": 0.17693898948680892, "grad_norm": 0.0, - "learning_rate": 1.888544295911451e-05, - "loss": 1.0114, + "learning_rate": 1.88889279015498e-05, + "loss": 0.9932, "step": 6244 }, { - "epoch": 0.1772133938706016, + "epoch": 0.17696732692907138, "grad_norm": 0.0, - "learning_rate": 1.888502125698885e-05, - "loss": 1.0644, + "learning_rate": 1.8888507410829136e-05, + "loss": 1.0255, "step": 6245 }, { - "epoch": 0.17724177071509647, + "epoch": 0.17699566437133385, "grad_norm": 0.0, - "learning_rate": 1.8884599479811018e-05, - "loss": 1.0243, + "learning_rate": 1.888808684523727e-05, + "loss": 1.0502, "step": 6246 }, { - "epoch": 0.17727014755959136, + "epoch": 0.17702400181359632, "grad_norm": 0.0, - "learning_rate": 1.888417762758457e-05, - "loss": 1.0432, + "learning_rate": 1.888766620477775e-05, + "loss": 1.0241, "step": 6247 }, { - "epoch": 0.17729852440408628, + "epoch": 0.17705233925585875, "grad_norm": 0.0, - "learning_rate": 1.888375570031308e-05, - "loss": 1.1152, + "learning_rate": 1.8887245489454117e-05, + "loss": 0.9481, "step": 6248 }, { - "epoch": 0.17732690124858116, + "epoch": 0.17708067669812122, "grad_norm": 0.0, - "learning_rate": 1.8883333698000102e-05, - "loss": 0.9403, + "learning_rate": 1.8886824699269916e-05, + "loss": 0.9858, "step": 6249 }, { - "epoch": 0.17735527809307605, + "epoch": 0.17710901414038369, "grad_norm": 0.0, - "learning_rate": 1.8882911620649204e-05, - "loss": 1.0138, + "learning_rate": 1.888640383422869e-05, + "loss": 0.9891, "step": 6250 }, { - "epoch": 0.17738365493757094, + "epoch": 0.17713735158264615, "grad_norm": 0.0, - "learning_rate": 1.8882489468263954e-05, - "loss": 1.0191, + "learning_rate": 1.888598289433398e-05, + "loss": 1.0694, "step": 6251 }, { - "epoch": 0.17741203178206583, + "epoch": 0.17716568902490862, "grad_norm": 0.0, - "learning_rate": 1.8882067240847916e-05, - "loss": 0.981, + "learning_rate": 1.888556187958934e-05, + "loss": 1.0904, "step": 6252 }, { - "epoch": 0.17744040862656071, + "epoch": 0.17719402646717108, "grad_norm": 0.0, - "learning_rate": 1.888164493840466e-05, - "loss": 1.007, + "learning_rate": 1.888514078999831e-05, + "loss": 1.0565, "step": 6253 }, { - "epoch": 0.17746878547105563, + "epoch": 0.17722236390943352, "grad_norm": 0.0, - "learning_rate": 1.8881222560937745e-05, - "loss": 1.0659, + "learning_rate": 1.8884719625564444e-05, + "loss": 1.0278, "step": 6254 }, { - "epoch": 0.17749716231555052, + "epoch": 0.177250701351696, "grad_norm": 0.0, - "learning_rate": 1.8880800108450748e-05, - "loss": 1.0757, + "learning_rate": 1.8884298386291286e-05, + "loss": 0.9829, "step": 6255 }, { - "epoch": 0.1775255391600454, + "epoch": 0.17727903879395845, "grad_norm": 0.0, - "learning_rate": 1.888037758094723e-05, - "loss": 0.9007, + "learning_rate": 1.888387707218238e-05, + "loss": 1.0981, "step": 6256 }, { - "epoch": 0.1775539160045403, + "epoch": 0.17730737623622092, "grad_norm": 0.0, - "learning_rate": 1.8879954978430762e-05, - "loss": 0.8769, + "learning_rate": 1.888345568324128e-05, + "loss": 1.0036, "step": 6257 }, { - "epoch": 0.17758229284903518, + "epoch": 0.1773357136784834, "grad_norm": 0.0, - "learning_rate": 1.8879532300904923e-05, - "loss": 0.9857, + "learning_rate": 1.8883034219471534e-05, + "loss": 1.0801, "step": 6258 }, { - "epoch": 0.1776106696935301, + "epoch": 0.17736405112074585, "grad_norm": 0.0, - "learning_rate": 1.8879109548373265e-05, - "loss": 1.0693, + "learning_rate": 1.888261268087669e-05, + "loss": 1.0833, "step": 6259 }, { - "epoch": 0.17763904653802498, + "epoch": 0.1773923885630083, "grad_norm": 0.0, - "learning_rate": 1.8878686720839378e-05, - "loss": 1.0665, + "learning_rate": 1.8882191067460305e-05, + "loss": 1.0505, "step": 6260 }, { - "epoch": 0.17766742338251987, + "epoch": 0.17742072600527076, "grad_norm": 0.0, - "learning_rate": 1.8878263818306818e-05, - "loss": 1.0103, + "learning_rate": 1.888176937922592e-05, + "loss": 1.0105, "step": 6261 }, { - "epoch": 0.17769580022701476, + "epoch": 0.17744906344753322, "grad_norm": 0.0, - "learning_rate": 1.887784084077917e-05, - "loss": 1.0178, + "learning_rate": 1.8881347616177103e-05, + "loss": 1.0505, "step": 6262 }, { - "epoch": 0.17772417707150964, + "epoch": 0.1774774008897957, "grad_norm": 0.0, - "learning_rate": 1.887741778826e-05, - "loss": 1.1224, + "learning_rate": 1.888092577831739e-05, + "loss": 1.0151, "step": 6263 }, { - "epoch": 0.17775255391600453, + "epoch": 0.17750573833205815, "grad_norm": 0.0, - "learning_rate": 1.8876994660752885e-05, - "loss": 1.0537, + "learning_rate": 1.888050386565034e-05, + "loss": 1.1333, "step": 6264 }, { - "epoch": 0.17778093076049944, + "epoch": 0.17753407577432062, "grad_norm": 0.0, - "learning_rate": 1.887657145826139e-05, - "loss": 1.0886, + "learning_rate": 1.888008187817951e-05, + "loss": 1.1588, "step": 6265 }, { - "epoch": 0.17780930760499433, + "epoch": 0.17756241321658306, "grad_norm": 0.0, - "learning_rate": 1.88761481807891e-05, - "loss": 1.0518, + "learning_rate": 1.8879659815908457e-05, + "loss": 1.0065, "step": 6266 }, { - "epoch": 0.17783768444948922, + "epoch": 0.17759075065884553, "grad_norm": 0.0, - "learning_rate": 1.8875724828339593e-05, - "loss": 1.0211, + "learning_rate": 1.887923767884073e-05, + "loss": 1.0194, "step": 6267 }, { - "epoch": 0.1778660612939841, + "epoch": 0.177619088101108, "grad_norm": 0.0, - "learning_rate": 1.8875301400916433e-05, - "loss": 1.0887, + "learning_rate": 1.8878815466979886e-05, + "loss": 0.9196, "step": 6268 }, { - "epoch": 0.177894438138479, + "epoch": 0.17764742554337046, "grad_norm": 0.0, - "learning_rate": 1.8874877898523204e-05, - "loss": 0.9325, + "learning_rate": 1.8878393180329482e-05, + "loss": 0.9761, "step": 6269 }, { - "epoch": 0.17792281498297388, + "epoch": 0.17767576298563292, "grad_norm": 0.0, - "learning_rate": 1.8874454321163483e-05, - "loss": 0.8967, + "learning_rate": 1.8877970818893075e-05, + "loss": 1.0011, "step": 6270 }, { - "epoch": 0.1779511918274688, + "epoch": 0.1777041004278954, "grad_norm": 0.0, - "learning_rate": 1.887403066884085e-05, - "loss": 0.9767, + "learning_rate": 1.8877548382674223e-05, + "loss": 1.0983, "step": 6271 }, { - "epoch": 0.17797956867196368, + "epoch": 0.17773243787015783, "grad_norm": 0.0, - "learning_rate": 1.8873606941558878e-05, - "loss": 1.05, + "learning_rate": 1.8877125871676484e-05, + "loss": 1.0604, "step": 6272 }, { - "epoch": 0.17800794551645857, + "epoch": 0.1777607753124203, "grad_norm": 0.0, - "learning_rate": 1.8873183139321152e-05, - "loss": 0.9876, + "learning_rate": 1.8876703285903418e-05, + "loss": 1.0799, "step": 6273 }, { - "epoch": 0.17803632236095346, + "epoch": 0.17778911275468276, "grad_norm": 0.0, - "learning_rate": 1.8872759262131247e-05, - "loss": 0.9059, + "learning_rate": 1.8876280625358583e-05, + "loss": 1.0019, "step": 6274 }, { - "epoch": 0.17806469920544835, + "epoch": 0.17781745019694523, "grad_norm": 0.0, - "learning_rate": 1.8872335309992745e-05, - "loss": 1.0448, + "learning_rate": 1.8875857890045544e-05, + "loss": 0.9564, "step": 6275 }, { - "epoch": 0.17809307604994323, + "epoch": 0.1778457876392077, "grad_norm": 0.0, - "learning_rate": 1.887191128290923e-05, - "loss": 1.0404, + "learning_rate": 1.8875435079967853e-05, + "loss": 1.0544, "step": 6276 }, { - "epoch": 0.17812145289443815, + "epoch": 0.17787412508147016, "grad_norm": 0.0, - "learning_rate": 1.8871487180884277e-05, - "loss": 1.0329, + "learning_rate": 1.887501219512908e-05, + "loss": 1.05, "step": 6277 }, { - "epoch": 0.17814982973893304, + "epoch": 0.1779024625237326, "grad_norm": 0.0, - "learning_rate": 1.887106300392148e-05, - "loss": 1.0083, + "learning_rate": 1.8874589235532782e-05, + "loss": 0.9702, "step": 6278 }, { - "epoch": 0.17817820658342792, + "epoch": 0.17793079996599506, "grad_norm": 0.0, - "learning_rate": 1.8870638752024408e-05, - "loss": 0.9276, + "learning_rate": 1.8874166201182526e-05, + "loss": 0.919, "step": 6279 }, { - "epoch": 0.1782065834279228, + "epoch": 0.17795913740825753, "grad_norm": 0.0, - "learning_rate": 1.8870214425196658e-05, - "loss": 1.0446, + "learning_rate": 1.8873743092081866e-05, + "loss": 1.0872, "step": 6280 }, { - "epoch": 0.1782349602724177, + "epoch": 0.17798747485052, "grad_norm": 0.0, - "learning_rate": 1.8869790023441802e-05, - "loss": 1.0102, + "learning_rate": 1.8873319908234377e-05, + "loss": 1.0344, "step": 6281 }, { - "epoch": 0.1782633371169126, + "epoch": 0.17801581229278246, "grad_norm": 0.0, - "learning_rate": 1.8869365546763437e-05, - "loss": 1.0869, + "learning_rate": 1.887289664964362e-05, + "loss": 1.0403, "step": 6282 }, { - "epoch": 0.1782917139614075, + "epoch": 0.17804414973504493, "grad_norm": 0.0, - "learning_rate": 1.8868940995165136e-05, - "loss": 1.0197, + "learning_rate": 1.887247331631316e-05, + "loss": 0.9098, "step": 6283 }, { - "epoch": 0.1783200908059024, + "epoch": 0.17807248717730736, "grad_norm": 0.0, - "learning_rate": 1.88685163686505e-05, - "loss": 0.9335, + "learning_rate": 1.8872049908246564e-05, + "loss": 1.1163, "step": 6284 }, { - "epoch": 0.17834846765039727, + "epoch": 0.17810082461956983, "grad_norm": 0.0, - "learning_rate": 1.8868091667223098e-05, - "loss": 0.9186, + "learning_rate": 1.8871626425447392e-05, + "loss": 0.9787, "step": 6285 }, { - "epoch": 0.17837684449489216, + "epoch": 0.1781291620618323, "grad_norm": 0.0, - "learning_rate": 1.8867666890886532e-05, - "loss": 1.0013, + "learning_rate": 1.887120286791922e-05, + "loss": 1.066, "step": 6286 }, { - "epoch": 0.17840522133938705, + "epoch": 0.17815749950409476, "grad_norm": 0.0, - "learning_rate": 1.8867242039644387e-05, - "loss": 0.9525, + "learning_rate": 1.887077923566561e-05, + "loss": 1.0161, "step": 6287 }, { - "epoch": 0.17843359818388196, + "epoch": 0.17818583694635723, "grad_norm": 0.0, - "learning_rate": 1.8866817113500247e-05, - "loss": 0.9891, + "learning_rate": 1.8870355528690134e-05, + "loss": 0.9692, "step": 6288 }, { - "epoch": 0.17846197502837685, + "epoch": 0.1782141743886197, "grad_norm": 0.0, - "learning_rate": 1.8866392112457706e-05, - "loss": 0.9418, + "learning_rate": 1.886993174699636e-05, + "loss": 0.9974, "step": 6289 }, { - "epoch": 0.17849035187287174, + "epoch": 0.17824251183088213, "grad_norm": 0.0, - "learning_rate": 1.886596703652035e-05, - "loss": 1.0047, + "learning_rate": 1.8869507890587854e-05, + "loss": 0.9633, "step": 6290 }, { - "epoch": 0.17851872871736663, + "epoch": 0.1782708492731446, "grad_norm": 0.0, - "learning_rate": 1.8865541885691775e-05, - "loss": 1.0409, + "learning_rate": 1.8869083959468194e-05, + "loss": 1.0166, "step": 6291 }, { - "epoch": 0.1785471055618615, + "epoch": 0.17829918671540707, "grad_norm": 0.0, - "learning_rate": 1.8865116659975567e-05, - "loss": 1.071, + "learning_rate": 1.8868659953640943e-05, + "loss": 1.1055, "step": 6292 }, { - "epoch": 0.1785754824063564, + "epoch": 0.17832752415766953, "grad_norm": 0.0, - "learning_rate": 1.886469135937532e-05, - "loss": 1.0566, + "learning_rate": 1.8868235873109676e-05, + "loss": 1.0077, "step": 6293 }, { - "epoch": 0.17860385925085132, + "epoch": 0.178355861599932, "grad_norm": 0.0, - "learning_rate": 1.8864265983894626e-05, - "loss": 1.1882, + "learning_rate": 1.8867811717877966e-05, + "loss": 1.0275, "step": 6294 }, { - "epoch": 0.1786322360953462, + "epoch": 0.17838419904219446, "grad_norm": 0.0, - "learning_rate": 1.886384053353708e-05, - "loss": 0.9763, + "learning_rate": 1.8867387487949385e-05, + "loss": 1.0397, "step": 6295 }, { - "epoch": 0.1786606129398411, + "epoch": 0.1784125364844569, "grad_norm": 0.0, - "learning_rate": 1.8863415008306277e-05, - "loss": 0.9191, + "learning_rate": 1.8866963183327508e-05, + "loss": 0.9458, "step": 6296 }, { - "epoch": 0.17868898978433598, + "epoch": 0.17844087392671937, "grad_norm": 0.0, - "learning_rate": 1.8862989408205803e-05, - "loss": 0.9691, + "learning_rate": 1.8866538804015905e-05, + "loss": 1.102, "step": 6297 }, { - "epoch": 0.17871736662883086, + "epoch": 0.17846921136898183, "grad_norm": 0.0, - "learning_rate": 1.8862563733239265e-05, - "loss": 1.0325, + "learning_rate": 1.886611435001815e-05, + "loss": 1.0274, "step": 6298 }, { - "epoch": 0.17874574347332578, + "epoch": 0.1784975488112443, "grad_norm": 0.0, - "learning_rate": 1.886213798341025e-05, - "loss": 1.0562, + "learning_rate": 1.8865689821337828e-05, + "loss": 1.0121, "step": 6299 }, { - "epoch": 0.17877412031782067, + "epoch": 0.17852588625350677, "grad_norm": 0.0, - "learning_rate": 1.8861712158722355e-05, - "loss": 1.1302, + "learning_rate": 1.8865265217978503e-05, + "loss": 1.1281, "step": 6300 }, { - "epoch": 0.17880249716231555, + "epoch": 0.17855422369576923, "grad_norm": 0.0, - "learning_rate": 1.8861286259179183e-05, - "loss": 0.9015, + "learning_rate": 1.886484053994376e-05, + "loss": 1.0546, "step": 6301 }, { - "epoch": 0.17883087400681044, + "epoch": 0.17858256113803167, "grad_norm": 0.0, - "learning_rate": 1.8860860284784323e-05, - "loss": 1.0406, + "learning_rate": 1.8864415787237174e-05, + "loss": 0.9473, "step": 6302 }, { - "epoch": 0.17885925085130533, + "epoch": 0.17861089858029414, "grad_norm": 0.0, - "learning_rate": 1.8860434235541383e-05, - "loss": 0.9924, + "learning_rate": 1.886399095986232e-05, + "loss": 0.9611, "step": 6303 }, { - "epoch": 0.17888762769580022, + "epoch": 0.1786392360225566, "grad_norm": 0.0, - "learning_rate": 1.8860008111453955e-05, - "loss": 1.0983, + "learning_rate": 1.886356605782278e-05, + "loss": 0.9133, "step": 6304 }, { - "epoch": 0.17891600454029513, + "epoch": 0.17866757346481907, "grad_norm": 0.0, - "learning_rate": 1.8859581912525636e-05, - "loss": 0.976, + "learning_rate": 1.8863141081122132e-05, + "loss": 0.9152, "step": 6305 }, { - "epoch": 0.17894438138479002, + "epoch": 0.17869591090708153, "grad_norm": 0.0, - "learning_rate": 1.8859155638760035e-05, - "loss": 1.0364, + "learning_rate": 1.8862716029763954e-05, + "loss": 0.926, "step": 6306 }, { - "epoch": 0.1789727582292849, + "epoch": 0.178724248349344, "grad_norm": 0.0, - "learning_rate": 1.885872929016075e-05, - "loss": 1.006, + "learning_rate": 1.886229090375183e-05, + "loss": 0.9852, "step": 6307 }, { - "epoch": 0.1790011350737798, + "epoch": 0.17875258579160644, "grad_norm": 0.0, - "learning_rate": 1.8858302866731377e-05, - "loss": 0.9473, + "learning_rate": 1.8861865703089338e-05, + "loss": 1.1252, "step": 6308 }, { - "epoch": 0.17902951191827468, + "epoch": 0.1787809232338689, "grad_norm": 0.0, - "learning_rate": 1.8857876368475523e-05, - "loss": 0.9913, + "learning_rate": 1.886144042778006e-05, + "loss": 1.013, "step": 6309 }, { - "epoch": 0.17905788876276957, + "epoch": 0.17880926067613137, "grad_norm": 0.0, - "learning_rate": 1.8857449795396786e-05, - "loss": 0.9434, + "learning_rate": 1.8861015077827578e-05, + "loss": 1.1576, "step": 6310 }, { - "epoch": 0.17908626560726448, + "epoch": 0.17883759811839384, "grad_norm": 0.0, - "learning_rate": 1.8857023147498776e-05, - "loss": 1.0181, + "learning_rate": 1.8860589653235475e-05, + "loss": 1.0551, "step": 6311 }, { - "epoch": 0.17911464245175937, + "epoch": 0.1788659355606563, "grad_norm": 0.0, - "learning_rate": 1.8856596424785093e-05, - "loss": 0.9352, + "learning_rate": 1.8860164154007335e-05, + "loss": 0.9479, "step": 6312 }, { - "epoch": 0.17914301929625426, + "epoch": 0.17889427300291877, "grad_norm": 0.0, - "learning_rate": 1.885616962725934e-05, - "loss": 1.0848, + "learning_rate": 1.8859738580146746e-05, + "loss": 0.9948, "step": 6313 }, { - "epoch": 0.17917139614074915, + "epoch": 0.1789226104451812, "grad_norm": 0.0, - "learning_rate": 1.8855742754925122e-05, - "loss": 0.9811, + "learning_rate": 1.8859312931657285e-05, + "loss": 0.9604, "step": 6314 }, { - "epoch": 0.17919977298524403, + "epoch": 0.17895094788744367, "grad_norm": 0.0, - "learning_rate": 1.8855315807786048e-05, - "loss": 1.0414, + "learning_rate": 1.8858887208542542e-05, + "loss": 1.0424, "step": 6315 }, { - "epoch": 0.17922814982973892, + "epoch": 0.17897928532970614, "grad_norm": 0.0, - "learning_rate": 1.885488878584573e-05, - "loss": 1.1918, + "learning_rate": 1.8858461410806103e-05, + "loss": 1.0156, "step": 6316 }, { - "epoch": 0.17925652667423383, + "epoch": 0.1790076227719686, "grad_norm": 0.0, - "learning_rate": 1.885446168910776e-05, - "loss": 1.0164, + "learning_rate": 1.8858035538451554e-05, + "loss": 1.0354, "step": 6317 }, { - "epoch": 0.17928490351872872, + "epoch": 0.17903596021423107, "grad_norm": 0.0, - "learning_rate": 1.8854034517575758e-05, - "loss": 1.0471, + "learning_rate": 1.885760959148248e-05, + "loss": 1.0416, "step": 6318 }, { - "epoch": 0.1793132803632236, + "epoch": 0.17906429765649354, "grad_norm": 0.0, - "learning_rate": 1.885360727125333e-05, - "loss": 0.9198, + "learning_rate": 1.8857183569902476e-05, + "loss": 0.9845, "step": 6319 }, { - "epoch": 0.1793416572077185, + "epoch": 0.17909263509875598, "grad_norm": 0.0, - "learning_rate": 1.8853179950144077e-05, - "loss": 0.9755, + "learning_rate": 1.885675747371512e-05, + "loss": 0.9104, "step": 6320 }, { - "epoch": 0.17937003405221338, + "epoch": 0.17912097254101844, "grad_norm": 0.0, - "learning_rate": 1.885275255425162e-05, - "loss": 0.9809, + "learning_rate": 1.8856331302924013e-05, + "loss": 1.1904, "step": 6321 }, { - "epoch": 0.1793984108967083, + "epoch": 0.1791493099832809, "grad_norm": 0.0, - "learning_rate": 1.8852325083579564e-05, - "loss": 0.9081, + "learning_rate": 1.885590505753273e-05, + "loss": 0.8692, "step": 6322 }, { - "epoch": 0.1794267877412032, + "epoch": 0.17917764742554337, "grad_norm": 0.0, - "learning_rate": 1.885189753813152e-05, - "loss": 0.9806, + "learning_rate": 1.8855478737544878e-05, + "loss": 1.0101, "step": 6323 }, { - "epoch": 0.17945516458569807, + "epoch": 0.17920598486780584, "grad_norm": 0.0, - "learning_rate": 1.8851469917911098e-05, - "loss": 1.0782, + "learning_rate": 1.885505234296404e-05, + "loss": 1.0184, "step": 6324 }, { - "epoch": 0.17948354143019296, + "epoch": 0.1792343223100683, "grad_norm": 0.0, - "learning_rate": 1.8851042222921912e-05, - "loss": 0.9452, + "learning_rate": 1.8854625873793807e-05, + "loss": 1.058, "step": 6325 }, { - "epoch": 0.17951191827468785, + "epoch": 0.17926265975233074, "grad_norm": 0.0, - "learning_rate": 1.8850614453167577e-05, - "loss": 0.9079, + "learning_rate": 1.8854199330037772e-05, + "loss": 1.1317, "step": 6326 }, { - "epoch": 0.17954029511918274, + "epoch": 0.1792909971945932, "grad_norm": 0.0, - "learning_rate": 1.8850186608651702e-05, - "loss": 0.9182, + "learning_rate": 1.8853772711699524e-05, + "loss": 0.9507, "step": 6327 }, { - "epoch": 0.17956867196367765, + "epoch": 0.17931933463685568, "grad_norm": 0.0, - "learning_rate": 1.8849758689377902e-05, - "loss": 0.9227, + "learning_rate": 1.8853346018782665e-05, + "loss": 1.0636, "step": 6328 }, { - "epoch": 0.17959704880817254, + "epoch": 0.17934767207911814, "grad_norm": 0.0, - "learning_rate": 1.8849330695349795e-05, - "loss": 1.0497, + "learning_rate": 1.8852919251290785e-05, + "loss": 0.9716, "step": 6329 }, { - "epoch": 0.17962542565266743, + "epoch": 0.1793760095213806, "grad_norm": 0.0, - "learning_rate": 1.884890262657099e-05, - "loss": 0.9795, + "learning_rate": 1.8852492409227476e-05, + "loss": 1.0288, "step": 6330 }, { - "epoch": 0.1796538024971623, + "epoch": 0.17940434696364307, "grad_norm": 0.0, - "learning_rate": 1.884847448304511e-05, - "loss": 1.0322, + "learning_rate": 1.885206549259634e-05, + "loss": 1.024, "step": 6331 }, { - "epoch": 0.1796821793416572, + "epoch": 0.1794326844059055, "grad_norm": 0.0, - "learning_rate": 1.8848046264775766e-05, - "loss": 0.9718, + "learning_rate": 1.8851638501400965e-05, + "loss": 1.1446, "step": 6332 }, { - "epoch": 0.1797105561861521, + "epoch": 0.17946102184816798, "grad_norm": 0.0, - "learning_rate": 1.8847617971766577e-05, - "loss": 1.1306, + "learning_rate": 1.8851211435644952e-05, + "loss": 1.0935, "step": 6333 }, { - "epoch": 0.179738933030647, + "epoch": 0.17948935929043044, "grad_norm": 0.0, - "learning_rate": 1.8847189604021165e-05, - "loss": 0.9569, + "learning_rate": 1.8850784295331903e-05, + "loss": 1.0403, "step": 6334 }, { - "epoch": 0.1797673098751419, + "epoch": 0.1795176967326929, "grad_norm": 0.0, - "learning_rate": 1.8846761161543143e-05, - "loss": 1.0153, + "learning_rate": 1.885035708046541e-05, + "loss": 1.022, "step": 6335 }, { - "epoch": 0.17979568671963678, + "epoch": 0.17954603417495538, "grad_norm": 0.0, - "learning_rate": 1.884633264433613e-05, - "loss": 1.112, + "learning_rate": 1.884992979104907e-05, + "loss": 1.0461, "step": 6336 }, { - "epoch": 0.17982406356413166, + "epoch": 0.17957437161721784, "grad_norm": 0.0, - "learning_rate": 1.884590405240375e-05, - "loss": 0.8921, + "learning_rate": 1.8849502427086486e-05, + "loss": 0.9749, "step": 6337 }, { - "epoch": 0.17985244040862655, + "epoch": 0.17960270905948028, "grad_norm": 0.0, - "learning_rate": 1.8845475385749622e-05, - "loss": 1.0049, + "learning_rate": 1.8849074988581258e-05, + "loss": 1.0619, "step": 6338 }, { - "epoch": 0.17988081725312147, + "epoch": 0.17963104650174275, "grad_norm": 0.0, - "learning_rate": 1.8845046644377363e-05, - "loss": 0.9366, + "learning_rate": 1.884864747553698e-05, + "loss": 0.9444, "step": 6339 }, { - "epoch": 0.17990919409761635, + "epoch": 0.1796593839440052, "grad_norm": 0.0, - "learning_rate": 1.8844617828290597e-05, - "loss": 1.0223, + "learning_rate": 1.8848219887957265e-05, + "loss": 0.9796, "step": 6340 }, { - "epoch": 0.17993757094211124, + "epoch": 0.17968772138626768, "grad_norm": 0.0, - "learning_rate": 1.8844188937492948e-05, - "loss": 0.9737, + "learning_rate": 1.8847792225845707e-05, + "loss": 0.8701, "step": 6341 }, { - "epoch": 0.17996594778660613, + "epoch": 0.17971605882853015, "grad_norm": 0.0, - "learning_rate": 1.8843759971988036e-05, - "loss": 1.1011, + "learning_rate": 1.8847364489205908e-05, + "loss": 1.0905, "step": 6342 }, { - "epoch": 0.17999432463110102, + "epoch": 0.1797443962707926, "grad_norm": 0.0, - "learning_rate": 1.8843330931779486e-05, - "loss": 0.9869, + "learning_rate": 1.884693667804147e-05, + "loss": 1.121, "step": 6343 }, { - "epoch": 0.1800227014755959, + "epoch": 0.17977273371305505, "grad_norm": 0.0, - "learning_rate": 1.8842901816870922e-05, - "loss": 0.9775, + "learning_rate": 1.8846508792356007e-05, + "loss": 0.8089, "step": 6344 }, { - "epoch": 0.18005107832009082, + "epoch": 0.17980107115531752, "grad_norm": 0.0, - "learning_rate": 1.884247262726597e-05, - "loss": 1.0895, + "learning_rate": 1.8846080832153107e-05, + "loss": 0.9826, "step": 6345 }, { - "epoch": 0.1800794551645857, + "epoch": 0.17982940859757998, "grad_norm": 0.0, - "learning_rate": 1.8842043362968256e-05, - "loss": 0.9847, + "learning_rate": 1.884565279743639e-05, + "loss": 1.0633, "step": 6346 }, { - "epoch": 0.1801078320090806, + "epoch": 0.17985774603984245, "grad_norm": 0.0, - "learning_rate": 1.88416140239814e-05, - "loss": 1.0861, + "learning_rate": 1.8845224688209448e-05, + "loss": 0.9525, "step": 6347 }, { - "epoch": 0.18013620885357548, + "epoch": 0.1798860834821049, "grad_norm": 0.0, - "learning_rate": 1.884118461030904e-05, - "loss": 0.9209, + "learning_rate": 1.8844796504475898e-05, + "loss": 0.9869, "step": 6348 }, { - "epoch": 0.18016458569807037, + "epoch": 0.17991442092436738, "grad_norm": 0.0, - "learning_rate": 1.884075512195479e-05, - "loss": 1.1302, + "learning_rate": 1.8844368246239343e-05, + "loss": 1.0397, "step": 6349 }, { - "epoch": 0.18019296254256525, + "epoch": 0.17994275836662982, "grad_norm": 0.0, - "learning_rate": 1.8840325558922285e-05, - "loss": 1.0317, + "learning_rate": 1.884393991350339e-05, + "loss": 0.9247, "step": 6350 }, { - "epoch": 0.18022133938706017, + "epoch": 0.17997109580889228, "grad_norm": 0.0, - "learning_rate": 1.8839895921215153e-05, - "loss": 1.0132, + "learning_rate": 1.8843511506271647e-05, + "loss": 0.9897, "step": 6351 }, { - "epoch": 0.18024971623155506, + "epoch": 0.17999943325115475, "grad_norm": 0.0, - "learning_rate": 1.8839466208837022e-05, - "loss": 0.9623, + "learning_rate": 1.884308302454772e-05, + "loss": 0.9555, "step": 6352 }, { - "epoch": 0.18027809307604994, + "epoch": 0.18002777069341722, "grad_norm": 0.0, - "learning_rate": 1.8839036421791524e-05, - "loss": 0.8889, + "learning_rate": 1.8842654468335226e-05, + "loss": 1.0539, "step": 6353 }, { - "epoch": 0.18030646992054483, + "epoch": 0.18005610813567968, "grad_norm": 0.0, - "learning_rate": 1.8838606560082287e-05, - "loss": 1.0383, + "learning_rate": 1.8842225837637765e-05, + "loss": 1.0904, "step": 6354 }, { - "epoch": 0.18033484676503972, + "epoch": 0.18008444557794215, "grad_norm": 0.0, - "learning_rate": 1.883817662371294e-05, - "loss": 1.11, + "learning_rate": 1.8841797132458953e-05, + "loss": 1.1022, "step": 6355 }, { - "epoch": 0.1803632236095346, + "epoch": 0.1801127830202046, "grad_norm": 0.0, - "learning_rate": 1.8837746612687123e-05, - "loss": 1.0094, + "learning_rate": 1.88413683528024e-05, + "loss": 1.025, "step": 6356 }, { - "epoch": 0.18039160045402952, + "epoch": 0.18014112046246705, "grad_norm": 0.0, - "learning_rate": 1.883731652700846e-05, - "loss": 0.8709, + "learning_rate": 1.8840939498671716e-05, + "loss": 0.9012, "step": 6357 }, { - "epoch": 0.1804199772985244, + "epoch": 0.18016945790472952, "grad_norm": 0.0, - "learning_rate": 1.8836886366680587e-05, - "loss": 0.8553, + "learning_rate": 1.8840510570070522e-05, + "loss": 0.9845, "step": 6358 }, { - "epoch": 0.1804483541430193, + "epoch": 0.18019779534699198, "grad_norm": 0.0, - "learning_rate": 1.8836456131707143e-05, - "loss": 0.9492, + "learning_rate": 1.884008156700242e-05, + "loss": 1.0287, "step": 6359 }, { - "epoch": 0.18047673098751418, + "epoch": 0.18022613278925445, "grad_norm": 0.0, - "learning_rate": 1.883602582209175e-05, - "loss": 0.9048, + "learning_rate": 1.883965248947103e-05, + "loss": 0.906, "step": 6360 }, { - "epoch": 0.18050510783200907, + "epoch": 0.18025447023151692, "grad_norm": 0.0, - "learning_rate": 1.8835595437838052e-05, - "loss": 1.0671, + "learning_rate": 1.8839223337479966e-05, + "loss": 1.0007, "step": 6361 }, { - "epoch": 0.18053348467650399, + "epoch": 0.18028280767377936, "grad_norm": 0.0, - "learning_rate": 1.8835164978949683e-05, - "loss": 0.9635, + "learning_rate": 1.883879411103284e-05, + "loss": 1.0038, "step": 6362 }, { - "epoch": 0.18056186152099887, + "epoch": 0.18031114511604182, "grad_norm": 0.0, - "learning_rate": 1.8834734445430277e-05, - "loss": 1.0303, + "learning_rate": 1.883836481013327e-05, + "loss": 1.15, "step": 6363 }, { - "epoch": 0.18059023836549376, + "epoch": 0.1803394825583043, "grad_norm": 0.0, - "learning_rate": 1.8834303837283474e-05, - "loss": 1.116, + "learning_rate": 1.8837935434784865e-05, + "loss": 1.059, "step": 6364 }, { - "epoch": 0.18061861520998865, + "epoch": 0.18036782000056675, "grad_norm": 0.0, - "learning_rate": 1.883387315451291e-05, - "loss": 1.094, + "learning_rate": 1.8837505984991254e-05, + "loss": 1.0091, "step": 6365 }, { - "epoch": 0.18064699205448354, + "epoch": 0.18039615744282922, "grad_norm": 0.0, - "learning_rate": 1.883344239712222e-05, - "loss": 0.9243, + "learning_rate": 1.883707646075605e-05, + "loss": 1.0313, "step": 6366 }, { - "epoch": 0.18067536889897842, + "epoch": 0.18042449488509169, "grad_norm": 0.0, - "learning_rate": 1.8833011565115043e-05, - "loss": 0.986, + "learning_rate": 1.8836646862082864e-05, + "loss": 1.0083, "step": 6367 }, { - "epoch": 0.18070374574347334, + "epoch": 0.18045283232735412, "grad_norm": 0.0, - "learning_rate": 1.8832580658495026e-05, - "loss": 1.0043, + "learning_rate": 1.8836217188975325e-05, + "loss": 1.0228, "step": 6368 }, { - "epoch": 0.18073212258796822, + "epoch": 0.1804811697696166, "grad_norm": 0.0, - "learning_rate": 1.8832149677265796e-05, - "loss": 0.9064, + "learning_rate": 1.8835787441437043e-05, + "loss": 1.0751, "step": 6369 }, { - "epoch": 0.1807604994324631, + "epoch": 0.18050950721187906, "grad_norm": 0.0, - "learning_rate": 1.8831718621431004e-05, - "loss": 1.0758, + "learning_rate": 1.8835357619471642e-05, + "loss": 1.1034, "step": 6370 }, { - "epoch": 0.180788876276958, + "epoch": 0.18053784465414152, "grad_norm": 0.0, - "learning_rate": 1.883128749099429e-05, - "loss": 1.0882, + "learning_rate": 1.883492772308275e-05, + "loss": 1.0001, "step": 6371 }, { - "epoch": 0.1808172531214529, + "epoch": 0.180566182096404, "grad_norm": 0.0, - "learning_rate": 1.883085628595929e-05, - "loss": 0.9979, + "learning_rate": 1.8834497752273975e-05, + "loss": 0.9543, "step": 6372 }, { - "epoch": 0.18084562996594777, + "epoch": 0.18059451953866645, "grad_norm": 0.0, - "learning_rate": 1.8830425006329657e-05, - "loss": 0.9074, + "learning_rate": 1.8834067707048948e-05, + "loss": 0.9859, "step": 6373 }, { - "epoch": 0.1808740068104427, + "epoch": 0.1806228569809289, "grad_norm": 0.0, - "learning_rate": 1.882999365210902e-05, - "loss": 0.9399, + "learning_rate": 1.8833637587411284e-05, + "loss": 1.036, "step": 6374 }, { - "epoch": 0.18090238365493758, + "epoch": 0.18065119442319136, "grad_norm": 0.0, - "learning_rate": 1.882956222330103e-05, - "loss": 0.9673, + "learning_rate": 1.883320739336461e-05, + "loss": 1.0012, "step": 6375 }, { - "epoch": 0.18093076049943246, + "epoch": 0.18067953186545382, "grad_norm": 0.0, - "learning_rate": 1.8829130719909332e-05, - "loss": 1.132, + "learning_rate": 1.8832777124912556e-05, + "loss": 1.0469, "step": 6376 }, { - "epoch": 0.18095913734392735, + "epoch": 0.1807078693077163, "grad_norm": 0.0, - "learning_rate": 1.882869914193757e-05, - "loss": 1.1569, + "learning_rate": 1.8832346782058736e-05, + "loss": 1.1249, "step": 6377 }, { - "epoch": 0.18098751418842224, + "epoch": 0.18073620674997876, "grad_norm": 0.0, - "learning_rate": 1.8828267489389393e-05, - "loss": 1.0083, + "learning_rate": 1.883191636480678e-05, + "loss": 0.9862, "step": 6378 }, { - "epoch": 0.18101589103291715, + "epoch": 0.18076454419224122, "grad_norm": 0.0, - "learning_rate": 1.8827835762268438e-05, - "loss": 1.0799, + "learning_rate": 1.8831485873160312e-05, + "loss": 1.0045, "step": 6379 }, { - "epoch": 0.18104426787741204, + "epoch": 0.18079288163450366, "grad_norm": 0.0, - "learning_rate": 1.882740396057836e-05, - "loss": 0.9386, + "learning_rate": 1.883105530712296e-05, + "loss": 0.9682, "step": 6380 }, { - "epoch": 0.18107264472190693, + "epoch": 0.18082121907676613, "grad_norm": 0.0, - "learning_rate": 1.8826972084322802e-05, - "loss": 1.0343, + "learning_rate": 1.883062466669835e-05, + "loss": 1.0807, "step": 6381 }, { - "epoch": 0.18110102156640182, + "epoch": 0.1808495565190286, "grad_norm": 0.0, - "learning_rate": 1.8826540133505416e-05, - "loss": 1.0392, + "learning_rate": 1.883019395189011e-05, + "loss": 1.0309, "step": 6382 }, { - "epoch": 0.1811293984108967, + "epoch": 0.18087789396129106, "grad_norm": 0.0, - "learning_rate": 1.882610810812985e-05, - "loss": 1.1032, + "learning_rate": 1.8829763162701866e-05, + "loss": 1.0187, "step": 6383 }, { - "epoch": 0.1811577752553916, + "epoch": 0.18090623140355352, "grad_norm": 0.0, - "learning_rate": 1.8825676008199753e-05, - "loss": 1.0299, + "learning_rate": 1.8829332299137245e-05, + "loss": 1.0111, "step": 6384 }, { - "epoch": 0.1811861520998865, + "epoch": 0.18093456884581596, "grad_norm": 0.0, - "learning_rate": 1.882524383371877e-05, - "loss": 0.9916, + "learning_rate": 1.8828901361199885e-05, + "loss": 0.9877, "step": 6385 }, { - "epoch": 0.1812145289443814, + "epoch": 0.18096290628807843, "grad_norm": 0.0, - "learning_rate": 1.8824811584690557e-05, - "loss": 1.0369, + "learning_rate": 1.882847034889341e-05, + "loss": 1.0034, "step": 6386 }, { - "epoch": 0.18124290578887628, + "epoch": 0.1809912437303409, "grad_norm": 0.0, - "learning_rate": 1.8824379261118765e-05, - "loss": 1.0493, + "learning_rate": 1.8828039262221448e-05, + "loss": 1.021, "step": 6387 }, { - "epoch": 0.18127128263337117, + "epoch": 0.18101958117260336, "grad_norm": 0.0, - "learning_rate": 1.8823946863007044e-05, - "loss": 0.9352, + "learning_rate": 1.8827608101187634e-05, + "loss": 0.9648, "step": 6388 }, { - "epoch": 0.18129965947786605, + "epoch": 0.18104791861486583, "grad_norm": 0.0, - "learning_rate": 1.8823514390359046e-05, - "loss": 0.9841, + "learning_rate": 1.8827176865795597e-05, + "loss": 1.0284, "step": 6389 }, { - "epoch": 0.18132803632236094, + "epoch": 0.1810762560571283, "grad_norm": 0.0, - "learning_rate": 1.8823081843178426e-05, - "loss": 1.0529, + "learning_rate": 1.8826745556048975e-05, + "loss": 1.1326, "step": 6390 }, { - "epoch": 0.18135641316685586, + "epoch": 0.18110459349939073, "grad_norm": 0.0, - "learning_rate": 1.882264922146884e-05, - "loss": 0.9948, + "learning_rate": 1.8826314171951393e-05, + "loss": 1.0657, "step": 6391 }, { - "epoch": 0.18138479001135074, + "epoch": 0.1811329309416532, "grad_norm": 0.0, - "learning_rate": 1.8822216525233937e-05, - "loss": 0.9566, + "learning_rate": 1.8825882713506493e-05, + "loss": 1.0317, "step": 6392 }, { - "epoch": 0.18141316685584563, + "epoch": 0.18116126838391566, "grad_norm": 0.0, - "learning_rate": 1.8821783754477373e-05, - "loss": 0.8743, + "learning_rate": 1.8825451180717905e-05, + "loss": 0.9847, "step": 6393 }, { - "epoch": 0.18144154370034052, + "epoch": 0.18118960582617813, "grad_norm": 0.0, - "learning_rate": 1.8821350909202806e-05, - "loss": 1.0751, + "learning_rate": 1.8825019573589264e-05, + "loss": 0.9645, "step": 6394 }, { - "epoch": 0.1814699205448354, + "epoch": 0.1812179432684406, "grad_norm": 0.0, - "learning_rate": 1.882091798941389e-05, - "loss": 1.0136, + "learning_rate": 1.8824587892124208e-05, + "loss": 1.0543, "step": 6395 }, { - "epoch": 0.1814982973893303, + "epoch": 0.18124628071070306, "grad_norm": 0.0, - "learning_rate": 1.8820484995114286e-05, - "loss": 0.9014, + "learning_rate": 1.882415613632637e-05, + "loss": 0.892, "step": 6396 }, { - "epoch": 0.1815266742338252, + "epoch": 0.1812746181529655, "grad_norm": 0.0, - "learning_rate": 1.8820051926307647e-05, - "loss": 1.0902, + "learning_rate": 1.8823724306199385e-05, + "loss": 1.0325, "step": 6397 }, { - "epoch": 0.1815550510783201, + "epoch": 0.18130295559522797, "grad_norm": 0.0, - "learning_rate": 1.8819618782997634e-05, - "loss": 0.9735, + "learning_rate": 1.8823292401746895e-05, + "loss": 0.9429, "step": 6398 }, { - "epoch": 0.18158342792281498, + "epoch": 0.18133129303749043, "grad_norm": 0.0, - "learning_rate": 1.8819185565187904e-05, - "loss": 0.973, + "learning_rate": 1.882286042297254e-05, + "loss": 1.092, "step": 6399 }, { - "epoch": 0.18161180476730987, + "epoch": 0.1813596304797529, "grad_norm": 0.0, - "learning_rate": 1.881875227288212e-05, - "loss": 0.9258, + "learning_rate": 1.882242836987995e-05, + "loss": 1.0936, "step": 6400 }, { - "epoch": 0.18164018161180476, + "epoch": 0.18138796792201536, "grad_norm": 0.0, - "learning_rate": 1.8818318906083936e-05, - "loss": 1.0609, + "learning_rate": 1.8821996242472772e-05, + "loss": 1.0823, "step": 6401 }, { - "epoch": 0.18166855845629967, + "epoch": 0.18141630536427783, "grad_norm": 0.0, - "learning_rate": 1.8817885464797014e-05, - "loss": 0.8623, + "learning_rate": 1.8821564040754646e-05, + "loss": 1.0248, "step": 6402 }, { - "epoch": 0.18169693530079456, + "epoch": 0.18144464280654027, "grad_norm": 0.0, - "learning_rate": 1.8817451949025018e-05, - "loss": 0.9808, + "learning_rate": 1.882113176472921e-05, + "loss": 1.2014, "step": 6403 }, { - "epoch": 0.18172531214528945, + "epoch": 0.18147298024880273, "grad_norm": 0.0, - "learning_rate": 1.8817018358771612e-05, - "loss": 0.9532, + "learning_rate": 1.88206994144001e-05, + "loss": 1.0306, "step": 6404 }, { - "epoch": 0.18175368898978433, + "epoch": 0.1815013176910652, "grad_norm": 0.0, - "learning_rate": 1.8816584694040455e-05, - "loss": 0.9568, + "learning_rate": 1.882026698977097e-05, + "loss": 1.0706, "step": 6405 }, { - "epoch": 0.18178206583427922, + "epoch": 0.18152965513332767, "grad_norm": 0.0, - "learning_rate": 1.8816150954835208e-05, - "loss": 0.9836, + "learning_rate": 1.881983449084545e-05, + "loss": 1.0229, "step": 6406 }, { - "epoch": 0.1818104426787741, + "epoch": 0.18155799257559013, "grad_norm": 0.0, - "learning_rate": 1.8815717141159538e-05, - "loss": 1.0502, + "learning_rate": 1.8819401917627195e-05, + "loss": 0.9113, "step": 6407 }, { - "epoch": 0.18183881952326902, + "epoch": 0.1815863300178526, "grad_norm": 0.0, - "learning_rate": 1.881528325301711e-05, - "loss": 0.964, + "learning_rate": 1.881896927011984e-05, + "loss": 0.9897, "step": 6408 }, { - "epoch": 0.1818671963677639, + "epoch": 0.18161466746011504, "grad_norm": 0.0, - "learning_rate": 1.8814849290411587e-05, - "loss": 1.0002, + "learning_rate": 1.881853654832703e-05, + "loss": 0.925, "step": 6409 }, { - "epoch": 0.1818955732122588, + "epoch": 0.1816430049023775, "grad_norm": 0.0, - "learning_rate": 1.8814415253346638e-05, - "loss": 1.0955, + "learning_rate": 1.8818103752252414e-05, + "loss": 1.0437, "step": 6410 }, { - "epoch": 0.18192395005675369, + "epoch": 0.18167134234463997, "grad_norm": 0.0, - "learning_rate": 1.8813981141825927e-05, - "loss": 0.9325, + "learning_rate": 1.8817670881899635e-05, + "loss": 0.9592, "step": 6411 }, { - "epoch": 0.18195232690124857, + "epoch": 0.18169967978690243, "grad_norm": 0.0, - "learning_rate": 1.881354695585312e-05, - "loss": 0.869, + "learning_rate": 1.881723793727234e-05, + "loss": 1.1201, "step": 6412 }, { - "epoch": 0.18198070374574346, + "epoch": 0.1817280172291649, "grad_norm": 0.0, - "learning_rate": 1.8813112695431885e-05, - "loss": 0.8888, + "learning_rate": 1.8816804918374175e-05, + "loss": 1.0943, "step": 6413 }, { - "epoch": 0.18200908059023838, + "epoch": 0.18175635467142737, "grad_norm": 0.0, - "learning_rate": 1.8812678360565893e-05, - "loss": 1.0801, + "learning_rate": 1.881637182520879e-05, + "loss": 0.9897, "step": 6414 }, { - "epoch": 0.18203745743473326, + "epoch": 0.1817846921136898, "grad_norm": 0.0, - "learning_rate": 1.881224395125881e-05, - "loss": 0.9736, + "learning_rate": 1.8815938657779828e-05, + "loss": 1.0078, "step": 6415 }, { - "epoch": 0.18206583427922815, + "epoch": 0.18181302955595227, "grad_norm": 0.0, - "learning_rate": 1.8811809467514302e-05, - "loss": 1.0884, + "learning_rate": 1.8815505416090946e-05, + "loss": 0.9207, "step": 6416 }, { - "epoch": 0.18209421112372304, + "epoch": 0.18184136699821474, "grad_norm": 0.0, - "learning_rate": 1.881137490933605e-05, - "loss": 0.943, + "learning_rate": 1.8815072100145785e-05, + "loss": 0.9783, "step": 6417 }, { - "epoch": 0.18212258796821792, + "epoch": 0.1818697044404772, "grad_norm": 0.0, - "learning_rate": 1.8810940276727713e-05, - "loss": 0.9417, + "learning_rate": 1.8814638709947995e-05, + "loss": 1.0822, "step": 6418 }, { - "epoch": 0.18215096481271284, + "epoch": 0.18189804188273967, "grad_norm": 0.0, - "learning_rate": 1.881050556969297e-05, - "loss": 1.0446, + "learning_rate": 1.8814205245501235e-05, + "loss": 1.0256, "step": 6419 }, { - "epoch": 0.18217934165720773, + "epoch": 0.18192637932500214, "grad_norm": 0.0, - "learning_rate": 1.881007078823549e-05, - "loss": 1.0779, + "learning_rate": 1.881377170680915e-05, + "loss": 1.0003, "step": 6420 }, { - "epoch": 0.18220771850170261, + "epoch": 0.18195471676726457, "grad_norm": 0.0, - "learning_rate": 1.8809635932358946e-05, - "loss": 1.067, + "learning_rate": 1.8813338093875393e-05, + "loss": 0.9441, "step": 6421 }, { - "epoch": 0.1822360953461975, + "epoch": 0.18198305420952704, "grad_norm": 0.0, - "learning_rate": 1.880920100206701e-05, - "loss": 0.9064, + "learning_rate": 1.8812904406703617e-05, + "loss": 1.0059, "step": 6422 }, { - "epoch": 0.1822644721906924, + "epoch": 0.1820113916517895, "grad_norm": 0.0, - "learning_rate": 1.880876599736336e-05, - "loss": 0.9287, + "learning_rate": 1.8812470645297473e-05, + "loss": 0.9257, "step": 6423 }, { - "epoch": 0.18229284903518728, + "epoch": 0.18203972909405197, "grad_norm": 0.0, - "learning_rate": 1.8808330918251667e-05, - "loss": 1.0022, + "learning_rate": 1.8812036809660618e-05, + "loss": 1.0234, "step": 6424 }, { - "epoch": 0.1823212258796822, + "epoch": 0.18206806653631444, "grad_norm": 0.0, - "learning_rate": 1.8807895764735605e-05, - "loss": 0.9531, + "learning_rate": 1.8811602899796703e-05, + "loss": 0.9157, "step": 6425 }, { - "epoch": 0.18234960272417708, + "epoch": 0.1820964039785769, "grad_norm": 0.0, - "learning_rate": 1.880746053681885e-05, - "loss": 1.1246, + "learning_rate": 1.8811168915709385e-05, + "loss": 0.9155, "step": 6426 }, { - "epoch": 0.18237797956867197, + "epoch": 0.18212474142083934, "grad_norm": 0.0, - "learning_rate": 1.8807025234505083e-05, - "loss": 0.9581, + "learning_rate": 1.881073485740232e-05, + "loss": 1.057, "step": 6427 }, { - "epoch": 0.18240635641316685, + "epoch": 0.1821530788631018, "grad_norm": 0.0, - "learning_rate": 1.8806589857797977e-05, - "loss": 1.0727, + "learning_rate": 1.8810300724879163e-05, + "loss": 0.937, "step": 6428 }, { - "epoch": 0.18243473325766174, + "epoch": 0.18218141630536427, "grad_norm": 0.0, - "learning_rate": 1.880615440670121e-05, - "loss": 1.1356, + "learning_rate": 1.880986651814357e-05, + "loss": 1.092, "step": 6429 }, { - "epoch": 0.18246311010215663, + "epoch": 0.18220975374762674, "grad_norm": 0.0, - "learning_rate": 1.8805718881218463e-05, - "loss": 1.0799, + "learning_rate": 1.8809432237199204e-05, + "loss": 1.0199, "step": 6430 }, { - "epoch": 0.18249148694665154, + "epoch": 0.1822380911898892, "grad_norm": 0.0, - "learning_rate": 1.8805283281353413e-05, - "loss": 1.0104, + "learning_rate": 1.8808997882049717e-05, + "loss": 0.9655, "step": 6431 }, { - "epoch": 0.18251986379114643, + "epoch": 0.18226642863215167, "grad_norm": 0.0, - "learning_rate": 1.8804847607109735e-05, - "loss": 1.0731, + "learning_rate": 1.8808563452698768e-05, + "loss": 1.0123, "step": 6432 }, { - "epoch": 0.18254824063564132, + "epoch": 0.1822947660744141, "grad_norm": 0.0, - "learning_rate": 1.8804411858491117e-05, - "loss": 0.9969, + "learning_rate": 1.880812894915002e-05, + "loss": 1.0651, "step": 6433 }, { - "epoch": 0.1825766174801362, + "epoch": 0.18232310351667658, "grad_norm": 0.0, - "learning_rate": 1.8803976035501234e-05, - "loss": 1.0557, + "learning_rate": 1.8807694371407132e-05, + "loss": 0.9951, "step": 6434 }, { - "epoch": 0.1826049943246311, + "epoch": 0.18235144095893904, "grad_norm": 0.0, - "learning_rate": 1.8803540138143772e-05, - "loss": 1.064, + "learning_rate": 1.880725971947376e-05, + "loss": 0.9612, "step": 6435 }, { - "epoch": 0.18263337116912598, + "epoch": 0.1823797784012015, "grad_norm": 0.0, - "learning_rate": 1.880310416642241e-05, - "loss": 0.956, + "learning_rate": 1.880682499335357e-05, + "loss": 0.9991, "step": 6436 }, { - "epoch": 0.1826617480136209, + "epoch": 0.18240811584346397, "grad_norm": 0.0, - "learning_rate": 1.880266812034083e-05, - "loss": 1.0348, + "learning_rate": 1.8806390193050223e-05, + "loss": 0.8877, "step": 6437 }, { - "epoch": 0.18269012485811578, + "epoch": 0.18243645328572644, "grad_norm": 0.0, - "learning_rate": 1.880223199990272e-05, - "loss": 1.0868, + "learning_rate": 1.880595531856738e-05, + "loss": 0.9537, "step": 6438 }, { - "epoch": 0.18271850170261067, + "epoch": 0.18246479072798888, "grad_norm": 0.0, - "learning_rate": 1.880179580511176e-05, - "loss": 1.0597, + "learning_rate": 1.8805520369908707e-05, + "loss": 1.1339, "step": 6439 }, { - "epoch": 0.18274687854710556, + "epoch": 0.18249312817025135, "grad_norm": 0.0, - "learning_rate": 1.8801359535971628e-05, - "loss": 1.0139, + "learning_rate": 1.880508534707787e-05, + "loss": 1.084, "step": 6440 }, { - "epoch": 0.18277525539160044, + "epoch": 0.1825214656125138, "grad_norm": 0.0, - "learning_rate": 1.8800923192486023e-05, - "loss": 0.9448, + "learning_rate": 1.8804650250078525e-05, + "loss": 1.0498, "step": 6441 }, { - "epoch": 0.18280363223609536, + "epoch": 0.18254980305477628, "grad_norm": 0.0, - "learning_rate": 1.8800486774658623e-05, - "loss": 0.9473, + "learning_rate": 1.880421507891434e-05, + "loss": 1.0681, "step": 6442 }, { - "epoch": 0.18283200908059025, + "epoch": 0.18257814049703874, "grad_norm": 0.0, - "learning_rate": 1.8800050282493118e-05, - "loss": 1.036, + "learning_rate": 1.8803779833588983e-05, + "loss": 0.9621, "step": 6443 }, { - "epoch": 0.18286038592508513, + "epoch": 0.1826064779393012, "grad_norm": 0.0, - "learning_rate": 1.8799613715993187e-05, - "loss": 1.0336, + "learning_rate": 1.8803344514106123e-05, + "loss": 1.1356, "step": 6444 }, { - "epoch": 0.18288876276958002, + "epoch": 0.18263481538156365, "grad_norm": 0.0, - "learning_rate": 1.8799177075162527e-05, - "loss": 1.0523, + "learning_rate": 1.880290912046942e-05, + "loss": 0.9819, "step": 6445 }, { - "epoch": 0.1829171396140749, + "epoch": 0.1826631528238261, "grad_norm": 0.0, - "learning_rate": 1.8798740360004825e-05, - "loss": 1.0652, + "learning_rate": 1.8802473652682543e-05, + "loss": 1.041, "step": 6446 }, { - "epoch": 0.1829455164585698, + "epoch": 0.18269149026608858, "grad_norm": 0.0, - "learning_rate": 1.8798303570523762e-05, - "loss": 0.9563, + "learning_rate": 1.8802038110749166e-05, + "loss": 0.948, "step": 6447 }, { - "epoch": 0.1829738933030647, + "epoch": 0.18271982770835105, "grad_norm": 0.0, - "learning_rate": 1.879786670672304e-05, - "loss": 0.9966, + "learning_rate": 1.880160249467295e-05, + "loss": 1.0231, "step": 6448 }, { - "epoch": 0.1830022701475596, + "epoch": 0.1827481651506135, "grad_norm": 0.0, - "learning_rate": 1.8797429768606336e-05, - "loss": 1.0247, + "learning_rate": 1.880116680445757e-05, + "loss": 1.0029, "step": 6449 }, { - "epoch": 0.18303064699205449, + "epoch": 0.18277650259287598, "grad_norm": 0.0, - "learning_rate": 1.879699275617735e-05, - "loss": 1.1024, + "learning_rate": 1.880073104010669e-05, + "loss": 0.9724, "step": 6450 }, { - "epoch": 0.18305902383654937, + "epoch": 0.18280484003513842, "grad_norm": 0.0, - "learning_rate": 1.879655566943977e-05, - "loss": 0.8653, + "learning_rate": 1.880029520162399e-05, + "loss": 1.1026, "step": 6451 }, { - "epoch": 0.18308740068104426, + "epoch": 0.18283317747740088, "grad_norm": 0.0, - "learning_rate": 1.879611850839729e-05, - "loss": 1.0274, + "learning_rate": 1.879985928901313e-05, + "loss": 1.0503, "step": 6452 }, { - "epoch": 0.18311577752553915, + "epoch": 0.18286151491966335, "grad_norm": 0.0, - "learning_rate": 1.87956812730536e-05, - "loss": 1.0322, + "learning_rate": 1.8799423302277793e-05, + "loss": 1.138, "step": 6453 }, { - "epoch": 0.18314415437003406, + "epoch": 0.18288985236192581, "grad_norm": 0.0, - "learning_rate": 1.8795243963412394e-05, - "loss": 1.0427, + "learning_rate": 1.879898724142164e-05, + "loss": 1.0311, "step": 6454 }, { - "epoch": 0.18317253121452895, + "epoch": 0.18291818980418828, "grad_norm": 0.0, - "learning_rate": 1.8794806579477372e-05, - "loss": 0.9465, + "learning_rate": 1.8798551106448354e-05, + "loss": 0.9713, "step": 6455 }, { - "epoch": 0.18320090805902384, + "epoch": 0.18294652724645075, "grad_norm": 0.0, - "learning_rate": 1.8794369121252217e-05, - "loss": 0.9563, + "learning_rate": 1.8798114897361602e-05, + "loss": 0.9519, "step": 6456 }, { - "epoch": 0.18322928490351872, + "epoch": 0.18297486468871318, "grad_norm": 0.0, - "learning_rate": 1.8793931588740636e-05, - "loss": 0.9644, + "learning_rate": 1.8797678614165065e-05, + "loss": 1.1326, "step": 6457 }, { - "epoch": 0.1832576617480136, + "epoch": 0.18300320213097565, "grad_norm": 0.0, - "learning_rate": 1.879349398194632e-05, - "loss": 0.948, + "learning_rate": 1.8797242256862414e-05, + "loss": 1.0005, "step": 6458 }, { - "epoch": 0.18328603859250853, + "epoch": 0.18303153957323812, "grad_norm": 0.0, - "learning_rate": 1.879305630087296e-05, - "loss": 0.9925, + "learning_rate": 1.8796805825457324e-05, + "loss": 1.0248, "step": 6459 }, { - "epoch": 0.18331441543700341, + "epoch": 0.18305987701550058, "grad_norm": 0.0, - "learning_rate": 1.879261854552426e-05, - "loss": 1.0117, + "learning_rate": 1.879636931995347e-05, + "loss": 1.0412, "step": 6460 }, { - "epoch": 0.1833427922814983, + "epoch": 0.18308821445776305, "grad_norm": 0.0, - "learning_rate": 1.8792180715903917e-05, - "loss": 1.0381, + "learning_rate": 1.879593274035453e-05, + "loss": 1.0616, "step": 6461 }, { - "epoch": 0.1833711691259932, + "epoch": 0.18311655190002551, "grad_norm": 0.0, - "learning_rate": 1.879174281201563e-05, - "loss": 0.975, + "learning_rate": 1.879549608666418e-05, + "loss": 1.0871, "step": 6462 }, { - "epoch": 0.18339954597048808, + "epoch": 0.18314488934228795, "grad_norm": 0.0, - "learning_rate": 1.8791304833863094e-05, - "loss": 1.1044, + "learning_rate": 1.8795059358886108e-05, + "loss": 1.1143, "step": 6463 }, { - "epoch": 0.18342792281498296, + "epoch": 0.18317322678455042, "grad_norm": 0.0, - "learning_rate": 1.879086678145001e-05, - "loss": 1.0711, + "learning_rate": 1.8794622557023982e-05, + "loss": 1.1336, "step": 6464 }, { - "epoch": 0.18345629965947788, + "epoch": 0.18320156422681289, "grad_norm": 0.0, - "learning_rate": 1.879042865478008e-05, - "loss": 0.9766, + "learning_rate": 1.879418568108148e-05, + "loss": 0.9266, "step": 6465 }, { - "epoch": 0.18348467650397277, + "epoch": 0.18322990166907535, "grad_norm": 0.0, - "learning_rate": 1.8789990453857005e-05, - "loss": 0.9596, + "learning_rate": 1.8793748731062293e-05, + "loss": 1.0002, "step": 6466 }, { - "epoch": 0.18351305334846765, + "epoch": 0.18325823911133782, "grad_norm": 0.0, - "learning_rate": 1.8789552178684483e-05, - "loss": 0.994, + "learning_rate": 1.8793311706970088e-05, + "loss": 0.9634, "step": 6467 }, { - "epoch": 0.18354143019296254, + "epoch": 0.18328657655360028, "grad_norm": 0.0, - "learning_rate": 1.8789113829266223e-05, - "loss": 1.0937, + "learning_rate": 1.879287460880856e-05, + "loss": 1.1235, "step": 6468 }, { - "epoch": 0.18356980703745743, + "epoch": 0.18331491399586272, "grad_norm": 0.0, - "learning_rate": 1.8788675405605915e-05, - "loss": 0.9268, + "learning_rate": 1.8792437436581382e-05, + "loss": 1.1277, "step": 6469 }, { - "epoch": 0.18359818388195231, + "epoch": 0.1833432514381252, "grad_norm": 0.0, - "learning_rate": 1.878823690770728e-05, - "loss": 1.0343, + "learning_rate": 1.8792000190292236e-05, + "loss": 0.9324, "step": 6470 }, { - "epoch": 0.18362656072644723, + "epoch": 0.18337158888038765, "grad_norm": 0.0, - "learning_rate": 1.8787798335574007e-05, - "loss": 1.1396, + "learning_rate": 1.879156286994481e-05, + "loss": 0.9906, "step": 6471 }, { - "epoch": 0.18365493757094212, + "epoch": 0.18339992632265012, "grad_norm": 0.0, - "learning_rate": 1.8787359689209808e-05, - "loss": 1.0162, + "learning_rate": 1.879112547554278e-05, + "loss": 0.9793, "step": 6472 }, { - "epoch": 0.183683314415437, + "epoch": 0.18342826376491259, "grad_norm": 0.0, - "learning_rate": 1.8786920968618387e-05, - "loss": 0.8803, + "learning_rate": 1.8790688007089842e-05, + "loss": 0.9456, "step": 6473 }, { - "epoch": 0.1837116912599319, + "epoch": 0.18345660120717505, "grad_norm": 0.0, - "learning_rate": 1.878648217380345e-05, - "loss": 1.0203, + "learning_rate": 1.8790250464589676e-05, + "loss": 0.9486, "step": 6474 }, { - "epoch": 0.18374006810442678, + "epoch": 0.1834849386494375, "grad_norm": 0.0, - "learning_rate": 1.8786043304768702e-05, - "loss": 0.9964, + "learning_rate": 1.8789812848045962e-05, + "loss": 1.0097, "step": 6475 }, { - "epoch": 0.18376844494892167, + "epoch": 0.18351327609169996, "grad_norm": 0.0, - "learning_rate": 1.8785604361517852e-05, - "loss": 1.0846, + "learning_rate": 1.878937515746239e-05, + "loss": 1.0434, "step": 6476 }, { - "epoch": 0.18379682179341658, + "epoch": 0.18354161353396242, "grad_norm": 0.0, - "learning_rate": 1.8785165344054607e-05, - "loss": 0.983, + "learning_rate": 1.8788937392842646e-05, + "loss": 0.9219, "step": 6477 }, { - "epoch": 0.18382519863791147, + "epoch": 0.1835699509762249, "grad_norm": 0.0, - "learning_rate": 1.878472625238268e-05, - "loss": 1.0284, + "learning_rate": 1.8788499554190424e-05, + "loss": 0.9709, "step": 6478 }, { - "epoch": 0.18385357548240636, + "epoch": 0.18359828841848735, "grad_norm": 0.0, - "learning_rate": 1.878428708650577e-05, - "loss": 1.1753, + "learning_rate": 1.8788061641509402e-05, + "loss": 0.9938, "step": 6479 }, { - "epoch": 0.18388195232690124, + "epoch": 0.18362662586074982, "grad_norm": 0.0, - "learning_rate": 1.8783847846427593e-05, - "loss": 1.0176, + "learning_rate": 1.8787623654803274e-05, + "loss": 0.9987, "step": 6480 }, { - "epoch": 0.18391032917139613, + "epoch": 0.18365496330301226, "grad_norm": 0.0, - "learning_rate": 1.878340853215186e-05, - "loss": 1.0819, + "learning_rate": 1.878718559407573e-05, + "loss": 1.0798, "step": 6481 }, { - "epoch": 0.18393870601589105, + "epoch": 0.18368330074527472, "grad_norm": 0.0, - "learning_rate": 1.8782969143682278e-05, - "loss": 1.116, + "learning_rate": 1.878674745933046e-05, + "loss": 1.0555, "step": 6482 }, { - "epoch": 0.18396708286038593, + "epoch": 0.1837116381875372, "grad_norm": 0.0, - "learning_rate": 1.878252968102256e-05, - "loss": 0.933, + "learning_rate": 1.8786309250571155e-05, + "loss": 1.0506, "step": 6483 }, { - "epoch": 0.18399545970488082, + "epoch": 0.18373997562979966, "grad_norm": 0.0, - "learning_rate": 1.878209014417642e-05, - "loss": 0.9852, + "learning_rate": 1.87858709678015e-05, + "loss": 1.0197, "step": 6484 }, { - "epoch": 0.1840238365493757, + "epoch": 0.18376831307206212, "grad_norm": 0.0, - "learning_rate": 1.8781650533147572e-05, - "loss": 1.01, + "learning_rate": 1.878543261102519e-05, + "loss": 0.8808, "step": 6485 }, { - "epoch": 0.1840522133938706, + "epoch": 0.1837966505143246, "grad_norm": 0.0, - "learning_rate": 1.8781210847939726e-05, - "loss": 1.0763, + "learning_rate": 1.8784994180245922e-05, + "loss": 1.0194, "step": 6486 }, { - "epoch": 0.18408059023836548, + "epoch": 0.18382498795658703, "grad_norm": 0.0, - "learning_rate": 1.8780771088556595e-05, - "loss": 1.0972, + "learning_rate": 1.8784555675467383e-05, + "loss": 0.8523, "step": 6487 }, { - "epoch": 0.1841089670828604, + "epoch": 0.1838533253988495, "grad_norm": 0.0, - "learning_rate": 1.8780331255001896e-05, - "loss": 0.9945, + "learning_rate": 1.878411709669327e-05, + "loss": 0.9534, "step": 6488 }, { - "epoch": 0.18413734392735528, + "epoch": 0.18388166284111196, "grad_norm": 0.0, - "learning_rate": 1.8779891347279346e-05, - "loss": 1.0423, + "learning_rate": 1.8783678443927282e-05, + "loss": 0.9696, "step": 6489 }, { - "epoch": 0.18416572077185017, + "epoch": 0.18391000028337443, "grad_norm": 0.0, - "learning_rate": 1.877945136539266e-05, - "loss": 1.0781, + "learning_rate": 1.8783239717173106e-05, + "loss": 1.0014, "step": 6490 }, { - "epoch": 0.18419409761634506, + "epoch": 0.1839383377256369, "grad_norm": 0.0, - "learning_rate": 1.8779011309345554e-05, - "loss": 1.1011, + "learning_rate": 1.878280091643444e-05, + "loss": 1.018, "step": 6491 }, { - "epoch": 0.18422247446083995, + "epoch": 0.18396667516789936, "grad_norm": 0.0, - "learning_rate": 1.8778571179141743e-05, - "loss": 1.0535, + "learning_rate": 1.8782362041714978e-05, + "loss": 0.9931, "step": 6492 }, { - "epoch": 0.18425085130533483, + "epoch": 0.1839950126101618, "grad_norm": 0.0, - "learning_rate": 1.8778130974784944e-05, - "loss": 1.0689, + "learning_rate": 1.878192309301842e-05, + "loss": 1.1193, "step": 6493 }, { - "epoch": 0.18427922814982975, + "epoch": 0.18402335005242426, "grad_norm": 0.0, - "learning_rate": 1.877769069627888e-05, - "loss": 0.9885, + "learning_rate": 1.8781484070348464e-05, + "loss": 1.1311, "step": 6494 }, { - "epoch": 0.18430760499432464, + "epoch": 0.18405168749468673, "grad_norm": 0.0, - "learning_rate": 1.8777250343627274e-05, - "loss": 1.0001, + "learning_rate": 1.8781044973708807e-05, + "loss": 0.9564, "step": 6495 }, { - "epoch": 0.18433598183881952, + "epoch": 0.1840800249369492, "grad_norm": 0.0, - "learning_rate": 1.8776809916833833e-05, - "loss": 0.9648, + "learning_rate": 1.8780605803103147e-05, + "loss": 0.9359, "step": 6496 }, { - "epoch": 0.1843643586833144, + "epoch": 0.18410836237921166, "grad_norm": 0.0, - "learning_rate": 1.877636941590229e-05, - "loss": 1.0813, + "learning_rate": 1.8780166558535183e-05, + "loss": 1.0623, "step": 6497 }, { - "epoch": 0.1843927355278093, + "epoch": 0.18413669982147413, "grad_norm": 0.0, - "learning_rate": 1.8775928840836353e-05, - "loss": 0.9619, + "learning_rate": 1.8779727240008618e-05, + "loss": 1.0281, "step": 6498 }, { - "epoch": 0.1844211123723042, + "epoch": 0.18416503726373656, "grad_norm": 0.0, - "learning_rate": 1.8775488191639755e-05, - "loss": 1.0073, + "learning_rate": 1.8779287847527146e-05, + "loss": 1.1072, "step": 6499 }, { - "epoch": 0.1844494892167991, + "epoch": 0.18419337470599903, "grad_norm": 0.0, - "learning_rate": 1.8775047468316213e-05, - "loss": 1.06, + "learning_rate": 1.8778848381094477e-05, + "loss": 1.0685, "step": 6500 }, { - "epoch": 0.184477866061294, + "epoch": 0.1842217121482615, "grad_norm": 0.0, - "learning_rate": 1.8774606670869445e-05, - "loss": 0.9951, + "learning_rate": 1.8778408840714307e-05, + "loss": 1.0783, "step": 6501 }, { - "epoch": 0.18450624290578888, + "epoch": 0.18425004959052396, "grad_norm": 0.0, - "learning_rate": 1.8774165799303188e-05, - "loss": 1.0329, + "learning_rate": 1.877796922639034e-05, + "loss": 0.9768, "step": 6502 }, { - "epoch": 0.18453461975028376, + "epoch": 0.18427838703278643, "grad_norm": 0.0, - "learning_rate": 1.8773724853621154e-05, - "loss": 0.9731, + "learning_rate": 1.8777529538126273e-05, + "loss": 0.9568, "step": 6503 }, { - "epoch": 0.18456299659477865, + "epoch": 0.1843067244750489, "grad_norm": 0.0, - "learning_rate": 1.8773283833827076e-05, - "loss": 0.8956, + "learning_rate": 1.8777089775925822e-05, + "loss": 0.8413, "step": 6504 }, { - "epoch": 0.18459137343927357, + "epoch": 0.18433506191731133, "grad_norm": 0.0, - "learning_rate": 1.877284273992467e-05, - "loss": 0.987, + "learning_rate": 1.877664993979268e-05, + "loss": 1.0113, "step": 6505 }, { - "epoch": 0.18461975028376845, + "epoch": 0.1843633993595738, "grad_norm": 0.0, - "learning_rate": 1.877240157191767e-05, - "loss": 0.8973, + "learning_rate": 1.877621002973056e-05, + "loss": 1.0978, "step": 6506 }, { - "epoch": 0.18464812712826334, + "epoch": 0.18439173680183626, "grad_norm": 0.0, - "learning_rate": 1.8771960329809794e-05, - "loss": 1.0651, + "learning_rate": 1.8775770045743163e-05, + "loss": 1.0498, "step": 6507 }, { - "epoch": 0.18467650397275823, + "epoch": 0.18442007424409873, "grad_norm": 0.0, - "learning_rate": 1.8771519013604782e-05, - "loss": 1.0259, + "learning_rate": 1.8775329987834193e-05, + "loss": 0.9781, "step": 6508 }, { - "epoch": 0.18470488081725311, + "epoch": 0.1844484116863612, "grad_norm": 0.0, - "learning_rate": 1.8771077623306353e-05, - "loss": 0.977, + "learning_rate": 1.8774889856007365e-05, + "loss": 0.9842, "step": 6509 }, { - "epoch": 0.184733257661748, + "epoch": 0.18447674912862366, "grad_norm": 0.0, - "learning_rate": 1.8770636158918236e-05, - "loss": 0.9646, + "learning_rate": 1.8774449650266375e-05, + "loss": 1.0833, "step": 6510 }, { - "epoch": 0.18476163450624292, + "epoch": 0.1845050865708861, "grad_norm": 0.0, - "learning_rate": 1.877019462044416e-05, - "loss": 0.9446, + "learning_rate": 1.8774009370614944e-05, + "loss": 1.0254, "step": 6511 }, { - "epoch": 0.1847900113507378, + "epoch": 0.18453342401314857, "grad_norm": 0.0, - "learning_rate": 1.8769753007887858e-05, - "loss": 0.942, + "learning_rate": 1.8773569017056768e-05, + "loss": 0.9247, "step": 6512 }, { - "epoch": 0.1848183881952327, + "epoch": 0.18456176145541103, "grad_norm": 0.0, - "learning_rate": 1.8769311321253054e-05, - "loss": 1.1045, + "learning_rate": 1.8773128589595567e-05, + "loss": 0.882, "step": 6513 }, { - "epoch": 0.18484676503972758, + "epoch": 0.1845900988976735, "grad_norm": 0.0, - "learning_rate": 1.8768869560543488e-05, - "loss": 0.9527, + "learning_rate": 1.8772688088235044e-05, + "loss": 1.0731, "step": 6514 }, { - "epoch": 0.18487514188422247, + "epoch": 0.18461843633993597, "grad_norm": 0.0, - "learning_rate": 1.8768427725762883e-05, - "loss": 1.0727, + "learning_rate": 1.8772247512978913e-05, + "loss": 1.0562, "step": 6515 }, { - "epoch": 0.18490351872871735, + "epoch": 0.18464677378219843, "grad_norm": 0.0, - "learning_rate": 1.8767985816914976e-05, - "loss": 1.0738, + "learning_rate": 1.877180686383088e-05, + "loss": 0.934, "step": 6516 }, { - "epoch": 0.18493189557321227, + "epoch": 0.18467511122446087, "grad_norm": 0.0, - "learning_rate": 1.8767543834003497e-05, - "loss": 1.0756, + "learning_rate": 1.8771366140794662e-05, + "loss": 1.0272, "step": 6517 }, { - "epoch": 0.18496027241770716, + "epoch": 0.18470344866672334, "grad_norm": 0.0, - "learning_rate": 1.8767101777032188e-05, - "loss": 1.0589, + "learning_rate": 1.8770925343873972e-05, + "loss": 0.9433, "step": 6518 }, { - "epoch": 0.18498864926220204, + "epoch": 0.1847317861089858, "grad_norm": 0.0, - "learning_rate": 1.876665964600477e-05, - "loss": 1.0507, + "learning_rate": 1.8770484473072518e-05, + "loss": 0.9808, "step": 6519 }, { - "epoch": 0.18501702610669693, + "epoch": 0.18476012355124827, "grad_norm": 0.0, - "learning_rate": 1.8766217440924986e-05, - "loss": 1.057, + "learning_rate": 1.877004352839402e-05, + "loss": 1.0337, "step": 6520 }, { - "epoch": 0.18504540295119182, + "epoch": 0.18478846099351073, "grad_norm": 0.0, - "learning_rate": 1.8765775161796565e-05, - "loss": 0.9144, + "learning_rate": 1.8769602509842185e-05, + "loss": 0.9094, "step": 6521 }, { - "epoch": 0.18507377979568673, + "epoch": 0.1848167984357732, "grad_norm": 0.0, - "learning_rate": 1.8765332808623255e-05, - "loss": 1.0329, + "learning_rate": 1.8769161417420733e-05, + "loss": 1.0276, "step": 6522 }, { - "epoch": 0.18510215664018162, + "epoch": 0.18484513587803564, "grad_norm": 0.0, - "learning_rate": 1.8764890381408775e-05, - "loss": 1.1051, + "learning_rate": 1.876872025113338e-05, + "loss": 1.108, "step": 6523 }, { - "epoch": 0.1851305334846765, + "epoch": 0.1848734733202981, "grad_norm": 0.0, - "learning_rate": 1.876444788015688e-05, - "loss": 1.0051, + "learning_rate": 1.8768279010983836e-05, + "loss": 1.0467, "step": 6524 }, { - "epoch": 0.1851589103291714, + "epoch": 0.18490181076256057, "grad_norm": 0.0, - "learning_rate": 1.8764005304871297e-05, - "loss": 0.9879, + "learning_rate": 1.8767837696975824e-05, + "loss": 1.0867, "step": 6525 }, { - "epoch": 0.18518728717366628, + "epoch": 0.18493014820482304, "grad_norm": 0.0, - "learning_rate": 1.8763562655555766e-05, - "loss": 1.0326, + "learning_rate": 1.8767396309113058e-05, + "loss": 1.0191, "step": 6526 }, { - "epoch": 0.18521566401816117, + "epoch": 0.1849584856470855, "grad_norm": 0.0, - "learning_rate": 1.8763119932214028e-05, - "loss": 1.0128, + "learning_rate": 1.8766954847399258e-05, + "loss": 1.0681, "step": 6527 }, { - "epoch": 0.18524404086265608, + "epoch": 0.18498682308934797, "grad_norm": 0.0, - "learning_rate": 1.8762677134849823e-05, - "loss": 0.9412, + "learning_rate": 1.8766513311838147e-05, + "loss": 0.9969, "step": 6528 }, { - "epoch": 0.18527241770715097, + "epoch": 0.1850151605316104, "grad_norm": 0.0, - "learning_rate": 1.8762234263466892e-05, - "loss": 1.0144, + "learning_rate": 1.876607170243343e-05, + "loss": 0.9466, "step": 6529 }, { - "epoch": 0.18530079455164586, + "epoch": 0.18504349797387287, "grad_norm": 0.0, - "learning_rate": 1.8761791318068972e-05, - "loss": 0.9397, + "learning_rate": 1.8765630019188844e-05, + "loss": 1.095, "step": 6530 }, { - "epoch": 0.18532917139614075, + "epoch": 0.18507183541613534, "grad_norm": 0.0, - "learning_rate": 1.8761348298659806e-05, - "loss": 1.1646, + "learning_rate": 1.8765188262108096e-05, + "loss": 1.0077, "step": 6531 }, { - "epoch": 0.18535754824063563, + "epoch": 0.1851001728583978, "grad_norm": 0.0, - "learning_rate": 1.876090520524314e-05, - "loss": 0.8765, + "learning_rate": 1.8764746431194915e-05, + "loss": 0.9461, "step": 6532 }, { - "epoch": 0.18538592508513052, + "epoch": 0.18512851030066027, "grad_norm": 0.0, - "learning_rate": 1.876046203782271e-05, - "loss": 0.9662, + "learning_rate": 1.876430452645302e-05, + "loss": 1.1135, "step": 6533 }, { - "epoch": 0.18541430192962544, + "epoch": 0.18515684774292274, "grad_norm": 0.0, - "learning_rate": 1.8760018796402267e-05, - "loss": 1.0827, + "learning_rate": 1.8763862547886133e-05, + "loss": 1.0455, "step": 6534 }, { - "epoch": 0.18544267877412032, + "epoch": 0.18518518518518517, "grad_norm": 0.0, - "learning_rate": 1.875957548098555e-05, - "loss": 1.0559, + "learning_rate": 1.8763420495497976e-05, + "loss": 1.057, "step": 6535 }, { - "epoch": 0.1854710556186152, + "epoch": 0.18521352262744764, "grad_norm": 0.0, - "learning_rate": 1.8759132091576302e-05, - "loss": 0.9302, + "learning_rate": 1.8762978369292273e-05, + "loss": 0.9839, "step": 6536 }, { - "epoch": 0.1854994324631101, + "epoch": 0.1852418600697101, "grad_norm": 0.0, - "learning_rate": 1.8758688628178276e-05, - "loss": 0.9247, + "learning_rate": 1.876253616927275e-05, + "loss": 1.1316, "step": 6537 }, { - "epoch": 0.18552780930760499, + "epoch": 0.18527019751197257, "grad_norm": 0.0, - "learning_rate": 1.875824509079521e-05, - "loss": 1.0093, + "learning_rate": 1.8762093895443132e-05, + "loss": 0.9675, "step": 6538 }, { - "epoch": 0.1855561861520999, + "epoch": 0.18529853495423504, "grad_norm": 0.0, - "learning_rate": 1.8757801479430858e-05, - "loss": 0.9377, + "learning_rate": 1.8761651547807144e-05, + "loss": 0.9559, "step": 6539 }, { - "epoch": 0.1855845629965948, + "epoch": 0.1853268723964975, "grad_norm": 0.0, - "learning_rate": 1.875735779408896e-05, - "loss": 0.9981, + "learning_rate": 1.876120912636851e-05, + "loss": 1.0645, "step": 6540 }, { - "epoch": 0.18561293984108967, + "epoch": 0.18535520983875994, "grad_norm": 0.0, - "learning_rate": 1.875691403477327e-05, - "loss": 1.0344, + "learning_rate": 1.876076663113096e-05, + "loss": 0.9755, "step": 6541 }, { - "epoch": 0.18564131668558456, + "epoch": 0.1853835472810224, "grad_norm": 0.0, - "learning_rate": 1.875647020148753e-05, - "loss": 1.06, + "learning_rate": 1.8760324062098218e-05, + "loss": 1.0673, "step": 6542 }, { - "epoch": 0.18566969353007945, + "epoch": 0.18541188472328488, "grad_norm": 0.0, - "learning_rate": 1.8756026294235494e-05, - "loss": 0.9782, + "learning_rate": 1.8759881419274012e-05, + "loss": 0.9857, "step": 6543 }, { - "epoch": 0.18569807037457434, + "epoch": 0.18544022216554734, "grad_norm": 0.0, - "learning_rate": 1.8755582313020912e-05, - "loss": 1.0138, + "learning_rate": 1.8759438702662076e-05, + "loss": 0.9567, "step": 6544 }, { - "epoch": 0.18572644721906925, + "epoch": 0.1854685596078098, "grad_norm": 0.0, - "learning_rate": 1.875513825784753e-05, - "loss": 0.988, + "learning_rate": 1.875899591226613e-05, + "loss": 1.1052, "step": 6545 }, { - "epoch": 0.18575482406356414, + "epoch": 0.18549689705007227, "grad_norm": 0.0, - "learning_rate": 1.8754694128719103e-05, - "loss": 0.9764, + "learning_rate": 1.875855304808991e-05, + "loss": 0.9429, "step": 6546 }, { - "epoch": 0.18578320090805903, + "epoch": 0.1855252344923347, "grad_norm": 0.0, - "learning_rate": 1.875424992563938e-05, - "loss": 0.9028, + "learning_rate": 1.875811011013715e-05, + "loss": 1.0382, "step": 6547 }, { - "epoch": 0.1858115777525539, + "epoch": 0.18555357193459718, "grad_norm": 0.0, - "learning_rate": 1.8753805648612115e-05, - "loss": 1.045, + "learning_rate": 1.8757667098411572e-05, + "loss": 1.0335, "step": 6548 }, { - "epoch": 0.1858399545970488, + "epoch": 0.18558190937685964, "grad_norm": 0.0, - "learning_rate": 1.8753361297641058e-05, - "loss": 0.9886, + "learning_rate": 1.8757224012916913e-05, + "loss": 0.9705, "step": 6549 }, { - "epoch": 0.1858683314415437, + "epoch": 0.1856102468191221, "grad_norm": 0.0, - "learning_rate": 1.8752916872729967e-05, - "loss": 0.989, + "learning_rate": 1.8756780853656902e-05, + "loss": 1.0347, "step": 6550 }, { - "epoch": 0.1858967082860386, + "epoch": 0.18563858426138458, "grad_norm": 0.0, - "learning_rate": 1.8752472373882597e-05, - "loss": 1.0767, + "learning_rate": 1.8756337620635277e-05, + "loss": 1.0571, "step": 6551 }, { - "epoch": 0.1859250851305335, + "epoch": 0.18566692170364704, "grad_norm": 0.0, - "learning_rate": 1.8752027801102694e-05, - "loss": 0.9443, + "learning_rate": 1.8755894313855768e-05, + "loss": 1.0468, "step": 6552 }, { - "epoch": 0.18595346197502838, + "epoch": 0.18569525914590948, "grad_norm": 0.0, - "learning_rate": 1.8751583154394023e-05, - "loss": 1.0769, + "learning_rate": 1.875545093332211e-05, + "loss": 0.8786, "step": 6553 }, { - "epoch": 0.18598183881952327, + "epoch": 0.18572359658817195, "grad_norm": 0.0, - "learning_rate": 1.875113843376033e-05, - "loss": 1.0063, + "learning_rate": 1.8755007479038038e-05, + "loss": 0.9778, "step": 6554 }, { - "epoch": 0.18601021566401815, + "epoch": 0.1857519340304344, "grad_norm": 0.0, - "learning_rate": 1.875069363920538e-05, - "loss": 0.9595, + "learning_rate": 1.8754563951007286e-05, + "loss": 1.1248, "step": 6555 }, { - "epoch": 0.18603859250851304, + "epoch": 0.18578027147269688, "grad_norm": 0.0, - "learning_rate": 1.875024877073293e-05, - "loss": 1.0256, + "learning_rate": 1.8754120349233594e-05, + "loss": 0.9733, "step": 6556 }, { - "epoch": 0.18606696935300795, + "epoch": 0.18580860891495934, "grad_norm": 0.0, - "learning_rate": 1.8749803828346732e-05, - "loss": 1.0612, + "learning_rate": 1.875367667372069e-05, + "loss": 0.9804, "step": 6557 }, { - "epoch": 0.18609534619750284, + "epoch": 0.1858369463572218, "grad_norm": 0.0, - "learning_rate": 1.874935881205055e-05, - "loss": 1.0345, + "learning_rate": 1.875323292447232e-05, + "loss": 0.9543, "step": 6558 }, { - "epoch": 0.18612372304199773, + "epoch": 0.18586528379948425, "grad_norm": 0.0, - "learning_rate": 1.874891372184814e-05, - "loss": 0.9107, + "learning_rate": 1.8752789101492217e-05, + "loss": 1.0172, "step": 6559 }, { - "epoch": 0.18615209988649262, + "epoch": 0.18589362124174671, "grad_norm": 0.0, - "learning_rate": 1.8748468557743265e-05, - "loss": 0.8879, + "learning_rate": 1.8752345204784123e-05, + "loss": 1.0403, "step": 6560 }, { - "epoch": 0.1861804767309875, + "epoch": 0.18592195868400918, "grad_norm": 0.0, - "learning_rate": 1.8748023319739678e-05, - "loss": 0.9162, + "learning_rate": 1.8751901234351775e-05, + "loss": 1.0701, "step": 6561 }, { - "epoch": 0.18620885357548242, + "epoch": 0.18595029612627165, "grad_norm": 0.0, - "learning_rate": 1.8747578007841146e-05, - "loss": 0.9601, + "learning_rate": 1.875145719019891e-05, + "loss": 0.9622, "step": 6562 }, { - "epoch": 0.1862372304199773, + "epoch": 0.1859786335685341, "grad_norm": 0.0, - "learning_rate": 1.874713262205143e-05, - "loss": 1.0666, + "learning_rate": 1.8751013072329274e-05, + "loss": 0.942, "step": 6563 }, { - "epoch": 0.1862656072644722, + "epoch": 0.18600697101079658, "grad_norm": 0.0, - "learning_rate": 1.8746687162374294e-05, - "loss": 0.9504, + "learning_rate": 1.8750568880746606e-05, + "loss": 0.9396, "step": 6564 }, { - "epoch": 0.18629398410896708, + "epoch": 0.18603530845305902, "grad_norm": 0.0, - "learning_rate": 1.8746241628813498e-05, - "loss": 0.9437, + "learning_rate": 1.8750124615454645e-05, + "loss": 1.0222, "step": 6565 }, { - "epoch": 0.18632236095346197, + "epoch": 0.18606364589532148, "grad_norm": 0.0, - "learning_rate": 1.87457960213728e-05, - "loss": 1.1493, + "learning_rate": 1.8749680276457137e-05, + "loss": 0.9679, "step": 6566 }, { - "epoch": 0.18635073779795686, + "epoch": 0.18609198333758395, "grad_norm": 0.0, - "learning_rate": 1.8745350340055974e-05, - "loss": 1.032, + "learning_rate": 1.8749235863757822e-05, + "loss": 1.061, "step": 6567 }, { - "epoch": 0.18637911464245177, + "epoch": 0.18612032077984642, "grad_norm": 0.0, - "learning_rate": 1.8744904584866782e-05, - "loss": 0.9407, + "learning_rate": 1.8748791377360443e-05, + "loss": 1.0083, "step": 6568 }, { - "epoch": 0.18640749148694666, + "epoch": 0.18614865822210888, "grad_norm": 0.0, - "learning_rate": 1.8744458755808988e-05, - "loss": 1.0222, + "learning_rate": 1.874834681726875e-05, + "loss": 1.0446, "step": 6569 }, { - "epoch": 0.18643586833144155, + "epoch": 0.18617699566437135, "grad_norm": 0.0, - "learning_rate": 1.8744012852886357e-05, - "loss": 1.001, + "learning_rate": 1.874790218348648e-05, + "loss": 0.9997, "step": 6570 }, { - "epoch": 0.18646424517593643, + "epoch": 0.18620533310663379, "grad_norm": 0.0, - "learning_rate": 1.8743566876102655e-05, - "loss": 1.0004, + "learning_rate": 1.874745747601738e-05, + "loss": 1.0153, "step": 6571 }, { - "epoch": 0.18649262202043132, + "epoch": 0.18623367054889625, "grad_norm": 0.0, - "learning_rate": 1.874312082546165e-05, - "loss": 1.1311, + "learning_rate": 1.8747012694865197e-05, + "loss": 1.0869, "step": 6572 }, { - "epoch": 0.1865209988649262, + "epoch": 0.18626200799115872, "grad_norm": 0.0, - "learning_rate": 1.874267470096711e-05, - "loss": 1.0044, + "learning_rate": 1.874656784003368e-05, + "loss": 0.9807, "step": 6573 }, { - "epoch": 0.18654937570942112, + "epoch": 0.18629034543342118, "grad_norm": 0.0, - "learning_rate": 1.8742228502622807e-05, - "loss": 0.9982, + "learning_rate": 1.8746122911526575e-05, + "loss": 1.0632, "step": 6574 }, { - "epoch": 0.186577752553916, + "epoch": 0.18631868287568365, "grad_norm": 0.0, - "learning_rate": 1.8741782230432503e-05, - "loss": 0.9474, + "learning_rate": 1.8745677909347626e-05, + "loss": 0.9748, "step": 6575 }, { - "epoch": 0.1866061293984109, + "epoch": 0.18634702031794612, "grad_norm": 0.0, - "learning_rate": 1.8741335884399974e-05, - "loss": 1.1313, + "learning_rate": 1.8745232833500585e-05, + "loss": 1.0529, "step": 6576 }, { - "epoch": 0.18663450624290578, + "epoch": 0.18637535776020855, "grad_norm": 0.0, - "learning_rate": 1.8740889464528988e-05, - "loss": 0.977, + "learning_rate": 1.8744787683989203e-05, + "loss": 1.147, "step": 6577 }, { - "epoch": 0.18666288308740067, + "epoch": 0.18640369520247102, "grad_norm": 0.0, - "learning_rate": 1.8740442970823315e-05, - "loss": 1.0845, + "learning_rate": 1.8744342460817227e-05, + "loss": 0.9824, "step": 6578 }, { - "epoch": 0.1866912599318956, + "epoch": 0.1864320326447335, "grad_norm": 0.0, - "learning_rate": 1.8739996403286727e-05, - "loss": 0.9176, + "learning_rate": 1.87438971639884e-05, + "loss": 1.0659, "step": 6579 }, { - "epoch": 0.18671963677639047, + "epoch": 0.18646037008699595, "grad_norm": 0.0, - "learning_rate": 1.8739549761923e-05, - "loss": 0.9128, + "learning_rate": 1.874345179350649e-05, + "loss": 1.0281, "step": 6580 }, { - "epoch": 0.18674801362088536, + "epoch": 0.18648870752925842, "grad_norm": 0.0, - "learning_rate": 1.8739103046735896e-05, - "loss": 0.9819, + "learning_rate": 1.8743006349375236e-05, + "loss": 0.9777, "step": 6581 }, { - "epoch": 0.18677639046538025, + "epoch": 0.18651704497152086, "grad_norm": 0.0, - "learning_rate": 1.8738656257729202e-05, - "loss": 1.0152, + "learning_rate": 1.874256083159839e-05, + "loss": 1.1229, "step": 6582 }, { - "epoch": 0.18680476730987514, + "epoch": 0.18654538241378332, "grad_norm": 0.0, - "learning_rate": 1.8738209394906683e-05, - "loss": 1.0577, + "learning_rate": 1.874211524017971e-05, + "loss": 0.9545, "step": 6583 }, { - "epoch": 0.18683314415437002, + "epoch": 0.1865737198560458, "grad_norm": 0.0, - "learning_rate": 1.8737762458272114e-05, - "loss": 0.9373, + "learning_rate": 1.8741669575122946e-05, + "loss": 1.0627, "step": 6584 }, { - "epoch": 0.18686152099886494, + "epoch": 0.18660205729830825, "grad_norm": 0.0, - "learning_rate": 1.873731544782928e-05, - "loss": 1.1229, + "learning_rate": 1.874122383643185e-05, + "loss": 0.9307, "step": 6585 }, { - "epoch": 0.18688989784335983, + "epoch": 0.18663039474057072, "grad_norm": 0.0, - "learning_rate": 1.8736868363581943e-05, - "loss": 1.0989, + "learning_rate": 1.8740778024110188e-05, + "loss": 0.9878, "step": 6586 }, { - "epoch": 0.1869182746878547, + "epoch": 0.1866587321828332, "grad_norm": 0.0, - "learning_rate": 1.873642120553389e-05, - "loss": 0.9413, + "learning_rate": 1.87403321381617e-05, + "loss": 1.0996, "step": 6587 }, { - "epoch": 0.1869466515323496, + "epoch": 0.18668706962509563, "grad_norm": 0.0, - "learning_rate": 1.8735973973688894e-05, - "loss": 0.9649, + "learning_rate": 1.873988617859015e-05, + "loss": 1.0532, "step": 6588 }, { - "epoch": 0.1869750283768445, + "epoch": 0.1867154070673581, "grad_norm": 0.0, - "learning_rate": 1.8735526668050733e-05, - "loss": 1.0052, + "learning_rate": 1.8739440145399295e-05, + "loss": 0.9225, "step": 6589 }, { - "epoch": 0.18700340522133937, + "epoch": 0.18674374450962056, "grad_norm": 0.0, - "learning_rate": 1.8735079288623182e-05, - "loss": 0.9961, + "learning_rate": 1.873899403859289e-05, + "loss": 1.0802, "step": 6590 }, { - "epoch": 0.1870317820658343, + "epoch": 0.18677208195188302, "grad_norm": 0.0, - "learning_rate": 1.8734631835410025e-05, - "loss": 0.9242, + "learning_rate": 1.8738547858174692e-05, + "loss": 1.0759, "step": 6591 }, { - "epoch": 0.18706015891032918, + "epoch": 0.1868004193941455, "grad_norm": 0.0, - "learning_rate": 1.8734184308415044e-05, - "loss": 1.0549, + "learning_rate": 1.873810160414846e-05, + "loss": 0.9566, "step": 6592 }, { - "epoch": 0.18708853575482406, + "epoch": 0.18682875683640796, "grad_norm": 0.0, - "learning_rate": 1.8733736707642012e-05, - "loss": 1.0238, + "learning_rate": 1.8737655276517953e-05, + "loss": 0.9831, "step": 6593 }, { - "epoch": 0.18711691259931895, + "epoch": 0.1868570942786704, "grad_norm": 0.0, - "learning_rate": 1.8733289033094715e-05, - "loss": 0.9416, + "learning_rate": 1.8737208875286933e-05, + "loss": 0.9327, "step": 6594 }, { - "epoch": 0.18714528944381384, + "epoch": 0.18688543172093286, "grad_norm": 0.0, - "learning_rate": 1.8732841284776932e-05, - "loss": 1.0197, + "learning_rate": 1.8736762400459155e-05, + "loss": 1.0853, "step": 6595 }, { - "epoch": 0.18717366628830873, + "epoch": 0.18691376916319533, "grad_norm": 0.0, - "learning_rate": 1.8732393462692447e-05, - "loss": 1.0037, + "learning_rate": 1.8736315852038387e-05, + "loss": 1.0217, "step": 6596 }, { - "epoch": 0.18720204313280364, + "epoch": 0.1869421066054578, "grad_norm": 0.0, - "learning_rate": 1.8731945566845042e-05, - "loss": 1.0752, + "learning_rate": 1.873586923002838e-05, + "loss": 1.0776, "step": 6597 }, { - "epoch": 0.18723041997729853, + "epoch": 0.18697044404772026, "grad_norm": 0.0, - "learning_rate": 1.87314975972385e-05, - "loss": 1.0817, + "learning_rate": 1.873542253443291e-05, + "loss": 1.14, "step": 6598 }, { - "epoch": 0.18725879682179342, + "epoch": 0.18699878148998272, "grad_norm": 0.0, - "learning_rate": 1.8731049553876604e-05, - "loss": 1.0675, + "learning_rate": 1.8734975765255732e-05, + "loss": 1.0735, "step": 6599 }, { - "epoch": 0.1872871736662883, + "epoch": 0.18702711893224516, "grad_norm": 0.0, - "learning_rate": 1.873060143676314e-05, - "loss": 1.0848, + "learning_rate": 1.8734528922500606e-05, + "loss": 0.9486, "step": 6600 }, { - "epoch": 0.1873155505107832, + "epoch": 0.18705545637450763, "grad_norm": 0.0, - "learning_rate": 1.8730153245901895e-05, - "loss": 0.9826, + "learning_rate": 1.87340820061713e-05, + "loss": 0.8844, "step": 6601 }, { - "epoch": 0.1873439273552781, + "epoch": 0.1870837938167701, "grad_norm": 0.0, - "learning_rate": 1.8729704981296654e-05, - "loss": 1.0535, + "learning_rate": 1.8733635016271576e-05, + "loss": 1.0602, "step": 6602 }, { - "epoch": 0.187372304199773, + "epoch": 0.18711213125903256, "grad_norm": 0.0, - "learning_rate": 1.87292566429512e-05, - "loss": 1.0521, + "learning_rate": 1.8733187952805204e-05, + "loss": 1.0258, "step": 6603 }, { - "epoch": 0.18740068104426788, + "epoch": 0.18714046870129503, "grad_norm": 0.0, - "learning_rate": 1.872880823086932e-05, - "loss": 1.0164, + "learning_rate": 1.8732740815775945e-05, + "loss": 1.0905, "step": 6604 }, { - "epoch": 0.18742905788876277, + "epoch": 0.1871688061435575, "grad_norm": 0.0, - "learning_rate": 1.8728359745054808e-05, - "loss": 1.0288, + "learning_rate": 1.873229360518757e-05, + "loss": 0.9792, "step": 6605 }, { - "epoch": 0.18745743473325766, + "epoch": 0.18719714358581993, "grad_norm": 0.0, - "learning_rate": 1.8727911185511448e-05, - "loss": 0.9577, + "learning_rate": 1.8731846321043842e-05, + "loss": 0.9874, "step": 6606 }, { - "epoch": 0.18748581157775254, + "epoch": 0.1872254810280824, "grad_norm": 0.0, - "learning_rate": 1.8727462552243034e-05, - "loss": 1.0123, + "learning_rate": 1.8731398963348527e-05, + "loss": 0.9756, "step": 6607 }, { - "epoch": 0.18751418842224746, + "epoch": 0.18725381847034486, "grad_norm": 0.0, - "learning_rate": 1.8727013845253346e-05, - "loss": 0.9388, + "learning_rate": 1.8730951532105403e-05, + "loss": 1.0274, "step": 6608 }, { - "epoch": 0.18754256526674234, + "epoch": 0.18728215591260733, "grad_norm": 0.0, - "learning_rate": 1.8726565064546183e-05, - "loss": 0.8242, + "learning_rate": 1.8730504027318223e-05, + "loss": 1.0139, "step": 6609 }, { - "epoch": 0.18757094211123723, + "epoch": 0.1873104933548698, "grad_norm": 0.0, - "learning_rate": 1.872611621012533e-05, - "loss": 1.0027, + "learning_rate": 1.873005644899077e-05, + "loss": 0.9696, "step": 6610 }, { - "epoch": 0.18759931895573212, + "epoch": 0.18733883079713226, "grad_norm": 0.0, - "learning_rate": 1.8725667281994584e-05, - "loss": 0.9956, + "learning_rate": 1.8729608797126813e-05, + "loss": 1.0735, "step": 6611 }, { - "epoch": 0.187627695800227, + "epoch": 0.1873671682393947, "grad_norm": 0.0, - "learning_rate": 1.8725218280157734e-05, - "loss": 1.0591, + "learning_rate": 1.8729161071730115e-05, + "loss": 1.0267, "step": 6612 }, { - "epoch": 0.1876560726447219, + "epoch": 0.18739550568165717, "grad_norm": 0.0, - "learning_rate": 1.872476920461857e-05, - "loss": 1.114, + "learning_rate": 1.8728713272804455e-05, + "loss": 0.9765, "step": 6613 }, { - "epoch": 0.1876844494892168, + "epoch": 0.18742384312391963, "grad_norm": 0.0, - "learning_rate": 1.8724320055380892e-05, - "loss": 0.9719, + "learning_rate": 1.87282654003536e-05, + "loss": 1.0523, "step": 6614 }, { - "epoch": 0.1877128263337117, + "epoch": 0.1874521805661821, "grad_norm": 0.0, - "learning_rate": 1.8723870832448487e-05, - "loss": 0.9687, + "learning_rate": 1.8727817454381325e-05, + "loss": 1.066, "step": 6615 }, { - "epoch": 0.18774120317820658, + "epoch": 0.18748051800844456, "grad_norm": 0.0, - "learning_rate": 1.8723421535825153e-05, - "loss": 1.0443, + "learning_rate": 1.87273694348914e-05, + "loss": 1.0497, "step": 6616 }, { - "epoch": 0.18776958002270147, + "epoch": 0.18750885545070703, "grad_norm": 0.0, - "learning_rate": 1.872297216551469e-05, - "loss": 0.9225, + "learning_rate": 1.8726921341887606e-05, + "loss": 1.0209, "step": 6617 }, { - "epoch": 0.18779795686719636, + "epoch": 0.18753719289296947, "grad_norm": 0.0, - "learning_rate": 1.8722522721520885e-05, - "loss": 1.0028, + "learning_rate": 1.872647317537371e-05, + "loss": 0.9639, "step": 6618 }, { - "epoch": 0.18782633371169127, + "epoch": 0.18756553033523193, "grad_norm": 0.0, - "learning_rate": 1.872207320384754e-05, - "loss": 1.0437, + "learning_rate": 1.872602493535349e-05, + "loss": 1.064, "step": 6619 }, { - "epoch": 0.18785471055618616, + "epoch": 0.1875938677774944, "grad_norm": 0.0, - "learning_rate": 1.872162361249845e-05, - "loss": 0.9514, + "learning_rate": 1.8725576621830722e-05, + "loss": 1.0962, "step": 6620 }, { - "epoch": 0.18788308740068105, + "epoch": 0.18762220521975687, "grad_norm": 0.0, - "learning_rate": 1.872117394747741e-05, - "loss": 0.9931, + "learning_rate": 1.8725128234809183e-05, + "loss": 1.0491, "step": 6621 }, { - "epoch": 0.18791146424517594, + "epoch": 0.18765054266201933, "grad_norm": 0.0, - "learning_rate": 1.8720724208788225e-05, - "loss": 1.0286, + "learning_rate": 1.8724679774292648e-05, + "loss": 0.9122, "step": 6622 }, { - "epoch": 0.18793984108967082, + "epoch": 0.1876788801042818, "grad_norm": 0.0, - "learning_rate": 1.872027439643469e-05, - "loss": 1.0099, + "learning_rate": 1.8724231240284895e-05, + "loss": 0.9707, "step": 6623 }, { - "epoch": 0.1879682179341657, + "epoch": 0.18770721754654424, "grad_norm": 0.0, - "learning_rate": 1.8719824510420606e-05, - "loss": 1.104, + "learning_rate": 1.87237826327897e-05, + "loss": 0.9658, "step": 6624 }, { - "epoch": 0.18799659477866063, + "epoch": 0.1877355549888067, "grad_norm": 0.0, - "learning_rate": 1.8719374550749768e-05, - "loss": 0.9702, + "learning_rate": 1.872333395181085e-05, + "loss": 1.0884, "step": 6625 }, { - "epoch": 0.1880249716231555, + "epoch": 0.18776389243106917, "grad_norm": 0.0, - "learning_rate": 1.8718924517425987e-05, - "loss": 1.0352, + "learning_rate": 1.8722885197352113e-05, + "loss": 0.9359, "step": 6626 }, { - "epoch": 0.1880533484676504, + "epoch": 0.18779222987333163, "grad_norm": 0.0, - "learning_rate": 1.8718474410453056e-05, - "loss": 0.994, + "learning_rate": 1.872243636941728e-05, + "loss": 0.9868, "step": 6627 }, { - "epoch": 0.1880817253121453, + "epoch": 0.1878205673155941, "grad_norm": 0.0, - "learning_rate": 1.871802422983478e-05, - "loss": 1.0038, + "learning_rate": 1.8721987468010124e-05, + "loss": 1.0947, "step": 6628 }, { - "epoch": 0.18811010215664017, + "epoch": 0.18784890475785657, "grad_norm": 0.0, - "learning_rate": 1.8717573975574962e-05, - "loss": 0.994, + "learning_rate": 1.8721538493134428e-05, + "loss": 1.0593, "step": 6629 }, { - "epoch": 0.18813847900113506, + "epoch": 0.187877242200119, "grad_norm": 0.0, - "learning_rate": 1.8717123647677404e-05, - "loss": 0.9963, + "learning_rate": 1.8721089444793978e-05, + "loss": 0.9029, "step": 6630 }, { - "epoch": 0.18816685584562998, + "epoch": 0.18790557964238147, "grad_norm": 0.0, - "learning_rate": 1.871667324614591e-05, - "loss": 1.0499, + "learning_rate": 1.872064032299255e-05, + "loss": 1.0421, "step": 6631 }, { - "epoch": 0.18819523269012486, + "epoch": 0.18793391708464394, "grad_norm": 0.0, - "learning_rate": 1.871622277098429e-05, - "loss": 0.9779, + "learning_rate": 1.872019112773393e-05, + "loss": 1.032, "step": 6632 }, { - "epoch": 0.18822360953461975, + "epoch": 0.1879622545269064, "grad_norm": 0.0, - "learning_rate": 1.8715772222196337e-05, - "loss": 0.9854, + "learning_rate": 1.8719741859021904e-05, + "loss": 1.0171, "step": 6633 }, { - "epoch": 0.18825198637911464, + "epoch": 0.18799059196916887, "grad_norm": 0.0, - "learning_rate": 1.871532159978587e-05, - "loss": 1.1144, + "learning_rate": 1.8719292516860253e-05, + "loss": 1.041, "step": 6634 }, { - "epoch": 0.18828036322360953, + "epoch": 0.18801892941143133, "grad_norm": 0.0, - "learning_rate": 1.8714870903756684e-05, - "loss": 0.8969, + "learning_rate": 1.8718843101252765e-05, + "loss": 0.9915, "step": 6635 }, { - "epoch": 0.1883087400681044, + "epoch": 0.18804726685369377, "grad_norm": 0.0, - "learning_rate": 1.87144201341126e-05, - "loss": 1.0641, + "learning_rate": 1.871839361220322e-05, + "loss": 0.9926, "step": 6636 }, { - "epoch": 0.18833711691259933, + "epoch": 0.18807560429595624, "grad_norm": 0.0, - "learning_rate": 1.8713969290857412e-05, - "loss": 1.0877, + "learning_rate": 1.8717944049715412e-05, + "loss": 1.1075, "step": 6637 }, { - "epoch": 0.18836549375709422, + "epoch": 0.1881039417382187, "grad_norm": 0.0, - "learning_rate": 1.8713518373994932e-05, - "loss": 1.0525, + "learning_rate": 1.8717494413793123e-05, + "loss": 1.0602, "step": 6638 }, { - "epoch": 0.1883938706015891, + "epoch": 0.18813227918048117, "grad_norm": 0.0, - "learning_rate": 1.8713067383528975e-05, - "loss": 0.9858, + "learning_rate": 1.871704470444014e-05, + "loss": 1.0309, "step": 6639 }, { - "epoch": 0.188422247446084, + "epoch": 0.18816061662274364, "grad_norm": 0.0, - "learning_rate": 1.8712616319463343e-05, - "loss": 1.0497, + "learning_rate": 1.8716594921660253e-05, + "loss": 1.0683, "step": 6640 }, { - "epoch": 0.18845062429057888, + "epoch": 0.1881889540650061, "grad_norm": 0.0, - "learning_rate": 1.871216518180185e-05, - "loss": 1.0317, + "learning_rate": 1.871614506545725e-05, + "loss": 1.03, "step": 6641 }, { - "epoch": 0.1884790011350738, + "epoch": 0.18821729150726854, "grad_norm": 0.0, - "learning_rate": 1.871171397054831e-05, - "loss": 0.9879, + "learning_rate": 1.871569513583492e-05, + "loss": 0.9185, "step": 6642 }, { - "epoch": 0.18850737797956868, + "epoch": 0.188245628949531, "grad_norm": 0.0, - "learning_rate": 1.8711262685706525e-05, - "loss": 1.0005, + "learning_rate": 1.8715245132797056e-05, + "loss": 1.0316, "step": 6643 }, { - "epoch": 0.18853575482406357, + "epoch": 0.18827396639179347, "grad_norm": 0.0, - "learning_rate": 1.8710811327280314e-05, - "loss": 0.9255, + "learning_rate": 1.8714795056347443e-05, + "loss": 1.0402, "step": 6644 }, { - "epoch": 0.18856413166855845, + "epoch": 0.18830230383405594, "grad_norm": 0.0, - "learning_rate": 1.8710359895273487e-05, - "loss": 1.0396, + "learning_rate": 1.8714344906489878e-05, + "loss": 1.0761, "step": 6645 }, { - "epoch": 0.18859250851305334, + "epoch": 0.1883306412763184, "grad_norm": 0.0, - "learning_rate": 1.870990838968986e-05, - "loss": 1.0442, + "learning_rate": 1.8713894683228147e-05, + "loss": 1.0263, "step": 6646 }, { - "epoch": 0.18862088535754823, + "epoch": 0.18835897871858087, "grad_norm": 0.0, - "learning_rate": 1.8709456810533248e-05, - "loss": 0.8489, + "learning_rate": 1.8713444386566047e-05, + "loss": 0.9859, "step": 6647 }, { - "epoch": 0.18864926220204314, + "epoch": 0.1883873161608433, "grad_norm": 0.0, - "learning_rate": 1.870900515780746e-05, - "loss": 1.0615, + "learning_rate": 1.871299401650737e-05, + "loss": 1.0433, "step": 6648 }, { - "epoch": 0.18867763904653803, + "epoch": 0.18841565360310578, "grad_norm": 0.0, - "learning_rate": 1.870855343151631e-05, - "loss": 1.0364, + "learning_rate": 1.8712543573055907e-05, + "loss": 0.9197, "step": 6649 }, { - "epoch": 0.18870601589103292, + "epoch": 0.18844399104536824, "grad_norm": 0.0, - "learning_rate": 1.8708101631663623e-05, - "loss": 0.9388, + "learning_rate": 1.8712093056215453e-05, + "loss": 0.956, "step": 6650 }, { - "epoch": 0.1887343927355278, + "epoch": 0.1884723284876307, "grad_norm": 0.0, - "learning_rate": 1.870764975825321e-05, - "loss": 1.0686, + "learning_rate": 1.8711642465989806e-05, + "loss": 0.9458, "step": 6651 }, { - "epoch": 0.1887627695800227, + "epoch": 0.18850066592989317, "grad_norm": 0.0, - "learning_rate": 1.8707197811288887e-05, - "loss": 1.065, + "learning_rate": 1.871119180238276e-05, + "loss": 0.9609, "step": 6652 }, { - "epoch": 0.18879114642451758, + "epoch": 0.18852900337215564, "grad_norm": 0.0, - "learning_rate": 1.870674579077447e-05, - "loss": 0.9559, + "learning_rate": 1.8710741065398112e-05, + "loss": 1.0028, "step": 6653 }, { - "epoch": 0.1888195232690125, + "epoch": 0.18855734081441808, "grad_norm": 0.0, - "learning_rate": 1.8706293696713783e-05, - "loss": 1.0455, + "learning_rate": 1.8710290255039654e-05, + "loss": 1.0254, "step": 6654 }, { - "epoch": 0.18884790011350738, + "epoch": 0.18858567825668054, "grad_norm": 0.0, - "learning_rate": 1.8705841529110642e-05, - "loss": 0.9985, + "learning_rate": 1.870983937131119e-05, + "loss": 1.1281, "step": 6655 }, { - "epoch": 0.18887627695800227, + "epoch": 0.188614015698943, "grad_norm": 0.0, - "learning_rate": 1.8705389287968866e-05, - "loss": 0.9385, + "learning_rate": 1.870938841421651e-05, + "loss": 1.0001, "step": 6656 }, { - "epoch": 0.18890465380249716, + "epoch": 0.18864235314120548, "grad_norm": 0.0, - "learning_rate": 1.8704936973292274e-05, - "loss": 0.9847, + "learning_rate": 1.870893738375942e-05, + "loss": 1.0669, "step": 6657 }, { - "epoch": 0.18893303064699205, + "epoch": 0.18867069058346794, "grad_norm": 0.0, - "learning_rate": 1.8704484585084688e-05, - "loss": 1.0077, + "learning_rate": 1.870848627994372e-05, + "loss": 0.9194, "step": 6658 }, { - "epoch": 0.18896140749148696, + "epoch": 0.1886990280257304, "grad_norm": 0.0, - "learning_rate": 1.8704032123349932e-05, - "loss": 0.991, + "learning_rate": 1.8708035102773198e-05, + "loss": 1.0083, "step": 6659 }, { - "epoch": 0.18898978433598185, + "epoch": 0.18872736546799285, "grad_norm": 0.0, - "learning_rate": 1.8703579588091822e-05, - "loss": 1.1086, + "learning_rate": 1.870758385225167e-05, + "loss": 1.0823, "step": 6660 }, { - "epoch": 0.18901816118047673, + "epoch": 0.1887557029102553, "grad_norm": 0.0, - "learning_rate": 1.870312697931419e-05, - "loss": 1.2551, + "learning_rate": 1.8707132528382927e-05, + "loss": 0.9995, "step": 6661 }, { - "epoch": 0.18904653802497162, + "epoch": 0.18878404035251778, "grad_norm": 0.0, - "learning_rate": 1.8702674297020843e-05, - "loss": 1.0474, + "learning_rate": 1.8706681131170773e-05, + "loss": 0.8715, "step": 6662 }, { - "epoch": 0.1890749148694665, + "epoch": 0.18881237779478025, "grad_norm": 0.0, - "learning_rate": 1.8702221541215624e-05, - "loss": 0.9999, + "learning_rate": 1.870622966061901e-05, + "loss": 0.9914, "step": 6663 }, { - "epoch": 0.1891032917139614, + "epoch": 0.1888407152370427, "grad_norm": 0.0, - "learning_rate": 1.8701768711902342e-05, - "loss": 1.097, + "learning_rate": 1.8705778116731443e-05, + "loss": 0.9873, "step": 6664 }, { - "epoch": 0.1891316685584563, + "epoch": 0.18886905267930518, "grad_norm": 0.0, - "learning_rate": 1.8701315809084835e-05, - "loss": 0.9676, + "learning_rate": 1.8705326499511873e-05, + "loss": 1.0146, "step": 6665 }, { - "epoch": 0.1891600454029512, + "epoch": 0.18889739012156762, "grad_norm": 0.0, - "learning_rate": 1.870086283276692e-05, - "loss": 0.9745, + "learning_rate": 1.8704874808964105e-05, + "loss": 0.9007, "step": 6666 }, { - "epoch": 0.1891884222474461, + "epoch": 0.18892572756383008, "grad_norm": 0.0, - "learning_rate": 1.870040978295242e-05, - "loss": 1.0901, + "learning_rate": 1.8704423045091948e-05, + "loss": 0.9739, "step": 6667 }, { - "epoch": 0.18921679909194097, + "epoch": 0.18895406500609255, "grad_norm": 0.0, - "learning_rate": 1.8699956659645173e-05, - "loss": 1.0785, + "learning_rate": 1.87039712078992e-05, + "loss": 0.9584, "step": 6668 }, { - "epoch": 0.18924517593643586, + "epoch": 0.188982402448355, "grad_norm": 0.0, - "learning_rate": 1.8699503462849002e-05, - "loss": 0.776, + "learning_rate": 1.870351929738967e-05, + "loss": 0.9591, "step": 6669 }, { - "epoch": 0.18927355278093075, + "epoch": 0.18901073989061748, "grad_norm": 0.0, - "learning_rate": 1.869905019256773e-05, - "loss": 0.9168, + "learning_rate": 1.8703067313567166e-05, + "loss": 0.9989, "step": 6670 }, { - "epoch": 0.18930192962542566, + "epoch": 0.18903907733287995, "grad_norm": 0.0, - "learning_rate": 1.8698596848805194e-05, - "loss": 0.9562, + "learning_rate": 1.8702615256435495e-05, + "loss": 0.9837, "step": 6671 }, { - "epoch": 0.18933030646992055, + "epoch": 0.18906741477514238, "grad_norm": 0.0, - "learning_rate": 1.8698143431565215e-05, - "loss": 1.0419, + "learning_rate": 1.8702163125998462e-05, + "loss": 0.9364, "step": 6672 }, { - "epoch": 0.18935868331441544, + "epoch": 0.18909575221740485, "grad_norm": 0.0, - "learning_rate": 1.8697689940851633e-05, - "loss": 0.9139, + "learning_rate": 1.870171092225988e-05, + "loss": 0.9362, "step": 6673 }, { - "epoch": 0.18938706015891033, + "epoch": 0.18912408965966732, "grad_norm": 0.0, - "learning_rate": 1.869723637666827e-05, - "loss": 1.0325, + "learning_rate": 1.8701258645223555e-05, + "loss": 1.0687, "step": 6674 }, { - "epoch": 0.1894154370034052, + "epoch": 0.18915242710192978, "grad_norm": 0.0, - "learning_rate": 1.8696782739018958e-05, - "loss": 1.057, + "learning_rate": 1.8700806294893296e-05, + "loss": 1.0142, "step": 6675 }, { - "epoch": 0.1894438138479001, + "epoch": 0.18918076454419225, "grad_norm": 0.0, - "learning_rate": 1.8696329027907535e-05, - "loss": 1.0143, + "learning_rate": 1.8700353871272915e-05, + "loss": 0.9825, "step": 6676 }, { - "epoch": 0.18947219069239501, + "epoch": 0.18920910198645471, "grad_norm": 0.0, - "learning_rate": 1.8695875243337827e-05, - "loss": 1.001, + "learning_rate": 1.8699901374366224e-05, + "loss": 1.0449, "step": 6677 }, { - "epoch": 0.1895005675368899, + "epoch": 0.18923743942871715, "grad_norm": 0.0, - "learning_rate": 1.8695421385313673e-05, - "loss": 0.9438, + "learning_rate": 1.8699448804177028e-05, + "loss": 1.0262, "step": 6678 }, { - "epoch": 0.1895289443813848, + "epoch": 0.18926577687097962, "grad_norm": 0.0, - "learning_rate": 1.86949674538389e-05, - "loss": 1.0173, + "learning_rate": 1.8698996160709147e-05, + "loss": 1.0977, "step": 6679 }, { - "epoch": 0.18955732122587968, + "epoch": 0.18929411431324208, "grad_norm": 0.0, - "learning_rate": 1.869451344891735e-05, - "loss": 1.1224, + "learning_rate": 1.8698543443966395e-05, + "loss": 0.9293, "step": 6680 }, { - "epoch": 0.18958569807037456, + "epoch": 0.18932245175550455, "grad_norm": 0.0, - "learning_rate": 1.869405937055285e-05, - "loss": 1.0324, + "learning_rate": 1.869809065395258e-05, + "loss": 1.0205, "step": 6681 }, { - "epoch": 0.18961407491486948, + "epoch": 0.18935078919776702, "grad_norm": 0.0, - "learning_rate": 1.869360521874924e-05, - "loss": 1.0217, + "learning_rate": 1.8697637790671514e-05, + "loss": 1.0232, "step": 6682 }, { - "epoch": 0.18964245175936437, + "epoch": 0.18937912664002948, "grad_norm": 0.0, - "learning_rate": 1.869315099351036e-05, - "loss": 0.9627, + "learning_rate": 1.8697184854127015e-05, + "loss": 0.9609, "step": 6683 }, { - "epoch": 0.18967082860385925, + "epoch": 0.18940746408229192, "grad_norm": 0.0, - "learning_rate": 1.869269669484004e-05, - "loss": 0.9624, + "learning_rate": 1.86967318443229e-05, + "loss": 1.0215, "step": 6684 }, { - "epoch": 0.18969920544835414, + "epoch": 0.1894358015245544, "grad_norm": 0.0, - "learning_rate": 1.869224232274212e-05, - "loss": 0.9735, + "learning_rate": 1.8696278761262986e-05, + "loss": 1.0681, "step": 6685 }, { - "epoch": 0.18972758229284903, + "epoch": 0.18946413896681685, "grad_norm": 0.0, - "learning_rate": 1.869178787722044e-05, - "loss": 0.9991, + "learning_rate": 1.8695825604951083e-05, + "loss": 1.1314, "step": 6686 }, { - "epoch": 0.18975595913734392, + "epoch": 0.18949247640907932, "grad_norm": 0.0, - "learning_rate": 1.8691333358278835e-05, - "loss": 1.0114, + "learning_rate": 1.8695372375391013e-05, + "loss": 0.9563, "step": 6687 }, { - "epoch": 0.18978433598183883, + "epoch": 0.18952081385134179, "grad_norm": 0.0, - "learning_rate": 1.869087876592115e-05, - "loss": 0.9047, + "learning_rate": 1.8694919072586595e-05, + "loss": 1.076, "step": 6688 }, { - "epoch": 0.18981271282633372, + "epoch": 0.18954915129360425, "grad_norm": 0.0, - "learning_rate": 1.869042410015122e-05, - "loss": 0.9004, + "learning_rate": 1.8694465696541643e-05, + "loss": 0.9604, "step": 6689 }, { - "epoch": 0.1898410896708286, + "epoch": 0.1895774887358667, "grad_norm": 0.0, - "learning_rate": 1.8689969360972883e-05, - "loss": 0.9784, + "learning_rate": 1.8694012247259977e-05, + "loss": 0.9033, "step": 6690 }, { - "epoch": 0.1898694665153235, + "epoch": 0.18960582617812916, "grad_norm": 0.0, - "learning_rate": 1.8689514548389987e-05, - "loss": 0.9589, + "learning_rate": 1.8693558724745417e-05, + "loss": 1.0011, "step": 6691 }, { - "epoch": 0.18989784335981838, + "epoch": 0.18963416362039162, "grad_norm": 0.0, - "learning_rate": 1.8689059662406373e-05, - "loss": 0.8813, + "learning_rate": 1.8693105129001786e-05, + "loss": 1.0041, "step": 6692 }, { - "epoch": 0.18992622020431327, + "epoch": 0.1896625010626541, "grad_norm": 0.0, - "learning_rate": 1.8688604703025878e-05, - "loss": 1.0809, + "learning_rate": 1.86926514600329e-05, + "loss": 1.0047, "step": 6693 }, { - "epoch": 0.18995459704880818, + "epoch": 0.18969083850491655, "grad_norm": 0.0, - "learning_rate": 1.8688149670252352e-05, - "loss": 0.9464, + "learning_rate": 1.8692197717842583e-05, + "loss": 1.0811, "step": 6694 }, { - "epoch": 0.18998297389330307, + "epoch": 0.18971917594717902, "grad_norm": 0.0, - "learning_rate": 1.8687694564089632e-05, - "loss": 0.982, + "learning_rate": 1.869174390243466e-05, + "loss": 0.9352, "step": 6695 }, { - "epoch": 0.19001135073779796, + "epoch": 0.18974751338944146, "grad_norm": 0.0, - "learning_rate": 1.8687239384541563e-05, - "loss": 1.07, + "learning_rate": 1.8691290013812943e-05, + "loss": 1.0889, "step": 6696 }, { - "epoch": 0.19003972758229284, + "epoch": 0.18977585083170392, "grad_norm": 0.0, - "learning_rate": 1.8686784131611998e-05, - "loss": 1.0825, + "learning_rate": 1.869083605198127e-05, + "loss": 1.0464, "step": 6697 }, { - "epoch": 0.19006810442678773, + "epoch": 0.1898041882739664, "grad_norm": 0.0, - "learning_rate": 1.8686328805304775e-05, - "loss": 1.1033, + "learning_rate": 1.8690382016943455e-05, + "loss": 0.9091, "step": 6698 }, { - "epoch": 0.19009648127128265, + "epoch": 0.18983252571622886, "grad_norm": 0.0, - "learning_rate": 1.8685873405623742e-05, - "loss": 0.9903, + "learning_rate": 1.8689927908703325e-05, + "loss": 1.0464, "step": 6699 }, { - "epoch": 0.19012485811577753, + "epoch": 0.18986086315849132, "grad_norm": 0.0, - "learning_rate": 1.8685417932572747e-05, - "loss": 1.0054, + "learning_rate": 1.8689473727264705e-05, + "loss": 1.0135, "step": 6700 }, { - "epoch": 0.19015323496027242, + "epoch": 0.1898892006007538, "grad_norm": 0.0, - "learning_rate": 1.8684962386155633e-05, - "loss": 1.0027, + "learning_rate": 1.8689019472631424e-05, + "loss": 1.1419, "step": 6701 }, { - "epoch": 0.1901816118047673, + "epoch": 0.18991753804301623, "grad_norm": 0.0, - "learning_rate": 1.8684506766376254e-05, - "loss": 1.0344, + "learning_rate": 1.8688565144807303e-05, + "loss": 0.9715, "step": 6702 }, { - "epoch": 0.1902099886492622, + "epoch": 0.1899458754852787, "grad_norm": 0.0, - "learning_rate": 1.8684051073238455e-05, - "loss": 1.0887, + "learning_rate": 1.8688110743796174e-05, + "loss": 1.1027, "step": 6703 }, { - "epoch": 0.19023836549375708, + "epoch": 0.18997421292754116, "grad_norm": 0.0, - "learning_rate": 1.868359530674609e-05, - "loss": 1.0029, + "learning_rate": 1.8687656269601856e-05, + "loss": 0.9724, "step": 6704 }, { - "epoch": 0.190266742338252, + "epoch": 0.19000255036980362, "grad_norm": 0.0, - "learning_rate": 1.8683139466903e-05, - "loss": 0.998, + "learning_rate": 1.8687201722228188e-05, + "loss": 1.1088, "step": 6705 }, { - "epoch": 0.19029511918274689, + "epoch": 0.1900308878120661, "grad_norm": 0.0, - "learning_rate": 1.868268355371304e-05, - "loss": 0.9958, + "learning_rate": 1.8686747101678994e-05, + "loss": 0.9497, "step": 6706 }, { - "epoch": 0.19032349602724177, + "epoch": 0.19005922525432856, "grad_norm": 0.0, - "learning_rate": 1.8682227567180064e-05, - "loss": 0.9843, + "learning_rate": 1.86862924079581e-05, + "loss": 1.1106, "step": 6707 }, { - "epoch": 0.19035187287173666, + "epoch": 0.190087562696591, "grad_norm": 0.0, - "learning_rate": 1.8681771507307922e-05, - "loss": 0.9983, + "learning_rate": 1.8685837641069342e-05, + "loss": 1.002, "step": 6708 }, { - "epoch": 0.19038024971623155, + "epoch": 0.19011590013885346, "grad_norm": 0.0, - "learning_rate": 1.8681315374100468e-05, - "loss": 1.0114, + "learning_rate": 1.868538280101655e-05, + "loss": 1.0854, "step": 6709 }, { - "epoch": 0.19040862656072643, + "epoch": 0.19014423758111593, "grad_norm": 0.0, - "learning_rate": 1.868085916756155e-05, - "loss": 1.0055, + "learning_rate": 1.868492788780355e-05, + "loss": 1.0592, "step": 6710 }, { - "epoch": 0.19043700340522135, + "epoch": 0.1901725750233784, "grad_norm": 0.0, - "learning_rate": 1.8680402887695025e-05, - "loss": 0.9542, + "learning_rate": 1.8684472901434178e-05, + "loss": 0.9075, "step": 6711 }, { - "epoch": 0.19046538024971624, + "epoch": 0.19020091246564086, "grad_norm": 0.0, - "learning_rate": 1.8679946534504742e-05, - "loss": 1.0431, + "learning_rate": 1.8684017841912266e-05, + "loss": 1.0649, "step": 6712 }, { - "epoch": 0.19049375709421112, + "epoch": 0.19022924990790333, "grad_norm": 0.0, - "learning_rate": 1.8679490107994565e-05, - "loss": 1.0338, + "learning_rate": 1.868356270924165e-05, + "loss": 1.0265, "step": 6713 }, { - "epoch": 0.190522133938706, + "epoch": 0.19025758735016576, "grad_norm": 0.0, - "learning_rate": 1.8679033608168344e-05, - "loss": 1.0225, + "learning_rate": 1.8683107503426158e-05, + "loss": 1.1414, "step": 6714 }, { - "epoch": 0.1905505107832009, + "epoch": 0.19028592479242823, "grad_norm": 0.0, - "learning_rate": 1.867857703502994e-05, - "loss": 1.0086, + "learning_rate": 1.8682652224469625e-05, + "loss": 0.9534, "step": 6715 }, { - "epoch": 0.1905788876276958, + "epoch": 0.1903142622346907, "grad_norm": 0.0, - "learning_rate": 1.86781203885832e-05, - "loss": 1.0062, + "learning_rate": 1.868219687237589e-05, + "loss": 1.1245, "step": 6716 }, { - "epoch": 0.1906072644721907, + "epoch": 0.19034259967695316, "grad_norm": 0.0, - "learning_rate": 1.867766366883199e-05, - "loss": 0.9231, + "learning_rate": 1.8681741447148788e-05, + "loss": 0.9369, "step": 6717 }, { - "epoch": 0.1906356413166856, + "epoch": 0.19037093711921563, "grad_norm": 0.0, - "learning_rate": 1.8677206875780162e-05, - "loss": 1.0038, + "learning_rate": 1.8681285948792154e-05, + "loss": 0.9283, "step": 6718 }, { - "epoch": 0.19066401816118048, + "epoch": 0.1903992745614781, "grad_norm": 0.0, - "learning_rate": 1.867675000943158e-05, - "loss": 0.8687, + "learning_rate": 1.868083037730982e-05, + "loss": 1.0712, "step": 6719 }, { - "epoch": 0.19069239500567536, + "epoch": 0.19042761200374053, "grad_norm": 0.0, - "learning_rate": 1.8676293069790097e-05, - "loss": 0.9626, + "learning_rate": 1.8680374732705634e-05, + "loss": 1.1053, "step": 6720 }, { - "epoch": 0.19072077185017025, + "epoch": 0.190455949446003, "grad_norm": 0.0, - "learning_rate": 1.867583605685958e-05, - "loss": 0.9021, + "learning_rate": 1.867991901498343e-05, + "loss": 1.0451, "step": 6721 }, { - "epoch": 0.19074914869466517, + "epoch": 0.19048428688826546, "grad_norm": 0.0, - "learning_rate": 1.8675378970643888e-05, - "loss": 1.0056, + "learning_rate": 1.8679463224147037e-05, + "loss": 1.0748, "step": 6722 }, { - "epoch": 0.19077752553916005, + "epoch": 0.19051262433052793, "grad_norm": 0.0, - "learning_rate": 1.867492181114688e-05, - "loss": 0.8785, + "learning_rate": 1.8679007360200304e-05, + "loss": 1.0131, "step": 6723 }, { - "epoch": 0.19080590238365494, + "epoch": 0.1905409617727904, "grad_norm": 0.0, - "learning_rate": 1.867446457837241e-05, - "loss": 1.0178, + "learning_rate": 1.8678551423147073e-05, + "loss": 0.9774, "step": 6724 }, { - "epoch": 0.19083427922814983, + "epoch": 0.19056929921505286, "grad_norm": 0.0, - "learning_rate": 1.8674007272324352e-05, - "loss": 1.0621, + "learning_rate": 1.867809541299118e-05, + "loss": 0.9775, "step": 6725 }, { - "epoch": 0.19086265607264472, + "epoch": 0.1905976366573153, "grad_norm": 0.0, - "learning_rate": 1.8673549893006566e-05, - "loss": 1.0023, + "learning_rate": 1.8677639329736464e-05, + "loss": 0.929, "step": 6726 }, { - "epoch": 0.1908910329171396, + "epoch": 0.19062597409957777, "grad_norm": 0.0, - "learning_rate": 1.8673092440422916e-05, - "loss": 1.0432, + "learning_rate": 1.867718317338677e-05, + "loss": 0.9494, "step": 6727 }, { - "epoch": 0.19091940976163452, + "epoch": 0.19065431154184023, "grad_norm": 0.0, - "learning_rate": 1.867263491457726e-05, - "loss": 0.9655, + "learning_rate": 1.8676726943945945e-05, + "loss": 1.0006, "step": 6728 }, { - "epoch": 0.1909477866061294, + "epoch": 0.1906826489841027, "grad_norm": 0.0, - "learning_rate": 1.8672177315473465e-05, - "loss": 0.9913, + "learning_rate": 1.8676270641417824e-05, + "loss": 1.0298, "step": 6729 }, { - "epoch": 0.1909761634506243, + "epoch": 0.19071098642636516, "grad_norm": 0.0, - "learning_rate": 1.8671719643115405e-05, - "loss": 1.0467, + "learning_rate": 1.867581426580625e-05, + "loss": 1.0563, "step": 6730 }, { - "epoch": 0.19100454029511918, + "epoch": 0.19073932386862763, "grad_norm": 0.0, - "learning_rate": 1.8671261897506934e-05, - "loss": 1.1085, + "learning_rate": 1.8675357817115076e-05, + "loss": 1.0182, "step": 6731 }, { - "epoch": 0.19103291713961407, + "epoch": 0.19076766131089007, "grad_norm": 0.0, - "learning_rate": 1.8670804078651927e-05, - "loss": 1.0432, + "learning_rate": 1.8674901295348136e-05, + "loss": 1.0214, "step": 6732 }, { - "epoch": 0.19106129398410895, + "epoch": 0.19079599875315253, "grad_norm": 0.0, - "learning_rate": 1.8670346186554245e-05, - "loss": 1.1676, + "learning_rate": 1.8674444700509287e-05, + "loss": 0.9954, "step": 6733 }, { - "epoch": 0.19108967082860387, + "epoch": 0.190824336195415, "grad_norm": 0.0, - "learning_rate": 1.866988822121776e-05, - "loss": 0.9896, + "learning_rate": 1.8673988032602368e-05, + "loss": 1.0346, "step": 6734 }, { - "epoch": 0.19111804767309876, + "epoch": 0.19085267363767747, "grad_norm": 0.0, - "learning_rate": 1.8669430182646345e-05, - "loss": 1.0304, + "learning_rate": 1.8673531291631223e-05, + "loss": 1.0785, "step": 6735 }, { - "epoch": 0.19114642451759364, + "epoch": 0.19088101107993993, "grad_norm": 0.0, - "learning_rate": 1.8668972070843857e-05, - "loss": 1.0446, + "learning_rate": 1.8673074477599705e-05, + "loss": 1.0786, "step": 6736 }, { - "epoch": 0.19117480136208853, + "epoch": 0.1909093485222024, "grad_norm": 0.0, - "learning_rate": 1.8668513885814176e-05, - "loss": 1.0247, + "learning_rate": 1.867261759051166e-05, + "loss": 1.0279, "step": 6737 }, { - "epoch": 0.19120317820658342, + "epoch": 0.19093768596446484, "grad_norm": 0.0, - "learning_rate": 1.866805562756117e-05, - "loss": 1.0112, + "learning_rate": 1.867216063037094e-05, + "loss": 0.9453, "step": 6738 }, { - "epoch": 0.19123155505107833, + "epoch": 0.1909660234067273, "grad_norm": 0.0, - "learning_rate": 1.86675972960887e-05, - "loss": 0.9807, + "learning_rate": 1.8671703597181387e-05, + "loss": 0.9546, "step": 6739 }, { - "epoch": 0.19125993189557322, + "epoch": 0.19099436084898977, "grad_norm": 0.0, - "learning_rate": 1.8667138891400655e-05, - "loss": 1.1387, + "learning_rate": 1.8671246490946853e-05, + "loss": 1.0259, "step": 6740 }, { - "epoch": 0.1912883087400681, + "epoch": 0.19102269829125224, "grad_norm": 0.0, - "learning_rate": 1.8666680413500892e-05, - "loss": 1.0684, + "learning_rate": 1.8670789311671195e-05, + "loss": 1.0204, "step": 6741 }, { - "epoch": 0.191316685584563, + "epoch": 0.1910510357335147, "grad_norm": 0.0, - "learning_rate": 1.8666221862393295e-05, - "loss": 0.8984, + "learning_rate": 1.867033205935825e-05, + "loss": 1.0466, "step": 6742 }, { - "epoch": 0.19134506242905788, + "epoch": 0.19107937317577717, "grad_norm": 0.0, - "learning_rate": 1.8665763238081726e-05, - "loss": 1.028, + "learning_rate": 1.8669874734011885e-05, + "loss": 0.9119, "step": 6743 }, { - "epoch": 0.19137343927355277, + "epoch": 0.1911077106180396, "grad_norm": 0.0, - "learning_rate": 1.8665304540570074e-05, - "loss": 0.9394, + "learning_rate": 1.8669417335635946e-05, + "loss": 1.1426, "step": 6744 }, { - "epoch": 0.19140181611804769, + "epoch": 0.19113604806030207, "grad_norm": 0.0, - "learning_rate": 1.8664845769862196e-05, - "loss": 0.9223, + "learning_rate": 1.8668959864234284e-05, + "loss": 0.9354, "step": 6745 }, { - "epoch": 0.19143019296254257, + "epoch": 0.19116438550256454, "grad_norm": 0.0, - "learning_rate": 1.866438692596198e-05, - "loss": 0.9813, + "learning_rate": 1.866850231981075e-05, + "loss": 1.1165, "step": 6746 }, { - "epoch": 0.19145856980703746, + "epoch": 0.191192722944827, "grad_norm": 0.0, - "learning_rate": 1.8663928008873303e-05, - "loss": 0.9902, + "learning_rate": 1.866804470236921e-05, + "loss": 1.0293, "step": 6747 }, { - "epoch": 0.19148694665153235, + "epoch": 0.19122106038708947, "grad_norm": 0.0, - "learning_rate": 1.8663469018600027e-05, - "loss": 0.9337, + "learning_rate": 1.8667587011913507e-05, + "loss": 0.9415, "step": 6748 }, { - "epoch": 0.19151532349602723, + "epoch": 0.19124939782935194, "grad_norm": 0.0, - "learning_rate": 1.8663009955146046e-05, - "loss": 0.9606, + "learning_rate": 1.8667129248447498e-05, + "loss": 0.9945, "step": 6749 }, { - "epoch": 0.19154370034052212, + "epoch": 0.19127773527161437, "grad_norm": 0.0, - "learning_rate": 1.8662550818515228e-05, - "loss": 1.0516, + "learning_rate": 1.8666671411975043e-05, + "loss": 0.9858, "step": 6750 }, { - "epoch": 0.19157207718501704, + "epoch": 0.19130607271387684, "grad_norm": 0.0, - "learning_rate": 1.8662091608711453e-05, - "loss": 0.9283, + "learning_rate": 1.8666213502499995e-05, + "loss": 1.0963, "step": 6751 }, { - "epoch": 0.19160045402951192, + "epoch": 0.1913344101561393, "grad_norm": 0.0, - "learning_rate": 1.8661632325738607e-05, - "loss": 1.0308, + "learning_rate": 1.8665755520026215e-05, + "loss": 1.0158, "step": 6752 }, { - "epoch": 0.1916288308740068, + "epoch": 0.19136274759840177, "grad_norm": 0.0, - "learning_rate": 1.8661172969600556e-05, - "loss": 1.0178, + "learning_rate": 1.866529746455756e-05, + "loss": 1.0722, "step": 6753 }, { - "epoch": 0.1916572077185017, + "epoch": 0.19139108504066424, "grad_norm": 0.0, - "learning_rate": 1.866071354030119e-05, - "loss": 0.902, + "learning_rate": 1.866483933609788e-05, + "loss": 1.0343, "step": 6754 }, { - "epoch": 0.19168558456299659, + "epoch": 0.1914194224829267, "grad_norm": 0.0, - "learning_rate": 1.866025403784439e-05, - "loss": 1.0668, + "learning_rate": 1.8664381134651047e-05, + "loss": 0.9732, "step": 6755 }, { - "epoch": 0.19171396140749147, + "epoch": 0.19144775992518914, "grad_norm": 0.0, - "learning_rate": 1.8659794462234032e-05, - "loss": 1.0519, + "learning_rate": 1.8663922860220908e-05, + "loss": 1.0977, "step": 6756 }, { - "epoch": 0.1917423382519864, + "epoch": 0.1914760973674516, "grad_norm": 0.0, - "learning_rate": 1.8659334813474e-05, - "loss": 0.9778, + "learning_rate": 1.8663464512811334e-05, + "loss": 0.9622, "step": 6757 }, { - "epoch": 0.19177071509648128, + "epoch": 0.19150443480971407, "grad_norm": 0.0, - "learning_rate": 1.865887509156818e-05, - "loss": 0.9418, + "learning_rate": 1.8663006092426182e-05, + "loss": 0.9933, "step": 6758 }, { - "epoch": 0.19179909194097616, + "epoch": 0.19153277225197654, "grad_norm": 0.0, - "learning_rate": 1.865841529652045e-05, - "loss": 0.9653, + "learning_rate": 1.866254759906931e-05, + "loss": 0.9289, "step": 6759 }, { - "epoch": 0.19182746878547105, + "epoch": 0.191561109694239, "grad_norm": 0.0, - "learning_rate": 1.86579554283347e-05, - "loss": 0.9507, + "learning_rate": 1.8662089032744585e-05, + "loss": 0.8337, "step": 6760 }, { - "epoch": 0.19185584562996594, + "epoch": 0.19158944713650147, "grad_norm": 0.0, - "learning_rate": 1.865749548701481e-05, - "loss": 1.0344, + "learning_rate": 1.8661630393455864e-05, + "loss": 1.0072, "step": 6761 }, { - "epoch": 0.19188422247446085, + "epoch": 0.1916177845787639, "grad_norm": 0.0, - "learning_rate": 1.8657035472564665e-05, - "loss": 1.0712, + "learning_rate": 1.8661171681207014e-05, + "loss": 0.9708, "step": 6762 }, { - "epoch": 0.19191259931895574, + "epoch": 0.19164612202102638, "grad_norm": 0.0, - "learning_rate": 1.8656575384988152e-05, - "loss": 1.1493, + "learning_rate": 1.8660712896001903e-05, + "loss": 0.978, "step": 6763 }, { - "epoch": 0.19194097616345063, + "epoch": 0.19167445946328884, "grad_norm": 0.0, - "learning_rate": 1.865611522428916e-05, - "loss": 0.9563, + "learning_rate": 1.866025403784439e-05, + "loss": 1.0621, "step": 6764 }, { - "epoch": 0.19196935300794551, + "epoch": 0.1917027969055513, "grad_norm": 0.0, - "learning_rate": 1.8655654990471572e-05, - "loss": 0.9048, + "learning_rate": 1.865979510673834e-05, + "loss": 0.9957, "step": 6765 }, { - "epoch": 0.1919977298524404, + "epoch": 0.19173113434781378, "grad_norm": 0.0, - "learning_rate": 1.865519468353928e-05, - "loss": 1.1686, + "learning_rate": 1.8659336102687618e-05, + "loss": 0.9223, "step": 6766 }, { - "epoch": 0.1920261066969353, + "epoch": 0.19175947179007624, "grad_norm": 0.0, - "learning_rate": 1.8654734303496166e-05, - "loss": 1.1316, + "learning_rate": 1.8658877025696095e-05, + "loss": 1.0888, "step": 6767 }, { - "epoch": 0.1920544835414302, + "epoch": 0.19178780923233868, "grad_norm": 0.0, - "learning_rate": 1.865427385034612e-05, - "loss": 0.8995, + "learning_rate": 1.8658417875767636e-05, + "loss": 1.0059, "step": 6768 }, { - "epoch": 0.1920828603859251, + "epoch": 0.19181614667460115, "grad_norm": 0.0, - "learning_rate": 1.865381332409304e-05, - "loss": 1.0448, + "learning_rate": 1.8657958652906106e-05, + "loss": 0.9867, "step": 6769 }, { - "epoch": 0.19211123723041998, + "epoch": 0.1918444841168636, "grad_norm": 0.0, - "learning_rate": 1.865335272474081e-05, - "loss": 1.0381, + "learning_rate": 1.865749935711538e-05, + "loss": 0.9309, "step": 6770 }, { - "epoch": 0.19213961407491487, + "epoch": 0.19187282155912608, "grad_norm": 0.0, - "learning_rate": 1.8652892052293314e-05, - "loss": 0.9945, + "learning_rate": 1.8657039988399315e-05, + "loss": 1.0479, "step": 6771 }, { - "epoch": 0.19216799091940975, + "epoch": 0.19190115900138854, "grad_norm": 0.0, - "learning_rate": 1.865243130675445e-05, - "loss": 0.9272, + "learning_rate": 1.8656580546761792e-05, + "loss": 1.1563, "step": 6772 }, { - "epoch": 0.19219636776390464, + "epoch": 0.191929496443651, "grad_norm": 0.0, - "learning_rate": 1.8651970488128115e-05, - "loss": 1.0496, + "learning_rate": 1.8656121032206673e-05, + "loss": 1.041, "step": 6773 }, { - "epoch": 0.19222474460839956, + "epoch": 0.19195783388591345, "grad_norm": 0.0, - "learning_rate": 1.86515095964182e-05, - "loss": 0.9249, + "learning_rate": 1.8655661444737835e-05, + "loss": 1.0643, "step": 6774 }, { - "epoch": 0.19225312145289444, + "epoch": 0.19198617132817591, "grad_norm": 0.0, - "learning_rate": 1.8651048631628585e-05, - "loss": 1.0366, + "learning_rate": 1.8655201784359146e-05, + "loss": 1.0596, "step": 6775 }, { - "epoch": 0.19228149829738933, + "epoch": 0.19201450877043838, "grad_norm": 0.0, - "learning_rate": 1.865058759376318e-05, - "loss": 1.1197, + "learning_rate": 1.865474205107448e-05, + "loss": 1.0199, "step": 6776 }, { - "epoch": 0.19230987514188422, + "epoch": 0.19204284621270085, "grad_norm": 0.0, - "learning_rate": 1.8650126482825867e-05, - "loss": 1.0396, + "learning_rate": 1.8654282244887704e-05, + "loss": 0.9596, "step": 6777 }, { - "epoch": 0.1923382519863791, + "epoch": 0.1920711836549633, "grad_norm": 0.0, - "learning_rate": 1.8649665298820552e-05, - "loss": 0.9961, + "learning_rate": 1.86538223658027e-05, + "loss": 1.0128, "step": 6778 }, { - "epoch": 0.19236662883087402, + "epoch": 0.19209952109722575, "grad_norm": 0.0, - "learning_rate": 1.8649204041751123e-05, - "loss": 1.0006, + "learning_rate": 1.8653362413823333e-05, + "loss": 1.0611, "step": 6779 }, { - "epoch": 0.1923950056753689, + "epoch": 0.19212785853948822, "grad_norm": 0.0, - "learning_rate": 1.8648742711621478e-05, - "loss": 1.0533, + "learning_rate": 1.8652902388953478e-05, + "loss": 0.9743, "step": 6780 }, { - "epoch": 0.1924233825198638, + "epoch": 0.19215619598175068, "grad_norm": 0.0, - "learning_rate": 1.8648281308435515e-05, - "loss": 0.9802, + "learning_rate": 1.865244229119702e-05, + "loss": 1.0432, "step": 6781 }, { - "epoch": 0.19245175936435868, + "epoch": 0.19218453342401315, "grad_norm": 0.0, - "learning_rate": 1.864781983219713e-05, - "loss": 1.0457, + "learning_rate": 1.8651982120557824e-05, + "loss": 1.0497, "step": 6782 }, { - "epoch": 0.19248013620885357, + "epoch": 0.19221287086627561, "grad_norm": 0.0, - "learning_rate": 1.8647358282910226e-05, - "loss": 1.0112, + "learning_rate": 1.865152187703977e-05, + "loss": 0.9582, "step": 6783 }, { - "epoch": 0.19250851305334846, + "epoch": 0.19224120830853808, "grad_norm": 0.0, - "learning_rate": 1.8646896660578697e-05, - "loss": 1.166, + "learning_rate": 1.865106156064674e-05, + "loss": 1.1229, "step": 6784 }, { - "epoch": 0.19253688989784337, + "epoch": 0.19226954575080052, "grad_norm": 0.0, - "learning_rate": 1.864643496520644e-05, - "loss": 0.9377, + "learning_rate": 1.8650601171382595e-05, + "loss": 1.0217, "step": 6785 }, { - "epoch": 0.19256526674233826, + "epoch": 0.19229788319306299, "grad_norm": 0.0, - "learning_rate": 1.8645973196797362e-05, - "loss": 1.0085, + "learning_rate": 1.8650140709251233e-05, + "loss": 0.886, "step": 6786 }, { - "epoch": 0.19259364358683315, + "epoch": 0.19232622063532545, "grad_norm": 0.0, - "learning_rate": 1.8645511355355356e-05, - "loss": 1.0344, + "learning_rate": 1.8649680174256518e-05, + "loss": 1.0188, "step": 6787 }, { - "epoch": 0.19262202043132803, + "epoch": 0.19235455807758792, "grad_norm": 0.0, - "learning_rate": 1.8645049440884328e-05, - "loss": 0.9811, + "learning_rate": 1.8649219566402336e-05, + "loss": 1.0608, "step": 6788 }, { - "epoch": 0.19265039727582292, + "epoch": 0.19238289551985038, "grad_norm": 0.0, - "learning_rate": 1.864458745338818e-05, - "loss": 0.9897, + "learning_rate": 1.864875888569257e-05, + "loss": 0.9291, "step": 6789 }, { - "epoch": 0.1926787741203178, + "epoch": 0.19241123296211285, "grad_norm": 0.0, - "learning_rate": 1.864412539287081e-05, - "loss": 1.0432, + "learning_rate": 1.8648298132131092e-05, + "loss": 1.0337, "step": 6790 }, { - "epoch": 0.19270715096481272, + "epoch": 0.1924395704043753, "grad_norm": 0.0, - "learning_rate": 1.8643663259336126e-05, - "loss": 0.9573, + "learning_rate": 1.864783730572179e-05, + "loss": 1.0862, "step": 6791 }, { - "epoch": 0.1927355278093076, + "epoch": 0.19246790784663775, "grad_norm": 0.0, - "learning_rate": 1.8643201052788033e-05, - "loss": 1.0044, + "learning_rate": 1.864737640646854e-05, + "loss": 1.0909, "step": 6792 }, { - "epoch": 0.1927639046538025, + "epoch": 0.19249624528890022, "grad_norm": 0.0, - "learning_rate": 1.864273877323043e-05, - "loss": 0.9218, + "learning_rate": 1.8646915434375233e-05, + "loss": 1.0115, "step": 6793 }, { - "epoch": 0.19279228149829739, + "epoch": 0.19252458273116269, "grad_norm": 0.0, - "learning_rate": 1.864227642066722e-05, - "loss": 1.0276, + "learning_rate": 1.864645438944574e-05, + "loss": 0.9192, "step": 6794 }, { - "epoch": 0.19282065834279227, + "epoch": 0.19255292017342515, "grad_norm": 0.0, - "learning_rate": 1.8641813995102317e-05, - "loss": 1.0012, + "learning_rate": 1.8645993271683953e-05, + "loss": 1.0443, "step": 6795 }, { - "epoch": 0.19284903518728716, + "epoch": 0.19258125761568762, "grad_norm": 0.0, - "learning_rate": 1.8641351496539625e-05, - "loss": 0.9935, + "learning_rate": 1.8645532081093756e-05, + "loss": 1.133, "step": 6796 }, { - "epoch": 0.19287741203178208, + "epoch": 0.19260959505795006, "grad_norm": 0.0, - "learning_rate": 1.8640888924983045e-05, - "loss": 0.935, + "learning_rate": 1.864507081767903e-05, + "loss": 1.0343, "step": 6797 }, { - "epoch": 0.19290578887627696, + "epoch": 0.19263793250021252, "grad_norm": 0.0, - "learning_rate": 1.8640426280436488e-05, - "loss": 1.0186, + "learning_rate": 1.864460948144366e-05, + "loss": 0.9762, "step": 6798 }, { - "epoch": 0.19293416572077185, + "epoch": 0.192666269942475, "grad_norm": 0.0, - "learning_rate": 1.863996356290386e-05, - "loss": 0.9248, + "learning_rate": 1.864414807239154e-05, + "loss": 0.95, "step": 6799 }, { - "epoch": 0.19296254256526674, + "epoch": 0.19269460738473745, "grad_norm": 0.0, - "learning_rate": 1.8639500772389073e-05, - "loss": 1.0796, + "learning_rate": 1.8643686590526547e-05, + "loss": 1.0011, "step": 6800 }, { - "epoch": 0.19299091940976162, + "epoch": 0.19272294482699992, "grad_norm": 0.0, - "learning_rate": 1.863903790889604e-05, - "loss": 1.1368, + "learning_rate": 1.8643225035852573e-05, + "loss": 1.0001, "step": 6801 }, { - "epoch": 0.19301929625425654, + "epoch": 0.1927512822692624, "grad_norm": 0.0, - "learning_rate": 1.8638574972428657e-05, - "loss": 1.0037, + "learning_rate": 1.8642763408373502e-05, + "loss": 1.0051, "step": 6802 }, { - "epoch": 0.19304767309875143, + "epoch": 0.19277961971152482, "grad_norm": 0.0, - "learning_rate": 1.8638111962990847e-05, - "loss": 0.9857, + "learning_rate": 1.864230170809323e-05, + "loss": 0.9931, "step": 6803 }, { - "epoch": 0.19307604994324631, + "epoch": 0.1928079571537873, "grad_norm": 0.0, - "learning_rate": 1.8637648880586515e-05, - "loss": 1.006, + "learning_rate": 1.864183993501564e-05, + "loss": 1.0965, "step": 6804 }, { - "epoch": 0.1931044267877412, + "epoch": 0.19283629459604976, "grad_norm": 0.0, - "learning_rate": 1.8637185725219578e-05, - "loss": 1.0735, + "learning_rate": 1.864137808914462e-05, + "loss": 0.9758, "step": 6805 }, { - "epoch": 0.1931328036322361, + "epoch": 0.19286463203831222, "grad_norm": 0.0, - "learning_rate": 1.8636722496893943e-05, - "loss": 1.0378, + "learning_rate": 1.864091617048407e-05, + "loss": 1.0869, "step": 6806 }, { - "epoch": 0.19316118047673098, + "epoch": 0.1928929694805747, "grad_norm": 0.0, - "learning_rate": 1.8636259195613526e-05, - "loss": 0.9966, + "learning_rate": 1.864045417903787e-05, + "loss": 0.886, "step": 6807 }, { - "epoch": 0.1931895573212259, + "epoch": 0.19292130692283715, "grad_norm": 0.0, - "learning_rate": 1.8635795821382237e-05, - "loss": 1.0597, + "learning_rate": 1.8639992114809918e-05, + "loss": 1.0107, "step": 6808 }, { - "epoch": 0.19321793416572078, + "epoch": 0.1929496443650996, "grad_norm": 0.0, - "learning_rate": 1.8635332374203993e-05, - "loss": 0.995, + "learning_rate": 1.86395299778041e-05, + "loss": 1.0707, "step": 6809 }, { - "epoch": 0.19324631101021567, + "epoch": 0.19297798180736206, "grad_norm": 0.0, - "learning_rate": 1.8634868854082707e-05, - "loss": 0.9232, + "learning_rate": 1.8639067768024315e-05, + "loss": 1.0172, "step": 6810 }, { - "epoch": 0.19327468785471055, + "epoch": 0.19300631924962453, "grad_norm": 0.0, - "learning_rate": 1.86344052610223e-05, - "loss": 1.043, + "learning_rate": 1.8638605485474455e-05, + "loss": 0.8744, "step": 6811 }, { - "epoch": 0.19330306469920544, + "epoch": 0.193034656691887, "grad_norm": 0.0, - "learning_rate": 1.863394159502668e-05, - "loss": 1.1074, + "learning_rate": 1.8638143130158415e-05, + "loss": 0.9527, "step": 6812 }, { - "epoch": 0.19333144154370033, + "epoch": 0.19306299413414946, "grad_norm": 0.0, - "learning_rate": 1.863347785609977e-05, - "loss": 1.0879, + "learning_rate": 1.8637680702080082e-05, + "loss": 1.0393, "step": 6813 }, { - "epoch": 0.19335981838819524, + "epoch": 0.19309133157641192, "grad_norm": 0.0, - "learning_rate": 1.8633014044245482e-05, - "loss": 0.9993, + "learning_rate": 1.863721820124336e-05, + "loss": 0.983, "step": 6814 }, { - "epoch": 0.19338819523269013, + "epoch": 0.19311966901867436, "grad_norm": 0.0, - "learning_rate": 1.8632550159467734e-05, - "loss": 0.907, + "learning_rate": 1.8636755627652143e-05, + "loss": 1.0562, "step": 6815 }, { - "epoch": 0.19341657207718502, + "epoch": 0.19314800646093683, "grad_norm": 0.0, - "learning_rate": 1.863208620177045e-05, - "loss": 1.0929, + "learning_rate": 1.8636292981310327e-05, + "loss": 0.9621, "step": 6816 }, { - "epoch": 0.1934449489216799, + "epoch": 0.1931763439031993, "grad_norm": 0.0, - "learning_rate": 1.863162217115755e-05, - "loss": 0.943, + "learning_rate": 1.8635830262221804e-05, + "loss": 0.9478, "step": 6817 }, { - "epoch": 0.1934733257661748, + "epoch": 0.19320468134546176, "grad_norm": 0.0, - "learning_rate": 1.8631158067632942e-05, - "loss": 0.9904, + "learning_rate": 1.8635367470390478e-05, + "loss": 0.9526, "step": 6818 }, { - "epoch": 0.1935017026106697, + "epoch": 0.19323301878772423, "grad_norm": 0.0, - "learning_rate": 1.8630693891200557e-05, - "loss": 0.9371, + "learning_rate": 1.863490460582025e-05, + "loss": 1.1111, "step": 6819 }, { - "epoch": 0.1935300794551646, + "epoch": 0.1932613562299867, "grad_norm": 0.0, - "learning_rate": 1.863022964186431e-05, - "loss": 1.0044, + "learning_rate": 1.8634441668515005e-05, + "loss": 1.0538, "step": 6820 }, { - "epoch": 0.19355845629965948, + "epoch": 0.19328969367224913, "grad_norm": 0.0, - "learning_rate": 1.862976531962813e-05, - "loss": 1.0111, + "learning_rate": 1.8633978658478658e-05, + "loss": 1.0901, "step": 6821 }, { - "epoch": 0.19358683314415437, + "epoch": 0.1933180311145116, "grad_norm": 0.0, - "learning_rate": 1.862930092449593e-05, - "loss": 0.9605, + "learning_rate": 1.86335155757151e-05, + "loss": 0.9707, "step": 6822 }, { - "epoch": 0.19361520998864926, + "epoch": 0.19334636855677406, "grad_norm": 0.0, - "learning_rate": 1.862883645647164e-05, - "loss": 1.0231, + "learning_rate": 1.8633052420228236e-05, + "loss": 1.1342, "step": 6823 }, { - "epoch": 0.19364358683314414, + "epoch": 0.19337470599903653, "grad_norm": 0.0, - "learning_rate": 1.8628371915559183e-05, - "loss": 1.0116, + "learning_rate": 1.8632589192021964e-05, + "loss": 1.052, "step": 6824 }, { - "epoch": 0.19367196367763906, + "epoch": 0.193403043441299, "grad_norm": 0.0, - "learning_rate": 1.8627907301762475e-05, - "loss": 0.9222, + "learning_rate": 1.8632125891100184e-05, + "loss": 0.966, "step": 6825 }, { - "epoch": 0.19370034052213395, + "epoch": 0.19343138088356146, "grad_norm": 0.0, - "learning_rate": 1.862744261508545e-05, - "loss": 1.0031, + "learning_rate": 1.863166251746681e-05, + "loss": 1.0015, "step": 6826 }, { - "epoch": 0.19372871736662883, + "epoch": 0.1934597183258239, "grad_norm": 0.0, - "learning_rate": 1.862697785553203e-05, - "loss": 1.0138, + "learning_rate": 1.8631199071125735e-05, + "loss": 0.9412, "step": 6827 }, { - "epoch": 0.19375709421112372, + "epoch": 0.19348805576808636, "grad_norm": 0.0, - "learning_rate": 1.8626513023106135e-05, - "loss": 0.9584, + "learning_rate": 1.8630735552080862e-05, + "loss": 0.9366, "step": 6828 }, { - "epoch": 0.1937854710556186, + "epoch": 0.19351639321034883, "grad_norm": 0.0, - "learning_rate": 1.86260481178117e-05, - "loss": 1.035, + "learning_rate": 1.86302719603361e-05, + "loss": 1.0072, "step": 6829 }, { - "epoch": 0.1938138479001135, + "epoch": 0.1935447306526113, "grad_norm": 0.0, - "learning_rate": 1.862558313965265e-05, - "loss": 0.9189, + "learning_rate": 1.8629808295895352e-05, + "loss": 1.025, "step": 6830 }, { - "epoch": 0.1938422247446084, + "epoch": 0.19357306809487376, "grad_norm": 0.0, - "learning_rate": 1.8625118088632913e-05, - "loss": 1.065, + "learning_rate": 1.8629344558762524e-05, + "loss": 1.1133, "step": 6831 }, { - "epoch": 0.1938706015891033, + "epoch": 0.19360140553713623, "grad_norm": 0.0, - "learning_rate": 1.8624652964756412e-05, - "loss": 1.1252, + "learning_rate": 1.8628880748941523e-05, + "loss": 1.0267, "step": 6832 }, { - "epoch": 0.19389897843359818, + "epoch": 0.19362974297939867, "grad_norm": 0.0, - "learning_rate": 1.862418776802708e-05, - "loss": 1.0343, + "learning_rate": 1.8628416866436256e-05, + "loss": 1.0723, "step": 6833 }, { - "epoch": 0.19392735527809307, + "epoch": 0.19365808042166113, "grad_norm": 0.0, - "learning_rate": 1.8623722498448846e-05, - "loss": 1.108, + "learning_rate": 1.8627952911250632e-05, + "loss": 1.0352, "step": 6834 }, { - "epoch": 0.19395573212258796, + "epoch": 0.1936864178639236, "grad_norm": 0.0, - "learning_rate": 1.862325715602564e-05, - "loss": 1.0638, + "learning_rate": 1.862748888338855e-05, + "loss": 0.968, "step": 6835 }, { - "epoch": 0.19398410896708285, + "epoch": 0.19371475530618606, "grad_norm": 0.0, - "learning_rate": 1.8622791740761398e-05, - "loss": 1.0112, + "learning_rate": 1.862702478285393e-05, + "loss": 1.0943, "step": 6836 }, { - "epoch": 0.19401248581157776, + "epoch": 0.19374309274844853, "grad_norm": 0.0, - "learning_rate": 1.8622326252660042e-05, - "loss": 0.9652, + "learning_rate": 1.8626560609650676e-05, + "loss": 1.0231, "step": 6837 }, { - "epoch": 0.19404086265607265, + "epoch": 0.193771430190711, "grad_norm": 0.0, - "learning_rate": 1.862186069172551e-05, - "loss": 1.0245, + "learning_rate": 1.8626096363782697e-05, + "loss": 0.9533, "step": 6838 }, { - "epoch": 0.19406923950056754, + "epoch": 0.19379976763297344, "grad_norm": 0.0, - "learning_rate": 1.862139505796173e-05, - "loss": 0.9472, + "learning_rate": 1.862563204525391e-05, + "loss": 1.0689, "step": 6839 }, { - "epoch": 0.19409761634506242, + "epoch": 0.1938281050752359, "grad_norm": 0.0, - "learning_rate": 1.8620929351372644e-05, - "loss": 0.9942, + "learning_rate": 1.8625167654068216e-05, + "loss": 1.0281, "step": 6840 }, { - "epoch": 0.1941259931895573, + "epoch": 0.19385644251749837, "grad_norm": 0.0, - "learning_rate": 1.8620463571962173e-05, - "loss": 0.8878, + "learning_rate": 1.8624703190229535e-05, + "loss": 0.9468, "step": 6841 }, { - "epoch": 0.19415437003405223, + "epoch": 0.19388477995976083, "grad_norm": 0.0, - "learning_rate": 1.8619997719734266e-05, - "loss": 0.9386, + "learning_rate": 1.8624238653741775e-05, + "loss": 1.0336, "step": 6842 }, { - "epoch": 0.1941827468785471, + "epoch": 0.1939131174020233, "grad_norm": 0.0, - "learning_rate": 1.861953179469285e-05, - "loss": 0.9857, + "learning_rate": 1.862377404460885e-05, + "loss": 1.0384, "step": 6843 }, { - "epoch": 0.194211123723042, + "epoch": 0.19394145484428577, "grad_norm": 0.0, - "learning_rate": 1.8619065796841858e-05, - "loss": 0.9743, + "learning_rate": 1.8623309362834674e-05, + "loss": 1.0101, "step": 6844 }, { - "epoch": 0.1942395005675369, + "epoch": 0.1939697922865482, "grad_norm": 0.0, - "learning_rate": 1.861859972618523e-05, - "loss": 1.0219, + "learning_rate": 1.862284460842316e-05, + "loss": 1.078, "step": 6845 }, { - "epoch": 0.19426787741203178, + "epoch": 0.19399812972881067, "grad_norm": 0.0, - "learning_rate": 1.8618133582726905e-05, - "loss": 1.0745, + "learning_rate": 1.8622379781378226e-05, + "loss": 0.9557, "step": 6846 }, { - "epoch": 0.19429625425652666, + "epoch": 0.19402646717107314, "grad_norm": 0.0, - "learning_rate": 1.8617667366470817e-05, - "loss": 0.906, + "learning_rate": 1.8621914881703785e-05, + "loss": 1.0434, "step": 6847 }, { - "epoch": 0.19432463110102158, + "epoch": 0.1940548046133356, "grad_norm": 0.0, - "learning_rate": 1.8617201077420907e-05, - "loss": 1.0061, + "learning_rate": 1.862144990940375e-05, + "loss": 1.0623, "step": 6848 }, { - "epoch": 0.19435300794551646, + "epoch": 0.19408314205559807, "grad_norm": 0.0, - "learning_rate": 1.861673471558111e-05, - "loss": 0.9526, + "learning_rate": 1.8620984864482046e-05, + "loss": 1.0211, "step": 6849 }, { - "epoch": 0.19438138479001135, + "epoch": 0.19411147949786053, "grad_norm": 0.0, - "learning_rate": 1.8616268280955366e-05, - "loss": 1.0056, + "learning_rate": 1.8620519746942582e-05, + "loss": 1.0251, "step": 6850 }, { - "epoch": 0.19440976163450624, + "epoch": 0.19413981694012297, "grad_norm": 0.0, - "learning_rate": 1.861580177354762e-05, - "loss": 0.9745, + "learning_rate": 1.862005455678928e-05, + "loss": 1.0604, "step": 6851 }, { - "epoch": 0.19443813847900113, + "epoch": 0.19416815438238544, "grad_norm": 0.0, - "learning_rate": 1.8615335193361806e-05, - "loss": 0.971, + "learning_rate": 1.8619589294026058e-05, + "loss": 0.9934, "step": 6852 }, { - "epoch": 0.19446651532349601, + "epoch": 0.1941964918246479, "grad_norm": 0.0, - "learning_rate": 1.861486854040187e-05, - "loss": 0.9754, + "learning_rate": 1.8619123958656832e-05, + "loss": 1.0005, "step": 6853 }, { - "epoch": 0.19449489216799093, + "epoch": 0.19422482926691037, "grad_norm": 0.0, - "learning_rate": 1.861440181467175e-05, - "loss": 0.8829, + "learning_rate": 1.8618658550685528e-05, + "loss": 1.0195, "step": 6854 }, { - "epoch": 0.19452326901248582, + "epoch": 0.19425316670917284, "grad_norm": 0.0, - "learning_rate": 1.8613935016175396e-05, - "loss": 1.0357, + "learning_rate": 1.861819307011606e-05, + "loss": 1.017, "step": 6855 }, { - "epoch": 0.1945516458569807, + "epoch": 0.1942815041514353, "grad_norm": 0.0, - "learning_rate": 1.8613468144916742e-05, - "loss": 0.9201, + "learning_rate": 1.8617727516952353e-05, + "loss": 1.1924, "step": 6856 }, { - "epoch": 0.1945800227014756, + "epoch": 0.19430984159369774, "grad_norm": 0.0, - "learning_rate": 1.8613001200899737e-05, - "loss": 0.9266, + "learning_rate": 1.8617261891198325e-05, + "loss": 1.171, "step": 6857 }, { - "epoch": 0.19460839954597048, + "epoch": 0.1943381790359602, "grad_norm": 0.0, - "learning_rate": 1.861253418412832e-05, - "loss": 0.9542, + "learning_rate": 1.86167961928579e-05, + "loss": 1.0729, "step": 6858 }, { - "epoch": 0.1946367763904654, + "epoch": 0.19436651647822267, "grad_norm": 0.0, - "learning_rate": 1.8612067094606442e-05, - "loss": 0.9961, + "learning_rate": 1.8616330421935004e-05, + "loss": 1.0464, "step": 6859 }, { - "epoch": 0.19466515323496028, + "epoch": 0.19439485392048514, "grad_norm": 0.0, - "learning_rate": 1.8611599932338046e-05, - "loss": 1.0648, + "learning_rate": 1.8615864578433552e-05, + "loss": 1.04, "step": 6860 }, { - "epoch": 0.19469353007945517, + "epoch": 0.1944231913627476, "grad_norm": 0.0, - "learning_rate": 1.861113269732708e-05, - "loss": 0.9937, + "learning_rate": 1.8615398662357477e-05, + "loss": 0.9707, "step": 6861 }, { - "epoch": 0.19472190692395006, + "epoch": 0.19445152880501007, "grad_norm": 0.0, - "learning_rate": 1.861066538957749e-05, - "loss": 0.9797, + "learning_rate": 1.8614932673710702e-05, + "loss": 1.1287, "step": 6862 }, { - "epoch": 0.19475028376844494, + "epoch": 0.1944798662472725, "grad_norm": 0.0, - "learning_rate": 1.861019800909322e-05, - "loss": 0.9126, + "learning_rate": 1.8614466612497147e-05, + "loss": 1.076, "step": 6863 }, { - "epoch": 0.19477866061293983, + "epoch": 0.19450820368953498, "grad_norm": 0.0, - "learning_rate": 1.8609730555878217e-05, - "loss": 0.9823, + "learning_rate": 1.8614000478720743e-05, + "loss": 1.0607, "step": 6864 }, { - "epoch": 0.19480703745743475, + "epoch": 0.19453654113179744, "grad_norm": 0.0, - "learning_rate": 1.860926302993644e-05, - "loss": 1.1185, + "learning_rate": 1.861353427238541e-05, + "loss": 0.9718, "step": 6865 }, { - "epoch": 0.19483541430192963, + "epoch": 0.1945648785740599, "grad_norm": 0.0, - "learning_rate": 1.8608795431271823e-05, - "loss": 1.1388, + "learning_rate": 1.8613067993495084e-05, + "loss": 1.0369, "step": 6866 }, { - "epoch": 0.19486379114642452, + "epoch": 0.19459321601632237, "grad_norm": 0.0, - "learning_rate": 1.860832775988833e-05, - "loss": 0.9626, + "learning_rate": 1.8612601642053686e-05, + "loss": 0.9893, "step": 6867 }, { - "epoch": 0.1948921679909194, + "epoch": 0.19462155345858484, "grad_norm": 0.0, - "learning_rate": 1.8607860015789903e-05, - "loss": 1.0041, + "learning_rate": 1.8612135218065142e-05, + "loss": 0.9362, "step": 6868 }, { - "epoch": 0.1949205448354143, + "epoch": 0.19464989090084728, "grad_norm": 0.0, - "learning_rate": 1.8607392198980496e-05, - "loss": 0.9861, + "learning_rate": 1.861166872153339e-05, + "loss": 1.1258, "step": 6869 }, { - "epoch": 0.19494892167990918, + "epoch": 0.19467822834310974, "grad_norm": 0.0, - "learning_rate": 1.8606924309464062e-05, - "loss": 1.0771, + "learning_rate": 1.8611202152462354e-05, + "loss": 0.9669, "step": 6870 }, { - "epoch": 0.1949772985244041, + "epoch": 0.1947065657853722, "grad_norm": 0.0, - "learning_rate": 1.8606456347244547e-05, - "loss": 0.856, + "learning_rate": 1.8610735510855966e-05, + "loss": 1.0541, "step": 6871 }, { - "epoch": 0.19500567536889898, + "epoch": 0.19473490322763468, "grad_norm": 0.0, - "learning_rate": 1.8605988312325915e-05, - "loss": 0.8942, + "learning_rate": 1.8610268796718153e-05, + "loss": 0.9662, "step": 6872 }, { - "epoch": 0.19503405221339387, + "epoch": 0.19476324066989714, "grad_norm": 0.0, - "learning_rate": 1.8605520204712107e-05, - "loss": 0.9688, + "learning_rate": 1.8609802010052846e-05, + "loss": 1.0254, "step": 6873 }, { - "epoch": 0.19506242905788876, + "epoch": 0.1947915781121596, "grad_norm": 0.0, - "learning_rate": 1.8605052024407083e-05, - "loss": 1.0965, + "learning_rate": 1.8609335150863982e-05, + "loss": 1.0386, "step": 6874 }, { - "epoch": 0.19509080590238365, + "epoch": 0.19481991555442205, "grad_norm": 0.0, - "learning_rate": 1.86045837714148e-05, - "loss": 0.9963, + "learning_rate": 1.8608868219155494e-05, + "loss": 1.0811, "step": 6875 }, { - "epoch": 0.19511918274687853, + "epoch": 0.1948482529966845, "grad_norm": 0.0, - "learning_rate": 1.860411544573921e-05, - "loss": 1.0806, + "learning_rate": 1.860840121493131e-05, + "loss": 1.0643, "step": 6876 }, { - "epoch": 0.19514755959137345, + "epoch": 0.19487659043894698, "grad_norm": 0.0, - "learning_rate": 1.8603647047384274e-05, - "loss": 1.0589, + "learning_rate": 1.860793413819536e-05, + "loss": 1.0762, "step": 6877 }, { - "epoch": 0.19517593643586834, + "epoch": 0.19490492788120944, "grad_norm": 0.0, - "learning_rate": 1.860317857635394e-05, - "loss": 0.9798, + "learning_rate": 1.8607466988951594e-05, + "loss": 1.0881, "step": 6878 }, { - "epoch": 0.19520431328036322, + "epoch": 0.1949332653234719, "grad_norm": 0.0, - "learning_rate": 1.8602710032652173e-05, - "loss": 0.9928, + "learning_rate": 1.860699976720393e-05, + "loss": 1.0111, "step": 6879 }, { - "epoch": 0.1952326901248581, + "epoch": 0.19496160276573438, "grad_norm": 0.0, - "learning_rate": 1.8602241416282926e-05, - "loss": 0.9398, + "learning_rate": 1.860653247295632e-05, + "loss": 1.0248, "step": 6880 }, { - "epoch": 0.195261066969353, + "epoch": 0.19498994020799681, "grad_norm": 0.0, - "learning_rate": 1.8601772727250158e-05, - "loss": 1.1038, + "learning_rate": 1.8606065106212682e-05, + "loss": 1.1519, "step": 6881 }, { - "epoch": 0.1952894438138479, + "epoch": 0.19501827765025928, "grad_norm": 0.0, - "learning_rate": 1.860130396555783e-05, - "loss": 0.9179, + "learning_rate": 1.8605597666976964e-05, + "loss": 0.9545, "step": 6882 }, { - "epoch": 0.1953178206583428, + "epoch": 0.19504661509252175, "grad_norm": 0.0, - "learning_rate": 1.8600835131209902e-05, - "loss": 0.9968, + "learning_rate": 1.86051301552531e-05, + "loss": 1.0517, "step": 6883 }, { - "epoch": 0.1953461975028377, + "epoch": 0.1950749525347842, "grad_norm": 0.0, - "learning_rate": 1.8600366224210332e-05, - "loss": 1.0048, + "learning_rate": 1.8604662571045033e-05, + "loss": 1.0422, "step": 6884 }, { - "epoch": 0.19537457434733257, + "epoch": 0.19510328997704668, "grad_norm": 0.0, - "learning_rate": 1.859989724456308e-05, - "loss": 1.0666, + "learning_rate": 1.8604194914356695e-05, + "loss": 0.9208, "step": 6885 }, { - "epoch": 0.19540295119182746, + "epoch": 0.19513162741930914, "grad_norm": 0.0, - "learning_rate": 1.8599428192272112e-05, - "loss": 0.9428, + "learning_rate": 1.8603727185192028e-05, + "loss": 1.0304, "step": 6886 }, { - "epoch": 0.19543132803632235, + "epoch": 0.19515996486157158, "grad_norm": 0.0, - "learning_rate": 1.8598959067341386e-05, - "loss": 1.0036, + "learning_rate": 1.8603259383554973e-05, + "loss": 1.0767, "step": 6887 }, { - "epoch": 0.19545970488081726, + "epoch": 0.19518830230383405, "grad_norm": 0.0, - "learning_rate": 1.859848986977487e-05, - "loss": 1.0768, + "learning_rate": 1.860279150944947e-05, + "loss": 0.9729, "step": 6888 }, { - "epoch": 0.19548808172531215, + "epoch": 0.19521663974609652, "grad_norm": 0.0, - "learning_rate": 1.859802059957652e-05, - "loss": 1.008, + "learning_rate": 1.8602323562879464e-05, + "loss": 0.982, "step": 6889 }, { - "epoch": 0.19551645856980704, + "epoch": 0.19524497718835898, "grad_norm": 0.0, - "learning_rate": 1.85975512567503e-05, - "loss": 1.0828, + "learning_rate": 1.8601855543848884e-05, + "loss": 1.0792, "step": 6890 }, { - "epoch": 0.19554483541430193, + "epoch": 0.19527331463062145, "grad_norm": 0.0, - "learning_rate": 1.8597081841300184e-05, - "loss": 0.9761, + "learning_rate": 1.8601387452361685e-05, + "loss": 0.9579, "step": 6891 }, { - "epoch": 0.1955732122587968, + "epoch": 0.1953016520728839, "grad_norm": 0.0, - "learning_rate": 1.8596612353230127e-05, - "loss": 0.9493, + "learning_rate": 1.8600919288421805e-05, + "loss": 0.8808, "step": 6892 }, { - "epoch": 0.1956015891032917, + "epoch": 0.19532998951514635, "grad_norm": 0.0, - "learning_rate": 1.8596142792544102e-05, - "loss": 0.9288, + "learning_rate": 1.8600451052033185e-05, + "loss": 0.9251, "step": 6893 }, { - "epoch": 0.19562996594778662, + "epoch": 0.19535832695740882, "grad_norm": 0.0, - "learning_rate": 1.8595673159246072e-05, - "loss": 1.0421, + "learning_rate": 1.8599982743199775e-05, + "loss": 1.0822, "step": 6894 }, { - "epoch": 0.1956583427922815, + "epoch": 0.19538666439967128, "grad_norm": 0.0, - "learning_rate": 1.8595203453340005e-05, - "loss": 0.8879, + "learning_rate": 1.859951436192552e-05, + "loss": 1.0249, "step": 6895 }, { - "epoch": 0.1956867196367764, + "epoch": 0.19541500184193375, "grad_norm": 0.0, - "learning_rate": 1.859473367482987e-05, - "loss": 0.8936, + "learning_rate": 1.8599045908214356e-05, + "loss": 1.0796, "step": 6896 }, { - "epoch": 0.19571509648127128, + "epoch": 0.19544333928419622, "grad_norm": 0.0, - "learning_rate": 1.8594263823719627e-05, - "loss": 1.0072, + "learning_rate": 1.859857738207024e-05, + "loss": 0.9997, "step": 6897 }, { - "epoch": 0.19574347332576617, + "epoch": 0.19547167672645868, "grad_norm": 0.0, - "learning_rate": 1.8593793900013254e-05, - "loss": 1.0633, + "learning_rate": 1.859810878349711e-05, + "loss": 0.8778, "step": 6898 }, { - "epoch": 0.19577185017026108, + "epoch": 0.19550001416872112, "grad_norm": 0.0, - "learning_rate": 1.8593323903714718e-05, - "loss": 0.981, + "learning_rate": 1.8597640112498917e-05, + "loss": 0.9818, "step": 6899 }, { - "epoch": 0.19580022701475597, + "epoch": 0.1955283516109836, "grad_norm": 0.0, - "learning_rate": 1.859285383482799e-05, - "loss": 0.8991, + "learning_rate": 1.859717136907961e-05, + "loss": 1.0465, "step": 6900 }, { - "epoch": 0.19582860385925085, + "epoch": 0.19555668905324605, "grad_norm": 0.0, - "learning_rate": 1.859238369335704e-05, - "loss": 1.0645, + "learning_rate": 1.8596702553243137e-05, + "loss": 0.9363, "step": 6901 }, { - "epoch": 0.19585698070374574, + "epoch": 0.19558502649550852, "grad_norm": 0.0, - "learning_rate": 1.8591913479305834e-05, - "loss": 1.0467, + "learning_rate": 1.8596233664993444e-05, + "loss": 1.019, "step": 6902 }, { - "epoch": 0.19588535754824063, + "epoch": 0.19561336393777098, "grad_norm": 0.0, - "learning_rate": 1.8591443192678353e-05, - "loss": 1.0975, + "learning_rate": 1.8595764704334486e-05, + "loss": 1.0063, "step": 6903 }, { - "epoch": 0.19591373439273552, + "epoch": 0.19564170138003345, "grad_norm": 0.0, - "learning_rate": 1.8590972833478562e-05, - "loss": 1.0377, + "learning_rate": 1.8595295671270203e-05, + "loss": 1.1426, "step": 6904 }, { - "epoch": 0.19594211123723043, + "epoch": 0.1956700388222959, "grad_norm": 0.0, - "learning_rate": 1.859050240171044e-05, - "loss": 0.8591, + "learning_rate": 1.859482656580456e-05, + "loss": 0.9895, "step": 6905 }, { - "epoch": 0.19597048808172532, + "epoch": 0.19569837626455835, "grad_norm": 0.0, - "learning_rate": 1.8590031897377954e-05, - "loss": 0.9405, + "learning_rate": 1.8594357387941498e-05, + "loss": 1.0516, "step": 6906 }, { - "epoch": 0.1959988649262202, + "epoch": 0.19572671370682082, "grad_norm": 0.0, - "learning_rate": 1.858956132048509e-05, - "loss": 0.9268, + "learning_rate": 1.859388813768497e-05, + "loss": 1.1113, "step": 6907 }, { - "epoch": 0.1960272417707151, + "epoch": 0.1957550511490833, "grad_norm": 0.0, - "learning_rate": 1.858909067103581e-05, - "loss": 0.9957, + "learning_rate": 1.8593418815038937e-05, + "loss": 0.9952, "step": 6908 }, { - "epoch": 0.19605561861520998, + "epoch": 0.19578338859134575, "grad_norm": 0.0, - "learning_rate": 1.8588619949034094e-05, - "loss": 1.0208, + "learning_rate": 1.859294942000734e-05, + "loss": 0.95, "step": 6909 }, { - "epoch": 0.19608399545970487, + "epoch": 0.19581172603360822, "grad_norm": 0.0, - "learning_rate": 1.8588149154483922e-05, - "loss": 1.0469, + "learning_rate": 1.8592479952594145e-05, + "loss": 1.0372, "step": 6910 }, { - "epoch": 0.19611237230419978, + "epoch": 0.19584006347587066, "grad_norm": 0.0, - "learning_rate": 1.8587678287389265e-05, - "loss": 1.0103, + "learning_rate": 1.8592010412803297e-05, + "loss": 0.9911, "step": 6911 }, { - "epoch": 0.19614074914869467, + "epoch": 0.19586840091813312, "grad_norm": 0.0, - "learning_rate": 1.8587207347754106e-05, - "loss": 0.939, + "learning_rate": 1.859154080063876e-05, + "loss": 0.975, "step": 6912 }, { - "epoch": 0.19616912599318956, + "epoch": 0.1958967383603956, "grad_norm": 0.0, - "learning_rate": 1.858673633558242e-05, - "loss": 1.0533, + "learning_rate": 1.8591071116104476e-05, + "loss": 0.9304, "step": 6913 }, { - "epoch": 0.19619750283768445, + "epoch": 0.19592507580265806, "grad_norm": 0.0, - "learning_rate": 1.8586265250878185e-05, - "loss": 1.0433, + "learning_rate": 1.8590601359204417e-05, + "loss": 1.1116, "step": 6914 }, { - "epoch": 0.19622587968217933, + "epoch": 0.19595341324492052, "grad_norm": 0.0, - "learning_rate": 1.8585794093645385e-05, - "loss": 1.059, + "learning_rate": 1.8590131529942526e-05, + "loss": 1.0267, "step": 6915 }, { - "epoch": 0.19625425652667422, + "epoch": 0.195981750687183, "grad_norm": 0.0, - "learning_rate": 1.8585322863887995e-05, - "loss": 1.0774, + "learning_rate": 1.858966162832277e-05, + "loss": 1.0247, "step": 6916 }, { - "epoch": 0.19628263337116914, + "epoch": 0.19601008812944543, "grad_norm": 0.0, - "learning_rate": 1.8584851561609995e-05, - "loss": 1.1074, + "learning_rate": 1.8589191654349107e-05, + "loss": 1.0288, "step": 6917 }, { - "epoch": 0.19631101021566402, + "epoch": 0.1960384255717079, "grad_norm": 0.0, - "learning_rate": 1.8584380186815366e-05, - "loss": 0.8846, + "learning_rate": 1.858872160802549e-05, + "loss": 1.0579, "step": 6918 }, { - "epoch": 0.1963393870601589, + "epoch": 0.19606676301397036, "grad_norm": 0.0, - "learning_rate": 1.8583908739508097e-05, - "loss": 1.189, + "learning_rate": 1.8588251489355883e-05, + "loss": 0.9895, "step": 6919 }, { - "epoch": 0.1963677639046538, + "epoch": 0.19609510045623282, "grad_norm": 0.0, - "learning_rate": 1.858343721969216e-05, - "loss": 1.0758, + "learning_rate": 1.858778129834425e-05, + "loss": 1.0884, "step": 6920 }, { - "epoch": 0.19639614074914868, + "epoch": 0.1961234378984953, "grad_norm": 0.0, - "learning_rate": 1.8582965627371546e-05, - "loss": 1.0699, + "learning_rate": 1.8587311034994537e-05, + "loss": 1.0264, "step": 6921 }, { - "epoch": 0.1964245175936436, + "epoch": 0.19615177534075776, "grad_norm": 0.0, - "learning_rate": 1.8582493962550235e-05, - "loss": 0.9032, + "learning_rate": 1.858684069931072e-05, + "loss": 1.0261, "step": 6922 }, { - "epoch": 0.1964528944381385, + "epoch": 0.1961801127830202, "grad_norm": 0.0, - "learning_rate": 1.8582022225232213e-05, - "loss": 1.0258, + "learning_rate": 1.858637029129675e-05, + "loss": 0.9832, "step": 6923 }, { - "epoch": 0.19648127128263337, + "epoch": 0.19620845022528266, "grad_norm": 0.0, - "learning_rate": 1.858155041542146e-05, - "loss": 1.0788, + "learning_rate": 1.85858998109566e-05, + "loss": 1.1477, "step": 6924 }, { - "epoch": 0.19650964812712826, + "epoch": 0.19623678766754513, "grad_norm": 0.0, - "learning_rate": 1.8581078533121965e-05, - "loss": 0.9514, + "learning_rate": 1.8585429258294226e-05, + "loss": 1.0325, "step": 6925 }, { - "epoch": 0.19653802497162315, + "epoch": 0.1962651251098076, "grad_norm": 0.0, - "learning_rate": 1.8580606578337718e-05, - "loss": 1.1099, + "learning_rate": 1.858495863331359e-05, + "loss": 1.0367, "step": 6926 }, { - "epoch": 0.19656640181611804, + "epoch": 0.19629346255207006, "grad_norm": 0.0, - "learning_rate": 1.8580134551072698e-05, - "loss": 0.8672, + "learning_rate": 1.8584487936018663e-05, + "loss": 1.0294, "step": 6927 }, { - "epoch": 0.19659477866061295, + "epoch": 0.19632179999433252, "grad_norm": 0.0, - "learning_rate": 1.8579662451330898e-05, - "loss": 0.8967, + "learning_rate": 1.8584017166413406e-05, + "loss": 0.9694, "step": 6928 }, { - "epoch": 0.19662315550510784, + "epoch": 0.19635013743659496, "grad_norm": 0.0, - "learning_rate": 1.8579190279116305e-05, - "loss": 1.0659, + "learning_rate": 1.8583546324501783e-05, + "loss": 0.9823, "step": 6929 }, { - "epoch": 0.19665153234960273, + "epoch": 0.19637847487885743, "grad_norm": 0.0, - "learning_rate": 1.85787180344329e-05, - "loss": 1.1232, + "learning_rate": 1.8583075410287764e-05, + "loss": 1.0675, "step": 6930 }, { - "epoch": 0.1966799091940976, + "epoch": 0.1964068123211199, "grad_norm": 0.0, - "learning_rate": 1.8578245717284682e-05, - "loss": 0.9107, + "learning_rate": 1.858260442377531e-05, + "loss": 0.8871, "step": 6931 }, { - "epoch": 0.1967082860385925, + "epoch": 0.19643514976338236, "grad_norm": 0.0, - "learning_rate": 1.8577773327675637e-05, - "loss": 0.9855, + "learning_rate": 1.8582133364968394e-05, + "loss": 1.0392, "step": 6932 }, { - "epoch": 0.1967366628830874, + "epoch": 0.19646348720564483, "grad_norm": 0.0, - "learning_rate": 1.8577300865609755e-05, - "loss": 1.009, + "learning_rate": 1.8581662233870985e-05, + "loss": 1.0851, "step": 6933 }, { - "epoch": 0.1967650397275823, + "epoch": 0.1964918246479073, "grad_norm": 0.0, - "learning_rate": 1.8576828331091028e-05, - "loss": 0.9841, + "learning_rate": 1.8581191030487046e-05, + "loss": 0.8895, "step": 6934 }, { - "epoch": 0.1967934165720772, + "epoch": 0.19652016209016973, "grad_norm": 0.0, - "learning_rate": 1.8576355724123446e-05, - "loss": 1.1076, + "learning_rate": 1.8580719754820548e-05, + "loss": 1.0626, "step": 6935 }, { - "epoch": 0.19682179341657208, + "epoch": 0.1965484995324322, "grad_norm": 0.0, - "learning_rate": 1.8575883044711e-05, - "loss": 0.9853, + "learning_rate": 1.858024840687546e-05, + "loss": 0.9939, "step": 6936 }, { - "epoch": 0.19685017026106696, + "epoch": 0.19657683697469466, "grad_norm": 0.0, - "learning_rate": 1.8575410292857687e-05, - "loss": 1.1228, + "learning_rate": 1.8579776986655753e-05, + "loss": 0.996, "step": 6937 }, { - "epoch": 0.19687854710556185, + "epoch": 0.19660517441695713, "grad_norm": 0.0, - "learning_rate": 1.8574937468567495e-05, - "loss": 1.079, + "learning_rate": 1.8579305494165402e-05, + "loss": 0.9833, "step": 6938 }, { - "epoch": 0.19690692395005677, + "epoch": 0.1966335118592196, "grad_norm": 0.0, - "learning_rate": 1.857446457184442e-05, - "loss": 0.9943, + "learning_rate": 1.857883392940837e-05, + "loss": 0.9805, "step": 6939 }, { - "epoch": 0.19693530079455165, + "epoch": 0.19666184930148206, "grad_norm": 0.0, - "learning_rate": 1.8573991602692457e-05, - "loss": 1.0206, + "learning_rate": 1.857836229238864e-05, + "loss": 1.0186, "step": 6940 }, { - "epoch": 0.19696367763904654, + "epoch": 0.1966901867437445, "grad_norm": 0.0, - "learning_rate": 1.8573518561115604e-05, - "loss": 0.906, + "learning_rate": 1.8577890583110173e-05, + "loss": 1.015, "step": 6941 }, { - "epoch": 0.19699205448354143, + "epoch": 0.19671852418600697, "grad_norm": 0.0, - "learning_rate": 1.8573045447117854e-05, - "loss": 0.988, + "learning_rate": 1.8577418801576953e-05, + "loss": 1.1379, "step": 6942 }, { - "epoch": 0.19702043132803632, + "epoch": 0.19674686162826943, "grad_norm": 0.0, - "learning_rate": 1.8572572260703205e-05, - "loss": 0.9206, + "learning_rate": 1.857694694779295e-05, + "loss": 1.0238, "step": 6943 }, { - "epoch": 0.1970488081725312, + "epoch": 0.1967751990705319, "grad_norm": 0.0, - "learning_rate": 1.857209900187565e-05, - "loss": 0.9372, + "learning_rate": 1.8576475021762132e-05, + "loss": 0.9608, "step": 6944 }, { - "epoch": 0.19707718501702612, + "epoch": 0.19680353651279436, "grad_norm": 0.0, - "learning_rate": 1.8571625670639192e-05, - "loss": 0.9819, + "learning_rate": 1.8576003023488486e-05, + "loss": 1.0286, "step": 6945 }, { - "epoch": 0.197105561861521, + "epoch": 0.19683187395505683, "grad_norm": 0.0, - "learning_rate": 1.8571152266997824e-05, - "loss": 1.0094, + "learning_rate": 1.8575530952975977e-05, + "loss": 1.1588, "step": 6946 }, { - "epoch": 0.1971339387060159, + "epoch": 0.19686021139731927, "grad_norm": 0.0, - "learning_rate": 1.857067879095555e-05, - "loss": 0.9542, + "learning_rate": 1.857505881022859e-05, + "loss": 1.0565, "step": 6947 }, { - "epoch": 0.19716231555051078, + "epoch": 0.19688854883958173, "grad_norm": 0.0, - "learning_rate": 1.8570205242516366e-05, - "loss": 1.0209, + "learning_rate": 1.8574586595250298e-05, + "loss": 1.0106, "step": 6948 }, { - "epoch": 0.19719069239500567, + "epoch": 0.1969168862818442, "grad_norm": 0.0, - "learning_rate": 1.856973162168427e-05, - "loss": 0.919, + "learning_rate": 1.8574114308045077e-05, + "loss": 1.0414, "step": 6949 }, { - "epoch": 0.19721906923950056, + "epoch": 0.19694522372410667, "grad_norm": 0.0, - "learning_rate": 1.856925792846327e-05, - "loss": 0.9335, + "learning_rate": 1.857364194861691e-05, + "loss": 1.0355, "step": 6950 }, { - "epoch": 0.19724744608399547, + "epoch": 0.19697356116636913, "grad_norm": 0.0, - "learning_rate": 1.856878416285736e-05, - "loss": 0.9459, + "learning_rate": 1.857316951696977e-05, + "loss": 1.0084, "step": 6951 }, { - "epoch": 0.19727582292849036, + "epoch": 0.1970018986086316, "grad_norm": 0.0, - "learning_rate": 1.856831032487055e-05, - "loss": 1.0399, + "learning_rate": 1.8572697013107643e-05, + "loss": 0.9507, "step": 6952 }, { - "epoch": 0.19730419977298524, + "epoch": 0.19703023605089404, "grad_norm": 0.0, - "learning_rate": 1.8567836414506835e-05, - "loss": 1.0422, + "learning_rate": 1.8572224437034503e-05, + "loss": 1.0133, "step": 6953 }, { - "epoch": 0.19733257661748013, + "epoch": 0.1970585734931565, "grad_norm": 0.0, - "learning_rate": 1.856736243177022e-05, - "loss": 0.9853, + "learning_rate": 1.8571751788754336e-05, + "loss": 0.9959, "step": 6954 }, { - "epoch": 0.19736095346197502, + "epoch": 0.19708691093541897, "grad_norm": 0.0, - "learning_rate": 1.8566888376664707e-05, - "loss": 0.9836, + "learning_rate": 1.857127906827112e-05, + "loss": 1.0666, "step": 6955 }, { - "epoch": 0.1973893303064699, + "epoch": 0.19711524837768143, "grad_norm": 0.0, - "learning_rate": 1.8566414249194305e-05, - "loss": 0.9632, + "learning_rate": 1.8570806275588832e-05, + "loss": 0.9408, "step": 6956 }, { - "epoch": 0.19741770715096482, + "epoch": 0.1971435858199439, "grad_norm": 0.0, - "learning_rate": 1.8565940049363017e-05, - "loss": 1.1072, + "learning_rate": 1.8570333410711464e-05, + "loss": 1.063, "step": 6957 }, { - "epoch": 0.1974460839954597, + "epoch": 0.19717192326220637, "grad_norm": 0.0, - "learning_rate": 1.8565465777174848e-05, - "loss": 0.9996, + "learning_rate": 1.8569860473642996e-05, + "loss": 1.1077, "step": 6958 }, { - "epoch": 0.1974744608399546, + "epoch": 0.1972002607044688, "grad_norm": 0.0, - "learning_rate": 1.8564991432633805e-05, - "loss": 1.0824, + "learning_rate": 1.8569387464387412e-05, + "loss": 1.0207, "step": 6959 }, { - "epoch": 0.19750283768444948, + "epoch": 0.19722859814673127, "grad_norm": 0.0, - "learning_rate": 1.8564517015743894e-05, - "loss": 0.9532, + "learning_rate": 1.8568914382948694e-05, + "loss": 0.9679, "step": 6960 }, { - "epoch": 0.19753121452894437, + "epoch": 0.19725693558899374, "grad_norm": 0.0, - "learning_rate": 1.8564042526509123e-05, - "loss": 1.0707, + "learning_rate": 1.856844122933083e-05, + "loss": 1.027, "step": 6961 }, { - "epoch": 0.19755959137343929, + "epoch": 0.1972852730312562, "grad_norm": 0.0, - "learning_rate": 1.85635679649335e-05, - "loss": 1.0714, + "learning_rate": 1.85679680035378e-05, + "loss": 0.9443, "step": 6962 }, { - "epoch": 0.19758796821793417, + "epoch": 0.19731361047351867, "grad_norm": 0.0, - "learning_rate": 1.8563093331021034e-05, - "loss": 1.0455, + "learning_rate": 1.8567494705573595e-05, + "loss": 1.0526, "step": 6963 }, { - "epoch": 0.19761634506242906, + "epoch": 0.19734194791578114, "grad_norm": 0.0, - "learning_rate": 1.856261862477573e-05, - "loss": 0.9205, + "learning_rate": 1.8567021335442202e-05, + "loss": 1.0328, "step": 6964 }, { - "epoch": 0.19764472190692395, + "epoch": 0.19737028535804357, "grad_norm": 0.0, - "learning_rate": 1.856214384620161e-05, - "loss": 1.081, + "learning_rate": 1.8566547893147607e-05, + "loss": 0.9847, "step": 6965 }, { - "epoch": 0.19767309875141884, + "epoch": 0.19739862280030604, "grad_norm": 0.0, - "learning_rate": 1.8561668995302668e-05, - "loss": 0.893, + "learning_rate": 1.8566074378693795e-05, + "loss": 0.9215, "step": 6966 }, { - "epoch": 0.19770147559591372, + "epoch": 0.1974269602425685, "grad_norm": 0.0, - "learning_rate": 1.8561194072082925e-05, - "loss": 1.1189, + "learning_rate": 1.856560079208476e-05, + "loss": 1.0582, "step": 6967 }, { - "epoch": 0.19772985244040864, + "epoch": 0.19745529768483097, "grad_norm": 0.0, - "learning_rate": 1.856071907654639e-05, - "loss": 1.0211, + "learning_rate": 1.8565127133324487e-05, + "loss": 1.0391, "step": 6968 }, { - "epoch": 0.19775822928490353, + "epoch": 0.19748363512709344, "grad_norm": 0.0, - "learning_rate": 1.8560244008697076e-05, - "loss": 0.9381, + "learning_rate": 1.856465340241697e-05, + "loss": 1.0236, "step": 6969 }, { - "epoch": 0.1977866061293984, + "epoch": 0.1975119725693559, "grad_norm": 0.0, - "learning_rate": 1.8559768868539e-05, - "loss": 1.0726, + "learning_rate": 1.8564179599366195e-05, + "loss": 0.9909, "step": 6970 }, { - "epoch": 0.1978149829738933, + "epoch": 0.19754031001161834, "grad_norm": 0.0, - "learning_rate": 1.8559293656076167e-05, - "loss": 1.0939, + "learning_rate": 1.8563705724176158e-05, + "loss": 0.877, "step": 6971 }, { - "epoch": 0.1978433598183882, + "epoch": 0.1975686474538808, "grad_norm": 0.0, - "learning_rate": 1.8558818371312598e-05, - "loss": 0.9178, + "learning_rate": 1.8563231776850843e-05, + "loss": 0.9911, "step": 6972 }, { - "epoch": 0.19787173666288307, + "epoch": 0.19759698489614327, "grad_norm": 0.0, - "learning_rate": 1.8558343014252304e-05, - "loss": 0.9688, + "learning_rate": 1.856275775739425e-05, + "loss": 1.093, "step": 6973 }, { - "epoch": 0.197900113507378, + "epoch": 0.19762532233840574, "grad_norm": 0.0, - "learning_rate": 1.8557867584899307e-05, - "loss": 0.9713, + "learning_rate": 1.856228366581037e-05, + "loss": 0.8883, "step": 6974 }, { - "epoch": 0.19792849035187288, + "epoch": 0.1976536597806682, "grad_norm": 0.0, - "learning_rate": 1.855739208325761e-05, - "loss": 1.0505, + "learning_rate": 1.856180950210319e-05, + "loss": 0.9356, "step": 6975 }, { - "epoch": 0.19795686719636776, + "epoch": 0.19768199722293067, "grad_norm": 0.0, - "learning_rate": 1.855691650933124e-05, - "loss": 1.0182, + "learning_rate": 1.8561335266276713e-05, + "loss": 1.0343, "step": 6976 }, { - "epoch": 0.19798524404086265, + "epoch": 0.1977103346651931, "grad_norm": 0.0, - "learning_rate": 1.8556440863124215e-05, - "loss": 0.9184, + "learning_rate": 1.856086095833493e-05, + "loss": 0.981, "step": 6977 }, { - "epoch": 0.19801362088535754, + "epoch": 0.19773867210745558, "grad_norm": 0.0, - "learning_rate": 1.855596514464055e-05, - "loss": 1.0832, + "learning_rate": 1.8560386578281835e-05, + "loss": 0.8764, "step": 6978 }, { - "epoch": 0.19804199772985245, + "epoch": 0.19776700954971804, "grad_norm": 0.0, - "learning_rate": 1.855548935388426e-05, - "loss": 0.9012, + "learning_rate": 1.8559912126121428e-05, + "loss": 0.8153, "step": 6979 }, { - "epoch": 0.19807037457434734, + "epoch": 0.1977953469919805, "grad_norm": 0.0, - "learning_rate": 1.8555013490859367e-05, - "loss": 1.1046, + "learning_rate": 1.85594376018577e-05, + "loss": 0.9361, "step": 6980 }, { - "epoch": 0.19809875141884223, + "epoch": 0.19782368443424297, "grad_norm": 0.0, - "learning_rate": 1.855453755556989e-05, - "loss": 1.006, + "learning_rate": 1.855896300549465e-05, + "loss": 1.0044, "step": 6981 }, { - "epoch": 0.19812712826333712, + "epoch": 0.1978520218765054, "grad_norm": 0.0, - "learning_rate": 1.8554061548019847e-05, - "loss": 0.9231, + "learning_rate": 1.855848833703628e-05, + "loss": 0.9753, "step": 6982 }, { - "epoch": 0.198155505107832, + "epoch": 0.19788035931876788, "grad_norm": 0.0, - "learning_rate": 1.8553585468213264e-05, - "loss": 1.0136, + "learning_rate": 1.8558013596486578e-05, + "loss": 1.1175, "step": 6983 }, { - "epoch": 0.1981838819523269, + "epoch": 0.19790869676103034, "grad_norm": 0.0, - "learning_rate": 1.855310931615416e-05, - "loss": 0.9612, + "learning_rate": 1.8557538783849555e-05, + "loss": 1.1322, "step": 6984 }, { - "epoch": 0.1982122587968218, + "epoch": 0.1979370342032928, "grad_norm": 0.0, - "learning_rate": 1.855263309184656e-05, - "loss": 1.0546, + "learning_rate": 1.8557063899129205e-05, + "loss": 0.9397, "step": 6985 }, { - "epoch": 0.1982406356413167, + "epoch": 0.19796537164555528, "grad_norm": 0.0, - "learning_rate": 1.8552156795294482e-05, - "loss": 1.0896, + "learning_rate": 1.8556588942329522e-05, + "loss": 1.0768, "step": 6986 }, { - "epoch": 0.19826901248581158, + "epoch": 0.19799370908781774, "grad_norm": 0.0, - "learning_rate": 1.855168042650195e-05, - "loss": 0.9965, + "learning_rate": 1.855611391345452e-05, + "loss": 1.0821, "step": 6987 }, { - "epoch": 0.19829738933030647, + "epoch": 0.19802204653008018, "grad_norm": 0.0, - "learning_rate": 1.855120398547299e-05, - "loss": 1.0424, + "learning_rate": 1.855563881250819e-05, + "loss": 1.056, "step": 6988 }, { - "epoch": 0.19832576617480135, + "epoch": 0.19805038397234265, "grad_norm": 0.0, - "learning_rate": 1.8550727472211624e-05, - "loss": 0.981, + "learning_rate": 1.8555163639494537e-05, + "loss": 1.0912, "step": 6989 }, { - "epoch": 0.19835414301929624, + "epoch": 0.1980787214146051, "grad_norm": 0.0, - "learning_rate": 1.855025088672188e-05, - "loss": 0.9515, + "learning_rate": 1.8554688394417566e-05, + "loss": 1.092, "step": 6990 }, { - "epoch": 0.19838251986379116, + "epoch": 0.19810705885686758, "grad_norm": 0.0, - "learning_rate": 1.8549774229007784e-05, - "loss": 0.9498, + "learning_rate": 1.8554213077281275e-05, + "loss": 0.8805, "step": 6991 }, { - "epoch": 0.19841089670828604, + "epoch": 0.19813539629913005, "grad_norm": 0.0, - "learning_rate": 1.8549297499073358e-05, - "loss": 1.1767, + "learning_rate": 1.8553737688089674e-05, + "loss": 1.0039, "step": 6992 }, { - "epoch": 0.19843927355278093, + "epoch": 0.1981637337413925, "grad_norm": 0.0, - "learning_rate": 1.854882069692263e-05, - "loss": 0.9299, + "learning_rate": 1.8553262226846763e-05, + "loss": 1.1237, "step": 6993 }, { - "epoch": 0.19846765039727582, + "epoch": 0.19819207118365495, "grad_norm": 0.0, - "learning_rate": 1.8548343822559632e-05, - "loss": 0.9475, + "learning_rate": 1.855278669355655e-05, + "loss": 1.0616, "step": 6994 }, { - "epoch": 0.1984960272417707, + "epoch": 0.19822040862591742, "grad_norm": 0.0, - "learning_rate": 1.8547866875988392e-05, - "loss": 0.9358, + "learning_rate": 1.855231108822303e-05, + "loss": 0.971, "step": 6995 }, { - "epoch": 0.1985244040862656, + "epoch": 0.19824874606817988, "grad_norm": 0.0, - "learning_rate": 1.8547389857212933e-05, - "loss": 0.9912, + "learning_rate": 1.8551835410850227e-05, + "loss": 1.0582, "step": 6996 }, { - "epoch": 0.1985527809307605, + "epoch": 0.19827708351044235, "grad_norm": 0.0, - "learning_rate": 1.854691276623729e-05, - "loss": 0.9726, + "learning_rate": 1.8551359661442134e-05, + "loss": 0.9644, "step": 6997 }, { - "epoch": 0.1985811577752554, + "epoch": 0.19830542095270481, "grad_norm": 0.0, - "learning_rate": 1.8546435603065488e-05, - "loss": 1.0623, + "learning_rate": 1.8550883840002766e-05, + "loss": 0.9508, "step": 6998 }, { - "epoch": 0.19860953461975028, + "epoch": 0.19833375839496728, "grad_norm": 0.0, - "learning_rate": 1.8545958367701563e-05, - "loss": 1.0974, + "learning_rate": 1.8550407946536127e-05, + "loss": 1.038, "step": 6999 }, { - "epoch": 0.19863791146424517, + "epoch": 0.19836209583722972, "grad_norm": 0.0, - "learning_rate": 1.8545481060149543e-05, - "loss": 1.0553, + "learning_rate": 1.8549931981046226e-05, + "loss": 1.0116, "step": 7000 }, { - "epoch": 0.19866628830874006, + "epoch": 0.19839043327949218, "grad_norm": 0.0, - "learning_rate": 1.854500368041346e-05, - "loss": 0.9204, + "learning_rate": 1.8549455943537077e-05, + "loss": 0.9402, "step": 7001 }, { - "epoch": 0.19869466515323497, + "epoch": 0.19841877072175465, "grad_norm": 0.0, - "learning_rate": 1.8544526228497342e-05, - "loss": 0.9772, + "learning_rate": 1.854897983401268e-05, + "loss": 1.0445, "step": 7002 }, { - "epoch": 0.19872304199772986, + "epoch": 0.19844710816401712, "grad_norm": 0.0, - "learning_rate": 1.8544048704405232e-05, - "loss": 0.9143, + "learning_rate": 1.8548503652477054e-05, + "loss": 1.0131, "step": 7003 }, { - "epoch": 0.19875141884222475, + "epoch": 0.19847544560627958, "grad_norm": 0.0, - "learning_rate": 1.8543571108141158e-05, - "loss": 0.9504, + "learning_rate": 1.854802739893421e-05, + "loss": 0.9049, "step": 7004 }, { - "epoch": 0.19877979568671963, + "epoch": 0.19850378304854205, "grad_norm": 0.0, - "learning_rate": 1.854309343970915e-05, - "loss": 0.9332, + "learning_rate": 1.8547551073388152e-05, + "loss": 0.955, "step": 7005 }, { - "epoch": 0.19880817253121452, + "epoch": 0.1985321204908045, "grad_norm": 0.0, - "learning_rate": 1.8542615699113255e-05, - "loss": 1.0026, + "learning_rate": 1.85470746758429e-05, + "loss": 1.031, "step": 7006 }, { - "epoch": 0.1988365493757094, + "epoch": 0.19856045793306695, "grad_norm": 0.0, - "learning_rate": 1.85421378863575e-05, - "loss": 1.0354, + "learning_rate": 1.854659820630246e-05, + "loss": 0.9322, "step": 7007 }, { - "epoch": 0.19886492622020432, + "epoch": 0.19858879537532942, "grad_norm": 0.0, - "learning_rate": 1.854166000144592e-05, - "loss": 0.9905, + "learning_rate": 1.8546121664770857e-05, + "loss": 0.9903, "step": 7008 }, { - "epoch": 0.1988933030646992, + "epoch": 0.19861713281759188, "grad_norm": 0.0, - "learning_rate": 1.854118204438255e-05, - "loss": 1.0522, + "learning_rate": 1.8545645051252094e-05, + "loss": 1.0319, "step": 7009 }, { - "epoch": 0.1989216799091941, + "epoch": 0.19864547025985435, "grad_norm": 0.0, - "learning_rate": 1.854070401517144e-05, - "loss": 0.9101, + "learning_rate": 1.8545168365750188e-05, + "loss": 1.0678, "step": 7010 }, { - "epoch": 0.198950056753689, + "epoch": 0.19867380770211682, "grad_norm": 0.0, - "learning_rate": 1.854022591381661e-05, - "loss": 1.0935, + "learning_rate": 1.8544691608269156e-05, + "loss": 1.0032, "step": 7011 }, { - "epoch": 0.19897843359818387, + "epoch": 0.19870214514437926, "grad_norm": 0.0, - "learning_rate": 1.8539747740322114e-05, - "loss": 1.0582, + "learning_rate": 1.8544214778813018e-05, + "loss": 0.9551, "step": 7012 }, { - "epoch": 0.19900681044267876, + "epoch": 0.19873048258664172, "grad_norm": 0.0, - "learning_rate": 1.8539269494691984e-05, - "loss": 0.9469, + "learning_rate": 1.8543737877385778e-05, + "loss": 1.0255, "step": 7013 }, { - "epoch": 0.19903518728717368, + "epoch": 0.1987588200289042, "grad_norm": 0.0, - "learning_rate": 1.853879117693026e-05, - "loss": 0.9819, + "learning_rate": 1.8543260903991467e-05, + "loss": 0.9578, "step": 7014 }, { - "epoch": 0.19906356413166856, + "epoch": 0.19878715747116665, "grad_norm": 0.0, - "learning_rate": 1.8538312787040983e-05, - "loss": 0.8992, + "learning_rate": 1.85427838586341e-05, + "loss": 1.0775, "step": 7015 }, { - "epoch": 0.19909194097616345, + "epoch": 0.19881549491342912, "grad_norm": 0.0, - "learning_rate": 1.8537834325028196e-05, - "loss": 1.1005, + "learning_rate": 1.8542306741317686e-05, + "loss": 1.0111, "step": 7016 }, { - "epoch": 0.19912031782065834, + "epoch": 0.19884383235569159, "grad_norm": 0.0, - "learning_rate": 1.8537355790895934e-05, - "loss": 0.9671, + "learning_rate": 1.854182955204625e-05, + "loss": 1.1158, "step": 7017 }, { - "epoch": 0.19914869466515323, + "epoch": 0.19887216979795402, "grad_norm": 0.0, - "learning_rate": 1.853687718464825e-05, - "loss": 0.8914, + "learning_rate": 1.8541352290823816e-05, + "loss": 1.1788, "step": 7018 }, { - "epoch": 0.19917707150964814, + "epoch": 0.1989005072402165, "grad_norm": 0.0, - "learning_rate": 1.8536398506289176e-05, - "loss": 0.9772, + "learning_rate": 1.85408749576544e-05, + "loss": 0.9448, "step": 7019 }, { - "epoch": 0.19920544835414303, + "epoch": 0.19892884468247896, "grad_norm": 0.0, - "learning_rate": 1.853591975582276e-05, - "loss": 1.0508, + "learning_rate": 1.854039755254202e-05, + "loss": 0.9776, "step": 7020 }, { - "epoch": 0.19923382519863791, + "epoch": 0.19895718212474142, "grad_norm": 0.0, - "learning_rate": 1.853544093325305e-05, - "loss": 1.0912, + "learning_rate": 1.85399200754907e-05, + "loss": 0.9149, "step": 7021 }, { - "epoch": 0.1992622020431328, + "epoch": 0.1989855195670039, "grad_norm": 0.0, - "learning_rate": 1.8534962038584083e-05, - "loss": 0.9293, + "learning_rate": 1.8539442526504457e-05, + "loss": 0.9828, "step": 7022 }, { - "epoch": 0.1992905788876277, + "epoch": 0.19901385700926635, "grad_norm": 0.0, - "learning_rate": 1.853448307181991e-05, - "loss": 0.9554, + "learning_rate": 1.8538964905587327e-05, + "loss": 1.1036, "step": 7023 }, { - "epoch": 0.19931895573212258, + "epoch": 0.1990421944515288, "grad_norm": 0.0, - "learning_rate": 1.8534004032964574e-05, - "loss": 0.9429, + "learning_rate": 1.8538487212743322e-05, + "loss": 1.0492, "step": 7024 }, { - "epoch": 0.1993473325766175, + "epoch": 0.19907053189379126, "grad_norm": 0.0, - "learning_rate": 1.8533524922022123e-05, - "loss": 1.0567, + "learning_rate": 1.8538009447976467e-05, + "loss": 1.0533, "step": 7025 }, { - "epoch": 0.19937570942111238, + "epoch": 0.19909886933605372, "grad_norm": 0.0, - "learning_rate": 1.8533045738996602e-05, - "loss": 0.9076, + "learning_rate": 1.853753161129079e-05, + "loss": 1.1261, "step": 7026 }, { - "epoch": 0.19940408626560727, + "epoch": 0.1991272067783162, "grad_norm": 0.0, - "learning_rate": 1.853256648389206e-05, - "loss": 1.0133, + "learning_rate": 1.8537053702690314e-05, + "loss": 0.9309, "step": 7027 }, { - "epoch": 0.19943246311010215, + "epoch": 0.19915554422057866, "grad_norm": 0.0, - "learning_rate": 1.8532087156712547e-05, - "loss": 1.0799, + "learning_rate": 1.853657572217906e-05, + "loss": 0.9391, "step": 7028 }, { - "epoch": 0.19946083995459704, + "epoch": 0.19918388166284112, "grad_norm": 0.0, - "learning_rate": 1.853160775746211e-05, - "loss": 1.0013, + "learning_rate": 1.8536097669761066e-05, + "loss": 0.8529, "step": 7029 }, { - "epoch": 0.19948921679909193, + "epoch": 0.19921221910510356, "grad_norm": 0.0, - "learning_rate": 1.85311282861448e-05, - "loss": 1.0809, + "learning_rate": 1.8535619545440345e-05, + "loss": 1.1038, "step": 7030 }, { - "epoch": 0.19951759364358684, + "epoch": 0.19924055654736603, "grad_norm": 0.0, - "learning_rate": 1.853064874276466e-05, - "loss": 1.0508, + "learning_rate": 1.8535141349220937e-05, + "loss": 1.0213, "step": 7031 }, { - "epoch": 0.19954597048808173, + "epoch": 0.1992688939896285, "grad_norm": 0.0, - "learning_rate": 1.853016912732575e-05, - "loss": 0.9376, + "learning_rate": 1.853466308110686e-05, + "loss": 0.9362, "step": 7032 }, { - "epoch": 0.19957434733257662, + "epoch": 0.19929723143189096, "grad_norm": 0.0, - "learning_rate": 1.852968943983212e-05, - "loss": 0.9757, + "learning_rate": 1.853418474110215e-05, + "loss": 1.033, "step": 7033 }, { - "epoch": 0.1996027241770715, + "epoch": 0.19932556887415342, "grad_norm": 0.0, - "learning_rate": 1.852920968028782e-05, - "loss": 1.0503, + "learning_rate": 1.853370632921083e-05, + "loss": 0.9853, "step": 7034 }, { - "epoch": 0.1996311010215664, + "epoch": 0.1993539063164159, "grad_norm": 0.0, - "learning_rate": 1.85287298486969e-05, - "loss": 1.0378, + "learning_rate": 1.8533227845436932e-05, + "loss": 0.8502, "step": 7035 }, { - "epoch": 0.19965947786606128, + "epoch": 0.19938224375867833, "grad_norm": 0.0, - "learning_rate": 1.852824994506342e-05, - "loss": 1.0396, + "learning_rate": 1.853274928978449e-05, + "loss": 0.9072, "step": 7036 }, { - "epoch": 0.1996878547105562, + "epoch": 0.1994105812009408, "grad_norm": 0.0, - "learning_rate": 1.8527769969391425e-05, - "loss": 0.957, + "learning_rate": 1.8532270662257528e-05, + "loss": 1.0056, "step": 7037 }, { - "epoch": 0.19971623155505108, + "epoch": 0.19943891864320326, "grad_norm": 0.0, - "learning_rate": 1.852728992168497e-05, - "loss": 0.9576, + "learning_rate": 1.8531791962860084e-05, + "loss": 0.9077, "step": 7038 }, { - "epoch": 0.19974460839954597, + "epoch": 0.19946725608546573, "grad_norm": 0.0, - "learning_rate": 1.8526809801948123e-05, - "loss": 1.0345, + "learning_rate": 1.853131319159619e-05, + "loss": 1.0266, "step": 7039 }, { - "epoch": 0.19977298524404086, + "epoch": 0.1994955935277282, "grad_norm": 0.0, - "learning_rate": 1.8526329610184922e-05, - "loss": 0.8144, + "learning_rate": 1.853083434846987e-05, + "loss": 1.0316, "step": 7040 }, { - "epoch": 0.19980136208853574, + "epoch": 0.19952393096999066, "grad_norm": 0.0, - "learning_rate": 1.8525849346399435e-05, - "loss": 0.9803, + "learning_rate": 1.8530355433485172e-05, + "loss": 1.0244, "step": 7041 }, { - "epoch": 0.19982973893303066, + "epoch": 0.1995522684122531, "grad_norm": 0.0, - "learning_rate": 1.8525369010595717e-05, - "loss": 0.9026, + "learning_rate": 1.8529876446646122e-05, + "loss": 1.006, "step": 7042 }, { - "epoch": 0.19985811577752555, + "epoch": 0.19958060585451556, "grad_norm": 0.0, - "learning_rate": 1.852488860277782e-05, - "loss": 0.9897, + "learning_rate": 1.852939738795675e-05, + "loss": 0.975, "step": 7043 }, { - "epoch": 0.19988649262202043, + "epoch": 0.19960894329677803, "grad_norm": 0.0, - "learning_rate": 1.852440812294981e-05, - "loss": 1.0151, + "learning_rate": 1.85289182574211e-05, + "loss": 0.9638, "step": 7044 }, { - "epoch": 0.19991486946651532, + "epoch": 0.1996372807390405, "grad_norm": 0.0, - "learning_rate": 1.852392757111574e-05, - "loss": 1.0304, + "learning_rate": 1.8528439055043207e-05, + "loss": 1.097, "step": 7045 }, { - "epoch": 0.1999432463110102, + "epoch": 0.19966561818130296, "grad_norm": 0.0, - "learning_rate": 1.8523446947279668e-05, - "loss": 1.0368, + "learning_rate": 1.85279597808271e-05, + "loss": 1.0722, "step": 7046 }, { - "epoch": 0.1999716231555051, + "epoch": 0.19969395562356543, "grad_norm": 0.0, - "learning_rate": 1.852296625144566e-05, - "loss": 1.0816, + "learning_rate": 1.8527480434776825e-05, + "loss": 1.0922, "step": 7047 }, { - "epoch": 0.2, + "epoch": 0.19972229306582787, "grad_norm": 0.0, - "learning_rate": 1.852248548361777e-05, - "loss": 0.9651, + "learning_rate": 1.8527001016896413e-05, + "loss": 1.0672, "step": 7048 }, { - "epoch": 0.2000283768444949, + "epoch": 0.19975063050809033, "grad_norm": 0.0, - "learning_rate": 1.8522004643800064e-05, - "loss": 1.0133, + "learning_rate": 1.8526521527189905e-05, + "loss": 0.9174, "step": 7049 }, { - "epoch": 0.20005675368898979, + "epoch": 0.1997789679503528, "grad_norm": 0.0, - "learning_rate": 1.85215237319966e-05, - "loss": 1.0083, + "learning_rate": 1.8526041965661342e-05, + "loss": 1.0066, "step": 7050 }, { - "epoch": 0.20008513053348467, + "epoch": 0.19980730539261526, "grad_norm": 0.0, - "learning_rate": 1.8521042748211446e-05, - "loss": 0.9897, + "learning_rate": 1.852556233231476e-05, + "loss": 1.0031, "step": 7051 }, { - "epoch": 0.20011350737797956, + "epoch": 0.19983564283487773, "grad_norm": 0.0, - "learning_rate": 1.8520561692448655e-05, - "loss": 1.0681, + "learning_rate": 1.85250826271542e-05, + "loss": 0.9253, "step": 7052 }, { - "epoch": 0.20014188422247445, + "epoch": 0.1998639802771402, "grad_norm": 0.0, - "learning_rate": 1.8520080564712303e-05, - "loss": 0.9805, + "learning_rate": 1.85246028501837e-05, + "loss": 0.9588, "step": 7053 }, { - "epoch": 0.20017026106696936, + "epoch": 0.19989231771940263, "grad_norm": 0.0, - "learning_rate": 1.851959936500644e-05, - "loss": 0.97, + "learning_rate": 1.8524123001407312e-05, + "loss": 1.0711, "step": 7054 }, { - "epoch": 0.20019863791146425, + "epoch": 0.1999206551616651, "grad_norm": 0.0, - "learning_rate": 1.8519118093335146e-05, - "loss": 1.0631, + "learning_rate": 1.8523643080829065e-05, + "loss": 1.0231, "step": 7055 }, { - "epoch": 0.20022701475595914, + "epoch": 0.19994899260392757, "grad_norm": 0.0, - "learning_rate": 1.8518636749702473e-05, - "loss": 0.973, + "learning_rate": 1.8523163088453013e-05, + "loss": 1.0964, "step": 7056 }, { - "epoch": 0.20025539160045402, + "epoch": 0.19997733004619003, "grad_norm": 0.0, - "learning_rate": 1.8518155334112494e-05, - "loss": 1.1588, + "learning_rate": 1.852268302428319e-05, + "loss": 1.0675, "step": 7057 }, { - "epoch": 0.2002837684449489, + "epoch": 0.2000056674884525, "grad_norm": 0.0, - "learning_rate": 1.851767384656927e-05, - "loss": 0.9799, + "learning_rate": 1.852220288832364e-05, + "loss": 0.9937, "step": 7058 }, { - "epoch": 0.20031214528944383, + "epoch": 0.20003400493071496, "grad_norm": 0.0, - "learning_rate": 1.851719228707688e-05, - "loss": 1.0783, + "learning_rate": 1.8521722680578413e-05, + "loss": 1.0751, "step": 7059 }, { - "epoch": 0.20034052213393871, + "epoch": 0.2000623423729774, "grad_norm": 0.0, - "learning_rate": 1.8516710655639377e-05, - "loss": 1.0345, + "learning_rate": 1.8521242401051554e-05, + "loss": 1.0281, "step": 7060 }, { - "epoch": 0.2003688989784336, + "epoch": 0.20009067981523987, "grad_norm": 0.0, - "learning_rate": 1.851622895226084e-05, - "loss": 1.025, + "learning_rate": 1.8520762049747102e-05, + "loss": 0.9973, "step": 7061 }, { - "epoch": 0.2003972758229285, + "epoch": 0.20011901725750234, "grad_norm": 0.0, - "learning_rate": 1.8515747176945333e-05, - "loss": 1.0486, + "learning_rate": 1.852028162666911e-05, + "loss": 0.9827, "step": 7062 }, { - "epoch": 0.20042565266742338, + "epoch": 0.2001473546997648, "grad_norm": 0.0, - "learning_rate": 1.851526532969693e-05, - "loss": 0.983, + "learning_rate": 1.851980113182162e-05, + "loss": 0.9271, "step": 7063 }, { - "epoch": 0.20045402951191826, + "epoch": 0.20017569214202727, "grad_norm": 0.0, - "learning_rate": 1.8514783410519693e-05, - "loss": 0.9422, + "learning_rate": 1.8519320565208682e-05, + "loss": 1.0162, "step": 7064 }, { - "epoch": 0.20048240635641318, + "epoch": 0.20020402958428973, "grad_norm": 0.0, - "learning_rate": 1.8514301419417697e-05, - "loss": 1.0078, + "learning_rate": 1.8518839926834343e-05, + "loss": 1.0068, "step": 7065 }, { - "epoch": 0.20051078320090807, + "epoch": 0.20023236702655217, "grad_norm": 0.0, - "learning_rate": 1.851381935639502e-05, - "loss": 0.9572, + "learning_rate": 1.8518359216702653e-05, + "loss": 1.0618, "step": 7066 }, { - "epoch": 0.20053916004540295, + "epoch": 0.20026070446881464, "grad_norm": 0.0, - "learning_rate": 1.8513337221455723e-05, - "loss": 0.9309, + "learning_rate": 1.851787843481766e-05, + "loss": 0.953, "step": 7067 }, { - "epoch": 0.20056753688989784, + "epoch": 0.2002890419110771, "grad_norm": 0.0, - "learning_rate": 1.851285501460389e-05, - "loss": 0.9598, + "learning_rate": 1.8517397581183412e-05, + "loss": 0.9147, "step": 7068 }, { - "epoch": 0.20059591373439273, + "epoch": 0.20031737935333957, "grad_norm": 0.0, - "learning_rate": 1.851237273584358e-05, - "loss": 0.8198, + "learning_rate": 1.8516916655803963e-05, + "loss": 0.9704, "step": 7069 }, { - "epoch": 0.20062429057888762, + "epoch": 0.20034571679560204, "grad_norm": 0.0, - "learning_rate": 1.851189038517888e-05, - "loss": 1.1971, + "learning_rate": 1.851643565868336e-05, + "loss": 1.0437, "step": 7070 }, { - "epoch": 0.20065266742338253, + "epoch": 0.2003740542378645, "grad_norm": 0.0, - "learning_rate": 1.8511407962613855e-05, - "loss": 0.9045, + "learning_rate": 1.851595458982566e-05, + "loss": 0.959, "step": 7071 }, { - "epoch": 0.20068104426787742, + "epoch": 0.20040239168012694, "grad_norm": 0.0, - "learning_rate": 1.851092546815259e-05, - "loss": 0.9296, + "learning_rate": 1.851547344923491e-05, + "loss": 0.9939, "step": 7072 }, { - "epoch": 0.2007094211123723, + "epoch": 0.2004307291223894, "grad_norm": 0.0, - "learning_rate": 1.8510442901799153e-05, - "loss": 0.8705, + "learning_rate": 1.8514992236915166e-05, + "loss": 1.0465, "step": 7073 }, { - "epoch": 0.2007377979568672, + "epoch": 0.20045906656465187, "grad_norm": 0.0, - "learning_rate": 1.850996026355762e-05, - "loss": 0.998, + "learning_rate": 1.851451095287048e-05, + "loss": 0.9392, "step": 7074 }, { - "epoch": 0.20076617480136208, + "epoch": 0.20048740400691434, "grad_norm": 0.0, - "learning_rate": 1.8509477553432073e-05, - "loss": 1.0594, + "learning_rate": 1.8514029597104907e-05, + "loss": 1.1049, "step": 7075 }, { - "epoch": 0.200794551645857, + "epoch": 0.2005157414491768, "grad_norm": 0.0, - "learning_rate": 1.8508994771426585e-05, - "loss": 0.9392, + "learning_rate": 1.85135481696225e-05, + "loss": 0.9467, "step": 7076 }, { - "epoch": 0.20082292849035188, + "epoch": 0.20054407889143927, "grad_norm": 0.0, - "learning_rate": 1.8508511917545236e-05, - "loss": 1.0864, + "learning_rate": 1.851306667042732e-05, + "loss": 0.9263, "step": 7077 }, { - "epoch": 0.20085130533484677, + "epoch": 0.2005724163337017, "grad_norm": 0.0, - "learning_rate": 1.85080289917921e-05, - "loss": 0.9911, + "learning_rate": 1.8512585099523412e-05, + "loss": 0.9655, "step": 7078 }, { - "epoch": 0.20087968217934166, + "epoch": 0.20060075377596417, "grad_norm": 0.0, - "learning_rate": 1.850754599417127e-05, - "loss": 1.0247, + "learning_rate": 1.851210345691484e-05, + "loss": 1.0525, "step": 7079 }, { - "epoch": 0.20090805902383654, + "epoch": 0.20062909121822664, "grad_norm": 0.0, - "learning_rate": 1.850706292468681e-05, - "loss": 0.89, + "learning_rate": 1.8511621742605662e-05, + "loss": 1.0176, "step": 7080 }, { - "epoch": 0.20093643586833143, + "epoch": 0.2006574286604891, "grad_norm": 0.0, - "learning_rate": 1.8506579783342808e-05, - "loss": 1.0307, + "learning_rate": 1.851113995659993e-05, + "loss": 0.9273, "step": 7081 }, { - "epoch": 0.20096481271282635, + "epoch": 0.20068576610275157, "grad_norm": 0.0, - "learning_rate": 1.8506096570143343e-05, - "loss": 1.0515, + "learning_rate": 1.851065809890171e-05, + "loss": 0.998, "step": 7082 }, { - "epoch": 0.20099318955732123, + "epoch": 0.20071410354501404, "grad_norm": 0.0, - "learning_rate": 1.85056132850925e-05, - "loss": 0.8666, + "learning_rate": 1.8510176169515056e-05, + "loss": 0.9803, "step": 7083 }, { - "epoch": 0.20102156640181612, + "epoch": 0.20074244098727648, "grad_norm": 0.0, - "learning_rate": 1.850512992819436e-05, - "loss": 0.9247, + "learning_rate": 1.8509694168444025e-05, + "loss": 1.0673, "step": 7084 }, { - "epoch": 0.201049943246311, + "epoch": 0.20077077842953894, "grad_norm": 0.0, - "learning_rate": 1.8504646499453003e-05, - "loss": 1.0104, + "learning_rate": 1.850921209569268e-05, + "loss": 1.0309, "step": 7085 }, { - "epoch": 0.2010783200908059, + "epoch": 0.2007991158718014, "grad_norm": 0.0, - "learning_rate": 1.8504162998872518e-05, - "loss": 1.0024, + "learning_rate": 1.8508729951265082e-05, + "loss": 0.8971, "step": 7086 }, { - "epoch": 0.20110669693530078, + "epoch": 0.20082745331406388, "grad_norm": 0.0, - "learning_rate": 1.850367942645698e-05, - "loss": 0.9445, + "learning_rate": 1.8508247735165294e-05, + "loss": 1.0184, "step": 7087 }, { - "epoch": 0.2011350737797957, + "epoch": 0.20085579075632634, "grad_norm": 0.0, - "learning_rate": 1.8503195782210484e-05, - "loss": 0.9563, + "learning_rate": 1.8507765447397375e-05, + "loss": 0.9122, "step": 7088 }, { - "epoch": 0.20116345062429059, + "epoch": 0.2008841281985888, "grad_norm": 0.0, - "learning_rate": 1.850271206613711e-05, - "loss": 1.0835, + "learning_rate": 1.850728308796539e-05, + "loss": 0.9452, "step": 7089 }, { - "epoch": 0.20119182746878547, + "epoch": 0.20091246564085125, "grad_norm": 0.0, - "learning_rate": 1.8502228278240945e-05, - "loss": 1.0559, + "learning_rate": 1.8506800656873397e-05, + "loss": 0.9868, "step": 7090 }, { - "epoch": 0.20122020431328036, + "epoch": 0.2009408030831137, "grad_norm": 0.0, - "learning_rate": 1.8501744418526074e-05, - "loss": 0.9026, + "learning_rate": 1.8506318154125463e-05, + "loss": 0.9431, "step": 7091 }, { - "epoch": 0.20124858115777525, + "epoch": 0.20096914052537618, "grad_norm": 0.0, - "learning_rate": 1.850126048699659e-05, - "loss": 0.9872, + "learning_rate": 1.8505835579725653e-05, + "loss": 0.9469, "step": 7092 }, { - "epoch": 0.20127695800227013, + "epoch": 0.20099747796763864, "grad_norm": 0.0, - "learning_rate": 1.8500776483656574e-05, - "loss": 1.0699, + "learning_rate": 1.8505352933678037e-05, + "loss": 1.0043, "step": 7093 }, { - "epoch": 0.20130533484676505, + "epoch": 0.2010258154099011, "grad_norm": 0.0, - "learning_rate": 1.8500292408510114e-05, - "loss": 0.9359, + "learning_rate": 1.8504870215986667e-05, + "loss": 0.8636, "step": 7094 }, { - "epoch": 0.20133371169125994, + "epoch": 0.20105415285216358, "grad_norm": 0.0, - "learning_rate": 1.8499808261561308e-05, - "loss": 0.9484, + "learning_rate": 1.850438742665562e-05, + "loss": 0.9615, "step": 7095 }, { - "epoch": 0.20136208853575482, + "epoch": 0.20108249029442601, "grad_norm": 0.0, - "learning_rate": 1.8499324042814236e-05, - "loss": 1.0403, + "learning_rate": 1.850390456568896e-05, + "loss": 0.9097, "step": 7096 }, { - "epoch": 0.2013904653802497, + "epoch": 0.20111082773668848, "grad_norm": 0.0, - "learning_rate": 1.849883975227299e-05, - "loss": 0.9987, + "learning_rate": 1.8503421633090755e-05, + "loss": 1.0417, "step": 7097 }, { - "epoch": 0.2014188422247446, + "epoch": 0.20113916517895095, "grad_norm": 0.0, - "learning_rate": 1.8498355389941666e-05, - "loss": 0.915, + "learning_rate": 1.850293862886507e-05, + "loss": 1.0078, "step": 7098 }, { - "epoch": 0.2014472190692395, + "epoch": 0.2011675026212134, "grad_norm": 0.0, - "learning_rate": 1.8497870955824347e-05, - "loss": 1.1957, + "learning_rate": 1.8502455553015978e-05, + "loss": 1.116, "step": 7099 }, { - "epoch": 0.2014755959137344, + "epoch": 0.20119584006347588, "grad_norm": 0.0, - "learning_rate": 1.8497386449925137e-05, - "loss": 0.9969, + "learning_rate": 1.8501972405547547e-05, + "loss": 1.0158, "step": 7100 }, { - "epoch": 0.2015039727582293, + "epoch": 0.20122417750573834, "grad_norm": 0.0, - "learning_rate": 1.8496901872248117e-05, - "loss": 0.962, + "learning_rate": 1.850148918646384e-05, + "loss": 1.0357, "step": 7101 }, { - "epoch": 0.20153234960272418, + "epoch": 0.20125251494800078, "grad_norm": 0.0, - "learning_rate": 1.8496417222797385e-05, - "loss": 1.0173, + "learning_rate": 1.8501005895768934e-05, + "loss": 0.9885, "step": 7102 }, { - "epoch": 0.20156072644721906, + "epoch": 0.20128085239026325, "grad_norm": 0.0, - "learning_rate": 1.8495932501577036e-05, - "loss": 1.022, + "learning_rate": 1.8500522533466897e-05, + "loss": 1.0759, "step": 7103 }, { - "epoch": 0.20158910329171395, + "epoch": 0.20130918983252571, "grad_norm": 0.0, - "learning_rate": 1.8495447708591163e-05, - "loss": 1.0125, + "learning_rate": 1.8500039099561807e-05, + "loss": 0.975, "step": 7104 }, { - "epoch": 0.20161748013620887, + "epoch": 0.20133752727478818, "grad_norm": 0.0, - "learning_rate": 1.8494962843843862e-05, - "loss": 1.0168, + "learning_rate": 1.849955559405773e-05, + "loss": 0.9822, "step": 7105 }, { - "epoch": 0.20164585698070375, + "epoch": 0.20136586471705065, "grad_norm": 0.0, - "learning_rate": 1.8494477907339224e-05, - "loss": 0.92, + "learning_rate": 1.849907201695874e-05, + "loss": 0.9823, "step": 7106 }, { - "epoch": 0.20167423382519864, + "epoch": 0.2013942021593131, "grad_norm": 0.0, - "learning_rate": 1.8493992899081354e-05, - "loss": 0.965, + "learning_rate": 1.8498588368268905e-05, + "loss": 0.9697, "step": 7107 }, { - "epoch": 0.20170261066969353, + "epoch": 0.20142253960157555, "grad_norm": 0.0, - "learning_rate": 1.8493507819074342e-05, - "loss": 0.8959, + "learning_rate": 1.849810464799231e-05, + "loss": 0.8958, "step": 7108 }, { - "epoch": 0.20173098751418841, + "epoch": 0.20145087704383802, "grad_norm": 0.0, - "learning_rate": 1.849302266732229e-05, - "loss": 1.0577, + "learning_rate": 1.849762085613302e-05, + "loss": 0.979, "step": 7109 }, { - "epoch": 0.2017593643586833, + "epoch": 0.20147921448610048, "grad_norm": 0.0, - "learning_rate": 1.8492537443829293e-05, - "loss": 1.0115, + "learning_rate": 1.8497136992695117e-05, + "loss": 1.1234, "step": 7110 }, { - "epoch": 0.20178774120317822, + "epoch": 0.20150755192836295, "grad_norm": 0.0, - "learning_rate": 1.849205214859945e-05, - "loss": 1.1033, + "learning_rate": 1.8496653057682673e-05, + "loss": 0.824, "step": 7111 }, { - "epoch": 0.2018161180476731, + "epoch": 0.20153588937062542, "grad_norm": 0.0, - "learning_rate": 1.8491566781636862e-05, - "loss": 0.9323, + "learning_rate": 1.8496169051099766e-05, + "loss": 0.9648, "step": 7112 }, { - "epoch": 0.201844494892168, + "epoch": 0.20156422681288788, "grad_norm": 0.0, - "learning_rate": 1.8491081342945624e-05, - "loss": 0.892, + "learning_rate": 1.8495684972950473e-05, + "loss": 0.8095, "step": 7113 }, { - "epoch": 0.20187287173666288, + "epoch": 0.20159256425515032, "grad_norm": 0.0, - "learning_rate": 1.8490595832529844e-05, - "loss": 0.9025, + "learning_rate": 1.849520082323887e-05, + "loss": 1.0166, "step": 7114 }, { - "epoch": 0.20190124858115777, + "epoch": 0.20162090169741279, "grad_norm": 0.0, - "learning_rate": 1.8490110250393617e-05, - "loss": 0.9708, + "learning_rate": 1.8494716601969034e-05, + "loss": 0.9413, "step": 7115 }, { - "epoch": 0.20192962542565268, + "epoch": 0.20164923913967525, "grad_norm": 0.0, - "learning_rate": 1.8489624596541045e-05, - "loss": 0.9973, + "learning_rate": 1.8494232309145044e-05, + "loss": 1.0276, "step": 7116 }, { - "epoch": 0.20195800227014757, + "epoch": 0.20167757658193772, "grad_norm": 0.0, - "learning_rate": 1.8489138870976234e-05, - "loss": 1.1145, + "learning_rate": 1.8493747944770985e-05, + "loss": 1.1169, "step": 7117 }, { - "epoch": 0.20198637911464246, + "epoch": 0.20170591402420018, "grad_norm": 0.0, - "learning_rate": 1.8488653073703287e-05, - "loss": 1.0608, + "learning_rate": 1.849326350885093e-05, + "loss": 0.9512, "step": 7118 }, { - "epoch": 0.20201475595913734, + "epoch": 0.20173425146646265, "grad_norm": 0.0, - "learning_rate": 1.8488167204726308e-05, - "loss": 0.9257, + "learning_rate": 1.8492779001388964e-05, + "loss": 0.9433, "step": 7119 }, { - "epoch": 0.20204313280363223, + "epoch": 0.2017625889087251, "grad_norm": 0.0, - "learning_rate": 1.8487681264049396e-05, - "loss": 0.9438, + "learning_rate": 1.8492294422389167e-05, + "loss": 1.1046, "step": 7120 }, { - "epoch": 0.20207150964812712, + "epoch": 0.20179092635098755, "grad_norm": 0.0, - "learning_rate": 1.848719525167666e-05, - "loss": 1.0494, + "learning_rate": 1.849180977185562e-05, + "loss": 1.0003, "step": 7121 }, { - "epoch": 0.20209988649262203, + "epoch": 0.20181926379325002, "grad_norm": 0.0, - "learning_rate": 1.8486709167612203e-05, - "loss": 1.0665, + "learning_rate": 1.8491325049792407e-05, + "loss": 0.9601, "step": 7122 }, { - "epoch": 0.20212826333711692, + "epoch": 0.20184760123551249, "grad_norm": 0.0, - "learning_rate": 1.848622301186013e-05, - "loss": 1.1099, + "learning_rate": 1.849084025620361e-05, + "loss": 0.9421, "step": 7123 }, { - "epoch": 0.2021566401816118, + "epoch": 0.20187593867777495, "grad_norm": 0.0, - "learning_rate": 1.8485736784424554e-05, - "loss": 1.0774, + "learning_rate": 1.849035539109331e-05, + "loss": 1.0256, "step": 7124 }, { - "epoch": 0.2021850170261067, + "epoch": 0.20190427612003742, "grad_norm": 0.0, - "learning_rate": 1.8485250485309578e-05, - "loss": 1.0442, + "learning_rate": 1.8489870454465596e-05, + "loss": 1.0707, "step": 7125 }, { - "epoch": 0.20221339387060158, + "epoch": 0.20193261356229986, "grad_norm": 0.0, - "learning_rate": 1.8484764114519306e-05, - "loss": 0.9344, + "learning_rate": 1.8489385446324552e-05, + "loss": 1.0101, "step": 7126 }, { - "epoch": 0.20224177071509647, + "epoch": 0.20196095100456232, "grad_norm": 0.0, - "learning_rate": 1.848427767205785e-05, - "loss": 1.0901, + "learning_rate": 1.848890036667426e-05, + "loss": 1.0515, "step": 7127 }, { - "epoch": 0.20227014755959138, + "epoch": 0.2019892884468248, "grad_norm": 0.0, - "learning_rate": 1.8483791157929323e-05, - "loss": 0.9547, + "learning_rate": 1.8488415215518807e-05, + "loss": 1.088, "step": 7128 }, { - "epoch": 0.20229852440408627, + "epoch": 0.20201762588908725, "grad_norm": 0.0, - "learning_rate": 1.8483304572137827e-05, - "loss": 1.0941, + "learning_rate": 1.8487929992862282e-05, + "loss": 0.9541, "step": 7129 }, { - "epoch": 0.20232690124858116, + "epoch": 0.20204596333134972, "grad_norm": 0.0, - "learning_rate": 1.848281791468748e-05, - "loss": 1.0918, + "learning_rate": 1.848744469870877e-05, + "loss": 0.9803, "step": 7130 }, { - "epoch": 0.20235527809307605, + "epoch": 0.2020743007736122, "grad_norm": 0.0, - "learning_rate": 1.8482331185582383e-05, - "loss": 0.9697, + "learning_rate": 1.8486959333062356e-05, + "loss": 1.045, "step": 7131 }, { - "epoch": 0.20238365493757093, + "epoch": 0.20210263821587462, "grad_norm": 0.0, - "learning_rate": 1.8481844384826658e-05, - "loss": 0.9608, + "learning_rate": 1.848647389592714e-05, + "loss": 0.9567, "step": 7132 }, { - "epoch": 0.20241203178206582, + "epoch": 0.2021309756581371, "grad_norm": 0.0, - "learning_rate": 1.848135751242441e-05, - "loss": 1.0974, + "learning_rate": 1.8485988387307197e-05, + "loss": 0.9494, "step": 7133 }, { - "epoch": 0.20244040862656074, + "epoch": 0.20215931310039956, "grad_norm": 0.0, - "learning_rate": 1.8480870568379757e-05, - "loss": 1.0432, + "learning_rate": 1.8485502807206624e-05, + "loss": 1.0073, "step": 7134 }, { - "epoch": 0.20246878547105562, + "epoch": 0.20218765054266202, "grad_norm": 0.0, - "learning_rate": 1.8480383552696805e-05, - "loss": 1.0049, + "learning_rate": 1.848501715562951e-05, + "loss": 1.0015, "step": 7135 }, { - "epoch": 0.2024971623155505, + "epoch": 0.2022159879849245, "grad_norm": 0.0, - "learning_rate": 1.8479896465379676e-05, - "loss": 0.9692, + "learning_rate": 1.8484531432579947e-05, + "loss": 0.9569, "step": 7136 }, { - "epoch": 0.2025255391600454, + "epoch": 0.20224432542718696, "grad_norm": 0.0, - "learning_rate": 1.8479409306432474e-05, - "loss": 0.9767, + "learning_rate": 1.8484045638062022e-05, + "loss": 1.0783, "step": 7137 }, { - "epoch": 0.20255391600454029, + "epoch": 0.2022726628694494, "grad_norm": 0.0, - "learning_rate": 1.8478922075859326e-05, - "loss": 1.1163, + "learning_rate": 1.8483559772079833e-05, + "loss": 1.0591, "step": 7138 }, { - "epoch": 0.2025822928490352, + "epoch": 0.20230100031171186, "grad_norm": 0.0, - "learning_rate": 1.847843477366434e-05, - "loss": 0.9914, + "learning_rate": 1.848307383463747e-05, + "loss": 1.029, "step": 7139 }, { - "epoch": 0.2026106696935301, + "epoch": 0.20232933775397433, "grad_norm": 0.0, - "learning_rate": 1.8477947399851633e-05, - "loss": 0.8989, + "learning_rate": 1.8482587825739023e-05, + "loss": 1.0962, "step": 7140 }, { - "epoch": 0.20263904653802498, + "epoch": 0.2023576751962368, "grad_norm": 0.0, - "learning_rate": 1.847745995442533e-05, - "loss": 1.041, + "learning_rate": 1.848210174538859e-05, + "loss": 0.8982, "step": 7141 }, { - "epoch": 0.20266742338251986, + "epoch": 0.20238601263849926, "grad_norm": 0.0, - "learning_rate": 1.8476972437389535e-05, - "loss": 1.0576, + "learning_rate": 1.8481615593590265e-05, + "loss": 0.9791, "step": 7142 }, { - "epoch": 0.20269580022701475, + "epoch": 0.20241435008076172, "grad_norm": 0.0, - "learning_rate": 1.8476484848748373e-05, - "loss": 1.004, + "learning_rate": 1.8481129370348142e-05, + "loss": 1.0317, "step": 7143 }, { - "epoch": 0.20272417707150964, + "epoch": 0.20244268752302416, "grad_norm": 0.0, - "learning_rate": 1.8475997188505962e-05, - "loss": 0.9675, + "learning_rate": 1.848064307566632e-05, + "loss": 0.897, "step": 7144 }, { - "epoch": 0.20275255391600455, + "epoch": 0.20247102496528663, "grad_norm": 0.0, - "learning_rate": 1.8475509456666423e-05, - "loss": 1.0638, + "learning_rate": 1.8480156709548888e-05, + "loss": 0.9442, "step": 7145 }, { - "epoch": 0.20278093076049944, + "epoch": 0.2024993624075491, "grad_norm": 0.0, - "learning_rate": 1.847502165323388e-05, - "loss": 0.9679, + "learning_rate": 1.8479670271999945e-05, + "loss": 0.9917, "step": 7146 }, { - "epoch": 0.20280930760499433, + "epoch": 0.20252769984981156, "grad_norm": 0.0, - "learning_rate": 1.847453377821244e-05, - "loss": 0.9399, + "learning_rate": 1.8479183763023597e-05, + "loss": 0.9615, "step": 7147 }, { - "epoch": 0.20283768444948921, + "epoch": 0.20255603729207403, "grad_norm": 0.0, - "learning_rate": 1.8474045831606234e-05, - "loss": 1.0891, + "learning_rate": 1.847869718262393e-05, + "loss": 1.0397, "step": 7148 }, { - "epoch": 0.2028660612939841, + "epoch": 0.2025843747343365, "grad_norm": 0.0, - "learning_rate": 1.8473557813419388e-05, - "loss": 1.0695, + "learning_rate": 1.847821053080505e-05, + "loss": 0.9554, "step": 7149 }, { - "epoch": 0.202894438138479, + "epoch": 0.20261271217659893, "grad_norm": 0.0, - "learning_rate": 1.847306972365601e-05, - "loss": 0.9977, + "learning_rate": 1.8477723807571055e-05, + "loss": 0.8673, "step": 7150 }, { - "epoch": 0.2029228149829739, + "epoch": 0.2026410496188614, "grad_norm": 0.0, - "learning_rate": 1.8472581562320235e-05, - "loss": 1.0023, + "learning_rate": 1.8477237012926042e-05, + "loss": 1.1939, "step": 7151 }, { - "epoch": 0.2029511918274688, + "epoch": 0.20266938706112386, "grad_norm": 0.0, - "learning_rate": 1.847209332941618e-05, - "loss": 0.9994, + "learning_rate": 1.847675014687412e-05, + "loss": 1.0607, "step": 7152 }, { - "epoch": 0.20297956867196368, + "epoch": 0.20269772450338633, "grad_norm": 0.0, - "learning_rate": 1.8471605024947978e-05, - "loss": 0.9914, + "learning_rate": 1.8476263209419376e-05, + "loss": 0.9694, "step": 7153 }, { - "epoch": 0.20300794551645857, + "epoch": 0.2027260619456488, "grad_norm": 0.0, - "learning_rate": 1.8471116648919744e-05, - "loss": 1.0322, + "learning_rate": 1.8475776200565924e-05, + "loss": 0.9679, "step": 7154 }, { - "epoch": 0.20303632236095345, + "epoch": 0.20275439938791126, "grad_norm": 0.0, - "learning_rate": 1.8470628201335604e-05, - "loss": 0.901, + "learning_rate": 1.847528912031786e-05, + "loss": 1.0791, "step": 7155 }, { - "epoch": 0.20306469920544837, + "epoch": 0.2027827368301737, "grad_norm": 0.0, - "learning_rate": 1.8470139682199693e-05, - "loss": 1.0494, + "learning_rate": 1.847480196867929e-05, + "loss": 0.9817, "step": 7156 }, { - "epoch": 0.20309307604994326, + "epoch": 0.20281107427243616, "grad_norm": 0.0, - "learning_rate": 1.8469651091516126e-05, - "loss": 1.0934, + "learning_rate": 1.8474314745654313e-05, + "loss": 0.9444, "step": 7157 }, { - "epoch": 0.20312145289443814, + "epoch": 0.20283941171469863, "grad_norm": 0.0, - "learning_rate": 1.846916242928904e-05, - "loss": 0.9818, + "learning_rate": 1.8473827451247038e-05, + "loss": 0.9917, "step": 7158 }, { - "epoch": 0.20314982973893303, + "epoch": 0.2028677491569611, "grad_norm": 0.0, - "learning_rate": 1.8468673695522552e-05, - "loss": 1.1008, + "learning_rate": 1.847334008546157e-05, + "loss": 0.8993, "step": 7159 }, { - "epoch": 0.20317820658342792, + "epoch": 0.20289608659922356, "grad_norm": 0.0, - "learning_rate": 1.8468184890220803e-05, - "loss": 0.992, + "learning_rate": 1.847285264830201e-05, + "loss": 1.0797, "step": 7160 }, { - "epoch": 0.2032065834279228, + "epoch": 0.20292442404148603, "grad_norm": 0.0, - "learning_rate": 1.846769601338791e-05, - "loss": 1.0183, + "learning_rate": 1.8472365139772465e-05, + "loss": 0.9964, "step": 7161 }, { - "epoch": 0.20323496027241772, + "epoch": 0.20295276148374847, "grad_norm": 0.0, - "learning_rate": 1.846720706502801e-05, - "loss": 1.0741, + "learning_rate": 1.847187755987704e-05, + "loss": 1.0619, "step": 7162 }, { - "epoch": 0.2032633371169126, + "epoch": 0.20298109892601093, "grad_norm": 0.0, - "learning_rate": 1.8466718045145233e-05, - "loss": 1.0897, + "learning_rate": 1.847138990861985e-05, + "loss": 1.0882, "step": 7163 }, { - "epoch": 0.2032917139614075, + "epoch": 0.2030094363682734, "grad_norm": 0.0, - "learning_rate": 1.846622895374371e-05, - "loss": 1.0526, + "learning_rate": 1.8470902186004995e-05, + "loss": 0.8842, "step": 7164 }, { - "epoch": 0.20332009080590238, + "epoch": 0.20303777381053587, "grad_norm": 0.0, - "learning_rate": 1.8465739790827566e-05, - "loss": 0.9948, + "learning_rate": 1.8470414392036582e-05, + "loss": 0.9557, "step": 7165 }, { - "epoch": 0.20334846765039727, + "epoch": 0.20306611125279833, "grad_norm": 0.0, - "learning_rate": 1.8465250556400938e-05, - "loss": 1.0976, + "learning_rate": 1.8469926526718726e-05, + "loss": 1.0698, "step": 7166 }, { - "epoch": 0.20337684449489216, + "epoch": 0.2030944486950608, "grad_norm": 0.0, - "learning_rate": 1.846476125046796e-05, - "loss": 0.8618, + "learning_rate": 1.846943859005553e-05, + "loss": 1.1021, "step": 7167 }, { - "epoch": 0.20340522133938707, + "epoch": 0.20312278613732324, "grad_norm": 0.0, - "learning_rate": 1.8464271873032762e-05, - "loss": 1.086, + "learning_rate": 1.8468950582051116e-05, + "loss": 0.9461, "step": 7168 }, { - "epoch": 0.20343359818388196, + "epoch": 0.2031511235795857, "grad_norm": 0.0, - "learning_rate": 1.8463782424099478e-05, - "loss": 1.0213, + "learning_rate": 1.846846250270958e-05, + "loss": 1.0026, "step": 7169 }, { - "epoch": 0.20346197502837685, + "epoch": 0.20317946102184817, "grad_norm": 0.0, - "learning_rate": 1.8463292903672246e-05, - "loss": 1.054, + "learning_rate": 1.846797435203504e-05, + "loss": 0.9417, "step": 7170 }, { - "epoch": 0.20349035187287173, + "epoch": 0.20320779846411063, "grad_norm": 0.0, - "learning_rate": 1.8462803311755196e-05, - "loss": 0.9692, + "learning_rate": 1.846748613003161e-05, + "loss": 0.9696, "step": 7171 }, { - "epoch": 0.20351872871736662, + "epoch": 0.2032361359063731, "grad_norm": 0.0, - "learning_rate": 1.846231364835247e-05, - "loss": 1.0312, + "learning_rate": 1.8466997836703397e-05, + "loss": 0.976, "step": 7172 }, { - "epoch": 0.2035471055618615, + "epoch": 0.20326447334863557, "grad_norm": 0.0, - "learning_rate": 1.8461823913468194e-05, - "loss": 0.9082, + "learning_rate": 1.846650947205452e-05, + "loss": 0.9908, "step": 7173 }, { - "epoch": 0.20357548240635642, + "epoch": 0.203292810790898, "grad_norm": 0.0, - "learning_rate": 1.8461334107106515e-05, - "loss": 0.9216, + "learning_rate": 1.8466021036089085e-05, + "loss": 0.9703, "step": 7174 }, { - "epoch": 0.2036038592508513, + "epoch": 0.20332114823316047, "grad_norm": 0.0, - "learning_rate": 1.8460844229271566e-05, - "loss": 1.0453, + "learning_rate": 1.8465532528811216e-05, + "loss": 0.9445, "step": 7175 }, { - "epoch": 0.2036322360953462, + "epoch": 0.20334948567542294, "grad_norm": 0.0, - "learning_rate": 1.8460354279967484e-05, - "loss": 0.9645, + "learning_rate": 1.846504395022502e-05, + "loss": 1.0736, "step": 7176 }, { - "epoch": 0.20366061293984108, + "epoch": 0.2033778231176854, "grad_norm": 0.0, - "learning_rate": 1.845986425919841e-05, - "loss": 0.9614, + "learning_rate": 1.846455530033462e-05, + "loss": 0.9591, "step": 7177 }, { - "epoch": 0.20368898978433597, + "epoch": 0.20340616055994787, "grad_norm": 0.0, - "learning_rate": 1.8459374166968483e-05, - "loss": 1.0727, + "learning_rate": 1.8464066579144123e-05, + "loss": 1.0245, "step": 7178 }, { - "epoch": 0.2037173666288309, + "epoch": 0.2034344980022103, "grad_norm": 0.0, - "learning_rate": 1.8458884003281846e-05, - "loss": 0.9723, + "learning_rate": 1.8463577786657653e-05, + "loss": 0.9157, "step": 7179 }, { - "epoch": 0.20374574347332577, + "epoch": 0.20346283544447277, "grad_norm": 0.0, - "learning_rate": 1.845839376814263e-05, - "loss": 1.0656, + "learning_rate": 1.846308892287932e-05, + "loss": 0.9226, "step": 7180 }, { - "epoch": 0.20377412031782066, + "epoch": 0.20349117288673524, "grad_norm": 0.0, - "learning_rate": 1.845790346155498e-05, - "loss": 0.9467, + "learning_rate": 1.846259998781325e-05, + "loss": 0.9868, "step": 7181 }, { - "epoch": 0.20380249716231555, + "epoch": 0.2035195103289977, "grad_norm": 0.0, - "learning_rate": 1.8457413083523045e-05, - "loss": 1.0159, + "learning_rate": 1.8462110981463555e-05, + "loss": 0.9848, "step": 7182 }, { - "epoch": 0.20383087400681044, + "epoch": 0.20354784777126017, "grad_norm": 0.0, - "learning_rate": 1.8456922634050957e-05, - "loss": 0.8266, + "learning_rate": 1.846162190383436e-05, + "loss": 1.0436, "step": 7183 }, { - "epoch": 0.20385925085130532, + "epoch": 0.20357618521352264, "grad_norm": 0.0, - "learning_rate": 1.8456432113142866e-05, - "loss": 0.951, + "learning_rate": 1.846113275492978e-05, + "loss": 0.9812, "step": 7184 }, { - "epoch": 0.20388762769580024, + "epoch": 0.20360452265578508, "grad_norm": 0.0, - "learning_rate": 1.845594152080291e-05, - "loss": 0.9202, + "learning_rate": 1.8460643534753938e-05, + "loss": 0.9841, "step": 7185 }, { - "epoch": 0.20391600454029513, + "epoch": 0.20363286009804754, "grad_norm": 0.0, - "learning_rate": 1.845545085703524e-05, - "loss": 0.9465, + "learning_rate": 1.8460154243310953e-05, + "loss": 0.9897, "step": 7186 }, { - "epoch": 0.20394438138479, + "epoch": 0.20366119754031, "grad_norm": 0.0, - "learning_rate": 1.845496012184399e-05, - "loss": 1.0374, + "learning_rate": 1.8459664880604946e-05, + "loss": 1.0611, "step": 7187 }, { - "epoch": 0.2039727582292849, + "epoch": 0.20368953498257247, "grad_norm": 0.0, - "learning_rate": 1.845446931523332e-05, - "loss": 1.0287, + "learning_rate": 1.8459175446640042e-05, + "loss": 1.0201, "step": 7188 }, { - "epoch": 0.2040011350737798, + "epoch": 0.20371787242483494, "grad_norm": 0.0, - "learning_rate": 1.845397843720736e-05, - "loss": 1.0651, + "learning_rate": 1.845868594142036e-05, + "loss": 0.867, "step": 7189 }, { - "epoch": 0.20402951191827468, + "epoch": 0.2037462098670974, "grad_norm": 0.0, - "learning_rate": 1.845348748777027e-05, - "loss": 1.0349, + "learning_rate": 1.845819636495003e-05, + "loss": 0.9593, "step": 7190 }, { - "epoch": 0.2040578887627696, + "epoch": 0.20377454730935984, "grad_norm": 0.0, - "learning_rate": 1.845299646692619e-05, - "loss": 1.1381, + "learning_rate": 1.8457706717233165e-05, + "loss": 1.0895, "step": 7191 }, { - "epoch": 0.20408626560726448, + "epoch": 0.2038028847516223, "grad_norm": 0.0, - "learning_rate": 1.8452505374679265e-05, - "loss": 1.024, + "learning_rate": 1.8457216998273896e-05, + "loss": 0.9368, "step": 7192 }, { - "epoch": 0.20411464245175936, + "epoch": 0.20383122219388478, "grad_norm": 0.0, - "learning_rate": 1.845201421103365e-05, - "loss": 0.9756, + "learning_rate": 1.845672720807635e-05, + "loss": 0.9547, "step": 7193 }, { - "epoch": 0.20414301929625425, + "epoch": 0.20385955963614724, "grad_norm": 0.0, - "learning_rate": 1.845152297599349e-05, - "loss": 0.8821, + "learning_rate": 1.845623734664465e-05, + "loss": 1.0063, "step": 7194 }, { - "epoch": 0.20417139614074914, + "epoch": 0.2038878970784097, "grad_norm": 0.0, - "learning_rate": 1.8451031669562938e-05, - "loss": 0.9946, + "learning_rate": 1.8455747413982927e-05, + "loss": 0.9928, "step": 7195 }, { - "epoch": 0.20419977298524405, + "epoch": 0.20391623452067217, "grad_norm": 0.0, - "learning_rate": 1.8450540291746138e-05, - "loss": 0.9404, + "learning_rate": 1.8455257410095296e-05, + "loss": 0.9662, "step": 7196 }, { - "epoch": 0.20422814982973894, + "epoch": 0.2039445719629346, "grad_norm": 0.0, - "learning_rate": 1.8450048842547246e-05, - "loss": 0.9444, + "learning_rate": 1.8454767334985896e-05, + "loss": 0.9202, "step": 7197 }, { - "epoch": 0.20425652667423383, + "epoch": 0.20397290940519708, "grad_norm": 0.0, - "learning_rate": 1.8449557321970416e-05, - "loss": 0.9589, + "learning_rate": 1.845427718865885e-05, + "loss": 1.093, "step": 7198 }, { - "epoch": 0.20428490351872872, + "epoch": 0.20400124684745954, "grad_norm": 0.0, - "learning_rate": 1.8449065730019788e-05, - "loss": 1.0627, + "learning_rate": 1.845378697111829e-05, + "loss": 1.0011, "step": 7199 }, { - "epoch": 0.2043132803632236, + "epoch": 0.204029584289722, "grad_norm": 0.0, - "learning_rate": 1.844857406669953e-05, - "loss": 1.0215, + "learning_rate": 1.8453296682368344e-05, + "loss": 1.1125, "step": 7200 }, { - "epoch": 0.2043416572077185, + "epoch": 0.20405792173198448, "grad_norm": 0.0, - "learning_rate": 1.844808233201378e-05, - "loss": 1.044, + "learning_rate": 1.845280632241314e-05, + "loss": 0.9913, "step": 7201 }, { - "epoch": 0.2043700340522134, + "epoch": 0.20408625917424694, "grad_norm": 0.0, - "learning_rate": 1.84475905259667e-05, - "loss": 1.0236, + "learning_rate": 1.8452315891256806e-05, + "loss": 1.1284, "step": 7202 }, { - "epoch": 0.2043984108967083, + "epoch": 0.20411459661650938, "grad_norm": 0.0, - "learning_rate": 1.8447098648562444e-05, - "loss": 1.0124, + "learning_rate": 1.845182538890348e-05, + "loss": 1.0348, "step": 7203 }, { - "epoch": 0.20442678774120318, + "epoch": 0.20414293405877185, "grad_norm": 0.0, - "learning_rate": 1.8446606699805164e-05, - "loss": 1.0471, + "learning_rate": 1.845133481535729e-05, + "loss": 0.9814, "step": 7204 }, { - "epoch": 0.20445516458569807, + "epoch": 0.2041712715010343, "grad_norm": 0.0, - "learning_rate": 1.844611467969902e-05, - "loss": 0.9612, + "learning_rate": 1.8450844170622366e-05, + "loss": 1.0581, "step": 7205 }, { - "epoch": 0.20448354143019296, + "epoch": 0.20419960894329678, "grad_norm": 0.0, - "learning_rate": 1.8445622588248168e-05, - "loss": 1.0225, + "learning_rate": 1.8450353454702845e-05, + "loss": 1.1135, "step": 7206 }, { - "epoch": 0.20451191827468784, + "epoch": 0.20422794638555924, "grad_norm": 0.0, - "learning_rate": 1.8445130425456758e-05, - "loss": 0.9797, + "learning_rate": 1.8449862667602858e-05, + "loss": 1.1407, "step": 7207 }, { - "epoch": 0.20454029511918276, + "epoch": 0.2042562838278217, "grad_norm": 0.0, - "learning_rate": 1.844463819132895e-05, - "loss": 1.024, + "learning_rate": 1.844937180932654e-05, + "loss": 1.1403, "step": 7208 }, { - "epoch": 0.20456867196367765, + "epoch": 0.20428462127008415, "grad_norm": 0.0, - "learning_rate": 1.8444145885868908e-05, - "loss": 1.0389, + "learning_rate": 1.8448880879878026e-05, + "loss": 0.9614, "step": 7209 }, { - "epoch": 0.20459704880817253, + "epoch": 0.20431295871234662, "grad_norm": 0.0, - "learning_rate": 1.8443653509080787e-05, - "loss": 1.052, + "learning_rate": 1.844838987926145e-05, + "loss": 1.0351, "step": 7210 }, { - "epoch": 0.20462542565266742, + "epoch": 0.20434129615460908, "grad_norm": 0.0, - "learning_rate": 1.844316106096874e-05, - "loss": 0.9367, + "learning_rate": 1.844789880748095e-05, + "loss": 1.0644, "step": 7211 }, { - "epoch": 0.2046538024971623, + "epoch": 0.20436963359687155, "grad_norm": 0.0, - "learning_rate": 1.844266854153694e-05, - "loss": 1.0263, + "learning_rate": 1.8447407664540662e-05, + "loss": 1.0245, "step": 7212 }, { - "epoch": 0.2046821793416572, + "epoch": 0.204397971039134, "grad_norm": 0.0, - "learning_rate": 1.8442175950789533e-05, - "loss": 0.9564, + "learning_rate": 1.8446916450444723e-05, + "loss": 0.9756, "step": 7213 }, { - "epoch": 0.2047105561861521, + "epoch": 0.20442630848139648, "grad_norm": 0.0, - "learning_rate": 1.8441683288730686e-05, - "loss": 0.901, + "learning_rate": 1.844642516519727e-05, + "loss": 1.0331, "step": 7214 }, { - "epoch": 0.204738933030647, + "epoch": 0.20445464592365892, "grad_norm": 0.0, - "learning_rate": 1.8441190555364567e-05, - "loss": 1.0353, + "learning_rate": 1.844593380880244e-05, + "loss": 0.9456, "step": 7215 }, { - "epoch": 0.20476730987514188, + "epoch": 0.20448298336592138, "grad_norm": 0.0, - "learning_rate": 1.8440697750695326e-05, - "loss": 1.0619, + "learning_rate": 1.8445442381264372e-05, + "loss": 0.9127, "step": 7216 }, { - "epoch": 0.20479568671963677, + "epoch": 0.20451132080818385, "grad_norm": 0.0, - "learning_rate": 1.844020487472713e-05, - "loss": 1.0202, + "learning_rate": 1.8444950882587213e-05, + "loss": 0.8988, "step": 7217 }, { - "epoch": 0.20482406356413166, + "epoch": 0.20453965825044632, "grad_norm": 0.0, - "learning_rate": 1.843971192746415e-05, - "loss": 0.9587, + "learning_rate": 1.844445931277509e-05, + "loss": 0.9511, "step": 7218 }, { - "epoch": 0.20485244040862657, + "epoch": 0.20456799569270878, "grad_norm": 0.0, - "learning_rate": 1.8439218908910538e-05, - "loss": 1.0399, + "learning_rate": 1.844396767183215e-05, + "loss": 1.0195, "step": 7219 }, { - "epoch": 0.20488081725312146, + "epoch": 0.20459633313497125, "grad_norm": 0.0, - "learning_rate": 1.8438725819070467e-05, - "loss": 0.9738, + "learning_rate": 1.8443475959762538e-05, + "loss": 0.9566, "step": 7220 }, { - "epoch": 0.20490919409761635, + "epoch": 0.20462467057723369, "grad_norm": 0.0, - "learning_rate": 1.8438232657948102e-05, - "loss": 1.0553, + "learning_rate": 1.844298417657039e-05, + "loss": 1.0201, "step": 7221 }, { - "epoch": 0.20493757094211124, + "epoch": 0.20465300801949615, "grad_norm": 0.0, - "learning_rate": 1.8437739425547602e-05, - "loss": 0.9926, + "learning_rate": 1.8442492322259854e-05, + "loss": 1.0927, "step": 7222 }, { - "epoch": 0.20496594778660612, + "epoch": 0.20468134546175862, "grad_norm": 0.0, - "learning_rate": 1.8437246121873143e-05, - "loss": 1.0064, + "learning_rate": 1.8442000396835072e-05, + "loss": 0.9476, "step": 7223 }, { - "epoch": 0.204994324631101, + "epoch": 0.20470968290402108, "grad_norm": 0.0, - "learning_rate": 1.8436752746928884e-05, - "loss": 0.9013, + "learning_rate": 1.844150840030018e-05, + "loss": 0.9874, "step": 7224 }, { - "epoch": 0.20502270147559593, + "epoch": 0.20473802034628355, "grad_norm": 0.0, - "learning_rate": 1.8436259300718996e-05, - "loss": 1.04, + "learning_rate": 1.844101633265933e-05, + "loss": 0.9334, "step": 7225 }, { - "epoch": 0.2050510783200908, + "epoch": 0.20476635778854602, "grad_norm": 0.0, - "learning_rate": 1.8435765783247644e-05, - "loss": 1.0026, + "learning_rate": 1.8440524193916667e-05, + "loss": 1.0776, "step": 7226 }, { - "epoch": 0.2050794551645857, + "epoch": 0.20479469523080845, "grad_norm": 0.0, - "learning_rate": 1.8435272194519002e-05, - "loss": 1.0098, + "learning_rate": 1.8440031984076333e-05, + "loss": 0.993, "step": 7227 }, { - "epoch": 0.2051078320090806, + "epoch": 0.20482303267307092, "grad_norm": 0.0, - "learning_rate": 1.8434778534537233e-05, - "loss": 1.0338, + "learning_rate": 1.8439539703142475e-05, + "loss": 1.0166, "step": 7228 }, { - "epoch": 0.20513620885357547, + "epoch": 0.2048513701153334, "grad_norm": 0.0, - "learning_rate": 1.8434284803306515e-05, - "loss": 1.0285, + "learning_rate": 1.8439047351119242e-05, + "loss": 0.851, "step": 7229 }, { - "epoch": 0.20516458569807036, + "epoch": 0.20487970755759585, "grad_norm": 0.0, - "learning_rate": 1.8433791000831012e-05, - "loss": 0.9766, + "learning_rate": 1.843855492801078e-05, + "loss": 0.9052, "step": 7230 }, { - "epoch": 0.20519296254256528, + "epoch": 0.20490804499985832, "grad_norm": 0.0, - "learning_rate": 1.84332971271149e-05, - "loss": 0.9423, + "learning_rate": 1.8438062433821234e-05, + "loss": 1.0486, "step": 7231 }, { - "epoch": 0.20522133938706016, + "epoch": 0.20493638244212078, "grad_norm": 0.0, - "learning_rate": 1.8432803182162346e-05, - "loss": 1.0652, + "learning_rate": 1.8437569868554753e-05, + "loss": 0.9744, "step": 7232 }, { - "epoch": 0.20524971623155505, + "epoch": 0.20496471988438322, "grad_norm": 0.0, - "learning_rate": 1.8432309165977523e-05, - "loss": 1.0242, + "learning_rate": 1.843707723221549e-05, + "loss": 0.8793, "step": 7233 }, { - "epoch": 0.20527809307604994, + "epoch": 0.2049930573266457, "grad_norm": 0.0, - "learning_rate": 1.8431815078564606e-05, - "loss": 1.0087, + "learning_rate": 1.8436584524807593e-05, + "loss": 0.9881, "step": 7234 }, { - "epoch": 0.20530646992054483, + "epoch": 0.20502139476890816, "grad_norm": 0.0, - "learning_rate": 1.8431320919927768e-05, - "loss": 0.9839, + "learning_rate": 1.843609174633521e-05, + "loss": 1.1596, "step": 7235 }, { - "epoch": 0.20533484676503974, + "epoch": 0.20504973221117062, "grad_norm": 0.0, - "learning_rate": 1.843082669007118e-05, - "loss": 0.9346, + "learning_rate": 1.8435598896802497e-05, + "loss": 0.9481, "step": 7236 }, { - "epoch": 0.20536322360953463, + "epoch": 0.2050780696534331, "grad_norm": 0.0, - "learning_rate": 1.8430332388999027e-05, - "loss": 0.9331, + "learning_rate": 1.84351059762136e-05, + "loss": 1.054, "step": 7237 }, { - "epoch": 0.20539160045402952, + "epoch": 0.20510640709569555, "grad_norm": 0.0, - "learning_rate": 1.8429838016715472e-05, - "loss": 1.0072, + "learning_rate": 1.843461298457267e-05, + "loss": 0.865, "step": 7238 }, { - "epoch": 0.2054199772985244, + "epoch": 0.205134744537958, "grad_norm": 0.0, - "learning_rate": 1.84293435732247e-05, - "loss": 1.089, + "learning_rate": 1.8434119921883865e-05, + "loss": 1.1013, "step": 7239 }, { - "epoch": 0.2054483541430193, + "epoch": 0.20516308198022046, "grad_norm": 0.0, - "learning_rate": 1.842884905853088e-05, - "loss": 1.0562, + "learning_rate": 1.8433626788151337e-05, + "loss": 0.9751, "step": 7240 }, { - "epoch": 0.20547673098751418, + "epoch": 0.20519141942248292, "grad_norm": 0.0, - "learning_rate": 1.84283544726382e-05, - "loss": 0.9523, + "learning_rate": 1.8433133583379236e-05, + "loss": 1.0762, "step": 7241 }, { - "epoch": 0.2055051078320091, + "epoch": 0.2052197568647454, "grad_norm": 0.0, - "learning_rate": 1.8427859815550824e-05, - "loss": 1.0175, + "learning_rate": 1.843264030757172e-05, + "loss": 1.1281, "step": 7242 }, { - "epoch": 0.20553348467650398, + "epoch": 0.20524809430700786, "grad_norm": 0.0, - "learning_rate": 1.842736508727294e-05, - "loss": 1.054, + "learning_rate": 1.843214696073295e-05, + "loss": 1.065, "step": 7243 }, { - "epoch": 0.20556186152099887, + "epoch": 0.20527643174927032, "grad_norm": 0.0, - "learning_rate": 1.8426870287808723e-05, - "loss": 1.0735, + "learning_rate": 1.8431653542867068e-05, + "loss": 0.9805, "step": 7244 }, { - "epoch": 0.20559023836549375, + "epoch": 0.20530476919153276, "grad_norm": 0.0, - "learning_rate": 1.8426375417162353e-05, - "loss": 1.0208, + "learning_rate": 1.843116005397824e-05, + "loss": 1.0274, "step": 7245 }, { - "epoch": 0.20561861520998864, + "epoch": 0.20533310663379523, "grad_norm": 0.0, - "learning_rate": 1.8425880475338015e-05, - "loss": 0.9774, + "learning_rate": 1.8430666494070615e-05, + "loss": 1.0752, "step": 7246 }, { - "epoch": 0.20564699205448353, + "epoch": 0.2053614440760577, "grad_norm": 0.0, - "learning_rate": 1.8425385462339884e-05, - "loss": 0.99, + "learning_rate": 1.843017286314836e-05, + "loss": 1.0083, "step": 7247 }, { - "epoch": 0.20567536889897844, + "epoch": 0.20538978151832016, "grad_norm": 0.0, - "learning_rate": 1.842489037817214e-05, - "loss": 1.0305, + "learning_rate": 1.842967916121563e-05, + "loss": 0.8928, "step": 7248 }, { - "epoch": 0.20570374574347333, + "epoch": 0.20541811896058262, "grad_norm": 0.0, - "learning_rate": 1.8424395222838972e-05, - "loss": 0.9762, + "learning_rate": 1.8429185388276578e-05, + "loss": 0.9922, "step": 7249 }, { - "epoch": 0.20573212258796822, + "epoch": 0.2054464564028451, "grad_norm": 0.0, - "learning_rate": 1.842389999634456e-05, - "loss": 0.8121, + "learning_rate": 1.842869154433537e-05, + "loss": 1.0239, "step": 7250 }, { - "epoch": 0.2057604994324631, + "epoch": 0.20547479384510753, "grad_norm": 0.0, - "learning_rate": 1.8423404698693082e-05, - "loss": 0.9826, + "learning_rate": 1.8428197629396162e-05, + "loss": 1.0751, "step": 7251 }, { - "epoch": 0.205788876276958, + "epoch": 0.20550313128737, "grad_norm": 0.0, - "learning_rate": 1.842290932988873e-05, - "loss": 0.9297, + "learning_rate": 1.8427703643463118e-05, + "loss": 1.0023, "step": 7252 }, { - "epoch": 0.20581725312145288, + "epoch": 0.20553146872963246, "grad_norm": 0.0, - "learning_rate": 1.8422413889935678e-05, - "loss": 0.8785, + "learning_rate": 1.8427209586540392e-05, + "loss": 0.8556, "step": 7253 }, { - "epoch": 0.2058456299659478, + "epoch": 0.20555980617189493, "grad_norm": 0.0, - "learning_rate": 1.8421918378838125e-05, - "loss": 1.0914, + "learning_rate": 1.8426715458632154e-05, + "loss": 1.0618, "step": 7254 }, { - "epoch": 0.20587400681044268, + "epoch": 0.2055881436141574, "grad_norm": 0.0, - "learning_rate": 1.8421422796600243e-05, - "loss": 1.0403, + "learning_rate": 1.842622125974256e-05, + "loss": 1.0212, "step": 7255 }, { - "epoch": 0.20590238365493757, + "epoch": 0.20561648105641986, "grad_norm": 0.0, - "learning_rate": 1.8420927143226226e-05, - "loss": 1.015, + "learning_rate": 1.8425726989875777e-05, + "loss": 0.9821, "step": 7256 }, { - "epoch": 0.20593076049943246, + "epoch": 0.2056448184986823, "grad_norm": 0.0, - "learning_rate": 1.842043141872026e-05, - "loss": 1.0742, + "learning_rate": 1.842523264903597e-05, + "loss": 1.0434, "step": 7257 }, { - "epoch": 0.20595913734392735, + "epoch": 0.20567315594094476, "grad_norm": 0.0, - "learning_rate": 1.8419935623086532e-05, - "loss": 0.9916, + "learning_rate": 1.842473823722729e-05, + "loss": 1.0841, "step": 7258 }, { - "epoch": 0.20598751418842226, + "epoch": 0.20570149338320723, "grad_norm": 0.0, - "learning_rate": 1.8419439756329226e-05, - "loss": 1.043, + "learning_rate": 1.842424375445392e-05, + "loss": 1.0513, "step": 7259 }, { - "epoch": 0.20601589103291715, + "epoch": 0.2057298308254697, "grad_norm": 0.0, - "learning_rate": 1.8418943818452536e-05, - "loss": 0.9937, + "learning_rate": 1.8423749200720017e-05, + "loss": 1.0061, "step": 7260 }, { - "epoch": 0.20604426787741204, + "epoch": 0.20575816826773216, "grad_norm": 0.0, - "learning_rate": 1.8418447809460648e-05, - "loss": 1.0833, + "learning_rate": 1.8423254576029744e-05, + "loss": 1.0773, "step": 7261 }, { - "epoch": 0.20607264472190692, + "epoch": 0.20578650570999463, "grad_norm": 0.0, - "learning_rate": 1.841795172935775e-05, - "loss": 0.925, + "learning_rate": 1.842275988038727e-05, + "loss": 1.041, "step": 7262 }, { - "epoch": 0.2061010215664018, + "epoch": 0.20581484315225707, "grad_norm": 0.0, - "learning_rate": 1.8417455578148042e-05, - "loss": 0.8857, + "learning_rate": 1.842226511379676e-05, + "loss": 1.0204, "step": 7263 }, { - "epoch": 0.2061293984108967, + "epoch": 0.20584318059451953, "grad_norm": 0.0, - "learning_rate": 1.8416959355835703e-05, - "loss": 1.0507, + "learning_rate": 1.8421770276262386e-05, + "loss": 0.904, "step": 7264 }, { - "epoch": 0.2061577752553916, + "epoch": 0.205871518036782, "grad_norm": 0.0, - "learning_rate": 1.8416463062424933e-05, - "loss": 0.9826, + "learning_rate": 1.842127536778831e-05, + "loss": 1.0078, "step": 7265 }, { - "epoch": 0.2061861520998865, + "epoch": 0.20589985547904446, "grad_norm": 0.0, - "learning_rate": 1.8415966697919924e-05, - "loss": 0.9995, + "learning_rate": 1.8420780388378708e-05, + "loss": 0.9718, "step": 7266 }, { - "epoch": 0.2062145289443814, + "epoch": 0.20592819292130693, "grad_norm": 0.0, - "learning_rate": 1.841547026232486e-05, - "loss": 1.024, + "learning_rate": 1.8420285338037747e-05, + "loss": 0.9913, "step": 7267 }, { - "epoch": 0.20624290578887627, + "epoch": 0.2059565303635694, "grad_norm": 0.0, - "learning_rate": 1.8414973755643942e-05, - "loss": 1.121, + "learning_rate": 1.8419790216769594e-05, + "loss": 0.9554, "step": 7268 }, { - "epoch": 0.20627128263337116, + "epoch": 0.20598486780583183, "grad_norm": 0.0, - "learning_rate": 1.8414477177881366e-05, - "loss": 1.0197, + "learning_rate": 1.8419295024578417e-05, + "loss": 1.1017, "step": 7269 }, { - "epoch": 0.20629965947786605, + "epoch": 0.2060132052480943, "grad_norm": 0.0, - "learning_rate": 1.8413980529041318e-05, - "loss": 0.9521, + "learning_rate": 1.8418799761468397e-05, + "loss": 0.9688, "step": 7270 }, { - "epoch": 0.20632803632236096, + "epoch": 0.20604154269035677, "grad_norm": 0.0, - "learning_rate": 1.8413483809128002e-05, - "loss": 0.9537, + "learning_rate": 1.84183044274437e-05, + "loss": 0.9302, "step": 7271 }, { - "epoch": 0.20635641316685585, + "epoch": 0.20606988013261923, "grad_norm": 0.0, - "learning_rate": 1.8412987018145607e-05, - "loss": 1.0333, + "learning_rate": 1.8417809022508498e-05, + "loss": 0.9849, "step": 7272 }, { - "epoch": 0.20638479001135074, + "epoch": 0.2060982175748817, "grad_norm": 0.0, - "learning_rate": 1.8412490156098336e-05, - "loss": 0.8684, + "learning_rate": 1.8417313546666963e-05, + "loss": 1.1033, "step": 7273 }, { - "epoch": 0.20641316685584563, + "epoch": 0.20612655501714416, "grad_norm": 0.0, - "learning_rate": 1.8411993222990378e-05, - "loss": 1.0895, + "learning_rate": 1.841681799992327e-05, + "loss": 0.9613, "step": 7274 }, { - "epoch": 0.2064415437003405, + "epoch": 0.2061548924594066, "grad_norm": 0.0, - "learning_rate": 1.8411496218825938e-05, - "loss": 0.9418, + "learning_rate": 1.8416322382281597e-05, + "loss": 1.1103, "step": 7275 }, { - "epoch": 0.20646992054483543, + "epoch": 0.20618322990166907, "grad_norm": 0.0, - "learning_rate": 1.841099914360921e-05, - "loss": 0.9363, + "learning_rate": 1.841582669374611e-05, + "loss": 0.9522, "step": 7276 }, { - "epoch": 0.20649829738933032, + "epoch": 0.20621156734393153, "grad_norm": 0.0, - "learning_rate": 1.8410501997344394e-05, - "loss": 1.0569, + "learning_rate": 1.8415330934320995e-05, + "loss": 1.0595, "step": 7277 }, { - "epoch": 0.2065266742338252, + "epoch": 0.206239904786194, "grad_norm": 0.0, - "learning_rate": 1.841000478003569e-05, - "loss": 1.033, + "learning_rate": 1.8414835104010422e-05, + "loss": 1.0109, "step": 7278 }, { - "epoch": 0.2065550510783201, + "epoch": 0.20626824222845647, "grad_norm": 0.0, - "learning_rate": 1.84095074916873e-05, - "loss": 0.9981, + "learning_rate": 1.8414339202818564e-05, + "loss": 1.0129, "step": 7279 }, { - "epoch": 0.20658342792281498, + "epoch": 0.20629657967071893, "grad_norm": 0.0, - "learning_rate": 1.840901013230342e-05, - "loss": 1.0193, + "learning_rate": 1.8413843230749607e-05, + "loss": 1.0395, "step": 7280 }, { - "epoch": 0.20661180476730986, + "epoch": 0.20632491711298137, "grad_norm": 0.0, - "learning_rate": 1.8408512701888256e-05, - "loss": 1.0761, + "learning_rate": 1.8413347187807724e-05, + "loss": 1.011, "step": 7281 }, { - "epoch": 0.20664018161180478, + "epoch": 0.20635325455524384, "grad_norm": 0.0, - "learning_rate": 1.8408015200446e-05, - "loss": 0.8506, + "learning_rate": 1.841285107399709e-05, + "loss": 0.9326, "step": 7282 }, { - "epoch": 0.20666855845629967, + "epoch": 0.2063815919975063, "grad_norm": 0.0, - "learning_rate": 1.840751762798087e-05, - "loss": 0.9056, + "learning_rate": 1.8412354889321888e-05, + "loss": 1.0433, "step": 7283 }, { - "epoch": 0.20669693530079455, + "epoch": 0.20640992943976877, "grad_norm": 0.0, - "learning_rate": 1.8407019984497057e-05, - "loss": 1.1485, + "learning_rate": 1.8411858633786298e-05, + "loss": 0.9976, "step": 7284 }, { - "epoch": 0.20672531214528944, + "epoch": 0.20643826688203123, "grad_norm": 0.0, - "learning_rate": 1.8406522269998766e-05, - "loss": 1.0279, + "learning_rate": 1.84113623073945e-05, + "loss": 0.9738, "step": 7285 }, { - "epoch": 0.20675368898978433, + "epoch": 0.2064666043242937, "grad_norm": 0.0, - "learning_rate": 1.8406024484490207e-05, - "loss": 1.0334, + "learning_rate": 1.841086591015067e-05, + "loss": 1.0478, "step": 7286 }, { - "epoch": 0.20678206583427922, + "epoch": 0.20649494176655614, "grad_norm": 0.0, - "learning_rate": 1.840552662797558e-05, - "loss": 0.9753, + "learning_rate": 1.8410369442058998e-05, + "loss": 1.0434, "step": 7287 }, { - "epoch": 0.20681044267877413, + "epoch": 0.2065232792088186, "grad_norm": 0.0, - "learning_rate": 1.840502870045909e-05, - "loss": 0.972, + "learning_rate": 1.8409872903123657e-05, + "loss": 0.9654, "step": 7288 }, { - "epoch": 0.20683881952326902, + "epoch": 0.20655161665108107, "grad_norm": 0.0, - "learning_rate": 1.840453070194495e-05, - "loss": 1.0021, + "learning_rate": 1.8409376293348836e-05, + "loss": 1.019, "step": 7289 }, { - "epoch": 0.2068671963677639, + "epoch": 0.20657995409334354, "grad_norm": 0.0, - "learning_rate": 1.8404032632437356e-05, - "loss": 0.9167, + "learning_rate": 1.8408879612738716e-05, + "loss": 0.9447, "step": 7290 }, { - "epoch": 0.2068955732122588, + "epoch": 0.206608291535606, "grad_norm": 0.0, - "learning_rate": 1.840353449194052e-05, - "loss": 1.0366, + "learning_rate": 1.840838286129748e-05, + "loss": 0.9483, "step": 7291 }, { - "epoch": 0.20692395005675368, + "epoch": 0.20663662897786847, "grad_norm": 0.0, - "learning_rate": 1.840303628045866e-05, - "loss": 0.9151, + "learning_rate": 1.8407886039029312e-05, + "loss": 1.0778, "step": 7292 }, { - "epoch": 0.20695232690124857, + "epoch": 0.2066649664201309, "grad_norm": 0.0, - "learning_rate": 1.8402537997995963e-05, - "loss": 0.9579, + "learning_rate": 1.84073891459384e-05, + "loss": 0.9529, "step": 7293 }, { - "epoch": 0.20698070374574348, + "epoch": 0.20669330386239337, "grad_norm": 0.0, - "learning_rate": 1.8402039644556654e-05, - "loss": 0.9017, + "learning_rate": 1.8406892182028925e-05, + "loss": 1.0267, "step": 7294 }, { - "epoch": 0.20700908059023837, + "epoch": 0.20672164130465584, "grad_norm": 0.0, - "learning_rate": 1.840154122014494e-05, - "loss": 1.0564, + "learning_rate": 1.8406395147305074e-05, + "loss": 0.9696, "step": 7295 }, { - "epoch": 0.20703745743473326, + "epoch": 0.2067499787469183, "grad_norm": 0.0, - "learning_rate": 1.840104272476503e-05, - "loss": 0.9798, + "learning_rate": 1.8405898041771037e-05, + "loss": 0.9981, "step": 7296 }, { - "epoch": 0.20706583427922814, + "epoch": 0.20677831618918077, "grad_norm": 0.0, - "learning_rate": 1.8400544158421133e-05, - "loss": 0.9855, + "learning_rate": 1.8405400865431e-05, + "loss": 1.0622, "step": 7297 }, { - "epoch": 0.20709421112372303, + "epoch": 0.20680665363144324, "grad_norm": 0.0, - "learning_rate": 1.840004552111746e-05, - "loss": 1.0185, + "learning_rate": 1.840490361828915e-05, + "loss": 0.9299, "step": 7298 }, { - "epoch": 0.20712258796821795, + "epoch": 0.20683499107370568, "grad_norm": 0.0, - "learning_rate": 1.8399546812858225e-05, - "loss": 1.0134, + "learning_rate": 1.8404406300349673e-05, + "loss": 1.0637, "step": 7299 }, { - "epoch": 0.20715096481271283, + "epoch": 0.20686332851596814, "grad_norm": 0.0, - "learning_rate": 1.8399048033647644e-05, - "loss": 1.0356, + "learning_rate": 1.8403908911616764e-05, + "loss": 1.0272, "step": 7300 }, { - "epoch": 0.20717934165720772, + "epoch": 0.2068916659582306, "grad_norm": 0.0, - "learning_rate": 1.8398549183489926e-05, - "loss": 0.9844, + "learning_rate": 1.8403411452094607e-05, + "loss": 0.9885, "step": 7301 }, { - "epoch": 0.2072077185017026, + "epoch": 0.20692000340049307, "grad_norm": 0.0, - "learning_rate": 1.8398050262389285e-05, - "loss": 1.0352, + "learning_rate": 1.8402913921787395e-05, + "loss": 1.054, "step": 7302 }, { - "epoch": 0.2072360953461975, + "epoch": 0.20694834084275554, "grad_norm": 0.0, - "learning_rate": 1.8397551270349936e-05, - "loss": 0.9954, + "learning_rate": 1.840241632069932e-05, + "loss": 1.0055, "step": 7303 }, { - "epoch": 0.20726447219069238, + "epoch": 0.206976678285018, "grad_norm": 0.0, - "learning_rate": 1.839705220737609e-05, - "loss": 0.9693, + "learning_rate": 1.8401918648834573e-05, + "loss": 1.0811, "step": 7304 }, { - "epoch": 0.2072928490351873, + "epoch": 0.20700501572728044, "grad_norm": 0.0, - "learning_rate": 1.839655307347197e-05, - "loss": 1.0083, + "learning_rate": 1.8401420906197346e-05, + "loss": 1.0592, "step": 7305 }, { - "epoch": 0.20732122587968219, + "epoch": 0.2070333531695429, "grad_norm": 0.0, - "learning_rate": 1.839605386864179e-05, - "loss": 1.0342, + "learning_rate": 1.8400923092791827e-05, + "loss": 0.9088, "step": 7306 }, { - "epoch": 0.20734960272417707, + "epoch": 0.20706169061180538, "grad_norm": 0.0, - "learning_rate": 1.8395554592889766e-05, - "loss": 1.0948, + "learning_rate": 1.8400425208622215e-05, + "loss": 0.9206, "step": 7307 }, { - "epoch": 0.20737797956867196, + "epoch": 0.20709002805406784, "grad_norm": 0.0, - "learning_rate": 1.8395055246220114e-05, - "loss": 0.9178, + "learning_rate": 1.83999272536927e-05, + "loss": 0.982, "step": 7308 }, { - "epoch": 0.20740635641316685, + "epoch": 0.2071183654963303, "grad_norm": 0.0, - "learning_rate": 1.839455582863705e-05, - "loss": 0.9418, + "learning_rate": 1.8399429228007484e-05, + "loss": 0.9995, "step": 7309 }, { - "epoch": 0.20743473325766174, + "epoch": 0.20714670293859277, "grad_norm": 0.0, - "learning_rate": 1.8394056340144796e-05, - "loss": 0.9626, + "learning_rate": 1.8398931131570755e-05, + "loss": 0.9039, "step": 7310 }, { - "epoch": 0.20746311010215665, + "epoch": 0.2071750403808552, "grad_norm": 0.0, - "learning_rate": 1.8393556780747573e-05, - "loss": 1.0145, + "learning_rate": 1.8398432964386707e-05, + "loss": 0.9406, "step": 7311 }, { - "epoch": 0.20749148694665154, + "epoch": 0.20720337782311768, "grad_norm": 0.0, - "learning_rate": 1.8393057150449597e-05, - "loss": 0.9738, + "learning_rate": 1.8397934726459544e-05, + "loss": 1.0751, "step": 7312 }, { - "epoch": 0.20751986379114643, + "epoch": 0.20723171526538015, "grad_norm": 0.0, - "learning_rate": 1.8392557449255094e-05, - "loss": 0.8407, + "learning_rate": 1.8397436417793454e-05, + "loss": 1.0565, "step": 7313 }, { - "epoch": 0.2075482406356413, + "epoch": 0.2072600527076426, "grad_norm": 0.0, - "learning_rate": 1.8392057677168274e-05, - "loss": 0.8499, + "learning_rate": 1.8396938038392636e-05, + "loss": 0.9594, "step": 7314 }, { - "epoch": 0.2075766174801362, + "epoch": 0.20728839014990508, "grad_norm": 0.0, - "learning_rate": 1.839155783419337e-05, - "loss": 1.0743, + "learning_rate": 1.8396439588261298e-05, + "loss": 0.9184, "step": 7315 }, { - "epoch": 0.20760499432463111, + "epoch": 0.20731672759216754, "grad_norm": 0.0, - "learning_rate": 1.8391057920334602e-05, - "loss": 1.0468, + "learning_rate": 1.839594106740363e-05, + "loss": 0.9793, "step": 7316 }, { - "epoch": 0.207633371169126, + "epoch": 0.20734506503442998, "grad_norm": 0.0, - "learning_rate": 1.8390557935596187e-05, - "loss": 0.9048, + "learning_rate": 1.839544247582383e-05, + "loss": 0.8726, "step": 7317 }, { - "epoch": 0.2076617480136209, + "epoch": 0.20737340247669245, "grad_norm": 0.0, - "learning_rate": 1.839005787998235e-05, - "loss": 1.0207, + "learning_rate": 1.8394943813526103e-05, + "loss": 0.9823, "step": 7318 }, { - "epoch": 0.20769012485811578, + "epoch": 0.2074017399189549, "grad_norm": 0.0, - "learning_rate": 1.838955775349732e-05, - "loss": 1.0441, + "learning_rate": 1.8394445080514643e-05, + "loss": 0.9183, "step": 7319 }, { - "epoch": 0.20771850170261066, + "epoch": 0.20743007736121738, "grad_norm": 0.0, - "learning_rate": 1.8389057556145318e-05, - "loss": 1.0535, + "learning_rate": 1.839394627679366e-05, + "loss": 1.0741, "step": 7320 }, { - "epoch": 0.20774687854710555, + "epoch": 0.20745841480347985, "grad_norm": 0.0, - "learning_rate": 1.838855728793057e-05, - "loss": 0.9949, + "learning_rate": 1.8393447402367346e-05, + "loss": 1.0964, "step": 7321 }, { - "epoch": 0.20777525539160047, + "epoch": 0.2074867522457423, "grad_norm": 0.0, - "learning_rate": 1.8388056948857303e-05, - "loss": 0.9929, + "learning_rate": 1.839294845723991e-05, + "loss": 0.9607, "step": 7322 }, { - "epoch": 0.20780363223609535, + "epoch": 0.20751508968800475, "grad_norm": 0.0, - "learning_rate": 1.838755653892974e-05, - "loss": 1.0891, + "learning_rate": 1.839244944141555e-05, + "loss": 0.9374, "step": 7323 }, { - "epoch": 0.20783200908059024, + "epoch": 0.20754342713026722, "grad_norm": 0.0, - "learning_rate": 1.8387056058152107e-05, - "loss": 0.9918, + "learning_rate": 1.8391950354898476e-05, + "loss": 1.0153, "step": 7324 }, { - "epoch": 0.20786038592508513, + "epoch": 0.20757176457252968, "grad_norm": 0.0, - "learning_rate": 1.838655550652864e-05, - "loss": 1.0203, + "learning_rate": 1.8391451197692883e-05, + "loss": 0.9751, "step": 7325 }, { - "epoch": 0.20788876276958002, + "epoch": 0.20760010201479215, "grad_norm": 0.0, - "learning_rate": 1.8386054884063558e-05, - "loss": 0.9066, + "learning_rate": 1.8390951969802983e-05, + "loss": 0.9972, "step": 7326 }, { - "epoch": 0.2079171396140749, + "epoch": 0.20762843945705461, "grad_norm": 0.0, - "learning_rate": 1.8385554190761096e-05, - "loss": 1.0225, + "learning_rate": 1.8390452671232982e-05, + "loss": 0.9455, "step": 7327 }, { - "epoch": 0.20794551645856982, + "epoch": 0.20765677689931708, "grad_norm": 0.0, - "learning_rate": 1.838505342662548e-05, - "loss": 1.0604, + "learning_rate": 1.8389953301987076e-05, + "loss": 1.0361, "step": 7328 }, { - "epoch": 0.2079738933030647, + "epoch": 0.20768511434157952, "grad_norm": 0.0, - "learning_rate": 1.838455259166094e-05, - "loss": 0.9851, + "learning_rate": 1.838945386206948e-05, + "loss": 0.9363, "step": 7329 }, { - "epoch": 0.2080022701475596, + "epoch": 0.20771345178384198, "grad_norm": 0.0, - "learning_rate": 1.8384051685871703e-05, - "loss": 1.0554, + "learning_rate": 1.8388954351484403e-05, + "loss": 0.9043, "step": 7330 }, { - "epoch": 0.20803064699205448, + "epoch": 0.20774178922610445, "grad_norm": 0.0, - "learning_rate": 1.838355070926201e-05, - "loss": 1.059, + "learning_rate": 1.8388454770236044e-05, + "loss": 1.0662, "step": 7331 }, { - "epoch": 0.20805902383654937, + "epoch": 0.20777012666836692, "grad_norm": 0.0, - "learning_rate": 1.838304966183609e-05, - "loss": 0.9639, + "learning_rate": 1.8387955118328617e-05, + "loss": 1.0064, "step": 7332 }, { - "epoch": 0.20808740068104425, + "epoch": 0.20779846411062938, "grad_norm": 0.0, - "learning_rate": 1.8382548543598166e-05, - "loss": 1.1278, + "learning_rate": 1.838745539576633e-05, + "loss": 1.1676, "step": 7333 }, { - "epoch": 0.20811577752553917, + "epoch": 0.20782680155289185, "grad_norm": 0.0, - "learning_rate": 1.838204735455248e-05, - "loss": 0.9385, + "learning_rate": 1.8386955602553393e-05, + "loss": 1.0422, "step": 7334 }, { - "epoch": 0.20814415437003406, + "epoch": 0.2078551389951543, "grad_norm": 0.0, - "learning_rate": 1.8381546094703263e-05, - "loss": 0.934, + "learning_rate": 1.838645573869401e-05, + "loss": 0.9055, "step": 7335 }, { - "epoch": 0.20817253121452894, + "epoch": 0.20788347643741675, "grad_norm": 0.0, - "learning_rate": 1.8381044764054753e-05, - "loss": 1.0237, + "learning_rate": 1.8385955804192396e-05, + "loss": 0.8505, "step": 7336 }, { - "epoch": 0.20820090805902383, + "epoch": 0.20791181387967922, "grad_norm": 0.0, - "learning_rate": 1.8380543362611175e-05, - "loss": 0.9232, + "learning_rate": 1.8385455799052768e-05, + "loss": 1.0002, "step": 7337 }, { - "epoch": 0.20822928490351872, + "epoch": 0.20794015132194169, "grad_norm": 0.0, - "learning_rate": 1.8380041890376778e-05, - "loss": 1.0328, + "learning_rate": 1.8384955723279327e-05, + "loss": 0.8794, "step": 7338 }, { - "epoch": 0.20825766174801363, + "epoch": 0.20796848876420415, "grad_norm": 0.0, - "learning_rate": 1.8379540347355786e-05, - "loss": 1.0173, + "learning_rate": 1.8384455576876292e-05, + "loss": 1.0255, "step": 7339 }, { - "epoch": 0.20828603859250852, + "epoch": 0.20799682620646662, "grad_norm": 0.0, - "learning_rate": 1.8379038733552435e-05, - "loss": 0.9576, + "learning_rate": 1.8383955359847872e-05, + "loss": 1.0451, "step": 7340 }, { - "epoch": 0.2083144154370034, + "epoch": 0.20802516364872906, "grad_norm": 0.0, - "learning_rate": 1.8378537048970974e-05, - "loss": 0.9352, + "learning_rate": 1.8383455072198284e-05, + "loss": 0.9559, "step": 7341 }, { - "epoch": 0.2083427922814983, + "epoch": 0.20805350109099152, "grad_norm": 0.0, - "learning_rate": 1.837803529361563e-05, - "loss": 0.9167, + "learning_rate": 1.8382954713931743e-05, + "loss": 0.9605, "step": 7342 }, { - "epoch": 0.20837116912599318, + "epoch": 0.208081838533254, "grad_norm": 0.0, - "learning_rate": 1.8377533467490648e-05, - "loss": 0.9959, + "learning_rate": 1.8382454285052458e-05, + "loss": 1.0145, "step": 7343 }, { - "epoch": 0.20839954597048807, + "epoch": 0.20811017597551645, "grad_norm": 0.0, - "learning_rate": 1.8377031570600267e-05, - "loss": 0.9463, + "learning_rate": 1.8381953785564653e-05, + "loss": 0.9992, "step": 7344 }, { - "epoch": 0.20842792281498299, + "epoch": 0.20813851341777892, "grad_norm": 0.0, - "learning_rate": 1.837652960294872e-05, - "loss": 1.0743, + "learning_rate": 1.8381453215472532e-05, + "loss": 0.9932, "step": 7345 }, { - "epoch": 0.20845629965947787, + "epoch": 0.20816685086004139, "grad_norm": 0.0, - "learning_rate": 1.837602756454025e-05, - "loss": 0.9938, + "learning_rate": 1.8380952574780323e-05, + "loss": 0.8914, "step": 7346 }, { - "epoch": 0.20848467650397276, + "epoch": 0.20819518830230382, "grad_norm": 0.0, - "learning_rate": 1.83755254553791e-05, - "loss": 0.95, + "learning_rate": 1.8380451863492238e-05, + "loss": 1.0026, "step": 7347 }, { - "epoch": 0.20851305334846765, + "epoch": 0.2082235257445663, "grad_norm": 0.0, - "learning_rate": 1.837502327546951e-05, - "loss": 0.9612, + "learning_rate": 1.837995108161249e-05, + "loss": 0.9301, "step": 7348 }, { - "epoch": 0.20854143019296253, + "epoch": 0.20825186318682876, "grad_norm": 0.0, - "learning_rate": 1.837452102481572e-05, - "loss": 1.0922, + "learning_rate": 1.8379450229145308e-05, + "loss": 1.0231, "step": 7349 }, { - "epoch": 0.20856980703745742, + "epoch": 0.20828020062909122, "grad_norm": 0.0, - "learning_rate": 1.8374018703421977e-05, - "loss": 0.9883, + "learning_rate": 1.8378949306094904e-05, + "loss": 0.962, "step": 7350 }, { - "epoch": 0.20859818388195234, + "epoch": 0.2083085380713537, "grad_norm": 0.0, - "learning_rate": 1.837351631129252e-05, - "loss": 0.981, + "learning_rate": 1.8378448312465495e-05, + "loss": 0.8921, "step": 7351 }, { - "epoch": 0.20862656072644722, + "epoch": 0.20833687551361615, "grad_norm": 0.0, - "learning_rate": 1.83730138484316e-05, - "loss": 1.0873, + "learning_rate": 1.837794724826131e-05, + "loss": 0.9581, "step": 7352 }, { - "epoch": 0.2086549375709421, + "epoch": 0.2083652129558786, "grad_norm": 0.0, - "learning_rate": 1.8372511314843445e-05, - "loss": 0.8064, + "learning_rate": 1.8377446113486562e-05, + "loss": 0.9173, "step": 7353 }, { - "epoch": 0.208683314415437, + "epoch": 0.20839355039814106, "grad_norm": 0.0, - "learning_rate": 1.837200871053232e-05, - "loss": 0.9952, + "learning_rate": 1.8376944908145474e-05, + "loss": 1.0847, "step": 7354 }, { - "epoch": 0.2087116912599319, + "epoch": 0.20842188784040352, "grad_norm": 0.0, - "learning_rate": 1.837150603550246e-05, - "loss": 0.8692, + "learning_rate": 1.8376443632242265e-05, + "loss": 1.0862, "step": 7355 }, { - "epoch": 0.2087400681044268, + "epoch": 0.208450225282666, "grad_norm": 0.0, - "learning_rate": 1.837100328975811e-05, - "loss": 0.9576, + "learning_rate": 1.8375942285781164e-05, + "loss": 1.0071, "step": 7356 }, { - "epoch": 0.2087684449489217, + "epoch": 0.20847856272492846, "grad_norm": 0.0, - "learning_rate": 1.8370500473303515e-05, - "loss": 0.9895, + "learning_rate": 1.8375440868766393e-05, + "loss": 1.0116, "step": 7357 }, { - "epoch": 0.20879682179341658, + "epoch": 0.20850690016719092, "grad_norm": 0.0, - "learning_rate": 1.836999758614293e-05, - "loss": 0.9766, + "learning_rate": 1.837493938120217e-05, + "loss": 1.0535, "step": 7358 }, { - "epoch": 0.20882519863791146, + "epoch": 0.20853523760945336, "grad_norm": 0.0, - "learning_rate": 1.8369494628280598e-05, - "loss": 1.0006, + "learning_rate": 1.8374437823092726e-05, + "loss": 1.0949, "step": 7359 }, { - "epoch": 0.20885357548240635, + "epoch": 0.20856357505171583, "grad_norm": 0.0, - "learning_rate": 1.836899159972077e-05, - "loss": 1.0686, + "learning_rate": 1.8373936194442277e-05, + "loss": 1.009, "step": 7360 }, { - "epoch": 0.20888195232690124, + "epoch": 0.2085919124939783, "grad_norm": 0.0, - "learning_rate": 1.8368488500467695e-05, - "loss": 0.9744, + "learning_rate": 1.837343449525506e-05, + "loss": 1.0983, "step": 7361 }, { - "epoch": 0.20891032917139615, + "epoch": 0.20862024993624076, "grad_norm": 0.0, - "learning_rate": 1.836798533052562e-05, - "loss": 1.0612, + "learning_rate": 1.837293272553529e-05, + "loss": 1.0894, "step": 7362 }, { - "epoch": 0.20893870601589104, + "epoch": 0.20864858737850323, "grad_norm": 0.0, - "learning_rate": 1.8367482089898792e-05, - "loss": 1.1021, + "learning_rate": 1.83724308852872e-05, + "loss": 0.9448, "step": 7363 }, { - "epoch": 0.20896708286038593, + "epoch": 0.2086769248207657, "grad_norm": 0.0, - "learning_rate": 1.8366978778591474e-05, - "loss": 0.9948, + "learning_rate": 1.8371928974515017e-05, + "loss": 0.9926, "step": 7364 }, { - "epoch": 0.20899545970488081, + "epoch": 0.20870526226302813, "grad_norm": 0.0, - "learning_rate": 1.8366475396607907e-05, - "loss": 1.0377, + "learning_rate": 1.8371426993222966e-05, + "loss": 0.9988, "step": 7365 }, { - "epoch": 0.2090238365493757, + "epoch": 0.2087335997052906, "grad_norm": 0.0, - "learning_rate": 1.8365971943952345e-05, - "loss": 1.0159, + "learning_rate": 1.8370924941415277e-05, + "loss": 1.0345, "step": 7366 }, { - "epoch": 0.2090522133938706, + "epoch": 0.20876193714755306, "grad_norm": 0.0, - "learning_rate": 1.836546842062904e-05, - "loss": 0.9618, + "learning_rate": 1.8370422819096177e-05, + "loss": 1.0043, "step": 7367 }, { - "epoch": 0.2090805902383655, + "epoch": 0.20879027458981553, "grad_norm": 0.0, - "learning_rate": 1.8364964826642255e-05, - "loss": 1.0199, + "learning_rate": 1.8369920626269903e-05, + "loss": 1.1373, "step": 7368 }, { - "epoch": 0.2091089670828604, + "epoch": 0.208818612032078, "grad_norm": 0.0, - "learning_rate": 1.836446116199623e-05, - "loss": 1.0186, + "learning_rate": 1.8369418362940675e-05, + "loss": 0.9104, "step": 7369 }, { - "epoch": 0.20913734392735528, + "epoch": 0.20884694947434046, "grad_norm": 0.0, - "learning_rate": 1.8363957426695227e-05, - "loss": 1.0656, + "learning_rate": 1.836891602911273e-05, + "loss": 1.0523, "step": 7370 }, { - "epoch": 0.20916572077185017, + "epoch": 0.2088752869166029, "grad_norm": 0.0, - "learning_rate": 1.83634536207435e-05, - "loss": 1.0148, + "learning_rate": 1.8368413624790297e-05, + "loss": 1.1359, "step": 7371 }, { - "epoch": 0.20919409761634505, + "epoch": 0.20890362435886536, "grad_norm": 0.0, - "learning_rate": 1.8362949744145303e-05, - "loss": 0.9766, + "learning_rate": 1.8367911149977606e-05, + "loss": 0.9838, "step": 7372 }, { - "epoch": 0.20922247446083994, + "epoch": 0.20893196180112783, "grad_norm": 0.0, - "learning_rate": 1.8362445796904896e-05, - "loss": 1.0193, + "learning_rate": 1.8367408604678893e-05, + "loss": 1.0173, "step": 7373 }, { - "epoch": 0.20925085130533486, + "epoch": 0.2089602992433903, "grad_norm": 0.0, - "learning_rate": 1.8361941779026532e-05, - "loss": 0.9228, + "learning_rate": 1.836690598889839e-05, + "loss": 1.0186, "step": 7374 }, { - "epoch": 0.20927922814982974, + "epoch": 0.20898863668565276, "grad_norm": 0.0, - "learning_rate": 1.836143769051447e-05, - "loss": 1.0446, + "learning_rate": 1.8366403302640338e-05, + "loss": 1.0521, "step": 7375 }, { - "epoch": 0.20930760499432463, + "epoch": 0.20901697412791523, "grad_norm": 0.0, - "learning_rate": 1.836093353137297e-05, - "loss": 0.9104, + "learning_rate": 1.8365900545908957e-05, + "loss": 1.0036, "step": 7376 }, { - "epoch": 0.20933598183881952, + "epoch": 0.20904531157017767, "grad_norm": 0.0, - "learning_rate": 1.8360429301606285e-05, - "loss": 1.044, + "learning_rate": 1.836539771870849e-05, + "loss": 1.0884, "step": 7377 }, { - "epoch": 0.2093643586833144, + "epoch": 0.20907364901244013, "grad_norm": 0.0, - "learning_rate": 1.8359925001218676e-05, - "loss": 1.0263, + "learning_rate": 1.8364894821043172e-05, + "loss": 1.0146, "step": 7378 }, { - "epoch": 0.20939273552780932, + "epoch": 0.2091019864547026, "grad_norm": 0.0, - "learning_rate": 1.8359420630214407e-05, - "loss": 1.0719, + "learning_rate": 1.836439185291724e-05, + "loss": 0.9042, "step": 7379 }, { - "epoch": 0.2094211123723042, + "epoch": 0.20913032389696506, "grad_norm": 0.0, - "learning_rate": 1.835891618859774e-05, - "loss": 0.9792, + "learning_rate": 1.8363888814334927e-05, + "loss": 0.9166, "step": 7380 }, { - "epoch": 0.2094494892167991, + "epoch": 0.20915866133922753, "grad_norm": 0.0, - "learning_rate": 1.835841167637293e-05, - "loss": 0.9051, + "learning_rate": 1.8363385705300473e-05, + "loss": 1.0346, "step": 7381 }, { - "epoch": 0.20947786606129398, + "epoch": 0.20918699878148997, "grad_norm": 0.0, - "learning_rate": 1.835790709354424e-05, - "loss": 1.0656, + "learning_rate": 1.836288252581812e-05, + "loss": 1.0419, "step": 7382 }, { - "epoch": 0.20950624290578887, + "epoch": 0.20921533622375243, "grad_norm": 0.0, - "learning_rate": 1.835740244011593e-05, - "loss": 0.8987, + "learning_rate": 1.8362379275892093e-05, + "loss": 1.0627, "step": 7383 }, { - "epoch": 0.20953461975028376, + "epoch": 0.2092436736660149, "grad_norm": 0.0, - "learning_rate": 1.835689771609227e-05, - "loss": 1.1108, + "learning_rate": 1.836187595552665e-05, + "loss": 1.0731, "step": 7384 }, { - "epoch": 0.20956299659477867, + "epoch": 0.20927201110827737, "grad_norm": 0.0, - "learning_rate": 1.835639292147752e-05, - "loss": 0.884, + "learning_rate": 1.8361372564726014e-05, + "loss": 0.9587, "step": 7385 }, { - "epoch": 0.20959137343927356, + "epoch": 0.20930034855053983, "grad_norm": 0.0, - "learning_rate": 1.8355888056275943e-05, - "loss": 1.071, + "learning_rate": 1.8360869103494434e-05, + "loss": 1.0596, "step": 7386 }, { - "epoch": 0.20961975028376845, + "epoch": 0.2093286859928023, "grad_norm": 0.0, - "learning_rate": 1.8355383120491803e-05, - "loss": 0.9833, + "learning_rate": 1.836036557183615e-05, + "loss": 1.0653, "step": 7387 }, { - "epoch": 0.20964812712826333, + "epoch": 0.20935702343506474, "grad_norm": 0.0, - "learning_rate": 1.8354878114129368e-05, - "loss": 0.9076, + "learning_rate": 1.83598619697554e-05, + "loss": 1.0294, "step": 7388 }, { - "epoch": 0.20967650397275822, + "epoch": 0.2093853608773272, "grad_norm": 0.0, - "learning_rate": 1.8354373037192896e-05, - "loss": 0.9953, + "learning_rate": 1.835935829725643e-05, + "loss": 0.9721, "step": 7389 }, { - "epoch": 0.2097048808172531, + "epoch": 0.20941369831958967, "grad_norm": 0.0, - "learning_rate": 1.8353867889686664e-05, - "loss": 1.0235, + "learning_rate": 1.835885455434348e-05, + "loss": 0.896, "step": 7390 }, { - "epoch": 0.20973325766174802, + "epoch": 0.20944203576185214, "grad_norm": 0.0, - "learning_rate": 1.8353362671614933e-05, - "loss": 1.0324, + "learning_rate": 1.8358350741020794e-05, + "loss": 1.0532, "step": 7391 }, { - "epoch": 0.2097616345062429, + "epoch": 0.2094703732041146, "grad_norm": 0.0, - "learning_rate": 1.8352857382981977e-05, - "loss": 0.9965, + "learning_rate": 1.8357846857292615e-05, + "loss": 0.9877, "step": 7392 }, { - "epoch": 0.2097900113507378, + "epoch": 0.20949871064637707, "grad_norm": 0.0, - "learning_rate": 1.8352352023792054e-05, - "loss": 1.0066, + "learning_rate": 1.835734290316319e-05, + "loss": 0.9631, "step": 7393 }, { - "epoch": 0.20981838819523269, + "epoch": 0.2095270480886395, "grad_norm": 0.0, - "learning_rate": 1.835184659404944e-05, - "loss": 0.9989, + "learning_rate": 1.8356838878636764e-05, + "loss": 1.0529, "step": 7394 }, { - "epoch": 0.20984676503972757, + "epoch": 0.20955538553090197, "grad_norm": 0.0, - "learning_rate": 1.83513410937584e-05, - "loss": 0.9445, + "learning_rate": 1.835633478371758e-05, + "loss": 0.9881, "step": 7395 }, { - "epoch": 0.2098751418842225, + "epoch": 0.20958372297316444, "grad_norm": 0.0, - "learning_rate": 1.8350835522923206e-05, - "loss": 1.0095, + "learning_rate": 1.835583061840988e-05, + "loss": 1.0241, "step": 7396 }, { - "epoch": 0.20990351872871738, + "epoch": 0.2096120604154269, "grad_norm": 0.0, - "learning_rate": 1.8350329881548133e-05, - "loss": 1.1174, + "learning_rate": 1.835532638271792e-05, + "loss": 0.935, "step": 7397 }, { - "epoch": 0.20993189557321226, + "epoch": 0.20964039785768937, "grad_norm": 0.0, - "learning_rate": 1.8349824169637444e-05, - "loss": 0.9447, + "learning_rate": 1.8354822076645944e-05, + "loss": 0.9025, "step": 7398 }, { - "epoch": 0.20996027241770715, + "epoch": 0.20966873529995184, "grad_norm": 0.0, - "learning_rate": 1.8349318387195416e-05, - "loss": 0.8416, + "learning_rate": 1.8354317700198198e-05, + "loss": 1.0193, "step": 7399 }, { - "epoch": 0.20998864926220204, + "epoch": 0.20969707274221427, "grad_norm": 0.0, - "learning_rate": 1.834881253422632e-05, - "loss": 0.9836, + "learning_rate": 1.835381325337893e-05, + "loss": 1.1194, "step": 7400 }, { - "epoch": 0.21001702610669692, + "epoch": 0.20972541018447674, "grad_norm": 0.0, - "learning_rate": 1.8348306610734433e-05, - "loss": 1.0144, + "learning_rate": 1.8353308736192396e-05, + "loss": 1.029, "step": 7401 }, { - "epoch": 0.21004540295119184, + "epoch": 0.2097537476267392, "grad_norm": 0.0, - "learning_rate": 1.8347800616724022e-05, - "loss": 0.9676, + "learning_rate": 1.8352804148642836e-05, + "loss": 1.0361, "step": 7402 }, { - "epoch": 0.21007377979568673, + "epoch": 0.20978208506900167, "grad_norm": 0.0, - "learning_rate": 1.8347294552199366e-05, - "loss": 1.1057, + "learning_rate": 1.8352299490734506e-05, + "loss": 0.9778, "step": 7403 }, { - "epoch": 0.21010215664018161, + "epoch": 0.20981042251126414, "grad_norm": 0.0, - "learning_rate": 1.8346788417164735e-05, - "loss": 1.0139, + "learning_rate": 1.8351794762471656e-05, + "loss": 0.944, "step": 7404 }, { - "epoch": 0.2101305334846765, + "epoch": 0.2098387599535266, "grad_norm": 0.0, - "learning_rate": 1.8346282211624404e-05, - "loss": 0.952, + "learning_rate": 1.8351289963858535e-05, + "loss": 1.0697, "step": 7405 }, { - "epoch": 0.2101589103291714, + "epoch": 0.20986709739578904, "grad_norm": 0.0, - "learning_rate": 1.8345775935582657e-05, - "loss": 0.9596, + "learning_rate": 1.8350785094899403e-05, + "loss": 0.9048, "step": 7406 }, { - "epoch": 0.21018728717366628, + "epoch": 0.2098954348380515, "grad_norm": 0.0, - "learning_rate": 1.8345269589043765e-05, - "loss": 1.0257, + "learning_rate": 1.8350280155598504e-05, + "loss": 1.0027, "step": 7407 }, { - "epoch": 0.2102156640181612, + "epoch": 0.20992377228031397, "grad_norm": 0.0, - "learning_rate": 1.8344763172012005e-05, - "loss": 0.9267, + "learning_rate": 1.8349775145960094e-05, + "loss": 1.0859, "step": 7408 }, { - "epoch": 0.21024404086265608, + "epoch": 0.20995210972257644, "grad_norm": 0.0, - "learning_rate": 1.8344256684491654e-05, - "loss": 0.9589, + "learning_rate": 1.834927006598843e-05, + "loss": 0.9441, "step": 7409 }, { - "epoch": 0.21027241770715097, + "epoch": 0.2099804471648389, "grad_norm": 0.0, - "learning_rate": 1.834375012648699e-05, - "loss": 0.8203, + "learning_rate": 1.834876491568776e-05, + "loss": 0.8908, "step": 7410 }, { - "epoch": 0.21030079455164585, + "epoch": 0.21000878460710137, "grad_norm": 0.0, - "learning_rate": 1.83432434980023e-05, - "loss": 0.9687, + "learning_rate": 1.8348259695062344e-05, + "loss": 0.9854, "step": 7411 }, { - "epoch": 0.21032917139614074, + "epoch": 0.2100371220493638, "grad_norm": 0.0, - "learning_rate": 1.834273679904185e-05, - "loss": 0.8729, + "learning_rate": 1.834775440411644e-05, + "loss": 0.9824, "step": 7412 }, { - "epoch": 0.21035754824063563, + "epoch": 0.21006545949162628, "grad_norm": 0.0, - "learning_rate": 1.834223002960993e-05, - "loss": 1.0542, + "learning_rate": 1.8347249042854294e-05, + "loss": 1.0587, "step": 7413 }, { - "epoch": 0.21038592508513054, + "epoch": 0.21009379693388874, "grad_norm": 0.0, - "learning_rate": 1.8341723189710824e-05, - "loss": 0.9509, + "learning_rate": 1.8346743611280174e-05, + "loss": 0.9819, "step": 7414 }, { - "epoch": 0.21041430192962543, + "epoch": 0.2101221343761512, "grad_norm": 0.0, - "learning_rate": 1.83412162793488e-05, - "loss": 0.9088, + "learning_rate": 1.8346238109398335e-05, + "loss": 1.0575, "step": 7415 }, { - "epoch": 0.21044267877412032, + "epoch": 0.21015047181841368, "grad_norm": 0.0, - "learning_rate": 1.834070929852815e-05, - "loss": 0.9227, + "learning_rate": 1.834573253721303e-05, + "loss": 0.9693, "step": 7416 }, { - "epoch": 0.2104710556186152, + "epoch": 0.21017880926067614, "grad_norm": 0.0, - "learning_rate": 1.8340202247253152e-05, - "loss": 0.9191, + "learning_rate": 1.834522689472852e-05, + "loss": 0.973, "step": 7417 }, { - "epoch": 0.2104994324631101, + "epoch": 0.21020714670293858, "grad_norm": 0.0, - "learning_rate": 1.833969512552809e-05, - "loss": 1.0043, + "learning_rate": 1.8344721181949065e-05, + "loss": 1.0003, "step": 7418 }, { - "epoch": 0.210527809307605, + "epoch": 0.21023548414520105, "grad_norm": 0.0, - "learning_rate": 1.833918793335725e-05, - "loss": 1.0207, + "learning_rate": 1.8344215398878925e-05, + "loss": 1.0147, "step": 7419 }, { - "epoch": 0.2105561861520999, + "epoch": 0.2102638215874635, "grad_norm": 0.0, - "learning_rate": 1.8338680670744913e-05, - "loss": 1.0222, + "learning_rate": 1.8343709545522364e-05, + "loss": 1.0548, "step": 7420 }, { - "epoch": 0.21058456299659478, + "epoch": 0.21029215902972598, "grad_norm": 0.0, - "learning_rate": 1.833817333769537e-05, - "loss": 1.036, + "learning_rate": 1.8343203621883634e-05, + "loss": 1.0036, "step": 7421 }, { - "epoch": 0.21061293984108967, + "epoch": 0.21032049647198844, "grad_norm": 0.0, - "learning_rate": 1.83376659342129e-05, - "loss": 0.9336, + "learning_rate": 1.8342697627967006e-05, + "loss": 1.01, "step": 7422 }, { - "epoch": 0.21064131668558456, + "epoch": 0.2103488339142509, "grad_norm": 0.0, - "learning_rate": 1.8337158460301786e-05, - "loss": 0.9284, + "learning_rate": 1.8342191563776738e-05, + "loss": 0.9313, "step": 7423 }, { - "epoch": 0.21066969353007944, + "epoch": 0.21037717135651335, "grad_norm": 0.0, - "learning_rate": 1.8336650915966325e-05, - "loss": 0.9552, + "learning_rate": 1.8341685429317087e-05, + "loss": 0.9803, "step": 7424 }, { - "epoch": 0.21069807037457436, + "epoch": 0.21040550879877581, "grad_norm": 0.0, - "learning_rate": 1.8336143301210794e-05, - "loss": 1.0821, + "learning_rate": 1.8341179224592327e-05, + "loss": 0.9754, "step": 7425 }, { - "epoch": 0.21072644721906925, + "epoch": 0.21043384624103828, "grad_norm": 0.0, - "learning_rate": 1.833563561603949e-05, - "loss": 0.9196, + "learning_rate": 1.8340672949606715e-05, + "loss": 1.0738, "step": 7426 }, { - "epoch": 0.21075482406356413, + "epoch": 0.21046218368330075, "grad_norm": 0.0, - "learning_rate": 1.83351278604567e-05, - "loss": 0.9997, + "learning_rate": 1.8340166604364518e-05, + "loss": 0.9358, "step": 7427 }, { - "epoch": 0.21078320090805902, + "epoch": 0.2104905211255632, "grad_norm": 0.0, - "learning_rate": 1.8334620034466706e-05, - "loss": 0.9113, + "learning_rate": 1.833966018887e-05, + "loss": 0.9293, "step": 7428 }, { - "epoch": 0.2108115777525539, + "epoch": 0.21051885856782568, "grad_norm": 0.0, - "learning_rate": 1.8334112138073805e-05, - "loss": 0.9274, + "learning_rate": 1.833915370312743e-05, + "loss": 0.836, "step": 7429 }, { - "epoch": 0.2108399545970488, + "epoch": 0.21054719601008812, "grad_norm": 0.0, - "learning_rate": 1.833360417128228e-05, - "loss": 1.0439, + "learning_rate": 1.8338647147141067e-05, + "loss": 0.9973, "step": 7430 }, { - "epoch": 0.2108683314415437, + "epoch": 0.21057553345235058, "grad_norm": 0.0, - "learning_rate": 1.8333096134096427e-05, - "loss": 1.1146, + "learning_rate": 1.8338140520915185e-05, + "loss": 1.0239, "step": 7431 }, { - "epoch": 0.2108967082860386, + "epoch": 0.21060387089461305, "grad_norm": 0.0, - "learning_rate": 1.8332588026520536e-05, - "loss": 1.0612, + "learning_rate": 1.833763382445405e-05, + "loss": 0.9044, "step": 7432 }, { - "epoch": 0.21092508513053349, + "epoch": 0.21063220833687551, "grad_norm": 0.0, - "learning_rate": 1.83320798485589e-05, - "loss": 0.8653, + "learning_rate": 1.833712705776193e-05, + "loss": 1.0538, "step": 7433 }, { - "epoch": 0.21095346197502837, + "epoch": 0.21066054577913798, "grad_norm": 0.0, - "learning_rate": 1.8331571600215813e-05, - "loss": 0.9632, + "learning_rate": 1.833662022084309e-05, + "loss": 1.0321, "step": 7434 }, { - "epoch": 0.21098183881952326, + "epoch": 0.21068888322140045, "grad_norm": 0.0, - "learning_rate": 1.8331063281495564e-05, - "loss": 1.008, + "learning_rate": 1.8336113313701807e-05, + "loss": 1.0609, "step": 7435 }, { - "epoch": 0.21101021566401817, + "epoch": 0.21071722066366289, "grad_norm": 0.0, - "learning_rate": 1.833055489240245e-05, - "loss": 1.0521, + "learning_rate": 1.833560633634234e-05, + "loss": 1.0759, "step": 7436 }, { - "epoch": 0.21103859250851306, + "epoch": 0.21074555810592535, "grad_norm": 0.0, - "learning_rate": 1.8330046432940764e-05, - "loss": 1.0359, + "learning_rate": 1.833509928876897e-05, + "loss": 1.0511, "step": 7437 }, { - "epoch": 0.21106696935300795, + "epoch": 0.21077389554818782, "grad_norm": 0.0, - "learning_rate": 1.83295379031148e-05, - "loss": 1.0756, + "learning_rate": 1.833459217098596e-05, + "loss": 1.0214, "step": 7438 }, { - "epoch": 0.21109534619750284, + "epoch": 0.21080223299045028, "grad_norm": 0.0, - "learning_rate": 1.8329029302928858e-05, - "loss": 0.9068, + "learning_rate": 1.833408498299759e-05, + "loss": 0.9818, "step": 7439 }, { - "epoch": 0.21112372304199772, + "epoch": 0.21083057043271275, "grad_norm": 0.0, - "learning_rate": 1.8328520632387226e-05, - "loss": 1.1057, + "learning_rate": 1.8333577724808123e-05, + "loss": 0.968, "step": 7440 }, { - "epoch": 0.2111520998864926, + "epoch": 0.21085890787497522, "grad_norm": 0.0, - "learning_rate": 1.832801189149421e-05, - "loss": 1.0424, + "learning_rate": 1.8333070396421838e-05, + "loss": 0.9173, "step": 7441 }, { - "epoch": 0.21118047673098753, + "epoch": 0.21088724531723765, "grad_norm": 0.0, - "learning_rate": 1.8327503080254107e-05, - "loss": 1.0059, + "learning_rate": 1.8332562997843007e-05, + "loss": 0.9077, "step": 7442 }, { - "epoch": 0.2112088535754824, + "epoch": 0.21091558275950012, "grad_norm": 0.0, - "learning_rate": 1.83269941986712e-05, - "loss": 1.0249, + "learning_rate": 1.8332055529075902e-05, + "loss": 0.9843, "step": 7443 }, { - "epoch": 0.2112372304199773, + "epoch": 0.21094392020176259, "grad_norm": 0.0, - "learning_rate": 1.832648524674981e-05, - "loss": 1.0664, + "learning_rate": 1.83315479901248e-05, + "loss": 0.9495, "step": 7444 }, { - "epoch": 0.2112656072644722, + "epoch": 0.21097225764402505, "grad_norm": 0.0, - "learning_rate": 1.832597622449422e-05, - "loss": 1.1174, + "learning_rate": 1.8331040380993977e-05, + "loss": 1.1165, "step": 7445 }, { - "epoch": 0.21129398410896708, + "epoch": 0.21100059508628752, "grad_norm": 0.0, - "learning_rate": 1.8325467131908735e-05, - "loss": 0.9395, + "learning_rate": 1.8330532701687705e-05, + "loss": 0.9809, "step": 7446 }, { - "epoch": 0.21132236095346196, + "epoch": 0.21102893252854998, "grad_norm": 0.0, - "learning_rate": 1.8324957968997652e-05, - "loss": 1.0506, + "learning_rate": 1.8330024952210263e-05, + "loss": 0.9505, "step": 7447 }, { - "epoch": 0.21135073779795688, + "epoch": 0.21105726997081242, "grad_norm": 0.0, - "learning_rate": 1.8324448735765278e-05, - "loss": 1.0117, + "learning_rate": 1.8329517132565926e-05, + "loss": 1.0171, "step": 7448 }, { - "epoch": 0.21137911464245177, + "epoch": 0.2110856074130749, "grad_norm": 0.0, - "learning_rate": 1.8323939432215908e-05, - "loss": 0.9359, + "learning_rate": 1.8329009242758977e-05, + "loss": 1.0414, "step": 7449 }, { - "epoch": 0.21140749148694665, + "epoch": 0.21111394485533735, "grad_norm": 0.0, - "learning_rate": 1.832343005835385e-05, - "loss": 1.0237, + "learning_rate": 1.8328501282793688e-05, + "loss": 1.024, "step": 7450 }, { - "epoch": 0.21143586833144154, + "epoch": 0.21114228229759982, "grad_norm": 0.0, - "learning_rate": 1.8322920614183405e-05, - "loss": 0.9547, + "learning_rate": 1.832799325267434e-05, + "loss": 1.0068, "step": 7451 }, { - "epoch": 0.21146424517593643, + "epoch": 0.2111706197398623, "grad_norm": 0.0, - "learning_rate": 1.8322411099708876e-05, - "loss": 1.0231, + "learning_rate": 1.832748515240521e-05, + "loss": 1.0307, "step": 7452 }, { - "epoch": 0.21149262202043131, + "epoch": 0.21119895718212475, "grad_norm": 0.0, - "learning_rate": 1.8321901514934562e-05, - "loss": 1.0715, + "learning_rate": 1.832697698199058e-05, + "loss": 0.9348, "step": 7453 }, { - "epoch": 0.21152099886492623, + "epoch": 0.2112272946243872, "grad_norm": 0.0, - "learning_rate": 1.8321391859864777e-05, - "loss": 1.0689, + "learning_rate": 1.8326468741434736e-05, + "loss": 1.0751, "step": 7454 }, { - "epoch": 0.21154937570942112, + "epoch": 0.21125563206664966, "grad_norm": 0.0, - "learning_rate": 1.8320882134503818e-05, - "loss": 0.9749, + "learning_rate": 1.8325960430741954e-05, + "loss": 1.0059, "step": 7455 }, { - "epoch": 0.211577752553916, + "epoch": 0.21128396950891212, "grad_norm": 0.0, - "learning_rate": 1.8320372338855993e-05, - "loss": 1.0152, + "learning_rate": 1.8325452049916514e-05, + "loss": 0.964, "step": 7456 }, { - "epoch": 0.2116061293984109, + "epoch": 0.2113123069511746, "grad_norm": 0.0, - "learning_rate": 1.831986247292561e-05, - "loss": 0.932, + "learning_rate": 1.83249435989627e-05, + "loss": 0.9636, "step": 7457 }, { - "epoch": 0.21163450624290578, + "epoch": 0.21134064439343705, "grad_norm": 0.0, - "learning_rate": 1.8319352536716974e-05, - "loss": 0.9852, + "learning_rate": 1.832443507788479e-05, + "loss": 1.0526, "step": 7458 }, { - "epoch": 0.2116628830874007, + "epoch": 0.21136898183569952, "grad_norm": 0.0, - "learning_rate": 1.831884253023439e-05, - "loss": 1.0253, + "learning_rate": 1.8323926486687076e-05, + "loss": 0.9297, "step": 7459 }, { - "epoch": 0.21169125993189558, + "epoch": 0.21139731927796196, "grad_norm": 0.0, - "learning_rate": 1.8318332453482176e-05, - "loss": 1.0856, + "learning_rate": 1.832341782537384e-05, + "loss": 1.0153, "step": 7460 }, { - "epoch": 0.21171963677639047, + "epoch": 0.21142565672022443, "grad_norm": 0.0, - "learning_rate": 1.831782230646463e-05, - "loss": 1.0414, + "learning_rate": 1.832290909394936e-05, + "loss": 0.9594, "step": 7461 }, { - "epoch": 0.21174801362088536, + "epoch": 0.2114539941624869, "grad_norm": 0.0, - "learning_rate": 1.8317312089186065e-05, - "loss": 0.9156, + "learning_rate": 1.8322400292417928e-05, + "loss": 1.1256, "step": 7462 }, { - "epoch": 0.21177639046538024, + "epoch": 0.21148233160474936, "grad_norm": 0.0, - "learning_rate": 1.8316801801650796e-05, - "loss": 1.0968, + "learning_rate": 1.8321891420783827e-05, + "loss": 0.9701, "step": 7463 }, { - "epoch": 0.21180476730987513, + "epoch": 0.21151066904701182, "grad_norm": 0.0, - "learning_rate": 1.8316291443863125e-05, - "loss": 0.9616, + "learning_rate": 1.832138247905135e-05, + "loss": 1.0672, "step": 7464 }, { - "epoch": 0.21183314415437005, + "epoch": 0.2115390064892743, "grad_norm": 0.0, - "learning_rate": 1.831578101582736e-05, - "loss": 0.9858, + "learning_rate": 1.8320873467224772e-05, + "loss": 0.9213, "step": 7465 }, { - "epoch": 0.21186152099886493, + "epoch": 0.21156734393153673, "grad_norm": 0.0, - "learning_rate": 1.831527051754783e-05, - "loss": 0.9716, + "learning_rate": 1.832036438530839e-05, + "loss": 1.0953, "step": 7466 }, { - "epoch": 0.21188989784335982, + "epoch": 0.2115956813737992, "grad_norm": 0.0, - "learning_rate": 1.8314759949028827e-05, - "loss": 0.9835, + "learning_rate": 1.8319855233306488e-05, + "loss": 0.9952, "step": 7467 }, { - "epoch": 0.2119182746878547, + "epoch": 0.21162401881606166, "grad_norm": 0.0, - "learning_rate": 1.8314249310274676e-05, - "loss": 1.1211, + "learning_rate": 1.8319346011223354e-05, + "loss": 1.0881, "step": 7468 }, { - "epoch": 0.2119466515323496, + "epoch": 0.21165235625832413, "grad_norm": 0.0, - "learning_rate": 1.8313738601289686e-05, - "loss": 1.0212, + "learning_rate": 1.831883671906328e-05, + "loss": 0.9493, "step": 7469 }, { - "epoch": 0.21197502837684448, + "epoch": 0.2116806937005866, "grad_norm": 0.0, - "learning_rate": 1.8313227822078174e-05, - "loss": 1.0113, + "learning_rate": 1.831832735683056e-05, + "loss": 1.0302, "step": 7470 }, { - "epoch": 0.2120034052213394, + "epoch": 0.21170903114284906, "grad_norm": 0.0, - "learning_rate": 1.8312716972644456e-05, - "loss": 0.9471, + "learning_rate": 1.831781792452947e-05, + "loss": 1.0947, "step": 7471 }, { - "epoch": 0.21203178206583428, + "epoch": 0.2117373685851115, "grad_norm": 0.0, - "learning_rate": 1.8312206052992838e-05, - "loss": 1.0494, + "learning_rate": 1.831730842216432e-05, + "loss": 0.888, "step": 7472 }, { - "epoch": 0.21206015891032917, + "epoch": 0.21176570602737396, "grad_norm": 0.0, - "learning_rate": 1.8311695063127647e-05, - "loss": 0.9381, + "learning_rate": 1.831679884973939e-05, + "loss": 0.8613, "step": 7473 }, { - "epoch": 0.21208853575482406, + "epoch": 0.21179404346963643, "grad_norm": 0.0, - "learning_rate": 1.831118400305319e-05, - "loss": 0.9118, + "learning_rate": 1.8316289207258973e-05, + "loss": 0.927, "step": 7474 }, { - "epoch": 0.21211691259931895, + "epoch": 0.2118223809118989, "grad_norm": 0.0, - "learning_rate": 1.8310672872773788e-05, - "loss": 1.0026, + "learning_rate": 1.8315779494727368e-05, + "loss": 0.9945, "step": 7475 }, { - "epoch": 0.21214528944381386, + "epoch": 0.21185071835416136, "grad_norm": 0.0, - "learning_rate": 1.8310161672293763e-05, - "loss": 1.0252, + "learning_rate": 1.831526971214886e-05, + "loss": 0.9157, "step": 7476 }, { - "epoch": 0.21217366628830875, + "epoch": 0.21187905579642383, "grad_norm": 0.0, - "learning_rate": 1.8309650401617425e-05, - "loss": 0.8637, + "learning_rate": 1.8314759859527748e-05, + "loss": 1.0927, "step": 7477 }, { - "epoch": 0.21220204313280364, + "epoch": 0.21190739323868626, "grad_norm": 0.0, - "learning_rate": 1.83091390607491e-05, - "loss": 1.1166, + "learning_rate": 1.831424993686833e-05, + "loss": 0.9809, "step": 7478 }, { - "epoch": 0.21223041997729852, + "epoch": 0.21193573068094873, "grad_norm": 0.0, - "learning_rate": 1.8308627649693102e-05, - "loss": 1.111, + "learning_rate": 1.8313739944174894e-05, + "loss": 0.9088, "step": 7479 }, { - "epoch": 0.2122587968217934, + "epoch": 0.2119640681232112, "grad_norm": 0.0, - "learning_rate": 1.830811616845375e-05, - "loss": 1.0094, + "learning_rate": 1.831322988145174e-05, + "loss": 1.0219, "step": 7480 }, { - "epoch": 0.2122871736662883, + "epoch": 0.21199240556547366, "grad_norm": 0.0, - "learning_rate": 1.8307604617035372e-05, - "loss": 0.9517, + "learning_rate": 1.8312719748703163e-05, + "loss": 0.8799, "step": 7481 }, { - "epoch": 0.2123155505107832, + "epoch": 0.21202074300773613, "grad_norm": 0.0, - "learning_rate": 1.830709299544228e-05, - "loss": 0.9597, + "learning_rate": 1.8312209545933458e-05, + "loss": 0.9363, "step": 7482 }, { - "epoch": 0.2123439273552781, + "epoch": 0.2120490804499986, "grad_norm": 0.0, - "learning_rate": 1.8306581303678804e-05, - "loss": 0.9599, + "learning_rate": 1.8311699273146933e-05, + "loss": 1.0133, "step": 7483 }, { - "epoch": 0.212372304199773, + "epoch": 0.21207741789226103, "grad_norm": 0.0, - "learning_rate": 1.830606954174926e-05, - "loss": 0.9596, + "learning_rate": 1.8311188930347873e-05, + "loss": 0.9933, "step": 7484 }, { - "epoch": 0.21240068104426787, + "epoch": 0.2121057553345235, "grad_norm": 0.0, - "learning_rate": 1.830555770965797e-05, - "loss": 1.185, + "learning_rate": 1.831067851754058e-05, + "loss": 0.9152, "step": 7485 }, { - "epoch": 0.21242905788876276, + "epoch": 0.21213409277678597, "grad_norm": 0.0, - "learning_rate": 1.8305045807409266e-05, - "loss": 0.9902, + "learning_rate": 1.831016803472936e-05, + "loss": 1.1488, "step": 7486 }, { - "epoch": 0.21245743473325765, + "epoch": 0.21216243021904843, "grad_norm": 0.0, - "learning_rate": 1.8304533835007466e-05, - "loss": 1.082, + "learning_rate": 1.830965748191851e-05, + "loss": 0.9602, "step": 7487 }, { - "epoch": 0.21248581157775256, + "epoch": 0.2121907676613109, "grad_norm": 0.0, - "learning_rate": 1.8304021792456894e-05, - "loss": 1.1565, + "learning_rate": 1.8309146859112328e-05, + "loss": 1.0173, "step": 7488 }, { - "epoch": 0.21251418842224745, + "epoch": 0.21221910510357336, "grad_norm": 0.0, - "learning_rate": 1.8303509679761877e-05, - "loss": 0.9453, + "learning_rate": 1.8308636166315114e-05, + "loss": 1.0041, "step": 7489 }, { - "epoch": 0.21254256526674234, + "epoch": 0.2122474425458358, "grad_norm": 0.0, - "learning_rate": 1.830299749692674e-05, - "loss": 1.0139, + "learning_rate": 1.8308125403531175e-05, + "loss": 1.0789, "step": 7490 }, { - "epoch": 0.21257094211123723, + "epoch": 0.21227577998809827, "grad_norm": 0.0, - "learning_rate": 1.830248524395581e-05, - "loss": 0.9254, + "learning_rate": 1.8307614570764806e-05, + "loss": 0.9185, "step": 7491 }, { - "epoch": 0.21259931895573211, + "epoch": 0.21230411743036073, "grad_norm": 0.0, - "learning_rate": 1.8301972920853416e-05, - "loss": 1.0298, + "learning_rate": 1.8307103668020318e-05, + "loss": 0.9931, "step": 7492 }, { - "epoch": 0.212627695800227, + "epoch": 0.2123324548726232, "grad_norm": 0.0, - "learning_rate": 1.830146052762388e-05, - "loss": 0.9833, + "learning_rate": 1.830659269530201e-05, + "loss": 0.9483, "step": 7493 }, { - "epoch": 0.21265607264472192, + "epoch": 0.21236079231488567, "grad_norm": 0.0, - "learning_rate": 1.8300948064271536e-05, - "loss": 1.093, + "learning_rate": 1.8306081652614192e-05, + "loss": 0.9184, "step": 7494 }, { - "epoch": 0.2126844494892168, + "epoch": 0.21238912975714813, "grad_norm": 0.0, - "learning_rate": 1.8300435530800712e-05, - "loss": 0.925, + "learning_rate": 1.830557053996116e-05, + "loss": 0.9761, "step": 7495 }, { - "epoch": 0.2127128263337117, + "epoch": 0.21241746719941057, "grad_norm": 0.0, - "learning_rate": 1.8299922927215733e-05, - "loss": 0.9834, + "learning_rate": 1.8305059357347222e-05, + "loss": 0.9017, "step": 7496 }, { - "epoch": 0.21274120317820658, + "epoch": 0.21244580464167304, "grad_norm": 0.0, - "learning_rate": 1.8299410253520932e-05, - "loss": 1.0134, + "learning_rate": 1.8304548104776687e-05, + "loss": 1.0192, "step": 7497 }, { - "epoch": 0.21276958002270147, + "epoch": 0.2124741420839355, "grad_norm": 0.0, - "learning_rate": 1.829889750972064e-05, - "loss": 0.8639, + "learning_rate": 1.8304036782253858e-05, + "loss": 1.0472, "step": 7498 }, { - "epoch": 0.21279795686719638, + "epoch": 0.21250247952619797, "grad_norm": 0.0, - "learning_rate": 1.829838469581919e-05, - "loss": 0.9184, + "learning_rate": 1.8303525389783045e-05, + "loss": 1.0535, "step": 7499 }, { - "epoch": 0.21282633371169127, + "epoch": 0.21253081696846043, "grad_norm": 0.0, - "learning_rate": 1.8297871811820907e-05, - "loss": 0.9333, + "learning_rate": 1.830301392736855e-05, + "loss": 0.9445, "step": 7500 }, { - "epoch": 0.21285471055618616, + "epoch": 0.2125591544107229, "grad_norm": 0.0, - "learning_rate": 1.8297358857730134e-05, - "loss": 0.9309, + "learning_rate": 1.830250239501469e-05, + "loss": 1.0567, "step": 7501 }, { - "epoch": 0.21288308740068104, + "epoch": 0.21258749185298534, "grad_norm": 0.0, - "learning_rate": 1.8296845833551192e-05, - "loss": 1.1852, + "learning_rate": 1.830199079272577e-05, + "loss": 1.038, "step": 7502 }, { - "epoch": 0.21291146424517593, + "epoch": 0.2126158292952478, "grad_norm": 0.0, - "learning_rate": 1.8296332739288422e-05, - "loss": 1.0369, + "learning_rate": 1.8301479120506097e-05, + "loss": 0.9789, "step": 7503 }, { - "epoch": 0.21293984108967082, + "epoch": 0.21264416673751027, "grad_norm": 0.0, - "learning_rate": 1.8295819574946158e-05, - "loss": 1.0132, + "learning_rate": 1.830096737835998e-05, + "loss": 1.0729, "step": 7504 }, { - "epoch": 0.21296821793416573, + "epoch": 0.21267250417977274, "grad_norm": 0.0, - "learning_rate": 1.8295306340528733e-05, - "loss": 0.9572, + "learning_rate": 1.8300455566291736e-05, + "loss": 1.0117, "step": 7505 }, { - "epoch": 0.21299659477866062, + "epoch": 0.2127008416220352, "grad_norm": 0.0, - "learning_rate": 1.829479303604048e-05, - "loss": 1.0179, + "learning_rate": 1.8299943684305672e-05, + "loss": 1.0462, "step": 7506 }, { - "epoch": 0.2130249716231555, + "epoch": 0.21272917906429767, "grad_norm": 0.0, - "learning_rate": 1.829427966148574e-05, - "loss": 1.0238, + "learning_rate": 1.8299431732406097e-05, + "loss": 1.026, "step": 7507 }, { - "epoch": 0.2130533484676504, + "epoch": 0.2127575165065601, "grad_norm": 0.0, - "learning_rate": 1.8293766216868842e-05, - "loss": 0.9968, + "learning_rate": 1.8298919710597333e-05, + "loss": 1.0623, "step": 7508 }, { - "epoch": 0.21308172531214528, + "epoch": 0.21278585394882257, "grad_norm": 0.0, - "learning_rate": 1.8293252702194134e-05, - "loss": 0.8939, + "learning_rate": 1.829840761888368e-05, + "loss": 0.9028, "step": 7509 }, { - "epoch": 0.21311010215664017, + "epoch": 0.21281419139108504, "grad_norm": 0.0, - "learning_rate": 1.8292739117465945e-05, - "loss": 1.1105, + "learning_rate": 1.8297895457269462e-05, + "loss": 1.1308, "step": 7510 }, { - "epoch": 0.21313847900113508, + "epoch": 0.2128425288333475, "grad_norm": 0.0, - "learning_rate": 1.8292225462688616e-05, - "loss": 1.0125, + "learning_rate": 1.8297383225758986e-05, + "loss": 0.9728, "step": 7511 }, { - "epoch": 0.21316685584562997, + "epoch": 0.21287086627560997, "grad_norm": 0.0, - "learning_rate": 1.8291711737866484e-05, - "loss": 1.0736, + "learning_rate": 1.8296870924356575e-05, + "loss": 0.937, "step": 7512 }, { - "epoch": 0.21319523269012486, + "epoch": 0.21289920371787244, "grad_norm": 0.0, - "learning_rate": 1.8291197943003895e-05, - "loss": 1.0361, + "learning_rate": 1.8296358553066532e-05, + "loss": 1.0539, "step": 7513 }, { - "epoch": 0.21322360953461975, + "epoch": 0.21292754116013488, "grad_norm": 0.0, - "learning_rate": 1.829068407810518e-05, - "loss": 0.9844, + "learning_rate": 1.8295846111893186e-05, + "loss": 1.0457, "step": 7514 }, { - "epoch": 0.21325198637911463, + "epoch": 0.21295587860239734, "grad_norm": 0.0, - "learning_rate": 1.8290170143174685e-05, - "loss": 0.9504, + "learning_rate": 1.8295333600840847e-05, + "loss": 1.0303, "step": 7515 }, { - "epoch": 0.21328036322360955, + "epoch": 0.2129842160446598, "grad_norm": 0.0, - "learning_rate": 1.8289656138216748e-05, - "loss": 1.0349, + "learning_rate": 1.829482101991383e-05, + "loss": 0.8921, "step": 7516 }, { - "epoch": 0.21330874006810444, + "epoch": 0.21301255348692227, "grad_norm": 0.0, - "learning_rate": 1.8289142063235718e-05, - "loss": 0.9719, + "learning_rate": 1.8294308369116457e-05, + "loss": 1.0169, "step": 7517 }, { - "epoch": 0.21333711691259932, + "epoch": 0.21304089092918474, "grad_norm": 0.0, - "learning_rate": 1.828862791823593e-05, - "loss": 0.9694, + "learning_rate": 1.8293795648453043e-05, + "loss": 0.9726, "step": 7518 }, { - "epoch": 0.2133654937570942, + "epoch": 0.2130692283714472, "grad_norm": 0.0, - "learning_rate": 1.8288113703221726e-05, - "loss": 1.0527, + "learning_rate": 1.8293282857927913e-05, + "loss": 0.9293, "step": 7519 }, { - "epoch": 0.2133938706015891, + "epoch": 0.21309756581370964, "grad_norm": 0.0, - "learning_rate": 1.8287599418197457e-05, - "loss": 0.9153, + "learning_rate": 1.8292769997545376e-05, + "loss": 0.9856, "step": 7520 }, { - "epoch": 0.21342224744608398, + "epoch": 0.2131259032559721, "grad_norm": 0.0, - "learning_rate": 1.8287085063167464e-05, - "loss": 1.0159, + "learning_rate": 1.829225706730976e-05, + "loss": 0.9716, "step": 7521 }, { - "epoch": 0.2134506242905789, + "epoch": 0.21315424069823458, "grad_norm": 0.0, - "learning_rate": 1.8286570638136086e-05, - "loss": 0.8586, + "learning_rate": 1.8291744067225387e-05, + "loss": 1.0104, "step": 7522 }, { - "epoch": 0.2134790011350738, + "epoch": 0.21318257814049704, "grad_norm": 0.0, - "learning_rate": 1.8286056143107677e-05, - "loss": 0.9221, + "learning_rate": 1.8291230997296572e-05, + "loss": 0.9224, "step": 7523 }, { - "epoch": 0.21350737797956867, + "epoch": 0.2132109155827595, "grad_norm": 0.0, - "learning_rate": 1.8285541578086578e-05, - "loss": 0.8843, + "learning_rate": 1.829071785752764e-05, + "loss": 1.0703, "step": 7524 }, { - "epoch": 0.21353575482406356, + "epoch": 0.21323925302502197, "grad_norm": 0.0, - "learning_rate": 1.828502694307714e-05, - "loss": 1.071, + "learning_rate": 1.829020464792291e-05, + "loss": 0.905, "step": 7525 }, { - "epoch": 0.21356413166855845, + "epoch": 0.2132675904672844, "grad_norm": 0.0, - "learning_rate": 1.8284512238083706e-05, - "loss": 0.974, + "learning_rate": 1.828969136848671e-05, + "loss": 1.0144, "step": 7526 }, { - "epoch": 0.21359250851305334, + "epoch": 0.21329592790954688, "grad_norm": 0.0, - "learning_rate": 1.828399746311062e-05, - "loss": 0.9772, + "learning_rate": 1.8289178019223363e-05, + "loss": 0.9595, "step": 7527 }, { - "epoch": 0.21362088535754825, + "epoch": 0.21332426535180934, "grad_norm": 0.0, - "learning_rate": 1.828348261816224e-05, - "loss": 0.9535, + "learning_rate": 1.8288664600137187e-05, + "loss": 1.0121, "step": 7528 }, { - "epoch": 0.21364926220204314, + "epoch": 0.2133526027940718, "grad_norm": 0.0, - "learning_rate": 1.8282967703242905e-05, - "loss": 0.8447, + "learning_rate": 1.8288151111232514e-05, + "loss": 0.852, "step": 7529 }, { - "epoch": 0.21367763904653803, + "epoch": 0.21338094023633428, "grad_norm": 0.0, - "learning_rate": 1.8282452718356976e-05, - "loss": 1.0707, + "learning_rate": 1.8287637552513668e-05, + "loss": 0.9276, "step": 7530 }, { - "epoch": 0.2137060158910329, + "epoch": 0.21340927767859674, "grad_norm": 0.0, - "learning_rate": 1.8281937663508792e-05, - "loss": 1.0317, + "learning_rate": 1.828712392398497e-05, + "loss": 1.0327, "step": 7531 }, { - "epoch": 0.2137343927355278, + "epoch": 0.21343761512085918, "grad_norm": 0.0, - "learning_rate": 1.828142253870271e-05, - "loss": 1.076, + "learning_rate": 1.8286610225650752e-05, + "loss": 1.0549, "step": 7532 }, { - "epoch": 0.2137627695800227, + "epoch": 0.21346595256312165, "grad_norm": 0.0, - "learning_rate": 1.8280907343943077e-05, - "loss": 0.9007, + "learning_rate": 1.8286096457515336e-05, + "loss": 0.9188, "step": 7533 }, { - "epoch": 0.2137911464245176, + "epoch": 0.2134942900053841, "grad_norm": 0.0, - "learning_rate": 1.828039207923425e-05, - "loss": 0.9906, + "learning_rate": 1.8285582619583056e-05, + "loss": 0.9878, "step": 7534 }, { - "epoch": 0.2138195232690125, + "epoch": 0.21352262744764658, "grad_norm": 0.0, - "learning_rate": 1.8279876744580577e-05, - "loss": 1.0619, + "learning_rate": 1.8285068711858237e-05, + "loss": 0.9358, "step": 7535 }, { - "epoch": 0.21384790011350738, + "epoch": 0.21355096488990905, "grad_norm": 0.0, - "learning_rate": 1.8279361339986414e-05, - "loss": 1.1074, + "learning_rate": 1.8284554734345204e-05, + "loss": 1.0164, "step": 7536 }, { - "epoch": 0.21387627695800226, + "epoch": 0.2135793023321715, "grad_norm": 0.0, - "learning_rate": 1.8278845865456115e-05, - "loss": 0.9841, + "learning_rate": 1.828404068704829e-05, + "loss": 1.2352, "step": 7537 }, { - "epoch": 0.21390465380249715, + "epoch": 0.21360763977443395, "grad_norm": 0.0, - "learning_rate": 1.8278330320994035e-05, - "loss": 0.9232, + "learning_rate": 1.8283526569971828e-05, + "loss": 1.0633, "step": 7538 }, { - "epoch": 0.21393303064699207, + "epoch": 0.21363597721669642, "grad_norm": 0.0, - "learning_rate": 1.827781470660452e-05, - "loss": 1.0953, + "learning_rate": 1.8283012383120148e-05, + "loss": 1.0102, "step": 7539 }, { - "epoch": 0.21396140749148695, + "epoch": 0.21366431465895888, "grad_norm": 0.0, - "learning_rate": 1.8277299022291935e-05, - "loss": 1.0913, + "learning_rate": 1.8282498126497575e-05, + "loss": 1.0306, "step": 7540 }, { - "epoch": 0.21398978433598184, + "epoch": 0.21369265210122135, "grad_norm": 0.0, - "learning_rate": 1.8276783268060633e-05, - "loss": 1.0456, + "learning_rate": 1.8281983800108446e-05, + "loss": 1.0609, "step": 7541 }, { - "epoch": 0.21401816118047673, + "epoch": 0.2137209895434838, "grad_norm": 0.0, - "learning_rate": 1.827626744391497e-05, - "loss": 0.9163, + "learning_rate": 1.828146940395709e-05, + "loss": 0.9702, "step": 7542 }, { - "epoch": 0.21404653802497162, + "epoch": 0.21374932698574628, "grad_norm": 0.0, - "learning_rate": 1.8275751549859306e-05, - "loss": 0.906, + "learning_rate": 1.8280954938047844e-05, + "loss": 1.0678, "step": 7543 }, { - "epoch": 0.2140749148694665, + "epoch": 0.21377766442800872, "grad_norm": 0.0, - "learning_rate": 1.8275235585897995e-05, - "loss": 0.9531, + "learning_rate": 1.828044040238504e-05, + "loss": 1.0107, "step": 7544 }, { - "epoch": 0.21410329171396142, + "epoch": 0.21380600187027118, "grad_norm": 0.0, - "learning_rate": 1.82747195520354e-05, - "loss": 0.9955, + "learning_rate": 1.827992579697301e-05, + "loss": 1.0427, "step": 7545 }, { - "epoch": 0.2141316685584563, + "epoch": 0.21383433931253365, "grad_norm": 0.0, - "learning_rate": 1.827420344827587e-05, - "loss": 0.9641, + "learning_rate": 1.827941112181609e-05, + "loss": 1.1002, "step": 7546 }, { - "epoch": 0.2141600454029512, + "epoch": 0.21386267675479612, "grad_norm": 0.0, - "learning_rate": 1.827368727462378e-05, - "loss": 1.0737, + "learning_rate": 1.8278896376918617e-05, + "loss": 1.052, "step": 7547 }, { - "epoch": 0.21418842224744608, + "epoch": 0.21389101419705858, "grad_norm": 0.0, - "learning_rate": 1.8273171031083477e-05, - "loss": 0.9352, + "learning_rate": 1.827838156228493e-05, + "loss": 0.9009, "step": 7548 }, { - "epoch": 0.21421679909194097, + "epoch": 0.21391935163932105, "grad_norm": 0.0, - "learning_rate": 1.8272654717659327e-05, - "loss": 1.1259, + "learning_rate": 1.8277866677919354e-05, + "loss": 0.933, "step": 7549 }, { - "epoch": 0.21424517593643586, + "epoch": 0.2139476890815835, "grad_norm": 0.0, - "learning_rate": 1.827213833435569e-05, - "loss": 0.9319, + "learning_rate": 1.8277351723826237e-05, + "loss": 0.9983, "step": 7550 }, { - "epoch": 0.21427355278093077, + "epoch": 0.21397602652384595, "grad_norm": 0.0, - "learning_rate": 1.827162188117693e-05, - "loss": 0.9369, + "learning_rate": 1.8276836700009908e-05, + "loss": 0.8573, "step": 7551 }, { - "epoch": 0.21430192962542566, + "epoch": 0.21400436396610842, "grad_norm": 0.0, - "learning_rate": 1.827110535812741e-05, - "loss": 0.9954, + "learning_rate": 1.8276321606474713e-05, + "loss": 1.0485, "step": 7552 }, { - "epoch": 0.21433030646992055, + "epoch": 0.21403270140837088, "grad_norm": 0.0, - "learning_rate": 1.8270588765211487e-05, - "loss": 1.037, + "learning_rate": 1.8275806443224987e-05, + "loss": 0.9672, "step": 7553 }, { - "epoch": 0.21435868331441543, + "epoch": 0.21406103885063335, "grad_norm": 0.0, - "learning_rate": 1.827007210243353e-05, - "loss": 0.8869, + "learning_rate": 1.827529121026507e-05, + "loss": 1.0991, "step": 7554 }, { - "epoch": 0.21438706015891032, + "epoch": 0.21408937629289582, "grad_norm": 0.0, - "learning_rate": 1.82695553697979e-05, - "loss": 1.0845, + "learning_rate": 1.8274775907599304e-05, + "loss": 0.9799, "step": 7555 }, { - "epoch": 0.21441543700340523, + "epoch": 0.21411771373515825, "grad_norm": 0.0, - "learning_rate": 1.8269038567308968e-05, - "loss": 1.011, + "learning_rate": 1.8274260535232028e-05, + "loss": 0.892, "step": 7556 }, { - "epoch": 0.21444381384790012, + "epoch": 0.21414605117742072, "grad_norm": 0.0, - "learning_rate": 1.8268521694971096e-05, - "loss": 1.0132, + "learning_rate": 1.827374509316758e-05, + "loss": 1.0403, "step": 7557 }, { - "epoch": 0.214472190692395, + "epoch": 0.2141743886196832, "grad_norm": 0.0, - "learning_rate": 1.8268004752788646e-05, - "loss": 1.052, + "learning_rate": 1.827322958141031e-05, + "loss": 0.9053, "step": 7558 }, { - "epoch": 0.2145005675368899, + "epoch": 0.21420272606194565, "grad_norm": 0.0, - "learning_rate": 1.826748774076599e-05, - "loss": 0.9739, + "learning_rate": 1.8272713999964547e-05, + "loss": 0.9575, "step": 7559 }, { - "epoch": 0.21452894438138478, + "epoch": 0.21423106350420812, "grad_norm": 0.0, - "learning_rate": 1.8266970658907493e-05, - "loss": 1.0864, + "learning_rate": 1.8272198348834648e-05, + "loss": 0.9936, "step": 7560 }, { - "epoch": 0.21455732122587967, + "epoch": 0.21425940094647059, "grad_norm": 0.0, - "learning_rate": 1.826645350721753e-05, - "loss": 1.0158, + "learning_rate": 1.827168262802495e-05, + "loss": 1.0251, "step": 7561 }, { - "epoch": 0.2145856980703746, + "epoch": 0.21428773838873302, "grad_norm": 0.0, - "learning_rate": 1.826593628570045e-05, - "loss": 1.0988, + "learning_rate": 1.8271166837539794e-05, + "loss": 1.0261, "step": 7562 }, { - "epoch": 0.21461407491486947, + "epoch": 0.2143160758309955, "grad_norm": 0.0, - "learning_rate": 1.8265418994360643e-05, - "loss": 1.1136, + "learning_rate": 1.8270650977383533e-05, + "loss": 0.967, "step": 7563 }, { - "epoch": 0.21464245175936436, + "epoch": 0.21434441327325796, "grad_norm": 0.0, - "learning_rate": 1.826490163320247e-05, - "loss": 0.9208, + "learning_rate": 1.8270135047560506e-05, + "loss": 0.988, "step": 7564 }, { - "epoch": 0.21467082860385925, + "epoch": 0.21437275071552042, "grad_norm": 0.0, - "learning_rate": 1.82643842022303e-05, - "loss": 1.033, + "learning_rate": 1.8269619048075056e-05, + "loss": 0.942, "step": 7565 }, { - "epoch": 0.21469920544835414, + "epoch": 0.2144010881577829, "grad_norm": 0.0, - "learning_rate": 1.8263866701448502e-05, - "loss": 0.9415, + "learning_rate": 1.8269102978931542e-05, + "loss": 0.9903, "step": 7566 }, { - "epoch": 0.21472758229284902, + "epoch": 0.21442942560004535, "grad_norm": 0.0, - "learning_rate": 1.8263349130861453e-05, - "loss": 0.9735, + "learning_rate": 1.8268586840134296e-05, + "loss": 1.1516, "step": 7567 }, { - "epoch": 0.21475595913734394, + "epoch": 0.2144577630423078, "grad_norm": 0.0, - "learning_rate": 1.8262831490473522e-05, - "loss": 0.9525, + "learning_rate": 1.826807063168768e-05, + "loss": 0.941, "step": 7568 }, { - "epoch": 0.21478433598183883, + "epoch": 0.21448610048457026, "grad_norm": 0.0, - "learning_rate": 1.8262313780289084e-05, - "loss": 1.0366, + "learning_rate": 1.8267554353596027e-05, + "loss": 1.05, "step": 7569 }, { - "epoch": 0.2148127128263337, + "epoch": 0.21451443792683272, "grad_norm": 0.0, - "learning_rate": 1.826179600031251e-05, - "loss": 0.982, + "learning_rate": 1.8267038005863698e-05, + "loss": 1.0173, "step": 7570 }, { - "epoch": 0.2148410896708286, + "epoch": 0.2145427753690952, "grad_norm": 0.0, - "learning_rate": 1.826127815054817e-05, - "loss": 0.9799, + "learning_rate": 1.8266521588495035e-05, + "loss": 0.9594, "step": 7571 }, { - "epoch": 0.2148694665153235, + "epoch": 0.21457111281135766, "grad_norm": 0.0, - "learning_rate": 1.8260760231000444e-05, - "loss": 1.0931, + "learning_rate": 1.8266005101494393e-05, + "loss": 0.9703, "step": 7572 }, { - "epoch": 0.21489784335981837, + "epoch": 0.21459945025362012, "grad_norm": 0.0, - "learning_rate": 1.8260242241673706e-05, - "loss": 1.0443, + "learning_rate": 1.826548854486612e-05, + "loss": 0.9967, "step": 7573 }, { - "epoch": 0.2149262202043133, + "epoch": 0.21462778769588256, "grad_norm": 0.0, - "learning_rate": 1.825972418257233e-05, - "loss": 0.9308, + "learning_rate": 1.826497191861457e-05, + "loss": 0.9911, "step": 7574 }, { - "epoch": 0.21495459704880818, + "epoch": 0.21465612513814503, "grad_norm": 0.0, - "learning_rate": 1.8259206053700692e-05, - "loss": 1.1551, + "learning_rate": 1.826445522274409e-05, + "loss": 0.9897, "step": 7575 }, { - "epoch": 0.21498297389330306, + "epoch": 0.2146844625804075, "grad_norm": 0.0, - "learning_rate": 1.825868785506317e-05, - "loss": 0.9303, + "learning_rate": 1.8263938457259038e-05, + "loss": 1.0613, "step": 7576 }, { - "epoch": 0.21501135073779795, + "epoch": 0.21471280002266996, "grad_norm": 0.0, - "learning_rate": 1.825816958666414e-05, - "loss": 0.905, + "learning_rate": 1.8263421622163758e-05, + "loss": 1.0057, "step": 7577 }, { - "epoch": 0.21503972758229284, + "epoch": 0.21474113746493242, "grad_norm": 0.0, - "learning_rate": 1.825765124850798e-05, - "loss": 0.9711, + "learning_rate": 1.826290471746261e-05, + "loss": 1.0489, "step": 7578 }, { - "epoch": 0.21506810442678775, + "epoch": 0.21476947490719486, "grad_norm": 0.0, - "learning_rate": 1.825713284059907e-05, - "loss": 0.9771, + "learning_rate": 1.826238774315995e-05, + "loss": 0.9601, "step": 7579 }, { - "epoch": 0.21509648127128264, + "epoch": 0.21479781234945733, "grad_norm": 0.0, - "learning_rate": 1.8256614362941785e-05, - "loss": 0.906, + "learning_rate": 1.8261870699260128e-05, + "loss": 0.958, "step": 7580 }, { - "epoch": 0.21512485811577753, + "epoch": 0.2148261497917198, "grad_norm": 0.0, - "learning_rate": 1.8256095815540512e-05, - "loss": 1.0811, + "learning_rate": 1.8261353585767504e-05, + "loss": 1.0914, "step": 7581 }, { - "epoch": 0.21515323496027242, + "epoch": 0.21485448723398226, "grad_norm": 0.0, - "learning_rate": 1.8255577198399625e-05, - "loss": 1.038, + "learning_rate": 1.8260836402686427e-05, + "loss": 0.9875, "step": 7582 }, { - "epoch": 0.2151816118047673, + "epoch": 0.21488282467624473, "grad_norm": 0.0, - "learning_rate": 1.82550585115235e-05, - "loss": 1.1177, + "learning_rate": 1.8260319150021262e-05, + "loss": 0.9876, "step": 7583 }, { - "epoch": 0.2152099886492622, + "epoch": 0.2149111621185072, "grad_norm": 0.0, - "learning_rate": 1.825453975491653e-05, - "loss": 1.0051, + "learning_rate": 1.8259801827776358e-05, + "loss": 1.0512, "step": 7584 }, { - "epoch": 0.2152383654937571, + "epoch": 0.21493949956076963, "grad_norm": 0.0, - "learning_rate": 1.825402092858309e-05, - "loss": 1.0513, + "learning_rate": 1.8259284435956077e-05, + "loss": 0.9783, "step": 7585 }, { - "epoch": 0.215266742338252, + "epoch": 0.2149678370030321, "grad_norm": 0.0, - "learning_rate": 1.825350203252757e-05, - "loss": 1.0151, + "learning_rate": 1.8258766974564778e-05, + "loss": 1.0018, "step": 7586 }, { - "epoch": 0.21529511918274688, + "epoch": 0.21499617444529456, "grad_norm": 0.0, - "learning_rate": 1.825298306675434e-05, - "loss": 1.1118, + "learning_rate": 1.8258249443606813e-05, + "loss": 1.0005, "step": 7587 }, { - "epoch": 0.21532349602724177, + "epoch": 0.21502451188755703, "grad_norm": 0.0, - "learning_rate": 1.8252464031267794e-05, - "loss": 1.1037, + "learning_rate": 1.825773184308655e-05, + "loss": 0.9908, "step": 7588 }, { - "epoch": 0.21535187287173665, + "epoch": 0.2150528493298195, "grad_norm": 0.0, - "learning_rate": 1.8251944926072313e-05, - "loss": 0.8811, + "learning_rate": 1.8257214173008347e-05, + "loss": 1.0349, "step": 7589 }, { - "epoch": 0.21538024971623154, + "epoch": 0.21508118677208196, "grad_norm": 0.0, - "learning_rate": 1.825142575117228e-05, - "loss": 1.0029, + "learning_rate": 1.8256696433376557e-05, + "loss": 0.9329, "step": 7590 }, { - "epoch": 0.21540862656072646, + "epoch": 0.2151095242143444, "grad_norm": 0.0, - "learning_rate": 1.8250906506572087e-05, - "loss": 0.9814, + "learning_rate": 1.825617862419555e-05, + "loss": 1.0508, "step": 7591 }, { - "epoch": 0.21543700340522134, + "epoch": 0.21513786165660687, "grad_norm": 0.0, - "learning_rate": 1.8250387192276115e-05, - "loss": 1.0792, + "learning_rate": 1.8255660745469685e-05, + "loss": 0.9225, "step": 7592 }, { - "epoch": 0.21546538024971623, + "epoch": 0.21516619909886933, "grad_norm": 0.0, - "learning_rate": 1.824986780828875e-05, - "loss": 0.9471, + "learning_rate": 1.8255142797203326e-05, + "loss": 0.9323, "step": 7593 }, { - "epoch": 0.21549375709421112, + "epoch": 0.2151945365411318, "grad_norm": 0.0, - "learning_rate": 1.824934835461438e-05, - "loss": 1.0402, + "learning_rate": 1.8254624779400828e-05, + "loss": 0.8434, "step": 7594 }, { - "epoch": 0.215522133938706, + "epoch": 0.21522287398339426, "grad_norm": 0.0, - "learning_rate": 1.8248828831257396e-05, - "loss": 0.9873, + "learning_rate": 1.8254106692066567e-05, + "loss": 0.9993, "step": 7595 }, { - "epoch": 0.21555051078320092, + "epoch": 0.21525121142565673, "grad_norm": 0.0, - "learning_rate": 1.8248309238222183e-05, - "loss": 0.9774, + "learning_rate": 1.8253588535204894e-05, + "loss": 1.07, "step": 7596 }, { - "epoch": 0.2155788876276958, + "epoch": 0.21527954886791917, "grad_norm": 0.0, - "learning_rate": 1.824778957551313e-05, - "loss": 1.0164, + "learning_rate": 1.8253070308820184e-05, + "loss": 1.0101, "step": 7597 }, { - "epoch": 0.2156072644721907, + "epoch": 0.21530788631018163, "grad_norm": 0.0, - "learning_rate": 1.824726984313463e-05, - "loss": 1.039, + "learning_rate": 1.82525520129168e-05, + "loss": 1.0695, "step": 7598 }, { - "epoch": 0.21563564131668558, + "epoch": 0.2153362237524441, "grad_norm": 0.0, - "learning_rate": 1.824675004109107e-05, - "loss": 1.064, + "learning_rate": 1.82520336474991e-05, + "loss": 1.0101, "step": 7599 }, { - "epoch": 0.21566401816118047, + "epoch": 0.21536456119470657, "grad_norm": 0.0, - "learning_rate": 1.824623016938684e-05, - "loss": 0.9111, + "learning_rate": 1.8251515212571457e-05, + "loss": 0.9377, "step": 7600 }, { - "epoch": 0.21569239500567536, + "epoch": 0.21539289863696903, "grad_norm": 0.0, - "learning_rate": 1.8245710228026336e-05, - "loss": 0.9532, + "learning_rate": 1.825099670813824e-05, + "loss": 0.8748, "step": 7601 }, { - "epoch": 0.21572077185017027, + "epoch": 0.2154212360792315, "grad_norm": 0.0, - "learning_rate": 1.8245190217013943e-05, - "loss": 0.8456, + "learning_rate": 1.8250478134203816e-05, + "loss": 0.9289, "step": 7602 }, { - "epoch": 0.21574914869466516, + "epoch": 0.21544957352149394, "grad_norm": 0.0, - "learning_rate": 1.8244670136354062e-05, - "loss": 0.9973, + "learning_rate": 1.8249959490772547e-05, + "loss": 1.0431, "step": 7603 }, { - "epoch": 0.21577752553916005, + "epoch": 0.2154779109637564, "grad_norm": 0.0, - "learning_rate": 1.8244149986051075e-05, - "loss": 0.9921, + "learning_rate": 1.8249440777848805e-05, + "loss": 1.1104, "step": 7604 }, { - "epoch": 0.21580590238365494, + "epoch": 0.21550624840601887, "grad_norm": 0.0, - "learning_rate": 1.8243629766109393e-05, - "loss": 0.9565, + "learning_rate": 1.824892199543696e-05, + "loss": 1.0036, "step": 7605 }, { - "epoch": 0.21583427922814982, + "epoch": 0.21553458584828133, "grad_norm": 0.0, - "learning_rate": 1.824310947653339e-05, - "loss": 0.8923, + "learning_rate": 1.8248403143541386e-05, + "loss": 0.992, "step": 7606 }, { - "epoch": 0.2158626560726447, + "epoch": 0.2155629232905438, "grad_norm": 0.0, - "learning_rate": 1.8242589117327477e-05, - "loss": 1.1177, + "learning_rate": 1.8247884222166447e-05, + "loss": 1.0168, "step": 7607 }, { - "epoch": 0.21589103291713962, + "epoch": 0.21559126073280627, "grad_norm": 0.0, - "learning_rate": 1.824206868849604e-05, - "loss": 1.0024, + "learning_rate": 1.8247365231316517e-05, + "loss": 1.0245, "step": 7608 }, { - "epoch": 0.2159194097616345, + "epoch": 0.2156195981750687, "grad_norm": 0.0, - "learning_rate": 1.8241548190043476e-05, - "loss": 1.0139, + "learning_rate": 1.8246846170995964e-05, + "loss": 1.0228, "step": 7609 }, { - "epoch": 0.2159477866061294, + "epoch": 0.21564793561733117, "grad_norm": 0.0, - "learning_rate": 1.824102762197419e-05, - "loss": 0.961, + "learning_rate": 1.8246327041209165e-05, + "loss": 0.93, "step": 7610 }, { - "epoch": 0.2159761634506243, + "epoch": 0.21567627305959364, "grad_norm": 0.0, - "learning_rate": 1.824050698429257e-05, - "loss": 1.0372, + "learning_rate": 1.8245807841960494e-05, + "loss": 1.0421, "step": 7611 }, { - "epoch": 0.21600454029511917, + "epoch": 0.2157046105018561, "grad_norm": 0.0, - "learning_rate": 1.8239986277003016e-05, - "loss": 0.9476, + "learning_rate": 1.824528857325432e-05, + "loss": 1.0973, "step": 7612 }, { - "epoch": 0.21603291713961406, + "epoch": 0.21573294794411857, "grad_norm": 0.0, - "learning_rate": 1.823946550010993e-05, - "loss": 1.0321, + "learning_rate": 1.8244769235095018e-05, + "loss": 0.9381, "step": 7613 }, { - "epoch": 0.21606129398410898, + "epoch": 0.21576128538638104, "grad_norm": 0.0, - "learning_rate": 1.8238944653617706e-05, - "loss": 0.9907, + "learning_rate": 1.8244249827486962e-05, + "loss": 1.0167, "step": 7614 }, { - "epoch": 0.21608967082860386, + "epoch": 0.21578962282864347, "grad_norm": 0.0, - "learning_rate": 1.8238423737530748e-05, - "loss": 0.9511, + "learning_rate": 1.8243730350434527e-05, + "loss": 1.0949, "step": 7615 }, { - "epoch": 0.21611804767309875, + "epoch": 0.21581796027090594, "grad_norm": 0.0, - "learning_rate": 1.8237902751853453e-05, - "loss": 0.936, + "learning_rate": 1.8243210803942097e-05, + "loss": 1.0285, "step": 7616 }, { - "epoch": 0.21614642451759364, + "epoch": 0.2158462977131684, "grad_norm": 0.0, - "learning_rate": 1.8237381696590227e-05, - "loss": 0.9702, + "learning_rate": 1.8242691188014032e-05, + "loss": 1.0537, "step": 7617 }, { - "epoch": 0.21617480136208853, + "epoch": 0.21587463515543087, "grad_norm": 0.0, - "learning_rate": 1.8236860571745463e-05, - "loss": 1.0991, + "learning_rate": 1.8242171502654725e-05, + "loss": 1.0085, "step": 7618 }, { - "epoch": 0.21620317820658344, + "epoch": 0.21590297259769334, "grad_norm": 0.0, - "learning_rate": 1.823633937732357e-05, - "loss": 0.9526, + "learning_rate": 1.824165174786854e-05, + "loss": 1.0078, "step": 7619 }, { - "epoch": 0.21623155505107833, + "epoch": 0.2159313100399558, "grad_norm": 0.0, - "learning_rate": 1.8235818113328944e-05, - "loss": 1.0938, + "learning_rate": 1.824113192365987e-05, + "loss": 1.0887, "step": 7620 }, { - "epoch": 0.21625993189557322, + "epoch": 0.21595964748221824, "grad_norm": 0.0, - "learning_rate": 1.8235296779765996e-05, - "loss": 0.9856, + "learning_rate": 1.8240612030033084e-05, + "loss": 1.002, "step": 7621 }, { - "epoch": 0.2162883087400681, + "epoch": 0.2159879849244807, "grad_norm": 0.0, - "learning_rate": 1.8234775376639128e-05, - "loss": 1.0559, + "learning_rate": 1.8240092066992557e-05, + "loss": 1.0049, "step": 7622 }, { - "epoch": 0.216316685584563, + "epoch": 0.21601632236674317, "grad_norm": 0.0, - "learning_rate": 1.8234253903952735e-05, - "loss": 0.9848, + "learning_rate": 1.8239572034542682e-05, + "loss": 0.9979, "step": 7623 }, { - "epoch": 0.21634506242905788, + "epoch": 0.21604465980900564, "grad_norm": 0.0, - "learning_rate": 1.8233732361711233e-05, - "loss": 0.9884, + "learning_rate": 1.8239051932687828e-05, + "loss": 1.0731, "step": 7624 }, { - "epoch": 0.2163734392735528, + "epoch": 0.2160729972512681, "grad_norm": 0.0, - "learning_rate": 1.8233210749919025e-05, - "loss": 0.9264, + "learning_rate": 1.823853176143238e-05, + "loss": 0.9778, "step": 7625 }, { - "epoch": 0.21640181611804768, + "epoch": 0.21610133469353057, "grad_norm": 0.0, - "learning_rate": 1.8232689068580516e-05, - "loss": 0.9565, + "learning_rate": 1.8238011520780722e-05, + "loss": 0.9714, "step": 7626 }, { - "epoch": 0.21643019296254257, + "epoch": 0.216129672135793, "grad_norm": 0.0, - "learning_rate": 1.8232167317700108e-05, - "loss": 0.998, + "learning_rate": 1.823749121073723e-05, + "loss": 0.9631, "step": 7627 }, { - "epoch": 0.21645856980703745, + "epoch": 0.21615800957805548, "grad_norm": 0.0, - "learning_rate": 1.8231645497282218e-05, - "loss": 1.0472, + "learning_rate": 1.8236970831306293e-05, + "loss": 0.9383, "step": 7628 }, { - "epoch": 0.21648694665153234, + "epoch": 0.21618634702031794, "grad_norm": 0.0, - "learning_rate": 1.8231123607331245e-05, - "loss": 0.9695, + "learning_rate": 1.8236450382492293e-05, + "loss": 0.9769, "step": 7629 }, { - "epoch": 0.21651532349602723, + "epoch": 0.2162146844625804, "grad_norm": 0.0, - "learning_rate": 1.8230601647851602e-05, - "loss": 1.0414, + "learning_rate": 1.823592986429961e-05, + "loss": 0.8668, "step": 7630 }, { - "epoch": 0.21654370034052214, + "epoch": 0.21624302190484287, "grad_norm": 0.0, - "learning_rate": 1.82300796188477e-05, - "loss": 1.0346, + "learning_rate": 1.8235409276732633e-05, + "loss": 1.0272, "step": 7631 }, { - "epoch": 0.21657207718501703, + "epoch": 0.21627135934710534, "grad_norm": 0.0, - "learning_rate": 1.8229557520323942e-05, - "loss": 1.037, + "learning_rate": 1.8234888619795747e-05, + "loss": 0.9883, "step": 7632 }, { - "epoch": 0.21660045402951192, + "epoch": 0.21629969678936778, "grad_norm": 0.0, - "learning_rate": 1.822903535228474e-05, - "loss": 0.9832, + "learning_rate": 1.8234367893493334e-05, + "loss": 1.0593, "step": 7633 }, { - "epoch": 0.2166288308740068, + "epoch": 0.21632803423163025, "grad_norm": 0.0, - "learning_rate": 1.8228513114734508e-05, - "loss": 1.0255, + "learning_rate": 1.823384709782978e-05, + "loss": 1.0166, "step": 7634 }, { - "epoch": 0.2166572077185017, + "epoch": 0.2163563716738927, "grad_norm": 0.0, - "learning_rate": 1.8227990807677657e-05, - "loss": 1.0191, + "learning_rate": 1.823332623280948e-05, + "loss": 0.9371, "step": 7635 }, { - "epoch": 0.2166855845629966, + "epoch": 0.21638470911615518, "grad_norm": 0.0, - "learning_rate": 1.8227468431118595e-05, - "loss": 0.9415, + "learning_rate": 1.8232805298436815e-05, + "loss": 1.0087, "step": 7636 }, { - "epoch": 0.2167139614074915, + "epoch": 0.21641304655841764, "grad_norm": 0.0, - "learning_rate": 1.8226945985061743e-05, - "loss": 1.0407, + "learning_rate": 1.823228429471617e-05, + "loss": 0.9344, "step": 7637 }, { - "epoch": 0.21674233825198638, + "epoch": 0.2164413840006801, "grad_norm": 0.0, - "learning_rate": 1.8226423469511503e-05, - "loss": 1.028, + "learning_rate": 1.823176322165194e-05, + "loss": 1.0602, "step": 7638 }, { - "epoch": 0.21677071509648127, + "epoch": 0.21646972144294255, "grad_norm": 0.0, - "learning_rate": 1.8225900884472296e-05, - "loss": 1.0008, + "learning_rate": 1.8231242079248512e-05, + "loss": 0.9469, "step": 7639 }, { - "epoch": 0.21679909194097616, + "epoch": 0.216498058885205, "grad_norm": 0.0, - "learning_rate": 1.8225378229948533e-05, - "loss": 0.9998, + "learning_rate": 1.8230720867510273e-05, + "loss": 0.9845, "step": 7640 }, { - "epoch": 0.21682746878547104, + "epoch": 0.21652639632746748, "grad_norm": 0.0, - "learning_rate": 1.8224855505944634e-05, - "loss": 0.9921, + "learning_rate": 1.823019958644162e-05, + "loss": 1.0736, "step": 7641 }, { - "epoch": 0.21685584562996596, + "epoch": 0.21655473376972995, "grad_norm": 0.0, - "learning_rate": 1.8224332712465008e-05, - "loss": 1.0163, + "learning_rate": 1.8229678236046936e-05, + "loss": 1.1113, "step": 7642 }, { - "epoch": 0.21688422247446085, + "epoch": 0.2165830712119924, "grad_norm": 0.0, - "learning_rate": 1.8223809849514074e-05, - "loss": 1.0116, + "learning_rate": 1.8229156816330616e-05, + "loss": 1.0503, "step": 7643 }, { - "epoch": 0.21691259931895573, + "epoch": 0.21661140865425488, "grad_norm": 0.0, - "learning_rate": 1.8223286917096247e-05, - "loss": 0.9701, + "learning_rate": 1.8228635327297054e-05, + "loss": 0.983, "step": 7644 }, { - "epoch": 0.21694097616345062, + "epoch": 0.21663974609651732, "grad_norm": 0.0, - "learning_rate": 1.822276391521595e-05, - "loss": 1.0367, + "learning_rate": 1.822811376895064e-05, + "loss": 1.0477, "step": 7645 }, { - "epoch": 0.2169693530079455, + "epoch": 0.21666808353877978, "grad_norm": 0.0, - "learning_rate": 1.8222240843877593e-05, - "loss": 1.0648, + "learning_rate": 1.8227592141295768e-05, + "loss": 1.0392, "step": 7646 }, { - "epoch": 0.2169977298524404, + "epoch": 0.21669642098104225, "grad_norm": 0.0, - "learning_rate": 1.82217177030856e-05, - "loss": 0.9348, + "learning_rate": 1.8227070444336833e-05, + "loss": 1.0167, "step": 7647 }, { - "epoch": 0.2170261066969353, + "epoch": 0.21672475842330471, "grad_norm": 0.0, - "learning_rate": 1.8221194492844383e-05, - "loss": 1.1225, + "learning_rate": 1.822654867807823e-05, + "loss": 1.1311, "step": 7648 }, { - "epoch": 0.2170544835414302, + "epoch": 0.21675309586556718, "grad_norm": 0.0, - "learning_rate": 1.8220671213158373e-05, - "loss": 0.944, + "learning_rate": 1.8226026842524353e-05, + "loss": 1.0398, "step": 7649 }, { - "epoch": 0.21708286038592509, + "epoch": 0.21678143330782965, "grad_norm": 0.0, - "learning_rate": 1.8220147864031984e-05, - "loss": 0.9353, + "learning_rate": 1.8225504937679592e-05, + "loss": 0.9705, "step": 7650 }, { - "epoch": 0.21711123723041997, + "epoch": 0.21680977075009208, "grad_norm": 0.0, - "learning_rate": 1.8219624445469633e-05, - "loss": 1.0425, + "learning_rate": 1.8224982963548353e-05, + "loss": 0.9147, "step": 7651 }, { - "epoch": 0.21713961407491486, + "epoch": 0.21683810819235455, "grad_norm": 0.0, - "learning_rate": 1.8219100957475747e-05, - "loss": 1.032, + "learning_rate": 1.8224460920135027e-05, + "loss": 0.9839, "step": 7652 }, { - "epoch": 0.21716799091940975, + "epoch": 0.21686644563461702, "grad_norm": 0.0, - "learning_rate": 1.8218577400054744e-05, - "loss": 0.9197, + "learning_rate": 1.8223938807444014e-05, + "loss": 1.0349, "step": 7653 }, { - "epoch": 0.21719636776390466, + "epoch": 0.21689478307687948, "grad_norm": 0.0, - "learning_rate": 1.8218053773211047e-05, - "loss": 0.8071, + "learning_rate": 1.8223416625479706e-05, + "loss": 0.96, "step": 7654 }, { - "epoch": 0.21722474460839955, + "epoch": 0.21692312051914195, "grad_norm": 0.0, - "learning_rate": 1.8217530076949083e-05, - "loss": 0.9397, + "learning_rate": 1.822289437424651e-05, + "loss": 1.0301, "step": 7655 }, { - "epoch": 0.21725312145289444, + "epoch": 0.21695145796140441, "grad_norm": 0.0, - "learning_rate": 1.821700631127327e-05, - "loss": 1.0876, + "learning_rate": 1.822237205374882e-05, + "loss": 0.9446, "step": 7656 }, { - "epoch": 0.21728149829738932, + "epoch": 0.21697979540366685, "grad_norm": 0.0, - "learning_rate": 1.821648247618804e-05, - "loss": 0.9105, + "learning_rate": 1.822184966399104e-05, + "loss": 1.0027, "step": 7657 }, { - "epoch": 0.2173098751418842, + "epoch": 0.21700813284592932, "grad_norm": 0.0, - "learning_rate": 1.8215958571697808e-05, - "loss": 0.9761, + "learning_rate": 1.8221327204977564e-05, + "loss": 1.1851, "step": 7658 }, { - "epoch": 0.21733825198637913, + "epoch": 0.21703647028819179, "grad_norm": 0.0, - "learning_rate": 1.821543459780701e-05, - "loss": 1.0484, + "learning_rate": 1.8220804676712797e-05, + "loss": 1.0309, "step": 7659 }, { - "epoch": 0.21736662883087401, + "epoch": 0.21706480773045425, "grad_norm": 0.0, - "learning_rate": 1.821491055452006e-05, - "loss": 0.9368, + "learning_rate": 1.8220282079201138e-05, + "loss": 0.9838, "step": 7660 }, { - "epoch": 0.2173950056753689, + "epoch": 0.21709314517271672, "grad_norm": 0.0, - "learning_rate": 1.8214386441841396e-05, - "loss": 0.9129, + "learning_rate": 1.8219759412446992e-05, + "loss": 1.0081, "step": 7661 }, { - "epoch": 0.2174233825198638, + "epoch": 0.21712148261497918, "grad_norm": 0.0, - "learning_rate": 1.821386225977544e-05, - "loss": 0.9608, + "learning_rate": 1.821923667645476e-05, + "loss": 0.947, "step": 7662 }, { - "epoch": 0.21745175936435868, + "epoch": 0.21714982005724162, "grad_norm": 0.0, - "learning_rate": 1.8213338008326623e-05, - "loss": 0.9273, + "learning_rate": 1.8218713871228844e-05, + "loss": 1.0385, "step": 7663 }, { - "epoch": 0.21748013620885356, + "epoch": 0.2171781574995041, "grad_norm": 0.0, - "learning_rate": 1.8212813687499365e-05, - "loss": 1.095, + "learning_rate": 1.821819099677365e-05, + "loss": 0.8885, "step": 7664 }, { - "epoch": 0.21750851305334848, + "epoch": 0.21720649494176655, "grad_norm": 0.0, - "learning_rate": 1.8212289297298104e-05, - "loss": 1.06, + "learning_rate": 1.8217668053093583e-05, + "loss": 0.9212, "step": 7665 }, { - "epoch": 0.21753688989784337, + "epoch": 0.21723483238402902, "grad_norm": 0.0, - "learning_rate": 1.8211764837727263e-05, - "loss": 1.0283, + "learning_rate": 1.8217145040193043e-05, + "loss": 1.0135, "step": 7666 }, { - "epoch": 0.21756526674233825, + "epoch": 0.21726316982629149, "grad_norm": 0.0, - "learning_rate": 1.821124030879128e-05, - "loss": 0.9201, + "learning_rate": 1.821662195807644e-05, + "loss": 0.9392, "step": 7667 }, { - "epoch": 0.21759364358683314, + "epoch": 0.21729150726855395, "grad_norm": 0.0, - "learning_rate": 1.8210715710494576e-05, - "loss": 0.9227, + "learning_rate": 1.821609880674818e-05, + "loss": 1.0592, "step": 7668 }, { - "epoch": 0.21762202043132803, + "epoch": 0.2173198447108164, "grad_norm": 0.0, - "learning_rate": 1.821019104284159e-05, - "loss": 1.0414, + "learning_rate": 1.8215575586212672e-05, + "loss": 1.0957, "step": 7669 }, { - "epoch": 0.21765039727582292, + "epoch": 0.21734818215307886, "grad_norm": 0.0, - "learning_rate": 1.820966630583675e-05, - "loss": 1.0477, + "learning_rate": 1.8215052296474315e-05, + "loss": 1.0336, "step": 7670 }, { - "epoch": 0.21767877412031783, + "epoch": 0.21737651959534132, "grad_norm": 0.0, - "learning_rate": 1.820914149948449e-05, - "loss": 1.0003, + "learning_rate": 1.8214528937537523e-05, + "loss": 1.0242, "step": 7671 }, { - "epoch": 0.21770715096481272, + "epoch": 0.2174048570376038, "grad_norm": 0.0, - "learning_rate": 1.8208616623789246e-05, - "loss": 1.0158, + "learning_rate": 1.8214005509406708e-05, + "loss": 1.0616, "step": 7672 }, { - "epoch": 0.2177355278093076, + "epoch": 0.21743319447986625, "grad_norm": 0.0, - "learning_rate": 1.8208091678755444e-05, - "loss": 0.9887, + "learning_rate": 1.8213482012086268e-05, + "loss": 0.9572, "step": 7673 }, { - "epoch": 0.2177639046538025, + "epoch": 0.21746153192212872, "grad_norm": 0.0, - "learning_rate": 1.8207566664387526e-05, - "loss": 0.9406, + "learning_rate": 1.8212958445580623e-05, + "loss": 1.1776, "step": 7674 }, { - "epoch": 0.21779228149829738, + "epoch": 0.21748986936439116, "grad_norm": 0.0, - "learning_rate": 1.820704158068992e-05, - "loss": 0.9858, + "learning_rate": 1.8212434809894176e-05, + "loss": 0.9799, "step": 7675 }, { - "epoch": 0.2178206583427923, + "epoch": 0.21751820680665362, "grad_norm": 0.0, - "learning_rate": 1.820651642766707e-05, - "loss": 0.9922, + "learning_rate": 1.8211911105031344e-05, + "loss": 1.0463, "step": 7676 }, { - "epoch": 0.21784903518728718, + "epoch": 0.2175465442489161, "grad_norm": 0.0, - "learning_rate": 1.8205991205323402e-05, - "loss": 1.0831, + "learning_rate": 1.8211387330996536e-05, + "loss": 1.1146, "step": 7677 }, { - "epoch": 0.21787741203178207, + "epoch": 0.21757488169117856, "grad_norm": 0.0, - "learning_rate": 1.820546591366336e-05, - "loss": 0.8902, + "learning_rate": 1.821086348779416e-05, + "loss": 1.0505, "step": 7678 }, { - "epoch": 0.21790578887627696, + "epoch": 0.21760321913344102, "grad_norm": 0.0, - "learning_rate": 1.8204940552691375e-05, - "loss": 1.0555, + "learning_rate": 1.8210339575428632e-05, + "loss": 0.9878, "step": 7679 }, { - "epoch": 0.21793416572077184, + "epoch": 0.2176315565757035, "grad_norm": 0.0, - "learning_rate": 1.820441512241189e-05, - "loss": 1.1048, + "learning_rate": 1.8209815593904365e-05, + "loss": 1.0798, "step": 7680 }, { - "epoch": 0.21796254256526673, + "epoch": 0.21765989401796593, "grad_norm": 0.0, - "learning_rate": 1.8203889622829344e-05, - "loss": 1.0366, + "learning_rate": 1.8209291543225774e-05, + "loss": 1.0375, "step": 7681 }, { - "epoch": 0.21799091940976165, + "epoch": 0.2176882314602284, "grad_norm": 0.0, - "learning_rate": 1.8203364053948173e-05, - "loss": 1.0515, + "learning_rate": 1.8208767423397273e-05, + "loss": 0.9504, "step": 7682 }, { - "epoch": 0.21801929625425653, + "epoch": 0.21771656890249086, "grad_norm": 0.0, - "learning_rate": 1.8202838415772812e-05, - "loss": 0.8494, + "learning_rate": 1.8208243234423274e-05, + "loss": 0.9681, "step": 7683 }, { - "epoch": 0.21804767309875142, + "epoch": 0.21774490634475333, "grad_norm": 0.0, - "learning_rate": 1.820231270830771e-05, - "loss": 1.0419, + "learning_rate": 1.8207718976308194e-05, + "loss": 1.1966, "step": 7684 }, { - "epoch": 0.2180760499432463, + "epoch": 0.2177732437870158, "grad_norm": 0.0, - "learning_rate": 1.8201786931557305e-05, - "loss": 0.9569, + "learning_rate": 1.820719464905645e-05, + "loss": 1.0814, "step": 7685 }, { - "epoch": 0.2181044267877412, + "epoch": 0.21780158122927826, "grad_norm": 0.0, - "learning_rate": 1.8201261085526036e-05, - "loss": 0.8791, + "learning_rate": 1.8206670252672457e-05, + "loss": 1.0188, "step": 7686 }, { - "epoch": 0.21813280363223608, + "epoch": 0.2178299186715407, "grad_norm": 0.0, - "learning_rate": 1.8200735170218348e-05, - "loss": 0.9358, + "learning_rate": 1.8206145787160635e-05, + "loss": 1.0657, "step": 7687 }, { - "epoch": 0.218161180476731, + "epoch": 0.21785825611380316, "grad_norm": 0.0, - "learning_rate": 1.820020918563868e-05, - "loss": 0.9111, + "learning_rate": 1.82056212525254e-05, + "loss": 1.0674, "step": 7688 }, { - "epoch": 0.21818955732122589, + "epoch": 0.21788659355606563, "grad_norm": 0.0, - "learning_rate": 1.8199683131791474e-05, - "loss": 0.9991, + "learning_rate": 1.8205096648771166e-05, + "loss": 0.9394, "step": 7689 }, { - "epoch": 0.21821793416572077, + "epoch": 0.2179149309983281, "grad_norm": 0.0, - "learning_rate": 1.819915700868118e-05, - "loss": 1.004, + "learning_rate": 1.8204571975902362e-05, + "loss": 0.9611, "step": 7690 }, { - "epoch": 0.21824631101021566, + "epoch": 0.21794326844059056, "grad_norm": 0.0, - "learning_rate": 1.8198630816312236e-05, - "loss": 0.9184, + "learning_rate": 1.8204047233923394e-05, + "loss": 1.057, "step": 7691 }, { - "epoch": 0.21827468785471055, + "epoch": 0.21797160588285303, "grad_norm": 0.0, - "learning_rate": 1.8198104554689086e-05, - "loss": 0.9849, + "learning_rate": 1.8203522422838694e-05, + "loss": 1.01, "step": 7692 }, { - "epoch": 0.21830306469920543, + "epoch": 0.21799994332511546, "grad_norm": 0.0, - "learning_rate": 1.819757822381618e-05, - "loss": 1.0385, + "learning_rate": 1.820299754265268e-05, + "loss": 1.0099, "step": 7693 }, { - "epoch": 0.21833144154370035, + "epoch": 0.21802828076737793, "grad_norm": 0.0, - "learning_rate": 1.8197051823697964e-05, - "loss": 1.0491, + "learning_rate": 1.8202472593369765e-05, + "loss": 1.0674, "step": 7694 }, { - "epoch": 0.21835981838819524, + "epoch": 0.2180566182096404, "grad_norm": 0.0, - "learning_rate": 1.8196525354338882e-05, - "loss": 1.0165, + "learning_rate": 1.8201947574994385e-05, + "loss": 1.0161, "step": 7695 }, { - "epoch": 0.21838819523269012, + "epoch": 0.21808495565190286, "grad_norm": 0.0, - "learning_rate": 1.8195998815743382e-05, - "loss": 0.9298, + "learning_rate": 1.8201422487530953e-05, + "loss": 1.0441, "step": 7696 }, { - "epoch": 0.218416572077185, + "epoch": 0.21811329309416533, "grad_norm": 0.0, - "learning_rate": 1.819547220791591e-05, - "loss": 0.9825, + "learning_rate": 1.820089733098389e-05, + "loss": 0.9651, "step": 7697 }, { - "epoch": 0.2184449489216799, + "epoch": 0.2181416305364278, "grad_norm": 0.0, - "learning_rate": 1.819494553086092e-05, - "loss": 1.0347, + "learning_rate": 1.820037210535763e-05, + "loss": 0.952, "step": 7698 }, { - "epoch": 0.21847332576617481, + "epoch": 0.21816996797869023, "grad_norm": 0.0, - "learning_rate": 1.8194418784582854e-05, - "loss": 0.9427, + "learning_rate": 1.8199846810656586e-05, + "loss": 1.1507, "step": 7699 }, { - "epoch": 0.2185017026106697, + "epoch": 0.2181983054209527, "grad_norm": 0.0, - "learning_rate": 1.8193891969086164e-05, - "loss": 1.0542, + "learning_rate": 1.819932144688519e-05, + "loss": 0.9574, "step": 7700 }, { - "epoch": 0.2185300794551646, + "epoch": 0.21822664286321516, "grad_norm": 0.0, - "learning_rate": 1.81933650843753e-05, - "loss": 1.0818, + "learning_rate": 1.819879601404786e-05, + "loss": 1.0464, "step": 7701 }, { - "epoch": 0.21855845629965948, + "epoch": 0.21825498030547763, "grad_norm": 0.0, - "learning_rate": 1.8192838130454712e-05, - "loss": 1.0407, + "learning_rate": 1.819827051214903e-05, + "loss": 0.9483, "step": 7702 }, { - "epoch": 0.21858683314415436, + "epoch": 0.2182833177477401, "grad_norm": 0.0, - "learning_rate": 1.8192311107328855e-05, - "loss": 0.9837, + "learning_rate": 1.819774494119312e-05, + "loss": 1.0738, "step": 7703 }, { - "epoch": 0.21861520998864925, + "epoch": 0.21831165519000256, "grad_norm": 0.0, - "learning_rate": 1.8191784015002177e-05, - "loss": 0.9959, + "learning_rate": 1.8197219301184565e-05, + "loss": 1.0919, "step": 7704 }, { - "epoch": 0.21864358683314417, + "epoch": 0.218339992632265, "grad_norm": 0.0, - "learning_rate": 1.8191256853479128e-05, - "loss": 1.0904, + "learning_rate": 1.8196693592127786e-05, + "loss": 1.0471, "step": 7705 }, { - "epoch": 0.21867196367763905, + "epoch": 0.21836833007452747, "grad_norm": 0.0, - "learning_rate": 1.8190729622764167e-05, - "loss": 0.9642, + "learning_rate": 1.819616781402721e-05, + "loss": 0.9196, "step": 7706 }, { - "epoch": 0.21870034052213394, + "epoch": 0.21839666751678993, "grad_norm": 0.0, - "learning_rate": 1.8190202322861742e-05, - "loss": 1.108, + "learning_rate": 1.8195641966887274e-05, + "loss": 0.9644, "step": 7707 }, { - "epoch": 0.21872871736662883, + "epoch": 0.2184250049590524, "grad_norm": 0.0, - "learning_rate": 1.8189674953776312e-05, - "loss": 1.0019, + "learning_rate": 1.81951160507124e-05, + "loss": 1.0578, "step": 7708 }, { - "epoch": 0.21875709421112371, + "epoch": 0.21845334240131487, "grad_norm": 0.0, - "learning_rate": 1.818914751551233e-05, - "loss": 0.8325, + "learning_rate": 1.819459006550702e-05, + "loss": 1.0253, "step": 7709 }, { - "epoch": 0.2187854710556186, + "epoch": 0.21848167984357733, "grad_norm": 0.0, - "learning_rate": 1.8188620008074252e-05, - "loss": 1.0645, + "learning_rate": 1.8194064011275568e-05, + "loss": 1.0306, "step": 7710 }, { - "epoch": 0.21881384790011352, + "epoch": 0.21851001728583977, "grad_norm": 0.0, - "learning_rate": 1.818809243146653e-05, - "loss": 0.9355, + "learning_rate": 1.8193537888022466e-05, + "loss": 0.9034, "step": 7711 }, { - "epoch": 0.2188422247446084, + "epoch": 0.21853835472810224, "grad_norm": 0.0, - "learning_rate": 1.8187564785693627e-05, - "loss": 0.9619, + "learning_rate": 1.8193011695752155e-05, + "loss": 0.9885, "step": 7712 }, { - "epoch": 0.2188706015891033, + "epoch": 0.2185666921703647, "grad_norm": 0.0, - "learning_rate": 1.818703707075999e-05, - "loss": 1.0009, + "learning_rate": 1.819248543446907e-05, + "loss": 0.9557, "step": 7713 }, { - "epoch": 0.21889897843359818, + "epoch": 0.21859502961262717, "grad_norm": 0.0, - "learning_rate": 1.8186509286670086e-05, - "loss": 0.9741, + "learning_rate": 1.8191959104177628e-05, + "loss": 1.0227, "step": 7714 }, { - "epoch": 0.21892735527809307, + "epoch": 0.21862336705488963, "grad_norm": 0.0, - "learning_rate": 1.818598143342837e-05, - "loss": 0.8382, + "learning_rate": 1.8191432704882276e-05, + "loss": 0.9571, "step": 7715 }, { - "epoch": 0.21895573212258798, + "epoch": 0.2186517044971521, "grad_norm": 0.0, - "learning_rate": 1.8185453511039303e-05, - "loss": 0.896, + "learning_rate": 1.8190906236587448e-05, + "loss": 0.9821, "step": 7716 }, { - "epoch": 0.21898410896708287, + "epoch": 0.21868004193941454, "grad_norm": 0.0, - "learning_rate": 1.8184925519507337e-05, - "loss": 0.9991, + "learning_rate": 1.8190379699297567e-05, + "loss": 1.0188, "step": 7717 }, { - "epoch": 0.21901248581157776, + "epoch": 0.218708379381677, "grad_norm": 0.0, - "learning_rate": 1.818439745883694e-05, - "loss": 1.0156, + "learning_rate": 1.8189853093017084e-05, + "loss": 0.9608, "step": 7718 }, { - "epoch": 0.21904086265607264, + "epoch": 0.21873671682393947, "grad_norm": 0.0, - "learning_rate": 1.8183869329032573e-05, - "loss": 0.9918, + "learning_rate": 1.818932641775043e-05, + "loss": 1.0369, "step": 7719 }, { - "epoch": 0.21906923950056753, + "epoch": 0.21876505426620194, "grad_norm": 0.0, - "learning_rate": 1.8183341130098692e-05, - "loss": 1.0723, + "learning_rate": 1.818879967350203e-05, + "loss": 1.0298, "step": 7720 }, { - "epoch": 0.21909761634506242, + "epoch": 0.2187933917084644, "grad_norm": 0.0, - "learning_rate": 1.818281286203976e-05, - "loss": 1.0405, + "learning_rate": 1.818827286027633e-05, + "loss": 1.0605, "step": 7721 }, { - "epoch": 0.21912599318955733, + "epoch": 0.21882172915072687, "grad_norm": 0.0, - "learning_rate": 1.818228452486024e-05, - "loss": 0.9031, + "learning_rate": 1.8187745978077772e-05, + "loss": 1.0383, "step": 7722 }, { - "epoch": 0.21915437003405222, + "epoch": 0.2188500665929893, "grad_norm": 0.0, - "learning_rate": 1.818175611856459e-05, - "loss": 0.981, + "learning_rate": 1.818721902691079e-05, + "loss": 0.9617, "step": 7723 }, { - "epoch": 0.2191827468785471, + "epoch": 0.21887840403525177, "grad_norm": 0.0, - "learning_rate": 1.8181227643157286e-05, - "loss": 1.0148, + "learning_rate": 1.818669200677982e-05, + "loss": 0.9957, "step": 7724 }, { - "epoch": 0.219211123723042, + "epoch": 0.21890674147751424, "grad_norm": 0.0, - "learning_rate": 1.8180699098642778e-05, - "loss": 1.078, + "learning_rate": 1.81861649176893e-05, + "loss": 0.9152, "step": 7725 }, { - "epoch": 0.21923950056753688, + "epoch": 0.2189350789197767, "grad_norm": 0.0, - "learning_rate": 1.818017048502554e-05, - "loss": 0.9474, + "learning_rate": 1.8185637759643676e-05, + "loss": 1.0335, "step": 7726 }, { - "epoch": 0.21926787741203177, + "epoch": 0.21896341636203917, "grad_norm": 0.0, - "learning_rate": 1.8179641802310035e-05, - "loss": 1.0276, + "learning_rate": 1.8185110532647382e-05, + "loss": 1.0638, "step": 7727 }, { - "epoch": 0.21929625425652668, + "epoch": 0.21899175380430164, "grad_norm": 0.0, - "learning_rate": 1.8179113050500727e-05, - "loss": 1.0282, + "learning_rate": 1.8184583236704867e-05, + "loss": 1.0203, "step": 7728 }, { - "epoch": 0.21932463110102157, + "epoch": 0.21902009124656407, "grad_norm": 0.0, - "learning_rate": 1.8178584229602082e-05, - "loss": 0.8849, + "learning_rate": 1.8184055871820568e-05, + "loss": 1.1385, "step": 7729 }, { - "epoch": 0.21935300794551646, + "epoch": 0.21904842868882654, "grad_norm": 0.0, - "learning_rate": 1.817805533961857e-05, - "loss": 0.9011, + "learning_rate": 1.8183528437998924e-05, + "loss": 0.98, "step": 7730 }, { - "epoch": 0.21938138479001135, + "epoch": 0.219076766131089, "grad_norm": 0.0, - "learning_rate": 1.817752638055466e-05, - "loss": 1.0949, + "learning_rate": 1.8183000935244383e-05, + "loss": 0.9294, "step": 7731 }, { - "epoch": 0.21940976163450623, + "epoch": 0.21910510357335147, "grad_norm": 0.0, - "learning_rate": 1.817699735241481e-05, - "loss": 0.9925, + "learning_rate": 1.8182473363561385e-05, + "loss": 0.8702, "step": 7732 }, { - "epoch": 0.21943813847900112, + "epoch": 0.21913344101561394, "grad_norm": 0.0, - "learning_rate": 1.8176468255203502e-05, - "loss": 1.0344, + "learning_rate": 1.8181945722954377e-05, + "loss": 1.0758, "step": 7733 }, { - "epoch": 0.21946651532349604, + "epoch": 0.2191617784578764, "grad_norm": 0.0, - "learning_rate": 1.8175939088925195e-05, - "loss": 1.0693, + "learning_rate": 1.81814180134278e-05, + "loss": 1.1253, "step": 7734 }, { - "epoch": 0.21949489216799092, + "epoch": 0.21919011590013884, "grad_norm": 0.0, - "learning_rate": 1.8175409853584364e-05, - "loss": 0.9925, + "learning_rate": 1.8180890234986103e-05, + "loss": 1.0325, "step": 7735 }, { - "epoch": 0.2195232690124858, + "epoch": 0.2192184533424013, "grad_norm": 0.0, - "learning_rate": 1.817488054918548e-05, - "loss": 0.9563, + "learning_rate": 1.818036238763373e-05, + "loss": 1.0951, "step": 7736 }, { - "epoch": 0.2195516458569807, + "epoch": 0.21924679078466378, "grad_norm": 0.0, - "learning_rate": 1.817435117573301e-05, - "loss": 0.9352, + "learning_rate": 1.8179834471375127e-05, + "loss": 0.9896, "step": 7737 }, { - "epoch": 0.21958002270147559, + "epoch": 0.21927512822692624, "grad_norm": 0.0, - "learning_rate": 1.817382173323143e-05, - "loss": 1.0923, + "learning_rate": 1.817930648621474e-05, + "loss": 0.9817, "step": 7738 }, { - "epoch": 0.2196083995459705, + "epoch": 0.2193034656691887, "grad_norm": 0.0, - "learning_rate": 1.817329222168521e-05, - "loss": 1.0311, + "learning_rate": 1.8178778432157015e-05, + "loss": 0.9321, "step": 7739 }, { - "epoch": 0.2196367763904654, + "epoch": 0.21933180311145117, "grad_norm": 0.0, - "learning_rate": 1.8172762641098827e-05, - "loss": 1.0757, + "learning_rate": 1.8178250309206404e-05, + "loss": 1.0194, "step": 7740 }, { - "epoch": 0.21966515323496028, + "epoch": 0.2193601405537136, "grad_norm": 0.0, - "learning_rate": 1.8172232991476747e-05, - "loss": 1.1563, + "learning_rate": 1.8177722117367356e-05, + "loss": 1.0154, "step": 7741 }, { - "epoch": 0.21969353007945516, + "epoch": 0.21938847799597608, "grad_norm": 0.0, - "learning_rate": 1.8171703272823443e-05, - "loss": 0.8861, + "learning_rate": 1.8177193856644315e-05, + "loss": 0.9099, "step": 7742 }, { - "epoch": 0.21972190692395005, + "epoch": 0.21941681543823854, "grad_norm": 0.0, - "learning_rate": 1.8171173485143402e-05, - "loss": 1.0359, + "learning_rate": 1.8176665527041734e-05, + "loss": 0.8859, "step": 7743 }, { - "epoch": 0.21975028376844494, + "epoch": 0.219445152880501, "grad_norm": 0.0, - "learning_rate": 1.817064362844109e-05, - "loss": 0.9296, + "learning_rate": 1.817613712856406e-05, + "loss": 1.0218, "step": 7744 }, { - "epoch": 0.21977866061293985, + "epoch": 0.21947349032276348, "grad_norm": 0.0, - "learning_rate": 1.817011370272098e-05, - "loss": 1.0758, + "learning_rate": 1.8175608661215753e-05, + "loss": 1.017, "step": 7745 }, { - "epoch": 0.21980703745743474, + "epoch": 0.21950182776502594, "grad_norm": 0.0, - "learning_rate": 1.8169583707987552e-05, - "loss": 1.0537, + "learning_rate": 1.8175080125001257e-05, + "loss": 0.8762, "step": 7746 }, { - "epoch": 0.21983541430192963, + "epoch": 0.21953016520728838, "grad_norm": 0.0, - "learning_rate": 1.8169053644245287e-05, - "loss": 0.8928, + "learning_rate": 1.8174551519925025e-05, + "loss": 1.1072, "step": 7747 }, { - "epoch": 0.21986379114642451, + "epoch": 0.21955850264955085, "grad_norm": 0.0, - "learning_rate": 1.8168523511498657e-05, - "loss": 0.9878, + "learning_rate": 1.8174022845991506e-05, + "loss": 0.9751, "step": 7748 }, { - "epoch": 0.2198921679909194, + "epoch": 0.2195868400918133, "grad_norm": 0.0, - "learning_rate": 1.816799330975214e-05, - "loss": 1.0079, + "learning_rate": 1.817349410320516e-05, + "loss": 1.041, "step": 7749 }, { - "epoch": 0.2199205448354143, + "epoch": 0.21961517753407578, "grad_norm": 0.0, - "learning_rate": 1.816746303901022e-05, - "loss": 1.0059, + "learning_rate": 1.817296529157044e-05, + "loss": 0.9819, "step": 7750 }, { - "epoch": 0.2199489216799092, + "epoch": 0.21964351497633824, "grad_norm": 0.0, - "learning_rate": 1.816693269927737e-05, - "loss": 0.9798, + "learning_rate": 1.8172436411091795e-05, + "loss": 0.9867, "step": 7751 }, { - "epoch": 0.2199772985244041, + "epoch": 0.2196718524186007, "grad_norm": 0.0, - "learning_rate": 1.8166402290558073e-05, - "loss": 0.8838, + "learning_rate": 1.8171907461773686e-05, + "loss": 1.0002, "step": 7752 }, { - "epoch": 0.22000567536889898, + "epoch": 0.21970018986086315, "grad_norm": 0.0, - "learning_rate": 1.8165871812856807e-05, - "loss": 0.9402, + "learning_rate": 1.8171378443620563e-05, + "loss": 1.0738, "step": 7753 }, { - "epoch": 0.22003405221339387, + "epoch": 0.21972852730312561, "grad_norm": 0.0, - "learning_rate": 1.8165341266178056e-05, - "loss": 0.9815, + "learning_rate": 1.817084935663689e-05, + "loss": 0.962, "step": 7754 }, { - "epoch": 0.22006242905788875, + "epoch": 0.21975686474538808, "grad_norm": 0.0, - "learning_rate": 1.81648106505263e-05, - "loss": 1.0131, + "learning_rate": 1.8170320200827113e-05, + "loss": 0.987, "step": 7755 }, { - "epoch": 0.22009080590238367, + "epoch": 0.21978520218765055, "grad_norm": 0.0, - "learning_rate": 1.8164279965906023e-05, - "loss": 0.9814, + "learning_rate": 1.8169790976195696e-05, + "loss": 1.0411, "step": 7756 }, { - "epoch": 0.22011918274687856, + "epoch": 0.219813539629913, "grad_norm": 0.0, - "learning_rate": 1.8163749212321704e-05, - "loss": 0.9104, + "learning_rate": 1.8169261682747098e-05, + "loss": 1.0803, "step": 7757 }, { - "epoch": 0.22014755959137344, + "epoch": 0.21984187707217548, "grad_norm": 0.0, - "learning_rate": 1.8163218389777832e-05, - "loss": 1.0395, + "learning_rate": 1.8168732320485776e-05, + "loss": 1.0168, "step": 7758 }, { - "epoch": 0.22017593643586833, + "epoch": 0.21987021451443792, "grad_norm": 0.0, - "learning_rate": 1.8162687498278884e-05, - "loss": 0.8998, + "learning_rate": 1.8168202889416184e-05, + "loss": 0.9103, "step": 7759 }, { - "epoch": 0.22020431328036322, + "epoch": 0.21989855195670038, "grad_norm": 0.0, - "learning_rate": 1.8162156537829347e-05, - "loss": 1.082, + "learning_rate": 1.816767338954279e-05, + "loss": 1.0057, "step": 7760 }, { - "epoch": 0.2202326901248581, + "epoch": 0.21992688939896285, "grad_norm": 0.0, - "learning_rate": 1.816162550843371e-05, - "loss": 0.9783, + "learning_rate": 1.8167143820870046e-05, + "loss": 1.0404, "step": 7761 }, { - "epoch": 0.22026106696935302, + "epoch": 0.21995522684122532, "grad_norm": 0.0, - "learning_rate": 1.8161094410096455e-05, - "loss": 1.0436, + "learning_rate": 1.816661418340242e-05, + "loss": 0.8738, "step": 7762 }, { - "epoch": 0.2202894438138479, + "epoch": 0.21998356428348778, "grad_norm": 0.0, - "learning_rate": 1.8160563242822068e-05, - "loss": 1.0891, + "learning_rate": 1.816608447714437e-05, + "loss": 1.0148, "step": 7763 }, { - "epoch": 0.2203178206583428, + "epoch": 0.22001190172575025, "grad_norm": 0.0, - "learning_rate": 1.8160032006615032e-05, - "loss": 0.9182, + "learning_rate": 1.816555470210036e-05, + "loss": 0.9575, "step": 7764 }, { - "epoch": 0.22034619750283768, + "epoch": 0.22004023916801269, "grad_norm": 0.0, - "learning_rate": 1.8159500701479844e-05, - "loss": 1.0521, + "learning_rate": 1.8165024858274845e-05, + "loss": 1.0737, "step": 7765 }, { - "epoch": 0.22037457434733257, + "epoch": 0.22006857661027515, "grad_norm": 0.0, - "learning_rate": 1.8158969327420984e-05, - "loss": 0.918, + "learning_rate": 1.81644949456723e-05, + "loss": 0.9342, "step": 7766 }, { - "epoch": 0.22040295119182746, + "epoch": 0.22009691405253762, "grad_norm": 0.0, - "learning_rate": 1.8158437884442947e-05, - "loss": 1.0988, + "learning_rate": 1.8163964964297177e-05, + "loss": 0.9956, "step": 7767 }, { - "epoch": 0.22043132803632237, + "epoch": 0.22012525149480008, "grad_norm": 0.0, - "learning_rate": 1.8157906372550217e-05, - "loss": 0.9557, + "learning_rate": 1.8163434914153948e-05, + "loss": 0.9687, "step": 7768 }, { - "epoch": 0.22045970488081726, + "epoch": 0.22015358893706255, "grad_norm": 0.0, - "learning_rate": 1.8157374791747285e-05, - "loss": 0.9102, + "learning_rate": 1.8162904795247077e-05, + "loss": 1.0825, "step": 7769 }, { - "epoch": 0.22048808172531215, + "epoch": 0.22018192637932502, "grad_norm": 0.0, - "learning_rate": 1.815684314203864e-05, - "loss": 1.052, + "learning_rate": 1.8162374607581022e-05, + "loss": 0.9798, "step": 7770 }, { - "epoch": 0.22051645856980703, + "epoch": 0.22021026382158745, "grad_norm": 0.0, - "learning_rate": 1.8156311423428773e-05, - "loss": 0.9971, + "learning_rate": 1.816184435116026e-05, + "loss": 1.08, "step": 7771 }, { - "epoch": 0.22054483541430192, + "epoch": 0.22023860126384992, "grad_norm": 0.0, - "learning_rate": 1.8155779635922178e-05, - "loss": 0.9808, + "learning_rate": 1.816131402598925e-05, + "loss": 1.0475, "step": 7772 }, { - "epoch": 0.2205732122587968, + "epoch": 0.2202669387061124, "grad_norm": 0.0, - "learning_rate": 1.8155247779523348e-05, - "loss": 1.0685, + "learning_rate": 1.8160783632072463e-05, + "loss": 1.0182, "step": 7773 }, { - "epoch": 0.22060158910329172, + "epoch": 0.22029527614837485, "grad_norm": 0.0, - "learning_rate": 1.8154715854236775e-05, - "loss": 0.9676, + "learning_rate": 1.8160253169414363e-05, + "loss": 1.032, "step": 7774 }, { - "epoch": 0.2206299659477866, + "epoch": 0.22032361359063732, "grad_norm": 0.0, - "learning_rate": 1.8154183860066944e-05, - "loss": 1.015, + "learning_rate": 1.815972263801942e-05, + "loss": 0.9996, "step": 7775 }, { - "epoch": 0.2206583427922815, + "epoch": 0.22035195103289976, "grad_norm": 0.0, - "learning_rate": 1.815365179701836e-05, - "loss": 1.1113, + "learning_rate": 1.8159192037892106e-05, + "loss": 0.9036, "step": 7776 }, { - "epoch": 0.22068671963677639, + "epoch": 0.22038028847516222, "grad_norm": 0.0, - "learning_rate": 1.8153119665095514e-05, - "loss": 0.9219, + "learning_rate": 1.8158661369036883e-05, + "loss": 1.0232, "step": 7777 }, { - "epoch": 0.22071509648127127, + "epoch": 0.2204086259174247, "grad_norm": 0.0, - "learning_rate": 1.8152587464302896e-05, - "loss": 1.1039, + "learning_rate": 1.815813063145823e-05, + "loss": 1.1722, "step": 7778 }, { - "epoch": 0.2207434733257662, + "epoch": 0.22043696335968715, "grad_norm": 0.0, - "learning_rate": 1.8152055194645006e-05, - "loss": 0.9821, + "learning_rate": 1.815759982516061e-05, + "loss": 0.9811, "step": 7779 }, { - "epoch": 0.22077185017026107, + "epoch": 0.22046530080194962, "grad_norm": 0.0, - "learning_rate": 1.815152285612634e-05, - "loss": 0.9359, + "learning_rate": 1.81570689501485e-05, + "loss": 0.9652, "step": 7780 }, { - "epoch": 0.22080022701475596, + "epoch": 0.2204936382442121, "grad_norm": 0.0, - "learning_rate": 1.8150990448751393e-05, - "loss": 1.1432, + "learning_rate": 1.815653800642637e-05, + "loss": 0.8948, "step": 7781 }, { - "epoch": 0.22082860385925085, + "epoch": 0.22052197568647453, "grad_norm": 0.0, - "learning_rate": 1.8150457972524667e-05, - "loss": 1.0428, + "learning_rate": 1.815600699399869e-05, + "loss": 0.8941, "step": 7782 }, { - "epoch": 0.22085698070374574, + "epoch": 0.220550313128737, "grad_norm": 0.0, - "learning_rate": 1.8149925427450653e-05, - "loss": 1.1014, + "learning_rate": 1.8155475912869932e-05, + "loss": 1.0147, "step": 7783 }, { - "epoch": 0.22088535754824062, + "epoch": 0.22057865057099946, "grad_norm": 0.0, - "learning_rate": 1.8149392813533852e-05, - "loss": 1.0145, + "learning_rate": 1.8154944763044574e-05, + "loss": 1.0023, "step": 7784 }, { - "epoch": 0.22091373439273554, + "epoch": 0.22060698801326192, "grad_norm": 0.0, - "learning_rate": 1.8148860130778765e-05, - "loss": 0.9385, + "learning_rate": 1.815441354452709e-05, + "loss": 1.0205, "step": 7785 }, { - "epoch": 0.22094211123723043, + "epoch": 0.2206353254555244, "grad_norm": 0.0, - "learning_rate": 1.814832737918989e-05, - "loss": 0.9982, + "learning_rate": 1.815388225732195e-05, + "loss": 0.9402, "step": 7786 }, { - "epoch": 0.2209704880817253, + "epoch": 0.22066366289778686, "grad_norm": 0.0, - "learning_rate": 1.8147794558771728e-05, - "loss": 1.0975, + "learning_rate": 1.8153350901433633e-05, + "loss": 0.9216, "step": 7787 }, { - "epoch": 0.2209988649262202, + "epoch": 0.2206920003400493, "grad_norm": 0.0, - "learning_rate": 1.814726166952878e-05, - "loss": 0.9729, + "learning_rate": 1.8152819476866616e-05, + "loss": 1.0817, "step": 7788 }, { - "epoch": 0.2210272417707151, + "epoch": 0.22072033778231176, "grad_norm": 0.0, - "learning_rate": 1.814672871146554e-05, - "loss": 1.0341, + "learning_rate": 1.815228798362537e-05, + "loss": 0.9279, "step": 7789 }, { - "epoch": 0.22105561861520998, + "epoch": 0.22074867522457423, "grad_norm": 0.0, - "learning_rate": 1.814619568458652e-05, - "loss": 1.037, + "learning_rate": 1.8151756421714375e-05, + "loss": 1.0171, "step": 7790 }, { - "epoch": 0.2210839954597049, + "epoch": 0.2207770126668367, "grad_norm": 0.0, - "learning_rate": 1.814566258889622e-05, - "loss": 1.01, + "learning_rate": 1.8151224791138106e-05, + "loss": 1.1104, "step": 7791 }, { - "epoch": 0.22111237230419978, + "epoch": 0.22080535010909916, "grad_norm": 0.0, - "learning_rate": 1.8145129424399142e-05, - "loss": 0.9422, + "learning_rate": 1.815069309190105e-05, + "loss": 0.9346, "step": 7792 }, { - "epoch": 0.22114074914869467, + "epoch": 0.22083368755136162, "grad_norm": 0.0, - "learning_rate": 1.814459619109979e-05, - "loss": 1.0901, + "learning_rate": 1.8150161324007674e-05, + "loss": 0.9297, "step": 7793 }, { - "epoch": 0.22116912599318955, + "epoch": 0.22086202499362406, "grad_norm": 0.0, - "learning_rate": 1.8144062889002668e-05, - "loss": 0.999, + "learning_rate": 1.8149629487462466e-05, + "loss": 1.0035, "step": 7794 }, { - "epoch": 0.22119750283768444, + "epoch": 0.22089036243588653, "grad_norm": 0.0, - "learning_rate": 1.814352951811228e-05, - "loss": 1.0437, + "learning_rate": 1.81490975822699e-05, + "loss": 0.8739, "step": 7795 }, { - "epoch": 0.22122587968217935, + "epoch": 0.220918699878149, "grad_norm": 0.0, - "learning_rate": 1.814299607843313e-05, - "loss": 1.0963, + "learning_rate": 1.814856560843446e-05, + "loss": 1.0869, "step": 7796 }, { - "epoch": 0.22125425652667424, + "epoch": 0.22094703732041146, "grad_norm": 0.0, - "learning_rate": 1.814246256996973e-05, - "loss": 1.0034, + "learning_rate": 1.814803356596063e-05, + "loss": 0.9479, "step": 7797 }, { - "epoch": 0.22128263337116913, + "epoch": 0.22097537476267393, "grad_norm": 0.0, - "learning_rate": 1.814192899272658e-05, - "loss": 0.9559, + "learning_rate": 1.814750145485288e-05, + "loss": 0.9804, "step": 7798 }, { - "epoch": 0.22131101021566402, + "epoch": 0.2210037122049364, "grad_norm": 0.0, - "learning_rate": 1.8141395346708192e-05, - "loss": 0.9955, + "learning_rate": 1.8146969275115704e-05, + "loss": 1.0034, "step": 7799 }, { - "epoch": 0.2213393870601589, + "epoch": 0.22103204964719883, "grad_norm": 0.0, - "learning_rate": 1.814086163191907e-05, - "loss": 1.0579, + "learning_rate": 1.8146437026753584e-05, + "loss": 0.9802, "step": 7800 }, { - "epoch": 0.2213677639046538, + "epoch": 0.2210603870894613, "grad_norm": 0.0, - "learning_rate": 1.814032784836372e-05, - "loss": 0.9186, + "learning_rate": 1.8145904709770993e-05, + "loss": 1.0907, "step": 7801 }, { - "epoch": 0.2213961407491487, + "epoch": 0.22108872453172376, "grad_norm": 0.0, - "learning_rate": 1.8139793996046662e-05, - "loss": 0.9715, + "learning_rate": 1.8145372324172425e-05, + "loss": 1.0526, "step": 7802 }, { - "epoch": 0.2214245175936436, + "epoch": 0.22111706197398623, "grad_norm": 0.0, - "learning_rate": 1.8139260074972392e-05, - "loss": 0.9743, + "learning_rate": 1.814483986996236e-05, + "loss": 1.011, "step": 7803 }, { - "epoch": 0.22145289443813848, + "epoch": 0.2211453994162487, "grad_norm": 0.0, - "learning_rate": 1.813872608514543e-05, - "loss": 0.9395, + "learning_rate": 1.8144307347145287e-05, + "loss": 0.9338, "step": 7804 }, { - "epoch": 0.22148127128263337, + "epoch": 0.22117373685851116, "grad_norm": 0.0, - "learning_rate": 1.8138192026570282e-05, - "loss": 0.9482, + "learning_rate": 1.8143774755725685e-05, + "loss": 1.0018, "step": 7805 }, { - "epoch": 0.22150964812712826, + "epoch": 0.2212020743007736, "grad_norm": 0.0, - "learning_rate": 1.813765789925146e-05, - "loss": 0.8566, + "learning_rate": 1.814324209570805e-05, + "loss": 0.96, "step": 7806 }, { - "epoch": 0.22153802497162314, + "epoch": 0.22123041174303607, "grad_norm": 0.0, - "learning_rate": 1.8137123703193474e-05, - "loss": 0.9187, + "learning_rate": 1.8142709367096855e-05, + "loss": 0.8734, "step": 7807 }, { - "epoch": 0.22156640181611806, + "epoch": 0.22125874918529853, "grad_norm": 0.0, - "learning_rate": 1.8136589438400843e-05, - "loss": 1.1276, + "learning_rate": 1.8142176569896603e-05, + "loss": 1.0502, "step": 7808 }, { - "epoch": 0.22159477866061295, + "epoch": 0.221287086627561, "grad_norm": 0.0, - "learning_rate": 1.8136055104878072e-05, - "loss": 0.9683, + "learning_rate": 1.814164370411177e-05, + "loss": 0.9925, "step": 7809 }, { - "epoch": 0.22162315550510783, + "epoch": 0.22131542406982346, "grad_norm": 0.0, - "learning_rate": 1.8135520702629677e-05, - "loss": 1.0086, + "learning_rate": 1.8141110769746848e-05, + "loss": 1.0377, "step": 7810 }, { - "epoch": 0.22165153234960272, + "epoch": 0.22134376151208593, "grad_norm": 0.0, - "learning_rate": 1.8134986231660174e-05, - "loss": 1.0132, + "learning_rate": 1.8140577766806328e-05, + "loss": 0.9901, "step": 7811 }, { - "epoch": 0.2216799091940976, + "epoch": 0.22137209895434837, "grad_norm": 0.0, - "learning_rate": 1.8134451691974076e-05, - "loss": 0.8855, + "learning_rate": 1.81400446952947e-05, + "loss": 0.9155, "step": 7812 }, { - "epoch": 0.2217082860385925, + "epoch": 0.22140043639661083, "grad_norm": 0.0, - "learning_rate": 1.81339170835759e-05, - "loss": 1.0689, + "learning_rate": 1.813951155521645e-05, + "loss": 1.0878, "step": 7813 }, { - "epoch": 0.2217366628830874, + "epoch": 0.2214287738388733, "grad_norm": 0.0, - "learning_rate": 1.813338240647016e-05, - "loss": 1.0682, + "learning_rate": 1.8138978346576073e-05, + "loss": 0.9297, "step": 7814 }, { - "epoch": 0.2217650397275823, + "epoch": 0.22145711128113577, "grad_norm": 0.0, - "learning_rate": 1.8132847660661374e-05, - "loss": 0.999, + "learning_rate": 1.8138445069378062e-05, + "loss": 1.0733, "step": 7815 }, { - "epoch": 0.22179341657207718, + "epoch": 0.22148544872339823, "grad_norm": 0.0, - "learning_rate": 1.8132312846154058e-05, - "loss": 1.065, + "learning_rate": 1.8137911723626903e-05, + "loss": 1.0031, "step": 7816 }, { - "epoch": 0.22182179341657207, + "epoch": 0.2215137861656607, "grad_norm": 0.0, - "learning_rate": 1.8131777962952732e-05, - "loss": 1.0956, + "learning_rate": 1.813737830932709e-05, + "loss": 0.9691, "step": 7817 }, { - "epoch": 0.22185017026106696, + "epoch": 0.22154212360792314, "grad_norm": 0.0, - "learning_rate": 1.813124301106191e-05, - "loss": 0.993, + "learning_rate": 1.813684482648312e-05, + "loss": 0.8888, "step": 7818 }, { - "epoch": 0.22187854710556187, + "epoch": 0.2215704610501856, "grad_norm": 0.0, - "learning_rate": 1.813070799048611e-05, - "loss": 0.9368, + "learning_rate": 1.8136311275099484e-05, + "loss": 1.0089, "step": 7819 }, { - "epoch": 0.22190692395005676, + "epoch": 0.22159879849244807, "grad_norm": 0.0, - "learning_rate": 1.8130172901229857e-05, - "loss": 1.0063, + "learning_rate": 1.8135777655180676e-05, + "loss": 1.0382, "step": 7820 }, { - "epoch": 0.22193530079455165, + "epoch": 0.22162713593471053, "grad_norm": 0.0, - "learning_rate": 1.8129637743297667e-05, - "loss": 0.983, + "learning_rate": 1.8135243966731194e-05, + "loss": 0.9741, "step": 7821 }, { - "epoch": 0.22196367763904654, + "epoch": 0.221655473376973, "grad_norm": 0.0, - "learning_rate": 1.8129102516694063e-05, - "loss": 1.114, + "learning_rate": 1.8134710209755527e-05, + "loss": 1.0322, "step": 7822 }, { - "epoch": 0.22199205448354142, + "epoch": 0.22168381081923547, "grad_norm": 0.0, - "learning_rate": 1.8128567221423565e-05, - "loss": 1.0292, + "learning_rate": 1.813417638425818e-05, + "loss": 0.9821, "step": 7823 }, { - "epoch": 0.2220204313280363, + "epoch": 0.2217121482614979, "grad_norm": 0.0, - "learning_rate": 1.812803185749069e-05, - "loss": 1.02, + "learning_rate": 1.8133642490243642e-05, + "loss": 0.9704, "step": 7824 }, { - "epoch": 0.22204880817253123, + "epoch": 0.22174048570376037, "grad_norm": 0.0, - "learning_rate": 1.8127496424899968e-05, - "loss": 0.9906, + "learning_rate": 1.8133108527716413e-05, + "loss": 0.9926, "step": 7825 }, { - "epoch": 0.2220771850170261, + "epoch": 0.22176882314602284, "grad_norm": 0.0, - "learning_rate": 1.8126960923655915e-05, - "loss": 0.9189, + "learning_rate": 1.8132574496680996e-05, + "loss": 0.9763, "step": 7826 }, { - "epoch": 0.222105561861521, + "epoch": 0.2217971605882853, "grad_norm": 0.0, - "learning_rate": 1.812642535376306e-05, - "loss": 1.0056, + "learning_rate": 1.8132040397141878e-05, + "loss": 0.9066, "step": 7827 }, { - "epoch": 0.2221339387060159, + "epoch": 0.22182549803054777, "grad_norm": 0.0, - "learning_rate": 1.812588971522593e-05, - "loss": 0.9342, + "learning_rate": 1.8131506229103565e-05, + "loss": 0.9997, "step": 7828 }, { - "epoch": 0.22216231555051077, + "epoch": 0.22185383547281023, "grad_norm": 0.0, - "learning_rate": 1.8125354008049037e-05, - "loss": 0.9833, + "learning_rate": 1.8130971992570555e-05, + "loss": 1.0402, "step": 7829 }, { - "epoch": 0.22219069239500566, + "epoch": 0.22188217291507267, "grad_norm": 0.0, - "learning_rate": 1.8124818232236917e-05, - "loss": 0.9535, + "learning_rate": 1.813043768754735e-05, + "loss": 0.9902, "step": 7830 }, { - "epoch": 0.22221906923950058, + "epoch": 0.22191051035733514, "grad_norm": 0.0, - "learning_rate": 1.812428238779409e-05, - "loss": 0.9302, + "learning_rate": 1.8129903314038447e-05, + "loss": 1.0331, "step": 7831 }, { - "epoch": 0.22224744608399546, + "epoch": 0.2219388477995976, "grad_norm": 0.0, - "learning_rate": 1.8123746474725087e-05, - "loss": 1.0027, + "learning_rate": 1.8129368872048353e-05, + "loss": 0.9829, "step": 7832 }, { - "epoch": 0.22227582292849035, + "epoch": 0.22196718524186007, "grad_norm": 0.0, - "learning_rate": 1.812321049303443e-05, - "loss": 0.9315, + "learning_rate": 1.812883436158156e-05, + "loss": 0.9711, "step": 7833 }, { - "epoch": 0.22230419977298524, + "epoch": 0.22199552268412254, "grad_norm": 0.0, - "learning_rate": 1.812267444272665e-05, - "loss": 0.9847, + "learning_rate": 1.8128299782642585e-05, + "loss": 0.985, "step": 7834 }, { - "epoch": 0.22233257661748013, + "epoch": 0.222023860126385, "grad_norm": 0.0, - "learning_rate": 1.812213832380627e-05, - "loss": 1.0935, + "learning_rate": 1.812776513523592e-05, + "loss": 0.9409, "step": 7835 }, { - "epoch": 0.22236095346197504, + "epoch": 0.22205219756864744, "grad_norm": 0.0, - "learning_rate": 1.8121602136277828e-05, - "loss": 1.0518, + "learning_rate": 1.812723041936607e-05, + "loss": 0.9867, "step": 7836 }, { - "epoch": 0.22238933030646993, + "epoch": 0.2220805350109099, "grad_norm": 0.0, - "learning_rate": 1.8121065880145846e-05, - "loss": 1.0937, + "learning_rate": 1.8126695635037538e-05, + "loss": 1.002, "step": 7837 }, { - "epoch": 0.22241770715096482, + "epoch": 0.22210887245317237, "grad_norm": 0.0, - "learning_rate": 1.8120529555414855e-05, - "loss": 0.9798, + "learning_rate": 1.8126160782254832e-05, + "loss": 1.0188, "step": 7838 }, { - "epoch": 0.2224460839954597, + "epoch": 0.22213720989543484, "grad_norm": 0.0, - "learning_rate": 1.8119993162089387e-05, - "loss": 0.9093, + "learning_rate": 1.8125625861022455e-05, + "loss": 0.966, "step": 7839 }, { - "epoch": 0.2224744608399546, + "epoch": 0.2221655473376973, "grad_norm": 0.0, - "learning_rate": 1.811945670017397e-05, - "loss": 0.9592, + "learning_rate": 1.8125090871344917e-05, + "loss": 0.9766, "step": 7840 }, { - "epoch": 0.22250283768444948, + "epoch": 0.22219388477995977, "grad_norm": 0.0, - "learning_rate": 1.811892016967314e-05, - "loss": 0.9955, + "learning_rate": 1.812455581322672e-05, + "loss": 0.9676, "step": 7841 }, { - "epoch": 0.2225312145289444, + "epoch": 0.2222222222222222, "grad_norm": 0.0, - "learning_rate": 1.811838357059142e-05, - "loss": 1.0943, + "learning_rate": 1.812402068667237e-05, + "loss": 0.9795, "step": 7842 }, { - "epoch": 0.22255959137343928, + "epoch": 0.22225055966448468, "grad_norm": 0.0, - "learning_rate": 1.8117846902933355e-05, - "loss": 1.0003, + "learning_rate": 1.8123485491686382e-05, + "loss": 1.0911, "step": 7843 }, { - "epoch": 0.22258796821793417, + "epoch": 0.22227889710674714, "grad_norm": 0.0, - "learning_rate": 1.811731016670347e-05, - "loss": 1.0271, + "learning_rate": 1.8122950228273257e-05, + "loss": 0.9581, "step": 7844 }, { - "epoch": 0.22261634506242906, + "epoch": 0.2223072345490096, "grad_norm": 0.0, - "learning_rate": 1.81167733619063e-05, - "loss": 0.9084, + "learning_rate": 1.8122414896437502e-05, + "loss": 0.8579, "step": 7845 }, { - "epoch": 0.22264472190692394, + "epoch": 0.22233557199127207, "grad_norm": 0.0, - "learning_rate": 1.8116236488546388e-05, - "loss": 0.8946, + "learning_rate": 1.8121879496183636e-05, + "loss": 1.0794, "step": 7846 }, { - "epoch": 0.22267309875141883, + "epoch": 0.22236390943353454, "grad_norm": 0.0, - "learning_rate": 1.8115699546628254e-05, - "loss": 0.8899, + "learning_rate": 1.8121344027516163e-05, + "loss": 1.0783, "step": 7847 }, { - "epoch": 0.22270147559591374, + "epoch": 0.22239224687579698, "grad_norm": 0.0, - "learning_rate": 1.8115162536156445e-05, - "loss": 1.0276, + "learning_rate": 1.8120808490439588e-05, + "loss": 0.8834, "step": 7848 }, { - "epoch": 0.22272985244040863, + "epoch": 0.22242058431805944, "grad_norm": 0.0, - "learning_rate": 1.811462545713549e-05, - "loss": 0.9598, + "learning_rate": 1.812027288495843e-05, + "loss": 0.9376, "step": 7849 }, { - "epoch": 0.22275822928490352, + "epoch": 0.2224489217603219, "grad_norm": 0.0, - "learning_rate": 1.811408830956993e-05, - "loss": 0.9578, + "learning_rate": 1.81197372110772e-05, + "loss": 1.0469, "step": 7850 }, { - "epoch": 0.2227866061293984, + "epoch": 0.22247725920258438, "grad_norm": 0.0, - "learning_rate": 1.81135510934643e-05, - "loss": 1.0252, + "learning_rate": 1.8119201468800407e-05, + "loss": 1.0635, "step": 7851 }, { - "epoch": 0.2228149829738933, + "epoch": 0.22250559664484684, "grad_norm": 0.0, - "learning_rate": 1.811301380882314e-05, - "loss": 1.0544, + "learning_rate": 1.8118665658132566e-05, + "loss": 0.9716, "step": 7852 }, { - "epoch": 0.22284335981838818, + "epoch": 0.2225339340871093, "grad_norm": 0.0, - "learning_rate": 1.8112476455650987e-05, - "loss": 0.9787, + "learning_rate": 1.8118129779078185e-05, + "loss": 1.0783, "step": 7853 }, { - "epoch": 0.2228717366628831, + "epoch": 0.22256227152937175, "grad_norm": 0.0, - "learning_rate": 1.8111939033952382e-05, - "loss": 1.0655, + "learning_rate": 1.8117593831641788e-05, + "loss": 1.0859, "step": 7854 }, { - "epoch": 0.22290011350737798, + "epoch": 0.2225906089716342, "grad_norm": 0.0, - "learning_rate": 1.8111401543731862e-05, - "loss": 1.0372, + "learning_rate": 1.8117057815827883e-05, + "loss": 0.9622, "step": 7855 }, { - "epoch": 0.22292849035187287, + "epoch": 0.22261894641389668, "grad_norm": 0.0, - "learning_rate": 1.8110863984993972e-05, - "loss": 0.8988, + "learning_rate": 1.8116521731640984e-05, + "loss": 1.1288, "step": 7856 }, { - "epoch": 0.22295686719636776, + "epoch": 0.22264728385615914, "grad_norm": 0.0, - "learning_rate": 1.8110326357743247e-05, - "loss": 1.0685, + "learning_rate": 1.8115985579085607e-05, + "loss": 1.0037, "step": 7857 }, { - "epoch": 0.22298524404086265, + "epoch": 0.2226756212984216, "grad_norm": 0.0, - "learning_rate": 1.810978866198423e-05, - "loss": 1.0578, + "learning_rate": 1.811544935816627e-05, + "loss": 1.0247, "step": 7858 }, { - "epoch": 0.22301362088535756, + "epoch": 0.22270395874068408, "grad_norm": 0.0, - "learning_rate": 1.8109250897721465e-05, - "loss": 0.9211, + "learning_rate": 1.8114913068887493e-05, + "loss": 0.9782, "step": 7859 }, { - "epoch": 0.22304199772985245, + "epoch": 0.22273229618294652, "grad_norm": 0.0, - "learning_rate": 1.8108713064959493e-05, - "loss": 0.9717, + "learning_rate": 1.8114376711253788e-05, + "loss": 1.0657, "step": 7860 }, { - "epoch": 0.22307037457434734, + "epoch": 0.22276063362520898, "grad_norm": 0.0, - "learning_rate": 1.8108175163702855e-05, - "loss": 1.0399, + "learning_rate": 1.8113840285269674e-05, + "loss": 1.0312, "step": 7861 }, { - "epoch": 0.22309875141884222, + "epoch": 0.22278897106747145, "grad_norm": 0.0, - "learning_rate": 1.8107637193956102e-05, - "loss": 0.9305, + "learning_rate": 1.811330379093967e-05, + "loss": 0.9066, "step": 7862 }, { - "epoch": 0.2231271282633371, + "epoch": 0.2228173085097339, "grad_norm": 0.0, - "learning_rate": 1.8107099155723767e-05, - "loss": 0.9897, + "learning_rate": 1.8112767228268295e-05, + "loss": 1.0319, "step": 7863 }, { - "epoch": 0.223155505107832, + "epoch": 0.22284564595199638, "grad_norm": 0.0, - "learning_rate": 1.8106561049010404e-05, - "loss": 1.1283, + "learning_rate": 1.8112230597260073e-05, + "loss": 1.0026, "step": 7864 }, { - "epoch": 0.2231838819523269, + "epoch": 0.22287398339425885, "grad_norm": 0.0, - "learning_rate": 1.8106022873820553e-05, - "loss": 0.9719, + "learning_rate": 1.8111693897919518e-05, + "loss": 0.9291, "step": 7865 }, { - "epoch": 0.2232122587968218, + "epoch": 0.22290232083652128, "grad_norm": 0.0, - "learning_rate": 1.8105484630158768e-05, - "loss": 0.9671, + "learning_rate": 1.8111157130251153e-05, + "loss": 0.9934, "step": 7866 }, { - "epoch": 0.2232406356413167, + "epoch": 0.22293065827878375, "grad_norm": 0.0, - "learning_rate": 1.8104946318029585e-05, - "loss": 0.9945, + "learning_rate": 1.8110620294259497e-05, + "loss": 1.0191, "step": 7867 }, { - "epoch": 0.22326901248581157, + "epoch": 0.22295899572104622, "grad_norm": 0.0, - "learning_rate": 1.8104407937437558e-05, - "loss": 0.9671, + "learning_rate": 1.8110083389949074e-05, + "loss": 1.1242, "step": 7868 }, { - "epoch": 0.22329738933030646, + "epoch": 0.22298733316330868, "grad_norm": 0.0, - "learning_rate": 1.810386948838723e-05, - "loss": 1.0787, + "learning_rate": 1.810954641732441e-05, + "loss": 1.1241, "step": 7869 }, { - "epoch": 0.22332576617480135, + "epoch": 0.22301567060557115, "grad_norm": 0.0, - "learning_rate": 1.8103330970883156e-05, - "loss": 0.9566, + "learning_rate": 1.8109009376390024e-05, + "loss": 1.0078, "step": 7870 }, { - "epoch": 0.22335414301929626, + "epoch": 0.22304400804783361, "grad_norm": 0.0, - "learning_rate": 1.810279238492988e-05, - "loss": 1.074, + "learning_rate": 1.8108472267150442e-05, + "loss": 1.0193, "step": 7871 }, { - "epoch": 0.22338251986379115, + "epoch": 0.22307234549009605, "grad_norm": 0.0, - "learning_rate": 1.810225373053195e-05, - "loss": 0.9885, + "learning_rate": 1.8107935089610186e-05, + "loss": 0.9232, "step": 7872 }, { - "epoch": 0.22341089670828604, + "epoch": 0.22310068293235852, "grad_norm": 0.0, - "learning_rate": 1.8101715007693923e-05, - "loss": 0.9389, + "learning_rate": 1.8107397843773785e-05, + "loss": 0.9071, "step": 7873 }, { - "epoch": 0.22343927355278093, + "epoch": 0.22312902037462098, "grad_norm": 0.0, - "learning_rate": 1.8101176216420343e-05, - "loss": 0.9829, + "learning_rate": 1.8106860529645756e-05, + "loss": 1.0731, "step": 7874 }, { - "epoch": 0.2234676503972758, + "epoch": 0.22315735781688345, "grad_norm": 0.0, - "learning_rate": 1.810063735671576e-05, - "loss": 0.9799, + "learning_rate": 1.8106323147230636e-05, + "loss": 0.9579, "step": 7875 }, { - "epoch": 0.22349602724177073, + "epoch": 0.22318569525914592, "grad_norm": 0.0, - "learning_rate": 1.8100098428584733e-05, - "loss": 1.0074, + "learning_rate": 1.8105785696532944e-05, + "loss": 0.9701, "step": 7876 }, { - "epoch": 0.22352440408626562, + "epoch": 0.22321403270140838, "grad_norm": 0.0, - "learning_rate": 1.809955943203181e-05, - "loss": 0.9315, + "learning_rate": 1.8105248177557207e-05, + "loss": 0.9977, "step": 7877 }, { - "epoch": 0.2235527809307605, + "epoch": 0.22324237014367082, "grad_norm": 0.0, - "learning_rate": 1.8099020367061547e-05, - "loss": 1.0142, + "learning_rate": 1.8104710590307954e-05, + "loss": 0.817, "step": 7878 }, { - "epoch": 0.2235811577752554, + "epoch": 0.2232707075859333, "grad_norm": 0.0, - "learning_rate": 1.809848123367849e-05, - "loss": 0.9672, + "learning_rate": 1.8104172934789716e-05, + "loss": 1.079, "step": 7879 }, { - "epoch": 0.22360953461975028, + "epoch": 0.22329904502819575, "grad_norm": 0.0, - "learning_rate": 1.80979420318872e-05, - "loss": 0.968, + "learning_rate": 1.810363521100702e-05, + "loss": 1.0858, "step": 7880 }, { - "epoch": 0.22363791146424516, + "epoch": 0.22332738247045822, "grad_norm": 0.0, - "learning_rate": 1.809740276169223e-05, - "loss": 0.9436, + "learning_rate": 1.8103097418964398e-05, + "loss": 0.9889, "step": 7881 }, { - "epoch": 0.22366628830874008, + "epoch": 0.22335571991272068, "grad_norm": 0.0, - "learning_rate": 1.8096863423098136e-05, - "loss": 0.8457, + "learning_rate": 1.8102559558666374e-05, + "loss": 0.8813, "step": 7882 }, { - "epoch": 0.22369466515323497, + "epoch": 0.22338405735498315, "grad_norm": 0.0, - "learning_rate": 1.809632401610947e-05, - "loss": 1.0229, + "learning_rate": 1.810202163011748e-05, + "loss": 1.101, "step": 7883 }, { - "epoch": 0.22372304199772985, + "epoch": 0.2234123947972456, "grad_norm": 0.0, - "learning_rate": 1.809578454073079e-05, - "loss": 0.9693, + "learning_rate": 1.8101483633322255e-05, + "loss": 0.9596, "step": 7884 }, { - "epoch": 0.22375141884222474, + "epoch": 0.22344073223950806, "grad_norm": 0.0, - "learning_rate": 1.8095244996966655e-05, - "loss": 0.8691, + "learning_rate": 1.810094556828522e-05, + "loss": 1.0122, "step": 7885 }, { - "epoch": 0.22377979568671963, + "epoch": 0.22346906968177052, "grad_norm": 0.0, - "learning_rate": 1.809470538482163e-05, - "loss": 1.1315, + "learning_rate": 1.8100407435010914e-05, + "loss": 1.0648, "step": 7886 }, { - "epoch": 0.22380817253121452, + "epoch": 0.223497407124033, "grad_norm": 0.0, - "learning_rate": 1.8094165704300253e-05, - "loss": 0.9922, + "learning_rate": 1.8099869233503868e-05, + "loss": 1.0441, "step": 7887 }, { - "epoch": 0.22383654937570943, + "epoch": 0.22352574456629545, "grad_norm": 0.0, - "learning_rate": 1.80936259554071e-05, - "loss": 1.0311, + "learning_rate": 1.809933096376862e-05, + "loss": 0.8931, "step": 7888 }, { - "epoch": 0.22386492622020432, + "epoch": 0.22355408200855792, "grad_norm": 0.0, - "learning_rate": 1.8093086138146723e-05, - "loss": 1.0676, + "learning_rate": 1.809879262580969e-05, + "loss": 0.9915, "step": 7889 }, { - "epoch": 0.2238933030646992, + "epoch": 0.22358241945082036, "grad_norm": 0.0, - "learning_rate": 1.8092546252523685e-05, - "loss": 1.0621, + "learning_rate": 1.809825421963163e-05, + "loss": 1.0616, "step": 7890 }, { - "epoch": 0.2239216799091941, + "epoch": 0.22361075689308282, "grad_norm": 0.0, - "learning_rate": 1.8092006298542544e-05, - "loss": 1.0565, + "learning_rate": 1.8097715745238966e-05, + "loss": 1.0583, "step": 7891 }, { - "epoch": 0.22395005675368898, + "epoch": 0.2236390943353453, "grad_norm": 0.0, - "learning_rate": 1.8091466276207864e-05, - "loss": 0.9675, + "learning_rate": 1.8097177202636235e-05, + "loss": 0.9473, "step": 7892 }, { - "epoch": 0.22397843359818387, + "epoch": 0.22366743177760776, "grad_norm": 0.0, - "learning_rate": 1.80909261855242e-05, - "loss": 1.0822, + "learning_rate": 1.8096638591827974e-05, + "loss": 1.0085, "step": 7893 }, { - "epoch": 0.22400681044267878, + "epoch": 0.22369576921987022, "grad_norm": 0.0, - "learning_rate": 1.8090386026496123e-05, - "loss": 1.0111, + "learning_rate": 1.8096099912818718e-05, + "loss": 1.0391, "step": 7894 }, { - "epoch": 0.22403518728717367, + "epoch": 0.2237241066621327, "grad_norm": 0.0, - "learning_rate": 1.808984579912819e-05, - "loss": 0.9957, + "learning_rate": 1.8095561165613007e-05, + "loss": 1.0395, "step": 7895 }, { - "epoch": 0.22406356413166856, + "epoch": 0.22375244410439513, "grad_norm": 0.0, - "learning_rate": 1.8089305503424966e-05, - "loss": 1.0381, + "learning_rate": 1.8095022350215376e-05, + "loss": 1.0175, "step": 7896 }, { - "epoch": 0.22409194097616345, + "epoch": 0.2237807815466576, "grad_norm": 0.0, - "learning_rate": 1.8088765139391014e-05, - "loss": 0.9923, + "learning_rate": 1.8094483466630367e-05, + "loss": 1.1368, "step": 7897 }, { - "epoch": 0.22412031782065833, + "epoch": 0.22380911898892006, "grad_norm": 0.0, - "learning_rate": 1.8088224707030903e-05, - "loss": 0.953, + "learning_rate": 1.8093944514862523e-05, + "loss": 1.0223, "step": 7898 }, { - "epoch": 0.22414869466515325, + "epoch": 0.22383745643118252, "grad_norm": 0.0, - "learning_rate": 1.808768420634919e-05, - "loss": 1.0862, + "learning_rate": 1.8093405494916373e-05, + "loss": 0.8958, "step": 7899 }, { - "epoch": 0.22417707150964813, + "epoch": 0.223865793873445, "grad_norm": 0.0, - "learning_rate": 1.8087143637350445e-05, - "loss": 1.1074, + "learning_rate": 1.8092866406796465e-05, + "loss": 0.9988, "step": 7900 }, { - "epoch": 0.22420544835414302, + "epoch": 0.22389413131570746, "grad_norm": 0.0, - "learning_rate": 1.8086603000039237e-05, - "loss": 1.0195, + "learning_rate": 1.8092327250507335e-05, + "loss": 0.9302, "step": 7901 }, { - "epoch": 0.2242338251986379, + "epoch": 0.2239224687579699, "grad_norm": 0.0, - "learning_rate": 1.8086062294420127e-05, - "loss": 0.9576, + "learning_rate": 1.8091788026053533e-05, + "loss": 0.9471, "step": 7902 }, { - "epoch": 0.2242622020431328, + "epoch": 0.22395080620023236, "grad_norm": 0.0, - "learning_rate": 1.8085521520497686e-05, - "loss": 1.0851, + "learning_rate": 1.8091248733439593e-05, + "loss": 1.11, "step": 7903 }, { - "epoch": 0.22429057888762768, + "epoch": 0.22397914364249483, "grad_norm": 0.0, - "learning_rate": 1.8084980678276482e-05, - "loss": 0.9754, + "learning_rate": 1.809070937267006e-05, + "loss": 1.0482, "step": 7904 }, { - "epoch": 0.2243189557321226, + "epoch": 0.2240074810847573, "grad_norm": 0.0, - "learning_rate": 1.8084439767761084e-05, - "loss": 1.1398, + "learning_rate": 1.8090169943749477e-05, + "loss": 1.0126, "step": 7905 }, { - "epoch": 0.2243473325766175, + "epoch": 0.22403581852701976, "grad_norm": 0.0, - "learning_rate": 1.8083898788956058e-05, - "loss": 1.0647, + "learning_rate": 1.808963044668239e-05, + "loss": 0.9963, "step": 7906 }, { - "epoch": 0.22437570942111237, + "epoch": 0.22406415596928222, "grad_norm": 0.0, - "learning_rate": 1.8083357741865976e-05, - "loss": 1.0289, + "learning_rate": 1.808909088147334e-05, + "loss": 1.0554, "step": 7907 }, { - "epoch": 0.22440408626560726, + "epoch": 0.22409249341154466, "grad_norm": 0.0, - "learning_rate": 1.8082816626495407e-05, - "loss": 0.9761, + "learning_rate": 1.8088551248126875e-05, + "loss": 1.1307, "step": 7908 }, { - "epoch": 0.22443246311010215, + "epoch": 0.22412083085380713, "grad_norm": 0.0, - "learning_rate": 1.8082275442848923e-05, - "loss": 1.0649, + "learning_rate": 1.8088011546647536e-05, + "loss": 0.9269, "step": 7909 }, { - "epoch": 0.22446083995459704, + "epoch": 0.2241491682960696, "grad_norm": 0.0, - "learning_rate": 1.8081734190931096e-05, - "loss": 1.0669, + "learning_rate": 1.8087471777039877e-05, + "loss": 0.9984, "step": 7910 }, { - "epoch": 0.22448921679909195, + "epoch": 0.22417750573833206, "grad_norm": 0.0, - "learning_rate": 1.80811928707465e-05, - "loss": 0.9711, + "learning_rate": 1.8086931939308438e-05, + "loss": 1.0963, "step": 7911 }, { - "epoch": 0.22451759364358684, + "epoch": 0.22420584318059453, "grad_norm": 0.0, - "learning_rate": 1.80806514822997e-05, - "loss": 0.9438, + "learning_rate": 1.8086392033457766e-05, + "loss": 0.9825, "step": 7912 }, { - "epoch": 0.22454597048808173, + "epoch": 0.224234180622857, "grad_norm": 0.0, - "learning_rate": 1.8080110025595273e-05, - "loss": 0.9833, + "learning_rate": 1.8085852059492414e-05, + "loss": 0.9056, "step": 7913 }, { - "epoch": 0.2245743473325766, + "epoch": 0.22426251806511943, "grad_norm": 0.0, - "learning_rate": 1.8079568500637795e-05, - "loss": 1.0, + "learning_rate": 1.8085312017416926e-05, + "loss": 1.0823, "step": 7914 }, { - "epoch": 0.2246027241770715, + "epoch": 0.2242908555073819, "grad_norm": 0.0, - "learning_rate": 1.807902690743184e-05, - "loss": 1.0821, + "learning_rate": 1.8084771907235855e-05, + "loss": 1.0631, "step": 7915 }, { - "epoch": 0.22463110102156642, + "epoch": 0.22431919294964436, "grad_norm": 0.0, - "learning_rate": 1.8078485245981982e-05, - "loss": 0.8733, + "learning_rate": 1.8084231728953746e-05, + "loss": 1.0382, "step": 7916 }, { - "epoch": 0.2246594778660613, + "epoch": 0.22434753039190683, "grad_norm": 0.0, - "learning_rate": 1.8077943516292795e-05, - "loss": 0.8976, + "learning_rate": 1.808369148257515e-05, + "loss": 0.8884, "step": 7917 }, { - "epoch": 0.2246878547105562, + "epoch": 0.2243758678341693, "grad_norm": 0.0, - "learning_rate": 1.8077401718368853e-05, - "loss": 0.9564, + "learning_rate": 1.808315116810462e-05, + "loss": 0.9461, "step": 7918 }, { - "epoch": 0.22471623155505108, + "epoch": 0.22440420527643176, "grad_norm": 0.0, - "learning_rate": 1.807685985221474e-05, - "loss": 0.9425, + "learning_rate": 1.808261078554671e-05, + "loss": 0.938, "step": 7919 }, { - "epoch": 0.22474460839954596, + "epoch": 0.2244325427186942, "grad_norm": 0.0, - "learning_rate": 1.8076317917835027e-05, - "loss": 0.9051, + "learning_rate": 1.808207033490596e-05, + "loss": 0.9548, "step": 7920 }, { - "epoch": 0.22477298524404085, + "epoch": 0.22446088016095667, "grad_norm": 0.0, - "learning_rate": 1.8075775915234294e-05, - "loss": 0.8612, + "learning_rate": 1.8081529816186937e-05, + "loss": 0.8256, "step": 7921 }, { - "epoch": 0.22480136208853577, + "epoch": 0.22448921760321913, "grad_norm": 0.0, - "learning_rate": 1.8075233844417116e-05, - "loss": 0.7729, + "learning_rate": 1.8080989229394183e-05, + "loss": 0.9397, "step": 7922 }, { - "epoch": 0.22482973893303065, + "epoch": 0.2245175550454816, "grad_norm": 0.0, - "learning_rate": 1.8074691705388076e-05, - "loss": 0.9667, + "learning_rate": 1.808044857453226e-05, + "loss": 1.0164, "step": 7923 }, { - "epoch": 0.22485811577752554, + "epoch": 0.22454589248774406, "grad_norm": 0.0, - "learning_rate": 1.8074149498151753e-05, - "loss": 1.041, + "learning_rate": 1.8079907851605714e-05, + "loss": 1.0613, "step": 7924 }, { - "epoch": 0.22488649262202043, + "epoch": 0.22457422993000653, "grad_norm": 0.0, - "learning_rate": 1.807360722271273e-05, - "loss": 0.9606, + "learning_rate": 1.8079367060619107e-05, + "loss": 0.8944, "step": 7925 }, { - "epoch": 0.22491486946651532, + "epoch": 0.22460256737226897, "grad_norm": 0.0, - "learning_rate": 1.8073064879075577e-05, - "loss": 0.9373, + "learning_rate": 1.807882620157699e-05, + "loss": 1.0133, "step": 7926 }, { - "epoch": 0.2249432463110102, + "epoch": 0.22463090481453143, "grad_norm": 0.0, - "learning_rate": 1.8072522467244885e-05, - "loss": 0.9496, + "learning_rate": 1.807828527448392e-05, + "loss": 0.9575, "step": 7927 }, { - "epoch": 0.22497162315550512, + "epoch": 0.2246592422567939, "grad_norm": 0.0, - "learning_rate": 1.8071979987225233e-05, - "loss": 0.98, + "learning_rate": 1.807774427934445e-05, + "loss": 0.9924, "step": 7928 }, { - "epoch": 0.225, + "epoch": 0.22468757969905637, "grad_norm": 0.0, - "learning_rate": 1.8071437439021204e-05, - "loss": 0.9417, + "learning_rate": 1.8077203216163145e-05, + "loss": 0.9742, "step": 7929 }, { - "epoch": 0.2250283768444949, + "epoch": 0.22471591714131883, "grad_norm": 0.0, - "learning_rate": 1.8070894822637375e-05, - "loss": 0.9362, + "learning_rate": 1.807666208494456e-05, + "loss": 1.0828, "step": 7930 }, { - "epoch": 0.22505675368898978, + "epoch": 0.2247442545835813, "grad_norm": 0.0, - "learning_rate": 1.8070352138078342e-05, - "loss": 0.9944, + "learning_rate": 1.8076120885693245e-05, + "loss": 1.0018, "step": 7931 }, { - "epoch": 0.22508513053348467, + "epoch": 0.22477259202584374, "grad_norm": 0.0, - "learning_rate": 1.8069809385348677e-05, - "loss": 0.9855, + "learning_rate": 1.8075579618413767e-05, + "loss": 1.07, "step": 7932 }, { - "epoch": 0.22511350737797958, + "epoch": 0.2248009294681062, "grad_norm": 0.0, - "learning_rate": 1.806926656445297e-05, - "loss": 0.9311, + "learning_rate": 1.8075038283110682e-05, + "loss": 0.8322, "step": 7933 }, { - "epoch": 0.22514188422247447, + "epoch": 0.22482926691036867, "grad_norm": 0.0, - "learning_rate": 1.806872367539581e-05, - "loss": 0.8207, + "learning_rate": 1.8074496879788555e-05, + "loss": 0.9298, "step": 7934 }, { - "epoch": 0.22517026106696936, + "epoch": 0.22485760435263114, "grad_norm": 0.0, - "learning_rate": 1.8068180718181773e-05, - "loss": 1.0557, + "learning_rate": 1.807395540845194e-05, + "loss": 1.0814, "step": 7935 }, { - "epoch": 0.22519863791146424, + "epoch": 0.2248859417948936, "grad_norm": 0.0, - "learning_rate": 1.8067637692815455e-05, - "loss": 0.9159, + "learning_rate": 1.8073413869105397e-05, + "loss": 0.9903, "step": 7936 }, { - "epoch": 0.22522701475595913, + "epoch": 0.22491427923715607, "grad_norm": 0.0, - "learning_rate": 1.8067094599301433e-05, - "loss": 1.0025, + "learning_rate": 1.8072872261753494e-05, + "loss": 0.9873, "step": 7937 }, { - "epoch": 0.22525539160045402, + "epoch": 0.2249426166794185, "grad_norm": 0.0, - "learning_rate": 1.8066551437644306e-05, - "loss": 0.9013, + "learning_rate": 1.8072330586400793e-05, + "loss": 1.0485, "step": 7938 }, { - "epoch": 0.22528376844494893, + "epoch": 0.22497095412168097, "grad_norm": 0.0, - "learning_rate": 1.8066008207848653e-05, - "loss": 1.1318, + "learning_rate": 1.807178884305185e-05, + "loss": 1.0106, "step": 7939 }, { - "epoch": 0.22531214528944382, + "epoch": 0.22499929156394344, "grad_norm": 0.0, - "learning_rate": 1.806546490991907e-05, - "loss": 1.071, + "learning_rate": 1.8071247031711232e-05, + "loss": 0.9247, "step": 7940 }, { - "epoch": 0.2253405221339387, + "epoch": 0.2250276290062059, "grad_norm": 0.0, - "learning_rate": 1.8064921543860138e-05, - "loss": 1.0137, + "learning_rate": 1.8070705152383504e-05, + "loss": 1.0614, "step": 7941 }, { - "epoch": 0.2253688989784336, + "epoch": 0.22505596644846837, "grad_norm": 0.0, - "learning_rate": 1.806437810967645e-05, - "loss": 1.0071, + "learning_rate": 1.807016320507323e-05, + "loss": 1.0125, "step": 7942 }, { - "epoch": 0.22539727582292848, + "epoch": 0.22508430389073084, "grad_norm": 0.0, - "learning_rate": 1.8063834607372603e-05, - "loss": 0.9547, + "learning_rate": 1.8069621189784974e-05, + "loss": 1.0357, "step": 7943 }, { - "epoch": 0.22542565266742337, + "epoch": 0.22511264133299327, "grad_norm": 0.0, - "learning_rate": 1.806329103695318e-05, - "loss": 0.9978, + "learning_rate": 1.8069079106523303e-05, + "loss": 0.9887, "step": 7944 }, { - "epoch": 0.22545402951191829, + "epoch": 0.22514097877525574, "grad_norm": 0.0, - "learning_rate": 1.8062747398422775e-05, - "loss": 1.0697, + "learning_rate": 1.806853695529278e-05, + "loss": 0.9376, "step": 7945 }, { - "epoch": 0.22548240635641317, + "epoch": 0.2251693162175182, "grad_norm": 0.0, - "learning_rate": 1.8062203691785977e-05, - "loss": 0.9829, + "learning_rate": 1.8067994736097978e-05, + "loss": 0.9955, "step": 7946 }, { - "epoch": 0.22551078320090806, + "epoch": 0.22519765365978067, "grad_norm": 0.0, - "learning_rate": 1.8061659917047384e-05, - "loss": 1.0438, + "learning_rate": 1.8067452448943455e-05, + "loss": 1.1027, "step": 7947 }, { - "epoch": 0.22553916004540295, + "epoch": 0.22522599110204314, "grad_norm": 0.0, - "learning_rate": 1.806111607421159e-05, - "loss": 0.9528, + "learning_rate": 1.806691009383379e-05, + "loss": 1.0663, "step": 7948 }, { - "epoch": 0.22556753688989784, + "epoch": 0.2252543285443056, "grad_norm": 0.0, - "learning_rate": 1.8060572163283186e-05, - "loss": 0.9296, + "learning_rate": 1.8066367670773543e-05, + "loss": 0.9747, "step": 7949 }, { - "epoch": 0.22559591373439272, + "epoch": 0.22528266598656804, "grad_norm": 0.0, - "learning_rate": 1.8060028184266766e-05, - "loss": 1.1524, + "learning_rate": 1.8065825179767287e-05, + "loss": 0.8943, "step": 7950 }, { - "epoch": 0.22562429057888764, + "epoch": 0.2253110034288305, "grad_norm": 0.0, - "learning_rate": 1.8059484137166925e-05, - "loss": 0.9826, + "learning_rate": 1.8065282620819587e-05, + "loss": 0.9309, "step": 7951 }, { - "epoch": 0.22565266742338252, + "epoch": 0.22533934087109297, "grad_norm": 0.0, - "learning_rate": 1.8058940021988257e-05, - "loss": 1.0109, + "learning_rate": 1.806473999393502e-05, + "loss": 0.8914, "step": 7952 }, { - "epoch": 0.2256810442678774, + "epoch": 0.22536767831335544, "grad_norm": 0.0, - "learning_rate": 1.8058395838735364e-05, - "loss": 0.9218, + "learning_rate": 1.8064197299118153e-05, + "loss": 1.0605, "step": 7953 }, { - "epoch": 0.2257094211123723, + "epoch": 0.2253960157556179, "grad_norm": 0.0, - "learning_rate": 1.8057851587412837e-05, - "loss": 0.9121, + "learning_rate": 1.806365453637356e-05, + "loss": 0.9352, "step": 7954 }, { - "epoch": 0.2257377979568672, + "epoch": 0.22542435319788037, "grad_norm": 0.0, - "learning_rate": 1.8057307268025273e-05, - "loss": 1.0127, + "learning_rate": 1.80631117057058e-05, + "loss": 1.0533, "step": 7955 }, { - "epoch": 0.2257661748013621, + "epoch": 0.2254526906401428, "grad_norm": 0.0, - "learning_rate": 1.8056762880577277e-05, - "loss": 1.0422, + "learning_rate": 1.8062568807119465e-05, + "loss": 0.926, "step": 7956 }, { - "epoch": 0.225794551645857, + "epoch": 0.22548102808240528, "grad_norm": 0.0, - "learning_rate": 1.805621842507344e-05, - "loss": 1.0754, + "learning_rate": 1.8062025840619118e-05, + "loss": 0.9659, "step": 7957 }, { - "epoch": 0.22582292849035188, + "epoch": 0.22550936552466774, "grad_norm": 0.0, - "learning_rate": 1.8055673901518365e-05, - "loss": 1.0226, + "learning_rate": 1.806148280620933e-05, + "loss": 0.9477, "step": 7958 }, { - "epoch": 0.22585130533484676, + "epoch": 0.2255377029669302, "grad_norm": 0.0, - "learning_rate": 1.8055129309916652e-05, - "loss": 1.01, + "learning_rate": 1.8060939703894684e-05, + "loss": 1.0101, "step": 7959 }, { - "epoch": 0.22587968217934165, + "epoch": 0.22556604040919268, "grad_norm": 0.0, - "learning_rate": 1.8054584650272897e-05, - "loss": 1.0438, + "learning_rate": 1.8060396533679746e-05, + "loss": 0.9361, "step": 7960 }, { - "epoch": 0.22590805902383654, + "epoch": 0.22559437785145514, "grad_norm": 0.0, - "learning_rate": 1.8054039922591705e-05, - "loss": 0.9987, + "learning_rate": 1.8059853295569095e-05, + "loss": 0.8883, "step": 7961 }, { - "epoch": 0.22593643586833145, + "epoch": 0.22562271529371758, "grad_norm": 0.0, - "learning_rate": 1.8053495126877677e-05, - "loss": 0.9929, + "learning_rate": 1.8059309989567308e-05, + "loss": 1.0159, "step": 7962 }, { - "epoch": 0.22596481271282634, + "epoch": 0.22565105273598005, "grad_norm": 0.0, - "learning_rate": 1.8052950263135408e-05, - "loss": 0.9777, + "learning_rate": 1.8058766615678963e-05, + "loss": 0.9454, "step": 7963 }, { - "epoch": 0.22599318955732123, + "epoch": 0.2256793901782425, "grad_norm": 0.0, - "learning_rate": 1.805240533136951e-05, - "loss": 0.9422, + "learning_rate": 1.805822317390863e-05, + "loss": 1.1332, "step": 7964 }, { - "epoch": 0.22602156640181612, + "epoch": 0.22570772762050498, "grad_norm": 0.0, - "learning_rate": 1.8051860331584582e-05, - "loss": 0.9334, + "learning_rate": 1.805767966426089e-05, + "loss": 1.0527, "step": 7965 }, { - "epoch": 0.226049943246311, + "epoch": 0.22573606506276744, "grad_norm": 0.0, - "learning_rate": 1.805131526378523e-05, - "loss": 0.8978, + "learning_rate": 1.8057136086740326e-05, + "loss": 1.0263, "step": 7966 }, { - "epoch": 0.2260783200908059, + "epoch": 0.2257644025050299, "grad_norm": 0.0, - "learning_rate": 1.805077012797605e-05, - "loss": 1.1444, + "learning_rate": 1.805659244135151e-05, + "loss": 1.1428, "step": 7967 }, { - "epoch": 0.2261066969353008, + "epoch": 0.22579273994729235, "grad_norm": 0.0, - "learning_rate": 1.8050224924161658e-05, - "loss": 1.0506, + "learning_rate": 1.8056048728099024e-05, + "loss": 1.0378, "step": 7968 }, { - "epoch": 0.2261350737797957, + "epoch": 0.22582107738955481, "grad_norm": 0.0, - "learning_rate": 1.8049679652346653e-05, - "loss": 0.9066, + "learning_rate": 1.805550494698745e-05, + "loss": 0.9818, "step": 7969 }, { - "epoch": 0.22616345062429058, + "epoch": 0.22584941483181728, "grad_norm": 0.0, - "learning_rate": 1.804913431253564e-05, - "loss": 0.9376, + "learning_rate": 1.8054961098021366e-05, + "loss": 1.0139, "step": 7970 }, { - "epoch": 0.22619182746878547, + "epoch": 0.22587775227407975, "grad_norm": 0.0, - "learning_rate": 1.804858890473323e-05, - "loss": 0.9059, + "learning_rate": 1.805441718120535e-05, + "loss": 0.9074, "step": 7971 }, { - "epoch": 0.22622020431328035, + "epoch": 0.2259060897163422, "grad_norm": 0.0, - "learning_rate": 1.804804342894403e-05, - "loss": 1.0349, + "learning_rate": 1.8053873196543993e-05, + "loss": 0.9778, "step": 7972 }, { - "epoch": 0.22624858115777527, + "epoch": 0.22593442715860468, "grad_norm": 0.0, - "learning_rate": 1.8047497885172643e-05, - "loss": 1.0327, + "learning_rate": 1.8053329144041867e-05, + "loss": 1.0122, "step": 7973 }, { - "epoch": 0.22627695800227016, + "epoch": 0.22596276460086712, "grad_norm": 0.0, - "learning_rate": 1.804695227342368e-05, - "loss": 0.8487, + "learning_rate": 1.805278502370356e-05, + "loss": 0.9608, "step": 7974 }, { - "epoch": 0.22630533484676504, + "epoch": 0.22599110204312958, "grad_norm": 0.0, - "learning_rate": 1.8046406593701748e-05, - "loss": 0.9662, + "learning_rate": 1.805224083553365e-05, + "loss": 0.9952, "step": 7975 }, { - "epoch": 0.22633371169125993, + "epoch": 0.22601943948539205, "grad_norm": 0.0, - "learning_rate": 1.8045860846011456e-05, - "loss": 1.0429, + "learning_rate": 1.805169657953673e-05, + "loss": 1.0691, "step": 7976 }, { - "epoch": 0.22636208853575482, + "epoch": 0.22604777692765451, "grad_norm": 0.0, - "learning_rate": 1.804531503035742e-05, - "loss": 0.9508, + "learning_rate": 1.8051152255717383e-05, + "loss": 1.0128, "step": 7977 }, { - "epoch": 0.2263904653802497, + "epoch": 0.22607611436991698, "grad_norm": 0.0, - "learning_rate": 1.8044769146744242e-05, - "loss": 0.9725, + "learning_rate": 1.805060786408019e-05, + "loss": 1.0187, "step": 7978 }, { - "epoch": 0.22641884222474462, + "epoch": 0.22610445181217942, "grad_norm": 0.0, - "learning_rate": 1.804422319517654e-05, - "loss": 0.9556, + "learning_rate": 1.8050063404629733e-05, + "loss": 0.9214, "step": 7979 }, { - "epoch": 0.2264472190692395, + "epoch": 0.22613278925444188, "grad_norm": 0.0, - "learning_rate": 1.804367717565892e-05, - "loss": 0.9001, + "learning_rate": 1.8049518877370604e-05, + "loss": 1.0087, "step": 7980 }, { - "epoch": 0.2264755959137344, + "epoch": 0.22616112669670435, "grad_norm": 0.0, - "learning_rate": 1.8043131088196003e-05, - "loss": 1.0255, + "learning_rate": 1.804897428230739e-05, + "loss": 0.9348, "step": 7981 }, { - "epoch": 0.22650397275822928, + "epoch": 0.22618946413896682, "grad_norm": 0.0, - "learning_rate": 1.8042584932792392e-05, - "loss": 0.989, + "learning_rate": 1.8048429619444675e-05, + "loss": 1.0916, "step": 7982 }, { - "epoch": 0.22653234960272417, + "epoch": 0.22621780158122928, "grad_norm": 0.0, - "learning_rate": 1.804203870945271e-05, - "loss": 0.9955, + "learning_rate": 1.804788488878705e-05, + "loss": 1.0173, "step": 7983 }, { - "epoch": 0.22656072644721906, + "epoch": 0.22624613902349175, "grad_norm": 0.0, - "learning_rate": 1.804149241818156e-05, - "loss": 1.1599, + "learning_rate": 1.80473400903391e-05, + "loss": 1.0257, "step": 7984 }, { - "epoch": 0.22658910329171397, + "epoch": 0.2262744764657542, "grad_norm": 0.0, - "learning_rate": 1.804094605898356e-05, - "loss": 0.9493, + "learning_rate": 1.804679522410542e-05, + "loss": 1.0052, "step": 7985 }, { - "epoch": 0.22661748013620886, + "epoch": 0.22630281390801665, "grad_norm": 0.0, - "learning_rate": 1.8040399631863334e-05, - "loss": 0.9731, + "learning_rate": 1.8046250290090594e-05, + "loss": 0.9699, "step": 7986 }, { - "epoch": 0.22664585698070375, + "epoch": 0.22633115135027912, "grad_norm": 0.0, - "learning_rate": 1.8039853136825485e-05, - "loss": 1.0403, + "learning_rate": 1.8045705288299213e-05, + "loss": 0.9276, "step": 7987 }, { - "epoch": 0.22667423382519863, + "epoch": 0.22635948879254159, "grad_norm": 0.0, - "learning_rate": 1.803930657387464e-05, - "loss": 1.0149, + "learning_rate": 1.8045160218735866e-05, + "loss": 1.1014, "step": 7988 }, { - "epoch": 0.22670261066969352, + "epoch": 0.22638782623480405, "grad_norm": 0.0, - "learning_rate": 1.8038759943015406e-05, - "loss": 1.067, + "learning_rate": 1.8044615081405153e-05, + "loss": 1.0504, "step": 7989 }, { - "epoch": 0.2267309875141884, + "epoch": 0.22641616367706652, "grad_norm": 0.0, - "learning_rate": 1.8038213244252408e-05, - "loss": 1.0581, + "learning_rate": 1.8044069876311655e-05, + "loss": 0.9431, "step": 7990 }, { - "epoch": 0.22675936435868332, + "epoch": 0.22644450111932896, "grad_norm": 0.0, - "learning_rate": 1.803766647759026e-05, - "loss": 0.9147, + "learning_rate": 1.8043524603459973e-05, + "loss": 0.9987, "step": 7991 }, { - "epoch": 0.2267877412031782, + "epoch": 0.22647283856159142, "grad_norm": 0.0, - "learning_rate": 1.8037119643033586e-05, - "loss": 1.0111, + "learning_rate": 1.8042979262854695e-05, + "loss": 1.0596, "step": 7992 }, { - "epoch": 0.2268161180476731, + "epoch": 0.2265011760038539, "grad_norm": 0.0, - "learning_rate": 1.8036572740586997e-05, - "loss": 0.9139, + "learning_rate": 1.8042433854500416e-05, + "loss": 0.9828, "step": 7993 }, { - "epoch": 0.22684449489216799, + "epoch": 0.22652951344611635, "grad_norm": 0.0, - "learning_rate": 1.803602577025512e-05, - "loss": 1.0207, + "learning_rate": 1.8041888378401728e-05, + "loss": 0.9453, "step": 7994 }, { - "epoch": 0.22687287173666287, + "epoch": 0.22655785088837882, "grad_norm": 0.0, - "learning_rate": 1.803547873204257e-05, - "loss": 0.9706, + "learning_rate": 1.8041342834563227e-05, + "loss": 0.8588, "step": 7995 }, { - "epoch": 0.2269012485811578, + "epoch": 0.22658618833064129, "grad_norm": 0.0, - "learning_rate": 1.8034931625953967e-05, - "loss": 1.0046, + "learning_rate": 1.8040797222989514e-05, + "loss": 0.9315, "step": 7996 }, { - "epoch": 0.22692962542565268, + "epoch": 0.22661452577290372, "grad_norm": 0.0, - "learning_rate": 1.803438445199394e-05, - "loss": 1.0099, + "learning_rate": 1.804025154368518e-05, + "loss": 0.9223, "step": 7997 }, { - "epoch": 0.22695800227014756, + "epoch": 0.2266428632151662, "grad_norm": 0.0, - "learning_rate": 1.8033837210167107e-05, - "loss": 1.0412, + "learning_rate": 1.8039705796654815e-05, + "loss": 0.9143, "step": 7998 }, { - "epoch": 0.22698637911464245, + "epoch": 0.22667120065742866, "grad_norm": 0.0, - "learning_rate": 1.8033289900478086e-05, - "loss": 0.9779, + "learning_rate": 1.8039159981903028e-05, + "loss": 1.0148, "step": 7999 }, { - "epoch": 0.22701475595913734, + "epoch": 0.22669953809969112, "grad_norm": 0.0, - "learning_rate": 1.8032742522931507e-05, - "loss": 0.9762, + "learning_rate": 1.803861409943441e-05, + "loss": 1.0098, "step": 8000 }, { - "epoch": 0.22704313280363222, + "epoch": 0.2267278755419536, "grad_norm": 0.0, - "learning_rate": 1.8032195077531988e-05, - "loss": 0.9183, + "learning_rate": 1.803806814925356e-05, + "loss": 1.1061, "step": 8001 }, { - "epoch": 0.22707150964812714, + "epoch": 0.22675621298421605, "grad_norm": 0.0, - "learning_rate": 1.8031647564284158e-05, - "loss": 0.977, + "learning_rate": 1.803752213136508e-05, + "loss": 1.0615, "step": 8002 }, { - "epoch": 0.22709988649262203, + "epoch": 0.2267845504264785, "grad_norm": 0.0, - "learning_rate": 1.8031099983192638e-05, - "loss": 0.9611, + "learning_rate": 1.8036976045773564e-05, + "loss": 1.026, "step": 8003 }, { - "epoch": 0.22712826333711691, + "epoch": 0.22681288786874096, "grad_norm": 0.0, - "learning_rate": 1.803055233426206e-05, - "loss": 1.0699, + "learning_rate": 1.8036429892483615e-05, + "loss": 0.9504, "step": 8004 }, { - "epoch": 0.2271566401816118, + "epoch": 0.22684122531100342, "grad_norm": 0.0, - "learning_rate": 1.8030004617497043e-05, - "loss": 0.917, + "learning_rate": 1.803588367149983e-05, + "loss": 0.9883, "step": 8005 }, { - "epoch": 0.2271850170261067, + "epoch": 0.2268695627532659, "grad_norm": 0.0, - "learning_rate": 1.8029456832902215e-05, - "loss": 0.936, + "learning_rate": 1.8035337382826818e-05, + "loss": 1.0699, "step": 8006 }, { - "epoch": 0.22721339387060158, + "epoch": 0.22689790019552836, "grad_norm": 0.0, - "learning_rate": 1.8028908980482203e-05, - "loss": 1.0458, + "learning_rate": 1.803479102646917e-05, + "loss": 0.8907, "step": 8007 }, { - "epoch": 0.2272417707150965, + "epoch": 0.22692623763779082, "grad_norm": 0.0, - "learning_rate": 1.802836106024164e-05, - "loss": 1.0955, + "learning_rate": 1.8034244602431497e-05, + "loss": 0.9443, "step": 8008 }, { - "epoch": 0.22727014755959138, + "epoch": 0.22695457508005326, "grad_norm": 0.0, - "learning_rate": 1.8027813072185147e-05, - "loss": 1.0774, + "learning_rate": 1.8033698110718395e-05, + "loss": 0.8723, "step": 8009 }, { - "epoch": 0.22729852440408627, + "epoch": 0.22698291252231573, "grad_norm": 0.0, - "learning_rate": 1.8027265016317358e-05, - "loss": 0.8791, + "learning_rate": 1.8033151551334475e-05, + "loss": 0.9596, "step": 8010 }, { - "epoch": 0.22732690124858115, + "epoch": 0.2270112499645782, "grad_norm": 0.0, - "learning_rate": 1.80267168926429e-05, - "loss": 0.9434, + "learning_rate": 1.8032604924284332e-05, + "loss": 1.0029, "step": 8011 }, { - "epoch": 0.22735527809307604, + "epoch": 0.22703958740684066, "grad_norm": 0.0, - "learning_rate": 1.8026168701166402e-05, - "loss": 1.0052, + "learning_rate": 1.803205822957258e-05, + "loss": 0.9818, "step": 8012 }, { - "epoch": 0.22738365493757096, + "epoch": 0.22706792484910313, "grad_norm": 0.0, - "learning_rate": 1.8025620441892498e-05, - "loss": 1.049, + "learning_rate": 1.8031511467203816e-05, + "loss": 0.8709, "step": 8013 }, { - "epoch": 0.22741203178206584, + "epoch": 0.2270962622913656, "grad_norm": 0.0, - "learning_rate": 1.8025072114825817e-05, - "loss": 1.0266, + "learning_rate": 1.8030964637182648e-05, + "loss": 0.9405, "step": 8014 }, { - "epoch": 0.22744040862656073, + "epoch": 0.22712459973362803, "grad_norm": 0.0, - "learning_rate": 1.802452371997099e-05, - "loss": 1.0226, + "learning_rate": 1.8030417739513684e-05, + "loss": 0.9451, "step": 8015 }, { - "epoch": 0.22746878547105562, + "epoch": 0.2271529371758905, "grad_norm": 0.0, - "learning_rate": 1.8023975257332652e-05, - "loss": 1.0072, + "learning_rate": 1.8029870774201527e-05, + "loss": 0.8985, "step": 8016 }, { - "epoch": 0.2274971623155505, + "epoch": 0.22718127461815296, "grad_norm": 0.0, - "learning_rate": 1.8023426726915434e-05, - "loss": 1.0342, + "learning_rate": 1.8029323741250787e-05, + "loss": 0.936, "step": 8017 }, { - "epoch": 0.2275255391600454, + "epoch": 0.22720961206041543, "grad_norm": 0.0, - "learning_rate": 1.802287812872397e-05, - "loss": 0.9958, + "learning_rate": 1.8028776640666075e-05, + "loss": 0.9133, "step": 8018 }, { - "epoch": 0.2275539160045403, + "epoch": 0.2272379495026779, "grad_norm": 0.0, - "learning_rate": 1.802232946276289e-05, - "loss": 0.9159, + "learning_rate": 1.8028229472451994e-05, + "loss": 0.9061, "step": 8019 }, { - "epoch": 0.2275822928490352, + "epoch": 0.22726628694494036, "grad_norm": 0.0, - "learning_rate": 1.802178072903684e-05, - "loss": 1.002, + "learning_rate": 1.8027682236613152e-05, + "loss": 0.9722, "step": 8020 }, { - "epoch": 0.22761066969353008, + "epoch": 0.2272946243872028, "grad_norm": 0.0, - "learning_rate": 1.802123192755044e-05, - "loss": 0.8851, + "learning_rate": 1.8027134933154164e-05, + "loss": 1.0323, "step": 8021 }, { - "epoch": 0.22763904653802497, + "epoch": 0.22732296182946526, "grad_norm": 0.0, - "learning_rate": 1.8020683058308334e-05, - "loss": 1.0423, + "learning_rate": 1.802658756207964e-05, + "loss": 1.0241, "step": 8022 }, { - "epoch": 0.22766742338251986, + "epoch": 0.22735129927172773, "grad_norm": 0.0, - "learning_rate": 1.8020134121315162e-05, - "loss": 1.0914, + "learning_rate": 1.8026040123394187e-05, + "loss": 1.0637, "step": 8023 }, { - "epoch": 0.22769580022701474, + "epoch": 0.2273796367139902, "grad_norm": 0.0, - "learning_rate": 1.8019585116575554e-05, - "loss": 0.9833, + "learning_rate": 1.8025492617102415e-05, + "loss": 1.0036, "step": 8024 }, { - "epoch": 0.22772417707150966, + "epoch": 0.22740797415625266, "grad_norm": 0.0, - "learning_rate": 1.801903604409415e-05, - "loss": 1.0729, + "learning_rate": 1.802494504320894e-05, + "loss": 1.0197, "step": 8025 }, { - "epoch": 0.22775255391600455, + "epoch": 0.22743631159851513, "grad_norm": 0.0, - "learning_rate": 1.801848690387559e-05, - "loss": 0.9164, + "learning_rate": 1.8024397401718374e-05, + "loss": 0.8985, "step": 8026 }, { - "epoch": 0.22778093076049943, + "epoch": 0.22746464904077757, "grad_norm": 0.0, - "learning_rate": 1.801793769592451e-05, - "loss": 0.9959, + "learning_rate": 1.8023849692635327e-05, + "loss": 0.9416, "step": 8027 }, { - "epoch": 0.22780930760499432, + "epoch": 0.22749298648304003, "grad_norm": 0.0, - "learning_rate": 1.801738842024555e-05, - "loss": 0.9894, + "learning_rate": 1.8023301915964414e-05, + "loss": 1.0496, "step": 8028 }, { - "epoch": 0.2278376844494892, + "epoch": 0.2275213239253025, "grad_norm": 0.0, - "learning_rate": 1.8016839076843347e-05, - "loss": 0.9519, + "learning_rate": 1.8022754071710254e-05, + "loss": 0.8897, "step": 8029 }, { - "epoch": 0.2278660612939841, + "epoch": 0.22754966136756496, "grad_norm": 0.0, - "learning_rate": 1.8016289665722545e-05, - "loss": 0.9305, + "learning_rate": 1.8022206159877453e-05, + "loss": 0.9195, "step": 8030 }, { - "epoch": 0.227894438138479, + "epoch": 0.22757799880982743, "grad_norm": 0.0, - "learning_rate": 1.8015740186887785e-05, - "loss": 0.9181, + "learning_rate": 1.802165818047063e-05, + "loss": 1.0603, "step": 8031 }, { - "epoch": 0.2279228149829739, + "epoch": 0.2276063362520899, "grad_norm": 0.0, - "learning_rate": 1.8015190640343707e-05, - "loss": 0.9884, + "learning_rate": 1.8021110133494405e-05, + "loss": 0.9223, "step": 8032 }, { - "epoch": 0.22795119182746879, + "epoch": 0.22763467369435234, "grad_norm": 0.0, - "learning_rate": 1.801464102609495e-05, - "loss": 1.067, + "learning_rate": 1.802056201895339e-05, + "loss": 0.9546, "step": 8033 }, { - "epoch": 0.22797956867196367, + "epoch": 0.2276630111366148, "grad_norm": 0.0, - "learning_rate": 1.8014091344146166e-05, - "loss": 1.0988, + "learning_rate": 1.80200138368522e-05, + "loss": 1.0032, "step": 8034 }, { - "epoch": 0.22800794551645856, + "epoch": 0.22769134857887727, "grad_norm": 0.0, - "learning_rate": 1.801354159450199e-05, - "loss": 1.1248, + "learning_rate": 1.801946558719546e-05, + "loss": 1.0701, "step": 8035 }, { - "epoch": 0.22803632236095348, + "epoch": 0.22771968602113973, "grad_norm": 0.0, - "learning_rate": 1.8012991777167065e-05, - "loss": 1.0085, + "learning_rate": 1.8018917269987775e-05, + "loss": 1.0472, "step": 8036 }, { - "epoch": 0.22806469920544836, + "epoch": 0.2277480234634022, "grad_norm": 0.0, - "learning_rate": 1.8012441892146043e-05, - "loss": 1.005, + "learning_rate": 1.801836888523378e-05, + "loss": 1.1017, "step": 8037 }, { - "epoch": 0.22809307604994325, + "epoch": 0.22777636090566467, "grad_norm": 0.0, - "learning_rate": 1.8011891939443562e-05, - "loss": 0.9686, + "learning_rate": 1.8017820432938086e-05, + "loss": 1.0055, "step": 8038 }, { - "epoch": 0.22812145289443814, + "epoch": 0.2278046983479271, "grad_norm": 0.0, - "learning_rate": 1.801134191906427e-05, - "loss": 1.0614, + "learning_rate": 1.801727191310531e-05, + "loss": 0.9936, "step": 8039 }, { - "epoch": 0.22814982973893302, + "epoch": 0.22783303579018957, "grad_norm": 0.0, - "learning_rate": 1.801079183101281e-05, - "loss": 1.0506, + "learning_rate": 1.8016723325740075e-05, + "loss": 1.0999, "step": 8040 }, { - "epoch": 0.2281782065834279, + "epoch": 0.22786137323245204, "grad_norm": 0.0, - "learning_rate": 1.8010241675293834e-05, - "loss": 0.9485, + "learning_rate": 1.8016174670847005e-05, + "loss": 1.0431, "step": 8041 }, { - "epoch": 0.22820658342792283, + "epoch": 0.2278897106747145, "grad_norm": 0.0, - "learning_rate": 1.8009691451911986e-05, - "loss": 1.1129, + "learning_rate": 1.801562594843072e-05, + "loss": 1.0462, "step": 8042 }, { - "epoch": 0.22823496027241771, + "epoch": 0.22791804811697697, "grad_norm": 0.0, - "learning_rate": 1.800914116087191e-05, - "loss": 0.9255, + "learning_rate": 1.8015077158495836e-05, + "loss": 0.9949, "step": 8043 }, { - "epoch": 0.2282633371169126, + "epoch": 0.22794638555923943, "grad_norm": 0.0, - "learning_rate": 1.800859080217827e-05, - "loss": 0.987, + "learning_rate": 1.8014528301046987e-05, + "loss": 0.9598, "step": 8044 }, { - "epoch": 0.2282917139614075, + "epoch": 0.22797472300150187, "grad_norm": 0.0, - "learning_rate": 1.8008040375835692e-05, - "loss": 0.9733, + "learning_rate": 1.8013979376088785e-05, + "loss": 0.9912, "step": 8045 }, { - "epoch": 0.22832009080590238, + "epoch": 0.22800306044376434, "grad_norm": 0.0, - "learning_rate": 1.800748988184884e-05, - "loss": 0.999, + "learning_rate": 1.801343038362586e-05, + "loss": 1.0056, "step": 8046 }, { - "epoch": 0.22834846765039726, + "epoch": 0.2280313978860268, "grad_norm": 0.0, - "learning_rate": 1.8006939320222363e-05, - "loss": 0.9547, + "learning_rate": 1.801288132366284e-05, + "loss": 0.9419, "step": 8047 }, { - "epoch": 0.22837684449489218, + "epoch": 0.22805973532828927, "grad_norm": 0.0, - "learning_rate": 1.800638869096091e-05, - "loss": 1.0816, + "learning_rate": 1.8012332196204338e-05, + "loss": 0.9319, "step": 8048 }, { - "epoch": 0.22840522133938707, + "epoch": 0.22808807277055174, "grad_norm": 0.0, - "learning_rate": 1.800583799406913e-05, - "loss": 1.0304, + "learning_rate": 1.801178300125499e-05, + "loss": 1.0188, "step": 8049 }, { - "epoch": 0.22843359818388195, + "epoch": 0.2281164102128142, "grad_norm": 0.0, - "learning_rate": 1.8005287229551675e-05, - "loss": 0.9132, + "learning_rate": 1.8011233738819418e-05, + "loss": 0.9314, "step": 8050 }, { - "epoch": 0.22846197502837684, + "epoch": 0.22814474765507664, "grad_norm": 0.0, - "learning_rate": 1.8004736397413204e-05, - "loss": 0.9043, + "learning_rate": 1.801068440890225e-05, + "loss": 0.9188, "step": 8051 }, { - "epoch": 0.22849035187287173, + "epoch": 0.2281730850973391, "grad_norm": 0.0, - "learning_rate": 1.800418549765836e-05, - "loss": 0.9254, + "learning_rate": 1.8010135011508113e-05, + "loss": 0.9456, "step": 8052 }, { - "epoch": 0.22851872871736664, + "epoch": 0.22820142253960157, "grad_norm": 0.0, - "learning_rate": 1.80036345302918e-05, - "loss": 1.0521, + "learning_rate": 1.8009585546641634e-05, + "loss": 1.0719, "step": 8053 }, { - "epoch": 0.22854710556186153, + "epoch": 0.22822975998186404, "grad_norm": 0.0, - "learning_rate": 1.8003083495318187e-05, - "loss": 1.0177, + "learning_rate": 1.800903601430744e-05, + "loss": 1.0253, "step": 8054 }, { - "epoch": 0.22857548240635642, + "epoch": 0.2282580974241265, "grad_norm": 0.0, - "learning_rate": 1.8002532392742164e-05, - "loss": 0.9624, + "learning_rate": 1.8008486414510163e-05, + "loss": 1.1252, "step": 8055 }, { - "epoch": 0.2286038592508513, + "epoch": 0.22828643486638897, "grad_norm": 0.0, - "learning_rate": 1.8001981222568386e-05, - "loss": 0.9329, + "learning_rate": 1.800793674725443e-05, + "loss": 1.097, "step": 8056 }, { - "epoch": 0.2286322360953462, + "epoch": 0.2283147723086514, "grad_norm": 0.0, - "learning_rate": 1.8001429984801517e-05, - "loss": 0.9516, + "learning_rate": 1.8007387012544874e-05, + "loss": 1.012, "step": 8057 }, { - "epoch": 0.22866061293984108, + "epoch": 0.22834310975091388, "grad_norm": 0.0, - "learning_rate": 1.8000878679446207e-05, - "loss": 1.0449, + "learning_rate": 1.8006837210386124e-05, + "loss": 0.9979, "step": 8058 }, { - "epoch": 0.228688989784336, + "epoch": 0.22837144719317634, "grad_norm": 0.0, - "learning_rate": 1.800032730650712e-05, - "loss": 1.0089, + "learning_rate": 1.8006287340782807e-05, + "loss": 0.993, "step": 8059 }, { - "epoch": 0.22871736662883088, + "epoch": 0.2283997846354388, "grad_norm": 0.0, - "learning_rate": 1.7999775865988903e-05, - "loss": 0.9129, + "learning_rate": 1.800573740373956e-05, + "loss": 0.964, "step": 8060 }, { - "epoch": 0.22874574347332577, + "epoch": 0.22842812207770127, "grad_norm": 0.0, - "learning_rate": 1.7999224357896222e-05, - "loss": 0.9648, + "learning_rate": 1.8005187399261017e-05, + "loss": 0.8796, "step": 8061 }, { - "epoch": 0.22877412031782066, + "epoch": 0.22845645951996374, "grad_norm": 0.0, - "learning_rate": 1.7998672782233732e-05, - "loss": 1.1185, + "learning_rate": 1.8004637327351805e-05, + "loss": 0.9855, "step": 8062 }, { - "epoch": 0.22880249716231554, + "epoch": 0.22848479696222618, "grad_norm": 0.0, - "learning_rate": 1.7998121139006096e-05, - "loss": 0.9142, + "learning_rate": 1.800408718801656e-05, + "loss": 0.9957, "step": 8063 }, { - "epoch": 0.22883087400681043, + "epoch": 0.22851313440448864, "grad_norm": 0.0, - "learning_rate": 1.799756942821797e-05, - "loss": 1.0509, + "learning_rate": 1.800353698125992e-05, + "loss": 0.9853, "step": 8064 }, { - "epoch": 0.22885925085130535, + "epoch": 0.2285414718467511, "grad_norm": 0.0, - "learning_rate": 1.7997017649874015e-05, - "loss": 1.0072, + "learning_rate": 1.8002986707086515e-05, + "loss": 0.9959, "step": 8065 }, { - "epoch": 0.22888762769580023, + "epoch": 0.22856980928901358, "grad_norm": 0.0, - "learning_rate": 1.7996465803978893e-05, - "loss": 0.9755, + "learning_rate": 1.8002436365500975e-05, + "loss": 0.8421, "step": 8066 }, { - "epoch": 0.22891600454029512, + "epoch": 0.22859814673127604, "grad_norm": 0.0, - "learning_rate": 1.7995913890537268e-05, - "loss": 1.0437, + "learning_rate": 1.800188595650795e-05, + "loss": 1.0481, "step": 8067 }, { - "epoch": 0.22894438138479, + "epoch": 0.2286264841735385, "grad_norm": 0.0, - "learning_rate": 1.799536190955379e-05, - "loss": 0.9085, + "learning_rate": 1.8001335480112067e-05, + "loss": 0.9623, "step": 8068 }, { - "epoch": 0.2289727582292849, + "epoch": 0.22865482161580095, "grad_norm": 0.0, - "learning_rate": 1.799480986103314e-05, - "loss": 1.0414, + "learning_rate": 1.800078493631796e-05, + "loss": 0.9648, "step": 8069 }, { - "epoch": 0.22900113507377978, + "epoch": 0.2286831590580634, "grad_norm": 0.0, - "learning_rate": 1.7994257744979965e-05, - "loss": 1.0619, + "learning_rate": 1.8000234325130274e-05, + "loss": 0.9783, "step": 8070 }, { - "epoch": 0.2290295119182747, + "epoch": 0.22871149650032588, "grad_norm": 0.0, - "learning_rate": 1.799370556139894e-05, - "loss": 1.0325, + "learning_rate": 1.7999683646553642e-05, + "loss": 0.9508, "step": 8071 }, { - "epoch": 0.22905788876276958, + "epoch": 0.22873983394258834, "grad_norm": 0.0, - "learning_rate": 1.7993153310294722e-05, - "loss": 0.9071, + "learning_rate": 1.7999132900592703e-05, + "loss": 1.0508, "step": 8072 }, { - "epoch": 0.22908626560726447, + "epoch": 0.2287681713848508, "grad_norm": 0.0, - "learning_rate": 1.799260099167198e-05, - "loss": 1.0049, + "learning_rate": 1.7998582087252096e-05, + "loss": 0.8571, "step": 8073 }, { - "epoch": 0.22911464245175936, + "epoch": 0.22879650882711328, "grad_norm": 0.0, - "learning_rate": 1.7992048605535378e-05, - "loss": 1.0597, + "learning_rate": 1.7998031206536466e-05, + "loss": 0.9174, "step": 8074 }, { - "epoch": 0.22914301929625425, + "epoch": 0.22882484626937571, "grad_norm": 0.0, - "learning_rate": 1.7991496151889583e-05, - "loss": 1.092, + "learning_rate": 1.7997480258450447e-05, + "loss": 0.9998, "step": 8075 }, { - "epoch": 0.22917139614074916, + "epoch": 0.22885318371163818, "grad_norm": 0.0, - "learning_rate": 1.799094363073926e-05, - "loss": 1.0349, + "learning_rate": 1.7996929242998682e-05, + "loss": 1.0323, "step": 8076 }, { - "epoch": 0.22919977298524405, + "epoch": 0.22888152115390065, "grad_norm": 0.0, - "learning_rate": 1.7990391042089076e-05, - "loss": 0.9137, + "learning_rate": 1.799637816018581e-05, + "loss": 0.9909, "step": 8077 }, { - "epoch": 0.22922814982973894, + "epoch": 0.2289098585961631, "grad_norm": 0.0, - "learning_rate": 1.7989838385943697e-05, - "loss": 0.9916, + "learning_rate": 1.799582701001648e-05, + "loss": 0.9957, "step": 8078 }, { - "epoch": 0.22925652667423382, + "epoch": 0.22893819603842558, "grad_norm": 0.0, - "learning_rate": 1.79892856623078e-05, - "loss": 0.8367, + "learning_rate": 1.7995275792495327e-05, + "loss": 1.0028, "step": 8079 }, { - "epoch": 0.2292849035187287, + "epoch": 0.22896653348068804, "grad_norm": 0.0, - "learning_rate": 1.7988732871186045e-05, - "loss": 0.9803, + "learning_rate": 1.7994724507626996e-05, + "loss": 0.9582, "step": 8080 }, { - "epoch": 0.2293132803632236, + "epoch": 0.22899487092295048, "grad_norm": 0.0, - "learning_rate": 1.7988180012583105e-05, - "loss": 0.9939, + "learning_rate": 1.7994173155416133e-05, + "loss": 1.041, "step": 8081 }, { - "epoch": 0.2293416572077185, + "epoch": 0.22902320836521295, "grad_norm": 0.0, - "learning_rate": 1.798762708650365e-05, - "loss": 0.9602, + "learning_rate": 1.7993621735867385e-05, + "loss": 0.9959, "step": 8082 }, { - "epoch": 0.2293700340522134, + "epoch": 0.22905154580747542, "grad_norm": 0.0, - "learning_rate": 1.7987074092952347e-05, - "loss": 0.9822, + "learning_rate": 1.7993070248985386e-05, + "loss": 0.9733, "step": 8083 }, { - "epoch": 0.2293984108967083, + "epoch": 0.22907988324973788, "grad_norm": 0.0, - "learning_rate": 1.7986521031933876e-05, - "loss": 0.9798, + "learning_rate": 1.7992518694774794e-05, + "loss": 0.8974, "step": 8084 }, { - "epoch": 0.22942678774120318, + "epoch": 0.22910822069200035, "grad_norm": 0.0, - "learning_rate": 1.7985967903452897e-05, - "loss": 1.0492, + "learning_rate": 1.7991967073240245e-05, + "loss": 0.985, "step": 8085 }, { - "epoch": 0.22945516458569806, + "epoch": 0.2291365581342628, "grad_norm": 0.0, - "learning_rate": 1.798541470751409e-05, - "loss": 0.9487, + "learning_rate": 1.799141538438639e-05, + "loss": 1.0749, "step": 8086 }, { - "epoch": 0.22948354143019295, + "epoch": 0.22916489557652525, "grad_norm": 0.0, - "learning_rate": 1.7984861444122127e-05, - "loss": 0.9566, + "learning_rate": 1.799086362821788e-05, + "loss": 1.0669, "step": 8087 }, { - "epoch": 0.22951191827468786, + "epoch": 0.22919323301878772, "grad_norm": 0.0, - "learning_rate": 1.7984308113281677e-05, - "loss": 1.0779, + "learning_rate": 1.7990311804739352e-05, + "loss": 0.8955, "step": 8088 }, { - "epoch": 0.22954029511918275, + "epoch": 0.22922157046105018, "grad_norm": 0.0, - "learning_rate": 1.798375471499742e-05, - "loss": 1.0995, + "learning_rate": 1.7989759913955465e-05, + "loss": 1.0596, "step": 8089 }, { - "epoch": 0.22956867196367764, + "epoch": 0.22924990790331265, "grad_norm": 0.0, - "learning_rate": 1.7983201249274028e-05, - "loss": 1.0651, + "learning_rate": 1.798920795587086e-05, + "loss": 1.1535, "step": 8090 }, { - "epoch": 0.22959704880817253, + "epoch": 0.22927824534557512, "grad_norm": 0.0, - "learning_rate": 1.7982647716116173e-05, - "loss": 0.9857, + "learning_rate": 1.7988655930490192e-05, + "loss": 1.033, "step": 8091 }, { - "epoch": 0.22962542565266741, + "epoch": 0.22930658278783758, "grad_norm": 0.0, - "learning_rate": 1.798209411552854e-05, - "loss": 1.0072, + "learning_rate": 1.798810383781811e-05, + "loss": 0.8563, "step": 8092 }, { - "epoch": 0.22965380249716233, + "epoch": 0.22933492023010002, "grad_norm": 0.0, - "learning_rate": 1.7981540447515797e-05, - "loss": 1.1223, + "learning_rate": 1.798755167785926e-05, + "loss": 1.0191, "step": 8093 }, { - "epoch": 0.22968217934165722, + "epoch": 0.22936325767236249, "grad_norm": 0.0, - "learning_rate": 1.798098671208262e-05, - "loss": 0.8677, + "learning_rate": 1.7986999450618295e-05, + "loss": 0.9257, "step": 8094 }, { - "epoch": 0.2297105561861521, + "epoch": 0.22939159511462495, "grad_norm": 0.0, - "learning_rate": 1.7980432909233693e-05, - "loss": 0.8889, + "learning_rate": 1.7986447156099874e-05, + "loss": 1.0088, "step": 8095 }, { - "epoch": 0.229738933030647, + "epoch": 0.22941993255688742, "grad_norm": 0.0, - "learning_rate": 1.7979879038973686e-05, - "loss": 1.0608, + "learning_rate": 1.798589479430864e-05, + "loss": 0.9367, "step": 8096 }, { - "epoch": 0.22976730987514188, + "epoch": 0.22944826999914988, "grad_norm": 0.0, - "learning_rate": 1.797932510130729e-05, - "loss": 0.9563, + "learning_rate": 1.7985342365249247e-05, + "loss": 0.9814, "step": 8097 }, { - "epoch": 0.22979568671963677, + "epoch": 0.22947660744141235, "grad_norm": 0.0, - "learning_rate": 1.797877109623917e-05, - "loss": 0.9713, + "learning_rate": 1.7984789868926348e-05, + "loss": 0.9033, "step": 8098 }, { - "epoch": 0.22982406356413168, + "epoch": 0.2295049448836748, "grad_norm": 0.0, - "learning_rate": 1.7978217023774012e-05, - "loss": 1.0295, + "learning_rate": 1.7984237305344604e-05, + "loss": 0.9261, "step": 8099 }, { - "epoch": 0.22985244040862657, + "epoch": 0.22953328232593725, "grad_norm": 0.0, - "learning_rate": 1.7977662883916497e-05, - "loss": 0.9346, + "learning_rate": 1.7983684674508658e-05, + "loss": 0.9762, "step": 8100 }, { - "epoch": 0.22988081725312146, + "epoch": 0.22956161976819972, "grad_norm": 0.0, - "learning_rate": 1.7977108676671307e-05, - "loss": 1.0381, + "learning_rate": 1.7983131976423175e-05, + "loss": 0.9792, "step": 8101 }, { - "epoch": 0.22990919409761634, + "epoch": 0.2295899572104622, "grad_norm": 0.0, - "learning_rate": 1.7976554402043115e-05, - "loss": 0.98, + "learning_rate": 1.7982579211092807e-05, + "loss": 0.9683, "step": 8102 }, { - "epoch": 0.22993757094211123, + "epoch": 0.22961829465272465, "grad_norm": 0.0, - "learning_rate": 1.7976000060036617e-05, - "loss": 0.8614, + "learning_rate": 1.798202637852221e-05, + "loss": 0.9854, "step": 8103 }, { - "epoch": 0.22996594778660612, + "epoch": 0.22964663209498712, "grad_norm": 0.0, - "learning_rate": 1.7975445650656484e-05, - "loss": 0.9798, + "learning_rate": 1.7981473478716042e-05, + "loss": 0.9901, "step": 8104 }, { - "epoch": 0.22999432463110103, + "epoch": 0.22967496953724956, "grad_norm": 0.0, - "learning_rate": 1.7974891173907406e-05, - "loss": 0.9175, + "learning_rate": 1.798092051167896e-05, + "loss": 1.0747, "step": 8105 }, { - "epoch": 0.23002270147559592, + "epoch": 0.22970330697951202, "grad_norm": 0.0, - "learning_rate": 1.7974336629794062e-05, - "loss": 0.9991, + "learning_rate": 1.7980367477415614e-05, + "loss": 0.9549, "step": 8106 }, { - "epoch": 0.2300510783200908, + "epoch": 0.2297316444217745, "grad_norm": 0.0, - "learning_rate": 1.7973782018321134e-05, - "loss": 1.0163, + "learning_rate": 1.7979814375930676e-05, + "loss": 0.9905, "step": 8107 }, { - "epoch": 0.2300794551645857, + "epoch": 0.22975998186403696, "grad_norm": 0.0, - "learning_rate": 1.797322733949332e-05, - "loss": 0.9666, + "learning_rate": 1.79792612072288e-05, + "loss": 0.985, "step": 8108 }, { - "epoch": 0.23010783200908058, + "epoch": 0.22978831930629942, "grad_norm": 0.0, - "learning_rate": 1.7972672593315292e-05, - "loss": 0.9756, + "learning_rate": 1.7978707971314636e-05, + "loss": 0.9193, "step": 8109 }, { - "epoch": 0.23013620885357547, + "epoch": 0.2298166567485619, "grad_norm": 0.0, - "learning_rate": 1.7972117779791735e-05, - "loss": 1.0862, + "learning_rate": 1.7978154668192858e-05, + "loss": 1.0749, "step": 8110 }, { - "epoch": 0.23016458569807038, + "epoch": 0.22984499419082433, "grad_norm": 0.0, - "learning_rate": 1.7971562898927348e-05, - "loss": 0.8679, + "learning_rate": 1.7977601297868116e-05, + "loss": 1.0402, "step": 8111 }, { - "epoch": 0.23019296254256527, + "epoch": 0.2298733316330868, "grad_norm": 0.0, - "learning_rate": 1.7971007950726806e-05, - "loss": 0.9542, + "learning_rate": 1.797704786034508e-05, + "loss": 0.9608, "step": 8112 }, { - "epoch": 0.23022133938706016, + "epoch": 0.22990166907534926, "grad_norm": 0.0, - "learning_rate": 1.79704529351948e-05, - "loss": 1.0025, + "learning_rate": 1.7976494355628406e-05, + "loss": 0.933, "step": 8113 }, { - "epoch": 0.23024971623155505, + "epoch": 0.22993000651761172, "grad_norm": 0.0, - "learning_rate": 1.7969897852336028e-05, - "loss": 0.9942, + "learning_rate": 1.797594078372276e-05, + "loss": 1.007, "step": 8114 }, { - "epoch": 0.23027809307604993, + "epoch": 0.2299583439598742, "grad_norm": 0.0, - "learning_rate": 1.7969342702155164e-05, - "loss": 1.0552, + "learning_rate": 1.79753871446328e-05, + "loss": 0.964, "step": 8115 }, { - "epoch": 0.23030646992054485, + "epoch": 0.22998668140213666, "grad_norm": 0.0, - "learning_rate": 1.7968787484656905e-05, - "loss": 1.0689, + "learning_rate": 1.7974833438363192e-05, + "loss": 0.9689, "step": 8116 }, { - "epoch": 0.23033484676503974, + "epoch": 0.2300150188443991, "grad_norm": 0.0, - "learning_rate": 1.796823219984594e-05, - "loss": 0.9917, + "learning_rate": 1.7974279664918605e-05, + "loss": 1.0108, "step": 8117 }, { - "epoch": 0.23036322360953462, + "epoch": 0.23004335628666156, "grad_norm": 0.0, - "learning_rate": 1.796767684772696e-05, - "loss": 0.9622, + "learning_rate": 1.7973725824303694e-05, + "loss": 1.1088, "step": 8118 }, { - "epoch": 0.2303916004540295, + "epoch": 0.23007169372892403, "grad_norm": 0.0, - "learning_rate": 1.7967121428304656e-05, - "loss": 1.0369, + "learning_rate": 1.7973171916523134e-05, + "loss": 1.0426, "step": 8119 }, { - "epoch": 0.2304199772985244, + "epoch": 0.2301000311711865, "grad_norm": 0.0, - "learning_rate": 1.7966565941583718e-05, - "loss": 0.9795, + "learning_rate": 1.797261794158158e-05, + "loss": 1.0277, "step": 8120 }, { - "epoch": 0.23044835414301929, + "epoch": 0.23012836861344896, "grad_norm": 0.0, - "learning_rate": 1.796601038756884e-05, - "loss": 1.0292, + "learning_rate": 1.797206389948371e-05, + "loss": 0.9723, "step": 8121 }, { - "epoch": 0.2304767309875142, + "epoch": 0.23015670605571142, "grad_norm": 0.0, - "learning_rate": 1.7965454766264714e-05, - "loss": 1.0277, + "learning_rate": 1.7971509790234184e-05, + "loss": 1.0322, "step": 8122 }, { - "epoch": 0.2305051078320091, + "epoch": 0.23018504349797386, "grad_norm": 0.0, - "learning_rate": 1.7964899077676034e-05, - "loss": 1.0336, + "learning_rate": 1.7970955613837673e-05, + "loss": 0.9982, "step": 8123 }, { - "epoch": 0.23053348467650397, + "epoch": 0.23021338094023633, "grad_norm": 0.0, - "learning_rate": 1.7964343321807493e-05, - "loss": 0.8228, + "learning_rate": 1.797040137029884e-05, + "loss": 0.9277, "step": 8124 }, { - "epoch": 0.23056186152099886, + "epoch": 0.2302417183824988, "grad_norm": 0.0, - "learning_rate": 1.7963787498663783e-05, - "loss": 0.9844, + "learning_rate": 1.7969847059622355e-05, + "loss": 0.9534, "step": 8125 }, { - "epoch": 0.23059023836549375, + "epoch": 0.23027005582476126, "grad_norm": 0.0, - "learning_rate": 1.796323160824961e-05, - "loss": 1.0378, + "learning_rate": 1.796929268181289e-05, + "loss": 1.1049, "step": 8126 }, { - "epoch": 0.23061861520998864, + "epoch": 0.23029839326702373, "grad_norm": 0.0, - "learning_rate": 1.796267565056965e-05, - "loss": 1.0042, + "learning_rate": 1.796873823687512e-05, + "loss": 0.9916, "step": 8127 }, { - "epoch": 0.23064699205448355, + "epoch": 0.2303267307092862, "grad_norm": 0.0, - "learning_rate": 1.796211962562862e-05, - "loss": 0.9745, + "learning_rate": 1.7968183724813698e-05, + "loss": 0.9205, "step": 8128 }, { - "epoch": 0.23067536889897844, + "epoch": 0.23035506815154863, "grad_norm": 0.0, - "learning_rate": 1.7961563533431206e-05, - "loss": 1.0039, + "learning_rate": 1.7967629145633312e-05, + "loss": 1.0779, "step": 8129 }, { - "epoch": 0.23070374574347333, + "epoch": 0.2303834055938111, "grad_norm": 0.0, - "learning_rate": 1.7961007373982108e-05, - "loss": 0.9578, + "learning_rate": 1.796707449933863e-05, + "loss": 0.9785, "step": 8130 }, { - "epoch": 0.2307321225879682, + "epoch": 0.23041174303607356, "grad_norm": 0.0, - "learning_rate": 1.7960451147286017e-05, - "loss": 0.8898, + "learning_rate": 1.7966519785934313e-05, + "loss": 1.0872, "step": 8131 }, { - "epoch": 0.2307604994324631, + "epoch": 0.23044008047833603, "grad_norm": 0.0, - "learning_rate": 1.795989485334764e-05, - "loss": 1.0394, + "learning_rate": 1.7965965005425044e-05, + "loss": 0.8696, "step": 8132 }, { - "epoch": 0.23078887627695802, + "epoch": 0.2304684179205985, "grad_norm": 0.0, - "learning_rate": 1.7959338492171675e-05, - "loss": 0.8997, + "learning_rate": 1.7965410157815496e-05, + "loss": 0.8994, "step": 8133 }, { - "epoch": 0.2308172531214529, + "epoch": 0.23049675536286096, "grad_norm": 0.0, - "learning_rate": 1.795878206376282e-05, - "loss": 0.924, + "learning_rate": 1.796485524311034e-05, + "loss": 1.0388, "step": 8134 }, { - "epoch": 0.2308456299659478, + "epoch": 0.2305250928051234, "grad_norm": 0.0, - "learning_rate": 1.795822556812578e-05, - "loss": 1.0362, + "learning_rate": 1.796430026131425e-05, + "loss": 0.8846, "step": 8135 }, { - "epoch": 0.23087400681044268, + "epoch": 0.23055343024738587, "grad_norm": 0.0, - "learning_rate": 1.7957669005265243e-05, - "loss": 1.0117, + "learning_rate": 1.79637452124319e-05, + "loss": 0.9569, "step": 8136 }, { - "epoch": 0.23090238365493757, + "epoch": 0.23058176768964833, "grad_norm": 0.0, - "learning_rate": 1.795711237518592e-05, - "loss": 0.928, + "learning_rate": 1.796319009646797e-05, + "loss": 1.007, "step": 8137 }, { - "epoch": 0.23093076049943245, + "epoch": 0.2306101051319108, "grad_norm": 0.0, - "learning_rate": 1.795655567789251e-05, - "loss": 0.9921, + "learning_rate": 1.796263491342713e-05, + "loss": 1.0465, "step": 8138 }, { - "epoch": 0.23095913734392737, + "epoch": 0.23063844257417326, "grad_norm": 0.0, - "learning_rate": 1.795599891338972e-05, - "loss": 1.022, + "learning_rate": 1.796207966331406e-05, + "loss": 0.9811, "step": 8139 }, { - "epoch": 0.23098751418842225, + "epoch": 0.23066678001643573, "grad_norm": 0.0, - "learning_rate": 1.7955442081682246e-05, - "loss": 0.9989, + "learning_rate": 1.7961524346133437e-05, + "loss": 0.9606, "step": 8140 }, { - "epoch": 0.23101589103291714, + "epoch": 0.23069511745869817, "grad_norm": 0.0, - "learning_rate": 1.7954885182774798e-05, - "loss": 0.9827, + "learning_rate": 1.7960968961889936e-05, + "loss": 1.027, "step": 8141 }, { - "epoch": 0.23104426787741203, + "epoch": 0.23072345490096063, "grad_norm": 0.0, - "learning_rate": 1.7954328216672077e-05, - "loss": 0.9646, + "learning_rate": 1.7960413510588242e-05, + "loss": 0.9086, "step": 8142 }, { - "epoch": 0.23107264472190692, + "epoch": 0.2307517923432231, "grad_norm": 0.0, - "learning_rate": 1.7953771183378786e-05, - "loss": 0.9447, + "learning_rate": 1.7959857992233022e-05, + "loss": 0.9443, "step": 8143 }, { - "epoch": 0.2311010215664018, + "epoch": 0.23078012978548557, "grad_norm": 0.0, - "learning_rate": 1.7953214082899634e-05, - "loss": 0.8394, + "learning_rate": 1.7959302406828967e-05, + "loss": 1.0837, "step": 8144 }, { - "epoch": 0.23112939841089672, + "epoch": 0.23080846722774803, "grad_norm": 0.0, - "learning_rate": 1.7952656915239325e-05, - "loss": 0.8814, + "learning_rate": 1.7958746754380754e-05, + "loss": 1.0769, "step": 8145 }, { - "epoch": 0.2311577752553916, + "epoch": 0.2308368046700105, "grad_norm": 0.0, - "learning_rate": 1.7952099680402564e-05, - "loss": 1.0264, + "learning_rate": 1.7958191034893057e-05, + "loss": 0.9557, "step": 8146 }, { - "epoch": 0.2311861520998865, + "epoch": 0.23086514211227294, "grad_norm": 0.0, - "learning_rate": 1.795154237839406e-05, - "loss": 0.9651, + "learning_rate": 1.7957635248370563e-05, + "loss": 1.0588, "step": 8147 }, { - "epoch": 0.23121452894438138, + "epoch": 0.2308934795545354, "grad_norm": 0.0, - "learning_rate": 1.795098500921852e-05, - "loss": 0.8941, + "learning_rate": 1.7957079394817954e-05, + "loss": 0.9566, "step": 8148 }, { - "epoch": 0.23124290578887627, + "epoch": 0.23092181699679787, "grad_norm": 0.0, - "learning_rate": 1.7950427572880655e-05, - "loss": 1.0581, + "learning_rate": 1.795652347423991e-05, + "loss": 0.9743, "step": 8149 }, { - "epoch": 0.23127128263337116, + "epoch": 0.23095015443906033, "grad_norm": 0.0, - "learning_rate": 1.794987006938517e-05, - "loss": 1.0046, + "learning_rate": 1.795596748664111e-05, + "loss": 0.9367, "step": 8150 }, { - "epoch": 0.23129965947786607, + "epoch": 0.2309784918813228, "grad_norm": 0.0, - "learning_rate": 1.794931249873677e-05, - "loss": 1.2108, + "learning_rate": 1.7955411432026245e-05, + "loss": 0.9196, "step": 8151 }, { - "epoch": 0.23132803632236096, + "epoch": 0.23100682932358527, "grad_norm": 0.0, - "learning_rate": 1.7948754860940175e-05, - "loss": 1.0579, + "learning_rate": 1.7954855310399997e-05, + "loss": 0.9965, "step": 8152 }, { - "epoch": 0.23135641316685585, + "epoch": 0.2310351667658477, "grad_norm": 0.0, - "learning_rate": 1.7948197156000088e-05, - "loss": 0.9166, + "learning_rate": 1.795429912176705e-05, + "loss": 1.0109, "step": 8153 }, { - "epoch": 0.23138479001135073, + "epoch": 0.23106350420811017, "grad_norm": 0.0, - "learning_rate": 1.7947639383921225e-05, - "loss": 1.0578, + "learning_rate": 1.7953742866132082e-05, + "loss": 0.9931, "step": 8154 }, { - "epoch": 0.23141316685584562, + "epoch": 0.23109184165037264, "grad_norm": 0.0, - "learning_rate": 1.7947081544708292e-05, - "loss": 0.9455, + "learning_rate": 1.7953186543499786e-05, + "loss": 1.0494, "step": 8155 }, { - "epoch": 0.23144154370034054, + "epoch": 0.2311201790926351, "grad_norm": 0.0, - "learning_rate": 1.7946523638366006e-05, - "loss": 0.9062, + "learning_rate": 1.795263015387485e-05, + "loss": 1.0785, "step": 8156 }, { - "epoch": 0.23146992054483542, + "epoch": 0.23114851653489757, "grad_norm": 0.0, - "learning_rate": 1.7945965664899076e-05, - "loss": 1.04, + "learning_rate": 1.7952073697261954e-05, + "loss": 1.0786, "step": 8157 }, { - "epoch": 0.2314982973893303, + "epoch": 0.23117685397716004, "grad_norm": 0.0, - "learning_rate": 1.794540762431222e-05, - "loss": 1.0296, + "learning_rate": 1.795151717366579e-05, + "loss": 0.9721, "step": 8158 }, { - "epoch": 0.2315266742338252, + "epoch": 0.23120519141942247, "grad_norm": 0.0, - "learning_rate": 1.7944849516610147e-05, - "loss": 0.8615, + "learning_rate": 1.7950960583091045e-05, + "loss": 1.051, "step": 8159 }, { - "epoch": 0.23155505107832008, + "epoch": 0.23123352886168494, "grad_norm": 0.0, - "learning_rate": 1.794429134179757e-05, - "loss": 1.1806, + "learning_rate": 1.79504039255424e-05, + "loss": 0.9236, "step": 8160 }, { - "epoch": 0.23158342792281497, + "epoch": 0.2312618663039474, "grad_norm": 0.0, - "learning_rate": 1.794373309987921e-05, - "loss": 1.1083, + "learning_rate": 1.7949847201024558e-05, + "loss": 1.0464, "step": 8161 }, { - "epoch": 0.2316118047673099, + "epoch": 0.23129020374620987, "grad_norm": 0.0, - "learning_rate": 1.794317479085978e-05, - "loss": 0.9639, + "learning_rate": 1.7949290409542196e-05, + "loss": 0.946, "step": 8162 }, { - "epoch": 0.23164018161180477, + "epoch": 0.23131854118847234, "grad_norm": 0.0, - "learning_rate": 1.794261641474399e-05, - "loss": 0.8312, + "learning_rate": 1.7948733551100012e-05, + "loss": 0.9413, "step": 8163 }, { - "epoch": 0.23166855845629966, + "epoch": 0.2313468786307348, "grad_norm": 0.0, - "learning_rate": 1.794205797153657e-05, - "loss": 0.9669, + "learning_rate": 1.7948176625702692e-05, + "loss": 1.0337, "step": 8164 }, { - "epoch": 0.23169693530079455, + "epoch": 0.23137521607299724, "grad_norm": 0.0, - "learning_rate": 1.7941499461242225e-05, - "loss": 0.9909, + "learning_rate": 1.794761963335493e-05, + "loss": 0.8673, "step": 8165 }, { - "epoch": 0.23172531214528944, + "epoch": 0.2314035535152597, "grad_norm": 0.0, - "learning_rate": 1.794094088386568e-05, - "loss": 0.9225, + "learning_rate": 1.7947062574061417e-05, + "loss": 1.0046, "step": 8166 }, { - "epoch": 0.23175368898978432, + "epoch": 0.23143189095752217, "grad_norm": 0.0, - "learning_rate": 1.7940382239411646e-05, - "loss": 1.0309, + "learning_rate": 1.7946505447826843e-05, + "loss": 1.0815, "step": 8167 }, { - "epoch": 0.23178206583427924, + "epoch": 0.23146022839978464, "grad_norm": 0.0, - "learning_rate": 1.7939823527884845e-05, - "loss": 1.0427, + "learning_rate": 1.7945948254655904e-05, + "loss": 0.9492, "step": 8168 }, { - "epoch": 0.23181044267877413, + "epoch": 0.2314885658420471, "grad_norm": 0.0, - "learning_rate": 1.7939264749290005e-05, - "loss": 0.9369, + "learning_rate": 1.794539099455329e-05, + "loss": 1.0931, "step": 8169 }, { - "epoch": 0.231838819523269, + "epoch": 0.23151690328430957, "grad_norm": 0.0, - "learning_rate": 1.7938705903631833e-05, - "loss": 0.9911, + "learning_rate": 1.79448336675237e-05, + "loss": 1.0366, "step": 8170 }, { - "epoch": 0.2318671963677639, + "epoch": 0.231545240726572, "grad_norm": 0.0, - "learning_rate": 1.7938146990915055e-05, - "loss": 0.9606, + "learning_rate": 1.7944276273571823e-05, + "loss": 1.0958, "step": 8171 }, { - "epoch": 0.2318955732122588, + "epoch": 0.23157357816883448, "grad_norm": 0.0, - "learning_rate": 1.79375880111444e-05, - "loss": 1.0794, + "learning_rate": 1.794371881270236e-05, + "loss": 1.0722, "step": 8172 }, { - "epoch": 0.2319239500567537, + "epoch": 0.23160191561109694, "grad_norm": 0.0, - "learning_rate": 1.7937028964324575e-05, - "loss": 1.0498, + "learning_rate": 1.794316128492e-05, + "loss": 0.9442, "step": 8173 }, { - "epoch": 0.2319523269012486, + "epoch": 0.2316302530533594, "grad_norm": 0.0, - "learning_rate": 1.793646985046031e-05, - "loss": 0.9568, + "learning_rate": 1.7942603690229447e-05, + "loss": 0.9256, "step": 8174 }, { - "epoch": 0.23198070374574348, + "epoch": 0.23165859049562187, "grad_norm": 0.0, - "learning_rate": 1.793591066955633e-05, - "loss": 1.0378, + "learning_rate": 1.794204602863539e-05, + "loss": 0.9798, "step": 8175 }, { - "epoch": 0.23200908059023836, + "epoch": 0.2316869279378843, "grad_norm": 0.0, - "learning_rate": 1.793535142161735e-05, - "loss": 0.9846, + "learning_rate": 1.7941488300142535e-05, + "loss": 0.9356, "step": 8176 }, { - "epoch": 0.23203745743473325, + "epoch": 0.23171526538014678, "grad_norm": 0.0, - "learning_rate": 1.793479210664811e-05, - "loss": 0.8977, + "learning_rate": 1.7940930504755568e-05, + "loss": 1.0337, "step": 8177 }, { - "epoch": 0.23206583427922814, + "epoch": 0.23174360282240924, "grad_norm": 0.0, - "learning_rate": 1.7934232724653318e-05, - "loss": 0.9235, + "learning_rate": 1.79403726424792e-05, + "loss": 1.0356, "step": 8178 }, { - "epoch": 0.23209421112372305, + "epoch": 0.2317719402646717, "grad_norm": 0.0, - "learning_rate": 1.7933673275637704e-05, - "loss": 0.9688, + "learning_rate": 1.7939814713318123e-05, + "loss": 1.0204, "step": 8179 }, { - "epoch": 0.23212258796821794, + "epoch": 0.23180027770693418, "grad_norm": 0.0, - "learning_rate": 1.7933113759606e-05, - "loss": 0.8817, + "learning_rate": 1.793925671727704e-05, + "loss": 1.0341, "step": 8180 }, { - "epoch": 0.23215096481271283, + "epoch": 0.23182861514919664, "grad_norm": 0.0, - "learning_rate": 1.7932554176562923e-05, - "loss": 0.9848, + "learning_rate": 1.7938698654360646e-05, + "loss": 0.906, "step": 8181 }, { - "epoch": 0.23217934165720772, + "epoch": 0.23185695259145908, "grad_norm": 0.0, - "learning_rate": 1.7931994526513207e-05, - "loss": 0.9849, + "learning_rate": 1.793814052457365e-05, + "loss": 0.9808, "step": 8182 }, { - "epoch": 0.2322077185017026, + "epoch": 0.23188529003372155, "grad_norm": 0.0, - "learning_rate": 1.7931434809461577e-05, - "loss": 1.0518, + "learning_rate": 1.7937582327920745e-05, + "loss": 0.9417, "step": 8183 }, { - "epoch": 0.2322360953461975, + "epoch": 0.231913627475984, "grad_norm": 0.0, - "learning_rate": 1.793087502541276e-05, - "loss": 0.8728, + "learning_rate": 1.7937024064406637e-05, + "loss": 1.002, "step": 8184 }, { - "epoch": 0.2322644721906924, + "epoch": 0.23194196491824648, "grad_norm": 0.0, - "learning_rate": 1.7930315174371484e-05, - "loss": 0.9105, + "learning_rate": 1.793646573403603e-05, + "loss": 0.9472, "step": 8185 }, { - "epoch": 0.2322928490351873, + "epoch": 0.23197030236050895, "grad_norm": 0.0, - "learning_rate": 1.792975525634248e-05, - "loss": 0.995, + "learning_rate": 1.793590733681362e-05, + "loss": 0.9945, "step": 8186 }, { - "epoch": 0.23232122587968218, + "epoch": 0.2319986398027714, "grad_norm": 0.0, - "learning_rate": 1.7929195271330475e-05, - "loss": 0.904, + "learning_rate": 1.793534887274412e-05, + "loss": 1.1028, "step": 8187 }, { - "epoch": 0.23234960272417707, + "epoch": 0.23202697724503385, "grad_norm": 0.0, - "learning_rate": 1.7928635219340204e-05, - "loss": 0.9222, + "learning_rate": 1.793479034183223e-05, + "loss": 0.9301, "step": 8188 }, { - "epoch": 0.23237797956867196, + "epoch": 0.23205531468729632, "grad_norm": 0.0, - "learning_rate": 1.7928075100376396e-05, - "loss": 1.0976, + "learning_rate": 1.7934231744082652e-05, + "loss": 1.1094, "step": 8189 }, { - "epoch": 0.23240635641316684, + "epoch": 0.23208365212955878, "grad_norm": 0.0, - "learning_rate": 1.792751491444378e-05, - "loss": 0.9435, + "learning_rate": 1.7933673079500097e-05, + "loss": 1.0428, "step": 8190 }, { - "epoch": 0.23243473325766176, + "epoch": 0.23211198957182125, "grad_norm": 0.0, - "learning_rate": 1.7926954661547084e-05, - "loss": 0.9745, + "learning_rate": 1.793311434808926e-05, + "loss": 1.2004, "step": 8191 }, { - "epoch": 0.23246311010215664, + "epoch": 0.2321403270140837, "grad_norm": 0.0, - "learning_rate": 1.792639434169105e-05, - "loss": 1.0427, + "learning_rate": 1.7932555549854862e-05, + "loss": 0.95, "step": 8192 }, { - "epoch": 0.23249148694665153, + "epoch": 0.23216866445634618, "grad_norm": 0.0, - "learning_rate": 1.7925833954880408e-05, - "loss": 1.0587, + "learning_rate": 1.7931996684801604e-05, + "loss": 0.9839, "step": 8193 }, { - "epoch": 0.23251986379114642, + "epoch": 0.23219700189860862, "grad_norm": 0.0, - "learning_rate": 1.792527350111989e-05, - "loss": 0.955, + "learning_rate": 1.7931437752934187e-05, + "loss": 0.97, "step": 8194 }, { - "epoch": 0.2325482406356413, + "epoch": 0.23222533934087108, "grad_norm": 0.0, - "learning_rate": 1.7924712980414227e-05, - "loss": 1.0511, + "learning_rate": 1.7930878754257328e-05, + "loss": 0.886, "step": 8195 }, { - "epoch": 0.23257661748013622, + "epoch": 0.23225367678313355, "grad_norm": 0.0, - "learning_rate": 1.792415239276816e-05, - "loss": 1.0132, + "learning_rate": 1.793031968877573e-05, + "loss": 0.9714, "step": 8196 }, { - "epoch": 0.2326049943246311, + "epoch": 0.23228201422539602, "grad_norm": 0.0, - "learning_rate": 1.7923591738186423e-05, - "loss": 0.9878, + "learning_rate": 1.7929760556494107e-05, + "loss": 0.8426, "step": 8197 }, { - "epoch": 0.232633371169126, + "epoch": 0.23231035166765848, "grad_norm": 0.0, - "learning_rate": 1.7923031016673748e-05, - "loss": 0.9768, + "learning_rate": 1.7929201357417164e-05, + "loss": 1.038, "step": 8198 }, { - "epoch": 0.23266174801362088, + "epoch": 0.23233868910992095, "grad_norm": 0.0, - "learning_rate": 1.7922470228234875e-05, - "loss": 0.9834, + "learning_rate": 1.7928642091549616e-05, + "loss": 1.0071, "step": 8199 }, { - "epoch": 0.23269012485811577, + "epoch": 0.2323670265521834, "grad_norm": 0.0, - "learning_rate": 1.792190937287454e-05, - "loss": 0.9578, + "learning_rate": 1.792808275889617e-05, + "loss": 0.9412, "step": 8200 }, { - "epoch": 0.23271850170261066, + "epoch": 0.23239536399444585, "grad_norm": 0.0, - "learning_rate": 1.7921348450597478e-05, - "loss": 1.0502, + "learning_rate": 1.792752335946154e-05, + "loss": 1.0456, "step": 8201 }, { - "epoch": 0.23274687854710557, + "epoch": 0.23242370143670832, "grad_norm": 0.0, - "learning_rate": 1.7920787461408432e-05, - "loss": 0.9534, + "learning_rate": 1.7926963893250434e-05, + "loss": 1.0511, "step": 8202 }, { - "epoch": 0.23277525539160046, + "epoch": 0.23245203887897078, "grad_norm": 0.0, - "learning_rate": 1.7920226405312136e-05, - "loss": 0.925, + "learning_rate": 1.7926404360267567e-05, + "loss": 1.0986, "step": 8203 }, { - "epoch": 0.23280363223609535, + "epoch": 0.23248037632123325, "grad_norm": 0.0, - "learning_rate": 1.7919665282313335e-05, - "loss": 1.0602, + "learning_rate": 1.7925844760517657e-05, + "loss": 0.8525, "step": 8204 }, { - "epoch": 0.23283200908059024, + "epoch": 0.23250871376349572, "grad_norm": 0.0, - "learning_rate": 1.791910409241676e-05, - "loss": 1.0189, + "learning_rate": 1.7925285094005412e-05, + "loss": 0.9513, "step": 8205 }, { - "epoch": 0.23286038592508512, + "epoch": 0.23253705120575816, "grad_norm": 0.0, - "learning_rate": 1.791854283562716e-05, - "loss": 0.9495, + "learning_rate": 1.7924725360735547e-05, + "loss": 0.9374, "step": 8206 }, { - "epoch": 0.23288876276958, + "epoch": 0.23256538864802062, "grad_norm": 0.0, - "learning_rate": 1.7917981511949273e-05, - "loss": 0.99, + "learning_rate": 1.7924165560712776e-05, + "loss": 1.0282, "step": 8207 }, { - "epoch": 0.23291713961407493, + "epoch": 0.2325937260902831, "grad_norm": 0.0, - "learning_rate": 1.791742012138784e-05, - "loss": 1.0888, + "learning_rate": 1.7923605693941818e-05, + "loss": 1.0059, "step": 8208 }, { - "epoch": 0.2329455164585698, + "epoch": 0.23262206353254555, "grad_norm": 0.0, - "learning_rate": 1.7916858663947602e-05, - "loss": 0.9669, + "learning_rate": 1.7923045760427387e-05, + "loss": 1.0399, "step": 8209 }, { - "epoch": 0.2329738933030647, + "epoch": 0.23265040097480802, "grad_norm": 0.0, - "learning_rate": 1.7916297139633305e-05, - "loss": 1.0545, + "learning_rate": 1.7922485760174197e-05, + "loss": 0.9394, "step": 8210 }, { - "epoch": 0.2330022701475596, + "epoch": 0.23267873841707049, "grad_norm": 0.0, - "learning_rate": 1.7915735548449687e-05, - "loss": 1.0537, + "learning_rate": 1.792192569318697e-05, + "loss": 0.8667, "step": 8211 }, { - "epoch": 0.23303064699205447, + "epoch": 0.23270707585933292, "grad_norm": 0.0, - "learning_rate": 1.7915173890401502e-05, - "loss": 1.0143, + "learning_rate": 1.792136555947042e-05, + "loss": 0.9454, "step": 8212 }, { - "epoch": 0.2330590238365494, + "epoch": 0.2327354133015954, "grad_norm": 0.0, - "learning_rate": 1.7914612165493483e-05, - "loss": 0.9899, + "learning_rate": 1.7920805359029267e-05, + "loss": 0.9552, "step": 8213 }, { - "epoch": 0.23308740068104428, + "epoch": 0.23276375074385786, "grad_norm": 0.0, - "learning_rate": 1.791405037373038e-05, - "loss": 1.0369, + "learning_rate": 1.792024509186823e-05, + "loss": 0.9088, "step": 8214 }, { - "epoch": 0.23311577752553916, + "epoch": 0.23279208818612032, "grad_norm": 0.0, - "learning_rate": 1.7913488515116935e-05, - "loss": 1.0925, + "learning_rate": 1.7919684757992027e-05, + "loss": 1.0208, "step": 8215 }, { - "epoch": 0.23314415437003405, + "epoch": 0.2328204256283828, "grad_norm": 0.0, - "learning_rate": 1.7912926589657897e-05, - "loss": 0.9898, + "learning_rate": 1.7919124357405374e-05, + "loss": 0.9677, "step": 8216 }, { - "epoch": 0.23317253121452894, + "epoch": 0.23284876307064525, "grad_norm": 0.0, - "learning_rate": 1.7912364597358015e-05, - "loss": 0.955, + "learning_rate": 1.7918563890113003e-05, + "loss": 0.9756, "step": 8217 }, { - "epoch": 0.23320090805902383, + "epoch": 0.2328771005129077, "grad_norm": 0.0, - "learning_rate": 1.791180253822203e-05, - "loss": 1.0034, + "learning_rate": 1.7918003356119622e-05, + "loss": 1.0596, "step": 8218 }, { - "epoch": 0.23322928490351874, + "epoch": 0.23290543795517016, "grad_norm": 0.0, - "learning_rate": 1.7911240412254698e-05, - "loss": 0.987, + "learning_rate": 1.791744275542996e-05, + "loss": 0.925, "step": 8219 }, { - "epoch": 0.23325766174801363, + "epoch": 0.23293377539743262, "grad_norm": 0.0, - "learning_rate": 1.791067821946076e-05, - "loss": 0.8772, + "learning_rate": 1.791688208804874e-05, + "loss": 1.0092, "step": 8220 }, { - "epoch": 0.23328603859250852, + "epoch": 0.2329621128396951, "grad_norm": 0.0, - "learning_rate": 1.7910115959844967e-05, - "loss": 0.8957, + "learning_rate": 1.791632135398068e-05, + "loss": 1.0729, "step": 8221 }, { - "epoch": 0.2333144154370034, + "epoch": 0.23299045028195756, "grad_norm": 0.0, - "learning_rate": 1.7909553633412068e-05, - "loss": 0.991, + "learning_rate": 1.791576055323051e-05, + "loss": 0.8632, "step": 8222 }, { - "epoch": 0.2333427922814983, + "epoch": 0.23301878772422002, "grad_norm": 0.0, - "learning_rate": 1.7908991240166817e-05, - "loss": 0.8822, + "learning_rate": 1.7915199685802944e-05, + "loss": 0.9654, "step": 8223 }, { - "epoch": 0.23337116912599318, + "epoch": 0.23304712516648246, "grad_norm": 0.0, - "learning_rate": 1.790842878011396e-05, - "loss": 0.9078, + "learning_rate": 1.7914638751702713e-05, + "loss": 1.0712, "step": 8224 }, { - "epoch": 0.2333995459704881, + "epoch": 0.23307546260874493, "grad_norm": 0.0, - "learning_rate": 1.790786625325825e-05, - "loss": 1.0603, + "learning_rate": 1.791407775093454e-05, + "loss": 1.0128, "step": 8225 }, { - "epoch": 0.23342792281498298, + "epoch": 0.2331038000510074, "grad_norm": 0.0, - "learning_rate": 1.790730365960444e-05, - "loss": 0.888, + "learning_rate": 1.7913516683503155e-05, + "loss": 0.9541, "step": 8226 }, { - "epoch": 0.23345629965947787, + "epoch": 0.23313213749326986, "grad_norm": 0.0, - "learning_rate": 1.7906740999157275e-05, - "loss": 1.101, + "learning_rate": 1.7912955549413274e-05, + "loss": 1.1517, "step": 8227 }, { - "epoch": 0.23348467650397275, + "epoch": 0.23316047493553232, "grad_norm": 0.0, - "learning_rate": 1.7906178271921518e-05, - "loss": 0.9059, + "learning_rate": 1.791239434866964e-05, + "loss": 0.9799, "step": 8228 }, { - "epoch": 0.23351305334846764, + "epoch": 0.2331888123777948, "grad_norm": 0.0, - "learning_rate": 1.7905615477901916e-05, - "loss": 0.9586, + "learning_rate": 1.7911833081276962e-05, + "loss": 1.0166, "step": 8229 }, { - "epoch": 0.23354143019296253, + "epoch": 0.23321714982005723, "grad_norm": 0.0, - "learning_rate": 1.7905052617103226e-05, - "loss": 0.9746, + "learning_rate": 1.7911271747239977e-05, + "loss": 0.9858, "step": 8230 }, { - "epoch": 0.23356980703745744, + "epoch": 0.2332454872623197, "grad_norm": 0.0, - "learning_rate": 1.79044896895302e-05, - "loss": 0.9798, + "learning_rate": 1.7910710346563417e-05, + "loss": 1.0447, "step": 8231 }, { - "epoch": 0.23359818388195233, + "epoch": 0.23327382470458216, "grad_norm": 0.0, - "learning_rate": 1.7903926695187595e-05, - "loss": 0.9923, + "learning_rate": 1.7910148879251998e-05, + "loss": 1.2229, "step": 8232 }, { - "epoch": 0.23362656072644722, + "epoch": 0.23330216214684463, "grad_norm": 0.0, - "learning_rate": 1.7903363634080168e-05, - "loss": 1.021, + "learning_rate": 1.7909587345310464e-05, + "loss": 0.943, "step": 8233 }, { - "epoch": 0.2336549375709421, + "epoch": 0.2333304995891071, "grad_norm": 0.0, - "learning_rate": 1.7902800506212667e-05, - "loss": 0.8841, + "learning_rate": 1.7909025744743537e-05, + "loss": 1.0169, "step": 8234 }, { - "epoch": 0.233683314415437, + "epoch": 0.23335883703136956, "grad_norm": 0.0, - "learning_rate": 1.790223731158986e-05, - "loss": 0.9562, + "learning_rate": 1.790846407755595e-05, + "loss": 1.017, "step": 8235 }, { - "epoch": 0.2337116912599319, + "epoch": 0.233387174473632, "grad_norm": 0.0, - "learning_rate": 1.7901674050216498e-05, - "loss": 0.9772, + "learning_rate": 1.7907902343752432e-05, + "loss": 1.039, "step": 8236 }, { - "epoch": 0.2337400681044268, + "epoch": 0.23341551191589446, "grad_norm": 0.0, - "learning_rate": 1.790111072209734e-05, - "loss": 1.0275, + "learning_rate": 1.7907340543337714e-05, + "loss": 1.063, "step": 8237 }, { - "epoch": 0.23376844494892168, + "epoch": 0.23344384935815693, "grad_norm": 0.0, - "learning_rate": 1.7900547327237143e-05, - "loss": 1.0379, + "learning_rate": 1.7906778676316536e-05, + "loss": 0.9122, "step": 8238 }, { - "epoch": 0.23379682179341657, + "epoch": 0.2334721868004194, "grad_norm": 0.0, - "learning_rate": 1.789998386564067e-05, - "loss": 0.9332, + "learning_rate": 1.790621674269362e-05, + "loss": 0.9619, "step": 8239 }, { - "epoch": 0.23382519863791146, + "epoch": 0.23350052424268186, "grad_norm": 0.0, - "learning_rate": 1.7899420337312675e-05, - "loss": 1.0139, + "learning_rate": 1.7905654742473707e-05, + "loss": 1.0486, "step": 8240 }, { - "epoch": 0.23385357548240635, + "epoch": 0.23352886168494433, "grad_norm": 0.0, - "learning_rate": 1.7898856742257925e-05, - "loss": 0.9749, + "learning_rate": 1.7905092675661526e-05, + "loss": 0.9819, "step": 8241 }, { - "epoch": 0.23388195232690126, + "epoch": 0.23355719912720677, "grad_norm": 0.0, - "learning_rate": 1.7898293080481176e-05, - "loss": 0.9821, + "learning_rate": 1.790453054226182e-05, + "loss": 1.043, "step": 8242 }, { - "epoch": 0.23391032917139615, + "epoch": 0.23358553656946923, "grad_norm": 0.0, - "learning_rate": 1.7897729351987186e-05, - "loss": 1.1137, + "learning_rate": 1.790396834227931e-05, + "loss": 1.0862, "step": 8243 }, { - "epoch": 0.23393870601589103, + "epoch": 0.2336138740117317, "grad_norm": 0.0, - "learning_rate": 1.7897165556780726e-05, - "loss": 1.0421, + "learning_rate": 1.7903406075718744e-05, + "loss": 0.9327, "step": 8244 }, { - "epoch": 0.23396708286038592, + "epoch": 0.23364221145399416, "grad_norm": 0.0, - "learning_rate": 1.7896601694866555e-05, - "loss": 1.0096, + "learning_rate": 1.7902843742584855e-05, + "loss": 0.8341, "step": 8245 }, { - "epoch": 0.2339954597048808, + "epoch": 0.23367054889625663, "grad_norm": 0.0, - "learning_rate": 1.789603776624943e-05, - "loss": 0.89, + "learning_rate": 1.7902281342882374e-05, + "loss": 1.1031, "step": 8246 }, { - "epoch": 0.2340238365493757, + "epoch": 0.2336988863385191, "grad_norm": 0.0, - "learning_rate": 1.789547377093412e-05, - "loss": 1.0445, + "learning_rate": 1.7901718876616048e-05, + "loss": 1.0213, "step": 8247 }, { - "epoch": 0.2340522133938706, + "epoch": 0.23372722378078153, "grad_norm": 0.0, - "learning_rate": 1.789490970892539e-05, - "loss": 0.8019, + "learning_rate": 1.7901156343790606e-05, + "loss": 0.8977, "step": 8248 }, { - "epoch": 0.2340805902383655, + "epoch": 0.233755561223044, "grad_norm": 0.0, - "learning_rate": 1.7894345580228e-05, - "loss": 1.1509, + "learning_rate": 1.790059374441079e-05, + "loss": 0.8993, "step": 8249 }, { - "epoch": 0.2341089670828604, + "epoch": 0.23378389866530647, "grad_norm": 0.0, - "learning_rate": 1.7893781384846722e-05, - "loss": 0.9966, + "learning_rate": 1.7900031078481343e-05, + "loss": 0.9408, "step": 8250 }, { - "epoch": 0.23413734392735527, + "epoch": 0.23381223610756893, "grad_norm": 0.0, - "learning_rate": 1.7893217122786316e-05, - "loss": 0.9659, + "learning_rate": 1.7899468346006995e-05, + "loss": 1.0297, "step": 8251 }, { - "epoch": 0.23416572077185016, + "epoch": 0.2338405735498314, "grad_norm": 0.0, - "learning_rate": 1.789265279405155e-05, - "loss": 1.0173, + "learning_rate": 1.7898905546992494e-05, + "loss": 0.8784, "step": 8252 }, { - "epoch": 0.23419409761634508, + "epoch": 0.23386891099209386, "grad_norm": 0.0, - "learning_rate": 1.789208839864719e-05, - "loss": 0.975, + "learning_rate": 1.789834268144258e-05, + "loss": 0.9341, "step": 8253 }, { - "epoch": 0.23422247446083996, + "epoch": 0.2338972484343563, "grad_norm": 0.0, - "learning_rate": 1.7891523936578004e-05, - "loss": 0.9995, + "learning_rate": 1.789777974936199e-05, + "loss": 1.0062, "step": 8254 }, { - "epoch": 0.23425085130533485, + "epoch": 0.23392558587661877, "grad_norm": 0.0, - "learning_rate": 1.7890959407848766e-05, - "loss": 0.9776, + "learning_rate": 1.789721675075547e-05, + "loss": 0.9545, "step": 8255 }, { - "epoch": 0.23427922814982974, + "epoch": 0.23395392331888124, "grad_norm": 0.0, - "learning_rate": 1.7890394812464233e-05, - "loss": 1.0209, + "learning_rate": 1.7896653685627762e-05, + "loss": 0.9163, "step": 8256 }, { - "epoch": 0.23430760499432463, + "epoch": 0.2339822607611437, "grad_norm": 0.0, - "learning_rate": 1.788983015042918e-05, - "loss": 0.9016, + "learning_rate": 1.7896090553983606e-05, + "loss": 1.0483, "step": 8257 }, { - "epoch": 0.2343359818388195, + "epoch": 0.23401059820340617, "grad_norm": 0.0, - "learning_rate": 1.788926542174838e-05, - "loss": 0.8873, + "learning_rate": 1.789552735582775e-05, + "loss": 1.0419, "step": 8258 }, { - "epoch": 0.23436435868331443, + "epoch": 0.23403893564566863, "grad_norm": 0.0, - "learning_rate": 1.78887006264266e-05, - "loss": 1.0692, + "learning_rate": 1.7894964091164932e-05, + "loss": 0.9552, "step": 8259 }, { - "epoch": 0.23439273552780931, + "epoch": 0.23406727308793107, "grad_norm": 0.0, - "learning_rate": 1.7888135764468612e-05, - "loss": 1.0322, + "learning_rate": 1.7894400759999898e-05, + "loss": 0.9401, "step": 8260 }, { - "epoch": 0.2344211123723042, + "epoch": 0.23409561053019354, "grad_norm": 0.0, - "learning_rate": 1.7887570835879183e-05, - "loss": 1.0194, + "learning_rate": 1.7893837362337397e-05, + "loss": 0.9994, "step": 8261 }, { - "epoch": 0.2344494892167991, + "epoch": 0.234123947972456, "grad_norm": 0.0, - "learning_rate": 1.788700584066309e-05, - "loss": 1.0526, + "learning_rate": 1.7893273898182177e-05, + "loss": 0.945, "step": 8262 }, { - "epoch": 0.23447786606129398, + "epoch": 0.23415228541471847, "grad_norm": 0.0, - "learning_rate": 1.788644077882511e-05, - "loss": 0.9475, + "learning_rate": 1.7892710367538973e-05, + "loss": 1.0057, "step": 8263 }, { - "epoch": 0.23450624290578886, + "epoch": 0.23418062285698094, "grad_norm": 0.0, - "learning_rate": 1.7885875650370002e-05, - "loss": 0.9207, + "learning_rate": 1.7892146770412543e-05, + "loss": 0.8438, "step": 8264 }, { - "epoch": 0.23453461975028378, + "epoch": 0.2342089602992434, "grad_norm": 0.0, - "learning_rate": 1.7885310455302554e-05, - "loss": 1.0329, + "learning_rate": 1.7891583106807626e-05, + "loss": 0.9108, "step": 8265 }, { - "epoch": 0.23456299659477867, + "epoch": 0.23423729774150584, "grad_norm": 0.0, - "learning_rate": 1.7884745193627532e-05, - "loss": 1.0857, + "learning_rate": 1.7891019376728976e-05, + "loss": 0.9562, "step": 8266 }, { - "epoch": 0.23459137343927355, + "epoch": 0.2342656351837683, "grad_norm": 0.0, - "learning_rate": 1.7884179865349713e-05, - "loss": 0.9591, + "learning_rate": 1.789045558018134e-05, + "loss": 1.1026, "step": 8267 }, { - "epoch": 0.23461975028376844, + "epoch": 0.23429397262603077, "grad_norm": 0.0, - "learning_rate": 1.788361447047387e-05, - "loss": 0.9046, + "learning_rate": 1.7889891717169466e-05, + "loss": 0.9608, "step": 8268 }, { - "epoch": 0.23464812712826333, + "epoch": 0.23432231006829324, "grad_norm": 0.0, - "learning_rate": 1.7883049009004782e-05, - "loss": 1.0711, + "learning_rate": 1.7889327787698105e-05, + "loss": 1.0464, "step": 8269 }, { - "epoch": 0.23467650397275822, + "epoch": 0.2343506475105557, "grad_norm": 0.0, - "learning_rate": 1.7882483480947226e-05, - "loss": 0.8507, + "learning_rate": 1.7888763791772006e-05, + "loss": 1.1124, "step": 8270 }, { - "epoch": 0.23470488081725313, + "epoch": 0.23437898495281817, "grad_norm": 0.0, - "learning_rate": 1.7881917886305978e-05, - "loss": 1.0934, + "learning_rate": 1.7888199729395924e-05, + "loss": 1.0165, "step": 8271 }, { - "epoch": 0.23473325766174802, + "epoch": 0.2344073223950806, "grad_norm": 0.0, - "learning_rate": 1.7881352225085812e-05, - "loss": 0.9877, + "learning_rate": 1.78876356005746e-05, + "loss": 0.9479, "step": 8272 }, { - "epoch": 0.2347616345062429, + "epoch": 0.23443565983734307, "grad_norm": 0.0, - "learning_rate": 1.7880786497291514e-05, - "loss": 1.075, + "learning_rate": 1.7887071405312798e-05, + "loss": 0.9062, "step": 8273 }, { - "epoch": 0.2347900113507378, + "epoch": 0.23446399727960554, "grad_norm": 0.0, - "learning_rate": 1.7880220702927855e-05, - "loss": 1.0016, + "learning_rate": 1.788650714361526e-05, + "loss": 0.8811, "step": 8274 }, { - "epoch": 0.23481838819523268, + "epoch": 0.234492334721868, "grad_norm": 0.0, - "learning_rate": 1.787965484199962e-05, - "loss": 0.9722, + "learning_rate": 1.7885942815486746e-05, + "loss": 1.0134, "step": 8275 }, { - "epoch": 0.2348467650397276, + "epoch": 0.23452067216413047, "grad_norm": 0.0, - "learning_rate": 1.7879088914511583e-05, - "loss": 0.918, + "learning_rate": 1.7885378420932006e-05, + "loss": 0.883, "step": 8276 }, { - "epoch": 0.23487514188422248, + "epoch": 0.23454900960639294, "grad_norm": 0.0, - "learning_rate": 1.787852292046853e-05, - "loss": 0.9811, + "learning_rate": 1.7884813959955796e-05, + "loss": 1.02, "step": 8277 }, { - "epoch": 0.23490351872871737, + "epoch": 0.23457734704865538, "grad_norm": 0.0, - "learning_rate": 1.7877956859875238e-05, - "loss": 0.9258, + "learning_rate": 1.788424943256287e-05, + "loss": 0.869, "step": 8278 }, { - "epoch": 0.23493189557321226, + "epoch": 0.23460568449091784, "grad_norm": 0.0, - "learning_rate": 1.7877390732736492e-05, - "loss": 1.0096, + "learning_rate": 1.7883684838757983e-05, + "loss": 0.9163, "step": 8279 }, { - "epoch": 0.23496027241770714, + "epoch": 0.2346340219331803, "grad_norm": 0.0, - "learning_rate": 1.787682453905707e-05, - "loss": 0.9001, + "learning_rate": 1.7883120178545895e-05, + "loss": 0.8725, "step": 8280 }, { - "epoch": 0.23498864926220203, + "epoch": 0.23466235937544278, "grad_norm": 0.0, - "learning_rate": 1.7876258278841764e-05, - "loss": 0.8944, + "learning_rate": 1.7882555451931353e-05, + "loss": 1.0162, "step": 8281 }, { - "epoch": 0.23501702610669695, + "epoch": 0.23469069681770524, "grad_norm": 0.0, - "learning_rate": 1.7875691952095344e-05, - "loss": 0.8878, + "learning_rate": 1.7881990658919122e-05, + "loss": 1.0114, "step": 8282 }, { - "epoch": 0.23504540295119183, + "epoch": 0.2347190342599677, "grad_norm": 0.0, - "learning_rate": 1.78751255588226e-05, - "loss": 1.0652, + "learning_rate": 1.7881425799513955e-05, + "loss": 0.9451, "step": 8283 }, { - "epoch": 0.23507377979568672, + "epoch": 0.23474737170223015, "grad_norm": 0.0, - "learning_rate": 1.787455909902832e-05, - "loss": 0.8293, + "learning_rate": 1.7880860873720615e-05, + "loss": 0.9896, "step": 8284 }, { - "epoch": 0.2351021566401816, + "epoch": 0.2347757091444926, "grad_norm": 0.0, - "learning_rate": 1.7873992572717282e-05, - "loss": 0.9232, + "learning_rate": 1.7880295881543856e-05, + "loss": 1.1351, "step": 8285 }, { - "epoch": 0.2351305334846765, + "epoch": 0.23480404658675508, "grad_norm": 0.0, - "learning_rate": 1.7873425979894276e-05, - "loss": 0.9506, + "learning_rate": 1.787973082298844e-05, + "loss": 0.9756, "step": 8286 }, { - "epoch": 0.23515891032917138, + "epoch": 0.23483238402901754, "grad_norm": 0.0, - "learning_rate": 1.7872859320564085e-05, - "loss": 0.93, + "learning_rate": 1.787916569805912e-05, + "loss": 0.92, "step": 8287 }, { - "epoch": 0.2351872871736663, + "epoch": 0.23486072147128, "grad_norm": 0.0, - "learning_rate": 1.78722925947315e-05, - "loss": 0.9409, + "learning_rate": 1.7878600506760665e-05, + "loss": 0.9412, "step": 8288 }, { - "epoch": 0.23521566401816119, + "epoch": 0.23488905891354248, "grad_norm": 0.0, - "learning_rate": 1.7871725802401303e-05, - "loss": 0.9148, + "learning_rate": 1.787803524909783e-05, + "loss": 0.9571, "step": 8289 }, { - "epoch": 0.23524404086265607, + "epoch": 0.2349173963558049, "grad_norm": 0.0, - "learning_rate": 1.787115894357829e-05, - "loss": 0.9531, + "learning_rate": 1.7877469925075383e-05, + "loss": 1.0575, "step": 8290 }, { - "epoch": 0.23527241770715096, + "epoch": 0.23494573379806738, "grad_norm": 0.0, - "learning_rate": 1.7870592018267237e-05, - "loss": 0.8171, + "learning_rate": 1.7876904534698082e-05, + "loss": 1.0167, "step": 8291 }, { - "epoch": 0.23530079455164585, + "epoch": 0.23497407124032985, "grad_norm": 0.0, - "learning_rate": 1.7870025026472942e-05, - "loss": 0.9694, + "learning_rate": 1.7876339077970684e-05, + "loss": 0.9666, "step": 8292 }, { - "epoch": 0.23532917139614076, + "epoch": 0.2350024086825923, "grad_norm": 0.0, - "learning_rate": 1.7869457968200192e-05, - "loss": 0.9008, + "learning_rate": 1.787577355489796e-05, + "loss": 0.9379, "step": 8293 }, { - "epoch": 0.23535754824063565, + "epoch": 0.23503074612485478, "grad_norm": 0.0, - "learning_rate": 1.7868890843453776e-05, - "loss": 1.0605, + "learning_rate": 1.787520796548467e-05, + "loss": 1.0452, "step": 8294 }, { - "epoch": 0.23538592508513054, + "epoch": 0.23505908356711724, "grad_norm": 0.0, - "learning_rate": 1.7868323652238486e-05, - "loss": 1.0262, + "learning_rate": 1.7874642309735576e-05, + "loss": 1.0709, "step": 8295 }, { - "epoch": 0.23541430192962542, + "epoch": 0.23508742100937968, "grad_norm": 0.0, - "learning_rate": 1.786775639455911e-05, - "loss": 0.9299, + "learning_rate": 1.787407658765545e-05, + "loss": 0.9528, "step": 8296 }, { - "epoch": 0.2354426787741203, + "epoch": 0.23511575845164215, "grad_norm": 0.0, - "learning_rate": 1.7867189070420446e-05, - "loss": 1.021, + "learning_rate": 1.7873510799249052e-05, + "loss": 0.9015, "step": 8297 }, { - "epoch": 0.2354710556186152, + "epoch": 0.23514409589390461, "grad_norm": 0.0, - "learning_rate": 1.786662167982728e-05, - "loss": 0.9982, + "learning_rate": 1.787294494452115e-05, + "loss": 0.9666, "step": 8298 }, { - "epoch": 0.23549943246311011, + "epoch": 0.23517243333616708, "grad_norm": 0.0, - "learning_rate": 1.786605422278441e-05, - "loss": 0.9451, + "learning_rate": 1.787237902347651e-05, + "loss": 1.119, "step": 8299 }, { - "epoch": 0.235527809307605, + "epoch": 0.23520077077842955, "grad_norm": 0.0, - "learning_rate": 1.7865486699296624e-05, - "loss": 0.9269, + "learning_rate": 1.7871813036119893e-05, + "loss": 1.0185, "step": 8300 }, { - "epoch": 0.2355561861520999, + "epoch": 0.235229108220692, "grad_norm": 0.0, - "learning_rate": 1.7864919109368716e-05, - "loss": 0.9669, + "learning_rate": 1.7871246982456075e-05, + "loss": 1.0765, "step": 8301 }, { - "epoch": 0.23558456299659478, + "epoch": 0.23525744566295445, "grad_norm": 0.0, - "learning_rate": 1.7864351453005486e-05, - "loss": 0.9821, + "learning_rate": 1.787068086248982e-05, + "loss": 0.9918, "step": 8302 }, { - "epoch": 0.23561293984108966, + "epoch": 0.23528578310521692, "grad_norm": 0.0, - "learning_rate": 1.7863783730211722e-05, - "loss": 0.99, + "learning_rate": 1.78701146762259e-05, + "loss": 0.8581, "step": 8303 }, { - "epoch": 0.23564131668558455, + "epoch": 0.23531412054747938, "grad_norm": 0.0, - "learning_rate": 1.7863215940992228e-05, - "loss": 0.9837, + "learning_rate": 1.7869548423669075e-05, + "loss": 0.9724, "step": 8304 }, { - "epoch": 0.23566969353007947, + "epoch": 0.23534245798974185, "grad_norm": 0.0, - "learning_rate": 1.7862648085351795e-05, - "loss": 0.8922, + "learning_rate": 1.7868982104824127e-05, + "loss": 0.981, "step": 8305 }, { - "epoch": 0.23569807037457435, + "epoch": 0.23537079543200431, "grad_norm": 0.0, - "learning_rate": 1.7862080163295218e-05, - "loss": 0.9986, + "learning_rate": 1.786841571969582e-05, + "loss": 1.0252, "step": 8306 }, { - "epoch": 0.23572644721906924, + "epoch": 0.23539913287426678, "grad_norm": 0.0, - "learning_rate": 1.7861512174827297e-05, - "loss": 0.9045, + "learning_rate": 1.7867849268288924e-05, + "loss": 1.0493, "step": 8307 }, { - "epoch": 0.23575482406356413, + "epoch": 0.23542747031652922, "grad_norm": 0.0, - "learning_rate": 1.786094411995283e-05, - "loss": 0.9594, + "learning_rate": 1.7867282750608212e-05, + "loss": 0.9641, "step": 8308 }, { - "epoch": 0.23578320090805902, + "epoch": 0.23545580775879169, "grad_norm": 0.0, - "learning_rate": 1.7860375998676613e-05, - "loss": 0.8879, + "learning_rate": 1.786671616665846e-05, + "loss": 1.0369, "step": 8309 }, { - "epoch": 0.2358115777525539, + "epoch": 0.23548414520105415, "grad_norm": 0.0, - "learning_rate": 1.7859807811003446e-05, - "loss": 0.9639, + "learning_rate": 1.786614951644443e-05, + "loss": 1.0096, "step": 8310 }, { - "epoch": 0.23583995459704882, + "epoch": 0.23551248264331662, "grad_norm": 0.0, - "learning_rate": 1.7859239556938132e-05, - "loss": 1.0112, + "learning_rate": 1.7865582799970904e-05, + "loss": 0.9304, "step": 8311 }, { - "epoch": 0.2358683314415437, + "epoch": 0.23554082008557908, "grad_norm": 0.0, - "learning_rate": 1.7858671236485466e-05, - "loss": 1.071, + "learning_rate": 1.7865016017242656e-05, + "loss": 1.044, "step": 8312 }, { - "epoch": 0.2358967082860386, + "epoch": 0.23556915752784155, "grad_norm": 0.0, - "learning_rate": 1.7858102849650252e-05, - "loss": 1.0746, + "learning_rate": 1.7864449168264456e-05, + "loss": 1.1052, "step": 8313 }, { - "epoch": 0.23592508513053348, + "epoch": 0.235597494970104, "grad_norm": 0.0, - "learning_rate": 1.7857534396437287e-05, - "loss": 0.9335, + "learning_rate": 1.786388225304108e-05, + "loss": 1.0865, "step": 8314 }, { - "epoch": 0.23595346197502837, + "epoch": 0.23562583241236645, "grad_norm": 0.0, - "learning_rate": 1.7856965876851383e-05, - "loss": 0.9516, + "learning_rate": 1.7863315271577303e-05, + "loss": 1.0193, "step": 8315 }, { - "epoch": 0.23598183881952328, + "epoch": 0.23565416985462892, "grad_norm": 0.0, - "learning_rate": 1.785639729089733e-05, - "loss": 0.9535, + "learning_rate": 1.78627482238779e-05, + "loss": 0.9313, "step": 8316 }, { - "epoch": 0.23601021566401817, + "epoch": 0.23568250729689139, "grad_norm": 0.0, - "learning_rate": 1.7855828638579934e-05, - "loss": 0.9885, + "learning_rate": 1.7862181109947653e-05, + "loss": 0.9684, "step": 8317 }, { - "epoch": 0.23603859250851306, + "epoch": 0.23571084473915385, "grad_norm": 0.0, - "learning_rate": 1.7855259919904004e-05, - "loss": 0.9565, + "learning_rate": 1.7861613929791333e-05, + "loss": 0.9531, "step": 8318 }, { - "epoch": 0.23606696935300794, + "epoch": 0.23573918218141632, "grad_norm": 0.0, - "learning_rate": 1.785469113487434e-05, - "loss": 0.9274, + "learning_rate": 1.7861046683413717e-05, + "loss": 1.1447, "step": 8319 }, { - "epoch": 0.23609534619750283, + "epoch": 0.23576751962367876, "grad_norm": 0.0, - "learning_rate": 1.7854122283495747e-05, - "loss": 0.9391, + "learning_rate": 1.7860479370819588e-05, + "loss": 0.9361, "step": 8320 }, { - "epoch": 0.23612372304199772, + "epoch": 0.23579585706594122, "grad_norm": 0.0, - "learning_rate": 1.7853553365773027e-05, - "loss": 0.9514, + "learning_rate": 1.7859911992013724e-05, + "loss": 1.111, "step": 8321 }, { - "epoch": 0.23615209988649263, + "epoch": 0.2358241945082037, "grad_norm": 0.0, - "learning_rate": 1.785298438171099e-05, - "loss": 0.934, + "learning_rate": 1.7859344547000898e-05, + "loss": 0.9464, "step": 8322 }, { - "epoch": 0.23618047673098752, + "epoch": 0.23585253195046615, "grad_norm": 0.0, - "learning_rate": 1.785241533131444e-05, - "loss": 0.9064, + "learning_rate": 1.7858777035785898e-05, + "loss": 0.9535, "step": 8323 }, { - "epoch": 0.2362088535754824, + "epoch": 0.23588086939272862, "grad_norm": 0.0, - "learning_rate": 1.7851846214588187e-05, - "loss": 0.8847, + "learning_rate": 1.78582094583735e-05, + "loss": 1.0449, "step": 8324 }, { - "epoch": 0.2362372304199773, + "epoch": 0.2359092068349911, "grad_norm": 0.0, - "learning_rate": 1.7851277031537036e-05, - "loss": 1.0292, + "learning_rate": 1.785764181476849e-05, + "loss": 0.9711, "step": 8325 }, { - "epoch": 0.23626560726447218, + "epoch": 0.23593754427725352, "grad_norm": 0.0, - "learning_rate": 1.7850707782165796e-05, - "loss": 1.0054, + "learning_rate": 1.7857074104975637e-05, + "loss": 0.9051, "step": 8326 }, { - "epoch": 0.23629398410896707, + "epoch": 0.235965881719516, "grad_norm": 0.0, - "learning_rate": 1.785013846647927e-05, - "loss": 0.9852, + "learning_rate": 1.7856506328999734e-05, + "loss": 1.009, "step": 8327 }, { - "epoch": 0.23632236095346199, + "epoch": 0.23599421916177846, "grad_norm": 0.0, - "learning_rate": 1.7849569084482275e-05, - "loss": 1.0027, + "learning_rate": 1.7855938486845563e-05, + "loss": 1.0649, "step": 8328 }, { - "epoch": 0.23635073779795687, + "epoch": 0.23602255660404092, "grad_norm": 0.0, - "learning_rate": 1.7848999636179615e-05, - "loss": 0.9527, + "learning_rate": 1.7855370578517902e-05, + "loss": 1.0751, "step": 8329 }, { - "epoch": 0.23637911464245176, + "epoch": 0.2360508940463034, "grad_norm": 0.0, - "learning_rate": 1.7848430121576105e-05, - "loss": 0.9433, + "learning_rate": 1.785480260402154e-05, + "loss": 1.0606, "step": 8330 }, { - "epoch": 0.23640749148694665, + "epoch": 0.23607923148856585, "grad_norm": 0.0, - "learning_rate": 1.784786054067655e-05, - "loss": 0.9986, + "learning_rate": 1.785423456336126e-05, + "loss": 0.9769, "step": 8331 }, { - "epoch": 0.23643586833144153, + "epoch": 0.2361075689308283, "grad_norm": 0.0, - "learning_rate": 1.7847290893485766e-05, - "loss": 0.9616, + "learning_rate": 1.7853666456541843e-05, + "loss": 0.8819, "step": 8332 }, { - "epoch": 0.23646424517593645, + "epoch": 0.23613590637309076, "grad_norm": 0.0, - "learning_rate": 1.7846721180008563e-05, - "loss": 0.9374, + "learning_rate": 1.785309828356808e-05, + "loss": 1.0499, "step": 8333 }, { - "epoch": 0.23649262202043134, + "epoch": 0.23616424381535323, "grad_norm": 0.0, - "learning_rate": 1.7846151400249754e-05, - "loss": 1.0554, + "learning_rate": 1.785253004444475e-05, + "loss": 0.8816, "step": 8334 }, { - "epoch": 0.23652099886492622, + "epoch": 0.2361925812576157, "grad_norm": 0.0, - "learning_rate": 1.784558155421415e-05, - "loss": 1.0493, + "learning_rate": 1.7851961739176645e-05, + "loss": 0.9909, "step": 8335 }, { - "epoch": 0.2365493757094211, + "epoch": 0.23622091869987816, "grad_norm": 0.0, - "learning_rate": 1.7845011641906565e-05, - "loss": 0.957, + "learning_rate": 1.785139336776855e-05, + "loss": 0.8975, "step": 8336 }, { - "epoch": 0.236577752553916, + "epoch": 0.23624925614214062, "grad_norm": 0.0, - "learning_rate": 1.7844441663331817e-05, - "loss": 1.009, + "learning_rate": 1.7850824930225255e-05, + "loss": 1.014, "step": 8337 }, { - "epoch": 0.23660612939841089, + "epoch": 0.23627759358440306, "grad_norm": 0.0, - "learning_rate": 1.7843871618494714e-05, - "loss": 1.0489, + "learning_rate": 1.7850256426551546e-05, + "loss": 1.1055, "step": 8338 }, { - "epoch": 0.2366345062429058, + "epoch": 0.23630593102666553, "grad_norm": 0.0, - "learning_rate": 1.784330150740008e-05, - "loss": 0.8914, + "learning_rate": 1.784968785675221e-05, + "loss": 0.982, "step": 8339 }, { - "epoch": 0.2366628830874007, + "epoch": 0.236334268468928, "grad_norm": 0.0, - "learning_rate": 1.784273133005272e-05, - "loss": 1.1019, + "learning_rate": 1.7849119220832037e-05, + "loss": 0.9158, "step": 8340 }, { - "epoch": 0.23669125993189558, + "epoch": 0.23636260591119046, "grad_norm": 0.0, - "learning_rate": 1.784216108645746e-05, - "loss": 1.0304, + "learning_rate": 1.7848550518795826e-05, + "loss": 1.0353, "step": 8341 }, { - "epoch": 0.23671963677639046, + "epoch": 0.23639094335345293, "grad_norm": 0.0, - "learning_rate": 1.784159077661911e-05, - "loss": 0.9526, + "learning_rate": 1.784798175064835e-05, + "loss": 1.1361, "step": 8342 }, { - "epoch": 0.23674801362088535, + "epoch": 0.2364192807957154, "grad_norm": 0.0, - "learning_rate": 1.7841020400542493e-05, - "loss": 1.0461, + "learning_rate": 1.7847412916394416e-05, + "loss": 0.8591, "step": 8343 }, { - "epoch": 0.23677639046538024, + "epoch": 0.23644761823797783, "grad_norm": 0.0, - "learning_rate": 1.784044995823242e-05, - "loss": 0.9564, + "learning_rate": 1.7846844016038803e-05, + "loss": 0.9946, "step": 8344 }, { - "epoch": 0.23680476730987515, + "epoch": 0.2364759556802403, "grad_norm": 0.0, - "learning_rate": 1.7839879449693716e-05, - "loss": 0.9371, + "learning_rate": 1.7846275049586316e-05, + "loss": 0.9064, "step": 8345 }, { - "epoch": 0.23683314415437004, + "epoch": 0.23650429312250276, "grad_norm": 0.0, - "learning_rate": 1.78393088749312e-05, - "loss": 1.0595, + "learning_rate": 1.7845706017041734e-05, + "loss": 0.9806, "step": 8346 }, { - "epoch": 0.23686152099886493, + "epoch": 0.23653263056476523, "grad_norm": 0.0, - "learning_rate": 1.783873823394969e-05, - "loss": 0.9156, + "learning_rate": 1.784513691840986e-05, + "loss": 0.963, "step": 8347 }, { - "epoch": 0.23688989784335981, + "epoch": 0.2365609680070277, "grad_norm": 0.0, - "learning_rate": 1.7838167526754002e-05, - "loss": 1.0165, + "learning_rate": 1.7844567753695485e-05, + "loss": 0.9277, "step": 8348 }, { - "epoch": 0.2369182746878547, + "epoch": 0.23658930544929016, "grad_norm": 0.0, - "learning_rate": 1.783759675334896e-05, - "loss": 1.0051, + "learning_rate": 1.78439985229034e-05, + "loss": 0.9946, "step": 8349 }, { - "epoch": 0.2369466515323496, + "epoch": 0.2366176428915526, "grad_norm": 0.0, - "learning_rate": 1.783702591373939e-05, - "loss": 1.0204, + "learning_rate": 1.7843429226038408e-05, + "loss": 0.9135, "step": 8350 }, { - "epoch": 0.2369750283768445, + "epoch": 0.23664598033381506, "grad_norm": 0.0, - "learning_rate": 1.7836455007930107e-05, - "loss": 0.99, + "learning_rate": 1.7842859863105295e-05, + "loss": 1.0477, "step": 8351 }, { - "epoch": 0.2370034052213394, + "epoch": 0.23667431777607753, "grad_norm": 0.0, - "learning_rate": 1.7835884035925936e-05, - "loss": 1.0292, + "learning_rate": 1.784229043410886e-05, + "loss": 0.8697, "step": 8352 }, { - "epoch": 0.23703178206583428, + "epoch": 0.23670265521834, "grad_norm": 0.0, - "learning_rate": 1.7835312997731704e-05, - "loss": 1.0076, + "learning_rate": 1.7841720939053902e-05, + "loss": 1.0806, "step": 8353 }, { - "epoch": 0.23706015891032917, + "epoch": 0.23673099266060246, "grad_norm": 0.0, - "learning_rate": 1.783474189335223e-05, - "loss": 1.1458, + "learning_rate": 1.784115137794522e-05, + "loss": 1.0602, "step": 8354 }, { - "epoch": 0.23708853575482405, + "epoch": 0.23675933010286493, "grad_norm": 0.0, - "learning_rate": 1.7834170722792334e-05, - "loss": 1.0803, + "learning_rate": 1.7840581750787603e-05, + "loss": 0.9949, "step": 8355 }, { - "epoch": 0.23711691259931897, + "epoch": 0.23678766754512737, "grad_norm": 0.0, - "learning_rate": 1.783359948605685e-05, - "loss": 0.8764, + "learning_rate": 1.7840012057585858e-05, + "loss": 1.0033, "step": 8356 }, { - "epoch": 0.23714528944381386, + "epoch": 0.23681600498738983, "grad_norm": 0.0, - "learning_rate": 1.7833028183150597e-05, - "loss": 0.9552, + "learning_rate": 1.783944229834478e-05, + "loss": 1.0223, "step": 8357 }, { - "epoch": 0.23717366628830874, + "epoch": 0.2368443424296523, "grad_norm": 0.0, - "learning_rate": 1.7832456814078404e-05, - "loss": 0.9186, + "learning_rate": 1.7838872473069164e-05, + "loss": 1.0331, "step": 8358 }, { - "epoch": 0.23720204313280363, + "epoch": 0.23687267987191477, "grad_norm": 0.0, - "learning_rate": 1.7831885378845095e-05, - "loss": 0.9794, + "learning_rate": 1.7838302581763818e-05, + "loss": 0.9786, "step": 8359 }, { - "epoch": 0.23723041997729852, + "epoch": 0.23690101731417723, "grad_norm": 0.0, - "learning_rate": 1.78313138774555e-05, - "loss": 0.8585, + "learning_rate": 1.783773262443354e-05, + "loss": 0.904, "step": 8360 }, { - "epoch": 0.2372587968217934, + "epoch": 0.2369293547564397, "grad_norm": 0.0, - "learning_rate": 1.7830742309914444e-05, - "loss": 0.9966, + "learning_rate": 1.7837162601083123e-05, + "loss": 1.0348, "step": 8361 }, { - "epoch": 0.23728717366628832, + "epoch": 0.23695769219870214, "grad_norm": 0.0, - "learning_rate": 1.7830170676226757e-05, - "loss": 1.0054, + "learning_rate": 1.7836592511717384e-05, + "loss": 0.9586, "step": 8362 }, { - "epoch": 0.2373155505107832, + "epoch": 0.2369860296409646, "grad_norm": 0.0, - "learning_rate": 1.782959897639726e-05, - "loss": 0.9966, + "learning_rate": 1.7836022356341113e-05, + "loss": 0.986, "step": 8363 }, { - "epoch": 0.2373439273552781, + "epoch": 0.23701436708322707, "grad_norm": 0.0, - "learning_rate": 1.7829027210430796e-05, - "loss": 0.9228, + "learning_rate": 1.7835452134959112e-05, + "loss": 0.8822, "step": 8364 }, { - "epoch": 0.23737230419977298, + "epoch": 0.23704270452548953, "grad_norm": 0.0, - "learning_rate": 1.7828455378332186e-05, - "loss": 1.0394, + "learning_rate": 1.7834881847576192e-05, + "loss": 1.1043, "step": 8365 }, { - "epoch": 0.23740068104426787, + "epoch": 0.237071041967752, "grad_norm": 0.0, - "learning_rate": 1.782788348010626e-05, - "loss": 1.0972, + "learning_rate": 1.783431149419715e-05, + "loss": 1.0421, "step": 8366 }, { - "epoch": 0.23742905788876276, + "epoch": 0.23709937941001447, "grad_norm": 0.0, - "learning_rate": 1.782731151575785e-05, - "loss": 1.0331, + "learning_rate": 1.7833741074826796e-05, + "loss": 1.0664, "step": 8367 }, { - "epoch": 0.23745743473325767, + "epoch": 0.2371277168522769, "grad_norm": 0.0, - "learning_rate": 1.782673948529179e-05, - "loss": 1.0696, + "learning_rate": 1.7833170589469932e-05, + "loss": 1.126, "step": 8368 }, { - "epoch": 0.23748581157775256, + "epoch": 0.23715605429453937, "grad_norm": 0.0, - "learning_rate": 1.7826167388712905e-05, - "loss": 1.0023, + "learning_rate": 1.783260003813136e-05, + "loss": 0.9725, "step": 8369 }, { - "epoch": 0.23751418842224745, + "epoch": 0.23718439173680184, "grad_norm": 0.0, - "learning_rate": 1.7825595226026036e-05, - "loss": 0.9653, + "learning_rate": 1.783202942081589e-05, + "loss": 1.1045, "step": 8370 }, { - "epoch": 0.23754256526674233, + "epoch": 0.2372127291790643, "grad_norm": 0.0, - "learning_rate": 1.782502299723601e-05, - "loss": 0.9427, + "learning_rate": 1.783145873752833e-05, + "loss": 0.9345, "step": 8371 }, { - "epoch": 0.23757094211123722, + "epoch": 0.23724106662132677, "grad_norm": 0.0, - "learning_rate": 1.7824450702347663e-05, - "loss": 1.1085, + "learning_rate": 1.7830887988273486e-05, + "loss": 1.0374, "step": 8372 }, { - "epoch": 0.23759931895573214, + "epoch": 0.23726940406358923, "grad_norm": 0.0, - "learning_rate": 1.7823878341365835e-05, - "loss": 1.0368, + "learning_rate": 1.783031717305616e-05, + "loss": 1.069, "step": 8373 }, { - "epoch": 0.23762769580022702, + "epoch": 0.23729774150585167, "grad_norm": 0.0, - "learning_rate": 1.782330591429535e-05, - "loss": 0.99, + "learning_rate": 1.782974629188117e-05, + "loss": 0.9906, "step": 8374 }, { - "epoch": 0.2376560726447219, + "epoch": 0.23732607894811414, "grad_norm": 0.0, - "learning_rate": 1.7822733421141046e-05, - "loss": 0.9345, + "learning_rate": 1.7829175344753316e-05, + "loss": 1.0271, "step": 8375 }, { - "epoch": 0.2376844494892168, + "epoch": 0.2373544163903766, "grad_norm": 0.0, - "learning_rate": 1.7822160861907765e-05, - "loss": 1.0132, + "learning_rate": 1.7828604331677412e-05, + "loss": 0.9522, "step": 8376 }, { - "epoch": 0.23771282633371169, + "epoch": 0.23738275383263907, "grad_norm": 0.0, - "learning_rate": 1.7821588236600337e-05, - "loss": 0.9051, + "learning_rate": 1.782803325265827e-05, + "loss": 0.9594, "step": 8377 }, { - "epoch": 0.23774120317820657, + "epoch": 0.23741109127490154, "grad_norm": 0.0, - "learning_rate": 1.7821015545223604e-05, - "loss": 1.0371, + "learning_rate": 1.782746210770069e-05, + "loss": 1.06, "step": 8378 }, { - "epoch": 0.2377695800227015, + "epoch": 0.23743942871716398, "grad_norm": 0.0, - "learning_rate": 1.78204427877824e-05, - "loss": 0.9925, + "learning_rate": 1.7826890896809492e-05, + "loss": 1.0408, "step": 8379 }, { - "epoch": 0.23779795686719638, + "epoch": 0.23746776615942644, "grad_norm": 0.0, - "learning_rate": 1.7819869964281566e-05, - "loss": 0.8797, + "learning_rate": 1.7826319619989487e-05, + "loss": 0.983, "step": 8380 }, { - "epoch": 0.23782633371169126, + "epoch": 0.2374961036016889, "grad_norm": 0.0, - "learning_rate": 1.7819297074725937e-05, - "loss": 0.9899, + "learning_rate": 1.7825748277245484e-05, + "loss": 0.9722, "step": 8381 }, { - "epoch": 0.23785471055618615, + "epoch": 0.23752444104395137, "grad_norm": 0.0, - "learning_rate": 1.7818724119120352e-05, - "loss": 1.0592, + "learning_rate": 1.78251768685823e-05, + "loss": 1.0854, "step": 8382 }, { - "epoch": 0.23788308740068104, + "epoch": 0.23755277848621384, "grad_norm": 0.0, - "learning_rate": 1.7818151097469658e-05, - "loss": 0.8995, + "learning_rate": 1.7824605394004747e-05, + "loss": 0.9867, "step": 8383 }, { - "epoch": 0.23791146424517592, + "epoch": 0.2375811159284763, "grad_norm": 0.0, - "learning_rate": 1.7817578009778687e-05, - "loss": 0.8972, + "learning_rate": 1.782403385351763e-05, + "loss": 0.975, "step": 8384 }, { - "epoch": 0.23793984108967084, + "epoch": 0.23760945337073874, "grad_norm": 0.0, - "learning_rate": 1.7817004856052286e-05, - "loss": 1.0263, + "learning_rate": 1.7823462247125775e-05, + "loss": 1.0342, "step": 8385 }, { - "epoch": 0.23796821793416573, + "epoch": 0.2376377908130012, "grad_norm": 0.0, - "learning_rate": 1.781643163629529e-05, - "loss": 0.9336, + "learning_rate": 1.7822890574833995e-05, + "loss": 0.9708, "step": 8386 }, { - "epoch": 0.23799659477866061, + "epoch": 0.23766612825526368, "grad_norm": 0.0, - "learning_rate": 1.7815858350512542e-05, - "loss": 1.0015, + "learning_rate": 1.78223188366471e-05, + "loss": 1.0991, "step": 8387 }, { - "epoch": 0.2380249716231555, + "epoch": 0.23769446569752614, "grad_norm": 0.0, - "learning_rate": 1.7815284998708892e-05, - "loss": 0.9092, + "learning_rate": 1.782174703256991e-05, + "loss": 0.9411, "step": 8388 }, { - "epoch": 0.2380533484676504, + "epoch": 0.2377228031397886, "grad_norm": 0.0, - "learning_rate": 1.781471158088918e-05, - "loss": 0.8797, + "learning_rate": 1.7821175162607235e-05, + "loss": 0.9646, "step": 8389 }, { - "epoch": 0.23808172531214528, + "epoch": 0.23775114058205107, "grad_norm": 0.0, - "learning_rate": 1.7814138097058246e-05, - "loss": 0.9889, + "learning_rate": 1.78206032267639e-05, + "loss": 0.9053, "step": 8390 }, { - "epoch": 0.2381101021566402, + "epoch": 0.2377794780243135, "grad_norm": 0.0, - "learning_rate": 1.7813564547220933e-05, + "learning_rate": 1.782003122504472e-05, "loss": 1.0023, "step": 8391 }, { - "epoch": 0.23813847900113508, + "epoch": 0.23780781546657598, "grad_norm": 0.0, - "learning_rate": 1.781299093138209e-05, - "loss": 1.0002, + "learning_rate": 1.7819459157454516e-05, + "loss": 1.056, "step": 8392 }, { - "epoch": 0.23816685584562997, + "epoch": 0.23783615290883844, "grad_norm": 0.0, - "learning_rate": 1.7812417249546564e-05, - "loss": 1.0243, + "learning_rate": 1.7818887023998104e-05, + "loss": 0.8832, "step": 8393 }, { - "epoch": 0.23819523269012485, + "epoch": 0.2378644903511009, "grad_norm": 0.0, - "learning_rate": 1.7811843501719196e-05, - "loss": 1.0035, + "learning_rate": 1.78183148246803e-05, + "loss": 0.9017, "step": 8394 }, { - "epoch": 0.23822360953461974, + "epoch": 0.23789282779336338, "grad_norm": 0.0, - "learning_rate": 1.7811269687904837e-05, - "loss": 1.0035, + "learning_rate": 1.7817742559505928e-05, + "loss": 1.0028, "step": 8395 }, { - "epoch": 0.23825198637911466, + "epoch": 0.23792116523562584, "grad_norm": 0.0, - "learning_rate": 1.7810695808108333e-05, - "loss": 1.06, + "learning_rate": 1.7817170228479806e-05, + "loss": 0.857, "step": 8396 }, { - "epoch": 0.23828036322360954, + "epoch": 0.23794950267788828, "grad_norm": 0.0, - "learning_rate": 1.7810121862334528e-05, - "loss": 0.9734, + "learning_rate": 1.781659783160676e-05, + "loss": 0.9653, "step": 8397 }, { - "epoch": 0.23830874006810443, + "epoch": 0.23797784012015075, "grad_norm": 0.0, - "learning_rate": 1.780954785058827e-05, - "loss": 1.016, + "learning_rate": 1.7816025368891602e-05, + "loss": 0.9881, "step": 8398 }, { - "epoch": 0.23833711691259932, + "epoch": 0.2380061775624132, "grad_norm": 0.0, - "learning_rate": 1.7808973772874414e-05, - "loss": 1.0068, + "learning_rate": 1.7815452840339166e-05, + "loss": 0.9345, "step": 8399 }, { - "epoch": 0.2383654937570942, + "epoch": 0.23803451500467568, "grad_norm": 0.0, - "learning_rate": 1.7808399629197802e-05, - "loss": 1.0271, + "learning_rate": 1.7814880245954268e-05, + "loss": 1.0118, "step": 8400 }, { - "epoch": 0.2383938706015891, + "epoch": 0.23806285244693814, "grad_norm": 0.0, - "learning_rate": 1.780782541956329e-05, - "loss": 0.9351, + "learning_rate": 1.7814307585741727e-05, + "loss": 1.0621, "step": 8401 }, { - "epoch": 0.238422247446084, + "epoch": 0.2380911898892006, "grad_norm": 0.0, - "learning_rate": 1.7807251143975727e-05, - "loss": 0.9377, + "learning_rate": 1.7813734859706374e-05, + "loss": 0.9944, "step": 8402 }, { - "epoch": 0.2384506242905789, + "epoch": 0.23811952733146305, "grad_norm": 0.0, - "learning_rate": 1.780667680243996e-05, - "loss": 1.0398, + "learning_rate": 1.781316206785303e-05, + "loss": 0.9233, "step": 8403 }, { - "epoch": 0.23847900113507378, + "epoch": 0.23814786477372551, "grad_norm": 0.0, - "learning_rate": 1.7806102394960846e-05, - "loss": 1.1302, + "learning_rate": 1.7812589210186523e-05, + "loss": 1.0082, "step": 8404 }, { - "epoch": 0.23850737797956867, + "epoch": 0.23817620221598798, "grad_norm": 0.0, - "learning_rate": 1.7805527921543232e-05, - "loss": 1.0442, + "learning_rate": 1.7812016286711673e-05, + "loss": 0.9737, "step": 8405 }, { - "epoch": 0.23853575482406356, + "epoch": 0.23820453965825045, "grad_norm": 0.0, - "learning_rate": 1.7804953382191974e-05, - "loss": 0.9541, + "learning_rate": 1.781144329743331e-05, + "loss": 1.0428, "step": 8406 }, { - "epoch": 0.23856413166855844, + "epoch": 0.2382328771005129, "grad_norm": 0.0, - "learning_rate": 1.7804378776911923e-05, - "loss": 0.9564, + "learning_rate": 1.781087024235626e-05, + "loss": 0.9621, "step": 8407 }, { - "epoch": 0.23859250851305336, + "epoch": 0.23826121454277538, "grad_norm": 0.0, - "learning_rate": 1.7803804105707935e-05, - "loss": 0.8895, + "learning_rate": 1.7810297121485348e-05, + "loss": 0.9503, "step": 8408 }, { - "epoch": 0.23862088535754825, + "epoch": 0.23828955198503782, "grad_norm": 0.0, - "learning_rate": 1.780322936858486e-05, - "loss": 1.0505, + "learning_rate": 1.7809723934825405e-05, + "loss": 1.063, "step": 8409 }, { - "epoch": 0.23864926220204313, + "epoch": 0.23831788942730028, "grad_norm": 0.0, - "learning_rate": 1.7802654565547557e-05, - "loss": 0.9769, + "learning_rate": 1.7809150682381257e-05, + "loss": 0.9659, "step": 8410 }, { - "epoch": 0.23867763904653802, + "epoch": 0.23834622686956275, "grad_norm": 0.0, - "learning_rate": 1.780207969660088e-05, - "loss": 0.9046, + "learning_rate": 1.780857736415773e-05, + "loss": 0.8936, "step": 8411 }, { - "epoch": 0.2387060158910329, + "epoch": 0.23837456431182522, "grad_norm": 0.0, - "learning_rate": 1.7801504761749686e-05, - "loss": 0.8781, + "learning_rate": 1.780800398015966e-05, + "loss": 1.0154, "step": 8412 }, { - "epoch": 0.23873439273552782, + "epoch": 0.23840290175408768, "grad_norm": 0.0, - "learning_rate": 1.7800929760998832e-05, - "loss": 1.0064, + "learning_rate": 1.7807430530391873e-05, + "loss": 1.0468, "step": 8413 }, { - "epoch": 0.2387627695800227, + "epoch": 0.23843123919635015, "grad_norm": 0.0, - "learning_rate": 1.780035469435317e-05, - "loss": 1.0484, + "learning_rate": 1.7806857014859197e-05, + "loss": 1.0001, "step": 8414 }, { - "epoch": 0.2387911464245176, + "epoch": 0.23845957663861259, "grad_norm": 0.0, - "learning_rate": 1.779977956181756e-05, - "loss": 0.9202, + "learning_rate": 1.7806283433566465e-05, + "loss": 1.0287, "step": 8415 }, { - "epoch": 0.23881952326901248, + "epoch": 0.23848791408087505, "grad_norm": 0.0, - "learning_rate": 1.779920436339687e-05, - "loss": 0.8359, + "learning_rate": 1.7805709786518514e-05, + "loss": 0.9757, "step": 8416 }, { - "epoch": 0.23884790011350737, + "epoch": 0.23851625152313752, "grad_norm": 0.0, - "learning_rate": 1.779862909909594e-05, - "loss": 0.9492, + "learning_rate": 1.7805136073720163e-05, + "loss": 0.9996, "step": 8417 }, { - "epoch": 0.23887627695800226, + "epoch": 0.23854458896539998, "grad_norm": 0.0, - "learning_rate": 1.7798053768919646e-05, - "loss": 1.0414, + "learning_rate": 1.780456229517626e-05, + "loss": 1.0164, "step": 8418 }, { - "epoch": 0.23890465380249717, + "epoch": 0.23857292640766245, "grad_norm": 0.0, - "learning_rate": 1.7797478372872838e-05, - "loss": 1.054, + "learning_rate": 1.7803988450891628e-05, + "loss": 0.9864, "step": 8419 }, { - "epoch": 0.23893303064699206, + "epoch": 0.23860126384992492, "grad_norm": 0.0, - "learning_rate": 1.779690291096038e-05, - "loss": 1.0424, + "learning_rate": 1.7803414540871097e-05, + "loss": 1.0715, "step": 8420 }, { - "epoch": 0.23896140749148695, + "epoch": 0.23862960129218735, "grad_norm": 0.0, - "learning_rate": 1.7796327383187136e-05, - "loss": 0.9677, + "learning_rate": 1.7802840565119516e-05, + "loss": 0.9182, "step": 8421 }, { - "epoch": 0.23898978433598184, + "epoch": 0.23865793873444982, "grad_norm": 0.0, - "learning_rate": 1.779575178955796e-05, - "loss": 1.0249, + "learning_rate": 1.780226652364171e-05, + "loss": 1.0188, "step": 8422 }, { - "epoch": 0.23901816118047672, + "epoch": 0.2386862761767123, "grad_norm": 0.0, - "learning_rate": 1.779517613007772e-05, - "loss": 1.0133, + "learning_rate": 1.7801692416442513e-05, + "loss": 0.9341, "step": 8423 }, { - "epoch": 0.2390465380249716, + "epoch": 0.23871461361897475, "grad_norm": 0.0, - "learning_rate": 1.7794600404751276e-05, - "loss": 1.0045, + "learning_rate": 1.7801118243526764e-05, + "loss": 1.0284, "step": 8424 }, { - "epoch": 0.23907491486946653, + "epoch": 0.23874295106123722, "grad_norm": 0.0, - "learning_rate": 1.7794024613583493e-05, - "loss": 1.0774, + "learning_rate": 1.78005440048993e-05, + "loss": 1.0472, "step": 8425 }, { - "epoch": 0.2391032917139614, + "epoch": 0.23877128850349968, "grad_norm": 0.0, - "learning_rate": 1.7793448756579233e-05, - "loss": 1.0076, + "learning_rate": 1.779996970056496e-05, + "loss": 0.9517, "step": 8426 }, { - "epoch": 0.2391316685584563, + "epoch": 0.23879962594576212, "grad_norm": 0.0, - "learning_rate": 1.7792872833743363e-05, - "loss": 0.8775, + "learning_rate": 1.7799395330528574e-05, + "loss": 1.0388, "step": 8427 }, { - "epoch": 0.2391600454029512, + "epoch": 0.2388279633880246, "grad_norm": 0.0, - "learning_rate": 1.7792296845080744e-05, - "loss": 0.945, + "learning_rate": 1.7798820894794988e-05, + "loss": 1.1252, "step": 8428 }, { - "epoch": 0.23918842224744608, + "epoch": 0.23885630083028705, "grad_norm": 0.0, - "learning_rate": 1.7791720790596242e-05, - "loss": 1.0206, + "learning_rate": 1.7798246393369037e-05, + "loss": 0.9787, "step": 8429 }, { - "epoch": 0.23921679909194096, + "epoch": 0.23888463827254952, "grad_norm": 0.0, - "learning_rate": 1.7791144670294725e-05, - "loss": 0.9323, + "learning_rate": 1.779767182625556e-05, + "loss": 1.0391, "step": 8430 }, { - "epoch": 0.23924517593643588, + "epoch": 0.238912975714812, "grad_norm": 0.0, - "learning_rate": 1.7790568484181063e-05, - "loss": 1.0195, + "learning_rate": 1.77970971934594e-05, + "loss": 1.0352, "step": 8431 }, { - "epoch": 0.23927355278093076, + "epoch": 0.23894131315707445, "grad_norm": 0.0, - "learning_rate": 1.7789992232260113e-05, - "loss": 1.0212, + "learning_rate": 1.779652249498539e-05, + "loss": 1.0209, "step": 8432 }, { - "epoch": 0.23930192962542565, + "epoch": 0.2389696505993369, "grad_norm": 0.0, - "learning_rate": 1.778941591453675e-05, - "loss": 0.9639, + "learning_rate": 1.779594773083838e-05, + "loss": 1.0121, "step": 8433 }, { - "epoch": 0.23933030646992054, + "epoch": 0.23899798804159936, "grad_norm": 0.0, - "learning_rate": 1.7788839531015846e-05, - "loss": 0.9628, + "learning_rate": 1.7795372901023206e-05, + "loss": 1.0439, "step": 8434 }, { - "epoch": 0.23935868331441543, + "epoch": 0.23902632548386182, "grad_norm": 0.0, - "learning_rate": 1.7788263081702256e-05, - "loss": 0.8031, + "learning_rate": 1.779479800554471e-05, + "loss": 1.08, "step": 8435 }, { - "epoch": 0.23938706015891034, + "epoch": 0.2390546629261243, "grad_norm": 0.0, - "learning_rate": 1.778768656660086e-05, - "loss": 0.8933, + "learning_rate": 1.7794223044407738e-05, + "loss": 1.0444, "step": 8436 }, { - "epoch": 0.23941543700340523, + "epoch": 0.23908300036838676, "grad_norm": 0.0, - "learning_rate": 1.778710998571653e-05, - "loss": 1.0819, + "learning_rate": 1.779364801761713e-05, + "loss": 0.9682, "step": 8437 }, { - "epoch": 0.23944381384790012, + "epoch": 0.23911133781064922, "grad_norm": 0.0, - "learning_rate": 1.7786533339054126e-05, - "loss": 1.0255, + "learning_rate": 1.779307292517773e-05, + "loss": 1.0084, "step": 8438 }, { - "epoch": 0.239472190692395, + "epoch": 0.23913967525291166, "grad_norm": 0.0, - "learning_rate": 1.7785956626618527e-05, - "loss": 1.0064, + "learning_rate": 1.7792497767094384e-05, + "loss": 0.9576, "step": 8439 }, { - "epoch": 0.2395005675368899, + "epoch": 0.23916801269517413, "grad_norm": 0.0, - "learning_rate": 1.7785379848414607e-05, - "loss": 0.9346, + "learning_rate": 1.7791922543371936e-05, + "loss": 1.0398, "step": 8440 }, { - "epoch": 0.23952894438138478, + "epoch": 0.2391963501374366, "grad_norm": 0.0, - "learning_rate": 1.7784803004447226e-05, - "loss": 1.1198, + "learning_rate": 1.779134725401523e-05, + "loss": 1.0549, "step": 8441 }, { - "epoch": 0.2395573212258797, + "epoch": 0.23922468757969906, "grad_norm": 0.0, - "learning_rate": 1.7784226094721268e-05, - "loss": 0.933, + "learning_rate": 1.7790771899029115e-05, + "loss": 0.9805, "step": 8442 }, { - "epoch": 0.23958569807037458, + "epoch": 0.23925302502196152, "grad_norm": 0.0, - "learning_rate": 1.7783649119241603e-05, - "loss": 0.936, + "learning_rate": 1.7790196478418432e-05, + "loss": 0.9811, "step": 8443 }, { - "epoch": 0.23961407491486947, + "epoch": 0.239281362464224, "grad_norm": 0.0, - "learning_rate": 1.7783072078013103e-05, - "loss": 0.9595, + "learning_rate": 1.7789620992188033e-05, + "loss": 1.0995, "step": 8444 }, { - "epoch": 0.23964245175936436, + "epoch": 0.23930969990648643, "grad_norm": 0.0, - "learning_rate": 1.778249497104064e-05, - "loss": 0.9498, + "learning_rate": 1.778904544034276e-05, + "loss": 1.0571, "step": 8445 }, { - "epoch": 0.23967082860385924, + "epoch": 0.2393380373487489, "grad_norm": 0.0, - "learning_rate": 1.7781917798329095e-05, - "loss": 1.0234, + "learning_rate": 1.778846982288747e-05, + "loss": 1.0824, "step": 8446 }, { - "epoch": 0.23969920544835413, + "epoch": 0.23936637479101136, "grad_norm": 0.0, - "learning_rate": 1.778134055988334e-05, - "loss": 1.0554, + "learning_rate": 1.7787894139827006e-05, + "loss": 1.0711, "step": 8447 }, { - "epoch": 0.23972758229284905, + "epoch": 0.23939471223327383, "grad_norm": 0.0, - "learning_rate": 1.7780763255708252e-05, - "loss": 0.9862, + "learning_rate": 1.7787318391166216e-05, + "loss": 1.04, "step": 8448 }, { - "epoch": 0.23975595913734393, + "epoch": 0.2394230496755363, "grad_norm": 0.0, - "learning_rate": 1.7780185885808708e-05, - "loss": 0.9772, + "learning_rate": 1.7786742576909955e-05, + "loss": 1.0898, "step": 8449 }, { - "epoch": 0.23978433598183882, + "epoch": 0.23945138711779876, "grad_norm": 0.0, - "learning_rate": 1.7779608450189582e-05, - "loss": 1.016, + "learning_rate": 1.7786166697063067e-05, + "loss": 0.9697, "step": 8450 }, { - "epoch": 0.2398127128263337, + "epoch": 0.2394797245600612, "grad_norm": 0.0, - "learning_rate": 1.7779030948855756e-05, - "loss": 0.8566, + "learning_rate": 1.7785590751630404e-05, + "loss": 0.9435, "step": 8451 }, { - "epoch": 0.2398410896708286, + "epoch": 0.23950806200232366, "grad_norm": 0.0, - "learning_rate": 1.7778453381812102e-05, - "loss": 0.8473, + "learning_rate": 1.778501474061682e-05, + "loss": 1.072, "step": 8452 }, { - "epoch": 0.2398694665153235, + "epoch": 0.23953639944458613, "grad_norm": 0.0, - "learning_rate": 1.777787574906351e-05, - "loss": 0.9327, + "learning_rate": 1.7784438664027165e-05, + "loss": 0.9433, "step": 8453 }, { - "epoch": 0.2398978433598184, + "epoch": 0.2395647368868486, "grad_norm": 0.0, - "learning_rate": 1.7777298050614845e-05, - "loss": 0.9819, + "learning_rate": 1.7783862521866296e-05, + "loss": 1.0605, "step": 8454 }, { - "epoch": 0.23992622020431328, + "epoch": 0.23959307432911106, "grad_norm": 0.0, - "learning_rate": 1.7776720286470998e-05, - "loss": 0.9824, + "learning_rate": 1.778328631413906e-05, + "loss": 0.8719, "step": 8455 }, { - "epoch": 0.23995459704880817, + "epoch": 0.23962141177137353, "grad_norm": 0.0, - "learning_rate": 1.7776142456636843e-05, - "loss": 1.1274, + "learning_rate": 1.7782710040850314e-05, + "loss": 1.0058, "step": 8456 }, { - "epoch": 0.23998297389330306, + "epoch": 0.23964974921363597, "grad_norm": 0.0, - "learning_rate": 1.7775564561117265e-05, - "loss": 0.9524, + "learning_rate": 1.778213370200491e-05, + "loss": 1.013, "step": 8457 }, { - "epoch": 0.24001135073779795, + "epoch": 0.23967808665589843, "grad_norm": 0.0, - "learning_rate": 1.777498659991714e-05, - "loss": 1.0365, + "learning_rate": 1.7781557297607704e-05, + "loss": 1.0011, "step": 8458 }, { - "epoch": 0.24003972758229286, + "epoch": 0.2397064240981609, "grad_norm": 0.0, - "learning_rate": 1.7774408573041356e-05, - "loss": 0.9536, + "learning_rate": 1.7780980827663553e-05, + "loss": 0.889, "step": 8459 }, { - "epoch": 0.24006810442678775, + "epoch": 0.23973476154042336, "grad_norm": 0.0, - "learning_rate": 1.7773830480494792e-05, - "loss": 1.0342, + "learning_rate": 1.7780404292177308e-05, + "loss": 1.0471, "step": 8460 }, { - "epoch": 0.24009648127128264, + "epoch": 0.23976309898268583, "grad_norm": 0.0, - "learning_rate": 1.7773252322282335e-05, - "loss": 1.0269, + "learning_rate": 1.7779827691153832e-05, + "loss": 1.0064, "step": 8461 }, { - "epoch": 0.24012485811577752, + "epoch": 0.2397914364249483, "grad_norm": 0.0, - "learning_rate": 1.7772674098408863e-05, - "loss": 0.9932, + "learning_rate": 1.7779251024597976e-05, + "loss": 1.002, "step": 8462 }, { - "epoch": 0.2401532349602724, + "epoch": 0.23981977386721073, "grad_norm": 0.0, - "learning_rate": 1.7772095808879265e-05, - "loss": 0.8202, + "learning_rate": 1.77786742925146e-05, + "loss": 0.987, "step": 8463 }, { - "epoch": 0.2401816118047673, + "epoch": 0.2398481113094732, "grad_norm": 0.0, - "learning_rate": 1.7771517453698423e-05, - "loss": 0.976, + "learning_rate": 1.7778097494908564e-05, + "loss": 0.9337, "step": 8464 }, { - "epoch": 0.2402099886492622, + "epoch": 0.23987644875173567, "grad_norm": 0.0, - "learning_rate": 1.7770939032871227e-05, - "loss": 0.9784, + "learning_rate": 1.7777520631784723e-05, + "loss": 0.998, "step": 8465 }, { - "epoch": 0.2402383654937571, + "epoch": 0.23990478619399813, "grad_norm": 0.0, - "learning_rate": 1.777036054640256e-05, - "loss": 1.0186, + "learning_rate": 1.777694370314794e-05, + "loss": 0.9602, "step": 8466 }, { - "epoch": 0.240266742338252, + "epoch": 0.2399331236362606, "grad_norm": 0.0, - "learning_rate": 1.7769781994297305e-05, - "loss": 1.0792, + "learning_rate": 1.777636670900307e-05, + "loss": 0.9739, "step": 8467 }, { - "epoch": 0.24029511918274687, + "epoch": 0.23996146107852306, "grad_norm": 0.0, - "learning_rate": 1.7769203376560353e-05, - "loss": 0.9953, + "learning_rate": 1.7775789649354973e-05, + "loss": 0.9774, "step": 8468 }, { - "epoch": 0.24032349602724176, + "epoch": 0.2399897985207855, "grad_norm": 0.0, - "learning_rate": 1.776862469319659e-05, - "loss": 1.1203, + "learning_rate": 1.7775212524208513e-05, + "loss": 1.0417, "step": 8469 }, { - "epoch": 0.24035187287173665, + "epoch": 0.24001813596304797, "grad_norm": 0.0, - "learning_rate": 1.7768045944210906e-05, - "loss": 1.1005, + "learning_rate": 1.7774635333568554e-05, + "loss": 0.957, "step": 8470 }, { - "epoch": 0.24038024971623156, + "epoch": 0.24004647340531043, "grad_norm": 0.0, - "learning_rate": 1.7767467129608186e-05, - "loss": 0.928, + "learning_rate": 1.777405807743995e-05, + "loss": 1.0032, "step": 8471 }, { - "epoch": 0.24040862656072645, + "epoch": 0.2400748108475729, "grad_norm": 0.0, - "learning_rate": 1.7766888249393325e-05, - "loss": 1.0504, + "learning_rate": 1.7773480755827574e-05, + "loss": 0.8873, "step": 8472 }, { - "epoch": 0.24043700340522134, + "epoch": 0.24010314828983537, "grad_norm": 0.0, - "learning_rate": 1.7766309303571212e-05, - "loss": 0.9954, + "learning_rate": 1.777290336873628e-05, + "loss": 1.0007, "step": 8473 }, { - "epoch": 0.24046538024971623, + "epoch": 0.24013148573209783, "grad_norm": 0.0, - "learning_rate": 1.776573029214673e-05, - "loss": 0.9778, + "learning_rate": 1.7772325916170935e-05, + "loss": 1.0312, "step": 8474 }, { - "epoch": 0.2404937570942111, + "epoch": 0.24015982317436027, "grad_norm": 0.0, - "learning_rate": 1.776515121512478e-05, - "loss": 1.1823, + "learning_rate": 1.77717483981364e-05, + "loss": 0.989, "step": 8475 }, { - "epoch": 0.24052213393870603, + "epoch": 0.24018816061662274, "grad_norm": 0.0, - "learning_rate": 1.7764572072510248e-05, - "loss": 0.9761, + "learning_rate": 1.7771170814637547e-05, + "loss": 1.0535, "step": 8476 }, { - "epoch": 0.24055051078320092, + "epoch": 0.2402164980588852, "grad_norm": 0.0, - "learning_rate": 1.7763992864308024e-05, - "loss": 0.9767, + "learning_rate": 1.7770593165679234e-05, + "loss": 1.1198, "step": 8477 }, { - "epoch": 0.2405788876276958, + "epoch": 0.24024483550114767, "grad_norm": 0.0, - "learning_rate": 1.7763413590523004e-05, - "loss": 1.056, + "learning_rate": 1.777001545126633e-05, + "loss": 1.1423, "step": 8478 }, { - "epoch": 0.2406072644721907, + "epoch": 0.24027317294341013, "grad_norm": 0.0, - "learning_rate": 1.776283425116008e-05, - "loss": 1.0311, + "learning_rate": 1.77694376714037e-05, + "loss": 0.9464, "step": 8479 }, { - "epoch": 0.24063564131668558, + "epoch": 0.2403015103856726, "grad_norm": 0.0, - "learning_rate": 1.7762254846224144e-05, - "loss": 0.9532, + "learning_rate": 1.776885982609621e-05, + "loss": 1.0001, "step": 8480 }, { - "epoch": 0.24066401816118047, + "epoch": 0.24032984782793504, "grad_norm": 0.0, - "learning_rate": 1.7761675375720096e-05, - "loss": 0.9496, + "learning_rate": 1.776828191534873e-05, + "loss": 1.0189, "step": 8481 }, { - "epoch": 0.24069239500567538, + "epoch": 0.2403581852701975, "grad_norm": 0.0, - "learning_rate": 1.776109583965283e-05, - "loss": 0.9189, + "learning_rate": 1.7767703939166124e-05, + "loss": 0.9586, "step": 8482 }, { - "epoch": 0.24072077185017027, + "epoch": 0.24038652271245997, "grad_norm": 0.0, - "learning_rate": 1.7760516238027233e-05, - "loss": 1.136, + "learning_rate": 1.7767125897553268e-05, + "loss": 1.0146, "step": 8483 }, { - "epoch": 0.24074914869466515, + "epoch": 0.24041486015472244, "grad_norm": 0.0, - "learning_rate": 1.775993657084821e-05, - "loss": 0.9478, + "learning_rate": 1.776654779051502e-05, + "loss": 1.0353, "step": 8484 }, { - "epoch": 0.24077752553916004, + "epoch": 0.2404431975969849, "grad_norm": 0.0, - "learning_rate": 1.775935683812065e-05, - "loss": 0.888, + "learning_rate": 1.7765969618056266e-05, + "loss": 0.8729, "step": 8485 }, { - "epoch": 0.24080590238365493, + "epoch": 0.24047153503924737, "grad_norm": 0.0, - "learning_rate": 1.775877703984946e-05, - "loss": 0.9818, + "learning_rate": 1.7765391380181858e-05, + "loss": 0.9601, "step": 8486 }, { - "epoch": 0.24083427922814982, + "epoch": 0.2404998724815098, "grad_norm": 0.0, - "learning_rate": 1.7758197176039526e-05, - "loss": 0.9786, + "learning_rate": 1.7764813076896675e-05, + "loss": 0.9453, "step": 8487 }, { - "epoch": 0.24086265607264473, + "epoch": 0.24052820992377227, "grad_norm": 0.0, - "learning_rate": 1.7757617246695755e-05, - "loss": 0.8965, + "learning_rate": 1.7764234708205594e-05, + "loss": 1.0399, "step": 8488 }, { - "epoch": 0.24089103291713962, + "epoch": 0.24055654736603474, "grad_norm": 0.0, - "learning_rate": 1.775703725182304e-05, - "loss": 0.8493, + "learning_rate": 1.7763656274113476e-05, + "loss": 0.9921, "step": 8489 }, { - "epoch": 0.2409194097616345, + "epoch": 0.2405848848082972, "grad_norm": 0.0, - "learning_rate": 1.7756457191426282e-05, - "loss": 0.8683, + "learning_rate": 1.77630777746252e-05, + "loss": 1.0141, "step": 8490 }, { - "epoch": 0.2409477866061294, + "epoch": 0.24061322225055967, "grad_norm": 0.0, - "learning_rate": 1.7755877065510386e-05, - "loss": 1.0242, + "learning_rate": 1.7762499209745634e-05, + "loss": 1.0374, "step": 8491 }, { - "epoch": 0.24097616345062428, + "epoch": 0.24064155969282214, "grad_norm": 0.0, - "learning_rate": 1.7755296874080242e-05, - "loss": 0.9791, + "learning_rate": 1.7761920579479656e-05, + "loss": 0.9672, "step": 8492 }, { - "epoch": 0.2410045402951192, + "epoch": 0.24066989713508458, "grad_norm": 0.0, - "learning_rate": 1.775471661714076e-05, - "loss": 0.9512, + "learning_rate": 1.776134188383214e-05, + "loss": 0.9636, "step": 8493 }, { - "epoch": 0.24103291713961408, + "epoch": 0.24069823457734704, "grad_norm": 0.0, - "learning_rate": 1.775413629469684e-05, - "loss": 1.0071, + "learning_rate": 1.776076312280796e-05, + "loss": 0.9019, "step": 8494 }, { - "epoch": 0.24106129398410897, + "epoch": 0.2407265720196095, "grad_norm": 0.0, - "learning_rate": 1.775355590675338e-05, - "loss": 0.9753, + "learning_rate": 1.776018429641199e-05, + "loss": 1.0197, "step": 8495 }, { - "epoch": 0.24108967082860386, + "epoch": 0.24075490946187197, "grad_norm": 0.0, - "learning_rate": 1.775297545331528e-05, - "loss": 0.9821, + "learning_rate": 1.77596054046491e-05, + "loss": 1.0933, "step": 8496 }, { - "epoch": 0.24111804767309875, + "epoch": 0.24078324690413444, "grad_norm": 0.0, - "learning_rate": 1.7752394934387455e-05, - "loss": 0.963, + "learning_rate": 1.775902644752418e-05, + "loss": 0.997, "step": 8497 }, { - "epoch": 0.24114642451759363, + "epoch": 0.2408115843463969, "grad_norm": 0.0, - "learning_rate": 1.77518143499748e-05, - "loss": 0.9975, + "learning_rate": 1.7758447425042096e-05, + "loss": 0.9894, "step": 8498 }, { - "epoch": 0.24117480136208855, + "epoch": 0.24083992178865934, "grad_norm": 0.0, - "learning_rate": 1.775123370008222e-05, - "loss": 1.0464, + "learning_rate": 1.775786833720773e-05, + "loss": 0.8733, "step": 8499 }, { - "epoch": 0.24120317820658344, + "epoch": 0.2408682592309218, "grad_norm": 0.0, - "learning_rate": 1.7750652984714617e-05, - "loss": 0.9218, + "learning_rate": 1.7757289184025958e-05, + "loss": 1.0574, "step": 8500 }, { - "epoch": 0.24123155505107832, + "epoch": 0.24089659667318428, "grad_norm": 0.0, - "learning_rate": 1.7750072203876905e-05, - "loss": 0.9599, + "learning_rate": 1.775670996550166e-05, + "loss": 1.0641, "step": 8501 }, { - "epoch": 0.2412599318955732, + "epoch": 0.24092493411544674, "grad_norm": 0.0, - "learning_rate": 1.774949135757398e-05, - "loss": 0.8682, + "learning_rate": 1.7756130681639708e-05, + "loss": 0.9955, "step": 8502 }, { - "epoch": 0.2412883087400681, + "epoch": 0.2409532715577092, "grad_norm": 0.0, - "learning_rate": 1.7748910445810756e-05, - "loss": 1.0491, + "learning_rate": 1.7755551332444988e-05, + "loss": 0.996, "step": 8503 }, { - "epoch": 0.24131668558456298, + "epoch": 0.24098160899997167, "grad_norm": 0.0, - "learning_rate": 1.774832946859214e-05, - "loss": 0.9524, + "learning_rate": 1.7754971917922384e-05, + "loss": 1.0101, "step": 8504 }, { - "epoch": 0.2413450624290579, + "epoch": 0.2410099464422341, "grad_norm": 0.0, - "learning_rate": 1.774774842592303e-05, - "loss": 1.0581, + "learning_rate": 1.775439243807677e-05, + "loss": 1.0351, "step": 8505 }, { - "epoch": 0.2413734392735528, + "epoch": 0.24103828388449658, "grad_norm": 0.0, - "learning_rate": 1.7747167317808346e-05, - "loss": 0.9272, + "learning_rate": 1.7753812892913024e-05, + "loss": 0.9104, "step": 8506 }, { - "epoch": 0.24140181611804767, + "epoch": 0.24106662132675905, "grad_norm": 0.0, - "learning_rate": 1.774658614425299e-05, - "loss": 1.0197, + "learning_rate": 1.7753233282436036e-05, + "loss": 0.963, "step": 8507 }, { - "epoch": 0.24143019296254256, + "epoch": 0.2410949587690215, "grad_norm": 0.0, - "learning_rate": 1.774600490526187e-05, - "loss": 0.9078, + "learning_rate": 1.7752653606650687e-05, + "loss": 0.9737, "step": 8508 }, { - "epoch": 0.24145856980703745, + "epoch": 0.24112329621128398, "grad_norm": 0.0, - "learning_rate": 1.77454236008399e-05, - "loss": 0.9566, + "learning_rate": 1.775207386556186e-05, + "loss": 0.9826, "step": 8509 }, { - "epoch": 0.24148694665153234, + "epoch": 0.24115163365354644, "grad_norm": 0.0, - "learning_rate": 1.774484223099199e-05, - "loss": 1.0909, + "learning_rate": 1.775149405917443e-05, + "loss": 0.9476, "step": 8510 }, { - "epoch": 0.24151532349602725, + "epoch": 0.24117997109580888, "grad_norm": 0.0, - "learning_rate": 1.774426079572305e-05, - "loss": 0.9745, + "learning_rate": 1.775091418749329e-05, + "loss": 0.962, "step": 8511 }, { - "epoch": 0.24154370034052214, + "epoch": 0.24120830853807135, "grad_norm": 0.0, - "learning_rate": 1.774367929503799e-05, - "loss": 1.02, + "learning_rate": 1.775033425052332e-05, + "loss": 1.0023, "step": 8512 }, { - "epoch": 0.24157207718501703, + "epoch": 0.2412366459803338, "grad_norm": 0.0, - "learning_rate": 1.7743097728941723e-05, - "loss": 0.9829, + "learning_rate": 1.7749754248269407e-05, + "loss": 1.0866, "step": 8513 }, { - "epoch": 0.2416004540295119, + "epoch": 0.24126498342259628, "grad_norm": 0.0, - "learning_rate": 1.774251609743916e-05, - "loss": 0.9391, + "learning_rate": 1.7749174180736443e-05, + "loss": 1.0349, "step": 8514 }, { - "epoch": 0.2416288308740068, + "epoch": 0.24129332086485875, "grad_norm": 0.0, - "learning_rate": 1.7741934400535216e-05, - "loss": 0.928, + "learning_rate": 1.7748594047929297e-05, + "loss": 1.0058, "step": 8515 }, { - "epoch": 0.24165720771850172, + "epoch": 0.2413216583071212, "grad_norm": 0.0, - "learning_rate": 1.7741352638234807e-05, - "loss": 1.0232, + "learning_rate": 1.774801384985287e-05, + "loss": 1.012, "step": 8516 }, { - "epoch": 0.2416855845629966, + "epoch": 0.24134999574938365, "grad_norm": 0.0, - "learning_rate": 1.774077081054284e-05, - "loss": 1.025, + "learning_rate": 1.774743358651205e-05, + "loss": 0.9968, "step": 8517 }, { - "epoch": 0.2417139614074915, + "epoch": 0.24137833319164612, "grad_norm": 0.0, - "learning_rate": 1.774018891746424e-05, - "loss": 0.918, + "learning_rate": 1.7746853257911713e-05, + "loss": 0.9144, "step": 8518 }, { - "epoch": 0.24174233825198638, + "epoch": 0.24140667063390858, "grad_norm": 0.0, - "learning_rate": 1.773960695900391e-05, - "loss": 1.0599, + "learning_rate": 1.7746272864056754e-05, + "loss": 1.0408, "step": 8519 }, { - "epoch": 0.24177071509648126, + "epoch": 0.24143500807617105, "grad_norm": 0.0, - "learning_rate": 1.7739024935166773e-05, - "loss": 0.9611, + "learning_rate": 1.7745692404952066e-05, + "loss": 0.869, "step": 8520 }, { - "epoch": 0.24179909194097615, + "epoch": 0.24146334551843351, "grad_norm": 0.0, - "learning_rate": 1.7738442845957745e-05, - "loss": 1.0627, + "learning_rate": 1.7745111880602534e-05, + "loss": 1.1005, "step": 8521 }, { - "epoch": 0.24182746878547107, + "epoch": 0.24149168296069598, "grad_norm": 0.0, - "learning_rate": 1.773786069138174e-05, - "loss": 0.9617, + "learning_rate": 1.7744531291013047e-05, + "loss": 0.9463, "step": 8522 }, { - "epoch": 0.24185584562996595, + "epoch": 0.24152002040295842, "grad_norm": 0.0, - "learning_rate": 1.7737278471443685e-05, - "loss": 1.0335, + "learning_rate": 1.77439506361885e-05, + "loss": 0.9556, "step": 8523 }, { - "epoch": 0.24188422247446084, + "epoch": 0.24154835784522088, "grad_norm": 0.0, - "learning_rate": 1.773669618614849e-05, - "loss": 1.0392, + "learning_rate": 1.774336991613378e-05, + "loss": 1.0484, "step": 8524 }, { - "epoch": 0.24191259931895573, + "epoch": 0.24157669528748335, "grad_norm": 0.0, - "learning_rate": 1.7736113835501068e-05, - "loss": 0.9794, + "learning_rate": 1.774278913085378e-05, + "loss": 1.0663, "step": 8525 }, { - "epoch": 0.24194097616345062, + "epoch": 0.24160503272974582, "grad_norm": 0.0, - "learning_rate": 1.773553141950635e-05, - "loss": 0.9582, + "learning_rate": 1.7742208280353387e-05, + "loss": 0.9848, "step": 8526 }, { - "epoch": 0.2419693530079455, + "epoch": 0.24163337017200828, "grad_norm": 0.0, - "learning_rate": 1.7734948938169244e-05, - "loss": 0.953, + "learning_rate": 1.7741627364637506e-05, + "loss": 0.9935, "step": 8527 }, { - "epoch": 0.24199772985244042, + "epoch": 0.24166170761427075, "grad_norm": 0.0, - "learning_rate": 1.7734366391494686e-05, - "loss": 0.9497, + "learning_rate": 1.774104638371102e-05, + "loss": 1.0084, "step": 8528 }, { - "epoch": 0.2420261066969353, + "epoch": 0.2416900450565332, "grad_norm": 0.0, - "learning_rate": 1.773378377948758e-05, - "loss": 0.9, + "learning_rate": 1.7740465337578823e-05, + "loss": 0.9685, "step": 8529 }, { - "epoch": 0.2420544835414302, + "epoch": 0.24171838249879565, "grad_norm": 0.0, - "learning_rate": 1.773320110215286e-05, - "loss": 0.9517, + "learning_rate": 1.7739884226245813e-05, + "loss": 0.9457, "step": 8530 }, { - "epoch": 0.24208286038592508, + "epoch": 0.24174671994105812, "grad_norm": 0.0, - "learning_rate": 1.7732618359495436e-05, - "loss": 0.9677, + "learning_rate": 1.7739303049716886e-05, + "loss": 1.007, "step": 8531 }, { - "epoch": 0.24211123723041997, + "epoch": 0.24177505738332059, "grad_norm": 0.0, - "learning_rate": 1.773203555152024e-05, - "loss": 0.9241, + "learning_rate": 1.7738721807996933e-05, + "loss": 0.9879, "step": 8532 }, { - "epoch": 0.24213961407491488, + "epoch": 0.24180339482558305, "grad_norm": 0.0, - "learning_rate": 1.7731452678232196e-05, - "loss": 0.9701, + "learning_rate": 1.7738140501090856e-05, + "loss": 0.9581, "step": 8533 }, { - "epoch": 0.24216799091940977, + "epoch": 0.24183173226784552, "grad_norm": 0.0, - "learning_rate": 1.7730869739636222e-05, - "loss": 1.0366, + "learning_rate": 1.7737559129003547e-05, + "loss": 0.924, "step": 8534 }, { - "epoch": 0.24219636776390466, + "epoch": 0.24186006971010796, "grad_norm": 0.0, - "learning_rate": 1.773028673573724e-05, - "loss": 0.9966, + "learning_rate": 1.7736977691739906e-05, + "loss": 0.8825, "step": 8535 }, { - "epoch": 0.24222474460839954, + "epoch": 0.24188840715237042, "grad_norm": 0.0, - "learning_rate": 1.7729703666540183e-05, - "loss": 1.127, + "learning_rate": 1.7736396189304824e-05, + "loss": 1.0541, "step": 8536 }, { - "epoch": 0.24225312145289443, + "epoch": 0.2419167445946329, "grad_norm": 0.0, - "learning_rate": 1.7729120532049967e-05, - "loss": 0.9765, + "learning_rate": 1.773581462170321e-05, + "loss": 0.9719, "step": 8537 }, { - "epoch": 0.24228149829738932, + "epoch": 0.24194508203689535, "grad_norm": 0.0, - "learning_rate": 1.7728537332271526e-05, - "loss": 0.9599, + "learning_rate": 1.773523298893995e-05, + "loss": 1.0317, "step": 8538 }, { - "epoch": 0.24230987514188423, + "epoch": 0.24197341947915782, "grad_norm": 0.0, - "learning_rate": 1.7727954067209782e-05, - "loss": 1.0105, + "learning_rate": 1.7734651291019955e-05, + "loss": 0.8802, "step": 8539 }, { - "epoch": 0.24233825198637912, + "epoch": 0.24200175692142029, "grad_norm": 0.0, - "learning_rate": 1.7727370736869662e-05, - "loss": 0.9273, + "learning_rate": 1.773406952794812e-05, + "loss": 1.0111, "step": 8540 }, { - "epoch": 0.242366628830874, + "epoch": 0.24203009436368272, "grad_norm": 0.0, - "learning_rate": 1.77267873412561e-05, - "loss": 0.9559, + "learning_rate": 1.7733487699729344e-05, + "loss": 0.9823, "step": 8541 }, { - "epoch": 0.2423950056753689, + "epoch": 0.2420584318059452, "grad_norm": 0.0, - "learning_rate": 1.772620388037401e-05, - "loss": 0.9157, + "learning_rate": 1.7732905806368526e-05, + "loss": 0.978, "step": 8542 }, { - "epoch": 0.24242338251986378, + "epoch": 0.24208676924820766, "grad_norm": 0.0, - "learning_rate": 1.772562035422833e-05, - "loss": 0.923, + "learning_rate": 1.7732323847870577e-05, + "loss": 0.9628, "step": 8543 }, { - "epoch": 0.24245175936435867, + "epoch": 0.24211510669047012, "grad_norm": 0.0, - "learning_rate": 1.7725036762823987e-05, - "loss": 0.9973, + "learning_rate": 1.7731741824240385e-05, + "loss": 0.9894, "step": 8544 }, { - "epoch": 0.24248013620885359, + "epoch": 0.2421434441327326, "grad_norm": 0.0, - "learning_rate": 1.7724453106165915e-05, - "loss": 0.8442, + "learning_rate": 1.773115973548287e-05, + "loss": 0.9144, "step": 8545 }, { - "epoch": 0.24250851305334847, + "epoch": 0.24217178157499505, "grad_norm": 0.0, - "learning_rate": 1.7723869384259038e-05, - "loss": 0.8405, + "learning_rate": 1.773057758160292e-05, + "loss": 0.7567, "step": 8546 }, { - "epoch": 0.24253688989784336, + "epoch": 0.2422001190172575, "grad_norm": 0.0, - "learning_rate": 1.772328559710829e-05, - "loss": 0.9565, + "learning_rate": 1.7729995362605444e-05, + "loss": 0.9574, "step": 8547 }, { - "epoch": 0.24256526674233825, + "epoch": 0.24222845645951996, "grad_norm": 0.0, - "learning_rate": 1.77227017447186e-05, - "loss": 1.0905, + "learning_rate": 1.772941307849535e-05, + "loss": 1.0564, "step": 8548 }, { - "epoch": 0.24259364358683314, + "epoch": 0.24225679390178242, "grad_norm": 0.0, - "learning_rate": 1.7722117827094904e-05, - "loss": 1.0572, + "learning_rate": 1.772883072927754e-05, + "loss": 1.0253, "step": 8549 }, { - "epoch": 0.24262202043132802, + "epoch": 0.2422851313440449, "grad_norm": 0.0, - "learning_rate": 1.7721533844242126e-05, - "loss": 1.1836, + "learning_rate": 1.7728248314956915e-05, + "loss": 1.1062, "step": 8550 }, { - "epoch": 0.24265039727582294, + "epoch": 0.24231346878630736, "grad_norm": 0.0, - "learning_rate": 1.772094979616521e-05, - "loss": 1.0289, + "learning_rate": 1.7727665835538386e-05, + "loss": 0.9017, "step": 8551 }, { - "epoch": 0.24267877412031783, + "epoch": 0.24234180622856982, "grad_norm": 0.0, - "learning_rate": 1.772036568286908e-05, - "loss": 0.9548, + "learning_rate": 1.7727083291026855e-05, + "loss": 1.0041, "step": 8552 }, { - "epoch": 0.2427071509648127, + "epoch": 0.24237014367083226, "grad_norm": 0.0, - "learning_rate": 1.7719781504358678e-05, - "loss": 1.0242, + "learning_rate": 1.7726500681427236e-05, + "loss": 0.9172, "step": 8553 }, { - "epoch": 0.2427355278093076, + "epoch": 0.24239848111309473, "grad_norm": 0.0, - "learning_rate": 1.7719197260638933e-05, - "loss": 1.0425, + "learning_rate": 1.772591800674443e-05, + "loss": 0.9829, "step": 8554 }, { - "epoch": 0.2427639046538025, + "epoch": 0.2424268185553572, "grad_norm": 0.0, - "learning_rate": 1.771861295171478e-05, - "loss": 0.9153, + "learning_rate": 1.7725335266983352e-05, + "loss": 0.9382, "step": 8555 }, { - "epoch": 0.2427922814982974, + "epoch": 0.24245515599761966, "grad_norm": 0.0, - "learning_rate": 1.7718028577591156e-05, - "loss": 0.9728, + "learning_rate": 1.7724752462148903e-05, + "loss": 0.9698, "step": 8556 }, { - "epoch": 0.2428206583427923, + "epoch": 0.24248349343988213, "grad_norm": 0.0, - "learning_rate": 1.7717444138273e-05, - "loss": 0.9543, + "learning_rate": 1.7724169592245996e-05, + "loss": 1.0176, "step": 8557 }, { - "epoch": 0.24284903518728718, + "epoch": 0.2425118308821446, "grad_norm": 0.0, - "learning_rate": 1.7716859633765248e-05, - "loss": 0.9874, + "learning_rate": 1.772358665727954e-05, + "loss": 0.966, "step": 8558 }, { - "epoch": 0.24287741203178206, + "epoch": 0.24254016832440703, "grad_norm": 0.0, - "learning_rate": 1.7716275064072828e-05, - "loss": 0.9847, + "learning_rate": 1.7723003657254447e-05, + "loss": 0.9902, "step": 8559 }, { - "epoch": 0.24290578887627695, + "epoch": 0.2425685057666695, "grad_norm": 0.0, - "learning_rate": 1.7715690429200695e-05, - "loss": 0.9785, + "learning_rate": 1.7722420592175624e-05, + "loss": 0.8776, "step": 8560 }, { - "epoch": 0.24293416572077184, + "epoch": 0.24259684320893196, "grad_norm": 0.0, - "learning_rate": 1.7715105729153774e-05, - "loss": 1.1307, + "learning_rate": 1.7721837462047987e-05, + "loss": 1.071, "step": 8561 }, { - "epoch": 0.24296254256526675, + "epoch": 0.24262518065119443, "grad_norm": 0.0, - "learning_rate": 1.771452096393701e-05, - "loss": 0.9576, + "learning_rate": 1.7721254266876443e-05, + "loss": 1.1115, "step": 8562 }, { - "epoch": 0.24299091940976164, + "epoch": 0.2426535180934569, "grad_norm": 0.0, - "learning_rate": 1.7713936133555336e-05, - "loss": 1.0134, + "learning_rate": 1.772067100666591e-05, + "loss": 0.9984, "step": 8563 }, { - "epoch": 0.24301929625425653, + "epoch": 0.24268185553571936, "grad_norm": 0.0, - "learning_rate": 1.77133512380137e-05, - "loss": 0.9946, + "learning_rate": 1.7720087681421297e-05, + "loss": 0.9964, "step": 8564 }, { - "epoch": 0.24304767309875142, + "epoch": 0.2427101929779818, "grad_norm": 0.0, - "learning_rate": 1.771276627731704e-05, - "loss": 0.9744, + "learning_rate": 1.7719504291147517e-05, + "loss": 1.1205, "step": 8565 }, { - "epoch": 0.2430760499432463, + "epoch": 0.24273853042024426, "grad_norm": 0.0, - "learning_rate": 1.7712181251470297e-05, - "loss": 1.038, + "learning_rate": 1.771892083584949e-05, + "loss": 1.0724, "step": 8566 }, { - "epoch": 0.2431044267877412, + "epoch": 0.24276686786250673, "grad_norm": 0.0, - "learning_rate": 1.771159616047841e-05, - "loss": 0.9919, + "learning_rate": 1.771833731553212e-05, + "loss": 0.9792, "step": 8567 }, { - "epoch": 0.2431328036322361, + "epoch": 0.2427952053047692, "grad_norm": 0.0, - "learning_rate": 1.7711011004346323e-05, - "loss": 1.0149, + "learning_rate": 1.7717753730200334e-05, + "loss": 0.8866, "step": 8568 }, { - "epoch": 0.243161180476731, + "epoch": 0.24282354274703166, "grad_norm": 0.0, - "learning_rate": 1.7710425783078982e-05, - "loss": 0.9149, + "learning_rate": 1.771717007985904e-05, + "loss": 1.0465, "step": 8569 }, { - "epoch": 0.24318955732122588, + "epoch": 0.24285188018929413, "grad_norm": 0.0, - "learning_rate": 1.7709840496681327e-05, - "loss": 1.0512, + "learning_rate": 1.771658636451316e-05, + "loss": 0.8956, "step": 8570 }, { - "epoch": 0.24321793416572077, + "epoch": 0.24288021763155657, "grad_norm": 0.0, - "learning_rate": 1.7709255145158303e-05, - "loss": 0.8593, + "learning_rate": 1.7716002584167605e-05, + "loss": 0.9786, "step": 8571 }, { - "epoch": 0.24324631101021565, + "epoch": 0.24290855507381903, "grad_norm": 0.0, - "learning_rate": 1.770866972851485e-05, - "loss": 0.8949, + "learning_rate": 1.7715418738827296e-05, + "loss": 0.9292, "step": 8572 }, { - "epoch": 0.24327468785471057, + "epoch": 0.2429368925160815, "grad_norm": 0.0, - "learning_rate": 1.7708084246755924e-05, - "loss": 1.13, + "learning_rate": 1.7714834828497144e-05, + "loss": 1.0084, "step": 8573 }, { - "epoch": 0.24330306469920546, + "epoch": 0.24296522995834396, "grad_norm": 0.0, - "learning_rate": 1.7707498699886462e-05, - "loss": 0.9983, + "learning_rate": 1.771425085318208e-05, + "loss": 0.9237, "step": 8574 }, { - "epoch": 0.24333144154370034, + "epoch": 0.24299356740060643, "grad_norm": 0.0, - "learning_rate": 1.770691308791141e-05, - "loss": 0.9921, + "learning_rate": 1.7713666812887016e-05, + "loss": 0.9189, "step": 8575 }, { - "epoch": 0.24335981838819523, + "epoch": 0.24302190484286887, "grad_norm": 0.0, - "learning_rate": 1.7706327410835717e-05, - "loss": 0.9735, + "learning_rate": 1.771308270761687e-05, + "loss": 0.9814, "step": 8576 }, { - "epoch": 0.24338819523269012, + "epoch": 0.24305024228513133, "grad_norm": 0.0, - "learning_rate": 1.770574166866433e-05, - "loss": 0.9386, + "learning_rate": 1.7712498537376565e-05, + "loss": 1.0885, "step": 8577 }, { - "epoch": 0.243416572077185, + "epoch": 0.2430785797273938, "grad_norm": 0.0, - "learning_rate": 1.7705155861402195e-05, - "loss": 0.8967, + "learning_rate": 1.7711914302171022e-05, + "loss": 0.9665, "step": 8578 }, { - "epoch": 0.24344494892167992, + "epoch": 0.24310691716965627, "grad_norm": 0.0, - "learning_rate": 1.770456998905427e-05, - "loss": 0.9099, + "learning_rate": 1.7711330002005157e-05, + "loss": 0.9607, "step": 8579 }, { - "epoch": 0.2434733257661748, + "epoch": 0.24313525461191873, "grad_norm": 0.0, - "learning_rate": 1.7703984051625487e-05, - "loss": 1.0296, + "learning_rate": 1.77107456368839e-05, + "loss": 1.0037, "step": 8580 }, { - "epoch": 0.2435017026106697, + "epoch": 0.2431635920541812, "grad_norm": 0.0, - "learning_rate": 1.7703398049120804e-05, - "loss": 1.0226, + "learning_rate": 1.7710161206812166e-05, + "loss": 0.9296, "step": 8581 }, { - "epoch": 0.24353007945516458, + "epoch": 0.24319192949644364, "grad_norm": 0.0, - "learning_rate": 1.7702811981545177e-05, - "loss": 0.9473, + "learning_rate": 1.7709576711794886e-05, + "loss": 0.9578, "step": 8582 }, { - "epoch": 0.24355845629965947, + "epoch": 0.2432202669387061, "grad_norm": 0.0, - "learning_rate": 1.7702225848903548e-05, - "loss": 1.0121, + "learning_rate": 1.7708992151836972e-05, + "loss": 1.0883, "step": 8583 }, { - "epoch": 0.24358683314415436, + "epoch": 0.24324860438096857, "grad_norm": 0.0, - "learning_rate": 1.770163965120087e-05, - "loss": 1.0073, + "learning_rate": 1.770840752694336e-05, + "loss": 0.985, "step": 8584 }, { - "epoch": 0.24361520998864927, + "epoch": 0.24327694182323104, "grad_norm": 0.0, - "learning_rate": 1.7701053388442103e-05, - "loss": 1.1016, + "learning_rate": 1.7707822837118966e-05, + "loss": 1.0059, "step": 8585 }, { - "epoch": 0.24364358683314416, + "epoch": 0.2433052792654935, "grad_norm": 0.0, - "learning_rate": 1.7700467060632184e-05, - "loss": 0.9973, + "learning_rate": 1.770723808236872e-05, + "loss": 0.8465, "step": 8586 }, { - "epoch": 0.24367196367763905, + "epoch": 0.24333361670775597, "grad_norm": 0.0, - "learning_rate": 1.7699880667776078e-05, - "loss": 0.9662, + "learning_rate": 1.770665326269754e-05, + "loss": 1.02, "step": 8587 }, { - "epoch": 0.24370034052213393, + "epoch": 0.2433619541500184, "grad_norm": 0.0, - "learning_rate": 1.769929420987873e-05, - "loss": 0.9897, + "learning_rate": 1.7706068378110367e-05, + "loss": 1.0472, "step": 8588 }, { - "epoch": 0.24372871736662882, + "epoch": 0.24339029159228087, "grad_norm": 0.0, - "learning_rate": 1.76987076869451e-05, - "loss": 1.0181, + "learning_rate": 1.7705483428612114e-05, + "loss": 1.0405, "step": 8589 }, { - "epoch": 0.2437570942111237, + "epoch": 0.24341862903454334, "grad_norm": 0.0, - "learning_rate": 1.769812109898014e-05, - "loss": 0.924, + "learning_rate": 1.770489841420771e-05, + "loss": 1.0407, "step": 8590 }, { - "epoch": 0.24378547105561862, + "epoch": 0.2434469664768058, "grad_norm": 0.0, - "learning_rate": 1.7697534445988804e-05, - "loss": 0.9176, + "learning_rate": 1.7704313334902087e-05, + "loss": 0.9988, "step": 8591 }, { - "epoch": 0.2438138479001135, + "epoch": 0.24347530391906827, "grad_norm": 0.0, - "learning_rate": 1.769694772797605e-05, - "loss": 0.9429, + "learning_rate": 1.7703728190700172e-05, + "loss": 1.0168, "step": 8592 }, { - "epoch": 0.2438422247446084, + "epoch": 0.24350364136133074, "grad_norm": 0.0, - "learning_rate": 1.769636094494683e-05, - "loss": 1.0103, + "learning_rate": 1.7703142981606894e-05, + "loss": 0.9742, "step": 8593 }, { - "epoch": 0.2438706015891033, + "epoch": 0.24353197880359317, "grad_norm": 0.0, - "learning_rate": 1.7695774096906105e-05, - "loss": 0.9462, + "learning_rate": 1.7702557707627185e-05, + "loss": 1.0493, "step": 8594 }, { - "epoch": 0.24389897843359817, + "epoch": 0.24356031624585564, "grad_norm": 0.0, - "learning_rate": 1.7695187183858832e-05, - "loss": 1.0049, + "learning_rate": 1.7701972368765973e-05, + "loss": 1.0051, "step": 8595 }, { - "epoch": 0.2439273552780931, + "epoch": 0.2435886536881181, "grad_norm": 0.0, - "learning_rate": 1.7694600205809963e-05, - "loss": 0.9749, + "learning_rate": 1.7701386965028182e-05, + "loss": 0.9921, "step": 8596 }, { - "epoch": 0.24395573212258798, + "epoch": 0.24361699113038057, "grad_norm": 0.0, - "learning_rate": 1.769401316276446e-05, - "loss": 0.9745, + "learning_rate": 1.770080149641875e-05, + "loss": 1.0927, "step": 8597 }, { - "epoch": 0.24398410896708286, + "epoch": 0.24364532857264304, "grad_norm": 0.0, - "learning_rate": 1.7693426054727287e-05, - "loss": 1.0124, + "learning_rate": 1.770021596294261e-05, + "loss": 1.0193, "step": 8598 }, { - "epoch": 0.24401248581157775, + "epoch": 0.2436736660149055, "grad_norm": 0.0, - "learning_rate": 1.7692838881703397e-05, - "loss": 0.912, + "learning_rate": 1.769963036460469e-05, + "loss": 0.8829, "step": 8599 }, { - "epoch": 0.24404086265607264, + "epoch": 0.24370200345716794, "grad_norm": 0.0, - "learning_rate": 1.7692251643697748e-05, - "loss": 0.9543, + "learning_rate": 1.7699044701409923e-05, + "loss": 1.0595, "step": 8600 }, { - "epoch": 0.24406923950056753, + "epoch": 0.2437303408994304, "grad_norm": 0.0, - "learning_rate": 1.7691664340715307e-05, - "loss": 1.0016, + "learning_rate": 1.7698458973363248e-05, + "loss": 0.8894, "step": 8601 }, { - "epoch": 0.24409761634506244, + "epoch": 0.24375867834169287, "grad_norm": 0.0, - "learning_rate": 1.7691076972761028e-05, - "loss": 0.9088, + "learning_rate": 1.769787318046959e-05, + "loss": 0.9392, "step": 8602 }, { - "epoch": 0.24412599318955733, + "epoch": 0.24378701578395534, "grad_norm": 0.0, - "learning_rate": 1.7690489539839877e-05, - "loss": 0.9838, + "learning_rate": 1.769728732273389e-05, + "loss": 1.1183, "step": 8603 }, { - "epoch": 0.24415437003405221, + "epoch": 0.2438153532262178, "grad_norm": 0.0, - "learning_rate": 1.768990204195682e-05, - "loss": 1.0116, + "learning_rate": 1.7696701400161077e-05, + "loss": 1.0106, "step": 8604 }, { - "epoch": 0.2441827468785471, + "epoch": 0.24384369066848027, "grad_norm": 0.0, - "learning_rate": 1.7689314479116806e-05, - "loss": 0.8807, + "learning_rate": 1.7696115412756095e-05, + "loss": 1.0379, "step": 8605 }, { - "epoch": 0.244211123723042, + "epoch": 0.2438720281107427, "grad_norm": 0.0, - "learning_rate": 1.7688726851324812e-05, - "loss": 0.8358, + "learning_rate": 1.769552936052387e-05, + "loss": 0.963, "step": 8606 }, { - "epoch": 0.24423950056753688, + "epoch": 0.24390036555300518, "grad_norm": 0.0, - "learning_rate": 1.76881391585858e-05, - "loss": 0.9498, + "learning_rate": 1.7694943243469348e-05, + "loss": 0.9231, "step": 8607 }, { - "epoch": 0.2442678774120318, + "epoch": 0.24392870299526764, "grad_norm": 0.0, - "learning_rate": 1.7687551400904725e-05, - "loss": 1.0954, + "learning_rate": 1.769435706159746e-05, + "loss": 1.0054, "step": 8608 }, { - "epoch": 0.24429625425652668, + "epoch": 0.2439570404375301, "grad_norm": 0.0, - "learning_rate": 1.768696357828656e-05, - "loss": 0.8835, + "learning_rate": 1.7693770814913144e-05, + "loss": 0.9394, "step": 8609 }, { - "epoch": 0.24432463110102157, + "epoch": 0.24398537787979258, "grad_norm": 0.0, - "learning_rate": 1.7686375690736265e-05, - "loss": 1.0208, + "learning_rate": 1.7693184503421342e-05, + "loss": 0.9067, "step": 8610 }, { - "epoch": 0.24435300794551645, + "epoch": 0.24401371532205504, "grad_norm": 0.0, - "learning_rate": 1.7685787738258816e-05, - "loss": 0.9261, + "learning_rate": 1.7692598127126986e-05, + "loss": 1.0477, "step": 8611 }, { - "epoch": 0.24438138479001134, + "epoch": 0.24404205276431748, "grad_norm": 0.0, - "learning_rate": 1.768519972085917e-05, - "loss": 0.9945, + "learning_rate": 1.7692011686035023e-05, + "loss": 1.0353, "step": 8612 }, { - "epoch": 0.24440976163450626, + "epoch": 0.24407039020657995, "grad_norm": 0.0, - "learning_rate": 1.7684611638542293e-05, - "loss": 1.1082, + "learning_rate": 1.7691425180150386e-05, + "loss": 0.9556, "step": 8613 }, { - "epoch": 0.24443813847900114, + "epoch": 0.2440987276488424, "grad_norm": 0.0, - "learning_rate": 1.7684023491313155e-05, - "loss": 1.0361, + "learning_rate": 1.769083860947802e-05, + "loss": 0.9325, "step": 8614 }, { - "epoch": 0.24446651532349603, + "epoch": 0.24412706509110488, "grad_norm": 0.0, - "learning_rate": 1.768343527917673e-05, - "loss": 0.9603, + "learning_rate": 1.7690251974022866e-05, + "loss": 1.0458, "step": 8615 }, { - "epoch": 0.24449489216799092, + "epoch": 0.24415540253336734, "grad_norm": 0.0, - "learning_rate": 1.768284700213798e-05, - "loss": 0.9516, + "learning_rate": 1.7689665273789863e-05, + "loss": 1.0219, "step": 8616 }, { - "epoch": 0.2445232690124858, + "epoch": 0.2441837399756298, "grad_norm": 0.0, - "learning_rate": 1.7682258660201874e-05, - "loss": 1.1172, + "learning_rate": 1.7689078508783953e-05, + "loss": 0.8989, "step": 8617 }, { - "epoch": 0.2445516458569807, + "epoch": 0.24421207741789225, "grad_norm": 0.0, - "learning_rate": 1.7681670253373387e-05, - "loss": 0.9117, + "learning_rate": 1.768849167901008e-05, + "loss": 0.9576, "step": 8618 }, { - "epoch": 0.2445800227014756, + "epoch": 0.24424041486015471, "grad_norm": 0.0, - "learning_rate": 1.7681081781657485e-05, - "loss": 1.0685, + "learning_rate": 1.768790478447319e-05, + "loss": 0.9409, "step": 8619 }, { - "epoch": 0.2446083995459705, + "epoch": 0.24426875230241718, "grad_norm": 0.0, - "learning_rate": 1.768049324505914e-05, - "loss": 0.9523, + "learning_rate": 1.7687317825178222e-05, + "loss": 0.9604, "step": 8620 }, { - "epoch": 0.24463677639046538, + "epoch": 0.24429708974467965, "grad_norm": 0.0, - "learning_rate": 1.7679904643583322e-05, - "loss": 0.9841, + "learning_rate": 1.7686730801130118e-05, + "loss": 0.939, "step": 8621 }, { - "epoch": 0.24466515323496027, + "epoch": 0.2443254271869421, "grad_norm": 0.0, - "learning_rate": 1.7679315977235006e-05, - "loss": 1.0131, + "learning_rate": 1.768614371233383e-05, + "loss": 1.0027, "step": 8622 }, { - "epoch": 0.24469353007945516, + "epoch": 0.24435376462920458, "grad_norm": 0.0, - "learning_rate": 1.7678727246019162e-05, - "loss": 1.1353, + "learning_rate": 1.768555655879429e-05, + "loss": 1.127, "step": 8623 }, { - "epoch": 0.24472190692395004, + "epoch": 0.24438210207146702, "grad_norm": 0.0, - "learning_rate": 1.7678138449940764e-05, - "loss": 1.0485, + "learning_rate": 1.7684969340516463e-05, + "loss": 1.0585, "step": 8624 }, { - "epoch": 0.24475028376844496, + "epoch": 0.24441043951372948, "grad_norm": 0.0, - "learning_rate": 1.7677549589004786e-05, - "loss": 0.9159, + "learning_rate": 1.7684382057505284e-05, + "loss": 0.9617, "step": 8625 }, { - "epoch": 0.24477866061293985, + "epoch": 0.24443877695599195, "grad_norm": 0.0, - "learning_rate": 1.7676960663216197e-05, - "loss": 0.8851, + "learning_rate": 1.7683794709765697e-05, + "loss": 1.0735, "step": 8626 }, { - "epoch": 0.24480703745743473, + "epoch": 0.24446711439825441, "grad_norm": 0.0, - "learning_rate": 1.7676371672579983e-05, - "loss": 0.9812, + "learning_rate": 1.768320729730266e-05, + "loss": 0.995, "step": 8627 }, { - "epoch": 0.24483541430192962, + "epoch": 0.24449545184051688, "grad_norm": 0.0, - "learning_rate": 1.767578261710111e-05, - "loss": 0.9312, + "learning_rate": 1.768261982012111e-05, + "loss": 1.051, "step": 8628 }, { - "epoch": 0.2448637911464245, + "epoch": 0.24452378928277935, "grad_norm": 0.0, - "learning_rate": 1.7675193496784553e-05, - "loss": 1.0163, + "learning_rate": 1.7682032278226002e-05, + "loss": 0.8854, "step": 8629 }, { - "epoch": 0.2448921679909194, + "epoch": 0.24455212672504179, "grad_norm": 0.0, - "learning_rate": 1.7674604311635296e-05, - "loss": 0.969, + "learning_rate": 1.7681444671622284e-05, + "loss": 0.9606, "step": 8630 }, { - "epoch": 0.2449205448354143, + "epoch": 0.24458046416730425, "grad_norm": 0.0, - "learning_rate": 1.7674015061658308e-05, - "loss": 1.0709, + "learning_rate": 1.7680857000314904e-05, + "loss": 1.0704, "step": 8631 }, { - "epoch": 0.2449489216799092, + "epoch": 0.24460880160956672, "grad_norm": 0.0, - "learning_rate": 1.767342574685857e-05, - "loss": 0.9381, + "learning_rate": 1.7680269264308814e-05, + "loss": 0.9332, "step": 8632 }, { - "epoch": 0.24497729852440409, + "epoch": 0.24463713905182918, "grad_norm": 0.0, - "learning_rate": 1.767283636724106e-05, - "loss": 1.0358, + "learning_rate": 1.7679681463608963e-05, + "loss": 0.9759, "step": 8633 }, { - "epoch": 0.24500567536889897, + "epoch": 0.24466547649409165, "grad_norm": 0.0, - "learning_rate": 1.767224692281076e-05, - "loss": 1.0526, + "learning_rate": 1.7679093598220305e-05, + "loss": 0.8499, "step": 8634 }, { - "epoch": 0.24503405221339386, + "epoch": 0.24469381393635412, "grad_norm": 0.0, - "learning_rate": 1.767165741357264e-05, - "loss": 1.0248, + "learning_rate": 1.767850566814779e-05, + "loss": 0.9568, "step": 8635 }, { - "epoch": 0.24506242905788878, + "epoch": 0.24472215137861655, "grad_norm": 0.0, - "learning_rate": 1.767106783953169e-05, - "loss": 0.8599, + "learning_rate": 1.767791767339637e-05, + "loss": 1.0112, "step": 8636 }, { - "epoch": 0.24509080590238366, + "epoch": 0.24475048882087902, "grad_norm": 0.0, - "learning_rate": 1.767047820069288e-05, - "loss": 0.955, + "learning_rate": 1.7677329613970995e-05, + "loss": 1.0533, "step": 8637 }, { - "epoch": 0.24511918274687855, + "epoch": 0.24477882626314149, "grad_norm": 0.0, - "learning_rate": 1.76698884970612e-05, - "loss": 0.9895, + "learning_rate": 1.7676741489876625e-05, + "loss": 1.0185, "step": 8638 }, { - "epoch": 0.24514755959137344, + "epoch": 0.24480716370540395, "grad_norm": 0.0, - "learning_rate": 1.7669298728641622e-05, - "loss": 1.0337, + "learning_rate": 1.7676153301118207e-05, + "loss": 0.8327, "step": 8639 }, { - "epoch": 0.24517593643586832, + "epoch": 0.24483550114766642, "grad_norm": 0.0, - "learning_rate": 1.766870889543914e-05, - "loss": 0.9717, + "learning_rate": 1.7675565047700706e-05, + "loss": 0.9483, "step": 8640 }, { - "epoch": 0.2452043132803632, + "epoch": 0.24486383858992888, "grad_norm": 0.0, - "learning_rate": 1.7668118997458725e-05, - "loss": 0.9265, + "learning_rate": 1.7674976729629065e-05, + "loss": 0.9728, "step": 8641 }, { - "epoch": 0.24523269012485813, + "epoch": 0.24489217603219132, "grad_norm": 0.0, - "learning_rate": 1.7667529034705365e-05, - "loss": 0.9806, + "learning_rate": 1.7674388346908248e-05, + "loss": 0.984, "step": 8642 }, { - "epoch": 0.24526106696935301, + "epoch": 0.2449205134744538, "grad_norm": 0.0, - "learning_rate": 1.766693900718404e-05, - "loss": 0.9938, + "learning_rate": 1.7673799899543207e-05, + "loss": 0.8709, "step": 8643 }, { - "epoch": 0.2452894438138479, + "epoch": 0.24494885091671625, "grad_norm": 0.0, - "learning_rate": 1.766634891489974e-05, - "loss": 0.9636, + "learning_rate": 1.76732113875389e-05, + "loss": 1.055, "step": 8644 }, { - "epoch": 0.2453178206583428, + "epoch": 0.24497718835897872, "grad_norm": 0.0, - "learning_rate": 1.7665758757857445e-05, - "loss": 0.9777, + "learning_rate": 1.7672622810900285e-05, + "loss": 0.9888, "step": 8645 }, { - "epoch": 0.24534619750283768, + "epoch": 0.2450055258012412, "grad_norm": 0.0, - "learning_rate": 1.7665168536062142e-05, - "loss": 1.0368, + "learning_rate": 1.7672034169632316e-05, + "loss": 0.927, "step": 8646 }, { - "epoch": 0.24537457434733256, + "epoch": 0.24503386324350365, "grad_norm": 0.0, - "learning_rate": 1.7664578249518817e-05, - "loss": 0.869, + "learning_rate": 1.767144546373996e-05, + "loss": 1.0701, "step": 8647 }, { - "epoch": 0.24540295119182748, + "epoch": 0.2450622006857661, "grad_norm": 0.0, - "learning_rate": 1.766398789823245e-05, - "loss": 1.0695, + "learning_rate": 1.7670856693228163e-05, + "loss": 0.9325, "step": 8648 }, { - "epoch": 0.24543132803632237, + "epoch": 0.24509053812802856, "grad_norm": 0.0, - "learning_rate": 1.7663397482208037e-05, - "loss": 0.9226, + "learning_rate": 1.7670267858101895e-05, + "loss": 0.9529, "step": 8649 }, { - "epoch": 0.24545970488081725, + "epoch": 0.24511887557029102, "grad_norm": 0.0, - "learning_rate": 1.766280700145056e-05, - "loss": 0.9357, + "learning_rate": 1.7669678958366112e-05, + "loss": 1.0443, "step": 8650 }, { - "epoch": 0.24548808172531214, + "epoch": 0.2451472130125535, "grad_norm": 0.0, - "learning_rate": 1.7662216455965004e-05, - "loss": 0.9831, + "learning_rate": 1.7669089994025778e-05, + "loss": 0.8906, "step": 8651 }, { - "epoch": 0.24551645856980703, + "epoch": 0.24517555045481595, "grad_norm": 0.0, - "learning_rate": 1.7661625845756366e-05, - "loss": 1.1235, + "learning_rate": 1.7668500965085845e-05, + "loss": 0.8872, "step": 8652 }, { - "epoch": 0.24554483541430194, + "epoch": 0.24520388789707842, "grad_norm": 0.0, - "learning_rate": 1.7661035170829627e-05, - "loss": 1.0669, + "learning_rate": 1.7667911871551286e-05, + "loss": 1.0922, "step": 8653 }, { - "epoch": 0.24557321225879683, + "epoch": 0.24523222533934086, "grad_norm": 0.0, - "learning_rate": 1.766044443118978e-05, - "loss": 0.9077, + "learning_rate": 1.7667322713427055e-05, + "loss": 0.8818, "step": 8654 }, { - "epoch": 0.24560158910329172, + "epoch": 0.24526056278160333, "grad_norm": 0.0, - "learning_rate": 1.7659853626841818e-05, - "loss": 0.8493, + "learning_rate": 1.766673349071812e-05, + "loss": 0.9146, "step": 8655 }, { - "epoch": 0.2456299659477866, + "epoch": 0.2452889002238658, "grad_norm": 0.0, - "learning_rate": 1.7659262757790726e-05, - "loss": 1.0079, + "learning_rate": 1.766614420342944e-05, + "loss": 0.9404, "step": 8656 }, { - "epoch": 0.2456583427922815, + "epoch": 0.24531723766612826, "grad_norm": 0.0, - "learning_rate": 1.7658671824041496e-05, - "loss": 0.9729, + "learning_rate": 1.7665554851565983e-05, + "loss": 1.0916, "step": 8657 }, { - "epoch": 0.24568671963677638, + "epoch": 0.24534557510839072, "grad_norm": 0.0, - "learning_rate": 1.7658080825599122e-05, - "loss": 1.0054, + "learning_rate": 1.766496543513271e-05, + "loss": 0.9699, "step": 8658 }, { - "epoch": 0.2457150964812713, + "epoch": 0.2453739125506532, "grad_norm": 0.0, - "learning_rate": 1.7657489762468595e-05, - "loss": 0.8821, + "learning_rate": 1.7664375954134586e-05, + "loss": 0.943, "step": 8659 }, { - "epoch": 0.24574347332576618, + "epoch": 0.24540224999291563, "grad_norm": 0.0, - "learning_rate": 1.7656898634654907e-05, - "loss": 0.997, + "learning_rate": 1.7663786408576574e-05, + "loss": 0.8811, "step": 8660 }, { - "epoch": 0.24577185017026107, + "epoch": 0.2454305874351781, "grad_norm": 0.0, - "learning_rate": 1.765630744216305e-05, - "loss": 1.0369, + "learning_rate": 1.7663196798463647e-05, + "loss": 1.0013, "step": 8661 }, { - "epoch": 0.24580022701475596, + "epoch": 0.24545892487744056, "grad_norm": 0.0, - "learning_rate": 1.7655716184998025e-05, - "loss": 1.0188, + "learning_rate": 1.766260712380077e-05, + "loss": 0.9621, "step": 8662 }, { - "epoch": 0.24582860385925084, + "epoch": 0.24548726231970303, "grad_norm": 0.0, - "learning_rate": 1.7655124863164817e-05, - "loss": 0.9552, + "learning_rate": 1.7662017384592905e-05, + "loss": 0.9886, "step": 8663 }, { - "epoch": 0.24585698070374573, + "epoch": 0.2455155997619655, "grad_norm": 0.0, - "learning_rate": 1.7654533476668427e-05, - "loss": 0.9536, + "learning_rate": 1.766142758084502e-05, + "loss": 1.0958, "step": 8664 }, { - "epoch": 0.24588535754824065, + "epoch": 0.24554393720422796, "grad_norm": 0.0, - "learning_rate": 1.7653942025513847e-05, - "loss": 0.9857, + "learning_rate": 1.766083771256209e-05, + "loss": 1.0445, "step": 8665 }, { - "epoch": 0.24591373439273553, + "epoch": 0.2455722746464904, "grad_norm": 0.0, - "learning_rate": 1.7653350509706077e-05, - "loss": 0.9076, + "learning_rate": 1.7660247779749074e-05, + "loss": 0.9992, "step": 8666 }, { - "epoch": 0.24594211123723042, + "epoch": 0.24560061208875286, "grad_norm": 0.0, - "learning_rate": 1.7652758929250112e-05, - "loss": 0.9467, + "learning_rate": 1.7659657782410952e-05, + "loss": 0.9196, "step": 8667 }, { - "epoch": 0.2459704880817253, + "epoch": 0.24562894953101533, "grad_norm": 0.0, - "learning_rate": 1.7652167284150944e-05, - "loss": 1.0393, + "learning_rate": 1.7659067720552685e-05, + "loss": 0.9402, "step": 8668 }, { - "epoch": 0.2459988649262202, + "epoch": 0.2456572869732778, "grad_norm": 0.0, - "learning_rate": 1.765157557441358e-05, - "loss": 1.0024, + "learning_rate": 1.7658477594179248e-05, + "loss": 1.0028, "step": 8669 }, { - "epoch": 0.24602724177071508, + "epoch": 0.24568562441554026, "grad_norm": 0.0, - "learning_rate": 1.765098380004301e-05, - "loss": 1.0684, + "learning_rate": 1.7657887403295605e-05, + "loss": 1.1236, "step": 8670 }, { - "epoch": 0.24605561861521, + "epoch": 0.24571396185780273, "grad_norm": 0.0, - "learning_rate": 1.7650391961044234e-05, - "loss": 1.0448, + "learning_rate": 1.7657297147906738e-05, + "loss": 0.9617, "step": 8671 }, { - "epoch": 0.24608399545970489, + "epoch": 0.24574229930006516, "grad_norm": 0.0, - "learning_rate": 1.7649800057422256e-05, - "loss": 1.0056, + "learning_rate": 1.7656706828017616e-05, + "loss": 1.0473, "step": 8672 }, { - "epoch": 0.24611237230419977, + "epoch": 0.24577063674232763, "grad_norm": 0.0, - "learning_rate": 1.7649208089182072e-05, - "loss": 0.9301, + "learning_rate": 1.7656116443633204e-05, + "loss": 1.0643, "step": 8673 }, { - "epoch": 0.24614074914869466, + "epoch": 0.2457989741845901, "grad_norm": 0.0, - "learning_rate": 1.7648616056328687e-05, - "loss": 0.8479, + "learning_rate": 1.7655525994758484e-05, + "loss": 0.846, "step": 8674 }, { - "epoch": 0.24616912599318955, + "epoch": 0.24582731162685256, "grad_norm": 0.0, - "learning_rate": 1.7648023958867094e-05, - "loss": 0.8598, + "learning_rate": 1.7654935481398424e-05, + "loss": 1.0439, "step": 8675 }, { - "epoch": 0.24619750283768446, + "epoch": 0.24585564906911503, "grad_norm": 0.0, - "learning_rate": 1.7647431796802302e-05, - "loss": 0.9489, + "learning_rate": 1.7654344903557995e-05, + "loss": 0.9588, "step": 8676 }, { - "epoch": 0.24622587968217935, + "epoch": 0.2458839865113775, "grad_norm": 0.0, - "learning_rate": 1.764683957013931e-05, - "loss": 0.9719, + "learning_rate": 1.7653754261242184e-05, + "loss": 0.9457, "step": 8677 }, { - "epoch": 0.24625425652667424, + "epoch": 0.24591232395363993, "grad_norm": 0.0, - "learning_rate": 1.7646247278883113e-05, - "loss": 0.9026, + "learning_rate": 1.7653163554455957e-05, + "loss": 1.0145, "step": 8678 }, { - "epoch": 0.24628263337116912, + "epoch": 0.2459406613959024, "grad_norm": 0.0, - "learning_rate": 1.7645654923038732e-05, - "loss": 0.913, + "learning_rate": 1.7652572783204286e-05, + "loss": 0.8852, "step": 8679 }, { - "epoch": 0.246311010215664, + "epoch": 0.24596899883816487, "grad_norm": 0.0, - "learning_rate": 1.7645062502611154e-05, - "loss": 1.0721, + "learning_rate": 1.7651981947492157e-05, + "loss": 0.8188, "step": 8680 }, { - "epoch": 0.2463393870601589, + "epoch": 0.24599733628042733, "grad_norm": 0.0, - "learning_rate": 1.764447001760539e-05, - "loss": 0.9429, + "learning_rate": 1.7651391047324544e-05, + "loss": 1.0105, "step": 8681 }, { - "epoch": 0.2463677639046538, + "epoch": 0.2460256737226898, "grad_norm": 0.0, - "learning_rate": 1.7643877468026445e-05, - "loss": 1.0844, + "learning_rate": 1.7650800082706422e-05, + "loss": 0.8801, "step": 8682 }, { - "epoch": 0.2463961407491487, + "epoch": 0.24605401116495226, "grad_norm": 0.0, - "learning_rate": 1.7643284853879322e-05, - "loss": 1.0862, + "learning_rate": 1.765020905364277e-05, + "loss": 0.9545, "step": 8683 }, { - "epoch": 0.2464245175936436, + "epoch": 0.2460823486072147, "grad_norm": 0.0, - "learning_rate": 1.764269217516903e-05, - "loss": 1.0695, + "learning_rate": 1.7649617960138566e-05, + "loss": 1.0611, "step": 8684 }, { - "epoch": 0.24645289443813848, + "epoch": 0.24611068604947717, "grad_norm": 0.0, - "learning_rate": 1.764209943190057e-05, - "loss": 1.0243, + "learning_rate": 1.7649026802198786e-05, + "loss": 0.9896, "step": 8685 }, { - "epoch": 0.24648127128263336, + "epoch": 0.24613902349173963, "grad_norm": 0.0, - "learning_rate": 1.7641506624078958e-05, - "loss": 1.0039, + "learning_rate": 1.764843557982842e-05, + "loss": 0.9948, "step": 8686 }, { - "epoch": 0.24650964812712825, + "epoch": 0.2461673609340021, "grad_norm": 0.0, - "learning_rate": 1.7640913751709196e-05, - "loss": 0.9589, + "learning_rate": 1.7647844293032435e-05, + "loss": 0.9906, "step": 8687 }, { - "epoch": 0.24653802497162317, + "epoch": 0.24619569837626457, "grad_norm": 0.0, - "learning_rate": 1.764032081479629e-05, - "loss": 0.8382, + "learning_rate": 1.7647252941815817e-05, + "loss": 1.0172, "step": 8688 }, { - "epoch": 0.24656640181611805, + "epoch": 0.24622403581852703, "grad_norm": 0.0, - "learning_rate": 1.7639727813345248e-05, - "loss": 0.9308, + "learning_rate": 1.764666152618355e-05, + "loss": 0.9846, "step": 8689 }, { - "epoch": 0.24659477866061294, + "epoch": 0.24625237326078947, "grad_norm": 0.0, - "learning_rate": 1.7639134747361084e-05, - "loss": 0.9923, + "learning_rate": 1.7646070046140614e-05, + "loss": 0.9105, "step": 8690 }, { - "epoch": 0.24662315550510783, + "epoch": 0.24628071070305194, "grad_norm": 0.0, - "learning_rate": 1.7638541616848806e-05, - "loss": 1.0212, + "learning_rate": 1.764547850169199e-05, + "loss": 0.8639, "step": 8691 }, { - "epoch": 0.24665153234960271, + "epoch": 0.2463090481453144, "grad_norm": 0.0, - "learning_rate": 1.7637948421813423e-05, - "loss": 1.1012, + "learning_rate": 1.7644886892842664e-05, + "loss": 1.0053, "step": 8692 }, { - "epoch": 0.24667990919409763, + "epoch": 0.24633738558757687, "grad_norm": 0.0, - "learning_rate": 1.7637355162259947e-05, - "loss": 0.9129, + "learning_rate": 1.7644295219597614e-05, + "loss": 0.9615, "step": 8693 }, { - "epoch": 0.24670828603859252, + "epoch": 0.24636572302983933, "grad_norm": 0.0, - "learning_rate": 1.7636761838193386e-05, - "loss": 0.9568, + "learning_rate": 1.764370348196183e-05, + "loss": 0.9815, "step": 8694 }, { - "epoch": 0.2467366628830874, + "epoch": 0.2463940604721018, "grad_norm": 0.0, - "learning_rate": 1.7636168449618754e-05, - "loss": 1.0342, + "learning_rate": 1.7643111679940286e-05, + "loss": 1.0655, "step": 8695 }, { - "epoch": 0.2467650397275823, + "epoch": 0.24642239791436424, "grad_norm": 0.0, - "learning_rate": 1.7635574996541066e-05, - "loss": 0.9588, + "learning_rate": 1.7642519813537983e-05, + "loss": 1.0287, "step": 8696 }, { - "epoch": 0.24679341657207718, + "epoch": 0.2464507353566267, "grad_norm": 0.0, - "learning_rate": 1.7634981478965335e-05, - "loss": 1.0756, + "learning_rate": 1.764192788275989e-05, + "loss": 1.0761, "step": 8697 }, { - "epoch": 0.24682179341657207, + "epoch": 0.24647907279888917, "grad_norm": 0.0, - "learning_rate": 1.763438789689657e-05, - "loss": 0.8963, + "learning_rate": 1.7641335887611005e-05, + "loss": 1.0022, "step": 8698 }, { - "epoch": 0.24685017026106698, + "epoch": 0.24650741024115164, "grad_norm": 0.0, - "learning_rate": 1.763379425033979e-05, - "loss": 1.0553, + "learning_rate": 1.7640743828096306e-05, + "loss": 0.9025, "step": 8699 }, { - "epoch": 0.24687854710556187, + "epoch": 0.2465357476834141, "grad_norm": 0.0, - "learning_rate": 1.7633200539300005e-05, - "loss": 0.8902, + "learning_rate": 1.764015170422079e-05, + "loss": 0.8312, "step": 8700 }, { - "epoch": 0.24690692395005676, + "epoch": 0.24656408512567657, "grad_norm": 0.0, - "learning_rate": 1.7632606763782227e-05, - "loss": 0.9317, + "learning_rate": 1.7639559515989436e-05, + "loss": 1.0258, "step": 8701 }, { - "epoch": 0.24693530079455164, + "epoch": 0.246592422567939, "grad_norm": 0.0, - "learning_rate": 1.7632012923791482e-05, - "loss": 1.1197, + "learning_rate": 1.7638967263407238e-05, + "loss": 1.024, "step": 8702 }, { - "epoch": 0.24696367763904653, + "epoch": 0.24662076001020147, "grad_norm": 0.0, - "learning_rate": 1.7631419019332782e-05, - "loss": 1.0338, + "learning_rate": 1.763837494647918e-05, + "loss": 0.8562, "step": 8703 }, { - "epoch": 0.24699205448354142, + "epoch": 0.24664909745246394, "grad_norm": 0.0, - "learning_rate": 1.7630825050411138e-05, - "loss": 1.0724, + "learning_rate": 1.7637782565210252e-05, + "loss": 0.9193, "step": 8704 }, { - "epoch": 0.24702043132803633, + "epoch": 0.2466774348947264, "grad_norm": 0.0, - "learning_rate": 1.763023101703157e-05, - "loss": 1.0453, + "learning_rate": 1.7637190119605447e-05, + "loss": 1.0818, "step": 8705 }, { - "epoch": 0.24704880817253122, + "epoch": 0.24670577233698887, "grad_norm": 0.0, - "learning_rate": 1.7629636919199104e-05, - "loss": 1.0272, + "learning_rate": 1.7636597609669753e-05, + "loss": 0.9466, "step": 8706 }, { - "epoch": 0.2470771850170261, + "epoch": 0.24673410977925134, "grad_norm": 0.0, - "learning_rate": 1.762904275691875e-05, - "loss": 1.0452, + "learning_rate": 1.763600503540816e-05, + "loss": 1.0781, "step": 8707 }, { - "epoch": 0.247105561861521, + "epoch": 0.24676244722151378, "grad_norm": 0.0, - "learning_rate": 1.7628448530195527e-05, - "loss": 0.9823, + "learning_rate": 1.7635412396825663e-05, + "loss": 1.1277, "step": 8708 }, { - "epoch": 0.24713393870601588, + "epoch": 0.24679078466377624, "grad_norm": 0.0, - "learning_rate": 1.7627854239034458e-05, - "loss": 1.0887, + "learning_rate": 1.7634819693927254e-05, + "loss": 1.0943, "step": 8709 }, { - "epoch": 0.24716231555051077, + "epoch": 0.2468191221060387, "grad_norm": 0.0, - "learning_rate": 1.7627259883440562e-05, - "loss": 1.1693, + "learning_rate": 1.763422692671792e-05, + "loss": 1.0471, "step": 8710 }, { - "epoch": 0.24719069239500568, + "epoch": 0.24684745954830117, "grad_norm": 0.0, - "learning_rate": 1.7626665463418857e-05, - "loss": 0.8755, + "learning_rate": 1.7633634095202657e-05, + "loss": 0.9356, "step": 8711 }, { - "epoch": 0.24721906923950057, + "epoch": 0.24687579699056364, "grad_norm": 0.0, - "learning_rate": 1.7626070978974367e-05, - "loss": 0.9677, + "learning_rate": 1.7633041199386464e-05, + "loss": 0.8932, "step": 8712 }, { - "epoch": 0.24724744608399546, + "epoch": 0.2469041344328261, "grad_norm": 0.0, - "learning_rate": 1.7625476430112113e-05, - "loss": 1.0793, + "learning_rate": 1.7632448239274328e-05, + "loss": 1.0021, "step": 8713 }, { - "epoch": 0.24727582292849035, + "epoch": 0.24693247187508854, "grad_norm": 0.0, - "learning_rate": 1.7624881816837116e-05, - "loss": 1.0626, + "learning_rate": 1.7631855214871245e-05, + "loss": 0.9988, "step": 8714 }, { - "epoch": 0.24730419977298523, + "epoch": 0.246960809317351, "grad_norm": 0.0, - "learning_rate": 1.76242871391544e-05, - "loss": 0.9073, + "learning_rate": 1.7631262126182215e-05, + "loss": 0.9514, "step": 8715 }, { - "epoch": 0.24733257661748015, + "epoch": 0.24698914675961348, "grad_norm": 0.0, - "learning_rate": 1.7623692397068985e-05, - "loss": 0.9958, + "learning_rate": 1.7630668973212226e-05, + "loss": 0.8931, "step": 8716 }, { - "epoch": 0.24736095346197504, + "epoch": 0.24701748420187594, "grad_norm": 0.0, - "learning_rate": 1.7623097590585902e-05, - "loss": 0.9452, + "learning_rate": 1.7630075755966284e-05, + "loss": 0.9787, "step": 8717 }, { - "epoch": 0.24738933030646992, + "epoch": 0.2470458216441384, "grad_norm": 0.0, - "learning_rate": 1.7622502719710168e-05, - "loss": 1.0641, + "learning_rate": 1.7629482474449373e-05, + "loss": 1.0264, "step": 8718 }, { - "epoch": 0.2474177071509648, + "epoch": 0.24707415908640087, "grad_norm": 0.0, - "learning_rate": 1.762190778444681e-05, - "loss": 0.9488, + "learning_rate": 1.7628889128666503e-05, + "loss": 1.0189, "step": 8719 }, { - "epoch": 0.2474460839954597, + "epoch": 0.2471024965286633, "grad_norm": 0.0, - "learning_rate": 1.762131278480086e-05, - "loss": 1.0462, + "learning_rate": 1.7628295718622666e-05, + "loss": 1.0425, "step": 8720 }, { - "epoch": 0.24747446083995459, + "epoch": 0.24713083397092578, "grad_norm": 0.0, - "learning_rate": 1.7620717720777335e-05, - "loss": 1.0445, + "learning_rate": 1.7627702244322865e-05, + "loss": 0.9296, "step": 8721 }, { - "epoch": 0.2475028376844495, + "epoch": 0.24715917141318824, "grad_norm": 0.0, - "learning_rate": 1.762012259238126e-05, - "loss": 0.9208, + "learning_rate": 1.7627108705772088e-05, + "loss": 1.0456, "step": 8722 }, { - "epoch": 0.2475312145289444, + "epoch": 0.2471875088554507, "grad_norm": 0.0, - "learning_rate": 1.7619527399617676e-05, - "loss": 0.9928, + "learning_rate": 1.7626515102975346e-05, + "loss": 0.9984, "step": 8723 }, { - "epoch": 0.24755959137343928, + "epoch": 0.24721584629771318, "grad_norm": 0.0, - "learning_rate": 1.7618932142491595e-05, - "loss": 1.0656, + "learning_rate": 1.762592143593764e-05, + "loss": 1.0941, "step": 8724 }, { - "epoch": 0.24758796821793416, + "epoch": 0.24724418373997564, "grad_norm": 0.0, - "learning_rate": 1.7618336821008052e-05, - "loss": 1.0432, + "learning_rate": 1.762532770466396e-05, + "loss": 1.0595, "step": 8725 }, { - "epoch": 0.24761634506242905, + "epoch": 0.24727252118223808, "grad_norm": 0.0, - "learning_rate": 1.761774143517208e-05, - "loss": 0.9618, + "learning_rate": 1.7624733909159312e-05, + "loss": 1.036, "step": 8726 }, { - "epoch": 0.24764472190692394, + "epoch": 0.24730085862450055, "grad_norm": 0.0, - "learning_rate": 1.76171459849887e-05, - "loss": 1.1056, + "learning_rate": 1.7624140049428705e-05, + "loss": 0.9857, "step": 8727 }, { - "epoch": 0.24767309875141885, + "epoch": 0.247329196066763, "grad_norm": 0.0, - "learning_rate": 1.7616550470462946e-05, - "loss": 0.809, + "learning_rate": 1.762354612547713e-05, + "loss": 1.0226, "step": 8728 }, { - "epoch": 0.24770147559591374, + "epoch": 0.24735753350902548, "grad_norm": 0.0, - "learning_rate": 1.7615954891599846e-05, - "loss": 1.0281, + "learning_rate": 1.7622952137309596e-05, + "loss": 0.9582, "step": 8729 }, { - "epoch": 0.24772985244040863, + "epoch": 0.24738587095128795, "grad_norm": 0.0, - "learning_rate": 1.7615359248404435e-05, - "loss": 1.0513, + "learning_rate": 1.7622358084931107e-05, + "loss": 0.9609, "step": 8730 }, { - "epoch": 0.24775822928490351, + "epoch": 0.2474142083935504, "grad_norm": 0.0, - "learning_rate": 1.761476354088174e-05, - "loss": 1.0391, + "learning_rate": 1.7621763968346663e-05, + "loss": 0.9624, "step": 8731 }, { - "epoch": 0.2477866061293984, + "epoch": 0.24744254583581285, "grad_norm": 0.0, - "learning_rate": 1.76141677690368e-05, - "loss": 0.9241, + "learning_rate": 1.7621169787561275e-05, + "loss": 1.0211, "step": 8732 }, { - "epoch": 0.24781498297389332, + "epoch": 0.24747088327807532, "grad_norm": 0.0, - "learning_rate": 1.7613571932874638e-05, - "loss": 0.9768, + "learning_rate": 1.7620575542579938e-05, + "loss": 0.8258, "step": 8733 }, { - "epoch": 0.2478433598183882, + "epoch": 0.24749922072033778, "grad_norm": 0.0, - "learning_rate": 1.7612976032400294e-05, - "loss": 0.8853, + "learning_rate": 1.761998123340767e-05, + "loss": 1.0601, "step": 8734 }, { - "epoch": 0.2478717366628831, + "epoch": 0.24752755816260025, "grad_norm": 0.0, - "learning_rate": 1.7612380067618796e-05, - "loss": 0.9733, + "learning_rate": 1.7619386860049466e-05, + "loss": 1.0703, "step": 8735 }, { - "epoch": 0.24790011350737798, + "epoch": 0.2475558956048627, "grad_norm": 0.0, - "learning_rate": 1.7611784038535184e-05, - "loss": 0.9558, + "learning_rate": 1.7618792422510337e-05, + "loss": 0.9662, "step": 8736 }, { - "epoch": 0.24792849035187287, + "epoch": 0.24758423304712518, "grad_norm": 0.0, - "learning_rate": 1.761118794515449e-05, - "loss": 1.0505, + "learning_rate": 1.7618197920795292e-05, + "loss": 0.9862, "step": 8737 }, { - "epoch": 0.24795686719636775, + "epoch": 0.24761257048938762, "grad_norm": 0.0, - "learning_rate": 1.761059178748175e-05, - "loss": 1.0649, + "learning_rate": 1.7617603354909336e-05, + "loss": 0.9096, "step": 8738 }, { - "epoch": 0.24798524404086267, + "epoch": 0.24764090793165008, "grad_norm": 0.0, - "learning_rate": 1.7609995565521997e-05, - "loss": 0.8604, + "learning_rate": 1.761700872485748e-05, + "loss": 0.98, "step": 8739 }, { - "epoch": 0.24801362088535756, + "epoch": 0.24766924537391255, "grad_norm": 0.0, - "learning_rate": 1.760939927928027e-05, - "loss": 0.9388, + "learning_rate": 1.761641403064473e-05, + "loss": 0.9981, "step": 8740 }, { - "epoch": 0.24804199772985244, + "epoch": 0.24769758281617502, "grad_norm": 0.0, - "learning_rate": 1.7608802928761607e-05, - "loss": 0.9746, + "learning_rate": 1.7615819272276095e-05, + "loss": 0.9092, "step": 8741 }, { - "epoch": 0.24807037457434733, + "epoch": 0.24772592025843748, "grad_norm": 0.0, - "learning_rate": 1.760820651397104e-05, - "loss": 0.9803, + "learning_rate": 1.7615224449756588e-05, + "loss": 1.0997, "step": 8742 }, { - "epoch": 0.24809875141884222, + "epoch": 0.24775425770069995, "grad_norm": 0.0, - "learning_rate": 1.7607610034913612e-05, - "loss": 0.9309, + "learning_rate": 1.761462956309122e-05, + "loss": 0.9086, "step": 8743 }, { - "epoch": 0.2481271282633371, + "epoch": 0.2477825951429624, "grad_norm": 0.0, - "learning_rate": 1.7607013491594363e-05, - "loss": 0.8969, + "learning_rate": 1.761403461228499e-05, + "loss": 0.8421, "step": 8744 }, { - "epoch": 0.24815550510783202, + "epoch": 0.24781093258522485, "grad_norm": 0.0, - "learning_rate": 1.7606416884018324e-05, - "loss": 1.037, + "learning_rate": 1.7613439597342928e-05, + "loss": 0.8884, "step": 8745 }, { - "epoch": 0.2481838819523269, + "epoch": 0.24783927002748732, "grad_norm": 0.0, - "learning_rate": 1.7605820212190547e-05, - "loss": 1.0164, + "learning_rate": 1.7612844518270033e-05, + "loss": 0.9792, "step": 8746 }, { - "epoch": 0.2482122587968218, + "epoch": 0.24786760746974978, "grad_norm": 0.0, - "learning_rate": 1.7605223476116056e-05, - "loss": 0.9858, + "learning_rate": 1.7612249375071323e-05, + "loss": 1.0127, "step": 8747 }, { - "epoch": 0.24824063564131668, + "epoch": 0.24789594491201225, "grad_norm": 0.0, - "learning_rate": 1.7604626675799906e-05, - "loss": 1.0216, + "learning_rate": 1.761165416775181e-05, + "loss": 0.9402, "step": 8748 }, { - "epoch": 0.24826901248581157, + "epoch": 0.24792428235427472, "grad_norm": 0.0, - "learning_rate": 1.7604029811247132e-05, - "loss": 1.1393, + "learning_rate": 1.761105889631651e-05, + "loss": 0.8768, "step": 8749 }, { - "epoch": 0.24829738933030646, + "epoch": 0.24795261979653715, "grad_norm": 0.0, - "learning_rate": 1.7603432882462773e-05, - "loss": 1.1622, + "learning_rate": 1.761046356077043e-05, + "loss": 0.9391, "step": 8750 }, { - "epoch": 0.24832576617480137, + "epoch": 0.24798095723879962, "grad_norm": 0.0, - "learning_rate": 1.7602835889451875e-05, - "loss": 0.9677, + "learning_rate": 1.760986816111859e-05, + "loss": 0.9428, "step": 8751 }, { - "epoch": 0.24835414301929626, + "epoch": 0.2480092946810621, "grad_norm": 0.0, - "learning_rate": 1.7602238832219486e-05, - "loss": 0.9548, + "learning_rate": 1.7609272697366008e-05, + "loss": 0.9738, "step": 8752 }, { - "epoch": 0.24838251986379115, + "epoch": 0.24803763212332455, "grad_norm": 0.0, - "learning_rate": 1.760164171077064e-05, - "loss": 0.9413, + "learning_rate": 1.7608677169517693e-05, + "loss": 1.0581, "step": 8753 }, { - "epoch": 0.24841089670828603, + "epoch": 0.24806596956558702, "grad_norm": 0.0, - "learning_rate": 1.7601044525110385e-05, - "loss": 1.0564, + "learning_rate": 1.7608081577578665e-05, + "loss": 1.0092, "step": 8754 }, { - "epoch": 0.24843927355278092, + "epoch": 0.24809430700784948, "grad_norm": 0.0, - "learning_rate": 1.7600447275243766e-05, - "loss": 1.0289, + "learning_rate": 1.7607485921553943e-05, + "loss": 0.9015, "step": 8755 }, { - "epoch": 0.24846765039727584, + "epoch": 0.24812264445011192, "grad_norm": 0.0, - "learning_rate": 1.7599849961175825e-05, - "loss": 1.011, + "learning_rate": 1.760689020144854e-05, + "loss": 0.9581, "step": 8756 }, { - "epoch": 0.24849602724177072, + "epoch": 0.2481509818923744, "grad_norm": 0.0, - "learning_rate": 1.7599252582911613e-05, - "loss": 0.9802, + "learning_rate": 1.760629441726748e-05, + "loss": 0.9992, "step": 8757 }, { - "epoch": 0.2485244040862656, + "epoch": 0.24817931933463686, "grad_norm": 0.0, - "learning_rate": 1.759865514045617e-05, - "loss": 0.9528, + "learning_rate": 1.7605698569015773e-05, + "loss": 0.8892, "step": 8758 }, { - "epoch": 0.2485527809307605, + "epoch": 0.24820765677689932, "grad_norm": 0.0, - "learning_rate": 1.7598057633814547e-05, - "loss": 1.0711, + "learning_rate": 1.7605102656698444e-05, + "loss": 1.0689, "step": 8759 }, { - "epoch": 0.24858115777525538, + "epoch": 0.2482359942191618, "grad_norm": 0.0, - "learning_rate": 1.759746006299179e-05, - "loss": 1.0264, + "learning_rate": 1.7604506680320512e-05, + "loss": 1.056, "step": 8760 }, { - "epoch": 0.24860953461975027, + "epoch": 0.24826433166142425, "grad_norm": 0.0, - "learning_rate": 1.7596862427992945e-05, - "loss": 0.8923, + "learning_rate": 1.7603910639886998e-05, + "loss": 0.9668, "step": 8761 }, { - "epoch": 0.2486379114642452, + "epoch": 0.2482926691036867, "grad_norm": 0.0, - "learning_rate": 1.7596264728823063e-05, - "loss": 0.9815, + "learning_rate": 1.7603314535402915e-05, + "loss": 0.9494, "step": 8762 }, { - "epoch": 0.24866628830874007, + "epoch": 0.24832100654594916, "grad_norm": 0.0, - "learning_rate": 1.759566696548719e-05, - "loss": 0.9076, + "learning_rate": 1.7602718366873296e-05, + "loss": 1.0024, "step": 8763 }, { - "epoch": 0.24869466515323496, + "epoch": 0.24834934398821162, "grad_norm": 0.0, - "learning_rate": 1.7595069137990377e-05, - "loss": 0.9312, + "learning_rate": 1.7602122134303154e-05, + "loss": 0.9982, "step": 8764 }, { - "epoch": 0.24872304199772985, + "epoch": 0.2483776814304741, "grad_norm": 0.0, - "learning_rate": 1.7594471246337676e-05, - "loss": 1.0101, + "learning_rate": 1.7601525837697515e-05, + "loss": 1.0692, "step": 8765 }, { - "epoch": 0.24875141884222474, + "epoch": 0.24840601887273656, "grad_norm": 0.0, - "learning_rate": 1.7593873290534128e-05, - "loss": 1.0176, + "learning_rate": 1.7600929477061403e-05, + "loss": 1.0563, "step": 8766 }, { - "epoch": 0.24877979568671962, + "epoch": 0.24843435631499902, "grad_norm": 0.0, - "learning_rate": 1.75932752705848e-05, - "loss": 0.9065, + "learning_rate": 1.760033305239984e-05, + "loss": 1.128, "step": 8767 }, { - "epoch": 0.24880817253121454, + "epoch": 0.24846269375726146, "grad_norm": 0.0, - "learning_rate": 1.759267718649473e-05, - "loss": 0.9375, + "learning_rate": 1.7599736563717847e-05, + "loss": 0.8914, "step": 8768 }, { - "epoch": 0.24883654937570943, + "epoch": 0.24849103119952393, "grad_norm": 0.0, - "learning_rate": 1.7592079038268975e-05, - "loss": 0.9883, + "learning_rate": 1.759914001102045e-05, + "loss": 0.9995, "step": 8769 }, { - "epoch": 0.2488649262202043, + "epoch": 0.2485193686417864, "grad_norm": 0.0, - "learning_rate": 1.7591480825912585e-05, - "loss": 0.9867, + "learning_rate": 1.7598543394312674e-05, + "loss": 0.9242, "step": 8770 }, { - "epoch": 0.2488933030646992, + "epoch": 0.24854770608404886, "grad_norm": 0.0, - "learning_rate": 1.7590882549430617e-05, - "loss": 0.8681, + "learning_rate": 1.759794671359955e-05, + "loss": 0.9958, "step": 8771 }, { - "epoch": 0.2489216799091941, + "epoch": 0.24857604352631132, "grad_norm": 0.0, - "learning_rate": 1.7590284208828122e-05, - "loss": 0.96, + "learning_rate": 1.7597349968886096e-05, + "loss": 0.8764, "step": 8772 }, { - "epoch": 0.248950056753689, + "epoch": 0.24860438096857376, "grad_norm": 0.0, - "learning_rate": 1.7589685804110157e-05, - "loss": 0.8923, + "learning_rate": 1.759675316017734e-05, + "loss": 0.9525, "step": 8773 }, { - "epoch": 0.2489784335981839, + "epoch": 0.24863271841083623, "grad_norm": 0.0, - "learning_rate": 1.7589087335281775e-05, - "loss": 1.0305, + "learning_rate": 1.7596156287478316e-05, + "loss": 0.9803, "step": 8774 }, { - "epoch": 0.24900681044267878, + "epoch": 0.2486610558530987, "grad_norm": 0.0, - "learning_rate": 1.758848880234803e-05, - "loss": 1.0048, + "learning_rate": 1.7595559350794046e-05, + "loss": 0.9064, "step": 8775 }, { - "epoch": 0.24903518728717366, + "epoch": 0.24868939329536116, "grad_norm": 0.0, - "learning_rate": 1.7587890205313977e-05, - "loss": 0.9798, + "learning_rate": 1.7594962350129555e-05, + "loss": 0.8684, "step": 8776 }, { - "epoch": 0.24906356413166855, + "epoch": 0.24871773073762363, "grad_norm": 0.0, - "learning_rate": 1.7587291544184678e-05, - "loss": 0.8959, + "learning_rate": 1.759436528548988e-05, + "loss": 1.1658, "step": 8777 }, { - "epoch": 0.24909194097616344, + "epoch": 0.2487460681798861, "grad_norm": 0.0, - "learning_rate": 1.7586692818965183e-05, - "loss": 1.0164, + "learning_rate": 1.7593768156880043e-05, + "loss": 0.9651, "step": 8778 }, { - "epoch": 0.24912031782065835, + "epoch": 0.24877440562214853, "grad_norm": 0.0, - "learning_rate": 1.758609402966055e-05, - "loss": 0.9214, + "learning_rate": 1.759317096430508e-05, + "loss": 0.9328, "step": 8779 }, { - "epoch": 0.24914869466515324, + "epoch": 0.248802743064411, "grad_norm": 0.0, - "learning_rate": 1.758549517627585e-05, - "loss": 0.966, + "learning_rate": 1.759257370777002e-05, + "loss": 1.0492, "step": 8780 }, { - "epoch": 0.24917707150964813, + "epoch": 0.24883108050667346, "grad_norm": 0.0, - "learning_rate": 1.7584896258816122e-05, - "loss": 0.9548, + "learning_rate": 1.7591976387279887e-05, + "loss": 0.9515, "step": 8781 }, { - "epoch": 0.24920544835414302, + "epoch": 0.24885941794893593, "grad_norm": 0.0, - "learning_rate": 1.7584297277286436e-05, - "loss": 1.0595, + "learning_rate": 1.759137900283972e-05, + "loss": 0.9738, "step": 8782 }, { - "epoch": 0.2492338251986379, + "epoch": 0.2488877553911984, "grad_norm": 0.0, - "learning_rate": 1.7583698231691856e-05, - "loss": 1.0653, + "learning_rate": 1.759078155445455e-05, + "loss": 0.9691, "step": 8783 }, { - "epoch": 0.2492622020431328, + "epoch": 0.24891609283346086, "grad_norm": 0.0, - "learning_rate": 1.758309912203743e-05, - "loss": 0.9593, + "learning_rate": 1.7590184042129406e-05, + "loss": 1.0466, "step": 8784 }, { - "epoch": 0.2492905788876277, + "epoch": 0.2489444302757233, "grad_norm": 0.0, - "learning_rate": 1.7582499948328225e-05, - "loss": 1.0041, + "learning_rate": 1.7589586465869324e-05, + "loss": 1.037, "step": 8785 }, { - "epoch": 0.2493189557321226, + "epoch": 0.24897276771798577, "grad_norm": 0.0, - "learning_rate": 1.75819007105693e-05, - "loss": 0.9015, + "learning_rate": 1.7588988825679336e-05, + "loss": 0.8962, "step": 8786 }, { - "epoch": 0.24934733257661748, + "epoch": 0.24900110516024823, "grad_norm": 0.0, - "learning_rate": 1.7581301408765727e-05, - "loss": 0.9682, + "learning_rate": 1.758839112156448e-05, + "loss": 0.9132, "step": 8787 }, { - "epoch": 0.24937570942111237, + "epoch": 0.2490294426025107, "grad_norm": 0.0, - "learning_rate": 1.7580702042922556e-05, - "loss": 0.9829, + "learning_rate": 1.7587793353529786e-05, + "loss": 1.0358, "step": 8788 }, { - "epoch": 0.24940408626560726, + "epoch": 0.24905778004477316, "grad_norm": 0.0, - "learning_rate": 1.7580102613044853e-05, - "loss": 0.9566, + "learning_rate": 1.758719552158029e-05, + "loss": 0.9382, "step": 8789 }, { - "epoch": 0.24943246311010214, + "epoch": 0.24908611748703563, "grad_norm": 0.0, - "learning_rate": 1.7579503119137683e-05, - "loss": 1.055, + "learning_rate": 1.7586597625721026e-05, + "loss": 0.9558, "step": 8790 }, { - "epoch": 0.24946083995459706, + "epoch": 0.24911445492929807, "grad_norm": 0.0, - "learning_rate": 1.7578903561206108e-05, - "loss": 0.9541, + "learning_rate": 1.758599966595704e-05, + "loss": 0.8992, "step": 8791 }, { - "epoch": 0.24948921679909195, + "epoch": 0.24914279237156053, "grad_norm": 0.0, - "learning_rate": 1.7578303939255197e-05, - "loss": 0.9613, + "learning_rate": 1.7585401642293356e-05, + "loss": 0.9956, "step": 8792 }, { - "epoch": 0.24951759364358683, + "epoch": 0.249171129813823, "grad_norm": 0.0, - "learning_rate": 1.7577704253290007e-05, - "loss": 0.9897, + "learning_rate": 1.758480355473502e-05, + "loss": 1.0311, "step": 8793 }, { - "epoch": 0.24954597048808172, + "epoch": 0.24919946725608547, "grad_norm": 0.0, - "learning_rate": 1.7577104503315613e-05, - "loss": 0.9136, + "learning_rate": 1.7584205403287064e-05, + "loss": 1.0103, "step": 8794 }, { - "epoch": 0.2495743473325766, + "epoch": 0.24922780469834793, "grad_norm": 0.0, - "learning_rate": 1.7576504689337074e-05, - "loss": 0.9767, + "learning_rate": 1.7583607187954532e-05, + "loss": 0.9628, "step": 8795 }, { - "epoch": 0.24960272417707152, + "epoch": 0.2492561421406104, "grad_norm": 0.0, - "learning_rate": 1.7575904811359463e-05, - "loss": 0.9122, + "learning_rate": 1.758300890874246e-05, + "loss": 0.9415, "step": 8796 }, { - "epoch": 0.2496311010215664, + "epoch": 0.24928447958287284, "grad_norm": 0.0, - "learning_rate": 1.7575304869387843e-05, - "loss": 1.0441, + "learning_rate": 1.758241056565589e-05, + "loss": 0.8233, "step": 8797 }, { - "epoch": 0.2496594778660613, + "epoch": 0.2493128170251353, "grad_norm": 0.0, - "learning_rate": 1.7574704863427277e-05, - "loss": 1.0068, + "learning_rate": 1.758181215869986e-05, + "loss": 0.9835, "step": 8798 }, { - "epoch": 0.24968785471055618, + "epoch": 0.24934115446739777, "grad_norm": 0.0, - "learning_rate": 1.7574104793482846e-05, - "loss": 0.9509, + "learning_rate": 1.758121368787941e-05, + "loss": 1.1035, "step": 8799 }, { - "epoch": 0.24971623155505107, + "epoch": 0.24936949190966023, "grad_norm": 0.0, - "learning_rate": 1.7573504659559604e-05, - "loss": 0.9482, + "learning_rate": 1.758061515319958e-05, + "loss": 0.9125, "step": 8800 }, { - "epoch": 0.24974460839954596, + "epoch": 0.2493978293519227, "grad_norm": 0.0, - "learning_rate": 1.7572904461662633e-05, - "loss": 0.9083, + "learning_rate": 1.7580016554665412e-05, + "loss": 1.0477, "step": 8801 }, { - "epoch": 0.24977298524404087, + "epoch": 0.24942616679418517, "grad_norm": 0.0, - "learning_rate": 1.7572304199796995e-05, - "loss": 0.9618, + "learning_rate": 1.7579417892281955e-05, + "loss": 0.9566, "step": 8802 }, { - "epoch": 0.24980136208853576, + "epoch": 0.2494545042364476, "grad_norm": 0.0, - "learning_rate": 1.7571703873967766e-05, - "loss": 0.9263, + "learning_rate": 1.7578819166054244e-05, + "loss": 0.9155, "step": 8803 }, { - "epoch": 0.24982973893303065, + "epoch": 0.24948284167871007, "grad_norm": 0.0, - "learning_rate": 1.7571103484180008e-05, - "loss": 0.9186, + "learning_rate": 1.7578220375987326e-05, + "loss": 1.0193, "step": 8804 }, { - "epoch": 0.24985811577752554, + "epoch": 0.24951117912097254, "grad_norm": 0.0, - "learning_rate": 1.7570503030438802e-05, - "loss": 0.9784, + "learning_rate": 1.757762152208624e-05, + "loss": 1.0933, "step": 8805 }, { - "epoch": 0.24988649262202042, + "epoch": 0.249539516563235, "grad_norm": 0.0, - "learning_rate": 1.756990251274922e-05, - "loss": 0.9687, + "learning_rate": 1.757702260435604e-05, + "loss": 0.9804, "step": 8806 }, { - "epoch": 0.2499148694665153, + "epoch": 0.24956785400549747, "grad_norm": 0.0, - "learning_rate": 1.7569301931116326e-05, - "loss": 0.9389, + "learning_rate": 1.757642362280176e-05, + "loss": 0.9328, "step": 8807 }, { - "epoch": 0.24994324631101023, + "epoch": 0.24959619144775994, "grad_norm": 0.0, - "learning_rate": 1.75687012855452e-05, - "loss": 0.963, + "learning_rate": 1.7575824577428453e-05, + "loss": 0.9536, "step": 8808 }, { - "epoch": 0.2499716231555051, + "epoch": 0.24962452889002237, "grad_norm": 0.0, - "learning_rate": 1.7568100576040916e-05, - "loss": 1.0299, + "learning_rate": 1.757522546824116e-05, + "loss": 0.984, "step": 8809 }, { - "epoch": 0.25, + "epoch": 0.24965286633228484, "grad_norm": 0.0, - "learning_rate": 1.7567499802608544e-05, - "loss": 0.8824, + "learning_rate": 1.7574626295244935e-05, + "loss": 1.0624, "step": 8810 }, { - "epoch": 0.2500283768444949, + "epoch": 0.2496812037745473, "grad_norm": 0.0, - "learning_rate": 1.756689896525316e-05, - "loss": 1.0324, + "learning_rate": 1.7574027058444815e-05, + "loss": 0.9753, "step": 8811 }, { - "epoch": 0.2500567536889898, + "epoch": 0.24970954121680977, "grad_norm": 0.0, - "learning_rate": 1.7566298063979844e-05, - "loss": 1.0819, + "learning_rate": 1.757342775784585e-05, + "loss": 0.9726, "step": 8812 }, { - "epoch": 0.2500851305334847, + "epoch": 0.24973787865907224, "grad_norm": 0.0, - "learning_rate": 1.7565697098793663e-05, - "loss": 0.9228, + "learning_rate": 1.7572828393453098e-05, + "loss": 0.9379, "step": 8813 }, { - "epoch": 0.25011350737797955, + "epoch": 0.2497662161013347, "grad_norm": 0.0, - "learning_rate": 1.7565096069699703e-05, - "loss": 1.026, + "learning_rate": 1.7572228965271595e-05, + "loss": 0.954, "step": 8814 }, { - "epoch": 0.25014188422247446, + "epoch": 0.24979455354359714, "grad_norm": 0.0, - "learning_rate": 1.7564494976703034e-05, - "loss": 0.9896, + "learning_rate": 1.75716294733064e-05, + "loss": 0.9212, "step": 8815 }, { - "epoch": 0.2501702610669694, + "epoch": 0.2498228909858596, "grad_norm": 0.0, - "learning_rate": 1.7563893819808736e-05, - "loss": 1.0139, + "learning_rate": 1.7571029917562553e-05, + "loss": 0.9776, "step": 8816 }, { - "epoch": 0.25019863791146424, + "epoch": 0.24985122842812207, "grad_norm": 0.0, - "learning_rate": 1.756329259902189e-05, - "loss": 0.8813, + "learning_rate": 1.7570430298045113e-05, + "loss": 0.9688, "step": 8817 }, { - "epoch": 0.25022701475595915, + "epoch": 0.24987956587038454, "grad_norm": 0.0, - "learning_rate": 1.7562691314347566e-05, - "loss": 0.9969, + "learning_rate": 1.7569830614759126e-05, + "loss": 1.076, "step": 8818 }, { - "epoch": 0.250255391600454, + "epoch": 0.249907903312647, "grad_norm": 0.0, - "learning_rate": 1.7562089965790855e-05, - "loss": 0.8759, + "learning_rate": 1.7569230867709648e-05, + "loss": 0.9628, "step": 8819 }, { - "epoch": 0.25028376844494893, + "epoch": 0.24993624075490947, "grad_norm": 0.0, - "learning_rate": 1.7561488553356824e-05, - "loss": 1.018, + "learning_rate": 1.7568631056901723e-05, + "loss": 1.0044, "step": 8820 }, { - "epoch": 0.2503121452894438, + "epoch": 0.2499645781971719, "grad_norm": 0.0, - "learning_rate": 1.756088707705056e-05, - "loss": 0.8551, + "learning_rate": 1.756803118234041e-05, + "loss": 0.9566, "step": 8821 }, { - "epoch": 0.2503405221339387, + "epoch": 0.24999291563943438, "grad_norm": 0.0, - "learning_rate": 1.7560285536877147e-05, - "loss": 0.9184, + "learning_rate": 1.756743124403076e-05, + "loss": 1.084, "step": 8822 }, { - "epoch": 0.2503688989784336, + "epoch": 0.25002125308169687, "grad_norm": 0.0, - "learning_rate": 1.755968393284166e-05, - "loss": 0.8664, + "learning_rate": 1.756683124197783e-05, + "loss": 1.0502, "step": 8823 }, { - "epoch": 0.2503972758229285, + "epoch": 0.2500495905239593, "grad_norm": 0.0, - "learning_rate": 1.7559082264949182e-05, - "loss": 0.9549, + "learning_rate": 1.7566231176186664e-05, + "loss": 1.0301, "step": 8824 }, { - "epoch": 0.2504256526674234, + "epoch": 0.25007792796622175, "grad_norm": 0.0, - "learning_rate": 1.75584805332048e-05, - "loss": 1.0728, + "learning_rate": 1.7565631046662328e-05, + "loss": 0.9993, "step": 8825 }, { - "epoch": 0.25045402951191825, + "epoch": 0.25010626540848424, "grad_norm": 0.0, - "learning_rate": 1.755787873761359e-05, - "loss": 0.9756, + "learning_rate": 1.756503085340987e-05, + "loss": 0.9434, "step": 8826 }, { - "epoch": 0.25048240635641317, + "epoch": 0.2501346028507467, "grad_norm": 0.0, - "learning_rate": 1.755727687818064e-05, - "loss": 1.0006, + "learning_rate": 1.7564430596434346e-05, + "loss": 0.9082, "step": 8827 }, { - "epoch": 0.2505107832009081, + "epoch": 0.2501629402930092, "grad_norm": 0.0, - "learning_rate": 1.755667495491103e-05, - "loss": 0.9063, + "learning_rate": 1.756383027574082e-05, + "loss": 1.0791, "step": 8828 }, { - "epoch": 0.25053916004540294, + "epoch": 0.2501912777352716, "grad_norm": 0.0, - "learning_rate": 1.755607296780985e-05, - "loss": 0.9843, + "learning_rate": 1.756322989133434e-05, + "loss": 0.9465, "step": 8829 }, { - "epoch": 0.25056753688989786, + "epoch": 0.25021961517753405, "grad_norm": 0.0, - "learning_rate": 1.755547091688218e-05, - "loss": 0.9801, + "learning_rate": 1.7562629443219964e-05, + "loss": 1.0094, "step": 8830 }, { - "epoch": 0.2505959137343927, + "epoch": 0.25024795261979654, "grad_norm": 0.0, - "learning_rate": 1.7554868802133106e-05, - "loss": 1.0599, + "learning_rate": 1.7562028931402754e-05, + "loss": 1.0856, "step": 8831 }, { - "epoch": 0.25062429057888763, + "epoch": 0.250276290062059, "grad_norm": 0.0, - "learning_rate": 1.7554266623567718e-05, - "loss": 1.0281, + "learning_rate": 1.7561428355887765e-05, + "loss": 1.0853, "step": 8832 }, { - "epoch": 0.25065266742338255, + "epoch": 0.2503046275043215, "grad_norm": 0.0, - "learning_rate": 1.75536643811911e-05, - "loss": 0.947, + "learning_rate": 1.756082771668006e-05, + "loss": 0.9202, "step": 8833 }, { - "epoch": 0.2506810442678774, + "epoch": 0.2503329649465839, "grad_norm": 0.0, - "learning_rate": 1.755306207500834e-05, - "loss": 1.0408, + "learning_rate": 1.756022701378469e-05, + "loss": 0.8565, "step": 8834 }, { - "epoch": 0.2507094211123723, + "epoch": 0.2503613023888464, "grad_norm": 0.0, - "learning_rate": 1.7552459705024526e-05, - "loss": 1.0132, + "learning_rate": 1.7559626247206723e-05, + "loss": 0.9851, "step": 8835 }, { - "epoch": 0.2507377979568672, + "epoch": 0.25038963983110885, "grad_norm": 0.0, - "learning_rate": 1.7551857271244744e-05, - "loss": 0.8492, + "learning_rate": 1.755902541695122e-05, + "loss": 0.9071, "step": 8836 }, { - "epoch": 0.2507661748013621, + "epoch": 0.2504179772733713, "grad_norm": 0.0, - "learning_rate": 1.7551254773674083e-05, - "loss": 1.0596, + "learning_rate": 1.7558424523023238e-05, + "loss": 1.0063, "step": 8837 }, { - "epoch": 0.25079455164585696, + "epoch": 0.2504463147156338, "grad_norm": 0.0, - "learning_rate": 1.7550652212317634e-05, - "loss": 0.9611, + "learning_rate": 1.7557823565427838e-05, + "loss": 0.9538, "step": 8838 }, { - "epoch": 0.25082292849035187, + "epoch": 0.2504746521578962, "grad_norm": 0.0, - "learning_rate": 1.7550049587180485e-05, - "loss": 0.7995, + "learning_rate": 1.755722254417008e-05, + "loss": 0.9835, "step": 8839 }, { - "epoch": 0.2508513053348468, + "epoch": 0.2505029896001587, "grad_norm": 0.0, - "learning_rate": 1.754944689826773e-05, - "loss": 0.7791, + "learning_rate": 1.7556621459255038e-05, + "loss": 1.0049, "step": 8840 }, { - "epoch": 0.25087968217934165, + "epoch": 0.25053132704242115, "grad_norm": 0.0, - "learning_rate": 1.754884414558446e-05, - "loss": 0.9638, + "learning_rate": 1.7556020310687762e-05, + "loss": 0.9952, "step": 8841 }, { - "epoch": 0.25090805902383656, + "epoch": 0.2505596644846836, "grad_norm": 0.0, - "learning_rate": 1.754824132913576e-05, - "loss": 1.0816, + "learning_rate": 1.7555419098473323e-05, + "loss": 1.039, "step": 8842 }, { - "epoch": 0.2509364358683314, + "epoch": 0.2505880019269461, "grad_norm": 0.0, - "learning_rate": 1.754763844892673e-05, - "loss": 0.9412, + "learning_rate": 1.7554817822616782e-05, + "loss": 0.8627, "step": 8843 }, { - "epoch": 0.25096481271282634, + "epoch": 0.2506163393692085, "grad_norm": 0.0, - "learning_rate": 1.7547035504962455e-05, - "loss": 0.9378, + "learning_rate": 1.7554216483123205e-05, + "loss": 1.0007, "step": 8844 }, { - "epoch": 0.25099318955732125, + "epoch": 0.250644676811471, "grad_norm": 0.0, - "learning_rate": 1.754643249724803e-05, - "loss": 1.0235, + "learning_rate": 1.755361507999766e-05, + "loss": 1.0656, "step": 8845 }, { - "epoch": 0.2510215664018161, + "epoch": 0.25067301425373345, "grad_norm": 0.0, - "learning_rate": 1.7545829425788555e-05, - "loss": 0.9556, + "learning_rate": 1.7553013613245208e-05, + "loss": 1.0149, "step": 8846 }, { - "epoch": 0.251049943246311, + "epoch": 0.25070135169599594, "grad_norm": 0.0, - "learning_rate": 1.754522629058912e-05, - "loss": 0.9419, + "learning_rate": 1.7552412082870916e-05, + "loss": 0.9451, "step": 8847 }, { - "epoch": 0.2510783200908059, + "epoch": 0.2507296891382584, "grad_norm": 0.0, - "learning_rate": 1.7544623091654817e-05, - "loss": 0.9249, + "learning_rate": 1.7551810488879856e-05, + "loss": 0.9518, "step": 8848 }, { - "epoch": 0.2511066969353008, + "epoch": 0.2507580265805208, "grad_norm": 0.0, - "learning_rate": 1.7544019828990744e-05, - "loss": 1.006, + "learning_rate": 1.7551208831277092e-05, + "loss": 1.033, "step": 8849 }, { - "epoch": 0.2511350737797957, + "epoch": 0.2507863640227833, "grad_norm": 0.0, - "learning_rate": 1.7543416502601994e-05, - "loss": 1.0126, + "learning_rate": 1.7550607110067686e-05, + "loss": 1.0366, "step": 8850 }, { - "epoch": 0.2511634506242906, + "epoch": 0.25081470146504575, "grad_norm": 0.0, - "learning_rate": 1.754281311249367e-05, - "loss": 0.977, + "learning_rate": 1.7550005325256718e-05, + "loss": 0.8785, "step": 8851 }, { - "epoch": 0.2511918274687855, + "epoch": 0.25084303890730825, "grad_norm": 0.0, - "learning_rate": 1.7542209658670862e-05, - "loss": 0.9639, + "learning_rate": 1.7549403476849253e-05, + "loss": 1.007, "step": 8852 }, { - "epoch": 0.25122020431328035, + "epoch": 0.2508713763495707, "grad_norm": 0.0, - "learning_rate": 1.7541606141138672e-05, - "loss": 1.0082, + "learning_rate": 1.754880156485035e-05, + "loss": 1.0218, "step": 8853 }, { - "epoch": 0.25124858115777526, + "epoch": 0.2508997137918331, "grad_norm": 0.0, - "learning_rate": 1.754100255990219e-05, - "loss": 1.0636, + "learning_rate": 1.7548199589265097e-05, + "loss": 1.0333, "step": 8854 }, { - "epoch": 0.2512769580022701, + "epoch": 0.2509280512340956, "grad_norm": 0.0, - "learning_rate": 1.7540398914966528e-05, - "loss": 1.0366, + "learning_rate": 1.7547597550098558e-05, + "loss": 0.9761, "step": 8855 }, { - "epoch": 0.25130533484676504, + "epoch": 0.25095638867635806, "grad_norm": 0.0, - "learning_rate": 1.7539795206336773e-05, - "loss": 1.0555, + "learning_rate": 1.7546995447355795e-05, + "loss": 0.9664, "step": 8856 }, { - "epoch": 0.25133371169125995, + "epoch": 0.25098472611862055, "grad_norm": 0.0, - "learning_rate": 1.7539191434018034e-05, - "loss": 0.9541, + "learning_rate": 1.754639328104189e-05, + "loss": 1.0026, "step": 8857 }, { - "epoch": 0.2513620885357548, + "epoch": 0.251013063560883, "grad_norm": 0.0, - "learning_rate": 1.75385875980154e-05, - "loss": 0.8282, + "learning_rate": 1.754579105116191e-05, + "loss": 1.0254, "step": 8858 }, { - "epoch": 0.25139046538024973, + "epoch": 0.2510414010031455, "grad_norm": 0.0, - "learning_rate": 1.753798369833398e-05, - "loss": 1.0555, + "learning_rate": 1.7545188757720933e-05, + "loss": 0.9786, "step": 8859 }, { - "epoch": 0.2514188422247446, + "epoch": 0.2510697384454079, "grad_norm": 0.0, - "learning_rate": 1.7537379734978876e-05, - "loss": 1.0468, + "learning_rate": 1.754458640072403e-05, + "loss": 0.9319, "step": 8860 }, { - "epoch": 0.2514472190692395, + "epoch": 0.25109807588767036, "grad_norm": 0.0, - "learning_rate": 1.7536775707955182e-05, - "loss": 1.0359, + "learning_rate": 1.7543983980176268e-05, + "loss": 0.9115, "step": 8861 }, { - "epoch": 0.2514755959137344, + "epoch": 0.25112641332993285, "grad_norm": 0.0, - "learning_rate": 1.7536171617268004e-05, - "loss": 0.8684, + "learning_rate": 1.754338149608273e-05, + "loss": 0.9726, "step": 8862 }, { - "epoch": 0.2515039727582293, + "epoch": 0.2511547507721953, "grad_norm": 0.0, - "learning_rate": 1.753556746292245e-05, - "loss": 0.9166, + "learning_rate": 1.754277894844849e-05, + "loss": 0.9039, "step": 8863 }, { - "epoch": 0.2515323496027242, + "epoch": 0.2511830882144578, "grad_norm": 0.0, - "learning_rate": 1.7534963244923618e-05, - "loss": 1.0201, + "learning_rate": 1.7542176337278623e-05, + "loss": 0.9021, "step": 8864 }, { - "epoch": 0.25156072644721905, + "epoch": 0.2512114256567202, "grad_norm": 0.0, - "learning_rate": 1.7534358963276606e-05, - "loss": 1.0473, + "learning_rate": 1.7541573662578205e-05, + "loss": 1.1199, "step": 8865 }, { - "epoch": 0.25158910329171397, + "epoch": 0.25123976309898266, "grad_norm": 0.0, - "learning_rate": 1.7533754617986533e-05, - "loss": 0.9533, + "learning_rate": 1.754097092435231e-05, + "loss": 0.8827, "step": 8866 }, { - "epoch": 0.2516174801362088, + "epoch": 0.25126810054124515, "grad_norm": 0.0, - "learning_rate": 1.7533150209058492e-05, - "loss": 1.0204, + "learning_rate": 1.754036812260601e-05, + "loss": 0.9763, "step": 8867 }, { - "epoch": 0.25164585698070374, + "epoch": 0.2512964379835076, "grad_norm": 0.0, - "learning_rate": 1.7532545736497596e-05, - "loss": 1.0272, + "learning_rate": 1.75397652573444e-05, + "loss": 1.0322, "step": 8868 }, { - "epoch": 0.25167423382519866, + "epoch": 0.2513247754257701, "grad_norm": 0.0, - "learning_rate": 1.7531941200308946e-05, - "loss": 0.9093, + "learning_rate": 1.7539162328572543e-05, + "loss": 1.0536, "step": 8869 }, { - "epoch": 0.2517026106696935, + "epoch": 0.2513531128680325, "grad_norm": 0.0, - "learning_rate": 1.753133660049765e-05, - "loss": 0.988, + "learning_rate": 1.7538559336295522e-05, + "loss": 0.982, "step": 8870 }, { - "epoch": 0.25173098751418843, + "epoch": 0.251381450310295, "grad_norm": 0.0, - "learning_rate": 1.7530731937068815e-05, - "loss": 0.9501, + "learning_rate": 1.7537956280518417e-05, + "loss": 1.0329, "step": 8871 }, { - "epoch": 0.2517593643586833, + "epoch": 0.25140978775255746, "grad_norm": 0.0, - "learning_rate": 1.753012721002755e-05, - "loss": 0.9921, + "learning_rate": 1.7537353161246305e-05, + "loss": 0.8557, "step": 8872 }, { - "epoch": 0.2517877412031782, + "epoch": 0.2514381251948199, "grad_norm": 0.0, - "learning_rate": 1.752952241937896e-05, - "loss": 0.8772, + "learning_rate": 1.753674997848427e-05, + "loss": 1.027, "step": 8873 }, { - "epoch": 0.2518161180476731, + "epoch": 0.2514664626370824, "grad_norm": 0.0, - "learning_rate": 1.7528917565128155e-05, - "loss": 0.8755, + "learning_rate": 1.753614673223739e-05, + "loss": 0.9955, "step": 8874 }, { - "epoch": 0.251844494892168, + "epoch": 0.2514948000793448, "grad_norm": 0.0, - "learning_rate": 1.752831264728025e-05, - "loss": 0.9656, + "learning_rate": 1.753554342251075e-05, + "loss": 1.0342, "step": 8875 }, { - "epoch": 0.2518728717366629, + "epoch": 0.2515231375216073, "grad_norm": 0.0, - "learning_rate": 1.7527707665840346e-05, - "loss": 0.947, + "learning_rate": 1.7534940049309427e-05, + "loss": 0.9392, "step": 8876 }, { - "epoch": 0.25190124858115776, + "epoch": 0.25155147496386976, "grad_norm": 0.0, - "learning_rate": 1.7527102620813553e-05, - "loss": 1.0178, + "learning_rate": 1.7534336612638508e-05, + "loss": 0.9722, "step": 8877 }, { - "epoch": 0.25192962542565267, + "epoch": 0.2515798124061322, "grad_norm": 0.0, - "learning_rate": 1.752649751220499e-05, - "loss": 0.8695, + "learning_rate": 1.7533733112503073e-05, + "loss": 1.0148, "step": 8878 }, { - "epoch": 0.2519580022701476, + "epoch": 0.2516081498483947, "grad_norm": 0.0, - "learning_rate": 1.7525892340019766e-05, - "loss": 0.9932, + "learning_rate": 1.7533129548908205e-05, + "loss": 0.9904, "step": 8879 }, { - "epoch": 0.25198637911464244, + "epoch": 0.25163648729065713, "grad_norm": 0.0, - "learning_rate": 1.7525287104262987e-05, - "loss": 0.8247, + "learning_rate": 1.7532525921858988e-05, + "loss": 0.9356, "step": 8880 }, { - "epoch": 0.25201475595913736, + "epoch": 0.2516648247329196, "grad_norm": 0.0, - "learning_rate": 1.752468180493977e-05, - "loss": 1.0127, + "learning_rate": 1.7531922231360515e-05, + "loss": 0.8821, "step": 8881 }, { - "epoch": 0.2520431328036322, + "epoch": 0.25169316217518206, "grad_norm": 0.0, - "learning_rate": 1.752407644205523e-05, - "loss": 1.0518, + "learning_rate": 1.7531318477417858e-05, + "loss": 1.0133, "step": 8882 }, { - "epoch": 0.25207150964812713, + "epoch": 0.25172149961744456, "grad_norm": 0.0, - "learning_rate": 1.752347101561448e-05, - "loss": 0.9966, + "learning_rate": 1.7530714660036112e-05, + "loss": 0.9321, "step": 8883 }, { - "epoch": 0.252099886492622, + "epoch": 0.251749837059707, "grad_norm": 0.0, - "learning_rate": 1.7522865525622628e-05, - "loss": 1.0996, + "learning_rate": 1.7530110779220358e-05, + "loss": 0.9274, "step": 8884 }, { - "epoch": 0.2521282633371169, + "epoch": 0.25177817450196943, "grad_norm": 0.0, - "learning_rate": 1.7522259972084793e-05, - "loss": 0.8813, + "learning_rate": 1.7529506834975686e-05, + "loss": 1.0283, "step": 8885 }, { - "epoch": 0.2521566401816118, + "epoch": 0.2518065119442319, "grad_norm": 0.0, - "learning_rate": 1.7521654355006094e-05, - "loss": 0.9245, + "learning_rate": 1.752890282730718e-05, + "loss": 1.0468, "step": 8886 }, { - "epoch": 0.2521850170261067, + "epoch": 0.25183484938649436, "grad_norm": 0.0, - "learning_rate": 1.752104867439164e-05, - "loss": 0.9632, + "learning_rate": 1.752829875621993e-05, + "loss": 1.0485, "step": 8887 }, { - "epoch": 0.2522133938706016, + "epoch": 0.25186318682875686, "grad_norm": 0.0, - "learning_rate": 1.7520442930246546e-05, - "loss": 0.9857, + "learning_rate": 1.7527694621719024e-05, + "loss": 0.9546, "step": 8888 }, { - "epoch": 0.25224177071509646, + "epoch": 0.2518915242710193, "grad_norm": 0.0, - "learning_rate": 1.7519837122575936e-05, - "loss": 1.0023, + "learning_rate": 1.7527090423809553e-05, + "loss": 1.0159, "step": 8889 }, { - "epoch": 0.2522701475595914, + "epoch": 0.25191986171328173, "grad_norm": 0.0, - "learning_rate": 1.7519231251384923e-05, - "loss": 1.054, + "learning_rate": 1.7526486162496604e-05, + "loss": 1.016, "step": 8890 }, { - "epoch": 0.2522985244040863, + "epoch": 0.25194819915554423, "grad_norm": 0.0, - "learning_rate": 1.751862531667863e-05, - "loss": 0.9589, + "learning_rate": 1.7525881837785264e-05, + "loss": 0.9186, "step": 8891 }, { - "epoch": 0.25232690124858115, + "epoch": 0.25197653659780667, "grad_norm": 0.0, - "learning_rate": 1.751801931846216e-05, - "loss": 1.0415, + "learning_rate": 1.752527744968063e-05, + "loss": 0.9149, "step": 8892 }, { - "epoch": 0.25235527809307606, + "epoch": 0.25200487404006916, "grad_norm": 0.0, - "learning_rate": 1.751741325674065e-05, - "loss": 1.097, + "learning_rate": 1.7524672998187788e-05, + "loss": 1.0427, "step": 8893 }, { - "epoch": 0.2523836549375709, + "epoch": 0.2520332114823316, "grad_norm": 0.0, - "learning_rate": 1.7516807131519216e-05, - "loss": 0.989, + "learning_rate": 1.7524068483311832e-05, + "loss": 0.9424, "step": 8894 }, { - "epoch": 0.25241203178206584, + "epoch": 0.2520615489245941, "grad_norm": 0.0, - "learning_rate": 1.751620094280297e-05, - "loss": 0.9121, + "learning_rate": 1.7523463905057853e-05, + "loss": 1.1085, "step": 8895 }, { - "epoch": 0.25244040862656075, + "epoch": 0.25208988636685653, "grad_norm": 0.0, - "learning_rate": 1.7515594690597037e-05, - "loss": 0.9318, + "learning_rate": 1.7522859263430942e-05, + "loss": 1.032, "step": 8896 }, { - "epoch": 0.2524687854710556, + "epoch": 0.25211822380911897, "grad_norm": 0.0, - "learning_rate": 1.7514988374906535e-05, - "loss": 0.9277, + "learning_rate": 1.7522254558436195e-05, + "loss": 0.9312, "step": 8897 }, { - "epoch": 0.2524971623155505, + "epoch": 0.25214656125138146, "grad_norm": 0.0, - "learning_rate": 1.7514381995736592e-05, - "loss": 0.9368, + "learning_rate": 1.7521649790078705e-05, + "loss": 0.9683, "step": 8898 }, { - "epoch": 0.2525255391600454, + "epoch": 0.2521748986936439, "grad_norm": 0.0, - "learning_rate": 1.7513775553092328e-05, - "loss": 0.9211, + "learning_rate": 1.7521044958363567e-05, + "loss": 0.9224, "step": 8899 }, { - "epoch": 0.2525539160045403, + "epoch": 0.2522032361359064, "grad_norm": 0.0, - "learning_rate": 1.7513169046978862e-05, - "loss": 0.978, + "learning_rate": 1.7520440063295874e-05, + "loss": 0.938, "step": 8900 }, { - "epoch": 0.25258229284903516, + "epoch": 0.25223157357816883, "grad_norm": 0.0, - "learning_rate": 1.7512562477401317e-05, - "loss": 0.9907, + "learning_rate": 1.751983510488072e-05, + "loss": 0.8417, "step": 8901 }, { - "epoch": 0.2526106696935301, + "epoch": 0.25225991102043127, "grad_norm": 0.0, - "learning_rate": 1.7511955844364822e-05, - "loss": 1.0281, + "learning_rate": 1.75192300831232e-05, + "loss": 0.9411, "step": 8902 }, { - "epoch": 0.252639046538025, + "epoch": 0.25228824846269376, "grad_norm": 0.0, - "learning_rate": 1.75113491478745e-05, - "loss": 0.9802, + "learning_rate": 1.751862499802842e-05, + "loss": 0.9693, "step": 8903 }, { - "epoch": 0.25266742338251985, + "epoch": 0.2523165859049562, "grad_norm": 0.0, - "learning_rate": 1.7510742387935472e-05, - "loss": 0.9885, + "learning_rate": 1.7518019849601466e-05, + "loss": 0.9898, "step": 8904 }, { - "epoch": 0.25269580022701477, + "epoch": 0.2523449233472187, "grad_norm": 0.0, - "learning_rate": 1.7510135564552863e-05, - "loss": 0.9972, + "learning_rate": 1.7517414637847435e-05, + "loss": 1.0319, "step": 8905 }, { - "epoch": 0.2527241770715096, + "epoch": 0.25237326078948114, "grad_norm": 0.0, - "learning_rate": 1.7509528677731806e-05, - "loss": 1.0062, + "learning_rate": 1.7516809362771434e-05, + "loss": 0.8142, "step": 8906 }, { - "epoch": 0.25275255391600454, + "epoch": 0.25240159823174363, "grad_norm": 0.0, - "learning_rate": 1.750892172747742e-05, - "loss": 0.9767, + "learning_rate": 1.7516204024378555e-05, + "loss": 0.9356, "step": 8907 }, { - "epoch": 0.25278093076049946, + "epoch": 0.25242993567400607, "grad_norm": 0.0, - "learning_rate": 1.7508314713794835e-05, - "loss": 1.0803, + "learning_rate": 1.75155986226739e-05, + "loss": 0.9771, "step": 8908 }, { - "epoch": 0.2528093076049943, + "epoch": 0.2524582731162685, "grad_norm": 0.0, - "learning_rate": 1.750770763668918e-05, - "loss": 0.9093, + "learning_rate": 1.7514993157662564e-05, + "loss": 1.0602, "step": 8909 }, { - "epoch": 0.25283768444948923, + "epoch": 0.252486610558531, "grad_norm": 0.0, - "learning_rate": 1.7507100496165575e-05, - "loss": 0.9785, + "learning_rate": 1.7514387629349655e-05, + "loss": 0.9417, "step": 8910 }, { - "epoch": 0.2528660612939841, + "epoch": 0.25251494800079344, "grad_norm": 0.0, - "learning_rate": 1.750649329222916e-05, - "loss": 1.0118, + "learning_rate": 1.751378203774026e-05, + "loss": 0.9658, "step": 8911 }, { - "epoch": 0.252894438138479, + "epoch": 0.25254328544305593, "grad_norm": 0.0, - "learning_rate": 1.7505886024885055e-05, - "loss": 0.8594, + "learning_rate": 1.7513176382839496e-05, + "loss": 0.8992, "step": 8912 }, { - "epoch": 0.2529228149829739, + "epoch": 0.25257162288531837, "grad_norm": 0.0, - "learning_rate": 1.7505278694138397e-05, - "loss": 1.0207, + "learning_rate": 1.7512570664652456e-05, + "loss": 1.0089, "step": 8913 }, { - "epoch": 0.2529511918274688, + "epoch": 0.2525999603275808, "grad_norm": 0.0, - "learning_rate": 1.750467129999431e-05, - "loss": 1.0447, + "learning_rate": 1.751196488318424e-05, + "loss": 1.1005, "step": 8914 }, { - "epoch": 0.2529795686719637, + "epoch": 0.2526282977698433, "grad_norm": 0.0, - "learning_rate": 1.750406384245793e-05, - "loss": 0.9709, + "learning_rate": 1.751135903843996e-05, + "loss": 1.0483, "step": 8915 }, { - "epoch": 0.25300794551645855, + "epoch": 0.25265663521210574, "grad_norm": 0.0, - "learning_rate": 1.7503456321534383e-05, - "loss": 0.9977, + "learning_rate": 1.751075313042471e-05, + "loss": 0.9622, "step": 8916 }, { - "epoch": 0.25303632236095347, + "epoch": 0.25268497265436823, "grad_norm": 0.0, - "learning_rate": 1.7502848737228803e-05, - "loss": 1.029, + "learning_rate": 1.7510147159143598e-05, + "loss": 1.0704, "step": 8917 }, { - "epoch": 0.25306469920544833, + "epoch": 0.25271331009663067, "grad_norm": 0.0, - "learning_rate": 1.7502241089546323e-05, - "loss": 1.0437, + "learning_rate": 1.750954112460173e-05, + "loss": 0.9399, "step": 8918 }, { - "epoch": 0.25309307604994324, + "epoch": 0.25274164753889317, "grad_norm": 0.0, - "learning_rate": 1.7501633378492077e-05, - "loss": 1.0286, + "learning_rate": 1.7508935026804202e-05, + "loss": 1.0589, "step": 8919 }, { - "epoch": 0.25312145289443816, + "epoch": 0.2527699849811556, "grad_norm": 0.0, - "learning_rate": 1.7501025604071195e-05, - "loss": 0.8714, + "learning_rate": 1.750832886575613e-05, + "loss": 0.9965, "step": 8920 }, { - "epoch": 0.253149829738933, + "epoch": 0.25279832242341804, "grad_norm": 0.0, - "learning_rate": 1.7500417766288812e-05, - "loss": 0.9651, + "learning_rate": 1.750772264146262e-05, + "loss": 1.0163, "step": 8921 }, { - "epoch": 0.25317820658342793, + "epoch": 0.25282665986568054, "grad_norm": 0.0, - "learning_rate": 1.7499809865150063e-05, - "loss": 0.9512, + "learning_rate": 1.7507116353928767e-05, + "loss": 1.031, "step": 8922 }, { - "epoch": 0.2532065834279228, + "epoch": 0.252854997307943, "grad_norm": 0.0, - "learning_rate": 1.7499201900660083e-05, - "loss": 0.9521, + "learning_rate": 1.7506510003159687e-05, + "loss": 0.9787, "step": 8923 }, { - "epoch": 0.2532349602724177, + "epoch": 0.25288333475020547, "grad_norm": 0.0, - "learning_rate": 1.749859387282401e-05, - "loss": 1.0243, + "learning_rate": 1.7505903589160488e-05, + "loss": 1.0089, "step": 8924 }, { - "epoch": 0.2532633371169126, + "epoch": 0.2529116721924679, "grad_norm": 0.0, - "learning_rate": 1.7497985781646974e-05, - "loss": 1.0199, + "learning_rate": 1.7505297111936273e-05, + "loss": 0.9874, "step": 8925 }, { - "epoch": 0.2532917139614075, + "epoch": 0.25294000963473035, "grad_norm": 0.0, - "learning_rate": 1.7497377627134117e-05, - "loss": 0.9519, + "learning_rate": 1.750469057149216e-05, + "loss": 0.8704, "step": 8926 }, { - "epoch": 0.2533200908059024, + "epoch": 0.25296834707699284, "grad_norm": 0.0, - "learning_rate": 1.7496769409290572e-05, - "loss": 0.9446, + "learning_rate": 1.7504083967833246e-05, + "loss": 0.9461, "step": 8927 }, { - "epoch": 0.25334846765039726, + "epoch": 0.2529966845192553, "grad_norm": 0.0, - "learning_rate": 1.7496161128121482e-05, - "loss": 1.0608, + "learning_rate": 1.7503477300964643e-05, + "loss": 0.9249, "step": 8928 }, { - "epoch": 0.2533768444948922, + "epoch": 0.25302502196151777, "grad_norm": 0.0, - "learning_rate": 1.749555278363198e-05, - "loss": 0.9379, + "learning_rate": 1.750287057089147e-05, + "loss": 0.9179, "step": 8929 }, { - "epoch": 0.2534052213393871, + "epoch": 0.2530533594037802, "grad_norm": 0.0, - "learning_rate": 1.749494437582721e-05, - "loss": 0.8561, + "learning_rate": 1.7502263777618833e-05, + "loss": 0.9538, "step": 8930 }, { - "epoch": 0.25343359818388195, + "epoch": 0.2530816968460427, "grad_norm": 0.0, - "learning_rate": 1.74943359047123e-05, - "loss": 0.9733, + "learning_rate": 1.750165692115184e-05, + "loss": 1.0073, "step": 8931 }, { - "epoch": 0.25346197502837686, + "epoch": 0.25311003428830514, "grad_norm": 0.0, - "learning_rate": 1.7493727370292405e-05, - "loss": 0.9107, + "learning_rate": 1.7501050001495603e-05, + "loss": 1.0394, "step": 8932 }, { - "epoch": 0.2534903518728717, + "epoch": 0.2531383717305676, "grad_norm": 0.0, - "learning_rate": 1.7493118772572655e-05, - "loss": 0.8714, + "learning_rate": 1.7500443018655237e-05, + "loss": 0.9763, "step": 8933 }, { - "epoch": 0.25351872871736664, + "epoch": 0.2531667091728301, "grad_norm": 0.0, - "learning_rate": 1.7492510111558196e-05, - "loss": 0.9196, + "learning_rate": 1.749983597263586e-05, + "loss": 0.9826, "step": 8934 }, { - "epoch": 0.2535471055618615, + "epoch": 0.2531950466150925, "grad_norm": 0.0, - "learning_rate": 1.7491901387254163e-05, - "loss": 0.9586, + "learning_rate": 1.749922886344257e-05, + "loss": 1.0378, "step": 8935 }, { - "epoch": 0.2535754824063564, + "epoch": 0.253223384057355, "grad_norm": 0.0, - "learning_rate": 1.7491292599665704e-05, - "loss": 1.0678, + "learning_rate": 1.7498621691080497e-05, + "loss": 1.011, "step": 8936 }, { - "epoch": 0.2536038592508513, + "epoch": 0.25325172149961744, "grad_norm": 0.0, - "learning_rate": 1.7490683748797964e-05, - "loss": 0.8449, + "learning_rate": 1.749801445555475e-05, + "loss": 0.9242, "step": 8937 }, { - "epoch": 0.2536322360953462, + "epoch": 0.2532800589418799, "grad_norm": 0.0, - "learning_rate": 1.7490074834656077e-05, - "loss": 0.9202, + "learning_rate": 1.749740715687044e-05, + "loss": 1.0359, "step": 8938 }, { - "epoch": 0.2536606129398411, + "epoch": 0.2533083963841424, "grad_norm": 0.0, - "learning_rate": 1.7489465857245193e-05, - "loss": 0.8793, + "learning_rate": 1.7496799795032685e-05, + "loss": 0.9464, "step": 8939 }, { - "epoch": 0.25368898978433596, + "epoch": 0.2533367338264048, "grad_norm": 0.0, - "learning_rate": 1.7488856816570455e-05, - "loss": 0.9125, + "learning_rate": 1.7496192370046602e-05, + "loss": 1.0431, "step": 8940 }, { - "epoch": 0.2537173666288309, + "epoch": 0.2533650712686673, "grad_norm": 0.0, - "learning_rate": 1.7488247712637006e-05, - "loss": 1.0054, + "learning_rate": 1.7495584881917307e-05, + "loss": 0.8592, "step": 8941 }, { - "epoch": 0.2537457434733258, + "epoch": 0.25339340871092975, "grad_norm": 0.0, - "learning_rate": 1.7487638545449993e-05, - "loss": 0.8951, + "learning_rate": 1.7494977330649917e-05, + "loss": 0.9219, "step": 8942 }, { - "epoch": 0.25377412031782065, + "epoch": 0.25342174615319224, "grad_norm": 0.0, - "learning_rate": 1.7487029315014558e-05, - "loss": 0.9027, + "learning_rate": 1.749436971624955e-05, + "loss": 1.0256, "step": 8943 }, { - "epoch": 0.25380249716231557, + "epoch": 0.2534500835954547, "grad_norm": 0.0, - "learning_rate": 1.748642002133585e-05, - "loss": 0.9996, + "learning_rate": 1.7493762038721326e-05, + "loss": 1.0442, "step": 8944 }, { - "epoch": 0.2538308740068104, + "epoch": 0.2534784210377171, "grad_norm": 0.0, - "learning_rate": 1.7485810664419015e-05, - "loss": 1.0469, + "learning_rate": 1.7493154298070357e-05, + "loss": 1.0061, "step": 8945 }, { - "epoch": 0.25385925085130534, + "epoch": 0.2535067584799796, "grad_norm": 0.0, - "learning_rate": 1.7485201244269204e-05, - "loss": 1.0377, + "learning_rate": 1.749254649430177e-05, + "loss": 0.8333, "step": 8946 }, { - "epoch": 0.2538876276958002, + "epoch": 0.25353509592224205, "grad_norm": 0.0, - "learning_rate": 1.748459176089156e-05, - "loss": 1.009, + "learning_rate": 1.749193862742068e-05, + "loss": 1.0479, "step": 8947 }, { - "epoch": 0.2539160045402951, + "epoch": 0.25356343336450454, "grad_norm": 0.0, - "learning_rate": 1.7483982214291233e-05, - "loss": 0.961, + "learning_rate": 1.7491330697432213e-05, + "loss": 0.9428, "step": 8948 }, { - "epoch": 0.25394438138479003, + "epoch": 0.253591770806767, "grad_norm": 0.0, - "learning_rate": 1.7483372604473373e-05, - "loss": 0.9069, + "learning_rate": 1.749072270434148e-05, + "loss": 0.9365, "step": 8949 }, { - "epoch": 0.2539727582292849, + "epoch": 0.2536201082490294, "grad_norm": 0.0, - "learning_rate": 1.7482762931443125e-05, - "loss": 0.9093, + "learning_rate": 1.7490114648153615e-05, + "loss": 1.0727, "step": 8950 }, { - "epoch": 0.2540011350737798, + "epoch": 0.2536484456912919, "grad_norm": 0.0, - "learning_rate": 1.7482153195205646e-05, - "loss": 0.919, + "learning_rate": 1.7489506528873724e-05, + "loss": 0.9582, "step": 8951 }, { - "epoch": 0.25402951191827466, + "epoch": 0.25367678313355435, "grad_norm": 0.0, - "learning_rate": 1.7481543395766078e-05, - "loss": 0.9918, + "learning_rate": 1.7488898346506948e-05, + "loss": 1.0581, "step": 8952 }, { - "epoch": 0.2540578887627696, + "epoch": 0.25370512057581684, "grad_norm": 0.0, - "learning_rate": 1.7480933533129582e-05, - "loss": 0.9447, + "learning_rate": 1.7488290101058392e-05, + "loss": 0.8865, "step": 8953 }, { - "epoch": 0.2540862656072645, + "epoch": 0.2537334580180793, "grad_norm": 0.0, - "learning_rate": 1.74803236073013e-05, - "loss": 0.8662, + "learning_rate": 1.748768179253319e-05, + "loss": 1.0156, "step": 8954 }, { - "epoch": 0.25411464245175935, + "epoch": 0.2537617954603418, "grad_norm": 0.0, - "learning_rate": 1.7479713618286388e-05, - "loss": 0.9032, + "learning_rate": 1.7487073420936466e-05, + "loss": 0.9869, "step": 8955 }, { - "epoch": 0.25414301929625427, + "epoch": 0.2537901329026042, "grad_norm": 0.0, - "learning_rate": 1.7479103566090003e-05, - "loss": 0.9465, + "learning_rate": 1.748646498627334e-05, + "loss": 1.0666, "step": 8956 }, { - "epoch": 0.25417139614074913, + "epoch": 0.25381847034486665, "grad_norm": 0.0, - "learning_rate": 1.747849345071729e-05, - "loss": 0.9562, + "learning_rate": 1.7485856488548944e-05, + "loss": 1.0115, "step": 8957 }, { - "epoch": 0.25419977298524404, + "epoch": 0.25384680778712915, "grad_norm": 0.0, - "learning_rate": 1.7477883272173408e-05, - "loss": 0.9279, + "learning_rate": 1.7485247927768393e-05, + "loss": 0.9426, "step": 8958 }, { - "epoch": 0.25422814982973896, + "epoch": 0.2538751452293916, "grad_norm": 0.0, - "learning_rate": 1.7477273030463512e-05, - "loss": 0.8417, + "learning_rate": 1.7484639303936823e-05, + "loss": 1.0776, "step": 8959 }, { - "epoch": 0.2542565266742338, + "epoch": 0.2539034826716541, "grad_norm": 0.0, - "learning_rate": 1.747666272559275e-05, - "loss": 0.8952, + "learning_rate": 1.7484030617059354e-05, + "loss": 1.0192, "step": 8960 }, { - "epoch": 0.25428490351872873, + "epoch": 0.2539318201139165, "grad_norm": 0.0, - "learning_rate": 1.7476052357566287e-05, - "loss": 0.9753, + "learning_rate": 1.748342186714112e-05, + "loss": 0.9797, "step": 8961 }, { - "epoch": 0.2543132803632236, + "epoch": 0.25396015755617896, "grad_norm": 0.0, - "learning_rate": 1.747544192638927e-05, - "loss": 0.9742, + "learning_rate": 1.7482813054187242e-05, + "loss": 0.9195, "step": 8962 }, { - "epoch": 0.2543416572077185, + "epoch": 0.25398849499844145, "grad_norm": 0.0, - "learning_rate": 1.747483143206686e-05, - "loss": 1.0278, + "learning_rate": 1.748220417820285e-05, + "loss": 0.8775, "step": 8963 }, { - "epoch": 0.25437003405221337, + "epoch": 0.2540168324407039, "grad_norm": 0.0, - "learning_rate": 1.7474220874604213e-05, - "loss": 1.0451, + "learning_rate": 1.7481595239193073e-05, + "loss": 0.953, "step": 8964 }, { - "epoch": 0.2543984108967083, + "epoch": 0.2540451698829664, "grad_norm": 0.0, - "learning_rate": 1.7473610254006484e-05, - "loss": 0.9789, + "learning_rate": 1.7480986237163044e-05, + "loss": 0.8688, "step": 8965 }, { - "epoch": 0.2544267877412032, + "epoch": 0.2540735073252288, "grad_norm": 0.0, - "learning_rate": 1.7472999570278837e-05, - "loss": 0.8198, + "learning_rate": 1.748037717211789e-05, + "loss": 0.9865, "step": 8966 }, { - "epoch": 0.25445516458569806, + "epoch": 0.2541018447674913, "grad_norm": 0.0, - "learning_rate": 1.7472388823426426e-05, - "loss": 1.056, + "learning_rate": 1.7479768044062743e-05, + "loss": 0.9199, "step": 8967 }, { - "epoch": 0.25448354143019297, + "epoch": 0.25413018220975375, "grad_norm": 0.0, - "learning_rate": 1.747177801345441e-05, - "loss": 0.9493, + "learning_rate": 1.7479158853002726e-05, + "loss": 0.9859, "step": 8968 }, { - "epoch": 0.25451191827468783, + "epoch": 0.2541585196520162, "grad_norm": 0.0, - "learning_rate": 1.7471167140367948e-05, - "loss": 0.8911, + "learning_rate": 1.7478549598942983e-05, + "loss": 0.843, "step": 8969 }, { - "epoch": 0.25454029511918275, + "epoch": 0.2541868570942787, "grad_norm": 0.0, - "learning_rate": 1.7470556204172204e-05, - "loss": 0.9714, + "learning_rate": 1.7477940281888635e-05, + "loss": 0.898, "step": 8970 }, { - "epoch": 0.25456867196367766, + "epoch": 0.2542151945365411, "grad_norm": 0.0, - "learning_rate": 1.7469945204872333e-05, - "loss": 0.9906, + "learning_rate": 1.747733090184482e-05, + "loss": 1.0404, "step": 8971 }, { - "epoch": 0.2545970488081725, + "epoch": 0.2542435319788036, "grad_norm": 0.0, - "learning_rate": 1.7469334142473502e-05, - "loss": 0.981, + "learning_rate": 1.7476721458816672e-05, + "loss": 1.0886, "step": 8972 }, { - "epoch": 0.25462542565266744, + "epoch": 0.25427186942106605, "grad_norm": 0.0, - "learning_rate": 1.7468723016980866e-05, - "loss": 1.0119, + "learning_rate": 1.747611195280932e-05, + "loss": 1.0102, "step": 8973 }, { - "epoch": 0.2546538024971623, + "epoch": 0.2543002068633285, "grad_norm": 0.0, - "learning_rate": 1.7468111828399594e-05, - "loss": 0.9625, + "learning_rate": 1.7475502383827906e-05, + "loss": 0.9126, "step": 8974 }, { - "epoch": 0.2546821793416572, + "epoch": 0.254328544305591, "grad_norm": 0.0, - "learning_rate": 1.7467500576734842e-05, - "loss": 1.0379, + "learning_rate": 1.7474892751877553e-05, + "loss": 1.0552, "step": 8975 }, { - "epoch": 0.2547105561861521, + "epoch": 0.2543568817478534, "grad_norm": 0.0, - "learning_rate": 1.746688926199178e-05, - "loss": 0.9909, + "learning_rate": 1.747428305696341e-05, + "loss": 0.8888, "step": 8976 }, { - "epoch": 0.254738933030647, + "epoch": 0.2543852191901159, "grad_norm": 0.0, - "learning_rate": 1.7466277884175572e-05, - "loss": 1.0437, + "learning_rate": 1.7473673299090598e-05, + "loss": 0.9364, "step": 8977 }, { - "epoch": 0.2547673098751419, + "epoch": 0.25441355663237836, "grad_norm": 0.0, - "learning_rate": 1.7465666443291374e-05, - "loss": 1.1022, + "learning_rate": 1.7473063478264264e-05, + "loss": 1.0481, "step": 8978 }, { - "epoch": 0.25479568671963676, + "epoch": 0.2544418940746408, "grad_norm": 0.0, - "learning_rate": 1.7465054939344357e-05, - "loss": 0.9736, + "learning_rate": 1.747245359448954e-05, + "loss": 1.108, "step": 8979 }, { - "epoch": 0.2548240635641317, + "epoch": 0.2544702315169033, "grad_norm": 0.0, - "learning_rate": 1.7464443372339688e-05, - "loss": 0.9988, + "learning_rate": 1.7471843647771565e-05, + "loss": 0.9456, "step": 8980 }, { - "epoch": 0.25485244040862653, + "epoch": 0.2544985689591657, "grad_norm": 0.0, - "learning_rate": 1.7463831742282526e-05, - "loss": 0.8109, + "learning_rate": 1.747123363811548e-05, + "loss": 0.988, "step": 8981 }, { - "epoch": 0.25488081725312145, + "epoch": 0.2545269064014282, "grad_norm": 0.0, - "learning_rate": 1.7463220049178046e-05, - "loss": 0.9764, + "learning_rate": 1.7470623565526414e-05, + "loss": 0.9526, "step": 8982 }, { - "epoch": 0.25490919409761637, + "epoch": 0.25455524384369066, "grad_norm": 0.0, - "learning_rate": 1.7462608293031407e-05, - "loss": 1.0772, + "learning_rate": 1.7470013430009512e-05, + "loss": 0.9218, "step": 8983 }, { - "epoch": 0.2549375709421112, + "epoch": 0.25458358128595315, "grad_norm": 0.0, - "learning_rate": 1.7461996473847783e-05, - "loss": 0.8696, + "learning_rate": 1.7469403231569918e-05, + "loss": 0.9936, "step": 8984 }, { - "epoch": 0.25496594778660614, + "epoch": 0.2546119187282156, "grad_norm": 0.0, - "learning_rate": 1.7461384591632335e-05, - "loss": 1.0001, + "learning_rate": 1.7468792970212764e-05, + "loss": 0.9933, "step": 8985 }, { - "epoch": 0.254994324631101, + "epoch": 0.25464025617047803, "grad_norm": 0.0, - "learning_rate": 1.7460772646390243e-05, - "loss": 1.0035, + "learning_rate": 1.7468182645943193e-05, + "loss": 0.9103, "step": 8986 }, { - "epoch": 0.2550227014755959, + "epoch": 0.2546685936127405, "grad_norm": 0.0, - "learning_rate": 1.7460160638126663e-05, - "loss": 1.0175, + "learning_rate": 1.7467572258766345e-05, + "loss": 1.0228, "step": 8987 }, { - "epoch": 0.25505107832009083, + "epoch": 0.25469693105500296, "grad_norm": 0.0, - "learning_rate": 1.7459548566846773e-05, - "loss": 0.9693, + "learning_rate": 1.7466961808687367e-05, + "loss": 0.9895, "step": 8988 }, { - "epoch": 0.2550794551645857, + "epoch": 0.25472526849726546, "grad_norm": 0.0, - "learning_rate": 1.745893643255574e-05, - "loss": 0.9529, + "learning_rate": 1.746635129571139e-05, + "loss": 1.0419, "step": 8989 }, { - "epoch": 0.2551078320090806, + "epoch": 0.2547536059395279, "grad_norm": 0.0, - "learning_rate": 1.7458324235258737e-05, - "loss": 1.0083, + "learning_rate": 1.746574071984357e-05, + "loss": 0.985, "step": 8990 }, { - "epoch": 0.25513620885357546, + "epoch": 0.25478194338179033, "grad_norm": 0.0, - "learning_rate": 1.7457711974960934e-05, - "loss": 0.9703, + "learning_rate": 1.746513008108904e-05, + "loss": 0.9623, "step": 8991 }, { - "epoch": 0.2551645856980704, + "epoch": 0.2548102808240528, "grad_norm": 0.0, - "learning_rate": 1.74570996516675e-05, - "loss": 1.1212, + "learning_rate": 1.746451937945295e-05, + "loss": 1.0104, "step": 8992 }, { - "epoch": 0.2551929625425653, + "epoch": 0.25483861826631526, "grad_norm": 0.0, - "learning_rate": 1.7456487265383614e-05, - "loss": 1.0193, + "learning_rate": 1.746390861494044e-05, + "loss": 1.0494, "step": 8993 }, { - "epoch": 0.25522133938706015, + "epoch": 0.25486695570857776, "grad_norm": 0.0, - "learning_rate": 1.745587481611444e-05, - "loss": 0.9706, + "learning_rate": 1.7463297787556656e-05, + "loss": 0.9316, "step": 8994 }, { - "epoch": 0.25524971623155507, + "epoch": 0.2548952931508402, "grad_norm": 0.0, - "learning_rate": 1.745526230386516e-05, - "loss": 0.8932, + "learning_rate": 1.746268689730674e-05, + "loss": 1.0141, "step": 8995 }, { - "epoch": 0.25527809307604993, + "epoch": 0.2549236305931027, "grad_norm": 0.0, - "learning_rate": 1.7454649728640944e-05, - "loss": 1.0075, + "learning_rate": 1.7462075944195848e-05, + "loss": 0.9817, "step": 8996 }, { - "epoch": 0.25530646992054484, + "epoch": 0.25495196803536513, "grad_norm": 0.0, - "learning_rate": 1.7454037090446968e-05, - "loss": 0.9648, + "learning_rate": 1.7461464928229116e-05, + "loss": 0.9633, "step": 8997 }, { - "epoch": 0.2553348467650397, + "epoch": 0.25498030547762757, "grad_norm": 0.0, - "learning_rate": 1.7453424389288404e-05, - "loss": 1.0944, + "learning_rate": 1.7460853849411692e-05, + "loss": 0.9532, "step": 8998 }, { - "epoch": 0.2553632236095346, + "epoch": 0.25500864291989006, "grad_norm": 0.0, - "learning_rate": 1.745281162517043e-05, - "loss": 1.0303, + "learning_rate": 1.746024270774873e-05, + "loss": 1.0372, "step": 8999 }, { - "epoch": 0.25539160045402953, + "epoch": 0.2550369803621525, "grad_norm": 0.0, - "learning_rate": 1.7452198798098217e-05, - "loss": 0.9361, + "learning_rate": 1.745963150324537e-05, + "loss": 1.0228, "step": 9000 }, { - "epoch": 0.2554199772985244, + "epoch": 0.255065317804415, "grad_norm": 0.0, - "learning_rate": 1.7451585908076948e-05, - "loss": 0.9443, + "learning_rate": 1.745902023590676e-05, + "loss": 0.9158, "step": 9001 }, { - "epoch": 0.2554483541430193, + "epoch": 0.25509365524667743, "grad_norm": 0.0, - "learning_rate": 1.74509729551118e-05, - "loss": 1.1018, + "learning_rate": 1.7458408905738064e-05, + "loss": 0.9378, "step": 9002 }, { - "epoch": 0.25547673098751417, + "epoch": 0.25512199268893987, "grad_norm": 0.0, - "learning_rate": 1.745035993920795e-05, - "loss": 1.0009, + "learning_rate": 1.745779751274441e-05, + "loss": 1.0021, "step": 9003 }, { - "epoch": 0.2555051078320091, + "epoch": 0.25515033013120236, "grad_norm": 0.0, - "learning_rate": 1.744974686037057e-05, - "loss": 0.8754, + "learning_rate": 1.7457186056930963e-05, + "loss": 0.9874, "step": 9004 }, { - "epoch": 0.255533484676504, + "epoch": 0.2551786675734648, "grad_norm": 0.0, - "learning_rate": 1.7449133718604845e-05, - "loss": 0.9922, + "learning_rate": 1.745657453830287e-05, + "loss": 1.0591, "step": 9005 }, { - "epoch": 0.25556186152099886, + "epoch": 0.2552070050157273, "grad_norm": 0.0, - "learning_rate": 1.7448520513915955e-05, - "loss": 1.178, + "learning_rate": 1.7455962956865273e-05, + "loss": 0.9651, "step": 9006 }, { - "epoch": 0.25559023836549377, + "epoch": 0.25523534245798973, "grad_norm": 0.0, - "learning_rate": 1.7447907246309072e-05, - "loss": 1.0175, + "learning_rate": 1.745535131262334e-05, + "loss": 0.971, "step": 9007 }, { - "epoch": 0.25561861520998863, + "epoch": 0.2552636799002522, "grad_norm": 0.0, - "learning_rate": 1.744729391578939e-05, - "loss": 0.9973, + "learning_rate": 1.745473960558221e-05, + "loss": 0.9677, "step": 9008 }, { - "epoch": 0.25564699205448355, + "epoch": 0.25529201734251467, "grad_norm": 0.0, - "learning_rate": 1.7446680522362073e-05, - "loss": 0.9768, + "learning_rate": 1.745412783574704e-05, + "loss": 0.9854, "step": 9009 }, { - "epoch": 0.25567536889897846, + "epoch": 0.2553203547847771, "grad_norm": 0.0, - "learning_rate": 1.7446067066032312e-05, - "loss": 0.8979, + "learning_rate": 1.7453516003122982e-05, + "loss": 0.9942, "step": 9010 }, { - "epoch": 0.2557037457434733, + "epoch": 0.2553486922270396, "grad_norm": 0.0, - "learning_rate": 1.744545354680529e-05, - "loss": 1.0576, + "learning_rate": 1.7452904107715196e-05, + "loss": 0.8927, "step": 9011 }, { - "epoch": 0.25573212258796824, + "epoch": 0.25537702966930204, "grad_norm": 0.0, - "learning_rate": 1.7444839964686186e-05, - "loss": 0.9692, + "learning_rate": 1.7452292149528827e-05, + "loss": 0.9128, "step": 9012 }, { - "epoch": 0.2557604994324631, + "epoch": 0.25540536711156453, "grad_norm": 0.0, - "learning_rate": 1.7444226319680188e-05, - "loss": 0.8579, + "learning_rate": 1.7451680128569033e-05, + "loss": 0.9964, "step": 9013 }, { - "epoch": 0.255788876276958, + "epoch": 0.25543370455382697, "grad_norm": 0.0, - "learning_rate": 1.7443612611792473e-05, - "loss": 1.0057, + "learning_rate": 1.7451068044840974e-05, + "loss": 0.9831, "step": 9014 }, { - "epoch": 0.25581725312145287, + "epoch": 0.2554620419960894, "grad_norm": 0.0, - "learning_rate": 1.744299884102823e-05, - "loss": 1.0295, + "learning_rate": 1.74504558983498e-05, + "loss": 0.9507, "step": 9015 }, { - "epoch": 0.2558456299659478, + "epoch": 0.2554903794383519, "grad_norm": 0.0, - "learning_rate": 1.7442385007392636e-05, - "loss": 1.0068, + "learning_rate": 1.744984368910067e-05, + "loss": 0.9868, "step": 9016 }, { - "epoch": 0.2558740068104427, + "epoch": 0.25551871688061434, "grad_norm": 0.0, - "learning_rate": 1.7441771110890884e-05, - "loss": 0.8499, + "learning_rate": 1.744923141709874e-05, + "loss": 0.9454, "step": 9017 }, { - "epoch": 0.25590238365493756, + "epoch": 0.25554705432287683, "grad_norm": 0.0, - "learning_rate": 1.7441157151528157e-05, - "loss": 0.9609, + "learning_rate": 1.7448619082349166e-05, + "loss": 0.9263, "step": 9018 }, { - "epoch": 0.2559307604994325, + "epoch": 0.25557539176513927, "grad_norm": 0.0, - "learning_rate": 1.7440543129309643e-05, - "loss": 1.0969, + "learning_rate": 1.7448006684857108e-05, + "loss": 0.9549, "step": 9019 }, { - "epoch": 0.25595913734392733, + "epoch": 0.25560372920740176, "grad_norm": 0.0, - "learning_rate": 1.7439929044240522e-05, - "loss": 1.0021, + "learning_rate": 1.7447394224627725e-05, + "loss": 0.9905, "step": 9020 }, { - "epoch": 0.25598751418842225, + "epoch": 0.2556320666496642, "grad_norm": 0.0, - "learning_rate": 1.7439314896325988e-05, - "loss": 0.9981, + "learning_rate": 1.7446781701666174e-05, + "loss": 0.9181, "step": 9021 }, { - "epoch": 0.25601589103291716, + "epoch": 0.25566040409192664, "grad_norm": 0.0, - "learning_rate": 1.743870068557123e-05, - "loss": 1.0104, + "learning_rate": 1.7446169115977616e-05, + "loss": 0.9806, "step": 9022 }, { - "epoch": 0.256044267877412, + "epoch": 0.25568874153418913, "grad_norm": 0.0, - "learning_rate": 1.743808641198143e-05, - "loss": 0.9424, + "learning_rate": 1.7445556467567212e-05, + "loss": 1.1248, "step": 9023 }, { - "epoch": 0.25607264472190694, + "epoch": 0.2557170789764516, "grad_norm": 0.0, - "learning_rate": 1.743747207556178e-05, - "loss": 0.8839, + "learning_rate": 1.744494375644012e-05, + "loss": 1.0304, "step": 9024 }, { - "epoch": 0.2561010215664018, + "epoch": 0.25574541641871407, "grad_norm": 0.0, - "learning_rate": 1.743685767631747e-05, - "loss": 1.0042, + "learning_rate": 1.74443309826015e-05, + "loss": 1.0296, "step": 9025 }, { - "epoch": 0.2561293984108967, + "epoch": 0.2557737538609765, "grad_norm": 0.0, - "learning_rate": 1.7436243214253686e-05, - "loss": 0.9697, + "learning_rate": 1.7443718146056517e-05, + "loss": 1.0566, "step": 9026 }, { - "epoch": 0.2561577752553916, + "epoch": 0.25580209130323894, "grad_norm": 0.0, - "learning_rate": 1.7435628689375622e-05, - "loss": 1.0327, + "learning_rate": 1.7443105246810333e-05, + "loss": 0.9468, "step": 9027 }, { - "epoch": 0.2561861520998865, + "epoch": 0.25583042874550144, "grad_norm": 0.0, - "learning_rate": 1.7435014101688474e-05, - "loss": 0.8754, + "learning_rate": 1.744249228486811e-05, + "loss": 0.886, "step": 9028 }, { - "epoch": 0.2562145289443814, + "epoch": 0.2558587661877639, "grad_norm": 0.0, - "learning_rate": 1.743439945119742e-05, - "loss": 1.0306, + "learning_rate": 1.744187926023501e-05, + "loss": 0.8913, "step": 9029 }, { - "epoch": 0.25624290578887626, + "epoch": 0.25588710363002637, "grad_norm": 0.0, - "learning_rate": 1.7433784737907662e-05, - "loss": 0.9953, + "learning_rate": 1.7441266172916195e-05, + "loss": 0.9438, "step": 9030 }, { - "epoch": 0.2562712826333712, + "epoch": 0.2559154410722888, "grad_norm": 0.0, - "learning_rate": 1.743316996182439e-05, - "loss": 0.91, + "learning_rate": 1.7440653022916834e-05, + "loss": 0.9578, "step": 9031 }, { - "epoch": 0.25629965947786604, + "epoch": 0.2559437785145513, "grad_norm": 0.0, - "learning_rate": 1.74325551229528e-05, - "loss": 0.9887, + "learning_rate": 1.7440039810242087e-05, + "loss": 1.0214, "step": 9032 }, { - "epoch": 0.25632803632236095, + "epoch": 0.25597211595681374, "grad_norm": 0.0, - "learning_rate": 1.7431940221298082e-05, - "loss": 0.9624, + "learning_rate": 1.7439426534897127e-05, + "loss": 0.89, "step": 9033 }, { - "epoch": 0.25635641316685587, + "epoch": 0.2560004533990762, "grad_norm": 0.0, - "learning_rate": 1.7431325256865427e-05, - "loss": 0.8724, + "learning_rate": 1.7438813196887112e-05, + "loss": 1.1207, "step": 9034 }, { - "epoch": 0.2563847900113507, + "epoch": 0.25602879084133867, "grad_norm": 0.0, - "learning_rate": 1.7430710229660037e-05, - "loss": 0.9746, + "learning_rate": 1.743819979621721e-05, + "loss": 0.9716, "step": 9035 }, { - "epoch": 0.25641316685584564, + "epoch": 0.2560571282836011, "grad_norm": 0.0, - "learning_rate": 1.74300951396871e-05, - "loss": 0.9318, + "learning_rate": 1.743758633289259e-05, + "loss": 0.9999, "step": 9036 }, { - "epoch": 0.2564415437003405, + "epoch": 0.2560854657258636, "grad_norm": 0.0, - "learning_rate": 1.7429479986951822e-05, - "loss": 0.9634, + "learning_rate": 1.7436972806918418e-05, + "loss": 0.9565, "step": 9037 }, { - "epoch": 0.2564699205448354, + "epoch": 0.25611380316812604, "grad_norm": 0.0, - "learning_rate": 1.7428864771459387e-05, - "loss": 0.9923, + "learning_rate": 1.7436359218299865e-05, + "loss": 0.9341, "step": 9038 }, { - "epoch": 0.25649829738933033, + "epoch": 0.2561421406103885, "grad_norm": 0.0, - "learning_rate": 1.7428249493215e-05, - "loss": 1.0334, + "learning_rate": 1.7435745567042096e-05, + "loss": 0.9368, "step": 9039 }, { - "epoch": 0.2565266742338252, + "epoch": 0.256170478052651, "grad_norm": 0.0, - "learning_rate": 1.7427634152223857e-05, - "loss": 1.0047, + "learning_rate": 1.7435131853150277e-05, + "loss": 0.9933, "step": 9040 }, { - "epoch": 0.2565550510783201, + "epoch": 0.2561988154949134, "grad_norm": 0.0, - "learning_rate": 1.742701874849115e-05, - "loss": 0.9982, + "learning_rate": 1.7434518076629586e-05, + "loss": 0.9966, "step": 9041 }, { - "epoch": 0.25658342792281497, + "epoch": 0.2562271529371759, "grad_norm": 0.0, - "learning_rate": 1.7426403282022084e-05, - "loss": 0.9948, + "learning_rate": 1.7433904237485186e-05, + "loss": 0.9773, "step": 9042 }, { - "epoch": 0.2566118047673099, + "epoch": 0.25625549037943834, "grad_norm": 0.0, - "learning_rate": 1.7425787752821853e-05, - "loss": 1.016, + "learning_rate": 1.743329033572225e-05, + "loss": 0.9656, "step": 9043 }, { - "epoch": 0.25664018161180474, + "epoch": 0.25628382782170084, "grad_norm": 0.0, - "learning_rate": 1.7425172160895664e-05, - "loss": 0.9994, + "learning_rate": 1.743267637134595e-05, + "loss": 0.9296, "step": 9044 }, { - "epoch": 0.25666855845629966, + "epoch": 0.2563121652639633, "grad_norm": 0.0, - "learning_rate": 1.742455650624871e-05, - "loss": 0.9549, + "learning_rate": 1.7432062344361456e-05, + "loss": 1.0559, "step": 9045 }, { - "epoch": 0.25669693530079457, + "epoch": 0.2563405027062257, "grad_norm": 0.0, - "learning_rate": 1.7423940788886192e-05, - "loss": 0.9517, + "learning_rate": 1.7431448254773943e-05, + "loss": 0.9685, "step": 9046 }, { - "epoch": 0.25672531214528943, + "epoch": 0.2563688401484882, "grad_norm": 0.0, - "learning_rate": 1.7423325008813315e-05, - "loss": 1.04, + "learning_rate": 1.743083410258858e-05, + "loss": 0.8209, "step": 9047 }, { - "epoch": 0.25675368898978435, + "epoch": 0.25639717759075065, "grad_norm": 0.0, - "learning_rate": 1.7422709166035273e-05, - "loss": 0.9477, + "learning_rate": 1.7430219887810543e-05, + "loss": 1.018, "step": 9048 }, { - "epoch": 0.2567820658342792, + "epoch": 0.25642551503301314, "grad_norm": 0.0, - "learning_rate": 1.7422093260557277e-05, - "loss": 0.9937, + "learning_rate": 1.7429605610445007e-05, + "loss": 0.9143, "step": 9049 }, { - "epoch": 0.2568104426787741, + "epoch": 0.2564538524752756, "grad_norm": 0.0, - "learning_rate": 1.7421477292384524e-05, - "loss": 0.9758, + "learning_rate": 1.742899127049714e-05, + "loss": 1.0023, "step": 9050 }, { - "epoch": 0.25683881952326904, + "epoch": 0.256482189917538, "grad_norm": 0.0, - "learning_rate": 1.7420861261522223e-05, - "loss": 0.8509, + "learning_rate": 1.7428376867972122e-05, + "loss": 0.8602, "step": 9051 }, { - "epoch": 0.2568671963677639, + "epoch": 0.2565105273598005, "grad_norm": 0.0, - "learning_rate": 1.742024516797557e-05, - "loss": 0.9223, + "learning_rate": 1.7427762402875127e-05, + "loss": 1.0536, "step": 9052 }, { - "epoch": 0.2568955732122588, + "epoch": 0.25653886480206295, "grad_norm": 0.0, - "learning_rate": 1.741962901174977e-05, - "loss": 1.0774, + "learning_rate": 1.742714787521133e-05, + "loss": 0.9415, "step": 9053 }, { - "epoch": 0.25692395005675367, + "epoch": 0.25656720224432544, "grad_norm": 0.0, - "learning_rate": 1.741901279285003e-05, - "loss": 0.962, + "learning_rate": 1.7426533284985912e-05, + "loss": 1.0622, "step": 9054 }, { - "epoch": 0.2569523269012486, + "epoch": 0.2565955396865879, "grad_norm": 0.0, - "learning_rate": 1.741839651128156e-05, - "loss": 0.7991, + "learning_rate": 1.7425918632204044e-05, + "loss": 0.9007, "step": 9055 }, { - "epoch": 0.2569807037457435, + "epoch": 0.2566238771288504, "grad_norm": 0.0, - "learning_rate": 1.741778016704956e-05, - "loss": 0.9187, + "learning_rate": 1.7425303916870907e-05, + "loss": 0.9306, "step": 9056 }, { - "epoch": 0.25700908059023836, + "epoch": 0.2566522145711128, "grad_norm": 0.0, - "learning_rate": 1.7417163760159238e-05, - "loss": 1.0131, + "learning_rate": 1.742468913899168e-05, + "loss": 1.0808, "step": 9057 }, { - "epoch": 0.2570374574347333, + "epoch": 0.25668055201337525, "grad_norm": 0.0, - "learning_rate": 1.7416547290615798e-05, - "loss": 0.9218, + "learning_rate": 1.742407429857153e-05, + "loss": 1.0083, "step": 9058 }, { - "epoch": 0.25706583427922813, + "epoch": 0.25670888945563775, "grad_norm": 0.0, - "learning_rate": 1.741593075842445e-05, - "loss": 1.0318, + "learning_rate": 1.7423459395615654e-05, + "loss": 0.9928, "step": 9059 }, { - "epoch": 0.25709421112372305, + "epoch": 0.2567372268979002, "grad_norm": 0.0, - "learning_rate": 1.7415314163590405e-05, - "loss": 1.0955, + "learning_rate": 1.742284443012922e-05, + "loss": 0.8564, "step": 9060 }, { - "epoch": 0.2571225879682179, + "epoch": 0.2567655643401627, "grad_norm": 0.0, - "learning_rate": 1.7414697506118862e-05, - "loss": 0.9567, + "learning_rate": 1.7422229402117413e-05, + "loss": 1.0451, "step": 9061 }, { - "epoch": 0.2571509648127128, + "epoch": 0.2567939017824251, "grad_norm": 0.0, - "learning_rate": 1.741408078601504e-05, - "loss": 0.9685, + "learning_rate": 1.7421614311585407e-05, + "loss": 0.9004, "step": 9062 }, { - "epoch": 0.25717934165720774, + "epoch": 0.25682223922468755, "grad_norm": 0.0, - "learning_rate": 1.7413464003284143e-05, - "loss": 1.0286, + "learning_rate": 1.7420999158538393e-05, + "loss": 1.0042, "step": 9063 }, { - "epoch": 0.2572077185017026, + "epoch": 0.25685057666695005, "grad_norm": 0.0, - "learning_rate": 1.7412847157931382e-05, - "loss": 0.9884, + "learning_rate": 1.7420383942981543e-05, + "loss": 0.8951, "step": 9064 }, { - "epoch": 0.2572360953461975, + "epoch": 0.2568789141092125, "grad_norm": 0.0, - "learning_rate": 1.741223024996197e-05, - "loss": 0.9122, + "learning_rate": 1.741976866492005e-05, + "loss": 0.959, "step": 9065 }, { - "epoch": 0.2572644721906924, + "epoch": 0.256907251551475, "grad_norm": 0.0, - "learning_rate": 1.7411613279381116e-05, - "loss": 1.0282, + "learning_rate": 1.7419153324359082e-05, + "loss": 1.0067, "step": 9066 }, { - "epoch": 0.2572928490351873, + "epoch": 0.2569355889937374, "grad_norm": 0.0, - "learning_rate": 1.7410996246194028e-05, - "loss": 0.9897, + "learning_rate": 1.7418537921303836e-05, + "loss": 1.0269, "step": 9067 }, { - "epoch": 0.2573212258796822, + "epoch": 0.2569639264359999, "grad_norm": 0.0, - "learning_rate": 1.7410379150405924e-05, - "loss": 0.9986, + "learning_rate": 1.741792245575949e-05, + "loss": 1.0088, "step": 9068 }, { - "epoch": 0.25734960272417706, + "epoch": 0.25699226387826235, "grad_norm": 0.0, - "learning_rate": 1.7409761992022013e-05, - "loss": 0.9887, + "learning_rate": 1.7417306927731226e-05, + "loss": 1.087, "step": 9069 }, { - "epoch": 0.257377979568672, + "epoch": 0.2570206013205248, "grad_norm": 0.0, - "learning_rate": 1.7409144771047508e-05, - "loss": 0.9843, + "learning_rate": 1.7416691337224234e-05, + "loss": 1.0814, "step": 9070 }, { - "epoch": 0.25740635641316684, + "epoch": 0.2570489387627873, "grad_norm": 0.0, - "learning_rate": 1.7408527487487626e-05, - "loss": 0.954, + "learning_rate": 1.7416075684243693e-05, + "loss": 0.9808, "step": 9071 }, { - "epoch": 0.25743473325766175, + "epoch": 0.2570772762050497, "grad_norm": 0.0, - "learning_rate": 1.740791014134758e-05, - "loss": 0.9478, + "learning_rate": 1.7415459968794795e-05, + "loss": 0.9832, "step": 9072 }, { - "epoch": 0.25746311010215667, + "epoch": 0.2571056136473122, "grad_norm": 0.0, - "learning_rate": 1.7407292732632582e-05, - "loss": 0.866, + "learning_rate": 1.7414844190882725e-05, + "loss": 0.8858, "step": 9073 }, { - "epoch": 0.2574914869466515, + "epoch": 0.25713395108957465, "grad_norm": 0.0, - "learning_rate": 1.740667526134785e-05, - "loss": 0.9198, + "learning_rate": 1.741422835051267e-05, + "loss": 1.0367, "step": 9074 }, { - "epoch": 0.25751986379114644, + "epoch": 0.2571622885318371, "grad_norm": 0.0, - "learning_rate": 1.7406057727498602e-05, - "loss": 1.0606, + "learning_rate": 1.7413612447689813e-05, + "loss": 1.0698, "step": 9075 }, { - "epoch": 0.2575482406356413, + "epoch": 0.2571906259740996, "grad_norm": 0.0, - "learning_rate": 1.740544013109005e-05, - "loss": 0.9838, + "learning_rate": 1.7412996482419348e-05, + "loss": 0.9547, "step": 9076 }, { - "epoch": 0.2575766174801362, + "epoch": 0.257218963416362, "grad_norm": 0.0, - "learning_rate": 1.7404822472127406e-05, - "loss": 0.9631, + "learning_rate": 1.7412380454706458e-05, + "loss": 1.0057, "step": 9077 }, { - "epoch": 0.2576049943246311, + "epoch": 0.2572473008586245, "grad_norm": 0.0, - "learning_rate": 1.74042047506159e-05, - "loss": 0.97, + "learning_rate": 1.7411764364556336e-05, + "loss": 1.0716, "step": 9078 }, { - "epoch": 0.257633371169126, + "epoch": 0.25727563830088696, "grad_norm": 0.0, - "learning_rate": 1.7403586966560743e-05, - "loss": 0.925, + "learning_rate": 1.741114821197417e-05, + "loss": 0.9937, "step": 9079 }, { - "epoch": 0.2576617480136209, + "epoch": 0.25730397574314945, "grad_norm": 0.0, - "learning_rate": 1.7402969119967154e-05, - "loss": 1.0544, + "learning_rate": 1.7410531996965152e-05, + "loss": 1.0839, "step": 9080 }, { - "epoch": 0.25769012485811577, + "epoch": 0.2573323131854119, "grad_norm": 0.0, - "learning_rate": 1.7402351210840352e-05, - "loss": 1.0105, + "learning_rate": 1.740991571953447e-05, + "loss": 1.0112, "step": 9081 }, { - "epoch": 0.2577185017026107, + "epoch": 0.2573606506276743, "grad_norm": 0.0, - "learning_rate": 1.7401733239185557e-05, - "loss": 0.9454, + "learning_rate": 1.7409299379687316e-05, + "loss": 1.0668, "step": 9082 }, { - "epoch": 0.25774687854710554, + "epoch": 0.2573889880699368, "grad_norm": 0.0, - "learning_rate": 1.7401115205007987e-05, - "loss": 0.9199, + "learning_rate": 1.7408682977428884e-05, + "loss": 1.0506, "step": 9083 }, { - "epoch": 0.25777525539160046, + "epoch": 0.25741732551219926, "grad_norm": 0.0, - "learning_rate": 1.7400497108312867e-05, - "loss": 1.0692, + "learning_rate": 1.7408066512764365e-05, + "loss": 1.01, "step": 9084 }, { - "epoch": 0.25780363223609537, + "epoch": 0.25744566295446175, "grad_norm": 0.0, - "learning_rate": 1.7399878949105414e-05, - "loss": 0.9834, + "learning_rate": 1.740744998569895e-05, + "loss": 1.0195, "step": 9085 }, { - "epoch": 0.25783200908059023, + "epoch": 0.2574740003967242, "grad_norm": 0.0, - "learning_rate": 1.7399260727390846e-05, - "loss": 0.9417, + "learning_rate": 1.740683339623783e-05, + "loss": 0.9845, "step": 9086 }, { - "epoch": 0.25786038592508514, + "epoch": 0.25750233783898663, "grad_norm": 0.0, - "learning_rate": 1.7398642443174395e-05, - "loss": 1.0114, + "learning_rate": 1.7406216744386205e-05, + "loss": 0.9548, "step": 9087 }, { - "epoch": 0.25788876276958, + "epoch": 0.2575306752812491, "grad_norm": 0.0, - "learning_rate": 1.739802409646128e-05, - "loss": 1.1028, + "learning_rate": 1.7405600030149262e-05, + "loss": 1.068, "step": 9088 }, { - "epoch": 0.2579171396140749, + "epoch": 0.25755901272351156, "grad_norm": 0.0, - "learning_rate": 1.739740568725672e-05, - "loss": 1.0221, + "learning_rate": 1.7404983253532205e-05, + "loss": 0.9435, "step": 9089 }, { - "epoch": 0.25794551645856983, + "epoch": 0.25758735016577405, "grad_norm": 0.0, - "learning_rate": 1.739678721556594e-05, - "loss": 0.9357, + "learning_rate": 1.740436641454022e-05, + "loss": 1.0555, "step": 9090 }, { - "epoch": 0.2579738933030647, + "epoch": 0.2576156876080365, "grad_norm": 0.0, - "learning_rate": 1.739616868139417e-05, - "loss": 0.9579, + "learning_rate": 1.740374951317851e-05, + "loss": 1.0515, "step": 9091 }, { - "epoch": 0.2580022701475596, + "epoch": 0.257644025050299, "grad_norm": 0.0, - "learning_rate": 1.7395550084746632e-05, - "loss": 0.9443, + "learning_rate": 1.740313254945227e-05, + "loss": 0.9739, "step": 9092 }, { - "epoch": 0.25803064699205447, + "epoch": 0.2576723624925614, "grad_norm": 0.0, - "learning_rate": 1.7394931425628543e-05, - "loss": 1.0238, + "learning_rate": 1.7402515523366692e-05, + "loss": 0.9779, "step": 9093 }, { - "epoch": 0.2580590238365494, + "epoch": 0.25770069993482386, "grad_norm": 0.0, - "learning_rate": 1.7394312704045143e-05, - "loss": 0.9444, + "learning_rate": 1.7401898434926978e-05, + "loss": 1.0018, "step": 9094 }, { - "epoch": 0.25808740068104424, + "epoch": 0.25772903737708636, "grad_norm": 0.0, - "learning_rate": 1.7393693920001647e-05, - "loss": 0.9968, + "learning_rate": 1.7401281284138324e-05, + "loss": 1.0423, "step": 9095 }, { - "epoch": 0.25811577752553916, + "epoch": 0.2577573748193488, "grad_norm": 0.0, - "learning_rate": 1.7393075073503286e-05, - "loss": 1.1219, + "learning_rate": 1.740066407100593e-05, + "loss": 0.9472, "step": 9096 }, { - "epoch": 0.2581441543700341, + "epoch": 0.2577857122616113, "grad_norm": 0.0, - "learning_rate": 1.739245616455529e-05, - "loss": 1.0191, + "learning_rate": 1.7400046795534996e-05, + "loss": 0.9726, "step": 9097 }, { - "epoch": 0.25817253121452893, + "epoch": 0.2578140497038737, "grad_norm": 0.0, - "learning_rate": 1.7391837193162885e-05, - "loss": 0.9777, + "learning_rate": 1.739942945773072e-05, + "loss": 0.9385, "step": 9098 }, { - "epoch": 0.25820090805902385, + "epoch": 0.25784238714613616, "grad_norm": 0.0, - "learning_rate": 1.7391218159331295e-05, - "loss": 1.0008, + "learning_rate": 1.73988120575983e-05, + "loss": 0.9352, "step": 9099 }, { - "epoch": 0.2582292849035187, + "epoch": 0.25787072458839866, "grad_norm": 0.0, - "learning_rate": 1.7390599063065753e-05, - "loss": 0.932, + "learning_rate": 1.739819459514294e-05, + "loss": 1.017, "step": 9100 }, { - "epoch": 0.2582576617480136, + "epoch": 0.2578990620306611, "grad_norm": 0.0, - "learning_rate": 1.738997990437149e-05, - "loss": 0.9869, + "learning_rate": 1.739757707036984e-05, + "loss": 0.9027, "step": 9101 }, { - "epoch": 0.25828603859250854, + "epoch": 0.2579273994729236, "grad_norm": 0.0, - "learning_rate": 1.7389360683253737e-05, - "loss": 0.9772, + "learning_rate": 1.7396959483284197e-05, + "loss": 1.025, "step": 9102 }, { - "epoch": 0.2583144154370034, + "epoch": 0.25795573691518603, "grad_norm": 0.0, - "learning_rate": 1.738874139971772e-05, - "loss": 0.8549, + "learning_rate": 1.7396341833891225e-05, + "loss": 0.9938, "step": 9103 }, { - "epoch": 0.2583427922814983, + "epoch": 0.2579840743574485, "grad_norm": 0.0, - "learning_rate": 1.7388122053768673e-05, - "loss": 1.0065, + "learning_rate": 1.7395724122196113e-05, + "loss": 0.9541, "step": 9104 }, { - "epoch": 0.25837116912599317, + "epoch": 0.25801241179971096, "grad_norm": 0.0, - "learning_rate": 1.7387502645411826e-05, - "loss": 1.016, + "learning_rate": 1.7395106348204073e-05, + "loss": 1.0759, "step": 9105 }, { - "epoch": 0.2583995459704881, + "epoch": 0.2580407492419734, "grad_norm": 0.0, - "learning_rate": 1.7386883174652415e-05, - "loss": 0.9743, + "learning_rate": 1.73944885119203e-05, + "loss": 0.9877, "step": 9106 }, { - "epoch": 0.25842792281498295, + "epoch": 0.2580690866842359, "grad_norm": 0.0, - "learning_rate": 1.7386263641495668e-05, - "loss": 1.0257, + "learning_rate": 1.7393870613350012e-05, + "loss": 0.9213, "step": 9107 }, { - "epoch": 0.25845629965947786, + "epoch": 0.25809742412649833, "grad_norm": 0.0, - "learning_rate": 1.7385644045946816e-05, - "loss": 0.9642, + "learning_rate": 1.7393252652498404e-05, + "loss": 1.0615, "step": 9108 }, { - "epoch": 0.2584846765039728, + "epoch": 0.2581257615687608, "grad_norm": 0.0, - "learning_rate": 1.73850243880111e-05, - "loss": 0.973, + "learning_rate": 1.7392634629370684e-05, + "loss": 0.8821, "step": 9109 }, { - "epoch": 0.25851305334846764, + "epoch": 0.25815409901102326, "grad_norm": 0.0, - "learning_rate": 1.738440466769375e-05, - "loss": 0.9658, + "learning_rate": 1.7392016543972056e-05, + "loss": 0.9365, "step": 9110 }, { - "epoch": 0.25854143019296255, + "epoch": 0.2581824364532857, "grad_norm": 0.0, - "learning_rate": 1.7383784885000004e-05, - "loss": 0.9774, + "learning_rate": 1.7391398396307728e-05, + "loss": 0.9921, "step": 9111 }, { - "epoch": 0.2585698070374574, + "epoch": 0.2582107738955482, "grad_norm": 0.0, - "learning_rate": 1.7383165039935094e-05, - "loss": 0.9515, + "learning_rate": 1.7390780186382907e-05, + "loss": 0.9072, "step": 9112 }, { - "epoch": 0.2585981838819523, + "epoch": 0.25823911133781063, "grad_norm": 0.0, - "learning_rate": 1.7382545132504255e-05, - "loss": 1.015, + "learning_rate": 1.73901619142028e-05, + "loss": 0.9333, "step": 9113 }, { - "epoch": 0.25862656072644724, + "epoch": 0.25826744878007313, "grad_norm": 0.0, - "learning_rate": 1.7381925162712727e-05, - "loss": 1.0648, + "learning_rate": 1.7389543579772613e-05, + "loss": 0.9679, "step": 9114 }, { - "epoch": 0.2586549375709421, + "epoch": 0.25829578622233557, "grad_norm": 0.0, - "learning_rate": 1.7381305130565747e-05, - "loss": 0.9422, + "learning_rate": 1.738892518309756e-05, + "loss": 0.9666, "step": 9115 }, { - "epoch": 0.258683314415437, + "epoch": 0.25832412366459806, "grad_norm": 0.0, - "learning_rate": 1.738068503606855e-05, - "loss": 0.9792, + "learning_rate": 1.7388306724182847e-05, + "loss": 0.914, "step": 9116 }, { - "epoch": 0.2587116912599319, + "epoch": 0.2583524611068605, "grad_norm": 0.0, - "learning_rate": 1.7380064879226374e-05, - "loss": 1.0379, + "learning_rate": 1.738768820303368e-05, + "loss": 0.8883, "step": 9117 }, { - "epoch": 0.2587400681044268, + "epoch": 0.25838079854912294, "grad_norm": 0.0, - "learning_rate": 1.7379444660044456e-05, - "loss": 0.962, + "learning_rate": 1.738706961965527e-05, + "loss": 1.0633, "step": 9118 }, { - "epoch": 0.2587684449489217, + "epoch": 0.25840913599138543, "grad_norm": 0.0, - "learning_rate": 1.737882437852804e-05, - "loss": 0.8831, + "learning_rate": 1.7386450974052836e-05, + "loss": 0.8596, "step": 9119 }, { - "epoch": 0.25879682179341656, + "epoch": 0.25843747343364787, "grad_norm": 0.0, - "learning_rate": 1.7378204034682364e-05, - "loss": 0.9544, + "learning_rate": 1.7385832266231576e-05, + "loss": 0.9704, "step": 9120 }, { - "epoch": 0.2588251986379115, + "epoch": 0.25846581087591036, "grad_norm": 0.0, - "learning_rate": 1.7377583628512665e-05, - "loss": 0.9732, + "learning_rate": 1.738521349619671e-05, + "loss": 0.8651, "step": 9121 }, { - "epoch": 0.25885357548240634, + "epoch": 0.2584941483181728, "grad_norm": 0.0, - "learning_rate": 1.7376963160024184e-05, - "loss": 0.948, + "learning_rate": 1.738459466395345e-05, + "loss": 1.0451, "step": 9122 }, { - "epoch": 0.25888195232690125, + "epoch": 0.25852248576043524, "grad_norm": 0.0, - "learning_rate": 1.7376342629222165e-05, - "loss": 0.9091, + "learning_rate": 1.7383975769507006e-05, + "loss": 1.058, "step": 9123 }, { - "epoch": 0.2589103291713961, + "epoch": 0.25855082320269773, "grad_norm": 0.0, - "learning_rate": 1.7375722036111848e-05, - "loss": 1.1245, + "learning_rate": 1.7383356812862595e-05, + "loss": 0.8834, "step": 9124 }, { - "epoch": 0.25893870601589103, + "epoch": 0.25857916064496017, "grad_norm": 0.0, - "learning_rate": 1.7375101380698475e-05, - "loss": 1.0428, + "learning_rate": 1.7382737794025422e-05, + "loss": 0.867, "step": 9125 }, { - "epoch": 0.25896708286038594, + "epoch": 0.25860749808722266, "grad_norm": 0.0, - "learning_rate": 1.737448066298729e-05, - "loss": 0.8579, + "learning_rate": 1.738211871300071e-05, + "loss": 1.04, "step": 9126 }, { - "epoch": 0.2589954597048808, + "epoch": 0.2586358355294851, "grad_norm": 0.0, - "learning_rate": 1.7373859882983537e-05, - "loss": 0.935, + "learning_rate": 1.738149956979367e-05, + "loss": 0.9688, "step": 9127 }, { - "epoch": 0.2590238365493757, + "epoch": 0.2586641729717476, "grad_norm": 0.0, - "learning_rate": 1.7373239040692457e-05, - "loss": 0.8934, + "learning_rate": 1.738088036440952e-05, + "loss": 0.8956, "step": 9128 }, { - "epoch": 0.2590522133938706, + "epoch": 0.25869251041401004, "grad_norm": 0.0, - "learning_rate": 1.7372618136119292e-05, - "loss": 0.9477, + "learning_rate": 1.738026109685347e-05, + "loss": 0.8867, "step": 9129 }, { - "epoch": 0.2590805902383655, + "epoch": 0.2587208478562725, "grad_norm": 0.0, - "learning_rate": 1.7371997169269293e-05, - "loss": 0.9005, + "learning_rate": 1.7379641767130745e-05, + "loss": 1.0948, "step": 9130 }, { - "epoch": 0.2591089670828604, + "epoch": 0.25874918529853497, "grad_norm": 0.0, - "learning_rate": 1.7371376140147705e-05, - "loss": 1.0615, + "learning_rate": 1.7379022375246554e-05, + "loss": 0.9422, "step": 9131 }, { - "epoch": 0.25913734392735527, + "epoch": 0.2587775227407974, "grad_norm": 0.0, - "learning_rate": 1.7370755048759767e-05, - "loss": 1.071, + "learning_rate": 1.737840292120612e-05, + "loss": 0.7953, "step": 9132 }, { - "epoch": 0.2591657207718502, + "epoch": 0.2588058601830599, "grad_norm": 0.0, - "learning_rate": 1.737013389511073e-05, - "loss": 0.8247, + "learning_rate": 1.7377783405014653e-05, + "loss": 1.0057, "step": 9133 }, { - "epoch": 0.25919409761634504, + "epoch": 0.25883419762532234, "grad_norm": 0.0, - "learning_rate": 1.7369512679205844e-05, - "loss": 1.0092, + "learning_rate": 1.7377163826677383e-05, + "loss": 0.9861, "step": 9134 }, { - "epoch": 0.25922247446083996, + "epoch": 0.2588625350675848, "grad_norm": 0.0, - "learning_rate": 1.736889140105035e-05, - "loss": 0.9442, + "learning_rate": 1.7376544186199518e-05, + "loss": 0.9852, "step": 9135 }, { - "epoch": 0.2592508513053349, + "epoch": 0.25889087250984727, "grad_norm": 0.0, - "learning_rate": 1.7368270060649503e-05, - "loss": 0.9615, + "learning_rate": 1.7375924483586285e-05, + "loss": 1.0344, "step": 9136 }, { - "epoch": 0.25927922814982973, + "epoch": 0.2589192099521097, "grad_norm": 0.0, - "learning_rate": 1.7367648658008544e-05, - "loss": 0.9078, + "learning_rate": 1.73753047188429e-05, + "loss": 0.918, "step": 9137 }, { - "epoch": 0.25930760499432465, + "epoch": 0.2589475473943722, "grad_norm": 0.0, - "learning_rate": 1.7367027193132723e-05, - "loss": 0.9521, + "learning_rate": 1.7374684891974585e-05, + "loss": 1.0081, "step": 9138 }, { - "epoch": 0.2593359818388195, + "epoch": 0.25897588483663464, "grad_norm": 0.0, - "learning_rate": 1.7366405666027296e-05, - "loss": 0.9362, + "learning_rate": 1.737406500298656e-05, + "loss": 0.9615, "step": 9139 }, { - "epoch": 0.2593643586833144, + "epoch": 0.25900422227889713, "grad_norm": 0.0, - "learning_rate": 1.736578407669751e-05, - "loss": 0.9418, + "learning_rate": 1.737344505188405e-05, + "loss": 0.9426, "step": 9140 }, { - "epoch": 0.2593927355278093, + "epoch": 0.25903255972115957, "grad_norm": 0.0, - "learning_rate": 1.7365162425148615e-05, - "loss": 0.9932, + "learning_rate": 1.737282503867227e-05, + "loss": 0.8767, "step": 9141 }, { - "epoch": 0.2594211123723042, + "epoch": 0.259060897163422, "grad_norm": 0.0, - "learning_rate": 1.736454071138586e-05, - "loss": 0.9677, + "learning_rate": 1.737220496335645e-05, + "loss": 0.894, "step": 9142 }, { - "epoch": 0.2594494892167991, + "epoch": 0.2590892346056845, "grad_norm": 0.0, - "learning_rate": 1.73639189354145e-05, - "loss": 0.8522, + "learning_rate": 1.7371584825941808e-05, + "loss": 0.996, "step": 9143 }, { - "epoch": 0.25947786606129397, + "epoch": 0.25911757204794694, "grad_norm": 0.0, - "learning_rate": 1.7363297097239784e-05, - "loss": 0.9832, + "learning_rate": 1.737096462643357e-05, + "loss": 1.0552, "step": 9144 }, { - "epoch": 0.2595062429057889, + "epoch": 0.25914590949020944, "grad_norm": 0.0, - "learning_rate": 1.7362675196866968e-05, - "loss": 0.9812, + "learning_rate": 1.737034436483696e-05, + "loss": 1.1226, "step": 9145 }, { - "epoch": 0.25953461975028375, + "epoch": 0.2591742469324719, "grad_norm": 0.0, - "learning_rate": 1.73620532343013e-05, - "loss": 0.9958, + "learning_rate": 1.7369724041157202e-05, + "loss": 0.873, "step": 9146 }, { - "epoch": 0.25956299659477866, + "epoch": 0.2592025843747343, "grad_norm": 0.0, - "learning_rate": 1.736143120954804e-05, - "loss": 0.9956, + "learning_rate": 1.7369103655399523e-05, + "loss": 0.9501, "step": 9147 }, { - "epoch": 0.2595913734392736, + "epoch": 0.2592309218169968, "grad_norm": 0.0, - "learning_rate": 1.736080912261244e-05, - "loss": 0.8895, + "learning_rate": 1.7368483207569146e-05, + "loss": 1.0538, "step": 9148 }, { - "epoch": 0.25961975028376844, + "epoch": 0.25925925925925924, "grad_norm": 0.0, - "learning_rate": 1.7360186973499752e-05, - "loss": 0.9951, + "learning_rate": 1.73678626976713e-05, + "loss": 0.9276, "step": 9149 }, { - "epoch": 0.25964812712826335, + "epoch": 0.25928759670152174, "grad_norm": 0.0, - "learning_rate": 1.735956476221524e-05, - "loss": 1.0687, + "learning_rate": 1.736724212571121e-05, + "loss": 0.9482, "step": 9150 }, { - "epoch": 0.2596765039727582, + "epoch": 0.2593159341437842, "grad_norm": 0.0, - "learning_rate": 1.735894248876415e-05, - "loss": 0.9838, + "learning_rate": 1.7366621491694103e-05, + "loss": 0.9543, "step": 9151 }, { - "epoch": 0.2597048808172531, + "epoch": 0.25934427158604667, "grad_norm": 0.0, - "learning_rate": 1.735832015315174e-05, - "loss": 1.0184, + "learning_rate": 1.736600079562521e-05, + "loss": 0.863, "step": 9152 }, { - "epoch": 0.25973325766174804, + "epoch": 0.2593726090283091, "grad_norm": 0.0, - "learning_rate": 1.735769775538327e-05, - "loss": 1.0076, + "learning_rate": 1.7365380037509756e-05, + "loss": 0.9523, "step": 9153 }, { - "epoch": 0.2597616345062429, + "epoch": 0.25940094647057155, "grad_norm": 0.0, - "learning_rate": 1.7357075295464e-05, - "loss": 0.9194, + "learning_rate": 1.736475921735297e-05, + "loss": 0.88, "step": 9154 }, { - "epoch": 0.2597900113507378, + "epoch": 0.25942928391283404, "grad_norm": 0.0, - "learning_rate": 1.7356452773399178e-05, - "loss": 0.9975, + "learning_rate": 1.736413833516008e-05, + "loss": 0.9609, "step": 9155 }, { - "epoch": 0.2598183881952327, + "epoch": 0.2594576213550965, "grad_norm": 0.0, - "learning_rate": 1.735583018919407e-05, - "loss": 0.9272, + "learning_rate": 1.736351739093632e-05, + "loss": 0.9688, "step": 9156 }, { - "epoch": 0.2598467650397276, + "epoch": 0.259485958797359, "grad_norm": 0.0, - "learning_rate": 1.7355207542853938e-05, - "loss": 0.9406, + "learning_rate": 1.736289638468692e-05, + "loss": 0.9351, "step": 9157 }, { - "epoch": 0.25987514188422245, + "epoch": 0.2595142962396214, "grad_norm": 0.0, - "learning_rate": 1.7354584834384036e-05, - "loss": 0.9551, + "learning_rate": 1.7362275316417112e-05, + "loss": 0.9114, "step": 9158 }, { - "epoch": 0.25990351872871736, + "epoch": 0.25954263368188385, "grad_norm": 0.0, - "learning_rate": 1.735396206378962e-05, - "loss": 1.021, + "learning_rate": 1.736165418613212e-05, + "loss": 0.9713, "step": 9159 }, { - "epoch": 0.2599318955732123, + "epoch": 0.25957097112414634, "grad_norm": 0.0, - "learning_rate": 1.7353339231075964e-05, - "loss": 0.9184, + "learning_rate": 1.7361032993837184e-05, + "loss": 1.0672, "step": 9160 }, { - "epoch": 0.25996027241770714, + "epoch": 0.2595993085664088, "grad_norm": 0.0, - "learning_rate": 1.7352716336248313e-05, - "loss": 1.0486, + "learning_rate": 1.7360411739537535e-05, + "loss": 0.9768, "step": 9161 }, { - "epoch": 0.25998864926220205, + "epoch": 0.2596276460086713, "grad_norm": 0.0, - "learning_rate": 1.7352093379311942e-05, - "loss": 1.0144, + "learning_rate": 1.73597904232384e-05, + "loss": 1.0385, "step": 9162 }, { - "epoch": 0.2600170261066969, + "epoch": 0.2596559834509337, "grad_norm": 0.0, - "learning_rate": 1.7351470360272107e-05, - "loss": 1.0297, + "learning_rate": 1.735916904494502e-05, + "loss": 0.8051, "step": 9163 }, { - "epoch": 0.26004540295119183, + "epoch": 0.2596843208931962, "grad_norm": 0.0, - "learning_rate": 1.7350847279134072e-05, - "loss": 0.9957, + "learning_rate": 1.7358547604662626e-05, + "loss": 0.9457, "step": 9164 }, { - "epoch": 0.26007377979568674, + "epoch": 0.25971265833545865, "grad_norm": 0.0, - "learning_rate": 1.7350224135903097e-05, - "loss": 1.0318, + "learning_rate": 1.7357926102396454e-05, + "loss": 0.9761, "step": 9165 }, { - "epoch": 0.2601021566401816, + "epoch": 0.2597409957777211, "grad_norm": 0.0, - "learning_rate": 1.734960093058445e-05, - "loss": 0.8897, + "learning_rate": 1.735730453815174e-05, + "loss": 0.9784, "step": 9166 }, { - "epoch": 0.2601305334846765, + "epoch": 0.2597693332199836, "grad_norm": 0.0, - "learning_rate": 1.7348977663183392e-05, - "loss": 0.9836, + "learning_rate": 1.7356682911933713e-05, + "loss": 1.0373, "step": 9167 }, { - "epoch": 0.2601589103291714, + "epoch": 0.259797670662246, "grad_norm": 0.0, - "learning_rate": 1.734835433370519e-05, - "loss": 0.9211, + "learning_rate": 1.7356061223747617e-05, + "loss": 0.9261, "step": 9168 }, { - "epoch": 0.2601872871736663, + "epoch": 0.2598260081045085, "grad_norm": 0.0, - "learning_rate": 1.734773094215511e-05, - "loss": 1.0753, + "learning_rate": 1.7355439473598682e-05, + "loss": 1.0622, "step": 9169 }, { - "epoch": 0.2602156640181612, + "epoch": 0.25985434554677095, "grad_norm": 0.0, - "learning_rate": 1.7347107488538413e-05, - "loss": 0.986, + "learning_rate": 1.7354817661492154e-05, + "loss": 1.0176, "step": 9170 }, { - "epoch": 0.26024404086265607, + "epoch": 0.2598826829890334, "grad_norm": 0.0, - "learning_rate": 1.7346483972860373e-05, - "loss": 0.9306, + "learning_rate": 1.7354195787433263e-05, + "loss": 1.0879, "step": 9171 }, { - "epoch": 0.260272417707151, + "epoch": 0.2599110204312959, "grad_norm": 0.0, - "learning_rate": 1.734586039512625e-05, - "loss": 0.9219, + "learning_rate": 1.735357385142725e-05, + "loss": 0.8988, "step": 9172 }, { - "epoch": 0.26030079455164584, + "epoch": 0.2599393578735583, "grad_norm": 0.0, - "learning_rate": 1.734523675534132e-05, - "loss": 1.0444, + "learning_rate": 1.7352951853479357e-05, + "loss": 1.015, "step": 9173 }, { - "epoch": 0.26032917139614076, + "epoch": 0.2599676953158208, "grad_norm": 0.0, - "learning_rate": 1.7344613053510838e-05, - "loss": 0.9518, + "learning_rate": 1.7352329793594817e-05, + "loss": 0.9812, "step": 9174 }, { - "epoch": 0.2603575482406356, + "epoch": 0.25999603275808325, "grad_norm": 0.0, - "learning_rate": 1.734398928964008e-05, - "loss": 0.9672, + "learning_rate": 1.7351707671778874e-05, + "loss": 1.0191, "step": 9175 }, { - "epoch": 0.26038592508513053, + "epoch": 0.2600243702003457, "grad_norm": 0.0, - "learning_rate": 1.7343365463734314e-05, - "loss": 0.9106, + "learning_rate": 1.7351085488036762e-05, + "loss": 0.9102, "step": 9176 }, { - "epoch": 0.26041430192962545, + "epoch": 0.2600527076426082, "grad_norm": 0.0, - "learning_rate": 1.7342741575798813e-05, - "loss": 1.0428, + "learning_rate": 1.7350463242373733e-05, + "loss": 1.0139, "step": 9177 }, { - "epoch": 0.2604426787741203, + "epoch": 0.2600810450848706, "grad_norm": 0.0, - "learning_rate": 1.7342117625838842e-05, - "loss": 1.1374, + "learning_rate": 1.7349840934795024e-05, + "loss": 0.9798, "step": 9178 }, { - "epoch": 0.2604710556186152, + "epoch": 0.2601093825271331, "grad_norm": 0.0, - "learning_rate": 1.7341493613859672e-05, - "loss": 1.0506, + "learning_rate": 1.734921856530587e-05, + "loss": 0.9753, "step": 9179 }, { - "epoch": 0.2604994324631101, + "epoch": 0.26013771996939555, "grad_norm": 0.0, - "learning_rate": 1.734086953986658e-05, - "loss": 0.8936, + "learning_rate": 1.7348596133911522e-05, + "loss": 0.976, "step": 9180 }, { - "epoch": 0.260527809307605, + "epoch": 0.26016605741165805, "grad_norm": 0.0, - "learning_rate": 1.7340245403864825e-05, - "loss": 0.925, + "learning_rate": 1.7347973640617222e-05, + "loss": 1.0867, "step": 9181 }, { - "epoch": 0.2605561861520999, + "epoch": 0.2601943948539205, "grad_norm": 0.0, - "learning_rate": 1.7339621205859692e-05, - "loss": 0.9938, + "learning_rate": 1.7347351085428208e-05, + "loss": 1.0363, "step": 9182 }, { - "epoch": 0.26058456299659477, + "epoch": 0.2602227322961829, "grad_norm": 0.0, - "learning_rate": 1.733899694585645e-05, - "loss": 0.9399, + "learning_rate": 1.734672846834973e-05, + "loss": 0.9311, "step": 9183 }, { - "epoch": 0.2606129398410897, + "epoch": 0.2602510697384454, "grad_norm": 0.0, - "learning_rate": 1.7338372623860372e-05, - "loss": 1.0, + "learning_rate": 1.734610578938703e-05, + "loss": 1.0313, "step": 9184 }, { - "epoch": 0.26064131668558455, + "epoch": 0.26027940718070786, "grad_norm": 0.0, - "learning_rate": 1.733774823987673e-05, - "loss": 0.9934, + "learning_rate": 1.7345483048545347e-05, + "loss": 1.0155, "step": 9185 }, { - "epoch": 0.26066969353007946, + "epoch": 0.26030774462297035, "grad_norm": 0.0, - "learning_rate": 1.733712379391079e-05, - "loss": 1.0128, + "learning_rate": 1.734486024582994e-05, + "loss": 0.9562, "step": 9186 }, { - "epoch": 0.2606980703745743, + "epoch": 0.2603360820652328, "grad_norm": 0.0, - "learning_rate": 1.7336499285967847e-05, - "loss": 0.9643, + "learning_rate": 1.7344237381246043e-05, + "loss": 1.0263, "step": 9187 }, { - "epoch": 0.26072644721906924, + "epoch": 0.2603644195074952, "grad_norm": 0.0, - "learning_rate": 1.733587471605316e-05, - "loss": 1.0702, + "learning_rate": 1.734361445479891e-05, + "loss": 1.0244, "step": 9188 }, { - "epoch": 0.26075482406356415, + "epoch": 0.2603927569497577, "grad_norm": 0.0, - "learning_rate": 1.733525008417201e-05, - "loss": 0.9159, + "learning_rate": 1.7342991466493785e-05, + "loss": 1.0589, "step": 9189 }, { - "epoch": 0.260783200908059, + "epoch": 0.26042109439202016, "grad_norm": 0.0, - "learning_rate": 1.7334625390329672e-05, - "loss": 0.9749, + "learning_rate": 1.7342368416335915e-05, + "loss": 0.9329, "step": 9190 }, { - "epoch": 0.2608115777525539, + "epoch": 0.26044943183428265, "grad_norm": 0.0, - "learning_rate": 1.7334000634531426e-05, - "loss": 0.9306, + "learning_rate": 1.734174530433055e-05, + "loss": 0.9724, "step": 9191 }, { - "epoch": 0.2608399545970488, + "epoch": 0.2604777692765451, "grad_norm": 0.0, - "learning_rate": 1.7333375816782542e-05, - "loss": 1.0225, + "learning_rate": 1.7341122130482938e-05, + "loss": 0.9149, "step": 9192 }, { - "epoch": 0.2608683314415437, + "epoch": 0.2605061067188076, "grad_norm": 0.0, - "learning_rate": 1.733275093708831e-05, - "loss": 0.9354, + "learning_rate": 1.7340498894798327e-05, + "loss": 0.9275, "step": 9193 }, { - "epoch": 0.2608967082860386, + "epoch": 0.26053444416107, "grad_norm": 0.0, - "learning_rate": 1.7332125995453992e-05, - "loss": 0.9403, + "learning_rate": 1.733987559728197e-05, + "loss": 0.9222, "step": 9194 }, { - "epoch": 0.2609250851305335, + "epoch": 0.26056278160333246, "grad_norm": 0.0, - "learning_rate": 1.7331500991884883e-05, - "loss": 1.0435, + "learning_rate": 1.7339252237939118e-05, + "loss": 0.9882, "step": 9195 }, { - "epoch": 0.2609534619750284, + "epoch": 0.26059111904559495, "grad_norm": 0.0, - "learning_rate": 1.7330875926386252e-05, - "loss": 0.9783, + "learning_rate": 1.7338628816775013e-05, + "loss": 1.0189, "step": 9196 }, { - "epoch": 0.26098183881952325, + "epoch": 0.2606194564878574, "grad_norm": 0.0, - "learning_rate": 1.7330250798963386e-05, - "loss": 0.9935, + "learning_rate": 1.7338005333794915e-05, + "loss": 0.9829, "step": 9197 }, { - "epoch": 0.26101021566401816, + "epoch": 0.2606477939301199, "grad_norm": 0.0, - "learning_rate": 1.732962560962156e-05, - "loss": 0.8939, + "learning_rate": 1.7337381789004074e-05, + "loss": 1.02, "step": 9198 }, { - "epoch": 0.2610385925085131, + "epoch": 0.2606761313723823, "grad_norm": 0.0, - "learning_rate": 1.7329000358366057e-05, - "loss": 0.9756, + "learning_rate": 1.733675818240774e-05, + "loss": 1.0941, "step": 9199 }, { - "epoch": 0.26106696935300794, + "epoch": 0.26070446881464476, "grad_norm": 0.0, - "learning_rate": 1.732837504520216e-05, - "loss": 1.0076, + "learning_rate": 1.7336134514011168e-05, + "loss": 0.8962, "step": 9200 }, { - "epoch": 0.26109534619750285, + "epoch": 0.26073280625690726, "grad_norm": 0.0, - "learning_rate": 1.732774967013515e-05, - "loss": 0.8791, + "learning_rate": 1.733551078381961e-05, + "loss": 0.9013, "step": 9201 }, { - "epoch": 0.2611237230419977, + "epoch": 0.2607611436991697, "grad_norm": 0.0, - "learning_rate": 1.7327124233170306e-05, - "loss": 0.9173, + "learning_rate": 1.7334886991838323e-05, + "loss": 0.9768, "step": 9202 }, { - "epoch": 0.26115209988649263, + "epoch": 0.2607894811414322, "grad_norm": 0.0, - "learning_rate": 1.732649873431291e-05, - "loss": 0.9626, + "learning_rate": 1.7334263138072557e-05, + "loss": 0.9364, "step": 9203 }, { - "epoch": 0.2611804767309875, + "epoch": 0.2608178185836946, "grad_norm": 0.0, - "learning_rate": 1.7325873173568258e-05, - "loss": 0.8394, + "learning_rate": 1.7333639222527572e-05, + "loss": 1.0043, "step": 9204 }, { - "epoch": 0.2612088535754824, + "epoch": 0.2608461560259571, "grad_norm": 0.0, - "learning_rate": 1.7325247550941627e-05, - "loss": 1.0064, + "learning_rate": 1.7333015245208614e-05, + "loss": 1.0666, "step": 9205 }, { - "epoch": 0.2612372304199773, + "epoch": 0.26087449346821956, "grad_norm": 0.0, - "learning_rate": 1.7324621866438297e-05, - "loss": 0.9768, + "learning_rate": 1.7332391206120954e-05, + "loss": 0.9644, "step": 9206 }, { - "epoch": 0.2612656072644722, + "epoch": 0.260902830910482, "grad_norm": 0.0, - "learning_rate": 1.7323996120063554e-05, - "loss": 0.9986, + "learning_rate": 1.7331767105269833e-05, + "loss": 0.9835, "step": 9207 }, { - "epoch": 0.2612939841089671, + "epoch": 0.2609311683527445, "grad_norm": 0.0, - "learning_rate": 1.7323370311822693e-05, - "loss": 0.9078, + "learning_rate": 1.733114294266052e-05, + "loss": 0.9171, "step": 9208 }, { - "epoch": 0.26132236095346195, + "epoch": 0.26095950579500693, "grad_norm": 0.0, - "learning_rate": 1.732274444172099e-05, - "loss": 1.158, + "learning_rate": 1.7330518718298263e-05, + "loss": 1.0244, "step": 9209 }, { - "epoch": 0.26135073779795687, + "epoch": 0.2609878432372694, "grad_norm": 0.0, - "learning_rate": 1.7322118509763734e-05, - "loss": 1.0223, + "learning_rate": 1.7329894432188328e-05, + "loss": 1.0729, "step": 9210 }, { - "epoch": 0.2613791146424518, + "epoch": 0.26101618067953186, "grad_norm": 0.0, - "learning_rate": 1.7321492515956216e-05, - "loss": 0.9566, + "learning_rate": 1.7329270084335972e-05, + "loss": 0.7637, "step": 9211 }, { - "epoch": 0.26140749148694664, + "epoch": 0.2610445181217943, "grad_norm": 0.0, - "learning_rate": 1.732086646030372e-05, - "loss": 0.9299, + "learning_rate": 1.7328645674746448e-05, + "loss": 1.0047, "step": 9212 }, { - "epoch": 0.26143586833144156, + "epoch": 0.2610728555640568, "grad_norm": 0.0, - "learning_rate": 1.7320240342811537e-05, - "loss": 1.0069, + "learning_rate": 1.7328021203425023e-05, + "loss": 0.9533, "step": 9213 }, { - "epoch": 0.2614642451759364, + "epoch": 0.26110119300631923, "grad_norm": 0.0, - "learning_rate": 1.7319614163484956e-05, - "loss": 0.9531, + "learning_rate": 1.7327396670376954e-05, + "loss": 0.899, "step": 9214 }, { - "epoch": 0.26149262202043133, + "epoch": 0.2611295304485817, "grad_norm": 0.0, - "learning_rate": 1.7318987922329263e-05, - "loss": 0.9904, + "learning_rate": 1.73267720756075e-05, + "loss": 0.8906, "step": 9215 }, { - "epoch": 0.26152099886492625, + "epoch": 0.26115786789084416, "grad_norm": 0.0, - "learning_rate": 1.731836161934975e-05, - "loss": 1.0216, + "learning_rate": 1.7326147419121926e-05, + "loss": 1.07, "step": 9216 }, { - "epoch": 0.2615493757094211, + "epoch": 0.26118620533310666, "grad_norm": 0.0, - "learning_rate": 1.731773525455171e-05, - "loss": 0.9755, + "learning_rate": 1.732552270092549e-05, + "loss": 1.0435, "step": 9217 }, { - "epoch": 0.261577752553916, + "epoch": 0.2612145427753691, "grad_norm": 0.0, - "learning_rate": 1.731710882794043e-05, - "loss": 0.9045, + "learning_rate": 1.7324897921023456e-05, + "loss": 0.8745, "step": 9218 }, { - "epoch": 0.2616061293984109, + "epoch": 0.26124288021763153, "grad_norm": 0.0, - "learning_rate": 1.7316482339521204e-05, - "loss": 1.0804, + "learning_rate": 1.732427307942109e-05, + "loss": 0.9885, "step": 9219 }, { - "epoch": 0.2616345062429058, + "epoch": 0.26127121765989403, "grad_norm": 0.0, - "learning_rate": 1.7315855789299322e-05, - "loss": 0.9224, + "learning_rate": 1.732364817612365e-05, + "loss": 1.0806, "step": 9220 }, { - "epoch": 0.26166288308740066, + "epoch": 0.26129955510215647, "grad_norm": 0.0, - "learning_rate": 1.7315229177280075e-05, - "loss": 0.9694, + "learning_rate": 1.73230232111364e-05, + "loss": 0.9633, "step": 9221 }, { - "epoch": 0.26169125993189557, + "epoch": 0.26132789254441896, "grad_norm": 0.0, - "learning_rate": 1.7314602503468758e-05, - "loss": 0.8924, + "learning_rate": 1.732239818446461e-05, + "loss": 1.0033, "step": 9222 }, { - "epoch": 0.2617196367763905, + "epoch": 0.2613562299866814, "grad_norm": 0.0, - "learning_rate": 1.7313975767870666e-05, - "loss": 0.9369, + "learning_rate": 1.732177309611354e-05, + "loss": 0.9667, "step": 9223 }, { - "epoch": 0.26174801362088534, + "epoch": 0.26138456742894384, "grad_norm": 0.0, - "learning_rate": 1.7313348970491093e-05, - "loss": 0.9501, + "learning_rate": 1.7321147946088454e-05, + "loss": 0.9759, "step": 9224 }, { - "epoch": 0.26177639046538026, + "epoch": 0.26141290487120633, "grad_norm": 0.0, - "learning_rate": 1.7312722111335333e-05, - "loss": 0.998, + "learning_rate": 1.7320522734394623e-05, + "loss": 0.8677, "step": 9225 }, { - "epoch": 0.2618047673098751, + "epoch": 0.26144124231346877, "grad_norm": 0.0, - "learning_rate": 1.731209519040868e-05, - "loss": 0.9966, + "learning_rate": 1.7319897461037308e-05, + "loss": 0.9094, "step": 9226 }, { - "epoch": 0.26183314415437003, + "epoch": 0.26146957975573126, "grad_norm": 0.0, - "learning_rate": 1.731146820771643e-05, - "loss": 1.0186, + "learning_rate": 1.731927212602178e-05, + "loss": 0.9808, "step": 9227 }, { - "epoch": 0.26186152099886495, + "epoch": 0.2614979171979937, "grad_norm": 0.0, - "learning_rate": 1.731084116326388e-05, - "loss": 1.0185, + "learning_rate": 1.73186467293533e-05, + "loss": 0.9964, "step": 9228 }, { - "epoch": 0.2618898978433598, + "epoch": 0.2615262546402562, "grad_norm": 0.0, - "learning_rate": 1.7310214057056326e-05, - "loss": 0.9804, + "learning_rate": 1.731802127103715e-05, + "loss": 0.9719, "step": 9229 }, { - "epoch": 0.2619182746878547, + "epoch": 0.26155459208251863, "grad_norm": 0.0, - "learning_rate": 1.7309586889099063e-05, - "loss": 1.0028, + "learning_rate": 1.7317395751078583e-05, + "loss": 0.9334, "step": 9230 }, { - "epoch": 0.2619466515323496, + "epoch": 0.26158292952478107, "grad_norm": 0.0, - "learning_rate": 1.7308959659397393e-05, - "loss": 0.8694, + "learning_rate": 1.7316770169482878e-05, + "loss": 0.9676, "step": 9231 }, { - "epoch": 0.2619750283768445, + "epoch": 0.26161126696704357, "grad_norm": 0.0, - "learning_rate": 1.730833236795661e-05, - "loss": 1.002, + "learning_rate": 1.7316144526255297e-05, + "loss": 1.035, "step": 9232 }, { - "epoch": 0.2620034052213394, + "epoch": 0.261639604409306, "grad_norm": 0.0, - "learning_rate": 1.730770501478202e-05, - "loss": 0.8429, + "learning_rate": 1.7315518821401117e-05, + "loss": 1.0069, "step": 9233 }, { - "epoch": 0.2620317820658343, + "epoch": 0.2616679418515685, "grad_norm": 0.0, - "learning_rate": 1.730707759987891e-05, - "loss": 1.0479, + "learning_rate": 1.7314893054925604e-05, + "loss": 0.9232, "step": 9234 }, { - "epoch": 0.2620601589103292, + "epoch": 0.26169627929383094, "grad_norm": 0.0, - "learning_rate": 1.7306450123252592e-05, - "loss": 0.983, + "learning_rate": 1.731426722683403e-05, + "loss": 0.8618, "step": 9235 }, { - "epoch": 0.26208853575482405, + "epoch": 0.2617246167360934, "grad_norm": 0.0, - "learning_rate": 1.730582258490836e-05, - "loss": 1.0597, + "learning_rate": 1.7313641337131668e-05, + "loss": 0.9944, "step": 9236 }, { - "epoch": 0.26211691259931896, + "epoch": 0.26175295417835587, "grad_norm": 0.0, - "learning_rate": 1.7305194984851515e-05, - "loss": 1.02, + "learning_rate": 1.731301538582379e-05, + "loss": 1.0239, "step": 9237 }, { - "epoch": 0.2621452894438138, + "epoch": 0.2617812916206183, "grad_norm": 0.0, - "learning_rate": 1.7304567323087362e-05, - "loss": 1.0718, + "learning_rate": 1.7312389372915664e-05, + "loss": 1.0183, "step": 9238 }, { - "epoch": 0.26217366628830874, + "epoch": 0.2618096290628808, "grad_norm": 0.0, - "learning_rate": 1.73039395996212e-05, - "loss": 0.9708, + "learning_rate": 1.731176329841257e-05, + "loss": 0.9444, "step": 9239 }, { - "epoch": 0.26220204313280365, + "epoch": 0.26183796650514324, "grad_norm": 0.0, - "learning_rate": 1.730331181445833e-05, - "loss": 1.0368, + "learning_rate": 1.731113716231978e-05, + "loss": 0.9002, "step": 9240 }, { - "epoch": 0.2622304199772985, + "epoch": 0.26186630394740573, "grad_norm": 0.0, - "learning_rate": 1.7302683967604062e-05, - "loss": 0.9983, + "learning_rate": 1.7310510964642564e-05, + "loss": 0.961, "step": 9241 }, { - "epoch": 0.2622587968217934, + "epoch": 0.26189464138966817, "grad_norm": 0.0, - "learning_rate": 1.730205605906369e-05, - "loss": 0.8524, + "learning_rate": 1.73098847053862e-05, + "loss": 0.8992, "step": 9242 }, { - "epoch": 0.2622871736662883, + "epoch": 0.2619229788319306, "grad_norm": 0.0, - "learning_rate": 1.730142808884252e-05, - "loss": 0.9476, + "learning_rate": 1.7309258384555962e-05, + "loss": 1.019, "step": 9243 }, { - "epoch": 0.2623155505107832, + "epoch": 0.2619513162741931, "grad_norm": 0.0, - "learning_rate": 1.7300800056945865e-05, - "loss": 1.0072, + "learning_rate": 1.730863200215713e-05, + "loss": 0.9866, "step": 9244 }, { - "epoch": 0.2623439273552781, + "epoch": 0.26197965371645554, "grad_norm": 0.0, - "learning_rate": 1.730017196337902e-05, - "loss": 1.0895, + "learning_rate": 1.7308005558194974e-05, + "loss": 0.9746, "step": 9245 }, { - "epoch": 0.262372304199773, + "epoch": 0.26200799115871803, "grad_norm": 0.0, - "learning_rate": 1.729954380814729e-05, - "loss": 1.0362, + "learning_rate": 1.7307379052674772e-05, + "loss": 1.0692, "step": 9246 }, { - "epoch": 0.2624006810442679, + "epoch": 0.2620363286009805, "grad_norm": 0.0, - "learning_rate": 1.7298915591255993e-05, - "loss": 0.9064, + "learning_rate": 1.7306752485601807e-05, + "loss": 0.9337, "step": 9247 }, { - "epoch": 0.26242905788876275, + "epoch": 0.2620646660432429, "grad_norm": 0.0, - "learning_rate": 1.7298287312710423e-05, - "loss": 0.9333, + "learning_rate": 1.7306125856981348e-05, + "loss": 0.9077, "step": 9248 }, { - "epoch": 0.26245743473325767, + "epoch": 0.2620930034855054, "grad_norm": 0.0, - "learning_rate": 1.7297658972515895e-05, - "loss": 1.1161, + "learning_rate": 1.730549916681868e-05, + "loss": 1.0121, "step": 9249 }, { - "epoch": 0.2624858115777526, + "epoch": 0.26212134092776784, "grad_norm": 0.0, - "learning_rate": 1.729703057067771e-05, - "loss": 0.9807, + "learning_rate": 1.7304872415119078e-05, + "loss": 0.9665, "step": 9250 }, { - "epoch": 0.26251418842224744, + "epoch": 0.26214967837003034, "grad_norm": 0.0, - "learning_rate": 1.7296402107201182e-05, - "loss": 0.9396, + "learning_rate": 1.7304245601887825e-05, + "loss": 0.9115, "step": 9251 }, { - "epoch": 0.26254256526674236, + "epoch": 0.2621780158122928, "grad_norm": 0.0, - "learning_rate": 1.729577358209162e-05, - "loss": 1.0162, + "learning_rate": 1.73036187271302e-05, + "loss": 1.0773, "step": 9252 }, { - "epoch": 0.2625709421112372, + "epoch": 0.26220635325455527, "grad_norm": 0.0, - "learning_rate": 1.729514499535433e-05, - "loss": 0.8221, + "learning_rate": 1.7302991790851477e-05, + "loss": 0.9559, "step": 9253 }, { - "epoch": 0.26259931895573213, + "epoch": 0.2622346906968177, "grad_norm": 0.0, - "learning_rate": 1.7294516346994615e-05, - "loss": 0.9553, + "learning_rate": 1.730236479305695e-05, + "loss": 0.9652, "step": 9254 }, { - "epoch": 0.262627695800227, + "epoch": 0.26226302813908015, "grad_norm": 0.0, - "learning_rate": 1.7293887637017798e-05, - "loss": 0.8331, + "learning_rate": 1.7301737733751888e-05, + "loss": 1.0104, "step": 9255 }, { - "epoch": 0.2626560726447219, + "epoch": 0.26229136558134264, "grad_norm": 0.0, - "learning_rate": 1.7293258865429186e-05, - "loss": 0.9341, + "learning_rate": 1.730111061294158e-05, + "loss": 0.8769, "step": 9256 }, { - "epoch": 0.2626844494892168, + "epoch": 0.2623197030236051, "grad_norm": 0.0, - "learning_rate": 1.7292630032234086e-05, - "loss": 0.8464, + "learning_rate": 1.73004834306313e-05, + "loss": 0.9313, "step": 9257 }, { - "epoch": 0.2627128263337117, + "epoch": 0.26234804046586757, "grad_norm": 0.0, - "learning_rate": 1.7292001137437814e-05, - "loss": 0.9588, + "learning_rate": 1.7299856186826344e-05, + "loss": 1.0465, "step": 9258 }, { - "epoch": 0.2627412031782066, + "epoch": 0.26237637790813, "grad_norm": 0.0, - "learning_rate": 1.7291372181045678e-05, - "loss": 0.8554, + "learning_rate": 1.7299228881531984e-05, + "loss": 0.973, "step": 9259 }, { - "epoch": 0.26276958002270145, + "epoch": 0.26240471535039245, "grad_norm": 0.0, - "learning_rate": 1.7290743163062995e-05, - "loss": 0.9484, + "learning_rate": 1.729860151475351e-05, + "loss": 0.9476, "step": 9260 }, { - "epoch": 0.26279795686719637, + "epoch": 0.26243305279265494, "grad_norm": 0.0, - "learning_rate": 1.7290114083495074e-05, - "loss": 0.8868, + "learning_rate": 1.7297974086496204e-05, + "loss": 0.9858, "step": 9261 }, { - "epoch": 0.2628263337116913, + "epoch": 0.2624613902349174, "grad_norm": 0.0, - "learning_rate": 1.7289484942347238e-05, - "loss": 1.0292, + "learning_rate": 1.7297346596765357e-05, + "loss": 1.0053, "step": 9262 }, { - "epoch": 0.26285471055618614, + "epoch": 0.2624897276771799, "grad_norm": 0.0, - "learning_rate": 1.728885573962479e-05, - "loss": 1.0001, + "learning_rate": 1.7296719045566244e-05, + "loss": 1.0378, "step": 9263 }, { - "epoch": 0.26288308740068106, + "epoch": 0.2625180651194423, "grad_norm": 0.0, - "learning_rate": 1.728822647533305e-05, - "loss": 0.9054, + "learning_rate": 1.7296091432904164e-05, + "loss": 1.0253, "step": 9264 }, { - "epoch": 0.2629114642451759, + "epoch": 0.2625464025617048, "grad_norm": 0.0, - "learning_rate": 1.7287597149477336e-05, - "loss": 1.0363, + "learning_rate": 1.7295463758784392e-05, + "loss": 0.9129, "step": 9265 }, { - "epoch": 0.26293984108967083, + "epoch": 0.26257474000396724, "grad_norm": 0.0, - "learning_rate": 1.7286967762062958e-05, - "loss": 1.0357, + "learning_rate": 1.729483602321222e-05, + "loss": 1.0601, "step": 9266 }, { - "epoch": 0.2629682179341657, + "epoch": 0.2626030774462297, "grad_norm": 0.0, - "learning_rate": 1.7286338313095237e-05, - "loss": 0.9485, + "learning_rate": 1.7294208226192935e-05, + "loss": 0.8058, "step": 9267 }, { - "epoch": 0.2629965947786606, + "epoch": 0.2626314148884922, "grad_norm": 0.0, - "learning_rate": 1.728570880257949e-05, - "loss": 0.961, + "learning_rate": 1.7293580367731824e-05, + "loss": 0.988, "step": 9268 }, { - "epoch": 0.2630249716231555, + "epoch": 0.2626597523307546, "grad_norm": 0.0, - "learning_rate": 1.7285079230521034e-05, - "loss": 0.905, + "learning_rate": 1.729295244783418e-05, + "loss": 0.9088, "step": 9269 }, { - "epoch": 0.2630533484676504, + "epoch": 0.2626880897730171, "grad_norm": 0.0, - "learning_rate": 1.7284449596925183e-05, - "loss": 0.9652, + "learning_rate": 1.729232446650529e-05, + "loss": 0.9603, "step": 9270 }, { - "epoch": 0.2630817253121453, + "epoch": 0.26271642721527955, "grad_norm": 0.0, - "learning_rate": 1.728381990179726e-05, - "loss": 0.8705, + "learning_rate": 1.729169642375044e-05, + "loss": 0.9191, "step": 9271 }, { - "epoch": 0.26311010215664016, + "epoch": 0.262744764657542, "grad_norm": 0.0, - "learning_rate": 1.7283190145142582e-05, - "loss": 1.0064, + "learning_rate": 1.7291068319574923e-05, + "loss": 1.0137, "step": 9272 }, { - "epoch": 0.2631384790011351, + "epoch": 0.2627731020998045, "grad_norm": 0.0, - "learning_rate": 1.7282560326966475e-05, - "loss": 0.9745, + "learning_rate": 1.7290440153984033e-05, + "loss": 0.9611, "step": 9273 }, { - "epoch": 0.26316685584563, + "epoch": 0.2628014395420669, "grad_norm": 0.0, - "learning_rate": 1.7281930447274247e-05, - "loss": 0.9886, + "learning_rate": 1.7289811926983054e-05, + "loss": 1.0367, "step": 9274 }, { - "epoch": 0.26319523269012485, + "epoch": 0.2628297769843294, "grad_norm": 0.0, - "learning_rate": 1.728130050607123e-05, - "loss": 0.8678, + "learning_rate": 1.7289183638577286e-05, + "loss": 0.9269, "step": 9275 }, { - "epoch": 0.26322360953461976, + "epoch": 0.26285811442659185, "grad_norm": 0.0, - "learning_rate": 1.7280670503362732e-05, - "loss": 0.995, + "learning_rate": 1.728855528877202e-05, + "loss": 0.9821, "step": 9276 }, { - "epoch": 0.2632519863791146, + "epoch": 0.26288645186885434, "grad_norm": 0.0, - "learning_rate": 1.728004043915409e-05, - "loss": 0.9945, + "learning_rate": 1.7287926877572543e-05, + "loss": 1.0603, "step": 9277 }, { - "epoch": 0.26328036322360954, + "epoch": 0.2629147893111168, "grad_norm": 0.0, - "learning_rate": 1.727941031345062e-05, - "loss": 1.0094, + "learning_rate": 1.728729840498415e-05, + "loss": 0.9521, "step": 9278 }, { - "epoch": 0.26330874006810445, + "epoch": 0.2629431267533792, "grad_norm": 0.0, - "learning_rate": 1.727878012625764e-05, - "loss": 1.0638, + "learning_rate": 1.728666987101214e-05, + "loss": 0.9852, "step": 9279 }, { - "epoch": 0.2633371169125993, + "epoch": 0.2629714641956417, "grad_norm": 0.0, - "learning_rate": 1.727814987758048e-05, - "loss": 0.9515, + "learning_rate": 1.7286041275661796e-05, + "loss": 0.9367, "step": 9280 }, { - "epoch": 0.2633654937570942, + "epoch": 0.26299980163790415, "grad_norm": 0.0, - "learning_rate": 1.7277519567424458e-05, - "loss": 0.9443, + "learning_rate": 1.728541261893843e-05, + "loss": 0.9519, "step": 9281 }, { - "epoch": 0.2633938706015891, + "epoch": 0.26302813908016665, "grad_norm": 0.0, - "learning_rate": 1.7276889195794905e-05, - "loss": 1.0428, + "learning_rate": 1.7284783900847327e-05, + "loss": 1.0379, "step": 9282 }, { - "epoch": 0.263422247446084, + "epoch": 0.2630564765224291, "grad_norm": 0.0, - "learning_rate": 1.727625876269714e-05, - "loss": 0.9746, + "learning_rate": 1.728415512139378e-05, + "loss": 1.0128, "step": 9283 }, { - "epoch": 0.26345062429057886, + "epoch": 0.2630848139646915, "grad_norm": 0.0, - "learning_rate": 1.7275628268136487e-05, - "loss": 1.0823, + "learning_rate": 1.7283526280583092e-05, + "loss": 0.9529, "step": 9284 }, { - "epoch": 0.2634790011350738, + "epoch": 0.263113151406954, "grad_norm": 0.0, - "learning_rate": 1.7274997712118276e-05, - "loss": 0.9008, + "learning_rate": 1.7282897378420557e-05, + "loss": 0.974, "step": 9285 }, { - "epoch": 0.2635073779795687, + "epoch": 0.26314148884921645, "grad_norm": 0.0, - "learning_rate": 1.7274367094647836e-05, - "loss": 1.0526, + "learning_rate": 1.728226841491147e-05, + "loss": 1.0457, "step": 9286 }, { - "epoch": 0.26353575482406355, + "epoch": 0.26316982629147895, "grad_norm": 0.0, - "learning_rate": 1.7273736415730488e-05, - "loss": 1.0591, + "learning_rate": 1.7281639390061136e-05, + "loss": 0.9419, "step": 9287 }, { - "epoch": 0.26356413166855847, + "epoch": 0.2631981637337414, "grad_norm": 0.0, - "learning_rate": 1.7273105675371562e-05, - "loss": 0.9861, + "learning_rate": 1.728101030387485e-05, + "loss": 0.918, "step": 9288 }, { - "epoch": 0.2635925085130533, + "epoch": 0.2632265011760039, "grad_norm": 0.0, - "learning_rate": 1.7272474873576385e-05, - "loss": 0.9512, + "learning_rate": 1.7280381156357907e-05, + "loss": 0.9347, "step": 9289 }, { - "epoch": 0.26362088535754824, + "epoch": 0.2632548386182663, "grad_norm": 0.0, - "learning_rate": 1.727184401035029e-05, - "loss": 1.0558, + "learning_rate": 1.727975194751561e-05, + "loss": 0.9269, "step": 9290 }, { - "epoch": 0.26364926220204316, + "epoch": 0.26328317606052876, "grad_norm": 0.0, - "learning_rate": 1.7271213085698593e-05, - "loss": 0.9839, + "learning_rate": 1.7279122677353263e-05, + "loss": 0.9424, "step": 9291 }, { - "epoch": 0.263677639046538, + "epoch": 0.26331151350279125, "grad_norm": 0.0, - "learning_rate": 1.7270582099626637e-05, - "loss": 0.9518, + "learning_rate": 1.7278493345876158e-05, + "loss": 1.0365, "step": 9292 }, { - "epoch": 0.26370601589103293, + "epoch": 0.2633398509450537, "grad_norm": 0.0, - "learning_rate": 1.7269951052139745e-05, - "loss": 0.9352, + "learning_rate": 1.7277863953089605e-05, + "loss": 0.9938, "step": 9293 }, { - "epoch": 0.2637343927355278, + "epoch": 0.2633681883873162, "grad_norm": 0.0, - "learning_rate": 1.7269319943243252e-05, - "loss": 0.9463, + "learning_rate": 1.7277234498998897e-05, + "loss": 1.0443, "step": 9294 }, { - "epoch": 0.2637627695800227, + "epoch": 0.2633965258295786, "grad_norm": 0.0, - "learning_rate": 1.7268688772942487e-05, - "loss": 1.0039, + "learning_rate": 1.7276604983609344e-05, + "loss": 0.9567, "step": 9295 }, { - "epoch": 0.2637911464245176, + "epoch": 0.26342486327184106, "grad_norm": 0.0, - "learning_rate": 1.7268057541242778e-05, - "loss": 0.9784, + "learning_rate": 1.7275975406926243e-05, + "loss": 0.9076, "step": 9296 }, { - "epoch": 0.2638195232690125, + "epoch": 0.26345320071410355, "grad_norm": 0.0, - "learning_rate": 1.7267426248149467e-05, - "loss": 0.9904, + "learning_rate": 1.72753457689549e-05, + "loss": 1.0137, "step": 9297 }, { - "epoch": 0.2638479001135074, + "epoch": 0.263481538156366, "grad_norm": 0.0, - "learning_rate": 1.7266794893667877e-05, - "loss": 0.9969, + "learning_rate": 1.727471606970062e-05, + "loss": 1.003, "step": 9298 }, { - "epoch": 0.26387627695800225, + "epoch": 0.2635098755986285, "grad_norm": 0.0, - "learning_rate": 1.726616347780334e-05, - "loss": 0.9223, + "learning_rate": 1.7274086309168702e-05, + "loss": 0.995, "step": 9299 }, { - "epoch": 0.26390465380249717, + "epoch": 0.2635382130408909, "grad_norm": 0.0, - "learning_rate": 1.7265532000561193e-05, - "loss": 1.0209, + "learning_rate": 1.7273456487364458e-05, + "loss": 1.1048, "step": 9300 }, { - "epoch": 0.26393303064699203, + "epoch": 0.2635665504831534, "grad_norm": 0.0, - "learning_rate": 1.7264900461946777e-05, - "loss": 1.0285, + "learning_rate": 1.7272826604293182e-05, + "loss": 0.7808, "step": 9301 }, { - "epoch": 0.26396140749148694, + "epoch": 0.26359488792541586, "grad_norm": 0.0, - "learning_rate": 1.7264268861965416e-05, - "loss": 0.916, + "learning_rate": 1.727219665996019e-05, + "loss": 1.0707, "step": 9302 }, { - "epoch": 0.26398978433598186, + "epoch": 0.2636232253676783, "grad_norm": 0.0, - "learning_rate": 1.7263637200622447e-05, - "loss": 1.0321, + "learning_rate": 1.727156665437079e-05, + "loss": 1.0773, "step": 9303 }, { - "epoch": 0.2640181611804767, + "epoch": 0.2636515628099408, "grad_norm": 0.0, - "learning_rate": 1.726300547792321e-05, - "loss": 0.9254, + "learning_rate": 1.7270936587530278e-05, + "loss": 1.0435, "step": 9304 }, { - "epoch": 0.26404653802497163, + "epoch": 0.2636799002522032, "grad_norm": 0.0, - "learning_rate": 1.726237369387304e-05, - "loss": 1.0241, + "learning_rate": 1.7270306459443972e-05, + "loss": 0.923, "step": 9305 }, { - "epoch": 0.2640749148694665, + "epoch": 0.2637082376944657, "grad_norm": 0.0, - "learning_rate": 1.7261741848477277e-05, - "loss": 1.0356, + "learning_rate": 1.726967627011717e-05, + "loss": 1.0712, "step": 9306 }, { - "epoch": 0.2641032917139614, + "epoch": 0.26373657513672816, "grad_norm": 0.0, - "learning_rate": 1.726110994174125e-05, - "loss": 0.8735, + "learning_rate": 1.7269046019555188e-05, + "loss": 1.0206, "step": 9307 }, { - "epoch": 0.2641316685584563, + "epoch": 0.2637649125789906, "grad_norm": 0.0, - "learning_rate": 1.72604779736703e-05, - "loss": 0.8722, + "learning_rate": 1.726841570776333e-05, + "loss": 0.9612, "step": 9308 }, { - "epoch": 0.2641600454029512, + "epoch": 0.2637932500212531, "grad_norm": 0.0, - "learning_rate": 1.7259845944269765e-05, - "loss": 1.0392, + "learning_rate": 1.726778533474691e-05, + "loss": 1.0316, "step": 9309 }, { - "epoch": 0.2641884222474461, + "epoch": 0.26382158746351553, "grad_norm": 0.0, - "learning_rate": 1.7259213853544992e-05, - "loss": 0.989, + "learning_rate": 1.7267154900511233e-05, + "loss": 0.9657, "step": 9310 }, { - "epoch": 0.26421679909194096, + "epoch": 0.263849924905778, "grad_norm": 0.0, - "learning_rate": 1.7258581701501305e-05, - "loss": 0.9407, + "learning_rate": 1.726652440506161e-05, + "loss": 0.9633, "step": 9311 }, { - "epoch": 0.26424517593643587, + "epoch": 0.26387826234804046, "grad_norm": 0.0, - "learning_rate": 1.7257949488144058e-05, - "loss": 0.8298, + "learning_rate": 1.726589384840336e-05, + "loss": 0.9995, "step": 9312 }, { - "epoch": 0.2642735527809308, + "epoch": 0.26390659979030295, "grad_norm": 0.0, - "learning_rate": 1.7257317213478583e-05, - "loss": 0.9978, + "learning_rate": 1.7265263230541783e-05, + "loss": 1.0277, "step": 9313 }, { - "epoch": 0.26430192962542565, + "epoch": 0.2639349372325654, "grad_norm": 0.0, - "learning_rate": 1.7256684877510223e-05, - "loss": 1.0486, + "learning_rate": 1.7264632551482198e-05, + "loss": 0.9752, "step": 9314 }, { - "epoch": 0.26433030646992056, + "epoch": 0.26396327467482783, "grad_norm": 0.0, - "learning_rate": 1.725605248024432e-05, - "loss": 0.9142, + "learning_rate": 1.7264001811229917e-05, + "loss": 1.0762, "step": 9315 }, { - "epoch": 0.2643586833144154, + "epoch": 0.2639916121170903, "grad_norm": 0.0, - "learning_rate": 1.7255420021686214e-05, - "loss": 0.9787, + "learning_rate": 1.726337100979025e-05, + "loss": 1.0334, "step": 9316 }, { - "epoch": 0.26438706015891034, + "epoch": 0.26401994955935276, "grad_norm": 0.0, - "learning_rate": 1.7254787501841252e-05, - "loss": 0.9949, + "learning_rate": 1.7262740147168508e-05, + "loss": 0.9027, "step": 9317 }, { - "epoch": 0.2644154370034052, + "epoch": 0.26404828700161526, "grad_norm": 0.0, - "learning_rate": 1.725415492071477e-05, - "loss": 0.9019, + "learning_rate": 1.726210922337001e-05, + "loss": 0.8699, "step": 9318 }, { - "epoch": 0.2644438138479001, + "epoch": 0.2640766244438777, "grad_norm": 0.0, - "learning_rate": 1.725352227831212e-05, - "loss": 0.8635, + "learning_rate": 1.726147823840007e-05, + "loss": 0.9842, "step": 9319 }, { - "epoch": 0.264472190692395, + "epoch": 0.26410496188614013, "grad_norm": 0.0, - "learning_rate": 1.725288957463864e-05, - "loss": 1.01, + "learning_rate": 1.7260847192264005e-05, + "loss": 0.9851, "step": 9320 }, { - "epoch": 0.2645005675368899, + "epoch": 0.2641332993284026, "grad_norm": 0.0, - "learning_rate": 1.7252256809699672e-05, - "loss": 0.954, + "learning_rate": 1.7260216084967127e-05, + "loss": 0.9522, "step": 9321 }, { - "epoch": 0.2645289443813848, + "epoch": 0.26416163677066506, "grad_norm": 0.0, - "learning_rate": 1.7251623983500566e-05, - "loss": 1.0459, + "learning_rate": 1.725958491651475e-05, + "loss": 0.8544, "step": 9322 }, { - "epoch": 0.26455732122587966, + "epoch": 0.26418997421292756, "grad_norm": 0.0, - "learning_rate": 1.725099109604667e-05, - "loss": 1.0792, + "learning_rate": 1.7258953686912197e-05, + "loss": 0.979, "step": 9323 }, { - "epoch": 0.2645856980703746, + "epoch": 0.26421831165519, "grad_norm": 0.0, - "learning_rate": 1.725035814734332e-05, - "loss": 0.9249, + "learning_rate": 1.725832239616478e-05, + "loss": 1.0422, "step": 9324 }, { - "epoch": 0.2646140749148695, + "epoch": 0.2642466490974525, "grad_norm": 0.0, - "learning_rate": 1.7249725137395876e-05, - "loss": 0.8573, + "learning_rate": 1.725769104427782e-05, + "loss": 0.9971, "step": 9325 }, { - "epoch": 0.26464245175936435, + "epoch": 0.26427498653971493, "grad_norm": 0.0, - "learning_rate": 1.7249092066209674e-05, - "loss": 0.9313, + "learning_rate": 1.725705963125663e-05, + "loss": 0.9796, "step": 9326 }, { - "epoch": 0.26467082860385927, + "epoch": 0.26430332398197737, "grad_norm": 0.0, - "learning_rate": 1.7248458933790068e-05, - "loss": 1.0219, + "learning_rate": 1.7256428157106532e-05, + "loss": 0.9632, "step": 9327 }, { - "epoch": 0.2646992054483541, + "epoch": 0.26433166142423986, "grad_norm": 0.0, - "learning_rate": 1.72478257401424e-05, - "loss": 0.9731, + "learning_rate": 1.7255796621832844e-05, + "loss": 0.9911, "step": 9328 }, { - "epoch": 0.26472758229284904, + "epoch": 0.2643599988665023, "grad_norm": 0.0, - "learning_rate": 1.724719248527202e-05, - "loss": 0.993, + "learning_rate": 1.7255165025440893e-05, + "loss": 0.9072, "step": 9329 }, { - "epoch": 0.26475595913734395, + "epoch": 0.2643883363087648, "grad_norm": 0.0, - "learning_rate": 1.7246559169184284e-05, - "loss": 0.9265, + "learning_rate": 1.7254533367935987e-05, + "loss": 0.9865, "step": 9330 }, { - "epoch": 0.2647843359818388, + "epoch": 0.26441667375102723, "grad_norm": 0.0, - "learning_rate": 1.7245925791884538e-05, - "loss": 0.9473, + "learning_rate": 1.7253901649323454e-05, + "loss": 0.9721, "step": 9331 }, { - "epoch": 0.26481271282633373, + "epoch": 0.26444501119328967, "grad_norm": 0.0, - "learning_rate": 1.7245292353378128e-05, - "loss": 0.8876, + "learning_rate": 1.7253269869608616e-05, + "loss": 1.0095, "step": 9332 }, { - "epoch": 0.2648410896708286, + "epoch": 0.26447334863555216, "grad_norm": 0.0, - "learning_rate": 1.724465885367041e-05, - "loss": 1.0649, + "learning_rate": 1.7252638028796788e-05, + "loss": 0.9896, "step": 9333 }, { - "epoch": 0.2648694665153235, + "epoch": 0.2645016860778146, "grad_norm": 0.0, - "learning_rate": 1.724402529276673e-05, - "loss": 0.9272, + "learning_rate": 1.72520061268933e-05, + "loss": 0.8992, "step": 9334 }, { - "epoch": 0.26489784335981836, + "epoch": 0.2645300235200771, "grad_norm": 0.0, - "learning_rate": 1.7243391670672445e-05, - "loss": 1.131, + "learning_rate": 1.725137416390347e-05, + "loss": 0.9673, "step": 9335 }, { - "epoch": 0.2649262202043133, + "epoch": 0.26455836096233953, "grad_norm": 0.0, - "learning_rate": 1.7242757987392903e-05, - "loss": 1.0486, + "learning_rate": 1.7250742139832623e-05, + "loss": 1.0047, "step": 9336 }, { - "epoch": 0.2649545970488082, + "epoch": 0.264586698404602, "grad_norm": 0.0, - "learning_rate": 1.7242124242933462e-05, - "loss": 0.9348, + "learning_rate": 1.7250110054686084e-05, + "loss": 0.9633, "step": 9337 }, { - "epoch": 0.26498297389330305, + "epoch": 0.26461503584686447, "grad_norm": 0.0, - "learning_rate": 1.724149043729947e-05, - "loss": 0.849, + "learning_rate": 1.7249477908469174e-05, + "loss": 1.048, "step": 9338 }, { - "epoch": 0.26501135073779797, + "epoch": 0.2646433732891269, "grad_norm": 0.0, - "learning_rate": 1.7240856570496283e-05, - "loss": 1.0269, + "learning_rate": 1.724884570118722e-05, + "loss": 0.9617, "step": 9339 }, { - "epoch": 0.26503972758229283, + "epoch": 0.2646717107313894, "grad_norm": 0.0, - "learning_rate": 1.724022264252925e-05, - "loss": 1.0272, + "learning_rate": 1.7248213432845546e-05, + "loss": 1.0076, "step": 9340 }, { - "epoch": 0.26506810442678774, + "epoch": 0.26470004817365184, "grad_norm": 0.0, - "learning_rate": 1.7239588653403737e-05, - "loss": 1.0032, + "learning_rate": 1.724758110344948e-05, + "loss": 0.8993, "step": 9341 }, { - "epoch": 0.26509648127128266, + "epoch": 0.26472838561591433, "grad_norm": 0.0, - "learning_rate": 1.7238954603125093e-05, - "loss": 0.9149, + "learning_rate": 1.7246948713004346e-05, + "loss": 0.9516, "step": 9342 }, { - "epoch": 0.2651248581157775, + "epoch": 0.26475672305817677, "grad_norm": 0.0, - "learning_rate": 1.7238320491698674e-05, - "loss": 1.0194, + "learning_rate": 1.7246316261515475e-05, + "loss": 0.926, "step": 9343 }, { - "epoch": 0.26515323496027243, + "epoch": 0.2647850605004392, "grad_norm": 0.0, - "learning_rate": 1.7237686319129835e-05, - "loss": 0.9307, + "learning_rate": 1.724568374898819e-05, + "loss": 0.8513, "step": 9344 }, { - "epoch": 0.2651816118047673, + "epoch": 0.2648133979427017, "grad_norm": 0.0, - "learning_rate": 1.7237052085423937e-05, - "loss": 0.9677, + "learning_rate": 1.7245051175427816e-05, + "loss": 1.0131, "step": 9345 }, { - "epoch": 0.2652099886492622, + "epoch": 0.26484173538496414, "grad_norm": 0.0, - "learning_rate": 1.723641779058633e-05, - "loss": 0.9522, + "learning_rate": 1.7244418540839688e-05, + "loss": 0.9584, "step": 9346 }, { - "epoch": 0.26523836549375707, + "epoch": 0.26487007282722663, "grad_norm": 0.0, - "learning_rate": 1.723578343462238e-05, - "loss": 0.8301, + "learning_rate": 1.7243785845229134e-05, + "loss": 0.8795, "step": 9347 }, { - "epoch": 0.265266742338252, + "epoch": 0.26489841026948907, "grad_norm": 0.0, - "learning_rate": 1.723514901753744e-05, - "loss": 0.9649, + "learning_rate": 1.7243153088601482e-05, + "loss": 1.0121, "step": 9348 }, { - "epoch": 0.2652951191827469, + "epoch": 0.26492674771175156, "grad_norm": 0.0, - "learning_rate": 1.723451453933687e-05, - "loss": 0.9919, + "learning_rate": 1.724252027096206e-05, + "loss": 1.0938, "step": 9349 }, { - "epoch": 0.26532349602724176, + "epoch": 0.264955085154014, "grad_norm": 0.0, - "learning_rate": 1.7233880000026034e-05, - "loss": 1.0782, + "learning_rate": 1.7241887392316204e-05, + "loss": 0.9519, "step": 9350 }, { - "epoch": 0.26535187287173667, + "epoch": 0.26498342259627644, "grad_norm": 0.0, - "learning_rate": 1.723324539961029e-05, - "loss": 1.0051, + "learning_rate": 1.7241254452669235e-05, + "loss": 0.9717, "step": 9351 }, { - "epoch": 0.26538024971623153, + "epoch": 0.26501176003853893, "grad_norm": 0.0, - "learning_rate": 1.7232610738094992e-05, - "loss": 0.9897, + "learning_rate": 1.7240621452026494e-05, + "loss": 1.0188, "step": 9352 }, { - "epoch": 0.26540862656072645, + "epoch": 0.2650400974808014, "grad_norm": 0.0, - "learning_rate": 1.7231976015485505e-05, - "loss": 1.0836, + "learning_rate": 1.723998839039331e-05, + "loss": 1.0629, "step": 9353 }, { - "epoch": 0.26543700340522136, + "epoch": 0.26506843492306387, "grad_norm": 0.0, - "learning_rate": 1.72313412317872e-05, - "loss": 0.9338, + "learning_rate": 1.723935526777502e-05, + "loss": 0.9527, "step": 9354 }, { - "epoch": 0.2654653802497162, + "epoch": 0.2650967723653263, "grad_norm": 0.0, - "learning_rate": 1.723070638700542e-05, - "loss": 0.9218, + "learning_rate": 1.7238722084176943e-05, + "loss": 0.9781, "step": 9355 }, { - "epoch": 0.26549375709421114, + "epoch": 0.26512510980758874, "grad_norm": 0.0, - "learning_rate": 1.7230071481145546e-05, - "loss": 1.0128, + "learning_rate": 1.723808883960443e-05, + "loss": 1.1164, "step": 9356 }, { - "epoch": 0.265522133938706, + "epoch": 0.26515344724985124, "grad_norm": 0.0, - "learning_rate": 1.7229436514212926e-05, - "loss": 1.0416, + "learning_rate": 1.72374555340628e-05, + "loss": 0.9905, "step": 9357 }, { - "epoch": 0.2655505107832009, + "epoch": 0.2651817846921137, "grad_norm": 0.0, - "learning_rate": 1.7228801486212937e-05, - "loss": 0.9667, + "learning_rate": 1.72368221675574e-05, + "loss": 0.8927, "step": 9358 }, { - "epoch": 0.2655788876276958, + "epoch": 0.26521012213437617, "grad_norm": 0.0, - "learning_rate": 1.7228166397150932e-05, - "loss": 0.946, + "learning_rate": 1.7236188740093557e-05, + "loss": 0.9353, "step": 9359 }, { - "epoch": 0.2656072644721907, + "epoch": 0.2652384595766386, "grad_norm": 0.0, - "learning_rate": 1.7227531247032286e-05, - "loss": 0.9508, + "learning_rate": 1.7235555251676608e-05, + "loss": 0.9792, "step": 9360 }, { - "epoch": 0.2656356413166856, + "epoch": 0.2652667970189011, "grad_norm": 0.0, - "learning_rate": 1.7226896035862357e-05, - "loss": 1.0574, + "learning_rate": 1.723492170231189e-05, + "loss": 0.868, "step": 9361 }, { - "epoch": 0.26566401816118046, + "epoch": 0.26529513446116354, "grad_norm": 0.0, - "learning_rate": 1.7226260763646513e-05, - "loss": 0.8796, + "learning_rate": 1.7234288092004745e-05, + "loss": 1.0407, "step": 9362 }, { - "epoch": 0.2656923950056754, + "epoch": 0.265323471903426, "grad_norm": 0.0, - "learning_rate": 1.7225625430390117e-05, - "loss": 0.9946, + "learning_rate": 1.72336544207605e-05, + "loss": 1.0099, "step": 9363 }, { - "epoch": 0.26572077185017023, + "epoch": 0.26535180934568847, "grad_norm": 0.0, - "learning_rate": 1.7224990036098536e-05, - "loss": 0.829, + "learning_rate": 1.72330206885845e-05, + "loss": 0.9926, "step": 9364 }, { - "epoch": 0.26574914869466515, + "epoch": 0.2653801467879509, "grad_norm": 0.0, - "learning_rate": 1.7224354580777144e-05, - "loss": 0.9834, + "learning_rate": 1.723238689548208e-05, + "loss": 0.966, "step": 9365 }, { - "epoch": 0.26577752553916006, + "epoch": 0.2654084842302134, "grad_norm": 0.0, - "learning_rate": 1.7223719064431304e-05, - "loss": 0.9991, + "learning_rate": 1.7231753041458578e-05, + "loss": 0.9475, "step": 9366 }, { - "epoch": 0.2658059023836549, + "epoch": 0.26543682167247584, "grad_norm": 0.0, - "learning_rate": 1.7223083487066382e-05, - "loss": 1.0188, + "learning_rate": 1.7231119126519336e-05, + "loss": 0.9387, "step": 9367 }, { - "epoch": 0.26583427922814984, + "epoch": 0.2654651591147383, "grad_norm": 0.0, - "learning_rate": 1.722244784868775e-05, - "loss": 0.9554, + "learning_rate": 1.7230485150669695e-05, + "loss": 1.0368, "step": 9368 }, { - "epoch": 0.2658626560726447, + "epoch": 0.2654934965570008, "grad_norm": 0.0, - "learning_rate": 1.7221812149300773e-05, - "loss": 0.9552, + "learning_rate": 1.722985111391499e-05, + "loss": 0.9939, "step": 9369 }, { - "epoch": 0.2658910329171396, + "epoch": 0.2655218339992632, "grad_norm": 0.0, - "learning_rate": 1.722117638891083e-05, - "loss": 0.9896, + "learning_rate": 1.7229217016260563e-05, + "loss": 0.9312, "step": 9370 }, { - "epoch": 0.26591940976163453, + "epoch": 0.2655501714415257, "grad_norm": 0.0, - "learning_rate": 1.7220540567523284e-05, - "loss": 0.9965, + "learning_rate": 1.7228582857711756e-05, + "loss": 1.0098, "step": 9371 }, { - "epoch": 0.2659477866061294, + "epoch": 0.26557850888378814, "grad_norm": 0.0, - "learning_rate": 1.7219904685143506e-05, - "loss": 1.1033, + "learning_rate": 1.7227948638273918e-05, + "loss": 0.951, "step": 9372 }, { - "epoch": 0.2659761634506243, + "epoch": 0.2656068463260506, "grad_norm": 0.0, - "learning_rate": 1.7219268741776868e-05, - "loss": 0.9799, + "learning_rate": 1.7227314357952378e-05, + "loss": 0.9563, "step": 9373 }, { - "epoch": 0.26600454029511916, + "epoch": 0.2656351837683131, "grad_norm": 0.0, - "learning_rate": 1.7218632737428743e-05, - "loss": 1.0666, + "learning_rate": 1.722668001675249e-05, + "loss": 0.9566, "step": 9374 }, { - "epoch": 0.2660329171396141, + "epoch": 0.2656635212105755, "grad_norm": 0.0, - "learning_rate": 1.72179966721045e-05, - "loss": 0.9825, + "learning_rate": 1.722604561467959e-05, + "loss": 0.9408, "step": 9375 }, { - "epoch": 0.266061293984109, + "epoch": 0.265691858652838, "grad_norm": 0.0, - "learning_rate": 1.7217360545809518e-05, - "loss": 0.922, + "learning_rate": 1.7225411151739023e-05, + "loss": 1.0768, "step": 9376 }, { - "epoch": 0.26608967082860385, + "epoch": 0.26572019609510045, "grad_norm": 0.0, - "learning_rate": 1.7216724358549165e-05, - "loss": 0.9987, + "learning_rate": 1.7224776627936135e-05, + "loss": 0.9432, "step": 9377 }, { - "epoch": 0.26611804767309877, + "epoch": 0.26574853353736294, "grad_norm": 0.0, - "learning_rate": 1.7216088110328823e-05, - "loss": 1.0163, + "learning_rate": 1.7224142043276273e-05, + "loss": 0.9767, "step": 9378 }, { - "epoch": 0.2661464245175936, + "epoch": 0.2657768709796254, "grad_norm": 0.0, - "learning_rate": 1.7215451801153852e-05, - "loss": 0.8994, + "learning_rate": 1.722350739776478e-05, + "loss": 0.9376, "step": 9379 }, { - "epoch": 0.26617480136208854, + "epoch": 0.2658052084218878, "grad_norm": 0.0, - "learning_rate": 1.721481543102964e-05, - "loss": 1.0334, + "learning_rate": 1.7222872691407e-05, + "loss": 0.9713, "step": 9380 }, { - "epoch": 0.2662031782065834, + "epoch": 0.2658335458641503, "grad_norm": 0.0, - "learning_rate": 1.7214178999961556e-05, - "loss": 0.9859, + "learning_rate": 1.722223792420828e-05, + "loss": 0.8972, "step": 9381 }, { - "epoch": 0.2662315550510783, + "epoch": 0.26586188330641275, "grad_norm": 0.0, - "learning_rate": 1.7213542507954978e-05, - "loss": 0.964, + "learning_rate": 1.7221603096173974e-05, + "loss": 0.8692, "step": 9382 }, { - "epoch": 0.26625993189557323, + "epoch": 0.26589022074867524, "grad_norm": 0.0, - "learning_rate": 1.7212905955015285e-05, - "loss": 0.9849, + "learning_rate": 1.7220968207309416e-05, + "loss": 0.9036, "step": 9383 }, { - "epoch": 0.2662883087400681, + "epoch": 0.2659185581909377, "grad_norm": 0.0, - "learning_rate": 1.7212269341147845e-05, - "loss": 0.9471, + "learning_rate": 1.7220333257619967e-05, + "loss": 0.8754, "step": 9384 }, { - "epoch": 0.266316685584563, + "epoch": 0.2659468956332001, "grad_norm": 0.0, - "learning_rate": 1.7211632666358045e-05, - "loss": 0.7941, + "learning_rate": 1.7219698247110964e-05, + "loss": 1.0059, "step": 9385 }, { - "epoch": 0.26634506242905787, + "epoch": 0.2659752330754626, "grad_norm": 0.0, - "learning_rate": 1.7210995930651262e-05, - "loss": 1.0847, + "learning_rate": 1.7219063175787768e-05, + "loss": 1.0238, "step": 9386 }, { - "epoch": 0.2663734392735528, + "epoch": 0.26600357051772505, "grad_norm": 0.0, - "learning_rate": 1.721035913403287e-05, - "loss": 0.9108, + "learning_rate": 1.721842804365572e-05, + "loss": 0.8687, "step": 9387 }, { - "epoch": 0.2664018161180477, + "epoch": 0.26603190795998755, "grad_norm": 0.0, - "learning_rate": 1.7209722276508253e-05, - "loss": 0.9084, + "learning_rate": 1.721779285072017e-05, + "loss": 0.9719, "step": 9388 }, { - "epoch": 0.26643019296254256, + "epoch": 0.26606024540225, "grad_norm": 0.0, - "learning_rate": 1.7209085358082785e-05, - "loss": 0.9973, + "learning_rate": 1.7217157596986474e-05, + "loss": 1.0893, "step": 9389 }, { - "epoch": 0.26645856980703747, + "epoch": 0.2660885828445125, "grad_norm": 0.0, - "learning_rate": 1.720844837876185e-05, - "loss": 1.0007, + "learning_rate": 1.7216522282459976e-05, + "loss": 0.8737, "step": 9390 }, { - "epoch": 0.26648694665153233, + "epoch": 0.2661169202867749, "grad_norm": 0.0, - "learning_rate": 1.7207811338550824e-05, - "loss": 0.8613, + "learning_rate": 1.7215886907146033e-05, + "loss": 1.0321, "step": 9391 }, { - "epoch": 0.26651532349602725, + "epoch": 0.26614525772903735, "grad_norm": 0.0, - "learning_rate": 1.7207174237455095e-05, - "loss": 1.075, + "learning_rate": 1.7215251471049994e-05, + "loss": 0.9886, "step": 9392 }, { - "epoch": 0.26654370034052216, + "epoch": 0.26617359517129985, "grad_norm": 0.0, - "learning_rate": 1.720653707548004e-05, - "loss": 1.0143, + "learning_rate": 1.7214615974177215e-05, + "loss": 0.9101, "step": 9393 }, { - "epoch": 0.266572077185017, + "epoch": 0.2662019326135623, "grad_norm": 0.0, - "learning_rate": 1.7205899852631044e-05, - "loss": 0.9501, + "learning_rate": 1.7213980416533043e-05, + "loss": 1.068, "step": 9394 }, { - "epoch": 0.26660045402951194, + "epoch": 0.2662302700558248, "grad_norm": 0.0, - "learning_rate": 1.720526256891349e-05, - "loss": 0.849, + "learning_rate": 1.7213344798122836e-05, + "loss": 1.0865, "step": 9395 }, { - "epoch": 0.2666288308740068, + "epoch": 0.2662586074980872, "grad_norm": 0.0, - "learning_rate": 1.7204625224332752e-05, - "loss": 0.9082, + "learning_rate": 1.721270911895195e-05, + "loss": 0.9173, "step": 9396 }, { - "epoch": 0.2666572077185017, + "epoch": 0.26628694494034966, "grad_norm": 0.0, - "learning_rate": 1.7203987818894226e-05, - "loss": 1.1013, + "learning_rate": 1.7212073379025733e-05, + "loss": 1.0283, "step": 9397 }, { - "epoch": 0.26668558456299657, + "epoch": 0.26631528238261215, "grad_norm": 0.0, - "learning_rate": 1.720335035260329e-05, - "loss": 1.0353, + "learning_rate": 1.7211437578349547e-05, + "loss": 1.0171, "step": 9398 }, { - "epoch": 0.2667139614074915, + "epoch": 0.2663436198248746, "grad_norm": 0.0, - "learning_rate": 1.720271282546533e-05, - "loss": 0.9284, + "learning_rate": 1.721080171692874e-05, + "loss": 0.9847, "step": 9399 }, { - "epoch": 0.2667423382519864, + "epoch": 0.2663719572671371, "grad_norm": 0.0, - "learning_rate": 1.7202075237485732e-05, - "loss": 0.9028, + "learning_rate": 1.7210165794768678e-05, + "loss": 0.8907, "step": 9400 }, { - "epoch": 0.26677071509648126, + "epoch": 0.2664002947093995, "grad_norm": 0.0, - "learning_rate": 1.720143758866988e-05, - "loss": 1.0701, + "learning_rate": 1.720952981187471e-05, + "loss": 1.0629, "step": 9401 }, { - "epoch": 0.2667990919409762, + "epoch": 0.266428632151662, "grad_norm": 0.0, - "learning_rate": 1.720079987902316e-05, - "loss": 0.9598, + "learning_rate": 1.7208893768252193e-05, + "loss": 0.9447, "step": 9402 }, { - "epoch": 0.26682746878547103, + "epoch": 0.26645696959392445, "grad_norm": 0.0, - "learning_rate": 1.720016210855096e-05, - "loss": 1.0081, + "learning_rate": 1.720825766390649e-05, + "loss": 0.9008, "step": 9403 }, { - "epoch": 0.26685584562996595, + "epoch": 0.2664853070361869, "grad_norm": 0.0, - "learning_rate": 1.7199524277258667e-05, - "loss": 0.952, + "learning_rate": 1.7207621498842953e-05, + "loss": 0.9913, "step": 9404 }, { - "epoch": 0.26688422247446086, + "epoch": 0.2665136444784494, "grad_norm": 0.0, - "learning_rate": 1.719888638515167e-05, - "loss": 0.9627, + "learning_rate": 1.7206985273066944e-05, + "loss": 0.951, "step": 9405 }, { - "epoch": 0.2669125993189557, + "epoch": 0.2665419819207118, "grad_norm": 0.0, - "learning_rate": 1.7198248432235352e-05, - "loss": 0.8742, + "learning_rate": 1.7206348986583824e-05, + "loss": 1.0008, "step": 9406 }, { - "epoch": 0.26694097616345064, + "epoch": 0.2665703193629743, "grad_norm": 0.0, - "learning_rate": 1.719761041851511e-05, - "loss": 1.0494, + "learning_rate": 1.720571263939895e-05, + "loss": 1.0336, "step": 9407 }, { - "epoch": 0.2669693530079455, + "epoch": 0.26659865680523676, "grad_norm": 0.0, - "learning_rate": 1.7196972343996328e-05, - "loss": 0.9641, + "learning_rate": 1.7205076231517682e-05, + "loss": 0.9999, "step": 9408 }, { - "epoch": 0.2669977298524404, + "epoch": 0.2666269942474992, "grad_norm": 0.0, - "learning_rate": 1.7196334208684398e-05, - "loss": 0.9985, + "learning_rate": 1.7204439762945382e-05, + "loss": 0.8923, "step": 9409 }, { - "epoch": 0.26702610669693533, + "epoch": 0.2666553316897617, "grad_norm": 0.0, - "learning_rate": 1.719569601258471e-05, - "loss": 1.0222, + "learning_rate": 1.720380323368741e-05, + "loss": 0.8022, "step": 9410 }, { - "epoch": 0.2670544835414302, + "epoch": 0.2666836691320241, "grad_norm": 0.0, - "learning_rate": 1.7195057755702653e-05, - "loss": 0.9941, + "learning_rate": 1.7203166643749125e-05, + "loss": 1.0122, "step": 9411 }, { - "epoch": 0.2670828603859251, + "epoch": 0.2667120065742866, "grad_norm": 0.0, - "learning_rate": 1.719441943804362e-05, - "loss": 0.8682, + "learning_rate": 1.7202529993135898e-05, + "loss": 1.092, "step": 9412 }, { - "epoch": 0.26711123723041996, + "epoch": 0.26674034401654906, "grad_norm": 0.0, - "learning_rate": 1.7193781059613002e-05, - "loss": 1.1422, + "learning_rate": 1.7201893281853083e-05, + "loss": 0.9166, "step": 9413 }, { - "epoch": 0.2671396140749149, + "epoch": 0.26676868145881155, "grad_norm": 0.0, - "learning_rate": 1.719314262041619e-05, - "loss": 1.0756, + "learning_rate": 1.720125650990605e-05, + "loss": 0.9524, "step": 9414 }, { - "epoch": 0.26716799091940974, + "epoch": 0.266797018901074, "grad_norm": 0.0, - "learning_rate": 1.719250412045858e-05, - "loss": 1.0452, + "learning_rate": 1.7200619677300153e-05, + "loss": 1.0052, "step": 9415 }, { - "epoch": 0.26719636776390465, + "epoch": 0.26682535634333643, "grad_norm": 0.0, - "learning_rate": 1.719186555974557e-05, - "loss": 0.9981, + "learning_rate": 1.7199982784040766e-05, + "loss": 1.0398, "step": 9416 }, { - "epoch": 0.26722474460839957, + "epoch": 0.2668536937855989, "grad_norm": 0.0, - "learning_rate": 1.7191226938282543e-05, - "loss": 0.9507, + "learning_rate": 1.719934583013325e-05, + "loss": 1.0555, "step": 9417 }, { - "epoch": 0.2672531214528944, + "epoch": 0.26688203122786136, "grad_norm": 0.0, - "learning_rate": 1.71905882560749e-05, - "loss": 0.9296, + "learning_rate": 1.7198708815582973e-05, + "loss": 0.9152, "step": 9418 }, { - "epoch": 0.26728149829738934, + "epoch": 0.26691036867012385, "grad_norm": 0.0, - "learning_rate": 1.7189949513128033e-05, - "loss": 0.9428, + "learning_rate": 1.71980717403953e-05, + "loss": 0.9156, "step": 9419 }, { - "epoch": 0.2673098751418842, + "epoch": 0.2669387061123863, "grad_norm": 0.0, - "learning_rate": 1.718931070944734e-05, - "loss": 1.0111, + "learning_rate": 1.7197434604575592e-05, + "loss": 0.9926, "step": 9420 }, { - "epoch": 0.2673382519863791, + "epoch": 0.26696704355464873, "grad_norm": 0.0, - "learning_rate": 1.718867184503822e-05, - "loss": 0.9684, + "learning_rate": 1.719679740812922e-05, + "loss": 0.9199, "step": 9421 }, { - "epoch": 0.26736662883087403, + "epoch": 0.2669953809969112, "grad_norm": 0.0, - "learning_rate": 1.718803291990606e-05, - "loss": 0.9899, + "learning_rate": 1.719616015106155e-05, + "loss": 0.9444, "step": 9422 }, { - "epoch": 0.2673950056753689, + "epoch": 0.26702371843917366, "grad_norm": 0.0, - "learning_rate": 1.7187393934056267e-05, - "loss": 1.0239, + "learning_rate": 1.7195522833377955e-05, + "loss": 0.9874, "step": 9423 }, { - "epoch": 0.2674233825198638, + "epoch": 0.26705205588143616, "grad_norm": 0.0, - "learning_rate": 1.718675488749423e-05, - "loss": 1.0011, + "learning_rate": 1.7194885455083795e-05, + "loss": 1.1551, "step": 9424 }, { - "epoch": 0.26745175936435867, + "epoch": 0.2670803933236986, "grad_norm": 0.0, - "learning_rate": 1.7186115780225357e-05, - "loss": 0.9495, + "learning_rate": 1.719424801618444e-05, + "loss": 0.8634, "step": 9425 }, { - "epoch": 0.2674801362088536, + "epoch": 0.2671087307659611, "grad_norm": 0.0, - "learning_rate": 1.718547661225504e-05, - "loss": 0.9043, + "learning_rate": 1.719361051668527e-05, + "loss": 0.9666, "step": 9426 }, { - "epoch": 0.26750851305334844, + "epoch": 0.2671370682082235, "grad_norm": 0.0, - "learning_rate": 1.7184837383588675e-05, - "loss": 0.8727, + "learning_rate": 1.7192972956591644e-05, + "loss": 0.9467, "step": 9427 }, { - "epoch": 0.26753688989784336, + "epoch": 0.26716540565048597, "grad_norm": 0.0, - "learning_rate": 1.718419809423167e-05, - "loss": 1.1013, + "learning_rate": 1.7192335335908937e-05, + "loss": 1.0577, "step": 9428 }, { - "epoch": 0.26756526674233827, + "epoch": 0.26719374309274846, "grad_norm": 0.0, - "learning_rate": 1.7183558744189416e-05, - "loss": 0.9702, + "learning_rate": 1.7191697654642517e-05, + "loss": 1.0219, "step": 9429 }, { - "epoch": 0.26759364358683313, + "epoch": 0.2672220805350109, "grad_norm": 0.0, - "learning_rate": 1.7182919333467323e-05, - "loss": 0.9784, + "learning_rate": 1.7191059912797758e-05, + "loss": 0.9777, "step": 9430 }, { - "epoch": 0.26762202043132804, + "epoch": 0.2672504179772734, "grad_norm": 0.0, - "learning_rate": 1.7182279862070786e-05, - "loss": 1.111, + "learning_rate": 1.719042211038003e-05, + "loss": 1.0252, "step": 9431 }, { - "epoch": 0.2676503972758229, + "epoch": 0.26727875541953583, "grad_norm": 0.0, - "learning_rate": 1.718164033000521e-05, - "loss": 1.0119, + "learning_rate": 1.7189784247394707e-05, + "loss": 1.0773, "step": 9432 }, { - "epoch": 0.2676787741203178, + "epoch": 0.26730709286179827, "grad_norm": 0.0, - "learning_rate": 1.718100073727599e-05, - "loss": 1.0025, + "learning_rate": 1.718914632384716e-05, + "loss": 0.9799, "step": 9433 }, { - "epoch": 0.26770715096481273, + "epoch": 0.26733543030406076, "grad_norm": 0.0, - "learning_rate": 1.7180361083888536e-05, - "loss": 1.0734, + "learning_rate": 1.7188508339742765e-05, + "loss": 0.9401, "step": 9434 }, { - "epoch": 0.2677355278093076, + "epoch": 0.2673637677463232, "grad_norm": 0.0, - "learning_rate": 1.717972136984825e-05, - "loss": 0.9869, + "learning_rate": 1.71878702950869e-05, + "loss": 0.8486, "step": 9435 }, { - "epoch": 0.2677639046538025, + "epoch": 0.2673921051885857, "grad_norm": 0.0, - "learning_rate": 1.7179081595160535e-05, - "loss": 1.0248, + "learning_rate": 1.7187232189884927e-05, + "loss": 1.096, "step": 9436 }, { - "epoch": 0.26779228149829737, + "epoch": 0.26742044263084813, "grad_norm": 0.0, - "learning_rate": 1.7178441759830797e-05, - "loss": 0.9088, + "learning_rate": 1.718659402414223e-05, + "loss": 0.9595, "step": 9437 }, { - "epoch": 0.2678206583427923, + "epoch": 0.2674487800731106, "grad_norm": 0.0, - "learning_rate": 1.7177801863864437e-05, - "loss": 0.999, + "learning_rate": 1.7185955797864184e-05, + "loss": 0.9888, "step": 9438 }, { - "epoch": 0.2678490351872872, + "epoch": 0.26747711751537306, "grad_norm": 0.0, - "learning_rate": 1.7177161907266858e-05, - "loss": 0.9736, + "learning_rate": 1.7185317511056166e-05, + "loss": 0.9029, "step": 9439 }, { - "epoch": 0.26787741203178206, + "epoch": 0.2675054549576355, "grad_norm": 0.0, - "learning_rate": 1.7176521890043476e-05, - "loss": 0.8928, + "learning_rate": 1.7184679163723545e-05, + "loss": 0.9526, "step": 9440 }, { - "epoch": 0.267905788876277, + "epoch": 0.267533792399898, "grad_norm": 0.0, - "learning_rate": 1.7175881812199687e-05, - "loss": 0.9422, + "learning_rate": 1.718404075587171e-05, + "loss": 1.0122, "step": 9441 }, { - "epoch": 0.26793416572077183, + "epoch": 0.26756212984216043, "grad_norm": 0.0, - "learning_rate": 1.7175241673740906e-05, - "loss": 0.8455, + "learning_rate": 1.7183402287506026e-05, + "loss": 1.0854, "step": 9442 }, { - "epoch": 0.26796254256526675, + "epoch": 0.26759046728442293, "grad_norm": 0.0, - "learning_rate": 1.7174601474672533e-05, - "loss": 0.8621, + "learning_rate": 1.718276375863188e-05, + "loss": 1.0485, "step": 9443 }, { - "epoch": 0.2679909194097616, + "epoch": 0.26761880472668537, "grad_norm": 0.0, - "learning_rate": 1.717396121499998e-05, - "loss": 0.998, + "learning_rate": 1.7182125169254646e-05, + "loss": 0.9666, "step": 9444 }, { - "epoch": 0.2680192962542565, + "epoch": 0.2676471421689478, "grad_norm": 0.0, - "learning_rate": 1.7173320894728653e-05, - "loss": 1.0327, + "learning_rate": 1.7181486519379705e-05, + "loss": 1.0159, "step": 9445 }, { - "epoch": 0.26804767309875144, + "epoch": 0.2676754796112103, "grad_norm": 0.0, - "learning_rate": 1.717268051386396e-05, - "loss": 1.0715, + "learning_rate": 1.718084780901244e-05, + "loss": 0.9731, "step": 9446 }, { - "epoch": 0.2680760499432463, + "epoch": 0.26770381705347274, "grad_norm": 0.0, - "learning_rate": 1.7172040072411316e-05, - "loss": 1.0359, + "learning_rate": 1.7180209038158224e-05, + "loss": 0.9944, "step": 9447 }, { - "epoch": 0.2681044267877412, + "epoch": 0.26773215449573523, "grad_norm": 0.0, - "learning_rate": 1.7171399570376124e-05, - "loss": 0.9969, + "learning_rate": 1.7179570206822442e-05, + "loss": 0.9666, "step": 9448 }, { - "epoch": 0.26813280363223607, + "epoch": 0.26776049193799767, "grad_norm": 0.0, - "learning_rate": 1.71707590077638e-05, - "loss": 0.9968, + "learning_rate": 1.7178931315010473e-05, + "loss": 0.9388, "step": 9449 }, { - "epoch": 0.268161180476731, + "epoch": 0.26778882938026016, "grad_norm": 0.0, - "learning_rate": 1.717011838457975e-05, - "loss": 1.0083, + "learning_rate": 1.71782923627277e-05, + "loss": 0.8481, "step": 9450 }, { - "epoch": 0.2681895573212259, + "epoch": 0.2678171668225226, "grad_norm": 0.0, - "learning_rate": 1.716947770082939e-05, - "loss": 1.1098, + "learning_rate": 1.7177653349979504e-05, + "loss": 0.8249, "step": 9451 }, { - "epoch": 0.26821793416572076, + "epoch": 0.26784550426478504, "grad_norm": 0.0, - "learning_rate": 1.7168836956518128e-05, - "loss": 1.0183, + "learning_rate": 1.717701427677127e-05, + "loss": 0.9862, "step": 9452 }, { - "epoch": 0.2682463110102157, + "epoch": 0.26787384170704753, "grad_norm": 0.0, - "learning_rate": 1.7168196151651378e-05, - "loss": 0.9739, + "learning_rate": 1.717637514310838e-05, + "loss": 0.957, "step": 9453 }, { - "epoch": 0.26827468785471054, + "epoch": 0.26790217914930997, "grad_norm": 0.0, - "learning_rate": 1.7167555286234556e-05, - "loss": 1.0311, + "learning_rate": 1.7175735948996213e-05, + "loss": 1.0002, "step": 9454 }, { - "epoch": 0.26830306469920545, + "epoch": 0.26793051659157247, "grad_norm": 0.0, - "learning_rate": 1.716691436027307e-05, - "loss": 0.9085, + "learning_rate": 1.7175096694440165e-05, + "loss": 0.9866, "step": 9455 }, { - "epoch": 0.26833144154370037, + "epoch": 0.2679588540338349, "grad_norm": 0.0, - "learning_rate": 1.7166273373772334e-05, - "loss": 0.9468, + "learning_rate": 1.717445737944561e-05, + "loss": 1.0296, "step": 9456 }, { - "epoch": 0.2683598183881952, + "epoch": 0.26798719147609734, "grad_norm": 0.0, - "learning_rate": 1.7165632326737767e-05, - "loss": 1.0157, + "learning_rate": 1.7173818004017935e-05, + "loss": 0.9084, "step": 9457 }, { - "epoch": 0.26838819523269014, + "epoch": 0.26801552891835984, "grad_norm": 0.0, - "learning_rate": 1.7164991219174784e-05, - "loss": 0.9386, + "learning_rate": 1.7173178568162525e-05, + "loss": 1.0117, "step": 9458 }, { - "epoch": 0.268416572077185, + "epoch": 0.2680438663606223, "grad_norm": 0.0, - "learning_rate": 1.7164350051088793e-05, - "loss": 1.0291, + "learning_rate": 1.717253907188477e-05, + "loss": 0.9811, "step": 9459 }, { - "epoch": 0.2684449489216799, + "epoch": 0.26807220380288477, "grad_norm": 0.0, - "learning_rate": 1.7163708822485216e-05, - "loss": 1.0673, + "learning_rate": 1.7171899515190058e-05, + "loss": 1.0333, "step": 9460 }, { - "epoch": 0.2684733257661748, + "epoch": 0.2681005412451472, "grad_norm": 0.0, - "learning_rate": 1.7163067533369472e-05, - "loss": 1.1302, + "learning_rate": 1.717125989808377e-05, + "loss": 0.9259, "step": 9461 }, { - "epoch": 0.2685017026106697, + "epoch": 0.2681288786874097, "grad_norm": 0.0, - "learning_rate": 1.716242618374697e-05, - "loss": 1.0967, + "learning_rate": 1.7170620220571295e-05, + "loss": 0.9165, "step": 9462 }, { - "epoch": 0.2685300794551646, + "epoch": 0.26815721612967214, "grad_norm": 0.0, - "learning_rate": 1.7161784773623134e-05, - "loss": 0.9901, + "learning_rate": 1.7169980482658027e-05, + "loss": 0.9403, "step": 9463 }, { - "epoch": 0.26855845629965946, + "epoch": 0.2681855535719346, "grad_norm": 0.0, - "learning_rate": 1.7161143303003382e-05, - "loss": 0.8763, + "learning_rate": 1.716934068434935e-05, + "loss": 1.0125, "step": 9464 }, { - "epoch": 0.2685868331441544, + "epoch": 0.26821389101419707, "grad_norm": 0.0, - "learning_rate": 1.716050177189313e-05, - "loss": 1.0714, + "learning_rate": 1.7168700825650655e-05, + "loss": 1.0229, "step": 9465 }, { - "epoch": 0.26861520998864924, + "epoch": 0.2682422284564595, "grad_norm": 0.0, - "learning_rate": 1.7159860180297795e-05, - "loss": 1.0128, + "learning_rate": 1.716806090656733e-05, + "loss": 0.9619, "step": 9466 }, { - "epoch": 0.26864358683314415, + "epoch": 0.268270565898722, "grad_norm": 0.0, - "learning_rate": 1.7159218528222803e-05, - "loss": 0.9579, + "learning_rate": 1.7167420927104766e-05, + "loss": 0.897, "step": 9467 }, { - "epoch": 0.26867196367763907, + "epoch": 0.26829890334098444, "grad_norm": 0.0, - "learning_rate": 1.7158576815673568e-05, - "loss": 0.9193, + "learning_rate": 1.7166780887268352e-05, + "loss": 0.9409, "step": 9468 }, { - "epoch": 0.26870034052213393, + "epoch": 0.2683272407832469, "grad_norm": 0.0, - "learning_rate": 1.7157935042655515e-05, - "loss": 0.9946, + "learning_rate": 1.7166140787063486e-05, + "loss": 0.9828, "step": 9469 }, { - "epoch": 0.26872871736662884, + "epoch": 0.2683555782255094, "grad_norm": 0.0, - "learning_rate": 1.7157293209174058e-05, - "loss": 1.0389, + "learning_rate": 1.716550062649555e-05, + "loss": 0.9928, "step": 9470 }, { - "epoch": 0.2687570942111237, + "epoch": 0.2683839156677718, "grad_norm": 0.0, - "learning_rate": 1.7156651315234628e-05, - "loss": 1.0089, + "learning_rate": 1.7164860405569946e-05, + "loss": 1.1304, "step": 9471 }, { - "epoch": 0.2687854710556186, + "epoch": 0.2684122531100343, "grad_norm": 0.0, - "learning_rate": 1.715600936084264e-05, - "loss": 1.0941, + "learning_rate": 1.7164220124292058e-05, + "loss": 0.9569, "step": 9472 }, { - "epoch": 0.26881384790011353, + "epoch": 0.26844059055229674, "grad_norm": 0.0, - "learning_rate": 1.715536734600352e-05, - "loss": 1.0469, + "learning_rate": 1.7163579782667285e-05, + "loss": 0.988, "step": 9473 }, { - "epoch": 0.2688422247446084, + "epoch": 0.26846892799455924, "grad_norm": 0.0, - "learning_rate": 1.7154725270722693e-05, - "loss": 0.8741, + "learning_rate": 1.716293938070102e-05, + "loss": 0.968, "step": 9474 }, { - "epoch": 0.2688706015891033, + "epoch": 0.2684972654368217, "grad_norm": 0.0, - "learning_rate": 1.7154083135005575e-05, - "loss": 1.0177, + "learning_rate": 1.7162298918398656e-05, + "loss": 0.8682, "step": 9475 }, { - "epoch": 0.26889897843359817, + "epoch": 0.2685256028790841, "grad_norm": 0.0, - "learning_rate": 1.71534409388576e-05, - "loss": 0.9804, + "learning_rate": 1.7161658395765588e-05, + "loss": 1.0972, "step": 9476 }, { - "epoch": 0.2689273552780931, + "epoch": 0.2685539403213466, "grad_norm": 0.0, - "learning_rate": 1.7152798682284183e-05, - "loss": 0.8639, + "learning_rate": 1.7161017812807213e-05, + "loss": 1.002, "step": 9477 }, { - "epoch": 0.26895573212258794, + "epoch": 0.26858227776360905, "grad_norm": 0.0, - "learning_rate": 1.715215636529076e-05, - "loss": 1.0414, + "learning_rate": 1.7160377169528928e-05, + "loss": 0.9649, "step": 9478 }, { - "epoch": 0.26898410896708286, + "epoch": 0.26861061520587154, "grad_norm": 0.0, - "learning_rate": 1.7151513987882745e-05, - "loss": 0.8134, + "learning_rate": 1.7159736465936124e-05, + "loss": 0.8822, "step": 9479 }, { - "epoch": 0.2690124858115778, + "epoch": 0.268638952648134, "grad_norm": 0.0, - "learning_rate": 1.7150871550065574e-05, - "loss": 1.0611, + "learning_rate": 1.71590957020342e-05, + "loss": 0.9462, "step": 9480 }, { - "epoch": 0.26904086265607263, + "epoch": 0.2686672900903964, "grad_norm": 0.0, - "learning_rate": 1.7150229051844667e-05, - "loss": 1.0837, + "learning_rate": 1.7158454877828557e-05, + "loss": 0.9873, "step": 9481 }, { - "epoch": 0.26906923950056755, + "epoch": 0.2686956275326589, "grad_norm": 0.0, - "learning_rate": 1.7149586493225453e-05, - "loss": 1.0239, + "learning_rate": 1.715781399332459e-05, + "loss": 0.9334, "step": 9482 }, { - "epoch": 0.2690976163450624, + "epoch": 0.26872396497492135, "grad_norm": 0.0, - "learning_rate": 1.7148943874213362e-05, - "loss": 1.0003, + "learning_rate": 1.71571730485277e-05, + "loss": 1.0746, "step": 9483 }, { - "epoch": 0.2691259931895573, + "epoch": 0.26875230241718384, "grad_norm": 0.0, - "learning_rate": 1.7148301194813823e-05, - "loss": 0.9084, + "learning_rate": 1.7156532043443278e-05, + "loss": 1.0146, "step": 9484 }, { - "epoch": 0.26915437003405224, + "epoch": 0.2687806398594463, "grad_norm": 0.0, - "learning_rate": 1.7147658455032263e-05, - "loss": 0.9983, + "learning_rate": 1.7155890978076732e-05, + "loss": 0.9005, "step": 9485 }, { - "epoch": 0.2691827468785471, + "epoch": 0.2688089773017088, "grad_norm": 0.0, - "learning_rate": 1.7147015654874106e-05, - "loss": 1.0125, + "learning_rate": 1.715524985243346e-05, + "loss": 0.9272, "step": 9486 }, { - "epoch": 0.269211123723042, + "epoch": 0.2688373147439712, "grad_norm": 0.0, - "learning_rate": 1.7146372794344787e-05, - "loss": 1.0356, + "learning_rate": 1.715460866651886e-05, + "loss": 0.9943, "step": 9487 }, { - "epoch": 0.26923950056753687, + "epoch": 0.26886565218623365, "grad_norm": 0.0, - "learning_rate": 1.714572987344974e-05, - "loss": 1.071, + "learning_rate": 1.7153967420338337e-05, + "loss": 1.038, "step": 9488 }, { - "epoch": 0.2692678774120318, + "epoch": 0.26889398962849614, "grad_norm": 0.0, - "learning_rate": 1.714508689219439e-05, - "loss": 0.9365, + "learning_rate": 1.7153326113897286e-05, + "loss": 1.0255, "step": 9489 }, { - "epoch": 0.2692962542565267, + "epoch": 0.2689223270707586, "grad_norm": 0.0, - "learning_rate": 1.7144443850584166e-05, - "loss": 1.0554, + "learning_rate": 1.7152684747201114e-05, + "loss": 0.9666, "step": 9490 }, { - "epoch": 0.26932463110102156, + "epoch": 0.2689506645130211, "grad_norm": 0.0, - "learning_rate": 1.7143800748624507e-05, - "loss": 0.8802, + "learning_rate": 1.7152043320255224e-05, + "loss": 1.0257, "step": 9491 }, { - "epoch": 0.2693530079455165, + "epoch": 0.2689790019552835, "grad_norm": 0.0, - "learning_rate": 1.714315758632084e-05, - "loss": 0.872, + "learning_rate": 1.7151401833065014e-05, + "loss": 0.9765, "step": 9492 }, { - "epoch": 0.26938138479001134, + "epoch": 0.26900733939754595, "grad_norm": 0.0, - "learning_rate": 1.7142514363678602e-05, - "loss": 0.9712, + "learning_rate": 1.7150760285635897e-05, + "loss": 0.9992, "step": 9493 }, { - "epoch": 0.26940976163450625, + "epoch": 0.26903567683980845, "grad_norm": 0.0, - "learning_rate": 1.7141871080703222e-05, - "loss": 0.8458, + "learning_rate": 1.7150118677973265e-05, + "loss": 1.0162, "step": 9494 }, { - "epoch": 0.2694381384790011, + "epoch": 0.2690640142820709, "grad_norm": 0.0, - "learning_rate": 1.7141227737400136e-05, - "loss": 0.9512, + "learning_rate": 1.714947701008253e-05, + "loss": 1.0544, "step": 9495 }, { - "epoch": 0.269466515323496, + "epoch": 0.2690923517243334, "grad_norm": 0.0, - "learning_rate": 1.7140584333774782e-05, - "loss": 0.9561, + "learning_rate": 1.7148835281969093e-05, + "loss": 0.9689, "step": 9496 }, { - "epoch": 0.26949489216799094, + "epoch": 0.2691206891665958, "grad_norm": 0.0, - "learning_rate": 1.713994086983259e-05, - "loss": 0.9, + "learning_rate": 1.7148193493638364e-05, + "loss": 0.9401, "step": 9497 }, { - "epoch": 0.2695232690124858, + "epoch": 0.2691490266088583, "grad_norm": 0.0, - "learning_rate": 1.7139297345578992e-05, - "loss": 0.9124, + "learning_rate": 1.7147551645095747e-05, + "loss": 0.9802, "step": 9498 }, { - "epoch": 0.2695516458569807, + "epoch": 0.26917736405112075, "grad_norm": 0.0, - "learning_rate": 1.7138653761019432e-05, - "loss": 0.9386, + "learning_rate": 1.714690973634665e-05, + "loss": 0.949, "step": 9499 }, { - "epoch": 0.2695800227014756, + "epoch": 0.2692057014933832, "grad_norm": 0.0, - "learning_rate": 1.7138010116159342e-05, - "loss": 1.0498, + "learning_rate": 1.7146267767396477e-05, + "loss": 1.1245, "step": 9500 }, { - "epoch": 0.2696083995459705, + "epoch": 0.2692340389356457, "grad_norm": 0.0, - "learning_rate": 1.713736641100416e-05, - "loss": 0.9769, + "learning_rate": 1.7145625738250636e-05, + "loss": 1.1373, "step": 9501 }, { - "epoch": 0.2696367763904654, + "epoch": 0.2692623763779081, "grad_norm": 0.0, - "learning_rate": 1.7136722645559322e-05, - "loss": 0.9625, + "learning_rate": 1.7144983648914536e-05, + "loss": 0.9884, "step": 9502 }, { - "epoch": 0.26966515323496026, + "epoch": 0.2692907138201706, "grad_norm": 0.0, - "learning_rate": 1.7136078819830267e-05, - "loss": 1.0466, + "learning_rate": 1.7144341499393587e-05, + "loss": 0.946, "step": 9503 }, { - "epoch": 0.2696935300794552, + "epoch": 0.26931905126243305, "grad_norm": 0.0, - "learning_rate": 1.713543493382244e-05, - "loss": 0.9867, + "learning_rate": 1.7143699289693193e-05, + "loss": 0.9584, "step": 9504 }, { - "epoch": 0.26972190692395004, + "epoch": 0.2693473887046955, "grad_norm": 0.0, - "learning_rate": 1.7134790987541263e-05, - "loss": 0.9984, + "learning_rate": 1.7143057019818773e-05, + "loss": 1.0079, "step": 9505 }, { - "epoch": 0.26975028376844495, + "epoch": 0.269375726146958, "grad_norm": 0.0, - "learning_rate": 1.713414698099219e-05, - "loss": 0.9325, + "learning_rate": 1.7142414689775723e-05, + "loss": 0.9438, "step": 9506 }, { - "epoch": 0.2697786606129398, + "epoch": 0.2694040635892204, "grad_norm": 0.0, - "learning_rate": 1.7133502914180657e-05, - "loss": 1.0257, + "learning_rate": 1.7141772299569467e-05, + "loss": 0.9286, "step": 9507 }, { - "epoch": 0.26980703745743473, + "epoch": 0.2694324010314829, "grad_norm": 0.0, - "learning_rate": 1.7132858787112107e-05, - "loss": 0.9166, + "learning_rate": 1.7141129849205414e-05, + "loss": 1.0405, "step": 9508 }, { - "epoch": 0.26983541430192964, + "epoch": 0.26946073847374535, "grad_norm": 0.0, - "learning_rate": 1.7132214599791975e-05, - "loss": 1.0424, + "learning_rate": 1.7140487338688967e-05, + "loss": 0.9719, "step": 9509 }, { - "epoch": 0.2698637911464245, + "epoch": 0.26948907591600785, "grad_norm": 0.0, - "learning_rate": 1.7131570352225703e-05, - "loss": 1.0311, + "learning_rate": 1.7139844768025544e-05, + "loss": 1.05, "step": 9510 }, { - "epoch": 0.2698921679909194, + "epoch": 0.2695174133582703, "grad_norm": 0.0, - "learning_rate": 1.713092604441874e-05, - "loss": 0.9852, + "learning_rate": 1.713920213722056e-05, + "loss": 1.0273, "step": 9511 }, { - "epoch": 0.2699205448354143, + "epoch": 0.2695457508005327, "grad_norm": 0.0, - "learning_rate": 1.7130281676376522e-05, - "loss": 0.9557, + "learning_rate": 1.7138559446279424e-05, + "loss": 1.0483, "step": 9512 }, { - "epoch": 0.2699489216799092, + "epoch": 0.2695740882427952, "grad_norm": 0.0, - "learning_rate": 1.71296372481045e-05, - "loss": 1.0364, + "learning_rate": 1.713791669520755e-05, + "loss": 1.0023, "step": 9513 }, { - "epoch": 0.2699772985244041, + "epoch": 0.26960242568505766, "grad_norm": 0.0, - "learning_rate": 1.71289927596081e-05, - "loss": 1.0027, + "learning_rate": 1.7137273884010356e-05, + "loss": 1.0277, "step": 9514 }, { - "epoch": 0.27000567536889897, + "epoch": 0.26963076312732015, "grad_norm": 0.0, - "learning_rate": 1.712834821089279e-05, - "loss": 0.8164, + "learning_rate": 1.713663101269325e-05, + "loss": 0.9348, "step": 9515 }, { - "epoch": 0.2700340522133939, + "epoch": 0.2696591005695826, "grad_norm": 0.0, - "learning_rate": 1.7127703601963995e-05, - "loss": 0.9355, + "learning_rate": 1.7135988081261655e-05, + "loss": 1.0546, "step": 9516 }, { - "epoch": 0.27006242905788874, + "epoch": 0.269687438011845, "grad_norm": 0.0, - "learning_rate": 1.712705893282717e-05, - "loss": 0.9637, + "learning_rate": 1.713534508972098e-05, + "loss": 0.9258, "step": 9517 }, { - "epoch": 0.27009080590238366, + "epoch": 0.2697157754541075, "grad_norm": 0.0, - "learning_rate": 1.7126414203487757e-05, - "loss": 0.9038, + "learning_rate": 1.7134702038076644e-05, + "loss": 1.0219, "step": 9518 }, { - "epoch": 0.27011918274687857, + "epoch": 0.26974411289636996, "grad_norm": 0.0, - "learning_rate": 1.71257694139512e-05, - "loss": 1.003, + "learning_rate": 1.7134058926334063e-05, + "loss": 0.8467, "step": 9519 }, { - "epoch": 0.27014755959137343, + "epoch": 0.26977245033863245, "grad_norm": 0.0, - "learning_rate": 1.712512456422295e-05, - "loss": 0.9545, + "learning_rate": 1.7133415754498655e-05, + "loss": 1.035, "step": 9520 }, { - "epoch": 0.27017593643586835, + "epoch": 0.2698007877808949, "grad_norm": 0.0, - "learning_rate": 1.7124479654308457e-05, - "loss": 1.0233, + "learning_rate": 1.7132772522575835e-05, + "loss": 0.8332, "step": 9521 }, { - "epoch": 0.2702043132803632, + "epoch": 0.2698291252231574, "grad_norm": 0.0, - "learning_rate": 1.7123834684213157e-05, - "loss": 0.9813, + "learning_rate": 1.7132129230571022e-05, + "loss": 1.0117, "step": 9522 }, { - "epoch": 0.2702326901248581, + "epoch": 0.2698574626654198, "grad_norm": 0.0, - "learning_rate": 1.7123189653942512e-05, - "loss": 1.0329, + "learning_rate": 1.7131485878489643e-05, + "loss": 1.0053, "step": 9523 }, { - "epoch": 0.270261066969353, + "epoch": 0.26988580010768226, "grad_norm": 0.0, - "learning_rate": 1.712254456350196e-05, - "loss": 0.953, + "learning_rate": 1.7130842466337106e-05, + "loss": 0.8723, "step": 9524 }, { - "epoch": 0.2702894438138479, + "epoch": 0.26991413754994475, "grad_norm": 0.0, - "learning_rate": 1.7121899412896954e-05, - "loss": 0.8936, + "learning_rate": 1.7130198994118835e-05, + "loss": 0.9354, "step": 9525 }, { - "epoch": 0.2703178206583428, + "epoch": 0.2699424749922072, "grad_norm": 0.0, - "learning_rate": 1.7121254202132943e-05, - "loss": 0.9886, + "learning_rate": 1.7129555461840252e-05, + "loss": 0.9529, "step": 9526 }, { - "epoch": 0.27034619750283767, + "epoch": 0.2699708124344697, "grad_norm": 0.0, - "learning_rate": 1.712060893121538e-05, - "loss": 1.0007, + "learning_rate": 1.7128911869506772e-05, + "loss": 0.9455, "step": 9527 }, { - "epoch": 0.2703745743473326, + "epoch": 0.2699991498767321, "grad_norm": 0.0, - "learning_rate": 1.7119963600149715e-05, - "loss": 0.9645, + "learning_rate": 1.7128268217123824e-05, + "loss": 0.921, "step": 9528 }, { - "epoch": 0.27040295119182745, + "epoch": 0.27002748731899456, "grad_norm": 0.0, - "learning_rate": 1.711931820894139e-05, - "loss": 1.0198, + "learning_rate": 1.7127624504696824e-05, + "loss": 0.939, "step": 9529 }, { - "epoch": 0.27043132803632236, + "epoch": 0.27005582476125706, "grad_norm": 0.0, - "learning_rate": 1.7118672757595873e-05, - "loss": 1.0485, + "learning_rate": 1.7126980732231196e-05, + "loss": 0.9915, "step": 9530 }, { - "epoch": 0.2704597048808173, + "epoch": 0.2700841622035195, "grad_norm": 0.0, - "learning_rate": 1.71180272461186e-05, - "loss": 0.8716, + "learning_rate": 1.7126336899732363e-05, + "loss": 0.8897, "step": 9531 }, { - "epoch": 0.27048808172531214, + "epoch": 0.270112499645782, "grad_norm": 0.0, - "learning_rate": 1.7117381674515034e-05, - "loss": 1.0685, + "learning_rate": 1.7125693007205745e-05, + "loss": 0.9409, "step": 9532 }, { - "epoch": 0.27051645856980705, + "epoch": 0.2701408370880444, "grad_norm": 0.0, - "learning_rate": 1.7116736042790624e-05, - "loss": 0.8965, + "learning_rate": 1.7125049054656773e-05, + "loss": 0.9469, "step": 9533 }, { - "epoch": 0.2705448354143019, + "epoch": 0.2701691745303069, "grad_norm": 0.0, - "learning_rate": 1.7116090350950825e-05, - "loss": 0.9326, + "learning_rate": 1.7124405042090865e-05, + "loss": 1.0512, "step": 9534 }, { - "epoch": 0.2705732122587968, + "epoch": 0.27019751197256936, "grad_norm": 0.0, - "learning_rate": 1.7115444599001093e-05, - "loss": 1.0013, + "learning_rate": 1.712376096951345e-05, + "loss": 1.0356, "step": 9535 }, { - "epoch": 0.27060158910329174, + "epoch": 0.2702258494148318, "grad_norm": 0.0, - "learning_rate": 1.7114798786946875e-05, - "loss": 0.8425, + "learning_rate": 1.712311683692995e-05, + "loss": 0.9327, "step": 9536 }, { - "epoch": 0.2706299659477866, + "epoch": 0.2702541868570943, "grad_norm": 0.0, - "learning_rate": 1.7114152914793636e-05, - "loss": 0.9581, + "learning_rate": 1.712247264434579e-05, + "loss": 0.9492, "step": 9537 }, { - "epoch": 0.2706583427922815, + "epoch": 0.27028252429935673, "grad_norm": 0.0, - "learning_rate": 1.711350698254683e-05, - "loss": 0.8829, + "learning_rate": 1.7121828391766398e-05, + "loss": 1.0402, "step": 9538 }, { - "epoch": 0.2706867196367764, + "epoch": 0.2703108617416192, "grad_norm": 0.0, - "learning_rate": 1.7112860990211908e-05, - "loss": 1.0294, + "learning_rate": 1.7121184079197202e-05, + "loss": 0.9474, "step": 9539 }, { - "epoch": 0.2707150964812713, + "epoch": 0.27033919918388166, "grad_norm": 0.0, - "learning_rate": 1.7112214937794327e-05, - "loss": 0.9381, + "learning_rate": 1.712053970664363e-05, + "loss": 1.0188, "step": 9540 }, { - "epoch": 0.27074347332576615, + "epoch": 0.2703675366261441, "grad_norm": 0.0, - "learning_rate": 1.711156882529955e-05, - "loss": 1.0034, + "learning_rate": 1.7119895274111105e-05, + "loss": 0.9147, "step": 9541 }, { - "epoch": 0.27077185017026106, + "epoch": 0.2703958740684066, "grad_norm": 0.0, - "learning_rate": 1.7110922652733028e-05, - "loss": 0.9896, + "learning_rate": 1.711925078160506e-05, + "loss": 1.0182, "step": 9542 }, { - "epoch": 0.270800227014756, + "epoch": 0.27042421151066903, "grad_norm": 0.0, - "learning_rate": 1.7110276420100226e-05, - "loss": 0.9833, + "learning_rate": 1.7118606229130922e-05, + "loss": 1.0466, "step": 9543 }, { - "epoch": 0.27082860385925084, + "epoch": 0.2704525489529315, "grad_norm": 0.0, - "learning_rate": 1.71096301274066e-05, - "loss": 0.8908, + "learning_rate": 1.711796161669412e-05, + "loss": 0.9941, "step": 9544 }, { - "epoch": 0.27085698070374575, + "epoch": 0.27048088639519396, "grad_norm": 0.0, - "learning_rate": 1.710898377465761e-05, - "loss": 0.9146, + "learning_rate": 1.7117316944300082e-05, + "loss": 1.0206, "step": 9545 }, { - "epoch": 0.2708853575482406, + "epoch": 0.27050922383745646, "grad_norm": 0.0, - "learning_rate": 1.710833736185871e-05, - "loss": 0.9607, + "learning_rate": 1.7116672211954242e-05, + "loss": 1.0142, "step": 9546 }, { - "epoch": 0.27091373439273553, + "epoch": 0.2705375612797189, "grad_norm": 0.0, - "learning_rate": 1.7107690889015364e-05, - "loss": 1.0295, + "learning_rate": 1.711602741966203e-05, + "loss": 1.0853, "step": 9547 }, { - "epoch": 0.27094211123723044, + "epoch": 0.27056589872198133, "grad_norm": 0.0, - "learning_rate": 1.710704435613304e-05, - "loss": 1.1541, + "learning_rate": 1.7115382567428875e-05, + "loss": 0.979, "step": 9548 }, { - "epoch": 0.2709704880817253, + "epoch": 0.27059423616424383, "grad_norm": 0.0, - "learning_rate": 1.7106397763217185e-05, - "loss": 0.8927, + "learning_rate": 1.711473765526021e-05, + "loss": 0.922, "step": 9549 }, { - "epoch": 0.2709988649262202, + "epoch": 0.27062257360650627, "grad_norm": 0.0, - "learning_rate": 1.7105751110273275e-05, - "loss": 0.9812, + "learning_rate": 1.7114092683161468e-05, + "loss": 0.9898, "step": 9550 }, { - "epoch": 0.2710272417707151, + "epoch": 0.27065091104876876, "grad_norm": 0.0, - "learning_rate": 1.710510439730676e-05, - "loss": 1.0438, + "learning_rate": 1.7113447651138086e-05, + "loss": 0.9991, "step": 9551 }, { - "epoch": 0.27105561861521, + "epoch": 0.2706792484910312, "grad_norm": 0.0, - "learning_rate": 1.7104457624323118e-05, - "loss": 0.9377, + "learning_rate": 1.711280255919549e-05, + "loss": 1.0937, "step": 9552 }, { - "epoch": 0.2710839954597049, + "epoch": 0.27070758593329364, "grad_norm": 0.0, - "learning_rate": 1.7103810791327796e-05, - "loss": 1.0259, + "learning_rate": 1.7112157407339118e-05, + "loss": 0.8118, "step": 9553 }, { - "epoch": 0.27111237230419977, + "epoch": 0.27073592337555613, "grad_norm": 0.0, - "learning_rate": 1.7103163898326265e-05, - "loss": 0.9118, + "learning_rate": 1.7111512195574402e-05, + "loss": 0.985, "step": 9554 }, { - "epoch": 0.2711407491486947, + "epoch": 0.27076426081781857, "grad_norm": 0.0, - "learning_rate": 1.7102516945323994e-05, - "loss": 0.9479, + "learning_rate": 1.7110866923906774e-05, + "loss": 0.9939, "step": 9555 }, { - "epoch": 0.27116912599318954, + "epoch": 0.27079259826008106, "grad_norm": 0.0, - "learning_rate": 1.7101869932326436e-05, - "loss": 1.0839, + "learning_rate": 1.711022159234168e-05, + "loss": 0.9581, "step": 9556 }, { - "epoch": 0.27119750283768446, + "epoch": 0.2708209357023435, "grad_norm": 0.0, - "learning_rate": 1.7101222859339067e-05, - "loss": 0.9315, + "learning_rate": 1.7109576200884543e-05, + "loss": 1.0699, "step": 9557 }, { - "epoch": 0.2712258796821793, + "epoch": 0.270849273144606, "grad_norm": 0.0, - "learning_rate": 1.710057572636735e-05, - "loss": 0.9084, + "learning_rate": 1.7108930749540807e-05, + "loss": 0.9753, "step": 9558 }, { - "epoch": 0.27125425652667423, + "epoch": 0.27087761058686843, "grad_norm": 0.0, - "learning_rate": 1.709992853341675e-05, - "loss": 0.9327, + "learning_rate": 1.710828523831591e-05, + "loss": 0.9102, "step": 9559 }, { - "epoch": 0.27128263337116915, + "epoch": 0.27090594802913087, "grad_norm": 0.0, - "learning_rate": 1.7099281280492733e-05, - "loss": 0.9783, + "learning_rate": 1.7107639667215288e-05, + "loss": 1.0478, "step": 9560 }, { - "epoch": 0.271311010215664, + "epoch": 0.27093428547139337, "grad_norm": 0.0, - "learning_rate": 1.7098633967600772e-05, - "loss": 0.991, + "learning_rate": 1.7106994036244375e-05, + "loss": 0.9549, "step": 9561 }, { - "epoch": 0.2713393870601589, + "epoch": 0.2709626229136558, "grad_norm": 0.0, - "learning_rate": 1.7097986594746328e-05, - "loss": 1.1127, + "learning_rate": 1.710634834540861e-05, + "loss": 1.0015, "step": 9562 }, { - "epoch": 0.2713677639046538, + "epoch": 0.2709909603559183, "grad_norm": 0.0, - "learning_rate": 1.709733916193487e-05, - "loss": 1.0654, + "learning_rate": 1.7105702594713437e-05, + "loss": 1.0983, "step": 9563 }, { - "epoch": 0.2713961407491487, + "epoch": 0.27101929779818074, "grad_norm": 0.0, - "learning_rate": 1.7096691669171872e-05, - "loss": 1.0663, + "learning_rate": 1.7105056784164295e-05, + "loss": 1.0307, "step": 9564 }, { - "epoch": 0.2714245175936436, + "epoch": 0.2710476352404432, "grad_norm": 0.0, - "learning_rate": 1.70960441164628e-05, - "loss": 0.9347, + "learning_rate": 1.7104410913766617e-05, + "loss": 0.8746, "step": 9565 }, { - "epoch": 0.27145289443813847, + "epoch": 0.27107597268270567, "grad_norm": 0.0, - "learning_rate": 1.7095396503813125e-05, - "loss": 1.0025, + "learning_rate": 1.710376498352585e-05, + "loss": 0.9748, "step": 9566 }, { - "epoch": 0.2714812712826334, + "epoch": 0.2711043101249681, "grad_norm": 0.0, - "learning_rate": 1.7094748831228318e-05, - "loss": 0.9908, + "learning_rate": 1.7103118993447432e-05, + "loss": 0.931, "step": 9567 }, { - "epoch": 0.27150964812712824, + "epoch": 0.2711326475672306, "grad_norm": 0.0, - "learning_rate": 1.7094101098713845e-05, - "loss": 1.0004, + "learning_rate": 1.7102472943536805e-05, + "loss": 0.9438, "step": 9568 }, { - "epoch": 0.27153802497162316, + "epoch": 0.27116098500949304, "grad_norm": 0.0, - "learning_rate": 1.7093453306275184e-05, - "loss": 0.9997, + "learning_rate": 1.710182683379941e-05, + "loss": 0.9256, "step": 9569 }, { - "epoch": 0.2715664018161181, + "epoch": 0.2711893224517555, "grad_norm": 0.0, - "learning_rate": 1.70928054539178e-05, - "loss": 1.0668, + "learning_rate": 1.710118066424069e-05, + "loss": 1.0251, "step": 9570 }, { - "epoch": 0.27159477866061293, + "epoch": 0.27121765989401797, "grad_norm": 0.0, - "learning_rate": 1.7092157541647173e-05, - "loss": 0.9927, + "learning_rate": 1.710053443486609e-05, + "loss": 1.0428, "step": 9571 }, { - "epoch": 0.27162315550510785, + "epoch": 0.2712459973362804, "grad_norm": 0.0, - "learning_rate": 1.709150956946877e-05, - "loss": 0.8965, + "learning_rate": 1.709988814568105e-05, + "loss": 1.0149, "step": 9572 }, { - "epoch": 0.2716515323496027, + "epoch": 0.2712743347785429, "grad_norm": 0.0, - "learning_rate": 1.709086153738807e-05, - "loss": 0.9601, + "learning_rate": 1.709924179669102e-05, + "loss": 1.0177, "step": 9573 }, { - "epoch": 0.2716799091940976, + "epoch": 0.27130267222080534, "grad_norm": 0.0, - "learning_rate": 1.7090213445410544e-05, - "loss": 0.9347, + "learning_rate": 1.7098595387901434e-05, + "loss": 0.9836, "step": 9574 }, { - "epoch": 0.2717082860385925, + "epoch": 0.27133100966306783, "grad_norm": 0.0, - "learning_rate": 1.7089565293541664e-05, - "loss": 0.994, + "learning_rate": 1.709794891931775e-05, + "loss": 0.8844, "step": 9575 }, { - "epoch": 0.2717366628830874, + "epoch": 0.2713593471053303, "grad_norm": 0.0, - "learning_rate": 1.7088917081786908e-05, - "loss": 0.9246, + "learning_rate": 1.70973023909454e-05, + "loss": 1.0055, "step": 9576 }, { - "epoch": 0.2717650397275823, + "epoch": 0.2713876845475927, "grad_norm": 0.0, - "learning_rate": 1.708826881015175e-05, - "loss": 0.966, + "learning_rate": 1.709665580278984e-05, + "loss": 1.0219, "step": 9577 }, { - "epoch": 0.2717934165720772, + "epoch": 0.2714160219898552, "grad_norm": 0.0, - "learning_rate": 1.708762047864167e-05, - "loss": 0.9585, + "learning_rate": 1.7096009154856513e-05, + "loss": 0.9568, "step": 9578 }, { - "epoch": 0.2718217934165721, + "epoch": 0.27144435943211764, "grad_norm": 0.0, - "learning_rate": 1.7086972087262138e-05, - "loss": 0.9635, + "learning_rate": 1.7095362447150866e-05, + "loss": 0.8824, "step": 9579 }, { - "epoch": 0.27185017026106695, + "epoch": 0.27147269687438014, "grad_norm": 0.0, - "learning_rate": 1.7086323636018635e-05, - "loss": 0.8598, + "learning_rate": 1.709471567967834e-05, + "loss": 1.0629, "step": 9580 }, { - "epoch": 0.27187854710556186, + "epoch": 0.2715010343166426, "grad_norm": 0.0, - "learning_rate": 1.7085675124916638e-05, - "loss": 1.0648, + "learning_rate": 1.7094068852444395e-05, + "loss": 0.9251, "step": 9581 }, { - "epoch": 0.2719069239500568, + "epoch": 0.271529371758905, "grad_norm": 0.0, - "learning_rate": 1.7085026553961622e-05, - "loss": 1.067, + "learning_rate": 1.7093421965454474e-05, + "loss": 0.9435, "step": 9582 }, { - "epoch": 0.27193530079455164, + "epoch": 0.2715577092011675, "grad_norm": 0.0, - "learning_rate": 1.7084377923159074e-05, - "loss": 0.8547, + "learning_rate": 1.7092775018714026e-05, + "loss": 0.9229, "step": 9583 }, { - "epoch": 0.27196367763904655, + "epoch": 0.27158604664342995, "grad_norm": 0.0, - "learning_rate": 1.708372923251446e-05, - "loss": 0.873, + "learning_rate": 1.7092128012228498e-05, + "loss": 0.956, "step": 9584 }, { - "epoch": 0.2719920544835414, + "epoch": 0.27161438408569244, "grad_norm": 0.0, - "learning_rate": 1.708308048203327e-05, - "loss": 0.9227, + "learning_rate": 1.7091480946003342e-05, + "loss": 1.0987, "step": 9585 }, { - "epoch": 0.2720204313280363, + "epoch": 0.2716427215279549, "grad_norm": 0.0, - "learning_rate": 1.7082431671720982e-05, - "loss": 0.9596, + "learning_rate": 1.7090833820044014e-05, + "loss": 0.9137, "step": 9586 }, { - "epoch": 0.2720488081725312, + "epoch": 0.27167105897021737, "grad_norm": 0.0, - "learning_rate": 1.7081782801583074e-05, - "loss": 0.9713, + "learning_rate": 1.7090186634355954e-05, + "loss": 0.9516, "step": 9587 }, { - "epoch": 0.2720771850170261, + "epoch": 0.2716993964124798, "grad_norm": 0.0, - "learning_rate": 1.708113387162503e-05, - "loss": 1.0332, + "learning_rate": 1.708953938894462e-05, + "loss": 1.0004, "step": 9588 }, { - "epoch": 0.272105561861521, + "epoch": 0.27172773385474225, "grad_norm": 0.0, - "learning_rate": 1.708048488185233e-05, - "loss": 0.8727, + "learning_rate": 1.708889208381546e-05, + "loss": 0.9522, "step": 9589 }, { - "epoch": 0.2721339387060159, + "epoch": 0.27175607129700474, "grad_norm": 0.0, - "learning_rate": 1.7079835832270455e-05, - "loss": 0.9529, + "learning_rate": 1.7088244718973936e-05, + "loss": 1.0272, "step": 9590 }, { - "epoch": 0.2721623155505108, + "epoch": 0.2717844087392672, "grad_norm": 0.0, - "learning_rate": 1.7079186722884882e-05, - "loss": 0.9716, + "learning_rate": 1.7087597294425492e-05, + "loss": 0.922, "step": 9591 }, { - "epoch": 0.27219069239500565, + "epoch": 0.2718127461815297, "grad_norm": 0.0, - "learning_rate": 1.707853755370111e-05, - "loss": 1.0261, + "learning_rate": 1.7086949810175584e-05, + "loss": 0.9617, "step": 9592 }, { - "epoch": 0.27221906923950057, + "epoch": 0.2718410836237921, "grad_norm": 0.0, - "learning_rate": 1.707788832472461e-05, - "loss": 1.0426, + "learning_rate": 1.7086302266229663e-05, + "loss": 0.9144, "step": 9593 }, { - "epoch": 0.2722474460839955, + "epoch": 0.27186942106605455, "grad_norm": 0.0, - "learning_rate": 1.7077239035960868e-05, - "loss": 1.0351, + "learning_rate": 1.7085654662593192e-05, + "loss": 0.8985, "step": 9594 }, { - "epoch": 0.27227582292849034, + "epoch": 0.27189775850831704, "grad_norm": 0.0, - "learning_rate": 1.7076589687415374e-05, - "loss": 0.9715, + "learning_rate": 1.7085006999271615e-05, + "loss": 0.9667, "step": 9595 }, { - "epoch": 0.27230419977298526, + "epoch": 0.2719260959505795, "grad_norm": 0.0, - "learning_rate": 1.7075940279093602e-05, - "loss": 1.0213, + "learning_rate": 1.70843592762704e-05, + "loss": 0.9569, "step": 9596 }, { - "epoch": 0.2723325766174801, + "epoch": 0.271954433392842, "grad_norm": 0.0, - "learning_rate": 1.707529081100105e-05, - "loss": 0.9058, + "learning_rate": 1.708371149359499e-05, + "loss": 0.987, "step": 9597 }, { - "epoch": 0.27236095346197503, + "epoch": 0.2719827708351044, "grad_norm": 0.0, - "learning_rate": 1.7074641283143195e-05, - "loss": 0.9946, + "learning_rate": 1.708306365125085e-05, + "loss": 0.9592, "step": 9598 }, { - "epoch": 0.27238933030646995, + "epoch": 0.2720111082773669, "grad_norm": 0.0, - "learning_rate": 1.7073991695525528e-05, - "loss": 0.9721, + "learning_rate": 1.7082415749243436e-05, + "loss": 0.8364, "step": 9599 }, { - "epoch": 0.2724177071509648, + "epoch": 0.27203944571962935, "grad_norm": 0.0, - "learning_rate": 1.7073342048153535e-05, - "loss": 0.9218, + "learning_rate": 1.70817677875782e-05, + "loss": 0.9761, "step": 9600 }, { - "epoch": 0.2724460839954597, + "epoch": 0.2720677831618918, "grad_norm": 0.0, - "learning_rate": 1.7072692341032705e-05, - "loss": 1.0539, + "learning_rate": 1.7081119766260607e-05, + "loss": 0.8985, "step": 9601 }, { - "epoch": 0.2724744608399546, + "epoch": 0.2720961206041543, "grad_norm": 0.0, - "learning_rate": 1.7072042574168524e-05, - "loss": 0.971, + "learning_rate": 1.7080471685296113e-05, + "loss": 1.0474, "step": 9602 }, { - "epoch": 0.2725028376844495, + "epoch": 0.2721244580464167, "grad_norm": 0.0, - "learning_rate": 1.707139274756648e-05, - "loss": 0.9074, + "learning_rate": 1.7079823544690176e-05, + "loss": 0.9583, "step": 9603 }, { - "epoch": 0.27253121452894435, + "epoch": 0.2721527954886792, "grad_norm": 0.0, - "learning_rate": 1.7070742861232066e-05, - "loss": 1.0184, + "learning_rate": 1.7079175344448258e-05, + "loss": 0.8331, "step": 9604 }, { - "epoch": 0.27255959137343927, + "epoch": 0.27218113293094165, "grad_norm": 0.0, - "learning_rate": 1.7070092915170767e-05, - "loss": 1.0672, + "learning_rate": 1.7078527084575816e-05, + "loss": 0.9136, "step": 9605 }, { - "epoch": 0.2725879682179342, + "epoch": 0.2722094703732041, "grad_norm": 0.0, - "learning_rate": 1.706944290938808e-05, - "loss": 0.9492, + "learning_rate": 1.707787876507831e-05, + "loss": 0.9815, "step": 9606 }, { - "epoch": 0.27261634506242904, + "epoch": 0.2722378078154666, "grad_norm": 0.0, - "learning_rate": 1.7068792843889486e-05, - "loss": 0.9314, + "learning_rate": 1.7077230385961206e-05, + "loss": 1.0618, "step": 9607 }, { - "epoch": 0.27264472190692396, + "epoch": 0.272266145257729, "grad_norm": 0.0, - "learning_rate": 1.706814271868048e-05, - "loss": 0.8251, + "learning_rate": 1.707658194722996e-05, + "loss": 0.886, "step": 9608 }, { - "epoch": 0.2726730987514188, + "epoch": 0.2722944826999915, "grad_norm": 0.0, - "learning_rate": 1.706749253376656e-05, - "loss": 0.9783, + "learning_rate": 1.7075933448890037e-05, + "loss": 0.9668, "step": 9609 }, { - "epoch": 0.27270147559591373, + "epoch": 0.27232282014225395, "grad_norm": 0.0, - "learning_rate": 1.7066842289153208e-05, - "loss": 0.9225, + "learning_rate": 1.7075284890946898e-05, + "loss": 1.0355, "step": 9610 }, { - "epoch": 0.27272985244040865, + "epoch": 0.27235115758451645, "grad_norm": 0.0, - "learning_rate": 1.7066191984845924e-05, - "loss": 1.0684, + "learning_rate": 1.7074636273406012e-05, + "loss": 1.0378, "step": 9611 }, { - "epoch": 0.2727582292849035, + "epoch": 0.2723794950267789, "grad_norm": 0.0, - "learning_rate": 1.70655416208502e-05, - "loss": 0.995, + "learning_rate": 1.7073987596272828e-05, + "loss": 1.0596, "step": 9612 }, { - "epoch": 0.2727866061293984, + "epoch": 0.2724078324690413, "grad_norm": 0.0, - "learning_rate": 1.706489119717153e-05, - "loss": 1.0076, + "learning_rate": 1.7073338859552828e-05, + "loss": 1.0153, "step": 9613 }, { - "epoch": 0.2728149829738933, + "epoch": 0.2724361699113038, "grad_norm": 0.0, - "learning_rate": 1.70642407138154e-05, - "loss": 1.0728, + "learning_rate": 1.7072690063251466e-05, + "loss": 0.912, "step": 9614 }, { - "epoch": 0.2728433598183882, + "epoch": 0.27246450735356625, "grad_norm": 0.0, - "learning_rate": 1.7063590170787314e-05, - "loss": 1.0639, + "learning_rate": 1.7072041207374212e-05, + "loss": 1.0279, "step": 9615 }, { - "epoch": 0.2728717366628831, + "epoch": 0.27249284479582875, "grad_norm": 0.0, - "learning_rate": 1.7062939568092765e-05, - "loss": 1.0017, + "learning_rate": 1.7071392291926523e-05, + "loss": 1.0441, "step": 9616 }, { - "epoch": 0.272900113507378, + "epoch": 0.2725211822380912, "grad_norm": 0.0, - "learning_rate": 1.7062288905737245e-05, - "loss": 1.0122, + "learning_rate": 1.7070743316913874e-05, + "loss": 0.9631, "step": 9617 }, { - "epoch": 0.2729284903518729, + "epoch": 0.2725495196803536, "grad_norm": 0.0, - "learning_rate": 1.7061638183726254e-05, - "loss": 0.9161, + "learning_rate": 1.7070094282341727e-05, + "loss": 0.9351, "step": 9618 }, { - "epoch": 0.27295686719636775, + "epoch": 0.2725778571226161, "grad_norm": 0.0, - "learning_rate": 1.7060987402065292e-05, - "loss": 1.0642, + "learning_rate": 1.706944518821555e-05, + "loss": 1.0563, "step": 9619 }, { - "epoch": 0.27298524404086266, + "epoch": 0.27260619456487856, "grad_norm": 0.0, - "learning_rate": 1.7060336560759848e-05, - "loss": 1.0485, + "learning_rate": 1.706879603454081e-05, + "loss": 1.0453, "step": 9620 }, { - "epoch": 0.2730136208853575, + "epoch": 0.27263453200714105, "grad_norm": 0.0, - "learning_rate": 1.7059685659815424e-05, - "loss": 0.8932, + "learning_rate": 1.7068146821322983e-05, + "loss": 0.9458, "step": 9621 }, { - "epoch": 0.27304199772985244, + "epoch": 0.2726628694494035, "grad_norm": 0.0, - "learning_rate": 1.7059034699237523e-05, - "loss": 1.0124, + "learning_rate": 1.7067497548567523e-05, + "loss": 0.9198, "step": 9622 }, { - "epoch": 0.27307037457434735, + "epoch": 0.272691206891666, "grad_norm": 0.0, - "learning_rate": 1.7058383679031632e-05, - "loss": 0.9811, + "learning_rate": 1.706684821627991e-05, + "loss": 0.8624, "step": 9623 }, { - "epoch": 0.2730987514188422, + "epoch": 0.2727195443339284, "grad_norm": 0.0, - "learning_rate": 1.7057732599203262e-05, - "loss": 0.9754, + "learning_rate": 1.706619882446561e-05, + "loss": 1.0677, "step": 9624 }, { - "epoch": 0.2731271282633371, + "epoch": 0.27274788177619086, "grad_norm": 0.0, - "learning_rate": 1.7057081459757904e-05, - "loss": 0.9168, + "learning_rate": 1.7065549373130094e-05, + "loss": 1.0764, "step": 9625 }, { - "epoch": 0.273155505107832, + "epoch": 0.27277621921845335, "grad_norm": 0.0, - "learning_rate": 1.705643026070106e-05, - "loss": 1.0309, + "learning_rate": 1.7064899862278833e-05, + "loss": 0.9956, "step": 9626 }, { - "epoch": 0.2731838819523269, + "epoch": 0.2728045566607158, "grad_norm": 0.0, - "learning_rate": 1.7055779002038235e-05, - "loss": 0.939, + "learning_rate": 1.7064250291917293e-05, + "loss": 0.9635, "step": 9627 }, { - "epoch": 0.2732122587968218, + "epoch": 0.2728328941029783, "grad_norm": 0.0, - "learning_rate": 1.705512768377493e-05, - "loss": 1.0179, + "learning_rate": 1.7063600662050954e-05, + "loss": 0.8943, "step": 9628 }, { - "epoch": 0.2732406356413167, + "epoch": 0.2728612315452407, "grad_norm": 0.0, - "learning_rate": 1.7054476305916644e-05, - "loss": 0.9008, + "learning_rate": 1.706295097268528e-05, + "loss": 0.9091, "step": 9629 }, { - "epoch": 0.2732690124858116, + "epoch": 0.27288956898750316, "grad_norm": 0.0, - "learning_rate": 1.7053824868468876e-05, - "loss": 0.9011, + "learning_rate": 1.706230122382575e-05, + "loss": 0.9797, "step": 9630 }, { - "epoch": 0.27329738933030645, + "epoch": 0.27291790642976566, "grad_norm": 0.0, - "learning_rate": 1.705317337143713e-05, - "loss": 0.9196, + "learning_rate": 1.7061651415477832e-05, + "loss": 1.0614, "step": 9631 }, { - "epoch": 0.27332576617480137, + "epoch": 0.2729462438720281, "grad_norm": 0.0, - "learning_rate": 1.7052521814826915e-05, - "loss": 0.9648, + "learning_rate": 1.7061001547647e-05, + "loss": 0.9133, "step": 9632 }, { - "epoch": 0.2733541430192963, + "epoch": 0.2729745813142906, "grad_norm": 0.0, - "learning_rate": 1.7051870198643734e-05, - "loss": 0.9893, + "learning_rate": 1.7060351620338734e-05, + "loss": 0.9574, "step": 9633 }, { - "epoch": 0.27338251986379114, + "epoch": 0.273002918756553, "grad_norm": 0.0, - "learning_rate": 1.7051218522893086e-05, - "loss": 1.0893, + "learning_rate": 1.7059701633558505e-05, + "loss": 0.9402, "step": 9634 }, { - "epoch": 0.27341089670828606, + "epoch": 0.2730312561988155, "grad_norm": 0.0, - "learning_rate": 1.7050566787580477e-05, - "loss": 1.0053, + "learning_rate": 1.7059051587311785e-05, + "loss": 1.018, "step": 9635 }, { - "epoch": 0.2734392735527809, + "epoch": 0.27305959364107796, "grad_norm": 0.0, - "learning_rate": 1.7049914992711412e-05, - "loss": 0.9132, + "learning_rate": 1.7058401481604054e-05, + "loss": 1.0326, "step": 9636 }, { - "epoch": 0.27346765039727583, + "epoch": 0.2730879310833404, "grad_norm": 0.0, - "learning_rate": 1.70492631382914e-05, - "loss": 1.1185, + "learning_rate": 1.7057751316440786e-05, + "loss": 0.968, "step": 9637 }, { - "epoch": 0.2734960272417707, + "epoch": 0.2731162685256029, "grad_norm": 0.0, - "learning_rate": 1.7048611224325947e-05, - "loss": 0.9412, + "learning_rate": 1.7057101091827454e-05, + "loss": 1.024, "step": 9638 }, { - "epoch": 0.2735244040862656, + "epoch": 0.27314460596786533, "grad_norm": 0.0, - "learning_rate": 1.704795925082056e-05, - "loss": 1.0093, + "learning_rate": 1.7056450807769543e-05, + "loss": 1.0468, "step": 9639 }, { - "epoch": 0.2735527809307605, + "epoch": 0.2731729434101278, "grad_norm": 0.0, - "learning_rate": 1.7047307217780737e-05, - "loss": 0.9076, + "learning_rate": 1.705580046427252e-05, + "loss": 1.0199, "step": 9640 }, { - "epoch": 0.2735811577752554, + "epoch": 0.27320128085239026, "grad_norm": 0.0, - "learning_rate": 1.7046655125211996e-05, - "loss": 0.9507, + "learning_rate": 1.7055150061341878e-05, + "loss": 0.9661, "step": 9641 }, { - "epoch": 0.2736095346197503, + "epoch": 0.2732296182946527, "grad_norm": 0.0, - "learning_rate": 1.7046002973119847e-05, - "loss": 0.919, + "learning_rate": 1.7054499598983084e-05, + "loss": 0.8853, "step": 9642 }, { - "epoch": 0.27363791146424515, + "epoch": 0.2732579557369152, "grad_norm": 0.0, - "learning_rate": 1.704535076150979e-05, - "loss": 0.9823, + "learning_rate": 1.7053849077201622e-05, + "loss": 1.0159, "step": 9643 }, { - "epoch": 0.27366628830874007, + "epoch": 0.27328629317917763, "grad_norm": 0.0, - "learning_rate": 1.704469849038734e-05, - "loss": 0.9163, + "learning_rate": 1.7053198496002967e-05, + "loss": 0.9005, "step": 9644 }, { - "epoch": 0.273694665153235, + "epoch": 0.2733146306214401, "grad_norm": 0.0, - "learning_rate": 1.7044046159758004e-05, - "loss": 0.9311, + "learning_rate": 1.7052547855392605e-05, + "loss": 1.0117, "step": 9645 }, { - "epoch": 0.27372304199772984, + "epoch": 0.27334296806370256, "grad_norm": 0.0, - "learning_rate": 1.7043393769627293e-05, - "loss": 0.995, + "learning_rate": 1.705189715537601e-05, + "loss": 0.9959, "step": 9646 }, { - "epoch": 0.27375141884222476, + "epoch": 0.27337130550596506, "grad_norm": 0.0, - "learning_rate": 1.704274132000072e-05, - "loss": 0.9429, + "learning_rate": 1.7051246395958666e-05, + "loss": 0.9317, "step": 9647 }, { - "epoch": 0.2737797956867196, + "epoch": 0.2733996429482275, "grad_norm": 0.0, - "learning_rate": 1.704208881088379e-05, - "loss": 1.0209, + "learning_rate": 1.705059557714606e-05, + "loss": 1.0094, "step": 9648 }, { - "epoch": 0.27380817253121453, + "epoch": 0.27342798039048993, "grad_norm": 0.0, - "learning_rate": 1.704143624228203e-05, - "loss": 0.9317, + "learning_rate": 1.7049944698943668e-05, + "loss": 0.9767, "step": 9649 }, { - "epoch": 0.27383654937570945, + "epoch": 0.2734563178327524, "grad_norm": 0.0, - "learning_rate": 1.7040783614200932e-05, - "loss": 0.9752, + "learning_rate": 1.7049293761356968e-05, + "loss": 0.994, "step": 9650 }, { - "epoch": 0.2738649262202043, + "epoch": 0.27348465527501487, "grad_norm": 0.0, - "learning_rate": 1.7040130926646023e-05, - "loss": 0.995, + "learning_rate": 1.7048642764391456e-05, + "loss": 0.9624, "step": 9651 }, { - "epoch": 0.2738933030646992, + "epoch": 0.27351299271727736, "grad_norm": 0.0, - "learning_rate": 1.7039478179622804e-05, - "loss": 0.934, + "learning_rate": 1.7047991708052607e-05, + "loss": 0.8527, "step": 9652 }, { - "epoch": 0.2739216799091941, + "epoch": 0.2735413301595398, "grad_norm": 0.0, - "learning_rate": 1.7038825373136806e-05, - "loss": 0.8797, + "learning_rate": 1.7047340592345903e-05, + "loss": 1.0499, "step": 9653 }, { - "epoch": 0.273950056753689, + "epoch": 0.27356966760180224, "grad_norm": 0.0, - "learning_rate": 1.7038172507193526e-05, - "loss": 0.8624, + "learning_rate": 1.7046689417276836e-05, + "loss": 1.0645, "step": 9654 }, { - "epoch": 0.27397843359818386, + "epoch": 0.27359800504406473, "grad_norm": 0.0, - "learning_rate": 1.703751958179849e-05, - "loss": 0.9074, + "learning_rate": 1.7046038182850886e-05, + "loss": 0.9857, "step": 9655 }, { - "epoch": 0.27400681044267877, + "epoch": 0.27362634248632717, "grad_norm": 0.0, - "learning_rate": 1.703686659695721e-05, - "loss": 0.9505, + "learning_rate": 1.704538688907354e-05, + "loss": 1.0329, "step": 9656 }, { - "epoch": 0.2740351872871737, + "epoch": 0.27365467992858966, "grad_norm": 0.0, - "learning_rate": 1.7036213552675202e-05, - "loss": 0.9815, + "learning_rate": 1.7044735535950284e-05, + "loss": 0.9702, "step": 9657 }, { - "epoch": 0.27406356413166855, + "epoch": 0.2736830173708521, "grad_norm": 0.0, - "learning_rate": 1.703556044895798e-05, - "loss": 0.9696, + "learning_rate": 1.7044084123486604e-05, + "loss": 1.0047, "step": 9658 }, { - "epoch": 0.27409194097616346, + "epoch": 0.2737113548131146, "grad_norm": 0.0, - "learning_rate": 1.7034907285811062e-05, - "loss": 0.9706, + "learning_rate": 1.7043432651687987e-05, + "loss": 0.961, "step": 9659 }, { - "epoch": 0.2741203178206583, + "epoch": 0.27373969225537703, "grad_norm": 0.0, - "learning_rate": 1.703425406323997e-05, - "loss": 1.033, + "learning_rate": 1.7042781120559924e-05, + "loss": 0.9685, "step": 9660 }, { - "epoch": 0.27414869466515324, + "epoch": 0.27376802969763947, "grad_norm": 0.0, - "learning_rate": 1.7033600781250213e-05, - "loss": 0.9025, + "learning_rate": 1.70421295301079e-05, + "loss": 0.9828, "step": 9661 }, { - "epoch": 0.27417707150964815, + "epoch": 0.27379636713990196, "grad_norm": 0.0, - "learning_rate": 1.7032947439847316e-05, - "loss": 1.0637, + "learning_rate": 1.7041477880337405e-05, + "loss": 0.9021, "step": 9662 }, { - "epoch": 0.274205448354143, + "epoch": 0.2738247045821644, "grad_norm": 0.0, - "learning_rate": 1.7032294039036797e-05, - "loss": 1.0247, + "learning_rate": 1.7040826171253923e-05, + "loss": 0.8623, "step": 9663 }, { - "epoch": 0.2742338251986379, + "epoch": 0.2738530420244269, "grad_norm": 0.0, - "learning_rate": 1.703164057882417e-05, - "loss": 0.9498, + "learning_rate": 1.704017440286295e-05, + "loss": 0.9201, "step": 9664 }, { - "epoch": 0.2742622020431328, + "epoch": 0.27388137946668933, "grad_norm": 0.0, - "learning_rate": 1.7030987059214963e-05, - "loss": 0.9084, + "learning_rate": 1.7039522575169973e-05, + "loss": 0.8734, "step": 9665 }, { - "epoch": 0.2742905788876277, + "epoch": 0.2739097169089518, "grad_norm": 0.0, - "learning_rate": 1.7030333480214693e-05, - "loss": 0.9756, + "learning_rate": 1.7038870688180485e-05, + "loss": 0.9926, "step": 9666 }, { - "epoch": 0.27431895573212256, + "epoch": 0.27393805435121427, "grad_norm": 0.0, - "learning_rate": 1.7029679841828878e-05, - "loss": 0.9137, + "learning_rate": 1.703821874189997e-05, + "loss": 0.968, "step": 9667 }, { - "epoch": 0.2743473325766175, + "epoch": 0.2739663917934767, "grad_norm": 0.0, - "learning_rate": 1.7029026144063042e-05, - "loss": 0.9093, + "learning_rate": 1.703756673633393e-05, + "loss": 1.0175, "step": 9668 }, { - "epoch": 0.2743757094211124, + "epoch": 0.2739947292357392, "grad_norm": 0.0, - "learning_rate": 1.7028372386922703e-05, - "loss": 1.0598, + "learning_rate": 1.7036914671487854e-05, + "loss": 0.9053, "step": 9669 }, { - "epoch": 0.27440408626560725, + "epoch": 0.27402306667800164, "grad_norm": 0.0, - "learning_rate": 1.7027718570413392e-05, - "loss": 0.9501, + "learning_rate": 1.703626254736723e-05, + "loss": 0.89, "step": 9670 }, { - "epoch": 0.27443246311010216, + "epoch": 0.27405140412026413, "grad_norm": 0.0, - "learning_rate": 1.7027064694540623e-05, - "loss": 1.0735, + "learning_rate": 1.703561036397755e-05, + "loss": 0.9532, "step": 9671 }, { - "epoch": 0.274460839954597, + "epoch": 0.27407974156252657, "grad_norm": 0.0, - "learning_rate": 1.702641075930992e-05, - "loss": 0.9226, + "learning_rate": 1.7034958121324314e-05, + "loss": 0.89, "step": 9672 }, { - "epoch": 0.27448921679909194, + "epoch": 0.274108079004789, "grad_norm": 0.0, - "learning_rate": 1.7025756764726815e-05, - "loss": 1.0083, + "learning_rate": 1.7034305819413016e-05, + "loss": 1.058, "step": 9673 }, { - "epoch": 0.27451759364358685, + "epoch": 0.2741364164470515, "grad_norm": 0.0, - "learning_rate": 1.7025102710796825e-05, - "loss": 0.9807, + "learning_rate": 1.7033653458249145e-05, + "loss": 1.0093, "step": 9674 }, { - "epoch": 0.2745459704880817, + "epoch": 0.27416475388931394, "grad_norm": 0.0, - "learning_rate": 1.7024448597525478e-05, - "loss": 1.0627, + "learning_rate": 1.70330010378382e-05, + "loss": 0.9196, "step": 9675 }, { - "epoch": 0.27457434733257663, + "epoch": 0.27419309133157643, "grad_norm": 0.0, - "learning_rate": 1.7023794424918298e-05, - "loss": 0.94, + "learning_rate": 1.7032348558185674e-05, + "loss": 1.0695, "step": 9676 }, { - "epoch": 0.2746027241770715, + "epoch": 0.27422142877383887, "grad_norm": 0.0, - "learning_rate": 1.7023140192980806e-05, - "loss": 0.9567, + "learning_rate": 1.7031696019297065e-05, + "loss": 0.9321, "step": 9677 }, { - "epoch": 0.2746311010215664, + "epoch": 0.2742497662161013, "grad_norm": 0.0, - "learning_rate": 1.7022485901718533e-05, - "loss": 0.9915, + "learning_rate": 1.7031043421177874e-05, + "loss": 0.8084, "step": 9678 }, { - "epoch": 0.2746594778660613, + "epoch": 0.2742781036583638, "grad_norm": 0.0, - "learning_rate": 1.702183155113701e-05, - "loss": 0.9929, + "learning_rate": 1.7030390763833588e-05, + "loss": 0.9036, "step": 9679 }, { - "epoch": 0.2746878547105562, + "epoch": 0.27430644110062624, "grad_norm": 0.0, - "learning_rate": 1.7021177141241757e-05, - "loss": 1.0731, + "learning_rate": 1.702973804726971e-05, + "loss": 0.9774, "step": 9680 }, { - "epoch": 0.2747162315550511, + "epoch": 0.27433477854288874, "grad_norm": 0.0, - "learning_rate": 1.702052267203831e-05, - "loss": 1.0367, + "learning_rate": 1.702908527149174e-05, + "loss": 0.9617, "step": 9681 }, { - "epoch": 0.27474460839954595, + "epoch": 0.2743631159851512, "grad_norm": 0.0, - "learning_rate": 1.701986814353219e-05, - "loss": 0.9193, + "learning_rate": 1.7028432436505177e-05, + "loss": 1.0587, "step": 9682 }, { - "epoch": 0.27477298524404087, + "epoch": 0.27439145342741367, "grad_norm": 0.0, - "learning_rate": 1.7019213555728925e-05, - "loss": 0.9995, + "learning_rate": 1.7027779542315513e-05, + "loss": 0.8618, "step": 9683 }, { - "epoch": 0.27480136208853573, + "epoch": 0.2744197908696761, "grad_norm": 0.0, - "learning_rate": 1.701855890863405e-05, - "loss": 1.0267, + "learning_rate": 1.7027126588928255e-05, + "loss": 1.0291, "step": 9684 }, { - "epoch": 0.27482973893303064, + "epoch": 0.27444812831193854, "grad_norm": 0.0, - "learning_rate": 1.7017904202253093e-05, - "loss": 0.9977, + "learning_rate": 1.70264735763489e-05, + "loss": 0.9894, "step": 9685 }, { - "epoch": 0.27485811577752556, + "epoch": 0.27447646575420104, "grad_norm": 0.0, - "learning_rate": 1.7017249436591584e-05, - "loss": 0.9789, + "learning_rate": 1.702582050458295e-05, + "loss": 0.9485, "step": 9686 }, { - "epoch": 0.2748864926220204, + "epoch": 0.2745048031964635, "grad_norm": 0.0, - "learning_rate": 1.7016594611655054e-05, - "loss": 0.9973, + "learning_rate": 1.7025167373635903e-05, + "loss": 1.0901, "step": 9687 }, { - "epoch": 0.27491486946651533, + "epoch": 0.27453314063872597, "grad_norm": 0.0, - "learning_rate": 1.7015939727449033e-05, - "loss": 0.8649, + "learning_rate": 1.702451418351326e-05, + "loss": 0.949, "step": 9688 }, { - "epoch": 0.2749432463110102, + "epoch": 0.2745614780809884, "grad_norm": 0.0, - "learning_rate": 1.701528478397905e-05, - "loss": 1.0213, + "learning_rate": 1.702386093422053e-05, + "loss": 0.9721, "step": 9689 }, { - "epoch": 0.2749716231555051, + "epoch": 0.27458981552325085, "grad_norm": 0.0, - "learning_rate": 1.7014629781250648e-05, - "loss": 0.9687, + "learning_rate": 1.702320762576321e-05, + "loss": 0.9574, "step": 9690 }, { - "epoch": 0.275, + "epoch": 0.27461815296551334, "grad_norm": 0.0, - "learning_rate": 1.701397471926935e-05, - "loss": 1.0318, + "learning_rate": 1.7022554258146802e-05, + "loss": 1.051, "step": 9691 }, { - "epoch": 0.2750283768444949, + "epoch": 0.2746464904077758, "grad_norm": 0.0, - "learning_rate": 1.701331959804069e-05, - "loss": 0.9601, + "learning_rate": 1.702190083137681e-05, + "loss": 0.9924, "step": 9692 }, { - "epoch": 0.2750567536889898, + "epoch": 0.2746748278500383, "grad_norm": 0.0, - "learning_rate": 1.7012664417570206e-05, - "loss": 0.847, + "learning_rate": 1.7021247345458746e-05, + "loss": 1.0335, "step": 9693 }, { - "epoch": 0.27508513053348466, + "epoch": 0.2747031652923007, "grad_norm": 0.0, - "learning_rate": 1.701200917786343e-05, - "loss": 1.0306, + "learning_rate": 1.7020593800398107e-05, + "loss": 0.9764, "step": 9694 }, { - "epoch": 0.27511350737797957, + "epoch": 0.2747315027345632, "grad_norm": 0.0, - "learning_rate": 1.70113538789259e-05, - "loss": 0.9854, + "learning_rate": 1.70199401962004e-05, + "loss": 0.9548, "step": 9695 }, { - "epoch": 0.2751418842224745, + "epoch": 0.27475984017682564, "grad_norm": 0.0, - "learning_rate": 1.7010698520763145e-05, - "loss": 0.9968, + "learning_rate": 1.7019286532871124e-05, + "loss": 0.9708, "step": 9696 }, { - "epoch": 0.27517026106696935, + "epoch": 0.2747881776190881, "grad_norm": 0.0, - "learning_rate": 1.7010043103380706e-05, - "loss": 0.9409, + "learning_rate": 1.7018632810415795e-05, + "loss": 0.8768, "step": 9697 }, { - "epoch": 0.27519863791146426, + "epoch": 0.2748165150613506, "grad_norm": 0.0, - "learning_rate": 1.7009387626784118e-05, - "loss": 0.9733, + "learning_rate": 1.7017979028839918e-05, + "loss": 0.9685, "step": 9698 }, { - "epoch": 0.2752270147559591, + "epoch": 0.274844852503613, "grad_norm": 0.0, - "learning_rate": 1.7008732090978917e-05, - "loss": 1.0019, + "learning_rate": 1.701732518814899e-05, + "loss": 1.0337, "step": 9699 }, { - "epoch": 0.27525539160045404, + "epoch": 0.2748731899458755, "grad_norm": 0.0, - "learning_rate": 1.700807649597064e-05, - "loss": 0.9865, + "learning_rate": 1.701667128834853e-05, + "loss": 0.9363, "step": 9700 }, { - "epoch": 0.2752837684449489, + "epoch": 0.27490152738813795, "grad_norm": 0.0, - "learning_rate": 1.7007420841764826e-05, - "loss": 0.9975, + "learning_rate": 1.7016017329444047e-05, + "loss": 0.8394, "step": 9701 }, { - "epoch": 0.2753121452894438, + "epoch": 0.2749298648304004, "grad_norm": 0.0, - "learning_rate": 1.7006765128367012e-05, - "loss": 0.9818, + "learning_rate": 1.701536331144104e-05, + "loss": 1.0296, "step": 9702 }, { - "epoch": 0.2753405221339387, + "epoch": 0.2749582022726629, "grad_norm": 0.0, - "learning_rate": 1.7006109355782743e-05, - "loss": 1.0243, + "learning_rate": 1.7014709234345024e-05, + "loss": 0.9156, "step": 9703 }, { - "epoch": 0.2753688989784336, + "epoch": 0.2749865397149253, "grad_norm": 0.0, - "learning_rate": 1.700545352401755e-05, - "loss": 0.9428, + "learning_rate": 1.7014055098161507e-05, + "loss": 1.0363, "step": 9704 }, { - "epoch": 0.2753972758229285, + "epoch": 0.2750148771571878, "grad_norm": 0.0, - "learning_rate": 1.7004797633076977e-05, - "loss": 0.9011, + "learning_rate": 1.7013400902896e-05, + "loss": 1.0359, "step": 9705 }, { - "epoch": 0.27542565266742336, + "epoch": 0.27504321459945025, "grad_norm": 0.0, - "learning_rate": 1.7004141682966563e-05, - "loss": 0.9641, + "learning_rate": 1.7012746648554008e-05, + "loss": 1.1619, "step": 9706 }, { - "epoch": 0.2754540295119183, + "epoch": 0.27507155204171274, "grad_norm": 0.0, - "learning_rate": 1.7003485673691845e-05, - "loss": 0.8766, + "learning_rate": 1.701209233514105e-05, + "loss": 0.9706, "step": 9707 }, { - "epoch": 0.2754824063564132, + "epoch": 0.2750998894839752, "grad_norm": 0.0, - "learning_rate": 1.7002829605258372e-05, - "loss": 1.0045, + "learning_rate": 1.7011437962662637e-05, + "loss": 1.0015, "step": 9708 }, { - "epoch": 0.27551078320090805, + "epoch": 0.2751282269262376, "grad_norm": 0.0, - "learning_rate": 1.7002173477671685e-05, - "loss": 0.8654, + "learning_rate": 1.7010783531124278e-05, + "loss": 1.0175, "step": 9709 }, { - "epoch": 0.27553916004540296, + "epoch": 0.2751565643685001, "grad_norm": 0.0, - "learning_rate": 1.7001517290937325e-05, - "loss": 1.0092, + "learning_rate": 1.7010129040531483e-05, + "loss": 1.0142, "step": 9710 }, { - "epoch": 0.2755675368898978, + "epoch": 0.27518490181076255, "grad_norm": 0.0, - "learning_rate": 1.700086104506083e-05, - "loss": 0.9746, + "learning_rate": 1.700947449088977e-05, + "loss": 0.9422, "step": 9711 }, { - "epoch": 0.27559591373439274, + "epoch": 0.27521323925302504, "grad_norm": 0.0, - "learning_rate": 1.700020474004775e-05, - "loss": 1.0514, + "learning_rate": 1.700881988220465e-05, + "loss": 0.9286, "step": 9712 }, { - "epoch": 0.27562429057888765, + "epoch": 0.2752415766952875, "grad_norm": 0.0, - "learning_rate": 1.699954837590362e-05, - "loss": 0.8964, + "learning_rate": 1.7008165214481636e-05, + "loss": 0.9989, "step": 9713 }, { - "epoch": 0.2756526674233825, + "epoch": 0.2752699141375499, "grad_norm": 0.0, - "learning_rate": 1.6998891952633994e-05, - "loss": 0.8891, + "learning_rate": 1.7007510487726247e-05, + "loss": 0.9358, "step": 9714 }, { - "epoch": 0.27568104426787743, + "epoch": 0.2752982515798124, "grad_norm": 0.0, - "learning_rate": 1.6998235470244413e-05, - "loss": 0.947, + "learning_rate": 1.7006855701943994e-05, + "loss": 0.9474, "step": 9715 }, { - "epoch": 0.2757094211123723, + "epoch": 0.27532658902207485, "grad_norm": 0.0, - "learning_rate": 1.6997578928740423e-05, - "loss": 1.0577, + "learning_rate": 1.7006200857140395e-05, + "loss": 1.0801, "step": 9716 }, { - "epoch": 0.2757377979568672, + "epoch": 0.27535492646433735, "grad_norm": 0.0, - "learning_rate": 1.699692232812757e-05, - "loss": 0.9694, + "learning_rate": 1.700554595332096e-05, + "loss": 0.8201, "step": 9717 }, { - "epoch": 0.27576617480136206, + "epoch": 0.2753832639065998, "grad_norm": 0.0, - "learning_rate": 1.6996265668411398e-05, - "loss": 0.9648, + "learning_rate": 1.700489099049121e-05, + "loss": 0.955, "step": 9718 }, { - "epoch": 0.275794551645857, + "epoch": 0.2754116013488623, "grad_norm": 0.0, - "learning_rate": 1.699560894959746e-05, - "loss": 0.9577, + "learning_rate": 1.7004235968656665e-05, + "loss": 1.06, "step": 9719 }, { - "epoch": 0.2758229284903519, + "epoch": 0.2754399387911247, "grad_norm": 0.0, - "learning_rate": 1.6994952171691293e-05, - "loss": 1.0157, + "learning_rate": 1.7003580887822838e-05, + "loss": 0.8762, "step": 9720 }, { - "epoch": 0.27585130533484675, + "epoch": 0.27546827623338715, "grad_norm": 0.0, - "learning_rate": 1.6994295334698453e-05, - "loss": 1.0726, + "learning_rate": 1.700292574799525e-05, + "loss": 0.9495, "step": 9721 }, { - "epoch": 0.27587968217934167, + "epoch": 0.27549661367564965, "grad_norm": 0.0, - "learning_rate": 1.6993638438624485e-05, - "loss": 1.0064, + "learning_rate": 1.7002270549179418e-05, + "loss": 0.9269, "step": 9722 }, { - "epoch": 0.2759080590238365, + "epoch": 0.2755249511179121, "grad_norm": 0.0, - "learning_rate": 1.6992981483474934e-05, - "loss": 0.9893, + "learning_rate": 1.700161529138086e-05, + "loss": 0.9121, "step": 9723 }, { - "epoch": 0.27593643586833144, + "epoch": 0.2755532885601746, "grad_norm": 0.0, - "learning_rate": 1.699232446925536e-05, - "loss": 0.9848, + "learning_rate": 1.7000959974605094e-05, + "loss": 0.9446, "step": 9724 }, { - "epoch": 0.27596481271282636, + "epoch": 0.275581626002437, "grad_norm": 0.0, - "learning_rate": 1.6991667395971306e-05, - "loss": 1.009, + "learning_rate": 1.7000304598857645e-05, + "loss": 1.0347, "step": 9725 }, { - "epoch": 0.2759931895573212, + "epoch": 0.27560996344469946, "grad_norm": 0.0, - "learning_rate": 1.6991010263628323e-05, - "loss": 1.0769, + "learning_rate": 1.699964916414403e-05, + "loss": 0.9194, "step": 9726 }, { - "epoch": 0.27602156640181613, + "epoch": 0.27563830088696195, "grad_norm": 0.0, - "learning_rate": 1.699035307223196e-05, - "loss": 0.9579, + "learning_rate": 1.699899367046978e-05, + "loss": 0.9609, "step": 9727 }, { - "epoch": 0.276049943246311, + "epoch": 0.2756666383292244, "grad_norm": 0.0, - "learning_rate": 1.698969582178777e-05, - "loss": 1.0547, + "learning_rate": 1.6998338117840396e-05, + "loss": 0.8925, "step": 9728 }, { - "epoch": 0.2760783200908059, + "epoch": 0.2756949757714869, "grad_norm": 0.0, - "learning_rate": 1.6989038512301312e-05, - "loss": 0.9896, + "learning_rate": 1.699768250626141e-05, + "loss": 0.9395, "step": 9729 }, { - "epoch": 0.2761066969353008, + "epoch": 0.2757233132137493, "grad_norm": 0.0, - "learning_rate": 1.6988381143778127e-05, - "loss": 0.893, + "learning_rate": 1.6997026835738354e-05, + "loss": 0.9599, "step": 9730 }, { - "epoch": 0.2761350737797957, + "epoch": 0.2757516506560118, "grad_norm": 0.0, - "learning_rate": 1.698772371622377e-05, - "loss": 0.9482, + "learning_rate": 1.6996371106276735e-05, + "loss": 0.944, "step": 9731 }, { - "epoch": 0.2761634506242906, + "epoch": 0.27577998809827425, "grad_norm": 0.0, - "learning_rate": 1.6987066229643797e-05, - "loss": 1.0653, + "learning_rate": 1.699571531788209e-05, + "loss": 0.9902, "step": 9732 }, { - "epoch": 0.27619182746878546, + "epoch": 0.2758083255405367, "grad_norm": 0.0, - "learning_rate": 1.6986408684043766e-05, - "loss": 0.9683, + "learning_rate": 1.6995059470559935e-05, + "loss": 1.0069, "step": 9733 }, { - "epoch": 0.27622020431328037, + "epoch": 0.2758366629827992, "grad_norm": 0.0, - "learning_rate": 1.6985751079429226e-05, - "loss": 0.9627, + "learning_rate": 1.6994403564315795e-05, + "loss": 0.9263, "step": 9734 }, { - "epoch": 0.27624858115777523, + "epoch": 0.2758650004250616, "grad_norm": 0.0, - "learning_rate": 1.6985093415805728e-05, - "loss": 0.9644, + "learning_rate": 1.6993747599155198e-05, + "loss": 0.9659, "step": 9735 }, { - "epoch": 0.27627695800227015, + "epoch": 0.2758933378673241, "grad_norm": 0.0, - "learning_rate": 1.6984435693178837e-05, - "loss": 0.9625, + "learning_rate": 1.699309157508367e-05, + "loss": 1.055, "step": 9736 }, { - "epoch": 0.27630533484676506, + "epoch": 0.27592167530958656, "grad_norm": 0.0, - "learning_rate": 1.6983777911554102e-05, - "loss": 0.9298, + "learning_rate": 1.6992435492106728e-05, + "loss": 0.9173, "step": 9737 }, { - "epoch": 0.2763337116912599, + "epoch": 0.275950012751849, "grad_norm": 0.0, - "learning_rate": 1.698312007093708e-05, - "loss": 0.9477, + "learning_rate": 1.6991779350229914e-05, + "loss": 1.0334, "step": 9738 }, { - "epoch": 0.27636208853575484, + "epoch": 0.2759783501941115, "grad_norm": 0.0, - "learning_rate": 1.6982462171333327e-05, - "loss": 0.9716, + "learning_rate": 1.699112314945874e-05, + "loss": 0.9285, "step": 9739 }, { - "epoch": 0.2763904653802497, + "epoch": 0.2760066876363739, "grad_norm": 0.0, - "learning_rate": 1.6981804212748406e-05, - "loss": 0.8628, + "learning_rate": 1.6990466889798743e-05, + "loss": 0.9325, "step": 9740 }, { - "epoch": 0.2764188422247446, + "epoch": 0.2760350250786364, "grad_norm": 0.0, - "learning_rate": 1.698114619518787e-05, - "loss": 0.9435, + "learning_rate": 1.6989810571255444e-05, + "loss": 1.0461, "step": 9741 }, { - "epoch": 0.2764472190692395, + "epoch": 0.27606336252089886, "grad_norm": 0.0, - "learning_rate": 1.6980488118657276e-05, - "loss": 0.9397, + "learning_rate": 1.698915419383438e-05, + "loss": 0.9237, "step": 9742 }, { - "epoch": 0.2764755959137344, + "epoch": 0.27609169996316135, "grad_norm": 0.0, - "learning_rate": 1.697982998316219e-05, - "loss": 0.9825, + "learning_rate": 1.698849775754107e-05, + "loss": 0.9551, "step": 9743 }, { - "epoch": 0.2765039727582293, + "epoch": 0.2761200374054238, "grad_norm": 0.0, - "learning_rate": 1.6979171788708158e-05, - "loss": 1.004, + "learning_rate": 1.698784126238105e-05, + "loss": 0.9505, "step": 9744 }, { - "epoch": 0.27653234960272416, + "epoch": 0.27614837484768623, "grad_norm": 0.0, - "learning_rate": 1.6978513535300756e-05, - "loss": 0.9902, + "learning_rate": 1.698718470835985e-05, + "loss": 0.9907, "step": 9745 }, { - "epoch": 0.2765607264472191, + "epoch": 0.2761767122899487, "grad_norm": 0.0, - "learning_rate": 1.6977855222945532e-05, - "loss": 0.9646, + "learning_rate": 1.6986528095482996e-05, + "loss": 0.9627, "step": 9746 }, { - "epoch": 0.276589103291714, + "epoch": 0.27620504973221116, "grad_norm": 0.0, - "learning_rate": 1.6977196851648053e-05, - "loss": 0.9703, + "learning_rate": 1.698587142375602e-05, + "loss": 0.963, "step": 9747 }, { - "epoch": 0.27661748013620885, + "epoch": 0.27623338717447365, "grad_norm": 0.0, - "learning_rate": 1.6976538421413877e-05, - "loss": 0.8339, + "learning_rate": 1.698521469318446e-05, + "loss": 0.8977, "step": 9748 }, { - "epoch": 0.27664585698070376, + "epoch": 0.2762617246167361, "grad_norm": 0.0, - "learning_rate": 1.6975879932248572e-05, - "loss": 0.989, + "learning_rate": 1.698455790377384e-05, + "loss": 0.9192, "step": 9749 }, { - "epoch": 0.2766742338251986, + "epoch": 0.27629006205899853, "grad_norm": 0.0, - "learning_rate": 1.6975221384157692e-05, - "loss": 0.9668, + "learning_rate": 1.6983901055529696e-05, + "loss": 0.871, "step": 9750 }, { - "epoch": 0.27670261066969354, + "epoch": 0.276318399501261, "grad_norm": 0.0, - "learning_rate": 1.6974562777146804e-05, - "loss": 0.985, + "learning_rate": 1.6983244148457558e-05, + "loss": 0.9956, "step": 9751 }, { - "epoch": 0.2767309875141884, + "epoch": 0.27634673694352346, "grad_norm": 0.0, - "learning_rate": 1.697390411122147e-05, - "loss": 0.9734, + "learning_rate": 1.6982587182562963e-05, + "loss": 0.9055, "step": 9752 }, { - "epoch": 0.2767593643586833, + "epoch": 0.27637507438578596, "grad_norm": 0.0, - "learning_rate": 1.6973245386387254e-05, - "loss": 1.0173, + "learning_rate": 1.6981930157851443e-05, + "loss": 0.9394, "step": 9753 }, { - "epoch": 0.27678774120317823, + "epoch": 0.2764034118280484, "grad_norm": 0.0, - "learning_rate": 1.6972586602649724e-05, - "loss": 1.0329, + "learning_rate": 1.698127307432853e-05, + "loss": 0.9992, "step": 9754 }, { - "epoch": 0.2768161180476731, + "epoch": 0.2764317492703109, "grad_norm": 0.0, - "learning_rate": 1.6971927760014437e-05, - "loss": 0.9137, + "learning_rate": 1.6980615931999767e-05, + "loss": 0.9826, "step": 9755 }, { - "epoch": 0.276844494892168, + "epoch": 0.2764600867125733, "grad_norm": 0.0, - "learning_rate": 1.6971268858486968e-05, - "loss": 0.9621, + "learning_rate": 1.6979958730870678e-05, + "loss": 0.9753, "step": 9756 }, { - "epoch": 0.27687287173666286, + "epoch": 0.27648842415483577, "grad_norm": 0.0, - "learning_rate": 1.6970609898072874e-05, - "loss": 0.8975, + "learning_rate": 1.697930147094681e-05, + "loss": 0.9586, "step": 9757 }, { - "epoch": 0.2769012485811578, + "epoch": 0.27651676159709826, "grad_norm": 0.0, - "learning_rate": 1.6969950878777725e-05, - "loss": 0.9341, + "learning_rate": 1.697864415223369e-05, + "loss": 1.1201, "step": 9758 }, { - "epoch": 0.2769296254256527, + "epoch": 0.2765450990393607, "grad_norm": 0.0, - "learning_rate": 1.6969291800607087e-05, - "loss": 1.0447, + "learning_rate": 1.697798677473686e-05, + "loss": 0.9716, "step": 9759 }, { - "epoch": 0.27695800227014755, + "epoch": 0.2765734364816232, "grad_norm": 0.0, - "learning_rate": 1.696863266356653e-05, - "loss": 0.9779, + "learning_rate": 1.6977329338461857e-05, + "loss": 0.9619, "step": 9760 }, { - "epoch": 0.27698637911464247, + "epoch": 0.27660177392388563, "grad_norm": 0.0, - "learning_rate": 1.696797346766162e-05, - "loss": 0.9496, + "learning_rate": 1.697667184341422e-05, + "loss": 0.888, "step": 9761 }, { - "epoch": 0.2770147559591373, + "epoch": 0.27663011136614807, "grad_norm": 0.0, - "learning_rate": 1.696731421289792e-05, - "loss": 0.9344, + "learning_rate": 1.6976014289599477e-05, + "loss": 0.8928, "step": 9762 }, { - "epoch": 0.27704313280363224, + "epoch": 0.27665844880841056, "grad_norm": 0.0, - "learning_rate": 1.6966654899281008e-05, - "loss": 0.9439, + "learning_rate": 1.6975356677023182e-05, + "loss": 0.9872, "step": 9763 }, { - "epoch": 0.2770715096481271, + "epoch": 0.276686786250673, "grad_norm": 0.0, - "learning_rate": 1.6965995526816447e-05, - "loss": 1.0322, + "learning_rate": 1.6974699005690868e-05, + "loss": 0.9114, "step": 9764 }, { - "epoch": 0.277099886492622, + "epoch": 0.2767151236929355, "grad_norm": 0.0, - "learning_rate": 1.696533609550981e-05, - "loss": 0.9849, + "learning_rate": 1.6974041275608074e-05, + "loss": 1.0936, "step": 9765 }, { - "epoch": 0.27712826333711693, + "epoch": 0.27674346113519793, "grad_norm": 0.0, - "learning_rate": 1.6964676605366666e-05, - "loss": 0.9915, + "learning_rate": 1.697338348678034e-05, + "loss": 0.9764, "step": 9766 }, { - "epoch": 0.2771566401816118, + "epoch": 0.2767717985774604, "grad_norm": 0.0, - "learning_rate": 1.6964017056392586e-05, - "loss": 0.9724, + "learning_rate": 1.6972725639213206e-05, + "loss": 0.8581, "step": 9767 }, { - "epoch": 0.2771850170261067, + "epoch": 0.27680013601972286, "grad_norm": 0.0, - "learning_rate": 1.6963357448593138e-05, - "loss": 0.9838, + "learning_rate": 1.6972067732912215e-05, + "loss": 0.8969, "step": 9768 }, { - "epoch": 0.27721339387060157, + "epoch": 0.2768284734619853, "grad_norm": 0.0, - "learning_rate": 1.69626977819739e-05, - "loss": 1.0791, + "learning_rate": 1.697140976788291e-05, + "loss": 0.96, "step": 9769 }, { - "epoch": 0.2772417707150965, + "epoch": 0.2768568109042478, "grad_norm": 0.0, - "learning_rate": 1.696203805654044e-05, - "loss": 0.8985, + "learning_rate": 1.6970751744130827e-05, + "loss": 0.9644, "step": 9770 }, { - "epoch": 0.2772701475595914, + "epoch": 0.27688514834651023, "grad_norm": 0.0, - "learning_rate": 1.696137827229833e-05, - "loss": 0.823, + "learning_rate": 1.697009366166152e-05, + "loss": 1.0961, "step": 9771 }, { - "epoch": 0.27729852440408626, + "epoch": 0.27691348578877273, "grad_norm": 0.0, - "learning_rate": 1.6960718429253148e-05, - "loss": 1.0191, + "learning_rate": 1.6969435520480522e-05, + "loss": 1.0385, "step": 9772 }, { - "epoch": 0.27732690124858117, + "epoch": 0.27694182323103517, "grad_norm": 0.0, - "learning_rate": 1.696005852741046e-05, - "loss": 0.8746, + "learning_rate": 1.6968777320593385e-05, + "loss": 1.0973, "step": 9773 }, { - "epoch": 0.27735527809307603, + "epoch": 0.2769701606732976, "grad_norm": 0.0, - "learning_rate": 1.6959398566775847e-05, - "loss": 0.9864, + "learning_rate": 1.6968119062005644e-05, + "loss": 1.0457, "step": 9774 }, { - "epoch": 0.27738365493757094, + "epoch": 0.2769984981155601, "grad_norm": 0.0, - "learning_rate": 1.6958738547354884e-05, - "loss": 0.8915, + "learning_rate": 1.6967460744722847e-05, + "loss": 0.9647, "step": 9775 }, { - "epoch": 0.27741203178206586, + "epoch": 0.27702683555782254, "grad_norm": 0.0, - "learning_rate": 1.695807846915314e-05, - "loss": 1.0839, + "learning_rate": 1.6966802368750546e-05, + "loss": 0.959, "step": 9776 }, { - "epoch": 0.2774404086265607, + "epoch": 0.27705517300008503, "grad_norm": 0.0, - "learning_rate": 1.6957418332176195e-05, - "loss": 1.0331, + "learning_rate": 1.6966143934094278e-05, + "loss": 1.0005, "step": 9777 }, { - "epoch": 0.27746878547105563, + "epoch": 0.27708351044234747, "grad_norm": 0.0, - "learning_rate": 1.6956758136429627e-05, - "loss": 0.9398, + "learning_rate": 1.6965485440759596e-05, + "loss": 0.9167, "step": 9778 }, { - "epoch": 0.2774971623155505, + "epoch": 0.2771118478846099, "grad_norm": 0.0, - "learning_rate": 1.6956097881919006e-05, - "loss": 0.8641, + "learning_rate": 1.696482688875204e-05, + "loss": 1.0605, "step": 9779 }, { - "epoch": 0.2775255391600454, + "epoch": 0.2771401853268724, "grad_norm": 0.0, - "learning_rate": 1.6955437568649916e-05, - "loss": 0.9992, + "learning_rate": 1.696416827807716e-05, + "loss": 1.0755, "step": 9780 }, { - "epoch": 0.27755391600454027, + "epoch": 0.27716852276913484, "grad_norm": 0.0, - "learning_rate": 1.6954777196627934e-05, - "loss": 0.9771, + "learning_rate": 1.6963509608740508e-05, + "loss": 0.8573, "step": 9781 }, { - "epoch": 0.2775822928490352, + "epoch": 0.27719686021139733, "grad_norm": 0.0, - "learning_rate": 1.6954116765858634e-05, - "loss": 0.9809, + "learning_rate": 1.696285088074763e-05, + "loss": 0.8842, "step": 9782 }, { - "epoch": 0.2776106696935301, + "epoch": 0.27722519765365977, "grad_norm": 0.0, - "learning_rate": 1.69534562763476e-05, - "loss": 0.9878, + "learning_rate": 1.696219209410407e-05, + "loss": 1.0037, "step": 9783 }, { - "epoch": 0.27763904653802496, + "epoch": 0.27725353509592227, "grad_norm": 0.0, - "learning_rate": 1.69527957281004e-05, - "loss": 1.0061, + "learning_rate": 1.6961533248815383e-05, + "loss": 0.9097, "step": 9784 }, { - "epoch": 0.2776674233825199, + "epoch": 0.2772818725381847, "grad_norm": 0.0, - "learning_rate": 1.6952135121122634e-05, - "loss": 0.9738, + "learning_rate": 1.6960874344887114e-05, + "loss": 0.9832, "step": 9785 }, { - "epoch": 0.27769580022701473, + "epoch": 0.27731020998044714, "grad_norm": 0.0, - "learning_rate": 1.6951474455419862e-05, - "loss": 0.9358, + "learning_rate": 1.696021538232482e-05, + "loss": 0.8658, "step": 9786 }, { - "epoch": 0.27772417707150965, + "epoch": 0.27733854742270964, "grad_norm": 0.0, - "learning_rate": 1.6950813730997673e-05, - "loss": 0.9002, + "learning_rate": 1.695955636113404e-05, + "loss": 1.0052, "step": 9787 }, { - "epoch": 0.27775255391600456, + "epoch": 0.2773668848649721, "grad_norm": 0.0, - "learning_rate": 1.695015294786165e-05, - "loss": 1.0134, + "learning_rate": 1.6958897281320336e-05, + "loss": 0.9907, "step": 9788 }, { - "epoch": 0.2777809307604994, + "epoch": 0.27739522230723457, "grad_norm": 0.0, - "learning_rate": 1.6949492106017374e-05, - "loss": 1.0112, + "learning_rate": 1.6958238142889258e-05, + "loss": 1.0167, "step": 9789 }, { - "epoch": 0.27780930760499434, + "epoch": 0.277423559749497, "grad_norm": 0.0, - "learning_rate": 1.6948831205470424e-05, - "loss": 1.0105, + "learning_rate": 1.6957578945846356e-05, + "loss": 0.9065, "step": 9790 }, { - "epoch": 0.2778376844494892, + "epoch": 0.27745189719175944, "grad_norm": 0.0, - "learning_rate": 1.6948170246226385e-05, - "loss": 0.8409, + "learning_rate": 1.695691969019718e-05, + "loss": 1.0922, "step": 9791 }, { - "epoch": 0.2778660612939841, + "epoch": 0.27748023463402194, "grad_norm": 0.0, - "learning_rate": 1.694750922829084e-05, - "loss": 0.8751, + "learning_rate": 1.6956260375947286e-05, + "loss": 0.906, "step": 9792 }, { - "epoch": 0.277894438138479, + "epoch": 0.2775085720762844, "grad_norm": 0.0, - "learning_rate": 1.6946848151669367e-05, - "loss": 0.9571, + "learning_rate": 1.6955601003102235e-05, + "loss": 0.8964, "step": 9793 }, { - "epoch": 0.2779228149829739, + "epoch": 0.27753690951854687, "grad_norm": 0.0, - "learning_rate": 1.694618701636756e-05, - "loss": 0.9894, + "learning_rate": 1.6954941571667566e-05, + "loss": 1.0426, "step": 9794 }, { - "epoch": 0.2779511918274688, + "epoch": 0.2775652469608093, "grad_norm": 0.0, - "learning_rate": 1.6945525822391e-05, - "loss": 0.9671, + "learning_rate": 1.6954282081648842e-05, + "loss": 1.009, "step": 9795 }, { - "epoch": 0.27797956867196366, + "epoch": 0.2775935844030718, "grad_norm": 0.0, - "learning_rate": 1.6944864569745273e-05, - "loss": 0.963, + "learning_rate": 1.695362253305162e-05, + "loss": 1.0223, "step": 9796 }, { - "epoch": 0.2780079455164586, + "epoch": 0.27762192184533424, "grad_norm": 0.0, - "learning_rate": 1.6944203258435957e-05, - "loss": 0.968, + "learning_rate": 1.6952962925881453e-05, + "loss": 0.9653, "step": 9797 }, { - "epoch": 0.27803632236095344, + "epoch": 0.2776502592875967, "grad_norm": 0.0, - "learning_rate": 1.6943541888468647e-05, - "loss": 0.9695, + "learning_rate": 1.6952303260143898e-05, + "loss": 0.9435, "step": 9798 }, { - "epoch": 0.27806469920544835, + "epoch": 0.2776785967298592, "grad_norm": 0.0, - "learning_rate": 1.6942880459848927e-05, - "loss": 0.8482, + "learning_rate": 1.6951643535844508e-05, + "loss": 1.0379, "step": 9799 }, { - "epoch": 0.27809307604994327, + "epoch": 0.2777069341721216, "grad_norm": 0.0, - "learning_rate": 1.6942218972582384e-05, - "loss": 0.9257, + "learning_rate": 1.6950983752988845e-05, + "loss": 0.9594, "step": 9800 }, { - "epoch": 0.2781214528944381, + "epoch": 0.2777352716143841, "grad_norm": 0.0, - "learning_rate": 1.69415574266746e-05, - "loss": 0.9778, + "learning_rate": 1.6950323911582464e-05, + "loss": 0.8844, "step": 9801 }, { - "epoch": 0.27814982973893304, + "epoch": 0.27776360905664654, "grad_norm": 0.0, - "learning_rate": 1.6940895822131173e-05, - "loss": 0.9258, + "learning_rate": 1.6949664011630927e-05, + "loss": 0.96, "step": 9802 }, { - "epoch": 0.2781782065834279, + "epoch": 0.277791946498909, "grad_norm": 0.0, - "learning_rate": 1.6940234158957686e-05, - "loss": 0.8731, + "learning_rate": 1.6949004053139785e-05, + "loss": 1.0264, "step": 9803 }, { - "epoch": 0.2782065834279228, + "epoch": 0.2778202839411715, "grad_norm": 0.0, - "learning_rate": 1.693957243715973e-05, - "loss": 1.1211, + "learning_rate": 1.6948344036114604e-05, + "loss": 1.0357, "step": 9804 }, { - "epoch": 0.27823496027241773, + "epoch": 0.2778486213834339, "grad_norm": 0.0, - "learning_rate": 1.6938910656742895e-05, - "loss": 0.9271, + "learning_rate": 1.694768396056094e-05, + "loss": 1.0781, "step": 9805 }, { - "epoch": 0.2782633371169126, + "epoch": 0.2778769588256964, "grad_norm": 0.0, - "learning_rate": 1.6938248817712767e-05, - "loss": 1.0969, + "learning_rate": 1.6947023826484353e-05, + "loss": 1.0212, "step": 9806 }, { - "epoch": 0.2782917139614075, + "epoch": 0.27790529626795885, "grad_norm": 0.0, - "learning_rate": 1.693758692007494e-05, - "loss": 1.0143, + "learning_rate": 1.6946363633890408e-05, + "loss": 0.9677, "step": 9807 }, { - "epoch": 0.27832009080590236, + "epoch": 0.27793363371022134, "grad_norm": 0.0, - "learning_rate": 1.6936924963835006e-05, - "loss": 0.8639, + "learning_rate": 1.694570338278466e-05, + "loss": 0.9453, "step": 9808 }, { - "epoch": 0.2783484676503973, + "epoch": 0.2779619711524838, "grad_norm": 0.0, - "learning_rate": 1.6936262948998552e-05, - "loss": 1.0151, + "learning_rate": 1.694504307317267e-05, + "loss": 1.0437, "step": 9809 }, { - "epoch": 0.2783768444948922, + "epoch": 0.2779903085947462, "grad_norm": 0.0, - "learning_rate": 1.6935600875571175e-05, - "loss": 0.9825, + "learning_rate": 1.694438270506001e-05, + "loss": 0.9172, "step": 9810 }, { - "epoch": 0.27840522133938705, + "epoch": 0.2780186460370087, "grad_norm": 0.0, - "learning_rate": 1.6934938743558467e-05, - "loss": 0.9553, + "learning_rate": 1.6943722278452234e-05, + "loss": 0.897, "step": 9811 }, { - "epoch": 0.27843359818388197, + "epoch": 0.27804698347927115, "grad_norm": 0.0, - "learning_rate": 1.6934276552966018e-05, - "loss": 0.8992, + "learning_rate": 1.69430617933549e-05, + "loss": 1.0101, "step": 9812 }, { - "epoch": 0.27846197502837683, + "epoch": 0.27807532092153364, "grad_norm": 0.0, - "learning_rate": 1.6933614303799424e-05, - "loss": 0.9848, + "learning_rate": 1.6942401249773585e-05, + "loss": 0.7717, "step": 9813 }, { - "epoch": 0.27849035187287174, + "epoch": 0.2781036583637961, "grad_norm": 0.0, - "learning_rate": 1.6932951996064276e-05, - "loss": 0.9436, + "learning_rate": 1.6941740647713847e-05, + "loss": 0.85, "step": 9814 }, { - "epoch": 0.2785187287173666, + "epoch": 0.2781319958060585, "grad_norm": 0.0, - "learning_rate": 1.693228962976617e-05, - "loss": 0.9488, + "learning_rate": 1.6941079987181245e-05, + "loss": 1.0312, "step": 9815 }, { - "epoch": 0.2785471055618615, + "epoch": 0.278160333248321, "grad_norm": 0.0, - "learning_rate": 1.6931627204910707e-05, - "loss": 0.9583, + "learning_rate": 1.694041926818135e-05, + "loss": 0.9571, "step": 9816 }, { - "epoch": 0.27857548240635643, + "epoch": 0.27818867069058345, "grad_norm": 0.0, - "learning_rate": 1.6930964721503477e-05, - "loss": 0.9084, + "learning_rate": 1.6939758490719727e-05, + "loss": 1.0854, "step": 9817 }, { - "epoch": 0.2786038592508513, + "epoch": 0.27821700813284594, "grad_norm": 0.0, - "learning_rate": 1.693030217955007e-05, - "loss": 0.9865, + "learning_rate": 1.6939097654801947e-05, + "loss": 0.9801, "step": 9818 }, { - "epoch": 0.2786322360953462, + "epoch": 0.2782453455751084, "grad_norm": 0.0, - "learning_rate": 1.6929639579056095e-05, - "loss": 0.9458, + "learning_rate": 1.6938436760433565e-05, + "loss": 1.0011, "step": 9819 }, { - "epoch": 0.27866061293984107, + "epoch": 0.2782736830173709, "grad_norm": 0.0, - "learning_rate": 1.692897692002714e-05, - "loss": 0.8996, + "learning_rate": 1.6937775807620152e-05, + "loss": 0.876, "step": 9820 }, { - "epoch": 0.278688989784336, + "epoch": 0.2783020204596333, "grad_norm": 0.0, - "learning_rate": 1.6928314202468806e-05, - "loss": 1.0341, + "learning_rate": 1.693711479636728e-05, + "loss": 0.9877, "step": 9821 }, { - "epoch": 0.2787173666288309, + "epoch": 0.27833035790189575, "grad_norm": 0.0, - "learning_rate": 1.6927651426386692e-05, - "loss": 0.8465, + "learning_rate": 1.6936453726680514e-05, + "loss": 0.9882, "step": 9822 }, { - "epoch": 0.27874574347332576, + "epoch": 0.27835869534415825, "grad_norm": 0.0, - "learning_rate": 1.6926988591786392e-05, - "loss": 0.9747, + "learning_rate": 1.693579259856542e-05, + "loss": 0.8587, "step": 9823 }, { - "epoch": 0.2787741203178207, + "epoch": 0.2783870327864207, "grad_norm": 0.0, - "learning_rate": 1.6926325698673513e-05, - "loss": 1.0259, + "learning_rate": 1.693513141202757e-05, + "loss": 0.932, "step": 9824 }, { - "epoch": 0.27880249716231553, + "epoch": 0.2784153702286832, "grad_norm": 0.0, - "learning_rate": 1.6925662747053646e-05, - "loss": 0.9654, + "learning_rate": 1.6934470167072536e-05, + "loss": 1.0207, "step": 9825 }, { - "epoch": 0.27883087400681045, + "epoch": 0.2784437076709456, "grad_norm": 0.0, - "learning_rate": 1.692499973693239e-05, - "loss": 0.9984, + "learning_rate": 1.6933808863705885e-05, + "loss": 1.0701, "step": 9826 }, { - "epoch": 0.27885925085130536, + "epoch": 0.27847204511320806, "grad_norm": 0.0, - "learning_rate": 1.6924336668315357e-05, - "loss": 1.0177, + "learning_rate": 1.6933147501933182e-05, + "loss": 0.9382, "step": 9827 }, { - "epoch": 0.2788876276958002, + "epoch": 0.27850038255547055, "grad_norm": 0.0, - "learning_rate": 1.6923673541208142e-05, - "loss": 0.9485, + "learning_rate": 1.693248608176001e-05, + "loss": 0.9619, "step": 9828 }, { - "epoch": 0.27891600454029514, + "epoch": 0.278528719997733, "grad_norm": 0.0, - "learning_rate": 1.6923010355616342e-05, - "loss": 1.0103, + "learning_rate": 1.6931824603191926e-05, + "loss": 0.8069, "step": 9829 }, { - "epoch": 0.27894438138479, + "epoch": 0.2785570574399955, "grad_norm": 0.0, - "learning_rate": 1.692234711154556e-05, - "loss": 0.919, + "learning_rate": 1.6931163066234514e-05, + "loss": 0.9262, "step": 9830 }, { - "epoch": 0.2789727582292849, + "epoch": 0.2785853948822579, "grad_norm": 0.0, - "learning_rate": 1.6921683809001403e-05, - "loss": 0.988, + "learning_rate": 1.693050147089334e-05, + "loss": 1.0262, "step": 9831 }, { - "epoch": 0.27900113507377977, + "epoch": 0.2786137323245204, "grad_norm": 0.0, - "learning_rate": 1.6921020447989472e-05, - "loss": 0.9691, + "learning_rate": 1.6929839817173977e-05, + "loss": 1.1096, "step": 9832 }, { - "epoch": 0.2790295119182747, + "epoch": 0.27864206976678285, "grad_norm": 0.0, - "learning_rate": 1.6920357028515368e-05, - "loss": 0.9513, + "learning_rate": 1.6929178105082003e-05, + "loss": 1.0125, "step": 9833 }, { - "epoch": 0.2790578887627696, + "epoch": 0.2786704072090453, "grad_norm": 0.0, - "learning_rate": 1.69196935505847e-05, - "loss": 0.9617, + "learning_rate": 1.6928516334622988e-05, + "loss": 0.959, "step": 9834 }, { - "epoch": 0.27908626560726446, + "epoch": 0.2786987446513078, "grad_norm": 0.0, - "learning_rate": 1.691903001420307e-05, - "loss": 1.1294, + "learning_rate": 1.6927854505802504e-05, + "loss": 1.0106, "step": 9835 }, { - "epoch": 0.2791146424517594, + "epoch": 0.2787270820935702, "grad_norm": 0.0, - "learning_rate": 1.691836641937608e-05, - "loss": 0.9564, + "learning_rate": 1.6927192618626133e-05, + "loss": 0.9457, "step": 9836 }, { - "epoch": 0.27914301929625424, + "epoch": 0.2787554195358327, "grad_norm": 0.0, - "learning_rate": 1.691770276610934e-05, - "loss": 0.947, + "learning_rate": 1.6926530673099444e-05, + "loss": 1.0112, "step": 9837 }, { - "epoch": 0.27917139614074915, + "epoch": 0.27878375697809515, "grad_norm": 0.0, - "learning_rate": 1.691703905440845e-05, - "loss": 1.0251, + "learning_rate": 1.6925868669228015e-05, + "loss": 1.0356, "step": 9838 }, { - "epoch": 0.27919977298524407, + "epoch": 0.2788120944203576, "grad_norm": 0.0, - "learning_rate": 1.6916375284279024e-05, - "loss": 1.0725, + "learning_rate": 1.6925206607017425e-05, + "loss": 1.0557, "step": 9839 }, { - "epoch": 0.2792281498297389, + "epoch": 0.2788404318626201, "grad_norm": 0.0, - "learning_rate": 1.6915711455726665e-05, - "loss": 0.9552, + "learning_rate": 1.6924544486473245e-05, + "loss": 0.974, "step": 9840 }, { - "epoch": 0.27925652667423384, + "epoch": 0.2788687693048825, "grad_norm": 0.0, - "learning_rate": 1.6915047568756978e-05, - "loss": 0.8916, + "learning_rate": 1.6923882307601055e-05, + "loss": 0.846, "step": 9841 }, { - "epoch": 0.2792849035187287, + "epoch": 0.278897106747145, "grad_norm": 0.0, - "learning_rate": 1.6914383623375577e-05, - "loss": 0.9339, + "learning_rate": 1.6923220070406438e-05, + "loss": 0.9401, "step": 9842 }, { - "epoch": 0.2793132803632236, + "epoch": 0.27892544418940746, "grad_norm": 0.0, - "learning_rate": 1.6913719619588062e-05, - "loss": 0.9411, + "learning_rate": 1.6922557774894965e-05, + "loss": 1.0092, "step": 9843 }, { - "epoch": 0.2793416572077185, + "epoch": 0.27895378163166995, "grad_norm": 0.0, - "learning_rate": 1.6913055557400052e-05, - "loss": 0.9532, + "learning_rate": 1.6921895421072215e-05, + "loss": 1.0462, "step": 9844 }, { - "epoch": 0.2793700340522134, + "epoch": 0.2789821190739324, "grad_norm": 0.0, - "learning_rate": 1.6912391436817147e-05, - "loss": 0.8994, + "learning_rate": 1.692123300894377e-05, + "loss": 0.9875, "step": 9845 }, { - "epoch": 0.2793984108967083, + "epoch": 0.2790104565161948, "grad_norm": 0.0, - "learning_rate": 1.691172725784496e-05, - "loss": 0.9722, + "learning_rate": 1.692057053851521e-05, + "loss": 0.9428, "step": 9846 }, { - "epoch": 0.27942678774120316, + "epoch": 0.2790387939584573, "grad_norm": 0.0, - "learning_rate": 1.6911063020489104e-05, - "loss": 0.9063, + "learning_rate": 1.6919908009792117e-05, + "loss": 0.8795, "step": 9847 }, { - "epoch": 0.2794551645856981, + "epoch": 0.27906713140071976, "grad_norm": 0.0, - "learning_rate": 1.6910398724755187e-05, - "loss": 1.0082, + "learning_rate": 1.6919245422780065e-05, + "loss": 1.1565, "step": 9848 }, { - "epoch": 0.27948354143019294, + "epoch": 0.27909546884298225, "grad_norm": 0.0, - "learning_rate": 1.6909734370648822e-05, - "loss": 1.0424, + "learning_rate": 1.6918582777484642e-05, + "loss": 0.9553, "step": 9849 }, { - "epoch": 0.27951191827468785, + "epoch": 0.2791238062852447, "grad_norm": 0.0, - "learning_rate": 1.6909069958175622e-05, - "loss": 1.0959, + "learning_rate": 1.6917920073911425e-05, + "loss": 0.962, "step": 9850 }, { - "epoch": 0.27954029511918277, + "epoch": 0.27915214372750713, "grad_norm": 0.0, - "learning_rate": 1.6908405487341195e-05, - "loss": 0.989, + "learning_rate": 1.6917257312066e-05, + "loss": 0.9442, "step": 9851 }, { - "epoch": 0.27956867196367763, + "epoch": 0.2791804811697696, "grad_norm": 0.0, - "learning_rate": 1.6907740958151158e-05, - "loss": 0.9211, + "learning_rate": 1.6916594491953948e-05, + "loss": 0.9973, "step": 9852 }, { - "epoch": 0.27959704880817254, + "epoch": 0.27920881861203206, "grad_norm": 0.0, - "learning_rate": 1.6907076370611118e-05, - "loss": 1.0191, + "learning_rate": 1.691593161358085e-05, + "loss": 0.9971, "step": 9853 }, { - "epoch": 0.2796254256526674, + "epoch": 0.27923715605429456, "grad_norm": 0.0, - "learning_rate": 1.69064117247267e-05, - "loss": 1.0896, + "learning_rate": 1.6915268676952295e-05, + "loss": 0.9926, "step": 9854 }, { - "epoch": 0.2796538024971623, + "epoch": 0.279265493496557, "grad_norm": 0.0, - "learning_rate": 1.6905747020503508e-05, - "loss": 1.0707, + "learning_rate": 1.6914605682073863e-05, + "loss": 1.1188, "step": 9855 }, { - "epoch": 0.27968217934165723, + "epoch": 0.2792938309388195, "grad_norm": 0.0, - "learning_rate": 1.690508225794716e-05, - "loss": 0.9525, + "learning_rate": 1.691394262895114e-05, + "loss": 0.9284, "step": 9856 }, { - "epoch": 0.2797105561861521, + "epoch": 0.2793221683810819, "grad_norm": 0.0, - "learning_rate": 1.6904417437063275e-05, - "loss": 0.9408, + "learning_rate": 1.691327951758971e-05, + "loss": 0.9156, "step": 9857 }, { - "epoch": 0.279738933030647, + "epoch": 0.27935050582334436, "grad_norm": 0.0, - "learning_rate": 1.6903752557857462e-05, - "loss": 1.0753, + "learning_rate": 1.691261634799516e-05, + "loss": 1.121, "step": 9858 }, { - "epoch": 0.27976730987514187, + "epoch": 0.27937884326560686, "grad_norm": 0.0, - "learning_rate": 1.6903087620335345e-05, - "loss": 1.0043, + "learning_rate": 1.6911953120173075e-05, + "loss": 0.9724, "step": 9859 }, { - "epoch": 0.2797956867196368, + "epoch": 0.2794071807078693, "grad_norm": 0.0, - "learning_rate": 1.690242262450253e-05, - "loss": 0.9836, + "learning_rate": 1.6911289834129042e-05, + "loss": 0.8594, "step": 9860 }, { - "epoch": 0.27982406356413164, + "epoch": 0.2794355181501318, "grad_norm": 0.0, - "learning_rate": 1.6901757570364648e-05, - "loss": 0.863, + "learning_rate": 1.691062648986865e-05, + "loss": 0.899, "step": 9861 }, { - "epoch": 0.27985244040862656, + "epoch": 0.27946385559239423, "grad_norm": 0.0, - "learning_rate": 1.6901092457927304e-05, - "loss": 1.0193, + "learning_rate": 1.6909963087397484e-05, + "loss": 1.0147, "step": 9862 }, { - "epoch": 0.27988081725312147, + "epoch": 0.27949219303465667, "grad_norm": 0.0, - "learning_rate": 1.6900427287196125e-05, - "loss": 0.9277, + "learning_rate": 1.690929962672113e-05, + "loss": 0.856, "step": 9863 }, { - "epoch": 0.27990919409761633, + "epoch": 0.27952053047691916, "grad_norm": 0.0, - "learning_rate": 1.6899762058176726e-05, - "loss": 0.8938, + "learning_rate": 1.690863610784518e-05, + "loss": 0.9286, "step": 9864 }, { - "epoch": 0.27993757094211125, + "epoch": 0.2795488679191816, "grad_norm": 0.0, - "learning_rate": 1.6899096770874725e-05, - "loss": 1.0368, + "learning_rate": 1.6907972530775227e-05, + "loss": 1.0663, "step": 9865 }, { - "epoch": 0.2799659477866061, + "epoch": 0.2795772053614441, "grad_norm": 0.0, - "learning_rate": 1.6898431425295746e-05, - "loss": 0.8856, + "learning_rate": 1.6907308895516854e-05, + "loss": 0.9938, "step": 9866 }, { - "epoch": 0.279994324631101, + "epoch": 0.27960554280370653, "grad_norm": 0.0, - "learning_rate": 1.6897766021445407e-05, - "loss": 1.0454, + "learning_rate": 1.6906645202075652e-05, + "loss": 0.9388, "step": 9867 }, { - "epoch": 0.28002270147559594, + "epoch": 0.279633880245969, "grad_norm": 0.0, - "learning_rate": 1.6897100559329324e-05, - "loss": 0.8597, + "learning_rate": 1.6905981450457216e-05, + "loss": 0.9493, "step": 9868 }, { - "epoch": 0.2800510783200908, + "epoch": 0.27966221768823146, "grad_norm": 0.0, - "learning_rate": 1.6896435038953126e-05, - "loss": 1.04, + "learning_rate": 1.690531764066713e-05, + "loss": 1.016, "step": 9869 }, { - "epoch": 0.2800794551645857, + "epoch": 0.2796905551304939, "grad_norm": 0.0, - "learning_rate": 1.689576946032243e-05, - "loss": 0.906, + "learning_rate": 1.690465377271099e-05, + "loss": 1.0692, "step": 9870 }, { - "epoch": 0.28010783200908057, + "epoch": 0.2797188925727564, "grad_norm": 0.0, - "learning_rate": 1.689510382344286e-05, - "loss": 1.1369, + "learning_rate": 1.690398984659439e-05, + "loss": 1.0518, "step": 9871 }, { - "epoch": 0.2801362088535755, + "epoch": 0.27974723001501883, "grad_norm": 0.0, - "learning_rate": 1.689443812832004e-05, - "loss": 0.9812, + "learning_rate": 1.6903325862322918e-05, + "loss": 0.8875, "step": 9872 }, { - "epoch": 0.2801645856980704, + "epoch": 0.2797755674572813, "grad_norm": 0.0, - "learning_rate": 1.6893772374959586e-05, - "loss": 0.9937, + "learning_rate": 1.6902661819902167e-05, + "loss": 0.9854, "step": 9873 }, { - "epoch": 0.28019296254256526, + "epoch": 0.27980390489954376, "grad_norm": 0.0, - "learning_rate": 1.689310656336713e-05, - "loss": 0.991, + "learning_rate": 1.6901997719337733e-05, + "loss": 0.9853, "step": 9874 }, { - "epoch": 0.2802213393870602, + "epoch": 0.2798322423418062, "grad_norm": 0.0, - "learning_rate": 1.6892440693548294e-05, - "loss": 0.9985, + "learning_rate": 1.690133356063521e-05, + "loss": 0.9053, "step": 9875 }, { - "epoch": 0.28024971623155503, + "epoch": 0.2798605797840687, "grad_norm": 0.0, - "learning_rate": 1.6891774765508698e-05, - "loss": 0.9813, + "learning_rate": 1.6900669343800195e-05, + "loss": 0.8823, "step": 9876 }, { - "epoch": 0.28027809307604995, + "epoch": 0.27988891722633114, "grad_norm": 0.0, - "learning_rate": 1.6891108779253974e-05, - "loss": 0.9256, + "learning_rate": 1.6900005068838274e-05, + "loss": 0.9503, "step": 9877 }, { - "epoch": 0.2803064699205448, + "epoch": 0.27991725466859363, "grad_norm": 0.0, - "learning_rate": 1.6890442734789743e-05, - "loss": 1.0555, + "learning_rate": 1.689934073575505e-05, + "loss": 0.978, "step": 9878 }, { - "epoch": 0.2803348467650397, + "epoch": 0.27994559211085607, "grad_norm": 0.0, - "learning_rate": 1.6889776632121634e-05, - "loss": 1.0535, + "learning_rate": 1.689867634455612e-05, + "loss": 1.011, "step": 9879 }, { - "epoch": 0.28036322360953464, + "epoch": 0.27997392955311856, "grad_norm": 0.0, - "learning_rate": 1.6889110471255272e-05, - "loss": 1.131, + "learning_rate": 1.6898011895247072e-05, + "loss": 0.904, "step": 9880 }, { - "epoch": 0.2803916004540295, + "epoch": 0.280002266995381, "grad_norm": 0.0, - "learning_rate": 1.6888444252196284e-05, - "loss": 0.9763, + "learning_rate": 1.689734738783351e-05, + "loss": 0.896, "step": 9881 }, { - "epoch": 0.2804199772985244, + "epoch": 0.28003060443764344, "grad_norm": 0.0, - "learning_rate": 1.6887777974950296e-05, - "loss": 0.9099, + "learning_rate": 1.6896682822321033e-05, + "loss": 0.7306, "step": 9882 }, { - "epoch": 0.2804483541430193, + "epoch": 0.28005894187990593, "grad_norm": 0.0, - "learning_rate": 1.688711163952294e-05, - "loss": 0.9499, + "learning_rate": 1.6896018198715235e-05, + "loss": 0.9203, "step": 9883 }, { - "epoch": 0.2804767309875142, + "epoch": 0.28008727932216837, "grad_norm": 0.0, - "learning_rate": 1.688644524591984e-05, - "loss": 0.9242, + "learning_rate": 1.6895353517021714e-05, + "loss": 1.0004, "step": 9884 }, { - "epoch": 0.2805051078320091, + "epoch": 0.28011561676443086, "grad_norm": 0.0, - "learning_rate": 1.688577879414663e-05, - "loss": 1.0291, + "learning_rate": 1.6894688777246065e-05, + "loss": 0.9331, "step": 9885 }, { - "epoch": 0.28053348467650396, + "epoch": 0.2801439542066933, "grad_norm": 0.0, - "learning_rate": 1.6885112284208934e-05, - "loss": 1.0049, + "learning_rate": 1.6894023979393898e-05, + "loss": 0.9473, "step": 9886 }, { - "epoch": 0.2805618615209989, + "epoch": 0.28017229164895574, "grad_norm": 0.0, - "learning_rate": 1.6884445716112388e-05, - "loss": 1.0122, + "learning_rate": 1.6893359123470805e-05, + "loss": 0.9765, "step": 9887 }, { - "epoch": 0.28059023836549374, + "epoch": 0.28020062909121823, "grad_norm": 0.0, - "learning_rate": 1.6883779089862618e-05, - "loss": 0.9954, + "learning_rate": 1.689269420948239e-05, + "loss": 0.9808, "step": 9888 }, { - "epoch": 0.28061861520998865, + "epoch": 0.2802289665334807, "grad_norm": 0.0, - "learning_rate": 1.6883112405465257e-05, - "loss": 1.0063, + "learning_rate": 1.6892029237434248e-05, + "loss": 1.0101, "step": 9889 }, { - "epoch": 0.28064699205448357, + "epoch": 0.28025730397574317, "grad_norm": 0.0, - "learning_rate": 1.6882445662925934e-05, - "loss": 0.9678, + "learning_rate": 1.6891364207331992e-05, + "loss": 0.9955, "step": 9890 }, { - "epoch": 0.28067536889897843, + "epoch": 0.2802856414180056, "grad_norm": 0.0, - "learning_rate": 1.6881778862250285e-05, - "loss": 0.9465, + "learning_rate": 1.6890699119181206e-05, + "loss": 1.0042, "step": 9891 }, { - "epoch": 0.28070374574347334, + "epoch": 0.2803139788602681, "grad_norm": 0.0, - "learning_rate": 1.6881112003443944e-05, - "loss": 0.9783, + "learning_rate": 1.689003397298751e-05, + "loss": 0.9776, "step": 9892 }, { - "epoch": 0.2807321225879682, + "epoch": 0.28034231630253054, "grad_norm": 0.0, - "learning_rate": 1.6880445086512535e-05, - "loss": 1.0703, + "learning_rate": 1.6889368768756495e-05, + "loss": 0.8471, "step": 9893 }, { - "epoch": 0.2807604994324631, + "epoch": 0.280370653744793, "grad_norm": 0.0, - "learning_rate": 1.68797781114617e-05, - "loss": 1.0677, + "learning_rate": 1.6888703506493774e-05, + "loss": 0.943, "step": 9894 }, { - "epoch": 0.280788876276958, + "epoch": 0.28039899118705547, "grad_norm": 0.0, - "learning_rate": 1.687911107829707e-05, - "loss": 0.9608, + "learning_rate": 1.688803818620494e-05, + "loss": 0.934, "step": 9895 }, { - "epoch": 0.2808172531214529, + "epoch": 0.2804273286293179, "grad_norm": 0.0, - "learning_rate": 1.6878443987024276e-05, - "loss": 1.027, + "learning_rate": 1.6887372807895604e-05, + "loss": 0.9945, "step": 9896 }, { - "epoch": 0.2808456299659478, + "epoch": 0.2804556660715804, "grad_norm": 0.0, - "learning_rate": 1.687777683764896e-05, - "loss": 0.9608, + "learning_rate": 1.6886707371571373e-05, + "loss": 1.0718, "step": 9897 }, { - "epoch": 0.28087400681044267, + "epoch": 0.28048400351384284, "grad_norm": 0.0, - "learning_rate": 1.6877109630176757e-05, - "loss": 1.0435, + "learning_rate": 1.6886041877237843e-05, + "loss": 1.0033, "step": 9898 }, { - "epoch": 0.2809023836549376, + "epoch": 0.2805123409561053, "grad_norm": 0.0, - "learning_rate": 1.6876442364613292e-05, - "loss": 1.0796, + "learning_rate": 1.688537632490063e-05, + "loss": 1.0158, "step": 9899 }, { - "epoch": 0.28093076049943244, + "epoch": 0.28054067839836777, "grad_norm": 0.0, - "learning_rate": 1.6875775040964214e-05, - "loss": 0.9976, + "learning_rate": 1.688471071456533e-05, + "loss": 1.0999, "step": 9900 }, { - "epoch": 0.28095913734392736, + "epoch": 0.2805690158406302, "grad_norm": 0.0, - "learning_rate": 1.6875107659235155e-05, - "loss": 0.9783, + "learning_rate": 1.688404504623756e-05, + "loss": 1.0366, "step": 9901 }, { - "epoch": 0.28098751418842227, + "epoch": 0.2805973532828927, "grad_norm": 0.0, - "learning_rate": 1.687444021943175e-05, - "loss": 0.9782, + "learning_rate": 1.6883379319922922e-05, + "loss": 0.9474, "step": 9902 }, { - "epoch": 0.28101589103291713, + "epoch": 0.28062569072515514, "grad_norm": 0.0, - "learning_rate": 1.6873772721559644e-05, - "loss": 0.8469, + "learning_rate": 1.688271353562702e-05, + "loss": 0.9177, "step": 9903 }, { - "epoch": 0.28104426787741205, + "epoch": 0.28065402816741764, "grad_norm": 0.0, - "learning_rate": 1.6873105165624465e-05, - "loss": 0.9931, + "learning_rate": 1.688204769335547e-05, + "loss": 0.9958, "step": 9904 }, { - "epoch": 0.2810726447219069, + "epoch": 0.2806823656096801, "grad_norm": 0.0, - "learning_rate": 1.6872437551631863e-05, - "loss": 0.8819, + "learning_rate": 1.688138179311387e-05, + "loss": 0.9791, "step": 9905 }, { - "epoch": 0.2811010215664018, + "epoch": 0.2807107030519425, "grad_norm": 0.0, - "learning_rate": 1.687176987958747e-05, - "loss": 0.8802, + "learning_rate": 1.6880715834907844e-05, + "loss": 1.004, "step": 9906 }, { - "epoch": 0.28112939841089674, + "epoch": 0.280739040494205, "grad_norm": 0.0, - "learning_rate": 1.6871102149496925e-05, - "loss": 0.8851, + "learning_rate": 1.688004981874299e-05, + "loss": 0.9359, "step": 9907 }, { - "epoch": 0.2811577752553916, + "epoch": 0.28076737793646744, "grad_norm": 0.0, - "learning_rate": 1.6870434361365875e-05, - "loss": 0.9837, + "learning_rate": 1.6879383744624922e-05, + "loss": 0.9391, "step": 9908 }, { - "epoch": 0.2811861520998865, + "epoch": 0.28079571537872994, "grad_norm": 0.0, - "learning_rate": 1.6869766515199956e-05, - "loss": 0.924, + "learning_rate": 1.687871761255925e-05, + "loss": 1.0476, "step": 9909 }, { - "epoch": 0.28121452894438137, + "epoch": 0.2808240528209924, "grad_norm": 0.0, - "learning_rate": 1.686909861100481e-05, - "loss": 0.8929, + "learning_rate": 1.6878051422551584e-05, + "loss": 0.9758, "step": 9910 }, { - "epoch": 0.2812429057888763, + "epoch": 0.2808523902632548, "grad_norm": 0.0, - "learning_rate": 1.686843064878608e-05, - "loss": 0.8789, + "learning_rate": 1.687738517460754e-05, + "loss": 0.9838, "step": 9911 }, { - "epoch": 0.28127128263337114, + "epoch": 0.2808807277055173, "grad_norm": 0.0, - "learning_rate": 1.686776262854941e-05, - "loss": 0.9624, + "learning_rate": 1.687671886873272e-05, + "loss": 0.966, "step": 9912 }, { - "epoch": 0.28129965947786606, + "epoch": 0.28090906514777975, "grad_norm": 0.0, - "learning_rate": 1.6867094550300436e-05, - "loss": 0.9424, + "learning_rate": 1.6876052504932753e-05, + "loss": 0.9849, "step": 9913 }, { - "epoch": 0.281328036322361, + "epoch": 0.28093740259004224, "grad_norm": 0.0, - "learning_rate": 1.686642641404481e-05, - "loss": 0.966, + "learning_rate": 1.6875386083213238e-05, + "loss": 0.9041, "step": 9914 }, { - "epoch": 0.28135641316685583, + "epoch": 0.2809657400323047, "grad_norm": 0.0, - "learning_rate": 1.6865758219788166e-05, - "loss": 0.8988, + "learning_rate": 1.687471960357979e-05, + "loss": 0.8276, "step": 9915 }, { - "epoch": 0.28138479001135075, + "epoch": 0.28099407747456717, "grad_norm": 0.0, - "learning_rate": 1.6865089967536156e-05, - "loss": 0.9128, + "learning_rate": 1.687405306603803e-05, + "loss": 1.0439, "step": 9916 }, { - "epoch": 0.2814131668558456, + "epoch": 0.2810224149168296, "grad_norm": 0.0, - "learning_rate": 1.6864421657294424e-05, - "loss": 1.076, + "learning_rate": 1.6873386470593564e-05, + "loss": 0.8375, "step": 9917 }, { - "epoch": 0.2814415437003405, + "epoch": 0.28105075235909205, "grad_norm": 0.0, - "learning_rate": 1.6863753289068612e-05, - "loss": 0.9047, + "learning_rate": 1.6872719817252015e-05, + "loss": 0.9689, "step": 9918 }, { - "epoch": 0.28146992054483544, + "epoch": 0.28107908980135454, "grad_norm": 0.0, - "learning_rate": 1.686308486286437e-05, - "loss": 0.9813, + "learning_rate": 1.6872053106018996e-05, + "loss": 1.0584, "step": 9919 }, { - "epoch": 0.2814982973893303, + "epoch": 0.281107427243617, "grad_norm": 0.0, - "learning_rate": 1.686241637868734e-05, - "loss": 0.9482, + "learning_rate": 1.687138633690012e-05, + "loss": 1.1068, "step": 9920 }, { - "epoch": 0.2815266742338252, + "epoch": 0.2811357646858795, "grad_norm": 0.0, - "learning_rate": 1.686174783654317e-05, - "loss": 1.01, + "learning_rate": 1.6870719509901003e-05, + "loss": 1.0257, "step": 9921 }, { - "epoch": 0.2815550510783201, + "epoch": 0.2811641021281419, "grad_norm": 0.0, - "learning_rate": 1.6861079236437503e-05, - "loss": 0.9663, + "learning_rate": 1.6870052625027263e-05, + "loss": 1.0669, "step": 9922 }, { - "epoch": 0.281583427922815, + "epoch": 0.28119243957040435, "grad_norm": 0.0, - "learning_rate": 1.6860410578375997e-05, - "loss": 0.9584, + "learning_rate": 1.6869385682284524e-05, + "loss": 0.9558, "step": 9923 }, { - "epoch": 0.28161180476730985, + "epoch": 0.28122077701266684, "grad_norm": 0.0, - "learning_rate": 1.6859741862364294e-05, - "loss": 0.9766, + "learning_rate": 1.6868718681678397e-05, + "loss": 0.9475, "step": 9924 }, { - "epoch": 0.28164018161180476, + "epoch": 0.2812491144549293, "grad_norm": 0.0, - "learning_rate": 1.6859073088408043e-05, - "loss": 0.9721, + "learning_rate": 1.6868051623214497e-05, + "loss": 1.056, "step": 9925 }, { - "epoch": 0.2816685584562997, + "epoch": 0.2812774518971918, "grad_norm": 0.0, - "learning_rate": 1.685840425651289e-05, - "loss": 0.9644, + "learning_rate": 1.6867384506898458e-05, + "loss": 0.91, "step": 9926 }, { - "epoch": 0.28169693530079454, + "epoch": 0.2813057893394542, "grad_norm": 0.0, - "learning_rate": 1.6857735366684492e-05, - "loss": 0.8982, + "learning_rate": 1.686671733273588e-05, + "loss": 0.8912, "step": 9927 }, { - "epoch": 0.28172531214528945, + "epoch": 0.2813341267817167, "grad_norm": 0.0, - "learning_rate": 1.6857066418928495e-05, - "loss": 0.9355, + "learning_rate": 1.6866050100732395e-05, + "loss": 1.0344, "step": 9928 }, { - "epoch": 0.2817536889897843, + "epoch": 0.28136246422397915, "grad_norm": 0.0, - "learning_rate": 1.6856397413250548e-05, - "loss": 1.0898, + "learning_rate": 1.686538281089362e-05, + "loss": 1.0446, "step": 9929 }, { - "epoch": 0.2817820658342792, + "epoch": 0.2813908016662416, "grad_norm": 0.0, - "learning_rate": 1.68557283496563e-05, - "loss": 1.0086, + "learning_rate": 1.6864715463225177e-05, + "loss": 1.0468, "step": 9930 }, { - "epoch": 0.28181044267877414, + "epoch": 0.2814191391085041, "grad_norm": 0.0, - "learning_rate": 1.6855059228151413e-05, - "loss": 0.9656, + "learning_rate": 1.6864048057732686e-05, + "loss": 0.9461, "step": 9931 }, { - "epoch": 0.281838819523269, + "epoch": 0.2814474765507665, "grad_norm": 0.0, - "learning_rate": 1.685439004874153e-05, - "loss": 0.9312, + "learning_rate": 1.6863380594421766e-05, + "loss": 0.9482, "step": 9932 }, { - "epoch": 0.2818671963677639, + "epoch": 0.281475813993029, "grad_norm": 0.0, - "learning_rate": 1.6853720811432307e-05, - "loss": 0.8835, + "learning_rate": 1.686271307329805e-05, + "loss": 1.0374, "step": 9933 }, { - "epoch": 0.2818955732122588, + "epoch": 0.28150415143529145, "grad_norm": 0.0, - "learning_rate": 1.68530515162294e-05, - "loss": 0.8972, + "learning_rate": 1.686204549436715e-05, + "loss": 0.9187, "step": 9934 }, { - "epoch": 0.2819239500567537, + "epoch": 0.2815324888775539, "grad_norm": 0.0, - "learning_rate": 1.6852382163138453e-05, - "loss": 0.9809, + "learning_rate": 1.686137785763469e-05, + "loss": 0.9188, "step": 9935 }, { - "epoch": 0.2819523269012486, + "epoch": 0.2815608263198164, "grad_norm": 0.0, - "learning_rate": 1.6851712752165124e-05, - "loss": 0.9589, + "learning_rate": 1.68607101631063e-05, + "loss": 0.9907, "step": 9936 }, { - "epoch": 0.28198070374574347, + "epoch": 0.2815891637620788, "grad_norm": 0.0, - "learning_rate": 1.6851043283315075e-05, - "loss": 0.8887, + "learning_rate": 1.6860042410787597e-05, + "loss": 1.0319, "step": 9937 }, { - "epoch": 0.2820090805902384, + "epoch": 0.2816175012043413, "grad_norm": 0.0, - "learning_rate": 1.6850373756593952e-05, - "loss": 0.952, + "learning_rate": 1.685937460068421e-05, + "loss": 1.0049, "step": 9938 }, { - "epoch": 0.28203745743473324, + "epoch": 0.28164583864660375, "grad_norm": 0.0, - "learning_rate": 1.6849704172007414e-05, - "loss": 0.9364, + "learning_rate": 1.6858706732801767e-05, + "loss": 1.0235, "step": 9939 }, { - "epoch": 0.28206583427922816, + "epoch": 0.28167417608886625, "grad_norm": 0.0, - "learning_rate": 1.6849034529561116e-05, - "loss": 0.8024, + "learning_rate": 1.685803880714589e-05, + "loss": 1.0339, "step": 9940 }, { - "epoch": 0.282094211123723, + "epoch": 0.2817025135311287, "grad_norm": 0.0, - "learning_rate": 1.684836482926072e-05, - "loss": 0.9613, + "learning_rate": 1.6857370823722204e-05, + "loss": 1.0149, "step": 9941 }, { - "epoch": 0.28212258796821793, + "epoch": 0.2817308509733911, "grad_norm": 0.0, - "learning_rate": 1.6847695071111876e-05, - "loss": 1.0187, + "learning_rate": 1.6856702782536335e-05, + "loss": 1.0165, "step": 9942 }, { - "epoch": 0.28215096481271285, + "epoch": 0.2817591884156536, "grad_norm": 0.0, - "learning_rate": 1.6847025255120244e-05, - "loss": 0.9966, + "learning_rate": 1.6856034683593917e-05, + "loss": 0.8699, "step": 9943 }, { - "epoch": 0.2821793416572077, + "epoch": 0.28178752585791605, "grad_norm": 0.0, - "learning_rate": 1.684635538129148e-05, - "loss": 0.9998, + "learning_rate": 1.685536652690057e-05, + "loss": 1.1215, "step": 9944 }, { - "epoch": 0.2822077185017026, + "epoch": 0.28181586330017855, "grad_norm": 0.0, - "learning_rate": 1.6845685449631247e-05, - "loss": 1.0488, + "learning_rate": 1.6854698312461924e-05, + "loss": 1.051, "step": 9945 }, { - "epoch": 0.2822360953461975, + "epoch": 0.281844200742441, "grad_norm": 0.0, - "learning_rate": 1.68450154601452e-05, - "loss": 0.9566, + "learning_rate": 1.685403004028361e-05, + "loss": 1.0014, "step": 9946 }, { - "epoch": 0.2822644721906924, + "epoch": 0.2818725381847034, "grad_norm": 0.0, - "learning_rate": 1.6844345412838997e-05, - "loss": 0.995, + "learning_rate": 1.6853361710371256e-05, + "loss": 1.0085, "step": 9947 }, { - "epoch": 0.2822928490351873, + "epoch": 0.2819008756269659, "grad_norm": 0.0, - "learning_rate": 1.6843675307718306e-05, - "loss": 0.935, + "learning_rate": 1.6852693322730493e-05, + "loss": 0.9342, "step": 9948 }, { - "epoch": 0.28232122587968217, + "epoch": 0.28192921306922836, "grad_norm": 0.0, - "learning_rate": 1.6843005144788778e-05, - "loss": 0.9924, + "learning_rate": 1.6852024877366945e-05, + "loss": 0.9982, "step": 9949 }, { - "epoch": 0.2823496027241771, + "epoch": 0.28195755051149085, "grad_norm": 0.0, - "learning_rate": 1.6842334924056082e-05, - "loss": 0.9381, + "learning_rate": 1.685135637428625e-05, + "loss": 0.9873, "step": 9950 }, { - "epoch": 0.28237797956867194, + "epoch": 0.2819858879537533, "grad_norm": 0.0, - "learning_rate": 1.684166464552587e-05, - "loss": 0.8763, + "learning_rate": 1.6850687813494036e-05, + "loss": 0.9203, "step": 9951 }, { - "epoch": 0.28240635641316686, + "epoch": 0.2820142253960158, "grad_norm": 0.0, - "learning_rate": 1.6840994309203815e-05, - "loss": 0.8757, + "learning_rate": 1.685001919499593e-05, + "loss": 1.0881, "step": 9952 }, { - "epoch": 0.2824347332576618, + "epoch": 0.2820425628382782, "grad_norm": 0.0, - "learning_rate": 1.6840323915095566e-05, - "loss": 0.9778, + "learning_rate": 1.6849350518797575e-05, + "loss": 0.9367, "step": 9953 }, { - "epoch": 0.28246311010215663, + "epoch": 0.28207090028054066, "grad_norm": 0.0, - "learning_rate": 1.68396534632068e-05, - "loss": 0.9757, + "learning_rate": 1.6848681784904597e-05, + "loss": 0.9549, "step": 9954 }, { - "epoch": 0.28249148694665155, + "epoch": 0.28209923772280315, "grad_norm": 0.0, - "learning_rate": 1.683898295354317e-05, - "loss": 0.9632, + "learning_rate": 1.6848012993322627e-05, + "loss": 1.1149, "step": 9955 }, { - "epoch": 0.2825198637911464, + "epoch": 0.2821275751650656, "grad_norm": 0.0, - "learning_rate": 1.6838312386110347e-05, - "loss": 0.881, + "learning_rate": 1.68473441440573e-05, + "loss": 1.0173, "step": 9956 }, { - "epoch": 0.2825482406356413, + "epoch": 0.2821559126073281, "grad_norm": 0.0, - "learning_rate": 1.683764176091399e-05, - "loss": 0.958, + "learning_rate": 1.684667523711425e-05, + "loss": 0.9378, "step": 9957 }, { - "epoch": 0.2825766174801362, + "epoch": 0.2821842500495905, "grad_norm": 0.0, - "learning_rate": 1.6836971077959766e-05, - "loss": 0.9832, + "learning_rate": 1.6846006272499113e-05, + "loss": 0.9987, "step": 9958 }, { - "epoch": 0.2826049943246311, + "epoch": 0.28221258749185296, "grad_norm": 0.0, - "learning_rate": 1.6836300337253337e-05, - "loss": 1.0065, + "learning_rate": 1.6845337250217525e-05, + "loss": 0.9252, "step": 9959 }, { - "epoch": 0.282633371169126, + "epoch": 0.28224092493411546, "grad_norm": 0.0, - "learning_rate": 1.6835629538800376e-05, - "loss": 0.9143, + "learning_rate": 1.6844668170275117e-05, + "loss": 0.9638, "step": 9960 }, { - "epoch": 0.2826617480136209, + "epoch": 0.2822692623763779, "grad_norm": 0.0, - "learning_rate": 1.6834958682606545e-05, - "loss": 1.0318, + "learning_rate": 1.6843999032677525e-05, + "loss": 0.9483, "step": 9961 }, { - "epoch": 0.2826901248581158, + "epoch": 0.2822975998186404, "grad_norm": 0.0, - "learning_rate": 1.6834287768677507e-05, - "loss": 0.9108, + "learning_rate": 1.6843329837430393e-05, + "loss": 0.8958, "step": 9962 }, { - "epoch": 0.28271850170261065, + "epoch": 0.2823259372609028, "grad_norm": 0.0, - "learning_rate": 1.6833616797018936e-05, - "loss": 0.9397, + "learning_rate": 1.6842660584539352e-05, + "loss": 0.9672, "step": 9963 }, { - "epoch": 0.28274687854710556, + "epoch": 0.2823542747031653, "grad_norm": 0.0, - "learning_rate": 1.6832945767636496e-05, - "loss": 0.8994, + "learning_rate": 1.6841991274010037e-05, + "loss": 0.9132, "step": 9964 }, { - "epoch": 0.2827752553916005, + "epoch": 0.28238261214542776, "grad_norm": 0.0, - "learning_rate": 1.6832274680535855e-05, - "loss": 1.054, + "learning_rate": 1.6841321905848088e-05, + "loss": 0.9473, "step": 9965 }, { - "epoch": 0.28280363223609534, + "epoch": 0.2824109495876902, "grad_norm": 0.0, - "learning_rate": 1.6831603535722682e-05, - "loss": 0.9495, + "learning_rate": 1.684065248005915e-05, + "loss": 0.9161, "step": 9966 }, { - "epoch": 0.28283200908059025, + "epoch": 0.2824392870299527, "grad_norm": 0.0, - "learning_rate": 1.683093233320265e-05, - "loss": 0.9698, + "learning_rate": 1.6839982996648848e-05, + "loss": 1.0583, "step": 9967 }, { - "epoch": 0.2828603859250851, + "epoch": 0.28246762447221513, "grad_norm": 0.0, - "learning_rate": 1.6830261072981423e-05, - "loss": 0.9318, + "learning_rate": 1.6839313455622836e-05, + "loss": 1.012, "step": 9968 }, { - "epoch": 0.28288876276958, + "epoch": 0.2824959619144776, "grad_norm": 0.0, - "learning_rate": 1.6829589755064673e-05, - "loss": 0.9869, + "learning_rate": 1.6838643856986746e-05, + "loss": 0.9055, "step": 9969 }, { - "epoch": 0.28291713961407494, + "epoch": 0.28252429935674006, "grad_norm": 0.0, - "learning_rate": 1.6828918379458072e-05, - "loss": 0.9861, + "learning_rate": 1.6837974200746218e-05, + "loss": 0.9911, "step": 9970 }, { - "epoch": 0.2829455164585698, + "epoch": 0.2825526367990025, "grad_norm": 0.0, - "learning_rate": 1.6828246946167293e-05, - "loss": 0.8488, + "learning_rate": 1.683730448690689e-05, + "loss": 0.9039, "step": 9971 }, { - "epoch": 0.2829738933030647, + "epoch": 0.282580974241265, "grad_norm": 0.0, - "learning_rate": 1.6827575455198002e-05, - "loss": 0.8104, + "learning_rate": 1.6836634715474413e-05, + "loss": 0.9979, "step": 9972 }, { - "epoch": 0.2830022701475596, + "epoch": 0.28260931168352743, "grad_norm": 0.0, - "learning_rate": 1.6826903906555875e-05, - "loss": 0.9307, + "learning_rate": 1.683596488645442e-05, + "loss": 0.9812, "step": 9973 }, { - "epoch": 0.2830306469920545, + "epoch": 0.2826376491257899, "grad_norm": 0.0, - "learning_rate": 1.6826232300246585e-05, - "loss": 0.8825, + "learning_rate": 1.6835294999852556e-05, + "loss": 0.9644, "step": 9974 }, { - "epoch": 0.28305902383654935, + "epoch": 0.28266598656805236, "grad_norm": 0.0, - "learning_rate": 1.6825560636275806e-05, - "loss": 0.903, + "learning_rate": 1.6834625055674467e-05, + "loss": 0.9332, "step": 9975 }, { - "epoch": 0.28308740068104427, + "epoch": 0.2826943240103148, "grad_norm": 0.0, - "learning_rate": 1.6824888914649205e-05, - "loss": 0.9882, + "learning_rate": 1.6833955053925792e-05, + "loss": 1.0124, "step": 9976 }, { - "epoch": 0.2831157775255392, + "epoch": 0.2827226614525773, "grad_norm": 0.0, - "learning_rate": 1.682421713537246e-05, - "loss": 0.9249, + "learning_rate": 1.6833284994612175e-05, + "loss": 0.9141, "step": 9977 }, { - "epoch": 0.28314415437003404, + "epoch": 0.28275099889483973, "grad_norm": 0.0, - "learning_rate": 1.6823545298451248e-05, - "loss": 0.9248, + "learning_rate": 1.683261487773926e-05, + "loss": 1.0311, "step": 9978 }, { - "epoch": 0.28317253121452896, + "epoch": 0.2827793363371022, "grad_norm": 0.0, - "learning_rate": 1.6822873403891246e-05, - "loss": 0.8915, + "learning_rate": 1.6831944703312694e-05, + "loss": 0.9372, "step": 9979 }, { - "epoch": 0.2832009080590238, + "epoch": 0.28280767377936467, "grad_norm": 0.0, - "learning_rate": 1.6822201451698123e-05, - "loss": 0.9709, + "learning_rate": 1.6831274471338122e-05, + "loss": 0.9591, "step": 9980 }, { - "epoch": 0.28322928490351873, + "epoch": 0.28283601122162716, "grad_norm": 0.0, - "learning_rate": 1.6821529441877557e-05, - "loss": 0.9589, + "learning_rate": 1.6830604181821188e-05, + "loss": 0.9752, "step": 9981 }, { - "epoch": 0.28325766174801364, + "epoch": 0.2828643486638896, "grad_norm": 0.0, - "learning_rate": 1.6820857374435223e-05, - "loss": 0.9381, + "learning_rate": 1.6829933834767537e-05, + "loss": 0.8926, "step": 9982 }, { - "epoch": 0.2832860385925085, + "epoch": 0.28289268610615204, "grad_norm": 0.0, - "learning_rate": 1.6820185249376804e-05, - "loss": 0.9118, + "learning_rate": 1.682926343018282e-05, + "loss": 0.9109, "step": 9983 }, { - "epoch": 0.2833144154370034, + "epoch": 0.28292102354841453, "grad_norm": 0.0, - "learning_rate": 1.6819513066707967e-05, - "loss": 0.9688, + "learning_rate": 1.682859296807268e-05, + "loss": 1.064, "step": 9984 }, { - "epoch": 0.2833427922814983, + "epoch": 0.28294936099067697, "grad_norm": 0.0, - "learning_rate": 1.6818840826434405e-05, - "loss": 1.0194, + "learning_rate": 1.682792244844276e-05, + "loss": 0.9354, "step": 9985 }, { - "epoch": 0.2833711691259932, + "epoch": 0.28297769843293946, "grad_norm": 0.0, - "learning_rate": 1.6818168528561783e-05, - "loss": 1.0255, + "learning_rate": 1.6827251871298723e-05, + "loss": 1.0271, "step": 9986 }, { - "epoch": 0.2833995459704881, + "epoch": 0.2830060358752019, "grad_norm": 0.0, - "learning_rate": 1.6817496173095786e-05, - "loss": 0.895, + "learning_rate": 1.68265812366462e-05, + "loss": 0.9611, "step": 9987 }, { - "epoch": 0.28342792281498297, + "epoch": 0.28303437331746434, "grad_norm": 0.0, - "learning_rate": 1.6816823760042092e-05, - "loss": 0.9354, + "learning_rate": 1.6825910544490852e-05, + "loss": 0.9208, "step": 9988 }, { - "epoch": 0.2834562996594779, + "epoch": 0.28306271075972683, "grad_norm": 0.0, - "learning_rate": 1.681615128940638e-05, - "loss": 0.8889, + "learning_rate": 1.6825239794838326e-05, + "loss": 0.8386, "step": 9989 }, { - "epoch": 0.28348467650397274, + "epoch": 0.28309104820198927, "grad_norm": 0.0, - "learning_rate": 1.681547876119434e-05, - "loss": 1.0103, + "learning_rate": 1.6824568987694268e-05, + "loss": 1.0533, "step": 9990 }, { - "epoch": 0.28351305334846766, + "epoch": 0.28311938564425176, "grad_norm": 0.0, - "learning_rate": 1.6814806175411633e-05, - "loss": 1.0701, + "learning_rate": 1.6823898123064334e-05, + "loss": 0.9793, "step": 9991 }, { - "epoch": 0.2835414301929625, + "epoch": 0.2831477230865142, "grad_norm": 0.0, - "learning_rate": 1.6814133532063955e-05, - "loss": 1.0123, + "learning_rate": 1.682322720095417e-05, + "loss": 1.0149, "step": 9992 }, { - "epoch": 0.28356980703745743, + "epoch": 0.2831760605287767, "grad_norm": 0.0, - "learning_rate": 1.681346083115699e-05, - "loss": 1.0108, + "learning_rate": 1.6822556221369432e-05, + "loss": 0.8599, "step": 9993 }, { - "epoch": 0.28359818388195235, + "epoch": 0.28320439797103913, "grad_norm": 0.0, - "learning_rate": 1.681278807269641e-05, - "loss": 0.893, + "learning_rate": 1.6821885184315767e-05, + "loss": 0.9391, "step": 9994 }, { - "epoch": 0.2836265607264472, + "epoch": 0.2832327354133016, "grad_norm": 0.0, - "learning_rate": 1.6812115256687906e-05, - "loss": 0.9708, + "learning_rate": 1.682121408979883e-05, + "loss": 0.9196, "step": 9995 }, { - "epoch": 0.2836549375709421, + "epoch": 0.28326107285556407, "grad_norm": 0.0, - "learning_rate": 1.6811442383137154e-05, - "loss": 1.0083, + "learning_rate": 1.6820542937824272e-05, + "loss": 1.0133, "step": 9996 }, { - "epoch": 0.283683314415437, + "epoch": 0.2832894102978265, "grad_norm": 0.0, - "learning_rate": 1.6810769452049846e-05, - "loss": 0.9977, + "learning_rate": 1.6819871728397755e-05, + "loss": 0.9131, "step": 9997 }, { - "epoch": 0.2837116912599319, + "epoch": 0.283317747740089, "grad_norm": 0.0, - "learning_rate": 1.681009646343166e-05, - "loss": 0.9319, + "learning_rate": 1.6819200461524922e-05, + "loss": 0.9574, "step": 9998 }, { - "epoch": 0.2837400681044268, + "epoch": 0.28334608518235144, "grad_norm": 0.0, - "learning_rate": 1.6809423417288283e-05, - "loss": 0.8995, + "learning_rate": 1.6818529137211427e-05, + "loss": 0.9421, "step": 9999 }, { - "epoch": 0.28376844494892167, + "epoch": 0.2833744226246139, "grad_norm": 0.0, - "learning_rate": 1.68087503136254e-05, - "loss": 0.8365, + "learning_rate": 1.6817857755462932e-05, + "loss": 1.02, "step": 10000 }, { - "epoch": 0.2837968217934166, + "epoch": 0.28340276006687637, "grad_norm": 0.0, - "learning_rate": 1.68080771524487e-05, - "loss": 0.9613, + "learning_rate": 1.681718631628509e-05, + "loss": 0.9177, "step": 10001 }, { - "epoch": 0.28382519863791145, + "epoch": 0.2834310975091388, "grad_norm": 0.0, - "learning_rate": 1.6807403933763865e-05, - "loss": 0.9199, + "learning_rate": 1.6816514819683557e-05, + "loss": 1.038, "step": 10002 }, { - "epoch": 0.28385357548240636, + "epoch": 0.2834594349514013, "grad_norm": 0.0, - "learning_rate": 1.6806730657576585e-05, - "loss": 1.0168, + "learning_rate": 1.681584326566399e-05, + "loss": 0.9987, "step": 10003 }, { - "epoch": 0.2838819523269012, + "epoch": 0.28348777239366374, "grad_norm": 0.0, - "learning_rate": 1.6806057323892542e-05, - "loss": 0.8998, + "learning_rate": 1.681517165423204e-05, + "loss": 0.9448, "step": 10004 }, { - "epoch": 0.28391032917139614, + "epoch": 0.28351610983592623, "grad_norm": 0.0, - "learning_rate": 1.6805383932717428e-05, - "loss": 1.0095, + "learning_rate": 1.681449998539337e-05, + "loss": 1.0686, "step": 10005 }, { - "epoch": 0.28393870601589105, + "epoch": 0.28354444727818867, "grad_norm": 0.0, - "learning_rate": 1.6804710484056926e-05, - "loss": 0.9357, + "learning_rate": 1.6813828259153638e-05, + "loss": 1.0425, "step": 10006 }, { - "epoch": 0.2839670828603859, + "epoch": 0.2835727847204511, "grad_norm": 0.0, - "learning_rate": 1.6804036977916735e-05, - "loss": 0.921, + "learning_rate": 1.6813156475518496e-05, + "loss": 0.9367, "step": 10007 }, { - "epoch": 0.2839954597048808, + "epoch": 0.2836011221627136, "grad_norm": 0.0, - "learning_rate": 1.6803363414302535e-05, - "loss": 1.0444, + "learning_rate": 1.6812484634493612e-05, + "loss": 0.9123, "step": 10008 }, { - "epoch": 0.2840238365493757, + "epoch": 0.28362945960497604, "grad_norm": 0.0, - "learning_rate": 1.6802689793220017e-05, - "loss": 1.0453, + "learning_rate": 1.6811812736084635e-05, + "loss": 0.9482, "step": 10009 }, { - "epoch": 0.2840522133938706, + "epoch": 0.28365779704723854, "grad_norm": 0.0, - "learning_rate": 1.6802016114674874e-05, - "loss": 0.943, + "learning_rate": 1.6811140780297236e-05, + "loss": 0.9202, "step": 10010 }, { - "epoch": 0.2840805902383655, + "epoch": 0.283686134489501, "grad_norm": 0.0, - "learning_rate": 1.6801342378672797e-05, - "loss": 1.1157, + "learning_rate": 1.6810468767137066e-05, + "loss": 0.894, "step": 10011 }, { - "epoch": 0.2841089670828604, + "epoch": 0.2837144719317634, "grad_norm": 0.0, - "learning_rate": 1.6800668585219474e-05, - "loss": 0.8655, + "learning_rate": 1.6809796696609784e-05, + "loss": 0.9041, "step": 10012 }, { - "epoch": 0.2841373439273553, + "epoch": 0.2837428093740259, "grad_norm": 0.0, - "learning_rate": 1.6799994734320596e-05, - "loss": 0.8215, + "learning_rate": 1.6809124568721062e-05, + "loss": 0.8869, "step": 10013 }, { - "epoch": 0.28416572077185015, + "epoch": 0.28377114681628834, "grad_norm": 0.0, - "learning_rate": 1.6799320825981856e-05, - "loss": 1.0405, + "learning_rate": 1.680845238347655e-05, + "loss": 0.9244, "step": 10014 }, { - "epoch": 0.28419409761634506, + "epoch": 0.28379948425855084, "grad_norm": 0.0, - "learning_rate": 1.679864686020895e-05, - "loss": 1.0312, + "learning_rate": 1.6807780140881922e-05, + "loss": 0.9829, "step": 10015 }, { - "epoch": 0.28422247446084, + "epoch": 0.2838278217008133, "grad_norm": 0.0, - "learning_rate": 1.6797972837007568e-05, - "loss": 0.9077, + "learning_rate": 1.6807107840942827e-05, + "loss": 0.9624, "step": 10016 }, { - "epoch": 0.28425085130533484, + "epoch": 0.28385615914307577, "grad_norm": 0.0, - "learning_rate": 1.6797298756383404e-05, - "loss": 1.0053, + "learning_rate": 1.6806435483664942e-05, + "loss": 0.8937, "step": 10017 }, { - "epoch": 0.28427922814982975, + "epoch": 0.2838844965853382, "grad_norm": 0.0, - "learning_rate": 1.679662461834215e-05, - "loss": 0.9331, + "learning_rate": 1.6805763069053917e-05, + "loss": 1.0329, "step": 10018 }, { - "epoch": 0.2843076049943246, + "epoch": 0.28391283402760065, "grad_norm": 0.0, - "learning_rate": 1.6795950422889503e-05, - "loss": 0.8827, + "learning_rate": 1.6805090597115424e-05, + "loss": 0.9169, "step": 10019 }, { - "epoch": 0.28433598183881953, + "epoch": 0.28394117146986314, "grad_norm": 0.0, - "learning_rate": 1.6795276170031157e-05, - "loss": 0.9426, + "learning_rate": 1.680441806785513e-05, + "loss": 0.9233, "step": 10020 }, { - "epoch": 0.2843643586833144, + "epoch": 0.2839695089121256, "grad_norm": 0.0, - "learning_rate": 1.679460185977281e-05, - "loss": 1.0273, + "learning_rate": 1.680374548127869e-05, + "loss": 0.8896, "step": 10021 }, { - "epoch": 0.2843927355278093, + "epoch": 0.2839978463543881, "grad_norm": 0.0, - "learning_rate": 1.679392749212015e-05, - "loss": 0.8826, + "learning_rate": 1.680307283739178e-05, + "loss": 1.0608, "step": 10022 }, { - "epoch": 0.2844211123723042, + "epoch": 0.2840261837966505, "grad_norm": 0.0, - "learning_rate": 1.6793253067078886e-05, - "loss": 1.0435, + "learning_rate": 1.6802400136200056e-05, + "loss": 0.9021, "step": 10023 }, { - "epoch": 0.2844494892167991, + "epoch": 0.28405452123891295, "grad_norm": 0.0, - "learning_rate": 1.6792578584654706e-05, - "loss": 0.9376, + "learning_rate": 1.6801727377709195e-05, + "loss": 0.9568, "step": 10024 }, { - "epoch": 0.284477866061294, + "epoch": 0.28408285868117544, "grad_norm": 0.0, - "learning_rate": 1.679190404485331e-05, - "loss": 0.9648, + "learning_rate": 1.6801054561924857e-05, + "loss": 1.0157, "step": 10025 }, { - "epoch": 0.28450624290578885, + "epoch": 0.2841111961234379, "grad_norm": 0.0, - "learning_rate": 1.679122944768039e-05, - "loss": 0.9939, + "learning_rate": 1.680038168885271e-05, + "loss": 0.881, "step": 10026 }, { - "epoch": 0.28453461975028377, + "epoch": 0.2841395335657004, "grad_norm": 0.0, - "learning_rate": 1.6790554793141654e-05, - "loss": 0.9768, + "learning_rate": 1.6799708758498424e-05, + "loss": 1.0162, "step": 10027 }, { - "epoch": 0.2845629965947787, + "epoch": 0.2841678710079628, "grad_norm": 0.0, - "learning_rate": 1.6789880081242794e-05, - "loss": 0.93, + "learning_rate": 1.6799035770867665e-05, + "loss": 1.0022, "step": 10028 }, { - "epoch": 0.28459137343927354, + "epoch": 0.2841962084502253, "grad_norm": 0.0, - "learning_rate": 1.6789205311989518e-05, - "loss": 0.9234, + "learning_rate": 1.6798362725966102e-05, + "loss": 1.0135, "step": 10029 }, { - "epoch": 0.28461975028376846, + "epoch": 0.28422454589248775, "grad_norm": 0.0, - "learning_rate": 1.6788530485387513e-05, - "loss": 0.9963, + "learning_rate": 1.6797689623799406e-05, + "loss": 0.9313, "step": 10030 }, { - "epoch": 0.2846481271282633, + "epoch": 0.2842528833347502, "grad_norm": 0.0, - "learning_rate": 1.678785560144249e-05, - "loss": 0.9396, + "learning_rate": 1.679701646437325e-05, + "loss": 1.0436, "step": 10031 }, { - "epoch": 0.28467650397275823, + "epoch": 0.2842812207770127, "grad_norm": 0.0, - "learning_rate": 1.678718066016014e-05, - "loss": 1.0046, + "learning_rate": 1.6796343247693293e-05, + "loss": 1.0183, "step": 10032 }, { - "epoch": 0.28470488081725315, + "epoch": 0.2843095582192751, "grad_norm": 0.0, - "learning_rate": 1.6786505661546172e-05, - "loss": 0.9563, + "learning_rate": 1.6795669973765218e-05, + "loss": 1.1303, "step": 10033 }, { - "epoch": 0.284733257661748, + "epoch": 0.2843378956615376, "grad_norm": 0.0, - "learning_rate": 1.6785830605606287e-05, - "loss": 1.0193, + "learning_rate": 1.679499664259469e-05, + "loss": 0.7888, "step": 10034 }, { - "epoch": 0.2847616345062429, + "epoch": 0.28436623310380005, "grad_norm": 0.0, - "learning_rate": 1.6785155492346188e-05, - "loss": 0.8937, + "learning_rate": 1.679432325418738e-05, + "loss": 1.0547, "step": 10035 }, { - "epoch": 0.2847900113507378, + "epoch": 0.2843945705460625, "grad_norm": 0.0, - "learning_rate": 1.6784480321771574e-05, - "loss": 0.9768, + "learning_rate": 1.6793649808548966e-05, + "loss": 1.0093, "step": 10036 }, { - "epoch": 0.2848183881952327, + "epoch": 0.284422907988325, "grad_norm": 0.0, - "learning_rate": 1.6783805093888144e-05, - "loss": 1.0139, + "learning_rate": 1.6792976305685115e-05, + "loss": 0.8876, "step": 10037 }, { - "epoch": 0.28484676503972756, + "epoch": 0.2844512454305874, "grad_norm": 0.0, - "learning_rate": 1.6783129808701618e-05, - "loss": 0.9794, + "learning_rate": 1.6792302745601505e-05, + "loss": 0.9117, "step": 10038 }, { - "epoch": 0.28487514188422247, + "epoch": 0.2844795828728499, "grad_norm": 0.0, - "learning_rate": 1.6782454466217683e-05, - "loss": 0.9445, + "learning_rate": 1.67916291283038e-05, + "loss": 0.9253, "step": 10039 }, { - "epoch": 0.2849035187287174, + "epoch": 0.28450792031511235, "grad_norm": 0.0, - "learning_rate": 1.678177906644205e-05, - "loss": 1.0317, + "learning_rate": 1.6790955453797687e-05, + "loss": 1.0265, "step": 10040 }, { - "epoch": 0.28493189557321225, + "epoch": 0.28453625775737484, "grad_norm": 0.0, - "learning_rate": 1.6781103609380425e-05, - "loss": 1.0329, + "learning_rate": 1.6790281722088834e-05, + "loss": 1.0347, "step": 10041 }, { - "epoch": 0.28496027241770716, + "epoch": 0.2845645951996373, "grad_norm": 0.0, - "learning_rate": 1.6780428095038512e-05, - "loss": 0.9013, + "learning_rate": 1.6789607933182912e-05, + "loss": 0.9044, "step": 10042 }, { - "epoch": 0.284988649262202, + "epoch": 0.2845929326418997, "grad_norm": 0.0, - "learning_rate": 1.6779752523422025e-05, - "loss": 0.9462, + "learning_rate": 1.6788934087085606e-05, + "loss": 0.8865, "step": 10043 }, { - "epoch": 0.28501702610669694, + "epoch": 0.2846212700841622, "grad_norm": 0.0, - "learning_rate": 1.6779076894536657e-05, - "loss": 0.9559, + "learning_rate": 1.6788260183802586e-05, + "loss": 0.8337, "step": 10044 }, { - "epoch": 0.28504540295119185, + "epoch": 0.28464960752642465, "grad_norm": 0.0, - "learning_rate": 1.677840120838812e-05, - "loss": 0.8673, + "learning_rate": 1.678758622333953e-05, + "loss": 0.9079, "step": 10045 }, { - "epoch": 0.2850737797956867, + "epoch": 0.28467794496868715, "grad_norm": 0.0, - "learning_rate": 1.6777725464982127e-05, - "loss": 0.9176, + "learning_rate": 1.6786912205702114e-05, + "loss": 1.0497, "step": 10046 }, { - "epoch": 0.2851021566401816, + "epoch": 0.2847062824109496, "grad_norm": 0.0, - "learning_rate": 1.677704966432438e-05, - "loss": 0.9131, + "learning_rate": 1.6786238130896016e-05, + "loss": 1.1371, "step": 10047 }, { - "epoch": 0.2851305334846765, + "epoch": 0.284734619853212, "grad_norm": 0.0, - "learning_rate": 1.677637380642059e-05, - "loss": 1.0452, + "learning_rate": 1.678556399892691e-05, + "loss": 1.0304, "step": 10048 }, { - "epoch": 0.2851589103291714, + "epoch": 0.2847629572954745, "grad_norm": 0.0, - "learning_rate": 1.677569789127647e-05, - "loss": 0.9289, + "learning_rate": 1.678488980980048e-05, + "loss": 0.9253, "step": 10049 }, { - "epoch": 0.2851872871736663, + "epoch": 0.28479129473773696, "grad_norm": 0.0, - "learning_rate": 1.677502191889772e-05, - "loss": 0.9775, + "learning_rate": 1.678421556352241e-05, + "loss": 0.948, "step": 10050 }, { - "epoch": 0.2852156640181612, + "epoch": 0.28481963217999945, "grad_norm": 0.0, - "learning_rate": 1.6774345889290062e-05, - "loss": 0.8712, + "learning_rate": 1.678354126009837e-05, + "loss": 0.9133, "step": 10051 }, { - "epoch": 0.2852440408626561, + "epoch": 0.2848479696222619, "grad_norm": 0.0, - "learning_rate": 1.6773669802459194e-05, - "loss": 0.8479, + "learning_rate": 1.6782866899534043e-05, + "loss": 0.9955, "step": 10052 }, { - "epoch": 0.28527241770715095, + "epoch": 0.2848763070645244, "grad_norm": 0.0, - "learning_rate": 1.6772993658410834e-05, - "loss": 1.0708, + "learning_rate": 1.6782192481835107e-05, + "loss": 0.9262, "step": 10053 }, { - "epoch": 0.28530079455164586, + "epoch": 0.2849046445067868, "grad_norm": 0.0, - "learning_rate": 1.6772317457150694e-05, - "loss": 1.0053, + "learning_rate": 1.6781518007007247e-05, + "loss": 0.961, "step": 10054 }, { - "epoch": 0.2853291713961407, + "epoch": 0.28493298194904926, "grad_norm": 0.0, - "learning_rate": 1.677164119868448e-05, - "loss": 0.8672, + "learning_rate": 1.6780843475056143e-05, + "loss": 0.9571, "step": 10055 }, { - "epoch": 0.28535754824063564, + "epoch": 0.28496131939131175, "grad_norm": 0.0, - "learning_rate": 1.6770964883017913e-05, - "loss": 0.9792, + "learning_rate": 1.678016888598748e-05, + "loss": 0.9668, "step": 10056 }, { - "epoch": 0.28538592508513055, + "epoch": 0.2849896568335742, "grad_norm": 0.0, - "learning_rate": 1.6770288510156697e-05, - "loss": 0.8649, + "learning_rate": 1.6779494239806928e-05, + "loss": 0.8657, "step": 10057 }, { - "epoch": 0.2854143019296254, + "epoch": 0.2850179942758367, "grad_norm": 0.0, - "learning_rate": 1.6769612080106554e-05, - "loss": 1.0617, + "learning_rate": 1.6778819536520184e-05, + "loss": 0.9105, "step": 10058 }, { - "epoch": 0.28544267877412033, + "epoch": 0.2850463317180991, "grad_norm": 0.0, - "learning_rate": 1.676893559287319e-05, - "loss": 0.9629, + "learning_rate": 1.6778144776132927e-05, + "loss": 0.9255, "step": 10059 }, { - "epoch": 0.2854710556186152, + "epoch": 0.28507466916036156, "grad_norm": 0.0, - "learning_rate": 1.6768259048462324e-05, - "loss": 0.9921, + "learning_rate": 1.6777469958650838e-05, + "loss": 0.9422, "step": 10060 }, { - "epoch": 0.2854994324631101, + "epoch": 0.28510300660262405, "grad_norm": 0.0, - "learning_rate": 1.6767582446879668e-05, - "loss": 0.9587, + "learning_rate": 1.67767950840796e-05, + "loss": 0.9306, "step": 10061 }, { - "epoch": 0.285527809307605, + "epoch": 0.2851313440448865, "grad_norm": 0.0, - "learning_rate": 1.676690578813094e-05, - "loss": 0.9879, + "learning_rate": 1.6776120152424905e-05, + "loss": 0.8471, "step": 10062 }, { - "epoch": 0.2855561861520999, + "epoch": 0.285159681487149, "grad_norm": 0.0, - "learning_rate": 1.6766229072221856e-05, - "loss": 1.0021, + "learning_rate": 1.677544516369243e-05, + "loss": 0.9808, "step": 10063 }, { - "epoch": 0.2855845629965948, + "epoch": 0.2851880189294114, "grad_norm": 0.0, - "learning_rate": 1.6765552299158128e-05, - "loss": 0.9613, + "learning_rate": 1.6774770117887866e-05, + "loss": 1.0012, "step": 10064 }, { - "epoch": 0.28561293984108965, + "epoch": 0.2852163563716739, "grad_norm": 0.0, - "learning_rate": 1.6764875468945477e-05, - "loss": 0.9664, + "learning_rate": 1.6774095015016897e-05, + "loss": 0.9202, "step": 10065 }, { - "epoch": 0.28564131668558457, + "epoch": 0.28524469381393636, "grad_norm": 0.0, - "learning_rate": 1.6764198581589622e-05, - "loss": 0.9728, + "learning_rate": 1.6773419855085208e-05, + "loss": 0.8465, "step": 10066 }, { - "epoch": 0.2856696935300795, + "epoch": 0.2852730312561988, "grad_norm": 0.0, - "learning_rate": 1.6763521637096272e-05, - "loss": 0.9936, + "learning_rate": 1.6772744638098495e-05, + "loss": 0.9759, "step": 10067 }, { - "epoch": 0.28569807037457434, + "epoch": 0.2853013686984613, "grad_norm": 0.0, - "learning_rate": 1.6762844635471153e-05, - "loss": 0.9737, + "learning_rate": 1.677206936406243e-05, + "loss": 0.9953, "step": 10068 }, { - "epoch": 0.28572644721906926, + "epoch": 0.2853297061407237, "grad_norm": 0.0, - "learning_rate": 1.6762167576719983e-05, - "loss": 1.0716, + "learning_rate": 1.6771394032982718e-05, + "loss": 1.0675, "step": 10069 }, { - "epoch": 0.2857548240635641, + "epoch": 0.2853580435829862, "grad_norm": 0.0, - "learning_rate": 1.6761490460848476e-05, - "loss": 0.9276, + "learning_rate": 1.6770718644865035e-05, + "loss": 0.8991, "step": 10070 }, { - "epoch": 0.28578320090805903, + "epoch": 0.28538638102524866, "grad_norm": 0.0, - "learning_rate": 1.676081328786236e-05, - "loss": 1.0096, + "learning_rate": 1.677004319971508e-05, + "loss": 1.0221, "step": 10071 }, { - "epoch": 0.2858115777525539, + "epoch": 0.2854147184675111, "grad_norm": 0.0, - "learning_rate": 1.676013605776735e-05, - "loss": 0.8519, + "learning_rate": 1.6769367697538532e-05, + "loss": 0.9689, "step": 10072 }, { - "epoch": 0.2858399545970488, + "epoch": 0.2854430559097736, "grad_norm": 0.0, - "learning_rate": 1.675945877056916e-05, - "loss": 0.962, + "learning_rate": 1.6768692138341086e-05, + "loss": 1.0275, "step": 10073 }, { - "epoch": 0.2858683314415437, + "epoch": 0.28547139335203603, "grad_norm": 0.0, - "learning_rate": 1.6758781426273523e-05, - "loss": 0.8548, + "learning_rate": 1.6768016522128435e-05, + "loss": 0.9031, "step": 10074 }, { - "epoch": 0.2858967082860386, + "epoch": 0.2854997307942985, "grad_norm": 0.0, - "learning_rate": 1.6758104024886156e-05, - "loss": 1.1035, + "learning_rate": 1.6767340848906266e-05, + "loss": 1.0298, "step": 10075 }, { - "epoch": 0.2859250851305335, + "epoch": 0.28552806823656096, "grad_norm": 0.0, - "learning_rate": 1.675742656641278e-05, - "loss": 1.0142, + "learning_rate": 1.676666511868027e-05, + "loss": 0.9675, "step": 10076 }, { - "epoch": 0.28595346197502836, + "epoch": 0.28555640567882346, "grad_norm": 0.0, - "learning_rate": 1.6756749050859117e-05, - "loss": 0.9699, + "learning_rate": 1.6765989331456144e-05, + "loss": 0.9528, "step": 10077 }, { - "epoch": 0.28598183881952327, + "epoch": 0.2855847431210859, "grad_norm": 0.0, - "learning_rate": 1.675607147823089e-05, - "loss": 1.1038, + "learning_rate": 1.6765313487239578e-05, + "loss": 1.0272, "step": 10078 }, { - "epoch": 0.2860102156640182, + "epoch": 0.28561308056334833, "grad_norm": 0.0, - "learning_rate": 1.6755393848533825e-05, - "loss": 0.8659, + "learning_rate": 1.676463758603626e-05, + "loss": 0.9747, "step": 10079 }, { - "epoch": 0.28603859250851305, + "epoch": 0.2856414180056108, "grad_norm": 0.0, - "learning_rate": 1.6754716161773645e-05, - "loss": 0.8988, + "learning_rate": 1.6763961627851894e-05, + "loss": 1.0077, "step": 10080 }, { - "epoch": 0.28606696935300796, + "epoch": 0.28566975544787326, "grad_norm": 0.0, - "learning_rate": 1.6754038417956075e-05, - "loss": 0.8589, + "learning_rate": 1.6763285612692163e-05, + "loss": 0.9808, "step": 10081 }, { - "epoch": 0.2860953461975028, + "epoch": 0.28569809289013576, "grad_norm": 0.0, - "learning_rate": 1.6753360617086835e-05, - "loss": 0.9694, + "learning_rate": 1.676260954056277e-05, + "loss": 0.996, "step": 10082 }, { - "epoch": 0.28612372304199774, + "epoch": 0.2857264303323982, "grad_norm": 0.0, - "learning_rate": 1.6752682759171657e-05, - "loss": 0.8881, + "learning_rate": 1.67619334114694e-05, + "loss": 1.0061, "step": 10083 }, { - "epoch": 0.2861520998864926, + "epoch": 0.28575476777466063, "grad_norm": 0.0, - "learning_rate": 1.6752004844216264e-05, - "loss": 0.9368, + "learning_rate": 1.676125722541776e-05, + "loss": 1.0592, "step": 10084 }, { - "epoch": 0.2861804767309875, + "epoch": 0.28578310521692313, "grad_norm": 0.0, - "learning_rate": 1.675132687222638e-05, - "loss": 1.0561, + "learning_rate": 1.6760580982413538e-05, + "loss": 1.0485, "step": 10085 }, { - "epoch": 0.2862088535754824, + "epoch": 0.28581144265918557, "grad_norm": 0.0, - "learning_rate": 1.6750648843207736e-05, - "loss": 0.974, + "learning_rate": 1.6759904682462428e-05, + "loss": 0.9181, "step": 10086 }, { - "epoch": 0.2862372304199773, + "epoch": 0.28583978010144806, "grad_norm": 0.0, - "learning_rate": 1.674997075716606e-05, - "loss": 1.0264, + "learning_rate": 1.675922832557013e-05, + "loss": 0.8288, "step": 10087 }, { - "epoch": 0.2862656072644722, + "epoch": 0.2858681175437105, "grad_norm": 0.0, - "learning_rate": 1.6749292614107076e-05, - "loss": 0.8469, + "learning_rate": 1.6758551911742346e-05, + "loss": 0.9752, "step": 10088 }, { - "epoch": 0.28629398410896706, + "epoch": 0.285896454985973, "grad_norm": 0.0, - "learning_rate": 1.6748614414036514e-05, - "loss": 0.9269, + "learning_rate": 1.675787544098477e-05, + "loss": 1.0493, "step": 10089 }, { - "epoch": 0.286322360953462, + "epoch": 0.28592479242823543, "grad_norm": 0.0, - "learning_rate": 1.6747936156960102e-05, - "loss": 0.9087, + "learning_rate": 1.6757198913303098e-05, + "loss": 1.0561, "step": 10090 }, { - "epoch": 0.2863507377979569, + "epoch": 0.28595312987049787, "grad_norm": 0.0, - "learning_rate": 1.6747257842883567e-05, - "loss": 0.9964, + "learning_rate": 1.6756522328703026e-05, + "loss": 0.8649, "step": 10091 }, { - "epoch": 0.28637911464245175, + "epoch": 0.28598146731276036, "grad_norm": 0.0, - "learning_rate": 1.6746579471812648e-05, - "loss": 1.0261, + "learning_rate": 1.6755845687190264e-05, + "loss": 0.9174, "step": 10092 }, { - "epoch": 0.28640749148694666, + "epoch": 0.2860098047550228, "grad_norm": 0.0, - "learning_rate": 1.6745901043753065e-05, - "loss": 0.9794, + "learning_rate": 1.67551689887705e-05, + "loss": 0.9084, "step": 10093 }, { - "epoch": 0.2864358683314415, + "epoch": 0.2860381421972853, "grad_norm": 0.0, - "learning_rate": 1.6745222558710556e-05, - "loss": 0.9862, + "learning_rate": 1.6754492233449445e-05, + "loss": 0.9552, "step": 10094 }, { - "epoch": 0.28646424517593644, + "epoch": 0.28606647963954773, "grad_norm": 0.0, - "learning_rate": 1.6744544016690844e-05, - "loss": 0.8842, + "learning_rate": 1.675381542123279e-05, + "loss": 0.9445, "step": 10095 }, { - "epoch": 0.28649262202043135, + "epoch": 0.28609481708181017, "grad_norm": 0.0, - "learning_rate": 1.674386541769967e-05, - "loss": 0.8536, + "learning_rate": 1.675313855212624e-05, + "loss": 1.0234, "step": 10096 }, { - "epoch": 0.2865209988649262, + "epoch": 0.28612315452407266, "grad_norm": 0.0, - "learning_rate": 1.6743186761742758e-05, - "loss": 1.0094, + "learning_rate": 1.6752461626135495e-05, + "loss": 0.9012, "step": 10097 }, { - "epoch": 0.28654937570942113, + "epoch": 0.2861514919663351, "grad_norm": 0.0, - "learning_rate": 1.674250804882585e-05, - "loss": 1.0322, + "learning_rate": 1.675178464326626e-05, + "loss": 0.8626, "step": 10098 }, { - "epoch": 0.286577752553916, + "epoch": 0.2861798294085976, "grad_norm": 0.0, - "learning_rate": 1.6741829278954664e-05, - "loss": 1.0242, + "learning_rate": 1.6751107603524238e-05, + "loss": 0.9519, "step": 10099 }, { - "epoch": 0.2866061293984109, + "epoch": 0.28620816685086004, "grad_norm": 0.0, - "learning_rate": 1.674115045213495e-05, - "loss": 0.9568, + "learning_rate": 1.6750430506915124e-05, + "loss": 1.099, "step": 10100 }, { - "epoch": 0.28663450624290576, + "epoch": 0.28623650429312253, "grad_norm": 0.0, - "learning_rate": 1.674047156837243e-05, - "loss": 1.0736, + "learning_rate": 1.6749753353444634e-05, + "loss": 1.0166, "step": 10101 }, { - "epoch": 0.2866628830874007, + "epoch": 0.28626484173538497, "grad_norm": 0.0, - "learning_rate": 1.673979262767285e-05, - "loss": 0.8956, + "learning_rate": 1.6749076143118457e-05, + "loss": 0.9303, "step": 10102 }, { - "epoch": 0.2866912599318956, + "epoch": 0.2862931791776474, "grad_norm": 0.0, - "learning_rate": 1.6739113630041937e-05, - "loss": 1.0563, + "learning_rate": 1.6748398875942312e-05, + "loss": 1.0431, "step": 10103 }, { - "epoch": 0.28671963677639045, + "epoch": 0.2863215166199099, "grad_norm": 0.0, - "learning_rate": 1.6738434575485425e-05, - "loss": 0.9891, + "learning_rate": 1.6747721551921894e-05, + "loss": 0.9084, "step": 10104 }, { - "epoch": 0.28674801362088537, + "epoch": 0.28634985406217234, "grad_norm": 0.0, - "learning_rate": 1.6737755464009054e-05, - "loss": 0.8733, + "learning_rate": 1.6747044171062916e-05, + "loss": 0.9102, "step": 10105 }, { - "epoch": 0.2867763904653802, + "epoch": 0.28637819150443483, "grad_norm": 0.0, - "learning_rate": 1.6737076295618565e-05, - "loss": 0.9749, + "learning_rate": 1.6746366733371076e-05, + "loss": 0.9873, "step": 10106 }, { - "epoch": 0.28680476730987514, + "epoch": 0.28640652894669727, "grad_norm": 0.0, - "learning_rate": 1.6736397070319682e-05, - "loss": 0.9367, + "learning_rate": 1.6745689238852084e-05, + "loss": 1.0029, "step": 10107 }, { - "epoch": 0.28683314415437006, + "epoch": 0.2864348663889597, "grad_norm": 0.0, - "learning_rate": 1.6735717788118156e-05, - "loss": 1.0301, + "learning_rate": 1.6745011687511646e-05, + "loss": 0.9293, "step": 10108 }, { - "epoch": 0.2868615209988649, + "epoch": 0.2864632038312222, "grad_norm": 0.0, - "learning_rate": 1.673503844901972e-05, - "loss": 1.0486, + "learning_rate": 1.6744334079355472e-05, + "loss": 0.9238, "step": 10109 }, { - "epoch": 0.28688989784335983, + "epoch": 0.28649154127348464, "grad_norm": 0.0, - "learning_rate": 1.6734359053030105e-05, - "loss": 0.8591, + "learning_rate": 1.6743656414389263e-05, + "loss": 1.0954, "step": 10110 }, { - "epoch": 0.2869182746878547, + "epoch": 0.28651987871574713, "grad_norm": 0.0, - "learning_rate": 1.673367960015506e-05, - "loss": 0.9708, + "learning_rate": 1.6742978692618735e-05, + "loss": 0.9667, "step": 10111 }, { - "epoch": 0.2869466515323496, + "epoch": 0.28654821615800957, "grad_norm": 0.0, - "learning_rate": 1.673300009040032e-05, - "loss": 0.9122, + "learning_rate": 1.6742300914049595e-05, + "loss": 0.8704, "step": 10112 }, { - "epoch": 0.2869750283768445, + "epoch": 0.28657655360027207, "grad_norm": 0.0, - "learning_rate": 1.6732320523771626e-05, - "loss": 0.9739, + "learning_rate": 1.674162307868755e-05, + "loss": 1.0564, "step": 10113 }, { - "epoch": 0.2870034052213394, + "epoch": 0.2866048910425345, "grad_norm": 0.0, - "learning_rate": 1.6731640900274718e-05, - "loss": 0.9295, + "learning_rate": 1.674094518653831e-05, + "loss": 1.0927, "step": 10114 }, { - "epoch": 0.2870317820658343, + "epoch": 0.28663322848479694, "grad_norm": 0.0, - "learning_rate": 1.6730961219915336e-05, - "loss": 0.9082, + "learning_rate": 1.674026723760758e-05, + "loss": 0.9353, "step": 10115 }, { - "epoch": 0.28706015891032916, + "epoch": 0.28666156592705944, "grad_norm": 0.0, - "learning_rate": 1.673028148269922e-05, - "loss": 0.9344, + "learning_rate": 1.6739589231901085e-05, + "loss": 0.9431, "step": 10116 }, { - "epoch": 0.28708853575482407, + "epoch": 0.2866899033693219, "grad_norm": 0.0, - "learning_rate": 1.6729601688632117e-05, - "loss": 0.9014, + "learning_rate": 1.6738911169424523e-05, + "loss": 1.0343, "step": 10117 }, { - "epoch": 0.28711691259931893, + "epoch": 0.28671824081158437, "grad_norm": 0.0, - "learning_rate": 1.672892183771977e-05, - "loss": 0.9194, + "learning_rate": 1.673823305018361e-05, + "loss": 0.9169, "step": 10118 }, { - "epoch": 0.28714528944381384, + "epoch": 0.2867465782538468, "grad_norm": 0.0, - "learning_rate": 1.672824192996791e-05, - "loss": 1.1315, + "learning_rate": 1.6737554874184058e-05, + "loss": 0.8206, "step": 10119 }, { - "epoch": 0.28717366628830876, + "epoch": 0.28677491569610924, "grad_norm": 0.0, - "learning_rate": 1.672756196538229e-05, - "loss": 0.9282, + "learning_rate": 1.673687664143158e-05, + "loss": 1.0616, "step": 10120 }, { - "epoch": 0.2872020431328036, + "epoch": 0.28680325313837174, "grad_norm": 0.0, - "learning_rate": 1.6726881943968653e-05, - "loss": 0.957, + "learning_rate": 1.6736198351931888e-05, + "loss": 0.934, "step": 10121 }, { - "epoch": 0.28723041997729853, + "epoch": 0.2868315905806342, "grad_norm": 0.0, - "learning_rate": 1.672620186573274e-05, - "loss": 0.9746, + "learning_rate": 1.6735520005690697e-05, + "loss": 0.8522, "step": 10122 }, { - "epoch": 0.2872587968217934, + "epoch": 0.28685992802289667, "grad_norm": 0.0, - "learning_rate": 1.67255217306803e-05, - "loss": 0.9067, + "learning_rate": 1.6734841602713717e-05, + "loss": 0.9223, "step": 10123 }, { - "epoch": 0.2872871736662883, + "epoch": 0.2868882654651591, "grad_norm": 0.0, - "learning_rate": 1.6724841538817073e-05, - "loss": 0.9413, + "learning_rate": 1.6734163143006665e-05, + "loss": 0.9681, "step": 10124 }, { - "epoch": 0.2873155505107832, + "epoch": 0.2869166029074216, "grad_norm": 0.0, - "learning_rate": 1.672416129014881e-05, - "loss": 0.9435, + "learning_rate": 1.673348462657526e-05, + "loss": 0.9674, "step": 10125 }, { - "epoch": 0.2873439273552781, + "epoch": 0.28694494034968404, "grad_norm": 0.0, - "learning_rate": 1.672348098468125e-05, - "loss": 0.9693, + "learning_rate": 1.673280605342521e-05, + "loss": 0.8861, "step": 10126 }, { - "epoch": 0.287372304199773, + "epoch": 0.2869732777919465, "grad_norm": 0.0, - "learning_rate": 1.672280062242015e-05, - "loss": 1.0221, + "learning_rate": 1.6732127423562236e-05, + "loss": 0.9336, "step": 10127 }, { - "epoch": 0.28740068104426786, + "epoch": 0.287001615234209, "grad_norm": 0.0, - "learning_rate": 1.672212020337125e-05, - "loss": 0.9113, + "learning_rate": 1.6731448736992053e-05, + "loss": 0.9669, "step": 10128 }, { - "epoch": 0.2874290578887628, + "epoch": 0.2870299526764714, "grad_norm": 0.0, - "learning_rate": 1.6721439727540294e-05, - "loss": 0.9841, + "learning_rate": 1.6730769993720376e-05, + "loss": 0.9608, "step": 10129 }, { - "epoch": 0.2874574347332577, + "epoch": 0.2870582901187339, "grad_norm": 0.0, - "learning_rate": 1.6720759194933037e-05, - "loss": 0.9207, + "learning_rate": 1.6730091193752925e-05, + "loss": 1.0041, "step": 10130 }, { - "epoch": 0.28748581157775255, + "epoch": 0.28708662756099634, "grad_norm": 0.0, - "learning_rate": 1.6720078605555227e-05, - "loss": 0.9318, + "learning_rate": 1.672941233709542e-05, + "loss": 0.8652, "step": 10131 }, { - "epoch": 0.28751418842224746, + "epoch": 0.2871149650032588, "grad_norm": 0.0, - "learning_rate": 1.6719397959412606e-05, - "loss": 0.8866, + "learning_rate": 1.672873342375357e-05, + "loss": 1.1399, "step": 10132 }, { - "epoch": 0.2875425652667423, + "epoch": 0.2871433024455213, "grad_norm": 0.0, - "learning_rate": 1.671871725651093e-05, - "loss": 0.9418, + "learning_rate": 1.67280544537331e-05, + "loss": 1.0486, "step": 10133 }, { - "epoch": 0.28757094211123724, + "epoch": 0.2871716398877837, "grad_norm": 0.0, - "learning_rate": 1.671803649685595e-05, - "loss": 0.9501, + "learning_rate": 1.6727375427039734e-05, + "loss": 0.9902, "step": 10134 }, { - "epoch": 0.2875993189557321, + "epoch": 0.2871999773300462, "grad_norm": 0.0, - "learning_rate": 1.6717355680453412e-05, - "loss": 0.9222, + "learning_rate": 1.6726696343679186e-05, + "loss": 0.9271, "step": 10135 }, { - "epoch": 0.287627695800227, + "epoch": 0.28722831477230865, "grad_norm": 0.0, - "learning_rate": 1.671667480730907e-05, - "loss": 0.9849, + "learning_rate": 1.6726017203657175e-05, + "loss": 0.8426, "step": 10136 }, { - "epoch": 0.2876560726447219, + "epoch": 0.28725665221457114, "grad_norm": 0.0, - "learning_rate": 1.6715993877428672e-05, - "loss": 0.9528, + "learning_rate": 1.6725338006979424e-05, + "loss": 0.9932, "step": 10137 }, { - "epoch": 0.2876844494892168, + "epoch": 0.2872849896568336, "grad_norm": 0.0, - "learning_rate": 1.6715312890817973e-05, - "loss": 0.9492, + "learning_rate": 1.6724658753651652e-05, + "loss": 0.9565, "step": 10138 }, { - "epoch": 0.2877128263337117, + "epoch": 0.287313327099096, "grad_norm": 0.0, - "learning_rate": 1.6714631847482724e-05, - "loss": 1.0588, + "learning_rate": 1.672397944367958e-05, + "loss": 0.9427, "step": 10139 }, { - "epoch": 0.28774120317820656, + "epoch": 0.2873416645413585, "grad_norm": 0.0, - "learning_rate": 1.6713950747428675e-05, - "loss": 1.0107, + "learning_rate": 1.672330007706894e-05, + "loss": 1.035, "step": 10140 }, { - "epoch": 0.2877695800227015, + "epoch": 0.28737000198362095, "grad_norm": 0.0, - "learning_rate": 1.6713269590661587e-05, - "loss": 1.041, + "learning_rate": 1.672262065382544e-05, + "loss": 0.8621, "step": 10141 }, { - "epoch": 0.2877979568671964, + "epoch": 0.28739833942588344, "grad_norm": 0.0, - "learning_rate": 1.6712588377187207e-05, - "loss": 0.9328, + "learning_rate": 1.6721941173954813e-05, + "loss": 1.0355, "step": 10142 }, { - "epoch": 0.28782633371169125, + "epoch": 0.2874266768681459, "grad_norm": 0.0, - "learning_rate": 1.671190710701129e-05, - "loss": 0.9569, + "learning_rate": 1.672126163746278e-05, + "loss": 0.9076, "step": 10143 }, { - "epoch": 0.28785471055618617, + "epoch": 0.2874550143104083, "grad_norm": 0.0, - "learning_rate": 1.6711225780139596e-05, - "loss": 0.9159, + "learning_rate": 1.672058204435506e-05, + "loss": 0.9441, "step": 10144 }, { - "epoch": 0.287883087400681, + "epoch": 0.2874833517526708, "grad_norm": 0.0, - "learning_rate": 1.671054439657787e-05, - "loss": 0.9653, + "learning_rate": 1.6719902394637388e-05, + "loss": 0.9975, "step": 10145 }, { - "epoch": 0.28791146424517594, + "epoch": 0.28751168919493325, "grad_norm": 0.0, - "learning_rate": 1.6709862956331877e-05, - "loss": 0.9777, + "learning_rate": 1.6719222688315478e-05, + "loss": 1.0649, "step": 10146 }, { - "epoch": 0.28793984108967086, + "epoch": 0.28754002663719574, "grad_norm": 0.0, - "learning_rate": 1.6709181459407368e-05, - "loss": 1.0216, + "learning_rate": 1.6718542925395063e-05, + "loss": 0.9306, "step": 10147 }, { - "epoch": 0.2879682179341657, + "epoch": 0.2875683640794582, "grad_norm": 0.0, - "learning_rate": 1.6708499905810107e-05, - "loss": 0.9482, + "learning_rate": 1.6717863105881863e-05, + "loss": 0.9765, "step": 10148 }, { - "epoch": 0.28799659477866063, + "epoch": 0.2875967015217207, "grad_norm": 0.0, - "learning_rate": 1.670781829554584e-05, - "loss": 0.996, + "learning_rate": 1.671718322978161e-05, + "loss": 1.0301, "step": 10149 }, { - "epoch": 0.2880249716231555, + "epoch": 0.2876250389639831, "grad_norm": 0.0, - "learning_rate": 1.6707136628620335e-05, - "loss": 1.0591, + "learning_rate": 1.6716503297100025e-05, + "loss": 0.9686, "step": 10150 }, { - "epoch": 0.2880533484676504, + "epoch": 0.28765337640624555, "grad_norm": 0.0, - "learning_rate": 1.6706454905039343e-05, - "loss": 0.9516, + "learning_rate": 1.6715823307842845e-05, + "loss": 0.912, "step": 10151 }, { - "epoch": 0.28808172531214526, + "epoch": 0.28768171384850805, "grad_norm": 0.0, - "learning_rate": 1.6705773124808622e-05, - "loss": 1.0047, + "learning_rate": 1.6715143262015784e-05, + "loss": 0.9424, "step": 10152 }, { - "epoch": 0.2881101021566402, + "epoch": 0.2877100512907705, "grad_norm": 0.0, - "learning_rate": 1.670509128793394e-05, - "loss": 1.0153, + "learning_rate": 1.671446315962458e-05, + "loss": 1.1, "step": 10153 }, { - "epoch": 0.2881384790011351, + "epoch": 0.287738388733033, "grad_norm": 0.0, - "learning_rate": 1.6704409394421044e-05, - "loss": 1.0819, + "learning_rate": 1.6713783000674963e-05, + "loss": 0.8907, "step": 10154 }, { - "epoch": 0.28816685584562995, + "epoch": 0.2877667261752954, "grad_norm": 0.0, - "learning_rate": 1.6703727444275704e-05, - "loss": 0.9752, + "learning_rate": 1.6713102785172654e-05, + "loss": 1.0457, "step": 10155 }, { - "epoch": 0.28819523269012487, + "epoch": 0.28779506361755786, "grad_norm": 0.0, - "learning_rate": 1.6703045437503677e-05, - "loss": 1.031, + "learning_rate": 1.671242251312339e-05, + "loss": 0.9775, "step": 10156 }, { - "epoch": 0.28822360953461973, + "epoch": 0.28782340105982035, "grad_norm": 0.0, - "learning_rate": 1.6702363374110726e-05, - "loss": 0.9553, + "learning_rate": 1.67117421845329e-05, + "loss": 0.8802, "step": 10157 }, { - "epoch": 0.28825198637911464, + "epoch": 0.2878517385020828, "grad_norm": 0.0, - "learning_rate": 1.6701681254102608e-05, - "loss": 0.9656, + "learning_rate": 1.671106179940691e-05, + "loss": 1.0118, "step": 10158 }, { - "epoch": 0.28828036322360956, + "epoch": 0.2878800759443453, "grad_norm": 0.0, - "learning_rate": 1.6700999077485085e-05, - "loss": 0.9686, + "learning_rate": 1.6710381357751155e-05, + "loss": 0.9139, "step": 10159 }, { - "epoch": 0.2883087400681044, + "epoch": 0.2879084133866077, "grad_norm": 0.0, - "learning_rate": 1.6700316844263922e-05, - "loss": 0.9866, + "learning_rate": 1.6709700859571366e-05, + "loss": 1.0343, "step": 10160 }, { - "epoch": 0.28833711691259933, + "epoch": 0.2879367508288702, "grad_norm": 0.0, - "learning_rate": 1.6699634554444885e-05, - "loss": 0.9723, + "learning_rate": 1.6709020304873277e-05, + "loss": 0.8695, "step": 10161 }, { - "epoch": 0.2883654937570942, + "epoch": 0.28796508827113265, "grad_norm": 0.0, - "learning_rate": 1.669895220803373e-05, - "loss": 1.0749, + "learning_rate": 1.670833969366262e-05, + "loss": 0.9185, "step": 10162 }, { - "epoch": 0.2883938706015891, + "epoch": 0.2879934257133951, "grad_norm": 0.0, - "learning_rate": 1.6698269805036228e-05, - "loss": 0.9195, + "learning_rate": 1.6707659025945124e-05, + "loss": 1.0072, "step": 10163 }, { - "epoch": 0.28842224744608397, + "epoch": 0.2880217631556576, "grad_norm": 0.0, - "learning_rate": 1.669758734545814e-05, - "loss": 0.9361, + "learning_rate": 1.6706978301726523e-05, + "loss": 0.9438, "step": 10164 }, { - "epoch": 0.2884506242905789, + "epoch": 0.28805010059792, "grad_norm": 0.0, - "learning_rate": 1.6696904829305226e-05, - "loss": 1.0061, + "learning_rate": 1.6706297521012556e-05, + "loss": 1.1498, "step": 10165 }, { - "epoch": 0.2884790011350738, + "epoch": 0.2880784380401825, "grad_norm": 0.0, - "learning_rate": 1.669622225658326e-05, - "loss": 0.9543, + "learning_rate": 1.6705616683808955e-05, + "loss": 0.8944, "step": 10166 }, { - "epoch": 0.28850737797956866, + "epoch": 0.28810677548244495, "grad_norm": 0.0, - "learning_rate": 1.6695539627298e-05, - "loss": 0.9605, + "learning_rate": 1.6704935790121456e-05, + "loss": 0.9517, "step": 10167 }, { - "epoch": 0.2885357548240636, + "epoch": 0.2881351129247074, "grad_norm": 0.0, - "learning_rate": 1.669485694145522e-05, - "loss": 0.899, + "learning_rate": 1.670425483995579e-05, + "loss": 0.9214, "step": 10168 }, { - "epoch": 0.28856413166855843, + "epoch": 0.2881634503669699, "grad_norm": 0.0, - "learning_rate": 1.669417419906068e-05, - "loss": 0.9472, + "learning_rate": 1.6703573833317698e-05, + "loss": 0.9931, "step": 10169 }, { - "epoch": 0.28859250851305335, + "epoch": 0.2881917878092323, "grad_norm": 0.0, - "learning_rate": 1.669349140012015e-05, - "loss": 1.001, + "learning_rate": 1.670289277021291e-05, + "loss": 1.057, "step": 10170 }, { - "epoch": 0.28862088535754826, + "epoch": 0.2882201252514948, "grad_norm": 0.0, - "learning_rate": 1.66928085446394e-05, - "loss": 1.039, + "learning_rate": 1.6702211650647173e-05, + "loss": 1.0695, "step": 10171 }, { - "epoch": 0.2886492622020431, + "epoch": 0.28824846269375726, "grad_norm": 0.0, - "learning_rate": 1.669212563262419e-05, - "loss": 0.9663, + "learning_rate": 1.6701530474626216e-05, + "loss": 1.0607, "step": 10172 }, { - "epoch": 0.28867763904653804, + "epoch": 0.2882768001360197, "grad_norm": 0.0, - "learning_rate": 1.6691442664080298e-05, - "loss": 1.0723, + "learning_rate": 1.6700849242155778e-05, + "loss": 0.9595, "step": 10173 }, { - "epoch": 0.2887060158910329, + "epoch": 0.2883051375782822, "grad_norm": 0.0, - "learning_rate": 1.6690759639013487e-05, - "loss": 1.0672, + "learning_rate": 1.6700167953241598e-05, + "loss": 0.916, "step": 10174 }, { - "epoch": 0.2887343927355278, + "epoch": 0.2883334750205446, "grad_norm": 0.0, - "learning_rate": 1.669007655742953e-05, - "loss": 0.8937, + "learning_rate": 1.6699486607889417e-05, + "loss": 1.0022, "step": 10175 }, { - "epoch": 0.2887627695800227, + "epoch": 0.2883618124628071, "grad_norm": 0.0, - "learning_rate": 1.6689393419334195e-05, - "loss": 1.0168, + "learning_rate": 1.6698805206104973e-05, + "loss": 1.0453, "step": 10176 }, { - "epoch": 0.2887911464245176, + "epoch": 0.28839014990506956, "grad_norm": 0.0, - "learning_rate": 1.6688710224733253e-05, - "loss": 0.8842, + "learning_rate": 1.6698123747894004e-05, + "loss": 0.9582, "step": 10177 }, { - "epoch": 0.2888195232690125, + "epoch": 0.28841848734733205, "grad_norm": 0.0, - "learning_rate": 1.6688026973632475e-05, - "loss": 1.0835, + "learning_rate": 1.6697442233262254e-05, + "loss": 0.9098, "step": 10178 }, { - "epoch": 0.28884790011350736, + "epoch": 0.2884468247895945, "grad_norm": 0.0, - "learning_rate": 1.6687343666037632e-05, - "loss": 0.9792, + "learning_rate": 1.6696760662215457e-05, + "loss": 0.9372, "step": 10179 }, { - "epoch": 0.2888762769580023, + "epoch": 0.28847516223185693, "grad_norm": 0.0, - "learning_rate": 1.6686660301954497e-05, - "loss": 0.8977, + "learning_rate": 1.669607903475936e-05, + "loss": 0.9404, "step": 10180 }, { - "epoch": 0.28890465380249714, + "epoch": 0.2885034996741194, "grad_norm": 0.0, - "learning_rate": 1.668597688138884e-05, - "loss": 0.9464, + "learning_rate": 1.6695397350899703e-05, + "loss": 0.9689, "step": 10181 }, { - "epoch": 0.28893303064699205, + "epoch": 0.28853183711638186, "grad_norm": 0.0, - "learning_rate": 1.668529340434644e-05, - "loss": 0.9909, + "learning_rate": 1.6694715610642226e-05, + "loss": 0.9911, "step": 10182 }, { - "epoch": 0.28896140749148697, + "epoch": 0.28856017455864436, "grad_norm": 0.0, - "learning_rate": 1.668460987083306e-05, - "loss": 0.9716, + "learning_rate": 1.6694033813992676e-05, + "loss": 0.9288, "step": 10183 }, { - "epoch": 0.2889897843359818, + "epoch": 0.2885885120009068, "grad_norm": 0.0, - "learning_rate": 1.6683926280854484e-05, - "loss": 0.9364, + "learning_rate": 1.6693351960956793e-05, + "loss": 1.0219, "step": 10184 }, { - "epoch": 0.28901816118047674, + "epoch": 0.28861684944316923, "grad_norm": 0.0, - "learning_rate": 1.6683242634416484e-05, - "loss": 0.8679, + "learning_rate": 1.669267005154032e-05, + "loss": 0.9733, "step": 10185 }, { - "epoch": 0.2890465380249716, + "epoch": 0.2886451868854317, "grad_norm": 0.0, - "learning_rate": 1.668255893152483e-05, - "loss": 1.0388, + "learning_rate": 1.6691988085749004e-05, + "loss": 0.9123, "step": 10186 }, { - "epoch": 0.2890749148694665, + "epoch": 0.28867352432769416, "grad_norm": 0.0, - "learning_rate": 1.66818751721853e-05, - "loss": 0.9517, + "learning_rate": 1.6691306063588583e-05, + "loss": 0.9363, "step": 10187 }, { - "epoch": 0.28910329171396143, + "epoch": 0.28870186176995666, "grad_norm": 0.0, - "learning_rate": 1.6681191356403672e-05, - "loss": 0.9863, + "learning_rate": 1.669062398506481e-05, + "loss": 1.0354, "step": 10188 }, { - "epoch": 0.2891316685584563, + "epoch": 0.2887301992122191, "grad_norm": 0.0, - "learning_rate": 1.668050748418572e-05, - "loss": 0.9805, + "learning_rate": 1.6689941850183425e-05, + "loss": 1.0397, "step": 10189 }, { - "epoch": 0.2891600454029512, + "epoch": 0.2887585366544816, "grad_norm": 0.0, - "learning_rate": 1.667982355553722e-05, - "loss": 1.0341, + "learning_rate": 1.6689259658950177e-05, + "loss": 1.0143, "step": 10190 }, { - "epoch": 0.28918842224744606, + "epoch": 0.28878687409674403, "grad_norm": 0.0, - "learning_rate": 1.6679139570463948e-05, - "loss": 1.0002, + "learning_rate": 1.668857741137081e-05, + "loss": 0.915, "step": 10191 }, { - "epoch": 0.289216799091941, + "epoch": 0.28881521153900647, "grad_norm": 0.0, - "learning_rate": 1.667845552897169e-05, - "loss": 0.8661, + "learning_rate": 1.6687895107451072e-05, + "loss": 0.9629, "step": 10192 }, { - "epoch": 0.2892451759364359, + "epoch": 0.28884354898126896, "grad_norm": 0.0, - "learning_rate": 1.6677771431066213e-05, - "loss": 0.8698, + "learning_rate": 1.668721274719671e-05, + "loss": 0.963, "step": 10193 }, { - "epoch": 0.28927355278093075, + "epoch": 0.2888718864235314, "grad_norm": 0.0, - "learning_rate": 1.6677087276753302e-05, - "loss": 0.9868, + "learning_rate": 1.6686530330613472e-05, + "loss": 0.9561, "step": 10194 }, { - "epoch": 0.28930192962542567, + "epoch": 0.2889002238657939, "grad_norm": 0.0, - "learning_rate": 1.6676403066038734e-05, - "loss": 1.0123, + "learning_rate": 1.6685847857707105e-05, + "loss": 0.9743, "step": 10195 }, { - "epoch": 0.28933030646992053, + "epoch": 0.28892856130805633, "grad_norm": 0.0, - "learning_rate": 1.667571879892829e-05, - "loss": 0.9043, + "learning_rate": 1.6685165328483356e-05, + "loss": 0.9267, "step": 10196 }, { - "epoch": 0.28935868331441544, + "epoch": 0.28895689875031877, "grad_norm": 0.0, - "learning_rate": 1.6675034475427747e-05, - "loss": 0.8604, + "learning_rate": 1.6684482742947984e-05, + "loss": 1.0303, "step": 10197 }, { - "epoch": 0.2893870601589103, + "epoch": 0.28898523619258126, "grad_norm": 0.0, - "learning_rate": 1.667435009554289e-05, - "loss": 0.9126, + "learning_rate": 1.6683800101106726e-05, + "loss": 1.1169, "step": 10198 }, { - "epoch": 0.2894154370034052, + "epoch": 0.2890135736348437, "grad_norm": 0.0, - "learning_rate": 1.6673665659279496e-05, - "loss": 1.0352, + "learning_rate": 1.668311740296534e-05, + "loss": 0.9539, "step": 10199 }, { - "epoch": 0.28944381384790013, + "epoch": 0.2890419110771062, "grad_norm": 0.0, - "learning_rate": 1.667298116664335e-05, - "loss": 0.8523, + "learning_rate": 1.6682434648529574e-05, + "loss": 0.9541, "step": 10200 }, { - "epoch": 0.289472190692395, + "epoch": 0.28907024851936863, "grad_norm": 0.0, - "learning_rate": 1.6672296617640232e-05, - "loss": 0.8985, + "learning_rate": 1.668175183780518e-05, + "loss": 0.9564, "step": 10201 }, { - "epoch": 0.2895005675368899, + "epoch": 0.2890985859616311, "grad_norm": 0.0, - "learning_rate": 1.6671612012275924e-05, - "loss": 1.0641, + "learning_rate": 1.668106897079791e-05, + "loss": 1.0997, "step": 10202 }, { - "epoch": 0.28952894438138477, + "epoch": 0.28912692340389357, "grad_norm": 0.0, - "learning_rate": 1.667092735055621e-05, - "loss": 1.0157, + "learning_rate": 1.6680386047513512e-05, + "loss": 0.899, "step": 10203 }, { - "epoch": 0.2895573212258797, + "epoch": 0.289155260846156, "grad_norm": 0.0, - "learning_rate": 1.667024263248687e-05, - "loss": 1.0452, + "learning_rate": 1.6679703067957745e-05, + "loss": 1.0714, "step": 10204 }, { - "epoch": 0.2895856980703746, + "epoch": 0.2891835982884185, "grad_norm": 0.0, - "learning_rate": 1.6669557858073696e-05, - "loss": 0.9522, + "learning_rate": 1.6679020032136354e-05, + "loss": 0.9253, "step": 10205 }, { - "epoch": 0.28961407491486946, + "epoch": 0.28921193573068094, "grad_norm": 0.0, - "learning_rate": 1.6668873027322462e-05, - "loss": 0.9034, + "learning_rate": 1.6678336940055103e-05, + "loss": 0.9321, "step": 10206 }, { - "epoch": 0.28964245175936437, + "epoch": 0.28924027317294343, "grad_norm": 0.0, - "learning_rate": 1.666818814023896e-05, - "loss": 0.9162, + "learning_rate": 1.6677653791719737e-05, + "loss": 1.0147, "step": 10207 }, { - "epoch": 0.28967082860385923, + "epoch": 0.28926861061520587, "grad_norm": 0.0, - "learning_rate": 1.666750319682897e-05, - "loss": 1.0865, + "learning_rate": 1.6676970587136013e-05, + "loss": 1.0013, "step": 10208 }, { - "epoch": 0.28969920544835415, + "epoch": 0.2892969480574683, "grad_norm": 0.0, - "learning_rate": 1.6666818197098286e-05, - "loss": 0.9926, + "learning_rate": 1.6676287326309684e-05, + "loss": 0.8839, "step": 10209 }, { - "epoch": 0.28972758229284906, + "epoch": 0.2893252854997308, "grad_norm": 0.0, - "learning_rate": 1.6666133141052687e-05, - "loss": 0.9739, + "learning_rate": 1.6675604009246514e-05, + "loss": 0.8854, "step": 10210 }, { - "epoch": 0.2897559591373439, + "epoch": 0.28935362294199324, "grad_norm": 0.0, - "learning_rate": 1.666544802869796e-05, - "loss": 0.9695, + "learning_rate": 1.667492063595225e-05, + "loss": 0.8656, "step": 10211 }, { - "epoch": 0.28978433598183884, + "epoch": 0.28938196038425573, "grad_norm": 0.0, - "learning_rate": 1.6664762860039893e-05, - "loss": 0.9166, + "learning_rate": 1.6674237206432648e-05, + "loss": 0.9126, "step": 10212 }, { - "epoch": 0.2898127128263337, + "epoch": 0.28941029782651817, "grad_norm": 0.0, - "learning_rate": 1.666407763508428e-05, - "loss": 1.052, + "learning_rate": 1.667355372069347e-05, + "loss": 0.8677, "step": 10213 }, { - "epoch": 0.2898410896708286, + "epoch": 0.28943863526878066, "grad_norm": 0.0, - "learning_rate": 1.66633923538369e-05, - "loss": 1.0115, + "learning_rate": 1.6672870178740468e-05, + "loss": 0.8732, "step": 10214 }, { - "epoch": 0.28986946651532347, + "epoch": 0.2894669727110431, "grad_norm": 0.0, - "learning_rate": 1.6662707016303542e-05, - "loss": 0.9583, + "learning_rate": 1.6672186580579406e-05, + "loss": 0.921, "step": 10215 }, { - "epoch": 0.2898978433598184, + "epoch": 0.28949531015330554, "grad_norm": 0.0, - "learning_rate": 1.6662021622490003e-05, - "loss": 0.9238, + "learning_rate": 1.667150292621604e-05, + "loss": 0.9834, "step": 10216 }, { - "epoch": 0.2899262202043133, + "epoch": 0.28952364759556803, "grad_norm": 0.0, - "learning_rate": 1.6661336172402068e-05, - "loss": 1.0939, + "learning_rate": 1.6670819215656125e-05, + "loss": 0.8599, "step": 10217 }, { - "epoch": 0.28995459704880816, + "epoch": 0.2895519850378305, "grad_norm": 0.0, - "learning_rate": 1.6660650666045525e-05, - "loss": 1.0071, + "learning_rate": 1.667013544890542e-05, + "loss": 0.9113, "step": 10218 }, { - "epoch": 0.2899829738933031, + "epoch": 0.28958032248009297, "grad_norm": 0.0, - "learning_rate": 1.6659965103426166e-05, - "loss": 0.9791, + "learning_rate": 1.666945162596969e-05, + "loss": 0.9594, "step": 10219 }, { - "epoch": 0.29001135073779793, + "epoch": 0.2896086599223554, "grad_norm": 0.0, - "learning_rate": 1.6659279484549782e-05, - "loss": 0.8704, + "learning_rate": 1.6668767746854694e-05, + "loss": 0.8798, "step": 10220 }, { - "epoch": 0.29003972758229285, + "epoch": 0.28963699736461784, "grad_norm": 0.0, - "learning_rate": 1.6658593809422168e-05, - "loss": 0.9224, + "learning_rate": 1.6668083811566188e-05, + "loss": 0.9738, "step": 10221 }, { - "epoch": 0.29006810442678777, + "epoch": 0.28966533480688034, "grad_norm": 0.0, - "learning_rate": 1.665790807804911e-05, - "loss": 0.855, + "learning_rate": 1.6667399820109937e-05, + "loss": 0.8831, "step": 10222 }, { - "epoch": 0.2900964812712826, + "epoch": 0.2896936722491428, "grad_norm": 0.0, - "learning_rate": 1.6657222290436404e-05, - "loss": 0.9383, + "learning_rate": 1.6666715772491702e-05, + "loss": 0.9214, "step": 10223 }, { - "epoch": 0.29012485811577754, + "epoch": 0.28972200969140527, "grad_norm": 0.0, - "learning_rate": 1.665653644658984e-05, - "loss": 0.8061, + "learning_rate": 1.6666031668717246e-05, + "loss": 0.9376, "step": 10224 }, { - "epoch": 0.2901532349602724, + "epoch": 0.2897503471336677, "grad_norm": 0.0, - "learning_rate": 1.6655850546515216e-05, - "loss": 1.0114, + "learning_rate": 1.6665347508792325e-05, + "loss": 0.9233, "step": 10225 }, { - "epoch": 0.2901816118047673, + "epoch": 0.2897786845759302, "grad_norm": 0.0, - "learning_rate": 1.6655164590218325e-05, - "loss": 0.9953, + "learning_rate": 1.666466329272271e-05, + "loss": 0.9101, "step": 10226 }, { - "epoch": 0.29020998864926223, + "epoch": 0.28980702201819264, "grad_norm": 0.0, - "learning_rate": 1.6654478577704955e-05, - "loss": 0.9528, + "learning_rate": 1.666397902051416e-05, + "loss": 1.0488, "step": 10227 }, { - "epoch": 0.2902383654937571, + "epoch": 0.2898353594604551, "grad_norm": 0.0, - "learning_rate": 1.665379250898091e-05, - "loss": 0.9641, + "learning_rate": 1.666329469217244e-05, + "loss": 0.888, "step": 10228 }, { - "epoch": 0.290266742338252, + "epoch": 0.28986369690271757, "grad_norm": 0.0, - "learning_rate": 1.6653106384051977e-05, - "loss": 0.9447, + "learning_rate": 1.6662610307703318e-05, + "loss": 0.8791, "step": 10229 }, { - "epoch": 0.29029511918274686, + "epoch": 0.28989203434498, "grad_norm": 0.0, - "learning_rate": 1.6652420202923956e-05, - "loss": 1.065, + "learning_rate": 1.6661925867112553e-05, + "loss": 0.9958, "step": 10230 }, { - "epoch": 0.2903234960272418, + "epoch": 0.2899203717872425, "grad_norm": 0.0, - "learning_rate": 1.665173396560264e-05, - "loss": 1.0095, + "learning_rate": 1.666124137040591e-05, + "loss": 0.9351, "step": 10231 }, { - "epoch": 0.29035187287173664, + "epoch": 0.28994870922950494, "grad_norm": 0.0, - "learning_rate": 1.6651047672093836e-05, - "loss": 0.9404, + "learning_rate": 1.666055681758916e-05, + "loss": 0.9251, "step": 10232 }, { - "epoch": 0.29038024971623155, + "epoch": 0.2899770466717674, "grad_norm": 0.0, - "learning_rate": 1.6650361322403324e-05, - "loss": 0.894, + "learning_rate": 1.6659872208668067e-05, + "loss": 1.0123, "step": 10233 }, { - "epoch": 0.29040862656072647, + "epoch": 0.2900053841140299, "grad_norm": 0.0, - "learning_rate": 1.6649674916536916e-05, - "loss": 0.885, + "learning_rate": 1.6659187543648398e-05, + "loss": 0.9844, "step": 10234 }, { - "epoch": 0.29043700340522133, + "epoch": 0.2900337215562923, "grad_norm": 0.0, - "learning_rate": 1.6648988454500407e-05, - "loss": 0.9014, + "learning_rate": 1.6658502822535916e-05, + "loss": 0.9518, "step": 10235 }, { - "epoch": 0.29046538024971624, + "epoch": 0.2900620589985548, "grad_norm": 0.0, - "learning_rate": 1.6648301936299592e-05, - "loss": 1.1212, + "learning_rate": 1.6657818045336392e-05, + "loss": 0.7718, "step": 10236 }, { - "epoch": 0.2904937570942111, + "epoch": 0.29009039644081724, "grad_norm": 0.0, - "learning_rate": 1.664761536194027e-05, - "loss": 0.9393, + "learning_rate": 1.66571332120556e-05, + "loss": 0.9076, "step": 10237 }, { - "epoch": 0.290522133938706, + "epoch": 0.29011873388307974, "grad_norm": 0.0, - "learning_rate": 1.664692873142824e-05, - "loss": 0.9719, + "learning_rate": 1.66564483226993e-05, + "loss": 1.0634, "step": 10238 }, { - "epoch": 0.29055051078320093, + "epoch": 0.2901470713253422, "grad_norm": 0.0, - "learning_rate": 1.6646242044769308e-05, - "loss": 0.9331, + "learning_rate": 1.6655763377273258e-05, + "loss": 1.0468, "step": 10239 }, { - "epoch": 0.2905788876276958, + "epoch": 0.2901754087676046, "grad_norm": 0.0, - "learning_rate": 1.664555530196927e-05, - "loss": 0.9982, + "learning_rate": 1.665507837578326e-05, + "loss": 0.8574, "step": 10240 }, { - "epoch": 0.2906072644721907, + "epoch": 0.2902037462098671, "grad_norm": 0.0, - "learning_rate": 1.6644868503033927e-05, - "loss": 0.9877, + "learning_rate": 1.6654393318235057e-05, + "loss": 0.998, "step": 10241 }, { - "epoch": 0.29063564131668557, + "epoch": 0.29023208365212955, "grad_norm": 0.0, - "learning_rate": 1.6644181647969082e-05, - "loss": 0.93, + "learning_rate": 1.6653708204634434e-05, + "loss": 0.9514, "step": 10242 }, { - "epoch": 0.2906640181611805, + "epoch": 0.29026042109439204, "grad_norm": 0.0, - "learning_rate": 1.6643494736780534e-05, - "loss": 1.0529, + "learning_rate": 1.665302303498715e-05, + "loss": 0.9835, "step": 10243 }, { - "epoch": 0.29069239500567534, + "epoch": 0.2902887585366545, "grad_norm": 0.0, - "learning_rate": 1.664280776947409e-05, - "loss": 1.0376, + "learning_rate": 1.665233780929899e-05, + "loss": 0.9731, "step": 10244 }, { - "epoch": 0.29072077185017026, + "epoch": 0.2903170959789169, "grad_norm": 0.0, - "learning_rate": 1.6642120746055547e-05, - "loss": 0.946, + "learning_rate": 1.6651652527575712e-05, + "loss": 0.9853, "step": 10245 }, { - "epoch": 0.29074914869466517, + "epoch": 0.2903454334211794, "grad_norm": 0.0, - "learning_rate": 1.6641433666530714e-05, - "loss": 1.0069, + "learning_rate": 1.66509671898231e-05, + "loss": 1.0133, "step": 10246 }, { - "epoch": 0.29077752553916003, + "epoch": 0.29037377086344185, "grad_norm": 0.0, - "learning_rate": 1.6640746530905395e-05, - "loss": 0.9666, + "learning_rate": 1.6650281796046917e-05, + "loss": 1.0052, "step": 10247 }, { - "epoch": 0.29080590238365495, + "epoch": 0.29040210830570434, "grad_norm": 0.0, - "learning_rate": 1.6640059339185387e-05, - "loss": 0.9433, + "learning_rate": 1.664959634625294e-05, + "loss": 0.9648, "step": 10248 }, { - "epoch": 0.2908342792281498, + "epoch": 0.2904304457479668, "grad_norm": 0.0, - "learning_rate": 1.66393720913765e-05, - "loss": 1.0053, + "learning_rate": 1.6648910840446947e-05, + "loss": 0.9588, "step": 10249 }, { - "epoch": 0.2908626560726447, + "epoch": 0.2904587831902293, "grad_norm": 0.0, - "learning_rate": 1.663868478748454e-05, - "loss": 0.9163, + "learning_rate": 1.664822527863471e-05, + "loss": 0.9835, "step": 10250 }, { - "epoch": 0.29089103291713964, + "epoch": 0.2904871206324917, "grad_norm": 0.0, - "learning_rate": 1.663799742751531e-05, - "loss": 1.0036, + "learning_rate": 1.6647539660822e-05, + "loss": 1.0674, "step": 10251 }, { - "epoch": 0.2909194097616345, + "epoch": 0.29051545807475415, "grad_norm": 0.0, - "learning_rate": 1.6637310011474618e-05, - "loss": 1.0304, + "learning_rate": 1.6646853987014594e-05, + "loss": 0.9539, "step": 10252 }, { - "epoch": 0.2909477866061294, + "epoch": 0.29054379551701665, "grad_norm": 0.0, - "learning_rate": 1.663662253936827e-05, - "loss": 0.9609, + "learning_rate": 1.664616825721827e-05, + "loss": 1.0372, "step": 10253 }, { - "epoch": 0.29097616345062427, + "epoch": 0.2905721329592791, "grad_norm": 0.0, - "learning_rate": 1.663593501120207e-05, - "loss": 0.829, + "learning_rate": 1.6645482471438805e-05, + "loss": 0.9822, "step": 10254 }, { - "epoch": 0.2910045402951192, + "epoch": 0.2906004704015416, "grad_norm": 0.0, - "learning_rate": 1.6635247426981833e-05, - "loss": 1.0197, + "learning_rate": 1.6644796629681968e-05, + "loss": 0.9701, "step": 10255 }, { - "epoch": 0.2910329171396141, + "epoch": 0.290628807843804, "grad_norm": 0.0, - "learning_rate": 1.663455978671336e-05, - "loss": 0.9952, + "learning_rate": 1.6644110731953546e-05, + "loss": 0.9511, "step": 10256 }, { - "epoch": 0.29106129398410896, + "epoch": 0.29065714528606645, "grad_norm": 0.0, - "learning_rate": 1.6633872090402465e-05, - "loss": 0.9796, + "learning_rate": 1.6643424778259313e-05, + "loss": 0.8437, "step": 10257 }, { - "epoch": 0.2910896708286039, + "epoch": 0.29068548272832895, "grad_norm": 0.0, - "learning_rate": 1.6633184338054947e-05, - "loss": 0.9834, + "learning_rate": 1.6642738768605045e-05, + "loss": 0.9289, "step": 10258 }, { - "epoch": 0.29111804767309873, + "epoch": 0.2907138201705914, "grad_norm": 0.0, - "learning_rate": 1.663249652967663e-05, - "loss": 0.8614, + "learning_rate": 1.664205270299652e-05, + "loss": 0.9882, "step": 10259 }, { - "epoch": 0.29114642451759365, + "epoch": 0.2907421576128539, "grad_norm": 0.0, - "learning_rate": 1.6631808665273315e-05, - "loss": 0.9581, + "learning_rate": 1.664136658143952e-05, + "loss": 1.0142, "step": 10260 }, { - "epoch": 0.2911748013620885, + "epoch": 0.2907704950551163, "grad_norm": 0.0, - "learning_rate": 1.663112074485081e-05, - "loss": 0.9569, + "learning_rate": 1.664068040393982e-05, + "loss": 0.8821, "step": 10261 }, { - "epoch": 0.2912031782065834, + "epoch": 0.2907988324973788, "grad_norm": 0.0, - "learning_rate": 1.6630432768414937e-05, - "loss": 1.0609, + "learning_rate": 1.6639994170503206e-05, + "loss": 0.9472, "step": 10262 }, { - "epoch": 0.29123155505107834, + "epoch": 0.29082716993964125, "grad_norm": 0.0, - "learning_rate": 1.66297447359715e-05, - "loss": 0.9196, + "learning_rate": 1.6639307881135457e-05, + "loss": 0.9849, "step": 10263 }, { - "epoch": 0.2912599318955732, + "epoch": 0.2908555073819037, "grad_norm": 0.0, - "learning_rate": 1.6629056647526306e-05, - "loss": 0.967, + "learning_rate": 1.6638621535842347e-05, + "loss": 0.8956, "step": 10264 }, { - "epoch": 0.2912883087400681, + "epoch": 0.2908838448241662, "grad_norm": 0.0, - "learning_rate": 1.6628368503085174e-05, - "loss": 0.9626, + "learning_rate": 1.6637935134629664e-05, + "loss": 1.0889, "step": 10265 }, { - "epoch": 0.291316685584563, + "epoch": 0.2909121822664286, "grad_norm": 0.0, - "learning_rate": 1.6627680302653914e-05, - "loss": 0.9044, + "learning_rate": 1.6637248677503194e-05, + "loss": 0.9902, "step": 10266 }, { - "epoch": 0.2913450624290579, + "epoch": 0.2909405197086911, "grad_norm": 0.0, - "learning_rate": 1.6626992046238343e-05, - "loss": 0.9201, + "learning_rate": 1.663656216446871e-05, + "loss": 1.0575, "step": 10267 }, { - "epoch": 0.2913734392735528, + "epoch": 0.29096885715095355, "grad_norm": 0.0, - "learning_rate": 1.6626303733844272e-05, - "loss": 0.9098, + "learning_rate": 1.6635875595531995e-05, + "loss": 0.952, "step": 10268 }, { - "epoch": 0.29140181611804766, + "epoch": 0.290997194593216, "grad_norm": 0.0, - "learning_rate": 1.6625615365477515e-05, - "loss": 0.8735, + "learning_rate": 1.6635188970698843e-05, + "loss": 1.0129, "step": 10269 }, { - "epoch": 0.2914301929625426, + "epoch": 0.2910255320354785, "grad_norm": 0.0, - "learning_rate": 1.662492694114389e-05, - "loss": 0.8757, + "learning_rate": 1.6634502289975025e-05, + "loss": 0.9382, "step": 10270 }, { - "epoch": 0.29145856980703744, + "epoch": 0.2910538694777409, "grad_norm": 0.0, - "learning_rate": 1.6624238460849203e-05, - "loss": 0.9169, + "learning_rate": 1.6633815553366334e-05, + "loss": 0.8974, "step": 10271 }, { - "epoch": 0.29148694665153235, + "epoch": 0.2910822069200034, "grad_norm": 0.0, - "learning_rate": 1.6623549924599283e-05, - "loss": 0.9701, + "learning_rate": 1.6633128760878548e-05, + "loss": 1.0214, "step": 10272 }, { - "epoch": 0.29151532349602727, + "epoch": 0.29111054436226586, "grad_norm": 0.0, - "learning_rate": 1.6622861332399936e-05, - "loss": 0.9759, + "learning_rate": 1.6632441912517453e-05, + "loss": 0.8458, "step": 10273 }, { - "epoch": 0.2915437003405221, + "epoch": 0.29113888180452835, "grad_norm": 0.0, - "learning_rate": 1.6622172684256984e-05, - "loss": 0.9519, + "learning_rate": 1.6631755008288843e-05, + "loss": 1.0051, "step": 10274 }, { - "epoch": 0.29157207718501704, + "epoch": 0.2911672192467908, "grad_norm": 0.0, - "learning_rate": 1.6621483980176237e-05, - "loss": 0.9264, + "learning_rate": 1.6631068048198495e-05, + "loss": 0.9073, "step": 10275 }, { - "epoch": 0.2916004540295119, + "epoch": 0.2911955566890532, "grad_norm": 0.0, - "learning_rate": 1.662079522016352e-05, - "loss": 0.9414, + "learning_rate": 1.66303810322522e-05, + "loss": 0.9519, "step": 10276 }, { - "epoch": 0.2916288308740068, + "epoch": 0.2912238941313157, "grad_norm": 0.0, - "learning_rate": 1.6620106404224644e-05, - "loss": 0.9426, + "learning_rate": 1.6629693960455743e-05, + "loss": 1.0981, "step": 10277 }, { - "epoch": 0.2916572077185017, + "epoch": 0.29125223157357816, "grad_norm": 0.0, - "learning_rate": 1.6619417532365437e-05, - "loss": 1.0314, + "learning_rate": 1.6629006832814912e-05, + "loss": 0.9946, "step": 10278 }, { - "epoch": 0.2916855845629966, + "epoch": 0.29128056901584065, "grad_norm": 0.0, - "learning_rate": 1.6618728604591708e-05, - "loss": 0.8917, + "learning_rate": 1.662831964933549e-05, + "loss": 0.8854, "step": 10279 }, { - "epoch": 0.2917139614074915, + "epoch": 0.2913089064581031, "grad_norm": 0.0, - "learning_rate": 1.6618039620909285e-05, - "loss": 0.9344, + "learning_rate": 1.6627632410023277e-05, + "loss": 1.0633, "step": 10280 }, { - "epoch": 0.29174233825198637, + "epoch": 0.29133724390036553, "grad_norm": 0.0, - "learning_rate": 1.6617350581323985e-05, - "loss": 1.1064, + "learning_rate": 1.662694511488405e-05, + "loss": 0.9594, "step": 10281 }, { - "epoch": 0.2917707150964813, + "epoch": 0.291365581342628, "grad_norm": 0.0, - "learning_rate": 1.6616661485841622e-05, - "loss": 0.9436, + "learning_rate": 1.6626257763923605e-05, + "loss": 0.921, "step": 10282 }, { - "epoch": 0.29179909194097614, + "epoch": 0.29139391878489046, "grad_norm": 0.0, - "learning_rate": 1.6615972334468023e-05, - "loss": 1.0105, + "learning_rate": 1.6625570357147732e-05, + "loss": 0.9703, "step": 10283 }, { - "epoch": 0.29182746878547106, + "epoch": 0.29142225622715295, "grad_norm": 0.0, - "learning_rate": 1.661528312720901e-05, - "loss": 0.9797, + "learning_rate": 1.662488289456222e-05, + "loss": 0.9631, "step": 10284 }, { - "epoch": 0.29185584562996597, + "epoch": 0.2914505936694154, "grad_norm": 0.0, - "learning_rate": 1.6614593864070404e-05, - "loss": 0.8805, + "learning_rate": 1.6624195376172857e-05, + "loss": 0.9712, "step": 10285 }, { - "epoch": 0.29188422247446083, + "epoch": 0.2914789311116779, "grad_norm": 0.0, - "learning_rate": 1.6613904545058025e-05, - "loss": 0.9598, + "learning_rate": 1.6623507801985438e-05, + "loss": 0.9289, "step": 10286 }, { - "epoch": 0.29191259931895575, + "epoch": 0.2915072685539403, "grad_norm": 0.0, - "learning_rate": 1.6613215170177698e-05, - "loss": 0.9339, + "learning_rate": 1.662282017200575e-05, + "loss": 0.9912, "step": 10287 }, { - "epoch": 0.2919409761634506, + "epoch": 0.29153560599620276, "grad_norm": 0.0, - "learning_rate": 1.6612525739435245e-05, - "loss": 0.9551, + "learning_rate": 1.6622132486239594e-05, + "loss": 0.9655, "step": 10288 }, { - "epoch": 0.2919693530079455, + "epoch": 0.29156394343846526, "grad_norm": 0.0, - "learning_rate": 1.661183625283649e-05, - "loss": 0.9582, + "learning_rate": 1.6621444744692753e-05, + "loss": 0.9711, "step": 10289 }, { - "epoch": 0.29199772985244044, + "epoch": 0.2915922808807277, "grad_norm": 0.0, - "learning_rate": 1.6611146710387254e-05, - "loss": 0.9484, + "learning_rate": 1.6620756947371025e-05, + "loss": 0.9472, "step": 10290 }, { - "epoch": 0.2920261066969353, + "epoch": 0.2916206183229902, "grad_norm": 0.0, - "learning_rate": 1.6610457112093364e-05, - "loss": 0.8951, + "learning_rate": 1.66200690942802e-05, + "loss": 1.0047, "step": 10291 }, { - "epoch": 0.2920544835414302, + "epoch": 0.2916489557652526, "grad_norm": 0.0, - "learning_rate": 1.660976745796065e-05, - "loss": 0.9579, + "learning_rate": 1.661938118542608e-05, + "loss": 0.9745, "step": 10292 }, { - "epoch": 0.29208286038592507, + "epoch": 0.29167729320751506, "grad_norm": 0.0, - "learning_rate": 1.660907774799493e-05, - "loss": 1.0284, + "learning_rate": 1.661869322081445e-05, + "loss": 1.1422, "step": 10293 }, { - "epoch": 0.29211123723042, + "epoch": 0.29170563064977756, "grad_norm": 0.0, - "learning_rate": 1.6608387982202034e-05, - "loss": 0.8521, + "learning_rate": 1.6618005200451112e-05, + "loss": 0.9593, "step": 10294 }, { - "epoch": 0.29213961407491484, + "epoch": 0.29173396809204, "grad_norm": 0.0, - "learning_rate": 1.6607698160587792e-05, - "loss": 0.9425, + "learning_rate": 1.6617317124341856e-05, + "loss": 0.9769, "step": 10295 }, { - "epoch": 0.29216799091940976, + "epoch": 0.2917623055343025, "grad_norm": 0.0, - "learning_rate": 1.6607008283158023e-05, - "loss": 0.9779, + "learning_rate": 1.661662899249248e-05, + "loss": 0.8939, "step": 10296 }, { - "epoch": 0.2921963677639047, + "epoch": 0.29179064297656493, "grad_norm": 0.0, - "learning_rate": 1.6606318349918557e-05, - "loss": 1.0224, + "learning_rate": 1.6615940804908787e-05, + "loss": 1.0589, "step": 10297 }, { - "epoch": 0.29222474460839953, + "epoch": 0.2918189804188274, "grad_norm": 0.0, - "learning_rate": 1.660562836087522e-05, - "loss": 1.0372, + "learning_rate": 1.661525256159656e-05, + "loss": 0.8668, "step": 10298 }, { - "epoch": 0.29225312145289445, + "epoch": 0.29184731786108986, "grad_norm": 0.0, - "learning_rate": 1.660493831603385e-05, - "loss": 0.9023, + "learning_rate": 1.661456426256161e-05, + "loss": 1.1202, "step": 10299 }, { - "epoch": 0.2922814982973893, + "epoch": 0.2918756553033523, "grad_norm": 0.0, - "learning_rate": 1.6604248215400266e-05, - "loss": 1.0209, + "learning_rate": 1.6613875907809728e-05, + "loss": 0.9423, "step": 10300 }, { - "epoch": 0.2923098751418842, + "epoch": 0.2919039927456148, "grad_norm": 0.0, - "learning_rate": 1.6603558058980296e-05, - "loss": 1.0225, + "learning_rate": 1.661318749734671e-05, + "loss": 0.9635, "step": 10301 }, { - "epoch": 0.29233825198637914, + "epoch": 0.29193233018787723, "grad_norm": 0.0, - "learning_rate": 1.6602867846779782e-05, - "loss": 0.85, + "learning_rate": 1.661249903117836e-05, + "loss": 0.9615, "step": 10302 }, { - "epoch": 0.292366628830874, + "epoch": 0.2919606676301397, "grad_norm": 0.0, - "learning_rate": 1.6602177578804544e-05, - "loss": 0.9618, + "learning_rate": 1.6611810509310476e-05, + "loss": 1.0733, "step": 10303 }, { - "epoch": 0.2923950056753689, + "epoch": 0.29198900507240216, "grad_norm": 0.0, - "learning_rate": 1.6601487255060416e-05, - "loss": 0.9409, + "learning_rate": 1.6611121931748858e-05, + "loss": 0.9552, "step": 10304 }, { - "epoch": 0.2924233825198638, + "epoch": 0.2920173425146646, "grad_norm": 0.0, - "learning_rate": 1.6600796875553225e-05, - "loss": 0.9452, + "learning_rate": 1.66104332984993e-05, + "loss": 0.9881, "step": 10305 }, { - "epoch": 0.2924517593643587, + "epoch": 0.2920456799569271, "grad_norm": 0.0, - "learning_rate": 1.6600106440288807e-05, - "loss": 0.9583, + "learning_rate": 1.6609744609567614e-05, + "loss": 0.9222, "step": 10306 }, { - "epoch": 0.2924801362088536, + "epoch": 0.29207401739918953, "grad_norm": 0.0, - "learning_rate": 1.6599415949272995e-05, - "loss": 0.968, + "learning_rate": 1.660905586495959e-05, + "loss": 0.9192, "step": 10307 }, { - "epoch": 0.29250851305334846, + "epoch": 0.292102354841452, "grad_norm": 0.0, - "learning_rate": 1.6598725402511618e-05, - "loss": 0.8703, + "learning_rate": 1.6608367064681033e-05, + "loss": 0.9313, "step": 10308 }, { - "epoch": 0.2925368898978434, + "epoch": 0.29213069228371447, "grad_norm": 0.0, - "learning_rate": 1.6598034800010512e-05, - "loss": 1.0996, + "learning_rate": 1.660767820873775e-05, + "loss": 1.0187, "step": 10309 }, { - "epoch": 0.29256526674233824, + "epoch": 0.29215902972597696, "grad_norm": 0.0, - "learning_rate": 1.659734414177551e-05, - "loss": 0.8692, + "learning_rate": 1.6606989297135538e-05, + "loss": 0.9543, "step": 10310 }, { - "epoch": 0.29259364358683315, + "epoch": 0.2921873671682394, "grad_norm": 0.0, - "learning_rate": 1.6596653427812443e-05, - "loss": 0.9474, + "learning_rate": 1.6606300329880204e-05, + "loss": 0.9805, "step": 10311 }, { - "epoch": 0.292622020431328, + "epoch": 0.29221570461050184, "grad_norm": 0.0, - "learning_rate": 1.6595962658127152e-05, - "loss": 0.9875, + "learning_rate": 1.6605611306977546e-05, + "loss": 0.9627, "step": 10312 }, { - "epoch": 0.2926503972758229, + "epoch": 0.29224404205276433, "grad_norm": 0.0, - "learning_rate": 1.6595271832725467e-05, - "loss": 0.9297, + "learning_rate": 1.6604922228433372e-05, + "loss": 0.9372, "step": 10313 }, { - "epoch": 0.29267877412031784, + "epoch": 0.29227237949502677, "grad_norm": 0.0, - "learning_rate": 1.659458095161322e-05, - "loss": 0.9109, + "learning_rate": 1.660423309425349e-05, + "loss": 1.1082, "step": 10314 }, { - "epoch": 0.2927071509648127, + "epoch": 0.29230071693728926, "grad_norm": 0.0, - "learning_rate": 1.6593890014796256e-05, - "loss": 0.9767, + "learning_rate": 1.6603543904443694e-05, + "loss": 0.9585, "step": 10315 }, { - "epoch": 0.2927355278093076, + "epoch": 0.2923290543795517, "grad_norm": 0.0, - "learning_rate": 1.6593199022280405e-05, - "loss": 0.8983, + "learning_rate": 1.6602854659009797e-05, + "loss": 0.9575, "step": 10316 }, { - "epoch": 0.2927639046538025, + "epoch": 0.29235739182181414, "grad_norm": 0.0, - "learning_rate": 1.6592507974071504e-05, - "loss": 0.9545, + "learning_rate": 1.6602165357957603e-05, + "loss": 1.0516, "step": 10317 }, { - "epoch": 0.2927922814982974, + "epoch": 0.29238572926407663, "grad_norm": 0.0, - "learning_rate": 1.659181687017539e-05, - "loss": 1.0243, + "learning_rate": 1.660147600129292e-05, + "loss": 0.9727, "step": 10318 }, { - "epoch": 0.2928206583427923, + "epoch": 0.29241406670633907, "grad_norm": 0.0, - "learning_rate": 1.6591125710597908e-05, - "loss": 0.8605, + "learning_rate": 1.6600786589021555e-05, + "loss": 1.0717, "step": 10319 }, { - "epoch": 0.29284903518728717, + "epoch": 0.29244240414860156, "grad_norm": 0.0, - "learning_rate": 1.659043449534489e-05, - "loss": 0.9138, + "learning_rate": 1.660009712114931e-05, + "loss": 0.8735, "step": 10320 }, { - "epoch": 0.2928774120317821, + "epoch": 0.292470741590864, "grad_norm": 0.0, - "learning_rate": 1.658974322442217e-05, - "loss": 0.9234, + "learning_rate": 1.6599407597681997e-05, + "loss": 1.0464, "step": 10321 }, { - "epoch": 0.29290578887627694, + "epoch": 0.2924990790331265, "grad_norm": 0.0, - "learning_rate": 1.65890518978356e-05, - "loss": 0.9239, + "learning_rate": 1.6598718018625424e-05, + "loss": 0.8937, "step": 10322 }, { - "epoch": 0.29293416572077186, + "epoch": 0.29252741647538893, "grad_norm": 0.0, - "learning_rate": 1.6588360515591007e-05, - "loss": 0.8954, + "learning_rate": 1.6598028383985398e-05, + "loss": 0.9916, "step": 10323 }, { - "epoch": 0.2929625425652667, + "epoch": 0.2925557539176514, "grad_norm": 0.0, - "learning_rate": 1.6587669077694242e-05, - "loss": 1.01, + "learning_rate": 1.659733869376773e-05, + "loss": 0.9951, "step": 10324 }, { - "epoch": 0.29299091940976163, + "epoch": 0.29258409135991387, "grad_norm": 0.0, - "learning_rate": 1.6586977584151135e-05, - "loss": 1.0904, + "learning_rate": 1.6596648947978225e-05, + "loss": 0.9708, "step": 10325 }, { - "epoch": 0.29301929625425654, + "epoch": 0.2926124288021763, "grad_norm": 0.0, - "learning_rate": 1.6586286034967535e-05, - "loss": 1.0764, + "learning_rate": 1.65959591466227e-05, + "loss": 1.0457, "step": 10326 }, { - "epoch": 0.2930476730987514, + "epoch": 0.2926407662444388, "grad_norm": 0.0, - "learning_rate": 1.658559443014928e-05, - "loss": 0.9535, + "learning_rate": 1.659526928970696e-05, + "loss": 1.0006, "step": 10327 }, { - "epoch": 0.2930760499432463, + "epoch": 0.29266910368670124, "grad_norm": 0.0, - "learning_rate": 1.6584902769702214e-05, - "loss": 1.0176, + "learning_rate": 1.6594579377236817e-05, + "loss": 0.9374, "step": 10328 }, { - "epoch": 0.2931044267877412, + "epoch": 0.2926974411289637, "grad_norm": 0.0, - "learning_rate": 1.6584211053632174e-05, - "loss": 1.0875, + "learning_rate": 1.6593889409218084e-05, + "loss": 0.9919, "step": 10329 }, { - "epoch": 0.2931328036322361, + "epoch": 0.29272577857122617, "grad_norm": 0.0, - "learning_rate": 1.658351928194501e-05, - "loss": 0.9424, + "learning_rate": 1.6593199385656574e-05, + "loss": 1.0253, "step": 10330 }, { - "epoch": 0.293161180476731, + "epoch": 0.2927541160134886, "grad_norm": 0.0, - "learning_rate": 1.6582827454646565e-05, - "loss": 0.8866, + "learning_rate": 1.6592509306558095e-05, + "loss": 1.0315, "step": 10331 }, { - "epoch": 0.29318955732122587, + "epoch": 0.2927824534557511, "grad_norm": 0.0, - "learning_rate": 1.658213557174268e-05, - "loss": 1.017, + "learning_rate": 1.6591819171928462e-05, + "loss": 1.0729, "step": 10332 }, { - "epoch": 0.2932179341657208, + "epoch": 0.29281079089801354, "grad_norm": 0.0, - "learning_rate": 1.6581443633239197e-05, - "loss": 0.9946, + "learning_rate": 1.659112898177349e-05, + "loss": 0.8943, "step": 10333 }, { - "epoch": 0.29324631101021564, + "epoch": 0.29283912834027603, "grad_norm": 0.0, - "learning_rate": 1.6580751639141966e-05, - "loss": 0.959, + "learning_rate": 1.6590438736098987e-05, + "loss": 1.0595, "step": 10334 }, { - "epoch": 0.29327468785471056, + "epoch": 0.29286746578253847, "grad_norm": 0.0, - "learning_rate": 1.658005958945683e-05, - "loss": 0.984, + "learning_rate": 1.6589748434910774e-05, + "loss": 0.9966, "step": 10335 }, { - "epoch": 0.2933030646992055, + "epoch": 0.2928958032248009, "grad_norm": 0.0, - "learning_rate": 1.6579367484189632e-05, - "loss": 0.875, + "learning_rate": 1.6589058078214662e-05, + "loss": 1.0048, "step": 10336 }, { - "epoch": 0.29333144154370033, + "epoch": 0.2929241406670634, "grad_norm": 0.0, - "learning_rate": 1.6578675323346224e-05, - "loss": 0.8768, + "learning_rate": 1.658836766601647e-05, + "loss": 1.0554, "step": 10337 }, { - "epoch": 0.29335981838819525, + "epoch": 0.29295247810932584, "grad_norm": 0.0, - "learning_rate": 1.6577983106932448e-05, - "loss": 1.012, + "learning_rate": 1.6587677198322008e-05, + "loss": 0.9675, "step": 10338 }, { - "epoch": 0.2933881952326901, + "epoch": 0.29298081555158834, "grad_norm": 0.0, - "learning_rate": 1.6577290834954153e-05, - "loss": 0.9374, + "learning_rate": 1.6586986675137095e-05, + "loss": 0.7435, "step": 10339 }, { - "epoch": 0.293416572077185, + "epoch": 0.2930091529938508, "grad_norm": 0.0, - "learning_rate": 1.657659850741719e-05, - "loss": 0.9537, + "learning_rate": 1.6586296096467545e-05, + "loss": 0.9251, "step": 10340 }, { - "epoch": 0.2934449489216799, + "epoch": 0.2930374904361132, "grad_norm": 0.0, - "learning_rate": 1.6575906124327403e-05, - "loss": 0.917, + "learning_rate": 1.658560546231918e-05, + "loss": 0.86, "step": 10341 }, { - "epoch": 0.2934733257661748, + "epoch": 0.2930658278783757, "grad_norm": 0.0, - "learning_rate": 1.657521368569064e-05, - "loss": 0.9595, + "learning_rate": 1.6584914772697816e-05, + "loss": 1.0726, "step": 10342 }, { - "epoch": 0.2935017026106697, + "epoch": 0.29309416532063814, "grad_norm": 0.0, - "learning_rate": 1.657452119151275e-05, - "loss": 1.1124, + "learning_rate": 1.6584224027609268e-05, + "loss": 1.0045, "step": 10343 }, { - "epoch": 0.29353007945516457, + "epoch": 0.29312250276290064, "grad_norm": 0.0, - "learning_rate": 1.6573828641799585e-05, - "loss": 1.0206, + "learning_rate": 1.6583533227059353e-05, + "loss": 0.8388, "step": 10344 }, { - "epoch": 0.2935584562996595, + "epoch": 0.2931508402051631, "grad_norm": 0.0, - "learning_rate": 1.6573136036556994e-05, - "loss": 1.0439, + "learning_rate": 1.6582842371053897e-05, + "loss": 1.0746, "step": 10345 }, { - "epoch": 0.29358683314415435, + "epoch": 0.29317917764742557, "grad_norm": 0.0, - "learning_rate": 1.6572443375790824e-05, - "loss": 1.0724, + "learning_rate": 1.6582151459598716e-05, + "loss": 1.0064, "step": 10346 }, { - "epoch": 0.29361520998864926, + "epoch": 0.293207515089688, "grad_norm": 0.0, - "learning_rate": 1.6571750659506937e-05, - "loss": 1.0187, + "learning_rate": 1.6581460492699625e-05, + "loss": 0.9391, "step": 10347 }, { - "epoch": 0.2936435868331442, + "epoch": 0.29323585253195045, "grad_norm": 0.0, - "learning_rate": 1.657105788771117e-05, - "loss": 0.8777, + "learning_rate": 1.658076947036245e-05, + "loss": 0.9546, "step": 10348 }, { - "epoch": 0.29367196367763904, + "epoch": 0.29326418997421294, "grad_norm": 0.0, - "learning_rate": 1.6570365060409388e-05, - "loss": 0.9233, + "learning_rate": 1.6580078392593012e-05, + "loss": 0.8562, "step": 10349 }, { - "epoch": 0.29370034052213395, + "epoch": 0.2932925274164754, "grad_norm": 0.0, - "learning_rate": 1.6569672177607432e-05, - "loss": 0.9647, + "learning_rate": 1.657938725939713e-05, + "loss": 0.9723, "step": 10350 }, { - "epoch": 0.2937287173666288, + "epoch": 0.2933208648587379, "grad_norm": 0.0, - "learning_rate": 1.656897923931116e-05, - "loss": 0.9073, + "learning_rate": 1.657869607078062e-05, + "loss": 1.0364, "step": 10351 }, { - "epoch": 0.2937570942111237, + "epoch": 0.2933492023010003, "grad_norm": 0.0, - "learning_rate": 1.6568286245526425e-05, - "loss": 1.0138, + "learning_rate": 1.6578004826749317e-05, + "loss": 1.0085, "step": 10352 }, { - "epoch": 0.29378547105561864, + "epoch": 0.29337753974326275, "grad_norm": 0.0, - "learning_rate": 1.6567593196259084e-05, - "loss": 1.0235, + "learning_rate": 1.6577313527309035e-05, + "loss": 1.0529, "step": 10353 }, { - "epoch": 0.2938138479001135, + "epoch": 0.29340587718552524, "grad_norm": 0.0, - "learning_rate": 1.6566900091514987e-05, - "loss": 0.8978, + "learning_rate": 1.6576622172465598e-05, + "loss": 0.9133, "step": 10354 }, { - "epoch": 0.2938422247446084, + "epoch": 0.2934342146277877, "grad_norm": 0.0, - "learning_rate": 1.6566206931299987e-05, - "loss": 0.9675, + "learning_rate": 1.6575930762224828e-05, + "loss": 1.0285, "step": 10355 }, { - "epoch": 0.2938706015891033, + "epoch": 0.2934625520700502, "grad_norm": 0.0, - "learning_rate": 1.6565513715619944e-05, - "loss": 0.9174, + "learning_rate": 1.6575239296592554e-05, + "loss": 0.9592, "step": 10356 }, { - "epoch": 0.2938989784335982, + "epoch": 0.2934908895123126, "grad_norm": 0.0, - "learning_rate": 1.656482044448071e-05, - "loss": 0.9528, + "learning_rate": 1.65745477755746e-05, + "loss": 0.9308, "step": 10357 }, { - "epoch": 0.29392735527809305, + "epoch": 0.2935192269545751, "grad_norm": 0.0, - "learning_rate": 1.6564127117888148e-05, - "loss": 0.9989, + "learning_rate": 1.6573856199176782e-05, + "loss": 0.9048, "step": 10358 }, { - "epoch": 0.29395573212258796, + "epoch": 0.29354756439683755, "grad_norm": 0.0, - "learning_rate": 1.65634337358481e-05, - "loss": 0.927, + "learning_rate": 1.657316456740494e-05, + "loss": 0.9493, "step": 10359 }, { - "epoch": 0.2939841089670829, + "epoch": 0.2935759018391, "grad_norm": 0.0, - "learning_rate": 1.6562740298366437e-05, - "loss": 0.909, + "learning_rate": 1.6572472880264883e-05, + "loss": 0.8681, "step": 10360 }, { - "epoch": 0.29401248581157774, + "epoch": 0.2936042392813625, "grad_norm": 0.0, - "learning_rate": 1.6562046805449014e-05, - "loss": 0.992, + "learning_rate": 1.6571781137762456e-05, + "loss": 0.8808, "step": 10361 }, { - "epoch": 0.29404086265607265, + "epoch": 0.2936325767236249, "grad_norm": 0.0, - "learning_rate": 1.6561353257101684e-05, - "loss": 0.8493, + "learning_rate": 1.657108933990347e-05, + "loss": 0.9479, "step": 10362 }, { - "epoch": 0.2940692395005675, + "epoch": 0.2936609141658874, "grad_norm": 0.0, - "learning_rate": 1.656065965333031e-05, - "loss": 0.9653, + "learning_rate": 1.657039748669376e-05, + "loss": 1.0437, "step": 10363 }, { - "epoch": 0.29409761634506243, + "epoch": 0.29368925160814985, "grad_norm": 0.0, - "learning_rate": 1.655996599414075e-05, - "loss": 0.8084, + "learning_rate": 1.6569705578139152e-05, + "loss": 0.9793, "step": 10364 }, { - "epoch": 0.29412599318955734, + "epoch": 0.2937175890504123, "grad_norm": 0.0, - "learning_rate": 1.655927227953886e-05, - "loss": 0.9351, + "learning_rate": 1.6569013614245473e-05, + "loss": 0.9602, "step": 10365 }, { - "epoch": 0.2941543700340522, + "epoch": 0.2937459264926748, "grad_norm": 0.0, - "learning_rate": 1.65585785095305e-05, - "loss": 1.0904, + "learning_rate": 1.6568321595018554e-05, + "loss": 0.9572, "step": 10366 }, { - "epoch": 0.2941827468785471, + "epoch": 0.2937742639349372, "grad_norm": 0.0, - "learning_rate": 1.655788468412153e-05, - "loss": 0.896, + "learning_rate": 1.6567629520464222e-05, + "loss": 0.9308, "step": 10367 }, { - "epoch": 0.294211123723042, + "epoch": 0.2938026013771997, "grad_norm": 0.0, - "learning_rate": 1.6557190803317818e-05, - "loss": 1.036, + "learning_rate": 1.656693739058831e-05, + "loss": 0.8816, "step": 10368 }, { - "epoch": 0.2942395005675369, + "epoch": 0.29383093881946215, "grad_norm": 0.0, - "learning_rate": 1.655649686712522e-05, - "loss": 0.9383, + "learning_rate": 1.6566245205396647e-05, + "loss": 0.9766, "step": 10369 }, { - "epoch": 0.2942678774120318, + "epoch": 0.2938592762617246, "grad_norm": 0.0, - "learning_rate": 1.65558028755496e-05, - "loss": 0.9862, + "learning_rate": 1.656555296489506e-05, + "loss": 0.7447, "step": 10370 }, { - "epoch": 0.29429625425652667, + "epoch": 0.2938876137039871, "grad_norm": 0.0, - "learning_rate": 1.655510882859682e-05, - "loss": 0.953, + "learning_rate": 1.6564860669089382e-05, + "loss": 0.8925, "step": 10371 }, { - "epoch": 0.2943246311010216, + "epoch": 0.2939159511462495, "grad_norm": 0.0, - "learning_rate": 1.6554414726272738e-05, - "loss": 0.8811, + "learning_rate": 1.6564168317985444e-05, + "loss": 0.9873, "step": 10372 }, { - "epoch": 0.29435300794551644, + "epoch": 0.293944288588512, "grad_norm": 0.0, - "learning_rate": 1.655372056858322e-05, - "loss": 0.8859, + "learning_rate": 1.656347591158908e-05, + "loss": 1.0395, "step": 10373 }, { - "epoch": 0.29438138479001136, + "epoch": 0.29397262603077445, "grad_norm": 0.0, - "learning_rate": 1.655302635553413e-05, - "loss": 0.8231, + "learning_rate": 1.656278344990612e-05, + "loss": 0.9591, "step": 10374 }, { - "epoch": 0.2944097616345062, + "epoch": 0.29400096347303695, "grad_norm": 0.0, - "learning_rate": 1.655233208713133e-05, - "loss": 1.0352, + "learning_rate": 1.65620909329424e-05, + "loss": 1.0476, "step": 10375 }, { - "epoch": 0.29443813847900113, + "epoch": 0.2940293009152994, "grad_norm": 0.0, - "learning_rate": 1.655163776338069e-05, - "loss": 0.9446, + "learning_rate": 1.656139836070375e-05, + "loss": 0.9746, "step": 10376 }, { - "epoch": 0.29446651532349605, + "epoch": 0.2940576383575618, "grad_norm": 0.0, - "learning_rate": 1.6550943384288066e-05, - "loss": 0.9072, + "learning_rate": 1.6560705733196004e-05, + "loss": 1.0219, "step": 10377 }, { - "epoch": 0.2944948921679909, + "epoch": 0.2940859757998243, "grad_norm": 0.0, - "learning_rate": 1.6550248949859333e-05, - "loss": 0.9999, + "learning_rate": 1.6560013050425003e-05, + "loss": 0.8689, "step": 10378 }, { - "epoch": 0.2945232690124858, + "epoch": 0.29411431324208676, "grad_norm": 0.0, - "learning_rate": 1.6549554460100354e-05, - "loss": 1.0473, + "learning_rate": 1.6559320312396573e-05, + "loss": 0.9971, "step": 10379 }, { - "epoch": 0.2945516458569807, + "epoch": 0.29414265068434925, "grad_norm": 0.0, - "learning_rate": 1.6548859915016992e-05, - "loss": 0.8761, + "learning_rate": 1.6558627519116547e-05, + "loss": 0.9519, "step": 10380 }, { - "epoch": 0.2945800227014756, + "epoch": 0.2941709881266117, "grad_norm": 0.0, - "learning_rate": 1.6548165314615114e-05, - "loss": 0.9584, + "learning_rate": 1.655793467059077e-05, + "loss": 1.0473, "step": 10381 }, { - "epoch": 0.2946083995459705, + "epoch": 0.2941993255688741, "grad_norm": 0.0, - "learning_rate": 1.6547470658900592e-05, - "loss": 1.012, + "learning_rate": 1.6557241766825077e-05, + "loss": 0.8788, "step": 10382 }, { - "epoch": 0.29463677639046537, + "epoch": 0.2942276630111366, "grad_norm": 0.0, - "learning_rate": 1.6546775947879288e-05, - "loss": 1.0417, + "learning_rate": 1.6556548807825298e-05, + "loss": 0.8993, "step": 10383 }, { - "epoch": 0.2946651532349603, + "epoch": 0.29425600045339906, "grad_norm": 0.0, - "learning_rate": 1.6546081181557076e-05, - "loss": 0.9629, + "learning_rate": 1.6555855793597273e-05, + "loss": 0.8869, "step": 10384 }, { - "epoch": 0.29469353007945515, + "epoch": 0.29428433789566155, "grad_norm": 0.0, - "learning_rate": 1.654538635993982e-05, - "loss": 0.9156, + "learning_rate": 1.6555162724146844e-05, + "loss": 0.8806, "step": 10385 }, { - "epoch": 0.29472190692395006, + "epoch": 0.294312675337924, "grad_norm": 0.0, - "learning_rate": 1.6544691483033392e-05, - "loss": 0.8688, + "learning_rate": 1.655446959947984e-05, + "loss": 0.8719, "step": 10386 }, { - "epoch": 0.294750283768445, + "epoch": 0.2943410127801865, "grad_norm": 0.0, - "learning_rate": 1.654399655084366e-05, - "loss": 0.9031, + "learning_rate": 1.655377641960211e-05, + "loss": 0.995, "step": 10387 }, { - "epoch": 0.29477866061293984, + "epoch": 0.2943693502224489, "grad_norm": 0.0, - "learning_rate": 1.6543301563376498e-05, - "loss": 0.9926, + "learning_rate": 1.6553083184519483e-05, + "loss": 0.9643, "step": 10388 }, { - "epoch": 0.29480703745743475, + "epoch": 0.29439768766471136, "grad_norm": 0.0, - "learning_rate": 1.6542606520637772e-05, - "loss": 0.8979, + "learning_rate": 1.6552389894237806e-05, + "loss": 0.941, "step": 10389 }, { - "epoch": 0.2948354143019296, + "epoch": 0.29442602510697385, "grad_norm": 0.0, - "learning_rate": 1.6541911422633355e-05, - "loss": 1.0027, + "learning_rate": 1.6551696548762914e-05, + "loss": 0.9611, "step": 10390 }, { - "epoch": 0.2948637911464245, + "epoch": 0.2944543625492363, "grad_norm": 0.0, - "learning_rate": 1.654121626936912e-05, - "loss": 0.9398, + "learning_rate": 1.6551003148100647e-05, + "loss": 0.8554, "step": 10391 }, { - "epoch": 0.2948921679909194, + "epoch": 0.2944826999914988, "grad_norm": 0.0, - "learning_rate": 1.654052106085093e-05, - "loss": 0.9873, + "learning_rate": 1.655030969225685e-05, + "loss": 0.9559, "step": 10392 }, { - "epoch": 0.2949205448354143, + "epoch": 0.2945110374337612, "grad_norm": 0.0, - "learning_rate": 1.6539825797084674e-05, - "loss": 0.9636, + "learning_rate": 1.6549616181237365e-05, + "loss": 0.9544, "step": 10393 }, { - "epoch": 0.2949489216799092, + "epoch": 0.29453937487602366, "grad_norm": 0.0, - "learning_rate": 1.6539130478076208e-05, - "loss": 1.0197, + "learning_rate": 1.6548922615048028e-05, + "loss": 1.06, "step": 10394 }, { - "epoch": 0.2949772985244041, + "epoch": 0.29456771231828616, "grad_norm": 0.0, - "learning_rate": 1.6538435103831416e-05, - "loss": 0.9421, + "learning_rate": 1.6548228993694685e-05, + "loss": 1.0025, "step": 10395 }, { - "epoch": 0.295005675368899, + "epoch": 0.2945960497605486, "grad_norm": 0.0, - "learning_rate": 1.653773967435617e-05, - "loss": 1.0065, + "learning_rate": 1.6547535317183176e-05, + "loss": 0.9987, "step": 10396 }, { - "epoch": 0.29503405221339385, + "epoch": 0.2946243872028111, "grad_norm": 0.0, - "learning_rate": 1.6537044189656343e-05, - "loss": 0.8915, + "learning_rate": 1.6546841585519346e-05, + "loss": 0.9456, "step": 10397 }, { - "epoch": 0.29506242905788876, + "epoch": 0.2946527246450735, "grad_norm": 0.0, - "learning_rate": 1.6536348649737806e-05, - "loss": 0.9531, + "learning_rate": 1.654614779870904e-05, + "loss": 0.8733, "step": 10398 }, { - "epoch": 0.2950908059023837, + "epoch": 0.294681062087336, "grad_norm": 0.0, - "learning_rate": 1.653565305460644e-05, - "loss": 0.9549, + "learning_rate": 1.6545453956758098e-05, + "loss": 1.0686, "step": 10399 }, { - "epoch": 0.29511918274687854, + "epoch": 0.29470939952959846, "grad_norm": 0.0, - "learning_rate": 1.653495740426812e-05, - "loss": 1.0071, + "learning_rate": 1.654476005967237e-05, + "loss": 1.0109, "step": 10400 }, { - "epoch": 0.29514755959137345, + "epoch": 0.2947377369718609, "grad_norm": 0.0, - "learning_rate": 1.653426169872872e-05, - "loss": 0.9521, + "learning_rate": 1.6544066107457693e-05, + "loss": 0.949, "step": 10401 }, { - "epoch": 0.2951759364358683, + "epoch": 0.2947660744141234, "grad_norm": 0.0, - "learning_rate": 1.6533565937994116e-05, - "loss": 0.9363, + "learning_rate": 1.654337210011992e-05, + "loss": 1.0089, "step": 10402 }, { - "epoch": 0.29520431328036323, + "epoch": 0.29479441185638583, "grad_norm": 0.0, - "learning_rate": 1.6532870122070187e-05, - "loss": 1.0706, + "learning_rate": 1.6542678037664892e-05, + "loss": 0.8363, "step": 10403 }, { - "epoch": 0.2952326901248581, + "epoch": 0.2948227492986483, "grad_norm": 0.0, - "learning_rate": 1.6532174250962814e-05, - "loss": 1.0808, + "learning_rate": 1.6541983920098462e-05, + "loss": 0.9502, "step": 10404 }, { - "epoch": 0.295261066969353, + "epoch": 0.29485108674091076, "grad_norm": 0.0, - "learning_rate": 1.653147832467787e-05, - "loss": 1.0013, + "learning_rate": 1.6541289747426467e-05, + "loss": 1.0315, "step": 10405 }, { - "epoch": 0.2952894438138479, + "epoch": 0.2948794241831732, "grad_norm": 0.0, - "learning_rate": 1.6530782343221235e-05, - "loss": 0.9346, + "learning_rate": 1.6540595519654762e-05, + "loss": 1.0374, "step": 10406 }, { - "epoch": 0.2953178206583428, + "epoch": 0.2949077616254357, "grad_norm": 0.0, - "learning_rate": 1.6530086306598786e-05, - "loss": 0.9448, + "learning_rate": 1.6539901236789192e-05, + "loss": 0.9016, "step": 10407 }, { - "epoch": 0.2953461975028377, + "epoch": 0.29493609906769813, "grad_norm": 0.0, - "learning_rate": 1.652939021481641e-05, - "loss": 0.8943, + "learning_rate": 1.6539206898835604e-05, + "loss": 0.932, "step": 10408 }, { - "epoch": 0.29537457434733255, + "epoch": 0.2949644365099606, "grad_norm": 0.0, - "learning_rate": 1.6528694067879977e-05, - "loss": 1.0765, + "learning_rate": 1.653851250579985e-05, + "loss": 0.8981, "step": 10409 }, { - "epoch": 0.29540295119182747, + "epoch": 0.29499277395222306, "grad_norm": 0.0, - "learning_rate": 1.652799786579537e-05, - "loss": 0.9034, + "learning_rate": 1.653781805768777e-05, + "loss": 0.8884, "step": 10410 }, { - "epoch": 0.2954313280363224, + "epoch": 0.29502111139448556, "grad_norm": 0.0, - "learning_rate": 1.6527301608568476e-05, - "loss": 1.0261, + "learning_rate": 1.653712355450523e-05, + "loss": 1.0133, "step": 10411 }, { - "epoch": 0.29545970488081724, + "epoch": 0.295049448836748, "grad_norm": 0.0, - "learning_rate": 1.652660529620517e-05, - "loss": 0.9861, + "learning_rate": 1.653642899625807e-05, + "loss": 1.0668, "step": 10412 }, { - "epoch": 0.29548808172531216, + "epoch": 0.29507778627901043, "grad_norm": 0.0, - "learning_rate": 1.652590892871133e-05, - "loss": 1.0605, + "learning_rate": 1.6535734382952135e-05, + "loss": 1.0262, "step": 10413 }, { - "epoch": 0.295516458569807, + "epoch": 0.29510612372127293, "grad_norm": 0.0, - "learning_rate": 1.652521250609285e-05, - "loss": 1.0175, + "learning_rate": 1.6535039714593288e-05, + "loss": 0.9505, "step": 10414 }, { - "epoch": 0.29554483541430193, + "epoch": 0.29513446116353537, "grad_norm": 0.0, - "learning_rate": 1.6524516028355608e-05, - "loss": 0.9542, + "learning_rate": 1.6534344991187373e-05, + "loss": 0.9265, "step": 10415 }, { - "epoch": 0.29557321225879685, + "epoch": 0.29516279860579786, "grad_norm": 0.0, - "learning_rate": 1.6523819495505483e-05, - "loss": 0.952, + "learning_rate": 1.6533650212740243e-05, + "loss": 1.0354, "step": 10416 }, { - "epoch": 0.2956015891032917, + "epoch": 0.2951911360480603, "grad_norm": 0.0, - "learning_rate": 1.652312290754836e-05, - "loss": 0.9928, + "learning_rate": 1.653295537925775e-05, + "loss": 0.9903, "step": 10417 }, { - "epoch": 0.2956299659477866, + "epoch": 0.29521947349032274, "grad_norm": 0.0, - "learning_rate": 1.652242626449013e-05, - "loss": 0.9196, + "learning_rate": 1.653226049074575e-05, + "loss": 0.8917, "step": 10418 }, { - "epoch": 0.2956583427922815, + "epoch": 0.29524781093258523, "grad_norm": 0.0, - "learning_rate": 1.6521729566336668e-05, - "loss": 0.897, + "learning_rate": 1.6531565547210095e-05, + "loss": 0.9136, "step": 10419 }, { - "epoch": 0.2956867196367764, + "epoch": 0.29527614837484767, "grad_norm": 0.0, - "learning_rate": 1.6521032813093866e-05, - "loss": 0.8728, + "learning_rate": 1.6530870548656636e-05, + "loss": 0.9614, "step": 10420 }, { - "epoch": 0.29571509648127126, + "epoch": 0.29530448581711016, "grad_norm": 0.0, - "learning_rate": 1.6520336004767603e-05, - "loss": 1.0795, + "learning_rate": 1.653017549509123e-05, + "loss": 1.0182, "step": 10421 }, { - "epoch": 0.29574347332576617, + "epoch": 0.2953328232593726, "grad_norm": 0.0, - "learning_rate": 1.6519639141363768e-05, - "loss": 0.8537, + "learning_rate": 1.6529480386519728e-05, + "loss": 0.9429, "step": 10422 }, { - "epoch": 0.2957718501702611, + "epoch": 0.2953611607016351, "grad_norm": 0.0, - "learning_rate": 1.6518942222888253e-05, - "loss": 0.9287, + "learning_rate": 1.6528785222947995e-05, + "loss": 0.9371, "step": 10423 }, { - "epoch": 0.29580022701475595, + "epoch": 0.29538949814389753, "grad_norm": 0.0, - "learning_rate": 1.6518245249346937e-05, - "loss": 0.956, + "learning_rate": 1.6528090004381872e-05, + "loss": 1.0205, "step": 10424 }, { - "epoch": 0.29582860385925086, + "epoch": 0.29541783558615997, "grad_norm": 0.0, - "learning_rate": 1.651754822074571e-05, - "loss": 0.8387, + "learning_rate": 1.6527394730827227e-05, + "loss": 0.9577, "step": 10425 }, { - "epoch": 0.2958569807037457, + "epoch": 0.29544617302842247, "grad_norm": 0.0, - "learning_rate": 1.651685113709046e-05, - "loss": 0.8933, + "learning_rate": 1.6526699402289912e-05, + "loss": 0.9927, "step": 10426 }, { - "epoch": 0.29588535754824064, + "epoch": 0.2954745104706849, "grad_norm": 0.0, - "learning_rate": 1.6516153998387077e-05, - "loss": 0.9779, + "learning_rate": 1.6526004018775785e-05, + "loss": 0.8023, "step": 10427 }, { - "epoch": 0.29591373439273555, + "epoch": 0.2955028479129474, "grad_norm": 0.0, - "learning_rate": 1.6515456804641446e-05, - "loss": 1.0585, + "learning_rate": 1.65253085802907e-05, + "loss": 0.906, "step": 10428 }, { - "epoch": 0.2959421112372304, + "epoch": 0.29553118535520984, "grad_norm": 0.0, - "learning_rate": 1.651475955585946e-05, - "loss": 0.9295, + "learning_rate": 1.652461308684052e-05, + "loss": 0.9335, "step": 10429 }, { - "epoch": 0.2959704880817253, + "epoch": 0.2955595227974723, "grad_norm": 0.0, - "learning_rate": 1.651406225204701e-05, - "loss": 0.9521, + "learning_rate": 1.65239175384311e-05, + "loss": 1.0198, "step": 10430 }, { - "epoch": 0.2959988649262202, + "epoch": 0.29558786023973477, "grad_norm": 0.0, - "learning_rate": 1.651336489320998e-05, - "loss": 0.9576, + "learning_rate": 1.6523221935068302e-05, + "loss": 0.9226, "step": 10431 }, { - "epoch": 0.2960272417707151, + "epoch": 0.2956161976819972, "grad_norm": 0.0, - "learning_rate": 1.6512667479354263e-05, - "loss": 0.9755, + "learning_rate": 1.652252627675798e-05, + "loss": 0.9662, "step": 10432 }, { - "epoch": 0.29605561861521, + "epoch": 0.2956445351242597, "grad_norm": 0.0, - "learning_rate": 1.6511970010485755e-05, - "loss": 0.8557, + "learning_rate": 1.6521830563506e-05, + "loss": 1.0294, "step": 10433 }, { - "epoch": 0.2960839954597049, + "epoch": 0.29567287256652214, "grad_norm": 0.0, - "learning_rate": 1.651127248661034e-05, - "loss": 1.0968, + "learning_rate": 1.6521134795318214e-05, + "loss": 1.0757, "step": 10434 }, { - "epoch": 0.2961123723041998, + "epoch": 0.29570121000878463, "grad_norm": 0.0, - "learning_rate": 1.6510574907733916e-05, - "loss": 0.9675, + "learning_rate": 1.6520438972200496e-05, + "loss": 1.0036, "step": 10435 }, { - "epoch": 0.29614074914869465, + "epoch": 0.29572954745104707, "grad_norm": 0.0, - "learning_rate": 1.650987727386237e-05, - "loss": 0.9892, + "learning_rate": 1.6519743094158694e-05, + "loss": 1.0041, "step": 10436 }, { - "epoch": 0.29616912599318956, + "epoch": 0.2957578848933095, "grad_norm": 0.0, - "learning_rate": 1.65091795850016e-05, - "loss": 0.8173, + "learning_rate": 1.6519047161198675e-05, + "loss": 1.0185, "step": 10437 }, { - "epoch": 0.2961975028376844, + "epoch": 0.295786222335572, "grad_norm": 0.0, - "learning_rate": 1.6508481841157497e-05, - "loss": 0.9106, + "learning_rate": 1.6518351173326302e-05, + "loss": 0.9554, "step": 10438 }, { - "epoch": 0.29622587968217934, + "epoch": 0.29581455977783444, "grad_norm": 0.0, - "learning_rate": 1.6507784042335958e-05, - "loss": 0.9489, + "learning_rate": 1.6517655130547435e-05, + "loss": 0.9637, "step": 10439 }, { - "epoch": 0.29625425652667425, + "epoch": 0.29584289722009693, "grad_norm": 0.0, - "learning_rate": 1.6507086188542875e-05, - "loss": 1.0163, + "learning_rate": 1.651695903286794e-05, + "loss": 1.0563, "step": 10440 }, { - "epoch": 0.2962826333711691, + "epoch": 0.2958712346623594, "grad_norm": 0.0, - "learning_rate": 1.650638827978414e-05, - "loss": 0.9752, + "learning_rate": 1.6516262880293684e-05, + "loss": 1.0716, "step": 10441 }, { - "epoch": 0.29631101021566403, + "epoch": 0.2958995721046218, "grad_norm": 0.0, - "learning_rate": 1.6505690316065644e-05, - "loss": 0.917, + "learning_rate": 1.651556667283052e-05, + "loss": 0.9623, "step": 10442 }, { - "epoch": 0.2963393870601589, + "epoch": 0.2959279095468843, "grad_norm": 0.0, - "learning_rate": 1.65049922973933e-05, - "loss": 0.9523, + "learning_rate": 1.6514870410484317e-05, + "loss": 1.0671, "step": 10443 }, { - "epoch": 0.2963677639046538, + "epoch": 0.29595624698914674, "grad_norm": 0.0, - "learning_rate": 1.6504294223772987e-05, - "loss": 0.9373, + "learning_rate": 1.6514174093260947e-05, + "loss": 0.9761, "step": 10444 }, { - "epoch": 0.2963961407491487, + "epoch": 0.29598458443140924, "grad_norm": 0.0, - "learning_rate": 1.650359609521061e-05, - "loss": 0.9915, + "learning_rate": 1.6513477721166268e-05, + "loss": 1.0181, "step": 10445 }, { - "epoch": 0.2964245175936436, + "epoch": 0.2960129218736717, "grad_norm": 0.0, - "learning_rate": 1.6502897911712067e-05, - "loss": 0.8809, + "learning_rate": 1.6512781294206144e-05, + "loss": 0.9743, "step": 10446 }, { - "epoch": 0.2964528944381385, + "epoch": 0.29604125931593417, "grad_norm": 0.0, - "learning_rate": 1.650219967328325e-05, - "loss": 1.0578, + "learning_rate": 1.6512084812386447e-05, + "loss": 0.9631, "step": 10447 }, { - "epoch": 0.29648127128263335, + "epoch": 0.2960695967581966, "grad_norm": 0.0, - "learning_rate": 1.6501501379930066e-05, - "loss": 1.0373, + "learning_rate": 1.651138827571304e-05, + "loss": 0.985, "step": 10448 }, { - "epoch": 0.29650964812712827, + "epoch": 0.29609793420045905, "grad_norm": 0.0, - "learning_rate": 1.65008030316584e-05, - "loss": 0.9699, + "learning_rate": 1.6510691684191795e-05, + "loss": 1.047, "step": 10449 }, { - "epoch": 0.2965380249716232, + "epoch": 0.29612627164272154, "grad_norm": 0.0, - "learning_rate": 1.6500104628474163e-05, - "loss": 1.0515, + "learning_rate": 1.6509995037828575e-05, + "loss": 0.8994, "step": 10450 }, { - "epoch": 0.29656640181611804, + "epoch": 0.296154609084984, "grad_norm": 0.0, - "learning_rate": 1.649940617038325e-05, - "loss": 0.8617, + "learning_rate": 1.6509298336629246e-05, + "loss": 0.8872, "step": 10451 }, { - "epoch": 0.29659477866061296, + "epoch": 0.29618294652724647, "grad_norm": 0.0, - "learning_rate": 1.6498707657391558e-05, - "loss": 0.9484, + "learning_rate": 1.650860158059968e-05, + "loss": 0.9446, "step": 10452 }, { - "epoch": 0.2966231555051078, + "epoch": 0.2962112839695089, "grad_norm": 0.0, - "learning_rate": 1.649800908950499e-05, - "loss": 0.9896, + "learning_rate": 1.650790476974575e-05, + "loss": 0.8313, "step": 10453 }, { - "epoch": 0.29665153234960273, + "epoch": 0.29623962141177135, "grad_norm": 0.0, - "learning_rate": 1.6497310466729448e-05, - "loss": 1.0672, + "learning_rate": 1.650720790407332e-05, + "loss": 1.0052, "step": 10454 }, { - "epoch": 0.2966799091940976, + "epoch": 0.29626795885403384, "grad_norm": 0.0, - "learning_rate": 1.6496611789070836e-05, - "loss": 1.0078, + "learning_rate": 1.650651098358826e-05, + "loss": 0.8812, "step": 10455 }, { - "epoch": 0.2967082860385925, + "epoch": 0.2962962962962963, "grad_norm": 0.0, - "learning_rate": 1.649591305653505e-05, - "loss": 0.9779, + "learning_rate": 1.6505814008296446e-05, + "loss": 0.9482, "step": 10456 }, { - "epoch": 0.2967366628830874, + "epoch": 0.2963246337385588, "grad_norm": 0.0, - "learning_rate": 1.649521426912799e-05, - "loss": 1.0903, + "learning_rate": 1.650511697820374e-05, + "loss": 0.9662, "step": 10457 }, { - "epoch": 0.2967650397275823, + "epoch": 0.2963529711808212, "grad_norm": 0.0, - "learning_rate": 1.649451542685557e-05, - "loss": 0.9252, + "learning_rate": 1.650441989331602e-05, + "loss": 0.9495, "step": 10458 }, { - "epoch": 0.2967934165720772, + "epoch": 0.2963813086230837, "grad_norm": 0.0, - "learning_rate": 1.6493816529723683e-05, - "loss": 0.9895, + "learning_rate": 1.6503722753639155e-05, + "loss": 0.8971, "step": 10459 }, { - "epoch": 0.29682179341657206, + "epoch": 0.29640964606534614, "grad_norm": 0.0, - "learning_rate": 1.6493117577738235e-05, - "loss": 1.0348, + "learning_rate": 1.650302555917902e-05, + "loss": 1.018, "step": 10460 }, { - "epoch": 0.29685017026106697, + "epoch": 0.2964379835076086, "grad_norm": 0.0, - "learning_rate": 1.6492418570905134e-05, - "loss": 1.0025, + "learning_rate": 1.6502328309941488e-05, + "loss": 0.9409, "step": 10461 }, { - "epoch": 0.2968785471055619, + "epoch": 0.2964663209498711, "grad_norm": 0.0, - "learning_rate": 1.649171950923028e-05, - "loss": 0.9625, + "learning_rate": 1.6501631005932425e-05, + "loss": 0.9514, "step": 10462 }, { - "epoch": 0.29690692395005674, + "epoch": 0.2964946583921335, "grad_norm": 0.0, - "learning_rate": 1.6491020392719575e-05, - "loss": 0.9555, + "learning_rate": 1.6500933647157712e-05, + "loss": 0.9128, "step": 10463 }, { - "epoch": 0.29693530079455166, + "epoch": 0.296522995834396, "grad_norm": 0.0, - "learning_rate": 1.6490321221378933e-05, - "loss": 0.9288, + "learning_rate": 1.650023623362322e-05, + "loss": 0.942, "step": 10464 }, { - "epoch": 0.2969636776390465, + "epoch": 0.29655133327665845, "grad_norm": 0.0, - "learning_rate": 1.648962199521426e-05, - "loss": 0.9069, + "learning_rate": 1.6499538765334825e-05, + "loss": 0.9176, "step": 10465 }, { - "epoch": 0.29699205448354143, + "epoch": 0.2965796707189209, "grad_norm": 0.0, - "learning_rate": 1.6488922714231453e-05, - "loss": 1.0653, + "learning_rate": 1.64988412422984e-05, + "loss": 1.0012, "step": 10466 }, { - "epoch": 0.29702043132803635, + "epoch": 0.2966080081611834, "grad_norm": 0.0, - "learning_rate": 1.6488223378436423e-05, - "loss": 0.979, + "learning_rate": 1.6498143664519822e-05, + "loss": 0.9506, "step": 10467 }, { - "epoch": 0.2970488081725312, + "epoch": 0.2966363456034458, "grad_norm": 0.0, - "learning_rate": 1.6487523987835082e-05, - "loss": 0.8968, + "learning_rate": 1.649744603200497e-05, + "loss": 0.8983, "step": 10468 }, { - "epoch": 0.2970771850170261, + "epoch": 0.2966646830457083, "grad_norm": 0.0, - "learning_rate": 1.648682454243333e-05, - "loss": 1.0175, + "learning_rate": 1.6496748344759715e-05, + "loss": 1.0838, "step": 10469 }, { - "epoch": 0.297105561861521, + "epoch": 0.29669302048797075, "grad_norm": 0.0, - "learning_rate": 1.6486125042237084e-05, - "loss": 1.1928, + "learning_rate": 1.6496050602789936e-05, + "loss": 0.9658, "step": 10470 }, { - "epoch": 0.2971339387060159, + "epoch": 0.29672135793023324, "grad_norm": 0.0, - "learning_rate": 1.6485425487252245e-05, - "loss": 1.0895, + "learning_rate": 1.649535280610151e-05, + "loss": 0.8289, "step": 10471 }, { - "epoch": 0.29716231555051076, + "epoch": 0.2967496953724957, "grad_norm": 0.0, - "learning_rate": 1.648472587748473e-05, - "loss": 0.8467, + "learning_rate": 1.649465495470032e-05, + "loss": 0.9332, "step": 10472 }, { - "epoch": 0.2971906923950057, + "epoch": 0.2967780328147581, "grad_norm": 0.0, - "learning_rate": 1.6484026212940442e-05, - "loss": 0.8208, + "learning_rate": 1.6493957048592234e-05, + "loss": 0.8845, "step": 10473 }, { - "epoch": 0.2972190692395006, + "epoch": 0.2968063702570206, "grad_norm": 0.0, - "learning_rate": 1.6483326493625292e-05, - "loss": 0.9478, + "learning_rate": 1.649325908778314e-05, + "loss": 1.0227, "step": 10474 }, { - "epoch": 0.29724744608399545, + "epoch": 0.29683470769928305, "grad_norm": 0.0, - "learning_rate": 1.648262671954519e-05, - "loss": 1.0533, + "learning_rate": 1.649256107227891e-05, + "loss": 1.0481, "step": 10475 }, { - "epoch": 0.29727582292849036, + "epoch": 0.29686304514154555, "grad_norm": 0.0, - "learning_rate": 1.648192689070605e-05, - "loss": 0.9829, + "learning_rate": 1.6491863002085428e-05, + "loss": 0.8441, "step": 10476 }, { - "epoch": 0.2973041997729852, + "epoch": 0.296891382583808, "grad_norm": 0.0, - "learning_rate": 1.6481227007113784e-05, - "loss": 0.8857, + "learning_rate": 1.6491164877208574e-05, + "loss": 1.006, "step": 10477 }, { - "epoch": 0.29733257661748014, + "epoch": 0.2969197200260704, "grad_norm": 0.0, - "learning_rate": 1.6480527068774298e-05, - "loss": 0.9582, + "learning_rate": 1.649046669765423e-05, + "loss": 1.088, "step": 10478 }, { - "epoch": 0.29736095346197505, + "epoch": 0.2969480574683329, "grad_norm": 0.0, - "learning_rate": 1.6479827075693512e-05, - "loss": 1.0173, + "learning_rate": 1.648976846342827e-05, + "loss": 0.9018, "step": 10479 }, { - "epoch": 0.2973893303064699, + "epoch": 0.29697639491059535, "grad_norm": 0.0, - "learning_rate": 1.6479127027877335e-05, - "loss": 0.9938, + "learning_rate": 1.6489070174536586e-05, + "loss": 0.954, "step": 10480 }, { - "epoch": 0.2974177071509648, + "epoch": 0.29700473235285785, "grad_norm": 0.0, - "learning_rate": 1.647842692533168e-05, - "loss": 0.9803, + "learning_rate": 1.648837183098505e-05, + "loss": 0.9702, "step": 10481 }, { - "epoch": 0.2974460839954597, + "epoch": 0.2970330697951203, "grad_norm": 0.0, - "learning_rate": 1.647772676806246e-05, - "loss": 1.0032, + "learning_rate": 1.648767343277955e-05, + "loss": 0.9495, "step": 10482 }, { - "epoch": 0.2974744608399546, + "epoch": 0.2970614072373828, "grad_norm": 0.0, - "learning_rate": 1.647702655607559e-05, - "loss": 1.0016, + "learning_rate": 1.6486974979925968e-05, + "loss": 0.8703, "step": 10483 }, { - "epoch": 0.29750283768444946, + "epoch": 0.2970897446796452, "grad_norm": 0.0, - "learning_rate": 1.647632628937699e-05, - "loss": 0.8601, + "learning_rate": 1.6486276472430186e-05, + "loss": 0.8482, "step": 10484 }, { - "epoch": 0.2975312145289444, + "epoch": 0.29711808212190766, "grad_norm": 0.0, - "learning_rate": 1.6475625967972573e-05, - "loss": 0.9631, + "learning_rate": 1.648557791029809e-05, + "loss": 0.9544, "step": 10485 }, { - "epoch": 0.2975595913734393, + "epoch": 0.29714641956417015, "grad_norm": 0.0, - "learning_rate": 1.647492559186825e-05, - "loss": 0.9857, + "learning_rate": 1.6484879293535562e-05, + "loss": 0.9238, "step": 10486 }, { - "epoch": 0.29758796821793415, + "epoch": 0.2971747570064326, "grad_norm": 0.0, - "learning_rate": 1.6474225161069936e-05, - "loss": 0.9233, + "learning_rate": 1.648418062214849e-05, + "loss": 0.9958, "step": 10487 }, { - "epoch": 0.29761634506242907, + "epoch": 0.2972030944486951, "grad_norm": 0.0, - "learning_rate": 1.647352467558355e-05, - "loss": 0.8458, + "learning_rate": 1.648348189614275e-05, + "loss": 1.0689, "step": 10488 }, { - "epoch": 0.2976447219069239, + "epoch": 0.2972314318909575, "grad_norm": 0.0, - "learning_rate": 1.6472824135415013e-05, - "loss": 0.98, + "learning_rate": 1.648278311552424e-05, + "loss": 0.9949, "step": 10489 }, { - "epoch": 0.29767309875141884, + "epoch": 0.29725976933321996, "grad_norm": 0.0, - "learning_rate": 1.647212354057024e-05, - "loss": 0.9778, + "learning_rate": 1.648208428029884e-05, + "loss": 1.0662, "step": 10490 }, { - "epoch": 0.29770147559591376, + "epoch": 0.29728810677548245, "grad_norm": 0.0, - "learning_rate": 1.647142289105515e-05, - "loss": 0.9829, + "learning_rate": 1.6481385390472438e-05, + "loss": 0.9107, "step": 10491 }, { - "epoch": 0.2977298524404086, + "epoch": 0.2973164442177449, "grad_norm": 0.0, - "learning_rate": 1.6470722186875656e-05, - "loss": 0.945, + "learning_rate": 1.6480686446050916e-05, + "loss": 1.0672, "step": 10492 }, { - "epoch": 0.29775822928490353, + "epoch": 0.2973447816600074, "grad_norm": 0.0, - "learning_rate": 1.6470021428037682e-05, - "loss": 1.0518, + "learning_rate": 1.647998744704017e-05, + "loss": 0.9094, "step": 10493 }, { - "epoch": 0.2977866061293984, + "epoch": 0.2973731191022698, "grad_norm": 0.0, - "learning_rate": 1.646932061454715e-05, - "loss": 0.9154, + "learning_rate": 1.647928839344608e-05, + "loss": 1.0302, "step": 10494 }, { - "epoch": 0.2978149829738933, + "epoch": 0.2974014565445323, "grad_norm": 0.0, - "learning_rate": 1.6468619746409972e-05, - "loss": 0.9088, + "learning_rate": 1.647858928527454e-05, + "loss": 0.9135, "step": 10495 }, { - "epoch": 0.2978433598183882, + "epoch": 0.29742979398679475, "grad_norm": 0.0, - "learning_rate": 1.646791882363207e-05, - "loss": 0.9492, + "learning_rate": 1.6477890122531433e-05, + "loss": 1.0013, "step": 10496 }, { - "epoch": 0.2978717366628831, + "epoch": 0.2974581314290572, "grad_norm": 0.0, - "learning_rate": 1.6467217846219372e-05, - "loss": 1.0526, + "learning_rate": 1.647719090522266e-05, + "loss": 0.9521, "step": 10497 }, { - "epoch": 0.297900113507378, + "epoch": 0.2974864688713197, "grad_norm": 0.0, - "learning_rate": 1.6466516814177792e-05, - "loss": 0.8661, + "learning_rate": 1.6476491633354096e-05, + "loss": 0.8983, "step": 10498 }, { - "epoch": 0.29792849035187285, + "epoch": 0.2975148063135821, "grad_norm": 0.0, - "learning_rate": 1.6465815727513253e-05, - "loss": 1.0073, + "learning_rate": 1.647579230693164e-05, + "loss": 0.9568, "step": 10499 }, { - "epoch": 0.29795686719636777, + "epoch": 0.2975431437558446, "grad_norm": 0.0, - "learning_rate": 1.646511458623168e-05, - "loss": 0.8922, + "learning_rate": 1.6475092925961177e-05, + "loss": 0.9736, "step": 10500 }, { - "epoch": 0.29798524404086263, + "epoch": 0.29757148119810706, "grad_norm": 0.0, - "learning_rate": 1.6464413390338993e-05, - "loss": 0.982, + "learning_rate": 1.6474393490448607e-05, + "loss": 0.9756, "step": 10501 }, { - "epoch": 0.29801362088535754, + "epoch": 0.2975998186403695, "grad_norm": 0.0, - "learning_rate": 1.6463712139841114e-05, - "loss": 0.9609, + "learning_rate": 1.6473694000399815e-05, + "loss": 0.9875, "step": 10502 }, { - "epoch": 0.29804199772985246, + "epoch": 0.297628156082632, "grad_norm": 0.0, - "learning_rate": 1.6463010834743965e-05, - "loss": 0.8873, + "learning_rate": 1.6472994455820694e-05, + "loss": 0.8842, "step": 10503 }, { - "epoch": 0.2980703745743473, + "epoch": 0.2976564935248944, "grad_norm": 0.0, - "learning_rate": 1.646230947505348e-05, - "loss": 1.0826, + "learning_rate": 1.6472294856717134e-05, + "loss": 1.0721, "step": 10504 }, { - "epoch": 0.29809875141884223, + "epoch": 0.2976848309671569, "grad_norm": 0.0, - "learning_rate": 1.646160806077557e-05, - "loss": 0.9301, + "learning_rate": 1.6471595203095034e-05, + "loss": 0.9242, "step": 10505 }, { - "epoch": 0.2981271282633371, + "epoch": 0.29771316840941936, "grad_norm": 0.0, - "learning_rate": 1.646090659191617e-05, - "loss": 0.9974, + "learning_rate": 1.6470895494960286e-05, + "loss": 0.9334, "step": 10506 }, { - "epoch": 0.298155505107832, + "epoch": 0.29774150585168185, "grad_norm": 0.0, - "learning_rate": 1.64602050684812e-05, - "loss": 0.9464, + "learning_rate": 1.6470195732318784e-05, + "loss": 0.9336, "step": 10507 }, { - "epoch": 0.2981838819523269, + "epoch": 0.2977698432939443, "grad_norm": 0.0, - "learning_rate": 1.645950349047659e-05, - "loss": 0.8804, + "learning_rate": 1.6469495915176414e-05, + "loss": 1.0491, "step": 10508 }, { - "epoch": 0.2982122587968218, + "epoch": 0.29779818073620673, "grad_norm": 0.0, - "learning_rate": 1.645880185790826e-05, - "loss": 0.906, + "learning_rate": 1.6468796043539082e-05, + "loss": 0.9518, "step": 10509 }, { - "epoch": 0.2982406356413167, + "epoch": 0.2978265181784692, "grad_norm": 0.0, - "learning_rate": 1.6458100170782145e-05, - "loss": 0.9689, + "learning_rate": 1.6468096117412676e-05, + "loss": 0.8937, "step": 10510 }, { - "epoch": 0.29826901248581156, + "epoch": 0.29785485562073166, "grad_norm": 0.0, - "learning_rate": 1.645739842910416e-05, - "loss": 0.9296, + "learning_rate": 1.64673961368031e-05, + "loss": 0.9984, "step": 10511 }, { - "epoch": 0.2982973893303065, + "epoch": 0.29788319306299416, "grad_norm": 0.0, - "learning_rate": 1.645669663288025e-05, - "loss": 1.0331, + "learning_rate": 1.646669610171624e-05, + "loss": 0.9524, "step": 10512 }, { - "epoch": 0.2983257661748014, + "epoch": 0.2979115305052566, "grad_norm": 0.0, - "learning_rate": 1.6455994782116325e-05, - "loss": 1.0778, + "learning_rate": 1.6465996012157996e-05, + "loss": 1.0098, "step": 10513 }, { - "epoch": 0.29835414301929625, + "epoch": 0.29793986794751903, "grad_norm": 0.0, - "learning_rate": 1.6455292876818325e-05, - "loss": 1.0017, + "learning_rate": 1.646529586813427e-05, + "loss": 0.9491, "step": 10514 }, { - "epoch": 0.29838251986379116, + "epoch": 0.2979682053897815, "grad_norm": 0.0, - "learning_rate": 1.6454590916992176e-05, - "loss": 0.9957, + "learning_rate": 1.6464595669650954e-05, + "loss": 0.977, "step": 10515 }, { - "epoch": 0.298410896708286, + "epoch": 0.29799654283204396, "grad_norm": 0.0, - "learning_rate": 1.6453888902643807e-05, - "loss": 1.0057, + "learning_rate": 1.6463895416713952e-05, + "loss": 1.0444, "step": 10516 }, { - "epoch": 0.29843927355278094, + "epoch": 0.29802488027430646, "grad_norm": 0.0, - "learning_rate": 1.645318683377915e-05, - "loss": 1.0489, + "learning_rate": 1.6463195109329156e-05, + "loss": 0.9176, "step": 10517 }, { - "epoch": 0.2984676503972758, + "epoch": 0.2980532177165689, "grad_norm": 0.0, - "learning_rate": 1.6452484710404132e-05, - "loss": 0.9353, + "learning_rate": 1.6462494747502467e-05, + "loss": 1.0066, "step": 10518 }, { - "epoch": 0.2984960272417707, + "epoch": 0.2980815551588314, "grad_norm": 0.0, - "learning_rate": 1.6451782532524686e-05, - "loss": 0.9721, + "learning_rate": 1.6461794331239785e-05, + "loss": 0.8252, "step": 10519 }, { - "epoch": 0.2985244040862656, + "epoch": 0.29810989260109383, "grad_norm": 0.0, - "learning_rate": 1.6451080300146746e-05, - "loss": 0.9506, + "learning_rate": 1.646109386054701e-05, + "loss": 0.8702, "step": 10520 }, { - "epoch": 0.2985527809307605, + "epoch": 0.29813823004335627, "grad_norm": 0.0, - "learning_rate": 1.6450378013276233e-05, - "loss": 0.7543, + "learning_rate": 1.646039333543004e-05, + "loss": 1.067, "step": 10521 }, { - "epoch": 0.2985811577752554, + "epoch": 0.29816656748561876, "grad_norm": 0.0, - "learning_rate": 1.644967567191909e-05, - "loss": 0.9481, + "learning_rate": 1.645969275589478e-05, + "loss": 0.8951, "step": 10522 }, { - "epoch": 0.29860953461975026, + "epoch": 0.2981949049278812, "grad_norm": 0.0, - "learning_rate": 1.6448973276081252e-05, - "loss": 0.8884, + "learning_rate": 1.645899212194713e-05, + "loss": 0.8654, "step": 10523 }, { - "epoch": 0.2986379114642452, + "epoch": 0.2982232423701437, "grad_norm": 0.0, - "learning_rate": 1.6448270825768644e-05, - "loss": 0.9091, + "learning_rate": 1.645829143359299e-05, + "loss": 0.9075, "step": 10524 }, { - "epoch": 0.2986662883087401, + "epoch": 0.29825157981240613, "grad_norm": 0.0, - "learning_rate": 1.64475683209872e-05, - "loss": 1.0452, + "learning_rate": 1.645759069083826e-05, + "loss": 0.9391, "step": 10525 }, { - "epoch": 0.29869466515323495, + "epoch": 0.29827991725466857, "grad_norm": 0.0, - "learning_rate": 1.644686576174286e-05, - "loss": 0.9901, + "learning_rate": 1.6456889893688855e-05, + "loss": 0.9211, "step": 10526 }, { - "epoch": 0.29872304199772987, + "epoch": 0.29830825469693106, "grad_norm": 0.0, - "learning_rate": 1.644616314804155e-05, - "loss": 0.9002, + "learning_rate": 1.645618904215066e-05, + "loss": 0.9199, "step": 10527 }, { - "epoch": 0.2987514188422247, + "epoch": 0.2983365921391935, "grad_norm": 0.0, - "learning_rate": 1.6445460479889214e-05, - "loss": 0.9273, + "learning_rate": 1.6455488136229592e-05, + "loss": 1.0012, "step": 10528 }, { - "epoch": 0.29877979568671964, + "epoch": 0.298364929581456, "grad_norm": 0.0, - "learning_rate": 1.6444757757291783e-05, - "loss": 0.8894, + "learning_rate": 1.6454787175931547e-05, + "loss": 1.0029, "step": 10529 }, { - "epoch": 0.29880817253121456, + "epoch": 0.29839326702371843, "grad_norm": 0.0, - "learning_rate": 1.6444054980255192e-05, - "loss": 0.9747, + "learning_rate": 1.6454086161262436e-05, + "loss": 0.9487, "step": 10530 }, { - "epoch": 0.2988365493757094, + "epoch": 0.2984216044659809, "grad_norm": 0.0, - "learning_rate": 1.644335214878538e-05, - "loss": 0.8592, + "learning_rate": 1.645338509222816e-05, + "loss": 1.0363, "step": 10531 }, { - "epoch": 0.29886492622020433, + "epoch": 0.29844994190824337, "grad_norm": 0.0, - "learning_rate": 1.644264926288828e-05, - "loss": 0.8994, + "learning_rate": 1.645268396883462e-05, + "loss": 0.9528, "step": 10532 }, { - "epoch": 0.2988933030646992, + "epoch": 0.2984782793505058, "grad_norm": 0.0, - "learning_rate": 1.644194632256984e-05, - "loss": 0.9432, + "learning_rate": 1.645198279108773e-05, + "loss": 0.9228, "step": 10533 }, { - "epoch": 0.2989216799091941, + "epoch": 0.2985066167927683, "grad_norm": 0.0, - "learning_rate": 1.6441243327835983e-05, - "loss": 1.0289, + "learning_rate": 1.6451281558993394e-05, + "loss": 0.9247, "step": 10534 }, { - "epoch": 0.29895005675368896, + "epoch": 0.29853495423503074, "grad_norm": 0.0, - "learning_rate": 1.6440540278692656e-05, - "loss": 0.8501, + "learning_rate": 1.6450580272557516e-05, + "loss": 0.9135, "step": 10535 }, { - "epoch": 0.2989784335981839, + "epoch": 0.29856329167729323, "grad_norm": 0.0, - "learning_rate": 1.6439837175145793e-05, - "loss": 0.9233, + "learning_rate": 1.6449878931786007e-05, + "loss": 1.0192, "step": 10536 }, { - "epoch": 0.2990068104426788, + "epoch": 0.29859162911955567, "grad_norm": 0.0, - "learning_rate": 1.643913401720134e-05, - "loss": 0.9986, + "learning_rate": 1.644917753668477e-05, + "loss": 0.9124, "step": 10537 }, { - "epoch": 0.29903518728717365, + "epoch": 0.2986199665618181, "grad_norm": 0.0, - "learning_rate": 1.6438430804865232e-05, - "loss": 0.9184, + "learning_rate": 1.644847608725972e-05, + "loss": 0.882, "step": 10538 }, { - "epoch": 0.29906356413166857, + "epoch": 0.2986483040040806, "grad_norm": 0.0, - "learning_rate": 1.643772753814341e-05, - "loss": 0.7796, + "learning_rate": 1.6447774583516756e-05, + "loss": 1.0104, "step": 10539 }, { - "epoch": 0.29909194097616343, + "epoch": 0.29867664144634304, "grad_norm": 0.0, - "learning_rate": 1.6437024217041813e-05, - "loss": 0.8943, + "learning_rate": 1.6447073025461797e-05, + "loss": 0.9783, "step": 10540 }, { - "epoch": 0.29912031782065834, + "epoch": 0.29870497888860553, "grad_norm": 0.0, - "learning_rate": 1.643632084156638e-05, - "loss": 0.9514, + "learning_rate": 1.6446371413100746e-05, + "loss": 0.8413, "step": 10541 }, { - "epoch": 0.29914869466515326, + "epoch": 0.29873331633086797, "grad_norm": 0.0, - "learning_rate": 1.643561741172306e-05, - "loss": 0.9556, + "learning_rate": 1.6445669746439514e-05, + "loss": 0.8907, "step": 10542 }, { - "epoch": 0.2991770715096481, + "epoch": 0.29876165377313046, "grad_norm": 0.0, - "learning_rate": 1.6434913927517787e-05, - "loss": 1.0236, + "learning_rate": 1.6444968025484015e-05, + "loss": 0.8365, "step": 10543 }, { - "epoch": 0.29920544835414303, + "epoch": 0.2987899912153929, "grad_norm": 0.0, - "learning_rate": 1.643421038895651e-05, - "loss": 0.9366, + "learning_rate": 1.644426625024015e-05, + "loss": 0.8401, "step": 10544 }, { - "epoch": 0.2992338251986379, + "epoch": 0.29881832865765534, "grad_norm": 0.0, - "learning_rate": 1.6433506796045166e-05, - "loss": 0.9635, + "learning_rate": 1.6443564420713846e-05, + "loss": 0.8677, "step": 10545 }, { - "epoch": 0.2992622020431328, + "epoch": 0.29884666609991783, "grad_norm": 0.0, - "learning_rate": 1.6432803148789704e-05, - "loss": 0.9949, + "learning_rate": 1.6442862536911e-05, + "loss": 0.9232, "step": 10546 }, { - "epoch": 0.2992905788876277, + "epoch": 0.2988750035421803, "grad_norm": 0.0, - "learning_rate": 1.643209944719606e-05, - "loss": 0.9774, + "learning_rate": 1.6442160598837532e-05, + "loss": 0.9879, "step": 10547 }, { - "epoch": 0.2993189557321226, + "epoch": 0.29890334098444277, "grad_norm": 0.0, - "learning_rate": 1.6431395691270188e-05, - "loss": 0.9965, + "learning_rate": 1.6441458606499355e-05, + "loss": 1.0066, "step": 10548 }, { - "epoch": 0.2993473325766175, + "epoch": 0.2989316784267052, "grad_norm": 0.0, - "learning_rate": 1.6430691881018024e-05, - "loss": 0.9183, + "learning_rate": 1.644075655990238e-05, + "loss": 0.9769, "step": 10549 }, { - "epoch": 0.29937570942111236, + "epoch": 0.29896001586896764, "grad_norm": 0.0, - "learning_rate": 1.6429988016445518e-05, - "loss": 0.9218, + "learning_rate": 1.644005445905252e-05, + "loss": 0.803, "step": 10550 }, { - "epoch": 0.29940408626560727, + "epoch": 0.29898835331123014, "grad_norm": 0.0, - "learning_rate": 1.6429284097558614e-05, - "loss": 0.8699, + "learning_rate": 1.643935230395569e-05, + "loss": 0.9092, "step": 10551 }, { - "epoch": 0.29943246311010213, + "epoch": 0.2990166907534926, "grad_norm": 0.0, - "learning_rate": 1.6428580124363257e-05, - "loss": 0.9561, + "learning_rate": 1.6438650094617804e-05, + "loss": 0.9991, "step": 10552 }, { - "epoch": 0.29946083995459705, + "epoch": 0.29904502819575507, "grad_norm": 0.0, - "learning_rate": 1.6427876096865394e-05, - "loss": 0.9237, + "learning_rate": 1.6437947831044776e-05, + "loss": 0.9924, "step": 10553 }, { - "epoch": 0.29948921679909196, + "epoch": 0.2990733656380175, "grad_norm": 0.0, - "learning_rate": 1.6427172015070977e-05, - "loss": 0.9894, + "learning_rate": 1.6437245513242523e-05, + "loss": 0.965, "step": 10554 }, { - "epoch": 0.2995175936435868, + "epoch": 0.29910170308028, "grad_norm": 0.0, - "learning_rate": 1.6426467878985946e-05, - "loss": 0.8495, + "learning_rate": 1.6436543141216962e-05, + "loss": 0.8928, "step": 10555 }, { - "epoch": 0.29954597048808174, + "epoch": 0.29913004052254244, "grad_norm": 0.0, - "learning_rate": 1.642576368861625e-05, - "loss": 0.943, + "learning_rate": 1.6435840714974008e-05, + "loss": 0.8861, "step": 10556 }, { - "epoch": 0.2995743473325766, + "epoch": 0.2991583779648049, "grad_norm": 0.0, - "learning_rate": 1.6425059443967844e-05, - "loss": 0.9335, + "learning_rate": 1.643513823451958e-05, + "loss": 1.0424, "step": 10557 }, { - "epoch": 0.2996027241770715, + "epoch": 0.29918671540706737, "grad_norm": 0.0, - "learning_rate": 1.642435514504667e-05, - "loss": 0.9211, + "learning_rate": 1.643443569985959e-05, + "loss": 0.9012, "step": 10558 }, { - "epoch": 0.2996311010215664, + "epoch": 0.2992150528493298, "grad_norm": 0.0, - "learning_rate": 1.6423650791858678e-05, - "loss": 0.979, + "learning_rate": 1.6433733110999956e-05, + "loss": 0.9148, "step": 10559 }, { - "epoch": 0.2996594778660613, + "epoch": 0.2992433902915923, "grad_norm": 0.0, - "learning_rate": 1.6422946384409817e-05, - "loss": 0.933, + "learning_rate": 1.6433030467946603e-05, + "loss": 0.9818, "step": 10560 }, { - "epoch": 0.2996878547105562, + "epoch": 0.29927172773385474, "grad_norm": 0.0, - "learning_rate": 1.6422241922706043e-05, - "loss": 0.9383, + "learning_rate": 1.643232777070545e-05, + "loss": 1.0291, "step": 10561 }, { - "epoch": 0.29971623155505106, + "epoch": 0.2993000651761172, "grad_norm": 0.0, - "learning_rate": 1.6421537406753302e-05, - "loss": 0.9873, + "learning_rate": 1.6431625019282402e-05, + "loss": 1.0474, "step": 10562 }, { - "epoch": 0.299744608399546, + "epoch": 0.2993284026183797, "grad_norm": 0.0, - "learning_rate": 1.6420832836557546e-05, - "loss": 0.9335, + "learning_rate": 1.6430922213683393e-05, + "loss": 1.0383, "step": 10563 }, { - "epoch": 0.29977298524404083, + "epoch": 0.2993567400606421, "grad_norm": 0.0, - "learning_rate": 1.6420128212124726e-05, - "loss": 1.1077, + "learning_rate": 1.6430219353914336e-05, + "loss": 1.1289, "step": 10564 }, { - "epoch": 0.29980136208853575, + "epoch": 0.2993850775029046, "grad_norm": 0.0, - "learning_rate": 1.6419423533460795e-05, - "loss": 1.0472, + "learning_rate": 1.6429516439981157e-05, + "loss": 0.9887, "step": 10565 }, { - "epoch": 0.29982973893303067, + "epoch": 0.29941341494516704, "grad_norm": 0.0, - "learning_rate": 1.6418718800571705e-05, - "loss": 0.9636, + "learning_rate": 1.642881347188977e-05, + "loss": 1.0036, "step": 10566 }, { - "epoch": 0.2998581157775255, + "epoch": 0.2994417523874295, "grad_norm": 0.0, - "learning_rate": 1.641801401346341e-05, - "loss": 1.0147, + "learning_rate": 1.64281104496461e-05, + "loss": 0.9639, "step": 10567 }, { - "epoch": 0.29988649262202044, + "epoch": 0.299470089829692, "grad_norm": 0.0, - "learning_rate": 1.6417309172141856e-05, - "loss": 0.9598, + "learning_rate": 1.6427407373256073e-05, + "loss": 0.962, "step": 10568 }, { - "epoch": 0.2999148694665153, + "epoch": 0.2994984272719544, "grad_norm": 0.0, - "learning_rate": 1.6416604276613007e-05, - "loss": 0.9754, + "learning_rate": 1.6426704242725603e-05, + "loss": 1.0018, "step": 10569 }, { - "epoch": 0.2999432463110102, + "epoch": 0.2995267647142169, "grad_norm": 0.0, - "learning_rate": 1.6415899326882815e-05, - "loss": 0.9392, + "learning_rate": 1.6426001058060616e-05, + "loss": 1.0336, "step": 10570 }, { - "epoch": 0.29997162315550513, + "epoch": 0.29955510215647935, "grad_norm": 0.0, - "learning_rate": 1.6415194322957232e-05, - "loss": 0.8689, + "learning_rate": 1.642529781926704e-05, + "loss": 1.0369, "step": 10571 }, { - "epoch": 0.3, + "epoch": 0.29958343959874184, "grad_norm": 0.0, - "learning_rate": 1.6414489264842212e-05, - "loss": 0.9235, + "learning_rate": 1.642459452635079e-05, + "loss": 0.935, "step": 10572 }, { - "epoch": 0.3000283768444949, + "epoch": 0.2996117770410043, "grad_norm": 0.0, - "learning_rate": 1.6413784152543713e-05, - "loss": 1.0757, + "learning_rate": 1.6423891179317796e-05, + "loss": 0.8889, "step": 10573 }, { - "epoch": 0.30005675368898976, + "epoch": 0.2996401144832667, "grad_norm": 0.0, - "learning_rate": 1.6413078986067693e-05, - "loss": 0.8762, + "learning_rate": 1.6423187778173983e-05, + "loss": 1.0221, "step": 10574 }, { - "epoch": 0.3000851305334847, + "epoch": 0.2996684519255292, "grad_norm": 0.0, - "learning_rate": 1.6412373765420105e-05, - "loss": 0.8889, + "learning_rate": 1.642248432292527e-05, + "loss": 0.9488, "step": 10575 }, { - "epoch": 0.3001135073779796, + "epoch": 0.29969678936779165, "grad_norm": 0.0, - "learning_rate": 1.6411668490606907e-05, - "loss": 1.011, + "learning_rate": 1.6421780813577593e-05, + "loss": 0.8737, "step": 10576 }, { - "epoch": 0.30014188422247445, + "epoch": 0.29972512681005414, "grad_norm": 0.0, - "learning_rate": 1.641096316163406e-05, - "loss": 0.9703, + "learning_rate": 1.6421077250136867e-05, + "loss": 1.1403, "step": 10577 }, { - "epoch": 0.30017026106696937, + "epoch": 0.2997534642523166, "grad_norm": 0.0, - "learning_rate": 1.6410257778507518e-05, - "loss": 0.9016, + "learning_rate": 1.6420373632609026e-05, + "loss": 0.9043, "step": 10578 }, { - "epoch": 0.30019863791146423, + "epoch": 0.299781801694579, "grad_norm": 0.0, - "learning_rate": 1.640955234123324e-05, - "loss": 0.8976, + "learning_rate": 1.641966996099999e-05, + "loss": 0.9394, "step": 10579 }, { - "epoch": 0.30022701475595914, + "epoch": 0.2998101391368415, "grad_norm": 0.0, - "learning_rate": 1.6408846849817182e-05, - "loss": 0.9744, + "learning_rate": 1.6418966235315694e-05, + "loss": 0.9214, "step": 10580 }, { - "epoch": 0.300255391600454, + "epoch": 0.29983847657910395, "grad_norm": 0.0, - "learning_rate": 1.6408141304265312e-05, - "loss": 0.8988, + "learning_rate": 1.641826245556206e-05, + "loss": 0.9013, "step": 10581 }, { - "epoch": 0.3002837684449489, + "epoch": 0.29986681402136645, "grad_norm": 0.0, - "learning_rate": 1.6407435704583582e-05, - "loss": 0.9768, + "learning_rate": 1.6417558621745014e-05, + "loss": 0.9654, "step": 10582 }, { - "epoch": 0.30031214528944383, + "epoch": 0.2998951514636289, "grad_norm": 0.0, - "learning_rate": 1.6406730050777954e-05, - "loss": 1.0469, + "learning_rate": 1.641685473387049e-05, + "loss": 0.9369, "step": 10583 }, { - "epoch": 0.3003405221339387, + "epoch": 0.2999234889058914, "grad_norm": 0.0, - "learning_rate": 1.640602434285439e-05, - "loss": 0.9585, + "learning_rate": 1.6416150791944422e-05, + "loss": 0.9447, "step": 10584 }, { - "epoch": 0.3003688989784336, + "epoch": 0.2999518263481538, "grad_norm": 0.0, - "learning_rate": 1.640531858081885e-05, - "loss": 0.8976, + "learning_rate": 1.641544679597273e-05, + "loss": 1.0223, "step": 10585 }, { - "epoch": 0.30039727582292847, + "epoch": 0.29998016379041625, "grad_norm": 0.0, - "learning_rate": 1.6404612764677294e-05, - "loss": 0.9512, + "learning_rate": 1.6414742745961346e-05, + "loss": 0.9688, "step": 10586 }, { - "epoch": 0.3004256526674234, + "epoch": 0.30000850123267875, "grad_norm": 0.0, - "learning_rate": 1.640390689443569e-05, - "loss": 0.9843, + "learning_rate": 1.64140386419162e-05, + "loss": 1.0268, "step": 10587 }, { - "epoch": 0.3004540295119183, + "epoch": 0.3000368386749412, "grad_norm": 0.0, - "learning_rate": 1.6403200970099996e-05, - "loss": 0.8703, + "learning_rate": 1.6413334483843225e-05, + "loss": 1.087, "step": 10588 }, { - "epoch": 0.30048240635641316, + "epoch": 0.3000651761172037, "grad_norm": 0.0, - "learning_rate": 1.6402494991676174e-05, - "loss": 1.1187, + "learning_rate": 1.6412630271748354e-05, + "loss": 0.9288, "step": 10589 }, { - "epoch": 0.30051078320090807, + "epoch": 0.3000935135594661, "grad_norm": 0.0, - "learning_rate": 1.640178895917019e-05, - "loss": 0.9835, + "learning_rate": 1.6411926005637518e-05, + "loss": 0.9505, "step": 10590 }, { - "epoch": 0.30053916004540293, + "epoch": 0.30012185100172856, "grad_norm": 0.0, - "learning_rate": 1.6401082872588004e-05, - "loss": 0.9063, + "learning_rate": 1.641122168551665e-05, + "loss": 0.956, "step": 10591 }, { - "epoch": 0.30056753688989785, + "epoch": 0.30015018844399105, "grad_norm": 0.0, - "learning_rate": 1.6400376731935587e-05, - "loss": 0.9394, + "learning_rate": 1.6410517311391674e-05, + "loss": 1.0135, "step": 10592 }, { - "epoch": 0.30059591373439276, + "epoch": 0.3001785258862535, "grad_norm": 0.0, - "learning_rate": 1.6399670537218896e-05, - "loss": 0.9068, + "learning_rate": 1.6409812883268535e-05, + "loss": 0.9538, "step": 10593 }, { - "epoch": 0.3006242905788876, + "epoch": 0.300206863328516, "grad_norm": 0.0, - "learning_rate": 1.6398964288443902e-05, - "loss": 0.9621, + "learning_rate": 1.6409108401153164e-05, + "loss": 0.8882, "step": 10594 }, { - "epoch": 0.30065266742338254, + "epoch": 0.3002352007707784, "grad_norm": 0.0, - "learning_rate": 1.639825798561657e-05, - "loss": 0.9208, + "learning_rate": 1.640840386505149e-05, + "loss": 0.9035, "step": 10595 }, { - "epoch": 0.3006810442678774, + "epoch": 0.3002635382130409, "grad_norm": 0.0, - "learning_rate": 1.6397551628742866e-05, - "loss": 0.9844, + "learning_rate": 1.640769927496945e-05, + "loss": 0.955, "step": 10596 }, { - "epoch": 0.3007094211123723, + "epoch": 0.30029187565530335, "grad_norm": 0.0, - "learning_rate": 1.6396845217828758e-05, - "loss": 0.8936, + "learning_rate": 1.640699463091298e-05, + "loss": 0.9035, "step": 10597 }, { - "epoch": 0.30073779795686717, + "epoch": 0.3003202130975658, "grad_norm": 0.0, - "learning_rate": 1.6396138752880205e-05, - "loss": 1.0128, + "learning_rate": 1.6406289932888016e-05, + "loss": 0.8732, "step": 10598 }, { - "epoch": 0.3007661748013621, + "epoch": 0.3003485505398283, "grad_norm": 0.0, - "learning_rate": 1.639543223390318e-05, - "loss": 0.9504, + "learning_rate": 1.640558518090049e-05, + "loss": 0.94, "step": 10599 }, { - "epoch": 0.300794551645857, + "epoch": 0.3003768879820907, "grad_norm": 0.0, - "learning_rate": 1.639472566090366e-05, - "loss": 0.9443, + "learning_rate": 1.6404880374956347e-05, + "loss": 0.97, "step": 10600 }, { - "epoch": 0.30082292849035186, + "epoch": 0.3004052254243532, "grad_norm": 0.0, - "learning_rate": 1.6394019033887597e-05, - "loss": 0.8777, + "learning_rate": 1.6404175515061514e-05, + "loss": 0.8973, "step": 10601 }, { - "epoch": 0.3008513053348468, + "epoch": 0.30043356286661566, "grad_norm": 0.0, - "learning_rate": 1.639331235286097e-05, - "loss": 0.9083, + "learning_rate": 1.6403470601221934e-05, + "loss": 0.997, "step": 10602 }, { - "epoch": 0.30087968217934163, + "epoch": 0.3004619003088781, "grad_norm": 0.0, - "learning_rate": 1.6392605617829745e-05, - "loss": 1.0005, + "learning_rate": 1.6402765633443546e-05, + "loss": 0.9746, "step": 10603 }, { - "epoch": 0.30090805902383655, + "epoch": 0.3004902377511406, "grad_norm": 0.0, - "learning_rate": 1.6391898828799895e-05, - "loss": 0.9467, + "learning_rate": 1.640206061173228e-05, + "loss": 1.0455, "step": 10604 }, { - "epoch": 0.30093643586833146, + "epoch": 0.300518575193403, "grad_norm": 0.0, - "learning_rate": 1.639119198577739e-05, - "loss": 0.9923, + "learning_rate": 1.640135553609408e-05, + "loss": 1.017, "step": 10605 }, { - "epoch": 0.3009648127128263, + "epoch": 0.3005469126356655, "grad_norm": 0.0, - "learning_rate": 1.6390485088768193e-05, - "loss": 0.8663, + "learning_rate": 1.640065040653489e-05, + "loss": 0.985, "step": 10606 }, { - "epoch": 0.30099318955732124, + "epoch": 0.30057525007792796, "grad_norm": 0.0, - "learning_rate": 1.6389778137778284e-05, - "loss": 0.9248, + "learning_rate": 1.639994522306064e-05, + "loss": 0.9168, "step": 10607 }, { - "epoch": 0.3010215664018161, + "epoch": 0.30060358752019045, "grad_norm": 0.0, - "learning_rate": 1.6389071132813634e-05, - "loss": 0.9839, + "learning_rate": 1.639923998567728e-05, + "loss": 1.0319, "step": 10608 }, { - "epoch": 0.301049943246311, + "epoch": 0.3006319249624529, "grad_norm": 0.0, - "learning_rate": 1.638836407388021e-05, - "loss": 0.9924, + "learning_rate": 1.639853469439074e-05, + "loss": 1.0194, "step": 10609 }, { - "epoch": 0.30107832009080593, + "epoch": 0.30066026240471533, "grad_norm": 0.0, - "learning_rate": 1.638765696098399e-05, - "loss": 0.9823, + "learning_rate": 1.6397829349206968e-05, + "loss": 1.0215, "step": 10610 }, { - "epoch": 0.3011066969353008, + "epoch": 0.3006885998469778, "grad_norm": 0.0, - "learning_rate": 1.6386949794130943e-05, - "loss": 0.9968, + "learning_rate": 1.6397123950131906e-05, + "loss": 0.9724, "step": 10611 }, { - "epoch": 0.3011350737797957, + "epoch": 0.30071693728924026, "grad_norm": 0.0, - "learning_rate": 1.6386242573327048e-05, - "loss": 0.9883, + "learning_rate": 1.6396418497171488e-05, + "loss": 0.9183, "step": 10612 }, { - "epoch": 0.30116345062429056, + "epoch": 0.30074527473150275, "grad_norm": 0.0, - "learning_rate": 1.6385535298578267e-05, - "loss": 0.9621, + "learning_rate": 1.639571299033167e-05, + "loss": 0.9338, "step": 10613 }, { - "epoch": 0.3011918274687855, + "epoch": 0.3007736121737652, "grad_norm": 0.0, - "learning_rate": 1.6384827969890587e-05, - "loss": 0.882, + "learning_rate": 1.639500742961838e-05, + "loss": 0.9458, "step": 10614 }, { - "epoch": 0.30122020431328034, + "epoch": 0.30080194961602763, "grad_norm": 0.0, - "learning_rate": 1.6384120587269978e-05, - "loss": 0.9125, + "learning_rate": 1.639430181503757e-05, + "loss": 0.9967, "step": 10615 }, { - "epoch": 0.30124858115777525, + "epoch": 0.3008302870582901, "grad_norm": 0.0, - "learning_rate": 1.6383413150722417e-05, - "loss": 1.0491, + "learning_rate": 1.639359614659518e-05, + "loss": 0.9284, "step": 10616 }, { - "epoch": 0.30127695800227017, + "epoch": 0.30085862450055256, "grad_norm": 0.0, - "learning_rate": 1.6382705660253873e-05, - "loss": 1.0549, + "learning_rate": 1.639289042429716e-05, + "loss": 0.9451, "step": 10617 }, { - "epoch": 0.301305334846765, + "epoch": 0.30088696194281506, "grad_norm": 0.0, - "learning_rate": 1.638199811587033e-05, - "loss": 1.0292, + "learning_rate": 1.6392184648149443e-05, + "loss": 1.0424, "step": 10618 }, { - "epoch": 0.30133371169125994, + "epoch": 0.3009152993850775, "grad_norm": 0.0, - "learning_rate": 1.6381290517577766e-05, - "loss": 0.9708, + "learning_rate": 1.6391478818157987e-05, + "loss": 0.929, "step": 10619 }, { - "epoch": 0.3013620885357548, + "epoch": 0.30094363682734, "grad_norm": 0.0, - "learning_rate": 1.638058286538215e-05, - "loss": 0.8629, + "learning_rate": 1.6390772934328728e-05, + "loss": 0.9684, "step": 10620 }, { - "epoch": 0.3013904653802497, + "epoch": 0.3009719742696024, "grad_norm": 0.0, - "learning_rate": 1.6379875159289466e-05, - "loss": 0.9368, + "learning_rate": 1.6390066996667617e-05, + "loss": 0.9247, "step": 10621 }, { - "epoch": 0.30141884222474463, + "epoch": 0.30100031171186487, "grad_norm": 0.0, - "learning_rate": 1.6379167399305687e-05, - "loss": 0.9712, + "learning_rate": 1.63893610051806e-05, + "loss": 0.9508, "step": 10622 }, { - "epoch": 0.3014472190692395, + "epoch": 0.30102864915412736, "grad_norm": 0.0, - "learning_rate": 1.6378459585436793e-05, - "loss": 0.8391, + "learning_rate": 1.638865495987362e-05, + "loss": 1.031, "step": 10623 }, { - "epoch": 0.3014755959137344, + "epoch": 0.3010569865963898, "grad_norm": 0.0, - "learning_rate": 1.637775171768877e-05, - "loss": 0.9541, + "learning_rate": 1.638794886075263e-05, + "loss": 0.9285, "step": 10624 }, { - "epoch": 0.30150397275822927, + "epoch": 0.3010853240386523, "grad_norm": 0.0, - "learning_rate": 1.6377043796067585e-05, - "loss": 0.9861, + "learning_rate": 1.638724270782357e-05, + "loss": 1.0356, "step": 10625 }, { - "epoch": 0.3015323496027242, + "epoch": 0.30111366148091473, "grad_norm": 0.0, - "learning_rate": 1.637633582057923e-05, - "loss": 0.9559, + "learning_rate": 1.6386536501092398e-05, + "loss": 0.8371, "step": 10626 }, { - "epoch": 0.3015607264472191, + "epoch": 0.30114199892317717, "grad_norm": 0.0, - "learning_rate": 1.6375627791229675e-05, - "loss": 0.9572, + "learning_rate": 1.6385830240565052e-05, + "loss": 0.8682, "step": 10627 }, { - "epoch": 0.30158910329171396, + "epoch": 0.30117033636543966, "grad_norm": 0.0, - "learning_rate": 1.6374919708024907e-05, - "loss": 1.0222, + "learning_rate": 1.638512392624749e-05, + "loss": 0.9183, "step": 10628 }, { - "epoch": 0.30161748013620887, + "epoch": 0.3011986738077021, "grad_norm": 0.0, - "learning_rate": 1.6374211570970907e-05, - "loss": 0.8411, + "learning_rate": 1.6384417558145654e-05, + "loss": 0.8079, "step": 10629 }, { - "epoch": 0.30164585698070373, + "epoch": 0.3012270112499646, "grad_norm": 0.0, - "learning_rate": 1.6373503380073655e-05, - "loss": 0.9449, + "learning_rate": 1.6383711136265504e-05, + "loss": 0.9611, "step": 10630 }, { - "epoch": 0.30167423382519865, + "epoch": 0.30125534869222703, "grad_norm": 0.0, - "learning_rate": 1.637279513533913e-05, - "loss": 1.0096, + "learning_rate": 1.6383004660612983e-05, + "loss": 0.9403, "step": 10631 }, { - "epoch": 0.3017026106696935, + "epoch": 0.3012836861344895, "grad_norm": 0.0, - "learning_rate": 1.6372086836773325e-05, - "loss": 1.0592, + "learning_rate": 1.638229813119404e-05, + "loss": 0.9599, "step": 10632 }, { - "epoch": 0.3017309875141884, + "epoch": 0.30131202357675196, "grad_norm": 0.0, - "learning_rate": 1.6371378484382212e-05, - "loss": 0.8973, + "learning_rate": 1.638159154801463e-05, + "loss": 0.9246, "step": 10633 }, { - "epoch": 0.30175936435868334, + "epoch": 0.3013403610190144, "grad_norm": 0.0, - "learning_rate": 1.637067007817178e-05, - "loss": 0.8808, + "learning_rate": 1.638088491108071e-05, + "loss": 0.9595, "step": 10634 }, { - "epoch": 0.3017877412031782, + "epoch": 0.3013686984612769, "grad_norm": 0.0, - "learning_rate": 1.636996161814801e-05, - "loss": 0.9849, + "learning_rate": 1.6380178220398226e-05, + "loss": 1.0732, "step": 10635 }, { - "epoch": 0.3018161180476731, + "epoch": 0.30139703590353933, "grad_norm": 0.0, - "learning_rate": 1.6369253104316885e-05, - "loss": 1.077, + "learning_rate": 1.6379471475973128e-05, + "loss": 0.9871, "step": 10636 }, { - "epoch": 0.30184449489216797, + "epoch": 0.30142537334580183, "grad_norm": 0.0, - "learning_rate": 1.63685445366844e-05, - "loss": 1.0875, + "learning_rate": 1.6378764677811375e-05, + "loss": 0.9574, "step": 10637 }, { - "epoch": 0.3018728717366629, + "epoch": 0.30145371078806427, "grad_norm": 0.0, - "learning_rate": 1.6367835915256523e-05, - "loss": 0.9701, + "learning_rate": 1.6378057825918917e-05, + "loss": 0.9919, "step": 10638 }, { - "epoch": 0.3019012485811578, + "epoch": 0.3014820482303267, "grad_norm": 0.0, - "learning_rate": 1.636712724003926e-05, - "loss": 0.8933, + "learning_rate": 1.637735092030171e-05, + "loss": 0.9596, "step": 10639 }, { - "epoch": 0.30192962542565266, + "epoch": 0.3015103856725892, "grad_norm": 0.0, - "learning_rate": 1.636641851103858e-05, - "loss": 0.9684, + "learning_rate": 1.6376643960965712e-05, + "loss": 0.9789, "step": 10640 }, { - "epoch": 0.3019580022701476, + "epoch": 0.30153872311485164, "grad_norm": 0.0, - "learning_rate": 1.6365709728260483e-05, - "loss": 0.9907, + "learning_rate": 1.6375936947916867e-05, + "loss": 1.0262, "step": 10641 }, { - "epoch": 0.30198637911464243, + "epoch": 0.30156706055711413, "grad_norm": 0.0, - "learning_rate": 1.6365000891710944e-05, - "loss": 0.9632, + "learning_rate": 1.637522988116114e-05, + "loss": 0.9958, "step": 10642 }, { - "epoch": 0.30201475595913735, + "epoch": 0.30159539799937657, "grad_norm": 0.0, - "learning_rate": 1.6364292001395958e-05, - "loss": 0.9077, + "learning_rate": 1.6374522760704485e-05, + "loss": 0.9812, "step": 10643 }, { - "epoch": 0.30204313280363226, + "epoch": 0.30162373544163906, "grad_norm": 0.0, - "learning_rate": 1.6363583057321512e-05, - "loss": 1.0091, + "learning_rate": 1.637381558655286e-05, + "loss": 0.9462, "step": 10644 }, { - "epoch": 0.3020715096481271, + "epoch": 0.3016520728839015, "grad_norm": 0.0, - "learning_rate": 1.6362874059493593e-05, - "loss": 0.9235, + "learning_rate": 1.6373108358712215e-05, + "loss": 0.9177, "step": 10645 }, { - "epoch": 0.30209988649262204, + "epoch": 0.30168041032616394, "grad_norm": 0.0, - "learning_rate": 1.636216500791819e-05, - "loss": 0.9545, + "learning_rate": 1.6372401077188515e-05, + "loss": 0.9516, "step": 10646 }, { - "epoch": 0.3021282633371169, + "epoch": 0.30170874776842643, "grad_norm": 0.0, - "learning_rate": 1.6361455902601296e-05, - "loss": 0.9405, + "learning_rate": 1.6371693741987714e-05, + "loss": 0.9693, "step": 10647 }, { - "epoch": 0.3021566401816118, + "epoch": 0.30173708521068887, "grad_norm": 0.0, - "learning_rate": 1.6360746743548896e-05, - "loss": 0.8851, + "learning_rate": 1.637098635311577e-05, + "loss": 0.9492, "step": 10648 }, { - "epoch": 0.3021850170261067, + "epoch": 0.30176542265295137, "grad_norm": 0.0, - "learning_rate": 1.6360037530766983e-05, - "loss": 0.9026, + "learning_rate": 1.6370278910578644e-05, + "loss": 0.9911, "step": 10649 }, { - "epoch": 0.3022133938706016, + "epoch": 0.3017937600952138, "grad_norm": 0.0, - "learning_rate": 1.6359328264261548e-05, - "loss": 0.9704, + "learning_rate": 1.6369571414382288e-05, + "loss": 1.0786, "step": 10650 }, { - "epoch": 0.3022417707150965, + "epoch": 0.30182209753747624, "grad_norm": 0.0, - "learning_rate": 1.6358618944038578e-05, - "loss": 0.9035, + "learning_rate": 1.636886386453267e-05, + "loss": 0.9583, "step": 10651 }, { - "epoch": 0.30227014755959136, + "epoch": 0.30185043497973874, "grad_norm": 0.0, - "learning_rate": 1.635790957010407e-05, - "loss": 1.044, + "learning_rate": 1.6368156261035747e-05, + "loss": 0.9339, "step": 10652 }, { - "epoch": 0.3022985244040863, + "epoch": 0.3018787724220012, "grad_norm": 0.0, - "learning_rate": 1.6357200142464013e-05, - "loss": 0.9728, + "learning_rate": 1.636744860389748e-05, + "loss": 1.0394, "step": 10653 }, { - "epoch": 0.30232690124858114, + "epoch": 0.30190710986426367, "grad_norm": 0.0, - "learning_rate": 1.6356490661124402e-05, - "loss": 1.0575, + "learning_rate": 1.6366740893123828e-05, + "loss": 0.9786, "step": 10654 }, { - "epoch": 0.30235527809307605, + "epoch": 0.3019354473065261, "grad_norm": 0.0, - "learning_rate": 1.6355781126091227e-05, - "loss": 0.9234, + "learning_rate": 1.636603312872075e-05, + "loss": 0.9535, "step": 10655 }, { - "epoch": 0.30238365493757097, + "epoch": 0.3019637847487886, "grad_norm": 0.0, - "learning_rate": 1.6355071537370484e-05, - "loss": 0.9292, + "learning_rate": 1.6365325310694215e-05, + "loss": 0.9673, "step": 10656 }, { - "epoch": 0.3024120317820658, + "epoch": 0.30199212219105104, "grad_norm": 0.0, - "learning_rate": 1.635436189496817e-05, - "loss": 0.9547, + "learning_rate": 1.636461743905018e-05, + "loss": 0.9981, "step": 10657 }, { - "epoch": 0.30244040862656074, + "epoch": 0.3020204596333135, "grad_norm": 0.0, - "learning_rate": 1.635365219889027e-05, - "loss": 0.9179, + "learning_rate": 1.6363909513794606e-05, + "loss": 1.0319, "step": 10658 }, { - "epoch": 0.3024687854710556, + "epoch": 0.30204879707557597, "grad_norm": 0.0, - "learning_rate": 1.6352942449142787e-05, - "loss": 0.8601, + "learning_rate": 1.6363201534933465e-05, + "loss": 1.0042, "step": 10659 }, { - "epoch": 0.3024971623155505, + "epoch": 0.3020771345178384, "grad_norm": 0.0, - "learning_rate": 1.635223264573171e-05, - "loss": 0.9018, + "learning_rate": 1.6362493502472713e-05, + "loss": 1.016, "step": 10660 }, { - "epoch": 0.3025255391600454, + "epoch": 0.3021054719601009, "grad_norm": 0.0, - "learning_rate": 1.6351522788663044e-05, - "loss": 0.9764, + "learning_rate": 1.6361785416418313e-05, + "loss": 1.0329, "step": 10661 }, { - "epoch": 0.3025539160045403, + "epoch": 0.30213380940236334, "grad_norm": 0.0, - "learning_rate": 1.6350812877942778e-05, - "loss": 1.0361, + "learning_rate": 1.636107727677623e-05, + "loss": 0.8716, "step": 10662 }, { - "epoch": 0.3025822928490352, + "epoch": 0.3021621468446258, "grad_norm": 0.0, - "learning_rate": 1.6350102913576906e-05, - "loss": 0.8914, + "learning_rate": 1.6360369083552433e-05, + "loss": 1.0127, "step": 10663 }, { - "epoch": 0.30261066969353007, + "epoch": 0.3021904842868883, "grad_norm": 0.0, - "learning_rate": 1.6349392895571433e-05, - "loss": 0.8987, + "learning_rate": 1.6359660836752887e-05, + "loss": 1.0841, "step": 10664 }, { - "epoch": 0.302639046538025, + "epoch": 0.3022188217291507, "grad_norm": 0.0, - "learning_rate": 1.6348682823932352e-05, - "loss": 0.9612, + "learning_rate": 1.635895253638356e-05, + "loss": 0.9416, "step": 10665 }, { - "epoch": 0.30266742338251984, + "epoch": 0.3022471591714132, "grad_norm": 0.0, - "learning_rate": 1.634797269866566e-05, - "loss": 1.0013, + "learning_rate": 1.6358244182450408e-05, + "loss": 0.9732, "step": 10666 }, { - "epoch": 0.30269580022701476, + "epoch": 0.30227549661367564, "grad_norm": 0.0, - "learning_rate": 1.6347262519777358e-05, - "loss": 1.0813, + "learning_rate": 1.6357535774959405e-05, + "loss": 0.9536, "step": 10667 }, { - "epoch": 0.30272417707150967, + "epoch": 0.30230383405593814, "grad_norm": 0.0, - "learning_rate": 1.6346552287273446e-05, - "loss": 1.0125, + "learning_rate": 1.635682731391652e-05, + "loss": 0.9446, "step": 10668 }, { - "epoch": 0.30275255391600453, + "epoch": 0.3023321714982006, "grad_norm": 0.0, - "learning_rate": 1.634584200115992e-05, - "loss": 0.9713, + "learning_rate": 1.6356118799327716e-05, + "loss": 0.856, "step": 10669 }, { - "epoch": 0.30278093076049944, + "epoch": 0.302360508940463, "grad_norm": 0.0, - "learning_rate": 1.6345131661442782e-05, - "loss": 0.8082, + "learning_rate": 1.6355410231198964e-05, + "loss": 1.0579, "step": 10670 }, { - "epoch": 0.3028093076049943, + "epoch": 0.3023888463827255, "grad_norm": 0.0, - "learning_rate": 1.6344421268128032e-05, - "loss": 0.939, + "learning_rate": 1.635470160953623e-05, + "loss": 1.0614, "step": 10671 }, { - "epoch": 0.3028376844494892, + "epoch": 0.30241718382498795, "grad_norm": 0.0, - "learning_rate": 1.634371082122167e-05, - "loss": 0.9722, + "learning_rate": 1.6353992934345484e-05, + "loss": 0.8782, "step": 10672 }, { - "epoch": 0.30286606129398413, + "epoch": 0.30244552126725044, "grad_norm": 0.0, - "learning_rate": 1.63430003207297e-05, - "loss": 1.0053, + "learning_rate": 1.63532842056327e-05, + "loss": 0.9037, "step": 10673 }, { - "epoch": 0.302894438138479, + "epoch": 0.3024738587095129, "grad_norm": 0.0, - "learning_rate": 1.6342289766658114e-05, - "loss": 1.0135, + "learning_rate": 1.635257542340384e-05, + "loss": 0.9387, "step": 10674 }, { - "epoch": 0.3029228149829739, + "epoch": 0.3025021961517753, "grad_norm": 0.0, - "learning_rate": 1.6341579159012928e-05, - "loss": 1.0538, + "learning_rate": 1.635186658766488e-05, + "loss": 0.9781, "step": 10675 }, { - "epoch": 0.30295119182746877, + "epoch": 0.3025305335940378, "grad_norm": 0.0, - "learning_rate": 1.6340868497800135e-05, - "loss": 1.0508, + "learning_rate": 1.635115769842179e-05, + "loss": 0.9716, "step": 10676 }, { - "epoch": 0.3029795686719637, + "epoch": 0.30255887103630025, "grad_norm": 0.0, - "learning_rate": 1.634015778302574e-05, - "loss": 1.0195, + "learning_rate": 1.635044875568054e-05, + "loss": 0.8508, "step": 10677 }, { - "epoch": 0.30300794551645854, + "epoch": 0.30258720847856274, "grad_norm": 0.0, - "learning_rate": 1.633944701469575e-05, - "loss": 0.9279, + "learning_rate": 1.63497397594471e-05, + "loss": 1.0352, "step": 10678 }, { - "epoch": 0.30303632236095346, + "epoch": 0.3026155459208252, "grad_norm": 0.0, - "learning_rate": 1.6338736192816163e-05, - "loss": 0.8671, + "learning_rate": 1.6349030709727444e-05, + "loss": 1.0347, "step": 10679 }, { - "epoch": 0.3030646992054484, + "epoch": 0.3026438833630877, "grad_norm": 0.0, - "learning_rate": 1.6338025317392986e-05, - "loss": 0.9258, + "learning_rate": 1.6348321606527545e-05, + "loss": 0.9804, "step": 10680 }, { - "epoch": 0.30309307604994323, + "epoch": 0.3026722208053501, "grad_norm": 0.0, - "learning_rate": 1.633731438843223e-05, - "loss": 0.9894, + "learning_rate": 1.634761244985338e-05, + "loss": 0.9664, "step": 10681 }, { - "epoch": 0.30312145289443815, + "epoch": 0.30270055824761255, "grad_norm": 0.0, - "learning_rate": 1.633660340593989e-05, - "loss": 1.0419, + "learning_rate": 1.6346903239710913e-05, + "loss": 0.9436, "step": 10682 }, { - "epoch": 0.303149829738933, + "epoch": 0.30272889568987504, "grad_norm": 0.0, - "learning_rate": 1.6335892369921974e-05, - "loss": 0.863, + "learning_rate": 1.6346193976106125e-05, + "loss": 0.9948, "step": 10683 }, { - "epoch": 0.3031782065834279, + "epoch": 0.3027572331321375, "grad_norm": 0.0, - "learning_rate": 1.6335181280384494e-05, - "loss": 0.9607, + "learning_rate": 1.6345484659044987e-05, + "loss": 0.8708, "step": 10684 }, { - "epoch": 0.30320658342792284, + "epoch": 0.3027855705744, "grad_norm": 0.0, - "learning_rate": 1.6334470137333452e-05, - "loss": 0.9632, + "learning_rate": 1.6344775288533477e-05, + "loss": 0.9246, "step": 10685 }, { - "epoch": 0.3032349602724177, + "epoch": 0.3028139080166624, "grad_norm": 0.0, - "learning_rate": 1.6333758940774854e-05, - "loss": 0.7791, + "learning_rate": 1.634406586457757e-05, + "loss": 0.8941, "step": 10686 }, { - "epoch": 0.3032633371169126, + "epoch": 0.30284224545892485, "grad_norm": 0.0, - "learning_rate": 1.6333047690714714e-05, - "loss": 0.8973, + "learning_rate": 1.634335638718324e-05, + "loss": 0.9928, "step": 10687 }, { - "epoch": 0.30329171396140747, + "epoch": 0.30287058290118735, "grad_norm": 0.0, - "learning_rate": 1.6332336387159032e-05, - "loss": 0.8868, + "learning_rate": 1.634264685635646e-05, + "loss": 0.9937, "step": 10688 }, { - "epoch": 0.3033200908059024, + "epoch": 0.3028989203434498, "grad_norm": 0.0, - "learning_rate": 1.633162503011382e-05, - "loss": 0.9203, + "learning_rate": 1.6341937272103213e-05, + "loss": 0.8774, "step": 10689 }, { - "epoch": 0.3033484676503973, + "epoch": 0.3029272577857123, "grad_norm": 0.0, - "learning_rate": 1.633091361958509e-05, - "loss": 1.0095, + "learning_rate": 1.6341227634429472e-05, + "loss": 1.0669, "step": 10690 }, { - "epoch": 0.30337684449489216, + "epoch": 0.3029555952279747, "grad_norm": 0.0, - "learning_rate": 1.6330202155578845e-05, - "loss": 0.9753, + "learning_rate": 1.6340517943341217e-05, + "loss": 1.0079, "step": 10691 }, { - "epoch": 0.3034052213393871, + "epoch": 0.3029839326702372, "grad_norm": 0.0, - "learning_rate": 1.6329490638101096e-05, - "loss": 0.8387, + "learning_rate": 1.6339808198844424e-05, + "loss": 1.0011, "step": 10692 }, { - "epoch": 0.30343359818388194, + "epoch": 0.30301227011249965, "grad_norm": 0.0, - "learning_rate": 1.6328779067157862e-05, - "loss": 0.8518, + "learning_rate": 1.6339098400945074e-05, + "loss": 0.9012, "step": 10693 }, { - "epoch": 0.30346197502837685, + "epoch": 0.3030406075547621, "grad_norm": 0.0, - "learning_rate": 1.632806744275514e-05, - "loss": 0.9065, + "learning_rate": 1.6338388549649146e-05, + "loss": 1.0447, "step": 10694 }, { - "epoch": 0.3034903518728717, + "epoch": 0.3030689449970246, "grad_norm": 0.0, - "learning_rate": 1.632735576489895e-05, - "loss": 0.9462, + "learning_rate": 1.633767864496261e-05, + "loss": 0.9229, "step": 10695 }, { - "epoch": 0.3035187287173666, + "epoch": 0.303097282439287, "grad_norm": 0.0, - "learning_rate": 1.6326644033595303e-05, - "loss": 0.9423, + "learning_rate": 1.633696868689146e-05, + "loss": 1.0243, "step": 10696 }, { - "epoch": 0.30354710556186154, + "epoch": 0.3031256198815495, "grad_norm": 0.0, - "learning_rate": 1.6325932248850206e-05, - "loss": 0.9388, + "learning_rate": 1.633625867544167e-05, + "loss": 0.8755, "step": 10697 }, { - "epoch": 0.3035754824063564, + "epoch": 0.30315395732381195, "grad_norm": 0.0, - "learning_rate": 1.6325220410669676e-05, - "loss": 0.8938, + "learning_rate": 1.6335548610619215e-05, + "loss": 1.0025, "step": 10698 }, { - "epoch": 0.3036038592508513, + "epoch": 0.3031822947660744, "grad_norm": 0.0, - "learning_rate": 1.6324508519059728e-05, - "loss": 1.0059, + "learning_rate": 1.6334838492430084e-05, + "loss": 0.8971, "step": 10699 }, { - "epoch": 0.3036322360953462, + "epoch": 0.3032106322083369, "grad_norm": 0.0, - "learning_rate": 1.6323796574026368e-05, - "loss": 0.9777, + "learning_rate": 1.6334128320880258e-05, + "loss": 0.9954, "step": 10700 }, { - "epoch": 0.3036606129398411, + "epoch": 0.3032389696505993, "grad_norm": 0.0, - "learning_rate": 1.6323084575575617e-05, - "loss": 0.901, + "learning_rate": 1.633341809597572e-05, + "loss": 0.9532, "step": 10701 }, { - "epoch": 0.303688989784336, + "epoch": 0.3032673070928618, "grad_norm": 0.0, - "learning_rate": 1.6322372523713487e-05, - "loss": 0.7912, + "learning_rate": 1.6332707817722446e-05, + "loss": 0.9552, "step": 10702 }, { - "epoch": 0.30371736662883086, + "epoch": 0.30329564453512425, "grad_norm": 0.0, - "learning_rate": 1.632166041844599e-05, - "loss": 0.8844, + "learning_rate": 1.6331997486126415e-05, + "loss": 0.936, "step": 10703 }, { - "epoch": 0.3037457434733258, + "epoch": 0.30332398197738675, "grad_norm": 0.0, - "learning_rate": 1.6320948259779143e-05, - "loss": 0.9914, + "learning_rate": 1.6331287101193625e-05, + "loss": 0.8947, "step": 10704 }, { - "epoch": 0.30377412031782064, + "epoch": 0.3033523194196492, "grad_norm": 0.0, - "learning_rate": 1.632023604771896e-05, - "loss": 1.007, + "learning_rate": 1.6330576662930052e-05, + "loss": 0.9713, "step": 10705 }, { - "epoch": 0.30380249716231555, + "epoch": 0.3033806568619116, "grad_norm": 0.0, - "learning_rate": 1.6319523782271462e-05, - "loss": 1.008, + "learning_rate": 1.632986617134168e-05, + "loss": 0.9193, "step": 10706 }, { - "epoch": 0.30383087400681047, + "epoch": 0.3034089943041741, "grad_norm": 0.0, - "learning_rate": 1.6318811463442666e-05, - "loss": 0.9541, + "learning_rate": 1.6329155626434498e-05, + "loss": 0.9758, "step": 10707 }, { - "epoch": 0.30385925085130533, + "epoch": 0.30343733174643656, "grad_norm": 0.0, - "learning_rate": 1.631809909123858e-05, - "loss": 1.0979, + "learning_rate": 1.6328445028214485e-05, + "loss": 1.0058, "step": 10708 }, { - "epoch": 0.30388762769580024, + "epoch": 0.30346566918869905, "grad_norm": 0.0, - "learning_rate": 1.631738666566523e-05, - "loss": 0.9221, + "learning_rate": 1.632773437668763e-05, + "loss": 0.9764, "step": 10709 }, { - "epoch": 0.3039160045402951, + "epoch": 0.3034940066309615, "grad_norm": 0.0, - "learning_rate": 1.6316674186728628e-05, - "loss": 0.8671, + "learning_rate": 1.632702367185992e-05, + "loss": 0.8968, "step": 10710 }, { - "epoch": 0.30394438138479, + "epoch": 0.3035223440732239, "grad_norm": 0.0, - "learning_rate": 1.6315961654434797e-05, - "loss": 0.9666, + "learning_rate": 1.632631291373734e-05, + "loss": 0.9447, "step": 10711 }, { - "epoch": 0.3039727582292849, + "epoch": 0.3035506815154864, "grad_norm": 0.0, - "learning_rate": 1.6315249068789755e-05, - "loss": 0.9554, + "learning_rate": 1.6325602102325873e-05, + "loss": 1.0292, "step": 10712 }, { - "epoch": 0.3040011350737798, + "epoch": 0.30357901895774886, "grad_norm": 0.0, - "learning_rate": 1.6314536429799516e-05, - "loss": 0.9442, + "learning_rate": 1.6324891237631514e-05, + "loss": 1.0669, "step": 10713 }, { - "epoch": 0.3040295119182747, + "epoch": 0.30360735640001135, "grad_norm": 0.0, - "learning_rate": 1.6313823737470105e-05, - "loss": 1.0348, + "learning_rate": 1.6324180319660247e-05, + "loss": 0.9502, "step": 10714 }, { - "epoch": 0.30405788876276957, + "epoch": 0.3036356938422738, "grad_norm": 0.0, - "learning_rate": 1.6313110991807546e-05, - "loss": 0.8562, + "learning_rate": 1.632346934841806e-05, + "loss": 1.0512, "step": 10715 }, { - "epoch": 0.3040862656072645, + "epoch": 0.3036640312845363, "grad_norm": 0.0, - "learning_rate": 1.631239819281785e-05, - "loss": 0.8571, + "learning_rate": 1.6322758323910943e-05, + "loss": 0.9809, "step": 10716 }, { - "epoch": 0.30411464245175934, + "epoch": 0.3036923687267987, "grad_norm": 0.0, - "learning_rate": 1.6311685340507046e-05, - "loss": 0.9235, + "learning_rate": 1.6322047246144887e-05, + "loss": 0.8369, "step": 10717 }, { - "epoch": 0.30414301929625426, + "epoch": 0.30372070616906116, "grad_norm": 0.0, - "learning_rate": 1.6310972434881152e-05, - "loss": 1.0948, + "learning_rate": 1.6321336115125876e-05, + "loss": 0.9234, "step": 10718 }, { - "epoch": 0.3041713961407492, + "epoch": 0.30374904361132365, "grad_norm": 0.0, - "learning_rate": 1.6310259475946187e-05, - "loss": 0.9343, + "learning_rate": 1.6320624930859905e-05, + "loss": 0.9532, "step": 10719 }, { - "epoch": 0.30419977298524403, + "epoch": 0.3037773810535861, "grad_norm": 0.0, - "learning_rate": 1.630954646370818e-05, - "loss": 1.015, + "learning_rate": 1.6319913693352963e-05, + "loss": 0.9477, "step": 10720 }, { - "epoch": 0.30422814982973895, + "epoch": 0.3038057184958486, "grad_norm": 0.0, - "learning_rate": 1.630883339817315e-05, - "loss": 0.8925, + "learning_rate": 1.631920240261104e-05, + "loss": 1.0042, "step": 10721 }, { - "epoch": 0.3042565266742338, + "epoch": 0.303834055938111, "grad_norm": 0.0, - "learning_rate": 1.6308120279347118e-05, - "loss": 1.0146, + "learning_rate": 1.631849105864013e-05, + "loss": 0.9313, "step": 10722 }, { - "epoch": 0.3042849035187287, + "epoch": 0.30386239338037346, "grad_norm": 0.0, - "learning_rate": 1.630740710723611e-05, - "loss": 1.0412, + "learning_rate": 1.6317779661446223e-05, + "loss": 1.0223, "step": 10723 }, { - "epoch": 0.30431328036322364, + "epoch": 0.30389073082263596, "grad_norm": 0.0, - "learning_rate": 1.6306693881846152e-05, - "loss": 0.9601, + "learning_rate": 1.631706821103531e-05, + "loss": 0.9483, "step": 10724 }, { - "epoch": 0.3043416572077185, + "epoch": 0.3039190682648984, "grad_norm": 0.0, - "learning_rate": 1.630598060318327e-05, - "loss": 0.9308, + "learning_rate": 1.631635670741339e-05, + "loss": 1.0381, "step": 10725 }, { - "epoch": 0.3043700340522134, + "epoch": 0.3039474057071609, "grad_norm": 0.0, - "learning_rate": 1.6305267271253483e-05, - "loss": 0.9736, + "learning_rate": 1.631564515058645e-05, + "loss": 0.9566, "step": 10726 }, { - "epoch": 0.30439841089670827, + "epoch": 0.3039757431494233, "grad_norm": 0.0, - "learning_rate": 1.630455388606282e-05, - "loss": 0.9914, + "learning_rate": 1.6314933540560485e-05, + "loss": 0.9531, "step": 10727 }, { - "epoch": 0.3044267877412032, + "epoch": 0.3040040805916858, "grad_norm": 0.0, - "learning_rate": 1.6303840447617306e-05, - "loss": 0.9999, + "learning_rate": 1.6314221877341488e-05, + "loss": 0.9494, "step": 10728 }, { - "epoch": 0.30445516458569805, + "epoch": 0.30403241803394826, "grad_norm": 0.0, - "learning_rate": 1.630312695592297e-05, - "loss": 0.842, + "learning_rate": 1.6313510160935457e-05, + "loss": 0.8624, "step": 10729 }, { - "epoch": 0.30448354143019296, + "epoch": 0.3040607554762107, "grad_norm": 0.0, - "learning_rate": 1.6302413410985838e-05, - "loss": 0.9444, + "learning_rate": 1.6312798391348387e-05, + "loss": 0.9043, "step": 10730 }, { - "epoch": 0.3045119182746879, + "epoch": 0.3040890929184732, "grad_norm": 0.0, - "learning_rate": 1.6301699812811936e-05, - "loss": 0.879, + "learning_rate": 1.6312086568586273e-05, + "loss": 1.0292, "step": 10731 }, { - "epoch": 0.30454029511918274, + "epoch": 0.30411743036073563, "grad_norm": 0.0, - "learning_rate": 1.6300986161407292e-05, - "loss": 0.9154, + "learning_rate": 1.6311374692655107e-05, + "loss": 1.0599, "step": 10732 }, { - "epoch": 0.30456867196367765, + "epoch": 0.3041457678029981, "grad_norm": 0.0, - "learning_rate": 1.6300272456777933e-05, - "loss": 1.0007, + "learning_rate": 1.631066276356089e-05, + "loss": 0.9, "step": 10733 }, { - "epoch": 0.3045970488081725, + "epoch": 0.30417410524526056, "grad_norm": 0.0, - "learning_rate": 1.6299558698929894e-05, - "loss": 0.9115, + "learning_rate": 1.6309950781309612e-05, + "loss": 0.9777, "step": 10734 }, { - "epoch": 0.3046254256526674, + "epoch": 0.304202442687523, "grad_norm": 0.0, - "learning_rate": 1.6298844887869196e-05, - "loss": 0.8141, + "learning_rate": 1.630923874590728e-05, + "loss": 1.071, "step": 10735 }, { - "epoch": 0.30465380249716234, + "epoch": 0.3042307801297855, "grad_norm": 0.0, - "learning_rate": 1.6298131023601872e-05, - "loss": 0.9774, + "learning_rate": 1.6308526657359888e-05, + "loss": 0.9996, "step": 10736 }, { - "epoch": 0.3046821793416572, + "epoch": 0.30425911757204793, "grad_norm": 0.0, - "learning_rate": 1.6297417106133954e-05, - "loss": 0.9836, + "learning_rate": 1.6307814515673433e-05, + "loss": 0.9539, "step": 10737 }, { - "epoch": 0.3047105561861521, + "epoch": 0.3042874550143104, "grad_norm": 0.0, - "learning_rate": 1.629670313547147e-05, - "loss": 0.9286, + "learning_rate": 1.6307102320853913e-05, + "loss": 1.0164, "step": 10738 }, { - "epoch": 0.304738933030647, + "epoch": 0.30431579245657286, "grad_norm": 0.0, - "learning_rate": 1.629598911162045e-05, - "loss": 0.9032, + "learning_rate": 1.6306390072907327e-05, + "loss": 0.9763, "step": 10739 }, { - "epoch": 0.3047673098751419, + "epoch": 0.30434412989883536, "grad_norm": 0.0, - "learning_rate": 1.629527503458693e-05, - "loss": 0.9317, + "learning_rate": 1.630567777183968e-05, + "loss": 1.0046, "step": 10740 }, { - "epoch": 0.30479568671963675, + "epoch": 0.3043724673410978, "grad_norm": 0.0, - "learning_rate": 1.6294560904376938e-05, - "loss": 1.0262, + "learning_rate": 1.6304965417656962e-05, + "loss": 0.9573, "step": 10741 }, { - "epoch": 0.30482406356413166, + "epoch": 0.30440080478336023, "grad_norm": 0.0, - "learning_rate": 1.6293846720996504e-05, - "loss": 0.9872, + "learning_rate": 1.630425301036518e-05, + "loss": 0.8424, "step": 10742 }, { - "epoch": 0.3048524404086266, + "epoch": 0.30442914222562273, "grad_norm": 0.0, - "learning_rate": 1.6293132484451667e-05, - "loss": 0.9216, + "learning_rate": 1.6303540549970338e-05, + "loss": 0.9625, "step": 10743 }, { - "epoch": 0.30488081725312144, + "epoch": 0.30445747966788517, "grad_norm": 0.0, - "learning_rate": 1.6292418194748452e-05, - "loss": 0.8067, + "learning_rate": 1.630282803647843e-05, + "loss": 0.9141, "step": 10744 }, { - "epoch": 0.30490919409761635, + "epoch": 0.30448581711014766, "grad_norm": 0.0, - "learning_rate": 1.6291703851892904e-05, - "loss": 0.9594, + "learning_rate": 1.630211546989546e-05, + "loss": 1.0318, "step": 10745 }, { - "epoch": 0.3049375709421112, + "epoch": 0.3045141545524101, "grad_norm": 0.0, - "learning_rate": 1.629098945589105e-05, - "loss": 0.9642, + "learning_rate": 1.6301402850227432e-05, + "loss": 1.0289, "step": 10746 }, { - "epoch": 0.30496594778660613, + "epoch": 0.30454249199467254, "grad_norm": 0.0, - "learning_rate": 1.6290275006748922e-05, - "loss": 1.0432, + "learning_rate": 1.630069017748035e-05, + "loss": 1.0095, "step": 10747 }, { - "epoch": 0.30499432463110104, + "epoch": 0.30457082943693503, "grad_norm": 0.0, - "learning_rate": 1.628956050447256e-05, - "loss": 0.9166, + "learning_rate": 1.629997745166021e-05, + "loss": 1.0097, "step": 10748 }, { - "epoch": 0.3050227014755959, + "epoch": 0.30459916687919747, "grad_norm": 0.0, - "learning_rate": 1.6288845949068e-05, - "loss": 0.8435, + "learning_rate": 1.6299264672773025e-05, + "loss": 1.0266, "step": 10749 }, { - "epoch": 0.3050510783200908, + "epoch": 0.30462750432145996, "grad_norm": 0.0, - "learning_rate": 1.628813134054127e-05, - "loss": 1.0121, + "learning_rate": 1.629855184082479e-05, + "loss": 1.0683, "step": 10750 }, { - "epoch": 0.3050794551645857, + "epoch": 0.3046558417637224, "grad_norm": 0.0, - "learning_rate": 1.6287416678898415e-05, - "loss": 0.9499, + "learning_rate": 1.629783895582152e-05, + "loss": 1.0615, "step": 10751 }, { - "epoch": 0.3051078320090806, + "epoch": 0.3046841792059849, "grad_norm": 0.0, - "learning_rate": 1.6286701964145468e-05, - "loss": 0.9555, + "learning_rate": 1.629712601776921e-05, + "loss": 0.8918, "step": 10752 }, { - "epoch": 0.3051362088535755, + "epoch": 0.30471251664824733, "grad_norm": 0.0, - "learning_rate": 1.6285987196288465e-05, - "loss": 0.8549, + "learning_rate": 1.629641302667387e-05, + "loss": 0.8846, "step": 10753 }, { - "epoch": 0.30516458569807037, + "epoch": 0.30474085409050977, "grad_norm": 0.0, - "learning_rate": 1.6285272375333453e-05, - "loss": 0.873, + "learning_rate": 1.6295699982541506e-05, + "loss": 0.9464, "step": 10754 }, { - "epoch": 0.3051929625425653, + "epoch": 0.30476919153277227, "grad_norm": 0.0, - "learning_rate": 1.6284557501286457e-05, - "loss": 0.856, + "learning_rate": 1.6294986885378123e-05, + "loss": 1.0203, "step": 10755 }, { - "epoch": 0.30522133938706014, + "epoch": 0.3047975289750347, "grad_norm": 0.0, - "learning_rate": 1.628384257415352e-05, - "loss": 1.0299, + "learning_rate": 1.6294273735189728e-05, + "loss": 0.962, "step": 10756 }, { - "epoch": 0.30524971623155506, + "epoch": 0.3048258664172972, "grad_norm": 0.0, - "learning_rate": 1.6283127593940686e-05, - "loss": 0.9889, + "learning_rate": 1.6293560531982326e-05, + "loss": 1.0446, "step": 10757 }, { - "epoch": 0.3052780930760499, + "epoch": 0.30485420385955964, "grad_norm": 0.0, - "learning_rate": 1.628241256065399e-05, - "loss": 0.9406, + "learning_rate": 1.629284727576193e-05, + "loss": 0.9975, "step": 10758 }, { - "epoch": 0.30530646992054483, + "epoch": 0.3048825413018221, "grad_norm": 0.0, - "learning_rate": 1.628169747429947e-05, - "loss": 0.9857, + "learning_rate": 1.629213396653454e-05, + "loss": 1.0178, "step": 10759 }, { - "epoch": 0.30533484676503975, + "epoch": 0.30491087874408457, "grad_norm": 0.0, - "learning_rate": 1.628098233488317e-05, - "loss": 1.0875, + "learning_rate": 1.6291420604306172e-05, + "loss": 0.9075, "step": 10760 }, { - "epoch": 0.3053632236095346, + "epoch": 0.304939216186347, "grad_norm": 0.0, - "learning_rate": 1.628026714241113e-05, - "loss": 1.0994, + "learning_rate": 1.629070718908283e-05, + "loss": 0.866, "step": 10761 }, { - "epoch": 0.3053916004540295, + "epoch": 0.3049675536286095, "grad_norm": 0.0, - "learning_rate": 1.6279551896889393e-05, - "loss": 0.9375, + "learning_rate": 1.6289993720870526e-05, + "loss": 0.9688, "step": 10762 }, { - "epoch": 0.3054199772985244, + "epoch": 0.30499589107087194, "grad_norm": 0.0, - "learning_rate": 1.6278836598324e-05, - "loss": 1.0269, + "learning_rate": 1.628928019967527e-05, + "loss": 1.0017, "step": 10763 }, { - "epoch": 0.3054483541430193, + "epoch": 0.30502422851313443, "grad_norm": 0.0, - "learning_rate": 1.627812124672099e-05, - "loss": 1.0278, + "learning_rate": 1.6288566625503076e-05, + "loss": 0.9869, "step": 10764 }, { - "epoch": 0.3054767309875142, + "epoch": 0.30505256595539687, "grad_norm": 0.0, - "learning_rate": 1.6277405842086405e-05, - "loss": 0.9344, + "learning_rate": 1.6287852998359943e-05, + "loss": 0.8995, "step": 10765 }, { - "epoch": 0.30550510783200907, + "epoch": 0.3050809033976593, "grad_norm": 0.0, - "learning_rate": 1.627669038442629e-05, - "loss": 0.858, + "learning_rate": 1.628713931825189e-05, + "loss": 0.8451, "step": 10766 }, { - "epoch": 0.305533484676504, + "epoch": 0.3051092408399218, "grad_norm": 0.0, - "learning_rate": 1.6275974873746693e-05, - "loss": 1.01, + "learning_rate": 1.628642558518493e-05, + "loss": 0.9078, "step": 10767 }, { - "epoch": 0.30556186152099885, + "epoch": 0.30513757828218424, "grad_norm": 0.0, - "learning_rate": 1.6275259310053653e-05, - "loss": 0.8703, + "learning_rate": 1.628571179916507e-05, + "loss": 1.0681, "step": 10768 }, { - "epoch": 0.30559023836549376, + "epoch": 0.30516591572444673, "grad_norm": 0.0, - "learning_rate": 1.6274543693353218e-05, - "loss": 0.8699, + "learning_rate": 1.628499796019833e-05, + "loss": 0.9273, "step": 10769 }, { - "epoch": 0.3056186152099887, + "epoch": 0.3051942531667092, "grad_norm": 0.0, - "learning_rate": 1.6273828023651426e-05, - "loss": 0.9174, + "learning_rate": 1.6284284068290716e-05, + "loss": 0.9872, "step": 10770 }, { - "epoch": 0.30564699205448354, + "epoch": 0.3052225906089716, "grad_norm": 0.0, - "learning_rate": 1.627311230095433e-05, - "loss": 0.9144, + "learning_rate": 1.6283570123448244e-05, + "loss": 0.9937, "step": 10771 }, { - "epoch": 0.30567536889897845, + "epoch": 0.3052509280512341, "grad_norm": 0.0, - "learning_rate": 1.627239652526797e-05, - "loss": 0.9479, + "learning_rate": 1.6282856125676927e-05, + "loss": 1.1097, "step": 10772 }, { - "epoch": 0.3057037457434733, + "epoch": 0.30527926549349654, "grad_norm": 0.0, - "learning_rate": 1.62716806965984e-05, - "loss": 0.9667, + "learning_rate": 1.628214207498278e-05, + "loss": 0.9939, "step": 10773 }, { - "epoch": 0.3057321225879682, + "epoch": 0.30530760293575904, "grad_norm": 0.0, - "learning_rate": 1.6270964814951654e-05, - "loss": 0.9136, + "learning_rate": 1.6281427971371817e-05, + "loss": 1.0587, "step": 10774 }, { - "epoch": 0.3057604994324631, + "epoch": 0.3053359403780215, "grad_norm": 0.0, - "learning_rate": 1.627024888033379e-05, - "loss": 0.9069, + "learning_rate": 1.6280713814850056e-05, + "loss": 1.0029, "step": 10775 }, { - "epoch": 0.305788876276958, + "epoch": 0.3053642778202839, "grad_norm": 0.0, - "learning_rate": 1.626953289275085e-05, - "loss": 1.0186, + "learning_rate": 1.6279999605423508e-05, + "loss": 0.8761, "step": 10776 }, { - "epoch": 0.3058172531214529, + "epoch": 0.3053926152625464, "grad_norm": 0.0, - "learning_rate": 1.6268816852208887e-05, - "loss": 0.8292, + "learning_rate": 1.627928534309819e-05, + "loss": 0.9156, "step": 10777 }, { - "epoch": 0.3058456299659478, + "epoch": 0.30542095270480885, "grad_norm": 0.0, - "learning_rate": 1.626810075871394e-05, - "loss": 0.9482, + "learning_rate": 1.627857102788012e-05, + "loss": 0.944, "step": 10778 }, { - "epoch": 0.3058740068104427, + "epoch": 0.30544929014707134, "grad_norm": 0.0, - "learning_rate": 1.6267384612272073e-05, - "loss": 1.0394, + "learning_rate": 1.627785665977532e-05, + "loss": 0.9665, "step": 10779 }, { - "epoch": 0.30590238365493755, + "epoch": 0.3054776275893338, "grad_norm": 0.0, - "learning_rate": 1.626666841288932e-05, - "loss": 1.0409, + "learning_rate": 1.6277142238789798e-05, + "loss": 1.0155, "step": 10780 }, { - "epoch": 0.30593076049943246, + "epoch": 0.30550596503159627, "grad_norm": 0.0, - "learning_rate": 1.6265952160571736e-05, - "loss": 0.9036, + "learning_rate": 1.6276427764929576e-05, + "loss": 0.9744, "step": 10781 }, { - "epoch": 0.3059591373439274, + "epoch": 0.3055343024738587, "grad_norm": 0.0, - "learning_rate": 1.6265235855325377e-05, - "loss": 0.9096, + "learning_rate": 1.6275713238200674e-05, + "loss": 0.9739, "step": 10782 }, { - "epoch": 0.30598751418842224, + "epoch": 0.30556263991612115, "grad_norm": 0.0, - "learning_rate": 1.6264519497156287e-05, - "loss": 0.9297, + "learning_rate": 1.6274998658609107e-05, + "loss": 0.8345, "step": 10783 }, { - "epoch": 0.30601589103291715, + "epoch": 0.30559097735838364, "grad_norm": 0.0, - "learning_rate": 1.626380308607052e-05, - "loss": 1.0445, + "learning_rate": 1.6274284026160894e-05, + "loss": 1.0042, "step": 10784 }, { - "epoch": 0.306044267877412, + "epoch": 0.3056193148006461, "grad_norm": 0.0, - "learning_rate": 1.6263086622074127e-05, - "loss": 0.9416, + "learning_rate": 1.627356934086206e-05, + "loss": 1.0381, "step": 10785 }, { - "epoch": 0.30607264472190693, + "epoch": 0.3056476522429086, "grad_norm": 0.0, - "learning_rate": 1.626237010517316e-05, - "loss": 1.0028, + "learning_rate": 1.6272854602718622e-05, + "loss": 0.9841, "step": 10786 }, { - "epoch": 0.30610102156640184, + "epoch": 0.305675989685171, "grad_norm": 0.0, - "learning_rate": 1.6261653535373668e-05, - "loss": 1.0365, + "learning_rate": 1.62721398117366e-05, + "loss": 0.9862, "step": 10787 }, { - "epoch": 0.3061293984108967, + "epoch": 0.30570432712743345, "grad_norm": 0.0, - "learning_rate": 1.626093691268171e-05, - "loss": 0.9462, + "learning_rate": 1.6271424967922015e-05, + "loss": 1.0367, "step": 10788 }, { - "epoch": 0.3061577752553916, + "epoch": 0.30573266456969594, "grad_norm": 0.0, - "learning_rate": 1.6260220237103338e-05, - "loss": 0.9707, + "learning_rate": 1.627071007128089e-05, + "loss": 0.8096, "step": 10789 }, { - "epoch": 0.3061861520998865, + "epoch": 0.3057610020119584, "grad_norm": 0.0, - "learning_rate": 1.62595035086446e-05, - "loss": 0.889, + "learning_rate": 1.6269995121819243e-05, + "loss": 1.0089, "step": 10790 }, { - "epoch": 0.3062145289443814, + "epoch": 0.3057893394542209, "grad_norm": 0.0, - "learning_rate": 1.6258786727311558e-05, - "loss": 0.9832, + "learning_rate": 1.62692801195431e-05, + "loss": 0.9636, "step": 10791 }, { - "epoch": 0.30624290578887625, + "epoch": 0.3058176768964833, "grad_norm": 0.0, - "learning_rate": 1.625806989311026e-05, - "loss": 0.9692, + "learning_rate": 1.6268565064458482e-05, + "loss": 0.9367, "step": 10792 }, { - "epoch": 0.30627128263337117, + "epoch": 0.3058460143387458, "grad_norm": 0.0, - "learning_rate": 1.6257353006046768e-05, - "loss": 0.8666, + "learning_rate": 1.6267849956571415e-05, + "loss": 1.0004, "step": 10793 }, { - "epoch": 0.3062996594778661, + "epoch": 0.30587435178100825, "grad_norm": 0.0, - "learning_rate": 1.625663606612713e-05, - "loss": 1.0072, + "learning_rate": 1.6267134795887914e-05, + "loss": 0.93, "step": 10794 }, { - "epoch": 0.30632803632236094, + "epoch": 0.3059026892232707, "grad_norm": 0.0, - "learning_rate": 1.625591907335741e-05, - "loss": 0.8774, + "learning_rate": 1.6266419582414016e-05, + "loss": 0.955, "step": 10795 }, { - "epoch": 0.30635641316685586, + "epoch": 0.3059310266655332, "grad_norm": 0.0, - "learning_rate": 1.6255202027743658e-05, - "loss": 1.0963, + "learning_rate": 1.6265704316155735e-05, + "loss": 1.0296, "step": 10796 }, { - "epoch": 0.3063847900113507, + "epoch": 0.3059593641077956, "grad_norm": 0.0, - "learning_rate": 1.6254484929291933e-05, - "loss": 0.9024, + "learning_rate": 1.6264988997119103e-05, + "loss": 1.0756, "step": 10797 }, { - "epoch": 0.30641316685584563, + "epoch": 0.3059877015500581, "grad_norm": 0.0, - "learning_rate": 1.6253767778008293e-05, - "loss": 0.952, + "learning_rate": 1.626427362531014e-05, + "loss": 0.9732, "step": 10798 }, { - "epoch": 0.30644154370034055, + "epoch": 0.30601603899232055, "grad_norm": 0.0, - "learning_rate": 1.6253050573898796e-05, - "loss": 1.0424, + "learning_rate": 1.6263558200734875e-05, + "loss": 0.9999, "step": 10799 }, { - "epoch": 0.3064699205448354, + "epoch": 0.306044376434583, "grad_norm": 0.0, - "learning_rate": 1.6252333316969497e-05, - "loss": 0.9215, + "learning_rate": 1.6262842723399335e-05, + "loss": 0.8965, "step": 10800 }, { - "epoch": 0.3064982973893303, + "epoch": 0.3060727138768455, "grad_norm": 0.0, - "learning_rate": 1.6251616007226464e-05, - "loss": 1.0745, + "learning_rate": 1.6262127193309543e-05, + "loss": 1.0754, "step": 10801 }, { - "epoch": 0.3065266742338252, + "epoch": 0.3061010513191079, "grad_norm": 0.0, - "learning_rate": 1.6250898644675745e-05, - "loss": 0.9877, + "learning_rate": 1.6261411610471526e-05, + "loss": 0.9333, "step": 10802 }, { - "epoch": 0.3065550510783201, + "epoch": 0.3061293887613704, "grad_norm": 0.0, - "learning_rate": 1.6250181229323403e-05, - "loss": 0.9437, + "learning_rate": 1.626069597489132e-05, + "loss": 0.9942, "step": 10803 }, { - "epoch": 0.306583427922815, + "epoch": 0.30615772620363285, "grad_norm": 0.0, - "learning_rate": 1.6249463761175502e-05, - "loss": 0.9753, + "learning_rate": 1.6259980286574938e-05, + "loss": 0.9734, "step": 10804 }, { - "epoch": 0.30661180476730987, + "epoch": 0.30618606364589535, "grad_norm": 0.0, - "learning_rate": 1.6248746240238095e-05, - "loss": 0.8991, + "learning_rate": 1.6259264545528426e-05, + "loss": 1.0319, "step": 10805 }, { - "epoch": 0.3066401816118048, + "epoch": 0.3062144010881578, "grad_norm": 0.0, - "learning_rate": 1.624802866651725e-05, - "loss": 1.1112, + "learning_rate": 1.6258548751757802e-05, + "loss": 1.0496, "step": 10806 }, { - "epoch": 0.30666855845629964, + "epoch": 0.3062427385304202, "grad_norm": 0.0, - "learning_rate": 1.624731104001903e-05, - "loss": 0.8798, + "learning_rate": 1.6257832905269095e-05, + "loss": 1.0157, "step": 10807 }, { - "epoch": 0.30669693530079456, + "epoch": 0.3062710759726827, "grad_norm": 0.0, - "learning_rate": 1.6246593360749488e-05, - "loss": 1.0854, + "learning_rate": 1.6257117006068338e-05, + "loss": 0.9356, "step": 10808 }, { - "epoch": 0.3067253121452894, + "epoch": 0.30629941341494515, "grad_norm": 0.0, - "learning_rate": 1.6245875628714692e-05, - "loss": 1.0138, + "learning_rate": 1.6256401054161565e-05, + "loss": 0.9626, "step": 10809 }, { - "epoch": 0.30675368898978433, + "epoch": 0.30632775085720765, "grad_norm": 0.0, - "learning_rate": 1.6245157843920708e-05, - "loss": 0.9993, + "learning_rate": 1.6255685049554802e-05, + "loss": 0.9413, "step": 10810 }, { - "epoch": 0.30678206583427925, + "epoch": 0.3063560882994701, "grad_norm": 0.0, - "learning_rate": 1.624444000637359e-05, - "loss": 1.0559, + "learning_rate": 1.6254968992254078e-05, + "loss": 1.1578, "step": 10811 }, { - "epoch": 0.3068104426787741, + "epoch": 0.3063844257417325, "grad_norm": 0.0, - "learning_rate": 1.6243722116079406e-05, - "loss": 0.9777, + "learning_rate": 1.6254252882265428e-05, + "loss": 0.9107, "step": 10812 }, { - "epoch": 0.306838819523269, + "epoch": 0.306412763183995, "grad_norm": 0.0, - "learning_rate": 1.6243004173044224e-05, - "loss": 0.9765, + "learning_rate": 1.6253536719594883e-05, + "loss": 0.8949, "step": 10813 }, { - "epoch": 0.3068671963677639, + "epoch": 0.30644110062625746, "grad_norm": 0.0, - "learning_rate": 1.6242286177274103e-05, - "loss": 0.8608, + "learning_rate": 1.6252820504248477e-05, + "loss": 0.9304, "step": 10814 }, { - "epoch": 0.3068955732122588, + "epoch": 0.30646943806851995, "grad_norm": 0.0, - "learning_rate": 1.6241568128775113e-05, - "loss": 0.9856, + "learning_rate": 1.625210423623224e-05, + "loss": 0.9941, "step": 10815 }, { - "epoch": 0.3069239500567537, + "epoch": 0.3064977755107824, "grad_norm": 0.0, - "learning_rate": 1.6240850027553313e-05, - "loss": 1.0641, + "learning_rate": 1.6251387915552213e-05, + "loss": 1.0087, "step": 10816 }, { - "epoch": 0.3069523269012486, + "epoch": 0.3065261129530449, "grad_norm": 0.0, - "learning_rate": 1.6240131873614774e-05, - "loss": 0.8108, + "learning_rate": 1.625067154221442e-05, + "loss": 1.0311, "step": 10817 }, { - "epoch": 0.3069807037457435, + "epoch": 0.3065544503953073, "grad_norm": 0.0, - "learning_rate": 1.623941366696556e-05, - "loss": 0.9292, + "learning_rate": 1.62499551162249e-05, + "loss": 0.8738, "step": 10818 }, { - "epoch": 0.30700908059023835, + "epoch": 0.30658278783756976, "grad_norm": 0.0, - "learning_rate": 1.6238695407611737e-05, - "loss": 1.0324, + "learning_rate": 1.624923863758969e-05, + "loss": 1.0045, "step": 10819 }, { - "epoch": 0.30703745743473326, + "epoch": 0.30661112527983225, "grad_norm": 0.0, - "learning_rate": 1.6237977095559374e-05, - "loss": 1.019, + "learning_rate": 1.6248522106314814e-05, + "loss": 1.0147, "step": 10820 }, { - "epoch": 0.3070658342792281, + "epoch": 0.3066394627220947, "grad_norm": 0.0, - "learning_rate": 1.623725873081454e-05, - "loss": 0.9693, + "learning_rate": 1.6247805522406324e-05, + "loss": 1.0844, "step": 10821 }, { - "epoch": 0.30709421112372304, + "epoch": 0.3066678001643572, "grad_norm": 0.0, - "learning_rate": 1.62365403133833e-05, - "loss": 1.0234, + "learning_rate": 1.6247088885870244e-05, + "loss": 1.0526, "step": 10822 }, { - "epoch": 0.30712258796821795, + "epoch": 0.3066961376066196, "grad_norm": 0.0, - "learning_rate": 1.6235821843271723e-05, - "loss": 0.8978, + "learning_rate": 1.6246372196712615e-05, + "loss": 1.0143, "step": 10823 }, { - "epoch": 0.3071509648127128, + "epoch": 0.30672447504888206, "grad_norm": 0.0, - "learning_rate": 1.6235103320485878e-05, - "loss": 0.9305, + "learning_rate": 1.6245655454939474e-05, + "loss": 0.9933, "step": 10824 }, { - "epoch": 0.3071793416572077, + "epoch": 0.30675281249114456, "grad_norm": 0.0, - "learning_rate": 1.6234384745031838e-05, - "loss": 0.9513, + "learning_rate": 1.624493866055686e-05, + "loss": 0.8859, "step": 10825 }, { - "epoch": 0.3072077185017026, + "epoch": 0.306781149933407, "grad_norm": 0.0, - "learning_rate": 1.6233666116915667e-05, - "loss": 0.8904, + "learning_rate": 1.6244221813570806e-05, + "loss": 0.9902, "step": 10826 }, { - "epoch": 0.3072360953461975, + "epoch": 0.3068094873756695, "grad_norm": 0.0, - "learning_rate": 1.623294743614344e-05, - "loss": 0.9208, + "learning_rate": 1.6243504913987357e-05, + "loss": 0.9955, "step": 10827 }, { - "epoch": 0.3072644721906924, + "epoch": 0.3068378248179319, "grad_norm": 0.0, - "learning_rate": 1.6232228702721223e-05, - "loss": 0.9305, + "learning_rate": 1.6242787961812543e-05, + "loss": 0.9018, "step": 10828 }, { - "epoch": 0.3072928490351873, + "epoch": 0.3068661622601944, "grad_norm": 0.0, - "learning_rate": 1.623150991665509e-05, - "loss": 1.0305, + "learning_rate": 1.624207095705241e-05, + "loss": 1.1221, "step": 10829 }, { - "epoch": 0.3073212258796822, + "epoch": 0.30689449970245686, "grad_norm": 0.0, - "learning_rate": 1.6230791077951117e-05, - "loss": 0.9831, + "learning_rate": 1.6241353899712994e-05, + "loss": 0.9868, "step": 10830 }, { - "epoch": 0.30734960272417705, + "epoch": 0.3069228371447193, "grad_norm": 0.0, - "learning_rate": 1.6230072186615367e-05, - "loss": 1.0435, + "learning_rate": 1.6240636789800337e-05, + "loss": 0.9694, "step": 10831 }, { - "epoch": 0.30737797956867197, + "epoch": 0.3069511745869818, "grad_norm": 0.0, - "learning_rate": 1.6229353242653922e-05, - "loss": 0.9376, + "learning_rate": 1.6239919627320477e-05, + "loss": 0.9146, "step": 10832 }, { - "epoch": 0.3074063564131669, + "epoch": 0.30697951202924423, "grad_norm": 0.0, - "learning_rate": 1.6228634246072848e-05, - "loss": 1.0735, + "learning_rate": 1.623920241227946e-05, + "loss": 0.922, "step": 10833 }, { - "epoch": 0.30743473325766174, + "epoch": 0.3070078494715067, "grad_norm": 0.0, - "learning_rate": 1.622791519687822e-05, - "loss": 1.0211, + "learning_rate": 1.6238485144683323e-05, + "loss": 1.0804, "step": 10834 }, { - "epoch": 0.30746311010215666, + "epoch": 0.30703618691376916, "grad_norm": 0.0, - "learning_rate": 1.6227196095076113e-05, - "loss": 0.9936, + "learning_rate": 1.6237767824538112e-05, + "loss": 0.9797, "step": 10835 }, { - "epoch": 0.3074914869466515, + "epoch": 0.3070645243560316, "grad_norm": 0.0, - "learning_rate": 1.62264769406726e-05, - "loss": 0.9876, + "learning_rate": 1.6237050451849862e-05, + "loss": 1.03, "step": 10836 }, { - "epoch": 0.30751986379114643, + "epoch": 0.3070928617982941, "grad_norm": 0.0, - "learning_rate": 1.6225757733673754e-05, - "loss": 0.9192, + "learning_rate": 1.6236333026624623e-05, + "loss": 0.8972, "step": 10837 }, { - "epoch": 0.3075482406356413, + "epoch": 0.30712119924055653, "grad_norm": 0.0, - "learning_rate": 1.6225038474085657e-05, - "loss": 0.8431, + "learning_rate": 1.6235615548868434e-05, + "loss": 1.0463, "step": 10838 }, { - "epoch": 0.3075766174801362, + "epoch": 0.307149536682819, "grad_norm": 0.0, - "learning_rate": 1.6224319161914378e-05, - "loss": 0.9332, + "learning_rate": 1.6234898018587336e-05, + "loss": 1.1473, "step": 10839 }, { - "epoch": 0.3076049943246311, + "epoch": 0.30717787412508146, "grad_norm": 0.0, - "learning_rate": 1.6223599797165996e-05, - "loss": 0.9275, + "learning_rate": 1.6234180435787382e-05, + "loss": 0.9886, "step": 10840 }, { - "epoch": 0.307633371169126, + "epoch": 0.30720621156734396, "grad_norm": 0.0, - "learning_rate": 1.6222880379846584e-05, - "loss": 0.9825, + "learning_rate": 1.6233462800474608e-05, + "loss": 0.9625, "step": 10841 }, { - "epoch": 0.3076617480136209, + "epoch": 0.3072345490096064, "grad_norm": 0.0, - "learning_rate": 1.6222160909962226e-05, - "loss": 1.0269, + "learning_rate": 1.6232745112655065e-05, + "loss": 0.9618, "step": 10842 }, { - "epoch": 0.30769012485811575, + "epoch": 0.30726288645186883, "grad_norm": 0.0, - "learning_rate": 1.622144138751899e-05, - "loss": 0.9896, + "learning_rate": 1.6232027372334793e-05, + "loss": 0.9722, "step": 10843 }, { - "epoch": 0.30771850170261067, + "epoch": 0.3072912238941313, "grad_norm": 0.0, - "learning_rate": 1.622072181252296e-05, - "loss": 0.9271, + "learning_rate": 1.623130957951984e-05, + "loss": 0.9586, "step": 10844 }, { - "epoch": 0.3077468785471056, + "epoch": 0.30731956133639377, "grad_norm": 0.0, - "learning_rate": 1.6220002184980218e-05, - "loss": 1.0423, + "learning_rate": 1.6230591734216252e-05, + "loss": 0.8907, "step": 10845 }, { - "epoch": 0.30777525539160044, + "epoch": 0.30734789877865626, "grad_norm": 0.0, - "learning_rate": 1.6219282504896834e-05, - "loss": 0.919, + "learning_rate": 1.6229873836430078e-05, + "loss": 0.9048, "step": 10846 }, { - "epoch": 0.30780363223609536, + "epoch": 0.3073762362209187, "grad_norm": 0.0, - "learning_rate": 1.6218562772278895e-05, - "loss": 0.8627, + "learning_rate": 1.6229155886167364e-05, + "loss": 0.9044, "step": 10847 }, { - "epoch": 0.3078320090805902, + "epoch": 0.30740457366318114, "grad_norm": 0.0, - "learning_rate": 1.6217842987132472e-05, - "loss": 0.9222, + "learning_rate": 1.6228437883434158e-05, + "loss": 0.8614, "step": 10848 }, { - "epoch": 0.30786038592508513, + "epoch": 0.30743291110544363, "grad_norm": 0.0, - "learning_rate": 1.6217123149463653e-05, - "loss": 0.9644, + "learning_rate": 1.6227719828236503e-05, + "loss": 0.9353, "step": 10849 }, { - "epoch": 0.30788876276958005, + "epoch": 0.30746124854770607, "grad_norm": 0.0, - "learning_rate": 1.6216403259278514e-05, - "loss": 1.0583, + "learning_rate": 1.622700172058045e-05, + "loss": 0.9486, "step": 10850 }, { - "epoch": 0.3079171396140749, + "epoch": 0.30748958598996856, "grad_norm": 0.0, - "learning_rate": 1.6215683316583133e-05, - "loss": 0.9552, + "learning_rate": 1.6226283560472053e-05, + "loss": 0.9714, "step": 10851 }, { - "epoch": 0.3079455164585698, + "epoch": 0.307517923432231, "grad_norm": 0.0, - "learning_rate": 1.62149633213836e-05, - "loss": 0.9914, + "learning_rate": 1.6225565347917357e-05, + "loss": 1.0287, "step": 10852 }, { - "epoch": 0.3079738933030647, + "epoch": 0.3075462608744935, "grad_norm": 0.0, - "learning_rate": 1.6214243273685994e-05, - "loss": 1.1143, + "learning_rate": 1.622484708292241e-05, + "loss": 1.0154, "step": 10853 }, { - "epoch": 0.3080022701475596, + "epoch": 0.30757459831675593, "grad_norm": 0.0, - "learning_rate": 1.6213523173496393e-05, - "loss": 0.8773, + "learning_rate": 1.622412876549327e-05, + "loss": 0.9457, "step": 10854 }, { - "epoch": 0.30803064699205446, + "epoch": 0.30760293575901837, "grad_norm": 0.0, - "learning_rate": 1.6212803020820883e-05, - "loss": 0.9367, + "learning_rate": 1.6223410395635976e-05, + "loss": 0.8823, "step": 10855 }, { - "epoch": 0.3080590238365494, + "epoch": 0.30763127320128086, "grad_norm": 0.0, - "learning_rate": 1.621208281566555e-05, - "loss": 0.9533, + "learning_rate": 1.6222691973356587e-05, + "loss": 0.8383, "step": 10856 }, { - "epoch": 0.3080874006810443, + "epoch": 0.3076596106435433, "grad_norm": 0.0, - "learning_rate": 1.6211362558036468e-05, - "loss": 1.0315, + "learning_rate": 1.622197349866115e-05, + "loss": 0.977, "step": 10857 }, { - "epoch": 0.30811577752553915, + "epoch": 0.3076879480858058, "grad_norm": 0.0, - "learning_rate": 1.621064224793973e-05, - "loss": 0.9283, + "learning_rate": 1.6221254971555726e-05, + "loss": 1.1367, "step": 10858 }, { - "epoch": 0.30814415437003406, + "epoch": 0.30771628552806823, "grad_norm": 0.0, - "learning_rate": 1.6209921885381418e-05, - "loss": 1.0201, + "learning_rate": 1.6220536392046357e-05, + "loss": 0.873, "step": 10859 }, { - "epoch": 0.3081725312145289, + "epoch": 0.3077446229703307, "grad_norm": 0.0, - "learning_rate": 1.6209201470367618e-05, - "loss": 0.9283, + "learning_rate": 1.6219817760139103e-05, + "loss": 0.9967, "step": 10860 }, { - "epoch": 0.30820090805902384, + "epoch": 0.30777296041259317, "grad_norm": 0.0, - "learning_rate": 1.6208481002904412e-05, - "loss": 0.9649, + "learning_rate": 1.621909907584001e-05, + "loss": 0.9908, "step": 10861 }, { - "epoch": 0.30822928490351875, + "epoch": 0.3078012978548556, "grad_norm": 0.0, - "learning_rate": 1.620776048299789e-05, - "loss": 0.98, + "learning_rate": 1.621838033915514e-05, + "loss": 0.9045, "step": 10862 }, { - "epoch": 0.3082576617480136, + "epoch": 0.3078296352971181, "grad_norm": 0.0, - "learning_rate": 1.6207039910654133e-05, - "loss": 1.0453, + "learning_rate": 1.621766155009054e-05, + "loss": 1.0004, "step": 10863 }, { - "epoch": 0.3082860385925085, + "epoch": 0.30785797273938054, "grad_norm": 0.0, - "learning_rate": 1.6206319285879233e-05, - "loss": 0.8112, + "learning_rate": 1.6216942708652276e-05, + "loss": 1.0546, "step": 10864 }, { - "epoch": 0.3083144154370034, + "epoch": 0.30788631018164303, "grad_norm": 0.0, - "learning_rate": 1.6205598608679272e-05, - "loss": 0.9188, + "learning_rate": 1.6216223814846385e-05, + "loss": 0.92, "step": 10865 }, { - "epoch": 0.3083427922814983, + "epoch": 0.30791464762390547, "grad_norm": 0.0, - "learning_rate": 1.6204877879060346e-05, - "loss": 0.9817, + "learning_rate": 1.6215504868678937e-05, + "loss": 0.9829, "step": 10866 }, { - "epoch": 0.3083711691259932, + "epoch": 0.3079429850661679, "grad_norm": 0.0, - "learning_rate": 1.6204157097028534e-05, - "loss": 0.8538, + "learning_rate": 1.6214785870155983e-05, + "loss": 0.9666, "step": 10867 }, { - "epoch": 0.3083995459704881, + "epoch": 0.3079713225084304, "grad_norm": 0.0, - "learning_rate": 1.6203436262589927e-05, - "loss": 1.0651, + "learning_rate": 1.6214066819283577e-05, + "loss": 0.9131, "step": 10868 }, { - "epoch": 0.308427922814983, + "epoch": 0.30799965995069284, "grad_norm": 0.0, - "learning_rate": 1.620271537575062e-05, - "loss": 0.9377, + "learning_rate": 1.621334771606778e-05, + "loss": 0.9019, "step": 10869 }, { - "epoch": 0.30845629965947785, + "epoch": 0.30802799739295533, "grad_norm": 0.0, - "learning_rate": 1.6201994436516693e-05, - "loss": 0.9546, + "learning_rate": 1.6212628560514652e-05, + "loss": 0.9864, "step": 10870 }, { - "epoch": 0.30848467650397277, + "epoch": 0.30805633483521777, "grad_norm": 0.0, - "learning_rate": 1.6201273444894243e-05, - "loss": 0.914, + "learning_rate": 1.6211909352630246e-05, + "loss": 1.0175, "step": 10871 }, { - "epoch": 0.3085130533484676, + "epoch": 0.3080846722774802, "grad_norm": 0.0, - "learning_rate": 1.6200552400889355e-05, - "loss": 0.9735, + "learning_rate": 1.6211190092420616e-05, + "loss": 0.9957, "step": 10872 }, { - "epoch": 0.30854143019296254, + "epoch": 0.3081130097197427, "grad_norm": 0.0, - "learning_rate": 1.6199831304508126e-05, - "loss": 1.0009, + "learning_rate": 1.621047077989183e-05, + "loss": 0.8391, "step": 10873 }, { - "epoch": 0.30856980703745746, + "epoch": 0.30814134716200514, "grad_norm": 0.0, - "learning_rate": 1.6199110155756636e-05, - "loss": 0.9111, + "learning_rate": 1.6209751415049937e-05, + "loss": 0.7892, "step": 10874 }, { - "epoch": 0.3085981838819523, + "epoch": 0.30816968460426764, "grad_norm": 0.0, - "learning_rate": 1.619838895464099e-05, - "loss": 1.0148, + "learning_rate": 1.6209031997901006e-05, + "loss": 0.791, "step": 10875 }, { - "epoch": 0.30862656072644723, + "epoch": 0.3081980220465301, "grad_norm": 0.0, - "learning_rate": 1.6197667701167272e-05, - "loss": 0.9979, + "learning_rate": 1.6208312528451094e-05, + "loss": 0.947, "step": 10876 }, { - "epoch": 0.3086549375709421, + "epoch": 0.30822635948879257, "grad_norm": 0.0, - "learning_rate": 1.6196946395341576e-05, - "loss": 0.8987, + "learning_rate": 1.6207593006706256e-05, + "loss": 0.9696, "step": 10877 }, { - "epoch": 0.308683314415437, + "epoch": 0.308254696931055, "grad_norm": 0.0, - "learning_rate": 1.6196225037169995e-05, - "loss": 0.8376, + "learning_rate": 1.620687343267256e-05, + "loss": 0.986, "step": 10878 }, { - "epoch": 0.3087116912599319, + "epoch": 0.30828303437331744, "grad_norm": 0.0, - "learning_rate": 1.6195503626658623e-05, - "loss": 0.9161, + "learning_rate": 1.6206153806356062e-05, + "loss": 0.9886, "step": 10879 }, { - "epoch": 0.3087400681044268, + "epoch": 0.30831137181557994, "grad_norm": 0.0, - "learning_rate": 1.6194782163813557e-05, - "loss": 0.9623, + "learning_rate": 1.6205434127762827e-05, + "loss": 0.8535, "step": 10880 }, { - "epoch": 0.3087684449489217, + "epoch": 0.3083397092578424, "grad_norm": 0.0, - "learning_rate": 1.6194060648640882e-05, - "loss": 0.9459, + "learning_rate": 1.6204714396898916e-05, + "loss": 1.1142, "step": 10881 }, { - "epoch": 0.30879682179341655, + "epoch": 0.30836804670010487, "grad_norm": 0.0, - "learning_rate": 1.6193339081146698e-05, - "loss": 0.9654, + "learning_rate": 1.6203994613770393e-05, + "loss": 1.0273, "step": 10882 }, { - "epoch": 0.30882519863791147, + "epoch": 0.3083963841423673, "grad_norm": 0.0, - "learning_rate": 1.6192617461337102e-05, - "loss": 0.9209, + "learning_rate": 1.620327477838332e-05, + "loss": 1.0607, "step": 10883 }, { - "epoch": 0.3088535754824064, + "epoch": 0.30842472158462975, "grad_norm": 0.0, - "learning_rate": 1.619189578921819e-05, - "loss": 1.042, + "learning_rate": 1.6202554890743754e-05, + "loss": 0.9409, "step": 10884 }, { - "epoch": 0.30888195232690124, + "epoch": 0.30845305902689224, "grad_norm": 0.0, - "learning_rate": 1.6191174064796056e-05, - "loss": 0.9866, + "learning_rate": 1.620183495085777e-05, + "loss": 0.8416, "step": 10885 }, { - "epoch": 0.30891032917139616, + "epoch": 0.3084813964691547, "grad_norm": 0.0, - "learning_rate": 1.6190452288076795e-05, - "loss": 0.9733, + "learning_rate": 1.6201114958731427e-05, + "loss": 0.949, "step": 10886 }, { - "epoch": 0.308938706015891, + "epoch": 0.30850973391141717, "grad_norm": 0.0, - "learning_rate": 1.6189730459066505e-05, - "loss": 0.9308, + "learning_rate": 1.620039491437079e-05, + "loss": 1.071, "step": 10887 }, { - "epoch": 0.30896708286038593, + "epoch": 0.3085380713536796, "grad_norm": 0.0, - "learning_rate": 1.6189008577771286e-05, - "loss": 0.914, + "learning_rate": 1.6199674817781924e-05, + "loss": 0.9776, "step": 10888 }, { - "epoch": 0.3089954597048808, + "epoch": 0.3085664087959421, "grad_norm": 0.0, - "learning_rate": 1.6188286644197227e-05, - "loss": 0.9786, + "learning_rate": 1.6198954668970893e-05, + "loss": 0.9278, "step": 10889 }, { - "epoch": 0.3090238365493757, + "epoch": 0.30859474623820454, "grad_norm": 0.0, - "learning_rate": 1.6187564658350438e-05, - "loss": 0.9831, + "learning_rate": 1.6198234467943765e-05, + "loss": 1.0067, "step": 10890 }, { - "epoch": 0.3090522133938706, + "epoch": 0.308623083680467, "grad_norm": 0.0, - "learning_rate": 1.618684262023701e-05, - "loss": 0.8934, + "learning_rate": 1.619751421470661e-05, + "loss": 0.9885, "step": 10891 }, { - "epoch": 0.3090805902383655, + "epoch": 0.3086514211227295, "grad_norm": 0.0, - "learning_rate": 1.6186120529863045e-05, - "loss": 0.9721, + "learning_rate": 1.619679390926549e-05, + "loss": 0.8412, "step": 10892 }, { - "epoch": 0.3091089670828604, + "epoch": 0.3086797585649919, "grad_norm": 0.0, - "learning_rate": 1.6185398387234644e-05, - "loss": 0.9868, + "learning_rate": 1.619607355162647e-05, + "loss": 1.036, "step": 10893 }, { - "epoch": 0.30913734392735526, + "epoch": 0.3087080960072544, "grad_norm": 0.0, - "learning_rate": 1.61846761923579e-05, - "loss": 0.9452, + "learning_rate": 1.619535314179563e-05, + "loss": 0.8943, "step": 10894 }, { - "epoch": 0.30916572077185017, + "epoch": 0.30873643344951684, "grad_norm": 0.0, - "learning_rate": 1.6183953945238924e-05, - "loss": 0.8914, + "learning_rate": 1.619463267977902e-05, + "loss": 0.9476, "step": 10895 }, { - "epoch": 0.3091940976163451, + "epoch": 0.3087647708917793, "grad_norm": 0.0, - "learning_rate": 1.6183231645883804e-05, - "loss": 0.8788, + "learning_rate": 1.6193912165582727e-05, + "loss": 1.0009, "step": 10896 }, { - "epoch": 0.30922247446083995, + "epoch": 0.3087931083340418, "grad_norm": 0.0, - "learning_rate": 1.6182509294298654e-05, - "loss": 0.9312, + "learning_rate": 1.6193191599212806e-05, + "loss": 0.9068, "step": 10897 }, { - "epoch": 0.30925085130533486, + "epoch": 0.3088214457763042, "grad_norm": 0.0, - "learning_rate": 1.618178689048957e-05, - "loss": 1.0237, + "learning_rate": 1.6192470980675335e-05, + "loss": 0.9469, "step": 10898 }, { - "epoch": 0.3092792281498297, + "epoch": 0.3088497832185667, "grad_norm": 0.0, - "learning_rate": 1.618106443446265e-05, - "loss": 0.9251, + "learning_rate": 1.619175030997638e-05, + "loss": 1.0551, "step": 10899 }, { - "epoch": 0.30930760499432464, + "epoch": 0.30887812066082915, "grad_norm": 0.0, - "learning_rate": 1.6180341926224006e-05, - "loss": 0.9162, + "learning_rate": 1.6191029587122013e-05, + "loss": 0.9569, "step": 10900 }, { - "epoch": 0.3093359818388195, + "epoch": 0.30890645810309164, "grad_norm": 0.0, - "learning_rate": 1.6179619365779732e-05, - "loss": 0.9769, + "learning_rate": 1.6190308812118305e-05, + "loss": 0.952, "step": 10901 }, { - "epoch": 0.3093643586833144, + "epoch": 0.3089347955453541, "grad_norm": 0.0, - "learning_rate": 1.6178896753135938e-05, - "loss": 1.0652, + "learning_rate": 1.6189587984971327e-05, + "loss": 0.962, "step": 10902 }, { - "epoch": 0.3093927355278093, + "epoch": 0.3089631329876165, "grad_norm": 0.0, - "learning_rate": 1.6178174088298725e-05, - "loss": 0.9721, + "learning_rate": 1.618886710568715e-05, + "loss": 0.9021, "step": 10903 }, { - "epoch": 0.3094211123723042, + "epoch": 0.308991470429879, "grad_norm": 0.0, - "learning_rate": 1.6177451371274197e-05, - "loss": 0.9834, + "learning_rate": 1.618814617427185e-05, + "loss": 0.9093, "step": 10904 }, { - "epoch": 0.3094494892167991, + "epoch": 0.30901980787214145, "grad_norm": 0.0, - "learning_rate": 1.6176728602068458e-05, - "loss": 0.9244, + "learning_rate": 1.6187425190731496e-05, + "loss": 1.0415, "step": 10905 }, { - "epoch": 0.30947786606129396, + "epoch": 0.30904814531440394, "grad_norm": 0.0, - "learning_rate": 1.6176005780687617e-05, - "loss": 1.0288, + "learning_rate": 1.6186704155072162e-05, + "loss": 0.9529, "step": 10906 }, { - "epoch": 0.3095062429057889, + "epoch": 0.3090764827566664, "grad_norm": 0.0, - "learning_rate": 1.6175282907137773e-05, - "loss": 1.004, + "learning_rate": 1.618598306729992e-05, + "loss": 0.8805, "step": 10907 }, { - "epoch": 0.3095346197502838, + "epoch": 0.3091048201989288, "grad_norm": 0.0, - "learning_rate": 1.6174559981425045e-05, - "loss": 0.8921, + "learning_rate": 1.6185261927420845e-05, + "loss": 1.0092, "step": 10908 }, { - "epoch": 0.30956299659477865, + "epoch": 0.3091331576411913, "grad_norm": 0.0, - "learning_rate": 1.6173837003555525e-05, - "loss": 0.9913, + "learning_rate": 1.6184540735441015e-05, + "loss": 0.95, "step": 10909 }, { - "epoch": 0.30959137343927357, + "epoch": 0.30916149508345375, "grad_norm": 0.0, - "learning_rate": 1.6173113973535327e-05, - "loss": 1.0405, + "learning_rate": 1.61838194913665e-05, + "loss": 0.9459, "step": 10910 }, { - "epoch": 0.3096197502837684, + "epoch": 0.30918983252571625, "grad_norm": 0.0, - "learning_rate": 1.617239089137056e-05, - "loss": 0.9445, + "learning_rate": 1.6183098195203376e-05, + "loss": 0.9635, "step": 10911 }, { - "epoch": 0.30964812712826334, + "epoch": 0.3092181699679787, "grad_norm": 0.0, - "learning_rate": 1.6171667757067328e-05, - "loss": 0.9157, + "learning_rate": 1.6182376846957724e-05, + "loss": 0.9201, "step": 10912 }, { - "epoch": 0.30967650397275825, + "epoch": 0.3092465074102412, "grad_norm": 0.0, - "learning_rate": 1.6170944570631743e-05, - "loss": 0.9606, + "learning_rate": 1.6181655446635613e-05, + "loss": 0.9852, "step": 10913 }, { - "epoch": 0.3097048808172531, + "epoch": 0.3092748448525036, "grad_norm": 0.0, - "learning_rate": 1.617022133206991e-05, - "loss": 0.8976, + "learning_rate": 1.6180933994243123e-05, + "loss": 0.9204, "step": 10914 }, { - "epoch": 0.30973325766174803, + "epoch": 0.30930318229476605, "grad_norm": 0.0, - "learning_rate": 1.616949804138794e-05, - "loss": 0.9787, + "learning_rate": 1.618021248978633e-05, + "loss": 0.9068, "step": 10915 }, { - "epoch": 0.3097616345062429, + "epoch": 0.30933151973702855, "grad_norm": 0.0, - "learning_rate": 1.6168774698591943e-05, - "loss": 1.0429, + "learning_rate": 1.6179490933271312e-05, + "loss": 0.992, "step": 10916 }, { - "epoch": 0.3097900113507378, + "epoch": 0.309359857179291, "grad_norm": 0.0, - "learning_rate": 1.6168051303688028e-05, - "loss": 0.8945, + "learning_rate": 1.6178769324704148e-05, + "loss": 0.9733, "step": 10917 }, { - "epoch": 0.30981838819523266, + "epoch": 0.3093881946215535, "grad_norm": 0.0, - "learning_rate": 1.616732785668231e-05, - "loss": 1.0163, + "learning_rate": 1.6178047664090915e-05, + "loss": 0.8163, "step": 10918 }, { - "epoch": 0.3098467650397276, + "epoch": 0.3094165320638159, "grad_norm": 0.0, - "learning_rate": 1.6166604357580894e-05, - "loss": 0.8299, + "learning_rate": 1.6177325951437693e-05, + "loss": 0.9684, "step": 10919 }, { - "epoch": 0.3098751418842225, + "epoch": 0.30944486950607836, "grad_norm": 0.0, - "learning_rate": 1.616588080638989e-05, - "loss": 1.0983, + "learning_rate": 1.6176604186750563e-05, + "loss": 0.8562, "step": 10920 }, { - "epoch": 0.30990351872871735, + "epoch": 0.30947320694834085, "grad_norm": 0.0, - "learning_rate": 1.616515720311542e-05, - "loss": 1.0114, + "learning_rate": 1.61758823700356e-05, + "loss": 0.9561, "step": 10921 }, { - "epoch": 0.30993189557321227, + "epoch": 0.3095015443906033, "grad_norm": 0.0, - "learning_rate": 1.6164433547763584e-05, - "loss": 0.8673, + "learning_rate": 1.6175160501298887e-05, + "loss": 0.9097, "step": 10922 }, { - "epoch": 0.30996027241770713, + "epoch": 0.3095298818328658, "grad_norm": 0.0, - "learning_rate": 1.6163709840340503e-05, - "loss": 0.9635, + "learning_rate": 1.6174438580546502e-05, + "loss": 0.878, "step": 10923 }, { - "epoch": 0.30998864926220204, + "epoch": 0.3095582192751282, "grad_norm": 0.0, - "learning_rate": 1.616298608085229e-05, - "loss": 0.9265, + "learning_rate": 1.617371660778453e-05, + "loss": 1.055, "step": 10924 }, { - "epoch": 0.31001702610669696, + "epoch": 0.3095865567173907, "grad_norm": 0.0, - "learning_rate": 1.6162262269305054e-05, - "loss": 0.9002, + "learning_rate": 1.617299458301905e-05, + "loss": 0.9506, "step": 10925 }, { - "epoch": 0.3100454029511918, + "epoch": 0.30961489415965315, "grad_norm": 0.0, - "learning_rate": 1.6161538405704914e-05, - "loss": 0.9355, + "learning_rate": 1.6172272506256144e-05, + "loss": 1.0059, "step": 10926 }, { - "epoch": 0.31007377979568673, + "epoch": 0.3096432316019156, "grad_norm": 0.0, - "learning_rate": 1.616081449005798e-05, - "loss": 0.8767, + "learning_rate": 1.6171550377501893e-05, + "loss": 0.9296, "step": 10927 }, { - "epoch": 0.3101021566401816, + "epoch": 0.3096715690441781, "grad_norm": 0.0, - "learning_rate": 1.616009052237037e-05, - "loss": 0.9931, + "learning_rate": 1.6170828196762383e-05, + "loss": 0.9882, "step": 10928 }, { - "epoch": 0.3101305334846765, + "epoch": 0.3096999064864405, "grad_norm": 0.0, - "learning_rate": 1.61593665026482e-05, - "loss": 0.9416, + "learning_rate": 1.6170105964043698e-05, + "loss": 0.9766, "step": 10929 }, { - "epoch": 0.3101589103291714, + "epoch": 0.309728243928703, "grad_norm": 0.0, - "learning_rate": 1.615864243089758e-05, - "loss": 1.0902, + "learning_rate": 1.6169383679351915e-05, + "loss": 0.9859, "step": 10930 }, { - "epoch": 0.3101872871736663, + "epoch": 0.30975658137096546, "grad_norm": 0.0, - "learning_rate": 1.6157918307124635e-05, - "loss": 0.9362, + "learning_rate": 1.6168661342693125e-05, + "loss": 1.0067, "step": 10931 }, { - "epoch": 0.3102156640181612, + "epoch": 0.3097849188132279, "grad_norm": 0.0, - "learning_rate": 1.6157194131335473e-05, - "loss": 0.943, + "learning_rate": 1.6167938954073408e-05, + "loss": 0.9749, "step": 10932 }, { - "epoch": 0.31024404086265606, + "epoch": 0.3098132562554904, "grad_norm": 0.0, - "learning_rate": 1.6156469903536217e-05, - "loss": 0.9126, + "learning_rate": 1.6167216513498854e-05, + "loss": 1.048, "step": 10933 }, { - "epoch": 0.31027241770715097, + "epoch": 0.3098415936977528, "grad_norm": 0.0, - "learning_rate": 1.6155745623732987e-05, - "loss": 0.7945, + "learning_rate": 1.6166494020975543e-05, + "loss": 1.0243, "step": 10934 }, { - "epoch": 0.31030079455164583, + "epoch": 0.3098699311400153, "grad_norm": 0.0, - "learning_rate": 1.6155021291931895e-05, - "loss": 0.98, + "learning_rate": 1.6165771476509563e-05, + "loss": 0.8622, "step": 10935 }, { - "epoch": 0.31032917139614075, + "epoch": 0.30989826858227776, "grad_norm": 0.0, - "learning_rate": 1.6154296908139057e-05, - "loss": 0.9103, + "learning_rate": 1.6165048880107002e-05, + "loss": 0.9066, "step": 10936 }, { - "epoch": 0.31035754824063566, + "epoch": 0.30992660602454025, "grad_norm": 0.0, - "learning_rate": 1.6153572472360602e-05, - "loss": 0.9235, + "learning_rate": 1.6164326231773942e-05, + "loss": 1.2172, "step": 10937 }, { - "epoch": 0.3103859250851305, + "epoch": 0.3099549434668027, "grad_norm": 0.0, - "learning_rate": 1.615284798460264e-05, - "loss": 1.0215, + "learning_rate": 1.6163603531516475e-05, + "loss": 0.9906, "step": 10938 }, { - "epoch": 0.31041430192962544, + "epoch": 0.30998328090906513, "grad_norm": 0.0, - "learning_rate": 1.6152123444871296e-05, - "loss": 0.9962, + "learning_rate": 1.6162880779340686e-05, + "loss": 0.933, "step": 10939 }, { - "epoch": 0.3104426787741203, + "epoch": 0.3100116183513276, "grad_norm": 0.0, - "learning_rate": 1.615139885317269e-05, - "loss": 0.9537, + "learning_rate": 1.6162157975252663e-05, + "loss": 0.9637, "step": 10940 }, { - "epoch": 0.3104710556186152, + "epoch": 0.31003995579359006, "grad_norm": 0.0, - "learning_rate": 1.615067420951294e-05, - "loss": 1.0112, + "learning_rate": 1.6161435119258495e-05, + "loss": 1.0118, "step": 10941 }, { - "epoch": 0.3104994324631101, + "epoch": 0.31006829323585255, "grad_norm": 0.0, - "learning_rate": 1.6149949513898165e-05, - "loss": 0.9578, + "learning_rate": 1.6160712211364274e-05, + "loss": 0.8681, "step": 10942 }, { - "epoch": 0.310527809307605, + "epoch": 0.310096630678115, "grad_norm": 0.0, - "learning_rate": 1.6149224766334492e-05, - "loss": 1.0146, + "learning_rate": 1.615998925157608e-05, + "loss": 1.0239, "step": 10943 }, { - "epoch": 0.3105561861520999, + "epoch": 0.31012496812037743, "grad_norm": 0.0, - "learning_rate": 1.6148499966828045e-05, - "loss": 0.9717, + "learning_rate": 1.6159266239900015e-05, + "loss": 0.9491, "step": 10944 }, { - "epoch": 0.31058456299659476, + "epoch": 0.3101533055626399, "grad_norm": 0.0, - "learning_rate": 1.6147775115384942e-05, - "loss": 1.0149, + "learning_rate": 1.6158543176342162e-05, + "loss": 1.0431, "step": 10945 }, { - "epoch": 0.3106129398410897, + "epoch": 0.31018164300490236, "grad_norm": 0.0, - "learning_rate": 1.6147050212011303e-05, - "loss": 0.9683, + "learning_rate": 1.6157820060908616e-05, + "loss": 0.9329, "step": 10946 }, { - "epoch": 0.3106413166855846, + "epoch": 0.31020998044716486, "grad_norm": 0.0, - "learning_rate": 1.6146325256713253e-05, - "loss": 0.911, + "learning_rate": 1.615709689360546e-05, + "loss": 0.9903, "step": 10947 }, { - "epoch": 0.31066969353007945, + "epoch": 0.3102383178894273, "grad_norm": 0.0, - "learning_rate": 1.6145600249496922e-05, - "loss": 0.9695, + "learning_rate": 1.615637367443879e-05, + "loss": 1.0716, "step": 10948 }, { - "epoch": 0.31069807037457436, + "epoch": 0.3102666553316898, "grad_norm": 0.0, - "learning_rate": 1.6144875190368426e-05, - "loss": 0.9639, + "learning_rate": 1.6155650403414703e-05, + "loss": 0.9943, "step": 10949 }, { - "epoch": 0.3107264472190692, + "epoch": 0.3102949927739522, "grad_norm": 0.0, - "learning_rate": 1.6144150079333892e-05, - "loss": 1.0632, + "learning_rate": 1.615492708053928e-05, + "loss": 1.0526, "step": 10950 }, { - "epoch": 0.31075482406356414, + "epoch": 0.31032333021621467, "grad_norm": 0.0, - "learning_rate": 1.6143424916399448e-05, - "loss": 0.8776, + "learning_rate": 1.615420370581863e-05, + "loss": 0.924, "step": 10951 }, { - "epoch": 0.310783200908059, + "epoch": 0.31035166765847716, "grad_norm": 0.0, - "learning_rate": 1.6142699701571218e-05, - "loss": 0.9749, + "learning_rate": 1.615348027925883e-05, + "loss": 0.8665, "step": 10952 }, { - "epoch": 0.3108115777525539, + "epoch": 0.3103800051007396, "grad_norm": 0.0, - "learning_rate": 1.614197443485533e-05, - "loss": 0.8743, + "learning_rate": 1.6152756800865984e-05, + "loss": 0.9082, "step": 10953 }, { - "epoch": 0.31083995459704883, + "epoch": 0.3104083425430021, "grad_norm": 0.0, - "learning_rate": 1.6141249116257906e-05, - "loss": 0.8668, + "learning_rate": 1.615203327064618e-05, + "loss": 0.9272, "step": 10954 }, { - "epoch": 0.3108683314415437, + "epoch": 0.31043667998526453, "grad_norm": 0.0, - "learning_rate": 1.614052374578507e-05, - "loss": 0.8708, + "learning_rate": 1.6151309688605518e-05, + "loss": 0.9481, "step": 10955 }, { - "epoch": 0.3108967082860386, + "epoch": 0.31046501742752697, "grad_norm": 0.0, - "learning_rate": 1.613979832344296e-05, - "loss": 0.933, + "learning_rate": 1.6150586054750085e-05, + "loss": 1.0185, "step": 10956 }, { - "epoch": 0.31092508513053346, + "epoch": 0.31049335486978946, "grad_norm": 0.0, - "learning_rate": 1.6139072849237692e-05, - "loss": 0.8665, + "learning_rate": 1.6149862369085986e-05, + "loss": 0.9142, "step": 10957 }, { - "epoch": 0.3109534619750284, + "epoch": 0.3105216923120519, "grad_norm": 0.0, - "learning_rate": 1.6138347323175402e-05, - "loss": 0.7799, + "learning_rate": 1.614913863161931e-05, + "loss": 0.939, "step": 10958 }, { - "epoch": 0.3109818388195233, + "epoch": 0.3105500297543144, "grad_norm": 0.0, - "learning_rate": 1.613762174526222e-05, - "loss": 0.9221, + "learning_rate": 1.614841484235616e-05, + "loss": 0.9969, "step": 10959 }, { - "epoch": 0.31101021566401815, + "epoch": 0.31057836719657683, "grad_norm": 0.0, - "learning_rate": 1.613689611550427e-05, - "loss": 0.95, + "learning_rate": 1.6147691001302628e-05, + "loss": 0.8671, "step": 10960 }, { - "epoch": 0.31103859250851307, + "epoch": 0.3106067046388393, "grad_norm": 0.0, - "learning_rate": 1.613617043390768e-05, - "loss": 0.916, + "learning_rate": 1.6146967108464808e-05, + "loss": 1.0468, "step": 10961 }, { - "epoch": 0.3110669693530079, + "epoch": 0.31063504208110176, "grad_norm": 0.0, - "learning_rate": 1.6135444700478583e-05, - "loss": 1.0871, + "learning_rate": 1.6146243163848802e-05, + "loss": 0.8605, "step": 10962 }, { - "epoch": 0.31109534619750284, + "epoch": 0.3106633795233642, "grad_norm": 0.0, - "learning_rate": 1.613471891522311e-05, - "loss": 1.0554, + "learning_rate": 1.614551916746071e-05, + "loss": 1.0749, "step": 10963 }, { - "epoch": 0.31112372304199776, + "epoch": 0.3106917169656267, "grad_norm": 0.0, - "learning_rate": 1.613399307814739e-05, - "loss": 0.9566, + "learning_rate": 1.6144795119306628e-05, + "loss": 1.0392, "step": 10964 }, { - "epoch": 0.3111520998864926, + "epoch": 0.31072005440788913, "grad_norm": 0.0, - "learning_rate": 1.6133267189257552e-05, + "learning_rate": 1.6144071019392654e-05, "loss": 0.9173, "step": 10965 }, { - "epoch": 0.31118047673098753, + "epoch": 0.31074839185015163, "grad_norm": 0.0, - "learning_rate": 1.6132541248559732e-05, - "loss": 0.9852, + "learning_rate": 1.6143346867724886e-05, + "loss": 0.9106, "step": 10966 }, { - "epoch": 0.3112088535754824, + "epoch": 0.31077672929241407, "grad_norm": 0.0, - "learning_rate": 1.6131815256060064e-05, - "loss": 0.9011, + "learning_rate": 1.6142622664309428e-05, + "loss": 1.0377, "step": 10967 }, { - "epoch": 0.3112372304199773, + "epoch": 0.3108050667346765, "grad_norm": 0.0, - "learning_rate": 1.6131089211764673e-05, - "loss": 0.9869, + "learning_rate": 1.614189840915238e-05, + "loss": 0.9091, "step": 10968 }, { - "epoch": 0.31126560726447217, + "epoch": 0.310833404176939, "grad_norm": 0.0, - "learning_rate": 1.6130363115679696e-05, - "loss": 0.8043, + "learning_rate": 1.6141174102259838e-05, + "loss": 1.0069, "step": 10969 }, { - "epoch": 0.3112939841089671, + "epoch": 0.31086174161920144, "grad_norm": 0.0, - "learning_rate": 1.6129636967811267e-05, - "loss": 0.9601, + "learning_rate": 1.614044974363791e-05, + "loss": 0.8983, "step": 10970 }, { - "epoch": 0.311322360953462, + "epoch": 0.31089007906146393, "grad_norm": 0.0, - "learning_rate": 1.612891076816552e-05, - "loss": 1.0462, + "learning_rate": 1.613972533329269e-05, + "loss": 1.0199, "step": 10971 }, { - "epoch": 0.31135073779795686, + "epoch": 0.31091841650372637, "grad_norm": 0.0, - "learning_rate": 1.612818451674859e-05, - "loss": 1.0649, + "learning_rate": 1.6139000871230286e-05, + "loss": 0.9725, "step": 10972 }, { - "epoch": 0.31137911464245177, + "epoch": 0.3109467539459888, "grad_norm": 0.0, - "learning_rate": 1.6127458213566604e-05, - "loss": 0.9725, + "learning_rate": 1.6138276357456796e-05, + "loss": 0.9298, "step": 10973 }, { - "epoch": 0.31140749148694663, + "epoch": 0.3109750913882513, "grad_norm": 0.0, - "learning_rate": 1.6126731858625705e-05, - "loss": 0.8616, + "learning_rate": 1.6137551791978325e-05, + "loss": 1.0292, "step": 10974 }, { - "epoch": 0.31143586833144155, + "epoch": 0.31100342883051374, "grad_norm": 0.0, - "learning_rate": 1.6126005451932028e-05, - "loss": 0.9911, + "learning_rate": 1.6136827174800978e-05, + "loss": 0.9598, "step": 10975 }, { - "epoch": 0.31146424517593646, + "epoch": 0.31103176627277623, "grad_norm": 0.0, - "learning_rate": 1.612527899349171e-05, - "loss": 0.9502, + "learning_rate": 1.6136102505930857e-05, + "loss": 0.9657, "step": 10976 }, { - "epoch": 0.3114926220204313, + "epoch": 0.31106010371503867, "grad_norm": 0.0, - "learning_rate": 1.612455248331088e-05, - "loss": 0.983, + "learning_rate": 1.6135377785374063e-05, + "loss": 0.8861, "step": 10977 }, { - "epoch": 0.31152099886492624, + "epoch": 0.31108844115730117, "grad_norm": 0.0, - "learning_rate": 1.6123825921395682e-05, - "loss": 0.8939, + "learning_rate": 1.6134653013136703e-05, + "loss": 0.9872, "step": 10978 }, { - "epoch": 0.3115493757094211, + "epoch": 0.3111167785995636, "grad_norm": 0.0, - "learning_rate": 1.6123099307752252e-05, - "loss": 0.9427, + "learning_rate": 1.613392818922489e-05, + "loss": 1.0329, "step": 10979 }, { - "epoch": 0.311577752553916, + "epoch": 0.31114511604182604, "grad_norm": 0.0, - "learning_rate": 1.6122372642386728e-05, - "loss": 0.9527, + "learning_rate": 1.6133203313644714e-05, + "loss": 0.9702, "step": 10980 }, { - "epoch": 0.31160612939841087, + "epoch": 0.31117345348408854, "grad_norm": 0.0, - "learning_rate": 1.6121645925305244e-05, - "loss": 0.9721, + "learning_rate": 1.6132478386402287e-05, + "loss": 0.9061, "step": 10981 }, { - "epoch": 0.3116345062429058, + "epoch": 0.311201790926351, "grad_norm": 0.0, - "learning_rate": 1.6120919156513944e-05, - "loss": 1.0254, + "learning_rate": 1.6131753407503724e-05, + "loss": 1.1038, "step": 10982 }, { - "epoch": 0.3116628830874007, + "epoch": 0.31123012836861347, "grad_norm": 0.0, - "learning_rate": 1.6120192336018963e-05, - "loss": 0.8701, + "learning_rate": 1.6131028376955125e-05, + "loss": 0.9917, "step": 10983 }, { - "epoch": 0.31169125993189556, + "epoch": 0.3112584658108759, "grad_norm": 0.0, - "learning_rate": 1.611946546382644e-05, - "loss": 1.0251, + "learning_rate": 1.6130303294762595e-05, + "loss": 0.9379, "step": 10984 }, { - "epoch": 0.3117196367763905, + "epoch": 0.31128680325313834, "grad_norm": 0.0, - "learning_rate": 1.6118738539942524e-05, - "loss": 1.028, + "learning_rate": 1.6129578160932244e-05, + "loss": 0.9821, "step": 10985 }, { - "epoch": 0.31174801362088533, + "epoch": 0.31131514069540084, "grad_norm": 0.0, - "learning_rate": 1.6118011564373347e-05, - "loss": 1.0859, + "learning_rate": 1.6128852975470182e-05, + "loss": 0.9241, "step": 10986 }, { - "epoch": 0.31177639046538025, + "epoch": 0.3113434781376633, "grad_norm": 0.0, - "learning_rate": 1.6117284537125047e-05, - "loss": 0.8818, + "learning_rate": 1.6128127738382513e-05, + "loss": 0.9682, "step": 10987 }, { - "epoch": 0.31180476730987516, + "epoch": 0.31137181557992577, "grad_norm": 0.0, - "learning_rate": 1.6116557458203774e-05, - "loss": 0.9316, + "learning_rate": 1.6127402449675348e-05, + "loss": 0.9625, "step": 10988 }, { - "epoch": 0.31183314415437, + "epoch": 0.3114001530221882, "grad_norm": 0.0, - "learning_rate": 1.611583032761566e-05, - "loss": 1.014, + "learning_rate": 1.61266771093548e-05, + "loss": 0.8186, "step": 10989 }, { - "epoch": 0.31186152099886494, + "epoch": 0.3114284904644507, "grad_norm": 0.0, - "learning_rate": 1.611510314536685e-05, - "loss": 1.0561, + "learning_rate": 1.6125951717426973e-05, + "loss": 1.0032, "step": 10990 }, { - "epoch": 0.3118898978433598, + "epoch": 0.31145682790671314, "grad_norm": 0.0, - "learning_rate": 1.61143759114635e-05, - "loss": 1.086, + "learning_rate": 1.612522627389798e-05, + "loss": 0.9676, "step": 10991 }, { - "epoch": 0.3119182746878547, + "epoch": 0.3114851653489756, "grad_norm": 0.0, - "learning_rate": 1.6113648625911733e-05, - "loss": 0.9797, + "learning_rate": 1.6124500778773934e-05, + "loss": 0.9611, "step": 10992 }, { - "epoch": 0.31194665153234963, + "epoch": 0.3115135027912381, "grad_norm": 0.0, - "learning_rate": 1.6112921288717706e-05, - "loss": 0.8768, + "learning_rate": 1.612377523206094e-05, + "loss": 0.9069, "step": 10993 }, { - "epoch": 0.3119750283768445, + "epoch": 0.3115418402335005, "grad_norm": 0.0, - "learning_rate": 1.6112193899887555e-05, - "loss": 0.9704, + "learning_rate": 1.6123049633765117e-05, + "loss": 0.9139, "step": 10994 }, { - "epoch": 0.3120034052213394, + "epoch": 0.311570177675763, "grad_norm": 0.0, - "learning_rate": 1.6111466459427428e-05, - "loss": 0.8743, + "learning_rate": 1.612232398389257e-05, + "loss": 0.9494, "step": 10995 }, { - "epoch": 0.31203178206583426, + "epoch": 0.31159851511802544, "grad_norm": 0.0, - "learning_rate": 1.611073896734347e-05, - "loss": 1.0666, + "learning_rate": 1.6121598282449414e-05, + "loss": 1.039, "step": 10996 }, { - "epoch": 0.3120601589103292, + "epoch": 0.3116268525602879, "grad_norm": 0.0, - "learning_rate": 1.611001142364182e-05, - "loss": 1.0364, + "learning_rate": 1.6120872529441766e-05, + "loss": 1.0298, "step": 10997 }, { - "epoch": 0.31208853575482404, + "epoch": 0.3116551900025504, "grad_norm": 0.0, - "learning_rate": 1.6109283828328636e-05, - "loss": 0.9242, + "learning_rate": 1.6120146724875734e-05, + "loss": 0.9703, "step": 10998 }, { - "epoch": 0.31211691259931895, + "epoch": 0.3116835274448128, "grad_norm": 0.0, - "learning_rate": 1.6108556181410056e-05, - "loss": 0.9723, + "learning_rate": 1.6119420868757433e-05, + "loss": 1.0565, "step": 10999 }, { - "epoch": 0.31214528944381387, + "epoch": 0.3117118648870753, "grad_norm": 0.0, - "learning_rate": 1.6107828482892225e-05, - "loss": 0.9255, + "learning_rate": 1.6118694961092976e-05, + "loss": 0.9134, "step": 11000 }, { - "epoch": 0.3121736662883087, + "epoch": 0.31174020232933775, "grad_norm": 0.0, - "learning_rate": 1.6107100732781295e-05, - "loss": 0.9785, + "learning_rate": 1.6117969001888486e-05, + "loss": 1.0758, "step": 11001 }, { - "epoch": 0.31220204313280364, + "epoch": 0.31176853977160024, "grad_norm": 0.0, - "learning_rate": 1.610637293108341e-05, - "loss": 0.9166, + "learning_rate": 1.6117242991150064e-05, + "loss": 0.9, "step": 11002 }, { - "epoch": 0.3122304199772985, + "epoch": 0.3117968772138627, "grad_norm": 0.0, - "learning_rate": 1.6105645077804713e-05, - "loss": 0.8083, + "learning_rate": 1.6116516928883836e-05, + "loss": 1.0276, "step": 11003 }, { - "epoch": 0.3122587968217934, + "epoch": 0.3118252146561251, "grad_norm": 0.0, - "learning_rate": 1.6104917172951363e-05, - "loss": 1.0451, + "learning_rate": 1.6115790815095914e-05, + "loss": 1.0281, "step": 11004 }, { - "epoch": 0.31228717366628833, + "epoch": 0.3118535520983876, "grad_norm": 0.0, - "learning_rate": 1.61041892165295e-05, - "loss": 1.0045, + "learning_rate": 1.6115064649792417e-05, + "loss": 0.9449, "step": 11005 }, { - "epoch": 0.3123155505107832, + "epoch": 0.31188188954065005, "grad_norm": 0.0, - "learning_rate": 1.6103461208545277e-05, - "loss": 0.921, + "learning_rate": 1.6114338432979454e-05, + "loss": 1.0102, "step": 11006 }, { - "epoch": 0.3123439273552781, + "epoch": 0.31191022698291254, "grad_norm": 0.0, - "learning_rate": 1.6102733149004847e-05, - "loss": 0.9498, + "learning_rate": 1.611361216466315e-05, + "loss": 1.0991, "step": 11007 }, { - "epoch": 0.31237230419977297, + "epoch": 0.311938564425175, "grad_norm": 0.0, - "learning_rate": 1.610200503791435e-05, - "loss": 1.0286, + "learning_rate": 1.6112885844849623e-05, + "loss": 0.9757, "step": 11008 }, { - "epoch": 0.3124006810442679, + "epoch": 0.3119669018674374, "grad_norm": 0.0, - "learning_rate": 1.6101276875279946e-05, - "loss": 1.0224, + "learning_rate": 1.611215947354499e-05, + "loss": 0.989, "step": 11009 }, { - "epoch": 0.3124290578887628, + "epoch": 0.3119952393096999, "grad_norm": 0.0, - "learning_rate": 1.610054866110778e-05, - "loss": 0.9473, + "learning_rate": 1.6111433050755363e-05, + "loss": 0.9679, "step": 11010 }, { - "epoch": 0.31245743473325766, + "epoch": 0.31202357675196235, "grad_norm": 0.0, - "learning_rate": 1.6099820395404005e-05, - "loss": 0.9296, + "learning_rate": 1.611070657648687e-05, + "loss": 1.024, "step": 11011 }, { - "epoch": 0.31248581157775257, + "epoch": 0.31205191419422484, "grad_norm": 0.0, - "learning_rate": 1.6099092078174773e-05, - "loss": 0.9406, + "learning_rate": 1.6109980050745627e-05, + "loss": 0.9526, "step": 11012 }, { - "epoch": 0.31251418842224743, + "epoch": 0.3120802516364873, "grad_norm": 0.0, - "learning_rate": 1.6098363709426238e-05, - "loss": 0.8775, + "learning_rate": 1.610925347353775e-05, + "loss": 0.8864, "step": 11013 }, { - "epoch": 0.31254256526674234, + "epoch": 0.3121085890787498, "grad_norm": 0.0, - "learning_rate": 1.6097635289164548e-05, - "loss": 0.885, + "learning_rate": 1.6108526844869365e-05, + "loss": 0.9199, "step": 11014 }, { - "epoch": 0.3125709421112372, + "epoch": 0.3121369265210122, "grad_norm": 0.0, - "learning_rate": 1.6096906817395862e-05, - "loss": 0.7901, + "learning_rate": 1.610780016474659e-05, + "loss": 0.8926, "step": 11015 }, { - "epoch": 0.3125993189557321, + "epoch": 0.31216526396327465, "grad_norm": 0.0, - "learning_rate": 1.6096178294126326e-05, - "loss": 1.032, + "learning_rate": 1.6107073433175542e-05, + "loss": 0.977, "step": 11016 }, { - "epoch": 0.31262769580022703, + "epoch": 0.31219360140553715, "grad_norm": 0.0, - "learning_rate": 1.60954497193621e-05, - "loss": 0.8896, + "learning_rate": 1.610634665016235e-05, + "loss": 0.8396, "step": 11017 }, { - "epoch": 0.3126560726447219, + "epoch": 0.3122219388477996, "grad_norm": 0.0, - "learning_rate": 1.6094721093109335e-05, - "loss": 1.007, + "learning_rate": 1.610561981571313e-05, + "loss": 0.9159, "step": 11018 }, { - "epoch": 0.3126844494892168, + "epoch": 0.3122502762900621, "grad_norm": 0.0, - "learning_rate": 1.609399241537419e-05, - "loss": 0.9253, + "learning_rate": 1.610489292983401e-05, + "loss": 1.0125, "step": 11019 }, { - "epoch": 0.31271282633371167, + "epoch": 0.3122786137323245, "grad_norm": 0.0, - "learning_rate": 1.6093263686162813e-05, - "loss": 0.9374, + "learning_rate": 1.6104165992531105e-05, + "loss": 1.0168, "step": 11020 }, { - "epoch": 0.3127412031782066, + "epoch": 0.31230695117458696, "grad_norm": 0.0, - "learning_rate": 1.6092534905481367e-05, - "loss": 0.8542, + "learning_rate": 1.610343900381055e-05, + "loss": 0.992, "step": 11021 }, { - "epoch": 0.3127695800227015, + "epoch": 0.31233528861684945, "grad_norm": 0.0, - "learning_rate": 1.6091806073336e-05, - "loss": 1.0214, + "learning_rate": 1.610271196367846e-05, + "loss": 0.9552, "step": 11022 }, { - "epoch": 0.31279795686719636, + "epoch": 0.3123636260591119, "grad_norm": 0.0, - "learning_rate": 1.609107718973288e-05, - "loss": 0.9655, + "learning_rate": 1.6101984872140958e-05, + "loss": 0.9365, "step": 11023 }, { - "epoch": 0.3128263337116913, + "epoch": 0.3123919635013744, "grad_norm": 0.0, - "learning_rate": 1.6090348254678154e-05, - "loss": 1.0406, + "learning_rate": 1.6101257729204173e-05, + "loss": 1.1042, "step": 11024 }, { - "epoch": 0.31285471055618613, + "epoch": 0.3124203009436368, "grad_norm": 0.0, - "learning_rate": 1.6089619268177985e-05, - "loss": 1.0, + "learning_rate": 1.6100530534874226e-05, + "loss": 0.8976, "step": 11025 }, { - "epoch": 0.31288308740068105, + "epoch": 0.3124486383858993, "grad_norm": 0.0, - "learning_rate": 1.6088890230238524e-05, - "loss": 0.9974, + "learning_rate": 1.609980328915725e-05, + "loss": 0.9414, "step": 11026 }, { - "epoch": 0.31291146424517596, + "epoch": 0.31247697582816175, "grad_norm": 0.0, - "learning_rate": 1.6088161140865936e-05, - "loss": 1.0058, + "learning_rate": 1.609907599205936e-05, + "loss": 1.0251, "step": 11027 }, { - "epoch": 0.3129398410896708, + "epoch": 0.3125053132704242, "grad_norm": 0.0, - "learning_rate": 1.6087432000066376e-05, - "loss": 0.9915, + "learning_rate": 1.6098348643586695e-05, + "loss": 0.8849, "step": 11028 }, { - "epoch": 0.31296821793416574, + "epoch": 0.3125336507126867, "grad_norm": 0.0, - "learning_rate": 1.6086702807846007e-05, - "loss": 1.0405, + "learning_rate": 1.6097621243745373e-05, + "loss": 1.0265, "step": 11029 }, { - "epoch": 0.3129965947786606, + "epoch": 0.3125619881549491, "grad_norm": 0.0, - "learning_rate": 1.608597356421098e-05, - "loss": 0.9461, + "learning_rate": 1.609689379254152e-05, + "loss": 1.021, "step": 11030 }, { - "epoch": 0.3130249716231555, + "epoch": 0.3125903255972116, "grad_norm": 0.0, - "learning_rate": 1.6085244269167467e-05, - "loss": 1.0045, + "learning_rate": 1.609616628998127e-05, + "loss": 0.9147, "step": 11031 }, { - "epoch": 0.31305334846765037, + "epoch": 0.31261866303947405, "grad_norm": 0.0, - "learning_rate": 1.608451492272162e-05, - "loss": 0.9756, + "learning_rate": 1.6095438736070742e-05, + "loss": 0.9983, "step": 11032 }, { - "epoch": 0.3130817253121453, + "epoch": 0.3126470004817365, "grad_norm": 0.0, - "learning_rate": 1.60837855248796e-05, - "loss": 1.0856, + "learning_rate": 1.6094711130816074e-05, + "loss": 0.8965, "step": 11033 }, { - "epoch": 0.3131101021566402, + "epoch": 0.312675337923999, "grad_norm": 0.0, - "learning_rate": 1.6083056075647572e-05, - "loss": 0.9481, + "learning_rate": 1.6093983474223392e-05, + "loss": 0.985, "step": 11034 }, { - "epoch": 0.31313847900113506, + "epoch": 0.3127036753662614, "grad_norm": 0.0, - "learning_rate": 1.6082326575031696e-05, - "loss": 0.9498, + "learning_rate": 1.609325576629882e-05, + "loss": 0.9488, "step": 11035 }, { - "epoch": 0.31316685584563, + "epoch": 0.3127320128085239, "grad_norm": 0.0, - "learning_rate": 1.608159702303813e-05, - "loss": 0.9146, + "learning_rate": 1.6092528007048495e-05, + "loss": 0.9481, "step": 11036 }, { - "epoch": 0.31319523269012484, + "epoch": 0.31276035025078636, "grad_norm": 0.0, - "learning_rate": 1.6080867419673045e-05, - "loss": 0.9818, + "learning_rate": 1.6091800196478546e-05, + "loss": 0.89, "step": 11037 }, { - "epoch": 0.31322360953461975, + "epoch": 0.31278868769304885, "grad_norm": 0.0, - "learning_rate": 1.6080137764942597e-05, - "loss": 0.8999, + "learning_rate": 1.60910723345951e-05, + "loss": 0.9882, "step": 11038 }, { - "epoch": 0.31325198637911467, + "epoch": 0.3128170251353113, "grad_norm": 0.0, - "learning_rate": 1.6079408058852956e-05, - "loss": 0.8853, + "learning_rate": 1.6090344421404286e-05, + "loss": 0.9561, "step": 11039 }, { - "epoch": 0.3132803632236095, + "epoch": 0.3128453625775737, "grad_norm": 0.0, - "learning_rate": 1.6078678301410276e-05, - "loss": 0.9154, + "learning_rate": 1.6089616456912245e-05, + "loss": 0.8335, "step": 11040 }, { - "epoch": 0.31330874006810444, + "epoch": 0.3128737000198362, "grad_norm": 0.0, - "learning_rate": 1.6077948492620734e-05, - "loss": 0.9655, + "learning_rate": 1.60888884411251e-05, + "loss": 0.8386, "step": 11041 }, { - "epoch": 0.3133371169125993, + "epoch": 0.31290203746209866, "grad_norm": 0.0, - "learning_rate": 1.6077218632490484e-05, - "loss": 1.0774, + "learning_rate": 1.6088160374048987e-05, + "loss": 0.8978, "step": 11042 }, { - "epoch": 0.3133654937570942, + "epoch": 0.31293037490436115, "grad_norm": 0.0, - "learning_rate": 1.6076488721025695e-05, - "loss": 0.8804, + "learning_rate": 1.608743225569004e-05, + "loss": 0.9733, "step": 11043 }, { - "epoch": 0.31339387060158913, + "epoch": 0.3129587123466236, "grad_norm": 0.0, - "learning_rate": 1.6075758758232535e-05, - "loss": 0.9253, + "learning_rate": 1.608670408605439e-05, + "loss": 1.0139, "step": 11044 }, { - "epoch": 0.313422247446084, + "epoch": 0.31298704978888603, "grad_norm": 0.0, - "learning_rate": 1.607502874411716e-05, - "loss": 1.0716, + "learning_rate": 1.608597586514817e-05, + "loss": 1.0588, "step": 11045 }, { - "epoch": 0.3134506242905789, + "epoch": 0.3130153872311485, "grad_norm": 0.0, - "learning_rate": 1.6074298678685753e-05, - "loss": 1.0073, + "learning_rate": 1.608524759297752e-05, + "loss": 0.9202, "step": 11046 }, { - "epoch": 0.31347900113507376, + "epoch": 0.31304372467341096, "grad_norm": 0.0, - "learning_rate": 1.6073568561944467e-05, - "loss": 0.9696, + "learning_rate": 1.6084519269548564e-05, + "loss": 0.8372, "step": 11047 }, { - "epoch": 0.3135073779795687, + "epoch": 0.31307206211567346, "grad_norm": 0.0, - "learning_rate": 1.6072838393899476e-05, - "loss": 0.9328, + "learning_rate": 1.6083790894867445e-05, + "loss": 0.984, "step": 11048 }, { - "epoch": 0.31353575482406354, + "epoch": 0.3131003995579359, "grad_norm": 0.0, - "learning_rate": 1.607210817455695e-05, - "loss": 0.8406, + "learning_rate": 1.6083062468940297e-05, + "loss": 0.9998, "step": 11049 }, { - "epoch": 0.31356413166855845, + "epoch": 0.3131287370001984, "grad_norm": 0.0, - "learning_rate": 1.6071377903923048e-05, - "loss": 0.9985, + "learning_rate": 1.6082333991773253e-05, + "loss": 0.9621, "step": 11050 }, { - "epoch": 0.31359250851305337, + "epoch": 0.3131570744424608, "grad_norm": 0.0, - "learning_rate": 1.6070647582003945e-05, - "loss": 0.9758, + "learning_rate": 1.6081605463372453e-05, + "loss": 0.9498, "step": 11051 }, { - "epoch": 0.31362088535754823, + "epoch": 0.31318541188472326, "grad_norm": 0.0, - "learning_rate": 1.606991720880581e-05, - "loss": 0.9196, + "learning_rate": 1.6080876883744028e-05, + "loss": 0.9231, "step": 11052 }, { - "epoch": 0.31364926220204314, + "epoch": 0.31321374932698576, "grad_norm": 0.0, - "learning_rate": 1.606918678433481e-05, - "loss": 0.9909, + "learning_rate": 1.6080148252894124e-05, + "loss": 0.935, "step": 11053 }, { - "epoch": 0.313677639046538, + "epoch": 0.3132420867692482, "grad_norm": 0.0, - "learning_rate": 1.6068456308597115e-05, - "loss": 0.8399, + "learning_rate": 1.6079419570828872e-05, + "loss": 0.9973, "step": 11054 }, { - "epoch": 0.3137060158910329, + "epoch": 0.3132704242115107, "grad_norm": 0.0, - "learning_rate": 1.60677257815989e-05, - "loss": 0.9225, + "learning_rate": 1.607869083755441e-05, + "loss": 0.9032, "step": 11055 }, { - "epoch": 0.31373439273552783, + "epoch": 0.31329876165377313, "grad_norm": 0.0, - "learning_rate": 1.6066995203346332e-05, - "loss": 0.9672, + "learning_rate": 1.6077962053076877e-05, + "loss": 0.9346, "step": 11056 }, { - "epoch": 0.3137627695800227, + "epoch": 0.31332709909603557, "grad_norm": 0.0, - "learning_rate": 1.606626457384558e-05, - "loss": 0.9852, + "learning_rate": 1.6077233217402413e-05, + "loss": 1.0061, "step": 11057 }, { - "epoch": 0.3137911464245176, + "epoch": 0.31335543653829806, "grad_norm": 0.0, - "learning_rate": 1.6065533893102823e-05, - "loss": 0.9684, + "learning_rate": 1.607650433053716e-05, + "loss": 1.0763, "step": 11058 }, { - "epoch": 0.31381952326901247, + "epoch": 0.3133837739805605, "grad_norm": 0.0, - "learning_rate": 1.6064803161124224e-05, - "loss": 0.8776, + "learning_rate": 1.6075775392487252e-05, + "loss": 1.0427, "step": 11059 }, { - "epoch": 0.3138479001135074, + "epoch": 0.313412111422823, "grad_norm": 0.0, - "learning_rate": 1.6064072377915965e-05, - "loss": 1.0068, + "learning_rate": 1.607504640325883e-05, + "loss": 0.9493, "step": 11060 }, { - "epoch": 0.31387627695800224, + "epoch": 0.31344044886508543, "grad_norm": 0.0, - "learning_rate": 1.606334154348421e-05, - "loss": 0.908, + "learning_rate": 1.607431736285804e-05, + "loss": 1.0174, "step": 11061 }, { - "epoch": 0.31390465380249716, + "epoch": 0.3134687863073479, "grad_norm": 0.0, - "learning_rate": 1.6062610657835137e-05, - "loss": 0.8927, + "learning_rate": 1.6073588271291018e-05, + "loss": 1.0194, "step": 11062 }, { - "epoch": 0.3139330306469921, + "epoch": 0.31349712374961036, "grad_norm": 0.0, - "learning_rate": 1.606187972097492e-05, - "loss": 0.81, + "learning_rate": 1.6072859128563905e-05, + "loss": 0.9763, "step": 11063 }, { - "epoch": 0.31396140749148693, + "epoch": 0.3135254611918728, "grad_norm": 0.0, - "learning_rate": 1.6061148732909734e-05, - "loss": 0.9355, + "learning_rate": 1.6072129934682847e-05, + "loss": 0.8769, "step": 11064 }, { - "epoch": 0.31398978433598185, + "epoch": 0.3135537986341353, "grad_norm": 0.0, - "learning_rate": 1.6060417693645753e-05, - "loss": 0.9021, + "learning_rate": 1.607140068965398e-05, + "loss": 0.903, "step": 11065 }, { - "epoch": 0.3140181611804767, + "epoch": 0.31358213607639773, "grad_norm": 0.0, - "learning_rate": 1.6059686603189146e-05, - "loss": 0.9673, + "learning_rate": 1.6070671393483456e-05, + "loss": 0.96, "step": 11066 }, { - "epoch": 0.3140465380249716, + "epoch": 0.3136104735186602, "grad_norm": 0.0, - "learning_rate": 1.6058955461546098e-05, - "loss": 0.9302, + "learning_rate": 1.606994204617741e-05, + "loss": 0.9797, "step": 11067 }, { - "epoch": 0.31407491486946654, + "epoch": 0.31363881096092266, "grad_norm": 0.0, - "learning_rate": 1.605822426872278e-05, - "loss": 0.9526, + "learning_rate": 1.6069212647741987e-05, + "loss": 0.8632, "step": 11068 }, { - "epoch": 0.3141032917139614, + "epoch": 0.3136671484031851, "grad_norm": 0.0, - "learning_rate": 1.6057493024725367e-05, - "loss": 0.9101, + "learning_rate": 1.606848319818333e-05, + "loss": 0.9158, "step": 11069 }, { - "epoch": 0.3141316685584563, + "epoch": 0.3136954858454476, "grad_norm": 0.0, - "learning_rate": 1.6056761729560043e-05, - "loss": 0.8477, + "learning_rate": 1.606775369750759e-05, + "loss": 0.989, "step": 11070 }, { - "epoch": 0.31416004540295117, + "epoch": 0.31372382328771004, "grad_norm": 0.0, - "learning_rate": 1.6056030383232978e-05, - "loss": 1.0448, + "learning_rate": 1.6067024145720905e-05, + "loss": 0.9368, "step": 11071 }, { - "epoch": 0.3141884222474461, + "epoch": 0.31375216072997253, "grad_norm": 0.0, - "learning_rate": 1.605529898575035e-05, - "loss": 0.9504, + "learning_rate": 1.6066294542829425e-05, + "loss": 0.9901, "step": 11072 }, { - "epoch": 0.314216799091941, + "epoch": 0.31378049817223497, "grad_norm": 0.0, - "learning_rate": 1.605456753711834e-05, - "loss": 0.835, + "learning_rate": 1.606556488883929e-05, + "loss": 0.9164, "step": 11073 }, { - "epoch": 0.31424517593643586, + "epoch": 0.31380883561449746, "grad_norm": 0.0, - "learning_rate": 1.6053836037343125e-05, - "loss": 1.0737, + "learning_rate": 1.6064835183756654e-05, + "loss": 0.991, "step": 11074 }, { - "epoch": 0.3142735527809308, + "epoch": 0.3138371730567599, "grad_norm": 0.0, - "learning_rate": 1.6053104486430887e-05, - "loss": 0.943, + "learning_rate": 1.6064105427587654e-05, + "loss": 0.8837, "step": 11075 }, { - "epoch": 0.31430192962542564, + "epoch": 0.31386551049902234, "grad_norm": 0.0, - "learning_rate": 1.6052372884387804e-05, - "loss": 0.9262, + "learning_rate": 1.6063375620338448e-05, + "loss": 1.0425, "step": 11076 }, { - "epoch": 0.31433030646992055, + "epoch": 0.31389384794128483, "grad_norm": 0.0, - "learning_rate": 1.6051641231220055e-05, - "loss": 0.9447, + "learning_rate": 1.6062645762015174e-05, + "loss": 0.9214, "step": 11077 }, { - "epoch": 0.3143586833144154, + "epoch": 0.31392218538354727, "grad_norm": 0.0, - "learning_rate": 1.605090952693382e-05, - "loss": 1.0289, + "learning_rate": 1.6061915852623982e-05, + "loss": 0.8752, "step": 11078 }, { - "epoch": 0.3143870601589103, + "epoch": 0.31395052282580976, "grad_norm": 0.0, - "learning_rate": 1.605017777153528e-05, - "loss": 0.908, + "learning_rate": 1.606118589217102e-05, + "loss": 1.1251, "step": 11079 }, { - "epoch": 0.31441543700340524, + "epoch": 0.3139788602680722, "grad_norm": 0.0, - "learning_rate": 1.6049445965030616e-05, - "loss": 0.9991, + "learning_rate": 1.6060455880662446e-05, + "loss": 0.9847, "step": 11080 }, { - "epoch": 0.3144438138479001, + "epoch": 0.31400719771033464, "grad_norm": 0.0, - "learning_rate": 1.604871410742601e-05, - "loss": 1.0103, + "learning_rate": 1.6059725818104393e-05, + "loss": 0.9699, "step": 11081 }, { - "epoch": 0.314472190692395, + "epoch": 0.31403553515259713, "grad_norm": 0.0, - "learning_rate": 1.604798219872764e-05, - "loss": 0.9304, + "learning_rate": 1.6058995704503028e-05, + "loss": 0.9613, "step": 11082 }, { - "epoch": 0.3145005675368899, + "epoch": 0.31406387259485957, "grad_norm": 0.0, - "learning_rate": 1.60472502389417e-05, - "loss": 1.0641, + "learning_rate": 1.6058265539864488e-05, + "loss": 0.9725, "step": 11083 }, { - "epoch": 0.3145289443813848, + "epoch": 0.31409221003712207, "grad_norm": 0.0, - "learning_rate": 1.604651822807436e-05, - "loss": 0.9549, + "learning_rate": 1.6057535324194928e-05, + "loss": 0.9831, "step": 11084 }, { - "epoch": 0.3145573212258797, + "epoch": 0.3141205474793845, "grad_norm": 0.0, - "learning_rate": 1.604578616613181e-05, - "loss": 0.9227, + "learning_rate": 1.6056805057500495e-05, + "loss": 0.9919, "step": 11085 }, { - "epoch": 0.31458569807037456, + "epoch": 0.314148884921647, "grad_norm": 0.0, - "learning_rate": 1.6045054053120235e-05, - "loss": 0.9078, + "learning_rate": 1.6056074739787347e-05, + "loss": 1.036, "step": 11086 }, { - "epoch": 0.3146140749148695, + "epoch": 0.31417722236390944, "grad_norm": 0.0, - "learning_rate": 1.6044321889045814e-05, - "loss": 0.925, + "learning_rate": 1.6055344371061633e-05, + "loss": 0.9932, "step": 11087 }, { - "epoch": 0.31464245175936434, + "epoch": 0.3142055598061719, "grad_norm": 0.0, - "learning_rate": 1.6043589673914737e-05, - "loss": 0.938, + "learning_rate": 1.6054613951329506e-05, + "loss": 0.9195, "step": 11088 }, { - "epoch": 0.31467082860385925, + "epoch": 0.31423389724843437, "grad_norm": 0.0, - "learning_rate": 1.6042857407733184e-05, - "loss": 0.8815, + "learning_rate": 1.6053883480597115e-05, + "loss": 0.8213, "step": 11089 }, { - "epoch": 0.31469920544835417, + "epoch": 0.3142622346906968, "grad_norm": 0.0, - "learning_rate": 1.6042125090507343e-05, - "loss": 0.903, + "learning_rate": 1.6053152958870617e-05, + "loss": 0.9507, "step": 11090 }, { - "epoch": 0.31472758229284903, + "epoch": 0.3142905721329593, "grad_norm": 0.0, - "learning_rate": 1.60413927222434e-05, - "loss": 0.9866, + "learning_rate": 1.6052422386156164e-05, + "loss": 1.0723, "step": 11091 }, { - "epoch": 0.31475595913734394, + "epoch": 0.31431890957522174, "grad_norm": 0.0, - "learning_rate": 1.604066030294754e-05, - "loss": 0.9017, + "learning_rate": 1.605169176245991e-05, + "loss": 0.9705, "step": 11092 }, { - "epoch": 0.3147843359818388, + "epoch": 0.3143472470174842, "grad_norm": 0.0, - "learning_rate": 1.6039927832625953e-05, - "loss": 1.0692, + "learning_rate": 1.6050961087788005e-05, + "loss": 0.907, "step": 11093 }, { - "epoch": 0.3148127128263337, + "epoch": 0.31437558445974667, "grad_norm": 0.0, - "learning_rate": 1.6039195311284825e-05, - "loss": 0.8818, + "learning_rate": 1.605023036214661e-05, + "loss": 1.0142, "step": 11094 }, { - "epoch": 0.3148410896708286, + "epoch": 0.3144039219020091, "grad_norm": 0.0, - "learning_rate": 1.6038462738930342e-05, - "loss": 0.8592, + "learning_rate": 1.604949958554188e-05, + "loss": 0.8886, "step": 11095 }, { - "epoch": 0.3148694665153235, + "epoch": 0.3144322593442716, "grad_norm": 0.0, - "learning_rate": 1.6037730115568687e-05, - "loss": 0.9781, + "learning_rate": 1.6048768757979966e-05, + "loss": 1.0178, "step": 11096 }, { - "epoch": 0.3148978433598184, + "epoch": 0.31446059678653404, "grad_norm": 0.0, - "learning_rate": 1.6036997441206063e-05, - "loss": 1.0569, + "learning_rate": 1.6048037879467025e-05, + "loss": 1.0071, "step": 11097 }, { - "epoch": 0.31492622020431327, + "epoch": 0.31448893422879654, "grad_norm": 0.0, - "learning_rate": 1.6036264715848644e-05, - "loss": 1.014, + "learning_rate": 1.6047306950009217e-05, + "loss": 0.8659, "step": 11098 }, { - "epoch": 0.3149545970488082, + "epoch": 0.314517271671059, "grad_norm": 0.0, - "learning_rate": 1.603553193950263e-05, - "loss": 0.9374, + "learning_rate": 1.60465759696127e-05, + "loss": 1.1271, "step": 11099 }, { - "epoch": 0.31498297389330304, + "epoch": 0.3145456091133214, "grad_norm": 0.0, - "learning_rate": 1.6034799112174205e-05, - "loss": 0.9561, + "learning_rate": 1.604584493828363e-05, + "loss": 0.9526, "step": 11100 }, { - "epoch": 0.31501135073779796, + "epoch": 0.3145739465555839, "grad_norm": 0.0, - "learning_rate": 1.6034066233869557e-05, - "loss": 0.9005, + "learning_rate": 1.6045113856028155e-05, + "loss": 0.9027, "step": 11101 }, { - "epoch": 0.31503972758229287, + "epoch": 0.31460228399784634, "grad_norm": 0.0, - "learning_rate": 1.6033333304594886e-05, - "loss": 0.9993, + "learning_rate": 1.6044382722852445e-05, + "loss": 0.9265, "step": 11102 }, { - "epoch": 0.31506810442678773, + "epoch": 0.31463062144010884, "grad_norm": 0.0, - "learning_rate": 1.6032600324356373e-05, - "loss": 0.8974, + "learning_rate": 1.6043651538762658e-05, + "loss": 0.9945, "step": 11103 }, { - "epoch": 0.31509648127128265, + "epoch": 0.3146589588823713, "grad_norm": 0.0, - "learning_rate": 1.6031867293160215e-05, - "loss": 1.0151, + "learning_rate": 1.6042920303764944e-05, + "loss": 0.9255, "step": 11104 }, { - "epoch": 0.3151248581157775, + "epoch": 0.3146872963246337, "grad_norm": 0.0, - "learning_rate": 1.6031134211012603e-05, - "loss": 0.944, + "learning_rate": 1.6042189017865473e-05, + "loss": 1.0232, "step": 11105 }, { - "epoch": 0.3151532349602724, + "epoch": 0.3147156337668962, "grad_norm": 0.0, - "learning_rate": 1.6030401077919727e-05, - "loss": 1.004, + "learning_rate": 1.6041457681070395e-05, + "loss": 0.9444, "step": 11106 }, { - "epoch": 0.31518161180476734, + "epoch": 0.31474397120915865, "grad_norm": 0.0, - "learning_rate": 1.6029667893887787e-05, - "loss": 0.8523, + "learning_rate": 1.604072629338588e-05, + "loss": 1.085, "step": 11107 }, { - "epoch": 0.3152099886492622, + "epoch": 0.31477230865142114, "grad_norm": 0.0, - "learning_rate": 1.6028934658922967e-05, - "loss": 0.8255, + "learning_rate": 1.603999485481808e-05, + "loss": 0.9457, "step": 11108 }, { - "epoch": 0.3152383654937571, + "epoch": 0.3148006460936836, "grad_norm": 0.0, - "learning_rate": 1.6028201373031466e-05, - "loss": 0.924, + "learning_rate": 1.6039263365373167e-05, + "loss": 0.937, "step": 11109 }, { - "epoch": 0.31526674233825197, + "epoch": 0.31482898353594607, "grad_norm": 0.0, - "learning_rate": 1.602746803621948e-05, - "loss": 1.0176, + "learning_rate": 1.6038531825057295e-05, + "loss": 0.9064, "step": 11110 }, { - "epoch": 0.3152951191827469, + "epoch": 0.3148573209782085, "grad_norm": 0.0, - "learning_rate": 1.6026734648493195e-05, - "loss": 0.924, + "learning_rate": 1.6037800233876623e-05, + "loss": 0.9475, "step": 11111 }, { - "epoch": 0.31532349602724175, + "epoch": 0.31488565842047095, "grad_norm": 0.0, - "learning_rate": 1.6026001209858818e-05, - "loss": 0.9449, + "learning_rate": 1.6037068591837318e-05, + "loss": 0.9488, "step": 11112 }, { - "epoch": 0.31535187287173666, + "epoch": 0.31491399586273344, "grad_norm": 0.0, - "learning_rate": 1.602526772032253e-05, - "loss": 0.982, + "learning_rate": 1.6036336898945543e-05, + "loss": 1.0397, "step": 11113 }, { - "epoch": 0.3153802497162316, + "epoch": 0.3149423333049959, "grad_norm": 0.0, - "learning_rate": 1.6024534179890542e-05, - "loss": 1.1372, + "learning_rate": 1.603560515520746e-05, + "loss": 0.9149, "step": 11114 }, { - "epoch": 0.31540862656072643, + "epoch": 0.3149706707472584, "grad_norm": 0.0, - "learning_rate": 1.602380058856904e-05, - "loss": 0.8728, + "learning_rate": 1.6034873360629238e-05, + "loss": 0.916, "step": 11115 }, { - "epoch": 0.31543700340522135, + "epoch": 0.3149990081895208, "grad_norm": 0.0, - "learning_rate": 1.6023066946364223e-05, - "loss": 0.9366, + "learning_rate": 1.603414151521703e-05, + "loss": 0.9226, "step": 11116 }, { - "epoch": 0.3154653802497162, + "epoch": 0.31502734563178325, "grad_norm": 0.0, - "learning_rate": 1.602233325328229e-05, - "loss": 0.9782, + "learning_rate": 1.6033409618977013e-05, + "loss": 0.8732, "step": 11117 }, { - "epoch": 0.3154937570942111, + "epoch": 0.31505568307404574, "grad_norm": 0.0, - "learning_rate": 1.602159950932944e-05, - "loss": 0.969, + "learning_rate": 1.6032677671915343e-05, + "loss": 1.0107, "step": 11118 }, { - "epoch": 0.31552213393870604, + "epoch": 0.3150840205163082, "grad_norm": 0.0, - "learning_rate": 1.6020865714511863e-05, - "loss": 1.0166, + "learning_rate": 1.603194567403819e-05, + "loss": 0.9371, "step": 11119 }, { - "epoch": 0.3155505107832009, + "epoch": 0.3151123579585707, "grad_norm": 0.0, - "learning_rate": 1.6020131868835762e-05, - "loss": 0.8088, + "learning_rate": 1.603121362535172e-05, + "loss": 0.9312, "step": 11120 }, { - "epoch": 0.3155788876276958, + "epoch": 0.3151406954008331, "grad_norm": 0.0, - "learning_rate": 1.601939797230734e-05, - "loss": 0.8344, + "learning_rate": 1.6030481525862096e-05, + "loss": 1.0519, "step": 11121 }, { - "epoch": 0.3156072644721907, + "epoch": 0.3151690328430956, "grad_norm": 0.0, - "learning_rate": 1.6018664024932792e-05, - "loss": 1.022, + "learning_rate": 1.6029749375575487e-05, + "loss": 0.9714, "step": 11122 }, { - "epoch": 0.3156356413166856, + "epoch": 0.31519737028535805, "grad_norm": 0.0, - "learning_rate": 1.601793002671832e-05, - "loss": 0.9823, + "learning_rate": 1.6029017174498062e-05, + "loss": 0.942, "step": 11123 }, { - "epoch": 0.3156640181611805, + "epoch": 0.3152257077276205, "grad_norm": 0.0, - "learning_rate": 1.601719597767012e-05, - "loss": 0.9303, + "learning_rate": 1.602828492263598e-05, + "loss": 0.9151, "step": 11124 }, { - "epoch": 0.31569239500567536, + "epoch": 0.315254045169883, "grad_norm": 0.0, - "learning_rate": 1.6016461877794397e-05, - "loss": 0.8902, + "learning_rate": 1.6027552619995423e-05, + "loss": 0.9197, "step": 11125 }, { - "epoch": 0.3157207718501703, + "epoch": 0.3152823826121454, "grad_norm": 0.0, - "learning_rate": 1.601572772709735e-05, - "loss": 0.9354, + "learning_rate": 1.602682026658255e-05, + "loss": 0.9536, "step": 11126 }, { - "epoch": 0.31574914869466514, + "epoch": 0.3153107200544079, "grad_norm": 0.0, - "learning_rate": 1.6014993525585176e-05, - "loss": 1.074, + "learning_rate": 1.602608786240353e-05, + "loss": 1.0097, "step": 11127 }, { - "epoch": 0.31577752553916005, + "epoch": 0.31533905749667035, "grad_norm": 0.0, - "learning_rate": 1.6014259273264085e-05, - "loss": 0.9233, + "learning_rate": 1.6025355407464536e-05, + "loss": 0.9339, "step": 11128 }, { - "epoch": 0.3158059023836549, + "epoch": 0.3153673949389328, "grad_norm": 0.0, - "learning_rate": 1.6013524970140276e-05, - "loss": 0.9483, + "learning_rate": 1.6024622901771736e-05, + "loss": 0.9501, "step": 11129 }, { - "epoch": 0.31583427922814983, + "epoch": 0.3153957323811953, "grad_norm": 0.0, - "learning_rate": 1.601279061621995e-05, - "loss": 0.8837, + "learning_rate": 1.6023890345331297e-05, + "loss": 0.9996, "step": 11130 }, { - "epoch": 0.31586265607264474, + "epoch": 0.3154240698234577, "grad_norm": 0.0, - "learning_rate": 1.6012056211509312e-05, - "loss": 0.8457, + "learning_rate": 1.6023157738149398e-05, + "loss": 0.9197, "step": 11131 }, { - "epoch": 0.3158910329171396, + "epoch": 0.3154524072657202, "grad_norm": 0.0, - "learning_rate": 1.6011321756014566e-05, - "loss": 1.0807, + "learning_rate": 1.60224250802322e-05, + "loss": 0.9652, "step": 11132 }, { - "epoch": 0.3159194097616345, + "epoch": 0.31548074470798265, "grad_norm": 0.0, - "learning_rate": 1.6010587249741915e-05, - "loss": 0.8654, + "learning_rate": 1.6021692371585884e-05, + "loss": 0.9342, "step": 11133 }, { - "epoch": 0.3159477866061294, + "epoch": 0.31550908215024515, "grad_norm": 0.0, - "learning_rate": 1.600985269269756e-05, - "loss": 0.9647, + "learning_rate": 1.6020959612216613e-05, + "loss": 0.9273, "step": 11134 }, { - "epoch": 0.3159761634506243, + "epoch": 0.3155374195925076, "grad_norm": 0.0, - "learning_rate": 1.6009118084887712e-05, - "loss": 0.8101, + "learning_rate": 1.6020226802130566e-05, + "loss": 0.9723, "step": 11135 }, { - "epoch": 0.3160045402951192, + "epoch": 0.31556575703477, "grad_norm": 0.0, - "learning_rate": 1.600838342631857e-05, - "loss": 0.9031, + "learning_rate": 1.6019493941333907e-05, + "loss": 0.9407, "step": 11136 }, { - "epoch": 0.31603291713961407, + "epoch": 0.3155940944770325, "grad_norm": 0.0, - "learning_rate": 1.6007648716996348e-05, - "loss": 0.9034, + "learning_rate": 1.6018761029832822e-05, + "loss": 1.0473, "step": 11137 }, { - "epoch": 0.316061293984109, + "epoch": 0.31562243191929495, "grad_norm": 0.0, - "learning_rate": 1.6006913956927242e-05, - "loss": 0.9301, + "learning_rate": 1.6018028067633478e-05, + "loss": 1.0156, "step": 11138 }, { - "epoch": 0.31608967082860384, + "epoch": 0.31565076936155745, "grad_norm": 0.0, - "learning_rate": 1.600617914611747e-05, - "loss": 1.014, + "learning_rate": 1.6017295054742045e-05, + "loss": 0.978, "step": 11139 }, { - "epoch": 0.31611804767309876, + "epoch": 0.3156791068038199, "grad_norm": 0.0, - "learning_rate": 1.600544428457323e-05, - "loss": 0.8989, + "learning_rate": 1.6016561991164702e-05, + "loss": 1.0143, "step": 11140 }, { - "epoch": 0.3161464245175936, + "epoch": 0.3157074442460823, "grad_norm": 0.0, - "learning_rate": 1.6004709372300732e-05, - "loss": 0.8125, + "learning_rate": 1.6015828876907623e-05, + "loss": 0.9926, "step": 11141 }, { - "epoch": 0.31617480136208853, + "epoch": 0.3157357816883448, "grad_norm": 0.0, - "learning_rate": 1.600397440930618e-05, - "loss": 0.9567, + "learning_rate": 1.6015095711976988e-05, + "loss": 0.9002, "step": 11142 }, { - "epoch": 0.31620317820658345, + "epoch": 0.31576411913060726, "grad_norm": 0.0, - "learning_rate": 1.600323939559579e-05, - "loss": 0.9727, + "learning_rate": 1.6014362496378962e-05, + "loss": 1.0053, "step": 11143 }, { - "epoch": 0.3162315550510783, + "epoch": 0.31579245657286975, "grad_norm": 0.0, - "learning_rate": 1.600250433117577e-05, - "loss": 0.9701, + "learning_rate": 1.601362923011973e-05, + "loss": 0.983, "step": 11144 }, { - "epoch": 0.3162599318955732, + "epoch": 0.3158207940151322, "grad_norm": 0.0, - "learning_rate": 1.6001769216052323e-05, - "loss": 0.9313, + "learning_rate": 1.6012895913205465e-05, + "loss": 0.9566, "step": 11145 }, { - "epoch": 0.3162883087400681, + "epoch": 0.3158491314573947, "grad_norm": 0.0, - "learning_rate": 1.600103405023166e-05, - "loss": 0.9257, + "learning_rate": 1.6012162545642346e-05, + "loss": 0.8363, "step": 11146 }, { - "epoch": 0.316316685584563, + "epoch": 0.3158774688996571, "grad_norm": 0.0, - "learning_rate": 1.6000298833719996e-05, - "loss": 0.8689, + "learning_rate": 1.6011429127436547e-05, + "loss": 0.959, "step": 11147 }, { - "epoch": 0.3163450624290579, + "epoch": 0.31590580634191956, "grad_norm": 0.0, - "learning_rate": 1.5999563566523537e-05, - "loss": 0.926, + "learning_rate": 1.6010695658594255e-05, + "loss": 0.9626, "step": 11148 }, { - "epoch": 0.31637343927355277, + "epoch": 0.31593414378418205, "grad_norm": 0.0, - "learning_rate": 1.5998828248648494e-05, - "loss": 0.8802, + "learning_rate": 1.6009962139121635e-05, + "loss": 1.0798, "step": 11149 }, { - "epoch": 0.3164018161180477, + "epoch": 0.3159624812264445, "grad_norm": 0.0, - "learning_rate": 1.5998092880101076e-05, - "loss": 0.9551, + "learning_rate": 1.6009228569024875e-05, + "loss": 1.0369, "step": 11150 }, { - "epoch": 0.31643019296254254, + "epoch": 0.315990818668707, "grad_norm": 0.0, - "learning_rate": 1.5997357460887502e-05, - "loss": 1.0917, + "learning_rate": 1.6008494948310147e-05, + "loss": 0.9391, "step": 11151 }, { - "epoch": 0.31645856980703746, + "epoch": 0.3160191561109694, "grad_norm": 0.0, - "learning_rate": 1.5996621991013982e-05, - "loss": 0.9923, + "learning_rate": 1.600776127698364e-05, + "loss": 0.8828, "step": 11152 }, { - "epoch": 0.3164869466515324, + "epoch": 0.31604749355323186, "grad_norm": 0.0, - "learning_rate": 1.5995886470486723e-05, - "loss": 0.8795, + "learning_rate": 1.6007027555051524e-05, + "loss": 0.912, "step": 11153 }, { - "epoch": 0.31651532349602723, + "epoch": 0.31607583099549436, "grad_norm": 0.0, - "learning_rate": 1.5995150899311938e-05, - "loss": 0.964, + "learning_rate": 1.6006293782519988e-05, + "loss": 1.0639, "step": 11154 }, { - "epoch": 0.31654370034052215, + "epoch": 0.3161041684377568, "grad_norm": 0.0, - "learning_rate": 1.5994415277495852e-05, - "loss": 0.8153, + "learning_rate": 1.600555995939521e-05, + "loss": 0.9885, "step": 11155 }, { - "epoch": 0.316572077185017, + "epoch": 0.3161325058800193, "grad_norm": 0.0, - "learning_rate": 1.5993679605044662e-05, - "loss": 0.8934, + "learning_rate": 1.6004826085683367e-05, + "loss": 0.9191, "step": 11156 }, { - "epoch": 0.3166004540295119, + "epoch": 0.3161608433222817, "grad_norm": 0.0, - "learning_rate": 1.5992943881964594e-05, - "loss": 0.8784, + "learning_rate": 1.6004092161390645e-05, + "loss": 1.0953, "step": 11157 }, { - "epoch": 0.3166288308740068, + "epoch": 0.3161891807645442, "grad_norm": 0.0, - "learning_rate": 1.5992208108261864e-05, - "loss": 0.8962, + "learning_rate": 1.6003358186523226e-05, + "loss": 0.8805, "step": 11158 }, { - "epoch": 0.3166572077185017, + "epoch": 0.31621751820680666, "grad_norm": 0.0, - "learning_rate": 1.599147228394268e-05, - "loss": 0.9931, + "learning_rate": 1.6002624161087293e-05, + "loss": 1.0164, "step": 11159 }, { - "epoch": 0.3166855845629966, + "epoch": 0.3162458556490691, "grad_norm": 0.0, - "learning_rate": 1.599073640901326e-05, - "loss": 0.9657, + "learning_rate": 1.6001890085089026e-05, + "loss": 0.9383, "step": 11160 }, { - "epoch": 0.3167139614074915, + "epoch": 0.3162741930913316, "grad_norm": 0.0, - "learning_rate": 1.599000048347982e-05, - "loss": 1.0274, + "learning_rate": 1.6001155958534608e-05, + "loss": 0.9854, "step": 11161 }, { - "epoch": 0.3167423382519864, + "epoch": 0.31630253053359403, "grad_norm": 0.0, - "learning_rate": 1.5989264507348576e-05, - "loss": 1.003, + "learning_rate": 1.600042178143023e-05, + "loss": 0.9732, "step": 11162 }, { - "epoch": 0.31677071509648125, + "epoch": 0.3163308679758565, "grad_norm": 0.0, - "learning_rate": 1.598852848062575e-05, - "loss": 1.0076, + "learning_rate": 1.5999687553782067e-05, + "loss": 0.8991, "step": 11163 }, { - "epoch": 0.31679909194097616, + "epoch": 0.31635920541811896, "grad_norm": 0.0, - "learning_rate": 1.598779240331755e-05, - "loss": 1.0659, + "learning_rate": 1.5998953275596307e-05, + "loss": 0.9198, "step": 11164 }, { - "epoch": 0.3168274687854711, + "epoch": 0.3163875428603814, "grad_norm": 0.0, - "learning_rate": 1.5987056275430196e-05, - "loss": 0.9701, + "learning_rate": 1.599821894687914e-05, + "loss": 0.9935, "step": 11165 }, { - "epoch": 0.31685584562996594, + "epoch": 0.3164158803026439, "grad_norm": 0.0, - "learning_rate": 1.5986320096969915e-05, - "loss": 0.97, + "learning_rate": 1.5997484567636744e-05, + "loss": 0.9323, "step": 11166 }, { - "epoch": 0.31688422247446085, + "epoch": 0.31644421774490633, "grad_norm": 0.0, - "learning_rate": 1.5985583867942916e-05, - "loss": 0.9399, + "learning_rate": 1.599675013787531e-05, + "loss": 1.034, "step": 11167 }, { - "epoch": 0.3169125993189557, + "epoch": 0.3164725551871688, "grad_norm": 0.0, - "learning_rate": 1.5984847588355423e-05, - "loss": 1.0328, + "learning_rate": 1.5996015657601023e-05, + "loss": 0.7988, "step": 11168 }, { - "epoch": 0.3169409761634506, + "epoch": 0.31650089262943126, "grad_norm": 0.0, - "learning_rate": 1.598411125821365e-05, - "loss": 1.0053, + "learning_rate": 1.5995281126820067e-05, + "loss": 1.0794, "step": 11169 }, { - "epoch": 0.31696935300794554, + "epoch": 0.3165292300716937, "grad_norm": 0.0, - "learning_rate": 1.598337487752382e-05, - "loss": 1.0141, + "learning_rate": 1.5994546545538634e-05, + "loss": 0.9459, "step": 11170 }, { - "epoch": 0.3169977298524404, + "epoch": 0.3165575675139562, "grad_norm": 0.0, - "learning_rate": 1.598263844629216e-05, - "loss": 1.0558, + "learning_rate": 1.599381191376291e-05, + "loss": 0.8625, "step": 11171 }, { - "epoch": 0.3170261066969353, + "epoch": 0.31658590495621863, "grad_norm": 0.0, - "learning_rate": 1.5981901964524877e-05, - "loss": 0.8681, + "learning_rate": 1.599307723149908e-05, + "loss": 0.9401, "step": 11172 }, { - "epoch": 0.3170544835414302, + "epoch": 0.3166142423984811, "grad_norm": 0.0, - "learning_rate": 1.5981165432228197e-05, - "loss": 0.8975, + "learning_rate": 1.5992342498753336e-05, + "loss": 0.9321, "step": 11173 }, { - "epoch": 0.3170828603859251, + "epoch": 0.31664257984074357, "grad_norm": 0.0, - "learning_rate": 1.598042884940835e-05, - "loss": 1.0048, + "learning_rate": 1.599160771553187e-05, + "loss": 0.9519, "step": 11174 }, { - "epoch": 0.31711123723041995, + "epoch": 0.31667091728300606, "grad_norm": 0.0, - "learning_rate": 1.597969221607155e-05, - "loss": 0.8887, + "learning_rate": 1.599087288184086e-05, + "loss": 0.9279, "step": 11175 }, { - "epoch": 0.31713961407491487, + "epoch": 0.3166992547252685, "grad_norm": 0.0, - "learning_rate": 1.5978955532224022e-05, - "loss": 0.9715, + "learning_rate": 1.5990137997686508e-05, + "loss": 0.9596, "step": 11176 }, { - "epoch": 0.3171679909194098, + "epoch": 0.31672759216753094, "grad_norm": 0.0, - "learning_rate": 1.5978218797871987e-05, - "loss": 0.9063, + "learning_rate": 1.5989403063074998e-05, + "loss": 0.8962, "step": 11177 }, { - "epoch": 0.31719636776390464, + "epoch": 0.31675592960979343, "grad_norm": 0.0, - "learning_rate": 1.597748201302167e-05, - "loss": 0.9451, + "learning_rate": 1.5988668078012525e-05, + "loss": 0.9957, "step": 11178 }, { - "epoch": 0.31722474460839956, + "epoch": 0.31678426705205587, "grad_norm": 0.0, - "learning_rate": 1.597674517767929e-05, - "loss": 0.9375, + "learning_rate": 1.5987933042505272e-05, + "loss": 0.956, "step": 11179 }, { - "epoch": 0.3172531214528944, + "epoch": 0.31681260449431836, "grad_norm": 0.0, - "learning_rate": 1.5976008291851076e-05, - "loss": 0.8923, + "learning_rate": 1.5987197956559434e-05, + "loss": 0.9939, "step": 11180 }, { - "epoch": 0.31728149829738933, + "epoch": 0.3168409419365808, "grad_norm": 0.0, - "learning_rate": 1.5975271355543252e-05, - "loss": 1.0171, + "learning_rate": 1.598646282018121e-05, + "loss": 0.9485, "step": 11181 }, { - "epoch": 0.31730987514188425, + "epoch": 0.31686927937884324, "grad_norm": 0.0, - "learning_rate": 1.5974534368762045e-05, - "loss": 0.9459, + "learning_rate": 1.5985727633376783e-05, + "loss": 1.0709, "step": 11182 }, { - "epoch": 0.3173382519863791, + "epoch": 0.31689761682110573, "grad_norm": 0.0, - "learning_rate": 1.5973797331513674e-05, - "loss": 1.0429, + "learning_rate": 1.598499239615235e-05, + "loss": 0.9456, "step": 11183 }, { - "epoch": 0.317366628830874, + "epoch": 0.31692595426336817, "grad_norm": 0.0, - "learning_rate": 1.597306024380437e-05, - "loss": 0.8542, + "learning_rate": 1.5984257108514107e-05, + "loss": 0.972, "step": 11184 }, { - "epoch": 0.3173950056753689, + "epoch": 0.31695429170563066, "grad_norm": 0.0, - "learning_rate": 1.5972323105640356e-05, - "loss": 0.8467, + "learning_rate": 1.598352177046824e-05, + "loss": 0.9093, "step": 11185 }, { - "epoch": 0.3174233825198638, + "epoch": 0.3169826291478931, "grad_norm": 0.0, - "learning_rate": 1.5971585917027864e-05, - "loss": 0.9766, + "learning_rate": 1.598278638202095e-05, + "loss": 0.8973, "step": 11186 }, { - "epoch": 0.3174517593643587, + "epoch": 0.3170109665901556, "grad_norm": 0.0, - "learning_rate": 1.5970848677973117e-05, - "loss": 0.9102, + "learning_rate": 1.5982050943178428e-05, + "loss": 1.0328, "step": 11187 }, { - "epoch": 0.31748013620885357, + "epoch": 0.31703930403241803, "grad_norm": 0.0, - "learning_rate": 1.5970111388482335e-05, - "loss": 0.9228, + "learning_rate": 1.5981315453946867e-05, + "loss": 1.0303, "step": 11188 }, { - "epoch": 0.3175085130533485, + "epoch": 0.3170676414746805, "grad_norm": 0.0, - "learning_rate": 1.5969374048561763e-05, - "loss": 0.9862, + "learning_rate": 1.598057991433247e-05, + "loss": 0.8381, "step": 11189 }, { - "epoch": 0.31753688989784334, + "epoch": 0.31709597891694297, "grad_norm": 0.0, - "learning_rate": 1.5968636658217614e-05, - "loss": 1.0428, + "learning_rate": 1.5979844324341424e-05, + "loss": 0.9502, "step": 11190 }, { - "epoch": 0.31756526674233826, + "epoch": 0.3171243163592054, "grad_norm": 0.0, - "learning_rate": 1.5967899217456126e-05, - "loss": 0.9112, + "learning_rate": 1.5979108683979928e-05, + "loss": 1.0017, "step": 11191 }, { - "epoch": 0.3175936435868331, + "epoch": 0.3171526538014679, "grad_norm": 0.0, - "learning_rate": 1.5967161726283527e-05, - "loss": 0.9984, + "learning_rate": 1.597837299325418e-05, + "loss": 0.929, "step": 11192 }, { - "epoch": 0.31762202043132803, + "epoch": 0.31718099124373034, "grad_norm": 0.0, - "learning_rate": 1.5966424184706043e-05, - "loss": 1.0514, + "learning_rate": 1.5977637252170377e-05, + "loss": 0.9437, "step": 11193 }, { - "epoch": 0.31765039727582295, + "epoch": 0.3172093286859928, "grad_norm": 0.0, - "learning_rate": 1.5965686592729903e-05, - "loss": 0.9409, + "learning_rate": 1.5976901460734714e-05, + "loss": 1.0166, "step": 11194 }, { - "epoch": 0.3176787741203178, + "epoch": 0.31723766612825527, "grad_norm": 0.0, - "learning_rate": 1.5964948950361343e-05, - "loss": 0.9622, + "learning_rate": 1.597616561895339e-05, + "loss": 0.9189, "step": 11195 }, { - "epoch": 0.3177071509648127, + "epoch": 0.3172660035705177, "grad_norm": 0.0, - "learning_rate": 1.5964211257606587e-05, - "loss": 0.9713, + "learning_rate": 1.597542972683261e-05, + "loss": 0.9754, "step": 11196 }, { - "epoch": 0.3177355278093076, + "epoch": 0.3172943410127802, "grad_norm": 0.0, - "learning_rate": 1.5963473514471874e-05, - "loss": 0.9467, + "learning_rate": 1.597469378437856e-05, + "loss": 0.8357, "step": 11197 }, { - "epoch": 0.3177639046538025, + "epoch": 0.31732267845504264, "grad_norm": 0.0, - "learning_rate": 1.5962735720963434e-05, - "loss": 0.963, + "learning_rate": 1.5973957791597445e-05, + "loss": 0.8558, "step": 11198 }, { - "epoch": 0.3177922814982974, + "epoch": 0.31735101589730513, "grad_norm": 0.0, - "learning_rate": 1.5961997877087495e-05, - "loss": 1.0161, + "learning_rate": 1.5973221748495472e-05, + "loss": 1.0029, "step": 11199 }, { - "epoch": 0.3178206583427923, + "epoch": 0.31737935333956757, "grad_norm": 0.0, - "learning_rate": 1.5961259982850293e-05, - "loss": 0.9849, + "learning_rate": 1.5972485655078828e-05, + "loss": 0.9626, "step": 11200 }, { - "epoch": 0.3178490351872872, + "epoch": 0.31740769078183, "grad_norm": 0.0, - "learning_rate": 1.5960522038258057e-05, - "loss": 0.9231, + "learning_rate": 1.597174951135372e-05, + "loss": 0.9936, "step": 11201 }, { - "epoch": 0.31787741203178205, + "epoch": 0.3174360282240925, "grad_norm": 0.0, - "learning_rate": 1.5959784043317027e-05, - "loss": 0.9852, + "learning_rate": 1.597101331732635e-05, + "loss": 0.9205, "step": 11202 }, { - "epoch": 0.31790578887627696, + "epoch": 0.31746436566635494, "grad_norm": 0.0, - "learning_rate": 1.595904599803343e-05, - "loss": 0.9043, + "learning_rate": 1.597027707300292e-05, + "loss": 1.0005, "step": 11203 }, { - "epoch": 0.3179341657207719, + "epoch": 0.31749270310861744, "grad_norm": 0.0, - "learning_rate": 1.5958307902413505e-05, - "loss": 1.0425, + "learning_rate": 1.5969540778389624e-05, + "loss": 0.9344, "step": 11204 }, { - "epoch": 0.31796254256526674, + "epoch": 0.3175210405508799, "grad_norm": 0.0, - "learning_rate": 1.5957569756463485e-05, - "loss": 1.0155, + "learning_rate": 1.596880443349267e-05, + "loss": 0.9344, "step": 11205 }, { - "epoch": 0.31799091940976165, + "epoch": 0.3175493779931423, "grad_norm": 0.0, - "learning_rate": 1.5956831560189608e-05, - "loss": 0.9029, + "learning_rate": 1.5968068038318266e-05, + "loss": 0.9592, "step": 11206 }, { - "epoch": 0.3180192962542565, + "epoch": 0.3175777154354048, "grad_norm": 0.0, - "learning_rate": 1.59560933135981e-05, - "loss": 1.1194, + "learning_rate": 1.5967331592872604e-05, + "loss": 0.8473, "step": 11207 }, { - "epoch": 0.3180476730987514, + "epoch": 0.31760605287766724, "grad_norm": 0.0, - "learning_rate": 1.5955355016695212e-05, - "loss": 0.9537, + "learning_rate": 1.5966595097161893e-05, + "loss": 1.0316, "step": 11208 }, { - "epoch": 0.3180760499432463, + "epoch": 0.31763439031992974, "grad_norm": 0.0, - "learning_rate": 1.595461666948717e-05, - "loss": 1.0564, + "learning_rate": 1.596585855119233e-05, + "loss": 1.0199, "step": 11209 }, { - "epoch": 0.3181044267877412, + "epoch": 0.3176627277621922, "grad_norm": 0.0, - "learning_rate": 1.5953878271980212e-05, - "loss": 0.8149, + "learning_rate": 1.5965121954970134e-05, + "loss": 0.9047, "step": 11210 }, { - "epoch": 0.3181328036322361, + "epoch": 0.31769106520445467, "grad_norm": 0.0, - "learning_rate": 1.5953139824180576e-05, - "loss": 0.9756, + "learning_rate": 1.5964385308501497e-05, + "loss": 0.9802, "step": 11211 }, { - "epoch": 0.318161180476731, + "epoch": 0.3177194026467171, "grad_norm": 0.0, - "learning_rate": 1.59524013260945e-05, - "loss": 0.9451, + "learning_rate": 1.5963648611792625e-05, + "loss": 1.0214, "step": 11212 }, { - "epoch": 0.3181895573212259, + "epoch": 0.31774774008897955, "grad_norm": 0.0, - "learning_rate": 1.5951662777728225e-05, - "loss": 1.0269, + "learning_rate": 1.596291186484973e-05, + "loss": 1.0195, "step": 11213 }, { - "epoch": 0.31821793416572075, + "epoch": 0.31777607753124204, "grad_norm": 0.0, - "learning_rate": 1.5950924179087985e-05, - "loss": 0.9466, + "learning_rate": 1.5962175067679013e-05, + "loss": 0.8613, "step": 11214 }, { - "epoch": 0.31824631101021567, + "epoch": 0.3178044149735045, "grad_norm": 0.0, - "learning_rate": 1.5950185530180023e-05, - "loss": 0.994, + "learning_rate": 1.596143822028668e-05, + "loss": 0.9602, "step": 11215 }, { - "epoch": 0.3182746878547106, + "epoch": 0.317832752415767, "grad_norm": 0.0, - "learning_rate": 1.5949446831010576e-05, - "loss": 0.921, + "learning_rate": 1.5960701322678943e-05, + "loss": 0.8436, "step": 11216 }, { - "epoch": 0.31830306469920544, + "epoch": 0.3178610898580294, "grad_norm": 0.0, - "learning_rate": 1.5948708081585885e-05, - "loss": 0.9004, + "learning_rate": 1.5959964374862e-05, + "loss": 0.918, "step": 11217 }, { - "epoch": 0.31833144154370036, + "epoch": 0.31788942730029185, "grad_norm": 0.0, - "learning_rate": 1.5947969281912188e-05, - "loss": 0.8983, + "learning_rate": 1.5959227376842067e-05, + "loss": 0.9424, "step": 11218 }, { - "epoch": 0.3183598183881952, + "epoch": 0.31791776474255434, "grad_norm": 0.0, - "learning_rate": 1.5947230431995725e-05, - "loss": 0.9459, + "learning_rate": 1.595849032862535e-05, + "loss": 0.901, "step": 11219 }, { - "epoch": 0.31838819523269013, + "epoch": 0.3179461021848168, "grad_norm": 0.0, - "learning_rate": 1.5946491531842744e-05, - "loss": 0.9922, + "learning_rate": 1.5957753230218052e-05, + "loss": 0.9709, "step": 11220 }, { - "epoch": 0.318416572077185, + "epoch": 0.3179744396270793, "grad_norm": 0.0, - "learning_rate": 1.5945752581459476e-05, - "loss": 0.9477, + "learning_rate": 1.595701608162639e-05, + "loss": 0.8624, "step": 11221 }, { - "epoch": 0.3184449489216799, + "epoch": 0.3180027770693417, "grad_norm": 0.0, - "learning_rate": 1.5945013580852172e-05, - "loss": 0.9196, + "learning_rate": 1.595627888285657e-05, + "loss": 0.9634, "step": 11222 }, { - "epoch": 0.3184733257661748, + "epoch": 0.3180311145116042, "grad_norm": 0.0, - "learning_rate": 1.594427453002707e-05, - "loss": 0.9487, + "learning_rate": 1.5955541633914798e-05, + "loss": 0.9704, "step": 11223 }, { - "epoch": 0.3185017026106697, + "epoch": 0.31805945195386665, "grad_norm": 0.0, - "learning_rate": 1.5943535428990417e-05, - "loss": 0.8608, + "learning_rate": 1.595480433480729e-05, + "loss": 0.9197, "step": 11224 }, { - "epoch": 0.3185300794551646, + "epoch": 0.3180877893961291, "grad_norm": 0.0, - "learning_rate": 1.5942796277748448e-05, - "loss": 0.9597, + "learning_rate": 1.5954066985540257e-05, + "loss": 0.8949, "step": 11225 }, { - "epoch": 0.31855845629965945, + "epoch": 0.3181161268383916, "grad_norm": 0.0, - "learning_rate": 1.5942057076307413e-05, - "loss": 0.8337, + "learning_rate": 1.59533295861199e-05, + "loss": 0.9035, "step": 11226 }, { - "epoch": 0.31858683314415437, + "epoch": 0.318144464280654, "grad_norm": 0.0, - "learning_rate": 1.5941317824673557e-05, - "loss": 0.9199, + "learning_rate": 1.595259213655244e-05, + "loss": 1.0166, "step": 11227 }, { - "epoch": 0.3186152099886493, + "epoch": 0.3181728017229165, "grad_norm": 0.0, - "learning_rate": 1.594057852285312e-05, - "loss": 1.0803, + "learning_rate": 1.5951854636844086e-05, + "loss": 0.9179, "step": 11228 }, { - "epoch": 0.31864358683314414, + "epoch": 0.31820113916517895, "grad_norm": 0.0, - "learning_rate": 1.5939839170852352e-05, - "loss": 1.0274, + "learning_rate": 1.5951117087001048e-05, + "loss": 1.0204, "step": 11229 }, { - "epoch": 0.31867196367763906, + "epoch": 0.3182294766074414, "grad_norm": 0.0, - "learning_rate": 1.5939099768677495e-05, - "loss": 0.9519, + "learning_rate": 1.5950379487029543e-05, + "loss": 1.0002, "step": 11230 }, { - "epoch": 0.3187003405221339, + "epoch": 0.3182578140497039, "grad_norm": 0.0, - "learning_rate": 1.593836031633479e-05, - "loss": 0.9004, + "learning_rate": 1.5949641836935782e-05, + "loss": 0.9315, "step": 11231 }, { - "epoch": 0.31872871736662883, + "epoch": 0.3182861514919663, "grad_norm": 0.0, - "learning_rate": 1.5937620813830495e-05, - "loss": 0.9054, + "learning_rate": 1.594890413672598e-05, + "loss": 1.0259, "step": 11232 }, { - "epoch": 0.31875709421112375, + "epoch": 0.3183144889342288, "grad_norm": 0.0, - "learning_rate": 1.593688126117085e-05, - "loss": 1.0251, + "learning_rate": 1.5948166386406345e-05, + "loss": 0.8803, "step": 11233 }, { - "epoch": 0.3187854710556186, + "epoch": 0.31834282637649125, "grad_norm": 0.0, - "learning_rate": 1.5936141658362098e-05, - "loss": 1.1187, + "learning_rate": 1.59474285859831e-05, + "loss": 1.0532, "step": 11234 }, { - "epoch": 0.3188138479001135, + "epoch": 0.31837116381875374, "grad_norm": 0.0, - "learning_rate": 1.5935402005410493e-05, - "loss": 1.1443, + "learning_rate": 1.5946690735462452e-05, + "loss": 0.919, "step": 11235 }, { - "epoch": 0.3188422247446084, + "epoch": 0.3183995012610162, "grad_norm": 0.0, - "learning_rate": 1.593466230232228e-05, - "loss": 0.9493, + "learning_rate": 1.5945952834850623e-05, + "loss": 0.9473, "step": 11236 }, { - "epoch": 0.3188706015891033, + "epoch": 0.3184278387032786, "grad_norm": 0.0, - "learning_rate": 1.5933922549103708e-05, - "loss": 0.9777, + "learning_rate": 1.5945214884153823e-05, + "loss": 1.0016, "step": 11237 }, { - "epoch": 0.31889897843359816, + "epoch": 0.3184561761455411, "grad_norm": 0.0, - "learning_rate": 1.5933182745761023e-05, - "loss": 0.8681, + "learning_rate": 1.5944476883378274e-05, + "loss": 0.9616, "step": 11238 }, { - "epoch": 0.31892735527809307, + "epoch": 0.31848451358780355, "grad_norm": 0.0, - "learning_rate": 1.593244289230048e-05, - "loss": 0.8682, + "learning_rate": 1.5943738832530183e-05, + "loss": 1.0058, "step": 11239 }, { - "epoch": 0.318955732122588, + "epoch": 0.31851285103006605, "grad_norm": 0.0, - "learning_rate": 1.5931702988728323e-05, - "loss": 0.8554, + "learning_rate": 1.5943000731615777e-05, + "loss": 0.8985, "step": 11240 }, { - "epoch": 0.31898410896708285, + "epoch": 0.3185411884723285, "grad_norm": 0.0, - "learning_rate": 1.5930963035050805e-05, - "loss": 0.9485, + "learning_rate": 1.594226258064127e-05, + "loss": 0.8387, "step": 11241 }, { - "epoch": 0.31901248581157776, + "epoch": 0.3185695259145909, "grad_norm": 0.0, - "learning_rate": 1.5930223031274176e-05, - "loss": 0.879, + "learning_rate": 1.5941524379612878e-05, + "loss": 0.9489, "step": 11242 }, { - "epoch": 0.3190408626560726, + "epoch": 0.3185978633568534, "grad_norm": 0.0, - "learning_rate": 1.5929482977404685e-05, - "loss": 1.0547, + "learning_rate": 1.5940786128536813e-05, + "loss": 0.8919, "step": 11243 }, { - "epoch": 0.31906923950056754, + "epoch": 0.31862620079911586, "grad_norm": 0.0, - "learning_rate": 1.5928742873448585e-05, - "loss": 0.9805, + "learning_rate": 1.5940047827419305e-05, + "loss": 1.0199, "step": 11244 }, { - "epoch": 0.31909761634506245, + "epoch": 0.31865453824137835, "grad_norm": 0.0, - "learning_rate": 1.592800271941213e-05, - "loss": 0.9254, + "learning_rate": 1.593930947626657e-05, + "loss": 0.9463, "step": 11245 }, { - "epoch": 0.3191259931895573, + "epoch": 0.3186828756836408, "grad_norm": 0.0, - "learning_rate": 1.5927262515301565e-05, - "loss": 1.0441, + "learning_rate": 1.5938571075084826e-05, + "loss": 0.9337, "step": 11246 }, { - "epoch": 0.3191543700340522, + "epoch": 0.3187112131259033, "grad_norm": 0.0, - "learning_rate": 1.5926522261123155e-05, - "loss": 0.9436, + "learning_rate": 1.593783262388029e-05, + "loss": 0.9965, "step": 11247 }, { - "epoch": 0.3191827468785471, + "epoch": 0.3187395505681657, "grad_norm": 0.0, - "learning_rate": 1.5925781956883136e-05, - "loss": 0.9808, + "learning_rate": 1.5937094122659187e-05, + "loss": 1.056, "step": 11248 }, { - "epoch": 0.319211123723042, + "epoch": 0.31876788801042816, "grad_norm": 0.0, - "learning_rate": 1.5925041602587773e-05, - "loss": 0.9837, + "learning_rate": 1.5936355571427734e-05, + "loss": 0.9532, "step": 11249 }, { - "epoch": 0.3192395005675369, + "epoch": 0.31879622545269065, "grad_norm": 0.0, - "learning_rate": 1.592430119824332e-05, - "loss": 0.9709, + "learning_rate": 1.5935616970192155e-05, + "loss": 0.9928, "step": 11250 }, { - "epoch": 0.3192678774120318, + "epoch": 0.3188245628949531, "grad_norm": 0.0, - "learning_rate": 1.592356074385603e-05, - "loss": 0.946, + "learning_rate": 1.5934878318958668e-05, + "loss": 1.1124, "step": 11251 }, { - "epoch": 0.3192962542565267, + "epoch": 0.3188529003372156, "grad_norm": 0.0, - "learning_rate": 1.592282023943215e-05, - "loss": 0.9938, + "learning_rate": 1.59341396177335e-05, + "loss": 1.0576, "step": 11252 }, { - "epoch": 0.31932463110102155, + "epoch": 0.318881237779478, "grad_norm": 0.0, - "learning_rate": 1.5922079684977944e-05, - "loss": 0.8816, + "learning_rate": 1.593340086652287e-05, + "loss": 0.9164, "step": 11253 }, { - "epoch": 0.31935300794551646, + "epoch": 0.31890957522174046, "grad_norm": 0.0, - "learning_rate": 1.5921339080499666e-05, - "loss": 0.8569, + "learning_rate": 1.5932662065332996e-05, + "loss": 0.9812, "step": 11254 }, { - "epoch": 0.3193813847900113, + "epoch": 0.31893791266400295, "grad_norm": 0.0, - "learning_rate": 1.592059842600357e-05, - "loss": 0.9272, + "learning_rate": 1.593192321417011e-05, + "loss": 1.0083, "step": 11255 }, { - "epoch": 0.31940976163450624, + "epoch": 0.3189662501062654, "grad_norm": 0.0, - "learning_rate": 1.5919857721495914e-05, - "loss": 1.0296, + "learning_rate": 1.5931184313040437e-05, + "loss": 0.9296, "step": 11256 }, { - "epoch": 0.31943813847900115, + "epoch": 0.3189945875485279, "grad_norm": 0.0, - "learning_rate": 1.5919116966982952e-05, - "loss": 0.9807, + "learning_rate": 1.5930445361950188e-05, + "loss": 0.9393, "step": 11257 }, { - "epoch": 0.319466515323496, + "epoch": 0.3190229249907903, "grad_norm": 0.0, - "learning_rate": 1.5918376162470942e-05, - "loss": 1.0633, + "learning_rate": 1.59297063609056e-05, + "loss": 0.9196, "step": 11258 }, { - "epoch": 0.31949489216799093, + "epoch": 0.3190512624330528, "grad_norm": 0.0, - "learning_rate": 1.5917635307966144e-05, - "loss": 0.966, + "learning_rate": 1.592896730991289e-05, + "loss": 1.0142, "step": 11259 }, { - "epoch": 0.3195232690124858, + "epoch": 0.31907959987531526, "grad_norm": 0.0, - "learning_rate": 1.5916894403474814e-05, - "loss": 0.9666, + "learning_rate": 1.592822820897829e-05, + "loss": 0.9171, "step": 11260 }, { - "epoch": 0.3195516458569807, + "epoch": 0.3191079373175777, "grad_norm": 0.0, - "learning_rate": 1.591615344900321e-05, - "loss": 1.0521, + "learning_rate": 1.5927489058108025e-05, + "loss": 0.9717, "step": 11261 }, { - "epoch": 0.3195800227014756, + "epoch": 0.3191362747598402, "grad_norm": 0.0, - "learning_rate": 1.5915412444557595e-05, - "loss": 0.8845, + "learning_rate": 1.5926749857308316e-05, + "loss": 0.9772, "step": 11262 }, { - "epoch": 0.3196083995459705, + "epoch": 0.3191646122021026, "grad_norm": 0.0, - "learning_rate": 1.5914671390144222e-05, - "loss": 0.9802, + "learning_rate": 1.5926010606585386e-05, + "loss": 0.929, "step": 11263 }, { - "epoch": 0.3196367763904654, + "epoch": 0.3191929496443651, "grad_norm": 0.0, - "learning_rate": 1.5913930285769356e-05, - "loss": 0.9714, + "learning_rate": 1.5925271305945474e-05, + "loss": 0.916, "step": 11264 }, { - "epoch": 0.31966515323496025, + "epoch": 0.31922128708662756, "grad_norm": 0.0, - "learning_rate": 1.5913189131439254e-05, - "loss": 0.9247, + "learning_rate": 1.5924531955394802e-05, + "loss": 0.998, "step": 11265 }, { - "epoch": 0.31969353007945517, + "epoch": 0.31924962452889, "grad_norm": 0.0, - "learning_rate": 1.5912447927160176e-05, - "loss": 1.021, + "learning_rate": 1.5923792554939598e-05, + "loss": 0.9889, "step": 11266 }, { - "epoch": 0.3197219069239501, + "epoch": 0.3192779619711525, "grad_norm": 0.0, - "learning_rate": 1.5911706672938388e-05, - "loss": 0.8979, + "learning_rate": 1.5923053104586087e-05, + "loss": 0.9434, "step": 11267 }, { - "epoch": 0.31975028376844494, + "epoch": 0.31930629941341493, "grad_norm": 0.0, - "learning_rate": 1.5910965368780146e-05, - "loss": 0.909, + "learning_rate": 1.59223136043405e-05, + "loss": 0.8755, "step": 11268 }, { - "epoch": 0.31977866061293986, + "epoch": 0.3193346368556774, "grad_norm": 0.0, - "learning_rate": 1.5910224014691717e-05, - "loss": 0.9894, + "learning_rate": 1.5921574054209064e-05, + "loss": 1.0466, "step": 11269 }, { - "epoch": 0.3198070374574347, + "epoch": 0.31936297429793986, "grad_norm": 0.0, - "learning_rate": 1.5909482610679355e-05, - "loss": 0.9915, + "learning_rate": 1.5920834454198014e-05, + "loss": 0.9287, "step": 11270 }, { - "epoch": 0.31983541430192963, + "epoch": 0.31939131174020235, "grad_norm": 0.0, - "learning_rate": 1.590874115674933e-05, - "loss": 0.913, + "learning_rate": 1.592009480431358e-05, + "loss": 0.97, "step": 11271 }, { - "epoch": 0.3198637911464245, + "epoch": 0.3194196491824648, "grad_norm": 0.0, - "learning_rate": 1.59079996529079e-05, - "loss": 0.9659, + "learning_rate": 1.5919355104561985e-05, + "loss": 1.0255, "step": 11272 }, { - "epoch": 0.3198921679909194, + "epoch": 0.31944798662472723, "grad_norm": 0.0, - "learning_rate": 1.5907258099161335e-05, - "loss": 0.9607, + "learning_rate": 1.5918615354949463e-05, + "loss": 0.9193, "step": 11273 }, { - "epoch": 0.3199205448354143, + "epoch": 0.3194763240669897, "grad_norm": 0.0, - "learning_rate": 1.5906516495515895e-05, - "loss": 0.9723, + "learning_rate": 1.591787555548225e-05, + "loss": 0.9434, "step": 11274 }, { - "epoch": 0.3199489216799092, + "epoch": 0.31950466150925216, "grad_norm": 0.0, - "learning_rate": 1.5905774841977843e-05, - "loss": 0.9962, + "learning_rate": 1.591713570616657e-05, + "loss": 1.1294, "step": 11275 }, { - "epoch": 0.3199772985244041, + "epoch": 0.31953299895151466, "grad_norm": 0.0, - "learning_rate": 1.590503313855345e-05, - "loss": 0.9471, + "learning_rate": 1.591639580700866e-05, + "loss": 0.8633, "step": 11276 }, { - "epoch": 0.32000567536889896, + "epoch": 0.3195613363937771, "grad_norm": 0.0, - "learning_rate": 1.5904291385248973e-05, - "loss": 0.929, + "learning_rate": 1.591565585801475e-05, + "loss": 0.9369, "step": 11277 }, { - "epoch": 0.32003405221339387, + "epoch": 0.31958967383603953, "grad_norm": 0.0, - "learning_rate": 1.5903549582070682e-05, - "loss": 0.9803, + "learning_rate": 1.5914915859191075e-05, + "loss": 0.9152, "step": 11278 }, { - "epoch": 0.3200624290578888, + "epoch": 0.319618011278302, "grad_norm": 0.0, - "learning_rate": 1.590280772902484e-05, - "loss": 0.9163, + "learning_rate": 1.5914175810543868e-05, + "loss": 1.0558, "step": 11279 }, { - "epoch": 0.32009080590238365, + "epoch": 0.31964634872056447, "grad_norm": 0.0, - "learning_rate": 1.5902065826117717e-05, - "loss": 0.9757, + "learning_rate": 1.591343571207936e-05, + "loss": 1.0499, "step": 11280 }, { - "epoch": 0.32011918274687856, + "epoch": 0.31967468616282696, "grad_norm": 0.0, - "learning_rate": 1.590132387335558e-05, - "loss": 0.8527, + "learning_rate": 1.591269556380379e-05, + "loss": 0.9017, "step": 11281 }, { - "epoch": 0.3201475595913734, + "epoch": 0.3197030236050894, "grad_norm": 0.0, - "learning_rate": 1.5900581870744692e-05, - "loss": 0.9661, + "learning_rate": 1.5911955365723385e-05, + "loss": 1.0146, "step": 11282 }, { - "epoch": 0.32017593643586834, + "epoch": 0.3197313610473519, "grad_norm": 0.0, - "learning_rate": 1.5899839818291323e-05, - "loss": 0.907, + "learning_rate": 1.5911215117844393e-05, + "loss": 0.8706, "step": 11283 }, { - "epoch": 0.32020431328036325, + "epoch": 0.31975969848961433, "grad_norm": 0.0, - "learning_rate": 1.589909771600175e-05, - "loss": 1.0776, + "learning_rate": 1.5910474820173033e-05, + "loss": 1.0278, "step": 11284 }, { - "epoch": 0.3202326901248581, + "epoch": 0.31978803593187677, "grad_norm": 0.0, - "learning_rate": 1.5898355563882227e-05, - "loss": 0.9417, + "learning_rate": 1.590973447271555e-05, + "loss": 0.9, "step": 11285 }, { - "epoch": 0.320261066969353, + "epoch": 0.31981637337413926, "grad_norm": 0.0, - "learning_rate": 1.589761336193903e-05, - "loss": 0.9811, + "learning_rate": 1.590899407547818e-05, + "loss": 0.9561, "step": 11286 }, { - "epoch": 0.3202894438138479, + "epoch": 0.3198447108164017, "grad_norm": 0.0, - "learning_rate": 1.5896871110178428e-05, - "loss": 0.8925, + "learning_rate": 1.590825362846716e-05, + "loss": 0.8811, "step": 11287 }, { - "epoch": 0.3203178206583428, + "epoch": 0.3198730482586642, "grad_norm": 0.0, - "learning_rate": 1.589612880860669e-05, - "loss": 0.9472, + "learning_rate": 1.5907513131688723e-05, + "loss": 0.9537, "step": 11288 }, { - "epoch": 0.32034619750283766, + "epoch": 0.31990138570092663, "grad_norm": 0.0, - "learning_rate": 1.5895386457230088e-05, - "loss": 1.0178, + "learning_rate": 1.590677258514911e-05, + "loss": 0.8748, "step": 11289 }, { - "epoch": 0.3203745743473326, + "epoch": 0.31992972314318907, "grad_norm": 0.0, - "learning_rate": 1.589464405605489e-05, - "loss": 0.9614, + "learning_rate": 1.590603198885456e-05, + "loss": 0.9482, "step": 11290 }, { - "epoch": 0.3204029511918275, + "epoch": 0.31995806058545156, "grad_norm": 0.0, - "learning_rate": 1.589390160508737e-05, - "loss": 0.9409, + "learning_rate": 1.590529134281131e-05, + "loss": 0.9885, "step": 11291 }, { - "epoch": 0.32043132803632235, + "epoch": 0.319986398027714, "grad_norm": 0.0, - "learning_rate": 1.58931591043338e-05, - "loss": 0.9321, + "learning_rate": 1.5904550647025595e-05, + "loss": 1.0429, "step": 11292 }, { - "epoch": 0.32045970488081726, + "epoch": 0.3200147354699765, "grad_norm": 0.0, - "learning_rate": 1.589241655380045e-05, - "loss": 0.8492, + "learning_rate": 1.590380990150366e-05, + "loss": 1.1066, "step": 11293 }, { - "epoch": 0.3204880817253121, + "epoch": 0.32004307291223894, "grad_norm": 0.0, - "learning_rate": 1.589167395349359e-05, - "loss": 0.9595, + "learning_rate": 1.590306910625174e-05, + "loss": 1.0061, "step": 11294 }, { - "epoch": 0.32051645856980704, + "epoch": 0.32007141035450143, "grad_norm": 0.0, - "learning_rate": 1.5890931303419496e-05, - "loss": 0.9518, + "learning_rate": 1.590232826127608e-05, + "loss": 0.9554, "step": 11295 }, { - "epoch": 0.32054483541430195, + "epoch": 0.32009974779676387, "grad_norm": 0.0, - "learning_rate": 1.589018860358444e-05, - "loss": 0.9327, + "learning_rate": 1.5901587366582915e-05, + "loss": 0.8975, "step": 11296 }, { - "epoch": 0.3205732122587968, + "epoch": 0.3201280852390263, "grad_norm": 0.0, - "learning_rate": 1.5889445853994695e-05, - "loss": 0.9812, + "learning_rate": 1.5900846422178488e-05, + "loss": 0.9268, "step": 11297 }, { - "epoch": 0.32060158910329173, + "epoch": 0.3201564226812888, "grad_norm": 0.0, - "learning_rate": 1.5888703054656536e-05, - "loss": 0.9509, + "learning_rate": 1.590010542806904e-05, + "loss": 1.0606, "step": 11298 }, { - "epoch": 0.3206299659477866, + "epoch": 0.32018476012355124, "grad_norm": 0.0, - "learning_rate": 1.5887960205576243e-05, - "loss": 0.9995, + "learning_rate": 1.5899364384260813e-05, + "loss": 0.9202, "step": 11299 }, { - "epoch": 0.3206583427922815, + "epoch": 0.32021309756581373, "grad_norm": 0.0, - "learning_rate": 1.5887217306760082e-05, - "loss": 0.8574, + "learning_rate": 1.5898623290760048e-05, + "loss": 0.9779, "step": 11300 }, { - "epoch": 0.32068671963677636, + "epoch": 0.32024143500807617, "grad_norm": 0.0, - "learning_rate": 1.5886474358214333e-05, - "loss": 0.9227, + "learning_rate": 1.589788214757299e-05, + "loss": 0.9311, "step": 11301 }, { - "epoch": 0.3207150964812713, + "epoch": 0.3202697724503386, "grad_norm": 0.0, - "learning_rate": 1.5885731359945266e-05, - "loss": 0.8575, + "learning_rate": 1.589714095470588e-05, + "loss": 0.9432, "step": 11302 }, { - "epoch": 0.3207434733257662, + "epoch": 0.3202981098926011, "grad_norm": 0.0, - "learning_rate": 1.5884988311959167e-05, - "loss": 0.9563, + "learning_rate": 1.5896399712164966e-05, + "loss": 0.9051, "step": 11303 }, { - "epoch": 0.32077185017026105, + "epoch": 0.32032644733486354, "grad_norm": 0.0, - "learning_rate": 1.5884245214262303e-05, - "loss": 0.9628, + "learning_rate": 1.5895658419956485e-05, + "loss": 1.0087, "step": 11304 }, { - "epoch": 0.32080022701475597, + "epoch": 0.32035478477712603, "grad_norm": 0.0, - "learning_rate": 1.5883502066860956e-05, - "loss": 0.9051, + "learning_rate": 1.589491707808668e-05, + "loss": 1.0695, "step": 11305 }, { - "epoch": 0.3208286038592508, + "epoch": 0.32038312221938847, "grad_norm": 0.0, - "learning_rate": 1.5882758869761404e-05, - "loss": 1.0173, + "learning_rate": 1.5894175686561803e-05, + "loss": 0.9005, "step": 11306 }, { - "epoch": 0.32085698070374574, + "epoch": 0.32041145966165097, "grad_norm": 0.0, - "learning_rate": 1.5882015622969923e-05, - "loss": 0.9649, + "learning_rate": 1.5893434245388097e-05, + "loss": 1.0739, "step": 11307 }, { - "epoch": 0.32088535754824066, + "epoch": 0.3204397971039134, "grad_norm": 0.0, - "learning_rate": 1.588127232649279e-05, - "loss": 1.032, + "learning_rate": 1.5892692754571802e-05, + "loss": 0.9961, "step": 11308 }, { - "epoch": 0.3209137343927355, + "epoch": 0.32046813454617584, "grad_norm": 0.0, - "learning_rate": 1.588052898033629e-05, - "loss": 0.9544, + "learning_rate": 1.5891951214119167e-05, + "loss": 1.0215, "step": 11309 }, { - "epoch": 0.32094211123723043, + "epoch": 0.32049647198843834, "grad_norm": 0.0, - "learning_rate": 1.5879785584506692e-05, - "loss": 0.8775, + "learning_rate": 1.5891209624036443e-05, + "loss": 0.8782, "step": 11310 }, { - "epoch": 0.3209704880817253, + "epoch": 0.3205248094307008, "grad_norm": 0.0, - "learning_rate": 1.5879042139010283e-05, - "loss": 0.9226, + "learning_rate": 1.5890467984329872e-05, + "loss": 0.8456, "step": 11311 }, { - "epoch": 0.3209988649262202, + "epoch": 0.32055314687296327, "grad_norm": 0.0, - "learning_rate": 1.587829864385334e-05, - "loss": 0.9475, + "learning_rate": 1.58897262950057e-05, + "loss": 0.9303, "step": 11312 }, { - "epoch": 0.3210272417707151, + "epoch": 0.3205814843152257, "grad_norm": 0.0, - "learning_rate": 1.5877555099042145e-05, - "loss": 0.8845, + "learning_rate": 1.588898455607018e-05, + "loss": 1.0367, "step": 11313 }, { - "epoch": 0.32105561861521, + "epoch": 0.32060982175748814, "grad_norm": 0.0, - "learning_rate": 1.587681150458298e-05, - "loss": 1.0576, + "learning_rate": 1.588824276752955e-05, + "loss": 0.9091, "step": 11314 }, { - "epoch": 0.3210839954597049, + "epoch": 0.32063815919975064, "grad_norm": 0.0, - "learning_rate": 1.587606786048212e-05, - "loss": 0.8374, + "learning_rate": 1.588750092939007e-05, + "loss": 0.9618, "step": 11315 }, { - "epoch": 0.32111237230419976, + "epoch": 0.3206664966420131, "grad_norm": 0.0, - "learning_rate": 1.5875324166745855e-05, - "loss": 0.7706, + "learning_rate": 1.588675904165798e-05, + "loss": 0.99, "step": 11316 }, { - "epoch": 0.32114074914869467, + "epoch": 0.32069483408427557, "grad_norm": 0.0, - "learning_rate": 1.5874580423380458e-05, - "loss": 0.8789, + "learning_rate": 1.5886017104339538e-05, + "loss": 1.0917, "step": 11317 }, { - "epoch": 0.32116912599318953, + "epoch": 0.320723171526538, "grad_norm": 0.0, - "learning_rate": 1.5873836630392218e-05, - "loss": 1.0382, + "learning_rate": 1.5885275117440983e-05, + "loss": 1.0724, "step": 11318 }, { - "epoch": 0.32119750283768445, + "epoch": 0.3207515089688005, "grad_norm": 0.0, - "learning_rate": 1.5873092787787418e-05, - "loss": 0.9264, + "learning_rate": 1.588453308096857e-05, + "loss": 0.9135, "step": 11319 }, { - "epoch": 0.32122587968217936, + "epoch": 0.32077984641106294, "grad_norm": 0.0, - "learning_rate": 1.5872348895572335e-05, - "loss": 0.9338, + "learning_rate": 1.5883790994928554e-05, + "loss": 0.8513, "step": 11320 }, { - "epoch": 0.3212542565266742, + "epoch": 0.3208081838533254, "grad_norm": 0.0, - "learning_rate": 1.587160495375326e-05, - "loss": 1.0344, + "learning_rate": 1.5883048859327178e-05, + "loss": 1.0024, "step": 11321 }, { - "epoch": 0.32128263337116914, + "epoch": 0.3208365212955879, "grad_norm": 0.0, - "learning_rate": 1.587086096233647e-05, - "loss": 0.9526, + "learning_rate": 1.58823066741707e-05, + "loss": 0.9623, "step": 11322 }, { - "epoch": 0.321311010215664, + "epoch": 0.3208648587378503, "grad_norm": 0.0, - "learning_rate": 1.5870116921328263e-05, - "loss": 1.0087, + "learning_rate": 1.5881564439465364e-05, + "loss": 1.0873, "step": 11323 }, { - "epoch": 0.3213393870601589, + "epoch": 0.3208931961801128, "grad_norm": 0.0, - "learning_rate": 1.5869372830734906e-05, - "loss": 0.9839, + "learning_rate": 1.588082215521743e-05, + "loss": 1.0479, "step": 11324 }, { - "epoch": 0.3213677639046538, + "epoch": 0.32092153362237524, "grad_norm": 0.0, - "learning_rate": 1.5868628690562695e-05, - "loss": 0.8506, + "learning_rate": 1.5880079821433145e-05, + "loss": 0.9248, "step": 11325 }, { - "epoch": 0.3213961407491487, + "epoch": 0.3209498710646377, "grad_norm": 0.0, - "learning_rate": 1.5867884500817914e-05, - "loss": 0.985, + "learning_rate": 1.5879337438118766e-05, + "loss": 0.8753, "step": 11326 }, { - "epoch": 0.3214245175936436, + "epoch": 0.3209782085069002, "grad_norm": 0.0, - "learning_rate": 1.5867140261506845e-05, - "loss": 0.8806, + "learning_rate": 1.5878595005280543e-05, + "loss": 0.8659, "step": 11327 }, { - "epoch": 0.32145289443813846, + "epoch": 0.3210065459491626, "grad_norm": 0.0, - "learning_rate": 1.5866395972635788e-05, - "loss": 1.0475, + "learning_rate": 1.5877852522924733e-05, + "loss": 0.9567, "step": 11328 }, { - "epoch": 0.3214812712826334, + "epoch": 0.3210348833914251, "grad_norm": 0.0, - "learning_rate": 1.586565163421101e-05, - "loss": 1.0355, + "learning_rate": 1.587710999105759e-05, + "loss": 0.9693, "step": 11329 }, { - "epoch": 0.3215096481271283, + "epoch": 0.32106322083368755, "grad_norm": 0.0, - "learning_rate": 1.5864907246238814e-05, - "loss": 0.8801, + "learning_rate": 1.5876367409685363e-05, + "loss": 1.0204, "step": 11330 }, { - "epoch": 0.32153802497162315, + "epoch": 0.32109155827595004, "grad_norm": 0.0, - "learning_rate": 1.586416280872548e-05, - "loss": 0.9749, + "learning_rate": 1.5875624778814313e-05, + "loss": 0.8656, "step": 11331 }, { - "epoch": 0.32156640181611806, + "epoch": 0.3211198957182125, "grad_norm": 0.0, - "learning_rate": 1.5863418321677303e-05, - "loss": 0.9711, + "learning_rate": 1.5874882098450694e-05, + "loss": 0.9094, "step": 11332 }, { - "epoch": 0.3215947786606129, + "epoch": 0.3211482331604749, "grad_norm": 0.0, - "learning_rate": 1.5862673785100567e-05, - "loss": 0.9888, + "learning_rate": 1.587413936860076e-05, + "loss": 0.99, "step": 11333 }, { - "epoch": 0.32162315550510784, + "epoch": 0.3211765706027374, "grad_norm": 0.0, - "learning_rate": 1.5861929199001562e-05, - "loss": 1.0872, + "learning_rate": 1.587339658927077e-05, + "loss": 0.9296, "step": 11334 }, { - "epoch": 0.3216515323496027, + "epoch": 0.32120490804499985, "grad_norm": 0.0, - "learning_rate": 1.5861184563386576e-05, - "loss": 0.9345, + "learning_rate": 1.587265376046698e-05, + "loss": 1.075, "step": 11335 }, { - "epoch": 0.3216799091940976, + "epoch": 0.32123324548726234, "grad_norm": 0.0, - "learning_rate": 1.5860439878261905e-05, - "loss": 0.9358, + "learning_rate": 1.5871910882195643e-05, + "loss": 0.9898, "step": 11336 }, { - "epoch": 0.32170828603859253, + "epoch": 0.3212615829295248, "grad_norm": 0.0, - "learning_rate": 1.585969514363383e-05, - "loss": 0.9968, + "learning_rate": 1.5871167954463028e-05, + "loss": 0.9525, "step": 11337 }, { - "epoch": 0.3217366628830874, + "epoch": 0.3212899203717872, "grad_norm": 0.0, - "learning_rate": 1.585895035950865e-05, - "loss": 1.0179, + "learning_rate": 1.5870424977275378e-05, + "loss": 0.9522, "step": 11338 }, { - "epoch": 0.3217650397275823, + "epoch": 0.3213182578140497, "grad_norm": 0.0, - "learning_rate": 1.5858205525892647e-05, - "loss": 0.9794, + "learning_rate": 1.586968195063896e-05, + "loss": 0.8699, "step": 11339 }, { - "epoch": 0.32179341657207716, + "epoch": 0.32134659525631215, "grad_norm": 0.0, - "learning_rate": 1.5857460642792124e-05, - "loss": 0.914, + "learning_rate": 1.5868938874560034e-05, + "loss": 0.9564, "step": 11340 }, { - "epoch": 0.3218217934165721, + "epoch": 0.32137493269857464, "grad_norm": 0.0, - "learning_rate": 1.5856715710213367e-05, - "loss": 1.0312, + "learning_rate": 1.5868195749044853e-05, + "loss": 1.0228, "step": 11341 }, { - "epoch": 0.321850170261067, + "epoch": 0.3214032701408371, "grad_norm": 0.0, - "learning_rate": 1.5855970728162668e-05, - "loss": 0.9347, + "learning_rate": 1.5867452574099682e-05, + "loss": 1.032, "step": 11342 }, { - "epoch": 0.32187854710556185, + "epoch": 0.3214316075830996, "grad_norm": 0.0, - "learning_rate": 1.5855225696646322e-05, - "loss": 0.9457, + "learning_rate": 1.586670934973078e-05, + "loss": 1.1061, "step": 11343 }, { - "epoch": 0.32190692395005677, + "epoch": 0.321459945025362, "grad_norm": 0.0, - "learning_rate": 1.5854480615670618e-05, - "loss": 0.8392, + "learning_rate": 1.5865966075944402e-05, + "loss": 0.9396, "step": 11344 }, { - "epoch": 0.3219353007945516, + "epoch": 0.32148828246762445, "grad_norm": 0.0, - "learning_rate": 1.5853735485241858e-05, - "loss": 0.8729, + "learning_rate": 1.586522275274682e-05, + "loss": 1.0486, "step": 11345 }, { - "epoch": 0.32196367763904654, + "epoch": 0.32151661990988695, "grad_norm": 0.0, - "learning_rate": 1.5852990305366326e-05, - "loss": 0.9809, + "learning_rate": 1.5864479380144283e-05, + "loss": 0.9732, "step": 11346 }, { - "epoch": 0.32199205448354146, + "epoch": 0.3215449573521494, "grad_norm": 0.0, - "learning_rate": 1.5852245076050325e-05, - "loss": 0.9567, + "learning_rate": 1.5863735958143064e-05, + "loss": 0.9448, "step": 11347 }, { - "epoch": 0.3220204313280363, + "epoch": 0.3215732947944119, "grad_norm": 0.0, - "learning_rate": 1.585149979730015e-05, - "loss": 0.92, + "learning_rate": 1.5862992486749416e-05, + "loss": 0.9967, "step": 11348 }, { - "epoch": 0.32204880817253123, + "epoch": 0.3216016322366743, "grad_norm": 0.0, - "learning_rate": 1.5850754469122084e-05, - "loss": 0.8961, + "learning_rate": 1.5862248965969604e-05, + "loss": 1.0774, "step": 11349 }, { - "epoch": 0.3220771850170261, + "epoch": 0.32162996967893676, "grad_norm": 0.0, - "learning_rate": 1.585000909152244e-05, - "loss": 0.914, + "learning_rate": 1.5861505395809895e-05, + "loss": 0.9571, "step": 11350 }, { - "epoch": 0.322105561861521, + "epoch": 0.32165830712119925, "grad_norm": 0.0, - "learning_rate": 1.5849263664507504e-05, - "loss": 0.8499, + "learning_rate": 1.5860761776276547e-05, + "loss": 1.0193, "step": 11351 }, { - "epoch": 0.32213393870601587, + "epoch": 0.3216866445634617, "grad_norm": 0.0, - "learning_rate": 1.5848518188083576e-05, - "loss": 0.861, + "learning_rate": 1.586001810737583e-05, + "loss": 1.0163, "step": 11352 }, { - "epoch": 0.3221623155505108, + "epoch": 0.3217149820057242, "grad_norm": 0.0, - "learning_rate": 1.584777266225695e-05, - "loss": 0.9117, + "learning_rate": 1.5859274389114e-05, + "loss": 0.987, "step": 11353 }, { - "epoch": 0.3221906923950057, + "epoch": 0.3217433194479866, "grad_norm": 0.0, - "learning_rate": 1.5847027087033926e-05, - "loss": 0.9922, + "learning_rate": 1.585853062149733e-05, + "loss": 0.9957, "step": 11354 }, { - "epoch": 0.32221906923950056, + "epoch": 0.3217716568902491, "grad_norm": 0.0, - "learning_rate": 1.5846281462420804e-05, - "loss": 1.0124, + "learning_rate": 1.5857786804532077e-05, + "loss": 0.8879, "step": 11355 }, { - "epoch": 0.32224744608399547, + "epoch": 0.32179999433251155, "grad_norm": 0.0, - "learning_rate": 1.5845535788423876e-05, - "loss": 0.8573, + "learning_rate": 1.5857042938224513e-05, + "loss": 0.9126, "step": 11356 }, { - "epoch": 0.32227582292849033, + "epoch": 0.321828331774774, "grad_norm": 0.0, - "learning_rate": 1.5844790065049444e-05, - "loss": 1.0802, + "learning_rate": 1.5856299022580902e-05, + "loss": 0.9262, "step": 11357 }, { - "epoch": 0.32230419977298524, + "epoch": 0.3218566692170365, "grad_norm": 0.0, - "learning_rate": 1.584404429230381e-05, - "loss": 1.0594, + "learning_rate": 1.585555505760751e-05, + "loss": 0.9732, "step": 11358 }, { - "epoch": 0.32233257661748016, + "epoch": 0.3218850066592989, "grad_norm": 0.0, - "learning_rate": 1.584329847019327e-05, - "loss": 0.9426, + "learning_rate": 1.58548110433106e-05, + "loss": 1.0598, "step": 11359 }, { - "epoch": 0.322360953461975, + "epoch": 0.3219133441015614, "grad_norm": 0.0, - "learning_rate": 1.5842552598724122e-05, - "loss": 1.02, + "learning_rate": 1.585406697969644e-05, + "loss": 0.9066, "step": 11360 }, { - "epoch": 0.32238933030646993, + "epoch": 0.32194168154382385, "grad_norm": 0.0, - "learning_rate": 1.5841806677902675e-05, - "loss": 0.8604, + "learning_rate": 1.5853322866771308e-05, + "loss": 0.9659, "step": 11361 }, { - "epoch": 0.3224177071509648, + "epoch": 0.3219700189860863, "grad_norm": 0.0, - "learning_rate": 1.5841060707735222e-05, - "loss": 0.9111, + "learning_rate": 1.585257870454146e-05, + "loss": 0.9876, "step": 11362 }, { - "epoch": 0.3224460839954597, + "epoch": 0.3219983564283488, "grad_norm": 0.0, - "learning_rate": 1.584031468822807e-05, - "loss": 0.9337, + "learning_rate": 1.5851834493013168e-05, + "loss": 0.829, "step": 11363 }, { - "epoch": 0.3224744608399546, + "epoch": 0.3220266938706112, "grad_norm": 0.0, - "learning_rate": 1.5839568619387512e-05, - "loss": 1.0135, + "learning_rate": 1.5851090232192704e-05, + "loss": 1.0557, "step": 11364 }, { - "epoch": 0.3225028376844495, + "epoch": 0.3220550313128737, "grad_norm": 0.0, - "learning_rate": 1.583882250121986e-05, - "loss": 0.9004, + "learning_rate": 1.585034592208633e-05, + "loss": 0.9211, "step": 11365 }, { - "epoch": 0.3225312145289444, + "epoch": 0.32208336875513616, "grad_norm": 0.0, - "learning_rate": 1.5838076333731406e-05, - "loss": 0.9231, + "learning_rate": 1.5849601562700322e-05, + "loss": 0.9935, "step": 11366 }, { - "epoch": 0.32255959137343926, + "epoch": 0.3221117061973986, "grad_norm": 0.0, - "learning_rate": 1.5837330116928464e-05, - "loss": 1.0934, + "learning_rate": 1.5848857154040947e-05, + "loss": 0.9061, "step": 11367 }, { - "epoch": 0.3225879682179342, + "epoch": 0.3221400436396611, "grad_norm": 0.0, - "learning_rate": 1.583658385081733e-05, - "loss": 0.8775, + "learning_rate": 1.5848112696114476e-05, + "loss": 1.0014, "step": 11368 }, { - "epoch": 0.32261634506242903, + "epoch": 0.3221683810819235, "grad_norm": 0.0, - "learning_rate": 1.583583753540431e-05, - "loss": 0.9351, + "learning_rate": 1.584736818892718e-05, + "loss": 0.9365, "step": 11369 }, { - "epoch": 0.32264472190692395, + "epoch": 0.322196718524186, "grad_norm": 0.0, - "learning_rate": 1.5835091170695708e-05, - "loss": 1.014, + "learning_rate": 1.5846623632485334e-05, + "loss": 0.8996, "step": 11370 }, { - "epoch": 0.32267309875141886, + "epoch": 0.32222505596644846, "grad_norm": 0.0, - "learning_rate": 1.583434475669783e-05, - "loss": 1.0194, + "learning_rate": 1.5845879026795202e-05, + "loss": 0.8827, "step": 11371 }, { - "epoch": 0.3227014755959137, + "epoch": 0.32225339340871095, "grad_norm": 0.0, - "learning_rate": 1.5833598293416978e-05, - "loss": 1.0771, + "learning_rate": 1.584513437186306e-05, + "loss": 0.9129, "step": 11372 }, { - "epoch": 0.32272985244040864, + "epoch": 0.3222817308509734, "grad_norm": 0.0, - "learning_rate": 1.583285178085946e-05, - "loss": 0.88, + "learning_rate": 1.5844389667695185e-05, + "loss": 1.0554, "step": 11373 }, { - "epoch": 0.3227582292849035, + "epoch": 0.32231006829323583, "grad_norm": 0.0, - "learning_rate": 1.583210521903158e-05, - "loss": 0.9224, + "learning_rate": 1.5843644914297838e-05, + "loss": 0.9766, "step": 11374 }, { - "epoch": 0.3227866061293984, + "epoch": 0.3223384057354983, "grad_norm": 0.0, - "learning_rate": 1.5831358607939643e-05, - "loss": 1.0235, + "learning_rate": 1.5842900111677307e-05, + "loss": 0.946, "step": 11375 }, { - "epoch": 0.3228149829738933, + "epoch": 0.32236674317776076, "grad_norm": 0.0, - "learning_rate": 1.583061194758996e-05, - "loss": 0.8339, + "learning_rate": 1.5842155259839858e-05, + "loss": 0.956, "step": 11376 }, { - "epoch": 0.3228433598183882, + "epoch": 0.32239508062002326, "grad_norm": 0.0, - "learning_rate": 1.5829865237988838e-05, - "loss": 0.9426, + "learning_rate": 1.5841410358791763e-05, + "loss": 0.9263, "step": 11377 }, { - "epoch": 0.3228717366628831, + "epoch": 0.3224234180622857, "grad_norm": 0.0, - "learning_rate": 1.582911847914258e-05, - "loss": 0.9888, + "learning_rate": 1.58406654085393e-05, + "loss": 0.987, "step": 11378 }, { - "epoch": 0.32290011350737796, + "epoch": 0.32245175550454813, "grad_norm": 0.0, - "learning_rate": 1.5828371671057495e-05, - "loss": 0.9834, + "learning_rate": 1.5839920409088743e-05, + "loss": 0.9306, "step": 11379 }, { - "epoch": 0.3229284903518729, + "epoch": 0.3224800929468106, "grad_norm": 0.0, - "learning_rate": 1.5827624813739893e-05, - "loss": 0.9538, + "learning_rate": 1.5839175360446367e-05, + "loss": 0.9884, "step": 11380 }, { - "epoch": 0.32295686719636774, + "epoch": 0.32250843038907306, "grad_norm": 0.0, - "learning_rate": 1.5826877907196084e-05, - "loss": 0.9501, + "learning_rate": 1.583843026261845e-05, + "loss": 1.0358, "step": 11381 }, { - "epoch": 0.32298524404086265, + "epoch": 0.32253676783133556, "grad_norm": 0.0, - "learning_rate": 1.5826130951432373e-05, - "loss": 1.0262, + "learning_rate": 1.583768511561127e-05, + "loss": 1.0287, "step": 11382 }, { - "epoch": 0.32301362088535757, + "epoch": 0.322565105273598, "grad_norm": 0.0, - "learning_rate": 1.5825383946455075e-05, - "loss": 0.9441, + "learning_rate": 1.5836939919431097e-05, + "loss": 0.9527, "step": 11383 }, { - "epoch": 0.3230419977298524, + "epoch": 0.3225934427158605, "grad_norm": 0.0, - "learning_rate": 1.5824636892270496e-05, - "loss": 0.8712, + "learning_rate": 1.583619467408421e-05, + "loss": 0.937, "step": 11384 }, { - "epoch": 0.32307037457434734, + "epoch": 0.32262178015812293, "grad_norm": 0.0, - "learning_rate": 1.5823889788884944e-05, - "loss": 0.8765, + "learning_rate": 1.5835449379576892e-05, + "loss": 1.0262, "step": 11385 }, { - "epoch": 0.3230987514188422, + "epoch": 0.32265011760038537, "grad_norm": 0.0, - "learning_rate": 1.5823142636304736e-05, - "loss": 1.0189, + "learning_rate": 1.5834704035915417e-05, + "loss": 0.9677, "step": 11386 }, { - "epoch": 0.3231271282633371, + "epoch": 0.32267845504264786, "grad_norm": 0.0, - "learning_rate": 1.582239543453618e-05, - "loss": 1.0279, + "learning_rate": 1.5833958643106058e-05, + "loss": 0.8528, "step": 11387 }, { - "epoch": 0.32315550510783203, + "epoch": 0.3227067924849103, "grad_norm": 0.0, - "learning_rate": 1.5821648183585588e-05, - "loss": 0.939, + "learning_rate": 1.58332132011551e-05, + "loss": 1.0286, "step": 11388 }, { - "epoch": 0.3231838819523269, + "epoch": 0.3227351299271728, "grad_norm": 0.0, - "learning_rate": 1.582090088345927e-05, - "loss": 0.9232, + "learning_rate": 1.5832467710068825e-05, + "loss": 0.9957, "step": 11389 }, { - "epoch": 0.3232122587968218, + "epoch": 0.32276346736943523, "grad_norm": 0.0, - "learning_rate": 1.5820153534163543e-05, - "loss": 1.0107, + "learning_rate": 1.583172216985351e-05, + "loss": 1.0233, "step": 11390 }, { - "epoch": 0.32324063564131666, + "epoch": 0.32279180481169767, "grad_norm": 0.0, - "learning_rate": 1.5819406135704717e-05, - "loss": 0.9984, + "learning_rate": 1.5830976580515432e-05, + "loss": 0.9723, "step": 11391 }, { - "epoch": 0.3232690124858116, + "epoch": 0.32282014225396016, "grad_norm": 0.0, - "learning_rate": 1.581865868808911e-05, - "loss": 1.0094, + "learning_rate": 1.583023094206087e-05, + "loss": 0.9698, "step": 11392 }, { - "epoch": 0.3232973893303065, + "epoch": 0.3228484796962226, "grad_norm": 0.0, - "learning_rate": 1.5817911191323025e-05, - "loss": 0.9912, + "learning_rate": 1.5829485254496108e-05, + "loss": 1.0318, "step": 11393 }, { - "epoch": 0.32332576617480135, + "epoch": 0.3228768171384851, "grad_norm": 0.0, - "learning_rate": 1.5817163645412784e-05, - "loss": 0.8811, + "learning_rate": 1.5828739517827426e-05, + "loss": 0.9075, "step": 11394 }, { - "epoch": 0.32335414301929627, + "epoch": 0.32290515458074753, "grad_norm": 0.0, - "learning_rate": 1.58164160503647e-05, - "loss": 1.0307, + "learning_rate": 1.5827993732061112e-05, + "loss": 1.0718, "step": 11395 }, { - "epoch": 0.32338251986379113, + "epoch": 0.32293349202301, "grad_norm": 0.0, - "learning_rate": 1.5815668406185093e-05, - "loss": 0.9369, + "learning_rate": 1.5827247897203436e-05, + "loss": 0.942, "step": 11396 }, { - "epoch": 0.32341089670828604, + "epoch": 0.32296182946527247, "grad_norm": 0.0, - "learning_rate": 1.5814920712880267e-05, - "loss": 0.922, + "learning_rate": 1.5826502013260694e-05, + "loss": 0.9977, "step": 11397 }, { - "epoch": 0.3234392735527809, + "epoch": 0.3229901669075349, "grad_norm": 0.0, - "learning_rate": 1.581417297045655e-05, - "loss": 0.9765, + "learning_rate": 1.582575608023916e-05, + "loss": 0.9027, "step": 11398 }, { - "epoch": 0.3234676503972758, + "epoch": 0.3230185043497974, "grad_norm": 0.0, - "learning_rate": 1.581342517892025e-05, - "loss": 0.9127, + "learning_rate": 1.5825010098145117e-05, + "loss": 0.9258, "step": 11399 }, { - "epoch": 0.32349602724177073, + "epoch": 0.32304684179205984, "grad_norm": 0.0, - "learning_rate": 1.5812677338277684e-05, - "loss": 0.9576, + "learning_rate": 1.5824264066984848e-05, + "loss": 1.0483, "step": 11400 }, { - "epoch": 0.3235244040862656, + "epoch": 0.32307517923432233, "grad_norm": 0.0, - "learning_rate": 1.5811929448535173e-05, - "loss": 0.845, + "learning_rate": 1.5823517986764647e-05, + "loss": 0.8611, "step": 11401 }, { - "epoch": 0.3235527809307605, + "epoch": 0.32310351667658477, "grad_norm": 0.0, - "learning_rate": 1.5811181509699033e-05, - "loss": 1.0322, + "learning_rate": 1.582277185749079e-05, + "loss": 1.0096, "step": 11402 }, { - "epoch": 0.32358115777525537, + "epoch": 0.3231318541188472, "grad_norm": 0.0, - "learning_rate": 1.5810433521775583e-05, - "loss": 0.9544, + "learning_rate": 1.582202567916956e-05, + "loss": 0.9215, "step": 11403 }, { - "epoch": 0.3236095346197503, + "epoch": 0.3231601915611097, "grad_norm": 0.0, - "learning_rate": 1.5809685484771136e-05, - "loss": 0.8891, + "learning_rate": 1.582127945180724e-05, + "loss": 0.8695, "step": 11404 }, { - "epoch": 0.3236379114642452, + "epoch": 0.32318852900337214, "grad_norm": 0.0, - "learning_rate": 1.5808937398692016e-05, - "loss": 1.0131, + "learning_rate": 1.5820533175410134e-05, + "loss": 1.0085, "step": 11405 }, { - "epoch": 0.32366628830874006, + "epoch": 0.32321686644563463, "grad_norm": 0.0, - "learning_rate": 1.5808189263544542e-05, - "loss": 0.9753, + "learning_rate": 1.581978684998451e-05, + "loss": 0.9368, "step": 11406 }, { - "epoch": 0.323694665153235, + "epoch": 0.32324520388789707, "grad_norm": 0.0, - "learning_rate": 1.5807441079335033e-05, - "loss": 0.9309, + "learning_rate": 1.581904047553666e-05, + "loss": 0.932, "step": 11407 }, { - "epoch": 0.32372304199772983, + "epoch": 0.32327354133015956, "grad_norm": 0.0, - "learning_rate": 1.5806692846069807e-05, - "loss": 0.8524, + "learning_rate": 1.5818294052072873e-05, + "loss": 0.8698, "step": 11408 }, { - "epoch": 0.32375141884222475, + "epoch": 0.323301878772422, "grad_norm": 0.0, - "learning_rate": 1.5805944563755183e-05, - "loss": 0.9542, + "learning_rate": 1.5817547579599436e-05, + "loss": 0.8243, "step": 11409 }, { - "epoch": 0.32377979568671966, + "epoch": 0.32333021621468444, "grad_norm": 0.0, - "learning_rate": 1.580519623239749e-05, - "loss": 0.9872, + "learning_rate": 1.5816801058122632e-05, + "loss": 0.9909, "step": 11410 }, { - "epoch": 0.3238081725312145, + "epoch": 0.32335855365694693, "grad_norm": 0.0, - "learning_rate": 1.580444785200304e-05, - "loss": 0.9274, + "learning_rate": 1.5816054487648753e-05, + "loss": 1.0045, "step": 11411 }, { - "epoch": 0.32383654937570944, + "epoch": 0.3233868910992094, "grad_norm": 0.0, - "learning_rate": 1.5803699422578157e-05, - "loss": 0.9155, + "learning_rate": 1.5815307868184085e-05, + "loss": 0.9207, "step": 11412 }, { - "epoch": 0.3238649262202043, + "epoch": 0.32341522854147187, "grad_norm": 0.0, - "learning_rate": 1.580295094412916e-05, - "loss": 0.9211, + "learning_rate": 1.5814561199734922e-05, + "loss": 0.9371, "step": 11413 }, { - "epoch": 0.3238933030646992, + "epoch": 0.3234435659837343, "grad_norm": 0.0, - "learning_rate": 1.5802202416662386e-05, - "loss": 0.937, + "learning_rate": 1.5813814482307552e-05, + "loss": 0.9282, "step": 11414 }, { - "epoch": 0.32392167990919407, + "epoch": 0.32347190342599674, "grad_norm": 0.0, - "learning_rate": 1.5801453840184143e-05, - "loss": 1.0465, + "learning_rate": 1.5813067715908265e-05, + "loss": 0.906, "step": 11415 }, { - "epoch": 0.323950056753689, + "epoch": 0.32350024086825924, "grad_norm": 0.0, - "learning_rate": 1.5800705214700758e-05, - "loss": 0.9333, + "learning_rate": 1.5812320900543348e-05, + "loss": 0.9042, "step": 11416 }, { - "epoch": 0.3239784335981839, + "epoch": 0.3235285783105217, "grad_norm": 0.0, - "learning_rate": 1.5799956540218553e-05, - "loss": 0.9093, + "learning_rate": 1.581157403621909e-05, + "loss": 0.8512, "step": 11417 }, { - "epoch": 0.32400681044267876, + "epoch": 0.32355691575278417, "grad_norm": 0.0, - "learning_rate": 1.5799207816743857e-05, - "loss": 0.8482, + "learning_rate": 1.5810827122941792e-05, + "loss": 0.9032, "step": 11418 }, { - "epoch": 0.3240351872871737, + "epoch": 0.3235852531950466, "grad_norm": 0.0, - "learning_rate": 1.5798459044282993e-05, - "loss": 0.9884, + "learning_rate": 1.5810080160717737e-05, + "loss": 1.0433, "step": 11419 }, { - "epoch": 0.32406356413166854, + "epoch": 0.3236135906373091, "grad_norm": 0.0, - "learning_rate": 1.579771022284228e-05, - "loss": 1.0344, + "learning_rate": 1.580933314955322e-05, + "loss": 0.9099, "step": 11420 }, { - "epoch": 0.32409194097616345, + "epoch": 0.32364192807957154, "grad_norm": 0.0, - "learning_rate": 1.5796961352428052e-05, - "loss": 0.9695, + "learning_rate": 1.580858608945453e-05, + "loss": 0.866, "step": 11421 }, { - "epoch": 0.32412031782065837, + "epoch": 0.323670265521834, "grad_norm": 0.0, - "learning_rate": 1.579621243304663e-05, - "loss": 1.0598, + "learning_rate": 1.5807838980427967e-05, + "loss": 0.9639, "step": 11422 }, { - "epoch": 0.3241486946651532, + "epoch": 0.32369860296409647, "grad_norm": 0.0, - "learning_rate": 1.5795463464704342e-05, - "loss": 0.9722, + "learning_rate": 1.5807091822479815e-05, + "loss": 0.9269, "step": 11423 }, { - "epoch": 0.32417707150964814, + "epoch": 0.3237269404063589, "grad_norm": 0.0, - "learning_rate": 1.5794714447407514e-05, - "loss": 0.8957, + "learning_rate": 1.5806344615616375e-05, + "loss": 1.0439, "step": 11424 }, { - "epoch": 0.324205448354143, + "epoch": 0.3237552778486214, "grad_norm": 0.0, - "learning_rate": 1.5793965381162467e-05, - "loss": 0.9292, + "learning_rate": 1.5805597359843935e-05, + "loss": 0.8914, "step": 11425 }, { - "epoch": 0.3242338251986379, + "epoch": 0.32378361529088384, "grad_norm": 0.0, - "learning_rate": 1.579321626597554e-05, - "loss": 0.9901, + "learning_rate": 1.5804850055168796e-05, + "loss": 0.8324, "step": 11426 }, { - "epoch": 0.32426220204313283, + "epoch": 0.3238119527331463, "grad_norm": 0.0, - "learning_rate": 1.5792467101853054e-05, - "loss": 1.0222, + "learning_rate": 1.5804102701597247e-05, + "loss": 0.9725, "step": 11427 }, { - "epoch": 0.3242905788876277, + "epoch": 0.3238402901754088, "grad_norm": 0.0, - "learning_rate": 1.5791717888801334e-05, - "loss": 0.9578, + "learning_rate": 1.5803355299135586e-05, + "loss": 1.023, "step": 11428 }, { - "epoch": 0.3243189557321226, + "epoch": 0.3238686276176712, "grad_norm": 0.0, - "learning_rate": 1.5790968626826716e-05, - "loss": 0.9806, + "learning_rate": 1.580260784779011e-05, + "loss": 0.9989, "step": 11429 }, { - "epoch": 0.32434733257661746, + "epoch": 0.3238969650599337, "grad_norm": 0.0, - "learning_rate": 1.5790219315935526e-05, - "loss": 0.9241, + "learning_rate": 1.5801860347567108e-05, + "loss": 1.0304, "step": 11430 }, { - "epoch": 0.3243757094211124, + "epoch": 0.32392530250219614, "grad_norm": 0.0, - "learning_rate": 1.5789469956134095e-05, - "loss": 0.9325, + "learning_rate": 1.5801112798472887e-05, + "loss": 0.9747, "step": 11431 }, { - "epoch": 0.32440408626560724, + "epoch": 0.32395363994445864, "grad_norm": 0.0, - "learning_rate": 1.578872054742875e-05, - "loss": 0.9322, + "learning_rate": 1.580036520051374e-05, + "loss": 0.9725, "step": 11432 }, { - "epoch": 0.32443246311010215, + "epoch": 0.3239819773867211, "grad_norm": 0.0, - "learning_rate": 1.5787971089825817e-05, - "loss": 0.9929, + "learning_rate": 1.5799617553695958e-05, + "loss": 0.9219, "step": 11433 }, { - "epoch": 0.32446083995459707, + "epoch": 0.3240103148289835, "grad_norm": 0.0, - "learning_rate": 1.5787221583331636e-05, - "loss": 0.9808, + "learning_rate": 1.5798869858025847e-05, + "loss": 0.9271, "step": 11434 }, { - "epoch": 0.32448921679909193, + "epoch": 0.324038652271246, "grad_norm": 0.0, - "learning_rate": 1.5786472027952533e-05, - "loss": 0.8708, + "learning_rate": 1.5798122113509703e-05, + "loss": 0.893, "step": 11435 }, { - "epoch": 0.32451759364358684, + "epoch": 0.32406698971350845, "grad_norm": 0.0, - "learning_rate": 1.5785722423694842e-05, - "loss": 0.8421, + "learning_rate": 1.579737432015382e-05, + "loss": 0.9628, "step": 11436 }, { - "epoch": 0.3245459704880817, + "epoch": 0.32409532715577094, "grad_norm": 0.0, - "learning_rate": 1.5784972770564895e-05, - "loss": 0.9572, + "learning_rate": 1.5796626477964502e-05, + "loss": 1.0487, "step": 11437 }, { - "epoch": 0.3245743473325766, + "epoch": 0.3241236645980334, "grad_norm": 0.0, - "learning_rate": 1.578422306856902e-05, - "loss": 0.9592, + "learning_rate": 1.5795878586948047e-05, + "loss": 0.9521, "step": 11438 }, { - "epoch": 0.32460272417707153, + "epoch": 0.3241520020402958, "grad_norm": 0.0, - "learning_rate": 1.5783473317713553e-05, - "loss": 0.9415, + "learning_rate": 1.5795130647110755e-05, + "loss": 0.9659, "step": 11439 }, { - "epoch": 0.3246311010215664, + "epoch": 0.3241803394825583, "grad_norm": 0.0, - "learning_rate": 1.578272351800483e-05, - "loss": 0.9656, + "learning_rate": 1.5794382658458924e-05, + "loss": 1.0085, "step": 11440 }, { - "epoch": 0.3246594778660613, + "epoch": 0.32420867692482075, "grad_norm": 0.0, - "learning_rate": 1.5781973669449175e-05, - "loss": 0.9197, + "learning_rate": 1.5793634620998858e-05, + "loss": 1.0505, "step": 11441 }, { - "epoch": 0.32468785471055617, + "epoch": 0.32423701436708324, "grad_norm": 0.0, - "learning_rate": 1.5781223772052934e-05, - "loss": 0.7952, + "learning_rate": 1.5792886534736854e-05, + "loss": 0.8635, "step": 11442 }, { - "epoch": 0.3247162315550511, + "epoch": 0.3242653518093457, "grad_norm": 0.0, - "learning_rate": 1.5780473825822434e-05, - "loss": 0.9616, + "learning_rate": 1.5792138399679216e-05, + "loss": 0.9647, "step": 11443 }, { - "epoch": 0.324744608399546, + "epoch": 0.3242936892516082, "grad_norm": 0.0, - "learning_rate": 1.5779723830764013e-05, - "loss": 0.9204, + "learning_rate": 1.5791390215832247e-05, + "loss": 0.911, "step": 11444 }, { - "epoch": 0.32477298524404086, + "epoch": 0.3243220266938706, "grad_norm": 0.0, - "learning_rate": 1.5778973786884005e-05, - "loss": 0.9844, + "learning_rate": 1.5790641983202245e-05, + "loss": 1.0133, "step": 11445 }, { - "epoch": 0.32480136208853577, + "epoch": 0.32435036413613305, "grad_norm": 0.0, - "learning_rate": 1.5778223694188746e-05, - "loss": 0.9984, + "learning_rate": 1.5789893701795515e-05, + "loss": 0.8778, "step": 11446 }, { - "epoch": 0.32482973893303063, + "epoch": 0.32437870157839555, "grad_norm": 0.0, - "learning_rate": 1.577747355268457e-05, - "loss": 0.9263, + "learning_rate": 1.5789145371618366e-05, + "loss": 0.8438, "step": 11447 }, { - "epoch": 0.32485811577752555, + "epoch": 0.324407039020658, "grad_norm": 0.0, - "learning_rate": 1.5776723362377816e-05, - "loss": 0.8805, + "learning_rate": 1.578839699267709e-05, + "loss": 0.9844, "step": 11448 }, { - "epoch": 0.3248864926220204, + "epoch": 0.3244353764629205, "grad_norm": 0.0, - "learning_rate": 1.577597312327482e-05, - "loss": 0.8879, + "learning_rate": 1.5787648564978e-05, + "loss": 0.9881, "step": 11449 }, { - "epoch": 0.3249148694665153, + "epoch": 0.3244637139051829, "grad_norm": 0.0, - "learning_rate": 1.577522283538192e-05, - "loss": 0.9815, + "learning_rate": 1.5786900088527394e-05, + "loss": 0.932, "step": 11450 }, { - "epoch": 0.32494324631101024, + "epoch": 0.32449205134744535, "grad_norm": 0.0, - "learning_rate": 1.577447249870545e-05, - "loss": 0.9393, + "learning_rate": 1.578615156333158e-05, + "loss": 0.8806, "step": 11451 }, { - "epoch": 0.3249716231555051, + "epoch": 0.32452038878970785, "grad_norm": 0.0, - "learning_rate": 1.5773722113251755e-05, - "loss": 0.9838, + "learning_rate": 1.5785402989396867e-05, + "loss": 1.0128, "step": 11452 }, { - "epoch": 0.325, + "epoch": 0.3245487262319703, "grad_norm": 0.0, - "learning_rate": 1.5772971679027168e-05, - "loss": 0.8106, + "learning_rate": 1.5784654366729554e-05, + "loss": 1.0132, "step": 11453 }, { - "epoch": 0.32502837684449487, + "epoch": 0.3245770636742328, "grad_norm": 0.0, - "learning_rate": 1.5772221196038032e-05, - "loss": 0.892, + "learning_rate": 1.5783905695335947e-05, + "loss": 0.9533, "step": 11454 }, { - "epoch": 0.3250567536889898, + "epoch": 0.3246054011164952, "grad_norm": 0.0, - "learning_rate": 1.5771470664290682e-05, - "loss": 0.9358, + "learning_rate": 1.5783156975222356e-05, + "loss": 0.9633, "step": 11455 }, { - "epoch": 0.3250851305334847, + "epoch": 0.3246337385587577, "grad_norm": 0.0, - "learning_rate": 1.5770720083791463e-05, - "loss": 0.951, + "learning_rate": 1.5782408206395087e-05, + "loss": 0.835, "step": 11456 }, { - "epoch": 0.32511350737797956, + "epoch": 0.32466207600102015, "grad_norm": 0.0, - "learning_rate": 1.5769969454546704e-05, - "loss": 0.9392, + "learning_rate": 1.5781659388860445e-05, + "loss": 0.9184, "step": 11457 }, { - "epoch": 0.3251418842224745, + "epoch": 0.3246904134432826, "grad_norm": 0.0, - "learning_rate": 1.576921877656276e-05, - "loss": 0.8944, + "learning_rate": 1.578091052262474e-05, + "loss": 0.8413, "step": 11458 }, { - "epoch": 0.32517026106696933, + "epoch": 0.3247187508855451, "grad_norm": 0.0, - "learning_rate": 1.5768468049845963e-05, - "loss": 1.0092, + "learning_rate": 1.5780161607694276e-05, + "loss": 0.9617, "step": 11459 }, { - "epoch": 0.32519863791146425, + "epoch": 0.3247470883278075, "grad_norm": 0.0, - "learning_rate": 1.5767717274402657e-05, - "loss": 0.8975, + "learning_rate": 1.577941264407537e-05, + "loss": 0.925, "step": 11460 }, { - "epoch": 0.32522701475595917, + "epoch": 0.32477542577007, "grad_norm": 0.0, - "learning_rate": 1.576696645023919e-05, - "loss": 0.941, + "learning_rate": 1.577866363177432e-05, + "loss": 0.9558, "step": 11461 }, { - "epoch": 0.325255391600454, + "epoch": 0.32480376321233245, "grad_norm": 0.0, - "learning_rate": 1.576621557736189e-05, - "loss": 0.8573, + "learning_rate": 1.5777914570797443e-05, + "loss": 0.9075, "step": 11462 }, { - "epoch": 0.32528376844494894, + "epoch": 0.3248321006545949, "grad_norm": 0.0, - "learning_rate": 1.5765464655777114e-05, - "loss": 0.9674, + "learning_rate": 1.5777165461151045e-05, + "loss": 1.0332, "step": 11463 }, { - "epoch": 0.3253121452894438, + "epoch": 0.3248604380968574, "grad_norm": 0.0, - "learning_rate": 1.5764713685491195e-05, - "loss": 1.0109, + "learning_rate": 1.577641630284144e-05, + "loss": 0.9279, "step": 11464 }, { - "epoch": 0.3253405221339387, + "epoch": 0.3248887755391198, "grad_norm": 0.0, - "learning_rate": 1.5763962666510483e-05, - "loss": 1.037, + "learning_rate": 1.5775667095874933e-05, + "loss": 0.9307, "step": 11465 }, { - "epoch": 0.3253688989784336, + "epoch": 0.3249171129813823, "grad_norm": 0.0, - "learning_rate": 1.5763211598841318e-05, - "loss": 0.9707, + "learning_rate": 1.5774917840257836e-05, + "loss": 0.9123, "step": 11466 }, { - "epoch": 0.3253972758229285, + "epoch": 0.32494545042364475, "grad_norm": 0.0, - "learning_rate": 1.5762460482490042e-05, - "loss": 1.0406, + "learning_rate": 1.577416853599646e-05, + "loss": 0.931, "step": 11467 }, { - "epoch": 0.3254256526674234, + "epoch": 0.32497378786590725, "grad_norm": 0.0, - "learning_rate": 1.576170931746301e-05, - "loss": 1.0479, + "learning_rate": 1.5773419183097124e-05, + "loss": 0.9084, "step": 11468 }, { - "epoch": 0.32545402951191826, + "epoch": 0.3250021253081697, "grad_norm": 0.0, - "learning_rate": 1.5760958103766556e-05, - "loss": 0.954, + "learning_rate": 1.577266978156613e-05, + "loss": 0.7945, "step": 11469 }, { - "epoch": 0.3254824063564132, + "epoch": 0.3250304627504321, "grad_norm": 0.0, - "learning_rate": 1.576020684140703e-05, - "loss": 0.9005, + "learning_rate": 1.57719203314098e-05, + "loss": 0.9261, "step": 11470 }, { - "epoch": 0.32551078320090804, + "epoch": 0.3250588001926946, "grad_norm": 0.0, - "learning_rate": 1.5759455530390782e-05, - "loss": 0.9528, + "learning_rate": 1.5771170832634438e-05, + "loss": 0.9098, "step": 11471 }, { - "epoch": 0.32553916004540295, + "epoch": 0.32508713763495706, "grad_norm": 0.0, - "learning_rate": 1.575870417072415e-05, - "loss": 0.9455, + "learning_rate": 1.577042128524636e-05, + "loss": 0.9451, "step": 11472 }, { - "epoch": 0.32556753688989787, + "epoch": 0.32511547507721955, "grad_norm": 0.0, - "learning_rate": 1.5757952762413484e-05, - "loss": 0.8655, + "learning_rate": 1.576967168925188e-05, + "loss": 1.0343, "step": 11473 }, { - "epoch": 0.32559591373439273, + "epoch": 0.325143812519482, "grad_norm": 0.0, - "learning_rate": 1.5757201305465134e-05, - "loss": 0.9849, + "learning_rate": 1.5768922044657316e-05, + "loss": 0.9302, "step": 11474 }, { - "epoch": 0.32562429057888764, + "epoch": 0.32517214996174443, "grad_norm": 0.0, - "learning_rate": 1.5756449799885448e-05, - "loss": 0.9401, + "learning_rate": 1.5768172351468975e-05, + "loss": 0.7831, "step": 11475 }, { - "epoch": 0.3256526674233825, + "epoch": 0.3252004874040069, "grad_norm": 0.0, - "learning_rate": 1.5755698245680768e-05, - "loss": 0.936, + "learning_rate": 1.576742260969318e-05, + "loss": 1.0311, "step": 11476 }, { - "epoch": 0.3256810442678774, + "epoch": 0.32522882484626936, "grad_norm": 0.0, - "learning_rate": 1.575494664285745e-05, - "loss": 0.9796, + "learning_rate": 1.5766672819336243e-05, + "loss": 1.1104, "step": 11477 }, { - "epoch": 0.3257094211123723, + "epoch": 0.32525716228853185, "grad_norm": 0.0, - "learning_rate": 1.5754194991421835e-05, - "loss": 0.8907, + "learning_rate": 1.576592298040448e-05, + "loss": 0.8591, "step": 11478 }, { - "epoch": 0.3257377979568672, + "epoch": 0.3252854997307943, "grad_norm": 0.0, - "learning_rate": 1.575344329138028e-05, - "loss": 0.9365, + "learning_rate": 1.5765173092904202e-05, + "loss": 0.9393, "step": 11479 }, { - "epoch": 0.3257661748013621, + "epoch": 0.3253138371730568, "grad_norm": 0.0, - "learning_rate": 1.575269154273913e-05, - "loss": 0.845, + "learning_rate": 1.5764423156841734e-05, + "loss": 0.9387, "step": 11480 }, { - "epoch": 0.32579455164585697, + "epoch": 0.3253421746153192, "grad_norm": 0.0, - "learning_rate": 1.5751939745504733e-05, - "loss": 1.0008, + "learning_rate": 1.576367317222339e-05, + "loss": 1.0251, "step": 11481 }, { - "epoch": 0.3258229284903519, + "epoch": 0.32537051205758166, "grad_norm": 0.0, - "learning_rate": 1.5751187899683445e-05, - "loss": 1.0467, + "learning_rate": 1.5762923139055485e-05, + "loss": 0.8753, "step": 11482 }, { - "epoch": 0.32585130533484674, + "epoch": 0.32539884949984416, "grad_norm": 0.0, - "learning_rate": 1.5750436005281612e-05, - "loss": 0.9822, + "learning_rate": 1.5762173057344336e-05, + "loss": 0.9954, "step": 11483 }, { - "epoch": 0.32587968217934166, + "epoch": 0.3254271869421066, "grad_norm": 0.0, - "learning_rate": 1.574968406230559e-05, - "loss": 0.9522, + "learning_rate": 1.5761422927096268e-05, + "loss": 0.9656, "step": 11484 }, { - "epoch": 0.32590805902383657, + "epoch": 0.3254555243843691, "grad_norm": 0.0, - "learning_rate": 1.5748932070761727e-05, - "loss": 0.9091, + "learning_rate": 1.5760672748317593e-05, + "loss": 1.0256, "step": 11485 }, { - "epoch": 0.32593643586833143, + "epoch": 0.3254838618266315, "grad_norm": 0.0, - "learning_rate": 1.5748180030656377e-05, - "loss": 0.941, + "learning_rate": 1.5759922521014633e-05, + "loss": 0.9672, "step": 11486 }, { - "epoch": 0.32596481271282635, + "epoch": 0.32551219926889396, "grad_norm": 0.0, - "learning_rate": 1.5747427941995893e-05, - "loss": 0.9206, + "learning_rate": 1.5759172245193704e-05, + "loss": 0.9334, "step": 11487 }, { - "epoch": 0.3259931895573212, + "epoch": 0.32554053671115646, "grad_norm": 0.0, - "learning_rate": 1.5746675804786625e-05, - "loss": 0.9796, + "learning_rate": 1.575842192086113e-05, + "loss": 0.9714, "step": 11488 }, { - "epoch": 0.3260215664018161, + "epoch": 0.3255688741534189, "grad_norm": 0.0, - "learning_rate": 1.5745923619034928e-05, - "loss": 0.9875, + "learning_rate": 1.575767154802323e-05, + "loss": 0.8259, "step": 11489 }, { - "epoch": 0.32604994324631104, + "epoch": 0.3255972115956814, "grad_norm": 0.0, - "learning_rate": 1.574517138474716e-05, - "loss": 0.8385, + "learning_rate": 1.575692112668633e-05, + "loss": 0.9045, "step": 11490 }, { - "epoch": 0.3260783200908059, + "epoch": 0.32562554903794383, "grad_norm": 0.0, - "learning_rate": 1.5744419101929665e-05, - "loss": 0.9192, + "learning_rate": 1.575617065685674e-05, + "loss": 0.9696, "step": 11491 }, { - "epoch": 0.3261066969353008, + "epoch": 0.3256538864802063, "grad_norm": 0.0, - "learning_rate": 1.5743666770588807e-05, - "loss": 1.1035, + "learning_rate": 1.5755420138540783e-05, + "loss": 0.8831, "step": 11492 }, { - "epoch": 0.32613507377979567, + "epoch": 0.32568222392246876, "grad_norm": 0.0, - "learning_rate": 1.5742914390730938e-05, - "loss": 1.0124, + "learning_rate": 1.5754669571744792e-05, + "loss": 1.1196, "step": 11493 }, { - "epoch": 0.3261634506242906, + "epoch": 0.3257105613647312, "grad_norm": 0.0, - "learning_rate": 1.574216196236241e-05, - "loss": 0.9682, + "learning_rate": 1.575391895647508e-05, + "loss": 0.9387, "step": 11494 }, { - "epoch": 0.32619182746878544, + "epoch": 0.3257388988069937, "grad_norm": 0.0, - "learning_rate": 1.5741409485489584e-05, - "loss": 1.0385, + "learning_rate": 1.5753168292737974e-05, + "loss": 0.9527, "step": 11495 }, { - "epoch": 0.32622020431328036, + "epoch": 0.32576723624925613, "grad_norm": 0.0, - "learning_rate": 1.574065696011881e-05, - "loss": 0.992, + "learning_rate": 1.575241758053979e-05, + "loss": 0.9836, "step": 11496 }, { - "epoch": 0.3262485811577753, + "epoch": 0.3257955736915186, "grad_norm": 0.0, - "learning_rate": 1.5739904386256456e-05, - "loss": 1.0016, + "learning_rate": 1.575166681988686e-05, + "loss": 1.0623, "step": 11497 }, { - "epoch": 0.32627695800227013, + "epoch": 0.32582391113378106, "grad_norm": 0.0, - "learning_rate": 1.5739151763908867e-05, - "loss": 0.9821, + "learning_rate": 1.5750916010785503e-05, + "loss": 0.9546, "step": 11498 }, { - "epoch": 0.32630533484676505, + "epoch": 0.3258522485760435, "grad_norm": 0.0, - "learning_rate": 1.5738399093082404e-05, - "loss": 1.0428, + "learning_rate": 1.5750165153242048e-05, + "loss": 0.9708, "step": 11499 }, { - "epoch": 0.3263337116912599, + "epoch": 0.325880586018306, "grad_norm": 0.0, - "learning_rate": 1.573764637378343e-05, - "loss": 0.8846, + "learning_rate": 1.5749414247262812e-05, + "loss": 1.0635, "step": 11500 }, { - "epoch": 0.3263620885357548, + "epoch": 0.32590892346056843, "grad_norm": 0.0, - "learning_rate": 1.5736893606018297e-05, - "loss": 0.9968, + "learning_rate": 1.5748663292854126e-05, + "loss": 0.953, "step": 11501 }, { - "epoch": 0.32639046538024974, + "epoch": 0.3259372609028309, "grad_norm": 0.0, - "learning_rate": 1.5736140789793367e-05, - "loss": 0.9337, + "learning_rate": 1.5747912290022318e-05, + "loss": 0.8914, "step": 11502 }, { - "epoch": 0.3264188422247446, + "epoch": 0.32596559834509337, "grad_norm": 0.0, - "learning_rate": 1.5735387925114992e-05, - "loss": 0.7919, + "learning_rate": 1.5747161238773706e-05, + "loss": 0.968, "step": 11503 }, { - "epoch": 0.3264472190692395, + "epoch": 0.32599393578735586, "grad_norm": 0.0, - "learning_rate": 1.5734635011989544e-05, - "loss": 0.8554, + "learning_rate": 1.5746410139114624e-05, + "loss": 1.019, "step": 11504 }, { - "epoch": 0.3264755959137344, + "epoch": 0.3260222732296183, "grad_norm": 0.0, - "learning_rate": 1.5733882050423376e-05, - "loss": 0.9105, + "learning_rate": 1.5745658991051397e-05, + "loss": 0.9738, "step": 11505 }, { - "epoch": 0.3265039727582293, + "epoch": 0.32605061067188074, "grad_norm": 0.0, - "learning_rate": 1.5733129040422844e-05, - "loss": 0.9231, + "learning_rate": 1.5744907794590347e-05, + "loss": 0.8863, "step": 11506 }, { - "epoch": 0.3265323496027242, + "epoch": 0.32607894811414323, "grad_norm": 0.0, - "learning_rate": 1.573237598199432e-05, - "loss": 0.8616, + "learning_rate": 1.5744156549737807e-05, + "loss": 1.0319, "step": 11507 }, { - "epoch": 0.32656072644721906, + "epoch": 0.32610728555640567, "grad_norm": 0.0, - "learning_rate": 1.5731622875144155e-05, - "loss": 0.961, + "learning_rate": 1.5743405256500102e-05, + "loss": 0.9209, "step": 11508 }, { - "epoch": 0.326589103291714, + "epoch": 0.32613562299866816, "grad_norm": 0.0, - "learning_rate": 1.5730869719878713e-05, - "loss": 1.004, + "learning_rate": 1.574265391488356e-05, + "loss": 0.9839, "step": 11509 }, { - "epoch": 0.32661748013620884, + "epoch": 0.3261639604409306, "grad_norm": 0.0, - "learning_rate": 1.5730116516204356e-05, - "loss": 0.903, + "learning_rate": 1.5741902524894514e-05, + "loss": 1.0125, "step": 11510 }, { - "epoch": 0.32664585698070375, + "epoch": 0.32619229788319304, "grad_norm": 0.0, - "learning_rate": 1.572936326412745e-05, - "loss": 1.0392, + "learning_rate": 1.5741151086539293e-05, + "loss": 0.9791, "step": 11511 }, { - "epoch": 0.3266742338251986, + "epoch": 0.32622063532545553, "grad_norm": 0.0, - "learning_rate": 1.572860996365435e-05, - "loss": 0.9652, + "learning_rate": 1.574039959982422e-05, + "loss": 0.9065, "step": 11512 }, { - "epoch": 0.3267026106696935, + "epoch": 0.32624897276771797, "grad_norm": 0.0, - "learning_rate": 1.572785661479143e-05, - "loss": 0.926, + "learning_rate": 1.5739648064755634e-05, + "loss": 0.9528, "step": 11513 }, { - "epoch": 0.32673098751418844, + "epoch": 0.32627731020998046, "grad_norm": 0.0, - "learning_rate": 1.5727103217545043e-05, - "loss": 0.9806, + "learning_rate": 1.5738896481339857e-05, + "loss": 0.9144, "step": 11514 }, { - "epoch": 0.3267593643586833, + "epoch": 0.3263056476522429, "grad_norm": 0.0, - "learning_rate": 1.572634977192156e-05, - "loss": 0.8633, + "learning_rate": 1.573814484958323e-05, + "loss": 0.9912, "step": 11515 }, { - "epoch": 0.3267877412031782, + "epoch": 0.3263339850945054, "grad_norm": 0.0, - "learning_rate": 1.5725596277927342e-05, - "loss": 0.9585, + "learning_rate": 1.5737393169492072e-05, + "loss": 0.9401, "step": 11516 }, { - "epoch": 0.3268161180476731, + "epoch": 0.32636232253676783, "grad_norm": 0.0, - "learning_rate": 1.572484273556876e-05, - "loss": 0.9529, + "learning_rate": 1.5736641441072722e-05, + "loss": 0.9027, "step": 11517 }, { - "epoch": 0.326844494892168, + "epoch": 0.3263906599790303, "grad_norm": 0.0, - "learning_rate": 1.572408914485217e-05, - "loss": 0.975, + "learning_rate": 1.573588966433151e-05, + "loss": 0.8992, "step": 11518 }, { - "epoch": 0.3268728717366629, + "epoch": 0.32641899742129277, "grad_norm": 0.0, - "learning_rate": 1.572333550578394e-05, - "loss": 0.9051, + "learning_rate": 1.5735137839274775e-05, + "loss": 0.944, "step": 11519 }, { - "epoch": 0.32690124858115777, + "epoch": 0.3264473348635552, "grad_norm": 0.0, - "learning_rate": 1.572258181837044e-05, - "loss": 0.9786, + "learning_rate": 1.573438596590884e-05, + "loss": 1.0525, "step": 11520 }, { - "epoch": 0.3269296254256527, + "epoch": 0.3264756723058177, "grad_norm": 0.0, - "learning_rate": 1.572182808261803e-05, - "loss": 1.0157, + "learning_rate": 1.573363404424004e-05, + "loss": 0.9038, "step": 11521 }, { - "epoch": 0.32695800227014754, + "epoch": 0.32650400974808014, "grad_norm": 0.0, - "learning_rate": 1.5721074298533086e-05, - "loss": 0.9024, + "learning_rate": 1.5732882074274717e-05, + "loss": 0.9935, "step": 11522 }, { - "epoch": 0.32698637911464246, + "epoch": 0.3265323471903426, "grad_norm": 0.0, - "learning_rate": 1.572032046612197e-05, - "loss": 0.9291, + "learning_rate": 1.5732130056019195e-05, + "loss": 0.9523, "step": 11523 }, { - "epoch": 0.32701475595913737, + "epoch": 0.32656068463260507, "grad_norm": 0.0, - "learning_rate": 1.5719566585391048e-05, - "loss": 0.745, + "learning_rate": 1.5731377989479813e-05, + "loss": 0.9049, "step": 11524 }, { - "epoch": 0.32704313280363223, + "epoch": 0.3265890220748675, "grad_norm": 0.0, - "learning_rate": 1.571881265634669e-05, - "loss": 0.9517, + "learning_rate": 1.5730625874662908e-05, + "loss": 0.954, "step": 11525 }, { - "epoch": 0.32707150964812715, + "epoch": 0.32661735951713, "grad_norm": 0.0, - "learning_rate": 1.5718058678995264e-05, - "loss": 0.9819, + "learning_rate": 1.572987371157481e-05, + "loss": 0.9388, "step": 11526 }, { - "epoch": 0.327099886492622, + "epoch": 0.32664569695939244, "grad_norm": 0.0, - "learning_rate": 1.571730465334314e-05, - "loss": 0.9728, + "learning_rate": 1.5729121500221865e-05, + "loss": 0.7944, "step": 11527 }, { - "epoch": 0.3271282633371169, + "epoch": 0.32667403440165493, "grad_norm": 0.0, - "learning_rate": 1.5716550579396687e-05, - "loss": 0.9045, + "learning_rate": 1.5728369240610397e-05, + "loss": 0.9947, "step": 11528 }, { - "epoch": 0.3271566401816118, + "epoch": 0.32670237184391737, "grad_norm": 0.0, - "learning_rate": 1.5715796457162274e-05, - "loss": 0.9994, + "learning_rate": 1.5727616932746748e-05, + "loss": 0.8833, "step": 11529 }, { - "epoch": 0.3271850170261067, + "epoch": 0.3267307092861798, "grad_norm": 0.0, - "learning_rate": 1.5715042286646272e-05, - "loss": 0.942, + "learning_rate": 1.5726864576637254e-05, + "loss": 0.8772, "step": 11530 }, { - "epoch": 0.3272133938706016, + "epoch": 0.3267590467284423, "grad_norm": 0.0, - "learning_rate": 1.571428806785505e-05, - "loss": 0.9418, + "learning_rate": 1.5726112172288254e-05, + "loss": 0.8702, "step": 11531 }, { - "epoch": 0.32724177071509647, + "epoch": 0.32678738417070474, "grad_norm": 0.0, - "learning_rate": 1.571353380079498e-05, - "loss": 0.9395, + "learning_rate": 1.572535971970609e-05, + "loss": 1.0116, "step": 11532 }, { - "epoch": 0.3272701475595914, + "epoch": 0.32681572161296724, "grad_norm": 0.0, - "learning_rate": 1.571277948547243e-05, - "loss": 0.8811, + "learning_rate": 1.5724607218897086e-05, + "loss": 0.962, "step": 11533 }, { - "epoch": 0.32729852440408624, + "epoch": 0.3268440590552297, "grad_norm": 0.0, - "learning_rate": 1.571202512189378e-05, - "loss": 1.0225, + "learning_rate": 1.5723854669867595e-05, + "loss": 0.9404, "step": 11534 }, { - "epoch": 0.32732690124858116, + "epoch": 0.3268723964974921, "grad_norm": 0.0, - "learning_rate": 1.5711270710065397e-05, - "loss": 0.9222, + "learning_rate": 1.572310207262395e-05, + "loss": 1.149, "step": 11535 }, { - "epoch": 0.3273552780930761, + "epoch": 0.3269007339397546, "grad_norm": 0.0, - "learning_rate": 1.571051624999365e-05, - "loss": 0.9703, + "learning_rate": 1.572234942717249e-05, + "loss": 1.0229, "step": 11536 }, { - "epoch": 0.32738365493757093, + "epoch": 0.32692907138201704, "grad_norm": 0.0, - "learning_rate": 1.570976174168492e-05, - "loss": 1.022, + "learning_rate": 1.5721596733519556e-05, + "loss": 1.0388, "step": 11537 }, { - "epoch": 0.32741203178206585, + "epoch": 0.32695740882427954, "grad_norm": 0.0, - "learning_rate": 1.570900718514557e-05, - "loss": 0.9508, + "learning_rate": 1.5720843991671485e-05, + "loss": 0.9772, "step": 11538 }, { - "epoch": 0.3274404086265607, + "epoch": 0.326985746266542, "grad_norm": 0.0, - "learning_rate": 1.5708252580381983e-05, - "loss": 0.9822, + "learning_rate": 1.572009120163463e-05, + "loss": 1.0024, "step": 11539 }, { - "epoch": 0.3274687854710556, + "epoch": 0.32701408370880447, "grad_norm": 0.0, - "learning_rate": 1.570749792740053e-05, - "loss": 0.945, + "learning_rate": 1.5719338363415313e-05, + "loss": 0.944, "step": 11540 }, { - "epoch": 0.32749716231555054, + "epoch": 0.3270424211510669, "grad_norm": 0.0, - "learning_rate": 1.5706743226207586e-05, - "loss": 0.923, + "learning_rate": 1.571858547701989e-05, + "loss": 0.9091, "step": 11541 }, { - "epoch": 0.3275255391600454, + "epoch": 0.32707075859332935, "grad_norm": 0.0, - "learning_rate": 1.5705988476809522e-05, - "loss": 0.9183, + "learning_rate": 1.5717832542454697e-05, + "loss": 0.8199, "step": 11542 }, { - "epoch": 0.3275539160045403, + "epoch": 0.32709909603559184, "grad_norm": 0.0, - "learning_rate": 1.570523367921272e-05, - "loss": 0.9128, + "learning_rate": 1.5717079559726075e-05, + "loss": 0.8667, "step": 11543 }, { - "epoch": 0.3275822928490352, + "epoch": 0.3271274334778543, "grad_norm": 0.0, - "learning_rate": 1.5704478833423552e-05, - "loss": 1.0389, + "learning_rate": 1.5716326528840374e-05, + "loss": 0.9541, "step": 11544 }, { - "epoch": 0.3276106696935301, + "epoch": 0.3271557709201168, "grad_norm": 0.0, - "learning_rate": 1.5703723939448395e-05, - "loss": 0.9803, + "learning_rate": 1.5715573449803926e-05, + "loss": 0.9138, "step": 11545 }, { - "epoch": 0.32763904653802495, + "epoch": 0.3271841083623792, "grad_norm": 0.0, - "learning_rate": 1.5702968997293625e-05, - "loss": 1.0346, + "learning_rate": 1.5714820322623085e-05, + "loss": 0.9151, "step": 11546 }, { - "epoch": 0.32766742338251986, + "epoch": 0.32721244580464165, "grad_norm": 0.0, - "learning_rate": 1.570221400696562e-05, - "loss": 0.8712, + "learning_rate": 1.5714067147304185e-05, + "loss": 1.0722, "step": 11547 }, { - "epoch": 0.3276958002270148, + "epoch": 0.32724078324690414, "grad_norm": 0.0, - "learning_rate": 1.570145896847076e-05, - "loss": 0.9493, + "learning_rate": 1.5713313923853584e-05, + "loss": 0.9085, "step": 11548 }, { - "epoch": 0.32772417707150964, + "epoch": 0.3272691206891666, "grad_norm": 0.0, - "learning_rate": 1.5700703881815415e-05, - "loss": 0.8597, + "learning_rate": 1.571256065227761e-05, + "loss": 1.0007, "step": 11549 }, { - "epoch": 0.32775255391600455, + "epoch": 0.3272974581314291, "grad_norm": 0.0, - "learning_rate": 1.569994874700597e-05, - "loss": 1.0652, + "learning_rate": 1.5711807332582618e-05, + "loss": 0.9613, "step": 11550 }, { - "epoch": 0.3277809307604994, + "epoch": 0.3273257955736915, "grad_norm": 0.0, - "learning_rate": 1.56991935640488e-05, - "loss": 0.9891, + "learning_rate": 1.5711053964774956e-05, + "loss": 0.932, "step": 11551 }, { - "epoch": 0.3278093076049943, + "epoch": 0.327354133015954, "grad_norm": 0.0, - "learning_rate": 1.5698438332950287e-05, - "loss": 0.978, + "learning_rate": 1.5710300548860962e-05, + "loss": 0.8883, "step": 11552 }, { - "epoch": 0.32783768444948924, + "epoch": 0.32738247045821645, "grad_norm": 0.0, - "learning_rate": 1.569768305371681e-05, - "loss": 0.9812, + "learning_rate": 1.570954708484699e-05, + "loss": 0.8514, "step": 11553 }, { - "epoch": 0.3278660612939841, + "epoch": 0.3274108079004789, "grad_norm": 0.0, - "learning_rate": 1.5696927726354746e-05, - "loss": 0.9289, + "learning_rate": 1.5708793572739378e-05, + "loss": 0.9237, "step": 11554 }, { - "epoch": 0.327894438138479, + "epoch": 0.3274391453427414, "grad_norm": 0.0, - "learning_rate": 1.5696172350870484e-05, - "loss": 0.8001, + "learning_rate": 1.5708040012544482e-05, + "loss": 0.9717, "step": 11555 }, { - "epoch": 0.3279228149829739, + "epoch": 0.3274674827850038, "grad_norm": 0.0, - "learning_rate": 1.569541692727039e-05, - "loss": 0.9044, + "learning_rate": 1.570728640426864e-05, + "loss": 0.9424, "step": 11556 }, { - "epoch": 0.3279511918274688, + "epoch": 0.3274958202272663, "grad_norm": 0.0, - "learning_rate": 1.5694661455560852e-05, - "loss": 0.9847, + "learning_rate": 1.570653274791821e-05, + "loss": 0.8959, "step": 11557 }, { - "epoch": 0.32797956867196365, + "epoch": 0.32752415766952875, "grad_norm": 0.0, - "learning_rate": 1.569390593574826e-05, - "loss": 0.9704, + "learning_rate": 1.5705779043499533e-05, + "loss": 0.9045, "step": 11558 }, { - "epoch": 0.32800794551645857, + "epoch": 0.3275524951117912, "grad_norm": 0.0, - "learning_rate": 1.5693150367838986e-05, - "loss": 0.9562, + "learning_rate": 1.570502529101896e-05, + "loss": 0.8912, "step": 11559 }, { - "epoch": 0.3280363223609535, + "epoch": 0.3275808325540537, "grad_norm": 0.0, - "learning_rate": 1.5692394751839413e-05, - "loss": 1.0901, + "learning_rate": 1.5704271490482843e-05, + "loss": 0.988, "step": 11560 }, { - "epoch": 0.32806469920544834, + "epoch": 0.3276091699963161, "grad_norm": 0.0, - "learning_rate": 1.5691639087755928e-05, - "loss": 0.8848, + "learning_rate": 1.570351764189753e-05, + "loss": 0.9578, "step": 11561 }, { - "epoch": 0.32809307604994326, + "epoch": 0.3276375074385786, "grad_norm": 0.0, - "learning_rate": 1.569088337559491e-05, - "loss": 0.8735, + "learning_rate": 1.5702763745269366e-05, + "loss": 0.9627, "step": 11562 }, { - "epoch": 0.3281214528944381, + "epoch": 0.32766584488084105, "grad_norm": 0.0, - "learning_rate": 1.5690127615362746e-05, - "loss": 0.9195, + "learning_rate": 1.5702009800604704e-05, + "loss": 1.0567, "step": 11563 }, { - "epoch": 0.32814982973893303, + "epoch": 0.3276941823231035, "grad_norm": 0.0, - "learning_rate": 1.5689371807065816e-05, - "loss": 0.9684, + "learning_rate": 1.57012558079099e-05, + "loss": 1.0552, "step": 11564 }, { - "epoch": 0.32817820658342794, + "epoch": 0.327722519765366, "grad_norm": 0.0, - "learning_rate": 1.5688615950710507e-05, - "loss": 1.0361, + "learning_rate": 1.57005017671913e-05, + "loss": 0.9425, "step": 11565 }, { - "epoch": 0.3282065834279228, + "epoch": 0.3277508572076284, "grad_norm": 0.0, - "learning_rate": 1.5687860046303205e-05, - "loss": 0.9762, + "learning_rate": 1.5699747678455258e-05, + "loss": 0.7641, "step": 11566 }, { - "epoch": 0.3282349602724177, + "epoch": 0.3277791946498909, "grad_norm": 0.0, - "learning_rate": 1.5687104093850294e-05, - "loss": 0.8493, + "learning_rate": 1.569899354170812e-05, + "loss": 0.9279, "step": 11567 }, { - "epoch": 0.3282633371169126, + "epoch": 0.32780753209215335, "grad_norm": 0.0, - "learning_rate": 1.5686348093358156e-05, - "loss": 0.9075, + "learning_rate": 1.569823935695625e-05, + "loss": 0.9436, "step": 11568 }, { - "epoch": 0.3282917139614075, + "epoch": 0.32783586953441585, "grad_norm": 0.0, - "learning_rate": 1.5685592044833185e-05, - "loss": 0.9546, + "learning_rate": 1.569748512420599e-05, + "loss": 0.9999, "step": 11569 }, { - "epoch": 0.3283200908059024, + "epoch": 0.3278642069766783, "grad_norm": 0.0, - "learning_rate": 1.568483594828176e-05, - "loss": 0.9124, + "learning_rate": 1.56967308434637e-05, + "loss": 0.8902, "step": 11570 }, { - "epoch": 0.32834846765039727, + "epoch": 0.3278925444189407, "grad_norm": 0.0, - "learning_rate": 1.568407980371027e-05, - "loss": 1.0675, + "learning_rate": 1.569597651473573e-05, + "loss": 1.0871, "step": 11571 }, { - "epoch": 0.3283768444948922, + "epoch": 0.3279208818612032, "grad_norm": 0.0, - "learning_rate": 1.5683323611125095e-05, - "loss": 0.9722, + "learning_rate": 1.5695222138028432e-05, + "loss": 0.8211, "step": 11572 }, { - "epoch": 0.32840522133938704, + "epoch": 0.32794921930346566, "grad_norm": 0.0, - "learning_rate": 1.568256737053264e-05, - "loss": 1.0499, + "learning_rate": 1.5694467713348163e-05, + "loss": 0.8113, "step": 11573 }, { - "epoch": 0.32843359818388196, + "epoch": 0.32797755674572815, "grad_norm": 0.0, - "learning_rate": 1.568181108193928e-05, - "loss": 0.9059, + "learning_rate": 1.569371324070128e-05, + "loss": 0.9465, "step": 11574 }, { - "epoch": 0.3284619750283768, + "epoch": 0.3280058941879906, "grad_norm": 0.0, - "learning_rate": 1.5681054745351408e-05, - "loss": 0.8611, + "learning_rate": 1.5692958720094136e-05, + "loss": 1.0049, "step": 11575 }, { - "epoch": 0.32849035187287173, + "epoch": 0.328034231630253, "grad_norm": 0.0, - "learning_rate": 1.568029836077541e-05, - "loss": 0.958, + "learning_rate": 1.5692204151533083e-05, + "loss": 0.9267, "step": 11576 }, { - "epoch": 0.32851872871736665, + "epoch": 0.3280625690725155, "grad_norm": 0.0, - "learning_rate": 1.5679541928217674e-05, - "loss": 0.92, + "learning_rate": 1.5691449535024486e-05, + "loss": 0.8756, "step": 11577 }, { - "epoch": 0.3285471055618615, + "epoch": 0.32809090651477796, "grad_norm": 0.0, - "learning_rate": 1.5678785447684592e-05, - "loss": 1.0367, + "learning_rate": 1.5690694870574696e-05, + "loss": 0.9184, "step": 11578 }, { - "epoch": 0.3285754824063564, + "epoch": 0.32811924395704045, "grad_norm": 0.0, - "learning_rate": 1.5678028919182558e-05, - "loss": 0.8104, + "learning_rate": 1.5689940158190064e-05, + "loss": 1.0316, "step": 11579 }, { - "epoch": 0.3286038592508513, + "epoch": 0.3281475813993029, "grad_norm": 0.0, - "learning_rate": 1.5677272342717957e-05, - "loss": 0.8755, + "learning_rate": 1.5689185397876957e-05, + "loss": 1.0147, "step": 11580 }, { - "epoch": 0.3286322360953462, + "epoch": 0.3281759188415654, "grad_norm": 0.0, - "learning_rate": 1.567651571829718e-05, - "loss": 1.0068, + "learning_rate": 1.568843058964173e-05, + "loss": 0.9449, "step": 11581 }, { - "epoch": 0.3286606129398411, + "epoch": 0.3282042562838278, "grad_norm": 0.0, - "learning_rate": 1.567575904592662e-05, - "loss": 0.9486, + "learning_rate": 1.5687675733490736e-05, + "loss": 0.9116, "step": 11582 }, { - "epoch": 0.32868898978433597, + "epoch": 0.32823259372609026, "grad_norm": 0.0, - "learning_rate": 1.5675002325612665e-05, - "loss": 0.8382, + "learning_rate": 1.5686920829430337e-05, + "loss": 0.9731, "step": 11583 }, { - "epoch": 0.3287173666288309, + "epoch": 0.32826093116835275, "grad_norm": 0.0, - "learning_rate": 1.5674245557361716e-05, - "loss": 0.9942, + "learning_rate": 1.5686165877466895e-05, + "loss": 0.9861, "step": 11584 }, { - "epoch": 0.32874574347332575, + "epoch": 0.3282892686106152, "grad_norm": 0.0, - "learning_rate": 1.5673488741180157e-05, - "loss": 0.9785, + "learning_rate": 1.568541087760677e-05, + "loss": 0.9166, "step": 11585 }, { - "epoch": 0.32877412031782066, + "epoch": 0.3283176060528777, "grad_norm": 0.0, - "learning_rate": 1.567273187707438e-05, - "loss": 0.9173, + "learning_rate": 1.568465582985631e-05, + "loss": 0.9305, "step": 11586 }, { - "epoch": 0.3288024971623156, + "epoch": 0.3283459434951401, "grad_norm": 0.0, - "learning_rate": 1.5671974965050787e-05, - "loss": 0.9202, + "learning_rate": 1.5683900734221888e-05, + "loss": 0.8619, "step": 11587 }, { - "epoch": 0.32883087400681044, + "epoch": 0.32837428093740256, "grad_norm": 0.0, - "learning_rate": 1.5671218005115767e-05, - "loss": 1.0105, + "learning_rate": 1.568314559070986e-05, + "loss": 0.954, "step": 11588 }, { - "epoch": 0.32885925085130535, + "epoch": 0.32840261837966506, "grad_norm": 0.0, - "learning_rate": 1.567046099727571e-05, - "loss": 1.0457, + "learning_rate": 1.5682390399326585e-05, + "loss": 0.9384, "step": 11589 }, { - "epoch": 0.3288876276958002, + "epoch": 0.3284309558219275, "grad_norm": 0.0, - "learning_rate": 1.5669703941537018e-05, - "loss": 0.9044, + "learning_rate": 1.5681635160078427e-05, + "loss": 0.9033, "step": 11590 }, { - "epoch": 0.3289160045402951, + "epoch": 0.32845929326419, "grad_norm": 0.0, - "learning_rate": 1.5668946837906077e-05, - "loss": 1.0013, + "learning_rate": 1.5680879872971742e-05, + "loss": 0.9017, "step": 11591 }, { - "epoch": 0.32894438138479, + "epoch": 0.3284876307064524, "grad_norm": 0.0, - "learning_rate": 1.5668189686389293e-05, - "loss": 0.957, + "learning_rate": 1.56801245380129e-05, + "loss": 0.9702, "step": 11592 }, { - "epoch": 0.3289727582292849, + "epoch": 0.3285159681487149, "grad_norm": 0.0, - "learning_rate": 1.5667432486993054e-05, - "loss": 0.8899, + "learning_rate": 1.5679369155208257e-05, + "loss": 0.9354, "step": 11593 }, { - "epoch": 0.3290011350737798, + "epoch": 0.32854430559097736, "grad_norm": 0.0, - "learning_rate": 1.5666675239723757e-05, - "loss": 0.9631, + "learning_rate": 1.567861372456418e-05, + "loss": 0.9807, "step": 11594 }, { - "epoch": 0.3290295119182747, + "epoch": 0.3285726430332398, "grad_norm": 0.0, - "learning_rate": 1.5665917944587797e-05, - "loss": 0.9833, + "learning_rate": 1.5677858246087028e-05, + "loss": 0.9123, "step": 11595 }, { - "epoch": 0.3290578887627696, + "epoch": 0.3286009804755023, "grad_norm": 0.0, - "learning_rate": 1.5665160601591576e-05, - "loss": 1.0098, + "learning_rate": 1.5677102719783172e-05, + "loss": 0.9368, "step": 11596 }, { - "epoch": 0.32908626560726445, + "epoch": 0.32862931791776473, "grad_norm": 0.0, - "learning_rate": 1.566440321074149e-05, - "loss": 0.9854, + "learning_rate": 1.5676347145658973e-05, + "loss": 1.0012, "step": 11597 }, { - "epoch": 0.32911464245175936, + "epoch": 0.3286576553600272, "grad_norm": 0.0, - "learning_rate": 1.5663645772043935e-05, - "loss": 0.8593, + "learning_rate": 1.567559152372079e-05, + "loss": 0.9102, "step": 11598 }, { - "epoch": 0.3291430192962543, + "epoch": 0.32868599280228966, "grad_norm": 0.0, - "learning_rate": 1.5662888285505305e-05, - "loss": 1.0611, + "learning_rate": 1.5674835853974992e-05, + "loss": 0.9845, "step": 11599 }, { - "epoch": 0.32917139614074914, + "epoch": 0.3287143302445521, "grad_norm": 0.0, - "learning_rate": 1.5662130751132008e-05, - "loss": 0.9971, + "learning_rate": 1.5674080136427946e-05, + "loss": 0.8449, "step": 11600 }, { - "epoch": 0.32919977298524405, + "epoch": 0.3287426676868146, "grad_norm": 0.0, - "learning_rate": 1.5661373168930437e-05, - "loss": 1.0654, + "learning_rate": 1.567332437108602e-05, + "loss": 0.9689, "step": 11601 }, { - "epoch": 0.3292281498297389, + "epoch": 0.32877100512907703, "grad_norm": 0.0, - "learning_rate": 1.566061553890699e-05, - "loss": 0.812, + "learning_rate": 1.5672568557955567e-05, + "loss": 0.9358, "step": 11602 }, { - "epoch": 0.32925652667423383, + "epoch": 0.3287993425713395, "grad_norm": 0.0, - "learning_rate": 1.5659857861068074e-05, - "loss": 0.909, + "learning_rate": 1.567181269704297e-05, + "loss": 1.0231, "step": 11603 }, { - "epoch": 0.32928490351872874, + "epoch": 0.32882768001360196, "grad_norm": 0.0, - "learning_rate": 1.5659100135420077e-05, - "loss": 0.9579, + "learning_rate": 1.5671056788354583e-05, + "loss": 1.0052, "step": 11604 }, { - "epoch": 0.3293132803632236, + "epoch": 0.32885601745586446, "grad_norm": 0.0, - "learning_rate": 1.5658342361969413e-05, - "loss": 1.0282, + "learning_rate": 1.567030083189678e-05, + "loss": 0.9225, "step": 11605 }, { - "epoch": 0.3293416572077185, + "epoch": 0.3288843548981269, "grad_norm": 0.0, - "learning_rate": 1.5657584540722476e-05, - "loss": 0.8632, + "learning_rate": 1.5669544827675934e-05, + "loss": 1.039, "step": 11606 }, { - "epoch": 0.3293700340522134, + "epoch": 0.32891269234038933, "grad_norm": 0.0, - "learning_rate": 1.5656826671685665e-05, - "loss": 0.9324, + "learning_rate": 1.56687887756984e-05, + "loss": 0.9901, "step": 11607 }, { - "epoch": 0.3293984108967083, + "epoch": 0.32894102978265183, "grad_norm": 0.0, - "learning_rate": 1.5656068754865388e-05, - "loss": 0.9984, + "learning_rate": 1.5668032675970555e-05, + "loss": 0.8873, "step": 11608 }, { - "epoch": 0.32942678774120315, + "epoch": 0.32896936722491427, "grad_norm": 0.0, - "learning_rate": 1.5655310790268045e-05, - "loss": 1.0369, + "learning_rate": 1.5667276528498766e-05, + "loss": 0.8644, "step": 11609 }, { - "epoch": 0.32945516458569807, + "epoch": 0.32899770466717676, "grad_norm": 0.0, - "learning_rate": 1.5654552777900032e-05, - "loss": 0.8265, + "learning_rate": 1.56665203332894e-05, + "loss": 0.9015, "step": 11610 }, { - "epoch": 0.329483541430193, + "epoch": 0.3290260421094392, "grad_norm": 0.0, - "learning_rate": 1.565379471776776e-05, - "loss": 1.0623, + "learning_rate": 1.5665764090348834e-05, + "loss": 0.8841, "step": 11611 }, { - "epoch": 0.32951191827468784, + "epoch": 0.32905437955170164, "grad_norm": 0.0, - "learning_rate": 1.5653036609877633e-05, - "loss": 0.8454, + "learning_rate": 1.566500779968343e-05, + "loss": 0.937, "step": 11612 }, { - "epoch": 0.32954029511918276, + "epoch": 0.32908271699396413, "grad_norm": 0.0, - "learning_rate": 1.5652278454236047e-05, - "loss": 1.0547, + "learning_rate": 1.566425146129956e-05, + "loss": 1.0855, "step": 11613 }, { - "epoch": 0.3295686719636776, + "epoch": 0.32911105443622657, "grad_norm": 0.0, - "learning_rate": 1.5651520250849413e-05, - "loss": 1.0972, + "learning_rate": 1.56634950752036e-05, + "loss": 0.9675, "step": 11614 }, { - "epoch": 0.32959704880817253, + "epoch": 0.32913939187848906, "grad_norm": 0.0, - "learning_rate": 1.5650761999724134e-05, - "loss": 0.872, + "learning_rate": 1.5662738641401915e-05, + "loss": 0.9912, "step": 11615 }, { - "epoch": 0.32962542565266745, + "epoch": 0.3291677293207515, "grad_norm": 0.0, - "learning_rate": 1.5650003700866613e-05, - "loss": 1.0136, + "learning_rate": 1.5661982159900882e-05, + "loss": 0.9639, "step": 11616 }, { - "epoch": 0.3296538024971623, + "epoch": 0.329196066763014, "grad_norm": 0.0, - "learning_rate": 1.564924535428326e-05, - "loss": 0.9646, + "learning_rate": 1.566122563070687e-05, + "loss": 0.8946, "step": 11617 }, { - "epoch": 0.3296821793416572, + "epoch": 0.32922440420527643, "grad_norm": 0.0, - "learning_rate": 1.5648486959980474e-05, - "loss": 1.0415, + "learning_rate": 1.566046905382625e-05, + "loss": 0.8881, "step": 11618 }, { - "epoch": 0.3297105561861521, + "epoch": 0.32925274164753887, "grad_norm": 0.0, - "learning_rate": 1.5647728517964665e-05, - "loss": 1.0104, + "learning_rate": 1.5659712429265403e-05, + "loss": 0.834, "step": 11619 }, { - "epoch": 0.329738933030647, + "epoch": 0.32928107908980137, "grad_norm": 0.0, - "learning_rate": 1.564697002824224e-05, - "loss": 1.0117, + "learning_rate": 1.565895575703069e-05, + "loss": 1.0162, "step": 11620 }, { - "epoch": 0.3297673098751419, + "epoch": 0.3293094165320638, "grad_norm": 0.0, - "learning_rate": 1.5646211490819604e-05, - "loss": 1.0577, + "learning_rate": 1.5658199037128492e-05, + "loss": 1.0298, "step": 11621 }, { - "epoch": 0.32979568671963677, + "epoch": 0.3293377539743263, "grad_norm": 0.0, - "learning_rate": 1.564545290570317e-05, - "loss": 0.9843, + "learning_rate": 1.5657442269565185e-05, + "loss": 0.8921, "step": 11622 }, { - "epoch": 0.3298240635641317, + "epoch": 0.32936609141658874, "grad_norm": 0.0, - "learning_rate": 1.5644694272899338e-05, - "loss": 0.915, + "learning_rate": 1.565668545434714e-05, + "loss": 0.9498, "step": 11623 }, { - "epoch": 0.32985244040862655, + "epoch": 0.3293944288588512, "grad_norm": 0.0, - "learning_rate": 1.5643935592414518e-05, - "loss": 1.0241, + "learning_rate": 1.5655928591480732e-05, + "loss": 0.8773, "step": 11624 }, { - "epoch": 0.32988081725312146, + "epoch": 0.32942276630111367, "grad_norm": 0.0, - "learning_rate": 1.5643176864255123e-05, - "loss": 0.7844, + "learning_rate": 1.5655171680972337e-05, + "loss": 0.9383, "step": 11625 }, { - "epoch": 0.3299091940976163, + "epoch": 0.3294511037433761, "grad_norm": 0.0, - "learning_rate": 1.564241808842756e-05, - "loss": 0.9129, + "learning_rate": 1.5654414722828334e-05, + "loss": 1.0147, "step": 11626 }, { - "epoch": 0.32993757094211124, + "epoch": 0.3294794411856386, "grad_norm": 0.0, - "learning_rate": 1.5641659264938236e-05, - "loss": 0.9547, + "learning_rate": 1.5653657717055096e-05, + "loss": 1.076, "step": 11627 }, { - "epoch": 0.32996594778660615, + "epoch": 0.32950777862790104, "grad_norm": 0.0, - "learning_rate": 1.5640900393793564e-05, - "loss": 0.8848, + "learning_rate": 1.5652900663658995e-05, + "loss": 1.0791, "step": 11628 }, { - "epoch": 0.329994324631101, + "epoch": 0.32953611607016353, "grad_norm": 0.0, - "learning_rate": 1.564014147499995e-05, - "loss": 0.9225, + "learning_rate": 1.5652143562646416e-05, + "loss": 0.8111, "step": 11629 }, { - "epoch": 0.3300227014755959, + "epoch": 0.32956445351242597, "grad_norm": 0.0, - "learning_rate": 1.563938250856381e-05, - "loss": 0.8575, + "learning_rate": 1.5651386414023734e-05, + "loss": 0.9628, "step": 11630 }, { - "epoch": 0.3300510783200908, + "epoch": 0.3295927909546884, "grad_norm": 0.0, - "learning_rate": 1.5638623494491552e-05, - "loss": 0.9419, + "learning_rate": 1.5650629217797322e-05, + "loss": 0.9067, "step": 11631 }, { - "epoch": 0.3300794551645857, + "epoch": 0.3296211283969509, "grad_norm": 0.0, - "learning_rate": 1.563786443278959e-05, - "loss": 0.9366, + "learning_rate": 1.5649871973973565e-05, + "loss": 1.0182, "step": 11632 }, { - "epoch": 0.3301078320090806, + "epoch": 0.32964946583921334, "grad_norm": 0.0, - "learning_rate": 1.563710532346433e-05, - "loss": 0.9633, + "learning_rate": 1.5649114682558838e-05, + "loss": 1.0859, "step": 11633 }, { - "epoch": 0.3301362088535755, + "epoch": 0.32967780328147583, "grad_norm": 0.0, - "learning_rate": 1.5636346166522192e-05, - "loss": 0.9361, + "learning_rate": 1.5648357343559518e-05, + "loss": 0.9535, "step": 11634 }, { - "epoch": 0.3301645856980704, + "epoch": 0.3297061407237383, "grad_norm": 0.0, - "learning_rate": 1.563558696196958e-05, - "loss": 0.9008, + "learning_rate": 1.564759995698199e-05, + "loss": 1.0005, "step": 11635 }, { - "epoch": 0.33019296254256525, + "epoch": 0.3297344781660007, "grad_norm": 0.0, - "learning_rate": 1.5634827709812914e-05, - "loss": 1.0271, + "learning_rate": 1.564684252283263e-05, + "loss": 0.9821, "step": 11636 }, { - "epoch": 0.33022133938706016, + "epoch": 0.3297628156082632, "grad_norm": 0.0, - "learning_rate": 1.5634068410058607e-05, - "loss": 0.9229, + "learning_rate": 1.5646085041117817e-05, + "loss": 1.01, "step": 11637 }, { - "epoch": 0.330249716231555, + "epoch": 0.32979115305052564, "grad_norm": 0.0, - "learning_rate": 1.5633309062713065e-05, - "loss": 0.957, + "learning_rate": 1.5645327511843932e-05, + "loss": 0.7683, "step": 11638 }, { - "epoch": 0.33027809307604994, + "epoch": 0.32981949049278814, "grad_norm": 0.0, - "learning_rate": 1.5632549667782714e-05, - "loss": 0.9113, + "learning_rate": 1.5644569935017357e-05, + "loss": 0.9005, "step": 11639 }, { - "epoch": 0.33030646992054485, + "epoch": 0.3298478279350506, "grad_norm": 0.0, - "learning_rate": 1.563179022527396e-05, - "loss": 0.8516, + "learning_rate": 1.5643812310644472e-05, + "loss": 1.0652, "step": 11640 }, { - "epoch": 0.3303348467650397, + "epoch": 0.32987616537731307, "grad_norm": 0.0, - "learning_rate": 1.563103073519322e-05, - "loss": 0.9327, + "learning_rate": 1.5643054638731662e-05, + "loss": 0.9664, "step": 11641 }, { - "epoch": 0.33036322360953463, + "epoch": 0.3299045028195755, "grad_norm": 0.0, - "learning_rate": 1.563027119754691e-05, - "loss": 0.9659, + "learning_rate": 1.5642296919285312e-05, + "loss": 0.9718, "step": 11642 }, { - "epoch": 0.3303916004540295, + "epoch": 0.32993284026183795, "grad_norm": 0.0, - "learning_rate": 1.562951161234145e-05, - "loss": 0.9917, + "learning_rate": 1.5641539152311797e-05, + "loss": 1.1021, "step": 11643 }, { - "epoch": 0.3304199772985244, + "epoch": 0.32996117770410044, "grad_norm": 0.0, - "learning_rate": 1.562875197958325e-05, - "loss": 0.9935, + "learning_rate": 1.5640781337817498e-05, + "loss": 0.9279, "step": 11644 }, { - "epoch": 0.3304483541430193, + "epoch": 0.3299895151463629, "grad_norm": 0.0, - "learning_rate": 1.562799229927873e-05, - "loss": 0.9518, + "learning_rate": 1.5640023475808807e-05, + "loss": 0.8776, "step": 11645 }, { - "epoch": 0.3304767309875142, + "epoch": 0.33001785258862537, "grad_norm": 0.0, - "learning_rate": 1.5627232571434304e-05, - "loss": 1.0048, + "learning_rate": 1.5639265566292102e-05, + "loss": 0.8444, "step": 11646 }, { - "epoch": 0.3305051078320091, + "epoch": 0.3300461900308878, "grad_norm": 0.0, - "learning_rate": 1.5626472796056394e-05, - "loss": 0.9467, + "learning_rate": 1.5638507609273772e-05, + "loss": 1.0226, "step": 11647 }, { - "epoch": 0.33053348467650395, + "epoch": 0.33007452747315025, "grad_norm": 0.0, - "learning_rate": 1.562571297315141e-05, - "loss": 0.8772, + "learning_rate": 1.5637749604760198e-05, + "loss": 0.9715, "step": 11648 }, { - "epoch": 0.33056186152099887, + "epoch": 0.33010286491541274, "grad_norm": 0.0, - "learning_rate": 1.5624953102725783e-05, - "loss": 0.9012, + "learning_rate": 1.5636991552757762e-05, + "loss": 0.9547, "step": 11649 }, { - "epoch": 0.3305902383654938, + "epoch": 0.3301312023576752, "grad_norm": 0.0, - "learning_rate": 1.562419318478592e-05, - "loss": 0.9972, + "learning_rate": 1.5636233453272858e-05, + "loss": 1.0053, "step": 11650 }, { - "epoch": 0.33061861520998864, + "epoch": 0.3301595397999377, "grad_norm": 0.0, - "learning_rate": 1.5623433219338244e-05, - "loss": 0.9224, + "learning_rate": 1.5635475306311865e-05, + "loss": 0.9149, "step": 11651 }, { - "epoch": 0.33064699205448356, + "epoch": 0.3301878772422001, "grad_norm": 0.0, - "learning_rate": 1.5622673206389178e-05, - "loss": 0.9378, + "learning_rate": 1.5634717111881168e-05, + "loss": 0.9858, "step": 11652 }, { - "epoch": 0.3306753688989784, + "epoch": 0.3302162146844626, "grad_norm": 0.0, - "learning_rate": 1.5621913145945135e-05, - "loss": 0.9355, + "learning_rate": 1.563395886998716e-05, + "loss": 1.0, "step": 11653 }, { - "epoch": 0.33070374574347333, + "epoch": 0.33024455212672504, "grad_norm": 0.0, - "learning_rate": 1.562115303801254e-05, - "loss": 0.8332, + "learning_rate": 1.563320058063622e-05, + "loss": 0.8943, "step": 11654 }, { - "epoch": 0.3307321225879682, + "epoch": 0.3302728895689875, "grad_norm": 0.0, - "learning_rate": 1.5620392882597816e-05, - "loss": 0.9008, + "learning_rate": 1.5632442243834746e-05, + "loss": 1.0347, "step": 11655 }, { - "epoch": 0.3307604994324631, + "epoch": 0.33030122701125, "grad_norm": 0.0, - "learning_rate": 1.561963267970737e-05, - "loss": 0.9323, + "learning_rate": 1.5631683859589115e-05, + "loss": 0.9778, "step": 11656 }, { - "epoch": 0.330788876276958, + "epoch": 0.3303295644535124, "grad_norm": 0.0, - "learning_rate": 1.5618872429347646e-05, - "loss": 0.9976, + "learning_rate": 1.563092542790572e-05, + "loss": 0.972, "step": 11657 }, { - "epoch": 0.3308172531214529, + "epoch": 0.3303579018957749, "grad_norm": 0.0, - "learning_rate": 1.5618112131525048e-05, - "loss": 0.9323, + "learning_rate": 1.563016694879095e-05, + "loss": 0.8401, "step": 11658 }, { - "epoch": 0.3308456299659478, + "epoch": 0.33038623933803735, "grad_norm": 0.0, - "learning_rate": 1.5617351786246007e-05, - "loss": 0.9527, + "learning_rate": 1.5629408422251194e-05, + "loss": 0.9293, "step": 11659 }, { - "epoch": 0.33087400681044266, + "epoch": 0.3304145767802998, "grad_norm": 0.0, - "learning_rate": 1.5616591393516946e-05, - "loss": 1.0078, + "learning_rate": 1.5628649848292836e-05, + "loss": 0.9694, "step": 11660 }, { - "epoch": 0.33090238365493757, + "epoch": 0.3304429142225623, "grad_norm": 0.0, - "learning_rate": 1.561583095334428e-05, - "loss": 0.8725, + "learning_rate": 1.5627891226922277e-05, + "loss": 0.9576, "step": 11661 }, { - "epoch": 0.3309307604994325, + "epoch": 0.3304712516648247, "grad_norm": 0.0, - "learning_rate": 1.5615070465734436e-05, - "loss": 0.946, + "learning_rate": 1.562713255814589e-05, + "loss": 0.9694, "step": 11662 }, { - "epoch": 0.33095913734392735, + "epoch": 0.3304995891070872, "grad_norm": 0.0, - "learning_rate": 1.5614309930693844e-05, - "loss": 0.8888, + "learning_rate": 1.5626373841970085e-05, + "loss": 0.8967, "step": 11663 }, { - "epoch": 0.33098751418842226, + "epoch": 0.33052792654934965, "grad_norm": 0.0, - "learning_rate": 1.561354934822892e-05, - "loss": 0.9193, + "learning_rate": 1.5625615078401244e-05, + "loss": 0.9211, "step": 11664 }, { - "epoch": 0.3310158910329171, + "epoch": 0.33055626399161214, "grad_norm": 0.0, - "learning_rate": 1.5612788718346092e-05, - "loss": 1.0215, + "learning_rate": 1.5624856267445757e-05, + "loss": 0.9401, "step": 11665 }, { - "epoch": 0.33104426787741204, + "epoch": 0.3305846014338746, "grad_norm": 0.0, - "learning_rate": 1.561202804105179e-05, - "loss": 0.9757, + "learning_rate": 1.5624097409110012e-05, + "loss": 0.9169, "step": 11666 }, { - "epoch": 0.33107264472190695, + "epoch": 0.330612938876137, "grad_norm": 0.0, - "learning_rate": 1.561126731635243e-05, - "loss": 0.9619, + "learning_rate": 1.562333850340041e-05, + "loss": 0.9229, "step": 11667 }, { - "epoch": 0.3311010215664018, + "epoch": 0.3306412763183995, "grad_norm": 0.0, - "learning_rate": 1.5610506544254446e-05, - "loss": 0.9274, + "learning_rate": 1.562257955032334e-05, + "loss": 0.9605, "step": 11668 }, { - "epoch": 0.3311293984108967, + "epoch": 0.33066961376066195, "grad_norm": 0.0, - "learning_rate": 1.5609745724764264e-05, - "loss": 0.9883, + "learning_rate": 1.5621820549885192e-05, + "loss": 0.8035, "step": 11669 }, { - "epoch": 0.3311577752553916, + "epoch": 0.33069795120292445, "grad_norm": 0.0, - "learning_rate": 1.56089848578883e-05, - "loss": 0.8898, + "learning_rate": 1.5621061502092364e-05, + "loss": 0.9191, "step": 11670 }, { - "epoch": 0.3311861520998865, + "epoch": 0.3307262886451869, "grad_norm": 0.0, - "learning_rate": 1.5608223943632993e-05, - "loss": 0.9084, + "learning_rate": 1.5620302406951246e-05, + "loss": 0.8789, "step": 11671 }, { - "epoch": 0.33121452894438136, + "epoch": 0.3307546260874493, "grad_norm": 0.0, - "learning_rate": 1.5607462982004763e-05, - "loss": 0.8961, + "learning_rate": 1.5619543264468236e-05, + "loss": 0.9633, "step": 11672 }, { - "epoch": 0.3312429057888763, + "epoch": 0.3307829635297118, "grad_norm": 0.0, - "learning_rate": 1.5606701973010047e-05, - "loss": 1.0252, + "learning_rate": 1.5618784074649725e-05, + "loss": 0.95, "step": 11673 }, { - "epoch": 0.3312712826333712, + "epoch": 0.33081130097197425, "grad_norm": 0.0, - "learning_rate": 1.5605940916655266e-05, - "loss": 0.9186, + "learning_rate": 1.561802483750211e-05, + "loss": 0.9304, "step": 11674 }, { - "epoch": 0.33129965947786605, + "epoch": 0.33083963841423675, "grad_norm": 0.0, - "learning_rate": 1.5605179812946844e-05, - "loss": 1.0703, + "learning_rate": 1.5617265553031783e-05, + "loss": 0.9586, "step": 11675 }, { - "epoch": 0.33132803632236096, + "epoch": 0.3308679758564992, "grad_norm": 0.0, - "learning_rate": 1.560441866189122e-05, - "loss": 0.9765, + "learning_rate": 1.5616506221245143e-05, + "loss": 0.9353, "step": 11676 }, { - "epoch": 0.3313564131668558, + "epoch": 0.3308963132987617, "grad_norm": 0.0, - "learning_rate": 1.5603657463494823e-05, - "loss": 1.0729, + "learning_rate": 1.561574684214859e-05, + "loss": 0.9045, "step": 11677 }, { - "epoch": 0.33138479001135074, + "epoch": 0.3309246507410241, "grad_norm": 0.0, - "learning_rate": 1.5602896217764075e-05, - "loss": 0.9307, + "learning_rate": 1.5614987415748514e-05, + "loss": 0.9934, "step": 11678 }, { - "epoch": 0.33141316685584565, + "epoch": 0.33095298818328656, "grad_norm": 0.0, - "learning_rate": 1.560213492470541e-05, - "loss": 0.9486, + "learning_rate": 1.561422794205131e-05, + "loss": 1.0743, "step": 11679 }, { - "epoch": 0.3314415437003405, + "epoch": 0.33098132562554905, "grad_norm": 0.0, - "learning_rate": 1.560137358432526e-05, - "loss": 0.9542, + "learning_rate": 1.5613468421063383e-05, + "loss": 0.9996, "step": 11680 }, { - "epoch": 0.33146992054483543, + "epoch": 0.3310096630678115, "grad_norm": 0.0, - "learning_rate": 1.560061219663006e-05, - "loss": 1.0114, + "learning_rate": 1.5612708852791127e-05, + "loss": 0.986, "step": 11681 }, { - "epoch": 0.3314982973893303, + "epoch": 0.331038000510074, "grad_norm": 0.0, - "learning_rate": 1.5599850761626235e-05, - "loss": 0.9255, + "learning_rate": 1.5611949237240938e-05, + "loss": 0.9262, "step": 11682 }, { - "epoch": 0.3315266742338252, + "epoch": 0.3310663379523364, "grad_norm": 0.0, - "learning_rate": 1.5599089279320215e-05, - "loss": 1.0724, + "learning_rate": 1.5611189574419215e-05, + "loss": 0.9117, "step": 11683 }, { - "epoch": 0.3315550510783201, + "epoch": 0.33109467539459886, "grad_norm": 0.0, - "learning_rate": 1.5598327749718443e-05, - "loss": 0.9933, + "learning_rate": 1.5610429864332363e-05, + "loss": 1.0544, "step": 11684 }, { - "epoch": 0.331583427922815, + "epoch": 0.33112301283686135, "grad_norm": 0.0, - "learning_rate": 1.559756617282734e-05, - "loss": 0.884, + "learning_rate": 1.5609670106986775e-05, + "loss": 0.9071, "step": 11685 }, { - "epoch": 0.3316118047673099, + "epoch": 0.3311513502791238, "grad_norm": 0.0, - "learning_rate": 1.5596804548653347e-05, - "loss": 0.875, + "learning_rate": 1.5608910302388854e-05, + "loss": 1.0746, "step": 11686 }, { - "epoch": 0.33164018161180475, + "epoch": 0.3311796877213863, "grad_norm": 0.0, - "learning_rate": 1.559604287720289e-05, - "loss": 1.0083, + "learning_rate": 1.5608150450544997e-05, + "loss": 1.0015, "step": 11687 }, { - "epoch": 0.33166855845629967, + "epoch": 0.3312080251636487, "grad_norm": 0.0, - "learning_rate": 1.559528115848241e-05, - "loss": 0.943, + "learning_rate": 1.5607390551461605e-05, + "loss": 0.8992, "step": 11688 }, { - "epoch": 0.3316969353007945, + "epoch": 0.3312363626059112, "grad_norm": 0.0, - "learning_rate": 1.5594519392498337e-05, - "loss": 0.9143, + "learning_rate": 1.5606630605145084e-05, + "loss": 0.9576, "step": 11689 }, { - "epoch": 0.33172531214528944, + "epoch": 0.33126470004817365, "grad_norm": 0.0, - "learning_rate": 1.559375757925711e-05, - "loss": 0.8424, + "learning_rate": 1.560587061160183e-05, + "loss": 0.9595, "step": 11690 }, { - "epoch": 0.33175368898978436, + "epoch": 0.3312930374904361, "grad_norm": 0.0, - "learning_rate": 1.559299571876516e-05, - "loss": 0.9317, + "learning_rate": 1.5605110570838246e-05, + "loss": 0.9047, "step": 11691 }, { - "epoch": 0.3317820658342792, + "epoch": 0.3313213749326986, "grad_norm": 0.0, - "learning_rate": 1.5592233811028924e-05, - "loss": 0.9493, + "learning_rate": 1.560435048286073e-05, + "loss": 0.9514, "step": 11692 }, { - "epoch": 0.33181044267877413, + "epoch": 0.331349712374961, "grad_norm": 0.0, - "learning_rate": 1.5591471856054836e-05, - "loss": 0.9796, + "learning_rate": 1.5603590347675695e-05, + "loss": 0.927, "step": 11693 }, { - "epoch": 0.331838819523269, + "epoch": 0.3313780498172235, "grad_norm": 0.0, - "learning_rate": 1.5590709853849334e-05, - "loss": 0.941, + "learning_rate": 1.5602830165289536e-05, + "loss": 0.9989, "step": 11694 }, { - "epoch": 0.3318671963677639, + "epoch": 0.33140638725948596, "grad_norm": 0.0, - "learning_rate": 1.558994780441886e-05, - "loss": 0.9668, + "learning_rate": 1.5602069935708653e-05, + "loss": 0.9681, "step": 11695 }, { - "epoch": 0.3318955732122588, + "epoch": 0.3314347247017484, "grad_norm": 0.0, - "learning_rate": 1.558918570776984e-05, - "loss": 0.9836, + "learning_rate": 1.560130965893946e-05, + "loss": 0.8506, "step": 11696 }, { - "epoch": 0.3319239500567537, + "epoch": 0.3314630621440109, "grad_norm": 0.0, - "learning_rate": 1.5588423563908716e-05, - "loss": 0.9044, + "learning_rate": 1.5600549334988356e-05, + "loss": 1.0501, "step": 11697 }, { - "epoch": 0.3319523269012486, + "epoch": 0.3314913995862733, "grad_norm": 0.0, - "learning_rate": 1.5587661372841928e-05, - "loss": 0.8127, + "learning_rate": 1.5599788963861745e-05, + "loss": 0.8708, "step": 11698 }, { - "epoch": 0.33198070374574346, + "epoch": 0.3315197370285358, "grad_norm": 0.0, - "learning_rate": 1.5586899134575916e-05, - "loss": 0.9943, + "learning_rate": 1.5599028545566028e-05, + "loss": 0.9812, "step": 11699 }, { - "epoch": 0.33200908059023837, + "epoch": 0.33154807447079826, "grad_norm": 0.0, - "learning_rate": 1.5586136849117114e-05, - "loss": 0.9092, + "learning_rate": 1.5598268080107618e-05, + "loss": 1.0722, "step": 11700 }, { - "epoch": 0.3320374574347333, + "epoch": 0.33157641191306075, "grad_norm": 0.0, - "learning_rate": 1.5585374516471964e-05, - "loss": 0.9337, + "learning_rate": 1.5597507567492915e-05, + "loss": 0.887, "step": 11701 }, { - "epoch": 0.33206583427922814, + "epoch": 0.3316047493553232, "grad_norm": 0.0, - "learning_rate": 1.55846121366469e-05, - "loss": 0.827, + "learning_rate": 1.559674700772833e-05, + "loss": 1.0553, "step": 11702 }, { - "epoch": 0.33209421112372306, + "epoch": 0.33163308679758563, "grad_norm": 0.0, - "learning_rate": 1.558384970964837e-05, - "loss": 0.943, + "learning_rate": 1.5595986400820266e-05, + "loss": 0.9595, "step": 11703 }, { - "epoch": 0.3321225879682179, + "epoch": 0.3316614242398481, "grad_norm": 0.0, - "learning_rate": 1.558308723548281e-05, - "loss": 0.9853, + "learning_rate": 1.5595225746775127e-05, + "loss": 0.9476, "step": 11704 }, { - "epoch": 0.33215096481271283, + "epoch": 0.33168976168211056, "grad_norm": 0.0, - "learning_rate": 1.5582324714156663e-05, - "loss": 0.9569, + "learning_rate": 1.559446504559933e-05, + "loss": 0.816, "step": 11705 }, { - "epoch": 0.3321793416572077, + "epoch": 0.33171809912437306, "grad_norm": 0.0, - "learning_rate": 1.5581562145676365e-05, - "loss": 0.9697, + "learning_rate": 1.559370429729927e-05, + "loss": 1.0574, "step": 11706 }, { - "epoch": 0.3322077185017026, + "epoch": 0.3317464365666355, "grad_norm": 0.0, - "learning_rate": 1.5580799530048362e-05, - "loss": 0.9958, + "learning_rate": 1.5592943501881362e-05, + "loss": 1.0885, "step": 11707 }, { - "epoch": 0.3322360953461975, + "epoch": 0.33177477400889793, "grad_norm": 0.0, - "learning_rate": 1.5580036867279096e-05, - "loss": 0.975, + "learning_rate": 1.559218265935202e-05, + "loss": 0.8296, "step": 11708 }, { - "epoch": 0.3322644721906924, + "epoch": 0.3318031114511604, "grad_norm": 0.0, - "learning_rate": 1.557927415737501e-05, - "loss": 0.8923, + "learning_rate": 1.5591421769717642e-05, + "loss": 0.9483, "step": 11709 }, { - "epoch": 0.3322928490351873, + "epoch": 0.33183144889342286, "grad_norm": 0.0, - "learning_rate": 1.5578511400342537e-05, - "loss": 0.9038, + "learning_rate": 1.5590660832984642e-05, + "loss": 1.0374, "step": 11710 }, { - "epoch": 0.33232122587968216, + "epoch": 0.33185978633568536, "grad_norm": 0.0, - "learning_rate": 1.557774859618813e-05, - "loss": 0.8675, + "learning_rate": 1.5589899849159432e-05, + "loss": 0.9999, "step": 11711 }, { - "epoch": 0.3323496027241771, + "epoch": 0.3318881237779478, "grad_norm": 0.0, - "learning_rate": 1.5576985744918232e-05, - "loss": 0.9778, + "learning_rate": 1.558913881824842e-05, + "loss": 0.9366, "step": 11712 }, { - "epoch": 0.332377979568672, + "epoch": 0.3319164612202103, "grad_norm": 0.0, - "learning_rate": 1.5576222846539285e-05, - "loss": 1.0151, + "learning_rate": 1.5588377740258015e-05, + "loss": 0.9573, "step": 11713 }, { - "epoch": 0.33240635641316685, + "epoch": 0.33194479866247273, "grad_norm": 0.0, - "learning_rate": 1.557545990105773e-05, - "loss": 1.0109, + "learning_rate": 1.558761661519463e-05, + "loss": 0.8911, "step": 11714 }, { - "epoch": 0.33243473325766176, + "epoch": 0.33197313610473517, "grad_norm": 0.0, - "learning_rate": 1.5574696908480018e-05, - "loss": 0.9178, + "learning_rate": 1.5586855443064674e-05, + "loss": 0.9584, "step": 11715 }, { - "epoch": 0.3324631101021566, + "epoch": 0.33200147354699766, "grad_norm": 0.0, - "learning_rate": 1.557393386881259e-05, - "loss": 0.9382, + "learning_rate": 1.558609422387456e-05, + "loss": 0.8878, "step": 11716 }, { - "epoch": 0.33249148694665154, + "epoch": 0.3320298109892601, "grad_norm": 0.0, - "learning_rate": 1.5573170782061888e-05, - "loss": 1.1355, + "learning_rate": 1.5585332957630702e-05, + "loss": 0.9498, "step": 11717 }, { - "epoch": 0.3325198637911464, + "epoch": 0.3320581484315226, "grad_norm": 0.0, - "learning_rate": 1.5572407648234365e-05, - "loss": 0.9235, + "learning_rate": 1.558457164433951e-05, + "loss": 0.9247, "step": 11718 }, { - "epoch": 0.3325482406356413, + "epoch": 0.33208648587378503, "grad_norm": 0.0, - "learning_rate": 1.5571644467336465e-05, - "loss": 0.8749, + "learning_rate": 1.5583810284007395e-05, + "loss": 0.9453, "step": 11719 }, { - "epoch": 0.3325766174801362, + "epoch": 0.33211482331604747, "grad_norm": 0.0, - "learning_rate": 1.5570881239374633e-05, - "loss": 0.8278, + "learning_rate": 1.5583048876640775e-05, + "loss": 1.0351, "step": 11720 }, { - "epoch": 0.3326049943246311, + "epoch": 0.33214316075830996, "grad_norm": 0.0, - "learning_rate": 1.5570117964355317e-05, - "loss": 0.9069, + "learning_rate": 1.558228742224606e-05, + "loss": 0.9012, "step": 11721 }, { - "epoch": 0.332633371169126, + "epoch": 0.3321714982005724, "grad_norm": 0.0, - "learning_rate": 1.5569354642284963e-05, - "loss": 0.9421, + "learning_rate": 1.5581525920829664e-05, + "loss": 0.9777, "step": 11722 }, { - "epoch": 0.33266174801362086, + "epoch": 0.3321998356428349, "grad_norm": 0.0, - "learning_rate": 1.556859127317002e-05, - "loss": 1.039, + "learning_rate": 1.5580764372398e-05, + "loss": 0.9888, "step": 11723 }, { - "epoch": 0.3326901248581158, + "epoch": 0.33222817308509733, "grad_norm": 0.0, - "learning_rate": 1.5567827857016936e-05, - "loss": 0.9115, + "learning_rate": 1.5580002776957493e-05, + "loss": 0.9578, "step": 11724 }, { - "epoch": 0.3327185017026107, + "epoch": 0.3322565105273598, "grad_norm": 0.0, - "learning_rate": 1.556706439383216e-05, - "loss": 0.9756, + "learning_rate": 1.5579241134514548e-05, + "loss": 0.9235, "step": 11725 }, { - "epoch": 0.33274687854710555, + "epoch": 0.33228484796962227, "grad_norm": 0.0, - "learning_rate": 1.5566300883622142e-05, - "loss": 0.9137, + "learning_rate": 1.557847944507558e-05, + "loss": 0.9277, "step": 11726 }, { - "epoch": 0.33277525539160047, + "epoch": 0.3323131854118847, "grad_norm": 0.0, - "learning_rate": 1.556553732639333e-05, - "loss": 0.9072, + "learning_rate": 1.557771770864701e-05, + "loss": 0.9879, "step": 11727 }, { - "epoch": 0.3328036322360953, + "epoch": 0.3323415228541472, "grad_norm": 0.0, - "learning_rate": 1.5564773722152173e-05, - "loss": 0.9695, + "learning_rate": 1.557695592523525e-05, + "loss": 0.7788, "step": 11728 }, { - "epoch": 0.33283200908059024, + "epoch": 0.33236986029640964, "grad_norm": 0.0, - "learning_rate": 1.556401007090512e-05, - "loss": 0.9132, + "learning_rate": 1.5576194094846723e-05, + "loss": 0.9639, "step": 11729 }, { - "epoch": 0.33286038592508516, + "epoch": 0.33239819773867213, "grad_norm": 0.0, - "learning_rate": 1.556324637265863e-05, - "loss": 1.1664, + "learning_rate": 1.5575432217487843e-05, + "loss": 0.8904, "step": 11730 }, { - "epoch": 0.33288876276958, + "epoch": 0.33242653518093457, "grad_norm": 0.0, - "learning_rate": 1.5562482627419144e-05, - "loss": 0.8641, + "learning_rate": 1.557467029316502e-05, + "loss": 0.9904, "step": 11731 }, { - "epoch": 0.33291713961407493, + "epoch": 0.332454872623197, "grad_norm": 0.0, - "learning_rate": 1.5561718835193117e-05, - "loss": 1.0375, + "learning_rate": 1.5573908321884685e-05, + "loss": 1.0306, "step": 11732 }, { - "epoch": 0.3329455164585698, + "epoch": 0.3324832100654595, "grad_norm": 0.0, - "learning_rate": 1.5560954995987e-05, - "loss": 1.0115, + "learning_rate": 1.5573146303653252e-05, + "loss": 1.0038, "step": 11733 }, { - "epoch": 0.3329738933030647, + "epoch": 0.33251154750772194, "grad_norm": 0.0, - "learning_rate": 1.5560191109807246e-05, - "loss": 0.8712, + "learning_rate": 1.5572384238477135e-05, + "loss": 0.957, "step": 11734 }, { - "epoch": 0.33300227014755956, + "epoch": 0.33253988494998443, "grad_norm": 0.0, - "learning_rate": 1.5559427176660315e-05, - "loss": 0.9632, + "learning_rate": 1.557162212636276e-05, + "loss": 0.9029, "step": 11735 }, { - "epoch": 0.3330306469920545, + "epoch": 0.33256822239224687, "grad_norm": 0.0, - "learning_rate": 1.5558663196552646e-05, - "loss": 0.9519, + "learning_rate": 1.5570859967316537e-05, + "loss": 0.9329, "step": 11736 }, { - "epoch": 0.3330590238365494, + "epoch": 0.33259655983450936, "grad_norm": 0.0, - "learning_rate": 1.55578991694907e-05, - "loss": 0.8748, + "learning_rate": 1.5570097761344892e-05, + "loss": 1.0074, "step": 11737 }, { - "epoch": 0.33308740068104425, + "epoch": 0.3326248972767718, "grad_norm": 0.0, - "learning_rate": 1.555713509548093e-05, - "loss": 0.9514, + "learning_rate": 1.556933550845425e-05, + "loss": 0.9424, "step": 11738 }, { - "epoch": 0.33311577752553917, + "epoch": 0.33265323471903424, "grad_norm": 0.0, - "learning_rate": 1.555637097452979e-05, - "loss": 0.9116, + "learning_rate": 1.5568573208651027e-05, + "loss": 1.0646, "step": 11739 }, { - "epoch": 0.33314415437003403, + "epoch": 0.33268157216129673, "grad_norm": 0.0, - "learning_rate": 1.5555606806643733e-05, - "loss": 1.0097, + "learning_rate": 1.556781086194164e-05, + "loss": 0.8535, "step": 11740 }, { - "epoch": 0.33317253121452894, + "epoch": 0.3327099096035592, "grad_norm": 0.0, - "learning_rate": 1.555484259182922e-05, - "loss": 0.974, + "learning_rate": 1.5567048468332516e-05, + "loss": 0.9883, "step": 11741 }, { - "epoch": 0.33320090805902386, + "epoch": 0.33273824704582167, "grad_norm": 0.0, - "learning_rate": 1.5554078330092697e-05, - "loss": 1.0047, + "learning_rate": 1.5566286027830076e-05, + "loss": 0.9591, "step": 11742 }, { - "epoch": 0.3332292849035187, + "epoch": 0.3327665844880841, "grad_norm": 0.0, - "learning_rate": 1.5553314021440627e-05, - "loss": 0.9509, + "learning_rate": 1.556552354044074e-05, + "loss": 0.9576, "step": 11743 }, { - "epoch": 0.33325766174801363, + "epoch": 0.33279492193034654, "grad_norm": 0.0, - "learning_rate": 1.5552549665879462e-05, - "loss": 0.9689, + "learning_rate": 1.5564761006170934e-05, + "loss": 0.9952, "step": 11744 }, { - "epoch": 0.3332860385925085, + "epoch": 0.33282325937260904, "grad_norm": 0.0, - "learning_rate": 1.5551785263415667e-05, - "loss": 0.8757, + "learning_rate": 1.556399842502708e-05, + "loss": 1.0276, "step": 11745 }, { - "epoch": 0.3333144154370034, + "epoch": 0.3328515968148715, "grad_norm": 0.0, - "learning_rate": 1.5551020814055687e-05, - "loss": 0.9882, + "learning_rate": 1.55632357970156e-05, + "loss": 0.9673, "step": 11746 }, { - "epoch": 0.3333427922814983, + "epoch": 0.33287993425713397, "grad_norm": 0.0, - "learning_rate": 1.555025631780598e-05, - "loss": 0.8241, + "learning_rate": 1.556247312214292e-05, + "loss": 0.8647, "step": 11747 }, { - "epoch": 0.3333711691259932, + "epoch": 0.3329082716993964, "grad_norm": 0.0, - "learning_rate": 1.5549491774673016e-05, - "loss": 0.965, + "learning_rate": 1.556171040041546e-05, + "loss": 0.9696, "step": 11748 }, { - "epoch": 0.3333995459704881, + "epoch": 0.3329366091416589, "grad_norm": 0.0, - "learning_rate": 1.5548727184663245e-05, - "loss": 0.9505, + "learning_rate": 1.5560947631839654e-05, + "loss": 1.046, "step": 11749 }, { - "epoch": 0.33342792281498296, + "epoch": 0.33296494658392134, "grad_norm": 0.0, - "learning_rate": 1.5547962547783126e-05, - "loss": 1.0348, + "learning_rate": 1.556018481642192e-05, + "loss": 1.0566, "step": 11750 }, { - "epoch": 0.3334562996594779, + "epoch": 0.3329932840261838, "grad_norm": 0.0, - "learning_rate": 1.5547197864039113e-05, - "loss": 0.8598, + "learning_rate": 1.555942195416868e-05, + "loss": 0.9796, "step": 11751 }, { - "epoch": 0.33348467650397273, + "epoch": 0.33302162146844627, "grad_norm": 0.0, - "learning_rate": 1.5546433133437675e-05, - "loss": 0.9768, + "learning_rate": 1.555865904508637e-05, + "loss": 0.9413, "step": 11752 }, { - "epoch": 0.33351305334846765, + "epoch": 0.3330499589107087, "grad_norm": 0.0, - "learning_rate": 1.5545668355985263e-05, - "loss": 0.9384, + "learning_rate": 1.5557896089181403e-05, + "loss": 0.9052, "step": 11753 }, { - "epoch": 0.33354143019296256, + "epoch": 0.3330782963529712, "grad_norm": 0.0, - "learning_rate": 1.5544903531688343e-05, - "loss": 1.0133, + "learning_rate": 1.555713308646022e-05, + "loss": 0.9958, "step": 11754 }, { - "epoch": 0.3335698070374574, + "epoch": 0.33310663379523364, "grad_norm": 0.0, - "learning_rate": 1.5544138660553375e-05, - "loss": 0.9092, + "learning_rate": 1.5556370036929237e-05, + "loss": 1.0022, "step": 11755 }, { - "epoch": 0.33359818388195234, + "epoch": 0.3331349712374961, "grad_norm": 0.0, - "learning_rate": 1.5543373742586816e-05, - "loss": 1.0234, + "learning_rate": 1.5555606940594892e-05, + "loss": 0.8586, "step": 11756 }, { - "epoch": 0.3336265607264472, + "epoch": 0.3331633086797586, "grad_norm": 0.0, - "learning_rate": 1.554260877779513e-05, - "loss": 0.9785, + "learning_rate": 1.55548437974636e-05, + "loss": 0.8422, "step": 11757 }, { - "epoch": 0.3336549375709421, + "epoch": 0.333191646122021, "grad_norm": 0.0, - "learning_rate": 1.5541843766184783e-05, - "loss": 1.0247, + "learning_rate": 1.55540806075418e-05, + "loss": 0.9093, "step": 11758 }, { - "epoch": 0.333683314415437, + "epoch": 0.3332199835642835, "grad_norm": 0.0, - "learning_rate": 1.554107870776223e-05, - "loss": 0.9893, + "learning_rate": 1.5553317370835916e-05, + "loss": 0.9531, "step": 11759 }, { - "epoch": 0.3337116912599319, + "epoch": 0.33324832100654594, "grad_norm": 0.0, - "learning_rate": 1.5540313602533932e-05, - "loss": 0.8763, + "learning_rate": 1.5552554087352382e-05, + "loss": 0.9826, "step": 11760 }, { - "epoch": 0.3337400681044268, + "epoch": 0.33327665844880844, "grad_norm": 0.0, - "learning_rate": 1.5539548450506362e-05, - "loss": 0.9671, + "learning_rate": 1.5551790757097616e-05, + "loss": 0.9211, "step": 11761 }, { - "epoch": 0.33376844494892166, + "epoch": 0.3333049958910709, "grad_norm": 0.0, - "learning_rate": 1.5538783251685975e-05, - "loss": 0.8397, + "learning_rate": 1.5551027380078054e-05, + "loss": 0.9878, "step": 11762 }, { - "epoch": 0.3337968217934166, + "epoch": 0.3333333333333333, "grad_norm": 0.0, - "learning_rate": 1.5538018006079235e-05, - "loss": 0.8497, + "learning_rate": 1.555026395630013e-05, + "loss": 0.976, "step": 11763 }, { - "epoch": 0.3338251986379115, + "epoch": 0.3333616707755958, "grad_norm": 0.0, - "learning_rate": 1.553725271369261e-05, - "loss": 0.9214, + "learning_rate": 1.5549500485770272e-05, + "loss": 1.0221, "step": 11764 }, { - "epoch": 0.33385357548240635, + "epoch": 0.33339000821785825, "grad_norm": 0.0, - "learning_rate": 1.5536487374532562e-05, - "loss": 0.8946, + "learning_rate": 1.554873696849491e-05, + "loss": 0.9569, "step": 11765 }, { - "epoch": 0.33388195232690127, + "epoch": 0.33341834566012074, "grad_norm": 0.0, - "learning_rate": 1.5535721988605558e-05, - "loss": 0.991, + "learning_rate": 1.554797340448048e-05, + "loss": 0.9542, "step": 11766 }, { - "epoch": 0.3339103291713961, + "epoch": 0.3334466831023832, "grad_norm": 0.0, - "learning_rate": 1.5534956555918056e-05, - "loss": 0.9214, + "learning_rate": 1.5547209793733403e-05, + "loss": 1.0997, "step": 11767 }, { - "epoch": 0.33393870601589104, + "epoch": 0.3334750205446456, "grad_norm": 0.0, - "learning_rate": 1.553419107647653e-05, - "loss": 0.8542, + "learning_rate": 1.5546446136260123e-05, + "loss": 1.0572, "step": 11768 }, { - "epoch": 0.3339670828603859, + "epoch": 0.3335033579869081, "grad_norm": 0.0, - "learning_rate": 1.5533425550287447e-05, - "loss": 0.8455, + "learning_rate": 1.5545682432067068e-05, + "loss": 0.9961, "step": 11769 }, { - "epoch": 0.3339954597048808, + "epoch": 0.33353169542917055, "grad_norm": 0.0, - "learning_rate": 1.5532659977357266e-05, - "loss": 1.0161, + "learning_rate": 1.5544918681160667e-05, + "loss": 1.1475, "step": 11770 }, { - "epoch": 0.33402383654937573, + "epoch": 0.33356003287143304, "grad_norm": 0.0, - "learning_rate": 1.553189435769246e-05, - "loss": 0.878, + "learning_rate": 1.554415488354736e-05, + "loss": 0.9025, "step": 11771 }, { - "epoch": 0.3340522133938706, + "epoch": 0.3335883703136955, "grad_norm": 0.0, - "learning_rate": 1.553112869129949e-05, - "loss": 1.0506, + "learning_rate": 1.5543391039233576e-05, + "loss": 0.8648, "step": 11772 }, { - "epoch": 0.3340805902383655, + "epoch": 0.3336167077559579, "grad_norm": 0.0, - "learning_rate": 1.5530362978184827e-05, - "loss": 0.9628, + "learning_rate": 1.554262714822575e-05, + "loss": 0.9657, "step": 11773 }, { - "epoch": 0.33410896708286036, + "epoch": 0.3336450451982204, "grad_norm": 0.0, - "learning_rate": 1.5529597218354943e-05, - "loss": 1.0132, + "learning_rate": 1.554186321053032e-05, + "loss": 1.0203, "step": 11774 }, { - "epoch": 0.3341373439273553, + "epoch": 0.33367338264048285, "grad_norm": 0.0, - "learning_rate": 1.55288314118163e-05, - "loss": 0.9111, + "learning_rate": 1.5541099226153714e-05, + "loss": 0.952, "step": 11775 }, { - "epoch": 0.3341657207718502, + "epoch": 0.33370172008274535, "grad_norm": 0.0, - "learning_rate": 1.552806555857537e-05, - "loss": 0.9834, + "learning_rate": 1.5540335195102377e-05, + "loss": 1.0759, "step": 11776 }, { - "epoch": 0.33419409761634505, + "epoch": 0.3337300575250078, "grad_norm": 0.0, - "learning_rate": 1.5527299658638627e-05, - "loss": 0.9263, + "learning_rate": 1.5539571117382734e-05, + "loss": 0.9494, "step": 11777 }, { - "epoch": 0.33422247446083997, + "epoch": 0.3337583949672703, "grad_norm": 0.0, - "learning_rate": 1.552653371201253e-05, - "loss": 1.0678, + "learning_rate": 1.5538806993001228e-05, + "loss": 0.9773, "step": 11778 }, { - "epoch": 0.33425085130533483, + "epoch": 0.3337867324095327, "grad_norm": 0.0, - "learning_rate": 1.5525767718703552e-05, - "loss": 1.0248, + "learning_rate": 1.5538042821964293e-05, + "loss": 0.9188, "step": 11779 }, { - "epoch": 0.33427922814982974, + "epoch": 0.33381506985179515, "grad_norm": 0.0, - "learning_rate": 1.5525001678718167e-05, - "loss": 0.9447, + "learning_rate": 1.553727860427837e-05, + "loss": 1.0066, "step": 11780 }, { - "epoch": 0.33430760499432466, + "epoch": 0.33384340729405765, "grad_norm": 0.0, - "learning_rate": 1.5524235592062845e-05, - "loss": 0.9326, + "learning_rate": 1.553651433994989e-05, + "loss": 0.9457, "step": 11781 }, { - "epoch": 0.3343359818388195, + "epoch": 0.3338717447363201, "grad_norm": 0.0, - "learning_rate": 1.552346945874406e-05, - "loss": 0.9082, + "learning_rate": 1.5535750028985296e-05, + "loss": 0.9584, "step": 11782 }, { - "epoch": 0.33436435868331443, + "epoch": 0.3339000821785826, "grad_norm": 0.0, - "learning_rate": 1.5522703278768278e-05, - "loss": 0.8873, + "learning_rate": 1.5534985671391025e-05, + "loss": 1.0043, "step": 11783 }, { - "epoch": 0.3343927355278093, + "epoch": 0.333928419620845, "grad_norm": 0.0, - "learning_rate": 1.5521937052141972e-05, - "loss": 1.0598, + "learning_rate": 1.5534221267173513e-05, + "loss": 0.8528, "step": 11784 }, { - "epoch": 0.3344211123723042, + "epoch": 0.33395675706310746, "grad_norm": 0.0, - "learning_rate": 1.552117077887162e-05, - "loss": 0.9309, + "learning_rate": 1.55334568163392e-05, + "loss": 1.0024, "step": 11785 }, { - "epoch": 0.33444948921679907, + "epoch": 0.33398509450536995, "grad_norm": 0.0, - "learning_rate": 1.5520404458963683e-05, - "loss": 0.7731, + "learning_rate": 1.5532692318894524e-05, + "loss": 0.9117, "step": 11786 }, { - "epoch": 0.334477866061294, + "epoch": 0.3340134319476324, "grad_norm": 0.0, - "learning_rate": 1.5519638092424647e-05, - "loss": 1.0327, + "learning_rate": 1.5531927774845926e-05, + "loss": 1.0965, "step": 11787 }, { - "epoch": 0.3345062429057889, + "epoch": 0.3340417693898949, "grad_norm": 0.0, - "learning_rate": 1.5518871679260978e-05, - "loss": 0.8913, + "learning_rate": 1.553116318419985e-05, + "loss": 0.8403, "step": 11788 }, { - "epoch": 0.33453461975028376, + "epoch": 0.3340701068321573, "grad_norm": 0.0, - "learning_rate": 1.5518105219479152e-05, - "loss": 0.977, + "learning_rate": 1.553039854696273e-05, + "loss": 1.0665, "step": 11789 }, { - "epoch": 0.33456299659477867, + "epoch": 0.3340984442744198, "grad_norm": 0.0, - "learning_rate": 1.5517338713085642e-05, - "loss": 0.9596, + "learning_rate": 1.5529633863141008e-05, + "loss": 0.9401, "step": 11790 }, { - "epoch": 0.33459137343927353, + "epoch": 0.33412678171668225, "grad_norm": 0.0, - "learning_rate": 1.5516572160086923e-05, - "loss": 0.8931, + "learning_rate": 1.5528869132741127e-05, + "loss": 0.9903, "step": 11791 }, { - "epoch": 0.33461975028376845, + "epoch": 0.3341551191589447, "grad_norm": 0.0, - "learning_rate": 1.5515805560489475e-05, - "loss": 0.9304, + "learning_rate": 1.5528104355769527e-05, + "loss": 0.8286, "step": 11792 }, { - "epoch": 0.33464812712826336, + "epoch": 0.3341834566012072, "grad_norm": 0.0, - "learning_rate": 1.551503891429977e-05, - "loss": 0.8849, + "learning_rate": 1.5527339532232657e-05, + "loss": 0.8073, "step": 11793 }, { - "epoch": 0.3346765039727582, + "epoch": 0.3342117940434696, "grad_norm": 0.0, - "learning_rate": 1.5514272221524278e-05, - "loss": 0.9069, + "learning_rate": 1.5526574662136948e-05, + "loss": 0.961, "step": 11794 }, { - "epoch": 0.33470488081725314, + "epoch": 0.3342401314857321, "grad_norm": 0.0, - "learning_rate": 1.551350548216948e-05, - "loss": 0.8975, + "learning_rate": 1.552580974548885e-05, + "loss": 1.005, "step": 11795 }, { - "epoch": 0.334733257661748, + "epoch": 0.33426846892799456, "grad_norm": 0.0, - "learning_rate": 1.5512738696241864e-05, - "loss": 0.8988, + "learning_rate": 1.5525044782294804e-05, + "loss": 1.0328, "step": 11796 }, { - "epoch": 0.3347616345062429, + "epoch": 0.334296806370257, "grad_norm": 0.0, - "learning_rate": 1.5511971863747886e-05, - "loss": 0.9328, + "learning_rate": 1.5524279772561257e-05, + "loss": 1.0314, "step": 11797 }, { - "epoch": 0.33479001135073777, + "epoch": 0.3343251438125195, "grad_norm": 0.0, - "learning_rate": 1.551120498469404e-05, - "loss": 0.9431, + "learning_rate": 1.5523514716294648e-05, + "loss": 0.9089, "step": 11798 }, { - "epoch": 0.3348183881952327, + "epoch": 0.3343534812547819, "grad_norm": 0.0, - "learning_rate": 1.5510438059086793e-05, - "loss": 0.9168, + "learning_rate": 1.5522749613501424e-05, + "loss": 0.9888, "step": 11799 }, { - "epoch": 0.3348467650397276, + "epoch": 0.3343818186970444, "grad_norm": 0.0, - "learning_rate": 1.550967108693263e-05, - "loss": 1.0009, + "learning_rate": 1.5521984464188025e-05, + "loss": 0.9289, "step": 11800 }, { - "epoch": 0.33487514188422246, + "epoch": 0.33441015613930686, "grad_norm": 0.0, - "learning_rate": 1.5508904068238025e-05, - "loss": 0.9903, + "learning_rate": 1.5521219268360907e-05, + "loss": 0.9842, "step": 11801 }, { - "epoch": 0.3349035187287174, + "epoch": 0.33443849358156935, "grad_norm": 0.0, - "learning_rate": 1.5508137003009463e-05, + "learning_rate": 1.5520454026026506e-05, "loss": 0.9223, "step": 11802 }, { - "epoch": 0.33493189557321223, + "epoch": 0.3344668310238318, "grad_norm": 0.0, - "learning_rate": 1.550736989125342e-05, - "loss": 0.9604, + "learning_rate": 1.551968873719127e-05, + "loss": 0.8911, "step": 11803 }, { - "epoch": 0.33496027241770715, + "epoch": 0.33449516846609423, "grad_norm": 0.0, - "learning_rate": 1.5506602732976373e-05, - "loss": 0.8964, + "learning_rate": 1.5518923401861647e-05, + "loss": 0.9031, "step": 11804 }, { - "epoch": 0.33498864926220207, + "epoch": 0.3345235059083567, "grad_norm": 0.0, - "learning_rate": 1.5505835528184808e-05, - "loss": 0.888, + "learning_rate": 1.5518158020044084e-05, + "loss": 0.874, "step": 11805 }, { - "epoch": 0.3350170261066969, + "epoch": 0.33455184335061916, "grad_norm": 0.0, - "learning_rate": 1.5505068276885205e-05, - "loss": 0.9167, + "learning_rate": 1.5517392591745023e-05, + "loss": 0.9903, "step": 11806 }, { - "epoch": 0.33504540295119184, + "epoch": 0.33458018079288165, "grad_norm": 0.0, - "learning_rate": 1.550430097908404e-05, - "loss": 0.9546, + "learning_rate": 1.5516627116970917e-05, + "loss": 0.8527, "step": 11807 }, { - "epoch": 0.3350737797956867, + "epoch": 0.3346085182351441, "grad_norm": 0.0, - "learning_rate": 1.5503533634787794e-05, - "loss": 0.9162, + "learning_rate": 1.5515861595728214e-05, + "loss": 0.9175, "step": 11808 }, { - "epoch": 0.3351021566401816, + "epoch": 0.33463685567740653, "grad_norm": 0.0, - "learning_rate": 1.5502766244002955e-05, - "loss": 0.8571, + "learning_rate": 1.551509602802336e-05, + "loss": 0.923, "step": 11809 }, { - "epoch": 0.33513053348467653, + "epoch": 0.334665193119669, "grad_norm": 0.0, - "learning_rate": 1.5501998806736e-05, - "loss": 1.0476, + "learning_rate": 1.5514330413862802e-05, + "loss": 0.8512, "step": 11810 }, { - "epoch": 0.3351589103291714, + "epoch": 0.33469353056193146, "grad_norm": 0.0, - "learning_rate": 1.550123132299342e-05, - "loss": 0.9725, + "learning_rate": 1.5513564753252995e-05, + "loss": 0.8734, "step": 11811 }, { - "epoch": 0.3351872871736663, + "epoch": 0.33472186800419396, "grad_norm": 0.0, - "learning_rate": 1.5500463792781687e-05, - "loss": 0.8823, + "learning_rate": 1.551279904620038e-05, + "loss": 0.9442, "step": 11812 }, { - "epoch": 0.33521566401816116, + "epoch": 0.3347502054464564, "grad_norm": 0.0, - "learning_rate": 1.5499696216107293e-05, - "loss": 0.8929, + "learning_rate": 1.5512033292711415e-05, + "loss": 0.8549, "step": 11813 }, { - "epoch": 0.3352440408626561, + "epoch": 0.3347785428887189, "grad_norm": 0.0, - "learning_rate": 1.5498928592976712e-05, - "loss": 0.9221, + "learning_rate": 1.551126749279255e-05, + "loss": 0.9521, "step": 11814 }, { - "epoch": 0.33527241770715094, + "epoch": 0.3348068803309813, "grad_norm": 0.0, - "learning_rate": 1.5498160923396438e-05, - "loss": 0.9136, + "learning_rate": 1.5510501646450222e-05, + "loss": 0.9945, "step": 11815 }, { - "epoch": 0.33530079455164585, + "epoch": 0.33483521777324377, "grad_norm": 0.0, - "learning_rate": 1.549739320737295e-05, - "loss": 1.0259, + "learning_rate": 1.5509735753690903e-05, + "loss": 0.8501, "step": 11816 }, { - "epoch": 0.33532917139614077, + "epoch": 0.33486355521550626, "grad_norm": 0.0, - "learning_rate": 1.549662544491273e-05, - "loss": 0.8714, + "learning_rate": 1.5508969814521026e-05, + "loss": 0.9302, "step": 11817 }, { - "epoch": 0.33535754824063563, + "epoch": 0.3348918926577687, "grad_norm": 0.0, - "learning_rate": 1.5495857636022275e-05, - "loss": 0.958, + "learning_rate": 1.5508203828947056e-05, + "loss": 1.0052, "step": 11818 }, { - "epoch": 0.33538592508513054, + "epoch": 0.3349202301000312, "grad_norm": 0.0, - "learning_rate": 1.5495089780708062e-05, - "loss": 0.8987, + "learning_rate": 1.5507437796975436e-05, + "loss": 0.9578, "step": 11819 }, { - "epoch": 0.3354143019296254, + "epoch": 0.33494856754229363, "grad_norm": 0.0, - "learning_rate": 1.5494321878976578e-05, - "loss": 0.9052, + "learning_rate": 1.5506671718612624e-05, + "loss": 0.9332, "step": 11820 }, { - "epoch": 0.3354426787741203, + "epoch": 0.33497690498455607, "grad_norm": 0.0, - "learning_rate": 1.549355393083431e-05, - "loss": 0.9947, + "learning_rate": 1.5505905593865073e-05, + "loss": 1.0579, "step": 11821 }, { - "epoch": 0.33547105561861523, + "epoch": 0.33500524242681856, "grad_norm": 0.0, - "learning_rate": 1.5492785936287744e-05, - "loss": 0.9201, + "learning_rate": 1.550513942273923e-05, + "loss": 0.9612, "step": 11822 }, { - "epoch": 0.3354994324631101, + "epoch": 0.335033579869081, "grad_norm": 0.0, - "learning_rate": 1.5492017895343366e-05, - "loss": 1.0039, + "learning_rate": 1.5504373205241558e-05, + "loss": 0.9767, "step": 11823 }, { - "epoch": 0.335527809307605, + "epoch": 0.3350619173113435, "grad_norm": 0.0, - "learning_rate": 1.5491249808007672e-05, - "loss": 1.0356, + "learning_rate": 1.5503606941378504e-05, + "loss": 0.9403, "step": 11824 }, { - "epoch": 0.33555618615209987, + "epoch": 0.33509025475360593, "grad_norm": 0.0, - "learning_rate": 1.5490481674287142e-05, - "loss": 0.9113, + "learning_rate": 1.5502840631156524e-05, + "loss": 0.8877, "step": 11825 }, { - "epoch": 0.3355845629965948, + "epoch": 0.3351185921958684, "grad_norm": 0.0, - "learning_rate": 1.548971349418826e-05, - "loss": 1.0214, + "learning_rate": 1.5502074274582075e-05, + "loss": 1.0768, "step": 11826 }, { - "epoch": 0.3356129398410897, + "epoch": 0.33514692963813086, "grad_norm": 0.0, - "learning_rate": 1.548894526771753e-05, - "loss": 1.0264, + "learning_rate": 1.5501307871661612e-05, + "loss": 0.8656, "step": 11827 }, { - "epoch": 0.33564131668558456, + "epoch": 0.3351752670803933, "grad_norm": 0.0, - "learning_rate": 1.548817699488143e-05, - "loss": 0.9193, + "learning_rate": 1.5500541422401592e-05, + "loss": 1.0331, "step": 11828 }, { - "epoch": 0.33566969353007947, + "epoch": 0.3352036045226558, "grad_norm": 0.0, - "learning_rate": 1.548740867568645e-05, - "loss": 0.9417, + "learning_rate": 1.5499774926808468e-05, + "loss": 0.9233, "step": 11829 }, { - "epoch": 0.33569807037457433, + "epoch": 0.33523194196491823, "grad_norm": 0.0, - "learning_rate": 1.5486640310139085e-05, - "loss": 0.9635, + "learning_rate": 1.5499008384888692e-05, + "loss": 0.9298, "step": 11830 }, { - "epoch": 0.33572644721906925, + "epoch": 0.33526027940718073, "grad_norm": 0.0, - "learning_rate": 1.5485871898245824e-05, - "loss": 0.9469, + "learning_rate": 1.5498241796648733e-05, + "loss": 0.9908, "step": 11831 }, { - "epoch": 0.3357548240635641, + "epoch": 0.33528861684944317, "grad_norm": 0.0, - "learning_rate": 1.548510344001315e-05, - "loss": 0.8982, + "learning_rate": 1.549747516209504e-05, + "loss": 0.9191, "step": 11832 }, { - "epoch": 0.335783200908059, + "epoch": 0.3353169542917056, "grad_norm": 0.0, - "learning_rate": 1.5484334935447564e-05, - "loss": 0.8895, + "learning_rate": 1.549670848123407e-05, + "loss": 0.9209, "step": 11833 }, { - "epoch": 0.33581157775255394, + "epoch": 0.3353452917339681, "grad_norm": 0.0, - "learning_rate": 1.5483566384555556e-05, - "loss": 0.9773, + "learning_rate": 1.5495941754072285e-05, + "loss": 0.9226, "step": 11834 }, { - "epoch": 0.3358399545970488, + "epoch": 0.33537362917623054, "grad_norm": 0.0, - "learning_rate": 1.5482797787343615e-05, - "loss": 0.9113, + "learning_rate": 1.549517498061614e-05, + "loss": 0.9456, "step": 11835 }, { - "epoch": 0.3358683314415437, + "epoch": 0.33540196661849303, "grad_norm": 0.0, - "learning_rate": 1.5482029143818237e-05, - "loss": 0.8174, + "learning_rate": 1.5494408160872096e-05, + "loss": 0.9077, "step": 11836 }, { - "epoch": 0.33589670828603857, + "epoch": 0.33543030406075547, "grad_norm": 0.0, - "learning_rate": 1.5481260453985906e-05, - "loss": 0.9516, + "learning_rate": 1.5493641294846615e-05, + "loss": 0.9326, "step": 11837 }, { - "epoch": 0.3359250851305335, + "epoch": 0.33545864150301796, "grad_norm": 0.0, - "learning_rate": 1.548049171785313e-05, - "loss": 0.9799, + "learning_rate": 1.549287438254615e-05, + "loss": 1.0007, "step": 11838 }, { - "epoch": 0.3359534619750284, + "epoch": 0.3354869789452804, "grad_norm": 0.0, - "learning_rate": 1.547972293542639e-05, - "loss": 0.9999, + "learning_rate": 1.5492107423977167e-05, + "loss": 0.9105, "step": 11839 }, { - "epoch": 0.33598183881952326, + "epoch": 0.33551531638754284, "grad_norm": 0.0, - "learning_rate": 1.547895410671218e-05, - "loss": 0.9123, + "learning_rate": 1.549134041914612e-05, + "loss": 1.0097, "step": 11840 }, { - "epoch": 0.3360102156640182, + "epoch": 0.33554365382980533, "grad_norm": 0.0, - "learning_rate": 1.5478185231717002e-05, - "loss": 1.0031, + "learning_rate": 1.5490573368059475e-05, + "loss": 1.0392, "step": 11841 }, { - "epoch": 0.33603859250851303, + "epoch": 0.33557199127206777, "grad_norm": 0.0, - "learning_rate": 1.5477416310447346e-05, - "loss": 0.9034, + "learning_rate": 1.5489806270723695e-05, + "loss": 0.9662, "step": 11842 }, { - "epoch": 0.33606696935300795, + "epoch": 0.33560032871433026, "grad_norm": 0.0, - "learning_rate": 1.5476647342909714e-05, - "loss": 0.9662, + "learning_rate": 1.5489039127145236e-05, + "loss": 0.9414, "step": 11843 }, { - "epoch": 0.33609534619750286, + "epoch": 0.3356286661565927, "grad_norm": 0.0, - "learning_rate": 1.547587832911059e-05, - "loss": 0.9675, + "learning_rate": 1.5488271937330562e-05, + "loss": 0.8892, "step": 11844 }, { - "epoch": 0.3361237230419977, + "epoch": 0.33565700359885514, "grad_norm": 0.0, - "learning_rate": 1.5475109269056473e-05, - "loss": 0.984, + "learning_rate": 1.5487504701286134e-05, + "loss": 0.911, "step": 11845 }, { - "epoch": 0.33615209988649264, + "epoch": 0.33568534104111764, "grad_norm": 0.0, - "learning_rate": 1.547434016275387e-05, - "loss": 1.0226, + "learning_rate": 1.5486737419018417e-05, + "loss": 0.9669, "step": 11846 }, { - "epoch": 0.3361804767309875, + "epoch": 0.3357136784833801, "grad_norm": 0.0, - "learning_rate": 1.5473571010209262e-05, - "loss": 0.89, + "learning_rate": 1.5485970090533875e-05, + "loss": 0.9496, "step": 11847 }, { - "epoch": 0.3362088535754824, + "epoch": 0.33574201592564257, "grad_norm": 0.0, - "learning_rate": 1.5472801811429155e-05, - "loss": 1.0015, + "learning_rate": 1.5485202715838966e-05, + "loss": 0.9056, "step": 11848 }, { - "epoch": 0.3362372304199773, + "epoch": 0.335770353367905, "grad_norm": 0.0, - "learning_rate": 1.5472032566420047e-05, - "loss": 0.8943, + "learning_rate": 1.548443529494016e-05, + "loss": 0.8697, "step": 11849 }, { - "epoch": 0.3362656072644722, + "epoch": 0.3357986908101675, "grad_norm": 0.0, - "learning_rate": 1.547126327518844e-05, - "loss": 0.8719, + "learning_rate": 1.548366782784392e-05, + "loss": 0.9557, "step": 11850 }, { - "epoch": 0.3362939841089671, + "epoch": 0.33582702825242994, "grad_norm": 0.0, - "learning_rate": 1.5470493937740816e-05, - "loss": 0.9885, + "learning_rate": 1.5482900314556707e-05, + "loss": 0.9305, "step": 11851 }, { - "epoch": 0.33632236095346196, + "epoch": 0.3358553656946924, "grad_norm": 0.0, - "learning_rate": 1.5469724554083686e-05, - "loss": 1.0066, + "learning_rate": 1.548213275508499e-05, + "loss": 0.9305, "step": 11852 }, { - "epoch": 0.3363507377979569, + "epoch": 0.33588370313695487, "grad_norm": 0.0, - "learning_rate": 1.546895512422355e-05, - "loss": 1.0147, + "learning_rate": 1.5481365149435235e-05, + "loss": 0.9708, "step": 11853 }, { - "epoch": 0.33637911464245174, + "epoch": 0.3359120405792173, "grad_norm": 0.0, - "learning_rate": 1.5468185648166902e-05, - "loss": 0.9429, + "learning_rate": 1.5480597497613903e-05, + "loss": 0.9453, "step": 11854 }, { - "epoch": 0.33640749148694665, + "epoch": 0.3359403780214798, "grad_norm": 0.0, - "learning_rate": 1.5467416125920244e-05, - "loss": 0.9221, + "learning_rate": 1.5479829799627464e-05, + "loss": 0.9876, "step": 11855 }, { - "epoch": 0.33643586833144157, + "epoch": 0.33596871546374224, "grad_norm": 0.0, - "learning_rate": 1.5466646557490077e-05, - "loss": 0.9955, + "learning_rate": 1.5479062055482384e-05, + "loss": 0.899, "step": 11856 }, { - "epoch": 0.3364642451759364, + "epoch": 0.3359970529060047, "grad_norm": 0.0, - "learning_rate": 1.5465876942882902e-05, - "loss": 0.9793, + "learning_rate": 1.547829426518513e-05, + "loss": 0.9095, "step": 11857 }, { - "epoch": 0.33649262202043134, + "epoch": 0.33602539034826717, "grad_norm": 0.0, - "learning_rate": 1.546510728210522e-05, - "loss": 0.944, + "learning_rate": 1.547752642874217e-05, + "loss": 0.9083, "step": 11858 }, { - "epoch": 0.3365209988649262, + "epoch": 0.3360537277905296, "grad_norm": 0.0, - "learning_rate": 1.5464337575163525e-05, - "loss": 0.8857, + "learning_rate": 1.5476758546159966e-05, + "loss": 0.9303, "step": 11859 }, { - "epoch": 0.3365493757094211, + "epoch": 0.3360820652327921, "grad_norm": 0.0, - "learning_rate": 1.546356782206433e-05, - "loss": 0.9902, + "learning_rate": 1.5475990617444997e-05, + "loss": 0.9117, "step": 11860 }, { - "epoch": 0.33657775255391603, + "epoch": 0.33611040267505454, "grad_norm": 0.0, - "learning_rate": 1.5462798022814133e-05, - "loss": 0.9751, + "learning_rate": 1.547522264260372e-05, + "loss": 0.9339, "step": 11861 }, { - "epoch": 0.3366061293984109, + "epoch": 0.33613874011731704, "grad_norm": 0.0, - "learning_rate": 1.546202817741943e-05, - "loss": 1.0139, + "learning_rate": 1.5474454621642613e-05, + "loss": 0.8533, "step": 11862 }, { - "epoch": 0.3366345062429058, + "epoch": 0.3361670775595795, "grad_norm": 0.0, - "learning_rate": 1.546125828588673e-05, - "loss": 0.9763, + "learning_rate": 1.5473686554568143e-05, + "loss": 1.0122, "step": 11863 }, { - "epoch": 0.33666288308740067, + "epoch": 0.3361954150018419, "grad_norm": 0.0, - "learning_rate": 1.546048834822254e-05, - "loss": 1.0481, + "learning_rate": 1.5472918441386776e-05, + "loss": 0.973, "step": 11864 }, { - "epoch": 0.3366912599318956, + "epoch": 0.3362237524441044, "grad_norm": 0.0, - "learning_rate": 1.5459718364433357e-05, - "loss": 0.9462, + "learning_rate": 1.5472150282104988e-05, + "loss": 0.9374, "step": 11865 }, { - "epoch": 0.33671963677639044, + "epoch": 0.33625208988636685, "grad_norm": 0.0, - "learning_rate": 1.545894833452569e-05, - "loss": 0.9354, + "learning_rate": 1.547138207672924e-05, + "loss": 0.8057, "step": 11866 }, { - "epoch": 0.33674801362088536, + "epoch": 0.33628042732862934, "grad_norm": 0.0, - "learning_rate": 1.545817825850604e-05, - "loss": 0.851, + "learning_rate": 1.5470613825266016e-05, + "loss": 0.9777, "step": 11867 }, { - "epoch": 0.33677639046538027, + "epoch": 0.3363087647708918, "grad_norm": 0.0, - "learning_rate": 1.545740813638091e-05, - "loss": 1.0447, + "learning_rate": 1.5469845527721776e-05, + "loss": 0.9752, "step": 11868 }, { - "epoch": 0.33680476730987513, + "epoch": 0.3363371022131542, "grad_norm": 0.0, - "learning_rate": 1.545663796815681e-05, - "loss": 1.01, + "learning_rate": 1.5469077184103e-05, + "loss": 0.9756, "step": 11869 }, { - "epoch": 0.33683314415437005, + "epoch": 0.3363654396554167, "grad_norm": 0.0, - "learning_rate": 1.5455867753840243e-05, - "loss": 0.9179, + "learning_rate": 1.5468308794416152e-05, + "loss": 0.9867, "step": 11870 }, { - "epoch": 0.3368615209988649, + "epoch": 0.33639377709767915, "grad_norm": 0.0, - "learning_rate": 1.5455097493437718e-05, - "loss": 0.9136, + "learning_rate": 1.546754035866771e-05, + "loss": 0.9964, "step": 11871 }, { - "epoch": 0.3368898978433598, + "epoch": 0.33642211453994164, "grad_norm": 0.0, - "learning_rate": 1.545432718695574e-05, - "loss": 0.9434, + "learning_rate": 1.5466771876864143e-05, + "loss": 0.9619, "step": 11872 }, { - "epoch": 0.33691827468785474, + "epoch": 0.3364504519822041, "grad_norm": 0.0, - "learning_rate": 1.545355683440081e-05, - "loss": 0.9366, + "learning_rate": 1.546600334901193e-05, + "loss": 0.8945, "step": 11873 }, { - "epoch": 0.3369466515323496, + "epoch": 0.3364787894244666, "grad_norm": 0.0, - "learning_rate": 1.5452786435779444e-05, - "loss": 0.8569, + "learning_rate": 1.5465234775117538e-05, + "loss": 1.0291, "step": 11874 }, { - "epoch": 0.3369750283768445, + "epoch": 0.336507126866729, "grad_norm": 0.0, - "learning_rate": 1.545201599109815e-05, - "loss": 0.9066, + "learning_rate": 1.5464466155187445e-05, + "loss": 1.1409, "step": 11875 }, { - "epoch": 0.33700340522133937, + "epoch": 0.33653546430899145, "grad_norm": 0.0, - "learning_rate": 1.5451245500363424e-05, - "loss": 0.9722, + "learning_rate": 1.5463697489228125e-05, + "loss": 0.962, "step": 11876 }, { - "epoch": 0.3370317820658343, + "epoch": 0.33656380175125394, "grad_norm": 0.0, - "learning_rate": 1.5450474963581787e-05, - "loss": 0.9255, + "learning_rate": 1.546292877724605e-05, + "loss": 0.9522, "step": 11877 }, { - "epoch": 0.33706015891032914, + "epoch": 0.3365921391935164, "grad_norm": 0.0, - "learning_rate": 1.5449704380759738e-05, - "loss": 0.9821, + "learning_rate": 1.5462160019247702e-05, + "loss": 0.9489, "step": 11878 }, { - "epoch": 0.33708853575482406, + "epoch": 0.3366204766357789, "grad_norm": 0.0, - "learning_rate": 1.5448933751903796e-05, - "loss": 1.0112, + "learning_rate": 1.546139121523955e-05, + "loss": 0.8708, "step": 11879 }, { - "epoch": 0.337116912599319, + "epoch": 0.3366488140780413, "grad_norm": 0.0, - "learning_rate": 1.5448163077020463e-05, - "loss": 0.8719, + "learning_rate": 1.546062236522807e-05, + "loss": 0.9695, "step": 11880 }, { - "epoch": 0.33714528944381383, + "epoch": 0.33667715152030375, "grad_norm": 0.0, - "learning_rate": 1.5447392356116255e-05, - "loss": 0.887, + "learning_rate": 1.5459853469219738e-05, + "loss": 1.0096, "step": 11881 }, { - "epoch": 0.33717366628830875, + "epoch": 0.33670548896256625, "grad_norm": 0.0, - "learning_rate": 1.5446621589197673e-05, - "loss": 0.9603, + "learning_rate": 1.5459084527221037e-05, + "loss": 1.0209, "step": 11882 }, { - "epoch": 0.3372020431328036, + "epoch": 0.3367338264048287, "grad_norm": 0.0, - "learning_rate": 1.544585077627124e-05, - "loss": 0.9767, + "learning_rate": 1.545831553923844e-05, + "loss": 0.9272, "step": 11883 }, { - "epoch": 0.3372304199772985, + "epoch": 0.3367621638470912, "grad_norm": 0.0, - "learning_rate": 1.5445079917343456e-05, - "loss": 0.9689, + "learning_rate": 1.545754650527842e-05, + "loss": 0.9933, "step": 11884 }, { - "epoch": 0.33725879682179344, + "epoch": 0.3367905012893536, "grad_norm": 0.0, - "learning_rate": 1.544430901242084e-05, - "loss": 0.9739, + "learning_rate": 1.5456777425347462e-05, + "loss": 1.0566, "step": 11885 }, { - "epoch": 0.3372871736662883, + "epoch": 0.3368188387316161, "grad_norm": 0.0, - "learning_rate": 1.54435380615099e-05, - "loss": 1.0322, + "learning_rate": 1.545600829945204e-05, + "loss": 0.8874, "step": 11886 }, { - "epoch": 0.3373155505107832, + "epoch": 0.33684717617387855, "grad_norm": 0.0, - "learning_rate": 1.544276706461715e-05, - "loss": 0.9531, + "learning_rate": 1.5455239127598636e-05, + "loss": 0.9, "step": 11887 }, { - "epoch": 0.3373439273552781, + "epoch": 0.336875513616141, "grad_norm": 0.0, - "learning_rate": 1.54419960217491e-05, - "loss": 0.9935, + "learning_rate": 1.5454469909793726e-05, + "loss": 1.0211, "step": 11888 }, { - "epoch": 0.337372304199773, + "epoch": 0.3369038510584035, "grad_norm": 0.0, - "learning_rate": 1.5441224932912265e-05, - "loss": 0.9661, + "learning_rate": 1.5453700646043793e-05, + "loss": 0.9323, "step": 11889 }, { - "epoch": 0.3374006810442679, + "epoch": 0.3369321885006659, "grad_norm": 0.0, - "learning_rate": 1.544045379811316e-05, - "loss": 0.8973, + "learning_rate": 1.545293133635531e-05, + "loss": 0.9479, "step": 11890 }, { - "epoch": 0.33742905788876276, + "epoch": 0.3369605259429284, "grad_norm": 0.0, - "learning_rate": 1.5439682617358295e-05, - "loss": 0.871, + "learning_rate": 1.5452161980734764e-05, + "loss": 0.9786, "step": 11891 }, { - "epoch": 0.3374574347332577, + "epoch": 0.33698886338519085, "grad_norm": 0.0, - "learning_rate": 1.5438911390654193e-05, - "loss": 1.0229, + "learning_rate": 1.5451392579188635e-05, + "loss": 0.9918, "step": 11892 }, { - "epoch": 0.33748581157775254, + "epoch": 0.3370172008274533, "grad_norm": 0.0, - "learning_rate": 1.5438140118007353e-05, - "loss": 0.9491, + "learning_rate": 1.54506231317234e-05, + "loss": 0.9104, "step": 11893 }, { - "epoch": 0.33751418842224745, + "epoch": 0.3370455382697158, "grad_norm": 0.0, - "learning_rate": 1.5437368799424307e-05, - "loss": 1.0707, + "learning_rate": 1.5449853638345538e-05, + "loss": 0.9087, "step": 11894 }, { - "epoch": 0.3375425652667423, + "epoch": 0.3370738757119782, "grad_norm": 0.0, - "learning_rate": 1.5436597434911556e-05, - "loss": 0.8924, + "learning_rate": 1.544908409906154e-05, + "loss": 0.9147, "step": 11895 }, { - "epoch": 0.3375709421112372, + "epoch": 0.3371022131542407, "grad_norm": 0.0, - "learning_rate": 1.5435826024475623e-05, - "loss": 0.8298, + "learning_rate": 1.544831451387788e-05, + "loss": 0.9501, "step": 11896 }, { - "epoch": 0.33759931895573214, + "epoch": 0.33713055059650315, "grad_norm": 0.0, - "learning_rate": 1.5435054568123027e-05, - "loss": 1.0583, + "learning_rate": 1.5447544882801046e-05, + "loss": 0.9679, "step": 11897 }, { - "epoch": 0.337627695800227, + "epoch": 0.33715888803876565, "grad_norm": 0.0, - "learning_rate": 1.5434283065860277e-05, - "loss": 0.9035, + "learning_rate": 1.5446775205837518e-05, + "loss": 0.9778, "step": 11898 }, { - "epoch": 0.3376560726447219, + "epoch": 0.3371872254810281, "grad_norm": 0.0, - "learning_rate": 1.5433511517693897e-05, - "loss": 0.9971, + "learning_rate": 1.5446005482993783e-05, + "loss": 0.907, "step": 11899 }, { - "epoch": 0.3376844494892168, + "epoch": 0.3372155629232905, "grad_norm": 0.0, - "learning_rate": 1.54327399236304e-05, - "loss": 0.9098, + "learning_rate": 1.544523571427632e-05, + "loss": 0.983, "step": 11900 }, { - "epoch": 0.3377128263337117, + "epoch": 0.337243900365553, "grad_norm": 0.0, - "learning_rate": 1.54319682836763e-05, - "loss": 0.9179, + "learning_rate": 1.5444465899691612e-05, + "loss": 0.8768, "step": 11901 }, { - "epoch": 0.3377412031782066, + "epoch": 0.33727223780781546, "grad_norm": 0.0, - "learning_rate": 1.5431196597838126e-05, - "loss": 0.9598, + "learning_rate": 1.5443696039246145e-05, + "loss": 0.9589, "step": 11902 }, { - "epoch": 0.33776958002270147, + "epoch": 0.33730057525007795, "grad_norm": 0.0, - "learning_rate": 1.5430424866122386e-05, - "loss": 0.8868, + "learning_rate": 1.544292613294641e-05, + "loss": 0.9532, "step": 11903 }, { - "epoch": 0.3377979568671964, + "epoch": 0.3373289126923404, "grad_norm": 0.0, - "learning_rate": 1.5429653088535604e-05, - "loss": 0.9536, + "learning_rate": 1.5442156180798883e-05, + "loss": 0.9994, "step": 11904 }, { - "epoch": 0.33782633371169124, + "epoch": 0.3373572501346028, "grad_norm": 0.0, - "learning_rate": 1.5428881265084303e-05, - "loss": 1.0327, + "learning_rate": 1.5441386182810055e-05, + "loss": 1.0034, "step": 11905 }, { - "epoch": 0.33785471055618616, + "epoch": 0.3373855875768653, "grad_norm": 0.0, - "learning_rate": 1.5428109395774993e-05, - "loss": 1.0341, + "learning_rate": 1.5440616138986407e-05, + "loss": 0.877, "step": 11906 }, { - "epoch": 0.33788308740068107, + "epoch": 0.33741392501912776, "grad_norm": 0.0, - "learning_rate": 1.54273374806142e-05, - "loss": 1.0667, + "learning_rate": 1.5439846049334434e-05, + "loss": 0.9565, "step": 11907 }, { - "epoch": 0.33791146424517593, + "epoch": 0.33744226246139025, "grad_norm": 0.0, - "learning_rate": 1.542656551960844e-05, - "loss": 0.8965, + "learning_rate": 1.5439075913860615e-05, + "loss": 1.0421, "step": 11908 }, { - "epoch": 0.33793984108967084, + "epoch": 0.3374705999036527, "grad_norm": 0.0, - "learning_rate": 1.542579351276424e-05, - "loss": 0.9319, + "learning_rate": 1.5438305732571445e-05, + "loss": 0.949, "step": 11909 }, { - "epoch": 0.3379682179341657, + "epoch": 0.3374989373459152, "grad_norm": 0.0, - "learning_rate": 1.542502146008812e-05, - "loss": 0.9349, + "learning_rate": 1.5437535505473398e-05, + "loss": 0.9176, "step": 11910 }, { - "epoch": 0.3379965947786606, + "epoch": 0.3375272747881776, "grad_norm": 0.0, - "learning_rate": 1.5424249361586596e-05, - "loss": 0.9209, + "learning_rate": 1.5436765232572976e-05, + "loss": 0.8717, "step": 11911 }, { - "epoch": 0.3380249716231555, + "epoch": 0.33755561223044006, "grad_norm": 0.0, - "learning_rate": 1.54234772172662e-05, - "loss": 0.9437, + "learning_rate": 1.5435994913876657e-05, + "loss": 0.9365, "step": 11912 }, { - "epoch": 0.3380533484676504, + "epoch": 0.33758394967270255, "grad_norm": 0.0, - "learning_rate": 1.542270502713344e-05, - "loss": 1.016, + "learning_rate": 1.543522454939094e-05, + "loss": 0.8515, "step": 11913 }, { - "epoch": 0.3380817253121453, + "epoch": 0.337612287114965, "grad_norm": 0.0, - "learning_rate": 1.5421932791194855e-05, - "loss": 0.9245, + "learning_rate": 1.5434454139122302e-05, + "loss": 0.934, "step": 11914 }, { - "epoch": 0.33811010215664017, + "epoch": 0.3376406245572275, "grad_norm": 0.0, - "learning_rate": 1.5421160509456956e-05, - "loss": 1.0346, + "learning_rate": 1.5433683683077243e-05, + "loss": 1.0717, "step": 11915 }, { - "epoch": 0.3381384790011351, + "epoch": 0.3376689619994899, "grad_norm": 0.0, - "learning_rate": 1.5420388181926268e-05, - "loss": 0.9869, + "learning_rate": 1.543291318126225e-05, + "loss": 0.9634, "step": 11916 }, { - "epoch": 0.33816685584562994, + "epoch": 0.33769729944175236, "grad_norm": 0.0, - "learning_rate": 1.5419615808609322e-05, - "loss": 0.9555, + "learning_rate": 1.543214263368381e-05, + "loss": 0.8601, "step": 11917 }, { - "epoch": 0.33819523269012486, + "epoch": 0.33772563688401486, "grad_norm": 0.0, - "learning_rate": 1.5418843389512637e-05, - "loss": 0.9577, + "learning_rate": 1.5431372040348414e-05, + "loss": 0.9446, "step": 11918 }, { - "epoch": 0.3382236095346198, + "epoch": 0.3377539743262773, "grad_norm": 0.0, - "learning_rate": 1.5418070924642736e-05, - "loss": 0.9608, + "learning_rate": 1.5430601401262554e-05, + "loss": 0.9015, "step": 11919 }, { - "epoch": 0.33825198637911463, + "epoch": 0.3377823117685398, "grad_norm": 0.0, - "learning_rate": 1.5417298414006147e-05, - "loss": 0.9333, + "learning_rate": 1.5429830716432723e-05, + "loss": 0.9427, "step": 11920 }, { - "epoch": 0.33828036322360955, + "epoch": 0.3378106492108022, "grad_norm": 0.0, - "learning_rate": 1.5416525857609396e-05, - "loss": 0.8756, + "learning_rate": 1.5429059985865414e-05, + "loss": 1.0802, "step": 11921 }, { - "epoch": 0.3383087400681044, + "epoch": 0.3378389866530647, "grad_norm": 0.0, - "learning_rate": 1.5415753255459005e-05, - "loss": 0.9477, + "learning_rate": 1.5428289209567114e-05, + "loss": 1.0282, "step": 11922 }, { - "epoch": 0.3383371169125993, + "epoch": 0.33786732409532716, "grad_norm": 0.0, - "learning_rate": 1.5414980607561506e-05, - "loss": 0.9323, + "learning_rate": 1.5427518387544316e-05, + "loss": 0.9832, "step": 11923 }, { - "epoch": 0.33836549375709424, + "epoch": 0.3378956615375896, "grad_norm": 0.0, - "learning_rate": 1.5414207913923422e-05, - "loss": 0.9353, + "learning_rate": 1.5426747519803518e-05, + "loss": 1.1251, "step": 11924 }, { - "epoch": 0.3383938706015891, + "epoch": 0.3379239989798521, "grad_norm": 0.0, - "learning_rate": 1.5413435174551277e-05, - "loss": 0.8973, + "learning_rate": 1.542597660635121e-05, + "loss": 0.9839, "step": 11925 }, { - "epoch": 0.338422247446084, + "epoch": 0.33795233642211453, "grad_norm": 0.0, - "learning_rate": 1.5412662389451605e-05, - "loss": 0.8525, + "learning_rate": 1.5425205647193887e-05, + "loss": 1.025, "step": 11926 }, { - "epoch": 0.33845062429057887, + "epoch": 0.337980673864377, "grad_norm": 0.0, - "learning_rate": 1.5411889558630928e-05, - "loss": 1.0208, + "learning_rate": 1.542443464233804e-05, + "loss": 0.9463, "step": 11927 }, { - "epoch": 0.3384790011350738, + "epoch": 0.33800901130663946, "grad_norm": 0.0, - "learning_rate": 1.541111668209578e-05, - "loss": 0.9828, + "learning_rate": 1.5423663591790168e-05, + "loss": 0.9111, "step": 11928 }, { - "epoch": 0.33850737797956865, + "epoch": 0.3380373487489019, "grad_norm": 0.0, - "learning_rate": 1.5410343759852682e-05, - "loss": 0.8694, + "learning_rate": 1.5422892495556764e-05, + "loss": 0.8407, "step": 11929 }, { - "epoch": 0.33853575482406356, + "epoch": 0.3380656861911644, "grad_norm": 0.0, - "learning_rate": 1.5409570791908172e-05, - "loss": 0.9118, + "learning_rate": 1.5422121353644323e-05, + "loss": 1.0062, "step": 11930 }, { - "epoch": 0.3385641316685585, + "epoch": 0.33809402363342683, "grad_norm": 0.0, - "learning_rate": 1.5408797778268772e-05, - "loss": 0.9685, + "learning_rate": 1.5421350166059336e-05, + "loss": 1.1021, "step": 11931 }, { - "epoch": 0.33859250851305334, + "epoch": 0.3381223610756893, "grad_norm": 0.0, - "learning_rate": 1.540802471894101e-05, - "loss": 1.0514, + "learning_rate": 1.5420578932808307e-05, + "loss": 0.9894, "step": 11932 }, { - "epoch": 0.33862088535754825, + "epoch": 0.33815069851795176, "grad_norm": 0.0, - "learning_rate": 1.5407251613931425e-05, - "loss": 0.8604, + "learning_rate": 1.541980765389773e-05, + "loss": 0.9585, "step": 11933 }, { - "epoch": 0.3386492622020431, + "epoch": 0.33817903596021426, "grad_norm": 0.0, - "learning_rate": 1.5406478463246543e-05, - "loss": 1.0204, + "learning_rate": 1.54190363293341e-05, + "loss": 0.9982, "step": 11934 }, { - "epoch": 0.338677639046538, + "epoch": 0.3382073734024767, "grad_norm": 0.0, - "learning_rate": 1.540570526689289e-05, - "loss": 0.8762, + "learning_rate": 1.541826495912391e-05, + "loss": 0.9142, "step": 11935 }, { - "epoch": 0.33870601589103294, + "epoch": 0.33823571084473913, "grad_norm": 0.0, - "learning_rate": 1.5404932024877005e-05, - "loss": 0.8464, + "learning_rate": 1.5417493543273665e-05, + "loss": 0.909, "step": 11936 }, { - "epoch": 0.3387343927355278, + "epoch": 0.33826404828700163, "grad_norm": 0.0, - "learning_rate": 1.5404158737205418e-05, - "loss": 0.9158, + "learning_rate": 1.541672208178986e-05, + "loss": 0.9544, "step": 11937 }, { - "epoch": 0.3387627695800227, + "epoch": 0.33829238572926407, "grad_norm": 0.0, - "learning_rate": 1.5403385403884653e-05, - "loss": 1.0051, + "learning_rate": 1.5415950574678996e-05, + "loss": 0.9419, "step": 11938 }, { - "epoch": 0.3387911464245176, + "epoch": 0.33832072317152656, "grad_norm": 0.0, - "learning_rate": 1.5402612024921254e-05, - "loss": 0.9783, + "learning_rate": 1.5415179021947566e-05, + "loss": 0.871, "step": 11939 }, { - "epoch": 0.3388195232690125, + "epoch": 0.338349060613789, "grad_norm": 0.0, - "learning_rate": 1.5401838600321746e-05, - "loss": 0.9734, + "learning_rate": 1.5414407423602073e-05, + "loss": 0.9851, "step": 11940 }, { - "epoch": 0.3388479001135074, + "epoch": 0.33837739805605144, "grad_norm": 0.0, - "learning_rate": 1.5401065130092665e-05, - "loss": 1.0426, + "learning_rate": 1.541363577964902e-05, + "loss": 0.9688, "step": 11941 }, { - "epoch": 0.33887627695800226, + "epoch": 0.33840573549831393, "grad_norm": 0.0, - "learning_rate": 1.5400291614240542e-05, - "loss": 1.0607, + "learning_rate": 1.5412864090094898e-05, + "loss": 0.8814, "step": 11942 }, { - "epoch": 0.3389046538024972, + "epoch": 0.33843407294057637, "grad_norm": 0.0, - "learning_rate": 1.5399518052771918e-05, - "loss": 0.9435, + "learning_rate": 1.541209235494621e-05, + "loss": 0.9042, "step": 11943 }, { - "epoch": 0.33893303064699204, + "epoch": 0.33846241038283886, "grad_norm": 0.0, - "learning_rate": 1.539874444569332e-05, - "loss": 0.8705, + "learning_rate": 1.541132057420946e-05, + "loss": 1.0072, "step": 11944 }, { - "epoch": 0.33896140749148695, + "epoch": 0.3384907478251013, "grad_norm": 0.0, - "learning_rate": 1.539797079301128e-05, - "loss": 0.8116, + "learning_rate": 1.5410548747891148e-05, + "loss": 0.924, "step": 11945 }, { - "epoch": 0.3389897843359818, + "epoch": 0.3385190852673638, "grad_norm": 0.0, - "learning_rate": 1.539719709473235e-05, - "loss": 0.9039, + "learning_rate": 1.5409776875997778e-05, + "loss": 0.8966, "step": 11946 }, { - "epoch": 0.33901816118047673, + "epoch": 0.33854742270962623, "grad_norm": 0.0, - "learning_rate": 1.5396423350863043e-05, - "loss": 0.8349, + "learning_rate": 1.5409004958535848e-05, + "loss": 0.9351, "step": 11947 }, { - "epoch": 0.33904653802497164, + "epoch": 0.33857576015188867, "grad_norm": 0.0, - "learning_rate": 1.5395649561409903e-05, - "loss": 0.9558, + "learning_rate": 1.5408232995511853e-05, + "loss": 0.979, "step": 11948 }, { - "epoch": 0.3390749148694665, + "epoch": 0.33860409759415117, "grad_norm": 0.0, - "learning_rate": 1.5394875726379476e-05, - "loss": 0.9474, + "learning_rate": 1.540746098693231e-05, + "loss": 0.8633, "step": 11949 }, { - "epoch": 0.3391032917139614, + "epoch": 0.3386324350364136, "grad_norm": 0.0, - "learning_rate": 1.539410184577829e-05, - "loss": 0.9398, + "learning_rate": 1.5406688932803713e-05, + "loss": 1.0162, "step": 11950 }, { - "epoch": 0.3391316685584563, + "epoch": 0.3386607724786761, "grad_norm": 0.0, - "learning_rate": 1.539332791961288e-05, - "loss": 0.9745, + "learning_rate": 1.540591683313257e-05, + "loss": 0.9859, "step": 11951 }, { - "epoch": 0.3391600454029512, + "epoch": 0.33868910992093854, "grad_norm": 0.0, - "learning_rate": 1.539255394788979e-05, - "loss": 0.9569, + "learning_rate": 1.5405144687925377e-05, + "loss": 0.9221, "step": 11952 }, { - "epoch": 0.3391884222474461, + "epoch": 0.338717447363201, "grad_norm": 0.0, - "learning_rate": 1.539177993061556e-05, - "loss": 0.9897, + "learning_rate": 1.5404372497188646e-05, + "loss": 0.9034, "step": 11953 }, { - "epoch": 0.33921679909194097, + "epoch": 0.33874578480546347, "grad_norm": 0.0, - "learning_rate": 1.5391005867796712e-05, - "loss": 0.9386, + "learning_rate": 1.540360026092888e-05, + "loss": 0.9849, "step": 11954 }, { - "epoch": 0.3392451759364359, + "epoch": 0.3387741222477259, "grad_norm": 0.0, - "learning_rate": 1.5390231759439798e-05, - "loss": 0.9475, + "learning_rate": 1.5402827979152582e-05, + "loss": 0.8653, "step": 11955 }, { - "epoch": 0.33927355278093074, + "epoch": 0.3388024596899884, "grad_norm": 0.0, - "learning_rate": 1.5389457605551353e-05, - "loss": 0.7526, + "learning_rate": 1.5402055651866256e-05, + "loss": 0.9998, "step": 11956 }, { - "epoch": 0.33930192962542566, + "epoch": 0.33883079713225084, "grad_norm": 0.0, - "learning_rate": 1.5388683406137923e-05, - "loss": 0.9416, + "learning_rate": 1.540128327907641e-05, + "loss": 0.8587, "step": 11957 }, { - "epoch": 0.3393303064699205, + "epoch": 0.33885913457451333, "grad_norm": 0.0, - "learning_rate": 1.5387909161206037e-05, - "loss": 0.9241, + "learning_rate": 1.540051086078955e-05, + "loss": 0.893, "step": 11958 }, { - "epoch": 0.33935868331441543, + "epoch": 0.33888747201677577, "grad_norm": 0.0, - "learning_rate": 1.5387134870762243e-05, - "loss": 0.8185, + "learning_rate": 1.5399738397012177e-05, + "loss": 1.0014, "step": 11959 }, { - "epoch": 0.33938706015891035, + "epoch": 0.3389158094590382, "grad_norm": 0.0, - "learning_rate": 1.538636053481308e-05, - "loss": 1.0011, + "learning_rate": 1.5398965887750807e-05, + "loss": 0.9232, "step": 11960 }, { - "epoch": 0.3394154370034052, + "epoch": 0.3389441469013007, "grad_norm": 0.0, - "learning_rate": 1.5385586153365082e-05, - "loss": 0.8423, + "learning_rate": 1.5398193333011944e-05, + "loss": 0.9632, "step": 11961 }, { - "epoch": 0.3394438138479001, + "epoch": 0.33897248434356314, "grad_norm": 0.0, - "learning_rate": 1.53848117264248e-05, - "loss": 0.9387, + "learning_rate": 1.539742073280209e-05, + "loss": 1.0902, "step": 11962 }, { - "epoch": 0.339472190692395, + "epoch": 0.33900082178582563, "grad_norm": 0.0, - "learning_rate": 1.5384037253998765e-05, - "loss": 1.0075, + "learning_rate": 1.5396648087127763e-05, + "loss": 0.952, "step": 11963 }, { - "epoch": 0.3395005675368899, + "epoch": 0.3390291592280881, "grad_norm": 0.0, - "learning_rate": 1.538326273609353e-05, - "loss": 0.9249, + "learning_rate": 1.5395875395995456e-05, + "loss": 1.0112, "step": 11964 }, { - "epoch": 0.3395289443813848, + "epoch": 0.3390574966703505, "grad_norm": 0.0, - "learning_rate": 1.5382488172715633e-05, - "loss": 0.9886, + "learning_rate": 1.539510265941169e-05, + "loss": 0.8631, "step": 11965 }, { - "epoch": 0.33955732122587967, + "epoch": 0.339085834112613, "grad_norm": 0.0, - "learning_rate": 1.5381713563871616e-05, - "loss": 0.9076, + "learning_rate": 1.5394329877382973e-05, + "loss": 0.9117, "step": 11966 }, { - "epoch": 0.3395856980703746, + "epoch": 0.33911417155487544, "grad_norm": 0.0, - "learning_rate": 1.5380938909568025e-05, - "loss": 0.9471, + "learning_rate": 1.539355704991581e-05, + "loss": 0.9531, "step": 11967 }, { - "epoch": 0.33961407491486945, + "epoch": 0.33914250899713794, "grad_norm": 0.0, - "learning_rate": 1.5380164209811396e-05, - "loss": 1.0051, + "learning_rate": 1.5392784177016715e-05, + "loss": 0.9283, "step": 11968 }, { - "epoch": 0.33964245175936436, + "epoch": 0.3391708464394004, "grad_norm": 0.0, - "learning_rate": 1.5379389464608282e-05, - "loss": 1.005, + "learning_rate": 1.5392011258692198e-05, + "loss": 0.9562, "step": 11969 }, { - "epoch": 0.3396708286038593, + "epoch": 0.3391991838816628, "grad_norm": 0.0, - "learning_rate": 1.537861467396522e-05, - "loss": 0.8452, + "learning_rate": 1.5391238294948768e-05, + "loss": 0.9097, "step": 11970 }, { - "epoch": 0.33969920544835414, + "epoch": 0.3392275213239253, "grad_norm": 0.0, - "learning_rate": 1.5377839837888763e-05, - "loss": 0.9686, + "learning_rate": 1.5390465285792933e-05, + "loss": 0.9505, "step": 11971 }, { - "epoch": 0.33972758229284905, + "epoch": 0.33925585876618775, "grad_norm": 0.0, - "learning_rate": 1.5377064956385447e-05, - "loss": 0.9901, + "learning_rate": 1.5389692231231207e-05, + "loss": 0.8503, "step": 11972 }, { - "epoch": 0.3397559591373439, + "epoch": 0.33928419620845024, "grad_norm": 0.0, - "learning_rate": 1.537629002946182e-05, - "loss": 0.9125, + "learning_rate": 1.5388919131270103e-05, + "loss": 1.0555, "step": 11973 }, { - "epoch": 0.3397843359818388, + "epoch": 0.3393125336507127, "grad_norm": 0.0, - "learning_rate": 1.5375515057124437e-05, - "loss": 0.898, + "learning_rate": 1.538814598591613e-05, + "loss": 0.8967, "step": 11974 }, { - "epoch": 0.3398127128263337, + "epoch": 0.33934087109297517, "grad_norm": 0.0, - "learning_rate": 1.537474003937983e-05, - "loss": 0.9587, + "learning_rate": 1.5387372795175806e-05, + "loss": 0.837, "step": 11975 }, { - "epoch": 0.3398410896708286, + "epoch": 0.3393692085352376, "grad_norm": 0.0, - "learning_rate": 1.5373964976234556e-05, - "loss": 0.955, + "learning_rate": 1.5386599559055643e-05, + "loss": 0.9952, "step": 11976 }, { - "epoch": 0.3398694665153235, + "epoch": 0.33939754597750005, "grad_norm": 0.0, - "learning_rate": 1.5373189867695154e-05, - "loss": 0.9994, + "learning_rate": 1.5385826277562145e-05, + "loss": 0.9418, "step": 11977 }, { - "epoch": 0.3398978433598184, + "epoch": 0.33942588341976254, "grad_norm": 0.0, - "learning_rate": 1.5372414713768177e-05, - "loss": 0.8852, + "learning_rate": 1.5385052950701833e-05, + "loss": 0.836, "step": 11978 }, { - "epoch": 0.3399262202043133, + "epoch": 0.339454220862025, "grad_norm": 0.0, - "learning_rate": 1.5371639514460172e-05, - "loss": 0.9267, + "learning_rate": 1.5384279578481223e-05, + "loss": 0.8544, "step": 11979 }, { - "epoch": 0.33995459704880815, + "epoch": 0.3394825583042875, "grad_norm": 0.0, - "learning_rate": 1.5370864269777687e-05, - "loss": 1.0325, + "learning_rate": 1.5383506160906826e-05, + "loss": 0.8636, "step": 11980 }, { - "epoch": 0.33998297389330306, + "epoch": 0.3395108957465499, "grad_norm": 0.0, - "learning_rate": 1.537008897972727e-05, - "loss": 0.9104, + "learning_rate": 1.5382732697985156e-05, + "loss": 0.9324, "step": 11981 }, { - "epoch": 0.340011350737798, + "epoch": 0.33953923318881235, "grad_norm": 0.0, - "learning_rate": 1.5369313644315474e-05, - "loss": 0.8774, + "learning_rate": 1.538195918972273e-05, + "loss": 0.9796, "step": 11982 }, { - "epoch": 0.34003972758229284, + "epoch": 0.33956757063107484, "grad_norm": 0.0, - "learning_rate": 1.536853826354884e-05, - "loss": 1.0072, + "learning_rate": 1.5381185636126067e-05, + "loss": 1.0142, "step": 11983 }, { - "epoch": 0.34006810442678775, + "epoch": 0.3395959080733373, "grad_norm": 0.0, - "learning_rate": 1.536776283743392e-05, - "loss": 0.8294, + "learning_rate": 1.5380412037201672e-05, + "loss": 0.9851, "step": 11984 }, { - "epoch": 0.3400964812712826, + "epoch": 0.3396242455155998, "grad_norm": 0.0, - "learning_rate": 1.536698736597727e-05, - "loss": 0.956, + "learning_rate": 1.5379638392956072e-05, + "loss": 0.9346, "step": 11985 }, { - "epoch": 0.34012485811577753, + "epoch": 0.3396525829578622, "grad_norm": 0.0, - "learning_rate": 1.5366211849185436e-05, - "loss": 0.9179, + "learning_rate": 1.5378864703395784e-05, + "loss": 0.9675, "step": 11986 }, { - "epoch": 0.34015323496027244, + "epoch": 0.3396809204001247, "grad_norm": 0.0, - "learning_rate": 1.5365436287064967e-05, - "loss": 0.9654, + "learning_rate": 1.5378090968527318e-05, + "loss": 0.9156, "step": 11987 }, { - "epoch": 0.3401816118047673, + "epoch": 0.33970925784238715, "grad_norm": 0.0, - "learning_rate": 1.5364660679622417e-05, - "loss": 0.9854, + "learning_rate": 1.5377317188357196e-05, + "loss": 0.8882, "step": 11988 }, { - "epoch": 0.3402099886492622, + "epoch": 0.3397375952846496, "grad_norm": 0.0, - "learning_rate": 1.5363885026864337e-05, - "loss": 0.9992, + "learning_rate": 1.5376543362891932e-05, + "loss": 0.928, "step": 11989 }, { - "epoch": 0.3402383654937571, + "epoch": 0.3397659327269121, "grad_norm": 0.0, - "learning_rate": 1.5363109328797282e-05, - "loss": 1.0187, + "learning_rate": 1.537576949213805e-05, + "loss": 0.8877, "step": 11990 }, { - "epoch": 0.340266742338252, + "epoch": 0.3397942701691745, "grad_norm": 0.0, - "learning_rate": 1.5362333585427798e-05, - "loss": 0.9359, + "learning_rate": 1.537499557610206e-05, + "loss": 0.9261, "step": 11991 }, { - "epoch": 0.34029511918274685, + "epoch": 0.339822607611437, "grad_norm": 0.0, - "learning_rate": 1.5361557796762444e-05, - "loss": 0.971, + "learning_rate": 1.5374221614790493e-05, + "loss": 0.9107, "step": 11992 }, { - "epoch": 0.34032349602724177, + "epoch": 0.33985094505369945, "grad_norm": 0.0, - "learning_rate": 1.536078196280777e-05, - "loss": 0.9081, + "learning_rate": 1.5373447608209856e-05, + "loss": 1.0055, "step": 11993 }, { - "epoch": 0.3403518728717367, + "epoch": 0.3398792824959619, "grad_norm": 0.0, - "learning_rate": 1.5360006083570326e-05, - "loss": 0.9414, + "learning_rate": 1.537267355636668e-05, + "loss": 0.9076, "step": 11994 }, { - "epoch": 0.34038024971623154, + "epoch": 0.3399076199382244, "grad_norm": 0.0, - "learning_rate": 1.535923015905667e-05, - "loss": 0.9408, + "learning_rate": 1.5371899459267473e-05, + "loss": 0.9294, "step": 11995 }, { - "epoch": 0.34040862656072646, + "epoch": 0.3399359573804868, "grad_norm": 0.0, - "learning_rate": 1.535845418927336e-05, - "loss": 0.9582, + "learning_rate": 1.5371125316918767e-05, + "loss": 0.9466, "step": 11996 }, { - "epoch": 0.3404370034052213, + "epoch": 0.3399642948227493, "grad_norm": 0.0, - "learning_rate": 1.5357678174226946e-05, - "loss": 0.9205, + "learning_rate": 1.5370351129327074e-05, + "loss": 0.9188, "step": 11997 }, { - "epoch": 0.34046538024971623, + "epoch": 0.33999263226501175, "grad_norm": 0.0, - "learning_rate": 1.535690211392398e-05, - "loss": 0.9001, + "learning_rate": 1.536957689649892e-05, + "loss": 0.9271, "step": 11998 }, { - "epoch": 0.34049375709421115, + "epoch": 0.34002096970727425, "grad_norm": 0.0, - "learning_rate": 1.5356126008371025e-05, - "loss": 1.0219, + "learning_rate": 1.536880261844083e-05, + "loss": 0.9647, "step": 11999 }, { - "epoch": 0.340522133938706, + "epoch": 0.3400493071495367, "grad_norm": 0.0, - "learning_rate": 1.535534985757463e-05, - "loss": 1.0245, + "learning_rate": 1.536802829515932e-05, + "loss": 0.9196, "step": 12000 }, { - "epoch": 0.3405505107832009, + "epoch": 0.3400776445917991, "grad_norm": 0.0, - "learning_rate": 1.5354573661541354e-05, - "loss": 0.8966, + "learning_rate": 1.5367253926660915e-05, + "loss": 0.9431, "step": 12001 }, { - "epoch": 0.3405788876276958, + "epoch": 0.3401059820340616, "grad_norm": 0.0, - "learning_rate": 1.5353797420277755e-05, - "loss": 0.925, + "learning_rate": 1.5366479512952133e-05, + "loss": 0.9869, "step": 12002 }, { - "epoch": 0.3406072644721907, + "epoch": 0.34013431947632405, "grad_norm": 0.0, - "learning_rate": 1.535302113379039e-05, - "loss": 1.0078, + "learning_rate": 1.5365705054039504e-05, + "loss": 0.9421, "step": 12003 }, { - "epoch": 0.3406356413166856, + "epoch": 0.34016265691858655, "grad_norm": 0.0, - "learning_rate": 1.535224480208581e-05, - "loss": 0.9706, + "learning_rate": 1.536493054992955e-05, + "loss": 0.9069, "step": 12004 }, { - "epoch": 0.34066401816118047, + "epoch": 0.340190994360849, "grad_norm": 0.0, - "learning_rate": 1.5351468425170583e-05, - "loss": 0.976, + "learning_rate": 1.536415600062879e-05, + "loss": 0.9473, "step": 12005 }, { - "epoch": 0.3406923950056754, + "epoch": 0.3402193318031114, "grad_norm": 0.0, - "learning_rate": 1.535069200305126e-05, - "loss": 0.8744, + "learning_rate": 1.5363381406143754e-05, + "loss": 0.9743, "step": 12006 }, { - "epoch": 0.34072077185017025, + "epoch": 0.3402476692453739, "grad_norm": 0.0, - "learning_rate": 1.5349915535734398e-05, - "loss": 0.791, + "learning_rate": 1.536260676648097e-05, + "loss": 0.8763, "step": 12007 }, { - "epoch": 0.34074914869466516, + "epoch": 0.34027600668763636, "grad_norm": 0.0, - "learning_rate": 1.5349139023226563e-05, - "loss": 0.9231, + "learning_rate": 1.536183208164695e-05, + "loss": 0.9168, "step": 12008 }, { - "epoch": 0.34077752553916, + "epoch": 0.34030434412989885, "grad_norm": 0.0, - "learning_rate": 1.5348362465534305e-05, - "loss": 0.8931, + "learning_rate": 1.536105735164823e-05, + "loss": 0.8844, "step": 12009 }, { - "epoch": 0.34080590238365494, + "epoch": 0.3403326815721613, "grad_norm": 0.0, - "learning_rate": 1.5347585862664192e-05, - "loss": 0.8911, + "learning_rate": 1.5360282576491332e-05, + "loss": 0.9901, "step": 12010 }, { - "epoch": 0.34083427922814985, + "epoch": 0.3403610190144238, "grad_norm": 0.0, - "learning_rate": 1.5346809214622785e-05, - "loss": 0.8923, + "learning_rate": 1.5359507756182785e-05, + "loss": 1.056, "step": 12011 }, { - "epoch": 0.3408626560726447, + "epoch": 0.3403893564566862, "grad_norm": 0.0, - "learning_rate": 1.5346032521416632e-05, - "loss": 0.8578, + "learning_rate": 1.5358732890729117e-05, + "loss": 0.9656, "step": 12012 }, { - "epoch": 0.3408910329171396, + "epoch": 0.34041769389894866, "grad_norm": 0.0, - "learning_rate": 1.5345255783052307e-05, - "loss": 0.9222, + "learning_rate": 1.535795798013685e-05, + "loss": 1.0526, "step": 12013 }, { - "epoch": 0.3409194097616345, + "epoch": 0.34044603134121115, "grad_norm": 0.0, - "learning_rate": 1.5344478999536366e-05, - "loss": 0.9585, + "learning_rate": 1.535718302441251e-05, + "loss": 0.9281, "step": 12014 }, { - "epoch": 0.3409477866061294, + "epoch": 0.3404743687834736, "grad_norm": 0.0, - "learning_rate": 1.534370217087537e-05, - "loss": 0.9979, + "learning_rate": 1.5356408023562626e-05, + "loss": 1.0091, "step": 12015 }, { - "epoch": 0.3409761634506243, + "epoch": 0.3405027062257361, "grad_norm": 0.0, - "learning_rate": 1.5342925297075885e-05, - "loss": 0.9107, + "learning_rate": 1.5355632977593735e-05, + "loss": 1.0681, "step": 12016 }, { - "epoch": 0.3410045402951192, + "epoch": 0.3405310436679985, "grad_norm": 0.0, - "learning_rate": 1.5342148378144464e-05, - "loss": 0.9797, + "learning_rate": 1.5354857886512357e-05, + "loss": 0.9647, "step": 12017 }, { - "epoch": 0.3410329171396141, + "epoch": 0.34055938111026096, "grad_norm": 0.0, - "learning_rate": 1.534137141408768e-05, - "loss": 0.9932, + "learning_rate": 1.535408275032502e-05, + "loss": 0.9907, "step": 12018 }, { - "epoch": 0.34106129398410895, + "epoch": 0.34058771855252346, "grad_norm": 0.0, - "learning_rate": 1.5340594404912087e-05, - "loss": 0.9357, + "learning_rate": 1.5353307569038255e-05, + "loss": 0.8769, "step": 12019 }, { - "epoch": 0.34108967082860386, + "epoch": 0.3406160559947859, "grad_norm": 0.0, - "learning_rate": 1.533981735062426e-05, - "loss": 1.1046, + "learning_rate": 1.5352532342658597e-05, + "loss": 0.8617, "step": 12020 }, { - "epoch": 0.3411180476730988, + "epoch": 0.3406443934370484, "grad_norm": 0.0, - "learning_rate": 1.533904025123075e-05, - "loss": 0.9664, + "learning_rate": 1.5351757071192574e-05, + "loss": 0.9357, "step": 12021 }, { - "epoch": 0.34114642451759364, + "epoch": 0.3406727308793108, "grad_norm": 0.0, - "learning_rate": 1.5338263106738126e-05, - "loss": 0.7995, + "learning_rate": 1.5350981754646705e-05, + "loss": 0.8906, "step": 12022 }, { - "epoch": 0.34117480136208855, + "epoch": 0.3407010683215733, "grad_norm": 0.0, - "learning_rate": 1.5337485917152956e-05, - "loss": 0.9787, + "learning_rate": 1.5350206393027533e-05, + "loss": 0.9802, "step": 12023 }, { - "epoch": 0.3412031782065834, + "epoch": 0.34072940576383576, "grad_norm": 0.0, - "learning_rate": 1.53367086824818e-05, - "loss": 1.1063, + "learning_rate": 1.5349430986341588e-05, + "loss": 0.9296, "step": 12024 }, { - "epoch": 0.34123155505107833, + "epoch": 0.3407577432060982, "grad_norm": 0.0, - "learning_rate": 1.5335931402731232e-05, - "loss": 0.9011, + "learning_rate": 1.5348655534595396e-05, + "loss": 0.9074, "step": 12025 }, { - "epoch": 0.3412599318955732, + "epoch": 0.3407860806483607, "grad_norm": 0.0, - "learning_rate": 1.5335154077907806e-05, - "loss": 0.9328, + "learning_rate": 1.5347880037795496e-05, + "loss": 0.9708, "step": 12026 }, { - "epoch": 0.3412883087400681, + "epoch": 0.34081441809062313, "grad_norm": 0.0, - "learning_rate": 1.5334376708018098e-05, - "loss": 0.8712, + "learning_rate": 1.5347104495948414e-05, + "loss": 0.914, "step": 12027 }, { - "epoch": 0.341316685584563, + "epoch": 0.3408427555328856, "grad_norm": 0.0, - "learning_rate": 1.5333599293068665e-05, - "loss": 1.0127, + "learning_rate": 1.5346328909060688e-05, + "loss": 0.988, "step": 12028 }, { - "epoch": 0.3413450624290579, + "epoch": 0.34087109297514806, "grad_norm": 0.0, - "learning_rate": 1.5332821833066083e-05, - "loss": 1.0676, + "learning_rate": 1.5345553277138846e-05, + "loss": 0.9425, "step": 12029 }, { - "epoch": 0.3413734392735528, + "epoch": 0.3408994304174105, "grad_norm": 0.0, - "learning_rate": 1.5332044328016916e-05, - "loss": 0.976, + "learning_rate": 1.5344777600189423e-05, + "loss": 0.8237, "step": 12030 }, { - "epoch": 0.34140181611804765, + "epoch": 0.340927767859673, "grad_norm": 0.0, - "learning_rate": 1.533126677792773e-05, - "loss": 0.9658, + "learning_rate": 1.5344001878218952e-05, + "loss": 1.0064, "step": 12031 }, { - "epoch": 0.34143019296254257, + "epoch": 0.34095610530193543, "grad_norm": 0.0, - "learning_rate": 1.5330489182805087e-05, - "loss": 0.9143, + "learning_rate": 1.5343226111233973e-05, + "loss": 0.8926, "step": 12032 }, { - "epoch": 0.3414585698070375, + "epoch": 0.3409844427441979, "grad_norm": 0.0, - "learning_rate": 1.532971154265557e-05, - "loss": 0.9508, + "learning_rate": 1.5342450299241013e-05, + "loss": 0.9707, "step": 12033 }, { - "epoch": 0.34148694665153234, + "epoch": 0.34101278018646036, "grad_norm": 0.0, - "learning_rate": 1.5328933857485735e-05, - "loss": 0.9295, + "learning_rate": 1.5341674442246613e-05, + "loss": 1.0317, "step": 12034 }, { - "epoch": 0.34151532349602726, + "epoch": 0.34104111762872286, "grad_norm": 0.0, - "learning_rate": 1.5328156127302164e-05, - "loss": 0.9628, + "learning_rate": 1.5340898540257304e-05, + "loss": 0.9613, "step": 12035 }, { - "epoch": 0.3415437003405221, + "epoch": 0.3410694550709853, "grad_norm": 0.0, - "learning_rate": 1.532737835211141e-05, - "loss": 0.927, + "learning_rate": 1.5340122593279626e-05, + "loss": 0.8747, "step": 12036 }, { - "epoch": 0.34157207718501703, + "epoch": 0.34109779251324773, "grad_norm": 0.0, - "learning_rate": 1.532660053192006e-05, - "loss": 0.9144, + "learning_rate": 1.5339346601320107e-05, + "loss": 1.0494, "step": 12037 }, { - "epoch": 0.3416004540295119, + "epoch": 0.3411261299555102, "grad_norm": 0.0, - "learning_rate": 1.532582266673467e-05, - "loss": 0.9295, + "learning_rate": 1.5338570564385294e-05, + "loss": 0.9862, "step": 12038 }, { - "epoch": 0.3416288308740068, + "epoch": 0.34115446739777266, "grad_norm": 0.0, - "learning_rate": 1.5325044756561816e-05, - "loss": 0.9703, + "learning_rate": 1.5337794482481714e-05, + "loss": 0.9216, "step": 12039 }, { - "epoch": 0.3416572077185017, + "epoch": 0.34118280484003516, "grad_norm": 0.0, - "learning_rate": 1.532426680140807e-05, - "loss": 1.0531, + "learning_rate": 1.5337018355615912e-05, + "loss": 1.0015, "step": 12040 }, { - "epoch": 0.3416855845629966, + "epoch": 0.3412111422822976, "grad_norm": 0.0, - "learning_rate": 1.5323488801280006e-05, - "loss": 0.8558, + "learning_rate": 1.5336242183794425e-05, + "loss": 0.8511, "step": 12041 }, { - "epoch": 0.3417139614074915, + "epoch": 0.34123947972456004, "grad_norm": 0.0, - "learning_rate": 1.532271075618419e-05, - "loss": 0.8641, + "learning_rate": 1.5335465967023787e-05, + "loss": 0.9896, "step": 12042 }, { - "epoch": 0.34174233825198636, + "epoch": 0.34126781716682253, "grad_norm": 0.0, - "learning_rate": 1.53219326661272e-05, - "loss": 0.9608, + "learning_rate": 1.5334689705310533e-05, + "loss": 0.9535, "step": 12043 }, { - "epoch": 0.34177071509648127, + "epoch": 0.34129615460908497, "grad_norm": 0.0, - "learning_rate": 1.5321154531115603e-05, - "loss": 0.9629, + "learning_rate": 1.533391339866121e-05, + "loss": 1.0372, "step": 12044 }, { - "epoch": 0.3417990919409762, + "epoch": 0.34132449205134746, "grad_norm": 0.0, - "learning_rate": 1.5320376351155975e-05, - "loss": 0.911, + "learning_rate": 1.5333137047082355e-05, + "loss": 0.9209, "step": 12045 }, { - "epoch": 0.34182746878547104, + "epoch": 0.3413528294936099, "grad_norm": 0.0, - "learning_rate": 1.531959812625489e-05, - "loss": 0.914, + "learning_rate": 1.5332360650580507e-05, + "loss": 0.7138, "step": 12046 }, { - "epoch": 0.34185584562996596, + "epoch": 0.3413811669358724, "grad_norm": 0.0, - "learning_rate": 1.5318819856418918e-05, - "loss": 0.929, + "learning_rate": 1.53315842091622e-05, + "loss": 0.9634, "step": 12047 }, { - "epoch": 0.3418842224744608, + "epoch": 0.34140950437813483, "grad_norm": 0.0, - "learning_rate": 1.531804154165464e-05, - "loss": 0.8919, + "learning_rate": 1.5330807722833985e-05, + "loss": 0.8093, "step": 12048 }, { - "epoch": 0.34191259931895573, + "epoch": 0.34143784182039727, "grad_norm": 0.0, - "learning_rate": 1.531726318196862e-05, - "loss": 0.9711, + "learning_rate": 1.5330031191602395e-05, + "loss": 1.018, "step": 12049 }, { - "epoch": 0.34194097616345065, + "epoch": 0.34146617926265976, "grad_norm": 0.0, - "learning_rate": 1.5316484777367443e-05, - "loss": 0.8405, + "learning_rate": 1.5329254615473974e-05, + "loss": 0.8753, "step": 12050 }, { - "epoch": 0.3419693530079455, + "epoch": 0.3414945167049222, "grad_norm": 0.0, - "learning_rate": 1.5315706327857678e-05, - "loss": 0.9098, + "learning_rate": 1.532847799445526e-05, + "loss": 0.9375, "step": 12051 }, { - "epoch": 0.3419977298524404, + "epoch": 0.3415228541471847, "grad_norm": 0.0, - "learning_rate": 1.5314927833445904e-05, - "loss": 0.9437, + "learning_rate": 1.5327701328552796e-05, + "loss": 0.9344, "step": 12052 }, { - "epoch": 0.3420261066969353, + "epoch": 0.34155119158944713, "grad_norm": 0.0, - "learning_rate": 1.5314149294138693e-05, - "loss": 0.8776, + "learning_rate": 1.532692461777313e-05, + "loss": 0.9635, "step": 12053 }, { - "epoch": 0.3420544835414302, + "epoch": 0.3415795290317096, "grad_norm": 0.0, - "learning_rate": 1.531337070994263e-05, - "loss": 0.9392, + "learning_rate": 1.5326147862122796e-05, + "loss": 0.9748, "step": 12054 }, { - "epoch": 0.34208286038592506, + "epoch": 0.34160786647397207, "grad_norm": 0.0, - "learning_rate": 1.531259208086428e-05, - "loss": 0.9629, + "learning_rate": 1.532537106160834e-05, + "loss": 0.9008, "step": 12055 }, { - "epoch": 0.34211123723042, + "epoch": 0.3416362039162345, "grad_norm": 0.0, - "learning_rate": 1.5311813406910227e-05, - "loss": 0.9365, + "learning_rate": 1.532459421623631e-05, + "loss": 0.9693, "step": 12056 }, { - "epoch": 0.3421396140749149, + "epoch": 0.341664541358497, "grad_norm": 0.0, - "learning_rate": 1.5311034688087048e-05, - "loss": 1.0, + "learning_rate": 1.5323817326013244e-05, + "loss": 1.0106, "step": 12057 }, { - "epoch": 0.34216799091940975, + "epoch": 0.34169287880075944, "grad_norm": 0.0, - "learning_rate": 1.531025592440132e-05, - "loss": 0.9291, + "learning_rate": 1.532304039094569e-05, + "loss": 0.9906, "step": 12058 }, { - "epoch": 0.34219636776390466, + "epoch": 0.34172121624302193, "grad_norm": 0.0, - "learning_rate": 1.530947711585962e-05, - "loss": 1.0382, + "learning_rate": 1.5322263411040186e-05, + "loss": 1.0258, "step": 12059 }, { - "epoch": 0.3422247446083995, + "epoch": 0.34174955368528437, "grad_norm": 0.0, - "learning_rate": 1.5308698262468533e-05, - "loss": 0.8628, + "learning_rate": 1.532148638630328e-05, + "loss": 1.0378, "step": 12060 }, { - "epoch": 0.34225312145289444, + "epoch": 0.3417778911275468, "grad_norm": 0.0, - "learning_rate": 1.530791936423463e-05, - "loss": 0.9105, + "learning_rate": 1.532070931674152e-05, + "loss": 0.9915, "step": 12061 }, { - "epoch": 0.34228149829738935, + "epoch": 0.3418062285698093, "grad_norm": 0.0, - "learning_rate": 1.5307140421164494e-05, - "loss": 0.9311, + "learning_rate": 1.5319932202361453e-05, + "loss": 0.9298, "step": 12062 }, { - "epoch": 0.3423098751418842, + "epoch": 0.34183456601207174, "grad_norm": 0.0, - "learning_rate": 1.53063614332647e-05, - "loss": 0.9046, + "learning_rate": 1.5319155043169618e-05, + "loss": 1.0074, "step": 12063 }, { - "epoch": 0.3423382519863791, + "epoch": 0.34186290345433423, "grad_norm": 0.0, - "learning_rate": 1.530558240054184e-05, - "loss": 1.0538, + "learning_rate": 1.5318377839172566e-05, + "loss": 1.0351, "step": 12064 }, { - "epoch": 0.342366628830874, + "epoch": 0.34189124089659667, "grad_norm": 0.0, - "learning_rate": 1.5304803323002483e-05, - "loss": 0.9279, + "learning_rate": 1.5317600590376842e-05, + "loss": 0.908, "step": 12065 }, { - "epoch": 0.3423950056753689, + "epoch": 0.3419195783388591, "grad_norm": 0.0, - "learning_rate": 1.5304024200653215e-05, - "loss": 0.945, + "learning_rate": 1.5316823296788993e-05, + "loss": 0.9521, "step": 12066 }, { - "epoch": 0.3424233825198638, + "epoch": 0.3419479157811216, "grad_norm": 0.0, - "learning_rate": 1.5303245033500616e-05, - "loss": 0.9629, + "learning_rate": 1.531604595841557e-05, + "loss": 0.9876, "step": 12067 }, { - "epoch": 0.3424517593643587, + "epoch": 0.34197625322338404, "grad_norm": 0.0, - "learning_rate": 1.5302465821551268e-05, - "loss": 1.0608, + "learning_rate": 1.5315268575263115e-05, + "loss": 0.9053, "step": 12068 }, { - "epoch": 0.3424801362088536, + "epoch": 0.34200459066564654, "grad_norm": 0.0, - "learning_rate": 1.5301686564811752e-05, - "loss": 0.9699, + "learning_rate": 1.531449114733818e-05, + "loss": 0.903, "step": 12069 }, { - "epoch": 0.34250851305334845, + "epoch": 0.342032928107909, "grad_norm": 0.0, - "learning_rate": 1.530090726328865e-05, - "loss": 1.1001, + "learning_rate": 1.5313713674647313e-05, + "loss": 0.8668, "step": 12070 }, { - "epoch": 0.34253688989784337, + "epoch": 0.34206126555017147, "grad_norm": 0.0, - "learning_rate": 1.5300127916988548e-05, - "loss": 0.905, + "learning_rate": 1.531293615719706e-05, + "loss": 1.0119, "step": 12071 }, { - "epoch": 0.3425652667423382, + "epoch": 0.3420896029924339, "grad_norm": 0.0, - "learning_rate": 1.5299348525918026e-05, - "loss": 0.9852, + "learning_rate": 1.5312158594993975e-05, + "loss": 0.9271, "step": 12072 }, { - "epoch": 0.34259364358683314, + "epoch": 0.34211794043469634, "grad_norm": 0.0, - "learning_rate": 1.529856909008367e-05, - "loss": 0.8536, + "learning_rate": 1.5311380988044606e-05, + "loss": 0.9405, "step": 12073 }, { - "epoch": 0.34262202043132806, + "epoch": 0.34214627787695884, "grad_norm": 0.0, - "learning_rate": 1.5297789609492062e-05, - "loss": 0.9484, + "learning_rate": 1.53106033363555e-05, + "loss": 0.9377, "step": 12074 }, { - "epoch": 0.3426503972758229, + "epoch": 0.3421746153192213, "grad_norm": 0.0, - "learning_rate": 1.529701008414979e-05, - "loss": 1.0428, + "learning_rate": 1.5309825639933214e-05, + "loss": 1.0141, "step": 12075 }, { - "epoch": 0.34267877412031783, + "epoch": 0.34220295276148377, "grad_norm": 0.0, - "learning_rate": 1.529623051406343e-05, - "loss": 0.8926, + "learning_rate": 1.530904789878429e-05, + "loss": 1.0494, "step": 12076 }, { - "epoch": 0.3427071509648127, + "epoch": 0.3422312902037462, "grad_norm": 0.0, - "learning_rate": 1.529545089923958e-05, - "loss": 0.9389, + "learning_rate": 1.5308270112915287e-05, + "loss": 1.0032, "step": 12077 }, { - "epoch": 0.3427355278093076, + "epoch": 0.34225962764600865, "grad_norm": 0.0, - "learning_rate": 1.529467123968481e-05, - "loss": 0.9021, + "learning_rate": 1.5307492282332754e-05, + "loss": 0.8399, "step": 12078 }, { - "epoch": 0.3427639046538025, + "epoch": 0.34228796508827114, "grad_norm": 0.0, - "learning_rate": 1.5293891535405716e-05, - "loss": 0.9971, + "learning_rate": 1.530671440704324e-05, + "loss": 0.9685, "step": 12079 }, { - "epoch": 0.3427922814982974, + "epoch": 0.3423163025305336, "grad_norm": 0.0, - "learning_rate": 1.5293111786408886e-05, - "loss": 0.9471, + "learning_rate": 1.5305936487053303e-05, + "loss": 0.9718, "step": 12080 }, { - "epoch": 0.3428206583427923, + "epoch": 0.34234463997279607, "grad_norm": 0.0, - "learning_rate": 1.52923319927009e-05, - "loss": 0.8985, + "learning_rate": 1.5305158522369493e-05, + "loss": 0.9827, "step": 12081 }, { - "epoch": 0.34284903518728715, + "epoch": 0.3423729774150585, "grad_norm": 0.0, - "learning_rate": 1.529155215428835e-05, - "loss": 1.0167, + "learning_rate": 1.530438051299836e-05, + "loss": 0.9722, "step": 12082 }, { - "epoch": 0.34287741203178207, + "epoch": 0.342401314857321, "grad_norm": 0.0, - "learning_rate": 1.5290772271177818e-05, - "loss": 0.9661, + "learning_rate": 1.530360245894646e-05, + "loss": 0.9093, "step": 12083 }, { - "epoch": 0.342905788876277, + "epoch": 0.34242965229958344, "grad_norm": 0.0, - "learning_rate": 1.5289992343375897e-05, - "loss": 0.9781, + "learning_rate": 1.5302824360220352e-05, + "loss": 1.0109, "step": 12084 }, { - "epoch": 0.34293416572077184, + "epoch": 0.3424579897418459, "grad_norm": 0.0, - "learning_rate": 1.5289212370889174e-05, - "loss": 0.8527, + "learning_rate": 1.530204621682658e-05, + "loss": 1.0242, "step": 12085 }, { - "epoch": 0.34296254256526676, + "epoch": 0.3424863271841084, "grad_norm": 0.0, - "learning_rate": 1.5288432353724233e-05, - "loss": 0.9002, + "learning_rate": 1.5301268028771708e-05, + "loss": 0.8132, "step": 12086 }, { - "epoch": 0.3429909194097616, + "epoch": 0.3425146646263708, "grad_norm": 0.0, - "learning_rate": 1.5287652291887667e-05, - "loss": 0.9285, + "learning_rate": 1.5300489796062286e-05, + "loss": 0.9173, "step": 12087 }, { - "epoch": 0.34301929625425653, + "epoch": 0.3425430020686333, "grad_norm": 0.0, - "learning_rate": 1.5286872185386067e-05, - "loss": 0.9393, + "learning_rate": 1.5299711518704866e-05, + "loss": 0.8317, "step": 12088 }, { - "epoch": 0.3430476730987514, + "epoch": 0.34257133951089574, "grad_norm": 0.0, - "learning_rate": 1.5286092034226017e-05, - "loss": 0.9242, + "learning_rate": 1.529893319670601e-05, + "loss": 0.9525, "step": 12089 }, { - "epoch": 0.3430760499432463, + "epoch": 0.3425996769531582, "grad_norm": 0.0, - "learning_rate": 1.5285311838414114e-05, - "loss": 0.9161, + "learning_rate": 1.5298154830072274e-05, + "loss": 1.0149, "step": 12090 }, { - "epoch": 0.3431044267877412, + "epoch": 0.3426280143954207, "grad_norm": 0.0, - "learning_rate": 1.528453159795694e-05, - "loss": 0.8792, + "learning_rate": 1.529737641881021e-05, + "loss": 0.9422, "step": 12091 }, { - "epoch": 0.3431328036322361, + "epoch": 0.3426563518376831, "grad_norm": 0.0, - "learning_rate": 1.5283751312861093e-05, - "loss": 1.068, + "learning_rate": 1.5296597962926377e-05, + "loss": 0.9702, "step": 12092 }, { - "epoch": 0.343161180476731, + "epoch": 0.3426846892799456, "grad_norm": 0.0, - "learning_rate": 1.5282970983133155e-05, - "loss": 0.985, + "learning_rate": 1.5295819462427336e-05, + "loss": 0.997, "step": 12093 }, { - "epoch": 0.34318955732122586, + "epoch": 0.34271302672220805, "grad_norm": 0.0, - "learning_rate": 1.528219060877973e-05, - "loss": 0.9738, + "learning_rate": 1.5295040917319637e-05, + "loss": 0.955, "step": 12094 }, { - "epoch": 0.3432179341657208, + "epoch": 0.34274136416447054, "grad_norm": 0.0, - "learning_rate": 1.5281410189807405e-05, - "loss": 0.8889, + "learning_rate": 1.5294262327609843e-05, + "loss": 0.9208, "step": 12095 }, { - "epoch": 0.3432463110102157, + "epoch": 0.342769701606733, "grad_norm": 0.0, - "learning_rate": 1.5280629726222766e-05, - "loss": 0.9497, + "learning_rate": 1.5293483693304513e-05, + "loss": 0.8558, "step": 12096 }, { - "epoch": 0.34327468785471055, + "epoch": 0.3427980390489954, "grad_norm": 0.0, - "learning_rate": 1.527984921803241e-05, - "loss": 1.0056, + "learning_rate": 1.5292705014410206e-05, + "loss": 1.012, "step": 12097 }, { - "epoch": 0.34330306469920546, + "epoch": 0.3428263764912579, "grad_norm": 0.0, - "learning_rate": 1.5279068665242936e-05, - "loss": 0.9317, + "learning_rate": 1.5291926290933476e-05, + "loss": 0.9215, "step": 12098 }, { - "epoch": 0.3433314415437003, + "epoch": 0.34285471393352035, "grad_norm": 0.0, - "learning_rate": 1.5278288067860925e-05, - "loss": 0.9431, + "learning_rate": 1.5291147522880887e-05, + "loss": 0.8224, "step": 12099 }, { - "epoch": 0.34335981838819524, + "epoch": 0.34288305137578284, "grad_norm": 0.0, - "learning_rate": 1.527750742589298e-05, - "loss": 0.9613, + "learning_rate": 1.5290368710258998e-05, + "loss": 0.8995, "step": 12100 }, { - "epoch": 0.34338819523269015, + "epoch": 0.3429113888180453, "grad_norm": 0.0, - "learning_rate": 1.527672673934569e-05, - "loss": 1.0122, + "learning_rate": 1.5289589853074366e-05, + "loss": 1.0267, "step": 12101 }, { - "epoch": 0.343416572077185, + "epoch": 0.3429397262603077, "grad_norm": 0.0, - "learning_rate": 1.527594600822566e-05, - "loss": 0.9746, + "learning_rate": 1.5288810951333558e-05, + "loss": 1.0373, "step": 12102 }, { - "epoch": 0.3434449489216799, + "epoch": 0.3429680637025702, "grad_norm": 0.0, - "learning_rate": 1.527516523253947e-05, - "loss": 0.9981, + "learning_rate": 1.528803200504313e-05, + "loss": 0.938, "step": 12103 }, { - "epoch": 0.3434733257661748, + "epoch": 0.34299640114483265, "grad_norm": 0.0, - "learning_rate": 1.5274384412293724e-05, - "loss": 0.8325, + "learning_rate": 1.5287253014209645e-05, + "loss": 0.9383, "step": 12104 }, { - "epoch": 0.3435017026106697, + "epoch": 0.34302473858709515, "grad_norm": 0.0, - "learning_rate": 1.5273603547495016e-05, - "loss": 1.021, + "learning_rate": 1.5286473978839662e-05, + "loss": 0.9743, "step": 12105 }, { - "epoch": 0.34353007945516456, + "epoch": 0.3430530760293576, "grad_norm": 0.0, - "learning_rate": 1.527282263814994e-05, - "loss": 1.1122, + "learning_rate": 1.5285694898939748e-05, + "loss": 0.9546, "step": 12106 }, { - "epoch": 0.3435584562996595, + "epoch": 0.3430814134716201, "grad_norm": 0.0, - "learning_rate": 1.5272041684265095e-05, - "loss": 0.8884, + "learning_rate": 1.5284915774516465e-05, + "loss": 1.081, "step": 12107 }, { - "epoch": 0.3435868331441544, + "epoch": 0.3431097509138825, "grad_norm": 0.0, - "learning_rate": 1.5271260685847078e-05, - "loss": 0.8708, + "learning_rate": 1.5284136605576373e-05, + "loss": 0.92, "step": 12108 }, { - "epoch": 0.34361520998864925, + "epoch": 0.34313808835614495, "grad_norm": 0.0, - "learning_rate": 1.5270479642902484e-05, - "loss": 0.8896, + "learning_rate": 1.528335739212603e-05, + "loss": 0.9151, "step": 12109 }, { - "epoch": 0.34364358683314417, + "epoch": 0.34316642579840745, "grad_norm": 0.0, - "learning_rate": 1.5269698555437913e-05, - "loss": 0.9862, + "learning_rate": 1.5282578134172013e-05, + "loss": 0.9735, "step": 12110 }, { - "epoch": 0.343671963677639, + "epoch": 0.3431947632406699, "grad_norm": 0.0, - "learning_rate": 1.5268917423459958e-05, - "loss": 0.937, + "learning_rate": 1.5281798831720876e-05, + "loss": 0.8153, "step": 12111 }, { - "epoch": 0.34370034052213394, + "epoch": 0.3432231006829324, "grad_norm": 0.0, - "learning_rate": 1.5268136246975226e-05, - "loss": 0.9776, + "learning_rate": 1.528101948477919e-05, + "loss": 1.0574, "step": 12112 }, { - "epoch": 0.34372871736662886, + "epoch": 0.3432514381251948, "grad_norm": 0.0, - "learning_rate": 1.5267355025990304e-05, - "loss": 0.8703, + "learning_rate": 1.528024009335351e-05, + "loss": 1.0615, "step": 12113 }, { - "epoch": 0.3437570942111237, + "epoch": 0.34327977556745726, "grad_norm": 0.0, - "learning_rate": 1.52665737605118e-05, - "loss": 0.8737, + "learning_rate": 1.5279460657450408e-05, + "loss": 0.9298, "step": 12114 }, { - "epoch": 0.34378547105561863, + "epoch": 0.34330811300971975, "grad_norm": 0.0, - "learning_rate": 1.526579245054632e-05, - "loss": 0.8988, + "learning_rate": 1.527868117707645e-05, + "loss": 1.1375, "step": 12115 }, { - "epoch": 0.3438138479001135, + "epoch": 0.3433364504519822, "grad_norm": 0.0, - "learning_rate": 1.526501109610044e-05, - "loss": 0.8909, + "learning_rate": 1.52779016522382e-05, + "loss": 0.9478, "step": 12116 }, { - "epoch": 0.3438422247446084, + "epoch": 0.3433647878942447, "grad_norm": 0.0, - "learning_rate": 1.5264229697180782e-05, - "loss": 1.0817, + "learning_rate": 1.5277122082942225e-05, + "loss": 0.9981, "step": 12117 }, { - "epoch": 0.34387060158910326, + "epoch": 0.3433931253365071, "grad_norm": 0.0, - "learning_rate": 1.526344825379394e-05, - "loss": 0.8762, + "learning_rate": 1.527634246919509e-05, + "loss": 0.9915, "step": 12118 }, { - "epoch": 0.3438989784335982, + "epoch": 0.3434214627787696, "grad_norm": 0.0, - "learning_rate": 1.526266676594651e-05, - "loss": 1.0697, + "learning_rate": 1.5275562811003363e-05, + "loss": 0.8952, "step": 12119 }, { - "epoch": 0.3439273552780931, + "epoch": 0.34344980022103205, "grad_norm": 0.0, - "learning_rate": 1.5261885233645097e-05, - "loss": 0.97, + "learning_rate": 1.5274783108373612e-05, + "loss": 0.8108, "step": 12120 }, { - "epoch": 0.34395573212258795, + "epoch": 0.3434781376632945, "grad_norm": 0.0, - "learning_rate": 1.5261103656896304e-05, - "loss": 0.932, + "learning_rate": 1.5274003361312405e-05, + "loss": 0.8698, "step": 12121 }, { - "epoch": 0.34398410896708287, + "epoch": 0.343506475105557, "grad_norm": 0.0, - "learning_rate": 1.5260322035706735e-05, - "loss": 0.9359, + "learning_rate": 1.5273223569826305e-05, + "loss": 1.0227, "step": 12122 }, { - "epoch": 0.34401248581157773, + "epoch": 0.3435348125478194, "grad_norm": 0.0, - "learning_rate": 1.5259540370082984e-05, - "loss": 0.8331, + "learning_rate": 1.527244373392189e-05, + "loss": 0.9138, "step": 12123 }, { - "epoch": 0.34404086265607264, + "epoch": 0.3435631499900819, "grad_norm": 0.0, - "learning_rate": 1.5258758660031663e-05, - "loss": 0.9098, + "learning_rate": 1.5271663853605723e-05, + "loss": 0.9897, "step": 12124 }, { - "epoch": 0.34406923950056756, + "epoch": 0.34359148743234436, "grad_norm": 0.0, - "learning_rate": 1.5257976905559368e-05, - "loss": 0.9742, + "learning_rate": 1.5270883928884373e-05, + "loss": 0.9654, "step": 12125 }, { - "epoch": 0.3440976163450624, + "epoch": 0.3436198248746068, "grad_norm": 0.0, - "learning_rate": 1.5257195106672709e-05, - "loss": 0.97, + "learning_rate": 1.527010395976441e-05, + "loss": 0.9125, "step": 12126 }, { - "epoch": 0.34412599318955733, + "epoch": 0.3436481623168693, "grad_norm": 0.0, - "learning_rate": 1.5256413263378287e-05, - "loss": 0.963, + "learning_rate": 1.52693239462524e-05, + "loss": 0.9295, "step": 12127 }, { - "epoch": 0.3441543700340522, + "epoch": 0.3436764997591317, "grad_norm": 0.0, - "learning_rate": 1.5255631375682702e-05, - "loss": 0.82, + "learning_rate": 1.5268543888354923e-05, + "loss": 1.0061, "step": 12128 }, { - "epoch": 0.3441827468785471, + "epoch": 0.3437048372013942, "grad_norm": 0.0, - "learning_rate": 1.5254849443592568e-05, - "loss": 0.9248, + "learning_rate": 1.5267763786078544e-05, + "loss": 0.9731, "step": 12129 }, { - "epoch": 0.344211123723042, + "epoch": 0.34373317464365666, "grad_norm": 0.0, - "learning_rate": 1.5254067467114479e-05, - "loss": 0.9433, + "learning_rate": 1.5266983639429832e-05, + "loss": 0.9299, "step": 12130 }, { - "epoch": 0.3442395005675369, + "epoch": 0.34376151208591915, "grad_norm": 0.0, - "learning_rate": 1.5253285446255048e-05, - "loss": 0.8888, + "learning_rate": 1.526620344841536e-05, + "loss": 0.9172, "step": 12131 }, { - "epoch": 0.3442678774120318, + "epoch": 0.3437898495281816, "grad_norm": 0.0, - "learning_rate": 1.525250338102088e-05, - "loss": 1.0079, + "learning_rate": 1.526542321304171e-05, + "loss": 0.9887, "step": 12132 }, { - "epoch": 0.34429625425652666, + "epoch": 0.34381818697044403, "grad_norm": 0.0, - "learning_rate": 1.525172127141858e-05, - "loss": 0.8655, + "learning_rate": 1.5264642933315438e-05, + "loss": 0.9185, "step": 12133 }, { - "epoch": 0.34432463110102157, + "epoch": 0.3438465244127065, "grad_norm": 0.0, - "learning_rate": 1.525093911745475e-05, - "loss": 1.0115, + "learning_rate": 1.5263862609243122e-05, + "loss": 0.9207, "step": 12134 }, { - "epoch": 0.34435300794551643, + "epoch": 0.34387486185496896, "grad_norm": 0.0, - "learning_rate": 1.5250156919136005e-05, - "loss": 1.0022, + "learning_rate": 1.526308224083134e-05, + "loss": 1.0029, "step": 12135 }, { - "epoch": 0.34438138479001135, + "epoch": 0.34390319929723145, "grad_norm": 0.0, - "learning_rate": 1.524937467646895e-05, - "loss": 0.9941, + "learning_rate": 1.5262301828086657e-05, + "loss": 1.0336, "step": 12136 }, { - "epoch": 0.34440976163450626, + "epoch": 0.3439315367394939, "grad_norm": 0.0, - "learning_rate": 1.5248592389460189e-05, - "loss": 0.8901, + "learning_rate": 1.5261521371015657e-05, + "loss": 1.0411, "step": 12137 }, { - "epoch": 0.3444381384790011, + "epoch": 0.34395987418175633, "grad_norm": 0.0, - "learning_rate": 1.524781005811633e-05, - "loss": 0.9062, + "learning_rate": 1.5260740869624906e-05, + "loss": 0.9642, "step": 12138 }, { - "epoch": 0.34446651532349604, + "epoch": 0.3439882116240188, "grad_norm": 0.0, - "learning_rate": 1.5247027682443987e-05, - "loss": 0.9479, + "learning_rate": 1.525996032392098e-05, + "loss": 0.9396, "step": 12139 }, { - "epoch": 0.3444948921679909, + "epoch": 0.34401654906628126, "grad_norm": 0.0, - "learning_rate": 1.5246245262449763e-05, - "loss": 0.9511, + "learning_rate": 1.5259179733910458e-05, + "loss": 0.8483, "step": 12140 }, { - "epoch": 0.3445232690124858, + "epoch": 0.34404488650854376, "grad_norm": 0.0, - "learning_rate": 1.5245462798140272e-05, - "loss": 0.9855, + "learning_rate": 1.5258399099599909e-05, + "loss": 0.8194, "step": 12141 }, { - "epoch": 0.3445516458569807, + "epoch": 0.3440732239508062, "grad_norm": 0.0, - "learning_rate": 1.5244680289522118e-05, - "loss": 1.0461, + "learning_rate": 1.5257618420995917e-05, + "loss": 0.989, "step": 12142 }, { - "epoch": 0.3445800227014756, + "epoch": 0.3441015613930687, "grad_norm": 0.0, - "learning_rate": 1.5243897736601914e-05, - "loss": 0.9141, + "learning_rate": 1.5256837698105047e-05, + "loss": 0.9375, "step": 12143 }, { - "epoch": 0.3446083995459705, + "epoch": 0.3441298988353311, "grad_norm": 0.0, - "learning_rate": 1.5243115139386274e-05, - "loss": 0.9974, + "learning_rate": 1.5256056930933884e-05, + "loss": 0.8928, "step": 12144 }, { - "epoch": 0.34463677639046536, + "epoch": 0.34415823627759357, "grad_norm": 0.0, - "learning_rate": 1.5242332497881801e-05, - "loss": 0.9705, + "learning_rate": 1.5255276119489004e-05, + "loss": 0.991, "step": 12145 }, { - "epoch": 0.3446651532349603, + "epoch": 0.34418657371985606, "grad_norm": 0.0, - "learning_rate": 1.5241549812095114e-05, - "loss": 0.8673, + "learning_rate": 1.5254495263776979e-05, + "loss": 1.0661, "step": 12146 }, { - "epoch": 0.3446935300794552, + "epoch": 0.3442149111621185, "grad_norm": 0.0, - "learning_rate": 1.524076708203281e-05, - "loss": 0.9666, + "learning_rate": 1.5253714363804389e-05, + "loss": 0.9463, "step": 12147 }, { - "epoch": 0.34472190692395005, + "epoch": 0.344243248604381, "grad_norm": 0.0, - "learning_rate": 1.523998430770152e-05, - "loss": 0.9247, + "learning_rate": 1.5252933419577809e-05, + "loss": 0.9661, "step": 12148 }, { - "epoch": 0.34475028376844497, + "epoch": 0.34427158604664343, "grad_norm": 0.0, - "learning_rate": 1.5239201489107846e-05, - "loss": 0.9435, + "learning_rate": 1.5252152431103824e-05, + "loss": 0.9273, "step": 12149 }, { - "epoch": 0.3447786606129398, + "epoch": 0.34429992348890587, "grad_norm": 0.0, - "learning_rate": 1.5238418626258398e-05, - "loss": 1.0093, + "learning_rate": 1.5251371398389008e-05, + "loss": 0.983, "step": 12150 }, { - "epoch": 0.34480703745743474, + "epoch": 0.34432826093116836, "grad_norm": 0.0, - "learning_rate": 1.5237635719159795e-05, - "loss": 0.9227, + "learning_rate": 1.525059032143994e-05, + "loss": 0.8575, "step": 12151 }, { - "epoch": 0.3448354143019296, + "epoch": 0.3443565983734308, "grad_norm": 0.0, - "learning_rate": 1.5236852767818651e-05, - "loss": 0.9675, + "learning_rate": 1.5249809200263199e-05, + "loss": 0.949, "step": 12152 }, { - "epoch": 0.3448637911464245, + "epoch": 0.3443849358156933, "grad_norm": 0.0, - "learning_rate": 1.523606977224157e-05, - "loss": 0.9304, + "learning_rate": 1.5249028034865368e-05, + "loss": 0.9066, "step": 12153 }, { - "epoch": 0.34489216799091943, + "epoch": 0.34441327325795573, "grad_norm": 0.0, - "learning_rate": 1.5235286732435174e-05, - "loss": 0.9055, + "learning_rate": 1.5248246825253023e-05, + "loss": 0.8061, "step": 12154 }, { - "epoch": 0.3449205448354143, + "epoch": 0.3444416107002182, "grad_norm": 0.0, - "learning_rate": 1.5234503648406075e-05, - "loss": 1.0191, + "learning_rate": 1.5247465571432746e-05, + "loss": 0.9196, "step": 12155 }, { - "epoch": 0.3449489216799092, + "epoch": 0.34446994814248066, "grad_norm": 0.0, - "learning_rate": 1.523372052016089e-05, - "loss": 0.911, + "learning_rate": 1.5246684273411121e-05, + "loss": 0.9628, "step": 12156 }, { - "epoch": 0.34497729852440406, + "epoch": 0.3444982855847431, "grad_norm": 0.0, - "learning_rate": 1.5232937347706234e-05, - "loss": 0.9194, + "learning_rate": 1.5245902931194723e-05, + "loss": 1.0278, "step": 12157 }, { - "epoch": 0.345005675368899, + "epoch": 0.3445266230270056, "grad_norm": 0.0, - "learning_rate": 1.5232154131048718e-05, - "loss": 0.9057, + "learning_rate": 1.5245121544790138e-05, + "loss": 0.8242, "step": 12158 }, { - "epoch": 0.3450340522133939, + "epoch": 0.34455496046926803, "grad_norm": 0.0, - "learning_rate": 1.5231370870194964e-05, - "loss": 1.0247, + "learning_rate": 1.5244340114203946e-05, + "loss": 0.9541, "step": 12159 }, { - "epoch": 0.34506242905788875, + "epoch": 0.34458329791153053, "grad_norm": 0.0, - "learning_rate": 1.5230587565151582e-05, - "loss": 0.9729, + "learning_rate": 1.5243558639442728e-05, + "loss": 0.9571, "step": 12160 }, { - "epoch": 0.34509080590238367, + "epoch": 0.34461163535379297, "grad_norm": 0.0, - "learning_rate": 1.5229804215925192e-05, - "loss": 0.9457, + "learning_rate": 1.524277712051307e-05, + "loss": 0.9054, "step": 12161 }, { - "epoch": 0.34511918274687853, + "epoch": 0.3446399727960554, "grad_norm": 0.0, - "learning_rate": 1.5229020822522413e-05, - "loss": 0.9837, + "learning_rate": 1.5241995557421555e-05, + "loss": 0.9722, "step": 12162 }, { - "epoch": 0.34514755959137344, + "epoch": 0.3446683102383179, "grad_norm": 0.0, - "learning_rate": 1.5228237384949858e-05, - "loss": 0.9478, + "learning_rate": 1.5241213950174763e-05, + "loss": 0.8884, "step": 12163 }, { - "epoch": 0.34517593643586836, + "epoch": 0.34469664768058034, "grad_norm": 0.0, - "learning_rate": 1.5227453903214147e-05, - "loss": 0.9029, + "learning_rate": 1.5240432298779281e-05, + "loss": 0.8615, "step": 12164 }, { - "epoch": 0.3452043132803632, + "epoch": 0.34472498512284283, "grad_norm": 0.0, - "learning_rate": 1.5226670377321898e-05, - "loss": 0.9267, + "learning_rate": 1.5239650603241692e-05, + "loss": 0.9966, "step": 12165 }, { - "epoch": 0.34523269012485813, + "epoch": 0.34475332256510527, "grad_norm": 0.0, - "learning_rate": 1.522588680727973e-05, - "loss": 0.9565, + "learning_rate": 1.5238868863568577e-05, + "loss": 0.9196, "step": 12166 }, { - "epoch": 0.345261066969353, + "epoch": 0.3447816600073677, "grad_norm": 0.0, - "learning_rate": 1.5225103193094261e-05, - "loss": 0.9581, + "learning_rate": 1.5238087079766524e-05, + "loss": 0.8968, "step": 12167 }, { - "epoch": 0.3452894438138479, + "epoch": 0.3448099974496302, "grad_norm": 0.0, - "learning_rate": 1.5224319534772111e-05, - "loss": 0.9721, + "learning_rate": 1.5237305251842122e-05, + "loss": 0.8035, "step": 12168 }, { - "epoch": 0.34531782065834277, + "epoch": 0.34483833489189264, "grad_norm": 0.0, - "learning_rate": 1.5223535832319898e-05, - "loss": 0.9273, + "learning_rate": 1.5236523379801954e-05, + "loss": 0.8547, "step": 12169 }, { - "epoch": 0.3453461975028377, + "epoch": 0.34486667233415513, "grad_norm": 0.0, - "learning_rate": 1.5222752085744244e-05, - "loss": 0.9799, + "learning_rate": 1.5235741463652602e-05, + "loss": 0.9056, "step": 12170 }, { - "epoch": 0.3453745743473326, + "epoch": 0.34489500977641757, "grad_norm": 0.0, - "learning_rate": 1.5221968295051766e-05, - "loss": 0.9827, + "learning_rate": 1.5234959503400658e-05, + "loss": 0.8249, "step": 12171 }, { - "epoch": 0.34540295119182746, + "epoch": 0.34492334721868007, "grad_norm": 0.0, - "learning_rate": 1.522118446024909e-05, - "loss": 0.7911, + "learning_rate": 1.5234177499052703e-05, + "loss": 1.0072, "step": 12172 }, { - "epoch": 0.34543132803632237, + "epoch": 0.3449516846609425, "grad_norm": 0.0, - "learning_rate": 1.5220400581342833e-05, - "loss": 0.834, + "learning_rate": 1.5233395450615326e-05, + "loss": 0.8596, "step": 12173 }, { - "epoch": 0.34545970488081723, + "epoch": 0.34498002210320494, "grad_norm": 0.0, - "learning_rate": 1.5219616658339615e-05, - "loss": 0.9542, + "learning_rate": 1.5232613358095121e-05, + "loss": 0.9797, "step": 12174 }, { - "epoch": 0.34548808172531215, + "epoch": 0.34500835954546744, "grad_norm": 0.0, - "learning_rate": 1.5218832691246066e-05, - "loss": 0.9548, + "learning_rate": 1.5231831221498667e-05, + "loss": 0.9186, "step": 12175 }, { - "epoch": 0.34551645856980706, + "epoch": 0.3450366969877299, "grad_norm": 0.0, - "learning_rate": 1.52180486800688e-05, - "loss": 1.0161, + "learning_rate": 1.5231049040832556e-05, + "loss": 0.9578, "step": 12176 }, { - "epoch": 0.3455448354143019, + "epoch": 0.34506503442999237, "grad_norm": 0.0, - "learning_rate": 1.521726462481444e-05, - "loss": 1.0543, + "learning_rate": 1.5230266816103379e-05, + "loss": 0.9335, "step": 12177 }, { - "epoch": 0.34557321225879684, + "epoch": 0.3450933718722548, "grad_norm": 0.0, - "learning_rate": 1.5216480525489612e-05, - "loss": 0.9133, + "learning_rate": 1.5229484547317718e-05, + "loss": 1.0912, "step": 12178 }, { - "epoch": 0.3456015891032917, + "epoch": 0.34512170931451724, "grad_norm": 0.0, - "learning_rate": 1.5215696382100941e-05, - "loss": 0.998, + "learning_rate": 1.5228702234482172e-05, + "loss": 0.8581, "step": 12179 }, { - "epoch": 0.3456299659477866, + "epoch": 0.34515004675677974, "grad_norm": 0.0, - "learning_rate": 1.5214912194655048e-05, - "loss": 0.9678, + "learning_rate": 1.522791987760332e-05, + "loss": 0.8969, "step": 12180 }, { - "epoch": 0.3456583427922815, + "epoch": 0.3451783841990422, "grad_norm": 0.0, - "learning_rate": 1.5214127963158553e-05, - "loss": 0.9132, + "learning_rate": 1.522713747668776e-05, + "loss": 0.8945, "step": 12181 }, { - "epoch": 0.3456867196367764, + "epoch": 0.34520672164130467, "grad_norm": 0.0, - "learning_rate": 1.521334368761809e-05, - "loss": 0.9034, + "learning_rate": 1.5226355031742081e-05, + "loss": 0.9192, "step": 12182 }, { - "epoch": 0.3457150964812713, + "epoch": 0.3452350590835671, "grad_norm": 0.0, - "learning_rate": 1.5212559368040278e-05, - "loss": 0.9177, + "learning_rate": 1.5225572542772874e-05, + "loss": 0.9588, "step": 12183 }, { - "epoch": 0.34574347332576616, + "epoch": 0.3452633965258296, "grad_norm": 0.0, - "learning_rate": 1.521177500443174e-05, - "loss": 0.8323, + "learning_rate": 1.5224790009786725e-05, + "loss": 0.9864, "step": 12184 }, { - "epoch": 0.3457718501702611, + "epoch": 0.34529173396809204, "grad_norm": 0.0, - "learning_rate": 1.5210990596799103e-05, - "loss": 0.959, + "learning_rate": 1.5224007432790234e-05, + "loss": 0.9808, "step": 12185 }, { - "epoch": 0.34580022701475593, + "epoch": 0.3453200714103545, "grad_norm": 0.0, - "learning_rate": 1.5210206145148996e-05, - "loss": 0.8902, + "learning_rate": 1.5223224811789986e-05, + "loss": 1.0759, "step": 12186 }, { - "epoch": 0.34582860385925085, + "epoch": 0.345348408852617, "grad_norm": 0.0, - "learning_rate": 1.5209421649488045e-05, - "loss": 0.9676, + "learning_rate": 1.5222442146792573e-05, + "loss": 1.0267, "step": 12187 }, { - "epoch": 0.34585698070374576, + "epoch": 0.3453767462948794, "grad_norm": 0.0, - "learning_rate": 1.5208637109822873e-05, - "loss": 0.8889, + "learning_rate": 1.5221659437804594e-05, + "loss": 0.9335, "step": 12188 }, { - "epoch": 0.3458853575482406, + "epoch": 0.3454050837371419, "grad_norm": 0.0, - "learning_rate": 1.5207852526160112e-05, - "loss": 0.968, + "learning_rate": 1.522087668483264e-05, + "loss": 0.9128, "step": 12189 }, { - "epoch": 0.34591373439273554, + "epoch": 0.34543342117940434, "grad_norm": 0.0, - "learning_rate": 1.5207067898506384e-05, - "loss": 1.045, + "learning_rate": 1.5220093887883301e-05, + "loss": 0.9695, "step": 12190 }, { - "epoch": 0.3459421112372304, + "epoch": 0.3454617586216668, "grad_norm": 0.0, - "learning_rate": 1.520628322686832e-05, - "loss": 0.9187, + "learning_rate": 1.5219311046963174e-05, + "loss": 1.0297, "step": 12191 }, { - "epoch": 0.3459704880817253, + "epoch": 0.3454900960639293, "grad_norm": 0.0, - "learning_rate": 1.5205498511252548e-05, - "loss": 1.0996, + "learning_rate": 1.521852816207885e-05, + "loss": 1.0324, "step": 12192 }, { - "epoch": 0.34599886492622023, + "epoch": 0.3455184335061917, "grad_norm": 0.0, - "learning_rate": 1.5204713751665697e-05, - "loss": 1.0123, + "learning_rate": 1.5217745233236922e-05, + "loss": 0.9022, "step": 12193 }, { - "epoch": 0.3460272417707151, + "epoch": 0.3455467709484542, "grad_norm": 0.0, - "learning_rate": 1.520392894811439e-05, - "loss": 0.9809, + "learning_rate": 1.5216962260443994e-05, + "loss": 0.8764, "step": 12194 }, { - "epoch": 0.34605561861521, + "epoch": 0.34557510839071665, "grad_norm": 0.0, - "learning_rate": 1.5203144100605267e-05, - "loss": 1.1122, + "learning_rate": 1.5216179243706655e-05, + "loss": 0.9352, "step": 12195 }, { - "epoch": 0.34608399545970486, + "epoch": 0.34560344583297914, "grad_norm": 0.0, - "learning_rate": 1.520235920914495e-05, - "loss": 0.9914, + "learning_rate": 1.52153961830315e-05, + "loss": 0.9281, "step": 12196 }, { - "epoch": 0.3461123723041998, + "epoch": 0.3456317832752416, "grad_norm": 0.0, - "learning_rate": 1.520157427374007e-05, - "loss": 0.9084, + "learning_rate": 1.5214613078425126e-05, + "loss": 1.0221, "step": 12197 }, { - "epoch": 0.34614074914869464, + "epoch": 0.345660120717504, "grad_norm": 0.0, - "learning_rate": 1.520078929439726e-05, - "loss": 0.8293, + "learning_rate": 1.5213829929894131e-05, + "loss": 0.9547, "step": 12198 }, { - "epoch": 0.34616912599318955, + "epoch": 0.3456884581597665, "grad_norm": 0.0, - "learning_rate": 1.520000427112315e-05, - "loss": 0.8868, + "learning_rate": 1.5213046737445108e-05, + "loss": 0.9229, "step": 12199 }, { - "epoch": 0.34619750283768447, + "epoch": 0.34571679560202895, "grad_norm": 0.0, - "learning_rate": 1.5199219203924367e-05, - "loss": 0.8848, + "learning_rate": 1.5212263501084658e-05, + "loss": 0.985, "step": 12200 }, { - "epoch": 0.3462258796821793, + "epoch": 0.34574513304429144, "grad_norm": 0.0, - "learning_rate": 1.5198434092807546e-05, - "loss": 0.9541, + "learning_rate": 1.5211480220819377e-05, + "loss": 0.9174, "step": 12201 }, { - "epoch": 0.34625425652667424, + "epoch": 0.3457734704865539, "grad_norm": 0.0, - "learning_rate": 1.5197648937779319e-05, - "loss": 0.8717, + "learning_rate": 1.5210696896655863e-05, + "loss": 0.9703, "step": 12202 }, { - "epoch": 0.3462826333711691, + "epoch": 0.3458018079288163, "grad_norm": 0.0, - "learning_rate": 1.5196863738846319e-05, - "loss": 0.9533, + "learning_rate": 1.5209913528600715e-05, + "loss": 1.0961, "step": 12203 }, { - "epoch": 0.346311010215664, + "epoch": 0.3458301453710788, "grad_norm": 0.0, - "learning_rate": 1.5196078496015174e-05, - "loss": 0.9615, + "learning_rate": 1.5209130116660532e-05, + "loss": 0.988, "step": 12204 }, { - "epoch": 0.34633938706015893, + "epoch": 0.34585848281334125, "grad_norm": 0.0, - "learning_rate": 1.5195293209292525e-05, - "loss": 0.92, + "learning_rate": 1.5208346660841908e-05, + "loss": 0.803, "step": 12205 }, { - "epoch": 0.3463677639046538, + "epoch": 0.34588682025560374, "grad_norm": 0.0, - "learning_rate": 1.5194507878684999e-05, - "loss": 1.0433, + "learning_rate": 1.5207563161151444e-05, + "loss": 0.9575, "step": 12206 }, { - "epoch": 0.3463961407491487, + "epoch": 0.3459151576978662, "grad_norm": 0.0, - "learning_rate": 1.5193722504199232e-05, - "loss": 1.0147, + "learning_rate": 1.5206779617595748e-05, + "loss": 0.9089, "step": 12207 }, { - "epoch": 0.34642451759364357, + "epoch": 0.3459434951401287, "grad_norm": 0.0, - "learning_rate": 1.5192937085841854e-05, - "loss": 1.0074, + "learning_rate": 1.520599603018141e-05, + "loss": 0.8523, "step": 12208 }, { - "epoch": 0.3464528944381385, + "epoch": 0.3459718325823911, "grad_norm": 0.0, - "learning_rate": 1.5192151623619506e-05, - "loss": 0.9589, + "learning_rate": 1.5205212398915034e-05, + "loss": 0.9896, "step": 12209 }, { - "epoch": 0.3464812712826334, + "epoch": 0.34600017002465355, "grad_norm": 0.0, - "learning_rate": 1.5191366117538822e-05, - "loss": 0.9019, + "learning_rate": 1.5204428723803224e-05, + "loss": 1.0224, "step": 12210 }, { - "epoch": 0.34650964812712826, + "epoch": 0.34602850746691605, "grad_norm": 0.0, - "learning_rate": 1.5190580567606433e-05, - "loss": 1.0348, + "learning_rate": 1.5203645004852577e-05, + "loss": 0.8599, "step": 12211 }, { - "epoch": 0.34653802497162317, + "epoch": 0.3460568449091785, "grad_norm": 0.0, - "learning_rate": 1.5189794973828975e-05, - "loss": 0.9561, + "learning_rate": 1.5202861242069693e-05, + "loss": 1.0588, "step": 12212 }, { - "epoch": 0.34656640181611803, + "epoch": 0.346085182351441, "grad_norm": 0.0, - "learning_rate": 1.5189009336213087e-05, - "loss": 0.9453, + "learning_rate": 1.5202077435461178e-05, + "loss": 0.9555, "step": 12213 }, { - "epoch": 0.34659477866061295, + "epoch": 0.3461135197937034, "grad_norm": 0.0, - "learning_rate": 1.5188223654765404e-05, - "loss": 0.921, + "learning_rate": 1.5201293585033634e-05, + "loss": 0.9876, "step": 12214 }, { - "epoch": 0.3466231555051078, + "epoch": 0.34614185723596586, "grad_norm": 0.0, - "learning_rate": 1.5187437929492561e-05, - "loss": 0.841, + "learning_rate": 1.5200509690793665e-05, + "loss": 0.8718, "step": 12215 }, { - "epoch": 0.3466515323496027, + "epoch": 0.34617019467822835, "grad_norm": 0.0, - "learning_rate": 1.5186652160401196e-05, - "loss": 0.9457, + "learning_rate": 1.5199725752747871e-05, + "loss": 0.9413, "step": 12216 }, { - "epoch": 0.34667990919409764, + "epoch": 0.3461985321204908, "grad_norm": 0.0, - "learning_rate": 1.518586634749795e-05, - "loss": 0.8877, + "learning_rate": 1.519894177090285e-05, + "loss": 0.9136, "step": 12217 }, { - "epoch": 0.3467082860385925, + "epoch": 0.3462268695627533, "grad_norm": 0.0, - "learning_rate": 1.5185080490789457e-05, - "loss": 0.9928, + "learning_rate": 1.519815774526522e-05, + "loss": 1.0036, "step": 12218 }, { - "epoch": 0.3467366628830874, + "epoch": 0.3462552070050157, "grad_norm": 0.0, - "learning_rate": 1.5184294590282356e-05, - "loss": 0.963, + "learning_rate": 1.5197373675841572e-05, + "loss": 0.9309, "step": 12219 }, { - "epoch": 0.34676503972758227, + "epoch": 0.3462835444472782, "grad_norm": 0.0, - "learning_rate": 1.5183508645983285e-05, - "loss": 1.175, + "learning_rate": 1.5196589562638518e-05, + "loss": 0.8734, "step": 12220 }, { - "epoch": 0.3467934165720772, + "epoch": 0.34631188188954065, "grad_norm": 0.0, - "learning_rate": 1.5182722657898882e-05, - "loss": 0.9609, + "learning_rate": 1.5195805405662655e-05, + "loss": 0.9789, "step": 12221 }, { - "epoch": 0.3468217934165721, + "epoch": 0.3463402193318031, "grad_norm": 0.0, - "learning_rate": 1.5181936626035791e-05, - "loss": 1.0167, + "learning_rate": 1.51950212049206e-05, + "loss": 0.9462, "step": 12222 }, { - "epoch": 0.34685017026106696, + "epoch": 0.3463685567740656, "grad_norm": 0.0, - "learning_rate": 1.5181150550400647e-05, - "loss": 0.9245, + "learning_rate": 1.519423696041895e-05, + "loss": 1.0181, "step": 12223 }, { - "epoch": 0.3468785471055619, + "epoch": 0.346396894216328, "grad_norm": 0.0, - "learning_rate": 1.5180364431000092e-05, - "loss": 0.9887, + "learning_rate": 1.5193452672164316e-05, + "loss": 0.9488, "step": 12224 }, { - "epoch": 0.34690692395005673, + "epoch": 0.3464252316585905, "grad_norm": 0.0, - "learning_rate": 1.5179578267840762e-05, - "loss": 0.8426, + "learning_rate": 1.5192668340163299e-05, + "loss": 1.0793, "step": 12225 }, { - "epoch": 0.34693530079455165, + "epoch": 0.34645356910085295, "grad_norm": 0.0, - "learning_rate": 1.5178792060929307e-05, - "loss": 0.8728, + "learning_rate": 1.5191883964422504e-05, + "loss": 0.8851, "step": 12226 }, { - "epoch": 0.34696367763904656, + "epoch": 0.3464819065431154, "grad_norm": 0.0, - "learning_rate": 1.517800581027236e-05, - "loss": 0.9204, + "learning_rate": 1.5191099544948552e-05, + "loss": 1.0387, "step": 12227 }, { - "epoch": 0.3469920544835414, + "epoch": 0.3465102439853779, "grad_norm": 0.0, - "learning_rate": 1.5177219515876565e-05, - "loss": 1.002, + "learning_rate": 1.5190315081748033e-05, + "loss": 0.9748, "step": 12228 }, { - "epoch": 0.34702043132803634, + "epoch": 0.3465385814276403, "grad_norm": 0.0, - "learning_rate": 1.5176433177748564e-05, - "loss": 0.9046, + "learning_rate": 1.5189530574827567e-05, + "loss": 0.9193, "step": 12229 }, { - "epoch": 0.3470488081725312, + "epoch": 0.3465669188699028, "grad_norm": 0.0, - "learning_rate": 1.5175646795895e-05, - "loss": 0.8596, + "learning_rate": 1.5188746024193756e-05, + "loss": 0.9256, "step": 12230 }, { - "epoch": 0.3470771850170261, + "epoch": 0.34659525631216526, "grad_norm": 0.0, - "learning_rate": 1.5174860370322515e-05, - "loss": 0.8937, + "learning_rate": 1.5187961429853211e-05, + "loss": 0.9836, "step": 12231 }, { - "epoch": 0.347105561861521, + "epoch": 0.34662359375442775, "grad_norm": 0.0, - "learning_rate": 1.5174073901037749e-05, - "loss": 0.9454, + "learning_rate": 1.5187176791812539e-05, + "loss": 1.0011, "step": 12232 }, { - "epoch": 0.3471339387060159, + "epoch": 0.3466519311966902, "grad_norm": 0.0, - "learning_rate": 1.517328738804735e-05, - "loss": 1.0313, + "learning_rate": 1.5186392110078353e-05, + "loss": 0.8595, "step": 12233 }, { - "epoch": 0.3471623155505108, + "epoch": 0.3466802686389526, "grad_norm": 0.0, - "learning_rate": 1.5172500831357963e-05, - "loss": 0.7902, + "learning_rate": 1.5185607384657257e-05, + "loss": 0.9074, "step": 12234 }, { - "epoch": 0.34719069239500566, + "epoch": 0.3467086060812151, "grad_norm": 0.0, - "learning_rate": 1.5171714230976224e-05, - "loss": 0.8441, + "learning_rate": 1.5184822615555867e-05, + "loss": 0.9026, "step": 12235 }, { - "epoch": 0.3472190692395006, + "epoch": 0.34673694352347756, "grad_norm": 0.0, - "learning_rate": 1.5170927586908787e-05, - "loss": 0.8222, + "learning_rate": 1.5184037802780792e-05, + "loss": 0.8981, "step": 12236 }, { - "epoch": 0.34724744608399544, + "epoch": 0.34676528096574005, "grad_norm": 0.0, - "learning_rate": 1.5170140899162289e-05, - "loss": 0.9647, + "learning_rate": 1.518325294633864e-05, + "loss": 1.014, "step": 12237 }, { - "epoch": 0.34727582292849035, + "epoch": 0.3467936184080025, "grad_norm": 0.0, - "learning_rate": 1.516935416774338e-05, - "loss": 0.9277, + "learning_rate": 1.518246804623602e-05, + "loss": 0.9804, "step": 12238 }, { - "epoch": 0.34730419977298527, + "epoch": 0.34682195585026493, "grad_norm": 0.0, - "learning_rate": 1.5168567392658701e-05, - "loss": 0.8911, + "learning_rate": 1.5181683102479553e-05, + "loss": 0.9564, "step": 12239 }, { - "epoch": 0.3473325766174801, + "epoch": 0.3468502932925274, "grad_norm": 0.0, - "learning_rate": 1.5167780573914903e-05, - "loss": 0.8413, + "learning_rate": 1.5180898115075841e-05, + "loss": 0.8395, "step": 12240 }, { - "epoch": 0.34736095346197504, + "epoch": 0.34687863073478986, "grad_norm": 0.0, - "learning_rate": 1.5166993711518631e-05, - "loss": 0.9268, + "learning_rate": 1.5180113084031502e-05, + "loss": 0.9882, "step": 12241 }, { - "epoch": 0.3473893303064699, + "epoch": 0.34690696817705235, "grad_norm": 0.0, - "learning_rate": 1.516620680547653e-05, - "loss": 1.1222, + "learning_rate": 1.5179328009353147e-05, + "loss": 0.9894, "step": 12242 }, { - "epoch": 0.3474177071509648, + "epoch": 0.3469353056193148, "grad_norm": 0.0, - "learning_rate": 1.516541985579525e-05, - "loss": 0.9011, + "learning_rate": 1.517854289104739e-05, + "loss": 0.9901, "step": 12243 }, { - "epoch": 0.34744608399545973, + "epoch": 0.3469636430615773, "grad_norm": 0.0, - "learning_rate": 1.5164632862481434e-05, - "loss": 0.9002, + "learning_rate": 1.5177757729120841e-05, + "loss": 0.9082, "step": 12244 }, { - "epoch": 0.3474744608399546, + "epoch": 0.3469919805038397, "grad_norm": 0.0, - "learning_rate": 1.516384582554173e-05, - "loss": 0.9231, + "learning_rate": 1.5176972523580115e-05, + "loss": 0.9724, "step": 12245 }, { - "epoch": 0.3475028376844495, + "epoch": 0.34702031794610216, "grad_norm": 0.0, - "learning_rate": 1.5163058744982793e-05, - "loss": 1.01, + "learning_rate": 1.517618727443183e-05, + "loss": 0.8645, "step": 12246 }, { - "epoch": 0.34753121452894437, + "epoch": 0.34704865538836466, "grad_norm": 0.0, - "learning_rate": 1.5162271620811262e-05, - "loss": 1.0219, + "learning_rate": 1.5175401981682597e-05, + "loss": 0.9547, "step": 12247 }, { - "epoch": 0.3475595913734393, + "epoch": 0.3470769928306271, "grad_norm": 0.0, - "learning_rate": 1.516148445303379e-05, - "loss": 0.8734, + "learning_rate": 1.5174616645339031e-05, + "loss": 1.0012, "step": 12248 }, { - "epoch": 0.34758796821793414, + "epoch": 0.3471053302728896, "grad_norm": 0.0, - "learning_rate": 1.5160697241657032e-05, - "loss": 1.0398, + "learning_rate": 1.5173831265407749e-05, + "loss": 1.0157, "step": 12249 }, { - "epoch": 0.34761634506242906, + "epoch": 0.34713366771515203, "grad_norm": 0.0, - "learning_rate": 1.5159909986687631e-05, - "loss": 0.959, + "learning_rate": 1.5173045841895362e-05, + "loss": 1.0256, "step": 12250 }, { - "epoch": 0.34764472190692397, + "epoch": 0.34716200515741447, "grad_norm": 0.0, - "learning_rate": 1.5159122688132235e-05, - "loss": 0.9307, + "learning_rate": 1.5172260374808492e-05, + "loss": 0.8691, "step": 12251 }, { - "epoch": 0.34767309875141883, + "epoch": 0.34719034259967696, "grad_norm": 0.0, - "learning_rate": 1.5158335345997502e-05, - "loss": 0.9519, + "learning_rate": 1.5171474864153747e-05, + "loss": 0.9643, "step": 12252 }, { - "epoch": 0.34770147559591374, + "epoch": 0.3472186800419394, "grad_norm": 0.0, - "learning_rate": 1.5157547960290077e-05, - "loss": 0.9052, + "learning_rate": 1.5170689309937751e-05, + "loss": 1.0455, "step": 12253 }, { - "epoch": 0.3477298524404086, + "epoch": 0.3472470174842019, "grad_norm": 0.0, - "learning_rate": 1.515676053101661e-05, - "loss": 0.8808, + "learning_rate": 1.5169903712167121e-05, + "loss": 0.8381, "step": 12254 }, { - "epoch": 0.3477582292849035, + "epoch": 0.34727535492646433, "grad_norm": 0.0, - "learning_rate": 1.5155973058183757e-05, - "loss": 0.9252, + "learning_rate": 1.5169118070848473e-05, + "loss": 0.9005, "step": 12255 }, { - "epoch": 0.34778660612939843, + "epoch": 0.3473036923687268, "grad_norm": 0.0, - "learning_rate": 1.515518554179817e-05, - "loss": 1.0422, + "learning_rate": 1.5168332385988422e-05, + "loss": 0.9308, "step": 12256 }, { - "epoch": 0.3478149829738933, + "epoch": 0.34733202981098926, "grad_norm": 0.0, - "learning_rate": 1.5154397981866495e-05, - "loss": 0.8994, + "learning_rate": 1.5167546657593587e-05, + "loss": 0.8653, "step": 12257 }, { - "epoch": 0.3478433598183882, + "epoch": 0.3473603672532517, "grad_norm": 0.0, - "learning_rate": 1.5153610378395392e-05, - "loss": 1.0262, + "learning_rate": 1.5166760885670591e-05, + "loss": 0.9008, "step": 12258 }, { - "epoch": 0.34787173666288307, + "epoch": 0.3473887046955142, "grad_norm": 0.0, - "learning_rate": 1.5152822731391508e-05, - "loss": 0.9487, + "learning_rate": 1.5165975070226045e-05, + "loss": 0.9114, "step": 12259 }, { - "epoch": 0.347900113507378, + "epoch": 0.34741704213777663, "grad_norm": 0.0, - "learning_rate": 1.5152035040861499e-05, - "loss": 0.9442, + "learning_rate": 1.5165189211266573e-05, + "loss": 0.8464, "step": 12260 }, { - "epoch": 0.3479284903518729, + "epoch": 0.3474453795800391, "grad_norm": 0.0, - "learning_rate": 1.515124730681202e-05, - "loss": 0.8938, + "learning_rate": 1.5164403308798798e-05, + "loss": 0.9625, "step": 12261 }, { - "epoch": 0.34795686719636776, + "epoch": 0.34747371702230156, "grad_norm": 0.0, - "learning_rate": 1.515045952924972e-05, - "loss": 0.9116, + "learning_rate": 1.5163617362829338e-05, + "loss": 0.953, "step": 12262 }, { - "epoch": 0.3479852440408627, + "epoch": 0.347502054464564, "grad_norm": 0.0, - "learning_rate": 1.514967170818126e-05, - "loss": 0.9391, + "learning_rate": 1.5162831373364806e-05, + "loss": 1.052, "step": 12263 }, { - "epoch": 0.34801362088535753, + "epoch": 0.3475303919068265, "grad_norm": 0.0, - "learning_rate": 1.514888384361329e-05, - "loss": 0.9768, + "learning_rate": 1.5162045340411826e-05, + "loss": 1.0395, "step": 12264 }, { - "epoch": 0.34804199772985245, + "epoch": 0.34755872934908894, "grad_norm": 0.0, - "learning_rate": 1.5148095935552469e-05, - "loss": 1.0163, + "learning_rate": 1.5161259263977028e-05, + "loss": 0.8682, "step": 12265 }, { - "epoch": 0.3480703745743473, + "epoch": 0.34758706679135143, "grad_norm": 0.0, - "learning_rate": 1.514730798400545e-05, - "loss": 0.8799, + "learning_rate": 1.5160473144067026e-05, + "loss": 0.919, "step": 12266 }, { - "epoch": 0.3480987514188422, + "epoch": 0.34761540423361387, "grad_norm": 0.0, - "learning_rate": 1.5146519988978888e-05, - "loss": 0.9602, + "learning_rate": 1.5159686980688438e-05, + "loss": 0.9735, "step": 12267 }, { - "epoch": 0.34812712826333714, + "epoch": 0.34764374167587636, "grad_norm": 0.0, - "learning_rate": 1.514573195047944e-05, - "loss": 0.8941, + "learning_rate": 1.5158900773847891e-05, + "loss": 1.002, "step": 12268 }, { - "epoch": 0.348155505107832, + "epoch": 0.3476720791181388, "grad_norm": 0.0, - "learning_rate": 1.5144943868513764e-05, - "loss": 0.992, + "learning_rate": 1.5158114523552011e-05, + "loss": 0.9628, "step": 12269 }, { - "epoch": 0.3481838819523269, + "epoch": 0.34770041656040124, "grad_norm": 0.0, - "learning_rate": 1.5144155743088513e-05, - "loss": 0.9152, + "learning_rate": 1.5157328229807412e-05, + "loss": 0.99, "step": 12270 }, { - "epoch": 0.34821225879682177, + "epoch": 0.34772875400266373, "grad_norm": 0.0, - "learning_rate": 1.514336757421035e-05, - "loss": 0.9254, + "learning_rate": 1.5156541892620725e-05, + "loss": 0.947, "step": 12271 }, { - "epoch": 0.3482406356413167, + "epoch": 0.34775709144492617, "grad_norm": 0.0, - "learning_rate": 1.5142579361885927e-05, - "loss": 0.929, + "learning_rate": 1.515575551199857e-05, + "loss": 0.9413, "step": 12272 }, { - "epoch": 0.3482690124858116, + "epoch": 0.34778542888718866, "grad_norm": 0.0, - "learning_rate": 1.5141791106121909e-05, - "loss": 0.8573, + "learning_rate": 1.5154969087947575e-05, + "loss": 0.9671, "step": 12273 }, { - "epoch": 0.34829738933030646, + "epoch": 0.3478137663294511, "grad_norm": 0.0, - "learning_rate": 1.5141002806924948e-05, - "loss": 0.8822, + "learning_rate": 1.5154182620474359e-05, + "loss": 0.8945, "step": 12274 }, { - "epoch": 0.3483257661748014, + "epoch": 0.34784210377171354, "grad_norm": 0.0, - "learning_rate": 1.5140214464301704e-05, - "loss": 0.8511, + "learning_rate": 1.5153396109585547e-05, + "loss": 1.0455, "step": 12275 }, { - "epoch": 0.34835414301929624, + "epoch": 0.34787044121397603, "grad_norm": 0.0, - "learning_rate": 1.5139426078258842e-05, - "loss": 0.83, + "learning_rate": 1.5152609555287767e-05, + "loss": 0.9479, "step": 12276 }, { - "epoch": 0.34838251986379115, + "epoch": 0.34789877865623847, "grad_norm": 0.0, - "learning_rate": 1.5138637648803011e-05, - "loss": 0.9413, + "learning_rate": 1.5151822957587645e-05, + "loss": 1.0159, "step": 12277 }, { - "epoch": 0.348410896708286, + "epoch": 0.34792711609850097, "grad_norm": 0.0, - "learning_rate": 1.5137849175940881e-05, - "loss": 0.9961, + "learning_rate": 1.5151036316491805e-05, + "loss": 0.9307, "step": 12278 }, { - "epoch": 0.3484392735527809, + "epoch": 0.3479554535407634, "grad_norm": 0.0, - "learning_rate": 1.5137060659679104e-05, - "loss": 0.9959, + "learning_rate": 1.5150249632006871e-05, + "loss": 0.9305, "step": 12279 }, { - "epoch": 0.34846765039727584, + "epoch": 0.3479837909830259, "grad_norm": 0.0, - "learning_rate": 1.513627210002435e-05, - "loss": 0.9293, + "learning_rate": 1.5149462904139474e-05, + "loss": 1.0145, "step": 12280 }, { - "epoch": 0.3484960272417707, + "epoch": 0.34801212842528834, "grad_norm": 0.0, - "learning_rate": 1.5135483496983273e-05, - "loss": 1.0329, + "learning_rate": 1.5148676132896238e-05, + "loss": 0.9566, "step": 12281 }, { - "epoch": 0.3485244040862656, + "epoch": 0.3480404658675508, "grad_norm": 0.0, - "learning_rate": 1.5134694850562533e-05, - "loss": 0.9716, + "learning_rate": 1.5147889318283793e-05, + "loss": 0.965, "step": 12282 }, { - "epoch": 0.3485527809307605, + "epoch": 0.34806880330981327, "grad_norm": 0.0, - "learning_rate": 1.5133906160768797e-05, - "loss": 0.8366, + "learning_rate": 1.5147102460308757e-05, + "loss": 0.8919, "step": 12283 }, { - "epoch": 0.3485811577752554, + "epoch": 0.3480971407520757, "grad_norm": 0.0, - "learning_rate": 1.5133117427608726e-05, - "loss": 0.8767, + "learning_rate": 1.5146315558977773e-05, + "loss": 0.847, "step": 12284 }, { - "epoch": 0.3486095346197503, + "epoch": 0.3481254781943382, "grad_norm": 0.0, - "learning_rate": 1.5132328651088979e-05, - "loss": 1.057, + "learning_rate": 1.514552861429746e-05, + "loss": 1.0659, "step": 12285 }, { - "epoch": 0.34863791146424516, + "epoch": 0.34815381563660064, "grad_norm": 0.0, - "learning_rate": 1.513153983121622e-05, - "loss": 0.7888, + "learning_rate": 1.5144741626274448e-05, + "loss": 0.9001, "step": 12286 }, { - "epoch": 0.3486662883087401, + "epoch": 0.3481821530788631, "grad_norm": 0.0, - "learning_rate": 1.5130750967997117e-05, - "loss": 1.0201, + "learning_rate": 1.514395459491537e-05, + "loss": 0.9557, "step": 12287 }, { - "epoch": 0.34869466515323494, + "epoch": 0.34821049052112557, "grad_norm": 0.0, - "learning_rate": 1.512996206143833e-05, - "loss": 0.9846, + "learning_rate": 1.5143167520226849e-05, + "loss": 0.9012, "step": 12288 }, { - "epoch": 0.34872304199772985, + "epoch": 0.348238827963388, "grad_norm": 0.0, - "learning_rate": 1.5129173111546518e-05, - "loss": 0.9224, + "learning_rate": 1.5142380402215519e-05, + "loss": 0.9303, "step": 12289 }, { - "epoch": 0.34875141884222477, + "epoch": 0.3482671654056505, "grad_norm": 0.0, - "learning_rate": 1.5128384118328355e-05, - "loss": 0.9565, + "learning_rate": 1.514159324088801e-05, + "loss": 0.9668, "step": 12290 }, { - "epoch": 0.34877979568671963, + "epoch": 0.34829550284791294, "grad_norm": 0.0, - "learning_rate": 1.5127595081790501e-05, - "loss": 0.8846, + "learning_rate": 1.5140806036250952e-05, + "loss": 0.9245, "step": 12291 }, { - "epoch": 0.34880817253121454, + "epoch": 0.34832384029017543, "grad_norm": 0.0, - "learning_rate": 1.5126806001939618e-05, - "loss": 0.9818, + "learning_rate": 1.5140018788310974e-05, + "loss": 1.006, "step": 12292 }, { - "epoch": 0.3488365493757094, + "epoch": 0.3483521777324379, "grad_norm": 0.0, - "learning_rate": 1.5126016878782372e-05, - "loss": 0.8112, + "learning_rate": 1.5139231497074711e-05, + "loss": 0.9783, "step": 12293 }, { - "epoch": 0.3488649262202043, + "epoch": 0.3483805151747003, "grad_norm": 0.0, - "learning_rate": 1.5125227712325436e-05, - "loss": 1.0157, + "learning_rate": 1.5138444162548791e-05, + "loss": 1.0368, "step": 12294 }, { - "epoch": 0.3488933030646992, + "epoch": 0.3484088526169628, "grad_norm": 0.0, - "learning_rate": 1.5124438502575472e-05, - "loss": 0.9772, + "learning_rate": 1.513765678473985e-05, + "loss": 0.9187, "step": 12295 }, { - "epoch": 0.3489216799091941, + "epoch": 0.34843719005922524, "grad_norm": 0.0, - "learning_rate": 1.5123649249539141e-05, - "loss": 0.911, + "learning_rate": 1.5136869363654513e-05, + "loss": 0.9933, "step": 12296 }, { - "epoch": 0.348950056753689, + "epoch": 0.34846552750148774, "grad_norm": 0.0, - "learning_rate": 1.5122859953223116e-05, - "loss": 0.9741, + "learning_rate": 1.5136081899299422e-05, + "loss": 0.9705, "step": 12297 }, { - "epoch": 0.34897843359818387, + "epoch": 0.3484938649437502, "grad_norm": 0.0, - "learning_rate": 1.5122070613634066e-05, - "loss": 0.9056, + "learning_rate": 1.5135294391681207e-05, + "loss": 0.9547, "step": 12298 }, { - "epoch": 0.3490068104426788, + "epoch": 0.3485222023860126, "grad_norm": 0.0, - "learning_rate": 1.512128123077865e-05, - "loss": 0.9345, + "learning_rate": 1.5134506840806498e-05, + "loss": 0.8447, "step": 12299 }, { - "epoch": 0.34903518728717364, + "epoch": 0.3485505398282751, "grad_norm": 0.0, - "learning_rate": 1.5120491804663543e-05, - "loss": 0.9962, + "learning_rate": 1.5133719246681931e-05, + "loss": 1.0399, "step": 12300 }, { - "epoch": 0.34906356413166856, + "epoch": 0.34857887727053755, "grad_norm": 0.0, - "learning_rate": 1.5119702335295415e-05, - "loss": 0.933, + "learning_rate": 1.513293160931414e-05, + "loss": 1.0482, "step": 12301 }, { - "epoch": 0.3490919409761635, + "epoch": 0.34860721471280004, "grad_norm": 0.0, - "learning_rate": 1.5118912822680924e-05, - "loss": 0.8535, + "learning_rate": 1.5132143928709764e-05, + "loss": 1.0979, "step": 12302 }, { - "epoch": 0.34912031782065833, + "epoch": 0.3486355521550625, "grad_norm": 0.0, - "learning_rate": 1.5118123266826752e-05, - "loss": 0.9591, + "learning_rate": 1.5131356204875428e-05, + "loss": 0.9299, "step": 12303 }, { - "epoch": 0.34914869466515325, + "epoch": 0.34866388959732497, "grad_norm": 0.0, - "learning_rate": 1.5117333667739564e-05, - "loss": 0.9385, + "learning_rate": 1.5130568437817776e-05, + "loss": 0.9329, "step": 12304 }, { - "epoch": 0.3491770715096481, + "epoch": 0.3486922270395874, "grad_norm": 0.0, - "learning_rate": 1.5116544025426025e-05, - "loss": 0.9086, + "learning_rate": 1.5129780627543445e-05, + "loss": 0.9706, "step": 12305 }, { - "epoch": 0.349205448354143, + "epoch": 0.34872056448184985, "grad_norm": 0.0, - "learning_rate": 1.511575433989281e-05, - "loss": 0.9655, + "learning_rate": 1.5128992774059063e-05, + "loss": 0.9455, "step": 12306 }, { - "epoch": 0.34923382519863794, + "epoch": 0.34874890192411234, "grad_norm": 0.0, - "learning_rate": 1.5114964611146587e-05, - "loss": 0.9892, + "learning_rate": 1.5128204877371272e-05, + "loss": 1.0142, "step": 12307 }, { - "epoch": 0.3492622020431328, + "epoch": 0.3487772393663748, "grad_norm": 0.0, - "learning_rate": 1.5114174839194029e-05, - "loss": 0.911, + "learning_rate": 1.5127416937486704e-05, + "loss": 0.9973, "step": 12308 }, { - "epoch": 0.3492905788876277, + "epoch": 0.3488055768086373, "grad_norm": 0.0, - "learning_rate": 1.5113385024041804e-05, - "loss": 1.0422, + "learning_rate": 1.5126628954412002e-05, + "loss": 0.9006, "step": 12309 }, { - "epoch": 0.34931895573212257, + "epoch": 0.3488339142508997, "grad_norm": 0.0, - "learning_rate": 1.5112595165696592e-05, - "loss": 0.9539, + "learning_rate": 1.5125840928153797e-05, + "loss": 0.8772, "step": 12310 }, { - "epoch": 0.3493473325766175, + "epoch": 0.34886225169316215, "grad_norm": 0.0, - "learning_rate": 1.5111805264165051e-05, - "loss": 0.9408, + "learning_rate": 1.5125052858718735e-05, + "loss": 0.8828, "step": 12311 }, { - "epoch": 0.34937570942111235, + "epoch": 0.34889058913542464, "grad_norm": 0.0, - "learning_rate": 1.5111015319453866e-05, - "loss": 0.8398, + "learning_rate": 1.5124264746113446e-05, + "loss": 0.8995, "step": 12312 }, { - "epoch": 0.34940408626560726, + "epoch": 0.3489189265776871, "grad_norm": 0.0, - "learning_rate": 1.5110225331569704e-05, - "loss": 0.9006, + "learning_rate": 1.5123476590344572e-05, + "loss": 1.0906, "step": 12313 }, { - "epoch": 0.3494324631101022, + "epoch": 0.3489472640199496, "grad_norm": 0.0, - "learning_rate": 1.510943530051924e-05, - "loss": 0.9892, + "learning_rate": 1.5122688391418754e-05, + "loss": 1.0135, "step": 12314 }, { - "epoch": 0.34946083995459704, + "epoch": 0.348975601462212, "grad_norm": 0.0, - "learning_rate": 1.5108645226309142e-05, - "loss": 0.8918, + "learning_rate": 1.512190014934263e-05, + "loss": 1.0524, "step": 12315 }, { - "epoch": 0.34948921679909195, + "epoch": 0.3490039389044745, "grad_norm": 0.0, - "learning_rate": 1.5107855108946091e-05, - "loss": 0.9296, + "learning_rate": 1.5121111864122837e-05, + "loss": 1.0242, "step": 12316 }, { - "epoch": 0.3495175936435868, + "epoch": 0.34903227634673695, "grad_norm": 0.0, - "learning_rate": 1.5107064948436758e-05, - "loss": 0.9438, + "learning_rate": 1.5120323535766018e-05, + "loss": 0.9035, "step": 12317 }, { - "epoch": 0.3495459704880817, + "epoch": 0.3490606137889994, "grad_norm": 0.0, - "learning_rate": 1.5106274744787819e-05, - "loss": 0.9574, + "learning_rate": 1.5119535164278812e-05, + "loss": 0.952, "step": 12318 }, { - "epoch": 0.34957434733257664, + "epoch": 0.3490889512312619, "grad_norm": 0.0, - "learning_rate": 1.5105484498005946e-05, - "loss": 0.8928, + "learning_rate": 1.5118746749667862e-05, + "loss": 0.8919, "step": 12319 }, { - "epoch": 0.3496027241770715, + "epoch": 0.3491172886735243, "grad_norm": 0.0, - "learning_rate": 1.5104694208097816e-05, - "loss": 0.9855, + "learning_rate": 1.5117958291939807e-05, + "loss": 0.9752, "step": 12320 }, { - "epoch": 0.3496311010215664, + "epoch": 0.3491456261157868, "grad_norm": 0.0, - "learning_rate": 1.5103903875070106e-05, - "loss": 0.9009, + "learning_rate": 1.5117169791101285e-05, + "loss": 0.929, "step": 12321 }, { - "epoch": 0.3496594778660613, + "epoch": 0.34917396355804925, "grad_norm": 0.0, - "learning_rate": 1.5103113498929489e-05, - "loss": 0.8338, + "learning_rate": 1.5116381247158947e-05, + "loss": 0.9478, "step": 12322 }, { - "epoch": 0.3496878547105562, + "epoch": 0.3492023010003117, "grad_norm": 0.0, - "learning_rate": 1.5102323079682641e-05, - "loss": 0.8382, + "learning_rate": 1.5115592660119425e-05, + "loss": 0.9579, "step": 12323 }, { - "epoch": 0.3497162315550511, + "epoch": 0.3492306384425742, "grad_norm": 0.0, - "learning_rate": 1.5101532617336245e-05, - "loss": 0.8992, + "learning_rate": 1.5114804029989372e-05, + "loss": 0.9679, "step": 12324 }, { - "epoch": 0.34974460839954596, + "epoch": 0.3492589758848366, "grad_norm": 0.0, - "learning_rate": 1.5100742111896973e-05, - "loss": 0.9331, + "learning_rate": 1.511401535677542e-05, + "loss": 0.9107, "step": 12325 }, { - "epoch": 0.3497729852440409, + "epoch": 0.3492873133270991, "grad_norm": 0.0, - "learning_rate": 1.50999515633715e-05, - "loss": 0.972, + "learning_rate": 1.511322664048422e-05, + "loss": 0.8558, "step": 12326 }, { - "epoch": 0.34980136208853574, + "epoch": 0.34931565076936155, "grad_norm": 0.0, - "learning_rate": 1.5099160971766509e-05, - "loss": 0.9006, + "learning_rate": 1.5112437881122412e-05, + "loss": 0.8492, "step": 12327 }, { - "epoch": 0.34982973893303065, + "epoch": 0.34934398821162405, "grad_norm": 0.0, - "learning_rate": 1.509837033708868e-05, - "loss": 0.9731, + "learning_rate": 1.5111649078696644e-05, + "loss": 0.8974, "step": 12328 }, { - "epoch": 0.3498581157775255, + "epoch": 0.3493723256538865, "grad_norm": 0.0, - "learning_rate": 1.5097579659344682e-05, - "loss": 0.8493, + "learning_rate": 1.5110860233213556e-05, + "loss": 0.8097, "step": 12329 }, { - "epoch": 0.34988649262202043, + "epoch": 0.3494006630961489, "grad_norm": 0.0, - "learning_rate": 1.5096788938541198e-05, - "loss": 1.0816, + "learning_rate": 1.5110071344679794e-05, + "loss": 0.8562, "step": 12330 }, { - "epoch": 0.34991486946651534, + "epoch": 0.3494290005384114, "grad_norm": 0.0, - "learning_rate": 1.5095998174684915e-05, - "loss": 0.9272, + "learning_rate": 1.5109282413102002e-05, + "loss": 0.9142, "step": 12331 }, { - "epoch": 0.3499432463110102, + "epoch": 0.34945733798067385, "grad_norm": 0.0, - "learning_rate": 1.50952073677825e-05, - "loss": 0.9139, + "learning_rate": 1.5108493438486831e-05, + "loss": 1.0392, "step": 12332 }, { - "epoch": 0.3499716231555051, + "epoch": 0.34948567542293635, "grad_norm": 0.0, - "learning_rate": 1.5094416517840642e-05, - "loss": 0.9917, + "learning_rate": 1.5107704420840919e-05, + "loss": 0.8446, "step": 12333 }, { - "epoch": 0.35, + "epoch": 0.3495140128651988, "grad_norm": 0.0, - "learning_rate": 1.509362562486602e-05, - "loss": 1.0053, + "learning_rate": 1.5106915360170917e-05, + "loss": 1.0645, "step": 12334 }, { - "epoch": 0.3500283768444949, + "epoch": 0.3495423503074612, "grad_norm": 0.0, - "learning_rate": 1.5092834688865311e-05, - "loss": 0.9512, + "learning_rate": 1.5106126256483472e-05, + "loss": 0.8444, "step": 12335 }, { - "epoch": 0.3500567536889898, + "epoch": 0.3495706877497237, "grad_norm": 0.0, - "learning_rate": 1.50920437098452e-05, - "loss": 1.0323, + "learning_rate": 1.5105337109785228e-05, + "loss": 0.8278, "step": 12336 }, { - "epoch": 0.35008513053348467, + "epoch": 0.34959902519198616, "grad_norm": 0.0, - "learning_rate": 1.5091252687812367e-05, - "loss": 1.0418, + "learning_rate": 1.5104547920082833e-05, + "loss": 0.8845, "step": 12337 }, { - "epoch": 0.3501135073779796, + "epoch": 0.34962736263424865, "grad_norm": 0.0, - "learning_rate": 1.5090461622773496e-05, - "loss": 0.8704, + "learning_rate": 1.5103758687382937e-05, + "loss": 0.9502, "step": 12338 }, { - "epoch": 0.35014188422247444, + "epoch": 0.3496557000765111, "grad_norm": 0.0, - "learning_rate": 1.5089670514735261e-05, - "loss": 0.8813, + "learning_rate": 1.5102969411692186e-05, + "loss": 0.9437, "step": 12339 }, { - "epoch": 0.35017026106696936, + "epoch": 0.3496840375187736, "grad_norm": 0.0, - "learning_rate": 1.5088879363704351e-05, - "loss": 0.9422, + "learning_rate": 1.5102180093017231e-05, + "loss": 0.8087, "step": 12340 }, { - "epoch": 0.35019863791146427, + "epoch": 0.349712374961036, "grad_norm": 0.0, - "learning_rate": 1.5088088169687453e-05, - "loss": 0.8946, + "learning_rate": 1.5101390731364715e-05, + "loss": 0.9298, "step": 12341 }, { - "epoch": 0.35022701475595913, + "epoch": 0.34974071240329846, "grad_norm": 0.0, - "learning_rate": 1.5087296932691244e-05, - "loss": 0.9437, + "learning_rate": 1.5100601326741291e-05, + "loss": 1.0818, "step": 12342 }, { - "epoch": 0.35025539160045405, + "epoch": 0.34976904984556095, "grad_norm": 0.0, - "learning_rate": 1.5086505652722407e-05, - "loss": 0.9729, + "learning_rate": 1.5099811879153605e-05, + "loss": 0.8072, "step": 12343 }, { - "epoch": 0.3502837684449489, + "epoch": 0.3497973872878234, "grad_norm": 0.0, - "learning_rate": 1.5085714329787631e-05, - "loss": 0.9624, + "learning_rate": 1.5099022388608315e-05, + "loss": 0.9486, "step": 12344 }, { - "epoch": 0.3503121452894438, + "epoch": 0.3498257247300859, "grad_norm": 0.0, - "learning_rate": 1.5084922963893597e-05, - "loss": 0.9275, + "learning_rate": 1.5098232855112068e-05, + "loss": 0.9829, "step": 12345 }, { - "epoch": 0.3503405221339387, + "epoch": 0.3498540621723483, "grad_norm": 0.0, - "learning_rate": 1.5084131555046985e-05, - "loss": 0.7721, + "learning_rate": 1.5097443278671505e-05, + "loss": 1.0021, "step": 12346 }, { - "epoch": 0.3503688989784336, + "epoch": 0.34988239961461076, "grad_norm": 0.0, - "learning_rate": 1.5083340103254491e-05, - "loss": 0.9282, + "learning_rate": 1.5096653659293287e-05, + "loss": 1.0815, "step": 12347 }, { - "epoch": 0.3503972758229285, + "epoch": 0.34991073705687326, "grad_norm": 0.0, - "learning_rate": 1.5082548608522794e-05, - "loss": 0.9379, + "learning_rate": 1.5095863996984065e-05, + "loss": 1.02, "step": 12348 }, { - "epoch": 0.35042565266742337, + "epoch": 0.3499390744991357, "grad_norm": 0.0, - "learning_rate": 1.5081757070858581e-05, - "loss": 0.9751, + "learning_rate": 1.5095074291750486e-05, + "loss": 0.9813, "step": 12349 }, { - "epoch": 0.3504540295119183, + "epoch": 0.3499674119413982, "grad_norm": 0.0, - "learning_rate": 1.5080965490268532e-05, - "loss": 1.0254, + "learning_rate": 1.5094284543599205e-05, + "loss": 1.0482, "step": 12350 }, { - "epoch": 0.35048240635641315, + "epoch": 0.3499957493836606, "grad_norm": 0.0, - "learning_rate": 1.5080173866759345e-05, - "loss": 0.8377, + "learning_rate": 1.5093494752536875e-05, + "loss": 1.0147, "step": 12351 }, { - "epoch": 0.35051078320090806, + "epoch": 0.3500240868259231, "grad_norm": 0.0, - "learning_rate": 1.5079382200337697e-05, - "loss": 0.948, + "learning_rate": 1.5092704918570146e-05, + "loss": 0.8702, "step": 12352 }, { - "epoch": 0.350539160045403, + "epoch": 0.35005242426818556, "grad_norm": 0.0, - "learning_rate": 1.507859049101028e-05, - "loss": 1.0005, + "learning_rate": 1.5091915041705671e-05, + "loss": 1.0565, "step": 12353 }, { - "epoch": 0.35056753688989784, + "epoch": 0.350080761710448, "grad_norm": 0.0, - "learning_rate": 1.5077798738783784e-05, - "loss": 1.0872, + "learning_rate": 1.5091125121950105e-05, + "loss": 0.8966, "step": 12354 }, { - "epoch": 0.35059591373439275, + "epoch": 0.3501090991527105, "grad_norm": 0.0, - "learning_rate": 1.5077006943664891e-05, - "loss": 1.0114, + "learning_rate": 1.5090335159310105e-05, + "loss": 1.0127, "step": 12355 }, { - "epoch": 0.3506242905788876, + "epoch": 0.35013743659497293, "grad_norm": 0.0, - "learning_rate": 1.5076215105660292e-05, - "loss": 0.9229, + "learning_rate": 1.5089545153792318e-05, + "loss": 0.9753, "step": 12356 }, { - "epoch": 0.3506526674233825, + "epoch": 0.3501657740372354, "grad_norm": 0.0, - "learning_rate": 1.5075423224776677e-05, - "loss": 0.9676, + "learning_rate": 1.5088755105403405e-05, + "loss": 0.9417, "step": 12357 }, { - "epoch": 0.35068104426787744, + "epoch": 0.35019411147949786, "grad_norm": 0.0, - "learning_rate": 1.5074631301020737e-05, - "loss": 0.7983, + "learning_rate": 1.5087965014150015e-05, + "loss": 0.9244, "step": 12358 }, { - "epoch": 0.3507094211123723, + "epoch": 0.3502224489217603, "grad_norm": 0.0, - "learning_rate": 1.5073839334399153e-05, - "loss": 1.0152, + "learning_rate": 1.5087174880038808e-05, + "loss": 0.9622, "step": 12359 }, { - "epoch": 0.3507377979568672, + "epoch": 0.3502507863640228, "grad_norm": 0.0, - "learning_rate": 1.5073047324918621e-05, - "loss": 0.8668, + "learning_rate": 1.508638470307644e-05, + "loss": 0.9763, "step": 12360 }, { - "epoch": 0.3507661748013621, + "epoch": 0.35027912380628523, "grad_norm": 0.0, - "learning_rate": 1.5072255272585833e-05, - "loss": 0.9483, + "learning_rate": 1.5085594483269561e-05, + "loss": 0.8886, "step": 12361 }, { - "epoch": 0.350794551645857, + "epoch": 0.3503074612485477, "grad_norm": 0.0, - "learning_rate": 1.5071463177407473e-05, - "loss": 0.9092, + "learning_rate": 1.5084804220624833e-05, + "loss": 0.847, "step": 12362 }, { - "epoch": 0.35082292849035185, + "epoch": 0.35033579869081016, "grad_norm": 0.0, - "learning_rate": 1.5070671039390237e-05, - "loss": 0.8652, + "learning_rate": 1.508401391514891e-05, + "loss": 1.1011, "step": 12363 }, { - "epoch": 0.35085130533484676, + "epoch": 0.3503641361330726, "grad_norm": 0.0, - "learning_rate": 1.5069878858540814e-05, - "loss": 0.8986, + "learning_rate": 1.5083223566848451e-05, + "loss": 0.8887, "step": 12364 }, { - "epoch": 0.3508796821793417, + "epoch": 0.3503924735753351, "grad_norm": 0.0, - "learning_rate": 1.50690866348659e-05, - "loss": 0.8674, + "learning_rate": 1.5082433175730114e-05, + "loss": 0.8761, "step": 12365 }, { - "epoch": 0.35090805902383654, + "epoch": 0.35042081101759753, "grad_norm": 0.0, - "learning_rate": 1.5068294368372179e-05, - "loss": 1.0164, + "learning_rate": 1.508164274180055e-05, + "loss": 0.8421, "step": 12366 }, { - "epoch": 0.35093643586833145, + "epoch": 0.35044914845986, "grad_norm": 0.0, - "learning_rate": 1.506750205906635e-05, - "loss": 0.9027, + "learning_rate": 1.5080852265066425e-05, + "loss": 1.0662, "step": 12367 }, { - "epoch": 0.3509648127128263, + "epoch": 0.35047748590212247, "grad_norm": 0.0, - "learning_rate": 1.5066709706955105e-05, - "loss": 1.0913, + "learning_rate": 1.5080061745534398e-05, + "loss": 0.9888, "step": 12368 }, { - "epoch": 0.35099318955732123, + "epoch": 0.35050582334438496, "grad_norm": 0.0, - "learning_rate": 1.5065917312045131e-05, - "loss": 1.0618, + "learning_rate": 1.5079271183211118e-05, + "loss": 0.9023, "step": 12369 }, { - "epoch": 0.35102156640181614, + "epoch": 0.3505341607866474, "grad_norm": 0.0, - "learning_rate": 1.5065124874343127e-05, - "loss": 0.8523, + "learning_rate": 1.5078480578103256e-05, + "loss": 0.9145, "step": 12370 }, { - "epoch": 0.351049943246311, + "epoch": 0.35056249822890984, "grad_norm": 0.0, - "learning_rate": 1.5064332393855787e-05, - "loss": 0.9692, + "learning_rate": 1.5077689930217462e-05, + "loss": 1.0698, "step": 12371 }, { - "epoch": 0.3510783200908059, + "epoch": 0.35059083567117233, "grad_norm": 0.0, - "learning_rate": 1.5063539870589803e-05, - "loss": 0.9383, + "learning_rate": 1.5076899239560403e-05, + "loss": 0.9891, "step": 12372 }, { - "epoch": 0.3511066969353008, + "epoch": 0.35061917311343477, "grad_norm": 0.0, - "learning_rate": 1.506274730455187e-05, - "loss": 0.9474, + "learning_rate": 1.5076108506138739e-05, + "loss": 0.9321, "step": 12373 }, { - "epoch": 0.3511350737797957, + "epoch": 0.35064751055569726, "grad_norm": 0.0, - "learning_rate": 1.5061954695748684e-05, - "loss": 0.834, + "learning_rate": 1.507531772995912e-05, + "loss": 0.9616, "step": 12374 }, { - "epoch": 0.35116345062429055, + "epoch": 0.3506758479979597, "grad_norm": 0.0, - "learning_rate": 1.506116204418694e-05, - "loss": 0.9061, + "learning_rate": 1.5074526911028222e-05, + "loss": 0.9362, "step": 12375 }, { - "epoch": 0.35119182746878547, + "epoch": 0.35070418544022214, "grad_norm": 0.0, - "learning_rate": 1.506036934987333e-05, - "loss": 0.9041, + "learning_rate": 1.5073736049352696e-05, + "loss": 0.8561, "step": 12376 }, { - "epoch": 0.3512202043132804, + "epoch": 0.35073252288248463, "grad_norm": 0.0, - "learning_rate": 1.5059576612814551e-05, - "loss": 0.9755, + "learning_rate": 1.5072945144939208e-05, + "loss": 0.9666, "step": 12377 }, { - "epoch": 0.35124858115777524, + "epoch": 0.35076086032474707, "grad_norm": 0.0, - "learning_rate": 1.5058783833017304e-05, - "loss": 0.9733, + "learning_rate": 1.5072154197794421e-05, + "loss": 0.9846, "step": 12378 }, { - "epoch": 0.35127695800227016, + "epoch": 0.35078919776700956, "grad_norm": 0.0, - "learning_rate": 1.5057991010488281e-05, - "loss": 0.969, + "learning_rate": 1.5071363207924994e-05, + "loss": 0.8839, "step": 12379 }, { - "epoch": 0.351305334846765, + "epoch": 0.350817535209272, "grad_norm": 0.0, - "learning_rate": 1.5057198145234182e-05, - "loss": 0.9351, + "learning_rate": 1.5070572175337591e-05, + "loss": 1.0108, "step": 12380 }, { - "epoch": 0.35133371169125993, + "epoch": 0.3508458726515345, "grad_norm": 0.0, - "learning_rate": 1.5056405237261702e-05, - "loss": 1.0224, + "learning_rate": 1.5069781100038878e-05, + "loss": 0.8352, "step": 12381 }, { - "epoch": 0.35136208853575485, + "epoch": 0.35087421009379693, "grad_norm": 0.0, - "learning_rate": 1.5055612286577541e-05, - "loss": 0.9664, + "learning_rate": 1.5068989982035516e-05, + "loss": 0.9985, "step": 12382 }, { - "epoch": 0.3513904653802497, + "epoch": 0.3509025475360594, "grad_norm": 0.0, - "learning_rate": 1.5054819293188394e-05, - "loss": 0.9372, + "learning_rate": 1.5068198821334166e-05, + "loss": 0.8884, "step": 12383 }, { - "epoch": 0.3514188422247446, + "epoch": 0.35093088497832187, "grad_norm": 0.0, - "learning_rate": 1.5054026257100964e-05, - "loss": 0.9138, + "learning_rate": 1.5067407617941499e-05, + "loss": 1.0011, "step": 12384 }, { - "epoch": 0.3514472190692395, + "epoch": 0.3509592224205843, "grad_norm": 0.0, - "learning_rate": 1.5053233178321942e-05, - "loss": 0.925, + "learning_rate": 1.5066616371864174e-05, + "loss": 0.9294, "step": 12385 }, { - "epoch": 0.3514755959137344, + "epoch": 0.3509875598628468, "grad_norm": 0.0, - "learning_rate": 1.5052440056858038e-05, - "loss": 0.8338, + "learning_rate": 1.5065825083108858e-05, + "loss": 0.9888, "step": 12386 }, { - "epoch": 0.3515039727582293, + "epoch": 0.35101589730510924, "grad_norm": 0.0, - "learning_rate": 1.505164689271594e-05, - "loss": 1.0523, + "learning_rate": 1.5065033751682214e-05, + "loss": 0.8829, "step": 12387 }, { - "epoch": 0.35153234960272417, + "epoch": 0.3510442347473717, "grad_norm": 0.0, - "learning_rate": 1.5050853685902358e-05, - "loss": 0.8882, + "learning_rate": 1.5064242377590912e-05, + "loss": 0.9215, "step": 12388 }, { - "epoch": 0.3515607264472191, + "epoch": 0.35107257218963417, "grad_norm": 0.0, - "learning_rate": 1.5050060436423986e-05, - "loss": 0.8747, + "learning_rate": 1.5063450960841616e-05, + "loss": 0.9186, "step": 12389 }, { - "epoch": 0.35158910329171394, + "epoch": 0.3511009096318966, "grad_norm": 0.0, - "learning_rate": 1.5049267144287527e-05, - "loss": 1.0095, + "learning_rate": 1.5062659501440994e-05, + "loss": 1.0303, "step": 12390 }, { - "epoch": 0.35161748013620886, + "epoch": 0.3511292470741591, "grad_norm": 0.0, - "learning_rate": 1.5048473809499682e-05, - "loss": 0.8686, + "learning_rate": 1.5061867999395708e-05, + "loss": 1.0123, "step": 12391 }, { - "epoch": 0.3516458569807037, + "epoch": 0.35115758451642154, "grad_norm": 0.0, - "learning_rate": 1.5047680432067151e-05, - "loss": 0.9426, + "learning_rate": 1.506107645471243e-05, + "loss": 0.9511, "step": 12392 }, { - "epoch": 0.35167423382519863, + "epoch": 0.35118592195868403, "grad_norm": 0.0, - "learning_rate": 1.5046887011996635e-05, - "loss": 1.0825, + "learning_rate": 1.5060284867397825e-05, + "loss": 0.9171, "step": 12393 }, { - "epoch": 0.35170261066969355, + "epoch": 0.35121425940094647, "grad_norm": 0.0, - "learning_rate": 1.504609354929484e-05, - "loss": 0.9286, + "learning_rate": 1.5059493237458563e-05, + "loss": 0.9566, "step": 12394 }, { - "epoch": 0.3517309875141884, + "epoch": 0.3512425968432089, "grad_norm": 0.0, - "learning_rate": 1.5045300043968465e-05, - "loss": 0.9352, + "learning_rate": 1.5058701564901309e-05, + "loss": 0.861, "step": 12395 }, { - "epoch": 0.3517593643586833, + "epoch": 0.3512709342854714, "grad_norm": 0.0, - "learning_rate": 1.5044506496024216e-05, - "loss": 0.9829, + "learning_rate": 1.505790984973273e-05, + "loss": 0.8816, "step": 12396 }, { - "epoch": 0.3517877412031782, + "epoch": 0.35129927172773384, "grad_norm": 0.0, - "learning_rate": 1.504371290546879e-05, - "loss": 0.9436, + "learning_rate": 1.5057118091959504e-05, + "loss": 0.9, "step": 12397 }, { - "epoch": 0.3518161180476731, + "epoch": 0.35132760916999634, "grad_norm": 0.0, - "learning_rate": 1.5042919272308895e-05, - "loss": 0.7895, + "learning_rate": 1.5056326291588293e-05, + "loss": 0.9547, "step": 12398 }, { - "epoch": 0.351844494892168, + "epoch": 0.3513559466122588, "grad_norm": 0.0, - "learning_rate": 1.5042125596551235e-05, - "loss": 0.8368, + "learning_rate": 1.5055534448625766e-05, + "loss": 1.02, "step": 12399 }, { - "epoch": 0.3518728717366629, + "epoch": 0.3513842840545212, "grad_norm": 0.0, - "learning_rate": 1.5041331878202514e-05, - "loss": 0.8851, + "learning_rate": 1.5054742563078594e-05, + "loss": 0.9039, "step": 12400 }, { - "epoch": 0.3519012485811578, + "epoch": 0.3514126214967837, "grad_norm": 0.0, - "learning_rate": 1.5040538117269435e-05, - "loss": 1.0091, + "learning_rate": 1.5053950634953451e-05, + "loss": 0.9871, "step": 12401 }, { - "epoch": 0.35192962542565265, + "epoch": 0.35144095893904614, "grad_norm": 0.0, - "learning_rate": 1.5039744313758706e-05, - "loss": 0.9401, + "learning_rate": 1.5053158664257005e-05, + "loss": 0.8749, "step": 12402 }, { - "epoch": 0.35195800227014756, + "epoch": 0.35146929638130864, "grad_norm": 0.0, - "learning_rate": 1.5038950467677029e-05, - "loss": 0.9273, + "learning_rate": 1.5052366650995927e-05, + "loss": 0.8681, "step": 12403 }, { - "epoch": 0.3519863791146425, + "epoch": 0.3514976338235711, "grad_norm": 0.0, - "learning_rate": 1.5038156579031109e-05, - "loss": 1.0218, + "learning_rate": 1.5051574595176886e-05, + "loss": 0.9161, "step": 12404 }, { - "epoch": 0.35201475595913734, + "epoch": 0.35152597126583357, "grad_norm": 0.0, - "learning_rate": 1.5037362647827656e-05, - "loss": 0.9839, + "learning_rate": 1.505078249680656e-05, + "loss": 1.0215, "step": 12405 }, { - "epoch": 0.35204313280363225, + "epoch": 0.351554308708096, "grad_norm": 0.0, - "learning_rate": 1.5036568674073375e-05, - "loss": 0.891, + "learning_rate": 1.5049990355891617e-05, + "loss": 1.032, "step": 12406 }, { - "epoch": 0.3520715096481271, + "epoch": 0.35158264615035845, "grad_norm": 0.0, - "learning_rate": 1.5035774657774972e-05, - "loss": 1.0002, + "learning_rate": 1.5049198172438728e-05, + "loss": 0.888, "step": 12407 }, { - "epoch": 0.352099886492622, + "epoch": 0.35161098359262094, "grad_norm": 0.0, - "learning_rate": 1.5034980598939151e-05, - "loss": 0.8955, + "learning_rate": 1.5048405946454568e-05, + "loss": 0.8949, "step": 12408 }, { - "epoch": 0.3521282633371169, + "epoch": 0.3516393210348834, "grad_norm": 0.0, - "learning_rate": 1.5034186497572624e-05, - "loss": 0.9452, + "learning_rate": 1.504761367794581e-05, + "loss": 1.1101, "step": 12409 }, { - "epoch": 0.3521566401816118, + "epoch": 0.3516676584771459, "grad_norm": 0.0, - "learning_rate": 1.5033392353682097e-05, - "loss": 0.7757, + "learning_rate": 1.5046821366919128e-05, + "loss": 0.9642, "step": 12410 }, { - "epoch": 0.3521850170261067, + "epoch": 0.3516959959194083, "grad_norm": 0.0, - "learning_rate": 1.5032598167274279e-05, - "loss": 0.9484, + "learning_rate": 1.5046029013381197e-05, + "loss": 0.9607, "step": 12411 }, { - "epoch": 0.3522133938706016, + "epoch": 0.35172433336167075, "grad_norm": 0.0, - "learning_rate": 1.503180393835588e-05, - "loss": 1.0011, + "learning_rate": 1.5045236617338688e-05, + "loss": 0.925, "step": 12412 }, { - "epoch": 0.3522417707150965, + "epoch": 0.35175267080393324, "grad_norm": 0.0, - "learning_rate": 1.5031009666933606e-05, - "loss": 0.8755, + "learning_rate": 1.5044444178798279e-05, + "loss": 0.9709, "step": 12413 }, { - "epoch": 0.35227014755959135, + "epoch": 0.3517810082461957, "grad_norm": 0.0, - "learning_rate": 1.5030215353014166e-05, - "loss": 1.0069, + "learning_rate": 1.5043651697766642e-05, + "loss": 1.0226, "step": 12414 }, { - "epoch": 0.35229852440408627, + "epoch": 0.3518093456884582, "grad_norm": 0.0, - "learning_rate": 1.5029420996604272e-05, - "loss": 0.9095, + "learning_rate": 1.5042859174250453e-05, + "loss": 0.7856, "step": 12415 }, { - "epoch": 0.3523269012485812, + "epoch": 0.3518376831307206, "grad_norm": 0.0, - "learning_rate": 1.5028626597710632e-05, - "loss": 0.9103, + "learning_rate": 1.5042066608256389e-05, + "loss": 0.9748, "step": 12416 }, { - "epoch": 0.35235527809307604, + "epoch": 0.3518660205729831, "grad_norm": 0.0, - "learning_rate": 1.5027832156339957e-05, - "loss": 0.9602, + "learning_rate": 1.5041273999791128e-05, + "loss": 0.8345, "step": 12417 }, { - "epoch": 0.35238365493757096, + "epoch": 0.35189435801524555, "grad_norm": 0.0, - "learning_rate": 1.5027037672498957e-05, - "loss": 0.8628, + "learning_rate": 1.5040481348861345e-05, + "loss": 0.7592, "step": 12418 }, { - "epoch": 0.3524120317820658, + "epoch": 0.351922695457508, "grad_norm": 0.0, - "learning_rate": 1.5026243146194346e-05, - "loss": 1.0218, + "learning_rate": 1.5039688655473712e-05, + "loss": 0.8776, "step": 12419 }, { - "epoch": 0.35244040862656073, + "epoch": 0.3519510328997705, "grad_norm": 0.0, - "learning_rate": 1.5025448577432831e-05, - "loss": 1.0043, + "learning_rate": 1.5038895919634913e-05, + "loss": 1.0882, "step": 12420 }, { - "epoch": 0.35246878547105565, + "epoch": 0.3519793703420329, "grad_norm": 0.0, - "learning_rate": 1.5024653966221125e-05, - "loss": 0.8933, + "learning_rate": 1.5038103141351617e-05, + "loss": 0.9857, "step": 12421 }, { - "epoch": 0.3524971623155505, + "epoch": 0.3520077077842954, "grad_norm": 0.0, - "learning_rate": 1.5023859312565945e-05, - "loss": 0.9028, + "learning_rate": 1.5037310320630512e-05, + "loss": 0.9814, "step": 12422 }, { - "epoch": 0.3525255391600454, + "epoch": 0.35203604522655785, "grad_norm": 0.0, - "learning_rate": 1.5023064616473996e-05, - "loss": 0.9153, + "learning_rate": 1.5036517457478272e-05, + "loss": 0.8741, "step": 12423 }, { - "epoch": 0.3525539160045403, + "epoch": 0.3520643826688203, "grad_norm": 0.0, - "learning_rate": 1.5022269877951996e-05, - "loss": 0.9976, + "learning_rate": 1.5035724551901576e-05, + "loss": 1.0101, "step": 12424 }, { - "epoch": 0.3525822928490352, + "epoch": 0.3520927201110828, "grad_norm": 0.0, - "learning_rate": 1.5021475097006657e-05, - "loss": 0.9416, + "learning_rate": 1.5034931603907099e-05, + "loss": 0.9776, "step": 12425 }, { - "epoch": 0.35261066969353005, + "epoch": 0.3521210575533452, "grad_norm": 0.0, - "learning_rate": 1.5020680273644692e-05, - "loss": 0.8826, + "learning_rate": 1.5034138613501525e-05, + "loss": 1.0027, "step": 12426 }, { - "epoch": 0.35263904653802497, + "epoch": 0.3521493949956077, "grad_norm": 0.0, - "learning_rate": 1.5019885407872814e-05, - "loss": 1.0034, + "learning_rate": 1.503334558069153e-05, + "loss": 0.8293, "step": 12427 }, { - "epoch": 0.3526674233825199, + "epoch": 0.35217773243787015, "grad_norm": 0.0, - "learning_rate": 1.5019090499697739e-05, - "loss": 0.8596, + "learning_rate": 1.5032552505483799e-05, + "loss": 0.9215, "step": 12428 }, { - "epoch": 0.35269580022701474, + "epoch": 0.35220606988013264, "grad_norm": 0.0, - "learning_rate": 1.501829554912618e-05, - "loss": 0.8575, + "learning_rate": 1.5031759387885008e-05, + "loss": 0.9524, "step": 12429 }, { - "epoch": 0.35272417707150966, + "epoch": 0.3522344073223951, "grad_norm": 0.0, - "learning_rate": 1.5017500556164855e-05, - "loss": 0.9368, + "learning_rate": 1.5030966227901842e-05, + "loss": 0.9957, "step": 12430 }, { - "epoch": 0.3527525539160045, + "epoch": 0.3522627447646575, "grad_norm": 0.0, - "learning_rate": 1.5016705520820476e-05, - "loss": 0.8815, + "learning_rate": 1.5030173025540977e-05, + "loss": 1.0103, "step": 12431 }, { - "epoch": 0.35278093076049943, + "epoch": 0.35229108220692, "grad_norm": 0.0, - "learning_rate": 1.5015910443099759e-05, - "loss": 0.8275, + "learning_rate": 1.5029379780809094e-05, + "loss": 0.9588, "step": 12432 }, { - "epoch": 0.35280930760499435, + "epoch": 0.35231941964918245, "grad_norm": 0.0, - "learning_rate": 1.5015115323009423e-05, - "loss": 1.0001, + "learning_rate": 1.5028586493712883e-05, + "loss": 0.9471, "step": 12433 }, { - "epoch": 0.3528376844494892, + "epoch": 0.35234775709144495, "grad_norm": 0.0, - "learning_rate": 1.5014320160556182e-05, - "loss": 0.8422, + "learning_rate": 1.5027793164259014e-05, + "loss": 0.8927, "step": 12434 }, { - "epoch": 0.3528660612939841, + "epoch": 0.3523760945337074, "grad_norm": 0.0, - "learning_rate": 1.501352495574675e-05, - "loss": 0.9802, + "learning_rate": 1.502699979245418e-05, + "loss": 0.9183, "step": 12435 }, { - "epoch": 0.352894438138479, + "epoch": 0.3524044319759698, "grad_norm": 0.0, - "learning_rate": 1.5012729708587852e-05, - "loss": 0.9683, + "learning_rate": 1.5026206378305062e-05, + "loss": 1.0034, "step": 12436 }, { - "epoch": 0.3529228149829739, + "epoch": 0.3524327694182323, "grad_norm": 0.0, - "learning_rate": 1.5011934419086199e-05, - "loss": 0.8659, + "learning_rate": 1.5025412921818338e-05, + "loss": 0.9377, "step": 12437 }, { - "epoch": 0.3529511918274688, + "epoch": 0.35246110686049476, "grad_norm": 0.0, - "learning_rate": 1.5011139087248508e-05, - "loss": 0.9251, + "learning_rate": 1.5024619423000695e-05, + "loss": 1.0078, "step": 12438 }, { - "epoch": 0.3529795686719637, + "epoch": 0.35248944430275725, "grad_norm": 0.0, - "learning_rate": 1.5010343713081504e-05, - "loss": 1.0423, + "learning_rate": 1.502382588185882e-05, + "loss": 1.0745, "step": 12439 }, { - "epoch": 0.3530079455164586, + "epoch": 0.3525177817450197, "grad_norm": 0.0, - "learning_rate": 1.50095482965919e-05, - "loss": 0.9583, + "learning_rate": 1.5023032298399391e-05, + "loss": 0.9322, "step": 12440 }, { - "epoch": 0.35303632236095345, + "epoch": 0.3525461191872822, "grad_norm": 0.0, - "learning_rate": 1.5008752837786413e-05, - "loss": 0.9462, + "learning_rate": 1.5022238672629094e-05, + "loss": 1.0326, "step": 12441 }, { - "epoch": 0.35306469920544836, + "epoch": 0.3525744566295446, "grad_norm": 0.0, - "learning_rate": 1.500795733667177e-05, - "loss": 1.0008, + "learning_rate": 1.502144500455462e-05, + "loss": 0.8935, "step": 12442 }, { - "epoch": 0.3530930760499432, + "epoch": 0.35260279407180706, "grad_norm": 0.0, - "learning_rate": 1.5007161793254686e-05, - "loss": 0.9193, + "learning_rate": 1.5020651294182646e-05, + "loss": 0.919, "step": 12443 }, { - "epoch": 0.35312145289443814, + "epoch": 0.35263113151406955, "grad_norm": 0.0, - "learning_rate": 1.5006366207541877e-05, - "loss": 1.0104, + "learning_rate": 1.5019857541519866e-05, + "loss": 0.9265, "step": 12444 }, { - "epoch": 0.35314982973893305, + "epoch": 0.352659468956332, "grad_norm": 0.0, - "learning_rate": 1.5005570579540073e-05, - "loss": 0.8317, + "learning_rate": 1.501906374657296e-05, + "loss": 0.9425, "step": 12445 }, { - "epoch": 0.3531782065834279, + "epoch": 0.3526878063985945, "grad_norm": 0.0, - "learning_rate": 1.5004774909255985e-05, - "loss": 0.8672, + "learning_rate": 1.5018269909348617e-05, + "loss": 0.9238, "step": 12446 }, { - "epoch": 0.3532065834279228, + "epoch": 0.3527161438408569, "grad_norm": 0.0, - "learning_rate": 1.5003979196696343e-05, - "loss": 0.991, + "learning_rate": 1.501747602985352e-05, + "loss": 0.9808, "step": 12447 }, { - "epoch": 0.3532349602724177, + "epoch": 0.35274448128311936, "grad_norm": 0.0, - "learning_rate": 1.5003183441867858e-05, - "loss": 0.9101, + "learning_rate": 1.5016682108094362e-05, + "loss": 0.8227, "step": 12448 }, { - "epoch": 0.3532633371169126, + "epoch": 0.35277281872538185, "grad_norm": 0.0, - "learning_rate": 1.5002387644777263e-05, - "loss": 1.0438, + "learning_rate": 1.5015888144077826e-05, + "loss": 0.9374, "step": 12449 }, { - "epoch": 0.3532917139614075, + "epoch": 0.3528011561676443, "grad_norm": 0.0, - "learning_rate": 1.5001591805431272e-05, - "loss": 0.9485, + "learning_rate": 1.5015094137810602e-05, + "loss": 0.9331, "step": 12450 }, { - "epoch": 0.3533200908059024, + "epoch": 0.3528294936099068, "grad_norm": 0.0, - "learning_rate": 1.5000795923836611e-05, - "loss": 0.8956, + "learning_rate": 1.5014300089299381e-05, + "loss": 0.9594, "step": 12451 }, { - "epoch": 0.3533484676503973, + "epoch": 0.3528578310521692, "grad_norm": 0.0, - "learning_rate": 1.5000000000000002e-05, - "loss": 1.0441, + "learning_rate": 1.5013505998550846e-05, + "loss": 0.9547, "step": 12452 }, { - "epoch": 0.35337684449489215, + "epoch": 0.3528861684944317, "grad_norm": 0.0, - "learning_rate": 1.499920403392817e-05, - "loss": 0.9614, + "learning_rate": 1.5012711865571686e-05, + "loss": 0.9311, "step": 12453 }, { - "epoch": 0.35340522133938707, + "epoch": 0.35291450593669416, "grad_norm": 0.0, - "learning_rate": 1.4998408025627831e-05, - "loss": 1.015, + "learning_rate": 1.5011917690368594e-05, + "loss": 0.8595, "step": 12454 }, { - "epoch": 0.3534335981838819, + "epoch": 0.3529428433789566, "grad_norm": 0.0, - "learning_rate": 1.499761197510572e-05, - "loss": 0.9275, + "learning_rate": 1.501112347294826e-05, + "loss": 0.9329, "step": 12455 }, { - "epoch": 0.35346197502837684, + "epoch": 0.3529711808212191, "grad_norm": 0.0, - "learning_rate": 1.4996815882368554e-05, - "loss": 0.759, + "learning_rate": 1.5010329213317372e-05, + "loss": 0.8366, "step": 12456 }, { - "epoch": 0.35349035187287176, + "epoch": 0.3529995182634815, "grad_norm": 0.0, - "learning_rate": 1.499601974742306e-05, - "loss": 0.866, + "learning_rate": 1.5009534911482617e-05, + "loss": 0.8923, "step": 12457 }, { - "epoch": 0.3535187287173666, + "epoch": 0.353027855705744, "grad_norm": 0.0, - "learning_rate": 1.4995223570275963e-05, - "loss": 1.0375, + "learning_rate": 1.5008740567450692e-05, + "loss": 0.9283, "step": 12458 }, { - "epoch": 0.35354710556186153, + "epoch": 0.35305619314800646, "grad_norm": 0.0, - "learning_rate": 1.4994427350933987e-05, - "loss": 0.8018, + "learning_rate": 1.5007946181228286e-05, + "loss": 0.8659, "step": 12459 }, { - "epoch": 0.3535754824063564, + "epoch": 0.3530845305902689, "grad_norm": 0.0, - "learning_rate": 1.499363108940386e-05, - "loss": 0.8875, + "learning_rate": 1.5007151752822087e-05, + "loss": 0.8654, "step": 12460 }, { - "epoch": 0.3536038592508513, + "epoch": 0.3531128680325314, "grad_norm": 0.0, - "learning_rate": 1.4992834785692303e-05, - "loss": 0.9244, + "learning_rate": 1.5006357282238791e-05, + "loss": 0.9827, "step": 12461 }, { - "epoch": 0.3536322360953462, + "epoch": 0.35314120547479383, "grad_norm": 0.0, - "learning_rate": 1.499203843980605e-05, - "loss": 1.0135, + "learning_rate": 1.5005562769485087e-05, + "loss": 0.8305, "step": 12462 }, { - "epoch": 0.3536606129398411, + "epoch": 0.3531695429170563, "grad_norm": 0.0, - "learning_rate": 1.4991242051751824e-05, - "loss": 0.9139, + "learning_rate": 1.500476821456767e-05, + "loss": 0.9654, "step": 12463 }, { - "epoch": 0.353688989784336, + "epoch": 0.35319788035931876, "grad_norm": 0.0, - "learning_rate": 1.4990445621536349e-05, - "loss": 1.0035, + "learning_rate": 1.5003973617493234e-05, + "loss": 0.9541, "step": 12464 }, { - "epoch": 0.35371736662883085, + "epoch": 0.35322621780158125, "grad_norm": 0.0, - "learning_rate": 1.4989649149166358e-05, - "loss": 1.001, + "learning_rate": 1.5003178978268468e-05, + "loss": 1.0391, "step": 12465 }, { - "epoch": 0.35374574347332577, + "epoch": 0.3532545552438437, "grad_norm": 0.0, - "learning_rate": 1.4988852634648577e-05, - "loss": 0.9185, + "learning_rate": 1.5002384296900068e-05, + "loss": 0.8593, "step": 12466 }, { - "epoch": 0.3537741203178207, + "epoch": 0.35328289268610613, "grad_norm": 0.0, - "learning_rate": 1.498805607798973e-05, - "loss": 0.9672, + "learning_rate": 1.5001589573394726e-05, + "loss": 0.9245, "step": 12467 }, { - "epoch": 0.35380249716231554, + "epoch": 0.3533112301283686, "grad_norm": 0.0, - "learning_rate": 1.4987259479196551e-05, - "loss": 0.9369, + "learning_rate": 1.500079480775914e-05, + "loss": 0.9515, "step": 12468 }, { - "epoch": 0.35383087400681046, + "epoch": 0.35333956757063106, "grad_norm": 0.0, - "learning_rate": 1.4986462838275769e-05, - "loss": 0.9721, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.9205, "step": 12469 }, { - "epoch": 0.3538592508513053, + "epoch": 0.35336790501289356, "grad_norm": 0.0, - "learning_rate": 1.4985666155234109e-05, - "loss": 0.9366, + "learning_rate": 1.4999205150124005e-05, + "loss": 0.9998, "step": 12470 }, { - "epoch": 0.35388762769580023, + "epoch": 0.353396242455156, "grad_norm": 0.0, - "learning_rate": 1.4984869430078304e-05, - "loss": 0.9642, + "learning_rate": 1.499841025813785e-05, + "loss": 0.9133, "step": 12471 }, { - "epoch": 0.3539160045402951, + "epoch": 0.35342457989741843, "grad_norm": 0.0, - "learning_rate": 1.4984072662815082e-05, - "loss": 0.9339, + "learning_rate": 1.4997615324048229e-05, + "loss": 0.8782, "step": 12472 }, { - "epoch": 0.35394438138479, + "epoch": 0.3534529173396809, "grad_norm": 0.0, - "learning_rate": 1.4983275853451176e-05, - "loss": 0.8722, + "learning_rate": 1.4996820347861834e-05, + "loss": 1.0087, "step": 12473 }, { - "epoch": 0.3539727582292849, + "epoch": 0.35348125478194337, "grad_norm": 0.0, - "learning_rate": 1.4982479001993311e-05, - "loss": 0.8822, + "learning_rate": 1.4996025329585368e-05, + "loss": 0.9994, "step": 12474 }, { - "epoch": 0.3540011350737798, + "epoch": 0.35350959222420586, "grad_norm": 0.0, - "learning_rate": 1.4981682108448225e-05, - "loss": 1.0213, + "learning_rate": 1.4995230269225526e-05, + "loss": 0.8576, "step": 12475 }, { - "epoch": 0.3540295119182747, + "epoch": 0.3535379296664683, "grad_norm": 0.0, - "learning_rate": 1.4980885172822648e-05, - "loss": 0.9149, + "learning_rate": 1.4994435166789006e-05, + "loss": 0.8486, "step": 12476 }, { - "epoch": 0.35405788876276956, + "epoch": 0.3535662671087308, "grad_norm": 0.0, - "learning_rate": 1.4980088195123306e-05, - "loss": 1.017, + "learning_rate": 1.4993640022282504e-05, + "loss": 0.9511, "step": 12477 }, { - "epoch": 0.35408626560726447, + "epoch": 0.35359460455099323, "grad_norm": 0.0, - "learning_rate": 1.4979291175356934e-05, - "loss": 0.8961, + "learning_rate": 1.4992844835712715e-05, + "loss": 0.9058, "step": 12478 }, { - "epoch": 0.3541146424517594, + "epoch": 0.35362294199325567, "grad_norm": 0.0, - "learning_rate": 1.4978494113530268e-05, - "loss": 1.0319, + "learning_rate": 1.499204960708634e-05, + "loss": 0.9693, "step": 12479 }, { - "epoch": 0.35414301929625425, + "epoch": 0.35365127943551816, "grad_norm": 0.0, - "learning_rate": 1.497769700965004e-05, - "loss": 0.9322, + "learning_rate": 1.499125433641008e-05, + "loss": 0.8618, "step": 12480 }, { - "epoch": 0.35417139614074916, + "epoch": 0.3536796168777806, "grad_norm": 0.0, - "learning_rate": 1.497689986372298e-05, - "loss": 0.9074, + "learning_rate": 1.4990459023690628e-05, + "loss": 0.9279, "step": 12481 }, { - "epoch": 0.354199772985244, + "epoch": 0.3537079543200431, "grad_norm": 0.0, - "learning_rate": 1.4976102675755824e-05, - "loss": 0.8972, + "learning_rate": 1.498966366893469e-05, + "loss": 0.983, "step": 12482 }, { - "epoch": 0.35422814982973894, + "epoch": 0.35373629176230553, "grad_norm": 0.0, - "learning_rate": 1.4975305445755306e-05, - "loss": 0.8662, + "learning_rate": 1.4988868272148959e-05, + "loss": 0.8999, "step": 12483 }, { - "epoch": 0.35425652667423385, + "epoch": 0.35376462920456797, "grad_norm": 0.0, - "learning_rate": 1.4974508173728156e-05, - "loss": 0.9613, + "learning_rate": 1.498807283334014e-05, + "loss": 1.0083, "step": 12484 }, { - "epoch": 0.3542849035187287, + "epoch": 0.35379296664683046, "grad_norm": 0.0, - "learning_rate": 1.4973710859681112e-05, - "loss": 0.9371, + "learning_rate": 1.4987277352514933e-05, + "loss": 0.8878, "step": 12485 }, { - "epoch": 0.3543132803632236, + "epoch": 0.3538213040890929, "grad_norm": 0.0, - "learning_rate": 1.4972913503620912e-05, - "loss": 0.9525, + "learning_rate": 1.4986481829680033e-05, + "loss": 0.9597, "step": 12486 }, { - "epoch": 0.3543416572077185, + "epoch": 0.3538496415313554, "grad_norm": 0.0, - "learning_rate": 1.4972116105554287e-05, - "loss": 0.9745, + "learning_rate": 1.4985686264842145e-05, + "loss": 1.0198, "step": 12487 }, { - "epoch": 0.3543700340522134, + "epoch": 0.35387797897361783, "grad_norm": 0.0, - "learning_rate": 1.4971318665487974e-05, - "loss": 0.9583, + "learning_rate": 1.498489065800797e-05, + "loss": 0.9684, "step": 12488 }, { - "epoch": 0.35439841089670826, + "epoch": 0.35390631641588033, "grad_norm": 0.0, - "learning_rate": 1.4970521183428706e-05, - "loss": 0.9039, + "learning_rate": 1.4984095009184215e-05, + "loss": 0.9905, "step": 12489 }, { - "epoch": 0.3544267877412032, + "epoch": 0.35393465385814277, "grad_norm": 0.0, - "learning_rate": 1.4969723659383224e-05, - "loss": 0.8039, + "learning_rate": 1.4983299318377574e-05, + "loss": 0.8382, "step": 12490 }, { - "epoch": 0.3544551645856981, + "epoch": 0.3539629913004052, "grad_norm": 0.0, - "learning_rate": 1.4968926093358263e-05, - "loss": 0.9363, + "learning_rate": 1.4982503585594748e-05, + "loss": 0.9242, "step": 12491 }, { - "epoch": 0.35448354143019295, + "epoch": 0.3539913287426677, "grad_norm": 0.0, - "learning_rate": 1.4968128485360558e-05, - "loss": 0.9429, + "learning_rate": 1.4981707810842448e-05, + "loss": 0.9173, "step": 12492 }, { - "epoch": 0.35451191827468786, + "epoch": 0.35401966618493014, "grad_norm": 0.0, - "learning_rate": 1.4967330835396851e-05, - "loss": 0.8681, + "learning_rate": 1.4980911994127372e-05, + "loss": 0.8521, "step": 12493 }, { - "epoch": 0.3545402951191827, + "epoch": 0.35404800362719263, "grad_norm": 0.0, - "learning_rate": 1.4966533143473876e-05, - "loss": 0.9812, + "learning_rate": 1.4980116135456228e-05, + "loss": 0.8878, "step": 12494 }, { - "epoch": 0.35456867196367764, + "epoch": 0.35407634106945507, "grad_norm": 0.0, - "learning_rate": 1.4965735409598373e-05, - "loss": 0.93, + "learning_rate": 1.4979320234835713e-05, + "loss": 1.0935, "step": 12495 }, { - "epoch": 0.35459704880817255, + "epoch": 0.3541046785117175, "grad_norm": 0.0, - "learning_rate": 1.4964937633777079e-05, - "loss": 1.0078, + "learning_rate": 1.4978524292272537e-05, + "loss": 0.8258, "step": 12496 }, { - "epoch": 0.3546254256526674, + "epoch": 0.35413301595398, "grad_norm": 0.0, - "learning_rate": 1.4964139816016737e-05, - "loss": 0.8735, + "learning_rate": 1.49777283077734e-05, + "loss": 0.9269, "step": 12497 }, { - "epoch": 0.35465380249716233, + "epoch": 0.35416135339624244, "grad_norm": 0.0, - "learning_rate": 1.4963341956324077e-05, - "loss": 0.9096, + "learning_rate": 1.4976932281345009e-05, + "loss": 0.9595, "step": 12498 }, { - "epoch": 0.3546821793416572, + "epoch": 0.35418969083850493, "grad_norm": 0.0, - "learning_rate": 1.4962544054705848e-05, - "loss": 0.7929, + "learning_rate": 1.497613621299407e-05, + "loss": 0.9813, "step": 12499 }, { - "epoch": 0.3547105561861521, + "epoch": 0.35421802828076737, "grad_norm": 0.0, - "learning_rate": 1.4961746111168785e-05, - "loss": 0.8577, + "learning_rate": 1.4975340102727287e-05, + "loss": 0.9186, "step": 12500 }, { - "epoch": 0.354738933030647, + "epoch": 0.35424636572302987, "grad_norm": 0.0, - "learning_rate": 1.496094812571963e-05, - "loss": 0.9965, + "learning_rate": 1.4974543950551371e-05, + "loss": 1.0547, "step": 12501 }, { - "epoch": 0.3547673098751419, + "epoch": 0.3542747031652923, "grad_norm": 0.0, - "learning_rate": 1.496015009836512e-05, - "loss": 0.9391, + "learning_rate": 1.497374775647302e-05, + "loss": 0.8641, "step": 12502 }, { - "epoch": 0.3547956867196368, + "epoch": 0.35430304060755474, "grad_norm": 0.0, - "learning_rate": 1.4959352029112004e-05, - "loss": 0.878, + "learning_rate": 1.4972951520498944e-05, + "loss": 0.9005, "step": 12503 }, { - "epoch": 0.35482406356413165, + "epoch": 0.35433137804981724, "grad_norm": 0.0, - "learning_rate": 1.4958553917967018e-05, - "loss": 0.9203, + "learning_rate": 1.4972155242635853e-05, + "loss": 0.97, "step": 12504 }, { - "epoch": 0.35485244040862657, + "epoch": 0.3543597154920797, "grad_norm": 0.0, - "learning_rate": 1.4957755764936903e-05, - "loss": 0.8997, + "learning_rate": 1.497135892289045e-05, + "loss": 0.9369, "step": 12505 }, { - "epoch": 0.35488081725312143, + "epoch": 0.35438805293434217, "grad_norm": 0.0, - "learning_rate": 1.4956957570028401e-05, - "loss": 1.137, + "learning_rate": 1.4970562561269447e-05, + "loss": 1.0097, "step": 12506 }, { - "epoch": 0.35490919409761634, + "epoch": 0.3544163903766046, "grad_norm": 0.0, - "learning_rate": 1.4956159333248258e-05, - "loss": 0.8054, + "learning_rate": 1.496976615777955e-05, + "loss": 0.9132, "step": 12507 }, { - "epoch": 0.35493757094211126, + "epoch": 0.35444472781886704, "grad_norm": 0.0, - "learning_rate": 1.495536105460321e-05, - "loss": 0.8929, + "learning_rate": 1.4968969712427464e-05, + "loss": 0.9386, "step": 12508 }, { - "epoch": 0.3549659477866061, + "epoch": 0.35447306526112954, "grad_norm": 0.0, - "learning_rate": 1.4954562734100008e-05, - "loss": 0.8938, + "learning_rate": 1.4968173225219904e-05, + "loss": 1.0202, "step": 12509 }, { - "epoch": 0.35499432463110103, + "epoch": 0.354501402703392, "grad_norm": 0.0, - "learning_rate": 1.4953764371745392e-05, - "loss": 1.0594, + "learning_rate": 1.4967376696163575e-05, + "loss": 0.9117, "step": 12510 }, { - "epoch": 0.3550227014755959, + "epoch": 0.35452974014565447, "grad_norm": 0.0, - "learning_rate": 1.4952965967546106e-05, - "loss": 0.8786, + "learning_rate": 1.4966580125265187e-05, + "loss": 0.9609, "step": 12511 }, { - "epoch": 0.3550510783200908, + "epoch": 0.3545580775879169, "grad_norm": 0.0, - "learning_rate": 1.495216752150889e-05, - "loss": 1.0092, + "learning_rate": 1.4965783512531449e-05, + "loss": 0.9468, "step": 12512 }, { - "epoch": 0.3550794551645857, + "epoch": 0.3545864150301794, "grad_norm": 0.0, - "learning_rate": 1.4951369033640497e-05, - "loss": 0.9255, + "learning_rate": 1.4964986857969074e-05, + "loss": 0.9552, "step": 12513 }, { - "epoch": 0.3551078320090806, + "epoch": 0.35461475247244184, "grad_norm": 0.0, - "learning_rate": 1.4950570503947668e-05, - "loss": 0.893, + "learning_rate": 1.4964190161584773e-05, + "loss": 0.9756, "step": 12514 }, { - "epoch": 0.3551362088535755, + "epoch": 0.3546430899147043, "grad_norm": 0.0, - "learning_rate": 1.494977193243714e-05, - "loss": 0.9891, + "learning_rate": 1.4963393423385254e-05, + "loss": 1.0095, "step": 12515 }, { - "epoch": 0.35516458569807036, + "epoch": 0.3546714273569668, "grad_norm": 0.0, - "learning_rate": 1.4948973319115671e-05, - "loss": 1.0713, + "learning_rate": 1.4962596643377225e-05, + "loss": 1.0307, "step": 12516 }, { - "epoch": 0.35519296254256527, + "epoch": 0.3546997647992292, "grad_norm": 0.0, - "learning_rate": 1.4948174663990002e-05, - "loss": 0.9272, + "learning_rate": 1.4961799821567407e-05, + "loss": 0.9579, "step": 12517 }, { - "epoch": 0.3552213393870602, + "epoch": 0.3547281022414917, "grad_norm": 0.0, - "learning_rate": 1.494737596706688e-05, - "loss": 0.9585, + "learning_rate": 1.4961002957962503e-05, + "loss": 1.05, "step": 12518 }, { - "epoch": 0.35524971623155505, + "epoch": 0.35475643968375414, "grad_norm": 0.0, - "learning_rate": 1.4946577228353048e-05, - "loss": 0.8957, + "learning_rate": 1.496020605256923e-05, + "loss": 0.9237, "step": 12519 }, { - "epoch": 0.35527809307604996, + "epoch": 0.3547847771260166, "grad_norm": 0.0, - "learning_rate": 1.4945778447855259e-05, - "loss": 0.9668, + "learning_rate": 1.4959409105394298e-05, + "loss": 1.0036, "step": 12520 }, { - "epoch": 0.3553064699205448, + "epoch": 0.3548131145682791, "grad_norm": 0.0, - "learning_rate": 1.4944979625580253e-05, - "loss": 0.9919, + "learning_rate": 1.4958612116444427e-05, + "loss": 1.0678, "step": 12521 }, { - "epoch": 0.35533484676503974, + "epoch": 0.3548414520105415, "grad_norm": 0.0, - "learning_rate": 1.4944180761534785e-05, - "loss": 0.8218, + "learning_rate": 1.495781508572632e-05, + "loss": 0.9538, "step": 12522 }, { - "epoch": 0.3553632236095346, + "epoch": 0.354869789452804, "grad_norm": 0.0, - "learning_rate": 1.4943381855725599e-05, - "loss": 0.9418, + "learning_rate": 1.4957018013246698e-05, + "loss": 1.0814, "step": 12523 }, { - "epoch": 0.3553916004540295, + "epoch": 0.35489812689506645, "grad_norm": 0.0, - "learning_rate": 1.4942582908159446e-05, - "loss": 0.9661, + "learning_rate": 1.4956220899012268e-05, + "loss": 0.9874, "step": 12524 }, { - "epoch": 0.3554199772985244, + "epoch": 0.35492646433732894, "grad_norm": 0.0, - "learning_rate": 1.4941783918843069e-05, - "loss": 0.9653, + "learning_rate": 1.4955423743029751e-05, + "loss": 1.0551, "step": 12525 }, { - "epoch": 0.3554483541430193, + "epoch": 0.3549548017795914, "grad_norm": 0.0, - "learning_rate": 1.4940984887783226e-05, - "loss": 0.9486, + "learning_rate": 1.4954626545305861e-05, + "loss": 0.8869, "step": 12526 }, { - "epoch": 0.3554767309875142, + "epoch": 0.3549831392218538, "grad_norm": 0.0, - "learning_rate": 1.4940185814986659e-05, - "loss": 1.0162, + "learning_rate": 1.4953829305847314e-05, + "loss": 0.8054, "step": 12527 }, { - "epoch": 0.35550510783200906, + "epoch": 0.3550114766641163, "grad_norm": 0.0, - "learning_rate": 1.493938670046012e-05, - "loss": 0.883, + "learning_rate": 1.4953032024660819e-05, + "loss": 0.8144, "step": 12528 }, { - "epoch": 0.355533484676504, + "epoch": 0.35503981410637875, "grad_norm": 0.0, - "learning_rate": 1.4938587544210358e-05, - "loss": 0.8602, + "learning_rate": 1.4952234701753097e-05, + "loss": 0.8163, "step": 12529 }, { - "epoch": 0.3555618615209989, + "epoch": 0.35506815154864124, "grad_norm": 0.0, - "learning_rate": 1.4937788346244126e-05, - "loss": 0.9087, + "learning_rate": 1.4951437337130865e-05, + "loss": 1.0107, "step": 12530 }, { - "epoch": 0.35559023836549375, + "epoch": 0.3550964889909037, "grad_norm": 0.0, - "learning_rate": 1.4936989106568176e-05, - "loss": 0.9063, + "learning_rate": 1.4950639930800834e-05, + "loss": 0.903, "step": 12531 }, { - "epoch": 0.35561861520998866, + "epoch": 0.3551248264331661, "grad_norm": 0.0, - "learning_rate": 1.4936189825189256e-05, - "loss": 0.8483, + "learning_rate": 1.4949842482769725e-05, + "loss": 0.9411, "step": 12532 }, { - "epoch": 0.3556469920544835, + "epoch": 0.3551531638754286, "grad_norm": 0.0, - "learning_rate": 1.4935390502114118e-05, - "loss": 1.0033, + "learning_rate": 1.4949044993044259e-05, + "loss": 0.9492, "step": 12533 }, { - "epoch": 0.35567536889897844, + "epoch": 0.35518150131769105, "grad_norm": 0.0, - "learning_rate": 1.4934591137349514e-05, - "loss": 0.9423, + "learning_rate": 1.4948247461631148e-05, + "loss": 0.959, "step": 12534 }, { - "epoch": 0.3557037457434733, + "epoch": 0.35520983875995354, "grad_norm": 0.0, - "learning_rate": 1.4933791730902199e-05, - "loss": 0.9531, + "learning_rate": 1.4947449888537109e-05, + "loss": 0.968, "step": 12535 }, { - "epoch": 0.3557321225879682, + "epoch": 0.355238176202216, "grad_norm": 0.0, - "learning_rate": 1.4932992282778923e-05, - "loss": 0.8717, + "learning_rate": 1.4946652273768864e-05, + "loss": 0.9852, "step": 12536 }, { - "epoch": 0.35576049943246313, + "epoch": 0.3552665136444785, "grad_norm": 0.0, - "learning_rate": 1.493219279298644e-05, - "loss": 1.0458, + "learning_rate": 1.4945854617333129e-05, + "loss": 0.857, "step": 12537 }, { - "epoch": 0.355788876276958, + "epoch": 0.3552948510867409, "grad_norm": 0.0, - "learning_rate": 1.4931393261531499e-05, - "loss": 1.0298, + "learning_rate": 1.4945056919236622e-05, + "loss": 0.8895, "step": 12538 }, { - "epoch": 0.3558172531214529, + "epoch": 0.35532318852900335, "grad_norm": 0.0, - "learning_rate": 1.493059368842086e-05, - "loss": 0.9688, + "learning_rate": 1.4944259179486068e-05, + "loss": 1.032, "step": 12539 }, { - "epoch": 0.35584562996594776, + "epoch": 0.35535152597126585, "grad_norm": 0.0, - "learning_rate": 1.4929794073661274e-05, - "loss": 0.9772, + "learning_rate": 1.4943461398088182e-05, + "loss": 1.0034, "step": 12540 }, { - "epoch": 0.3558740068104427, + "epoch": 0.3553798634135283, "grad_norm": 0.0, - "learning_rate": 1.4928994417259496e-05, - "loss": 0.8847, + "learning_rate": 1.4942663575049683e-05, + "loss": 0.9028, "step": 12541 }, { - "epoch": 0.3559023836549376, + "epoch": 0.3554082008557908, "grad_norm": 0.0, - "learning_rate": 1.492819471922228e-05, - "loss": 0.9833, + "learning_rate": 1.4941865710377298e-05, + "loss": 1.0059, "step": 12542 }, { - "epoch": 0.35593076049943245, + "epoch": 0.3554365382980532, "grad_norm": 0.0, - "learning_rate": 1.4927394979556384e-05, - "loss": 0.8823, + "learning_rate": 1.4941067804077738e-05, + "loss": 1.0125, "step": 12543 }, { - "epoch": 0.35595913734392737, + "epoch": 0.35546487574031566, "grad_norm": 0.0, - "learning_rate": 1.4926595198268561e-05, - "loss": 0.8962, + "learning_rate": 1.494026985615773e-05, + "loss": 1.0138, "step": 12544 }, { - "epoch": 0.3559875141884222, + "epoch": 0.35549321318257815, "grad_norm": 0.0, - "learning_rate": 1.4925795375365564e-05, - "loss": 0.7679, + "learning_rate": 1.4939471866623993e-05, + "loss": 0.9894, "step": 12545 }, { - "epoch": 0.35601589103291714, + "epoch": 0.3555215506248406, "grad_norm": 0.0, - "learning_rate": 1.4924995510854153e-05, - "loss": 1.0043, + "learning_rate": 1.4938673835483254e-05, + "loss": 0.9823, "step": 12546 }, { - "epoch": 0.35604426787741206, + "epoch": 0.3555498880671031, "grad_norm": 0.0, - "learning_rate": 1.4924195604741085e-05, - "loss": 1.022, + "learning_rate": 1.4937875762742232e-05, + "loss": 0.8622, "step": 12547 }, { - "epoch": 0.3560726447219069, + "epoch": 0.3555782255093655, "grad_norm": 0.0, - "learning_rate": 1.4923395657033113e-05, - "loss": 1.0923, + "learning_rate": 1.4937077648407645e-05, + "loss": 1.0002, "step": 12548 }, { - "epoch": 0.35610102156640183, + "epoch": 0.355606562951628, "grad_norm": 0.0, - "learning_rate": 1.4922595667736999e-05, - "loss": 0.8751, + "learning_rate": 1.4936279492486222e-05, + "loss": 1.0066, "step": 12549 }, { - "epoch": 0.3561293984108967, + "epoch": 0.35563490039389045, "grad_norm": 0.0, - "learning_rate": 1.4921795636859497e-05, - "loss": 1.0258, + "learning_rate": 1.4935481294984681e-05, + "loss": 0.8404, "step": 12550 }, { - "epoch": 0.3561577752553916, + "epoch": 0.3556632378361529, "grad_norm": 0.0, - "learning_rate": 1.4920995564407368e-05, - "loss": 0.9367, + "learning_rate": 1.4934683055909751e-05, + "loss": 0.8789, "step": 12551 }, { - "epoch": 0.35618615209988647, + "epoch": 0.3556915752784154, "grad_norm": 0.0, - "learning_rate": 1.4920195450387365e-05, - "loss": 0.952, + "learning_rate": 1.4933884775268153e-05, + "loss": 0.8303, "step": 12552 }, { - "epoch": 0.3562145289443814, + "epoch": 0.3557199127206778, "grad_norm": 0.0, - "learning_rate": 1.491939529480625e-05, - "loss": 0.9581, + "learning_rate": 1.493308645306661e-05, + "loss": 0.8676, "step": 12553 }, { - "epoch": 0.3562429057888763, + "epoch": 0.3557482501629403, "grad_norm": 0.0, - "learning_rate": 1.4918595097670782e-05, - "loss": 0.8901, + "learning_rate": 1.4932288089311848e-05, + "loss": 0.9278, "step": 12554 }, { - "epoch": 0.35627128263337116, + "epoch": 0.35577658760520275, "grad_norm": 0.0, - "learning_rate": 1.4917794858987721e-05, - "loss": 0.9492, + "learning_rate": 1.4931489684010593e-05, + "loss": 0.7941, "step": 12555 }, { - "epoch": 0.35629965947786607, + "epoch": 0.3558049250474652, "grad_norm": 0.0, - "learning_rate": 1.4916994578763826e-05, - "loss": 0.8675, + "learning_rate": 1.493069123716957e-05, + "loss": 0.9755, "step": 12556 }, { - "epoch": 0.35632803632236093, + "epoch": 0.3558332624897277, "grad_norm": 0.0, - "learning_rate": 1.4916194257005857e-05, - "loss": 0.861, + "learning_rate": 1.4929892748795503e-05, + "loss": 0.936, "step": 12557 }, { - "epoch": 0.35635641316685585, + "epoch": 0.3558615999319901, "grad_norm": 0.0, - "learning_rate": 1.4915393893720575e-05, - "loss": 1.0075, + "learning_rate": 1.4929094218895117e-05, + "loss": 0.9711, "step": 12558 }, { - "epoch": 0.35638479001135076, + "epoch": 0.3558899373742526, "grad_norm": 0.0, - "learning_rate": 1.4914593488914738e-05, - "loss": 0.9234, + "learning_rate": 1.4928295647475141e-05, + "loss": 0.9492, "step": 12559 }, { - "epoch": 0.3564131668558456, + "epoch": 0.35591827481651506, "grad_norm": 0.0, - "learning_rate": 1.4913793042595109e-05, - "loss": 0.8635, + "learning_rate": 1.4927497034542303e-05, + "loss": 0.8193, "step": 12560 }, { - "epoch": 0.35644154370034054, + "epoch": 0.3559466122587775, "grad_norm": 0.0, - "learning_rate": 1.4912992554768448e-05, - "loss": 0.9814, + "learning_rate": 1.4926698380103323e-05, + "loss": 0.9854, "step": 12561 }, { - "epoch": 0.3564699205448354, + "epoch": 0.35597494970104, "grad_norm": 0.0, - "learning_rate": 1.4912192025441518e-05, - "loss": 0.9714, + "learning_rate": 1.4925899684164937e-05, + "loss": 0.9687, "step": 12562 }, { - "epoch": 0.3564982973893303, + "epoch": 0.3560032871433024, "grad_norm": 0.0, - "learning_rate": 1.4911391454621085e-05, - "loss": 0.9393, + "learning_rate": 1.4925100946733869e-05, + "loss": 0.995, "step": 12563 }, { - "epoch": 0.3565266742338252, + "epoch": 0.3560316245855649, "grad_norm": 0.0, - "learning_rate": 1.4910590842313908e-05, - "loss": 0.9222, + "learning_rate": 1.4924302167816845e-05, + "loss": 0.8982, "step": 12564 }, { - "epoch": 0.3565550510783201, + "epoch": 0.35605996202782736, "grad_norm": 0.0, - "learning_rate": 1.4909790188526747e-05, - "loss": 0.9785, + "learning_rate": 1.4923503347420596e-05, + "loss": 0.9962, "step": 12565 }, { - "epoch": 0.356583427922815, + "epoch": 0.35608829947008985, "grad_norm": 0.0, - "learning_rate": 1.4908989493266366e-05, - "loss": 0.9423, + "learning_rate": 1.4922704485551847e-05, + "loss": 0.8992, "step": 12566 }, { - "epoch": 0.35661180476730986, + "epoch": 0.3561166369123523, "grad_norm": 0.0, - "learning_rate": 1.4908188756539533e-05, - "loss": 0.935, + "learning_rate": 1.4921905582217333e-05, + "loss": 0.9803, "step": 12567 }, { - "epoch": 0.3566401816118048, + "epoch": 0.35614497435461473, "grad_norm": 0.0, - "learning_rate": 1.4907387978353007e-05, - "loss": 0.9653, + "learning_rate": 1.4921106637423782e-05, + "loss": 1.0065, "step": 12568 }, { - "epoch": 0.35666855845629963, + "epoch": 0.3561733117968772, "grad_norm": 0.0, - "learning_rate": 1.4906587158713552e-05, - "loss": 0.9702, + "learning_rate": 1.4920307651177921e-05, + "loss": 0.9779, "step": 12569 }, { - "epoch": 0.35669693530079455, + "epoch": 0.35620164923913966, "grad_norm": 0.0, - "learning_rate": 1.4905786297627937e-05, - "loss": 0.9947, + "learning_rate": 1.491950862348648e-05, + "loss": 0.9143, "step": 12570 }, { - "epoch": 0.35672531214528946, + "epoch": 0.35622998668140216, "grad_norm": 0.0, - "learning_rate": 1.4904985395102927e-05, - "loss": 0.9487, + "learning_rate": 1.4918709554356192e-05, + "loss": 0.8479, "step": 12571 }, { - "epoch": 0.3567536889897843, + "epoch": 0.3562583241236646, "grad_norm": 0.0, - "learning_rate": 1.490418445114528e-05, - "loss": 0.9324, + "learning_rate": 1.4917910443793786e-05, + "loss": 0.9832, "step": 12572 }, { - "epoch": 0.35678206583427924, + "epoch": 0.35628666156592703, "grad_norm": 0.0, - "learning_rate": 1.4903383465761771e-05, - "loss": 0.9723, + "learning_rate": 1.4917111291805996e-05, + "loss": 0.904, "step": 12573 }, { - "epoch": 0.3568104426787741, + "epoch": 0.3563149990081895, "grad_norm": 0.0, - "learning_rate": 1.4902582438959161e-05, - "loss": 1.0097, + "learning_rate": 1.491631209839955e-05, + "loss": 1.0438, "step": 12574 }, { - "epoch": 0.356838819523269, + "epoch": 0.35634333645045196, "grad_norm": 0.0, - "learning_rate": 1.4901781370744215e-05, - "loss": 1.0099, + "learning_rate": 1.4915512863581185e-05, + "loss": 0.9141, "step": 12575 }, { - "epoch": 0.35686719636776393, + "epoch": 0.35637167389271446, "grad_norm": 0.0, - "learning_rate": 1.49009802611237e-05, - "loss": 0.9172, + "learning_rate": 1.4914713587357628e-05, + "loss": 0.8863, "step": 12576 }, { - "epoch": 0.3568955732122588, + "epoch": 0.3564000113349769, "grad_norm": 0.0, - "learning_rate": 1.4900179110104387e-05, - "loss": 1.0687, + "learning_rate": 1.491391426973561e-05, + "loss": 0.895, "step": 12577 }, { - "epoch": 0.3569239500567537, + "epoch": 0.3564283487772394, "grad_norm": 0.0, - "learning_rate": 1.4899377917693041e-05, - "loss": 1.0618, + "learning_rate": 1.4913114910721869e-05, + "loss": 0.9915, "step": 12578 }, { - "epoch": 0.35695232690124856, + "epoch": 0.35645668621950183, "grad_norm": 0.0, - "learning_rate": 1.4898576683896427e-05, - "loss": 0.9304, + "learning_rate": 1.4912315510323138e-05, + "loss": 1.0413, "step": 12579 }, { - "epoch": 0.3569807037457435, + "epoch": 0.35648502366176427, "grad_norm": 0.0, - "learning_rate": 1.4897775408721318e-05, - "loss": 0.8425, + "learning_rate": 1.491151606854615e-05, + "loss": 0.9396, "step": 12580 }, { - "epoch": 0.3570090805902384, + "epoch": 0.35651336110402676, "grad_norm": 0.0, - "learning_rate": 1.4896974092174481e-05, - "loss": 1.0195, + "learning_rate": 1.491071658539764e-05, + "loss": 1.0767, "step": 12581 }, { - "epoch": 0.35703745743473325, + "epoch": 0.3565416985462892, "grad_norm": 0.0, - "learning_rate": 1.4896172734262679e-05, - "loss": 0.842, + "learning_rate": 1.4909917060884336e-05, + "loss": 1.0459, "step": 12582 }, { - "epoch": 0.35706583427922817, + "epoch": 0.3565700359885517, "grad_norm": 0.0, - "learning_rate": 1.489537133499269e-05, - "loss": 0.8395, + "learning_rate": 1.4909117495012979e-05, + "loss": 1.0636, "step": 12583 }, { - "epoch": 0.357094211123723, + "epoch": 0.35659837343081413, "grad_norm": 0.0, - "learning_rate": 1.4894569894371276e-05, - "loss": 0.9179, + "learning_rate": 1.4908317887790301e-05, + "loss": 0.8696, "step": 12584 }, { - "epoch": 0.35712258796821794, + "epoch": 0.35662671087307657, "grad_norm": 0.0, - "learning_rate": 1.4893768412405214e-05, - "loss": 0.8899, + "learning_rate": 1.4907518239223043e-05, + "loss": 0.9684, "step": 12585 }, { - "epoch": 0.3571509648127128, + "epoch": 0.35665504831533906, "grad_norm": 0.0, - "learning_rate": 1.4892966889101265e-05, - "loss": 0.9915, + "learning_rate": 1.4906718549317934e-05, + "loss": 0.9219, "step": 12586 }, { - "epoch": 0.3571793416572077, + "epoch": 0.3566833857576015, "grad_norm": 0.0, - "learning_rate": 1.489216532446621e-05, - "loss": 0.9284, + "learning_rate": 1.4905918818081713e-05, + "loss": 1.0227, "step": 12587 }, { - "epoch": 0.35720771850170263, + "epoch": 0.356711723199864, "grad_norm": 0.0, - "learning_rate": 1.4891363718506812e-05, - "loss": 0.8703, + "learning_rate": 1.4905119045521115e-05, + "loss": 0.9771, "step": 12588 }, { - "epoch": 0.3572360953461975, + "epoch": 0.35674006064212643, "grad_norm": 0.0, - "learning_rate": 1.4890562071229844e-05, - "loss": 0.979, + "learning_rate": 1.4904319231642878e-05, + "loss": 0.9324, "step": 12589 }, { - "epoch": 0.3572644721906924, + "epoch": 0.3567683980843889, "grad_norm": 0.0, - "learning_rate": 1.488976038264208e-05, - "loss": 1.1383, + "learning_rate": 1.4903519376453738e-05, + "loss": 0.9455, "step": 12590 }, { - "epoch": 0.35729284903518727, + "epoch": 0.35679673552665137, "grad_norm": 0.0, - "learning_rate": 1.4888958652750292e-05, - "loss": 0.8967, + "learning_rate": 1.4902719479960434e-05, + "loss": 0.8857, "step": 12591 }, { - "epoch": 0.3573212258796822, + "epoch": 0.3568250729689138, "grad_norm": 0.0, - "learning_rate": 1.4888156881561246e-05, - "loss": 1.0821, + "learning_rate": 1.4901919542169707e-05, + "loss": 0.9534, "step": 12592 }, { - "epoch": 0.3573496027241771, + "epoch": 0.3568534104111763, "grad_norm": 0.0, - "learning_rate": 1.4887355069081721e-05, - "loss": 0.9548, + "learning_rate": 1.4901119563088288e-05, + "loss": 0.9235, "step": 12593 }, { - "epoch": 0.35737797956867196, + "epoch": 0.35688174785343874, "grad_norm": 0.0, - "learning_rate": 1.488655321531849e-05, - "loss": 0.8625, + "learning_rate": 1.4900319542722921e-05, + "loss": 1.0082, "step": 12594 }, { - "epoch": 0.35740635641316687, + "epoch": 0.35691008529570123, "grad_norm": 0.0, - "learning_rate": 1.4885751320278323e-05, - "loss": 0.9667, + "learning_rate": 1.489951948108034e-05, + "loss": 0.9421, "step": 12595 }, { - "epoch": 0.35743473325766173, + "epoch": 0.35693842273796367, "grad_norm": 0.0, - "learning_rate": 1.4884949383967993e-05, - "loss": 0.8236, + "learning_rate": 1.4898719378167287e-05, + "loss": 0.9925, "step": 12596 }, { - "epoch": 0.35746311010215664, + "epoch": 0.3569667601802261, "grad_norm": 0.0, - "learning_rate": 1.4884147406394277e-05, - "loss": 0.8411, + "learning_rate": 1.4897919233990502e-05, + "loss": 0.8455, "step": 12597 }, { - "epoch": 0.35749148694665156, + "epoch": 0.3569950976224886, "grad_norm": 0.0, - "learning_rate": 1.4883345387563952e-05, - "loss": 0.8875, + "learning_rate": 1.4897119048556728e-05, + "loss": 0.9772, "step": 12598 }, { - "epoch": 0.3575198637911464, + "epoch": 0.35702343506475104, "grad_norm": 0.0, - "learning_rate": 1.4882543327483784e-05, - "loss": 1.0917, + "learning_rate": 1.4896318821872697e-05, + "loss": 0.9307, "step": 12599 }, { - "epoch": 0.35754824063564133, + "epoch": 0.35705177250701353, "grad_norm": 0.0, - "learning_rate": 1.4881741226160556e-05, - "loss": 0.8853, + "learning_rate": 1.4895518553945159e-05, + "loss": 0.981, "step": 12600 }, { - "epoch": 0.3575766174801362, + "epoch": 0.35708010994927597, "grad_norm": 0.0, - "learning_rate": 1.4880939083601037e-05, - "loss": 0.9456, + "learning_rate": 1.4894718244780845e-05, + "loss": 0.9609, "step": 12601 }, { - "epoch": 0.3576049943246311, + "epoch": 0.35710844739153846, "grad_norm": 0.0, - "learning_rate": 1.4880136899812013e-05, - "loss": 0.8391, + "learning_rate": 1.4893917894386505e-05, + "loss": 0.914, "step": 12602 }, { - "epoch": 0.35763337116912597, + "epoch": 0.3571367848338009, "grad_norm": 0.0, - "learning_rate": 1.4879334674800246e-05, - "loss": 1.0014, + "learning_rate": 1.4893117502768877e-05, + "loss": 0.8939, "step": 12603 }, { - "epoch": 0.3576617480136209, + "epoch": 0.35716512227606334, "grad_norm": 0.0, - "learning_rate": 1.4878532408572523e-05, - "loss": 0.8521, + "learning_rate": 1.4892317069934702e-05, + "loss": 0.9507, "step": 12604 }, { - "epoch": 0.3576901248581158, + "epoch": 0.35719345971832583, "grad_norm": 0.0, - "learning_rate": 1.4877730101135619e-05, - "loss": 0.8228, + "learning_rate": 1.4891516595890724e-05, + "loss": 0.8882, "step": 12605 }, { - "epoch": 0.35771850170261066, + "epoch": 0.3572217971605883, "grad_norm": 0.0, - "learning_rate": 1.4876927752496303e-05, - "loss": 0.9796, + "learning_rate": 1.4890716080643688e-05, + "loss": 0.9442, "step": 12606 }, { - "epoch": 0.3577468785471056, + "epoch": 0.35725013460285077, "grad_norm": 0.0, - "learning_rate": 1.4876125362661364e-05, - "loss": 1.0476, + "learning_rate": 1.4889915524200331e-05, + "loss": 1.0018, "step": 12607 }, { - "epoch": 0.35777525539160043, + "epoch": 0.3572784720451132, "grad_norm": 0.0, - "learning_rate": 1.4875322931637574e-05, - "loss": 1.0331, + "learning_rate": 1.4889114926567402e-05, + "loss": 0.9525, "step": 12608 }, { - "epoch": 0.35780363223609535, + "epoch": 0.35730680948737564, "grad_norm": 0.0, - "learning_rate": 1.4874520459431713e-05, - "loss": 0.8751, + "learning_rate": 1.488831428775164e-05, + "loss": 0.8616, "step": 12609 }, { - "epoch": 0.35783200908059026, + "epoch": 0.35733514692963814, "grad_norm": 0.0, - "learning_rate": 1.487371794605056e-05, - "loss": 0.8454, + "learning_rate": 1.4887513607759794e-05, + "loss": 0.8571, "step": 12610 }, { - "epoch": 0.3578603859250851, + "epoch": 0.3573634843719006, "grad_norm": 0.0, - "learning_rate": 1.487291539150089e-05, - "loss": 0.8894, + "learning_rate": 1.4886712886598602e-05, + "loss": 0.7686, "step": 12611 }, { - "epoch": 0.35788876276958004, + "epoch": 0.35739182181416307, "grad_norm": 0.0, - "learning_rate": 1.4872112795789486e-05, - "loss": 0.965, + "learning_rate": 1.4885912124274818e-05, + "loss": 0.9153, "step": 12612 }, { - "epoch": 0.3579171396140749, + "epoch": 0.3574201592564255, "grad_norm": 0.0, - "learning_rate": 1.4871310158923122e-05, - "loss": 0.9837, + "learning_rate": 1.488511132079518e-05, + "loss": 0.9714, "step": 12613 }, { - "epoch": 0.3579455164585698, + "epoch": 0.357448496698688, "grad_norm": 0.0, - "learning_rate": 1.4870507480908587e-05, - "loss": 1.0385, + "learning_rate": 1.4884310476166435e-05, + "loss": 1.0502, "step": 12614 }, { - "epoch": 0.35797389330306467, + "epoch": 0.35747683414095044, "grad_norm": 0.0, - "learning_rate": 1.4869704761752654e-05, - "loss": 0.8383, + "learning_rate": 1.4883509590395326e-05, + "loss": 1.0028, "step": 12615 }, { - "epoch": 0.3580022701475596, + "epoch": 0.3575051715832129, "grad_norm": 0.0, - "learning_rate": 1.4868902001462107e-05, - "loss": 0.7949, + "learning_rate": 1.4882708663488605e-05, + "loss": 0.9272, "step": 12616 }, { - "epoch": 0.3580306469920545, + "epoch": 0.35753350902547537, "grad_norm": 0.0, - "learning_rate": 1.4868099200043723e-05, - "loss": 0.914, + "learning_rate": 1.4881907695453018e-05, + "loss": 0.8873, "step": 12617 }, { - "epoch": 0.35805902383654936, + "epoch": 0.3575618464677378, "grad_norm": 0.0, - "learning_rate": 1.4867296357504292e-05, - "loss": 0.9231, + "learning_rate": 1.4881106686295305e-05, + "loss": 0.8226, "step": 12618 }, { - "epoch": 0.3580874006810443, + "epoch": 0.3575901839100003, "grad_norm": 0.0, - "learning_rate": 1.4866493473850586e-05, - "loss": 1.0289, + "learning_rate": 1.4880305636022221e-05, + "loss": 0.8934, "step": 12619 }, { - "epoch": 0.35811577752553914, + "epoch": 0.35761852135226274, "grad_norm": 0.0, - "learning_rate": 1.4865690549089391e-05, - "loss": 0.91, + "learning_rate": 1.4879504544640511e-05, + "loss": 1.0323, "step": 12620 }, { - "epoch": 0.35814415437003405, + "epoch": 0.3576468587945252, "grad_norm": 0.0, - "learning_rate": 1.4864887583227492e-05, - "loss": 0.9669, + "learning_rate": 1.487870341215692e-05, + "loss": 0.9248, "step": 12621 }, { - "epoch": 0.35817253121452897, + "epoch": 0.3576751962367877, "grad_norm": 0.0, - "learning_rate": 1.4864084576271666e-05, - "loss": 0.8911, + "learning_rate": 1.48779022385782e-05, + "loss": 1.1069, "step": 12622 }, { - "epoch": 0.3582009080590238, + "epoch": 0.3577035336790501, "grad_norm": 0.0, - "learning_rate": 1.4863281528228698e-05, - "loss": 0.886, + "learning_rate": 1.4877101023911098e-05, + "loss": 0.9811, "step": 12623 }, { - "epoch": 0.35822928490351874, + "epoch": 0.3577318711213126, "grad_norm": 0.0, - "learning_rate": 1.4862478439105372e-05, - "loss": 1.0027, + "learning_rate": 1.4876299768162361e-05, + "loss": 0.8651, "step": 12624 }, { - "epoch": 0.3582576617480136, + "epoch": 0.35776020856357504, "grad_norm": 0.0, - "learning_rate": 1.4861675308908476e-05, - "loss": 0.9277, + "learning_rate": 1.487549847133874e-05, + "loss": 0.8614, "step": 12625 }, { - "epoch": 0.3582860385925085, + "epoch": 0.35778854600583754, "grad_norm": 0.0, - "learning_rate": 1.4860872137644786e-05, - "loss": 0.9298, + "learning_rate": 1.4874697133446988e-05, + "loss": 0.9793, "step": 12626 }, { - "epoch": 0.35831441543700343, + "epoch": 0.3578168834481, "grad_norm": 0.0, - "learning_rate": 1.4860068925321093e-05, - "loss": 0.9534, + "learning_rate": 1.4873895754493852e-05, + "loss": 1.0106, "step": 12627 }, { - "epoch": 0.3583427922814983, + "epoch": 0.3578452208903624, "grad_norm": 0.0, - "learning_rate": 1.485926567194418e-05, - "loss": 0.9191, + "learning_rate": 1.487309433448608e-05, + "loss": 0.8339, "step": 12628 }, { - "epoch": 0.3583711691259932, + "epoch": 0.3578735583326249, "grad_norm": 0.0, - "learning_rate": 1.485846237752083e-05, - "loss": 0.9306, + "learning_rate": 1.4872292873430425e-05, + "loss": 0.991, "step": 12629 }, { - "epoch": 0.35839954597048806, + "epoch": 0.35790189577488735, "grad_norm": 0.0, - "learning_rate": 1.4857659042057828e-05, - "loss": 0.8779, + "learning_rate": 1.4871491371333637e-05, + "loss": 0.852, "step": 12630 }, { - "epoch": 0.358427922814983, + "epoch": 0.35793023321714984, "grad_norm": 0.0, - "learning_rate": 1.4856855665561964e-05, - "loss": 0.9864, + "learning_rate": 1.4870689828202471e-05, + "loss": 0.9223, "step": 12631 }, { - "epoch": 0.35845629965947784, + "epoch": 0.3579585706594123, "grad_norm": 0.0, - "learning_rate": 1.4856052248040023e-05, - "loss": 0.9386, + "learning_rate": 1.4869888244043674e-05, + "loss": 1.0311, "step": 12632 }, { - "epoch": 0.35848467650397275, + "epoch": 0.3579869081016747, "grad_norm": 0.0, - "learning_rate": 1.485524878949879e-05, - "loss": 0.87, + "learning_rate": 1.4869086618864e-05, + "loss": 0.9781, "step": 12633 }, { - "epoch": 0.35851305334846767, + "epoch": 0.3580152455439372, "grad_norm": 0.0, - "learning_rate": 1.4854445289945049e-05, - "loss": 0.9501, + "learning_rate": 1.4868284952670205e-05, + "loss": 0.9305, "step": 12634 }, { - "epoch": 0.35854143019296253, + "epoch": 0.35804358298619965, "grad_norm": 0.0, - "learning_rate": 1.4853641749385595e-05, - "loss": 0.8722, + "learning_rate": 1.4867483245469031e-05, + "loss": 0.7826, "step": 12635 }, { - "epoch": 0.35856980703745744, + "epoch": 0.35807192042846214, "grad_norm": 0.0, - "learning_rate": 1.4852838167827209e-05, - "loss": 0.9913, + "learning_rate": 1.4866681497267242e-05, + "loss": 0.9232, "step": 12636 }, { - "epoch": 0.3585981838819523, + "epoch": 0.3581002578707246, "grad_norm": 0.0, - "learning_rate": 1.4852034545276681e-05, - "loss": 0.9714, + "learning_rate": 1.4865879708071589e-05, + "loss": 0.8241, "step": 12637 }, { - "epoch": 0.3586265607264472, + "epoch": 0.3581285953129871, "grad_norm": 0.0, - "learning_rate": 1.4851230881740796e-05, - "loss": 0.9552, + "learning_rate": 1.4865077877888822e-05, + "loss": 0.8832, "step": 12638 }, { - "epoch": 0.35865493757094213, + "epoch": 0.3581569327552495, "grad_norm": 0.0, - "learning_rate": 1.485042717722635e-05, - "loss": 0.9641, + "learning_rate": 1.48642760067257e-05, + "loss": 0.8827, "step": 12639 }, { - "epoch": 0.358683314415437, + "epoch": 0.35818527019751195, "grad_norm": 0.0, - "learning_rate": 1.4849623431740126e-05, - "loss": 1.0843, + "learning_rate": 1.486347409458897e-05, + "loss": 0.8248, "step": 12640 }, { - "epoch": 0.3587116912599319, + "epoch": 0.35821360763977445, "grad_norm": 0.0, - "learning_rate": 1.4848819645288915e-05, - "loss": 0.9934, + "learning_rate": 1.4862672141485396e-05, + "loss": 0.8154, "step": 12641 }, { - "epoch": 0.35874006810442677, + "epoch": 0.3582419450820369, "grad_norm": 0.0, - "learning_rate": 1.4848015817879507e-05, - "loss": 1.0449, + "learning_rate": 1.4861870147421726e-05, + "loss": 1.014, "step": 12642 }, { - "epoch": 0.3587684449489217, + "epoch": 0.3582702825242994, "grad_norm": 0.0, - "learning_rate": 1.484721194951869e-05, - "loss": 0.8673, + "learning_rate": 1.4861068112404721e-05, + "loss": 0.8642, "step": 12643 }, { - "epoch": 0.3587968217934166, + "epoch": 0.3582986199665618, "grad_norm": 0.0, - "learning_rate": 1.4846408040213256e-05, - "loss": 0.8844, + "learning_rate": 1.4860266036441134e-05, + "loss": 0.9215, "step": 12644 }, { - "epoch": 0.35882519863791146, + "epoch": 0.35832695740882425, "grad_norm": 0.0, - "learning_rate": 1.4845604089969996e-05, - "loss": 0.7623, + "learning_rate": 1.485946391953772e-05, + "loss": 0.8979, "step": 12645 }, { - "epoch": 0.3588535754824064, + "epoch": 0.35835529485108675, "grad_norm": 0.0, - "learning_rate": 1.48448000987957e-05, - "loss": 0.9403, + "learning_rate": 1.485866176170124e-05, + "loss": 0.9467, "step": 12646 }, { - "epoch": 0.35888195232690123, + "epoch": 0.3583836322933492, "grad_norm": 0.0, - "learning_rate": 1.484399606669716e-05, - "loss": 0.9967, + "learning_rate": 1.4857859562938444e-05, + "loss": 0.9151, "step": 12647 }, { - "epoch": 0.35891032917139615, + "epoch": 0.3584119697356117, "grad_norm": 0.0, - "learning_rate": 1.4843191993681167e-05, - "loss": 0.9617, + "learning_rate": 1.4857057323256092e-05, + "loss": 0.8865, "step": 12648 }, { - "epoch": 0.358938706015891, + "epoch": 0.3584403071778741, "grad_norm": 0.0, - "learning_rate": 1.4842387879754516e-05, - "loss": 0.9159, + "learning_rate": 1.4856255042660945e-05, + "loss": 0.9628, "step": 12649 }, { - "epoch": 0.3589670828603859, + "epoch": 0.3584686446201366, "grad_norm": 0.0, - "learning_rate": 1.4841583724923994e-05, - "loss": 0.9645, + "learning_rate": 1.4855452721159757e-05, + "loss": 0.9749, "step": 12650 }, { - "epoch": 0.35899545970488084, + "epoch": 0.35849698206239905, "grad_norm": 0.0, - "learning_rate": 1.4840779529196398e-05, - "loss": 0.8674, + "learning_rate": 1.485465035875929e-05, + "loss": 0.9055, "step": 12651 }, { - "epoch": 0.3590238365493757, + "epoch": 0.3585253195046615, "grad_norm": 0.0, - "learning_rate": 1.483997529257852e-05, - "loss": 0.9907, + "learning_rate": 1.48538479554663e-05, + "loss": 0.8846, "step": 12652 }, { - "epoch": 0.3590522133938706, + "epoch": 0.358553656946924, "grad_norm": 0.0, - "learning_rate": 1.4839171015077152e-05, - "loss": 0.8963, + "learning_rate": 1.4853045511287545e-05, + "loss": 0.8699, "step": 12653 }, { - "epoch": 0.35908059023836547, + "epoch": 0.3585819943891864, "grad_norm": 0.0, - "learning_rate": 1.4838366696699087e-05, - "loss": 0.8049, + "learning_rate": 1.4852243026229787e-05, + "loss": 0.9284, "step": 12654 }, { - "epoch": 0.3591089670828604, + "epoch": 0.3586103318314489, "grad_norm": 0.0, - "learning_rate": 1.4837562337451124e-05, - "loss": 0.8615, + "learning_rate": 1.485144050029978e-05, + "loss": 1.0061, "step": 12655 }, { - "epoch": 0.3591373439273553, + "epoch": 0.35863866927371135, "grad_norm": 0.0, - "learning_rate": 1.4836757937340053e-05, - "loss": 1.005, + "learning_rate": 1.4850637933504292e-05, + "loss": 0.928, "step": 12656 }, { - "epoch": 0.35916572077185016, + "epoch": 0.3586670067159738, "grad_norm": 0.0, - "learning_rate": 1.483595349637267e-05, - "loss": 0.9838, + "learning_rate": 1.4849835325850079e-05, + "loss": 0.9124, "step": 12657 }, { - "epoch": 0.3591940976163451, + "epoch": 0.3586953441582363, "grad_norm": 0.0, - "learning_rate": 1.4835149014555774e-05, - "loss": 0.9541, + "learning_rate": 1.4849032677343902e-05, + "loss": 0.9436, "step": 12658 }, { - "epoch": 0.35922247446083994, + "epoch": 0.3587236816004987, "grad_norm": 0.0, - "learning_rate": 1.4834344491896156e-05, - "loss": 0.937, + "learning_rate": 1.4848229987992522e-05, + "loss": 1.0028, "step": 12659 }, { - "epoch": 0.35925085130533485, + "epoch": 0.3587520190427612, "grad_norm": 0.0, - "learning_rate": 1.4833539928400609e-05, - "loss": 0.8156, + "learning_rate": 1.4847427257802702e-05, + "loss": 0.9264, "step": 12660 }, { - "epoch": 0.35927922814982977, + "epoch": 0.35878035648502365, "grad_norm": 0.0, - "learning_rate": 1.4832735324075932e-05, - "loss": 0.8465, + "learning_rate": 1.4846624486781199e-05, + "loss": 0.9688, "step": 12661 }, { - "epoch": 0.3593076049943246, + "epoch": 0.35880869392728615, "grad_norm": 0.0, - "learning_rate": 1.4831930678928928e-05, - "loss": 0.9215, + "learning_rate": 1.4845821674934779e-05, + "loss": 1.0583, "step": 12662 }, { - "epoch": 0.35933598183881954, + "epoch": 0.3588370313695486, "grad_norm": 0.0, - "learning_rate": 1.4831125992966386e-05, - "loss": 0.9553, + "learning_rate": 1.4845018822270204e-05, + "loss": 0.9027, "step": 12663 }, { - "epoch": 0.3593643586833144, + "epoch": 0.358865368811811, "grad_norm": 0.0, - "learning_rate": 1.4830321266195105e-05, - "loss": 0.9613, + "learning_rate": 1.4844215928794236e-05, + "loss": 0.9083, "step": 12664 }, { - "epoch": 0.3593927355278093, + "epoch": 0.3588937062540735, "grad_norm": 0.0, - "learning_rate": 1.4829516498621885e-05, - "loss": 0.9723, + "learning_rate": 1.484341299451364e-05, + "loss": 1.0117, "step": 12665 }, { - "epoch": 0.3594211123723042, + "epoch": 0.35892204369633596, "grad_norm": 0.0, - "learning_rate": 1.4828711690253521e-05, - "loss": 0.9027, + "learning_rate": 1.4842610019435178e-05, + "loss": 0.9281, "step": 12666 }, { - "epoch": 0.3594494892167991, + "epoch": 0.35895038113859845, "grad_norm": 0.0, - "learning_rate": 1.482790684109681e-05, - "loss": 0.9325, + "learning_rate": 1.4841807003565612e-05, + "loss": 0.8816, "step": 12667 }, { - "epoch": 0.359477866061294, + "epoch": 0.3589787185808609, "grad_norm": 0.0, - "learning_rate": 1.4827101951158555e-05, - "loss": 0.9546, + "learning_rate": 1.4841003946911707e-05, + "loss": 0.9586, "step": 12668 }, { - "epoch": 0.35950624290578886, + "epoch": 0.3590070560231233, "grad_norm": 0.0, - "learning_rate": 1.4826297020445553e-05, - "loss": 0.9765, + "learning_rate": 1.4840200849480226e-05, + "loss": 0.8025, "step": 12669 }, { - "epoch": 0.3595346197502838, + "epoch": 0.3590353934653858, "grad_norm": 0.0, - "learning_rate": 1.4825492048964603e-05, - "loss": 0.8887, + "learning_rate": 1.483939771127794e-05, + "loss": 0.8625, "step": 12670 }, { - "epoch": 0.35956299659477864, + "epoch": 0.35906373090764826, "grad_norm": 0.0, - "learning_rate": 1.4824687036722505e-05, - "loss": 0.8816, + "learning_rate": 1.483859453231161e-05, + "loss": 0.8034, "step": 12671 }, { - "epoch": 0.35959137343927355, + "epoch": 0.35909206834991075, "grad_norm": 0.0, - "learning_rate": 1.4823881983726059e-05, - "loss": 1.0031, + "learning_rate": 1.4837791312588001e-05, + "loss": 0.9924, "step": 12672 }, { - "epoch": 0.35961975028376847, + "epoch": 0.3591204057921732, "grad_norm": 0.0, - "learning_rate": 1.4823076889982064e-05, - "loss": 0.9384, + "learning_rate": 1.483698805211388e-05, + "loss": 0.9252, "step": 12673 }, { - "epoch": 0.35964812712826333, + "epoch": 0.3591487432344357, "grad_norm": 0.0, - "learning_rate": 1.4822271755497322e-05, - "loss": 1.0017, + "learning_rate": 1.483618475089601e-05, + "loss": 0.9814, "step": 12674 }, { - "epoch": 0.35967650397275824, + "epoch": 0.3591770806766981, "grad_norm": 0.0, - "learning_rate": 1.4821466580278634e-05, - "loss": 0.9819, + "learning_rate": 1.483538140894116e-05, + "loss": 0.8194, "step": 12675 }, { - "epoch": 0.3597048808172531, + "epoch": 0.35920541811896056, "grad_norm": 0.0, - "learning_rate": 1.48206613643328e-05, - "loss": 0.9796, + "learning_rate": 1.4834578026256099e-05, + "loss": 1.0115, "step": 12676 }, { - "epoch": 0.359733257661748, + "epoch": 0.35923375556122306, "grad_norm": 0.0, - "learning_rate": 1.4819856107666622e-05, - "loss": 0.8973, + "learning_rate": 1.483377460284759e-05, + "loss": 0.9441, "step": 12677 }, { - "epoch": 0.35976163450624293, + "epoch": 0.3592620930034855, "grad_norm": 0.0, - "learning_rate": 1.4819050810286903e-05, - "loss": 0.9555, + "learning_rate": 1.4832971138722403e-05, + "loss": 0.9974, "step": 12678 }, { - "epoch": 0.3597900113507378, + "epoch": 0.359290430445748, "grad_norm": 0.0, - "learning_rate": 1.4818245472200449e-05, - "loss": 0.9017, + "learning_rate": 1.4832167633887306e-05, + "loss": 0.9397, "step": 12679 }, { - "epoch": 0.3598183881952327, + "epoch": 0.3593187678880104, "grad_norm": 0.0, - "learning_rate": 1.4817440093414055e-05, - "loss": 0.9258, + "learning_rate": 1.483136408834907e-05, + "loss": 0.8693, "step": 12680 }, { - "epoch": 0.35984676503972757, + "epoch": 0.35934710533027286, "grad_norm": 0.0, - "learning_rate": 1.4816634673934532e-05, - "loss": 0.9889, + "learning_rate": 1.4830560502114452e-05, + "loss": 0.8726, "step": 12681 }, { - "epoch": 0.3598751418842225, + "epoch": 0.35937544277253536, "grad_norm": 0.0, - "learning_rate": 1.4815829213768675e-05, - "loss": 0.8776, + "learning_rate": 1.4829756875190236e-05, + "loss": 0.8874, "step": 12682 }, { - "epoch": 0.35990351872871734, + "epoch": 0.3594037802147978, "grad_norm": 0.0, - "learning_rate": 1.4815023712923296e-05, - "loss": 0.926, + "learning_rate": 1.482895320758318e-05, + "loss": 0.8425, "step": 12683 }, { - "epoch": 0.35993189557321226, + "epoch": 0.3594321176570603, "grad_norm": 0.0, - "learning_rate": 1.481421817140519e-05, - "loss": 0.913, + "learning_rate": 1.4828149499300061e-05, + "loss": 0.9428, "step": 12684 }, { - "epoch": 0.35996027241770717, + "epoch": 0.35946045509932273, "grad_norm": 0.0, - "learning_rate": 1.481341258922117e-05, - "loss": 0.9346, + "learning_rate": 1.4827345750347646e-05, + "loss": 0.9718, "step": 12685 }, { - "epoch": 0.35998864926220203, + "epoch": 0.3594887925415852, "grad_norm": 0.0, - "learning_rate": 1.4812606966378039e-05, - "loss": 0.9121, + "learning_rate": 1.4826541960732704e-05, + "loss": 1.0092, "step": 12686 }, { - "epoch": 0.36001702610669695, + "epoch": 0.35951712998384766, "grad_norm": 0.0, - "learning_rate": 1.4811801302882596e-05, - "loss": 0.9297, + "learning_rate": 1.4825738130462008e-05, + "loss": 0.9375, "step": 12687 }, { - "epoch": 0.3600454029511918, + "epoch": 0.3595454674261101, "grad_norm": 0.0, - "learning_rate": 1.4810995598741655e-05, - "loss": 1.0428, + "learning_rate": 1.4824934259542326e-05, + "loss": 0.912, "step": 12688 }, { - "epoch": 0.3600737797956867, + "epoch": 0.3595738048683726, "grad_norm": 0.0, - "learning_rate": 1.4810189853962018e-05, - "loss": 0.8886, + "learning_rate": 1.4824130347980431e-05, + "loss": 0.9895, "step": 12689 }, { - "epoch": 0.36010215664018164, + "epoch": 0.35960214231063503, "grad_norm": 0.0, - "learning_rate": 1.480938406855049e-05, - "loss": 1.0159, + "learning_rate": 1.4823326395783096e-05, + "loss": 0.9935, "step": 12690 }, { - "epoch": 0.3601305334846765, + "epoch": 0.3596304797528975, "grad_norm": 0.0, - "learning_rate": 1.4808578242513878e-05, - "loss": 1.0598, + "learning_rate": 1.4822522402957091e-05, + "loss": 0.9285, "step": 12691 }, { - "epoch": 0.3601589103291714, + "epoch": 0.35965881719515996, "grad_norm": 0.0, - "learning_rate": 1.4807772375858989e-05, - "loss": 1.0892, + "learning_rate": 1.482171836950919e-05, + "loss": 0.9909, "step": 12692 }, { - "epoch": 0.36018728717366627, + "epoch": 0.3596871546374224, "grad_norm": 0.0, - "learning_rate": 1.4806966468592632e-05, - "loss": 0.9434, + "learning_rate": 1.4820914295446165e-05, + "loss": 0.8311, "step": 12693 }, { - "epoch": 0.3602156640181612, + "epoch": 0.3597154920796849, "grad_norm": 0.0, - "learning_rate": 1.480616052072161e-05, - "loss": 0.8591, + "learning_rate": 1.4820110180774784e-05, + "loss": 0.9739, "step": 12694 }, { - "epoch": 0.36024404086265605, + "epoch": 0.35974382952194733, "grad_norm": 0.0, - "learning_rate": 1.480535453225274e-05, - "loss": 0.8547, + "learning_rate": 1.481930602550183e-05, + "loss": 0.9809, "step": 12695 }, { - "epoch": 0.36027241770715096, + "epoch": 0.3597721669642098, "grad_norm": 0.0, - "learning_rate": 1.4804548503192821e-05, - "loss": 0.9592, + "learning_rate": 1.4818501829634069e-05, + "loss": 0.9996, "step": 12696 }, { - "epoch": 0.3603007945516459, + "epoch": 0.35980050440647227, "grad_norm": 0.0, - "learning_rate": 1.4803742433548664e-05, - "loss": 0.9983, + "learning_rate": 1.4817697593178281e-05, + "loss": 0.8955, "step": 12697 }, { - "epoch": 0.36032917139614073, + "epoch": 0.35982884184873476, "grad_norm": 0.0, - "learning_rate": 1.4802936323327079e-05, - "loss": 0.7476, + "learning_rate": 1.4816893316141232e-05, + "loss": 0.8002, "step": 12698 }, { - "epoch": 0.36035754824063565, + "epoch": 0.3598571792909972, "grad_norm": 0.0, - "learning_rate": 1.4802130172534876e-05, - "loss": 0.8205, + "learning_rate": 1.4816088998529707e-05, + "loss": 0.9426, "step": 12699 }, { - "epoch": 0.3603859250851305, + "epoch": 0.35988551673325964, "grad_norm": 0.0, - "learning_rate": 1.4801323981178863e-05, - "loss": 0.8869, + "learning_rate": 1.4815284640350476e-05, + "loss": 0.9368, "step": 12700 }, { - "epoch": 0.3604143019296254, + "epoch": 0.35991385417552213, "grad_norm": 0.0, - "learning_rate": 1.4800517749265848e-05, - "loss": 0.9827, + "learning_rate": 1.481448024161031e-05, + "loss": 0.9553, "step": 12701 }, { - "epoch": 0.36044267877412034, + "epoch": 0.35994219161778457, "grad_norm": 0.0, - "learning_rate": 1.4799711476802649e-05, - "loss": 0.9837, + "learning_rate": 1.481367580231599e-05, + "loss": 1.0, "step": 12702 }, { - "epoch": 0.3604710556186152, + "epoch": 0.35997052906004706, "grad_norm": 0.0, - "learning_rate": 1.4798905163796069e-05, - "loss": 0.9696, + "learning_rate": 1.4812871322474294e-05, + "loss": 1.0642, "step": 12703 }, { - "epoch": 0.3604994324631101, + "epoch": 0.3599988665023095, "grad_norm": 0.0, - "learning_rate": 1.479809881025292e-05, - "loss": 0.984, + "learning_rate": 1.4812066802091995e-05, + "loss": 0.9446, "step": 12704 }, { - "epoch": 0.360527809307605, + "epoch": 0.36002720394457194, "grad_norm": 0.0, - "learning_rate": 1.4797292416180016e-05, - "loss": 0.8744, + "learning_rate": 1.481126224117587e-05, + "loss": 1.0054, "step": 12705 }, { - "epoch": 0.3605561861520999, + "epoch": 0.36005554138683443, "grad_norm": 0.0, - "learning_rate": 1.4796485981584169e-05, - "loss": 0.9288, + "learning_rate": 1.4810457639732696e-05, + "loss": 0.904, "step": 12706 }, { - "epoch": 0.3605845629965948, + "epoch": 0.36008387882909687, "grad_norm": 0.0, - "learning_rate": 1.4795679506472184e-05, - "loss": 0.8521, + "learning_rate": 1.480965299776925e-05, + "loss": 1.0002, "step": 12707 }, { - "epoch": 0.36061293984108966, + "epoch": 0.36011221627135936, "grad_norm": 0.0, - "learning_rate": 1.4794872990850881e-05, - "loss": 0.9924, + "learning_rate": 1.4808848315292313e-05, + "loss": 0.9127, "step": 12708 }, { - "epoch": 0.3606413166855846, + "epoch": 0.3601405537136218, "grad_norm": 0.0, - "learning_rate": 1.4794066434727073e-05, - "loss": 1.0455, + "learning_rate": 1.4808043592308661e-05, + "loss": 0.8981, "step": 12709 }, { - "epoch": 0.36066969353007944, + "epoch": 0.3601688911558843, "grad_norm": 0.0, - "learning_rate": 1.479325983810757e-05, - "loss": 0.9704, + "learning_rate": 1.480723882882507e-05, + "loss": 0.9668, "step": 12710 }, { - "epoch": 0.36069807037457435, + "epoch": 0.36019722859814673, "grad_norm": 0.0, - "learning_rate": 1.4792453200999185e-05, - "loss": 0.9236, + "learning_rate": 1.4806434024848322e-05, + "loss": 0.8964, "step": 12711 }, { - "epoch": 0.3607264472190692, + "epoch": 0.3602255660404092, "grad_norm": 0.0, - "learning_rate": 1.479164652340873e-05, - "loss": 1.0108, + "learning_rate": 1.4805629180385197e-05, + "loss": 0.8804, "step": 12712 }, { - "epoch": 0.36075482406356413, + "epoch": 0.36025390348267167, "grad_norm": 0.0, - "learning_rate": 1.4790839805343025e-05, - "loss": 0.9361, + "learning_rate": 1.480482429544247e-05, + "loss": 0.9288, "step": 12713 }, { - "epoch": 0.36078320090805904, + "epoch": 0.3602822409249341, "grad_norm": 0.0, - "learning_rate": 1.4790033046808878e-05, - "loss": 0.9872, + "learning_rate": 1.4804019370026927e-05, + "loss": 0.9262, "step": 12714 }, { - "epoch": 0.3608115777525539, + "epoch": 0.3603105783671966, "grad_norm": 0.0, - "learning_rate": 1.4789226247813104e-05, - "loss": 0.9348, + "learning_rate": 1.4803214404145343e-05, + "loss": 0.9459, "step": 12715 }, { - "epoch": 0.3608399545970488, + "epoch": 0.36033891580945904, "grad_norm": 0.0, - "learning_rate": 1.4788419408362527e-05, - "loss": 0.8743, + "learning_rate": 1.4802409397804501e-05, + "loss": 0.9477, "step": 12716 }, { - "epoch": 0.3608683314415437, + "epoch": 0.3603672532517215, "grad_norm": 0.0, - "learning_rate": 1.4787612528463952e-05, - "loss": 0.891, + "learning_rate": 1.480160435101118e-05, + "loss": 0.9482, "step": 12717 }, { - "epoch": 0.3608967082860386, + "epoch": 0.36039559069398397, "grad_norm": 0.0, - "learning_rate": 1.47868056081242e-05, - "loss": 0.7779, + "learning_rate": 1.4800799263772168e-05, + "loss": 0.9435, "step": 12718 }, { - "epoch": 0.3609250851305335, + "epoch": 0.3604239281362464, "grad_norm": 0.0, - "learning_rate": 1.4785998647350087e-05, - "loss": 0.8946, + "learning_rate": 1.4799994136094233e-05, + "loss": 0.931, "step": 12719 }, { - "epoch": 0.36095346197502837, + "epoch": 0.3604522655785089, "grad_norm": 0.0, - "learning_rate": 1.4785191646148427e-05, - "loss": 0.9118, + "learning_rate": 1.4799188967984168e-05, + "loss": 0.9448, "step": 12720 }, { - "epoch": 0.3609818388195233, + "epoch": 0.36048060302077134, "grad_norm": 0.0, - "learning_rate": 1.4784384604526037e-05, - "loss": 0.8943, + "learning_rate": 1.4798383759448754e-05, + "loss": 0.8301, "step": 12721 }, { - "epoch": 0.36101021566401814, + "epoch": 0.36050894046303383, "grad_norm": 0.0, - "learning_rate": 1.4783577522489733e-05, - "loss": 0.758, + "learning_rate": 1.4797578510494772e-05, + "loss": 0.8814, "step": 12722 }, { - "epoch": 0.36103859250851306, + "epoch": 0.36053727790529627, "grad_norm": 0.0, - "learning_rate": 1.478277040004634e-05, - "loss": 0.9819, + "learning_rate": 1.4796773221129001e-05, + "loss": 1.0017, "step": 12723 }, { - "epoch": 0.36106696935300797, + "epoch": 0.3605656153475587, "grad_norm": 0.0, - "learning_rate": 1.4781963237202665e-05, - "loss": 0.986, + "learning_rate": 1.4795967891358232e-05, + "loss": 0.9027, "step": 12724 }, { - "epoch": 0.36109534619750283, + "epoch": 0.3605939527898212, "grad_norm": 0.0, - "learning_rate": 1.4781156033965536e-05, - "loss": 0.9031, + "learning_rate": 1.4795162521189243e-05, + "loss": 0.9999, "step": 12725 }, { - "epoch": 0.36112372304199775, + "epoch": 0.36062229023208364, "grad_norm": 0.0, - "learning_rate": 1.4780348790341767e-05, - "loss": 0.8964, + "learning_rate": 1.479435711062882e-05, + "loss": 0.8583, "step": 12726 }, { - "epoch": 0.3611520998864926, + "epoch": 0.36065062767434614, "grad_norm": 0.0, - "learning_rate": 1.4779541506338178e-05, - "loss": 0.8995, + "learning_rate": 1.4793551659683743e-05, + "loss": 0.9769, "step": 12727 }, { - "epoch": 0.3611804767309875, + "epoch": 0.3606789651166086, "grad_norm": 0.0, - "learning_rate": 1.4778734181961582e-05, - "loss": 0.9503, + "learning_rate": 1.4792746168360803e-05, + "loss": 0.8508, "step": 12728 }, { - "epoch": 0.3612088535754824, + "epoch": 0.360707302558871, "grad_norm": 0.0, - "learning_rate": 1.4777926817218808e-05, - "loss": 0.8943, + "learning_rate": 1.4791940636666785e-05, + "loss": 1.0013, "step": 12729 }, { - "epoch": 0.3612372304199773, + "epoch": 0.3607356400011335, "grad_norm": 0.0, - "learning_rate": 1.4777119412116667e-05, - "loss": 0.8529, + "learning_rate": 1.479113506460847e-05, + "loss": 0.9443, "step": 12730 }, { - "epoch": 0.3612656072644722, + "epoch": 0.36076397744339594, "grad_norm": 0.0, - "learning_rate": 1.4776311966661987e-05, - "loss": 0.9639, + "learning_rate": 1.4790329452192643e-05, + "loss": 0.987, "step": 12731 }, { - "epoch": 0.36129398410896707, + "epoch": 0.36079231488565844, "grad_norm": 0.0, - "learning_rate": 1.4775504480861584e-05, - "loss": 1.0502, + "learning_rate": 1.4789523799426095e-05, + "loss": 0.8579, "step": 12732 }, { - "epoch": 0.361322360953462, + "epoch": 0.3608206523279209, "grad_norm": 0.0, - "learning_rate": 1.477469695472228e-05, - "loss": 0.9608, + "learning_rate": 1.4788718106315605e-05, + "loss": 0.9188, "step": 12733 }, { - "epoch": 0.36135073779795684, + "epoch": 0.36084898977018337, "grad_norm": 0.0, - "learning_rate": 1.4773889388250896e-05, - "loss": 0.9814, + "learning_rate": 1.478791237286797e-05, + "loss": 0.88, "step": 12734 }, { - "epoch": 0.36137911464245176, + "epoch": 0.3608773272124458, "grad_norm": 0.0, - "learning_rate": 1.4773081781454254e-05, - "loss": 0.8156, + "learning_rate": 1.4787106599089969e-05, + "loss": 0.9302, "step": 12735 }, { - "epoch": 0.3614074914869467, + "epoch": 0.36090566465470825, "grad_norm": 0.0, - "learning_rate": 1.4772274134339178e-05, - "loss": 1.0166, + "learning_rate": 1.478630078498839e-05, + "loss": 0.9379, "step": 12736 }, { - "epoch": 0.36143586833144153, + "epoch": 0.36093400209697074, "grad_norm": 0.0, - "learning_rate": 1.4771466446912485e-05, - "loss": 0.9135, + "learning_rate": 1.4785494930570021e-05, + "loss": 0.8294, "step": 12737 }, { - "epoch": 0.36146424517593645, + "epoch": 0.3609623395392332, "grad_norm": 0.0, - "learning_rate": 1.4770658719180999e-05, - "loss": 0.9308, + "learning_rate": 1.4784689035841655e-05, + "loss": 0.9296, "step": 12738 }, { - "epoch": 0.3614926220204313, + "epoch": 0.3609906769814957, "grad_norm": 0.0, - "learning_rate": 1.4769850951151547e-05, - "loss": 0.9205, + "learning_rate": 1.4783883100810074e-05, + "loss": 1.0055, "step": 12739 }, { - "epoch": 0.3615209988649262, + "epoch": 0.3610190144237581, "grad_norm": 0.0, - "learning_rate": 1.476904314283095e-05, - "loss": 0.9994, + "learning_rate": 1.4783077125482068e-05, + "loss": 0.9829, "step": 12740 }, { - "epoch": 0.36154937570942114, + "epoch": 0.36104735186602055, "grad_norm": 0.0, - "learning_rate": 1.476823529422603e-05, - "loss": 0.952, + "learning_rate": 1.478227110986443e-05, + "loss": 0.9492, "step": 12741 }, { - "epoch": 0.361577752553916, + "epoch": 0.36107568930828304, "grad_norm": 0.0, - "learning_rate": 1.4767427405343613e-05, - "loss": 0.8712, + "learning_rate": 1.4781465053963946e-05, + "loss": 0.8827, "step": 12742 }, { - "epoch": 0.3616061293984109, + "epoch": 0.3611040267505455, "grad_norm": 0.0, - "learning_rate": 1.4766619476190522e-05, - "loss": 0.8983, + "learning_rate": 1.4780658957787407e-05, + "loss": 0.9196, "step": 12743 }, { - "epoch": 0.3616345062429058, + "epoch": 0.361132364192808, "grad_norm": 0.0, - "learning_rate": 1.4765811506773582e-05, - "loss": 0.8852, + "learning_rate": 1.47798528213416e-05, + "loss": 0.8888, "step": 12744 }, { - "epoch": 0.3616628830874007, + "epoch": 0.3611607016350704, "grad_norm": 0.0, - "learning_rate": 1.4765003497099615e-05, - "loss": 0.8871, + "learning_rate": 1.477904664463332e-05, + "loss": 0.9277, "step": 12745 }, { - "epoch": 0.36169125993189555, + "epoch": 0.3611890390773329, "grad_norm": 0.0, - "learning_rate": 1.4764195447175454e-05, - "loss": 0.926, + "learning_rate": 1.4778240427669352e-05, + "loss": 0.9221, "step": 12746 }, { - "epoch": 0.36171963677639046, + "epoch": 0.36121737651959535, "grad_norm": 0.0, - "learning_rate": 1.4763387357007917e-05, - "loss": 0.8706, + "learning_rate": 1.4777434170456495e-05, + "loss": 1.0221, "step": 12747 }, { - "epoch": 0.3617480136208854, + "epoch": 0.3612457139618578, "grad_norm": 0.0, - "learning_rate": 1.4762579226603833e-05, - "loss": 0.9591, + "learning_rate": 1.4776627873001533e-05, + "loss": 0.7685, "step": 12748 }, { - "epoch": 0.36177639046538024, + "epoch": 0.3612740514041203, "grad_norm": 0.0, - "learning_rate": 1.4761771055970032e-05, - "loss": 0.9238, + "learning_rate": 1.477582153531126e-05, + "loss": 1.0567, "step": 12749 }, { - "epoch": 0.36180476730987515, + "epoch": 0.3613023888463827, "grad_norm": 0.0, - "learning_rate": 1.4760962845113334e-05, - "loss": 0.864, + "learning_rate": 1.4775015157392472e-05, + "loss": 0.907, "step": 12750 }, { - "epoch": 0.36183314415437, + "epoch": 0.3613307262886452, "grad_norm": 0.0, - "learning_rate": 1.4760154594040566e-05, - "loss": 0.9429, + "learning_rate": 1.4774208739251959e-05, + "loss": 0.9749, "step": 12751 }, { - "epoch": 0.3618615209988649, + "epoch": 0.36135906373090765, "grad_norm": 0.0, - "learning_rate": 1.4759346302758561e-05, - "loss": 0.9517, + "learning_rate": 1.4773402280896507e-05, + "loss": 0.9268, "step": 12752 }, { - "epoch": 0.36188989784335984, + "epoch": 0.3613874011731701, "grad_norm": 0.0, - "learning_rate": 1.4758537971274143e-05, - "loss": 0.9623, + "learning_rate": 1.4772595782332916e-05, + "loss": 0.9385, "step": 12753 }, { - "epoch": 0.3619182746878547, + "epoch": 0.3614157386154326, "grad_norm": 0.0, - "learning_rate": 1.4757729599594144e-05, - "loss": 0.822, + "learning_rate": 1.4771789243567983e-05, + "loss": 0.9232, "step": 12754 }, { - "epoch": 0.3619466515323496, + "epoch": 0.361444076057695, "grad_norm": 0.0, - "learning_rate": 1.4756921187725383e-05, - "loss": 0.9108, + "learning_rate": 1.4770982664608497e-05, + "loss": 0.9914, "step": 12755 }, { - "epoch": 0.3619750283768445, + "epoch": 0.3614724134999575, "grad_norm": 0.0, - "learning_rate": 1.4756112735674698e-05, - "loss": 0.9152, + "learning_rate": 1.4770176045461248e-05, + "loss": 0.9398, "step": 12756 }, { - "epoch": 0.3620034052213394, + "epoch": 0.36150075094221995, "grad_norm": 0.0, - "learning_rate": 1.4755304243448918e-05, - "loss": 0.9174, + "learning_rate": 1.4769369386133038e-05, + "loss": 0.8197, "step": 12757 }, { - "epoch": 0.3620317820658343, + "epoch": 0.36152908838448244, "grad_norm": 0.0, - "learning_rate": 1.4754495711054866e-05, - "loss": 0.9748, + "learning_rate": 1.4768562686630659e-05, + "loss": 1.0095, "step": 12758 }, { - "epoch": 0.36206015891032917, + "epoch": 0.3615574258267449, "grad_norm": 0.0, - "learning_rate": 1.4753687138499377e-05, - "loss": 0.939, + "learning_rate": 1.4767755946960902e-05, + "loss": 1.0229, "step": 12759 }, { - "epoch": 0.3620885357548241, + "epoch": 0.3615857632690073, "grad_norm": 0.0, - "learning_rate": 1.4752878525789279e-05, - "loss": 0.8723, + "learning_rate": 1.476694916713057e-05, + "loss": 1.0224, "step": 12760 }, { - "epoch": 0.36211691259931894, + "epoch": 0.3616141007112698, "grad_norm": 0.0, - "learning_rate": 1.4752069872931399e-05, - "loss": 0.9601, + "learning_rate": 1.4766142347146452e-05, + "loss": 0.9667, "step": 12761 }, { - "epoch": 0.36214528944381386, + "epoch": 0.36164243815353225, "grad_norm": 0.0, - "learning_rate": 1.4751261179932572e-05, - "loss": 0.8148, + "learning_rate": 1.476533548701535e-05, + "loss": 0.9394, "step": 12762 }, { - "epoch": 0.3621736662883087, + "epoch": 0.36167077559579475, "grad_norm": 0.0, - "learning_rate": 1.475045244679963e-05, - "loss": 0.9077, + "learning_rate": 1.4764528586744058e-05, + "loss": 0.8469, "step": 12763 }, { - "epoch": 0.36220204313280363, + "epoch": 0.3616991130380572, "grad_norm": 0.0, - "learning_rate": 1.4749643673539405e-05, - "loss": 0.8768, + "learning_rate": 1.4763721646339373e-05, + "loss": 0.9893, "step": 12764 }, { - "epoch": 0.36223041997729855, + "epoch": 0.3617274504803196, "grad_norm": 0.0, - "learning_rate": 1.474883486015872e-05, - "loss": 0.8337, + "learning_rate": 1.4762914665808089e-05, + "loss": 0.9236, "step": 12765 }, { - "epoch": 0.3622587968217934, + "epoch": 0.3617557879225821, "grad_norm": 0.0, - "learning_rate": 1.474802600666442e-05, - "loss": 0.9733, + "learning_rate": 1.4762107645157005e-05, + "loss": 0.8976, "step": 12766 }, { - "epoch": 0.3622871736662883, + "epoch": 0.36178412536484456, "grad_norm": 0.0, - "learning_rate": 1.4747217113063328e-05, - "loss": 0.9668, + "learning_rate": 1.4761300584392922e-05, + "loss": 0.9155, "step": 12767 }, { - "epoch": 0.3623155505107832, + "epoch": 0.36181246280710705, "grad_norm": 0.0, - "learning_rate": 1.4746408179362278e-05, - "loss": 1.0083, + "learning_rate": 1.4760493483522637e-05, + "loss": 0.8453, "step": 12768 }, { - "epoch": 0.3623439273552781, + "epoch": 0.3618408002493695, "grad_norm": 0.0, - "learning_rate": 1.4745599205568107e-05, - "loss": 0.9699, + "learning_rate": 1.4759686342552945e-05, + "loss": 0.8692, "step": 12769 }, { - "epoch": 0.362372304199773, + "epoch": 0.3618691376916319, "grad_norm": 0.0, - "learning_rate": 1.4744790191687646e-05, - "loss": 0.9258, + "learning_rate": 1.475887916149065e-05, + "loss": 0.9118, "step": 12770 }, { - "epoch": 0.36240068104426787, + "epoch": 0.3618974751338944, "grad_norm": 0.0, - "learning_rate": 1.4743981137727728e-05, - "loss": 1.0708, + "learning_rate": 1.4758071940342547e-05, + "loss": 1.0031, "step": 12771 }, { - "epoch": 0.3624290578887628, + "epoch": 0.36192581257615686, "grad_norm": 0.0, - "learning_rate": 1.4743172043695189e-05, - "loss": 0.9869, + "learning_rate": 1.4757264679115437e-05, + "loss": 0.8668, "step": 12772 }, { - "epoch": 0.36245743473325764, + "epoch": 0.36195415001841935, "grad_norm": 0.0, - "learning_rate": 1.4742362909596861e-05, - "loss": 1.0729, + "learning_rate": 1.475645737781612e-05, + "loss": 0.9578, "step": 12773 }, { - "epoch": 0.36248581157775256, + "epoch": 0.3619824874606818, "grad_norm": 0.0, - "learning_rate": 1.4741553735439581e-05, - "loss": 0.9781, + "learning_rate": 1.4755650036451397e-05, + "loss": 0.8786, "step": 12774 }, { - "epoch": 0.3625141884222474, + "epoch": 0.3620108249029443, "grad_norm": 0.0, - "learning_rate": 1.4740744521230184e-05, - "loss": 0.9285, + "learning_rate": 1.4754842655028067e-05, + "loss": 0.9393, "step": 12775 }, { - "epoch": 0.36254256526674233, + "epoch": 0.3620391623452067, "grad_norm": 0.0, - "learning_rate": 1.4739935266975503e-05, - "loss": 0.967, + "learning_rate": 1.4754035233552935e-05, + "loss": 0.8689, "step": 12776 }, { - "epoch": 0.36257094211123725, + "epoch": 0.36206749978746916, "grad_norm": 0.0, - "learning_rate": 1.473912597268238e-05, - "loss": 0.9287, + "learning_rate": 1.4753227772032795e-05, + "loss": 0.862, "step": 12777 }, { - "epoch": 0.3625993189557321, + "epoch": 0.36209583722973165, "grad_norm": 0.0, - "learning_rate": 1.4738316638357644e-05, - "loss": 0.9269, + "learning_rate": 1.4752420270474455e-05, + "loss": 0.9038, "step": 12778 }, { - "epoch": 0.362627695800227, + "epoch": 0.3621241746719941, "grad_norm": 0.0, - "learning_rate": 1.4737507264008133e-05, - "loss": 1.0466, + "learning_rate": 1.4751612728884711e-05, + "loss": 0.8278, "step": 12779 }, { - "epoch": 0.3626560726447219, + "epoch": 0.3621525121142566, "grad_norm": 0.0, - "learning_rate": 1.4736697849640688e-05, - "loss": 0.812, + "learning_rate": 1.4750805147270373e-05, + "loss": 0.9273, "step": 12780 }, { - "epoch": 0.3626844494892168, + "epoch": 0.362180849556519, "grad_norm": 0.0, - "learning_rate": 1.4735888395262144e-05, - "loss": 0.9784, + "learning_rate": 1.4749997525638233e-05, + "loss": 0.9798, "step": 12781 }, { - "epoch": 0.3627128263337117, + "epoch": 0.36220918699878146, "grad_norm": 0.0, - "learning_rate": 1.4735078900879334e-05, - "loss": 0.9813, + "learning_rate": 1.4749189863995107e-05, + "loss": 0.8975, "step": 12782 }, { - "epoch": 0.3627412031782066, + "epoch": 0.36223752444104396, "grad_norm": 0.0, - "learning_rate": 1.4734269366499104e-05, - "loss": 0.9287, + "learning_rate": 1.4748382162347788e-05, + "loss": 0.927, "step": 12783 }, { - "epoch": 0.3627695800227015, + "epoch": 0.3622658618833064, "grad_norm": 0.0, - "learning_rate": 1.4733459792128283e-05, - "loss": 0.9772, + "learning_rate": 1.474757442070308e-05, + "loss": 0.8959, "step": 12784 }, { - "epoch": 0.36279795686719635, + "epoch": 0.3622941993255689, "grad_norm": 0.0, - "learning_rate": 1.4732650177773717e-05, - "loss": 0.8983, + "learning_rate": 1.4746766639067793e-05, + "loss": 0.9, "step": 12785 }, { - "epoch": 0.36282633371169126, + "epoch": 0.3623225367678313, "grad_norm": 0.0, - "learning_rate": 1.4731840523442243e-05, - "loss": 0.8666, + "learning_rate": 1.4745958817448727e-05, + "loss": 1.0125, "step": 12786 }, { - "epoch": 0.3628547105561862, + "epoch": 0.3623508742100938, "grad_norm": 0.0, - "learning_rate": 1.4731030829140698e-05, - "loss": 0.9851, + "learning_rate": 1.4745150955852687e-05, + "loss": 0.8485, "step": 12787 }, { - "epoch": 0.36288308740068104, + "epoch": 0.36237921165235626, "grad_norm": 0.0, - "learning_rate": 1.4730221094875922e-05, - "loss": 0.908, + "learning_rate": 1.474434305428648e-05, + "loss": 0.9481, "step": 12788 }, { - "epoch": 0.36291146424517595, + "epoch": 0.3624075490946187, "grad_norm": 0.0, - "learning_rate": 1.4729411320654758e-05, - "loss": 0.9038, + "learning_rate": 1.474353511275691e-05, + "loss": 0.9448, "step": 12789 }, { - "epoch": 0.3629398410896708, + "epoch": 0.3624358865368812, "grad_norm": 0.0, - "learning_rate": 1.4728601506484043e-05, - "loss": 0.8604, + "learning_rate": 1.474272713127078e-05, + "loss": 1.0064, "step": 12790 }, { - "epoch": 0.3629682179341657, + "epoch": 0.36246422397914363, "grad_norm": 0.0, - "learning_rate": 1.472779165237062e-05, - "loss": 0.92, + "learning_rate": 1.4741919109834898e-05, + "loss": 1.0251, "step": 12791 }, { - "epoch": 0.3629965947786606, + "epoch": 0.3624925614214061, "grad_norm": 0.0, - "learning_rate": 1.4726981758321324e-05, - "loss": 1.0583, + "learning_rate": 1.4741111048456072e-05, + "loss": 0.9073, "step": 12792 }, { - "epoch": 0.3630249716231555, + "epoch": 0.36252089886366856, "grad_norm": 0.0, - "learning_rate": 1.4726171824343005e-05, - "loss": 0.819, + "learning_rate": 1.4740302947141107e-05, + "loss": 0.9709, "step": 12793 }, { - "epoch": 0.3630533484676504, + "epoch": 0.362549236305931, "grad_norm": 0.0, - "learning_rate": 1.4725361850442502e-05, - "loss": 0.8305, + "learning_rate": 1.473949480589681e-05, + "loss": 1.0637, "step": 12794 }, { - "epoch": 0.3630817253121453, + "epoch": 0.3625775737481935, "grad_norm": 0.0, - "learning_rate": 1.4724551836626648e-05, - "loss": 0.9459, + "learning_rate": 1.4738686624729987e-05, + "loss": 0.9661, "step": 12795 }, { - "epoch": 0.3631101021566402, + "epoch": 0.36260591119045593, "grad_norm": 0.0, - "learning_rate": 1.4723741782902297e-05, - "loss": 1.0269, + "learning_rate": 1.473787840364745e-05, + "loss": 0.9937, "step": 12796 }, { - "epoch": 0.36313847900113505, + "epoch": 0.3626342486327184, "grad_norm": 0.0, - "learning_rate": 1.4722931689276287e-05, - "loss": 0.946, + "learning_rate": 1.4737070142656003e-05, + "loss": 0.9597, "step": 12797 }, { - "epoch": 0.36316685584562997, + "epoch": 0.36266258607498086, "grad_norm": 0.0, - "learning_rate": 1.4722121555755457e-05, - "loss": 0.9395, + "learning_rate": 1.4736261841762454e-05, + "loss": 1.0858, "step": 12798 }, { - "epoch": 0.3631952326901249, + "epoch": 0.36269092351724336, "grad_norm": 0.0, - "learning_rate": 1.4721311382346657e-05, - "loss": 0.872, + "learning_rate": 1.4735453500973611e-05, + "loss": 0.7476, "step": 12799 }, { - "epoch": 0.36322360953461974, + "epoch": 0.3627192609595058, "grad_norm": 0.0, - "learning_rate": 1.4720501169056726e-05, - "loss": 0.9402, + "learning_rate": 1.4734645120296284e-05, + "loss": 0.962, "step": 12800 }, { - "epoch": 0.36325198637911466, + "epoch": 0.36274759840176823, "grad_norm": 0.0, - "learning_rate": 1.4719690915892513e-05, - "loss": 0.9438, + "learning_rate": 1.4733836699737287e-05, + "loss": 0.8892, "step": 12801 }, { - "epoch": 0.3632803632236095, + "epoch": 0.36277593584403073, "grad_norm": 0.0, - "learning_rate": 1.4718880622860856e-05, - "loss": 0.9664, + "learning_rate": 1.4733028239303424e-05, + "loss": 1.0192, "step": 12802 }, { - "epoch": 0.36330874006810443, + "epoch": 0.36280427328629317, "grad_norm": 0.0, - "learning_rate": 1.4718070289968602e-05, - "loss": 0.97, + "learning_rate": 1.4732219739001508e-05, + "loss": 1.0182, "step": 12803 }, { - "epoch": 0.36333711691259934, + "epoch": 0.36283261072855566, "grad_norm": 0.0, - "learning_rate": 1.4717259917222597e-05, - "loss": 0.8683, + "learning_rate": 1.4731411198838346e-05, + "loss": 0.9352, "step": 12804 }, { - "epoch": 0.3633654937570942, + "epoch": 0.3628609481708181, "grad_norm": 0.0, - "learning_rate": 1.4716449504629685e-05, - "loss": 0.8124, + "learning_rate": 1.4730602618820751e-05, + "loss": 0.9833, "step": 12805 }, { - "epoch": 0.3633938706015891, + "epoch": 0.36288928561308054, "grad_norm": 0.0, - "learning_rate": 1.4715639052196712e-05, - "loss": 0.8096, + "learning_rate": 1.472979399895553e-05, + "loss": 1.0833, "step": 12806 }, { - "epoch": 0.363422247446084, + "epoch": 0.36291762305534303, "grad_norm": 0.0, - "learning_rate": 1.4714828559930523e-05, - "loss": 1.0227, + "learning_rate": 1.4728985339249504e-05, + "loss": 0.8367, "step": 12807 }, { - "epoch": 0.3634506242905789, + "epoch": 0.36294596049760547, "grad_norm": 0.0, - "learning_rate": 1.4714018027837966e-05, - "loss": 0.9012, + "learning_rate": 1.4728176639709475e-05, + "loss": 1.0014, "step": 12808 }, { - "epoch": 0.36347900113507375, + "epoch": 0.36297429793986796, "grad_norm": 0.0, - "learning_rate": 1.4713207455925886e-05, - "loss": 1.0353, + "learning_rate": 1.472736790034226e-05, + "loss": 0.8832, "step": 12809 }, { - "epoch": 0.36350737797956867, + "epoch": 0.3630026353821304, "grad_norm": 0.0, - "learning_rate": 1.4712396844201134e-05, - "loss": 0.9818, + "learning_rate": 1.4726559121154668e-05, + "loss": 1.0446, "step": 12810 }, { - "epoch": 0.3635357548240636, + "epoch": 0.3630309728243929, "grad_norm": 0.0, - "learning_rate": 1.4711586192670551e-05, - "loss": 0.8859, + "learning_rate": 1.4725750302153514e-05, + "loss": 0.8717, "step": 12811 }, { - "epoch": 0.36356413166855844, + "epoch": 0.36305931026665533, "grad_norm": 0.0, - "learning_rate": 1.4710775501340988e-05, - "loss": 1.0063, + "learning_rate": 1.472494144334561e-05, + "loss": 0.928, "step": 12812 }, { - "epoch": 0.36359250851305336, + "epoch": 0.36308764770891777, "grad_norm": 0.0, - "learning_rate": 1.4709964770219293e-05, - "loss": 0.9562, + "learning_rate": 1.472413254473777e-05, + "loss": 0.9929, "step": 12813 }, { - "epoch": 0.3636208853575482, + "epoch": 0.36311598515118026, "grad_norm": 0.0, - "learning_rate": 1.4709153999312313e-05, - "loss": 0.9735, + "learning_rate": 1.4723323606336805e-05, + "loss": 0.8989, "step": 12814 }, { - "epoch": 0.36364926220204313, + "epoch": 0.3631443225934427, "grad_norm": 0.0, - "learning_rate": 1.4708343188626899e-05, - "loss": 0.9913, + "learning_rate": 1.4722514628149535e-05, + "loss": 0.8916, "step": 12815 }, { - "epoch": 0.36367763904653805, + "epoch": 0.3631726600357052, "grad_norm": 0.0, - "learning_rate": 1.4707532338169897e-05, - "loss": 0.8967, + "learning_rate": 1.4721705610182771e-05, + "loss": 1.0961, "step": 12816 }, { - "epoch": 0.3637060158910329, + "epoch": 0.36320099747796764, "grad_norm": 0.0, - "learning_rate": 1.4706721447948158e-05, - "loss": 0.8577, + "learning_rate": 1.4720896552443327e-05, + "loss": 1.0215, "step": 12817 }, { - "epoch": 0.3637343927355278, + "epoch": 0.3632293349202301, "grad_norm": 0.0, - "learning_rate": 1.4705910517968533e-05, - "loss": 0.9577, + "learning_rate": 1.4720087454938014e-05, + "loss": 0.9657, "step": 12818 }, { - "epoch": 0.3637627695800227, + "epoch": 0.36325767236249257, "grad_norm": 0.0, - "learning_rate": 1.4705099548237869e-05, - "loss": 1.084, + "learning_rate": 1.4719278317673655e-05, + "loss": 0.9584, "step": 12819 }, { - "epoch": 0.3637911464245176, + "epoch": 0.363286009804755, "grad_norm": 0.0, - "learning_rate": 1.4704288538763019e-05, - "loss": 0.9008, + "learning_rate": 1.4718469140657061e-05, + "loss": 0.8662, "step": 12820 }, { - "epoch": 0.3638195232690125, + "epoch": 0.3633143472470175, "grad_norm": 0.0, - "learning_rate": 1.470347748955083e-05, - "loss": 0.8323, + "learning_rate": 1.471765992389505e-05, + "loss": 0.8845, "step": 12821 }, { - "epoch": 0.36384790011350737, + "epoch": 0.36334268468927994, "grad_norm": 0.0, - "learning_rate": 1.4702666400608157e-05, - "loss": 0.9642, + "learning_rate": 1.471685066739444e-05, + "loss": 0.9816, "step": 12822 }, { - "epoch": 0.3638762769580023, + "epoch": 0.36337102213154243, "grad_norm": 0.0, - "learning_rate": 1.4701855271941848e-05, - "loss": 0.9524, + "learning_rate": 1.4716041371162041e-05, + "loss": 1.018, "step": 12823 }, { - "epoch": 0.36390465380249715, + "epoch": 0.36339935957380487, "grad_norm": 0.0, - "learning_rate": 1.4701044103558757e-05, - "loss": 0.9164, + "learning_rate": 1.4715232035204678e-05, + "loss": 0.8882, "step": 12824 }, { - "epoch": 0.36393303064699206, + "epoch": 0.3634276970160673, "grad_norm": 0.0, - "learning_rate": 1.4700232895465733e-05, - "loss": 0.8227, + "learning_rate": 1.4714422659529161e-05, + "loss": 0.8708, "step": 12825 }, { - "epoch": 0.3639614074914869, + "epoch": 0.3634560344583298, "grad_norm": 0.0, - "learning_rate": 1.4699421647669634e-05, - "loss": 0.9102, + "learning_rate": 1.4713613244142315e-05, + "loss": 0.9931, "step": 12826 }, { - "epoch": 0.36398978433598184, + "epoch": 0.36348437190059224, "grad_norm": 0.0, - "learning_rate": 1.4698610360177306e-05, - "loss": 0.8471, + "learning_rate": 1.471280378905095e-05, + "loss": 0.8696, "step": 12827 }, { - "epoch": 0.36401816118047675, + "epoch": 0.36351270934285473, "grad_norm": 0.0, - "learning_rate": 1.4697799032995608e-05, - "loss": 0.848, + "learning_rate": 1.4711994294261893e-05, + "loss": 0.9864, "step": 12828 }, { - "epoch": 0.3640465380249716, + "epoch": 0.3635410467851172, "grad_norm": 0.0, - "learning_rate": 1.4696987666131388e-05, - "loss": 0.9479, + "learning_rate": 1.4711184759781956e-05, + "loss": 0.8573, "step": 12829 }, { - "epoch": 0.3640749148694665, + "epoch": 0.3635693842273796, "grad_norm": 0.0, - "learning_rate": 1.4696176259591501e-05, - "loss": 0.8852, + "learning_rate": 1.471037518561796e-05, + "loss": 0.9368, "step": 12830 }, { - "epoch": 0.3641032917139614, + "epoch": 0.3635977216696421, "grad_norm": 0.0, - "learning_rate": 1.4695364813382806e-05, - "loss": 0.9702, + "learning_rate": 1.4709565571776723e-05, + "loss": 0.9754, "step": 12831 }, { - "epoch": 0.3641316685584563, + "epoch": 0.36362605911190454, "grad_norm": 0.0, - "learning_rate": 1.4694553327512151e-05, - "loss": 0.936, + "learning_rate": 1.470875591826507e-05, + "loss": 1.0101, "step": 12832 }, { - "epoch": 0.3641600454029512, + "epoch": 0.36365439655416704, "grad_norm": 0.0, - "learning_rate": 1.4693741801986392e-05, - "loss": 0.9384, + "learning_rate": 1.4707946225089815e-05, + "loss": 0.8791, "step": 12833 }, { - "epoch": 0.3641884222474461, + "epoch": 0.3636827339964295, "grad_norm": 0.0, - "learning_rate": 1.4692930236812389e-05, - "loss": 1.0175, + "learning_rate": 1.4707136492257783e-05, + "loss": 0.9584, "step": 12834 }, { - "epoch": 0.364216799091941, + "epoch": 0.36371107143869197, "grad_norm": 0.0, - "learning_rate": 1.469211863199699e-05, - "loss": 1.0099, + "learning_rate": 1.470632671977579e-05, + "loss": 0.8811, "step": 12835 }, { - "epoch": 0.36424517593643585, + "epoch": 0.3637394088809544, "grad_norm": 0.0, - "learning_rate": 1.4691306987547054e-05, - "loss": 0.9156, + "learning_rate": 1.470551690765066e-05, + "loss": 0.9491, "step": 12836 }, { - "epoch": 0.36427355278093076, + "epoch": 0.36376774632321685, "grad_norm": 0.0, - "learning_rate": 1.4690495303469436e-05, - "loss": 0.8938, + "learning_rate": 1.4704707055889213e-05, + "loss": 0.9027, "step": 12837 }, { - "epoch": 0.3643019296254257, + "epoch": 0.36379608376547934, "grad_norm": 0.0, - "learning_rate": 1.4689683579770994e-05, - "loss": 0.8361, + "learning_rate": 1.4703897164498276e-05, + "loss": 0.8453, "step": 12838 }, { - "epoch": 0.36433030646992054, + "epoch": 0.3638244212077418, "grad_norm": 0.0, - "learning_rate": 1.4688871816458585e-05, - "loss": 0.958, + "learning_rate": 1.470308723348466e-05, + "loss": 0.8604, "step": 12839 }, { - "epoch": 0.36435868331441545, + "epoch": 0.36385275865000427, "grad_norm": 0.0, - "learning_rate": 1.4688060013539068e-05, - "loss": 1.035, + "learning_rate": 1.4702277262855198e-05, + "loss": 0.9952, "step": 12840 }, { - "epoch": 0.3643870601589103, + "epoch": 0.3638810960922667, "grad_norm": 0.0, - "learning_rate": 1.4687248171019293e-05, - "loss": 0.9662, + "learning_rate": 1.4701467252616709e-05, + "loss": 0.9214, "step": 12841 }, { - "epoch": 0.36441543700340523, + "epoch": 0.36390943353452915, "grad_norm": 0.0, - "learning_rate": 1.4686436288906124e-05, - "loss": 0.9041, + "learning_rate": 1.4700657202776014e-05, + "loss": 0.9536, "step": 12842 }, { - "epoch": 0.3644438138479001, + "epoch": 0.36393777097679164, "grad_norm": 0.0, - "learning_rate": 1.4685624367206414e-05, - "loss": 0.9714, + "learning_rate": 1.4699847113339935e-05, + "loss": 0.881, "step": 12843 }, { - "epoch": 0.364472190692395, + "epoch": 0.3639661084190541, "grad_norm": 0.0, - "learning_rate": 1.468481240592703e-05, - "loss": 0.9995, + "learning_rate": 1.46990369843153e-05, + "loss": 0.9175, "step": 12844 }, { - "epoch": 0.3645005675368899, + "epoch": 0.3639944458613166, "grad_norm": 0.0, - "learning_rate": 1.4684000405074818e-05, - "loss": 0.8637, + "learning_rate": 1.4698226815708934e-05, + "loss": 0.9595, "step": 12845 }, { - "epoch": 0.3645289443813848, + "epoch": 0.364022783303579, "grad_norm": 0.0, - "learning_rate": 1.4683188364656648e-05, - "loss": 1.0257, + "learning_rate": 1.4697416607527659e-05, + "loss": 0.895, "step": 12846 }, { - "epoch": 0.3645573212258797, + "epoch": 0.3640511207458415, "grad_norm": 0.0, - "learning_rate": 1.4682376284679376e-05, - "loss": 0.8682, + "learning_rate": 1.4696606359778299e-05, + "loss": 0.974, "step": 12847 }, { - "epoch": 0.36458569807037455, + "epoch": 0.36407945818810394, "grad_norm": 0.0, - "learning_rate": 1.4681564165149863e-05, - "loss": 0.9157, + "learning_rate": 1.4695796072467677e-05, + "loss": 0.8702, "step": 12848 }, { - "epoch": 0.36461407491486947, + "epoch": 0.3641077956303664, "grad_norm": 0.0, - "learning_rate": 1.4680752006074961e-05, - "loss": 1.0112, + "learning_rate": 1.4694985745602623e-05, + "loss": 1.0531, "step": 12849 }, { - "epoch": 0.3646424517593644, + "epoch": 0.3641361330726289, "grad_norm": 0.0, - "learning_rate": 1.4679939807461541e-05, - "loss": 0.8735, + "learning_rate": 1.469417537918996e-05, + "loss": 0.7996, "step": 12850 }, { - "epoch": 0.36467082860385924, + "epoch": 0.3641644705148913, "grad_norm": 0.0, - "learning_rate": 1.4679127569316455e-05, - "loss": 1.0056, + "learning_rate": 1.4693364973236515e-05, + "loss": 0.9209, "step": 12851 }, { - "epoch": 0.36469920544835416, + "epoch": 0.3641928079571538, "grad_norm": 0.0, - "learning_rate": 1.4678315291646572e-05, - "loss": 0.9802, + "learning_rate": 1.4692554527749112e-05, + "loss": 0.9277, "step": 12852 }, { - "epoch": 0.364727582292849, + "epoch": 0.36422114539941625, "grad_norm": 0.0, - "learning_rate": 1.4677502974458746e-05, - "loss": 1.063, + "learning_rate": 1.4691744042734581e-05, + "loss": 0.9734, "step": 12853 }, { - "epoch": 0.36475595913734393, + "epoch": 0.3642494828416787, "grad_norm": 0.0, - "learning_rate": 1.4676690617759845e-05, - "loss": 1.0199, + "learning_rate": 1.469093351819975e-05, + "loss": 0.9396, "step": 12854 }, { - "epoch": 0.3647843359818388, + "epoch": 0.3642778202839412, "grad_norm": 0.0, - "learning_rate": 1.4675878221556727e-05, - "loss": 0.8524, + "learning_rate": 1.4690122954151443e-05, + "loss": 0.9035, "step": 12855 }, { - "epoch": 0.3648127128263337, + "epoch": 0.3643061577262036, "grad_norm": 0.0, - "learning_rate": 1.4675065785856256e-05, - "loss": 1.109, + "learning_rate": 1.4689312350596488e-05, + "loss": 0.9053, "step": 12856 }, { - "epoch": 0.3648410896708286, + "epoch": 0.3643344951684661, "grad_norm": 0.0, - "learning_rate": 1.4674253310665294e-05, - "loss": 0.9111, + "learning_rate": 1.4688501707541711e-05, + "loss": 0.8716, "step": 12857 }, { - "epoch": 0.3648694665153235, + "epoch": 0.36436283261072855, "grad_norm": 0.0, - "learning_rate": 1.4673440795990705e-05, - "loss": 0.904, + "learning_rate": 1.4687691024993947e-05, + "loss": 0.9087, "step": 12858 }, { - "epoch": 0.3648978433598184, + "epoch": 0.36439117005299104, "grad_norm": 0.0, - "learning_rate": 1.4672628241839349e-05, - "loss": 0.927, + "learning_rate": 1.4686880302960021e-05, + "loss": 0.9071, "step": 12859 }, { - "epoch": 0.36492622020431326, + "epoch": 0.3644195074952535, "grad_norm": 0.0, - "learning_rate": 1.4671815648218092e-05, - "loss": 0.988, + "learning_rate": 1.4686069541446757e-05, + "loss": 0.9107, "step": 12860 }, { - "epoch": 0.36495459704880817, + "epoch": 0.3644478449375159, "grad_norm": 0.0, - "learning_rate": 1.4671003015133803e-05, - "loss": 0.916, + "learning_rate": 1.4685258740460995e-05, + "loss": 0.9247, "step": 12861 }, { - "epoch": 0.3649829738933031, + "epoch": 0.3644761823797784, "grad_norm": 0.0, - "learning_rate": 1.4670190342593338e-05, - "loss": 0.8822, + "learning_rate": 1.4684447900009557e-05, + "loss": 0.9647, "step": 12862 }, { - "epoch": 0.36501135073779795, + "epoch": 0.36450451982204085, "grad_norm": 0.0, - "learning_rate": 1.4669377630603565e-05, - "loss": 0.9355, + "learning_rate": 1.4683637020099273e-05, + "loss": 0.8997, "step": 12863 }, { - "epoch": 0.36503972758229286, + "epoch": 0.36453285726430334, "grad_norm": 0.0, - "learning_rate": 1.466856487917135e-05, - "loss": 0.9453, + "learning_rate": 1.4682826100736973e-05, + "loss": 0.9349, "step": 12864 }, { - "epoch": 0.3650681044267877, + "epoch": 0.3645611947065658, "grad_norm": 0.0, - "learning_rate": 1.466775208830356e-05, - "loss": 1.0282, + "learning_rate": 1.4682015141929495e-05, + "loss": 0.9373, "step": 12865 }, { - "epoch": 0.36509648127128264, + "epoch": 0.3645895321488282, "grad_norm": 0.0, - "learning_rate": 1.4666939258007054e-05, - "loss": 0.9493, + "learning_rate": 1.4681204143683663e-05, + "loss": 1.0156, "step": 12866 }, { - "epoch": 0.36512485811577755, + "epoch": 0.3646178695910907, "grad_norm": 0.0, - "learning_rate": 1.4666126388288703e-05, - "loss": 0.9281, + "learning_rate": 1.4680393106006312e-05, + "loss": 0.8201, "step": 12867 }, { - "epoch": 0.3651532349602724, + "epoch": 0.36464620703335315, "grad_norm": 0.0, - "learning_rate": 1.4665313479155375e-05, - "loss": 0.9637, + "learning_rate": 1.4679582028904269e-05, + "loss": 0.8941, "step": 12868 }, { - "epoch": 0.3651816118047673, + "epoch": 0.36467454447561565, "grad_norm": 0.0, - "learning_rate": 1.4664500530613932e-05, - "loss": 0.8991, + "learning_rate": 1.467877091238437e-05, + "loss": 0.9572, "step": 12869 }, { - "epoch": 0.3652099886492622, + "epoch": 0.3647028819178781, "grad_norm": 0.0, - "learning_rate": 1.4663687542671245e-05, - "loss": 0.9779, + "learning_rate": 1.4677959756453443e-05, + "loss": 0.9438, "step": 12870 }, { - "epoch": 0.3652383654937571, + "epoch": 0.3647312193601406, "grad_norm": 0.0, - "learning_rate": 1.4662874515334178e-05, - "loss": 0.9846, + "learning_rate": 1.4677148561118328e-05, + "loss": 1.0001, "step": 12871 }, { - "epoch": 0.36526674233825196, + "epoch": 0.364759556802403, "grad_norm": 0.0, - "learning_rate": 1.4662061448609604e-05, - "loss": 0.9615, + "learning_rate": 1.4676337326385852e-05, + "loss": 0.965, "step": 12872 }, { - "epoch": 0.3652951191827469, + "epoch": 0.36478789424466546, "grad_norm": 0.0, - "learning_rate": 1.4661248342504383e-05, - "loss": 0.9031, + "learning_rate": 1.4675526052262853e-05, + "loss": 1.0119, "step": 12873 }, { - "epoch": 0.3653234960272418, + "epoch": 0.36481623168692795, "grad_norm": 0.0, - "learning_rate": 1.4660435197025391e-05, - "loss": 0.8938, + "learning_rate": 1.467471473875616e-05, + "loss": 0.9844, "step": 12874 }, { - "epoch": 0.36535187287173665, + "epoch": 0.3648445691291904, "grad_norm": 0.0, - "learning_rate": 1.4659622012179493e-05, - "loss": 0.9798, + "learning_rate": 1.467390338587261e-05, + "loss": 0.9213, "step": 12875 }, { - "epoch": 0.36538024971623156, + "epoch": 0.3648729065714529, "grad_norm": 0.0, - "learning_rate": 1.4658808787973556e-05, - "loss": 0.8718, + "learning_rate": 1.4673091993619033e-05, + "loss": 1.0009, "step": 12876 }, { - "epoch": 0.3654086265607264, + "epoch": 0.3649012440137153, "grad_norm": 0.0, - "learning_rate": 1.4657995524414453e-05, - "loss": 0.9139, + "learning_rate": 1.4672280562002266e-05, + "loss": 0.9585, "step": 12877 }, { - "epoch": 0.36543700340522134, + "epoch": 0.36492958145597776, "grad_norm": 0.0, - "learning_rate": 1.4657182221509051e-05, - "loss": 1.0076, + "learning_rate": 1.4671469091029149e-05, + "loss": 0.9786, "step": 12878 }, { - "epoch": 0.36546538024971625, + "epoch": 0.36495791889824025, "grad_norm": 0.0, - "learning_rate": 1.4656368879264225e-05, - "loss": 0.9622, + "learning_rate": 1.4670657580706511e-05, + "loss": 0.8563, "step": 12879 }, { - "epoch": 0.3654937570942111, + "epoch": 0.3649862563405027, "grad_norm": 0.0, - "learning_rate": 1.4655555497686837e-05, - "loss": 0.9373, + "learning_rate": 1.4669846031041193e-05, + "loss": 0.9567, "step": 12880 }, { - "epoch": 0.36552213393870603, + "epoch": 0.3650145937827652, "grad_norm": 0.0, - "learning_rate": 1.4654742076783767e-05, - "loss": 0.8267, + "learning_rate": 1.4669034442040021e-05, + "loss": 0.9445, "step": 12881 }, { - "epoch": 0.3655505107832009, + "epoch": 0.3650429312250276, "grad_norm": 0.0, - "learning_rate": 1.465392861656188e-05, - "loss": 0.8947, + "learning_rate": 1.4668222813709844e-05, + "loss": 0.8891, "step": 12882 }, { - "epoch": 0.3655788876276958, + "epoch": 0.3650712686672901, "grad_norm": 0.0, - "learning_rate": 1.4653115117028045e-05, - "loss": 1.0233, + "learning_rate": 1.466741114605749e-05, + "loss": 0.9662, "step": 12883 }, { - "epoch": 0.3656072644721907, + "epoch": 0.36509960610955255, "grad_norm": 0.0, - "learning_rate": 1.4652301578189141e-05, - "loss": 0.9846, + "learning_rate": 1.46665994390898e-05, + "loss": 1.0122, "step": 12884 }, { - "epoch": 0.3656356413166856, + "epoch": 0.365127943551815, "grad_norm": 0.0, - "learning_rate": 1.4651488000052036e-05, - "loss": 0.8945, + "learning_rate": 1.4665787692813608e-05, + "loss": 1.0587, "step": 12885 }, { - "epoch": 0.3656640181611805, + "epoch": 0.3651562809940775, "grad_norm": 0.0, - "learning_rate": 1.4650674382623606e-05, - "loss": 0.9693, + "learning_rate": 1.4664975907235757e-05, + "loss": 0.8702, "step": 12886 }, { - "epoch": 0.36569239500567535, + "epoch": 0.3651846184363399, "grad_norm": 0.0, - "learning_rate": 1.4649860725910716e-05, - "loss": 0.9135, + "learning_rate": 1.466416408236308e-05, + "loss": 0.8268, "step": 12887 }, { - "epoch": 0.36572077185017027, + "epoch": 0.3652129558786024, "grad_norm": 0.0, - "learning_rate": 1.4649047029920245e-05, - "loss": 0.9866, + "learning_rate": 1.4663352218202417e-05, + "loss": 0.9541, "step": 12888 }, { - "epoch": 0.3657491486946651, + "epoch": 0.36524129332086486, "grad_norm": 0.0, - "learning_rate": 1.4648233294659066e-05, - "loss": 0.9877, + "learning_rate": 1.4662540314760608e-05, + "loss": 0.9374, "step": 12889 }, { - "epoch": 0.36577752553916004, + "epoch": 0.3652696307631273, "grad_norm": 0.0, - "learning_rate": 1.4647419520134047e-05, - "loss": 0.9714, + "learning_rate": 1.4661728372044486e-05, + "loss": 1.017, "step": 12890 }, { - "epoch": 0.36580590238365496, + "epoch": 0.3652979682053898, "grad_norm": 0.0, - "learning_rate": 1.464660570635207e-05, - "loss": 1.0271, + "learning_rate": 1.46609163900609e-05, + "loss": 0.9117, "step": 12891 }, { - "epoch": 0.3658342792281498, + "epoch": 0.3653263056476522, "grad_norm": 0.0, - "learning_rate": 1.4645791853320005e-05, - "loss": 0.8931, + "learning_rate": 1.4660104368816681e-05, + "loss": 0.9539, "step": 12892 }, { - "epoch": 0.36586265607264473, + "epoch": 0.3653546430899147, "grad_norm": 0.0, - "learning_rate": 1.4644977961044725e-05, - "loss": 0.9916, + "learning_rate": 1.4659292308318673e-05, + "loss": 0.9224, "step": 12893 }, { - "epoch": 0.3658910329171396, + "epoch": 0.36538298053217716, "grad_norm": 0.0, - "learning_rate": 1.4644164029533113e-05, - "loss": 0.8785, + "learning_rate": 1.4658480208573717e-05, + "loss": 0.9149, "step": 12894 }, { - "epoch": 0.3659194097616345, + "epoch": 0.36541131797443965, "grad_norm": 0.0, - "learning_rate": 1.4643350058792036e-05, - "loss": 1.0405, + "learning_rate": 1.4657668069588654e-05, + "loss": 1.0163, "step": 12895 }, { - "epoch": 0.3659477866061294, + "epoch": 0.3654396554167021, "grad_norm": 0.0, - "learning_rate": 1.464253604882837e-05, - "loss": 0.8698, + "learning_rate": 1.4656855891370318e-05, + "loss": 0.9062, "step": 12896 }, { - "epoch": 0.3659761634506243, + "epoch": 0.36546799285896453, "grad_norm": 0.0, - "learning_rate": 1.4641721999648994e-05, - "loss": 0.9104, + "learning_rate": 1.4656043673925557e-05, + "loss": 0.9622, "step": 12897 }, { - "epoch": 0.3660045402951192, + "epoch": 0.365496330301227, "grad_norm": 0.0, - "learning_rate": 1.4640907911260787e-05, - "loss": 0.9662, + "learning_rate": 1.4655231417261213e-05, + "loss": 1.0123, "step": 12898 }, { - "epoch": 0.36603291713961406, + "epoch": 0.36552466774348946, "grad_norm": 0.0, - "learning_rate": 1.4640093783670619e-05, - "loss": 0.9013, + "learning_rate": 1.4654419121384126e-05, + "loss": 0.9024, "step": 12899 }, { - "epoch": 0.36606129398410897, + "epoch": 0.36555300518575196, "grad_norm": 0.0, - "learning_rate": 1.463927961688537e-05, - "loss": 1.0025, + "learning_rate": 1.465360678630114e-05, + "loss": 0.8857, "step": 12900 }, { - "epoch": 0.3660896708286039, + "epoch": 0.3655813426280144, "grad_norm": 0.0, - "learning_rate": 1.463846541091192e-05, - "loss": 0.976, + "learning_rate": 1.4652794412019094e-05, + "loss": 1.0058, "step": 12901 }, { - "epoch": 0.36611804767309875, + "epoch": 0.36560968007027683, "grad_norm": 0.0, - "learning_rate": 1.4637651165757143e-05, - "loss": 0.9989, + "learning_rate": 1.4651981998544833e-05, + "loss": 1.055, "step": 12902 }, { - "epoch": 0.36614642451759366, + "epoch": 0.3656380175125393, "grad_norm": 0.0, - "learning_rate": 1.4636836881427918e-05, - "loss": 0.9379, + "learning_rate": 1.46511695458852e-05, + "loss": 0.8933, "step": 12903 }, { - "epoch": 0.3661748013620885, + "epoch": 0.36566635495480176, "grad_norm": 0.0, - "learning_rate": 1.4636022557931124e-05, - "loss": 0.9778, + "learning_rate": 1.465035705404704e-05, + "loss": 0.9442, "step": 12904 }, { - "epoch": 0.36620317820658344, + "epoch": 0.36569469239706426, "grad_norm": 0.0, - "learning_rate": 1.4635208195273638e-05, - "loss": 1.0027, + "learning_rate": 1.4649544523037193e-05, + "loss": 0.8465, "step": 12905 }, { - "epoch": 0.3662315550510783, + "epoch": 0.3657230298393267, "grad_norm": 0.0, - "learning_rate": 1.4634393793462341e-05, - "loss": 0.8889, + "learning_rate": 1.4648731952862506e-05, + "loss": 0.8714, "step": 12906 }, { - "epoch": 0.3662599318955732, + "epoch": 0.3657513672815892, "grad_norm": 0.0, - "learning_rate": 1.4633579352504109e-05, - "loss": 0.9816, + "learning_rate": 1.4647919343529825e-05, + "loss": 0.9408, "step": 12907 }, { - "epoch": 0.3662883087400681, + "epoch": 0.36577970472385163, "grad_norm": 0.0, - "learning_rate": 1.4632764872405827e-05, - "loss": 0.937, + "learning_rate": 1.4647106695045996e-05, + "loss": 0.9174, "step": 12908 }, { - "epoch": 0.366316685584563, + "epoch": 0.36580804216611407, "grad_norm": 0.0, - "learning_rate": 1.4631950353174368e-05, - "loss": 0.8452, + "learning_rate": 1.4646294007417858e-05, + "loss": 0.9167, "step": 12909 }, { - "epoch": 0.3663450624290579, + "epoch": 0.36583637960837656, "grad_norm": 0.0, - "learning_rate": 1.463113579481662e-05, - "loss": 1.0046, + "learning_rate": 1.464548128065226e-05, + "loss": 0.8993, "step": 12910 }, { - "epoch": 0.36637343927355276, + "epoch": 0.365864717050639, "grad_norm": 0.0, - "learning_rate": 1.463032119733946e-05, - "loss": 0.9767, + "learning_rate": 1.464466851475605e-05, + "loss": 0.9527, "step": 12911 }, { - "epoch": 0.3664018161180477, + "epoch": 0.3658930544929015, "grad_norm": 0.0, - "learning_rate": 1.4629506560749766e-05, - "loss": 0.9122, + "learning_rate": 1.4643855709736071e-05, + "loss": 0.9128, "step": 12912 }, { - "epoch": 0.3664301929625426, + "epoch": 0.36592139193516393, "grad_norm": 0.0, - "learning_rate": 1.4628691885054423e-05, - "loss": 0.9273, + "learning_rate": 1.4643042865599174e-05, + "loss": 0.8878, "step": 12913 }, { - "epoch": 0.36645856980703745, + "epoch": 0.36594972937742637, "grad_norm": 0.0, - "learning_rate": 1.462787717026031e-05, - "loss": 0.8827, + "learning_rate": 1.4642229982352198e-05, + "loss": 1.0282, "step": 12914 }, { - "epoch": 0.36648694665153236, + "epoch": 0.36597806681968886, "grad_norm": 0.0, - "learning_rate": 1.4627062416374314e-05, - "loss": 0.925, + "learning_rate": 1.4641417060002e-05, + "loss": 1.0508, "step": 12915 }, { - "epoch": 0.3665153234960272, + "epoch": 0.3660064042619513, "grad_norm": 0.0, - "learning_rate": 1.462624762340331e-05, - "loss": 0.9576, + "learning_rate": 1.4640604098555418e-05, + "loss": 0.9784, "step": 12916 }, { - "epoch": 0.36654370034052214, + "epoch": 0.3660347417042138, "grad_norm": 0.0, - "learning_rate": 1.4625432791354187e-05, - "loss": 0.9695, + "learning_rate": 1.4639791098019307e-05, + "loss": 1.043, "step": 12917 }, { - "epoch": 0.36657207718501705, + "epoch": 0.36606307914647623, "grad_norm": 0.0, - "learning_rate": 1.4624617920233826e-05, - "loss": 0.8121, + "learning_rate": 1.463897805840051e-05, + "loss": 1.0098, "step": 12918 }, { - "epoch": 0.3666004540295119, + "epoch": 0.3660914165887387, "grad_norm": 0.0, - "learning_rate": 1.462380301004911e-05, - "loss": 0.9024, + "learning_rate": 1.4638164979705883e-05, + "loss": 0.936, "step": 12919 }, { - "epoch": 0.36662883087400683, + "epoch": 0.36611975403100117, "grad_norm": 0.0, - "learning_rate": 1.4622988060806917e-05, - "loss": 0.9734, + "learning_rate": 1.4637351861942266e-05, + "loss": 0.9373, "step": 12920 }, { - "epoch": 0.3666572077185017, + "epoch": 0.3661480914732636, "grad_norm": 0.0, - "learning_rate": 1.4622173072514141e-05, - "loss": 0.9552, + "learning_rate": 1.4636538705116516e-05, + "loss": 0.9632, "step": 12921 }, { - "epoch": 0.3666855845629966, + "epoch": 0.3661764289155261, "grad_norm": 0.0, - "learning_rate": 1.4621358045177658e-05, - "loss": 0.9864, + "learning_rate": 1.4635725509235474e-05, + "loss": 0.9008, "step": 12922 }, { - "epoch": 0.36671396140749146, + "epoch": 0.36620476635778854, "grad_norm": 0.0, - "learning_rate": 1.4620542978804357e-05, - "loss": 0.9946, + "learning_rate": 1.4634912274305996e-05, + "loss": 0.9646, "step": 12923 }, { - "epoch": 0.3667423382519864, + "epoch": 0.36623310380005103, "grad_norm": 0.0, - "learning_rate": 1.4619727873401122e-05, - "loss": 0.9641, + "learning_rate": 1.4634099000334932e-05, + "loss": 0.994, "step": 12924 }, { - "epoch": 0.3667707150964813, + "epoch": 0.36626144124231347, "grad_norm": 0.0, - "learning_rate": 1.461891272897484e-05, - "loss": 0.9528, + "learning_rate": 1.463328568732913e-05, + "loss": 0.9922, "step": 12925 }, { - "epoch": 0.36679909194097615, + "epoch": 0.3662897786845759, "grad_norm": 0.0, - "learning_rate": 1.4618097545532393e-05, - "loss": 0.9555, + "learning_rate": 1.4632472335295442e-05, + "loss": 0.8551, "step": 12926 }, { - "epoch": 0.36682746878547107, + "epoch": 0.3663181161268384, "grad_norm": 0.0, - "learning_rate": 1.4617282323080666e-05, - "loss": 0.958, + "learning_rate": 1.4631658944240723e-05, + "loss": 0.9613, "step": 12927 }, { - "epoch": 0.3668558456299659, + "epoch": 0.36634645356910084, "grad_norm": 0.0, - "learning_rate": 1.461646706162655e-05, - "loss": 0.876, + "learning_rate": 1.4630845514171818e-05, + "loss": 0.9258, "step": 12928 }, { - "epoch": 0.36688422247446084, + "epoch": 0.36637479101136333, "grad_norm": 0.0, - "learning_rate": 1.4615651761176928e-05, - "loss": 0.9509, + "learning_rate": 1.4630032045095582e-05, + "loss": 1.014, "step": 12929 }, { - "epoch": 0.36691259931895576, + "epoch": 0.36640312845362577, "grad_norm": 0.0, - "learning_rate": 1.4614836421738692e-05, - "loss": 0.9032, + "learning_rate": 1.4629218537018866e-05, + "loss": 0.9991, "step": 12930 }, { - "epoch": 0.3669409761634506, + "epoch": 0.36643146589588826, "grad_norm": 0.0, - "learning_rate": 1.461402104331872e-05, - "loss": 0.9235, + "learning_rate": 1.4628404989948522e-05, + "loss": 0.9452, "step": 12931 }, { - "epoch": 0.36696935300794553, + "epoch": 0.3664598033381507, "grad_norm": 0.0, - "learning_rate": 1.4613205625923907e-05, - "loss": 0.9059, + "learning_rate": 1.4627591403891405e-05, + "loss": 1.0013, "step": 12932 }, { - "epoch": 0.3669977298524404, + "epoch": 0.36648814078041314, "grad_norm": 0.0, - "learning_rate": 1.461239016956114e-05, - "loss": 0.7857, + "learning_rate": 1.4626777778854372e-05, + "loss": 0.8664, "step": 12933 }, { - "epoch": 0.3670261066969353, + "epoch": 0.36651647822267563, "grad_norm": 0.0, - "learning_rate": 1.4611574674237302e-05, - "loss": 0.9603, + "learning_rate": 1.4625964114844266e-05, + "loss": 0.9023, "step": 12934 }, { - "epoch": 0.36705448354143017, + "epoch": 0.3665448156649381, "grad_norm": 0.0, - "learning_rate": 1.461075913995929e-05, - "loss": 0.9212, + "learning_rate": 1.4625150411867948e-05, + "loss": 0.9386, "step": 12935 }, { - "epoch": 0.3670828603859251, + "epoch": 0.36657315310720057, "grad_norm": 0.0, - "learning_rate": 1.4609943566733988e-05, - "loss": 0.9847, + "learning_rate": 1.4624336669932268e-05, + "loss": 0.9152, "step": 12936 }, { - "epoch": 0.36711123723042, + "epoch": 0.366601490549463, "grad_norm": 0.0, - "learning_rate": 1.460912795456828e-05, - "loss": 0.8544, + "learning_rate": 1.4623522889044089e-05, + "loss": 0.9654, "step": 12937 }, { - "epoch": 0.36713961407491486, + "epoch": 0.36662982799172544, "grad_norm": 0.0, - "learning_rate": 1.4608312303469067e-05, - "loss": 1.0172, + "learning_rate": 1.4622709069210257e-05, + "loss": 1.0274, "step": 12938 }, { - "epoch": 0.36716799091940977, + "epoch": 0.36665816543398794, "grad_norm": 0.0, - "learning_rate": 1.460749661344323e-05, - "loss": 0.8598, + "learning_rate": 1.4621895210437627e-05, + "loss": 0.866, "step": 12939 }, { - "epoch": 0.36719636776390463, + "epoch": 0.3666865028762504, "grad_norm": 0.0, - "learning_rate": 1.4606680884497664e-05, - "loss": 0.9561, + "learning_rate": 1.4621081312733061e-05, + "loss": 0.9655, "step": 12940 }, { - "epoch": 0.36722474460839954, + "epoch": 0.36671484031851287, "grad_norm": 0.0, - "learning_rate": 1.4605865116639255e-05, - "loss": 1.0012, + "learning_rate": 1.4620267376103407e-05, + "loss": 0.8601, "step": 12941 }, { - "epoch": 0.36725312145289446, + "epoch": 0.3667431777607753, "grad_norm": 0.0, - "learning_rate": 1.4605049309874899e-05, - "loss": 0.863, + "learning_rate": 1.461945340055553e-05, + "loss": 0.885, "step": 12942 }, { - "epoch": 0.3672814982973893, + "epoch": 0.3667715152030378, "grad_norm": 0.0, - "learning_rate": 1.4604233464211484e-05, - "loss": 0.8333, + "learning_rate": 1.4618639386096278e-05, + "loss": 0.8965, "step": 12943 }, { - "epoch": 0.36730987514188423, + "epoch": 0.36679985264530024, "grad_norm": 0.0, - "learning_rate": 1.4603417579655901e-05, - "loss": 0.848, + "learning_rate": 1.4617825332732513e-05, + "loss": 0.9103, "step": 12944 }, { - "epoch": 0.3673382519863791, + "epoch": 0.3668281900875627, "grad_norm": 0.0, - "learning_rate": 1.4602601656215041e-05, - "loss": 0.9441, + "learning_rate": 1.4617011240471093e-05, + "loss": 0.9622, "step": 12945 }, { - "epoch": 0.367366628830874, + "epoch": 0.36685652752982517, "grad_norm": 0.0, - "learning_rate": 1.4601785693895801e-05, - "loss": 0.891, + "learning_rate": 1.4616197109318871e-05, + "loss": 0.8273, "step": 12946 }, { - "epoch": 0.3673950056753689, + "epoch": 0.3668848649720876, "grad_norm": 0.0, - "learning_rate": 1.4600969692705067e-05, - "loss": 0.9686, + "learning_rate": 1.4615382939282702e-05, + "loss": 0.8863, "step": 12947 }, { - "epoch": 0.3674233825198638, + "epoch": 0.3669132024143501, "grad_norm": 0.0, - "learning_rate": 1.4600153652649737e-05, - "loss": 0.8912, + "learning_rate": 1.4614568730369454e-05, + "loss": 0.99, "step": 12948 }, { - "epoch": 0.3674517593643587, + "epoch": 0.36694153985661254, "grad_norm": 0.0, - "learning_rate": 1.4599337573736703e-05, - "loss": 0.8712, + "learning_rate": 1.4613754482585978e-05, + "loss": 0.8518, "step": 12949 }, { - "epoch": 0.36748013620885356, + "epoch": 0.366969877298875, "grad_norm": 0.0, - "learning_rate": 1.4598521455972857e-05, - "loss": 0.9627, + "learning_rate": 1.4612940195939136e-05, + "loss": 0.933, "step": 12950 }, { - "epoch": 0.3675085130533485, + "epoch": 0.3669982147411375, "grad_norm": 0.0, - "learning_rate": 1.4597705299365094e-05, - "loss": 0.8785, + "learning_rate": 1.4612125870435785e-05, + "loss": 0.8791, "step": 12951 }, { - "epoch": 0.36753688989784333, + "epoch": 0.3670265521833999, "grad_norm": 0.0, - "learning_rate": 1.4596889103920306e-05, - "loss": 0.9261, + "learning_rate": 1.4611311506082784e-05, + "loss": 0.941, "step": 12952 }, { - "epoch": 0.36756526674233825, + "epoch": 0.3670548896256624, "grad_norm": 0.0, - "learning_rate": 1.459607286964539e-05, - "loss": 0.8728, + "learning_rate": 1.4610497102886995e-05, + "loss": 0.9507, "step": 12953 }, { - "epoch": 0.36759364358683316, + "epoch": 0.36708322706792484, "grad_norm": 0.0, - "learning_rate": 1.459525659654724e-05, - "loss": 0.9337, + "learning_rate": 1.4609682660855277e-05, + "loss": 0.9632, "step": 12954 }, { - "epoch": 0.367622020431328, + "epoch": 0.36711156451018734, "grad_norm": 0.0, - "learning_rate": 1.4594440284632754e-05, - "loss": 0.9398, + "learning_rate": 1.4608868179994489e-05, + "loss": 0.957, "step": 12955 }, { - "epoch": 0.36765039727582294, + "epoch": 0.3671399019524498, "grad_norm": 0.0, - "learning_rate": 1.4593623933908822e-05, - "loss": 0.9554, + "learning_rate": 1.4608053660311495e-05, + "loss": 0.866, "step": 12956 }, { - "epoch": 0.3676787741203178, + "epoch": 0.3671682393947122, "grad_norm": 0.0, - "learning_rate": 1.4592807544382339e-05, - "loss": 0.8775, + "learning_rate": 1.4607239101813153e-05, + "loss": 0.8913, "step": 12957 }, { - "epoch": 0.3677071509648127, + "epoch": 0.3671965768369747, "grad_norm": 0.0, - "learning_rate": 1.459199111606021e-05, - "loss": 0.9851, + "learning_rate": 1.4606424504506325e-05, + "loss": 1.0289, "step": 12958 }, { - "epoch": 0.3677355278093076, + "epoch": 0.36722491427923715, "grad_norm": 0.0, - "learning_rate": 1.4591174648949323e-05, - "loss": 0.9461, + "learning_rate": 1.4605609868397874e-05, + "loss": 0.927, "step": 12959 }, { - "epoch": 0.3677639046538025, + "epoch": 0.36725325172149964, "grad_norm": 0.0, - "learning_rate": 1.4590358143056575e-05, - "loss": 1.0164, + "learning_rate": 1.4604795193494659e-05, + "loss": 0.9213, "step": 12960 }, { - "epoch": 0.3677922814982974, + "epoch": 0.3672815891637621, "grad_norm": 0.0, - "learning_rate": 1.4589541598388865e-05, - "loss": 0.9271, + "learning_rate": 1.460398047980354e-05, + "loss": 0.8844, "step": 12961 }, { - "epoch": 0.36782065834279226, + "epoch": 0.3673099266060245, "grad_norm": 0.0, - "learning_rate": 1.4588725014953096e-05, - "loss": 0.8732, + "learning_rate": 1.4603165727331392e-05, + "loss": 0.9505, "step": 12962 }, { - "epoch": 0.3678490351872872, + "epoch": 0.367338264048287, "grad_norm": 0.0, - "learning_rate": 1.4587908392756159e-05, - "loss": 0.863, + "learning_rate": 1.4602350936085066e-05, + "loss": 0.8941, "step": 12963 }, { - "epoch": 0.3678774120317821, + "epoch": 0.36736660149054945, "grad_norm": 0.0, - "learning_rate": 1.458709173180495e-05, - "loss": 0.9364, + "learning_rate": 1.4601536106071428e-05, + "loss": 0.9351, "step": 12964 }, { - "epoch": 0.36790578887627695, + "epoch": 0.36739493893281194, "grad_norm": 0.0, - "learning_rate": 1.4586275032106373e-05, - "loss": 0.9398, + "learning_rate": 1.4600721237297344e-05, + "loss": 0.8546, "step": 12965 }, { - "epoch": 0.36793416572077187, + "epoch": 0.3674232763750744, "grad_norm": 0.0, - "learning_rate": 1.4585458293667327e-05, - "loss": 1.0085, + "learning_rate": 1.4599906329769678e-05, + "loss": 0.9717, "step": 12966 }, { - "epoch": 0.3679625425652667, + "epoch": 0.3674516138173368, "grad_norm": 0.0, - "learning_rate": 1.4584641516494706e-05, - "loss": 0.8472, + "learning_rate": 1.459909138349529e-05, + "loss": 0.8861, "step": 12967 }, { - "epoch": 0.36799091940976164, + "epoch": 0.3674799512595993, "grad_norm": 0.0, - "learning_rate": 1.4583824700595411e-05, - "loss": 0.9815, + "learning_rate": 1.4598276398481046e-05, + "loss": 0.7992, "step": 12968 }, { - "epoch": 0.3680192962542565, + "epoch": 0.36750828870186175, "grad_norm": 0.0, - "learning_rate": 1.4583007845976345e-05, - "loss": 0.826, + "learning_rate": 1.4597461374733817e-05, + "loss": 0.9258, "step": 12969 }, { - "epoch": 0.3680476730987514, + "epoch": 0.36753662614412425, "grad_norm": 0.0, - "learning_rate": 1.4582190952644407e-05, - "loss": 0.9597, + "learning_rate": 1.4596646312260462e-05, + "loss": 0.995, "step": 12970 }, { - "epoch": 0.36807604994324633, + "epoch": 0.3675649635863867, "grad_norm": 0.0, - "learning_rate": 1.4581374020606492e-05, - "loss": 0.9495, + "learning_rate": 1.459583121106785e-05, + "loss": 0.9924, "step": 12971 }, { - "epoch": 0.3681044267877412, + "epoch": 0.3675933010286492, "grad_norm": 0.0, - "learning_rate": 1.4580557049869507e-05, - "loss": 1.0104, + "learning_rate": 1.459501607116284e-05, + "loss": 0.9704, "step": 12972 }, { - "epoch": 0.3681328036322361, + "epoch": 0.3676216384709116, "grad_norm": 0.0, - "learning_rate": 1.4579740040440351e-05, - "loss": 0.9424, + "learning_rate": 1.4594200892552308e-05, + "loss": 0.9542, "step": 12973 }, { - "epoch": 0.36816118047673096, + "epoch": 0.36764997591317405, "grad_norm": 0.0, - "learning_rate": 1.4578922992325924e-05, - "loss": 1.0191, + "learning_rate": 1.4593385675243113e-05, + "loss": 1.0569, "step": 12974 }, { - "epoch": 0.3681895573212259, + "epoch": 0.36767831335543655, "grad_norm": 0.0, - "learning_rate": 1.4578105905533126e-05, - "loss": 0.8929, + "learning_rate": 1.4592570419242126e-05, + "loss": 0.9545, "step": 12975 }, { - "epoch": 0.3682179341657208, + "epoch": 0.367706650797699, "grad_norm": 0.0, - "learning_rate": 1.4577288780068868e-05, - "loss": 0.8642, + "learning_rate": 1.4591755124556214e-05, + "loss": 0.9023, "step": 12976 }, { - "epoch": 0.36824631101021565, + "epoch": 0.3677349882399615, "grad_norm": 0.0, - "learning_rate": 1.4576471615940042e-05, - "loss": 0.9149, + "learning_rate": 1.459093979119224e-05, + "loss": 0.9086, "step": 12977 }, { - "epoch": 0.36827468785471057, + "epoch": 0.3677633256822239, "grad_norm": 0.0, - "learning_rate": 1.4575654413153553e-05, - "loss": 0.9384, + "learning_rate": 1.459012441915708e-05, + "loss": 0.8384, "step": 12978 }, { - "epoch": 0.36830306469920543, + "epoch": 0.36779166312448636, "grad_norm": 0.0, - "learning_rate": 1.4574837171716309e-05, - "loss": 0.9525, + "learning_rate": 1.4589309008457594e-05, + "loss": 0.8888, "step": 12979 }, { - "epoch": 0.36833144154370034, + "epoch": 0.36782000056674885, "grad_norm": 0.0, - "learning_rate": 1.4574019891635209e-05, - "loss": 0.9303, + "learning_rate": 1.4588493559100653e-05, + "loss": 1.0592, "step": 12980 }, { - "epoch": 0.36835981838819526, + "epoch": 0.3678483380090113, "grad_norm": 0.0, - "learning_rate": 1.4573202572917156e-05, - "loss": 0.9749, + "learning_rate": 1.458767807109313e-05, + "loss": 0.8224, "step": 12981 }, { - "epoch": 0.3683881952326901, + "epoch": 0.3678766754512738, "grad_norm": 0.0, - "learning_rate": 1.4572385215569055e-05, - "loss": 0.9491, + "learning_rate": 1.4586862544441891e-05, + "loss": 1.0131, "step": 12982 }, { - "epoch": 0.36841657207718503, + "epoch": 0.3679050128935362, "grad_norm": 0.0, - "learning_rate": 1.4571567819597812e-05, - "loss": 0.9173, + "learning_rate": 1.4586046979153805e-05, + "loss": 0.9545, "step": 12983 }, { - "epoch": 0.3684449489216799, + "epoch": 0.3679333503357987, "grad_norm": 0.0, - "learning_rate": 1.457075038501033e-05, - "loss": 0.8445, + "learning_rate": 1.458523137523574e-05, + "loss": 0.832, "step": 12984 }, { - "epoch": 0.3684733257661748, + "epoch": 0.36796168777806115, "grad_norm": 0.0, - "learning_rate": 1.4569932911813514e-05, - "loss": 0.9032, + "learning_rate": 1.4584415732694572e-05, + "loss": 0.9331, "step": 12985 }, { - "epoch": 0.36850170261066967, + "epoch": 0.3679900252203236, "grad_norm": 0.0, - "learning_rate": 1.4569115400014269e-05, - "loss": 1.0009, + "learning_rate": 1.4583600051537166e-05, + "loss": 1.0511, "step": 12986 }, { - "epoch": 0.3685300794551646, + "epoch": 0.3680183626625861, "grad_norm": 0.0, - "learning_rate": 1.4568297849619502e-05, - "loss": 0.8731, + "learning_rate": 1.4582784331770395e-05, + "loss": 0.9153, "step": 12987 }, { - "epoch": 0.3685584562996595, + "epoch": 0.3680467001048485, "grad_norm": 0.0, - "learning_rate": 1.4567480260636115e-05, - "loss": 0.9833, + "learning_rate": 1.4581968573401128e-05, + "loss": 0.8485, "step": 12988 }, { - "epoch": 0.36858683314415436, + "epoch": 0.368075037547111, "grad_norm": 0.0, - "learning_rate": 1.4566662633071019e-05, - "loss": 1.0111, + "learning_rate": 1.458115277643624e-05, + "loss": 0.7864, "step": 12989 }, { - "epoch": 0.3686152099886493, + "epoch": 0.36810337498937346, "grad_norm": 0.0, - "learning_rate": 1.4565844966931119e-05, - "loss": 0.8573, + "learning_rate": 1.4580336940882602e-05, + "loss": 0.9552, "step": 12990 }, { - "epoch": 0.36864358683314413, + "epoch": 0.3681317124316359, "grad_norm": 0.0, - "learning_rate": 1.4565027262223317e-05, - "loss": 0.9227, + "learning_rate": 1.4579521066747085e-05, + "loss": 1.0022, "step": 12991 }, { - "epoch": 0.36867196367763905, + "epoch": 0.3681600498738984, "grad_norm": 0.0, - "learning_rate": 1.4564209518954528e-05, - "loss": 0.8974, + "learning_rate": 1.457870515403656e-05, + "loss": 1.0155, "step": 12992 }, { - "epoch": 0.36870034052213396, + "epoch": 0.3681883873161608, "grad_norm": 0.0, - "learning_rate": 1.4563391737131656e-05, - "loss": 0.8892, + "learning_rate": 1.4577889202757902e-05, + "loss": 0.8207, "step": 12993 }, { - "epoch": 0.3687287173666288, + "epoch": 0.3682167247584233, "grad_norm": 0.0, - "learning_rate": 1.4562573916761609e-05, - "loss": 1.0071, + "learning_rate": 1.457707321291798e-05, + "loss": 0.9875, "step": 12994 }, { - "epoch": 0.36875709421112374, + "epoch": 0.36824506220068576, "grad_norm": 0.0, - "learning_rate": 1.4561756057851295e-05, - "loss": 0.9447, + "learning_rate": 1.4576257184523677e-05, + "loss": 0.8857, "step": 12995 }, { - "epoch": 0.3687854710556186, + "epoch": 0.36827339964294825, "grad_norm": 0.0, - "learning_rate": 1.4560938160407622e-05, - "loss": 0.985, + "learning_rate": 1.4575441117581856e-05, + "loss": 0.869, "step": 12996 }, { - "epoch": 0.3688138479001135, + "epoch": 0.3683017370852107, "grad_norm": 0.0, - "learning_rate": 1.45601202244375e-05, - "loss": 0.8234, + "learning_rate": 1.4574625012099394e-05, + "loss": 0.9129, "step": 12997 }, { - "epoch": 0.3688422247446084, + "epoch": 0.36833007452747313, "grad_norm": 0.0, - "learning_rate": 1.4559302249947834e-05, - "loss": 0.8974, + "learning_rate": 1.4573808868083172e-05, + "loss": 0.9303, "step": 12998 }, { - "epoch": 0.3688706015891033, + "epoch": 0.3683584119697356, "grad_norm": 0.0, - "learning_rate": 1.455848423694554e-05, - "loss": 1.0356, + "learning_rate": 1.4572992685540057e-05, + "loss": 0.8846, "step": 12999 }, { - "epoch": 0.3688989784335982, + "epoch": 0.36838674941199806, "grad_norm": 0.0, - "learning_rate": 1.4557666185437526e-05, - "loss": 0.9615, + "learning_rate": 1.4572176464476924e-05, + "loss": 1.016, "step": 13000 }, { - "epoch": 0.36892735527809306, + "epoch": 0.36841508685426055, "grad_norm": 0.0, - "learning_rate": 1.4556848095430698e-05, - "loss": 0.8345, + "learning_rate": 1.4571360204900653e-05, + "loss": 0.9986, "step": 13001 }, { - "epoch": 0.368955732122588, + "epoch": 0.368443424296523, "grad_norm": 0.0, - "learning_rate": 1.4556029966931972e-05, - "loss": 0.9146, + "learning_rate": 1.4570543906818118e-05, + "loss": 0.8977, "step": 13002 }, { - "epoch": 0.36898410896708284, + "epoch": 0.36847176173878543, "grad_norm": 0.0, - "learning_rate": 1.4555211799948255e-05, - "loss": 1.0316, + "learning_rate": 1.4569727570236195e-05, + "loss": 0.7823, "step": 13003 }, { - "epoch": 0.36901248581157775, + "epoch": 0.3685000991810479, "grad_norm": 0.0, - "learning_rate": 1.4554393594486457e-05, - "loss": 1.0104, + "learning_rate": 1.4568911195161758e-05, + "loss": 1.0849, "step": 13004 }, { - "epoch": 0.36904086265607267, + "epoch": 0.36852843662331036, "grad_norm": 0.0, - "learning_rate": 1.4553575350553495e-05, - "loss": 0.7872, + "learning_rate": 1.4568094781601687e-05, + "loss": 0.8989, "step": 13005 }, { - "epoch": 0.3690692395005675, + "epoch": 0.36855677406557286, "grad_norm": 0.0, - "learning_rate": 1.4552757068156275e-05, - "loss": 0.9327, + "learning_rate": 1.4567278329562856e-05, + "loss": 0.9886, "step": 13006 }, { - "epoch": 0.36909761634506244, + "epoch": 0.3685851115078353, "grad_norm": 0.0, - "learning_rate": 1.4551938747301712e-05, - "loss": 0.9256, + "learning_rate": 1.4566461839052144e-05, + "loss": 0.9249, "step": 13007 }, { - "epoch": 0.3691259931895573, + "epoch": 0.3686134489500978, "grad_norm": 0.0, - "learning_rate": 1.4551120387996718e-05, - "loss": 1.0081, + "learning_rate": 1.4565645310076429e-05, + "loss": 1.0153, "step": 13008 }, { - "epoch": 0.3691543700340522, + "epoch": 0.3686417863923602, "grad_norm": 0.0, - "learning_rate": 1.4550301990248205e-05, - "loss": 0.9814, + "learning_rate": 1.4564828742642586e-05, + "loss": 1.0451, "step": 13009 }, { - "epoch": 0.36918274687854713, + "epoch": 0.36867012383462267, "grad_norm": 0.0, - "learning_rate": 1.454948355406309e-05, - "loss": 1.0334, + "learning_rate": 1.4564012136757497e-05, + "loss": 0.9922, "step": 13010 }, { - "epoch": 0.369211123723042, + "epoch": 0.36869846127688516, "grad_norm": 0.0, - "learning_rate": 1.4548665079448276e-05, - "loss": 1.0637, + "learning_rate": 1.456319549242804e-05, + "loss": 1.0402, "step": 13011 }, { - "epoch": 0.3692395005675369, + "epoch": 0.3687267987191476, "grad_norm": 0.0, - "learning_rate": 1.454784656641069e-05, - "loss": 0.8897, + "learning_rate": 1.456237880966109e-05, + "loss": 0.88, "step": 13012 }, { - "epoch": 0.36926787741203176, + "epoch": 0.3687551361614101, "grad_norm": 0.0, - "learning_rate": 1.4547028014957238e-05, - "loss": 0.9597, + "learning_rate": 1.456156208846353e-05, + "loss": 1.0469, "step": 13013 }, { - "epoch": 0.3692962542565267, + "epoch": 0.36878347360367253, "grad_norm": 0.0, - "learning_rate": 1.4546209425094832e-05, - "loss": 1.0327, + "learning_rate": 1.4560745328842238e-05, + "loss": 0.9627, "step": 13014 }, { - "epoch": 0.36932463110102154, + "epoch": 0.36881181104593497, "grad_norm": 0.0, - "learning_rate": 1.4545390796830394e-05, - "loss": 0.8462, + "learning_rate": 1.4559928530804097e-05, + "loss": 0.9927, "step": 13015 }, { - "epoch": 0.36935300794551645, + "epoch": 0.36884014848819746, "grad_norm": 0.0, - "learning_rate": 1.4544572130170837e-05, - "loss": 1.0142, + "learning_rate": 1.4559111694355985e-05, + "loss": 0.9308, "step": 13016 }, { - "epoch": 0.36938138479001137, + "epoch": 0.3688684859304599, "grad_norm": 0.0, - "learning_rate": 1.4543753425123073e-05, - "loss": 0.8584, + "learning_rate": 1.4558294819504779e-05, + "loss": 0.8397, "step": 13017 }, { - "epoch": 0.36940976163450623, + "epoch": 0.3688968233727224, "grad_norm": 0.0, - "learning_rate": 1.4542934681694019e-05, - "loss": 0.8669, + "learning_rate": 1.4557477906257365e-05, + "loss": 0.9495, "step": 13018 }, { - "epoch": 0.36943813847900114, + "epoch": 0.36892516081498483, "grad_norm": 0.0, - "learning_rate": 1.4542115899890594e-05, - "loss": 0.8874, + "learning_rate": 1.4556660954620622e-05, + "loss": 0.8902, "step": 13019 }, { - "epoch": 0.369466515323496, + "epoch": 0.3689534982572473, "grad_norm": 0.0, - "learning_rate": 1.454129707971971e-05, - "loss": 0.8217, + "learning_rate": 1.455584396460143e-05, + "loss": 0.9849, "step": 13020 }, { - "epoch": 0.3694948921679909, + "epoch": 0.36898183569950976, "grad_norm": 0.0, - "learning_rate": 1.4540478221188284e-05, - "loss": 1.0011, + "learning_rate": 1.4555026936206675e-05, + "loss": 0.9169, "step": 13021 }, { - "epoch": 0.36952326901248583, + "epoch": 0.3690101731417722, "grad_norm": 0.0, - "learning_rate": 1.4539659324303235e-05, - "loss": 0.828, + "learning_rate": 1.4554209869443235e-05, + "loss": 0.9243, "step": 13022 }, { - "epoch": 0.3695516458569807, + "epoch": 0.3690385105840347, "grad_norm": 0.0, - "learning_rate": 1.4538840389071482e-05, - "loss": 0.9319, + "learning_rate": 1.4553392764317998e-05, + "loss": 0.9177, "step": 13023 }, { - "epoch": 0.3695800227014756, + "epoch": 0.36906684802629713, "grad_norm": 0.0, - "learning_rate": 1.4538021415499938e-05, - "loss": 0.9898, + "learning_rate": 1.4552575620837839e-05, + "loss": 0.8655, "step": 13024 }, { - "epoch": 0.36960839954597047, + "epoch": 0.36909518546855963, "grad_norm": 0.0, - "learning_rate": 1.4537202403595523e-05, - "loss": 0.9712, + "learning_rate": 1.4551758439009647e-05, + "loss": 0.9597, "step": 13025 }, { - "epoch": 0.3696367763904654, + "epoch": 0.36912352291082207, "grad_norm": 0.0, - "learning_rate": 1.4536383353365158e-05, - "loss": 0.8665, + "learning_rate": 1.45509412188403e-05, + "loss": 0.8143, "step": 13026 }, { - "epoch": 0.3696651532349603, + "epoch": 0.3691518603530845, "grad_norm": 0.0, - "learning_rate": 1.4535564264815757e-05, - "loss": 0.9919, + "learning_rate": 1.4550123960336687e-05, + "loss": 0.874, "step": 13027 }, { - "epoch": 0.36969353007945516, + "epoch": 0.369180197795347, "grad_norm": 0.0, - "learning_rate": 1.453474513795424e-05, - "loss": 1.0788, + "learning_rate": 1.4549306663505691e-05, + "loss": 0.9329, "step": 13028 }, { - "epoch": 0.36972190692395007, + "epoch": 0.36920853523760944, "grad_norm": 0.0, - "learning_rate": 1.4533925972787526e-05, - "loss": 0.923, + "learning_rate": 1.4548489328354197e-05, + "loss": 0.7546, "step": 13029 }, { - "epoch": 0.36975028376844493, + "epoch": 0.36923687267987193, "grad_norm": 0.0, - "learning_rate": 1.4533106769322538e-05, - "loss": 0.8911, + "learning_rate": 1.4547671954889085e-05, + "loss": 0.8831, "step": 13030 }, { - "epoch": 0.36977866061293985, + "epoch": 0.36926521012213437, "grad_norm": 0.0, - "learning_rate": 1.4532287527566193e-05, - "loss": 0.9019, + "learning_rate": 1.4546854543117243e-05, + "loss": 0.952, "step": 13031 }, { - "epoch": 0.3698070374574347, + "epoch": 0.36929354756439686, "grad_norm": 0.0, - "learning_rate": 1.4531468247525411e-05, - "loss": 0.9722, + "learning_rate": 1.4546037093045562e-05, + "loss": 0.9711, "step": 13032 }, { - "epoch": 0.3698354143019296, + "epoch": 0.3693218850066593, "grad_norm": 0.0, - "learning_rate": 1.4530648929207114e-05, - "loss": 0.8823, + "learning_rate": 1.4545219604680918e-05, + "loss": 0.8698, "step": 13033 }, { - "epoch": 0.36986379114642454, + "epoch": 0.36935022244892174, "grad_norm": 0.0, - "learning_rate": 1.4529829572618222e-05, - "loss": 0.9095, + "learning_rate": 1.4544402078030203e-05, + "loss": 0.8672, "step": 13034 }, { - "epoch": 0.3698921679909194, + "epoch": 0.36937855989118423, "grad_norm": 0.0, - "learning_rate": 1.4529010177765654e-05, - "loss": 0.9082, + "learning_rate": 1.45435845131003e-05, + "loss": 0.8348, "step": 13035 }, { - "epoch": 0.3699205448354143, + "epoch": 0.36940689733344667, "grad_norm": 0.0, - "learning_rate": 1.4528190744656334e-05, - "loss": 0.9335, + "learning_rate": 1.45427669098981e-05, + "loss": 0.818, "step": 13036 }, { - "epoch": 0.36994892167990917, + "epoch": 0.36943523477570916, "grad_norm": 0.0, - "learning_rate": 1.4527371273297184e-05, - "loss": 0.9746, + "learning_rate": 1.4541949268430487e-05, + "loss": 0.8634, "step": 13037 }, { - "epoch": 0.3699772985244041, + "epoch": 0.3694635722179716, "grad_norm": 0.0, - "learning_rate": 1.4526551763695124e-05, - "loss": 1.0002, + "learning_rate": 1.4541131588704346e-05, + "loss": 0.9083, "step": 13038 }, { - "epoch": 0.370005675368899, + "epoch": 0.36949190966023404, "grad_norm": 0.0, - "learning_rate": 1.4525732215857078e-05, - "loss": 0.862, + "learning_rate": 1.454031387072657e-05, + "loss": 1.0073, "step": 13039 }, { - "epoch": 0.37003405221339386, + "epoch": 0.36952024710249654, "grad_norm": 0.0, - "learning_rate": 1.452491262978997e-05, - "loss": 1.0093, + "learning_rate": 1.4539496114504044e-05, + "loss": 1.1306, "step": 13040 }, { - "epoch": 0.3700624290578888, + "epoch": 0.369548584544759, "grad_norm": 0.0, - "learning_rate": 1.4524093005500721e-05, - "loss": 0.9817, + "learning_rate": 1.4538678320043656e-05, + "loss": 0.8898, "step": 13041 }, { - "epoch": 0.37009080590238363, + "epoch": 0.36957692198702147, "grad_norm": 0.0, - "learning_rate": 1.4523273342996254e-05, - "loss": 0.9019, + "learning_rate": 1.4537860487352293e-05, + "loss": 0.922, "step": 13042 }, { - "epoch": 0.37011918274687855, + "epoch": 0.3696052594292839, "grad_norm": 0.0, - "learning_rate": 1.4522453642283496e-05, - "loss": 0.9972, + "learning_rate": 1.453704261643685e-05, + "loss": 0.9312, "step": 13043 }, { - "epoch": 0.37014755959137347, + "epoch": 0.3696335968715464, "grad_norm": 0.0, - "learning_rate": 1.4521633903369365e-05, - "loss": 1.046, + "learning_rate": 1.4536224707304209e-05, + "loss": 0.9658, "step": 13044 }, { - "epoch": 0.3701759364358683, + "epoch": 0.36966193431380884, "grad_norm": 0.0, - "learning_rate": 1.4520814126260791e-05, - "loss": 0.9724, + "learning_rate": 1.4535406759961267e-05, + "loss": 0.8401, "step": 13045 }, { - "epoch": 0.37020431328036324, + "epoch": 0.3696902717560713, "grad_norm": 0.0, - "learning_rate": 1.4519994310964697e-05, - "loss": 0.9466, + "learning_rate": 1.4534588774414905e-05, + "loss": 0.9486, "step": 13046 }, { - "epoch": 0.3702326901248581, + "epoch": 0.36971860919833377, "grad_norm": 0.0, - "learning_rate": 1.451917445748801e-05, - "loss": 1.0786, + "learning_rate": 1.4533770750672019e-05, + "loss": 0.8763, "step": 13047 }, { - "epoch": 0.370261066969353, + "epoch": 0.3697469466405962, "grad_norm": 0.0, - "learning_rate": 1.4518354565837652e-05, - "loss": 0.903, + "learning_rate": 1.45329526887395e-05, + "loss": 0.9417, "step": 13048 }, { - "epoch": 0.3702894438138479, + "epoch": 0.3697752840828587, "grad_norm": 0.0, - "learning_rate": 1.451753463602055e-05, - "loss": 1.0561, + "learning_rate": 1.4532134588624236e-05, + "loss": 0.9292, "step": 13049 }, { - "epoch": 0.3703178206583428, + "epoch": 0.36980362152512114, "grad_norm": 0.0, - "learning_rate": 1.451671466804363e-05, - "loss": 0.9779, + "learning_rate": 1.4531316450333121e-05, + "loss": 0.9294, "step": 13050 }, { - "epoch": 0.3703461975028377, + "epoch": 0.3698319589673836, "grad_norm": 0.0, - "learning_rate": 1.4515894661913817e-05, - "loss": 0.9562, + "learning_rate": 1.4530498273873042e-05, + "loss": 0.9673, "step": 13051 }, { - "epoch": 0.37037457434733256, + "epoch": 0.36986029640964607, "grad_norm": 0.0, - "learning_rate": 1.4515074617638036e-05, - "loss": 0.9142, + "learning_rate": 1.4529680059250894e-05, + "loss": 0.7966, "step": 13052 }, { - "epoch": 0.3704029511918275, + "epoch": 0.3698886338519085, "grad_norm": 0.0, - "learning_rate": 1.451425453522322e-05, - "loss": 0.8959, + "learning_rate": 1.4528861806473572e-05, + "loss": 0.9898, "step": 13053 }, { - "epoch": 0.37043132803632234, + "epoch": 0.369916971294171, "grad_norm": 0.0, - "learning_rate": 1.4513434414676293e-05, - "loss": 0.8534, + "learning_rate": 1.4528043515547965e-05, + "loss": 1.0658, "step": 13054 }, { - "epoch": 0.37045970488081725, + "epoch": 0.36994530873643344, "grad_norm": 0.0, - "learning_rate": 1.4512614256004182e-05, - "loss": 0.9822, + "learning_rate": 1.4527225186480962e-05, + "loss": 1.0104, "step": 13055 }, { - "epoch": 0.37048808172531217, + "epoch": 0.36997364617869594, "grad_norm": 0.0, - "learning_rate": 1.4511794059213818e-05, - "loss": 0.8193, + "learning_rate": 1.4526406819279464e-05, + "loss": 0.972, "step": 13056 }, { - "epoch": 0.37051645856980703, + "epoch": 0.3700019836209584, "grad_norm": 0.0, - "learning_rate": 1.4510973824312125e-05, - "loss": 0.9896, + "learning_rate": 1.4525588413950357e-05, + "loss": 0.9786, "step": 13057 }, { - "epoch": 0.37054483541430194, + "epoch": 0.3700303210632208, "grad_norm": 0.0, - "learning_rate": 1.4510153551306032e-05, - "loss": 0.8315, + "learning_rate": 1.4524769970500541e-05, + "loss": 0.8906, "step": 13058 }, { - "epoch": 0.3705732122587968, + "epoch": 0.3700586585054833, "grad_norm": 0.0, - "learning_rate": 1.450933324020247e-05, - "loss": 0.826, + "learning_rate": 1.4523951488936905e-05, + "loss": 0.9127, "step": 13059 }, { - "epoch": 0.3706015891032917, + "epoch": 0.37008699594774574, "grad_norm": 0.0, - "learning_rate": 1.4508512891008368e-05, - "loss": 0.907, + "learning_rate": 1.452313296926635e-05, + "loss": 1.0116, "step": 13060 }, { - "epoch": 0.37062996594778663, + "epoch": 0.37011533339000824, "grad_norm": 0.0, - "learning_rate": 1.4507692503730654e-05, - "loss": 0.9649, + "learning_rate": 1.4522314411495763e-05, + "loss": 0.8867, "step": 13061 }, { - "epoch": 0.3706583427922815, + "epoch": 0.3701436708322707, "grad_norm": 0.0, - "learning_rate": 1.4506872078376257e-05, - "loss": 0.9517, + "learning_rate": 1.4521495815632043e-05, + "loss": 0.9961, "step": 13062 }, { - "epoch": 0.3706867196367764, + "epoch": 0.3701720082745331, "grad_norm": 0.0, - "learning_rate": 1.4506051614952114e-05, - "loss": 1.0138, + "learning_rate": 1.4520677181682084e-05, + "loss": 0.872, "step": 13063 }, { - "epoch": 0.37071509648127127, + "epoch": 0.3702003457167956, "grad_norm": 0.0, - "learning_rate": 1.4505231113465148e-05, - "loss": 0.883, + "learning_rate": 1.4519858509652785e-05, + "loss": 0.9319, "step": 13064 }, { - "epoch": 0.3707434733257662, + "epoch": 0.37022868315905805, "grad_norm": 0.0, - "learning_rate": 1.4504410573922292e-05, - "loss": 0.966, + "learning_rate": 1.4519039799551036e-05, + "loss": 0.9579, "step": 13065 }, { - "epoch": 0.37077185017026104, + "epoch": 0.37025702060132054, "grad_norm": 0.0, - "learning_rate": 1.4503589996330478e-05, - "loss": 1.0065, + "learning_rate": 1.4518221051383738e-05, + "loss": 0.9434, "step": 13066 }, { - "epoch": 0.37080022701475596, + "epoch": 0.370285358043583, "grad_norm": 0.0, - "learning_rate": 1.4502769380696635e-05, - "loss": 1.0011, + "learning_rate": 1.4517402265157785e-05, + "loss": 1.0053, "step": 13067 }, { - "epoch": 0.37082860385925087, + "epoch": 0.3703136954858455, "grad_norm": 0.0, - "learning_rate": 1.45019487270277e-05, - "loss": 0.9388, + "learning_rate": 1.451658344088008e-05, + "loss": 0.8735, "step": 13068 }, { - "epoch": 0.37085698070374573, + "epoch": 0.3703420329281079, "grad_norm": 0.0, - "learning_rate": 1.4501128035330595e-05, - "loss": 1.0318, + "learning_rate": 1.4515764578557512e-05, + "loss": 1.0153, "step": 13069 }, { - "epoch": 0.37088535754824065, + "epoch": 0.37037037037037035, "grad_norm": 0.0, - "learning_rate": 1.4500307305612267e-05, - "loss": 1.0507, + "learning_rate": 1.4514945678196984e-05, + "loss": 0.991, "step": 13070 }, { - "epoch": 0.3709137343927355, + "epoch": 0.37039870781263284, "grad_norm": 0.0, - "learning_rate": 1.4499486537879639e-05, - "loss": 0.9053, + "learning_rate": 1.4514126739805388e-05, + "loss": 0.8524, "step": 13071 }, { - "epoch": 0.3709421112372304, + "epoch": 0.3704270452548953, "grad_norm": 0.0, - "learning_rate": 1.4498665732139643e-05, - "loss": 0.8468, + "learning_rate": 1.4513307763389626e-05, + "loss": 0.9647, "step": 13072 }, { - "epoch": 0.37097048808172534, + "epoch": 0.3704553826971578, "grad_norm": 0.0, - "learning_rate": 1.4497844888399216e-05, - "loss": 1.0218, + "learning_rate": 1.45124887489566e-05, + "loss": 1.0906, "step": 13073 }, { - "epoch": 0.3709988649262202, + "epoch": 0.3704837201394202, "grad_norm": 0.0, - "learning_rate": 1.4497024006665295e-05, - "loss": 0.981, + "learning_rate": 1.4511669696513206e-05, + "loss": 0.9131, "step": 13074 }, { - "epoch": 0.3710272417707151, + "epoch": 0.37051205758168265, "grad_norm": 0.0, - "learning_rate": 1.4496203086944805e-05, - "loss": 0.9108, + "learning_rate": 1.4510850606066343e-05, + "loss": 0.874, "step": 13075 }, { - "epoch": 0.37105561861520997, + "epoch": 0.37054039502394515, "grad_norm": 0.0, - "learning_rate": 1.4495382129244686e-05, - "loss": 0.9088, + "learning_rate": 1.4510031477622905e-05, + "loss": 1.0188, "step": 13076 }, { - "epoch": 0.3710839954597049, + "epoch": 0.3705687324662076, "grad_norm": 0.0, - "learning_rate": 1.4494561133571873e-05, - "loss": 1.0029, + "learning_rate": 1.4509212311189803e-05, + "loss": 0.9975, "step": 13077 }, { - "epoch": 0.3711123723041998, + "epoch": 0.3705970699084701, "grad_norm": 0.0, - "learning_rate": 1.4493740099933302e-05, - "loss": 0.9778, + "learning_rate": 1.4508393106773928e-05, + "loss": 0.9459, "step": 13078 }, { - "epoch": 0.37114074914869466, + "epoch": 0.3706254073507325, "grad_norm": 0.0, - "learning_rate": 1.4492919028335905e-05, - "loss": 0.9711, + "learning_rate": 1.4507573864382187e-05, + "loss": 0.928, "step": 13079 }, { - "epoch": 0.3711691259931896, + "epoch": 0.370653744792995, "grad_norm": 0.0, - "learning_rate": 1.4492097918786621e-05, - "loss": 0.9512, + "learning_rate": 1.4506754584021474e-05, + "loss": 0.9155, "step": 13080 }, { - "epoch": 0.37119750283768443, + "epoch": 0.37068208223525745, "grad_norm": 0.0, - "learning_rate": 1.4491276771292382e-05, - "loss": 0.9422, + "learning_rate": 1.4505935265698694e-05, + "loss": 0.9577, "step": 13081 }, { - "epoch": 0.37122587968217935, + "epoch": 0.3707104196775199, "grad_norm": 0.0, - "learning_rate": 1.4490455585860124e-05, - "loss": 1.0333, + "learning_rate": 1.4505115909420751e-05, + "loss": 0.9417, "step": 13082 }, { - "epoch": 0.3712542565266742, + "epoch": 0.3707387571197824, "grad_norm": 0.0, - "learning_rate": 1.4489634362496787e-05, - "loss": 0.8991, + "learning_rate": 1.4504296515194543e-05, + "loss": 0.7831, "step": 13083 }, { - "epoch": 0.3712826333711691, + "epoch": 0.3707670945620448, "grad_norm": 0.0, - "learning_rate": 1.4488813101209309e-05, - "loss": 0.9798, + "learning_rate": 1.4503477083026969e-05, + "loss": 0.8758, "step": 13084 }, { - "epoch": 0.37131101021566404, + "epoch": 0.3707954320043073, "grad_norm": 0.0, - "learning_rate": 1.4487991802004625e-05, - "loss": 0.8631, + "learning_rate": 1.4502657612924938e-05, + "loss": 1.0051, "step": 13085 }, { - "epoch": 0.3713393870601589, + "epoch": 0.37082376944656975, "grad_norm": 0.0, - "learning_rate": 1.4487170464889668e-05, - "loss": 0.9131, + "learning_rate": 1.450183810489535e-05, + "loss": 0.9397, "step": 13086 }, { - "epoch": 0.3713677639046538, + "epoch": 0.3708521068888322, "grad_norm": 0.0, - "learning_rate": 1.4486349089871386e-05, - "loss": 0.8568, + "learning_rate": 1.4501018558945109e-05, + "loss": 1.0061, "step": 13087 }, { - "epoch": 0.3713961407491487, + "epoch": 0.3708804443310947, "grad_norm": 0.0, - "learning_rate": 1.448552767695671e-05, - "loss": 0.9304, + "learning_rate": 1.4500198975081112e-05, + "loss": 1.0419, "step": 13088 }, { - "epoch": 0.3714245175936436, + "epoch": 0.3709087817733571, "grad_norm": 0.0, - "learning_rate": 1.4484706226152576e-05, - "loss": 0.9089, + "learning_rate": 1.4499379353310275e-05, + "loss": 1.0094, "step": 13089 }, { - "epoch": 0.3714528944381385, + "epoch": 0.3709371192156196, "grad_norm": 0.0, - "learning_rate": 1.448388473746593e-05, - "loss": 0.9667, + "learning_rate": 1.4498559693639492e-05, + "loss": 0.8175, "step": 13090 }, { - "epoch": 0.37148127128263336, + "epoch": 0.37096545665788205, "grad_norm": 0.0, - "learning_rate": 1.448306321090371e-05, - "loss": 0.9475, + "learning_rate": 1.4497739996075669e-05, + "loss": 1.0156, "step": 13091 }, { - "epoch": 0.3715096481271283, + "epoch": 0.37099379410014455, "grad_norm": 0.0, - "learning_rate": 1.448224164647285e-05, - "loss": 0.9407, + "learning_rate": 1.449692026062571e-05, + "loss": 1.0051, "step": 13092 }, { - "epoch": 0.37153802497162314, + "epoch": 0.371022131542407, "grad_norm": 0.0, - "learning_rate": 1.4481420044180295e-05, - "loss": 0.8365, + "learning_rate": 1.4496100487296527e-05, + "loss": 0.9149, "step": 13093 }, { - "epoch": 0.37156640181611805, + "epoch": 0.3710504689846694, "grad_norm": 0.0, - "learning_rate": 1.4480598404032984e-05, - "loss": 0.9959, + "learning_rate": 1.4495280676095016e-05, + "loss": 0.9289, "step": 13094 }, { - "epoch": 0.3715947786606129, + "epoch": 0.3710788064269319, "grad_norm": 0.0, - "learning_rate": 1.4479776726037858e-05, - "loss": 0.9772, + "learning_rate": 1.449446082702809e-05, + "loss": 0.9275, "step": 13095 }, { - "epoch": 0.3716231555051078, + "epoch": 0.37110714386919436, "grad_norm": 0.0, - "learning_rate": 1.4478955010201855e-05, - "loss": 1.0157, + "learning_rate": 1.449364094010265e-05, + "loss": 0.8555, "step": 13096 }, { - "epoch": 0.37165153234960274, + "epoch": 0.37113548131145685, "grad_norm": 0.0, - "learning_rate": 1.4478133256531917e-05, - "loss": 0.8354, + "learning_rate": 1.4492821015325603e-05, + "loss": 0.9442, "step": 13097 }, { - "epoch": 0.3716799091940976, + "epoch": 0.3711638187537193, "grad_norm": 0.0, - "learning_rate": 1.4477311465034984e-05, - "loss": 0.9895, + "learning_rate": 1.4492001052703854e-05, + "loss": 0.7583, "step": 13098 }, { - "epoch": 0.3717082860385925, + "epoch": 0.3711921561959817, "grad_norm": 0.0, - "learning_rate": 1.4476489635718001e-05, - "loss": 0.8467, + "learning_rate": 1.4491181052244317e-05, + "loss": 1.0307, "step": 13099 }, { - "epoch": 0.3717366628830874, + "epoch": 0.3712204936382442, "grad_norm": 0.0, - "learning_rate": 1.447566776858791e-05, - "loss": 0.9958, + "learning_rate": 1.4490361013953891e-05, + "loss": 0.9128, "step": 13100 }, { - "epoch": 0.3717650397275823, + "epoch": 0.37124883108050666, "grad_norm": 0.0, - "learning_rate": 1.4474845863651655e-05, - "loss": 1.0245, + "learning_rate": 1.4489540937839486e-05, + "loss": 0.8923, "step": 13101 }, { - "epoch": 0.3717934165720772, + "epoch": 0.37127716852276915, "grad_norm": 0.0, - "learning_rate": 1.4474023920916174e-05, - "loss": 0.9484, + "learning_rate": 1.4488720823908013e-05, + "loss": 0.9023, "step": 13102 }, { - "epoch": 0.37182179341657207, + "epoch": 0.3713055059650316, "grad_norm": 0.0, - "learning_rate": 1.447320194038841e-05, - "loss": 0.8465, + "learning_rate": 1.4487900672166377e-05, + "loss": 0.8781, "step": 13103 }, { - "epoch": 0.371850170261067, + "epoch": 0.3713338434072941, "grad_norm": 0.0, - "learning_rate": 1.4472379922075308e-05, - "loss": 1.0231, + "learning_rate": 1.4487080482621485e-05, + "loss": 0.8537, "step": 13104 }, { - "epoch": 0.37187854710556184, + "epoch": 0.3713621808495565, "grad_norm": 0.0, - "learning_rate": 1.4471557865983816e-05, - "loss": 1.0349, + "learning_rate": 1.448626025528025e-05, + "loss": 0.9259, "step": 13105 }, { - "epoch": 0.37190692395005676, + "epoch": 0.37139051829181896, "grad_norm": 0.0, - "learning_rate": 1.4470735772120868e-05, - "loss": 0.967, + "learning_rate": 1.4485439990149579e-05, + "loss": 0.8924, "step": 13106 }, { - "epoch": 0.37193530079455167, + "epoch": 0.37141885573408145, "grad_norm": 0.0, - "learning_rate": 1.4469913640493417e-05, - "loss": 0.8462, + "learning_rate": 1.448461968723638e-05, + "loss": 0.9253, "step": 13107 }, { - "epoch": 0.37196367763904653, + "epoch": 0.3714471931763439, "grad_norm": 0.0, - "learning_rate": 1.4469091471108403e-05, - "loss": 0.9681, + "learning_rate": 1.4483799346547566e-05, + "loss": 1.0181, "step": 13108 }, { - "epoch": 0.37199205448354145, + "epoch": 0.3714755306186064, "grad_norm": 0.0, - "learning_rate": 1.4468269263972773e-05, - "loss": 0.9159, + "learning_rate": 1.4482978968090044e-05, + "loss": 1.0367, "step": 13109 }, { - "epoch": 0.3720204313280363, + "epoch": 0.3715038680608688, "grad_norm": 0.0, - "learning_rate": 1.4467447019093473e-05, - "loss": 0.9747, + "learning_rate": 1.4482158551870727e-05, + "loss": 0.9053, "step": 13110 }, { - "epoch": 0.3720488081725312, + "epoch": 0.37153220550313126, "grad_norm": 0.0, - "learning_rate": 1.4466624736477447e-05, - "loss": 1.0234, + "learning_rate": 1.448133809789652e-05, + "loss": 0.9858, "step": 13111 }, { - "epoch": 0.3720771850170261, + "epoch": 0.37156054294539376, "grad_norm": 0.0, - "learning_rate": 1.446580241613164e-05, - "loss": 0.8799, + "learning_rate": 1.4480517606174342e-05, + "loss": 1.0265, "step": 13112 }, { - "epoch": 0.372105561861521, + "epoch": 0.3715888803876562, "grad_norm": 0.0, - "learning_rate": 1.4464980058062999e-05, - "loss": 1.0431, + "learning_rate": 1.4479697076711098e-05, + "loss": 0.9381, "step": 13113 }, { - "epoch": 0.3721339387060159, + "epoch": 0.3716172178299187, "grad_norm": 0.0, - "learning_rate": 1.446415766227847e-05, - "loss": 0.962, + "learning_rate": 1.4478876509513703e-05, + "loss": 0.9594, "step": 13114 }, { - "epoch": 0.37216231555051077, + "epoch": 0.3716455552721811, "grad_norm": 0.0, - "learning_rate": 1.4463335228785005e-05, - "loss": 0.8046, + "learning_rate": 1.447805590458907e-05, + "loss": 0.9227, "step": 13115 }, { - "epoch": 0.3721906923950057, + "epoch": 0.3716738927144436, "grad_norm": 0.0, - "learning_rate": 1.4462512757589541e-05, - "loss": 0.9419, + "learning_rate": 1.4477235261944107e-05, + "loss": 0.9816, "step": 13116 }, { - "epoch": 0.37221906923950054, + "epoch": 0.37170223015670606, "grad_norm": 0.0, - "learning_rate": 1.4461690248699034e-05, - "loss": 0.8796, + "learning_rate": 1.447641458158573e-05, + "loss": 0.951, "step": 13117 }, { - "epoch": 0.37224744608399546, + "epoch": 0.3717305675989685, "grad_norm": 0.0, - "learning_rate": 1.4460867702120432e-05, - "loss": 0.8823, + "learning_rate": 1.4475593863520847e-05, + "loss": 1.0381, "step": 13118 }, { - "epoch": 0.3722758229284904, + "epoch": 0.371758905041231, "grad_norm": 0.0, - "learning_rate": 1.4460045117860674e-05, - "loss": 0.8778, + "learning_rate": 1.4474773107756379e-05, + "loss": 0.9987, "step": 13119 }, { - "epoch": 0.37230419977298523, + "epoch": 0.37178724248349343, "grad_norm": 0.0, - "learning_rate": 1.4459222495926719e-05, - "loss": 0.8175, + "learning_rate": 1.4473952314299235e-05, + "loss": 0.8998, "step": 13120 }, { - "epoch": 0.37233257661748015, + "epoch": 0.3718155799257559, "grad_norm": 0.0, - "learning_rate": 1.4458399836325508e-05, - "loss": 0.9158, + "learning_rate": 1.4473131483156326e-05, + "loss": 0.8499, "step": 13121 }, { - "epoch": 0.372360953461975, + "epoch": 0.37184391736801836, "grad_norm": 0.0, - "learning_rate": 1.4457577139063995e-05, - "loss": 0.9, + "learning_rate": 1.4472310614334575e-05, + "loss": 0.9136, "step": 13122 }, { - "epoch": 0.3723893303064699, + "epoch": 0.3718722548102808, "grad_norm": 0.0, - "learning_rate": 1.4456754404149124e-05, - "loss": 0.8988, + "learning_rate": 1.4471489707840887e-05, + "loss": 0.8828, "step": 13123 }, { - "epoch": 0.37241770715096484, + "epoch": 0.3719005922525433, "grad_norm": 0.0, - "learning_rate": 1.4455931631587854e-05, - "loss": 0.9172, + "learning_rate": 1.447066876368218e-05, + "loss": 0.9322, "step": 13124 }, { - "epoch": 0.3724460839954597, + "epoch": 0.37192892969480573, "grad_norm": 0.0, - "learning_rate": 1.4455108821387124e-05, - "loss": 0.8142, + "learning_rate": 1.4469847781865372e-05, + "loss": 0.9694, "step": 13125 }, { - "epoch": 0.3724744608399546, + "epoch": 0.3719572671370682, "grad_norm": 0.0, - "learning_rate": 1.4454285973553891e-05, - "loss": 0.9551, + "learning_rate": 1.4469026762397376e-05, + "loss": 0.8544, "step": 13126 }, { - "epoch": 0.3725028376844495, + "epoch": 0.37198560457933066, "grad_norm": 0.0, - "learning_rate": 1.4453463088095108e-05, - "loss": 0.9703, + "learning_rate": 1.4468205705285108e-05, + "loss": 0.9126, "step": 13127 }, { - "epoch": 0.3725312145289444, + "epoch": 0.37201394202159316, "grad_norm": 0.0, - "learning_rate": 1.445264016501772e-05, - "loss": 0.8816, + "learning_rate": 1.4467384610535486e-05, + "loss": 0.9478, "step": 13128 }, { - "epoch": 0.37255959137343925, + "epoch": 0.3720422794638556, "grad_norm": 0.0, - "learning_rate": 1.4451817204328678e-05, - "loss": 0.8533, + "learning_rate": 1.4466563478155422e-05, + "loss": 0.9424, "step": 13129 }, { - "epoch": 0.37258796821793416, + "epoch": 0.37207061690611803, "grad_norm": 0.0, - "learning_rate": 1.4450994206034936e-05, - "loss": 0.9131, + "learning_rate": 1.4465742308151836e-05, + "loss": 1.0516, "step": 13130 }, { - "epoch": 0.3726163450624291, + "epoch": 0.37209895434838053, "grad_norm": 0.0, - "learning_rate": 1.4450171170143447e-05, - "loss": 1.0147, + "learning_rate": 1.4464921100531642e-05, + "loss": 0.9096, "step": 13131 }, { - "epoch": 0.37264472190692394, + "epoch": 0.37212729179064297, "grad_norm": 0.0, - "learning_rate": 1.4449348096661164e-05, - "loss": 0.9507, + "learning_rate": 1.4464099855301762e-05, + "loss": 0.9891, "step": 13132 }, { - "epoch": 0.37267309875141885, + "epoch": 0.37215562923290546, "grad_norm": 0.0, - "learning_rate": 1.4448524985595033e-05, - "loss": 0.8923, + "learning_rate": 1.446327857246911e-05, + "loss": 0.996, "step": 13133 }, { - "epoch": 0.3727014755959137, + "epoch": 0.3721839666751679, "grad_norm": 0.0, - "learning_rate": 1.4447701836952017e-05, - "loss": 0.997, + "learning_rate": 1.4462457252040606e-05, + "loss": 0.9367, "step": 13134 }, { - "epoch": 0.3727298524404086, + "epoch": 0.37221230411743034, "grad_norm": 0.0, - "learning_rate": 1.444687865073906e-05, - "loss": 0.8665, + "learning_rate": 1.4461635894023167e-05, + "loss": 0.9619, "step": 13135 }, { - "epoch": 0.37275822928490354, + "epoch": 0.37224064155969283, "grad_norm": 0.0, - "learning_rate": 1.4446055426963121e-05, - "loss": 0.9351, + "learning_rate": 1.4460814498423714e-05, + "loss": 0.9749, "step": 13136 }, { - "epoch": 0.3727866061293984, + "epoch": 0.37226897900195527, "grad_norm": 0.0, - "learning_rate": 1.444523216563115e-05, - "loss": 0.8895, + "learning_rate": 1.445999306524916e-05, + "loss": 0.7945, "step": 13137 }, { - "epoch": 0.3728149829738933, + "epoch": 0.37229731644421776, "grad_norm": 0.0, - "learning_rate": 1.4444408866750104e-05, - "loss": 1.0993, + "learning_rate": 1.4459171594506431e-05, + "loss": 0.9431, "step": 13138 }, { - "epoch": 0.3728433598183882, + "epoch": 0.3723256538864802, "grad_norm": 0.0, - "learning_rate": 1.4443585530326938e-05, - "loss": 0.9674, + "learning_rate": 1.4458350086202443e-05, + "loss": 0.9601, "step": 13139 }, { - "epoch": 0.3728717366628831, + "epoch": 0.3723539913287427, "grad_norm": 0.0, - "learning_rate": 1.4442762156368604e-05, - "loss": 1.0312, + "learning_rate": 1.445752854034412e-05, + "loss": 0.9568, "step": 13140 }, { - "epoch": 0.372900113507378, + "epoch": 0.37238232877100513, "grad_norm": 0.0, - "learning_rate": 1.4441938744882058e-05, - "loss": 1.0238, + "learning_rate": 1.4456706956938376e-05, + "loss": 0.8652, "step": 13141 }, { - "epoch": 0.37292849035187287, + "epoch": 0.37241066621326757, "grad_norm": 0.0, - "learning_rate": 1.4441115295874256e-05, - "loss": 0.9998, + "learning_rate": 1.4455885335992133e-05, + "loss": 0.9028, "step": 13142 }, { - "epoch": 0.3729568671963678, + "epoch": 0.37243900365553007, "grad_norm": 0.0, - "learning_rate": 1.4440291809352153e-05, - "loss": 0.9138, + "learning_rate": 1.4455063677512315e-05, + "loss": 0.9883, "step": 13143 }, { - "epoch": 0.37298524404086264, + "epoch": 0.3724673410977925, "grad_norm": 0.0, - "learning_rate": 1.4439468285322705e-05, - "loss": 0.9292, + "learning_rate": 1.4454241981505838e-05, + "loss": 0.8933, "step": 13144 }, { - "epoch": 0.37301362088535756, + "epoch": 0.372495678540055, "grad_norm": 0.0, - "learning_rate": 1.443864472379287e-05, - "loss": 0.9471, + "learning_rate": 1.445342024797963e-05, + "loss": 0.9725, "step": 13145 }, { - "epoch": 0.3730419977298524, + "epoch": 0.37252401598231744, "grad_norm": 0.0, - "learning_rate": 1.4437821124769605e-05, - "loss": 1.0322, + "learning_rate": 1.445259847694061e-05, + "loss": 0.9609, "step": 13146 }, { - "epoch": 0.37307037457434733, + "epoch": 0.3725523534245799, "grad_norm": 0.0, - "learning_rate": 1.4436997488259863e-05, - "loss": 0.9094, + "learning_rate": 1.4451776668395698e-05, + "loss": 1.0083, "step": 13147 }, { - "epoch": 0.37309875141884224, + "epoch": 0.37258069086684237, "grad_norm": 0.0, - "learning_rate": 1.4436173814270604e-05, - "loss": 0.9663, + "learning_rate": 1.4450954822351819e-05, + "loss": 1.0323, "step": 13148 }, { - "epoch": 0.3731271282633371, + "epoch": 0.3726090283091048, "grad_norm": 0.0, - "learning_rate": 1.4435350102808787e-05, - "loss": 0.9785, + "learning_rate": 1.4450132938815896e-05, + "loss": 0.8909, "step": 13149 }, { - "epoch": 0.373155505107832, + "epoch": 0.3726373657513673, "grad_norm": 0.0, - "learning_rate": 1.4434526353881366e-05, - "loss": 0.9958, + "learning_rate": 1.4449311017794847e-05, + "loss": 0.9565, "step": 13150 }, { - "epoch": 0.3731838819523269, + "epoch": 0.37266570319362974, "grad_norm": 0.0, - "learning_rate": 1.4433702567495304e-05, - "loss": 1.0253, + "learning_rate": 1.44484890592956e-05, + "loss": 0.9611, "step": 13151 }, { - "epoch": 0.3732122587968218, + "epoch": 0.37269404063589223, "grad_norm": 0.0, - "learning_rate": 1.4432878743657555e-05, - "loss": 0.8965, + "learning_rate": 1.4447667063325081e-05, + "loss": 0.9388, "step": 13152 }, { - "epoch": 0.3732406356413167, + "epoch": 0.37272237807815467, "grad_norm": 0.0, - "learning_rate": 1.443205488237508e-05, - "loss": 0.956, + "learning_rate": 1.4446845029890209e-05, + "loss": 0.9002, "step": 13153 }, { - "epoch": 0.37326901248581157, + "epoch": 0.3727507155204171, "grad_norm": 0.0, - "learning_rate": 1.4431230983654839e-05, - "loss": 0.9866, + "learning_rate": 1.4446022958997908e-05, + "loss": 0.9238, "step": 13154 }, { - "epoch": 0.3732973893303065, + "epoch": 0.3727790529626796, "grad_norm": 0.0, - "learning_rate": 1.4430407047503792e-05, - "loss": 1.0255, + "learning_rate": 1.4445200850655106e-05, + "loss": 0.8789, "step": 13155 }, { - "epoch": 0.37332576617480134, + "epoch": 0.37280739040494204, "grad_norm": 0.0, - "learning_rate": 1.4429583073928897e-05, - "loss": 0.8565, + "learning_rate": 1.444437870486873e-05, + "loss": 0.9349, "step": 13156 }, { - "epoch": 0.37335414301929626, + "epoch": 0.37283572784720453, "grad_norm": 0.0, - "learning_rate": 1.4428759062937112e-05, - "loss": 0.9221, + "learning_rate": 1.4443556521645696e-05, + "loss": 0.8801, "step": 13157 }, { - "epoch": 0.3733825198637912, + "epoch": 0.372864065289467, "grad_norm": 0.0, - "learning_rate": 1.44279350145354e-05, - "loss": 0.8652, + "learning_rate": 1.4442734300992937e-05, + "loss": 0.9666, "step": 13158 }, { - "epoch": 0.37341089670828603, + "epoch": 0.3728924027317294, "grad_norm": 0.0, - "learning_rate": 1.4427110928730727e-05, - "loss": 1.0637, + "learning_rate": 1.4441912042917378e-05, + "loss": 0.9044, "step": 13159 }, { - "epoch": 0.37343927355278095, + "epoch": 0.3729207401739919, "grad_norm": 0.0, - "learning_rate": 1.4426286805530043e-05, - "loss": 0.8726, + "learning_rate": 1.4441089747425946e-05, + "loss": 0.9777, "step": 13160 }, { - "epoch": 0.3734676503972758, + "epoch": 0.37294907761625434, "grad_norm": 0.0, - "learning_rate": 1.4425462644940317e-05, - "loss": 0.9546, + "learning_rate": 1.4440267414525564e-05, + "loss": 1.0392, "step": 13161 }, { - "epoch": 0.3734960272417707, + "epoch": 0.37297741505851684, "grad_norm": 0.0, - "learning_rate": 1.4424638446968513e-05, - "loss": 0.7975, + "learning_rate": 1.443944504422316e-05, + "loss": 0.8611, "step": 13162 }, { - "epoch": 0.3735244040862656, + "epoch": 0.3730057525007793, "grad_norm": 0.0, - "learning_rate": 1.4423814211621586e-05, - "loss": 0.8386, + "learning_rate": 1.4438622636525666e-05, + "loss": 0.9203, "step": 13163 }, { - "epoch": 0.3735527809307605, + "epoch": 0.3730340899430417, "grad_norm": 0.0, - "learning_rate": 1.4422989938906502e-05, - "loss": 1.0553, + "learning_rate": 1.443780019144e-05, + "loss": 0.9614, "step": 13164 }, { - "epoch": 0.3735811577752554, + "epoch": 0.3730624273853042, "grad_norm": 0.0, - "learning_rate": 1.4422165628830223e-05, - "loss": 0.9945, + "learning_rate": 1.4436977708973099e-05, + "loss": 0.8287, "step": 13165 }, { - "epoch": 0.37360953461975027, + "epoch": 0.37309076482756665, "grad_norm": 0.0, - "learning_rate": 1.4421341281399713e-05, - "loss": 0.9158, + "learning_rate": 1.4436155189131884e-05, + "loss": 1.0367, "step": 13166 }, { - "epoch": 0.3736379114642452, + "epoch": 0.37311910226982914, "grad_norm": 0.0, - "learning_rate": 1.442051689662193e-05, - "loss": 0.9669, + "learning_rate": 1.4435332631923289e-05, + "loss": 0.8479, "step": 13167 }, { - "epoch": 0.37366628830874005, + "epoch": 0.3731474397120916, "grad_norm": 0.0, - "learning_rate": 1.4419692474503847e-05, - "loss": 1.0284, + "learning_rate": 1.4434510037354239e-05, + "loss": 0.9672, "step": 13168 }, { - "epoch": 0.37369466515323496, + "epoch": 0.37317577715435407, "grad_norm": 0.0, - "learning_rate": 1.4418868015052424e-05, - "loss": 0.9725, + "learning_rate": 1.4433687405431663e-05, + "loss": 1.005, "step": 13169 }, { - "epoch": 0.3737230419977299, + "epoch": 0.3732041145966165, "grad_norm": 0.0, - "learning_rate": 1.4418043518274621e-05, - "loss": 0.967, + "learning_rate": 1.4432864736162493e-05, + "loss": 0.9294, "step": 13170 }, { - "epoch": 0.37375141884222474, + "epoch": 0.37323245203887895, "grad_norm": 0.0, - "learning_rate": 1.4417218984177406e-05, - "loss": 0.9284, + "learning_rate": 1.4432042029553657e-05, + "loss": 0.982, "step": 13171 }, { - "epoch": 0.37377979568671965, + "epoch": 0.37326078948114144, "grad_norm": 0.0, - "learning_rate": 1.4416394412767747e-05, - "loss": 0.87, + "learning_rate": 1.4431219285612085e-05, + "loss": 1.0604, "step": 13172 }, { - "epoch": 0.3738081725312145, + "epoch": 0.3732891269234039, "grad_norm": 0.0, - "learning_rate": 1.44155698040526e-05, - "loss": 0.8725, + "learning_rate": 1.443039650434471e-05, + "loss": 0.9421, "step": 13173 }, { - "epoch": 0.3738365493757094, + "epoch": 0.3733174643656664, "grad_norm": 0.0, - "learning_rate": 1.441474515803894e-05, - "loss": 1.0347, + "learning_rate": 1.4429573685758458e-05, + "loss": 0.9314, "step": 13174 }, { - "epoch": 0.3738649262202043, + "epoch": 0.3733458018079288, "grad_norm": 0.0, - "learning_rate": 1.441392047473373e-05, - "loss": 0.9733, + "learning_rate": 1.442875082986026e-05, + "loss": 0.916, "step": 13175 }, { - "epoch": 0.3738933030646992, + "epoch": 0.37337413925019125, "grad_norm": 0.0, - "learning_rate": 1.4413095754143934e-05, - "loss": 0.9392, + "learning_rate": 1.4427927936657051e-05, + "loss": 0.9722, "step": 13176 }, { - "epoch": 0.3739216799091941, + "epoch": 0.37340247669245374, "grad_norm": 0.0, - "learning_rate": 1.4412270996276518e-05, - "loss": 0.9074, + "learning_rate": 1.4427105006155761e-05, + "loss": 0.9334, "step": 13177 }, { - "epoch": 0.373950056753689, + "epoch": 0.3734308141347162, "grad_norm": 0.0, - "learning_rate": 1.4411446201138453e-05, - "loss": 0.9321, + "learning_rate": 1.4426282038363322e-05, + "loss": 0.7676, "step": 13178 }, { - "epoch": 0.3739784335981839, + "epoch": 0.3734591515769787, "grad_norm": 0.0, - "learning_rate": 1.4410621368736703e-05, - "loss": 0.7933, + "learning_rate": 1.4425459033286664e-05, + "loss": 0.9277, "step": 13179 }, { - "epoch": 0.37400681044267875, + "epoch": 0.3734874890192411, "grad_norm": 0.0, - "learning_rate": 1.4409796499078238e-05, - "loss": 0.8649, + "learning_rate": 1.4424635990932723e-05, + "loss": 0.9839, "step": 13180 }, { - "epoch": 0.37403518728717366, + "epoch": 0.3735158264615036, "grad_norm": 0.0, - "learning_rate": 1.440897159217002e-05, - "loss": 0.8488, + "learning_rate": 1.442381291130843e-05, + "loss": 0.8687, "step": 13181 }, { - "epoch": 0.3740635641316686, + "epoch": 0.37354416390376605, "grad_norm": 0.0, - "learning_rate": 1.4408146648019025e-05, - "loss": 0.9146, + "learning_rate": 1.4422989794420716e-05, + "loss": 1.0162, "step": 13182 }, { - "epoch": 0.37409194097616344, + "epoch": 0.3735725013460285, "grad_norm": 0.0, - "learning_rate": 1.4407321666632213e-05, - "loss": 0.8929, + "learning_rate": 1.4422166640276519e-05, + "loss": 1.0353, "step": 13183 }, { - "epoch": 0.37412031782065835, + "epoch": 0.373600838788291, "grad_norm": 0.0, - "learning_rate": 1.4406496648016557e-05, - "loss": 0.953, + "learning_rate": 1.442134344888277e-05, + "loss": 0.9813, "step": 13184 }, { - "epoch": 0.3741486946651532, + "epoch": 0.3736291762305534, "grad_norm": 0.0, - "learning_rate": 1.4405671592179028e-05, - "loss": 0.9077, + "learning_rate": 1.4420520220246401e-05, + "loss": 1.0039, "step": 13185 }, { - "epoch": 0.37417707150964813, + "epoch": 0.3736575136728159, "grad_norm": 0.0, - "learning_rate": 1.4404846499126594e-05, - "loss": 0.918, + "learning_rate": 1.4419696954374352e-05, + "loss": 0.9446, "step": 13186 }, { - "epoch": 0.37420544835414304, + "epoch": 0.37368585111507835, "grad_norm": 0.0, - "learning_rate": 1.4404021368866222e-05, - "loss": 1.0576, + "learning_rate": 1.4418873651273553e-05, + "loss": 0.9155, "step": 13187 }, { - "epoch": 0.3742338251986379, + "epoch": 0.3737141885573408, "grad_norm": 0.0, - "learning_rate": 1.4403196201404884e-05, - "loss": 0.9917, + "learning_rate": 1.4418050310950938e-05, + "loss": 0.971, "step": 13188 }, { - "epoch": 0.3742622020431328, + "epoch": 0.3737425259996033, "grad_norm": 0.0, - "learning_rate": 1.4402370996749549e-05, - "loss": 0.9596, + "learning_rate": 1.4417226933413446e-05, + "loss": 0.88, "step": 13189 }, { - "epoch": 0.3742905788876277, + "epoch": 0.3737708634418657, "grad_norm": 0.0, - "learning_rate": 1.4401545754907187e-05, - "loss": 0.865, + "learning_rate": 1.4416403518668013e-05, + "loss": 0.9535, "step": 13190 }, { - "epoch": 0.3743189557321226, + "epoch": 0.3737992008841282, "grad_norm": 0.0, - "learning_rate": 1.440072047588477e-05, - "loss": 1.0005, + "learning_rate": 1.4415580066721573e-05, + "loss": 0.875, "step": 13191 }, { - "epoch": 0.37434733257661745, + "epoch": 0.37382753832639065, "grad_norm": 0.0, - "learning_rate": 1.4399895159689272e-05, - "loss": 1.0602, + "learning_rate": 1.441475657758106e-05, + "loss": 0.8821, "step": 13192 }, { - "epoch": 0.37437570942111237, + "epoch": 0.37385587576865315, "grad_norm": 0.0, - "learning_rate": 1.4399069806327662e-05, - "loss": 1.0127, + "learning_rate": 1.4413933051253414e-05, + "loss": 0.8767, "step": 13193 }, { - "epoch": 0.3744040862656073, + "epoch": 0.3738842132109156, "grad_norm": 0.0, - "learning_rate": 1.4398244415806908e-05, - "loss": 0.9648, + "learning_rate": 1.4413109487745571e-05, + "loss": 0.9377, "step": 13194 }, { - "epoch": 0.37443246311010214, + "epoch": 0.373912550653178, "grad_norm": 0.0, - "learning_rate": 1.4397418988133989e-05, - "loss": 0.8314, + "learning_rate": 1.4412285887064468e-05, + "loss": 0.9555, "step": 13195 }, { - "epoch": 0.37446083995459706, + "epoch": 0.3739408880954405, "grad_norm": 0.0, - "learning_rate": 1.4396593523315874e-05, - "loss": 0.8631, + "learning_rate": 1.4411462249217041e-05, + "loss": 0.9056, "step": 13196 }, { - "epoch": 0.3744892167990919, + "epoch": 0.37396922553770295, "grad_norm": 0.0, - "learning_rate": 1.4395768021359533e-05, - "loss": 0.9077, + "learning_rate": 1.4410638574210231e-05, + "loss": 0.9087, "step": 13197 }, { - "epoch": 0.37451759364358683, + "epoch": 0.37399756297996545, "grad_norm": 0.0, - "learning_rate": 1.4394942482271942e-05, - "loss": 0.8973, + "learning_rate": 1.4409814862050974e-05, + "loss": 0.8743, "step": 13198 }, { - "epoch": 0.37454597048808175, + "epoch": 0.3740259004222279, "grad_norm": 0.0, - "learning_rate": 1.4394116906060078e-05, - "loss": 0.8319, + "learning_rate": 1.4408991112746212e-05, + "loss": 0.9362, "step": 13199 }, { - "epoch": 0.3745743473325766, + "epoch": 0.3740542378644903, "grad_norm": 0.0, - "learning_rate": 1.439329129273091e-05, - "loss": 0.8381, + "learning_rate": 1.4408167326302875e-05, + "loss": 0.8774, "step": 13200 }, { - "epoch": 0.3746027241770715, + "epoch": 0.3740825753067528, "grad_norm": 0.0, - "learning_rate": 1.4392465642291412e-05, - "loss": 0.8561, + "learning_rate": 1.4407343502727913e-05, + "loss": 0.9076, "step": 13201 }, { - "epoch": 0.3746311010215664, + "epoch": 0.37411091274901526, "grad_norm": 0.0, - "learning_rate": 1.439163995474856e-05, - "loss": 1.0219, + "learning_rate": 1.4406519642028255e-05, + "loss": 0.8568, "step": 13202 }, { - "epoch": 0.3746594778660613, + "epoch": 0.37413925019127775, "grad_norm": 0.0, - "learning_rate": 1.4390814230109327e-05, - "loss": 1.0033, + "learning_rate": 1.440569574421085e-05, + "loss": 0.9231, "step": 13203 }, { - "epoch": 0.3746878547105562, + "epoch": 0.3741675876335402, "grad_norm": 0.0, - "learning_rate": 1.4389988468380688e-05, - "loss": 0.9, + "learning_rate": 1.4404871809282632e-05, + "loss": 0.8295, "step": 13204 }, { - "epoch": 0.37471623155505107, + "epoch": 0.3741959250758027, "grad_norm": 0.0, - "learning_rate": 1.4389162669569621e-05, - "loss": 0.9251, + "learning_rate": 1.4404047837250543e-05, + "loss": 0.9636, "step": 13205 }, { - "epoch": 0.374744608399546, + "epoch": 0.3742242625180651, "grad_norm": 0.0, - "learning_rate": 1.4388336833683099e-05, - "loss": 0.9346, + "learning_rate": 1.4403223828121525e-05, + "loss": 0.9349, "step": 13206 }, { - "epoch": 0.37477298524404085, + "epoch": 0.37425259996032756, "grad_norm": 0.0, - "learning_rate": 1.4387510960728098e-05, - "loss": 0.8459, + "learning_rate": 1.4402399781902516e-05, + "loss": 0.91, "step": 13207 }, { - "epoch": 0.37480136208853576, + "epoch": 0.37428093740259005, "grad_norm": 0.0, - "learning_rate": 1.4386685050711593e-05, - "loss": 1.0206, + "learning_rate": 1.4401575698600458e-05, + "loss": 0.9708, "step": 13208 }, { - "epoch": 0.3748297389330306, + "epoch": 0.3743092748448525, "grad_norm": 0.0, - "learning_rate": 1.4385859103640565e-05, - "loss": 0.9932, + "learning_rate": 1.4400751578222293e-05, + "loss": 0.8684, "step": 13209 }, { - "epoch": 0.37485811577752554, + "epoch": 0.374337612287115, "grad_norm": 0.0, - "learning_rate": 1.4385033119521987e-05, - "loss": 0.9156, + "learning_rate": 1.4399927420774966e-05, + "loss": 1.0516, "step": 13210 }, { - "epoch": 0.37488649262202045, + "epoch": 0.3743659497293774, "grad_norm": 0.0, - "learning_rate": 1.4384207098362837e-05, - "loss": 0.8867, + "learning_rate": 1.4399103226265418e-05, + "loss": 0.9452, "step": 13211 }, { - "epoch": 0.3749148694665153, + "epoch": 0.37439428717163986, "grad_norm": 0.0, - "learning_rate": 1.4383381040170095e-05, - "loss": 0.9455, + "learning_rate": 1.4398278994700587e-05, + "loss": 0.9197, "step": 13212 }, { - "epoch": 0.3749432463110102, + "epoch": 0.37442262461390236, "grad_norm": 0.0, - "learning_rate": 1.4382554944950733e-05, - "loss": 0.8869, + "learning_rate": 1.4397454726087418e-05, + "loss": 0.9214, "step": 13213 }, { - "epoch": 0.3749716231555051, + "epoch": 0.3744509620561648, "grad_norm": 0.0, - "learning_rate": 1.4381728812711733e-05, - "loss": 0.8669, + "learning_rate": 1.4396630420432858e-05, + "loss": 0.9062, "step": 13214 }, { - "epoch": 0.375, + "epoch": 0.3744792994984273, "grad_norm": 0.0, - "learning_rate": 1.4380902643460073e-05, - "loss": 1.1051, + "learning_rate": 1.4395806077743843e-05, + "loss": 0.9856, "step": 13215 }, { - "epoch": 0.3750283768444949, + "epoch": 0.3745076369406897, "grad_norm": 0.0, - "learning_rate": 1.4380076437202733e-05, - "loss": 0.919, + "learning_rate": 1.4394981698027324e-05, + "loss": 1.0028, "step": 13216 }, { - "epoch": 0.3750567536889898, + "epoch": 0.3745359743829522, "grad_norm": 0.0, - "learning_rate": 1.4379250193946685e-05, - "loss": 0.8999, + "learning_rate": 1.4394157281290239e-05, + "loss": 1.0214, "step": 13217 }, { - "epoch": 0.3750851305334847, + "epoch": 0.37456431182521466, "grad_norm": 0.0, - "learning_rate": 1.437842391369892e-05, - "loss": 1.0208, + "learning_rate": 1.4393332827539542e-05, + "loss": 0.876, "step": 13218 }, { - "epoch": 0.37511350737797955, + "epoch": 0.3745926492674771, "grad_norm": 0.0, - "learning_rate": 1.437759759646641e-05, - "loss": 0.9829, + "learning_rate": 1.4392508336782167e-05, + "loss": 0.9599, "step": 13219 }, { - "epoch": 0.37514188422247446, + "epoch": 0.3746209867097396, "grad_norm": 0.0, - "learning_rate": 1.4376771242256134e-05, - "loss": 0.9566, + "learning_rate": 1.4391683809025063e-05, + "loss": 0.9681, "step": 13220 }, { - "epoch": 0.3751702610669694, + "epoch": 0.37464932415200203, "grad_norm": 0.0, - "learning_rate": 1.4375944851075074e-05, - "loss": 0.9809, + "learning_rate": 1.4390859244275175e-05, + "loss": 0.9171, "step": 13221 }, { - "epoch": 0.37519863791146424, + "epoch": 0.3746776615942645, "grad_norm": 0.0, - "learning_rate": 1.4375118422930215e-05, - "loss": 0.9337, + "learning_rate": 1.4390034642539447e-05, + "loss": 0.9116, "step": 13222 }, { - "epoch": 0.37522701475595915, + "epoch": 0.37470599903652696, "grad_norm": 0.0, - "learning_rate": 1.4374291957828534e-05, - "loss": 0.8539, + "learning_rate": 1.4389210003824832e-05, + "loss": 0.8242, "step": 13223 }, { - "epoch": 0.375255391600454, + "epoch": 0.3747343364787894, "grad_norm": 0.0, - "learning_rate": 1.4373465455777009e-05, - "loss": 0.89, + "learning_rate": 1.438838532813827e-05, + "loss": 0.8791, "step": 13224 }, { - "epoch": 0.37528376844494893, + "epoch": 0.3747626739210519, "grad_norm": 0.0, - "learning_rate": 1.4372638916782627e-05, - "loss": 0.9598, + "learning_rate": 1.4387560615486706e-05, + "loss": 0.885, "step": 13225 }, { - "epoch": 0.3753121452894438, + "epoch": 0.37479101136331433, "grad_norm": 0.0, - "learning_rate": 1.4371812340852366e-05, - "loss": 0.9363, + "learning_rate": 1.4386735865877092e-05, + "loss": 0.7761, "step": 13226 }, { - "epoch": 0.3753405221339387, + "epoch": 0.3748193488055768, "grad_norm": 0.0, - "learning_rate": 1.437098572799321e-05, - "loss": 1.0474, + "learning_rate": 1.4385911079316373e-05, + "loss": 0.8899, "step": 13227 }, { - "epoch": 0.3753688989784336, + "epoch": 0.37484768624783926, "grad_norm": 0.0, - "learning_rate": 1.4370159078212142e-05, - "loss": 1.0129, + "learning_rate": 1.4385086255811496e-05, + "loss": 0.991, "step": 13228 }, { - "epoch": 0.3753972758229285, + "epoch": 0.37487602369010176, "grad_norm": 0.0, - "learning_rate": 1.4369332391516142e-05, - "loss": 1.0369, + "learning_rate": 1.4384261395369405e-05, + "loss": 1.0136, "step": 13229 }, { - "epoch": 0.3754256526674234, + "epoch": 0.3749043611323642, "grad_norm": 0.0, - "learning_rate": 1.4368505667912196e-05, - "loss": 0.8808, + "learning_rate": 1.4383436497997059e-05, + "loss": 0.8904, "step": 13230 }, { - "epoch": 0.37545402951191825, + "epoch": 0.37493269857462663, "grad_norm": 0.0, - "learning_rate": 1.4367678907407285e-05, - "loss": 1.1089, + "learning_rate": 1.4382611563701396e-05, + "loss": 0.9982, "step": 13231 }, { - "epoch": 0.37548240635641317, + "epoch": 0.3749610360168891, "grad_norm": 0.0, - "learning_rate": 1.4366852110008397e-05, - "loss": 0.8794, + "learning_rate": 1.438178659248937e-05, + "loss": 0.9671, "step": 13232 }, { - "epoch": 0.3755107832009081, + "epoch": 0.37498937345915156, "grad_norm": 0.0, - "learning_rate": 1.4366025275722512e-05, - "loss": 0.9621, + "learning_rate": 1.4380961584367927e-05, + "loss": 0.9586, "step": 13233 }, { - "epoch": 0.37553916004540294, + "epoch": 0.37501771090141406, "grad_norm": 0.0, - "learning_rate": 1.4365198404556612e-05, - "loss": 0.9234, + "learning_rate": 1.4380136539344018e-05, + "loss": 1.0445, "step": 13234 }, { - "epoch": 0.37556753688989786, + "epoch": 0.3750460483436765, "grad_norm": 0.0, - "learning_rate": 1.4364371496517688e-05, - "loss": 0.9142, + "learning_rate": 1.4379311457424591e-05, + "loss": 0.8732, "step": 13235 }, { - "epoch": 0.3755959137343927, + "epoch": 0.37507438578593894, "grad_norm": 0.0, - "learning_rate": 1.436354455161272e-05, - "loss": 0.9843, + "learning_rate": 1.43784863386166e-05, + "loss": 1.1129, "step": 13236 }, { - "epoch": 0.37562429057888763, + "epoch": 0.37510272322820143, "grad_norm": 0.0, - "learning_rate": 1.4362717569848697e-05, - "loss": 1.0175, + "learning_rate": 1.4377661182926992e-05, + "loss": 0.9562, "step": 13237 }, { - "epoch": 0.37565266742338255, + "epoch": 0.37513106067046387, "grad_norm": 0.0, - "learning_rate": 1.43618905512326e-05, - "loss": 1.0336, + "learning_rate": 1.4376835990362716e-05, + "loss": 0.881, "step": 13238 }, { - "epoch": 0.3756810442678774, + "epoch": 0.37515939811272636, "grad_norm": 0.0, - "learning_rate": 1.436106349577142e-05, - "loss": 1.0142, + "learning_rate": 1.437601076093073e-05, + "loss": 0.9761, "step": 13239 }, { - "epoch": 0.3757094211123723, + "epoch": 0.3751877355549888, "grad_norm": 0.0, - "learning_rate": 1.436023640347214e-05, - "loss": 0.8299, + "learning_rate": 1.4375185494637978e-05, + "loss": 0.9763, "step": 13240 }, { - "epoch": 0.3757377979568672, + "epoch": 0.3752160729972513, "grad_norm": 0.0, - "learning_rate": 1.4359409274341747e-05, - "loss": 0.9292, + "learning_rate": 1.4374360191491411e-05, + "loss": 1.0563, "step": 13241 }, { - "epoch": 0.3757661748013621, + "epoch": 0.37524441043951373, "grad_norm": 0.0, - "learning_rate": 1.4358582108387226e-05, - "loss": 0.9865, + "learning_rate": 1.4373534851497985e-05, + "loss": 0.8896, "step": 13242 }, { - "epoch": 0.37579455164585696, + "epoch": 0.37527274788177617, "grad_norm": 0.0, - "learning_rate": 1.4357754905615571e-05, - "loss": 0.8818, + "learning_rate": 1.4372709474664653e-05, + "loss": 0.9346, "step": 13243 }, { - "epoch": 0.37582292849035187, + "epoch": 0.37530108532403866, "grad_norm": 0.0, - "learning_rate": 1.435692766603376e-05, - "loss": 0.9808, + "learning_rate": 1.4371884060998364e-05, + "loss": 0.9816, "step": 13244 }, { - "epoch": 0.3758513053348468, + "epoch": 0.3753294227663011, "grad_norm": 0.0, - "learning_rate": 1.4356100389648784e-05, - "loss": 0.9953, + "learning_rate": 1.4371058610506075e-05, + "loss": 0.94, "step": 13245 }, { - "epoch": 0.37587968217934165, + "epoch": 0.3753577602085636, "grad_norm": 0.0, - "learning_rate": 1.4355273076467635e-05, - "loss": 0.9026, + "learning_rate": 1.437023312319473e-05, + "loss": 0.9503, "step": 13246 }, { - "epoch": 0.37590805902383656, + "epoch": 0.37538609765082603, "grad_norm": 0.0, - "learning_rate": 1.4354445726497298e-05, - "loss": 0.9268, + "learning_rate": 1.4369407599071295e-05, + "loss": 0.8475, "step": 13247 }, { - "epoch": 0.3759364358683314, + "epoch": 0.37541443509308847, "grad_norm": 0.0, - "learning_rate": 1.4353618339744761e-05, - "loss": 0.9482, + "learning_rate": 1.4368582038142713e-05, + "loss": 0.9913, "step": 13248 }, { - "epoch": 0.37596481271282634, + "epoch": 0.37544277253535097, "grad_norm": 0.0, - "learning_rate": 1.4352790916217016e-05, - "loss": 0.9689, + "learning_rate": 1.4367756440415943e-05, + "loss": 0.9581, "step": 13249 }, { - "epoch": 0.37599318955732125, + "epoch": 0.3754711099776134, "grad_norm": 0.0, - "learning_rate": 1.4351963455921052e-05, - "loss": 0.9159, + "learning_rate": 1.4366930805897939e-05, + "loss": 1.059, "step": 13250 }, { - "epoch": 0.3760215664018161, + "epoch": 0.3754994474198759, "grad_norm": 0.0, - "learning_rate": 1.4351135958863854e-05, - "loss": 0.8519, + "learning_rate": 1.4366105134595656e-05, + "loss": 0.9448, "step": 13251 }, { - "epoch": 0.376049943246311, + "epoch": 0.37552778486213834, "grad_norm": 0.0, - "learning_rate": 1.4350308425052413e-05, - "loss": 0.9026, + "learning_rate": 1.4365279426516048e-05, + "loss": 0.8966, "step": 13252 }, { - "epoch": 0.3760783200908059, + "epoch": 0.37555612230440083, "grad_norm": 0.0, - "learning_rate": 1.4349480854493724e-05, - "loss": 1.0298, + "learning_rate": 1.436445368166607e-05, + "loss": 0.9602, "step": 13253 }, { - "epoch": 0.3761066969353008, + "epoch": 0.37558445974666327, "grad_norm": 0.0, - "learning_rate": 1.4348653247194777e-05, - "loss": 0.9972, + "learning_rate": 1.4363627900052676e-05, + "loss": 1.0005, "step": 13254 }, { - "epoch": 0.3761350737797957, + "epoch": 0.3756127971889257, "grad_norm": 0.0, - "learning_rate": 1.4347825603162555e-05, - "loss": 1.0251, + "learning_rate": 1.4362802081682826e-05, + "loss": 0.9626, "step": 13255 }, { - "epoch": 0.3761634506242906, + "epoch": 0.3756411346311882, "grad_norm": 0.0, - "learning_rate": 1.434699792240406e-05, - "loss": 1.0754, + "learning_rate": 1.4361976226563475e-05, + "loss": 0.8894, "step": 13256 }, { - "epoch": 0.3761918274687855, + "epoch": 0.37566947207345064, "grad_norm": 0.0, - "learning_rate": 1.4346170204926275e-05, - "loss": 0.8495, + "learning_rate": 1.4361150334701575e-05, + "loss": 0.9914, "step": 13257 }, { - "epoch": 0.37622020431328035, + "epoch": 0.37569780951571313, "grad_norm": 0.0, - "learning_rate": 1.4345342450736195e-05, - "loss": 0.9765, + "learning_rate": 1.4360324406104086e-05, + "loss": 0.9372, "step": 13258 }, { - "epoch": 0.37624858115777526, + "epoch": 0.37572614695797557, "grad_norm": 0.0, - "learning_rate": 1.4344514659840814e-05, - "loss": 0.9417, + "learning_rate": 1.435949844077797e-05, + "loss": 1.0135, "step": 13259 }, { - "epoch": 0.3762769580022701, + "epoch": 0.375754484400238, "grad_norm": 0.0, - "learning_rate": 1.4343686832247121e-05, - "loss": 0.9666, + "learning_rate": 1.4358672438730178e-05, + "loss": 0.8389, "step": 13260 }, { - "epoch": 0.37630533484676504, + "epoch": 0.3757828218425005, "grad_norm": 0.0, - "learning_rate": 1.4342858967962108e-05, - "loss": 0.9087, + "learning_rate": 1.4357846399967668e-05, + "loss": 1.0269, "step": 13261 }, { - "epoch": 0.37633371169125995, + "epoch": 0.37581115928476294, "grad_norm": 0.0, - "learning_rate": 1.4342031066992775e-05, - "loss": 0.8956, + "learning_rate": 1.43570203244974e-05, + "loss": 0.9464, "step": 13262 }, { - "epoch": 0.3763620885357548, + "epoch": 0.37583949672702543, "grad_norm": 0.0, - "learning_rate": 1.4341203129346108e-05, - "loss": 0.9552, + "learning_rate": 1.4356194212326333e-05, + "loss": 0.9683, "step": 13263 }, { - "epoch": 0.37639046538024973, + "epoch": 0.3758678341692879, "grad_norm": 0.0, - "learning_rate": 1.4340375155029102e-05, - "loss": 0.8934, + "learning_rate": 1.4355368063461423e-05, + "loss": 0.9147, "step": 13264 }, { - "epoch": 0.3764188422247446, + "epoch": 0.37589617161155037, "grad_norm": 0.0, - "learning_rate": 1.4339547144048753e-05, - "loss": 1.0401, + "learning_rate": 1.4354541877909634e-05, + "loss": 0.9709, "step": 13265 }, { - "epoch": 0.3764472190692395, + "epoch": 0.3759245090538128, "grad_norm": 0.0, - "learning_rate": 1.4338719096412056e-05, - "loss": 0.8837, + "learning_rate": 1.4353715655677919e-05, + "loss": 0.8683, "step": 13266 }, { - "epoch": 0.3764755959137344, + "epoch": 0.37595284649607524, "grad_norm": 0.0, - "learning_rate": 1.4337891012125997e-05, - "loss": 0.946, + "learning_rate": 1.4352889396773239e-05, + "loss": 0.9453, "step": 13267 }, { - "epoch": 0.3765039727582293, + "epoch": 0.37598118393833774, "grad_norm": 0.0, - "learning_rate": 1.4337062891197582e-05, - "loss": 0.9679, + "learning_rate": 1.4352063101202556e-05, + "loss": 0.8909, "step": 13268 }, { - "epoch": 0.3765323496027242, + "epoch": 0.3760095213806002, "grad_norm": 0.0, - "learning_rate": 1.4336234733633802e-05, - "loss": 0.9078, + "learning_rate": 1.435123676897283e-05, + "loss": 0.8617, "step": 13269 }, { - "epoch": 0.37656072644721905, + "epoch": 0.37603785882286267, "grad_norm": 0.0, - "learning_rate": 1.4335406539441655e-05, - "loss": 0.9485, + "learning_rate": 1.4350410400091022e-05, + "loss": 0.8751, "step": 13270 }, { - "epoch": 0.37658910329171397, + "epoch": 0.3760661962651251, "grad_norm": 0.0, - "learning_rate": 1.4334578308628131e-05, - "loss": 0.8313, + "learning_rate": 1.4349583994564088e-05, + "loss": 0.9659, "step": 13271 }, { - "epoch": 0.3766174801362088, + "epoch": 0.37609453370738755, "grad_norm": 0.0, - "learning_rate": 1.433375004120023e-05, - "loss": 1.0583, + "learning_rate": 1.4348757552398997e-05, + "loss": 0.9612, "step": 13272 }, { - "epoch": 0.37664585698070374, + "epoch": 0.37612287114965004, "grad_norm": 0.0, - "learning_rate": 1.4332921737164946e-05, - "loss": 0.9456, + "learning_rate": 1.4347931073602706e-05, + "loss": 1.0051, "step": 13273 }, { - "epoch": 0.37667423382519866, + "epoch": 0.3761512085919125, "grad_norm": 0.0, - "learning_rate": 1.433209339652928e-05, - "loss": 0.9285, + "learning_rate": 1.4347104558182174e-05, + "loss": 0.8153, "step": 13274 }, { - "epoch": 0.3767026106696935, + "epoch": 0.37617954603417497, "grad_norm": 0.0, - "learning_rate": 1.4331265019300222e-05, - "loss": 1.0026, + "learning_rate": 1.4346278006144365e-05, + "loss": 0.8473, "step": 13275 }, { - "epoch": 0.37673098751418843, + "epoch": 0.3762078834764374, "grad_norm": 0.0, - "learning_rate": 1.4330436605484776e-05, - "loss": 0.8692, + "learning_rate": 1.4345451417496246e-05, + "loss": 0.9475, "step": 13276 }, { - "epoch": 0.3767593643586833, + "epoch": 0.3762362209186999, "grad_norm": 0.0, - "learning_rate": 1.432960815508994e-05, - "loss": 1.0141, + "learning_rate": 1.4344624792244776e-05, + "loss": 0.9852, "step": 13277 }, { - "epoch": 0.3767877412031782, + "epoch": 0.37626455836096234, "grad_norm": 0.0, - "learning_rate": 1.4328779668122706e-05, - "loss": 0.806, + "learning_rate": 1.434379813039692e-05, + "loss": 0.9446, "step": 13278 }, { - "epoch": 0.3768161180476731, + "epoch": 0.3762928958032248, "grad_norm": 0.0, - "learning_rate": 1.4327951144590078e-05, - "loss": 0.9606, + "learning_rate": 1.4342971431959634e-05, + "loss": 0.9765, "step": 13279 }, { - "epoch": 0.376844494892168, + "epoch": 0.3763212332454873, "grad_norm": 0.0, - "learning_rate": 1.4327122584499052e-05, - "loss": 0.9455, + "learning_rate": 1.4342144696939888e-05, + "loss": 0.8934, "step": 13280 }, { - "epoch": 0.3768728717366629, + "epoch": 0.3763495706877497, "grad_norm": 0.0, - "learning_rate": 1.4326293987856623e-05, - "loss": 0.8682, + "learning_rate": 1.4341317925344646e-05, + "loss": 0.91, "step": 13281 }, { - "epoch": 0.37690124858115776, + "epoch": 0.3763779081300122, "grad_norm": 0.0, - "learning_rate": 1.4325465354669796e-05, - "loss": 1.0683, + "learning_rate": 1.4340491117180872e-05, + "loss": 0.9615, "step": 13282 }, { - "epoch": 0.37692962542565267, + "epoch": 0.37640624557227464, "grad_norm": 0.0, - "learning_rate": 1.432463668494557e-05, - "loss": 1.1434, + "learning_rate": 1.433966427245553e-05, + "loss": 0.846, "step": 13283 }, { - "epoch": 0.3769580022701476, + "epoch": 0.3764345830145371, "grad_norm": 0.0, - "learning_rate": 1.4323807978690945e-05, - "loss": 0.9585, + "learning_rate": 1.4338837391175582e-05, + "loss": 0.9342, "step": 13284 }, { - "epoch": 0.37698637911464244, + "epoch": 0.3764629204567996, "grad_norm": 0.0, - "learning_rate": 1.4322979235912917e-05, - "loss": 0.9274, + "learning_rate": 1.4338010473348e-05, + "loss": 0.8499, "step": 13285 }, { - "epoch": 0.37701475595913736, + "epoch": 0.376491257899062, "grad_norm": 0.0, - "learning_rate": 1.4322150456618489e-05, - "loss": 0.909, + "learning_rate": 1.4337183518979739e-05, + "loss": 0.9824, "step": 13286 }, { - "epoch": 0.3770431328036322, + "epoch": 0.3765195953413245, "grad_norm": 0.0, - "learning_rate": 1.4321321640814665e-05, - "loss": 0.8645, + "learning_rate": 1.433635652807777e-05, + "loss": 0.9536, "step": 13287 }, { - "epoch": 0.37707150964812713, + "epoch": 0.37654793278358695, "grad_norm": 0.0, - "learning_rate": 1.432049278850844e-05, - "loss": 0.9685, + "learning_rate": 1.4335529500649065e-05, + "loss": 0.9361, "step": 13288 }, { - "epoch": 0.377099886492622, + "epoch": 0.37657627022584944, "grad_norm": 0.0, - "learning_rate": 1.4319663899706818e-05, - "loss": 1.0515, + "learning_rate": 1.4334702436700583e-05, + "loss": 0.9619, "step": 13289 }, { - "epoch": 0.3771282633371169, + "epoch": 0.3766046076681119, "grad_norm": 0.0, - "learning_rate": 1.43188349744168e-05, - "loss": 0.9209, + "learning_rate": 1.4333875336239293e-05, + "loss": 1.0034, "step": 13290 }, { - "epoch": 0.3771566401816118, + "epoch": 0.3766329451103743, "grad_norm": 0.0, - "learning_rate": 1.4318006012645391e-05, - "loss": 0.8953, + "learning_rate": 1.4333048199272161e-05, + "loss": 0.9419, "step": 13291 }, { - "epoch": 0.3771850170261067, + "epoch": 0.3766612825526368, "grad_norm": 0.0, - "learning_rate": 1.431717701439959e-05, - "loss": 1.0173, + "learning_rate": 1.4332221025806157e-05, + "loss": 0.9728, "step": 13292 }, { - "epoch": 0.3772133938706016, + "epoch": 0.37668961999489925, "grad_norm": 0.0, - "learning_rate": 1.4316347979686402e-05, - "loss": 0.929, + "learning_rate": 1.4331393815848242e-05, + "loss": 0.8792, "step": 13293 }, { - "epoch": 0.37724177071509646, + "epoch": 0.37671795743716174, "grad_norm": 0.0, - "learning_rate": 1.431551890851283e-05, - "loss": 0.9422, + "learning_rate": 1.4330566569405393e-05, + "loss": 0.8996, "step": 13294 }, { - "epoch": 0.3772701475595914, + "epoch": 0.3767462948794242, "grad_norm": 0.0, - "learning_rate": 1.4314689800885871e-05, - "loss": 0.9084, + "learning_rate": 1.432973928648457e-05, + "loss": 1.0204, "step": 13295 }, { - "epoch": 0.3772985244040863, + "epoch": 0.3767746323216866, "grad_norm": 0.0, - "learning_rate": 1.4313860656812537e-05, - "loss": 0.8494, + "learning_rate": 1.4328911967092748e-05, + "loss": 1.0206, "step": 13296 }, { - "epoch": 0.37732690124858115, + "epoch": 0.3768029697639491, "grad_norm": 0.0, - "learning_rate": 1.4313031476299828e-05, - "loss": 0.7873, + "learning_rate": 1.4328084611236892e-05, + "loss": 0.9265, "step": 13297 }, { - "epoch": 0.37735527809307606, + "epoch": 0.37683130720621155, "grad_norm": 0.0, - "learning_rate": 1.4312202259354746e-05, - "loss": 0.9917, + "learning_rate": 1.4327257218923976e-05, + "loss": 1.0078, "step": 13298 }, { - "epoch": 0.3773836549375709, + "epoch": 0.37685964464847405, "grad_norm": 0.0, - "learning_rate": 1.4311373005984297e-05, - "loss": 0.9353, + "learning_rate": 1.4326429790160958e-05, + "loss": 0.8563, "step": 13299 }, { - "epoch": 0.37741203178206584, + "epoch": 0.3768879820907365, "grad_norm": 0.0, - "learning_rate": 1.4310543716195489e-05, - "loss": 0.9027, + "learning_rate": 1.432560232495482e-05, + "loss": 0.9353, "step": 13300 }, { - "epoch": 0.37744040862656075, + "epoch": 0.376916319532999, "grad_norm": 0.0, - "learning_rate": 1.4309714389995324e-05, - "loss": 0.9318, + "learning_rate": 1.4324774823312526e-05, + "loss": 0.9602, "step": 13301 }, { - "epoch": 0.3774687854710556, + "epoch": 0.3769446569752614, "grad_norm": 0.0, - "learning_rate": 1.4308885027390806e-05, - "loss": 0.8527, + "learning_rate": 1.4323947285241049e-05, + "loss": 1.0549, "step": 13302 }, { - "epoch": 0.3774971623155505, + "epoch": 0.37697299441752385, "grad_norm": 0.0, - "learning_rate": 1.4308055628388946e-05, - "loss": 0.9124, + "learning_rate": 1.4323119710747357e-05, + "loss": 0.9128, "step": 13303 }, { - "epoch": 0.3775255391600454, + "epoch": 0.37700133185978635, "grad_norm": 0.0, - "learning_rate": 1.4307226192996745e-05, - "loss": 0.958, + "learning_rate": 1.4322292099838425e-05, + "loss": 0.9816, "step": 13304 }, { - "epoch": 0.3775539160045403, + "epoch": 0.3770296693020488, "grad_norm": 0.0, - "learning_rate": 1.4306396721221209e-05, - "loss": 0.8787, + "learning_rate": 1.432146445252122e-05, + "loss": 1.0025, "step": 13305 }, { - "epoch": 0.37758229284903516, + "epoch": 0.3770580067443113, "grad_norm": 0.0, - "learning_rate": 1.4305567213069346e-05, - "loss": 1.0361, + "learning_rate": 1.4320636768802712e-05, + "loss": 0.9252, "step": 13306 }, { - "epoch": 0.3776106696935301, + "epoch": 0.3770863441865737, "grad_norm": 0.0, - "learning_rate": 1.4304737668548165e-05, - "loss": 0.9249, + "learning_rate": 1.4319809048689879e-05, + "loss": 0.9631, "step": 13307 }, { - "epoch": 0.377639046538025, + "epoch": 0.37711468162883616, "grad_norm": 0.0, - "learning_rate": 1.4303908087664673e-05, - "loss": 0.9263, + "learning_rate": 1.4318981292189688e-05, + "loss": 0.8575, "step": 13308 }, { - "epoch": 0.37766742338251985, + "epoch": 0.37714301907109865, "grad_norm": 0.0, - "learning_rate": 1.4303078470425873e-05, - "loss": 0.8784, + "learning_rate": 1.4318153499309118e-05, + "loss": 0.8632, "step": 13309 }, { - "epoch": 0.37769580022701477, + "epoch": 0.3771713565133611, "grad_norm": 0.0, - "learning_rate": 1.4302248816838777e-05, - "loss": 1.0169, + "learning_rate": 1.4317325670055136e-05, + "loss": 1.0614, "step": 13310 }, { - "epoch": 0.3777241770715096, + "epoch": 0.3771996939556236, "grad_norm": 0.0, - "learning_rate": 1.4301419126910391e-05, - "loss": 0.8672, + "learning_rate": 1.4316497804434714e-05, + "loss": 0.8663, "step": 13311 }, { - "epoch": 0.37775255391600454, + "epoch": 0.377228031397886, "grad_norm": 0.0, - "learning_rate": 1.4300589400647726e-05, - "loss": 0.9935, + "learning_rate": 1.4315669902454832e-05, + "loss": 0.9248, "step": 13312 }, { - "epoch": 0.37778093076049946, + "epoch": 0.3772563688401485, "grad_norm": 0.0, - "learning_rate": 1.4299759638057786e-05, - "loss": 0.9082, + "learning_rate": 1.4314841964122455e-05, + "loss": 0.9196, "step": 13313 }, { - "epoch": 0.3778093076049943, + "epoch": 0.37728470628241095, "grad_norm": 0.0, - "learning_rate": 1.4298929839147586e-05, - "loss": 0.8893, + "learning_rate": 1.4314013989444566e-05, + "loss": 1.019, "step": 13314 }, { - "epoch": 0.37783768444948923, + "epoch": 0.3773130437246734, "grad_norm": 0.0, - "learning_rate": 1.4298100003924129e-05, - "loss": 0.9745, + "learning_rate": 1.4313185978428135e-05, + "loss": 0.9443, "step": 13315 }, { - "epoch": 0.3778660612939841, + "epoch": 0.3773413811669359, "grad_norm": 0.0, - "learning_rate": 1.4297270132394432e-05, - "loss": 1.0305, + "learning_rate": 1.4312357931080135e-05, + "loss": 0.9187, "step": 13316 }, { - "epoch": 0.377894438138479, + "epoch": 0.3773697186091983, "grad_norm": 0.0, - "learning_rate": 1.42964402245655e-05, - "loss": 0.8779, + "learning_rate": 1.4311529847407544e-05, + "loss": 0.9116, "step": 13317 }, { - "epoch": 0.3779228149829739, + "epoch": 0.3773980560514608, "grad_norm": 0.0, - "learning_rate": 1.4295610280444345e-05, - "loss": 0.8843, + "learning_rate": 1.4310701727417336e-05, + "loss": 0.913, "step": 13318 }, { - "epoch": 0.3779511918274688, + "epoch": 0.37742639349372326, "grad_norm": 0.0, - "learning_rate": 1.4294780300037973e-05, - "loss": 0.9149, + "learning_rate": 1.4309873571116486e-05, + "loss": 0.8489, "step": 13319 }, { - "epoch": 0.3779795686719637, + "epoch": 0.3774547309359857, "grad_norm": 0.0, - "learning_rate": 1.4293950283353404e-05, - "loss": 1.0524, + "learning_rate": 1.430904537851197e-05, + "loss": 0.998, "step": 13320 }, { - "epoch": 0.37800794551645855, + "epoch": 0.3774830683782482, "grad_norm": 0.0, - "learning_rate": 1.4293120230397639e-05, - "loss": 0.8845, + "learning_rate": 1.4308217149610767e-05, + "loss": 0.9741, "step": 13321 }, { - "epoch": 0.37803632236095347, + "epoch": 0.3775114058205106, "grad_norm": 0.0, - "learning_rate": 1.4292290141177697e-05, - "loss": 0.9035, + "learning_rate": 1.4307388884419848e-05, + "loss": 0.9803, "step": 13322 }, { - "epoch": 0.37806469920544833, + "epoch": 0.3775397432627731, "grad_norm": 0.0, - "learning_rate": 1.4291460015700587e-05, - "loss": 0.8978, + "learning_rate": 1.4306560582946194e-05, + "loss": 0.8823, "step": 13323 }, { - "epoch": 0.37809307604994324, + "epoch": 0.37756808070503556, "grad_norm": 0.0, - "learning_rate": 1.4290629853973321e-05, - "loss": 0.879, + "learning_rate": 1.4305732245196782e-05, + "loss": 1.0541, "step": 13324 }, { - "epoch": 0.37812145289443816, + "epoch": 0.37759641814729805, "grad_norm": 0.0, - "learning_rate": 1.4289799656002912e-05, - "loss": 1.0386, + "learning_rate": 1.4304903871178588e-05, + "loss": 1.0651, "step": 13325 }, { - "epoch": 0.378149829738933, + "epoch": 0.3776247555895605, "grad_norm": 0.0, - "learning_rate": 1.4288969421796372e-05, - "loss": 0.9594, + "learning_rate": 1.430407546089859e-05, + "loss": 0.8833, "step": 13326 }, { - "epoch": 0.37817820658342793, + "epoch": 0.37765309303182293, "grad_norm": 0.0, - "learning_rate": 1.4288139151360716e-05, - "loss": 0.9781, + "learning_rate": 1.4303247014363765e-05, + "loss": 0.9199, "step": 13327 }, { - "epoch": 0.3782065834279228, + "epoch": 0.3776814304740854, "grad_norm": 0.0, - "learning_rate": 1.4287308844702954e-05, - "loss": 0.9058, + "learning_rate": 1.4302418531581094e-05, + "loss": 0.8779, "step": 13328 }, { - "epoch": 0.3782349602724177, + "epoch": 0.37770976791634786, "grad_norm": 0.0, - "learning_rate": 1.4286478501830102e-05, - "loss": 0.9858, + "learning_rate": 1.4301590012557553e-05, + "loss": 0.9939, "step": 13329 }, { - "epoch": 0.3782633371169126, + "epoch": 0.37773810535861035, "grad_norm": 0.0, - "learning_rate": 1.4285648122749174e-05, - "loss": 1.0005, + "learning_rate": 1.4300761457300122e-05, + "loss": 0.9136, "step": 13330 }, { - "epoch": 0.3782917139614075, + "epoch": 0.3777664428008728, "grad_norm": 0.0, - "learning_rate": 1.4284817707467182e-05, - "loss": 0.8797, + "learning_rate": 1.4299932865815782e-05, + "loss": 0.9818, "step": 13331 }, { - "epoch": 0.3783200908059024, + "epoch": 0.37779478024313523, "grad_norm": 0.0, - "learning_rate": 1.4283987255991143e-05, - "loss": 0.9776, + "learning_rate": 1.429910423811151e-05, + "loss": 0.9471, "step": 13332 }, { - "epoch": 0.37834846765039726, + "epoch": 0.3778231176853977, "grad_norm": 0.0, - "learning_rate": 1.4283156768328073e-05, - "loss": 1.0056, + "learning_rate": 1.4298275574194285e-05, + "loss": 0.968, "step": 13333 }, { - "epoch": 0.3783768444948922, + "epoch": 0.37785145512766016, "grad_norm": 0.0, - "learning_rate": 1.4282326244484983e-05, - "loss": 0.8365, + "learning_rate": 1.429744687407109e-05, + "loss": 0.9655, "step": 13334 }, { - "epoch": 0.3784052213393871, + "epoch": 0.37787979256992266, "grad_norm": 0.0, - "learning_rate": 1.4281495684468892e-05, - "loss": 0.9914, + "learning_rate": 1.4296618137748903e-05, + "loss": 0.9868, "step": 13335 }, { - "epoch": 0.37843359818388195, + "epoch": 0.3779081300121851, "grad_norm": 0.0, - "learning_rate": 1.4280665088286811e-05, - "loss": 0.9272, + "learning_rate": 1.429578936523471e-05, + "loss": 0.886, "step": 13336 }, { - "epoch": 0.37846197502837686, + "epoch": 0.3779364674544476, "grad_norm": 0.0, - "learning_rate": 1.4279834455945765e-05, - "loss": 0.8438, + "learning_rate": 1.4294960556535482e-05, + "loss": 0.8686, "step": 13337 }, { - "epoch": 0.3784903518728717, + "epoch": 0.37796480489671, "grad_norm": 0.0, - "learning_rate": 1.4279003787452763e-05, - "loss": 0.914, + "learning_rate": 1.429413171165821e-05, + "loss": 1.0647, "step": 13338 }, { - "epoch": 0.37851872871736664, + "epoch": 0.37799314233897247, "grad_norm": 0.0, - "learning_rate": 1.427817308281482e-05, - "loss": 0.8439, + "learning_rate": 1.4293302830609869e-05, + "loss": 0.8346, "step": 13339 }, { - "epoch": 0.3785471055618615, + "epoch": 0.37802147978123496, "grad_norm": 0.0, - "learning_rate": 1.4277342342038963e-05, - "loss": 0.7931, + "learning_rate": 1.4292473913397449e-05, + "loss": 0.797, "step": 13340 }, { - "epoch": 0.3785754824063564, + "epoch": 0.3780498172234974, "grad_norm": 0.0, - "learning_rate": 1.4276511565132198e-05, - "loss": 0.9984, + "learning_rate": 1.4291644960027921e-05, + "loss": 1.0209, "step": 13341 }, { - "epoch": 0.3786038592508513, + "epoch": 0.3780781546657599, "grad_norm": 0.0, - "learning_rate": 1.427568075210155e-05, - "loss": 0.9398, + "learning_rate": 1.4290815970508279e-05, + "loss": 1.0938, "step": 13342 }, { - "epoch": 0.3786322360953462, + "epoch": 0.37810649210802233, "grad_norm": 0.0, - "learning_rate": 1.4274849902954034e-05, - "loss": 0.9229, + "learning_rate": 1.42899869448455e-05, + "loss": 0.8782, "step": 13343 }, { - "epoch": 0.3786606129398411, + "epoch": 0.37813482955028477, "grad_norm": 0.0, - "learning_rate": 1.4274019017696668e-05, - "loss": 0.9857, + "learning_rate": 1.4289157883046567e-05, + "loss": 0.7729, "step": 13344 }, { - "epoch": 0.37868898978433596, + "epoch": 0.37816316699254726, "grad_norm": 0.0, - "learning_rate": 1.4273188096336472e-05, - "loss": 0.9686, + "learning_rate": 1.4288328785118464e-05, + "loss": 0.9319, "step": 13345 }, { - "epoch": 0.3787173666288309, + "epoch": 0.3781915044348097, "grad_norm": 0.0, - "learning_rate": 1.4272357138880462e-05, - "loss": 1.0005, + "learning_rate": 1.4287499651068172e-05, + "loss": 0.9633, "step": 13346 }, { - "epoch": 0.3787457434733258, + "epoch": 0.3782198418770722, "grad_norm": 0.0, - "learning_rate": 1.4271526145335663e-05, - "loss": 0.8724, + "learning_rate": 1.4286670480902684e-05, + "loss": 0.9284, "step": 13347 }, { - "epoch": 0.37877412031782065, + "epoch": 0.37824817931933463, "grad_norm": 0.0, - "learning_rate": 1.4270695115709088e-05, - "loss": 0.9824, + "learning_rate": 1.4285841274628978e-05, + "loss": 0.9524, "step": 13348 }, { - "epoch": 0.37880249716231557, + "epoch": 0.3782765167615971, "grad_norm": 0.0, - "learning_rate": 1.426986405000776e-05, - "loss": 0.921, + "learning_rate": 1.4285012032254035e-05, + "loss": 0.9793, "step": 13349 }, { - "epoch": 0.3788308740068104, + "epoch": 0.37830485420385956, "grad_norm": 0.0, - "learning_rate": 1.42690329482387e-05, - "loss": 0.889, + "learning_rate": 1.4284182753784847e-05, + "loss": 0.8743, "step": 13350 }, { - "epoch": 0.37885925085130534, + "epoch": 0.378333191646122, "grad_norm": 0.0, - "learning_rate": 1.4268201810408924e-05, - "loss": 0.9415, + "learning_rate": 1.42833534392284e-05, + "loss": 0.8888, "step": 13351 }, { - "epoch": 0.3788876276958002, + "epoch": 0.3783615290883845, "grad_norm": 0.0, - "learning_rate": 1.4267370636525457e-05, - "loss": 0.9581, + "learning_rate": 1.4282524088591672e-05, + "loss": 0.8817, "step": 13352 }, { - "epoch": 0.3789160045402951, + "epoch": 0.37838986653064693, "grad_norm": 0.0, - "learning_rate": 1.4266539426595316e-05, - "loss": 0.9439, + "learning_rate": 1.4281694701881657e-05, + "loss": 0.9336, "step": 13353 }, { - "epoch": 0.37894438138479003, + "epoch": 0.37841820397290943, "grad_norm": 0.0, - "learning_rate": 1.4265708180625529e-05, - "loss": 0.944, + "learning_rate": 1.4280865279105333e-05, + "loss": 0.8349, "step": 13354 }, { - "epoch": 0.3789727582292849, + "epoch": 0.37844654141517187, "grad_norm": 0.0, - "learning_rate": 1.426487689862311e-05, - "loss": 1.0625, + "learning_rate": 1.4280035820269692e-05, + "loss": 0.9802, "step": 13355 }, { - "epoch": 0.3790011350737798, + "epoch": 0.3784748788574343, "grad_norm": 0.0, - "learning_rate": 1.4264045580595082e-05, - "loss": 0.8943, + "learning_rate": 1.4279206325381725e-05, + "loss": 0.9141, "step": 13356 }, { - "epoch": 0.37902951191827466, + "epoch": 0.3785032162996968, "grad_norm": 0.0, - "learning_rate": 1.4263214226548475e-05, - "loss": 0.9289, + "learning_rate": 1.4278376794448411e-05, + "loss": 0.9021, "step": 13357 }, { - "epoch": 0.3790578887627696, + "epoch": 0.37853155374195924, "grad_norm": 0.0, - "learning_rate": 1.4262382836490302e-05, - "loss": 0.8783, + "learning_rate": 1.4277547227476738e-05, + "loss": 0.9084, "step": 13358 }, { - "epoch": 0.3790862656072645, + "epoch": 0.37855989118422173, "grad_norm": 0.0, - "learning_rate": 1.4261551410427592e-05, - "loss": 0.8367, + "learning_rate": 1.4276717624473697e-05, + "loss": 0.9978, "step": 13359 }, { - "epoch": 0.37911464245175935, + "epoch": 0.37858822862648417, "grad_norm": 0.0, - "learning_rate": 1.4260719948367364e-05, - "loss": 0.9661, + "learning_rate": 1.4275887985446279e-05, + "loss": 0.8109, "step": 13360 }, { - "epoch": 0.37914301929625427, + "epoch": 0.3786165660687466, "grad_norm": 0.0, - "learning_rate": 1.4259888450316641e-05, - "loss": 0.8583, + "learning_rate": 1.4275058310401466e-05, + "loss": 0.8697, "step": 13361 }, { - "epoch": 0.37917139614074913, + "epoch": 0.3786449035110091, "grad_norm": 0.0, - "learning_rate": 1.4259056916282455e-05, - "loss": 0.8958, + "learning_rate": 1.4274228599346249e-05, + "loss": 0.8902, "step": 13362 }, { - "epoch": 0.37919977298524404, + "epoch": 0.37867324095327154, "grad_norm": 0.0, - "learning_rate": 1.4258225346271817e-05, - "loss": 1.1178, + "learning_rate": 1.4273398852287619e-05, + "loss": 0.9407, "step": 13363 }, { - "epoch": 0.37922814982973896, + "epoch": 0.37870157839553403, "grad_norm": 0.0, - "learning_rate": 1.4257393740291762e-05, - "loss": 0.9767, + "learning_rate": 1.4272569069232563e-05, + "loss": 0.8585, "step": 13364 }, { - "epoch": 0.3792565266742338, + "epoch": 0.37872991583779647, "grad_norm": 0.0, - "learning_rate": 1.4256562098349312e-05, - "loss": 0.8781, + "learning_rate": 1.427173925018807e-05, + "loss": 0.9808, "step": 13365 }, { - "epoch": 0.37928490351872873, + "epoch": 0.37875825328005897, "grad_norm": 0.0, - "learning_rate": 1.4255730420451485e-05, - "loss": 1.03, + "learning_rate": 1.427090939516113e-05, + "loss": 0.9787, "step": 13366 }, { - "epoch": 0.3793132803632236, + "epoch": 0.3787865907223214, "grad_norm": 0.0, - "learning_rate": 1.4254898706605316e-05, - "loss": 1.0193, + "learning_rate": 1.4270079504158738e-05, + "loss": 0.945, "step": 13367 }, { - "epoch": 0.3793416572077185, + "epoch": 0.37881492816458384, "grad_norm": 0.0, - "learning_rate": 1.4254066956817824e-05, - "loss": 0.8828, + "learning_rate": 1.4269249577187877e-05, + "loss": 0.9392, "step": 13368 }, { - "epoch": 0.37937003405221337, + "epoch": 0.37884326560684634, "grad_norm": 0.0, - "learning_rate": 1.4253235171096037e-05, - "loss": 0.8827, + "learning_rate": 1.4268419614255545e-05, + "loss": 0.935, "step": 13369 }, { - "epoch": 0.3793984108967083, + "epoch": 0.3788716030491088, "grad_norm": 0.0, - "learning_rate": 1.4252403349446986e-05, - "loss": 0.8807, + "learning_rate": 1.4267589615368727e-05, + "loss": 1.0149, "step": 13370 }, { - "epoch": 0.3794267877412032, + "epoch": 0.37889994049137127, "grad_norm": 0.0, - "learning_rate": 1.4251571491877689e-05, - "loss": 0.8506, + "learning_rate": 1.4266759580534417e-05, + "loss": 0.9401, "step": 13371 }, { - "epoch": 0.37945516458569806, + "epoch": 0.3789282779336337, "grad_norm": 0.0, - "learning_rate": 1.425073959839518e-05, - "loss": 0.9089, + "learning_rate": 1.4265929509759606e-05, + "loss": 0.9642, "step": 13372 }, { - "epoch": 0.37948354143019297, + "epoch": 0.37895661537589614, "grad_norm": 0.0, - "learning_rate": 1.4249907669006478e-05, - "loss": 0.867, + "learning_rate": 1.4265099403051291e-05, + "loss": 0.9737, "step": 13373 }, { - "epoch": 0.37951191827468783, + "epoch": 0.37898495281815864, "grad_norm": 0.0, - "learning_rate": 1.4249075703718615e-05, - "loss": 0.9512, + "learning_rate": 1.4264269260416455e-05, + "loss": 0.9244, "step": 13374 }, { - "epoch": 0.37954029511918275, + "epoch": 0.3790132902604211, "grad_norm": 0.0, - "learning_rate": 1.424824370253862e-05, - "loss": 0.9548, + "learning_rate": 1.4263439081862095e-05, + "loss": 0.9953, "step": 13375 }, { - "epoch": 0.37956867196367766, + "epoch": 0.37904162770268357, "grad_norm": 0.0, - "learning_rate": 1.424741166547352e-05, - "loss": 0.8052, + "learning_rate": 1.426260886739521e-05, + "loss": 0.9547, "step": 13376 }, { - "epoch": 0.3795970488081725, + "epoch": 0.379069965144946, "grad_norm": 0.0, - "learning_rate": 1.4246579592530342e-05, - "loss": 0.8968, + "learning_rate": 1.4261778617022786e-05, + "loss": 0.7752, "step": 13377 }, { - "epoch": 0.37962542565266744, + "epoch": 0.3790983025872085, "grad_norm": 0.0, - "learning_rate": 1.4245747483716117e-05, - "loss": 0.8806, + "learning_rate": 1.4260948330751814e-05, + "loss": 0.959, "step": 13378 }, { - "epoch": 0.3796538024971623, + "epoch": 0.37912664002947094, "grad_norm": 0.0, - "learning_rate": 1.424491533903787e-05, - "loss": 0.9982, + "learning_rate": 1.4260118008589294e-05, + "loss": 0.9411, "step": 13379 }, { - "epoch": 0.3796821793416572, + "epoch": 0.3791549774717334, "grad_norm": 0.0, - "learning_rate": 1.4244083158502633e-05, - "loss": 0.9301, + "learning_rate": 1.4259287650542217e-05, + "loss": 0.9288, "step": 13380 }, { - "epoch": 0.3797105561861521, + "epoch": 0.3791833149139959, "grad_norm": 0.0, - "learning_rate": 1.4243250942117437e-05, - "loss": 0.8617, + "learning_rate": 1.4258457256617581e-05, + "loss": 0.9741, "step": 13381 }, { - "epoch": 0.379738933030647, + "epoch": 0.3792116523562583, "grad_norm": 0.0, - "learning_rate": 1.4242418689889306e-05, - "loss": 0.94, + "learning_rate": 1.425762682682238e-05, + "loss": 0.8005, "step": 13382 }, { - "epoch": 0.3797673098751419, + "epoch": 0.3792399897985208, "grad_norm": 0.0, - "learning_rate": 1.4241586401825274e-05, - "loss": 0.9682, + "learning_rate": 1.4256796361163603e-05, + "loss": 0.8415, "step": 13383 }, { - "epoch": 0.37979568671963676, + "epoch": 0.37926832724078324, "grad_norm": 0.0, - "learning_rate": 1.4240754077932373e-05, - "loss": 0.9393, + "learning_rate": 1.425596585964825e-05, + "loss": 0.9186, "step": 13384 }, { - "epoch": 0.3798240635641317, + "epoch": 0.3792966646830457, "grad_norm": 0.0, - "learning_rate": 1.4239921718217632e-05, - "loss": 0.934, + "learning_rate": 1.4255135322283318e-05, + "loss": 0.8815, "step": 13385 }, { - "epoch": 0.37985244040862653, + "epoch": 0.3793250021253082, "grad_norm": 0.0, - "learning_rate": 1.4239089322688078e-05, - "loss": 0.8239, + "learning_rate": 1.42543047490758e-05, + "loss": 0.9653, "step": 13386 }, { - "epoch": 0.37988081725312145, + "epoch": 0.3793533395675706, "grad_norm": 0.0, - "learning_rate": 1.4238256891350748e-05, - "loss": 0.8707, + "learning_rate": 1.425347414003269e-05, + "loss": 1.1187, "step": 13387 }, { - "epoch": 0.37990919409761637, + "epoch": 0.3793816770098331, "grad_norm": 0.0, - "learning_rate": 1.4237424424212673e-05, - "loss": 0.9882, + "learning_rate": 1.4252643495160994e-05, + "loss": 0.8374, "step": 13388 }, { - "epoch": 0.3799375709421112, + "epoch": 0.37941001445209555, "grad_norm": 0.0, - "learning_rate": 1.4236591921280883e-05, - "loss": 0.8975, + "learning_rate": 1.4251812814467701e-05, + "loss": 0.8257, "step": 13389 }, { - "epoch": 0.37996594778660614, + "epoch": 0.37943835189435804, "grad_norm": 0.0, - "learning_rate": 1.4235759382562407e-05, - "loss": 0.944, + "learning_rate": 1.4250982097959806e-05, + "loss": 0.8778, "step": 13390 }, { - "epoch": 0.379994324631101, + "epoch": 0.3794666893366205, "grad_norm": 0.0, - "learning_rate": 1.4234926808064284e-05, - "loss": 0.9919, + "learning_rate": 1.4250151345644314e-05, + "loss": 0.9938, "step": 13391 }, { - "epoch": 0.3800227014755959, + "epoch": 0.3794950267788829, "grad_norm": 0.0, - "learning_rate": 1.4234094197793543e-05, - "loss": 0.9795, + "learning_rate": 1.4249320557528217e-05, + "loss": 0.9743, "step": 13392 }, { - "epoch": 0.38005107832009083, + "epoch": 0.3795233642211454, "grad_norm": 0.0, - "learning_rate": 1.4233261551757221e-05, - "loss": 0.8756, + "learning_rate": 1.4248489733618516e-05, + "loss": 1.0204, "step": 13393 }, { - "epoch": 0.3800794551645857, + "epoch": 0.37955170166340785, "grad_norm": 0.0, - "learning_rate": 1.4232428869962345e-05, - "loss": 0.9322, + "learning_rate": 1.424765887392221e-05, + "loss": 0.7941, "step": 13394 }, { - "epoch": 0.3801078320090806, + "epoch": 0.37958003910567034, "grad_norm": 0.0, - "learning_rate": 1.4231596152415954e-05, - "loss": 0.9572, + "learning_rate": 1.4246827978446293e-05, + "loss": 1.0048, "step": 13395 }, { - "epoch": 0.38013620885357546, + "epoch": 0.3796083765479328, "grad_norm": 0.0, - "learning_rate": 1.4230763399125079e-05, - "loss": 0.9144, + "learning_rate": 1.4245997047197767e-05, + "loss": 0.9844, "step": 13396 }, { - "epoch": 0.3801645856980704, + "epoch": 0.3796367139901952, "grad_norm": 0.0, - "learning_rate": 1.4229930610096754e-05, - "loss": 0.9801, + "learning_rate": 1.4245166080183633e-05, + "loss": 1.0286, "step": 13397 }, { - "epoch": 0.3801929625425653, + "epoch": 0.3796650514324577, "grad_norm": 0.0, - "learning_rate": 1.4229097785338018e-05, - "loss": 0.9358, + "learning_rate": 1.4244335077410889e-05, + "loss": 1.014, "step": 13398 }, { - "epoch": 0.38022133938706015, + "epoch": 0.37969338887472015, "grad_norm": 0.0, - "learning_rate": 1.42282649248559e-05, - "loss": 0.847, + "learning_rate": 1.4243504038886531e-05, + "loss": 1.051, "step": 13399 }, { - "epoch": 0.38024971623155507, + "epoch": 0.37972172631698264, "grad_norm": 0.0, - "learning_rate": 1.422743202865744e-05, - "loss": 1.119, + "learning_rate": 1.4242672964617565e-05, + "loss": 1.0588, "step": 13400 }, { - "epoch": 0.38027809307604993, + "epoch": 0.3797500637592451, "grad_norm": 0.0, - "learning_rate": 1.4226599096749673e-05, - "loss": 0.9343, + "learning_rate": 1.4241841854610992e-05, + "loss": 0.883, "step": 13401 }, { - "epoch": 0.38030646992054484, + "epoch": 0.3797784012015076, "grad_norm": 0.0, - "learning_rate": 1.4225766129139631e-05, - "loss": 1.0069, + "learning_rate": 1.4241010708873808e-05, + "loss": 0.9061, "step": 13402 }, { - "epoch": 0.3803348467650397, + "epoch": 0.37980673864377, "grad_norm": 0.0, - "learning_rate": 1.4224933125834353e-05, - "loss": 0.8766, + "learning_rate": 1.4240179527413014e-05, + "loss": 0.909, "step": 13403 }, { - "epoch": 0.3803632236095346, + "epoch": 0.37983507608603245, "grad_norm": 0.0, - "learning_rate": 1.4224100086840875e-05, - "loss": 0.8568, + "learning_rate": 1.4239348310235613e-05, + "loss": 0.9006, "step": 13404 }, { - "epoch": 0.38039160045402953, + "epoch": 0.37986341352829495, "grad_norm": 0.0, - "learning_rate": 1.4223267012166234e-05, - "loss": 0.925, + "learning_rate": 1.4238517057348609e-05, + "loss": 0.949, "step": 13405 }, { - "epoch": 0.3804199772985244, + "epoch": 0.3798917509705574, "grad_norm": 0.0, - "learning_rate": 1.4222433901817468e-05, - "loss": 0.9336, + "learning_rate": 1.4237685768759002e-05, + "loss": 0.9909, "step": 13406 }, { - "epoch": 0.3804483541430193, + "epoch": 0.3799200884128199, "grad_norm": 0.0, - "learning_rate": 1.422160075580161e-05, - "loss": 0.9221, + "learning_rate": 1.4236854444473793e-05, + "loss": 0.977, "step": 13407 }, { - "epoch": 0.38047673098751417, + "epoch": 0.3799484258550823, "grad_norm": 0.0, - "learning_rate": 1.4220767574125702e-05, - "loss": 0.9584, + "learning_rate": 1.4236023084499987e-05, + "loss": 1.08, "step": 13408 }, { - "epoch": 0.3805051078320091, + "epoch": 0.37997676329734476, "grad_norm": 0.0, - "learning_rate": 1.4219934356796783e-05, - "loss": 0.7552, + "learning_rate": 1.4235191688844585e-05, + "loss": 1.0083, "step": 13409 }, { - "epoch": 0.380533484676504, + "epoch": 0.38000510073960725, "grad_norm": 0.0, - "learning_rate": 1.4219101103821884e-05, - "loss": 0.8963, + "learning_rate": 1.423436025751459e-05, + "loss": 0.9315, "step": 13410 }, { - "epoch": 0.38056186152099886, + "epoch": 0.3800334381818697, "grad_norm": 0.0, - "learning_rate": 1.4218267815208053e-05, - "loss": 0.9085, + "learning_rate": 1.4233528790517007e-05, + "loss": 1.0037, "step": 13411 }, { - "epoch": 0.38059023836549377, + "epoch": 0.3800617756241322, "grad_norm": 0.0, - "learning_rate": 1.4217434490962321e-05, - "loss": 1.0292, + "learning_rate": 1.4232697287858836e-05, + "loss": 0.8962, "step": 13412 }, { - "epoch": 0.38061861520998863, + "epoch": 0.3800901130663946, "grad_norm": 0.0, - "learning_rate": 1.421660113109173e-05, - "loss": 1.002, + "learning_rate": 1.4231865749547086e-05, + "loss": 0.9298, "step": 13413 }, { - "epoch": 0.38064699205448355, + "epoch": 0.3801184505086571, "grad_norm": 0.0, - "learning_rate": 1.421576773560332e-05, - "loss": 0.8838, + "learning_rate": 1.4231034175588762e-05, + "loss": 0.9254, "step": 13414 }, { - "epoch": 0.38067536889897846, + "epoch": 0.38014678795091955, "grad_norm": 0.0, - "learning_rate": 1.4214934304504133e-05, - "loss": 0.9165, + "learning_rate": 1.4230202565990865e-05, + "loss": 0.9585, "step": 13415 }, { - "epoch": 0.3807037457434733, + "epoch": 0.380175125393182, "grad_norm": 0.0, - "learning_rate": 1.4214100837801206e-05, - "loss": 0.8367, + "learning_rate": 1.4229370920760398e-05, + "loss": 0.8935, "step": 13416 }, { - "epoch": 0.38073212258796824, + "epoch": 0.3802034628354445, "grad_norm": 0.0, - "learning_rate": 1.4213267335501578e-05, - "loss": 0.9372, + "learning_rate": 1.422853923990437e-05, + "loss": 1.0182, "step": 13417 }, { - "epoch": 0.3807604994324631, + "epoch": 0.3802318002777069, "grad_norm": 0.0, - "learning_rate": 1.4212433797612293e-05, - "loss": 0.9322, + "learning_rate": 1.4227707523429788e-05, + "loss": 0.9208, "step": 13418 }, { - "epoch": 0.380788876276958, + "epoch": 0.3802601377199694, "grad_norm": 0.0, - "learning_rate": 1.421160022414039e-05, - "loss": 0.8928, + "learning_rate": 1.4226875771343656e-05, + "loss": 0.9949, "step": 13419 }, { - "epoch": 0.38081725312145287, + "epoch": 0.38028847516223185, "grad_norm": 0.0, - "learning_rate": 1.4210766615092905e-05, - "loss": 0.9648, + "learning_rate": 1.4226043983652975e-05, + "loss": 0.9004, "step": 13420 }, { - "epoch": 0.3808456299659478, + "epoch": 0.3803168126044943, "grad_norm": 0.0, - "learning_rate": 1.4209932970476891e-05, - "loss": 0.8883, + "learning_rate": 1.422521216036476e-05, + "loss": 1.0569, "step": 13421 }, { - "epoch": 0.3808740068104427, + "epoch": 0.3803451500467568, "grad_norm": 0.0, - "learning_rate": 1.4209099290299384e-05, - "loss": 0.8723, + "learning_rate": 1.4224380301486013e-05, + "loss": 0.9335, "step": 13422 }, { - "epoch": 0.38090238365493756, + "epoch": 0.3803734874890192, "grad_norm": 0.0, - "learning_rate": 1.4208265574567426e-05, - "loss": 1.0291, + "learning_rate": 1.4223548407023743e-05, + "loss": 0.8833, "step": 13423 }, { - "epoch": 0.3809307604994325, + "epoch": 0.3804018249312817, "grad_norm": 0.0, - "learning_rate": 1.4207431823288058e-05, - "loss": 1.0076, + "learning_rate": 1.4222716476984953e-05, + "loss": 0.9942, "step": 13424 }, { - "epoch": 0.38095913734392733, + "epoch": 0.38043016237354416, "grad_norm": 0.0, - "learning_rate": 1.4206598036468326e-05, - "loss": 0.9305, + "learning_rate": 1.4221884511376658e-05, + "loss": 0.9292, "step": 13425 }, { - "epoch": 0.38098751418842225, + "epoch": 0.38045849981580665, "grad_norm": 0.0, - "learning_rate": 1.4205764214115272e-05, - "loss": 1.0878, + "learning_rate": 1.4221052510205861e-05, + "loss": 0.815, "step": 13426 }, { - "epoch": 0.38101589103291716, + "epoch": 0.3804868372580691, "grad_norm": 0.0, - "learning_rate": 1.4204930356235936e-05, - "loss": 0.9339, + "learning_rate": 1.4220220473479574e-05, + "loss": 0.9119, "step": 13427 }, { - "epoch": 0.381044267877412, + "epoch": 0.3805151747003315, "grad_norm": 0.0, - "learning_rate": 1.4204096462837362e-05, - "loss": 0.8067, + "learning_rate": 1.4219388401204796e-05, + "loss": 0.7516, "step": 13428 }, { - "epoch": 0.38107264472190694, + "epoch": 0.380543512142594, "grad_norm": 0.0, - "learning_rate": 1.4203262533926601e-05, - "loss": 0.9899, + "learning_rate": 1.4218556293388548e-05, + "loss": 0.9294, "step": 13429 }, { - "epoch": 0.3811010215664018, + "epoch": 0.38057184958485646, "grad_norm": 0.0, - "learning_rate": 1.420242856951069e-05, - "loss": 0.956, + "learning_rate": 1.4217724150037831e-05, + "loss": 0.9559, "step": 13430 }, { - "epoch": 0.3811293984108967, + "epoch": 0.38060018702711895, "grad_norm": 0.0, - "learning_rate": 1.4201594569596675e-05, - "loss": 1.0309, + "learning_rate": 1.4216891971159659e-05, + "loss": 0.8686, "step": 13431 }, { - "epoch": 0.3811577752553916, + "epoch": 0.3806285244693814, "grad_norm": 0.0, - "learning_rate": 1.4200760534191606e-05, - "loss": 0.9361, + "learning_rate": 1.4216059756761038e-05, + "loss": 0.9278, "step": 13432 }, { - "epoch": 0.3811861520998865, + "epoch": 0.38065686191164383, "grad_norm": 0.0, - "learning_rate": 1.419992646330252e-05, - "loss": 0.9115, + "learning_rate": 1.4215227506848982e-05, + "loss": 1.0894, "step": 13433 }, { - "epoch": 0.3812145289443814, + "epoch": 0.3806851993539063, "grad_norm": 0.0, - "learning_rate": 1.4199092356936468e-05, - "loss": 0.9774, + "learning_rate": 1.4214395221430501e-05, + "loss": 0.7963, "step": 13434 }, { - "epoch": 0.38124290578887626, + "epoch": 0.38071353679616876, "grad_norm": 0.0, - "learning_rate": 1.4198258215100496e-05, - "loss": 0.8784, + "learning_rate": 1.4213562900512603e-05, + "loss": 0.8544, "step": 13435 }, { - "epoch": 0.3812712826333712, + "epoch": 0.38074187423843125, "grad_norm": 0.0, - "learning_rate": 1.4197424037801643e-05, - "loss": 0.958, + "learning_rate": 1.4212730544102297e-05, + "loss": 0.9548, "step": 13436 }, { - "epoch": 0.38129965947786604, + "epoch": 0.3807702116806937, "grad_norm": 0.0, - "learning_rate": 1.4196589825046962e-05, - "loss": 0.8943, + "learning_rate": 1.4211898152206598e-05, + "loss": 0.9058, "step": 13437 }, { - "epoch": 0.38132803632236095, + "epoch": 0.3807985491229562, "grad_norm": 0.0, - "learning_rate": 1.4195755576843502e-05, - "loss": 0.9747, + "learning_rate": 1.421106572483252e-05, + "loss": 0.9878, "step": 13438 }, { - "epoch": 0.38135641316685587, + "epoch": 0.3808268865652186, "grad_norm": 0.0, - "learning_rate": 1.4194921293198304e-05, - "loss": 0.8332, + "learning_rate": 1.421023326198707e-05, + "loss": 0.8499, "step": 13439 }, { - "epoch": 0.3813847900113507, + "epoch": 0.38085522400748106, "grad_norm": 0.0, - "learning_rate": 1.4194086974118415e-05, - "loss": 0.9014, + "learning_rate": 1.4209400763677263e-05, + "loss": 0.9434, "step": 13440 }, { - "epoch": 0.38141316685584564, + "epoch": 0.38088356144974356, "grad_norm": 0.0, - "learning_rate": 1.4193252619610886e-05, - "loss": 0.8985, + "learning_rate": 1.4208568229910106e-05, + "loss": 0.9415, "step": 13441 }, { - "epoch": 0.3814415437003405, + "epoch": 0.380911898892006, "grad_norm": 0.0, - "learning_rate": 1.4192418229682768e-05, - "loss": 0.8602, + "learning_rate": 1.4207735660692621e-05, + "loss": 0.9389, "step": 13442 }, { - "epoch": 0.3814699205448354, + "epoch": 0.3809402363342685, "grad_norm": 0.0, - "learning_rate": 1.4191583804341098e-05, - "loss": 0.8675, + "learning_rate": 1.4206903056031813e-05, + "loss": 0.9811, "step": 13443 }, { - "epoch": 0.38149829738933033, + "epoch": 0.3809685737765309, "grad_norm": 0.0, - "learning_rate": 1.4190749343592934e-05, - "loss": 0.8897, + "learning_rate": 1.4206070415934701e-05, + "loss": 0.9686, "step": 13444 }, { - "epoch": 0.3815266742338252, + "epoch": 0.38099691121879337, "grad_norm": 0.0, - "learning_rate": 1.4189914847445323e-05, - "loss": 0.9032, + "learning_rate": 1.4205237740408291e-05, + "loss": 1.0095, "step": 13445 }, { - "epoch": 0.3815550510783201, + "epoch": 0.38102524866105586, "grad_norm": 0.0, - "learning_rate": 1.4189080315905313e-05, - "loss": 0.9719, + "learning_rate": 1.4204405029459607e-05, + "loss": 0.8549, "step": 13446 }, { - "epoch": 0.38158342792281497, + "epoch": 0.3810535861033183, "grad_norm": 0.0, - "learning_rate": 1.418824574897995e-05, - "loss": 0.9576, + "learning_rate": 1.4203572283095657e-05, + "loss": 0.9744, "step": 13447 }, { - "epoch": 0.3816118047673099, + "epoch": 0.3810819235455808, "grad_norm": 0.0, - "learning_rate": 1.4187411146676292e-05, - "loss": 0.9469, + "learning_rate": 1.4202739501323457e-05, + "loss": 0.944, "step": 13448 }, { - "epoch": 0.38164018161180474, + "epoch": 0.38111026098784323, "grad_norm": 0.0, - "learning_rate": 1.418657650900138e-05, - "loss": 0.9831, + "learning_rate": 1.420190668415002e-05, + "loss": 0.9939, "step": 13449 }, { - "epoch": 0.38166855845629966, + "epoch": 0.3811385984301057, "grad_norm": 0.0, - "learning_rate": 1.4185741835962268e-05, - "loss": 0.9666, + "learning_rate": 1.4201073831582361e-05, + "loss": 0.94, "step": 13450 }, { - "epoch": 0.38169693530079457, + "epoch": 0.38116693587236816, "grad_norm": 0.0, - "learning_rate": 1.4184907127566006e-05, - "loss": 0.8305, + "learning_rate": 1.4200240943627501e-05, + "loss": 0.8456, "step": 13451 }, { - "epoch": 0.38172531214528943, + "epoch": 0.3811952733146306, "grad_norm": 0.0, - "learning_rate": 1.4184072383819646e-05, - "loss": 0.9081, + "learning_rate": 1.4199408020292451e-05, + "loss": 0.9214, "step": 13452 }, { - "epoch": 0.38175368898978435, + "epoch": 0.3812236107568931, "grad_norm": 0.0, - "learning_rate": 1.4183237604730243e-05, - "loss": 0.9283, + "learning_rate": 1.4198575061584225e-05, + "loss": 0.9135, "step": 13453 }, { - "epoch": 0.3817820658342792, + "epoch": 0.38125194819915553, "grad_norm": 0.0, - "learning_rate": 1.4182402790304839e-05, - "loss": 0.8854, + "learning_rate": 1.4197742067509845e-05, + "loss": 1.0019, "step": 13454 }, { - "epoch": 0.3818104426787741, + "epoch": 0.381280285641418, "grad_norm": 0.0, - "learning_rate": 1.4181567940550492e-05, - "loss": 0.8588, + "learning_rate": 1.4196909038076326e-05, + "loss": 0.9522, "step": 13455 }, { - "epoch": 0.38183881952326904, + "epoch": 0.38130862308368046, "grad_norm": 0.0, - "learning_rate": 1.4180733055474254e-05, - "loss": 0.9771, + "learning_rate": 1.419607597329068e-05, + "loss": 0.9001, "step": 13456 }, { - "epoch": 0.3818671963677639, + "epoch": 0.3813369605259429, "grad_norm": 0.0, - "learning_rate": 1.4179898135083172e-05, - "loss": 0.9908, + "learning_rate": 1.4195242873159928e-05, + "loss": 0.8361, "step": 13457 }, { - "epoch": 0.3818955732122588, + "epoch": 0.3813652979682054, "grad_norm": 0.0, - "learning_rate": 1.4179063179384307e-05, - "loss": 0.8852, + "learning_rate": 1.419440973769109e-05, + "loss": 0.9945, "step": 13458 }, { - "epoch": 0.38192395005675367, + "epoch": 0.38139363541046784, "grad_norm": 0.0, - "learning_rate": 1.4178228188384704e-05, - "loss": 0.9616, + "learning_rate": 1.4193576566891181e-05, + "loss": 0.9872, "step": 13459 }, { - "epoch": 0.3819523269012486, + "epoch": 0.38142197285273033, "grad_norm": 0.0, - "learning_rate": 1.4177393162091421e-05, - "loss": 0.9999, + "learning_rate": 1.4192743360767219e-05, + "loss": 0.8165, "step": 13460 }, { - "epoch": 0.3819807037457435, + "epoch": 0.38145031029499277, "grad_norm": 0.0, - "learning_rate": 1.417655810051151e-05, - "loss": 0.8669, + "learning_rate": 1.4191910119326222e-05, + "loss": 0.9708, "step": 13461 }, { - "epoch": 0.38200908059023836, + "epoch": 0.38147864773725526, "grad_norm": 0.0, - "learning_rate": 1.4175723003652027e-05, - "loss": 0.9454, + "learning_rate": 1.4191076842575209e-05, + "loss": 0.9453, "step": 13462 }, { - "epoch": 0.3820374574347333, + "epoch": 0.3815069851795177, "grad_norm": 0.0, - "learning_rate": 1.4174887871520022e-05, - "loss": 0.9726, + "learning_rate": 1.41902435305212e-05, + "loss": 0.9013, "step": 13463 }, { - "epoch": 0.38206583427922813, + "epoch": 0.38153532262178014, "grad_norm": 0.0, - "learning_rate": 1.4174052704122552e-05, - "loss": 0.9509, + "learning_rate": 1.4189410183171214e-05, + "loss": 1.0092, "step": 13464 }, { - "epoch": 0.38209421112372305, + "epoch": 0.38156366006404263, "grad_norm": 0.0, - "learning_rate": 1.417321750146667e-05, - "loss": 0.9925, + "learning_rate": 1.4188576800532268e-05, + "loss": 1.0293, "step": 13465 }, { - "epoch": 0.3821225879682179, + "epoch": 0.38159199750630507, "grad_norm": 0.0, - "learning_rate": 1.4172382263559432e-05, - "loss": 0.9464, + "learning_rate": 1.4187743382611388e-05, + "loss": 0.9474, "step": 13466 }, { - "epoch": 0.3821509648127128, + "epoch": 0.38162033494856756, "grad_norm": 0.0, - "learning_rate": 1.4171546990407896e-05, - "loss": 0.8789, + "learning_rate": 1.418690992941559e-05, + "loss": 0.9033, "step": 13467 }, { - "epoch": 0.38217934165720774, + "epoch": 0.38164867239083, "grad_norm": 0.0, - "learning_rate": 1.4170711682019111e-05, - "loss": 0.8909, + "learning_rate": 1.4186076440951895e-05, + "loss": 1.0513, "step": 13468 }, { - "epoch": 0.3822077185017026, + "epoch": 0.38167700983309244, "grad_norm": 0.0, - "learning_rate": 1.4169876338400141e-05, - "loss": 0.9947, + "learning_rate": 1.418524291722732e-05, + "loss": 0.9814, "step": 13469 }, { - "epoch": 0.3822360953461975, + "epoch": 0.38170534727535493, "grad_norm": 0.0, - "learning_rate": 1.416904095955804e-05, - "loss": 0.8649, + "learning_rate": 1.4184409358248893e-05, + "loss": 1.0141, "step": 13470 }, { - "epoch": 0.3822644721906924, + "epoch": 0.38173368471761737, "grad_norm": 0.0, - "learning_rate": 1.4168205545499856e-05, - "loss": 0.7772, + "learning_rate": 1.418357576402363e-05, + "loss": 0.9221, "step": 13471 }, { - "epoch": 0.3822928490351873, + "epoch": 0.38176202215987987, "grad_norm": 0.0, - "learning_rate": 1.4167370096232657e-05, - "loss": 0.9055, + "learning_rate": 1.4182742134558555e-05, + "loss": 0.9165, "step": 13472 }, { - "epoch": 0.3823212258796822, + "epoch": 0.3817903596021423, "grad_norm": 0.0, - "learning_rate": 1.4166534611763495e-05, - "loss": 0.9679, + "learning_rate": 1.4181908469860695e-05, + "loss": 0.8625, "step": 13473 }, { - "epoch": 0.38234960272417706, + "epoch": 0.3818186970444048, "grad_norm": 0.0, - "learning_rate": 1.4165699092099425e-05, - "loss": 0.9016, + "learning_rate": 1.418107476993706e-05, + "loss": 1.0002, "step": 13474 }, { - "epoch": 0.382377979568672, + "epoch": 0.38184703448666724, "grad_norm": 0.0, - "learning_rate": 1.4164863537247509e-05, - "loss": 0.8421, + "learning_rate": 1.4180241034794684e-05, + "loss": 0.939, "step": 13475 }, { - "epoch": 0.38240635641316684, + "epoch": 0.3818753719289297, "grad_norm": 0.0, - "learning_rate": 1.4164027947214804e-05, - "loss": 0.8721, + "learning_rate": 1.4179407264440582e-05, + "loss": 1.0015, "step": 13476 }, { - "epoch": 0.38243473325766175, + "epoch": 0.38190370937119217, "grad_norm": 0.0, - "learning_rate": 1.4163192322008367e-05, - "loss": 0.8475, + "learning_rate": 1.4178573458881784e-05, + "loss": 0.923, "step": 13477 }, { - "epoch": 0.38246311010215667, + "epoch": 0.3819320468134546, "grad_norm": 0.0, - "learning_rate": 1.4162356661635262e-05, - "loss": 0.9843, + "learning_rate": 1.4177739618125305e-05, + "loss": 0.9131, "step": 13478 }, { - "epoch": 0.3824914869466515, + "epoch": 0.3819603842557171, "grad_norm": 0.0, - "learning_rate": 1.4161520966102538e-05, - "loss": 0.8874, + "learning_rate": 1.417690574217818e-05, + "loss": 0.9747, "step": 13479 }, { - "epoch": 0.38251986379114644, + "epoch": 0.38198872169797954, "grad_norm": 0.0, - "learning_rate": 1.4160685235417262e-05, - "loss": 0.9256, + "learning_rate": 1.4176071831047425e-05, + "loss": 0.89, "step": 13480 }, { - "epoch": 0.3825482406356413, + "epoch": 0.382017059140242, "grad_norm": 0.0, - "learning_rate": 1.4159849469586489e-05, - "loss": 0.8896, + "learning_rate": 1.4175237884740068e-05, + "loss": 0.8652, "step": 13481 }, { - "epoch": 0.3825766174801362, + "epoch": 0.38204539658250447, "grad_norm": 0.0, - "learning_rate": 1.4159013668617278e-05, - "loss": 0.9666, + "learning_rate": 1.4174403903263127e-05, + "loss": 0.9425, "step": 13482 }, { - "epoch": 0.3826049943246311, + "epoch": 0.3820737340247669, "grad_norm": 0.0, - "learning_rate": 1.4158177832516695e-05, - "loss": 0.9799, + "learning_rate": 1.4173569886623633e-05, + "loss": 0.9192, "step": 13483 }, { - "epoch": 0.382633371169126, + "epoch": 0.3821020714670294, "grad_norm": 0.0, - "learning_rate": 1.4157341961291798e-05, - "loss": 0.9574, + "learning_rate": 1.4172735834828613e-05, + "loss": 0.9513, "step": 13484 }, { - "epoch": 0.3826617480136209, + "epoch": 0.38213040890929184, "grad_norm": 0.0, - "learning_rate": 1.4156506054949643e-05, - "loss": 0.9881, + "learning_rate": 1.4171901747885088e-05, + "loss": 0.9537, "step": 13485 }, { - "epoch": 0.38269012485811577, + "epoch": 0.38215874635155433, "grad_norm": 0.0, - "learning_rate": 1.4155670113497297e-05, - "loss": 0.9844, + "learning_rate": 1.4171067625800083e-05, + "loss": 0.9394, "step": 13486 }, { - "epoch": 0.3827185017026107, + "epoch": 0.3821870837938168, "grad_norm": 0.0, - "learning_rate": 1.4154834136941817e-05, - "loss": 0.8238, + "learning_rate": 1.417023346858063e-05, + "loss": 0.9761, "step": 13487 }, { - "epoch": 0.38274687854710554, + "epoch": 0.3822154212360792, "grad_norm": 0.0, - "learning_rate": 1.4153998125290269e-05, - "loss": 0.9243, + "learning_rate": 1.4169399276233754e-05, + "loss": 0.9384, "step": 13488 }, { - "epoch": 0.38277525539160046, + "epoch": 0.3822437586783417, "grad_norm": 0.0, - "learning_rate": 1.4153162078549708e-05, - "loss": 0.9908, + "learning_rate": 1.4168565048766475e-05, + "loss": 0.9808, "step": 13489 }, { - "epoch": 0.38280363223609537, + "epoch": 0.38227209612060414, "grad_norm": 0.0, - "learning_rate": 1.4152325996727205e-05, - "loss": 1.0161, + "learning_rate": 1.4167730786185822e-05, + "loss": 1.0336, "step": 13490 }, { - "epoch": 0.38283200908059023, + "epoch": 0.38230043356286664, "grad_norm": 0.0, - "learning_rate": 1.4151489879829814e-05, - "loss": 0.886, + "learning_rate": 1.4166896488498831e-05, + "loss": 0.9343, "step": 13491 }, { - "epoch": 0.38286038592508514, + "epoch": 0.3823287710051291, "grad_norm": 0.0, - "learning_rate": 1.4150653727864605e-05, - "loss": 0.9633, + "learning_rate": 1.416606215571252e-05, + "loss": 0.9509, "step": 13492 }, { - "epoch": 0.38288876276958, + "epoch": 0.3823571084473915, "grad_norm": 0.0, - "learning_rate": 1.4149817540838636e-05, - "loss": 0.9967, + "learning_rate": 1.4165227787833925e-05, + "loss": 1.0146, "step": 13493 }, { - "epoch": 0.3829171396140749, + "epoch": 0.382385445889654, "grad_norm": 0.0, - "learning_rate": 1.414898131875897e-05, - "loss": 0.8744, + "learning_rate": 1.4164393384870065e-05, + "loss": 1.009, "step": 13494 }, { - "epoch": 0.38294551645856983, + "epoch": 0.38241378333191645, "grad_norm": 0.0, - "learning_rate": 1.4148145061632673e-05, - "loss": 0.9343, + "learning_rate": 1.4163558946827975e-05, + "loss": 1.0209, "step": 13495 }, { - "epoch": 0.3829738933030647, + "epoch": 0.38244212077417894, "grad_norm": 0.0, - "learning_rate": 1.414730876946681e-05, - "loss": 0.972, + "learning_rate": 1.416272447371468e-05, + "loss": 0.9907, "step": 13496 }, { - "epoch": 0.3830022701475596, + "epoch": 0.3824704582164414, "grad_norm": 0.0, - "learning_rate": 1.4146472442268439e-05, - "loss": 0.9818, + "learning_rate": 1.4161889965537213e-05, + "loss": 0.9553, "step": 13497 }, { - "epoch": 0.38303064699205447, + "epoch": 0.38249879565870387, "grad_norm": 0.0, - "learning_rate": 1.4145636080044631e-05, - "loss": 0.9293, + "learning_rate": 1.41610554223026e-05, + "loss": 1.0167, "step": 13498 }, { - "epoch": 0.3830590238365494, + "epoch": 0.3825271331009663, "grad_norm": 0.0, - "learning_rate": 1.414479968280245e-05, - "loss": 0.9876, + "learning_rate": 1.4160220844017874e-05, + "loss": 0.9295, "step": 13499 }, { - "epoch": 0.38308740068104424, + "epoch": 0.38255547054322875, "grad_norm": 0.0, - "learning_rate": 1.414396325054896e-05, - "loss": 0.9841, + "learning_rate": 1.4159386230690062e-05, + "loss": 0.9285, "step": 13500 }, { - "epoch": 0.38311577752553916, + "epoch": 0.38258380798549124, "grad_norm": 0.0, - "learning_rate": 1.4143126783291224e-05, - "loss": 0.9554, + "learning_rate": 1.4158551582326193e-05, + "loss": 0.9617, "step": 13501 }, { - "epoch": 0.3831441543700341, + "epoch": 0.3826121454277537, "grad_norm": 0.0, - "learning_rate": 1.4142290281036312e-05, - "loss": 0.9617, + "learning_rate": 1.4157716898933302e-05, + "loss": 0.9228, "step": 13502 }, { - "epoch": 0.38317253121452893, + "epoch": 0.3826404828700162, "grad_norm": 0.0, - "learning_rate": 1.4141453743791288e-05, - "loss": 0.9755, + "learning_rate": 1.4156882180518417e-05, + "loss": 0.8342, "step": 13503 }, { - "epoch": 0.38320090805902385, + "epoch": 0.3826688203122786, "grad_norm": 0.0, - "learning_rate": 1.4140617171563216e-05, - "loss": 0.8588, + "learning_rate": 1.415604742708857e-05, + "loss": 0.9393, "step": 13504 }, { - "epoch": 0.3832292849035187, + "epoch": 0.38269715775454105, "grad_norm": 0.0, - "learning_rate": 1.4139780564359162e-05, - "loss": 1.0101, + "learning_rate": 1.4155212638650793e-05, + "loss": 0.8634, "step": 13505 }, { - "epoch": 0.3832576617480136, + "epoch": 0.38272549519680354, "grad_norm": 0.0, - "learning_rate": 1.4138943922186202e-05, - "loss": 0.9976, + "learning_rate": 1.4154377815212117e-05, + "loss": 0.8627, "step": 13506 }, { - "epoch": 0.38328603859250854, + "epoch": 0.382753832639066, "grad_norm": 0.0, - "learning_rate": 1.4138107245051394e-05, - "loss": 0.9117, + "learning_rate": 1.4153542956779573e-05, + "loss": 0.9501, "step": 13507 }, { - "epoch": 0.3833144154370034, + "epoch": 0.3827821700813285, "grad_norm": 0.0, - "learning_rate": 1.4137270532961806e-05, - "loss": 0.9598, + "learning_rate": 1.4152708063360195e-05, + "loss": 0.9744, "step": 13508 }, { - "epoch": 0.3833427922814983, + "epoch": 0.3828105075235909, "grad_norm": 0.0, - "learning_rate": 1.413643378592451e-05, - "loss": 0.8824, + "learning_rate": 1.4151873134961014e-05, + "loss": 0.9151, "step": 13509 }, { - "epoch": 0.38337116912599317, + "epoch": 0.3828388449658534, "grad_norm": 0.0, - "learning_rate": 1.4135597003946573e-05, - "loss": 0.9759, + "learning_rate": 1.4151038171589064e-05, + "loss": 0.963, "step": 13510 }, { - "epoch": 0.3833995459704881, + "epoch": 0.38286718240811585, "grad_norm": 0.0, - "learning_rate": 1.4134760187035059e-05, - "loss": 0.8735, + "learning_rate": 1.4150203173251377e-05, + "loss": 0.946, "step": 13511 }, { - "epoch": 0.38342792281498295, + "epoch": 0.3828955198503783, "grad_norm": 0.0, - "learning_rate": 1.413392333519704e-05, - "loss": 0.9319, + "learning_rate": 1.414936813995499e-05, + "loss": 0.9201, "step": 13512 }, { - "epoch": 0.38345629965947786, + "epoch": 0.3829238572926408, "grad_norm": 0.0, - "learning_rate": 1.4133086448439587e-05, - "loss": 1.0627, + "learning_rate": 1.4148533071706933e-05, + "loss": 0.9, "step": 13513 }, { - "epoch": 0.3834846765039728, + "epoch": 0.3829521947349032, "grad_norm": 0.0, - "learning_rate": 1.4132249526769765e-05, - "loss": 0.9473, + "learning_rate": 1.4147697968514242e-05, + "loss": 0.8984, "step": 13514 }, { - "epoch": 0.38351305334846764, + "epoch": 0.3829805321771657, "grad_norm": 0.0, - "learning_rate": 1.4131412570194648e-05, - "loss": 1.0031, + "learning_rate": 1.414686283038395e-05, + "loss": 0.893, "step": 13515 }, { - "epoch": 0.38354143019296255, + "epoch": 0.38300886961942815, "grad_norm": 0.0, - "learning_rate": 1.4130575578721302e-05, - "loss": 0.9714, + "learning_rate": 1.4146027657323092e-05, + "loss": 0.9118, "step": 13516 }, { - "epoch": 0.3835698070374574, + "epoch": 0.3830372070616906, "grad_norm": 0.0, - "learning_rate": 1.41297385523568e-05, - "loss": 0.8675, + "learning_rate": 1.4145192449338704e-05, + "loss": 0.9496, "step": 13517 }, { - "epoch": 0.3835981838819523, + "epoch": 0.3830655445039531, "grad_norm": 0.0, - "learning_rate": 1.4128901491108208e-05, - "loss": 0.9827, + "learning_rate": 1.4144357206437822e-05, + "loss": 0.8913, "step": 13518 }, { - "epoch": 0.38362656072644724, + "epoch": 0.3830938819462155, "grad_norm": 0.0, - "learning_rate": 1.4128064394982601e-05, - "loss": 0.8766, + "learning_rate": 1.4143521928627479e-05, + "loss": 0.9341, "step": 13519 }, { - "epoch": 0.3836549375709421, + "epoch": 0.383122219388478, "grad_norm": 0.0, - "learning_rate": 1.4127227263987047e-05, - "loss": 1.0094, + "learning_rate": 1.4142686615914713e-05, + "loss": 0.9817, "step": 13520 }, { - "epoch": 0.383683314415437, + "epoch": 0.38315055683074045, "grad_norm": 0.0, - "learning_rate": 1.412639009812862e-05, - "loss": 0.8596, + "learning_rate": 1.414185126830656e-05, + "loss": 0.9772, "step": 13521 }, { - "epoch": 0.3837116912599319, + "epoch": 0.38317889427300295, "grad_norm": 0.0, - "learning_rate": 1.412555289741439e-05, - "loss": 0.908, + "learning_rate": 1.4141015885810055e-05, + "loss": 0.9688, "step": 13522 }, { - "epoch": 0.3837400681044268, + "epoch": 0.3832072317152654, "grad_norm": 0.0, - "learning_rate": 1.412471566185143e-05, - "loss": 0.898, + "learning_rate": 1.4140180468432235e-05, + "loss": 0.8198, "step": 13523 }, { - "epoch": 0.3837684449489217, + "epoch": 0.3832355691575278, "grad_norm": 0.0, - "learning_rate": 1.412387839144681e-05, - "loss": 0.9109, + "learning_rate": 1.4139345016180135e-05, + "loss": 1.0248, "step": 13524 }, { - "epoch": 0.38379682179341656, + "epoch": 0.3832639065997903, "grad_norm": 0.0, - "learning_rate": 1.4123041086207606e-05, - "loss": 0.9963, + "learning_rate": 1.41385095290608e-05, + "loss": 0.8819, "step": 13525 }, { - "epoch": 0.3838251986379115, + "epoch": 0.38329224404205275, "grad_norm": 0.0, - "learning_rate": 1.4122203746140886e-05, - "loss": 0.9318, + "learning_rate": 1.4137674007081259e-05, + "loss": 0.937, "step": 13526 }, { - "epoch": 0.38385357548240634, + "epoch": 0.38332058148431525, "grad_norm": 0.0, - "learning_rate": 1.4121366371253728e-05, - "loss": 0.8675, + "learning_rate": 1.4136838450248553e-05, + "loss": 0.9214, "step": 13527 }, { - "epoch": 0.38388195232690125, + "epoch": 0.3833489189265777, "grad_norm": 0.0, - "learning_rate": 1.4120528961553199e-05, - "loss": 0.9452, + "learning_rate": 1.413600285856972e-05, + "loss": 0.8555, "step": 13528 }, { - "epoch": 0.3839103291713961, + "epoch": 0.3833772563688401, "grad_norm": 0.0, - "learning_rate": 1.4119691517046379e-05, - "loss": 0.8725, + "learning_rate": 1.4135167232051802e-05, + "loss": 0.9553, "step": 13529 }, { - "epoch": 0.38393870601589103, + "epoch": 0.3834055938111026, "grad_norm": 0.0, - "learning_rate": 1.4118854037740341e-05, - "loss": 0.9239, + "learning_rate": 1.4134331570701834e-05, + "loss": 0.8994, "step": 13530 }, { - "epoch": 0.38396708286038594, + "epoch": 0.38343393125336506, "grad_norm": 0.0, - "learning_rate": 1.4118016523642158e-05, - "loss": 0.9247, + "learning_rate": 1.4133495874526857e-05, + "loss": 0.9723, "step": 13531 }, { - "epoch": 0.3839954597048808, + "epoch": 0.38346226869562755, "grad_norm": 0.0, - "learning_rate": 1.4117178974758903e-05, - "loss": 0.8668, + "learning_rate": 1.4132660143533907e-05, + "loss": 0.925, "step": 13532 }, { - "epoch": 0.3840238365493757, + "epoch": 0.38349060613789, "grad_norm": 0.0, - "learning_rate": 1.4116341391097652e-05, - "loss": 1.0156, + "learning_rate": 1.4131824377730026e-05, + "loss": 0.8553, "step": 13533 }, { - "epoch": 0.3840522133938706, + "epoch": 0.3835189435801525, "grad_norm": 0.0, - "learning_rate": 1.4115503772665483e-05, - "loss": 0.8564, + "learning_rate": 1.4130988577122253e-05, + "loss": 0.8538, "step": 13534 }, { - "epoch": 0.3840805902383655, + "epoch": 0.3835472810224149, "grad_norm": 0.0, - "learning_rate": 1.4114666119469463e-05, - "loss": 0.9891, + "learning_rate": 1.4130152741717634e-05, + "loss": 0.9663, "step": 13535 }, { - "epoch": 0.3841089670828604, + "epoch": 0.38357561846467736, "grad_norm": 0.0, - "learning_rate": 1.4113828431516676e-05, - "loss": 1.0218, + "learning_rate": 1.41293168715232e-05, + "loss": 0.9235, "step": 13536 }, { - "epoch": 0.38413734392735527, + "epoch": 0.38360395590693985, "grad_norm": 0.0, - "learning_rate": 1.4112990708814195e-05, - "loss": 0.8958, + "learning_rate": 1.4128480966545998e-05, + "loss": 0.9751, "step": 13537 }, { - "epoch": 0.3841657207718502, + "epoch": 0.3836322933492023, "grad_norm": 0.0, - "learning_rate": 1.4112152951369097e-05, - "loss": 0.9057, + "learning_rate": 1.4127645026793068e-05, + "loss": 0.8712, "step": 13538 }, { - "epoch": 0.38419409761634504, + "epoch": 0.3836606307914648, "grad_norm": 0.0, - "learning_rate": 1.4111315159188461e-05, - "loss": 0.8172, + "learning_rate": 1.4126809052271453e-05, + "loss": 0.9425, "step": 13539 }, { - "epoch": 0.38422247446083996, + "epoch": 0.3836889682337272, "grad_norm": 0.0, - "learning_rate": 1.411047733227936e-05, - "loss": 0.9689, + "learning_rate": 1.412597304298819e-05, + "loss": 1.0613, "step": 13540 }, { - "epoch": 0.3842508513053349, + "epoch": 0.38371730567598966, "grad_norm": 0.0, - "learning_rate": 1.4109639470648868e-05, - "loss": 1.0025, + "learning_rate": 1.4125136998950324e-05, + "loss": 0.8188, "step": 13541 }, { - "epoch": 0.38427922814982973, + "epoch": 0.38374564311825216, "grad_norm": 0.0, - "learning_rate": 1.4108801574304072e-05, - "loss": 0.9518, + "learning_rate": 1.4124300920164897e-05, + "loss": 0.934, "step": 13542 }, { - "epoch": 0.38430760499432465, + "epoch": 0.3837739805605146, "grad_norm": 0.0, - "learning_rate": 1.4107963643252038e-05, - "loss": 0.9892, + "learning_rate": 1.4123464806638955e-05, + "loss": 0.9199, "step": 13543 }, { - "epoch": 0.3843359818388195, + "epoch": 0.3838023180027771, "grad_norm": 0.0, - "learning_rate": 1.4107125677499856e-05, - "loss": 0.9177, + "learning_rate": 1.4122628658379536e-05, + "loss": 0.9045, "step": 13544 }, { - "epoch": 0.3843643586833144, + "epoch": 0.3838306554450395, "grad_norm": 0.0, - "learning_rate": 1.4106287677054597e-05, - "loss": 0.8376, + "learning_rate": 1.4121792475393685e-05, + "loss": 1.0098, "step": 13545 }, { - "epoch": 0.3843927355278093, + "epoch": 0.383858992887302, "grad_norm": 0.0, - "learning_rate": 1.4105449641923341e-05, - "loss": 1.0765, + "learning_rate": 1.4120956257688445e-05, + "loss": 0.9098, "step": 13546 }, { - "epoch": 0.3844211123723042, + "epoch": 0.38388733032956446, "grad_norm": 0.0, - "learning_rate": 1.410461157211317e-05, - "loss": 0.9201, + "learning_rate": 1.412012000527086e-05, + "loss": 0.9524, "step": 13547 }, { - "epoch": 0.3844494892167991, + "epoch": 0.3839156677718269, "grad_norm": 0.0, - "learning_rate": 1.4103773467631157e-05, - "loss": 0.9908, + "learning_rate": 1.4119283718147974e-05, + "loss": 0.9019, "step": 13548 }, { - "epoch": 0.38447786606129397, + "epoch": 0.3839440052140894, "grad_norm": 0.0, - "learning_rate": 1.4102935328484385e-05, - "loss": 0.8802, + "learning_rate": 1.4118447396326832e-05, + "loss": 0.8302, "step": 13549 }, { - "epoch": 0.3845062429057889, + "epoch": 0.38397234265635183, "grad_norm": 0.0, - "learning_rate": 1.4102097154679938e-05, - "loss": 0.9147, + "learning_rate": 1.4117611039814479e-05, + "loss": 0.8639, "step": 13550 }, { - "epoch": 0.38453461975028375, + "epoch": 0.3840006800986143, "grad_norm": 0.0, - "learning_rate": 1.4101258946224888e-05, - "loss": 0.9078, + "learning_rate": 1.4116774648617958e-05, + "loss": 0.9913, "step": 13551 }, { - "epoch": 0.38456299659477866, + "epoch": 0.38402901754087676, "grad_norm": 0.0, - "learning_rate": 1.4100420703126318e-05, - "loss": 0.9374, + "learning_rate": 1.4115938222744317e-05, + "loss": 0.8033, "step": 13552 }, { - "epoch": 0.3845913734392736, + "epoch": 0.3840573549831392, "grad_norm": 0.0, - "learning_rate": 1.4099582425391314e-05, - "loss": 0.8732, + "learning_rate": 1.4115101762200598e-05, + "loss": 0.8727, "step": 13553 }, { - "epoch": 0.38461975028376844, + "epoch": 0.3840856924254017, "grad_norm": 0.0, - "learning_rate": 1.4098744113026951e-05, - "loss": 0.9956, + "learning_rate": 1.4114265266993847e-05, + "loss": 1.0612, "step": 13554 }, { - "epoch": 0.38464812712826335, + "epoch": 0.38411402986766413, "grad_norm": 0.0, - "learning_rate": 1.4097905766040312e-05, - "loss": 0.8551, + "learning_rate": 1.4113428737131116e-05, + "loss": 0.8648, "step": 13555 }, { - "epoch": 0.3846765039727582, + "epoch": 0.3841423673099266, "grad_norm": 0.0, - "learning_rate": 1.409706738443848e-05, - "loss": 0.9171, + "learning_rate": 1.4112592172619449e-05, + "loss": 1.0116, "step": 13556 }, { - "epoch": 0.3847048808172531, + "epoch": 0.38417070475218906, "grad_norm": 0.0, - "learning_rate": 1.4096228968228536e-05, - "loss": 1.0029, + "learning_rate": 1.4111755573465884e-05, + "loss": 1.0346, "step": 13557 }, { - "epoch": 0.38473325766174804, + "epoch": 0.3841990421944515, "grad_norm": 0.0, - "learning_rate": 1.409539051741756e-05, - "loss": 0.8823, + "learning_rate": 1.411091893967748e-05, + "loss": 0.987, "step": 13558 }, { - "epoch": 0.3847616345062429, + "epoch": 0.384227379636714, "grad_norm": 0.0, - "learning_rate": 1.4094552032012635e-05, - "loss": 1.0027, + "learning_rate": 1.4110082271261278e-05, + "loss": 0.8989, "step": 13559 }, { - "epoch": 0.3847900113507378, + "epoch": 0.38425571707897643, "grad_norm": 0.0, - "learning_rate": 1.4093713512020848e-05, - "loss": 0.8735, + "learning_rate": 1.4109245568224326e-05, + "loss": 0.9483, "step": 13560 }, { - "epoch": 0.3848183881952327, + "epoch": 0.3842840545212389, "grad_norm": 0.0, - "learning_rate": 1.4092874957449278e-05, - "loss": 0.9194, + "learning_rate": 1.4108408830573673e-05, + "loss": 0.9203, "step": 13561 }, { - "epoch": 0.3848467650397276, + "epoch": 0.38431239196350137, "grad_norm": 0.0, - "learning_rate": 1.409203636830501e-05, - "loss": 0.9693, + "learning_rate": 1.4107572058316365e-05, + "loss": 0.8813, "step": 13562 }, { - "epoch": 0.38487514188422245, + "epoch": 0.38434072940576386, "grad_norm": 0.0, - "learning_rate": 1.4091197744595127e-05, - "loss": 0.8871, + "learning_rate": 1.4106735251459456e-05, + "loss": 0.9827, "step": 13563 }, { - "epoch": 0.38490351872871736, + "epoch": 0.3843690668480263, "grad_norm": 0.0, - "learning_rate": 1.4090359086326712e-05, - "loss": 0.967, + "learning_rate": 1.410589841000999e-05, + "loss": 0.8827, "step": 13564 }, { - "epoch": 0.3849318955732123, + "epoch": 0.38439740429028874, "grad_norm": 0.0, - "learning_rate": 1.4089520393506848e-05, - "loss": 0.9157, + "learning_rate": 1.4105061533975015e-05, + "loss": 0.8581, "step": 13565 }, { - "epoch": 0.38496027241770714, + "epoch": 0.38442574173255123, "grad_norm": 0.0, - "learning_rate": 1.4088681666142621e-05, - "loss": 0.8933, + "learning_rate": 1.4104224623361584e-05, + "loss": 1.0416, "step": 13566 }, { - "epoch": 0.38498864926220205, + "epoch": 0.38445407917481367, "grad_norm": 0.0, - "learning_rate": 1.408784290424112e-05, - "loss": 0.8422, + "learning_rate": 1.4103387678176745e-05, + "loss": 1.0139, "step": 13567 }, { - "epoch": 0.3850170261066969, + "epoch": 0.38448241661707616, "grad_norm": 0.0, - "learning_rate": 1.4087004107809424e-05, - "loss": 0.9214, + "learning_rate": 1.4102550698427548e-05, + "loss": 1.0436, "step": 13568 }, { - "epoch": 0.38504540295119183, + "epoch": 0.3845107540593386, "grad_norm": 0.0, - "learning_rate": 1.408616527685462e-05, - "loss": 1.0362, + "learning_rate": 1.4101713684121042e-05, + "loss": 0.9055, "step": 13569 }, { - "epoch": 0.38507377979568674, + "epoch": 0.38453909150160104, "grad_norm": 0.0, - "learning_rate": 1.4085326411383796e-05, - "loss": 0.9253, + "learning_rate": 1.4100876635264279e-05, + "loss": 0.9241, "step": 13570 }, { - "epoch": 0.3851021566401816, + "epoch": 0.38456742894386353, "grad_norm": 0.0, - "learning_rate": 1.4084487511404036e-05, - "loss": 1.0077, + "learning_rate": 1.410003955186431e-05, + "loss": 0.9613, "step": 13571 }, { - "epoch": 0.3851305334846765, + "epoch": 0.38459576638612597, "grad_norm": 0.0, - "learning_rate": 1.4083648576922424e-05, - "loss": 0.9126, + "learning_rate": 1.4099202433928185e-05, + "loss": 0.8289, "step": 13572 }, { - "epoch": 0.3851589103291714, + "epoch": 0.38462410382838846, "grad_norm": 0.0, - "learning_rate": 1.4082809607946053e-05, - "loss": 0.9982, + "learning_rate": 1.4098365281462953e-05, + "loss": 0.9455, "step": 13573 }, { - "epoch": 0.3851872871736663, + "epoch": 0.3846524412706509, "grad_norm": 0.0, - "learning_rate": 1.4081970604482003e-05, - "loss": 0.9355, + "learning_rate": 1.409752809447567e-05, + "loss": 0.8609, "step": 13574 }, { - "epoch": 0.3852156640181612, + "epoch": 0.3846807787129134, "grad_norm": 0.0, - "learning_rate": 1.4081131566537365e-05, - "loss": 1.0021, + "learning_rate": 1.4096690872973388e-05, + "loss": 0.945, "step": 13575 }, { - "epoch": 0.38524404086265607, + "epoch": 0.38470911615517583, "grad_norm": 0.0, - "learning_rate": 1.408029249411922e-05, - "loss": 0.8268, + "learning_rate": 1.4095853616963157e-05, + "loss": 0.9637, "step": 13576 }, { - "epoch": 0.385272417707151, + "epoch": 0.3847374535974383, "grad_norm": 0.0, - "learning_rate": 1.4079453387234667e-05, - "loss": 0.9336, + "learning_rate": 1.4095016326452027e-05, + "loss": 1.1015, "step": 13577 }, { - "epoch": 0.38530079455164584, + "epoch": 0.38476579103970077, "grad_norm": 0.0, - "learning_rate": 1.4078614245890787e-05, - "loss": 1.0225, + "learning_rate": 1.409417900144706e-05, + "loss": 1.0692, "step": 13578 }, { - "epoch": 0.38532917139614076, + "epoch": 0.3847941284819632, "grad_norm": 0.0, - "learning_rate": 1.4077775070094667e-05, - "loss": 0.9746, + "learning_rate": 1.4093341641955298e-05, + "loss": 0.9226, "step": 13579 }, { - "epoch": 0.3853575482406356, + "epoch": 0.3848224659242257, "grad_norm": 0.0, - "learning_rate": 1.40769358598534e-05, - "loss": 0.829, + "learning_rate": 1.4092504247983798e-05, + "loss": 0.8813, "step": 13580 }, { - "epoch": 0.38538592508513053, + "epoch": 0.38485080336648814, "grad_norm": 0.0, - "learning_rate": 1.4076096615174071e-05, - "loss": 0.7601, + "learning_rate": 1.409166681953962e-05, + "loss": 0.931, "step": 13581 }, { - "epoch": 0.38541430192962545, + "epoch": 0.3848791408087506, "grad_norm": 0.0, - "learning_rate": 1.4075257336063767e-05, - "loss": 0.7696, + "learning_rate": 1.4090829356629809e-05, + "loss": 0.9372, "step": 13582 }, { - "epoch": 0.3854426787741203, + "epoch": 0.38490747825101307, "grad_norm": 0.0, - "learning_rate": 1.4074418022529589e-05, - "loss": 0.9553, + "learning_rate": 1.4089991859261426e-05, + "loss": 0.9625, "step": 13583 }, { - "epoch": 0.3854710556186152, + "epoch": 0.3849358156932755, "grad_norm": 0.0, - "learning_rate": 1.4073578674578615e-05, - "loss": 0.8412, + "learning_rate": 1.408915432744152e-05, + "loss": 0.8995, "step": 13584 }, { - "epoch": 0.3854994324631101, + "epoch": 0.384964153135538, "grad_norm": 0.0, - "learning_rate": 1.4072739292217939e-05, - "loss": 0.9452, + "learning_rate": 1.4088316761177151e-05, + "loss": 0.7821, "step": 13585 }, { - "epoch": 0.385527809307605, + "epoch": 0.38499249057780044, "grad_norm": 0.0, - "learning_rate": 1.407189987545465e-05, - "loss": 1.0103, + "learning_rate": 1.408747916047537e-05, + "loss": 0.975, "step": 13586 }, { - "epoch": 0.3855561861520999, + "epoch": 0.38502082802006293, "grad_norm": 0.0, - "learning_rate": 1.4071060424295841e-05, - "loss": 0.8955, + "learning_rate": 1.4086641525343234e-05, + "loss": 0.8654, "step": 13587 }, { - "epoch": 0.38558456299659477, + "epoch": 0.38504916546232537, "grad_norm": 0.0, - "learning_rate": 1.4070220938748602e-05, - "loss": 0.9821, + "learning_rate": 1.40858038557878e-05, + "loss": 0.9546, "step": 13588 }, { - "epoch": 0.3856129398410897, + "epoch": 0.3850775029045878, "grad_norm": 0.0, - "learning_rate": 1.406938141882002e-05, - "loss": 0.9695, + "learning_rate": 1.4084966151816124e-05, + "loss": 0.8261, "step": 13589 }, { - "epoch": 0.38564131668558455, + "epoch": 0.3851058403468503, "grad_norm": 0.0, - "learning_rate": 1.4068541864517193e-05, - "loss": 0.9828, + "learning_rate": 1.4084128413435258e-05, + "loss": 0.9099, "step": 13590 }, { - "epoch": 0.38566969353007946, + "epoch": 0.38513417778911274, "grad_norm": 0.0, - "learning_rate": 1.4067702275847212e-05, - "loss": 0.9124, + "learning_rate": 1.4083290640652267e-05, + "loss": 0.8389, "step": 13591 }, { - "epoch": 0.3856980703745743, + "epoch": 0.38516251523137524, "grad_norm": 0.0, - "learning_rate": 1.4066862652817164e-05, - "loss": 0.9063, + "learning_rate": 1.4082452833474198e-05, + "loss": 0.7971, "step": 13592 }, { - "epoch": 0.38572644721906924, + "epoch": 0.3851908526736377, "grad_norm": 0.0, - "learning_rate": 1.4066022995434145e-05, - "loss": 0.8731, + "learning_rate": 1.4081614991908115e-05, + "loss": 0.7895, "step": 13593 }, { - "epoch": 0.38575482406356415, + "epoch": 0.3852191901159001, "grad_norm": 0.0, - "learning_rate": 1.406518330370525e-05, - "loss": 0.9989, + "learning_rate": 1.408077711596107e-05, + "loss": 0.9288, "step": 13594 }, { - "epoch": 0.385783200908059, + "epoch": 0.3852475275581626, "grad_norm": 0.0, - "learning_rate": 1.4064343577637565e-05, - "loss": 0.9968, + "learning_rate": 1.4079939205640127e-05, + "loss": 0.983, "step": 13595 }, { - "epoch": 0.3858115777525539, + "epoch": 0.38527586500042504, "grad_norm": 0.0, - "learning_rate": 1.4063503817238189e-05, - "loss": 0.9883, + "learning_rate": 1.4079101260952342e-05, + "loss": 0.9028, "step": 13596 }, { - "epoch": 0.3858399545970488, + "epoch": 0.38530420244268754, "grad_norm": 0.0, - "learning_rate": 1.4062664022514212e-05, - "loss": 0.8848, + "learning_rate": 1.4078263281904771e-05, + "loss": 0.9046, "step": 13597 }, { - "epoch": 0.3858683314415437, + "epoch": 0.38533253988495, "grad_norm": 0.0, - "learning_rate": 1.406182419347273e-05, - "loss": 0.9814, + "learning_rate": 1.4077425268504474e-05, + "loss": 1.0396, "step": 13598 }, { - "epoch": 0.3858967082860386, + "epoch": 0.38536087732721247, "grad_norm": 0.0, - "learning_rate": 1.4060984330120836e-05, - "loss": 0.9165, + "learning_rate": 1.407658722075851e-05, + "loss": 0.9457, "step": 13599 }, { - "epoch": 0.3859250851305335, + "epoch": 0.3853892147694749, "grad_norm": 0.0, - "learning_rate": 1.4060144432465627e-05, - "loss": 0.9974, + "learning_rate": 1.4075749138673937e-05, + "loss": 0.8406, "step": 13600 }, { - "epoch": 0.3859534619750284, + "epoch": 0.38541755221173735, "grad_norm": 0.0, - "learning_rate": 1.4059304500514195e-05, - "loss": 0.8704, + "learning_rate": 1.4074911022257815e-05, + "loss": 0.867, "step": 13601 }, { - "epoch": 0.38598183881952325, + "epoch": 0.38544588965399984, "grad_norm": 0.0, - "learning_rate": 1.4058464534273632e-05, - "loss": 0.9155, + "learning_rate": 1.4074072871517205e-05, + "loss": 0.9465, "step": 13602 }, { - "epoch": 0.38601021566401816, + "epoch": 0.3854742270962623, "grad_norm": 0.0, - "learning_rate": 1.4057624533751042e-05, - "loss": 0.9316, + "learning_rate": 1.4073234686459167e-05, + "loss": 0.8787, "step": 13603 }, { - "epoch": 0.3860385925085131, + "epoch": 0.3855025645385248, "grad_norm": 0.0, - "learning_rate": 1.4056784498953511e-05, - "loss": 1.035, + "learning_rate": 1.4072396467090764e-05, + "loss": 0.8706, "step": 13604 }, { - "epoch": 0.38606696935300794, + "epoch": 0.3855309019807872, "grad_norm": 0.0, - "learning_rate": 1.4055944429888139e-05, - "loss": 0.8561, + "learning_rate": 1.407155821341905e-05, + "loss": 0.9133, "step": 13605 }, { - "epoch": 0.38609534619750285, + "epoch": 0.38555923942304965, "grad_norm": 0.0, - "learning_rate": 1.4055104326562024e-05, - "loss": 0.8342, + "learning_rate": 1.4070719925451086e-05, + "loss": 1.0011, "step": 13606 }, { - "epoch": 0.3861237230419977, + "epoch": 0.38558757686531214, "grad_norm": 0.0, - "learning_rate": 1.4054264188982261e-05, - "loss": 0.9075, + "learning_rate": 1.406988160319394e-05, + "loss": 0.9757, "step": 13607 }, { - "epoch": 0.38615209988649263, + "epoch": 0.3856159143075746, "grad_norm": 0.0, - "learning_rate": 1.4053424017155945e-05, - "loss": 0.924, + "learning_rate": 1.406904324665467e-05, + "loss": 1.0314, "step": 13608 }, { - "epoch": 0.3861804767309875, + "epoch": 0.3856442517498371, "grad_norm": 0.0, - "learning_rate": 1.4052583811090173e-05, - "loss": 0.8918, + "learning_rate": 1.4068204855840338e-05, + "loss": 1.0781, "step": 13609 }, { - "epoch": 0.3862088535754824, + "epoch": 0.3856725891920995, "grad_norm": 0.0, - "learning_rate": 1.4051743570792046e-05, - "loss": 0.8485, + "learning_rate": 1.4067366430758004e-05, + "loss": 0.9732, "step": 13610 }, { - "epoch": 0.3862372304199773, + "epoch": 0.385700926634362, "grad_norm": 0.0, - "learning_rate": 1.4050903296268658e-05, - "loss": 0.9542, + "learning_rate": 1.4066527971414732e-05, + "loss": 0.8183, "step": 13611 }, { - "epoch": 0.3862656072644722, + "epoch": 0.38572926407662445, "grad_norm": 0.0, - "learning_rate": 1.4050062987527104e-05, - "loss": 0.9597, + "learning_rate": 1.4065689477817587e-05, + "loss": 0.9676, "step": 13612 }, { - "epoch": 0.3862939841089671, + "epoch": 0.3857576015188869, "grad_norm": 0.0, - "learning_rate": 1.404922264457449e-05, - "loss": 0.9935, + "learning_rate": 1.4064850949973627e-05, + "loss": 0.9429, "step": 13613 }, { - "epoch": 0.38632236095346195, + "epoch": 0.3857859389611494, "grad_norm": 0.0, - "learning_rate": 1.404838226741791e-05, - "loss": 0.9188, + "learning_rate": 1.406401238788992e-05, + "loss": 0.9098, "step": 13614 }, { - "epoch": 0.38635073779795687, + "epoch": 0.3858142764034118, "grad_norm": 0.0, - "learning_rate": 1.4047541856064464e-05, - "loss": 0.9537, + "learning_rate": 1.4063173791573528e-05, + "loss": 0.9512, "step": 13615 }, { - "epoch": 0.3863791146424518, + "epoch": 0.3858426138456743, "grad_norm": 0.0, - "learning_rate": 1.4046701410521247e-05, - "loss": 0.8551, + "learning_rate": 1.4062335161031512e-05, + "loss": 1.0017, "step": 13616 }, { - "epoch": 0.38640749148694664, + "epoch": 0.38587095128793675, "grad_norm": 0.0, - "learning_rate": 1.4045860930795362e-05, - "loss": 1.0273, + "learning_rate": 1.4061496496270944e-05, + "loss": 0.9334, "step": 13617 }, { - "epoch": 0.38643586833144156, + "epoch": 0.3858992887301992, "grad_norm": 0.0, - "learning_rate": 1.4045020416893911e-05, - "loss": 0.9428, + "learning_rate": 1.4060657797298876e-05, + "loss": 0.9239, "step": 13618 }, { - "epoch": 0.3864642451759364, + "epoch": 0.3859276261724617, "grad_norm": 0.0, - "learning_rate": 1.4044179868823986e-05, - "loss": 0.9687, + "learning_rate": 1.4059819064122382e-05, + "loss": 0.9151, "step": 13619 }, { - "epoch": 0.38649262202043133, + "epoch": 0.3859559636147241, "grad_norm": 0.0, - "learning_rate": 1.4043339286592691e-05, - "loss": 1.0078, + "learning_rate": 1.4058980296748526e-05, + "loss": 0.9656, "step": 13620 }, { - "epoch": 0.38652099886492625, + "epoch": 0.3859843010569866, "grad_norm": 0.0, - "learning_rate": 1.4042498670207131e-05, - "loss": 0.962, + "learning_rate": 1.4058141495184369e-05, + "loss": 0.9133, "step": 13621 }, { - "epoch": 0.3865493757094211, + "epoch": 0.38601263849924905, "grad_norm": 0.0, - "learning_rate": 1.4041658019674405e-05, - "loss": 0.9742, + "learning_rate": 1.4057302659436981e-05, + "loss": 0.8901, "step": 13622 }, { - "epoch": 0.386577752553916, + "epoch": 0.38604097594151154, "grad_norm": 0.0, - "learning_rate": 1.4040817335001608e-05, - "loss": 0.9011, + "learning_rate": 1.4056463789513425e-05, + "loss": 0.8901, "step": 13623 }, { - "epoch": 0.3866061293984109, + "epoch": 0.386069313383774, "grad_norm": 0.0, - "learning_rate": 1.4039976616195848e-05, - "loss": 0.9728, + "learning_rate": 1.405562488542077e-05, + "loss": 0.934, "step": 13624 }, { - "epoch": 0.3866345062429058, + "epoch": 0.3860976508260364, "grad_norm": 0.0, - "learning_rate": 1.4039135863264221e-05, - "loss": 0.9106, + "learning_rate": 1.4054785947166079e-05, + "loss": 0.8967, "step": 13625 }, { - "epoch": 0.38666288308740066, + "epoch": 0.3861259882682989, "grad_norm": 0.0, - "learning_rate": 1.4038295076213833e-05, - "loss": 0.9235, + "learning_rate": 1.405394697475642e-05, + "loss": 0.9272, "step": 13626 }, { - "epoch": 0.38669125993189557, + "epoch": 0.38615432571056135, "grad_norm": 0.0, - "learning_rate": 1.4037454255051786e-05, - "loss": 0.9141, + "learning_rate": 1.4053107968198862e-05, + "loss": 0.8993, "step": 13627 }, { - "epoch": 0.3867196367763905, + "epoch": 0.38618266315282385, "grad_norm": 0.0, - "learning_rate": 1.403661339978518e-05, - "loss": 0.86, + "learning_rate": 1.405226892750047e-05, + "loss": 0.8665, "step": 13628 }, { - "epoch": 0.38674801362088534, + "epoch": 0.3862110005950863, "grad_norm": 0.0, - "learning_rate": 1.403577251042112e-05, - "loss": 0.8888, + "learning_rate": 1.4051429852668312e-05, + "loss": 1.0268, "step": 13629 }, { - "epoch": 0.38677639046538026, + "epoch": 0.3862393380373487, "grad_norm": 0.0, - "learning_rate": 1.4034931586966707e-05, - "loss": 0.9783, + "learning_rate": 1.4050590743709456e-05, + "loss": 0.9514, "step": 13630 }, { - "epoch": 0.3868047673098751, + "epoch": 0.3862676754796112, "grad_norm": 0.0, - "learning_rate": 1.4034090629429047e-05, - "loss": 1.0879, + "learning_rate": 1.4049751600630968e-05, + "loss": 1.025, "step": 13631 }, { - "epoch": 0.38683314415437003, + "epoch": 0.38629601292187365, "grad_norm": 0.0, - "learning_rate": 1.4033249637815242e-05, - "loss": 0.8742, + "learning_rate": 1.4048912423439917e-05, + "loss": 0.8795, "step": 13632 }, { - "epoch": 0.38686152099886495, + "epoch": 0.38632435036413615, "grad_norm": 0.0, - "learning_rate": 1.4032408612132395e-05, - "loss": 0.8358, + "learning_rate": 1.4048073212143379e-05, + "loss": 0.8649, "step": 13633 }, { - "epoch": 0.3868898978433598, + "epoch": 0.3863526878063986, "grad_norm": 0.0, - "learning_rate": 1.4031567552387613e-05, - "loss": 0.8784, + "learning_rate": 1.4047233966748415e-05, + "loss": 0.8284, "step": 13634 }, { - "epoch": 0.3869182746878547, + "epoch": 0.3863810252486611, "grad_norm": 0.0, - "learning_rate": 1.4030726458587997e-05, - "loss": 0.918, + "learning_rate": 1.4046394687262095e-05, + "loss": 0.8531, "step": 13635 }, { - "epoch": 0.3869466515323496, + "epoch": 0.3864093626909235, "grad_norm": 0.0, - "learning_rate": 1.4029885330740652e-05, - "loss": 0.9112, + "learning_rate": 1.404555537369149e-05, + "loss": 1.0927, "step": 13636 }, { - "epoch": 0.3869750283768445, + "epoch": 0.38643770013318596, "grad_norm": 0.0, - "learning_rate": 1.4029044168852689e-05, - "loss": 0.8746, + "learning_rate": 1.404471602604367e-05, + "loss": 0.9728, "step": 13637 }, { - "epoch": 0.3870034052213394, + "epoch": 0.38646603757544845, "grad_norm": 0.0, - "learning_rate": 1.4028202972931206e-05, - "loss": 0.9448, + "learning_rate": 1.4043876644325705e-05, + "loss": 0.931, "step": 13638 }, { - "epoch": 0.3870317820658343, + "epoch": 0.3864943750177109, "grad_norm": 0.0, - "learning_rate": 1.402736174298331e-05, - "loss": 0.8313, + "learning_rate": 1.4043037228544667e-05, + "loss": 0.9721, "step": 13639 }, { - "epoch": 0.3870601589103292, + "epoch": 0.3865227124599734, "grad_norm": 0.0, - "learning_rate": 1.402652047901611e-05, - "loss": 0.8634, + "learning_rate": 1.4042197778707622e-05, + "loss": 0.9466, "step": 13640 }, { - "epoch": 0.38708853575482405, + "epoch": 0.3865510499022358, "grad_norm": 0.0, - "learning_rate": 1.402567918103671e-05, - "loss": 0.8962, + "learning_rate": 1.4041358294821646e-05, + "loss": 0.9527, "step": 13641 }, { - "epoch": 0.38711691259931896, + "epoch": 0.38657938734449826, "grad_norm": 0.0, - "learning_rate": 1.4024837849052219e-05, - "loss": 0.8576, + "learning_rate": 1.404051877689381e-05, + "loss": 0.9292, "step": 13642 }, { - "epoch": 0.3871452894438138, + "epoch": 0.38660772478676075, "grad_norm": 0.0, - "learning_rate": 1.4023996483069738e-05, - "loss": 1.0539, + "learning_rate": 1.4039679224931183e-05, + "loss": 0.9655, "step": 13643 }, { - "epoch": 0.38717366628830874, + "epoch": 0.3866360622290232, "grad_norm": 0.0, - "learning_rate": 1.402315508309638e-05, - "loss": 0.9715, + "learning_rate": 1.4038839638940835e-05, + "loss": 0.9444, "step": 13644 }, { - "epoch": 0.38720204313280365, + "epoch": 0.3866643996712857, "grad_norm": 0.0, - "learning_rate": 1.4022313649139252e-05, - "loss": 0.9398, + "learning_rate": 1.403800001892984e-05, + "loss": 1.0105, "step": 13645 }, { - "epoch": 0.3872304199772985, + "epoch": 0.3866927371135481, "grad_norm": 0.0, - "learning_rate": 1.4021472181205455e-05, - "loss": 0.8498, + "learning_rate": 1.4037160364905276e-05, + "loss": 0.8611, "step": 13646 }, { - "epoch": 0.3872587968217934, + "epoch": 0.3867210745558106, "grad_norm": 0.0, - "learning_rate": 1.4020630679302105e-05, - "loss": 0.9058, + "learning_rate": 1.403632067687421e-05, + "loss": 0.8508, "step": 13647 }, { - "epoch": 0.3872871736662883, + "epoch": 0.38674941199807306, "grad_norm": 0.0, - "learning_rate": 1.401978914343631e-05, - "loss": 0.9099, + "learning_rate": 1.4035480954843714e-05, + "loss": 1.009, "step": 13648 }, { - "epoch": 0.3873155505107832, + "epoch": 0.3867777494403355, "grad_norm": 0.0, - "learning_rate": 1.401894757361517e-05, - "loss": 0.9833, + "learning_rate": 1.4034641198820866e-05, + "loss": 0.9417, "step": 13649 }, { - "epoch": 0.3873439273552781, + "epoch": 0.386806086882598, "grad_norm": 0.0, - "learning_rate": 1.4018105969845798e-05, - "loss": 0.8356, + "learning_rate": 1.4033801408812738e-05, + "loss": 0.9733, "step": 13650 }, { - "epoch": 0.387372304199773, + "epoch": 0.3868344243248604, "grad_norm": 0.0, - "learning_rate": 1.401726433213531e-05, - "loss": 0.9886, + "learning_rate": 1.4032961584826396e-05, + "loss": 0.9963, "step": 13651 }, { - "epoch": 0.3874006810442679, + "epoch": 0.3868627617671229, "grad_norm": 0.0, - "learning_rate": 1.4016422660490807e-05, - "loss": 0.8752, + "learning_rate": 1.4032121726868926e-05, + "loss": 0.8775, "step": 13652 }, { - "epoch": 0.38742905788876275, + "epoch": 0.38689109920938536, "grad_norm": 0.0, - "learning_rate": 1.4015580954919399e-05, - "loss": 0.9297, + "learning_rate": 1.4031281834947397e-05, + "loss": 0.9271, "step": 13653 }, { - "epoch": 0.38745743473325767, + "epoch": 0.3869194366516478, "grad_norm": 0.0, - "learning_rate": 1.4014739215428202e-05, - "loss": 1.0198, + "learning_rate": 1.4030441909068886e-05, + "loss": 0.7835, "step": 13654 }, { - "epoch": 0.3874858115777526, + "epoch": 0.3869477740939103, "grad_norm": 0.0, - "learning_rate": 1.4013897442024323e-05, - "loss": 0.8653, + "learning_rate": 1.4029601949240464e-05, + "loss": 0.902, "step": 13655 }, { - "epoch": 0.38751418842224744, + "epoch": 0.38697611153617273, "grad_norm": 0.0, - "learning_rate": 1.4013055634714867e-05, - "loss": 1.0004, + "learning_rate": 1.4028761955469206e-05, + "loss": 0.9352, "step": 13656 }, { - "epoch": 0.38754256526674236, + "epoch": 0.3870044489784352, "grad_norm": 0.0, - "learning_rate": 1.4012213793506953e-05, - "loss": 0.9881, + "learning_rate": 1.4027921927762193e-05, + "loss": 0.8247, "step": 13657 }, { - "epoch": 0.3875709421112372, + "epoch": 0.38703278642069766, "grad_norm": 0.0, - "learning_rate": 1.4011371918407686e-05, - "loss": 0.8707, + "learning_rate": 1.4027081866126498e-05, + "loss": 0.9828, "step": 13658 }, { - "epoch": 0.38759931895573213, + "epoch": 0.38706112386296015, "grad_norm": 0.0, - "learning_rate": 1.4010530009424183e-05, - "loss": 1.0424, + "learning_rate": 1.4026241770569198e-05, + "loss": 0.8597, "step": 13659 }, { - "epoch": 0.387627695800227, + "epoch": 0.3870894613052226, "grad_norm": 0.0, - "learning_rate": 1.4009688066563552e-05, - "loss": 0.8535, + "learning_rate": 1.4025401641097365e-05, + "loss": 0.9111, "step": 13660 }, { - "epoch": 0.3876560726447219, + "epoch": 0.38711779874748503, "grad_norm": 0.0, - "learning_rate": 1.4008846089832906e-05, - "loss": 0.9977, + "learning_rate": 1.4024561477718081e-05, + "loss": 1.0266, "step": 13661 }, { - "epoch": 0.3876844494892168, + "epoch": 0.3871461361897475, "grad_norm": 0.0, - "learning_rate": 1.4008004079239358e-05, - "loss": 0.8802, + "learning_rate": 1.4023721280438423e-05, + "loss": 0.8994, "step": 13662 }, { - "epoch": 0.3877128263337117, + "epoch": 0.38717447363200996, "grad_norm": 0.0, - "learning_rate": 1.4007162034790015e-05, - "loss": 0.9321, + "learning_rate": 1.4022881049265465e-05, + "loss": 0.946, "step": 13663 }, { - "epoch": 0.3877412031782066, + "epoch": 0.38720281107427246, "grad_norm": 0.0, - "learning_rate": 1.4006319956491998e-05, - "loss": 0.8367, + "learning_rate": 1.4022040784206284e-05, + "loss": 0.868, "step": 13664 }, { - "epoch": 0.38776958002270145, + "epoch": 0.3872311485165349, "grad_norm": 0.0, - "learning_rate": 1.4005477844352415e-05, - "loss": 1.059, + "learning_rate": 1.4021200485267961e-05, + "loss": 0.9808, "step": 13665 }, { - "epoch": 0.38779795686719637, + "epoch": 0.38725948595879733, "grad_norm": 0.0, - "learning_rate": 1.400463569837838e-05, - "loss": 0.8497, + "learning_rate": 1.4020360152457575e-05, + "loss": 0.9437, "step": 13666 }, { - "epoch": 0.3878263337116913, + "epoch": 0.3872878234010598, "grad_norm": 0.0, - "learning_rate": 1.4003793518577007e-05, - "loss": 0.9682, + "learning_rate": 1.4019519785782201e-05, + "loss": 0.9401, "step": 13667 }, { - "epoch": 0.38785471055618614, + "epoch": 0.38731616084332227, "grad_norm": 0.0, - "learning_rate": 1.400295130495541e-05, - "loss": 0.7921, + "learning_rate": 1.401867938524892e-05, + "loss": 0.9012, "step": 13668 }, { - "epoch": 0.38788308740068106, + "epoch": 0.38734449828558476, "grad_norm": 0.0, - "learning_rate": 1.4002109057520707e-05, - "loss": 0.9578, + "learning_rate": 1.4017838950864808e-05, + "loss": 1.0231, "step": 13669 }, { - "epoch": 0.3879114642451759, + "epoch": 0.3873728357278472, "grad_norm": 0.0, - "learning_rate": 1.4001266776280005e-05, - "loss": 0.916, + "learning_rate": 1.401699848263695e-05, + "loss": 0.9781, "step": 13670 }, { - "epoch": 0.38793984108967083, + "epoch": 0.3874011731701097, "grad_norm": 0.0, - "learning_rate": 1.4000424461240426e-05, - "loss": 0.9869, + "learning_rate": 1.4016157980572418e-05, + "loss": 1.0325, "step": 13671 }, { - "epoch": 0.3879682179341657, + "epoch": 0.38742951061237213, "grad_norm": 0.0, - "learning_rate": 1.3999582112409078e-05, - "loss": 0.9831, + "learning_rate": 1.40153174446783e-05, + "loss": 0.9737, "step": 13672 }, { - "epoch": 0.3879965947786606, + "epoch": 0.38745784805463457, "grad_norm": 0.0, - "learning_rate": 1.3998739729793083e-05, - "loss": 0.9439, + "learning_rate": 1.4014476874961669e-05, + "loss": 0.8338, "step": 13673 }, { - "epoch": 0.3880249716231555, + "epoch": 0.38748618549689706, "grad_norm": 0.0, - "learning_rate": 1.3997897313399552e-05, - "loss": 0.9266, + "learning_rate": 1.4013636271429612e-05, + "loss": 0.8778, "step": 13674 }, { - "epoch": 0.3880533484676504, + "epoch": 0.3875145229391595, "grad_norm": 0.0, - "learning_rate": 1.3997054863235601e-05, - "loss": 1.0042, + "learning_rate": 1.4012795634089205e-05, + "loss": 0.874, "step": 13675 }, { - "epoch": 0.3880817253121453, + "epoch": 0.387542860381422, "grad_norm": 0.0, - "learning_rate": 1.3996212379308351e-05, - "loss": 0.9635, + "learning_rate": 1.4011954962947529e-05, + "loss": 1.0082, "step": 13676 }, { - "epoch": 0.38811010215664016, + "epoch": 0.38757119782368443, "grad_norm": 0.0, - "learning_rate": 1.3995369861624914e-05, - "loss": 0.8843, + "learning_rate": 1.4011114258011667e-05, + "loss": 0.8958, "step": 13677 }, { - "epoch": 0.3881384790011351, + "epoch": 0.38759953526594687, "grad_norm": 0.0, - "learning_rate": 1.399452731019241e-05, - "loss": 0.9915, + "learning_rate": 1.4010273519288698e-05, + "loss": 0.797, "step": 13678 }, { - "epoch": 0.38816685584563, + "epoch": 0.38762787270820936, "grad_norm": 0.0, - "learning_rate": 1.3993684725017954e-05, - "loss": 0.9308, + "learning_rate": 1.400943274678571e-05, + "loss": 1.1114, "step": 13679 }, { - "epoch": 0.38819523269012485, + "epoch": 0.3876562101504718, "grad_norm": 0.0, - "learning_rate": 1.399284210610866e-05, - "loss": 0.985, + "learning_rate": 1.400859194050978e-05, + "loss": 0.8456, "step": 13680 }, { - "epoch": 0.38822360953461976, + "epoch": 0.3876845475927343, "grad_norm": 0.0, - "learning_rate": 1.3991999453471651e-05, - "loss": 0.9348, + "learning_rate": 1.4007751100467988e-05, + "loss": 0.9935, "step": 13681 }, { - "epoch": 0.3882519863791146, + "epoch": 0.38771288503499673, "grad_norm": 0.0, - "learning_rate": 1.3991156767114044e-05, - "loss": 0.8997, + "learning_rate": 1.4006910226667425e-05, + "loss": 0.7843, "step": 13682 }, { - "epoch": 0.38828036322360954, + "epoch": 0.38774122247725923, "grad_norm": 0.0, - "learning_rate": 1.3990314047042958e-05, - "loss": 1.0493, + "learning_rate": 1.4006069319115168e-05, + "loss": 1.0035, "step": 13683 }, { - "epoch": 0.38830874006810445, + "epoch": 0.38776955991952167, "grad_norm": 0.0, - "learning_rate": 1.3989471293265506e-05, - "loss": 0.906, + "learning_rate": 1.4005228377818298e-05, + "loss": 0.9869, "step": 13684 }, { - "epoch": 0.3883371169125993, + "epoch": 0.3877978973617841, "grad_norm": 0.0, - "learning_rate": 1.3988628505788814e-05, - "loss": 0.8797, + "learning_rate": 1.4004387402783906e-05, + "loss": 0.8164, "step": 13685 }, { - "epoch": 0.3883654937570942, + "epoch": 0.3878262348040466, "grad_norm": 0.0, - "learning_rate": 1.398778568462e-05, - "loss": 0.9417, + "learning_rate": 1.4003546394019071e-05, + "loss": 0.9493, "step": 13686 }, { - "epoch": 0.3883938706015891, + "epoch": 0.38785457224630904, "grad_norm": 0.0, - "learning_rate": 1.3986942829766175e-05, - "loss": 0.9067, + "learning_rate": 1.4002705351530878e-05, + "loss": 0.9518, "step": 13687 }, { - "epoch": 0.388422247446084, + "epoch": 0.38788290968857153, "grad_norm": 0.0, - "learning_rate": 1.3986099941234468e-05, - "loss": 0.9104, + "learning_rate": 1.4001864275326412e-05, + "loss": 1.0576, "step": 13688 }, { - "epoch": 0.38845062429057886, + "epoch": 0.38791124713083397, "grad_norm": 0.0, - "learning_rate": 1.3985257019031992e-05, - "loss": 0.9542, + "learning_rate": 1.4001023165412754e-05, + "loss": 0.9084, "step": 13689 }, { - "epoch": 0.3884790011350738, + "epoch": 0.3879395845730964, "grad_norm": 0.0, - "learning_rate": 1.3984414063165874e-05, - "loss": 1.0767, + "learning_rate": 1.4000182021796995e-05, + "loss": 0.9953, "step": 13690 }, { - "epoch": 0.3885073779795687, + "epoch": 0.3879679220153589, "grad_norm": 0.0, - "learning_rate": 1.3983571073643232e-05, - "loss": 0.7938, + "learning_rate": 1.3999340844486218e-05, + "loss": 0.9043, "step": 13691 }, { - "epoch": 0.38853575482406355, + "epoch": 0.38799625945762134, "grad_norm": 0.0, - "learning_rate": 1.3982728050471184e-05, - "loss": 0.9866, + "learning_rate": 1.3998499633487509e-05, + "loss": 0.9038, "step": 13692 }, { - "epoch": 0.38856413166855847, + "epoch": 0.38802459689988383, "grad_norm": 0.0, - "learning_rate": 1.3981884993656853e-05, - "loss": 0.9522, + "learning_rate": 1.3997658388807948e-05, + "loss": 0.9988, "step": 13693 }, { - "epoch": 0.3885925085130533, + "epoch": 0.38805293434214627, "grad_norm": 0.0, - "learning_rate": 1.3981041903207364e-05, - "loss": 0.9173, + "learning_rate": 1.3996817110454627e-05, + "loss": 0.8049, "step": 13694 }, { - "epoch": 0.38862088535754824, + "epoch": 0.38808127178440877, "grad_norm": 0.0, - "learning_rate": 1.3980198779129834e-05, - "loss": 0.9335, + "learning_rate": 1.3995975798434636e-05, + "loss": 0.8892, "step": 13695 }, { - "epoch": 0.38864926220204316, + "epoch": 0.3881096092266712, "grad_norm": 0.0, - "learning_rate": 1.3979355621431382e-05, - "loss": 0.9019, + "learning_rate": 1.3995134452755055e-05, + "loss": 0.99, "step": 13696 }, { - "epoch": 0.388677639046538, + "epoch": 0.38813794666893364, "grad_norm": 0.0, - "learning_rate": 1.3978512430119136e-05, - "loss": 0.934, + "learning_rate": 1.399429307342297e-05, + "loss": 1.0135, "step": 13697 }, { - "epoch": 0.38870601589103293, + "epoch": 0.38816628411119614, "grad_norm": 0.0, - "learning_rate": 1.397766920520022e-05, - "loss": 0.9029, + "learning_rate": 1.3993451660445472e-05, + "loss": 0.9403, "step": 13698 }, { - "epoch": 0.3887343927355278, + "epoch": 0.3881946215534586, "grad_norm": 0.0, - "learning_rate": 1.3976825946681748e-05, - "loss": 1.0693, + "learning_rate": 1.3992610213829649e-05, + "loss": 0.8184, "step": 13699 }, { - "epoch": 0.3887627695800227, + "epoch": 0.38822295899572107, "grad_norm": 0.0, - "learning_rate": 1.3975982654570852e-05, - "loss": 0.8519, + "learning_rate": 1.3991768733582589e-05, + "loss": 0.9822, "step": 13700 }, { - "epoch": 0.3887911464245176, + "epoch": 0.3882512964379835, "grad_norm": 0.0, - "learning_rate": 1.3975139328874651e-05, - "loss": 0.9105, + "learning_rate": 1.3990927219711377e-05, + "loss": 0.9311, "step": 13701 }, { - "epoch": 0.3888195232690125, + "epoch": 0.38827963388024594, "grad_norm": 0.0, - "learning_rate": 1.397429596960027e-05, - "loss": 0.9525, + "learning_rate": 1.3990085672223102e-05, + "loss": 0.9012, "step": 13702 }, { - "epoch": 0.3888479001135074, + "epoch": 0.38830797132250844, "grad_norm": 0.0, - "learning_rate": 1.3973452576754833e-05, - "loss": 0.8674, + "learning_rate": 1.3989244091124853e-05, + "loss": 0.9263, "step": 13703 }, { - "epoch": 0.38887627695800225, + "epoch": 0.3883363087647709, "grad_norm": 0.0, - "learning_rate": 1.3972609150345462e-05, - "loss": 0.9175, + "learning_rate": 1.3988402476423722e-05, + "loss": 0.8907, "step": 13704 }, { - "epoch": 0.38890465380249717, + "epoch": 0.38836464620703337, "grad_norm": 0.0, - "learning_rate": 1.3971765690379284e-05, - "loss": 0.901, + "learning_rate": 1.3987560828126796e-05, + "loss": 0.8995, "step": 13705 }, { - "epoch": 0.38893303064699203, + "epoch": 0.3883929836492958, "grad_norm": 0.0, - "learning_rate": 1.397092219686342e-05, - "loss": 1.0338, + "learning_rate": 1.3986719146241163e-05, + "loss": 0.96, "step": 13706 }, { - "epoch": 0.38896140749148694, + "epoch": 0.3884213210915583, "grad_norm": 0.0, - "learning_rate": 1.3970078669805001e-05, - "loss": 0.9414, + "learning_rate": 1.3985877430773916e-05, + "loss": 1.0428, "step": 13707 }, { - "epoch": 0.38898978433598186, + "epoch": 0.38844965853382074, "grad_norm": 0.0, - "learning_rate": 1.3969235109211149e-05, - "loss": 0.9027, + "learning_rate": 1.3985035681732141e-05, + "loss": 0.8971, "step": 13708 }, { - "epoch": 0.3890181611804767, + "epoch": 0.3884779959760832, "grad_norm": 0.0, - "learning_rate": 1.3968391515088987e-05, - "loss": 0.9025, + "learning_rate": 1.3984193899122932e-05, + "loss": 0.9667, "step": 13709 }, { - "epoch": 0.38904653802497163, + "epoch": 0.3885063334183457, "grad_norm": 0.0, - "learning_rate": 1.3967547887445645e-05, - "loss": 1.0408, + "learning_rate": 1.3983352082953378e-05, + "loss": 1.0591, "step": 13710 }, { - "epoch": 0.3890749148694665, + "epoch": 0.3885346708606081, "grad_norm": 0.0, - "learning_rate": 1.3966704226288247e-05, - "loss": 0.9308, + "learning_rate": 1.3982510233230569e-05, + "loss": 0.975, "step": 13711 }, { - "epoch": 0.3891032917139614, + "epoch": 0.3885630083028706, "grad_norm": 0.0, - "learning_rate": 1.396586053162392e-05, - "loss": 0.8612, + "learning_rate": 1.3981668349961599e-05, + "loss": 0.8703, "step": 13712 }, { - "epoch": 0.3891316685584563, + "epoch": 0.38859134574513304, "grad_norm": 0.0, - "learning_rate": 1.3965016803459792e-05, - "loss": 0.9675, + "learning_rate": 1.3980826433153558e-05, + "loss": 0.8595, "step": 13713 }, { - "epoch": 0.3891600454029512, + "epoch": 0.3886196831873955, "grad_norm": 0.0, - "learning_rate": 1.3964173041802986e-05, - "loss": 0.952, + "learning_rate": 1.3979984482813538e-05, + "loss": 0.9631, "step": 13714 }, { - "epoch": 0.3891884222474461, + "epoch": 0.388648020629658, "grad_norm": 0.0, - "learning_rate": 1.3963329246660636e-05, - "loss": 0.9402, + "learning_rate": 1.397914249894863e-05, + "loss": 0.9213, "step": 13715 }, { - "epoch": 0.38921679909194096, + "epoch": 0.3886763580719204, "grad_norm": 0.0, - "learning_rate": 1.3962485418039865e-05, - "loss": 0.9987, + "learning_rate": 1.3978300481565928e-05, + "loss": 0.8781, "step": 13716 }, { - "epoch": 0.38924517593643587, + "epoch": 0.3887046955141829, "grad_norm": 0.0, - "learning_rate": 1.3961641555947797e-05, - "loss": 0.9807, + "learning_rate": 1.3977458430672521e-05, + "loss": 0.9669, "step": 13717 }, { - "epoch": 0.3892735527809308, + "epoch": 0.38873303295644535, "grad_norm": 0.0, - "learning_rate": 1.396079766039157e-05, - "loss": 0.8969, + "learning_rate": 1.3976616346275505e-05, + "loss": 0.8518, "step": 13718 }, { - "epoch": 0.38930192962542565, + "epoch": 0.38876137039870784, "grad_norm": 0.0, - "learning_rate": 1.3959953731378305e-05, - "loss": 0.9177, + "learning_rate": 1.3975774228381975e-05, + "loss": 0.9113, "step": 13719 }, { - "epoch": 0.38933030646992056, + "epoch": 0.3887897078409703, "grad_norm": 0.0, - "learning_rate": 1.3959109768915133e-05, - "loss": 0.9459, + "learning_rate": 1.3974932076999023e-05, + "loss": 1.0074, "step": 13720 }, { - "epoch": 0.3893586833144154, + "epoch": 0.3888180452832327, "grad_norm": 0.0, - "learning_rate": 1.3958265773009184e-05, - "loss": 0.9168, + "learning_rate": 1.3974089892133742e-05, + "loss": 0.9429, "step": 13721 }, { - "epoch": 0.38938706015891034, + "epoch": 0.3888463827254952, "grad_norm": 0.0, - "learning_rate": 1.3957421743667582e-05, - "loss": 1.0418, + "learning_rate": 1.3973247673793226e-05, + "loss": 0.9314, "step": 13722 }, { - "epoch": 0.3894154370034052, + "epoch": 0.38887472016775765, "grad_norm": 0.0, - "learning_rate": 1.3956577680897465e-05, - "loss": 0.9589, + "learning_rate": 1.3972405421984568e-05, + "loss": 0.9099, "step": 13723 }, { - "epoch": 0.3894438138479001, + "epoch": 0.38890305761002014, "grad_norm": 0.0, - "learning_rate": 1.3955733584705957e-05, - "loss": 1.0278, + "learning_rate": 1.397156313671486e-05, + "loss": 0.9226, "step": 13724 }, { - "epoch": 0.389472190692395, + "epoch": 0.3889313950522826, "grad_norm": 0.0, - "learning_rate": 1.3954889455100192e-05, - "loss": 1.0245, + "learning_rate": 1.3970720817991208e-05, + "loss": 0.9129, "step": 13725 }, { - "epoch": 0.3895005675368899, + "epoch": 0.388959732494545, "grad_norm": 0.0, - "learning_rate": 1.3954045292087294e-05, - "loss": 1.0445, + "learning_rate": 1.3969878465820697e-05, + "loss": 1.006, "step": 13726 }, { - "epoch": 0.3895289443813848, + "epoch": 0.3889880699368075, "grad_norm": 0.0, - "learning_rate": 1.39532010956744e-05, - "loss": 0.8757, + "learning_rate": 1.3969036080210425e-05, + "loss": 0.9505, "step": 13727 }, { - "epoch": 0.38955732122587966, + "epoch": 0.38901640737906995, "grad_norm": 0.0, - "learning_rate": 1.3952356865868634e-05, - "loss": 0.7472, + "learning_rate": 1.396819366116749e-05, + "loss": 0.956, "step": 13728 }, { - "epoch": 0.3895856980703746, + "epoch": 0.38904474482133244, "grad_norm": 0.0, - "learning_rate": 1.3951512602677137e-05, - "loss": 0.8171, + "learning_rate": 1.3967351208698985e-05, + "loss": 0.9182, "step": 13729 }, { - "epoch": 0.3896140749148695, + "epoch": 0.3890730822635949, "grad_norm": 0.0, - "learning_rate": 1.3950668306107035e-05, - "loss": 0.9558, + "learning_rate": 1.3966508722812009e-05, + "loss": 1.0106, "step": 13730 }, { - "epoch": 0.38964245175936435, + "epoch": 0.3891014197058574, "grad_norm": 0.0, - "learning_rate": 1.3949823976165459e-05, - "loss": 0.99, + "learning_rate": 1.3965666203513653e-05, + "loss": 0.8907, "step": 13731 }, { - "epoch": 0.38967082860385927, + "epoch": 0.3891297571481198, "grad_norm": 0.0, - "learning_rate": 1.3948979612859543e-05, - "loss": 1.0927, + "learning_rate": 1.3964823650811021e-05, + "loss": 0.9812, "step": 13732 }, { - "epoch": 0.3896992054483541, + "epoch": 0.38915809459038225, "grad_norm": 0.0, - "learning_rate": 1.3948135216196419e-05, - "loss": 0.9189, + "learning_rate": 1.396398106471121e-05, + "loss": 0.9588, "step": 13733 }, { - "epoch": 0.38972758229284904, + "epoch": 0.38918643203264475, "grad_norm": 0.0, - "learning_rate": 1.3947290786183219e-05, - "loss": 1.0226, + "learning_rate": 1.3963138445221311e-05, + "loss": 0.9173, "step": 13734 }, { - "epoch": 0.38975595913734395, + "epoch": 0.3892147694749072, "grad_norm": 0.0, - "learning_rate": 1.3946446322827074e-05, - "loss": 0.8744, + "learning_rate": 1.3962295792348424e-05, + "loss": 0.8969, "step": 13735 }, { - "epoch": 0.3897843359818388, + "epoch": 0.3892431069171697, "grad_norm": 0.0, - "learning_rate": 1.3945601826135122e-05, - "loss": 0.9576, + "learning_rate": 1.396145310609965e-05, + "loss": 0.9332, "step": 13736 }, { - "epoch": 0.38981271282633373, + "epoch": 0.3892714443594321, "grad_norm": 0.0, - "learning_rate": 1.3944757296114495e-05, - "loss": 0.9943, + "learning_rate": 1.3960610386482085e-05, + "loss": 0.8614, "step": 13737 }, { - "epoch": 0.3898410896708286, + "epoch": 0.38929978180169456, "grad_norm": 0.0, - "learning_rate": 1.3943912732772324e-05, - "loss": 1.0063, + "learning_rate": 1.3959767633502827e-05, + "loss": 0.8787, "step": 13738 }, { - "epoch": 0.3898694665153235, + "epoch": 0.38932811924395705, "grad_norm": 0.0, - "learning_rate": 1.3943068136115745e-05, - "loss": 1.0759, + "learning_rate": 1.3958924847168977e-05, + "loss": 0.8562, "step": 13739 }, { - "epoch": 0.38989784335981836, + "epoch": 0.3893564566862195, "grad_norm": 0.0, - "learning_rate": 1.3942223506151895e-05, - "loss": 1.0068, + "learning_rate": 1.3958082027487634e-05, + "loss": 0.8196, "step": 13740 }, { - "epoch": 0.3899262202043133, + "epoch": 0.389384794128482, "grad_norm": 0.0, - "learning_rate": 1.39413788428879e-05, - "loss": 0.8505, + "learning_rate": 1.3957239174465898e-05, + "loss": 0.8618, "step": 13741 }, { - "epoch": 0.3899545970488082, + "epoch": 0.3894131315707444, "grad_norm": 0.0, - "learning_rate": 1.3940534146330906e-05, - "loss": 0.8435, + "learning_rate": 1.395639628811086e-05, + "loss": 0.9422, "step": 13742 }, { - "epoch": 0.38998297389330305, + "epoch": 0.3894414690130069, "grad_norm": 0.0, - "learning_rate": 1.393968941648804e-05, - "loss": 0.9496, + "learning_rate": 1.395555336842963e-05, + "loss": 0.9406, "step": 13743 }, { - "epoch": 0.39001135073779797, + "epoch": 0.38946980645526935, "grad_norm": 0.0, - "learning_rate": 1.393884465336644e-05, - "loss": 0.973, + "learning_rate": 1.3954710415429307e-05, + "loss": 0.9426, "step": 13744 }, { - "epoch": 0.39003972758229283, + "epoch": 0.3894981438975318, "grad_norm": 0.0, - "learning_rate": 1.3937999856973243e-05, - "loss": 0.9851, + "learning_rate": 1.3953867429116991e-05, + "loss": 0.9608, "step": 13745 }, { - "epoch": 0.39006810442678774, + "epoch": 0.3895264813397943, "grad_norm": 0.0, - "learning_rate": 1.3937155027315586e-05, - "loss": 0.9157, + "learning_rate": 1.395302440949978e-05, + "loss": 0.959, "step": 13746 }, { - "epoch": 0.39009648127128266, + "epoch": 0.3895548187820567, "grad_norm": 0.0, - "learning_rate": 1.39363101644006e-05, - "loss": 0.9643, + "learning_rate": 1.3952181356584773e-05, + "loss": 0.922, "step": 13747 }, { - "epoch": 0.3901248581157775, + "epoch": 0.3895831562243192, "grad_norm": 0.0, - "learning_rate": 1.3935465268235429e-05, - "loss": 0.8305, + "learning_rate": 1.395133827037908e-05, + "loss": 0.8969, "step": 13748 }, { - "epoch": 0.39015323496027243, + "epoch": 0.38961149366658165, "grad_norm": 0.0, - "learning_rate": 1.3934620338827204e-05, - "loss": 0.8918, + "learning_rate": 1.3950495150889793e-05, + "loss": 0.8736, "step": 13749 }, { - "epoch": 0.3901816118047673, + "epoch": 0.3896398311088441, "grad_norm": 0.0, - "learning_rate": 1.3933775376183065e-05, - "loss": 0.8395, + "learning_rate": 1.394965199812402e-05, + "loss": 0.9618, "step": 13750 }, { - "epoch": 0.3902099886492622, + "epoch": 0.3896681685511066, "grad_norm": 0.0, - "learning_rate": 1.3932930380310145e-05, - "loss": 0.9103, + "learning_rate": 1.3948808812088863e-05, + "loss": 0.9999, "step": 13751 }, { - "epoch": 0.39023836549375707, + "epoch": 0.389696505993369, "grad_norm": 0.0, - "learning_rate": 1.393208535121559e-05, - "loss": 0.9324, + "learning_rate": 1.394796559279142e-05, + "loss": 0.9297, "step": 13752 }, { - "epoch": 0.390266742338252, + "epoch": 0.3897248434356315, "grad_norm": 0.0, - "learning_rate": 1.3931240288906534e-05, - "loss": 0.9179, + "learning_rate": 1.39471223402388e-05, + "loss": 0.8285, "step": 13753 }, { - "epoch": 0.3902951191827469, + "epoch": 0.38975318087789396, "grad_norm": 0.0, - "learning_rate": 1.393039519339011e-05, - "loss": 0.8386, + "learning_rate": 1.3946279054438103e-05, + "loss": 0.9725, "step": 13754 }, { - "epoch": 0.39032349602724176, + "epoch": 0.38978151832015645, "grad_norm": 0.0, - "learning_rate": 1.3929550064673462e-05, - "loss": 0.9775, + "learning_rate": 1.3945435735396428e-05, + "loss": 0.8223, "step": 13755 }, { - "epoch": 0.39035187287173667, + "epoch": 0.3898098557624189, "grad_norm": 0.0, - "learning_rate": 1.3928704902763733e-05, - "loss": 0.9537, + "learning_rate": 1.3944592383120885e-05, + "loss": 0.9855, "step": 13756 }, { - "epoch": 0.39038024971623153, + "epoch": 0.3898381932046813, "grad_norm": 0.0, - "learning_rate": 1.392785970766805e-05, - "loss": 0.9454, + "learning_rate": 1.3943748997618576e-05, + "loss": 0.825, "step": 13757 }, { - "epoch": 0.39040862656072645, + "epoch": 0.3898665306469438, "grad_norm": 0.0, - "learning_rate": 1.392701447939356e-05, - "loss": 0.8869, + "learning_rate": 1.3942905578896606e-05, + "loss": 1.0316, "step": 13758 }, { - "epoch": 0.39043700340522136, + "epoch": 0.38989486808920626, "grad_norm": 0.0, - "learning_rate": 1.3926169217947407e-05, - "loss": 0.8387, + "learning_rate": 1.3942062126962078e-05, + "loss": 0.8985, "step": 13759 }, { - "epoch": 0.3904653802497162, + "epoch": 0.38992320553146875, "grad_norm": 0.0, - "learning_rate": 1.3925323923336724e-05, - "loss": 0.9605, + "learning_rate": 1.3941218641822094e-05, + "loss": 0.9991, "step": 13760 }, { - "epoch": 0.39049375709421114, + "epoch": 0.3899515429737312, "grad_norm": 0.0, - "learning_rate": 1.3924478595568654e-05, - "loss": 0.964, + "learning_rate": 1.3940375123483764e-05, + "loss": 0.9038, "step": 13761 }, { - "epoch": 0.390522133938706, + "epoch": 0.38997988041599363, "grad_norm": 0.0, - "learning_rate": 1.3923633234650336e-05, - "loss": 0.899, + "learning_rate": 1.393953157195419e-05, + "loss": 0.9136, "step": 13762 }, { - "epoch": 0.3905505107832009, + "epoch": 0.3900082178582561, "grad_norm": 0.0, - "learning_rate": 1.3922787840588913e-05, - "loss": 0.9016, + "learning_rate": 1.393868798724048e-05, + "loss": 0.8847, "step": 13763 }, { - "epoch": 0.3905788876276958, + "epoch": 0.39003655530051856, "grad_norm": 0.0, - "learning_rate": 1.3921942413391522e-05, - "loss": 0.9705, + "learning_rate": 1.3937844369349736e-05, + "loss": 1.0137, "step": 13764 }, { - "epoch": 0.3906072644721907, + "epoch": 0.39006489274278106, "grad_norm": 0.0, - "learning_rate": 1.392109695306531e-05, - "loss": 0.9413, + "learning_rate": 1.393700071828907e-05, + "loss": 0.9993, "step": 13765 }, { - "epoch": 0.3906356413166856, + "epoch": 0.3900932301850435, "grad_norm": 0.0, - "learning_rate": 1.3920251459617413e-05, - "loss": 0.9825, + "learning_rate": 1.3936157034065583e-05, + "loss": 0.9804, "step": 13766 }, { - "epoch": 0.39066401816118046, + "epoch": 0.39012156762730593, "grad_norm": 0.0, - "learning_rate": 1.3919405933054977e-05, - "loss": 0.8273, + "learning_rate": 1.3935313316686385e-05, + "loss": 0.8647, "step": 13767 }, { - "epoch": 0.3906923950056754, + "epoch": 0.3901499050695684, "grad_norm": 0.0, - "learning_rate": 1.3918560373385144e-05, - "loss": 0.8929, + "learning_rate": 1.3934469566158579e-05, + "loss": 0.9755, "step": 13768 }, { - "epoch": 0.39072077185017023, + "epoch": 0.39017824251183086, "grad_norm": 0.0, - "learning_rate": 1.3917714780615055e-05, - "loss": 1.014, + "learning_rate": 1.3933625782489275e-05, + "loss": 0.8684, "step": 13769 }, { - "epoch": 0.39074914869466515, + "epoch": 0.39020657995409336, "grad_norm": 0.0, - "learning_rate": 1.3916869154751853e-05, - "loss": 0.8862, + "learning_rate": 1.3932781965685583e-05, + "loss": 0.9484, "step": 13770 }, { - "epoch": 0.39077752553916006, + "epoch": 0.3902349173963558, "grad_norm": 0.0, - "learning_rate": 1.391602349580268e-05, - "loss": 0.84, + "learning_rate": 1.3931938115754606e-05, + "loss": 0.9286, "step": 13771 }, { - "epoch": 0.3908059023836549, + "epoch": 0.3902632548386183, "grad_norm": 0.0, - "learning_rate": 1.3915177803774682e-05, - "loss": 0.9004, + "learning_rate": 1.3931094232703456e-05, + "loss": 0.9002, "step": 13772 }, { - "epoch": 0.39083427922814984, + "epoch": 0.39029159228088073, "grad_norm": 0.0, - "learning_rate": 1.3914332078675e-05, - "loss": 0.8896, + "learning_rate": 1.3930250316539237e-05, + "loss": 0.9959, "step": 13773 }, { - "epoch": 0.3908626560726447, + "epoch": 0.39031992972314317, "grad_norm": 0.0, - "learning_rate": 1.3913486320510779e-05, - "loss": 0.8788, + "learning_rate": 1.3929406367269063e-05, + "loss": 0.9783, "step": 13774 }, { - "epoch": 0.3908910329171396, + "epoch": 0.39034826716540566, "grad_norm": 0.0, - "learning_rate": 1.3912640529289163e-05, - "loss": 0.8906, + "learning_rate": 1.3928562384900037e-05, + "loss": 1.0173, "step": 13775 }, { - "epoch": 0.39091940976163453, + "epoch": 0.3903766046076681, "grad_norm": 0.0, - "learning_rate": 1.3911794705017299e-05, - "loss": 0.9658, + "learning_rate": 1.3927718369439274e-05, + "loss": 0.8376, "step": 13776 }, { - "epoch": 0.3909477866061294, + "epoch": 0.3904049420499306, "grad_norm": 0.0, - "learning_rate": 1.3910948847702326e-05, - "loss": 0.9262, + "learning_rate": 1.3926874320893878e-05, + "loss": 1.0363, "step": 13777 }, { - "epoch": 0.3909761634506243, + "epoch": 0.39043327949219303, "grad_norm": 0.0, - "learning_rate": 1.3910102957351392e-05, - "loss": 0.952, + "learning_rate": 1.3926030239270962e-05, + "loss": 0.7351, "step": 13778 }, { - "epoch": 0.39100454029511916, + "epoch": 0.39046161693445547, "grad_norm": 0.0, - "learning_rate": 1.3909257033971645e-05, - "loss": 0.886, + "learning_rate": 1.3925186124577639e-05, + "loss": 0.9918, "step": 13779 }, { - "epoch": 0.3910329171396141, + "epoch": 0.39048995437671796, "grad_norm": 0.0, - "learning_rate": 1.390841107757023e-05, - "loss": 0.9382, + "learning_rate": 1.3924341976821013e-05, + "loss": 0.829, "step": 13780 }, { - "epoch": 0.391061293984109, + "epoch": 0.3905182918189804, "grad_norm": 0.0, - "learning_rate": 1.3907565088154286e-05, - "loss": 0.9, + "learning_rate": 1.3923497796008196e-05, + "loss": 0.8241, "step": 13781 }, { - "epoch": 0.39108967082860385, + "epoch": 0.3905466292612429, "grad_norm": 0.0, - "learning_rate": 1.3906719065730965e-05, - "loss": 0.9467, + "learning_rate": 1.39226535821463e-05, + "loss": 0.9079, "step": 13782 }, { - "epoch": 0.39111804767309877, + "epoch": 0.39057496670350533, "grad_norm": 0.0, - "learning_rate": 1.3905873010307413e-05, - "loss": 0.8146, + "learning_rate": 1.3921809335242438e-05, + "loss": 0.8628, "step": 13783 }, { - "epoch": 0.3911464245175936, + "epoch": 0.3906033041457678, "grad_norm": 0.0, - "learning_rate": 1.390502692189078e-05, - "loss": 0.9371, + "learning_rate": 1.3920965055303718e-05, + "loss": 1.0205, "step": 13784 }, { - "epoch": 0.39117480136208854, + "epoch": 0.39063164158803027, "grad_norm": 0.0, - "learning_rate": 1.3904180800488202e-05, - "loss": 0.8558, + "learning_rate": 1.3920120742337255e-05, + "loss": 0.8557, "step": 13785 }, { - "epoch": 0.3912031782065834, + "epoch": 0.3906599790302927, "grad_norm": 0.0, - "learning_rate": 1.3903334646106838e-05, - "loss": 0.9452, + "learning_rate": 1.391927639635016e-05, + "loss": 0.9757, "step": 13786 }, { - "epoch": 0.3912315550510783, + "epoch": 0.3906883164725552, "grad_norm": 0.0, - "learning_rate": 1.3902488458753834e-05, - "loss": 0.9286, + "learning_rate": 1.3918432017349543e-05, + "loss": 0.8494, "step": 13787 }, { - "epoch": 0.39125993189557323, + "epoch": 0.39071665391481764, "grad_norm": 0.0, - "learning_rate": 1.3901642238436326e-05, - "loss": 0.9285, + "learning_rate": 1.3917587605342517e-05, + "loss": 0.9463, "step": 13788 }, { - "epoch": 0.3912883087400681, + "epoch": 0.39074499135708013, "grad_norm": 0.0, - "learning_rate": 1.3900795985161475e-05, - "loss": 0.9262, + "learning_rate": 1.3916743160336197e-05, + "loss": 0.7977, "step": 13789 }, { - "epoch": 0.391316685584563, + "epoch": 0.39077332879934257, "grad_norm": 0.0, - "learning_rate": 1.3899949698936426e-05, - "loss": 0.9041, + "learning_rate": 1.3915898682337697e-05, + "loss": 0.8536, "step": 13790 }, { - "epoch": 0.39134506242905787, + "epoch": 0.390801666241605, "grad_norm": 0.0, - "learning_rate": 1.3899103379768327e-05, - "loss": 0.9006, + "learning_rate": 1.3915054171354126e-05, + "loss": 0.8854, "step": 13791 }, { - "epoch": 0.3913734392735528, + "epoch": 0.3908300036838675, "grad_norm": 0.0, - "learning_rate": 1.3898257027664324e-05, - "loss": 0.8718, + "learning_rate": 1.39142096273926e-05, + "loss": 0.956, "step": 13792 }, { - "epoch": 0.3914018161180477, + "epoch": 0.39085834112612994, "grad_norm": 0.0, - "learning_rate": 1.389741064263157e-05, - "loss": 0.9016, + "learning_rate": 1.3913365050460232e-05, + "loss": 1.0444, "step": 13793 }, { - "epoch": 0.39143019296254256, + "epoch": 0.39088667856839243, "grad_norm": 0.0, - "learning_rate": 1.3896564224677214e-05, - "loss": 0.9479, + "learning_rate": 1.3912520440564139e-05, + "loss": 0.7748, "step": 13794 }, { - "epoch": 0.39145856980703747, + "epoch": 0.39091501601065487, "grad_norm": 0.0, - "learning_rate": 1.3895717773808402e-05, - "loss": 1.0021, + "learning_rate": 1.3911675797711431e-05, + "loss": 0.9086, "step": 13795 }, { - "epoch": 0.39148694665153233, + "epoch": 0.39094335345291736, "grad_norm": 0.0, - "learning_rate": 1.3894871290032286e-05, - "loss": 0.8148, + "learning_rate": 1.3910831121909228e-05, + "loss": 0.9731, "step": 13796 }, { - "epoch": 0.39151532349602725, + "epoch": 0.3909716908951798, "grad_norm": 0.0, - "learning_rate": 1.3894024773356021e-05, - "loss": 0.9361, + "learning_rate": 1.3909986413164637e-05, + "loss": 0.974, "step": 13797 }, { - "epoch": 0.39154370034052216, + "epoch": 0.39100002833744224, "grad_norm": 0.0, - "learning_rate": 1.3893178223786751e-05, - "loss": 1.0211, + "learning_rate": 1.3909141671484785e-05, + "loss": 0.942, "step": 13798 }, { - "epoch": 0.391572077185017, + "epoch": 0.39102836577970473, "grad_norm": 0.0, - "learning_rate": 1.3892331641331632e-05, - "loss": 0.9157, + "learning_rate": 1.3908296896876778e-05, + "loss": 0.8351, "step": 13799 }, { - "epoch": 0.39160045402951194, + "epoch": 0.3910567032219672, "grad_norm": 0.0, - "learning_rate": 1.3891485025997813e-05, - "loss": 0.8831, + "learning_rate": 1.3907452089347738e-05, + "loss": 0.9597, "step": 13800 }, { - "epoch": 0.3916288308740068, + "epoch": 0.39108504066422967, "grad_norm": 0.0, - "learning_rate": 1.3890638377792442e-05, - "loss": 0.907, + "learning_rate": 1.3906607248904773e-05, + "loss": 0.7528, "step": 13801 }, { - "epoch": 0.3916572077185017, + "epoch": 0.3911133781064921, "grad_norm": 0.0, - "learning_rate": 1.3889791696722676e-05, - "loss": 0.9561, + "learning_rate": 1.3905762375555006e-05, + "loss": 1.1571, "step": 13802 }, { - "epoch": 0.39168558456299657, + "epoch": 0.39114171554875454, "grad_norm": 0.0, - "learning_rate": 1.3888944982795665e-05, - "loss": 0.9609, + "learning_rate": 1.3904917469305555e-05, + "loss": 0.7882, "step": 13803 }, { - "epoch": 0.3917139614074915, + "epoch": 0.39117005299101704, "grad_norm": 0.0, - "learning_rate": 1.388809823601856e-05, - "loss": 1.0447, + "learning_rate": 1.3904072530163532e-05, + "loss": 0.9209, "step": 13804 }, { - "epoch": 0.3917423382519864, + "epoch": 0.3911983904332795, "grad_norm": 0.0, - "learning_rate": 1.3887251456398512e-05, - "loss": 1.0009, + "learning_rate": 1.3903227558136057e-05, + "loss": 0.9428, "step": 13805 }, { - "epoch": 0.39177071509648126, + "epoch": 0.39122672787554197, "grad_norm": 0.0, - "learning_rate": 1.3886404643942679e-05, - "loss": 0.9053, + "learning_rate": 1.3902382553230245e-05, + "loss": 0.8962, "step": 13806 }, { - "epoch": 0.3917990919409762, + "epoch": 0.3912550653178044, "grad_norm": 0.0, - "learning_rate": 1.3885557798658212e-05, - "loss": 0.8792, + "learning_rate": 1.3901537515453218e-05, + "loss": 0.9579, "step": 13807 }, { - "epoch": 0.39182746878547103, + "epoch": 0.3912834027600669, "grad_norm": 0.0, - "learning_rate": 1.388471092055226e-05, - "loss": 0.971, + "learning_rate": 1.390069244481209e-05, + "loss": 0.8192, "step": 13808 }, { - "epoch": 0.39185584562996595, + "epoch": 0.39131174020232934, "grad_norm": 0.0, - "learning_rate": 1.3883864009631984e-05, - "loss": 0.9134, + "learning_rate": 1.3899847341313982e-05, + "loss": 0.8841, "step": 13809 }, { - "epoch": 0.39188422247446086, + "epoch": 0.3913400776445918, "grad_norm": 0.0, - "learning_rate": 1.3883017065904534e-05, - "loss": 0.9606, + "learning_rate": 1.389900220496601e-05, + "loss": 0.9464, "step": 13810 }, { - "epoch": 0.3919125993189557, + "epoch": 0.39136841508685427, "grad_norm": 0.0, - "learning_rate": 1.388217008937706e-05, - "loss": 1.1116, + "learning_rate": 1.3898157035775296e-05, + "loss": 0.8945, "step": 13811 }, { - "epoch": 0.39194097616345064, + "epoch": 0.3913967525291167, "grad_norm": 0.0, - "learning_rate": 1.3881323080056723e-05, - "loss": 0.8625, + "learning_rate": 1.3897311833748958e-05, + "loss": 1.095, "step": 13812 }, { - "epoch": 0.3919693530079455, + "epoch": 0.3914250899713792, "grad_norm": 0.0, - "learning_rate": 1.3880476037950676e-05, - "loss": 0.9318, + "learning_rate": 1.3896466598894115e-05, + "loss": 0.8892, "step": 13813 }, { - "epoch": 0.3919977298524404, + "epoch": 0.39145342741364164, "grad_norm": 0.0, - "learning_rate": 1.3879628963066076e-05, - "loss": 1.0018, + "learning_rate": 1.3895621331217887e-05, + "loss": 0.8766, "step": 13814 }, { - "epoch": 0.39202610669693533, + "epoch": 0.3914817648559041, "grad_norm": 0.0, - "learning_rate": 1.3878781855410072e-05, - "loss": 0.8717, + "learning_rate": 1.3894776030727393e-05, + "loss": 0.9882, "step": 13815 }, { - "epoch": 0.3920544835414302, + "epoch": 0.3915101022981666, "grad_norm": 0.0, - "learning_rate": 1.3877934714989824e-05, - "loss": 0.8972, + "learning_rate": 1.3893930697429756e-05, + "loss": 0.9829, "step": 13816 }, { - "epoch": 0.3920828603859251, + "epoch": 0.391538439740429, "grad_norm": 0.0, - "learning_rate": 1.3877087541812491e-05, - "loss": 0.9429, + "learning_rate": 1.3893085331332094e-05, + "loss": 0.8982, "step": 13817 }, { - "epoch": 0.39211123723041996, + "epoch": 0.3915667771826915, "grad_norm": 0.0, - "learning_rate": 1.387624033588522e-05, - "loss": 0.9256, + "learning_rate": 1.3892239932441526e-05, + "loss": 0.8803, "step": 13818 }, { - "epoch": 0.3921396140749149, + "epoch": 0.39159511462495394, "grad_norm": 0.0, - "learning_rate": 1.3875393097215173e-05, - "loss": 0.9634, + "learning_rate": 1.3891394500765181e-05, + "loss": 0.9166, "step": 13819 }, { - "epoch": 0.39216799091940974, + "epoch": 0.39162345206721644, "grad_norm": 0.0, - "learning_rate": 1.3874545825809509e-05, - "loss": 0.8824, + "learning_rate": 1.389054903631017e-05, + "loss": 0.9916, "step": 13820 }, { - "epoch": 0.39219636776390465, + "epoch": 0.3916517895094789, "grad_norm": 0.0, - "learning_rate": 1.3873698521675383e-05, - "loss": 0.8734, + "learning_rate": 1.3889703539083621e-05, + "loss": 0.9184, "step": 13821 }, { - "epoch": 0.39222474460839957, + "epoch": 0.3916801269517413, "grad_norm": 0.0, - "learning_rate": 1.3872851184819948e-05, - "loss": 0.9262, + "learning_rate": 1.3888858009092653e-05, + "loss": 0.8757, "step": 13822 }, { - "epoch": 0.3922531214528944, + "epoch": 0.3917084643940038, "grad_norm": 0.0, - "learning_rate": 1.3872003815250367e-05, - "loss": 0.8997, + "learning_rate": 1.3888012446344394e-05, + "loss": 1.0008, "step": 13823 }, { - "epoch": 0.39228149829738934, + "epoch": 0.39173680183626625, "grad_norm": 0.0, - "learning_rate": 1.3871156412973797e-05, - "loss": 0.971, + "learning_rate": 1.3887166850845963e-05, + "loss": 1.0313, "step": 13824 }, { - "epoch": 0.3923098751418842, + "epoch": 0.39176513927852874, "grad_norm": 0.0, - "learning_rate": 1.387030897799739e-05, - "loss": 0.9329, + "learning_rate": 1.388632122260448e-05, + "loss": 0.8447, "step": 13825 }, { - "epoch": 0.3923382519863791, + "epoch": 0.3917934767207912, "grad_norm": 0.0, - "learning_rate": 1.3869461510328314e-05, - "loss": 0.87, + "learning_rate": 1.3885475561627069e-05, + "loss": 0.9427, "step": 13826 }, { - "epoch": 0.39236662883087403, + "epoch": 0.3918218141630536, "grad_norm": 0.0, - "learning_rate": 1.386861400997372e-05, - "loss": 0.9349, + "learning_rate": 1.3884629867920856e-05, + "loss": 0.926, "step": 13827 }, { - "epoch": 0.3923950056753689, + "epoch": 0.3918501516053161, "grad_norm": 0.0, - "learning_rate": 1.3867766476940771e-05, - "loss": 0.823, + "learning_rate": 1.3883784141492961e-05, + "loss": 0.9752, "step": 13828 }, { - "epoch": 0.3924233825198638, + "epoch": 0.39187848904757855, "grad_norm": 0.0, - "learning_rate": 1.3866918911236624e-05, - "loss": 1.0377, + "learning_rate": 1.3882938382350513e-05, + "loss": 0.9836, "step": 13829 }, { - "epoch": 0.39245175936435867, + "epoch": 0.39190682648984104, "grad_norm": 0.0, - "learning_rate": 1.386607131286844e-05, - "loss": 0.9067, + "learning_rate": 1.388209259050063e-05, + "loss": 0.8741, "step": 13830 }, { - "epoch": 0.3924801362088536, + "epoch": 0.3919351639321035, "grad_norm": 0.0, - "learning_rate": 1.3865223681843376e-05, - "loss": 0.9372, + "learning_rate": 1.3881246765950442e-05, + "loss": 0.8936, "step": 13831 }, { - "epoch": 0.39250851305334844, + "epoch": 0.391963501374366, "grad_norm": 0.0, - "learning_rate": 1.3864376018168594e-05, - "loss": 0.8533, + "learning_rate": 1.3880400908707073e-05, + "loss": 0.9603, "step": 13832 }, { - "epoch": 0.39253688989784336, + "epoch": 0.3919918388166284, "grad_norm": 0.0, - "learning_rate": 1.3863528321851256e-05, - "loss": 0.9862, + "learning_rate": 1.3879555018777643e-05, + "loss": 1.0058, "step": 13833 }, { - "epoch": 0.39256526674233827, + "epoch": 0.39202017625889085, "grad_norm": 0.0, - "learning_rate": 1.386268059289852e-05, - "loss": 0.9292, + "learning_rate": 1.3878709096169281e-05, + "loss": 0.839, "step": 13834 }, { - "epoch": 0.39259364358683313, + "epoch": 0.39204851370115334, "grad_norm": 0.0, - "learning_rate": 1.3861832831317546e-05, - "loss": 0.8841, + "learning_rate": 1.3877863140889111e-05, + "loss": 0.9523, "step": 13835 }, { - "epoch": 0.39262202043132804, + "epoch": 0.3920768511434158, "grad_norm": 0.0, - "learning_rate": 1.3860985037115497e-05, - "loss": 0.9513, + "learning_rate": 1.3877017152944261e-05, + "loss": 1.0726, "step": 13836 }, { - "epoch": 0.3926503972758229, + "epoch": 0.3921051885856783, "grad_norm": 0.0, - "learning_rate": 1.3860137210299533e-05, - "loss": 1.0537, + "learning_rate": 1.3876171132341858e-05, + "loss": 0.9275, "step": 13837 }, { - "epoch": 0.3926787741203178, + "epoch": 0.3921335260279407, "grad_norm": 0.0, - "learning_rate": 1.3859289350876822e-05, - "loss": 0.9168, + "learning_rate": 1.3875325079089024e-05, + "loss": 1.0164, "step": 13838 }, { - "epoch": 0.39270715096481273, + "epoch": 0.39216186347020315, "grad_norm": 0.0, - "learning_rate": 1.3858441458854512e-05, - "loss": 1.0008, + "learning_rate": 1.3874478993192886e-05, + "loss": 0.8804, "step": 13839 }, { - "epoch": 0.3927355278093076, + "epoch": 0.39219020091246565, "grad_norm": 0.0, - "learning_rate": 1.3857593534239779e-05, - "loss": 1.0131, + "learning_rate": 1.3873632874660575e-05, + "loss": 1.0111, "step": 13840 }, { - "epoch": 0.3927639046538025, + "epoch": 0.3922185383547281, "grad_norm": 0.0, - "learning_rate": 1.3856745577039781e-05, - "loss": 0.9344, + "learning_rate": 1.3872786723499215e-05, + "loss": 0.7907, "step": 13841 }, { - "epoch": 0.39279228149829737, + "epoch": 0.3922468757969906, "grad_norm": 0.0, - "learning_rate": 1.3855897587261673e-05, - "loss": 0.9395, + "learning_rate": 1.3871940539715936e-05, + "loss": 0.9369, "step": 13842 }, { - "epoch": 0.3928206583427923, + "epoch": 0.392275213239253, "grad_norm": 0.0, - "learning_rate": 1.385504956491263e-05, - "loss": 0.9003, + "learning_rate": 1.387109432331786e-05, + "loss": 0.903, "step": 13843 }, { - "epoch": 0.3928490351872872, + "epoch": 0.3923035506815155, "grad_norm": 0.0, - "learning_rate": 1.385420150999981e-05, - "loss": 1.0042, + "learning_rate": 1.3870248074312123e-05, + "loss": 0.968, "step": 13844 }, { - "epoch": 0.39287741203178206, + "epoch": 0.39233188812377795, "grad_norm": 0.0, - "learning_rate": 1.3853353422530375e-05, - "loss": 0.8443, + "learning_rate": 1.3869401792705847e-05, + "loss": 0.8985, "step": 13845 }, { - "epoch": 0.392905788876277, + "epoch": 0.3923602255660404, "grad_norm": 0.0, - "learning_rate": 1.3852505302511492e-05, - "loss": 0.869, + "learning_rate": 1.3868555478506162e-05, + "loss": 0.9347, "step": 13846 }, { - "epoch": 0.39293416572077183, + "epoch": 0.3923885630083029, "grad_norm": 0.0, - "learning_rate": 1.3851657149950325e-05, - "loss": 0.8067, + "learning_rate": 1.38677091317202e-05, + "loss": 0.9309, "step": 13847 }, { - "epoch": 0.39296254256526675, + "epoch": 0.3924169004505653, "grad_norm": 0.0, - "learning_rate": 1.3850808964854032e-05, - "loss": 1.0316, + "learning_rate": 1.3866862752355088e-05, + "loss": 0.9613, "step": 13848 }, { - "epoch": 0.3929909194097616, + "epoch": 0.3924452378928278, "grad_norm": 0.0, - "learning_rate": 1.3849960747229784e-05, - "loss": 0.9077, + "learning_rate": 1.3866016340417953e-05, + "loss": 1.0031, "step": 13849 }, { - "epoch": 0.3930192962542565, + "epoch": 0.39247357533509025, "grad_norm": 0.0, - "learning_rate": 1.3849112497084747e-05, - "loss": 0.953, + "learning_rate": 1.3865169895915931e-05, + "loss": 0.9284, "step": 13850 }, { - "epoch": 0.39304767309875144, + "epoch": 0.3925019127773527, "grad_norm": 0.0, - "learning_rate": 1.3848264214426081e-05, - "loss": 0.8128, + "learning_rate": 1.3864323418856142e-05, + "loss": 1.0554, "step": 13851 }, { - "epoch": 0.3930760499432463, + "epoch": 0.3925302502196152, "grad_norm": 0.0, - "learning_rate": 1.3847415899260955e-05, - "loss": 1.0235, + "learning_rate": 1.3863476909245726e-05, + "loss": 0.9657, "step": 13852 }, { - "epoch": 0.3931044267877412, + "epoch": 0.3925585876618776, "grad_norm": 0.0, - "learning_rate": 1.3846567551596534e-05, - "loss": 0.8986, + "learning_rate": 1.3862630367091808e-05, + "loss": 1.0656, "step": 13853 }, { - "epoch": 0.39313280363223607, + "epoch": 0.3925869251041401, "grad_norm": 0.0, - "learning_rate": 1.3845719171439986e-05, - "loss": 0.8523, + "learning_rate": 1.386178379240152e-05, + "loss": 0.8966, "step": 13854 }, { - "epoch": 0.393161180476731, + "epoch": 0.39261526254640255, "grad_norm": 0.0, - "learning_rate": 1.3844870758798474e-05, - "loss": 0.9521, + "learning_rate": 1.3860937185181991e-05, + "loss": 0.9983, "step": 13855 }, { - "epoch": 0.3931895573212259, + "epoch": 0.39264359998866505, "grad_norm": 0.0, - "learning_rate": 1.3844022313679167e-05, - "loss": 0.8939, + "learning_rate": 1.3860090545440357e-05, + "loss": 1.0278, "step": 13856 }, { - "epoch": 0.39321793416572076, + "epoch": 0.3926719374309275, "grad_norm": 0.0, - "learning_rate": 1.3843173836089228e-05, - "loss": 0.9526, + "learning_rate": 1.3859243873183748e-05, + "loss": 0.98, "step": 13857 }, { - "epoch": 0.3932463110102157, + "epoch": 0.3927002748731899, "grad_norm": 0.0, - "learning_rate": 1.3842325326035829e-05, - "loss": 0.8502, + "learning_rate": 1.3858397168419292e-05, + "loss": 0.9621, "step": 13858 }, { - "epoch": 0.39327468785471054, + "epoch": 0.3927286123154524, "grad_norm": 0.0, - "learning_rate": 1.384147678352613e-05, - "loss": 0.9346, + "learning_rate": 1.3857550431154123e-05, + "loss": 1.0134, "step": 13859 }, { - "epoch": 0.39330306469920545, + "epoch": 0.39275694975771486, "grad_norm": 0.0, - "learning_rate": 1.384062820856731e-05, - "loss": 1.1395, + "learning_rate": 1.3856703661395376e-05, + "loss": 0.9272, "step": 13860 }, { - "epoch": 0.39333144154370037, + "epoch": 0.39278528719997735, "grad_norm": 0.0, - "learning_rate": 1.3839779601166532e-05, - "loss": 0.8685, + "learning_rate": 1.3855856859150182e-05, + "loss": 0.9537, "step": 13861 }, { - "epoch": 0.3933598183881952, + "epoch": 0.3928136246422398, "grad_norm": 0.0, - "learning_rate": 1.383893096133096e-05, - "loss": 1.0161, + "learning_rate": 1.3855010024425677e-05, + "loss": 0.9222, "step": 13862 }, { - "epoch": 0.39338819523269014, + "epoch": 0.3928419620845022, "grad_norm": 0.0, - "learning_rate": 1.3838082289067765e-05, - "loss": 0.7704, + "learning_rate": 1.3854163157228987e-05, + "loss": 0.9466, "step": 13863 }, { - "epoch": 0.393416572077185, + "epoch": 0.3928702995267647, "grad_norm": 0.0, - "learning_rate": 1.3837233584384118e-05, - "loss": 0.8249, + "learning_rate": 1.3853316257567251e-05, + "loss": 0.9022, "step": 13864 }, { - "epoch": 0.3934449489216799, + "epoch": 0.39289863696902716, "grad_norm": 0.0, - "learning_rate": 1.3836384847287186e-05, - "loss": 0.9061, + "learning_rate": 1.3852469325447599e-05, + "loss": 1.0635, "step": 13865 }, { - "epoch": 0.3934733257661748, + "epoch": 0.39292697441128965, "grad_norm": 0.0, - "learning_rate": 1.3835536077784135e-05, - "loss": 0.8668, + "learning_rate": 1.3851622360877169e-05, + "loss": 0.7807, "step": 13866 }, { - "epoch": 0.3935017026106697, + "epoch": 0.3929553118535521, "grad_norm": 0.0, - "learning_rate": 1.3834687275882144e-05, - "loss": 0.994, + "learning_rate": 1.3850775363863094e-05, + "loss": 0.8193, "step": 13867 }, { - "epoch": 0.3935300794551646, + "epoch": 0.3929836492958146, "grad_norm": 0.0, - "learning_rate": 1.3833838441588374e-05, - "loss": 0.8522, + "learning_rate": 1.3849928334412508e-05, + "loss": 0.9419, "step": 13868 }, { - "epoch": 0.39355845629965946, + "epoch": 0.393011986738077, "grad_norm": 0.0, - "learning_rate": 1.3832989574909999e-05, - "loss": 1.0071, + "learning_rate": 1.3849081272532545e-05, + "loss": 0.9453, "step": 13869 }, { - "epoch": 0.3935868331441544, + "epoch": 0.39304032418033946, "grad_norm": 0.0, - "learning_rate": 1.3832140675854189e-05, - "loss": 0.9086, + "learning_rate": 1.3848234178230344e-05, + "loss": 0.824, "step": 13870 }, { - "epoch": 0.39361520998864924, + "epoch": 0.39306866162260196, "grad_norm": 0.0, - "learning_rate": 1.3831291744428117e-05, - "loss": 0.9227, + "learning_rate": 1.3847387051513035e-05, + "loss": 0.9944, "step": 13871 }, { - "epoch": 0.39364358683314415, + "epoch": 0.3930969990648644, "grad_norm": 0.0, - "learning_rate": 1.3830442780638946e-05, - "loss": 1.0189, + "learning_rate": 1.3846539892387755e-05, + "loss": 0.8713, "step": 13872 }, { - "epoch": 0.39367196367763907, + "epoch": 0.3931253365071269, "grad_norm": 0.0, - "learning_rate": 1.3829593784493855e-05, - "loss": 0.9517, + "learning_rate": 1.3845692700861642e-05, + "loss": 0.9542, "step": 13873 }, { - "epoch": 0.39370034052213393, + "epoch": 0.3931536739493893, "grad_norm": 0.0, - "learning_rate": 1.3828744756000015e-05, - "loss": 0.8601, + "learning_rate": 1.3844845476941833e-05, + "loss": 0.8991, "step": 13874 }, { - "epoch": 0.39372871736662884, + "epoch": 0.39318201139165176, "grad_norm": 0.0, - "learning_rate": 1.3827895695164595e-05, - "loss": 0.7895, + "learning_rate": 1.3843998220635462e-05, + "loss": 0.9056, "step": 13875 }, { - "epoch": 0.3937570942111237, + "epoch": 0.39321034883391426, "grad_norm": 0.0, - "learning_rate": 1.3827046601994765e-05, - "loss": 1.0492, + "learning_rate": 1.3843150931949665e-05, + "loss": 0.7999, "step": 13876 }, { - "epoch": 0.3937854710556186, + "epoch": 0.3932386862761767, "grad_norm": 0.0, - "learning_rate": 1.3826197476497705e-05, - "loss": 0.8737, + "learning_rate": 1.3842303610891582e-05, + "loss": 0.918, "step": 13877 }, { - "epoch": 0.39381384790011353, + "epoch": 0.3932670237184392, "grad_norm": 0.0, - "learning_rate": 1.3825348318680582e-05, - "loss": 0.924, + "learning_rate": 1.384145625746835e-05, + "loss": 0.9252, "step": 13878 }, { - "epoch": 0.3938422247446084, + "epoch": 0.39329536116070163, "grad_norm": 0.0, - "learning_rate": 1.3824499128550569e-05, - "loss": 0.8423, + "learning_rate": 1.3840608871687104e-05, + "loss": 0.8298, "step": 13879 }, { - "epoch": 0.3938706015891033, + "epoch": 0.3933236986029641, "grad_norm": 0.0, - "learning_rate": 1.3823649906114837e-05, - "loss": 0.9807, + "learning_rate": 1.383976145355498e-05, + "loss": 0.8673, "step": 13880 }, { - "epoch": 0.39389897843359817, + "epoch": 0.39335203604522656, "grad_norm": 0.0, - "learning_rate": 1.3822800651380565e-05, - "loss": 0.7928, + "learning_rate": 1.3838914003079125e-05, + "loss": 0.8796, "step": 13881 }, { - "epoch": 0.3939273552780931, + "epoch": 0.393380373487489, "grad_norm": 0.0, - "learning_rate": 1.3821951364354924e-05, - "loss": 0.8655, + "learning_rate": 1.3838066520266672e-05, + "loss": 0.9536, "step": 13882 }, { - "epoch": 0.39395573212258794, + "epoch": 0.3934087109297515, "grad_norm": 0.0, - "learning_rate": 1.3821102045045088e-05, - "loss": 0.8925, + "learning_rate": 1.3837219005124758e-05, + "loss": 0.9321, "step": 13883 }, { - "epoch": 0.39398410896708286, + "epoch": 0.39343704837201393, "grad_norm": 0.0, - "learning_rate": 1.3820252693458231e-05, - "loss": 0.9717, + "learning_rate": 1.383637145766052e-05, + "loss": 1.0394, "step": 13884 }, { - "epoch": 0.3940124858115778, + "epoch": 0.3934653858142764, "grad_norm": 0.0, - "learning_rate": 1.3819403309601528e-05, - "loss": 0.982, + "learning_rate": 1.3835523877881106e-05, + "loss": 0.9401, "step": 13885 }, { - "epoch": 0.39404086265607263, + "epoch": 0.39349372325653886, "grad_norm": 0.0, - "learning_rate": 1.3818553893482154e-05, - "loss": 0.8487, + "learning_rate": 1.3834676265793646e-05, + "loss": 0.9547, "step": 13886 }, { - "epoch": 0.39406923950056755, + "epoch": 0.3935220606988013, "grad_norm": 0.0, - "learning_rate": 1.3817704445107283e-05, - "loss": 0.981, + "learning_rate": 1.3833828621405286e-05, + "loss": 0.9684, "step": 13887 }, { - "epoch": 0.3940976163450624, + "epoch": 0.3935503981410638, "grad_norm": 0.0, - "learning_rate": 1.381685496448409e-05, - "loss": 0.9807, + "learning_rate": 1.383298094472316e-05, + "loss": 1.011, "step": 13888 }, { - "epoch": 0.3941259931895573, + "epoch": 0.39357873558332623, "grad_norm": 0.0, - "learning_rate": 1.3816005451619754e-05, - "loss": 0.8261, + "learning_rate": 1.3832133235754417e-05, + "loss": 0.8092, "step": 13889 }, { - "epoch": 0.39415437003405224, + "epoch": 0.3936070730255887, "grad_norm": 0.0, - "learning_rate": 1.3815155906521445e-05, - "loss": 0.849, + "learning_rate": 1.383128549450619e-05, + "loss": 0.8995, "step": 13890 }, { - "epoch": 0.3941827468785471, + "epoch": 0.39363541046785117, "grad_norm": 0.0, - "learning_rate": 1.3814306329196345e-05, - "loss": 0.882, + "learning_rate": 1.3830437720985626e-05, + "loss": 1.0587, "step": 13891 }, { - "epoch": 0.394211123723042, + "epoch": 0.39366374791011366, "grad_norm": 0.0, - "learning_rate": 1.381345671965163e-05, - "loss": 0.9118, + "learning_rate": 1.3829589915199859e-05, + "loss": 0.8361, "step": 13892 }, { - "epoch": 0.39423950056753687, + "epoch": 0.3936920853523761, "grad_norm": 0.0, - "learning_rate": 1.3812607077894471e-05, - "loss": 0.9675, + "learning_rate": 1.3828742077156035e-05, + "loss": 0.9079, "step": 13893 }, { - "epoch": 0.3942678774120318, + "epoch": 0.39372042279463854, "grad_norm": 0.0, - "learning_rate": 1.3811757403932048e-05, - "loss": 0.8817, + "learning_rate": 1.3827894206861294e-05, + "loss": 0.8567, "step": 13894 }, { - "epoch": 0.3942962542565267, + "epoch": 0.39374876023690103, "grad_norm": 0.0, - "learning_rate": 1.3810907697771543e-05, - "loss": 1.0108, + "learning_rate": 1.3827046304322779e-05, + "loss": 0.8966, "step": 13895 }, { - "epoch": 0.39432463110102156, + "epoch": 0.39377709767916347, "grad_norm": 0.0, - "learning_rate": 1.3810057959420124e-05, - "loss": 0.8345, + "learning_rate": 1.3826198369547635e-05, + "loss": 0.9261, "step": 13896 }, { - "epoch": 0.3943530079455165, + "epoch": 0.39380543512142596, "grad_norm": 0.0, - "learning_rate": 1.3809208188884978e-05, - "loss": 0.9331, + "learning_rate": 1.3825350402542999e-05, + "loss": 0.9285, "step": 13897 }, { - "epoch": 0.39438138479001134, + "epoch": 0.3938337725636884, "grad_norm": 0.0, - "learning_rate": 1.380835838617328e-05, - "loss": 1.015, + "learning_rate": 1.3824502403316015e-05, + "loss": 0.928, "step": 13898 }, { - "epoch": 0.39440976163450625, + "epoch": 0.39386211000595084, "grad_norm": 0.0, - "learning_rate": 1.3807508551292207e-05, - "loss": 0.9108, + "learning_rate": 1.3823654371873827e-05, + "loss": 0.9431, "step": 13899 }, { - "epoch": 0.3944381384790011, + "epoch": 0.39389044744821333, "grad_norm": 0.0, - "learning_rate": 1.3806658684248935e-05, - "loss": 0.8598, + "learning_rate": 1.3822806308223579e-05, + "loss": 0.8949, "step": 13900 }, { - "epoch": 0.394466515323496, + "epoch": 0.39391878489047577, "grad_norm": 0.0, - "learning_rate": 1.380580878505065e-05, - "loss": 0.8629, + "learning_rate": 1.3821958212372413e-05, + "loss": 0.9617, "step": 13901 }, { - "epoch": 0.39449489216799094, + "epoch": 0.39394712233273826, "grad_norm": 0.0, - "learning_rate": 1.3804958853704524e-05, - "loss": 0.9199, + "learning_rate": 1.3821110084327476e-05, + "loss": 0.8206, "step": 13902 }, { - "epoch": 0.3945232690124858, + "epoch": 0.3939754597750007, "grad_norm": 0.0, - "learning_rate": 1.380410889021774e-05, - "loss": 0.9389, + "learning_rate": 1.382026192409591e-05, + "loss": 0.8789, "step": 13903 }, { - "epoch": 0.3945516458569807, + "epoch": 0.3940037972172632, "grad_norm": 0.0, - "learning_rate": 1.3803258894597478e-05, - "loss": 0.8932, + "learning_rate": 1.3819413731684858e-05, + "loss": 0.938, "step": 13904 }, { - "epoch": 0.3945800227014756, + "epoch": 0.39403213465952563, "grad_norm": 0.0, - "learning_rate": 1.3802408866850917e-05, - "loss": 1.0344, + "learning_rate": 1.3818565507101464e-05, + "loss": 0.9071, "step": 13905 }, { - "epoch": 0.3946083995459705, + "epoch": 0.3940604721017881, "grad_norm": 0.0, - "learning_rate": 1.3801558806985237e-05, - "loss": 0.955, + "learning_rate": 1.3817717250352876e-05, + "loss": 0.9559, "step": 13906 }, { - "epoch": 0.3946367763904654, + "epoch": 0.39408880954405057, "grad_norm": 0.0, - "learning_rate": 1.3800708715007618e-05, - "loss": 0.9318, + "learning_rate": 1.381686896144624e-05, + "loss": 0.8876, "step": 13907 }, { - "epoch": 0.39466515323496026, + "epoch": 0.394117146986313, "grad_norm": 0.0, - "learning_rate": 1.3799858590925244e-05, - "loss": 1.0442, + "learning_rate": 1.3816020640388698e-05, + "loss": 0.9615, "step": 13908 }, { - "epoch": 0.3946935300794552, + "epoch": 0.3941454844285755, "grad_norm": 0.0, - "learning_rate": 1.3799008434745291e-05, - "loss": 0.9638, + "learning_rate": 1.3815172287187394e-05, + "loss": 0.8942, "step": 13909 }, { - "epoch": 0.39472190692395004, + "epoch": 0.39417382187083794, "grad_norm": 0.0, - "learning_rate": 1.3798158246474946e-05, - "loss": 0.9997, + "learning_rate": 1.3814323901849483e-05, + "loss": 0.9014, "step": 13910 }, { - "epoch": 0.39475028376844495, + "epoch": 0.3942021593131004, "grad_norm": 0.0, - "learning_rate": 1.3797308026121386e-05, - "loss": 0.9687, + "learning_rate": 1.3813475484382102e-05, + "loss": 0.9185, "step": 13911 }, { - "epoch": 0.3947786606129398, + "epoch": 0.39423049675536287, "grad_norm": 0.0, - "learning_rate": 1.3796457773691793e-05, - "loss": 0.9525, + "learning_rate": 1.3812627034792401e-05, + "loss": 0.9075, "step": 13912 }, { - "epoch": 0.39480703745743473, + "epoch": 0.3942588341976253, "grad_norm": 0.0, - "learning_rate": 1.3795607489193351e-05, - "loss": 1.018, + "learning_rate": 1.3811778553087524e-05, + "loss": 0.8466, "step": 13913 }, { - "epoch": 0.39483541430192964, + "epoch": 0.3942871716398878, "grad_norm": 0.0, - "learning_rate": 1.3794757172633244e-05, - "loss": 0.974, + "learning_rate": 1.3810930039274626e-05, + "loss": 0.9329, "step": 13914 }, { - "epoch": 0.3948637911464245, + "epoch": 0.39431550908215024, "grad_norm": 0.0, - "learning_rate": 1.3793906824018652e-05, - "loss": 0.9452, + "learning_rate": 1.3810081493360847e-05, + "loss": 0.8359, "step": 13915 }, { - "epoch": 0.3948921679909194, + "epoch": 0.39434384652441273, "grad_norm": 0.0, - "learning_rate": 1.3793056443356758e-05, - "loss": 0.8829, + "learning_rate": 1.3809232915353336e-05, + "loss": 0.8698, "step": 13916 }, { - "epoch": 0.3949205448354143, + "epoch": 0.39437218396667517, "grad_norm": 0.0, - "learning_rate": 1.3792206030654747e-05, - "loss": 0.9895, + "learning_rate": 1.3808384305259244e-05, + "loss": 0.9389, "step": 13917 }, { - "epoch": 0.3949489216799092, + "epoch": 0.3944005214089376, "grad_norm": 0.0, - "learning_rate": 1.3791355585919801e-05, - "loss": 1.0075, + "learning_rate": 1.3807535663085714e-05, + "loss": 0.8634, "step": 13918 }, { - "epoch": 0.3949772985244041, + "epoch": 0.3944288588512001, "grad_norm": 0.0, - "learning_rate": 1.3790505109159101e-05, - "loss": 0.821, + "learning_rate": 1.3806686988839898e-05, + "loss": 0.9609, "step": 13919 }, { - "epoch": 0.39500567536889897, + "epoch": 0.39445719629346254, "grad_norm": 0.0, - "learning_rate": 1.3789654600379833e-05, - "loss": 0.8643, + "learning_rate": 1.3805838282528946e-05, + "loss": 0.8684, "step": 13920 }, { - "epoch": 0.3950340522133939, + "epoch": 0.39448553373572504, "grad_norm": 0.0, - "learning_rate": 1.3788804059589186e-05, - "loss": 0.9462, + "learning_rate": 1.380498954416e-05, + "loss": 0.934, "step": 13921 }, { - "epoch": 0.39506242905788874, + "epoch": 0.3945138711779875, "grad_norm": 0.0, - "learning_rate": 1.3787953486794342e-05, - "loss": 0.9818, + "learning_rate": 1.3804140773740218e-05, + "loss": 1.0133, "step": 13922 }, { - "epoch": 0.39509080590238366, + "epoch": 0.3945422086202499, "grad_norm": 0.0, - "learning_rate": 1.3787102882002482e-05, - "loss": 0.907, + "learning_rate": 1.3803291971276744e-05, + "loss": 0.9144, "step": 13923 }, { - "epoch": 0.39511918274687857, + "epoch": 0.3945705460625124, "grad_norm": 0.0, - "learning_rate": 1.3786252245220793e-05, - "loss": 1.0088, + "learning_rate": 1.380244313677673e-05, + "loss": 0.9792, "step": 13924 }, { - "epoch": 0.39514755959137343, + "epoch": 0.39459888350477484, "grad_norm": 0.0, - "learning_rate": 1.3785401576456463e-05, - "loss": 0.9605, + "learning_rate": 1.3801594270247328e-05, + "loss": 0.9306, "step": 13925 }, { - "epoch": 0.39517593643586835, + "epoch": 0.39462722094703734, "grad_norm": 0.0, - "learning_rate": 1.3784550875716673e-05, - "loss": 0.8277, + "learning_rate": 1.380074537169568e-05, + "loss": 0.9453, "step": 13926 }, { - "epoch": 0.3952043132803632, + "epoch": 0.3946555583892998, "grad_norm": 0.0, - "learning_rate": 1.3783700143008614e-05, - "loss": 0.918, + "learning_rate": 1.3799896441128949e-05, + "loss": 0.9247, "step": 13927 }, { - "epoch": 0.3952326901248581, + "epoch": 0.39468389583156227, "grad_norm": 0.0, - "learning_rate": 1.3782849378339469e-05, - "loss": 0.9145, + "learning_rate": 1.3799047478554275e-05, + "loss": 0.9038, "step": 13928 }, { - "epoch": 0.395261066969353, + "epoch": 0.3947122332738247, "grad_norm": 0.0, - "learning_rate": 1.3781998581716427e-05, - "loss": 0.8172, + "learning_rate": 1.3798198483978816e-05, + "loss": 0.995, "step": 13929 }, { - "epoch": 0.3952894438138479, + "epoch": 0.39474057071608715, "grad_norm": 0.0, - "learning_rate": 1.3781147753146671e-05, - "loss": 0.8864, + "learning_rate": 1.3797349457409716e-05, + "loss": 0.8909, "step": 13930 }, { - "epoch": 0.3953178206583428, + "epoch": 0.39476890815834964, "grad_norm": 0.0, - "learning_rate": 1.378029689263739e-05, - "loss": 1.0336, + "learning_rate": 1.3796500398854136e-05, + "loss": 0.923, "step": 13931 }, { - "epoch": 0.39534619750283767, + "epoch": 0.3947972456006121, "grad_norm": 0.0, - "learning_rate": 1.3779446000195773e-05, - "loss": 1.0138, + "learning_rate": 1.379565130831922e-05, + "loss": 0.8679, "step": 13932 }, { - "epoch": 0.3953745743473326, + "epoch": 0.3948255830428746, "grad_norm": 0.0, - "learning_rate": 1.3778595075829002e-05, - "loss": 0.9579, + "learning_rate": 1.3794802185812126e-05, + "loss": 1.0472, "step": 13933 }, { - "epoch": 0.39540295119182745, + "epoch": 0.394853920485137, "grad_norm": 0.0, - "learning_rate": 1.3777744119544271e-05, - "loss": 0.9721, + "learning_rate": 1.3793953031340004e-05, + "loss": 0.9562, "step": 13934 }, { - "epoch": 0.39543132803632236, + "epoch": 0.39488225792739945, "grad_norm": 0.0, - "learning_rate": 1.3776893131348765e-05, - "loss": 0.9614, + "learning_rate": 1.3793103844910005e-05, + "loss": 0.9748, "step": 13935 }, { - "epoch": 0.3954597048808173, + "epoch": 0.39491059536966194, "grad_norm": 0.0, - "learning_rate": 1.3776042111249676e-05, - "loss": 0.9737, + "learning_rate": 1.3792254626529286e-05, + "loss": 0.8986, "step": 13936 }, { - "epoch": 0.39548808172531214, + "epoch": 0.3949389328119244, "grad_norm": 0.0, - "learning_rate": 1.3775191059254185e-05, - "loss": 0.964, + "learning_rate": 1.3791405376204998e-05, + "loss": 0.963, "step": 13937 }, { - "epoch": 0.39551645856980705, + "epoch": 0.3949672702541869, "grad_norm": 0.0, - "learning_rate": 1.3774339975369489e-05, - "loss": 0.8179, + "learning_rate": 1.3790556093944291e-05, + "loss": 0.9826, "step": 13938 }, { - "epoch": 0.3955448354143019, + "epoch": 0.3949956076964493, "grad_norm": 0.0, - "learning_rate": 1.3773488859602772e-05, - "loss": 0.9527, + "learning_rate": 1.3789706779754326e-05, + "loss": 0.9334, "step": 13939 }, { - "epoch": 0.3955732122587968, + "epoch": 0.3950239451387118, "grad_norm": 0.0, - "learning_rate": 1.3772637711961223e-05, - "loss": 0.9319, + "learning_rate": 1.3788857433642253e-05, + "loss": 0.9761, "step": 13940 }, { - "epoch": 0.39560158910329174, + "epoch": 0.39505228258097425, "grad_norm": 0.0, - "learning_rate": 1.3771786532452038e-05, - "loss": 0.915, + "learning_rate": 1.3788008055615227e-05, + "loss": 0.9733, "step": 13941 }, { - "epoch": 0.3956299659477866, + "epoch": 0.3950806200232367, "grad_norm": 0.0, - "learning_rate": 1.3770935321082399e-05, - "loss": 0.8839, + "learning_rate": 1.3787158645680401e-05, + "loss": 0.9676, "step": 13942 }, { - "epoch": 0.3956583427922815, + "epoch": 0.3951089574654992, "grad_norm": 0.0, - "learning_rate": 1.3770084077859502e-05, - "loss": 0.8754, + "learning_rate": 1.3786309203844932e-05, + "loss": 0.8725, "step": 13943 }, { - "epoch": 0.3956867196367764, + "epoch": 0.3951372949077616, "grad_norm": 0.0, - "learning_rate": 1.3769232802790534e-05, - "loss": 0.9483, + "learning_rate": 1.3785459730115975e-05, + "loss": 0.994, "step": 13944 }, { - "epoch": 0.3957150964812713, + "epoch": 0.3951656323500241, "grad_norm": 0.0, - "learning_rate": 1.3768381495882688e-05, - "loss": 0.9848, + "learning_rate": 1.3784610224500685e-05, + "loss": 0.889, "step": 13945 }, { - "epoch": 0.39574347332576615, + "epoch": 0.39519396979228655, "grad_norm": 0.0, - "learning_rate": 1.3767530157143154e-05, - "loss": 0.9648, + "learning_rate": 1.3783760687006218e-05, + "loss": 0.849, "step": 13946 }, { - "epoch": 0.39577185017026106, + "epoch": 0.395222307234549, "grad_norm": 0.0, - "learning_rate": 1.3766678786579121e-05, - "loss": 0.9125, + "learning_rate": 1.3782911117639729e-05, + "loss": 0.9955, "step": 13947 }, { - "epoch": 0.395800227014756, + "epoch": 0.3952506446768115, "grad_norm": 0.0, - "learning_rate": 1.3765827384197787e-05, - "loss": 0.932, + "learning_rate": 1.3782061516408376e-05, + "loss": 0.8753, "step": 13948 }, { - "epoch": 0.39582860385925084, + "epoch": 0.3952789821190739, "grad_norm": 0.0, - "learning_rate": 1.3764975950006339e-05, - "loss": 0.9032, + "learning_rate": 1.3781211883319315e-05, + "loss": 0.9515, "step": 13949 }, { - "epoch": 0.39585698070374575, + "epoch": 0.3953073195613364, "grad_norm": 0.0, - "learning_rate": 1.3764124484011965e-05, - "loss": 1.0319, + "learning_rate": 1.3780362218379697e-05, + "loss": 0.9596, "step": 13950 }, { - "epoch": 0.3958853575482406, + "epoch": 0.39533565700359885, "grad_norm": 0.0, - "learning_rate": 1.3763272986221864e-05, - "loss": 0.9464, + "learning_rate": 1.3779512521596689e-05, + "loss": 0.9047, "step": 13951 }, { - "epoch": 0.39591373439273553, + "epoch": 0.39536399444586134, "grad_norm": 0.0, - "learning_rate": 1.376242145664323e-05, - "loss": 0.906, + "learning_rate": 1.3778662792977443e-05, + "loss": 0.8696, "step": 13952 }, { - "epoch": 0.39594211123723044, + "epoch": 0.3953923318881238, "grad_norm": 0.0, - "learning_rate": 1.3761569895283253e-05, - "loss": 0.9283, + "learning_rate": 1.3777813032529117e-05, + "loss": 1.0237, "step": 13953 }, { - "epoch": 0.3959704880817253, + "epoch": 0.3954206693303862, "grad_norm": 0.0, - "learning_rate": 1.3760718302149125e-05, - "loss": 0.9045, + "learning_rate": 1.3776963240258869e-05, + "loss": 0.9081, "step": 13954 }, { - "epoch": 0.3959988649262202, + "epoch": 0.3954490067726487, "grad_norm": 0.0, - "learning_rate": 1.3759866677248041e-05, - "loss": 0.8772, + "learning_rate": 1.3776113416173856e-05, + "loss": 0.9243, "step": 13955 }, { - "epoch": 0.3960272417707151, + "epoch": 0.39547734421491115, "grad_norm": 0.0, - "learning_rate": 1.3759015020587194e-05, - "loss": 0.807, + "learning_rate": 1.3775263560281238e-05, + "loss": 0.982, "step": 13956 }, { - "epoch": 0.39605561861521, + "epoch": 0.39550568165717365, "grad_norm": 0.0, - "learning_rate": 1.3758163332173772e-05, - "loss": 0.8831, + "learning_rate": 1.3774413672588174e-05, + "loss": 1.0152, "step": 13957 }, { - "epoch": 0.3960839954597049, + "epoch": 0.3955340190994361, "grad_norm": 0.0, - "learning_rate": 1.3757311612014984e-05, - "loss": 0.7693, + "learning_rate": 1.377356375310182e-05, + "loss": 0.9432, "step": 13958 }, { - "epoch": 0.39611237230419977, + "epoch": 0.3955623565416985, "grad_norm": 0.0, - "learning_rate": 1.3756459860118013e-05, - "loss": 1.0241, + "learning_rate": 1.3772713801829338e-05, + "loss": 0.9023, "step": 13959 }, { - "epoch": 0.3961407491486947, + "epoch": 0.395590693983961, "grad_norm": 0.0, - "learning_rate": 1.3755608076490054e-05, - "loss": 0.887, + "learning_rate": 1.3771863818777888e-05, + "loss": 0.8457, "step": 13960 }, { - "epoch": 0.39616912599318954, + "epoch": 0.39561903142622346, "grad_norm": 0.0, - "learning_rate": 1.3754756261138308e-05, - "loss": 0.9341, + "learning_rate": 1.377101380395463e-05, + "loss": 0.8816, "step": 13961 }, { - "epoch": 0.39619750283768446, + "epoch": 0.39564736886848595, "grad_norm": 0.0, - "learning_rate": 1.3753904414069967e-05, - "loss": 0.789, + "learning_rate": 1.3770163757366718e-05, + "loss": 0.998, "step": 13962 }, { - "epoch": 0.3962258796821793, + "epoch": 0.3956757063107484, "grad_norm": 0.0, - "learning_rate": 1.3753052535292226e-05, - "loss": 0.9334, + "learning_rate": 1.3769313679021319e-05, + "loss": 0.9239, "step": 13963 }, { - "epoch": 0.39625425652667423, + "epoch": 0.3957040437530108, "grad_norm": 0.0, - "learning_rate": 1.3752200624812282e-05, - "loss": 0.9669, + "learning_rate": 1.3768463568925589e-05, + "loss": 0.9984, "step": 13964 }, { - "epoch": 0.39628263337116915, + "epoch": 0.3957323811952733, "grad_norm": 0.0, - "learning_rate": 1.3751348682637328e-05, - "loss": 0.7889, + "learning_rate": 1.3767613427086694e-05, + "loss": 0.9084, "step": 13965 }, { - "epoch": 0.396311010215664, + "epoch": 0.39576071863753576, "grad_norm": 0.0, - "learning_rate": 1.3750496708774565e-05, - "loss": 0.9434, + "learning_rate": 1.3766763253511793e-05, + "loss": 0.8954, "step": 13966 }, { - "epoch": 0.3963393870601589, + "epoch": 0.39578905607979825, "grad_norm": 0.0, - "learning_rate": 1.3749644703231188e-05, - "loss": 0.9832, + "learning_rate": 1.3765913048208042e-05, + "loss": 0.8342, "step": 13967 }, { - "epoch": 0.3963677639046538, + "epoch": 0.3958173935220607, "grad_norm": 0.0, - "learning_rate": 1.3748792666014394e-05, - "loss": 0.8974, + "learning_rate": 1.376506281118261e-05, + "loss": 0.9246, "step": 13968 }, { - "epoch": 0.3963961407491487, + "epoch": 0.3958457309643232, "grad_norm": 0.0, - "learning_rate": 1.3747940597131382e-05, - "loss": 0.8868, + "learning_rate": 1.3764212542442656e-05, + "loss": 0.92, "step": 13969 }, { - "epoch": 0.3964245175936436, + "epoch": 0.3958740684065856, "grad_norm": 0.0, - "learning_rate": 1.3747088496589343e-05, - "loss": 0.9296, + "learning_rate": 1.376336224199534e-05, + "loss": 0.8947, "step": 13970 }, { - "epoch": 0.39645289443813847, + "epoch": 0.39590240584884806, "grad_norm": 0.0, - "learning_rate": 1.374623636439548e-05, - "loss": 0.9672, + "learning_rate": 1.3762511909847827e-05, + "loss": 0.9052, "step": 13971 }, { - "epoch": 0.3964812712826334, + "epoch": 0.39593074329111055, "grad_norm": 0.0, - "learning_rate": 1.374538420055699e-05, - "loss": 0.889, + "learning_rate": 1.376166154600728e-05, + "loss": 0.9988, "step": 13972 }, { - "epoch": 0.39650964812712824, + "epoch": 0.395959080733373, "grad_norm": 0.0, - "learning_rate": 1.3744532005081072e-05, - "loss": 0.8626, + "learning_rate": 1.376081115048086e-05, + "loss": 0.9306, "step": 13973 }, { - "epoch": 0.39653802497162316, + "epoch": 0.3959874181756355, "grad_norm": 0.0, - "learning_rate": 1.3743679777974923e-05, - "loss": 0.8569, + "learning_rate": 1.375996072327573e-05, + "loss": 1.0051, "step": 13974 }, { - "epoch": 0.3965664018161181, + "epoch": 0.3960157556178979, "grad_norm": 0.0, - "learning_rate": 1.3742827519245742e-05, - "loss": 0.9112, + "learning_rate": 1.3759110264399058e-05, + "loss": 0.8922, "step": 13975 }, { - "epoch": 0.39659477866061293, + "epoch": 0.39604409306016036, "grad_norm": 0.0, - "learning_rate": 1.3741975228900732e-05, - "loss": 0.9022, + "learning_rate": 1.3758259773858003e-05, + "loss": 0.8643, "step": 13976 }, { - "epoch": 0.39662315550510785, + "epoch": 0.39607243050242286, "grad_norm": 0.0, - "learning_rate": 1.3741122906947086e-05, - "loss": 0.9102, + "learning_rate": 1.3757409251659727e-05, + "loss": 0.9366, "step": 13977 }, { - "epoch": 0.3966515323496027, + "epoch": 0.3961007679446853, "grad_norm": 0.0, - "learning_rate": 1.374027055339201e-05, - "loss": 0.9073, + "learning_rate": 1.3756558697811402e-05, + "loss": 0.9777, "step": 13978 }, { - "epoch": 0.3966799091940976, + "epoch": 0.3961291053869478, "grad_norm": 0.0, - "learning_rate": 1.37394181682427e-05, - "loss": 0.8293, + "learning_rate": 1.3755708112320187e-05, + "loss": 0.9289, "step": 13979 }, { - "epoch": 0.3967082860385925, + "epoch": 0.3961574428292102, "grad_norm": 0.0, - "learning_rate": 1.3738565751506352e-05, - "loss": 0.9695, + "learning_rate": 1.3754857495193245e-05, + "loss": 1.0257, "step": 13980 }, { - "epoch": 0.3967366628830874, + "epoch": 0.3961857802714727, "grad_norm": 0.0, - "learning_rate": 1.3737713303190173e-05, - "loss": 0.9296, + "learning_rate": 1.3754006846437748e-05, + "loss": 0.9959, "step": 13981 }, { - "epoch": 0.3967650397275823, + "epoch": 0.39621411771373516, "grad_norm": 0.0, - "learning_rate": 1.3736860823301364e-05, - "loss": 0.8237, + "learning_rate": 1.3753156166060857e-05, + "loss": 0.9063, "step": 13982 }, { - "epoch": 0.3967934165720772, + "epoch": 0.3962424551559976, "grad_norm": 0.0, - "learning_rate": 1.3736008311847122e-05, - "loss": 0.8983, + "learning_rate": 1.3752305454069734e-05, + "loss": 0.9559, "step": 13983 }, { - "epoch": 0.3968217934165721, + "epoch": 0.3962707925982601, "grad_norm": 0.0, - "learning_rate": 1.373515576883465e-05, - "loss": 0.8361, + "learning_rate": 1.375145471047155e-05, + "loss": 0.889, "step": 13984 }, { - "epoch": 0.39685017026106695, + "epoch": 0.39629913004052253, "grad_norm": 0.0, - "learning_rate": 1.373430319427115e-05, - "loss": 0.9157, + "learning_rate": 1.3750603935273472e-05, + "loss": 0.872, "step": 13985 }, { - "epoch": 0.39687854710556186, + "epoch": 0.396327467482785, "grad_norm": 0.0, - "learning_rate": 1.3733450588163822e-05, - "loss": 0.8539, + "learning_rate": 1.3749753128482665e-05, + "loss": 0.9163, "step": 13986 }, { - "epoch": 0.3969069239500568, + "epoch": 0.39635580492504746, "grad_norm": 0.0, - "learning_rate": 1.3732597950519868e-05, - "loss": 0.9666, + "learning_rate": 1.3748902290106294e-05, + "loss": 0.9585, "step": 13987 }, { - "epoch": 0.39693530079455164, + "epoch": 0.3963841423673099, "grad_norm": 0.0, - "learning_rate": 1.3731745281346492e-05, - "loss": 0.9203, + "learning_rate": 1.3748051420151524e-05, + "loss": 0.911, "step": 13988 }, { - "epoch": 0.39696367763904655, + "epoch": 0.3964124798095724, "grad_norm": 0.0, - "learning_rate": 1.3730892580650898e-05, - "loss": 0.9694, + "learning_rate": 1.374720051862553e-05, + "loss": 0.9722, "step": 13989 }, { - "epoch": 0.3969920544835414, + "epoch": 0.39644081725183483, "grad_norm": 0.0, - "learning_rate": 1.3730039848440286e-05, - "loss": 0.9733, + "learning_rate": 1.374634958553547e-05, + "loss": 0.7516, "step": 13990 }, { - "epoch": 0.3970204313280363, + "epoch": 0.3964691546940973, "grad_norm": 0.0, - "learning_rate": 1.3729187084721858e-05, - "loss": 0.9702, + "learning_rate": 1.374549862088852e-05, + "loss": 0.9249, "step": 13991 }, { - "epoch": 0.3970488081725312, + "epoch": 0.39649749213635976, "grad_norm": 0.0, - "learning_rate": 1.372833428950282e-05, - "loss": 0.9664, + "learning_rate": 1.3744647624691841e-05, + "loss": 0.8629, "step": 13992 }, { - "epoch": 0.3970771850170261, + "epoch": 0.39652582957862226, "grad_norm": 0.0, - "learning_rate": 1.3727481462790376e-05, - "loss": 0.9759, + "learning_rate": 1.3743796596952608e-05, + "loss": 0.9427, "step": 13993 }, { - "epoch": 0.397105561861521, + "epoch": 0.3965541670208847, "grad_norm": 0.0, - "learning_rate": 1.3726628604591725e-05, - "loss": 0.9636, + "learning_rate": 1.3742945537677983e-05, + "loss": 1.0183, "step": 13994 }, { - "epoch": 0.3971339387060159, + "epoch": 0.39658250446314713, "grad_norm": 0.0, - "learning_rate": 1.3725775714914078e-05, - "loss": 0.8407, + "learning_rate": 1.374209444687514e-05, + "loss": 0.9243, "step": 13995 }, { - "epoch": 0.3971623155505108, + "epoch": 0.39661084190540963, "grad_norm": 0.0, - "learning_rate": 1.3724922793764634e-05, - "loss": 0.8644, + "learning_rate": 1.3741243324551246e-05, + "loss": 0.8712, "step": 13996 }, { - "epoch": 0.39719069239500565, + "epoch": 0.39663917934767207, "grad_norm": 0.0, - "learning_rate": 1.3724069841150603e-05, - "loss": 0.907, + "learning_rate": 1.3740392170713466e-05, + "loss": 0.912, "step": 13997 }, { - "epoch": 0.39721906923950057, + "epoch": 0.39666751678993456, "grad_norm": 0.0, - "learning_rate": 1.3723216857079183e-05, - "loss": 0.9664, + "learning_rate": 1.3739540985368978e-05, + "loss": 0.944, "step": 13998 }, { - "epoch": 0.3972474460839955, + "epoch": 0.396695854232197, "grad_norm": 0.0, - "learning_rate": 1.3722363841557584e-05, - "loss": 0.8872, + "learning_rate": 1.3738689768524946e-05, + "loss": 0.9994, "step": 13999 }, { - "epoch": 0.39727582292849034, + "epoch": 0.39672419167445944, "grad_norm": 0.0, - "learning_rate": 1.3721510794593012e-05, - "loss": 0.8683, + "learning_rate": 1.3737838520188542e-05, + "loss": 0.9184, "step": 14000 }, { - "epoch": 0.39730419977298526, + "epoch": 0.39675252911672193, "grad_norm": 0.0, - "learning_rate": 1.3720657716192669e-05, - "loss": 0.8874, + "learning_rate": 1.3736987240366937e-05, + "loss": 0.856, "step": 14001 }, { - "epoch": 0.3973325766174801, + "epoch": 0.39678086655898437, "grad_norm": 0.0, - "learning_rate": 1.3719804606363765e-05, - "loss": 0.8706, + "learning_rate": 1.37361359290673e-05, + "loss": 0.8748, "step": 14002 }, { - "epoch": 0.39736095346197503, + "epoch": 0.39680920400124686, "grad_norm": 0.0, - "learning_rate": 1.3718951465113499e-05, - "loss": 0.9254, + "learning_rate": 1.3735284586296802e-05, + "loss": 0.9212, "step": 14003 }, { - "epoch": 0.39738933030646995, + "epoch": 0.3968375414435093, "grad_norm": 0.0, - "learning_rate": 1.3718098292449087e-05, - "loss": 0.8588, + "learning_rate": 1.3734433212062617e-05, + "loss": 0.9183, "step": 14004 }, { - "epoch": 0.3974177071509648, + "epoch": 0.3968658788857718, "grad_norm": 0.0, - "learning_rate": 1.3717245088377732e-05, - "loss": 0.8931, + "learning_rate": 1.3733581806371911e-05, + "loss": 1.0013, "step": 14005 }, { - "epoch": 0.3974460839954597, + "epoch": 0.39689421632803423, "grad_norm": 0.0, - "learning_rate": 1.3716391852906638e-05, - "loss": 0.8305, + "learning_rate": 1.3732730369231862e-05, + "loss": 0.774, "step": 14006 }, { - "epoch": 0.3974744608399546, + "epoch": 0.39692255377029667, "grad_norm": 0.0, - "learning_rate": 1.371553858604302e-05, - "loss": 0.8738, + "learning_rate": 1.3731878900649638e-05, + "loss": 0.9114, "step": 14007 }, { - "epoch": 0.3975028376844495, + "epoch": 0.39695089121255916, "grad_norm": 0.0, - "learning_rate": 1.3714685287794075e-05, - "loss": 0.8989, + "learning_rate": 1.3731027400632413e-05, + "loss": 0.9683, "step": 14008 }, { - "epoch": 0.39753121452894435, + "epoch": 0.3969792286548216, "grad_norm": 0.0, - "learning_rate": 1.3713831958167018e-05, - "loss": 0.9684, + "learning_rate": 1.373017586918736e-05, + "loss": 0.8992, "step": 14009 }, { - "epoch": 0.39755959137343927, + "epoch": 0.3970075660970841, "grad_norm": 0.0, - "learning_rate": 1.371297859716906e-05, - "loss": 1.0417, + "learning_rate": 1.3729324306321645e-05, + "loss": 0.9613, "step": 14010 }, { - "epoch": 0.3975879682179342, + "epoch": 0.39703590353934654, "grad_norm": 0.0, - "learning_rate": 1.3712125204807397e-05, - "loss": 0.9893, + "learning_rate": 1.372847271204245e-05, + "loss": 0.9634, "step": 14011 }, { - "epoch": 0.39761634506242904, + "epoch": 0.397064240981609, "grad_norm": 0.0, - "learning_rate": 1.371127178108925e-05, - "loss": 0.9237, + "learning_rate": 1.3727621086356947e-05, + "loss": 0.8946, "step": 14012 }, { - "epoch": 0.39764472190692396, + "epoch": 0.39709257842387147, "grad_norm": 0.0, - "learning_rate": 1.3710418326021824e-05, - "loss": 0.877, + "learning_rate": 1.3726769429272302e-05, + "loss": 0.9119, "step": 14013 }, { - "epoch": 0.3976730987514188, + "epoch": 0.3971209158661339, "grad_norm": 0.0, - "learning_rate": 1.3709564839612325e-05, - "loss": 0.9062, + "learning_rate": 1.3725917740795698e-05, + "loss": 1.0227, "step": 14014 }, { - "epoch": 0.39770147559591373, + "epoch": 0.3971492533083964, "grad_norm": 0.0, - "learning_rate": 1.370871132186797e-05, - "loss": 0.8866, + "learning_rate": 1.3725066020934306e-05, + "loss": 0.9269, "step": 14015 }, { - "epoch": 0.39772985244040865, + "epoch": 0.39717759075065884, "grad_norm": 0.0, - "learning_rate": 1.3707857772795961e-05, - "loss": 0.8363, + "learning_rate": 1.3724214269695297e-05, + "loss": 0.8811, "step": 14016 }, { - "epoch": 0.3977582292849035, + "epoch": 0.39720592819292133, "grad_norm": 0.0, - "learning_rate": 1.3707004192403507e-05, - "loss": 0.9248, + "learning_rate": 1.3723362487085847e-05, + "loss": 0.8801, "step": 14017 }, { - "epoch": 0.3977866061293984, + "epoch": 0.39723426563518377, "grad_norm": 0.0, - "learning_rate": 1.3706150580697826e-05, - "loss": 0.9214, + "learning_rate": 1.3722510673113136e-05, + "loss": 0.8836, "step": 14018 }, { - "epoch": 0.3978149829738933, + "epoch": 0.3972626030774462, "grad_norm": 0.0, - "learning_rate": 1.3705296937686123e-05, - "loss": 0.9372, + "learning_rate": 1.3721658827784335e-05, + "loss": 0.8707, "step": 14019 }, { - "epoch": 0.3978433598183882, + "epoch": 0.3972909405197087, "grad_norm": 0.0, - "learning_rate": 1.3704443263375611e-05, - "loss": 0.9359, + "learning_rate": 1.372080695110662e-05, + "loss": 0.942, "step": 14020 }, { - "epoch": 0.3978717366628831, + "epoch": 0.39731927796197114, "grad_norm": 0.0, - "learning_rate": 1.3703589557773498e-05, - "loss": 0.8507, + "learning_rate": 1.3719955043087163e-05, + "loss": 0.9242, "step": 14021 }, { - "epoch": 0.397900113507378, + "epoch": 0.39734761540423363, "grad_norm": 0.0, - "learning_rate": 1.3702735820887001e-05, - "loss": 0.8394, + "learning_rate": 1.3719103103733147e-05, + "loss": 0.8948, "step": 14022 }, { - "epoch": 0.3979284903518729, + "epoch": 0.39737595284649607, "grad_norm": 0.0, - "learning_rate": 1.3701882052723328e-05, - "loss": 0.9782, + "learning_rate": 1.371825113305174e-05, + "loss": 0.7824, "step": 14023 }, { - "epoch": 0.39795686719636775, + "epoch": 0.3974042902887585, "grad_norm": 0.0, - "learning_rate": 1.3701028253289688e-05, - "loss": 0.8505, + "learning_rate": 1.3717399131050128e-05, + "loss": 0.9423, "step": 14024 }, { - "epoch": 0.39798524404086266, + "epoch": 0.397432627731021, "grad_norm": 0.0, - "learning_rate": 1.3700174422593298e-05, - "loss": 0.9926, + "learning_rate": 1.371654709773548e-05, + "loss": 0.9844, "step": 14025 }, { - "epoch": 0.3980136208853575, + "epoch": 0.39746096517328344, "grad_norm": 0.0, - "learning_rate": 1.3699320560641364e-05, - "loss": 0.9208, + "learning_rate": 1.3715695033114974e-05, + "loss": 0.8696, "step": 14026 }, { - "epoch": 0.39804199772985244, + "epoch": 0.39748930261554594, "grad_norm": 0.0, - "learning_rate": 1.369846666744111e-05, - "loss": 0.9676, + "learning_rate": 1.3714842937195794e-05, + "loss": 0.9614, "step": 14027 }, { - "epoch": 0.39807037457434735, + "epoch": 0.3975176400578084, "grad_norm": 0.0, - "learning_rate": 1.3697612742999738e-05, - "loss": 0.9296, + "learning_rate": 1.3713990809985109e-05, + "loss": 0.8737, "step": 14028 }, { - "epoch": 0.3980987514188422, + "epoch": 0.39754597750007087, "grad_norm": 0.0, - "learning_rate": 1.3696758787324464e-05, - "loss": 0.9475, + "learning_rate": 1.37131386514901e-05, + "loss": 0.7734, "step": 14029 }, { - "epoch": 0.3981271282633371, + "epoch": 0.3975743149423333, "grad_norm": 0.0, - "learning_rate": 1.3695904800422506e-05, - "loss": 0.9563, + "learning_rate": 1.3712286461717945e-05, + "loss": 0.8367, "step": 14030 }, { - "epoch": 0.398155505107832, + "epoch": 0.39760265238459574, "grad_norm": 0.0, - "learning_rate": 1.3695050782301068e-05, - "loss": 0.946, + "learning_rate": 1.3711434240675825e-05, + "loss": 0.925, "step": 14031 }, { - "epoch": 0.3981838819523269, + "epoch": 0.39763098982685824, "grad_norm": 0.0, - "learning_rate": 1.3694196732967378e-05, - "loss": 0.8388, + "learning_rate": 1.3710581988370915e-05, + "loss": 0.8752, "step": 14032 }, { - "epoch": 0.3982122587968218, + "epoch": 0.3976593272691207, "grad_norm": 0.0, - "learning_rate": 1.3693342652428637e-05, - "loss": 0.9332, + "learning_rate": 1.3709729704810396e-05, + "loss": 0.8803, "step": 14033 }, { - "epoch": 0.3982406356413167, + "epoch": 0.39768766471138317, "grad_norm": 0.0, - "learning_rate": 1.3692488540692064e-05, - "loss": 0.8176, + "learning_rate": 1.3708877390001442e-05, + "loss": 0.8501, "step": 14034 }, { - "epoch": 0.3982690124858116, + "epoch": 0.3977160021536456, "grad_norm": 0.0, - "learning_rate": 1.3691634397764875e-05, - "loss": 0.9229, + "learning_rate": 1.370802504395124e-05, + "loss": 0.8881, "step": 14035 }, { - "epoch": 0.39829738933030645, + "epoch": 0.39774433959590805, "grad_norm": 0.0, - "learning_rate": 1.3690780223654286e-05, - "loss": 0.8465, + "learning_rate": 1.3707172666666966e-05, + "loss": 0.9327, "step": 14036 }, { - "epoch": 0.39832576617480137, + "epoch": 0.39777267703817054, "grad_norm": 0.0, - "learning_rate": 1.368992601836751e-05, - "loss": 0.9351, + "learning_rate": 1.37063202581558e-05, + "loss": 0.972, "step": 14037 }, { - "epoch": 0.3983541430192963, + "epoch": 0.397801014480433, "grad_norm": 0.0, - "learning_rate": 1.3689071781911763e-05, - "loss": 0.9258, + "learning_rate": 1.370546781842492e-05, + "loss": 0.8451, "step": 14038 }, { - "epoch": 0.39838251986379114, + "epoch": 0.3978293519226955, "grad_norm": 0.0, - "learning_rate": 1.368821751429426e-05, - "loss": 0.8687, + "learning_rate": 1.3704615347481511e-05, + "loss": 0.8783, "step": 14039 }, { - "epoch": 0.39841089670828606, + "epoch": 0.3978576893649579, "grad_norm": 0.0, - "learning_rate": 1.3687363215522218e-05, - "loss": 0.8645, + "learning_rate": 1.370376284533275e-05, + "loss": 0.9165, "step": 14040 }, { - "epoch": 0.3984392735527809, + "epoch": 0.3978860268072204, "grad_norm": 0.0, - "learning_rate": 1.3686508885602852e-05, - "loss": 1.0078, + "learning_rate": 1.3702910311985822e-05, + "loss": 0.9142, "step": 14041 }, { - "epoch": 0.39846765039727583, + "epoch": 0.39791436424948284, "grad_norm": 0.0, - "learning_rate": 1.368565452454338e-05, - "loss": 0.8593, + "learning_rate": 1.3702057747447903e-05, + "loss": 0.9826, "step": 14042 }, { - "epoch": 0.3984960272417707, + "epoch": 0.3979427016917453, "grad_norm": 0.0, - "learning_rate": 1.3684800132351022e-05, - "loss": 0.8278, + "learning_rate": 1.3701205151726177e-05, + "loss": 0.8494, "step": 14043 }, { - "epoch": 0.3985244040862656, + "epoch": 0.3979710391340078, "grad_norm": 0.0, - "learning_rate": 1.3683945709032989e-05, - "loss": 0.9878, + "learning_rate": 1.3700352524827825e-05, + "loss": 0.9485, "step": 14044 }, { - "epoch": 0.3985527809307605, + "epoch": 0.3979993765762702, "grad_norm": 0.0, - "learning_rate": 1.36830912545965e-05, - "loss": 0.8791, + "learning_rate": 1.3699499866760032e-05, + "loss": 0.9487, "step": 14045 }, { - "epoch": 0.3985811577752554, + "epoch": 0.3980277140185327, "grad_norm": 0.0, - "learning_rate": 1.3682236769048776e-05, - "loss": 0.9576, + "learning_rate": 1.3698647177529974e-05, + "loss": 0.9937, "step": 14046 }, { - "epoch": 0.3986095346197503, + "epoch": 0.39805605146079515, "grad_norm": 0.0, - "learning_rate": 1.3681382252397033e-05, - "loss": 0.9208, + "learning_rate": 1.369779445714484e-05, + "loss": 0.9229, "step": 14047 }, { - "epoch": 0.39863791146424515, + "epoch": 0.3980843889030576, "grad_norm": 0.0, - "learning_rate": 1.3680527704648485e-05, - "loss": 0.9581, + "learning_rate": 1.3696941705611811e-05, + "loss": 0.9255, "step": 14048 }, { - "epoch": 0.39866628830874007, + "epoch": 0.3981127263453201, "grad_norm": 0.0, - "learning_rate": 1.3679673125810356e-05, - "loss": 0.9698, + "learning_rate": 1.3696088922938065e-05, + "loss": 0.9365, "step": 14049 }, { - "epoch": 0.398694665153235, + "epoch": 0.3981410637875825, "grad_norm": 0.0, - "learning_rate": 1.3678818515889863e-05, - "loss": 0.8327, + "learning_rate": 1.3695236109130792e-05, + "loss": 1.2091, "step": 14050 }, { - "epoch": 0.39872304199772984, + "epoch": 0.398169401229845, "grad_norm": 0.0, - "learning_rate": 1.3677963874894225e-05, - "loss": 1.0735, + "learning_rate": 1.3694383264197173e-05, + "loss": 0.9106, "step": 14051 }, { - "epoch": 0.39875141884222476, + "epoch": 0.39819773867210745, "grad_norm": 0.0, - "learning_rate": 1.3677109202830658e-05, - "loss": 0.8836, + "learning_rate": 1.3693530388144394e-05, + "loss": 0.9239, "step": 14052 }, { - "epoch": 0.3987797956867196, + "epoch": 0.39822607611436994, "grad_norm": 0.0, - "learning_rate": 1.3676254499706387e-05, - "loss": 0.8907, + "learning_rate": 1.3692677480979635e-05, + "loss": 0.8606, "step": 14053 }, { - "epoch": 0.39880817253121453, + "epoch": 0.3982544135566324, "grad_norm": 0.0, - "learning_rate": 1.367539976552863e-05, - "loss": 0.8814, + "learning_rate": 1.3691824542710082e-05, + "loss": 1.0208, "step": 14054 }, { - "epoch": 0.39883654937570945, + "epoch": 0.3982827509988948, "grad_norm": 0.0, - "learning_rate": 1.3674545000304602e-05, - "loss": 0.9973, + "learning_rate": 1.3690971573342921e-05, + "loss": 0.8534, "step": 14055 }, { - "epoch": 0.3988649262202043, + "epoch": 0.3983110884411573, "grad_norm": 0.0, - "learning_rate": 1.3673690204041531e-05, - "loss": 0.9298, + "learning_rate": 1.3690118572885334e-05, + "loss": 0.9748, "step": 14056 }, { - "epoch": 0.3988933030646992, + "epoch": 0.39833942588341975, "grad_norm": 0.0, - "learning_rate": 1.3672835376746631e-05, - "loss": 0.8771, + "learning_rate": 1.368926554134451e-05, + "loss": 0.903, "step": 14057 }, { - "epoch": 0.3989216799091941, + "epoch": 0.39836776332568224, "grad_norm": 0.0, - "learning_rate": 1.3671980518427126e-05, - "loss": 0.8794, + "learning_rate": 1.3688412478727633e-05, + "loss": 0.871, "step": 14058 }, { - "epoch": 0.398950056753689, + "epoch": 0.3983961007679447, "grad_norm": 0.0, - "learning_rate": 1.3671125629090239e-05, - "loss": 0.974, + "learning_rate": 1.3687559385041884e-05, + "loss": 1.0474, "step": 14059 }, { - "epoch": 0.39897843359818386, + "epoch": 0.3984244382102071, "grad_norm": 0.0, - "learning_rate": 1.3670270708743187e-05, - "loss": 0.8234, + "learning_rate": 1.3686706260294458e-05, + "loss": 0.9211, "step": 14060 }, { - "epoch": 0.39900681044267877, + "epoch": 0.3984527756524696, "grad_norm": 0.0, - "learning_rate": 1.3669415757393195e-05, - "loss": 0.7819, + "learning_rate": 1.3685853104492534e-05, + "loss": 0.9422, "step": 14061 }, { - "epoch": 0.3990351872871737, + "epoch": 0.39848111309473205, "grad_norm": 0.0, - "learning_rate": 1.3668560775047478e-05, - "loss": 0.9624, + "learning_rate": 1.36849999176433e-05, + "loss": 0.8847, "step": 14062 }, { - "epoch": 0.39906356413166855, + "epoch": 0.39850945053699455, "grad_norm": 0.0, - "learning_rate": 1.366770576171327e-05, - "loss": 0.9672, + "learning_rate": 1.3684146699753942e-05, + "loss": 0.8538, "step": 14063 }, { - "epoch": 0.39909194097616346, + "epoch": 0.398537787979257, "grad_norm": 0.0, - "learning_rate": 1.3666850717397783e-05, - "loss": 0.8745, + "learning_rate": 1.3683293450831649e-05, + "loss": 0.712, "step": 14064 }, { - "epoch": 0.3991203178206583, + "epoch": 0.3985661254215195, "grad_norm": 0.0, - "learning_rate": 1.366599564210824e-05, - "loss": 0.9058, + "learning_rate": 1.368244017088361e-05, + "loss": 1.0855, "step": 14065 }, { - "epoch": 0.39914869466515324, + "epoch": 0.3985944628637819, "grad_norm": 0.0, - "learning_rate": 1.366514053585187e-05, - "loss": 0.9412, + "learning_rate": 1.3681586859917011e-05, + "loss": 0.9648, "step": 14066 }, { - "epoch": 0.39917707150964815, + "epoch": 0.39862280030604436, "grad_norm": 0.0, - "learning_rate": 1.3664285398635895e-05, - "loss": 0.8697, + "learning_rate": 1.3680733517939034e-05, + "loss": 0.8914, "step": 14067 }, { - "epoch": 0.399205448354143, + "epoch": 0.39865113774830685, "grad_norm": 0.0, - "learning_rate": 1.3663430230467535e-05, - "loss": 0.9342, + "learning_rate": 1.3679880144956875e-05, + "loss": 0.9557, "step": 14068 }, { - "epoch": 0.3992338251986379, + "epoch": 0.3986794751905693, "grad_norm": 0.0, - "learning_rate": 1.3662575031354016e-05, - "loss": 0.924, + "learning_rate": 1.3679026740977717e-05, + "loss": 0.9586, "step": 14069 }, { - "epoch": 0.3992622020431328, + "epoch": 0.3987078126328318, "grad_norm": 0.0, - "learning_rate": 1.3661719801302562e-05, - "loss": 0.8571, + "learning_rate": 1.3678173306008753e-05, + "loss": 0.9272, "step": 14070 }, { - "epoch": 0.3992905788876277, + "epoch": 0.3987361500750942, "grad_norm": 0.0, - "learning_rate": 1.3660864540320392e-05, - "loss": 0.9515, + "learning_rate": 1.3677319840057166e-05, + "loss": 0.9431, "step": 14071 }, { - "epoch": 0.39931895573212256, + "epoch": 0.39876448751735666, "grad_norm": 0.0, - "learning_rate": 1.3660009248414735e-05, - "loss": 1.0246, + "learning_rate": 1.367646634313015e-05, + "loss": 0.895, "step": 14072 }, { - "epoch": 0.3993473325766175, + "epoch": 0.39879282495961915, "grad_norm": 0.0, - "learning_rate": 1.3659153925592821e-05, - "loss": 0.8429, + "learning_rate": 1.3675612815234896e-05, + "loss": 0.8965, "step": 14073 }, { - "epoch": 0.3993757094211124, + "epoch": 0.3988211624018816, "grad_norm": 0.0, - "learning_rate": 1.3658298571861866e-05, - "loss": 1.0394, + "learning_rate": 1.3674759256378585e-05, + "loss": 1.0139, "step": 14074 }, { - "epoch": 0.39940408626560725, + "epoch": 0.3988494998441441, "grad_norm": 0.0, - "learning_rate": 1.3657443187229098e-05, - "loss": 0.8828, + "learning_rate": 1.3673905666568414e-05, + "loss": 0.882, "step": 14075 }, { - "epoch": 0.39943246311010216, + "epoch": 0.3988778372864065, "grad_norm": 0.0, - "learning_rate": 1.3656587771701744e-05, - "loss": 0.9893, + "learning_rate": 1.3673052045811572e-05, + "loss": 0.9701, "step": 14076 }, { - "epoch": 0.399460839954597, + "epoch": 0.398906174728669, "grad_norm": 0.0, - "learning_rate": 1.3655732325287028e-05, - "loss": 0.8787, + "learning_rate": 1.367219839411525e-05, + "loss": 0.9451, "step": 14077 }, { - "epoch": 0.39948921679909194, + "epoch": 0.39893451217093145, "grad_norm": 0.0, - "learning_rate": 1.3654876847992176e-05, - "loss": 0.8988, + "learning_rate": 1.3671344711486638e-05, + "loss": 1.0214, "step": 14078 }, { - "epoch": 0.39951759364358685, + "epoch": 0.3989628496131939, "grad_norm": 0.0, - "learning_rate": 1.3654021339824417e-05, - "loss": 0.9186, + "learning_rate": 1.3670490997932922e-05, + "loss": 0.994, "step": 14079 }, { - "epoch": 0.3995459704880817, + "epoch": 0.3989911870554564, "grad_norm": 0.0, - "learning_rate": 1.3653165800790973e-05, - "loss": 0.9092, + "learning_rate": 1.36696372534613e-05, + "loss": 0.9168, "step": 14080 }, { - "epoch": 0.39957434733257663, + "epoch": 0.3990195244977188, "grad_norm": 0.0, - "learning_rate": 1.3652310230899074e-05, - "loss": 0.9479, + "learning_rate": 1.366878347807896e-05, + "loss": 0.9272, "step": 14081 }, { - "epoch": 0.3996027241770715, + "epoch": 0.3990478619399813, "grad_norm": 0.0, - "learning_rate": 1.3651454630155945e-05, - "loss": 0.9221, + "learning_rate": 1.3667929671793094e-05, + "loss": 0.9213, "step": 14082 }, { - "epoch": 0.3996311010215664, + "epoch": 0.39907619938224376, "grad_norm": 0.0, - "learning_rate": 1.3650598998568818e-05, - "loss": 0.844, + "learning_rate": 1.3667075834610894e-05, + "loss": 0.8736, "step": 14083 }, { - "epoch": 0.3996594778660613, + "epoch": 0.3991045368245062, "grad_norm": 0.0, - "learning_rate": 1.3649743336144917e-05, - "loss": 1.1044, + "learning_rate": 1.3666221966539554e-05, + "loss": 0.9565, "step": 14084 }, { - "epoch": 0.3996878547105562, + "epoch": 0.3991328742667687, "grad_norm": 0.0, - "learning_rate": 1.3648887642891466e-05, - "loss": 0.9187, + "learning_rate": 1.3665368067586267e-05, + "loss": 0.8549, "step": 14085 }, { - "epoch": 0.3997162315550511, + "epoch": 0.3991612117090311, "grad_norm": 0.0, - "learning_rate": 1.36480319188157e-05, - "loss": 0.9168, + "learning_rate": 1.366451413775822e-05, + "loss": 0.9449, "step": 14086 }, { - "epoch": 0.39974460839954595, + "epoch": 0.3991895491512936, "grad_norm": 0.0, - "learning_rate": 1.3647176163924846e-05, - "loss": 0.9726, + "learning_rate": 1.366366017706261e-05, + "loss": 0.7923, "step": 14087 }, { - "epoch": 0.39977298524404087, + "epoch": 0.39921788659355606, "grad_norm": 0.0, - "learning_rate": 1.3646320378226128e-05, - "loss": 0.8259, + "learning_rate": 1.366280618550663e-05, + "loss": 0.9202, "step": 14088 }, { - "epoch": 0.39980136208853573, + "epoch": 0.39924622403581855, "grad_norm": 0.0, - "learning_rate": 1.3645464561726779e-05, - "loss": 1.0223, + "learning_rate": 1.3661952163097474e-05, + "loss": 0.8476, "step": 14089 }, { - "epoch": 0.39982973893303064, + "epoch": 0.399274561478081, "grad_norm": 0.0, - "learning_rate": 1.3644608714434027e-05, - "loss": 0.8282, + "learning_rate": 1.3661098109842336e-05, + "loss": 0.9194, "step": 14090 }, { - "epoch": 0.39985811577752556, + "epoch": 0.39930289892034343, "grad_norm": 0.0, - "learning_rate": 1.3643752836355103e-05, - "loss": 0.9061, + "learning_rate": 1.366024402574841e-05, + "loss": 0.9679, "step": 14091 }, { - "epoch": 0.3998864926220204, + "epoch": 0.3993312363626059, "grad_norm": 0.0, - "learning_rate": 1.3642896927497228e-05, - "loss": 0.8778, + "learning_rate": 1.3659389910822887e-05, + "loss": 1.0038, "step": 14092 }, { - "epoch": 0.39991486946651533, + "epoch": 0.39935957380486836, "grad_norm": 0.0, - "learning_rate": 1.3642040987867647e-05, - "loss": 0.9539, + "learning_rate": 1.3658535765072964e-05, + "loss": 0.9896, "step": 14093 }, { - "epoch": 0.3999432463110102, + "epoch": 0.39938791124713086, "grad_norm": 0.0, - "learning_rate": 1.364118501747358e-05, - "loss": 0.9563, + "learning_rate": 1.3657681588505835e-05, + "loss": 0.9421, "step": 14094 }, { - "epoch": 0.3999716231555051, + "epoch": 0.3994162486893933, "grad_norm": 0.0, - "learning_rate": 1.3640329016322259e-05, - "loss": 0.8923, + "learning_rate": 1.3656827381128697e-05, + "loss": 0.913, "step": 14095 }, { - "epoch": 0.4, + "epoch": 0.39944458613165573, "grad_norm": 0.0, - "learning_rate": 1.3639472984420913e-05, - "loss": 0.9051, + "learning_rate": 1.3655973142948743e-05, + "loss": 1.038, "step": 14096 }, { - "epoch": 0.4000283768444949, + "epoch": 0.3994729235739182, "grad_norm": 0.0, - "learning_rate": 1.3638616921776775e-05, - "loss": 0.9249, + "learning_rate": 1.3655118873973172e-05, + "loss": 0.896, "step": 14097 }, { - "epoch": 0.4000567536889898, + "epoch": 0.39950126101618066, "grad_norm": 0.0, - "learning_rate": 1.363776082839708e-05, - "loss": 0.8635, + "learning_rate": 1.3654264574209175e-05, + "loss": 0.9427, "step": 14098 }, { - "epoch": 0.40008513053348466, + "epoch": 0.39952959845844316, "grad_norm": 0.0, - "learning_rate": 1.3636904704289053e-05, - "loss": 1.0508, + "learning_rate": 1.3653410243663953e-05, + "loss": 1.0154, "step": 14099 }, { - "epoch": 0.40011350737797957, + "epoch": 0.3995579359007056, "grad_norm": 0.0, - "learning_rate": 1.363604854945993e-05, - "loss": 0.951, + "learning_rate": 1.3652555882344696e-05, + "loss": 0.8216, "step": 14100 }, { - "epoch": 0.4001418842224745, + "epoch": 0.3995862733429681, "grad_norm": 0.0, - "learning_rate": 1.3635192363916941e-05, - "loss": 0.9568, + "learning_rate": 1.3651701490258607e-05, + "loss": 0.8541, "step": 14101 }, { - "epoch": 0.40017026106696935, + "epoch": 0.39961461078523053, "grad_norm": 0.0, - "learning_rate": 1.3634336147667317e-05, - "loss": 0.8249, + "learning_rate": 1.365084706741288e-05, + "loss": 0.8982, "step": 14102 }, { - "epoch": 0.40019863791146426, + "epoch": 0.39964294822749297, "grad_norm": 0.0, - "learning_rate": 1.3633479900718292e-05, - "loss": 0.9201, + "learning_rate": 1.3649992613814714e-05, + "loss": 0.8966, "step": 14103 }, { - "epoch": 0.4002270147559591, + "epoch": 0.39967128566975546, "grad_norm": 0.0, - "learning_rate": 1.3632623623077104e-05, - "loss": 0.9312, + "learning_rate": 1.3649138129471302e-05, + "loss": 0.9821, "step": 14104 }, { - "epoch": 0.40025539160045404, + "epoch": 0.3996996231120179, "grad_norm": 0.0, - "learning_rate": 1.3631767314750977e-05, - "loss": 0.9136, + "learning_rate": 1.3648283614389846e-05, + "loss": 1.02, "step": 14105 }, { - "epoch": 0.4002837684449489, + "epoch": 0.3997279605542804, "grad_norm": 0.0, - "learning_rate": 1.3630910975747146e-05, - "loss": 0.9041, + "learning_rate": 1.3647429068577544e-05, + "loss": 0.9926, "step": 14106 }, { - "epoch": 0.4003121452894438, + "epoch": 0.39975629799654283, "grad_norm": 0.0, - "learning_rate": 1.363005460607285e-05, - "loss": 0.8525, + "learning_rate": 1.364657449204159e-05, + "loss": 0.813, "step": 14107 }, { - "epoch": 0.4003405221339387, + "epoch": 0.39978463543880527, "grad_norm": 0.0, - "learning_rate": 1.362919820573532e-05, - "loss": 0.9945, + "learning_rate": 1.3645719884789184e-05, + "loss": 0.867, "step": 14108 }, { - "epoch": 0.4003688989784336, + "epoch": 0.39981297288106776, "grad_norm": 0.0, - "learning_rate": 1.3628341774741789e-05, - "loss": 0.8693, + "learning_rate": 1.3644865246827528e-05, + "loss": 0.9881, "step": 14109 }, { - "epoch": 0.4003972758229285, + "epoch": 0.3998413103233302, "grad_norm": 0.0, - "learning_rate": 1.3627485313099493e-05, - "loss": 0.9584, + "learning_rate": 1.3644010578163818e-05, + "loss": 0.8351, "step": 14110 }, { - "epoch": 0.40042565266742336, + "epoch": 0.3998696477655927, "grad_norm": 0.0, - "learning_rate": 1.3626628820815662e-05, - "loss": 0.8205, + "learning_rate": 1.3643155878805255e-05, + "loss": 0.8945, "step": 14111 }, { - "epoch": 0.4004540295119183, + "epoch": 0.39989798520785513, "grad_norm": 0.0, - "learning_rate": 1.3625772297897535e-05, - "loss": 0.9595, + "learning_rate": 1.3642301148759034e-05, + "loss": 0.9022, "step": 14112 }, { - "epoch": 0.4004824063564132, + "epoch": 0.3999263226501176, "grad_norm": 0.0, - "learning_rate": 1.3624915744352349e-05, - "loss": 0.8733, + "learning_rate": 1.364144638803236e-05, + "loss": 0.9179, "step": 14113 }, { - "epoch": 0.40051078320090805, + "epoch": 0.39995466009238007, "grad_norm": 0.0, - "learning_rate": 1.3624059160187337e-05, - "loss": 0.9373, + "learning_rate": 1.3640591596632429e-05, + "loss": 1.0213, "step": 14114 }, { - "epoch": 0.40053916004540296, + "epoch": 0.3999829975346425, "grad_norm": 0.0, - "learning_rate": 1.3623202545409733e-05, - "loss": 0.9684, + "learning_rate": 1.3639736774566445e-05, + "loss": 0.938, "step": 14115 }, { - "epoch": 0.4005675368898978, + "epoch": 0.400011334976905, "grad_norm": 0.0, - "learning_rate": 1.3622345900026774e-05, - "loss": 0.9533, + "learning_rate": 1.3638881921841607e-05, + "loss": 0.931, "step": 14116 }, { - "epoch": 0.40059591373439274, + "epoch": 0.40003967241916744, "grad_norm": 0.0, - "learning_rate": 1.3621489224045696e-05, - "loss": 0.9745, + "learning_rate": 1.3638027038465114e-05, + "loss": 0.9362, "step": 14117 }, { - "epoch": 0.40062429057888765, + "epoch": 0.40006800986142993, "grad_norm": 0.0, - "learning_rate": 1.3620632517473736e-05, - "loss": 0.9072, + "learning_rate": 1.3637172124444169e-05, + "loss": 0.9259, "step": 14118 }, { - "epoch": 0.4006526674233825, + "epoch": 0.40009634730369237, "grad_norm": 0.0, - "learning_rate": 1.361977578031813e-05, - "loss": 0.9825, + "learning_rate": 1.3636317179785972e-05, + "loss": 0.8566, "step": 14119 }, { - "epoch": 0.40068104426787743, + "epoch": 0.4001246847459548, "grad_norm": 0.0, - "learning_rate": 1.3618919012586116e-05, - "loss": 1.0121, + "learning_rate": 1.3635462204497724e-05, + "loss": 0.8392, "step": 14120 }, { - "epoch": 0.4007094211123723, + "epoch": 0.4001530221882173, "grad_norm": 0.0, - "learning_rate": 1.3618062214284931e-05, - "loss": 0.9414, + "learning_rate": 1.363460719858663e-05, + "loss": 0.9932, "step": 14121 }, { - "epoch": 0.4007377979568672, + "epoch": 0.40018135963047974, "grad_norm": 0.0, - "learning_rate": 1.361720538542181e-05, - "loss": 0.9224, + "learning_rate": 1.3633752162059888e-05, + "loss": 0.963, "step": 14122 }, { - "epoch": 0.40076617480136206, + "epoch": 0.40020969707274223, "grad_norm": 0.0, - "learning_rate": 1.3616348526003996e-05, - "loss": 0.9776, + "learning_rate": 1.3632897094924704e-05, + "loss": 0.9416, "step": 14123 }, { - "epoch": 0.400794551645857, + "epoch": 0.40023803451500467, "grad_norm": 0.0, - "learning_rate": 1.361549163603872e-05, - "loss": 0.7985, + "learning_rate": 1.3632041997188278e-05, + "loss": 0.9725, "step": 14124 }, { - "epoch": 0.4008229284903519, + "epoch": 0.40026637195726716, "grad_norm": 0.0, - "learning_rate": 1.3614634715533223e-05, + "learning_rate": 1.3631186868857813e-05, "loss": 1.0133, "step": 14125 }, { - "epoch": 0.40085130533484675, + "epoch": 0.4002947093995296, "grad_norm": 0.0, - "learning_rate": 1.3613777764494747e-05, - "loss": 0.9629, + "learning_rate": 1.3630331709940514e-05, + "loss": 0.9045, "step": 14126 }, { - "epoch": 0.40087968217934167, + "epoch": 0.40032304684179204, "grad_norm": 0.0, - "learning_rate": 1.3612920782930525e-05, - "loss": 0.9365, + "learning_rate": 1.362947652044358e-05, + "loss": 0.8735, "step": 14127 }, { - "epoch": 0.4009080590238365, + "epoch": 0.40035138428405453, "grad_norm": 0.0, - "learning_rate": 1.36120637708478e-05, - "loss": 1.0044, + "learning_rate": 1.362862130037422e-05, + "loss": 0.9176, "step": 14128 }, { - "epoch": 0.40093643586833144, + "epoch": 0.400379721726317, "grad_norm": 0.0, - "learning_rate": 1.3611206728253813e-05, - "loss": 0.9489, + "learning_rate": 1.3627766049739635e-05, + "loss": 0.863, "step": 14129 }, { - "epoch": 0.40096481271282636, + "epoch": 0.40040805916857947, "grad_norm": 0.0, - "learning_rate": 1.3610349655155797e-05, - "loss": 0.8655, + "learning_rate": 1.362691076854703e-05, + "loss": 0.8693, "step": 14130 }, { - "epoch": 0.4009931895573212, + "epoch": 0.4004363966108419, "grad_norm": 0.0, - "learning_rate": 1.3609492551560996e-05, - "loss": 1.0665, + "learning_rate": 1.3626055456803608e-05, + "loss": 0.9711, "step": 14131 }, { - "epoch": 0.40102156640181613, + "epoch": 0.40046473405310434, "grad_norm": 0.0, - "learning_rate": 1.3608635417476647e-05, - "loss": 0.952, + "learning_rate": 1.3625200114516574e-05, + "loss": 0.9105, "step": 14132 }, { - "epoch": 0.401049943246311, + "epoch": 0.40049307149536684, "grad_norm": 0.0, - "learning_rate": 1.3607778252909996e-05, - "loss": 0.9134, + "learning_rate": 1.3624344741693134e-05, + "loss": 0.8067, "step": 14133 }, { - "epoch": 0.4010783200908059, + "epoch": 0.4005214089376293, "grad_norm": 0.0, - "learning_rate": 1.3606921057868277e-05, - "loss": 0.9415, + "learning_rate": 1.3623489338340491e-05, + "loss": 0.9022, "step": 14134 }, { - "epoch": 0.4011066969353008, + "epoch": 0.40054974637989177, "grad_norm": 0.0, - "learning_rate": 1.3606063832358736e-05, - "loss": 0.862, + "learning_rate": 1.3622633904465855e-05, + "loss": 1.0006, "step": 14135 }, { - "epoch": 0.4011350737797957, + "epoch": 0.4005780838221542, "grad_norm": 0.0, - "learning_rate": 1.360520657638861e-05, - "loss": 0.8842, + "learning_rate": 1.3621778440076426e-05, + "loss": 0.8679, "step": 14136 }, { - "epoch": 0.4011634506242906, + "epoch": 0.4006064212644167, "grad_norm": 0.0, - "learning_rate": 1.3604349289965141e-05, - "loss": 0.8164, + "learning_rate": 1.3620922945179411e-05, + "loss": 0.8361, "step": 14137 }, { - "epoch": 0.40119182746878546, + "epoch": 0.40063475870667914, "grad_norm": 0.0, - "learning_rate": 1.3603491973095575e-05, - "loss": 1.0043, + "learning_rate": 1.3620067419782019e-05, + "loss": 0.9717, "step": 14138 }, { - "epoch": 0.40122020431328037, + "epoch": 0.4006630961489416, "grad_norm": 0.0, - "learning_rate": 1.3602634625787147e-05, - "loss": 0.8722, + "learning_rate": 1.3619211863891458e-05, + "loss": 0.9219, "step": 14139 }, { - "epoch": 0.40124858115777523, + "epoch": 0.40069143359120407, "grad_norm": 0.0, - "learning_rate": 1.3601777248047105e-05, - "loss": 0.8312, + "learning_rate": 1.3618356277514924e-05, + "loss": 0.94, "step": 14140 }, { - "epoch": 0.40127695800227015, + "epoch": 0.4007197710334665, "grad_norm": 0.0, - "learning_rate": 1.3600919839882687e-05, - "loss": 0.9265, + "learning_rate": 1.3617500660659635e-05, + "loss": 0.8554, "step": 14141 }, { - "epoch": 0.40130533484676506, + "epoch": 0.400748108475729, "grad_norm": 0.0, - "learning_rate": 1.3600062401301139e-05, - "loss": 0.8938, + "learning_rate": 1.3616645013332796e-05, + "loss": 0.8416, "step": 14142 }, { - "epoch": 0.4013337116912599, + "epoch": 0.40077644591799144, "grad_norm": 0.0, - "learning_rate": 1.35992049323097e-05, - "loss": 0.9271, + "learning_rate": 1.3615789335541613e-05, + "loss": 0.8813, "step": 14143 }, { - "epoch": 0.40136208853575484, + "epoch": 0.4008047833602539, "grad_norm": 0.0, - "learning_rate": 1.3598347432915617e-05, - "loss": 0.8718, + "learning_rate": 1.3614933627293294e-05, + "loss": 1.0104, "step": 14144 }, { - "epoch": 0.4013904653802497, + "epoch": 0.4008331208025164, "grad_norm": 0.0, - "learning_rate": 1.359748990312613e-05, - "loss": 0.897, + "learning_rate": 1.3614077888595047e-05, + "loss": 0.9317, "step": 14145 }, { - "epoch": 0.4014188422247446, + "epoch": 0.4008614582447788, "grad_norm": 0.0, - "learning_rate": 1.3596632342948484e-05, - "loss": 1.043, + "learning_rate": 1.3613222119454077e-05, + "loss": 0.9232, "step": 14146 }, { - "epoch": 0.4014472190692395, + "epoch": 0.4008897956870413, "grad_norm": 0.0, - "learning_rate": 1.3595774752389926e-05, - "loss": 0.8918, + "learning_rate": 1.3612366319877597e-05, + "loss": 0.9533, "step": 14147 }, { - "epoch": 0.4014755959137344, + "epoch": 0.40091813312930374, "grad_norm": 0.0, - "learning_rate": 1.3594917131457695e-05, - "loss": 0.9951, + "learning_rate": 1.3611510489872815e-05, + "loss": 0.8813, "step": 14148 }, { - "epoch": 0.4015039727582293, + "epoch": 0.40094647057156624, "grad_norm": 0.0, - "learning_rate": 1.3594059480159034e-05, - "loss": 0.9488, + "learning_rate": 1.3610654629446938e-05, + "loss": 0.8627, "step": 14149 }, { - "epoch": 0.40153234960272416, + "epoch": 0.4009748080138287, "grad_norm": 0.0, - "learning_rate": 1.3593201798501192e-05, - "loss": 0.8777, + "learning_rate": 1.3609798738607176e-05, + "loss": 0.8726, "step": 14150 }, { - "epoch": 0.4015607264472191, + "epoch": 0.4010031454560911, "grad_norm": 0.0, - "learning_rate": 1.3592344086491416e-05, - "loss": 1.0378, + "learning_rate": 1.360894281736074e-05, + "loss": 0.8224, "step": 14151 }, { - "epoch": 0.401589103291714, + "epoch": 0.4010314828983536, "grad_norm": 0.0, - "learning_rate": 1.3591486344136949e-05, - "loss": 0.8334, + "learning_rate": 1.3608086865714838e-05, + "loss": 0.9429, "step": 14152 }, { - "epoch": 0.40161748013620885, + "epoch": 0.40105982034061605, "grad_norm": 0.0, - "learning_rate": 1.3590628571445033e-05, - "loss": 0.8509, + "learning_rate": 1.3607230883676678e-05, + "loss": 0.7753, "step": 14153 }, { - "epoch": 0.40164585698070376, + "epoch": 0.40108815778287854, "grad_norm": 0.0, - "learning_rate": 1.3589770768422916e-05, - "loss": 0.9514, + "learning_rate": 1.3606374871253474e-05, + "loss": 1.0494, "step": 14154 }, { - "epoch": 0.4016742338251986, + "epoch": 0.401116495225141, "grad_norm": 0.0, - "learning_rate": 1.3588912935077845e-05, - "loss": 0.9347, + "learning_rate": 1.3605518828452437e-05, + "loss": 0.8298, "step": 14155 }, { - "epoch": 0.40170261066969354, + "epoch": 0.4011448326674034, "grad_norm": 0.0, - "learning_rate": 1.3588055071417063e-05, - "loss": 0.9684, + "learning_rate": 1.3604662755280775e-05, + "loss": 0.8623, "step": 14156 }, { - "epoch": 0.4017309875141884, + "epoch": 0.4011731701096659, "grad_norm": 0.0, - "learning_rate": 1.3587197177447817e-05, - "loss": 0.9735, + "learning_rate": 1.3603806651745701e-05, + "loss": 0.9326, "step": 14157 }, { - "epoch": 0.4017593643586833, + "epoch": 0.40120150755192835, "grad_norm": 0.0, - "learning_rate": 1.3586339253177361e-05, - "loss": 0.8907, + "learning_rate": 1.3602950517854426e-05, + "loss": 0.9349, "step": 14158 }, { - "epoch": 0.40178774120317823, + "epoch": 0.40122984499419084, "grad_norm": 0.0, - "learning_rate": 1.3585481298612933e-05, - "loss": 1.0084, + "learning_rate": 1.360209435361416e-05, + "loss": 0.8934, "step": 14159 }, { - "epoch": 0.4018161180476731, + "epoch": 0.4012581824364533, "grad_norm": 0.0, - "learning_rate": 1.3584623313761783e-05, - "loss": 0.9351, + "learning_rate": 1.3601238159032113e-05, + "loss": 0.8932, "step": 14160 }, { - "epoch": 0.401844494892168, + "epoch": 0.4012865198787157, "grad_norm": 0.0, - "learning_rate": 1.358376529863116e-05, - "loss": 1.0032, + "learning_rate": 1.3600381934115502e-05, + "loss": 0.9014, "step": 14161 }, { - "epoch": 0.40187287173666286, + "epoch": 0.4013148573209782, "grad_norm": 0.0, - "learning_rate": 1.358290725322831e-05, - "loss": 0.8275, + "learning_rate": 1.3599525678871536e-05, + "loss": 0.9355, "step": 14162 }, { - "epoch": 0.4019012485811578, + "epoch": 0.40134319476324065, "grad_norm": 0.0, - "learning_rate": 1.358204917756048e-05, - "loss": 0.9126, + "learning_rate": 1.3598669393307429e-05, + "loss": 0.9135, "step": 14163 }, { - "epoch": 0.4019296254256527, + "epoch": 0.40137153220550315, "grad_norm": 0.0, - "learning_rate": 1.3581191071634918e-05, - "loss": 0.9272, + "learning_rate": 1.3597813077430395e-05, + "loss": 0.9896, "step": 14164 }, { - "epoch": 0.40195800227014755, + "epoch": 0.4013998696477656, "grad_norm": 0.0, - "learning_rate": 1.3580332935458878e-05, - "loss": 0.9374, + "learning_rate": 1.3596956731247646e-05, + "loss": 0.8488, "step": 14165 }, { - "epoch": 0.40198637911464247, + "epoch": 0.4014282070900281, "grad_norm": 0.0, - "learning_rate": 1.3579474769039602e-05, - "loss": 0.9744, + "learning_rate": 1.359610035476639e-05, + "loss": 0.9747, "step": 14166 }, { - "epoch": 0.4020147559591373, + "epoch": 0.4014565445322905, "grad_norm": 0.0, - "learning_rate": 1.3578616572384345e-05, - "loss": 0.8313, + "learning_rate": 1.3595243947993848e-05, + "loss": 0.9061, "step": 14167 }, { - "epoch": 0.40204313280363224, + "epoch": 0.40148488197455295, "grad_norm": 0.0, - "learning_rate": 1.357775834550035e-05, - "loss": 0.993, + "learning_rate": 1.3594387510937232e-05, + "loss": 1.0276, "step": 14168 }, { - "epoch": 0.4020715096481271, + "epoch": 0.40151321941681545, "grad_norm": 0.0, - "learning_rate": 1.3576900088394871e-05, - "loss": 0.9125, + "learning_rate": 1.3593531043603756e-05, + "loss": 0.9131, "step": 14169 }, { - "epoch": 0.402099886492622, + "epoch": 0.4015415568590779, "grad_norm": 0.0, - "learning_rate": 1.3576041801075154e-05, - "loss": 0.9073, + "learning_rate": 1.3592674546000631e-05, + "loss": 0.8212, "step": 14170 }, { - "epoch": 0.40212826333711693, + "epoch": 0.4015698943013404, "grad_norm": 0.0, - "learning_rate": 1.3575183483548453e-05, - "loss": 0.9034, + "learning_rate": 1.3591818018135077e-05, + "loss": 0.9278, "step": 14171 }, { - "epoch": 0.4021566401816118, + "epoch": 0.4015982317436028, "grad_norm": 0.0, - "learning_rate": 1.3574325135822016e-05, - "loss": 0.9525, + "learning_rate": 1.3590961460014304e-05, + "loss": 0.9939, "step": 14172 }, { - "epoch": 0.4021850170261067, + "epoch": 0.40162656918586526, "grad_norm": 0.0, - "learning_rate": 1.3573466757903094e-05, - "loss": 0.9802, + "learning_rate": 1.3590104871645529e-05, + "loss": 1.0382, "step": 14173 }, { - "epoch": 0.40221339387060157, + "epoch": 0.40165490662812775, "grad_norm": 0.0, - "learning_rate": 1.3572608349798937e-05, - "loss": 0.8829, + "learning_rate": 1.3589248253035967e-05, + "loss": 0.9325, "step": 14174 }, { - "epoch": 0.4022417707150965, + "epoch": 0.4016832440703902, "grad_norm": 0.0, - "learning_rate": 1.3571749911516798e-05, - "loss": 0.7846, + "learning_rate": 1.3588391604192834e-05, + "loss": 0.8144, "step": 14175 }, { - "epoch": 0.4022701475595914, + "epoch": 0.4017115815126527, "grad_norm": 0.0, - "learning_rate": 1.3570891443063925e-05, - "loss": 1.0746, + "learning_rate": 1.3587534925123349e-05, + "loss": 0.9324, "step": 14176 }, { - "epoch": 0.40229852440408626, + "epoch": 0.4017399189549151, "grad_norm": 0.0, - "learning_rate": 1.3570032944447574e-05, - "loss": 0.9413, + "learning_rate": 1.3586678215834725e-05, + "loss": 0.89, "step": 14177 }, { - "epoch": 0.40232690124858117, + "epoch": 0.4017682563971776, "grad_norm": 0.0, - "learning_rate": 1.3569174415674993e-05, - "loss": 0.8506, + "learning_rate": 1.3585821476334176e-05, + "loss": 0.8007, "step": 14178 }, { - "epoch": 0.40235527809307603, + "epoch": 0.40179659383944005, "grad_norm": 0.0, - "learning_rate": 1.3568315856753436e-05, - "loss": 0.8839, + "learning_rate": 1.3584964706628923e-05, + "loss": 0.9769, "step": 14179 }, { - "epoch": 0.40238365493757094, + "epoch": 0.4018249312817025, "grad_norm": 0.0, - "learning_rate": 1.3567457267690152e-05, - "loss": 0.8998, + "learning_rate": 1.3584107906726178e-05, + "loss": 0.8991, "step": 14180 }, { - "epoch": 0.40241203178206586, + "epoch": 0.401853268723965, "grad_norm": 0.0, - "learning_rate": 1.3566598648492398e-05, - "loss": 1.0348, + "learning_rate": 1.3583251076633163e-05, + "loss": 0.8006, "step": 14181 }, { - "epoch": 0.4024404086265607, + "epoch": 0.4018816061662274, "grad_norm": 0.0, - "learning_rate": 1.3565739999167425e-05, - "loss": 0.9623, + "learning_rate": 1.3582394216357095e-05, + "loss": 0.8233, "step": 14182 }, { - "epoch": 0.40246878547105563, + "epoch": 0.4019099436084899, "grad_norm": 0.0, - "learning_rate": 1.3564881319722485e-05, - "loss": 0.9379, + "learning_rate": 1.3581537325905188e-05, + "loss": 0.9228, "step": 14183 }, { - "epoch": 0.4024971623155505, + "epoch": 0.40193828105075236, "grad_norm": 0.0, - "learning_rate": 1.3564022610164833e-05, - "loss": 0.8325, + "learning_rate": 1.3580680405284666e-05, + "loss": 0.9182, "step": 14184 }, { - "epoch": 0.4025255391600454, + "epoch": 0.4019666184930148, "grad_norm": 0.0, - "learning_rate": 1.3563163870501723e-05, - "loss": 0.8933, + "learning_rate": 1.357982345450274e-05, + "loss": 0.8699, "step": 14185 }, { - "epoch": 0.40255391600454027, + "epoch": 0.4019949559352773, "grad_norm": 0.0, - "learning_rate": 1.3562305100740404e-05, - "loss": 0.9225, + "learning_rate": 1.3578966473566631e-05, + "loss": 0.962, "step": 14186 }, { - "epoch": 0.4025822928490352, + "epoch": 0.4020232933775397, "grad_norm": 0.0, - "learning_rate": 1.3561446300888137e-05, - "loss": 0.9133, + "learning_rate": 1.3578109462483558e-05, + "loss": 0.9676, "step": 14187 }, { - "epoch": 0.4026106696935301, + "epoch": 0.4020516308198022, "grad_norm": 0.0, - "learning_rate": 1.3560587470952172e-05, - "loss": 0.9233, + "learning_rate": 1.3577252421260742e-05, + "loss": 0.9103, "step": 14188 }, { - "epoch": 0.40263904653802496, + "epoch": 0.40207996826206466, "grad_norm": 0.0, - "learning_rate": 1.3559728610939767e-05, - "loss": 0.9408, + "learning_rate": 1.3576395349905403e-05, + "loss": 0.8462, "step": 14189 }, { - "epoch": 0.4026674233825199, + "epoch": 0.40210830570432715, "grad_norm": 0.0, - "learning_rate": 1.3558869720858167e-05, - "loss": 0.8918, + "learning_rate": 1.3575538248424756e-05, + "loss": 0.9589, "step": 14190 }, { - "epoch": 0.40269580022701473, + "epoch": 0.4021366431465896, "grad_norm": 0.0, - "learning_rate": 1.3558010800714643e-05, - "loss": 0.8717, + "learning_rate": 1.3574681116826018e-05, + "loss": 0.8332, "step": 14191 }, { - "epoch": 0.40272417707150965, + "epoch": 0.40216498058885203, "grad_norm": 0.0, - "learning_rate": 1.3557151850516439e-05, - "loss": 1.0041, + "learning_rate": 1.3573823955116416e-05, + "loss": 0.9613, "step": 14192 }, { - "epoch": 0.40275255391600456, + "epoch": 0.4021933180311145, "grad_norm": 0.0, - "learning_rate": 1.355629287027081e-05, - "loss": 0.866, + "learning_rate": 1.357296676330317e-05, + "loss": 0.8397, "step": 14193 }, { - "epoch": 0.4027809307604994, + "epoch": 0.40222165547337696, "grad_norm": 0.0, - "learning_rate": 1.3555433859985019e-05, - "loss": 1.0156, + "learning_rate": 1.3572109541393498e-05, + "loss": 0.9776, "step": 14194 }, { - "epoch": 0.40280930760499434, + "epoch": 0.40224999291563945, "grad_norm": 0.0, - "learning_rate": 1.3554574819666315e-05, - "loss": 0.8217, + "learning_rate": 1.357125228939462e-05, + "loss": 0.8405, "step": 14195 }, { - "epoch": 0.4028376844494892, + "epoch": 0.4022783303579019, "grad_norm": 0.0, - "learning_rate": 1.3553715749321962e-05, - "loss": 1.026, + "learning_rate": 1.357039500731376e-05, + "loss": 0.9564, "step": 14196 }, { - "epoch": 0.4028660612939841, + "epoch": 0.40230666780016433, "grad_norm": 0.0, - "learning_rate": 1.355285664895921e-05, - "loss": 0.9281, + "learning_rate": 1.3569537695158135e-05, + "loss": 0.8991, "step": 14197 }, { - "epoch": 0.402894438138479, + "epoch": 0.4023350052424268, "grad_norm": 0.0, - "learning_rate": 1.3551997518585317e-05, - "loss": 0.8234, + "learning_rate": 1.3568680352934966e-05, + "loss": 0.9195, "step": 14198 }, { - "epoch": 0.4029228149829739, + "epoch": 0.40236334268468926, "grad_norm": 0.0, - "learning_rate": 1.3551138358207545e-05, - "loss": 0.8779, + "learning_rate": 1.3567822980651481e-05, + "loss": 1.0309, "step": 14199 }, { - "epoch": 0.4029511918274688, + "epoch": 0.40239168012695176, "grad_norm": 0.0, - "learning_rate": 1.3550279167833145e-05, - "loss": 0.9014, + "learning_rate": 1.3566965578314897e-05, + "loss": 0.8263, "step": 14200 }, { - "epoch": 0.40297956867196366, + "epoch": 0.4024200175692142, "grad_norm": 0.0, - "learning_rate": 1.3549419947469377e-05, - "loss": 0.9302, + "learning_rate": 1.3566108145932437e-05, + "loss": 1.0235, "step": 14201 }, { - "epoch": 0.4030079455164586, + "epoch": 0.4024483550114767, "grad_norm": 0.0, - "learning_rate": 1.3548560697123501e-05, - "loss": 0.9576, + "learning_rate": 1.3565250683511324e-05, + "loss": 0.9621, "step": 14202 }, { - "epoch": 0.40303632236095344, + "epoch": 0.4024766924537391, "grad_norm": 0.0, - "learning_rate": 1.3547701416802772e-05, - "loss": 0.9485, + "learning_rate": 1.3564393191058782e-05, + "loss": 0.8431, "step": 14203 }, { - "epoch": 0.40306469920544835, + "epoch": 0.40250502989600156, "grad_norm": 0.0, - "learning_rate": 1.3546842106514448e-05, - "loss": 0.916, + "learning_rate": 1.356353566858203e-05, + "loss": 0.9212, "step": 14204 }, { - "epoch": 0.40309307604994327, + "epoch": 0.40253336733826406, "grad_norm": 0.0, - "learning_rate": 1.3545982766265792e-05, - "loss": 0.8469, + "learning_rate": 1.3562678116088294e-05, + "loss": 0.9762, "step": 14205 }, { - "epoch": 0.4031214528944381, + "epoch": 0.4025617047805265, "grad_norm": 0.0, - "learning_rate": 1.354512339606406e-05, - "loss": 0.8489, + "learning_rate": 1.35618205335848e-05, + "loss": 0.9276, "step": 14206 }, { - "epoch": 0.40314982973893304, + "epoch": 0.402590042222789, "grad_norm": 0.0, - "learning_rate": 1.3544263995916508e-05, - "loss": 0.963, + "learning_rate": 1.3560962921078766e-05, + "loss": 0.9356, "step": 14207 }, { - "epoch": 0.4031782065834279, + "epoch": 0.40261837966505143, "grad_norm": 0.0, - "learning_rate": 1.3543404565830403e-05, - "loss": 0.7941, + "learning_rate": 1.356010527857742e-05, + "loss": 0.938, "step": 14208 }, { - "epoch": 0.4032065834279228, + "epoch": 0.40264671710731387, "grad_norm": 0.0, - "learning_rate": 1.3542545105813002e-05, - "loss": 0.8193, + "learning_rate": 1.3559247606087987e-05, + "loss": 0.9287, "step": 14209 }, { - "epoch": 0.40323496027241773, + "epoch": 0.40267505454957636, "grad_norm": 0.0, - "learning_rate": 1.3541685615871556e-05, - "loss": 0.8937, + "learning_rate": 1.3558389903617688e-05, + "loss": 1.0397, "step": 14210 }, { - "epoch": 0.4032633371169126, + "epoch": 0.4027033919918388, "grad_norm": 0.0, - "learning_rate": 1.3540826096013335e-05, - "loss": 0.95, + "learning_rate": 1.3557532171173749e-05, + "loss": 1.0251, "step": 14211 }, { - "epoch": 0.4032917139614075, + "epoch": 0.4027317294341013, "grad_norm": 0.0, - "learning_rate": 1.35399665462456e-05, - "loss": 0.8317, + "learning_rate": 1.3556674408763395e-05, + "loss": 1.0469, "step": 14212 }, { - "epoch": 0.40332009080590236, + "epoch": 0.40276006687636373, "grad_norm": 0.0, - "learning_rate": 1.3539106966575605e-05, - "loss": 0.8074, + "learning_rate": 1.3555816616393852e-05, + "loss": 0.9745, "step": 14213 }, { - "epoch": 0.4033484676503973, + "epoch": 0.4027884043186262, "grad_norm": 0.0, - "learning_rate": 1.3538247357010617e-05, - "loss": 0.8472, + "learning_rate": 1.3554958794072346e-05, + "loss": 0.7762, "step": 14214 }, { - "epoch": 0.4033768444948922, + "epoch": 0.40281674176088866, "grad_norm": 0.0, - "learning_rate": 1.3537387717557893e-05, - "loss": 0.8694, + "learning_rate": 1.3554100941806103e-05, + "loss": 0.9626, "step": 14215 }, { - "epoch": 0.40340522133938705, + "epoch": 0.4028450792031511, "grad_norm": 0.0, - "learning_rate": 1.3536528048224698e-05, - "loss": 0.9474, + "learning_rate": 1.3553243059602346e-05, + "loss": 1.0765, "step": 14216 }, { - "epoch": 0.40343359818388197, + "epoch": 0.4028734166454136, "grad_norm": 0.0, - "learning_rate": 1.3535668349018287e-05, - "loss": 0.9844, + "learning_rate": 1.3552385147468306e-05, + "loss": 0.9432, "step": 14217 }, { - "epoch": 0.40346197502837683, + "epoch": 0.40290175408767603, "grad_norm": 0.0, - "learning_rate": 1.353480861994593e-05, - "loss": 0.9279, + "learning_rate": 1.3551527205411203e-05, + "loss": 0.9427, "step": 14218 }, { - "epoch": 0.40349035187287174, + "epoch": 0.4029300915299385, "grad_norm": 0.0, - "learning_rate": 1.3533948861014885e-05, - "loss": 1.0409, + "learning_rate": 1.3550669233438271e-05, + "loss": 1.0587, "step": 14219 }, { - "epoch": 0.4035187287173666, + "epoch": 0.40295842897220097, "grad_norm": 0.0, - "learning_rate": 1.3533089072232413e-05, - "loss": 0.9995, + "learning_rate": 1.354981123155673e-05, + "loss": 0.869, "step": 14220 }, { - "epoch": 0.4035471055618615, + "epoch": 0.4029867664144634, "grad_norm": 0.0, - "learning_rate": 1.3532229253605783e-05, - "loss": 0.8368, + "learning_rate": 1.3548953199773813e-05, + "loss": 1.0157, "step": 14221 }, { - "epoch": 0.40357548240635643, + "epoch": 0.4030151038567259, "grad_norm": 0.0, - "learning_rate": 1.3531369405142251e-05, - "loss": 0.9133, + "learning_rate": 1.3548095138096746e-05, + "loss": 0.8989, "step": 14222 }, { - "epoch": 0.4036038592508513, + "epoch": 0.40304344129898834, "grad_norm": 0.0, - "learning_rate": 1.3530509526849086e-05, - "loss": 0.9565, + "learning_rate": 1.3547237046532757e-05, + "loss": 0.8862, "step": 14223 }, { - "epoch": 0.4036322360953462, + "epoch": 0.40307177874125083, "grad_norm": 0.0, - "learning_rate": 1.3529649618733545e-05, - "loss": 0.8373, + "learning_rate": 1.3546378925089069e-05, + "loss": 0.9902, "step": 14224 }, { - "epoch": 0.40366061293984107, + "epoch": 0.40310011618351327, "grad_norm": 0.0, - "learning_rate": 1.3528789680802896e-05, - "loss": 0.9486, + "learning_rate": 1.3545520773772916e-05, + "loss": 0.8855, "step": 14225 }, { - "epoch": 0.403688989784336, + "epoch": 0.40312845362577576, "grad_norm": 0.0, - "learning_rate": 1.3527929713064401e-05, - "loss": 0.909, + "learning_rate": 1.3544662592591526e-05, + "loss": 0.9354, "step": 14226 }, { - "epoch": 0.4037173666288309, + "epoch": 0.4031567910680382, "grad_norm": 0.0, - "learning_rate": 1.3527069715525322e-05, - "loss": 0.9018, + "learning_rate": 1.3543804381552129e-05, + "loss": 1.015, "step": 14227 }, { - "epoch": 0.40374574347332576, + "epoch": 0.40318512851030064, "grad_norm": 0.0, - "learning_rate": 1.3526209688192934e-05, - "loss": 0.925, + "learning_rate": 1.3542946140661948e-05, + "loss": 0.8431, "step": 14228 }, { - "epoch": 0.4037741203178207, + "epoch": 0.40321346595256313, "grad_norm": 0.0, - "learning_rate": 1.352534963107449e-05, - "loss": 0.8229, + "learning_rate": 1.3542087869928215e-05, + "loss": 0.8925, "step": 14229 }, { - "epoch": 0.40380249716231553, + "epoch": 0.40324180339482557, "grad_norm": 0.0, - "learning_rate": 1.352448954417726e-05, - "loss": 0.9642, + "learning_rate": 1.3541229569358165e-05, + "loss": 0.8867, "step": 14230 }, { - "epoch": 0.40383087400681045, + "epoch": 0.40327014083708806, "grad_norm": 0.0, - "learning_rate": 1.352362942750851e-05, - "loss": 1.0441, + "learning_rate": 1.354037123895902e-05, + "loss": 0.9425, "step": 14231 }, { - "epoch": 0.40385925085130536, + "epoch": 0.4032984782793505, "grad_norm": 0.0, - "learning_rate": 1.3522769281075502e-05, - "loss": 0.9313, + "learning_rate": 1.3539512878738015e-05, + "loss": 0.8467, "step": 14232 }, { - "epoch": 0.4038876276958002, + "epoch": 0.40332681572161294, "grad_norm": 0.0, - "learning_rate": 1.3521909104885503e-05, - "loss": 0.9449, + "learning_rate": 1.3538654488702376e-05, + "loss": 0.992, "step": 14233 }, { - "epoch": 0.40391600454029514, + "epoch": 0.40335515316387544, "grad_norm": 0.0, - "learning_rate": 1.3521048898945778e-05, - "loss": 0.9097, + "learning_rate": 1.3537796068859339e-05, + "loss": 0.9846, "step": 14234 }, { - "epoch": 0.40394438138479, + "epoch": 0.4033834906061379, "grad_norm": 0.0, - "learning_rate": 1.3520188663263598e-05, - "loss": 0.8728, + "learning_rate": 1.3536937619216133e-05, + "loss": 0.9659, "step": 14235 }, { - "epoch": 0.4039727582292849, + "epoch": 0.40341182804840037, "grad_norm": 0.0, - "learning_rate": 1.3519328397846225e-05, - "loss": 0.9063, + "learning_rate": 1.3536079139779987e-05, + "loss": 1.0065, "step": 14236 }, { - "epoch": 0.40400113507377977, + "epoch": 0.4034401654906628, "grad_norm": 0.0, - "learning_rate": 1.3518468102700926e-05, - "loss": 1.0174, + "learning_rate": 1.353522063055813e-05, + "loss": 0.9823, "step": 14237 }, { - "epoch": 0.4040295119182747, + "epoch": 0.4034685029329253, "grad_norm": 0.0, - "learning_rate": 1.3517607777834972e-05, - "loss": 0.8932, + "learning_rate": 1.3534362091557798e-05, + "loss": 0.8812, "step": 14238 }, { - "epoch": 0.4040578887627696, + "epoch": 0.40349684037518774, "grad_norm": 0.0, - "learning_rate": 1.3516747423255626e-05, - "loss": 1.0027, + "learning_rate": 1.3533503522786224e-05, + "loss": 0.9356, "step": 14239 }, { - "epoch": 0.40408626560726446, + "epoch": 0.4035251778174502, "grad_norm": 0.0, - "learning_rate": 1.351588703897015e-05, - "loss": 0.9352, + "learning_rate": 1.3532644924250638e-05, + "loss": 0.9821, "step": 14240 }, { - "epoch": 0.4041146424517594, + "epoch": 0.40355351525971267, "grad_norm": 0.0, - "learning_rate": 1.3515026624985823e-05, - "loss": 0.939, + "learning_rate": 1.3531786295958268e-05, + "loss": 0.8544, "step": 14241 }, { - "epoch": 0.40414301929625424, + "epoch": 0.4035818527019751, "grad_norm": 0.0, - "learning_rate": 1.3514166181309908e-05, - "loss": 0.9959, + "learning_rate": 1.3530927637916356e-05, + "loss": 0.97, "step": 14242 }, { - "epoch": 0.40417139614074915, + "epoch": 0.4036101901442376, "grad_norm": 0.0, - "learning_rate": 1.3513305707949675e-05, - "loss": 0.7712, + "learning_rate": 1.3530068950132127e-05, + "loss": 0.8834, "step": 14243 }, { - "epoch": 0.40419977298524407, + "epoch": 0.40363852758650004, "grad_norm": 0.0, - "learning_rate": 1.3512445204912387e-05, - "loss": 1.0507, + "learning_rate": 1.3529210232612815e-05, + "loss": 0.9998, "step": 14244 }, { - "epoch": 0.4042281498297389, + "epoch": 0.4036668650287625, "grad_norm": 0.0, - "learning_rate": 1.351158467220532e-05, - "loss": 0.8724, + "learning_rate": 1.3528351485365654e-05, + "loss": 0.9494, "step": 14245 }, { - "epoch": 0.40425652667423384, + "epoch": 0.40369520247102497, "grad_norm": 0.0, - "learning_rate": 1.351072410983574e-05, - "loss": 0.9167, + "learning_rate": 1.3527492708397881e-05, + "loss": 1.0204, "step": 14246 }, { - "epoch": 0.4042849035187287, + "epoch": 0.4037235399132874, "grad_norm": 0.0, - "learning_rate": 1.3509863517810913e-05, - "loss": 0.8942, + "learning_rate": 1.3526633901716726e-05, + "loss": 1.0113, "step": 14247 }, { - "epoch": 0.4043132803632236, + "epoch": 0.4037518773555499, "grad_norm": 0.0, - "learning_rate": 1.3509002896138113e-05, - "loss": 0.89, + "learning_rate": 1.3525775065329425e-05, + "loss": 0.9446, "step": 14248 }, { - "epoch": 0.4043416572077185, + "epoch": 0.40378021479781234, "grad_norm": 0.0, - "learning_rate": 1.3508142244824604e-05, - "loss": 0.9271, + "learning_rate": 1.352491619924321e-05, + "loss": 0.9407, "step": 14249 }, { - "epoch": 0.4043700340522134, + "epoch": 0.40380855224007484, "grad_norm": 0.0, - "learning_rate": 1.3507281563877663e-05, - "loss": 0.9748, + "learning_rate": 1.3524057303465317e-05, + "loss": 0.9367, "step": 14250 }, { - "epoch": 0.4043984108967083, + "epoch": 0.4038368896823373, "grad_norm": 0.0, - "learning_rate": 1.3506420853304557e-05, - "loss": 0.9236, + "learning_rate": 1.352319837800298e-05, + "loss": 0.8657, "step": 14251 }, { - "epoch": 0.40442678774120316, + "epoch": 0.4038652271245997, "grad_norm": 0.0, - "learning_rate": 1.3505560113112555e-05, - "loss": 0.9731, + "learning_rate": 1.3522339422863438e-05, + "loss": 0.9727, "step": 14252 }, { - "epoch": 0.4044551645856981, + "epoch": 0.4038935645668622, "grad_norm": 0.0, - "learning_rate": 1.3504699343308932e-05, - "loss": 0.9041, + "learning_rate": 1.3521480438053918e-05, + "loss": 0.9473, "step": 14253 }, { - "epoch": 0.40448354143019294, + "epoch": 0.40392190200912464, "grad_norm": 0.0, - "learning_rate": 1.3503838543900952e-05, - "loss": 0.9734, + "learning_rate": 1.3520621423581663e-05, + "loss": 0.8968, "step": 14254 }, { - "epoch": 0.40451191827468785, + "epoch": 0.40395023945138714, "grad_norm": 0.0, - "learning_rate": 1.3502977714895894e-05, - "loss": 0.9558, + "learning_rate": 1.351976237945391e-05, + "loss": 1.006, "step": 14255 }, { - "epoch": 0.40454029511918277, + "epoch": 0.4039785768936496, "grad_norm": 0.0, - "learning_rate": 1.3502116856301022e-05, - "loss": 0.8781, + "learning_rate": 1.3518903305677889e-05, + "loss": 0.8476, "step": 14256 }, { - "epoch": 0.40456867196367763, + "epoch": 0.404006914335912, "grad_norm": 0.0, - "learning_rate": 1.3501255968123613e-05, - "loss": 0.9635, + "learning_rate": 1.3518044202260835e-05, + "loss": 0.8603, "step": 14257 }, { - "epoch": 0.40459704880817254, + "epoch": 0.4040352517781745, "grad_norm": 0.0, - "learning_rate": 1.3500395050370939e-05, - "loss": 0.8921, + "learning_rate": 1.351718506920999e-05, + "loss": 0.8063, "step": 14258 }, { - "epoch": 0.4046254256526674, + "epoch": 0.40406358922043695, "grad_norm": 0.0, - "learning_rate": 1.3499534103050269e-05, - "loss": 0.9327, + "learning_rate": 1.3516325906532592e-05, + "loss": 0.8687, "step": 14259 }, { - "epoch": 0.4046538024971623, + "epoch": 0.40409192666269944, "grad_norm": 0.0, - "learning_rate": 1.3498673126168877e-05, - "loss": 0.9776, + "learning_rate": 1.3515466714235874e-05, + "loss": 0.9338, "step": 14260 }, { - "epoch": 0.40468217934165723, + "epoch": 0.4041202641049619, "grad_norm": 0.0, - "learning_rate": 1.3497812119734037e-05, - "loss": 0.9163, + "learning_rate": 1.3514607492327074e-05, + "loss": 0.9456, "step": 14261 }, { - "epoch": 0.4047105561861521, + "epoch": 0.4041486015472244, "grad_norm": 0.0, - "learning_rate": 1.3496951083753021e-05, - "loss": 0.8616, + "learning_rate": 1.3513748240813429e-05, + "loss": 0.8959, "step": 14262 }, { - "epoch": 0.404738933030647, + "epoch": 0.4041769389894868, "grad_norm": 0.0, - "learning_rate": 1.3496090018233101e-05, - "loss": 0.8687, + "learning_rate": 1.351288895970218e-05, + "loss": 0.9014, "step": 14263 }, { - "epoch": 0.40476730987514187, + "epoch": 0.40420527643174925, "grad_norm": 0.0, - "learning_rate": 1.3495228923181552e-05, - "loss": 0.9041, + "learning_rate": 1.351202964900056e-05, + "loss": 0.8218, "step": 14264 }, { - "epoch": 0.4047956867196368, + "epoch": 0.40423361387401174, "grad_norm": 0.0, - "learning_rate": 1.3494367798605645e-05, - "loss": 0.9723, + "learning_rate": 1.3511170308715811e-05, + "loss": 1.0488, "step": 14265 }, { - "epoch": 0.40482406356413164, + "epoch": 0.4042619513162742, "grad_norm": 0.0, - "learning_rate": 1.3493506644512659e-05, - "loss": 0.9616, + "learning_rate": 1.3510310938855172e-05, + "loss": 0.88, "step": 14266 }, { - "epoch": 0.40485244040862656, + "epoch": 0.4042902887585367, "grad_norm": 0.0, - "learning_rate": 1.3492645460909866e-05, - "loss": 0.9046, + "learning_rate": 1.350945153942588e-05, + "loss": 0.851, "step": 14267 }, { - "epoch": 0.40488081725312147, + "epoch": 0.4043186262007991, "grad_norm": 0.0, - "learning_rate": 1.3491784247804535e-05, - "loss": 0.9223, + "learning_rate": 1.3508592110435173e-05, + "loss": 1.0424, "step": 14268 }, { - "epoch": 0.40490919409761633, + "epoch": 0.40434696364306155, "grad_norm": 0.0, - "learning_rate": 1.3490923005203951e-05, - "loss": 0.8704, + "learning_rate": 1.3507732651890294e-05, + "loss": 0.9047, "step": 14269 }, { - "epoch": 0.40493757094211125, + "epoch": 0.40437530108532405, "grad_norm": 0.0, - "learning_rate": 1.3490061733115383e-05, - "loss": 0.8922, + "learning_rate": 1.3506873163798478e-05, + "loss": 0.8366, "step": 14270 }, { - "epoch": 0.4049659477866061, + "epoch": 0.4044036385275865, "grad_norm": 0.0, - "learning_rate": 1.3489200431546103e-05, - "loss": 0.9627, + "learning_rate": 1.3506013646166966e-05, + "loss": 1.0422, "step": 14271 }, { - "epoch": 0.404994324631101, + "epoch": 0.404431975969849, "grad_norm": 0.0, - "learning_rate": 1.348833910050339e-05, - "loss": 0.9836, + "learning_rate": 1.3505154099003003e-05, + "loss": 0.8707, "step": 14272 }, { - "epoch": 0.40502270147559594, + "epoch": 0.4044603134121114, "grad_norm": 0.0, - "learning_rate": 1.3487477739994522e-05, - "loss": 1.0171, + "learning_rate": 1.3504294522313825e-05, + "loss": 0.9231, "step": 14273 }, { - "epoch": 0.4050510783200908, + "epoch": 0.4044886508543739, "grad_norm": 0.0, - "learning_rate": 1.3486616350026771e-05, - "loss": 1.0769, + "learning_rate": 1.350343491610667e-05, + "loss": 0.9289, "step": 14274 }, { - "epoch": 0.4050794551645857, + "epoch": 0.40451698829663635, "grad_norm": 0.0, - "learning_rate": 1.3485754930607418e-05, - "loss": 0.8737, + "learning_rate": 1.3502575280388785e-05, + "loss": 0.9281, "step": 14275 }, { - "epoch": 0.40510783200908057, + "epoch": 0.4045453257388988, "grad_norm": 0.0, - "learning_rate": 1.3484893481743735e-05, - "loss": 0.983, + "learning_rate": 1.3501715615167408e-05, + "loss": 0.9347, "step": 14276 }, { - "epoch": 0.4051362088535755, + "epoch": 0.4045736631811613, "grad_norm": 0.0, - "learning_rate": 1.3484032003443002e-05, - "loss": 0.918, + "learning_rate": 1.3500855920449775e-05, + "loss": 0.8075, "step": 14277 }, { - "epoch": 0.4051645856980704, + "epoch": 0.4046020006234237, "grad_norm": 0.0, - "learning_rate": 1.348317049571249e-05, - "loss": 0.91, + "learning_rate": 1.3499996196243135e-05, + "loss": 0.8424, "step": 14278 }, { - "epoch": 0.40519296254256526, + "epoch": 0.4046303380656862, "grad_norm": 0.0, - "learning_rate": 1.3482308958559483e-05, - "loss": 0.8387, + "learning_rate": 1.349913644255473e-05, + "loss": 0.9561, "step": 14279 }, { - "epoch": 0.4052213393870602, + "epoch": 0.40465867550794865, "grad_norm": 0.0, - "learning_rate": 1.3481447391991254e-05, - "loss": 0.8626, + "learning_rate": 1.3498276659391799e-05, + "loss": 0.812, "step": 14280 }, { - "epoch": 0.40524971623155503, + "epoch": 0.4046870129502111, "grad_norm": 0.0, - "learning_rate": 1.3480585796015082e-05, - "loss": 1.0355, + "learning_rate": 1.3497416846761584e-05, + "loss": 0.8979, "step": 14281 }, { - "epoch": 0.40527809307604995, + "epoch": 0.4047153503924736, "grad_norm": 0.0, - "learning_rate": 1.3479724170638248e-05, - "loss": 0.98, + "learning_rate": 1.3496557004671325e-05, + "loss": 0.954, "step": 14282 }, { - "epoch": 0.4053064699205448, + "epoch": 0.404743687834736, "grad_norm": 0.0, - "learning_rate": 1.3478862515868025e-05, - "loss": 0.9513, + "learning_rate": 1.3495697133128272e-05, + "loss": 0.8581, "step": 14283 }, { - "epoch": 0.4053348467650397, + "epoch": 0.4047720252769985, "grad_norm": 0.0, - "learning_rate": 1.3478000831711695e-05, - "loss": 0.902, + "learning_rate": 1.349483723213966e-05, + "loss": 0.9764, "step": 14284 }, { - "epoch": 0.40536322360953464, + "epoch": 0.40480036271926095, "grad_norm": 0.0, - "learning_rate": 1.3477139118176536e-05, - "loss": 0.922, + "learning_rate": 1.349397730171274e-05, + "loss": 0.8729, "step": 14285 }, { - "epoch": 0.4053916004540295, + "epoch": 0.40482870016152345, "grad_norm": 0.0, - "learning_rate": 1.3476277375269828e-05, - "loss": 0.7798, + "learning_rate": 1.3493117341854748e-05, + "loss": 0.8253, "step": 14286 }, { - "epoch": 0.4054199772985244, + "epoch": 0.4048570376037859, "grad_norm": 0.0, - "learning_rate": 1.3475415602998844e-05, - "loss": 1.0759, + "learning_rate": 1.3492257352572935e-05, + "loss": 0.8345, "step": 14287 }, { - "epoch": 0.4054483541430193, + "epoch": 0.4048853750460483, "grad_norm": 0.0, - "learning_rate": 1.3474553801370871e-05, - "loss": 0.9047, + "learning_rate": 1.349139733387454e-05, + "loss": 1.0814, "step": 14288 }, { - "epoch": 0.4054767309875142, + "epoch": 0.4049137124883108, "grad_norm": 0.0, - "learning_rate": 1.3473691970393187e-05, - "loss": 0.9977, + "learning_rate": 1.3490537285766809e-05, + "loss": 0.9914, "step": 14289 }, { - "epoch": 0.4055051078320091, + "epoch": 0.40494204993057326, "grad_norm": 0.0, - "learning_rate": 1.3472830110073071e-05, - "loss": 0.9434, + "learning_rate": 1.3489677208256987e-05, + "loss": 0.9244, "step": 14290 }, { - "epoch": 0.40553348467650396, + "epoch": 0.40497038737283575, "grad_norm": 0.0, - "learning_rate": 1.34719682204178e-05, - "loss": 0.9628, + "learning_rate": 1.3488817101352315e-05, + "loss": 0.9932, "step": 14291 }, { - "epoch": 0.4055618615209989, + "epoch": 0.4049987248150982, "grad_norm": 0.0, - "learning_rate": 1.347110630143466e-05, - "loss": 0.9391, + "learning_rate": 1.3487956965060044e-05, + "loss": 1.0271, "step": 14292 }, { - "epoch": 0.40559023836549374, + "epoch": 0.4050270622573606, "grad_norm": 0.0, - "learning_rate": 1.347024435313093e-05, - "loss": 0.9308, + "learning_rate": 1.3487096799387418e-05, + "loss": 0.8943, "step": 14293 }, { - "epoch": 0.40561861520998865, + "epoch": 0.4050553996996231, "grad_norm": 0.0, - "learning_rate": 1.3469382375513886e-05, - "loss": 0.965, + "learning_rate": 1.3486236604341679e-05, + "loss": 0.9293, "step": 14294 }, { - "epoch": 0.40564699205448357, + "epoch": 0.40508373714188556, "grad_norm": 0.0, - "learning_rate": 1.3468520368590813e-05, - "loss": 0.9616, + "learning_rate": 1.3485376379930071e-05, + "loss": 0.8642, "step": 14295 }, { - "epoch": 0.40567536889897843, + "epoch": 0.40511207458414805, "grad_norm": 0.0, - "learning_rate": 1.3467658332368996e-05, - "loss": 0.9775, + "learning_rate": 1.3484516126159845e-05, + "loss": 0.89, "step": 14296 }, { - "epoch": 0.40570374574347334, + "epoch": 0.4051404120264105, "grad_norm": 0.0, - "learning_rate": 1.3466796266855713e-05, - "loss": 0.8726, + "learning_rate": 1.3483655843038248e-05, + "loss": 0.9377, "step": 14297 }, { - "epoch": 0.4057321225879682, + "epoch": 0.405168749468673, "grad_norm": 0.0, - "learning_rate": 1.3465934172058244e-05, - "loss": 0.9091, + "learning_rate": 1.3482795530572523e-05, + "loss": 0.9405, "step": 14298 }, { - "epoch": 0.4057604994324631, + "epoch": 0.4051970869109354, "grad_norm": 0.0, - "learning_rate": 1.3465072047983875e-05, - "loss": 0.9659, + "learning_rate": 1.348193518876992e-05, + "loss": 0.961, "step": 14299 }, { - "epoch": 0.405788876276958, + "epoch": 0.40522542435319786, "grad_norm": 0.0, - "learning_rate": 1.3464209894639885e-05, - "loss": 1.0234, + "learning_rate": 1.3481074817637681e-05, + "loss": 0.8062, "step": 14300 }, { - "epoch": 0.4058172531214529, + "epoch": 0.40525376179546035, "grad_norm": 0.0, - "learning_rate": 1.3463347712033558e-05, - "loss": 0.9158, + "learning_rate": 1.3480214417183058e-05, + "loss": 0.9516, "step": 14301 }, { - "epoch": 0.4058456299659478, + "epoch": 0.4052820992377228, "grad_norm": 0.0, - "learning_rate": 1.3462485500172177e-05, - "loss": 0.9729, + "learning_rate": 1.3479353987413295e-05, + "loss": 1.0335, "step": 14302 }, { - "epoch": 0.40587400681044267, + "epoch": 0.4053104366799853, "grad_norm": 0.0, - "learning_rate": 1.3461623259063031e-05, - "loss": 0.9003, + "learning_rate": 1.3478493528335641e-05, + "loss": 1.012, "step": 14303 }, { - "epoch": 0.4059023836549376, + "epoch": 0.4053387741222477, "grad_norm": 0.0, - "learning_rate": 1.3460760988713393e-05, - "loss": 0.8542, + "learning_rate": 1.3477633039957346e-05, + "loss": 0.8355, "step": 14304 }, { - "epoch": 0.40593076049943244, + "epoch": 0.40536711156451016, "grad_norm": 0.0, - "learning_rate": 1.3459898689130548e-05, - "loss": 0.8381, + "learning_rate": 1.3476772522285656e-05, + "loss": 0.8658, "step": 14305 }, { - "epoch": 0.40595913734392736, + "epoch": 0.40539544900677266, "grad_norm": 0.0, - "learning_rate": 1.345903636032179e-05, - "loss": 0.9701, + "learning_rate": 1.3475911975327823e-05, + "loss": 0.8368, "step": 14306 }, { - "epoch": 0.40598751418842227, + "epoch": 0.4054237864490351, "grad_norm": 0.0, - "learning_rate": 1.3458174002294394e-05, - "loss": 0.9718, + "learning_rate": 1.3475051399091088e-05, + "loss": 0.9402, "step": 14307 }, { - "epoch": 0.40601589103291713, + "epoch": 0.4054521238912976, "grad_norm": 0.0, - "learning_rate": 1.3457311615055643e-05, - "loss": 0.9367, + "learning_rate": 1.3474190793582707e-05, + "loss": 0.9727, "step": 14308 }, { - "epoch": 0.40604426787741205, + "epoch": 0.40548046133356, "grad_norm": 0.0, - "learning_rate": 1.345644919861283e-05, - "loss": 0.9099, + "learning_rate": 1.3473330158809925e-05, + "loss": 0.8483, "step": 14309 }, { - "epoch": 0.4060726447219069, + "epoch": 0.4055087987758225, "grad_norm": 0.0, - "learning_rate": 1.3455586752973234e-05, - "loss": 1.0078, + "learning_rate": 1.3472469494779994e-05, + "loss": 0.9522, "step": 14310 }, { - "epoch": 0.4061010215664018, + "epoch": 0.40553713621808496, "grad_norm": 0.0, - "learning_rate": 1.3454724278144142e-05, - "loss": 0.9303, + "learning_rate": 1.3471608801500163e-05, + "loss": 0.9787, "step": 14311 }, { - "epoch": 0.40612939841089674, + "epoch": 0.4055654736603474, "grad_norm": 0.0, - "learning_rate": 1.3453861774132836e-05, - "loss": 0.9547, + "learning_rate": 1.3470748078977682e-05, + "loss": 0.9282, "step": 14312 }, { - "epoch": 0.4061577752553916, + "epoch": 0.4055938111026099, "grad_norm": 0.0, - "learning_rate": 1.3452999240946606e-05, - "loss": 0.9812, + "learning_rate": 1.3469887327219802e-05, + "loss": 0.8576, "step": 14313 }, { - "epoch": 0.4061861520998865, + "epoch": 0.40562214854487233, "grad_norm": 0.0, - "learning_rate": 1.3452136678592737e-05, - "loss": 0.9545, + "learning_rate": 1.3469026546233774e-05, + "loss": 0.8857, "step": 14314 }, { - "epoch": 0.40621452894438137, + "epoch": 0.4056504859871348, "grad_norm": 0.0, - "learning_rate": 1.3451274087078514e-05, - "loss": 0.9913, + "learning_rate": 1.3468165736026844e-05, + "loss": 0.8746, "step": 14315 }, { - "epoch": 0.4062429057888763, + "epoch": 0.40567882342939726, "grad_norm": 0.0, - "learning_rate": 1.3450411466411224e-05, - "loss": 0.8968, + "learning_rate": 1.3467304896606265e-05, + "loss": 0.8487, "step": 14316 }, { - "epoch": 0.40627128263337114, + "epoch": 0.4057071608716597, "grad_norm": 0.0, - "learning_rate": 1.3449548816598152e-05, - "loss": 0.9656, + "learning_rate": 1.3466444027979293e-05, + "loss": 0.815, "step": 14317 }, { - "epoch": 0.40629965947786606, + "epoch": 0.4057354983139222, "grad_norm": 0.0, - "learning_rate": 1.3448686137646587e-05, - "loss": 0.9181, + "learning_rate": 1.3465583130153174e-05, + "loss": 0.9503, "step": 14318 }, { - "epoch": 0.406328036322361, + "epoch": 0.40576383575618463, "grad_norm": 0.0, - "learning_rate": 1.3447823429563813e-05, - "loss": 1.0258, + "learning_rate": 1.3464722203135164e-05, + "loss": 0.8856, "step": 14319 }, { - "epoch": 0.40635641316685583, + "epoch": 0.4057921731984471, "grad_norm": 0.0, - "learning_rate": 1.3446960692357122e-05, - "loss": 0.9492, + "learning_rate": 1.3463861246932508e-05, + "loss": 0.9974, "step": 14320 }, { - "epoch": 0.40638479001135075, + "epoch": 0.40582051064070956, "grad_norm": 0.0, - "learning_rate": 1.34460979260338e-05, - "loss": 0.8802, + "learning_rate": 1.3463000261552466e-05, + "loss": 0.8274, "step": 14321 }, { - "epoch": 0.4064131668558456, + "epoch": 0.40584884808297206, "grad_norm": 0.0, - "learning_rate": 1.3445235130601129e-05, - "loss": 0.944, + "learning_rate": 1.3462139247002284e-05, + "loss": 0.8517, "step": 14322 }, { - "epoch": 0.4064415437003405, + "epoch": 0.4058771855252345, "grad_norm": 0.0, - "learning_rate": 1.3444372306066407e-05, - "loss": 0.852, + "learning_rate": 1.346127820328922e-05, + "loss": 0.9512, "step": 14323 }, { - "epoch": 0.40646992054483544, + "epoch": 0.40590552296749693, "grad_norm": 0.0, - "learning_rate": 1.3443509452436917e-05, - "loss": 0.9875, + "learning_rate": 1.346041713042052e-05, + "loss": 1.0073, "step": 14324 }, { - "epoch": 0.4064982973893303, + "epoch": 0.40593386040975943, "grad_norm": 0.0, - "learning_rate": 1.3442646569719944e-05, - "loss": 0.9626, + "learning_rate": 1.3459556028403446e-05, + "loss": 0.8286, "step": 14325 }, { - "epoch": 0.4065266742338252, + "epoch": 0.40596219785202187, "grad_norm": 0.0, - "learning_rate": 1.3441783657922782e-05, - "loss": 0.8694, + "learning_rate": 1.3458694897245246e-05, + "loss": 0.9041, "step": 14326 }, { - "epoch": 0.4065550510783201, + "epoch": 0.40599053529428436, "grad_norm": 0.0, - "learning_rate": 1.3440920717052721e-05, - "loss": 0.848, + "learning_rate": 1.3457833736953174e-05, + "loss": 0.9724, "step": 14327 }, { - "epoch": 0.406583427922815, + "epoch": 0.4060188727365468, "grad_norm": 0.0, - "learning_rate": 1.3440057747117049e-05, - "loss": 0.8667, + "learning_rate": 1.3456972547534484e-05, + "loss": 0.931, "step": 14328 }, { - "epoch": 0.40661180476730985, + "epoch": 0.40604721017880924, "grad_norm": 0.0, - "learning_rate": 1.3439194748123048e-05, - "loss": 0.8591, + "learning_rate": 1.3456111328996431e-05, + "loss": 0.9071, "step": 14329 }, { - "epoch": 0.40664018161180476, + "epoch": 0.40607554762107173, "grad_norm": 0.0, - "learning_rate": 1.3438331720078019e-05, - "loss": 0.9164, + "learning_rate": 1.3455250081346269e-05, + "loss": 0.9726, "step": 14330 }, { - "epoch": 0.4066685584562997, + "epoch": 0.40610388506333417, "grad_norm": 0.0, - "learning_rate": 1.3437468662989251e-05, - "loss": 1.0431, + "learning_rate": 1.3454388804591253e-05, + "loss": 1.0044, "step": 14331 }, { - "epoch": 0.40669693530079454, + "epoch": 0.40613222250559666, "grad_norm": 0.0, - "learning_rate": 1.3436605576864023e-05, - "loss": 0.8838, + "learning_rate": 1.3453527498738637e-05, + "loss": 0.8868, "step": 14332 }, { - "epoch": 0.40672531214528945, + "epoch": 0.4061605599478591, "grad_norm": 0.0, - "learning_rate": 1.3435742461709635e-05, - "loss": 0.8778, + "learning_rate": 1.3452666163795675e-05, + "loss": 0.8512, "step": 14333 }, { - "epoch": 0.4067536889897843, + "epoch": 0.4061888973901216, "grad_norm": 0.0, - "learning_rate": 1.343487931753338e-05, - "loss": 0.9594, + "learning_rate": 1.3451804799769625e-05, + "loss": 0.9476, "step": 14334 }, { - "epoch": 0.4067820658342792, + "epoch": 0.40621723483238403, "grad_norm": 0.0, - "learning_rate": 1.3434016144342543e-05, - "loss": 0.8501, + "learning_rate": 1.3450943406667741e-05, + "loss": 0.9003, "step": 14335 }, { - "epoch": 0.40681044267877414, + "epoch": 0.40624557227464647, "grad_norm": 0.0, - "learning_rate": 1.343315294214442e-05, - "loss": 0.9147, + "learning_rate": 1.3450081984497279e-05, + "loss": 0.9231, "step": 14336 }, { - "epoch": 0.406838819523269, + "epoch": 0.40627390971690897, "grad_norm": 0.0, - "learning_rate": 1.3432289710946296e-05, - "loss": 0.9583, + "learning_rate": 1.3449220533265497e-05, + "loss": 0.9957, "step": 14337 }, { - "epoch": 0.4068671963677639, + "epoch": 0.4063022471591714, "grad_norm": 0.0, - "learning_rate": 1.3431426450755465e-05, - "loss": 0.8976, + "learning_rate": 1.3448359052979649e-05, + "loss": 0.9639, "step": 14338 }, { - "epoch": 0.4068955732122588, + "epoch": 0.4063305846014339, "grad_norm": 0.0, - "learning_rate": 1.3430563161579224e-05, - "loss": 0.8316, + "learning_rate": 1.3447497543646992e-05, + "loss": 0.8988, "step": 14339 }, { - "epoch": 0.4069239500567537, + "epoch": 0.40635892204369634, "grad_norm": 0.0, - "learning_rate": 1.3429699843424861e-05, - "loss": 0.9248, + "learning_rate": 1.3446636005274782e-05, + "loss": 0.9182, "step": 14340 }, { - "epoch": 0.4069523269012486, + "epoch": 0.4063872594859588, "grad_norm": 0.0, - "learning_rate": 1.3428836496299668e-05, - "loss": 0.9213, + "learning_rate": 1.3445774437870278e-05, + "loss": 1.0747, "step": 14341 }, { - "epoch": 0.40698070374574347, + "epoch": 0.40641559692822127, "grad_norm": 0.0, - "learning_rate": 1.342797312021094e-05, - "loss": 0.9184, + "learning_rate": 1.3444912841440734e-05, + "loss": 0.8787, "step": 14342 }, { - "epoch": 0.4070090805902384, + "epoch": 0.4064439343704837, "grad_norm": 0.0, - "learning_rate": 1.3427109715165969e-05, - "loss": 0.895, + "learning_rate": 1.3444051215993412e-05, + "loss": 0.9957, "step": 14343 }, { - "epoch": 0.40703745743473324, + "epoch": 0.4064722718127462, "grad_norm": 0.0, - "learning_rate": 1.3426246281172048e-05, - "loss": 0.9753, + "learning_rate": 1.3443189561535568e-05, + "loss": 0.9202, "step": 14344 }, { - "epoch": 0.40706583427922816, + "epoch": 0.40650060925500864, "grad_norm": 0.0, - "learning_rate": 1.3425382818236469e-05, - "loss": 0.9518, + "learning_rate": 1.3442327878074459e-05, + "loss": 0.9549, "step": 14345 }, { - "epoch": 0.407094211123723, + "epoch": 0.40652894669727113, "grad_norm": 0.0, - "learning_rate": 1.3424519326366531e-05, - "loss": 0.9157, + "learning_rate": 1.3441466165617346e-05, + "loss": 1.0007, "step": 14346 }, { - "epoch": 0.40712258796821793, + "epoch": 0.40655728413953357, "grad_norm": 0.0, - "learning_rate": 1.3423655805569524e-05, - "loss": 0.939, + "learning_rate": 1.3440604424171483e-05, + "loss": 0.9227, "step": 14347 }, { - "epoch": 0.40715096481271285, + "epoch": 0.406585621581796, "grad_norm": 0.0, - "learning_rate": 1.3422792255852739e-05, - "loss": 0.8992, + "learning_rate": 1.3439742653744133e-05, + "loss": 1.0018, "step": 14348 }, { - "epoch": 0.4071793416572077, + "epoch": 0.4066139590240585, "grad_norm": 0.0, - "learning_rate": 1.3421928677223474e-05, - "loss": 0.9944, + "learning_rate": 1.3438880854342552e-05, + "loss": 0.9507, "step": 14349 }, { - "epoch": 0.4072077185017026, + "epoch": 0.40664229646632094, "grad_norm": 0.0, - "learning_rate": 1.3421065069689028e-05, - "loss": 0.8723, + "learning_rate": 1.3438019025974e-05, + "loss": 0.8573, "step": 14350 }, { - "epoch": 0.4072360953461975, + "epoch": 0.40667063390858343, "grad_norm": 0.0, - "learning_rate": 1.342020143325669e-05, - "loss": 0.8992, + "learning_rate": 1.3437157168645738e-05, + "loss": 0.8943, "step": 14351 }, { - "epoch": 0.4072644721906924, + "epoch": 0.4066989713508459, "grad_norm": 0.0, - "learning_rate": 1.3419337767933753e-05, - "loss": 0.9035, + "learning_rate": 1.3436295282365026e-05, + "loss": 0.9073, "step": 14352 }, { - "epoch": 0.4072928490351873, + "epoch": 0.4067273087931083, "grad_norm": 0.0, - "learning_rate": 1.341847407372752e-05, - "loss": 0.9686, + "learning_rate": 1.3435433367139122e-05, + "loss": 0.8694, "step": 14353 }, { - "epoch": 0.40732122587968217, + "epoch": 0.4067556462353708, "grad_norm": 0.0, - "learning_rate": 1.3417610350645283e-05, - "loss": 0.837, + "learning_rate": 1.3434571422975286e-05, + "loss": 0.9339, "step": 14354 }, { - "epoch": 0.4073496027241771, + "epoch": 0.40678398367763324, "grad_norm": 0.0, - "learning_rate": 1.3416746598694336e-05, - "loss": 0.9409, + "learning_rate": 1.3433709449880778e-05, + "loss": 0.9235, "step": 14355 }, { - "epoch": 0.40737797956867194, + "epoch": 0.40681232111989574, "grad_norm": 0.0, - "learning_rate": 1.3415882817881975e-05, - "loss": 0.9227, + "learning_rate": 1.3432847447862865e-05, + "loss": 1.0526, "step": 14356 }, { - "epoch": 0.40740635641316686, + "epoch": 0.4068406585621582, "grad_norm": 0.0, - "learning_rate": 1.3415019008215501e-05, - "loss": 0.874, + "learning_rate": 1.3431985416928799e-05, + "loss": 0.9836, "step": 14357 }, { - "epoch": 0.4074347332576618, + "epoch": 0.4068689960044206, "grad_norm": 0.0, - "learning_rate": 1.341415516970221e-05, - "loss": 0.9647, + "learning_rate": 1.3431123357085847e-05, + "loss": 0.9102, "step": 14358 }, { - "epoch": 0.40746311010215663, + "epoch": 0.4068973334466831, "grad_norm": 0.0, - "learning_rate": 1.3413291302349394e-05, - "loss": 0.848, + "learning_rate": 1.3430261268341272e-05, + "loss": 0.9798, "step": 14359 }, { - "epoch": 0.40749148694665155, + "epoch": 0.40692567088894555, "grad_norm": 0.0, - "learning_rate": 1.3412427406164353e-05, - "loss": 0.8574, + "learning_rate": 1.342939915070233e-05, + "loss": 0.8918, "step": 14360 }, { - "epoch": 0.4075198637911464, + "epoch": 0.40695400833120804, "grad_norm": 0.0, - "learning_rate": 1.3411563481154387e-05, - "loss": 0.7858, + "learning_rate": 1.3428537004176282e-05, + "loss": 0.8807, "step": 14361 }, { - "epoch": 0.4075482406356413, + "epoch": 0.4069823457734705, "grad_norm": 0.0, - "learning_rate": 1.3410699527326788e-05, - "loss": 0.9324, + "learning_rate": 1.3427674828770396e-05, + "loss": 0.8798, "step": 14362 }, { - "epoch": 0.4075766174801362, + "epoch": 0.40701068321573297, "grad_norm": 0.0, - "learning_rate": 1.3409835544688856e-05, - "loss": 0.9104, + "learning_rate": 1.3426812624491935e-05, + "loss": 1.0294, "step": 14363 }, { - "epoch": 0.4076049943246311, + "epoch": 0.4070390206579954, "grad_norm": 0.0, - "learning_rate": 1.340897153324789e-05, - "loss": 0.9141, + "learning_rate": 1.3425950391348154e-05, + "loss": 0.9335, "step": 14364 }, { - "epoch": 0.407633371169126, + "epoch": 0.40706735810025785, "grad_norm": 0.0, - "learning_rate": 1.340810749301119e-05, - "loss": 0.9539, + "learning_rate": 1.3425088129346322e-05, + "loss": 0.88, "step": 14365 }, { - "epoch": 0.4076617480136209, + "epoch": 0.40709569554252034, "grad_norm": 0.0, - "learning_rate": 1.3407243423986052e-05, - "loss": 0.9205, + "learning_rate": 1.3424225838493703e-05, + "loss": 0.9077, "step": 14366 }, { - "epoch": 0.4076901248581158, + "epoch": 0.4071240329847828, "grad_norm": 0.0, - "learning_rate": 1.3406379326179778e-05, - "loss": 0.9323, + "learning_rate": 1.3423363518797558e-05, + "loss": 0.8942, "step": 14367 }, { - "epoch": 0.40771850170261065, + "epoch": 0.4071523704270453, "grad_norm": 0.0, - "learning_rate": 1.3405515199599662e-05, - "loss": 0.9081, + "learning_rate": 1.342250117026515e-05, + "loss": 0.9065, "step": 14368 }, { - "epoch": 0.40774687854710556, + "epoch": 0.4071807078693077, "grad_norm": 0.0, - "learning_rate": 1.3404651044253006e-05, - "loss": 0.9907, + "learning_rate": 1.3421638792903743e-05, + "loss": 1.0184, "step": 14369 }, { - "epoch": 0.4077752553916005, + "epoch": 0.40720904531157015, "grad_norm": 0.0, - "learning_rate": 1.340378686014711e-05, - "loss": 0.8883, + "learning_rate": 1.3420776386720601e-05, + "loss": 0.8463, "step": 14370 }, { - "epoch": 0.40780363223609534, + "epoch": 0.40723738275383264, "grad_norm": 0.0, - "learning_rate": 1.3402922647289277e-05, - "loss": 0.9226, + "learning_rate": 1.3419913951722991e-05, + "loss": 0.9178, "step": 14371 }, { - "epoch": 0.40783200908059025, + "epoch": 0.4072657201960951, "grad_norm": 0.0, - "learning_rate": 1.3402058405686798e-05, - "loss": 0.9118, + "learning_rate": 1.3419051487918178e-05, + "loss": 1.0535, "step": 14372 }, { - "epoch": 0.4078603859250851, + "epoch": 0.4072940576383576, "grad_norm": 0.0, - "learning_rate": 1.3401194135346978e-05, - "loss": 0.8558, + "learning_rate": 1.3418188995313424e-05, + "loss": 0.8791, "step": 14373 }, { - "epoch": 0.40788876276958, + "epoch": 0.40732239508062, "grad_norm": 0.0, - "learning_rate": 1.3400329836277122e-05, - "loss": 1.0351, + "learning_rate": 1.341732647391599e-05, + "loss": 0.8384, "step": 14374 }, { - "epoch": 0.40791713961407494, + "epoch": 0.4073507325228825, "grad_norm": 0.0, - "learning_rate": 1.3399465508484526e-05, - "loss": 0.9672, + "learning_rate": 1.341646392373315e-05, + "loss": 0.8537, "step": 14375 }, { - "epoch": 0.4079455164585698, + "epoch": 0.40737906996514495, "grad_norm": 0.0, - "learning_rate": 1.3398601151976491e-05, - "loss": 0.7903, + "learning_rate": 1.3415601344772164e-05, + "loss": 0.9484, "step": 14376 }, { - "epoch": 0.4079738933030647, + "epoch": 0.4074074074074074, "grad_norm": 0.0, - "learning_rate": 1.3397736766760322e-05, - "loss": 0.9157, + "learning_rate": 1.3414738737040303e-05, + "loss": 0.9166, "step": 14377 }, { - "epoch": 0.4080022701475596, + "epoch": 0.4074357448496699, "grad_norm": 0.0, - "learning_rate": 1.3396872352843318e-05, - "loss": 0.9552, + "learning_rate": 1.3413876100544825e-05, + "loss": 0.8721, "step": 14378 }, { - "epoch": 0.4080306469920545, + "epoch": 0.4074640822919323, "grad_norm": 0.0, - "learning_rate": 1.3396007910232773e-05, - "loss": 0.7886, + "learning_rate": 1.3413013435293004e-05, + "loss": 0.9469, "step": 14379 }, { - "epoch": 0.40805902383654935, + "epoch": 0.4074924197341948, "grad_norm": 0.0, - "learning_rate": 1.3395143438936003e-05, - "loss": 0.9583, + "learning_rate": 1.3412150741292102e-05, + "loss": 0.9266, "step": 14380 }, { - "epoch": 0.40808740068104427, + "epoch": 0.40752075717645725, "grad_norm": 0.0, - "learning_rate": 1.3394278938960303e-05, - "loss": 0.943, + "learning_rate": 1.3411288018549387e-05, + "loss": 0.8357, "step": 14381 }, { - "epoch": 0.4081157775255392, + "epoch": 0.4075490946187197, "grad_norm": 0.0, - "learning_rate": 1.339341441031298e-05, - "loss": 1.016, + "learning_rate": 1.3410425267072124e-05, + "loss": 0.9104, "step": 14382 }, { - "epoch": 0.40814415437003404, + "epoch": 0.4075774320609822, "grad_norm": 0.0, - "learning_rate": 1.3392549853001326e-05, - "loss": 0.7882, + "learning_rate": 1.3409562486867586e-05, + "loss": 0.9746, "step": 14383 }, { - "epoch": 0.40817253121452896, + "epoch": 0.4076057695032446, "grad_norm": 0.0, - "learning_rate": 1.3391685267032654e-05, - "loss": 0.9324, + "learning_rate": 1.3408699677943038e-05, + "loss": 0.8897, "step": 14384 }, { - "epoch": 0.4082009080590238, + "epoch": 0.4076341069455071, "grad_norm": 0.0, - "learning_rate": 1.3390820652414265e-05, - "loss": 1.044, + "learning_rate": 1.3407836840305746e-05, + "loss": 0.8917, "step": 14385 }, { - "epoch": 0.40822928490351873, + "epoch": 0.40766244438776955, "grad_norm": 0.0, - "learning_rate": 1.338995600915346e-05, - "loss": 0.9042, + "learning_rate": 1.3406973973962977e-05, + "loss": 0.976, "step": 14386 }, { - "epoch": 0.40825766174801364, + "epoch": 0.40769078183003205, "grad_norm": 0.0, - "learning_rate": 1.3389091337257543e-05, - "loss": 0.9753, + "learning_rate": 1.3406111078922002e-05, + "loss": 0.91, "step": 14387 }, { - "epoch": 0.4082860385925085, + "epoch": 0.4077191192722945, "grad_norm": 0.0, - "learning_rate": 1.338822663673382e-05, - "loss": 0.8623, + "learning_rate": 1.3405248155190086e-05, + "loss": 0.9409, "step": 14388 }, { - "epoch": 0.4083144154370034, + "epoch": 0.4077474567145569, "grad_norm": 0.0, - "learning_rate": 1.3387361907589596e-05, - "loss": 0.8027, + "learning_rate": 1.3404385202774506e-05, + "loss": 0.92, "step": 14389 }, { - "epoch": 0.4083427922814983, + "epoch": 0.4077757941568194, "grad_norm": 0.0, - "learning_rate": 1.3386497149832175e-05, - "loss": 0.7882, + "learning_rate": 1.3403522221682522e-05, + "loss": 0.9391, "step": 14390 }, { - "epoch": 0.4083711691259932, + "epoch": 0.40780413159908185, "grad_norm": 0.0, - "learning_rate": 1.3385632363468858e-05, - "loss": 0.8401, + "learning_rate": 1.3402659211921407e-05, + "loss": 1.0269, "step": 14391 }, { - "epoch": 0.4083995459704881, + "epoch": 0.40783246904134435, "grad_norm": 0.0, - "learning_rate": 1.338476754850695e-05, - "loss": 0.9522, + "learning_rate": 1.340179617349843e-05, + "loss": 0.9572, "step": 14392 }, { - "epoch": 0.40842792281498297, + "epoch": 0.4078608064836068, "grad_norm": 0.0, - "learning_rate": 1.3383902704953761e-05, - "loss": 0.8026, + "learning_rate": 1.3400933106420861e-05, + "loss": 1.0267, "step": 14393 }, { - "epoch": 0.4084562996594779, + "epoch": 0.4078891439258692, "grad_norm": 0.0, - "learning_rate": 1.3383037832816594e-05, - "loss": 0.934, + "learning_rate": 1.3400070010695966e-05, + "loss": 0.9612, "step": 14394 }, { - "epoch": 0.40848467650397274, + "epoch": 0.4079174813681317, "grad_norm": 0.0, - "learning_rate": 1.3382172932102755e-05, - "loss": 0.9206, + "learning_rate": 1.3399206886331022e-05, + "loss": 0.8665, "step": 14395 }, { - "epoch": 0.40851305334846766, + "epoch": 0.40794581881039416, "grad_norm": 0.0, - "learning_rate": 1.3381308002819546e-05, - "loss": 0.8118, + "learning_rate": 1.3398343733333295e-05, + "loss": 0.959, "step": 14396 }, { - "epoch": 0.4085414301929625, + "epoch": 0.40797415625265665, "grad_norm": 0.0, - "learning_rate": 1.3380443044974279e-05, - "loss": 0.8244, + "learning_rate": 1.3397480551710059e-05, + "loss": 1.001, "step": 14397 }, { - "epoch": 0.40856980703745743, + "epoch": 0.4080024936949191, "grad_norm": 0.0, - "learning_rate": 1.3379578058574259e-05, - "loss": 0.9391, + "learning_rate": 1.3396617341468581e-05, + "loss": 0.9692, "step": 14398 }, { - "epoch": 0.40859818388195235, + "epoch": 0.4080308311371816, "grad_norm": 0.0, - "learning_rate": 1.3378713043626788e-05, - "loss": 0.933, + "learning_rate": 1.3395754102616135e-05, + "loss": 0.9499, "step": 14399 }, { - "epoch": 0.4086265607264472, + "epoch": 0.408059168579444, "grad_norm": 0.0, - "learning_rate": 1.3377848000139178e-05, - "loss": 0.9721, + "learning_rate": 1.339489083515999e-05, + "loss": 0.9793, "step": 14400 }, { - "epoch": 0.4086549375709421, + "epoch": 0.40808750602170646, "grad_norm": 0.0, - "learning_rate": 1.3376982928118734e-05, - "loss": 0.9763, + "learning_rate": 1.3394027539107417e-05, + "loss": 0.9471, "step": 14401 }, { - "epoch": 0.408683314415437, + "epoch": 0.40811584346396895, "grad_norm": 0.0, - "learning_rate": 1.337611782757276e-05, - "loss": 0.9555, + "learning_rate": 1.3393164214465692e-05, + "loss": 0.934, "step": 14402 }, { - "epoch": 0.4087116912599319, + "epoch": 0.4081441809062314, "grad_norm": 0.0, - "learning_rate": 1.337525269850857e-05, - "loss": 0.9113, + "learning_rate": 1.3392300861242085e-05, + "loss": 0.9088, "step": 14403 }, { - "epoch": 0.4087400681044268, + "epoch": 0.4081725183484939, "grad_norm": 0.0, - "learning_rate": 1.337438754093347e-05, - "loss": 1.0229, + "learning_rate": 1.3391437479443867e-05, + "loss": 0.9152, "step": 14404 }, { - "epoch": 0.40876844494892167, + "epoch": 0.4082008557907563, "grad_norm": 0.0, - "learning_rate": 1.3373522354854765e-05, - "loss": 1.0052, + "learning_rate": 1.3390574069078312e-05, + "loss": 0.9747, "step": 14405 }, { - "epoch": 0.4087968217934166, + "epoch": 0.40822919323301876, "grad_norm": 0.0, - "learning_rate": 1.3372657140279763e-05, - "loss": 0.8274, + "learning_rate": 1.3389710630152693e-05, + "loss": 0.961, "step": 14406 }, { - "epoch": 0.40882519863791145, + "epoch": 0.40825753067528125, "grad_norm": 0.0, - "learning_rate": 1.3371791897215778e-05, - "loss": 0.9505, + "learning_rate": 1.3388847162674282e-05, + "loss": 0.9398, "step": 14407 }, { - "epoch": 0.40885357548240636, + "epoch": 0.4082858681175437, "grad_norm": 0.0, - "learning_rate": 1.3370926625670115e-05, - "loss": 1.0245, + "learning_rate": 1.3387983666650352e-05, + "loss": 0.9095, "step": 14408 }, { - "epoch": 0.4088819523269012, + "epoch": 0.4083142055598062, "grad_norm": 0.0, - "learning_rate": 1.3370061325650081e-05, - "loss": 0.859, + "learning_rate": 1.3387120142088182e-05, + "loss": 0.9073, "step": 14409 }, { - "epoch": 0.40891032917139614, + "epoch": 0.4083425430020686, "grad_norm": 0.0, - "learning_rate": 1.3369195997162987e-05, - "loss": 0.8903, + "learning_rate": 1.3386256588995036e-05, + "loss": 0.9864, "step": 14410 }, { - "epoch": 0.40893870601589105, + "epoch": 0.4083708804443311, "grad_norm": 0.0, - "learning_rate": 1.3368330640216147e-05, - "loss": 0.9851, + "learning_rate": 1.3385393007378195e-05, + "loss": 0.9625, "step": 14411 }, { - "epoch": 0.4089670828603859, + "epoch": 0.40839921788659356, "grad_norm": 0.0, - "learning_rate": 1.3367465254816865e-05, - "loss": 0.8866, + "learning_rate": 1.3384529397244935e-05, + "loss": 0.912, "step": 14412 }, { - "epoch": 0.4089954597048808, + "epoch": 0.408427555328856, "grad_norm": 0.0, - "learning_rate": 1.336659984097245e-05, - "loss": 0.9112, + "learning_rate": 1.3383665758602522e-05, + "loss": 1.043, "step": 14413 }, { - "epoch": 0.4090238365493757, + "epoch": 0.4084558927711185, "grad_norm": 0.0, - "learning_rate": 1.3365734398690217e-05, - "loss": 0.8802, + "learning_rate": 1.3382802091458237e-05, + "loss": 0.9056, "step": 14414 }, { - "epoch": 0.4090522133938706, + "epoch": 0.4084842302133809, "grad_norm": 0.0, - "learning_rate": 1.3364868927977475e-05, - "loss": 0.9024, + "learning_rate": 1.3381938395819354e-05, + "loss": 0.8969, "step": 14415 }, { - "epoch": 0.4090805902383655, + "epoch": 0.4085125676556434, "grad_norm": 0.0, - "learning_rate": 1.3364003428841532e-05, - "loss": 0.9349, + "learning_rate": 1.338107467169315e-05, + "loss": 0.9274, "step": 14416 }, { - "epoch": 0.4091089670828604, + "epoch": 0.40854090509790586, "grad_norm": 0.0, - "learning_rate": 1.3363137901289702e-05, - "loss": 0.9365, + "learning_rate": 1.3380210919086898e-05, + "loss": 0.9019, "step": 14417 }, { - "epoch": 0.4091373439273553, + "epoch": 0.4085692425401683, "grad_norm": 0.0, - "learning_rate": 1.3362272345329294e-05, - "loss": 0.938, + "learning_rate": 1.3379347138007874e-05, + "loss": 0.9293, "step": 14418 }, { - "epoch": 0.40916572077185015, + "epoch": 0.4085975799824308, "grad_norm": 0.0, - "learning_rate": 1.336140676096762e-05, - "loss": 0.892, + "learning_rate": 1.3378483328463352e-05, + "loss": 0.9554, "step": 14419 }, { - "epoch": 0.40919409761634506, + "epoch": 0.40862591742469323, "grad_norm": 0.0, - "learning_rate": 1.3360541148211995e-05, - "loss": 0.9367, + "learning_rate": 1.3377619490460612e-05, + "loss": 0.9305, "step": 14420 }, { - "epoch": 0.40922247446084, + "epoch": 0.4086542548669557, "grad_norm": 0.0, - "learning_rate": 1.3359675507069725e-05, - "loss": 0.8832, + "learning_rate": 1.337675562400693e-05, + "loss": 0.9024, "step": 14421 }, { - "epoch": 0.40925085130533484, + "epoch": 0.40868259230921816, "grad_norm": 0.0, - "learning_rate": 1.3358809837548128e-05, - "loss": 0.9391, + "learning_rate": 1.337589172910958e-05, + "loss": 0.9631, "step": 14422 }, { - "epoch": 0.40927922814982975, + "epoch": 0.40871092975148066, "grad_norm": 0.0, - "learning_rate": 1.3357944139654508e-05, - "loss": 0.9272, + "learning_rate": 1.337502780577584e-05, + "loss": 0.8453, "step": 14423 }, { - "epoch": 0.4093076049943246, + "epoch": 0.4087392671937431, "grad_norm": 0.0, - "learning_rate": 1.3357078413396188e-05, - "loss": 0.9028, + "learning_rate": 1.3374163854012987e-05, + "loss": 0.8331, "step": 14424 }, { - "epoch": 0.40933598183881953, + "epoch": 0.40876760463600553, "grad_norm": 0.0, - "learning_rate": 1.3356212658780471e-05, - "loss": 0.8757, + "learning_rate": 1.3373299873828303e-05, + "loss": 0.9182, "step": 14425 }, { - "epoch": 0.4093643586833144, + "epoch": 0.408795942078268, "grad_norm": 0.0, - "learning_rate": 1.335534687581468e-05, - "loss": 1.042, + "learning_rate": 1.3372435865229056e-05, + "loss": 0.8409, "step": 14426 }, { - "epoch": 0.4093927355278093, + "epoch": 0.40882427952053046, "grad_norm": 0.0, - "learning_rate": 1.335448106450612e-05, - "loss": 0.9204, + "learning_rate": 1.3371571828222534e-05, + "loss": 0.9672, "step": 14427 }, { - "epoch": 0.4094211123723042, + "epoch": 0.40885261696279296, "grad_norm": 0.0, - "learning_rate": 1.3353615224862109e-05, - "loss": 0.9908, + "learning_rate": 1.3370707762816007e-05, + "loss": 0.9077, "step": 14428 }, { - "epoch": 0.4094494892167991, + "epoch": 0.4088809544050554, "grad_norm": 0.0, - "learning_rate": 1.3352749356889957e-05, - "loss": 0.855, + "learning_rate": 1.3369843669016757e-05, + "loss": 0.9945, "step": 14429 }, { - "epoch": 0.409477866061294, + "epoch": 0.40890929184731784, "grad_norm": 0.0, - "learning_rate": 1.3351883460596984e-05, - "loss": 1.0352, + "learning_rate": 1.3368979546832066e-05, + "loss": 0.8911, "step": 14430 }, { - "epoch": 0.40950624290578885, + "epoch": 0.40893762928958033, "grad_norm": 0.0, - "learning_rate": 1.3351017535990499e-05, - "loss": 0.9203, + "learning_rate": 1.3368115396269209e-05, + "loss": 0.8039, "step": 14431 }, { - "epoch": 0.40953461975028377, + "epoch": 0.40896596673184277, "grad_norm": 0.0, - "learning_rate": 1.335015158307782e-05, - "loss": 0.8952, + "learning_rate": 1.336725121733546e-05, + "loss": 0.932, "step": 14432 }, { - "epoch": 0.4095629965947787, + "epoch": 0.40899430417410526, "grad_norm": 0.0, - "learning_rate": 1.3349285601866254e-05, - "loss": 1.002, + "learning_rate": 1.3366387010038107e-05, + "loss": 1.0126, "step": 14433 }, { - "epoch": 0.40959137343927354, + "epoch": 0.4090226416163677, "grad_norm": 0.0, - "learning_rate": 1.3348419592363126e-05, - "loss": 0.887, + "learning_rate": 1.3365522774384425e-05, + "loss": 0.9045, "step": 14434 }, { - "epoch": 0.40961975028376846, + "epoch": 0.4090509790586302, "grad_norm": 0.0, - "learning_rate": 1.334755355457575e-05, - "loss": 0.8501, + "learning_rate": 1.3364658510381699e-05, + "loss": 0.9882, "step": 14435 }, { - "epoch": 0.4096481271282633, + "epoch": 0.40907931650089263, "grad_norm": 0.0, - "learning_rate": 1.3346687488511436e-05, - "loss": 0.8925, + "learning_rate": 1.3363794218037198e-05, + "loss": 1.0084, "step": 14436 }, { - "epoch": 0.40967650397275823, + "epoch": 0.40910765394315507, "grad_norm": 0.0, - "learning_rate": 1.3345821394177501e-05, - "loss": 0.92, + "learning_rate": 1.3362929897358215e-05, + "loss": 0.8549, "step": 14437 }, { - "epoch": 0.40970488081725315, + "epoch": 0.40913599138541756, "grad_norm": 0.0, - "learning_rate": 1.3344955271581264e-05, - "loss": 0.8517, + "learning_rate": 1.3362065548352023e-05, + "loss": 0.9351, "step": 14438 }, { - "epoch": 0.409733257661748, + "epoch": 0.40916432882768, "grad_norm": 0.0, - "learning_rate": 1.334408912073004e-05, - "loss": 0.8753, + "learning_rate": 1.33612011710259e-05, + "loss": 1.0374, "step": 14439 }, { - "epoch": 0.4097616345062429, + "epoch": 0.4091926662699425, "grad_norm": 0.0, - "learning_rate": 1.3343222941631142e-05, - "loss": 0.9818, + "learning_rate": 1.3360336765387136e-05, + "loss": 1.0158, "step": 14440 }, { - "epoch": 0.4097900113507378, + "epoch": 0.40922100371220493, "grad_norm": 0.0, - "learning_rate": 1.334235673429189e-05, - "loss": 0.8057, + "learning_rate": 1.3359472331443008e-05, + "loss": 0.9321, "step": 14441 }, { - "epoch": 0.4098183881952327, + "epoch": 0.40924934115446737, "grad_norm": 0.0, - "learning_rate": 1.3341490498719605e-05, - "loss": 0.9117, + "learning_rate": 1.3358607869200798e-05, + "loss": 0.9203, "step": 14442 }, { - "epoch": 0.40984676503972756, + "epoch": 0.40927767859672987, "grad_norm": 0.0, - "learning_rate": 1.3340624234921592e-05, - "loss": 1.017, + "learning_rate": 1.3357743378667785e-05, + "loss": 0.9074, "step": 14443 }, { - "epoch": 0.40987514188422247, + "epoch": 0.4093060160389923, "grad_norm": 0.0, - "learning_rate": 1.3339757942905182e-05, - "loss": 0.9257, + "learning_rate": 1.335687885985125e-05, + "loss": 0.874, "step": 14444 }, { - "epoch": 0.4099035187287174, + "epoch": 0.4093343534812548, "grad_norm": 0.0, - "learning_rate": 1.3338891622677688e-05, - "loss": 0.8585, + "learning_rate": 1.3356014312758483e-05, + "loss": 0.8615, "step": 14445 }, { - "epoch": 0.40993189557321225, + "epoch": 0.40936269092351724, "grad_norm": 0.0, - "learning_rate": 1.333802527424642e-05, - "loss": 0.9175, + "learning_rate": 1.3355149737396756e-05, + "loss": 0.9725, "step": 14446 }, { - "epoch": 0.40996027241770716, + "epoch": 0.40939102836577973, "grad_norm": 0.0, - "learning_rate": 1.3337158897618707e-05, - "loss": 0.8872, + "learning_rate": 1.3354285133773361e-05, + "loss": 1.006, "step": 14447 }, { - "epoch": 0.409988649262202, + "epoch": 0.40941936580804217, "grad_norm": 0.0, - "learning_rate": 1.333629249280186e-05, - "loss": 0.9811, + "learning_rate": 1.3353420501895573e-05, + "loss": 0.9084, "step": 14448 }, { - "epoch": 0.41001702610669694, + "epoch": 0.4094477032503046, "grad_norm": 0.0, - "learning_rate": 1.3335426059803203e-05, - "loss": 0.8626, + "learning_rate": 1.3352555841770682e-05, + "loss": 0.9883, "step": 14449 }, { - "epoch": 0.41004540295119185, + "epoch": 0.4094760406925671, "grad_norm": 0.0, - "learning_rate": 1.333455959863005e-05, - "loss": 0.9377, + "learning_rate": 1.3351691153405968e-05, + "loss": 0.912, "step": 14450 }, { - "epoch": 0.4100737797956867, + "epoch": 0.40950437813482954, "grad_norm": 0.0, - "learning_rate": 1.3333693109289722e-05, - "loss": 0.8159, + "learning_rate": 1.3350826436808715e-05, + "loss": 1.0001, "step": 14451 }, { - "epoch": 0.4101021566401816, + "epoch": 0.40953271557709203, "grad_norm": 0.0, - "learning_rate": 1.3332826591789543e-05, - "loss": 0.945, + "learning_rate": 1.3349961691986205e-05, + "loss": 0.9031, "step": 14452 }, { - "epoch": 0.4101305334846765, + "epoch": 0.40956105301935447, "grad_norm": 0.0, - "learning_rate": 1.3331960046136822e-05, - "loss": 1.0721, + "learning_rate": 1.334909691894572e-05, + "loss": 0.9571, "step": 14453 }, { - "epoch": 0.4101589103291714, + "epoch": 0.4095893904616169, "grad_norm": 0.0, - "learning_rate": 1.3331093472338888e-05, - "loss": 0.8862, + "learning_rate": 1.3348232117694555e-05, + "loss": 0.9033, "step": 14454 }, { - "epoch": 0.4101872871736663, + "epoch": 0.4096177279038794, "grad_norm": 0.0, - "learning_rate": 1.3330226870403058e-05, - "loss": 0.9097, + "learning_rate": 1.3347367288239986e-05, + "loss": 0.925, "step": 14455 }, { - "epoch": 0.4102156640181612, + "epoch": 0.40964606534614184, "grad_norm": 0.0, - "learning_rate": 1.3329360240336651e-05, - "loss": 0.934, + "learning_rate": 1.33465024305893e-05, + "loss": 0.811, "step": 14456 }, { - "epoch": 0.4102440408626561, + "epoch": 0.40967440278840433, "grad_norm": 0.0, - "learning_rate": 1.3328493582146983e-05, - "loss": 1.012, + "learning_rate": 1.3345637544749776e-05, + "loss": 1.0598, "step": 14457 }, { - "epoch": 0.41027241770715095, + "epoch": 0.4097027402306668, "grad_norm": 0.0, - "learning_rate": 1.3327626895841388e-05, - "loss": 0.8435, + "learning_rate": 1.3344772630728708e-05, + "loss": 0.9491, "step": 14458 }, { - "epoch": 0.41030079455164586, + "epoch": 0.40973107767292927, "grad_norm": 0.0, - "learning_rate": 1.332676018142718e-05, - "loss": 0.8884, + "learning_rate": 1.3343907688533378e-05, + "loss": 0.9026, "step": 14459 }, { - "epoch": 0.4103291713961407, + "epoch": 0.4097594151151917, "grad_norm": 0.0, - "learning_rate": 1.3325893438911672e-05, - "loss": 0.8722, + "learning_rate": 1.3343042718171073e-05, + "loss": 0.8842, "step": 14460 }, { - "epoch": 0.41035754824063564, + "epoch": 0.40978775255745414, "grad_norm": 0.0, - "learning_rate": 1.3325026668302197e-05, - "loss": 0.8713, + "learning_rate": 1.3342177719649075e-05, + "loss": 1.0154, "step": 14461 }, { - "epoch": 0.41038592508513055, + "epoch": 0.40981608999971664, "grad_norm": 0.0, - "learning_rate": 1.3324159869606072e-05, - "loss": 0.918, + "learning_rate": 1.3341312692974674e-05, + "loss": 0.8295, "step": 14462 }, { - "epoch": 0.4104143019296254, + "epoch": 0.4098444274419791, "grad_norm": 0.0, - "learning_rate": 1.3323293042830617e-05, - "loss": 0.9203, + "learning_rate": 1.3340447638155158e-05, + "loss": 1.0001, "step": 14463 }, { - "epoch": 0.41044267877412033, + "epoch": 0.40987276488424157, "grad_norm": 0.0, - "learning_rate": 1.3322426187983158e-05, - "loss": 0.9441, + "learning_rate": 1.3339582555197809e-05, + "loss": 0.9886, "step": 14464 }, { - "epoch": 0.4104710556186152, + "epoch": 0.409901102326504, "grad_norm": 0.0, - "learning_rate": 1.3321559305071014e-05, - "loss": 0.9295, + "learning_rate": 1.3338717444109916e-05, + "loss": 1.0014, "step": 14465 }, { - "epoch": 0.4104994324631101, + "epoch": 0.40992943976876645, "grad_norm": 0.0, - "learning_rate": 1.3320692394101514e-05, - "loss": 0.8319, + "learning_rate": 1.3337852304898766e-05, + "loss": 0.9807, "step": 14466 }, { - "epoch": 0.410527809307605, + "epoch": 0.40995777721102894, "grad_norm": 0.0, - "learning_rate": 1.3319825455081969e-05, - "loss": 1.0112, + "learning_rate": 1.3336987137571646e-05, + "loss": 0.821, "step": 14467 }, { - "epoch": 0.4105561861520999, + "epoch": 0.4099861146532914, "grad_norm": 0.0, - "learning_rate": 1.3318958488019716e-05, - "loss": 0.9007, + "learning_rate": 1.3336121942135847e-05, + "loss": 0.9705, "step": 14468 }, { - "epoch": 0.4105845629965948, + "epoch": 0.41001445209555387, "grad_norm": 0.0, - "learning_rate": 1.3318091492922068e-05, - "loss": 1.0247, + "learning_rate": 1.333525671859865e-05, + "loss": 0.9638, "step": 14469 }, { - "epoch": 0.41061293984108965, + "epoch": 0.4100427895378163, "grad_norm": 0.0, - "learning_rate": 1.331722446979635e-05, - "loss": 0.9653, + "learning_rate": 1.333439146696735e-05, + "loss": 0.8821, "step": 14470 }, { - "epoch": 0.41064131668558457, + "epoch": 0.4100711269800788, "grad_norm": 0.0, - "learning_rate": 1.331635741864989e-05, - "loss": 0.9288, + "learning_rate": 1.3333526187249235e-05, + "loss": 0.8779, "step": 14471 }, { - "epoch": 0.4106696935300795, + "epoch": 0.41009946442234124, "grad_norm": 0.0, - "learning_rate": 1.3315490339490012e-05, - "loss": 0.9947, + "learning_rate": 1.3332660879451584e-05, + "loss": 0.9242, "step": 14472 }, { - "epoch": 0.41069807037457434, + "epoch": 0.4101278018646037, "grad_norm": 0.0, - "learning_rate": 1.3314623232324035e-05, - "loss": 0.9586, + "learning_rate": 1.3331795543581696e-05, + "loss": 0.9685, "step": 14473 }, { - "epoch": 0.41072644721906926, + "epoch": 0.4101561393068662, "grad_norm": 0.0, - "learning_rate": 1.3313756097159288e-05, - "loss": 0.8536, + "learning_rate": 1.3330930179646859e-05, + "loss": 0.9527, "step": 14474 }, { - "epoch": 0.4107548240635641, + "epoch": 0.4101844767491286, "grad_norm": 0.0, - "learning_rate": 1.3312888934003095e-05, - "loss": 0.8097, + "learning_rate": 1.333006478765436e-05, + "loss": 0.9019, "step": 14475 }, { - "epoch": 0.41078320090805903, + "epoch": 0.4102128141913911, "grad_norm": 0.0, - "learning_rate": 1.3312021742862777e-05, - "loss": 0.9317, + "learning_rate": 1.3329199367611488e-05, + "loss": 0.8382, "step": 14476 }, { - "epoch": 0.4108115777525539, + "epoch": 0.41024115163365354, "grad_norm": 0.0, - "learning_rate": 1.3311154523745663e-05, - "loss": 0.9924, + "learning_rate": 1.3328333919525531e-05, + "loss": 0.9577, "step": 14477 }, { - "epoch": 0.4108399545970488, + "epoch": 0.410269489075916, "grad_norm": 0.0, - "learning_rate": 1.3310287276659082e-05, - "loss": 0.8944, + "learning_rate": 1.3327468443403784e-05, + "loss": 1.0001, "step": 14478 }, { - "epoch": 0.4108683314415437, + "epoch": 0.4102978265181785, "grad_norm": 0.0, - "learning_rate": 1.3309420001610352e-05, - "loss": 0.8625, + "learning_rate": 1.3326602939253532e-05, + "loss": 0.967, "step": 14479 }, { - "epoch": 0.4108967082860386, + "epoch": 0.4103261639604409, "grad_norm": 0.0, - "learning_rate": 1.3308552698606805e-05, - "loss": 0.8987, + "learning_rate": 1.3325737407082074e-05, + "loss": 0.9058, "step": 14480 }, { - "epoch": 0.4109250851305335, + "epoch": 0.4103545014027034, "grad_norm": 0.0, - "learning_rate": 1.330768536765576e-05, - "loss": 0.928, + "learning_rate": 1.332487184689669e-05, + "loss": 1.0414, "step": 14481 }, { - "epoch": 0.41095346197502836, + "epoch": 0.41038283884496585, "grad_norm": 0.0, - "learning_rate": 1.3306818008764553e-05, - "loss": 0.9952, + "learning_rate": 1.3324006258704677e-05, + "loss": 0.9156, "step": 14482 }, { - "epoch": 0.41098183881952327, + "epoch": 0.41041117628722834, "grad_norm": 0.0, - "learning_rate": 1.3305950621940504e-05, - "loss": 0.9083, + "learning_rate": 1.3323140642513324e-05, + "loss": 0.9636, "step": 14483 }, { - "epoch": 0.4110102156640182, + "epoch": 0.4104395137294908, "grad_norm": 0.0, - "learning_rate": 1.3305083207190941e-05, - "loss": 0.9471, + "learning_rate": 1.3322274998329925e-05, + "loss": 0.9106, "step": 14484 }, { - "epoch": 0.41103859250851305, + "epoch": 0.4104678511717532, "grad_norm": 0.0, - "learning_rate": 1.3304215764523192e-05, - "loss": 0.8351, + "learning_rate": 1.3321409326161767e-05, + "loss": 0.9594, "step": 14485 }, { - "epoch": 0.41106696935300796, + "epoch": 0.4104961886140157, "grad_norm": 0.0, - "learning_rate": 1.3303348293944585e-05, - "loss": 0.8772, + "learning_rate": 1.3320543626016147e-05, + "loss": 1.0056, "step": 14486 }, { - "epoch": 0.4110953461975028, + "epoch": 0.41052452605627815, "grad_norm": 0.0, - "learning_rate": 1.3302480795462441e-05, - "loss": 0.8427, + "learning_rate": 1.3319677897900357e-05, + "loss": 0.8478, "step": 14487 }, { - "epoch": 0.41112372304199774, + "epoch": 0.41055286349854064, "grad_norm": 0.0, - "learning_rate": 1.3301613269084099e-05, - "loss": 0.8835, + "learning_rate": 1.3318812141821684e-05, + "loss": 0.9501, "step": 14488 }, { - "epoch": 0.4111520998864926, + "epoch": 0.4105812009408031, "grad_norm": 0.0, - "learning_rate": 1.330074571481688e-05, - "loss": 0.8504, + "learning_rate": 1.3317946357787426e-05, + "loss": 0.9401, "step": 14489 }, { - "epoch": 0.4111804767309875, + "epoch": 0.4106095383830655, "grad_norm": 0.0, - "learning_rate": 1.3299878132668114e-05, - "loss": 0.9599, + "learning_rate": 1.3317080545804872e-05, + "loss": 1.0015, "step": 14490 }, { - "epoch": 0.4112088535754824, + "epoch": 0.410637875825328, "grad_norm": 0.0, - "learning_rate": 1.3299010522645129e-05, - "loss": 0.943, + "learning_rate": 1.3316214705881318e-05, + "loss": 0.899, "step": 14491 }, { - "epoch": 0.4112372304199773, + "epoch": 0.41066621326759045, "grad_norm": 0.0, - "learning_rate": 1.3298142884755252e-05, - "loss": 0.9208, + "learning_rate": 1.3315348838024056e-05, + "loss": 0.9187, "step": 14492 }, { - "epoch": 0.4112656072644722, + "epoch": 0.41069455070985295, "grad_norm": 0.0, - "learning_rate": 1.3297275219005817e-05, - "loss": 0.954, + "learning_rate": 1.3314482942240379e-05, + "loss": 0.8495, "step": 14493 }, { - "epoch": 0.41129398410896706, + "epoch": 0.4107228881521154, "grad_norm": 0.0, - "learning_rate": 1.3296407525404145e-05, - "loss": 0.8791, + "learning_rate": 1.3313617018537581e-05, + "loss": 0.9079, "step": 14494 }, { - "epoch": 0.411322360953462, + "epoch": 0.4107512255943779, "grad_norm": 0.0, - "learning_rate": 1.3295539803957573e-05, - "loss": 0.8681, + "learning_rate": 1.3312751066922958e-05, + "loss": 0.88, "step": 14495 }, { - "epoch": 0.4113507377979569, + "epoch": 0.4107795630366403, "grad_norm": 0.0, - "learning_rate": 1.329467205467343e-05, - "loss": 0.8946, + "learning_rate": 1.3311885087403801e-05, + "loss": 0.907, "step": 14496 }, { - "epoch": 0.41137911464245175, + "epoch": 0.41080790047890275, "grad_norm": 0.0, - "learning_rate": 1.3293804277559042e-05, - "loss": 0.8485, + "learning_rate": 1.3311019079987409e-05, + "loss": 0.9012, "step": 14497 }, { - "epoch": 0.41140749148694666, + "epoch": 0.41083623792116525, "grad_norm": 0.0, - "learning_rate": 1.329293647262174e-05, - "loss": 0.844, + "learning_rate": 1.331015304468107e-05, + "loss": 0.8427, "step": 14498 }, { - "epoch": 0.4114358683314415, + "epoch": 0.4108645753634277, "grad_norm": 0.0, - "learning_rate": 1.329206863986886e-05, - "loss": 0.9152, + "learning_rate": 1.3309286981492084e-05, + "loss": 0.8009, "step": 14499 }, { - "epoch": 0.41146424517593644, + "epoch": 0.4108929128056902, "grad_norm": 0.0, - "learning_rate": 1.3291200779307724e-05, - "loss": 0.8273, + "learning_rate": 1.3308420890427747e-05, + "loss": 0.7848, "step": 14500 }, { - "epoch": 0.41149262202043135, + "epoch": 0.4109212502479526, "grad_norm": 0.0, - "learning_rate": 1.329033289094567e-05, - "loss": 0.9668, + "learning_rate": 1.330755477149535e-05, + "loss": 0.8627, "step": 14501 }, { - "epoch": 0.4115209988649262, + "epoch": 0.41094958769021506, "grad_norm": 0.0, - "learning_rate": 1.3289464974790022e-05, - "loss": 0.9908, + "learning_rate": 1.330668862470219e-05, + "loss": 1.0197, "step": 14502 }, { - "epoch": 0.41154937570942113, + "epoch": 0.41097792513247755, "grad_norm": 0.0, - "learning_rate": 1.328859703084812e-05, - "loss": 0.9591, + "learning_rate": 1.3305822450055565e-05, + "loss": 0.9145, "step": 14503 }, { - "epoch": 0.411577752553916, + "epoch": 0.41100626257474, "grad_norm": 0.0, - "learning_rate": 1.3287729059127288e-05, - "loss": 0.9568, + "learning_rate": 1.3304956247562772e-05, + "loss": 0.986, "step": 14504 }, { - "epoch": 0.4116061293984109, + "epoch": 0.4110346000170025, "grad_norm": 0.0, - "learning_rate": 1.3286861059634863e-05, - "loss": 1.031, + "learning_rate": 1.3304090017231101e-05, + "loss": 0.8608, "step": 14505 }, { - "epoch": 0.41163450624290576, + "epoch": 0.4110629374592649, "grad_norm": 0.0, - "learning_rate": 1.3285993032378175e-05, - "loss": 1.0084, + "learning_rate": 1.3303223759067855e-05, + "loss": 0.962, "step": 14506 }, { - "epoch": 0.4116628830874007, + "epoch": 0.4110912749015274, "grad_norm": 0.0, - "learning_rate": 1.3285124977364555e-05, - "loss": 0.8872, + "learning_rate": 1.330235747308033e-05, + "loss": 0.9366, "step": 14507 }, { - "epoch": 0.4116912599318956, + "epoch": 0.41111961234378985, "grad_norm": 0.0, - "learning_rate": 1.3284256894601337e-05, - "loss": 0.9468, + "learning_rate": 1.3301491159275821e-05, + "loss": 0.9582, "step": 14508 }, { - "epoch": 0.41171963677639045, + "epoch": 0.4111479497860523, "grad_norm": 0.0, - "learning_rate": 1.3283388784095854e-05, - "loss": 0.9204, + "learning_rate": 1.3300624817661627e-05, + "loss": 0.9922, "step": 14509 }, { - "epoch": 0.41174801362088537, + "epoch": 0.4111762872283148, "grad_norm": 0.0, - "learning_rate": 1.3282520645855436e-05, - "loss": 0.934, + "learning_rate": 1.3299758448245044e-05, + "loss": 1.0339, "step": 14510 }, { - "epoch": 0.4117763904653802, + "epoch": 0.4112046246705772, "grad_norm": 0.0, - "learning_rate": 1.328165247988742e-05, - "loss": 0.8409, + "learning_rate": 1.3298892051033368e-05, + "loss": 0.7693, "step": 14511 }, { - "epoch": 0.41180476730987514, + "epoch": 0.4112329621128397, "grad_norm": 0.0, - "learning_rate": 1.3280784286199137e-05, - "loss": 0.9083, + "learning_rate": 1.32980256260339e-05, + "loss": 0.8875, "step": 14512 }, { - "epoch": 0.41183314415437006, + "epoch": 0.41126129955510216, "grad_norm": 0.0, - "learning_rate": 1.3279916064797924e-05, - "loss": 0.9088, + "learning_rate": 1.3297159173253937e-05, + "loss": 0.8925, "step": 14513 }, { - "epoch": 0.4118615209988649, + "epoch": 0.4112896369973646, "grad_norm": 0.0, - "learning_rate": 1.327904781569111e-05, - "loss": 0.8962, + "learning_rate": 1.3296292692700781e-05, + "loss": 0.8749, "step": 14514 }, { - "epoch": 0.41188989784335983, + "epoch": 0.4113179744396271, "grad_norm": 0.0, - "learning_rate": 1.3278179538886034e-05, - "loss": 0.9481, + "learning_rate": 1.3295426184381723e-05, + "loss": 0.9144, "step": 14515 }, { - "epoch": 0.4119182746878547, + "epoch": 0.4113463118818895, "grad_norm": 0.0, - "learning_rate": 1.327731123439003e-05, - "loss": 0.9442, + "learning_rate": 1.329455964830407e-05, + "loss": 0.8898, "step": 14516 }, { - "epoch": 0.4119466515323496, + "epoch": 0.411374649324152, "grad_norm": 0.0, - "learning_rate": 1.3276442902210425e-05, - "loss": 0.996, + "learning_rate": 1.3293693084475116e-05, + "loss": 0.9653, "step": 14517 }, { - "epoch": 0.4119750283768445, + "epoch": 0.41140298676641446, "grad_norm": 0.0, - "learning_rate": 1.3275574542354562e-05, - "loss": 0.8873, + "learning_rate": 1.3292826492902164e-05, + "loss": 0.918, "step": 14518 }, { - "epoch": 0.4120034052213394, + "epoch": 0.41143132420867695, "grad_norm": 0.0, - "learning_rate": 1.3274706154829772e-05, - "loss": 0.866, + "learning_rate": 1.3291959873592508e-05, + "loss": 0.8887, "step": 14519 }, { - "epoch": 0.4120317820658343, + "epoch": 0.4114596616509394, "grad_norm": 0.0, - "learning_rate": 1.3273837739643396e-05, - "loss": 0.8191, + "learning_rate": 1.3291093226553456e-05, + "loss": 1.0022, "step": 14520 }, { - "epoch": 0.41206015891032916, + "epoch": 0.41148799909320183, "grad_norm": 0.0, - "learning_rate": 1.3272969296802762e-05, - "loss": 0.9943, + "learning_rate": 1.3290226551792302e-05, + "loss": 0.9092, "step": 14521 }, { - "epoch": 0.41208853575482407, + "epoch": 0.4115163365354643, "grad_norm": 0.0, - "learning_rate": 1.327210082631521e-05, - "loss": 0.993, + "learning_rate": 1.328935984931635e-05, + "loss": 0.8812, "step": 14522 }, { - "epoch": 0.41211691259931893, + "epoch": 0.41154467397772676, "grad_norm": 0.0, - "learning_rate": 1.3271232328188077e-05, - "loss": 0.9843, + "learning_rate": 1.3288493119132894e-05, + "loss": 0.915, "step": 14523 }, { - "epoch": 0.41214528944381384, + "epoch": 0.41157301141998925, "grad_norm": 0.0, - "learning_rate": 1.3270363802428697e-05, - "loss": 0.8658, + "learning_rate": 1.328762636124924e-05, + "loss": 0.8622, "step": 14524 }, { - "epoch": 0.41217366628830876, + "epoch": 0.4116013488622517, "grad_norm": 0.0, - "learning_rate": 1.3269495249044404e-05, - "loss": 1.0643, + "learning_rate": 1.3286759575672692e-05, + "loss": 0.9074, "step": 14525 }, { - "epoch": 0.4122020431328036, + "epoch": 0.41162968630451413, "grad_norm": 0.0, - "learning_rate": 1.3268626668042539e-05, - "loss": 0.9421, + "learning_rate": 1.3285892762410547e-05, + "loss": 0.9242, "step": 14526 }, { - "epoch": 0.41223041997729853, + "epoch": 0.4116580237467766, "grad_norm": 0.0, - "learning_rate": 1.326775805943044e-05, - "loss": 0.8632, + "learning_rate": 1.3285025921470103e-05, + "loss": 0.957, "step": 14527 }, { - "epoch": 0.4122587968217934, + "epoch": 0.41168636118903906, "grad_norm": 0.0, - "learning_rate": 1.3266889423215438e-05, - "loss": 0.9599, + "learning_rate": 1.3284159052858668e-05, + "loss": 0.8677, "step": 14528 }, { - "epoch": 0.4122871736662883, + "epoch": 0.41171469863130156, "grad_norm": 0.0, - "learning_rate": 1.3266020759404878e-05, - "loss": 0.8907, + "learning_rate": 1.3283292156583542e-05, + "loss": 0.8262, "step": 14529 }, { - "epoch": 0.4123155505107832, + "epoch": 0.411743036073564, "grad_norm": 0.0, - "learning_rate": 1.3265152068006093e-05, - "loss": 1.0132, + "learning_rate": 1.3282425232652027e-05, + "loss": 0.8595, "step": 14530 }, { - "epoch": 0.4123439273552781, + "epoch": 0.4117713735158265, "grad_norm": 0.0, - "learning_rate": 1.3264283349026419e-05, - "loss": 0.8474, + "learning_rate": 1.3281558281071422e-05, + "loss": 1.0, "step": 14531 }, { - "epoch": 0.412372304199773, + "epoch": 0.4117997109580889, "grad_norm": 0.0, - "learning_rate": 1.3263414602473201e-05, - "loss": 0.9416, + "learning_rate": 1.3280691301849037e-05, + "loss": 1.0372, "step": 14532 }, { - "epoch": 0.41240068104426786, + "epoch": 0.41182804840035137, "grad_norm": 0.0, - "learning_rate": 1.326254582835377e-05, - "loss": 0.8662, + "learning_rate": 1.3279824294992172e-05, + "loss": 0.9891, "step": 14533 }, { - "epoch": 0.4124290578887628, + "epoch": 0.41185638584261386, "grad_norm": 0.0, - "learning_rate": 1.3261677026675467e-05, - "loss": 0.9548, + "learning_rate": 1.3278957260508129e-05, + "loss": 0.8712, "step": 14534 }, { - "epoch": 0.4124574347332577, + "epoch": 0.4118847232848763, "grad_norm": 0.0, - "learning_rate": 1.3260808197445632e-05, - "loss": 1.0189, + "learning_rate": 1.3278090198404207e-05, + "loss": 0.8672, "step": 14535 }, { - "epoch": 0.41248581157775255, + "epoch": 0.4119130607271388, "grad_norm": 0.0, - "learning_rate": 1.3259939340671606e-05, - "loss": 0.8415, + "learning_rate": 1.3277223108687717e-05, + "loss": 0.9208, "step": 14536 }, { - "epoch": 0.41251418842224746, + "epoch": 0.41194139816940123, "grad_norm": 0.0, - "learning_rate": 1.3259070456360726e-05, - "loss": 0.8702, + "learning_rate": 1.327635599136596e-05, + "loss": 0.9005, "step": 14537 }, { - "epoch": 0.4125425652667423, + "epoch": 0.41196973561166367, "grad_norm": 0.0, - "learning_rate": 1.3258201544520328e-05, - "loss": 0.9791, + "learning_rate": 1.327548884644624e-05, + "loss": 0.8399, "step": 14538 }, { - "epoch": 0.41257094211123724, + "epoch": 0.41199807305392616, "grad_norm": 0.0, - "learning_rate": 1.3257332605157758e-05, - "loss": 0.9278, + "learning_rate": 1.3274621673935861e-05, + "loss": 0.8275, "step": 14539 }, { - "epoch": 0.4125993189557321, + "epoch": 0.4120264104961886, "grad_norm": 0.0, - "learning_rate": 1.325646363828035e-05, - "loss": 0.91, + "learning_rate": 1.3273754473842127e-05, + "loss": 0.8787, "step": 14540 }, { - "epoch": 0.412627695800227, + "epoch": 0.4120547479384511, "grad_norm": 0.0, - "learning_rate": 1.3255594643895448e-05, - "loss": 0.8247, + "learning_rate": 1.3272887246172344e-05, + "loss": 0.9328, "step": 14541 }, { - "epoch": 0.4126560726447219, + "epoch": 0.41208308538071353, "grad_norm": 0.0, - "learning_rate": 1.3254725622010393e-05, - "loss": 0.8215, + "learning_rate": 1.3272019990933816e-05, + "loss": 0.9034, "step": 14542 }, { - "epoch": 0.4126844494892168, + "epoch": 0.412111422822976, "grad_norm": 0.0, - "learning_rate": 1.3253856572632526e-05, - "loss": 0.8915, + "learning_rate": 1.3271152708133848e-05, + "loss": 0.9681, "step": 14543 }, { - "epoch": 0.4127128263337117, + "epoch": 0.41213976026523846, "grad_norm": 0.0, - "learning_rate": 1.3252987495769185e-05, - "loss": 0.9695, + "learning_rate": 1.3270285397779743e-05, + "loss": 0.9388, "step": 14544 }, { - "epoch": 0.41274120317820656, + "epoch": 0.4121680977075009, "grad_norm": 0.0, - "learning_rate": 1.325211839142771e-05, - "loss": 0.9251, + "learning_rate": 1.3269418059878815e-05, + "loss": 0.9005, "step": 14545 }, { - "epoch": 0.4127695800227015, + "epoch": 0.4121964351497634, "grad_norm": 0.0, - "learning_rate": 1.3251249259615448e-05, - "loss": 0.7983, + "learning_rate": 1.3268550694438363e-05, + "loss": 0.9016, "step": 14546 }, { - "epoch": 0.4127979568671964, + "epoch": 0.41222477259202583, "grad_norm": 0.0, - "learning_rate": 1.3250380100339738e-05, - "loss": 0.9892, + "learning_rate": 1.3267683301465697e-05, + "loss": 0.858, "step": 14547 }, { - "epoch": 0.41282633371169125, + "epoch": 0.41225311003428833, "grad_norm": 0.0, - "learning_rate": 1.3249510913607916e-05, - "loss": 0.9733, + "learning_rate": 1.3266815880968115e-05, + "loss": 0.791, "step": 14548 }, { - "epoch": 0.41285471055618617, + "epoch": 0.41228144747655077, "grad_norm": 0.0, - "learning_rate": 1.3248641699427332e-05, - "loss": 0.9302, + "learning_rate": 1.3265948432952935e-05, + "loss": 0.8664, "step": 14549 }, { - "epoch": 0.412883087400681, + "epoch": 0.4123097849188132, "grad_norm": 0.0, - "learning_rate": 1.3247772457805328e-05, - "loss": 0.9943, + "learning_rate": 1.3265080957427456e-05, + "loss": 0.8253, "step": 14550 }, { - "epoch": 0.41291146424517594, + "epoch": 0.4123381223610757, "grad_norm": 0.0, - "learning_rate": 1.324690318874924e-05, - "loss": 0.8909, + "learning_rate": 1.3264213454398988e-05, + "loss": 0.9642, "step": 14551 }, { - "epoch": 0.41293984108967086, + "epoch": 0.41236645980333814, "grad_norm": 0.0, - "learning_rate": 1.324603389226642e-05, - "loss": 0.8736, + "learning_rate": 1.3263345923874838e-05, + "loss": 0.9013, "step": 14552 }, { - "epoch": 0.4129682179341657, + "epoch": 0.41239479724560063, "grad_norm": 0.0, - "learning_rate": 1.3245164568364201e-05, - "loss": 0.8486, + "learning_rate": 1.3262478365862314e-05, + "loss": 0.8849, "step": 14553 }, { - "epoch": 0.41299659477866063, + "epoch": 0.41242313468786307, "grad_norm": 0.0, - "learning_rate": 1.3244295217049932e-05, - "loss": 0.86, + "learning_rate": 1.3261610780368726e-05, + "loss": 0.9799, "step": 14554 }, { - "epoch": 0.4130249716231555, + "epoch": 0.4124514721301255, "grad_norm": 0.0, - "learning_rate": 1.3243425838330957e-05, - "loss": 0.999, + "learning_rate": 1.3260743167401375e-05, + "loss": 0.9844, "step": 14555 }, { - "epoch": 0.4130533484676504, + "epoch": 0.412479809572388, "grad_norm": 0.0, - "learning_rate": 1.3242556432214615e-05, - "loss": 0.9606, + "learning_rate": 1.3259875526967574e-05, + "loss": 0.8939, "step": 14556 }, { - "epoch": 0.41308172531214526, + "epoch": 0.41250814701465044, "grad_norm": 0.0, - "learning_rate": 1.3241686998708256e-05, - "loss": 0.8739, + "learning_rate": 1.325900785907463e-05, + "loss": 0.8281, "step": 14557 }, { - "epoch": 0.4131101021566402, + "epoch": 0.41253648445691293, "grad_norm": 0.0, - "learning_rate": 1.324081753781922e-05, - "loss": 0.8925, + "learning_rate": 1.3258140163729856e-05, + "loss": 0.8385, "step": 14558 }, { - "epoch": 0.4131384790011351, + "epoch": 0.41256482189917537, "grad_norm": 0.0, - "learning_rate": 1.3239948049554854e-05, - "loss": 0.887, + "learning_rate": 1.3257272440940559e-05, + "loss": 0.8736, "step": 14559 }, { - "epoch": 0.41316685584562995, + "epoch": 0.41259315934143787, "grad_norm": 0.0, - "learning_rate": 1.32390785339225e-05, - "loss": 0.8687, + "learning_rate": 1.325640469071404e-05, + "loss": 0.8884, "step": 14560 }, { - "epoch": 0.41319523269012487, + "epoch": 0.4126214967837003, "grad_norm": 0.0, - "learning_rate": 1.3238208990929505e-05, - "loss": 0.9123, + "learning_rate": 1.3255536913057621e-05, + "loss": 0.9095, "step": 14561 }, { - "epoch": 0.41322360953461973, + "epoch": 0.41264983422596274, "grad_norm": 0.0, - "learning_rate": 1.3237339420583213e-05, - "loss": 0.9421, + "learning_rate": 1.3254669107978604e-05, + "loss": 0.8714, "step": 14562 }, { - "epoch": 0.41325198637911464, + "epoch": 0.41267817166822524, "grad_norm": 0.0, - "learning_rate": 1.3236469822890968e-05, - "loss": 0.9296, + "learning_rate": 1.3253801275484298e-05, + "loss": 1.0532, "step": 14563 }, { - "epoch": 0.41328036322360956, + "epoch": 0.4127065091104877, "grad_norm": 0.0, - "learning_rate": 1.3235600197860116e-05, - "loss": 0.9183, + "learning_rate": 1.3252933415582016e-05, + "loss": 0.8569, "step": 14564 }, { - "epoch": 0.4133087400681044, + "epoch": 0.41273484655275017, "grad_norm": 0.0, - "learning_rate": 1.3234730545498004e-05, - "loss": 0.8517, + "learning_rate": 1.325206552827907e-05, + "loss": 0.9928, "step": 14565 }, { - "epoch": 0.41333711691259933, + "epoch": 0.4127631839950126, "grad_norm": 0.0, - "learning_rate": 1.323386086581198e-05, - "loss": 1.0568, + "learning_rate": 1.3251197613582769e-05, + "loss": 0.7522, "step": 14566 }, { - "epoch": 0.4133654937570942, + "epoch": 0.41279152143727504, "grad_norm": 0.0, - "learning_rate": 1.323299115880939e-05, - "loss": 0.9694, + "learning_rate": 1.3250329671500421e-05, + "loss": 0.955, "step": 14567 }, { - "epoch": 0.4133938706015891, + "epoch": 0.41281985887953754, "grad_norm": 0.0, - "learning_rate": 1.3232121424497572e-05, - "loss": 0.9164, + "learning_rate": 1.3249461702039344e-05, + "loss": 1.0627, "step": 14568 }, { - "epoch": 0.41342224744608397, + "epoch": 0.4128481963218, "grad_norm": 0.0, - "learning_rate": 1.3231251662883884e-05, - "loss": 0.829, + "learning_rate": 1.3248593705206838e-05, + "loss": 0.9467, "step": 14569 }, { - "epoch": 0.4134506242905789, + "epoch": 0.41287653376406247, "grad_norm": 0.0, - "learning_rate": 1.3230381873975667e-05, - "loss": 1.0267, + "learning_rate": 1.3247725681010223e-05, + "loss": 0.9739, "step": 14570 }, { - "epoch": 0.4134790011350738, + "epoch": 0.4129048712063249, "grad_norm": 0.0, - "learning_rate": 1.3229512057780267e-05, - "loss": 0.9698, + "learning_rate": 1.3246857629456808e-05, + "loss": 0.9498, "step": 14571 }, { - "epoch": 0.41350737797956866, + "epoch": 0.4129332086485874, "grad_norm": 0.0, - "learning_rate": 1.3228642214305033e-05, - "loss": 0.9295, + "learning_rate": 1.3245989550553909e-05, + "loss": 0.9001, "step": 14572 }, { - "epoch": 0.4135357548240636, + "epoch": 0.41296154609084984, "grad_norm": 0.0, - "learning_rate": 1.3227772343557316e-05, - "loss": 0.9429, + "learning_rate": 1.324512144430883e-05, + "loss": 1.0912, "step": 14573 }, { - "epoch": 0.41356413166855843, + "epoch": 0.4129898835331123, "grad_norm": 0.0, - "learning_rate": 1.322690244554446e-05, - "loss": 0.9011, + "learning_rate": 1.324425331072889e-05, + "loss": 0.8878, "step": 14574 }, { - "epoch": 0.41359250851305335, + "epoch": 0.4130182209753748, "grad_norm": 0.0, - "learning_rate": 1.3226032520273812e-05, - "loss": 0.9344, + "learning_rate": 1.3243385149821402e-05, + "loss": 0.8798, "step": 14575 }, { - "epoch": 0.41362088535754826, + "epoch": 0.4130465584176372, "grad_norm": 0.0, - "learning_rate": 1.3225162567752725e-05, - "loss": 0.9391, + "learning_rate": 1.3242516961593672e-05, + "loss": 0.969, "step": 14576 }, { - "epoch": 0.4136492622020431, + "epoch": 0.4130748958598997, "grad_norm": 0.0, - "learning_rate": 1.3224292587988545e-05, - "loss": 0.869, + "learning_rate": 1.324164874605302e-05, + "loss": 0.8483, "step": 14577 }, { - "epoch": 0.41367763904653804, + "epoch": 0.41310323330216214, "grad_norm": 0.0, - "learning_rate": 1.322342258098862e-05, - "loss": 0.8201, + "learning_rate": 1.3240780503206755e-05, + "loss": 0.8179, "step": 14578 }, { - "epoch": 0.4137060158910329, + "epoch": 0.4131315707444246, "grad_norm": 0.0, - "learning_rate": 1.3222552546760297e-05, - "loss": 1.0504, + "learning_rate": 1.3239912233062198e-05, + "loss": 0.9998, "step": 14579 }, { - "epoch": 0.4137343927355278, + "epoch": 0.4131599081866871, "grad_norm": 0.0, - "learning_rate": 1.3221682485310929e-05, - "loss": 1.0353, + "learning_rate": 1.3239043935626652e-05, + "loss": 0.9204, "step": 14580 }, { - "epoch": 0.4137627695800227, + "epoch": 0.4131882456289495, "grad_norm": 0.0, - "learning_rate": 1.3220812396647867e-05, - "loss": 0.8658, + "learning_rate": 1.3238175610907437e-05, + "loss": 0.8704, "step": 14581 }, { - "epoch": 0.4137911464245176, + "epoch": 0.413216583071212, "grad_norm": 0.0, - "learning_rate": 1.3219942280778456e-05, - "loss": 0.8345, + "learning_rate": 1.3237307258911867e-05, + "loss": 0.8183, "step": 14582 }, { - "epoch": 0.4138195232690125, + "epoch": 0.41324492051347445, "grad_norm": 0.0, - "learning_rate": 1.3219072137710048e-05, - "loss": 1.031, + "learning_rate": 1.3236438879647256e-05, + "loss": 1.0214, "step": 14583 }, { - "epoch": 0.41384790011350736, + "epoch": 0.41327325795573694, "grad_norm": 0.0, - "learning_rate": 1.3218201967449993e-05, - "loss": 0.9769, + "learning_rate": 1.3235570473120917e-05, + "loss": 0.7542, "step": 14584 }, { - "epoch": 0.4138762769580023, + "epoch": 0.4133015953979994, "grad_norm": 0.0, - "learning_rate": 1.3217331770005639e-05, - "loss": 0.9029, + "learning_rate": 1.3234702039340167e-05, + "loss": 0.9714, "step": 14585 }, { - "epoch": 0.41390465380249714, + "epoch": 0.4133299328402618, "grad_norm": 0.0, - "learning_rate": 1.321646154538434e-05, - "loss": 0.9644, + "learning_rate": 1.3233833578312321e-05, + "loss": 0.799, "step": 14586 }, { - "epoch": 0.41393303064699205, + "epoch": 0.4133582702825243, "grad_norm": 0.0, - "learning_rate": 1.3215591293593449e-05, - "loss": 0.9341, + "learning_rate": 1.3232965090044694e-05, + "loss": 0.9702, "step": 14587 }, { - "epoch": 0.41396140749148697, + "epoch": 0.41338660772478675, "grad_norm": 0.0, - "learning_rate": 1.321472101464031e-05, - "loss": 1.0121, + "learning_rate": 1.3232096574544602e-05, + "loss": 0.9645, "step": 14588 }, { - "epoch": 0.4139897843359818, + "epoch": 0.41341494516704924, "grad_norm": 0.0, - "learning_rate": 1.3213850708532278e-05, - "loss": 0.9025, + "learning_rate": 1.3231228031819358e-05, + "loss": 0.8651, "step": 14589 }, { - "epoch": 0.41401816118047674, + "epoch": 0.4134432826093117, "grad_norm": 0.0, - "learning_rate": 1.3212980375276708e-05, - "loss": 0.9892, + "learning_rate": 1.3230359461876282e-05, + "loss": 0.8705, "step": 14590 }, { - "epoch": 0.4140465380249716, + "epoch": 0.4134716200515741, "grad_norm": 0.0, - "learning_rate": 1.3212110014880945e-05, - "loss": 0.9677, + "learning_rate": 1.322949086472269e-05, + "loss": 0.9885, "step": 14591 }, { - "epoch": 0.4140749148694665, + "epoch": 0.4134999574938366, "grad_norm": 0.0, - "learning_rate": 1.3211239627352345e-05, - "loss": 0.9195, + "learning_rate": 1.3228622240365896e-05, + "loss": 1.118, "step": 14592 }, { - "epoch": 0.41410329171396143, + "epoch": 0.41352829493609905, "grad_norm": 0.0, - "learning_rate": 1.3210369212698262e-05, - "loss": 0.8575, + "learning_rate": 1.3227753588813217e-05, + "loss": 0.9714, "step": 14593 }, { - "epoch": 0.4141316685584563, + "epoch": 0.41355663237836154, "grad_norm": 0.0, - "learning_rate": 1.3209498770926043e-05, - "loss": 0.9605, + "learning_rate": 1.3226884910071973e-05, + "loss": 0.8923, "step": 14594 }, { - "epoch": 0.4141600454029512, + "epoch": 0.413584969820624, "grad_norm": 0.0, - "learning_rate": 1.3208628302043042e-05, - "loss": 1.0013, + "learning_rate": 1.322601620414948e-05, + "loss": 0.9936, "step": 14595 }, { - "epoch": 0.41418842224744606, + "epoch": 0.4136133072628865, "grad_norm": 0.0, - "learning_rate": 1.3207757806056619e-05, - "loss": 0.9097, + "learning_rate": 1.322514747105305e-05, + "loss": 0.9746, "step": 14596 }, { - "epoch": 0.414216799091941, + "epoch": 0.4136416447051489, "grad_norm": 0.0, - "learning_rate": 1.320688728297412e-05, - "loss": 0.834, + "learning_rate": 1.3224278710790008e-05, + "loss": 0.896, "step": 14597 }, { - "epoch": 0.4142451759364359, + "epoch": 0.41366998214741135, "grad_norm": 0.0, - "learning_rate": 1.3206016732802898e-05, - "loss": 1.0315, + "learning_rate": 1.3223409923367669e-05, + "loss": 0.9105, "step": 14598 }, { - "epoch": 0.41427355278093075, + "epoch": 0.41369831958967385, "grad_norm": 0.0, - "learning_rate": 1.3205146155550309e-05, - "loss": 0.8726, + "learning_rate": 1.3222541108793352e-05, + "loss": 0.9519, "step": 14599 }, { - "epoch": 0.41430192962542567, + "epoch": 0.4137266570319363, "grad_norm": 0.0, - "learning_rate": 1.3204275551223709e-05, - "loss": 1.0088, + "learning_rate": 1.3221672267074375e-05, + "loss": 0.9199, "step": 14600 }, { - "epoch": 0.41433030646992053, + "epoch": 0.4137549944741988, "grad_norm": 0.0, - "learning_rate": 1.3203404919830447e-05, - "loss": 0.9359, + "learning_rate": 1.3220803398218056e-05, + "loss": 0.9057, "step": 14601 }, { - "epoch": 0.41435868331441544, + "epoch": 0.4137833319164612, "grad_norm": 0.0, - "learning_rate": 1.3202534261377875e-05, - "loss": 0.8907, + "learning_rate": 1.3219934502231711e-05, + "loss": 0.928, "step": 14602 }, { - "epoch": 0.4143870601589103, + "epoch": 0.41381166935872365, "grad_norm": 0.0, - "learning_rate": 1.3201663575873359e-05, - "loss": 0.874, + "learning_rate": 1.3219065579122663e-05, + "loss": 1.0167, "step": 14603 }, { - "epoch": 0.4144154370034052, + "epoch": 0.41384000680098615, "grad_norm": 0.0, - "learning_rate": 1.3200792863324246e-05, - "loss": 0.7891, + "learning_rate": 1.3218196628898232e-05, + "loss": 0.9532, "step": 14604 }, { - "epoch": 0.41444381384790013, + "epoch": 0.4138683442432486, "grad_norm": 0.0, - "learning_rate": 1.3199922123737887e-05, - "loss": 0.9456, + "learning_rate": 1.3217327651565734e-05, + "loss": 0.8729, "step": 14605 }, { - "epoch": 0.414472190692395, + "epoch": 0.4138966816855111, "grad_norm": 0.0, - "learning_rate": 1.3199051357121647e-05, - "loss": 0.9782, + "learning_rate": 1.321645864713249e-05, + "loss": 0.8807, "step": 14606 }, { - "epoch": 0.4145005675368899, + "epoch": 0.4139250191277735, "grad_norm": 0.0, - "learning_rate": 1.3198180563482877e-05, - "loss": 1.0405, + "learning_rate": 1.3215589615605824e-05, + "loss": 0.9831, "step": 14607 }, { - "epoch": 0.41452894438138477, + "epoch": 0.413953356570036, "grad_norm": 0.0, - "learning_rate": 1.3197309742828927e-05, - "loss": 0.9145, + "learning_rate": 1.321472055699305e-05, + "loss": 0.8785, "step": 14608 }, { - "epoch": 0.4145573212258797, + "epoch": 0.41398169401229845, "grad_norm": 0.0, - "learning_rate": 1.319643889516716e-05, - "loss": 0.8445, + "learning_rate": 1.3213851471301492e-05, + "loss": 0.9304, "step": 14609 }, { - "epoch": 0.4145856980703746, + "epoch": 0.4140100314545609, "grad_norm": 0.0, - "learning_rate": 1.3195568020504932e-05, - "loss": 1.049, + "learning_rate": 1.3212982358538467e-05, + "loss": 0.9401, "step": 14610 }, { - "epoch": 0.41461407491486946, + "epoch": 0.4140383688968234, "grad_norm": 0.0, - "learning_rate": 1.3194697118849598e-05, - "loss": 0.9107, + "learning_rate": 1.3212113218711302e-05, + "loss": 0.9074, "step": 14611 }, { - "epoch": 0.41464245175936437, + "epoch": 0.4140667063390858, "grad_norm": 0.0, - "learning_rate": 1.3193826190208507e-05, - "loss": 0.8927, + "learning_rate": 1.3211244051827312e-05, + "loss": 0.9814, "step": 14612 }, { - "epoch": 0.41467082860385923, + "epoch": 0.4140950437813483, "grad_norm": 0.0, - "learning_rate": 1.3192955234589028e-05, - "loss": 0.983, + "learning_rate": 1.3210374857893824e-05, + "loss": 0.871, "step": 14613 }, { - "epoch": 0.41469920544835415, + "epoch": 0.41412338122361075, "grad_norm": 0.0, - "learning_rate": 1.3192084251998515e-05, - "loss": 0.8817, + "learning_rate": 1.3209505636918154e-05, + "loss": 0.9156, "step": 14614 }, { - "epoch": 0.41472758229284906, + "epoch": 0.4141517186658732, "grad_norm": 0.0, - "learning_rate": 1.3191213242444318e-05, - "loss": 0.9825, + "learning_rate": 1.3208636388907627e-05, + "loss": 0.9991, "step": 14615 }, { - "epoch": 0.4147559591373439, + "epoch": 0.4141800561081357, "grad_norm": 0.0, - "learning_rate": 1.31903422059338e-05, - "loss": 0.8434, + "learning_rate": 1.320776711386956e-05, + "loss": 0.9353, "step": 14616 }, { - "epoch": 0.41478433598183884, + "epoch": 0.4142083935503981, "grad_norm": 0.0, - "learning_rate": 1.318947114247432e-05, - "loss": 0.9081, + "learning_rate": 1.3206897811811285e-05, + "loss": 1.0045, "step": 14617 }, { - "epoch": 0.4148127128263337, + "epoch": 0.4142367309926606, "grad_norm": 0.0, - "learning_rate": 1.3188600052073235e-05, - "loss": 1.0483, + "learning_rate": 1.3206028482740116e-05, + "loss": 0.9669, "step": 14618 }, { - "epoch": 0.4148410896708286, + "epoch": 0.41426506843492306, "grad_norm": 0.0, - "learning_rate": 1.3187728934737897e-05, - "loss": 0.8585, + "learning_rate": 1.320515912666338e-05, + "loss": 0.9522, "step": 14619 }, { - "epoch": 0.41486946651532347, + "epoch": 0.41429340587718555, "grad_norm": 0.0, - "learning_rate": 1.3186857790475673e-05, - "loss": 0.9467, + "learning_rate": 1.32042897435884e-05, + "loss": 0.881, "step": 14620 }, { - "epoch": 0.4148978433598184, + "epoch": 0.414321743319448, "grad_norm": 0.0, - "learning_rate": 1.318598661929392e-05, - "loss": 0.8167, + "learning_rate": 1.3203420333522497e-05, + "loss": 0.9684, "step": 14621 }, { - "epoch": 0.4149262202043133, + "epoch": 0.4143500807617104, "grad_norm": 0.0, - "learning_rate": 1.318511542119999e-05, - "loss": 0.9128, + "learning_rate": 1.3202550896472993e-05, + "loss": 0.9576, "step": 14622 }, { - "epoch": 0.41495459704880816, + "epoch": 0.4143784182039729, "grad_norm": 0.0, - "learning_rate": 1.318424419620125e-05, - "loss": 0.9253, + "learning_rate": 1.320168143244721e-05, + "loss": 0.8521, "step": 14623 }, { - "epoch": 0.4149829738933031, + "epoch": 0.41440675564623536, "grad_norm": 0.0, - "learning_rate": 1.3183372944305055e-05, - "loss": 0.8444, + "learning_rate": 1.320081194145248e-05, + "loss": 0.9291, "step": 14624 }, { - "epoch": 0.41501135073779793, + "epoch": 0.41443509308849785, "grad_norm": 0.0, - "learning_rate": 1.3182501665518766e-05, - "loss": 0.8403, + "learning_rate": 1.3199942423496123e-05, + "loss": 0.9854, "step": 14625 }, { - "epoch": 0.41503972758229285, + "epoch": 0.4144634305307603, "grad_norm": 0.0, - "learning_rate": 1.318163035984974e-05, - "loss": 0.968, + "learning_rate": 1.3199072878585464e-05, + "loss": 0.9481, "step": 14626 }, { - "epoch": 0.41506810442678777, + "epoch": 0.41449176797302273, "grad_norm": 0.0, - "learning_rate": 1.3180759027305342e-05, - "loss": 0.9355, + "learning_rate": 1.3198203306727822e-05, + "loss": 0.959, "step": 14627 }, { - "epoch": 0.4150964812712826, + "epoch": 0.4145201054152852, "grad_norm": 0.0, - "learning_rate": 1.317988766789293e-05, - "loss": 0.9164, + "learning_rate": 1.3197333707930527e-05, + "loss": 0.8732, "step": 14628 }, { - "epoch": 0.41512485811577754, + "epoch": 0.41454844285754766, "grad_norm": 0.0, - "learning_rate": 1.317901628161986e-05, - "loss": 0.8456, + "learning_rate": 1.3196464082200903e-05, + "loss": 1.0775, "step": 14629 }, { - "epoch": 0.4151532349602724, + "epoch": 0.41457678029981015, "grad_norm": 0.0, - "learning_rate": 1.3178144868493502e-05, - "loss": 0.9249, + "learning_rate": 1.3195594429546279e-05, + "loss": 0.9302, "step": 14630 }, { - "epoch": 0.4151816118047673, + "epoch": 0.4146051177420726, "grad_norm": 0.0, - "learning_rate": 1.3177273428521211e-05, - "loss": 0.8504, + "learning_rate": 1.3194724749973971e-05, + "loss": 0.8775, "step": 14631 }, { - "epoch": 0.41520998864926223, + "epoch": 0.4146334551843351, "grad_norm": 0.0, - "learning_rate": 1.3176401961710343e-05, - "loss": 0.9502, + "learning_rate": 1.3193855043491313e-05, + "loss": 0.8422, "step": 14632 }, { - "epoch": 0.4152383654937571, + "epoch": 0.4146617926265975, "grad_norm": 0.0, - "learning_rate": 1.3175530468068267e-05, - "loss": 0.9315, + "learning_rate": 1.3192985310105628e-05, + "loss": 0.8469, "step": 14633 }, { - "epoch": 0.415266742338252, + "epoch": 0.41469013006885996, "grad_norm": 0.0, - "learning_rate": 1.3174658947602346e-05, - "loss": 0.8885, + "learning_rate": 1.319211554982424e-05, + "loss": 0.9095, "step": 14634 }, { - "epoch": 0.41529511918274686, + "epoch": 0.41471846751112246, "grad_norm": 0.0, - "learning_rate": 1.3173787400319938e-05, - "loss": 1.0204, + "learning_rate": 1.319124576265448e-05, + "loss": 0.9782, "step": 14635 }, { - "epoch": 0.4153234960272418, + "epoch": 0.4147468049533849, "grad_norm": 0.0, - "learning_rate": 1.3172915826228398e-05, - "loss": 0.9695, + "learning_rate": 1.3190375948603668e-05, + "loss": 0.8675, "step": 14636 }, { - "epoch": 0.41535187287173664, + "epoch": 0.4147751423956474, "grad_norm": 0.0, - "learning_rate": 1.3172044225335103e-05, - "loss": 0.8114, + "learning_rate": 1.3189506107679139e-05, + "loss": 0.951, "step": 14637 }, { - "epoch": 0.41538024971623155, + "epoch": 0.4148034798379098, "grad_norm": 0.0, - "learning_rate": 1.3171172597647407e-05, - "loss": 0.8145, + "learning_rate": 1.3188636239888216e-05, + "loss": 1.0543, "step": 14638 }, { - "epoch": 0.41540862656072647, + "epoch": 0.41483181728017227, "grad_norm": 0.0, - "learning_rate": 1.3170300943172669e-05, - "loss": 0.9421, + "learning_rate": 1.3187766345238222e-05, + "loss": 0.9154, "step": 14639 }, { - "epoch": 0.41543700340522133, + "epoch": 0.41486015472243476, "grad_norm": 0.0, - "learning_rate": 1.3169429261918258e-05, - "loss": 1.0016, + "learning_rate": 1.318689642373649e-05, + "loss": 0.9108, "step": 14640 }, { - "epoch": 0.41546538024971624, + "epoch": 0.4148884921646972, "grad_norm": 0.0, - "learning_rate": 1.316855755389154e-05, - "loss": 0.8801, + "learning_rate": 1.3186026475390345e-05, + "loss": 0.8769, "step": 14641 }, { - "epoch": 0.4154937570942111, + "epoch": 0.4149168296069597, "grad_norm": 0.0, - "learning_rate": 1.3167685819099869e-05, - "loss": 0.9076, + "learning_rate": 1.318515650020712e-05, + "loss": 0.9787, "step": 14642 }, { - "epoch": 0.415522133938706, + "epoch": 0.41494516704922213, "grad_norm": 0.0, - "learning_rate": 1.3166814057550614e-05, - "loss": 0.834, + "learning_rate": 1.3184286498194134e-05, + "loss": 0.9555, "step": 14643 }, { - "epoch": 0.41555051078320093, + "epoch": 0.4149735044914846, "grad_norm": 0.0, - "learning_rate": 1.3165942269251139e-05, - "loss": 0.9518, + "learning_rate": 1.3183416469358724e-05, + "loss": 0.9551, "step": 14644 }, { - "epoch": 0.4155788876276958, + "epoch": 0.41500184193374706, "grad_norm": 0.0, - "learning_rate": 1.316507045420881e-05, - "loss": 1.0314, + "learning_rate": 1.3182546413708212e-05, + "loss": 0.9263, "step": 14645 }, { - "epoch": 0.4156072644721907, + "epoch": 0.4150301793760095, "grad_norm": 0.0, - "learning_rate": 1.3164198612430985e-05, - "loss": 0.8684, + "learning_rate": 1.3181676331249932e-05, + "loss": 0.9146, "step": 14646 }, { - "epoch": 0.41563564131668557, + "epoch": 0.415058516818272, "grad_norm": 0.0, - "learning_rate": 1.3163326743925034e-05, - "loss": 1.0095, + "learning_rate": 1.3180806221991209e-05, + "loss": 0.8249, "step": 14647 }, { - "epoch": 0.4156640181611805, + "epoch": 0.41508685426053443, "grad_norm": 0.0, - "learning_rate": 1.3162454848698318e-05, - "loss": 0.9553, + "learning_rate": 1.3179936085939372e-05, + "loss": 1.0562, "step": 14648 }, { - "epoch": 0.41569239500567534, + "epoch": 0.4151151917027969, "grad_norm": 0.0, - "learning_rate": 1.3161582926758202e-05, - "loss": 0.9376, + "learning_rate": 1.3179065923101759e-05, + "loss": 0.8568, "step": 14649 }, { - "epoch": 0.41572077185017026, + "epoch": 0.41514352914505936, "grad_norm": 0.0, - "learning_rate": 1.316071097811206e-05, - "loss": 0.8712, + "learning_rate": 1.3178195733485689e-05, + "loss": 0.9341, "step": 14650 }, { - "epoch": 0.41574914869466517, + "epoch": 0.4151718665873218, "grad_norm": 0.0, - "learning_rate": 1.3159839002767245e-05, - "loss": 1.0194, + "learning_rate": 1.3177325517098498e-05, + "loss": 0.9143, "step": 14651 }, { - "epoch": 0.41577752553916003, + "epoch": 0.4152002040295843, "grad_norm": 0.0, - "learning_rate": 1.3158967000731129e-05, - "loss": 0.9805, + "learning_rate": 1.3176455273947513e-05, + "loss": 0.8711, "step": 14652 }, { - "epoch": 0.41580590238365495, + "epoch": 0.41522854147184673, "grad_norm": 0.0, - "learning_rate": 1.3158094972011077e-05, - "loss": 0.9432, + "learning_rate": 1.3175585004040066e-05, + "loss": 0.9309, "step": 14653 }, { - "epoch": 0.4158342792281498, + "epoch": 0.41525687891410923, "grad_norm": 0.0, - "learning_rate": 1.3157222916614454e-05, - "loss": 0.9367, + "learning_rate": 1.3174714707383485e-05, + "loss": 0.9681, "step": 14654 }, { - "epoch": 0.4158626560726447, + "epoch": 0.41528521635637167, "grad_norm": 0.0, - "learning_rate": 1.3156350834548625e-05, - "loss": 0.9107, + "learning_rate": 1.3173844383985107e-05, + "loss": 0.9103, "step": 14655 }, { - "epoch": 0.41589103291713964, + "epoch": 0.41531355379863416, "grad_norm": 0.0, - "learning_rate": 1.315547872582096e-05, - "loss": 0.9403, + "learning_rate": 1.3172974033852254e-05, + "loss": 0.9442, "step": 14656 }, { - "epoch": 0.4159194097616345, + "epoch": 0.4153418912408966, "grad_norm": 0.0, - "learning_rate": 1.3154606590438828e-05, - "loss": 1.0266, + "learning_rate": 1.3172103656992268e-05, + "loss": 0.9551, "step": 14657 }, { - "epoch": 0.4159477866061294, + "epoch": 0.41537022868315904, "grad_norm": 0.0, - "learning_rate": 1.315373442840959e-05, - "loss": 0.9629, + "learning_rate": 1.3171233253412475e-05, + "loss": 0.8315, "step": 14658 }, { - "epoch": 0.41597616345062427, + "epoch": 0.41539856612542153, "grad_norm": 0.0, - "learning_rate": 1.3152862239740616e-05, - "loss": 0.8471, + "learning_rate": 1.3170362823120204e-05, + "loss": 0.8382, "step": 14659 }, { - "epoch": 0.4160045402951192, + "epoch": 0.41542690356768397, "grad_norm": 0.0, - "learning_rate": 1.3151990024439272e-05, - "loss": 0.9273, + "learning_rate": 1.316949236612279e-05, + "loss": 0.8366, "step": 14660 }, { - "epoch": 0.4160329171396141, + "epoch": 0.41545524100994646, "grad_norm": 0.0, - "learning_rate": 1.3151117782512928e-05, - "loss": 0.8701, + "learning_rate": 1.3168621882427561e-05, + "loss": 0.9436, "step": 14661 }, { - "epoch": 0.41606129398410896, + "epoch": 0.4154835784522089, "grad_norm": 0.0, - "learning_rate": 1.3150245513968947e-05, - "loss": 0.8601, + "learning_rate": 1.316775137204186e-05, + "loss": 0.9875, "step": 14662 }, { - "epoch": 0.4160896708286039, + "epoch": 0.41551191589447134, "grad_norm": 0.0, - "learning_rate": 1.3149373218814703e-05, - "loss": 0.9757, + "learning_rate": 1.316688083497301e-05, + "loss": 0.9606, "step": 14663 }, { - "epoch": 0.41611804767309873, + "epoch": 0.41554025333673383, "grad_norm": 0.0, - "learning_rate": 1.314850089705756e-05, - "loss": 0.8305, + "learning_rate": 1.3166010271228347e-05, + "loss": 0.8723, "step": 14664 }, { - "epoch": 0.41614642451759365, + "epoch": 0.41556859077899627, "grad_norm": 0.0, - "learning_rate": 1.3147628548704893e-05, - "loss": 0.9306, + "learning_rate": 1.3165139680815202e-05, + "loss": 0.9078, "step": 14665 }, { - "epoch": 0.4161748013620885, + "epoch": 0.41559692822125877, "grad_norm": 0.0, - "learning_rate": 1.3146756173764061e-05, - "loss": 0.8874, + "learning_rate": 1.3164269063740914e-05, + "loss": 0.7781, "step": 14666 }, { - "epoch": 0.4162031782065834, + "epoch": 0.4156252656635212, "grad_norm": 0.0, - "learning_rate": 1.3145883772242443e-05, - "loss": 0.9318, + "learning_rate": 1.316339842001281e-05, + "loss": 0.9487, "step": 14667 }, { - "epoch": 0.41623155505107834, + "epoch": 0.4156536031057837, "grad_norm": 0.0, - "learning_rate": 1.31450113441474e-05, - "loss": 0.9116, + "learning_rate": 1.3162527749638226e-05, + "loss": 0.9253, "step": 14668 }, { - "epoch": 0.4162599318955732, + "epoch": 0.41568194054804614, "grad_norm": 0.0, - "learning_rate": 1.3144138889486304e-05, - "loss": 0.9014, + "learning_rate": 1.3161657052624497e-05, + "loss": 0.9385, "step": 14669 }, { - "epoch": 0.4162883087400681, + "epoch": 0.4157102779903086, "grad_norm": 0.0, - "learning_rate": 1.3143266408266528e-05, - "loss": 0.8896, + "learning_rate": 1.3160786328978956e-05, + "loss": 0.93, "step": 14670 }, { - "epoch": 0.416316685584563, + "epoch": 0.41573861543257107, "grad_norm": 0.0, - "learning_rate": 1.3142393900495438e-05, - "loss": 0.9376, + "learning_rate": 1.3159915578708939e-05, + "loss": 0.8801, "step": 14671 }, { - "epoch": 0.4163450624290579, + "epoch": 0.4157669528748335, "grad_norm": 0.0, - "learning_rate": 1.3141521366180407e-05, - "loss": 0.9088, + "learning_rate": 1.3159044801821779e-05, + "loss": 0.9132, "step": 14672 }, { - "epoch": 0.4163734392735528, + "epoch": 0.415795290317096, "grad_norm": 0.0, - "learning_rate": 1.3140648805328801e-05, - "loss": 0.8607, + "learning_rate": 1.3158173998324816e-05, + "loss": 0.9365, "step": 14673 }, { - "epoch": 0.41640181611804766, + "epoch": 0.41582362775935844, "grad_norm": 0.0, - "learning_rate": 1.3139776217947996e-05, - "loss": 1.0578, + "learning_rate": 1.3157303168225373e-05, + "loss": 0.944, "step": 14674 }, { - "epoch": 0.4164301929625426, + "epoch": 0.4158519652016209, "grad_norm": 0.0, - "learning_rate": 1.3138903604045358e-05, - "loss": 0.9068, + "learning_rate": 1.3156432311530797e-05, + "loss": 0.8902, "step": 14675 }, { - "epoch": 0.41645856980703744, + "epoch": 0.41588030264388337, "grad_norm": 0.0, - "learning_rate": 1.3138030963628263e-05, - "loss": 0.9275, + "learning_rate": 1.3155561428248418e-05, + "loss": 0.9224, "step": 14676 }, { - "epoch": 0.41648694665153235, + "epoch": 0.4159086400861458, "grad_norm": 0.0, - "learning_rate": 1.3137158296704078e-05, - "loss": 0.99, + "learning_rate": 1.3154690518385575e-05, + "loss": 0.8667, "step": 14677 }, { - "epoch": 0.41651532349602727, + "epoch": 0.4159369775284083, "grad_norm": 0.0, - "learning_rate": 1.3136285603280174e-05, - "loss": 1.054, + "learning_rate": 1.3153819581949603e-05, + "loss": 0.9237, "step": 14678 }, { - "epoch": 0.4165437003405221, + "epoch": 0.41596531497067074, "grad_norm": 0.0, - "learning_rate": 1.3135412883363928e-05, - "loss": 0.8929, + "learning_rate": 1.3152948618947839e-05, + "loss": 0.8977, "step": 14679 }, { - "epoch": 0.41657207718501704, + "epoch": 0.41599365241293323, "grad_norm": 0.0, - "learning_rate": 1.3134540136962704e-05, - "loss": 0.8638, + "learning_rate": 1.3152077629387612e-05, + "loss": 0.8994, "step": 14680 }, { - "epoch": 0.4166004540295119, + "epoch": 0.4160219898551957, "grad_norm": 0.0, - "learning_rate": 1.3133667364083882e-05, - "loss": 0.9583, + "learning_rate": 1.3151206613276265e-05, + "loss": 0.9151, "step": 14681 }, { - "epoch": 0.4166288308740068, + "epoch": 0.4160503272974581, "grad_norm": 0.0, - "learning_rate": 1.3132794564734829e-05, - "loss": 0.8878, + "learning_rate": 1.315033557062114e-05, + "loss": 0.9497, "step": 14682 }, { - "epoch": 0.4166572077185017, + "epoch": 0.4160786647397206, "grad_norm": 0.0, - "learning_rate": 1.3131921738922921e-05, - "loss": 0.9544, + "learning_rate": 1.3149464501429568e-05, + "loss": 0.9111, "step": 14683 }, { - "epoch": 0.4166855845629966, + "epoch": 0.41610700218198304, "grad_norm": 0.0, - "learning_rate": 1.3131048886655529e-05, - "loss": 0.8914, + "learning_rate": 1.3148593405708886e-05, + "loss": 0.9225, "step": 14684 }, { - "epoch": 0.4167139614074915, + "epoch": 0.41613533962424554, "grad_norm": 0.0, - "learning_rate": 1.3130176007940027e-05, - "loss": 0.9212, + "learning_rate": 1.3147722283466428e-05, + "loss": 0.9906, "step": 14685 }, { - "epoch": 0.41674233825198637, + "epoch": 0.416163677066508, "grad_norm": 0.0, - "learning_rate": 1.3129303102783785e-05, - "loss": 0.9707, + "learning_rate": 1.3146851134709542e-05, + "loss": 0.8315, "step": 14686 }, { - "epoch": 0.4167707150964813, + "epoch": 0.4161920145087704, "grad_norm": 0.0, - "learning_rate": 1.312843017119418e-05, - "loss": 0.8619, + "learning_rate": 1.3145979959445556e-05, + "loss": 0.9189, "step": 14687 }, { - "epoch": 0.41679909194097614, + "epoch": 0.4162203519510329, "grad_norm": 0.0, - "learning_rate": 1.3127557213178586e-05, - "loss": 0.9343, + "learning_rate": 1.3145108757681818e-05, + "loss": 0.9939, "step": 14688 }, { - "epoch": 0.41682746878547106, + "epoch": 0.41624868939329535, "grad_norm": 0.0, - "learning_rate": 1.3126684228744375e-05, - "loss": 0.8765, + "learning_rate": 1.3144237529425655e-05, + "loss": 0.9575, "step": 14689 }, { - "epoch": 0.41685584562996597, + "epoch": 0.41627702683555784, "grad_norm": 0.0, - "learning_rate": 1.312581121789892e-05, - "loss": 1.0118, + "learning_rate": 1.3143366274684415e-05, + "loss": 0.922, "step": 14690 }, { - "epoch": 0.41688422247446083, + "epoch": 0.4163053642778203, "grad_norm": 0.0, - "learning_rate": 1.3124938180649601e-05, - "loss": 0.9834, + "learning_rate": 1.3142494993465435e-05, + "loss": 0.9132, "step": 14691 }, { - "epoch": 0.41691259931895575, + "epoch": 0.41633370172008277, "grad_norm": 0.0, - "learning_rate": 1.3124065117003786e-05, - "loss": 0.9202, + "learning_rate": 1.314162368577605e-05, + "loss": 0.8454, "step": 14692 }, { - "epoch": 0.4169409761634506, + "epoch": 0.4163620391623452, "grad_norm": 0.0, - "learning_rate": 1.3123192026968851e-05, - "loss": 1.0343, + "learning_rate": 1.3140752351623602e-05, + "loss": 0.8733, "step": 14693 }, { - "epoch": 0.4169693530079455, + "epoch": 0.41639037660460765, "grad_norm": 0.0, - "learning_rate": 1.3122318910552174e-05, - "loss": 0.9337, + "learning_rate": 1.3139880991015432e-05, + "loss": 0.8475, "step": 14694 }, { - "epoch": 0.41699772985244044, + "epoch": 0.41641871404687014, "grad_norm": 0.0, - "learning_rate": 1.3121445767761132e-05, - "loss": 0.9228, + "learning_rate": 1.313900960395888e-05, + "loss": 0.9833, "step": 14695 }, { - "epoch": 0.4170261066969353, + "epoch": 0.4164470514891326, "grad_norm": 0.0, - "learning_rate": 1.3120572598603094e-05, - "loss": 0.89, + "learning_rate": 1.313813819046128e-05, + "loss": 0.9968, "step": 14696 }, { - "epoch": 0.4170544835414302, + "epoch": 0.4164753889313951, "grad_norm": 0.0, - "learning_rate": 1.3119699403085439e-05, - "loss": 0.8914, + "learning_rate": 1.313726675052998e-05, + "loss": 0.8618, "step": 14697 }, { - "epoch": 0.41708286038592507, + "epoch": 0.4165037263736575, "grad_norm": 0.0, - "learning_rate": 1.3118826181215545e-05, - "loss": 0.9528, + "learning_rate": 1.3136395284172317e-05, + "loss": 0.9378, "step": 14698 }, { - "epoch": 0.41711123723042, + "epoch": 0.41653206381591995, "grad_norm": 0.0, - "learning_rate": 1.3117952933000783e-05, - "loss": 0.8743, + "learning_rate": 1.3135523791395632e-05, + "loss": 0.9222, "step": 14699 }, { - "epoch": 0.41713961407491484, + "epoch": 0.41656040125818244, "grad_norm": 0.0, - "learning_rate": 1.3117079658448535e-05, - "loss": 0.8774, + "learning_rate": 1.3134652272207265e-05, + "loss": 0.9491, "step": 14700 }, { - "epoch": 0.41716799091940976, + "epoch": 0.4165887387004449, "grad_norm": 0.0, - "learning_rate": 1.3116206357566173e-05, - "loss": 0.7841, + "learning_rate": 1.3133780726614556e-05, + "loss": 0.8164, "step": 14701 }, { - "epoch": 0.4171963677639047, + "epoch": 0.4166170761427074, "grad_norm": 0.0, - "learning_rate": 1.3115333030361078e-05, - "loss": 0.8919, + "learning_rate": 1.313290915462485e-05, + "loss": 0.9477, "step": 14702 }, { - "epoch": 0.41722474460839953, + "epoch": 0.4166454135849698, "grad_norm": 0.0, - "learning_rate": 1.311445967684062e-05, - "loss": 0.9589, + "learning_rate": 1.313203755624549e-05, + "loss": 0.8708, "step": 14703 }, { - "epoch": 0.41725312145289445, + "epoch": 0.4166737510272323, "grad_norm": 0.0, - "learning_rate": 1.3113586297012187e-05, - "loss": 0.8765, + "learning_rate": 1.313116593148381e-05, + "loss": 0.9983, "step": 14704 }, { - "epoch": 0.4172814982973893, + "epoch": 0.41670208846949475, "grad_norm": 0.0, - "learning_rate": 1.3112712890883149e-05, - "loss": 0.9438, + "learning_rate": 1.3130294280347157e-05, + "loss": 0.924, "step": 14705 }, { - "epoch": 0.4173098751418842, + "epoch": 0.4167304259117572, "grad_norm": 0.0, - "learning_rate": 1.3111839458460884e-05, - "loss": 0.8348, + "learning_rate": 1.3129422602842876e-05, + "loss": 0.8177, "step": 14706 }, { - "epoch": 0.41733825198637914, + "epoch": 0.4167587633540197, "grad_norm": 0.0, - "learning_rate": 1.3110965999752772e-05, - "loss": 0.8812, + "learning_rate": 1.3128550898978303e-05, + "loss": 0.9699, "step": 14707 }, { - "epoch": 0.417366628830874, + "epoch": 0.4167871007962821, "grad_norm": 0.0, - "learning_rate": 1.3110092514766191e-05, - "loss": 0.8884, + "learning_rate": 1.3127679168760785e-05, + "loss": 0.9586, "step": 14708 }, { - "epoch": 0.4173950056753689, + "epoch": 0.4168154382385446, "grad_norm": 0.0, - "learning_rate": 1.3109219003508517e-05, - "loss": 0.8738, + "learning_rate": 1.3126807412197666e-05, + "loss": 0.9514, "step": 14709 }, { - "epoch": 0.4174233825198638, + "epoch": 0.41684377568080705, "grad_norm": 0.0, - "learning_rate": 1.3108345465987128e-05, - "loss": 0.9628, + "learning_rate": 1.3125935629296284e-05, + "loss": 0.9214, "step": 14710 }, { - "epoch": 0.4174517593643587, + "epoch": 0.4168721131230695, "grad_norm": 0.0, - "learning_rate": 1.310747190220941e-05, - "loss": 0.9659, + "learning_rate": 1.3125063820063989e-05, + "loss": 0.9026, "step": 14711 }, { - "epoch": 0.4174801362088536, + "epoch": 0.416900450565332, "grad_norm": 0.0, - "learning_rate": 1.3106598312182736e-05, - "loss": 0.9011, + "learning_rate": 1.3124191984508118e-05, + "loss": 0.9022, "step": 14712 }, { - "epoch": 0.41750851305334846, + "epoch": 0.4169287880075944, "grad_norm": 0.0, - "learning_rate": 1.3105724695914486e-05, - "loss": 0.8744, + "learning_rate": 1.3123320122636019e-05, + "loss": 0.9966, "step": 14713 }, { - "epoch": 0.4175368898978434, + "epoch": 0.4169571254498569, "grad_norm": 0.0, - "learning_rate": 1.3104851053412039e-05, - "loss": 0.9595, + "learning_rate": 1.3122448234455031e-05, + "loss": 0.9313, "step": 14714 }, { - "epoch": 0.41756526674233824, + "epoch": 0.41698546289211935, "grad_norm": 0.0, - "learning_rate": 1.3103977384682778e-05, - "loss": 0.9396, + "learning_rate": 1.3121576319972507e-05, + "loss": 0.9514, "step": 14715 }, { - "epoch": 0.41759364358683315, + "epoch": 0.41701380033438185, "grad_norm": 0.0, - "learning_rate": 1.3103103689734079e-05, - "loss": 0.8223, + "learning_rate": 1.3120704379195785e-05, + "loss": 0.8656, "step": 14716 }, { - "epoch": 0.417622020431328, + "epoch": 0.4170421377766443, "grad_norm": 0.0, - "learning_rate": 1.3102229968573321e-05, - "loss": 0.9323, + "learning_rate": 1.3119832412132212e-05, + "loss": 0.8713, "step": 14717 }, { - "epoch": 0.4176503972758229, + "epoch": 0.4170704752189067, "grad_norm": 0.0, - "learning_rate": 1.310135622120789e-05, - "loss": 0.9877, + "learning_rate": 1.311896041878913e-05, + "loss": 0.8707, "step": 14718 }, { - "epoch": 0.41767877412031784, + "epoch": 0.4170988126611692, "grad_norm": 0.0, - "learning_rate": 1.3100482447645164e-05, - "loss": 0.799, + "learning_rate": 1.3118088399173888e-05, + "loss": 1.0044, "step": 14719 }, { - "epoch": 0.4177071509648127, + "epoch": 0.41712715010343165, "grad_norm": 0.0, - "learning_rate": 1.309960864789252e-05, - "loss": 0.8825, + "learning_rate": 1.311721635329383e-05, + "loss": 0.9565, "step": 14720 }, { - "epoch": 0.4177355278093076, + "epoch": 0.41715548754569415, "grad_norm": 0.0, - "learning_rate": 1.3098734821957347e-05, - "loss": 0.9486, + "learning_rate": 1.3116344281156299e-05, + "loss": 0.9215, "step": 14721 }, { - "epoch": 0.4177639046538025, + "epoch": 0.4171838249879566, "grad_norm": 0.0, - "learning_rate": 1.309786096984702e-05, - "loss": 0.9041, + "learning_rate": 1.3115472182768643e-05, + "loss": 0.8991, "step": 14722 }, { - "epoch": 0.4177922814982974, + "epoch": 0.417212162430219, "grad_norm": 0.0, - "learning_rate": 1.309698709156892e-05, - "loss": 1.0315, + "learning_rate": 1.311460005813821e-05, + "loss": 0.8891, "step": 14723 }, { - "epoch": 0.4178206583427923, + "epoch": 0.4172404998724815, "grad_norm": 0.0, - "learning_rate": 1.3096113187130432e-05, - "loss": 0.8781, + "learning_rate": 1.3113727907272341e-05, + "loss": 0.9422, "step": 14724 }, { - "epoch": 0.41784903518728717, + "epoch": 0.41726883731474396, "grad_norm": 0.0, - "learning_rate": 1.3095239256538939e-05, - "loss": 0.9629, + "learning_rate": 1.3112855730178389e-05, + "loss": 0.9919, "step": 14725 }, { - "epoch": 0.4178774120317821, + "epoch": 0.41729717475700645, "grad_norm": 0.0, - "learning_rate": 1.3094365299801818e-05, - "loss": 1.0045, + "learning_rate": 1.3111983526863696e-05, + "loss": 0.9757, "step": 14726 }, { - "epoch": 0.41790578887627694, + "epoch": 0.4173255121992689, "grad_norm": 0.0, - "learning_rate": 1.3093491316926454e-05, - "loss": 0.9731, + "learning_rate": 1.3111111297335608e-05, + "loss": 0.9324, "step": 14727 }, { - "epoch": 0.41793416572077186, + "epoch": 0.4173538496415314, "grad_norm": 0.0, - "learning_rate": 1.3092617307920233e-05, - "loss": 0.865, + "learning_rate": 1.3110239041601478e-05, + "loss": 0.9849, "step": 14728 }, { - "epoch": 0.4179625425652667, + "epoch": 0.4173821870837938, "grad_norm": 0.0, - "learning_rate": 1.3091743272790533e-05, - "loss": 0.8376, + "learning_rate": 1.3109366759668647e-05, + "loss": 0.9619, "step": 14729 }, { - "epoch": 0.41799091940976163, + "epoch": 0.41741052452605626, "grad_norm": 0.0, - "learning_rate": 1.309086921154474e-05, - "loss": 0.7502, + "learning_rate": 1.3108494451544464e-05, + "loss": 0.8814, "step": 14730 }, { - "epoch": 0.41801929625425654, + "epoch": 0.41743886196831875, "grad_norm": 0.0, - "learning_rate": 1.3089995124190234e-05, - "loss": 0.8343, + "learning_rate": 1.310762211723628e-05, + "loss": 0.8766, "step": 14731 }, { - "epoch": 0.4180476730987514, + "epoch": 0.4174671994105812, "grad_norm": 0.0, - "learning_rate": 1.3089121010734398e-05, - "loss": 0.9171, + "learning_rate": 1.3106749756751443e-05, + "loss": 0.7656, "step": 14732 }, { - "epoch": 0.4180760499432463, + "epoch": 0.4174955368528437, "grad_norm": 0.0, - "learning_rate": 1.3088246871184622e-05, - "loss": 1.0019, + "learning_rate": 1.3105877370097295e-05, + "loss": 0.8741, "step": 14733 }, { - "epoch": 0.4181044267877412, + "epoch": 0.4175238742951061, "grad_norm": 0.0, - "learning_rate": 1.3087372705548282e-05, - "loss": 0.7906, + "learning_rate": 1.3105004957281189e-05, + "loss": 0.8655, "step": 14734 }, { - "epoch": 0.4181328036322361, + "epoch": 0.41755221173736856, "grad_norm": 0.0, - "learning_rate": 1.3086498513832768e-05, - "loss": 0.9413, + "learning_rate": 1.3104132518310477e-05, + "loss": 0.9426, "step": 14735 }, { - "epoch": 0.418161180476731, + "epoch": 0.41758054917963106, "grad_norm": 0.0, - "learning_rate": 1.3085624296045465e-05, - "loss": 0.9284, + "learning_rate": 1.3103260053192501e-05, + "loss": 0.9067, "step": 14736 }, { - "epoch": 0.41818955732122587, + "epoch": 0.4176088866218935, "grad_norm": 0.0, - "learning_rate": 1.3084750052193749e-05, - "loss": 0.8481, + "learning_rate": 1.3102387561934615e-05, + "loss": 0.8624, "step": 14737 }, { - "epoch": 0.4182179341657208, + "epoch": 0.417637224064156, "grad_norm": 0.0, - "learning_rate": 1.3083875782285015e-05, - "loss": 0.9667, + "learning_rate": 1.3101515044544164e-05, + "loss": 0.9032, "step": 14738 }, { - "epoch": 0.41824631101021564, + "epoch": 0.4176655615064184, "grad_norm": 0.0, - "learning_rate": 1.3083001486326646e-05, - "loss": 0.8974, + "learning_rate": 1.3100642501028502e-05, + "loss": 0.9307, "step": 14739 }, { - "epoch": 0.41827468785471056, + "epoch": 0.4176938989486809, "grad_norm": 0.0, - "learning_rate": 1.308212716432602e-05, - "loss": 0.8669, + "learning_rate": 1.3099769931394977e-05, + "loss": 0.8508, "step": 14740 }, { - "epoch": 0.4183030646992055, + "epoch": 0.41772223639094336, "grad_norm": 0.0, - "learning_rate": 1.3081252816290527e-05, - "loss": 0.9401, + "learning_rate": 1.3098897335650938e-05, + "loss": 0.8581, "step": 14741 }, { - "epoch": 0.41833144154370033, + "epoch": 0.4177505738332058, "grad_norm": 0.0, - "learning_rate": 1.3080378442227554e-05, - "loss": 1.0125, + "learning_rate": 1.3098024713803736e-05, + "loss": 0.8662, "step": 14742 }, { - "epoch": 0.41835981838819525, + "epoch": 0.4177789112754683, "grad_norm": 0.0, - "learning_rate": 1.3079504042144487e-05, - "loss": 0.8951, + "learning_rate": 1.309715206586072e-05, + "loss": 1.0246, "step": 14743 }, { - "epoch": 0.4183881952326901, + "epoch": 0.41780724871773073, "grad_norm": 0.0, - "learning_rate": 1.307862961604871e-05, - "loss": 0.8957, + "learning_rate": 1.3096279391829245e-05, + "loss": 0.9074, "step": 14744 }, { - "epoch": 0.418416572077185, + "epoch": 0.4178355861599932, "grad_norm": 0.0, - "learning_rate": 1.3077755163947612e-05, - "loss": 0.918, + "learning_rate": 1.3095406691716657e-05, + "loss": 0.8242, "step": 14745 }, { - "epoch": 0.4184449489216799, + "epoch": 0.41786392360225566, "grad_norm": 0.0, - "learning_rate": 1.3076880685848576e-05, - "loss": 0.9178, + "learning_rate": 1.3094533965530309e-05, + "loss": 0.9795, "step": 14746 }, { - "epoch": 0.4184733257661748, + "epoch": 0.4178922610445181, "grad_norm": 0.0, - "learning_rate": 1.3076006181758989e-05, - "loss": 0.9826, + "learning_rate": 1.3093661213277553e-05, + "loss": 1.0282, "step": 14747 }, { - "epoch": 0.4185017026106697, + "epoch": 0.4179205984867806, "grad_norm": 0.0, - "learning_rate": 1.307513165168624e-05, - "loss": 0.9489, + "learning_rate": 1.309278843496574e-05, + "loss": 1.0496, "step": 14748 }, { - "epoch": 0.41853007945516457, + "epoch": 0.41794893592904303, "grad_norm": 0.0, - "learning_rate": 1.307425709563772e-05, - "loss": 0.9165, + "learning_rate": 1.3091915630602223e-05, + "loss": 0.9019, "step": 14749 }, { - "epoch": 0.4185584562996595, + "epoch": 0.4179772733713055, "grad_norm": 0.0, - "learning_rate": 1.307338251362081e-05, - "loss": 0.8333, + "learning_rate": 1.3091042800194352e-05, + "loss": 0.8275, "step": 14750 }, { - "epoch": 0.41858683314415435, + "epoch": 0.41800561081356796, "grad_norm": 0.0, - "learning_rate": 1.3072507905642897e-05, - "loss": 0.9007, + "learning_rate": 1.3090169943749475e-05, + "loss": 0.9051, "step": 14751 }, { - "epoch": 0.41861520998864926, + "epoch": 0.41803394825583046, "grad_norm": 0.0, - "learning_rate": 1.3071633271711375e-05, - "loss": 0.9119, + "learning_rate": 1.3089297061274953e-05, + "loss": 1.0233, "step": 14752 }, { - "epoch": 0.4186435868331442, + "epoch": 0.4180622856980929, "grad_norm": 0.0, - "learning_rate": 1.3070758611833628e-05, - "loss": 0.9046, + "learning_rate": 1.3088424152778134e-05, + "loss": 0.8324, "step": 14753 }, { - "epoch": 0.41867196367763904, + "epoch": 0.41809062314035533, "grad_norm": 0.0, - "learning_rate": 1.3069883926017042e-05, - "loss": 0.9789, + "learning_rate": 1.3087551218266373e-05, + "loss": 0.8015, "step": 14754 }, { - "epoch": 0.41870034052213395, + "epoch": 0.4181189605826178, "grad_norm": 0.0, - "learning_rate": 1.3069009214269011e-05, - "loss": 0.9496, + "learning_rate": 1.308667825774702e-05, + "loss": 0.965, "step": 14755 }, { - "epoch": 0.4187287173666288, + "epoch": 0.41814729802488027, "grad_norm": 0.0, - "learning_rate": 1.306813447659692e-05, - "loss": 0.8159, + "learning_rate": 1.308580527122743e-05, + "loss": 0.8531, "step": 14756 }, { - "epoch": 0.4187570942111237, + "epoch": 0.41817563546714276, "grad_norm": 0.0, - "learning_rate": 1.306725971300816e-05, - "loss": 0.8786, + "learning_rate": 1.3084932258714955e-05, + "loss": 0.9893, "step": 14757 }, { - "epoch": 0.41878547105561864, + "epoch": 0.4182039729094052, "grad_norm": 0.0, - "learning_rate": 1.3066384923510121e-05, - "loss": 0.8892, + "learning_rate": 1.3084059220216952e-05, + "loss": 0.8882, "step": 14758 }, { - "epoch": 0.4188138479001135, + "epoch": 0.41823231035166764, "grad_norm": 0.0, - "learning_rate": 1.306551010811019e-05, - "loss": 0.9765, + "learning_rate": 1.308318615574077e-05, + "loss": 0.8542, "step": 14759 }, { - "epoch": 0.4188422247446084, + "epoch": 0.41826064779393013, "grad_norm": 0.0, - "learning_rate": 1.3064635266815757e-05, - "loss": 0.8513, + "learning_rate": 1.3082313065293767e-05, + "loss": 0.9144, "step": 14760 }, { - "epoch": 0.4188706015891033, + "epoch": 0.41828898523619257, "grad_norm": 0.0, - "learning_rate": 1.306376039963421e-05, - "loss": 0.7565, + "learning_rate": 1.3081439948883299e-05, + "loss": 0.9117, "step": 14761 }, { - "epoch": 0.4188989784335982, + "epoch": 0.41831732267845506, "grad_norm": 0.0, - "learning_rate": 1.3062885506572944e-05, - "loss": 1.0297, + "learning_rate": 1.3080566806516719e-05, + "loss": 0.9206, "step": 14762 }, { - "epoch": 0.41892735527809305, + "epoch": 0.4183456601207175, "grad_norm": 0.0, - "learning_rate": 1.3062010587639345e-05, - "loss": 0.8804, + "learning_rate": 1.3079693638201379e-05, + "loss": 1.0374, "step": 14763 }, { - "epoch": 0.41895573212258796, + "epoch": 0.41837399756297994, "grad_norm": 0.0, - "learning_rate": 1.3061135642840804e-05, - "loss": 0.8663, + "learning_rate": 1.3078820443944635e-05, + "loss": 0.8311, "step": 14764 }, { - "epoch": 0.4189841089670829, + "epoch": 0.41840233500524243, "grad_norm": 0.0, - "learning_rate": 1.3060260672184713e-05, - "loss": 0.9591, + "learning_rate": 1.3077947223753842e-05, + "loss": 0.8251, "step": 14765 }, { - "epoch": 0.41901248581157774, + "epoch": 0.41843067244750487, "grad_norm": 0.0, - "learning_rate": 1.3059385675678465e-05, - "loss": 0.9256, + "learning_rate": 1.307707397763636e-05, + "loss": 0.8556, "step": 14766 }, { - "epoch": 0.41904086265607265, + "epoch": 0.41845900988976736, "grad_norm": 0.0, - "learning_rate": 1.3058510653329446e-05, - "loss": 0.9279, + "learning_rate": 1.307620070559954e-05, + "loss": 0.9569, "step": 14767 }, { - "epoch": 0.4190692395005675, + "epoch": 0.4184873473320298, "grad_norm": 0.0, - "learning_rate": 1.305763560514505e-05, - "loss": 1.0288, + "learning_rate": 1.3075327407650736e-05, + "loss": 0.9505, "step": 14768 }, { - "epoch": 0.41909761634506243, + "epoch": 0.4185156847742923, "grad_norm": 0.0, - "learning_rate": 1.3056760531132671e-05, - "loss": 0.9179, + "learning_rate": 1.307445408379731e-05, + "loss": 0.8743, "step": 14769 }, { - "epoch": 0.41912599318955734, + "epoch": 0.41854402221655473, "grad_norm": 0.0, - "learning_rate": 1.3055885431299695e-05, - "loss": 0.883, + "learning_rate": 1.3073580734046618e-05, + "loss": 0.9876, "step": 14770 }, { - "epoch": 0.4191543700340522, + "epoch": 0.4185723596588172, "grad_norm": 0.0, - "learning_rate": 1.3055010305653516e-05, - "loss": 0.9242, + "learning_rate": 1.307270735840601e-05, + "loss": 0.8493, "step": 14771 }, { - "epoch": 0.4191827468785471, + "epoch": 0.41860069710107967, "grad_norm": 0.0, - "learning_rate": 1.3054135154201535e-05, - "loss": 0.9029, + "learning_rate": 1.3071833956882847e-05, + "loss": 0.956, "step": 14772 }, { - "epoch": 0.419211123723042, + "epoch": 0.4186290345433421, "grad_norm": 0.0, - "learning_rate": 1.3053259976951134e-05, - "loss": 0.8816, + "learning_rate": 1.307096052948449e-05, + "loss": 0.9423, "step": 14773 }, { - "epoch": 0.4192395005675369, + "epoch": 0.4186573719856046, "grad_norm": 0.0, - "learning_rate": 1.3052384773909706e-05, - "loss": 0.9315, + "learning_rate": 1.3070087076218288e-05, + "loss": 0.9222, "step": 14774 }, { - "epoch": 0.4192678774120318, + "epoch": 0.41868570942786704, "grad_norm": 0.0, - "learning_rate": 1.3051509545084648e-05, - "loss": 0.8264, + "learning_rate": 1.3069213597091603e-05, + "loss": 0.9198, "step": 14775 }, { - "epoch": 0.41929625425652667, + "epoch": 0.4187140468701295, "grad_norm": 0.0, - "learning_rate": 1.3050634290483352e-05, - "loss": 0.9366, + "learning_rate": 1.3068340092111793e-05, + "loss": 1.0395, "step": 14776 }, { - "epoch": 0.4193246311010216, + "epoch": 0.41874238431239197, "grad_norm": 0.0, - "learning_rate": 1.3049759010113212e-05, - "loss": 0.889, + "learning_rate": 1.3067466561286217e-05, + "loss": 0.9049, "step": 14777 }, { - "epoch": 0.41935300794551644, + "epoch": 0.4187707217546544, "grad_norm": 0.0, - "learning_rate": 1.3048883703981617e-05, - "loss": 0.843, + "learning_rate": 1.3066593004622225e-05, + "loss": 0.8577, "step": 14778 }, { - "epoch": 0.41938138479001136, + "epoch": 0.4187990591969169, "grad_norm": 0.0, - "learning_rate": 1.3048008372095968e-05, - "loss": 0.9382, + "learning_rate": 1.3065719422127188e-05, + "loss": 0.918, "step": 14779 }, { - "epoch": 0.4194097616345062, + "epoch": 0.41882739663917934, "grad_norm": 0.0, - "learning_rate": 1.3047133014463656e-05, - "loss": 0.9498, + "learning_rate": 1.3064845813808455e-05, + "loss": 0.8457, "step": 14780 }, { - "epoch": 0.41943813847900113, + "epoch": 0.41885573408144183, "grad_norm": 0.0, - "learning_rate": 1.3046257631092072e-05, - "loss": 0.932, + "learning_rate": 1.3063972179673388e-05, + "loss": 0.945, "step": 14781 }, { - "epoch": 0.41946651532349605, + "epoch": 0.41888407152370427, "grad_norm": 0.0, - "learning_rate": 1.3045382221988612e-05, - "loss": 0.9539, + "learning_rate": 1.3063098519729347e-05, + "loss": 0.9124, "step": 14782 }, { - "epoch": 0.4194948921679909, + "epoch": 0.4189124089659667, "grad_norm": 0.0, - "learning_rate": 1.3044506787160676e-05, - "loss": 0.9434, + "learning_rate": 1.3062224833983687e-05, + "loss": 0.8836, "step": 14783 }, { - "epoch": 0.4195232690124858, + "epoch": 0.4189407464082292, "grad_norm": 0.0, - "learning_rate": 1.3043631326615648e-05, - "loss": 0.876, + "learning_rate": 1.3061351122443774e-05, + "loss": 1.0758, "step": 14784 }, { - "epoch": 0.4195516458569807, + "epoch": 0.41896908385049164, "grad_norm": 0.0, - "learning_rate": 1.3042755840360935e-05, - "loss": 1.0499, + "learning_rate": 1.3060477385116958e-05, + "loss": 0.9099, "step": 14785 }, { - "epoch": 0.4195800227014756, + "epoch": 0.41899742129275414, "grad_norm": 0.0, - "learning_rate": 1.3041880328403921e-05, - "loss": 0.9976, + "learning_rate": 1.3059603622010607e-05, + "loss": 0.8922, "step": 14786 }, { - "epoch": 0.4196083995459705, + "epoch": 0.4190257587350166, "grad_norm": 0.0, - "learning_rate": 1.3041004790752013e-05, - "loss": 0.8766, + "learning_rate": 1.305872983313208e-05, + "loss": 0.9253, "step": 14787 }, { - "epoch": 0.41963677639046537, + "epoch": 0.419054096177279, "grad_norm": 0.0, - "learning_rate": 1.3040129227412597e-05, - "loss": 1.0195, + "learning_rate": 1.3057856018488734e-05, + "loss": 0.8229, "step": 14788 }, { - "epoch": 0.4196651532349603, + "epoch": 0.4190824336195415, "grad_norm": 0.0, - "learning_rate": 1.3039253638393075e-05, - "loss": 0.9391, + "learning_rate": 1.3056982178087933e-05, + "loss": 0.9007, "step": 14789 }, { - "epoch": 0.41969353007945515, + "epoch": 0.41911077106180394, "grad_norm": 0.0, - "learning_rate": 1.303837802370084e-05, - "loss": 0.8951, + "learning_rate": 1.3056108311937034e-05, + "loss": 0.8729, "step": 14790 }, { - "epoch": 0.41972190692395006, + "epoch": 0.41913910850406644, "grad_norm": 0.0, - "learning_rate": 1.3037502383343287e-05, - "loss": 1.0076, + "learning_rate": 1.3055234420043401e-05, + "loss": 0.8894, "step": 14791 }, { - "epoch": 0.419750283768445, + "epoch": 0.4191674459463289, "grad_norm": 0.0, - "learning_rate": 1.3036626717327818e-05, - "loss": 0.9271, + "learning_rate": 1.3054360502414393e-05, + "loss": 0.9233, "step": 14792 }, { - "epoch": 0.41977866061293984, + "epoch": 0.41919578338859137, "grad_norm": 0.0, - "learning_rate": 1.3035751025661824e-05, - "loss": 0.9439, + "learning_rate": 1.3053486559057373e-05, + "loss": 0.9066, "step": 14793 }, { - "epoch": 0.41980703745743475, + "epoch": 0.4192241208308538, "grad_norm": 0.0, - "learning_rate": 1.3034875308352706e-05, - "loss": 0.9236, + "learning_rate": 1.3052612589979704e-05, + "loss": 0.9067, "step": 14794 }, { - "epoch": 0.4198354143019296, + "epoch": 0.41925245827311625, "grad_norm": 0.0, - "learning_rate": 1.303399956540786e-05, - "loss": 0.7115, + "learning_rate": 1.3051738595188744e-05, + "loss": 0.9475, "step": 14795 }, { - "epoch": 0.4198637911464245, + "epoch": 0.41928079571537874, "grad_norm": 0.0, - "learning_rate": 1.3033123796834683e-05, - "loss": 0.9112, + "learning_rate": 1.3050864574691857e-05, + "loss": 0.8803, "step": 14796 }, { - "epoch": 0.4198921679909194, + "epoch": 0.4193091331576412, "grad_norm": 0.0, - "learning_rate": 1.3032248002640574e-05, - "loss": 0.8923, + "learning_rate": 1.3049990528496404e-05, + "loss": 0.9828, "step": 14797 }, { - "epoch": 0.4199205448354143, + "epoch": 0.41933747059990367, "grad_norm": 0.0, - "learning_rate": 1.303137218283293e-05, - "loss": 0.9008, + "learning_rate": 1.3049116456609745e-05, + "loss": 0.938, "step": 14798 }, { - "epoch": 0.4199489216799092, + "epoch": 0.4193658080421661, "grad_norm": 0.0, - "learning_rate": 1.3030496337419148e-05, - "loss": 0.8383, + "learning_rate": 1.304824235903925e-05, + "loss": 0.9704, "step": 14799 }, { - "epoch": 0.4199772985244041, + "epoch": 0.41939414548442855, "grad_norm": 0.0, - "learning_rate": 1.3029620466406627e-05, - "loss": 0.9781, + "learning_rate": 1.3047368235792277e-05, + "loss": 0.9405, "step": 14800 }, { - "epoch": 0.420005675368899, + "epoch": 0.41942248292669104, "grad_norm": 0.0, - "learning_rate": 1.3028744569802765e-05, - "loss": 0.9091, + "learning_rate": 1.304649408687619e-05, + "loss": 0.9483, "step": 14801 }, { - "epoch": 0.42003405221339385, + "epoch": 0.4194508203689535, "grad_norm": 0.0, - "learning_rate": 1.3027868647614961e-05, - "loss": 0.9442, + "learning_rate": 1.304561991229835e-05, + "loss": 0.9824, "step": 14802 }, { - "epoch": 0.42006242905788876, + "epoch": 0.419479157811216, "grad_norm": 0.0, - "learning_rate": 1.3026992699850616e-05, - "loss": 0.8774, + "learning_rate": 1.3044745712066125e-05, + "loss": 1.0047, "step": 14803 }, { - "epoch": 0.4200908059023837, + "epoch": 0.4195074952534784, "grad_norm": 0.0, - "learning_rate": 1.302611672651713e-05, - "loss": 0.8354, + "learning_rate": 1.3043871486186874e-05, + "loss": 0.9479, "step": 14804 }, { - "epoch": 0.42011918274687854, + "epoch": 0.4195358326957409, "grad_norm": 0.0, - "learning_rate": 1.3025240727621894e-05, - "loss": 0.9088, + "learning_rate": 1.3042997234667963e-05, + "loss": 0.9497, "step": 14805 }, { - "epoch": 0.42014755959137345, + "epoch": 0.41956417013800335, "grad_norm": 0.0, - "learning_rate": 1.302436470317232e-05, - "loss": 0.9131, + "learning_rate": 1.3042122957516759e-05, + "loss": 0.9018, "step": 14806 }, { - "epoch": 0.4201759364358683, + "epoch": 0.4195925075802658, "grad_norm": 0.0, - "learning_rate": 1.3023488653175797e-05, - "loss": 1.0014, + "learning_rate": 1.3041248654740621e-05, + "loss": 0.9792, "step": 14807 }, { - "epoch": 0.42020431328036323, + "epoch": 0.4196208450225283, "grad_norm": 0.0, - "learning_rate": 1.302261257763973e-05, - "loss": 0.9655, + "learning_rate": 1.304037432634692e-05, + "loss": 0.8547, "step": 14808 }, { - "epoch": 0.4202326901248581, + "epoch": 0.4196491824647907, "grad_norm": 0.0, - "learning_rate": 1.3021736476571518e-05, - "loss": 0.9158, + "learning_rate": 1.3039499972343013e-05, + "loss": 0.7577, "step": 14809 }, { - "epoch": 0.420261066969353, + "epoch": 0.4196775199070532, "grad_norm": 0.0, - "learning_rate": 1.3020860349978564e-05, - "loss": 0.8512, + "learning_rate": 1.3038625592736271e-05, + "loss": 0.8856, "step": 14810 }, { - "epoch": 0.4202894438138479, + "epoch": 0.41970585734931565, "grad_norm": 0.0, - "learning_rate": 1.3019984197868266e-05, - "loss": 0.8858, + "learning_rate": 1.3037751187534055e-05, + "loss": 0.8813, "step": 14811 }, { - "epoch": 0.4203178206583428, + "epoch": 0.4197341947915781, "grad_norm": 0.0, - "learning_rate": 1.3019108020248025e-05, - "loss": 0.8804, + "learning_rate": 1.3036876756743734e-05, + "loss": 0.8101, "step": 14812 }, { - "epoch": 0.4203461975028377, + "epoch": 0.4197625322338406, "grad_norm": 0.0, - "learning_rate": 1.3018231817125247e-05, - "loss": 0.8834, + "learning_rate": 1.3036002300372675e-05, + "loss": 0.8688, "step": 14813 }, { - "epoch": 0.42037457434733255, + "epoch": 0.419790869676103, "grad_norm": 0.0, - "learning_rate": 1.3017355588507325e-05, - "loss": 0.9124, + "learning_rate": 1.3035127818428239e-05, + "loss": 0.9768, "step": 14814 }, { - "epoch": 0.42040295119182747, + "epoch": 0.4198192071183655, "grad_norm": 0.0, - "learning_rate": 1.3016479334401666e-05, - "loss": 0.8043, + "learning_rate": 1.3034253310917795e-05, + "loss": 0.8828, "step": 14815 }, { - "epoch": 0.4204313280363224, + "epoch": 0.41984754456062795, "grad_norm": 0.0, - "learning_rate": 1.3015603054815669e-05, - "loss": 0.9685, + "learning_rate": 1.3033378777848708e-05, + "loss": 0.9519, "step": 14816 }, { - "epoch": 0.42045970488081724, + "epoch": 0.41987588200289044, "grad_norm": 0.0, - "learning_rate": 1.3014726749756738e-05, - "loss": 0.811, + "learning_rate": 1.3032504219228344e-05, + "loss": 0.9152, "step": 14817 }, { - "epoch": 0.42048808172531216, + "epoch": 0.4199042194451529, "grad_norm": 0.0, - "learning_rate": 1.3013850419232275e-05, - "loss": 0.8155, + "learning_rate": 1.3031629635064072e-05, + "loss": 0.8517, "step": 14818 }, { - "epoch": 0.420516458569807, + "epoch": 0.4199325568874153, "grad_norm": 0.0, - "learning_rate": 1.3012974063249684e-05, - "loss": 0.8643, + "learning_rate": 1.3030755025363257e-05, + "loss": 0.8437, "step": 14819 }, { - "epoch": 0.42054483541430193, + "epoch": 0.4199608943296778, "grad_norm": 0.0, - "learning_rate": 1.3012097681816364e-05, - "loss": 0.9922, + "learning_rate": 1.302988039013327e-05, + "loss": 1.0388, "step": 14820 }, { - "epoch": 0.42057321225879685, + "epoch": 0.41998923177194025, "grad_norm": 0.0, - "learning_rate": 1.3011221274939718e-05, - "loss": 1.0224, + "learning_rate": 1.3029005729381474e-05, + "loss": 0.9473, "step": 14821 }, { - "epoch": 0.4206015891032917, + "epoch": 0.42001756921420275, "grad_norm": 0.0, - "learning_rate": 1.3010344842627155e-05, - "loss": 0.9345, + "learning_rate": 1.3028131043115235e-05, + "loss": 1.0499, "step": 14822 }, { - "epoch": 0.4206299659477866, + "epoch": 0.4200459066564652, "grad_norm": 0.0, - "learning_rate": 1.3009468384886068e-05, - "loss": 0.9432, + "learning_rate": 1.3027256331341926e-05, + "loss": 0.8809, "step": 14823 }, { - "epoch": 0.4206583427922815, + "epoch": 0.4200742440987276, "grad_norm": 0.0, - "learning_rate": 1.3008591901723869e-05, - "loss": 0.8461, + "learning_rate": 1.3026381594068913e-05, + "loss": 0.9528, "step": 14824 }, { - "epoch": 0.4206867196367764, + "epoch": 0.4201025815409901, "grad_norm": 0.0, - "learning_rate": 1.3007715393147957e-05, - "loss": 0.9383, + "learning_rate": 1.3025506831303565e-05, + "loss": 0.9006, "step": 14825 }, { - "epoch": 0.42071509648127126, + "epoch": 0.42013091898325255, "grad_norm": 0.0, - "learning_rate": 1.3006838859165742e-05, - "loss": 0.8117, + "learning_rate": 1.3024632043053246e-05, + "loss": 0.8685, "step": 14826 }, { - "epoch": 0.42074347332576617, + "epoch": 0.42015925642551505, "grad_norm": 0.0, - "learning_rate": 1.3005962299784623e-05, - "loss": 0.8281, + "learning_rate": 1.302375722932533e-05, + "loss": 0.9622, "step": 14827 }, { - "epoch": 0.4207718501702611, + "epoch": 0.4201875938677775, "grad_norm": 0.0, - "learning_rate": 1.3005085715012003e-05, - "loss": 0.9401, + "learning_rate": 1.3022882390127185e-05, + "loss": 0.945, "step": 14828 }, { - "epoch": 0.42080022701475595, + "epoch": 0.42021593131004, "grad_norm": 0.0, - "learning_rate": 1.300420910485529e-05, - "loss": 0.9327, + "learning_rate": 1.302200752546618e-05, + "loss": 0.8399, "step": 14829 }, { - "epoch": 0.42082860385925086, + "epoch": 0.4202442687523024, "grad_norm": 0.0, - "learning_rate": 1.3003332469321888e-05, - "loss": 0.9197, + "learning_rate": 1.302113263534968e-05, + "loss": 0.952, "step": 14830 }, { - "epoch": 0.4208569807037457, + "epoch": 0.42027260619456486, "grad_norm": 0.0, - "learning_rate": 1.30024558084192e-05, - "loss": 0.9161, + "learning_rate": 1.3020257719785058e-05, + "loss": 0.9392, "step": 14831 }, { - "epoch": 0.42088535754824064, + "epoch": 0.42030094363682735, "grad_norm": 0.0, - "learning_rate": 1.3001579122154633e-05, - "loss": 0.9068, + "learning_rate": 1.3019382778779688e-05, + "loss": 0.9099, "step": 14832 }, { - "epoch": 0.42091373439273555, + "epoch": 0.4203292810790898, "grad_norm": 0.0, - "learning_rate": 1.3000702410535595e-05, - "loss": 0.7946, + "learning_rate": 1.3018507812340932e-05, + "loss": 0.9133, "step": 14833 }, { - "epoch": 0.4209421112372304, + "epoch": 0.4203576185213523, "grad_norm": 0.0, - "learning_rate": 1.2999825673569489e-05, - "loss": 0.9238, + "learning_rate": 1.3017632820476165e-05, + "loss": 0.9109, "step": 14834 }, { - "epoch": 0.4209704880817253, + "epoch": 0.4203859559636147, "grad_norm": 0.0, - "learning_rate": 1.2998948911263716e-05, - "loss": 0.8648, + "learning_rate": 1.3016757803192756e-05, + "loss": 0.9223, "step": 14835 }, { - "epoch": 0.4209988649262202, + "epoch": 0.42041429340587716, "grad_norm": 0.0, - "learning_rate": 1.299807212362569e-05, - "loss": 1.0103, + "learning_rate": 1.3015882760498077e-05, + "loss": 0.9745, "step": 14836 }, { - "epoch": 0.4210272417707151, + "epoch": 0.42044263084813965, "grad_norm": 0.0, - "learning_rate": 1.2997195310662816e-05, - "loss": 0.9696, + "learning_rate": 1.3015007692399496e-05, + "loss": 0.9292, "step": 14837 }, { - "epoch": 0.42105561861521, + "epoch": 0.4204709682904021, "grad_norm": 0.0, - "learning_rate": 1.2996318472382494e-05, - "loss": 0.8331, + "learning_rate": 1.3014132598904383e-05, + "loss": 1.0183, "step": 14838 }, { - "epoch": 0.4210839954597049, + "epoch": 0.4204993057326646, "grad_norm": 0.0, - "learning_rate": 1.2995441608792137e-05, - "loss": 0.93, + "learning_rate": 1.3013257480020116e-05, + "loss": 0.9584, "step": 14839 }, { - "epoch": 0.4211123723041998, + "epoch": 0.420527643174927, "grad_norm": 0.0, - "learning_rate": 1.299456471989915e-05, - "loss": 0.7998, + "learning_rate": 1.3012382335754064e-05, + "loss": 0.9675, "step": 14840 }, { - "epoch": 0.42114074914869465, + "epoch": 0.4205559806171895, "grad_norm": 0.0, - "learning_rate": 1.2993687805710941e-05, - "loss": 0.9044, + "learning_rate": 1.3011507166113595e-05, + "loss": 0.8584, "step": 14841 }, { - "epoch": 0.42116912599318956, + "epoch": 0.42058431805945196, "grad_norm": 0.0, - "learning_rate": 1.2992810866234916e-05, - "loss": 0.9294, + "learning_rate": 1.301063197110608e-05, + "loss": 0.9116, "step": 14842 }, { - "epoch": 0.4211975028376844, + "epoch": 0.4206126555017144, "grad_norm": 0.0, - "learning_rate": 1.2991933901478484e-05, - "loss": 0.9015, + "learning_rate": 1.3009756750738896e-05, + "loss": 0.9051, "step": 14843 }, { - "epoch": 0.42122587968217934, + "epoch": 0.4206409929439769, "grad_norm": 0.0, - "learning_rate": 1.2991056911449052e-05, - "loss": 0.8974, + "learning_rate": 1.3008881505019413e-05, + "loss": 0.943, "step": 14844 }, { - "epoch": 0.42125425652667425, + "epoch": 0.4206693303862393, "grad_norm": 0.0, - "learning_rate": 1.2990179896154025e-05, - "loss": 0.8742, + "learning_rate": 1.3008006233955004e-05, + "loss": 1.0089, "step": 14845 }, { - "epoch": 0.4212826333711691, + "epoch": 0.4206976678285018, "grad_norm": 0.0, - "learning_rate": 1.2989302855600816e-05, - "loss": 0.9155, + "learning_rate": 1.300713093755304e-05, + "loss": 0.9687, "step": 14846 }, { - "epoch": 0.42131101021566403, + "epoch": 0.42072600527076426, "grad_norm": 0.0, - "learning_rate": 1.298842578979683e-05, - "loss": 0.8563, + "learning_rate": 1.3006255615820898e-05, + "loss": 0.8255, "step": 14847 }, { - "epoch": 0.4213393870601589, + "epoch": 0.4207543427130267, "grad_norm": 0.0, - "learning_rate": 1.298754869874948e-05, - "loss": 0.9525, + "learning_rate": 1.3005380268765947e-05, + "loss": 0.987, "step": 14848 }, { - "epoch": 0.4213677639046538, + "epoch": 0.4207826801552892, "grad_norm": 0.0, - "learning_rate": 1.2986671582466168e-05, - "loss": 0.8851, + "learning_rate": 1.3004504896395564e-05, + "loss": 0.9317, "step": 14849 }, { - "epoch": 0.4213961407491487, + "epoch": 0.42081101759755163, "grad_norm": 0.0, - "learning_rate": 1.298579444095431e-05, - "loss": 0.8573, + "learning_rate": 1.3003629498717119e-05, + "loss": 0.8883, "step": 14850 }, { - "epoch": 0.4214245175936436, + "epoch": 0.4208393550398141, "grad_norm": 0.0, - "learning_rate": 1.298491727422131e-05, - "loss": 0.8211, + "learning_rate": 1.3002754075737984e-05, + "loss": 0.864, "step": 14851 }, { - "epoch": 0.4214528944381385, + "epoch": 0.42086769248207656, "grad_norm": 0.0, - "learning_rate": 1.2984040082274581e-05, - "loss": 0.9351, + "learning_rate": 1.300187862746554e-05, + "loss": 0.8874, "step": 14852 }, { - "epoch": 0.42148127128263335, + "epoch": 0.42089602992433905, "grad_norm": 0.0, - "learning_rate": 1.2983162865121533e-05, - "loss": 0.9507, + "learning_rate": 1.3001003153907158e-05, + "loss": 0.9428, "step": 14853 }, { - "epoch": 0.42150964812712827, + "epoch": 0.4209243673666015, "grad_norm": 0.0, - "learning_rate": 1.2982285622769573e-05, - "loss": 0.8811, + "learning_rate": 1.3000127655070214e-05, + "loss": 0.8921, "step": 14854 }, { - "epoch": 0.4215380249716232, + "epoch": 0.42095270480886393, "grad_norm": 0.0, - "learning_rate": 1.2981408355226109e-05, - "loss": 0.9215, + "learning_rate": 1.2999252130962076e-05, + "loss": 0.9237, "step": 14855 }, { - "epoch": 0.42156640181611804, + "epoch": 0.4209810422511264, "grad_norm": 0.0, - "learning_rate": 1.2980531062498557e-05, - "loss": 0.837, + "learning_rate": 1.2998376581590125e-05, + "loss": 0.8722, "step": 14856 }, { - "epoch": 0.42159477866061296, + "epoch": 0.42100937969338886, "grad_norm": 0.0, - "learning_rate": 1.2979653744594324e-05, - "loss": 0.9046, + "learning_rate": 1.2997501006961737e-05, + "loss": 0.9412, "step": 14857 }, { - "epoch": 0.4216231555051078, + "epoch": 0.42103771713565136, "grad_norm": 0.0, - "learning_rate": 1.2978776401520825e-05, - "loss": 0.9322, + "learning_rate": 1.2996625407084282e-05, + "loss": 1.0172, "step": 14858 }, { - "epoch": 0.42165153234960273, + "epoch": 0.4210660545779138, "grad_norm": 0.0, - "learning_rate": 1.2977899033285465e-05, - "loss": 0.9408, + "learning_rate": 1.2995749781965139e-05, + "loss": 0.8829, "step": 14859 }, { - "epoch": 0.4216799091940976, + "epoch": 0.42109439202017623, "grad_norm": 0.0, - "learning_rate": 1.297702163989566e-05, - "loss": 0.8022, + "learning_rate": 1.2994874131611685e-05, + "loss": 0.929, "step": 14860 }, { - "epoch": 0.4217082860385925, + "epoch": 0.4211227294624387, "grad_norm": 0.0, - "learning_rate": 1.2976144221358818e-05, - "loss": 0.844, + "learning_rate": 1.2993998456031294e-05, + "loss": 0.8929, "step": 14861 }, { - "epoch": 0.4217366628830874, + "epoch": 0.42115106690470117, "grad_norm": 0.0, - "learning_rate": 1.297526677768235e-05, - "loss": 0.9859, + "learning_rate": 1.299312275523134e-05, + "loss": 0.8353, "step": 14862 }, { - "epoch": 0.4217650397275823, + "epoch": 0.42117940434696366, "grad_norm": 0.0, - "learning_rate": 1.2974389308873672e-05, - "loss": 0.9038, + "learning_rate": 1.29922470292192e-05, + "loss": 1.0248, "step": 14863 }, { - "epoch": 0.4217934165720772, + "epoch": 0.4212077417892261, "grad_norm": 0.0, - "learning_rate": 1.2973511814940194e-05, - "loss": 0.8363, + "learning_rate": 1.2991371278002256e-05, + "loss": 0.9352, "step": 14864 }, { - "epoch": 0.42182179341657206, + "epoch": 0.4212360792314886, "grad_norm": 0.0, - "learning_rate": 1.2972634295889327e-05, - "loss": 0.912, + "learning_rate": 1.2990495501587882e-05, + "loss": 0.9531, "step": 14865 }, { - "epoch": 0.42185017026106697, + "epoch": 0.42126441667375103, "grad_norm": 0.0, - "learning_rate": 1.2971756751728486e-05, - "loss": 0.9167, + "learning_rate": 1.298961969998345e-05, + "loss": 0.9933, "step": 14866 }, { - "epoch": 0.4218785471055619, + "epoch": 0.42129275411601347, "grad_norm": 0.0, - "learning_rate": 1.2970879182465082e-05, - "loss": 0.9475, + "learning_rate": 1.2988743873196344e-05, + "loss": 0.9461, "step": 14867 }, { - "epoch": 0.42190692395005674, + "epoch": 0.42132109155827596, "grad_norm": 0.0, - "learning_rate": 1.297000158810653e-05, - "loss": 0.9921, + "learning_rate": 1.2987868021233936e-05, + "loss": 0.8544, "step": 14868 }, { - "epoch": 0.42193530079455166, + "epoch": 0.4213494290005384, "grad_norm": 0.0, - "learning_rate": 1.2969123968660235e-05, - "loss": 0.8803, + "learning_rate": 1.2986992144103607e-05, + "loss": 0.9267, "step": 14869 }, { - "epoch": 0.4219636776390465, + "epoch": 0.4213777664428009, "grad_norm": 0.0, - "learning_rate": 1.296824632413362e-05, - "loss": 0.9471, + "learning_rate": 1.2986116241812734e-05, + "loss": 0.8944, "step": 14870 }, { - "epoch": 0.42199205448354143, + "epoch": 0.42140610388506333, "grad_norm": 0.0, - "learning_rate": 1.2967368654534096e-05, - "loss": 0.9179, + "learning_rate": 1.2985240314368694e-05, + "loss": 0.867, "step": 14871 }, { - "epoch": 0.42202043132803635, + "epoch": 0.42143444132732577, "grad_norm": 0.0, - "learning_rate": 1.2966490959869072e-05, - "loss": 1.0009, + "learning_rate": 1.298436436177887e-05, + "loss": 0.8665, "step": 14872 }, { - "epoch": 0.4220488081725312, + "epoch": 0.42146277876958826, "grad_norm": 0.0, - "learning_rate": 1.2965613240145969e-05, - "loss": 0.8489, + "learning_rate": 1.2983488384050633e-05, + "loss": 0.9218, "step": 14873 }, { - "epoch": 0.4220771850170261, + "epoch": 0.4214911162118507, "grad_norm": 0.0, - "learning_rate": 1.2964735495372197e-05, - "loss": 0.8841, + "learning_rate": 1.2982612381191368e-05, + "loss": 1.0295, "step": 14874 }, { - "epoch": 0.422105561861521, + "epoch": 0.4215194536541132, "grad_norm": 0.0, - "learning_rate": 1.2963857725555169e-05, - "loss": 0.9173, + "learning_rate": 1.298173635320845e-05, + "loss": 0.9092, "step": 14875 }, { - "epoch": 0.4221339387060159, + "epoch": 0.42154779109637563, "grad_norm": 0.0, - "learning_rate": 1.2962979930702305e-05, - "loss": 0.996, + "learning_rate": 1.2980860300109257e-05, + "loss": 0.8244, "step": 14876 }, { - "epoch": 0.42216231555051076, + "epoch": 0.42157612853863813, "grad_norm": 0.0, - "learning_rate": 1.2962102110821013e-05, - "loss": 0.9236, + "learning_rate": 1.2979984221901174e-05, + "loss": 0.8946, "step": 14877 }, { - "epoch": 0.4221906923950057, + "epoch": 0.42160446598090057, "grad_norm": 0.0, - "learning_rate": 1.2961224265918712e-05, - "loss": 1.0611, + "learning_rate": 1.297910811859158e-05, + "loss": 1.0026, "step": 14878 }, { - "epoch": 0.4222190692395006, + "epoch": 0.421632803423163, "grad_norm": 0.0, - "learning_rate": 1.2960346396002817e-05, - "loss": 0.9443, + "learning_rate": 1.2978231990187848e-05, + "loss": 0.9569, "step": 14879 }, { - "epoch": 0.42224744608399545, + "epoch": 0.4216611408654255, "grad_norm": 0.0, - "learning_rate": 1.2959468501080744e-05, - "loss": 0.902, + "learning_rate": 1.297735583669736e-05, + "loss": 0.9195, "step": 14880 }, { - "epoch": 0.42227582292849036, + "epoch": 0.42168947830768794, "grad_norm": 0.0, - "learning_rate": 1.2958590581159907e-05, - "loss": 0.8217, + "learning_rate": 1.2976479658127503e-05, + "loss": 0.8879, "step": 14881 }, { - "epoch": 0.4223041997729852, + "epoch": 0.42171781574995043, "grad_norm": 0.0, - "learning_rate": 1.2957712636247722e-05, - "loss": 1.0046, + "learning_rate": 1.2975603454485648e-05, + "loss": 0.8721, "step": 14882 }, { - "epoch": 0.42233257661748014, + "epoch": 0.42174615319221287, "grad_norm": 0.0, - "learning_rate": 1.2956834666351603e-05, - "loss": 0.8854, + "learning_rate": 1.2974727225779185e-05, + "loss": 0.9812, "step": 14883 }, { - "epoch": 0.42236095346197505, + "epoch": 0.4217744906344753, "grad_norm": 0.0, - "learning_rate": 1.2955956671478973e-05, - "loss": 0.8788, + "learning_rate": 1.2973850972015485e-05, + "loss": 0.9503, "step": 14884 }, { - "epoch": 0.4223893303064699, + "epoch": 0.4218028280767378, "grad_norm": 0.0, - "learning_rate": 1.2955078651637241e-05, - "loss": 0.9162, + "learning_rate": 1.2972974693201938e-05, + "loss": 0.947, "step": 14885 }, { - "epoch": 0.4224177071509648, + "epoch": 0.42183116551900024, "grad_norm": 0.0, - "learning_rate": 1.2954200606833826e-05, - "loss": 0.8855, + "learning_rate": 1.2972098389345921e-05, + "loss": 0.9106, "step": 14886 }, { - "epoch": 0.4224460839954597, + "epoch": 0.42185950296126273, "grad_norm": 0.0, - "learning_rate": 1.295332253707615e-05, - "loss": 0.9508, + "learning_rate": 1.2971222060454816e-05, + "loss": 0.945, "step": 14887 }, { - "epoch": 0.4224744608399546, + "epoch": 0.42188784040352517, "grad_norm": 0.0, - "learning_rate": 1.2952444442371624e-05, - "loss": 0.9457, + "learning_rate": 1.2970345706536e-05, + "loss": 1.0499, "step": 14888 }, { - "epoch": 0.42250283768444946, + "epoch": 0.42191617784578767, "grad_norm": 0.0, - "learning_rate": 1.2951566322727664e-05, - "loss": 0.9635, + "learning_rate": 1.296946932759686e-05, + "loss": 1.0541, "step": 14889 }, { - "epoch": 0.4225312145289444, + "epoch": 0.4219445152880501, "grad_norm": 0.0, - "learning_rate": 1.2950688178151696e-05, - "loss": 0.949, + "learning_rate": 1.296859292364478e-05, + "loss": 0.7853, "step": 14890 }, { - "epoch": 0.4225595913734393, + "epoch": 0.42197285273031254, "grad_norm": 0.0, - "learning_rate": 1.2949810008651129e-05, - "loss": 0.8917, + "learning_rate": 1.296771649468714e-05, + "loss": 0.9568, "step": 14891 }, { - "epoch": 0.42258796821793415, + "epoch": 0.42200119017257504, "grad_norm": 0.0, - "learning_rate": 1.2948931814233382e-05, - "loss": 0.8585, + "learning_rate": 1.2966840040731316e-05, + "loss": 0.9146, "step": 14892 }, { - "epoch": 0.42261634506242907, + "epoch": 0.4220295276148375, "grad_norm": 0.0, - "learning_rate": 1.2948053594905878e-05, - "loss": 0.9658, + "learning_rate": 1.2965963561784705e-05, + "loss": 0.8981, "step": 14893 }, { - "epoch": 0.4226447219069239, + "epoch": 0.42205786505709997, "grad_norm": 0.0, - "learning_rate": 1.2947175350676033e-05, - "loss": 0.8617, + "learning_rate": 1.2965087057854678e-05, + "loss": 0.951, "step": 14894 }, { - "epoch": 0.42267309875141884, + "epoch": 0.4220862024993624, "grad_norm": 0.0, - "learning_rate": 1.2946297081551267e-05, - "loss": 0.8767, + "learning_rate": 1.2964210528948617e-05, + "loss": 0.9643, "step": 14895 }, { - "epoch": 0.42270147559591376, + "epoch": 0.42211453994162484, "grad_norm": 0.0, - "learning_rate": 1.2945418787538992e-05, - "loss": 0.9109, + "learning_rate": 1.2963333975073912e-05, + "loss": 0.7735, "step": 14896 }, { - "epoch": 0.4227298524404086, + "epoch": 0.42214287738388734, "grad_norm": 0.0, - "learning_rate": 1.2944540468646639e-05, - "loss": 0.8973, + "learning_rate": 1.296245739623795e-05, + "loss": 0.8358, "step": 14897 }, { - "epoch": 0.42275822928490353, + "epoch": 0.4221712148261498, "grad_norm": 0.0, - "learning_rate": 1.2943662124881615e-05, - "loss": 0.8906, + "learning_rate": 1.2961580792448104e-05, + "loss": 0.978, "step": 14898 }, { - "epoch": 0.4227866061293984, + "epoch": 0.42219955226841227, "grad_norm": 0.0, - "learning_rate": 1.2942783756251345e-05, - "loss": 0.8997, + "learning_rate": 1.2960704163711769e-05, + "loss": 0.8811, "step": 14899 }, { - "epoch": 0.4228149829738933, + "epoch": 0.4222278897106747, "grad_norm": 0.0, - "learning_rate": 1.2941905362763252e-05, - "loss": 0.9885, + "learning_rate": 1.2959827510036318e-05, + "loss": 0.9362, "step": 14900 }, { - "epoch": 0.4228433598183882, + "epoch": 0.4222562271529372, "grad_norm": 0.0, - "learning_rate": 1.2941026944424748e-05, - "loss": 0.9321, + "learning_rate": 1.2958950831429142e-05, + "loss": 0.97, "step": 14901 }, { - "epoch": 0.4228717366628831, + "epoch": 0.42228456459519964, "grad_norm": 0.0, - "learning_rate": 1.2940148501243259e-05, - "loss": 0.953, + "learning_rate": 1.2958074127897624e-05, + "loss": 0.9243, "step": 14902 }, { - "epoch": 0.422900113507378, + "epoch": 0.4223129020374621, "grad_norm": 0.0, - "learning_rate": 1.2939270033226204e-05, - "loss": 0.9471, + "learning_rate": 1.295719739944915e-05, + "loss": 0.9466, "step": 14903 }, { - "epoch": 0.42292849035187285, + "epoch": 0.4223412394797246, "grad_norm": 0.0, - "learning_rate": 1.2938391540381001e-05, - "loss": 0.9164, + "learning_rate": 1.2956320646091106e-05, + "loss": 0.8786, "step": 14904 }, { - "epoch": 0.42295686719636777, + "epoch": 0.422369576921987, "grad_norm": 0.0, - "learning_rate": 1.2937513022715076e-05, - "loss": 0.8838, + "learning_rate": 1.295544386783087e-05, + "loss": 0.9759, "step": 14905 }, { - "epoch": 0.42298524404086263, + "epoch": 0.4223979143642495, "grad_norm": 0.0, - "learning_rate": 1.2936634480235843e-05, - "loss": 0.9711, + "learning_rate": 1.2954567064675839e-05, + "loss": 1.018, "step": 14906 }, { - "epoch": 0.42301362088535754, + "epoch": 0.42242625180651194, "grad_norm": 0.0, - "learning_rate": 1.2935755912950728e-05, - "loss": 0.8664, + "learning_rate": 1.2953690236633389e-05, + "loss": 0.874, "step": 14907 }, { - "epoch": 0.42304199772985246, + "epoch": 0.4224545892487744, "grad_norm": 0.0, - "learning_rate": 1.2934877320867152e-05, - "loss": 0.9849, + "learning_rate": 1.2952813383710909e-05, + "loss": 0.9826, "step": 14908 }, { - "epoch": 0.4230703745743473, + "epoch": 0.4224829266910369, "grad_norm": 0.0, - "learning_rate": 1.2933998703992531e-05, - "loss": 0.8864, + "learning_rate": 1.2951936505915783e-05, + "loss": 0.9442, "step": 14909 }, { - "epoch": 0.42309875141884223, + "epoch": 0.4225112641332993, "grad_norm": 0.0, - "learning_rate": 1.2933120062334294e-05, - "loss": 0.7795, + "learning_rate": 1.2951059603255405e-05, + "loss": 0.9455, "step": 14910 }, { - "epoch": 0.4231271282633371, + "epoch": 0.4225396015755618, "grad_norm": 0.0, - "learning_rate": 1.2932241395899862e-05, - "loss": 0.7975, + "learning_rate": 1.2950182675737155e-05, + "loss": 0.946, "step": 14911 }, { - "epoch": 0.423155505107832, + "epoch": 0.42256793901782425, "grad_norm": 0.0, - "learning_rate": 1.2931362704696652e-05, - "loss": 0.9157, + "learning_rate": 1.2949305723368419e-05, + "loss": 0.8754, "step": 14912 }, { - "epoch": 0.4231838819523269, + "epoch": 0.42259627646008674, "grad_norm": 0.0, - "learning_rate": 1.293048398873209e-05, - "loss": 0.9119, + "learning_rate": 1.2948428746156588e-05, + "loss": 0.9474, "step": 14913 }, { - "epoch": 0.4232122587968218, + "epoch": 0.4226246139023492, "grad_norm": 0.0, - "learning_rate": 1.2929605248013601e-05, - "loss": 0.9165, + "learning_rate": 1.2947551744109044e-05, + "loss": 0.9245, "step": 14914 }, { - "epoch": 0.4232406356413167, + "epoch": 0.4226529513446116, "grad_norm": 0.0, - "learning_rate": 1.2928726482548602e-05, - "loss": 0.8972, + "learning_rate": 1.294667471723318e-05, + "loss": 0.942, "step": 14915 }, { - "epoch": 0.42326901248581156, + "epoch": 0.4226812887868741, "grad_norm": 0.0, - "learning_rate": 1.2927847692344514e-05, - "loss": 0.9889, + "learning_rate": 1.2945797665536378e-05, + "loss": 0.9074, "step": 14916 }, { - "epoch": 0.4232973893303065, + "epoch": 0.42270962622913655, "grad_norm": 0.0, - "learning_rate": 1.2926968877408773e-05, - "loss": 0.8712, + "learning_rate": 1.2944920589026029e-05, + "loss": 0.9416, "step": 14917 }, { - "epoch": 0.4233257661748014, + "epoch": 0.42273796367139904, "grad_norm": 0.0, - "learning_rate": 1.2926090037748793e-05, - "loss": 0.8884, + "learning_rate": 1.2944043487709519e-05, + "loss": 0.8819, "step": 14918 }, { - "epoch": 0.42335414301929625, + "epoch": 0.4227663011136615, "grad_norm": 0.0, - "learning_rate": 1.2925211173371994e-05, - "loss": 1.0076, + "learning_rate": 1.2943166361594242e-05, + "loss": 0.8791, "step": 14919 }, { - "epoch": 0.42338251986379116, + "epoch": 0.4227946385559239, "grad_norm": 0.0, - "learning_rate": 1.2924332284285809e-05, - "loss": 0.9945, + "learning_rate": 1.2942289210687577e-05, + "loss": 0.8729, "step": 14920 }, { - "epoch": 0.423410896708286, + "epoch": 0.4228229759981864, "grad_norm": 0.0, - "learning_rate": 1.2923453370497658e-05, - "loss": 0.9263, + "learning_rate": 1.294141203499692e-05, + "loss": 0.9701, "step": 14921 }, { - "epoch": 0.42343927355278094, + "epoch": 0.42285131344044885, "grad_norm": 0.0, - "learning_rate": 1.2922574432014965e-05, - "loss": 0.856, + "learning_rate": 1.2940534834529654e-05, + "loss": 0.9637, "step": 14922 }, { - "epoch": 0.4234676503972758, + "epoch": 0.42287965088271134, "grad_norm": 0.0, - "learning_rate": 1.2921695468845152e-05, - "loss": 0.9477, + "learning_rate": 1.2939657609293174e-05, + "loss": 0.8541, "step": 14923 }, { - "epoch": 0.4234960272417707, + "epoch": 0.4229079883249738, "grad_norm": 0.0, - "learning_rate": 1.2920816480995645e-05, - "loss": 0.8925, + "learning_rate": 1.2938780359294868e-05, + "loss": 0.8455, "step": 14924 }, { - "epoch": 0.4235244040862656, + "epoch": 0.4229363257672363, "grad_norm": 0.0, - "learning_rate": 1.2919937468473873e-05, - "loss": 0.8782, + "learning_rate": 1.293790308454212e-05, + "loss": 1.0225, "step": 14925 }, { - "epoch": 0.4235527809307605, + "epoch": 0.4229646632094987, "grad_norm": 0.0, - "learning_rate": 1.2919058431287257e-05, - "loss": 0.8806, + "learning_rate": 1.2937025785042328e-05, + "loss": 0.8172, "step": 14926 }, { - "epoch": 0.4235811577752554, + "epoch": 0.42299300065176115, "grad_norm": 0.0, - "learning_rate": 1.2918179369443224e-05, - "loss": 0.8265, + "learning_rate": 1.2936148460802875e-05, + "loss": 1.0214, "step": 14927 }, { - "epoch": 0.42360953461975026, + "epoch": 0.42302133809402365, "grad_norm": 0.0, - "learning_rate": 1.2917300282949199e-05, - "loss": 0.9127, + "learning_rate": 1.293527111183115e-05, + "loss": 0.9437, "step": 14928 }, { - "epoch": 0.4236379114642452, + "epoch": 0.4230496755362861, "grad_norm": 0.0, - "learning_rate": 1.2916421171812605e-05, - "loss": 0.8773, + "learning_rate": 1.2934393738134548e-05, + "loss": 0.9341, "step": 14929 }, { - "epoch": 0.4236662883087401, + "epoch": 0.4230780129785486, "grad_norm": 0.0, - "learning_rate": 1.2915542036040871e-05, - "loss": 0.9404, + "learning_rate": 1.2933516339720459e-05, + "loss": 0.7699, "step": 14930 }, { - "epoch": 0.42369466515323495, + "epoch": 0.423106350420811, "grad_norm": 0.0, - "learning_rate": 1.2914662875641425e-05, - "loss": 0.8435, + "learning_rate": 1.2932638916596275e-05, + "loss": 0.8827, "step": 14931 }, { - "epoch": 0.42372304199772987, + "epoch": 0.42313468786307346, "grad_norm": 0.0, - "learning_rate": 1.2913783690621688e-05, - "loss": 0.9475, + "learning_rate": 1.2931761468769382e-05, + "loss": 0.9607, "step": 14932 }, { - "epoch": 0.4237514188422247, + "epoch": 0.42316302530533595, "grad_norm": 0.0, - "learning_rate": 1.2912904480989088e-05, - "loss": 1.0021, + "learning_rate": 1.2930883996247174e-05, + "loss": 0.8069, "step": 14933 }, { - "epoch": 0.42377979568671964, + "epoch": 0.4231913627475984, "grad_norm": 0.0, - "learning_rate": 1.2912025246751054e-05, - "loss": 0.9116, + "learning_rate": 1.293000649903704e-05, + "loss": 1.0074, "step": 14934 }, { - "epoch": 0.42380817253121456, + "epoch": 0.4232197001898609, "grad_norm": 0.0, - "learning_rate": 1.2911145987915015e-05, - "loss": 0.9799, + "learning_rate": 1.2929128977146372e-05, + "loss": 0.8481, "step": 14935 }, { - "epoch": 0.4238365493757094, + "epoch": 0.4232480376321233, "grad_norm": 0.0, - "learning_rate": 1.291026670448839e-05, - "loss": 0.8702, + "learning_rate": 1.2928251430582565e-05, + "loss": 0.8414, "step": 14936 }, { - "epoch": 0.42386492622020433, + "epoch": 0.4232763750743858, "grad_norm": 0.0, - "learning_rate": 1.2909387396478614e-05, - "loss": 0.9053, + "learning_rate": 1.292737385935301e-05, + "loss": 0.918, "step": 14937 }, { - "epoch": 0.4238933030646992, + "epoch": 0.42330471251664825, "grad_norm": 0.0, - "learning_rate": 1.2908508063893112e-05, - "loss": 1.0275, + "learning_rate": 1.2926496263465095e-05, + "loss": 0.8343, "step": 14938 }, { - "epoch": 0.4239216799091941, + "epoch": 0.4233330499589107, "grad_norm": 0.0, - "learning_rate": 1.2907628706739312e-05, - "loss": 0.8908, + "learning_rate": 1.292561864292622e-05, + "loss": 1.0363, "step": 14939 }, { - "epoch": 0.42395005675368896, + "epoch": 0.4233613874011732, "grad_norm": 0.0, - "learning_rate": 1.2906749325024638e-05, - "loss": 0.8469, + "learning_rate": 1.2924740997743769e-05, + "loss": 0.8292, "step": 14940 }, { - "epoch": 0.4239784335981839, + "epoch": 0.4233897248434356, "grad_norm": 0.0, - "learning_rate": 1.2905869918756526e-05, - "loss": 0.7975, + "learning_rate": 1.2923863327925138e-05, + "loss": 0.938, "step": 14941 }, { - "epoch": 0.4240068104426788, + "epoch": 0.4234180622856981, "grad_norm": 0.0, - "learning_rate": 1.29049904879424e-05, - "loss": 0.8658, + "learning_rate": 1.2922985633477722e-05, + "loss": 0.9102, "step": 14942 }, { - "epoch": 0.42403518728717365, + "epoch": 0.42344639972796055, "grad_norm": 0.0, - "learning_rate": 1.2904111032589688e-05, - "loss": 0.9394, + "learning_rate": 1.2922107914408913e-05, + "loss": 0.9088, "step": 14943 }, { - "epoch": 0.42406356413166857, + "epoch": 0.423474737170223, "grad_norm": 0.0, - "learning_rate": 1.2903231552705819e-05, - "loss": 0.9044, + "learning_rate": 1.2921230170726102e-05, + "loss": 0.8451, "step": 14944 }, { - "epoch": 0.42409194097616343, + "epoch": 0.4235030746124855, "grad_norm": 0.0, - "learning_rate": 1.2902352048298224e-05, - "loss": 0.8726, + "learning_rate": 1.292035240243669e-05, + "loss": 0.9684, "step": 14945 }, { - "epoch": 0.42412031782065834, + "epoch": 0.4235314120547479, "grad_norm": 0.0, - "learning_rate": 1.2901472519374327e-05, - "loss": 0.9056, + "learning_rate": 1.291947460954806e-05, + "loss": 0.8687, "step": 14946 }, { - "epoch": 0.42414869466515326, + "epoch": 0.4235597494970104, "grad_norm": 0.0, - "learning_rate": 1.2900592965941563e-05, - "loss": 0.8937, + "learning_rate": 1.2918596792067613e-05, + "loss": 0.9239, "step": 14947 }, { - "epoch": 0.4241770715096481, + "epoch": 0.42358808693927286, "grad_norm": 0.0, - "learning_rate": 1.2899713388007362e-05, - "loss": 0.9642, + "learning_rate": 1.2917718950002737e-05, + "loss": 0.8966, "step": 14948 }, { - "epoch": 0.42420544835414303, + "epoch": 0.42361642438153535, "grad_norm": 0.0, - "learning_rate": 1.289883378557915e-05, - "loss": 0.9146, + "learning_rate": 1.2916841083360836e-05, + "loss": 0.7635, "step": 14949 }, { - "epoch": 0.4242338251986379, + "epoch": 0.4236447618237978, "grad_norm": 0.0, - "learning_rate": 1.2897954158664358e-05, - "loss": 0.8973, + "learning_rate": 1.2915963192149297e-05, + "loss": 0.9277, "step": 14950 }, { - "epoch": 0.4242622020431328, + "epoch": 0.4236730992660602, "grad_norm": 0.0, - "learning_rate": 1.289707450727042e-05, - "loss": 0.8717, + "learning_rate": 1.2915085276375519e-05, + "loss": 0.9445, "step": 14951 }, { - "epoch": 0.4242905788876277, + "epoch": 0.4237014367083227, "grad_norm": 0.0, - "learning_rate": 1.289619483140476e-05, - "loss": 0.8213, + "learning_rate": 1.2914207336046896e-05, + "loss": 0.8743, "step": 14952 }, { - "epoch": 0.4243189557321226, + "epoch": 0.42372977415058516, "grad_norm": 0.0, - "learning_rate": 1.2895315131074812e-05, - "loss": 0.8425, + "learning_rate": 1.291332937117082e-05, + "loss": 0.9917, "step": 14953 }, { - "epoch": 0.4243473325766175, + "epoch": 0.42375811159284765, "grad_norm": 0.0, - "learning_rate": 1.289443540628801e-05, - "loss": 0.8955, + "learning_rate": 1.291245138175469e-05, + "loss": 0.841, "step": 14954 }, { - "epoch": 0.42437570942111236, + "epoch": 0.4237864490351101, "grad_norm": 0.0, - "learning_rate": 1.289355565705178e-05, - "loss": 0.9148, + "learning_rate": 1.2911573367805898e-05, + "loss": 0.8241, "step": 14955 }, { - "epoch": 0.42440408626560727, + "epoch": 0.42381478647737253, "grad_norm": 0.0, - "learning_rate": 1.2892675883373555e-05, - "loss": 0.9412, + "learning_rate": 1.2910695329331846e-05, + "loss": 0.8838, "step": 14956 }, { - "epoch": 0.42443246311010213, + "epoch": 0.423843123919635, "grad_norm": 0.0, - "learning_rate": 1.2891796085260766e-05, - "loss": 0.9294, + "learning_rate": 1.2909817266339926e-05, + "loss": 0.8282, "step": 14957 }, { - "epoch": 0.42446083995459705, + "epoch": 0.42387146136189746, "grad_norm": 0.0, - "learning_rate": 1.2890916262720848e-05, - "loss": 0.8941, + "learning_rate": 1.2908939178837532e-05, + "loss": 1.0311, "step": 14958 }, { - "epoch": 0.42448921679909196, + "epoch": 0.42389979880415996, "grad_norm": 0.0, - "learning_rate": 1.2890036415761232e-05, - "loss": 0.9721, + "learning_rate": 1.2908061066832064e-05, + "loss": 1.0024, "step": 14959 }, { - "epoch": 0.4245175936435868, + "epoch": 0.4239281362464224, "grad_norm": 0.0, - "learning_rate": 1.2889156544389342e-05, - "loss": 0.8649, + "learning_rate": 1.2907182930330921e-05, + "loss": 0.8759, "step": 14960 }, { - "epoch": 0.42454597048808174, + "epoch": 0.42395647368868483, "grad_norm": 0.0, - "learning_rate": 1.288827664861262e-05, - "loss": 1.0153, + "learning_rate": 1.2906304769341493e-05, + "loss": 0.8822, "step": 14961 }, { - "epoch": 0.4245743473325766, + "epoch": 0.4239848111309473, "grad_norm": 0.0, - "learning_rate": 1.2887396728438498e-05, - "loss": 0.8404, + "learning_rate": 1.290542658387118e-05, + "loss": 0.8721, "step": 14962 }, { - "epoch": 0.4246027241770715, + "epoch": 0.42401314857320976, "grad_norm": 0.0, - "learning_rate": 1.2886516783874403e-05, - "loss": 0.9088, + "learning_rate": 1.2904548373927383e-05, + "loss": 0.896, "step": 14963 }, { - "epoch": 0.4246311010215664, + "epoch": 0.42404148601547226, "grad_norm": 0.0, - "learning_rate": 1.2885636814927769e-05, - "loss": 0.881, + "learning_rate": 1.2903670139517495e-05, + "loss": 0.9234, "step": 14964 }, { - "epoch": 0.4246594778660613, + "epoch": 0.4240698234577347, "grad_norm": 0.0, - "learning_rate": 1.2884756821606037e-05, - "loss": 0.9255, + "learning_rate": 1.2902791880648917e-05, + "loss": 0.9931, "step": 14965 }, { - "epoch": 0.4246878547105562, + "epoch": 0.4240981608999972, "grad_norm": 0.0, - "learning_rate": 1.288387680391663e-05, - "loss": 1.0188, + "learning_rate": 1.290191359732904e-05, + "loss": 1.0248, "step": 14966 }, { - "epoch": 0.42471623155505106, + "epoch": 0.42412649834225963, "grad_norm": 0.0, - "learning_rate": 1.2882996761866985e-05, - "loss": 0.9317, + "learning_rate": 1.2901035289565274e-05, + "loss": 0.9618, "step": 14967 }, { - "epoch": 0.424744608399546, + "epoch": 0.42415483578452207, "grad_norm": 0.0, - "learning_rate": 1.2882116695464542e-05, - "loss": 0.9773, + "learning_rate": 1.2900156957365004e-05, + "loss": 0.933, "step": 14968 }, { - "epoch": 0.42477298524404083, + "epoch": 0.42418317322678456, "grad_norm": 0.0, - "learning_rate": 1.2881236604716728e-05, - "loss": 0.928, + "learning_rate": 1.2899278600735641e-05, + "loss": 0.995, "step": 14969 }, { - "epoch": 0.42480136208853575, + "epoch": 0.424211510669047, "grad_norm": 0.0, - "learning_rate": 1.2880356489630974e-05, - "loss": 0.9995, + "learning_rate": 1.2898400219684575e-05, + "loss": 0.8129, "step": 14970 }, { - "epoch": 0.42482973893303067, + "epoch": 0.4242398481113095, "grad_norm": 0.0, - "learning_rate": 1.2879476350214724e-05, - "loss": 0.9616, + "learning_rate": 1.2897521814219207e-05, + "loss": 1.0269, "step": 14971 }, { - "epoch": 0.4248581157775255, + "epoch": 0.42426818555357193, "grad_norm": 0.0, - "learning_rate": 1.2878596186475408e-05, - "loss": 0.8861, + "learning_rate": 1.2896643384346936e-05, + "loss": 0.8738, "step": 14972 }, { - "epoch": 0.42488649262202044, + "epoch": 0.42429652299583437, "grad_norm": 0.0, - "learning_rate": 1.2877715998420457e-05, - "loss": 0.9871, + "learning_rate": 1.2895764930075164e-05, + "loss": 0.8605, "step": 14973 }, { - "epoch": 0.4249148694665153, + "epoch": 0.42432486043809686, "grad_norm": 0.0, - "learning_rate": 1.2876835786057313e-05, - "loss": 0.9198, + "learning_rate": 1.2894886451411289e-05, + "loss": 0.948, "step": 14974 }, { - "epoch": 0.4249432463110102, + "epoch": 0.4243531978803593, "grad_norm": 0.0, - "learning_rate": 1.2875955549393408e-05, - "loss": 0.9006, + "learning_rate": 1.2894007948362707e-05, + "loss": 0.897, "step": 14975 }, { - "epoch": 0.42497162315550513, + "epoch": 0.4243815353226218, "grad_norm": 0.0, - "learning_rate": 1.2875075288436177e-05, - "loss": 0.9341, + "learning_rate": 1.2893129420936826e-05, + "loss": 0.8536, "step": 14976 }, { - "epoch": 0.425, + "epoch": 0.42440987276488423, "grad_norm": 0.0, - "learning_rate": 1.2874195003193052e-05, - "loss": 0.9135, + "learning_rate": 1.289225086914104e-05, + "loss": 0.8349, "step": 14977 }, { - "epoch": 0.4250283768444949, + "epoch": 0.4244382102071467, "grad_norm": 0.0, - "learning_rate": 1.2873314693671475e-05, - "loss": 0.9138, + "learning_rate": 1.2891372292982749e-05, + "loss": 0.9819, "step": 14978 }, { - "epoch": 0.42505675368898976, + "epoch": 0.42446654764940916, "grad_norm": 0.0, - "learning_rate": 1.287243435987888e-05, - "loss": 0.8701, + "learning_rate": 1.2890493692469357e-05, + "loss": 0.9785, "step": 14979 }, { - "epoch": 0.4250851305334847, + "epoch": 0.4244948850916716, "grad_norm": 0.0, - "learning_rate": 1.2871554001822701e-05, - "loss": 0.9718, + "learning_rate": 1.2889615067608261e-05, + "loss": 0.9573, "step": 14980 }, { - "epoch": 0.4251135073779796, + "epoch": 0.4245232225339341, "grad_norm": 0.0, - "learning_rate": 1.2870673619510382e-05, - "loss": 0.9157, + "learning_rate": 1.2888736418406869e-05, + "loss": 0.8978, "step": 14981 }, { - "epoch": 0.42514188422247445, + "epoch": 0.42455155997619654, "grad_norm": 0.0, - "learning_rate": 1.2869793212949349e-05, - "loss": 0.9651, + "learning_rate": 1.2887857744872574e-05, + "loss": 0.9991, "step": 14982 }, { - "epoch": 0.42517026106696937, + "epoch": 0.42457989741845903, "grad_norm": 0.0, - "learning_rate": 1.2868912782147043e-05, - "loss": 0.9903, + "learning_rate": 1.288697904701278e-05, + "loss": 0.9209, "step": 14983 }, { - "epoch": 0.42519863791146423, + "epoch": 0.42460823486072147, "grad_norm": 0.0, - "learning_rate": 1.2868032327110904e-05, - "loss": 0.9156, + "learning_rate": 1.288610032483489e-05, + "loss": 0.9276, "step": 14984 }, { - "epoch": 0.42522701475595914, + "epoch": 0.4246365723029839, "grad_norm": 0.0, - "learning_rate": 1.2867151847848364e-05, - "loss": 0.9073, + "learning_rate": 1.288522157834631e-05, + "loss": 0.9153, "step": 14985 }, { - "epoch": 0.425255391600454, + "epoch": 0.4246649097452464, "grad_norm": 0.0, - "learning_rate": 1.286627134436687e-05, - "loss": 0.9838, + "learning_rate": 1.2884342807554433e-05, + "loss": 0.8968, "step": 14986 }, { - "epoch": 0.4252837684449489, + "epoch": 0.42469324718750884, "grad_norm": 0.0, - "learning_rate": 1.2865390816673846e-05, - "loss": 1.0334, + "learning_rate": 1.2883464012466664e-05, + "loss": 0.8749, "step": 14987 }, { - "epoch": 0.42531214528944383, + "epoch": 0.42472158462977133, "grad_norm": 0.0, - "learning_rate": 1.2864510264776739e-05, - "loss": 0.9475, + "learning_rate": 1.2882585193090412e-05, + "loss": 0.8926, "step": 14988 }, { - "epoch": 0.4253405221339387, + "epoch": 0.42474992207203377, "grad_norm": 0.0, - "learning_rate": 1.2863629688682988e-05, - "loss": 0.8888, + "learning_rate": 1.288170634943307e-05, + "loss": 0.9907, "step": 14989 }, { - "epoch": 0.4253688989784336, + "epoch": 0.42477825951429626, "grad_norm": 0.0, - "learning_rate": 1.2862749088400026e-05, - "loss": 0.8562, + "learning_rate": 1.2880827481502048e-05, + "loss": 0.8737, "step": 14990 }, { - "epoch": 0.42539727582292847, + "epoch": 0.4248065969565587, "grad_norm": 0.0, - "learning_rate": 1.2861868463935294e-05, - "loss": 0.9876, + "learning_rate": 1.2879948589304745e-05, + "loss": 0.8313, "step": 14991 }, { - "epoch": 0.4254256526674234, + "epoch": 0.42483493439882114, "grad_norm": 0.0, - "learning_rate": 1.2860987815296233e-05, - "loss": 0.9647, + "learning_rate": 1.2879069672848565e-05, + "loss": 0.8825, "step": 14992 }, { - "epoch": 0.4254540295119183, + "epoch": 0.42486327184108363, "grad_norm": 0.0, - "learning_rate": 1.2860107142490274e-05, - "loss": 0.9578, + "learning_rate": 1.2878190732140911e-05, + "loss": 0.8635, "step": 14993 }, { - "epoch": 0.42548240635641316, + "epoch": 0.4248916092833461, "grad_norm": 0.0, - "learning_rate": 1.2859226445524865e-05, - "loss": 0.9148, + "learning_rate": 1.2877311767189192e-05, + "loss": 0.8578, "step": 14994 }, { - "epoch": 0.42551078320090807, + "epoch": 0.42491994672560857, "grad_norm": 0.0, - "learning_rate": 1.285834572440744e-05, - "loss": 0.8826, + "learning_rate": 1.2876432778000804e-05, + "loss": 0.869, "step": 14995 }, { - "epoch": 0.42553916004540293, + "epoch": 0.424948284167871, "grad_norm": 0.0, - "learning_rate": 1.2857464979145442e-05, - "loss": 0.9122, + "learning_rate": 1.2875553764583156e-05, + "loss": 0.9055, "step": 14996 }, { - "epoch": 0.42556753688989785, + "epoch": 0.42497662161013344, "grad_norm": 0.0, - "learning_rate": 1.2856584209746306e-05, - "loss": 0.8412, + "learning_rate": 1.287467472694365e-05, + "loss": 0.8723, "step": 14997 }, { - "epoch": 0.42559591373439276, + "epoch": 0.42500495905239594, "grad_norm": 0.0, - "learning_rate": 1.2855703416217478e-05, - "loss": 0.8884, + "learning_rate": 1.2873795665089692e-05, + "loss": 0.7701, "step": 14998 }, { - "epoch": 0.4256242905788876, + "epoch": 0.4250332964946584, "grad_norm": 0.0, - "learning_rate": 1.2854822598566394e-05, - "loss": 0.891, + "learning_rate": 1.2872916579028684e-05, + "loss": 0.9019, "step": 14999 }, { - "epoch": 0.42565266742338254, + "epoch": 0.42506163393692087, "grad_norm": 0.0, - "learning_rate": 1.2853941756800494e-05, - "loss": 0.9268, + "learning_rate": 1.2872037468768032e-05, + "loss": 0.8637, "step": 15000 }, { - "epoch": 0.4256810442678774, + "epoch": 0.4250899713791833, "grad_norm": 0.0, - "learning_rate": 1.2853060890927217e-05, - "loss": 0.9172, + "learning_rate": 1.2871158334315146e-05, + "loss": 0.9766, "step": 15001 }, { - "epoch": 0.4257094211123723, + "epoch": 0.4251183088214458, "grad_norm": 0.0, - "learning_rate": 1.285218000095401e-05, - "loss": 0.879, + "learning_rate": 1.2870279175677427e-05, + "loss": 0.9997, "step": 15002 }, { - "epoch": 0.42573779795686717, + "epoch": 0.42514664626370824, "grad_norm": 0.0, - "learning_rate": 1.2851299086888313e-05, - "loss": 0.9872, + "learning_rate": 1.286939999286228e-05, + "loss": 0.8098, "step": 15003 }, { - "epoch": 0.4257661748013621, + "epoch": 0.4251749837059707, "grad_norm": 0.0, - "learning_rate": 1.285041814873756e-05, - "loss": 0.9607, + "learning_rate": 1.2868520785877108e-05, + "loss": 1.1765, "step": 15004 }, { - "epoch": 0.425794551645857, + "epoch": 0.42520332114823317, "grad_norm": 0.0, - "learning_rate": 1.28495371865092e-05, - "loss": 0.9434, + "learning_rate": 1.2867641554729325e-05, + "loss": 0.8933, "step": 15005 }, { - "epoch": 0.42582292849035186, + "epoch": 0.4252316585904956, "grad_norm": 0.0, - "learning_rate": 1.284865620021067e-05, - "loss": 0.9925, + "learning_rate": 1.2866762299426328e-05, + "loss": 0.8285, "step": 15006 }, { - "epoch": 0.4258513053348468, + "epoch": 0.4252599960327581, "grad_norm": 0.0, - "learning_rate": 1.2847775189849412e-05, - "loss": 0.8271, + "learning_rate": 1.2865883019975529e-05, + "loss": 0.9221, "step": 15007 }, { - "epoch": 0.42587968217934163, + "epoch": 0.42528833347502054, "grad_norm": 0.0, - "learning_rate": 1.2846894155432868e-05, - "loss": 0.9417, + "learning_rate": 1.2865003716384332e-05, + "loss": 0.7478, "step": 15008 }, { - "epoch": 0.42590805902383655, + "epoch": 0.425316670917283, "grad_norm": 0.0, - "learning_rate": 1.2846013096968483e-05, - "loss": 0.7647, + "learning_rate": 1.2864124388660148e-05, + "loss": 0.9544, "step": 15009 }, { - "epoch": 0.42593643586833146, + "epoch": 0.4253450083595455, "grad_norm": 0.0, - "learning_rate": 1.28451320144637e-05, - "loss": 0.9508, + "learning_rate": 1.286324503681038e-05, + "loss": 0.8784, "step": 15010 }, { - "epoch": 0.4259648127128263, + "epoch": 0.4253733458018079, "grad_norm": 0.0, - "learning_rate": 1.2844250907925953e-05, - "loss": 0.9322, + "learning_rate": 1.2862365660842437e-05, + "loss": 0.8333, "step": 15011 }, { - "epoch": 0.42599318955732124, + "epoch": 0.4254016832440704, "grad_norm": 0.0, - "learning_rate": 1.2843369777362695e-05, - "loss": 1.0329, + "learning_rate": 1.286148626076372e-05, + "loss": 0.9322, "step": 15012 }, { - "epoch": 0.4260215664018161, + "epoch": 0.42543002068633284, "grad_norm": 0.0, - "learning_rate": 1.2842488622781364e-05, - "loss": 0.9601, + "learning_rate": 1.2860606836581643e-05, + "loss": 0.9273, "step": 15013 }, { - "epoch": 0.426049943246311, + "epoch": 0.42545835812859534, "grad_norm": 0.0, - "learning_rate": 1.2841607444189402e-05, - "loss": 0.9922, + "learning_rate": 1.2859727388303615e-05, + "loss": 1.0628, "step": 15014 }, { - "epoch": 0.42607832009080593, + "epoch": 0.4254866955708578, "grad_norm": 0.0, - "learning_rate": 1.2840726241594258e-05, - "loss": 1.0216, + "learning_rate": 1.2858847915937043e-05, + "loss": 0.9332, "step": 15015 }, { - "epoch": 0.4261066969353008, + "epoch": 0.4255150330131202, "grad_norm": 0.0, - "learning_rate": 1.2839845015003368e-05, - "loss": 0.7919, + "learning_rate": 1.2857968419489329e-05, + "loss": 0.9592, "step": 15016 }, { - "epoch": 0.4261350737797957, + "epoch": 0.4255433704553827, "grad_norm": 0.0, - "learning_rate": 1.283896376442418e-05, - "loss": 0.929, + "learning_rate": 1.285708889896789e-05, + "loss": 0.8137, "step": 15017 }, { - "epoch": 0.42616345062429056, + "epoch": 0.42557170789764515, "grad_norm": 0.0, - "learning_rate": 1.2838082489864137e-05, - "loss": 0.962, + "learning_rate": 1.2856209354380127e-05, + "loss": 0.8022, "step": 15018 }, { - "epoch": 0.4261918274687855, + "epoch": 0.42560004533990764, "grad_norm": 0.0, - "learning_rate": 1.2837201191330686e-05, - "loss": 0.8789, + "learning_rate": 1.2855329785733452e-05, + "loss": 0.9691, "step": 15019 }, { - "epoch": 0.42622020431328034, + "epoch": 0.4256283827821701, "grad_norm": 0.0, - "learning_rate": 1.2836319868831269e-05, - "loss": 0.9013, + "learning_rate": 1.2854450193035275e-05, + "loss": 0.9463, "step": 15020 }, { - "epoch": 0.42624858115777525, + "epoch": 0.4256567202244325, "grad_norm": 0.0, - "learning_rate": 1.2835438522373326e-05, - "loss": 0.9543, + "learning_rate": 1.2853570576293002e-05, + "loss": 0.9034, "step": 15021 }, { - "epoch": 0.42627695800227017, + "epoch": 0.425685057666695, "grad_norm": 0.0, - "learning_rate": 1.283455715196431e-05, - "loss": 0.8925, + "learning_rate": 1.2852690935514047e-05, + "loss": 0.8638, "step": 15022 }, { - "epoch": 0.426305334846765, + "epoch": 0.42571339510895745, "grad_norm": 0.0, - "learning_rate": 1.2833675757611666e-05, - "loss": 0.9712, + "learning_rate": 1.2851811270705819e-05, + "loss": 0.758, "step": 15023 }, { - "epoch": 0.42633371169125994, + "epoch": 0.42574173255121994, "grad_norm": 0.0, - "learning_rate": 1.2832794339322827e-05, - "loss": 0.9122, + "learning_rate": 1.2850931581875723e-05, + "loss": 0.9872, "step": 15024 }, { - "epoch": 0.4263620885357548, + "epoch": 0.4257700699934824, "grad_norm": 0.0, - "learning_rate": 1.2831912897105252e-05, - "loss": 1.0062, + "learning_rate": 1.2850051869031169e-05, + "loss": 0.9059, "step": 15025 }, { - "epoch": 0.4263904653802497, + "epoch": 0.4257984074357449, "grad_norm": 0.0, - "learning_rate": 1.283103143096638e-05, - "loss": 0.8428, + "learning_rate": 1.284917213217957e-05, + "loss": 0.846, "step": 15026 }, { - "epoch": 0.42641884222474463, + "epoch": 0.4258267448780073, "grad_norm": 0.0, - "learning_rate": 1.283014994091366e-05, - "loss": 0.8755, + "learning_rate": 1.284829237132834e-05, + "loss": 0.7744, "step": 15027 }, { - "epoch": 0.4264472190692395, + "epoch": 0.42585508232026975, "grad_norm": 0.0, - "learning_rate": 1.2829268426954535e-05, - "loss": 0.9138, + "learning_rate": 1.2847412586484884e-05, + "loss": 0.8729, "step": 15028 }, { - "epoch": 0.4264755959137344, + "epoch": 0.42588341976253224, "grad_norm": 0.0, - "learning_rate": 1.2828386889096453e-05, - "loss": 0.8324, + "learning_rate": 1.2846532777656613e-05, + "loss": 0.9197, "step": 15029 }, { - "epoch": 0.42650397275822927, + "epoch": 0.4259117572047947, "grad_norm": 0.0, - "learning_rate": 1.282750532734686e-05, - "loss": 0.9758, + "learning_rate": 1.2845652944850941e-05, + "loss": 0.806, "step": 15030 }, { - "epoch": 0.4265323496027242, + "epoch": 0.4259400946470572, "grad_norm": 0.0, - "learning_rate": 1.28266237417132e-05, - "loss": 0.9473, + "learning_rate": 1.284477308807528e-05, + "loss": 0.9603, "step": 15031 }, { - "epoch": 0.4265607264472191, + "epoch": 0.4259684320893196, "grad_norm": 0.0, - "learning_rate": 1.2825742132202925e-05, - "loss": 0.9557, + "learning_rate": 1.2843893207337033e-05, + "loss": 0.921, "step": 15032 }, { - "epoch": 0.42658910329171396, + "epoch": 0.42599676953158205, "grad_norm": 0.0, - "learning_rate": 1.282486049882348e-05, - "loss": 0.9701, + "learning_rate": 1.284301330264362e-05, + "loss": 1.0023, "step": 15033 }, { - "epoch": 0.42661748013620887, + "epoch": 0.42602510697384455, "grad_norm": 0.0, - "learning_rate": 1.2823978841582308e-05, - "loss": 0.8615, + "learning_rate": 1.2842133374002452e-05, + "loss": 0.9028, "step": 15034 }, { - "epoch": 0.42664585698070373, + "epoch": 0.426053444416107, "grad_norm": 0.0, - "learning_rate": 1.2823097160486861e-05, - "loss": 0.8932, + "learning_rate": 1.2841253421420938e-05, + "loss": 0.9351, "step": 15035 }, { - "epoch": 0.42667423382519865, + "epoch": 0.4260817818583695, "grad_norm": 0.0, - "learning_rate": 1.2822215455544587e-05, - "loss": 0.7378, + "learning_rate": 1.2840373444906493e-05, + "loss": 0.9067, "step": 15036 }, { - "epoch": 0.4267026106696935, + "epoch": 0.4261101193006319, "grad_norm": 0.0, - "learning_rate": 1.282133372676293e-05, - "loss": 0.9389, + "learning_rate": 1.2839493444466525e-05, + "loss": 0.9706, "step": 15037 }, { - "epoch": 0.4267309875141884, + "epoch": 0.4261384567428944, "grad_norm": 0.0, - "learning_rate": 1.2820451974149341e-05, - "loss": 0.8498, + "learning_rate": 1.283861342010845e-05, + "loss": 0.845, "step": 15038 }, { - "epoch": 0.42675936435868334, + "epoch": 0.42616679418515685, "grad_norm": 0.0, - "learning_rate": 1.2819570197711269e-05, - "loss": 0.9473, + "learning_rate": 1.283773337183968e-05, + "loss": 1.0184, "step": 15039 }, { - "epoch": 0.4267877412031782, + "epoch": 0.4261951316274193, "grad_norm": 0.0, - "learning_rate": 1.2818688397456159e-05, - "loss": 0.7496, + "learning_rate": 1.2836853299667628e-05, + "loss": 0.8751, "step": 15040 }, { - "epoch": 0.4268161180476731, + "epoch": 0.4262234690696818, "grad_norm": 0.0, - "learning_rate": 1.281780657339146e-05, - "loss": 0.9398, + "learning_rate": 1.2835973203599707e-05, + "loss": 0.9962, "step": 15041 }, { - "epoch": 0.42684449489216797, + "epoch": 0.4262518065119442, "grad_norm": 0.0, - "learning_rate": 1.2816924725524624e-05, - "loss": 0.9817, + "learning_rate": 1.2835093083643332e-05, + "loss": 0.8703, "step": 15042 }, { - "epoch": 0.4268728717366629, + "epoch": 0.4262801439542067, "grad_norm": 0.0, - "learning_rate": 1.28160428538631e-05, - "loss": 0.9376, + "learning_rate": 1.2834212939805917e-05, + "loss": 0.9417, "step": 15043 }, { - "epoch": 0.4269012485811578, + "epoch": 0.42630848139646915, "grad_norm": 0.0, - "learning_rate": 1.2815160958414332e-05, - "loss": 0.9199, + "learning_rate": 1.283333277209487e-05, + "loss": 0.9944, "step": 15044 }, { - "epoch": 0.42692962542565266, + "epoch": 0.4263368188387316, "grad_norm": 0.0, - "learning_rate": 1.2814279039185775e-05, - "loss": 0.9589, + "learning_rate": 1.283245258051761e-05, + "loss": 0.8687, "step": 15045 }, { - "epoch": 0.4269580022701476, + "epoch": 0.4263651562809941, "grad_norm": 0.0, - "learning_rate": 1.2813397096184876e-05, - "loss": 0.8359, + "learning_rate": 1.2831572365081549e-05, + "loss": 0.9171, "step": 15046 }, { - "epoch": 0.42698637911464243, + "epoch": 0.4263934937232565, "grad_norm": 0.0, - "learning_rate": 1.2812515129419085e-05, - "loss": 0.908, + "learning_rate": 1.2830692125794104e-05, + "loss": 1.0519, "step": 15047 }, { - "epoch": 0.42701475595913735, + "epoch": 0.426421831165519, "grad_norm": 0.0, - "learning_rate": 1.2811633138895851e-05, - "loss": 0.9573, + "learning_rate": 1.282981186266269e-05, + "loss": 0.9073, "step": 15048 }, { - "epoch": 0.42704313280363226, + "epoch": 0.42645016860778145, "grad_norm": 0.0, - "learning_rate": 1.2810751124622626e-05, - "loss": 0.9765, + "learning_rate": 1.2828931575694718e-05, + "loss": 0.9661, "step": 15049 }, { - "epoch": 0.4270715096481271, + "epoch": 0.42647850605004395, "grad_norm": 0.0, - "learning_rate": 1.2809869086606863e-05, - "loss": 0.9867, + "learning_rate": 1.2828051264897604e-05, + "loss": 0.9336, "step": 15050 }, { - "epoch": 0.42709988649262204, + "epoch": 0.4265068434923064, "grad_norm": 0.0, - "learning_rate": 1.2808987024856006e-05, - "loss": 0.7929, + "learning_rate": 1.2827170930278765e-05, + "loss": 0.97, "step": 15051 }, { - "epoch": 0.4271282633371169, + "epoch": 0.4265351809345688, "grad_norm": 0.0, - "learning_rate": 1.2808104939377512e-05, - "loss": 0.8848, + "learning_rate": 1.2826290571845614e-05, + "loss": 0.8047, "step": 15052 }, { - "epoch": 0.4271566401816118, + "epoch": 0.4265635183768313, "grad_norm": 0.0, - "learning_rate": 1.280722283017883e-05, - "loss": 0.8683, + "learning_rate": 1.2825410189605569e-05, + "loss": 0.8402, "step": 15053 }, { - "epoch": 0.4271850170261067, + "epoch": 0.42659185581909376, "grad_norm": 0.0, - "learning_rate": 1.2806340697267406e-05, - "loss": 0.9277, + "learning_rate": 1.2824529783566044e-05, + "loss": 0.9097, "step": 15054 }, { - "epoch": 0.4272133938706016, + "epoch": 0.42662019326135625, "grad_norm": 0.0, - "learning_rate": 1.2805458540650696e-05, - "loss": 0.9983, + "learning_rate": 1.2823649353734458e-05, + "loss": 0.8985, "step": 15055 }, { - "epoch": 0.4272417707150965, + "epoch": 0.4266485307036187, "grad_norm": 0.0, - "learning_rate": 1.2804576360336156e-05, - "loss": 0.8937, + "learning_rate": 1.2822768900118226e-05, + "loss": 1.0072, "step": 15056 }, { - "epoch": 0.42727014755959136, + "epoch": 0.4266768681458811, "grad_norm": 0.0, - "learning_rate": 1.2803694156331233e-05, - "loss": 0.8428, + "learning_rate": 1.282188842272476e-05, + "loss": 0.9352, "step": 15057 }, { - "epoch": 0.4272985244040863, + "epoch": 0.4267052055881436, "grad_norm": 0.0, - "learning_rate": 1.2802811928643375e-05, - "loss": 0.8476, + "learning_rate": 1.2821007921561481e-05, + "loss": 0.9535, "step": 15058 }, { - "epoch": 0.42732690124858114, + "epoch": 0.42673354303040606, "grad_norm": 0.0, - "learning_rate": 1.2801929677280043e-05, - "loss": 0.9206, + "learning_rate": 1.2820127396635802e-05, + "loss": 1.0181, "step": 15059 }, { - "epoch": 0.42735527809307605, + "epoch": 0.42676188047266855, "grad_norm": 0.0, - "learning_rate": 1.2801047402248686e-05, - "loss": 1.0377, + "learning_rate": 1.2819246847955148e-05, + "loss": 1.0278, "step": 15060 }, { - "epoch": 0.42738365493757097, + "epoch": 0.426790217914931, "grad_norm": 0.0, - "learning_rate": 1.280016510355675e-05, - "loss": 0.8101, + "learning_rate": 1.2818366275526927e-05, + "loss": 0.9217, "step": 15061 }, { - "epoch": 0.4274120317820658, + "epoch": 0.4268185553571935, "grad_norm": 0.0, - "learning_rate": 1.2799282781211698e-05, - "loss": 0.857, + "learning_rate": 1.2817485679358562e-05, + "loss": 0.9263, "step": 15062 }, { - "epoch": 0.42744040862656074, + "epoch": 0.4268468927994559, "grad_norm": 0.0, - "learning_rate": 1.2798400435220977e-05, - "loss": 0.8711, + "learning_rate": 1.2816605059457468e-05, + "loss": 0.8556, "step": 15063 }, { - "epoch": 0.4274687854710556, + "epoch": 0.42687523024171836, "grad_norm": 0.0, - "learning_rate": 1.2797518065592043e-05, - "loss": 1.0103, + "learning_rate": 1.2815724415831065e-05, + "loss": 0.8426, "step": 15064 }, { - "epoch": 0.4274971623155505, + "epoch": 0.42690356768398086, "grad_norm": 0.0, - "learning_rate": 1.2796635672332345e-05, - "loss": 0.9278, + "learning_rate": 1.2814843748486767e-05, + "loss": 0.9094, "step": 15065 }, { - "epoch": 0.4275255391600454, + "epoch": 0.4269319051262433, "grad_norm": 0.0, - "learning_rate": 1.2795753255449343e-05, - "loss": 0.8594, + "learning_rate": 1.2813963057431995e-05, + "loss": 0.8773, "step": 15066 }, { - "epoch": 0.4275539160045403, + "epoch": 0.4269602425685058, "grad_norm": 0.0, - "learning_rate": 1.2794870814950486e-05, - "loss": 0.8272, + "learning_rate": 1.281308234267417e-05, + "loss": 0.979, "step": 15067 }, { - "epoch": 0.4275822928490352, + "epoch": 0.4269885800107682, "grad_norm": 0.0, - "learning_rate": 1.2793988350843232e-05, - "loss": 0.8577, + "learning_rate": 1.2812201604220706e-05, + "loss": 0.9813, "step": 15068 }, { - "epoch": 0.42761066969353007, + "epoch": 0.42701691745303066, "grad_norm": 0.0, - "learning_rate": 1.279310586313503e-05, - "loss": 0.9853, + "learning_rate": 1.2811320842079026e-05, + "loss": 1.0477, "step": 15069 }, { - "epoch": 0.427639046538025, + "epoch": 0.42704525489529316, "grad_norm": 0.0, - "learning_rate": 1.2792223351833338e-05, - "loss": 0.9066, + "learning_rate": 1.2810440056256543e-05, + "loss": 0.8624, "step": 15070 }, { - "epoch": 0.42766742338251984, + "epoch": 0.4270735923375556, "grad_norm": 0.0, - "learning_rate": 1.279134081694561e-05, - "loss": 1.0106, + "learning_rate": 1.2809559246760684e-05, + "loss": 1.0059, "step": 15071 }, { - "epoch": 0.42769580022701476, + "epoch": 0.4271019297798181, "grad_norm": 0.0, - "learning_rate": 1.27904582584793e-05, - "loss": 0.8934, + "learning_rate": 1.2808678413598861e-05, + "loss": 0.9836, "step": 15072 }, { - "epoch": 0.42772417707150967, + "epoch": 0.42713026722208053, "grad_norm": 0.0, - "learning_rate": 1.2789575676441866e-05, - "loss": 0.9336, + "learning_rate": 1.2807797556778497e-05, + "loss": 0.9385, "step": 15073 }, { - "epoch": 0.42775255391600453, + "epoch": 0.427158604664343, "grad_norm": 0.0, - "learning_rate": 1.2788693070840758e-05, - "loss": 0.9509, + "learning_rate": 1.2806916676307012e-05, + "loss": 0.9844, "step": 15074 }, { - "epoch": 0.42778093076049944, + "epoch": 0.42718694210660546, "grad_norm": 0.0, - "learning_rate": 1.2787810441683435e-05, - "loss": 0.8844, + "learning_rate": 1.2806035772191825e-05, + "loss": 0.901, "step": 15075 }, { - "epoch": 0.4278093076049943, + "epoch": 0.4272152795488679, "grad_norm": 0.0, - "learning_rate": 1.2786927788977353e-05, - "loss": 0.9508, + "learning_rate": 1.2805154844440359e-05, + "loss": 0.9718, "step": 15076 }, { - "epoch": 0.4278376844494892, + "epoch": 0.4272436169911304, "grad_norm": 0.0, - "learning_rate": 1.2786045112729964e-05, - "loss": 0.7767, + "learning_rate": 1.2804273893060028e-05, + "loss": 0.86, "step": 15077 }, { - "epoch": 0.42786606129398413, + "epoch": 0.42727195443339283, "grad_norm": 0.0, - "learning_rate": 1.2785162412948728e-05, - "loss": 0.8362, + "learning_rate": 1.2803392918058259e-05, + "loss": 0.9723, "step": 15078 }, { - "epoch": 0.427894438138479, + "epoch": 0.4273002918756553, "grad_norm": 0.0, - "learning_rate": 1.2784279689641102e-05, - "loss": 0.8243, + "learning_rate": 1.280251191944247e-05, + "loss": 0.9387, "step": 15079 }, { - "epoch": 0.4279228149829739, + "epoch": 0.42732862931791776, "grad_norm": 0.0, - "learning_rate": 1.278339694281454e-05, - "loss": 0.8438, + "learning_rate": 1.2801630897220083e-05, + "loss": 0.9339, "step": 15080 }, { - "epoch": 0.42795119182746877, + "epoch": 0.4273569667601802, "grad_norm": 0.0, - "learning_rate": 1.2782514172476494e-05, - "loss": 0.9789, + "learning_rate": 1.280074985139852e-05, + "loss": 0.9016, "step": 15081 }, { - "epoch": 0.4279795686719637, + "epoch": 0.4273853042024427, "grad_norm": 0.0, - "learning_rate": 1.2781631378634432e-05, - "loss": 0.9532, + "learning_rate": 1.2799868781985201e-05, + "loss": 0.9362, "step": 15082 }, { - "epoch": 0.42800794551645854, + "epoch": 0.42741364164470513, "grad_norm": 0.0, - "learning_rate": 1.2780748561295802e-05, - "loss": 1.033, + "learning_rate": 1.2798987688987543e-05, + "loss": 0.9308, "step": 15083 }, { - "epoch": 0.42803632236095346, + "epoch": 0.4274419790869676, "grad_norm": 0.0, - "learning_rate": 1.2779865720468065e-05, - "loss": 0.776, + "learning_rate": 1.2798106572412973e-05, + "loss": 0.8358, "step": 15084 }, { - "epoch": 0.4280646992054484, + "epoch": 0.42747031652923007, "grad_norm": 0.0, - "learning_rate": 1.2778982856158673e-05, - "loss": 0.8957, + "learning_rate": 1.2797225432268916e-05, + "loss": 0.8898, "step": 15085 }, { - "epoch": 0.42809307604994323, + "epoch": 0.42749865397149256, "grad_norm": 0.0, - "learning_rate": 1.2778099968375092e-05, - "loss": 0.8625, + "learning_rate": 1.279634426856279e-05, + "loss": 0.8964, "step": 15086 }, { - "epoch": 0.42812145289443815, + "epoch": 0.427526991413755, "grad_norm": 0.0, - "learning_rate": 1.2777217057124776e-05, - "loss": 0.7971, + "learning_rate": 1.2795463081302017e-05, + "loss": 0.9532, "step": 15087 }, { - "epoch": 0.428149829738933, + "epoch": 0.42755532885601744, "grad_norm": 0.0, - "learning_rate": 1.277633412241518e-05, - "loss": 0.9617, + "learning_rate": 1.2794581870494021e-05, + "loss": 0.8124, "step": 15088 }, { - "epoch": 0.4281782065834279, + "epoch": 0.42758366629827993, "grad_norm": 0.0, - "learning_rate": 1.2775451164253767e-05, - "loss": 0.8109, + "learning_rate": 1.2793700636146222e-05, + "loss": 0.9048, "step": 15089 }, { - "epoch": 0.42820658342792284, + "epoch": 0.42761200374054237, "grad_norm": 0.0, - "learning_rate": 1.2774568182647995e-05, - "loss": 0.9376, + "learning_rate": 1.2792819378266047e-05, + "loss": 0.9429, "step": 15090 }, { - "epoch": 0.4282349602724177, + "epoch": 0.42764034118280486, "grad_norm": 0.0, - "learning_rate": 1.2773685177605314e-05, - "loss": 0.999, + "learning_rate": 1.2791938096860914e-05, + "loss": 0.9685, "step": 15091 }, { - "epoch": 0.4282633371169126, + "epoch": 0.4276686786250673, "grad_norm": 0.0, - "learning_rate": 1.2772802149133196e-05, - "loss": 0.9912, + "learning_rate": 1.2791056791938255e-05, + "loss": 0.9178, "step": 15092 }, { - "epoch": 0.42829171396140747, + "epoch": 0.42769701606732974, "grad_norm": 0.0, - "learning_rate": 1.2771919097239092e-05, - "loss": 0.9567, + "learning_rate": 1.2790175463505485e-05, + "loss": 0.9344, "step": 15093 }, { - "epoch": 0.4283200908059024, + "epoch": 0.42772535350959223, "grad_norm": 0.0, - "learning_rate": 1.2771036021930463e-05, - "loss": 0.8869, + "learning_rate": 1.2789294111570035e-05, + "loss": 0.8793, "step": 15094 }, { - "epoch": 0.4283484676503973, + "epoch": 0.42775369095185467, "grad_norm": 0.0, - "learning_rate": 1.2770152923214767e-05, - "loss": 0.9889, + "learning_rate": 1.278841273613932e-05, + "loss": 0.8997, "step": 15095 }, { - "epoch": 0.42837684449489216, + "epoch": 0.42778202839411716, "grad_norm": 0.0, - "learning_rate": 1.2769269801099467e-05, - "loss": 0.913, + "learning_rate": 1.2787531337220771e-05, + "loss": 0.8721, "step": 15096 }, { - "epoch": 0.4284052213393871, + "epoch": 0.4278103658363796, "grad_norm": 0.0, - "learning_rate": 1.2768386655592018e-05, - "loss": 0.9227, + "learning_rate": 1.2786649914821807e-05, + "loss": 0.905, "step": 15097 }, { - "epoch": 0.42843359818388194, + "epoch": 0.4278387032786421, "grad_norm": 0.0, - "learning_rate": 1.2767503486699884e-05, - "loss": 0.9038, + "learning_rate": 1.278576846894986e-05, + "loss": 0.8111, "step": 15098 }, { - "epoch": 0.42846197502837685, + "epoch": 0.42786704072090453, "grad_norm": 0.0, - "learning_rate": 1.2766620294430523e-05, - "loss": 0.9632, + "learning_rate": 1.278488699961235e-05, + "loss": 0.9303, "step": 15099 }, { - "epoch": 0.4284903518728717, + "epoch": 0.427895378163167, "grad_norm": 0.0, - "learning_rate": 1.2765737078791394e-05, - "loss": 0.8636, + "learning_rate": 1.2784005506816701e-05, + "loss": 0.9128, "step": 15100 }, { - "epoch": 0.4285187287173666, + "epoch": 0.42792371560542947, "grad_norm": 0.0, - "learning_rate": 1.2764853839789964e-05, - "loss": 1.0034, + "learning_rate": 1.2783123990570343e-05, + "loss": 0.9796, "step": 15101 }, { - "epoch": 0.42854710556186154, + "epoch": 0.4279520530476919, "grad_norm": 0.0, - "learning_rate": 1.2763970577433685e-05, - "loss": 0.8584, + "learning_rate": 1.2782242450880697e-05, + "loss": 0.8833, "step": 15102 }, { - "epoch": 0.4285754824063564, + "epoch": 0.4279803904899544, "grad_norm": 0.0, - "learning_rate": 1.2763087291730023e-05, - "loss": 0.9133, + "learning_rate": 1.2781360887755188e-05, + "loss": 0.9817, "step": 15103 }, { - "epoch": 0.4286038592508513, + "epoch": 0.42800872793221684, "grad_norm": 0.0, - "learning_rate": 1.276220398268644e-05, - "loss": 0.9133, + "learning_rate": 1.2780479301201243e-05, + "loss": 0.9124, "step": 15104 }, { - "epoch": 0.4286322360953462, + "epoch": 0.4280370653744793, "grad_norm": 0.0, - "learning_rate": 1.2761320650310396e-05, - "loss": 0.8242, + "learning_rate": 1.277959769122629e-05, + "loss": 0.8594, "step": 15105 }, { - "epoch": 0.4286606129398411, + "epoch": 0.42806540281674177, "grad_norm": 0.0, - "learning_rate": 1.2760437294609354e-05, - "loss": 0.8878, + "learning_rate": 1.2778716057837755e-05, + "loss": 0.884, "step": 15106 }, { - "epoch": 0.428688989784336, + "epoch": 0.4280937402590042, "grad_norm": 0.0, - "learning_rate": 1.275955391559077e-05, - "loss": 0.8464, + "learning_rate": 1.2777834401043061e-05, + "loss": 0.8769, "step": 15107 }, { - "epoch": 0.42871736662883086, + "epoch": 0.4281220777012667, "grad_norm": 0.0, - "learning_rate": 1.2758670513262113e-05, - "loss": 0.896, + "learning_rate": 1.2776952720849636e-05, + "loss": 0.9793, "step": 15108 }, { - "epoch": 0.4287457434733258, + "epoch": 0.42815041514352914, "grad_norm": 0.0, - "learning_rate": 1.2757787087630837e-05, - "loss": 0.9259, + "learning_rate": 1.2776071017264908e-05, + "loss": 1.0332, "step": 15109 }, { - "epoch": 0.42877412031782064, + "epoch": 0.42817875258579163, "grad_norm": 0.0, - "learning_rate": 1.2756903638704414e-05, - "loss": 0.8377, + "learning_rate": 1.27751892902963e-05, + "loss": 0.802, "step": 15110 }, { - "epoch": 0.42880249716231555, + "epoch": 0.42820709002805407, "grad_norm": 0.0, - "learning_rate": 1.2756020166490303e-05, - "loss": 0.9115, + "learning_rate": 1.2774307539951245e-05, + "loss": 1.01, "step": 15111 }, { - "epoch": 0.42883087400681047, + "epoch": 0.4282354274703165, "grad_norm": 0.0, - "learning_rate": 1.2755136670995965e-05, - "loss": 0.9152, + "learning_rate": 1.2773425766237167e-05, + "loss": 0.9665, "step": 15112 }, { - "epoch": 0.42885925085130533, + "epoch": 0.428263764912579, "grad_norm": 0.0, - "learning_rate": 1.2754253152228864e-05, - "loss": 0.8796, + "learning_rate": 1.2772543969161493e-05, + "loss": 0.8732, "step": 15113 }, { - "epoch": 0.42888762769580024, + "epoch": 0.42829210235484144, "grad_norm": 0.0, - "learning_rate": 1.2753369610196463e-05, - "loss": 0.9837, + "learning_rate": 1.2771662148731653e-05, + "loss": 0.9554, "step": 15114 }, { - "epoch": 0.4289160045402951, + "epoch": 0.42832043979710394, "grad_norm": 0.0, - "learning_rate": 1.2752486044906221e-05, - "loss": 0.9797, + "learning_rate": 1.2770780304955075e-05, + "loss": 0.9447, "step": 15115 }, { - "epoch": 0.42894438138479, + "epoch": 0.4283487772393664, "grad_norm": 0.0, - "learning_rate": 1.2751602456365608e-05, - "loss": 0.9336, + "learning_rate": 1.2769898437839181e-05, + "loss": 0.9251, "step": 15116 }, { - "epoch": 0.4289727582292849, + "epoch": 0.4283771146816288, "grad_norm": 0.0, - "learning_rate": 1.2750718844582087e-05, - "loss": 0.9894, + "learning_rate": 1.2769016547391405e-05, + "loss": 0.9929, "step": 15117 }, { - "epoch": 0.4290011350737798, + "epoch": 0.4284054521238913, "grad_norm": 0.0, - "learning_rate": 1.2749835209563119e-05, - "loss": 0.9704, + "learning_rate": 1.2768134633619176e-05, + "loss": 1.0217, "step": 15118 }, { - "epoch": 0.4290295119182747, + "epoch": 0.42843378956615374, "grad_norm": 0.0, - "learning_rate": 1.2748951551316168e-05, - "loss": 0.9329, + "learning_rate": 1.2767252696529922e-05, + "loss": 0.983, "step": 15119 }, { - "epoch": 0.42905788876276957, + "epoch": 0.42846212700841624, "grad_norm": 0.0, - "learning_rate": 1.2748067869848703e-05, - "loss": 0.9477, + "learning_rate": 1.2766370736131069e-05, + "loss": 0.9884, "step": 15120 }, { - "epoch": 0.4290862656072645, + "epoch": 0.4284904644506787, "grad_norm": 0.0, - "learning_rate": 1.2747184165168186e-05, - "loss": 1.0391, + "learning_rate": 1.2765488752430049e-05, + "loss": 0.8562, "step": 15121 }, { - "epoch": 0.42911464245175934, + "epoch": 0.42851880189294117, "grad_norm": 0.0, - "learning_rate": 1.2746300437282076e-05, - "loss": 0.8678, + "learning_rate": 1.2764606745434289e-05, + "loss": 0.8706, "step": 15122 }, { - "epoch": 0.42914301929625426, + "epoch": 0.4285471393352036, "grad_norm": 0.0, - "learning_rate": 1.2745416686197842e-05, - "loss": 1.0296, + "learning_rate": 1.276372471515122e-05, + "loss": 0.948, "step": 15123 }, { - "epoch": 0.4291713961407492, + "epoch": 0.42857547677746605, "grad_norm": 0.0, - "learning_rate": 1.2744532911922956e-05, - "loss": 0.8527, + "learning_rate": 1.276284266158827e-05, + "loss": 0.9292, "step": 15124 }, { - "epoch": 0.42919977298524403, + "epoch": 0.42860381421972854, "grad_norm": 0.0, - "learning_rate": 1.2743649114464873e-05, - "loss": 0.8421, + "learning_rate": 1.2761960584752874e-05, + "loss": 0.9285, "step": 15125 }, { - "epoch": 0.42922814982973895, + "epoch": 0.428632151661991, "grad_norm": 0.0, - "learning_rate": 1.2742765293831063e-05, - "loss": 0.9287, + "learning_rate": 1.2761078484652458e-05, + "loss": 0.9298, "step": 15126 }, { - "epoch": 0.4292565266742338, + "epoch": 0.4286604891042535, "grad_norm": 0.0, - "learning_rate": 1.2741881450028992e-05, - "loss": 0.9638, + "learning_rate": 1.2760196361294452e-05, + "loss": 0.8524, "step": 15127 }, { - "epoch": 0.4292849035187287, + "epoch": 0.4286888265465159, "grad_norm": 0.0, - "learning_rate": 1.2740997583066126e-05, - "loss": 0.8516, + "learning_rate": 1.2759314214686284e-05, + "loss": 0.8262, "step": 15128 }, { - "epoch": 0.42931328036322364, + "epoch": 0.42871716398877835, "grad_norm": 0.0, - "learning_rate": 1.2740113692949927e-05, - "loss": 0.9264, + "learning_rate": 1.275843204483539e-05, + "loss": 0.8167, "step": 15129 }, { - "epoch": 0.4293416572077185, + "epoch": 0.42874550143104084, "grad_norm": 0.0, - "learning_rate": 1.273922977968787e-05, - "loss": 0.7955, + "learning_rate": 1.27575498517492e-05, + "loss": 0.9428, "step": 15130 }, { - "epoch": 0.4293700340522134, + "epoch": 0.4287738388733033, "grad_norm": 0.0, - "learning_rate": 1.273834584328741e-05, - "loss": 0.975, + "learning_rate": 1.2756667635435143e-05, + "loss": 0.9072, "step": 15131 }, { - "epoch": 0.42939841089670827, + "epoch": 0.4288021763155658, "grad_norm": 0.0, - "learning_rate": 1.2737461883756021e-05, - "loss": 0.9325, + "learning_rate": 1.2755785395900651e-05, + "loss": 1.0307, "step": 15132 }, { - "epoch": 0.4294267877412032, + "epoch": 0.4288305137578282, "grad_norm": 0.0, - "learning_rate": 1.273657790110117e-05, - "loss": 0.9379, + "learning_rate": 1.2754903133153154e-05, + "loss": 0.9587, "step": 15133 }, { - "epoch": 0.42945516458569805, + "epoch": 0.4288588512000907, "grad_norm": 0.0, - "learning_rate": 1.2735693895330324e-05, - "loss": 0.8753, + "learning_rate": 1.2754020847200085e-05, + "loss": 1.009, "step": 15134 }, { - "epoch": 0.42948354143019296, + "epoch": 0.42888718864235315, "grad_norm": 0.0, - "learning_rate": 1.2734809866450944e-05, - "loss": 0.8719, + "learning_rate": 1.2753138538048878e-05, + "loss": 1.022, "step": 15135 }, { - "epoch": 0.4295119182746879, + "epoch": 0.4289155260846156, "grad_norm": 0.0, - "learning_rate": 1.2733925814470507e-05, - "loss": 0.8782, + "learning_rate": 1.2752256205706958e-05, + "loss": 0.8698, "step": 15136 }, { - "epoch": 0.42954029511918274, + "epoch": 0.4289438635268781, "grad_norm": 0.0, - "learning_rate": 1.2733041739396473e-05, - "loss": 0.8794, + "learning_rate": 1.2751373850181766e-05, + "loss": 0.9065, "step": 15137 }, { - "epoch": 0.42956867196367765, + "epoch": 0.4289722009691405, "grad_norm": 0.0, - "learning_rate": 1.2732157641236315e-05, - "loss": 0.9298, + "learning_rate": 1.2750491471480729e-05, + "loss": 0.9385, "step": 15138 }, { - "epoch": 0.4295970488081725, + "epoch": 0.429000538411403, "grad_norm": 0.0, - "learning_rate": 1.2731273519997493e-05, - "loss": 0.9064, + "learning_rate": 1.2749609069611282e-05, + "loss": 0.9365, "step": 15139 }, { - "epoch": 0.4296254256526674, + "epoch": 0.42902887585366545, "grad_norm": 0.0, - "learning_rate": 1.2730389375687487e-05, - "loss": 0.9296, + "learning_rate": 1.2748726644580856e-05, + "loss": 0.909, "step": 15140 }, { - "epoch": 0.42965380249716234, + "epoch": 0.4290572132959279, "grad_norm": 0.0, - "learning_rate": 1.2729505208313755e-05, - "loss": 0.8345, + "learning_rate": 1.2747844196396883e-05, + "loss": 0.7948, "step": 15141 }, { - "epoch": 0.4296821793416572, + "epoch": 0.4290855507381904, "grad_norm": 0.0, - "learning_rate": 1.272862101788377e-05, - "loss": 0.8709, + "learning_rate": 1.27469617250668e-05, + "loss": 0.9081, "step": 15142 }, { - "epoch": 0.4297105561861521, + "epoch": 0.4291138881804528, "grad_norm": 0.0, - "learning_rate": 1.2727736804405005e-05, - "loss": 0.8841, + "learning_rate": 1.2746079230598036e-05, + "loss": 0.8507, "step": 15143 }, { - "epoch": 0.429738933030647, + "epoch": 0.4291422256227153, "grad_norm": 0.0, - "learning_rate": 1.2726852567884922e-05, - "loss": 0.8384, + "learning_rate": 1.2745196712998032e-05, + "loss": 0.8611, "step": 15144 }, { - "epoch": 0.4297673098751419, + "epoch": 0.42917056306497775, "grad_norm": 0.0, - "learning_rate": 1.272596830833099e-05, - "loss": 0.9384, + "learning_rate": 1.274431417227421e-05, + "loss": 0.8632, "step": 15145 }, { - "epoch": 0.42979568671963675, + "epoch": 0.42919890050724024, "grad_norm": 0.0, - "learning_rate": 1.2725084025750683e-05, - "loss": 0.869, + "learning_rate": 1.2743431608434016e-05, + "loss": 0.9424, "step": 15146 }, { - "epoch": 0.42982406356413166, + "epoch": 0.4292272379495027, "grad_norm": 0.0, - "learning_rate": 1.2724199720151469e-05, - "loss": 0.9957, + "learning_rate": 1.2742549021484878e-05, + "loss": 0.7851, "step": 15147 }, { - "epoch": 0.4298524404086266, + "epoch": 0.4292555753917651, "grad_norm": 0.0, - "learning_rate": 1.272331539154082e-05, - "loss": 1.0251, + "learning_rate": 1.274166641143423e-05, + "loss": 0.9836, "step": 15148 }, { - "epoch": 0.42988081725312144, + "epoch": 0.4292839128340276, "grad_norm": 0.0, - "learning_rate": 1.27224310399262e-05, - "loss": 0.9344, + "learning_rate": 1.2740783778289507e-05, + "loss": 0.8689, "step": 15149 }, { - "epoch": 0.42990919409761635, + "epoch": 0.42931225027629005, "grad_norm": 0.0, - "learning_rate": 1.2721546665315086e-05, - "loss": 0.8828, + "learning_rate": 1.2739901122058145e-05, + "loss": 0.8784, "step": 15150 }, { - "epoch": 0.4299375709421112, + "epoch": 0.42934058771855255, "grad_norm": 0.0, - "learning_rate": 1.2720662267714942e-05, - "loss": 0.9891, + "learning_rate": 1.273901844274758e-05, + "loss": 0.9137, "step": 15151 }, { - "epoch": 0.42996594778660613, + "epoch": 0.429368925160815, "grad_norm": 0.0, - "learning_rate": 1.2719777847133241e-05, - "loss": 0.9352, + "learning_rate": 1.2738135740365243e-05, + "loss": 0.9659, "step": 15152 }, { - "epoch": 0.42999432463110104, + "epoch": 0.4293972626030774, "grad_norm": 0.0, - "learning_rate": 1.2718893403577457e-05, - "loss": 0.8085, + "learning_rate": 1.2737253014918573e-05, + "loss": 0.9537, "step": 15153 }, { - "epoch": 0.4300227014755959, + "epoch": 0.4294256000453399, "grad_norm": 0.0, - "learning_rate": 1.2718008937055056e-05, - "loss": 0.9807, + "learning_rate": 1.2736370266415006e-05, + "loss": 0.9402, "step": 15154 }, { - "epoch": 0.4300510783200908, + "epoch": 0.42945393748760236, "grad_norm": 0.0, - "learning_rate": 1.2717124447573515e-05, - "loss": 0.9476, + "learning_rate": 1.2735487494861975e-05, + "loss": 0.9139, "step": 15155 }, { - "epoch": 0.4300794551645857, + "epoch": 0.42948227492986485, "grad_norm": 0.0, - "learning_rate": 1.2716239935140295e-05, - "loss": 0.8804, + "learning_rate": 1.2734604700266914e-05, + "loss": 0.9717, "step": 15156 }, { - "epoch": 0.4301078320090806, + "epoch": 0.4295106123721273, "grad_norm": 0.0, - "learning_rate": 1.2715355399762878e-05, - "loss": 0.8659, + "learning_rate": 1.2733721882637265e-05, + "loss": 1.0779, "step": 15157 }, { - "epoch": 0.4301362088535755, + "epoch": 0.4295389498143897, "grad_norm": 0.0, - "learning_rate": 1.2714470841448733e-05, - "loss": 0.9486, + "learning_rate": 1.2732839041980463e-05, + "loss": 0.9425, "step": 15158 }, { - "epoch": 0.43016458569807037, + "epoch": 0.4295672872566522, "grad_norm": 0.0, - "learning_rate": 1.2713586260205331e-05, - "loss": 0.8022, + "learning_rate": 1.2731956178303941e-05, + "loss": 0.8367, "step": 15159 }, { - "epoch": 0.4301929625425653, + "epoch": 0.42959562469891466, "grad_norm": 0.0, - "learning_rate": 1.2712701656040142e-05, - "loss": 0.8016, + "learning_rate": 1.273107329161514e-05, + "loss": 0.9494, "step": 15160 }, { - "epoch": 0.43022133938706014, + "epoch": 0.42962396214117715, "grad_norm": 0.0, - "learning_rate": 1.2711817028960642e-05, - "loss": 0.8881, + "learning_rate": 1.2730190381921492e-05, + "loss": 0.9234, "step": 15161 }, { - "epoch": 0.43024971623155506, + "epoch": 0.4296522995834396, "grad_norm": 0.0, - "learning_rate": 1.2710932378974299e-05, - "loss": 0.9162, + "learning_rate": 1.2729307449230435e-05, + "loss": 0.854, "step": 15162 }, { - "epoch": 0.4302780930760499, + "epoch": 0.4296806370257021, "grad_norm": 0.0, - "learning_rate": 1.2710047706088588e-05, - "loss": 0.9064, + "learning_rate": 1.2728424493549409e-05, + "loss": 1.0057, "step": 15163 }, { - "epoch": 0.43030646992054483, + "epoch": 0.4297089744679645, "grad_norm": 0.0, - "learning_rate": 1.2709163010310985e-05, - "loss": 0.8299, + "learning_rate": 1.2727541514885853e-05, + "loss": 0.7941, "step": 15164 }, { - "epoch": 0.43033484676503975, + "epoch": 0.42973731191022696, "grad_norm": 0.0, - "learning_rate": 1.2708278291648961e-05, - "loss": 0.8816, + "learning_rate": 1.2726658513247202e-05, + "loss": 0.8622, "step": 15165 }, { - "epoch": 0.4303632236095346, + "epoch": 0.42976564935248945, "grad_norm": 0.0, - "learning_rate": 1.2707393550109985e-05, - "loss": 1.0359, + "learning_rate": 1.2725775488640887e-05, + "loss": 0.9827, "step": 15166 }, { - "epoch": 0.4303916004540295, + "epoch": 0.4297939867947519, "grad_norm": 0.0, - "learning_rate": 1.2706508785701537e-05, - "loss": 0.9791, + "learning_rate": 1.272489244107436e-05, + "loss": 0.9236, "step": 15167 }, { - "epoch": 0.4304199772985244, + "epoch": 0.4298223242370144, "grad_norm": 0.0, - "learning_rate": 1.2705623998431086e-05, - "loss": 0.8894, + "learning_rate": 1.2724009370555051e-05, + "loss": 0.9212, "step": 15168 }, { - "epoch": 0.4304483541430193, + "epoch": 0.4298506616792768, "grad_norm": 0.0, - "learning_rate": 1.2704739188306107e-05, - "loss": 0.8797, + "learning_rate": 1.2723126277090396e-05, + "loss": 1.048, "step": 15169 }, { - "epoch": 0.4304767309875142, + "epoch": 0.42987899912153926, "grad_norm": 0.0, - "learning_rate": 1.2703854355334073e-05, - "loss": 1.0231, + "learning_rate": 1.272224316068784e-05, + "loss": 0.8835, "step": 15170 }, { - "epoch": 0.43050510783200907, + "epoch": 0.42990733656380176, "grad_norm": 0.0, - "learning_rate": 1.2702969499522463e-05, - "loss": 0.8755, + "learning_rate": 1.2721360021354817e-05, + "loss": 0.89, "step": 15171 }, { - "epoch": 0.430533484676504, + "epoch": 0.4299356740060642, "grad_norm": 0.0, - "learning_rate": 1.270208462087875e-05, - "loss": 0.7999, + "learning_rate": 1.2720476859098771e-05, + "loss": 0.9912, "step": 15172 }, { - "epoch": 0.43056186152099885, + "epoch": 0.4299640114483267, "grad_norm": 0.0, - "learning_rate": 1.2701199719410402e-05, - "loss": 0.9766, + "learning_rate": 1.271959367392714e-05, + "loss": 0.9101, "step": 15173 }, { - "epoch": 0.43059023836549376, + "epoch": 0.4299923488905891, "grad_norm": 0.0, - "learning_rate": 1.2700314795124902e-05, - "loss": 0.9756, + "learning_rate": 1.2718710465847355e-05, + "loss": 1.0406, "step": 15174 }, { - "epoch": 0.4306186152099887, + "epoch": 0.4300206863328516, "grad_norm": 0.0, - "learning_rate": 1.269942984802972e-05, - "loss": 0.8892, + "learning_rate": 1.2717827234866867e-05, + "loss": 0.9995, "step": 15175 }, { - "epoch": 0.43064699205448354, + "epoch": 0.43004902377511406, "grad_norm": 0.0, - "learning_rate": 1.269854487813233e-05, - "loss": 0.9624, + "learning_rate": 1.2716943980993108e-05, + "loss": 0.7602, "step": 15176 }, { - "epoch": 0.43067536889897845, + "epoch": 0.4300773612173765, "grad_norm": 0.0, - "learning_rate": 1.2697659885440213e-05, - "loss": 0.9781, + "learning_rate": 1.2716060704233523e-05, + "loss": 0.8528, "step": 15177 }, { - "epoch": 0.4307037457434733, + "epoch": 0.430105698659639, "grad_norm": 0.0, - "learning_rate": 1.2696774869960842e-05, - "loss": 0.9974, + "learning_rate": 1.2715177404595548e-05, + "loss": 0.872, "step": 15178 }, { - "epoch": 0.4307321225879682, + "epoch": 0.43013403610190143, "grad_norm": 0.0, - "learning_rate": 1.2695889831701691e-05, - "loss": 0.9667, + "learning_rate": 1.2714294082086628e-05, + "loss": 0.9291, "step": 15179 }, { - "epoch": 0.4307604994324631, + "epoch": 0.4301623735441639, "grad_norm": 0.0, - "learning_rate": 1.2695004770670239e-05, - "loss": 0.9668, + "learning_rate": 1.2713410736714202e-05, + "loss": 0.8966, "step": 15180 }, { - "epoch": 0.430788876276958, + "epoch": 0.43019071098642636, "grad_norm": 0.0, - "learning_rate": 1.269411968687396e-05, - "loss": 0.8507, + "learning_rate": 1.2712527368485708e-05, + "loss": 0.8771, "step": 15181 }, { - "epoch": 0.4308172531214529, + "epoch": 0.4302190484286888, "grad_norm": 0.0, - "learning_rate": 1.2693234580320333e-05, - "loss": 0.9255, + "learning_rate": 1.2711643977408587e-05, + "loss": 0.8781, "step": 15182 }, { - "epoch": 0.4308456299659478, + "epoch": 0.4302473858709513, "grad_norm": 0.0, - "learning_rate": 1.2692349451016828e-05, - "loss": 0.8994, + "learning_rate": 1.271076056349028e-05, + "loss": 0.8566, "step": 15183 }, { - "epoch": 0.4308740068104427, + "epoch": 0.43027572331321373, "grad_norm": 0.0, - "learning_rate": 1.2691464298970928e-05, - "loss": 0.9792, + "learning_rate": 1.2709877126738235e-05, + "loss": 0.9824, "step": 15184 }, { - "epoch": 0.43090238365493755, + "epoch": 0.4303040607554762, "grad_norm": 0.0, - "learning_rate": 1.2690579124190108e-05, - "loss": 0.9399, + "learning_rate": 1.2708993667159887e-05, + "loss": 0.9946, "step": 15185 }, { - "epoch": 0.43093076049943246, + "epoch": 0.43033239819773866, "grad_norm": 0.0, - "learning_rate": 1.2689693926681845e-05, - "loss": 0.8142, + "learning_rate": 1.2708110184762684e-05, + "loss": 0.9818, "step": 15186 }, { - "epoch": 0.4309591373439274, + "epoch": 0.43036073564000116, "grad_norm": 0.0, - "learning_rate": 1.2688808706453616e-05, - "loss": 0.8658, + "learning_rate": 1.2707226679554054e-05, + "loss": 0.9394, "step": 15187 }, { - "epoch": 0.43098751418842224, + "epoch": 0.4303890730822636, "grad_norm": 0.0, - "learning_rate": 1.26879234635129e-05, - "loss": 0.8564, + "learning_rate": 1.2706343151541457e-05, + "loss": 1.0038, "step": 15188 }, { - "epoch": 0.43101589103291715, + "epoch": 0.43041741052452603, "grad_norm": 0.0, - "learning_rate": 1.2687038197867174e-05, - "loss": 0.9475, + "learning_rate": 1.2705459600732319e-05, + "loss": 0.9022, "step": 15189 }, { - "epoch": 0.431044267877412, + "epoch": 0.43044574796678853, "grad_norm": 0.0, - "learning_rate": 1.2686152909523917e-05, - "loss": 0.8423, + "learning_rate": 1.2704576027134095e-05, + "loss": 0.8275, "step": 15190 }, { - "epoch": 0.43107264472190693, + "epoch": 0.43047408540905097, "grad_norm": 0.0, - "learning_rate": 1.2685267598490606e-05, - "loss": 1.0438, + "learning_rate": 1.2703692430754223e-05, + "loss": 0.8125, "step": 15191 }, { - "epoch": 0.43110102156640184, + "epoch": 0.43050242285131346, "grad_norm": 0.0, - "learning_rate": 1.2684382264774713e-05, - "loss": 0.9422, + "learning_rate": 1.2702808811600144e-05, + "loss": 0.9633, "step": 15192 }, { - "epoch": 0.4311293984108967, + "epoch": 0.4305307602935759, "grad_norm": 0.0, - "learning_rate": 1.2683496908383725e-05, - "loss": 0.8582, + "learning_rate": 1.2701925169679303e-05, + "loss": 0.9159, "step": 15193 }, { - "epoch": 0.4311577752553916, + "epoch": 0.43055909773583834, "grad_norm": 0.0, - "learning_rate": 1.268261152932512e-05, - "loss": 0.9049, + "learning_rate": 1.2701041504999144e-05, + "loss": 0.9879, "step": 15194 }, { - "epoch": 0.4311861520998865, + "epoch": 0.43058743517810083, "grad_norm": 0.0, - "learning_rate": 1.2681726127606374e-05, - "loss": 0.8903, + "learning_rate": 1.2700157817567105e-05, + "loss": 0.8629, "step": 15195 }, { - "epoch": 0.4312145289443814, + "epoch": 0.43061577262036327, "grad_norm": 0.0, - "learning_rate": 1.2680840703234968e-05, - "loss": 1.0267, + "learning_rate": 1.2699274107390638e-05, + "loss": 0.9197, "step": 15196 }, { - "epoch": 0.43124290578887625, + "epoch": 0.43064411006262576, "grad_norm": 0.0, - "learning_rate": 1.2679955256218379e-05, - "loss": 0.7664, + "learning_rate": 1.2698390374477186e-05, + "loss": 0.9184, "step": 15197 }, { - "epoch": 0.43127128263337117, + "epoch": 0.4306724475048882, "grad_norm": 0.0, - "learning_rate": 1.2679069786564089e-05, - "loss": 0.9212, + "learning_rate": 1.2697506618834185e-05, + "loss": 0.8672, "step": 15198 }, { - "epoch": 0.4312996594778661, + "epoch": 0.4307007849471507, "grad_norm": 0.0, - "learning_rate": 1.2678184294279573e-05, - "loss": 0.881, + "learning_rate": 1.2696622840469084e-05, + "loss": 0.9158, "step": 15199 }, { - "epoch": 0.43132803632236094, + "epoch": 0.43072912238941313, "grad_norm": 0.0, - "learning_rate": 1.2677298779372314e-05, - "loss": 0.9928, + "learning_rate": 1.269573903938933e-05, + "loss": 0.9047, "step": 15200 }, { - "epoch": 0.43135641316685586, + "epoch": 0.43075745983167557, "grad_norm": 0.0, - "learning_rate": 1.2676413241849792e-05, - "loss": 0.9744, + "learning_rate": 1.2694855215602362e-05, + "loss": 0.8201, "step": 15201 }, { - "epoch": 0.4313847900113507, + "epoch": 0.43078579727393806, "grad_norm": 0.0, - "learning_rate": 1.267552768171949e-05, - "loss": 0.8987, + "learning_rate": 1.269397136911563e-05, + "loss": 0.9418, "step": 15202 }, { - "epoch": 0.43141316685584563, + "epoch": 0.4308141347162005, "grad_norm": 0.0, - "learning_rate": 1.2674642098988884e-05, - "loss": 0.8741, + "learning_rate": 1.2693087499936575e-05, + "loss": 0.8763, "step": 15203 }, { - "epoch": 0.43144154370034055, + "epoch": 0.430842472158463, "grad_norm": 0.0, - "learning_rate": 1.2673756493665454e-05, - "loss": 0.8733, + "learning_rate": 1.2692203608072646e-05, + "loss": 0.944, "step": 15204 }, { - "epoch": 0.4314699205448354, + "epoch": 0.43087080960072544, "grad_norm": 0.0, - "learning_rate": 1.2672870865756685e-05, - "loss": 0.8263, + "learning_rate": 1.2691319693531287e-05, + "loss": 0.8328, "step": 15205 }, { - "epoch": 0.4314982973893303, + "epoch": 0.4308991470429879, "grad_norm": 0.0, - "learning_rate": 1.2671985215270055e-05, - "loss": 0.9401, + "learning_rate": 1.269043575631994e-05, + "loss": 0.9497, "step": 15206 }, { - "epoch": 0.4315266742338252, + "epoch": 0.43092748448525037, "grad_norm": 0.0, - "learning_rate": 1.2671099542213046e-05, - "loss": 0.8361, + "learning_rate": 1.2689551796446057e-05, + "loss": 0.8818, "step": 15207 }, { - "epoch": 0.4315550510783201, + "epoch": 0.4309558219275128, "grad_norm": 0.0, - "learning_rate": 1.2670213846593137e-05, - "loss": 0.9289, + "learning_rate": 1.2688667813917075e-05, + "loss": 0.9961, "step": 15208 }, { - "epoch": 0.431583427922815, + "epoch": 0.4309841593697753, "grad_norm": 0.0, - "learning_rate": 1.2669328128417815e-05, - "loss": 0.8983, + "learning_rate": 1.268778380874045e-05, + "loss": 0.8893, "step": 15209 }, { - "epoch": 0.43161180476730987, + "epoch": 0.43101249681203774, "grad_norm": 0.0, - "learning_rate": 1.2668442387694555e-05, - "loss": 0.8498, + "learning_rate": 1.2686899780923624e-05, + "loss": 0.8859, "step": 15210 }, { - "epoch": 0.4316401816118048, + "epoch": 0.43104083425430023, "grad_norm": 0.0, - "learning_rate": 1.2667556624430844e-05, - "loss": 0.8774, + "learning_rate": 1.2686015730474042e-05, + "loss": 0.9131, "step": 15211 }, { - "epoch": 0.43166855845629964, + "epoch": 0.43106917169656267, "grad_norm": 0.0, - "learning_rate": 1.2666670838634163e-05, - "loss": 0.9172, + "learning_rate": 1.2685131657399153e-05, + "loss": 0.8472, "step": 15212 }, { - "epoch": 0.43169693530079456, + "epoch": 0.4310975091388251, "grad_norm": 0.0, - "learning_rate": 1.2665785030311993e-05, - "loss": 0.8419, + "learning_rate": 1.2684247561706402e-05, + "loss": 0.886, "step": 15213 }, { - "epoch": 0.4317253121452894, + "epoch": 0.4311258465810876, "grad_norm": 0.0, - "learning_rate": 1.2664899199471816e-05, - "loss": 1.0169, + "learning_rate": 1.2683363443403235e-05, + "loss": 0.9239, "step": 15214 }, { - "epoch": 0.43175368898978433, + "epoch": 0.43115418402335004, "grad_norm": 0.0, - "learning_rate": 1.2664013346121115e-05, - "loss": 0.9366, + "learning_rate": 1.2682479302497106e-05, + "loss": 0.9348, "step": 15215 }, { - "epoch": 0.43178206583427925, + "epoch": 0.43118252146561253, "grad_norm": 0.0, - "learning_rate": 1.2663127470267376e-05, - "loss": 0.9688, + "learning_rate": 1.2681595138995456e-05, + "loss": 0.8823, "step": 15216 }, { - "epoch": 0.4318104426787741, + "epoch": 0.43121085890787497, "grad_norm": 0.0, - "learning_rate": 1.2662241571918077e-05, - "loss": 0.8565, + "learning_rate": 1.2680710952905733e-05, + "loss": 1.028, "step": 15217 }, { - "epoch": 0.431838819523269, + "epoch": 0.4312391963501374, "grad_norm": 0.0, - "learning_rate": 1.2661355651080707e-05, - "loss": 0.859, + "learning_rate": 1.2679826744235388e-05, + "loss": 0.9538, "step": 15218 }, { - "epoch": 0.4318671963677639, + "epoch": 0.4312675337923999, "grad_norm": 0.0, - "learning_rate": 1.2660469707762744e-05, - "loss": 0.8287, + "learning_rate": 1.2678942512991865e-05, + "loss": 0.9286, "step": 15219 }, { - "epoch": 0.4318955732122588, + "epoch": 0.43129587123466234, "grad_norm": 0.0, - "learning_rate": 1.2659583741971675e-05, - "loss": 0.8639, + "learning_rate": 1.2678058259182615e-05, + "loss": 0.9234, "step": 15220 }, { - "epoch": 0.4319239500567537, + "epoch": 0.43132420867692484, "grad_norm": 0.0, - "learning_rate": 1.2658697753714982e-05, - "loss": 0.8411, + "learning_rate": 1.2677173982815086e-05, + "loss": 0.9665, "step": 15221 }, { - "epoch": 0.4319523269012486, + "epoch": 0.4313525461191873, "grad_norm": 0.0, - "learning_rate": 1.265781174300015e-05, - "loss": 0.8886, + "learning_rate": 1.2676289683896727e-05, + "loss": 0.9443, "step": 15222 }, { - "epoch": 0.4319807037457435, + "epoch": 0.43138088356144977, "grad_norm": 0.0, - "learning_rate": 1.265692570983466e-05, - "loss": 0.9564, + "learning_rate": 1.2675405362434987e-05, + "loss": 0.9467, "step": 15223 }, { - "epoch": 0.43200908059023835, + "epoch": 0.4314092210037122, "grad_norm": 0.0, - "learning_rate": 1.2656039654226e-05, - "loss": 0.9222, + "learning_rate": 1.2674521018437311e-05, + "loss": 0.8233, "step": 15224 }, { - "epoch": 0.43203745743473326, + "epoch": 0.43143755844597464, "grad_norm": 0.0, - "learning_rate": 1.2655153576181655e-05, - "loss": 0.9562, + "learning_rate": 1.2673636651911154e-05, + "loss": 0.9021, "step": 15225 }, { - "epoch": 0.4320658342792281, + "epoch": 0.43146589588823714, "grad_norm": 0.0, - "learning_rate": 1.265426747570911e-05, - "loss": 0.9392, + "learning_rate": 1.2672752262863963e-05, + "loss": 0.9919, "step": 15226 }, { - "epoch": 0.43209421112372304, + "epoch": 0.4314942333304996, "grad_norm": 0.0, - "learning_rate": 1.2653381352815846e-05, - "loss": 0.9077, + "learning_rate": 1.2671867851303185e-05, + "loss": 0.9814, "step": 15227 }, { - "epoch": 0.43212258796821795, + "epoch": 0.43152257077276207, "grad_norm": 0.0, - "learning_rate": 1.265249520750935e-05, - "loss": 0.8784, + "learning_rate": 1.2670983417236271e-05, + "loss": 0.9169, "step": 15228 }, { - "epoch": 0.4321509648127128, + "epoch": 0.4315509082150245, "grad_norm": 0.0, - "learning_rate": 1.265160903979711e-05, - "loss": 0.9927, + "learning_rate": 1.2670098960670676e-05, + "loss": 0.9569, "step": 15229 }, { - "epoch": 0.4321793416572077, + "epoch": 0.43157924565728695, "grad_norm": 0.0, - "learning_rate": 1.2650722849686607e-05, - "loss": 0.9007, + "learning_rate": 1.2669214481613846e-05, + "loss": 0.8642, "step": 15230 }, { - "epoch": 0.4322077185017026, + "epoch": 0.43160758309954944, "grad_norm": 0.0, - "learning_rate": 1.264983663718533e-05, - "loss": 0.8393, + "learning_rate": 1.2668329980073229e-05, + "loss": 0.8379, "step": 15231 }, { - "epoch": 0.4322360953461975, + "epoch": 0.4316359205418119, "grad_norm": 0.0, - "learning_rate": 1.2648950402300763e-05, - "loss": 1.0748, + "learning_rate": 1.2667445456056276e-05, + "loss": 0.8932, "step": 15232 }, { - "epoch": 0.4322644721906924, + "epoch": 0.4316642579840744, "grad_norm": 0.0, - "learning_rate": 1.2648064145040392e-05, - "loss": 0.9157, + "learning_rate": 1.2666560909570442e-05, + "loss": 0.9057, "step": 15233 }, { - "epoch": 0.4322928490351873, + "epoch": 0.4316925954263368, "grad_norm": 0.0, - "learning_rate": 1.2647177865411706e-05, - "loss": 0.9625, + "learning_rate": 1.2665676340623172e-05, + "loss": 0.8628, "step": 15234 }, { - "epoch": 0.4323212258796822, + "epoch": 0.4317209328685993, "grad_norm": 0.0, - "learning_rate": 1.2646291563422191e-05, - "loss": 0.9468, + "learning_rate": 1.2664791749221923e-05, + "loss": 0.8203, "step": 15235 }, { - "epoch": 0.43234960272417705, + "epoch": 0.43174927031086174, "grad_norm": 0.0, - "learning_rate": 1.264540523907933e-05, - "loss": 0.988, + "learning_rate": 1.2663907135374142e-05, + "loss": 0.9889, "step": 15236 }, { - "epoch": 0.43237797956867197, + "epoch": 0.4317776077531242, "grad_norm": 0.0, - "learning_rate": 1.2644518892390611e-05, - "loss": 1.0238, + "learning_rate": 1.2663022499087285e-05, + "loss": 0.9354, "step": 15237 }, { - "epoch": 0.4324063564131669, + "epoch": 0.4318059451953867, "grad_norm": 0.0, - "learning_rate": 1.2643632523363523e-05, - "loss": 0.8789, + "learning_rate": 1.26621378403688e-05, + "loss": 0.9871, "step": 15238 }, { - "epoch": 0.43243473325766174, + "epoch": 0.4318342826376491, "grad_norm": 0.0, - "learning_rate": 1.2642746132005552e-05, - "loss": 0.9899, + "learning_rate": 1.266125315922614e-05, + "loss": 0.9005, "step": 15239 }, { - "epoch": 0.43246311010215666, + "epoch": 0.4318626200799116, "grad_norm": 0.0, - "learning_rate": 1.2641859718324185e-05, - "loss": 0.9152, + "learning_rate": 1.2660368455666752e-05, + "loss": 0.9861, "step": 15240 }, { - "epoch": 0.4324914869466515, + "epoch": 0.43189095752217405, "grad_norm": 0.0, - "learning_rate": 1.2640973282326914e-05, - "loss": 0.8838, + "learning_rate": 1.2659483729698094e-05, + "loss": 0.9912, "step": 15241 }, { - "epoch": 0.43251986379114643, + "epoch": 0.4319192949644365, "grad_norm": 0.0, - "learning_rate": 1.264008682402122e-05, - "loss": 0.9015, + "learning_rate": 1.265859898132762e-05, + "loss": 0.8273, "step": 15242 }, { - "epoch": 0.4325482406356413, + "epoch": 0.431947632406699, "grad_norm": 0.0, - "learning_rate": 1.2639200343414593e-05, - "loss": 0.9586, + "learning_rate": 1.265771421056278e-05, + "loss": 0.9474, "step": 15243 }, { - "epoch": 0.4325766174801362, + "epoch": 0.4319759698489614, "grad_norm": 0.0, - "learning_rate": 1.2638313840514525e-05, - "loss": 0.8608, + "learning_rate": 1.2656829417411023e-05, + "loss": 0.8564, "step": 15244 }, { - "epoch": 0.4326049943246311, + "epoch": 0.4320043072912239, "grad_norm": 0.0, - "learning_rate": 1.2637427315328502e-05, - "loss": 0.898, + "learning_rate": 1.2655944601879805e-05, + "loss": 0.9165, "step": 15245 }, { - "epoch": 0.432633371169126, + "epoch": 0.43203264473348635, "grad_norm": 0.0, - "learning_rate": 1.2636540767864008e-05, - "loss": 0.9783, + "learning_rate": 1.265505976397658e-05, + "loss": 0.8515, "step": 15246 }, { - "epoch": 0.4326617480136209, + "epoch": 0.43206098217574884, "grad_norm": 0.0, - "learning_rate": 1.2635654198128536e-05, - "loss": 0.9141, + "learning_rate": 1.2654174903708803e-05, + "loss": 0.9607, "step": 15247 }, { - "epoch": 0.43269012485811575, + "epoch": 0.4320893196180113, "grad_norm": 0.0, - "learning_rate": 1.2634767606129577e-05, - "loss": 1.0524, + "learning_rate": 1.2653290021083925e-05, + "loss": 0.9034, "step": 15248 }, { - "epoch": 0.43271850170261067, + "epoch": 0.4321176570602737, "grad_norm": 0.0, - "learning_rate": 1.2633880991874617e-05, - "loss": 0.8062, + "learning_rate": 1.2652405116109394e-05, + "loss": 0.9803, "step": 15249 }, { - "epoch": 0.4327468785471056, + "epoch": 0.4321459945025362, "grad_norm": 0.0, - "learning_rate": 1.2632994355371144e-05, - "loss": 0.7749, + "learning_rate": 1.2651520188792677e-05, + "loss": 0.9012, "step": 15250 }, { - "epoch": 0.43277525539160044, + "epoch": 0.43217433194479865, "grad_norm": 0.0, - "learning_rate": 1.2632107696626652e-05, - "loss": 0.9547, + "learning_rate": 1.2650635239141217e-05, + "loss": 0.9844, "step": 15251 }, { - "epoch": 0.43280363223609536, + "epoch": 0.43220266938706114, "grad_norm": 0.0, - "learning_rate": 1.2631221015648626e-05, - "loss": 0.99, + "learning_rate": 1.2649750267162474e-05, + "loss": 0.9809, "step": 15252 }, { - "epoch": 0.4328320090805902, + "epoch": 0.4322310068293236, "grad_norm": 0.0, - "learning_rate": 1.2630334312444557e-05, - "loss": 0.912, + "learning_rate": 1.26488652728639e-05, + "loss": 0.9106, "step": 15253 }, { - "epoch": 0.43286038592508513, + "epoch": 0.432259344271586, "grad_norm": 0.0, - "learning_rate": 1.2629447587021937e-05, - "loss": 0.867, + "learning_rate": 1.2647980256252947e-05, + "loss": 1.0056, "step": 15254 }, { - "epoch": 0.43288876276958005, + "epoch": 0.4322876817138485, "grad_norm": 0.0, - "learning_rate": 1.2628560839388255e-05, - "loss": 1.0072, + "learning_rate": 1.2647095217337078e-05, + "loss": 0.812, "step": 15255 }, { - "epoch": 0.4329171396140749, + "epoch": 0.43231601915611095, "grad_norm": 0.0, - "learning_rate": 1.2627674069551003e-05, - "loss": 0.9211, + "learning_rate": 1.2646210156123742e-05, + "loss": 0.9457, "step": 15256 }, { - "epoch": 0.4329455164585698, + "epoch": 0.43234435659837345, "grad_norm": 0.0, - "learning_rate": 1.2626787277517667e-05, - "loss": 0.8074, + "learning_rate": 1.264532507262039e-05, + "loss": 0.9421, "step": 15257 }, { - "epoch": 0.4329738933030647, + "epoch": 0.4323726940406359, "grad_norm": 0.0, - "learning_rate": 1.2625900463295743e-05, - "loss": 0.7344, + "learning_rate": 1.264443996683449e-05, + "loss": 0.8821, "step": 15258 }, { - "epoch": 0.4330022701475596, + "epoch": 0.4324010314828984, "grad_norm": 0.0, - "learning_rate": 1.2625013626892719e-05, - "loss": 0.8412, + "learning_rate": 1.2643554838773486e-05, + "loss": 0.9589, "step": 15259 }, { - "epoch": 0.43303064699205446, + "epoch": 0.4324293689251608, "grad_norm": 0.0, - "learning_rate": 1.2624126768316088e-05, - "loss": 0.9353, + "learning_rate": 1.2642669688444837e-05, + "loss": 0.922, "step": 15260 }, { - "epoch": 0.4330590238365494, + "epoch": 0.43245770636742326, "grad_norm": 0.0, - "learning_rate": 1.2623239887573338e-05, - "loss": 0.9255, + "learning_rate": 1.2641784515856002e-05, + "loss": 0.8501, "step": 15261 }, { - "epoch": 0.4330874006810443, + "epoch": 0.43248604380968575, "grad_norm": 0.0, - "learning_rate": 1.2622352984671964e-05, - "loss": 0.9831, + "learning_rate": 1.2640899321014435e-05, + "loss": 0.9033, "step": 15262 }, { - "epoch": 0.43311577752553915, + "epoch": 0.4325143812519482, "grad_norm": 0.0, - "learning_rate": 1.2621466059619459e-05, - "loss": 0.9215, + "learning_rate": 1.2640014103927594e-05, + "loss": 0.9571, "step": 15263 }, { - "epoch": 0.43314415437003406, + "epoch": 0.4325427186942107, "grad_norm": 0.0, - "learning_rate": 1.2620579112423308e-05, - "loss": 0.8963, + "learning_rate": 1.2639128864602932e-05, + "loss": 0.8204, "step": 15264 }, { - "epoch": 0.4331725312145289, + "epoch": 0.4325710561364731, "grad_norm": 0.0, - "learning_rate": 1.2619692143091009e-05, - "loss": 1.0117, + "learning_rate": 1.2638243603047907e-05, + "loss": 0.8857, "step": 15265 }, { - "epoch": 0.43320090805902384, + "epoch": 0.43259939357873556, "grad_norm": 0.0, - "learning_rate": 1.2618805151630054e-05, - "loss": 0.9148, + "learning_rate": 1.2637358319269976e-05, + "loss": 0.8686, "step": 15266 }, { - "epoch": 0.43322928490351875, + "epoch": 0.43262773102099805, "grad_norm": 0.0, - "learning_rate": 1.2617918138047931e-05, - "loss": 1.0457, + "learning_rate": 1.2636473013276596e-05, + "loss": 0.9536, "step": 15267 }, { - "epoch": 0.4332576617480136, + "epoch": 0.4326560684632605, "grad_norm": 0.0, - "learning_rate": 1.261703110235214e-05, - "loss": 1.0, + "learning_rate": 1.2635587685075227e-05, + "loss": 0.8722, "step": 15268 }, { - "epoch": 0.4332860385925085, + "epoch": 0.432684405905523, "grad_norm": 0.0, - "learning_rate": 1.2616144044550167e-05, - "loss": 0.8303, + "learning_rate": 1.263470233467332e-05, + "loss": 0.8689, "step": 15269 }, { - "epoch": 0.4333144154370034, + "epoch": 0.4327127433477854, "grad_norm": 0.0, - "learning_rate": 1.2615256964649509e-05, - "loss": 0.8666, + "learning_rate": 1.2633816962078342e-05, + "loss": 0.9085, "step": 15270 }, { - "epoch": 0.4333427922814983, + "epoch": 0.4327410807900479, "grad_norm": 0.0, - "learning_rate": 1.2614369862657657e-05, - "loss": 0.9876, + "learning_rate": 1.2632931567297745e-05, + "loss": 0.9559, "step": 15271 }, { - "epoch": 0.4333711691259932, + "epoch": 0.43276941823231035, "grad_norm": 0.0, - "learning_rate": 1.2613482738582104e-05, - "loss": 0.8499, + "learning_rate": 1.2632046150338988e-05, + "loss": 0.9333, "step": 15272 }, { - "epoch": 0.4333995459704881, + "epoch": 0.4327977556745728, "grad_norm": 0.0, - "learning_rate": 1.2612595592430347e-05, - "loss": 0.9307, + "learning_rate": 1.2631160711209528e-05, + "loss": 0.77, "step": 15273 }, { - "epoch": 0.433427922814983, + "epoch": 0.4328260931168353, "grad_norm": 0.0, - "learning_rate": 1.2611708424209877e-05, - "loss": 0.9146, + "learning_rate": 1.2630275249916822e-05, + "loss": 0.9364, "step": 15274 }, { - "epoch": 0.43345629965947785, + "epoch": 0.4328544305590977, "grad_norm": 0.0, - "learning_rate": 1.2610821233928188e-05, - "loss": 0.9798, + "learning_rate": 1.2629389766468331e-05, + "loss": 0.8314, "step": 15275 }, { - "epoch": 0.43348467650397277, + "epoch": 0.4328827680013602, "grad_norm": 0.0, - "learning_rate": 1.2609934021592776e-05, - "loss": 0.9216, + "learning_rate": 1.2628504260871517e-05, + "loss": 0.9476, "step": 15276 }, { - "epoch": 0.4335130533484676, + "epoch": 0.43291110544362266, "grad_norm": 0.0, - "learning_rate": 1.2609046787211132e-05, - "loss": 0.8959, + "learning_rate": 1.2627618733133835e-05, + "loss": 0.8926, "step": 15277 }, { - "epoch": 0.43354143019296254, + "epoch": 0.4329394428858851, "grad_norm": 0.0, - "learning_rate": 1.260815953079075e-05, - "loss": 0.9581, + "learning_rate": 1.2626733183262743e-05, + "loss": 0.9657, "step": 15278 }, { - "epoch": 0.43356980703745746, + "epoch": 0.4329677803281476, "grad_norm": 0.0, - "learning_rate": 1.2607272252339133e-05, - "loss": 0.9691, + "learning_rate": 1.2625847611265703e-05, + "loss": 0.954, "step": 15279 }, { - "epoch": 0.4335981838819523, + "epoch": 0.43299611777041, "grad_norm": 0.0, - "learning_rate": 1.260638495186377e-05, - "loss": 1.1309, + "learning_rate": 1.262496201715017e-05, + "loss": 0.8195, "step": 15280 }, { - "epoch": 0.43362656072644723, + "epoch": 0.4330244552126725, "grad_norm": 0.0, - "learning_rate": 1.2605497629372153e-05, - "loss": 0.9077, + "learning_rate": 1.262407640092361e-05, + "loss": 1.0616, "step": 15281 }, { - "epoch": 0.4336549375709421, + "epoch": 0.43305279265493496, "grad_norm": 0.0, - "learning_rate": 1.2604610284871782e-05, - "loss": 0.8498, + "learning_rate": 1.262319076259348e-05, + "loss": 0.9384, "step": 15282 }, { - "epoch": 0.433683314415437, + "epoch": 0.43308113009719745, "grad_norm": 0.0, - "learning_rate": 1.260372291837015e-05, - "loss": 0.9052, + "learning_rate": 1.262230510216724e-05, + "loss": 0.9387, "step": 15283 }, { - "epoch": 0.4337116912599319, + "epoch": 0.4331094675394599, "grad_norm": 0.0, - "learning_rate": 1.260283552987475e-05, - "loss": 0.8258, + "learning_rate": 1.2621419419652353e-05, + "loss": 0.8972, "step": 15284 }, { - "epoch": 0.4337400681044268, + "epoch": 0.43313780498172233, "grad_norm": 0.0, - "learning_rate": 1.2601948119393085e-05, - "loss": 0.8976, + "learning_rate": 1.2620533715056275e-05, + "loss": 1.0116, "step": 15285 }, { - "epoch": 0.4337684449489217, + "epoch": 0.4331661424239848, "grad_norm": 0.0, - "learning_rate": 1.2601060686932649e-05, - "loss": 1.0012, + "learning_rate": 1.2619647988386468e-05, + "loss": 1.0665, "step": 15286 }, { - "epoch": 0.43379682179341655, + "epoch": 0.43319447986624726, "grad_norm": 0.0, - "learning_rate": 1.2600173232500935e-05, - "loss": 0.9774, + "learning_rate": 1.2618762239650391e-05, + "loss": 0.9064, "step": 15287 }, { - "epoch": 0.43382519863791147, + "epoch": 0.43322281730850976, "grad_norm": 0.0, - "learning_rate": 1.259928575610544e-05, - "loss": 0.9551, + "learning_rate": 1.261787646885551e-05, + "loss": 0.816, "step": 15288 }, { - "epoch": 0.4338535754824064, + "epoch": 0.4332511547507722, "grad_norm": 0.0, - "learning_rate": 1.2598398257753661e-05, - "loss": 0.9183, + "learning_rate": 1.2616990676009283e-05, + "loss": 0.9007, "step": 15289 }, { - "epoch": 0.43388195232690124, + "epoch": 0.43327949219303463, "grad_norm": 0.0, - "learning_rate": 1.2597510737453098e-05, - "loss": 0.8777, + "learning_rate": 1.261610486111917e-05, + "loss": 0.8598, "step": 15290 }, { - "epoch": 0.43391032917139616, + "epoch": 0.4333078296352971, "grad_norm": 0.0, - "learning_rate": 1.2596623195211242e-05, - "loss": 0.8613, + "learning_rate": 1.2615219024192636e-05, + "loss": 0.9833, "step": 15291 }, { - "epoch": 0.433938706015891, + "epoch": 0.43333616707755956, "grad_norm": 0.0, - "learning_rate": 1.2595735631035593e-05, - "loss": 0.9422, + "learning_rate": 1.261433316523714e-05, + "loss": 0.9844, "step": 15292 }, { - "epoch": 0.43396708286038593, + "epoch": 0.43336450451982206, "grad_norm": 0.0, - "learning_rate": 1.259484804493365e-05, - "loss": 0.926, + "learning_rate": 1.2613447284260144e-05, + "loss": 0.88, "step": 15293 }, { - "epoch": 0.4339954597048808, + "epoch": 0.4333928419620845, "grad_norm": 0.0, - "learning_rate": 1.2593960436912907e-05, - "loss": 1.0214, + "learning_rate": 1.2612561381269113e-05, + "loss": 0.917, "step": 15294 }, { - "epoch": 0.4340238365493757, + "epoch": 0.433421179404347, "grad_norm": 0.0, - "learning_rate": 1.2593072806980865e-05, - "loss": 0.8572, + "learning_rate": 1.2611675456271505e-05, + "loss": 0.8994, "step": 15295 }, { - "epoch": 0.4340522133938706, + "epoch": 0.43344951684660943, "grad_norm": 0.0, - "learning_rate": 1.2592185155145023e-05, - "loss": 0.9919, + "learning_rate": 1.261078950927479e-05, + "loss": 0.8637, "step": 15296 }, { - "epoch": 0.4340805902383655, + "epoch": 0.43347785428887187, "grad_norm": 0.0, - "learning_rate": 1.2591297481412872e-05, - "loss": 0.9872, + "learning_rate": 1.2609903540286424e-05, + "loss": 0.9579, "step": 15297 }, { - "epoch": 0.4341089670828604, + "epoch": 0.43350619173113436, "grad_norm": 0.0, - "learning_rate": 1.2590409785791917e-05, - "loss": 0.9121, + "learning_rate": 1.2609017549313867e-05, + "loss": 1.0674, "step": 15298 }, { - "epoch": 0.43413734392735526, + "epoch": 0.4335345291733968, "grad_norm": 0.0, - "learning_rate": 1.2589522068289655e-05, - "loss": 0.7882, + "learning_rate": 1.260813153636459e-05, + "loss": 0.9268, "step": 15299 }, { - "epoch": 0.43416572077185017, + "epoch": 0.4335628666156593, "grad_norm": 0.0, - "learning_rate": 1.2588634328913581e-05, - "loss": 0.8831, + "learning_rate": 1.2607245501446051e-05, + "loss": 1.0044, "step": 15300 }, { - "epoch": 0.4341940976163451, + "epoch": 0.43359120405792173, "grad_norm": 0.0, - "learning_rate": 1.2587746567671197e-05, - "loss": 0.9539, + "learning_rate": 1.2606359444565715e-05, + "loss": 0.8106, "step": 15301 }, { - "epoch": 0.43422247446083995, + "epoch": 0.43361954150018417, "grad_norm": 0.0, - "learning_rate": 1.2586858784570002e-05, - "loss": 0.9753, + "learning_rate": 1.2605473365731047e-05, + "loss": 0.9335, "step": 15302 }, { - "epoch": 0.43425085130533486, + "epoch": 0.43364787894244666, "grad_norm": 0.0, - "learning_rate": 1.2585970979617494e-05, - "loss": 0.9725, + "learning_rate": 1.2604587264949506e-05, + "loss": 0.9384, "step": 15303 }, { - "epoch": 0.4342792281498297, + "epoch": 0.4336762163847091, "grad_norm": 0.0, - "learning_rate": 1.2585083152821174e-05, - "loss": 1.0308, + "learning_rate": 1.2603701142228564e-05, + "loss": 0.9219, "step": 15304 }, { - "epoch": 0.43430760499432464, + "epoch": 0.4337045538269716, "grad_norm": 0.0, - "learning_rate": 1.2584195304188542e-05, - "loss": 0.9289, + "learning_rate": 1.2602814997575677e-05, + "loss": 0.9185, "step": 15305 }, { - "epoch": 0.4343359818388195, + "epoch": 0.43373289126923403, "grad_norm": 0.0, - "learning_rate": 1.2583307433727094e-05, - "loss": 1.0144, + "learning_rate": 1.2601928830998314e-05, + "loss": 0.9109, "step": 15306 }, { - "epoch": 0.4343643586833144, + "epoch": 0.4337612287114965, "grad_norm": 0.0, - "learning_rate": 1.2582419541444331e-05, - "loss": 0.9529, + "learning_rate": 1.2601042642503935e-05, + "loss": 0.9655, "step": 15307 }, { - "epoch": 0.4343927355278093, + "epoch": 0.43378956615375897, "grad_norm": 0.0, - "learning_rate": 1.2581531627347754e-05, - "loss": 0.9682, + "learning_rate": 1.2600156432100012e-05, + "loss": 1.0187, "step": 15308 }, { - "epoch": 0.4344211123723042, + "epoch": 0.4338179035960214, "grad_norm": 0.0, - "learning_rate": 1.2580643691444866e-05, - "loss": 0.9446, + "learning_rate": 1.2599270199794008e-05, + "loss": 0.8582, "step": 15309 }, { - "epoch": 0.4344494892167991, + "epoch": 0.4338462410382839, "grad_norm": 0.0, - "learning_rate": 1.2579755733743164e-05, - "loss": 0.8885, + "learning_rate": 1.2598383945593382e-05, + "loss": 0.9933, "step": 15310 }, { - "epoch": 0.43447786606129396, + "epoch": 0.43387457848054634, "grad_norm": 0.0, - "learning_rate": 1.2578867754250146e-05, - "loss": 0.9421, + "learning_rate": 1.2597497669505603e-05, + "loss": 0.8854, "step": 15311 }, { - "epoch": 0.4345062429057889, + "epoch": 0.43390291592280883, "grad_norm": 0.0, - "learning_rate": 1.257797975297332e-05, - "loss": 0.9532, + "learning_rate": 1.2596611371538135e-05, + "loss": 0.9075, "step": 15312 }, { - "epoch": 0.4345346197502838, + "epoch": 0.43393125336507127, "grad_norm": 0.0, - "learning_rate": 1.2577091729920184e-05, - "loss": 1.0043, + "learning_rate": 1.2595725051698448e-05, + "loss": 0.8768, "step": 15313 }, { - "epoch": 0.43456299659477865, + "epoch": 0.4339595908073337, "grad_norm": 0.0, - "learning_rate": 1.2576203685098233e-05, - "loss": 0.9546, + "learning_rate": 1.2594838709994007e-05, + "loss": 0.8938, "step": 15314 }, { - "epoch": 0.43459137343927357, + "epoch": 0.4339879282495962, "grad_norm": 0.0, - "learning_rate": 1.2575315618514977e-05, - "loss": 0.8824, + "learning_rate": 1.2593952346432273e-05, + "loss": 0.9626, "step": 15315 }, { - "epoch": 0.4346197502837684, + "epoch": 0.43401626569185864, "grad_norm": 0.0, - "learning_rate": 1.2574427530177915e-05, - "loss": 0.7345, + "learning_rate": 1.2593065961020714e-05, + "loss": 0.8257, "step": 15316 }, { - "epoch": 0.43464812712826334, + "epoch": 0.43404460313412113, "grad_norm": 0.0, - "learning_rate": 1.2573539420094548e-05, - "loss": 0.964, + "learning_rate": 1.25921795537668e-05, + "loss": 1.0307, "step": 15317 }, { - "epoch": 0.43467650397275825, + "epoch": 0.43407294057638357, "grad_norm": 0.0, - "learning_rate": 1.2572651288272375e-05, - "loss": 0.8543, + "learning_rate": 1.2591293124677992e-05, + "loss": 0.923, "step": 15318 }, { - "epoch": 0.4347048808172531, + "epoch": 0.43410127801864606, "grad_norm": 0.0, - "learning_rate": 1.2571763134718904e-05, - "loss": 0.9098, + "learning_rate": 1.2590406673761762e-05, + "loss": 0.9345, "step": 15319 }, { - "epoch": 0.43473325766174803, + "epoch": 0.4341296154609085, "grad_norm": 0.0, - "learning_rate": 1.2570874959441635e-05, - "loss": 0.8913, + "learning_rate": 1.2589520201025576e-05, + "loss": 0.9254, "step": 15320 }, { - "epoch": 0.4347616345062429, + "epoch": 0.43415795290317094, "grad_norm": 0.0, - "learning_rate": 1.2569986762448063e-05, - "loss": 0.889, + "learning_rate": 1.2588633706476898e-05, + "loss": 0.8756, "step": 15321 }, { - "epoch": 0.4347900113507378, + "epoch": 0.43418629034543343, "grad_norm": 0.0, - "learning_rate": 1.2569098543745703e-05, - "loss": 0.8799, + "learning_rate": 1.2587747190123198e-05, + "loss": 0.7559, "step": 15322 }, { - "epoch": 0.43481838819523266, + "epoch": 0.4342146277876959, "grad_norm": 0.0, - "learning_rate": 1.256821030334205e-05, - "loss": 0.8643, + "learning_rate": 1.2586860651971941e-05, + "loss": 0.8808, "step": 15323 }, { - "epoch": 0.4348467650397276, + "epoch": 0.43424296522995837, "grad_norm": 0.0, - "learning_rate": 1.2567322041244612e-05, - "loss": 0.8035, + "learning_rate": 1.2585974092030597e-05, + "loss": 0.8639, "step": 15324 }, { - "epoch": 0.4348751418842225, + "epoch": 0.4342713026722208, "grad_norm": 0.0, - "learning_rate": 1.2566433757460886e-05, - "loss": 0.853, + "learning_rate": 1.2585087510306633e-05, + "loss": 1.0055, "step": 15325 }, { - "epoch": 0.43490351872871735, + "epoch": 0.43429964011448324, "grad_norm": 0.0, - "learning_rate": 1.2565545451998382e-05, - "loss": 0.9659, + "learning_rate": 1.2584200906807517e-05, + "loss": 0.9754, "step": 15326 }, { - "epoch": 0.43493189557321227, + "epoch": 0.43432797755674574, "grad_norm": 0.0, - "learning_rate": 1.2564657124864599e-05, - "loss": 0.9651, + "learning_rate": 1.2583314281540718e-05, + "loss": 0.878, "step": 15327 }, { - "epoch": 0.43496027241770713, + "epoch": 0.4343563149990082, "grad_norm": 0.0, - "learning_rate": 1.2563768776067039e-05, - "loss": 1.0034, + "learning_rate": 1.2582427634513701e-05, + "loss": 0.9861, "step": 15328 }, { - "epoch": 0.43498864926220204, + "epoch": 0.43438465244127067, "grad_norm": 0.0, - "learning_rate": 1.256288040561321e-05, - "loss": 0.9237, + "learning_rate": 1.258154096573394e-05, + "loss": 0.8573, "step": 15329 }, { - "epoch": 0.43501702610669696, + "epoch": 0.4344129898835331, "grad_norm": 0.0, - "learning_rate": 1.2561992013510618e-05, - "loss": 0.8547, + "learning_rate": 1.25806542752089e-05, + "loss": 0.9108, "step": 15330 }, { - "epoch": 0.4350454029511918, + "epoch": 0.4344413273257956, "grad_norm": 0.0, - "learning_rate": 1.2561103599766761e-05, - "loss": 1.0139, + "learning_rate": 1.2579767562946048e-05, + "loss": 0.7834, "step": 15331 }, { - "epoch": 0.43507377979568673, + "epoch": 0.43446966476805804, "grad_norm": 0.0, - "learning_rate": 1.2560215164389149e-05, - "loss": 0.9864, + "learning_rate": 1.2578880828952857e-05, + "loss": 0.9277, "step": 15332 }, { - "epoch": 0.4351021566401816, + "epoch": 0.4344980022103205, "grad_norm": 0.0, - "learning_rate": 1.2559326707385285e-05, - "loss": 0.956, + "learning_rate": 1.2577994073236797e-05, + "loss": 0.8307, "step": 15333 }, { - "epoch": 0.4351305334846765, + "epoch": 0.43452633965258297, "grad_norm": 0.0, - "learning_rate": 1.2558438228762673e-05, - "loss": 0.9103, + "learning_rate": 1.2577107295805334e-05, + "loss": 0.8806, "step": 15334 }, { - "epoch": 0.4351589103291714, + "epoch": 0.4345546770948454, "grad_norm": 0.0, - "learning_rate": 1.2557549728528815e-05, - "loss": 0.9881, + "learning_rate": 1.2576220496665942e-05, + "loss": 0.9711, "step": 15335 }, { - "epoch": 0.4351872871736663, + "epoch": 0.4345830145371079, "grad_norm": 0.0, - "learning_rate": 1.2556661206691221e-05, - "loss": 0.8237, + "learning_rate": 1.2575333675826084e-05, + "loss": 1.0025, "step": 15336 }, { - "epoch": 0.4352156640181612, + "epoch": 0.43461135197937034, "grad_norm": 0.0, - "learning_rate": 1.2555772663257397e-05, - "loss": 0.9039, + "learning_rate": 1.257444683329324e-05, + "loss": 0.8928, "step": 15337 }, { - "epoch": 0.43524404086265606, + "epoch": 0.4346396894216328, "grad_norm": 0.0, - "learning_rate": 1.2554884098234843e-05, - "loss": 0.8851, + "learning_rate": 1.257355996907487e-05, + "loss": 0.9359, "step": 15338 }, { - "epoch": 0.43527241770715097, + "epoch": 0.4346680268638953, "grad_norm": 0.0, - "learning_rate": 1.255399551163107e-05, - "loss": 0.818, + "learning_rate": 1.2572673083178448e-05, + "loss": 0.9091, "step": 15339 }, { - "epoch": 0.43530079455164583, + "epoch": 0.4346963643061577, "grad_norm": 0.0, - "learning_rate": 1.2553106903453583e-05, - "loss": 0.9899, + "learning_rate": 1.2571786175611445e-05, + "loss": 0.879, "step": 15340 }, { - "epoch": 0.43532917139614075, + "epoch": 0.4347247017484202, "grad_norm": 0.0, - "learning_rate": 1.2552218273709889e-05, - "loss": 0.8233, + "learning_rate": 1.2570899246381334e-05, + "loss": 0.7887, "step": 15341 }, { - "epoch": 0.43535754824063566, + "epoch": 0.43475303919068264, "grad_norm": 0.0, - "learning_rate": 1.2551329622407486e-05, - "loss": 1.0166, + "learning_rate": 1.2570012295495583e-05, + "loss": 0.8942, "step": 15342 }, { - "epoch": 0.4353859250851305, + "epoch": 0.43478137663294514, "grad_norm": 0.0, - "learning_rate": 1.2550440949553892e-05, - "loss": 0.8959, + "learning_rate": 1.2569125322961667e-05, + "loss": 0.9888, "step": 15343 }, { - "epoch": 0.43541430192962544, + "epoch": 0.4348097140752076, "grad_norm": 0.0, - "learning_rate": 1.254955225515661e-05, - "loss": 0.8012, + "learning_rate": 1.256823832878705e-05, + "loss": 0.8392, "step": 15344 }, { - "epoch": 0.4354426787741203, + "epoch": 0.43483805151747, "grad_norm": 0.0, - "learning_rate": 1.254866353922314e-05, - "loss": 0.8971, + "learning_rate": 1.256735131297921e-05, + "loss": 0.8488, "step": 15345 }, { - "epoch": 0.4354710556186152, + "epoch": 0.4348663889597325, "grad_norm": 0.0, - "learning_rate": 1.2547774801760997e-05, - "loss": 0.9373, + "learning_rate": 1.2566464275545615e-05, + "loss": 0.7903, "step": 15346 }, { - "epoch": 0.4354994324631101, + "epoch": 0.43489472640199495, "grad_norm": 0.0, - "learning_rate": 1.2546886042777686e-05, - "loss": 0.8799, + "learning_rate": 1.2565577216493743e-05, + "loss": 0.8483, "step": 15347 }, { - "epoch": 0.435527809307605, + "epoch": 0.43492306384425744, "grad_norm": 0.0, - "learning_rate": 1.2545997262280713e-05, - "loss": 0.8432, + "learning_rate": 1.2564690135831057e-05, + "loss": 0.9429, "step": 15348 }, { - "epoch": 0.4355561861520999, + "epoch": 0.4349514012865199, "grad_norm": 0.0, - "learning_rate": 1.2545108460277591e-05, - "loss": 0.8383, + "learning_rate": 1.2563803033565034e-05, + "loss": 1.0251, "step": 15349 }, { - "epoch": 0.43558456299659476, + "epoch": 0.4349797387287823, "grad_norm": 0.0, - "learning_rate": 1.254421963677582e-05, - "loss": 0.9567, + "learning_rate": 1.2562915909703149e-05, + "loss": 0.8687, "step": 15350 }, { - "epoch": 0.4356129398410897, + "epoch": 0.4350080761710448, "grad_norm": 0.0, - "learning_rate": 1.254333079178291e-05, - "loss": 0.8982, + "learning_rate": 1.2562028764252867e-05, + "loss": 0.9628, "step": 15351 }, { - "epoch": 0.4356413166855846, + "epoch": 0.43503641361330725, "grad_norm": 0.0, - "learning_rate": 1.2542441925306375e-05, - "loss": 0.9732, + "learning_rate": 1.2561141597221667e-05, + "loss": 0.8214, "step": 15352 }, { - "epoch": 0.43566969353007945, + "epoch": 0.43506475105556974, "grad_norm": 0.0, - "learning_rate": 1.2541553037353716e-05, - "loss": 0.8935, + "learning_rate": 1.2560254408617022e-05, + "loss": 0.8424, "step": 15353 }, { - "epoch": 0.43569807037457436, + "epoch": 0.4350930884978322, "grad_norm": 0.0, - "learning_rate": 1.2540664127932444e-05, - "loss": 0.9894, + "learning_rate": 1.2559367198446401e-05, + "loss": 0.9645, "step": 15354 }, { - "epoch": 0.4357264472190692, + "epoch": 0.4351214259400946, "grad_norm": 0.0, - "learning_rate": 1.2539775197050065e-05, - "loss": 0.9398, + "learning_rate": 1.2558479966717282e-05, + "loss": 0.9078, "step": 15355 }, { - "epoch": 0.43575482406356414, + "epoch": 0.4351497633823571, "grad_norm": 0.0, - "learning_rate": 1.2538886244714096e-05, - "loss": 0.8484, + "learning_rate": 1.2557592713437137e-05, + "loss": 0.8429, "step": 15356 }, { - "epoch": 0.435783200908059, + "epoch": 0.43517810082461955, "grad_norm": 0.0, - "learning_rate": 1.2537997270932041e-05, - "loss": 0.8796, + "learning_rate": 1.2556705438613437e-05, + "loss": 0.9093, "step": 15357 }, { - "epoch": 0.4358115777525539, + "epoch": 0.43520643826688205, "grad_norm": 0.0, - "learning_rate": 1.2537108275711405e-05, - "loss": 1.0286, + "learning_rate": 1.2555818142253656e-05, + "loss": 0.8918, "step": 15358 }, { - "epoch": 0.43583995459704883, + "epoch": 0.4352347757091445, "grad_norm": 0.0, - "learning_rate": 1.2536219259059703e-05, - "loss": 0.923, + "learning_rate": 1.2554930824365273e-05, + "loss": 0.9569, "step": 15359 }, { - "epoch": 0.4358683314415437, + "epoch": 0.435263113151407, "grad_norm": 0.0, - "learning_rate": 1.2535330220984445e-05, - "loss": 0.9248, + "learning_rate": 1.2554043484955757e-05, + "loss": 0.931, "step": 15360 }, { - "epoch": 0.4358967082860386, + "epoch": 0.4352914505936694, "grad_norm": 0.0, - "learning_rate": 1.2534441161493134e-05, - "loss": 0.7752, + "learning_rate": 1.2553156124032585e-05, + "loss": 0.9728, "step": 15361 }, { - "epoch": 0.43592508513053346, + "epoch": 0.43531978803593185, "grad_norm": 0.0, - "learning_rate": 1.2533552080593286e-05, - "loss": 1.002, + "learning_rate": 1.2552268741603232e-05, + "loss": 0.8788, "step": 15362 }, { - "epoch": 0.4359534619750284, + "epoch": 0.43534812547819435, "grad_norm": 0.0, - "learning_rate": 1.253266297829241e-05, - "loss": 0.8741, + "learning_rate": 1.2551381337675168e-05, + "loss": 1.0237, "step": 15363 }, { - "epoch": 0.4359818388195233, + "epoch": 0.4353764629204568, "grad_norm": 0.0, - "learning_rate": 1.2531773854598019e-05, - "loss": 0.9221, + "learning_rate": 1.2550493912255872e-05, + "loss": 1.0065, "step": 15364 }, { - "epoch": 0.43601021566401815, + "epoch": 0.4354048003627193, "grad_norm": 0.0, - "learning_rate": 1.2530884709517616e-05, - "loss": 0.975, + "learning_rate": 1.2549606465352819e-05, + "loss": 0.9178, "step": 15365 }, { - "epoch": 0.43603859250851307, + "epoch": 0.4354331378049817, "grad_norm": 0.0, - "learning_rate": 1.2529995543058721e-05, - "loss": 0.8948, + "learning_rate": 1.2548718996973488e-05, + "loss": 0.9855, "step": 15366 }, { - "epoch": 0.4360669693530079, + "epoch": 0.43546147524724416, "grad_norm": 0.0, - "learning_rate": 1.2529106355228836e-05, - "loss": 0.9197, + "learning_rate": 1.2547831507125348e-05, + "loss": 0.9644, "step": 15367 }, { - "epoch": 0.43609534619750284, + "epoch": 0.43548981268950665, "grad_norm": 0.0, - "learning_rate": 1.2528217146035478e-05, - "loss": 0.9172, + "learning_rate": 1.2546943995815876e-05, + "loss": 0.9025, "step": 15368 }, { - "epoch": 0.43612372304199776, + "epoch": 0.4355181501317691, "grad_norm": 0.0, - "learning_rate": 1.2527327915486154e-05, - "loss": 0.7613, + "learning_rate": 1.254605646305255e-05, + "loss": 0.9124, "step": 15369 }, { - "epoch": 0.4361520998864926, + "epoch": 0.4355464875740316, "grad_norm": 0.0, - "learning_rate": 1.252643866358838e-05, - "loss": 0.9375, + "learning_rate": 1.2545168908842846e-05, + "loss": 0.8205, "step": 15370 }, { - "epoch": 0.43618047673098753, + "epoch": 0.435574825016294, "grad_norm": 0.0, - "learning_rate": 1.2525549390349665e-05, - "loss": 0.8953, + "learning_rate": 1.2544281333194238e-05, + "loss": 0.9337, "step": 15371 }, { - "epoch": 0.4362088535754824, + "epoch": 0.4356031624585565, "grad_norm": 0.0, - "learning_rate": 1.2524660095777516e-05, - "loss": 0.8624, + "learning_rate": 1.2543393736114205e-05, + "loss": 0.9464, "step": 15372 }, { - "epoch": 0.4362372304199773, + "epoch": 0.43563149990081895, "grad_norm": 0.0, - "learning_rate": 1.2523770779879457e-05, - "loss": 0.8771, + "learning_rate": 1.2542506117610218e-05, + "loss": 0.9239, "step": 15373 }, { - "epoch": 0.43626560726447217, + "epoch": 0.4356598373430814, "grad_norm": 0.0, - "learning_rate": 1.2522881442662989e-05, - "loss": 0.9965, + "learning_rate": 1.2541618477689761e-05, + "loss": 0.9926, "step": 15374 }, { - "epoch": 0.4362939841089671, + "epoch": 0.4356881747853439, "grad_norm": 0.0, - "learning_rate": 1.2521992084135626e-05, - "loss": 0.9476, + "learning_rate": 1.254073081636031e-05, + "loss": 0.866, "step": 15375 }, { - "epoch": 0.436322360953462, + "epoch": 0.4357165122276063, "grad_norm": 0.0, - "learning_rate": 1.2521102704304884e-05, - "loss": 0.8734, + "learning_rate": 1.2539843133629335e-05, + "loss": 0.9422, "step": 15376 }, { - "epoch": 0.43635073779795686, + "epoch": 0.4357448496698688, "grad_norm": 0.0, - "learning_rate": 1.2520213303178274e-05, - "loss": 0.8948, + "learning_rate": 1.253895542950432e-05, + "loss": 0.9955, "step": 15377 }, { - "epoch": 0.43637911464245177, + "epoch": 0.43577318711213126, "grad_norm": 0.0, - "learning_rate": 1.2519323880763312e-05, - "loss": 0.8629, + "learning_rate": 1.2538067703992738e-05, + "loss": 0.8563, "step": 15378 }, { - "epoch": 0.43640749148694663, + "epoch": 0.4358015245543937, "grad_norm": 0.0, - "learning_rate": 1.2518434437067507e-05, - "loss": 0.7977, + "learning_rate": 1.2537179957102075e-05, + "loss": 0.9509, "step": 15379 }, { - "epoch": 0.43643586833144155, + "epoch": 0.4358298619966562, "grad_norm": 0.0, - "learning_rate": 1.251754497209837e-05, - "loss": 0.9614, + "learning_rate": 1.25362921888398e-05, + "loss": 0.8967, "step": 15380 }, { - "epoch": 0.43646424517593646, + "epoch": 0.4358581994389186, "grad_norm": 0.0, - "learning_rate": 1.251665548586342e-05, - "loss": 0.8568, + "learning_rate": 1.2535404399213394e-05, + "loss": 0.955, "step": 15381 }, { - "epoch": 0.4364926220204313, + "epoch": 0.4358865368811811, "grad_norm": 0.0, - "learning_rate": 1.2515765978370167e-05, - "loss": 0.8828, + "learning_rate": 1.2534516588230335e-05, + "loss": 0.9649, "step": 15382 }, { - "epoch": 0.43652099886492624, + "epoch": 0.43591487432344356, "grad_norm": 0.0, - "learning_rate": 1.2514876449626126e-05, - "loss": 0.9843, + "learning_rate": 1.2533628755898102e-05, + "loss": 0.9938, "step": 15383 }, { - "epoch": 0.4365493757094211, + "epoch": 0.43594321176570605, "grad_norm": 0.0, - "learning_rate": 1.2513986899638809e-05, - "loss": 1.0537, + "learning_rate": 1.2532740902224171e-05, + "loss": 0.9862, "step": 15384 }, { - "epoch": 0.436577752553916, + "epoch": 0.4359715492079685, "grad_norm": 0.0, - "learning_rate": 1.2513097328415733e-05, - "loss": 0.8456, + "learning_rate": 1.2531853027216028e-05, + "loss": 0.9429, "step": 15385 }, { - "epoch": 0.43660612939841087, + "epoch": 0.43599988665023093, "grad_norm": 0.0, - "learning_rate": 1.251220773596441e-05, - "loss": 0.9893, + "learning_rate": 1.253096513088114e-05, + "loss": 0.8684, "step": 15386 }, { - "epoch": 0.4366345062429058, + "epoch": 0.4360282240924934, "grad_norm": 0.0, - "learning_rate": 1.2511318122292358e-05, - "loss": 0.8875, + "learning_rate": 1.2530077213226998e-05, + "loss": 0.9898, "step": 15387 }, { - "epoch": 0.4366628830874007, + "epoch": 0.43605656153475586, "grad_norm": 0.0, - "learning_rate": 1.2510428487407088e-05, - "loss": 0.9125, + "learning_rate": 1.2529189274261078e-05, + "loss": 0.9728, "step": 15388 }, { - "epoch": 0.43669125993189556, + "epoch": 0.43608489897701835, "grad_norm": 0.0, - "learning_rate": 1.2509538831316112e-05, - "loss": 0.9021, + "learning_rate": 1.2528301313990854e-05, + "loss": 0.9774, "step": 15389 }, { - "epoch": 0.4367196367763905, + "epoch": 0.4361132364192808, "grad_norm": 0.0, - "learning_rate": 1.2508649154026953e-05, - "loss": 0.9938, + "learning_rate": 1.2527413332423808e-05, + "loss": 0.9383, "step": 15390 }, { - "epoch": 0.43674801362088533, + "epoch": 0.43614157386154323, "grad_norm": 0.0, - "learning_rate": 1.2507759455547121e-05, - "loss": 0.867, + "learning_rate": 1.2526525329567422e-05, + "loss": 1.0667, "step": 15391 }, { - "epoch": 0.43677639046538025, + "epoch": 0.4361699113038057, "grad_norm": 0.0, - "learning_rate": 1.250686973588413e-05, - "loss": 0.8469, + "learning_rate": 1.2525637305429176e-05, + "loss": 0.9288, "step": 15392 }, { - "epoch": 0.43680476730987516, + "epoch": 0.43619824874606816, "grad_norm": 0.0, - "learning_rate": 1.2505979995045498e-05, - "loss": 0.9564, + "learning_rate": 1.252474926001655e-05, + "loss": 0.8821, "step": 15393 }, { - "epoch": 0.43683314415437, + "epoch": 0.43622658618833066, "grad_norm": 0.0, - "learning_rate": 1.2505090233038743e-05, - "loss": 0.9137, + "learning_rate": 1.2523861193337019e-05, + "loss": 0.7997, "step": 15394 }, { - "epoch": 0.43686152099886494, + "epoch": 0.4362549236305931, "grad_norm": 0.0, - "learning_rate": 1.2504200449871378e-05, - "loss": 0.82, + "learning_rate": 1.2522973105398073e-05, + "loss": 0.891, "step": 15395 }, { - "epoch": 0.4368898978433598, + "epoch": 0.4362832610728556, "grad_norm": 0.0, - "learning_rate": 1.2503310645550918e-05, - "loss": 0.9621, + "learning_rate": 1.2522084996207187e-05, + "loss": 0.9697, "step": 15396 }, { - "epoch": 0.4369182746878547, + "epoch": 0.436311598515118, "grad_norm": 0.0, - "learning_rate": 1.2502420820084879e-05, - "loss": 0.8511, + "learning_rate": 1.2521196865771839e-05, + "loss": 0.8749, "step": 15397 }, { - "epoch": 0.43694665153234963, + "epoch": 0.43633993595738046, "grad_norm": 0.0, - "learning_rate": 1.2501530973480783e-05, - "loss": 0.9661, + "learning_rate": 1.2520308714099513e-05, + "loss": 0.9608, "step": 15398 }, { - "epoch": 0.4369750283768445, + "epoch": 0.43636827339964296, "grad_norm": 0.0, - "learning_rate": 1.2500641105746137e-05, - "loss": 0.7917, + "learning_rate": 1.2519420541197696e-05, + "loss": 0.9923, "step": 15399 }, { - "epoch": 0.4370034052213394, + "epoch": 0.4363966108419054, "grad_norm": 0.0, - "learning_rate": 1.2499751216888464e-05, - "loss": 0.8523, + "learning_rate": 1.2518532347073862e-05, + "loss": 0.9574, "step": 15400 }, { - "epoch": 0.43703178206583426, + "epoch": 0.4364249482841679, "grad_norm": 0.0, - "learning_rate": 1.2498861306915283e-05, - "loss": 0.9639, + "learning_rate": 1.2517644131735496e-05, + "loss": 0.8951, "step": 15401 }, { - "epoch": 0.4370601589103292, + "epoch": 0.43645328572643033, "grad_norm": 0.0, - "learning_rate": 1.2497971375834106e-05, - "loss": 1.0098, + "learning_rate": 1.2516755895190076e-05, + "loss": 0.9068, "step": 15402 }, { - "epoch": 0.43708853575482404, + "epoch": 0.43648162316869277, "grad_norm": 0.0, - "learning_rate": 1.2497081423652455e-05, - "loss": 0.8893, + "learning_rate": 1.2515867637445088e-05, + "loss": 0.8787, "step": 15403 }, { - "epoch": 0.43711691259931895, + "epoch": 0.43650996061095526, "grad_norm": 0.0, - "learning_rate": 1.2496191450377844e-05, - "loss": 0.9988, + "learning_rate": 1.251497935850801e-05, + "loss": 1.0282, "step": 15404 }, { - "epoch": 0.43714528944381387, + "epoch": 0.4365382980532177, "grad_norm": 0.0, - "learning_rate": 1.249530145601779e-05, - "loss": 0.8294, + "learning_rate": 1.2514091058386331e-05, + "loss": 0.821, "step": 15405 }, { - "epoch": 0.4371736662883087, + "epoch": 0.4365666354954802, "grad_norm": 0.0, - "learning_rate": 1.2494411440579814e-05, - "loss": 0.9612, + "learning_rate": 1.2513202737087525e-05, + "loss": 0.9423, "step": 15406 }, { - "epoch": 0.43720204313280364, + "epoch": 0.43659497293774263, "grad_norm": 0.0, - "learning_rate": 1.2493521404071432e-05, - "loss": 0.9424, + "learning_rate": 1.2512314394619083e-05, + "loss": 1.0834, "step": 15407 }, { - "epoch": 0.4372304199772985, + "epoch": 0.4366233103800051, "grad_norm": 0.0, - "learning_rate": 1.2492631346500163e-05, - "loss": 1.0049, + "learning_rate": 1.2511426030988483e-05, + "loss": 0.9046, "step": 15408 }, { - "epoch": 0.4372587968217934, + "epoch": 0.43665164782226756, "grad_norm": 0.0, - "learning_rate": 1.2491741267873522e-05, - "loss": 0.8901, + "learning_rate": 1.2510537646203209e-05, + "loss": 0.8727, "step": 15409 }, { - "epoch": 0.43728717366628833, + "epoch": 0.43667998526453, "grad_norm": 0.0, - "learning_rate": 1.2490851168199036e-05, - "loss": 0.9009, + "learning_rate": 1.2509649240270742e-05, + "loss": 0.856, "step": 15410 }, { - "epoch": 0.4373155505107832, + "epoch": 0.4367083227067925, "grad_norm": 0.0, - "learning_rate": 1.2489961047484215e-05, - "loss": 0.8317, + "learning_rate": 1.2508760813198569e-05, + "loss": 0.9843, "step": 15411 }, { - "epoch": 0.4373439273552781, + "epoch": 0.43673666014905493, "grad_norm": 0.0, - "learning_rate": 1.248907090573658e-05, - "loss": 0.9449, + "learning_rate": 1.2507872364994174e-05, + "loss": 0.8723, "step": 15412 }, { - "epoch": 0.43737230419977297, + "epoch": 0.4367649975913174, "grad_norm": 0.0, - "learning_rate": 1.2488180742963654e-05, - "loss": 0.9928, + "learning_rate": 1.2506983895665036e-05, + "loss": 0.931, "step": 15413 }, { - "epoch": 0.4374006810442679, + "epoch": 0.43679333503357987, "grad_norm": 0.0, - "learning_rate": 1.2487290559172953e-05, - "loss": 0.9015, + "learning_rate": 1.2506095405218646e-05, + "loss": 0.9344, "step": 15414 }, { - "epoch": 0.4374290578887628, + "epoch": 0.4368216724758423, "grad_norm": 0.0, - "learning_rate": 1.2486400354371995e-05, - "loss": 0.8026, + "learning_rate": 1.2505206893662478e-05, + "loss": 0.975, "step": 15415 }, { - "epoch": 0.43745743473325766, + "epoch": 0.4368500099181048, "grad_norm": 0.0, - "learning_rate": 1.2485510128568302e-05, - "loss": 0.9518, + "learning_rate": 1.2504318361004022e-05, + "loss": 1.0267, "step": 15416 }, { - "epoch": 0.43748581157775257, + "epoch": 0.43687834736036724, "grad_norm": 0.0, - "learning_rate": 1.2484619881769393e-05, - "loss": 0.8883, + "learning_rate": 1.2503429807250766e-05, + "loss": 0.9072, "step": 15417 }, { - "epoch": 0.43751418842224743, + "epoch": 0.43690668480262973, "grad_norm": 0.0, - "learning_rate": 1.2483729613982789e-05, - "loss": 0.8374, + "learning_rate": 1.2502541232410192e-05, + "loss": 0.9418, "step": 15418 }, { - "epoch": 0.43754256526674234, + "epoch": 0.43693502224489217, "grad_norm": 0.0, - "learning_rate": 1.2482839325216008e-05, - "loss": 0.9046, + "learning_rate": 1.250165263648978e-05, + "loss": 0.9572, "step": 15419 }, { - "epoch": 0.4375709421112372, + "epoch": 0.43696335968715466, "grad_norm": 0.0, - "learning_rate": 1.2481949015476573e-05, - "loss": 1.0464, + "learning_rate": 1.2500764019497022e-05, + "loss": 0.967, "step": 15420 }, { - "epoch": 0.4375993189557321, + "epoch": 0.4369916971294171, "grad_norm": 0.0, - "learning_rate": 1.2481058684772006e-05, - "loss": 1.0369, + "learning_rate": 1.24998753814394e-05, + "loss": 0.9857, "step": 15421 }, { - "epoch": 0.43762769580022703, + "epoch": 0.43702003457167954, "grad_norm": 0.0, - "learning_rate": 1.2480168333109818e-05, - "loss": 1.0157, + "learning_rate": 1.2498986722324398e-05, + "loss": 0.8508, "step": 15422 }, { - "epoch": 0.4376560726447219, + "epoch": 0.43704837201394203, "grad_norm": 0.0, - "learning_rate": 1.247927796049754e-05, - "loss": 0.9761, + "learning_rate": 1.24980980421595e-05, + "loss": 0.958, "step": 15423 }, { - "epoch": 0.4376844494892168, + "epoch": 0.43707670945620447, "grad_norm": 0.0, - "learning_rate": 1.2478387566942689e-05, - "loss": 0.9554, + "learning_rate": 1.24972093409522e-05, + "loss": 0.8562, "step": 15424 }, { - "epoch": 0.43771282633371167, + "epoch": 0.43710504689846696, "grad_norm": 0.0, - "learning_rate": 1.2477497152452788e-05, - "loss": 0.8943, + "learning_rate": 1.2496320618709979e-05, + "loss": 0.9083, "step": 15425 }, { - "epoch": 0.4377412031782066, + "epoch": 0.4371333843407294, "grad_norm": 0.0, - "learning_rate": 1.2476606717035355e-05, - "loss": 0.973, + "learning_rate": 1.2495431875440319e-05, + "loss": 1.0081, "step": 15426 }, { - "epoch": 0.4377695800227015, + "epoch": 0.43716172178299184, "grad_norm": 0.0, - "learning_rate": 1.2475716260697917e-05, - "loss": 0.87, + "learning_rate": 1.2494543111150707e-05, + "loss": 0.9828, "step": 15427 }, { - "epoch": 0.43779795686719636, + "epoch": 0.43719005922525433, "grad_norm": 0.0, - "learning_rate": 1.2474825783447993e-05, - "loss": 0.9879, + "learning_rate": 1.249365432584864e-05, + "loss": 0.8555, "step": 15428 }, { - "epoch": 0.4378263337116913, + "epoch": 0.4372183966675168, "grad_norm": 0.0, - "learning_rate": 1.2473935285293099e-05, - "loss": 0.8622, + "learning_rate": 1.249276551954159e-05, + "loss": 1.0226, "step": 15429 }, { - "epoch": 0.43785471055618613, + "epoch": 0.43724673410977927, "grad_norm": 0.0, - "learning_rate": 1.2473044766240766e-05, - "loss": 0.8893, + "learning_rate": 1.249187669223705e-05, + "loss": 0.8901, "step": 15430 }, { - "epoch": 0.43788308740068105, + "epoch": 0.4372750715520417, "grad_norm": 0.0, - "learning_rate": 1.2472154226298513e-05, - "loss": 0.8691, + "learning_rate": 1.2490987843942511e-05, + "loss": 0.8178, "step": 15431 }, { - "epoch": 0.43791146424517596, + "epoch": 0.4373034089943042, "grad_norm": 0.0, - "learning_rate": 1.247126366547386e-05, - "loss": 0.9655, + "learning_rate": 1.2490098974665454e-05, + "loss": 0.9847, "step": 15432 }, { - "epoch": 0.4379398410896708, + "epoch": 0.43733174643656664, "grad_norm": 0.0, - "learning_rate": 1.2470373083774335e-05, - "loss": 0.8057, + "learning_rate": 1.2489210084413372e-05, + "loss": 0.8901, "step": 15433 }, { - "epoch": 0.43796821793416574, + "epoch": 0.4373600838788291, "grad_norm": 0.0, - "learning_rate": 1.2469482481207456e-05, - "loss": 0.897, + "learning_rate": 1.2488321173193748e-05, + "loss": 0.9344, "step": 15434 }, { - "epoch": 0.4379965947786606, + "epoch": 0.43738842132109157, "grad_norm": 0.0, - "learning_rate": 1.2468591857780747e-05, - "loss": 0.9365, + "learning_rate": 1.2487432241014068e-05, + "loss": 0.9496, "step": 15435 }, { - "epoch": 0.4380249716231555, + "epoch": 0.437416758763354, "grad_norm": 0.0, - "learning_rate": 1.246770121350173e-05, - "loss": 0.8759, + "learning_rate": 1.2486543287881822e-05, + "loss": 0.9375, "step": 15436 }, { - "epoch": 0.43805334846765037, + "epoch": 0.4374450962056165, "grad_norm": 0.0, - "learning_rate": 1.2466810548377932e-05, - "loss": 0.868, + "learning_rate": 1.2485654313804501e-05, + "loss": 0.9937, "step": 15437 }, { - "epoch": 0.4380817253121453, + "epoch": 0.43747343364787894, "grad_norm": 0.0, - "learning_rate": 1.2465919862416874e-05, - "loss": 1.0137, + "learning_rate": 1.2484765318789591e-05, + "loss": 0.9531, "step": 15438 }, { - "epoch": 0.4381101021566402, + "epoch": 0.4375017710901414, "grad_norm": 0.0, - "learning_rate": 1.2465029155626081e-05, - "loss": 0.8893, + "learning_rate": 1.2483876302844579e-05, + "loss": 0.9721, "step": 15439 }, { - "epoch": 0.43813847900113506, + "epoch": 0.43753010853240387, "grad_norm": 0.0, - "learning_rate": 1.2464138428013072e-05, - "loss": 0.8649, + "learning_rate": 1.2482987265976956e-05, + "loss": 0.8639, "step": 15440 }, { - "epoch": 0.43816685584563, + "epoch": 0.4375584459746663, "grad_norm": 0.0, - "learning_rate": 1.246324767958538e-05, - "loss": 0.8905, + "learning_rate": 1.2482098208194208e-05, + "loss": 0.9125, "step": 15441 }, { - "epoch": 0.43819523269012484, + "epoch": 0.4375867834169288, "grad_norm": 0.0, - "learning_rate": 1.2462356910350524e-05, - "loss": 0.9274, + "learning_rate": 1.2481209129503824e-05, + "loss": 0.923, "step": 15442 }, { - "epoch": 0.43822360953461975, + "epoch": 0.43761512085919124, "grad_norm": 0.0, - "learning_rate": 1.2461466120316024e-05, - "loss": 0.9726, + "learning_rate": 1.2480320029913295e-05, + "loss": 0.9025, "step": 15443 }, { - "epoch": 0.43825198637911467, + "epoch": 0.43764345830145374, "grad_norm": 0.0, - "learning_rate": 1.2460575309489414e-05, - "loss": 0.9015, + "learning_rate": 1.2479430909430109e-05, + "loss": 0.9652, "step": 15444 }, { - "epoch": 0.4382803632236095, + "epoch": 0.4376717957437162, "grad_norm": 0.0, - "learning_rate": 1.245968447787821e-05, - "loss": 1.0099, + "learning_rate": 1.2478541768061758e-05, + "loss": 0.8444, "step": 15445 }, { - "epoch": 0.43830874006810444, + "epoch": 0.4377001331859786, "grad_norm": 0.0, - "learning_rate": 1.245879362548994e-05, - "loss": 0.9427, + "learning_rate": 1.2477652605815729e-05, + "loss": 0.9021, "step": 15446 }, { - "epoch": 0.4383371169125993, + "epoch": 0.4377284706282411, "grad_norm": 0.0, - "learning_rate": 1.2457902752332131e-05, - "loss": 0.8752, + "learning_rate": 1.247676342269951e-05, + "loss": 0.8612, "step": 15447 }, { - "epoch": 0.4383654937570942, + "epoch": 0.43775680807050354, "grad_norm": 0.0, - "learning_rate": 1.245701185841231e-05, - "loss": 0.9006, + "learning_rate": 1.2475874218720594e-05, + "loss": 0.8646, "step": 15448 }, { - "epoch": 0.43839387060158913, + "epoch": 0.43778514551276604, "grad_norm": 0.0, - "learning_rate": 1.2456120943737996e-05, - "loss": 0.8766, + "learning_rate": 1.2474984993886467e-05, + "loss": 0.8963, "step": 15449 }, { - "epoch": 0.438422247446084, + "epoch": 0.4378134829550285, "grad_norm": 0.0, - "learning_rate": 1.245523000831672e-05, - "loss": 0.744, + "learning_rate": 1.2474095748204627e-05, + "loss": 0.9786, "step": 15450 }, { - "epoch": 0.4384506242905789, + "epoch": 0.4378418203972909, "grad_norm": 0.0, - "learning_rate": 1.2454339052156007e-05, - "loss": 0.8572, + "learning_rate": 1.2473206481682559e-05, + "loss": 0.8729, "step": 15451 }, { - "epoch": 0.43847900113507376, + "epoch": 0.4378701578395534, "grad_norm": 0.0, - "learning_rate": 1.245344807526338e-05, - "loss": 0.9109, + "learning_rate": 1.247231719432775e-05, + "loss": 0.9942, "step": 15452 }, { - "epoch": 0.4385073779795687, + "epoch": 0.43789849528181585, "grad_norm": 0.0, - "learning_rate": 1.245255707764636e-05, - "loss": 1.0157, + "learning_rate": 1.24714278861477e-05, + "loss": 0.8646, "step": 15453 }, { - "epoch": 0.43853575482406354, + "epoch": 0.43792683272407834, "grad_norm": 0.0, - "learning_rate": 1.2451666059312488e-05, - "loss": 0.8327, + "learning_rate": 1.2470538557149895e-05, + "loss": 0.9117, "step": 15454 }, { - "epoch": 0.43856413166855845, + "epoch": 0.4379551701663408, "grad_norm": 0.0, - "learning_rate": 1.2450775020269283e-05, - "loss": 0.8789, + "learning_rate": 1.2469649207341823e-05, + "loss": 0.8415, "step": 15455 }, { - "epoch": 0.43859250851305337, + "epoch": 0.4379835076086033, "grad_norm": 0.0, - "learning_rate": 1.2449883960524267e-05, - "loss": 0.9889, + "learning_rate": 1.246875983673098e-05, + "loss": 0.8503, "step": 15456 }, { - "epoch": 0.43862088535754823, + "epoch": 0.4380118450508657, "grad_norm": 0.0, - "learning_rate": 1.2448992880084973e-05, - "loss": 1.005, + "learning_rate": 1.2467870445324857e-05, + "loss": 0.9318, "step": 15457 }, { - "epoch": 0.43864926220204314, + "epoch": 0.43804018249312815, "grad_norm": 0.0, - "learning_rate": 1.244810177895893e-05, - "loss": 0.9157, + "learning_rate": 1.2466981033130944e-05, + "loss": 0.8536, "step": 15458 }, { - "epoch": 0.438677639046538, + "epoch": 0.43806851993539064, "grad_norm": 0.0, - "learning_rate": 1.2447210657153655e-05, - "loss": 0.9216, + "learning_rate": 1.2466091600156736e-05, + "loss": 0.8574, "step": 15459 }, { - "epoch": 0.4387060158910329, + "epoch": 0.4380968573776531, "grad_norm": 0.0, - "learning_rate": 1.2446319514676685e-05, - "loss": 0.984, + "learning_rate": 1.246520214640972e-05, + "loss": 0.933, "step": 15460 }, { - "epoch": 0.43873439273552783, + "epoch": 0.4381251948199156, "grad_norm": 0.0, - "learning_rate": 1.2445428351535542e-05, - "loss": 1.0345, + "learning_rate": 1.2464312671897391e-05, + "loss": 0.9204, "step": 15461 }, { - "epoch": 0.4387627695800227, + "epoch": 0.438153532262178, "grad_norm": 0.0, - "learning_rate": 1.2444537167737759e-05, - "loss": 0.9522, + "learning_rate": 1.2463423176627243e-05, + "loss": 0.8277, "step": 15462 }, { - "epoch": 0.4387911464245176, + "epoch": 0.43818186970444045, "grad_norm": 0.0, - "learning_rate": 1.2443645963290856e-05, - "loss": 1.0924, + "learning_rate": 1.2462533660606766e-05, + "loss": 0.9818, "step": 15463 }, { - "epoch": 0.43881952326901247, + "epoch": 0.43821020714670295, "grad_norm": 0.0, - "learning_rate": 1.244275473820237e-05, - "loss": 1.014, + "learning_rate": 1.2461644123843452e-05, + "loss": 0.9078, "step": 15464 }, { - "epoch": 0.4388479001135074, + "epoch": 0.4382385445889654, "grad_norm": 0.0, - "learning_rate": 1.2441863492479825e-05, - "loss": 0.8185, + "learning_rate": 1.2460754566344797e-05, + "loss": 0.8755, "step": 15465 }, { - "epoch": 0.43887627695800224, + "epoch": 0.4382668820312279, "grad_norm": 0.0, - "learning_rate": 1.2440972226130746e-05, - "loss": 0.9408, + "learning_rate": 1.2459864988118293e-05, + "loss": 0.8844, "step": 15466 }, { - "epoch": 0.43890465380249716, + "epoch": 0.4382952194734903, "grad_norm": 0.0, - "learning_rate": 1.2440080939162666e-05, - "loss": 0.9034, + "learning_rate": 1.2458975389171434e-05, + "loss": 0.877, "step": 15467 }, { - "epoch": 0.4389330306469921, + "epoch": 0.4383235569157528, "grad_norm": 0.0, - "learning_rate": 1.2439189631583113e-05, - "loss": 0.8758, + "learning_rate": 1.245808576951171e-05, + "loss": 0.8839, "step": 15468 }, { - "epoch": 0.43896140749148693, + "epoch": 0.43835189435801525, "grad_norm": 0.0, - "learning_rate": 1.2438298303399616e-05, - "loss": 0.9144, + "learning_rate": 1.2457196129146616e-05, + "loss": 0.8385, "step": 15469 }, { - "epoch": 0.43898978433598185, + "epoch": 0.4383802318002777, "grad_norm": 0.0, - "learning_rate": 1.2437406954619701e-05, - "loss": 0.9377, + "learning_rate": 1.2456306468083647e-05, + "loss": 0.933, "step": 15470 }, { - "epoch": 0.4390181611804767, + "epoch": 0.4384085692425402, "grad_norm": 0.0, - "learning_rate": 1.2436515585250904e-05, - "loss": 0.8206, + "learning_rate": 1.2455416786330299e-05, + "loss": 0.7124, "step": 15471 }, { - "epoch": 0.4390465380249716, + "epoch": 0.4384369066848026, "grad_norm": 0.0, - "learning_rate": 1.2435624195300748e-05, - "loss": 0.8721, + "learning_rate": 1.2454527083894061e-05, + "loss": 0.9511, "step": 15472 }, { - "epoch": 0.43907491486946654, + "epoch": 0.4384652441270651, "grad_norm": 0.0, - "learning_rate": 1.2434732784776765e-05, - "loss": 0.7533, + "learning_rate": 1.2453637360782432e-05, + "loss": 0.8651, "step": 15473 }, { - "epoch": 0.4391032917139614, + "epoch": 0.43849358156932755, "grad_norm": 0.0, - "learning_rate": 1.2433841353686486e-05, - "loss": 0.8624, + "learning_rate": 1.2452747617002902e-05, + "loss": 0.9698, "step": 15474 }, { - "epoch": 0.4391316685584563, + "epoch": 0.43852191901159, "grad_norm": 0.0, - "learning_rate": 1.2432949902037439e-05, - "loss": 0.8413, + "learning_rate": 1.2451857852562967e-05, + "loss": 1.0294, "step": 15475 }, { - "epoch": 0.43916004540295117, + "epoch": 0.4385502564538525, "grad_norm": 0.0, - "learning_rate": 1.2432058429837153e-05, - "loss": 1.0386, + "learning_rate": 1.2450968067470126e-05, + "loss": 0.8765, "step": 15476 }, { - "epoch": 0.4391884222474461, + "epoch": 0.4385785938961149, "grad_norm": 0.0, - "learning_rate": 1.243116693709316e-05, - "loss": 0.9736, + "learning_rate": 1.2450078261731869e-05, + "loss": 0.9255, "step": 15477 }, { - "epoch": 0.439216799091941, + "epoch": 0.4386069313383774, "grad_norm": 0.0, - "learning_rate": 1.2430275423812995e-05, - "loss": 0.9065, + "learning_rate": 1.2449188435355695e-05, + "loss": 0.947, "step": 15478 }, { - "epoch": 0.43924517593643586, + "epoch": 0.43863526878063985, "grad_norm": 0.0, - "learning_rate": 1.242938389000418e-05, - "loss": 0.8297, + "learning_rate": 1.2448298588349097e-05, + "loss": 0.9204, "step": 15479 }, { - "epoch": 0.4392735527809308, + "epoch": 0.43866360622290235, "grad_norm": 0.0, - "learning_rate": 1.2428492335674252e-05, - "loss": 0.8749, + "learning_rate": 1.244740872071957e-05, + "loss": 0.9698, "step": 15480 }, { - "epoch": 0.43930192962542564, + "epoch": 0.4386919436651648, "grad_norm": 0.0, - "learning_rate": 1.242760076083074e-05, - "loss": 0.985, + "learning_rate": 1.2446518832474609e-05, + "loss": 0.8581, "step": 15481 }, { - "epoch": 0.43933030646992055, + "epoch": 0.4387202811074272, "grad_norm": 0.0, - "learning_rate": 1.2426709165481177e-05, - "loss": 0.8946, + "learning_rate": 1.244562892362171e-05, + "loss": 0.8491, "step": 15482 }, { - "epoch": 0.4393586833144154, + "epoch": 0.4387486185496897, "grad_norm": 0.0, - "learning_rate": 1.2425817549633088e-05, - "loss": 0.8887, + "learning_rate": 1.2444738994168374e-05, + "loss": 0.8758, "step": 15483 }, { - "epoch": 0.4393870601589103, + "epoch": 0.43877695599195216, "grad_norm": 0.0, - "learning_rate": 1.242492591329401e-05, - "loss": 0.9318, + "learning_rate": 1.2443849044122094e-05, + "loss": 0.9331, "step": 15484 }, { - "epoch": 0.43941543700340524, + "epoch": 0.43880529343421465, "grad_norm": 0.0, - "learning_rate": 1.2424034256471478e-05, - "loss": 0.9009, + "learning_rate": 1.2442959073490365e-05, + "loss": 0.9197, "step": 15485 }, { - "epoch": 0.4394438138479001, + "epoch": 0.4388336308764771, "grad_norm": 0.0, - "learning_rate": 1.2423142579173018e-05, - "loss": 0.9467, + "learning_rate": 1.2442069082280683e-05, + "loss": 0.8001, "step": 15486 }, { - "epoch": 0.439472190692395, + "epoch": 0.4388619683187395, "grad_norm": 0.0, - "learning_rate": 1.2422250881406159e-05, - "loss": 0.9215, + "learning_rate": 1.2441179070500549e-05, + "loss": 0.9076, "step": 15487 }, { - "epoch": 0.4395005675368899, + "epoch": 0.438890305761002, "grad_norm": 0.0, - "learning_rate": 1.2421359163178443e-05, - "loss": 1.0098, + "learning_rate": 1.2440289038157455e-05, + "loss": 0.9167, "step": 15488 }, { - "epoch": 0.4395289443813848, + "epoch": 0.43891864320326446, "grad_norm": 0.0, - "learning_rate": 1.2420467424497398e-05, - "loss": 0.8036, + "learning_rate": 1.24393989852589e-05, + "loss": 0.9606, "step": 15489 }, { - "epoch": 0.4395573212258797, + "epoch": 0.43894698064552695, "grad_norm": 0.0, - "learning_rate": 1.2419575665370554e-05, - "loss": 1.0215, + "learning_rate": 1.2438508911812384e-05, + "loss": 0.8447, "step": 15490 }, { - "epoch": 0.43958569807037456, + "epoch": 0.4389753180877894, "grad_norm": 0.0, - "learning_rate": 1.2418683885805442e-05, - "loss": 0.9447, + "learning_rate": 1.2437618817825402e-05, + "loss": 0.7817, "step": 15491 }, { - "epoch": 0.4396140749148695, + "epoch": 0.4390036555300519, "grad_norm": 0.0, - "learning_rate": 1.2417792085809604e-05, - "loss": 0.8406, + "learning_rate": 1.243672870330545e-05, + "loss": 0.9172, "step": 15492 }, { - "epoch": 0.43964245175936434, + "epoch": 0.4390319929723143, "grad_norm": 0.0, - "learning_rate": 1.2416900265390565e-05, - "loss": 0.9681, + "learning_rate": 1.2435838568260026e-05, + "loss": 0.8129, "step": 15493 }, { - "epoch": 0.43967082860385925, + "epoch": 0.43906033041457676, "grad_norm": 0.0, - "learning_rate": 1.241600842455586e-05, - "loss": 0.8462, + "learning_rate": 1.243494841269663e-05, + "loss": 0.9411, "step": 15494 }, { - "epoch": 0.43969920544835417, + "epoch": 0.43908866785683925, "grad_norm": 0.0, - "learning_rate": 1.2415116563313025e-05, - "loss": 0.9457, + "learning_rate": 1.2434058236622759e-05, + "loss": 0.8756, "step": 15495 }, { - "epoch": 0.43972758229284903, + "epoch": 0.4391170052991017, "grad_norm": 0.0, - "learning_rate": 1.2414224681669593e-05, - "loss": 0.8834, + "learning_rate": 1.2433168040045912e-05, + "loss": 1.0544, "step": 15496 }, { - "epoch": 0.43975595913734394, + "epoch": 0.4391453427413642, "grad_norm": 0.0, - "learning_rate": 1.2413332779633093e-05, - "loss": 0.889, + "learning_rate": 1.2432277822973588e-05, + "loss": 0.9207, "step": 15497 }, { - "epoch": 0.4397843359818388, + "epoch": 0.4391736801836266, "grad_norm": 0.0, - "learning_rate": 1.2412440857211066e-05, - "loss": 0.9093, + "learning_rate": 1.2431387585413283e-05, + "loss": 0.8758, "step": 15498 }, { - "epoch": 0.4398127128263337, + "epoch": 0.43920201762588906, "grad_norm": 0.0, - "learning_rate": 1.241154891441104e-05, - "loss": 1.0337, + "learning_rate": 1.2430497327372502e-05, + "loss": 0.9276, "step": 15499 }, { - "epoch": 0.4398410896708286, + "epoch": 0.43923035506815156, "grad_norm": 0.0, - "learning_rate": 1.2410656951240552e-05, - "loss": 0.9206, + "learning_rate": 1.2429607048858737e-05, + "loss": 0.8621, "step": 15500 }, { - "epoch": 0.4398694665153235, + "epoch": 0.439258692510414, "grad_norm": 0.0, - "learning_rate": 1.2409764967707139e-05, - "loss": 0.8982, + "learning_rate": 1.2428716749879485e-05, + "loss": 0.838, "step": 15501 }, { - "epoch": 0.4398978433598184, + "epoch": 0.4392870299526765, "grad_norm": 0.0, - "learning_rate": 1.2408872963818332e-05, - "loss": 0.9913, + "learning_rate": 1.2427826430442253e-05, + "loss": 0.9741, "step": 15502 }, { - "epoch": 0.43992622020431327, + "epoch": 0.4393153673949389, "grad_norm": 0.0, - "learning_rate": 1.2407980939581665e-05, - "loss": 0.87, + "learning_rate": 1.242693609055454e-05, + "loss": 0.9081, "step": 15503 }, { - "epoch": 0.4399545970488082, + "epoch": 0.4393437048372014, "grad_norm": 0.0, - "learning_rate": 1.2407088895004676e-05, - "loss": 0.9087, + "learning_rate": 1.2426045730223842e-05, + "loss": 0.8723, "step": 15504 }, { - "epoch": 0.43998297389330304, + "epoch": 0.43937204227946386, "grad_norm": 0.0, - "learning_rate": 1.2406196830094901e-05, - "loss": 0.9454, + "learning_rate": 1.242515534945766e-05, + "loss": 0.8718, "step": 15505 }, { - "epoch": 0.44001135073779796, + "epoch": 0.4394003797217263, "grad_norm": 0.0, - "learning_rate": 1.2405304744859871e-05, - "loss": 0.9495, + "learning_rate": 1.2424264948263492e-05, + "loss": 1.0234, "step": 15506 }, { - "epoch": 0.44003972758229287, + "epoch": 0.4394287171639888, "grad_norm": 0.0, - "learning_rate": 1.240441263930712e-05, - "loss": 0.8858, + "learning_rate": 1.2423374526648841e-05, + "loss": 0.928, "step": 15507 }, { - "epoch": 0.44006810442678773, + "epoch": 0.43945705460625123, "grad_norm": 0.0, - "learning_rate": 1.2403520513444192e-05, - "loss": 0.895, + "learning_rate": 1.2422484084621205e-05, + "loss": 0.9221, "step": 15508 }, { - "epoch": 0.44009648127128265, + "epoch": 0.4394853920485137, "grad_norm": 0.0, - "learning_rate": 1.2402628367278618e-05, - "loss": 0.8401, + "learning_rate": 1.2421593622188088e-05, + "loss": 0.8167, "step": 15509 }, { - "epoch": 0.4401248581157775, + "epoch": 0.43951372949077616, "grad_norm": 0.0, - "learning_rate": 1.240173620081793e-05, - "loss": 0.8578, + "learning_rate": 1.2420703139356987e-05, + "loss": 0.947, "step": 15510 }, { - "epoch": 0.4401532349602724, + "epoch": 0.4395420669330386, "grad_norm": 0.0, - "learning_rate": 1.2400844014069675e-05, - "loss": 0.8592, + "learning_rate": 1.2419812636135406e-05, + "loss": 0.9158, "step": 15511 }, { - "epoch": 0.44018161180476734, + "epoch": 0.4395704043753011, "grad_norm": 0.0, - "learning_rate": 1.239995180704138e-05, - "loss": 0.9375, + "learning_rate": 1.2418922112530847e-05, + "loss": 0.8859, "step": 15512 }, { - "epoch": 0.4402099886492622, + "epoch": 0.43959874181756353, "grad_norm": 0.0, - "learning_rate": 1.2399059579740582e-05, - "loss": 0.7436, + "learning_rate": 1.2418031568550805e-05, + "loss": 0.9984, "step": 15513 }, { - "epoch": 0.4402383654937571, + "epoch": 0.439627079259826, "grad_norm": 0.0, - "learning_rate": 1.2398167332174821e-05, - "loss": 0.9428, + "learning_rate": 1.2417141004202787e-05, + "loss": 0.9337, "step": 15514 }, { - "epoch": 0.44026674233825197, + "epoch": 0.43965541670208846, "grad_norm": 0.0, - "learning_rate": 1.2397275064351635e-05, - "loss": 0.8955, + "learning_rate": 1.2416250419494292e-05, + "loss": 0.8895, "step": 15515 }, { - "epoch": 0.4402951191827469, + "epoch": 0.43968375414435096, "grad_norm": 0.0, - "learning_rate": 1.2396382776278557e-05, - "loss": 0.9562, + "learning_rate": 1.2415359814432822e-05, + "loss": 0.9171, "step": 15516 }, { - "epoch": 0.44032349602724175, + "epoch": 0.4397120915866134, "grad_norm": 0.0, - "learning_rate": 1.2395490467963124e-05, - "loss": 0.9843, + "learning_rate": 1.2414469189025881e-05, + "loss": 0.9199, "step": 15517 }, { - "epoch": 0.44035187287173666, + "epoch": 0.43974042902887583, "grad_norm": 0.0, - "learning_rate": 1.239459813941288e-05, - "loss": 0.8891, + "learning_rate": 1.2413578543280967e-05, + "loss": 1.0067, "step": 15518 }, { - "epoch": 0.4403802497162316, + "epoch": 0.43976876647113833, "grad_norm": 0.0, - "learning_rate": 1.2393705790635356e-05, - "loss": 0.8698, + "learning_rate": 1.2412687877205587e-05, + "loss": 0.8593, "step": 15519 }, { - "epoch": 0.44040862656072643, + "epoch": 0.43979710391340077, "grad_norm": 0.0, - "learning_rate": 1.2392813421638088e-05, - "loss": 0.8691, + "learning_rate": 1.241179719080724e-05, + "loss": 0.8446, "step": 15520 }, { - "epoch": 0.44043700340522135, + "epoch": 0.43982544135566326, "grad_norm": 0.0, - "learning_rate": 1.2391921032428622e-05, - "loss": 0.9096, + "learning_rate": 1.241090648409343e-05, + "loss": 0.9809, "step": 15521 }, { - "epoch": 0.4404653802497162, + "epoch": 0.4398537787979257, "grad_norm": 0.0, - "learning_rate": 1.2391028623014487e-05, - "loss": 0.9628, + "learning_rate": 1.2410015757071656e-05, + "loss": 0.8432, "step": 15522 }, { - "epoch": 0.4404937570942111, + "epoch": 0.43988211624018814, "grad_norm": 0.0, - "learning_rate": 1.239013619340323e-05, - "loss": 0.9795, + "learning_rate": 1.240912500974943e-05, + "loss": 0.8872, "step": 15523 }, { - "epoch": 0.44052213393870604, + "epoch": 0.43991045368245063, "grad_norm": 0.0, - "learning_rate": 1.2389243743602383e-05, - "loss": 0.9271, + "learning_rate": 1.2408234242134247e-05, + "loss": 0.9364, "step": 15524 }, { - "epoch": 0.4405505107832009, + "epoch": 0.43993879112471307, "grad_norm": 0.0, - "learning_rate": 1.2388351273619488e-05, - "loss": 1.0026, + "learning_rate": 1.2407343454233613e-05, + "loss": 0.8666, "step": 15525 }, { - "epoch": 0.4405788876276958, + "epoch": 0.43996712856697556, "grad_norm": 0.0, - "learning_rate": 1.2387458783462082e-05, - "loss": 0.8364, + "learning_rate": 1.240645264605503e-05, + "loss": 0.8916, "step": 15526 }, { - "epoch": 0.4406072644721907, + "epoch": 0.439995466009238, "grad_norm": 0.0, - "learning_rate": 1.2386566273137703e-05, - "loss": 0.8928, + "learning_rate": 1.2405561817606005e-05, + "loss": 0.9899, "step": 15527 }, { - "epoch": 0.4406356413166856, + "epoch": 0.4400238034515005, "grad_norm": 0.0, - "learning_rate": 1.2385673742653894e-05, - "loss": 1.0088, + "learning_rate": 1.2404670968894037e-05, + "loss": 0.976, "step": 15528 }, { - "epoch": 0.4406640181611805, + "epoch": 0.44005214089376293, "grad_norm": 0.0, - "learning_rate": 1.2384781192018192e-05, - "loss": 0.8534, + "learning_rate": 1.2403780099926635e-05, + "loss": 0.8135, "step": 15529 }, { - "epoch": 0.44069239500567536, + "epoch": 0.44008047833602537, "grad_norm": 0.0, - "learning_rate": 1.2383888621238132e-05, - "loss": 0.9303, + "learning_rate": 1.24028892107113e-05, + "loss": 0.8536, "step": 15530 }, { - "epoch": 0.4407207718501703, + "epoch": 0.44010881577828787, "grad_norm": 0.0, - "learning_rate": 1.2382996030321258e-05, - "loss": 0.8665, + "learning_rate": 1.2401998301255533e-05, + "loss": 0.8851, "step": 15531 }, { - "epoch": 0.44074914869466514, + "epoch": 0.4401371532205503, "grad_norm": 0.0, - "learning_rate": 1.2382103419275113e-05, - "loss": 0.9482, + "learning_rate": 1.2401107371566847e-05, + "loss": 0.9225, "step": 15532 }, { - "epoch": 0.44077752553916005, + "epoch": 0.4401654906628128, "grad_norm": 0.0, - "learning_rate": 1.2381210788107232e-05, - "loss": 0.8828, + "learning_rate": 1.240021642165274e-05, + "loss": 0.9075, "step": 15533 }, { - "epoch": 0.4408059023836549, + "epoch": 0.44019382810507524, "grad_norm": 0.0, - "learning_rate": 1.2380318136825155e-05, - "loss": 0.9261, + "learning_rate": 1.2399325451520718e-05, + "loss": 0.934, "step": 15534 }, { - "epoch": 0.44083427922814983, + "epoch": 0.4402221655473377, "grad_norm": 0.0, - "learning_rate": 1.2379425465436426e-05, - "loss": 0.8964, + "learning_rate": 1.2398434461178289e-05, + "loss": 0.7906, "step": 15535 }, { - "epoch": 0.44086265607264474, + "epoch": 0.44025050298960017, "grad_norm": 0.0, - "learning_rate": 1.2378532773948582e-05, - "loss": 0.7365, + "learning_rate": 1.2397543450632953e-05, + "loss": 0.9293, "step": 15536 }, { - "epoch": 0.4408910329171396, + "epoch": 0.4402788404318626, "grad_norm": 0.0, - "learning_rate": 1.2377640062369163e-05, - "loss": 0.8463, + "learning_rate": 1.2396652419892221e-05, + "loss": 0.9398, "step": 15537 }, { - "epoch": 0.4409194097616345, + "epoch": 0.4403071778741251, "grad_norm": 0.0, - "learning_rate": 1.2376747330705711e-05, - "loss": 0.9538, + "learning_rate": 1.2395761368963597e-05, + "loss": 0.9344, "step": 15538 }, { - "epoch": 0.4409477866061294, + "epoch": 0.44033551531638754, "grad_norm": 0.0, - "learning_rate": 1.2375854578965771e-05, - "loss": 0.9277, + "learning_rate": 1.2394870297854582e-05, + "loss": 0.9199, "step": 15539 }, { - "epoch": 0.4409761634506243, + "epoch": 0.44036385275865003, "grad_norm": 0.0, - "learning_rate": 1.237496180715688e-05, - "loss": 0.9209, + "learning_rate": 1.2393979206572684e-05, + "loss": 0.988, "step": 15540 }, { - "epoch": 0.4410045402951192, + "epoch": 0.44039219020091247, "grad_norm": 0.0, - "learning_rate": 1.2374069015286579e-05, - "loss": 0.9492, + "learning_rate": 1.2393088095125414e-05, + "loss": 0.8712, "step": 15541 }, { - "epoch": 0.44103291713961407, + "epoch": 0.4404205276431749, "grad_norm": 0.0, - "learning_rate": 1.2373176203362412e-05, - "loss": 0.8852, + "learning_rate": 1.2392196963520273e-05, + "loss": 0.9147, "step": 15542 }, { - "epoch": 0.441061293984109, + "epoch": 0.4404488650854374, "grad_norm": 0.0, - "learning_rate": 1.2372283371391918e-05, - "loss": 0.9388, + "learning_rate": 1.2391305811764767e-05, + "loss": 0.9763, "step": 15543 }, { - "epoch": 0.44108967082860384, + "epoch": 0.44047720252769984, "grad_norm": 0.0, - "learning_rate": 1.2371390519382635e-05, - "loss": 0.8954, + "learning_rate": 1.2390414639866406e-05, + "loss": 0.8596, "step": 15544 }, { - "epoch": 0.44111804767309876, + "epoch": 0.44050553996996233, "grad_norm": 0.0, - "learning_rate": 1.2370497647342114e-05, - "loss": 0.9859, + "learning_rate": 1.2389523447832696e-05, + "loss": 0.8864, "step": 15545 }, { - "epoch": 0.4411464245175936, + "epoch": 0.4405338774122248, "grad_norm": 0.0, - "learning_rate": 1.2369604755277893e-05, - "loss": 0.9163, + "learning_rate": 1.2388632235671139e-05, + "loss": 0.9591, "step": 15546 }, { - "epoch": 0.44117480136208853, + "epoch": 0.4405622148544872, "grad_norm": 0.0, - "learning_rate": 1.2368711843197513e-05, - "loss": 0.8846, + "learning_rate": 1.2387741003389247e-05, + "loss": 0.914, "step": 15547 }, { - "epoch": 0.44120317820658345, + "epoch": 0.4405905522967497, "grad_norm": 0.0, - "learning_rate": 1.2367818911108518e-05, - "loss": 0.9218, + "learning_rate": 1.2386849750994528e-05, + "loss": 0.9028, "step": 15548 }, { - "epoch": 0.4412315550510783, + "epoch": 0.44061888973901214, "grad_norm": 0.0, - "learning_rate": 1.2366925959018452e-05, - "loss": 0.823, + "learning_rate": 1.2385958478494487e-05, + "loss": 0.914, "step": 15549 }, { - "epoch": 0.4412599318955732, + "epoch": 0.44064722718127464, "grad_norm": 0.0, - "learning_rate": 1.2366032986934856e-05, - "loss": 1.0009, + "learning_rate": 1.2385067185896631e-05, + "loss": 0.9076, "step": 15550 }, { - "epoch": 0.4412883087400681, + "epoch": 0.4406755646235371, "grad_norm": 0.0, - "learning_rate": 1.236513999486527e-05, - "loss": 0.721, + "learning_rate": 1.2384175873208467e-05, + "loss": 0.9158, "step": 15551 }, { - "epoch": 0.441316685584563, + "epoch": 0.4407039020657995, "grad_norm": 0.0, - "learning_rate": 1.236424698281724e-05, - "loss": 0.9349, + "learning_rate": 1.2383284540437505e-05, + "loss": 0.8918, "step": 15552 }, { - "epoch": 0.4413450624290579, + "epoch": 0.440732239508062, "grad_norm": 0.0, - "learning_rate": 1.2363353950798311e-05, - "loss": 0.8958, + "learning_rate": 1.2382393187591251e-05, + "loss": 0.9303, "step": 15553 }, { - "epoch": 0.44137343927355277, + "epoch": 0.44076057695032445, "grad_norm": 0.0, - "learning_rate": 1.2362460898816025e-05, - "loss": 1.0089, + "learning_rate": 1.2381501814677216e-05, + "loss": 0.8129, "step": 15554 }, { - "epoch": 0.4414018161180477, + "epoch": 0.44078891439258694, "grad_norm": 0.0, - "learning_rate": 1.2361567826877925e-05, - "loss": 0.9124, + "learning_rate": 1.238061042170291e-05, + "loss": 0.8792, "step": 15555 }, { - "epoch": 0.44143019296254254, + "epoch": 0.4408172518348494, "grad_norm": 0.0, - "learning_rate": 1.2360674734991558e-05, - "loss": 0.8175, + "learning_rate": 1.2379719008675833e-05, + "loss": 1.0576, "step": 15556 }, { - "epoch": 0.44145856980703746, + "epoch": 0.44084558927711187, "grad_norm": 0.0, - "learning_rate": 1.2359781623164465e-05, - "loss": 0.9625, + "learning_rate": 1.2378827575603502e-05, + "loss": 0.9366, "step": 15557 }, { - "epoch": 0.4414869466515324, + "epoch": 0.4408739267193743, "grad_norm": 0.0, - "learning_rate": 1.2358888491404186e-05, - "loss": 0.9505, + "learning_rate": 1.2377936122493423e-05, + "loss": 0.9553, "step": 15558 }, { - "epoch": 0.44151532349602723, + "epoch": 0.44090226416163675, "grad_norm": 0.0, - "learning_rate": 1.2357995339718273e-05, + "learning_rate": 1.2377044649353103e-05, "loss": 0.9238, "step": 15559 }, { - "epoch": 0.44154370034052215, + "epoch": 0.44093060160389924, "grad_norm": 0.0, - "learning_rate": 1.2357102168114267e-05, - "loss": 1.019, + "learning_rate": 1.2376153156190053e-05, + "loss": 0.807, "step": 15560 }, { - "epoch": 0.441572077185017, + "epoch": 0.4409589390461617, "grad_norm": 0.0, - "learning_rate": 1.235620897659971e-05, - "loss": 0.8827, + "learning_rate": 1.2375261643011787e-05, + "loss": 0.947, "step": 15561 }, { - "epoch": 0.4416004540295119, + "epoch": 0.4409872764884242, "grad_norm": 0.0, - "learning_rate": 1.2355315765182154e-05, - "loss": 0.9444, + "learning_rate": 1.2374370109825807e-05, + "loss": 0.8928, "step": 15562 }, { - "epoch": 0.4416288308740068, + "epoch": 0.4410156139306866, "grad_norm": 0.0, - "learning_rate": 1.235442253386914e-05, - "loss": 0.8328, + "learning_rate": 1.2373478556639627e-05, + "loss": 0.9187, "step": 15563 }, { - "epoch": 0.4416572077185017, + "epoch": 0.44104395137294905, "grad_norm": 0.0, - "learning_rate": 1.235352928266821e-05, - "loss": 0.953, + "learning_rate": 1.2372586983460755e-05, + "loss": 0.7808, "step": 15564 }, { - "epoch": 0.4416855845629966, + "epoch": 0.44107228881521154, "grad_norm": 0.0, - "learning_rate": 1.2352636011586915e-05, - "loss": 0.9109, + "learning_rate": 1.23716953902967e-05, + "loss": 0.9603, "step": 15565 }, { - "epoch": 0.4417139614074915, + "epoch": 0.441100626257474, "grad_norm": 0.0, - "learning_rate": 1.2351742720632798e-05, - "loss": 0.8955, + "learning_rate": 1.2370803777154976e-05, + "loss": 0.9685, "step": 15566 }, { - "epoch": 0.4417423382519864, + "epoch": 0.4411289636997365, "grad_norm": 0.0, - "learning_rate": 1.2350849409813403e-05, - "loss": 0.8834, + "learning_rate": 1.2369912144043092e-05, + "loss": 0.9615, "step": 15567 }, { - "epoch": 0.44177071509648125, + "epoch": 0.4411573011419989, "grad_norm": 0.0, - "learning_rate": 1.2349956079136276e-05, - "loss": 0.9488, + "learning_rate": 1.2369020490968556e-05, + "loss": 0.9595, "step": 15568 }, { - "epoch": 0.44179909194097616, + "epoch": 0.4411856385842614, "grad_norm": 0.0, - "learning_rate": 1.2349062728608967e-05, - "loss": 0.8614, + "learning_rate": 1.2368128817938883e-05, + "loss": 0.8541, "step": 15569 }, { - "epoch": 0.4418274687854711, + "epoch": 0.44121397602652385, "grad_norm": 0.0, - "learning_rate": 1.2348169358239019e-05, - "loss": 0.9249, + "learning_rate": 1.2367237124961582e-05, + "loss": 0.8663, "step": 15570 }, { - "epoch": 0.44185584562996594, + "epoch": 0.4412423134687863, "grad_norm": 0.0, - "learning_rate": 1.2347275968033978e-05, - "loss": 0.8873, + "learning_rate": 1.2366345412044161e-05, + "loss": 0.9609, "step": 15571 }, { - "epoch": 0.44188422247446085, + "epoch": 0.4412706509110488, "grad_norm": 0.0, - "learning_rate": 1.2346382558001392e-05, - "loss": 0.9171, + "learning_rate": 1.2365453679194135e-05, + "loss": 0.903, "step": 15572 }, { - "epoch": 0.4419125993189557, + "epoch": 0.4412989883533112, "grad_norm": 0.0, - "learning_rate": 1.234548912814881e-05, - "loss": 0.9082, + "learning_rate": 1.2364561926419011e-05, + "loss": 0.9102, "step": 15573 }, { - "epoch": 0.4419409761634506, + "epoch": 0.4413273257955737, "grad_norm": 0.0, - "learning_rate": 1.234459567848377e-05, - "loss": 0.9305, + "learning_rate": 1.2363670153726308e-05, + "loss": 0.9299, "step": 15574 }, { - "epoch": 0.44196935300794554, + "epoch": 0.44135566323783615, "grad_norm": 0.0, - "learning_rate": 1.234370220901383e-05, - "loss": 0.8777, + "learning_rate": 1.2362778361123535e-05, + "loss": 0.9427, "step": 15575 }, { - "epoch": 0.4419977298524404, + "epoch": 0.4413840006800986, "grad_norm": 0.0, - "learning_rate": 1.2342808719746528e-05, - "loss": 0.8447, + "learning_rate": 1.2361886548618198e-05, + "loss": 0.9537, "step": 15576 }, { - "epoch": 0.4420261066969353, + "epoch": 0.4414123381223611, "grad_norm": 0.0, - "learning_rate": 1.2341915210689419e-05, - "loss": 0.8388, + "learning_rate": 1.2360994716217819e-05, + "loss": 0.8909, "step": 15577 }, { - "epoch": 0.4420544835414302, + "epoch": 0.4414406755646235, "grad_norm": 0.0, - "learning_rate": 1.2341021681850045e-05, - "loss": 0.8862, + "learning_rate": 1.2360102863929902e-05, + "loss": 0.9949, "step": 15578 }, { - "epoch": 0.4420828603859251, + "epoch": 0.441469013006886, "grad_norm": 0.0, - "learning_rate": 1.2340128133235956e-05, - "loss": 0.9109, + "learning_rate": 1.2359210991761958e-05, + "loss": 0.9533, "step": 15579 }, { - "epoch": 0.44211123723041995, + "epoch": 0.44149735044914845, "grad_norm": 0.0, - "learning_rate": 1.23392345648547e-05, - "loss": 0.8468, + "learning_rate": 1.2358319099721508e-05, + "loss": 0.9448, "step": 15580 }, { - "epoch": 0.44213961407491487, + "epoch": 0.44152568789141095, "grad_norm": 0.0, - "learning_rate": 1.2338340976713823e-05, - "loss": 0.9785, + "learning_rate": 1.235742718781606e-05, + "loss": 0.9516, "step": 15581 }, { - "epoch": 0.4421679909194098, + "epoch": 0.4415540253336734, "grad_norm": 0.0, - "learning_rate": 1.2337447368820876e-05, - "loss": 0.847, + "learning_rate": 1.2356535256053129e-05, + "loss": 0.845, "step": 15582 }, { - "epoch": 0.44219636776390464, + "epoch": 0.4415823627759358, "grad_norm": 0.0, - "learning_rate": 1.2336553741183408e-05, - "loss": 0.9478, + "learning_rate": 1.2355643304440223e-05, + "loss": 0.7995, "step": 15583 }, { - "epoch": 0.44222474460839956, + "epoch": 0.4416107002181983, "grad_norm": 0.0, - "learning_rate": 1.2335660093808962e-05, - "loss": 0.9481, + "learning_rate": 1.2354751332984862e-05, + "loss": 0.8688, "step": 15584 }, { - "epoch": 0.4422531214528944, + "epoch": 0.44163903766046075, "grad_norm": 0.0, - "learning_rate": 1.233476642670509e-05, - "loss": 0.9306, + "learning_rate": 1.2353859341694555e-05, + "loss": 0.9426, "step": 15585 }, { - "epoch": 0.44228149829738933, + "epoch": 0.44166737510272325, "grad_norm": 0.0, - "learning_rate": 1.2333872739879343e-05, - "loss": 0.892, + "learning_rate": 1.2352967330576813e-05, + "loss": 1.0575, "step": 15586 }, { - "epoch": 0.44230987514188425, + "epoch": 0.4416957125449857, "grad_norm": 0.0, - "learning_rate": 1.2332979033339267e-05, - "loss": 1.0011, + "learning_rate": 1.2352075299639157e-05, + "loss": 0.9341, "step": 15587 }, { - "epoch": 0.4423382519863791, + "epoch": 0.4417240499872481, "grad_norm": 0.0, - "learning_rate": 1.2332085307092412e-05, - "loss": 0.9309, + "learning_rate": 1.2351183248889098e-05, + "loss": 0.8394, "step": 15588 }, { - "epoch": 0.442366628830874, + "epoch": 0.4417523874295106, "grad_norm": 0.0, - "learning_rate": 1.2331191561146329e-05, - "loss": 0.8353, + "learning_rate": 1.2350291178334145e-05, + "loss": 0.9972, "step": 15589 }, { - "epoch": 0.4423950056753689, + "epoch": 0.44178072487177306, "grad_norm": 0.0, - "learning_rate": 1.2330297795508566e-05, - "loss": 0.8192, + "learning_rate": 1.2349399087981823e-05, + "loss": 0.9941, "step": 15590 }, { - "epoch": 0.4424233825198638, + "epoch": 0.44180906231403555, "grad_norm": 0.0, - "learning_rate": 1.2329404010186673e-05, - "loss": 1.0688, + "learning_rate": 1.2348506977839639e-05, + "loss": 0.9293, "step": 15591 }, { - "epoch": 0.4424517593643587, + "epoch": 0.441837399756298, "grad_norm": 0.0, - "learning_rate": 1.2328510205188195e-05, - "loss": 0.9189, + "learning_rate": 1.2347614847915103e-05, + "loss": 0.9936, "step": 15592 }, { - "epoch": 0.44248013620885357, + "epoch": 0.4418657371985605, "grad_norm": 0.0, - "learning_rate": 1.232761638052069e-05, - "loss": 0.9551, + "learning_rate": 1.2346722698215738e-05, + "loss": 0.9171, "step": 15593 }, { - "epoch": 0.4425085130533485, + "epoch": 0.4418940746408229, "grad_norm": 0.0, - "learning_rate": 1.2326722536191707e-05, - "loss": 0.8742, + "learning_rate": 1.2345830528749059e-05, + "loss": 0.9639, "step": 15594 }, { - "epoch": 0.44253688989784334, + "epoch": 0.44192241208308536, "grad_norm": 0.0, - "learning_rate": 1.232582867220879e-05, - "loss": 0.9067, + "learning_rate": 1.2344938339522576e-05, + "loss": 0.8658, "step": 15595 }, { - "epoch": 0.44256526674233826, + "epoch": 0.44195074952534785, "grad_norm": 0.0, - "learning_rate": 1.23249347885795e-05, - "loss": 0.934, + "learning_rate": 1.234404613054381e-05, + "loss": 0.9202, "step": 15596 }, { - "epoch": 0.4425936435868331, + "epoch": 0.4419790869676103, "grad_norm": 0.0, - "learning_rate": 1.2324040885311376e-05, - "loss": 0.9295, + "learning_rate": 1.2343153901820269e-05, + "loss": 1.0048, "step": 15597 }, { - "epoch": 0.44262202043132803, + "epoch": 0.4420074244098728, "grad_norm": 0.0, - "learning_rate": 1.2323146962411974e-05, - "loss": 0.8794, + "learning_rate": 1.2342261653359472e-05, + "loss": 0.9016, "step": 15598 }, { - "epoch": 0.44265039727582295, + "epoch": 0.4420357618521352, "grad_norm": 0.0, - "learning_rate": 1.2322253019888846e-05, - "loss": 0.8144, + "learning_rate": 1.2341369385168938e-05, + "loss": 1.0021, "step": 15599 }, { - "epoch": 0.4426787741203178, + "epoch": 0.44206409929439766, "grad_norm": 0.0, - "learning_rate": 1.2321359057749542e-05, - "loss": 0.8466, + "learning_rate": 1.2340477097256181e-05, + "loss": 0.8852, "step": 15600 }, { - "epoch": 0.4427071509648127, + "epoch": 0.44209243673666015, "grad_norm": 0.0, - "learning_rate": 1.2320465076001618e-05, - "loss": 0.9247, + "learning_rate": 1.2339584789628712e-05, + "loss": 0.8692, "step": 15601 }, { - "epoch": 0.4427355278093076, + "epoch": 0.4421207741789226, "grad_norm": 0.0, - "learning_rate": 1.2319571074652614e-05, - "loss": 0.8722, + "learning_rate": 1.2338692462294054e-05, + "loss": 0.8587, "step": 15602 }, { - "epoch": 0.4427639046538025, + "epoch": 0.4421491116211851, "grad_norm": 0.0, - "learning_rate": 1.2318677053710095e-05, - "loss": 1.0721, + "learning_rate": 1.2337800115259724e-05, + "loss": 0.9178, "step": 15603 }, { - "epoch": 0.4427922814982974, + "epoch": 0.4421774490634475, "grad_norm": 0.0, - "learning_rate": 1.2317783013181606e-05, - "loss": 0.9601, + "learning_rate": 1.2336907748533234e-05, + "loss": 0.8486, "step": 15604 }, { - "epoch": 0.4428206583427923, + "epoch": 0.44220578650571, "grad_norm": 0.0, - "learning_rate": 1.2316888953074695e-05, - "loss": 0.8698, + "learning_rate": 1.2336015362122099e-05, + "loss": 0.9595, "step": 15605 }, { - "epoch": 0.4428490351872872, + "epoch": 0.44223412394797246, "grad_norm": 0.0, - "learning_rate": 1.2315994873396923e-05, - "loss": 0.8674, + "learning_rate": 1.2335122956033838e-05, + "loss": 0.9318, "step": 15606 }, { - "epoch": 0.44287741203178205, + "epoch": 0.4422624613902349, "grad_norm": 0.0, - "learning_rate": 1.2315100774155837e-05, - "loss": 0.9676, + "learning_rate": 1.2334230530275974e-05, + "loss": 0.9226, "step": 15607 }, { - "epoch": 0.44290578887627696, + "epoch": 0.4422907988324974, "grad_norm": 0.0, - "learning_rate": 1.231420665535899e-05, - "loss": 0.9512, + "learning_rate": 1.233333808485602e-05, + "loss": 0.9371, "step": 15608 }, { - "epoch": 0.4429341657207719, + "epoch": 0.4423191362747598, "grad_norm": 0.0, - "learning_rate": 1.2313312517013936e-05, - "loss": 0.9423, + "learning_rate": 1.233244561978149e-05, + "loss": 0.8981, "step": 15609 }, { - "epoch": 0.44296254256526674, + "epoch": 0.4423474737170223, "grad_norm": 0.0, - "learning_rate": 1.231241835912823e-05, - "loss": 0.8766, + "learning_rate": 1.2331553135059904e-05, + "loss": 0.8611, "step": 15610 }, { - "epoch": 0.44299091940976165, + "epoch": 0.44237581115928476, "grad_norm": 0.0, - "learning_rate": 1.2311524181709417e-05, - "loss": 0.9698, + "learning_rate": 1.233066063069878e-05, + "loss": 1.0205, "step": 15611 }, { - "epoch": 0.4430192962542565, + "epoch": 0.4424041486015472, "grad_norm": 0.0, - "learning_rate": 1.2310629984765056e-05, - "loss": 0.9102, + "learning_rate": 1.2329768106705637e-05, + "loss": 0.9133, "step": 15612 }, { - "epoch": 0.4430476730987514, + "epoch": 0.4424324860438097, "grad_norm": 0.0, - "learning_rate": 1.2309735768302704e-05, - "loss": 0.9297, + "learning_rate": 1.2328875563087994e-05, + "loss": 0.8805, "step": 15613 }, { - "epoch": 0.4430760499432463, + "epoch": 0.44246082348607213, "grad_norm": 0.0, - "learning_rate": 1.2308841532329908e-05, - "loss": 0.8924, + "learning_rate": 1.2327982999853363e-05, + "loss": 0.9153, "step": 15614 }, { - "epoch": 0.4431044267877412, + "epoch": 0.4424891609283346, "grad_norm": 0.0, - "learning_rate": 1.230794727685422e-05, - "loss": 0.8589, + "learning_rate": 1.232709041700927e-05, + "loss": 0.9528, "step": 15615 }, { - "epoch": 0.4431328036322361, + "epoch": 0.44251749837059706, "grad_norm": 0.0, - "learning_rate": 1.2307053001883202e-05, - "loss": 1.0473, + "learning_rate": 1.2326197814563233e-05, + "loss": 0.9012, "step": 15616 }, { - "epoch": 0.443161180476731, + "epoch": 0.44254583581285956, "grad_norm": 0.0, - "learning_rate": 1.2306158707424402e-05, - "loss": 0.9079, + "learning_rate": 1.2325305192522763e-05, + "loss": 0.9059, "step": 15617 }, { - "epoch": 0.4431895573212259, + "epoch": 0.442574173255122, "grad_norm": 0.0, - "learning_rate": 1.2305264393485377e-05, - "loss": 0.9196, + "learning_rate": 1.2324412550895383e-05, + "loss": 0.918, "step": 15618 }, { - "epoch": 0.44321793416572075, + "epoch": 0.44260251069738443, "grad_norm": 0.0, - "learning_rate": 1.230437006007368e-05, - "loss": 0.8556, + "learning_rate": 1.2323519889688615e-05, + "loss": 0.8588, "step": 15619 }, { - "epoch": 0.44324631101021567, + "epoch": 0.4426308481396469, "grad_norm": 0.0, - "learning_rate": 1.2303475707196866e-05, - "loss": 0.9369, + "learning_rate": 1.2322627208909979e-05, + "loss": 0.9101, "step": 15620 }, { - "epoch": 0.4432746878547106, + "epoch": 0.44265918558190936, "grad_norm": 0.0, - "learning_rate": 1.2302581334862486e-05, - "loss": 1.0637, + "learning_rate": 1.232173450856699e-05, + "loss": 0.901, "step": 15621 }, { - "epoch": 0.44330306469920544, + "epoch": 0.44268752302417186, "grad_norm": 0.0, - "learning_rate": 1.2301686943078099e-05, - "loss": 0.7954, + "learning_rate": 1.2320841788667166e-05, + "loss": 0.8354, "step": 15622 }, { - "epoch": 0.44333144154370036, + "epoch": 0.4427158604664343, "grad_norm": 0.0, - "learning_rate": 1.230079253185126e-05, - "loss": 0.9182, + "learning_rate": 1.2319949049218031e-05, + "loss": 0.7143, "step": 15623 }, { - "epoch": 0.4433598183881952, + "epoch": 0.44274419790869673, "grad_norm": 0.0, - "learning_rate": 1.2299898101189524e-05, - "loss": 0.849, + "learning_rate": 1.2319056290227106e-05, + "loss": 0.8059, "step": 15624 }, { - "epoch": 0.44338819523269013, + "epoch": 0.44277253535095923, "grad_norm": 0.0, - "learning_rate": 1.2299003651100442e-05, - "loss": 0.8501, + "learning_rate": 1.2318163511701906e-05, + "loss": 1.0715, "step": 15625 }, { - "epoch": 0.443416572077185, + "epoch": 0.44280087279322167, "grad_norm": 0.0, - "learning_rate": 1.2298109181591578e-05, - "loss": 0.8031, + "learning_rate": 1.2317270713649955e-05, + "loss": 0.8682, "step": 15626 }, { - "epoch": 0.4434449489216799, + "epoch": 0.44282921023548416, "grad_norm": 0.0, - "learning_rate": 1.2297214692670481e-05, - "loss": 0.9244, + "learning_rate": 1.2316377896078772e-05, + "loss": 0.8197, "step": 15627 }, { - "epoch": 0.4434733257661748, + "epoch": 0.4428575476777466, "grad_norm": 0.0, - "learning_rate": 1.2296320184344704e-05, - "loss": 0.8854, + "learning_rate": 1.231548505899588e-05, + "loss": 1.0112, "step": 15628 }, { - "epoch": 0.4435017026106697, + "epoch": 0.4428858851200091, "grad_norm": 0.0, - "learning_rate": 1.2295425656621812e-05, - "loss": 0.9232, + "learning_rate": 1.2314592202408798e-05, + "loss": 0.9539, "step": 15629 }, { - "epoch": 0.4435300794551646, + "epoch": 0.44291422256227153, "grad_norm": 0.0, - "learning_rate": 1.2294531109509351e-05, - "loss": 0.8321, + "learning_rate": 1.2313699326325041e-05, + "loss": 0.9926, "step": 15630 }, { - "epoch": 0.44355845629965945, + "epoch": 0.44294256000453397, "grad_norm": 0.0, - "learning_rate": 1.2293636543014888e-05, - "loss": 0.8937, + "learning_rate": 1.231280643075214e-05, + "loss": 0.9855, "step": 15631 }, { - "epoch": 0.44358683314415437, + "epoch": 0.44297089744679646, "grad_norm": 0.0, - "learning_rate": 1.229274195714597e-05, - "loss": 0.9508, + "learning_rate": 1.231191351569761e-05, + "loss": 0.9853, "step": 15632 }, { - "epoch": 0.4436152099886493, + "epoch": 0.4429992348890589, "grad_norm": 0.0, - "learning_rate": 1.229184735191016e-05, - "loss": 0.9843, + "learning_rate": 1.2311020581168972e-05, + "loss": 0.7982, "step": 15633 }, { - "epoch": 0.44364358683314414, + "epoch": 0.4430275723313214, "grad_norm": 0.0, - "learning_rate": 1.2290952727315013e-05, - "loss": 0.8568, + "learning_rate": 1.2310127627173753e-05, + "loss": 0.8868, "step": 15634 }, { - "epoch": 0.44367196367763906, + "epoch": 0.44305590977358383, "grad_norm": 0.0, - "learning_rate": 1.2290058083368084e-05, - "loss": 0.8783, + "learning_rate": 1.230923465371947e-05, + "loss": 0.8631, "step": 15635 }, { - "epoch": 0.4437003405221339, + "epoch": 0.44308424721584627, "grad_norm": 0.0, - "learning_rate": 1.228916342007693e-05, - "loss": 0.8949, + "learning_rate": 1.2308341660813647e-05, + "loss": 1.0134, "step": 15636 }, { - "epoch": 0.44372871736662883, + "epoch": 0.44311258465810877, "grad_norm": 0.0, - "learning_rate": 1.2288268737449111e-05, - "loss": 1.0045, + "learning_rate": 1.2307448648463804e-05, + "loss": 0.8622, "step": 15637 }, { - "epoch": 0.44375709421112375, + "epoch": 0.4431409221003712, "grad_norm": 0.0, - "learning_rate": 1.2287374035492184e-05, - "loss": 0.9272, + "learning_rate": 1.2306555616677462e-05, + "loss": 0.9725, "step": 15638 }, { - "epoch": 0.4437854710556186, + "epoch": 0.4431692595426337, "grad_norm": 0.0, - "learning_rate": 1.2286479314213703e-05, - "loss": 0.9014, + "learning_rate": 1.2305662565462146e-05, + "loss": 1.0006, "step": 15639 }, { - "epoch": 0.4438138479001135, + "epoch": 0.44319759698489614, "grad_norm": 0.0, - "learning_rate": 1.2285584573621231e-05, - "loss": 0.9289, + "learning_rate": 1.2304769494825382e-05, + "loss": 0.9823, "step": 15640 }, { - "epoch": 0.4438422247446084, + "epoch": 0.44322593442715863, "grad_norm": 0.0, - "learning_rate": 1.2284689813722326e-05, - "loss": 0.9151, + "learning_rate": 1.2303876404774686e-05, + "loss": 0.9058, "step": 15641 }, { - "epoch": 0.4438706015891033, + "epoch": 0.44325427186942107, "grad_norm": 0.0, - "learning_rate": 1.228379503452454e-05, - "loss": 0.8524, + "learning_rate": 1.2302983295317586e-05, + "loss": 0.9502, "step": 15642 }, { - "epoch": 0.44389897843359816, + "epoch": 0.4432826093116835, "grad_norm": 0.0, - "learning_rate": 1.2282900236035435e-05, - "loss": 0.9347, + "learning_rate": 1.2302090166461598e-05, + "loss": 0.9603, "step": 15643 }, { - "epoch": 0.44392735527809307, + "epoch": 0.443310946753946, "grad_norm": 0.0, - "learning_rate": 1.228200541826257e-05, - "loss": 0.8495, + "learning_rate": 1.2301197018214251e-05, + "loss": 0.8648, "step": 15644 }, { - "epoch": 0.443955732122588, + "epoch": 0.44333928419620844, "grad_norm": 0.0, - "learning_rate": 1.22811105812135e-05, - "loss": 0.9032, + "learning_rate": 1.2300303850583069e-05, + "loss": 0.8779, "step": 15645 }, { - "epoch": 0.44398410896708285, + "epoch": 0.44336762163847093, "grad_norm": 0.0, - "learning_rate": 1.2280215724895786e-05, - "loss": 0.7262, + "learning_rate": 1.2299410663575572e-05, + "loss": 0.9422, "step": 15646 }, { - "epoch": 0.44401248581157776, + "epoch": 0.44339595908073337, "grad_norm": 0.0, - "learning_rate": 1.2279320849316991e-05, - "loss": 1.0054, + "learning_rate": 1.2298517457199283e-05, + "loss": 0.8512, "step": 15647 }, { - "epoch": 0.4440408626560726, + "epoch": 0.4434242965229958, "grad_norm": 0.0, - "learning_rate": 1.227842595448467e-05, - "loss": 0.9454, + "learning_rate": 1.2297624231461734e-05, + "loss": 0.9563, "step": 15648 }, { - "epoch": 0.44406923950056754, + "epoch": 0.4434526339652583, "grad_norm": 0.0, - "learning_rate": 1.2277531040406379e-05, - "loss": 0.7999, + "learning_rate": 1.2296730986370437e-05, + "loss": 0.8526, "step": 15649 }, { - "epoch": 0.44409761634506245, + "epoch": 0.44348097140752074, "grad_norm": 0.0, - "learning_rate": 1.2276636107089683e-05, - "loss": 0.8718, + "learning_rate": 1.2295837721932925e-05, + "loss": 0.9628, "step": 15650 }, { - "epoch": 0.4441259931895573, + "epoch": 0.44350930884978323, "grad_norm": 0.0, - "learning_rate": 1.2275741154542143e-05, - "loss": 0.9178, + "learning_rate": 1.2294944438156717e-05, + "loss": 0.9486, "step": 15651 }, { - "epoch": 0.4441543700340522, + "epoch": 0.4435376462920457, "grad_norm": 0.0, - "learning_rate": 1.227484618277131e-05, - "loss": 0.8962, + "learning_rate": 1.2294051135049343e-05, + "loss": 0.8751, "step": 15652 }, { - "epoch": 0.4441827468785471, + "epoch": 0.44356598373430817, "grad_norm": 0.0, - "learning_rate": 1.2273951191784748e-05, - "loss": 0.8963, + "learning_rate": 1.2293157812618324e-05, + "loss": 0.9773, "step": 15653 }, { - "epoch": 0.444211123723042, + "epoch": 0.4435943211765706, "grad_norm": 0.0, - "learning_rate": 1.2273056181590022e-05, - "loss": 0.9153, + "learning_rate": 1.2292264470871183e-05, + "loss": 0.9191, "step": 15654 }, { - "epoch": 0.4442395005675369, + "epoch": 0.44362265861883304, "grad_norm": 0.0, - "learning_rate": 1.2272161152194688e-05, - "loss": 0.9135, + "learning_rate": 1.2291371109815446e-05, + "loss": 0.8879, "step": 15655 }, { - "epoch": 0.4442678774120318, + "epoch": 0.44365099606109554, "grad_norm": 0.0, - "learning_rate": 1.2271266103606305e-05, - "loss": 1.0407, + "learning_rate": 1.2290477729458642e-05, + "loss": 0.9308, "step": 15656 }, { - "epoch": 0.4442962542565267, + "epoch": 0.443679333503358, "grad_norm": 0.0, - "learning_rate": 1.2270371035832436e-05, - "loss": 0.8724, + "learning_rate": 1.2289584329808293e-05, + "loss": 0.9099, "step": 15657 }, { - "epoch": 0.44432463110102155, + "epoch": 0.44370767094562047, "grad_norm": 0.0, - "learning_rate": 1.2269475948880643e-05, - "loss": 0.9755, + "learning_rate": 1.2288690910871926e-05, + "loss": 0.796, "step": 15658 }, { - "epoch": 0.44435300794551646, + "epoch": 0.4437360083878829, "grad_norm": 0.0, - "learning_rate": 1.226858084275848e-05, - "loss": 0.8762, + "learning_rate": 1.2287797472657064e-05, + "loss": 0.8855, "step": 15659 }, { - "epoch": 0.4443813847900113, + "epoch": 0.44376434583014535, "grad_norm": 0.0, - "learning_rate": 1.2267685717473513e-05, - "loss": 0.9146, + "learning_rate": 1.2286904015171234e-05, + "loss": 0.8052, "step": 15660 }, { - "epoch": 0.44440976163450624, + "epoch": 0.44379268327240784, "grad_norm": 0.0, - "learning_rate": 1.2266790573033307e-05, - "loss": 0.8976, + "learning_rate": 1.2286010538421964e-05, + "loss": 0.9815, "step": 15661 }, { - "epoch": 0.44443813847900115, + "epoch": 0.4438210207146703, "grad_norm": 0.0, - "learning_rate": 1.2265895409445413e-05, - "loss": 0.9991, + "learning_rate": 1.228511704241678e-05, + "loss": 0.9589, "step": 15662 }, { - "epoch": 0.444466515323496, + "epoch": 0.44384935815693277, "grad_norm": 0.0, - "learning_rate": 1.2265000226717405e-05, - "loss": 0.8445, + "learning_rate": 1.22842235271632e-05, + "loss": 0.9131, "step": 15663 }, { - "epoch": 0.44449489216799093, + "epoch": 0.4438776955991952, "grad_norm": 0.0, - "learning_rate": 1.2264105024856836e-05, - "loss": 0.8844, + "learning_rate": 1.2283329992668762e-05, + "loss": 0.881, "step": 15664 }, { - "epoch": 0.4445232690124858, + "epoch": 0.4439060330414577, "grad_norm": 0.0, - "learning_rate": 1.2263209803871271e-05, - "loss": 0.8916, + "learning_rate": 1.2282436438940987e-05, + "loss": 0.8492, "step": 15665 }, { - "epoch": 0.4445516458569807, + "epoch": 0.44393437048372014, "grad_norm": 0.0, - "learning_rate": 1.226231456376827e-05, - "loss": 0.8545, + "learning_rate": 1.2281542865987404e-05, + "loss": 0.8585, "step": 15666 }, { - "epoch": 0.4445800227014756, + "epoch": 0.4439627079259826, "grad_norm": 0.0, - "learning_rate": 1.2261419304555395e-05, - "loss": 0.892, + "learning_rate": 1.2280649273815536e-05, + "loss": 0.9499, "step": 15667 }, { - "epoch": 0.4446083995459705, + "epoch": 0.4439910453682451, "grad_norm": 0.0, - "learning_rate": 1.226052402624021e-05, - "loss": 0.8096, + "learning_rate": 1.227975566243291e-05, + "loss": 0.9621, "step": 15668 }, { - "epoch": 0.4446367763904654, + "epoch": 0.4440193828105075, "grad_norm": 0.0, - "learning_rate": 1.2259628728830277e-05, - "loss": 0.9156, + "learning_rate": 1.2278862031847061e-05, + "loss": 0.7985, "step": 15669 }, { - "epoch": 0.44466515323496025, + "epoch": 0.44404772025277, "grad_norm": 0.0, - "learning_rate": 1.2258733412333159e-05, - "loss": 0.86, + "learning_rate": 1.2277968382065506e-05, + "loss": 0.7682, "step": 15670 }, { - "epoch": 0.44469353007945517, + "epoch": 0.44407605769503244, "grad_norm": 0.0, - "learning_rate": 1.2257838076756418e-05, - "loss": 0.9475, + "learning_rate": 1.227707471309578e-05, + "loss": 0.9326, "step": 15671 }, { - "epoch": 0.4447219069239501, + "epoch": 0.4441043951372949, "grad_norm": 0.0, - "learning_rate": 1.2256942722107618e-05, - "loss": 0.9418, + "learning_rate": 1.2276181024945406e-05, + "loss": 0.8987, "step": 15672 }, { - "epoch": 0.44475028376844494, + "epoch": 0.4441327325795574, "grad_norm": 0.0, - "learning_rate": 1.2256047348394322e-05, - "loss": 0.9155, + "learning_rate": 1.2275287317621916e-05, + "loss": 1.0, "step": 15673 }, { - "epoch": 0.44477866061293986, + "epoch": 0.4441610700218198, "grad_norm": 0.0, - "learning_rate": 1.2255151955624092e-05, - "loss": 0.8687, + "learning_rate": 1.2274393591132837e-05, + "loss": 0.9014, "step": 15674 }, { - "epoch": 0.4448070374574347, + "epoch": 0.4441894074640823, "grad_norm": 0.0, - "learning_rate": 1.225425654380449e-05, - "loss": 0.9402, + "learning_rate": 1.2273499845485695e-05, + "loss": 0.9007, "step": 15675 }, { - "epoch": 0.44483541430192963, + "epoch": 0.44421774490634475, "grad_norm": 0.0, - "learning_rate": 1.2253361112943084e-05, - "loss": 0.883, + "learning_rate": 1.2272606080688016e-05, + "loss": 0.9668, "step": 15676 }, { - "epoch": 0.4448637911464245, + "epoch": 0.44424608234860724, "grad_norm": 0.0, - "learning_rate": 1.2252465663047434e-05, - "loss": 0.7651, + "learning_rate": 1.2271712296747335e-05, + "loss": 0.9427, "step": 15677 }, { - "epoch": 0.4448921679909194, + "epoch": 0.4442744197908697, "grad_norm": 0.0, - "learning_rate": 1.2251570194125106e-05, - "loss": 0.8624, + "learning_rate": 1.227081849367118e-05, + "loss": 0.9031, "step": 15678 }, { - "epoch": 0.4449205448354143, + "epoch": 0.4443027572331321, "grad_norm": 0.0, - "learning_rate": 1.2250674706183664e-05, - "loss": 0.9186, + "learning_rate": 1.2269924671467075e-05, + "loss": 0.8925, "step": 15679 }, { - "epoch": 0.4449489216799092, + "epoch": 0.4443310946753946, "grad_norm": 0.0, - "learning_rate": 1.2249779199230672e-05, - "loss": 0.9581, + "learning_rate": 1.2269030830142552e-05, + "loss": 0.911, "step": 15680 }, { - "epoch": 0.4449772985244041, + "epoch": 0.44435943211765705, "grad_norm": 0.0, - "learning_rate": 1.2248883673273693e-05, - "loss": 0.783, + "learning_rate": 1.226813696970514e-05, + "loss": 0.9235, "step": 15681 }, { - "epoch": 0.44500567536889896, + "epoch": 0.44438776955991954, "grad_norm": 0.0, - "learning_rate": 1.2247988128320292e-05, - "loss": 0.7298, + "learning_rate": 1.2267243090162369e-05, + "loss": 0.9129, "step": 15682 }, { - "epoch": 0.44503405221339387, + "epoch": 0.444416107002182, "grad_norm": 0.0, - "learning_rate": 1.2247092564378032e-05, - "loss": 0.9979, + "learning_rate": 1.2266349191521765e-05, + "loss": 0.9571, "step": 15683 }, { - "epoch": 0.4450624290578888, + "epoch": 0.4444444444444444, "grad_norm": 0.0, - "learning_rate": 1.2246196981454483e-05, - "loss": 0.8999, + "learning_rate": 1.226545527379086e-05, + "loss": 0.8977, "step": 15684 }, { - "epoch": 0.44509080590238365, + "epoch": 0.4444727818867069, "grad_norm": 0.0, - "learning_rate": 1.2245301379557209e-05, - "loss": 0.9511, + "learning_rate": 1.2264561336977185e-05, + "loss": 0.8478, "step": 15685 }, { - "epoch": 0.44511918274687856, + "epoch": 0.44450111932896935, "grad_norm": 0.0, - "learning_rate": 1.224440575869377e-05, - "loss": 0.9029, + "learning_rate": 1.2263667381088272e-05, + "loss": 0.8179, "step": 15686 }, { - "epoch": 0.4451475595913734, + "epoch": 0.44452945677123185, "grad_norm": 0.0, - "learning_rate": 1.2243510118871736e-05, - "loss": 0.9384, + "learning_rate": 1.2262773406131644e-05, + "loss": 0.829, "step": 15687 }, { - "epoch": 0.44517593643586834, + "epoch": 0.4445577942134943, "grad_norm": 0.0, - "learning_rate": 1.2242614460098672e-05, - "loss": 0.868, + "learning_rate": 1.2261879412114837e-05, + "loss": 0.8594, "step": 15688 }, { - "epoch": 0.44520431328036325, + "epoch": 0.4445861316557568, "grad_norm": 0.0, - "learning_rate": 1.2241718782382139e-05, - "loss": 0.8198, + "learning_rate": 1.2260985399045379e-05, + "loss": 0.9618, "step": 15689 }, { - "epoch": 0.4452326901248581, + "epoch": 0.4446144690980192, "grad_norm": 0.0, - "learning_rate": 1.2240823085729712e-05, - "loss": 0.865, + "learning_rate": 1.2260091366930802e-05, + "loss": 0.8968, "step": 15690 }, { - "epoch": 0.445261066969353, + "epoch": 0.44464280654028165, "grad_norm": 0.0, - "learning_rate": 1.2239927370148948e-05, - "loss": 0.8866, + "learning_rate": 1.2259197315778638e-05, + "loss": 0.9249, "step": 15691 }, { - "epoch": 0.4452894438138479, + "epoch": 0.44467114398254415, "grad_norm": 0.0, - "learning_rate": 1.2239031635647417e-05, - "loss": 0.9274, + "learning_rate": 1.2258303245596413e-05, + "loss": 0.9498, "step": 15692 }, { - "epoch": 0.4453178206583428, + "epoch": 0.4446994814248066, "grad_norm": 0.0, - "learning_rate": 1.2238135882232687e-05, - "loss": 0.903, + "learning_rate": 1.2257409156391662e-05, + "loss": 0.7866, "step": 15693 }, { - "epoch": 0.44534619750283766, + "epoch": 0.4447278188670691, "grad_norm": 0.0, - "learning_rate": 1.2237240109912323e-05, - "loss": 0.9503, + "learning_rate": 1.2256515048171917e-05, + "loss": 0.8571, "step": 15694 }, { - "epoch": 0.4453745743473326, + "epoch": 0.4447561563093315, "grad_norm": 0.0, - "learning_rate": 1.2236344318693889e-05, - "loss": 0.9361, + "learning_rate": 1.2255620920944708e-05, + "loss": 0.9619, "step": 15695 }, { - "epoch": 0.4454029511918275, + "epoch": 0.44478449375159396, "grad_norm": 0.0, - "learning_rate": 1.2235448508584956e-05, - "loss": 1.0135, + "learning_rate": 1.2254726774717564e-05, + "loss": 0.9433, "step": 15696 }, { - "epoch": 0.44543132803632235, + "epoch": 0.44481283119385645, "grad_norm": 0.0, - "learning_rate": 1.2234552679593089e-05, - "loss": 1.019, + "learning_rate": 1.2253832609498018e-05, + "loss": 1.0363, "step": 15697 }, { - "epoch": 0.44545970488081726, + "epoch": 0.4448411686361189, "grad_norm": 0.0, - "learning_rate": 1.2233656831725853e-05, - "loss": 1.0238, + "learning_rate": 1.2252938425293606e-05, + "loss": 0.8735, "step": 15698 }, { - "epoch": 0.4454880817253121, + "epoch": 0.4448695060783814, "grad_norm": 0.0, - "learning_rate": 1.2232760964990818e-05, - "loss": 0.9933, + "learning_rate": 1.2252044222111859e-05, + "loss": 0.9529, "step": 15699 }, { - "epoch": 0.44551645856980704, + "epoch": 0.4448978435206438, "grad_norm": 0.0, - "learning_rate": 1.2231865079395549e-05, - "loss": 0.9844, + "learning_rate": 1.2251149999960303e-05, + "loss": 0.908, "step": 15700 }, { - "epoch": 0.44554483541430195, + "epoch": 0.4449261809629063, "grad_norm": 0.0, - "learning_rate": 1.2230969174947619e-05, - "loss": 0.8647, + "learning_rate": 1.2250255758846477e-05, + "loss": 0.9645, "step": 15701 }, { - "epoch": 0.4455732122587968, + "epoch": 0.44495451840516875, "grad_norm": 0.0, - "learning_rate": 1.2230073251654591e-05, - "loss": 0.9589, + "learning_rate": 1.2249361498777909e-05, + "loss": 0.8447, "step": 15702 }, { - "epoch": 0.44560158910329173, + "epoch": 0.4449828558474312, "grad_norm": 0.0, - "learning_rate": 1.2229177309524032e-05, - "loss": 0.9389, + "learning_rate": 1.2248467219762135e-05, + "loss": 0.9191, "step": 15703 }, { - "epoch": 0.4456299659477866, + "epoch": 0.4450111932896937, "grad_norm": 0.0, - "learning_rate": 1.2228281348563513e-05, - "loss": 0.8446, + "learning_rate": 1.2247572921806688e-05, + "loss": 0.9227, "step": 15704 }, { - "epoch": 0.4456583427922815, + "epoch": 0.4450395307319561, "grad_norm": 0.0, - "learning_rate": 1.2227385368780605e-05, - "loss": 0.8165, + "learning_rate": 1.2246678604919095e-05, + "loss": 0.9031, "step": 15705 }, { - "epoch": 0.44568671963677636, + "epoch": 0.4450678681742186, "grad_norm": 0.0, - "learning_rate": 1.2226489370182865e-05, - "loss": 0.8632, + "learning_rate": 1.2245784269106897e-05, + "loss": 0.981, "step": 15706 }, { - "epoch": 0.4457150964812713, + "epoch": 0.44509620561648106, "grad_norm": 0.0, - "learning_rate": 1.2225593352777873e-05, - "loss": 0.8979, + "learning_rate": 1.2244889914377627e-05, + "loss": 0.7576, "step": 15707 }, { - "epoch": 0.4457434733257662, + "epoch": 0.4451245430587435, "grad_norm": 0.0, - "learning_rate": 1.2224697316573197e-05, - "loss": 0.8784, + "learning_rate": 1.2243995540738813e-05, + "loss": 0.8462, "step": 15708 }, { - "epoch": 0.44577185017026105, + "epoch": 0.445152880501006, "grad_norm": 0.0, - "learning_rate": 1.22238012615764e-05, - "loss": 0.9025, + "learning_rate": 1.2243101148197991e-05, + "loss": 0.9057, "step": 15709 }, { - "epoch": 0.44580022701475597, + "epoch": 0.4451812179432684, "grad_norm": 0.0, - "learning_rate": 1.2222905187795053e-05, - "loss": 0.9718, + "learning_rate": 1.2242206736762694e-05, + "loss": 0.7917, "step": 15710 }, { - "epoch": 0.4458286038592508, + "epoch": 0.4452095553855309, "grad_norm": 0.0, - "learning_rate": 1.222200909523673e-05, - "loss": 0.962, + "learning_rate": 1.2241312306440458e-05, + "loss": 0.8229, "step": 15711 }, { - "epoch": 0.44585698070374574, + "epoch": 0.44523789282779336, "grad_norm": 0.0, - "learning_rate": 1.2221112983908994e-05, - "loss": 0.8921, + "learning_rate": 1.2240417857238817e-05, + "loss": 0.9306, "step": 15712 }, { - "epoch": 0.44588535754824066, + "epoch": 0.44526623027005585, "grad_norm": 0.0, - "learning_rate": 1.2220216853819413e-05, - "loss": 0.9449, + "learning_rate": 1.2239523389165301e-05, + "loss": 0.8898, "step": 15713 }, { - "epoch": 0.4459137343927355, + "epoch": 0.4452945677123183, "grad_norm": 0.0, - "learning_rate": 1.2219320704975564e-05, - "loss": 0.9909, + "learning_rate": 1.2238628902227454e-05, + "loss": 0.951, "step": 15714 }, { - "epoch": 0.44594211123723043, + "epoch": 0.44532290515458073, "grad_norm": 0.0, - "learning_rate": 1.2218424537385012e-05, - "loss": 0.9008, + "learning_rate": 1.2237734396432801e-05, + "loss": 0.8866, "step": 15715 }, { - "epoch": 0.4459704880817253, + "epoch": 0.4453512425968432, "grad_norm": 0.0, - "learning_rate": 1.2217528351055329e-05, - "loss": 0.8514, + "learning_rate": 1.2236839871788879e-05, + "loss": 0.955, "step": 15716 }, { - "epoch": 0.4459988649262202, + "epoch": 0.44537958003910566, "grad_norm": 0.0, - "learning_rate": 1.2216632145994086e-05, - "loss": 0.7814, + "learning_rate": 1.2235945328303225e-05, + "loss": 0.9425, "step": 15717 }, { - "epoch": 0.4460272417707151, + "epoch": 0.44540791748136815, "grad_norm": 0.0, - "learning_rate": 1.2215735922208849e-05, - "loss": 1.0045, + "learning_rate": 1.2235050765983374e-05, + "loss": 0.93, "step": 15718 }, { - "epoch": 0.44605561861521, + "epoch": 0.4454362549236306, "grad_norm": 0.0, - "learning_rate": 1.2214839679707193e-05, - "loss": 0.9492, + "learning_rate": 1.223415618483686e-05, + "loss": 0.9428, "step": 15719 }, { - "epoch": 0.4460839954597049, + "epoch": 0.44546459236589303, "grad_norm": 0.0, - "learning_rate": 1.2213943418496685e-05, - "loss": 0.9537, + "learning_rate": 1.223326158487122e-05, + "loss": 0.8643, "step": 15720 }, { - "epoch": 0.44611237230419976, + "epoch": 0.4454929298081555, "grad_norm": 0.0, - "learning_rate": 1.2213047138584899e-05, - "loss": 1.0387, + "learning_rate": 1.2232366966093989e-05, + "loss": 0.9875, "step": 15721 }, { - "epoch": 0.44614074914869467, + "epoch": 0.44552126725041796, "grad_norm": 0.0, - "learning_rate": 1.2212150839979402e-05, - "loss": 0.9313, + "learning_rate": 1.2231472328512699e-05, + "loss": 0.9602, "step": 15722 }, { - "epoch": 0.44616912599318953, + "epoch": 0.44554960469268046, "grad_norm": 0.0, - "learning_rate": 1.2211254522687767e-05, - "loss": 0.8848, + "learning_rate": 1.2230577672134889e-05, + "loss": 0.8322, "step": 15723 }, { - "epoch": 0.44619750283768445, + "epoch": 0.4455779421349429, "grad_norm": 0.0, - "learning_rate": 1.2210358186717569e-05, - "loss": 0.8852, + "learning_rate": 1.2229682996968097e-05, + "loss": 0.8588, "step": 15724 }, { - "epoch": 0.44622587968217936, + "epoch": 0.4456062795772054, "grad_norm": 0.0, - "learning_rate": 1.2209461832076375e-05, - "loss": 0.8804, + "learning_rate": 1.2228788303019858e-05, + "loss": 0.867, "step": 15725 }, { - "epoch": 0.4462542565266742, + "epoch": 0.4456346170194678, "grad_norm": 0.0, - "learning_rate": 1.2208565458771755e-05, - "loss": 0.8668, + "learning_rate": 1.2227893590297706e-05, + "loss": 0.8819, "step": 15726 }, { - "epoch": 0.44628263337116914, + "epoch": 0.44566295446173027, "grad_norm": 0.0, - "learning_rate": 1.2207669066811288e-05, - "loss": 0.9496, + "learning_rate": 1.2226998858809178e-05, + "loss": 0.8426, "step": 15727 }, { - "epoch": 0.446311010215664, + "epoch": 0.44569129190399276, "grad_norm": 0.0, - "learning_rate": 1.220677265620254e-05, - "loss": 0.8396, + "learning_rate": 1.2226104108561813e-05, + "loss": 0.8733, "step": 15728 }, { - "epoch": 0.4463393870601589, + "epoch": 0.4457196293462552, "grad_norm": 0.0, - "learning_rate": 1.220587622695308e-05, - "loss": 0.8502, + "learning_rate": 1.2225209339563144e-05, + "loss": 0.937, "step": 15729 }, { - "epoch": 0.4463677639046538, + "epoch": 0.4457479667885177, "grad_norm": 0.0, - "learning_rate": 1.2204979779070485e-05, - "loss": 0.9158, + "learning_rate": 1.2224314551820712e-05, + "loss": 0.9988, "step": 15730 }, { - "epoch": 0.4463961407491487, + "epoch": 0.44577630423078013, "grad_norm": 0.0, - "learning_rate": 1.2204083312562331e-05, - "loss": 0.8931, + "learning_rate": 1.2223419745342055e-05, + "loss": 0.7993, "step": 15731 }, { - "epoch": 0.4464245175936436, + "epoch": 0.44580464167304257, "grad_norm": 0.0, - "learning_rate": 1.2203186827436185e-05, - "loss": 0.9678, + "learning_rate": 1.2222524920134707e-05, + "loss": 0.8732, "step": 15732 }, { - "epoch": 0.44645289443813846, + "epoch": 0.44583297911530506, "grad_norm": 0.0, - "learning_rate": 1.2202290323699617e-05, - "loss": 0.8837, + "learning_rate": 1.2221630076206206e-05, + "loss": 0.8729, "step": 15733 }, { - "epoch": 0.4464812712826334, + "epoch": 0.4458613165575675, "grad_norm": 0.0, - "learning_rate": 1.2201393801360208e-05, - "loss": 0.867, + "learning_rate": 1.2220735213564088e-05, + "loss": 0.9279, "step": 15734 }, { - "epoch": 0.4465096481271283, + "epoch": 0.44588965399983, "grad_norm": 0.0, - "learning_rate": 1.2200497260425526e-05, - "loss": 0.8834, + "learning_rate": 1.2219840332215894e-05, + "loss": 0.85, "step": 15735 }, { - "epoch": 0.44653802497162315, + "epoch": 0.44591799144209243, "grad_norm": 0.0, - "learning_rate": 1.2199600700903143e-05, - "loss": 1.0549, + "learning_rate": 1.2218945432169158e-05, + "loss": 0.7936, "step": 15736 }, { - "epoch": 0.44656640181611806, + "epoch": 0.4459463288843549, "grad_norm": 0.0, - "learning_rate": 1.2198704122800633e-05, - "loss": 0.8408, + "learning_rate": 1.2218050513431424e-05, + "loss": 0.9229, "step": 15737 }, { - "epoch": 0.4465947786606129, + "epoch": 0.44597466632661736, "grad_norm": 0.0, - "learning_rate": 1.2197807526125573e-05, - "loss": 0.9863, + "learning_rate": 1.2217155576010225e-05, + "loss": 0.9552, "step": 15738 }, { - "epoch": 0.44662315550510784, + "epoch": 0.4460030037688798, "grad_norm": 0.0, - "learning_rate": 1.2196910910885534e-05, - "loss": 0.7618, + "learning_rate": 1.2216260619913103e-05, + "loss": 0.938, "step": 15739 }, { - "epoch": 0.4466515323496027, + "epoch": 0.4460313412111423, "grad_norm": 0.0, - "learning_rate": 1.2196014277088089e-05, - "loss": 0.9901, + "learning_rate": 1.2215365645147594e-05, + "loss": 0.8995, "step": 15740 }, { - "epoch": 0.4466799091940976, + "epoch": 0.44605967865340473, "grad_norm": 0.0, - "learning_rate": 1.2195117624740812e-05, - "loss": 0.9734, + "learning_rate": 1.2214470651721237e-05, + "loss": 0.9174, "step": 15741 }, { - "epoch": 0.44670828603859253, + "epoch": 0.44608801609566723, "grad_norm": 0.0, - "learning_rate": 1.2194220953851278e-05, - "loss": 0.8744, + "learning_rate": 1.2213575639641571e-05, + "loss": 0.8065, "step": 15742 }, { - "epoch": 0.4467366628830874, + "epoch": 0.44611635353792967, "grad_norm": 0.0, - "learning_rate": 1.2193324264427062e-05, - "loss": 0.891, + "learning_rate": 1.2212680608916134e-05, + "loss": 0.909, "step": 15743 }, { - "epoch": 0.4467650397275823, + "epoch": 0.4461446909801921, "grad_norm": 0.0, - "learning_rate": 1.2192427556475736e-05, - "loss": 0.8556, + "learning_rate": 1.2211785559552472e-05, + "loss": 0.9712, "step": 15744 }, { - "epoch": 0.44679341657207716, + "epoch": 0.4461730284224546, "grad_norm": 0.0, - "learning_rate": 1.2191530830004877e-05, - "loss": 0.8486, + "learning_rate": 1.2210890491558117e-05, + "loss": 0.8131, "step": 15745 }, { - "epoch": 0.4468217934165721, + "epoch": 0.44620136586471704, "grad_norm": 0.0, - "learning_rate": 1.2190634085022056e-05, - "loss": 0.9557, + "learning_rate": 1.2209995404940607e-05, + "loss": 0.9241, "step": 15746 }, { - "epoch": 0.446850170261067, + "epoch": 0.44622970330697953, "grad_norm": 0.0, - "learning_rate": 1.2189737321534853e-05, - "loss": 0.7736, + "learning_rate": 1.2209100299707485e-05, + "loss": 0.9439, "step": 15747 }, { - "epoch": 0.44687854710556185, + "epoch": 0.44625804074924197, "grad_norm": 0.0, - "learning_rate": 1.2188840539550841e-05, - "loss": 0.9964, + "learning_rate": 1.2208205175866295e-05, + "loss": 0.8645, "step": 15748 }, { - "epoch": 0.44690692395005677, + "epoch": 0.44628637819150446, "grad_norm": 0.0, - "learning_rate": 1.2187943739077595e-05, - "loss": 0.8366, + "learning_rate": 1.2207310033424569e-05, + "loss": 0.8073, "step": 15749 }, { - "epoch": 0.4469353007945516, + "epoch": 0.4463147156337669, "grad_norm": 0.0, - "learning_rate": 1.2187046920122686e-05, - "loss": 1.0168, + "learning_rate": 1.2206414872389853e-05, + "loss": 0.9189, "step": 15750 }, { - "epoch": 0.44696367763904654, + "epoch": 0.44634305307602934, "grad_norm": 0.0, - "learning_rate": 1.2186150082693697e-05, - "loss": 1.1106, + "learning_rate": 1.2205519692769685e-05, + "loss": 1.0338, "step": 15751 }, { - "epoch": 0.44699205448354146, + "epoch": 0.44637139051829183, "grad_norm": 0.0, - "learning_rate": 1.2185253226798198e-05, - "loss": 0.8866, + "learning_rate": 1.2204624494571607e-05, + "loss": 0.9427, "step": 15752 }, { - "epoch": 0.4470204313280363, + "epoch": 0.44639972796055427, "grad_norm": 0.0, - "learning_rate": 1.2184356352443766e-05, - "loss": 1.0148, + "learning_rate": 1.2203729277803157e-05, + "loss": 0.8365, "step": 15753 }, { - "epoch": 0.44704880817253123, + "epoch": 0.44642806540281676, "grad_norm": 0.0, - "learning_rate": 1.2183459459637981e-05, - "loss": 0.9038, + "learning_rate": 1.2202834042471874e-05, + "loss": 1.0544, "step": 15754 }, { - "epoch": 0.4470771850170261, + "epoch": 0.4464564028450792, "grad_norm": 0.0, - "learning_rate": 1.2182562548388414e-05, - "loss": 0.9202, + "learning_rate": 1.2201938788585305e-05, + "loss": 0.8485, "step": 15755 }, { - "epoch": 0.447105561861521, + "epoch": 0.44648474028734164, "grad_norm": 0.0, - "learning_rate": 1.2181665618702643e-05, - "loss": 0.8892, + "learning_rate": 1.2201043516150988e-05, + "loss": 1.0335, "step": 15756 }, { - "epoch": 0.44713393870601587, + "epoch": 0.44651307772960414, "grad_norm": 0.0, - "learning_rate": 1.2180768670588244e-05, - "loss": 0.8419, + "learning_rate": 1.2200148225176462e-05, + "loss": 0.8006, "step": 15757 }, { - "epoch": 0.4471623155505108, + "epoch": 0.4465414151718666, "grad_norm": 0.0, - "learning_rate": 1.2179871704052795e-05, - "loss": 0.8772, + "learning_rate": 1.2199252915669274e-05, + "loss": 0.8148, "step": 15758 }, { - "epoch": 0.4471906923950057, + "epoch": 0.44656975261412907, "grad_norm": 0.0, - "learning_rate": 1.2178974719103873e-05, - "loss": 0.9859, + "learning_rate": 1.2198357587636958e-05, + "loss": 0.9624, "step": 15759 }, { - "epoch": 0.44721906923950056, + "epoch": 0.4465980900563915, "grad_norm": 0.0, - "learning_rate": 1.217807771574905e-05, - "loss": 0.8382, + "learning_rate": 1.2197462241087063e-05, + "loss": 0.9929, "step": 15760 }, { - "epoch": 0.44724744608399547, + "epoch": 0.44662642749865394, "grad_norm": 0.0, - "learning_rate": 1.2177180693995908e-05, - "loss": 0.9, + "learning_rate": 1.2196566876027125e-05, + "loss": 0.9539, "step": 15761 }, { - "epoch": 0.44727582292849033, + "epoch": 0.44665476494091644, "grad_norm": 0.0, - "learning_rate": 1.2176283653852024e-05, - "loss": 0.8324, + "learning_rate": 1.2195671492464691e-05, + "loss": 0.9349, "step": 15762 }, { - "epoch": 0.44730419977298524, + "epoch": 0.4466831023831789, "grad_norm": 0.0, - "learning_rate": 1.2175386595324974e-05, - "loss": 0.9533, + "learning_rate": 1.21947760904073e-05, + "loss": 0.9084, "step": 15763 }, { - "epoch": 0.44733257661748016, + "epoch": 0.44671143982544137, "grad_norm": 0.0, - "learning_rate": 1.2174489518422333e-05, - "loss": 0.7776, + "learning_rate": 1.2193880669862492e-05, + "loss": 0.8586, "step": 15764 }, { - "epoch": 0.447360953461975, + "epoch": 0.4467397772677038, "grad_norm": 0.0, - "learning_rate": 1.2173592423151683e-05, - "loss": 0.9439, + "learning_rate": 1.2192985230837817e-05, + "loss": 0.8756, "step": 15765 }, { - "epoch": 0.44738933030646993, + "epoch": 0.4467681147099663, "grad_norm": 0.0, - "learning_rate": 1.2172695309520602e-05, - "loss": 0.9342, + "learning_rate": 1.2192089773340811e-05, + "loss": 0.9665, "step": 15766 }, { - "epoch": 0.4474177071509648, + "epoch": 0.44679645215222874, "grad_norm": 0.0, - "learning_rate": 1.2171798177536665e-05, - "loss": 0.9559, + "learning_rate": 1.2191194297379019e-05, + "loss": 0.8773, "step": 15767 }, { - "epoch": 0.4474460839954597, + "epoch": 0.4468247895944912, "grad_norm": 0.0, - "learning_rate": 1.2170901027207447e-05, - "loss": 0.8911, + "learning_rate": 1.2190298802959982e-05, + "loss": 0.9641, "step": 15768 }, { - "epoch": 0.4474744608399546, + "epoch": 0.4468531270367537, "grad_norm": 0.0, - "learning_rate": 1.2170003858540536e-05, - "loss": 0.8017, + "learning_rate": 1.2189403290091246e-05, + "loss": 0.9874, "step": 15769 }, { - "epoch": 0.4475028376844495, + "epoch": 0.4468814644790161, "grad_norm": 0.0, - "learning_rate": 1.21691066715435e-05, - "loss": 0.9022, + "learning_rate": 1.2188507758780355e-05, + "loss": 0.8259, "step": 15770 }, { - "epoch": 0.4475312145289444, + "epoch": 0.4469098019212786, "grad_norm": 0.0, - "learning_rate": 1.2168209466223928e-05, - "loss": 0.9729, + "learning_rate": 1.2187612209034847e-05, + "loss": 0.9602, "step": 15771 }, { - "epoch": 0.44755959137343926, + "epoch": 0.44693813936354104, "grad_norm": 0.0, - "learning_rate": 1.2167312242589391e-05, - "loss": 0.9734, + "learning_rate": 1.218671664086227e-05, + "loss": 0.9431, "step": 15772 }, { - "epoch": 0.4475879682179342, + "epoch": 0.4469664768058035, "grad_norm": 0.0, - "learning_rate": 1.216641500064747e-05, - "loss": 0.7922, + "learning_rate": 1.218582105427017e-05, + "loss": 0.8703, "step": 15773 }, { - "epoch": 0.44761634506242903, + "epoch": 0.446994814248066, "grad_norm": 0.0, - "learning_rate": 1.2165517740405743e-05, - "loss": 0.7977, + "learning_rate": 1.2184925449266083e-05, + "loss": 0.9427, "step": 15774 }, { - "epoch": 0.44764472190692395, + "epoch": 0.4470231516903284, "grad_norm": 0.0, - "learning_rate": 1.216462046187179e-05, - "loss": 0.8614, + "learning_rate": 1.2184029825857559e-05, + "loss": 0.9741, "step": 15775 }, { - "epoch": 0.44767309875141886, + "epoch": 0.4470514891325909, "grad_norm": 0.0, - "learning_rate": 1.2163723165053192e-05, - "loss": 0.8313, + "learning_rate": 1.2183134184052143e-05, + "loss": 0.8993, "step": 15776 }, { - "epoch": 0.4477014755959137, + "epoch": 0.44707982657485335, "grad_norm": 0.0, - "learning_rate": 1.2162825849957525e-05, - "loss": 0.9365, + "learning_rate": 1.2182238523857378e-05, + "loss": 0.9053, "step": 15777 }, { - "epoch": 0.44772985244040864, + "epoch": 0.44710816401711584, "grad_norm": 0.0, - "learning_rate": 1.2161928516592373e-05, - "loss": 0.9496, + "learning_rate": 1.2181342845280803e-05, + "loss": 0.8384, "step": 15778 }, { - "epoch": 0.4477582292849035, + "epoch": 0.4471365014593783, "grad_norm": 0.0, - "learning_rate": 1.2161031164965315e-05, - "loss": 0.9861, + "learning_rate": 1.2180447148329972e-05, + "loss": 0.9262, "step": 15779 }, { - "epoch": 0.4477866061293984, + "epoch": 0.4471648389016407, "grad_norm": 0.0, - "learning_rate": 1.2160133795083926e-05, - "loss": 0.8648, + "learning_rate": 1.217955143301242e-05, + "loss": 0.965, "step": 15780 }, { - "epoch": 0.4478149829738933, + "epoch": 0.4471931763439032, "grad_norm": 0.0, - "learning_rate": 1.2159236406955793e-05, - "loss": 0.9358, + "learning_rate": 1.2178655699335698e-05, + "loss": 1.0247, "step": 15781 }, { - "epoch": 0.4478433598183882, + "epoch": 0.44722151378616565, "grad_norm": 0.0, - "learning_rate": 1.2158339000588493e-05, - "loss": 1.0009, + "learning_rate": 1.2177759947307352e-05, + "loss": 0.9599, "step": 15782 }, { - "epoch": 0.4478717366628831, + "epoch": 0.44724985122842814, "grad_norm": 0.0, - "learning_rate": 1.2157441575989604e-05, - "loss": 0.918, + "learning_rate": 1.2176864176934925e-05, + "loss": 0.9683, "step": 15783 }, { - "epoch": 0.44790011350737796, + "epoch": 0.4472781886706906, "grad_norm": 0.0, - "learning_rate": 1.2156544133166708e-05, - "loss": 0.9863, + "learning_rate": 1.2175968388225963e-05, + "loss": 0.9408, "step": 15784 }, { - "epoch": 0.4479284903518729, + "epoch": 0.447306526112953, "grad_norm": 0.0, - "learning_rate": 1.215564667212739e-05, - "loss": 0.9085, + "learning_rate": 1.217507258118801e-05, + "loss": 0.8267, "step": 15785 }, { - "epoch": 0.44795686719636774, + "epoch": 0.4473348635552155, "grad_norm": 0.0, - "learning_rate": 1.2154749192879227e-05, - "loss": 0.8034, + "learning_rate": 1.2174176755828616e-05, + "loss": 0.9904, "step": 15786 }, { - "epoch": 0.44798524404086265, + "epoch": 0.44736320099747795, "grad_norm": 0.0, - "learning_rate": 1.21538516954298e-05, - "loss": 0.8466, + "learning_rate": 1.2173280912155318e-05, + "loss": 0.8653, "step": 15787 }, { - "epoch": 0.44801362088535757, + "epoch": 0.44739153843974044, "grad_norm": 0.0, - "learning_rate": 1.2152954179786691e-05, - "loss": 0.9278, + "learning_rate": 1.217238505017567e-05, + "loss": 0.8335, "step": 15788 }, { - "epoch": 0.4480419977298524, + "epoch": 0.4474198758820029, "grad_norm": 0.0, - "learning_rate": 1.2152056645957481e-05, - "loss": 1.0146, + "learning_rate": 1.2171489169897217e-05, + "loss": 1.0144, "step": 15789 }, { - "epoch": 0.44807037457434734, + "epoch": 0.4474482133242654, "grad_norm": 0.0, - "learning_rate": 1.2151159093949751e-05, - "loss": 0.8949, + "learning_rate": 1.2170593271327507e-05, + "loss": 0.9024, "step": 15790 }, { - "epoch": 0.4480987514188422, + "epoch": 0.4474765507665278, "grad_norm": 0.0, - "learning_rate": 1.2150261523771083e-05, - "loss": 0.9381, + "learning_rate": 1.2169697354474081e-05, + "loss": 0.8569, "step": 15791 }, { - "epoch": 0.4481271282633371, + "epoch": 0.44750488820879025, "grad_norm": 0.0, - "learning_rate": 1.2149363935429061e-05, - "loss": 0.8864, + "learning_rate": 1.216880141934449e-05, + "loss": 0.923, "step": 15792 }, { - "epoch": 0.44815550510783203, + "epoch": 0.44753322565105275, "grad_norm": 0.0, - "learning_rate": 1.2148466328931268e-05, - "loss": 0.9485, + "learning_rate": 1.2167905465946276e-05, + "loss": 0.9492, "step": 15793 }, { - "epoch": 0.4481838819523269, + "epoch": 0.4475615630933152, "grad_norm": 0.0, - "learning_rate": 1.2147568704285278e-05, - "loss": 0.8966, + "learning_rate": 1.2167009494286991e-05, + "loss": 0.9045, "step": 15794 }, { - "epoch": 0.4482122587968218, + "epoch": 0.4475899005355777, "grad_norm": 0.0, - "learning_rate": 1.2146671061498682e-05, - "loss": 0.8966, + "learning_rate": 1.2166113504374182e-05, + "loss": 0.7371, "step": 15795 }, { - "epoch": 0.44824063564131666, + "epoch": 0.4476182379778401, "grad_norm": 0.0, - "learning_rate": 1.214577340057906e-05, - "loss": 1.043, + "learning_rate": 1.2165217496215392e-05, + "loss": 0.8858, "step": 15796 }, { - "epoch": 0.4482690124858116, + "epoch": 0.44764657542010255, "grad_norm": 0.0, - "learning_rate": 1.2144875721533989e-05, - "loss": 0.9531, + "learning_rate": 1.2164321469818172e-05, + "loss": 0.8747, "step": 15797 }, { - "epoch": 0.4482973893303065, + "epoch": 0.44767491286236505, "grad_norm": 0.0, - "learning_rate": 1.2143978024371056e-05, - "loss": 0.9146, + "learning_rate": 1.216342542519007e-05, + "loss": 0.8939, "step": 15798 }, { - "epoch": 0.44832576617480135, + "epoch": 0.4477032503046275, "grad_norm": 0.0, - "learning_rate": 1.214308030909785e-05, - "loss": 0.9922, + "learning_rate": 1.2162529362338633e-05, + "loss": 0.8537, "step": 15799 }, { - "epoch": 0.44835414301929627, + "epoch": 0.44773158774689, "grad_norm": 0.0, - "learning_rate": 1.2142182575721946e-05, - "loss": 0.8016, + "learning_rate": 1.2161633281271403e-05, + "loss": 1.0275, "step": 15800 }, { - "epoch": 0.44838251986379113, + "epoch": 0.4477599251891524, "grad_norm": 0.0, - "learning_rate": 1.2141284824250927e-05, - "loss": 0.7686, + "learning_rate": 1.2160737181995937e-05, + "loss": 0.8924, "step": 15801 }, { - "epoch": 0.44841089670828604, + "epoch": 0.4477882626314149, "grad_norm": 0.0, - "learning_rate": 1.2140387054692381e-05, - "loss": 0.9145, + "learning_rate": 1.2159841064519777e-05, + "loss": 0.8258, "step": 15802 }, { - "epoch": 0.4484392735527809, + "epoch": 0.44781660007367735, "grad_norm": 0.0, - "learning_rate": 1.2139489267053892e-05, - "loss": 1.0256, + "learning_rate": 1.2158944928850479e-05, + "loss": 0.93, "step": 15803 }, { - "epoch": 0.4484676503972758, + "epoch": 0.4478449375159398, "grad_norm": 0.0, - "learning_rate": 1.2138591461343037e-05, - "loss": 0.8936, + "learning_rate": 1.215804877499558e-05, + "loss": 0.8407, "step": 15804 }, { - "epoch": 0.44849602724177073, + "epoch": 0.4478732749582023, "grad_norm": 0.0, - "learning_rate": 1.2137693637567405e-05, - "loss": 0.9188, + "learning_rate": 1.2157152602962638e-05, + "loss": 0.9913, "step": 15805 }, { - "epoch": 0.4485244040862656, + "epoch": 0.4479016124004647, "grad_norm": 0.0, - "learning_rate": 1.2136795795734577e-05, - "loss": 0.9437, + "learning_rate": 1.21562564127592e-05, + "loss": 0.8895, "step": 15806 }, { - "epoch": 0.4485527809307605, + "epoch": 0.4479299498427272, "grad_norm": 0.0, - "learning_rate": 1.2135897935852143e-05, - "loss": 0.8252, + "learning_rate": 1.215536020439281e-05, + "loss": 0.8469, "step": 15807 }, { - "epoch": 0.44858115777525537, + "epoch": 0.44795828728498965, "grad_norm": 0.0, - "learning_rate": 1.213500005792768e-05, - "loss": 0.9346, + "learning_rate": 1.2154463977871022e-05, + "loss": 0.7427, "step": 15808 }, { - "epoch": 0.4486095346197503, + "epoch": 0.4479866247272521, "grad_norm": 0.0, - "learning_rate": 1.2134102161968775e-05, - "loss": 0.9817, + "learning_rate": 1.2153567733201383e-05, + "loss": 0.8919, "step": 15809 }, { - "epoch": 0.4486379114642452, + "epoch": 0.4480149621695146, "grad_norm": 0.0, - "learning_rate": 1.2133204247983016e-05, - "loss": 0.9791, + "learning_rate": 1.2152671470391443e-05, + "loss": 0.8806, "step": 15810 }, { - "epoch": 0.44866628830874006, + "epoch": 0.448043299611777, "grad_norm": 0.0, - "learning_rate": 1.2132306315977983e-05, - "loss": 1.0544, + "learning_rate": 1.2151775189448755e-05, + "loss": 0.8617, "step": 15811 }, { - "epoch": 0.448694665153235, + "epoch": 0.4480716370540395, "grad_norm": 0.0, - "learning_rate": 1.2131408365961263e-05, - "loss": 0.9027, + "learning_rate": 1.2150878890380865e-05, + "loss": 0.919, "step": 15812 }, { - "epoch": 0.44872304199772983, + "epoch": 0.44809997449630196, "grad_norm": 0.0, - "learning_rate": 1.213051039794044e-05, - "loss": 0.985, + "learning_rate": 1.214998257319532e-05, + "loss": 0.9367, "step": 15813 }, { - "epoch": 0.44875141884222475, + "epoch": 0.44812831193856445, "grad_norm": 0.0, - "learning_rate": 1.21296124119231e-05, - "loss": 0.8929, + "learning_rate": 1.2149086237899675e-05, + "loss": 0.8776, "step": 15814 }, { - "epoch": 0.44877979568671966, + "epoch": 0.4481566493808269, "grad_norm": 0.0, - "learning_rate": 1.2128714407916827e-05, - "loss": 1.0296, + "learning_rate": 1.214818988450148e-05, + "loss": 0.9206, "step": 15815 }, { - "epoch": 0.4488081725312145, + "epoch": 0.4481849868230893, "grad_norm": 0.0, - "learning_rate": 1.212781638592921e-05, - "loss": 0.8221, + "learning_rate": 1.2147293513008283e-05, + "loss": 0.7861, "step": 15816 }, { - "epoch": 0.44883654937570944, + "epoch": 0.4482133242653518, "grad_norm": 0.0, - "learning_rate": 1.2126918345967832e-05, - "loss": 1.0135, + "learning_rate": 1.2146397123427635e-05, + "loss": 0.8482, "step": 15817 }, { - "epoch": 0.4488649262202043, + "epoch": 0.44824166170761426, "grad_norm": 0.0, - "learning_rate": 1.212602028804028e-05, - "loss": 0.9286, + "learning_rate": 1.2145500715767087e-05, + "loss": 0.9211, "step": 15818 }, { - "epoch": 0.4488933030646992, + "epoch": 0.44826999914987675, "grad_norm": 0.0, - "learning_rate": 1.2125122212154137e-05, - "loss": 0.9131, + "learning_rate": 1.2144604290034193e-05, + "loss": 0.879, "step": 15819 }, { - "epoch": 0.44892167990919407, + "epoch": 0.4482983365921392, "grad_norm": 0.0, - "learning_rate": 1.2124224118316995e-05, - "loss": 0.8609, + "learning_rate": 1.2143707846236495e-05, + "loss": 0.9783, "step": 15820 }, { - "epoch": 0.448950056753689, + "epoch": 0.44832667403440163, "grad_norm": 0.0, - "learning_rate": 1.2123326006536429e-05, - "loss": 0.9699, + "learning_rate": 1.214281138438155e-05, + "loss": 0.9152, "step": 15821 }, { - "epoch": 0.4489784335981839, + "epoch": 0.4483550114766641, "grad_norm": 0.0, - "learning_rate": 1.2122427876820038e-05, - "loss": 0.9197, + "learning_rate": 1.2141914904476912e-05, + "loss": 0.9227, "step": 15822 }, { - "epoch": 0.44900681044267876, + "epoch": 0.44838334891892656, "grad_norm": 0.0, - "learning_rate": 1.2121529729175402e-05, - "loss": 0.9674, + "learning_rate": 1.2141018406530131e-05, + "loss": 0.8308, "step": 15823 }, { - "epoch": 0.4490351872871737, + "epoch": 0.44841168636118905, "grad_norm": 0.0, - "learning_rate": 1.2120631563610108e-05, - "loss": 0.84, + "learning_rate": 1.2140121890548755e-05, + "loss": 0.9151, "step": 15824 }, { - "epoch": 0.44906356413166854, + "epoch": 0.4484400238034515, "grad_norm": 0.0, - "learning_rate": 1.2119733380131746e-05, - "loss": 0.8262, + "learning_rate": 1.2139225356540336e-05, + "loss": 1.0087, "step": 15825 }, { - "epoch": 0.44909194097616345, + "epoch": 0.448468361245714, "grad_norm": 0.0, - "learning_rate": 1.2118835178747902e-05, - "loss": 0.8926, + "learning_rate": 1.2138328804512429e-05, + "loss": 1.0189, "step": 15826 }, { - "epoch": 0.44912031782065837, + "epoch": 0.4484966986879764, "grad_norm": 0.0, - "learning_rate": 1.211793695946616e-05, - "loss": 0.903, + "learning_rate": 1.2137432234472583e-05, + "loss": 0.9682, "step": 15827 }, { - "epoch": 0.4491486946651532, + "epoch": 0.44852503613023886, "grad_norm": 0.0, - "learning_rate": 1.211703872229411e-05, - "loss": 0.8275, + "learning_rate": 1.2136535646428356e-05, + "loss": 0.9673, "step": 15828 }, { - "epoch": 0.44917707150964814, + "epoch": 0.44855337357250136, "grad_norm": 0.0, - "learning_rate": 1.2116140467239337e-05, - "loss": 0.8452, + "learning_rate": 1.2135639040387291e-05, + "loss": 0.9809, "step": 15829 }, { - "epoch": 0.449205448354143, + "epoch": 0.4485817110147638, "grad_norm": 0.0, - "learning_rate": 1.2115242194309431e-05, - "loss": 1.0328, + "learning_rate": 1.2134742416356945e-05, + "loss": 0.7605, "step": 15830 }, { - "epoch": 0.4492338251986379, + "epoch": 0.4486100484570263, "grad_norm": 0.0, - "learning_rate": 1.2114343903511979e-05, - "loss": 0.9888, + "learning_rate": 1.2133845774344875e-05, + "loss": 0.9366, "step": 15831 }, { - "epoch": 0.44926220204313283, + "epoch": 0.4486383858992887, "grad_norm": 0.0, - "learning_rate": 1.2113445594854572e-05, - "loss": 0.8544, + "learning_rate": 1.2132949114358627e-05, + "loss": 0.8179, "step": 15832 }, { - "epoch": 0.4492905788876277, + "epoch": 0.44866672334155117, "grad_norm": 0.0, - "learning_rate": 1.2112547268344793e-05, - "loss": 0.8997, + "learning_rate": 1.2132052436405756e-05, + "loss": 0.9332, "step": 15833 }, { - "epoch": 0.4493189557321226, + "epoch": 0.44869506078381366, "grad_norm": 0.0, - "learning_rate": 1.211164892399023e-05, - "loss": 1.0301, + "learning_rate": 1.2131155740493816e-05, + "loss": 0.8812, "step": 15834 }, { - "epoch": 0.44934733257661746, + "epoch": 0.4487233982260761, "grad_norm": 0.0, - "learning_rate": 1.2110750561798476e-05, - "loss": 0.9178, + "learning_rate": 1.2130259026630363e-05, + "loss": 0.7816, "step": 15835 }, { - "epoch": 0.4493757094211124, + "epoch": 0.4487517356683386, "grad_norm": 0.0, - "learning_rate": 1.2109852181777117e-05, - "loss": 0.9711, + "learning_rate": 1.2129362294822943e-05, + "loss": 0.8243, "step": 15836 }, { - "epoch": 0.44940408626560724, + "epoch": 0.44878007311060103, "grad_norm": 0.0, - "learning_rate": 1.2108953783933741e-05, - "loss": 0.9591, + "learning_rate": 1.2128465545079117e-05, + "loss": 0.9156, "step": 15837 }, { - "epoch": 0.44943246311010215, + "epoch": 0.4488084105528635, "grad_norm": 0.0, - "learning_rate": 1.2108055368275938e-05, - "loss": 0.961, + "learning_rate": 1.2127568777406434e-05, + "loss": 0.7908, "step": 15838 }, { - "epoch": 0.44946083995459707, + "epoch": 0.44883674799512596, "grad_norm": 0.0, - "learning_rate": 1.2107156934811296e-05, - "loss": 0.8568, + "learning_rate": 1.212667199181245e-05, + "loss": 0.8226, "step": 15839 }, { - "epoch": 0.44948921679909193, + "epoch": 0.4488650854373884, "grad_norm": 0.0, - "learning_rate": 1.2106258483547406e-05, - "loss": 0.9225, + "learning_rate": 1.2125775188304714e-05, + "loss": 0.9115, "step": 15840 }, { - "epoch": 0.44951759364358684, + "epoch": 0.4488934228796509, "grad_norm": 0.0, - "learning_rate": 1.2105360014491854e-05, - "loss": 0.8829, + "learning_rate": 1.212487836689079e-05, + "loss": 0.7928, "step": 15841 }, { - "epoch": 0.4495459704880817, + "epoch": 0.44892176032191333, "grad_norm": 0.0, - "learning_rate": 1.2104461527652233e-05, - "loss": 0.9062, + "learning_rate": 1.2123981527578221e-05, + "loss": 0.9259, "step": 15842 }, { - "epoch": 0.4495743473325766, + "epoch": 0.4489500977641758, "grad_norm": 0.0, - "learning_rate": 1.2103563023036131e-05, - "loss": 0.9294, + "learning_rate": 1.212308467037457e-05, + "loss": 0.9559, "step": 15843 }, { - "epoch": 0.44960272417707153, + "epoch": 0.44897843520643826, "grad_norm": 0.0, - "learning_rate": 1.2102664500651136e-05, - "loss": 0.8955, + "learning_rate": 1.2122187795287388e-05, + "loss": 1.0161, "step": 15844 }, { - "epoch": 0.4496311010215664, + "epoch": 0.4490067726487007, "grad_norm": 0.0, - "learning_rate": 1.210176596050484e-05, - "loss": 0.8855, + "learning_rate": 1.2121290902324232e-05, + "loss": 1.0376, "step": 15845 }, { - "epoch": 0.4496594778660613, + "epoch": 0.4490351100909632, "grad_norm": 0.0, - "learning_rate": 1.2100867402604832e-05, - "loss": 0.9076, + "learning_rate": 1.2120393991492652e-05, + "loss": 0.9129, "step": 15846 }, { - "epoch": 0.44968785471055617, + "epoch": 0.44906344753322563, "grad_norm": 0.0, - "learning_rate": 1.2099968826958705e-05, - "loss": 0.7759, + "learning_rate": 1.2119497062800205e-05, + "loss": 0.8635, "step": 15847 }, { - "epoch": 0.4497162315550511, + "epoch": 0.44909178497548813, "grad_norm": 0.0, - "learning_rate": 1.2099070233574043e-05, - "loss": 1.0219, + "learning_rate": 1.2118600116254452e-05, + "loss": 0.9606, "step": 15848 }, { - "epoch": 0.449744608399546, + "epoch": 0.44912012241775057, "grad_norm": 0.0, - "learning_rate": 1.2098171622458446e-05, - "loss": 0.8243, + "learning_rate": 1.211770315186294e-05, + "loss": 0.9961, "step": 15849 }, { - "epoch": 0.44977298524404086, + "epoch": 0.44914845986001306, "grad_norm": 0.0, - "learning_rate": 1.2097272993619495e-05, - "loss": 0.9088, + "learning_rate": 1.2116806169633227e-05, + "loss": 1.0363, "step": 15850 }, { - "epoch": 0.44980136208853577, + "epoch": 0.4491767973022755, "grad_norm": 0.0, - "learning_rate": 1.2096374347064785e-05, - "loss": 0.8353, + "learning_rate": 1.2115909169572872e-05, + "loss": 0.8701, "step": 15851 }, { - "epoch": 0.44982973893303063, + "epoch": 0.44920513474453794, "grad_norm": 0.0, - "learning_rate": 1.2095475682801907e-05, - "loss": 0.8457, + "learning_rate": 1.211501215168943e-05, + "loss": 0.9956, "step": 15852 }, { - "epoch": 0.44985811577752555, + "epoch": 0.44923347218680043, "grad_norm": 0.0, - "learning_rate": 1.2094577000838452e-05, - "loss": 0.8608, + "learning_rate": 1.211411511599045e-05, + "loss": 0.9339, "step": 15853 }, { - "epoch": 0.4498864926220204, + "epoch": 0.44926180962906287, "grad_norm": 0.0, - "learning_rate": 1.2093678301182013e-05, - "loss": 0.9246, + "learning_rate": 1.2113218062483493e-05, + "loss": 0.9543, "step": 15854 }, { - "epoch": 0.4499148694665153, + "epoch": 0.44929014707132536, "grad_norm": 0.0, - "learning_rate": 1.2092779583840173e-05, - "loss": 0.8489, + "learning_rate": 1.211232099117612e-05, + "loss": 0.8681, "step": 15855 }, { - "epoch": 0.44994324631101024, + "epoch": 0.4493184845135878, "grad_norm": 0.0, - "learning_rate": 1.2091880848820536e-05, - "loss": 0.8429, + "learning_rate": 1.2111423902075883e-05, + "loss": 0.899, "step": 15856 }, { - "epoch": 0.4499716231555051, + "epoch": 0.44934682195585024, "grad_norm": 0.0, - "learning_rate": 1.2090982096130686e-05, - "loss": 0.8913, + "learning_rate": 1.2110526795190338e-05, + "loss": 0.9017, "step": 15857 }, { - "epoch": 0.45, + "epoch": 0.44937515939811273, "grad_norm": 0.0, - "learning_rate": 1.2090083325778214e-05, - "loss": 0.8661, + "learning_rate": 1.210962967052704e-05, + "loss": 0.9218, "step": 15858 }, { - "epoch": 0.45002837684449487, + "epoch": 0.44940349684037517, "grad_norm": 0.0, - "learning_rate": 1.2089184537770717e-05, - "loss": 0.8474, + "learning_rate": 1.2108732528093549e-05, + "loss": 0.8952, "step": 15859 }, { - "epoch": 0.4500567536889898, + "epoch": 0.44943183428263767, "grad_norm": 0.0, - "learning_rate": 1.208828573211578e-05, - "loss": 0.8646, + "learning_rate": 1.210783536789742e-05, + "loss": 0.8964, "step": 15860 }, { - "epoch": 0.4500851305334847, + "epoch": 0.4494601717249001, "grad_norm": 0.0, - "learning_rate": 1.2087386908821005e-05, - "loss": 0.9217, + "learning_rate": 1.2106938189946213e-05, + "loss": 0.9239, "step": 15861 }, { - "epoch": 0.45011350737797956, + "epoch": 0.4494885091671626, "grad_norm": 0.0, - "learning_rate": 1.2086488067893976e-05, - "loss": 1.0156, + "learning_rate": 1.2106040994247484e-05, + "loss": 0.9437, "step": 15862 }, { - "epoch": 0.4501418842224745, + "epoch": 0.44951684660942504, "grad_norm": 0.0, - "learning_rate": 1.2085589209342287e-05, - "loss": 0.8471, + "learning_rate": 1.2105143780808786e-05, + "loss": 0.8776, "step": 15863 }, { - "epoch": 0.45017026106696933, + "epoch": 0.4495451840516875, "grad_norm": 0.0, - "learning_rate": 1.2084690333173536e-05, - "loss": 0.8874, + "learning_rate": 1.2104246549637683e-05, + "loss": 0.8477, "step": 15864 }, { - "epoch": 0.45019863791146425, + "epoch": 0.44957352149394997, "grad_norm": 0.0, - "learning_rate": 1.2083791439395309e-05, - "loss": 0.919, + "learning_rate": 1.2103349300741727e-05, + "loss": 1.1548, "step": 15865 }, { - "epoch": 0.45022701475595917, + "epoch": 0.4496018589362124, "grad_norm": 0.0, - "learning_rate": 1.2082892528015205e-05, - "loss": 0.8554, + "learning_rate": 1.2102452034128482e-05, + "loss": 0.8816, "step": 15866 }, { - "epoch": 0.450255391600454, + "epoch": 0.4496301963784749, "grad_norm": 0.0, - "learning_rate": 1.208199359904081e-05, - "loss": 0.9309, + "learning_rate": 1.21015547498055e-05, + "loss": 0.9595, "step": 15867 }, { - "epoch": 0.45028376844494894, + "epoch": 0.44965853382073734, "grad_norm": 0.0, - "learning_rate": 1.2081094652479725e-05, - "loss": 0.8757, + "learning_rate": 1.2100657447780344e-05, + "loss": 0.7792, "step": 15868 }, { - "epoch": 0.4503121452894438, + "epoch": 0.4496868712629998, "grad_norm": 0.0, - "learning_rate": 1.2080195688339537e-05, - "loss": 0.975, + "learning_rate": 1.2099760128060571e-05, + "loss": 0.8218, "step": 15869 }, { - "epoch": 0.4503405221339387, + "epoch": 0.44971520870526227, "grad_norm": 0.0, - "learning_rate": 1.2079296706627845e-05, - "loss": 0.9376, + "learning_rate": 1.2098862790653738e-05, + "loss": 1.0775, "step": 15870 }, { - "epoch": 0.4503688989784336, + "epoch": 0.4497435461475247, "grad_norm": 0.0, - "learning_rate": 1.207839770735224e-05, - "loss": 0.8573, + "learning_rate": 1.2097965435567402e-05, + "loss": 0.8271, "step": 15871 }, { - "epoch": 0.4503972758229285, + "epoch": 0.4497718835897872, "grad_norm": 0.0, - "learning_rate": 1.2077498690520316e-05, - "loss": 0.9714, + "learning_rate": 1.2097068062809125e-05, + "loss": 0.9853, "step": 15872 }, { - "epoch": 0.4504256526674234, + "epoch": 0.44980022103204964, "grad_norm": 0.0, - "learning_rate": 1.2076599656139667e-05, - "loss": 0.9758, + "learning_rate": 1.2096170672386467e-05, + "loss": 0.9701, "step": 15873 }, { - "epoch": 0.45045402951191826, + "epoch": 0.44982855847431213, "grad_norm": 0.0, - "learning_rate": 1.2075700604217889e-05, - "loss": 0.8718, + "learning_rate": 1.2095273264306984e-05, + "loss": 0.944, "step": 15874 }, { - "epoch": 0.4504824063564132, + "epoch": 0.4498568959165746, "grad_norm": 0.0, - "learning_rate": 1.207480153476257e-05, - "loss": 0.7808, + "learning_rate": 1.2094375838578234e-05, + "loss": 1.0469, "step": 15875 }, { - "epoch": 0.45051078320090804, + "epoch": 0.449885233358837, "grad_norm": 0.0, - "learning_rate": 1.2073902447781311e-05, - "loss": 0.9376, + "learning_rate": 1.209347839520778e-05, + "loss": 0.9325, "step": 15876 }, { - "epoch": 0.45053916004540295, + "epoch": 0.4499135708010995, "grad_norm": 0.0, - "learning_rate": 1.2073003343281709e-05, - "loss": 0.7451, + "learning_rate": 1.2092580934203183e-05, + "loss": 0.8779, "step": 15877 }, { - "epoch": 0.45056753688989787, + "epoch": 0.44994190824336194, "grad_norm": 0.0, - "learning_rate": 1.207210422127135e-05, - "loss": 0.8898, + "learning_rate": 1.2091683455571997e-05, + "loss": 1.0103, "step": 15878 }, { - "epoch": 0.45059591373439273, + "epoch": 0.44997024568562444, "grad_norm": 0.0, - "learning_rate": 1.2071205081757835e-05, - "loss": 0.9722, + "learning_rate": 1.2090785959321783e-05, + "loss": 1.0134, "step": 15879 }, { - "epoch": 0.45062429057888764, + "epoch": 0.4499985831278869, "grad_norm": 0.0, - "learning_rate": 1.2070305924748758e-05, - "loss": 0.8156, + "learning_rate": 1.2089888445460105e-05, + "loss": 0.961, "step": 15880 }, { - "epoch": 0.4506526674233825, + "epoch": 0.4500269205701493, "grad_norm": 0.0, - "learning_rate": 1.2069406750251713e-05, - "loss": 0.9147, + "learning_rate": 1.208899091399452e-05, + "loss": 0.766, "step": 15881 }, { - "epoch": 0.4506810442678774, + "epoch": 0.4500552580124118, "grad_norm": 0.0, - "learning_rate": 1.2068507558274297e-05, - "loss": 0.9131, + "learning_rate": 1.2088093364932591e-05, + "loss": 0.9531, "step": 15882 }, { - "epoch": 0.4507094211123723, + "epoch": 0.45008359545467425, "grad_norm": 0.0, - "learning_rate": 1.2067608348824102e-05, - "loss": 0.9069, + "learning_rate": 1.2087195798281873e-05, + "loss": 0.9024, "step": 15883 }, { - "epoch": 0.4507377979568672, + "epoch": 0.45011193289693674, "grad_norm": 0.0, - "learning_rate": 1.2066709121908731e-05, - "loss": 0.9704, + "learning_rate": 1.208629821404993e-05, + "loss": 0.9169, "step": 15884 }, { - "epoch": 0.4507661748013621, + "epoch": 0.4501402703391992, "grad_norm": 0.0, - "learning_rate": 1.206580987753577e-05, - "loss": 0.9384, + "learning_rate": 1.2085400612244323e-05, + "loss": 0.9118, "step": 15885 }, { - "epoch": 0.45079455164585697, + "epoch": 0.45016860778146167, "grad_norm": 0.0, - "learning_rate": 1.2064910615712824e-05, - "loss": 0.9741, + "learning_rate": 1.2084502992872613e-05, + "loss": 0.7976, "step": 15886 }, { - "epoch": 0.4508229284903519, + "epoch": 0.4501969452237241, "grad_norm": 0.0, - "learning_rate": 1.2064011336447484e-05, - "loss": 0.9296, + "learning_rate": 1.2083605355942358e-05, + "loss": 0.8895, "step": 15887 }, { - "epoch": 0.45085130533484674, + "epoch": 0.45022528266598655, "grad_norm": 0.0, - "learning_rate": 1.2063112039747347e-05, - "loss": 1.0309, + "learning_rate": 1.2082707701461122e-05, + "loss": 0.9134, "step": 15888 }, { - "epoch": 0.45087968217934166, + "epoch": 0.45025362010824904, "grad_norm": 0.0, - "learning_rate": 1.2062212725620011e-05, - "loss": 0.8687, + "learning_rate": 1.208181002943647e-05, + "loss": 0.9623, "step": 15889 }, { - "epoch": 0.45090805902383657, + "epoch": 0.4502819575505115, "grad_norm": 0.0, - "learning_rate": 1.206131339407307e-05, - "loss": 0.9332, + "learning_rate": 1.2080912339875957e-05, + "loss": 1.0251, "step": 15890 }, { - "epoch": 0.45093643586833143, + "epoch": 0.450310294992774, "grad_norm": 0.0, - "learning_rate": 1.2060414045114122e-05, - "loss": 0.8056, + "learning_rate": 1.2080014632787142e-05, + "loss": 0.8155, "step": 15891 }, { - "epoch": 0.45096481271282635, + "epoch": 0.4503386324350364, "grad_norm": 0.0, - "learning_rate": 1.2059514678750764e-05, - "loss": 0.9719, + "learning_rate": 1.2079116908177592e-05, + "loss": 0.8267, "step": 15892 }, { - "epoch": 0.4509931895573212, + "epoch": 0.45036696987729885, "grad_norm": 0.0, - "learning_rate": 1.2058615294990594e-05, - "loss": 0.9939, + "learning_rate": 1.2078219166054873e-05, + "loss": 0.9398, "step": 15893 }, { - "epoch": 0.4510215664018161, + "epoch": 0.45039530731956134, "grad_norm": 0.0, - "learning_rate": 1.2057715893841208e-05, - "loss": 0.9445, + "learning_rate": 1.2077321406426542e-05, + "loss": 0.9197, "step": 15894 }, { - "epoch": 0.45104994324631104, + "epoch": 0.4504236447618238, "grad_norm": 0.0, - "learning_rate": 1.2056816475310202e-05, - "loss": 0.9797, + "learning_rate": 1.207642362930016e-05, + "loss": 0.7981, "step": 15895 }, { - "epoch": 0.4510783200908059, + "epoch": 0.4504519822040863, "grad_norm": 0.0, - "learning_rate": 1.2055917039405176e-05, - "loss": 0.8671, + "learning_rate": 1.2075525834683288e-05, + "loss": 1.0255, "step": 15896 }, { - "epoch": 0.4511066969353008, + "epoch": 0.4504803196463487, "grad_norm": 0.0, - "learning_rate": 1.2055017586133729e-05, - "loss": 0.9977, + "learning_rate": 1.2074628022583494e-05, + "loss": 0.9439, "step": 15897 }, { - "epoch": 0.45113507377979567, + "epoch": 0.4505086570886112, "grad_norm": 0.0, - "learning_rate": 1.205411811550345e-05, - "loss": 0.9643, + "learning_rate": 1.2073730193008336e-05, + "loss": 0.8805, "step": 15898 }, { - "epoch": 0.4511634506242906, + "epoch": 0.45053699453087365, "grad_norm": 0.0, - "learning_rate": 1.2053218627521946e-05, - "loss": 0.9036, + "learning_rate": 1.2072832345965381e-05, + "loss": 0.8577, "step": 15899 }, { - "epoch": 0.45119182746878544, + "epoch": 0.4505653319731361, "grad_norm": 0.0, - "learning_rate": 1.2052319122196812e-05, - "loss": 0.9579, + "learning_rate": 1.2071934481462186e-05, + "loss": 0.9228, "step": 15900 }, { - "epoch": 0.45122020431328036, + "epoch": 0.4505936694153986, "grad_norm": 0.0, - "learning_rate": 1.2051419599535648e-05, - "loss": 0.8593, + "learning_rate": 1.207103659950632e-05, + "loss": 0.8702, "step": 15901 }, { - "epoch": 0.4512485811577753, + "epoch": 0.450622006857661, "grad_norm": 0.0, - "learning_rate": 1.2050520059546048e-05, - "loss": 1.0412, + "learning_rate": 1.2070138700105346e-05, + "loss": 0.9279, "step": 15902 }, { - "epoch": 0.45127695800227013, + "epoch": 0.4506503442999235, "grad_norm": 0.0, - "learning_rate": 1.2049620502235613e-05, - "loss": 0.8816, + "learning_rate": 1.2069240783266822e-05, + "loss": 0.9618, "step": 15903 }, { - "epoch": 0.45130533484676505, + "epoch": 0.45067868174218595, "grad_norm": 0.0, - "learning_rate": 1.2048720927611944e-05, - "loss": 0.962, + "learning_rate": 1.2068342848998314e-05, + "loss": 0.9484, "step": 15904 }, { - "epoch": 0.4513337116912599, + "epoch": 0.4507070191844484, "grad_norm": 0.0, - "learning_rate": 1.2047821335682632e-05, - "loss": 0.8881, + "learning_rate": 1.2067444897307386e-05, + "loss": 0.9649, "step": 15905 }, { - "epoch": 0.4513620885357548, + "epoch": 0.4507353566267109, "grad_norm": 0.0, - "learning_rate": 1.2046921726455284e-05, - "loss": 0.8184, + "learning_rate": 1.2066546928201602e-05, + "loss": 0.7873, "step": 15906 }, { - "epoch": 0.45139046538024974, + "epoch": 0.4507636940689733, "grad_norm": 0.0, - "learning_rate": 1.2046022099937497e-05, - "loss": 0.8462, + "learning_rate": 1.2065648941688528e-05, + "loss": 0.9371, "step": 15907 }, { - "epoch": 0.4514188422247446, + "epoch": 0.4507920315112358, "grad_norm": 0.0, - "learning_rate": 1.204512245613687e-05, - "loss": 0.8817, + "learning_rate": 1.2064750937775722e-05, + "loss": 1.0005, "step": 15908 }, { - "epoch": 0.4514472190692395, + "epoch": 0.45082036895349825, "grad_norm": 0.0, - "learning_rate": 1.2044222795060999e-05, - "loss": 0.8341, + "learning_rate": 1.2063852916470755e-05, + "loss": 1.015, "step": 15909 }, { - "epoch": 0.4514755959137344, + "epoch": 0.45084870639576075, "grad_norm": 0.0, - "learning_rate": 1.204332311671749e-05, - "loss": 0.9329, + "learning_rate": 1.206295487778119e-05, + "loss": 0.8575, "step": 15910 }, { - "epoch": 0.4515039727582293, + "epoch": 0.4508770438380232, "grad_norm": 0.0, - "learning_rate": 1.2042423421113936e-05, - "loss": 0.9875, + "learning_rate": 1.2062056821714588e-05, + "loss": 0.9178, "step": 15911 }, { - "epoch": 0.4515323496027242, + "epoch": 0.4509053812802856, "grad_norm": 0.0, - "learning_rate": 1.2041523708257938e-05, - "loss": 0.9535, + "learning_rate": 1.2061158748278512e-05, + "loss": 0.9866, "step": 15912 }, { - "epoch": 0.45156072644721906, + "epoch": 0.4509337187225481, "grad_norm": 0.0, - "learning_rate": 1.2040623978157097e-05, - "loss": 0.8139, + "learning_rate": 1.2060260657480536e-05, + "loss": 0.9473, "step": 15913 }, { - "epoch": 0.451589103291714, + "epoch": 0.45096205616481055, "grad_norm": 0.0, - "learning_rate": 1.2039724230819019e-05, - "loss": 0.8412, + "learning_rate": 1.205936254932822e-05, + "loss": 0.943, "step": 15914 }, { - "epoch": 0.45161748013620884, + "epoch": 0.45099039360707305, "grad_norm": 0.0, - "learning_rate": 1.2038824466251295e-05, - "loss": 0.897, + "learning_rate": 1.2058464423829127e-05, + "loss": 0.8391, "step": 15915 }, { - "epoch": 0.45164585698070375, + "epoch": 0.4510187310493355, "grad_norm": 0.0, - "learning_rate": 1.203792468446153e-05, - "loss": 0.9217, + "learning_rate": 1.205756628099082e-05, + "loss": 0.8942, "step": 15916 }, { - "epoch": 0.4516742338251986, + "epoch": 0.4510470684915979, "grad_norm": 0.0, - "learning_rate": 1.2037024885457325e-05, - "loss": 0.956, + "learning_rate": 1.2056668120820871e-05, + "loss": 0.8257, "step": 15917 }, { - "epoch": 0.4517026106696935, + "epoch": 0.4510754059338604, "grad_norm": 0.0, - "learning_rate": 1.2036125069246278e-05, - "loss": 0.8284, + "learning_rate": 1.2055769943326844e-05, + "loss": 0.9604, "step": 15918 }, { - "epoch": 0.45173098751418844, + "epoch": 0.45110374337612286, "grad_norm": 0.0, - "learning_rate": 1.2035225235835988e-05, - "loss": 0.9545, + "learning_rate": 1.2054871748516301e-05, + "loss": 0.8708, "step": 15919 }, { - "epoch": 0.4517593643586833, + "epoch": 0.45113208081838535, "grad_norm": 0.0, - "learning_rate": 1.2034325385234062e-05, - "loss": 0.9724, + "learning_rate": 1.2053973536396812e-05, + "loss": 0.8766, "step": 15920 }, { - "epoch": 0.4517877412031782, + "epoch": 0.4511604182606478, "grad_norm": 0.0, - "learning_rate": 1.2033425517448097e-05, - "loss": 0.8305, + "learning_rate": 1.205307530697594e-05, + "loss": 0.9378, "step": 15921 }, { - "epoch": 0.4518161180476731, + "epoch": 0.4511887557029103, "grad_norm": 0.0, - "learning_rate": 1.2032525632485696e-05, - "loss": 0.9355, + "learning_rate": 1.2052177060261254e-05, + "loss": 0.9553, "step": 15922 }, { - "epoch": 0.451844494892168, + "epoch": 0.4512170931451727, "grad_norm": 0.0, - "learning_rate": 1.2031625730354459e-05, - "loss": 0.9618, + "learning_rate": 1.2051278796260318e-05, + "loss": 0.9778, "step": 15923 }, { - "epoch": 0.4518728717366629, + "epoch": 0.45124543058743516, "grad_norm": 0.0, - "learning_rate": 1.2030725811061989e-05, - "loss": 0.9107, + "learning_rate": 1.2050380514980697e-05, + "loss": 0.812, "step": 15924 }, { - "epoch": 0.45190124858115777, + "epoch": 0.45127376802969765, "grad_norm": 0.0, - "learning_rate": 1.2029825874615889e-05, - "loss": 0.7849, + "learning_rate": 1.204948221642996e-05, + "loss": 0.804, "step": 15925 }, { - "epoch": 0.4519296254256527, + "epoch": 0.4513021054719601, "grad_norm": 0.0, - "learning_rate": 1.2028925921023753e-05, - "loss": 0.861, + "learning_rate": 1.2048583900615674e-05, + "loss": 0.9165, "step": 15926 }, { - "epoch": 0.45195800227014754, + "epoch": 0.4513304429142226, "grad_norm": 0.0, - "learning_rate": 1.2028025950293191e-05, - "loss": 0.8532, + "learning_rate": 1.2047685567545406e-05, + "loss": 0.8703, "step": 15927 }, { - "epoch": 0.45198637911464246, + "epoch": 0.451358780356485, "grad_norm": 0.0, - "learning_rate": 1.2027125962431805e-05, - "loss": 0.9748, + "learning_rate": 1.2046787217226723e-05, + "loss": 0.9851, "step": 15928 }, { - "epoch": 0.45201475595913737, + "epoch": 0.45138711779874746, "grad_norm": 0.0, - "learning_rate": 1.202622595744719e-05, - "loss": 0.8682, + "learning_rate": 1.2045888849667187e-05, + "loss": 0.8454, "step": 15929 }, { - "epoch": 0.45204313280363223, + "epoch": 0.45141545524100996, "grad_norm": 0.0, - "learning_rate": 1.2025325935346957e-05, - "loss": 0.8712, + "learning_rate": 1.2044990464874373e-05, + "loss": 0.925, "step": 15930 }, { - "epoch": 0.45207150964812715, + "epoch": 0.4514437926832724, "grad_norm": 0.0, - "learning_rate": 1.2024425896138705e-05, - "loss": 0.9247, + "learning_rate": 1.2044092062855844e-05, + "loss": 0.8238, "step": 15931 }, { - "epoch": 0.452099886492622, + "epoch": 0.4514721301255349, "grad_norm": 0.0, - "learning_rate": 1.2023525839830038e-05, - "loss": 0.9291, + "learning_rate": 1.2043193643619168e-05, + "loss": 0.8787, "step": 15932 }, { - "epoch": 0.4521282633371169, + "epoch": 0.4515004675677973, "grad_norm": 0.0, - "learning_rate": 1.202262576642855e-05, - "loss": 0.7158, + "learning_rate": 1.2042295207171912e-05, + "loss": 0.8623, "step": 15933 }, { - "epoch": 0.4521566401816118, + "epoch": 0.4515288050100598, "grad_norm": 0.0, - "learning_rate": 1.2021725675941857e-05, - "loss": 0.9507, + "learning_rate": 1.2041396753521649e-05, + "loss": 0.7976, "step": 15934 }, { - "epoch": 0.4521850170261067, + "epoch": 0.45155714245232226, "grad_norm": 0.0, - "learning_rate": 1.2020825568377556e-05, - "loss": 0.9578, + "learning_rate": 1.204049828267594e-05, + "loss": 0.9113, "step": 15935 }, { - "epoch": 0.4522133938706016, + "epoch": 0.4515854798945847, "grad_norm": 0.0, - "learning_rate": 1.2019925443743248e-05, - "loss": 0.9444, + "learning_rate": 1.2039599794642358e-05, + "loss": 0.8099, "step": 15936 }, { - "epoch": 0.45224177071509647, + "epoch": 0.4516138173368472, "grad_norm": 0.0, - "learning_rate": 1.2019025302046541e-05, - "loss": 0.9034, + "learning_rate": 1.2038701289428468e-05, + "loss": 0.8628, "step": 15937 }, { - "epoch": 0.4522701475595914, + "epoch": 0.45164215477910963, "grad_norm": 0.0, - "learning_rate": 1.2018125143295038e-05, - "loss": 1.0527, + "learning_rate": 1.203780276704184e-05, + "loss": 0.7835, "step": 15938 }, { - "epoch": 0.45229852440408624, + "epoch": 0.4516704922213721, "grad_norm": 0.0, - "learning_rate": 1.2017224967496336e-05, - "loss": 0.888, + "learning_rate": 1.2036904227490043e-05, + "loss": 0.9836, "step": 15939 }, { - "epoch": 0.45232690124858116, + "epoch": 0.45169882966363456, "grad_norm": 0.0, - "learning_rate": 1.201632477465805e-05, - "loss": 0.8708, + "learning_rate": 1.2036005670780646e-05, + "loss": 0.925, "step": 15940 }, { - "epoch": 0.4523552780930761, + "epoch": 0.451727167105897, "grad_norm": 0.0, - "learning_rate": 1.2015424564787777e-05, - "loss": 0.9026, + "learning_rate": 1.2035107096921215e-05, + "loss": 0.8752, "step": 15941 }, { - "epoch": 0.45238365493757093, + "epoch": 0.4517555045481595, "grad_norm": 0.0, - "learning_rate": 1.2014524337893119e-05, - "loss": 0.8513, + "learning_rate": 1.2034208505919324e-05, + "loss": 0.9293, "step": 15942 }, { - "epoch": 0.45241203178206585, + "epoch": 0.45178384199042193, "grad_norm": 0.0, - "learning_rate": 1.2013624093981685e-05, - "loss": 0.812, + "learning_rate": 1.2033309897782538e-05, + "loss": 0.8958, "step": 15943 }, { - "epoch": 0.4524404086265607, + "epoch": 0.4518121794326844, "grad_norm": 0.0, - "learning_rate": 1.2012723833061077e-05, - "loss": 0.8379, + "learning_rate": 1.2032411272518428e-05, + "loss": 1.0143, "step": 15944 }, { - "epoch": 0.4524687854710556, + "epoch": 0.45184051687494686, "grad_norm": 0.0, - "learning_rate": 1.2011823555138902e-05, - "loss": 1.0161, + "learning_rate": 1.2031512630134562e-05, + "loss": 0.935, "step": 15945 }, { - "epoch": 0.45249716231555054, + "epoch": 0.45186885431720936, "grad_norm": 0.0, - "learning_rate": 1.201092326022276e-05, - "loss": 0.8635, + "learning_rate": 1.2030613970638512e-05, + "loss": 0.8683, "step": 15946 }, { - "epoch": 0.4525255391600454, + "epoch": 0.4518971917594718, "grad_norm": 0.0, - "learning_rate": 1.2010022948320263e-05, - "loss": 0.9093, + "learning_rate": 1.2029715294037847e-05, + "loss": 0.9948, "step": 15947 }, { - "epoch": 0.4525539160045403, + "epoch": 0.45192552920173423, "grad_norm": 0.0, - "learning_rate": 1.2009122619439011e-05, - "loss": 0.9259, + "learning_rate": 1.2028816600340137e-05, + "loss": 0.9092, "step": 15948 }, { - "epoch": 0.4525822928490352, + "epoch": 0.4519538666439967, "grad_norm": 0.0, - "learning_rate": 1.2008222273586607e-05, - "loss": 0.8402, + "learning_rate": 1.2027917889552951e-05, + "loss": 0.8984, "step": 15949 }, { - "epoch": 0.4526106696935301, + "epoch": 0.45198220408625916, "grad_norm": 0.0, - "learning_rate": 1.2007321910770662e-05, - "loss": 0.8945, + "learning_rate": 1.2027019161683857e-05, + "loss": 0.9216, "step": 15950 }, { - "epoch": 0.45263904653802495, + "epoch": 0.45201054152852166, "grad_norm": 0.0, - "learning_rate": 1.2006421530998778e-05, - "loss": 1.0536, + "learning_rate": 1.2026120416740428e-05, + "loss": 0.8632, "step": 15951 }, { - "epoch": 0.45266742338251986, + "epoch": 0.4520388789707841, "grad_norm": 0.0, - "learning_rate": 1.2005521134278559e-05, - "loss": 0.9499, + "learning_rate": 1.2025221654730238e-05, + "loss": 0.8881, "step": 15952 }, { - "epoch": 0.4526958002270148, + "epoch": 0.45206721641304654, "grad_norm": 0.0, - "learning_rate": 1.2004620720617613e-05, - "loss": 0.9075, + "learning_rate": 1.2024322875660853e-05, + "loss": 0.9427, "step": 15953 }, { - "epoch": 0.45272417707150964, + "epoch": 0.45209555385530903, "grad_norm": 0.0, - "learning_rate": 1.2003720290023549e-05, - "loss": 0.8393, + "learning_rate": 1.2023424079539841e-05, + "loss": 0.9106, "step": 15954 }, { - "epoch": 0.45275255391600455, + "epoch": 0.45212389129757147, "grad_norm": 0.0, - "learning_rate": 1.2002819842503967e-05, - "loss": 0.9651, + "learning_rate": 1.2022525266374778e-05, + "loss": 0.9388, "step": 15955 }, { - "epoch": 0.4527809307604994, + "epoch": 0.45215222873983396, "grad_norm": 0.0, - "learning_rate": 1.2001919378066476e-05, - "loss": 0.8716, + "learning_rate": 1.2021626436173238e-05, + "loss": 0.9399, "step": 15956 }, { - "epoch": 0.4528093076049943, + "epoch": 0.4521805661820964, "grad_norm": 0.0, - "learning_rate": 1.2001018896718684e-05, - "loss": 0.8092, + "learning_rate": 1.2020727588942783e-05, + "loss": 0.8932, "step": 15957 }, { - "epoch": 0.45283768444948924, + "epoch": 0.45220890362435884, "grad_norm": 0.0, - "learning_rate": 1.2000118398468193e-05, - "loss": 0.8941, + "learning_rate": 1.2019828724690988e-05, + "loss": 0.8767, "step": 15958 }, { - "epoch": 0.4528660612939841, + "epoch": 0.45223724106662133, "grad_norm": 0.0, - "learning_rate": 1.1999217883322612e-05, - "loss": 0.9238, + "learning_rate": 1.201892984342543e-05, + "loss": 0.8317, "step": 15959 }, { - "epoch": 0.452894438138479, + "epoch": 0.45226557850888377, "grad_norm": 0.0, - "learning_rate": 1.1998317351289547e-05, - "loss": 0.8379, + "learning_rate": 1.2018030945153674e-05, + "loss": 0.8916, "step": 15960 }, { - "epoch": 0.4529228149829739, + "epoch": 0.45229391595114626, "grad_norm": 0.0, - "learning_rate": 1.1997416802376606e-05, - "loss": 0.9184, + "learning_rate": 1.2017132029883297e-05, + "loss": 0.9695, "step": 15961 }, { - "epoch": 0.4529511918274688, + "epoch": 0.4523222533934087, "grad_norm": 0.0, - "learning_rate": 1.1996516236591398e-05, - "loss": 0.8425, + "learning_rate": 1.2016233097621864e-05, + "loss": 0.9675, "step": 15962 }, { - "epoch": 0.45297956867196365, + "epoch": 0.4523505908356712, "grad_norm": 0.0, - "learning_rate": 1.1995615653941526e-05, - "loss": 0.827, + "learning_rate": 1.2015334148376951e-05, + "loss": 0.8441, "step": 15963 }, { - "epoch": 0.45300794551645857, + "epoch": 0.45237892827793363, "grad_norm": 0.0, - "learning_rate": 1.1994715054434601e-05, - "loss": 0.9575, + "learning_rate": 1.201443518215613e-05, + "loss": 1.0005, "step": 15964 }, { - "epoch": 0.4530363223609535, + "epoch": 0.4524072657201961, "grad_norm": 0.0, - "learning_rate": 1.1993814438078226e-05, - "loss": 0.8219, + "learning_rate": 1.2013536198966977e-05, + "loss": 0.9208, "step": 15965 }, { - "epoch": 0.45306469920544834, + "epoch": 0.45243560316245857, "grad_norm": 0.0, - "learning_rate": 1.199291380488001e-05, - "loss": 0.9581, + "learning_rate": 1.2012637198817056e-05, + "loss": 1.0438, "step": 15966 }, { - "epoch": 0.45309307604994326, + "epoch": 0.452463940604721, "grad_norm": 0.0, - "learning_rate": 1.1992013154847559e-05, - "loss": 0.9182, + "learning_rate": 1.2011738181713947e-05, + "loss": 0.8757, "step": 15967 }, { - "epoch": 0.4531214528944381, + "epoch": 0.4524922780469835, "grad_norm": 0.0, - "learning_rate": 1.199111248798849e-05, - "loss": 0.9499, + "learning_rate": 1.2010839147665222e-05, + "loss": 1.0094, "step": 15968 }, { - "epoch": 0.45314982973893303, + "epoch": 0.45252061548924594, "grad_norm": 0.0, - "learning_rate": 1.19902118043104e-05, - "loss": 0.8375, + "learning_rate": 1.2009940096678451e-05, + "loss": 0.7423, "step": 15969 }, { - "epoch": 0.45317820658342794, + "epoch": 0.4525489529315084, "grad_norm": 0.0, - "learning_rate": 1.1989311103820902e-05, - "loss": 0.8916, + "learning_rate": 1.2009041028761207e-05, + "loss": 0.8654, "step": 15970 }, { - "epoch": 0.4532065834279228, + "epoch": 0.45257729037377087, "grad_norm": 0.0, - "learning_rate": 1.1988410386527605e-05, - "loss": 0.7809, + "learning_rate": 1.2008141943921063e-05, + "loss": 0.9039, "step": 15971 }, { - "epoch": 0.4532349602724177, + "epoch": 0.4526056278160333, "grad_norm": 0.0, - "learning_rate": 1.1987509652438117e-05, - "loss": 0.9986, + "learning_rate": 1.2007242842165599e-05, + "loss": 0.8933, "step": 15972 }, { - "epoch": 0.4532633371169126, + "epoch": 0.4526339652582958, "grad_norm": 0.0, - "learning_rate": 1.1986608901560041e-05, - "loss": 0.8785, + "learning_rate": 1.200634372350238e-05, + "loss": 0.8416, "step": 15973 }, { - "epoch": 0.4532917139614075, + "epoch": 0.45266230270055824, "grad_norm": 0.0, - "learning_rate": 1.1985708133900996e-05, - "loss": 1.0249, + "learning_rate": 1.2005444587938981e-05, + "loss": 0.942, "step": 15974 }, { - "epoch": 0.4533200908059024, + "epoch": 0.45269064014282073, "grad_norm": 0.0, - "learning_rate": 1.198480734946858e-05, - "loss": 0.9552, + "learning_rate": 1.2004545435482979e-05, + "loss": 0.9835, "step": 15975 }, { - "epoch": 0.45334846765039727, + "epoch": 0.45271897758508317, "grad_norm": 0.0, - "learning_rate": 1.198390654827041e-05, - "loss": 0.8918, + "learning_rate": 1.2003646266141949e-05, + "loss": 0.9683, "step": 15976 }, { - "epoch": 0.4533768444948922, + "epoch": 0.4527473150273456, "grad_norm": 0.0, - "learning_rate": 1.198300573031409e-05, - "loss": 0.9622, + "learning_rate": 1.2002747079923459e-05, + "loss": 0.8935, "step": 15977 }, { - "epoch": 0.45340522133938704, + "epoch": 0.4527756524696081, "grad_norm": 0.0, - "learning_rate": 1.1982104895607237e-05, - "loss": 0.9718, + "learning_rate": 1.200184787683509e-05, + "loss": 0.8456, "step": 15978 }, { - "epoch": 0.45343359818388196, + "epoch": 0.45280398991187054, "grad_norm": 0.0, - "learning_rate": 1.1981204044157451e-05, - "loss": 0.8998, + "learning_rate": 1.2000948656884408e-05, + "loss": 0.8465, "step": 15979 }, { - "epoch": 0.4534619750283768, + "epoch": 0.45283232735413304, "grad_norm": 0.0, - "learning_rate": 1.1980303175972343e-05, - "loss": 0.865, + "learning_rate": 1.2000049420078996e-05, + "loss": 0.9527, "step": 15980 }, { - "epoch": 0.45349035187287173, + "epoch": 0.4528606647963955, "grad_norm": 0.0, - "learning_rate": 1.1979402291059528e-05, - "loss": 0.8443, + "learning_rate": 1.1999150166426426e-05, + "loss": 0.8966, "step": 15981 }, { - "epoch": 0.45351872871736665, + "epoch": 0.4528890022386579, "grad_norm": 0.0, - "learning_rate": 1.1978501389426611e-05, - "loss": 0.7745, + "learning_rate": 1.1998250895934267e-05, + "loss": 0.8372, "step": 15982 }, { - "epoch": 0.4535471055618615, + "epoch": 0.4529173396809204, "grad_norm": 0.0, - "learning_rate": 1.1977600471081203e-05, - "loss": 0.9978, + "learning_rate": 1.1997351608610102e-05, + "loss": 0.8178, "step": 15983 }, { - "epoch": 0.4535754824063564, + "epoch": 0.45294567712318284, "grad_norm": 0.0, - "learning_rate": 1.1976699536030918e-05, - "loss": 0.9158, + "learning_rate": 1.1996452304461502e-05, + "loss": 0.8383, "step": 15984 }, { - "epoch": 0.4536038592508513, + "epoch": 0.45297401456544534, "grad_norm": 0.0, - "learning_rate": 1.1975798584283362e-05, - "loss": 0.8642, + "learning_rate": 1.1995552983496043e-05, + "loss": 1.0205, "step": 15985 }, { - "epoch": 0.4536322360953462, + "epoch": 0.4530023520077078, "grad_norm": 0.0, - "learning_rate": 1.1974897615846148e-05, - "loss": 1.0098, + "learning_rate": 1.1994653645721304e-05, + "loss": 0.8902, "step": 15986 }, { - "epoch": 0.4536606129398411, + "epoch": 0.45303068944997027, "grad_norm": 0.0, - "learning_rate": 1.1973996630726882e-05, - "loss": 1.0734, + "learning_rate": 1.199375429114485e-05, + "loss": 0.948, "step": 15987 }, { - "epoch": 0.45368898978433597, + "epoch": 0.4530590268922327, "grad_norm": 0.0, - "learning_rate": 1.1973095628933182e-05, - "loss": 0.9185, + "learning_rate": 1.1992854919774269e-05, + "loss": 0.8264, "step": 15988 }, { - "epoch": 0.4537173666288309, + "epoch": 0.45308736433449515, "grad_norm": 0.0, - "learning_rate": 1.1972194610472655e-05, - "loss": 0.9017, + "learning_rate": 1.1991955531617126e-05, + "loss": 0.8883, "step": 15989 }, { - "epoch": 0.45374574347332575, + "epoch": 0.45311570177675764, "grad_norm": 0.0, - "learning_rate": 1.1971293575352902e-05, - "loss": 0.8118, + "learning_rate": 1.1991056126681005e-05, + "loss": 1.008, "step": 15990 }, { - "epoch": 0.45377412031782066, + "epoch": 0.4531440392190201, "grad_norm": 0.0, - "learning_rate": 1.197039252358155e-05, - "loss": 0.8166, + "learning_rate": 1.1990156704973478e-05, + "loss": 0.8871, "step": 15991 }, { - "epoch": 0.4538024971623156, + "epoch": 0.45317237666128257, "grad_norm": 0.0, - "learning_rate": 1.1969491455166207e-05, - "loss": 0.8795, + "learning_rate": 1.1989257266502121e-05, + "loss": 0.8676, "step": 15992 }, { - "epoch": 0.45383087400681044, + "epoch": 0.453200714103545, "grad_norm": 0.0, - "learning_rate": 1.1968590370114477e-05, - "loss": 0.9559, + "learning_rate": 1.1988357811274514e-05, + "loss": 1.0244, "step": 15993 }, { - "epoch": 0.45385925085130535, + "epoch": 0.45322905154580745, "grad_norm": 0.0, - "learning_rate": 1.1967689268433978e-05, - "loss": 0.9036, + "learning_rate": 1.198745833929823e-05, + "loss": 0.8786, "step": 15994 }, { - "epoch": 0.4538876276958002, + "epoch": 0.45325738898806994, "grad_norm": 0.0, - "learning_rate": 1.1966788150132318e-05, - "loss": 0.8484, + "learning_rate": 1.1986558850580843e-05, + "loss": 0.799, "step": 15995 }, { - "epoch": 0.4539160045402951, + "epoch": 0.4532857264303324, "grad_norm": 0.0, - "learning_rate": 1.1965887015217107e-05, - "loss": 0.9054, + "learning_rate": 1.1985659345129936e-05, + "loss": 0.7992, "step": 15996 }, { - "epoch": 0.45394438138479, + "epoch": 0.4533140638725949, "grad_norm": 0.0, - "learning_rate": 1.1964985863695966e-05, - "loss": 0.91, + "learning_rate": 1.1984759822953083e-05, + "loss": 0.93, "step": 15997 }, { - "epoch": 0.4539727582292849, + "epoch": 0.4533424013148573, "grad_norm": 0.0, - "learning_rate": 1.1964084695576496e-05, - "loss": 0.877, + "learning_rate": 1.1983860284057862e-05, + "loss": 1.0229, "step": 15998 }, { - "epoch": 0.4540011350737798, + "epoch": 0.4533707387571198, "grad_norm": 0.0, - "learning_rate": 1.1963183510866317e-05, - "loss": 0.9429, + "learning_rate": 1.1982960728451847e-05, + "loss": 0.9218, "step": 15999 }, { - "epoch": 0.4540295119182747, + "epoch": 0.45339907619938224, "grad_norm": 0.0, - "learning_rate": 1.1962282309573037e-05, - "loss": 0.8823, + "learning_rate": 1.1982061156142618e-05, + "loss": 0.8706, "step": 16000 }, { - "epoch": 0.4540578887627696, + "epoch": 0.4534274136416447, "grad_norm": 0.0, - "learning_rate": 1.1961381091704271e-05, - "loss": 0.9221, + "learning_rate": 1.1981161567137754e-05, + "loss": 0.7808, "step": 16001 }, { - "epoch": 0.45408626560726445, + "epoch": 0.4534557510839072, "grad_norm": 0.0, - "learning_rate": 1.1960479857267632e-05, - "loss": 0.9001, + "learning_rate": 1.1980261961444826e-05, + "loss": 0.9306, "step": 16002 }, { - "epoch": 0.45411464245175936, + "epoch": 0.4534840885261696, "grad_norm": 0.0, - "learning_rate": 1.195957860627073e-05, - "loss": 0.9371, + "learning_rate": 1.1979362339071421e-05, + "loss": 0.8721, "step": 16003 }, { - "epoch": 0.4541430192962543, + "epoch": 0.4535124259684321, "grad_norm": 0.0, - "learning_rate": 1.1958677338721181e-05, - "loss": 0.9423, + "learning_rate": 1.1978462700025109e-05, + "loss": 0.991, "step": 16004 }, { - "epoch": 0.45417139614074914, + "epoch": 0.45354076341069455, "grad_norm": 0.0, - "learning_rate": 1.1957776054626593e-05, - "loss": 0.7964, + "learning_rate": 1.1977563044313471e-05, + "loss": 0.9902, "step": 16005 }, { - "epoch": 0.45419977298524405, + "epoch": 0.453569100852957, "grad_norm": 0.0, - "learning_rate": 1.1956874753994585e-05, - "loss": 0.8996, + "learning_rate": 1.1976663371944085e-05, + "loss": 0.8786, "step": 16006 }, { - "epoch": 0.4542281498297389, + "epoch": 0.4535974382952195, "grad_norm": 0.0, - "learning_rate": 1.1955973436832768e-05, - "loss": 0.8883, + "learning_rate": 1.1975763682924532e-05, + "loss": 0.8515, "step": 16007 }, { - "epoch": 0.45425652667423383, + "epoch": 0.4536257757374819, "grad_norm": 0.0, - "learning_rate": 1.1955072103148755e-05, - "loss": 0.9449, + "learning_rate": 1.1974863977262386e-05, + "loss": 0.8674, "step": 16008 }, { - "epoch": 0.45428490351872874, + "epoch": 0.4536541131797444, "grad_norm": 0.0, - "learning_rate": 1.1954170752950163e-05, - "loss": 0.8698, + "learning_rate": 1.1973964254965224e-05, + "loss": 0.9705, "step": 16009 }, { - "epoch": 0.4543132803632236, + "epoch": 0.45368245062200685, "grad_norm": 0.0, - "learning_rate": 1.1953269386244598e-05, - "loss": 0.975, + "learning_rate": 1.1973064516040634e-05, + "loss": 0.8296, "step": 16010 }, { - "epoch": 0.4543416572077185, + "epoch": 0.45371078806426934, "grad_norm": 0.0, - "learning_rate": 1.1952368003039681e-05, - "loss": 0.808, + "learning_rate": 1.1972164760496187e-05, + "loss": 0.8255, "step": 16011 }, { - "epoch": 0.4543700340522134, + "epoch": 0.4537391255065318, "grad_norm": 0.0, - "learning_rate": 1.1951466603343025e-05, - "loss": 0.8901, + "learning_rate": 1.197126498833946e-05, + "loss": 0.8721, "step": 16012 }, { - "epoch": 0.4543984108967083, + "epoch": 0.4537674629487942, "grad_norm": 0.0, - "learning_rate": 1.195056518716224e-05, - "loss": 0.8163, + "learning_rate": 1.1970365199578043e-05, + "loss": 0.959, "step": 16013 }, { - "epoch": 0.45442678774120315, + "epoch": 0.4537958003910567, "grad_norm": 0.0, - "learning_rate": 1.1949663754504944e-05, - "loss": 0.8837, + "learning_rate": 1.1969465394219503e-05, + "loss": 0.8485, "step": 16014 }, { - "epoch": 0.45445516458569807, + "epoch": 0.45382413783331915, "grad_norm": 0.0, - "learning_rate": 1.1948762305378751e-05, - "loss": 0.8688, + "learning_rate": 1.1968565572271426e-05, + "loss": 0.7058, "step": 16015 }, { - "epoch": 0.454483541430193, + "epoch": 0.45385247527558165, "grad_norm": 0.0, - "learning_rate": 1.1947860839791276e-05, - "loss": 0.8203, + "learning_rate": 1.196766573374139e-05, + "loss": 1.0334, "step": 16016 }, { - "epoch": 0.45451191827468784, + "epoch": 0.4538808127178441, "grad_norm": 0.0, - "learning_rate": 1.1946959357750132e-05, - "loss": 0.8373, + "learning_rate": 1.1966765878636974e-05, + "loss": 1.0446, "step": 16017 }, { - "epoch": 0.45454029511918276, + "epoch": 0.4539091501601065, "grad_norm": 0.0, - "learning_rate": 1.1946057859262936e-05, - "loss": 0.9549, + "learning_rate": 1.1965866006965762e-05, + "loss": 0.9327, "step": 16018 }, { - "epoch": 0.4545686719636776, + "epoch": 0.453937487602369, "grad_norm": 0.0, - "learning_rate": 1.1945156344337303e-05, - "loss": 0.8099, + "learning_rate": 1.196496611873533e-05, + "loss": 1.0987, "step": 16019 }, { - "epoch": 0.45459704880817253, + "epoch": 0.45396582504463145, "grad_norm": 0.0, - "learning_rate": 1.1944254812980841e-05, - "loss": 0.9709, + "learning_rate": 1.1964066213953256e-05, + "loss": 0.8869, "step": 16020 }, { - "epoch": 0.45462542565266745, + "epoch": 0.45399416248689395, "grad_norm": 0.0, - "learning_rate": 1.1943353265201177e-05, - "loss": 0.9062, + "learning_rate": 1.1963166292627125e-05, + "loss": 0.9589, "step": 16021 }, { - "epoch": 0.4546538024971623, + "epoch": 0.4540224999291564, "grad_norm": 0.0, - "learning_rate": 1.194245170100592e-05, - "loss": 0.9382, + "learning_rate": 1.1962266354764513e-05, + "loss": 0.9998, "step": 16022 }, { - "epoch": 0.4546821793416572, + "epoch": 0.4540508373714189, "grad_norm": 0.0, - "learning_rate": 1.1941550120402683e-05, - "loss": 0.8557, + "learning_rate": 1.1961366400373005e-05, + "loss": 0.9937, "step": 16023 }, { - "epoch": 0.4547105561861521, + "epoch": 0.4540791748136813, "grad_norm": 0.0, - "learning_rate": 1.1940648523399085e-05, - "loss": 0.9271, + "learning_rate": 1.1960466429460179e-05, + "loss": 0.9682, "step": 16024 }, { - "epoch": 0.454738933030647, + "epoch": 0.45410751225594376, "grad_norm": 0.0, - "learning_rate": 1.1939746910002745e-05, - "loss": 0.9989, + "learning_rate": 1.1959566442033615e-05, + "loss": 0.9127, "step": 16025 }, { - "epoch": 0.4547673098751419, + "epoch": 0.45413584969820625, "grad_norm": 0.0, - "learning_rate": 1.1938845280221275e-05, - "loss": 0.8996, + "learning_rate": 1.1958666438100897e-05, + "loss": 0.8721, "step": 16026 }, { - "epoch": 0.45479568671963677, + "epoch": 0.4541641871404687, "grad_norm": 0.0, - "learning_rate": 1.193794363406229e-05, - "loss": 0.9485, + "learning_rate": 1.1957766417669605e-05, + "loss": 0.8615, "step": 16027 }, { - "epoch": 0.4548240635641317, + "epoch": 0.4541925245827312, "grad_norm": 0.0, - "learning_rate": 1.1937041971533407e-05, - "loss": 0.8887, + "learning_rate": 1.1956866380747316e-05, + "loss": 0.8828, "step": 16028 }, { - "epoch": 0.45485244040862655, + "epoch": 0.4542208620249936, "grad_norm": 0.0, - "learning_rate": 1.1936140292642247e-05, - "loss": 0.9808, + "learning_rate": 1.1955966327341614e-05, + "loss": 0.8864, "step": 16029 }, { - "epoch": 0.45488081725312146, + "epoch": 0.45424919946725606, "grad_norm": 0.0, - "learning_rate": 1.1935238597396421e-05, - "loss": 0.7949, + "learning_rate": 1.1955066257460086e-05, + "loss": 0.7939, "step": 16030 }, { - "epoch": 0.4549091940976163, + "epoch": 0.45427753690951855, "grad_norm": 0.0, - "learning_rate": 1.1934336885803548e-05, - "loss": 0.924, + "learning_rate": 1.1954166171110308e-05, + "loss": 0.9968, "step": 16031 }, { - "epoch": 0.45493757094211124, + "epoch": 0.454305874351781, "grad_norm": 0.0, - "learning_rate": 1.1933435157871245e-05, - "loss": 0.8395, + "learning_rate": 1.1953266068299863e-05, + "loss": 0.851, "step": 16032 }, { - "epoch": 0.45496594778660615, + "epoch": 0.4543342117940435, "grad_norm": 0.0, - "learning_rate": 1.1932533413607128e-05, - "loss": 0.9522, + "learning_rate": 1.195236594903633e-05, + "loss": 0.8604, "step": 16033 }, { - "epoch": 0.454994324631101, + "epoch": 0.4543625492363059, "grad_norm": 0.0, - "learning_rate": 1.1931631653018812e-05, - "loss": 0.8533, + "learning_rate": 1.1951465813327294e-05, + "loss": 0.8206, "step": 16034 }, { - "epoch": 0.4550227014755959, + "epoch": 0.4543908866785684, "grad_norm": 0.0, - "learning_rate": 1.1930729876113918e-05, - "loss": 0.8814, + "learning_rate": 1.1950565661180337e-05, + "loss": 0.9493, "step": 16035 }, { - "epoch": 0.4550510783200908, + "epoch": 0.45441922412083086, "grad_norm": 0.0, - "learning_rate": 1.1929828082900062e-05, - "loss": 0.8327, + "learning_rate": 1.1949665492603045e-05, + "loss": 0.9279, "step": 16036 }, { - "epoch": 0.4550794551645857, + "epoch": 0.4544475615630933, "grad_norm": 0.0, - "learning_rate": 1.192892627338486e-05, - "loss": 0.8452, + "learning_rate": 1.194876530760299e-05, + "loss": 0.894, "step": 16037 }, { - "epoch": 0.4551078320090806, + "epoch": 0.4544758990053558, "grad_norm": 0.0, - "learning_rate": 1.1928024447575934e-05, - "loss": 0.85, + "learning_rate": 1.1947865106187766e-05, + "loss": 0.8639, "step": 16038 }, { - "epoch": 0.4551362088535755, + "epoch": 0.4545042364476182, "grad_norm": 0.0, - "learning_rate": 1.1927122605480899e-05, - "loss": 1.0073, + "learning_rate": 1.1946964888364949e-05, + "loss": 0.9453, "step": 16039 }, { - "epoch": 0.4551645856980704, + "epoch": 0.4545325738898807, "grad_norm": 0.0, - "learning_rate": 1.1926220747107372e-05, - "loss": 0.9376, + "learning_rate": 1.1946064654142124e-05, + "loss": 0.8707, "step": 16040 }, { - "epoch": 0.45519296254256525, + "epoch": 0.45456091133214316, "grad_norm": 0.0, - "learning_rate": 1.192531887246297e-05, - "loss": 0.8592, + "learning_rate": 1.1945164403526874e-05, + "loss": 0.9455, "step": 16041 }, { - "epoch": 0.45522133938706016, + "epoch": 0.4545892487744056, "grad_norm": 0.0, - "learning_rate": 1.1924416981555314e-05, - "loss": 0.9031, + "learning_rate": 1.194426413652678e-05, + "loss": 0.8567, "step": 16042 }, { - "epoch": 0.455249716231555, + "epoch": 0.4546175862166681, "grad_norm": 0.0, - "learning_rate": 1.1923515074392022e-05, - "loss": 0.8862, + "learning_rate": 1.194336385314943e-05, + "loss": 0.8056, "step": 16043 }, { - "epoch": 0.45527809307604994, + "epoch": 0.45464592365893053, "grad_norm": 0.0, - "learning_rate": 1.1922613150980708e-05, - "loss": 0.925, + "learning_rate": 1.1942463553402407e-05, + "loss": 0.873, "step": 16044 }, { - "epoch": 0.45530646992054485, + "epoch": 0.454674261101193, "grad_norm": 0.0, - "learning_rate": 1.1921711211329e-05, - "loss": 1.0089, + "learning_rate": 1.1941563237293285e-05, + "loss": 0.996, "step": 16045 }, { - "epoch": 0.4553348467650397, + "epoch": 0.45470259854345546, "grad_norm": 0.0, - "learning_rate": 1.1920809255444507e-05, - "loss": 0.9448, + "learning_rate": 1.1940662904829661e-05, + "loss": 0.8824, "step": 16046 }, { - "epoch": 0.45536322360953463, + "epoch": 0.45473093598571795, "grad_norm": 0.0, - "learning_rate": 1.1919907283334854e-05, - "loss": 1.0126, + "learning_rate": 1.193976255601911e-05, + "loss": 0.8484, "step": 16047 }, { - "epoch": 0.4553916004540295, + "epoch": 0.4547592734279804, "grad_norm": 0.0, - "learning_rate": 1.1919005295007655e-05, - "loss": 0.8771, + "learning_rate": 1.1938862190869218e-05, + "loss": 0.8727, "step": 16048 }, { - "epoch": 0.4554199772985244, + "epoch": 0.45478761087024283, "grad_norm": 0.0, - "learning_rate": 1.1918103290470535e-05, - "loss": 0.8765, + "learning_rate": 1.1937961809387569e-05, + "loss": 0.9152, "step": 16049 }, { - "epoch": 0.4554483541430193, + "epoch": 0.4548159483125053, "grad_norm": 0.0, - "learning_rate": 1.1917201269731109e-05, - "loss": 0.9325, + "learning_rate": 1.1937061411581752e-05, + "loss": 0.9261, "step": 16050 }, { - "epoch": 0.4554767309875142, + "epoch": 0.45484428575476776, "grad_norm": 0.0, - "learning_rate": 1.1916299232796997e-05, - "loss": 0.9351, + "learning_rate": 1.1936160997459344e-05, + "loss": 0.875, "step": 16051 }, { - "epoch": 0.4555051078320091, + "epoch": 0.45487262319703026, "grad_norm": 0.0, - "learning_rate": 1.191539717967582e-05, - "loss": 0.8805, + "learning_rate": 1.1935260567027936e-05, + "loss": 0.9808, "step": 16052 }, { - "epoch": 0.45553348467650395, + "epoch": 0.4549009606392927, "grad_norm": 0.0, - "learning_rate": 1.1914495110375199e-05, - "loss": 0.9759, + "learning_rate": 1.1934360120295105e-05, + "loss": 0.9431, "step": 16053 }, { - "epoch": 0.45556186152099887, + "epoch": 0.45492929808155513, "grad_norm": 0.0, - "learning_rate": 1.1913593024902748e-05, - "loss": 0.9126, + "learning_rate": 1.1933459657268444e-05, + "loss": 0.8605, "step": 16054 }, { - "epoch": 0.4555902383654938, + "epoch": 0.4549576355238176, "grad_norm": 0.0, - "learning_rate": 1.1912690923266095e-05, - "loss": 0.8929, + "learning_rate": 1.1932559177955533e-05, + "loss": 0.8879, "step": 16055 }, { - "epoch": 0.45561861520998864, + "epoch": 0.45498597296608007, "grad_norm": 0.0, - "learning_rate": 1.1911788805472853e-05, - "loss": 0.8643, + "learning_rate": 1.193165868236396e-05, + "loss": 0.8257, "step": 16056 }, { - "epoch": 0.45564699205448356, + "epoch": 0.45501431040834256, "grad_norm": 0.0, - "learning_rate": 1.1910886671530644e-05, - "loss": 0.8749, + "learning_rate": 1.1930758170501306e-05, + "loss": 0.9856, "step": 16057 }, { - "epoch": 0.4556753688989784, + "epoch": 0.455042647850605, "grad_norm": 0.0, - "learning_rate": 1.1909984521447091e-05, - "loss": 0.7935, + "learning_rate": 1.1929857642375162e-05, + "loss": 0.9903, "step": 16058 }, { - "epoch": 0.45570374574347333, + "epoch": 0.4550709852928675, "grad_norm": 0.0, - "learning_rate": 1.1909082355229812e-05, - "loss": 0.892, + "learning_rate": 1.192895709799311e-05, + "loss": 1.0271, "step": 16059 }, { - "epoch": 0.4557321225879682, + "epoch": 0.45509932273512993, "grad_norm": 0.0, - "learning_rate": 1.190818017288643e-05, - "loss": 0.8882, + "learning_rate": 1.1928056537362736e-05, + "loss": 0.9309, "step": 16060 }, { - "epoch": 0.4557604994324631, + "epoch": 0.45512766017739237, "grad_norm": 0.0, - "learning_rate": 1.1907277974424562e-05, - "loss": 0.8821, + "learning_rate": 1.1927155960491623e-05, + "loss": 0.8491, "step": 16061 }, { - "epoch": 0.455788876276958, + "epoch": 0.45515599761965486, "grad_norm": 0.0, - "learning_rate": 1.1906375759851834e-05, - "loss": 0.8327, + "learning_rate": 1.1926255367387361e-05, + "loss": 0.971, "step": 16062 }, { - "epoch": 0.4558172531214529, + "epoch": 0.4551843350619173, "grad_norm": 0.0, - "learning_rate": 1.1905473529175864e-05, - "loss": 0.9045, + "learning_rate": 1.1925354758057535e-05, + "loss": 0.9035, "step": 16063 }, { - "epoch": 0.4558456299659478, + "epoch": 0.4552126725041798, "grad_norm": 0.0, - "learning_rate": 1.1904571282404271e-05, - "loss": 0.9839, + "learning_rate": 1.1924454132509733e-05, + "loss": 0.8164, "step": 16064 }, { - "epoch": 0.45587400681044266, + "epoch": 0.45524100994644223, "grad_norm": 0.0, - "learning_rate": 1.1903669019544682e-05, - "loss": 0.8322, + "learning_rate": 1.1923553490751539e-05, + "loss": 0.8525, "step": 16065 }, { - "epoch": 0.45590238365493757, + "epoch": 0.45526934738870467, "grad_norm": 0.0, - "learning_rate": 1.1902766740604714e-05, - "loss": 0.8847, + "learning_rate": 1.1922652832790536e-05, + "loss": 0.8377, "step": 16066 }, { - "epoch": 0.4559307604994325, + "epoch": 0.45529768483096716, "grad_norm": 0.0, - "learning_rate": 1.1901864445591987e-05, - "loss": 0.9021, + "learning_rate": 1.1921752158634316e-05, + "loss": 0.8116, "step": 16067 }, { - "epoch": 0.45595913734392735, + "epoch": 0.4553260222732296, "grad_norm": 0.0, - "learning_rate": 1.1900962134514127e-05, - "loss": 0.8859, + "learning_rate": 1.1920851468290465e-05, + "loss": 0.8761, "step": 16068 }, { - "epoch": 0.45598751418842226, + "epoch": 0.4553543597154921, "grad_norm": 0.0, - "learning_rate": 1.1900059807378757e-05, - "loss": 0.8465, + "learning_rate": 1.1919950761766568e-05, + "loss": 0.863, "step": 16069 }, { - "epoch": 0.4560158910329171, + "epoch": 0.45538269715775453, "grad_norm": 0.0, - "learning_rate": 1.1899157464193493e-05, - "loss": 0.9252, + "learning_rate": 1.191905003907021e-05, + "loss": 0.8622, "step": 16070 }, { - "epoch": 0.45604426787741204, + "epoch": 0.45541103460001703, "grad_norm": 0.0, - "learning_rate": 1.189825510496596e-05, - "loss": 0.8835, + "learning_rate": 1.1918149300208986e-05, + "loss": 0.9459, "step": 16071 }, { - "epoch": 0.45607264472190695, + "epoch": 0.45543937204227947, "grad_norm": 0.0, - "learning_rate": 1.1897352729703786e-05, - "loss": 0.7802, + "learning_rate": 1.1917248545190476e-05, + "loss": 0.8581, "step": 16072 }, { - "epoch": 0.4561010215664018, + "epoch": 0.4554677094845419, "grad_norm": 0.0, - "learning_rate": 1.1896450338414584e-05, - "loss": 0.9049, + "learning_rate": 1.191634777402227e-05, + "loss": 0.9023, "step": 16073 }, { - "epoch": 0.4561293984108967, + "epoch": 0.4554960469268044, "grad_norm": 0.0, - "learning_rate": 1.1895547931105982e-05, - "loss": 1.0201, + "learning_rate": 1.1915446986711953e-05, + "loss": 0.9535, "step": 16074 }, { - "epoch": 0.4561577752553916, + "epoch": 0.45552438436906684, "grad_norm": 0.0, - "learning_rate": 1.18946455077856e-05, - "loss": 0.7984, + "learning_rate": 1.1914546183267115e-05, + "loss": 0.9868, "step": 16075 }, { - "epoch": 0.4561861520998865, + "epoch": 0.45555272181132933, "grad_norm": 0.0, - "learning_rate": 1.1893743068461064e-05, - "loss": 0.9276, + "learning_rate": 1.1913645363695345e-05, + "loss": 1.0224, "step": 16076 }, { - "epoch": 0.45621452894438136, + "epoch": 0.45558105925359177, "grad_norm": 0.0, - "learning_rate": 1.1892840613139995e-05, - "loss": 0.8734, + "learning_rate": 1.1912744528004233e-05, + "loss": 0.9698, "step": 16077 }, { - "epoch": 0.4562429057888763, + "epoch": 0.4556093966958542, "grad_norm": 0.0, - "learning_rate": 1.1891938141830015e-05, - "loss": 0.7728, + "learning_rate": 1.1911843676201356e-05, + "loss": 0.9394, "step": 16078 }, { - "epoch": 0.4562712826333712, + "epoch": 0.4556377341381167, "grad_norm": 0.0, - "learning_rate": 1.189103565453875e-05, - "loss": 0.8426, + "learning_rate": 1.1910942808294315e-05, + "loss": 0.8529, "step": 16079 }, { - "epoch": 0.45629965947786605, + "epoch": 0.45566607158037914, "grad_norm": 0.0, - "learning_rate": 1.1890133151273822e-05, - "loss": 0.8976, + "learning_rate": 1.1910041924290692e-05, + "loss": 0.8785, "step": 16080 }, { - "epoch": 0.45632803632236096, + "epoch": 0.45569440902264163, "grad_norm": 0.0, - "learning_rate": 1.1889230632042851e-05, - "loss": 0.83, + "learning_rate": 1.1909141024198076e-05, + "loss": 0.8729, "step": 16081 }, { - "epoch": 0.4563564131668558, + "epoch": 0.45572274646490407, "grad_norm": 0.0, - "learning_rate": 1.1888328096853465e-05, - "loss": 0.8744, + "learning_rate": 1.1908240108024053e-05, + "loss": 0.9141, "step": 16082 }, { - "epoch": 0.45638479001135074, + "epoch": 0.45575108390716657, "grad_norm": 0.0, - "learning_rate": 1.1887425545713291e-05, - "loss": 0.8984, + "learning_rate": 1.1907339175776222e-05, + "loss": 0.8034, "step": 16083 }, { - "epoch": 0.45641316685584565, + "epoch": 0.455779421349429, "grad_norm": 0.0, - "learning_rate": 1.1886522978629947e-05, - "loss": 0.9606, + "learning_rate": 1.1906438227462162e-05, + "loss": 0.9013, "step": 16084 }, { - "epoch": 0.4564415437003405, + "epoch": 0.45580775879169144, "grad_norm": 0.0, - "learning_rate": 1.1885620395611054e-05, - "loss": 0.8798, + "learning_rate": 1.1905537263089464e-05, + "loss": 0.8752, "step": 16085 }, { - "epoch": 0.45646992054483543, + "epoch": 0.45583609623395394, "grad_norm": 0.0, - "learning_rate": 1.1884717796664246e-05, - "loss": 0.9293, + "learning_rate": 1.1904636282665718e-05, + "loss": 0.8646, "step": 16086 }, { - "epoch": 0.4564982973893303, + "epoch": 0.4558644336762164, "grad_norm": 0.0, - "learning_rate": 1.188381518179714e-05, - "loss": 0.8072, + "learning_rate": 1.1903735286198514e-05, + "loss": 0.8929, "step": 16087 }, { - "epoch": 0.4565266742338252, + "epoch": 0.45589277111847887, "grad_norm": 0.0, - "learning_rate": 1.1882912551017362e-05, - "loss": 0.9212, + "learning_rate": 1.1902834273695438e-05, + "loss": 0.8952, "step": 16088 }, { - "epoch": 0.4565550510783201, + "epoch": 0.4559211085607413, "grad_norm": 0.0, - "learning_rate": 1.188200990433254e-05, - "loss": 0.9512, + "learning_rate": 1.1901933245164085e-05, + "loss": 0.9343, "step": 16089 }, { - "epoch": 0.456583427922815, + "epoch": 0.45594944600300374, "grad_norm": 0.0, - "learning_rate": 1.1881107241750289e-05, - "loss": 0.8698, + "learning_rate": 1.1901032200612043e-05, + "loss": 0.9105, "step": 16090 }, { - "epoch": 0.4566118047673099, + "epoch": 0.45597778344526624, "grad_norm": 0.0, - "learning_rate": 1.1880204563278244e-05, - "loss": 0.9896, + "learning_rate": 1.19001311400469e-05, + "loss": 1.0044, "step": 16091 }, { - "epoch": 0.45664018161180475, + "epoch": 0.4560061208875287, "grad_norm": 0.0, - "learning_rate": 1.1879301868924029e-05, - "loss": 0.8167, + "learning_rate": 1.1899230063476246e-05, + "loss": 0.8618, "step": 16092 }, { - "epoch": 0.45666855845629967, + "epoch": 0.45603445832979117, "grad_norm": 0.0, - "learning_rate": 1.1878399158695267e-05, - "loss": 1.0118, + "learning_rate": 1.1898328970907671e-05, + "loss": 1.0372, "step": 16093 }, { - "epoch": 0.4566969353007945, + "epoch": 0.4560627957720536, "grad_norm": 0.0, - "learning_rate": 1.187749643259958e-05, - "loss": 0.8527, + "learning_rate": 1.1897427862348768e-05, + "loss": 0.9026, "step": 16094 }, { - "epoch": 0.45672531214528944, + "epoch": 0.4560911332143161, "grad_norm": 0.0, - "learning_rate": 1.1876593690644595e-05, - "loss": 0.8034, + "learning_rate": 1.1896526737807124e-05, + "loss": 0.9729, "step": 16095 }, { - "epoch": 0.45675368898978436, + "epoch": 0.45611947065657854, "grad_norm": 0.0, - "learning_rate": 1.1875690932837942e-05, - "loss": 0.9097, + "learning_rate": 1.1895625597290333e-05, + "loss": 0.93, "step": 16096 }, { - "epoch": 0.4567820658342792, + "epoch": 0.456147808098841, "grad_norm": 0.0, - "learning_rate": 1.1874788159187243e-05, - "loss": 0.8402, + "learning_rate": 1.1894724440805982e-05, + "loss": 0.9549, "step": 16097 }, { - "epoch": 0.45681044267877413, + "epoch": 0.4561761455411035, "grad_norm": 0.0, - "learning_rate": 1.187388536970012e-05, - "loss": 0.9584, + "learning_rate": 1.1893823268361667e-05, + "loss": 0.8341, "step": 16098 }, { - "epoch": 0.456838819523269, + "epoch": 0.4562044829833659, "grad_norm": 0.0, - "learning_rate": 1.1872982564384208e-05, - "loss": 0.7984, + "learning_rate": 1.189292207996497e-05, + "loss": 0.8259, "step": 16099 }, { - "epoch": 0.4568671963677639, + "epoch": 0.4562328204256284, "grad_norm": 0.0, - "learning_rate": 1.1872079743247127e-05, - "loss": 0.9021, + "learning_rate": 1.189202087562349e-05, + "loss": 0.8616, "step": 16100 }, { - "epoch": 0.4568955732122588, + "epoch": 0.45626115786789084, "grad_norm": 0.0, - "learning_rate": 1.1871176906296502e-05, - "loss": 1.0009, + "learning_rate": 1.1891119655344815e-05, + "loss": 0.9559, "step": 16101 }, { - "epoch": 0.4569239500567537, + "epoch": 0.4562894953101533, "grad_norm": 0.0, - "learning_rate": 1.1870274053539966e-05, - "loss": 0.8922, + "learning_rate": 1.1890218419136538e-05, + "loss": 0.8984, "step": 16102 }, { - "epoch": 0.4569523269012486, + "epoch": 0.4563178327524158, "grad_norm": 0.0, - "learning_rate": 1.1869371184985139e-05, - "loss": 0.9117, + "learning_rate": 1.1889317167006247e-05, + "loss": 0.8753, "step": 16103 }, { - "epoch": 0.45698070374574346, + "epoch": 0.4563461701946782, "grad_norm": 0.0, - "learning_rate": 1.1868468300639646e-05, - "loss": 0.916, + "learning_rate": 1.1888415898961538e-05, + "loss": 0.8469, "step": 16104 }, { - "epoch": 0.45700908059023837, + "epoch": 0.4563745076369407, "grad_norm": 0.0, - "learning_rate": 1.1867565400511119e-05, - "loss": 0.9541, + "learning_rate": 1.188751461501e-05, + "loss": 1.0107, "step": 16105 }, { - "epoch": 0.4570374574347333, + "epoch": 0.45640284507920315, "grad_norm": 0.0, - "learning_rate": 1.1866662484607186e-05, - "loss": 0.9626, + "learning_rate": 1.1886613315159226e-05, + "loss": 0.8968, "step": 16106 }, { - "epoch": 0.45706583427922814, + "epoch": 0.45643118252146564, "grad_norm": 0.0, - "learning_rate": 1.186575955293547e-05, - "loss": 0.9178, + "learning_rate": 1.1885711999416804e-05, + "loss": 0.9781, "step": 16107 }, { - "epoch": 0.45709421112372306, + "epoch": 0.4564595199637281, "grad_norm": 0.0, - "learning_rate": 1.1864856605503596e-05, - "loss": 0.8685, + "learning_rate": 1.1884810667790334e-05, + "loss": 0.9047, "step": 16108 }, { - "epoch": 0.4571225879682179, + "epoch": 0.4564878574059905, "grad_norm": 0.0, - "learning_rate": 1.1863953642319199e-05, - "loss": 0.8206, + "learning_rate": 1.1883909320287406e-05, + "loss": 0.9324, "step": 16109 }, { - "epoch": 0.45715096481271283, + "epoch": 0.456516194848253, "grad_norm": 0.0, - "learning_rate": 1.1863050663389899e-05, - "loss": 0.8952, + "learning_rate": 1.1883007956915606e-05, + "loss": 0.8895, "step": 16110 }, { - "epoch": 0.4571793416572077, + "epoch": 0.45654453229051545, "grad_norm": 0.0, - "learning_rate": 1.1862147668723328e-05, - "loss": 0.9252, + "learning_rate": 1.188210657768253e-05, + "loss": 0.9059, "step": 16111 }, { - "epoch": 0.4572077185017026, + "epoch": 0.45657286973277794, "grad_norm": 0.0, - "learning_rate": 1.1861244658327111e-05, - "loss": 0.9057, + "learning_rate": 1.1881205182595774e-05, + "loss": 0.9421, "step": 16112 }, { - "epoch": 0.4572360953461975, + "epoch": 0.4566012071750404, "grad_norm": 0.0, - "learning_rate": 1.1860341632208874e-05, - "loss": 1.0225, + "learning_rate": 1.1880303771662926e-05, + "loss": 0.9235, "step": 16113 }, { - "epoch": 0.4572644721906924, + "epoch": 0.4566295446173028, "grad_norm": 0.0, - "learning_rate": 1.1859438590376251e-05, - "loss": 0.8469, + "learning_rate": 1.1879402344891581e-05, + "loss": 0.8156, "step": 16114 }, { - "epoch": 0.4572928490351873, + "epoch": 0.4566578820595653, "grad_norm": 0.0, - "learning_rate": 1.1858535532836866e-05, - "loss": 0.856, + "learning_rate": 1.1878500902289335e-05, + "loss": 0.8903, "step": 16115 }, { - "epoch": 0.45732122587968216, + "epoch": 0.45668621950182775, "grad_norm": 0.0, - "learning_rate": 1.1857632459598346e-05, - "loss": 0.9516, + "learning_rate": 1.1877599443863777e-05, + "loss": 0.8981, "step": 16116 }, { - "epoch": 0.4573496027241771, + "epoch": 0.45671455694409024, "grad_norm": 0.0, - "learning_rate": 1.1856729370668325e-05, - "loss": 1.0113, + "learning_rate": 1.1876697969622502e-05, + "loss": 0.8897, "step": 16117 }, { - "epoch": 0.457377979568672, + "epoch": 0.4567428943863527, "grad_norm": 0.0, - "learning_rate": 1.1855826266054425e-05, - "loss": 0.965, + "learning_rate": 1.1875796479573104e-05, + "loss": 0.8852, "step": 16118 }, { - "epoch": 0.45740635641316685, + "epoch": 0.4567712318286152, "grad_norm": 0.0, - "learning_rate": 1.1854923145764279e-05, - "loss": 0.8672, + "learning_rate": 1.1874894973723173e-05, + "loss": 0.8961, "step": 16119 }, { - "epoch": 0.45743473325766176, + "epoch": 0.4567995692708776, "grad_norm": 0.0, - "learning_rate": 1.1854020009805511e-05, - "loss": 0.9708, + "learning_rate": 1.1873993452080304e-05, + "loss": 0.943, "step": 16120 }, { - "epoch": 0.4574631101021566, + "epoch": 0.45682790671314005, "grad_norm": 0.0, - "learning_rate": 1.1853116858185754e-05, - "loss": 0.9513, + "learning_rate": 1.1873091914652096e-05, + "loss": 0.747, "step": 16121 }, { - "epoch": 0.45749148694665154, + "epoch": 0.45685624415540255, "grad_norm": 0.0, - "learning_rate": 1.1852213690912632e-05, - "loss": 0.8508, + "learning_rate": 1.1872190361446139e-05, + "loss": 0.9544, "step": 16122 }, { - "epoch": 0.4575198637911464, + "epoch": 0.456884581597665, "grad_norm": 0.0, - "learning_rate": 1.1851310507993784e-05, - "loss": 0.8807, + "learning_rate": 1.1871288792470026e-05, + "loss": 0.9588, "step": 16123 }, { - "epoch": 0.4575482406356413, + "epoch": 0.4569129190399275, "grad_norm": 0.0, - "learning_rate": 1.1850407309436831e-05, - "loss": 0.9282, + "learning_rate": 1.1870387207731353e-05, + "loss": 0.8467, "step": 16124 }, { - "epoch": 0.4575766174801362, + "epoch": 0.4569412564821899, "grad_norm": 0.0, - "learning_rate": 1.1849504095249402e-05, - "loss": 0.9585, + "learning_rate": 1.1869485607237714e-05, + "loss": 0.7643, "step": 16125 }, { - "epoch": 0.4576049943246311, + "epoch": 0.45696959392445236, "grad_norm": 0.0, - "learning_rate": 1.1848600865439131e-05, - "loss": 0.8961, + "learning_rate": 1.1868583990996702e-05, + "loss": 0.9395, "step": 16126 }, { - "epoch": 0.457633371169126, + "epoch": 0.45699793136671485, "grad_norm": 0.0, - "learning_rate": 1.1847697620013645e-05, - "loss": 0.9299, + "learning_rate": 1.1867682359015915e-05, + "loss": 0.7845, "step": 16127 }, { - "epoch": 0.45766174801362086, + "epoch": 0.4570262688089773, "grad_norm": 0.0, - "learning_rate": 1.1846794358980573e-05, - "loss": 0.8652, + "learning_rate": 1.1866780711302946e-05, + "loss": 0.9131, "step": 16128 }, { - "epoch": 0.4576901248581158, + "epoch": 0.4570546062512398, "grad_norm": 0.0, - "learning_rate": 1.1845891082347545e-05, - "loss": 1.0438, + "learning_rate": 1.186587904786539e-05, + "loss": 0.9476, "step": 16129 }, { - "epoch": 0.4577185017026107, + "epoch": 0.4570829436935022, "grad_norm": 0.0, - "learning_rate": 1.1844987790122195e-05, - "loss": 0.9524, + "learning_rate": 1.1864977368710841e-05, + "loss": 0.9056, "step": 16130 }, { - "epoch": 0.45774687854710555, + "epoch": 0.4571112811357647, "grad_norm": 0.0, - "learning_rate": 1.1844084482312148e-05, - "loss": 1.0616, + "learning_rate": 1.1864075673846897e-05, + "loss": 0.8388, "step": 16131 }, { - "epoch": 0.45777525539160047, + "epoch": 0.45713961857802715, "grad_norm": 0.0, - "learning_rate": 1.1843181158925036e-05, - "loss": 0.8762, + "learning_rate": 1.186317396328115e-05, + "loss": 1.066, "step": 16132 }, { - "epoch": 0.4578036322360953, + "epoch": 0.4571679560202896, "grad_norm": 0.0, - "learning_rate": 1.1842277819968493e-05, - "loss": 0.8398, + "learning_rate": 1.1862272237021193e-05, + "loss": 0.974, "step": 16133 }, { - "epoch": 0.45783200908059024, + "epoch": 0.4571962934625521, "grad_norm": 0.0, - "learning_rate": 1.1841374465450146e-05, - "loss": 0.822, + "learning_rate": 1.1861370495074631e-05, + "loss": 0.9903, "step": 16134 }, { - "epoch": 0.45786038592508516, + "epoch": 0.4572246309048145, "grad_norm": 0.0, - "learning_rate": 1.1840471095377623e-05, - "loss": 0.9466, + "learning_rate": 1.1860468737449054e-05, + "loss": 0.9017, "step": 16135 }, { - "epoch": 0.45788876276958, + "epoch": 0.457252968347077, "grad_norm": 0.0, - "learning_rate": 1.183956770975856e-05, - "loss": 0.8609, + "learning_rate": 1.1859566964152053e-05, + "loss": 0.875, "step": 16136 }, { - "epoch": 0.45791713961407493, + "epoch": 0.45728130578933945, "grad_norm": 0.0, - "learning_rate": 1.1838664308600585e-05, - "loss": 0.8407, + "learning_rate": 1.1858665175191233e-05, + "loss": 0.8519, "step": 16137 }, { - "epoch": 0.4579455164585698, + "epoch": 0.4573096432316019, "grad_norm": 0.0, - "learning_rate": 1.1837760891911332e-05, - "loss": 0.881, + "learning_rate": 1.1857763370574188e-05, + "loss": 0.7549, "step": 16138 }, { - "epoch": 0.4579738933030647, + "epoch": 0.4573379806738644, "grad_norm": 0.0, - "learning_rate": 1.1836857459698427e-05, - "loss": 0.9619, + "learning_rate": 1.1856861550308507e-05, + "loss": 0.8711, "step": 16139 }, { - "epoch": 0.45800227014755956, + "epoch": 0.4573663181161268, "grad_norm": 0.0, - "learning_rate": 1.1835954011969508e-05, - "loss": 0.8104, + "learning_rate": 1.1855959714401792e-05, + "loss": 0.9433, "step": 16140 }, { - "epoch": 0.4580306469920545, + "epoch": 0.4573946555583893, "grad_norm": 0.0, - "learning_rate": 1.1835050548732202e-05, - "loss": 0.9669, + "learning_rate": 1.1855057862861643e-05, + "loss": 0.9499, "step": 16141 }, { - "epoch": 0.4580590238365494, + "epoch": 0.45742299300065176, "grad_norm": 0.0, - "learning_rate": 1.183414706999414e-05, - "loss": 1.0205, + "learning_rate": 1.1854155995695652e-05, + "loss": 0.8597, "step": 16142 }, { - "epoch": 0.45808740068104425, + "epoch": 0.45745133044291425, "grad_norm": 0.0, - "learning_rate": 1.1833243575762956e-05, - "loss": 0.8468, + "learning_rate": 1.1853254112911416e-05, + "loss": 0.8213, "step": 16143 }, { - "epoch": 0.45811577752553917, + "epoch": 0.4574796678851767, "grad_norm": 0.0, - "learning_rate": 1.1832340066046279e-05, - "loss": 0.9825, + "learning_rate": 1.185235221451653e-05, + "loss": 0.9201, "step": 16144 }, { - "epoch": 0.45814415437003403, + "epoch": 0.4575080053274391, "grad_norm": 0.0, - "learning_rate": 1.1831436540851743e-05, - "loss": 0.8827, + "learning_rate": 1.1851450300518597e-05, + "loss": 0.9129, "step": 16145 }, { - "epoch": 0.45817253121452894, + "epoch": 0.4575363427697016, "grad_norm": 0.0, - "learning_rate": 1.1830533000186984e-05, - "loss": 0.9673, + "learning_rate": 1.1850548370925208e-05, + "loss": 0.803, "step": 16146 }, { - "epoch": 0.45820090805902386, + "epoch": 0.45756468021196406, "grad_norm": 0.0, - "learning_rate": 1.182962944405963e-05, - "loss": 0.9075, + "learning_rate": 1.1849646425743964e-05, + "loss": 1.0156, "step": 16147 }, { - "epoch": 0.4582292849035187, + "epoch": 0.45759301765422655, "grad_norm": 0.0, - "learning_rate": 1.1828725872477313e-05, - "loss": 0.847, + "learning_rate": 1.1848744464982463e-05, + "loss": 0.8864, "step": 16148 }, { - "epoch": 0.45825766174801363, + "epoch": 0.457621355096489, "grad_norm": 0.0, - "learning_rate": 1.1827822285447666e-05, - "loss": 0.8253, + "learning_rate": 1.1847842488648296e-05, + "loss": 0.7607, "step": 16149 }, { - "epoch": 0.4582860385925085, + "epoch": 0.45764969253875143, "grad_norm": 0.0, - "learning_rate": 1.1826918682978321e-05, - "loss": 0.8169, + "learning_rate": 1.1846940496749068e-05, + "loss": 1.0029, "step": 16150 }, { - "epoch": 0.4583144154370034, + "epoch": 0.4576780299810139, "grad_norm": 0.0, - "learning_rate": 1.1826015065076912e-05, - "loss": 0.8194, + "learning_rate": 1.1846038489292377e-05, + "loss": 0.9861, "step": 16151 }, { - "epoch": 0.4583427922814983, + "epoch": 0.45770636742327636, "grad_norm": 0.0, - "learning_rate": 1.182511143175107e-05, - "loss": 0.8063, + "learning_rate": 1.1845136466285816e-05, + "loss": 0.9149, "step": 16152 }, { - "epoch": 0.4583711691259932, + "epoch": 0.45773470486553886, "grad_norm": 0.0, - "learning_rate": 1.1824207783008434e-05, - "loss": 0.8914, + "learning_rate": 1.1844234427736982e-05, + "loss": 0.9686, "step": 16153 }, { - "epoch": 0.4583995459704881, + "epoch": 0.4577630423078013, "grad_norm": 0.0, - "learning_rate": 1.182330411885663e-05, - "loss": 1.0051, + "learning_rate": 1.1843332373653482e-05, + "loss": 0.8677, "step": 16154 }, { - "epoch": 0.45842792281498296, + "epoch": 0.45779137975006373, "grad_norm": 0.0, - "learning_rate": 1.1822400439303295e-05, - "loss": 0.9047, + "learning_rate": 1.1842430304042909e-05, + "loss": 0.8554, "step": 16155 }, { - "epoch": 0.4584562996594779, + "epoch": 0.4578197171923262, "grad_norm": 0.0, - "learning_rate": 1.1821496744356062e-05, - "loss": 0.8352, + "learning_rate": 1.1841528218912858e-05, + "loss": 0.914, "step": 16156 }, { - "epoch": 0.45848467650397273, + "epoch": 0.45784805463458866, "grad_norm": 0.0, - "learning_rate": 1.1820593034022565e-05, - "loss": 0.8419, + "learning_rate": 1.1840626118270932e-05, + "loss": 1.0046, "step": 16157 }, { - "epoch": 0.45851305334846765, + "epoch": 0.45787639207685116, "grad_norm": 0.0, - "learning_rate": 1.1819689308310433e-05, - "loss": 0.8853, + "learning_rate": 1.183972400212473e-05, + "loss": 0.8687, "step": 16158 }, { - "epoch": 0.45854143019296256, + "epoch": 0.4579047295191136, "grad_norm": 0.0, - "learning_rate": 1.1818785567227307e-05, - "loss": 0.8907, + "learning_rate": 1.1838821870481847e-05, + "loss": 0.7632, "step": 16159 }, { - "epoch": 0.4585698070374574, + "epoch": 0.4579330669613761, "grad_norm": 0.0, - "learning_rate": 1.1817881810780815e-05, - "loss": 0.8947, + "learning_rate": 1.1837919723349888e-05, + "loss": 0.9561, "step": 16160 }, { - "epoch": 0.45859818388195234, + "epoch": 0.45796140440363853, "grad_norm": 0.0, - "learning_rate": 1.1816978038978596e-05, - "loss": 0.8416, + "learning_rate": 1.1837017560736447e-05, + "loss": 0.9528, "step": 16161 }, { - "epoch": 0.4586265607264472, + "epoch": 0.45798974184590097, "grad_norm": 0.0, - "learning_rate": 1.1816074251828281e-05, - "loss": 0.9476, + "learning_rate": 1.1836115382649126e-05, + "loss": 0.8771, "step": 16162 }, { - "epoch": 0.4586549375709421, + "epoch": 0.45801807928816346, "grad_norm": 0.0, - "learning_rate": 1.1815170449337507e-05, - "loss": 0.8619, + "learning_rate": 1.183521318909552e-05, + "loss": 0.9095, "step": 16163 }, { - "epoch": 0.458683314415437, + "epoch": 0.4580464167304259, "grad_norm": 0.0, - "learning_rate": 1.1814266631513906e-05, - "loss": 0.9351, + "learning_rate": 1.1834310980083234e-05, + "loss": 0.8732, "step": 16164 }, { - "epoch": 0.4587116912599319, + "epoch": 0.4580747541726884, "grad_norm": 0.0, - "learning_rate": 1.1813362798365111e-05, - "loss": 0.8603, + "learning_rate": 1.1833408755619867e-05, + "loss": 0.9937, "step": 16165 }, { - "epoch": 0.4587400681044268, + "epoch": 0.45810309161495083, "grad_norm": 0.0, - "learning_rate": 1.181245894989876e-05, - "loss": 0.9743, + "learning_rate": 1.1832506515713014e-05, + "loss": 0.8262, "step": 16166 }, { - "epoch": 0.45876844494892166, + "epoch": 0.45813142905721327, "grad_norm": 0.0, - "learning_rate": 1.1811555086122486e-05, - "loss": 0.7537, + "learning_rate": 1.1831604260370279e-05, + "loss": 0.7859, "step": 16167 }, { - "epoch": 0.4587968217934166, + "epoch": 0.45815976649947576, "grad_norm": 0.0, - "learning_rate": 1.1810651207043925e-05, - "loss": 0.8016, + "learning_rate": 1.1830701989599263e-05, + "loss": 0.8711, "step": 16168 }, { - "epoch": 0.4588251986379115, + "epoch": 0.4581881039417382, "grad_norm": 0.0, - "learning_rate": 1.1809747312670711e-05, - "loss": 0.8914, + "learning_rate": 1.1829799703407563e-05, + "loss": 0.8583, "step": 16169 }, { - "epoch": 0.45885357548240635, + "epoch": 0.4582164413840007, "grad_norm": 0.0, - "learning_rate": 1.1808843403010484e-05, - "loss": 0.9777, + "learning_rate": 1.1828897401802782e-05, + "loss": 0.9995, "step": 16170 }, { - "epoch": 0.45888195232690127, + "epoch": 0.45824477882626313, "grad_norm": 0.0, - "learning_rate": 1.1807939478070873e-05, - "loss": 0.9545, + "learning_rate": 1.1827995084792518e-05, + "loss": 0.9217, "step": 16171 }, { - "epoch": 0.4589103291713961, + "epoch": 0.4582731162685256, "grad_norm": 0.0, - "learning_rate": 1.1807035537859514e-05, - "loss": 0.9003, + "learning_rate": 1.182709275238437e-05, + "loss": 0.914, "step": 16172 }, { - "epoch": 0.45893870601589104, + "epoch": 0.45830145371078806, "grad_norm": 0.0, - "learning_rate": 1.1806131582384049e-05, - "loss": 0.8545, + "learning_rate": 1.1826190404585946e-05, + "loss": 0.9296, "step": 16173 }, { - "epoch": 0.4589670828603859, + "epoch": 0.4583297911530505, "grad_norm": 0.0, - "learning_rate": 1.1805227611652106e-05, - "loss": 0.936, + "learning_rate": 1.1825288041404838e-05, + "loss": 0.9399, "step": 16174 }, { - "epoch": 0.4589954597048808, + "epoch": 0.458358128595313, "grad_norm": 0.0, - "learning_rate": 1.1804323625671326e-05, - "loss": 0.9986, + "learning_rate": 1.1824385662848654e-05, + "loss": 0.8598, "step": 16175 }, { - "epoch": 0.45902383654937573, + "epoch": 0.45838646603757544, "grad_norm": 0.0, - "learning_rate": 1.180341962444934e-05, - "loss": 0.8803, + "learning_rate": 1.1823483268924992e-05, + "loss": 0.8663, "step": 16176 }, { - "epoch": 0.4590522133938706, + "epoch": 0.45841480347983793, "grad_norm": 0.0, - "learning_rate": 1.180251560799379e-05, - "loss": 0.9626, + "learning_rate": 1.1822580859641452e-05, + "loss": 0.9436, "step": 16177 }, { - "epoch": 0.4590805902383655, + "epoch": 0.45844314092210037, "grad_norm": 0.0, - "learning_rate": 1.180161157631231e-05, - "loss": 0.8481, + "learning_rate": 1.1821678435005637e-05, + "loss": 0.9696, "step": 16178 }, { - "epoch": 0.45910896708286036, + "epoch": 0.4584714783643628, "grad_norm": 0.0, - "learning_rate": 1.1800707529412534e-05, - "loss": 0.8823, + "learning_rate": 1.1820775995025147e-05, + "loss": 0.8059, "step": 16179 }, { - "epoch": 0.4591373439273553, + "epoch": 0.4584998158066253, "grad_norm": 0.0, - "learning_rate": 1.1799803467302102e-05, - "loss": 0.9167, + "learning_rate": 1.1819873539707584e-05, + "loss": 0.8713, "step": 16180 }, { - "epoch": 0.4591657207718502, + "epoch": 0.45852815324888774, "grad_norm": 0.0, - "learning_rate": 1.1798899389988652e-05, - "loss": 0.8483, + "learning_rate": 1.1818971069060553e-05, + "loss": 0.9528, "step": 16181 }, { - "epoch": 0.45919409761634505, + "epoch": 0.45855649069115023, "grad_norm": 0.0, - "learning_rate": 1.1797995297479812e-05, - "loss": 0.9059, + "learning_rate": 1.1818068583091654e-05, + "loss": 0.9101, "step": 16182 }, { - "epoch": 0.45922247446083997, + "epoch": 0.45858482813341267, "grad_norm": 0.0, - "learning_rate": 1.1797091189783227e-05, - "loss": 0.8324, + "learning_rate": 1.1817166081808486e-05, + "loss": 0.8974, "step": 16183 }, { - "epoch": 0.45925085130533483, + "epoch": 0.45861316557567516, "grad_norm": 0.0, - "learning_rate": 1.1796187066906535e-05, - "loss": 1.0718, + "learning_rate": 1.1816263565218654e-05, + "loss": 0.8386, "step": 16184 }, { - "epoch": 0.45927922814982974, + "epoch": 0.4586415030179376, "grad_norm": 0.0, - "learning_rate": 1.1795282928857368e-05, - "loss": 0.979, + "learning_rate": 1.1815361033329758e-05, + "loss": 0.8852, "step": 16185 }, { - "epoch": 0.45930760499432466, + "epoch": 0.45866984046020004, "grad_norm": 0.0, - "learning_rate": 1.1794378775643365e-05, - "loss": 0.8617, + "learning_rate": 1.1814458486149402e-05, + "loss": 0.8741, "step": 16186 }, { - "epoch": 0.4593359818388195, + "epoch": 0.45869817790246253, "grad_norm": 0.0, - "learning_rate": 1.1793474607272164e-05, - "loss": 0.9531, + "learning_rate": 1.1813555923685189e-05, + "loss": 0.9708, "step": 16187 }, { - "epoch": 0.45936435868331443, + "epoch": 0.45872651534472497, "grad_norm": 0.0, - "learning_rate": 1.1792570423751404e-05, - "loss": 0.8531, + "learning_rate": 1.1812653345944725e-05, + "loss": 0.8853, "step": 16188 }, { - "epoch": 0.4593927355278093, + "epoch": 0.45875485278698747, "grad_norm": 0.0, - "learning_rate": 1.1791666225088719e-05, - "loss": 0.903, + "learning_rate": 1.1811750752935604e-05, + "loss": 0.8707, "step": 16189 }, { - "epoch": 0.4594211123723042, + "epoch": 0.4587831902292499, "grad_norm": 0.0, - "learning_rate": 1.1790762011291747e-05, - "loss": 0.8793, + "learning_rate": 1.1810848144665435e-05, + "loss": 0.9193, "step": 16190 }, { - "epoch": 0.45944948921679907, + "epoch": 0.45881152767151234, "grad_norm": 0.0, - "learning_rate": 1.178985778236813e-05, - "loss": 1.0061, + "learning_rate": 1.1809945521141818e-05, + "loss": 0.8826, "step": 16191 }, { - "epoch": 0.459477866061294, + "epoch": 0.45883986511377484, "grad_norm": 0.0, - "learning_rate": 1.1788953538325508e-05, - "loss": 0.8379, + "learning_rate": 1.180904288237236e-05, + "loss": 1.0337, "step": 16192 }, { - "epoch": 0.4595062429057889, + "epoch": 0.4588682025560373, "grad_norm": 0.0, - "learning_rate": 1.1788049279171507e-05, - "loss": 0.8014, + "learning_rate": 1.1808140228364662e-05, + "loss": 0.8063, "step": 16193 }, { - "epoch": 0.45953461975028376, + "epoch": 0.45889653999829977, "grad_norm": 0.0, - "learning_rate": 1.178714500491378e-05, - "loss": 0.9272, + "learning_rate": 1.1807237559126325e-05, + "loss": 0.9673, "step": 16194 }, { - "epoch": 0.45956299659477867, + "epoch": 0.4589248774405622, "grad_norm": 0.0, - "learning_rate": 1.1786240715559956e-05, - "loss": 0.9672, + "learning_rate": 1.1806334874664958e-05, + "loss": 0.9106, "step": 16195 }, { - "epoch": 0.45959137343927353, + "epoch": 0.4589532148828247, "grad_norm": 0.0, - "learning_rate": 1.1785336411117675e-05, - "loss": 0.9357, + "learning_rate": 1.180543217498816e-05, + "loss": 0.9443, "step": 16196 }, { - "epoch": 0.45961975028376845, + "epoch": 0.45898155232508714, "grad_norm": 0.0, - "learning_rate": 1.1784432091594578e-05, - "loss": 0.958, + "learning_rate": 1.180452946010354e-05, + "loss": 0.8922, "step": 16197 }, { - "epoch": 0.45964812712826336, + "epoch": 0.4590098897673496, "grad_norm": 0.0, - "learning_rate": 1.1783527756998306e-05, - "loss": 0.9492, + "learning_rate": 1.1803626730018694e-05, + "loss": 0.8087, "step": 16198 }, { - "epoch": 0.4596765039727582, + "epoch": 0.45903822720961207, "grad_norm": 0.0, - "learning_rate": 1.178262340733649e-05, - "loss": 0.8914, + "learning_rate": 1.1802723984741229e-05, + "loss": 0.9427, "step": 16199 }, { - "epoch": 0.45970488081725314, + "epoch": 0.4590665646518745, "grad_norm": 0.0, - "learning_rate": 1.1781719042616777e-05, - "loss": 0.9597, + "learning_rate": 1.1801821224278753e-05, + "loss": 0.8758, "step": 16200 }, { - "epoch": 0.459733257661748, + "epoch": 0.459094902094137, "grad_norm": 0.0, - "learning_rate": 1.1780814662846804e-05, - "loss": 0.8152, + "learning_rate": 1.180091844863887e-05, + "loss": 0.837, "step": 16201 }, { - "epoch": 0.4597616345062429, + "epoch": 0.45912323953639944, "grad_norm": 0.0, - "learning_rate": 1.177991026803421e-05, - "loss": 0.9537, + "learning_rate": 1.1800015657829178e-05, + "loss": 0.9589, "step": 16202 }, { - "epoch": 0.45979001135073777, + "epoch": 0.4591515769786619, "grad_norm": 0.0, - "learning_rate": 1.177900585818663e-05, - "loss": 0.8352, + "learning_rate": 1.1799112851857286e-05, + "loss": 0.8028, "step": 16203 }, { - "epoch": 0.4598183881952327, + "epoch": 0.4591799144209244, "grad_norm": 0.0, - "learning_rate": 1.1778101433311711e-05, - "loss": 1.0023, + "learning_rate": 1.1798210030730798e-05, + "loss": 0.8079, "step": 16204 }, { - "epoch": 0.4598467650397276, + "epoch": 0.4592082518631868, "grad_norm": 0.0, - "learning_rate": 1.1777196993417087e-05, - "loss": 0.8757, + "learning_rate": 1.1797307194457323e-05, + "loss": 0.8509, "step": 16205 }, { - "epoch": 0.45987514188422246, + "epoch": 0.4592365893054493, "grad_norm": 0.0, - "learning_rate": 1.17762925385104e-05, - "loss": 0.9211, + "learning_rate": 1.1796404343044461e-05, + "loss": 0.8733, "step": 16206 }, { - "epoch": 0.4599035187287174, + "epoch": 0.45926492674771174, "grad_norm": 0.0, - "learning_rate": 1.1775388068599289e-05, - "loss": 0.8739, + "learning_rate": 1.1795501476499816e-05, + "loss": 0.994, "step": 16207 }, { - "epoch": 0.45993189557321223, + "epoch": 0.45929326418997424, "grad_norm": 0.0, - "learning_rate": 1.1774483583691399e-05, - "loss": 0.8269, + "learning_rate": 1.1794598594830996e-05, + "loss": 0.9367, "step": 16208 }, { - "epoch": 0.45996027241770715, + "epoch": 0.4593216016322367, "grad_norm": 0.0, - "learning_rate": 1.1773579083794363e-05, - "loss": 0.9752, + "learning_rate": 1.1793695698045606e-05, + "loss": 0.8473, "step": 16209 }, { - "epoch": 0.45998864926220207, + "epoch": 0.4593499390744991, "grad_norm": 0.0, - "learning_rate": 1.1772674568915827e-05, - "loss": 0.9712, + "learning_rate": 1.1792792786151251e-05, + "loss": 0.8179, "step": 16210 }, { - "epoch": 0.4600170261066969, + "epoch": 0.4593782765167616, "grad_norm": 0.0, - "learning_rate": 1.1771770039063431e-05, - "loss": 0.9559, + "learning_rate": 1.1791889859155537e-05, + "loss": 0.9508, "step": 16211 }, { - "epoch": 0.46004540295119184, + "epoch": 0.45940661395902405, "grad_norm": 0.0, - "learning_rate": 1.177086549424481e-05, - "loss": 0.8422, + "learning_rate": 1.1790986917066068e-05, + "loss": 0.8578, "step": 16212 }, { - "epoch": 0.4600737797956867, + "epoch": 0.45943495140128654, "grad_norm": 0.0, - "learning_rate": 1.1769960934467608e-05, - "loss": 0.878, + "learning_rate": 1.1790083959890453e-05, + "loss": 0.8245, "step": 16213 }, { - "epoch": 0.4601021566401816, + "epoch": 0.459463288843549, "grad_norm": 0.0, - "learning_rate": 1.176905635973947e-05, - "loss": 0.9428, + "learning_rate": 1.1789180987636297e-05, + "loss": 0.9158, "step": 16214 }, { - "epoch": 0.46013053348467653, + "epoch": 0.4594916262858114, "grad_norm": 0.0, - "learning_rate": 1.1768151770068033e-05, - "loss": 0.9047, + "learning_rate": 1.1788278000311202e-05, + "loss": 0.9183, "step": 16215 }, { - "epoch": 0.4601589103291714, + "epoch": 0.4595199637280739, "grad_norm": 0.0, - "learning_rate": 1.1767247165460935e-05, - "loss": 0.9308, + "learning_rate": 1.1787374997922779e-05, + "loss": 0.9957, "step": 16216 }, { - "epoch": 0.4601872871736663, + "epoch": 0.45954830117033635, "grad_norm": 0.0, - "learning_rate": 1.1766342545925824e-05, - "loss": 0.7324, + "learning_rate": 1.1786471980478631e-05, + "loss": 0.9627, "step": 16217 }, { - "epoch": 0.46021566401816116, + "epoch": 0.45957663861259884, "grad_norm": 0.0, - "learning_rate": 1.1765437911470339e-05, - "loss": 0.8854, + "learning_rate": 1.1785568947986368e-05, + "loss": 0.8055, "step": 16218 }, { - "epoch": 0.4602440408626561, + "epoch": 0.4596049760548613, "grad_norm": 0.0, - "learning_rate": 1.1764533262102119e-05, - "loss": 0.8415, + "learning_rate": 1.1784665900453594e-05, + "loss": 1.0907, "step": 16219 }, { - "epoch": 0.46027241770715094, + "epoch": 0.4596333134971238, "grad_norm": 0.0, - "learning_rate": 1.1763628597828804e-05, - "loss": 0.9643, + "learning_rate": 1.1783762837887915e-05, + "loss": 0.8394, "step": 16220 }, { - "epoch": 0.46030079455164585, + "epoch": 0.4596616509393862, "grad_norm": 0.0, - "learning_rate": 1.1762723918658043e-05, - "loss": 0.8957, + "learning_rate": 1.1782859760296943e-05, + "loss": 0.9496, "step": 16221 }, { - "epoch": 0.46032917139614077, + "epoch": 0.45968998838164865, "grad_norm": 0.0, - "learning_rate": 1.1761819224597473e-05, - "loss": 0.9963, + "learning_rate": 1.1781956667688279e-05, + "loss": 0.8946, "step": 16222 }, { - "epoch": 0.46035754824063563, + "epoch": 0.45971832582391114, "grad_norm": 0.0, - "learning_rate": 1.1760914515654735e-05, - "loss": 0.9041, + "learning_rate": 1.1781053560069531e-05, + "loss": 0.8484, "step": 16223 }, { - "epoch": 0.46038592508513054, + "epoch": 0.4597466632661736, "grad_norm": 0.0, - "learning_rate": 1.1760009791837477e-05, - "loss": 0.8864, + "learning_rate": 1.1780150437448308e-05, + "loss": 0.9666, "step": 16224 }, { - "epoch": 0.4604143019296254, + "epoch": 0.4597750007084361, "grad_norm": 0.0, - "learning_rate": 1.1759105053153334e-05, - "loss": 0.915, + "learning_rate": 1.1779247299832219e-05, + "loss": 0.994, "step": 16225 }, { - "epoch": 0.4604426787741203, + "epoch": 0.4598033381506985, "grad_norm": 0.0, - "learning_rate": 1.1758200299609952e-05, - "loss": 0.9153, + "learning_rate": 1.1778344147228869e-05, + "loss": 0.9765, "step": 16226 }, { - "epoch": 0.46047105561861523, + "epoch": 0.45983167559296095, "grad_norm": 0.0, - "learning_rate": 1.1757295531214974e-05, - "loss": 0.8143, + "learning_rate": 1.1777440979645862e-05, + "loss": 0.9982, "step": 16227 }, { - "epoch": 0.4604994324631101, + "epoch": 0.45986001303522345, "grad_norm": 0.0, - "learning_rate": 1.1756390747976038e-05, - "loss": 0.8993, + "learning_rate": 1.1776537797090815e-05, + "loss": 0.9286, "step": 16228 }, { - "epoch": 0.460527809307605, + "epoch": 0.4598883504774859, "grad_norm": 0.0, - "learning_rate": 1.1755485949900796e-05, - "loss": 0.8616, + "learning_rate": 1.1775634599571326e-05, + "loss": 0.7762, "step": 16229 }, { - "epoch": 0.46055618615209987, + "epoch": 0.4599166879197484, "grad_norm": 0.0, - "learning_rate": 1.1754581136996882e-05, - "loss": 0.8656, + "learning_rate": 1.1774731387095008e-05, + "loss": 0.9323, "step": 16230 }, { - "epoch": 0.4605845629965948, + "epoch": 0.4599450253620108, "grad_norm": 0.0, - "learning_rate": 1.1753676309271943e-05, - "loss": 0.8708, + "learning_rate": 1.1773828159669472e-05, + "loss": 0.933, "step": 16231 }, { - "epoch": 0.4606129398410897, + "epoch": 0.4599733628042733, "grad_norm": 0.0, - "learning_rate": 1.1752771466733622e-05, - "loss": 0.8421, + "learning_rate": 1.177292491730232e-05, + "loss": 1.001, "step": 16232 }, { - "epoch": 0.46064131668558456, + "epoch": 0.46000170024653575, "grad_norm": 0.0, - "learning_rate": 1.175186660938956e-05, - "loss": 0.9218, + "learning_rate": 1.1772021660001163e-05, + "loss": 0.8284, "step": 16233 }, { - "epoch": 0.46066969353007947, + "epoch": 0.4600300376887982, "grad_norm": 0.0, - "learning_rate": 1.1750961737247404e-05, - "loss": 0.9307, + "learning_rate": 1.177111838777361e-05, + "loss": 0.8582, "step": 16234 }, { - "epoch": 0.46069807037457433, + "epoch": 0.4600583751310607, "grad_norm": 0.0, - "learning_rate": 1.1750056850314794e-05, - "loss": 0.8329, + "learning_rate": 1.177021510062727e-05, + "loss": 0.9698, "step": 16235 }, { - "epoch": 0.46072644721906925, + "epoch": 0.4600867125733231, "grad_norm": 0.0, - "learning_rate": 1.1749151948599374e-05, - "loss": 0.8425, + "learning_rate": 1.176931179856975e-05, + "loss": 0.8966, "step": 16236 }, { - "epoch": 0.4607548240635641, + "epoch": 0.4601150500155856, "grad_norm": 0.0, - "learning_rate": 1.1748247032108789e-05, - "loss": 0.9759, + "learning_rate": 1.1768408481608657e-05, + "loss": 0.8651, "step": 16237 }, { - "epoch": 0.460783200908059, + "epoch": 0.46014338745784805, "grad_norm": 0.0, - "learning_rate": 1.1747342100850685e-05, - "loss": 0.8948, + "learning_rate": 1.1767505149751606e-05, + "loss": 0.9046, "step": 16238 }, { - "epoch": 0.46081157775255394, + "epoch": 0.4601717249001105, "grad_norm": 0.0, - "learning_rate": 1.1746437154832704e-05, - "loss": 0.9234, + "learning_rate": 1.1766601803006204e-05, + "loss": 0.822, "step": 16239 }, { - "epoch": 0.4608399545970488, + "epoch": 0.460200062342373, "grad_norm": 0.0, - "learning_rate": 1.1745532194062487e-05, - "loss": 0.8752, + "learning_rate": 1.1765698441380056e-05, + "loss": 0.9106, "step": 16240 }, { - "epoch": 0.4608683314415437, + "epoch": 0.4602283997846354, "grad_norm": 0.0, - "learning_rate": 1.1744627218547683e-05, - "loss": 0.8531, + "learning_rate": 1.1764795064880777e-05, + "loss": 0.8113, "step": 16241 }, { - "epoch": 0.46089670828603857, + "epoch": 0.4602567372268979, "grad_norm": 0.0, - "learning_rate": 1.1743722228295934e-05, - "loss": 0.9374, + "learning_rate": 1.1763891673515973e-05, + "loss": 1.021, "step": 16242 }, { - "epoch": 0.4609250851305335, + "epoch": 0.46028507466916035, "grad_norm": 0.0, - "learning_rate": 1.1742817223314887e-05, - "loss": 0.8695, + "learning_rate": 1.1762988267293252e-05, + "loss": 0.9376, "step": 16243 }, { - "epoch": 0.4609534619750284, + "epoch": 0.46031341211142285, "grad_norm": 0.0, - "learning_rate": 1.174191220361218e-05, - "loss": 0.9908, + "learning_rate": 1.1762084846220226e-05, + "loss": 0.9314, "step": 16244 }, { - "epoch": 0.46098183881952326, + "epoch": 0.4603417495536853, "grad_norm": 0.0, - "learning_rate": 1.1741007169195466e-05, - "loss": 0.851, + "learning_rate": 1.1761181410304508e-05, + "loss": 0.8061, "step": 16245 }, { - "epoch": 0.4610102156640182, + "epoch": 0.4603700869959477, "grad_norm": 0.0, - "learning_rate": 1.1740102120072385e-05, - "loss": 1.0023, + "learning_rate": 1.1760277959553706e-05, + "loss": 0.8875, "step": 16246 }, { - "epoch": 0.46103859250851303, + "epoch": 0.4603984244382102, "grad_norm": 0.0, - "learning_rate": 1.1739197056250582e-05, - "loss": 0.8974, + "learning_rate": 1.1759374493975425e-05, + "loss": 0.8102, "step": 16247 }, { - "epoch": 0.46106696935300795, + "epoch": 0.46042676188047266, "grad_norm": 0.0, - "learning_rate": 1.1738291977737706e-05, - "loss": 0.9608, + "learning_rate": 1.1758471013577283e-05, + "loss": 0.8779, "step": 16248 }, { - "epoch": 0.46109534619750286, + "epoch": 0.46045509932273515, "grad_norm": 0.0, - "learning_rate": 1.17373868845414e-05, - "loss": 0.8551, + "learning_rate": 1.1757567518366883e-05, + "loss": 0.9662, "step": 16249 }, { - "epoch": 0.4611237230419977, + "epoch": 0.4604834367649976, "grad_norm": 0.0, - "learning_rate": 1.1736481776669307e-05, - "loss": 0.8953, + "learning_rate": 1.1756664008351842e-05, + "loss": 0.885, "step": 16250 }, { - "epoch": 0.46115209988649264, + "epoch": 0.46051177420726, "grad_norm": 0.0, - "learning_rate": 1.173557665412907e-05, - "loss": 0.9034, + "learning_rate": 1.1755760483539767e-05, + "loss": 0.9943, "step": 16251 }, { - "epoch": 0.4611804767309875, + "epoch": 0.4605401116495225, "grad_norm": 0.0, - "learning_rate": 1.1734671516928346e-05, - "loss": 0.9608, + "learning_rate": 1.1754856943938266e-05, + "loss": 0.8764, "step": 16252 }, { - "epoch": 0.4612088535754824, + "epoch": 0.46056844909178496, "grad_norm": 0.0, - "learning_rate": 1.173376636507477e-05, - "loss": 0.9077, + "learning_rate": 1.175395338955496e-05, + "loss": 0.864, "step": 16253 }, { - "epoch": 0.4612372304199773, + "epoch": 0.46059678653404745, "grad_norm": 0.0, - "learning_rate": 1.1732861198575993e-05, - "loss": 0.9592, + "learning_rate": 1.1753049820397449e-05, + "loss": 0.8849, "step": 16254 }, { - "epoch": 0.4612656072644722, + "epoch": 0.4606251239763099, "grad_norm": 0.0, - "learning_rate": 1.1731956017439661e-05, - "loss": 1.0182, + "learning_rate": 1.175214623647335e-05, + "loss": 0.8925, "step": 16255 }, { - "epoch": 0.4612939841089671, + "epoch": 0.4606534614185724, "grad_norm": 0.0, - "learning_rate": 1.1731050821673418e-05, - "loss": 0.8925, + "learning_rate": 1.175124263779027e-05, + "loss": 0.8728, "step": 16256 }, { - "epoch": 0.46132236095346196, + "epoch": 0.4606817988608348, "grad_norm": 0.0, - "learning_rate": 1.1730145611284907e-05, - "loss": 0.9707, + "learning_rate": 1.1750339024355824e-05, + "loss": 0.8719, "step": 16257 }, { - "epoch": 0.4613507377979569, + "epoch": 0.46071013630309726, "grad_norm": 0.0, - "learning_rate": 1.1729240386281782e-05, - "loss": 0.8756, + "learning_rate": 1.1749435396177623e-05, + "loss": 0.9334, "step": 16258 }, { - "epoch": 0.46137911464245174, + "epoch": 0.46073847374535976, "grad_norm": 0.0, - "learning_rate": 1.1728335146671683e-05, - "loss": 0.8728, + "learning_rate": 1.1748531753263282e-05, + "loss": 0.8685, "step": 16259 }, { - "epoch": 0.46140749148694665, + "epoch": 0.4607668111876222, "grad_norm": 0.0, - "learning_rate": 1.1727429892462262e-05, - "loss": 0.8174, + "learning_rate": 1.1747628095620405e-05, + "loss": 0.9574, "step": 16260 }, { - "epoch": 0.46143586833144157, + "epoch": 0.4607951486298847, "grad_norm": 0.0, - "learning_rate": 1.1726524623661161e-05, - "loss": 1.0594, + "learning_rate": 1.1746724423256605e-05, + "loss": 0.8864, "step": 16261 }, { - "epoch": 0.4614642451759364, + "epoch": 0.4608234860721471, "grad_norm": 0.0, - "learning_rate": 1.172561934027603e-05, - "loss": 0.8145, + "learning_rate": 1.17458207361795e-05, + "loss": 0.8466, "step": 16262 }, { - "epoch": 0.46149262202043134, + "epoch": 0.46085182351440956, "grad_norm": 0.0, - "learning_rate": 1.1724714042314515e-05, - "loss": 0.9141, + "learning_rate": 1.1744917034396697e-05, + "loss": 0.9254, "step": 16263 }, { - "epoch": 0.4615209988649262, + "epoch": 0.46088016095667206, "grad_norm": 0.0, - "learning_rate": 1.1723808729784265e-05, - "loss": 0.7852, + "learning_rate": 1.1744013317915812e-05, + "loss": 1.0328, "step": 16264 }, { - "epoch": 0.4615493757094211, + "epoch": 0.4609084983989345, "grad_norm": 0.0, - "learning_rate": 1.1722903402692922e-05, - "loss": 0.8866, + "learning_rate": 1.1743109586744451e-05, + "loss": 0.9656, "step": 16265 }, { - "epoch": 0.46157775255391603, + "epoch": 0.460936835841197, "grad_norm": 0.0, - "learning_rate": 1.1721998061048136e-05, - "loss": 0.924, + "learning_rate": 1.1742205840890235e-05, + "loss": 0.8697, "step": 16266 }, { - "epoch": 0.4616061293984109, + "epoch": 0.46096517328345943, "grad_norm": 0.0, - "learning_rate": 1.1721092704857556e-05, - "loss": 0.8367, + "learning_rate": 1.1741302080360773e-05, + "loss": 0.956, "step": 16267 }, { - "epoch": 0.4616345062429058, + "epoch": 0.4609935107257219, "grad_norm": 0.0, - "learning_rate": 1.172018733412883e-05, - "loss": 1.053, + "learning_rate": 1.1740398305163673e-05, + "loss": 0.9249, "step": 16268 }, { - "epoch": 0.46166288308740067, + "epoch": 0.46102184816798436, "grad_norm": 0.0, - "learning_rate": 1.1719281948869605e-05, - "loss": 1.0277, + "learning_rate": 1.1739494515306553e-05, + "loss": 0.9856, "step": 16269 }, { - "epoch": 0.4616912599318956, + "epoch": 0.4610501856102468, "grad_norm": 0.0, - "learning_rate": 1.1718376549087525e-05, - "loss": 0.9961, + "learning_rate": 1.1738590710797024e-05, + "loss": 0.8623, "step": 16270 }, { - "epoch": 0.46171963677639044, + "epoch": 0.4610785230525093, "grad_norm": 0.0, - "learning_rate": 1.1717471134790243e-05, - "loss": 0.8891, + "learning_rate": 1.1737686891642703e-05, + "loss": 0.9627, "step": 16271 }, { - "epoch": 0.46174801362088536, + "epoch": 0.46110686049477173, "grad_norm": 0.0, - "learning_rate": 1.1716565705985405e-05, - "loss": 0.946, + "learning_rate": 1.1736783057851198e-05, + "loss": 0.8194, "step": 16272 }, { - "epoch": 0.46177639046538027, + "epoch": 0.4611351979370342, "grad_norm": 0.0, - "learning_rate": 1.1715660262680657e-05, - "loss": 0.966, + "learning_rate": 1.1735879209430123e-05, + "loss": 0.7484, "step": 16273 }, { - "epoch": 0.46180476730987513, + "epoch": 0.46116353537929666, "grad_norm": 0.0, - "learning_rate": 1.171475480488365e-05, - "loss": 0.9424, + "learning_rate": 1.1734975346387097e-05, + "loss": 0.9194, "step": 16274 }, { - "epoch": 0.46183314415437005, + "epoch": 0.4611918728215591, "grad_norm": 0.0, - "learning_rate": 1.1713849332602036e-05, - "loss": 0.8962, + "learning_rate": 1.1734071468729726e-05, + "loss": 0.9013, "step": 16275 }, { - "epoch": 0.4618615209988649, + "epoch": 0.4612202102638216, "grad_norm": 0.0, - "learning_rate": 1.1712943845843459e-05, - "loss": 0.7607, + "learning_rate": 1.1733167576465627e-05, + "loss": 0.9271, "step": 16276 }, { - "epoch": 0.4618898978433598, + "epoch": 0.46124854770608403, "grad_norm": 0.0, - "learning_rate": 1.1712038344615564e-05, - "loss": 0.9572, + "learning_rate": 1.1732263669602413e-05, + "loss": 0.8885, "step": 16277 }, { - "epoch": 0.46191827468785474, + "epoch": 0.4612768851483465, "grad_norm": 0.0, - "learning_rate": 1.1711132828926007e-05, - "loss": 0.902, + "learning_rate": 1.1731359748147702e-05, + "loss": 0.9292, "step": 16278 }, { - "epoch": 0.4619466515323496, + "epoch": 0.46130522259060897, "grad_norm": 0.0, - "learning_rate": 1.1710227298782435e-05, - "loss": 0.8696, + "learning_rate": 1.1730455812109102e-05, + "loss": 0.8937, "step": 16279 }, { - "epoch": 0.4619750283768445, + "epoch": 0.46133356003287146, "grad_norm": 0.0, - "learning_rate": 1.1709321754192492e-05, - "loss": 0.8722, + "learning_rate": 1.1729551861494235e-05, + "loss": 0.9903, "step": 16280 }, { - "epoch": 0.46200340522133937, + "epoch": 0.4613618974751339, "grad_norm": 0.0, - "learning_rate": 1.1708416195163836e-05, - "loss": 0.9744, + "learning_rate": 1.1728647896310705e-05, + "loss": 1.0288, "step": 16281 }, { - "epoch": 0.4620317820658343, + "epoch": 0.46139023491739634, "grad_norm": 0.0, - "learning_rate": 1.170751062170411e-05, - "loss": 0.8165, + "learning_rate": 1.1727743916566135e-05, + "loss": 0.8502, "step": 16282 }, { - "epoch": 0.46206015891032914, + "epoch": 0.46141857235965883, "grad_norm": 0.0, - "learning_rate": 1.1706605033820966e-05, - "loss": 0.9365, + "learning_rate": 1.1726839922268134e-05, + "loss": 1.0019, "step": 16283 }, { - "epoch": 0.46208853575482406, + "epoch": 0.46144690980192127, "grad_norm": 0.0, - "learning_rate": 1.1705699431522049e-05, - "loss": 0.8312, + "learning_rate": 1.172593591342432e-05, + "loss": 0.9929, "step": 16284 }, { - "epoch": 0.462116912599319, + "epoch": 0.46147524724418376, "grad_norm": 0.0, - "learning_rate": 1.1704793814815017e-05, - "loss": 0.8506, + "learning_rate": 1.1725031890042309e-05, + "loss": 0.7688, "step": 16285 }, { - "epoch": 0.46214528944381383, + "epoch": 0.4615035846864462, "grad_norm": 0.0, - "learning_rate": 1.1703888183707513e-05, - "loss": 0.9363, + "learning_rate": 1.172412785212971e-05, + "loss": 0.9323, "step": 16286 }, { - "epoch": 0.46217366628830875, + "epoch": 0.46153192212870864, "grad_norm": 0.0, - "learning_rate": 1.1702982538207186e-05, - "loss": 0.8241, + "learning_rate": 1.1723223799694145e-05, + "loss": 0.8407, "step": 16287 }, { - "epoch": 0.4622020431328036, + "epoch": 0.46156025957097113, "grad_norm": 0.0, - "learning_rate": 1.1702076878321692e-05, - "loss": 0.9152, + "learning_rate": 1.1722319732743225e-05, + "loss": 0.8402, "step": 16288 }, { - "epoch": 0.4622304199772985, + "epoch": 0.46158859701323357, "grad_norm": 0.0, - "learning_rate": 1.1701171204058677e-05, - "loss": 0.9358, + "learning_rate": 1.1721415651284567e-05, + "loss": 0.907, "step": 16289 }, { - "epoch": 0.46225879682179344, + "epoch": 0.46161693445549606, "grad_norm": 0.0, - "learning_rate": 1.1700265515425792e-05, - "loss": 0.7594, + "learning_rate": 1.1720511555325782e-05, + "loss": 0.831, "step": 16290 }, { - "epoch": 0.4622871736662883, + "epoch": 0.4616452718977585, "grad_norm": 0.0, - "learning_rate": 1.1699359812430689e-05, - "loss": 0.9168, + "learning_rate": 1.1719607444874495e-05, + "loss": 0.9395, "step": 16291 }, { - "epoch": 0.4623155505107832, + "epoch": 0.461673609340021, "grad_norm": 0.0, - "learning_rate": 1.1698454095081018e-05, - "loss": 0.906, + "learning_rate": 1.1718703319938313e-05, + "loss": 1.0137, "step": 16292 }, { - "epoch": 0.4623439273552781, + "epoch": 0.46170194678228343, "grad_norm": 0.0, - "learning_rate": 1.1697548363384428e-05, - "loss": 0.9438, + "learning_rate": 1.1717799180524856e-05, + "loss": 0.7655, "step": 16293 }, { - "epoch": 0.462372304199773, + "epoch": 0.4617302842245459, "grad_norm": 0.0, - "learning_rate": 1.169664261734857e-05, - "loss": 0.947, + "learning_rate": 1.1716895026641735e-05, + "loss": 0.8608, "step": 16294 }, { - "epoch": 0.4624006810442679, + "epoch": 0.46175862166680837, "grad_norm": 0.0, - "learning_rate": 1.1695736856981097e-05, - "loss": 0.9322, + "learning_rate": 1.1715990858296573e-05, + "loss": 0.8705, "step": 16295 }, { - "epoch": 0.46242905788876276, + "epoch": 0.4617869591090708, "grad_norm": 0.0, - "learning_rate": 1.1694831082289659e-05, - "loss": 0.7805, + "learning_rate": 1.171508667549698e-05, + "loss": 0.8524, "step": 16296 }, { - "epoch": 0.4624574347332577, + "epoch": 0.4618152965513333, "grad_norm": 0.0, - "learning_rate": 1.1693925293281905e-05, - "loss": 0.794, + "learning_rate": 1.171418247825058e-05, + "loss": 0.8287, "step": 16297 }, { - "epoch": 0.46248581157775254, + "epoch": 0.46184363399359574, "grad_norm": 0.0, - "learning_rate": 1.1693019489965485e-05, - "loss": 0.8834, + "learning_rate": 1.1713278266564978e-05, + "loss": 0.9402, "step": 16298 }, { - "epoch": 0.46251418842224745, + "epoch": 0.4618719714358582, "grad_norm": 0.0, - "learning_rate": 1.1692113672348058e-05, - "loss": 0.8588, + "learning_rate": 1.1712374040447802e-05, + "loss": 0.9553, "step": 16299 }, { - "epoch": 0.4625425652667423, + "epoch": 0.46190030887812067, "grad_norm": 0.0, - "learning_rate": 1.1691207840437271e-05, - "loss": 0.9302, + "learning_rate": 1.1711469799906663e-05, + "loss": 0.8834, "step": 16300 }, { - "epoch": 0.4625709421112372, + "epoch": 0.4619286463203831, "grad_norm": 0.0, - "learning_rate": 1.1690301994240772e-05, - "loss": 1.0101, + "learning_rate": 1.1710565544949177e-05, + "loss": 0.886, "step": 16301 }, { - "epoch": 0.46259931895573214, + "epoch": 0.4619569837626456, "grad_norm": 0.0, - "learning_rate": 1.1689396133766217e-05, - "loss": 0.8171, + "learning_rate": 1.1709661275582961e-05, + "loss": 0.8271, "step": 16302 }, { - "epoch": 0.462627695800227, + "epoch": 0.46198532120490804, "grad_norm": 0.0, - "learning_rate": 1.1688490259021258e-05, - "loss": 0.9855, + "learning_rate": 1.1708756991815635e-05, + "loss": 0.8917, "step": 16303 }, { - "epoch": 0.4626560726447219, + "epoch": 0.46201365864717053, "grad_norm": 0.0, - "learning_rate": 1.1687584370013545e-05, - "loss": 0.9221, + "learning_rate": 1.1707852693654815e-05, + "loss": 0.8665, "step": 16304 }, { - "epoch": 0.4626844494892168, + "epoch": 0.46204199608943297, "grad_norm": 0.0, - "learning_rate": 1.1686678466750729e-05, - "loss": 0.9641, + "learning_rate": 1.1706948381108117e-05, + "loss": 1.0609, "step": 16305 }, { - "epoch": 0.4627128263337117, + "epoch": 0.4620703335316954, "grad_norm": 0.0, - "learning_rate": 1.1685772549240469e-05, - "loss": 0.9628, + "learning_rate": 1.1706044054183156e-05, + "loss": 0.8464, "step": 16306 }, { - "epoch": 0.4627412031782066, + "epoch": 0.4620986709739579, "grad_norm": 0.0, - "learning_rate": 1.168486661749041e-05, - "loss": 0.9694, + "learning_rate": 1.1705139712887556e-05, + "loss": 0.8971, "step": 16307 }, { - "epoch": 0.46276958002270147, + "epoch": 0.46212700841622034, "grad_norm": 0.0, - "learning_rate": 1.1683960671508206e-05, - "loss": 0.8253, + "learning_rate": 1.1704235357228932e-05, + "loss": 0.9507, "step": 16308 }, { - "epoch": 0.4627979568671964, + "epoch": 0.46215534585848284, "grad_norm": 0.0, - "learning_rate": 1.1683054711301514e-05, - "loss": 0.9684, + "learning_rate": 1.1703330987214898e-05, + "loss": 0.8675, "step": 16309 }, { - "epoch": 0.46282633371169124, + "epoch": 0.4621836833007453, "grad_norm": 0.0, - "learning_rate": 1.168214873687798e-05, - "loss": 0.8419, + "learning_rate": 1.1702426602853074e-05, + "loss": 0.9902, "step": 16310 }, { - "epoch": 0.46285471055618616, + "epoch": 0.4622120207430077, "grad_norm": 0.0, - "learning_rate": 1.168124274824526e-05, - "loss": 0.9348, + "learning_rate": 1.170152220415108e-05, + "loss": 0.9926, "step": 16311 }, { - "epoch": 0.46288308740068107, + "epoch": 0.4622403581852702, "grad_norm": 0.0, - "learning_rate": 1.1680336745411005e-05, - "loss": 0.906, + "learning_rate": 1.1700617791116532e-05, + "loss": 0.9043, "step": 16312 }, { - "epoch": 0.46291146424517593, + "epoch": 0.46226869562753264, "grad_norm": 0.0, - "learning_rate": 1.1679430728382875e-05, - "loss": 0.8327, + "learning_rate": 1.169971336375705e-05, + "loss": 0.9382, "step": 16313 }, { - "epoch": 0.46293984108967084, + "epoch": 0.46229703306979514, "grad_norm": 0.0, - "learning_rate": 1.1678524697168513e-05, - "loss": 1.0353, + "learning_rate": 1.1698808922080248e-05, + "loss": 0.9043, "step": 16314 }, { - "epoch": 0.4629682179341657, + "epoch": 0.4623253705120576, "grad_norm": 0.0, - "learning_rate": 1.1677618651775582e-05, - "loss": 0.9576, + "learning_rate": 1.1697904466093753e-05, + "loss": 0.8864, "step": 16315 }, { - "epoch": 0.4629965947786606, + "epoch": 0.46235370795432007, "grad_norm": 0.0, - "learning_rate": 1.1676712592211729e-05, - "loss": 0.8309, + "learning_rate": 1.1696999995805174e-05, + "loss": 0.882, "step": 16316 }, { - "epoch": 0.4630249716231555, + "epoch": 0.4623820453965825, "grad_norm": 0.0, - "learning_rate": 1.1675806518484609e-05, - "loss": 0.8663, + "learning_rate": 1.1696095511222137e-05, + "loss": 0.9059, "step": 16317 }, { - "epoch": 0.4630533484676504, + "epoch": 0.46241038283884495, "grad_norm": 0.0, - "learning_rate": 1.1674900430601876e-05, - "loss": 0.9765, + "learning_rate": 1.1695191012352256e-05, + "loss": 0.9148, "step": 16318 }, { - "epoch": 0.4630817253121453, + "epoch": 0.46243872028110744, "grad_norm": 0.0, - "learning_rate": 1.1673994328571186e-05, - "loss": 1.0224, + "learning_rate": 1.169428649920315e-05, + "loss": 0.7141, "step": 16319 }, { - "epoch": 0.46311010215664017, + "epoch": 0.4624670577233699, "grad_norm": 0.0, - "learning_rate": 1.1673088212400188e-05, - "loss": 0.8795, + "learning_rate": 1.1693381971782442e-05, + "loss": 0.7818, "step": 16320 }, { - "epoch": 0.4631384790011351, + "epoch": 0.4624953951656324, "grad_norm": 0.0, - "learning_rate": 1.1672182082096538e-05, - "loss": 0.9177, + "learning_rate": 1.1692477430097746e-05, + "loss": 1.0057, "step": 16321 }, { - "epoch": 0.46316685584562994, + "epoch": 0.4625237326078948, "grad_norm": 0.0, - "learning_rate": 1.1671275937667895e-05, - "loss": 0.8498, + "learning_rate": 1.1691572874156687e-05, + "loss": 0.9906, "step": 16322 }, { - "epoch": 0.46319523269012486, + "epoch": 0.46255207005015725, "grad_norm": 0.0, - "learning_rate": 1.1670369779121904e-05, - "loss": 0.9351, + "learning_rate": 1.169066830396688e-05, + "loss": 0.8587, "step": 16323 }, { - "epoch": 0.4632236095346198, + "epoch": 0.46258040749241974, "grad_norm": 0.0, - "learning_rate": 1.1669463606466227e-05, - "loss": 0.8373, + "learning_rate": 1.1689763719535947e-05, + "loss": 0.8514, "step": 16324 }, { - "epoch": 0.46325198637911463, + "epoch": 0.4626087449346822, "grad_norm": 0.0, - "learning_rate": 1.1668557419708514e-05, - "loss": 0.8202, + "learning_rate": 1.1688859120871507e-05, + "loss": 0.9071, "step": 16325 }, { - "epoch": 0.46328036322360955, + "epoch": 0.4626370823769447, "grad_norm": 0.0, - "learning_rate": 1.1667651218856427e-05, - "loss": 0.9314, + "learning_rate": 1.168795450798118e-05, + "loss": 1.0174, "step": 16326 }, { - "epoch": 0.4633087400681044, + "epoch": 0.4626654198192071, "grad_norm": 0.0, - "learning_rate": 1.1666745003917608e-05, - "loss": 0.9232, + "learning_rate": 1.1687049880872583e-05, + "loss": 0.7572, "step": 16327 }, { - "epoch": 0.4633371169125993, + "epoch": 0.4626937572614696, "grad_norm": 0.0, - "learning_rate": 1.166583877489972e-05, - "loss": 1.0433, + "learning_rate": 1.1686145239553336e-05, + "loss": 1.013, "step": 16328 }, { - "epoch": 0.46336549375709424, + "epoch": 0.46272209470373205, "grad_norm": 0.0, - "learning_rate": 1.1664932531810422e-05, - "loss": 0.9662, + "learning_rate": 1.1685240584031068e-05, + "loss": 0.9441, "step": 16329 }, { - "epoch": 0.4633938706015891, + "epoch": 0.4627504321459945, "grad_norm": 0.0, - "learning_rate": 1.166402627465736e-05, - "loss": 1.0019, + "learning_rate": 1.168433591431339e-05, + "loss": 0.884, "step": 16330 }, { - "epoch": 0.463422247446084, + "epoch": 0.462778769588257, "grad_norm": 0.0, - "learning_rate": 1.1663120003448195e-05, - "loss": 0.9045, + "learning_rate": 1.1683431230407924e-05, + "loss": 0.8763, "step": 16331 }, { - "epoch": 0.46345062429057887, + "epoch": 0.4628071070305194, "grad_norm": 0.0, - "learning_rate": 1.166221371819058e-05, - "loss": 0.9522, + "learning_rate": 1.1682526532322294e-05, + "loss": 0.8317, "step": 16332 }, { - "epoch": 0.4634790011350738, + "epoch": 0.4628354444727819, "grad_norm": 0.0, - "learning_rate": 1.1661307418892172e-05, - "loss": 0.8829, + "learning_rate": 1.1681621820064117e-05, + "loss": 0.9714, "step": 16333 }, { - "epoch": 0.46350737797956865, + "epoch": 0.46286378191504435, "grad_norm": 0.0, - "learning_rate": 1.1660401105560624e-05, - "loss": 0.9286, + "learning_rate": 1.1680717093641012e-05, + "loss": 0.9538, "step": 16334 }, { - "epoch": 0.46353575482406356, + "epoch": 0.4628921193573068, "grad_norm": 0.0, - "learning_rate": 1.1659494778203593e-05, - "loss": 0.8949, + "learning_rate": 1.1679812353060604e-05, + "loss": 1.0069, "step": 16335 }, { - "epoch": 0.4635641316685585, + "epoch": 0.4629204567995693, "grad_norm": 0.0, - "learning_rate": 1.1658588436828738e-05, - "loss": 0.9201, + "learning_rate": 1.1678907598330515e-05, + "loss": 0.7647, "step": 16336 }, { - "epoch": 0.46359250851305334, + "epoch": 0.4629487942418317, "grad_norm": 0.0, - "learning_rate": 1.1657682081443708e-05, - "loss": 0.8687, + "learning_rate": 1.1678002829458367e-05, + "loss": 0.843, "step": 16337 }, { - "epoch": 0.46362088535754825, + "epoch": 0.4629771316840942, "grad_norm": 0.0, - "learning_rate": 1.1656775712056163e-05, - "loss": 0.9193, + "learning_rate": 1.1677098046451773e-05, + "loss": 0.873, "step": 16338 }, { - "epoch": 0.4636492622020431, + "epoch": 0.46300546912635665, "grad_norm": 0.0, - "learning_rate": 1.1655869328673764e-05, - "loss": 1.0629, + "learning_rate": 1.1676193249318359e-05, + "loss": 0.8168, "step": 16339 }, { - "epoch": 0.463677639046538, + "epoch": 0.46303380656861914, "grad_norm": 0.0, - "learning_rate": 1.1654962931304159e-05, - "loss": 0.938, + "learning_rate": 1.167528843806575e-05, + "loss": 0.9845, "step": 16340 }, { - "epoch": 0.46370601589103294, + "epoch": 0.4630621440108816, "grad_norm": 0.0, - "learning_rate": 1.1654056519955005e-05, - "loss": 0.8706, + "learning_rate": 1.1674383612701562e-05, + "loss": 1.0043, "step": 16341 }, { - "epoch": 0.4637343927355278, + "epoch": 0.463090481453144, "grad_norm": 0.0, - "learning_rate": 1.1653150094633967e-05, - "loss": 0.9938, + "learning_rate": 1.167347877323342e-05, + "loss": 0.8996, "step": 16342 }, { - "epoch": 0.4637627695800227, + "epoch": 0.4631188188954065, "grad_norm": 0.0, - "learning_rate": 1.1652243655348691e-05, - "loss": 0.8869, + "learning_rate": 1.1672573919668947e-05, + "loss": 0.8942, "step": 16343 }, { - "epoch": 0.4637911464245176, + "epoch": 0.46314715633766895, "grad_norm": 0.0, - "learning_rate": 1.1651337202106841e-05, - "loss": 0.9071, + "learning_rate": 1.1671669052015757e-05, + "loss": 0.841, "step": 16344 }, { - "epoch": 0.4638195232690125, + "epoch": 0.46317549377993145, "grad_norm": 0.0, - "learning_rate": 1.165043073491607e-05, - "loss": 0.8456, + "learning_rate": 1.1670764170281483e-05, + "loss": 0.8811, "step": 16345 }, { - "epoch": 0.4638479001135074, + "epoch": 0.4632038312221939, "grad_norm": 0.0, - "learning_rate": 1.1649524253784037e-05, - "loss": 0.9033, + "learning_rate": 1.166985927447374e-05, + "loss": 0.9739, "step": 16346 }, { - "epoch": 0.46387627695800226, + "epoch": 0.4632321686644563, "grad_norm": 0.0, - "learning_rate": 1.16486177587184e-05, - "loss": 0.8334, + "learning_rate": 1.1668954364600154e-05, + "loss": 0.8332, "step": 16347 }, { - "epoch": 0.4639046538024972, + "epoch": 0.4632605061067188, "grad_norm": 0.0, - "learning_rate": 1.1647711249726813e-05, - "loss": 0.9618, + "learning_rate": 1.1668049440668342e-05, + "loss": 0.898, "step": 16348 }, { - "epoch": 0.46393303064699204, + "epoch": 0.46328884354898126, "grad_norm": 0.0, - "learning_rate": 1.1646804726816937e-05, - "loss": 0.9159, + "learning_rate": 1.1667144502685932e-05, + "loss": 0.9032, "step": 16349 }, { - "epoch": 0.46396140749148695, + "epoch": 0.46331718099124375, "grad_norm": 0.0, - "learning_rate": 1.1645898189996425e-05, - "loss": 0.8672, + "learning_rate": 1.1666239550660548e-05, + "loss": 0.9866, "step": 16350 }, { - "epoch": 0.4639897843359818, + "epoch": 0.4633455184335062, "grad_norm": 0.0, - "learning_rate": 1.1644991639272938e-05, - "loss": 0.9454, + "learning_rate": 1.1665334584599805e-05, + "loss": 0.9358, "step": 16351 }, { - "epoch": 0.46401816118047673, + "epoch": 0.4633738558757686, "grad_norm": 0.0, - "learning_rate": 1.164408507465413e-05, - "loss": 0.9098, + "learning_rate": 1.1664429604511332e-05, + "loss": 0.7131, "step": 16352 }, { - "epoch": 0.46404653802497164, + "epoch": 0.4634021933180311, "grad_norm": 0.0, - "learning_rate": 1.1643178496147667e-05, - "loss": 0.9888, + "learning_rate": 1.1663524610402752e-05, + "loss": 1.0197, "step": 16353 }, { - "epoch": 0.4640749148694665, + "epoch": 0.46343053076029356, "grad_norm": 0.0, - "learning_rate": 1.1642271903761198e-05, - "loss": 0.8764, + "learning_rate": 1.1662619602281682e-05, + "loss": 0.7843, "step": 16354 }, { - "epoch": 0.4641032917139614, + "epoch": 0.46345886820255605, "grad_norm": 0.0, - "learning_rate": 1.1641365297502384e-05, - "loss": 0.9444, + "learning_rate": 1.1661714580155755e-05, + "loss": 1.0147, "step": 16355 }, { - "epoch": 0.4641316685584563, + "epoch": 0.4634872056448185, "grad_norm": 0.0, - "learning_rate": 1.1640458677378885e-05, - "loss": 0.9368, + "learning_rate": 1.1660809544032584e-05, + "loss": 0.8258, "step": 16356 }, { - "epoch": 0.4641600454029512, + "epoch": 0.463515543087081, "grad_norm": 0.0, - "learning_rate": 1.163955204339836e-05, - "loss": 0.9489, + "learning_rate": 1.16599044939198e-05, + "loss": 1.0189, "step": 16357 }, { - "epoch": 0.4641884222474461, + "epoch": 0.4635438805293434, "grad_norm": 0.0, - "learning_rate": 1.1638645395568458e-05, - "loss": 0.9591, + "learning_rate": 1.1658999429825026e-05, + "loss": 0.9538, "step": 16358 }, { - "epoch": 0.46421679909194097, + "epoch": 0.46357221797160586, "grad_norm": 0.0, - "learning_rate": 1.1637738733896848e-05, - "loss": 0.9218, + "learning_rate": 1.1658094351755883e-05, + "loss": 0.9462, "step": 16359 }, { - "epoch": 0.4642451759364359, + "epoch": 0.46360055541386835, "grad_norm": 0.0, - "learning_rate": 1.1636832058391188e-05, - "loss": 0.9025, + "learning_rate": 1.1657189259719992e-05, + "loss": 0.801, "step": 16360 }, { - "epoch": 0.46427355278093074, + "epoch": 0.4636288928561308, "grad_norm": 0.0, - "learning_rate": 1.1635925369059131e-05, - "loss": 0.8811, + "learning_rate": 1.1656284153724982e-05, + "loss": 1.0239, "step": 16361 }, { - "epoch": 0.46430192962542566, + "epoch": 0.4636572302983933, "grad_norm": 0.0, - "learning_rate": 1.1635018665908342e-05, - "loss": 0.8931, + "learning_rate": 1.1655379033778477e-05, + "loss": 0.9379, "step": 16362 }, { - "epoch": 0.4643303064699205, + "epoch": 0.4636855677406557, "grad_norm": 0.0, - "learning_rate": 1.1634111948946475e-05, - "loss": 0.8418, + "learning_rate": 1.16544738998881e-05, + "loss": 0.8694, "step": 16363 }, { - "epoch": 0.46435868331441543, + "epoch": 0.46371390518291816, "grad_norm": 0.0, - "learning_rate": 1.1633205218181192e-05, - "loss": 0.9187, + "learning_rate": 1.1653568752061472e-05, + "loss": 1.0167, "step": 16364 }, { - "epoch": 0.46438706015891035, + "epoch": 0.46374224262518066, "grad_norm": 0.0, - "learning_rate": 1.163229847362015e-05, - "loss": 0.9176, + "learning_rate": 1.1652663590306226e-05, + "loss": 0.925, "step": 16365 }, { - "epoch": 0.4644154370034052, + "epoch": 0.4637705800674431, "grad_norm": 0.0, - "learning_rate": 1.1631391715271008e-05, - "loss": 0.9575, + "learning_rate": 1.1651758414629977e-05, + "loss": 0.9031, "step": 16366 }, { - "epoch": 0.4644438138479001, + "epoch": 0.4637989175097056, "grad_norm": 0.0, - "learning_rate": 1.1630484943141428e-05, - "loss": 0.9806, + "learning_rate": 1.1650853225040352e-05, + "loss": 0.9428, "step": 16367 }, { - "epoch": 0.464472190692395, + "epoch": 0.463827254951968, "grad_norm": 0.0, - "learning_rate": 1.1629578157239067e-05, - "loss": 0.9764, + "learning_rate": 1.1649948021544979e-05, + "loss": 0.9519, "step": 16368 }, { - "epoch": 0.4645005675368899, + "epoch": 0.4638555923942305, "grad_norm": 0.0, - "learning_rate": 1.1628671357571587e-05, - "loss": 0.8661, + "learning_rate": 1.164904280415148e-05, + "loss": 0.9258, "step": 16369 }, { - "epoch": 0.4645289443813848, + "epoch": 0.46388392983649296, "grad_norm": 0.0, - "learning_rate": 1.1627764544146651e-05, - "loss": 0.8982, + "learning_rate": 1.1648137572867484e-05, + "loss": 0.8613, "step": 16370 }, { - "epoch": 0.46455732122587967, + "epoch": 0.4639122672787554, "grad_norm": 0.0, - "learning_rate": 1.1626857716971908e-05, - "loss": 0.8892, + "learning_rate": 1.1647232327700613e-05, + "loss": 0.9639, "step": 16371 }, { - "epoch": 0.4645856980703746, + "epoch": 0.4639406047210179, "grad_norm": 0.0, - "learning_rate": 1.162595087605503e-05, - "loss": 0.8473, + "learning_rate": 1.1646327068658491e-05, + "loss": 0.9171, "step": 16372 }, { - "epoch": 0.46461407491486945, + "epoch": 0.46396894216328033, "grad_norm": 0.0, - "learning_rate": 1.162504402140367e-05, - "loss": 0.7432, + "learning_rate": 1.1645421795748744e-05, + "loss": 0.8219, "step": 16373 }, { - "epoch": 0.46464245175936436, + "epoch": 0.4639972796055428, "grad_norm": 0.0, - "learning_rate": 1.162413715302549e-05, - "loss": 0.9501, + "learning_rate": 1.1644516508978998e-05, + "loss": 0.8959, "step": 16374 }, { - "epoch": 0.4646708286038593, + "epoch": 0.46402561704780526, "grad_norm": 0.0, - "learning_rate": 1.162323027092815e-05, - "loss": 0.9172, + "learning_rate": 1.164361120835688e-05, + "loss": 0.9621, "step": 16375 }, { - "epoch": 0.46469920544835414, + "epoch": 0.4640539544900677, "grad_norm": 0.0, - "learning_rate": 1.1622323375119312e-05, - "loss": 0.9611, + "learning_rate": 1.1642705893890014e-05, + "loss": 0.7941, "step": 16376 }, { - "epoch": 0.46472758229284905, + "epoch": 0.4640822919323302, "grad_norm": 0.0, - "learning_rate": 1.1621416465606634e-05, - "loss": 1.0099, + "learning_rate": 1.1641800565586026e-05, + "loss": 0.8596, "step": 16377 }, { - "epoch": 0.4647559591373439, + "epoch": 0.46411062937459263, "grad_norm": 0.0, - "learning_rate": 1.162050954239778e-05, - "loss": 0.9394, + "learning_rate": 1.1640895223452543e-05, + "loss": 0.9346, "step": 16378 }, { - "epoch": 0.4647843359818388, + "epoch": 0.4641389668168551, "grad_norm": 0.0, - "learning_rate": 1.1619602605500408e-05, - "loss": 0.8558, + "learning_rate": 1.163998986749719e-05, + "loss": 0.9653, "step": 16379 }, { - "epoch": 0.4648127128263337, + "epoch": 0.46416730425911756, "grad_norm": 0.0, - "learning_rate": 1.161869565492218e-05, - "loss": 0.9245, + "learning_rate": 1.1639084497727593e-05, + "loss": 0.921, "step": 16380 }, { - "epoch": 0.4648410896708286, + "epoch": 0.46419564170138006, "grad_norm": 0.0, - "learning_rate": 1.1617788690670756e-05, - "loss": 0.984, + "learning_rate": 1.1638179114151378e-05, + "loss": 0.9155, "step": 16381 }, { - "epoch": 0.4648694665153235, + "epoch": 0.4642239791436425, "grad_norm": 0.0, - "learning_rate": 1.16168817127538e-05, - "loss": 0.9095, + "learning_rate": 1.1637273716776172e-05, + "loss": 0.9258, "step": 16382 }, { - "epoch": 0.4648978433598184, + "epoch": 0.46425231658590493, "grad_norm": 0.0, - "learning_rate": 1.1615974721178972e-05, - "loss": 0.8662, + "learning_rate": 1.1636368305609604e-05, + "loss": 0.8847, "step": 16383 }, { - "epoch": 0.4649262202043133, + "epoch": 0.4642806540281674, "grad_norm": 0.0, - "learning_rate": 1.1615067715953931e-05, - "loss": 0.8559, + "learning_rate": 1.1635462880659296e-05, + "loss": 0.8547, "step": 16384 }, { - "epoch": 0.46495459704880815, + "epoch": 0.46430899147042987, "grad_norm": 0.0, - "learning_rate": 1.161416069708634e-05, - "loss": 0.9886, + "learning_rate": 1.1634557441932877e-05, + "loss": 0.8986, "step": 16385 }, { - "epoch": 0.46498297389330306, + "epoch": 0.46433732891269236, "grad_norm": 0.0, - "learning_rate": 1.1613253664583864e-05, - "loss": 0.9401, + "learning_rate": 1.1633651989437976e-05, + "loss": 0.9625, "step": 16386 }, { - "epoch": 0.465011350737798, + "epoch": 0.4643656663549548, "grad_norm": 0.0, - "learning_rate": 1.161234661845416e-05, - "loss": 1.0137, + "learning_rate": 1.1632746523182213e-05, + "loss": 0.9317, "step": 16387 }, { - "epoch": 0.46503972758229284, + "epoch": 0.46439400379721724, "grad_norm": 0.0, - "learning_rate": 1.161143955870489e-05, - "loss": 0.8605, + "learning_rate": 1.1631841043173222e-05, + "loss": 0.8608, "step": 16388 }, { - "epoch": 0.46506810442678775, + "epoch": 0.46442234123947973, "grad_norm": 0.0, - "learning_rate": 1.161053248534372e-05, - "loss": 0.8595, + "learning_rate": 1.1630935549418627e-05, + "loss": 0.9264, "step": 16389 }, { - "epoch": 0.4650964812712826, + "epoch": 0.46445067868174217, "grad_norm": 0.0, - "learning_rate": 1.1609625398378308e-05, - "loss": 0.7185, + "learning_rate": 1.163003004192606e-05, + "loss": 0.7909, "step": 16390 }, { - "epoch": 0.46512485811577753, + "epoch": 0.46447901612400466, "grad_norm": 0.0, - "learning_rate": 1.1608718297816322e-05, - "loss": 0.9144, + "learning_rate": 1.1629124520703141e-05, + "loss": 0.8927, "step": 16391 }, { - "epoch": 0.46515323496027244, + "epoch": 0.4645073535662671, "grad_norm": 0.0, - "learning_rate": 1.1607811183665414e-05, - "loss": 0.9603, + "learning_rate": 1.1628218985757504e-05, + "loss": 0.8256, "step": 16392 }, { - "epoch": 0.4651816118047673, + "epoch": 0.4645356910085296, "grad_norm": 0.0, - "learning_rate": 1.1606904055933256e-05, - "loss": 0.8436, + "learning_rate": 1.162731343709677e-05, + "loss": 0.9439, "step": 16393 }, { - "epoch": 0.4652099886492622, + "epoch": 0.46456402845079203, "grad_norm": 0.0, - "learning_rate": 1.1605996914627509e-05, - "loss": 0.9214, + "learning_rate": 1.1626407874728572e-05, + "loss": 0.9807, "step": 16394 }, { - "epoch": 0.4652383654937571, + "epoch": 0.46459236589305447, "grad_norm": 0.0, - "learning_rate": 1.160508975975583e-05, - "loss": 0.8819, + "learning_rate": 1.1625502298660539e-05, + "loss": 0.9839, "step": 16395 }, { - "epoch": 0.465266742338252, + "epoch": 0.46462070333531696, "grad_norm": 0.0, - "learning_rate": 1.1604182591325888e-05, - "loss": 0.9065, + "learning_rate": 1.1624596708900296e-05, + "loss": 0.8483, "step": 16396 }, { - "epoch": 0.46529511918274685, + "epoch": 0.4646490407775794, "grad_norm": 0.0, - "learning_rate": 1.1603275409345343e-05, - "loss": 0.8972, + "learning_rate": 1.1623691105455469e-05, + "loss": 0.8993, "step": 16397 }, { - "epoch": 0.46532349602724177, + "epoch": 0.4646773782198419, "grad_norm": 0.0, - "learning_rate": 1.160236821382186e-05, - "loss": 0.8396, + "learning_rate": 1.162278548833369e-05, + "loss": 0.8864, "step": 16398 }, { - "epoch": 0.4653518728717367, + "epoch": 0.46470571566210434, "grad_norm": 0.0, - "learning_rate": 1.1601461004763097e-05, - "loss": 0.8979, + "learning_rate": 1.1621879857542587e-05, + "loss": 0.9677, "step": 16399 }, { - "epoch": 0.46538024971623154, + "epoch": 0.4647340531043668, "grad_norm": 0.0, - "learning_rate": 1.1600553782176724e-05, - "loss": 0.8728, + "learning_rate": 1.1620974213089786e-05, + "loss": 0.952, "step": 16400 }, { - "epoch": 0.46540862656072646, + "epoch": 0.46476239054662927, "grad_norm": 0.0, - "learning_rate": 1.15996465460704e-05, - "loss": 0.9577, + "learning_rate": 1.1620068554982919e-05, + "loss": 0.9364, "step": 16401 }, { - "epoch": 0.4654370034052213, + "epoch": 0.4647907279888917, "grad_norm": 0.0, - "learning_rate": 1.1598739296451789e-05, - "loss": 0.8661, + "learning_rate": 1.1619162883229611e-05, + "loss": 0.9718, "step": 16402 }, { - "epoch": 0.46546538024971623, + "epoch": 0.4648190654311542, "grad_norm": 0.0, - "learning_rate": 1.1597832033328558e-05, - "loss": 0.8475, + "learning_rate": 1.1618257197837495e-05, + "loss": 0.9026, "step": 16403 }, { - "epoch": 0.46549375709421115, + "epoch": 0.46484740287341664, "grad_norm": 0.0, - "learning_rate": 1.1596924756708366e-05, - "loss": 0.8693, + "learning_rate": 1.1617351498814199e-05, + "loss": 0.8893, "step": 16404 }, { - "epoch": 0.465522133938706, + "epoch": 0.46487574031567913, "grad_norm": 0.0, - "learning_rate": 1.1596017466598879e-05, - "loss": 0.8738, + "learning_rate": 1.161644578616735e-05, + "loss": 0.9803, "step": 16405 }, { - "epoch": 0.4655505107832009, + "epoch": 0.46490407775794157, "grad_norm": 0.0, - "learning_rate": 1.159511016300776e-05, - "loss": 0.892, + "learning_rate": 1.1615540059904572e-05, + "loss": 0.8864, "step": 16406 }, { - "epoch": 0.4655788876276958, + "epoch": 0.464932415200204, "grad_norm": 0.0, - "learning_rate": 1.1594202845942674e-05, - "loss": 0.8705, + "learning_rate": 1.1614634320033505e-05, + "loss": 0.7975, "step": 16407 }, { - "epoch": 0.4656072644721907, + "epoch": 0.4649607526424665, "grad_norm": 0.0, - "learning_rate": 1.1593295515411287e-05, - "loss": 0.896, + "learning_rate": 1.1613728566561775e-05, + "loss": 0.9393, "step": 16408 }, { - "epoch": 0.4656356413166856, + "epoch": 0.46498909008472894, "grad_norm": 0.0, - "learning_rate": 1.1592388171421259e-05, - "loss": 0.8556, + "learning_rate": 1.1612822799497008e-05, + "loss": 0.8015, "step": 16409 }, { - "epoch": 0.46566401816118047, + "epoch": 0.46501742752699143, "grad_norm": 0.0, - "learning_rate": 1.1591480813980257e-05, - "loss": 0.9053, + "learning_rate": 1.1611917018846835e-05, + "loss": 0.8018, "step": 16410 }, { - "epoch": 0.4656923950056754, + "epoch": 0.46504576496925387, "grad_norm": 0.0, - "learning_rate": 1.1590573443095947e-05, - "loss": 0.8126, + "learning_rate": 1.1611011224618888e-05, + "loss": 0.9509, "step": 16411 }, { - "epoch": 0.46572077185017025, + "epoch": 0.4650741024115163, "grad_norm": 0.0, - "learning_rate": 1.1589666058775986e-05, - "loss": 0.8944, + "learning_rate": 1.1610105416820796e-05, + "loss": 0.8767, "step": 16412 }, { - "epoch": 0.46574914869466516, + "epoch": 0.4651024398537788, "grad_norm": 0.0, - "learning_rate": 1.158875866102805e-05, - "loss": 0.9091, + "learning_rate": 1.1609199595460185e-05, + "loss": 0.9534, "step": 16413 }, { - "epoch": 0.46577752553916, + "epoch": 0.46513077729604124, "grad_norm": 0.0, - "learning_rate": 1.1587851249859795e-05, - "loss": 0.8676, + "learning_rate": 1.160829376054469e-05, + "loss": 0.9165, "step": 16414 }, { - "epoch": 0.46580590238365494, + "epoch": 0.46515911473830374, "grad_norm": 0.0, - "learning_rate": 1.1586943825278892e-05, - "loss": 0.8319, + "learning_rate": 1.160738791208194e-05, + "loss": 0.9869, "step": 16415 }, { - "epoch": 0.46583427922814985, + "epoch": 0.4651874521805662, "grad_norm": 0.0, - "learning_rate": 1.1586036387293e-05, - "loss": 0.8285, + "learning_rate": 1.1606482050079563e-05, + "loss": 0.8881, "step": 16416 }, { - "epoch": 0.4658626560726447, + "epoch": 0.46521578962282867, "grad_norm": 0.0, - "learning_rate": 1.158512893590979e-05, - "loss": 0.9255, + "learning_rate": 1.1605576174545195e-05, + "loss": 0.8807, "step": 16417 }, { - "epoch": 0.4658910329171396, + "epoch": 0.4652441270650911, "grad_norm": 0.0, - "learning_rate": 1.1584221471136924e-05, - "loss": 0.9077, + "learning_rate": 1.1604670285486457e-05, + "loss": 0.9662, "step": 16418 }, { - "epoch": 0.4659194097616345, + "epoch": 0.46527246450735354, "grad_norm": 0.0, - "learning_rate": 1.1583313992982067e-05, - "loss": 0.9119, + "learning_rate": 1.1603764382910989e-05, + "loss": 0.8907, "step": 16419 }, { - "epoch": 0.4659477866061294, + "epoch": 0.46530080194961604, "grad_norm": 0.0, - "learning_rate": 1.1582406501452886e-05, - "loss": 0.9488, + "learning_rate": 1.1602858466826417e-05, + "loss": 0.9044, "step": 16420 }, { - "epoch": 0.4659761634506243, + "epoch": 0.4653291393918785, "grad_norm": 0.0, - "learning_rate": 1.1581498996557046e-05, - "loss": 0.8949, + "learning_rate": 1.1601952537240373e-05, + "loss": 0.8896, "step": 16421 }, { - "epoch": 0.4660045402951192, + "epoch": 0.46535747683414097, "grad_norm": 0.0, - "learning_rate": 1.1580591478302212e-05, - "loss": 0.9144, + "learning_rate": 1.1601046594160487e-05, + "loss": 0.8685, "step": 16422 }, { - "epoch": 0.4660329171396141, + "epoch": 0.4653858142764034, "grad_norm": 0.0, - "learning_rate": 1.1579683946696054e-05, - "loss": 0.9346, + "learning_rate": 1.1600140637594392e-05, + "loss": 0.8871, "step": 16423 }, { - "epoch": 0.46606129398410895, + "epoch": 0.46541415171866585, "grad_norm": 0.0, - "learning_rate": 1.1578776401746232e-05, - "loss": 0.8943, + "learning_rate": 1.1599234667549722e-05, + "loss": 0.9533, "step": 16424 }, { - "epoch": 0.46608967082860386, + "epoch": 0.46544248916092834, "grad_norm": 0.0, - "learning_rate": 1.1577868843460415e-05, - "loss": 0.923, + "learning_rate": 1.15983286840341e-05, + "loss": 0.9324, "step": 16425 }, { - "epoch": 0.4661180476730988, + "epoch": 0.4654708266031908, "grad_norm": 0.0, - "learning_rate": 1.1576961271846269e-05, - "loss": 0.9435, + "learning_rate": 1.1597422687055161e-05, + "loss": 0.9401, "step": 16426 }, { - "epoch": 0.46614642451759364, + "epoch": 0.4654991640454533, "grad_norm": 0.0, - "learning_rate": 1.1576053686911459e-05, - "loss": 0.9007, + "learning_rate": 1.1596516676620539e-05, + "loss": 0.866, "step": 16427 }, { - "epoch": 0.46617480136208855, + "epoch": 0.4655275014877157, "grad_norm": 0.0, - "learning_rate": 1.1575146088663656e-05, - "loss": 0.9222, + "learning_rate": 1.1595610652737865e-05, + "loss": 0.8407, "step": 16428 }, { - "epoch": 0.4662031782065834, + "epoch": 0.4655558389299782, "grad_norm": 0.0, - "learning_rate": 1.157423847711052e-05, - "loss": 0.949, + "learning_rate": 1.159470461541477e-05, + "loss": 0.8492, "step": 16429 }, { - "epoch": 0.46623155505107833, + "epoch": 0.46558417637224064, "grad_norm": 0.0, - "learning_rate": 1.1573330852259724e-05, - "loss": 0.8777, + "learning_rate": 1.1593798564658887e-05, + "loss": 0.9148, "step": 16430 }, { - "epoch": 0.4662599318955732, + "epoch": 0.4656125138145031, "grad_norm": 0.0, - "learning_rate": 1.157242321411893e-05, - "loss": 0.9108, + "learning_rate": 1.1592892500477843e-05, + "loss": 0.8969, "step": 16431 }, { - "epoch": 0.4662883087400681, + "epoch": 0.4656408512567656, "grad_norm": 0.0, - "learning_rate": 1.1571515562695805e-05, - "loss": 0.9149, + "learning_rate": 1.1591986422879276e-05, + "loss": 1.0537, "step": 16432 }, { - "epoch": 0.466316685584563, + "epoch": 0.465669188699028, "grad_norm": 0.0, - "learning_rate": 1.1570607897998018e-05, - "loss": 1.0117, + "learning_rate": 1.1591080331870816e-05, + "loss": 0.9045, "step": 16433 }, { - "epoch": 0.4663450624290579, + "epoch": 0.4656975261412905, "grad_norm": 0.0, - "learning_rate": 1.1569700220033237e-05, - "loss": 0.8988, + "learning_rate": 1.1590174227460098e-05, + "loss": 1.0135, "step": 16434 }, { - "epoch": 0.4663734392735528, + "epoch": 0.46572586358355295, "grad_norm": 0.0, - "learning_rate": 1.1568792528809127e-05, - "loss": 0.9345, + "learning_rate": 1.1589268109654748e-05, + "loss": 0.9251, "step": 16435 }, { - "epoch": 0.46640181611804765, + "epoch": 0.4657542010258154, "grad_norm": 0.0, - "learning_rate": 1.1567884824333353e-05, - "loss": 0.7209, + "learning_rate": 1.1588361978462405e-05, + "loss": 0.8653, "step": 16436 }, { - "epoch": 0.46643019296254257, + "epoch": 0.4657825384680779, "grad_norm": 0.0, - "learning_rate": 1.156697710661359e-05, - "loss": 0.909, + "learning_rate": 1.15874558338907e-05, + "loss": 0.8542, "step": 16437 }, { - "epoch": 0.4664585698070375, + "epoch": 0.4658108759103403, "grad_norm": 0.0, - "learning_rate": 1.15660693756575e-05, - "loss": 0.9308, + "learning_rate": 1.1586549675947261e-05, + "loss": 0.913, "step": 16438 }, { - "epoch": 0.46648694665153234, + "epoch": 0.4658392133526028, "grad_norm": 0.0, - "learning_rate": 1.156516163147275e-05, - "loss": 0.7232, + "learning_rate": 1.1585643504639728e-05, + "loss": 0.8567, "step": 16439 }, { - "epoch": 0.46651532349602726, + "epoch": 0.46586755079486525, "grad_norm": 0.0, - "learning_rate": 1.156425387406701e-05, - "loss": 0.8658, + "learning_rate": 1.158473731997573e-05, + "loss": 0.9141, "step": 16440 }, { - "epoch": 0.4665437003405221, + "epoch": 0.46589588823712774, "grad_norm": 0.0, - "learning_rate": 1.1563346103447948e-05, - "loss": 0.8863, + "learning_rate": 1.1583831121962902e-05, + "loss": 0.9339, "step": 16441 }, { - "epoch": 0.46657207718501703, + "epoch": 0.4659242256793902, "grad_norm": 0.0, - "learning_rate": 1.1562438319623231e-05, - "loss": 0.89, + "learning_rate": 1.1582924910608877e-05, + "loss": 0.9971, "step": 16442 }, { - "epoch": 0.4666004540295119, + "epoch": 0.4659525631216526, "grad_norm": 0.0, - "learning_rate": 1.1561530522600524e-05, - "loss": 0.9858, + "learning_rate": 1.1582018685921287e-05, + "loss": 0.9284, "step": 16443 }, { - "epoch": 0.4666288308740068, + "epoch": 0.4659809005639151, "grad_norm": 0.0, - "learning_rate": 1.1560622712387502e-05, - "loss": 1.0635, + "learning_rate": 1.1581112447907766e-05, + "loss": 0.9504, "step": 16444 }, { - "epoch": 0.4666572077185017, + "epoch": 0.46600923800617755, "grad_norm": 0.0, - "learning_rate": 1.1559714888991831e-05, - "loss": 0.8614, + "learning_rate": 1.1580206196575945e-05, + "loss": 0.8419, "step": 16445 }, { - "epoch": 0.4666855845629966, + "epoch": 0.46603757544844004, "grad_norm": 0.0, - "learning_rate": 1.1558807052421174e-05, - "loss": 0.9047, + "learning_rate": 1.1579299931933465e-05, + "loss": 0.8319, "step": 16446 }, { - "epoch": 0.4667139614074915, + "epoch": 0.4660659128907025, "grad_norm": 0.0, - "learning_rate": 1.1557899202683208e-05, - "loss": 0.918, + "learning_rate": 1.1578393653987952e-05, + "loss": 1.0134, "step": 16447 }, { - "epoch": 0.46674233825198636, + "epoch": 0.4660942503329649, "grad_norm": 0.0, - "learning_rate": 1.1556991339785595e-05, - "loss": 0.9861, + "learning_rate": 1.1577487362747043e-05, + "loss": 0.8409, "step": 16448 }, { - "epoch": 0.46677071509648127, + "epoch": 0.4661225877752274, "grad_norm": 0.0, - "learning_rate": 1.1556083463736005e-05, - "loss": 0.8835, + "learning_rate": 1.1576581058218375e-05, + "loss": 0.9248, "step": 16449 }, { - "epoch": 0.4667990919409762, + "epoch": 0.46615092521748985, "grad_norm": 0.0, - "learning_rate": 1.1555175574542108e-05, - "loss": 0.9417, + "learning_rate": 1.1575674740409579e-05, + "loss": 0.8539, "step": 16450 }, { - "epoch": 0.46682746878547104, + "epoch": 0.46617926265975235, "grad_norm": 0.0, - "learning_rate": 1.1554267672211576e-05, - "loss": 0.9302, + "learning_rate": 1.1574768409328288e-05, + "loss": 0.945, "step": 16451 }, { - "epoch": 0.46685584562996596, + "epoch": 0.4662076001020148, "grad_norm": 0.0, - "learning_rate": 1.1553359756752074e-05, - "loss": 0.8166, + "learning_rate": 1.1573862064982135e-05, + "loss": 0.9357, "step": 16452 }, { - "epoch": 0.4668842224744608, + "epoch": 0.4662359375442773, "grad_norm": 0.0, - "learning_rate": 1.1552451828171271e-05, - "loss": 0.9097, + "learning_rate": 1.157295570737876e-05, + "loss": 0.9081, "step": 16453 }, { - "epoch": 0.46691259931895573, + "epoch": 0.4662642749865397, "grad_norm": 0.0, - "learning_rate": 1.155154388647684e-05, - "loss": 0.9465, + "learning_rate": 1.15720493365258e-05, + "loss": 0.8888, "step": 16454 }, { - "epoch": 0.46694097616345065, + "epoch": 0.46629261242880216, "grad_norm": 0.0, - "learning_rate": 1.1550635931676446e-05, - "loss": 0.8157, + "learning_rate": 1.1571142952430878e-05, + "loss": 0.8629, "step": 16455 }, { - "epoch": 0.4669693530079455, + "epoch": 0.46632094987106465, "grad_norm": 0.0, - "learning_rate": 1.1549727963777761e-05, - "loss": 0.8796, + "learning_rate": 1.1570236555101638e-05, + "loss": 0.8727, "step": 16456 }, { - "epoch": 0.4669977298524404, + "epoch": 0.4663492873133271, "grad_norm": 0.0, - "learning_rate": 1.1548819982788455e-05, - "loss": 0.889, + "learning_rate": 1.1569330144545712e-05, + "loss": 0.9285, "step": 16457 }, { - "epoch": 0.4670261066969353, + "epoch": 0.4663776247555896, "grad_norm": 0.0, - "learning_rate": 1.1547911988716194e-05, - "loss": 0.9272, + "learning_rate": 1.1568423720770734e-05, + "loss": 0.863, "step": 16458 }, { - "epoch": 0.4670544835414302, + "epoch": 0.466405962197852, "grad_norm": 0.0, - "learning_rate": 1.1547003981568654e-05, - "loss": 0.9299, + "learning_rate": 1.1567517283784344e-05, + "loss": 0.9023, "step": 16459 }, { - "epoch": 0.46708286038592506, + "epoch": 0.46643429964011446, "grad_norm": 0.0, - "learning_rate": 1.15460959613535e-05, - "loss": 0.897, + "learning_rate": 1.156661083359417e-05, + "loss": 0.8894, "step": 16460 }, { - "epoch": 0.46711123723042, + "epoch": 0.46646263708237695, "grad_norm": 0.0, - "learning_rate": 1.1545187928078407e-05, - "loss": 0.9211, + "learning_rate": 1.1565704370207853e-05, + "loss": 0.8436, "step": 16461 }, { - "epoch": 0.4671396140749149, + "epoch": 0.4664909745246394, "grad_norm": 0.0, - "learning_rate": 1.1544279881751042e-05, - "loss": 0.8841, + "learning_rate": 1.1564797893633029e-05, + "loss": 0.9509, "step": 16462 }, { - "epoch": 0.46716799091940975, + "epoch": 0.4665193119669019, "grad_norm": 0.0, - "learning_rate": 1.154337182237907e-05, - "loss": 0.8645, + "learning_rate": 1.156389140387733e-05, + "loss": 0.7923, "step": 16463 }, { - "epoch": 0.46719636776390466, + "epoch": 0.4665476494091643, "grad_norm": 0.0, - "learning_rate": 1.1542463749970172e-05, - "loss": 0.8961, + "learning_rate": 1.156298490094839e-05, + "loss": 0.8419, "step": 16464 }, { - "epoch": 0.4672247446083995, + "epoch": 0.4665759868514268, "grad_norm": 0.0, - "learning_rate": 1.1541555664532011e-05, - "loss": 0.8619, + "learning_rate": 1.156207838485385e-05, + "loss": 0.8318, "step": 16465 }, { - "epoch": 0.46725312145289444, + "epoch": 0.46660432429368925, "grad_norm": 0.0, - "learning_rate": 1.1540647566072259e-05, - "loss": 0.8127, + "learning_rate": 1.1561171855601344e-05, + "loss": 0.9776, "step": 16466 }, { - "epoch": 0.46728149829738935, + "epoch": 0.4666326617359517, "grad_norm": 0.0, - "learning_rate": 1.153973945459859e-05, - "loss": 0.8131, + "learning_rate": 1.1560265313198507e-05, + "loss": 0.8494, "step": 16467 }, { - "epoch": 0.4673098751418842, + "epoch": 0.4666609991782142, "grad_norm": 0.0, - "learning_rate": 1.1538831330118674e-05, - "loss": 0.9215, + "learning_rate": 1.1559358757652973e-05, + "loss": 0.8714, "step": 16468 }, { - "epoch": 0.4673382519863791, + "epoch": 0.4666893366204766, "grad_norm": 0.0, - "learning_rate": 1.1537923192640178e-05, - "loss": 0.8759, + "learning_rate": 1.1558452188972386e-05, + "loss": 0.9316, "step": 16469 }, { - "epoch": 0.467366628830874, + "epoch": 0.4667176740627391, "grad_norm": 0.0, - "learning_rate": 1.1537015042170776e-05, - "loss": 0.7792, + "learning_rate": 1.1557545607164378e-05, + "loss": 0.9064, "step": 16470 }, { - "epoch": 0.4673950056753689, + "epoch": 0.46674601150500156, "grad_norm": 0.0, - "learning_rate": 1.1536106878718138e-05, - "loss": 0.904, + "learning_rate": 1.155663901223658e-05, + "loss": 0.8824, "step": 16471 }, { - "epoch": 0.4674233825198638, + "epoch": 0.466774348947264, "grad_norm": 0.0, - "learning_rate": 1.153519870228994e-05, - "loss": 0.926, + "learning_rate": 1.1555732404196636e-05, + "loss": 0.891, "step": 16472 }, { - "epoch": 0.4674517593643587, + "epoch": 0.4668026863895265, "grad_norm": 0.0, - "learning_rate": 1.1534290512893844e-05, - "loss": 0.9424, + "learning_rate": 1.1554825783052181e-05, + "loss": 0.8941, "step": 16473 }, { - "epoch": 0.4674801362088536, + "epoch": 0.4668310238317889, "grad_norm": 0.0, - "learning_rate": 1.1533382310537531e-05, - "loss": 0.8559, + "learning_rate": 1.1553919148810853e-05, + "loss": 0.8615, "step": 16474 }, { - "epoch": 0.46750851305334845, + "epoch": 0.4668593612740514, "grad_norm": 0.0, - "learning_rate": 1.1532474095228667e-05, - "loss": 0.893, + "learning_rate": 1.1553012501480285e-05, + "loss": 0.8406, "step": 16475 }, { - "epoch": 0.46753688989784337, + "epoch": 0.46688769871631386, "grad_norm": 0.0, - "learning_rate": 1.1531565866974926e-05, - "loss": 1.0237, + "learning_rate": 1.1552105841068114e-05, + "loss": 0.8397, "step": 16476 }, { - "epoch": 0.4675652667423382, + "epoch": 0.46691603615857635, "grad_norm": 0.0, - "learning_rate": 1.153065762578398e-05, - "loss": 0.8652, + "learning_rate": 1.1551199167581982e-05, + "loss": 0.9052, "step": 16477 }, { - "epoch": 0.46759364358683314, + "epoch": 0.4669443736008388, "grad_norm": 0.0, - "learning_rate": 1.1529749371663498e-05, - "loss": 0.8726, + "learning_rate": 1.155029248102952e-05, + "loss": 1.0063, "step": 16478 }, { - "epoch": 0.46762202043132806, + "epoch": 0.46697271104310123, "grad_norm": 0.0, - "learning_rate": 1.1528841104621155e-05, - "loss": 0.962, + "learning_rate": 1.1549385781418372e-05, + "loss": 0.901, "step": 16479 }, { - "epoch": 0.4676503972758229, + "epoch": 0.4670010484853637, "grad_norm": 0.0, - "learning_rate": 1.1527932824664624e-05, - "loss": 0.8832, + "learning_rate": 1.1548479068756169e-05, + "loss": 0.8325, "step": 16480 }, { - "epoch": 0.46767877412031783, + "epoch": 0.46702938592762616, "grad_norm": 0.0, - "learning_rate": 1.1527024531801576e-05, - "loss": 0.8797, + "learning_rate": 1.1547572343050556e-05, + "loss": 0.9574, "step": 16481 }, { - "epoch": 0.4677071509648127, + "epoch": 0.46705772336988866, "grad_norm": 0.0, - "learning_rate": 1.1526116226039683e-05, - "loss": 0.9489, + "learning_rate": 1.1546665604309165e-05, + "loss": 0.9, "step": 16482 }, { - "epoch": 0.4677355278093076, + "epoch": 0.4670860608121511, "grad_norm": 0.0, - "learning_rate": 1.1525207907386615e-05, - "loss": 0.8302, + "learning_rate": 1.1545758852539634e-05, + "loss": 0.8215, "step": 16483 }, { - "epoch": 0.4677639046538025, + "epoch": 0.46711439825441353, "grad_norm": 0.0, - "learning_rate": 1.1524299575850048e-05, - "loss": 0.877, + "learning_rate": 1.1544852087749604e-05, + "loss": 0.928, "step": 16484 }, { - "epoch": 0.4677922814982974, + "epoch": 0.467142735696676, "grad_norm": 0.0, - "learning_rate": 1.1523391231437657e-05, - "loss": 0.9542, + "learning_rate": 1.1543945309946707e-05, + "loss": 0.8909, "step": 16485 }, { - "epoch": 0.4678206583427923, + "epoch": 0.46717107313893846, "grad_norm": 0.0, - "learning_rate": 1.1522482874157108e-05, - "loss": 0.9932, + "learning_rate": 1.1543038519138589e-05, + "loss": 0.9444, "step": 16486 }, { - "epoch": 0.46784903518728715, + "epoch": 0.46719941058120096, "grad_norm": 0.0, - "learning_rate": 1.1521574504016082e-05, - "loss": 0.8757, + "learning_rate": 1.1542131715332886e-05, + "loss": 0.8653, "step": 16487 }, { - "epoch": 0.46787741203178207, + "epoch": 0.4672277480234634, "grad_norm": 0.0, - "learning_rate": 1.1520666121022246e-05, - "loss": 0.9525, + "learning_rate": 1.154122489853723e-05, + "loss": 0.8593, "step": 16488 }, { - "epoch": 0.467905788876277, + "epoch": 0.4672560854657259, "grad_norm": 0.0, - "learning_rate": 1.1519757725183272e-05, - "loss": 0.922, + "learning_rate": 1.1540318068759268e-05, + "loss": 0.8497, "step": 16489 }, { - "epoch": 0.46793416572077184, + "epoch": 0.46728442290798833, "grad_norm": 0.0, - "learning_rate": 1.1518849316506837e-05, - "loss": 0.8588, + "learning_rate": 1.1539411226006636e-05, + "loss": 0.8527, "step": 16490 }, { - "epoch": 0.46796254256526676, + "epoch": 0.46731276035025077, "grad_norm": 0.0, - "learning_rate": 1.1517940895000616e-05, - "loss": 0.7337, + "learning_rate": 1.1538504370286967e-05, + "loss": 0.8956, "step": 16491 }, { - "epoch": 0.4679909194097616, + "epoch": 0.46734109779251326, "grad_norm": 0.0, - "learning_rate": 1.151703246067228e-05, - "loss": 0.9023, + "learning_rate": 1.1537597501607908e-05, + "loss": 0.8839, "step": 16492 }, { - "epoch": 0.46801929625425653, + "epoch": 0.4673694352347757, "grad_norm": 0.0, - "learning_rate": 1.15161240135295e-05, - "loss": 0.9536, + "learning_rate": 1.153669061997709e-05, + "loss": 0.9237, "step": 16493 }, { - "epoch": 0.4680476730987514, + "epoch": 0.4673977726770382, "grad_norm": 0.0, - "learning_rate": 1.1515215553579955e-05, - "loss": 0.9031, + "learning_rate": 1.1535783725402163e-05, + "loss": 0.8785, "step": 16494 }, { - "epoch": 0.4680760499432463, + "epoch": 0.46742611011930063, "grad_norm": 0.0, - "learning_rate": 1.1514307080831316e-05, - "loss": 0.9965, + "learning_rate": 1.1534876817890756e-05, + "loss": 0.9309, "step": 16495 }, { - "epoch": 0.4681044267877412, + "epoch": 0.46745444756156307, "grad_norm": 0.0, - "learning_rate": 1.1513398595291254e-05, - "loss": 0.8084, + "learning_rate": 1.1533969897450512e-05, + "loss": 0.9159, "step": 16496 }, { - "epoch": 0.4681328036322361, + "epoch": 0.46748278500382556, "grad_norm": 0.0, - "learning_rate": 1.1512490096967449e-05, - "loss": 0.947, + "learning_rate": 1.1533062964089068e-05, + "loss": 0.918, "step": 16497 }, { - "epoch": 0.468161180476731, + "epoch": 0.467511122446088, "grad_norm": 0.0, - "learning_rate": 1.151158158586757e-05, - "loss": 0.9691, + "learning_rate": 1.1532156017814068e-05, + "loss": 0.9536, "step": 16498 }, { - "epoch": 0.46818955732122586, + "epoch": 0.4675394598883505, "grad_norm": 0.0, - "learning_rate": 1.1510673061999296e-05, - "loss": 0.9252, + "learning_rate": 1.1531249058633147e-05, + "loss": 0.7664, "step": 16499 }, { - "epoch": 0.4682179341657208, + "epoch": 0.46756779733061293, "grad_norm": 0.0, - "learning_rate": 1.1509764525370294e-05, - "loss": 0.8102, + "learning_rate": 1.1530342086553947e-05, + "loss": 0.8873, "step": 16500 }, { - "epoch": 0.4682463110102157, + "epoch": 0.4675961347728754, "grad_norm": 0.0, - "learning_rate": 1.150885597598825e-05, - "loss": 0.9027, + "learning_rate": 1.1529435101584108e-05, + "loss": 0.9182, "step": 16501 }, { - "epoch": 0.46827468785471055, + "epoch": 0.46762447221513787, "grad_norm": 0.0, - "learning_rate": 1.1507947413860827e-05, - "loss": 0.9237, + "learning_rate": 1.152852810373127e-05, + "loss": 0.7964, "step": 16502 }, { - "epoch": 0.46830306469920546, + "epoch": 0.4676528096574003, "grad_norm": 0.0, - "learning_rate": 1.1507038838995705e-05, - "loss": 0.9644, + "learning_rate": 1.1527621093003071e-05, + "loss": 0.8431, "step": 16503 }, { - "epoch": 0.4683314415437003, + "epoch": 0.4676811470996628, "grad_norm": 0.0, - "learning_rate": 1.1506130251400558e-05, - "loss": 0.9375, + "learning_rate": 1.152671406940715e-05, + "loss": 0.8106, "step": 16504 }, { - "epoch": 0.46835981838819524, + "epoch": 0.46770948454192524, "grad_norm": 0.0, - "learning_rate": 1.1505221651083063e-05, - "loss": 0.9161, + "learning_rate": 1.152580703295115e-05, + "loss": 0.9075, "step": 16505 }, { - "epoch": 0.46838819523269015, + "epoch": 0.46773782198418773, "grad_norm": 0.0, - "learning_rate": 1.1504313038050893e-05, - "loss": 0.8284, + "learning_rate": 1.1524899983642715e-05, + "loss": 1.0591, "step": 16506 }, { - "epoch": 0.468416572077185, + "epoch": 0.46776615942645017, "grad_norm": 0.0, - "learning_rate": 1.150340441231172e-05, - "loss": 0.87, + "learning_rate": 1.152399292148948e-05, + "loss": 0.978, "step": 16507 }, { - "epoch": 0.4684449489216799, + "epoch": 0.4677944968687126, "grad_norm": 0.0, - "learning_rate": 1.1502495773873226e-05, - "loss": 0.8517, + "learning_rate": 1.1523085846499085e-05, + "loss": 0.9245, "step": 16508 }, { - "epoch": 0.4684733257661748, + "epoch": 0.4678228343109751, "grad_norm": 0.0, - "learning_rate": 1.1501587122743081e-05, - "loss": 1.0031, + "learning_rate": 1.1522178758679172e-05, + "loss": 0.9284, "step": 16509 }, { - "epoch": 0.4685017026106697, + "epoch": 0.46785117175323754, "grad_norm": 0.0, - "learning_rate": 1.1500678458928962e-05, - "loss": 0.8494, + "learning_rate": 1.1521271658037383e-05, + "loss": 0.8252, "step": 16510 }, { - "epoch": 0.46853007945516456, + "epoch": 0.46787950919550003, "grad_norm": 0.0, - "learning_rate": 1.1499769782438547e-05, - "loss": 0.9038, + "learning_rate": 1.1520364544581357e-05, + "loss": 0.8414, "step": 16511 }, { - "epoch": 0.4685584562996595, + "epoch": 0.46790784663776247, "grad_norm": 0.0, - "learning_rate": 1.1498861093279503e-05, - "loss": 0.8815, + "learning_rate": 1.1519457418318738e-05, + "loss": 0.8581, "step": 16512 }, { - "epoch": 0.4685868331441544, + "epoch": 0.46793618408002496, "grad_norm": 0.0, - "learning_rate": 1.1497952391459517e-05, - "loss": 0.8266, + "learning_rate": 1.1518550279257164e-05, + "loss": 0.8508, "step": 16513 }, { - "epoch": 0.46861520998864925, + "epoch": 0.4679645215222874, "grad_norm": 0.0, - "learning_rate": 1.1497043676986256e-05, - "loss": 0.9851, + "learning_rate": 1.1517643127404274e-05, + "loss": 0.9369, "step": 16514 }, { - "epoch": 0.46864358683314417, + "epoch": 0.46799285896454984, "grad_norm": 0.0, - "learning_rate": 1.1496134949867405e-05, - "loss": 0.8853, + "learning_rate": 1.1516735962767716e-05, + "loss": 0.9283, "step": 16515 }, { - "epoch": 0.468671963677639, + "epoch": 0.46802119640681233, "grad_norm": 0.0, - "learning_rate": 1.1495226210110633e-05, - "loss": 0.9241, + "learning_rate": 1.1515828785355128e-05, + "loss": 0.8762, "step": 16516 }, { - "epoch": 0.46870034052213394, + "epoch": 0.4680495338490748, "grad_norm": 0.0, - "learning_rate": 1.1494317457723615e-05, - "loss": 0.9029, + "learning_rate": 1.151492159517415e-05, + "loss": 0.9257, "step": 16517 }, { - "epoch": 0.46872871736662886, + "epoch": 0.46807787129133727, "grad_norm": 0.0, - "learning_rate": 1.1493408692714035e-05, - "loss": 0.9599, + "learning_rate": 1.1514014392232422e-05, + "loss": 0.8985, "step": 16518 }, { - "epoch": 0.4687570942111237, + "epoch": 0.4681062087335997, "grad_norm": 0.0, - "learning_rate": 1.149249991508956e-05, - "loss": 0.8054, + "learning_rate": 1.1513107176537593e-05, + "loss": 1.0114, "step": 16519 }, { - "epoch": 0.46878547105561863, + "epoch": 0.46813454617586214, "grad_norm": 0.0, - "learning_rate": 1.1491591124857873e-05, - "loss": 0.9128, + "learning_rate": 1.15121999480973e-05, + "loss": 0.9555, "step": 16520 }, { - "epoch": 0.4688138479001135, + "epoch": 0.46816288361812464, "grad_norm": 0.0, - "learning_rate": 1.149068232202665e-05, - "loss": 0.8438, + "learning_rate": 1.1511292706919184e-05, + "loss": 0.9048, "step": 16521 }, { - "epoch": 0.4688422247446084, + "epoch": 0.4681912210603871, "grad_norm": 0.0, - "learning_rate": 1.1489773506603566e-05, - "loss": 0.7239, + "learning_rate": 1.1510385453010886e-05, + "loss": 0.8688, "step": 16522 }, { - "epoch": 0.46887060158910326, + "epoch": 0.46821955850264957, "grad_norm": 0.0, - "learning_rate": 1.1488864678596296e-05, - "loss": 0.927, + "learning_rate": 1.1509478186380054e-05, + "loss": 0.8499, "step": 16523 }, { - "epoch": 0.4688989784335982, + "epoch": 0.468247895944912, "grad_norm": 0.0, - "learning_rate": 1.1487955838012523e-05, - "loss": 0.8446, + "learning_rate": 1.1508570907034325e-05, + "loss": 0.8765, "step": 16524 }, { - "epoch": 0.4689273552780931, + "epoch": 0.4682762333871745, "grad_norm": 0.0, - "learning_rate": 1.1487046984859917e-05, - "loss": 0.8542, + "learning_rate": 1.1507663614981343e-05, + "loss": 0.9143, "step": 16525 }, { - "epoch": 0.46895573212258795, + "epoch": 0.46830457082943694, "grad_norm": 0.0, - "learning_rate": 1.1486138119146162e-05, - "loss": 0.9171, + "learning_rate": 1.150675631022875e-05, + "loss": 0.8753, "step": 16526 }, { - "epoch": 0.46898410896708287, + "epoch": 0.4683329082716994, "grad_norm": 0.0, - "learning_rate": 1.1485229240878924e-05, - "loss": 0.9346, + "learning_rate": 1.1505848992784192e-05, + "loss": 0.9589, "step": 16527 }, { - "epoch": 0.46901248581157773, + "epoch": 0.46836124571396187, "grad_norm": 0.0, - "learning_rate": 1.1484320350065894e-05, - "loss": 0.8829, + "learning_rate": 1.1504941662655309e-05, + "loss": 0.9031, "step": 16528 }, { - "epoch": 0.46904086265607264, + "epoch": 0.4683895831562243, "grad_norm": 0.0, - "learning_rate": 1.1483411446714744e-05, - "loss": 0.8365, + "learning_rate": 1.1504034319849741e-05, + "loss": 0.8396, "step": 16529 }, { - "epoch": 0.46906923950056756, + "epoch": 0.4684179205984868, "grad_norm": 0.0, - "learning_rate": 1.1482502530833147e-05, - "loss": 1.0234, + "learning_rate": 1.1503126964375133e-05, + "loss": 0.9385, "step": 16530 }, { - "epoch": 0.4690976163450624, + "epoch": 0.46844625804074924, "grad_norm": 0.0, - "learning_rate": 1.1481593602428786e-05, - "loss": 0.9781, + "learning_rate": 1.1502219596239128e-05, + "loss": 0.9869, "step": 16531 }, { - "epoch": 0.46912599318955733, + "epoch": 0.4684745954830117, "grad_norm": 0.0, - "learning_rate": 1.1480684661509338e-05, - "loss": 0.993, + "learning_rate": 1.1501312215449372e-05, + "loss": 0.8837, "step": 16532 }, { - "epoch": 0.4691543700340522, + "epoch": 0.4685029329252742, "grad_norm": 0.0, - "learning_rate": 1.1479775708082479e-05, - "loss": 0.9545, + "learning_rate": 1.1500404822013505e-05, + "loss": 1.0148, "step": 16533 }, { - "epoch": 0.4691827468785471, + "epoch": 0.4685312703675366, "grad_norm": 0.0, - "learning_rate": 1.1478866742155889e-05, - "loss": 0.972, + "learning_rate": 1.149949741593917e-05, + "loss": 0.955, "step": 16534 }, { - "epoch": 0.469211123723042, + "epoch": 0.4685596078097991, "grad_norm": 0.0, - "learning_rate": 1.1477957763737243e-05, - "loss": 0.8992, + "learning_rate": 1.149858999723401e-05, + "loss": 0.8503, "step": 16535 }, { - "epoch": 0.4692395005675369, + "epoch": 0.46858794525206154, "grad_norm": 0.0, - "learning_rate": 1.1477048772834225e-05, - "loss": 0.9169, + "learning_rate": 1.1497682565905674e-05, + "loss": 0.9692, "step": 16536 }, { - "epoch": 0.4692678774120318, + "epoch": 0.46861628269432404, "grad_norm": 0.0, - "learning_rate": 1.1476139769454506e-05, - "loss": 0.8003, + "learning_rate": 1.1496775121961799e-05, + "loss": 0.8685, "step": 16537 }, { - "epoch": 0.46929625425652666, + "epoch": 0.4686446201365865, "grad_norm": 0.0, - "learning_rate": 1.147523075360577e-05, - "loss": 0.9362, + "learning_rate": 1.149586766541003e-05, + "loss": 0.8971, "step": 16538 }, { - "epoch": 0.46932463110102157, + "epoch": 0.4686729575788489, "grad_norm": 0.0, - "learning_rate": 1.1474321725295694e-05, - "loss": 0.9053, + "learning_rate": 1.1494960196258016e-05, + "loss": 0.8644, "step": 16539 }, { - "epoch": 0.46935300794551643, + "epoch": 0.4687012950211114, "grad_norm": 0.0, - "learning_rate": 1.1473412684531955e-05, - "loss": 0.8727, + "learning_rate": 1.1494052714513395e-05, + "loss": 0.8857, "step": 16540 }, { - "epoch": 0.46938138479001135, + "epoch": 0.46872963246337385, "grad_norm": 0.0, - "learning_rate": 1.1472503631322233e-05, - "loss": 1.0294, + "learning_rate": 1.1493145220183814e-05, + "loss": 0.8413, "step": 16541 }, { - "epoch": 0.46940976163450626, + "epoch": 0.46875796990563634, "grad_norm": 0.0, - "learning_rate": 1.1471594565674207e-05, - "loss": 0.9494, + "learning_rate": 1.1492237713276915e-05, + "loss": 0.866, "step": 16542 }, { - "epoch": 0.4694381384790011, + "epoch": 0.4687863073478988, "grad_norm": 0.0, - "learning_rate": 1.1470685487595557e-05, - "loss": 0.9202, + "learning_rate": 1.1491330193800345e-05, + "loss": 0.9505, "step": 16543 }, { - "epoch": 0.46946651532349604, + "epoch": 0.4688146447901612, "grad_norm": 0.0, - "learning_rate": 1.1469776397093954e-05, - "loss": 0.8973, + "learning_rate": 1.1490422661761744e-05, + "loss": 0.9675, "step": 16544 }, { - "epoch": 0.4694948921679909, + "epoch": 0.4688429822324237, "grad_norm": 0.0, - "learning_rate": 1.1468867294177091e-05, - "loss": 0.9338, + "learning_rate": 1.1489515117168763e-05, + "loss": 0.9015, "step": 16545 }, { - "epoch": 0.4695232690124858, + "epoch": 0.46887131967468615, "grad_norm": 0.0, - "learning_rate": 1.1467958178852638e-05, - "loss": 0.8811, + "learning_rate": 1.1488607560029043e-05, + "loss": 0.9888, "step": 16546 }, { - "epoch": 0.4695516458569807, + "epoch": 0.46889965711694864, "grad_norm": 0.0, - "learning_rate": 1.1467049051128275e-05, - "loss": 0.974, + "learning_rate": 1.1487699990350228e-05, + "loss": 0.9417, "step": 16547 }, { - "epoch": 0.4695800227014756, + "epoch": 0.4689279945592111, "grad_norm": 0.0, - "learning_rate": 1.1466139911011684e-05, - "loss": 0.8195, + "learning_rate": 1.1486792408139962e-05, + "loss": 0.9686, "step": 16548 }, { - "epoch": 0.4696083995459705, + "epoch": 0.4689563320014735, "grad_norm": 0.0, - "learning_rate": 1.1465230758510544e-05, - "loss": 0.8756, + "learning_rate": 1.1485884813405893e-05, + "loss": 0.8448, "step": 16549 }, { - "epoch": 0.46963677639046536, + "epoch": 0.468984669443736, "grad_norm": 0.0, - "learning_rate": 1.1464321593632533e-05, - "loss": 0.8668, + "learning_rate": 1.1484977206155662e-05, + "loss": 0.9247, "step": 16550 }, { - "epoch": 0.4696651532349603, + "epoch": 0.46901300688599845, "grad_norm": 0.0, - "learning_rate": 1.146341241638533e-05, - "loss": 0.8128, + "learning_rate": 1.1484069586396919e-05, + "loss": 0.9569, "step": 16551 }, { - "epoch": 0.4696935300794552, + "epoch": 0.46904134432826095, "grad_norm": 0.0, - "learning_rate": 1.1462503226776618e-05, - "loss": 0.8286, + "learning_rate": 1.1483161954137308e-05, + "loss": 0.8893, "step": 16552 }, { - "epoch": 0.46972190692395005, + "epoch": 0.4690696817705234, "grad_norm": 0.0, - "learning_rate": 1.1461594024814075e-05, - "loss": 0.9275, + "learning_rate": 1.148225430938447e-05, + "loss": 0.94, "step": 16553 }, { - "epoch": 0.46975028376844497, + "epoch": 0.4690980192127859, "grad_norm": 0.0, - "learning_rate": 1.1460684810505383e-05, - "loss": 0.797, + "learning_rate": 1.1481346652146057e-05, + "loss": 0.9102, "step": 16554 }, { - "epoch": 0.4697786606129398, + "epoch": 0.4691263566550483, "grad_norm": 0.0, - "learning_rate": 1.145977558385822e-05, - "loss": 0.9097, + "learning_rate": 1.1480438982429707e-05, + "loss": 0.8906, "step": 16555 }, { - "epoch": 0.46980703745743474, + "epoch": 0.46915469409731075, "grad_norm": 0.0, - "learning_rate": 1.1458866344880266e-05, - "loss": 0.8122, + "learning_rate": 1.1479531300243072e-05, + "loss": 0.8867, "step": 16556 }, { - "epoch": 0.4698354143019296, + "epoch": 0.46918303153957325, "grad_norm": 0.0, - "learning_rate": 1.1457957093579202e-05, - "loss": 0.981, + "learning_rate": 1.1478623605593795e-05, + "loss": 0.9583, "step": 16557 }, { - "epoch": 0.4698637911464245, + "epoch": 0.4692113689818357, "grad_norm": 0.0, - "learning_rate": 1.1457047829962708e-05, - "loss": 0.961, + "learning_rate": 1.1477715898489522e-05, + "loss": 0.8214, "step": 16558 }, { - "epoch": 0.46989216799091943, + "epoch": 0.4692397064240982, "grad_norm": 0.0, - "learning_rate": 1.145613855403847e-05, - "loss": 0.8892, + "learning_rate": 1.1476808178937899e-05, + "loss": 0.9832, "step": 16559 }, { - "epoch": 0.4699205448354143, + "epoch": 0.4692680438663606, "grad_norm": 0.0, - "learning_rate": 1.1455229265814163e-05, - "loss": 0.7836, + "learning_rate": 1.1475900446946575e-05, + "loss": 0.9703, "step": 16560 }, { - "epoch": 0.4699489216799092, + "epoch": 0.46929638130862306, "grad_norm": 0.0, - "learning_rate": 1.1454319965297464e-05, - "loss": 0.942, + "learning_rate": 1.1474992702523191e-05, + "loss": 0.9962, "step": 16561 }, { - "epoch": 0.46997729852440406, + "epoch": 0.46932471875088555, "grad_norm": 0.0, - "learning_rate": 1.1453410652496064e-05, - "loss": 0.8665, + "learning_rate": 1.1474084945675396e-05, + "loss": 0.8878, "step": 16562 }, { - "epoch": 0.470005675368899, + "epoch": 0.469353056193148, "grad_norm": 0.0, - "learning_rate": 1.145250132741764e-05, - "loss": 0.919, + "learning_rate": 1.1473177176410835e-05, + "loss": 0.8756, "step": 16563 }, { - "epoch": 0.4700340522133939, + "epoch": 0.4693813936354105, "grad_norm": 0.0, - "learning_rate": 1.1451591990069867e-05, - "loss": 0.9344, + "learning_rate": 1.147226939473716e-05, + "loss": 0.9269, "step": 16564 }, { - "epoch": 0.47006242905788875, + "epoch": 0.4694097310776729, "grad_norm": 0.0, - "learning_rate": 1.1450682640460434e-05, - "loss": 0.8555, + "learning_rate": 1.147136160066201e-05, + "loss": 0.8438, "step": 16565 }, { - "epoch": 0.47009080590238367, + "epoch": 0.4694380685199354, "grad_norm": 0.0, - "learning_rate": 1.1449773278597018e-05, - "loss": 0.8358, + "learning_rate": 1.1470453794193034e-05, + "loss": 0.845, "step": 16566 }, { - "epoch": 0.47011918274687853, + "epoch": 0.46946640596219785, "grad_norm": 0.0, - "learning_rate": 1.1448863904487304e-05, - "loss": 0.8931, + "learning_rate": 1.1469545975337884e-05, + "loss": 0.8371, "step": 16567 }, { - "epoch": 0.47014755959137344, + "epoch": 0.4694947434044603, "grad_norm": 0.0, - "learning_rate": 1.144795451813897e-05, - "loss": 0.9275, + "learning_rate": 1.1468638144104195e-05, + "loss": 0.8283, "step": 16568 }, { - "epoch": 0.47017593643586836, + "epoch": 0.4695230808467228, "grad_norm": 0.0, - "learning_rate": 1.14470451195597e-05, - "loss": 0.9636, + "learning_rate": 1.1467730300499626e-05, + "loss": 0.8376, "step": 16569 }, { - "epoch": 0.4702043132803632, + "epoch": 0.4695514182889852, "grad_norm": 0.0, - "learning_rate": 1.1446135708757176e-05, - "loss": 0.9918, + "learning_rate": 1.146682244453182e-05, + "loss": 0.9609, "step": 16570 }, { - "epoch": 0.47023269012485813, + "epoch": 0.4695797557312477, "grad_norm": 0.0, - "learning_rate": 1.1445226285739075e-05, - "loss": 0.9234, + "learning_rate": 1.1465914576208423e-05, + "loss": 0.9418, "step": 16571 }, { - "epoch": 0.470261066969353, + "epoch": 0.46960809317351015, "grad_norm": 0.0, - "learning_rate": 1.1444316850513086e-05, - "loss": 0.817, + "learning_rate": 1.1465006695537082e-05, + "loss": 0.9512, "step": 16572 }, { - "epoch": 0.4702894438138479, + "epoch": 0.4696364306157726, "grad_norm": 0.0, - "learning_rate": 1.1443407403086886e-05, - "loss": 0.9188, + "learning_rate": 1.1464098802525449e-05, + "loss": 0.8946, "step": 16573 }, { - "epoch": 0.47031782065834277, + "epoch": 0.4696647680580351, "grad_norm": 0.0, - "learning_rate": 1.1442497943468158e-05, - "loss": 0.863, + "learning_rate": 1.1463190897181167e-05, + "loss": 0.8302, "step": 16574 }, { - "epoch": 0.4703461975028377, + "epoch": 0.4696931055002975, "grad_norm": 0.0, - "learning_rate": 1.1441588471664587e-05, - "loss": 0.8771, + "learning_rate": 1.1462282979511883e-05, + "loss": 0.8057, "step": 16575 }, { - "epoch": 0.4703745743473326, + "epoch": 0.46972144294256, "grad_norm": 0.0, - "learning_rate": 1.1440678987683853e-05, - "loss": 0.7784, + "learning_rate": 1.1461375049525246e-05, + "loss": 0.786, "step": 16576 }, { - "epoch": 0.47040295119182746, + "epoch": 0.46974978038482246, "grad_norm": 0.0, - "learning_rate": 1.1439769491533636e-05, - "loss": 0.8241, + "learning_rate": 1.1460467107228906e-05, + "loss": 1.038, "step": 16577 }, { - "epoch": 0.47043132803632237, + "epoch": 0.46977811782708495, "grad_norm": 0.0, - "learning_rate": 1.1438859983221625e-05, - "loss": 0.8536, + "learning_rate": 1.145955915263051e-05, + "loss": 0.9966, "step": 16578 }, { - "epoch": 0.47045970488081723, + "epoch": 0.4698064552693474, "grad_norm": 0.0, - "learning_rate": 1.1437950462755498e-05, - "loss": 0.9648, + "learning_rate": 1.1458651185737703e-05, + "loss": 0.7929, "step": 16579 }, { - "epoch": 0.47048808172531215, + "epoch": 0.4698347927116098, "grad_norm": 0.0, - "learning_rate": 1.143704093014294e-05, - "loss": 0.8518, + "learning_rate": 1.1457743206558137e-05, + "loss": 0.9134, "step": 16580 }, { - "epoch": 0.47051645856980706, + "epoch": 0.4698631301538723, "grad_norm": 0.0, - "learning_rate": 1.143613138539163e-05, - "loss": 0.8684, + "learning_rate": 1.1456835215099457e-05, + "loss": 0.883, "step": 16581 }, { - "epoch": 0.4705448354143019, + "epoch": 0.46989146759613476, "grad_norm": 0.0, - "learning_rate": 1.1435221828509253e-05, - "loss": 0.9709, + "learning_rate": 1.1455927211369314e-05, + "loss": 0.9065, "step": 16582 }, { - "epoch": 0.47057321225879684, + "epoch": 0.46991980503839725, "grad_norm": 0.0, - "learning_rate": 1.1434312259503498e-05, - "loss": 0.8825, + "learning_rate": 1.1455019195375356e-05, + "loss": 0.8351, "step": 16583 }, { - "epoch": 0.4706015891032917, + "epoch": 0.4699481424806597, "grad_norm": 0.0, - "learning_rate": 1.1433402678382037e-05, - "loss": 0.7996, + "learning_rate": 1.1454111167125231e-05, + "loss": 0.8991, "step": 16584 }, { - "epoch": 0.4706299659477866, + "epoch": 0.46997647992292213, "grad_norm": 0.0, - "learning_rate": 1.1432493085152563e-05, - "loss": 0.9798, + "learning_rate": 1.1453203126626586e-05, + "loss": 0.8073, "step": 16585 }, { - "epoch": 0.4706583427922815, + "epoch": 0.4700048173651846, "grad_norm": 0.0, - "learning_rate": 1.1431583479822754e-05, - "loss": 0.933, + "learning_rate": 1.1452295073887074e-05, + "loss": 0.9213, "step": 16586 }, { - "epoch": 0.4706867196367764, + "epoch": 0.47003315480744706, "grad_norm": 0.0, - "learning_rate": 1.1430673862400295e-05, - "loss": 0.9002, + "learning_rate": 1.145138700891434e-05, + "loss": 0.7989, "step": 16587 }, { - "epoch": 0.4707150964812713, + "epoch": 0.47006149224970956, "grad_norm": 0.0, - "learning_rate": 1.1429764232892868e-05, - "loss": 0.8213, + "learning_rate": 1.1450478931716033e-05, + "loss": 1.0092, "step": 16588 }, { - "epoch": 0.47074347332576616, + "epoch": 0.470089829691972, "grad_norm": 0.0, - "learning_rate": 1.142885459130816e-05, - "loss": 0.8171, + "learning_rate": 1.1449570842299804e-05, + "loss": 0.8834, "step": 16589 }, { - "epoch": 0.4707718501702611, + "epoch": 0.4701181671342345, "grad_norm": 0.0, - "learning_rate": 1.1427944937653855e-05, - "loss": 0.8646, + "learning_rate": 1.1448662740673304e-05, + "loss": 0.8573, "step": 16590 }, { - "epoch": 0.47080022701475593, + "epoch": 0.4701465045764969, "grad_norm": 0.0, - "learning_rate": 1.142703527193763e-05, - "loss": 0.8422, + "learning_rate": 1.1447754626844178e-05, + "loss": 0.9522, "step": 16591 }, { - "epoch": 0.47082860385925085, + "epoch": 0.47017484201875936, "grad_norm": 0.0, - "learning_rate": 1.1426125594167179e-05, - "loss": 0.8091, + "learning_rate": 1.1446846500820076e-05, + "loss": 0.9365, "step": 16592 }, { - "epoch": 0.47085698070374576, + "epoch": 0.47020317946102186, "grad_norm": 0.0, - "learning_rate": 1.1425215904350179e-05, - "loss": 0.9407, + "learning_rate": 1.144593836260865e-05, + "loss": 0.9214, "step": 16593 }, { - "epoch": 0.4708853575482406, + "epoch": 0.4702315169032843, "grad_norm": 0.0, - "learning_rate": 1.1424306202494315e-05, - "loss": 0.8339, + "learning_rate": 1.1445030212217549e-05, + "loss": 0.9507, "step": 16594 }, { - "epoch": 0.47091373439273554, + "epoch": 0.4702598543455468, "grad_norm": 0.0, - "learning_rate": 1.1423396488607275e-05, - "loss": 0.8624, + "learning_rate": 1.1444122049654421e-05, + "loss": 0.8446, "step": 16595 }, { - "epoch": 0.4709421112372304, + "epoch": 0.47028819178780923, "grad_norm": 0.0, - "learning_rate": 1.1422486762696736e-05, - "loss": 0.9097, + "learning_rate": 1.1443213874926914e-05, + "loss": 1.0171, "step": 16596 }, { - "epoch": 0.4709704880817253, + "epoch": 0.47031652923007167, "grad_norm": 0.0, - "learning_rate": 1.1421577024770392e-05, - "loss": 0.9191, + "learning_rate": 1.1442305688042685e-05, + "loss": 0.8046, "step": 16597 }, { - "epoch": 0.47099886492622023, + "epoch": 0.47034486667233416, "grad_norm": 0.0, - "learning_rate": 1.1420667274835922e-05, - "loss": 0.7971, + "learning_rate": 1.1441397489009378e-05, + "loss": 0.9695, "step": 16598 }, { - "epoch": 0.4710272417707151, + "epoch": 0.4703732041145966, "grad_norm": 0.0, - "learning_rate": 1.141975751290101e-05, - "loss": 0.9649, + "learning_rate": 1.1440489277834645e-05, + "loss": 1.032, "step": 16599 }, { - "epoch": 0.47105561861521, + "epoch": 0.4704015415568591, "grad_norm": 0.0, - "learning_rate": 1.1418847738973346e-05, - "loss": 0.8073, + "learning_rate": 1.1439581054526136e-05, + "loss": 0.8951, "step": 16600 }, { - "epoch": 0.47108399545970486, + "epoch": 0.47042987899912153, "grad_norm": 0.0, - "learning_rate": 1.1417937953060607e-05, - "loss": 0.9216, + "learning_rate": 1.1438672819091502e-05, + "loss": 0.8786, "step": 16601 }, { - "epoch": 0.4711123723041998, + "epoch": 0.470458216441384, "grad_norm": 0.0, - "learning_rate": 1.1417028155170487e-05, - "loss": 0.8884, + "learning_rate": 1.1437764571538387e-05, + "loss": 0.8909, "step": 16602 }, { - "epoch": 0.47114074914869464, + "epoch": 0.47048655388364646, "grad_norm": 0.0, - "learning_rate": 1.1416118345310664e-05, - "loss": 0.7776, + "learning_rate": 1.1436856311874453e-05, + "loss": 0.8096, "step": 16603 }, { - "epoch": 0.47116912599318955, + "epoch": 0.4705148913259089, "grad_norm": 0.0, - "learning_rate": 1.1415208523488825e-05, - "loss": 0.9188, + "learning_rate": 1.1435948040107343e-05, + "loss": 0.8961, "step": 16604 }, { - "epoch": 0.47119750283768447, + "epoch": 0.4705432287681714, "grad_norm": 0.0, - "learning_rate": 1.1414298689712655e-05, - "loss": 0.9169, + "learning_rate": 1.1435039756244708e-05, + "loss": 0.8612, "step": 16605 }, { - "epoch": 0.4712258796821793, + "epoch": 0.47057156621043383, "grad_norm": 0.0, - "learning_rate": 1.1413388843989843e-05, - "loss": 0.9098, + "learning_rate": 1.1434131460294205e-05, + "loss": 0.8348, "step": 16606 }, { - "epoch": 0.47125425652667424, + "epoch": 0.4705999036526963, "grad_norm": 0.0, - "learning_rate": 1.141247898632807e-05, - "loss": 0.9597, + "learning_rate": 1.1433223152263475e-05, + "loss": 1.0648, "step": 16607 }, { - "epoch": 0.4712826333711691, + "epoch": 0.47062824109495877, "grad_norm": 0.0, - "learning_rate": 1.1411569116735024e-05, - "loss": 0.989, + "learning_rate": 1.1432314832160176e-05, + "loss": 0.905, "step": 16608 }, { - "epoch": 0.471311010215664, + "epoch": 0.4706565785372212, "grad_norm": 0.0, - "learning_rate": 1.1410659235218393e-05, - "loss": 0.8368, + "learning_rate": 1.1431406499991955e-05, + "loss": 0.8924, "step": 16609 }, { - "epoch": 0.47133938706015893, + "epoch": 0.4706849159794837, "grad_norm": 0.0, - "learning_rate": 1.1409749341785859e-05, - "loss": 0.8988, + "learning_rate": 1.143049815576647e-05, + "loss": 0.8976, "step": 16610 }, { - "epoch": 0.4713677639046538, + "epoch": 0.47071325342174614, "grad_norm": 0.0, - "learning_rate": 1.1408839436445105e-05, - "loss": 0.9153, + "learning_rate": 1.1429589799491364e-05, + "loss": 0.9874, "step": 16611 }, { - "epoch": 0.4713961407491487, + "epoch": 0.47074159086400863, "grad_norm": 0.0, - "learning_rate": 1.1407929519203821e-05, - "loss": 0.9656, + "learning_rate": 1.1428681431174296e-05, + "loss": 0.8588, "step": 16612 }, { - "epoch": 0.47142451759364357, + "epoch": 0.47076992830627107, "grad_norm": 0.0, - "learning_rate": 1.1407019590069698e-05, - "loss": 0.9816, + "learning_rate": 1.142777305082291e-05, + "loss": 0.8463, "step": 16613 }, { - "epoch": 0.4714528944381385, + "epoch": 0.47079826574853356, "grad_norm": 0.0, - "learning_rate": 1.1406109649050416e-05, - "loss": 0.9312, + "learning_rate": 1.1426864658444865e-05, + "loss": 0.9233, "step": 16614 }, { - "epoch": 0.4714812712826334, + "epoch": 0.470826603190796, "grad_norm": 0.0, - "learning_rate": 1.140519969615366e-05, - "loss": 0.8594, + "learning_rate": 1.1425956254047805e-05, + "loss": 0.9109, "step": 16615 }, { - "epoch": 0.47150964812712826, + "epoch": 0.47085494063305844, "grad_norm": 0.0, - "learning_rate": 1.140428973138712e-05, - "loss": 0.993, + "learning_rate": 1.142504783763939e-05, + "loss": 0.8919, "step": 16616 }, { - "epoch": 0.47153802497162317, + "epoch": 0.47088327807532093, "grad_norm": 0.0, - "learning_rate": 1.1403379754758483e-05, - "loss": 0.9195, + "learning_rate": 1.1424139409227265e-05, + "loss": 0.974, "step": 16617 }, { - "epoch": 0.47156640181611803, + "epoch": 0.47091161551758337, "grad_norm": 0.0, - "learning_rate": 1.1402469766275432e-05, - "loss": 0.8971, + "learning_rate": 1.1423230968819085e-05, + "loss": 0.9168, "step": 16618 }, { - "epoch": 0.47159477866061295, + "epoch": 0.47093995295984586, "grad_norm": 0.0, - "learning_rate": 1.1401559765945657e-05, - "loss": 0.9295, + "learning_rate": 1.1422322516422506e-05, + "loss": 0.9721, "step": 16619 }, { - "epoch": 0.4716231555051078, + "epoch": 0.4709682904021083, "grad_norm": 0.0, - "learning_rate": 1.1400649753776845e-05, - "loss": 0.9772, + "learning_rate": 1.1421414052045174e-05, + "loss": 0.8681, "step": 16620 }, { - "epoch": 0.4716515323496027, + "epoch": 0.47099662784437074, "grad_norm": 0.0, - "learning_rate": 1.1399739729776679e-05, - "loss": 0.855, + "learning_rate": 1.142050557569474e-05, + "loss": 0.9148, "step": 16621 }, { - "epoch": 0.47167990919409764, + "epoch": 0.47102496528663323, "grad_norm": 0.0, - "learning_rate": 1.1398829693952851e-05, - "loss": 0.9884, + "learning_rate": 1.1419597087378862e-05, + "loss": 0.8661, "step": 16622 }, { - "epoch": 0.4717082860385925, + "epoch": 0.4710533027288957, "grad_norm": 0.0, - "learning_rate": 1.1397919646313044e-05, - "loss": 0.9331, + "learning_rate": 1.1418688587105194e-05, + "loss": 0.9184, "step": 16623 }, { - "epoch": 0.4717366628830874, + "epoch": 0.47108164017115817, "grad_norm": 0.0, - "learning_rate": 1.1397009586864949e-05, - "loss": 0.9081, + "learning_rate": 1.1417780074881382e-05, + "loss": 0.886, "step": 16624 }, { - "epoch": 0.47176503972758227, + "epoch": 0.4711099776134206, "grad_norm": 0.0, - "learning_rate": 1.1396099515616248e-05, - "loss": 0.8614, + "learning_rate": 1.1416871550715086e-05, + "loss": 0.898, "step": 16625 }, { - "epoch": 0.4717934165720772, + "epoch": 0.4711383150556831, "grad_norm": 0.0, - "learning_rate": 1.1395189432574634e-05, - "loss": 0.9969, + "learning_rate": 1.1415963014613948e-05, + "loss": 1.0055, "step": 16626 }, { - "epoch": 0.4718217934165721, + "epoch": 0.47116665249794554, "grad_norm": 0.0, - "learning_rate": 1.1394279337747789e-05, - "loss": 0.94, + "learning_rate": 1.1415054466585633e-05, + "loss": 0.8754, "step": 16627 }, { - "epoch": 0.47185017026106696, + "epoch": 0.471194989940208, "grad_norm": 0.0, - "learning_rate": 1.1393369231143406e-05, - "loss": 0.8972, + "learning_rate": 1.1414145906637786e-05, + "loss": 0.9803, "step": 16628 }, { - "epoch": 0.4718785471055619, + "epoch": 0.47122332738247047, "grad_norm": 0.0, - "learning_rate": 1.1392459112769172e-05, - "loss": 0.9557, + "learning_rate": 1.1413237334778064e-05, + "loss": 0.8541, "step": 16629 }, { - "epoch": 0.47190692395005673, + "epoch": 0.4712516648247329, "grad_norm": 0.0, - "learning_rate": 1.1391548982632772e-05, - "loss": 0.8923, + "learning_rate": 1.1412328751014116e-05, + "loss": 0.9574, "step": 16630 }, { - "epoch": 0.47193530079455165, + "epoch": 0.4712800022669954, "grad_norm": 0.0, - "learning_rate": 1.1390638840741894e-05, - "loss": 0.8275, + "learning_rate": 1.1411420155353605e-05, + "loss": 0.8768, "step": 16631 }, { - "epoch": 0.47196367763904656, + "epoch": 0.47130833970925784, "grad_norm": 0.0, - "learning_rate": 1.1389728687104228e-05, - "loss": 0.8966, + "learning_rate": 1.1410511547804175e-05, + "loss": 0.7891, "step": 16632 }, { - "epoch": 0.4719920544835414, + "epoch": 0.4713366771515203, "grad_norm": 0.0, - "learning_rate": 1.1388818521727463e-05, - "loss": 0.9781, + "learning_rate": 1.1409602928373483e-05, + "loss": 0.8902, "step": 16633 }, { - "epoch": 0.47202043132803634, + "epoch": 0.47136501459378277, "grad_norm": 0.0, - "learning_rate": 1.1387908344619283e-05, - "loss": 0.8713, + "learning_rate": 1.1408694297069178e-05, + "loss": 0.8084, "step": 16634 }, { - "epoch": 0.4720488081725312, + "epoch": 0.4713933520360452, "grad_norm": 0.0, - "learning_rate": 1.138699815578738e-05, - "loss": 0.8972, + "learning_rate": 1.140778565389892e-05, + "loss": 0.8914, "step": 16635 }, { - "epoch": 0.4720771850170261, + "epoch": 0.4714216894783077, "grad_norm": 0.0, - "learning_rate": 1.1386087955239442e-05, - "loss": 0.9251, + "learning_rate": 1.1406876998870363e-05, + "loss": 0.9052, "step": 16636 }, { - "epoch": 0.472105561861521, + "epoch": 0.47145002692057014, "grad_norm": 0.0, - "learning_rate": 1.1385177742983155e-05, - "loss": 0.842, + "learning_rate": 1.1405968331991157e-05, + "loss": 0.7807, "step": 16637 }, { - "epoch": 0.4721339387060159, + "epoch": 0.47147836436283264, "grad_norm": 0.0, - "learning_rate": 1.1384267519026211e-05, - "loss": 1.0086, + "learning_rate": 1.1405059653268958e-05, + "loss": 0.9301, "step": 16638 }, { - "epoch": 0.4721623155505108, + "epoch": 0.4715067018050951, "grad_norm": 0.0, - "learning_rate": 1.1383357283376295e-05, - "loss": 0.9578, + "learning_rate": 1.1404150962711419e-05, + "loss": 0.9042, "step": 16639 }, { - "epoch": 0.47219069239500566, + "epoch": 0.4715350392473575, "grad_norm": 0.0, - "learning_rate": 1.13824470360411e-05, - "loss": 0.9716, + "learning_rate": 1.1403242260326197e-05, + "loss": 0.9454, "step": 16640 }, { - "epoch": 0.4722190692395006, + "epoch": 0.47156337668962, "grad_norm": 0.0, - "learning_rate": 1.1381536777028313e-05, - "loss": 0.9172, + "learning_rate": 1.140233354612094e-05, + "loss": 0.9795, "step": 16641 }, { - "epoch": 0.47224744608399544, + "epoch": 0.47159171413188244, "grad_norm": 0.0, - "learning_rate": 1.1380626506345618e-05, - "loss": 0.8832, + "learning_rate": 1.1401424820103308e-05, + "loss": 0.8879, "step": 16642 }, { - "epoch": 0.47227582292849035, + "epoch": 0.47162005157414494, "grad_norm": 0.0, - "learning_rate": 1.1379716224000715e-05, - "loss": 0.9199, + "learning_rate": 1.1400516082280957e-05, + "loss": 0.8882, "step": 16643 }, { - "epoch": 0.47230419977298527, + "epoch": 0.4716483890164074, "grad_norm": 0.0, - "learning_rate": 1.1378805930001284e-05, - "loss": 0.9566, + "learning_rate": 1.139960733266154e-05, + "loss": 0.8321, "step": 16644 }, { - "epoch": 0.4723325766174801, + "epoch": 0.4716767264586698, "grad_norm": 0.0, - "learning_rate": 1.1377895624355017e-05, - "loss": 1.002, + "learning_rate": 1.1398698571252709e-05, + "loss": 0.8762, "step": 16645 }, { - "epoch": 0.47236095346197504, + "epoch": 0.4717050639009323, "grad_norm": 0.0, - "learning_rate": 1.1376985307069606e-05, - "loss": 0.9631, + "learning_rate": 1.139778979806212e-05, + "loss": 0.8644, "step": 16646 }, { - "epoch": 0.4723893303064699, + "epoch": 0.47173340134319475, "grad_norm": 0.0, - "learning_rate": 1.1376074978152736e-05, - "loss": 0.8966, + "learning_rate": 1.1396881013097429e-05, + "loss": 0.9282, "step": 16647 }, { - "epoch": 0.4724177071509648, + "epoch": 0.47176173878545724, "grad_norm": 0.0, - "learning_rate": 1.13751646376121e-05, - "loss": 0.924, + "learning_rate": 1.1395972216366288e-05, + "loss": 0.8491, "step": 16648 }, { - "epoch": 0.47244608399545973, + "epoch": 0.4717900762277197, "grad_norm": 0.0, - "learning_rate": 1.1374254285455387e-05, - "loss": 0.8457, + "learning_rate": 1.1395063407876358e-05, + "loss": 0.9763, "step": 16649 }, { - "epoch": 0.4724744608399546, + "epoch": 0.4718184136699822, "grad_norm": 0.0, - "learning_rate": 1.1373343921690283e-05, - "loss": 0.8738, + "learning_rate": 1.1394154587635288e-05, + "loss": 0.7763, "step": 16650 }, { - "epoch": 0.4725028376844495, + "epoch": 0.4718467511122446, "grad_norm": 0.0, - "learning_rate": 1.1372433546324484e-05, - "loss": 0.9562, + "learning_rate": 1.139324575565074e-05, + "loss": 0.8843, "step": 16651 }, { - "epoch": 0.47253121452894437, + "epoch": 0.47187508855450705, "grad_norm": 0.0, - "learning_rate": 1.1371523159365675e-05, - "loss": 0.9002, + "learning_rate": 1.1392336911930363e-05, + "loss": 0.9623, "step": 16652 }, { - "epoch": 0.4725595913734393, + "epoch": 0.47190342599676954, "grad_norm": 0.0, - "learning_rate": 1.1370612760821551e-05, - "loss": 0.8779, + "learning_rate": 1.1391428056481814e-05, + "loss": 0.9012, "step": 16653 }, { - "epoch": 0.47258796821793414, + "epoch": 0.471931763439032, "grad_norm": 0.0, - "learning_rate": 1.1369702350699798e-05, - "loss": 0.8409, + "learning_rate": 1.1390519189312754e-05, + "loss": 1.0048, "step": 16654 }, { - "epoch": 0.47261634506242906, + "epoch": 0.4719601008812945, "grad_norm": 0.0, - "learning_rate": 1.1368791929008107e-05, - "loss": 0.8294, + "learning_rate": 1.138961031043083e-05, + "loss": 0.9865, "step": 16655 }, { - "epoch": 0.47264472190692397, + "epoch": 0.4719884383235569, "grad_norm": 0.0, - "learning_rate": 1.1367881495754172e-05, - "loss": 0.765, + "learning_rate": 1.1388701419843706e-05, + "loss": 0.9288, "step": 16656 }, { - "epoch": 0.47267309875141883, + "epoch": 0.47201677576581935, "grad_norm": 0.0, - "learning_rate": 1.1366971050945676e-05, - "loss": 1.0111, + "learning_rate": 1.1387792517559033e-05, + "loss": 0.8828, "step": 16657 }, { - "epoch": 0.47270147559591374, + "epoch": 0.47204511320808185, "grad_norm": 0.0, - "learning_rate": 1.1366060594590319e-05, - "loss": 0.9521, + "learning_rate": 1.1386883603584468e-05, + "loss": 1.0392, "step": 16658 }, { - "epoch": 0.4727298524404086, + "epoch": 0.4720734506503443, "grad_norm": 0.0, - "learning_rate": 1.136515012669578e-05, - "loss": 0.8929, + "learning_rate": 1.1385974677927667e-05, + "loss": 0.9342, "step": 16659 }, { - "epoch": 0.4727582292849035, + "epoch": 0.4721017880926068, "grad_norm": 0.0, - "learning_rate": 1.1364239647269763e-05, - "loss": 0.8667, + "learning_rate": 1.1385065740596286e-05, + "loss": 0.7767, "step": 16660 }, { - "epoch": 0.47278660612939843, + "epoch": 0.4721301255348692, "grad_norm": 0.0, - "learning_rate": 1.1363329156319952e-05, - "loss": 0.8808, + "learning_rate": 1.1384156791597983e-05, + "loss": 0.9146, "step": 16661 }, { - "epoch": 0.4728149829738933, + "epoch": 0.4721584629771317, "grad_norm": 0.0, - "learning_rate": 1.1362418653854035e-05, - "loss": 0.94, + "learning_rate": 1.1383247830940414e-05, + "loss": 0.9052, "step": 16662 }, { - "epoch": 0.4728433598183882, + "epoch": 0.47218680041939415, "grad_norm": 0.0, - "learning_rate": 1.1361508139879708e-05, - "loss": 0.8618, + "learning_rate": 1.1382338858631232e-05, + "loss": 0.8645, "step": 16663 }, { - "epoch": 0.47287173666288307, + "epoch": 0.4722151378616566, "grad_norm": 0.0, - "learning_rate": 1.1360597614404663e-05, - "loss": 0.972, + "learning_rate": 1.13814298746781e-05, + "loss": 0.91, "step": 16664 }, { - "epoch": 0.472900113507378, + "epoch": 0.4722434753039191, "grad_norm": 0.0, - "learning_rate": 1.1359687077436582e-05, - "loss": 0.8773, + "learning_rate": 1.138052087908867e-05, + "loss": 0.794, "step": 16665 }, { - "epoch": 0.4729284903518729, + "epoch": 0.4722718127461815, "grad_norm": 0.0, - "learning_rate": 1.1358776528983166e-05, - "loss": 1.0186, + "learning_rate": 1.13796118718706e-05, + "loss": 0.9919, "step": 16666 }, { - "epoch": 0.47295686719636776, + "epoch": 0.472300150188444, "grad_norm": 0.0, - "learning_rate": 1.1357865969052107e-05, - "loss": 0.9619, + "learning_rate": 1.1378702853031545e-05, + "loss": 0.9374, "step": 16667 }, { - "epoch": 0.4729852440408627, + "epoch": 0.47232848763070645, "grad_norm": 0.0, - "learning_rate": 1.1356955397651093e-05, - "loss": 0.9661, + "learning_rate": 1.1377793822579166e-05, + "loss": 0.9751, "step": 16668 }, { - "epoch": 0.47301362088535753, + "epoch": 0.4723568250729689, "grad_norm": 0.0, - "learning_rate": 1.1356044814787812e-05, - "loss": 0.8736, + "learning_rate": 1.1376884780521117e-05, + "loss": 0.9023, "step": 16669 }, { - "epoch": 0.47304199772985245, + "epoch": 0.4723851625152314, "grad_norm": 0.0, - "learning_rate": 1.1355134220469961e-05, - "loss": 0.8675, + "learning_rate": 1.1375975726865058e-05, + "loss": 0.921, "step": 16670 }, { - "epoch": 0.4730703745743473, + "epoch": 0.4724134999574938, "grad_norm": 0.0, - "learning_rate": 1.135422361470523e-05, - "loss": 0.7816, + "learning_rate": 1.137506666161864e-05, + "loss": 0.8396, "step": 16671 }, { - "epoch": 0.4730987514188422, + "epoch": 0.4724418373997563, "grad_norm": 0.0, - "learning_rate": 1.1353312997501313e-05, - "loss": 1.0001, + "learning_rate": 1.1374157584789532e-05, + "loss": 0.9434, "step": 16672 }, { - "epoch": 0.47312712826333714, + "epoch": 0.47247017484201875, "grad_norm": 0.0, - "learning_rate": 1.1352402368865898e-05, - "loss": 0.9216, + "learning_rate": 1.137324849638538e-05, + "loss": 0.9353, "step": 16673 }, { - "epoch": 0.473155505107832, + "epoch": 0.47249851228428125, "grad_norm": 0.0, - "learning_rate": 1.135149172880668e-05, - "loss": 0.8655, + "learning_rate": 1.1372339396413845e-05, + "loss": 0.8521, "step": 16674 }, { - "epoch": 0.4731838819523269, + "epoch": 0.4725268497265437, "grad_norm": 0.0, - "learning_rate": 1.1350581077331353e-05, - "loss": 0.8992, + "learning_rate": 1.1371430284882586e-05, + "loss": 0.843, "step": 16675 }, { - "epoch": 0.47321225879682177, + "epoch": 0.4725551871688061, "grad_norm": 0.0, - "learning_rate": 1.1349670414447604e-05, - "loss": 0.8818, + "learning_rate": 1.1370521161799264e-05, + "loss": 0.8866, "step": 16676 }, { - "epoch": 0.4732406356413167, + "epoch": 0.4725835246110686, "grad_norm": 0.0, - "learning_rate": 1.1348759740163129e-05, - "loss": 0.9524, + "learning_rate": 1.1369612027171531e-05, + "loss": 0.8412, "step": 16677 }, { - "epoch": 0.4732690124858116, + "epoch": 0.47261186205333106, "grad_norm": 0.0, - "learning_rate": 1.1347849054485622e-05, - "loss": 0.9121, + "learning_rate": 1.1368702881007048e-05, + "loss": 0.9322, "step": 16678 }, { - "epoch": 0.47329738933030646, + "epoch": 0.47264019949559355, "grad_norm": 0.0, - "learning_rate": 1.134693835742277e-05, - "loss": 0.8799, + "learning_rate": 1.1367793723313469e-05, + "loss": 1.0048, "step": 16679 }, { - "epoch": 0.4733257661748014, + "epoch": 0.472668536937856, "grad_norm": 0.0, - "learning_rate": 1.1346027648982272e-05, - "loss": 1.0119, + "learning_rate": 1.1366884554098458e-05, + "loss": 0.8453, "step": 16680 }, { - "epoch": 0.47335414301929624, + "epoch": 0.4726968743801184, "grad_norm": 0.0, - "learning_rate": 1.1345116929171815e-05, - "loss": 0.9909, + "learning_rate": 1.1365975373369671e-05, + "loss": 0.8153, "step": 16681 }, { - "epoch": 0.47338251986379115, + "epoch": 0.4727252118223809, "grad_norm": 0.0, - "learning_rate": 1.1344206197999095e-05, - "loss": 0.9624, + "learning_rate": 1.1365066181134768e-05, + "loss": 0.9182, "step": 16682 }, { - "epoch": 0.473410896708286, + "epoch": 0.47275354926464336, "grad_norm": 0.0, - "learning_rate": 1.1343295455471808e-05, - "loss": 0.8779, + "learning_rate": 1.1364156977401404e-05, + "loss": 0.9431, "step": 16683 }, { - "epoch": 0.4734392735527809, + "epoch": 0.47278188670690585, "grad_norm": 0.0, - "learning_rate": 1.1342384701597644e-05, - "loss": 0.8538, + "learning_rate": 1.1363247762177236e-05, + "loss": 0.9373, "step": 16684 }, { - "epoch": 0.47346765039727584, + "epoch": 0.4728102241491683, "grad_norm": 0.0, - "learning_rate": 1.1341473936384292e-05, - "loss": 0.9121, + "learning_rate": 1.136233853546993e-05, + "loss": 0.9487, "step": 16685 }, { - "epoch": 0.4734960272417707, + "epoch": 0.4728385615914308, "grad_norm": 0.0, - "learning_rate": 1.1340563159839452e-05, - "loss": 0.8998, + "learning_rate": 1.1361429297287138e-05, + "loss": 0.7957, "step": 16686 }, { - "epoch": 0.4735244040862656, + "epoch": 0.4728668990336932, "grad_norm": 0.0, - "learning_rate": 1.1339652371970815e-05, - "loss": 0.8535, + "learning_rate": 1.1360520047636525e-05, + "loss": 0.8317, "step": 16687 }, { - "epoch": 0.4735527809307605, + "epoch": 0.47289523647595566, "grad_norm": 0.0, - "learning_rate": 1.1338741572786074e-05, - "loss": 1.0869, + "learning_rate": 1.1359610786525743e-05, + "loss": 0.951, "step": 16688 }, { - "epoch": 0.4735811577752554, + "epoch": 0.47292357391821815, "grad_norm": 0.0, - "learning_rate": 1.133783076229292e-05, - "loss": 0.9359, + "learning_rate": 1.1358701513962457e-05, + "loss": 0.8726, "step": 16689 }, { - "epoch": 0.4736095346197503, + "epoch": 0.4729519113604806, "grad_norm": 0.0, - "learning_rate": 1.1336919940499054e-05, - "loss": 0.9165, + "learning_rate": 1.1357792229954324e-05, + "loss": 0.9705, "step": 16690 }, { - "epoch": 0.47363791146424516, + "epoch": 0.4729802488027431, "grad_norm": 0.0, - "learning_rate": 1.1336009107412162e-05, - "loss": 0.9022, + "learning_rate": 1.1356882934509001e-05, + "loss": 0.77, "step": 16691 }, { - "epoch": 0.4736662883087401, + "epoch": 0.4730085862450055, "grad_norm": 0.0, - "learning_rate": 1.1335098263039943e-05, - "loss": 0.8732, + "learning_rate": 1.1355973627634147e-05, + "loss": 0.8158, "step": 16692 }, { - "epoch": 0.47369466515323494, + "epoch": 0.47303692368726796, "grad_norm": 0.0, - "learning_rate": 1.1334187407390088e-05, - "loss": 0.952, + "learning_rate": 1.1355064309337424e-05, + "loss": 0.9037, "step": 16693 }, { - "epoch": 0.47372304199772985, + "epoch": 0.47306526112953046, "grad_norm": 0.0, - "learning_rate": 1.1333276540470293e-05, - "loss": 0.8117, + "learning_rate": 1.1354154979626495e-05, + "loss": 0.951, "step": 16694 }, { - "epoch": 0.47375141884222477, + "epoch": 0.4730935985717929, "grad_norm": 0.0, - "learning_rate": 1.1332365662288249e-05, - "loss": 0.8842, + "learning_rate": 1.1353245638509016e-05, + "loss": 0.9165, "step": 16695 }, { - "epoch": 0.47377979568671963, + "epoch": 0.4731219360140554, "grad_norm": 0.0, - "learning_rate": 1.133145477285165e-05, - "loss": 1.0301, + "learning_rate": 1.1352336285992643e-05, + "loss": 0.9127, "step": 16696 }, { - "epoch": 0.47380817253121454, + "epoch": 0.4731502734563178, "grad_norm": 0.0, - "learning_rate": 1.1330543872168198e-05, - "loss": 0.8065, + "learning_rate": 1.135142692208504e-05, + "loss": 1.0042, "step": 16697 }, { - "epoch": 0.4738365493757094, + "epoch": 0.4731786108985803, "grad_norm": 0.0, - "learning_rate": 1.1329632960245582e-05, - "loss": 0.9238, + "learning_rate": 1.135051754679387e-05, + "loss": 0.9057, "step": 16698 }, { - "epoch": 0.4738649262202043, + "epoch": 0.47320694834084276, "grad_norm": 0.0, - "learning_rate": 1.1328722037091494e-05, - "loss": 0.9163, + "learning_rate": 1.1349608160126784e-05, + "loss": 0.8516, "step": 16699 }, { - "epoch": 0.4738933030646992, + "epoch": 0.4732352857831052, "grad_norm": 0.0, - "learning_rate": 1.1327811102713633e-05, - "loss": 0.8873, + "learning_rate": 1.1348698762091448e-05, + "loss": 0.9977, "step": 16700 }, { - "epoch": 0.4739216799091941, + "epoch": 0.4732636232253677, "grad_norm": 0.0, - "learning_rate": 1.1326900157119692e-05, - "loss": 0.8771, + "learning_rate": 1.1347789352695524e-05, + "loss": 0.8432, "step": 16701 }, { - "epoch": 0.473950056753689, + "epoch": 0.47329196066763013, "grad_norm": 0.0, - "learning_rate": 1.1325989200317362e-05, - "loss": 0.9332, + "learning_rate": 1.1346879931946668e-05, + "loss": 0.8877, "step": 16702 }, { - "epoch": 0.47397843359818387, + "epoch": 0.4733202981098926, "grad_norm": 0.0, - "learning_rate": 1.1325078232314347e-05, - "loss": 0.9278, + "learning_rate": 1.1345970499852546e-05, + "loss": 0.9734, "step": 16703 }, { - "epoch": 0.4740068104426788, + "epoch": 0.47334863555215506, "grad_norm": 0.0, - "learning_rate": 1.132416725311833e-05, - "loss": 0.8056, + "learning_rate": 1.134506105642081e-05, + "loss": 0.8781, "step": 16704 }, { - "epoch": 0.47403518728717364, + "epoch": 0.4733769729944175, "grad_norm": 0.0, - "learning_rate": 1.1323256262737018e-05, - "loss": 0.8779, + "learning_rate": 1.1344151601659125e-05, + "loss": 0.8754, "step": 16705 }, { - "epoch": 0.47406356413166856, + "epoch": 0.47340531043668, "grad_norm": 0.0, - "learning_rate": 1.1322345261178098e-05, - "loss": 0.9639, + "learning_rate": 1.1343242135575155e-05, + "loss": 0.9433, "step": 16706 }, { - "epoch": 0.4740919409761635, + "epoch": 0.47343364787894243, "grad_norm": 0.0, - "learning_rate": 1.132143424844927e-05, - "loss": 0.8868, + "learning_rate": 1.1342332658176556e-05, + "loss": 0.8731, "step": 16707 }, { - "epoch": 0.47412031782065833, + "epoch": 0.4734619853212049, "grad_norm": 0.0, - "learning_rate": 1.1320523224558228e-05, - "loss": 0.8636, + "learning_rate": 1.134142316947099e-05, + "loss": 0.9902, "step": 16708 }, { - "epoch": 0.47414869466515325, + "epoch": 0.47349032276346736, "grad_norm": 0.0, - "learning_rate": 1.1319612189512665e-05, - "loss": 0.951, + "learning_rate": 1.134051366946612e-05, + "loss": 0.8742, "step": 16709 }, { - "epoch": 0.4741770715096481, + "epoch": 0.47351866020572986, "grad_norm": 0.0, - "learning_rate": 1.131870114332028e-05, - "loss": 0.8692, + "learning_rate": 1.1339604158169606e-05, + "loss": 0.8593, "step": 16710 }, { - "epoch": 0.474205448354143, + "epoch": 0.4735469976479923, "grad_norm": 0.0, - "learning_rate": 1.1317790085988767e-05, - "loss": 0.8936, + "learning_rate": 1.133869463558911e-05, + "loss": 0.9094, "step": 16711 }, { - "epoch": 0.47423382519863794, + "epoch": 0.47357533509025473, "grad_norm": 0.0, - "learning_rate": 1.131687901752582e-05, - "loss": 0.9061, + "learning_rate": 1.1337785101732286e-05, + "loss": 0.8496, "step": 16712 }, { - "epoch": 0.4742622020431328, + "epoch": 0.47360367253251723, "grad_norm": 0.0, - "learning_rate": 1.1315967937939138e-05, - "loss": 0.9944, + "learning_rate": 1.1336875556606806e-05, + "loss": 0.8837, "step": 16713 }, { - "epoch": 0.4742905788876277, + "epoch": 0.47363200997477967, "grad_norm": 0.0, - "learning_rate": 1.1315056847236419e-05, - "loss": 0.9171, + "learning_rate": 1.1335966000220325e-05, + "loss": 0.9159, "step": 16714 }, { - "epoch": 0.47431895573212257, + "epoch": 0.47366034741704216, "grad_norm": 0.0, - "learning_rate": 1.1314145745425354e-05, - "loss": 0.9997, + "learning_rate": 1.133505643258051e-05, + "loss": 0.9415, "step": 16715 }, { - "epoch": 0.4743473325766175, + "epoch": 0.4736886848593046, "grad_norm": 0.0, - "learning_rate": 1.1313234632513637e-05, - "loss": 0.8782, + "learning_rate": 1.1334146853695017e-05, + "loss": 0.9005, "step": 16716 }, { - "epoch": 0.47437570942111235, + "epoch": 0.47371702230156704, "grad_norm": 0.0, - "learning_rate": 1.131232350850897e-05, - "loss": 0.8308, + "learning_rate": 1.1333237263571506e-05, + "loss": 0.9219, "step": 16717 }, { - "epoch": 0.47440408626560726, + "epoch": 0.47374535974382953, "grad_norm": 0.0, - "learning_rate": 1.1311412373419051e-05, - "loss": 0.8502, + "learning_rate": 1.1332327662217646e-05, + "loss": 0.8555, "step": 16718 }, { - "epoch": 0.4744324631101022, + "epoch": 0.47377369718609197, "grad_norm": 0.0, - "learning_rate": 1.131050122725157e-05, - "loss": 0.839, + "learning_rate": 1.1331418049641091e-05, + "loss": 0.8794, "step": 16719 }, { - "epoch": 0.47446083995459704, + "epoch": 0.47380203462835446, "grad_norm": 0.0, - "learning_rate": 1.1309590070014222e-05, - "loss": 0.848, + "learning_rate": 1.1330508425849514e-05, + "loss": 0.9659, "step": 16720 }, { - "epoch": 0.47448921679909195, + "epoch": 0.4738303720706169, "grad_norm": 0.0, - "learning_rate": 1.1308678901714713e-05, - "loss": 0.8661, + "learning_rate": 1.1329598790850565e-05, + "loss": 0.8981, "step": 16721 }, { - "epoch": 0.4745175936435868, + "epoch": 0.4738587095128794, "grad_norm": 0.0, - "learning_rate": 1.1307767722360734e-05, - "loss": 0.8627, + "learning_rate": 1.1328689144651915e-05, + "loss": 0.9253, "step": 16722 }, { - "epoch": 0.4745459704880817, + "epoch": 0.47388704695514183, "grad_norm": 0.0, - "learning_rate": 1.130685653195998e-05, - "loss": 0.9305, + "learning_rate": 1.1327779487261222e-05, + "loss": 0.8588, "step": 16723 }, { - "epoch": 0.47457434733257664, + "epoch": 0.47391538439740427, "grad_norm": 0.0, - "learning_rate": 1.1305945330520153e-05, - "loss": 0.8228, + "learning_rate": 1.132686981868615e-05, + "loss": 0.917, "step": 16724 }, { - "epoch": 0.4746027241770715, + "epoch": 0.47394372183966677, "grad_norm": 0.0, - "learning_rate": 1.1305034118048945e-05, - "loss": 0.8323, + "learning_rate": 1.132596013893436e-05, + "loss": 0.8829, "step": 16725 }, { - "epoch": 0.4746311010215664, + "epoch": 0.4739720592819292, "grad_norm": 0.0, - "learning_rate": 1.1304122894554055e-05, - "loss": 0.8737, + "learning_rate": 1.1325050448013513e-05, + "loss": 0.905, "step": 16726 }, { - "epoch": 0.4746594778660613, + "epoch": 0.4740003967241917, "grad_norm": 0.0, - "learning_rate": 1.1303211660043181e-05, - "loss": 0.9363, + "learning_rate": 1.1324140745931278e-05, + "loss": 0.8941, "step": 16727 }, { - "epoch": 0.4746878547105562, + "epoch": 0.47402873416645414, "grad_norm": 0.0, - "learning_rate": 1.1302300414524021e-05, - "loss": 0.8794, + "learning_rate": 1.1323231032695313e-05, + "loss": 0.8805, "step": 16728 }, { - "epoch": 0.4747162315550511, + "epoch": 0.4740570716087166, "grad_norm": 0.0, - "learning_rate": 1.130138915800427e-05, - "loss": 0.7628, + "learning_rate": 1.1322321308313278e-05, + "loss": 0.8768, "step": 16729 }, { - "epoch": 0.47474460839954596, + "epoch": 0.47408540905097907, "grad_norm": 0.0, - "learning_rate": 1.1300477890491623e-05, - "loss": 0.8408, + "learning_rate": 1.1321411572792844e-05, + "loss": 0.8113, "step": 16730 }, { - "epoch": 0.4747729852440409, + "epoch": 0.4741137464932415, "grad_norm": 0.0, - "learning_rate": 1.1299566611993784e-05, - "loss": 0.9909, + "learning_rate": 1.1320501826141668e-05, + "loss": 0.9681, "step": 16731 }, { - "epoch": 0.47480136208853574, + "epoch": 0.474142083935504, "grad_norm": 0.0, - "learning_rate": 1.1298655322518448e-05, - "loss": 0.8422, + "learning_rate": 1.1319592068367413e-05, + "loss": 0.8815, "step": 16732 }, { - "epoch": 0.47482973893303065, + "epoch": 0.47417042137776644, "grad_norm": 0.0, - "learning_rate": 1.1297744022073312e-05, - "loss": 0.9706, + "learning_rate": 1.1318682299477746e-05, + "loss": 0.8567, "step": 16733 }, { - "epoch": 0.4748581157775255, + "epoch": 0.47419875882002893, "grad_norm": 0.0, - "learning_rate": 1.129683271066607e-05, - "loss": 0.7503, + "learning_rate": 1.1317772519480328e-05, + "loss": 0.8333, "step": 16734 }, { - "epoch": 0.47488649262202043, + "epoch": 0.47422709626229137, "grad_norm": 0.0, - "learning_rate": 1.1295921388304427e-05, - "loss": 0.8628, + "learning_rate": 1.1316862728382825e-05, + "loss": 0.8664, "step": 16735 }, { - "epoch": 0.47491486946651534, + "epoch": 0.4742554337045538, "grad_norm": 0.0, - "learning_rate": 1.1295010054996079e-05, - "loss": 0.9592, + "learning_rate": 1.1315952926192898e-05, + "loss": 0.9599, "step": 16736 }, { - "epoch": 0.4749432463110102, + "epoch": 0.4742837711468163, "grad_norm": 0.0, - "learning_rate": 1.129409871074872e-05, - "loss": 0.8743, + "learning_rate": 1.1315043112918206e-05, + "loss": 0.8762, "step": 16737 }, { - "epoch": 0.4749716231555051, + "epoch": 0.47431210858907874, "grad_norm": 0.0, - "learning_rate": 1.1293187355570054e-05, - "loss": 0.8531, + "learning_rate": 1.131413328856642e-05, + "loss": 0.8603, "step": 16738 }, { - "epoch": 0.475, + "epoch": 0.47434044603134123, "grad_norm": 0.0, - "learning_rate": 1.1292275989467773e-05, - "loss": 0.8299, + "learning_rate": 1.1313223453145202e-05, + "loss": 0.9632, "step": 16739 }, { - "epoch": 0.4750283768444949, + "epoch": 0.4743687834736037, "grad_norm": 0.0, - "learning_rate": 1.1291364612449583e-05, - "loss": 0.8645, + "learning_rate": 1.1312313606662216e-05, + "loss": 0.929, "step": 16740 }, { - "epoch": 0.4750567536889898, + "epoch": 0.4743971209158661, "grad_norm": 0.0, - "learning_rate": 1.1290453224523175e-05, - "loss": 0.8533, + "learning_rate": 1.1311403749125123e-05, + "loss": 0.7994, "step": 16741 }, { - "epoch": 0.47508513053348467, + "epoch": 0.4744254583581286, "grad_norm": 0.0, - "learning_rate": 1.1289541825696248e-05, - "loss": 0.8963, + "learning_rate": 1.1310493880541588e-05, + "loss": 0.8481, "step": 16742 }, { - "epoch": 0.4751135073779796, + "epoch": 0.47445379580039104, "grad_norm": 0.0, - "learning_rate": 1.1288630415976508e-05, - "loss": 0.9293, + "learning_rate": 1.130958400091928e-05, + "loss": 0.9703, "step": 16743 }, { - "epoch": 0.47514188422247444, + "epoch": 0.47448213324265354, "grad_norm": 0.0, - "learning_rate": 1.1287718995371647e-05, - "loss": 0.9459, + "learning_rate": 1.1308674110265861e-05, + "loss": 0.8848, "step": 16744 }, { - "epoch": 0.47517026106696936, + "epoch": 0.474510470684916, "grad_norm": 0.0, - "learning_rate": 1.1286807563889366e-05, - "loss": 0.8978, + "learning_rate": 1.1307764208588989e-05, + "loss": 0.9352, "step": 16745 }, { - "epoch": 0.47519863791146427, + "epoch": 0.47453880812717847, "grad_norm": 0.0, - "learning_rate": 1.1285896121537363e-05, - "loss": 0.8511, + "learning_rate": 1.1306854295896335e-05, + "loss": 0.9797, "step": 16746 }, { - "epoch": 0.47522701475595913, + "epoch": 0.4745671455694409, "grad_norm": 0.0, - "learning_rate": 1.1284984668323338e-05, - "loss": 0.7693, + "learning_rate": 1.1305944372195564e-05, + "loss": 0.8999, "step": 16747 }, { - "epoch": 0.47525539160045405, + "epoch": 0.47459548301170335, "grad_norm": 0.0, - "learning_rate": 1.128407320425499e-05, - "loss": 0.8654, + "learning_rate": 1.1305034437494337e-05, + "loss": 0.9849, "step": 16748 }, { - "epoch": 0.4752837684449489, + "epoch": 0.47462382045396584, "grad_norm": 0.0, - "learning_rate": 1.1283161729340017e-05, - "loss": 0.8587, + "learning_rate": 1.130412449180032e-05, + "loss": 1.0, "step": 16749 }, { - "epoch": 0.4753121452894438, + "epoch": 0.4746521578962283, "grad_norm": 0.0, - "learning_rate": 1.1282250243586118e-05, - "loss": 0.8516, + "learning_rate": 1.1303214535121181e-05, + "loss": 0.8895, "step": 16750 }, { - "epoch": 0.4753405221339387, + "epoch": 0.47468049533849077, "grad_norm": 0.0, - "learning_rate": 1.1281338747000998e-05, - "loss": 1.0669, + "learning_rate": 1.130230456746458e-05, + "loss": 0.9152, "step": 16751 }, { - "epoch": 0.4753688989784336, + "epoch": 0.4747088327807532, "grad_norm": 0.0, - "learning_rate": 1.1280427239592351e-05, - "loss": 0.7972, + "learning_rate": 1.130139458883818e-05, + "loss": 0.8912, "step": 16752 }, { - "epoch": 0.4753972758229285, + "epoch": 0.47473717022301565, "grad_norm": 0.0, - "learning_rate": 1.1279515721367875e-05, - "loss": 0.9016, + "learning_rate": 1.1300484599249656e-05, + "loss": 0.8191, "step": 16753 }, { - "epoch": 0.47542565266742337, + "epoch": 0.47476550766527814, "grad_norm": 0.0, - "learning_rate": 1.1278604192335273e-05, - "loss": 0.8084, + "learning_rate": 1.1299574598706663e-05, + "loss": 0.9166, "step": 16754 }, { - "epoch": 0.4754540295119183, + "epoch": 0.4747938451075406, "grad_norm": 0.0, - "learning_rate": 1.1277692652502246e-05, - "loss": 0.8394, + "learning_rate": 1.1298664587216877e-05, + "loss": 1.0305, "step": 16755 }, { - "epoch": 0.47548240635641315, + "epoch": 0.4748221825498031, "grad_norm": 0.0, - "learning_rate": 1.1276781101876488e-05, - "loss": 0.8997, + "learning_rate": 1.1297754564787952e-05, + "loss": 0.8947, "step": 16756 }, { - "epoch": 0.47551078320090806, + "epoch": 0.4748505199920655, "grad_norm": 0.0, - "learning_rate": 1.1275869540465705e-05, - "loss": 0.8837, + "learning_rate": 1.129684453142756e-05, + "loss": 0.9106, "step": 16757 }, { - "epoch": 0.475539160045403, + "epoch": 0.47487885743432795, "grad_norm": 0.0, - "learning_rate": 1.1274957968277593e-05, - "loss": 0.7972, + "learning_rate": 1.1295934487143364e-05, + "loss": 0.9711, "step": 16758 }, { - "epoch": 0.47556753688989784, + "epoch": 0.47490719487659044, "grad_norm": 0.0, - "learning_rate": 1.1274046385319857e-05, - "loss": 0.791, + "learning_rate": 1.1295024431943029e-05, + "loss": 0.9331, "step": 16759 }, { - "epoch": 0.47559591373439275, + "epoch": 0.4749355323188529, "grad_norm": 0.0, - "learning_rate": 1.127313479160019e-05, - "loss": 0.8325, + "learning_rate": 1.1294114365834225e-05, + "loss": 0.8518, "step": 16760 }, { - "epoch": 0.4756242905788876, + "epoch": 0.4749638697611154, "grad_norm": 0.0, - "learning_rate": 1.1272223187126298e-05, - "loss": 0.8442, + "learning_rate": 1.1293204288824615e-05, + "loss": 0.85, "step": 16761 }, { - "epoch": 0.4756526674233825, + "epoch": 0.4749922072033778, "grad_norm": 0.0, - "learning_rate": 1.1271311571905884e-05, - "loss": 0.8545, + "learning_rate": 1.1292294200921862e-05, + "loss": 0.8539, "step": 16762 }, { - "epoch": 0.47568104426787744, + "epoch": 0.4750205446456403, "grad_norm": 0.0, - "learning_rate": 1.1270399945946638e-05, - "loss": 0.8552, + "learning_rate": 1.1291384102133638e-05, + "loss": 0.8931, "step": 16763 }, { - "epoch": 0.4757094211123723, + "epoch": 0.47504888208790275, "grad_norm": 0.0, - "learning_rate": 1.1269488309256267e-05, - "loss": 0.9221, + "learning_rate": 1.1290473992467607e-05, + "loss": 0.9389, "step": 16764 }, { - "epoch": 0.4757377979568672, + "epoch": 0.4750772195301652, "grad_norm": 0.0, - "learning_rate": 1.1268576661842472e-05, - "loss": 0.9942, + "learning_rate": 1.128956387193143e-05, + "loss": 0.8317, "step": 16765 }, { - "epoch": 0.4757661748013621, + "epoch": 0.4751055569724277, "grad_norm": 0.0, - "learning_rate": 1.1267665003712953e-05, - "loss": 0.8158, + "learning_rate": 1.1288653740532782e-05, + "loss": 0.8922, "step": 16766 }, { - "epoch": 0.475794551645857, + "epoch": 0.4751338944146901, "grad_norm": 0.0, - "learning_rate": 1.1266753334875411e-05, - "loss": 1.0778, + "learning_rate": 1.1287743598279323e-05, + "loss": 1.0338, "step": 16767 }, { - "epoch": 0.47582292849035185, + "epoch": 0.4751622318569526, "grad_norm": 0.0, - "learning_rate": 1.1265841655337547e-05, + "learning_rate": 1.1286833445178722e-05, "loss": 0.8804, "step": 16768 }, { - "epoch": 0.47585130533484676, + "epoch": 0.47519056929921505, "grad_norm": 0.0, - "learning_rate": 1.1264929965107064e-05, - "loss": 0.8779, + "learning_rate": 1.1285923281238646e-05, + "loss": 0.8246, "step": 16769 }, { - "epoch": 0.4758796821793417, + "epoch": 0.4752189067414775, "grad_norm": 0.0, - "learning_rate": 1.1264018264191653e-05, - "loss": 0.8771, + "learning_rate": 1.128501310646676e-05, + "loss": 0.8601, "step": 16770 }, { - "epoch": 0.47590805902383654, + "epoch": 0.47524724418374, "grad_norm": 0.0, - "learning_rate": 1.126310655259903e-05, - "loss": 1.01, + "learning_rate": 1.128410292087073e-05, + "loss": 1.0277, "step": 16771 }, { - "epoch": 0.47593643586833145, + "epoch": 0.4752755816260024, "grad_norm": 0.0, - "learning_rate": 1.1262194830336888e-05, - "loss": 0.9235, + "learning_rate": 1.1283192724458225e-05, + "loss": 0.9296, "step": 16772 }, { - "epoch": 0.4759648127128263, + "epoch": 0.4753039190682649, "grad_norm": 0.0, - "learning_rate": 1.1261283097412928e-05, - "loss": 0.8202, + "learning_rate": 1.1282282517236913e-05, + "loss": 0.8132, "step": 16773 }, { - "epoch": 0.47599318955732123, + "epoch": 0.47533225651052735, "grad_norm": 0.0, - "learning_rate": 1.1260371353834851e-05, - "loss": 0.9378, + "learning_rate": 1.1281372299214457e-05, + "loss": 0.8506, "step": 16774 }, { - "epoch": 0.47602156640181614, + "epoch": 0.47536059395278984, "grad_norm": 0.0, - "learning_rate": 1.1259459599610365e-05, - "loss": 0.8671, + "learning_rate": 1.1280462070398529e-05, + "loss": 0.8385, "step": 16775 }, { - "epoch": 0.476049943246311, + "epoch": 0.4753889313950523, "grad_norm": 0.0, - "learning_rate": 1.1258547834747167e-05, - "loss": 0.9054, + "learning_rate": 1.1279551830796792e-05, + "loss": 0.828, "step": 16776 }, { - "epoch": 0.4760783200908059, + "epoch": 0.4754172688373147, "grad_norm": 0.0, - "learning_rate": 1.1257636059252954e-05, - "loss": 0.9002, + "learning_rate": 1.127864158041691e-05, + "loss": 0.9435, "step": 16777 }, { - "epoch": 0.4761066969353008, + "epoch": 0.4754456062795772, "grad_norm": 0.0, - "learning_rate": 1.1256724273135438e-05, - "loss": 0.9762, + "learning_rate": 1.1277731319266562e-05, + "loss": 0.8958, "step": 16778 }, { - "epoch": 0.4761350737797957, + "epoch": 0.47547394372183965, "grad_norm": 0.0, - "learning_rate": 1.1255812476402315e-05, - "loss": 0.913, + "learning_rate": 1.1276821047353403e-05, + "loss": 0.904, "step": 16779 }, { - "epoch": 0.47616345062429055, + "epoch": 0.47550228116410215, "grad_norm": 0.0, - "learning_rate": 1.1254900669061282e-05, - "loss": 0.8652, + "learning_rate": 1.1275910764685111e-05, + "loss": 0.8677, "step": 16780 }, { - "epoch": 0.47619182746878547, + "epoch": 0.4755306186063646, "grad_norm": 0.0, - "learning_rate": 1.125398885112005e-05, - "loss": 0.9182, + "learning_rate": 1.1275000471269348e-05, + "loss": 1.0218, "step": 16781 }, { - "epoch": 0.4762202043132804, + "epoch": 0.475558956048627, "grad_norm": 0.0, - "learning_rate": 1.1253077022586321e-05, - "loss": 0.9541, + "learning_rate": 1.1274090167113783e-05, + "loss": 0.9226, "step": 16782 }, { - "epoch": 0.47624858115777524, + "epoch": 0.4755872934908895, "grad_norm": 0.0, - "learning_rate": 1.1252165183467792e-05, - "loss": 0.9293, + "learning_rate": 1.127317985222608e-05, + "loss": 0.7647, "step": 16783 }, { - "epoch": 0.47627695800227016, + "epoch": 0.47561563093315196, "grad_norm": 0.0, - "learning_rate": 1.1251253333772166e-05, - "loss": 0.9076, + "learning_rate": 1.1272269526613913e-05, + "loss": 0.8717, "step": 16784 }, { - "epoch": 0.476305334846765, + "epoch": 0.47564396837541445, "grad_norm": 0.0, - "learning_rate": 1.1250341473507147e-05, - "loss": 0.9701, + "learning_rate": 1.1271359190284947e-05, + "loss": 0.854, "step": 16785 }, { - "epoch": 0.47633371169125993, + "epoch": 0.4756723058176769, "grad_norm": 0.0, - "learning_rate": 1.124942960268044e-05, - "loss": 0.9431, + "learning_rate": 1.127044884324685e-05, + "loss": 0.9551, "step": 16786 }, { - "epoch": 0.47636208853575485, + "epoch": 0.4757006432599394, "grad_norm": 0.0, - "learning_rate": 1.1248517721299741e-05, - "loss": 0.958, + "learning_rate": 1.126953848550729e-05, + "loss": 0.9221, "step": 16787 }, { - "epoch": 0.4763904653802497, + "epoch": 0.4757289807022018, "grad_norm": 0.0, - "learning_rate": 1.1247605829372758e-05, - "loss": 0.8732, + "learning_rate": 1.1268628117073939e-05, + "loss": 0.9178, "step": 16788 }, { - "epoch": 0.4764188422247446, + "epoch": 0.47575731814446426, "grad_norm": 0.0, - "learning_rate": 1.1246693926907193e-05, - "loss": 0.8649, + "learning_rate": 1.126771773795446e-05, + "loss": 1.0032, "step": 16789 }, { - "epoch": 0.4764472190692395, + "epoch": 0.47578565558672675, "grad_norm": 0.0, - "learning_rate": 1.1245782013910748e-05, - "loss": 0.8875, + "learning_rate": 1.1266807348156521e-05, + "loss": 0.9948, "step": 16790 }, { - "epoch": 0.4764755959137344, + "epoch": 0.4758139930289892, "grad_norm": 0.0, - "learning_rate": 1.1244870090391127e-05, - "loss": 0.8915, + "learning_rate": 1.1265896947687796e-05, + "loss": 0.9521, "step": 16791 }, { - "epoch": 0.4765039727582293, + "epoch": 0.4758423304712517, "grad_norm": 0.0, - "learning_rate": 1.1243958156356034e-05, - "loss": 1.0132, + "learning_rate": 1.1264986536555951e-05, + "loss": 0.9194, "step": 16792 }, { - "epoch": 0.47653234960272417, + "epoch": 0.4758706679135141, "grad_norm": 0.0, - "learning_rate": 1.1243046211813167e-05, - "loss": 0.8401, + "learning_rate": 1.1264076114768657e-05, + "loss": 0.9477, "step": 16793 }, { - "epoch": 0.4765607264472191, + "epoch": 0.47589900535577656, "grad_norm": 0.0, - "learning_rate": 1.1242134256770237e-05, - "loss": 0.917, + "learning_rate": 1.1263165682333577e-05, + "loss": 0.9617, "step": 16794 }, { - "epoch": 0.47658910329171394, + "epoch": 0.47592734279803905, "grad_norm": 0.0, - "learning_rate": 1.124122229123494e-05, - "loss": 0.9261, + "learning_rate": 1.1262255239258385e-05, + "loss": 0.9532, "step": 16795 }, { - "epoch": 0.47661748013620886, + "epoch": 0.4759556802403015, "grad_norm": 0.0, - "learning_rate": 1.1240310315214982e-05, - "loss": 0.8679, + "learning_rate": 1.1261344785550748e-05, + "loss": 0.8064, "step": 16796 }, { - "epoch": 0.4766458569807037, + "epoch": 0.475984017682564, "grad_norm": 0.0, - "learning_rate": 1.1239398328718068e-05, - "loss": 0.9491, + "learning_rate": 1.1260434321218334e-05, + "loss": 0.8564, "step": 16797 }, { - "epoch": 0.47667423382519863, + "epoch": 0.4760123551248264, "grad_norm": 0.0, - "learning_rate": 1.1238486331751902e-05, - "loss": 0.8006, + "learning_rate": 1.1259523846268816e-05, + "loss": 1.0193, "step": 16798 }, { - "epoch": 0.47670261066969355, + "epoch": 0.4760406925670889, "grad_norm": 0.0, - "learning_rate": 1.1237574324324185e-05, - "loss": 0.8843, + "learning_rate": 1.125861336070986e-05, + "loss": 0.9343, "step": 16799 }, { - "epoch": 0.4767309875141884, + "epoch": 0.47606903000935136, "grad_norm": 0.0, - "learning_rate": 1.123666230644262e-05, - "loss": 0.9138, + "learning_rate": 1.1257702864549134e-05, + "loss": 0.892, "step": 16800 }, { - "epoch": 0.4767593643586833, + "epoch": 0.4760973674516138, "grad_norm": 0.0, - "learning_rate": 1.1235750278114916e-05, - "loss": 0.9373, + "learning_rate": 1.1256792357794313e-05, + "loss": 0.8446, "step": 16801 }, { - "epoch": 0.4767877412031782, + "epoch": 0.4761257048938763, "grad_norm": 0.0, - "learning_rate": 1.1234838239348773e-05, - "loss": 0.9207, + "learning_rate": 1.1255881840453065e-05, + "loss": 0.9618, "step": 16802 }, { - "epoch": 0.4768161180476731, + "epoch": 0.4761540423361387, "grad_norm": 0.0, - "learning_rate": 1.1233926190151896e-05, - "loss": 0.9137, + "learning_rate": 1.1254971312533052e-05, + "loss": 1.0222, "step": 16803 }, { - "epoch": 0.476844494892168, + "epoch": 0.4761823797784012, "grad_norm": 0.0, - "learning_rate": 1.1233014130531988e-05, - "loss": 0.8762, + "learning_rate": 1.1254060774041953e-05, + "loss": 0.9083, "step": 16804 }, { - "epoch": 0.4768728717366629, + "epoch": 0.47621071722066366, "grad_norm": 0.0, - "learning_rate": 1.1232102060496754e-05, - "loss": 0.9139, + "learning_rate": 1.1253150224987435e-05, + "loss": 0.8802, "step": 16805 }, { - "epoch": 0.4769012485811578, + "epoch": 0.4762390546629261, "grad_norm": 0.0, - "learning_rate": 1.12311899800539e-05, - "loss": 0.8311, + "learning_rate": 1.1252239665377167e-05, + "loss": 0.9542, "step": 16806 }, { - "epoch": 0.47692962542565265, + "epoch": 0.4762673921051886, "grad_norm": 0.0, - "learning_rate": 1.1230277889211128e-05, - "loss": 0.8257, + "learning_rate": 1.1251329095218819e-05, + "loss": 0.9464, "step": 16807 }, { - "epoch": 0.47695800227014756, + "epoch": 0.47629572954745103, "grad_norm": 0.0, - "learning_rate": 1.1229365787976145e-05, - "loss": 0.9075, + "learning_rate": 1.1250418514520061e-05, + "loss": 0.766, "step": 16808 }, { - "epoch": 0.4769863791146425, + "epoch": 0.4763240669897135, "grad_norm": 0.0, - "learning_rate": 1.1228453676356653e-05, - "loss": 0.8843, + "learning_rate": 1.1249507923288563e-05, + "loss": 0.877, "step": 16809 }, { - "epoch": 0.47701475595913734, + "epoch": 0.47635240443197596, "grad_norm": 0.0, - "learning_rate": 1.1227541554360354e-05, - "loss": 0.9007, + "learning_rate": 1.1248597321531995e-05, + "loss": 0.8974, "step": 16810 }, { - "epoch": 0.47704313280363225, + "epoch": 0.47638074187423846, "grad_norm": 0.0, - "learning_rate": 1.122662942199496e-05, - "loss": 0.9096, + "learning_rate": 1.124768670925803e-05, + "loss": 0.8366, "step": 16811 }, { - "epoch": 0.4770715096481271, + "epoch": 0.4764090793165009, "grad_norm": 0.0, - "learning_rate": 1.1225717279268174e-05, - "loss": 0.8689, + "learning_rate": 1.1246776086474335e-05, + "loss": 0.8705, "step": 16812 }, { - "epoch": 0.477099886492622, + "epoch": 0.47643741675876333, "grad_norm": 0.0, - "learning_rate": 1.1224805126187697e-05, - "loss": 0.9122, + "learning_rate": 1.1245865453188584e-05, + "loss": 0.977, "step": 16813 }, { - "epoch": 0.4771282633371169, + "epoch": 0.4764657542010258, "grad_norm": 0.0, - "learning_rate": 1.1223892962761233e-05, - "loss": 0.9139, + "learning_rate": 1.1244954809408446e-05, + "loss": 0.8973, "step": 16814 }, { - "epoch": 0.4771566401816118, + "epoch": 0.47649409164328826, "grad_norm": 0.0, - "learning_rate": 1.1222980788996494e-05, - "loss": 0.9296, + "learning_rate": 1.124404415514159e-05, + "loss": 0.8725, "step": 16815 }, { - "epoch": 0.4771850170261067, + "epoch": 0.47652242908555076, "grad_norm": 0.0, - "learning_rate": 1.1222068604901181e-05, - "loss": 0.8801, + "learning_rate": 1.1243133490395687e-05, + "loss": 0.8711, "step": 16816 }, { - "epoch": 0.4772133938706016, + "epoch": 0.4765507665278132, "grad_norm": 0.0, - "learning_rate": 1.1221156410482998e-05, - "loss": 0.9968, + "learning_rate": 1.1242222815178409e-05, + "loss": 0.9965, "step": 16817 }, { - "epoch": 0.4772417707150965, + "epoch": 0.47657910397007563, "grad_norm": 0.0, - "learning_rate": 1.1220244205749653e-05, - "loss": 0.813, + "learning_rate": 1.124131212949743e-05, + "loss": 0.9111, "step": 16818 }, { - "epoch": 0.47727014755959135, + "epoch": 0.47660744141233813, "grad_norm": 0.0, - "learning_rate": 1.1219331990708849e-05, - "loss": 0.9901, + "learning_rate": 1.1240401433360417e-05, + "loss": 0.9383, "step": 16819 }, { - "epoch": 0.47729852440408627, + "epoch": 0.47663577885460057, "grad_norm": 0.0, - "learning_rate": 1.1218419765368296e-05, - "loss": 0.8441, + "learning_rate": 1.123949072677504e-05, + "loss": 0.9226, "step": 16820 }, { - "epoch": 0.4773269012485812, + "epoch": 0.47666411629686306, "grad_norm": 0.0, - "learning_rate": 1.1217507529735692e-05, - "loss": 0.901, + "learning_rate": 1.1238580009748975e-05, + "loss": 0.8997, "step": 16821 }, { - "epoch": 0.47735527809307604, + "epoch": 0.4766924537391255, "grad_norm": 0.0, - "learning_rate": 1.121659528381875e-05, - "loss": 0.9273, + "learning_rate": 1.1237669282289889e-05, + "loss": 0.9179, "step": 16822 }, { - "epoch": 0.47738365493757096, + "epoch": 0.476720791181388, "grad_norm": 0.0, - "learning_rate": 1.1215683027625173e-05, - "loss": 0.9398, + "learning_rate": 1.1236758544405454e-05, + "loss": 0.8842, "step": 16823 }, { - "epoch": 0.4774120317820658, + "epoch": 0.47674912862365043, "grad_norm": 0.0, - "learning_rate": 1.1214770761162666e-05, - "loss": 0.8288, + "learning_rate": 1.1235847796103345e-05, + "loss": 0.8867, "step": 16824 }, { - "epoch": 0.47744040862656073, + "epoch": 0.47677746606591287, "grad_norm": 0.0, - "learning_rate": 1.1213858484438935e-05, - "loss": 0.96, + "learning_rate": 1.1234937037391227e-05, + "loss": 0.8471, "step": 16825 }, { - "epoch": 0.47746878547105565, + "epoch": 0.47680580350817536, "grad_norm": 0.0, - "learning_rate": 1.1212946197461688e-05, - "loss": 0.9634, + "learning_rate": 1.123402626827678e-05, + "loss": 0.938, "step": 16826 }, { - "epoch": 0.4774971623155505, + "epoch": 0.4768341409504378, "grad_norm": 0.0, - "learning_rate": 1.121203390023863e-05, - "loss": 0.8717, + "learning_rate": 1.1233115488767672e-05, + "loss": 0.9326, "step": 16827 }, { - "epoch": 0.4775255391600454, + "epoch": 0.4768624783927003, "grad_norm": 0.0, - "learning_rate": 1.1211121592777466e-05, - "loss": 1.0235, + "learning_rate": 1.1232204698871572e-05, + "loss": 0.9019, "step": 16828 }, { - "epoch": 0.4775539160045403, + "epoch": 0.47689081583496273, "grad_norm": 0.0, - "learning_rate": 1.1210209275085906e-05, - "loss": 0.8689, + "learning_rate": 1.1231293898596154e-05, + "loss": 0.9343, "step": 16829 }, { - "epoch": 0.4775822928490352, + "epoch": 0.47691915327722517, "grad_norm": 0.0, - "learning_rate": 1.1209296947171653e-05, - "loss": 0.9108, + "learning_rate": 1.123038308794909e-05, + "loss": 0.8741, "step": 16830 }, { - "epoch": 0.47761066969353005, + "epoch": 0.47694749071948767, "grad_norm": 0.0, - "learning_rate": 1.120838460904241e-05, - "loss": 0.9398, + "learning_rate": 1.1229472266938052e-05, + "loss": 0.9242, "step": 16831 }, { - "epoch": 0.47763904653802497, + "epoch": 0.4769758281617501, "grad_norm": 0.0, - "learning_rate": 1.1207472260705893e-05, - "loss": 0.9674, + "learning_rate": 1.1228561435570718e-05, + "loss": 1.0056, "step": 16832 }, { - "epoch": 0.4776674233825199, + "epoch": 0.4770041656040126, "grad_norm": 0.0, - "learning_rate": 1.1206559902169805e-05, - "loss": 0.8752, + "learning_rate": 1.122765059385475e-05, + "loss": 0.9603, "step": 16833 }, { - "epoch": 0.47769580022701474, + "epoch": 0.47703250304627504, "grad_norm": 0.0, - "learning_rate": 1.1205647533441843e-05, - "loss": 0.7673, + "learning_rate": 1.1226739741797825e-05, + "loss": 0.8421, "step": 16834 }, { - "epoch": 0.47772417707150966, + "epoch": 0.47706084048853753, "grad_norm": 0.0, - "learning_rate": 1.1204735154529728e-05, - "loss": 0.7841, + "learning_rate": 1.1225828879407617e-05, + "loss": 0.9133, "step": 16835 }, { - "epoch": 0.4777525539160045, + "epoch": 0.47708917793079997, "grad_norm": 0.0, - "learning_rate": 1.1203822765441162e-05, - "loss": 0.8473, + "learning_rate": 1.1224918006691793e-05, + "loss": 0.8817, "step": 16836 }, { - "epoch": 0.47778093076049943, + "epoch": 0.4771175153730624, "grad_norm": 0.0, - "learning_rate": 1.120291036618385e-05, - "loss": 0.9645, + "learning_rate": 1.1224007123658034e-05, + "loss": 0.8252, "step": 16837 }, { - "epoch": 0.47780930760499435, + "epoch": 0.4771458528153249, "grad_norm": 0.0, - "learning_rate": 1.1201997956765499e-05, - "loss": 0.9067, + "learning_rate": 1.1223096230314008e-05, + "loss": 0.8705, "step": 16838 }, { - "epoch": 0.4778376844494892, + "epoch": 0.47717419025758734, "grad_norm": 0.0, - "learning_rate": 1.1201085537193817e-05, - "loss": 0.9037, + "learning_rate": 1.122218532666739e-05, + "loss": 0.9104, "step": 16839 }, { - "epoch": 0.4778660612939841, + "epoch": 0.47720252769984983, "grad_norm": 0.0, - "learning_rate": 1.1200173107476514e-05, - "loss": 0.8487, + "learning_rate": 1.122127441272585e-05, + "loss": 0.8453, "step": 16840 }, { - "epoch": 0.477894438138479, + "epoch": 0.47723086514211227, "grad_norm": 0.0, - "learning_rate": 1.119926066762129e-05, - "loss": 0.8269, + "learning_rate": 1.1220363488497059e-05, + "loss": 0.9261, "step": 16841 }, { - "epoch": 0.4779228149829739, + "epoch": 0.4772592025843747, "grad_norm": 0.0, - "learning_rate": 1.119834821763586e-05, - "loss": 0.9212, + "learning_rate": 1.1219452553988696e-05, + "loss": 0.8713, "step": 16842 }, { - "epoch": 0.4779511918274688, + "epoch": 0.4772875400266372, "grad_norm": 0.0, - "learning_rate": 1.119743575752793e-05, - "loss": 0.965, + "learning_rate": 1.121854160920843e-05, + "loss": 0.8921, "step": 16843 }, { - "epoch": 0.4779795686719637, + "epoch": 0.47731587746889964, "grad_norm": 0.0, - "learning_rate": 1.1196523287305204e-05, - "loss": 0.8436, + "learning_rate": 1.1217630654163938e-05, + "loss": 0.8433, "step": 16844 }, { - "epoch": 0.4780079455164586, + "epoch": 0.47734421491116213, "grad_norm": 0.0, - "learning_rate": 1.1195610806975394e-05, - "loss": 0.8721, + "learning_rate": 1.1216719688862888e-05, + "loss": 0.803, "step": 16845 }, { - "epoch": 0.47803632236095345, + "epoch": 0.4773725523534246, "grad_norm": 0.0, - "learning_rate": 1.1194698316546205e-05, - "loss": 0.8376, + "learning_rate": 1.121580871331296e-05, + "loss": 0.8538, "step": 16846 }, { - "epoch": 0.47806469920544836, + "epoch": 0.47740088979568707, "grad_norm": 0.0, - "learning_rate": 1.1193785816025345e-05, - "loss": 0.7803, + "learning_rate": 1.1214897727521821e-05, + "loss": 0.8432, "step": 16847 }, { - "epoch": 0.4780930760499432, + "epoch": 0.4774292272379495, "grad_norm": 0.0, - "learning_rate": 1.1192873305420525e-05, - "loss": 0.89, + "learning_rate": 1.1213986731497146e-05, + "loss": 0.9226, "step": 16848 }, { - "epoch": 0.47812145289443814, + "epoch": 0.47745756468021194, "grad_norm": 0.0, - "learning_rate": 1.1191960784739447e-05, - "loss": 0.8228, + "learning_rate": 1.1213075725246612e-05, + "loss": 0.9594, "step": 16849 }, { - "epoch": 0.47814982973893305, + "epoch": 0.47748590212247444, "grad_norm": 0.0, - "learning_rate": 1.1191048253989825e-05, - "loss": 0.8414, + "learning_rate": 1.1212164708777889e-05, + "loss": 0.8029, "step": 16850 }, { - "epoch": 0.4781782065834279, + "epoch": 0.4775142395647369, "grad_norm": 0.0, - "learning_rate": 1.1190135713179362e-05, - "loss": 0.8082, + "learning_rate": 1.1211253682098653e-05, + "loss": 0.9169, "step": 16851 }, { - "epoch": 0.4782065834279228, + "epoch": 0.47754257700699937, "grad_norm": 0.0, - "learning_rate": 1.1189223162315773e-05, - "loss": 0.9106, + "learning_rate": 1.1210342645216578e-05, + "loss": 1.0145, "step": 16852 }, { - "epoch": 0.4782349602724177, + "epoch": 0.4775709144492618, "grad_norm": 0.0, - "learning_rate": 1.118831060140676e-05, - "loss": 0.9499, + "learning_rate": 1.120943159813934e-05, + "loss": 0.9758, "step": 16853 }, { - "epoch": 0.4782633371169126, + "epoch": 0.47759925189152425, "grad_norm": 0.0, - "learning_rate": 1.1187398030460035e-05, - "loss": 0.8838, + "learning_rate": 1.1208520540874607e-05, + "loss": 0.9624, "step": 16854 }, { - "epoch": 0.4782917139614075, + "epoch": 0.47762758933378674, "grad_norm": 0.0, - "learning_rate": 1.1186485449483306e-05, - "loss": 0.8732, + "learning_rate": 1.1207609473430059e-05, + "loss": 0.9109, "step": 16855 }, { - "epoch": 0.4783200908059024, + "epoch": 0.4776559267760492, "grad_norm": 0.0, - "learning_rate": 1.1185572858484282e-05, - "loss": 0.9211, + "learning_rate": 1.1206698395813365e-05, + "loss": 0.849, "step": 16856 }, { - "epoch": 0.4783484676503973, + "epoch": 0.47768426421831167, "grad_norm": 0.0, - "learning_rate": 1.1184660257470666e-05, - "loss": 0.7862, + "learning_rate": 1.1205787308032205e-05, + "loss": 0.9086, "step": 16857 }, { - "epoch": 0.47837684449489215, + "epoch": 0.4777126016605741, "grad_norm": 0.0, - "learning_rate": 1.1183747646450173e-05, - "loss": 0.7307, + "learning_rate": 1.1204876210094248e-05, + "loss": 0.9687, "step": 16858 }, { - "epoch": 0.47840522133938707, + "epoch": 0.4777409391028366, "grad_norm": 0.0, - "learning_rate": 1.1182835025430514e-05, - "loss": 1.0201, + "learning_rate": 1.1203965102007176e-05, + "loss": 0.8574, "step": 16859 }, { - "epoch": 0.4784335981838819, + "epoch": 0.47776927654509904, "grad_norm": 0.0, - "learning_rate": 1.1181922394419393e-05, - "loss": 0.8629, + "learning_rate": 1.1203053983778655e-05, + "loss": 0.9717, "step": 16860 }, { - "epoch": 0.47846197502837684, + "epoch": 0.4777976139873615, "grad_norm": 0.0, - "learning_rate": 1.1181009753424519e-05, - "loss": 0.7769, + "learning_rate": 1.1202142855416365e-05, + "loss": 0.9863, "step": 16861 }, { - "epoch": 0.47849035187287176, + "epoch": 0.477825951429624, "grad_norm": 0.0, - "learning_rate": 1.1180097102453604e-05, - "loss": 0.9625, + "learning_rate": 1.1201231716927979e-05, + "loss": 0.8576, "step": 16862 }, { - "epoch": 0.4785187287173666, + "epoch": 0.4778542888718864, "grad_norm": 0.0, - "learning_rate": 1.1179184441514354e-05, - "loss": 0.9376, + "learning_rate": 1.120032056832117e-05, + "loss": 0.8726, "step": 16863 }, { - "epoch": 0.47854710556186153, + "epoch": 0.4778826263141489, "grad_norm": 0.0, - "learning_rate": 1.1178271770614482e-05, - "loss": 0.9147, + "learning_rate": 1.1199409409603618e-05, + "loss": 0.9093, "step": 16864 }, { - "epoch": 0.4785754824063564, + "epoch": 0.47791096375641134, "grad_norm": 0.0, - "learning_rate": 1.1177359089761693e-05, - "loss": 0.9342, + "learning_rate": 1.1198498240782996e-05, + "loss": 0.9316, "step": 16865 }, { - "epoch": 0.4786038592508513, + "epoch": 0.4779393011986738, "grad_norm": 0.0, - "learning_rate": 1.11764463989637e-05, - "loss": 0.9307, + "learning_rate": 1.1197587061866975e-05, + "loss": 0.8649, "step": 16866 }, { - "epoch": 0.4786322360953462, + "epoch": 0.4779676386409363, "grad_norm": 0.0, - "learning_rate": 1.1175533698228214e-05, - "loss": 0.7562, + "learning_rate": 1.1196675872863235e-05, + "loss": 0.8887, "step": 16867 }, { - "epoch": 0.4786606129398411, + "epoch": 0.4779959760831987, "grad_norm": 0.0, - "learning_rate": 1.1174620987562936e-05, - "loss": 0.918, + "learning_rate": 1.119576467377945e-05, + "loss": 0.8893, "step": 16868 }, { - "epoch": 0.478688989784336, + "epoch": 0.4780243135254612, "grad_norm": 0.0, - "learning_rate": 1.1173708266975588e-05, - "loss": 0.9679, + "learning_rate": 1.1194853464623294e-05, + "loss": 0.9022, "step": 16869 }, { - "epoch": 0.47871736662883085, + "epoch": 0.47805265096772365, "grad_norm": 0.0, - "learning_rate": 1.117279553647387e-05, - "loss": 0.7333, + "learning_rate": 1.1193942245402443e-05, + "loss": 0.8229, "step": 16870 }, { - "epoch": 0.47874574347332577, + "epoch": 0.47808098840998614, "grad_norm": 0.0, - "learning_rate": 1.1171882796065496e-05, - "loss": 0.9415, + "learning_rate": 1.1193031016124576e-05, + "loss": 0.9461, "step": 16871 }, { - "epoch": 0.4787741203178207, + "epoch": 0.4781093258522486, "grad_norm": 0.0, - "learning_rate": 1.1170970045758173e-05, - "loss": 0.892, + "learning_rate": 1.1192119776797366e-05, + "loss": 0.9043, "step": 16872 }, { - "epoch": 0.47880249716231554, + "epoch": 0.478137663294511, "grad_norm": 0.0, - "learning_rate": 1.1170057285559619e-05, - "loss": 0.8511, + "learning_rate": 1.1191208527428488e-05, + "loss": 0.9422, "step": 16873 }, { - "epoch": 0.47883087400681046, + "epoch": 0.4781660007367735, "grad_norm": 0.0, - "learning_rate": 1.1169144515477536e-05, - "loss": 0.9828, + "learning_rate": 1.1190297268025614e-05, + "loss": 0.9201, "step": 16874 }, { - "epoch": 0.4788592508513053, + "epoch": 0.47819433817903595, "grad_norm": 0.0, - "learning_rate": 1.1168231735519634e-05, - "loss": 0.9202, + "learning_rate": 1.1189385998596429e-05, + "loss": 0.9061, "step": 16875 }, { - "epoch": 0.47888762769580023, + "epoch": 0.47822267562129844, "grad_norm": 0.0, - "learning_rate": 1.116731894569363e-05, - "loss": 0.8531, + "learning_rate": 1.1188474719148601e-05, + "loss": 0.9852, "step": 16876 }, { - "epoch": 0.4789160045402951, + "epoch": 0.4782510130635609, "grad_norm": 0.0, - "learning_rate": 1.116640614600723e-05, - "loss": 0.856, + "learning_rate": 1.1187563429689809e-05, + "loss": 1.0067, "step": 16877 }, { - "epoch": 0.47894438138479, + "epoch": 0.4782793505058233, "grad_norm": 0.0, - "learning_rate": 1.1165493336468142e-05, - "loss": 0.844, + "learning_rate": 1.1186652130227734e-05, + "loss": 0.8394, "step": 16878 }, { - "epoch": 0.4789727582292849, + "epoch": 0.4783076879480858, "grad_norm": 0.0, - "learning_rate": 1.116458051708408e-05, - "loss": 0.975, + "learning_rate": 1.1185740820770042e-05, + "loss": 0.9331, "step": 16879 }, { - "epoch": 0.4790011350737798, + "epoch": 0.47833602539034825, "grad_norm": 0.0, - "learning_rate": 1.1163667687862755e-05, - "loss": 0.8423, + "learning_rate": 1.1184829501324416e-05, + "loss": 0.9074, "step": 16880 }, { - "epoch": 0.4790295119182747, + "epoch": 0.47836436283261075, "grad_norm": 0.0, - "learning_rate": 1.116275484881188e-05, - "loss": 0.8832, + "learning_rate": 1.118391817189853e-05, + "loss": 0.9812, "step": 16881 }, { - "epoch": 0.47905788876276956, + "epoch": 0.4783927002748732, "grad_norm": 0.0, - "learning_rate": 1.1161841999939156e-05, - "loss": 0.8421, + "learning_rate": 1.1183006832500065e-05, + "loss": 0.8822, "step": 16882 }, { - "epoch": 0.47908626560726447, + "epoch": 0.4784210377171357, "grad_norm": 0.0, - "learning_rate": 1.1160929141252303e-05, - "loss": 0.9282, + "learning_rate": 1.1182095483136692e-05, + "loss": 0.9846, "step": 16883 }, { - "epoch": 0.4791146424517594, + "epoch": 0.4784493751593981, "grad_norm": 0.0, - "learning_rate": 1.1160016272759031e-05, - "loss": 0.8816, + "learning_rate": 1.1181184123816092e-05, + "loss": 0.9463, "step": 16884 }, { - "epoch": 0.47914301929625425, + "epoch": 0.47847771260166055, "grad_norm": 0.0, - "learning_rate": 1.1159103394467048e-05, - "loss": 0.9385, + "learning_rate": 1.1180272754545939e-05, + "loss": 0.8222, "step": 16885 }, { - "epoch": 0.47917139614074916, + "epoch": 0.47850605004392305, "grad_norm": 0.0, - "learning_rate": 1.1158190506384069e-05, - "loss": 0.8848, + "learning_rate": 1.1179361375333907e-05, + "loss": 0.8639, "step": 16886 }, { - "epoch": 0.479199772985244, + "epoch": 0.4785343874861855, "grad_norm": 0.0, - "learning_rate": 1.11572776085178e-05, - "loss": 0.8617, + "learning_rate": 1.1178449986187679e-05, + "loss": 0.9442, "step": 16887 }, { - "epoch": 0.47922814982973894, + "epoch": 0.478562724928448, "grad_norm": 0.0, - "learning_rate": 1.1156364700875952e-05, - "loss": 0.8609, + "learning_rate": 1.1177538587114926e-05, + "loss": 0.8234, "step": 16888 }, { - "epoch": 0.47925652667423385, + "epoch": 0.4785910623707104, "grad_norm": 0.0, - "learning_rate": 1.1155451783466244e-05, - "loss": 0.9055, + "learning_rate": 1.1176627178123332e-05, + "loss": 0.8231, "step": 16889 }, { - "epoch": 0.4792849035187287, + "epoch": 0.47861939981297286, "grad_norm": 0.0, - "learning_rate": 1.1154538856296381e-05, - "loss": 0.9209, + "learning_rate": 1.117571575922057e-05, + "loss": 0.9078, "step": 16890 }, { - "epoch": 0.4793132803632236, + "epoch": 0.47864773725523535, "grad_norm": 0.0, - "learning_rate": 1.115362591937408e-05, - "loss": 0.9554, + "learning_rate": 1.1174804330414315e-05, + "loss": 0.9211, "step": 16891 }, { - "epoch": 0.4793416572077185, + "epoch": 0.4786760746974978, "grad_norm": 0.0, - "learning_rate": 1.1152712972707045e-05, - "loss": 0.8759, + "learning_rate": 1.1173892891712251e-05, + "loss": 0.9332, "step": 16892 }, { - "epoch": 0.4793700340522134, + "epoch": 0.4787044121397603, "grad_norm": 0.0, - "learning_rate": 1.1151800016302994e-05, - "loss": 0.8621, + "learning_rate": 1.1172981443122048e-05, + "loss": 0.8226, "step": 16893 }, { - "epoch": 0.47939841089670826, + "epoch": 0.4787327495820227, "grad_norm": 0.0, - "learning_rate": 1.1150887050169636e-05, - "loss": 0.9259, + "learning_rate": 1.1172069984651388e-05, + "loss": 0.9937, "step": 16894 }, { - "epoch": 0.4794267877412032, + "epoch": 0.4787610870242852, "grad_norm": 0.0, - "learning_rate": 1.1149974074314678e-05, - "loss": 1.0403, + "learning_rate": 1.1171158516307944e-05, + "loss": 0.9545, "step": 16895 }, { - "epoch": 0.4794551645856981, + "epoch": 0.47878942446654765, "grad_norm": 0.0, - "learning_rate": 1.1149061088745842e-05, - "loss": 0.9417, + "learning_rate": 1.1170247038099402e-05, + "loss": 0.8156, "step": 16896 }, { - "epoch": 0.47948354143019295, + "epoch": 0.4788177619088101, "grad_norm": 0.0, - "learning_rate": 1.1148148093470838e-05, - "loss": 0.9515, + "learning_rate": 1.1169335550033434e-05, + "loss": 0.9434, "step": 16897 }, { - "epoch": 0.47951191827468786, + "epoch": 0.4788460993510726, "grad_norm": 0.0, - "learning_rate": 1.1147235088497372e-05, - "loss": 1.0031, + "learning_rate": 1.1168424052117717e-05, + "loss": 0.8502, "step": 16898 }, { - "epoch": 0.4795402951191827, + "epoch": 0.478874436793335, "grad_norm": 0.0, - "learning_rate": 1.114632207383316e-05, - "loss": 0.938, + "learning_rate": 1.1167512544359929e-05, + "loss": 0.8835, "step": 16899 }, { - "epoch": 0.47956867196367764, + "epoch": 0.4789027742355975, "grad_norm": 0.0, - "learning_rate": 1.1145409049485916e-05, - "loss": 0.8475, + "learning_rate": 1.1166601026767749e-05, + "loss": 0.9721, "step": 16900 }, { - "epoch": 0.47959704880817255, + "epoch": 0.47893111167785996, "grad_norm": 0.0, - "learning_rate": 1.1144496015463346e-05, - "loss": 0.8655, + "learning_rate": 1.1165689499348857e-05, + "loss": 0.9033, "step": 16901 }, { - "epoch": 0.4796254256526674, + "epoch": 0.4789594491201224, "grad_norm": 0.0, - "learning_rate": 1.1143582971773172e-05, - "loss": 0.7908, + "learning_rate": 1.1164777962110929e-05, + "loss": 0.7971, "step": 16902 }, { - "epoch": 0.47965380249716233, + "epoch": 0.4789877865623849, "grad_norm": 0.0, - "learning_rate": 1.1142669918423098e-05, - "loss": 0.8641, + "learning_rate": 1.1163866415061643e-05, + "loss": 0.8769, "step": 16903 }, { - "epoch": 0.4796821793416572, + "epoch": 0.4790161240046473, "grad_norm": 0.0, - "learning_rate": 1.114175685542084e-05, - "loss": 0.8943, + "learning_rate": 1.1162954858208682e-05, + "loss": 0.8651, "step": 16904 }, { - "epoch": 0.4797105561861521, + "epoch": 0.4790444614469098, "grad_norm": 0.0, - "learning_rate": 1.1140843782774108e-05, - "loss": 0.8235, + "learning_rate": 1.1162043291559716e-05, + "loss": 0.9728, "step": 16905 }, { - "epoch": 0.479738933030647, + "epoch": 0.47907279888917226, "grad_norm": 0.0, - "learning_rate": 1.1139930700490622e-05, - "loss": 0.8591, + "learning_rate": 1.1161131715122432e-05, + "loss": 0.8882, "step": 16906 }, { - "epoch": 0.4797673098751419, + "epoch": 0.47910113633143475, "grad_norm": 0.0, - "learning_rate": 1.1139017608578088e-05, - "loss": 0.8871, + "learning_rate": 1.1160220128904498e-05, + "loss": 0.8475, "step": 16907 }, { - "epoch": 0.4797956867196368, + "epoch": 0.4791294737736972, "grad_norm": 0.0, - "learning_rate": 1.1138104507044222e-05, - "loss": 0.8785, + "learning_rate": 1.1159308532913601e-05, + "loss": 0.8837, "step": 16908 }, { - "epoch": 0.47982406356413165, + "epoch": 0.47915781121595963, "grad_norm": 0.0, - "learning_rate": 1.1137191395896736e-05, - "loss": 0.8146, + "learning_rate": 1.115839692715742e-05, + "loss": 0.8534, "step": 16909 }, { - "epoch": 0.47985244040862657, + "epoch": 0.4791861486582221, "grad_norm": 0.0, - "learning_rate": 1.1136278275143344e-05, - "loss": 0.9185, + "learning_rate": 1.1157485311643632e-05, + "loss": 0.8678, "step": 16910 }, { - "epoch": 0.47988081725312143, + "epoch": 0.47921448610048456, "grad_norm": 0.0, - "learning_rate": 1.1135365144791754e-05, - "loss": 0.8655, + "learning_rate": 1.1156573686379915e-05, + "loss": 0.9646, "step": 16911 }, { - "epoch": 0.47990919409761634, + "epoch": 0.47924282354274705, "grad_norm": 0.0, - "learning_rate": 1.1134452004849687e-05, - "loss": 0.9684, + "learning_rate": 1.1155662051373946e-05, + "loss": 0.9642, "step": 16912 }, { - "epoch": 0.47993757094211126, + "epoch": 0.4792711609850095, "grad_norm": 0.0, - "learning_rate": 1.1133538855324853e-05, - "loss": 0.8155, + "learning_rate": 1.115475040663341e-05, + "loss": 0.769, "step": 16913 }, { - "epoch": 0.4799659477866061, + "epoch": 0.47929949842727193, "grad_norm": 0.0, - "learning_rate": 1.1132625696224966e-05, - "loss": 0.8752, + "learning_rate": 1.115383875216598e-05, + "loss": 0.9793, "step": 16914 }, { - "epoch": 0.47999432463110103, + "epoch": 0.4793278358695344, "grad_norm": 0.0, - "learning_rate": 1.1131712527557737e-05, - "loss": 0.8943, + "learning_rate": 1.1152927087979337e-05, + "loss": 0.9928, "step": 16915 }, { - "epoch": 0.4800227014755959, + "epoch": 0.47935617331179686, "grad_norm": 0.0, - "learning_rate": 1.1130799349330881e-05, - "loss": 0.9028, + "learning_rate": 1.115201541408116e-05, + "loss": 0.9148, "step": 16916 }, { - "epoch": 0.4800510783200908, + "epoch": 0.47938451075405936, "grad_norm": 0.0, - "learning_rate": 1.1129886161552116e-05, - "loss": 0.9135, + "learning_rate": 1.1151103730479134e-05, + "loss": 0.9547, "step": 16917 }, { - "epoch": 0.4800794551645857, + "epoch": 0.4794128481963218, "grad_norm": 0.0, - "learning_rate": 1.1128972964229146e-05, - "loss": 0.9746, + "learning_rate": 1.1150192037180932e-05, + "loss": 0.9595, "step": 16918 }, { - "epoch": 0.4801078320090806, + "epoch": 0.4794411856385843, "grad_norm": 0.0, - "learning_rate": 1.112805975736969e-05, - "loss": 0.9939, + "learning_rate": 1.1149280334194238e-05, + "loss": 0.8333, "step": 16919 }, { - "epoch": 0.4801362088535755, + "epoch": 0.4794695230808467, "grad_norm": 0.0, - "learning_rate": 1.1127146540981468e-05, - "loss": 0.9041, + "learning_rate": 1.1148368621526721e-05, + "loss": 0.9108, "step": 16920 }, { - "epoch": 0.48016458569807036, + "epoch": 0.47949786052310917, "grad_norm": 0.0, - "learning_rate": 1.1126233315072188e-05, - "loss": 0.8648, + "learning_rate": 1.1147456899186073e-05, + "loss": 0.955, "step": 16921 }, { - "epoch": 0.48019296254256527, + "epoch": 0.47952619796537166, "grad_norm": 0.0, - "learning_rate": 1.1125320079649562e-05, - "loss": 0.9039, + "learning_rate": 1.1146545167179972e-05, + "loss": 1.0167, "step": 16922 }, { - "epoch": 0.4802213393870602, + "epoch": 0.4795545354076341, "grad_norm": 0.0, - "learning_rate": 1.112440683472131e-05, - "loss": 0.8194, + "learning_rate": 1.1145633425516094e-05, + "loss": 0.8796, "step": 16923 }, { - "epoch": 0.48024971623155505, + "epoch": 0.4795828728498966, "grad_norm": 0.0, - "learning_rate": 1.1123493580295142e-05, - "loss": 0.881, + "learning_rate": 1.1144721674202116e-05, + "loss": 0.9585, "step": 16924 }, { - "epoch": 0.48027809307604996, + "epoch": 0.47961121029215903, "grad_norm": 0.0, - "learning_rate": 1.1122580316378769e-05, - "loss": 0.8829, + "learning_rate": 1.1143809913245727e-05, + "loss": 0.8071, "step": 16925 }, { - "epoch": 0.4803064699205448, + "epoch": 0.47963954773442147, "grad_norm": 0.0, - "learning_rate": 1.112166704297991e-05, - "loss": 0.9741, + "learning_rate": 1.1142898142654603e-05, + "loss": 0.922, "step": 16926 }, { - "epoch": 0.48033484676503974, + "epoch": 0.47966788517668396, "grad_norm": 0.0, - "learning_rate": 1.1120753760106284e-05, - "loss": 0.8947, + "learning_rate": 1.1141986362436419e-05, + "loss": 0.9818, "step": 16927 }, { - "epoch": 0.4803632236095346, + "epoch": 0.4796962226189464, "grad_norm": 0.0, - "learning_rate": 1.11198404677656e-05, - "loss": 0.9743, + "learning_rate": 1.1141074572598863e-05, + "loss": 0.8816, "step": 16928 }, { - "epoch": 0.4803916004540295, + "epoch": 0.4797245600612089, "grad_norm": 0.0, - "learning_rate": 1.1118927165965569e-05, - "loss": 0.9189, + "learning_rate": 1.1140162773149612e-05, + "loss": 1.0475, "step": 16929 }, { - "epoch": 0.4804199772985244, + "epoch": 0.47975289750347133, "grad_norm": 0.0, - "learning_rate": 1.1118013854713913e-05, - "loss": 0.9046, + "learning_rate": 1.1139250964096346e-05, + "loss": 0.8739, "step": 16930 }, { - "epoch": 0.4804483541430193, + "epoch": 0.4797812349457338, "grad_norm": 0.0, - "learning_rate": 1.1117100534018343e-05, - "loss": 0.8976, + "learning_rate": 1.1138339145446746e-05, + "loss": 0.8537, "step": 16931 }, { - "epoch": 0.4804767309875142, + "epoch": 0.47980957238799626, "grad_norm": 0.0, - "learning_rate": 1.1116187203886575e-05, - "loss": 0.9025, + "learning_rate": 1.1137427317208494e-05, + "loss": 0.9504, "step": 16932 }, { - "epoch": 0.48050510783200906, + "epoch": 0.4798379098302587, "grad_norm": 0.0, - "learning_rate": 1.1115273864326324e-05, - "loss": 0.9824, + "learning_rate": 1.1136515479389267e-05, + "loss": 0.8184, "step": 16933 }, { - "epoch": 0.480533484676504, + "epoch": 0.4798662472725212, "grad_norm": 0.0, - "learning_rate": 1.1114360515345301e-05, - "loss": 0.9715, + "learning_rate": 1.1135603631996748e-05, + "loss": 0.8716, "step": 16934 }, { - "epoch": 0.4805618615209989, + "epoch": 0.47989458471478363, "grad_norm": 0.0, - "learning_rate": 1.1113447156951229e-05, - "loss": 0.7452, + "learning_rate": 1.113469177503862e-05, + "loss": 0.9319, "step": 16935 }, { - "epoch": 0.48059023836549375, + "epoch": 0.47992292215704613, "grad_norm": 0.0, - "learning_rate": 1.1112533789151816e-05, - "loss": 0.8552, + "learning_rate": 1.1133779908522561e-05, + "loss": 0.7664, "step": 16936 }, { - "epoch": 0.48061861520998866, + "epoch": 0.47995125959930857, "grad_norm": 0.0, - "learning_rate": 1.1111620411954782e-05, - "loss": 0.8902, + "learning_rate": 1.1132868032456252e-05, + "loss": 0.9008, "step": 16937 }, { - "epoch": 0.4806469920544835, + "epoch": 0.479979597041571, "grad_norm": 0.0, - "learning_rate": 1.111070702536784e-05, - "loss": 0.9445, + "learning_rate": 1.1131956146847379e-05, + "loss": 0.8236, "step": 16938 }, { - "epoch": 0.48067536889897844, + "epoch": 0.4800079344838335, "grad_norm": 0.0, - "learning_rate": 1.1109793629398705e-05, - "loss": 0.879, + "learning_rate": 1.1131044251703615e-05, + "loss": 0.9138, "step": 16939 }, { - "epoch": 0.4807037457434733, + "epoch": 0.48003627192609594, "grad_norm": 0.0, - "learning_rate": 1.1108880224055093e-05, - "loss": 0.9284, + "learning_rate": 1.1130132347032646e-05, + "loss": 0.8618, "step": 16940 }, { - "epoch": 0.4807321225879682, + "epoch": 0.48006460936835843, "grad_norm": 0.0, - "learning_rate": 1.110796680934472e-05, - "loss": 0.9289, + "learning_rate": 1.1129220432842149e-05, + "loss": 0.9576, "step": 16941 }, { - "epoch": 0.48076049943246313, + "epoch": 0.48009294681062087, "grad_norm": 0.0, - "learning_rate": 1.1107053385275302e-05, - "loss": 0.8797, + "learning_rate": 1.1128308509139814e-05, + "loss": 0.8092, "step": 16942 }, { - "epoch": 0.480788876276958, + "epoch": 0.48012128425288336, "grad_norm": 0.0, - "learning_rate": 1.1106139951854555e-05, - "loss": 0.8567, + "learning_rate": 1.1127396575933315e-05, + "loss": 1.048, "step": 16943 }, { - "epoch": 0.4808172531214529, + "epoch": 0.4801496216951458, "grad_norm": 0.0, - "learning_rate": 1.1105226509090194e-05, - "loss": 0.9131, + "learning_rate": 1.112648463323034e-05, + "loss": 0.9037, "step": 16944 }, { - "epoch": 0.48084562996594776, + "epoch": 0.48017795913740824, "grad_norm": 0.0, - "learning_rate": 1.1104313056989936e-05, - "loss": 0.9461, + "learning_rate": 1.1125572681038561e-05, + "loss": 0.8025, "step": 16945 }, { - "epoch": 0.4808740068104427, + "epoch": 0.48020629657967073, "grad_norm": 0.0, - "learning_rate": 1.1103399595561493e-05, - "loss": 0.9669, + "learning_rate": 1.1124660719365669e-05, + "loss": 0.8888, "step": 16946 }, { - "epoch": 0.4809023836549376, + "epoch": 0.48023463402193317, "grad_norm": 0.0, - "learning_rate": 1.1102486124812586e-05, - "loss": 0.8824, + "learning_rate": 1.112374874821934e-05, + "loss": 0.7786, "step": 16947 }, { - "epoch": 0.48093076049943245, + "epoch": 0.48026297146419566, "grad_norm": 0.0, - "learning_rate": 1.1101572644750931e-05, - "loss": 0.9519, + "learning_rate": 1.1122836767607259e-05, + "loss": 1.0224, "step": 16948 }, { - "epoch": 0.48095913734392737, + "epoch": 0.4802913089064581, "grad_norm": 0.0, - "learning_rate": 1.1100659155384235e-05, - "loss": 0.918, + "learning_rate": 1.1121924777537108e-05, + "loss": 0.8797, "step": 16949 }, { - "epoch": 0.4809875141884222, + "epoch": 0.48031964634872054, "grad_norm": 0.0, - "learning_rate": 1.1099745656720229e-05, - "loss": 0.914, + "learning_rate": 1.1121012778016567e-05, + "loss": 0.8808, "step": 16950 }, { - "epoch": 0.48101589103291714, + "epoch": 0.48034798379098304, "grad_norm": 0.0, - "learning_rate": 1.1098832148766621e-05, - "loss": 0.7916, + "learning_rate": 1.112010076905332e-05, + "loss": 0.9432, "step": 16951 }, { - "epoch": 0.48104426787741206, + "epoch": 0.4803763212332455, "grad_norm": 0.0, - "learning_rate": 1.1097918631531123e-05, - "loss": 0.8546, + "learning_rate": 1.1119188750655047e-05, + "loss": 0.9901, "step": 16952 }, { - "epoch": 0.4810726447219069, + "epoch": 0.48040465867550797, "grad_norm": 0.0, - "learning_rate": 1.1097005105021463e-05, - "loss": 0.8531, + "learning_rate": 1.111827672282943e-05, + "loss": 0.9982, "step": 16953 }, { - "epoch": 0.48110102156640183, + "epoch": 0.4804329961177704, "grad_norm": 0.0, - "learning_rate": 1.109609156924535e-05, - "loss": 0.9038, + "learning_rate": 1.1117364685584154e-05, + "loss": 0.9893, "step": 16954 }, { - "epoch": 0.4811293984108967, + "epoch": 0.48046133356003284, "grad_norm": 0.0, - "learning_rate": 1.1095178024210502e-05, - "loss": 0.9409, + "learning_rate": 1.1116452638926903e-05, + "loss": 0.8273, "step": 16955 }, { - "epoch": 0.4811577752553916, + "epoch": 0.48048967100229534, "grad_norm": 0.0, - "learning_rate": 1.1094264469924636e-05, - "loss": 0.9246, + "learning_rate": 1.1115540582865357e-05, + "loss": 0.8722, "step": 16956 }, { - "epoch": 0.48118615209988647, + "epoch": 0.4805180084445578, "grad_norm": 0.0, - "learning_rate": 1.1093350906395469e-05, - "loss": 0.8721, + "learning_rate": 1.1114628517407193e-05, + "loss": 0.9395, "step": 16957 }, { - "epoch": 0.4812145289443814, + "epoch": 0.48054634588682027, "grad_norm": 0.0, - "learning_rate": 1.1092437333630716e-05, - "loss": 0.7865, + "learning_rate": 1.1113716442560102e-05, + "loss": 0.803, "step": 16958 }, { - "epoch": 0.4812429057888763, + "epoch": 0.4805746833290827, "grad_norm": 0.0, - "learning_rate": 1.1091523751638098e-05, - "loss": 0.9081, + "learning_rate": 1.1112804358331766e-05, + "loss": 0.9579, "step": 16959 }, { - "epoch": 0.48127128263337116, + "epoch": 0.4806030207713452, "grad_norm": 0.0, - "learning_rate": 1.1090610160425327e-05, - "loss": 0.8276, + "learning_rate": 1.1111892264729862e-05, + "loss": 0.8616, "step": 16960 }, { - "epoch": 0.48129965947786607, + "epoch": 0.48063135821360764, "grad_norm": 0.0, - "learning_rate": 1.1089696560000128e-05, - "loss": 0.9392, + "learning_rate": 1.1110980161762078e-05, + "loss": 0.9796, "step": 16961 }, { - "epoch": 0.48132803632236093, + "epoch": 0.4806596956558701, "grad_norm": 0.0, - "learning_rate": 1.108878295037021e-05, - "loss": 0.8796, + "learning_rate": 1.1110068049436098e-05, + "loss": 0.9569, "step": 16962 }, { - "epoch": 0.48135641316685585, + "epoch": 0.48068803309813257, "grad_norm": 0.0, - "learning_rate": 1.1087869331543293e-05, - "loss": 0.9037, + "learning_rate": 1.11091559277596e-05, + "loss": 0.8795, "step": 16963 }, { - "epoch": 0.48138479001135076, + "epoch": 0.480716370540395, "grad_norm": 0.0, - "learning_rate": 1.1086955703527093e-05, - "loss": 0.9245, + "learning_rate": 1.1108243796740272e-05, + "loss": 0.9362, "step": 16964 }, { - "epoch": 0.4814131668558456, + "epoch": 0.4807447079826575, "grad_norm": 0.0, - "learning_rate": 1.1086042066329334e-05, - "loss": 0.8706, + "learning_rate": 1.1107331656385793e-05, + "loss": 0.8832, "step": 16965 }, { - "epoch": 0.48144154370034054, + "epoch": 0.48077304542491994, "grad_norm": 0.0, - "learning_rate": 1.1085128419957724e-05, - "loss": 0.8774, + "learning_rate": 1.110641950670385e-05, + "loss": 0.9135, "step": 16966 }, { - "epoch": 0.4814699205448354, + "epoch": 0.4808013828671824, "grad_norm": 0.0, - "learning_rate": 1.1084214764419989e-05, - "loss": 0.8906, + "learning_rate": 1.110550734770212e-05, + "loss": 0.9148, "step": 16967 }, { - "epoch": 0.4814982973893303, + "epoch": 0.4808297203094449, "grad_norm": 0.0, - "learning_rate": 1.1083301099723844e-05, - "loss": 0.9373, + "learning_rate": 1.1104595179388295e-05, + "loss": 0.854, "step": 16968 }, { - "epoch": 0.4815266742338252, + "epoch": 0.4808580577517073, "grad_norm": 0.0, - "learning_rate": 1.1082387425877004e-05, - "loss": 0.8179, + "learning_rate": 1.1103683001770055e-05, + "loss": 0.8696, "step": 16969 }, { - "epoch": 0.4815550510783201, + "epoch": 0.4808863951939698, "grad_norm": 0.0, - "learning_rate": 1.108147374288719e-05, - "loss": 0.8816, + "learning_rate": 1.1102770814855081e-05, + "loss": 0.9901, "step": 16970 }, { - "epoch": 0.481583427922815, + "epoch": 0.48091473263623224, "grad_norm": 0.0, - "learning_rate": 1.1080560050762116e-05, - "loss": 1.002, + "learning_rate": 1.1101858618651062e-05, + "loss": 0.9749, "step": 16971 }, { - "epoch": 0.48161180476730986, + "epoch": 0.48094307007849474, "grad_norm": 0.0, - "learning_rate": 1.1079646349509505e-05, - "loss": 0.834, + "learning_rate": 1.1100946413165677e-05, + "loss": 0.8766, "step": 16972 }, { - "epoch": 0.4816401816118048, + "epoch": 0.4809714075207572, "grad_norm": 0.0, - "learning_rate": 1.1078732639137071e-05, - "loss": 0.9076, + "learning_rate": 1.110003419840661e-05, + "loss": 0.9934, "step": 16973 }, { - "epoch": 0.48166855845629963, + "epoch": 0.4809997449630196, "grad_norm": 0.0, - "learning_rate": 1.1077818919652534e-05, - "loss": 0.895, + "learning_rate": 1.1099121974381546e-05, + "loss": 0.9608, "step": 16974 }, { - "epoch": 0.48169693530079455, + "epoch": 0.4810280824052821, "grad_norm": 0.0, - "learning_rate": 1.1076905191063614e-05, - "loss": 1.0153, + "learning_rate": 1.1098209741098173e-05, + "loss": 0.8389, "step": 16975 }, { - "epoch": 0.48172531214528946, + "epoch": 0.48105641984754455, "grad_norm": 0.0, - "learning_rate": 1.1075991453378026e-05, - "loss": 0.8811, + "learning_rate": 1.109729749856417e-05, + "loss": 0.942, "step": 16976 }, { - "epoch": 0.4817536889897843, + "epoch": 0.48108475728980704, "grad_norm": 0.0, - "learning_rate": 1.1075077706603493e-05, - "loss": 0.8941, + "learning_rate": 1.1096385246787225e-05, + "loss": 0.9251, "step": 16977 }, { - "epoch": 0.48178206583427924, + "epoch": 0.4811130947320695, "grad_norm": 0.0, - "learning_rate": 1.1074163950747727e-05, - "loss": 0.8945, + "learning_rate": 1.1095472985775015e-05, + "loss": 0.8705, "step": 16978 }, { - "epoch": 0.4818104426787741, + "epoch": 0.4811414321743319, "grad_norm": 0.0, - "learning_rate": 1.1073250185818449e-05, - "loss": 0.7791, + "learning_rate": 1.1094560715535232e-05, + "loss": 0.92, "step": 16979 }, { - "epoch": 0.481838819523269, + "epoch": 0.4811697696165944, "grad_norm": 0.0, - "learning_rate": 1.107233641182338e-05, - "loss": 0.9214, + "learning_rate": 1.1093648436075558e-05, + "loss": 0.9152, "step": 16980 }, { - "epoch": 0.48186719636776393, + "epoch": 0.48119810705885685, "grad_norm": 0.0, - "learning_rate": 1.1071422628770237e-05, - "loss": 0.8813, + "learning_rate": 1.1092736147403675e-05, + "loss": 0.9532, "step": 16981 }, { - "epoch": 0.4818955732122588, + "epoch": 0.48122644450111934, "grad_norm": 0.0, - "learning_rate": 1.1070508836666738e-05, - "loss": 0.8736, + "learning_rate": 1.109182384952727e-05, + "loss": 1.0055, "step": 16982 }, { - "epoch": 0.4819239500567537, + "epoch": 0.4812547819433818, "grad_norm": 0.0, - "learning_rate": 1.1069595035520604e-05, - "loss": 0.9717, + "learning_rate": 1.109091154245403e-05, + "loss": 0.8491, "step": 16983 }, { - "epoch": 0.48195232690124856, + "epoch": 0.4812831193856443, "grad_norm": 0.0, - "learning_rate": 1.1068681225339552e-05, - "loss": 0.8875, + "learning_rate": 1.1089999226191637e-05, + "loss": 0.9502, "step": 16984 }, { - "epoch": 0.4819807037457435, + "epoch": 0.4813114568279067, "grad_norm": 0.0, - "learning_rate": 1.1067767406131303e-05, - "loss": 0.9261, + "learning_rate": 1.1089086900747774e-05, + "loss": 0.9189, "step": 16985 }, { - "epoch": 0.4820090805902384, + "epoch": 0.48133979427016915, "grad_norm": 0.0, - "learning_rate": 1.1066853577903572e-05, - "loss": 1.0107, + "learning_rate": 1.1088174566130126e-05, + "loss": 0.834, "step": 16986 }, { - "epoch": 0.48203745743473325, + "epoch": 0.48136813171243165, "grad_norm": 0.0, - "learning_rate": 1.1065939740664083e-05, - "loss": 0.8535, + "learning_rate": 1.108726222234638e-05, + "loss": 0.92, "step": 16987 }, { - "epoch": 0.48206583427922817, + "epoch": 0.4813964691546941, "grad_norm": 0.0, - "learning_rate": 1.1065025894420551e-05, - "loss": 0.8476, + "learning_rate": 1.1086349869404222e-05, + "loss": 0.8868, "step": 16988 }, { - "epoch": 0.482094211123723, + "epoch": 0.4814248065969566, "grad_norm": 0.0, - "learning_rate": 1.10641120391807e-05, - "loss": 0.9138, + "learning_rate": 1.108543750731134e-05, + "loss": 0.9213, "step": 16989 }, { - "epoch": 0.48212258796821794, + "epoch": 0.481453144039219, "grad_norm": 0.0, - "learning_rate": 1.1063198174952245e-05, - "loss": 0.8738, + "learning_rate": 1.1084525136075408e-05, + "loss": 0.9911, "step": 16990 }, { - "epoch": 0.4821509648127128, + "epoch": 0.48148148148148145, "grad_norm": 0.0, - "learning_rate": 1.106228430174291e-05, - "loss": 0.8974, + "learning_rate": 1.1083612755704121e-05, + "loss": 0.9192, "step": 16991 }, { - "epoch": 0.4821793416572077, + "epoch": 0.48150981892374395, "grad_norm": 0.0, - "learning_rate": 1.1061370419560408e-05, - "loss": 0.8578, + "learning_rate": 1.1082700366205157e-05, + "loss": 0.9919, "step": 16992 }, { - "epoch": 0.48220771850170263, + "epoch": 0.4815381563660064, "grad_norm": 0.0, - "learning_rate": 1.1060456528412464e-05, - "loss": 0.8788, + "learning_rate": 1.108178796758621e-05, + "loss": 0.8903, "step": 16993 }, { - "epoch": 0.4822360953461975, + "epoch": 0.4815664938082689, "grad_norm": 0.0, - "learning_rate": 1.1059542628306797e-05, - "loss": 0.9774, + "learning_rate": 1.1080875559854962e-05, + "loss": 0.8582, "step": 16994 }, { - "epoch": 0.4822644721906924, + "epoch": 0.4815948312505313, "grad_norm": 0.0, - "learning_rate": 1.1058628719251123e-05, - "loss": 0.8895, + "learning_rate": 1.1079963143019097e-05, + "loss": 0.9459, "step": 16995 }, { - "epoch": 0.48229284903518727, + "epoch": 0.4816231686927938, "grad_norm": 0.0, - "learning_rate": 1.1057714801253165e-05, - "loss": 0.9988, + "learning_rate": 1.1079050717086301e-05, + "loss": 0.8543, "step": 16996 }, { - "epoch": 0.4823212258796822, + "epoch": 0.48165150613505625, "grad_norm": 0.0, - "learning_rate": 1.1056800874320646e-05, - "loss": 0.8809, + "learning_rate": 1.107813828206426e-05, + "loss": 0.9128, "step": 16997 }, { - "epoch": 0.4823496027241771, + "epoch": 0.4816798435773187, "grad_norm": 0.0, - "learning_rate": 1.1055886938461282e-05, - "loss": 1.0122, + "learning_rate": 1.1077225837960658e-05, + "loss": 0.8656, "step": 16998 }, { - "epoch": 0.48237797956867196, + "epoch": 0.4817081810195812, "grad_norm": 0.0, - "learning_rate": 1.1054972993682794e-05, - "loss": 0.7949, + "learning_rate": 1.1076313384783183e-05, + "loss": 0.85, "step": 16999 }, { - "epoch": 0.48240635641316687, + "epoch": 0.4817365184618436, "grad_norm": 0.0, - "learning_rate": 1.1054059039992897e-05, - "loss": 1.0315, + "learning_rate": 1.1075400922539525e-05, + "loss": 0.8867, "step": 17000 }, { - "epoch": 0.48243473325766173, + "epoch": 0.4817648559041061, "grad_norm": 0.0, - "learning_rate": 1.1053145077399318e-05, - "loss": 0.9264, + "learning_rate": 1.1074488451237364e-05, + "loss": 0.8963, "step": 17001 }, { - "epoch": 0.48246311010215664, + "epoch": 0.48179319334636855, "grad_norm": 0.0, - "learning_rate": 1.1052231105909778e-05, - "loss": 0.8347, + "learning_rate": 1.1073575970884387e-05, + "loss": 0.8833, "step": 17002 }, { - "epoch": 0.48249148694665156, + "epoch": 0.481821530788631, "grad_norm": 0.0, - "learning_rate": 1.1051317125531989e-05, - "loss": 0.8567, + "learning_rate": 1.107266348148828e-05, + "loss": 0.8476, "step": 17003 }, { - "epoch": 0.4825198637911464, + "epoch": 0.4818498682308935, "grad_norm": 0.0, - "learning_rate": 1.1050403136273681e-05, - "loss": 0.9104, + "learning_rate": 1.1071750983056733e-05, + "loss": 0.9612, "step": 17004 }, { - "epoch": 0.48254824063564133, + "epoch": 0.4818782056731559, "grad_norm": 0.0, - "learning_rate": 1.1049489138142571e-05, - "loss": 0.7928, + "learning_rate": 1.1070838475597426e-05, + "loss": 0.8075, "step": 17005 }, { - "epoch": 0.4825766174801362, + "epoch": 0.4819065431154184, "grad_norm": 0.0, - "learning_rate": 1.1048575131146377e-05, - "loss": 0.9427, + "learning_rate": 1.1069925959118048e-05, + "loss": 0.9245, "step": 17006 }, { - "epoch": 0.4826049943246311, + "epoch": 0.48193488055768086, "grad_norm": 0.0, - "learning_rate": 1.1047661115292825e-05, - "loss": 0.8016, + "learning_rate": 1.106901343362629e-05, + "loss": 0.9567, "step": 17007 }, { - "epoch": 0.48263337116912597, + "epoch": 0.48196321799994335, "grad_norm": 0.0, - "learning_rate": 1.1046747090589629e-05, - "loss": 0.9362, + "learning_rate": 1.1068100899129832e-05, + "loss": 0.8331, "step": 17008 }, { - "epoch": 0.4826617480136209, + "epoch": 0.4819915554422058, "grad_norm": 0.0, - "learning_rate": 1.1045833057044515e-05, - "loss": 0.8351, + "learning_rate": 1.1067188355636366e-05, + "loss": 0.9213, "step": 17009 }, { - "epoch": 0.4826901248581158, + "epoch": 0.4820198928844682, "grad_norm": 0.0, - "learning_rate": 1.1044919014665198e-05, - "loss": 0.8695, + "learning_rate": 1.1066275803153577e-05, + "loss": 0.9191, "step": 17010 }, { - "epoch": 0.48271850170261066, + "epoch": 0.4820482303267307, "grad_norm": 0.0, - "learning_rate": 1.1044004963459409e-05, - "loss": 0.8419, + "learning_rate": 1.1065363241689148e-05, + "loss": 0.8839, "step": 17011 }, { - "epoch": 0.4827468785471056, + "epoch": 0.48207656776899316, "grad_norm": 0.0, - "learning_rate": 1.104309090343486e-05, - "loss": 0.7379, + "learning_rate": 1.1064450671250768e-05, + "loss": 0.8823, "step": 17012 }, { - "epoch": 0.48277525539160043, + "epoch": 0.48210490521125565, "grad_norm": 0.0, - "learning_rate": 1.1042176834599276e-05, - "loss": 0.8501, + "learning_rate": 1.106353809184613e-05, + "loss": 0.8765, "step": 17013 }, { - "epoch": 0.48280363223609535, + "epoch": 0.4821332426535181, "grad_norm": 0.0, - "learning_rate": 1.1041262756960378e-05, - "loss": 0.9544, + "learning_rate": 1.1062625503482911e-05, + "loss": 0.8416, "step": 17014 }, { - "epoch": 0.48283200908059026, + "epoch": 0.48216158009578053, "grad_norm": 0.0, - "learning_rate": 1.1040348670525889e-05, - "loss": 0.9568, + "learning_rate": 1.1061712906168802e-05, + "loss": 0.9931, "step": 17015 }, { - "epoch": 0.4828603859250851, + "epoch": 0.482189917538043, "grad_norm": 0.0, - "learning_rate": 1.1039434575303522e-05, - "loss": 0.9238, + "learning_rate": 1.1060800299911496e-05, + "loss": 0.9323, "step": 17016 }, { - "epoch": 0.48288876276958004, + "epoch": 0.48221825498030546, "grad_norm": 0.0, - "learning_rate": 1.1038520471301008e-05, - "loss": 0.8782, + "learning_rate": 1.1059887684718673e-05, + "loss": 0.915, "step": 17017 }, { - "epoch": 0.4829171396140749, + "epoch": 0.48224659242256795, "grad_norm": 0.0, - "learning_rate": 1.1037606358526065e-05, - "loss": 0.8341, + "learning_rate": 1.1058975060598021e-05, + "loss": 0.9143, "step": 17018 }, { - "epoch": 0.4829455164585698, + "epoch": 0.4822749298648304, "grad_norm": 0.0, - "learning_rate": 1.1036692236986416e-05, - "loss": 0.8747, + "learning_rate": 1.105806242755723e-05, + "loss": 0.9718, "step": 17019 }, { - "epoch": 0.48297389330306467, + "epoch": 0.4823032673070929, "grad_norm": 0.0, - "learning_rate": 1.1035778106689776e-05, - "loss": 0.8934, + "learning_rate": 1.1057149785603984e-05, + "loss": 0.927, "step": 17020 }, { - "epoch": 0.4830022701475596, + "epoch": 0.4823316047493553, "grad_norm": 0.0, - "learning_rate": 1.1034863967643877e-05, - "loss": 0.9539, + "learning_rate": 1.1056237134745975e-05, + "loss": 0.8604, "step": 17021 }, { - "epoch": 0.4830306469920545, + "epoch": 0.48235994219161776, "grad_norm": 0.0, - "learning_rate": 1.1033949819856432e-05, - "loss": 0.9078, + "learning_rate": 1.1055324474990889e-05, + "loss": 0.9124, "step": 17022 }, { - "epoch": 0.48305902383654936, + "epoch": 0.48238827963388026, "grad_norm": 0.0, - "learning_rate": 1.1033035663335167e-05, - "loss": 0.8992, + "learning_rate": 1.1054411806346413e-05, + "loss": 0.8513, "step": 17023 }, { - "epoch": 0.4830874006810443, + "epoch": 0.4824166170761427, "grad_norm": 0.0, - "learning_rate": 1.1032121498087807e-05, - "loss": 0.8742, + "learning_rate": 1.1053499128820232e-05, + "loss": 1.0166, "step": 17024 }, { - "epoch": 0.48311577752553914, + "epoch": 0.4824449545184052, "grad_norm": 0.0, - "learning_rate": 1.1031207324122066e-05, - "loss": 0.8462, + "learning_rate": 1.1052586442420036e-05, + "loss": 0.9463, "step": 17025 }, { - "epoch": 0.48314415437003405, + "epoch": 0.4824732919606676, "grad_norm": 0.0, - "learning_rate": 1.1030293141445672e-05, - "loss": 0.9258, + "learning_rate": 1.1051673747153518e-05, + "loss": 0.9861, "step": 17026 }, { - "epoch": 0.48317253121452897, + "epoch": 0.48250162940293007, "grad_norm": 0.0, - "learning_rate": 1.1029378950066343e-05, - "loss": 0.8917, + "learning_rate": 1.105076104302836e-05, + "loss": 0.9412, "step": 17027 }, { - "epoch": 0.4832009080590238, + "epoch": 0.48252996684519256, "grad_norm": 0.0, - "learning_rate": 1.1028464749991807e-05, - "loss": 0.8795, + "learning_rate": 1.1049848330052251e-05, + "loss": 0.8716, "step": 17028 }, { - "epoch": 0.48322928490351874, + "epoch": 0.482558304287455, "grad_norm": 0.0, - "learning_rate": 1.1027550541229783e-05, - "loss": 0.7897, + "learning_rate": 1.104893560823288e-05, + "loss": 0.8699, "step": 17029 }, { - "epoch": 0.4832576617480136, + "epoch": 0.4825866417297175, "grad_norm": 0.0, - "learning_rate": 1.102663632378799e-05, - "loss": 0.8484, + "learning_rate": 1.1048022877577935e-05, + "loss": 0.8851, "step": 17030 }, { - "epoch": 0.4832860385925085, + "epoch": 0.48261497917197993, "grad_norm": 0.0, - "learning_rate": 1.1025722097674158e-05, - "loss": 0.9008, + "learning_rate": 1.1047110138095102e-05, + "loss": 0.8945, "step": 17031 }, { - "epoch": 0.48331441543700343, + "epoch": 0.4826433166142424, "grad_norm": 0.0, - "learning_rate": 1.1024807862896005e-05, - "loss": 0.9301, + "learning_rate": 1.1046197389792073e-05, + "loss": 0.8693, "step": 17032 }, { - "epoch": 0.4833427922814983, + "epoch": 0.48267165405650486, "grad_norm": 0.0, - "learning_rate": 1.1023893619461253e-05, - "loss": 0.8922, + "learning_rate": 1.1045284632676535e-05, + "loss": 0.88, "step": 17033 }, { - "epoch": 0.4833711691259932, + "epoch": 0.4826999914987673, "grad_norm": 0.0, - "learning_rate": 1.1022979367377622e-05, - "loss": 0.8643, + "learning_rate": 1.1044371866756178e-05, + "loss": 0.8672, "step": 17034 }, { - "epoch": 0.48339954597048806, + "epoch": 0.4827283289410298, "grad_norm": 0.0, - "learning_rate": 1.1022065106652842e-05, - "loss": 0.8756, + "learning_rate": 1.1043459092038688e-05, + "loss": 0.896, "step": 17035 }, { - "epoch": 0.483427922814983, + "epoch": 0.48275666638329223, "grad_norm": 0.0, - "learning_rate": 1.1021150837294632e-05, - "loss": 0.9736, + "learning_rate": 1.1042546308531752e-05, + "loss": 0.9279, "step": 17036 }, { - "epoch": 0.48345629965947784, + "epoch": 0.4827850038255547, "grad_norm": 0.0, - "learning_rate": 1.1020236559310714e-05, - "loss": 1.0368, + "learning_rate": 1.1041633516243064e-05, + "loss": 0.9488, "step": 17037 }, { - "epoch": 0.48348467650397275, + "epoch": 0.48281334126781716, "grad_norm": 0.0, - "learning_rate": 1.1019322272708812e-05, - "loss": 1.0088, + "learning_rate": 1.104072071518031e-05, + "loss": 0.8884, "step": 17038 }, { - "epoch": 0.48351305334846767, + "epoch": 0.4828416787100796, "grad_norm": 0.0, - "learning_rate": 1.1018407977496652e-05, - "loss": 0.9561, + "learning_rate": 1.1039807905351179e-05, + "loss": 0.9042, "step": 17039 }, { - "epoch": 0.48354143019296253, + "epoch": 0.4828700161523421, "grad_norm": 0.0, - "learning_rate": 1.1017493673681949e-05, - "loss": 0.9762, + "learning_rate": 1.1038895086763357e-05, + "loss": 0.8295, "step": 17040 }, { - "epoch": 0.48356980703745744, + "epoch": 0.48289835359460453, "grad_norm": 0.0, - "learning_rate": 1.1016579361272432e-05, - "loss": 0.8651, + "learning_rate": 1.103798225942454e-05, + "loss": 0.8958, "step": 17041 }, { - "epoch": 0.4835981838819523, + "epoch": 0.48292669103686703, "grad_norm": 0.0, - "learning_rate": 1.1015665040275827e-05, - "loss": 0.8137, + "learning_rate": 1.103706942334241e-05, + "loss": 0.933, "step": 17042 }, { - "epoch": 0.4836265607264472, + "epoch": 0.48295502847912947, "grad_norm": 0.0, - "learning_rate": 1.1014750710699853e-05, - "loss": 0.8788, + "learning_rate": 1.103615657852466e-05, + "loss": 0.9305, "step": 17043 }, { - "epoch": 0.48365493757094213, + "epoch": 0.48298336592139196, "grad_norm": 0.0, - "learning_rate": 1.101383637255223e-05, - "loss": 1.0157, + "learning_rate": 1.1035243724978979e-05, + "loss": 0.9081, "step": 17044 }, { - "epoch": 0.483683314415437, + "epoch": 0.4830117033636544, "grad_norm": 0.0, - "learning_rate": 1.1012922025840691e-05, - "loss": 1.0009, + "learning_rate": 1.1034330862713054e-05, + "loss": 0.8728, "step": 17045 }, { - "epoch": 0.4837116912599319, + "epoch": 0.48304004080591684, "grad_norm": 0.0, - "learning_rate": 1.101200767057295e-05, - "loss": 0.82, + "learning_rate": 1.1033417991734579e-05, + "loss": 0.8599, "step": 17046 }, { - "epoch": 0.48374006810442677, + "epoch": 0.48306837824817933, "grad_norm": 0.0, - "learning_rate": 1.1011093306756737e-05, - "loss": 0.7448, + "learning_rate": 1.103250511205124e-05, + "loss": 0.8243, "step": 17047 }, { - "epoch": 0.4837684449489217, + "epoch": 0.48309671569044177, "grad_norm": 0.0, - "learning_rate": 1.1010178934399773e-05, - "loss": 0.9193, + "learning_rate": 1.1031592223670726e-05, + "loss": 0.7565, "step": 17048 }, { - "epoch": 0.4837968217934166, + "epoch": 0.48312505313270426, "grad_norm": 0.0, - "learning_rate": 1.100926455350978e-05, - "loss": 0.8176, + "learning_rate": 1.1030679326600726e-05, + "loss": 0.9557, "step": 17049 }, { - "epoch": 0.48382519863791146, + "epoch": 0.4831533905749667, "grad_norm": 0.0, - "learning_rate": 1.1008350164094486e-05, - "loss": 0.8932, + "learning_rate": 1.1029766420848932e-05, + "loss": 0.9023, "step": 17050 }, { - "epoch": 0.4838535754824064, + "epoch": 0.48318172801722914, "grad_norm": 0.0, - "learning_rate": 1.1007435766161612e-05, - "loss": 0.7619, + "learning_rate": 1.1028853506423034e-05, + "loss": 0.8899, "step": 17051 }, { - "epoch": 0.48388195232690123, + "epoch": 0.48321006545949163, "grad_norm": 0.0, - "learning_rate": 1.1006521359718885e-05, - "loss": 0.9353, + "learning_rate": 1.1027940583330721e-05, + "loss": 0.9708, "step": 17052 }, { - "epoch": 0.48391032917139615, + "epoch": 0.48323840290175407, "grad_norm": 0.0, - "learning_rate": 1.1005606944774025e-05, - "loss": 0.9141, + "learning_rate": 1.1027027651579683e-05, + "loss": 0.9134, "step": 17053 }, { - "epoch": 0.483938706015891, + "epoch": 0.48326674034401657, "grad_norm": 0.0, - "learning_rate": 1.1004692521334757e-05, - "loss": 0.9705, + "learning_rate": 1.1026114711177608e-05, + "loss": 0.871, "step": 17054 }, { - "epoch": 0.4839670828603859, + "epoch": 0.483295077786279, "grad_norm": 0.0, - "learning_rate": 1.1003778089408808e-05, - "loss": 0.8903, + "learning_rate": 1.1025201762132192e-05, + "loss": 0.8831, "step": 17055 }, { - "epoch": 0.48399545970488084, + "epoch": 0.4833234152285415, "grad_norm": 0.0, - "learning_rate": 1.1002863649003898e-05, - "loss": 0.9424, + "learning_rate": 1.1024288804451118e-05, + "loss": 0.8978, "step": 17056 }, { - "epoch": 0.4840238365493757, + "epoch": 0.48335175267080394, "grad_norm": 0.0, - "learning_rate": 1.1001949200127756e-05, - "loss": 0.8033, + "learning_rate": 1.1023375838142076e-05, + "loss": 0.7812, "step": 17057 }, { - "epoch": 0.4840522133938706, + "epoch": 0.4833800901130664, "grad_norm": 0.0, - "learning_rate": 1.1001034742788104e-05, - "loss": 0.8338, + "learning_rate": 1.1022462863212762e-05, + "loss": 0.9445, "step": 17058 }, { - "epoch": 0.48408059023836547, + "epoch": 0.48340842755532887, "grad_norm": 0.0, - "learning_rate": 1.1000120276992665e-05, - "loss": 0.9107, + "learning_rate": 1.1021549879670865e-05, + "loss": 1.0401, "step": 17059 }, { - "epoch": 0.4841089670828604, + "epoch": 0.4834367649975913, "grad_norm": 0.0, - "learning_rate": 1.0999205802749165e-05, - "loss": 0.9382, + "learning_rate": 1.1020636887524072e-05, + "loss": 0.8415, "step": 17060 }, { - "epoch": 0.4841373439273553, + "epoch": 0.4834651024398538, "grad_norm": 0.0, - "learning_rate": 1.0998291320065331e-05, - "loss": 0.7609, + "learning_rate": 1.1019723886780075e-05, + "loss": 0.9918, "step": 17061 }, { - "epoch": 0.48416572077185016, + "epoch": 0.48349343988211624, "grad_norm": 0.0, - "learning_rate": 1.0997376828948885e-05, - "loss": 0.8841, + "learning_rate": 1.1018810877446569e-05, + "loss": 0.9498, "step": 17062 }, { - "epoch": 0.4841940976163451, + "epoch": 0.4835217773243787, "grad_norm": 0.0, - "learning_rate": 1.0996462329407549e-05, - "loss": 0.8438, + "learning_rate": 1.1017897859531237e-05, + "loss": 0.8809, "step": 17063 }, { - "epoch": 0.48422247446083994, + "epoch": 0.48355011476664117, "grad_norm": 0.0, - "learning_rate": 1.0995547821449053e-05, - "loss": 0.9058, + "learning_rate": 1.1016984833041773e-05, + "loss": 0.8951, "step": 17064 }, { - "epoch": 0.48425085130533485, + "epoch": 0.4835784522089036, "grad_norm": 0.0, - "learning_rate": 1.099463330508112e-05, - "loss": 0.9209, + "learning_rate": 1.1016071797985867e-05, + "loss": 1.0067, "step": 17065 }, { - "epoch": 0.48427922814982977, + "epoch": 0.4836067896511661, "grad_norm": 0.0, - "learning_rate": 1.0993718780311475e-05, - "loss": 0.8684, + "learning_rate": 1.1015158754371217e-05, + "loss": 0.8506, "step": 17066 }, { - "epoch": 0.4843076049943246, + "epoch": 0.48363512709342854, "grad_norm": 0.0, - "learning_rate": 1.0992804247147841e-05, - "loss": 0.8123, + "learning_rate": 1.1014245702205504e-05, + "loss": 0.8776, "step": 17067 }, { - "epoch": 0.48433598183881954, + "epoch": 0.48366346453569103, "grad_norm": 0.0, - "learning_rate": 1.0991889705597946e-05, - "loss": 0.9012, + "learning_rate": 1.1013332641496424e-05, + "loss": 0.8821, "step": 17068 }, { - "epoch": 0.4843643586833144, + "epoch": 0.4836918019779535, "grad_norm": 0.0, - "learning_rate": 1.0990975155669516e-05, - "loss": 0.9158, + "learning_rate": 1.1012419572251665e-05, + "loss": 0.8882, "step": 17069 }, { - "epoch": 0.4843927355278093, + "epoch": 0.4837201394202159, "grad_norm": 0.0, - "learning_rate": 1.0990060597370271e-05, - "loss": 0.8356, + "learning_rate": 1.1011506494478921e-05, + "loss": 1.0267, "step": 17070 }, { - "epoch": 0.4844211123723042, + "epoch": 0.4837484768624784, "grad_norm": 0.0, - "learning_rate": 1.0989146030707942e-05, - "loss": 0.8399, + "learning_rate": 1.101059340818588e-05, + "loss": 0.8591, "step": 17071 }, { - "epoch": 0.4844494892167991, + "epoch": 0.48377681430474084, "grad_norm": 0.0, - "learning_rate": 1.098823145569025e-05, - "loss": 0.813, + "learning_rate": 1.100968031338024e-05, + "loss": 0.9346, "step": 17072 }, { - "epoch": 0.484477866061294, + "epoch": 0.48380515174700334, "grad_norm": 0.0, - "learning_rate": 1.0987316872324925e-05, - "loss": 0.7815, + "learning_rate": 1.1008767210069684e-05, + "loss": 0.9146, "step": 17073 }, { - "epoch": 0.48450624290578886, + "epoch": 0.4838334891892658, "grad_norm": 0.0, - "learning_rate": 1.0986402280619689e-05, - "loss": 0.8252, + "learning_rate": 1.1007854098261908e-05, + "loss": 0.9325, "step": 17074 }, { - "epoch": 0.4845346197502838, + "epoch": 0.4838618266315282, "grad_norm": 0.0, - "learning_rate": 1.0985487680582267e-05, - "loss": 0.9134, + "learning_rate": 1.1006940977964604e-05, + "loss": 0.9158, "step": 17075 }, { - "epoch": 0.48456299659477864, + "epoch": 0.4838901640737907, "grad_norm": 0.0, - "learning_rate": 1.0984573072220388e-05, - "loss": 0.8363, + "learning_rate": 1.1006027849185463e-05, + "loss": 0.871, "step": 17076 }, { - "epoch": 0.48459137343927355, + "epoch": 0.48391850151605315, "grad_norm": 0.0, - "learning_rate": 1.0983658455541774e-05, - "loss": 0.9463, + "learning_rate": 1.1005114711932172e-05, + "loss": 0.872, "step": 17077 }, { - "epoch": 0.48461975028376847, + "epoch": 0.48394683895831564, "grad_norm": 0.0, - "learning_rate": 1.0982743830554157e-05, - "loss": 0.9157, + "learning_rate": 1.1004201566212426e-05, + "loss": 0.9887, "step": 17078 }, { - "epoch": 0.48464812712826333, + "epoch": 0.4839751764005781, "grad_norm": 0.0, - "learning_rate": 1.0981829197265254e-05, - "loss": 0.8974, + "learning_rate": 1.1003288412033923e-05, + "loss": 0.8835, "step": 17079 }, { - "epoch": 0.48467650397275824, + "epoch": 0.48400351384284057, "grad_norm": 0.0, - "learning_rate": 1.0980914555682797e-05, - "loss": 0.9617, + "learning_rate": 1.1002375249404347e-05, + "loss": 0.8484, "step": 17080 }, { - "epoch": 0.4847048808172531, + "epoch": 0.484031851285103, "grad_norm": 0.0, - "learning_rate": 1.0979999905814512e-05, - "loss": 0.9702, + "learning_rate": 1.1001462078331394e-05, + "loss": 1.024, "step": 17081 }, { - "epoch": 0.484733257661748, + "epoch": 0.48406018872736545, "grad_norm": 0.0, - "learning_rate": 1.0979085247668123e-05, - "loss": 0.8618, + "learning_rate": 1.1000548898822748e-05, + "loss": 0.8932, "step": 17082 }, { - "epoch": 0.48476163450624293, + "epoch": 0.48408852616962794, "grad_norm": 0.0, - "learning_rate": 1.0978170581251358e-05, - "loss": 0.8666, + "learning_rate": 1.0999635710886112e-05, + "loss": 0.7994, "step": 17083 }, { - "epoch": 0.4847900113507378, + "epoch": 0.4841168636118904, "grad_norm": 0.0, - "learning_rate": 1.097725590657194e-05, - "loss": 0.8547, + "learning_rate": 1.099872251452917e-05, + "loss": 0.924, "step": 17084 }, { - "epoch": 0.4848183881952327, + "epoch": 0.4841452010541529, "grad_norm": 0.0, - "learning_rate": 1.0976341223637599e-05, - "loss": 0.8453, + "learning_rate": 1.099780930975962e-05, + "loss": 1.0228, "step": 17085 }, { - "epoch": 0.48484676503972757, + "epoch": 0.4841735384964153, "grad_norm": 0.0, - "learning_rate": 1.0975426532456059e-05, - "loss": 0.8535, + "learning_rate": 1.0996896096585148e-05, + "loss": 0.8782, "step": 17086 }, { - "epoch": 0.4848751418842225, + "epoch": 0.48420187593867775, "grad_norm": 0.0, - "learning_rate": 1.0974511833035048e-05, - "loss": 0.924, + "learning_rate": 1.0995982875013453e-05, + "loss": 0.805, "step": 17087 }, { - "epoch": 0.48490351872871734, + "epoch": 0.48423021338094024, "grad_norm": 0.0, - "learning_rate": 1.0973597125382289e-05, - "loss": 0.8397, + "learning_rate": 1.0995069645052226e-05, + "loss": 0.9151, "step": 17088 }, { - "epoch": 0.48493189557321226, + "epoch": 0.4842585508232027, "grad_norm": 0.0, - "learning_rate": 1.0972682409505515e-05, - "loss": 0.7233, + "learning_rate": 1.0994156406709155e-05, + "loss": 0.7835, "step": 17089 }, { - "epoch": 0.48496027241770717, + "epoch": 0.4842868882654652, "grad_norm": 0.0, - "learning_rate": 1.097176768541245e-05, - "loss": 0.8657, + "learning_rate": 1.0993243159991936e-05, + "loss": 0.8628, "step": 17090 }, { - "epoch": 0.48498864926220203, + "epoch": 0.4843152257077276, "grad_norm": 0.0, - "learning_rate": 1.0970852953110812e-05, - "loss": 0.8319, + "learning_rate": 1.0992329904908261e-05, + "loss": 0.8588, "step": 17091 }, { - "epoch": 0.48501702610669695, + "epoch": 0.4843435631499901, "grad_norm": 0.0, - "learning_rate": 1.0969938212608345e-05, - "loss": 0.8402, + "learning_rate": 1.0991416641465823e-05, + "loss": 0.9285, "step": 17092 }, { - "epoch": 0.4850454029511918, + "epoch": 0.48437190059225255, "grad_norm": 0.0, - "learning_rate": 1.0969023463912763e-05, - "loss": 0.8701, + "learning_rate": 1.0990503369672316e-05, + "loss": 0.9109, "step": 17093 }, { - "epoch": 0.4850737797956867, + "epoch": 0.484400238034515, "grad_norm": 0.0, - "learning_rate": 1.0968108707031792e-05, - "loss": 0.9046, + "learning_rate": 1.0989590089535426e-05, + "loss": 0.8701, "step": 17094 }, { - "epoch": 0.48510215664018164, + "epoch": 0.4844285754767775, "grad_norm": 0.0, - "learning_rate": 1.0967193941973167e-05, - "loss": 1.0093, + "learning_rate": 1.0988676801062858e-05, + "loss": 0.8307, "step": 17095 }, { - "epoch": 0.4851305334846765, + "epoch": 0.4844569129190399, "grad_norm": 0.0, - "learning_rate": 1.096627916874461e-05, - "loss": 0.9496, + "learning_rate": 1.0987763504262297e-05, + "loss": 0.8877, "step": 17096 }, { - "epoch": 0.4851589103291714, + "epoch": 0.4844852503613024, "grad_norm": 0.0, - "learning_rate": 1.0965364387353852e-05, - "loss": 0.9779, + "learning_rate": 1.0986850199141432e-05, + "loss": 0.8337, "step": 17097 }, { - "epoch": 0.48518728717366627, + "epoch": 0.48451358780356485, "grad_norm": 0.0, - "learning_rate": 1.0964449597808612e-05, - "loss": 0.8835, + "learning_rate": 1.0985936885707965e-05, + "loss": 0.9905, "step": 17098 }, { - "epoch": 0.4852156640181612, + "epoch": 0.4845419252458273, "grad_norm": 0.0, - "learning_rate": 1.0963534800116628e-05, - "loss": 0.8932, + "learning_rate": 1.0985023563969585e-05, + "loss": 0.8301, "step": 17099 }, { - "epoch": 0.48524404086265605, + "epoch": 0.4845702626880898, "grad_norm": 0.0, - "learning_rate": 1.0962619994285623e-05, - "loss": 0.8987, + "learning_rate": 1.0984110233933987e-05, + "loss": 0.9077, "step": 17100 }, { - "epoch": 0.48527241770715096, + "epoch": 0.4845986001303522, "grad_norm": 0.0, - "learning_rate": 1.096170518032332e-05, - "loss": 0.9232, + "learning_rate": 1.0983196895608863e-05, + "loss": 0.7973, "step": 17101 }, { - "epoch": 0.4853007945516459, + "epoch": 0.4846269375726147, "grad_norm": 0.0, - "learning_rate": 1.096079035823745e-05, - "loss": 0.9058, + "learning_rate": 1.0982283549001904e-05, + "loss": 0.8005, "step": 17102 }, { - "epoch": 0.48532917139614073, + "epoch": 0.48465527501487715, "grad_norm": 0.0, - "learning_rate": 1.0959875528035743e-05, - "loss": 0.8978, + "learning_rate": 1.0981370194120808e-05, + "loss": 0.8211, "step": 17103 }, { - "epoch": 0.48535754824063565, + "epoch": 0.48468361245713965, "grad_norm": 0.0, - "learning_rate": 1.0958960689725924e-05, - "loss": 0.8232, + "learning_rate": 1.0980456830973266e-05, + "loss": 0.8464, "step": 17104 }, { - "epoch": 0.4853859250851305, + "epoch": 0.4847119498994021, "grad_norm": 0.0, - "learning_rate": 1.0958045843315722e-05, - "loss": 0.9231, + "learning_rate": 1.0979543459566973e-05, + "loss": 0.8622, "step": 17105 }, { - "epoch": 0.4854143019296254, + "epoch": 0.4847402873416645, "grad_norm": 0.0, - "learning_rate": 1.0957130988812863e-05, - "loss": 0.9313, + "learning_rate": 1.097863007990962e-05, + "loss": 0.9204, "step": 17106 }, { - "epoch": 0.48544267877412034, + "epoch": 0.484768624783927, "grad_norm": 0.0, - "learning_rate": 1.0956216126225074e-05, - "loss": 0.9985, + "learning_rate": 1.0977716692008901e-05, + "loss": 0.9435, "step": 17107 }, { - "epoch": 0.4854710556186152, + "epoch": 0.48479696222618945, "grad_norm": 0.0, - "learning_rate": 1.0955301255560085e-05, - "loss": 0.8129, + "learning_rate": 1.0976803295872513e-05, + "loss": 0.9124, "step": 17108 }, { - "epoch": 0.4854994324631101, + "epoch": 0.48482529966845195, "grad_norm": 0.0, - "learning_rate": 1.0954386376825624e-05, - "loss": 0.9647, + "learning_rate": 1.097588989150815e-05, + "loss": 0.8875, "step": 17109 }, { - "epoch": 0.485527809307605, + "epoch": 0.4848536371107144, "grad_norm": 0.0, - "learning_rate": 1.0953471490029417e-05, - "loss": 0.893, + "learning_rate": 1.0974976478923503e-05, + "loss": 0.7572, "step": 17110 }, { - "epoch": 0.4855561861520999, + "epoch": 0.4848819745529768, "grad_norm": 0.0, - "learning_rate": 1.0952556595179193e-05, - "loss": 0.9093, + "learning_rate": 1.0974063058126263e-05, + "loss": 0.8675, "step": 17111 }, { - "epoch": 0.4855845629965948, + "epoch": 0.4849103119952393, "grad_norm": 0.0, - "learning_rate": 1.0951641692282682e-05, - "loss": 0.8551, + "learning_rate": 1.0973149629124134e-05, + "loss": 0.8345, "step": 17112 }, { - "epoch": 0.48561293984108966, + "epoch": 0.48493864943750176, "grad_norm": 0.0, - "learning_rate": 1.0950726781347612e-05, - "loss": 1.0056, + "learning_rate": 1.0972236191924801e-05, + "loss": 0.9056, "step": 17113 }, { - "epoch": 0.4856413166855846, + "epoch": 0.48496698687976425, "grad_norm": 0.0, - "learning_rate": 1.0949811862381708e-05, - "loss": 0.9038, + "learning_rate": 1.0971322746535964e-05, + "loss": 0.8331, "step": 17114 }, { - "epoch": 0.48566969353007944, + "epoch": 0.4849953243220267, "grad_norm": 0.0, - "learning_rate": 1.09488969353927e-05, - "loss": 0.8653, + "learning_rate": 1.0970409292965312e-05, + "loss": 0.8942, "step": 17115 }, { - "epoch": 0.48569807037457435, + "epoch": 0.4850236617642892, "grad_norm": 0.0, - "learning_rate": 1.0947982000388322e-05, - "loss": 0.8704, + "learning_rate": 1.0969495831220543e-05, + "loss": 0.7928, "step": 17116 }, { - "epoch": 0.4857264472190692, + "epoch": 0.4850519992065516, "grad_norm": 0.0, - "learning_rate": 1.0947067057376291e-05, - "loss": 0.9951, + "learning_rate": 1.096858236130935e-05, + "loss": 0.899, "step": 17117 }, { - "epoch": 0.48575482406356413, + "epoch": 0.48508033664881406, "grad_norm": 0.0, - "learning_rate": 1.0946152106364344e-05, - "loss": 0.9244, + "learning_rate": 1.096766888323943e-05, + "loss": 0.9343, "step": 17118 }, { - "epoch": 0.48578320090805904, + "epoch": 0.48510867409107655, "grad_norm": 0.0, - "learning_rate": 1.0945237147360208e-05, - "loss": 0.8852, + "learning_rate": 1.0966755397018474e-05, + "loss": 0.8342, "step": 17119 }, { - "epoch": 0.4858115777525539, + "epoch": 0.485137011533339, "grad_norm": 0.0, - "learning_rate": 1.0944322180371614e-05, - "loss": 0.9646, + "learning_rate": 1.096584190265418e-05, + "loss": 0.8777, "step": 17120 }, { - "epoch": 0.4858399545970488, + "epoch": 0.4851653489756015, "grad_norm": 0.0, - "learning_rate": 1.0943407205406284e-05, - "loss": 0.9275, + "learning_rate": 1.096492840015424e-05, + "loss": 0.839, "step": 17121 }, { - "epoch": 0.4858683314415437, + "epoch": 0.4851936864178639, "grad_norm": 0.0, - "learning_rate": 1.0942492222471954e-05, - "loss": 0.7253, + "learning_rate": 1.096401488952635e-05, + "loss": 0.9076, "step": 17122 }, { - "epoch": 0.4858967082860386, + "epoch": 0.48522202386012636, "grad_norm": 0.0, - "learning_rate": 1.094157723157635e-05, - "loss": 0.8759, + "learning_rate": 1.0963101370778201e-05, + "loss": 0.9161, "step": 17123 }, { - "epoch": 0.4859250851305335, + "epoch": 0.48525036130238886, "grad_norm": 0.0, - "learning_rate": 1.0940662232727198e-05, - "loss": 0.9012, + "learning_rate": 1.0962187843917498e-05, + "loss": 0.8232, "step": 17124 }, { - "epoch": 0.48595346197502837, + "epoch": 0.4852786987446513, "grad_norm": 0.0, - "learning_rate": 1.0939747225932229e-05, - "loss": 0.8898, + "learning_rate": 1.0961274308951925e-05, + "loss": 0.912, "step": 17125 }, { - "epoch": 0.4859818388195233, + "epoch": 0.4853070361869138, "grad_norm": 0.0, - "learning_rate": 1.0938832211199177e-05, - "loss": 0.8176, + "learning_rate": 1.0960360765889185e-05, + "loss": 0.9015, "step": 17126 }, { - "epoch": 0.48601021566401814, + "epoch": 0.4853353736291762, "grad_norm": 0.0, - "learning_rate": 1.0937917188535765e-05, - "loss": 0.9729, + "learning_rate": 1.0959447214736966e-05, + "loss": 0.8277, "step": 17127 }, { - "epoch": 0.48603859250851306, + "epoch": 0.4853637110714387, "grad_norm": 0.0, - "learning_rate": 1.0937002157949723e-05, - "loss": 0.9669, + "learning_rate": 1.0958533655502969e-05, + "loss": 0.8148, "step": 17128 }, { - "epoch": 0.48606696935300797, + "epoch": 0.48539204851370116, "grad_norm": 0.0, - "learning_rate": 1.0936087119448784e-05, - "loss": 0.8902, + "learning_rate": 1.0957620088194884e-05, + "loss": 0.9662, "step": 17129 }, { - "epoch": 0.48609534619750283, + "epoch": 0.4854203859559636, "grad_norm": 0.0, - "learning_rate": 1.0935172073040676e-05, - "loss": 1.0114, + "learning_rate": 1.0956706512820414e-05, + "loss": 0.8591, "step": 17130 }, { - "epoch": 0.48612372304199775, + "epoch": 0.4854487233982261, "grad_norm": 0.0, - "learning_rate": 1.0934257018733125e-05, - "loss": 0.8715, + "learning_rate": 1.0955792929387248e-05, + "loss": 0.8459, "step": 17131 }, { - "epoch": 0.4861520998864926, + "epoch": 0.48547706084048853, "grad_norm": 0.0, - "learning_rate": 1.0933341956533865e-05, - "loss": 0.8595, + "learning_rate": 1.0954879337903081e-05, + "loss": 0.7921, "step": 17132 }, { - "epoch": 0.4861804767309875, + "epoch": 0.485505398282751, "grad_norm": 0.0, - "learning_rate": 1.093242688645062e-05, - "loss": 0.9316, + "learning_rate": 1.0953965738375616e-05, + "loss": 0.9918, "step": 17133 }, { - "epoch": 0.4862088535754824, + "epoch": 0.48553373572501346, "grad_norm": 0.0, - "learning_rate": 1.0931511808491126e-05, - "loss": 0.9188, + "learning_rate": 1.095305213081254e-05, + "loss": 0.8931, "step": 17134 }, { - "epoch": 0.4862372304199773, + "epoch": 0.4855620731672759, "grad_norm": 0.0, - "learning_rate": 1.0930596722663109e-05, - "loss": 0.9418, + "learning_rate": 1.0952138515221551e-05, + "loss": 0.914, "step": 17135 }, { - "epoch": 0.4862656072644722, + "epoch": 0.4855904106095384, "grad_norm": 0.0, - "learning_rate": 1.0929681628974301e-05, - "loss": 0.8727, + "learning_rate": 1.0951224891610347e-05, + "loss": 0.9089, "step": 17136 }, { - "epoch": 0.48629398410896707, + "epoch": 0.48561874805180083, "grad_norm": 0.0, - "learning_rate": 1.092876652743243e-05, - "loss": 0.8587, + "learning_rate": 1.0950311259986622e-05, + "loss": 0.7898, "step": 17137 }, { - "epoch": 0.486322360953462, + "epoch": 0.4856470854940633, "grad_norm": 0.0, - "learning_rate": 1.0927851418045224e-05, - "loss": 0.9843, + "learning_rate": 1.0949397620358073e-05, + "loss": 1.0138, "step": 17138 }, { - "epoch": 0.48635073779795684, + "epoch": 0.48567542293632576, "grad_norm": 0.0, - "learning_rate": 1.092693630082042e-05, - "loss": 0.9775, + "learning_rate": 1.0948483972732395e-05, + "loss": 0.862, "step": 17139 }, { - "epoch": 0.48637911464245176, + "epoch": 0.48570376037858826, "grad_norm": 0.0, - "learning_rate": 1.092602117576574e-05, - "loss": 0.8555, + "learning_rate": 1.0947570317117282e-05, + "loss": 0.8166, "step": 17140 }, { - "epoch": 0.4864074914869467, + "epoch": 0.4857320978208507, "grad_norm": 0.0, - "learning_rate": 1.0925106042888915e-05, - "loss": 0.9383, + "learning_rate": 1.0946656653520435e-05, + "loss": 0.8476, "step": 17141 }, { - "epoch": 0.48643586833144153, + "epoch": 0.48576043526311313, "grad_norm": 0.0, - "learning_rate": 1.092419090219768e-05, - "loss": 0.9725, + "learning_rate": 1.0945742981949547e-05, + "loss": 0.9205, "step": 17142 }, { - "epoch": 0.48646424517593645, + "epoch": 0.4857887727053756, "grad_norm": 0.0, - "learning_rate": 1.0923275753699765e-05, - "loss": 0.9478, + "learning_rate": 1.0944829302412314e-05, + "loss": 0.8153, "step": 17143 }, { - "epoch": 0.4864926220204313, + "epoch": 0.48581711014763806, "grad_norm": 0.0, - "learning_rate": 1.09223605974029e-05, - "loss": 0.898, + "learning_rate": 1.0943915614916434e-05, + "loss": 0.8602, "step": 17144 }, { - "epoch": 0.4865209988649262, + "epoch": 0.48584544758990056, "grad_norm": 0.0, - "learning_rate": 1.0921445433314808e-05, - "loss": 0.8659, + "learning_rate": 1.0943001919469602e-05, + "loss": 0.8707, "step": 17145 }, { - "epoch": 0.48654937570942114, + "epoch": 0.485873785032163, "grad_norm": 0.0, - "learning_rate": 1.0920530261443228e-05, - "loss": 0.8704, + "learning_rate": 1.0942088216079516e-05, + "loss": 0.9164, "step": 17146 }, { - "epoch": 0.486577752553916, + "epoch": 0.48590212247442544, "grad_norm": 0.0, - "learning_rate": 1.0919615081795887e-05, - "loss": 0.909, + "learning_rate": 1.094117450475387e-05, + "loss": 0.9078, "step": 17147 }, { - "epoch": 0.4866061293984109, + "epoch": 0.48593045991668793, "grad_norm": 0.0, - "learning_rate": 1.0918699894380515e-05, - "loss": 0.8979, + "learning_rate": 1.094026078550036e-05, + "loss": 0.9729, "step": 17148 }, { - "epoch": 0.4866345062429058, + "epoch": 0.48595879735895037, "grad_norm": 0.0, - "learning_rate": 1.0917784699204843e-05, - "loss": 0.914, + "learning_rate": 1.0939347058326684e-05, + "loss": 0.8496, "step": 17149 }, { - "epoch": 0.4866628830874007, + "epoch": 0.48598713480121286, "grad_norm": 0.0, - "learning_rate": 1.0916869496276604e-05, - "loss": 0.881, + "learning_rate": 1.0938433323240543e-05, + "loss": 0.8913, "step": 17150 }, { - "epoch": 0.48669125993189555, + "epoch": 0.4860154722434753, "grad_norm": 0.0, - "learning_rate": 1.0915954285603529e-05, - "loss": 0.8903, + "learning_rate": 1.0937519580249628e-05, + "loss": 0.972, "step": 17151 }, { - "epoch": 0.48671963677639046, + "epoch": 0.48604380968573774, "grad_norm": 0.0, - "learning_rate": 1.0915039067193345e-05, - "loss": 0.8595, + "learning_rate": 1.0936605829361633e-05, + "loss": 0.9052, "step": 17152 }, { - "epoch": 0.4867480136208854, + "epoch": 0.48607214712800023, "grad_norm": 0.0, - "learning_rate": 1.0914123841053785e-05, - "loss": 0.948, + "learning_rate": 1.0935692070584264e-05, + "loss": 0.9059, "step": 17153 }, { - "epoch": 0.48677639046538024, + "epoch": 0.48610048457026267, "grad_norm": 0.0, - "learning_rate": 1.0913208607192581e-05, - "loss": 0.8137, + "learning_rate": 1.0934778303925214e-05, + "loss": 0.9714, "step": 17154 }, { - "epoch": 0.48680476730987515, + "epoch": 0.48612882201252516, "grad_norm": 0.0, - "learning_rate": 1.0912293365617462e-05, - "loss": 0.954, + "learning_rate": 1.0933864529392175e-05, + "loss": 0.8874, "step": 17155 }, { - "epoch": 0.48683314415437, + "epoch": 0.4861571594547876, "grad_norm": 0.0, - "learning_rate": 1.0911378116336157e-05, - "loss": 0.9306, + "learning_rate": 1.093295074699285e-05, + "loss": 0.8711, "step": 17156 }, { - "epoch": 0.4868615209988649, + "epoch": 0.4861854968970501, "grad_norm": 0.0, - "learning_rate": 1.0910462859356404e-05, - "loss": 0.9365, + "learning_rate": 1.0932036956734935e-05, + "loss": 0.8694, "step": 17157 }, { - "epoch": 0.48688989784335984, + "epoch": 0.48621383433931253, "grad_norm": 0.0, - "learning_rate": 1.0909547594685928e-05, - "loss": 0.8102, + "learning_rate": 1.0931123158626127e-05, + "loss": 0.9703, "step": 17158 }, { - "epoch": 0.4869182746878547, + "epoch": 0.48624217178157497, "grad_norm": 0.0, - "learning_rate": 1.0908632322332464e-05, - "loss": 0.9225, + "learning_rate": 1.0930209352674123e-05, + "loss": 0.8217, "step": 17159 }, { - "epoch": 0.4869466515323496, + "epoch": 0.48627050922383747, "grad_norm": 0.0, - "learning_rate": 1.0907717042303743e-05, - "loss": 0.8781, + "learning_rate": 1.0929295538886622e-05, + "loss": 1.0165, "step": 17160 }, { - "epoch": 0.4869750283768445, + "epoch": 0.4862988466660999, "grad_norm": 0.0, - "learning_rate": 1.0906801754607495e-05, - "loss": 0.8237, + "learning_rate": 1.0928381717271315e-05, + "loss": 0.9115, "step": 17161 }, { - "epoch": 0.4870034052213394, + "epoch": 0.4863271841083624, "grad_norm": 0.0, - "learning_rate": 1.0905886459251449e-05, - "loss": 0.8786, + "learning_rate": 1.0927467887835905e-05, + "loss": 0.9281, "step": 17162 }, { - "epoch": 0.4870317820658343, + "epoch": 0.48635552155062484, "grad_norm": 0.0, - "learning_rate": 1.090497115624334e-05, - "loss": 0.8323, + "learning_rate": 1.0926554050588091e-05, + "loss": 0.7939, "step": 17163 }, { - "epoch": 0.48706015891032917, + "epoch": 0.4863838589928873, "grad_norm": 0.0, - "learning_rate": 1.0904055845590899e-05, - "loss": 0.9219, + "learning_rate": 1.0925640205535569e-05, + "loss": 0.8935, "step": 17164 }, { - "epoch": 0.4870885357548241, + "epoch": 0.48641219643514977, "grad_norm": 0.0, - "learning_rate": 1.0903140527301859e-05, - "loss": 0.9718, + "learning_rate": 1.092472635268603e-05, + "loss": 0.874, "step": 17165 }, { - "epoch": 0.48711691259931894, + "epoch": 0.4864405338774122, "grad_norm": 0.0, - "learning_rate": 1.0902225201383949e-05, - "loss": 0.8392, + "learning_rate": 1.0923812492047183e-05, + "loss": 0.9374, "step": 17166 }, { - "epoch": 0.48714528944381386, + "epoch": 0.4864688713196747, "grad_norm": 0.0, - "learning_rate": 1.0901309867844904e-05, - "loss": 0.9228, + "learning_rate": 1.0922898623626721e-05, + "loss": 1.0066, "step": 17167 }, { - "epoch": 0.4871736662883087, + "epoch": 0.48649720876193714, "grad_norm": 0.0, - "learning_rate": 1.0900394526692453e-05, - "loss": 0.97, + "learning_rate": 1.0921984747432336e-05, + "loss": 0.9307, "step": 17168 }, { - "epoch": 0.48720204313280363, + "epoch": 0.48652554620419963, "grad_norm": 0.0, - "learning_rate": 1.0899479177934331e-05, - "loss": 0.8917, + "learning_rate": 1.0921070863471732e-05, + "loss": 0.8609, "step": 17169 }, { - "epoch": 0.48723041997729855, + "epoch": 0.48655388364646207, "grad_norm": 0.0, - "learning_rate": 1.0898563821578265e-05, - "loss": 0.8593, + "learning_rate": 1.0920156971752612e-05, + "loss": 0.8792, "step": 17170 }, { - "epoch": 0.4872587968217934, + "epoch": 0.4865822210887245, "grad_norm": 0.0, - "learning_rate": 1.0897648457631991e-05, - "loss": 0.9474, + "learning_rate": 1.0919243072282664e-05, + "loss": 0.9511, "step": 17171 }, { - "epoch": 0.4872871736662883, + "epoch": 0.486610558530987, "grad_norm": 0.0, - "learning_rate": 1.0896733086103239e-05, - "loss": 0.9622, + "learning_rate": 1.091832916506959e-05, + "loss": 0.8783, "step": 17172 }, { - "epoch": 0.4873155505107832, + "epoch": 0.48663889597324944, "grad_norm": 0.0, - "learning_rate": 1.0895817706999746e-05, - "loss": 0.998, + "learning_rate": 1.0917415250121088e-05, + "loss": 0.8267, "step": 17173 }, { - "epoch": 0.4873439273552781, + "epoch": 0.48666723341551194, "grad_norm": 0.0, - "learning_rate": 1.0894902320329236e-05, - "loss": 0.9229, + "learning_rate": 1.0916501327444859e-05, + "loss": 0.8969, "step": 17174 }, { - "epoch": 0.487372304199773, + "epoch": 0.4866955708577744, "grad_norm": 0.0, - "learning_rate": 1.0893986926099449e-05, - "loss": 0.8943, + "learning_rate": 1.0915587397048595e-05, + "loss": 0.9033, "step": 17175 }, { - "epoch": 0.48740068104426787, + "epoch": 0.4867239083000368, "grad_norm": 0.0, - "learning_rate": 1.0893071524318112e-05, - "loss": 0.89, + "learning_rate": 1.0914673458940002e-05, + "loss": 0.8961, "step": 17176 }, { - "epoch": 0.4874290578887628, + "epoch": 0.4867522457422993, "grad_norm": 0.0, - "learning_rate": 1.0892156114992963e-05, - "loss": 0.7759, + "learning_rate": 1.0913759513126774e-05, + "loss": 0.9069, "step": 17177 }, { - "epoch": 0.48745743473325764, + "epoch": 0.48678058318456174, "grad_norm": 0.0, - "learning_rate": 1.0891240698131729e-05, - "loss": 0.9278, + "learning_rate": 1.091284555961661e-05, + "loss": 0.9375, "step": 17178 }, { - "epoch": 0.48748581157775256, + "epoch": 0.48680892062682424, "grad_norm": 0.0, - "learning_rate": 1.0890325273742141e-05, - "loss": 0.9802, + "learning_rate": 1.091193159841721e-05, + "loss": 0.8987, "step": 17179 }, { - "epoch": 0.4875141884222474, + "epoch": 0.4868372580690867, "grad_norm": 0.0, - "learning_rate": 1.0889409841831942e-05, - "loss": 0.9559, + "learning_rate": 1.0911017629536272e-05, + "loss": 1.0339, "step": 17180 }, { - "epoch": 0.48754256526674233, + "epoch": 0.48686559551134917, "grad_norm": 0.0, - "learning_rate": 1.0888494402408856e-05, - "loss": 0.9358, + "learning_rate": 1.091010365298149e-05, + "loss": 0.8736, "step": 17181 }, { - "epoch": 0.48757094211123725, + "epoch": 0.4868939329536116, "grad_norm": 0.0, - "learning_rate": 1.0887578955480616e-05, - "loss": 0.8203, + "learning_rate": 1.090918966876057e-05, + "loss": 0.862, "step": 17182 }, { - "epoch": 0.4875993189557321, + "epoch": 0.48692227039587405, "grad_norm": 0.0, - "learning_rate": 1.088666350105496e-05, - "loss": 0.9448, + "learning_rate": 1.0908275676881206e-05, + "loss": 0.8217, "step": 17183 }, { - "epoch": 0.487627695800227, + "epoch": 0.48695060783813654, "grad_norm": 0.0, - "learning_rate": 1.0885748039139615e-05, - "loss": 1.0283, + "learning_rate": 1.09073616773511e-05, + "loss": 0.9183, "step": 17184 }, { - "epoch": 0.4876560726447219, + "epoch": 0.486978945280399, "grad_norm": 0.0, - "learning_rate": 1.0884832569742316e-05, - "loss": 0.8613, + "learning_rate": 1.0906447670177948e-05, + "loss": 0.865, "step": 17185 }, { - "epoch": 0.4876844494892168, + "epoch": 0.48700728272266147, "grad_norm": 0.0, - "learning_rate": 1.0883917092870797e-05, - "loss": 0.8306, + "learning_rate": 1.0905533655369455e-05, + "loss": 0.8385, "step": 17186 }, { - "epoch": 0.4877128263337117, + "epoch": 0.4870356201649239, "grad_norm": 0.0, - "learning_rate": 1.0883001608532791e-05, - "loss": 0.926, + "learning_rate": 1.0904619632933312e-05, + "loss": 0.8477, "step": 17187 }, { - "epoch": 0.4877412031782066, + "epoch": 0.48706395760718635, "grad_norm": 0.0, - "learning_rate": 1.088208611673603e-05, - "loss": 0.8885, + "learning_rate": 1.0903705602877224e-05, + "loss": 0.927, "step": 17188 }, { - "epoch": 0.4877695800227015, + "epoch": 0.48709229504944884, "grad_norm": 0.0, - "learning_rate": 1.0881170617488248e-05, - "loss": 0.7813, + "learning_rate": 1.0902791565208887e-05, + "loss": 0.8229, "step": 17189 }, { - "epoch": 0.48779795686719635, + "epoch": 0.4871206324917113, "grad_norm": 0.0, - "learning_rate": 1.088025511079718e-05, - "loss": 0.9652, + "learning_rate": 1.0901877519936001e-05, + "loss": 0.8285, "step": 17190 }, { - "epoch": 0.48782633371169126, + "epoch": 0.4871489699339738, "grad_norm": 0.0, - "learning_rate": 1.0879339596670558e-05, - "loss": 0.8958, + "learning_rate": 1.0900963467066268e-05, + "loss": 0.8823, "step": 17191 }, { - "epoch": 0.4878547105561862, + "epoch": 0.4871773073762362, "grad_norm": 0.0, - "learning_rate": 1.0878424075116111e-05, - "loss": 0.8642, + "learning_rate": 1.0900049406607383e-05, + "loss": 0.8602, "step": 17192 }, { - "epoch": 0.48788308740068104, + "epoch": 0.4872056448184987, "grad_norm": 0.0, - "learning_rate": 1.087750854614158e-05, - "loss": 0.8, + "learning_rate": 1.089913533856705e-05, + "loss": 0.8345, "step": 17193 }, { - "epoch": 0.48791146424517595, + "epoch": 0.48723398226076114, "grad_norm": 0.0, - "learning_rate": 1.0876593009754692e-05, - "loss": 0.9307, + "learning_rate": 1.0898221262952962e-05, + "loss": 0.8185, "step": 17194 }, { - "epoch": 0.4879398410896708, + "epoch": 0.4872623197030236, "grad_norm": 0.0, - "learning_rate": 1.0875677465963186e-05, - "loss": 0.9391, + "learning_rate": 1.0897307179772825e-05, + "loss": 0.8325, "step": 17195 }, { - "epoch": 0.4879682179341657, + "epoch": 0.4872906571452861, "grad_norm": 0.0, - "learning_rate": 1.087476191477479e-05, - "loss": 0.9042, + "learning_rate": 1.0896393089034336e-05, + "loss": 0.934, "step": 17196 }, { - "epoch": 0.4879965947786606, + "epoch": 0.4873189945875485, "grad_norm": 0.0, - "learning_rate": 1.0873846356197244e-05, - "loss": 0.8843, + "learning_rate": 1.0895478990745196e-05, + "loss": 0.8675, "step": 17197 }, { - "epoch": 0.4880249716231555, + "epoch": 0.487347332029811, "grad_norm": 0.0, - "learning_rate": 1.0872930790238278e-05, - "loss": 0.8851, + "learning_rate": 1.0894564884913103e-05, + "loss": 0.9071, "step": 17198 }, { - "epoch": 0.4880533484676504, + "epoch": 0.48737566947207345, "grad_norm": 0.0, - "learning_rate": 1.0872015216905625e-05, - "loss": 0.8638, + "learning_rate": 1.089365077154576e-05, + "loss": 0.8148, "step": 17199 }, { - "epoch": 0.4880817253121453, + "epoch": 0.4874040069143359, "grad_norm": 0.0, - "learning_rate": 1.0871099636207022e-05, - "loss": 0.9952, + "learning_rate": 1.0892736650650864e-05, + "loss": 0.8227, "step": 17200 }, { - "epoch": 0.4881101021566402, + "epoch": 0.4874323443565984, "grad_norm": 0.0, - "learning_rate": 1.08701840481502e-05, - "loss": 0.8006, + "learning_rate": 1.0891822522236114e-05, + "loss": 0.9607, "step": 17201 }, { - "epoch": 0.48813847900113505, + "epoch": 0.4874606817988608, "grad_norm": 0.0, - "learning_rate": 1.0869268452742893e-05, - "loss": 0.903, + "learning_rate": 1.089090838630921e-05, + "loss": 0.9419, "step": 17202 }, { - "epoch": 0.48816685584562997, + "epoch": 0.4874890192411233, "grad_norm": 0.0, - "learning_rate": 1.0868352849992836e-05, - "loss": 0.8574, + "learning_rate": 1.0889994242877857e-05, + "loss": 0.8525, "step": 17203 }, { - "epoch": 0.4881952326901249, + "epoch": 0.48751735668338575, "grad_norm": 0.0, - "learning_rate": 1.0867437239907766e-05, - "loss": 0.8523, + "learning_rate": 1.088908009194975e-05, + "loss": 0.9319, "step": 17204 }, { - "epoch": 0.48822360953461974, + "epoch": 0.48754569412564824, "grad_norm": 0.0, - "learning_rate": 1.0866521622495417e-05, - "loss": 0.8983, + "learning_rate": 1.0888165933532595e-05, + "loss": 0.8471, "step": 17205 }, { - "epoch": 0.48825198637911466, + "epoch": 0.4875740315679107, "grad_norm": 0.0, - "learning_rate": 1.0865605997763516e-05, - "loss": 0.7896, + "learning_rate": 1.0887251767634084e-05, + "loss": 0.9656, "step": 17206 }, { - "epoch": 0.4882803632236095, + "epoch": 0.4876023690101731, "grad_norm": 0.0, - "learning_rate": 1.0864690365719803e-05, - "loss": 0.9352, + "learning_rate": 1.0886337594261926e-05, + "loss": 0.9896, "step": 17207 }, { - "epoch": 0.48830874006810443, + "epoch": 0.4876307064524356, "grad_norm": 0.0, - "learning_rate": 1.0863774726372014e-05, - "loss": 0.8347, + "learning_rate": 1.0885423413423812e-05, + "loss": 0.7697, "step": 17208 }, { - "epoch": 0.48833711691259934, + "epoch": 0.48765904389469805, "grad_norm": 0.0, - "learning_rate": 1.086285907972788e-05, - "loss": 0.8837, + "learning_rate": 1.0884509225127453e-05, + "loss": 0.8517, "step": 17209 }, { - "epoch": 0.4883654937570942, + "epoch": 0.48768738133696055, "grad_norm": 0.0, - "learning_rate": 1.0861943425795132e-05, - "loss": 0.8837, + "learning_rate": 1.088359502938054e-05, + "loss": 0.9379, "step": 17210 }, { - "epoch": 0.4883938706015891, + "epoch": 0.487715718779223, "grad_norm": 0.0, - "learning_rate": 1.0861027764581515e-05, - "loss": 0.8597, + "learning_rate": 1.0882680826190782e-05, + "loss": 0.9187, "step": 17211 }, { - "epoch": 0.488422247446084, + "epoch": 0.4877440562214854, "grad_norm": 0.0, - "learning_rate": 1.0860112096094755e-05, - "loss": 0.8438, + "learning_rate": 1.0881766615565877e-05, + "loss": 0.8586, "step": 17212 }, { - "epoch": 0.4884506242905789, + "epoch": 0.4877723936637479, "grad_norm": 0.0, - "learning_rate": 1.085919642034259e-05, - "loss": 0.9494, + "learning_rate": 1.0880852397513519e-05, + "loss": 0.9133, "step": 17213 }, { - "epoch": 0.48847900113507375, + "epoch": 0.48780073110601035, "grad_norm": 0.0, - "learning_rate": 1.0858280737332756e-05, - "loss": 0.859, + "learning_rate": 1.0879938172041415e-05, + "loss": 0.9019, "step": 17214 }, { - "epoch": 0.48850737797956867, + "epoch": 0.48782906854827285, "grad_norm": 0.0, - "learning_rate": 1.0857365047072985e-05, - "loss": 0.9362, + "learning_rate": 1.0879023939157267e-05, + "loss": 0.8405, "step": 17215 }, { - "epoch": 0.4885357548240636, + "epoch": 0.4878574059905353, "grad_norm": 0.0, - "learning_rate": 1.0856449349571013e-05, - "loss": 0.8409, + "learning_rate": 1.0878109698868773e-05, + "loss": 0.9463, "step": 17216 }, { - "epoch": 0.48856413166855844, + "epoch": 0.4878857434327978, "grad_norm": 0.0, - "learning_rate": 1.0855533644834573e-05, - "loss": 0.8876, + "learning_rate": 1.0877195451183637e-05, + "loss": 0.8755, "step": 17217 }, { - "epoch": 0.48859250851305336, + "epoch": 0.4879140808750602, "grad_norm": 0.0, - "learning_rate": 1.0854617932871403e-05, - "loss": 0.8557, + "learning_rate": 1.0876281196109556e-05, + "loss": 0.8657, "step": 17218 }, { - "epoch": 0.4886208853575482, + "epoch": 0.48794241831732266, "grad_norm": 0.0, - "learning_rate": 1.0853702213689236e-05, - "loss": 0.9505, + "learning_rate": 1.0875366933654232e-05, + "loss": 0.9398, "step": 17219 }, { - "epoch": 0.48864926220204313, + "epoch": 0.48797075575958515, "grad_norm": 0.0, - "learning_rate": 1.085278648729581e-05, - "loss": 0.8338, + "learning_rate": 1.0874452663825368e-05, + "loss": 0.7991, "step": 17220 }, { - "epoch": 0.48867763904653805, + "epoch": 0.4879990932018476, "grad_norm": 0.0, - "learning_rate": 1.085187075369886e-05, - "loss": 0.9016, + "learning_rate": 1.0873538386630666e-05, + "loss": 0.9463, "step": 17221 }, { - "epoch": 0.4887060158910329, + "epoch": 0.4880274306441101, "grad_norm": 0.0, - "learning_rate": 1.0850955012906114e-05, - "loss": 0.8855, + "learning_rate": 1.0872624102077827e-05, + "loss": 0.8557, "step": 17222 }, { - "epoch": 0.4887343927355278, + "epoch": 0.4880557680863725, "grad_norm": 0.0, - "learning_rate": 1.0850039264925315e-05, - "loss": 0.8103, + "learning_rate": 1.0871709810174547e-05, + "loss": 0.8133, "step": 17223 }, { - "epoch": 0.4887627695800227, + "epoch": 0.48808410552863496, "grad_norm": 0.0, - "learning_rate": 1.08491235097642e-05, - "loss": 0.8695, + "learning_rate": 1.0870795510928536e-05, + "loss": 0.9152, "step": 17224 }, { - "epoch": 0.4887911464245176, + "epoch": 0.48811244297089745, "grad_norm": 0.0, - "learning_rate": 1.0848207747430495e-05, - "loss": 1.0085, + "learning_rate": 1.0869881204347488e-05, + "loss": 0.7902, "step": 17225 }, { - "epoch": 0.4888195232690125, + "epoch": 0.4881407804131599, "grad_norm": 0.0, - "learning_rate": 1.0847291977931942e-05, - "loss": 0.8201, + "learning_rate": 1.0868966890439107e-05, + "loss": 0.9581, "step": 17226 }, { - "epoch": 0.48884790011350737, + "epoch": 0.4881691178554224, "grad_norm": 0.0, - "learning_rate": 1.0846376201276278e-05, - "loss": 0.9501, + "learning_rate": 1.0868052569211096e-05, + "loss": 0.8513, "step": 17227 }, { - "epoch": 0.4888762769580023, + "epoch": 0.4881974552976848, "grad_norm": 0.0, - "learning_rate": 1.0845460417471237e-05, - "loss": 0.8907, + "learning_rate": 1.0867138240671156e-05, + "loss": 0.9382, "step": 17228 }, { - "epoch": 0.48890465380249715, + "epoch": 0.4882257927399473, "grad_norm": 0.0, - "learning_rate": 1.0844544626524551e-05, - "loss": 0.8944, + "learning_rate": 1.0866223904826992e-05, + "loss": 0.9033, "step": 17229 }, { - "epoch": 0.48893303064699206, + "epoch": 0.48825413018220976, "grad_norm": 0.0, - "learning_rate": 1.084362882844396e-05, - "loss": 0.9859, + "learning_rate": 1.08653095616863e-05, + "loss": 0.8906, "step": 17230 }, { - "epoch": 0.4889614074914869, + "epoch": 0.4882824676244722, "grad_norm": 0.0, - "learning_rate": 1.08427130232372e-05, - "loss": 0.9372, + "learning_rate": 1.0864395211256782e-05, + "loss": 0.8342, "step": 17231 }, { - "epoch": 0.48898978433598184, + "epoch": 0.4883108050667347, "grad_norm": 0.0, - "learning_rate": 1.0841797210912e-05, - "loss": 0.7896, + "learning_rate": 1.0863480853546142e-05, + "loss": 0.947, "step": 17232 }, { - "epoch": 0.48901816118047675, + "epoch": 0.4883391425089971, "grad_norm": 0.0, - "learning_rate": 1.0840881391476103e-05, - "loss": 0.9931, + "learning_rate": 1.0862566488562082e-05, + "loss": 0.8779, "step": 17233 }, { - "epoch": 0.4890465380249716, + "epoch": 0.4883674799512596, "grad_norm": 0.0, - "learning_rate": 1.0839965564937244e-05, - "loss": 0.8411, + "learning_rate": 1.086165211631231e-05, + "loss": 0.8342, "step": 17234 }, { - "epoch": 0.4890749148694665, + "epoch": 0.48839581739352206, "grad_norm": 0.0, - "learning_rate": 1.0839049731303159e-05, - "loss": 0.8482, + "learning_rate": 1.0860737736804517e-05, + "loss": 0.9265, "step": 17235 }, { - "epoch": 0.4891032917139614, + "epoch": 0.4884241548357845, "grad_norm": 0.0, - "learning_rate": 1.0838133890581583e-05, - "loss": 0.8473, + "learning_rate": 1.085982335004641e-05, + "loss": 0.9363, "step": 17236 }, { - "epoch": 0.4891316685584563, + "epoch": 0.488452492278047, "grad_norm": 0.0, - "learning_rate": 1.0837218042780254e-05, - "loss": 0.8761, + "learning_rate": 1.0858908956045695e-05, + "loss": 0.9109, "step": 17237 }, { - "epoch": 0.4891600454029512, + "epoch": 0.48848082972030943, "grad_norm": 0.0, - "learning_rate": 1.0836302187906904e-05, - "loss": 0.918, + "learning_rate": 1.0857994554810069e-05, + "loss": 0.8954, "step": 17238 }, { - "epoch": 0.4891884222474461, + "epoch": 0.4885091671625719, "grad_norm": 0.0, - "learning_rate": 1.083538632596927e-05, - "loss": 0.8867, + "learning_rate": 1.0857080146347236e-05, + "loss": 0.9345, "step": 17239 }, { - "epoch": 0.489216799091941, + "epoch": 0.48853750460483436, "grad_norm": 0.0, - "learning_rate": 1.0834470456975092e-05, - "loss": 0.8987, + "learning_rate": 1.0856165730664898e-05, + "loss": 0.9095, "step": 17240 }, { - "epoch": 0.48924517593643585, + "epoch": 0.48856584204709685, "grad_norm": 0.0, - "learning_rate": 1.0833554580932107e-05, - "loss": 0.7417, + "learning_rate": 1.085525130777076e-05, + "loss": 0.8784, "step": 17241 }, { - "epoch": 0.48927355278093076, + "epoch": 0.4885941794893593, "grad_norm": 0.0, - "learning_rate": 1.0832638697848047e-05, - "loss": 0.8156, + "learning_rate": 1.0854336877672525e-05, + "loss": 0.8925, "step": 17242 }, { - "epoch": 0.4893019296254257, + "epoch": 0.48862251693162173, "grad_norm": 0.0, - "learning_rate": 1.083172280773065e-05, - "loss": 0.8847, + "learning_rate": 1.0853422440377888e-05, + "loss": 0.9379, "step": 17243 }, { - "epoch": 0.48933030646992054, + "epoch": 0.4886508543738842, "grad_norm": 0.0, - "learning_rate": 1.0830806910587655e-05, - "loss": 0.7776, + "learning_rate": 1.0852507995894558e-05, + "loss": 0.9288, "step": 17244 }, { - "epoch": 0.48935868331441545, + "epoch": 0.48867919181614666, "grad_norm": 0.0, - "learning_rate": 1.0829891006426796e-05, - "loss": 0.9555, + "learning_rate": 1.085159354423024e-05, + "loss": 0.8362, "step": 17245 }, { - "epoch": 0.4893870601589103, + "epoch": 0.48870752925840916, "grad_norm": 0.0, - "learning_rate": 1.0828975095255806e-05, - "loss": 0.8976, + "learning_rate": 1.085067908539263e-05, + "loss": 0.8746, "step": 17246 }, { - "epoch": 0.48941543700340523, + "epoch": 0.4887358667006716, "grad_norm": 0.0, - "learning_rate": 1.0828059177082433e-05, - "loss": 0.9043, + "learning_rate": 1.0849764619389436e-05, + "loss": 0.9458, "step": 17247 }, { - "epoch": 0.4894438138479001, + "epoch": 0.48876420414293403, "grad_norm": 0.0, - "learning_rate": 1.0827143251914401e-05, - "loss": 0.7864, + "learning_rate": 1.0848850146228356e-05, + "loss": 0.9335, "step": 17248 }, { - "epoch": 0.489472190692395, + "epoch": 0.4887925415851965, "grad_norm": 0.0, - "learning_rate": 1.0826227319759458e-05, - "loss": 0.9776, + "learning_rate": 1.08479356659171e-05, + "loss": 0.8819, "step": 17249 }, { - "epoch": 0.4895005675368899, + "epoch": 0.48882087902745897, "grad_norm": 0.0, - "learning_rate": 1.082531138062533e-05, - "loss": 1.0013, + "learning_rate": 1.0847021178463366e-05, + "loss": 0.8785, "step": 17250 }, { - "epoch": 0.4895289443813848, + "epoch": 0.48884921646972146, "grad_norm": 0.0, - "learning_rate": 1.0824395434519763e-05, - "loss": 0.955, + "learning_rate": 1.0846106683874858e-05, + "loss": 0.8412, "step": 17251 }, { - "epoch": 0.4895573212258797, + "epoch": 0.4888775539119839, "grad_norm": 0.0, - "learning_rate": 1.0823479481450491e-05, - "loss": 0.9178, + "learning_rate": 1.0845192182159276e-05, + "loss": 0.8041, "step": 17252 }, { - "epoch": 0.48958569807037455, + "epoch": 0.4889058913542464, "grad_norm": 0.0, - "learning_rate": 1.0822563521425248e-05, - "loss": 0.8725, + "learning_rate": 1.0844277673324328e-05, + "loss": 0.8458, "step": 17253 }, { - "epoch": 0.48961407491486947, + "epoch": 0.48893422879650883, "grad_norm": 0.0, - "learning_rate": 1.0821647554451779e-05, - "loss": 0.9711, + "learning_rate": 1.0843363157377718e-05, + "loss": 0.9847, "step": 17254 }, { - "epoch": 0.4896424517593644, + "epoch": 0.48896256623877127, "grad_norm": 0.0, - "learning_rate": 1.0820731580537813e-05, - "loss": 0.8354, + "learning_rate": 1.0842448634327146e-05, + "loss": 0.8435, "step": 17255 }, { - "epoch": 0.48967082860385924, + "epoch": 0.48899090368103376, "grad_norm": 0.0, - "learning_rate": 1.081981559969109e-05, - "loss": 0.8271, + "learning_rate": 1.0841534104180313e-05, + "loss": 0.8276, "step": 17256 }, { - "epoch": 0.48969920544835416, + "epoch": 0.4890192411232962, "grad_norm": 0.0, - "learning_rate": 1.0818899611919349e-05, - "loss": 0.9211, + "learning_rate": 1.084061956694493e-05, + "loss": 0.9108, "step": 17257 }, { - "epoch": 0.489727582292849, + "epoch": 0.4890475785655587, "grad_norm": 0.0, - "learning_rate": 1.0817983617230326e-05, - "loss": 0.9062, + "learning_rate": 1.0839705022628698e-05, + "loss": 0.9379, "step": 17258 }, { - "epoch": 0.48975595913734393, + "epoch": 0.48907591600782113, "grad_norm": 0.0, - "learning_rate": 1.0817067615631757e-05, - "loss": 0.8886, + "learning_rate": 1.0838790471239314e-05, + "loss": 0.8899, "step": 17259 }, { - "epoch": 0.4897843359818388, + "epoch": 0.48910425345008357, "grad_norm": 0.0, - "learning_rate": 1.0816151607131383e-05, - "loss": 0.8728, + "learning_rate": 1.0837875912784486e-05, + "loss": 0.8185, "step": 17260 }, { - "epoch": 0.4898127128263337, + "epoch": 0.48913259089234606, "grad_norm": 0.0, - "learning_rate": 1.081523559173694e-05, - "loss": 0.7822, + "learning_rate": 1.0836961347271919e-05, + "loss": 0.8708, "step": 17261 }, { - "epoch": 0.4898410896708286, + "epoch": 0.4891609283346085, "grad_norm": 0.0, - "learning_rate": 1.0814319569456164e-05, - "loss": 0.8638, + "learning_rate": 1.0836046774709319e-05, + "loss": 0.8667, "step": 17262 }, { - "epoch": 0.4898694665153235, + "epoch": 0.489189265776871, "grad_norm": 0.0, - "learning_rate": 1.0813403540296791e-05, - "loss": 1.0012, + "learning_rate": 1.0835132195104385e-05, + "loss": 0.9152, "step": 17263 }, { - "epoch": 0.4898978433598184, + "epoch": 0.48921760321913343, "grad_norm": 0.0, - "learning_rate": 1.0812487504266567e-05, - "loss": 0.8905, + "learning_rate": 1.0834217608464819e-05, + "loss": 0.8601, "step": 17264 }, { - "epoch": 0.48992622020431326, + "epoch": 0.48924594066139593, "grad_norm": 0.0, - "learning_rate": 1.0811571461373224e-05, - "loss": 0.9338, + "learning_rate": 1.0833303014798333e-05, + "loss": 0.8833, "step": 17265 }, { - "epoch": 0.48995459704880817, + "epoch": 0.48927427810365837, "grad_norm": 0.0, - "learning_rate": 1.0810655411624497e-05, - "loss": 0.8872, + "learning_rate": 1.0832388414112623e-05, + "loss": 0.9159, "step": 17266 }, { - "epoch": 0.4899829738933031, + "epoch": 0.4893026155459208, "grad_norm": 0.0, - "learning_rate": 1.080973935502813e-05, - "loss": 0.9555, + "learning_rate": 1.0831473806415397e-05, + "loss": 0.8924, "step": 17267 }, { - "epoch": 0.49001135073779795, + "epoch": 0.4893309529881833, "grad_norm": 0.0, - "learning_rate": 1.080882329159186e-05, - "loss": 0.8063, + "learning_rate": 1.0830559191714358e-05, + "loss": 0.855, "step": 17268 }, { - "epoch": 0.49003972758229286, + "epoch": 0.48935929043044574, "grad_norm": 0.0, - "learning_rate": 1.0807907221323422e-05, - "loss": 0.974, + "learning_rate": 1.0829644570017213e-05, + "loss": 0.9471, "step": 17269 }, { - "epoch": 0.4900681044267877, + "epoch": 0.48938762787270823, "grad_norm": 0.0, - "learning_rate": 1.0806991144230551e-05, - "loss": 0.8292, + "learning_rate": 1.0828729941331664e-05, + "loss": 0.9232, "step": 17270 }, { - "epoch": 0.49009648127128264, + "epoch": 0.48941596531497067, "grad_norm": 0.0, - "learning_rate": 1.0806075060320994e-05, - "loss": 0.8546, + "learning_rate": 1.0827815305665413e-05, + "loss": 0.8768, "step": 17271 }, { - "epoch": 0.49012485811577755, + "epoch": 0.4894443027572331, "grad_norm": 0.0, - "learning_rate": 1.0805158969602484e-05, - "loss": 0.9237, + "learning_rate": 1.0826900663026166e-05, + "loss": 0.836, "step": 17272 }, { - "epoch": 0.4901532349602724, + "epoch": 0.4894726401994956, "grad_norm": 0.0, - "learning_rate": 1.0804242872082758e-05, - "loss": 0.8728, + "learning_rate": 1.0825986013421626e-05, + "loss": 0.8697, "step": 17273 }, { - "epoch": 0.4901816118047673, + "epoch": 0.48950097764175804, "grad_norm": 0.0, - "learning_rate": 1.0803326767769559e-05, - "loss": 0.9263, + "learning_rate": 1.0825071356859502e-05, + "loss": 0.9321, "step": 17274 }, { - "epoch": 0.4902099886492622, + "epoch": 0.48952931508402053, "grad_norm": 0.0, - "learning_rate": 1.0802410656670623e-05, - "loss": 0.8932, + "learning_rate": 1.0824156693347496e-05, + "loss": 0.8748, "step": 17275 }, { - "epoch": 0.4902383654937571, + "epoch": 0.48955765252628297, "grad_norm": 0.0, - "learning_rate": 1.0801494538793684e-05, - "loss": 0.9898, + "learning_rate": 1.082324202289331e-05, + "loss": 0.9196, "step": 17276 }, { - "epoch": 0.49026674233825196, + "epoch": 0.48958598996854547, "grad_norm": 0.0, - "learning_rate": 1.080057841414649e-05, - "loss": 0.8686, + "learning_rate": 1.0822327345504651e-05, + "loss": 0.8873, "step": 17277 }, { - "epoch": 0.4902951191827469, + "epoch": 0.4896143274108079, "grad_norm": 0.0, - "learning_rate": 1.079966228273677e-05, - "loss": 0.8631, + "learning_rate": 1.0821412661189225e-05, + "loss": 0.9608, "step": 17278 }, { - "epoch": 0.4903234960272418, + "epoch": 0.48964266485307034, "grad_norm": 0.0, - "learning_rate": 1.0798746144572267e-05, - "loss": 0.8683, + "learning_rate": 1.0820497969954734e-05, + "loss": 0.9069, "step": 17279 }, { - "epoch": 0.49035187287173665, + "epoch": 0.48967100229533284, "grad_norm": 0.0, - "learning_rate": 1.079782999966072e-05, - "loss": 0.8879, + "learning_rate": 1.0819583271808884e-05, + "loss": 0.8686, "step": 17280 }, { - "epoch": 0.49038024971623156, + "epoch": 0.4896993397375953, "grad_norm": 0.0, - "learning_rate": 1.079691384800987e-05, - "loss": 0.9865, + "learning_rate": 1.0818668566759379e-05, + "loss": 0.8681, "step": 17281 }, { - "epoch": 0.4904086265607264, + "epoch": 0.48972767717985777, "grad_norm": 0.0, - "learning_rate": 1.079599768962745e-05, - "loss": 0.9196, + "learning_rate": 1.0817753854813926e-05, + "loss": 0.8337, "step": 17282 }, { - "epoch": 0.49043700340522134, + "epoch": 0.4897560146221202, "grad_norm": 0.0, - "learning_rate": 1.0795081524521201e-05, - "loss": 0.8571, + "learning_rate": 1.0816839135980228e-05, + "loss": 0.8809, "step": 17283 }, { - "epoch": 0.49046538024971625, + "epoch": 0.48978435206438264, "grad_norm": 0.0, - "learning_rate": 1.0794165352698864e-05, - "loss": 0.8496, + "learning_rate": 1.0815924410265992e-05, + "loss": 0.8994, "step": 17284 }, { - "epoch": 0.4904937570942111, + "epoch": 0.48981268950664514, "grad_norm": 0.0, - "learning_rate": 1.079324917416818e-05, - "loss": 0.8988, + "learning_rate": 1.0815009677678918e-05, + "loss": 0.8471, "step": 17285 }, { - "epoch": 0.49052213393870603, + "epoch": 0.4898410269489076, "grad_norm": 0.0, - "learning_rate": 1.0792332988936878e-05, - "loss": 0.8727, + "learning_rate": 1.0814094938226716e-05, + "loss": 0.9181, "step": 17286 }, { - "epoch": 0.4905505107832009, + "epoch": 0.48986936439117007, "grad_norm": 0.0, - "learning_rate": 1.0791416797012706e-05, - "loss": 0.8371, + "learning_rate": 1.0813180191917092e-05, + "loss": 1.0093, "step": 17287 }, { - "epoch": 0.4905788876276958, + "epoch": 0.4898977018334325, "grad_norm": 0.0, - "learning_rate": 1.0790500598403401e-05, - "loss": 0.8565, + "learning_rate": 1.081226543875775e-05, + "loss": 0.8801, "step": 17288 }, { - "epoch": 0.4906072644721907, + "epoch": 0.489926039275695, "grad_norm": 0.0, - "learning_rate": 1.0789584393116706e-05, - "loss": 0.943, + "learning_rate": 1.0811350678756392e-05, + "loss": 0.9593, "step": 17289 }, { - "epoch": 0.4906356413166856, + "epoch": 0.48995437671795744, "grad_norm": 0.0, - "learning_rate": 1.078866818116035e-05, - "loss": 0.8298, + "learning_rate": 1.081043591192073e-05, + "loss": 0.9033, "step": 17290 }, { - "epoch": 0.4906640181611805, + "epoch": 0.4899827141602199, "grad_norm": 0.0, - "learning_rate": 1.0787751962542081e-05, - "loss": 1.0371, + "learning_rate": 1.080952113825846e-05, + "loss": 0.8568, "step": 17291 }, { - "epoch": 0.49069239500567535, + "epoch": 0.4900110516024824, "grad_norm": 0.0, - "learning_rate": 1.0786835737269636e-05, - "loss": 0.9633, + "learning_rate": 1.0808606357777296e-05, + "loss": 0.9063, "step": 17292 }, { - "epoch": 0.49072077185017027, + "epoch": 0.4900393890447448, "grad_norm": 0.0, - "learning_rate": 1.0785919505350754e-05, - "loss": 0.7372, + "learning_rate": 1.0807691570484937e-05, + "loss": 0.9449, "step": 17293 }, { - "epoch": 0.4907491486946651, + "epoch": 0.4900677264870073, "grad_norm": 0.0, - "learning_rate": 1.0785003266793173e-05, - "loss": 0.8405, + "learning_rate": 1.0806776776389096e-05, + "loss": 0.733, "step": 17294 }, { - "epoch": 0.49077752553916004, + "epoch": 0.49009606392926974, "grad_norm": 0.0, - "learning_rate": 1.0784087021604636e-05, - "loss": 0.8562, + "learning_rate": 1.0805861975497473e-05, + "loss": 0.9868, "step": 17295 }, { - "epoch": 0.49080590238365496, + "epoch": 0.4901244013715322, "grad_norm": 0.0, - "learning_rate": 1.0783170769792882e-05, - "loss": 0.9076, + "learning_rate": 1.0804947167817778e-05, + "loss": 0.8842, "step": 17296 }, { - "epoch": 0.4908342792281498, + "epoch": 0.4901527388137947, "grad_norm": 0.0, - "learning_rate": 1.0782254511365646e-05, - "loss": 0.9156, + "learning_rate": 1.080403235335771e-05, + "loss": 0.7911, "step": 17297 }, { - "epoch": 0.49086265607264473, + "epoch": 0.4901810762560571, "grad_norm": 0.0, - "learning_rate": 1.0781338246330673e-05, - "loss": 0.8283, + "learning_rate": 1.0803117532124983e-05, + "loss": 0.9756, "step": 17298 }, { - "epoch": 0.4908910329171396, + "epoch": 0.4902094136983196, "grad_norm": 0.0, - "learning_rate": 1.07804219746957e-05, - "loss": 0.8871, + "learning_rate": 1.0802202704127293e-05, + "loss": 0.8657, "step": 17299 }, { - "epoch": 0.4909194097616345, + "epoch": 0.49023775114058205, "grad_norm": 0.0, - "learning_rate": 1.0779505696468468e-05, - "loss": 0.8069, + "learning_rate": 1.0801287869372356e-05, + "loss": 0.8402, "step": 17300 }, { - "epoch": 0.4909477866061294, + "epoch": 0.49026608858284454, "grad_norm": 0.0, - "learning_rate": 1.0778589411656717e-05, - "loss": 0.877, + "learning_rate": 1.0800373027867874e-05, + "loss": 0.8612, "step": 17301 }, { - "epoch": 0.4909761634506243, + "epoch": 0.490294426025107, "grad_norm": 0.0, - "learning_rate": 1.0777673120268183e-05, - "loss": 0.8973, + "learning_rate": 1.0799458179621552e-05, + "loss": 0.8791, "step": 17302 }, { - "epoch": 0.4910045402951192, + "epoch": 0.4903227634673694, "grad_norm": 0.0, - "learning_rate": 1.0776756822310615e-05, - "loss": 1.0618, + "learning_rate": 1.0798543324641096e-05, + "loss": 0.9011, "step": 17303 }, { - "epoch": 0.49103291713961406, + "epoch": 0.4903511009096319, "grad_norm": 0.0, - "learning_rate": 1.0775840517791741e-05, - "loss": 0.8569, + "learning_rate": 1.0797628462934214e-05, + "loss": 0.7843, "step": 17304 }, { - "epoch": 0.49106129398410897, + "epoch": 0.49037943835189435, "grad_norm": 0.0, - "learning_rate": 1.077492420671931e-05, - "loss": 0.8656, + "learning_rate": 1.079671359450861e-05, + "loss": 0.8553, "step": 17305 }, { - "epoch": 0.4910896708286039, + "epoch": 0.49040777579415684, "grad_norm": 0.0, - "learning_rate": 1.0774007889101062e-05, - "loss": 0.92, + "learning_rate": 1.079579871937199e-05, + "loss": 0.8313, "step": 17306 }, { - "epoch": 0.49111804767309875, + "epoch": 0.4904361132364193, "grad_norm": 0.0, - "learning_rate": 1.077309156494473e-05, - "loss": 0.8425, + "learning_rate": 1.0794883837532066e-05, + "loss": 0.9081, "step": 17307 }, { - "epoch": 0.49114642451759366, + "epoch": 0.4904644506786817, "grad_norm": 0.0, - "learning_rate": 1.0772175234258063e-05, - "loss": 0.9011, + "learning_rate": 1.0793968948996538e-05, + "loss": 0.8114, "step": 17308 }, { - "epoch": 0.4911748013620885, + "epoch": 0.4904927881209442, "grad_norm": 0.0, - "learning_rate": 1.0771258897048796e-05, - "loss": 0.9124, + "learning_rate": 1.0793054053773118e-05, + "loss": 0.9049, "step": 17309 }, { - "epoch": 0.49120317820658344, + "epoch": 0.49052112556320665, "grad_norm": 0.0, - "learning_rate": 1.0770342553324667e-05, - "loss": 1.0024, + "learning_rate": 1.0792139151869505e-05, + "loss": 0.7191, "step": 17310 }, { - "epoch": 0.4912315550510783, + "epoch": 0.49054946300546914, "grad_norm": 0.0, - "learning_rate": 1.076942620309342e-05, - "loss": 0.9258, + "learning_rate": 1.0791224243293412e-05, + "loss": 0.766, "step": 17311 }, { - "epoch": 0.4912599318955732, + "epoch": 0.4905778004477316, "grad_norm": 0.0, - "learning_rate": 1.0768509846362798e-05, - "loss": 0.8387, + "learning_rate": 1.0790309328052539e-05, + "loss": 0.9142, "step": 17312 }, { - "epoch": 0.4912883087400681, + "epoch": 0.4906061378899941, "grad_norm": 0.0, - "learning_rate": 1.076759348314054e-05, - "loss": 0.8803, + "learning_rate": 1.0789394406154603e-05, + "loss": 0.8659, "step": 17313 }, { - "epoch": 0.491316685584563, + "epoch": 0.4906344753322565, "grad_norm": 0.0, - "learning_rate": 1.0766677113434381e-05, - "loss": 0.8415, + "learning_rate": 1.07884794776073e-05, + "loss": 1.0161, "step": 17314 }, { - "epoch": 0.4913450624290579, + "epoch": 0.49066281277451895, "grad_norm": 0.0, - "learning_rate": 1.0765760737252072e-05, - "loss": 0.9374, + "learning_rate": 1.0787564542418346e-05, + "loss": 0.9418, "step": 17315 }, { - "epoch": 0.49137343927355276, + "epoch": 0.49069115021678145, "grad_norm": 0.0, - "learning_rate": 1.0764844354601342e-05, - "loss": 0.7569, + "learning_rate": 1.0786649600595442e-05, + "loss": 0.8539, "step": 17316 }, { - "epoch": 0.4914018161180477, + "epoch": 0.4907194876590439, "grad_norm": 0.0, - "learning_rate": 1.0763927965489938e-05, - "loss": 0.8461, + "learning_rate": 1.0785734652146296e-05, + "loss": 0.8953, "step": 17317 }, { - "epoch": 0.4914301929625426, + "epoch": 0.4907478251013064, "grad_norm": 0.0, - "learning_rate": 1.07630115699256e-05, - "loss": 0.87, + "learning_rate": 1.0784819697078614e-05, + "loss": 0.8865, "step": 17318 }, { - "epoch": 0.49145856980703745, + "epoch": 0.4907761625435688, "grad_norm": 0.0, - "learning_rate": 1.0762095167916074e-05, - "loss": 0.8844, + "learning_rate": 1.0783904735400103e-05, + "loss": 0.9213, "step": 17319 }, { - "epoch": 0.49148694665153236, + "epoch": 0.49080449998583126, "grad_norm": 0.0, - "learning_rate": 1.0761178759469091e-05, - "loss": 0.8719, + "learning_rate": 1.0782989767118475e-05, + "loss": 0.7867, "step": 17320 }, { - "epoch": 0.4915153234960272, + "epoch": 0.49083283742809375, "grad_norm": 0.0, - "learning_rate": 1.0760262344592399e-05, - "loss": 0.8096, + "learning_rate": 1.0782074792241432e-05, + "loss": 1.0515, "step": 17321 }, { - "epoch": 0.49154370034052214, + "epoch": 0.4908611748703562, "grad_norm": 0.0, - "learning_rate": 1.0759345923293736e-05, - "loss": 0.9627, + "learning_rate": 1.0781159810776682e-05, + "loss": 0.8986, "step": 17322 }, { - "epoch": 0.49157207718501705, + "epoch": 0.4908895123126187, "grad_norm": 0.0, - "learning_rate": 1.0758429495580844e-05, - "loss": 0.9264, + "learning_rate": 1.0780244822731935e-05, + "loss": 0.9046, "step": 17323 }, { - "epoch": 0.4916004540295119, + "epoch": 0.4909178497548811, "grad_norm": 0.0, - "learning_rate": 1.0757513061461462e-05, - "loss": 0.8764, + "learning_rate": 1.0779329828114895e-05, + "loss": 0.7873, "step": 17324 }, { - "epoch": 0.49162883087400683, + "epoch": 0.4909461871971436, "grad_norm": 0.0, - "learning_rate": 1.0756596620943335e-05, - "loss": 0.8598, + "learning_rate": 1.077841482693327e-05, + "loss": 0.8823, "step": 17325 }, { - "epoch": 0.4916572077185017, + "epoch": 0.49097452463940605, "grad_norm": 0.0, - "learning_rate": 1.0755680174034203e-05, - "loss": 0.8957, + "learning_rate": 1.0777499819194766e-05, + "loss": 0.9085, "step": 17326 }, { - "epoch": 0.4916855845629966, + "epoch": 0.4910028620816685, "grad_norm": 0.0, - "learning_rate": 1.0754763720741805e-05, - "loss": 0.9285, + "learning_rate": 1.0776584804907096e-05, + "loss": 0.9437, "step": 17327 }, { - "epoch": 0.49171396140749146, + "epoch": 0.491031199523931, "grad_norm": 0.0, - "learning_rate": 1.0753847261073885e-05, - "loss": 0.9406, + "learning_rate": 1.0775669784077961e-05, + "loss": 0.9459, "step": 17328 }, { - "epoch": 0.4917423382519864, + "epoch": 0.4910595369661934, "grad_norm": 0.0, - "learning_rate": 1.0752930795038183e-05, - "loss": 0.8809, + "learning_rate": 1.0774754756715074e-05, + "loss": 0.9209, "step": 17329 }, { - "epoch": 0.4917707150964813, + "epoch": 0.4910878744084559, "grad_norm": 0.0, - "learning_rate": 1.075201432264244e-05, - "loss": 0.8947, + "learning_rate": 1.0773839722826137e-05, + "loss": 0.8882, "step": 17330 }, { - "epoch": 0.49179909194097615, + "epoch": 0.49111621185071835, "grad_norm": 0.0, - "learning_rate": 1.07510978438944e-05, - "loss": 0.9396, + "learning_rate": 1.0772924682418862e-05, + "loss": 0.8673, "step": 17331 }, { - "epoch": 0.49182746878547107, + "epoch": 0.4911445492929808, "grad_norm": 0.0, - "learning_rate": 1.0750181358801803e-05, - "loss": 0.9315, + "learning_rate": 1.0772009635500952e-05, + "loss": 0.9475, "step": 17332 }, { - "epoch": 0.4918558456299659, + "epoch": 0.4911728867352433, "grad_norm": 0.0, - "learning_rate": 1.0749264867372389e-05, - "loss": 0.9129, + "learning_rate": 1.0771094582080124e-05, + "loss": 0.9059, "step": 17333 }, { - "epoch": 0.49188422247446084, + "epoch": 0.4912012241775057, "grad_norm": 0.0, - "learning_rate": 1.0748348369613899e-05, - "loss": 0.8378, + "learning_rate": 1.0770179522164079e-05, + "loss": 0.9017, "step": 17334 }, { - "epoch": 0.49191259931895576, + "epoch": 0.4912295616197682, "grad_norm": 0.0, - "learning_rate": 1.074743186553408e-05, - "loss": 0.8487, + "learning_rate": 1.0769264455760521e-05, + "loss": 0.8299, "step": 17335 }, { - "epoch": 0.4919409761634506, + "epoch": 0.49125789906203066, "grad_norm": 0.0, - "learning_rate": 1.074651535514067e-05, - "loss": 0.8443, + "learning_rate": 1.0768349382877168e-05, + "loss": 0.9379, "step": 17336 }, { - "epoch": 0.49196935300794553, + "epoch": 0.49128623650429315, "grad_norm": 0.0, - "learning_rate": 1.074559883844141e-05, - "loss": 0.8935, + "learning_rate": 1.076743430352172e-05, + "loss": 0.8283, "step": 17337 }, { - "epoch": 0.4919977298524404, + "epoch": 0.4913145739465556, "grad_norm": 0.0, - "learning_rate": 1.0744682315444044e-05, - "loss": 1.0414, + "learning_rate": 1.076651921770189e-05, + "loss": 0.9069, "step": 17338 }, { - "epoch": 0.4920261066969353, + "epoch": 0.491342911388818, "grad_norm": 0.0, - "learning_rate": 1.0743765786156313e-05, - "loss": 0.9195, + "learning_rate": 1.0765604125425381e-05, + "loss": 0.9766, "step": 17339 }, { - "epoch": 0.49205448354143017, + "epoch": 0.4913712488310805, "grad_norm": 0.0, - "learning_rate": 1.0742849250585957e-05, - "loss": 0.8606, + "learning_rate": 1.0764689026699909e-05, + "loss": 0.8276, "step": 17340 }, { - "epoch": 0.4920828603859251, + "epoch": 0.49139958627334296, "grad_norm": 0.0, - "learning_rate": 1.074193270874072e-05, - "loss": 0.905, + "learning_rate": 1.0763773921533174e-05, + "loss": 0.7896, "step": 17341 }, { - "epoch": 0.49211123723042, + "epoch": 0.49142792371560545, "grad_norm": 0.0, - "learning_rate": 1.0741016160628346e-05, - "loss": 1.0023, + "learning_rate": 1.076285880993289e-05, + "loss": 0.8667, "step": 17342 }, { - "epoch": 0.49213961407491486, + "epoch": 0.4914562611578679, "grad_norm": 0.0, - "learning_rate": 1.0740099606256576e-05, - "loss": 0.9565, + "learning_rate": 1.0761943691906758e-05, + "loss": 0.8793, "step": 17343 }, { - "epoch": 0.49216799091940977, + "epoch": 0.49148459860013033, "grad_norm": 0.0, - "learning_rate": 1.0739183045633148e-05, - "loss": 1.1, + "learning_rate": 1.0761028567462492e-05, + "loss": 0.8285, "step": 17344 }, { - "epoch": 0.49219636776390463, + "epoch": 0.4915129360423928, "grad_norm": 0.0, - "learning_rate": 1.0738266478765811e-05, - "loss": 0.8959, + "learning_rate": 1.0760113436607804e-05, + "loss": 0.8767, "step": 17345 }, { - "epoch": 0.49222474460839954, + "epoch": 0.49154127348465526, "grad_norm": 0.0, - "learning_rate": 1.0737349905662305e-05, - "loss": 0.932, + "learning_rate": 1.0759198299350398e-05, + "loss": 0.9229, "step": 17346 }, { - "epoch": 0.49225312145289446, + "epoch": 0.49156961092691775, "grad_norm": 0.0, - "learning_rate": 1.0736433326330366e-05, - "loss": 0.8869, + "learning_rate": 1.075828315569798e-05, + "loss": 0.9792, "step": 17347 }, { - "epoch": 0.4922814982973893, + "epoch": 0.4915979483691802, "grad_norm": 0.0, - "learning_rate": 1.0735516740777742e-05, - "loss": 0.8511, + "learning_rate": 1.0757368005658264e-05, + "loss": 0.9055, "step": 17348 }, { - "epoch": 0.49230987514188423, + "epoch": 0.49162628581144263, "grad_norm": 0.0, - "learning_rate": 1.073460014901218e-05, - "loss": 0.9692, + "learning_rate": 1.0756452849238955e-05, + "loss": 0.8993, "step": 17349 }, { - "epoch": 0.4923382519863791, + "epoch": 0.4916546232537051, "grad_norm": 0.0, - "learning_rate": 1.0733683551041416e-05, - "loss": 1.0201, + "learning_rate": 1.075553768644776e-05, + "loss": 0.8857, "step": 17350 }, { - "epoch": 0.492366628830874, + "epoch": 0.49168296069596756, "grad_norm": 0.0, - "learning_rate": 1.073276694687319e-05, - "loss": 0.9275, + "learning_rate": 1.0754622517292393e-05, + "loss": 0.9618, "step": 17351 }, { - "epoch": 0.4923950056753689, + "epoch": 0.49171129813823006, "grad_norm": 0.0, - "learning_rate": 1.0731850336515253e-05, - "loss": 0.9904, + "learning_rate": 1.0753707341780562e-05, + "loss": 0.8988, "step": 17352 }, { - "epoch": 0.4924233825198638, + "epoch": 0.4917396355804925, "grad_norm": 0.0, - "learning_rate": 1.0730933719975343e-05, - "loss": 0.8881, + "learning_rate": 1.075279215991997e-05, + "loss": 0.7666, "step": 17353 }, { - "epoch": 0.4924517593643587, + "epoch": 0.491767973022755, "grad_norm": 0.0, - "learning_rate": 1.07300170972612e-05, - "loss": 0.9503, + "learning_rate": 1.0751876971718336e-05, + "loss": 1.0208, "step": 17354 }, { - "epoch": 0.49248013620885356, + "epoch": 0.4917963104650174, "grad_norm": 0.0, - "learning_rate": 1.0729100468380574e-05, - "loss": 0.8992, + "learning_rate": 1.0750961777183357e-05, + "loss": 0.8627, "step": 17355 }, { - "epoch": 0.4925085130533485, + "epoch": 0.49182464790727987, "grad_norm": 0.0, - "learning_rate": 1.0728183833341199e-05, - "loss": 0.967, + "learning_rate": 1.0750046576322752e-05, + "loss": 0.8737, "step": 17356 }, { - "epoch": 0.49253688989784333, + "epoch": 0.49185298534954236, "grad_norm": 0.0, - "learning_rate": 1.0727267192150825e-05, - "loss": 0.9512, + "learning_rate": 1.0749131369144224e-05, + "loss": 0.8668, "step": 17357 }, { - "epoch": 0.49256526674233825, + "epoch": 0.4918813227918048, "grad_norm": 0.0, - "learning_rate": 1.0726350544817192e-05, - "loss": 0.9176, + "learning_rate": 1.0748216155655484e-05, + "loss": 0.9446, "step": 17358 }, { - "epoch": 0.49259364358683316, + "epoch": 0.4919096602340673, "grad_norm": 0.0, - "learning_rate": 1.0725433891348043e-05, - "loss": 0.8867, + "learning_rate": 1.0747300935864245e-05, + "loss": 0.8693, "step": 17359 }, { - "epoch": 0.492622020431328, + "epoch": 0.49193799767632973, "grad_norm": 0.0, - "learning_rate": 1.0724517231751123e-05, - "loss": 0.8959, + "learning_rate": 1.0746385709778209e-05, + "loss": 0.989, "step": 17360 }, { - "epoch": 0.49265039727582294, + "epoch": 0.49196633511859217, "grad_norm": 0.0, - "learning_rate": 1.0723600566034172e-05, - "loss": 0.7663, + "learning_rate": 1.0745470477405091e-05, + "loss": 0.8272, "step": 17361 }, { - "epoch": 0.4926787741203178, + "epoch": 0.49199467256085466, "grad_norm": 0.0, - "learning_rate": 1.0722683894204935e-05, - "loss": 0.9257, + "learning_rate": 1.0744555238752598e-05, + "loss": 0.9955, "step": 17362 }, { - "epoch": 0.4927071509648127, + "epoch": 0.4920230100031171, "grad_norm": 0.0, - "learning_rate": 1.0721767216271153e-05, - "loss": 0.9673, + "learning_rate": 1.074363999382844e-05, + "loss": 0.8625, "step": 17363 }, { - "epoch": 0.4927355278093076, + "epoch": 0.4920513474453796, "grad_norm": 0.0, - "learning_rate": 1.0720850532240572e-05, - "loss": 0.9957, + "learning_rate": 1.0742724742640323e-05, + "loss": 0.9642, "step": 17364 }, { - "epoch": 0.4927639046538025, + "epoch": 0.49207968488764203, "grad_norm": 0.0, - "learning_rate": 1.0719933842120933e-05, - "loss": 0.9534, + "learning_rate": 1.0741809485195962e-05, + "loss": 0.8623, "step": 17365 }, { - "epoch": 0.4927922814982974, + "epoch": 0.4921080223299045, "grad_norm": 0.0, - "learning_rate": 1.0719017145919984e-05, - "loss": 0.8293, + "learning_rate": 1.0740894221503067e-05, + "loss": 0.9371, "step": 17366 }, { - "epoch": 0.49282065834279226, + "epoch": 0.49213635977216696, "grad_norm": 0.0, - "learning_rate": 1.0718100443645461e-05, - "loss": 0.7501, + "learning_rate": 1.0739978951569343e-05, + "loss": 0.9463, "step": 17367 }, { - "epoch": 0.4928490351872872, + "epoch": 0.4921646972144294, "grad_norm": 0.0, - "learning_rate": 1.0717183735305112e-05, - "loss": 0.8624, + "learning_rate": 1.0739063675402499e-05, + "loss": 0.9527, "step": 17368 }, { - "epoch": 0.4928774120317821, + "epoch": 0.4921930346566919, "grad_norm": 0.0, - "learning_rate": 1.0716267020906681e-05, - "loss": 0.9081, + "learning_rate": 1.0738148393010251e-05, + "loss": 0.8923, "step": 17369 }, { - "epoch": 0.49290578887627695, + "epoch": 0.49222137209895434, "grad_norm": 0.0, - "learning_rate": 1.0715350300457912e-05, - "loss": 0.8433, + "learning_rate": 1.07372331044003e-05, + "loss": 0.8899, "step": 17370 }, { - "epoch": 0.49293416572077187, + "epoch": 0.49224970954121683, "grad_norm": 0.0, - "learning_rate": 1.071443357396654e-05, - "loss": 0.8976, + "learning_rate": 1.0736317809580365e-05, + "loss": 0.8317, "step": 17371 }, { - "epoch": 0.4929625425652667, + "epoch": 0.49227804698347927, "grad_norm": 0.0, - "learning_rate": 1.0713516841440321e-05, - "loss": 0.9626, + "learning_rate": 1.073540250855815e-05, + "loss": 0.8843, "step": 17372 }, { - "epoch": 0.49299091940976164, + "epoch": 0.4923063844257417, "grad_norm": 0.0, - "learning_rate": 1.0712600102886995e-05, - "loss": 0.817, + "learning_rate": 1.0734487201341368e-05, + "loss": 0.9796, "step": 17373 }, { - "epoch": 0.4930192962542565, + "epoch": 0.4923347218680042, "grad_norm": 0.0, - "learning_rate": 1.0711683358314298e-05, - "loss": 0.9301, + "learning_rate": 1.0733571887937726e-05, + "loss": 0.8784, "step": 17374 }, { - "epoch": 0.4930476730987514, + "epoch": 0.49236305931026664, "grad_norm": 0.0, - "learning_rate": 1.0710766607729983e-05, - "loss": 0.8438, + "learning_rate": 1.0732656568354938e-05, + "loss": 0.856, "step": 17375 }, { - "epoch": 0.49307604994324633, + "epoch": 0.49239139675252913, "grad_norm": 0.0, - "learning_rate": 1.0709849851141792e-05, - "loss": 0.9989, + "learning_rate": 1.0731741242600709e-05, + "loss": 0.9035, "step": 17376 }, { - "epoch": 0.4931044267877412, + "epoch": 0.49241973419479157, "grad_norm": 0.0, - "learning_rate": 1.0708933088557468e-05, - "loss": 0.9778, + "learning_rate": 1.0730825910682751e-05, + "loss": 0.8554, "step": 17377 }, { - "epoch": 0.4931328036322361, + "epoch": 0.49244807163705406, "grad_norm": 0.0, - "learning_rate": 1.0708016319984751e-05, - "loss": 1.011, + "learning_rate": 1.0729910572608776e-05, + "loss": 0.8431, "step": 17378 }, { - "epoch": 0.49316118047673096, + "epoch": 0.4924764090793165, "grad_norm": 0.0, - "learning_rate": 1.0707099545431388e-05, - "loss": 0.8192, + "learning_rate": 1.0728995228386496e-05, + "loss": 0.8955, "step": 17379 }, { - "epoch": 0.4931895573212259, + "epoch": 0.49250474652157894, "grad_norm": 0.0, - "learning_rate": 1.0706182764905124e-05, - "loss": 0.8413, + "learning_rate": 1.0728079878023617e-05, + "loss": 0.8156, "step": 17380 }, { - "epoch": 0.4932179341657208, + "epoch": 0.49253308396384143, "grad_norm": 0.0, - "learning_rate": 1.0705265978413703e-05, - "loss": 0.9194, + "learning_rate": 1.0727164521527848e-05, + "loss": 0.8939, "step": 17381 }, { - "epoch": 0.49324631101021565, + "epoch": 0.49256142140610387, "grad_norm": 0.0, - "learning_rate": 1.0704349185964868e-05, - "loss": 0.9533, + "learning_rate": 1.0726249158906908e-05, + "loss": 0.9092, "step": 17382 }, { - "epoch": 0.49327468785471057, + "epoch": 0.49258975884836637, "grad_norm": 0.0, - "learning_rate": 1.0703432387566367e-05, - "loss": 0.8894, + "learning_rate": 1.0725333790168496e-05, + "loss": 0.9064, "step": 17383 }, { - "epoch": 0.49330306469920543, + "epoch": 0.4926180962906288, "grad_norm": 0.0, - "learning_rate": 1.0702515583225936e-05, - "loss": 0.8316, + "learning_rate": 1.0724418415320328e-05, + "loss": 0.8605, "step": 17384 }, { - "epoch": 0.49333144154370034, + "epoch": 0.49264643373289124, "grad_norm": 0.0, - "learning_rate": 1.070159877295133e-05, - "loss": 0.8463, + "learning_rate": 1.0723503034370117e-05, + "loss": 0.949, "step": 17385 }, { - "epoch": 0.49335981838819526, + "epoch": 0.49267477117515374, "grad_norm": 0.0, - "learning_rate": 1.0700681956750282e-05, - "loss": 0.9068, + "learning_rate": 1.0722587647325573e-05, + "loss": 0.9611, "step": 17386 }, { - "epoch": 0.4933881952326901, + "epoch": 0.4927031086174162, "grad_norm": 0.0, - "learning_rate": 1.0699765134630544e-05, - "loss": 0.8876, + "learning_rate": 1.0721672254194404e-05, + "loss": 0.8323, "step": 17387 }, { - "epoch": 0.49341657207718503, + "epoch": 0.49273144605967867, "grad_norm": 0.0, - "learning_rate": 1.0698848306599858e-05, - "loss": 0.8811, + "learning_rate": 1.0720756854984322e-05, + "loss": 0.7914, "step": 17388 }, { - "epoch": 0.4934449489216799, + "epoch": 0.4927597835019411, "grad_norm": 0.0, - "learning_rate": 1.0697931472665972e-05, - "loss": 0.85, + "learning_rate": 1.0719841449703035e-05, + "loss": 0.8883, "step": 17389 }, { - "epoch": 0.4934733257661748, + "epoch": 0.4927881209442036, "grad_norm": 0.0, - "learning_rate": 1.0697014632836627e-05, - "loss": 0.9152, + "learning_rate": 1.0718926038358256e-05, + "loss": 0.8657, "step": 17390 }, { - "epoch": 0.49350170261066967, + "epoch": 0.49281645838646604, "grad_norm": 0.0, - "learning_rate": 1.0696097787119563e-05, - "loss": 0.9452, + "learning_rate": 1.0718010620957697e-05, + "loss": 0.8958, "step": 17391 }, { - "epoch": 0.4935300794551646, + "epoch": 0.4928447958287285, "grad_norm": 0.0, - "learning_rate": 1.0695180935522536e-05, - "loss": 0.9033, + "learning_rate": 1.0717095197509068e-05, + "loss": 0.91, "step": 17392 }, { - "epoch": 0.4935584562996595, + "epoch": 0.49287313327099097, "grad_norm": 0.0, - "learning_rate": 1.069426407805328e-05, - "loss": 1.02, + "learning_rate": 1.071617976802008e-05, + "loss": 0.9572, "step": 17393 }, { - "epoch": 0.49358683314415436, + "epoch": 0.4929014707132534, "grad_norm": 0.0, - "learning_rate": 1.0693347214719545e-05, - "loss": 0.8619, + "learning_rate": 1.0715264332498445e-05, + "loss": 0.9041, "step": 17394 }, { - "epoch": 0.4936152099886493, + "epoch": 0.4929298081555159, "grad_norm": 0.0, - "learning_rate": 1.0692430345529074e-05, - "loss": 0.8041, + "learning_rate": 1.0714348890951871e-05, + "loss": 0.8568, "step": 17395 }, { - "epoch": 0.49364358683314413, + "epoch": 0.49295814559777834, "grad_norm": 0.0, - "learning_rate": 1.0691513470489616e-05, - "loss": 0.9913, + "learning_rate": 1.071343344338807e-05, + "loss": 0.9041, "step": 17396 }, { - "epoch": 0.49367196367763905, + "epoch": 0.4929864830400408, "grad_norm": 0.0, - "learning_rate": 1.0690596589608912e-05, - "loss": 0.9246, + "learning_rate": 1.0712517989814754e-05, + "loss": 0.8568, "step": 17397 }, { - "epoch": 0.49370034052213396, + "epoch": 0.4930148204823033, "grad_norm": 0.0, - "learning_rate": 1.0689679702894704e-05, - "loss": 0.9018, + "learning_rate": 1.071160253023964e-05, + "loss": 0.9727, "step": 17398 }, { - "epoch": 0.4937287173666288, + "epoch": 0.4930431579245657, "grad_norm": 0.0, - "learning_rate": 1.0688762810354745e-05, - "loss": 0.9021, + "learning_rate": 1.071068706467043e-05, + "loss": 0.8282, "step": 17399 }, { - "epoch": 0.49375709421112374, + "epoch": 0.4930714953668282, "grad_norm": 0.0, - "learning_rate": 1.0687845911996775e-05, - "loss": 0.9115, + "learning_rate": 1.070977159311484e-05, + "loss": 0.9776, "step": 17400 }, { - "epoch": 0.4937854710556186, + "epoch": 0.49309983280909064, "grad_norm": 0.0, - "learning_rate": 1.0686929007828536e-05, - "loss": 0.9444, + "learning_rate": 1.0708856115580578e-05, + "loss": 0.966, "step": 17401 }, { - "epoch": 0.4938138479001135, + "epoch": 0.49312817025135314, "grad_norm": 0.0, - "learning_rate": 1.068601209785778e-05, - "loss": 0.9143, + "learning_rate": 1.070794063207536e-05, + "loss": 0.8448, "step": 17402 }, { - "epoch": 0.4938422247446084, + "epoch": 0.4931565076936156, "grad_norm": 0.0, - "learning_rate": 1.0685095182092246e-05, - "loss": 0.9286, + "learning_rate": 1.0707025142606893e-05, + "loss": 0.8852, "step": 17403 }, { - "epoch": 0.4938706015891033, + "epoch": 0.493184845135878, "grad_norm": 0.0, - "learning_rate": 1.0684178260539686e-05, - "loss": 0.9259, + "learning_rate": 1.0706109647182891e-05, + "loss": 0.9413, "step": 17404 }, { - "epoch": 0.4938989784335982, + "epoch": 0.4932131825781405, "grad_norm": 0.0, - "learning_rate": 1.0683261333207837e-05, - "loss": 0.9288, + "learning_rate": 1.0705194145811066e-05, + "loss": 0.8838, "step": 17405 }, { - "epoch": 0.49392735527809306, + "epoch": 0.49324152002040295, "grad_norm": 0.0, - "learning_rate": 1.0682344400104454e-05, - "loss": 0.9109, + "learning_rate": 1.0704278638499128e-05, + "loss": 0.8077, "step": 17406 }, { - "epoch": 0.493955732122588, + "epoch": 0.49326985746266544, "grad_norm": 0.0, - "learning_rate": 1.0681427461237275e-05, - "loss": 0.774, + "learning_rate": 1.0703363125254792e-05, + "loss": 0.8053, "step": 17407 }, { - "epoch": 0.49398410896708284, + "epoch": 0.4932981949049279, "grad_norm": 0.0, - "learning_rate": 1.0680510516614047e-05, - "loss": 0.8892, + "learning_rate": 1.0702447606085767e-05, + "loss": 0.7859, "step": 17408 }, { - "epoch": 0.49401248581157775, + "epoch": 0.4933265323471903, "grad_norm": 0.0, - "learning_rate": 1.0679593566242514e-05, - "loss": 0.818, + "learning_rate": 1.0701532080999762e-05, + "loss": 0.8795, "step": 17409 }, { - "epoch": 0.49404086265607267, + "epoch": 0.4933548697894528, "grad_norm": 0.0, - "learning_rate": 1.0678676610130427e-05, - "loss": 0.908, + "learning_rate": 1.0700616550004492e-05, + "loss": 0.8985, "step": 17410 }, { - "epoch": 0.4940692395005675, + "epoch": 0.49338320723171525, "grad_norm": 0.0, - "learning_rate": 1.0677759648285529e-05, - "loss": 0.9073, + "learning_rate": 1.069970101310767e-05, + "loss": 0.9544, "step": 17411 }, { - "epoch": 0.49409761634506244, + "epoch": 0.49341154467397774, "grad_norm": 0.0, - "learning_rate": 1.067684268071556e-05, - "loss": 1.0313, + "learning_rate": 1.0698785470317008e-05, + "loss": 0.8958, "step": 17412 }, { - "epoch": 0.4941259931895573, + "epoch": 0.4934398821162402, "grad_norm": 0.0, - "learning_rate": 1.0675925707428274e-05, - "loss": 0.7686, + "learning_rate": 1.0697869921640216e-05, + "loss": 0.9889, "step": 17413 }, { - "epoch": 0.4941543700340522, + "epoch": 0.4934682195585027, "grad_norm": 0.0, - "learning_rate": 1.0675008728431415e-05, - "loss": 0.9192, + "learning_rate": 1.0696954367085004e-05, + "loss": 0.7845, "step": 17414 }, { - "epoch": 0.49418274687854713, + "epoch": 0.4934965570007651, "grad_norm": 0.0, - "learning_rate": 1.0674091743732721e-05, - "loss": 0.9421, + "learning_rate": 1.0696038806659087e-05, + "loss": 0.8928, "step": 17415 }, { - "epoch": 0.494211123723042, + "epoch": 0.49352489444302755, "grad_norm": 0.0, - "learning_rate": 1.067317475333995e-05, - "loss": 0.9772, + "learning_rate": 1.0695123240370178e-05, + "loss": 0.8627, "step": 17416 }, { - "epoch": 0.4942395005675369, + "epoch": 0.49355323188529004, "grad_norm": 0.0, - "learning_rate": 1.0672257757260838e-05, - "loss": 0.813, + "learning_rate": 1.0694207668225989e-05, + "loss": 1.0263, "step": 17417 }, { - "epoch": 0.49426787741203176, + "epoch": 0.4935815693275525, "grad_norm": 0.0, - "learning_rate": 1.0671340755503135e-05, - "loss": 0.9408, + "learning_rate": 1.0693292090234228e-05, + "loss": 0.952, "step": 17418 }, { - "epoch": 0.4942962542565267, + "epoch": 0.493609906769815, "grad_norm": 0.0, - "learning_rate": 1.0670423748074586e-05, - "loss": 0.8319, + "learning_rate": 1.0692376506402614e-05, + "loss": 0.9047, "step": 17419 }, { - "epoch": 0.49432463110102154, + "epoch": 0.4936382442120774, "grad_norm": 0.0, - "learning_rate": 1.066950673498294e-05, - "loss": 0.895, + "learning_rate": 1.0691460916738854e-05, + "loss": 0.898, "step": 17420 }, { - "epoch": 0.49435300794551645, + "epoch": 0.49366658165433985, "grad_norm": 0.0, - "learning_rate": 1.0668589716235937e-05, - "loss": 0.8434, + "learning_rate": 1.069054532125066e-05, + "loss": 0.8164, "step": 17421 }, { - "epoch": 0.49438138479001137, + "epoch": 0.49369491909660235, "grad_norm": 0.0, - "learning_rate": 1.0667672691841329e-05, - "loss": 0.9223, + "learning_rate": 1.0689629719945746e-05, + "loss": 0.8918, "step": 17422 }, { - "epoch": 0.49440976163450623, + "epoch": 0.4937232565388648, "grad_norm": 0.0, - "learning_rate": 1.066675566180686e-05, - "loss": 0.7967, + "learning_rate": 1.0688714112831826e-05, + "loss": 0.8499, "step": 17423 }, { - "epoch": 0.49443813847900114, + "epoch": 0.4937515939811273, "grad_norm": 0.0, - "learning_rate": 1.0665838626140274e-05, - "loss": 0.8811, + "learning_rate": 1.0687798499916613e-05, + "loss": 0.8918, "step": 17424 }, { - "epoch": 0.494466515323496, + "epoch": 0.4937799314233897, "grad_norm": 0.0, - "learning_rate": 1.0664921584849317e-05, - "loss": 0.9686, + "learning_rate": 1.0686882881207818e-05, + "loss": 0.8966, "step": 17425 }, { - "epoch": 0.4944948921679909, + "epoch": 0.4938082688656522, "grad_norm": 0.0, - "learning_rate": 1.0664004537941744e-05, - "loss": 0.8712, + "learning_rate": 1.068596725671315e-05, + "loss": 0.9552, "step": 17426 }, { - "epoch": 0.49452326901248583, + "epoch": 0.49383660630791465, "grad_norm": 0.0, - "learning_rate": 1.0663087485425293e-05, - "loss": 0.9066, + "learning_rate": 1.0685051626440328e-05, + "loss": 0.7915, "step": 17427 }, { - "epoch": 0.4945516458569807, + "epoch": 0.4938649437501771, "grad_norm": 0.0, - "learning_rate": 1.0662170427307709e-05, - "loss": 0.9561, + "learning_rate": 1.0684135990397062e-05, + "loss": 0.953, "step": 17428 }, { - "epoch": 0.4945800227014756, + "epoch": 0.4938932811924396, "grad_norm": 0.0, - "learning_rate": 1.0661253363596742e-05, - "loss": 0.8249, + "learning_rate": 1.068322034859106e-05, + "loss": 0.8438, "step": 17429 }, { - "epoch": 0.49460839954597047, + "epoch": 0.493921618634702, "grad_norm": 0.0, - "learning_rate": 1.0660336294300143e-05, - "loss": 0.9385, + "learning_rate": 1.0682304701030044e-05, + "loss": 0.9071, "step": 17430 }, { - "epoch": 0.4946367763904654, + "epoch": 0.4939499560769645, "grad_norm": 0.0, - "learning_rate": 1.0659419219425649e-05, - "loss": 0.8125, + "learning_rate": 1.0681389047721722e-05, + "loss": 0.8487, "step": 17431 }, { - "epoch": 0.4946651532349603, + "epoch": 0.49397829351922695, "grad_norm": 0.0, - "learning_rate": 1.0658502138981009e-05, - "loss": 0.7705, + "learning_rate": 1.0680473388673807e-05, + "loss": 0.9224, "step": 17432 }, { - "epoch": 0.49469353007945516, + "epoch": 0.4940066309614894, "grad_norm": 0.0, - "learning_rate": 1.0657585052973976e-05, - "loss": 0.8751, + "learning_rate": 1.0679557723894009e-05, + "loss": 0.9199, "step": 17433 }, { - "epoch": 0.49472190692395007, + "epoch": 0.4940349684037519, "grad_norm": 0.0, - "learning_rate": 1.0656667961412293e-05, - "loss": 0.8826, + "learning_rate": 1.0678642053390045e-05, + "loss": 0.8513, "step": 17434 }, { - "epoch": 0.49475028376844493, + "epoch": 0.4940633058460143, "grad_norm": 0.0, - "learning_rate": 1.0655750864303702e-05, - "loss": 0.9128, + "learning_rate": 1.0677726377169628e-05, + "loss": 0.8946, "step": 17435 }, { - "epoch": 0.49477866061293985, + "epoch": 0.4940916432882768, "grad_norm": 0.0, - "learning_rate": 1.0654833761655958e-05, - "loss": 0.894, + "learning_rate": 1.0676810695240469e-05, + "loss": 0.9144, "step": 17436 }, { - "epoch": 0.4948070374574347, + "epoch": 0.49411998073053925, "grad_norm": 0.0, - "learning_rate": 1.0653916653476803e-05, - "loss": 0.8831, + "learning_rate": 1.0675895007610285e-05, + "loss": 0.8893, "step": 17437 }, { - "epoch": 0.4948354143019296, + "epoch": 0.49414831817280175, "grad_norm": 0.0, - "learning_rate": 1.0652999539773985e-05, - "loss": 0.7984, + "learning_rate": 1.0674979314286782e-05, + "loss": 0.8888, "step": 17438 }, { - "epoch": 0.49486379114642454, + "epoch": 0.4941766556150642, "grad_norm": 0.0, - "learning_rate": 1.065208242055525e-05, - "loss": 0.9499, + "learning_rate": 1.0674063615277681e-05, + "loss": 0.881, "step": 17439 }, { - "epoch": 0.4948921679909194, + "epoch": 0.4942049930573266, "grad_norm": 0.0, - "learning_rate": 1.0651165295828342e-05, - "loss": 0.8949, + "learning_rate": 1.0673147910590691e-05, + "loss": 0.8971, "step": 17440 }, { - "epoch": 0.4949205448354143, + "epoch": 0.4942333304995891, "grad_norm": 0.0, - "learning_rate": 1.0650248165601018e-05, - "loss": 0.8046, + "learning_rate": 1.0672232200233525e-05, + "loss": 0.9067, "step": 17441 }, { - "epoch": 0.49494892167990917, + "epoch": 0.49426166794185156, "grad_norm": 0.0, - "learning_rate": 1.0649331029881013e-05, - "loss": 0.9124, + "learning_rate": 1.0671316484213899e-05, + "loss": 0.9216, "step": 17442 }, { - "epoch": 0.4949772985244041, + "epoch": 0.49429000538411405, "grad_norm": 0.0, - "learning_rate": 1.0648413888676084e-05, - "loss": 0.8559, + "learning_rate": 1.0670400762539524e-05, + "loss": 0.9093, "step": 17443 }, { - "epoch": 0.495005675368899, + "epoch": 0.4943183428263765, "grad_norm": 0.0, - "learning_rate": 1.0647496741993972e-05, - "loss": 0.8252, + "learning_rate": 1.0669485035218114e-05, + "loss": 0.8804, "step": 17444 }, { - "epoch": 0.49503405221339386, + "epoch": 0.4943466802686389, "grad_norm": 0.0, - "learning_rate": 1.0646579589842423e-05, - "loss": 0.9025, + "learning_rate": 1.0668569302257385e-05, + "loss": 0.9759, "step": 17445 }, { - "epoch": 0.4950624290578888, + "epoch": 0.4943750177109014, "grad_norm": 0.0, - "learning_rate": 1.0645662432229192e-05, - "loss": 0.9508, + "learning_rate": 1.0667653563665049e-05, + "loss": 0.8889, "step": 17446 }, { - "epoch": 0.49509080590238363, + "epoch": 0.49440335515316386, "grad_norm": 0.0, - "learning_rate": 1.064474526916202e-05, - "loss": 0.8962, + "learning_rate": 1.0666737819448816e-05, + "loss": 0.9833, "step": 17447 }, { - "epoch": 0.49511918274687855, + "epoch": 0.49443169259542635, "grad_norm": 0.0, - "learning_rate": 1.0643828100648652e-05, - "loss": 0.9047, + "learning_rate": 1.0665822069616404e-05, + "loss": 0.8474, "step": 17448 }, { - "epoch": 0.49514755959137347, + "epoch": 0.4944600300376888, "grad_norm": 0.0, - "learning_rate": 1.0642910926696842e-05, - "loss": 0.9558, + "learning_rate": 1.0664906314175525e-05, + "loss": 0.8934, "step": 17449 }, { - "epoch": 0.4951759364358683, + "epoch": 0.4944883674799513, "grad_norm": 0.0, - "learning_rate": 1.0641993747314335e-05, - "loss": 0.8393, + "learning_rate": 1.0663990553133896e-05, + "loss": 0.9157, "step": 17450 }, { - "epoch": 0.49520431328036324, + "epoch": 0.4945167049222137, "grad_norm": 0.0, - "learning_rate": 1.0641076562508878e-05, - "loss": 0.9547, + "learning_rate": 1.0663074786499223e-05, + "loss": 0.9613, "step": 17451 }, { - "epoch": 0.4952326901248581, + "epoch": 0.49454504236447616, "grad_norm": 0.0, - "learning_rate": 1.0640159372288217e-05, - "loss": 0.8514, + "learning_rate": 1.066215901427923e-05, + "loss": 0.8685, "step": 17452 }, { - "epoch": 0.495261066969353, + "epoch": 0.49457337980673866, "grad_norm": 0.0, - "learning_rate": 1.0639242176660103e-05, - "loss": 0.9447, + "learning_rate": 1.0661243236481624e-05, + "loss": 1.0286, "step": 17453 }, { - "epoch": 0.4952894438138479, + "epoch": 0.4946017172490011, "grad_norm": 0.0, - "learning_rate": 1.063832497563228e-05, - "loss": 0.9198, + "learning_rate": 1.0660327453114118e-05, + "loss": 0.8534, "step": 17454 }, { - "epoch": 0.4953178206583428, + "epoch": 0.4946300546912636, "grad_norm": 0.0, - "learning_rate": 1.0637407769212495e-05, - "loss": 0.9359, + "learning_rate": 1.065941166418443e-05, + "loss": 0.8701, "step": 17455 }, { - "epoch": 0.4953461975028377, + "epoch": 0.494658392133526, "grad_norm": 0.0, - "learning_rate": 1.0636490557408501e-05, - "loss": 0.9502, + "learning_rate": 1.0658495869700273e-05, + "loss": 0.8958, "step": 17456 }, { - "epoch": 0.49537457434733256, + "epoch": 0.49468672957578846, "grad_norm": 0.0, - "learning_rate": 1.0635573340228042e-05, - "loss": 0.8166, + "learning_rate": 1.0657580069669363e-05, + "loss": 0.8297, "step": 17457 }, { - "epoch": 0.4954029511918275, + "epoch": 0.49471506701805096, "grad_norm": 0.0, - "learning_rate": 1.0634656117678868e-05, - "loss": 0.8348, + "learning_rate": 1.065666426409941e-05, + "loss": 0.9035, "step": 17458 }, { - "epoch": 0.49543132803632234, + "epoch": 0.4947434044603134, "grad_norm": 0.0, - "learning_rate": 1.0633738889768723e-05, - "loss": 0.9899, + "learning_rate": 1.065574845299813e-05, + "loss": 0.8858, "step": 17459 }, { - "epoch": 0.49545970488081725, + "epoch": 0.4947717419025759, "grad_norm": 0.0, - "learning_rate": 1.063282165650536e-05, - "loss": 0.8425, + "learning_rate": 1.0654832636373239e-05, + "loss": 0.9017, "step": 17460 }, { - "epoch": 0.49548808172531217, + "epoch": 0.49480007934483833, "grad_norm": 0.0, - "learning_rate": 1.0631904417896522e-05, - "loss": 0.9355, + "learning_rate": 1.0653916814232445e-05, + "loss": 0.9495, "step": 17461 }, { - "epoch": 0.49551645856980703, + "epoch": 0.4948284167871008, "grad_norm": 0.0, - "learning_rate": 1.0630987173949959e-05, - "loss": 0.9156, + "learning_rate": 1.0653000986583471e-05, + "loss": 0.9503, "step": 17462 }, { - "epoch": 0.49554483541430194, + "epoch": 0.49485675422936326, "grad_norm": 0.0, - "learning_rate": 1.0630069924673416e-05, - "loss": 0.9311, + "learning_rate": 1.0652085153434025e-05, + "loss": 0.892, "step": 17463 }, { - "epoch": 0.4955732122587968, + "epoch": 0.4948850916716257, "grad_norm": 0.0, - "learning_rate": 1.0629152670074648e-05, - "loss": 0.8501, + "learning_rate": 1.0651169314791825e-05, + "loss": 0.9841, "step": 17464 }, { - "epoch": 0.4956015891032917, + "epoch": 0.4949134291138882, "grad_norm": 0.0, - "learning_rate": 1.0628235410161398e-05, - "loss": 0.8651, + "learning_rate": 1.0650253470664584e-05, + "loss": 0.9776, "step": 17465 }, { - "epoch": 0.49562996594778663, + "epoch": 0.49494176655615063, "grad_norm": 0.0, - "learning_rate": 1.0627318144941415e-05, - "loss": 0.9953, + "learning_rate": 1.0649337621060018e-05, + "loss": 0.8262, "step": 17466 }, { - "epoch": 0.4956583427922815, + "epoch": 0.4949701039984131, "grad_norm": 0.0, - "learning_rate": 1.062640087442245e-05, - "loss": 0.9477, + "learning_rate": 1.0648421765985837e-05, + "loss": 0.7948, "step": 17467 }, { - "epoch": 0.4956867196367764, + "epoch": 0.49499844144067556, "grad_norm": 0.0, - "learning_rate": 1.0625483598612245e-05, - "loss": 0.9512, + "learning_rate": 1.0647505905449758e-05, + "loss": 0.8946, "step": 17468 }, { - "epoch": 0.49571509648127127, + "epoch": 0.495026778882938, "grad_norm": 0.0, - "learning_rate": 1.0624566317518552e-05, - "loss": 0.8416, + "learning_rate": 1.0646590039459499e-05, + "loss": 0.8517, "step": 17469 }, { - "epoch": 0.4957434733257662, + "epoch": 0.4950551163252005, "grad_norm": 0.0, - "learning_rate": 1.0623649031149122e-05, - "loss": 0.9006, + "learning_rate": 1.0645674168022772e-05, + "loss": 0.9438, "step": 17470 }, { - "epoch": 0.49577185017026104, + "epoch": 0.49508345376746293, "grad_norm": 0.0, - "learning_rate": 1.0622731739511699e-05, - "loss": 0.894, + "learning_rate": 1.0644758291147293e-05, + "loss": 0.9037, "step": 17471 }, { - "epoch": 0.49580022701475596, + "epoch": 0.4951117912097254, "grad_norm": 0.0, - "learning_rate": 1.0621814442614033e-05, - "loss": 0.9303, + "learning_rate": 1.0643842408840772e-05, + "loss": 0.9014, "step": 17472 }, { - "epoch": 0.49582860385925087, + "epoch": 0.49514012865198787, "grad_norm": 0.0, - "learning_rate": 1.062089714046387e-05, - "loss": 0.8883, + "learning_rate": 1.064292652111093e-05, + "loss": 0.9068, "step": 17473 }, { - "epoch": 0.49585698070374573, + "epoch": 0.49516846609425036, "grad_norm": 0.0, - "learning_rate": 1.0619979833068965e-05, - "loss": 0.7952, + "learning_rate": 1.0642010627965475e-05, + "loss": 0.7574, "step": 17474 }, { - "epoch": 0.49588535754824065, + "epoch": 0.4951968035365128, "grad_norm": 0.0, - "learning_rate": 1.0619062520437062e-05, - "loss": 0.8808, + "learning_rate": 1.0641094729412132e-05, + "loss": 0.9088, "step": 17475 }, { - "epoch": 0.4959137343927355, + "epoch": 0.49522514097877524, "grad_norm": 0.0, - "learning_rate": 1.0618145202575909e-05, - "loss": 0.8537, + "learning_rate": 1.0640178825458605e-05, + "loss": 0.8445, "step": 17476 }, { - "epoch": 0.4959421112372304, + "epoch": 0.49525347842103773, "grad_norm": 0.0, - "learning_rate": 1.0617227879493257e-05, - "loss": 0.8716, + "learning_rate": 1.0639262916112615e-05, + "loss": 0.9414, "step": 17477 }, { - "epoch": 0.49597048808172534, + "epoch": 0.49528181586330017, "grad_norm": 0.0, - "learning_rate": 1.061631055119685e-05, - "loss": 0.9143, + "learning_rate": 1.063834700138188e-05, + "loss": 0.8362, "step": 17478 }, { - "epoch": 0.4959988649262202, + "epoch": 0.49531015330556266, "grad_norm": 0.0, - "learning_rate": 1.0615393217694441e-05, - "loss": 0.942, + "learning_rate": 1.0637431081274108e-05, + "loss": 0.8492, "step": 17479 }, { - "epoch": 0.4960272417707151, + "epoch": 0.4953384907478251, "grad_norm": 0.0, - "learning_rate": 1.061447587899378e-05, - "loss": 0.8954, + "learning_rate": 1.0636515155797018e-05, + "loss": 0.8403, "step": 17480 }, { - "epoch": 0.49605561861520997, + "epoch": 0.49536682819008754, "grad_norm": 0.0, - "learning_rate": 1.0613558535102613e-05, - "loss": 0.8922, + "learning_rate": 1.0635599224958321e-05, + "loss": 0.9441, "step": 17481 }, { - "epoch": 0.4960839954597049, + "epoch": 0.49539516563235003, "grad_norm": 0.0, - "learning_rate": 1.0612641186028692e-05, - "loss": 0.9243, + "learning_rate": 1.0634683288765741e-05, + "loss": 0.8686, "step": 17482 }, { - "epoch": 0.4961123723041998, + "epoch": 0.49542350307461247, "grad_norm": 0.0, - "learning_rate": 1.0611723831779756e-05, - "loss": 0.7891, + "learning_rate": 1.0633767347226987e-05, + "loss": 0.9173, "step": 17483 }, { - "epoch": 0.49614074914869466, + "epoch": 0.49545184051687496, "grad_norm": 0.0, - "learning_rate": 1.0610806472363568e-05, - "loss": 0.7665, + "learning_rate": 1.063285140034977e-05, + "loss": 0.9017, "step": 17484 }, { - "epoch": 0.4961691259931896, + "epoch": 0.4954801779591374, "grad_norm": 0.0, - "learning_rate": 1.060988910778787e-05, - "loss": 0.8056, + "learning_rate": 1.0631935448141817e-05, + "loss": 0.8445, "step": 17485 }, { - "epoch": 0.49619750283768443, + "epoch": 0.4955085154013999, "grad_norm": 0.0, - "learning_rate": 1.0608971738060405e-05, - "loss": 0.9095, + "learning_rate": 1.0631019490610837e-05, + "loss": 0.9721, "step": 17486 }, { - "epoch": 0.49622587968217935, + "epoch": 0.49553685284366233, "grad_norm": 0.0, - "learning_rate": 1.0608054363188934e-05, - "loss": 0.8696, + "learning_rate": 1.0630103527764542e-05, + "loss": 0.9216, "step": 17487 }, { - "epoch": 0.4962542565266742, + "epoch": 0.4955651902859248, "grad_norm": 0.0, - "learning_rate": 1.06071369831812e-05, - "loss": 0.8684, + "learning_rate": 1.0629187559610649e-05, + "loss": 0.8376, "step": 17488 }, { - "epoch": 0.4962826333711691, + "epoch": 0.49559352772818727, "grad_norm": 0.0, - "learning_rate": 1.0606219598044952e-05, - "loss": 0.9961, + "learning_rate": 1.062827158615688e-05, + "loss": 0.9373, "step": 17489 }, { - "epoch": 0.49631101021566404, + "epoch": 0.4956218651704497, "grad_norm": 0.0, - "learning_rate": 1.060530220778794e-05, - "loss": 0.9049, + "learning_rate": 1.0627355607410948e-05, + "loss": 0.8044, "step": 17490 }, { - "epoch": 0.4963393870601589, + "epoch": 0.4956502026127122, "grad_norm": 0.0, - "learning_rate": 1.0604384812417912e-05, - "loss": 0.8524, + "learning_rate": 1.0626439623380562e-05, + "loss": 0.8684, "step": 17491 }, { - "epoch": 0.4963677639046538, + "epoch": 0.49567854005497464, "grad_norm": 0.0, - "learning_rate": 1.0603467411942618e-05, - "loss": 0.7971, + "learning_rate": 1.0625523634073445e-05, + "loss": 0.8942, "step": 17492 }, { - "epoch": 0.4963961407491487, + "epoch": 0.4957068774972371, "grad_norm": 0.0, - "learning_rate": 1.0602550006369809e-05, - "loss": 1.0152, + "learning_rate": 1.062460763949731e-05, + "loss": 0.8695, "step": 17493 }, { - "epoch": 0.4964245175936436, + "epoch": 0.49573521493949957, "grad_norm": 0.0, - "learning_rate": 1.0601632595707233e-05, - "loss": 0.8514, + "learning_rate": 1.062369163965987e-05, + "loss": 0.8266, "step": 17494 }, { - "epoch": 0.4964528944381385, + "epoch": 0.495763552381762, "grad_norm": 0.0, - "learning_rate": 1.060071517996264e-05, - "loss": 0.9232, + "learning_rate": 1.0622775634568847e-05, + "loss": 0.9054, "step": 17495 }, { - "epoch": 0.49648127128263336, + "epoch": 0.4957918898240245, "grad_norm": 0.0, - "learning_rate": 1.0599797759143775e-05, - "loss": 0.8208, + "learning_rate": 1.0621859624231952e-05, + "loss": 0.8392, "step": 17496 }, { - "epoch": 0.4965096481271283, + "epoch": 0.49582022726628694, "grad_norm": 0.0, - "learning_rate": 1.0598880333258394e-05, - "loss": 0.8249, + "learning_rate": 1.0620943608656901e-05, + "loss": 0.88, "step": 17497 }, { - "epoch": 0.49653802497162314, + "epoch": 0.49584856470854943, "grad_norm": 0.0, - "learning_rate": 1.0597962902314248e-05, - "loss": 0.7915, + "learning_rate": 1.0620027587851417e-05, + "loss": 0.9736, "step": 17498 }, { - "epoch": 0.49656640181611805, + "epoch": 0.49587690215081187, "grad_norm": 0.0, - "learning_rate": 1.0597045466319076e-05, - "loss": 0.9446, + "learning_rate": 1.0619111561823208e-05, + "loss": 0.9864, "step": 17499 }, { - "epoch": 0.4965947786606129, + "epoch": 0.4959052395930743, "grad_norm": 0.0, - "learning_rate": 1.0596128025280637e-05, - "loss": 0.8212, + "learning_rate": 1.0618195530579989e-05, + "loss": 0.9095, "step": 17500 }, { - "epoch": 0.4966231555051078, + "epoch": 0.4959335770353368, "grad_norm": 0.0, - "learning_rate": 1.0595210579206676e-05, - "loss": 0.9082, + "learning_rate": 1.061727949412948e-05, + "loss": 0.9478, "step": 17501 }, { - "epoch": 0.49665153234960274, + "epoch": 0.49596191447759924, "grad_norm": 0.0, - "learning_rate": 1.0594293128104947e-05, - "loss": 0.9115, + "learning_rate": 1.0616363452479399e-05, + "loss": 0.8228, "step": 17502 }, { - "epoch": 0.4966799091940976, + "epoch": 0.49599025191986174, "grad_norm": 0.0, - "learning_rate": 1.0593375671983195e-05, - "loss": 0.8291, + "learning_rate": 1.061544740563746e-05, + "loss": 0.9563, "step": 17503 }, { - "epoch": 0.4967082860385925, + "epoch": 0.4960185893621242, "grad_norm": 0.0, - "learning_rate": 1.0592458210849175e-05, - "loss": 0.8715, + "learning_rate": 1.061453135361138e-05, + "loss": 0.927, "step": 17504 }, { - "epoch": 0.4967366628830874, + "epoch": 0.4960469268043866, "grad_norm": 0.0, - "learning_rate": 1.0591540744710633e-05, - "loss": 1.0117, + "learning_rate": 1.061361529640887e-05, + "loss": 0.9205, "step": 17505 }, { - "epoch": 0.4967650397275823, + "epoch": 0.4960752642466491, "grad_norm": 0.0, - "learning_rate": 1.0590623273575317e-05, - "loss": 0.9682, + "learning_rate": 1.0612699234037653e-05, + "loss": 0.926, "step": 17506 }, { - "epoch": 0.4967934165720772, + "epoch": 0.49610360168891154, "grad_norm": 0.0, - "learning_rate": 1.0589705797450983e-05, - "loss": 0.8602, + "learning_rate": 1.061178316650544e-05, + "loss": 0.9459, "step": 17507 }, { - "epoch": 0.49682179341657207, + "epoch": 0.49613193913117404, "grad_norm": 0.0, - "learning_rate": 1.0588788316345377e-05, - "loss": 0.9501, + "learning_rate": 1.0610867093819954e-05, + "loss": 0.8579, "step": 17508 }, { - "epoch": 0.496850170261067, + "epoch": 0.4961602765734365, "grad_norm": 0.0, - "learning_rate": 1.0587870830266247e-05, - "loss": 0.875, + "learning_rate": 1.0609951015988907e-05, + "loss": 0.8993, "step": 17509 }, { - "epoch": 0.49687854710556184, + "epoch": 0.49618861401569897, "grad_norm": 0.0, - "learning_rate": 1.0586953339221346e-05, - "loss": 0.8641, + "learning_rate": 1.0609034933020015e-05, + "loss": 0.8738, "step": 17510 }, { - "epoch": 0.49690692395005676, + "epoch": 0.4962169514579614, "grad_norm": 0.0, - "learning_rate": 1.0586035843218427e-05, - "loss": 0.8058, + "learning_rate": 1.0608118844920996e-05, + "loss": 0.8857, "step": 17511 }, { - "epoch": 0.49693530079455167, + "epoch": 0.49624528890022385, "grad_norm": 0.0, - "learning_rate": 1.0585118342265235e-05, - "loss": 0.9603, + "learning_rate": 1.0607202751699568e-05, + "loss": 0.9263, "step": 17512 }, { - "epoch": 0.49696367763904653, + "epoch": 0.49627362634248634, "grad_norm": 0.0, - "learning_rate": 1.058420083636952e-05, - "loss": 0.842, + "learning_rate": 1.0606286653363442e-05, + "loss": 0.8979, "step": 17513 }, { - "epoch": 0.49699205448354145, + "epoch": 0.4963019637847488, "grad_norm": 0.0, - "learning_rate": 1.0583283325539037e-05, - "loss": 0.936, + "learning_rate": 1.060537054992034e-05, + "loss": 0.7812, "step": 17514 }, { - "epoch": 0.4970204313280363, + "epoch": 0.4963303012270113, "grad_norm": 0.0, - "learning_rate": 1.0582365809781536e-05, - "loss": 0.8769, + "learning_rate": 1.0604454441377978e-05, + "loss": 0.926, "step": 17515 }, { - "epoch": 0.4970488081725312, + "epoch": 0.4963586386692737, "grad_norm": 0.0, - "learning_rate": 1.0581448289104759e-05, - "loss": 0.874, + "learning_rate": 1.0603538327744071e-05, + "loss": 0.8643, "step": 17516 }, { - "epoch": 0.4970771850170261, + "epoch": 0.49638697611153615, "grad_norm": 0.0, - "learning_rate": 1.0580530763516466e-05, - "loss": 0.8123, + "learning_rate": 1.0602622209026336e-05, + "loss": 0.8835, "step": 17517 }, { - "epoch": 0.497105561861521, + "epoch": 0.49641531355379864, "grad_norm": 0.0, - "learning_rate": 1.0579613233024402e-05, - "loss": 0.9305, + "learning_rate": 1.0601706085232492e-05, + "loss": 0.9256, "step": 17518 }, { - "epoch": 0.4971339387060159, + "epoch": 0.4964436509960611, "grad_norm": 0.0, - "learning_rate": 1.057869569763632e-05, - "loss": 0.7893, + "learning_rate": 1.0600789956370254e-05, + "loss": 0.8222, "step": 17519 }, { - "epoch": 0.49716231555051077, + "epoch": 0.4964719884383236, "grad_norm": 0.0, - "learning_rate": 1.0577778157359969e-05, - "loss": 0.9519, + "learning_rate": 1.0599873822447338e-05, + "loss": 0.7586, "step": 17520 }, { - "epoch": 0.4971906923950057, + "epoch": 0.496500325880586, "grad_norm": 0.0, - "learning_rate": 1.05768606122031e-05, - "loss": 0.9591, + "learning_rate": 1.059895768347146e-05, + "loss": 0.8725, "step": 17521 }, { - "epoch": 0.49721906923950054, + "epoch": 0.4965286633228485, "grad_norm": 0.0, - "learning_rate": 1.0575943062173464e-05, - "loss": 0.8288, + "learning_rate": 1.0598041539450344e-05, + "loss": 0.8594, "step": 17522 }, { - "epoch": 0.49724744608399546, + "epoch": 0.49655700076511095, "grad_norm": 0.0, - "learning_rate": 1.057502550727881e-05, - "loss": 0.918, + "learning_rate": 1.0597125390391697e-05, + "loss": 0.8992, "step": 17523 }, { - "epoch": 0.4972758229284904, + "epoch": 0.4965853382073734, "grad_norm": 0.0, - "learning_rate": 1.0574107947526888e-05, - "loss": 0.9733, + "learning_rate": 1.0596209236303246e-05, + "loss": 0.9024, "step": 17524 }, { - "epoch": 0.49730419977298523, + "epoch": 0.4966136756496359, "grad_norm": 0.0, - "learning_rate": 1.0573190382925454e-05, - "loss": 0.8064, + "learning_rate": 1.0595293077192699e-05, + "loss": 0.8262, "step": 17525 }, { - "epoch": 0.49733257661748015, + "epoch": 0.4966420130918983, "grad_norm": 0.0, - "learning_rate": 1.0572272813482254e-05, - "loss": 0.8976, + "learning_rate": 1.059437691306778e-05, + "loss": 0.8913, "step": 17526 }, { - "epoch": 0.497360953461975, + "epoch": 0.4966703505341608, "grad_norm": 0.0, - "learning_rate": 1.0571355239205037e-05, - "loss": 0.8111, + "learning_rate": 1.0593460743936202e-05, + "loss": 0.908, "step": 17527 }, { - "epoch": 0.4973893303064699, + "epoch": 0.49669868797642325, "grad_norm": 0.0, - "learning_rate": 1.057043766010156e-05, - "loss": 0.8131, + "learning_rate": 1.0592544569805685e-05, + "loss": 0.9221, "step": 17528 }, { - "epoch": 0.49741770715096484, + "epoch": 0.4967270254186857, "grad_norm": 0.0, - "learning_rate": 1.0569520076179569e-05, - "loss": 0.9375, + "learning_rate": 1.0591628390683945e-05, + "loss": 0.8964, "step": 17529 }, { - "epoch": 0.4974460839954597, + "epoch": 0.4967553628609482, "grad_norm": 0.0, - "learning_rate": 1.0568602487446817e-05, - "loss": 0.8814, + "learning_rate": 1.0590712206578698e-05, + "loss": 0.8664, "step": 17530 }, { - "epoch": 0.4974744608399546, + "epoch": 0.4967837003032106, "grad_norm": 0.0, - "learning_rate": 1.0567684893911054e-05, - "loss": 0.8952, + "learning_rate": 1.0589796017497665e-05, + "loss": 0.9329, "step": 17531 }, { - "epoch": 0.4975028376844495, + "epoch": 0.4968120377454731, "grad_norm": 0.0, - "learning_rate": 1.056676729558003e-05, - "loss": 0.767, + "learning_rate": 1.0588879823448559e-05, + "loss": 0.8345, "step": 17532 }, { - "epoch": 0.4975312145289444, + "epoch": 0.49684037518773555, "grad_norm": 0.0, - "learning_rate": 1.0565849692461497e-05, - "loss": 0.8863, + "learning_rate": 1.0587963624439099e-05, + "loss": 0.8843, "step": 17533 }, { - "epoch": 0.49755959137343925, + "epoch": 0.49686871262999804, "grad_norm": 0.0, - "learning_rate": 1.056493208456321e-05, - "loss": 0.8694, + "learning_rate": 1.0587047420477003e-05, + "loss": 0.8587, "step": 17534 }, { - "epoch": 0.49758796821793416, + "epoch": 0.4968970500722605, "grad_norm": 0.0, - "learning_rate": 1.0564014471892911e-05, - "loss": 0.9236, + "learning_rate": 1.0586131211569992e-05, + "loss": 0.9409, "step": 17535 }, { - "epoch": 0.4976163450624291, + "epoch": 0.4969253875145229, "grad_norm": 0.0, - "learning_rate": 1.0563096854458361e-05, - "loss": 0.9843, + "learning_rate": 1.0585214997725778e-05, + "loss": 0.8979, "step": 17536 }, { - "epoch": 0.49764472190692394, + "epoch": 0.4969537249567854, "grad_norm": 0.0, - "learning_rate": 1.0562179232267304e-05, - "loss": 0.9036, + "learning_rate": 1.0584298778952082e-05, + "loss": 0.9922, "step": 17537 }, { - "epoch": 0.49767309875141885, + "epoch": 0.49698206239904785, "grad_norm": 0.0, - "learning_rate": 1.0561261605327495e-05, - "loss": 0.9176, + "learning_rate": 1.0583382555256618e-05, + "loss": 0.8415, "step": 17538 }, { - "epoch": 0.4977014755959137, + "epoch": 0.49701039984131035, "grad_norm": 0.0, - "learning_rate": 1.0560343973646686e-05, - "loss": 0.7946, + "learning_rate": 1.058246632664711e-05, + "loss": 0.8996, "step": 17539 }, { - "epoch": 0.4977298524404086, + "epoch": 0.4970387372835728, "grad_norm": 0.0, - "learning_rate": 1.055942633723262e-05, - "loss": 1.0284, + "learning_rate": 1.0581550093131266e-05, + "loss": 0.8514, "step": 17540 }, { - "epoch": 0.49775822928490354, + "epoch": 0.4970670747258352, "grad_norm": 0.0, - "learning_rate": 1.0558508696093058e-05, - "loss": 0.8321, + "learning_rate": 1.0580633854716814e-05, + "loss": 0.834, "step": 17541 }, { - "epoch": 0.4977866061293984, + "epoch": 0.4970954121680977, "grad_norm": 0.0, - "learning_rate": 1.055759105023575e-05, - "loss": 0.8813, + "learning_rate": 1.0579717611411464e-05, + "loss": 0.8847, "step": 17542 }, { - "epoch": 0.4978149829738933, + "epoch": 0.49712374961036015, "grad_norm": 0.0, - "learning_rate": 1.0556673399668442e-05, - "loss": 0.9279, + "learning_rate": 1.0578801363222941e-05, + "loss": 1.0428, "step": 17543 }, { - "epoch": 0.4978433598183882, + "epoch": 0.49715208705262265, "grad_norm": 0.0, - "learning_rate": 1.055575574439889e-05, - "loss": 0.9679, + "learning_rate": 1.0577885110158959e-05, + "loss": 0.8853, "step": 17544 }, { - "epoch": 0.4978717366628831, + "epoch": 0.4971804244948851, "grad_norm": 0.0, - "learning_rate": 1.0554838084434846e-05, - "loss": 0.8612, + "learning_rate": 1.0576968852227236e-05, + "loss": 0.9236, "step": 17545 }, { - "epoch": 0.497900113507378, + "epoch": 0.4972087619371475, "grad_norm": 0.0, - "learning_rate": 1.0553920419784056e-05, - "loss": 0.9428, + "learning_rate": 1.0576052589435485e-05, + "loss": 0.9125, "step": 17546 }, { - "epoch": 0.49792849035187287, + "epoch": 0.49723709937941, "grad_norm": 0.0, - "learning_rate": 1.0553002750454277e-05, - "loss": 0.9833, + "learning_rate": 1.0575136321791433e-05, + "loss": 0.9624, "step": 17547 }, { - "epoch": 0.4979568671963678, + "epoch": 0.49726543682167246, "grad_norm": 0.0, - "learning_rate": 1.055208507645326e-05, - "loss": 0.9489, + "learning_rate": 1.0574220049302795e-05, + "loss": 0.9387, "step": 17548 }, { - "epoch": 0.49798524404086264, + "epoch": 0.49729377426393495, "grad_norm": 0.0, - "learning_rate": 1.0551167397788757e-05, - "loss": 0.9118, + "learning_rate": 1.057330377197729e-05, + "loss": 0.9585, "step": 17549 }, { - "epoch": 0.49801362088535756, + "epoch": 0.4973221117061974, "grad_norm": 0.0, - "learning_rate": 1.0550249714468515e-05, - "loss": 0.9262, + "learning_rate": 1.0572387489822628e-05, + "loss": 0.9359, "step": 17550 }, { - "epoch": 0.4980419977298524, + "epoch": 0.4973504491484599, "grad_norm": 0.0, - "learning_rate": 1.0549332026500291e-05, - "loss": 0.8842, + "learning_rate": 1.057147120284654e-05, + "loss": 0.9298, "step": 17551 }, { - "epoch": 0.49807037457434733, + "epoch": 0.4973787865907223, "grad_norm": 0.0, - "learning_rate": 1.0548414333891835e-05, - "loss": 0.979, + "learning_rate": 1.0570554911056736e-05, + "loss": 1.0155, "step": 17552 }, { - "epoch": 0.49809875141884224, + "epoch": 0.49740712403298476, "grad_norm": 0.0, - "learning_rate": 1.0547496636650896e-05, - "loss": 0.8594, + "learning_rate": 1.0569638614460936e-05, + "loss": 0.8203, "step": 17553 }, { - "epoch": 0.4981271282633371, + "epoch": 0.49743546147524725, "grad_norm": 0.0, - "learning_rate": 1.054657893478523e-05, - "loss": 0.8864, + "learning_rate": 1.0568722313066856e-05, + "loss": 0.894, "step": 17554 }, { - "epoch": 0.498155505107832, + "epoch": 0.4974637989175097, "grad_norm": 0.0, - "learning_rate": 1.0545661228302586e-05, - "loss": 0.8715, + "learning_rate": 1.056780600688222e-05, + "loss": 0.9687, "step": 17555 }, { - "epoch": 0.4981838819523269, + "epoch": 0.4974921363597722, "grad_norm": 0.0, - "learning_rate": 1.0544743517210718e-05, - "loss": 0.8594, + "learning_rate": 1.0566889695914741e-05, + "loss": 0.8007, "step": 17556 }, { - "epoch": 0.4982122587968218, + "epoch": 0.4975204738020346, "grad_norm": 0.0, - "learning_rate": 1.0543825801517375e-05, - "loss": 0.9692, + "learning_rate": 1.0565973380172144e-05, + "loss": 0.9145, "step": 17557 }, { - "epoch": 0.4982406356413167, + "epoch": 0.49754881124429706, "grad_norm": 0.0, - "learning_rate": 1.0542908081230314e-05, - "loss": 0.896, + "learning_rate": 1.0565057059662137e-05, + "loss": 0.9307, "step": 17558 }, { - "epoch": 0.49826901248581157, + "epoch": 0.49757714868655956, "grad_norm": 0.0, - "learning_rate": 1.0541990356357285e-05, - "loss": 0.9982, + "learning_rate": 1.0564140734392445e-05, + "loss": 0.9316, "step": 17559 }, { - "epoch": 0.4982973893303065, + "epoch": 0.497605486128822, "grad_norm": 0.0, - "learning_rate": 1.0541072626906035e-05, - "loss": 0.9108, + "learning_rate": 1.056322440437079e-05, + "loss": 0.8163, "step": 17560 }, { - "epoch": 0.49832576617480134, + "epoch": 0.4976338235710845, "grad_norm": 0.0, - "learning_rate": 1.0540154892884325e-05, - "loss": 0.9933, + "learning_rate": 1.0562308069604886e-05, + "loss": 0.9517, "step": 17561 }, { - "epoch": 0.49835414301929626, + "epoch": 0.4976621610133469, "grad_norm": 0.0, - "learning_rate": 1.05392371542999e-05, - "loss": 0.788, + "learning_rate": 1.056139173010245e-05, + "loss": 0.8337, "step": 17562 }, { - "epoch": 0.4983825198637912, + "epoch": 0.4976904984556094, "grad_norm": 0.0, - "learning_rate": 1.0538319411160512e-05, - "loss": 0.8273, + "learning_rate": 1.0560475385871202e-05, + "loss": 0.8225, "step": 17563 }, { - "epoch": 0.49841089670828603, + "epoch": 0.49771883589787186, "grad_norm": 0.0, - "learning_rate": 1.0537401663473916e-05, - "loss": 0.8457, + "learning_rate": 1.0559559036918867e-05, + "loss": 0.9908, "step": 17564 }, { - "epoch": 0.49843927355278095, + "epoch": 0.4977471733401343, "grad_norm": 0.0, - "learning_rate": 1.0536483911247869e-05, - "loss": 0.8435, + "learning_rate": 1.0558642683253153e-05, + "loss": 0.898, "step": 17565 }, { - "epoch": 0.4984676503972758, + "epoch": 0.4977755107823968, "grad_norm": 0.0, - "learning_rate": 1.0535566154490116e-05, - "loss": 0.9382, + "learning_rate": 1.0557726324881787e-05, + "loss": 0.8099, "step": 17566 }, { - "epoch": 0.4984960272417707, + "epoch": 0.49780384822465923, "grad_norm": 0.0, - "learning_rate": 1.0534648393208409e-05, - "loss": 0.848, + "learning_rate": 1.0556809961812484e-05, + "loss": 0.9101, "step": 17567 }, { - "epoch": 0.4985244040862656, + "epoch": 0.4978321856669217, "grad_norm": 0.0, - "learning_rate": 1.0533730627410505e-05, - "loss": 0.9169, + "learning_rate": 1.0555893594052965e-05, + "loss": 0.8746, "step": 17568 }, { - "epoch": 0.4985527809307605, + "epoch": 0.49786052310918416, "grad_norm": 0.0, - "learning_rate": 1.0532812857104155e-05, - "loss": 0.9089, + "learning_rate": 1.055497722161095e-05, + "loss": 0.8614, "step": 17569 }, { - "epoch": 0.4985811577752554, + "epoch": 0.4978888605514466, "grad_norm": 0.0, - "learning_rate": 1.0531895082297107e-05, - "loss": 0.7453, + "learning_rate": 1.0554060844494152e-05, + "loss": 0.9292, "step": 17570 }, { - "epoch": 0.49860953461975027, + "epoch": 0.4979171979937091, "grad_norm": 0.0, - "learning_rate": 1.0530977302997121e-05, - "loss": 0.9679, + "learning_rate": 1.0553144462710293e-05, + "loss": 0.8956, "step": 17571 }, { - "epoch": 0.4986379114642452, + "epoch": 0.49794553543597153, "grad_norm": 0.0, - "learning_rate": 1.0530059519211946e-05, - "loss": 0.9449, + "learning_rate": 1.0552228076267094e-05, + "loss": 0.7231, "step": 17572 }, { - "epoch": 0.49866628830874005, + "epoch": 0.497973872878234, "grad_norm": 0.0, - "learning_rate": 1.0529141730949334e-05, - "loss": 0.9456, + "learning_rate": 1.0551311685172275e-05, + "loss": 0.8213, "step": 17573 }, { - "epoch": 0.49869466515323496, + "epoch": 0.49800221032049646, "grad_norm": 0.0, - "learning_rate": 1.0528223938217037e-05, - "loss": 1.0226, + "learning_rate": 1.0550395289433553e-05, + "loss": 0.7874, "step": 17574 }, { - "epoch": 0.4987230419977299, + "epoch": 0.49803054776275896, "grad_norm": 0.0, - "learning_rate": 1.0527306141022808e-05, - "loss": 0.8057, + "learning_rate": 1.0549478889058644e-05, + "loss": 0.8441, "step": 17575 }, { - "epoch": 0.49875141884222474, + "epoch": 0.4980588852050214, "grad_norm": 0.0, - "learning_rate": 1.0526388339374403e-05, - "loss": 0.8502, + "learning_rate": 1.0548562484055274e-05, + "loss": 0.9942, "step": 17576 }, { - "epoch": 0.49877979568671965, + "epoch": 0.49808722264728383, "grad_norm": 0.0, - "learning_rate": 1.052547053327957e-05, - "loss": 0.9344, + "learning_rate": 1.0547646074431155e-05, + "loss": 0.8937, "step": 17577 }, { - "epoch": 0.4988081725312145, + "epoch": 0.4981155600895463, "grad_norm": 0.0, - "learning_rate": 1.0524552722746063e-05, - "loss": 0.7621, + "learning_rate": 1.0546729660194011e-05, + "loss": 0.8848, "step": 17578 }, { - "epoch": 0.4988365493757094, + "epoch": 0.49814389753180877, "grad_norm": 0.0, - "learning_rate": 1.0523634907781637e-05, - "loss": 0.8437, + "learning_rate": 1.054581324135156e-05, + "loss": 0.8526, "step": 17579 }, { - "epoch": 0.4988649262202043, + "epoch": 0.49817223497407126, "grad_norm": 0.0, - "learning_rate": 1.0522717088394046e-05, - "loss": 0.9944, + "learning_rate": 1.0544896817911521e-05, + "loss": 0.8616, "step": 17580 }, { - "epoch": 0.4988933030646992, + "epoch": 0.4982005724163337, "grad_norm": 0.0, - "learning_rate": 1.0521799264591035e-05, - "loss": 0.8463, + "learning_rate": 1.0543980389881613e-05, + "loss": 0.9243, "step": 17581 }, { - "epoch": 0.4989216799091941, + "epoch": 0.49822890985859614, "grad_norm": 0.0, - "learning_rate": 1.0520881436380366e-05, - "loss": 0.9875, + "learning_rate": 1.0543063957269558e-05, + "loss": 0.8728, "step": 17582 }, { - "epoch": 0.498950056753689, + "epoch": 0.49825724730085863, "grad_norm": 0.0, - "learning_rate": 1.0519963603769787e-05, - "loss": 0.9923, + "learning_rate": 1.0542147520083077e-05, + "loss": 0.8525, "step": 17583 }, { - "epoch": 0.4989784335981839, + "epoch": 0.49828558474312107, "grad_norm": 0.0, - "learning_rate": 1.0519045766767052e-05, - "loss": 0.9023, + "learning_rate": 1.0541231078329881e-05, + "loss": 0.9297, "step": 17584 }, { - "epoch": 0.49900681044267875, + "epoch": 0.49831392218538356, "grad_norm": 0.0, - "learning_rate": 1.0518127925379914e-05, - "loss": 0.8259, + "learning_rate": 1.0540314632017694e-05, + "loss": 0.8715, "step": 17585 }, { - "epoch": 0.49903518728717366, + "epoch": 0.498342259627646, "grad_norm": 0.0, - "learning_rate": 1.0517210079616126e-05, - "loss": 0.8416, + "learning_rate": 1.0539398181154239e-05, + "loss": 0.8381, "step": 17586 }, { - "epoch": 0.4990635641316686, + "epoch": 0.4983705970699085, "grad_norm": 0.0, - "learning_rate": 1.0516292229483437e-05, - "loss": 1.0143, + "learning_rate": 1.0538481725747232e-05, + "loss": 0.944, "step": 17587 }, { - "epoch": 0.49909194097616344, + "epoch": 0.49839893451217093, "grad_norm": 0.0, - "learning_rate": 1.051537437498961e-05, - "loss": 0.8994, + "learning_rate": 1.0537565265804392e-05, + "loss": 0.9611, "step": 17588 }, { - "epoch": 0.49912031782065835, + "epoch": 0.49842727195443337, "grad_norm": 0.0, - "learning_rate": 1.0514456516142393e-05, - "loss": 0.9307, + "learning_rate": 1.0536648801333443e-05, + "loss": 0.8567, "step": 17589 }, { - "epoch": 0.4991486946651532, + "epoch": 0.49845560939669586, "grad_norm": 0.0, - "learning_rate": 1.0513538652949538e-05, - "loss": 0.91, + "learning_rate": 1.0535732332342102e-05, + "loss": 0.9265, "step": 17590 }, { - "epoch": 0.49917707150964813, + "epoch": 0.4984839468389583, "grad_norm": 0.0, - "learning_rate": 1.0512620785418796e-05, - "loss": 0.89, + "learning_rate": 1.0534815858838085e-05, + "loss": 0.94, "step": 17591 }, { - "epoch": 0.49920544835414304, + "epoch": 0.4985122842812208, "grad_norm": 0.0, - "learning_rate": 1.0511702913557926e-05, - "loss": 0.8425, + "learning_rate": 1.0533899380829116e-05, + "loss": 0.8855, "step": 17592 }, { - "epoch": 0.4992338251986379, + "epoch": 0.49854062172348323, "grad_norm": 0.0, - "learning_rate": 1.0510785037374675e-05, - "loss": 0.9183, + "learning_rate": 1.0532982898322916e-05, + "loss": 0.76, "step": 17593 }, { - "epoch": 0.4992622020431328, + "epoch": 0.4985689591657457, "grad_norm": 0.0, - "learning_rate": 1.0509867156876803e-05, - "loss": 0.8791, + "learning_rate": 1.0532066411327204e-05, + "loss": 0.8924, "step": 17594 }, { - "epoch": 0.4992905788876277, + "epoch": 0.49859729660800817, "grad_norm": 0.0, - "learning_rate": 1.0508949272072059e-05, - "loss": 0.9096, + "learning_rate": 1.0531149919849699e-05, + "loss": 0.8408, "step": 17595 }, { - "epoch": 0.4993189557321226, + "epoch": 0.4986256340502706, "grad_norm": 0.0, - "learning_rate": 1.05080313829682e-05, - "loss": 0.8855, + "learning_rate": 1.0530233423898118e-05, + "loss": 0.8632, "step": 17596 }, { - "epoch": 0.49934733257661745, + "epoch": 0.4986539714925331, "grad_norm": 0.0, - "learning_rate": 1.0507113489572974e-05, - "loss": 0.9285, + "learning_rate": 1.0529316923480186e-05, + "loss": 0.8543, "step": 17597 }, { - "epoch": 0.49937570942111237, + "epoch": 0.49868230893479554, "grad_norm": 0.0, - "learning_rate": 1.0506195591894139e-05, - "loss": 0.9733, + "learning_rate": 1.0528400418603622e-05, + "loss": 0.8075, "step": 17598 }, { - "epoch": 0.4994040862656073, + "epoch": 0.49871064637705803, "grad_norm": 0.0, - "learning_rate": 1.0505277689939448e-05, - "loss": 0.9233, + "learning_rate": 1.0527483909276144e-05, + "loss": 0.9152, "step": 17599 }, { - "epoch": 0.49943246311010214, + "epoch": 0.49873898381932047, "grad_norm": 0.0, - "learning_rate": 1.0504359783716652e-05, - "loss": 0.8736, + "learning_rate": 1.0526567395505472e-05, + "loss": 1.0014, "step": 17600 }, { - "epoch": 0.49946083995459706, + "epoch": 0.4987673212615829, "grad_norm": 0.0, - "learning_rate": 1.0503441873233505e-05, - "loss": 0.9289, + "learning_rate": 1.0525650877299326e-05, + "loss": 0.8192, "step": 17601 }, { - "epoch": 0.4994892167990919, + "epoch": 0.4987956587038454, "grad_norm": 0.0, - "learning_rate": 1.0502523958497763e-05, - "loss": 0.9047, + "learning_rate": 1.0524734354665433e-05, + "loss": 0.8209, "step": 17602 }, { - "epoch": 0.49951759364358683, + "epoch": 0.49882399614610784, "grad_norm": 0.0, - "learning_rate": 1.050160603951718e-05, - "loss": 0.866, + "learning_rate": 1.0523817827611504e-05, + "loss": 0.7732, "step": 17603 }, { - "epoch": 0.49954597048808175, + "epoch": 0.49885233358837033, "grad_norm": 0.0, - "learning_rate": 1.0500688116299507e-05, - "loss": 0.902, + "learning_rate": 1.0522901296145263e-05, + "loss": 0.9764, "step": 17604 }, { - "epoch": 0.4995743473325766, + "epoch": 0.49888067103063277, "grad_norm": 0.0, - "learning_rate": 1.0499770188852501e-05, - "loss": 0.7764, + "learning_rate": 1.0521984760274429e-05, + "loss": 0.9394, "step": 17605 }, { - "epoch": 0.4996027241770715, + "epoch": 0.4989090084728952, "grad_norm": 0.0, - "learning_rate": 1.0498852257183912e-05, - "loss": 0.7925, + "learning_rate": 1.0521068220006727e-05, + "loss": 0.9473, "step": 17606 }, { - "epoch": 0.4996311010215664, + "epoch": 0.4989373459151577, "grad_norm": 0.0, - "learning_rate": 1.0497934321301492e-05, - "loss": 0.9339, + "learning_rate": 1.0520151675349873e-05, + "loss": 1.0008, "step": 17607 }, { - "epoch": 0.4996594778660613, + "epoch": 0.49896568335742014, "grad_norm": 0.0, - "learning_rate": 1.0497016381213006e-05, - "loss": 0.9361, + "learning_rate": 1.0519235126311584e-05, + "loss": 0.8458, "step": 17608 }, { - "epoch": 0.4996878547105562, + "epoch": 0.49899402079968264, "grad_norm": 0.0, - "learning_rate": 1.0496098436926195e-05, - "loss": 0.8608, + "learning_rate": 1.051831857289959e-05, + "loss": 0.9403, "step": 17609 }, { - "epoch": 0.49971623155505107, + "epoch": 0.4990223582419451, "grad_norm": 0.0, - "learning_rate": 1.049518048844882e-05, - "loss": 0.8496, + "learning_rate": 1.0517402015121606e-05, + "loss": 0.9394, "step": 17610 }, { - "epoch": 0.499744608399546, + "epoch": 0.49905069568420757, "grad_norm": 0.0, - "learning_rate": 1.049426253578863e-05, - "loss": 0.8969, + "learning_rate": 1.0516485452985349e-05, + "loss": 0.8737, "step": 17611 }, { - "epoch": 0.49977298524404085, + "epoch": 0.49907903312647, "grad_norm": 0.0, - "learning_rate": 1.0493344578953386e-05, - "loss": 0.7622, + "learning_rate": 1.0515568886498546e-05, + "loss": 0.9682, "step": 17612 }, { - "epoch": 0.49980136208853576, + "epoch": 0.49910737056873244, "grad_norm": 0.0, - "learning_rate": 1.0492426617950838e-05, - "loss": 0.7828, + "learning_rate": 1.0514652315668911e-05, + "loss": 0.8547, "step": 17613 }, { - "epoch": 0.4998297389330306, + "epoch": 0.49913570801099494, "grad_norm": 0.0, - "learning_rate": 1.0491508652788737e-05, - "loss": 0.893, + "learning_rate": 1.0513735740504175e-05, + "loss": 0.8403, "step": 17614 }, { - "epoch": 0.49985811577752554, + "epoch": 0.4991640454532574, "grad_norm": 0.0, - "learning_rate": 1.0490590683474844e-05, - "loss": 0.8463, + "learning_rate": 1.0512819161012046e-05, + "loss": 0.9709, "step": 17615 }, { - "epoch": 0.49988649262202045, + "epoch": 0.49919238289551987, "grad_norm": 0.0, - "learning_rate": 1.0489672710016907e-05, - "loss": 0.9417, + "learning_rate": 1.0511902577200255e-05, + "loss": 0.889, "step": 17616 }, { - "epoch": 0.4999148694665153, + "epoch": 0.4992207203377823, "grad_norm": 0.0, - "learning_rate": 1.0488754732422684e-05, - "loss": 0.8276, + "learning_rate": 1.0510985989076517e-05, + "loss": 0.8824, "step": 17617 }, { - "epoch": 0.4999432463110102, + "epoch": 0.49924905778004475, "grad_norm": 0.0, - "learning_rate": 1.0487836750699924e-05, - "loss": 0.9465, + "learning_rate": 1.0510069396648553e-05, + "loss": 0.8894, "step": 17618 }, { - "epoch": 0.4999716231555051, + "epoch": 0.49927739522230724, "grad_norm": 0.0, - "learning_rate": 1.0486918764856391e-05, - "loss": 0.8995, + "learning_rate": 1.0509152799924085e-05, + "loss": 0.9288, "step": 17619 }, { - "epoch": 0.5, + "epoch": 0.4993057326645697, "grad_norm": 0.0, - "learning_rate": 1.0486000774899832e-05, - "loss": 1.0087, + "learning_rate": 1.0508236198910836e-05, + "loss": 0.9609, "step": 17620 }, { - "epoch": 0.5000283768444949, + "epoch": 0.4993340701068322, "grad_norm": 0.0, - "learning_rate": 1.0485082780837998e-05, - "loss": 0.9397, + "learning_rate": 1.0507319593616523e-05, + "loss": 0.8182, "step": 17621 }, { - "epoch": 0.5000567536889898, + "epoch": 0.4993624075490946, "grad_norm": 0.0, - "learning_rate": 1.0484164782678654e-05, - "loss": 0.9756, + "learning_rate": 1.0506402984048872e-05, + "loss": 0.9024, "step": 17622 }, { - "epoch": 0.5000851305334847, + "epoch": 0.4993907449913571, "grad_norm": 0.0, - "learning_rate": 1.0483246780429546e-05, - "loss": 0.9634, + "learning_rate": 1.0505486370215597e-05, + "loss": 0.8246, "step": 17623 }, { - "epoch": 0.5001135073779795, + "epoch": 0.49941908243361954, "grad_norm": 0.0, - "learning_rate": 1.048232877409843e-05, - "loss": 0.7934, + "learning_rate": 1.0504569752124423e-05, + "loss": 0.9013, "step": 17624 }, { - "epoch": 0.5001418842224744, + "epoch": 0.499447419875882, "grad_norm": 0.0, - "learning_rate": 1.0481410763693059e-05, - "loss": 1.0346, + "learning_rate": 1.050365312978307e-05, + "loss": 0.9284, "step": 17625 }, { - "epoch": 0.5001702610669694, + "epoch": 0.4994757573181445, "grad_norm": 0.0, - "learning_rate": 1.0480492749221191e-05, - "loss": 0.8528, + "learning_rate": 1.0502736503199262e-05, + "loss": 0.8247, "step": 17626 }, { - "epoch": 0.5001986379114642, + "epoch": 0.4995040947604069, "grad_norm": 0.0, - "learning_rate": 1.0479574730690583e-05, - "loss": 0.9987, + "learning_rate": 1.0501819872380717e-05, + "loss": 0.7249, "step": 17627 }, { - "epoch": 0.5002270147559591, + "epoch": 0.4995324322026694, "grad_norm": 0.0, - "learning_rate": 1.0478656708108981e-05, - "loss": 0.7766, + "learning_rate": 1.0500903237335157e-05, + "loss": 0.7759, "step": 17628 }, { - "epoch": 0.5002553916004541, + "epoch": 0.49956076964493185, "grad_norm": 0.0, - "learning_rate": 1.0477738681484146e-05, - "loss": 0.8528, + "learning_rate": 1.0499986598070302e-05, + "loss": 0.9722, "step": 17629 }, { - "epoch": 0.5002837684449489, + "epoch": 0.4995891070871943, "grad_norm": 0.0, - "learning_rate": 1.0476820650823834e-05, - "loss": 0.9225, + "learning_rate": 1.0499069954593874e-05, + "loss": 0.7375, "step": 17630 }, { - "epoch": 0.5003121452894438, + "epoch": 0.4996174445294568, "grad_norm": 0.0, - "learning_rate": 1.047590261613579e-05, - "loss": 0.9597, + "learning_rate": 1.0498153306913595e-05, + "loss": 0.8823, "step": 17631 }, { - "epoch": 0.5003405221339388, + "epoch": 0.4996457819717192, "grad_norm": 0.0, - "learning_rate": 1.0474984577427778e-05, - "loss": 0.9038, + "learning_rate": 1.0497236655037187e-05, + "loss": 0.931, "step": 17632 }, { - "epoch": 0.5003688989784336, + "epoch": 0.4996741194139817, "grad_norm": 0.0, - "learning_rate": 1.0474066534707551e-05, - "loss": 0.9277, + "learning_rate": 1.0496319998972366e-05, + "loss": 0.9139, "step": 17633 }, { - "epoch": 0.5003972758229285, + "epoch": 0.49970245685624415, "grad_norm": 0.0, - "learning_rate": 1.0473148487982865e-05, - "loss": 0.8277, + "learning_rate": 1.0495403338726862e-05, + "loss": 0.8797, "step": 17634 }, { - "epoch": 0.5004256526674233, + "epoch": 0.49973079429850664, "grad_norm": 0.0, - "learning_rate": 1.0472230437261469e-05, - "loss": 0.9583, + "learning_rate": 1.049448667430839e-05, + "loss": 0.9456, "step": 17635 }, { - "epoch": 0.5004540295119183, + "epoch": 0.4997591317407691, "grad_norm": 0.0, - "learning_rate": 1.0471312382551121e-05, - "loss": 0.9757, + "learning_rate": 1.0493570005724676e-05, + "loss": 0.8721, "step": 17636 }, { - "epoch": 0.5004824063564132, + "epoch": 0.4997874691830315, "grad_norm": 0.0, - "learning_rate": 1.0470394323859579e-05, - "loss": 0.9054, + "learning_rate": 1.0492653332983434e-05, + "loss": 0.8693, "step": 17637 }, { - "epoch": 0.500510783200908, + "epoch": 0.499815806625294, "grad_norm": 0.0, - "learning_rate": 1.0469476261194591e-05, - "loss": 0.8875, + "learning_rate": 1.049173665609239e-05, + "loss": 1.0205, "step": 17638 }, { - "epoch": 0.500539160045403, + "epoch": 0.49984414406755645, "grad_norm": 0.0, - "learning_rate": 1.0468558194563919e-05, - "loss": 0.9479, + "learning_rate": 1.0490819975059268e-05, + "loss": 0.9463, "step": 17639 }, { - "epoch": 0.5005675368898979, + "epoch": 0.49987248150981894, "grad_norm": 0.0, - "learning_rate": 1.0467640123975314e-05, - "loss": 0.8224, + "learning_rate": 1.0489903289891787e-05, + "loss": 0.8509, "step": 17640 }, { - "epoch": 0.5005959137343927, + "epoch": 0.4999008189520814, "grad_norm": 0.0, - "learning_rate": 1.046672204943653e-05, - "loss": 0.886, + "learning_rate": 1.0488986600597669e-05, + "loss": 0.9659, "step": 17641 }, { - "epoch": 0.5006242905788876, + "epoch": 0.4999291563943438, "grad_norm": 0.0, - "learning_rate": 1.0465803970955327e-05, - "loss": 0.8638, + "learning_rate": 1.0488069907184632e-05, + "loss": 0.8364, "step": 17642 }, { - "epoch": 0.5006526674233825, + "epoch": 0.4999574938366063, "grad_norm": 0.0, - "learning_rate": 1.0464885888539455e-05, - "loss": 0.8441, + "learning_rate": 1.0487153209660405e-05, + "loss": 0.7855, "step": 17643 }, { - "epoch": 0.5006810442678774, + "epoch": 0.49998583127886875, "grad_norm": 0.0, - "learning_rate": 1.0463967802196673e-05, - "loss": 0.8402, + "learning_rate": 1.0486236508032703e-05, + "loss": 0.8903, "step": 17644 }, { - "epoch": 0.5007094211123723, + "epoch": 0.5000141687211312, "grad_norm": 0.0, - "learning_rate": 1.0463049711934731e-05, - "loss": 0.9103, + "learning_rate": 1.048531980230925e-05, + "loss": 0.9685, "step": 17645 }, { - "epoch": 0.5007377979568672, + "epoch": 0.5000425061633937, "grad_norm": 0.0, - "learning_rate": 1.046213161776139e-05, - "loss": 0.8945, + "learning_rate": 1.048440309249777e-05, + "loss": 0.8864, "step": 17646 }, { - "epoch": 0.5007661748013621, + "epoch": 0.5000708436056561, "grad_norm": 0.0, - "learning_rate": 1.0461213519684401e-05, - "loss": 0.9028, + "learning_rate": 1.0483486378605983e-05, + "loss": 0.9616, "step": 17647 }, { - "epoch": 0.500794551645857, + "epoch": 0.5000991810479186, "grad_norm": 0.0, - "learning_rate": 1.046029541771152e-05, - "loss": 0.8369, + "learning_rate": 1.0482569660641611e-05, + "loss": 0.865, "step": 17648 }, { - "epoch": 0.5008229284903519, + "epoch": 0.5001275184901811, "grad_norm": 0.0, - "learning_rate": 1.0459377311850505e-05, - "loss": 0.9365, + "learning_rate": 1.0481652938612374e-05, + "loss": 0.8841, "step": 17649 }, { - "epoch": 0.5008513053348468, + "epoch": 0.5001558559324435, "grad_norm": 0.0, - "learning_rate": 1.0458459202109108e-05, - "loss": 0.9303, + "learning_rate": 1.0480736212525996e-05, + "loss": 0.8916, "step": 17650 }, { - "epoch": 0.5008796821793416, + "epoch": 0.500184193374706, "grad_norm": 0.0, - "learning_rate": 1.0457541088495085e-05, - "loss": 0.8399, + "learning_rate": 1.0479819482390194e-05, + "loss": 0.9031, "step": 17651 }, { - "epoch": 0.5009080590238365, + "epoch": 0.5002125308169685, "grad_norm": 0.0, - "learning_rate": 1.0456622971016193e-05, - "loss": 0.9075, + "learning_rate": 1.0478902748212701e-05, + "loss": 0.9257, "step": 17652 }, { - "epoch": 0.5009364358683315, + "epoch": 0.500240868259231, "grad_norm": 0.0, - "learning_rate": 1.0455704849680188e-05, - "loss": 0.9604, + "learning_rate": 1.0477986010001232e-05, + "loss": 0.9207, "step": 17653 }, { - "epoch": 0.5009648127128263, + "epoch": 0.5002692057014934, "grad_norm": 0.0, - "learning_rate": 1.0454786724494819e-05, - "loss": 0.8621, + "learning_rate": 1.0477069267763505e-05, + "loss": 0.8812, "step": 17654 }, { - "epoch": 0.5009931895573212, + "epoch": 0.5002975431437559, "grad_norm": 0.0, - "learning_rate": 1.0453868595467849e-05, - "loss": 0.9207, + "learning_rate": 1.0476152521507247e-05, + "loss": 0.9237, "step": 17655 }, { - "epoch": 0.5010215664018162, + "epoch": 0.5003258805860183, "grad_norm": 0.0, - "learning_rate": 1.045295046260703e-05, - "loss": 0.9669, + "learning_rate": 1.0475235771240185e-05, + "loss": 0.9858, "step": 17656 }, { - "epoch": 0.501049943246311, + "epoch": 0.5003542180282807, "grad_norm": 0.0, - "learning_rate": 1.0452032325920118e-05, - "loss": 0.978, + "learning_rate": 1.047431901697003e-05, + "loss": 0.9157, "step": 17657 }, { - "epoch": 0.5010783200908059, + "epoch": 0.5003825554705432, "grad_norm": 0.0, - "learning_rate": 1.0451114185414867e-05, - "loss": 0.8861, + "learning_rate": 1.0473402258704509e-05, + "loss": 0.8332, "step": 17658 }, { - "epoch": 0.5011066969353007, + "epoch": 0.5004108929128057, "grad_norm": 0.0, - "learning_rate": 1.045019604109904e-05, - "loss": 0.7601, + "learning_rate": 1.0472485496451347e-05, + "loss": 0.8394, "step": 17659 }, { - "epoch": 0.5011350737797957, + "epoch": 0.5004392303550681, "grad_norm": 0.0, - "learning_rate": 1.0449277892980382e-05, - "loss": 0.8655, + "learning_rate": 1.0471568730218267e-05, + "loss": 0.8863, "step": 17660 }, { - "epoch": 0.5011634506242906, + "epoch": 0.5004675677973306, "grad_norm": 0.0, - "learning_rate": 1.0448359741066653e-05, - "loss": 0.9055, + "learning_rate": 1.0470651960012987e-05, + "loss": 0.8541, "step": 17661 }, { - "epoch": 0.5011918274687854, + "epoch": 0.5004959052395931, "grad_norm": 0.0, - "learning_rate": 1.044744158536561e-05, - "loss": 0.9706, + "learning_rate": 1.0469735185843228e-05, + "loss": 0.7814, "step": 17662 }, { - "epoch": 0.5012202043132804, + "epoch": 0.5005242426818556, "grad_norm": 0.0, - "learning_rate": 1.0446523425885008e-05, - "loss": 0.9925, + "learning_rate": 1.0468818407716719e-05, + "loss": 0.9026, "step": 17663 }, { - "epoch": 0.5012485811577753, + "epoch": 0.500552580124118, "grad_norm": 0.0, - "learning_rate": 1.0445605262632603e-05, - "loss": 0.962, + "learning_rate": 1.0467901625641174e-05, + "loss": 1.0093, "step": 17664 }, { - "epoch": 0.5012769580022701, + "epoch": 0.5005809175663805, "grad_norm": 0.0, - "learning_rate": 1.044468709561615e-05, - "loss": 0.8642, + "learning_rate": 1.0466984839624324e-05, + "loss": 0.8927, "step": 17665 }, { - "epoch": 0.5013053348467651, + "epoch": 0.500609255008643, "grad_norm": 0.0, - "learning_rate": 1.0443768924843405e-05, - "loss": 0.8238, + "learning_rate": 1.0466068049673883e-05, + "loss": 0.8161, "step": 17666 }, { - "epoch": 0.50133371169126, + "epoch": 0.5006375924509053, "grad_norm": 0.0, - "learning_rate": 1.0442850750322126e-05, - "loss": 0.919, + "learning_rate": 1.0465151255797582e-05, + "loss": 0.9286, "step": 17667 }, { - "epoch": 0.5013620885357548, + "epoch": 0.5006659298931678, "grad_norm": 0.0, - "learning_rate": 1.0441932572060062e-05, - "loss": 0.9322, + "learning_rate": 1.0464234458003139e-05, + "loss": 0.9893, "step": 17668 }, { - "epoch": 0.5013904653802497, + "epoch": 0.5006942673354303, "grad_norm": 0.0, - "learning_rate": 1.0441014390064978e-05, - "loss": 0.8849, + "learning_rate": 1.0463317656298273e-05, + "loss": 0.7915, "step": 17669 }, { - "epoch": 0.5014188422247446, + "epoch": 0.5007226047776928, "grad_norm": 0.0, - "learning_rate": 1.0440096204344623e-05, - "loss": 1.0162, + "learning_rate": 1.0462400850690715e-05, + "loss": 0.9532, "step": 17670 }, { - "epoch": 0.5014472190692395, + "epoch": 0.5007509422199552, "grad_norm": 0.0, - "learning_rate": 1.043917801490676e-05, - "loss": 0.9631, + "learning_rate": 1.0461484041188179e-05, + "loss": 0.8687, "step": 17671 }, { - "epoch": 0.5014755959137344, + "epoch": 0.5007792796622177, "grad_norm": 0.0, - "learning_rate": 1.0438259821759133e-05, - "loss": 0.9195, + "learning_rate": 1.0460567227798392e-05, + "loss": 0.8647, "step": 17672 }, { - "epoch": 0.5015039727582293, + "epoch": 0.5008076171044802, "grad_norm": 0.0, - "learning_rate": 1.0437341624909512e-05, - "loss": 0.9159, + "learning_rate": 1.045965041052908e-05, + "loss": 0.8394, "step": 17673 }, { - "epoch": 0.5015323496027242, + "epoch": 0.5008359545467426, "grad_norm": 0.0, - "learning_rate": 1.0436423424365646e-05, - "loss": 0.8037, + "learning_rate": 1.045873358938796e-05, + "loss": 0.8847, "step": 17674 }, { - "epoch": 0.501560726447219, + "epoch": 0.5008642919890051, "grad_norm": 0.0, - "learning_rate": 1.0435505220135288e-05, - "loss": 0.9246, + "learning_rate": 1.0457816764382756e-05, + "loss": 0.9183, "step": 17675 }, { - "epoch": 0.5015891032917139, + "epoch": 0.5008926294312676, "grad_norm": 0.0, - "learning_rate": 1.0434587012226203e-05, - "loss": 0.9724, + "learning_rate": 1.0456899935521187e-05, + "loss": 0.8942, "step": 17676 }, { - "epoch": 0.5016174801362089, + "epoch": 0.50092096687353, "grad_norm": 0.0, - "learning_rate": 1.0433668800646139e-05, - "loss": 0.8811, + "learning_rate": 1.0455983102810987e-05, + "loss": 0.9431, "step": 17677 }, { - "epoch": 0.5016458569807037, + "epoch": 0.5009493043157924, "grad_norm": 0.0, - "learning_rate": 1.0432750585402853e-05, - "loss": 0.9415, + "learning_rate": 1.045506626625987e-05, + "loss": 0.8863, "step": 17678 }, { - "epoch": 0.5016742338251986, + "epoch": 0.5009776417580549, "grad_norm": 0.0, - "learning_rate": 1.0431832366504104e-05, - "loss": 0.9113, + "learning_rate": 1.045414942587556e-05, + "loss": 0.8729, "step": 17679 }, { - "epoch": 0.5017026106696936, + "epoch": 0.5010059792003174, "grad_norm": 0.0, - "learning_rate": 1.0430914143957651e-05, - "loss": 0.9243, + "learning_rate": 1.0453232581665783e-05, + "loss": 0.7876, "step": 17680 }, { - "epoch": 0.5017309875141884, + "epoch": 0.5010343166425798, "grad_norm": 0.0, - "learning_rate": 1.0429995917771247e-05, - "loss": 0.8589, + "learning_rate": 1.0452315733638257e-05, + "loss": 0.8498, "step": 17681 }, { - "epoch": 0.5017593643586833, + "epoch": 0.5010626540848423, "grad_norm": 0.0, - "learning_rate": 1.0429077687952645e-05, - "loss": 0.9325, + "learning_rate": 1.0451398881800708e-05, + "loss": 0.9907, "step": 17682 }, { - "epoch": 0.5017877412031783, + "epoch": 0.5010909915271048, "grad_norm": 0.0, - "learning_rate": 1.0428159454509605e-05, - "loss": 0.9713, + "learning_rate": 1.0450482026160855e-05, + "loss": 0.9177, "step": 17683 }, { - "epoch": 0.5018161180476731, + "epoch": 0.5011193289693672, "grad_norm": 0.0, - "learning_rate": 1.0427241217449886e-05, - "loss": 0.8698, + "learning_rate": 1.044956516672643e-05, + "loss": 0.9574, "step": 17684 }, { - "epoch": 0.501844494892168, + "epoch": 0.5011476664116297, "grad_norm": 0.0, - "learning_rate": 1.0426322976781238e-05, - "loss": 0.8771, + "learning_rate": 1.044864830350515e-05, + "loss": 0.8944, "step": 17685 }, { - "epoch": 0.5018728717366628, + "epoch": 0.5011760038538922, "grad_norm": 0.0, - "learning_rate": 1.0425404732511419e-05, - "loss": 0.8343, + "learning_rate": 1.044773143650474e-05, + "loss": 0.9388, "step": 17686 }, { - "epoch": 0.5019012485811578, + "epoch": 0.5012043412961547, "grad_norm": 0.0, - "learning_rate": 1.0424486484648192e-05, - "loss": 0.9074, + "learning_rate": 1.0446814565732919e-05, + "loss": 0.8774, "step": 17687 }, { - "epoch": 0.5019296254256527, + "epoch": 0.501232678738417, "grad_norm": 0.0, - "learning_rate": 1.0423568233199306e-05, - "loss": 1.011, + "learning_rate": 1.0445897691197412e-05, + "loss": 0.9685, "step": 17688 }, { - "epoch": 0.5019580022701475, + "epoch": 0.5012610161806795, "grad_norm": 0.0, - "learning_rate": 1.042264997817252e-05, - "loss": 0.8968, + "learning_rate": 1.0444980812905945e-05, + "loss": 0.7891, "step": 17689 }, { - "epoch": 0.5019863791146425, + "epoch": 0.501289353622942, "grad_norm": 0.0, - "learning_rate": 1.042173171957559e-05, - "loss": 0.825, + "learning_rate": 1.044406393086624e-05, + "loss": 0.9582, "step": 17690 }, { - "epoch": 0.5020147559591374, + "epoch": 0.5013176910652044, "grad_norm": 0.0, - "learning_rate": 1.0420813457416275e-05, - "loss": 0.8893, + "learning_rate": 1.0443147045086017e-05, + "loss": 0.8952, "step": 17691 }, { - "epoch": 0.5020431328036322, + "epoch": 0.5013460285074669, "grad_norm": 0.0, - "learning_rate": 1.0419895191702328e-05, - "loss": 0.8931, + "learning_rate": 1.0442230155573005e-05, + "loss": 0.9482, "step": 17692 }, { - "epoch": 0.5020715096481271, + "epoch": 0.5013743659497294, "grad_norm": 0.0, - "learning_rate": 1.0418976922441506e-05, - "loss": 0.8634, + "learning_rate": 1.0441313262334925e-05, + "loss": 0.9994, "step": 17693 }, { - "epoch": 0.502099886492622, + "epoch": 0.5014027033919919, "grad_norm": 0.0, - "learning_rate": 1.0418058649641571e-05, - "loss": 0.9543, + "learning_rate": 1.0440396365379496e-05, + "loss": 0.9001, "step": 17694 }, { - "epoch": 0.5021282633371169, + "epoch": 0.5014310408342543, "grad_norm": 0.0, - "learning_rate": 1.0417140373310273e-05, - "loss": 0.9535, + "learning_rate": 1.0439479464714447e-05, + "loss": 0.8545, "step": 17695 }, { - "epoch": 0.5021566401816118, + "epoch": 0.5014593782765168, "grad_norm": 0.0, - "learning_rate": 1.0416222093455373e-05, - "loss": 0.9106, + "learning_rate": 1.0438562560347499e-05, + "loss": 0.8483, "step": 17696 }, { - "epoch": 0.5021850170261067, + "epoch": 0.5014877157187793, "grad_norm": 0.0, - "learning_rate": 1.0415303810084626e-05, - "loss": 0.8617, + "learning_rate": 1.0437645652286374e-05, + "loss": 0.8907, "step": 17697 }, { - "epoch": 0.5022133938706016, + "epoch": 0.5015160531610416, "grad_norm": 0.0, - "learning_rate": 1.041438552320579e-05, - "loss": 0.9159, + "learning_rate": 1.04367287405388e-05, + "loss": 1.0026, "step": 17698 }, { - "epoch": 0.5022417707150965, + "epoch": 0.5015443906033041, "grad_norm": 0.0, - "learning_rate": 1.041346723282662e-05, - "loss": 0.9764, + "learning_rate": 1.0435811825112496e-05, + "loss": 0.917, "step": 17699 }, { - "epoch": 0.5022701475595914, + "epoch": 0.5015727280455666, "grad_norm": 0.0, - "learning_rate": 1.0412548938954874e-05, - "loss": 0.8589, + "learning_rate": 1.0434894906015188e-05, + "loss": 0.8313, "step": 17700 }, { - "epoch": 0.5022985244040863, + "epoch": 0.5016010654878291, "grad_norm": 0.0, - "learning_rate": 1.0411630641598307e-05, - "loss": 0.9672, + "learning_rate": 1.0433977983254598e-05, + "loss": 0.9057, "step": 17701 }, { - "epoch": 0.5023269012485811, + "epoch": 0.5016294029300915, "grad_norm": 0.0, - "learning_rate": 1.0410712340764676e-05, - "loss": 1.0093, + "learning_rate": 1.0433061056838449e-05, + "loss": 0.8657, "step": 17702 }, { - "epoch": 0.502355278093076, + "epoch": 0.501657740372354, "grad_norm": 0.0, - "learning_rate": 1.0409794036461745e-05, - "loss": 0.8278, + "learning_rate": 1.0432144126774469e-05, + "loss": 0.9168, "step": 17703 }, { - "epoch": 0.502383654937571, + "epoch": 0.5016860778146165, "grad_norm": 0.0, - "learning_rate": 1.0408875728697264e-05, - "loss": 0.7892, + "learning_rate": 1.0431227193070374e-05, + "loss": 0.9673, "step": 17704 }, { - "epoch": 0.5024120317820658, + "epoch": 0.5017144152568789, "grad_norm": 0.0, - "learning_rate": 1.0407957417478987e-05, - "loss": 0.841, + "learning_rate": 1.0430310255733895e-05, + "loss": 0.9047, "step": 17705 }, { - "epoch": 0.5024404086265607, + "epoch": 0.5017427526991414, "grad_norm": 0.0, - "learning_rate": 1.0407039102814677e-05, - "loss": 0.9373, + "learning_rate": 1.0429393314772756e-05, + "loss": 0.9508, "step": 17706 }, { - "epoch": 0.5024687854710557, + "epoch": 0.5017710901414039, "grad_norm": 0.0, - "learning_rate": 1.0406120784712093e-05, - "loss": 0.8046, + "learning_rate": 1.0428476370194675e-05, + "loss": 0.9252, "step": 17707 }, { - "epoch": 0.5024971623155505, + "epoch": 0.5017994275836662, "grad_norm": 0.0, - "learning_rate": 1.0405202463178985e-05, - "loss": 0.9108, + "learning_rate": 1.0427559422007375e-05, + "loss": 0.8915, "step": 17708 }, { - "epoch": 0.5025255391600454, + "epoch": 0.5018277650259287, "grad_norm": 0.0, - "learning_rate": 1.0404284138223113e-05, - "loss": 0.8741, + "learning_rate": 1.0426642470218587e-05, + "loss": 0.9696, "step": 17709 }, { - "epoch": 0.5025539160045402, + "epoch": 0.5018561024681912, "grad_norm": 0.0, - "learning_rate": 1.0403365809852236e-05, - "loss": 0.8483, + "learning_rate": 1.042572551483603e-05, + "loss": 0.9515, "step": 17710 }, { - "epoch": 0.5025822928490352, + "epoch": 0.5018844399104537, "grad_norm": 0.0, - "learning_rate": 1.0402447478074112e-05, - "loss": 0.8561, + "learning_rate": 1.0424808555867429e-05, + "loss": 0.9288, "step": 17711 }, { - "epoch": 0.5026106696935301, + "epoch": 0.5019127773527161, "grad_norm": 0.0, - "learning_rate": 1.040152914289649e-05, - "loss": 0.9197, + "learning_rate": 1.0423891593320507e-05, + "loss": 0.901, "step": 17712 }, { - "epoch": 0.5026390465380249, + "epoch": 0.5019411147949786, "grad_norm": 0.0, - "learning_rate": 1.0400610804327141e-05, - "loss": 0.9368, + "learning_rate": 1.042297462720299e-05, + "loss": 0.92, "step": 17713 }, { - "epoch": 0.5026674233825199, + "epoch": 0.5019694522372411, "grad_norm": 0.0, - "learning_rate": 1.0399692462373811e-05, - "loss": 0.8923, + "learning_rate": 1.0422057657522602e-05, + "loss": 0.8894, "step": 17714 }, { - "epoch": 0.5026958002270148, + "epoch": 0.5019977896795035, "grad_norm": 0.0, - "learning_rate": 1.039877411704426e-05, - "loss": 0.9793, + "learning_rate": 1.0421140684287063e-05, + "loss": 0.764, "step": 17715 }, { - "epoch": 0.5027241770715096, + "epoch": 0.502026127121766, "grad_norm": 0.0, - "learning_rate": 1.0397855768346246e-05, - "loss": 1.0094, + "learning_rate": 1.04202237075041e-05, + "loss": 0.9155, "step": 17716 }, { - "epoch": 0.5027525539160045, + "epoch": 0.5020544645640285, "grad_norm": 0.0, - "learning_rate": 1.0396937416287527e-05, - "loss": 0.8724, + "learning_rate": 1.0419306727181438e-05, + "loss": 0.791, "step": 17717 }, { - "epoch": 0.5027809307604995, + "epoch": 0.502082802006291, "grad_norm": 0.0, - "learning_rate": 1.0396019060875862e-05, - "loss": 0.8518, + "learning_rate": 1.04183897433268e-05, + "loss": 0.8645, "step": 17718 }, { - "epoch": 0.5028093076049943, + "epoch": 0.5021111394485533, "grad_norm": 0.0, - "learning_rate": 1.0395100702119003e-05, - "loss": 0.9765, + "learning_rate": 1.0417472755947908e-05, + "loss": 0.8194, "step": 17719 }, { - "epoch": 0.5028376844494892, + "epoch": 0.5021394768908158, "grad_norm": 0.0, - "learning_rate": 1.0394182340024712e-05, - "loss": 0.7856, + "learning_rate": 1.0416555765052487e-05, + "loss": 0.8582, "step": 17720 }, { - "epoch": 0.5028660612939841, + "epoch": 0.5021678143330783, "grad_norm": 0.0, - "learning_rate": 1.0393263974600747e-05, - "loss": 0.8517, + "learning_rate": 1.0415638770648266e-05, + "loss": 0.9344, "step": 17721 }, { - "epoch": 0.502894438138479, + "epoch": 0.5021961517753407, "grad_norm": 0.0, - "learning_rate": 1.039234560585486e-05, - "loss": 0.8465, + "learning_rate": 1.0414721772742962e-05, + "loss": 0.888, "step": 17722 }, { - "epoch": 0.5029228149829739, + "epoch": 0.5022244892176032, "grad_norm": 0.0, - "learning_rate": 1.0391427233794813e-05, - "loss": 0.9319, + "learning_rate": 1.0413804771344305e-05, + "loss": 0.8125, "step": 17723 }, { - "epoch": 0.5029511918274688, + "epoch": 0.5022528266598657, "grad_norm": 0.0, - "learning_rate": 1.0390508858428363e-05, - "loss": 0.7975, + "learning_rate": 1.0412887766460017e-05, + "loss": 0.9528, "step": 17724 }, { - "epoch": 0.5029795686719637, + "epoch": 0.5022811641021282, "grad_norm": 0.0, - "learning_rate": 1.0389590479763267e-05, - "loss": 0.8778, + "learning_rate": 1.0411970758097818e-05, + "loss": 0.8768, "step": 17725 }, { - "epoch": 0.5030079455164586, + "epoch": 0.5023095015443906, "grad_norm": 0.0, - "learning_rate": 1.0388672097807282e-05, - "loss": 0.7959, + "learning_rate": 1.041105374626544e-05, + "loss": 0.873, "step": 17726 }, { - "epoch": 0.5030363223609534, + "epoch": 0.5023378389866531, "grad_norm": 0.0, - "learning_rate": 1.038775371256817e-05, - "loss": 0.9727, + "learning_rate": 1.0410136730970603e-05, + "loss": 0.8928, "step": 17727 }, { - "epoch": 0.5030646992054484, + "epoch": 0.5023661764289156, "grad_norm": 0.0, - "learning_rate": 1.0386835324053682e-05, - "loss": 0.9275, + "learning_rate": 1.040921971222103e-05, + "loss": 0.7908, "step": 17728 }, { - "epoch": 0.5030930760499432, + "epoch": 0.502394513871178, "grad_norm": 0.0, - "learning_rate": 1.0385916932271577e-05, - "loss": 0.9419, + "learning_rate": 1.0408302690024447e-05, + "loss": 1.0005, "step": 17729 }, { - "epoch": 0.5031214528944381, + "epoch": 0.5024228513134404, "grad_norm": 0.0, - "learning_rate": 1.0384998537229618e-05, - "loss": 0.8599, + "learning_rate": 1.040738566438858e-05, + "loss": 0.9338, "step": 17730 }, { - "epoch": 0.5031498297389331, + "epoch": 0.5024511887557029, "grad_norm": 0.0, - "learning_rate": 1.0384080138935555e-05, - "loss": 0.8624, + "learning_rate": 1.0406468635321157e-05, + "loss": 0.828, "step": 17731 }, { - "epoch": 0.5031782065834279, + "epoch": 0.5024795261979653, "grad_norm": 0.0, - "learning_rate": 1.0383161737397154e-05, - "loss": 0.9467, + "learning_rate": 1.0405551602829893e-05, + "loss": 0.8519, "step": 17732 }, { - "epoch": 0.5032065834279228, + "epoch": 0.5025078636402278, "grad_norm": 0.0, - "learning_rate": 1.0382243332622164e-05, - "loss": 0.7986, + "learning_rate": 1.0404634566922516e-05, + "loss": 0.9304, "step": 17733 }, { - "epoch": 0.5032349602724177, + "epoch": 0.5025362010824903, "grad_norm": 0.0, - "learning_rate": 1.038132492461835e-05, - "loss": 0.8186, + "learning_rate": 1.0403717527606757e-05, + "loss": 0.9146, "step": 17734 }, { - "epoch": 0.5032633371169126, + "epoch": 0.5025645385247528, "grad_norm": 0.0, - "learning_rate": 1.0380406513393469e-05, - "loss": 0.8275, + "learning_rate": 1.040280048489033e-05, + "loss": 0.8405, "step": 17735 }, { - "epoch": 0.5032917139614075, + "epoch": 0.5025928759670152, "grad_norm": 0.0, - "learning_rate": 1.0379488098955275e-05, - "loss": 0.9676, + "learning_rate": 1.0401883438780966e-05, + "loss": 0.9566, "step": 17736 }, { - "epoch": 0.5033200908059023, + "epoch": 0.5026212134092777, "grad_norm": 0.0, - "learning_rate": 1.0378569681311528e-05, - "loss": 1.0098, + "learning_rate": 1.040096638928639e-05, + "loss": 0.936, "step": 17737 }, { - "epoch": 0.5033484676503973, + "epoch": 0.5026495508515402, "grad_norm": 0.0, - "learning_rate": 1.0377651260469987e-05, - "loss": 0.8723, + "learning_rate": 1.0400049336414323e-05, + "loss": 0.9631, "step": 17738 }, { - "epoch": 0.5033768444948922, + "epoch": 0.5026778882938026, "grad_norm": 0.0, - "learning_rate": 1.0376732836438406e-05, - "loss": 0.8192, + "learning_rate": 1.0399132280172494e-05, + "loss": 0.849, "step": 17739 }, { - "epoch": 0.503405221339387, + "epoch": 0.502706225736065, "grad_norm": 0.0, - "learning_rate": 1.0375814409224547e-05, - "loss": 0.9357, + "learning_rate": 1.0398215220568629e-05, + "loss": 0.9843, "step": 17740 }, { - "epoch": 0.503433598183882, + "epoch": 0.5027345631783275, "grad_norm": 0.0, - "learning_rate": 1.0374895978836169e-05, - "loss": 0.9536, + "learning_rate": 1.0397298157610442e-05, + "loss": 0.9782, "step": 17741 }, { - "epoch": 0.5034619750283769, + "epoch": 0.50276290062059, "grad_norm": 0.0, - "learning_rate": 1.0373977545281027e-05, - "loss": 0.7945, + "learning_rate": 1.0396381091305666e-05, + "loss": 0.8057, "step": 17742 }, { - "epoch": 0.5034903518728717, + "epoch": 0.5027912380628524, "grad_norm": 0.0, - "learning_rate": 1.0373059108566878e-05, - "loss": 0.906, + "learning_rate": 1.0395464021662031e-05, + "loss": 0.789, "step": 17743 }, { - "epoch": 0.5035187287173666, + "epoch": 0.5028195755051149, "grad_norm": 0.0, - "learning_rate": 1.0372140668701483e-05, - "loss": 0.8094, + "learning_rate": 1.0394546948687253e-05, + "loss": 0.9099, "step": 17744 }, { - "epoch": 0.5035471055618616, + "epoch": 0.5028479129473774, "grad_norm": 0.0, - "learning_rate": 1.0371222225692601e-05, - "loss": 0.8036, + "learning_rate": 1.0393629872389057e-05, + "loss": 0.8968, "step": 17745 }, { - "epoch": 0.5035754824063564, + "epoch": 0.5028762503896398, "grad_norm": 0.0, - "learning_rate": 1.0370303779547985e-05, - "loss": 0.9676, + "learning_rate": 1.0392712792775172e-05, + "loss": 0.8969, "step": 17746 }, { - "epoch": 0.5036038592508513, + "epoch": 0.5029045878319023, "grad_norm": 0.0, - "learning_rate": 1.0369385330275397e-05, - "loss": 0.8879, + "learning_rate": 1.0391795709853323e-05, + "loss": 0.8545, "step": 17747 }, { - "epoch": 0.5036322360953462, + "epoch": 0.5029329252741648, "grad_norm": 0.0, - "learning_rate": 1.0368466877882595e-05, - "loss": 0.7916, + "learning_rate": 1.039087862363123e-05, + "loss": 0.9463, "step": 17748 }, { - "epoch": 0.5036606129398411, + "epoch": 0.5029612627164272, "grad_norm": 0.0, - "learning_rate": 1.0367548422377336e-05, - "loss": 0.8186, + "learning_rate": 1.0389961534116622e-05, + "loss": 0.9225, "step": 17749 }, { - "epoch": 0.503688989784336, + "epoch": 0.5029896001586897, "grad_norm": 0.0, - "learning_rate": 1.036662996376738e-05, - "loss": 0.8943, + "learning_rate": 1.0389044441317224e-05, + "loss": 0.9385, "step": 17750 }, { - "epoch": 0.5037173666288308, + "epoch": 0.5030179376009521, "grad_norm": 0.0, - "learning_rate": 1.0365711502060485e-05, - "loss": 0.8177, + "learning_rate": 1.0388127345240762e-05, + "loss": 0.9216, "step": 17751 }, { - "epoch": 0.5037457434733258, + "epoch": 0.5030462750432146, "grad_norm": 0.0, - "learning_rate": 1.0364793037264408e-05, - "loss": 0.8977, + "learning_rate": 1.0387210245894959e-05, + "loss": 0.882, "step": 17752 }, { - "epoch": 0.5037741203178207, + "epoch": 0.503074612485477, "grad_norm": 0.0, - "learning_rate": 1.0363874569386907e-05, - "loss": 0.8737, + "learning_rate": 1.038629314328754e-05, + "loss": 0.875, "step": 17753 }, { - "epoch": 0.5038024971623155, + "epoch": 0.5031029499277395, "grad_norm": 0.0, - "learning_rate": 1.0362956098435739e-05, - "loss": 0.946, + "learning_rate": 1.0385376037426227e-05, + "loss": 0.9159, "step": 17754 }, { - "epoch": 0.5038308740068105, + "epoch": 0.503131287370002, "grad_norm": 0.0, - "learning_rate": 1.0362037624418668e-05, - "loss": 0.8761, + "learning_rate": 1.038445892831875e-05, + "loss": 0.915, "step": 17755 }, { - "epoch": 0.5038592508513053, + "epoch": 0.5031596248122644, "grad_norm": 0.0, - "learning_rate": 1.0361119147343448e-05, - "loss": 0.8754, + "learning_rate": 1.0383541815972835e-05, + "loss": 0.7697, "step": 17756 }, { - "epoch": 0.5038876276958002, + "epoch": 0.5031879622545269, "grad_norm": 0.0, - "learning_rate": 1.0360200667217839e-05, - "loss": 0.871, + "learning_rate": 1.0382624700396204e-05, + "loss": 0.9447, "step": 17757 }, { - "epoch": 0.5039160045402952, + "epoch": 0.5032162996967894, "grad_norm": 0.0, - "learning_rate": 1.0359282184049599e-05, - "loss": 0.9577, + "learning_rate": 1.0381707581596581e-05, + "loss": 0.8691, "step": 17758 }, { - "epoch": 0.50394438138479, + "epoch": 0.5032446371390519, "grad_norm": 0.0, - "learning_rate": 1.0358363697846484e-05, - "loss": 0.933, + "learning_rate": 1.0380790459581695e-05, + "loss": 0.8911, "step": 17759 }, { - "epoch": 0.5039727582292849, + "epoch": 0.5032729745813143, "grad_norm": 0.0, - "learning_rate": 1.0357445208616256e-05, - "loss": 0.7959, + "learning_rate": 1.037987333435927e-05, + "loss": 0.9435, "step": 17760 }, { - "epoch": 0.5040011350737797, + "epoch": 0.5033013120235768, "grad_norm": 0.0, - "learning_rate": 1.0356526716366674e-05, - "loss": 0.9021, + "learning_rate": 1.037895620593703e-05, + "loss": 0.9362, "step": 17761 }, { - "epoch": 0.5040295119182747, + "epoch": 0.5033296494658392, "grad_norm": 0.0, - "learning_rate": 1.035560822110549e-05, - "loss": 0.9115, + "learning_rate": 1.0378039074322699e-05, + "loss": 0.8417, "step": 17762 }, { - "epoch": 0.5040578887627696, + "epoch": 0.5033579869081016, "grad_norm": 0.0, - "learning_rate": 1.0354689722840473e-05, - "loss": 1.0048, + "learning_rate": 1.0377121939524009e-05, + "loss": 0.9706, "step": 17763 }, { - "epoch": 0.5040862656072644, + "epoch": 0.5033863243503641, "grad_norm": 0.0, - "learning_rate": 1.0353771221579372e-05, - "loss": 0.9594, + "learning_rate": 1.0376204801548677e-05, + "loss": 0.8903, "step": 17764 }, { - "epoch": 0.5041146424517594, + "epoch": 0.5034146617926266, "grad_norm": 0.0, - "learning_rate": 1.0352852717329952e-05, - "loss": 0.9658, + "learning_rate": 1.0375287660404436e-05, + "loss": 0.9099, "step": 17765 }, { - "epoch": 0.5041430192962543, + "epoch": 0.5034429992348891, "grad_norm": 0.0, - "learning_rate": 1.0351934210099968e-05, - "loss": 0.9192, + "learning_rate": 1.0374370516099004e-05, + "loss": 0.977, "step": 17766 }, { - "epoch": 0.5041713961407491, + "epoch": 0.5034713366771515, "grad_norm": 0.0, - "learning_rate": 1.035101569989718e-05, - "loss": 0.8314, + "learning_rate": 1.0373453368640112e-05, + "loss": 0.9143, "step": 17767 }, { - "epoch": 0.504199772985244, + "epoch": 0.503499674119414, "grad_norm": 0.0, - "learning_rate": 1.035009718672935e-05, - "loss": 0.87, + "learning_rate": 1.0372536218035482e-05, + "loss": 0.8715, "step": 17768 }, { - "epoch": 0.504228149829739, + "epoch": 0.5035280115616765, "grad_norm": 0.0, - "learning_rate": 1.0349178670604227e-05, - "loss": 0.8708, + "learning_rate": 1.0371619064292844e-05, + "loss": 0.7415, "step": 17769 }, { - "epoch": 0.5042565266742338, + "epoch": 0.5035563490039389, "grad_norm": 0.0, - "learning_rate": 1.0348260151529577e-05, - "loss": 0.9214, + "learning_rate": 1.0370701907419918e-05, + "loss": 0.8991, "step": 17770 }, { - "epoch": 0.5042849035187287, + "epoch": 0.5035846864462014, "grad_norm": 0.0, - "learning_rate": 1.034734162951316e-05, - "loss": 0.9713, + "learning_rate": 1.0369784747424434e-05, + "loss": 0.8906, "step": 17771 }, { - "epoch": 0.5043132803632236, + "epoch": 0.5036130238884639, "grad_norm": 0.0, - "learning_rate": 1.0346423104562735e-05, - "loss": 0.8207, + "learning_rate": 1.0368867584314115e-05, + "loss": 0.8243, "step": 17772 }, { - "epoch": 0.5043416572077185, + "epoch": 0.5036413613307262, "grad_norm": 0.0, - "learning_rate": 1.0345504576686053e-05, - "loss": 0.9312, + "learning_rate": 1.0367950418096689e-05, + "loss": 0.8502, "step": 17773 }, { - "epoch": 0.5043700340522134, + "epoch": 0.5036696987729887, "grad_norm": 0.0, - "learning_rate": 1.0344586045890882e-05, - "loss": 0.9186, + "learning_rate": 1.0367033248779876e-05, + "loss": 0.9366, "step": 17774 }, { - "epoch": 0.5043984108967083, + "epoch": 0.5036980362152512, "grad_norm": 0.0, - "learning_rate": 1.034366751218498e-05, - "loss": 0.8274, + "learning_rate": 1.0366116076371407e-05, + "loss": 0.9466, "step": 17775 }, { - "epoch": 0.5044267877412032, + "epoch": 0.5037263736575137, "grad_norm": 0.0, - "learning_rate": 1.0342748975576097e-05, - "loss": 0.9791, + "learning_rate": 1.0365198900879008e-05, + "loss": 0.8259, "step": 17776 }, { - "epoch": 0.5044551645856981, + "epoch": 0.5037547110997761, "grad_norm": 0.0, - "learning_rate": 1.0341830436072001e-05, - "loss": 0.9238, + "learning_rate": 1.0364281722310403e-05, + "loss": 0.8657, "step": 17777 }, { - "epoch": 0.5044835414301929, + "epoch": 0.5037830485420386, "grad_norm": 0.0, - "learning_rate": 1.0340911893680447e-05, - "loss": 0.9195, + "learning_rate": 1.0363364540673315e-05, + "loss": 1.0197, "step": 17778 }, { - "epoch": 0.5045119182746879, + "epoch": 0.5038113859843011, "grad_norm": 0.0, - "learning_rate": 1.0339993348409197e-05, - "loss": 0.9577, + "learning_rate": 1.0362447355975475e-05, + "loss": 0.8303, "step": 17779 }, { - "epoch": 0.5045402951191827, + "epoch": 0.5038397234265635, "grad_norm": 0.0, - "learning_rate": 1.0339074800266005e-05, - "loss": 0.8399, + "learning_rate": 1.0361530168224605e-05, + "loss": 0.9433, "step": 17780 }, { - "epoch": 0.5045686719636776, + "epoch": 0.503868060868826, "grad_norm": 0.0, - "learning_rate": 1.0338156249258635e-05, - "loss": 0.8581, + "learning_rate": 1.0360612977428435e-05, + "loss": 0.9787, "step": 17781 }, { - "epoch": 0.5045970488081726, + "epoch": 0.5038963983110885, "grad_norm": 0.0, - "learning_rate": 1.0337237695394844e-05, - "loss": 0.9124, + "learning_rate": 1.0359695783594687e-05, + "loss": 0.9453, "step": 17782 }, { - "epoch": 0.5046254256526674, + "epoch": 0.503924735753351, "grad_norm": 0.0, - "learning_rate": 1.0336319138682389e-05, - "loss": 1.0091, + "learning_rate": 1.0358778586731084e-05, + "loss": 0.7892, "step": 17783 }, { - "epoch": 0.5046538024971623, + "epoch": 0.5039530731956133, "grad_norm": 0.0, - "learning_rate": 1.0335400579129034e-05, - "loss": 0.8739, + "learning_rate": 1.035786138684536e-05, + "loss": 0.903, "step": 17784 }, { - "epoch": 0.5046821793416572, + "epoch": 0.5039814106378758, "grad_norm": 0.0, - "learning_rate": 1.0334482016742533e-05, - "loss": 0.9254, + "learning_rate": 1.0356944183945237e-05, + "loss": 0.8894, "step": 17785 }, { - "epoch": 0.5047105561861521, + "epoch": 0.5040097480801383, "grad_norm": 0.0, - "learning_rate": 1.0333563451530648e-05, - "loss": 0.9817, + "learning_rate": 1.0356026978038437e-05, + "loss": 0.9012, "step": 17786 }, { - "epoch": 0.504738933030647, + "epoch": 0.5040380855224007, "grad_norm": 0.0, - "learning_rate": 1.0332644883501138e-05, - "loss": 0.9964, + "learning_rate": 1.0355109769132689e-05, + "loss": 0.8456, "step": 17787 }, { - "epoch": 0.5047673098751418, + "epoch": 0.5040664229646632, "grad_norm": 0.0, - "learning_rate": 1.0331726312661761e-05, - "loss": 0.8076, + "learning_rate": 1.0354192557235725e-05, + "loss": 0.8547, "step": 17788 }, { - "epoch": 0.5047956867196368, + "epoch": 0.5040947604069257, "grad_norm": 0.0, - "learning_rate": 1.0330807739020278e-05, - "loss": 0.872, + "learning_rate": 1.0353275342355262e-05, + "loss": 0.8202, "step": 17789 }, { - "epoch": 0.5048240635641317, + "epoch": 0.5041230978491882, "grad_norm": 0.0, - "learning_rate": 1.0329889162584446e-05, - "loss": 0.9042, + "learning_rate": 1.0352358124499031e-05, + "loss": 0.9204, "step": 17790 }, { - "epoch": 0.5048524404086265, + "epoch": 0.5041514352914506, "grad_norm": 0.0, - "learning_rate": 1.0328970583362026e-05, - "loss": 0.8775, + "learning_rate": 1.0351440903674757e-05, + "loss": 0.9117, "step": 17791 }, { - "epoch": 0.5048808172531215, + "epoch": 0.5041797727337131, "grad_norm": 0.0, - "learning_rate": 1.0328052001360778e-05, - "loss": 0.8427, + "learning_rate": 1.0350523679890163e-05, + "loss": 0.858, "step": 17792 }, { - "epoch": 0.5049091940976164, + "epoch": 0.5042081101759756, "grad_norm": 0.0, - "learning_rate": 1.0327133416588457e-05, - "loss": 0.9014, + "learning_rate": 1.0349606453152979e-05, + "loss": 0.841, "step": 17793 }, { - "epoch": 0.5049375709421112, + "epoch": 0.5042364476182379, "grad_norm": 0.0, - "learning_rate": 1.0326214829052826e-05, - "loss": 0.9946, + "learning_rate": 1.0348689223470932e-05, + "loss": 0.9999, "step": 17794 }, { - "epoch": 0.5049659477866061, + "epoch": 0.5042647850605004, "grad_norm": 0.0, - "learning_rate": 1.0325296238761644e-05, - "loss": 0.8772, + "learning_rate": 1.0347771990851742e-05, + "loss": 0.8665, "step": 17795 }, { - "epoch": 0.504994324631101, + "epoch": 0.5042931225027629, "grad_norm": 0.0, - "learning_rate": 1.0324377645722671e-05, - "loss": 0.9061, + "learning_rate": 1.0346854755303143e-05, + "loss": 0.81, "step": 17796 }, { - "epoch": 0.5050227014755959, + "epoch": 0.5043214599450253, "grad_norm": 0.0, - "learning_rate": 1.0323459049943665e-05, - "loss": 0.8116, + "learning_rate": 1.0345937516832858e-05, + "loss": 0.9191, "step": 17797 }, { - "epoch": 0.5050510783200908, + "epoch": 0.5043497973872878, "grad_norm": 0.0, - "learning_rate": 1.0322540451432385e-05, - "loss": 0.8344, + "learning_rate": 1.0345020275448612e-05, + "loss": 0.883, "step": 17798 }, { - "epoch": 0.5050794551645857, + "epoch": 0.5043781348295503, "grad_norm": 0.0, - "learning_rate": 1.0321621850196595e-05, - "loss": 0.8835, + "learning_rate": 1.034410303115813e-05, + "loss": 0.8449, "step": 17799 }, { - "epoch": 0.5051078320090806, + "epoch": 0.5044064722718128, "grad_norm": 0.0, - "learning_rate": 1.0320703246244044e-05, - "loss": 0.8471, + "learning_rate": 1.034318578396914e-05, + "loss": 0.8154, "step": 17800 }, { - "epoch": 0.5051362088535755, + "epoch": 0.5044348097140752, "grad_norm": 0.0, - "learning_rate": 1.0319784639582502e-05, - "loss": 0.7441, + "learning_rate": 1.0342268533889373e-05, + "loss": 0.7963, "step": 17801 }, { - "epoch": 0.5051645856980703, + "epoch": 0.5044631471563377, "grad_norm": 0.0, - "learning_rate": 1.0318866030219727e-05, - "loss": 0.7939, + "learning_rate": 1.034135128092655e-05, + "loss": 0.9598, "step": 17802 }, { - "epoch": 0.5051929625425653, + "epoch": 0.5044914845986002, "grad_norm": 0.0, - "learning_rate": 1.0317947418163471e-05, - "loss": 0.9181, + "learning_rate": 1.0340434025088396e-05, + "loss": 0.9213, "step": 17803 }, { - "epoch": 0.5052213393870602, + "epoch": 0.5045198220408625, "grad_norm": 0.0, - "learning_rate": 1.0317028803421506e-05, - "loss": 0.8429, + "learning_rate": 1.0339516766382643e-05, + "loss": 0.8914, "step": 17804 }, { - "epoch": 0.505249716231555, + "epoch": 0.504548159483125, "grad_norm": 0.0, - "learning_rate": 1.031611018600158e-05, - "loss": 0.8791, + "learning_rate": 1.033859950481701e-05, + "loss": 0.8809, "step": 17805 }, { - "epoch": 0.50527809307605, + "epoch": 0.5045764969253875, "grad_norm": 0.0, - "learning_rate": 1.0315191565911458e-05, - "loss": 0.9157, + "learning_rate": 1.033768224039923e-05, + "loss": 0.8162, "step": 17806 }, { - "epoch": 0.5053064699205448, + "epoch": 0.50460483436765, "grad_norm": 0.0, - "learning_rate": 1.0314272943158899e-05, - "loss": 0.7956, + "learning_rate": 1.0336764973137026e-05, + "loss": 0.8537, "step": 17807 }, { - "epoch": 0.5053348467650397, + "epoch": 0.5046331718099124, "grad_norm": 0.0, - "learning_rate": 1.0313354317751658e-05, - "loss": 0.8283, + "learning_rate": 1.0335847703038126e-05, + "loss": 0.8112, "step": 17808 }, { - "epoch": 0.5053632236095347, + "epoch": 0.5046615092521749, "grad_norm": 0.0, - "learning_rate": 1.0312435689697504e-05, - "loss": 0.8284, + "learning_rate": 1.0334930430110258e-05, + "loss": 0.8972, "step": 17809 }, { - "epoch": 0.5053916004540295, + "epoch": 0.5046898466944374, "grad_norm": 0.0, - "learning_rate": 1.031151705900419e-05, - "loss": 0.9367, + "learning_rate": 1.0334013154361147e-05, + "loss": 0.7693, "step": 17810 }, { - "epoch": 0.5054199772985244, + "epoch": 0.5047181841366998, "grad_norm": 0.0, - "learning_rate": 1.031059842567948e-05, - "loss": 1.0543, + "learning_rate": 1.0333095875798517e-05, + "loss": 0.8933, "step": 17811 }, { - "epoch": 0.5054483541430193, + "epoch": 0.5047465215789623, "grad_norm": 0.0, - "learning_rate": 1.030967978973113e-05, - "loss": 0.875, + "learning_rate": 1.0332178594430096e-05, + "loss": 0.8294, "step": 17812 }, { - "epoch": 0.5054767309875142, + "epoch": 0.5047748590212248, "grad_norm": 0.0, - "learning_rate": 1.0308761151166896e-05, - "loss": 0.9466, + "learning_rate": 1.0331261310263612e-05, + "loss": 0.8811, "step": 17813 }, { - "epoch": 0.5055051078320091, + "epoch": 0.5048031964634873, "grad_norm": 0.0, - "learning_rate": 1.0307842509994548e-05, - "loss": 0.8745, + "learning_rate": 1.0330344023306791e-05, + "loss": 0.8836, "step": 17814 }, { - "epoch": 0.5055334846765039, + "epoch": 0.5048315339057496, "grad_norm": 0.0, - "learning_rate": 1.0306923866221839e-05, - "loss": 0.9207, + "learning_rate": 1.032942673356736e-05, + "loss": 0.9303, "step": 17815 }, { - "epoch": 0.5055618615209989, + "epoch": 0.5048598713480121, "grad_norm": 0.0, - "learning_rate": 1.030600521985653e-05, - "loss": 0.9423, + "learning_rate": 1.0328509441053045e-05, + "loss": 0.8615, "step": 17816 }, { - "epoch": 0.5055902383654938, + "epoch": 0.5048882087902746, "grad_norm": 0.0, - "learning_rate": 1.030508657090638e-05, - "loss": 0.8647, + "learning_rate": 1.0327592145771574e-05, + "loss": 0.8198, "step": 17817 }, { - "epoch": 0.5056186152099886, + "epoch": 0.504916546232537, "grad_norm": 0.0, - "learning_rate": 1.0304167919379151e-05, - "loss": 0.9285, + "learning_rate": 1.0326674847730673e-05, + "loss": 1.0238, "step": 17818 }, { - "epoch": 0.5056469920544835, + "epoch": 0.5049448836747995, "grad_norm": 0.0, - "learning_rate": 1.0303249265282605e-05, - "loss": 0.9815, + "learning_rate": 1.0325757546938067e-05, + "loss": 0.9965, "step": 17819 }, { - "epoch": 0.5056753688989785, + "epoch": 0.504973221117062, "grad_norm": 0.0, - "learning_rate": 1.0302330608624494e-05, - "loss": 0.8737, + "learning_rate": 1.0324840243401481e-05, + "loss": 0.8842, "step": 17820 }, { - "epoch": 0.5057037457434733, + "epoch": 0.5050015585593244, "grad_norm": 0.0, - "learning_rate": 1.0301411949412586e-05, - "loss": 0.8835, + "learning_rate": 1.032392293712865e-05, + "loss": 0.7868, "step": 17821 }, { - "epoch": 0.5057321225879682, + "epoch": 0.5050298960015869, "grad_norm": 0.0, - "learning_rate": 1.0300493287654635e-05, - "loss": 0.8343, + "learning_rate": 1.0323005628127297e-05, + "loss": 0.9366, "step": 17822 }, { - "epoch": 0.5057604994324632, + "epoch": 0.5050582334438494, "grad_norm": 0.0, - "learning_rate": 1.0299574623358406e-05, - "loss": 0.9073, + "learning_rate": 1.0322088316405145e-05, + "loss": 0.9129, "step": 17823 }, { - "epoch": 0.505788876276958, + "epoch": 0.5050865708861119, "grad_norm": 0.0, - "learning_rate": 1.0298655956531653e-05, - "loss": 0.8469, + "learning_rate": 1.0321171001969924e-05, + "loss": 0.8851, "step": 17824 }, { - "epoch": 0.5058172531214529, + "epoch": 0.5051149083283742, "grad_norm": 0.0, - "learning_rate": 1.0297737287182144e-05, - "loss": 0.8508, + "learning_rate": 1.032025368482936e-05, + "loss": 0.8791, "step": 17825 }, { - "epoch": 0.5058456299659478, + "epoch": 0.5051432457706367, "grad_norm": 0.0, - "learning_rate": 1.0296818615317634e-05, - "loss": 0.8605, + "learning_rate": 1.0319336364991179e-05, + "loss": 0.7894, "step": 17826 }, { - "epoch": 0.5058740068104427, + "epoch": 0.5051715832128992, "grad_norm": 0.0, - "learning_rate": 1.0295899940945884e-05, - "loss": 0.8937, + "learning_rate": 1.031841904246311e-05, + "loss": 0.8422, "step": 17827 }, { - "epoch": 0.5059023836549376, + "epoch": 0.5051999206551616, "grad_norm": 0.0, - "learning_rate": 1.0294981264074653e-05, - "loss": 0.8209, + "learning_rate": 1.0317501717252878e-05, + "loss": 0.9395, "step": 17828 }, { - "epoch": 0.5059307604994324, + "epoch": 0.5052282580974241, "grad_norm": 0.0, - "learning_rate": 1.0294062584711703e-05, - "loss": 0.8834, + "learning_rate": 1.0316584389368213e-05, + "loss": 0.9423, "step": 17829 }, { - "epoch": 0.5059591373439274, + "epoch": 0.5052565955396866, "grad_norm": 0.0, - "learning_rate": 1.0293143902864791e-05, - "loss": 0.7867, + "learning_rate": 1.0315667058816843e-05, + "loss": 0.9623, "step": 17830 }, { - "epoch": 0.5059875141884222, + "epoch": 0.5052849329819491, "grad_norm": 0.0, - "learning_rate": 1.0292225218541678e-05, - "loss": 0.9518, + "learning_rate": 1.031474972560649e-05, + "loss": 0.8587, "step": 17831 }, { - "epoch": 0.5060158910329171, + "epoch": 0.5053132704242115, "grad_norm": 0.0, - "learning_rate": 1.0291306531750129e-05, - "loss": 0.9514, + "learning_rate": 1.031383238974488e-05, + "loss": 0.921, "step": 17832 }, { - "epoch": 0.5060442678774121, + "epoch": 0.505341607866474, "grad_norm": 0.0, - "learning_rate": 1.0290387842497902e-05, - "loss": 0.8328, + "learning_rate": 1.0312915051239746e-05, + "loss": 0.7395, "step": 17833 }, { - "epoch": 0.5060726447219069, + "epoch": 0.5053699453087365, "grad_norm": 0.0, - "learning_rate": 1.0289469150792753e-05, - "loss": 0.9856, + "learning_rate": 1.0311997710098812e-05, + "loss": 0.935, "step": 17834 }, { - "epoch": 0.5061010215664018, + "epoch": 0.5053982827509989, "grad_norm": 0.0, - "learning_rate": 1.0288550456642445e-05, - "loss": 0.917, + "learning_rate": 1.0311080366329804e-05, + "loss": 0.8522, "step": 17835 }, { - "epoch": 0.5061293984108967, + "epoch": 0.5054266201932613, "grad_norm": 0.0, - "learning_rate": 1.028763176005474e-05, - "loss": 0.885, + "learning_rate": 1.0310163019940454e-05, + "loss": 0.9518, "step": 17836 }, { - "epoch": 0.5061577752553916, + "epoch": 0.5054549576355238, "grad_norm": 0.0, - "learning_rate": 1.0286713061037394e-05, - "loss": 0.7394, + "learning_rate": 1.0309245670938481e-05, + "loss": 0.8311, "step": 17837 }, { - "epoch": 0.5061861520998865, + "epoch": 0.5054832950777863, "grad_norm": 0.0, - "learning_rate": 1.028579435959817e-05, - "loss": 0.8643, + "learning_rate": 1.0308328319331622e-05, + "loss": 0.851, "step": 17838 }, { - "epoch": 0.5062145289443813, + "epoch": 0.5055116325200487, "grad_norm": 0.0, - "learning_rate": 1.028487565574483e-05, - "loss": 0.9792, + "learning_rate": 1.0307410965127595e-05, + "loss": 0.8972, "step": 17839 }, { - "epoch": 0.5062429057888763, + "epoch": 0.5055399699623112, "grad_norm": 0.0, - "learning_rate": 1.0283956949485133e-05, - "loss": 0.8771, + "learning_rate": 1.0306493608334134e-05, + "loss": 0.8472, "step": 17840 }, { - "epoch": 0.5062712826333712, + "epoch": 0.5055683074045737, "grad_norm": 0.0, - "learning_rate": 1.0283038240826837e-05, - "loss": 0.8992, + "learning_rate": 1.030557624895896e-05, + "loss": 1.002, "step": 17841 }, { - "epoch": 0.506299659477866, + "epoch": 0.5055966448468361, "grad_norm": 0.0, - "learning_rate": 1.0282119529777705e-05, - "loss": 0.8966, + "learning_rate": 1.0304658887009806e-05, + "loss": 0.9758, "step": 17842 }, { - "epoch": 0.506328036322361, + "epoch": 0.5056249822890986, "grad_norm": 0.0, - "learning_rate": 1.0281200816345498e-05, - "loss": 1.0291, + "learning_rate": 1.03037415224944e-05, + "loss": 0.9703, "step": 17843 }, { - "epoch": 0.5063564131668559, + "epoch": 0.5056533197313611, "grad_norm": 0.0, - "learning_rate": 1.0280282100537972e-05, - "loss": 1.0016, + "learning_rate": 1.0302824155420464e-05, + "loss": 0.9148, "step": 17844 }, { - "epoch": 0.5063847900113507, + "epoch": 0.5056816571736235, "grad_norm": 0.0, - "learning_rate": 1.0279363382362894e-05, - "loss": 0.8967, + "learning_rate": 1.0301906785795726e-05, + "loss": 0.9307, "step": 17845 }, { - "epoch": 0.5064131668558456, + "epoch": 0.505709994615886, "grad_norm": 0.0, - "learning_rate": 1.0278444661828018e-05, - "loss": 0.9156, + "learning_rate": 1.0300989413627913e-05, + "loss": 0.8223, "step": 17846 }, { - "epoch": 0.5064415437003406, + "epoch": 0.5057383320581484, "grad_norm": 0.0, - "learning_rate": 1.027752593894111e-05, - "loss": 0.8773, + "learning_rate": 1.030007203892476e-05, + "loss": 0.8716, "step": 17847 }, { - "epoch": 0.5064699205448354, + "epoch": 0.5057666695004109, "grad_norm": 0.0, - "learning_rate": 1.0276607213709926e-05, - "loss": 0.8766, + "learning_rate": 1.0299154661693987e-05, + "loss": 0.8661, "step": 17848 }, { - "epoch": 0.5064982973893303, + "epoch": 0.5057950069426733, "grad_norm": 0.0, - "learning_rate": 1.027568848614223e-05, - "loss": 0.985, + "learning_rate": 1.0298237281943321e-05, + "loss": 0.9044, "step": 17849 }, { - "epoch": 0.5065266742338252, + "epoch": 0.5058233443849358, "grad_norm": 0.0, - "learning_rate": 1.0274769756245781e-05, - "loss": 0.9735, + "learning_rate": 1.0297319899680493e-05, + "loss": 0.9401, "step": 17850 }, { - "epoch": 0.5065550510783201, + "epoch": 0.5058516818271983, "grad_norm": 0.0, - "learning_rate": 1.0273851024028337e-05, - "loss": 0.8861, + "learning_rate": 1.029640251491323e-05, + "loss": 0.9014, "step": 17851 }, { - "epoch": 0.506583427922815, + "epoch": 0.5058800192694607, "grad_norm": 0.0, - "learning_rate": 1.0272932289497664e-05, - "loss": 0.9189, + "learning_rate": 1.0295485127649258e-05, + "loss": 0.8339, "step": 17852 }, { - "epoch": 0.5066118047673098, + "epoch": 0.5059083567117232, "grad_norm": 0.0, - "learning_rate": 1.027201355266152e-05, - "loss": 0.8772, + "learning_rate": 1.0294567737896304e-05, + "loss": 0.8295, "step": 17853 }, { - "epoch": 0.5066401816118048, + "epoch": 0.5059366941539857, "grad_norm": 0.0, - "learning_rate": 1.0271094813527657e-05, - "loss": 0.8443, + "learning_rate": 1.0293650345662099e-05, + "loss": 0.8585, "step": 17854 }, { - "epoch": 0.5066685584562997, + "epoch": 0.5059650315962482, "grad_norm": 0.0, - "learning_rate": 1.0270176072103852e-05, - "loss": 0.9475, + "learning_rate": 1.0292732950954366e-05, + "loss": 0.8625, "step": 17855 }, { - "epoch": 0.5066969353007945, + "epoch": 0.5059933690385106, "grad_norm": 0.0, - "learning_rate": 1.026925732839786e-05, - "loss": 0.9214, + "learning_rate": 1.0291815553780835e-05, + "loss": 0.7936, "step": 17856 }, { - "epoch": 0.5067253121452895, + "epoch": 0.506021706480773, "grad_norm": 0.0, - "learning_rate": 1.0268338582417432e-05, - "loss": 0.8916, + "learning_rate": 1.0290898154149234e-05, + "loss": 1.0003, "step": 17857 }, { - "epoch": 0.5067536889897843, + "epoch": 0.5060500439230355, "grad_norm": 0.0, - "learning_rate": 1.026741983417034e-05, - "loss": 0.8563, + "learning_rate": 1.028998075206729e-05, + "loss": 0.8441, "step": 17858 }, { - "epoch": 0.5067820658342792, + "epoch": 0.5060783813652979, "grad_norm": 0.0, - "learning_rate": 1.0266501083664341e-05, - "loss": 0.9049, + "learning_rate": 1.0289063347542727e-05, + "loss": 0.8832, "step": 17859 }, { - "epoch": 0.5068104426787742, + "epoch": 0.5061067188075604, "grad_norm": 0.0, - "learning_rate": 1.0265582330907195e-05, - "loss": 0.7978, + "learning_rate": 1.0288145940583281e-05, + "loss": 0.9265, "step": 17860 }, { - "epoch": 0.506838819523269, + "epoch": 0.5061350562498229, "grad_norm": 0.0, - "learning_rate": 1.0264663575906661e-05, - "loss": 0.9236, + "learning_rate": 1.028722853119667e-05, + "loss": 0.8903, "step": 17861 }, { - "epoch": 0.5068671963677639, + "epoch": 0.5061633936920854, "grad_norm": 0.0, - "learning_rate": 1.0263744818670503e-05, - "loss": 0.8944, + "learning_rate": 1.028631111939063e-05, + "loss": 0.8542, "step": 17862 }, { - "epoch": 0.5068955732122588, + "epoch": 0.5061917311343478, "grad_norm": 0.0, - "learning_rate": 1.026282605920648e-05, - "loss": 0.8594, + "learning_rate": 1.0285393705172886e-05, + "loss": 0.7998, "step": 17863 }, { - "epoch": 0.5069239500567537, + "epoch": 0.5062200685766103, "grad_norm": 0.0, - "learning_rate": 1.0261907297522356e-05, - "loss": 0.7997, + "learning_rate": 1.0284476288551164e-05, + "loss": 0.9292, "step": 17864 }, { - "epoch": 0.5069523269012486, + "epoch": 0.5062484060188728, "grad_norm": 0.0, - "learning_rate": 1.0260988533625889e-05, - "loss": 0.9661, + "learning_rate": 1.028355886953319e-05, + "loss": 0.8856, "step": 17865 }, { - "epoch": 0.5069807037457434, + "epoch": 0.5062767434611352, "grad_norm": 0.0, - "learning_rate": 1.0260069767524838e-05, - "loss": 0.8771, + "learning_rate": 1.0282641448126693e-05, + "loss": 0.9038, "step": 17866 }, { - "epoch": 0.5070090805902384, + "epoch": 0.5063050809033977, "grad_norm": 0.0, - "learning_rate": 1.0259150999226965e-05, - "loss": 0.9571, + "learning_rate": 1.0281724024339406e-05, + "loss": 0.8581, "step": 17867 }, { - "epoch": 0.5070374574347333, + "epoch": 0.5063334183456601, "grad_norm": 0.0, - "learning_rate": 1.0258232228740035e-05, - "loss": 0.8244, + "learning_rate": 1.0280806598179055e-05, + "loss": 0.9698, "step": 17868 }, { - "epoch": 0.5070658342792281, + "epoch": 0.5063617557879225, "grad_norm": 0.0, - "learning_rate": 1.0257313456071805e-05, - "loss": 0.9168, + "learning_rate": 1.027988916965336e-05, + "loss": 0.8634, "step": 17869 }, { - "epoch": 0.507094211123723, + "epoch": 0.506390093230185, "grad_norm": 0.0, - "learning_rate": 1.0256394681230035e-05, - "loss": 0.9535, + "learning_rate": 1.0278971738770058e-05, + "loss": 0.8256, "step": 17870 }, { - "epoch": 0.507122587968218, + "epoch": 0.5064184306724475, "grad_norm": 0.0, - "learning_rate": 1.0255475904222488e-05, - "loss": 0.8918, + "learning_rate": 1.0278054305536873e-05, + "loss": 0.943, "step": 17871 }, { - "epoch": 0.5071509648127128, + "epoch": 0.50644676811471, "grad_norm": 0.0, - "learning_rate": 1.0254557125056926e-05, - "loss": 0.9125, + "learning_rate": 1.0277136869961533e-05, + "loss": 0.9189, "step": 17872 }, { - "epoch": 0.5071793416572077, + "epoch": 0.5064751055569724, "grad_norm": 0.0, - "learning_rate": 1.0253638343741108e-05, - "loss": 0.9611, + "learning_rate": 1.0276219432051766e-05, + "loss": 0.9432, "step": 17873 }, { - "epoch": 0.5072077185017027, + "epoch": 0.5065034429992349, "grad_norm": 0.0, - "learning_rate": 1.0252719560282795e-05, - "loss": 0.8812, + "learning_rate": 1.0275301991815299e-05, + "loss": 0.8633, "step": 17874 }, { - "epoch": 0.5072360953461975, + "epoch": 0.5065317804414974, "grad_norm": 0.0, - "learning_rate": 1.0251800774689748e-05, - "loss": 0.7569, + "learning_rate": 1.0274384549259864e-05, + "loss": 0.8912, "step": 17875 }, { - "epoch": 0.5072644721906924, + "epoch": 0.5065601178837598, "grad_norm": 0.0, - "learning_rate": 1.0250881986969733e-05, - "loss": 0.928, + "learning_rate": 1.0273467104393184e-05, + "loss": 1.0094, "step": 17876 }, { - "epoch": 0.5072928490351872, + "epoch": 0.5065884553260223, "grad_norm": 0.0, - "learning_rate": 1.0249963197130502e-05, - "loss": 0.9272, + "learning_rate": 1.0272549657222992e-05, + "loss": 0.8863, "step": 17877 }, { - "epoch": 0.5073212258796822, + "epoch": 0.5066167927682848, "grad_norm": 0.0, - "learning_rate": 1.0249044405179818e-05, - "loss": 0.8414, + "learning_rate": 1.0271632207757008e-05, + "loss": 0.8187, "step": 17878 }, { - "epoch": 0.5073496027241771, + "epoch": 0.5066451302105472, "grad_norm": 0.0, - "learning_rate": 1.024812561112545e-05, - "loss": 0.8964, + "learning_rate": 1.0270714756002967e-05, + "loss": 0.9251, "step": 17879 }, { - "epoch": 0.5073779795686719, + "epoch": 0.5066734676528096, "grad_norm": 0.0, - "learning_rate": 1.0247206814975153e-05, - "loss": 0.8177, + "learning_rate": 1.0269797301968595e-05, + "loss": 1.0038, "step": 17880 }, { - "epoch": 0.5074063564131669, + "epoch": 0.5067018050950721, "grad_norm": 0.0, - "learning_rate": 1.0246288016736688e-05, - "loss": 0.858, + "learning_rate": 1.0268879845661623e-05, + "loss": 0.9575, "step": 17881 }, { - "epoch": 0.5074347332576618, + "epoch": 0.5067301425373346, "grad_norm": 0.0, - "learning_rate": 1.0245369216417818e-05, - "loss": 0.9702, + "learning_rate": 1.0267962387089771e-05, + "loss": 0.8879, "step": 17882 }, { - "epoch": 0.5074631101021566, + "epoch": 0.506758479979597, "grad_norm": 0.0, - "learning_rate": 1.0244450414026301e-05, - "loss": 0.8376, + "learning_rate": 1.026704492626078e-05, + "loss": 0.8548, "step": 17883 }, { - "epoch": 0.5074914869466516, + "epoch": 0.5067868174218595, "grad_norm": 0.0, - "learning_rate": 1.02435316095699e-05, - "loss": 0.8784, + "learning_rate": 1.0266127463182365e-05, + "loss": 0.8962, "step": 17884 }, { - "epoch": 0.5075198637911464, + "epoch": 0.506815154864122, "grad_norm": 0.0, - "learning_rate": 1.0242612803056378e-05, - "loss": 1.0237, + "learning_rate": 1.0265209997862259e-05, + "loss": 0.858, "step": 17885 }, { - "epoch": 0.5075482406356413, + "epoch": 0.5068434923063845, "grad_norm": 0.0, - "learning_rate": 1.0241693994493496e-05, - "loss": 0.79, + "learning_rate": 1.0264292530308191e-05, + "loss": 0.9949, "step": 17886 }, { - "epoch": 0.5075766174801362, + "epoch": 0.5068718297486469, "grad_norm": 0.0, - "learning_rate": 1.0240775183889013e-05, - "loss": 0.9244, + "learning_rate": 1.026337506052789e-05, + "loss": 0.9882, "step": 17887 }, { - "epoch": 0.5076049943246311, + "epoch": 0.5069001671909094, "grad_norm": 0.0, - "learning_rate": 1.023985637125069e-05, - "loss": 1.0004, + "learning_rate": 1.0262457588529084e-05, + "loss": 0.8486, "step": 17888 }, { - "epoch": 0.507633371169126, + "epoch": 0.5069285046331719, "grad_norm": 0.0, - "learning_rate": 1.0238937556586292e-05, - "loss": 0.924, + "learning_rate": 1.02615401143195e-05, + "loss": 0.8839, "step": 17889 }, { - "epoch": 0.5076617480136209, + "epoch": 0.5069568420754342, "grad_norm": 0.0, - "learning_rate": 1.0238018739903576e-05, - "loss": 0.8238, + "learning_rate": 1.0260622637906865e-05, + "loss": 0.9158, "step": 17890 }, { - "epoch": 0.5076901248581158, + "epoch": 0.5069851795176967, "grad_norm": 0.0, - "learning_rate": 1.0237099921210305e-05, - "loss": 0.8818, + "learning_rate": 1.025970515929891e-05, + "loss": 0.8481, "step": 17891 }, { - "epoch": 0.5077185017026107, + "epoch": 0.5070135169599592, "grad_norm": 0.0, - "learning_rate": 1.0236181100514242e-05, - "loss": 0.8767, + "learning_rate": 1.0258787678503359e-05, + "loss": 0.8278, "step": 17892 }, { - "epoch": 0.5077468785471055, + "epoch": 0.5070418544022216, "grad_norm": 0.0, - "learning_rate": 1.0235262277823144e-05, - "loss": 0.8725, + "learning_rate": 1.0257870195527947e-05, + "loss": 0.9225, "step": 17893 }, { - "epoch": 0.5077752553916004, + "epoch": 0.5070701918444841, "grad_norm": 0.0, - "learning_rate": 1.0234343453144777e-05, - "loss": 0.9657, + "learning_rate": 1.0256952710380399e-05, + "loss": 0.9419, "step": 17894 }, { - "epoch": 0.5078036322360954, + "epoch": 0.5070985292867466, "grad_norm": 0.0, - "learning_rate": 1.0233424626486896e-05, - "loss": 0.9641, + "learning_rate": 1.025603522306844e-05, + "loss": 0.8582, "step": 17895 }, { - "epoch": 0.5078320090805902, + "epoch": 0.5071268667290091, "grad_norm": 0.0, - "learning_rate": 1.0232505797857273e-05, - "loss": 0.9567, + "learning_rate": 1.0255117733599804e-05, + "loss": 0.8888, "step": 17896 }, { - "epoch": 0.5078603859250851, + "epoch": 0.5071552041712715, "grad_norm": 0.0, - "learning_rate": 1.023158696726366e-05, - "loss": 0.821, + "learning_rate": 1.0254200241982213e-05, + "loss": 0.8175, "step": 17897 }, { - "epoch": 0.5078887627695801, + "epoch": 0.507183541613534, "grad_norm": 0.0, - "learning_rate": 1.0230668134713822e-05, - "loss": 1.0566, + "learning_rate": 1.0253282748223403e-05, + "loss": 0.9607, "step": 17898 }, { - "epoch": 0.5079171396140749, + "epoch": 0.5072118790557965, "grad_norm": 0.0, - "learning_rate": 1.022974930021552e-05, - "loss": 0.9499, + "learning_rate": 1.0252365252331094e-05, + "loss": 0.9596, "step": 17899 }, { - "epoch": 0.5079455164585698, + "epoch": 0.5072402164980588, "grad_norm": 0.0, - "learning_rate": 1.0228830463776514e-05, - "loss": 0.8796, + "learning_rate": 1.0251447754313023e-05, + "loss": 0.869, "step": 17900 }, { - "epoch": 0.5079738933030647, + "epoch": 0.5072685539403213, "grad_norm": 0.0, - "learning_rate": 1.0227911625404567e-05, - "loss": 0.9355, + "learning_rate": 1.0250530254176913e-05, + "loss": 0.7791, "step": 17901 }, { - "epoch": 0.5080022701475596, + "epoch": 0.5072968913825838, "grad_norm": 0.0, - "learning_rate": 1.0226992785107441e-05, - "loss": 0.928, + "learning_rate": 1.0249612751930494e-05, + "loss": 0.9197, "step": 17902 }, { - "epoch": 0.5080306469920545, + "epoch": 0.5073252288248463, "grad_norm": 0.0, - "learning_rate": 1.0226073942892897e-05, - "loss": 0.861, + "learning_rate": 1.024869524758149e-05, + "loss": 0.9207, "step": 17903 }, { - "epoch": 0.5080590238365493, + "epoch": 0.5073535662671087, "grad_norm": 0.0, - "learning_rate": 1.0225155098768697e-05, - "loss": 0.8835, + "learning_rate": 1.0247777741137636e-05, + "loss": 0.899, "step": 17904 }, { - "epoch": 0.5080874006810443, + "epoch": 0.5073819037093712, "grad_norm": 0.0, - "learning_rate": 1.0224236252742598e-05, - "loss": 1.0083, + "learning_rate": 1.0246860232606661e-05, + "loss": 0.9053, "step": 17905 }, { - "epoch": 0.5081157775255392, + "epoch": 0.5074102411516337, "grad_norm": 0.0, - "learning_rate": 1.022331740482237e-05, - "loss": 0.9103, + "learning_rate": 1.024594272199629e-05, + "loss": 0.8548, "step": 17906 }, { - "epoch": 0.508144154370034, + "epoch": 0.5074385785938961, "grad_norm": 0.0, - "learning_rate": 1.0222398555015769e-05, - "loss": 1.0052, + "learning_rate": 1.0245025209314248e-05, + "loss": 0.8448, "step": 17907 }, { - "epoch": 0.508172531214529, + "epoch": 0.5074669160361586, "grad_norm": 0.0, - "learning_rate": 1.0221479703330553e-05, - "loss": 0.8095, + "learning_rate": 1.024410769456827e-05, + "loss": 0.8701, "step": 17908 }, { - "epoch": 0.5082009080590238, + "epoch": 0.5074952534784211, "grad_norm": 0.0, - "learning_rate": 1.0220560849774492e-05, - "loss": 0.9549, + "learning_rate": 1.0243190177766084e-05, + "loss": 0.938, "step": 17909 }, { - "epoch": 0.5082292849035187, + "epoch": 0.5075235909206836, "grad_norm": 0.0, - "learning_rate": 1.0219641994355343e-05, - "loss": 0.9895, + "learning_rate": 1.0242272658915414e-05, + "loss": 0.9667, "step": 17910 }, { - "epoch": 0.5082576617480136, + "epoch": 0.5075519283629459, "grad_norm": 0.0, - "learning_rate": 1.0218723137080868e-05, - "loss": 0.9496, + "learning_rate": 1.0241355138023992e-05, + "loss": 0.8535, "step": 17911 }, { - "epoch": 0.5082860385925085, + "epoch": 0.5075802658052084, "grad_norm": 0.0, - "learning_rate": 1.0217804277958829e-05, - "loss": 0.8769, + "learning_rate": 1.0240437615099548e-05, + "loss": 0.8582, "step": 17912 }, { - "epoch": 0.5083144154370034, + "epoch": 0.5076086032474709, "grad_norm": 0.0, - "learning_rate": 1.0216885416996989e-05, - "loss": 0.9553, + "learning_rate": 1.0239520090149809e-05, + "loss": 0.8517, "step": 17913 }, { - "epoch": 0.5083427922814983, + "epoch": 0.5076369406897333, "grad_norm": 0.0, - "learning_rate": 1.0215966554203107e-05, - "loss": 0.8561, + "learning_rate": 1.02386025631825e-05, + "loss": 0.8465, "step": 17914 }, { - "epoch": 0.5083711691259932, + "epoch": 0.5076652781319958, "grad_norm": 0.0, - "learning_rate": 1.0215047689584943e-05, - "loss": 0.8339, + "learning_rate": 1.0237685034205353e-05, + "loss": 0.8617, "step": 17915 }, { - "epoch": 0.5083995459704881, + "epoch": 0.5076936155742583, "grad_norm": 0.0, - "learning_rate": 1.0214128823150263e-05, - "loss": 0.918, + "learning_rate": 1.0236767503226102e-05, + "loss": 0.7888, "step": 17916 }, { - "epoch": 0.508427922814983, + "epoch": 0.5077219530165207, "grad_norm": 0.0, - "learning_rate": 1.0213209954906829e-05, - "loss": 0.8129, + "learning_rate": 1.0235849970252465e-05, + "loss": 0.9597, "step": 17917 }, { - "epoch": 0.5084562996594779, + "epoch": 0.5077502904587832, "grad_norm": 0.0, - "learning_rate": 1.0212291084862397e-05, - "loss": 0.821, + "learning_rate": 1.0234932435292178e-05, + "loss": 0.8772, "step": 17918 }, { - "epoch": 0.5084846765039728, + "epoch": 0.5077786279010457, "grad_norm": 0.0, - "learning_rate": 1.0211372213024738e-05, - "loss": 0.8298, + "learning_rate": 1.0234014898352966e-05, + "loss": 0.8953, "step": 17919 }, { - "epoch": 0.5085130533484676, + "epoch": 0.5078069653433082, "grad_norm": 0.0, - "learning_rate": 1.0210453339401607e-05, - "loss": 0.9952, + "learning_rate": 1.0233097359442563e-05, + "loss": 0.7822, "step": 17920 }, { - "epoch": 0.5085414301929625, + "epoch": 0.5078353027855705, "grad_norm": 0.0, - "learning_rate": 1.0209534464000765e-05, - "loss": 0.9431, + "learning_rate": 1.0232179818568692e-05, + "loss": 0.8555, "step": 17921 }, { - "epoch": 0.5085698070374575, + "epoch": 0.507863640227833, "grad_norm": 0.0, - "learning_rate": 1.0208615586829977e-05, - "loss": 0.8875, + "learning_rate": 1.0231262275739086e-05, + "loss": 0.793, "step": 17922 }, { - "epoch": 0.5085981838819523, + "epoch": 0.5078919776700955, "grad_norm": 0.0, - "learning_rate": 1.0207696707897004e-05, - "loss": 0.7724, + "learning_rate": 1.0230344730961471e-05, + "loss": 0.9363, "step": 17923 }, { - "epoch": 0.5086265607264472, + "epoch": 0.5079203151123579, "grad_norm": 0.0, - "learning_rate": 1.0206777827209609e-05, - "loss": 0.9202, + "learning_rate": 1.0229427184243574e-05, + "loss": 0.9038, "step": 17924 }, { - "epoch": 0.5086549375709422, + "epoch": 0.5079486525546204, "grad_norm": 0.0, - "learning_rate": 1.0205858944775549e-05, - "loss": 0.8509, + "learning_rate": 1.022850963559313e-05, + "loss": 0.8408, "step": 17925 }, { - "epoch": 0.508683314415437, + "epoch": 0.5079769899968829, "grad_norm": 0.0, - "learning_rate": 1.020494006060259e-05, - "loss": 0.8314, + "learning_rate": 1.0227592085017866e-05, + "loss": 0.8674, "step": 17926 }, { - "epoch": 0.5087116912599319, + "epoch": 0.5080053274391454, "grad_norm": 0.0, - "learning_rate": 1.0204021174698497e-05, - "loss": 0.9349, + "learning_rate": 1.022667453252551e-05, + "loss": 0.9083, "step": 17927 }, { - "epoch": 0.5087400681044267, + "epoch": 0.5080336648814078, "grad_norm": 0.0, - "learning_rate": 1.0203102287071022e-05, - "loss": 0.9188, + "learning_rate": 1.0225756978123784e-05, + "loss": 0.8829, "step": 17928 }, { - "epoch": 0.5087684449489217, + "epoch": 0.5080620023236703, "grad_norm": 0.0, - "learning_rate": 1.0202183397727938e-05, - "loss": 0.9608, + "learning_rate": 1.0224839421820426e-05, + "loss": 0.8706, "step": 17929 }, { - "epoch": 0.5087968217934166, + "epoch": 0.5080903397659328, "grad_norm": 0.0, - "learning_rate": 1.0201264506677e-05, - "loss": 0.9153, + "learning_rate": 1.0223921863623163e-05, + "loss": 0.8433, "step": 17930 }, { - "epoch": 0.5088251986379114, + "epoch": 0.5081186772081951, "grad_norm": 0.0, - "learning_rate": 1.0200345613925971e-05, - "loss": 0.9687, + "learning_rate": 1.0223004303539723e-05, + "loss": 0.9635, "step": 17931 }, { - "epoch": 0.5088535754824064, + "epoch": 0.5081470146504576, "grad_norm": 0.0, - "learning_rate": 1.0199426719482612e-05, - "loss": 0.9541, + "learning_rate": 1.0222086741577835e-05, + "loss": 0.9319, "step": 17932 }, { - "epoch": 0.5088819523269013, + "epoch": 0.5081753520927201, "grad_norm": 0.0, - "learning_rate": 1.0198507823354692e-05, - "loss": 0.8671, + "learning_rate": 1.0221169177745227e-05, + "loss": 0.8025, "step": 17933 }, { - "epoch": 0.5089103291713961, + "epoch": 0.5082036895349826, "grad_norm": 0.0, - "learning_rate": 1.0197588925549963e-05, - "loss": 0.8787, + "learning_rate": 1.022025161204963e-05, + "loss": 0.7961, "step": 17934 }, { - "epoch": 0.5089387060158911, + "epoch": 0.508232026977245, "grad_norm": 0.0, - "learning_rate": 1.0196670026076192e-05, - "loss": 0.8426, + "learning_rate": 1.0219334044498773e-05, + "loss": 0.965, "step": 17935 }, { - "epoch": 0.5089670828603859, + "epoch": 0.5082603644195075, "grad_norm": 0.0, - "learning_rate": 1.0195751124941142e-05, - "loss": 0.9374, + "learning_rate": 1.0218416475100381e-05, + "loss": 0.8527, "step": 17936 }, { - "epoch": 0.5089954597048808, + "epoch": 0.50828870186177, "grad_norm": 0.0, - "learning_rate": 1.0194832222152573e-05, - "loss": 1.0037, + "learning_rate": 1.0217498903862186e-05, + "loss": 0.9686, "step": 17937 }, { - "epoch": 0.5090238365493757, + "epoch": 0.5083170393040324, "grad_norm": 0.0, - "learning_rate": 1.0193913317718245e-05, - "loss": 0.8895, + "learning_rate": 1.0216581330791919e-05, + "loss": 0.9298, "step": 17938 }, { - "epoch": 0.5090522133938706, + "epoch": 0.5083453767462949, "grad_norm": 0.0, - "learning_rate": 1.0192994411645923e-05, - "loss": 0.8278, + "learning_rate": 1.0215663755897306e-05, + "loss": 0.8533, "step": 17939 }, { - "epoch": 0.5090805902383655, + "epoch": 0.5083737141885574, "grad_norm": 0.0, - "learning_rate": 1.019207550394337e-05, - "loss": 1.0155, + "learning_rate": 1.0214746179186078e-05, + "loss": 0.927, "step": 17940 }, { - "epoch": 0.5091089670828604, + "epoch": 0.5084020516308198, "grad_norm": 0.0, - "learning_rate": 1.0191156594618348e-05, - "loss": 0.8425, + "learning_rate": 1.0213828600665961e-05, + "loss": 0.9993, "step": 17941 }, { - "epoch": 0.5091373439273553, + "epoch": 0.5084303890730822, "grad_norm": 0.0, - "learning_rate": 1.0190237683678614e-05, - "loss": 1.0131, + "learning_rate": 1.021291102034469e-05, + "loss": 0.9678, "step": 17942 }, { - "epoch": 0.5091657207718502, + "epoch": 0.5084587265153447, "grad_norm": 0.0, - "learning_rate": 1.0189318771131938e-05, - "loss": 0.8899, + "learning_rate": 1.0211993438229985e-05, + "loss": 0.8888, "step": 17943 }, { - "epoch": 0.509194097616345, + "epoch": 0.5084870639576072, "grad_norm": 0.0, - "learning_rate": 1.0188399856986077e-05, - "loss": 0.9414, + "learning_rate": 1.0211075854329583e-05, + "loss": 0.7555, "step": 17944 }, { - "epoch": 0.5092224744608399, + "epoch": 0.5085154013998696, "grad_norm": 0.0, - "learning_rate": 1.018748094124879e-05, - "loss": 0.7706, + "learning_rate": 1.0210158268651212e-05, + "loss": 0.8597, "step": 17945 }, { - "epoch": 0.5092508513053349, + "epoch": 0.5085437388421321, "grad_norm": 0.0, - "learning_rate": 1.0186562023927847e-05, - "loss": 0.9085, + "learning_rate": 1.0209240681202602e-05, + "loss": 0.847, "step": 17946 }, { - "epoch": 0.5092792281498297, + "epoch": 0.5085720762843946, "grad_norm": 0.0, - "learning_rate": 1.0185643105031005e-05, - "loss": 0.9022, + "learning_rate": 1.0208323091991476e-05, + "loss": 0.8726, "step": 17947 }, { - "epoch": 0.5093076049943246, + "epoch": 0.508600413726657, "grad_norm": 0.0, - "learning_rate": 1.0184724184566028e-05, - "loss": 0.8247, + "learning_rate": 1.0207405501025567e-05, + "loss": 0.9504, "step": 17948 }, { - "epoch": 0.5093359818388196, + "epoch": 0.5086287511689195, "grad_norm": 0.0, - "learning_rate": 1.0183805262540673e-05, - "loss": 0.7897, + "learning_rate": 1.0206487908312607e-05, + "loss": 0.8227, "step": 17949 }, { - "epoch": 0.5093643586833144, + "epoch": 0.508657088611182, "grad_norm": 0.0, - "learning_rate": 1.0182886338962711e-05, - "loss": 0.8762, + "learning_rate": 1.020557031386032e-05, + "loss": 0.9017, "step": 17950 }, { - "epoch": 0.5093927355278093, + "epoch": 0.5086854260534445, "grad_norm": 0.0, - "learning_rate": 1.0181967413839901e-05, - "loss": 0.8561, + "learning_rate": 1.020465271767644e-05, + "loss": 0.8371, "step": 17951 }, { - "epoch": 0.5094211123723043, + "epoch": 0.5087137634957068, "grad_norm": 0.0, - "learning_rate": 1.0181048487180003e-05, - "loss": 0.9678, + "learning_rate": 1.0203735119768696e-05, + "loss": 1.0167, "step": 17952 }, { - "epoch": 0.5094494892167991, + "epoch": 0.5087421009379693, "grad_norm": 0.0, - "learning_rate": 1.018012955899078e-05, - "loss": 0.9381, + "learning_rate": 1.0202817520144811e-05, + "loss": 0.864, "step": 17953 }, { - "epoch": 0.509477866061294, + "epoch": 0.5087704383802318, "grad_norm": 0.0, - "learning_rate": 1.0179210629279992e-05, - "loss": 0.9083, + "learning_rate": 1.0201899918812522e-05, + "loss": 0.9359, "step": 17954 }, { - "epoch": 0.5095062429057888, + "epoch": 0.5087987758224942, "grad_norm": 0.0, - "learning_rate": 1.0178291698055408e-05, - "loss": 0.9966, + "learning_rate": 1.0200982315779555e-05, + "loss": 0.9448, "step": 17955 }, { - "epoch": 0.5095346197502838, + "epoch": 0.5088271132647567, "grad_norm": 0.0, - "learning_rate": 1.0177372765324782e-05, - "loss": 0.9201, + "learning_rate": 1.0200064711053636e-05, + "loss": 0.8578, "step": 17956 }, { - "epoch": 0.5095629965947787, + "epoch": 0.5088554507070192, "grad_norm": 0.0, - "learning_rate": 1.0176453831095883e-05, - "loss": 0.7977, + "learning_rate": 1.01991471046425e-05, + "loss": 0.9001, "step": 17957 }, { - "epoch": 0.5095913734392735, + "epoch": 0.5088837881492816, "grad_norm": 0.0, - "learning_rate": 1.0175534895376471e-05, - "loss": 0.9291, + "learning_rate": 1.0198229496553873e-05, + "loss": 0.7878, "step": 17958 }, { - "epoch": 0.5096197502837685, + "epoch": 0.5089121255915441, "grad_norm": 0.0, - "learning_rate": 1.0174615958174305e-05, - "loss": 0.8663, + "learning_rate": 1.0197311886795487e-05, + "loss": 0.8696, "step": 17959 }, { - "epoch": 0.5096481271282634, + "epoch": 0.5089404630338066, "grad_norm": 0.0, - "learning_rate": 1.0173697019497152e-05, - "loss": 0.8383, + "learning_rate": 1.019639427537507e-05, + "loss": 0.839, "step": 17960 }, { - "epoch": 0.5096765039727582, + "epoch": 0.5089688004760691, "grad_norm": 0.0, - "learning_rate": 1.0172778079352774e-05, - "loss": 0.8988, + "learning_rate": 1.0195476662300347e-05, + "loss": 0.7472, "step": 17961 }, { - "epoch": 0.5097048808172531, + "epoch": 0.5089971379183315, "grad_norm": 0.0, - "learning_rate": 1.0171859137748927e-05, - "loss": 0.9945, + "learning_rate": 1.0194559047579057e-05, + "loss": 0.8123, "step": 17962 }, { - "epoch": 0.509733257661748, + "epoch": 0.509025475360594, "grad_norm": 0.0, - "learning_rate": 1.0170940194693381e-05, - "loss": 0.8102, + "learning_rate": 1.019364143121892e-05, + "loss": 0.8587, "step": 17963 }, { - "epoch": 0.5097616345062429, + "epoch": 0.5090538128028564, "grad_norm": 0.0, - "learning_rate": 1.0170021250193898e-05, - "loss": 0.9615, + "learning_rate": 1.0192723813227672e-05, + "loss": 0.8369, "step": 17964 }, { - "epoch": 0.5097900113507378, + "epoch": 0.5090821502451188, "grad_norm": 0.0, - "learning_rate": 1.0169102304258234e-05, - "loss": 0.954, + "learning_rate": 1.0191806193613037e-05, + "loss": 0.879, "step": 17965 }, { - "epoch": 0.5098183881952327, + "epoch": 0.5091104876873813, "grad_norm": 0.0, - "learning_rate": 1.0168183356894156e-05, - "loss": 0.9538, + "learning_rate": 1.019088857238275e-05, + "loss": 0.8386, "step": 17966 }, { - "epoch": 0.5098467650397276, + "epoch": 0.5091388251296438, "grad_norm": 0.0, - "learning_rate": 1.016726440810943e-05, - "loss": 0.8732, + "learning_rate": 1.0189970949544536e-05, + "loss": 0.8991, "step": 17967 }, { - "epoch": 0.5098751418842224, + "epoch": 0.5091671625719063, "grad_norm": 0.0, - "learning_rate": 1.0166345457911811e-05, - "loss": 0.7964, + "learning_rate": 1.0189053325106126e-05, + "loss": 0.9056, "step": 17968 }, { - "epoch": 0.5099035187287174, + "epoch": 0.5091955000141687, "grad_norm": 0.0, - "learning_rate": 1.0165426506309059e-05, - "loss": 0.9208, + "learning_rate": 1.018813569907525e-05, + "loss": 0.9281, "step": 17969 }, { - "epoch": 0.5099318955732123, + "epoch": 0.5092238374564312, "grad_norm": 0.0, - "learning_rate": 1.0164507553308948e-05, - "loss": 0.8529, + "learning_rate": 1.0187218071459635e-05, + "loss": 0.8496, "step": 17970 }, { - "epoch": 0.5099602724177071, + "epoch": 0.5092521748986937, "grad_norm": 0.0, - "learning_rate": 1.0163588598919234e-05, - "loss": 0.8747, + "learning_rate": 1.0186300442267016e-05, + "loss": 0.9219, "step": 17971 }, { - "epoch": 0.509988649262202, + "epoch": 0.5092805123409561, "grad_norm": 0.0, - "learning_rate": 1.0162669643147679e-05, - "loss": 0.9502, + "learning_rate": 1.018538281150512e-05, + "loss": 0.8541, "step": 17972 }, { - "epoch": 0.510017026106697, + "epoch": 0.5093088497832186, "grad_norm": 0.0, - "learning_rate": 1.0161750686002045e-05, - "loss": 0.8815, + "learning_rate": 1.0184465179181671e-05, + "loss": 0.9536, "step": 17973 }, { - "epoch": 0.5100454029511918, + "epoch": 0.509337187225481, "grad_norm": 0.0, - "learning_rate": 1.0160831727490097e-05, - "loss": 0.942, + "learning_rate": 1.0183547545304406e-05, + "loss": 0.7999, "step": 17974 }, { - "epoch": 0.5100737797956867, + "epoch": 0.5093655246677435, "grad_norm": 0.0, - "learning_rate": 1.0159912767619595e-05, - "loss": 0.9092, + "learning_rate": 1.0182629909881055e-05, + "loss": 0.8251, "step": 17975 }, { - "epoch": 0.5101021566401817, + "epoch": 0.5093938621100059, "grad_norm": 0.0, - "learning_rate": 1.0158993806398305e-05, - "loss": 0.9276, + "learning_rate": 1.0181712272919339e-05, + "loss": 0.8568, "step": 17976 }, { - "epoch": 0.5101305334846765, + "epoch": 0.5094221995522684, "grad_norm": 0.0, - "learning_rate": 1.0158074843833984e-05, - "loss": 0.9245, + "learning_rate": 1.0180794634426996e-05, + "loss": 0.8191, "step": 17977 }, { - "epoch": 0.5101589103291714, + "epoch": 0.5094505369945309, "grad_norm": 0.0, - "learning_rate": 1.0157155879934399e-05, - "loss": 0.8678, + "learning_rate": 1.017987699441175e-05, + "loss": 1.0005, "step": 17978 }, { - "epoch": 0.5101872871736662, + "epoch": 0.5094788744367933, "grad_norm": 0.0, - "learning_rate": 1.0156236914707312e-05, - "loss": 0.9382, + "learning_rate": 1.0178959352881337e-05, + "loss": 0.8946, "step": 17979 }, { - "epoch": 0.5102156640181612, + "epoch": 0.5095072118790558, "grad_norm": 0.0, - "learning_rate": 1.0155317948160486e-05, - "loss": 0.8238, + "learning_rate": 1.0178041709843483e-05, + "loss": 0.878, "step": 17980 }, { - "epoch": 0.5102440408626561, + "epoch": 0.5095355493213183, "grad_norm": 0.0, - "learning_rate": 1.015439898030168e-05, - "loss": 0.8763, + "learning_rate": 1.0177124065305917e-05, + "loss": 0.9453, "step": 17981 }, { - "epoch": 0.5102724177071509, + "epoch": 0.5095638867635807, "grad_norm": 0.0, - "learning_rate": 1.0153480011138658e-05, - "loss": 0.8159, + "learning_rate": 1.0176206419276366e-05, + "loss": 0.9418, "step": 17982 }, { - "epoch": 0.5103007945516459, + "epoch": 0.5095922242058432, "grad_norm": 0.0, - "learning_rate": 1.0152561040679188e-05, - "loss": 0.8832, + "learning_rate": 1.0175288771762563e-05, + "loss": 0.7868, "step": 17983 }, { - "epoch": 0.5103291713961408, + "epoch": 0.5096205616481057, "grad_norm": 0.0, - "learning_rate": 1.0151642068931024e-05, - "loss": 1.016, + "learning_rate": 1.0174371122772241e-05, + "loss": 0.8374, "step": 17984 }, { - "epoch": 0.5103575482406356, + "epoch": 0.5096488990903681, "grad_norm": 0.0, - "learning_rate": 1.0150723095901933e-05, - "loss": 0.9758, + "learning_rate": 1.0173453472313127e-05, + "loss": 0.8993, "step": 17985 }, { - "epoch": 0.5103859250851306, + "epoch": 0.5096772365326305, "grad_norm": 0.0, - "learning_rate": 1.0149804121599675e-05, - "loss": 0.7864, + "learning_rate": 1.0172535820392947e-05, + "loss": 0.9237, "step": 17986 }, { - "epoch": 0.5104143019296254, + "epoch": 0.509705573974893, "grad_norm": 0.0, - "learning_rate": 1.014888514603202e-05, - "loss": 0.9296, + "learning_rate": 1.0171618167019434e-05, + "loss": 0.8881, "step": 17987 }, { - "epoch": 0.5104426787741203, + "epoch": 0.5097339114171555, "grad_norm": 0.0, - "learning_rate": 1.0147966169206724e-05, - "loss": 0.8212, + "learning_rate": 1.0170700512200317e-05, + "loss": 0.8301, "step": 17988 }, { - "epoch": 0.5104710556186152, + "epoch": 0.5097622488594179, "grad_norm": 0.0, - "learning_rate": 1.014704719113155e-05, - "loss": 0.8202, + "learning_rate": 1.0169782855943327e-05, + "loss": 0.8948, "step": 17989 }, { - "epoch": 0.5104994324631101, + "epoch": 0.5097905863016804, "grad_norm": 0.0, - "learning_rate": 1.014612821181426e-05, - "loss": 0.7876, + "learning_rate": 1.0168865198256192e-05, + "loss": 0.88, "step": 17990 }, { - "epoch": 0.510527809307605, + "epoch": 0.5098189237439429, "grad_norm": 0.0, - "learning_rate": 1.0145209231262623e-05, - "loss": 0.9656, + "learning_rate": 1.0167947539146645e-05, + "loss": 0.9597, "step": 17991 }, { - "epoch": 0.5105561861520999, + "epoch": 0.5098472611862054, "grad_norm": 0.0, - "learning_rate": 1.0144290249484395e-05, - "loss": 0.8493, + "learning_rate": 1.0167029878622415e-05, + "loss": 0.8583, "step": 17992 }, { - "epoch": 0.5105845629965948, + "epoch": 0.5098755986284678, "grad_norm": 0.0, - "learning_rate": 1.0143371266487339e-05, - "loss": 0.8776, + "learning_rate": 1.0166112216691227e-05, + "loss": 0.9014, "step": 17993 }, { - "epoch": 0.5106129398410897, + "epoch": 0.5099039360707303, "grad_norm": 0.0, - "learning_rate": 1.014245228227922e-05, - "loss": 0.7842, + "learning_rate": 1.0165194553360813e-05, + "loss": 0.9488, "step": 17994 }, { - "epoch": 0.5106413166855845, + "epoch": 0.5099322735129928, "grad_norm": 0.0, - "learning_rate": 1.0141533296867802e-05, - "loss": 0.8016, + "learning_rate": 1.0164276888638907e-05, + "loss": 0.9004, "step": 17995 }, { - "epoch": 0.5106696935300794, + "epoch": 0.5099606109552551, "grad_norm": 0.0, - "learning_rate": 1.0140614310260844e-05, - "loss": 0.8559, + "learning_rate": 1.0163359222533234e-05, + "loss": 0.8749, "step": 17996 }, { - "epoch": 0.5106980703745744, + "epoch": 0.5099889483975176, "grad_norm": 0.0, - "learning_rate": 1.0139695322466112e-05, - "loss": 0.9132, + "learning_rate": 1.0162441555051525e-05, + "loss": 0.8239, "step": 17997 }, { - "epoch": 0.5107264472190692, + "epoch": 0.5100172858397801, "grad_norm": 0.0, - "learning_rate": 1.0138776333491369e-05, - "loss": 1.0073, + "learning_rate": 1.0161523886201511e-05, + "loss": 0.8788, "step": 17998 }, { - "epoch": 0.5107548240635641, + "epoch": 0.5100456232820426, "grad_norm": 0.0, - "learning_rate": 1.0137857343344372e-05, - "loss": 0.8256, + "learning_rate": 1.0160606215990922e-05, + "loss": 0.8703, "step": 17999 }, { - "epoch": 0.5107832009080591, + "epoch": 0.510073960724305, "grad_norm": 0.0, - "learning_rate": 1.0136938352032888e-05, - "loss": 0.7901, + "learning_rate": 1.0159688544427488e-05, + "loss": 1.0295, "step": 18000 }, { - "epoch": 0.5108115777525539, + "epoch": 0.5101022981665675, "grad_norm": 0.0, - "learning_rate": 1.0136019359564681e-05, - "loss": 1.0144, + "learning_rate": 1.015877087151894e-05, + "loss": 0.8595, "step": 18001 }, { - "epoch": 0.5108399545970488, + "epoch": 0.51013063560883, "grad_norm": 0.0, - "learning_rate": 1.0135100365947514e-05, - "loss": 0.9183, + "learning_rate": 1.0157853197273e-05, + "loss": 0.9788, "step": 18002 }, { - "epoch": 0.5108683314415438, + "epoch": 0.5101589730510924, "grad_norm": 0.0, - "learning_rate": 1.0134181371189146e-05, - "loss": 0.8738, + "learning_rate": 1.0156935521697406e-05, + "loss": 0.9384, "step": 18003 }, { - "epoch": 0.5108967082860386, + "epoch": 0.5101873104933549, "grad_norm": 0.0, - "learning_rate": 1.0133262375297344e-05, - "loss": 0.8289, + "learning_rate": 1.0156017844799888e-05, + "loss": 0.8642, "step": 18004 }, { - "epoch": 0.5109250851305335, + "epoch": 0.5102156479356174, "grad_norm": 0.0, - "learning_rate": 1.0132343378279868e-05, - "loss": 0.7979, + "learning_rate": 1.0155100166588174e-05, + "loss": 0.9345, "step": 18005 }, { - "epoch": 0.5109534619750283, + "epoch": 0.5102439853778797, "grad_norm": 0.0, - "learning_rate": 1.0131424380144482e-05, - "loss": 0.8083, + "learning_rate": 1.0154182487069992e-05, + "loss": 0.9883, "step": 18006 }, { - "epoch": 0.5109818388195233, + "epoch": 0.5102723228201422, "grad_norm": 0.0, - "learning_rate": 1.0130505380898947e-05, - "loss": 0.863, + "learning_rate": 1.0153264806253074e-05, + "loss": 0.9024, "step": 18007 }, { - "epoch": 0.5110102156640182, + "epoch": 0.5103006602624047, "grad_norm": 0.0, - "learning_rate": 1.0129586380551026e-05, - "loss": 0.9553, + "learning_rate": 1.0152347124145148e-05, + "loss": 0.8722, "step": 18008 }, { - "epoch": 0.511038592508513, + "epoch": 0.5103289977046672, "grad_norm": 0.0, - "learning_rate": 1.0128667379108486e-05, - "loss": 0.8688, + "learning_rate": 1.0151429440753948e-05, + "loss": 0.7685, "step": 18009 }, { - "epoch": 0.511066969353008, + "epoch": 0.5103573351469296, "grad_norm": 0.0, - "learning_rate": 1.0127748376579082e-05, - "loss": 0.7527, + "learning_rate": 1.0150511756087203e-05, + "loss": 0.8512, "step": 18010 }, { - "epoch": 0.5110953461975029, + "epoch": 0.5103856725891921, "grad_norm": 0.0, - "learning_rate": 1.0126829372970588e-05, - "loss": 0.9532, + "learning_rate": 1.0149594070152638e-05, + "loss": 0.8309, "step": 18011 }, { - "epoch": 0.5111237230419977, + "epoch": 0.5104140100314546, "grad_norm": 0.0, - "learning_rate": 1.0125910368290757e-05, - "loss": 0.8458, + "learning_rate": 1.0148676382957987e-05, + "loss": 0.9488, "step": 18012 }, { - "epoch": 0.5111520998864926, + "epoch": 0.510442347473717, "grad_norm": 0.0, - "learning_rate": 1.0124991362547356e-05, - "loss": 0.8569, + "learning_rate": 1.0147758694510984e-05, + "loss": 0.8073, "step": 18013 }, { - "epoch": 0.5111804767309875, + "epoch": 0.5104706849159795, "grad_norm": 0.0, - "learning_rate": 1.012407235574815e-05, - "loss": 0.8107, + "learning_rate": 1.0146841004819348e-05, + "loss": 0.8964, "step": 18014 }, { - "epoch": 0.5112088535754824, + "epoch": 0.510499022358242, "grad_norm": 0.0, - "learning_rate": 1.0123153347900893e-05, - "loss": 0.9334, + "learning_rate": 1.0145923313890817e-05, + "loss": 0.865, "step": 18015 }, { - "epoch": 0.5112372304199773, + "epoch": 0.5105273598005045, "grad_norm": 0.0, - "learning_rate": 1.0122234339013357e-05, - "loss": 0.8643, + "learning_rate": 1.0145005621733124e-05, + "loss": 0.864, "step": 18016 }, { - "epoch": 0.5112656072644722, + "epoch": 0.5105556972427668, "grad_norm": 0.0, - "learning_rate": 1.0121315329093305e-05, - "loss": 0.8469, + "learning_rate": 1.0144087928353991e-05, + "loss": 0.8887, "step": 18017 }, { - "epoch": 0.5112939841089671, + "epoch": 0.5105840346850293, "grad_norm": 0.0, - "learning_rate": 1.0120396318148495e-05, - "loss": 0.8539, + "learning_rate": 1.0143170233761157e-05, + "loss": 0.8965, "step": 18018 }, { - "epoch": 0.511322360953462, + "epoch": 0.5106123721272918, "grad_norm": 0.0, - "learning_rate": 1.0119477306186688e-05, - "loss": 0.9925, + "learning_rate": 1.014225253796234e-05, + "loss": 0.8498, "step": 18019 }, { - "epoch": 0.5113507377979569, + "epoch": 0.5106407095695542, "grad_norm": 0.0, - "learning_rate": 1.0118558293215657e-05, - "loss": 0.9393, + "learning_rate": 1.0141334840965283e-05, + "loss": 0.8577, "step": 18020 }, { - "epoch": 0.5113791146424518, + "epoch": 0.5106690470118167, "grad_norm": 0.0, - "learning_rate": 1.0117639279243159e-05, - "loss": 0.892, + "learning_rate": 1.0140417142777705e-05, + "loss": 0.8054, "step": 18021 }, { - "epoch": 0.5114074914869466, + "epoch": 0.5106973844540792, "grad_norm": 0.0, - "learning_rate": 1.0116720264276953e-05, - "loss": 0.8087, + "learning_rate": 1.0139499443407346e-05, + "loss": 0.8221, "step": 18022 }, { - "epoch": 0.5114358683314415, + "epoch": 0.5107257218963417, "grad_norm": 0.0, - "learning_rate": 1.0115801248324803e-05, - "loss": 0.8523, + "learning_rate": 1.0138581742861926e-05, + "loss": 0.7501, "step": 18023 }, { - "epoch": 0.5114642451759365, + "epoch": 0.5107540593386041, "grad_norm": 0.0, - "learning_rate": 1.0114882231394481e-05, - "loss": 0.9163, + "learning_rate": 1.0137664041149187e-05, + "loss": 0.8893, "step": 18024 }, { - "epoch": 0.5114926220204313, + "epoch": 0.5107823967808666, "grad_norm": 0.0, - "learning_rate": 1.011396321349374e-05, - "loss": 0.9652, + "learning_rate": 1.0136746338276848e-05, + "loss": 0.7771, "step": 18025 }, { - "epoch": 0.5115209988649262, + "epoch": 0.5108107342231291, "grad_norm": 0.0, - "learning_rate": 1.0113044194630348e-05, - "loss": 0.9004, + "learning_rate": 1.0135828634252647e-05, + "loss": 0.813, "step": 18026 }, { - "epoch": 0.5115493757094212, + "epoch": 0.5108390716653914, "grad_norm": 0.0, - "learning_rate": 1.0112125174812067e-05, - "loss": 0.8723, + "learning_rate": 1.0134910929084308e-05, + "loss": 0.832, "step": 18027 }, { - "epoch": 0.511577752553916, + "epoch": 0.5108674091076539, "grad_norm": 0.0, - "learning_rate": 1.011120615404666e-05, - "loss": 0.8568, + "learning_rate": 1.0133993222779563e-05, + "loss": 0.8201, "step": 18028 }, { - "epoch": 0.5116061293984109, + "epoch": 0.5108957465499164, "grad_norm": 0.0, - "learning_rate": 1.0110287132341888e-05, - "loss": 0.8118, + "learning_rate": 1.0133075515346147e-05, + "loss": 0.9072, "step": 18029 }, { - "epoch": 0.5116345062429057, + "epoch": 0.5109240839921788, "grad_norm": 0.0, - "learning_rate": 1.0109368109705514e-05, - "loss": 0.9505, + "learning_rate": 1.0132157806791788e-05, + "loss": 0.9546, "step": 18030 }, { - "epoch": 0.5116628830874007, + "epoch": 0.5109524214344413, "grad_norm": 0.0, - "learning_rate": 1.0108449086145304e-05, - "loss": 0.9075, + "learning_rate": 1.0131240097124208e-05, + "loss": 0.873, "step": 18031 }, { - "epoch": 0.5116912599318956, + "epoch": 0.5109807588767038, "grad_norm": 0.0, - "learning_rate": 1.0107530061669021e-05, - "loss": 0.9565, + "learning_rate": 1.013032238635115e-05, + "loss": 0.8575, "step": 18032 }, { - "epoch": 0.5117196367763904, + "epoch": 0.5110090963189663, "grad_norm": 0.0, - "learning_rate": 1.0106611036284426e-05, - "loss": 0.8434, + "learning_rate": 1.0129404674480336e-05, + "loss": 0.8208, "step": 18033 }, { - "epoch": 0.5117480136208854, + "epoch": 0.5110374337612287, "grad_norm": 0.0, - "learning_rate": 1.0105692009999284e-05, - "loss": 0.8486, + "learning_rate": 1.0128486961519497e-05, + "loss": 0.8733, "step": 18034 }, { - "epoch": 0.5117763904653803, + "epoch": 0.5110657712034912, "grad_norm": 0.0, - "learning_rate": 1.0104772982821357e-05, - "loss": 0.8669, + "learning_rate": 1.0127569247476367e-05, + "loss": 0.854, "step": 18035 }, { - "epoch": 0.5118047673098751, + "epoch": 0.5110941086457537, "grad_norm": 0.0, - "learning_rate": 1.0103853954758404e-05, - "loss": 0.8067, + "learning_rate": 1.0126651532358671e-05, + "loss": 0.901, "step": 18036 }, { - "epoch": 0.51183314415437, + "epoch": 0.511122446088016, "grad_norm": 0.0, - "learning_rate": 1.0102934925818198e-05, - "loss": 0.924, + "learning_rate": 1.0125733816174145e-05, + "loss": 0.9855, "step": 18037 }, { - "epoch": 0.511861520998865, + "epoch": 0.5111507835302785, "grad_norm": 0.0, - "learning_rate": 1.010201589600849e-05, - "loss": 0.8915, + "learning_rate": 1.0124816098930516e-05, + "loss": 0.9095, "step": 18038 }, { - "epoch": 0.5118898978433598, + "epoch": 0.511179120972541, "grad_norm": 0.0, - "learning_rate": 1.0101096865337053e-05, - "loss": 0.8747, + "learning_rate": 1.0123898380635515e-05, + "loss": 0.8727, "step": 18039 }, { - "epoch": 0.5119182746878547, + "epoch": 0.5112074584148035, "grad_norm": 0.0, - "learning_rate": 1.0100177833811645e-05, - "loss": 1.0014, + "learning_rate": 1.012298066129687e-05, + "loss": 0.9129, "step": 18040 }, { - "epoch": 0.5119466515323496, + "epoch": 0.5112357958570659, "grad_norm": 0.0, - "learning_rate": 1.009925880144003e-05, - "loss": 0.8512, + "learning_rate": 1.0122062940922313e-05, + "loss": 0.9153, "step": 18041 }, { - "epoch": 0.5119750283768445, + "epoch": 0.5112641332993284, "grad_norm": 0.0, - "learning_rate": 1.0098339768229972e-05, - "loss": 0.9102, + "learning_rate": 1.0121145219519574e-05, + "loss": 0.8696, "step": 18042 }, { - "epoch": 0.5120034052213394, + "epoch": 0.5112924707415909, "grad_norm": 0.0, - "learning_rate": 1.0097420734189233e-05, - "loss": 0.862, + "learning_rate": 1.0120227497096388e-05, + "loss": 0.9143, "step": 18043 }, { - "epoch": 0.5120317820658343, + "epoch": 0.5113208081838533, "grad_norm": 0.0, - "learning_rate": 1.0096501699325578e-05, - "loss": 0.8738, + "learning_rate": 1.0119309773660478e-05, + "loss": 0.8661, "step": 18044 }, { - "epoch": 0.5120601589103292, + "epoch": 0.5113491456261158, "grad_norm": 0.0, - "learning_rate": 1.009558266364677e-05, - "loss": 0.9199, + "learning_rate": 1.0118392049219578e-05, + "loss": 0.889, "step": 18045 }, { - "epoch": 0.512088535754824, + "epoch": 0.5113774830683783, "grad_norm": 0.0, - "learning_rate": 1.0094663627160566e-05, - "loss": 0.7993, + "learning_rate": 1.011747432378142e-05, + "loss": 0.7472, "step": 18046 }, { - "epoch": 0.5121169125993189, + "epoch": 0.5114058205106408, "grad_norm": 0.0, - "learning_rate": 1.0093744589874737e-05, - "loss": 0.9631, + "learning_rate": 1.0116556597353728e-05, + "loss": 0.7935, "step": 18047 }, { - "epoch": 0.5121452894438139, + "epoch": 0.5114341579529031, "grad_norm": 0.0, - "learning_rate": 1.0092825551797043e-05, - "loss": 0.9889, + "learning_rate": 1.0115638869944237e-05, + "loss": 0.9749, "step": 18048 }, { - "epoch": 0.5121736662883087, + "epoch": 0.5114624953951656, "grad_norm": 0.0, - "learning_rate": 1.009190651293525e-05, - "loss": 0.8733, + "learning_rate": 1.011472114156068e-05, + "loss": 0.9299, "step": 18049 }, { - "epoch": 0.5122020431328036, + "epoch": 0.5114908328374281, "grad_norm": 0.0, - "learning_rate": 1.0090987473297113e-05, - "loss": 0.8918, + "learning_rate": 1.0113803412210784e-05, + "loss": 0.9904, "step": 18050 }, { - "epoch": 0.5122304199772986, + "epoch": 0.5115191702796905, "grad_norm": 0.0, - "learning_rate": 1.0090068432890405e-05, - "loss": 0.8711, + "learning_rate": 1.0112885681902278e-05, + "loss": 0.8586, "step": 18051 }, { - "epoch": 0.5122587968217934, + "epoch": 0.511547507721953, "grad_norm": 0.0, - "learning_rate": 1.0089149391722886e-05, - "loss": 0.8819, + "learning_rate": 1.0111967950642892e-05, + "loss": 0.9023, "step": 18052 }, { - "epoch": 0.5122871736662883, + "epoch": 0.5115758451642155, "grad_norm": 0.0, - "learning_rate": 1.0088230349802315e-05, - "loss": 0.9167, + "learning_rate": 1.0111050218440362e-05, + "loss": 0.847, "step": 18053 }, { - "epoch": 0.5123155505107831, + "epoch": 0.5116041826064779, "grad_norm": 0.0, - "learning_rate": 1.0087311307136457e-05, - "loss": 0.9593, + "learning_rate": 1.011013248530241e-05, + "loss": 0.8313, "step": 18054 }, { - "epoch": 0.5123439273552781, + "epoch": 0.5116325200487404, "grad_norm": 0.0, - "learning_rate": 1.008639226373308e-05, - "loss": 0.867, + "learning_rate": 1.0109214751236778e-05, + "loss": 0.8872, "step": 18055 }, { - "epoch": 0.512372304199773, + "epoch": 0.5116608574910029, "grad_norm": 0.0, - "learning_rate": 1.0085473219599942e-05, - "loss": 0.8568, + "learning_rate": 1.0108297016251182e-05, + "loss": 0.8273, "step": 18056 }, { - "epoch": 0.5124006810442678, + "epoch": 0.5116891949332654, "grad_norm": 0.0, - "learning_rate": 1.0084554174744807e-05, - "loss": 0.8483, + "learning_rate": 1.0107379280353367e-05, + "loss": 0.9539, "step": 18057 }, { - "epoch": 0.5124290578887628, + "epoch": 0.5117175323755278, "grad_norm": 0.0, - "learning_rate": 1.0083635129175442e-05, - "loss": 0.8723, + "learning_rate": 1.0106461543551053e-05, + "loss": 0.8697, "step": 18058 }, { - "epoch": 0.5124574347332577, + "epoch": 0.5117458698177902, "grad_norm": 0.0, - "learning_rate": 1.0082716082899605e-05, - "loss": 0.808, + "learning_rate": 1.0105543805851977e-05, + "loss": 0.9539, "step": 18059 }, { - "epoch": 0.5124858115777525, + "epoch": 0.5117742072600527, "grad_norm": 0.0, - "learning_rate": 1.0081797035925063e-05, - "loss": 0.864, + "learning_rate": 1.0104626067263861e-05, + "loss": 0.8414, "step": 18060 }, { - "epoch": 0.5125141884222475, + "epoch": 0.5118025447023151, "grad_norm": 0.0, - "learning_rate": 1.0080877988259575e-05, - "loss": 0.8051, + "learning_rate": 1.0103708327794444e-05, + "loss": 0.9496, "step": 18061 }, { - "epoch": 0.5125425652667424, + "epoch": 0.5118308821445776, "grad_norm": 0.0, - "learning_rate": 1.007995893991091e-05, - "loss": 0.9171, + "learning_rate": 1.0102790587451452e-05, + "loss": 0.8845, "step": 18062 }, { - "epoch": 0.5125709421112372, + "epoch": 0.5118592195868401, "grad_norm": 0.0, - "learning_rate": 1.0079039890886826e-05, - "loss": 0.911, + "learning_rate": 1.010187284624262e-05, + "loss": 0.9211, "step": 18063 }, { - "epoch": 0.5125993189557321, + "epoch": 0.5118875570291026, "grad_norm": 0.0, - "learning_rate": 1.0078120841195088e-05, - "loss": 0.9938, + "learning_rate": 1.0100955104175671e-05, + "loss": 0.87, "step": 18064 }, { - "epoch": 0.512627695800227, + "epoch": 0.511915894471365, "grad_norm": 0.0, - "learning_rate": 1.0077201790843463e-05, - "loss": 0.9756, + "learning_rate": 1.0100037361258342e-05, + "loss": 0.8745, "step": 18065 }, { - "epoch": 0.5126560726447219, + "epoch": 0.5119442319136275, "grad_norm": 0.0, - "learning_rate": 1.0076282739839711e-05, - "loss": 0.796, + "learning_rate": 1.009911961749836e-05, + "loss": 0.9782, "step": 18066 }, { - "epoch": 0.5126844494892168, + "epoch": 0.51197256935589, "grad_norm": 0.0, - "learning_rate": 1.0075363688191593e-05, - "loss": 0.8727, + "learning_rate": 1.0098201872903457e-05, + "loss": 0.9751, "step": 18067 }, { - "epoch": 0.5127128263337117, + "epoch": 0.5120009067981524, "grad_norm": 0.0, - "learning_rate": 1.0074444635906875e-05, - "loss": 0.8, + "learning_rate": 1.0097284127481364e-05, + "loss": 0.8565, "step": 18068 }, { - "epoch": 0.5127412031782066, + "epoch": 0.5120292442404148, "grad_norm": 0.0, - "learning_rate": 1.0073525582993317e-05, - "loss": 0.8166, + "learning_rate": 1.0096366381239808e-05, + "loss": 0.9119, "step": 18069 }, { - "epoch": 0.5127695800227015, + "epoch": 0.5120575816826773, "grad_norm": 0.0, - "learning_rate": 1.0072606529458688e-05, - "loss": 0.8911, + "learning_rate": 1.0095448634186527e-05, + "loss": 0.9106, "step": 18070 }, { - "epoch": 0.5127979568671963, + "epoch": 0.5120859191249398, "grad_norm": 0.0, - "learning_rate": 1.007168747531075e-05, - "loss": 0.9487, + "learning_rate": 1.0094530886329244e-05, + "loss": 0.8644, "step": 18071 }, { - "epoch": 0.5128263337116913, + "epoch": 0.5121142565672022, "grad_norm": 0.0, - "learning_rate": 1.0070768420557263e-05, - "loss": 0.876, + "learning_rate": 1.0093613137675693e-05, + "loss": 0.9119, "step": 18072 }, { - "epoch": 0.5128547105561861, + "epoch": 0.5121425940094647, "grad_norm": 0.0, - "learning_rate": 1.006984936520599e-05, - "loss": 0.8808, + "learning_rate": 1.0092695388233601e-05, + "loss": 0.905, "step": 18073 }, { - "epoch": 0.512883087400681, + "epoch": 0.5121709314517272, "grad_norm": 0.0, - "learning_rate": 1.00689303092647e-05, - "loss": 0.8869, + "learning_rate": 1.0091777638010702e-05, + "loss": 0.7699, "step": 18074 }, { - "epoch": 0.512911464245176, + "epoch": 0.5121992688939896, "grad_norm": 0.0, - "learning_rate": 1.0068011252741153e-05, - "loss": 0.8585, + "learning_rate": 1.0090859887014728e-05, + "loss": 0.8577, "step": 18075 }, { - "epoch": 0.5129398410896708, + "epoch": 0.5122276063362521, "grad_norm": 0.0, - "learning_rate": 1.0067092195643108e-05, - "loss": 0.838, + "learning_rate": 1.0089942135253407e-05, + "loss": 0.9736, "step": 18076 }, { - "epoch": 0.5129682179341657, + "epoch": 0.5122559437785146, "grad_norm": 0.0, - "learning_rate": 1.0066173137978333e-05, - "loss": 0.891, + "learning_rate": 1.008902438273447e-05, + "loss": 0.7982, "step": 18077 }, { - "epoch": 0.5129965947786607, + "epoch": 0.512284281220777, "grad_norm": 0.0, - "learning_rate": 1.0065254079754592e-05, - "loss": 0.837, + "learning_rate": 1.0088106629465646e-05, + "loss": 0.9227, "step": 18078 }, { - "epoch": 0.5130249716231555, + "epoch": 0.5123126186630395, "grad_norm": 0.0, - "learning_rate": 1.0064335020979648e-05, - "loss": 0.899, + "learning_rate": 1.008718887545467e-05, + "loss": 0.904, "step": 18079 }, { - "epoch": 0.5130533484676504, + "epoch": 0.512340956105302, "grad_norm": 0.0, - "learning_rate": 1.0063415961661257e-05, - "loss": 0.8649, + "learning_rate": 1.0086271120709265e-05, + "loss": 0.8544, "step": 18080 }, { - "epoch": 0.5130817253121452, + "epoch": 0.5123692935475644, "grad_norm": 0.0, - "learning_rate": 1.0062496901807195e-05, - "loss": 0.8277, + "learning_rate": 1.008535336523717e-05, + "loss": 0.7829, "step": 18081 }, { - "epoch": 0.5131101021566402, + "epoch": 0.5123976309898268, "grad_norm": 0.0, - "learning_rate": 1.0061577841425219e-05, - "loss": 1.0065, + "learning_rate": 1.008443560904611e-05, + "loss": 0.886, "step": 18082 }, { - "epoch": 0.5131384790011351, + "epoch": 0.5124259684320893, "grad_norm": 0.0, - "learning_rate": 1.0060658780523089e-05, - "loss": 0.9437, + "learning_rate": 1.0083517852143821e-05, + "loss": 0.9298, "step": 18083 }, { - "epoch": 0.5131668558456299, + "epoch": 0.5124543058743518, "grad_norm": 0.0, - "learning_rate": 1.005973971910857e-05, - "loss": 0.9004, + "learning_rate": 1.0082600094538029e-05, + "loss": 0.8994, "step": 18084 }, { - "epoch": 0.5131952326901249, + "epoch": 0.5124826433166142, "grad_norm": 0.0, - "learning_rate": 1.0058820657189432e-05, - "loss": 0.9533, + "learning_rate": 1.0081682336236462e-05, + "loss": 0.8997, "step": 18085 }, { - "epoch": 0.5132236095346198, + "epoch": 0.5125109807588767, "grad_norm": 0.0, - "learning_rate": 1.0057901594773432e-05, - "loss": 0.8754, + "learning_rate": 1.0080764577246858e-05, + "loss": 0.8173, "step": 18086 }, { - "epoch": 0.5132519863791146, + "epoch": 0.5125393182011392, "grad_norm": 0.0, - "learning_rate": 1.0056982531868331e-05, - "loss": 0.9239, + "learning_rate": 1.0079846817576942e-05, + "loss": 0.8225, "step": 18087 }, { - "epoch": 0.5132803632236095, + "epoch": 0.5125676556434017, "grad_norm": 0.0, - "learning_rate": 1.0056063468481901e-05, - "loss": 0.9039, + "learning_rate": 1.0078929057234449e-05, + "loss": 0.8066, "step": 18088 }, { - "epoch": 0.5133087400681045, + "epoch": 0.5125959930856641, "grad_norm": 0.0, - "learning_rate": 1.0055144404621898e-05, - "loss": 0.9131, + "learning_rate": 1.0078011296227104e-05, + "loss": 0.8401, "step": 18089 }, { - "epoch": 0.5133371169125993, + "epoch": 0.5126243305279266, "grad_norm": 0.0, - "learning_rate": 1.0054225340296086e-05, - "loss": 0.879, + "learning_rate": 1.0077093534562643e-05, + "loss": 0.7832, "step": 18090 }, { - "epoch": 0.5133654937570942, + "epoch": 0.512652667970189, "grad_norm": 0.0, - "learning_rate": 1.0053306275512234e-05, - "loss": 0.9073, + "learning_rate": 1.0076175772248795e-05, + "loss": 0.9754, "step": 18091 }, { - "epoch": 0.5133938706015891, + "epoch": 0.5126810054124514, "grad_norm": 0.0, - "learning_rate": 1.0052387210278096e-05, - "loss": 0.7966, + "learning_rate": 1.007525800929329e-05, + "loss": 0.8542, "step": 18092 }, { - "epoch": 0.513422247446084, + "epoch": 0.5127093428547139, "grad_norm": 0.0, - "learning_rate": 1.0051468144601446e-05, - "loss": 0.7853, + "learning_rate": 1.0074340245703857e-05, + "loss": 0.8258, "step": 18093 }, { - "epoch": 0.5134506242905789, + "epoch": 0.5127376802969764, "grad_norm": 0.0, - "learning_rate": 1.0050549078490037e-05, - "loss": 0.7211, + "learning_rate": 1.007342248148823e-05, + "loss": 0.817, "step": 18094 }, { - "epoch": 0.5134790011350738, + "epoch": 0.5127660177392389, "grad_norm": 0.0, - "learning_rate": 1.0049630011951643e-05, - "loss": 0.8405, + "learning_rate": 1.0072504716654138e-05, + "loss": 0.9776, "step": 18095 }, { - "epoch": 0.5135073779795687, + "epoch": 0.5127943551815013, "grad_norm": 0.0, - "learning_rate": 1.004871094499402e-05, - "loss": 0.9228, + "learning_rate": 1.0071586951209311e-05, + "loss": 0.9186, "step": 18096 }, { - "epoch": 0.5135357548240636, + "epoch": 0.5128226926237638, "grad_norm": 0.0, - "learning_rate": 1.0047791877624932e-05, - "loss": 0.8298, + "learning_rate": 1.0070669185161484e-05, + "loss": 0.9933, "step": 18097 }, { - "epoch": 0.5135641316685584, + "epoch": 0.5128510300660263, "grad_norm": 0.0, - "learning_rate": 1.0046872809852148e-05, - "loss": 0.9351, + "learning_rate": 1.0069751418518379e-05, + "loss": 0.8997, "step": 18098 }, { - "epoch": 0.5135925085130534, + "epoch": 0.5128793675082887, "grad_norm": 0.0, - "learning_rate": 1.0045953741683426e-05, - "loss": 0.8969, + "learning_rate": 1.0068833651287736e-05, + "loss": 0.8897, "step": 18099 }, { - "epoch": 0.5136208853575482, + "epoch": 0.5129077049505512, "grad_norm": 0.0, - "learning_rate": 1.0045034673126528e-05, - "loss": 0.9085, + "learning_rate": 1.0067915883477277e-05, + "loss": 0.858, "step": 18100 }, { - "epoch": 0.5136492622020431, + "epoch": 0.5129360423928137, "grad_norm": 0.0, - "learning_rate": 1.004411560418922e-05, - "loss": 0.9059, + "learning_rate": 1.0066998115094742e-05, + "loss": 0.8948, "step": 18101 }, { - "epoch": 0.5136776390465381, + "epoch": 0.512964379835076, "grad_norm": 0.0, - "learning_rate": 1.0043196534879268e-05, - "loss": 0.8437, + "learning_rate": 1.0066080346147853e-05, + "loss": 0.8674, "step": 18102 }, { - "epoch": 0.5137060158910329, + "epoch": 0.5129927172773385, "grad_norm": 0.0, - "learning_rate": 1.004227746520443e-05, - "loss": 0.9007, + "learning_rate": 1.0065162576644348e-05, + "loss": 0.7743, "step": 18103 }, { - "epoch": 0.5137343927355278, + "epoch": 0.513021054719601, "grad_norm": 0.0, - "learning_rate": 1.0041358395172473e-05, - "loss": 0.7625, + "learning_rate": 1.0064244806591953e-05, + "loss": 0.8368, "step": 18104 }, { - "epoch": 0.5137627695800226, + "epoch": 0.5130493921618635, "grad_norm": 0.0, - "learning_rate": 1.0040439324791163e-05, - "loss": 0.7614, + "learning_rate": 1.0063327035998402e-05, + "loss": 0.9705, "step": 18105 }, { - "epoch": 0.5137911464245176, + "epoch": 0.5130777296041259, "grad_norm": 0.0, - "learning_rate": 1.003952025406826e-05, - "loss": 0.9145, + "learning_rate": 1.0062409264871423e-05, + "loss": 0.8908, "step": 18106 }, { - "epoch": 0.5138195232690125, + "epoch": 0.5131060670463884, "grad_norm": 0.0, - "learning_rate": 1.0038601183011524e-05, - "loss": 0.8351, + "learning_rate": 1.0061491493218744e-05, + "loss": 0.8545, "step": 18107 }, { - "epoch": 0.5138479001135073, + "epoch": 0.5131344044886509, "grad_norm": 0.0, - "learning_rate": 1.0037682111628723e-05, - "loss": 0.8482, + "learning_rate": 1.0060573721048104e-05, + "loss": 0.8967, "step": 18108 }, { - "epoch": 0.5138762769580023, + "epoch": 0.5131627419309133, "grad_norm": 0.0, - "learning_rate": 1.0036763039927621e-05, - "loss": 0.9223, + "learning_rate": 1.0059655948367229e-05, + "loss": 0.8768, "step": 18109 }, { - "epoch": 0.5139046538024972, + "epoch": 0.5131910793731758, "grad_norm": 0.0, - "learning_rate": 1.003584396791598e-05, - "loss": 0.9117, + "learning_rate": 1.0058738175183847e-05, + "loss": 0.8513, "step": 18110 }, { - "epoch": 0.513933030646992, + "epoch": 0.5132194168154383, "grad_norm": 0.0, - "learning_rate": 1.003492489560156e-05, - "loss": 0.8276, + "learning_rate": 1.005782040150569e-05, + "loss": 0.8461, "step": 18111 }, { - "epoch": 0.513961407491487, + "epoch": 0.5132477542577008, "grad_norm": 0.0, - "learning_rate": 1.0034005822992132e-05, - "loss": 0.9263, + "learning_rate": 1.0056902627340498e-05, + "loss": 0.8433, "step": 18112 }, { - "epoch": 0.5139897843359819, + "epoch": 0.5132760916999631, "grad_norm": 0.0, - "learning_rate": 1.0033086750095455e-05, - "loss": 0.8519, + "learning_rate": 1.0055984852695985e-05, + "loss": 0.9633, "step": 18113 }, { - "epoch": 0.5140181611804767, + "epoch": 0.5133044291422256, "grad_norm": 0.0, - "learning_rate": 1.003216767691929e-05, - "loss": 0.9188, + "learning_rate": 1.0055067077579894e-05, + "loss": 0.9332, "step": 18114 }, { - "epoch": 0.5140465380249716, + "epoch": 0.5133327665844881, "grad_norm": 0.0, - "learning_rate": 1.0031248603471404e-05, - "loss": 0.987, + "learning_rate": 1.0054149301999953e-05, + "loss": 0.8247, "step": 18115 }, { - "epoch": 0.5140749148694665, + "epoch": 0.5133611040267505, "grad_norm": 0.0, - "learning_rate": 1.0030329529759561e-05, - "loss": 0.9089, + "learning_rate": 1.0053231525963894e-05, + "loss": 0.9639, "step": 18116 }, { - "epoch": 0.5141032917139614, + "epoch": 0.513389441469013, "grad_norm": 0.0, - "learning_rate": 1.0029410455791524e-05, - "loss": 0.9167, + "learning_rate": 1.0052313749479445e-05, + "loss": 0.9351, "step": 18117 }, { - "epoch": 0.5141316685584563, + "epoch": 0.5134177789112755, "grad_norm": 0.0, - "learning_rate": 1.0028491381575054e-05, - "loss": 0.9245, + "learning_rate": 1.0051395972554336e-05, + "loss": 0.8497, "step": 18118 }, { - "epoch": 0.5141600454029512, + "epoch": 0.513446116353538, "grad_norm": 0.0, - "learning_rate": 1.0027572307117916e-05, - "loss": 0.7844, + "learning_rate": 1.0050478195196303e-05, + "loss": 0.9246, "step": 18119 }, { - "epoch": 0.5141884222474461, + "epoch": 0.5134744537958004, "grad_norm": 0.0, - "learning_rate": 1.0026653232427873e-05, - "loss": 0.8593, + "learning_rate": 1.0049560417413071e-05, + "loss": 0.9816, "step": 18120 }, { - "epoch": 0.514216799091941, + "epoch": 0.5135027912380629, "grad_norm": 0.0, - "learning_rate": 1.002573415751269e-05, - "loss": 0.8281, + "learning_rate": 1.0048642639212374e-05, + "loss": 0.8587, "step": 18121 }, { - "epoch": 0.5142451759364358, + "epoch": 0.5135311286803254, "grad_norm": 0.0, - "learning_rate": 1.0024815082380131e-05, - "loss": 0.9172, + "learning_rate": 1.0047724860601943e-05, + "loss": 0.8106, "step": 18122 }, { - "epoch": 0.5142735527809308, + "epoch": 0.5135594661225877, "grad_norm": 0.0, - "learning_rate": 1.0023896007037952e-05, - "loss": 0.9735, + "learning_rate": 1.0046807081589504e-05, + "loss": 0.8521, "step": 18123 }, { - "epoch": 0.5143019296254256, + "epoch": 0.5135878035648502, "grad_norm": 0.0, - "learning_rate": 1.0022976931493926e-05, - "loss": 0.8598, + "learning_rate": 1.0045889302182797e-05, + "loss": 0.843, "step": 18124 }, { - "epoch": 0.5143303064699205, + "epoch": 0.5136161410071127, "grad_norm": 0.0, - "learning_rate": 1.0022057855755814e-05, - "loss": 0.97, + "learning_rate": 1.0044971522389542e-05, + "loss": 0.9044, "step": 18125 }, { - "epoch": 0.5143586833144155, + "epoch": 0.5136444784493751, "grad_norm": 0.0, - "learning_rate": 1.0021138779831378e-05, - "loss": 0.8024, + "learning_rate": 1.004405374221748e-05, + "loss": 0.8743, "step": 18126 }, { - "epoch": 0.5143870601589103, + "epoch": 0.5136728158916376, "grad_norm": 0.0, - "learning_rate": 1.0020219703728379e-05, - "loss": 0.7521, + "learning_rate": 1.0043135961674333e-05, + "loss": 0.8897, "step": 18127 }, { - "epoch": 0.5144154370034052, + "epoch": 0.5137011533339001, "grad_norm": 0.0, - "learning_rate": 1.0019300627454586e-05, - "loss": 0.9108, + "learning_rate": 1.0042218180767838e-05, + "loss": 0.9537, "step": 18128 }, { - "epoch": 0.5144438138479002, + "epoch": 0.5137294907761626, "grad_norm": 0.0, - "learning_rate": 1.0018381551017762e-05, - "loss": 0.7588, + "learning_rate": 1.0041300399505724e-05, + "loss": 0.7663, "step": 18129 }, { - "epoch": 0.514472190692395, + "epoch": 0.513757828218425, "grad_norm": 0.0, - "learning_rate": 1.0017462474425664e-05, - "loss": 0.8722, + "learning_rate": 1.004038261789572e-05, + "loss": 0.8899, "step": 18130 }, { - "epoch": 0.5145005675368899, + "epoch": 0.5137861656606875, "grad_norm": 0.0, - "learning_rate": 1.0016543397686059e-05, - "loss": 0.9748, + "learning_rate": 1.0039464835945558e-05, + "loss": 0.8359, "step": 18131 }, { - "epoch": 0.5145289443813847, + "epoch": 0.51381450310295, "grad_norm": 0.0, - "learning_rate": 1.0015624320806714e-05, - "loss": 0.8023, + "learning_rate": 1.0038547053662968e-05, + "loss": 0.983, "step": 18132 }, { - "epoch": 0.5145573212258797, + "epoch": 0.5138428405452123, "grad_norm": 0.0, - "learning_rate": 1.0014705243795389e-05, - "loss": 0.8793, + "learning_rate": 1.0037629271055684e-05, + "loss": 0.9904, "step": 18133 }, { - "epoch": 0.5145856980703746, + "epoch": 0.5138711779874748, "grad_norm": 0.0, - "learning_rate": 1.0013786166659847e-05, - "loss": 0.8103, + "learning_rate": 1.0036711488131437e-05, + "loss": 0.9146, "step": 18134 }, { - "epoch": 0.5146140749148694, + "epoch": 0.5138995154297373, "grad_norm": 0.0, - "learning_rate": 1.0012867089407855e-05, - "loss": 0.8798, + "learning_rate": 1.003579370489795e-05, + "loss": 0.9805, "step": 18135 }, { - "epoch": 0.5146424517593644, + "epoch": 0.5139278528719998, "grad_norm": 0.0, - "learning_rate": 1.0011948012047173e-05, - "loss": 0.8813, + "learning_rate": 1.0034875921362963e-05, + "loss": 0.844, "step": 18136 }, { - "epoch": 0.5146708286038593, + "epoch": 0.5139561903142622, "grad_norm": 0.0, - "learning_rate": 1.0011028934585565e-05, - "loss": 0.9045, + "learning_rate": 1.0033958137534203e-05, + "loss": 0.9297, "step": 18137 }, { - "epoch": 0.5146992054483541, + "epoch": 0.5139845277565247, "grad_norm": 0.0, - "learning_rate": 1.0010109857030794e-05, - "loss": 0.7679, + "learning_rate": 1.00330403534194e-05, + "loss": 0.8808, "step": 18138 }, { - "epoch": 0.514727582292849, + "epoch": 0.5140128651987872, "grad_norm": 0.0, - "learning_rate": 1.0009190779390627e-05, - "loss": 0.9153, + "learning_rate": 1.0032122569026284e-05, + "loss": 0.9495, "step": 18139 }, { - "epoch": 0.514755959137344, + "epoch": 0.5140412026410496, "grad_norm": 0.0, - "learning_rate": 1.0008271701672825e-05, - "loss": 0.8054, + "learning_rate": 1.0031204784362591e-05, + "loss": 0.8755, "step": 18140 }, { - "epoch": 0.5147843359818388, + "epoch": 0.5140695400833121, "grad_norm": 0.0, - "learning_rate": 1.0007352623885148e-05, - "loss": 0.8794, + "learning_rate": 1.0030286999436048e-05, + "loss": 0.8356, "step": 18141 }, { - "epoch": 0.5148127128263337, + "epoch": 0.5140978775255746, "grad_norm": 0.0, - "learning_rate": 1.0006433546035368e-05, - "loss": 0.9583, + "learning_rate": 1.0029369214254385e-05, + "loss": 0.8809, "step": 18142 }, { - "epoch": 0.5148410896708286, + "epoch": 0.514126214967837, "grad_norm": 0.0, - "learning_rate": 1.0005514468131242e-05, - "loss": 0.9249, + "learning_rate": 1.0028451428825334e-05, + "loss": 0.8265, "step": 18143 }, { - "epoch": 0.5148694665153235, + "epoch": 0.5141545524100994, "grad_norm": 0.0, - "learning_rate": 1.0004595390180532e-05, - "loss": 0.9245, + "learning_rate": 1.0027533643156629e-05, + "loss": 0.9271, "step": 18144 }, { - "epoch": 0.5148978433598184, + "epoch": 0.5141828898523619, "grad_norm": 0.0, - "learning_rate": 1.0003676312191008e-05, - "loss": 0.8431, + "learning_rate": 1.0026615857255994e-05, + "loss": 0.8598, "step": 18145 }, { - "epoch": 0.5149262202043133, + "epoch": 0.5142112272946244, "grad_norm": 0.0, - "learning_rate": 1.0002757234170428e-05, - "loss": 0.8734, + "learning_rate": 1.0025698071131166e-05, + "loss": 0.9153, "step": 18146 }, { - "epoch": 0.5149545970488082, + "epoch": 0.5142395647368868, "grad_norm": 0.0, - "learning_rate": 1.0001838156126558e-05, - "loss": 0.87, + "learning_rate": 1.0024780284789875e-05, + "loss": 0.8324, "step": 18147 }, { - "epoch": 0.514982973893303, + "epoch": 0.5142679021791493, "grad_norm": 0.0, - "learning_rate": 1.0000919078067162e-05, - "loss": 0.8582, + "learning_rate": 1.0023862498239847e-05, + "loss": 0.8934, "step": 18148 }, { - "epoch": 0.5150113507377979, + "epoch": 0.5142962396214118, "grad_norm": 0.0, - "learning_rate": 1e-05, - "loss": 0.902, + "learning_rate": 1.0022944711488818e-05, + "loss": 0.9496, "step": 18149 }, { - "epoch": 0.5150397275822929, + "epoch": 0.5143245770636742, "grad_norm": 0.0, - "learning_rate": 9.999080921932843e-06, - "loss": 0.9224, + "learning_rate": 1.0022026924544517e-05, + "loss": 0.8146, "step": 18150 }, { - "epoch": 0.5150681044267877, + "epoch": 0.5143529145059367, "grad_norm": 0.0, - "learning_rate": 9.998161843873443e-06, - "loss": 0.9545, + "learning_rate": 1.0021109137414674e-05, + "loss": 0.8492, "step": 18151 }, { - "epoch": 0.5150964812712826, + "epoch": 0.5143812519481992, "grad_norm": 0.0, - "learning_rate": 9.997242765829575e-06, - "loss": 0.8028, + "learning_rate": 1.002019135010702e-05, + "loss": 0.9747, "step": 18152 }, { - "epoch": 0.5151248581157776, + "epoch": 0.5144095893904617, "grad_norm": 0.0, - "learning_rate": 9.996323687808997e-06, - "loss": 0.8958, + "learning_rate": 1.001927356262929e-05, + "loss": 0.9348, "step": 18153 }, { - "epoch": 0.5151532349602724, + "epoch": 0.514437926832724, "grad_norm": 0.0, - "learning_rate": 9.99540460981947e-06, - "loss": 0.9162, + "learning_rate": 1.0018355774989213e-05, + "loss": 0.8617, "step": 18154 }, { - "epoch": 0.5151816118047673, + "epoch": 0.5144662642749865, "grad_norm": 0.0, - "learning_rate": 9.994485531868763e-06, - "loss": 0.8465, + "learning_rate": 1.0017437987194516e-05, + "loss": 0.9512, "step": 18155 }, { - "epoch": 0.5152099886492622, + "epoch": 0.514494601717249, "grad_norm": 0.0, - "learning_rate": 9.993566453964639e-06, - "loss": 0.8863, + "learning_rate": 1.001652019925293e-05, + "loss": 0.8912, "step": 18156 }, { - "epoch": 0.5152383654937571, + "epoch": 0.5145229391595114, "grad_norm": 0.0, - "learning_rate": 9.992647376114853e-06, - "loss": 1.052, + "learning_rate": 1.0015602411172191e-05, + "loss": 0.9771, "step": 18157 }, { - "epoch": 0.515266742338252, + "epoch": 0.5145512766017739, "grad_norm": 0.0, - "learning_rate": 9.991728298327179e-06, - "loss": 0.8682, + "learning_rate": 1.0014684622960027e-05, + "loss": 1.0342, "step": 18158 }, { - "epoch": 0.5152951191827468, + "epoch": 0.5145796140440364, "grad_norm": 0.0, - "learning_rate": 9.990809220609374e-06, - "loss": 0.8915, + "learning_rate": 1.0013766834624168e-05, + "loss": 0.9473, "step": 18159 }, { - "epoch": 0.5153234960272418, + "epoch": 0.5146079514862989, "grad_norm": 0.0, - "learning_rate": 9.989890142969207e-06, - "loss": 0.741, + "learning_rate": 1.0012849046172346e-05, + "loss": 0.9445, "step": 18160 }, { - "epoch": 0.5153518728717367, + "epoch": 0.5146362889285613, "grad_norm": 0.0, - "learning_rate": 9.98897106541444e-06, - "loss": 0.9214, + "learning_rate": 1.0011931257612292e-05, + "loss": 0.9778, "step": 18161 }, { - "epoch": 0.5153802497162315, + "epoch": 0.5146646263708238, "grad_norm": 0.0, - "learning_rate": 9.988051987952829e-06, - "loss": 0.9351, + "learning_rate": 1.0011013468951738e-05, + "loss": 0.7979, "step": 18162 }, { - "epoch": 0.5154086265607265, + "epoch": 0.5146929638130863, "grad_norm": 0.0, - "learning_rate": 9.987132910592147e-06, - "loss": 0.8313, + "learning_rate": 1.0010095680198413e-05, + "loss": 0.8791, "step": 18163 }, { - "epoch": 0.5154370034052214, + "epoch": 0.5147213012553487, "grad_norm": 0.0, - "learning_rate": 9.986213833340156e-06, - "loss": 0.8477, + "learning_rate": 1.0009177891360048e-05, + "loss": 0.8644, "step": 18164 }, { - "epoch": 0.5154653802497162, + "epoch": 0.5147496386976111, "grad_norm": 0.0, - "learning_rate": 9.985294756204613e-06, - "loss": 0.8483, + "learning_rate": 1.0008260102444369e-05, + "loss": 0.8386, "step": 18165 }, { - "epoch": 0.5154937570942111, + "epoch": 0.5147779761398736, "grad_norm": 0.0, - "learning_rate": 9.98437567919329e-06, - "loss": 0.891, + "learning_rate": 1.000734231345912e-05, + "loss": 0.8946, "step": 18166 }, { - "epoch": 0.515522133938706, + "epoch": 0.514806313582136, "grad_norm": 0.0, - "learning_rate": 9.983456602313944e-06, - "loss": 0.8639, + "learning_rate": 1.000642452441202e-05, + "loss": 0.8781, "step": 18167 }, { - "epoch": 0.5155505107832009, + "epoch": 0.5148346510243985, "grad_norm": 0.0, - "learning_rate": 9.98253752557434e-06, - "loss": 0.9253, + "learning_rate": 1.0005506735310803e-05, + "loss": 0.9538, "step": 18168 }, { - "epoch": 0.5155788876276958, + "epoch": 0.514862988466661, "grad_norm": 0.0, - "learning_rate": 9.981618448982245e-06, - "loss": 0.9974, + "learning_rate": 1.0004588946163203e-05, + "loss": 0.851, "step": 18169 }, { - "epoch": 0.5156072644721907, + "epoch": 0.5148913259089235, "grad_norm": 0.0, - "learning_rate": 9.980699372545419e-06, - "loss": 0.8622, + "learning_rate": 1.0003671156976948e-05, + "loss": 0.882, "step": 18170 }, { - "epoch": 0.5156356413166856, + "epoch": 0.5149196633511859, "grad_norm": 0.0, - "learning_rate": 9.979780296271623e-06, - "loss": 0.7518, + "learning_rate": 1.000275336775977e-05, + "loss": 0.9022, "step": 18171 }, { - "epoch": 0.5156640181611805, + "epoch": 0.5149480007934484, "grad_norm": 0.0, - "learning_rate": 9.978861220168627e-06, - "loss": 0.8156, + "learning_rate": 1.0001835578519397e-05, + "loss": 0.8133, "step": 18172 }, { - "epoch": 0.5156923950056753, + "epoch": 0.5149763382357109, "grad_norm": 0.0, - "learning_rate": 9.977942144244186e-06, - "loss": 0.8354, + "learning_rate": 1.0000917789263565e-05, + "loss": 0.7326, "step": 18173 }, { - "epoch": 0.5157207718501703, + "epoch": 0.5150046756779733, "grad_norm": 0.0, - "learning_rate": 9.977023068506074e-06, - "loss": 0.7729, + "learning_rate": 1e-05, + "loss": 0.8854, "step": 18174 }, { - "epoch": 0.5157491486946651, + "epoch": 0.5150330131202357, "grad_norm": 0.0, - "learning_rate": 9.976103992962047e-06, - "loss": 1.0041, + "learning_rate": 9.999082210736437e-06, + "loss": 0.8115, "step": 18175 }, { - "epoch": 0.51577752553916, + "epoch": 0.5150613505624982, "grad_norm": 0.0, - "learning_rate": 9.975184917619874e-06, - "loss": 0.742, + "learning_rate": 9.998164421480606e-06, + "loss": 0.9234, "step": 18176 }, { - "epoch": 0.515805902383655, + "epoch": 0.5150896880047607, "grad_norm": 0.0, - "learning_rate": 9.974265842487312e-06, - "loss": 0.8689, + "learning_rate": 9.997246632240234e-06, + "loss": 0.866, "step": 18177 }, { - "epoch": 0.5158342792281498, + "epoch": 0.5151180254470231, "grad_norm": 0.0, - "learning_rate": 9.973346767572128e-06, - "loss": 0.9681, + "learning_rate": 9.996328843023055e-06, + "loss": 0.7746, "step": 18178 }, { - "epoch": 0.5158626560726447, + "epoch": 0.5151463628892856, "grad_norm": 0.0, - "learning_rate": 9.972427692882087e-06, - "loss": 0.8934, + "learning_rate": 9.995411053836798e-06, + "loss": 0.8696, "step": 18179 }, { - "epoch": 0.5158910329171397, + "epoch": 0.5151747003315481, "grad_norm": 0.0, - "learning_rate": 9.97150861842495e-06, - "loss": 0.9104, + "learning_rate": 9.994493264689197e-06, + "loss": 0.8898, "step": 18180 }, { - "epoch": 0.5159194097616345, + "epoch": 0.5152030377738105, "grad_norm": 0.0, - "learning_rate": 9.97058954420848e-06, - "loss": 0.8076, + "learning_rate": 9.993575475587984e-06, + "loss": 0.9419, "step": 18181 }, { - "epoch": 0.5159477866061294, + "epoch": 0.515231375216073, "grad_norm": 0.0, - "learning_rate": 9.96967047024044e-06, - "loss": 0.9939, + "learning_rate": 9.992657686540884e-06, + "loss": 0.9687, "step": 18182 }, { - "epoch": 0.5159761634506242, + "epoch": 0.5152597126583355, "grad_norm": 0.0, - "learning_rate": 9.968751396528599e-06, - "loss": 0.8744, + "learning_rate": 9.99173989755563e-06, + "loss": 0.8654, "step": 18183 }, { - "epoch": 0.5160045402951192, + "epoch": 0.515288050100598, "grad_norm": 0.0, - "learning_rate": 9.967832323080715e-06, - "loss": 0.9579, + "learning_rate": 9.990822108639957e-06, + "loss": 0.9589, "step": 18184 }, { - "epoch": 0.5160329171396141, + "epoch": 0.5153163875428604, "grad_norm": 0.0, - "learning_rate": 9.96691324990455e-06, - "loss": 0.9392, + "learning_rate": 9.98990431980159e-06, + "loss": 0.8036, "step": 18185 }, { - "epoch": 0.5160612939841089, + "epoch": 0.5153447249851228, "grad_norm": 0.0, - "learning_rate": 9.965994177007871e-06, - "loss": 0.791, + "learning_rate": 9.988986531048267e-06, + "loss": 0.9002, "step": 18186 }, { - "epoch": 0.5160896708286039, + "epoch": 0.5153730624273853, "grad_norm": 0.0, - "learning_rate": 9.965075104398444e-06, - "loss": 0.8812, + "learning_rate": 9.988068742387711e-06, + "loss": 0.8289, "step": 18187 }, { - "epoch": 0.5161180476730988, + "epoch": 0.5154013998696477, "grad_norm": 0.0, - "learning_rate": 9.964156032084022e-06, - "loss": 0.9492, + "learning_rate": 9.987150953827656e-06, + "loss": 0.9337, "step": 18188 }, { - "epoch": 0.5161464245175936, + "epoch": 0.5154297373119102, "grad_norm": 0.0, - "learning_rate": 9.96323696007238e-06, - "loss": 0.8602, + "learning_rate": 9.986233165375837e-06, + "loss": 0.8277, "step": 18189 }, { - "epoch": 0.5161748013620885, + "epoch": 0.5154580747541727, "grad_norm": 0.0, - "learning_rate": 9.962317888371278e-06, - "loss": 0.9233, + "learning_rate": 9.985315377039978e-06, + "loss": 0.7892, "step": 18190 }, { - "epoch": 0.5162031782065835, + "epoch": 0.5154864121964351, "grad_norm": 0.0, - "learning_rate": 9.961398816988479e-06, - "loss": 0.8371, + "learning_rate": 9.984397588827812e-06, + "loss": 0.9836, "step": 18191 }, { - "epoch": 0.5162315550510783, + "epoch": 0.5155147496386976, "grad_norm": 0.0, - "learning_rate": 9.960479745931743e-06, - "loss": 0.7873, + "learning_rate": 9.983479800747072e-06, + "loss": 0.8972, "step": 18192 }, { - "epoch": 0.5162599318955732, + "epoch": 0.5155430870809601, "grad_norm": 0.0, - "learning_rate": 9.959560675208839e-06, - "loss": 0.9109, + "learning_rate": 9.982562012805487e-06, + "loss": 0.9414, "step": 18193 }, { - "epoch": 0.5162883087400681, + "epoch": 0.5155714245232226, "grad_norm": 0.0, - "learning_rate": 9.958641604827529e-06, - "loss": 0.9557, + "learning_rate": 9.981644225010794e-06, + "loss": 0.9136, "step": 18194 }, { - "epoch": 0.516316685584563, + "epoch": 0.515599761965485, "grad_norm": 0.0, - "learning_rate": 9.95772253479557e-06, - "loss": 0.8969, + "learning_rate": 9.980726437370713e-06, + "loss": 0.8264, "step": 18195 }, { - "epoch": 0.5163450624290579, + "epoch": 0.5156280994077475, "grad_norm": 0.0, - "learning_rate": 9.956803465120736e-06, - "loss": 0.9171, + "learning_rate": 9.979808649892979e-06, + "loss": 0.7958, "step": 18196 }, { - "epoch": 0.5163734392735527, + "epoch": 0.51565643685001, "grad_norm": 0.0, - "learning_rate": 9.955884395810783e-06, - "loss": 0.8993, + "learning_rate": 9.978890862585329e-06, + "loss": 0.9588, "step": 18197 }, { - "epoch": 0.5164018161180477, + "epoch": 0.5156847742922723, "grad_norm": 0.0, - "learning_rate": 9.954965326873477e-06, - "loss": 0.8913, + "learning_rate": 9.977973075455485e-06, + "loss": 0.8654, "step": 18198 }, { - "epoch": 0.5164301929625426, + "epoch": 0.5157131117345348, "grad_norm": 0.0, - "learning_rate": 9.95404625831658e-06, - "loss": 0.8793, + "learning_rate": 9.977055288511182e-06, + "loss": 0.9258, "step": 18199 }, { - "epoch": 0.5164585698070374, + "epoch": 0.5157414491767973, "grad_norm": 0.0, - "learning_rate": 9.953127190147858e-06, - "loss": 0.9305, + "learning_rate": 9.976137501760157e-06, + "loss": 0.9177, "step": 18200 }, { - "epoch": 0.5164869466515324, + "epoch": 0.5157697866190598, "grad_norm": 0.0, - "learning_rate": 9.95220812237507e-06, - "loss": 0.8484, + "learning_rate": 9.975219715210129e-06, + "loss": 0.9212, "step": 18201 }, { - "epoch": 0.5165153234960272, + "epoch": 0.5157981240613222, "grad_norm": 0.0, - "learning_rate": 9.951289055005983e-06, - "loss": 0.845, + "learning_rate": 9.974301928868839e-06, + "loss": 0.8393, "step": 18202 }, { - "epoch": 0.5165437003405221, + "epoch": 0.5158264615035847, "grad_norm": 0.0, - "learning_rate": 9.950369988048357e-06, - "loss": 0.8291, + "learning_rate": 9.97338414274401e-06, + "loss": 0.7884, "step": 18203 }, { - "epoch": 0.5165720771850171, + "epoch": 0.5158547989458472, "grad_norm": 0.0, - "learning_rate": 9.949450921509962e-06, - "loss": 0.8879, + "learning_rate": 9.972466356843375e-06, + "loss": 0.8597, "step": 18204 }, { - "epoch": 0.5166004540295119, + "epoch": 0.5158831363881096, "grad_norm": 0.0, - "learning_rate": 9.948531855398558e-06, - "loss": 0.8657, + "learning_rate": 9.971548571174668e-06, + "loss": 0.9337, "step": 18205 }, { - "epoch": 0.5166288308740068, + "epoch": 0.5159114738303721, "grad_norm": 0.0, - "learning_rate": 9.947612789721904e-06, - "loss": 0.9212, + "learning_rate": 9.970630785745617e-06, + "loss": 0.7753, "step": 18206 }, { - "epoch": 0.5166572077185017, + "epoch": 0.5159398112726346, "grad_norm": 0.0, - "learning_rate": 9.946693724487771e-06, - "loss": 0.86, + "learning_rate": 9.969713000563957e-06, + "loss": 0.8838, "step": 18207 }, { - "epoch": 0.5166855845629966, + "epoch": 0.515968148714897, "grad_norm": 0.0, - "learning_rate": 9.945774659703918e-06, - "loss": 0.8587, + "learning_rate": 9.968795215637412e-06, + "loss": 0.7622, "step": 18208 }, { - "epoch": 0.5167139614074915, + "epoch": 0.5159964861571594, "grad_norm": 0.0, - "learning_rate": 9.944855595378106e-06, - "loss": 0.8071, + "learning_rate": 9.967877430973716e-06, + "loss": 0.9655, "step": 18209 }, { - "epoch": 0.5167423382519863, + "epoch": 0.5160248235994219, "grad_norm": 0.0, - "learning_rate": 9.943936531518104e-06, - "loss": 0.8035, + "learning_rate": 9.966959646580604e-06, + "loss": 0.8595, "step": 18210 }, { - "epoch": 0.5167707150964813, + "epoch": 0.5160531610416844, "grad_norm": 0.0, - "learning_rate": 9.943017468131672e-06, - "loss": 0.7725, + "learning_rate": 9.966041862465799e-06, + "loss": 0.7882, "step": 18211 }, { - "epoch": 0.5167990919409762, + "epoch": 0.5160814984839468, "grad_norm": 0.0, - "learning_rate": 9.942098405226571e-06, - "loss": 0.9177, + "learning_rate": 9.965124078637037e-06, + "loss": 0.9123, "step": 18212 }, { - "epoch": 0.516827468785471, + "epoch": 0.5161098359262093, "grad_norm": 0.0, - "learning_rate": 9.941179342810571e-06, - "loss": 0.8585, + "learning_rate": 9.964206295102052e-06, + "loss": 0.9324, "step": 18213 }, { - "epoch": 0.5168558456299659, + "epoch": 0.5161381733684718, "grad_norm": 0.0, - "learning_rate": 9.940260280891432e-06, - "loss": 0.8952, + "learning_rate": 9.963288511868567e-06, + "loss": 0.9663, "step": 18214 }, { - "epoch": 0.5168842224744609, + "epoch": 0.5161665108107342, "grad_norm": 0.0, - "learning_rate": 9.939341219476915e-06, - "loss": 0.8524, + "learning_rate": 9.96237072894432e-06, + "loss": 0.8676, "step": 18215 }, { - "epoch": 0.5169125993189557, + "epoch": 0.5161948482529967, "grad_norm": 0.0, - "learning_rate": 9.938422158574786e-06, - "loss": 0.8195, + "learning_rate": 9.961452946337035e-06, + "loss": 0.8961, "step": 18216 }, { - "epoch": 0.5169409761634506, + "epoch": 0.5162231856952592, "grad_norm": 0.0, - "learning_rate": 9.937503098192809e-06, - "loss": 0.8564, + "learning_rate": 9.960535164054444e-06, + "loss": 0.895, "step": 18217 }, { - "epoch": 0.5169693530079456, + "epoch": 0.5162515231375217, "grad_norm": 0.0, - "learning_rate": 9.936584038338744e-06, - "loss": 0.9553, + "learning_rate": 9.959617382104284e-06, + "loss": 0.9228, "step": 18218 }, { - "epoch": 0.5169977298524404, + "epoch": 0.516279860579784, "grad_norm": 0.0, - "learning_rate": 9.935664979020354e-06, - "loss": 0.8768, + "learning_rate": 9.95869960049428e-06, + "loss": 0.9246, "step": 18219 }, { - "epoch": 0.5170261066969353, + "epoch": 0.5163081980220465, "grad_norm": 0.0, - "learning_rate": 9.93474592024541e-06, - "loss": 0.8015, + "learning_rate": 9.957781819232163e-06, + "loss": 0.9453, "step": 18220 }, { - "epoch": 0.5170544835414302, + "epoch": 0.516336535464309, "grad_norm": 0.0, - "learning_rate": 9.933826862021669e-06, - "loss": 0.9181, + "learning_rate": 9.95686403832567e-06, + "loss": 0.9387, "step": 18221 }, { - "epoch": 0.5170828603859251, + "epoch": 0.5163648729065714, "grad_norm": 0.0, - "learning_rate": 9.932907804356893e-06, - "loss": 0.9062, + "learning_rate": 9.955946257782524e-06, + "loss": 0.9027, "step": 18222 }, { - "epoch": 0.51711123723042, + "epoch": 0.5163932103488339, "grad_norm": 0.0, - "learning_rate": 9.93198874725885e-06, - "loss": 0.8105, + "learning_rate": 9.95502847761046e-06, + "loss": 0.8136, "step": 18223 }, { - "epoch": 0.5171396140749148, + "epoch": 0.5164215477910964, "grad_norm": 0.0, - "learning_rate": 9.931069690735301e-06, - "loss": 0.8112, + "learning_rate": 9.954110697817207e-06, + "loss": 0.8748, "step": 18224 }, { - "epoch": 0.5171679909194098, + "epoch": 0.5164498852333589, "grad_norm": 0.0, - "learning_rate": 9.930150634794013e-06, - "loss": 0.9508, + "learning_rate": 9.953192918410496e-06, + "loss": 0.82, "step": 18225 }, { - "epoch": 0.5171963677639047, + "epoch": 0.5164782226756213, "grad_norm": 0.0, - "learning_rate": 9.92923157944274e-06, - "loss": 0.7864, + "learning_rate": 9.952275139398062e-06, + "loss": 0.805, "step": 18226 }, { - "epoch": 0.5172247446083995, + "epoch": 0.5165065601178838, "grad_norm": 0.0, - "learning_rate": 9.928312524689252e-06, - "loss": 0.9004, + "learning_rate": 9.95135736078763e-06, + "loss": 0.86, "step": 18227 }, { - "epoch": 0.5172531214528945, + "epoch": 0.5165348975601463, "grad_norm": 0.0, - "learning_rate": 9.927393470541314e-06, - "loss": 0.826, + "learning_rate": 9.95043958258693e-06, + "loss": 0.9467, "step": 18228 }, { - "epoch": 0.5172814982973893, + "epoch": 0.5165632350024086, "grad_norm": 0.0, - "learning_rate": 9.926474417006684e-06, - "loss": 0.8406, + "learning_rate": 9.949521804803699e-06, + "loss": 1.0147, "step": 18229 }, { - "epoch": 0.5173098751418842, + "epoch": 0.5165915724446711, "grad_norm": 0.0, - "learning_rate": 9.92555536409313e-06, - "loss": 0.8946, + "learning_rate": 9.948604027445666e-06, + "loss": 0.8821, "step": 18230 }, { - "epoch": 0.5173382519863791, + "epoch": 0.5166199098869336, "grad_norm": 0.0, - "learning_rate": 9.924636311808413e-06, - "loss": 0.8133, + "learning_rate": 9.94768625052056e-06, + "loss": 0.9642, "step": 18231 }, { - "epoch": 0.517366628830874, + "epoch": 0.5166482473291961, "grad_norm": 0.0, - "learning_rate": 9.923717260160294e-06, - "loss": 0.8693, + "learning_rate": 9.946768474036107e-06, + "loss": 0.8297, "step": 18232 }, { - "epoch": 0.5173950056753689, + "epoch": 0.5166765847714585, "grad_norm": 0.0, - "learning_rate": 9.922798209156542e-06, - "loss": 0.9223, + "learning_rate": 9.945850698000047e-06, + "loss": 0.8809, "step": 18233 }, { - "epoch": 0.5174233825198638, + "epoch": 0.516704922213721, "grad_norm": 0.0, - "learning_rate": 9.921879158804912e-06, - "loss": 0.9675, + "learning_rate": 9.944932922420109e-06, + "loss": 0.9025, "step": 18234 }, { - "epoch": 0.5174517593643587, + "epoch": 0.5167332596559835, "grad_norm": 0.0, - "learning_rate": 9.920960109113176e-06, - "loss": 0.8681, + "learning_rate": 9.944015147304018e-06, + "loss": 0.9091, "step": 18235 }, { - "epoch": 0.5174801362088536, + "epoch": 0.5167615970982459, "grad_norm": 0.0, - "learning_rate": 9.920041060089092e-06, - "loss": 0.885, + "learning_rate": 9.943097372659509e-06, + "loss": 0.9406, "step": 18236 }, { - "epoch": 0.5175085130533484, + "epoch": 0.5167899345405084, "grad_norm": 0.0, - "learning_rate": 9.919122011740427e-06, - "loss": 0.8297, + "learning_rate": 9.94217959849431e-06, + "loss": 1.0366, "step": 18237 }, { - "epoch": 0.5175368898978434, + "epoch": 0.5168182719827709, "grad_norm": 0.0, - "learning_rate": 9.918202964074942e-06, - "loss": 0.9895, + "learning_rate": 9.941261824816155e-06, + "loss": 0.7775, "step": 18238 }, { - "epoch": 0.5175652667423383, + "epoch": 0.5168466094250332, "grad_norm": 0.0, - "learning_rate": 9.917283917100396e-06, - "loss": 0.8769, + "learning_rate": 9.940344051632778e-06, + "loss": 1.0202, "step": 18239 }, { - "epoch": 0.5175936435868331, + "epoch": 0.5168749468672957, "grad_norm": 0.0, - "learning_rate": 9.916364870824561e-06, - "loss": 0.9311, + "learning_rate": 9.9394262789519e-06, + "loss": 0.8237, "step": 18240 }, { - "epoch": 0.517622020431328, + "epoch": 0.5169032843095582, "grad_norm": 0.0, - "learning_rate": 9.915445825255196e-06, - "loss": 0.8857, + "learning_rate": 9.938508506781256e-06, + "loss": 0.9061, "step": 18241 }, { - "epoch": 0.517650397275823, + "epoch": 0.5169316217518207, "grad_norm": 0.0, - "learning_rate": 9.91452678040006e-06, - "loss": 0.9232, + "learning_rate": 9.93759073512858e-06, + "loss": 0.8837, "step": 18242 }, { - "epoch": 0.5176787741203178, + "epoch": 0.5169599591940831, "grad_norm": 0.0, - "learning_rate": 9.913607736266923e-06, - "loss": 0.9193, + "learning_rate": 9.9366729640016e-06, + "loss": 0.8822, "step": 18243 }, { - "epoch": 0.5177071509648127, + "epoch": 0.5169882966363456, "grad_norm": 0.0, - "learning_rate": 9.912688692863545e-06, - "loss": 0.8089, + "learning_rate": 9.935755193408052e-06, + "loss": 1.0098, "step": 18244 }, { - "epoch": 0.5177355278093076, + "epoch": 0.5170166340786081, "grad_norm": 0.0, - "learning_rate": 9.911769650197689e-06, - "loss": 0.9255, + "learning_rate": 9.934837423355654e-06, + "loss": 0.8322, "step": 18245 }, { - "epoch": 0.5177639046538025, + "epoch": 0.5170449715208705, "grad_norm": 0.0, - "learning_rate": 9.91085060827712e-06, - "loss": 0.8635, + "learning_rate": 9.933919653852147e-06, + "loss": 0.9472, "step": 18246 }, { - "epoch": 0.5177922814982974, + "epoch": 0.517073308963133, "grad_norm": 0.0, - "learning_rate": 9.9099315671096e-06, - "loss": 0.9005, + "learning_rate": 9.933001884905263e-06, + "loss": 1.028, "step": 18247 }, { - "epoch": 0.5178206583427922, + "epoch": 0.5171016464053955, "grad_norm": 0.0, - "learning_rate": 9.909012526702889e-06, - "loss": 0.8911, + "learning_rate": 9.932084116522725e-06, + "loss": 0.9038, "step": 18248 }, { - "epoch": 0.5178490351872872, + "epoch": 0.517129983847658, "grad_norm": 0.0, - "learning_rate": 9.908093487064755e-06, - "loss": 0.8831, + "learning_rate": 9.931166348712268e-06, + "loss": 0.8991, "step": 18249 }, { - "epoch": 0.5178774120317821, + "epoch": 0.5171583212899203, "grad_norm": 0.0, - "learning_rate": 9.907174448202957e-06, - "loss": 0.9708, + "learning_rate": 9.930248581481625e-06, + "loss": 0.868, "step": 18250 }, { - "epoch": 0.5179057888762769, + "epoch": 0.5171866587321828, "grad_norm": 0.0, - "learning_rate": 9.906255410125264e-06, - "loss": 0.8047, + "learning_rate": 9.92933081483852e-06, + "loss": 0.9747, "step": 18251 }, { - "epoch": 0.5179341657207719, + "epoch": 0.5172149961744453, "grad_norm": 0.0, - "learning_rate": 9.905336372839436e-06, - "loss": 0.9426, + "learning_rate": 9.928413048790694e-06, + "loss": 0.8388, "step": 18252 }, { - "epoch": 0.5179625425652667, + "epoch": 0.5172433336167077, "grad_norm": 0.0, - "learning_rate": 9.904417336353234e-06, - "loss": 0.8973, + "learning_rate": 9.927495283345866e-06, + "loss": 0.982, "step": 18253 }, { - "epoch": 0.5179909194097616, + "epoch": 0.5172716710589702, "grad_norm": 0.0, - "learning_rate": 9.903498300674425e-06, - "loss": 0.9449, + "learning_rate": 9.926577518511772e-06, + "loss": 0.8915, "step": 18254 }, { - "epoch": 0.5180192962542566, + "epoch": 0.5173000085012327, "grad_norm": 0.0, - "learning_rate": 9.90257926581077e-06, - "loss": 0.9716, + "learning_rate": 9.925659754296145e-06, + "loss": 0.9141, "step": 18255 }, { - "epoch": 0.5180476730987514, + "epoch": 0.5173283459434952, "grad_norm": 0.0, - "learning_rate": 9.90166023177003e-06, - "loss": 0.8476, + "learning_rate": 9.924741990706712e-06, + "loss": 0.9376, "step": 18256 }, { - "epoch": 0.5180760499432463, + "epoch": 0.5173566833857576, "grad_norm": 0.0, - "learning_rate": 9.900741198559971e-06, - "loss": 0.8843, + "learning_rate": 9.923824227751205e-06, + "loss": 0.7575, "step": 18257 }, { - "epoch": 0.5181044267877412, + "epoch": 0.5173850208280201, "grad_norm": 0.0, - "learning_rate": 9.899822166188359e-06, - "loss": 0.9144, + "learning_rate": 9.92290646543736e-06, + "loss": 0.8606, "step": 18258 }, { - "epoch": 0.5181328036322361, + "epoch": 0.5174133582702826, "grad_norm": 0.0, - "learning_rate": 9.898903134662949e-06, - "loss": 0.9321, + "learning_rate": 9.921988703772897e-06, + "loss": 0.8009, "step": 18259 }, { - "epoch": 0.518161180476731, + "epoch": 0.517441695712545, "grad_norm": 0.0, - "learning_rate": 9.897984103991513e-06, - "loss": 0.8259, + "learning_rate": 9.921070942765556e-06, + "loss": 0.9523, "step": 18260 }, { - "epoch": 0.5181895573212258, + "epoch": 0.5174700331548074, "grad_norm": 0.0, - "learning_rate": 9.897065074181809e-06, - "loss": 1.004, + "learning_rate": 9.920153182423062e-06, + "loss": 0.8791, "step": 18261 }, { - "epoch": 0.5182179341657208, + "epoch": 0.5174983705970699, "grad_norm": 0.0, - "learning_rate": 9.896146045241598e-06, - "loss": 0.7812, + "learning_rate": 9.919235422753143e-06, + "loss": 0.887, "step": 18262 }, { - "epoch": 0.5182463110102157, + "epoch": 0.5175267080393323, "grad_norm": 0.0, - "learning_rate": 9.895227017178648e-06, - "loss": 0.7353, + "learning_rate": 9.91831766376354e-06, + "loss": 1.0197, "step": 18263 }, { - "epoch": 0.5182746878547105, + "epoch": 0.5175550454815948, "grad_norm": 0.0, - "learning_rate": 9.894307990000722e-06, - "loss": 0.9502, + "learning_rate": 9.917399905461974e-06, + "loss": 0.9069, "step": 18264 }, { - "epoch": 0.5183030646992054, + "epoch": 0.5175833829238573, "grad_norm": 0.0, - "learning_rate": 9.893388963715574e-06, - "loss": 0.8723, + "learning_rate": 9.916482147856184e-06, + "loss": 0.8711, "step": 18265 }, { - "epoch": 0.5183314415437004, + "epoch": 0.5176117203661198, "grad_norm": 0.0, - "learning_rate": 9.89246993833098e-06, - "loss": 0.8621, + "learning_rate": 9.915564390953891e-06, + "loss": 0.9355, "step": 18266 }, { - "epoch": 0.5183598183881952, + "epoch": 0.5176400578083822, "grad_norm": 0.0, - "learning_rate": 9.891550913854696e-06, - "loss": 0.8293, + "learning_rate": 9.91464663476283e-06, + "loss": 0.9176, "step": 18267 }, { - "epoch": 0.5183881952326901, + "epoch": 0.5176683952506447, "grad_norm": 0.0, - "learning_rate": 9.890631890294487e-06, - "loss": 0.8869, + "learning_rate": 9.913728879290736e-06, + "loss": 0.854, "step": 18268 }, { - "epoch": 0.5184165720771851, + "epoch": 0.5176967326929072, "grad_norm": 0.0, - "learning_rate": 9.889712867658117e-06, - "loss": 0.8671, + "learning_rate": 9.912811124545334e-06, + "loss": 0.8706, "step": 18269 }, { - "epoch": 0.5184449489216799, + "epoch": 0.5177250701351696, "grad_norm": 0.0, - "learning_rate": 9.888793845953345e-06, - "loss": 0.8466, + "learning_rate": 9.911893370534354e-06, + "loss": 0.9911, "step": 18270 }, { - "epoch": 0.5184733257661748, + "epoch": 0.517753407577432, "grad_norm": 0.0, - "learning_rate": 9.887874825187937e-06, - "loss": 0.9088, + "learning_rate": 9.910975617265535e-06, + "loss": 0.9021, "step": 18271 }, { - "epoch": 0.5185017026106697, + "epoch": 0.5177817450196945, "grad_norm": 0.0, - "learning_rate": 9.886955805369655e-06, - "loss": 0.9004, + "learning_rate": 9.910057864746596e-06, + "loss": 0.8292, "step": 18272 }, { - "epoch": 0.5185300794551646, + "epoch": 0.517810082461957, "grad_norm": 0.0, - "learning_rate": 9.886036786506262e-06, - "loss": 0.8918, + "learning_rate": 9.909140112985277e-06, + "loss": 0.9001, "step": 18273 }, { - "epoch": 0.5185584562996595, + "epoch": 0.5178384199042194, "grad_norm": 0.0, - "learning_rate": 9.885117768605522e-06, - "loss": 0.87, + "learning_rate": 9.908222361989301e-06, + "loss": 0.9565, "step": 18274 }, { - "epoch": 0.5185868331441543, + "epoch": 0.5178667573464819, "grad_norm": 0.0, - "learning_rate": 9.884198751675198e-06, - "loss": 0.8256, + "learning_rate": 9.907304611766402e-06, + "loss": 0.9058, "step": 18275 }, { - "epoch": 0.5186152099886493, + "epoch": 0.5178950947887444, "grad_norm": 0.0, - "learning_rate": 9.883279735723052e-06, - "loss": 0.9923, + "learning_rate": 9.906386862324313e-06, + "loss": 0.8941, "step": 18276 }, { - "epoch": 0.5186435868331442, + "epoch": 0.5179234322310068, "grad_norm": 0.0, - "learning_rate": 9.882360720756848e-06, - "loss": 0.9164, + "learning_rate": 9.90546911367076e-06, + "loss": 0.9723, "step": 18277 }, { - "epoch": 0.518671963677639, + "epoch": 0.5179517696732693, "grad_norm": 0.0, - "learning_rate": 9.881441706784348e-06, - "loss": 0.8893, + "learning_rate": 9.904551365813475e-06, + "loss": 0.9258, "step": 18278 }, { - "epoch": 0.518700340522134, + "epoch": 0.5179801071155318, "grad_norm": 0.0, - "learning_rate": 9.880522693813313e-06, - "loss": 0.955, + "learning_rate": 9.903633618760195e-06, + "loss": 0.9315, "step": 18279 }, { - "epoch": 0.5187287173666288, + "epoch": 0.5180084445577943, "grad_norm": 0.0, - "learning_rate": 9.879603681851506e-06, - "loss": 0.8679, + "learning_rate": 9.90271587251864e-06, + "loss": 1.0048, "step": 18280 }, { - "epoch": 0.5187570942111237, + "epoch": 0.5180367820000567, "grad_norm": 0.0, - "learning_rate": 9.878684670906697e-06, - "loss": 0.9318, + "learning_rate": 9.901798127096545e-06, + "loss": 0.8736, "step": 18281 }, { - "epoch": 0.5187854710556186, + "epoch": 0.5180651194423191, "grad_norm": 0.0, - "learning_rate": 9.877765660986643e-06, - "loss": 0.9097, + "learning_rate": 9.900880382501641e-06, + "loss": 0.8913, "step": 18282 }, { - "epoch": 0.5188138479001135, + "epoch": 0.5180934568845816, "grad_norm": 0.0, - "learning_rate": 9.876846652099107e-06, - "loss": 0.8721, + "learning_rate": 9.89996263874166e-06, + "loss": 0.8516, "step": 18283 }, { - "epoch": 0.5188422247446084, + "epoch": 0.518121794326844, "grad_norm": 0.0, - "learning_rate": 9.875927644251856e-06, - "loss": 0.9064, + "learning_rate": 9.899044895824332e-06, + "loss": 0.9956, "step": 18284 }, { - "epoch": 0.5188706015891033, + "epoch": 0.5181501317691065, "grad_norm": 0.0, - "learning_rate": 9.875008637452647e-06, - "loss": 0.8762, + "learning_rate": 9.898127153757385e-06, + "loss": 0.8291, "step": 18285 }, { - "epoch": 0.5188989784335982, + "epoch": 0.518178469211369, "grad_norm": 0.0, - "learning_rate": 9.874089631709245e-06, - "loss": 0.7245, + "learning_rate": 9.89720941254855e-06, + "loss": 0.8829, "step": 18286 }, { - "epoch": 0.5189273552780931, + "epoch": 0.5182068066536314, "grad_norm": 0.0, - "learning_rate": 9.873170627029416e-06, - "loss": 0.8515, + "learning_rate": 9.89629167220556e-06, + "loss": 0.8583, "step": 18287 }, { - "epoch": 0.5189557321225879, + "epoch": 0.5182351440958939, "grad_norm": 0.0, - "learning_rate": 9.87225162342092e-06, - "loss": 0.8476, + "learning_rate": 9.89537393273614e-06, + "loss": 0.8708, "step": 18288 }, { - "epoch": 0.5189841089670829, + "epoch": 0.5182634815381564, "grad_norm": 0.0, - "learning_rate": 9.871332620891519e-06, - "loss": 0.7605, + "learning_rate": 9.894456194148028e-06, + "loss": 0.8198, "step": 18289 }, { - "epoch": 0.5190124858115778, + "epoch": 0.5182918189804189, "grad_norm": 0.0, - "learning_rate": 9.870413619448977e-06, - "loss": 0.8935, + "learning_rate": 9.893538456448949e-06, + "loss": 0.8026, "step": 18290 }, { - "epoch": 0.5190408626560726, + "epoch": 0.5183201564226813, "grad_norm": 0.0, - "learning_rate": 9.869494619101058e-06, - "loss": 0.9523, + "learning_rate": 9.892620719646635e-06, + "loss": 0.8286, "step": 18291 }, { - "epoch": 0.5190692395005675, + "epoch": 0.5183484938649437, "grad_norm": 0.0, - "learning_rate": 9.868575619855525e-06, - "loss": 0.8636, + "learning_rate": 9.89170298374882e-06, + "loss": 0.8854, "step": 18292 }, { - "epoch": 0.5190976163450625, + "epoch": 0.5183768313072062, "grad_norm": 0.0, - "learning_rate": 9.867656621720137e-06, - "loss": 0.9355, + "learning_rate": 9.890785248763227e-06, + "loss": 0.8265, "step": 18293 }, { - "epoch": 0.5191259931895573, + "epoch": 0.5184051687494686, "grad_norm": 0.0, - "learning_rate": 9.866737624702661e-06, - "loss": 0.8552, + "learning_rate": 9.889867514697591e-06, + "loss": 0.8863, "step": 18294 }, { - "epoch": 0.5191543700340522, + "epoch": 0.5184335061917311, "grad_norm": 0.0, - "learning_rate": 9.865818628810853e-06, - "loss": 0.7258, + "learning_rate": 9.888949781559642e-06, + "loss": 0.8223, "step": 18295 }, { - "epoch": 0.5191827468785472, + "epoch": 0.5184618436339936, "grad_norm": 0.0, - "learning_rate": 9.864899634052488e-06, - "loss": 0.8062, + "learning_rate": 9.888032049357108e-06, + "loss": 0.865, "step": 18296 }, { - "epoch": 0.519211123723042, + "epoch": 0.5184901810762561, "grad_norm": 0.0, - "learning_rate": 9.863980640435317e-06, - "loss": 0.9081, + "learning_rate": 9.887114318097728e-06, + "loss": 0.8753, "step": 18297 }, { - "epoch": 0.5192395005675369, + "epoch": 0.5185185185185185, "grad_norm": 0.0, - "learning_rate": 9.863061647967113e-06, - "loss": 0.8659, + "learning_rate": 9.886196587789221e-06, + "loss": 0.9015, "step": 18298 }, { - "epoch": 0.5192678774120317, + "epoch": 0.518546855960781, "grad_norm": 0.0, - "learning_rate": 9.862142656655631e-06, - "loss": 0.7947, + "learning_rate": 9.885278858439321e-06, + "loss": 0.91, "step": 18299 }, { - "epoch": 0.5192962542565267, + "epoch": 0.5185751934030435, "grad_norm": 0.0, - "learning_rate": 9.861223666508635e-06, - "loss": 0.9537, + "learning_rate": 9.884361130055766e-06, + "loss": 0.8926, "step": 18300 }, { - "epoch": 0.5193246311010216, + "epoch": 0.5186035308453059, "grad_norm": 0.0, - "learning_rate": 9.860304677533891e-06, - "loss": 0.8788, + "learning_rate": 9.883443402646275e-06, + "loss": 0.871, "step": 18301 }, { - "epoch": 0.5193530079455164, + "epoch": 0.5186318682875684, "grad_norm": 0.0, - "learning_rate": 9.859385689739157e-06, - "loss": 0.7781, + "learning_rate": 9.882525676218586e-06, + "loss": 0.9958, "step": 18302 }, { - "epoch": 0.5193813847900114, + "epoch": 0.5186602057298308, "grad_norm": 0.0, - "learning_rate": 9.8584667031322e-06, - "loss": 0.8626, + "learning_rate": 9.881607950780424e-06, + "loss": 0.9312, "step": 18303 }, { - "epoch": 0.5194097616345063, + "epoch": 0.5186885431720933, "grad_norm": 0.0, - "learning_rate": 9.857547717720783e-06, - "loss": 0.9631, + "learning_rate": 9.880690226339524e-06, + "loss": 0.8245, "step": 18304 }, { - "epoch": 0.5194381384790011, + "epoch": 0.5187168806143557, "grad_norm": 0.0, - "learning_rate": 9.856628733512665e-06, - "loss": 0.9157, + "learning_rate": 9.879772502903617e-06, + "loss": 0.9714, "step": 18305 }, { - "epoch": 0.5194665153234961, + "epoch": 0.5187452180566182, "grad_norm": 0.0, - "learning_rate": 9.855709750515609e-06, - "loss": 0.9664, + "learning_rate": 9.878854780480427e-06, + "loss": 0.9051, "step": 18306 }, { - "epoch": 0.5194948921679909, + "epoch": 0.5187735554988807, "grad_norm": 0.0, - "learning_rate": 9.85479076873738e-06, - "loss": 0.8788, + "learning_rate": 9.877937059077689e-06, + "loss": 0.9661, "step": 18307 }, { - "epoch": 0.5195232690124858, + "epoch": 0.5188018929411431, "grad_norm": 0.0, - "learning_rate": 9.853871788185743e-06, - "loss": 0.7856, + "learning_rate": 9.877019338703133e-06, + "loss": 0.8738, "step": 18308 }, { - "epoch": 0.5195516458569807, + "epoch": 0.5188302303834056, "grad_norm": 0.0, - "learning_rate": 9.852952808868454e-06, - "loss": 0.8804, + "learning_rate": 9.876101619364487e-06, + "loss": 0.8627, "step": 18309 }, { - "epoch": 0.5195800227014756, + "epoch": 0.5188585678256681, "grad_norm": 0.0, - "learning_rate": 9.852033830793281e-06, - "loss": 0.8162, + "learning_rate": 9.875183901069489e-06, + "loss": 0.752, "step": 18310 }, { - "epoch": 0.5196083995459705, + "epoch": 0.5188869052679305, "grad_norm": 0.0, - "learning_rate": 9.85111485396798e-06, - "loss": 0.8958, + "learning_rate": 9.874266183825858e-06, + "loss": 0.9116, "step": 18311 }, { - "epoch": 0.5196367763904653, + "epoch": 0.518915242710193, "grad_norm": 0.0, - "learning_rate": 9.850195878400325e-06, - "loss": 0.7849, + "learning_rate": 9.873348467641329e-06, + "loss": 0.8876, "step": 18312 }, { - "epoch": 0.5196651532349603, + "epoch": 0.5189435801524555, "grad_norm": 0.0, - "learning_rate": 9.84927690409807e-06, - "loss": 0.6904, + "learning_rate": 9.872430752523638e-06, + "loss": 0.8712, "step": 18313 }, { - "epoch": 0.5196935300794552, + "epoch": 0.518971917594718, "grad_norm": 0.0, - "learning_rate": 9.848357931068978e-06, - "loss": 0.9006, + "learning_rate": 9.871513038480506e-06, + "loss": 0.9102, "step": 18314 }, { - "epoch": 0.51972190692395, + "epoch": 0.5190002550369803, "grad_norm": 0.0, - "learning_rate": 9.847438959320816e-06, - "loss": 0.8685, + "learning_rate": 9.870595325519669e-06, + "loss": 1.058, "step": 18315 }, { - "epoch": 0.5197502837684449, + "epoch": 0.5190285924792428, "grad_norm": 0.0, - "learning_rate": 9.846519988861343e-06, - "loss": 0.8557, + "learning_rate": 9.869677613648853e-06, + "loss": 0.8329, "step": 18316 }, { - "epoch": 0.5197786606129399, + "epoch": 0.5190569299215053, "grad_norm": 0.0, - "learning_rate": 9.845601019698323e-06, - "loss": 0.8515, + "learning_rate": 9.868759902875792e-06, + "loss": 0.9184, "step": 18317 }, { - "epoch": 0.5198070374574347, + "epoch": 0.5190852673637677, "grad_norm": 0.0, - "learning_rate": 9.844682051839517e-06, - "loss": 0.8897, + "learning_rate": 9.86784219320822e-06, + "loss": 0.8085, "step": 18318 }, { - "epoch": 0.5198354143019296, + "epoch": 0.5191136048060302, "grad_norm": 0.0, - "learning_rate": 9.843763085292692e-06, - "loss": 0.9355, + "learning_rate": 9.866924484653856e-06, + "loss": 0.811, "step": 18319 }, { - "epoch": 0.5198637911464246, + "epoch": 0.5191419422482927, "grad_norm": 0.0, - "learning_rate": 9.842844120065603e-06, - "loss": 0.7852, + "learning_rate": 9.866006777220437e-06, + "loss": 0.9082, "step": 18320 }, { - "epoch": 0.5198921679909194, + "epoch": 0.5191702796905552, "grad_norm": 0.0, - "learning_rate": 9.84192515616602e-06, - "loss": 0.8533, + "learning_rate": 9.865089070915695e-06, + "loss": 0.8639, "step": 18321 }, { - "epoch": 0.5199205448354143, + "epoch": 0.5191986171328176, "grad_norm": 0.0, - "learning_rate": 9.8410061936017e-06, - "loss": 0.8566, + "learning_rate": 9.864171365747356e-06, + "loss": 0.8989, "step": 18322 }, { - "epoch": 0.5199489216799092, + "epoch": 0.5192269545750801, "grad_norm": 0.0, - "learning_rate": 9.840087232380408e-06, - "loss": 0.8772, + "learning_rate": 9.863253661723157e-06, + "loss": 0.8261, "step": 18323 }, { - "epoch": 0.5199772985244041, + "epoch": 0.5192552920173426, "grad_norm": 0.0, - "learning_rate": 9.839168272509908e-06, - "loss": 0.8175, + "learning_rate": 9.862335958850816e-06, + "loss": 0.8494, "step": 18324 }, { - "epoch": 0.520005675368899, + "epoch": 0.5192836294596049, "grad_norm": 0.0, - "learning_rate": 9.83824931399796e-06, - "loss": 1.0043, + "learning_rate": 9.861418257138074e-06, + "loss": 0.8574, "step": 18325 }, { - "epoch": 0.5200340522133938, + "epoch": 0.5193119669018674, "grad_norm": 0.0, - "learning_rate": 9.837330356852323e-06, - "loss": 0.8601, + "learning_rate": 9.860500556592661e-06, + "loss": 0.8841, "step": 18326 }, { - "epoch": 0.5200624290578888, + "epoch": 0.5193403043441299, "grad_norm": 0.0, - "learning_rate": 9.836411401080766e-06, - "loss": 0.822, + "learning_rate": 9.859582857222297e-06, + "loss": 0.9839, "step": 18327 }, { - "epoch": 0.5200908059023837, + "epoch": 0.5193686417863924, "grad_norm": 0.0, - "learning_rate": 9.835492446691054e-06, - "loss": 0.9293, + "learning_rate": 9.85866515903472e-06, + "loss": 0.9112, "step": 18328 }, { - "epoch": 0.5201191827468785, + "epoch": 0.5193969792286548, "grad_norm": 0.0, - "learning_rate": 9.834573493690941e-06, - "loss": 0.9427, + "learning_rate": 9.857747462037663e-06, + "loss": 0.8825, "step": 18329 }, { - "epoch": 0.5201475595913735, + "epoch": 0.5194253166709173, "grad_norm": 0.0, - "learning_rate": 9.833654542088192e-06, - "loss": 0.9393, + "learning_rate": 9.856829766238846e-06, + "loss": 0.9276, "step": 18330 }, { - "epoch": 0.5201759364358683, + "epoch": 0.5194536541131798, "grad_norm": 0.0, - "learning_rate": 9.832735591890575e-06, - "loss": 0.8613, + "learning_rate": 9.855912071646012e-06, + "loss": 0.9572, "step": 18331 }, { - "epoch": 0.5202043132803632, + "epoch": 0.5194819915554422, "grad_norm": 0.0, - "learning_rate": 9.831816643105845e-06, - "loss": 0.7927, + "learning_rate": 9.85499437826688e-06, + "loss": 0.8811, "step": 18332 }, { - "epoch": 0.5202326901248581, + "epoch": 0.5195103289977047, "grad_norm": 0.0, - "learning_rate": 9.83089769574177e-06, - "loss": 0.9006, + "learning_rate": 9.854076686109183e-06, + "loss": 0.8164, "step": 18333 }, { - "epoch": 0.520261066969353, + "epoch": 0.5195386664399672, "grad_norm": 0.0, - "learning_rate": 9.829978749806105e-06, - "loss": 0.8491, + "learning_rate": 9.853158995180656e-06, + "loss": 0.8561, "step": 18334 }, { - "epoch": 0.5202894438138479, + "epoch": 0.5195670038822295, "grad_norm": 0.0, - "learning_rate": 9.82905980530662e-06, - "loss": 0.8417, + "learning_rate": 9.852241305489021e-06, + "loss": 0.9391, "step": 18335 }, { - "epoch": 0.5203178206583428, + "epoch": 0.519595341324492, "grad_norm": 0.0, - "learning_rate": 9.828140862251076e-06, - "loss": 0.8352, + "learning_rate": 9.851323617042012e-06, + "loss": 0.9166, "step": 18336 }, { - "epoch": 0.5203461975028377, + "epoch": 0.5196236787667545, "grad_norm": 0.0, - "learning_rate": 9.827221920647231e-06, - "loss": 0.8362, + "learning_rate": 9.850405929847367e-06, + "loss": 0.8912, "step": 18337 }, { - "epoch": 0.5203745743473326, + "epoch": 0.519652016209017, "grad_norm": 0.0, - "learning_rate": 9.826302980502853e-06, - "loss": 0.8983, + "learning_rate": 9.849488243912802e-06, + "loss": 0.9764, "step": 18338 }, { - "epoch": 0.5204029511918274, + "epoch": 0.5196803536512794, "grad_norm": 0.0, - "learning_rate": 9.8253840418257e-06, - "loss": 0.8284, + "learning_rate": 9.848570559246055e-06, + "loss": 0.9753, "step": 18339 }, { - "epoch": 0.5204313280363224, + "epoch": 0.5197086910935419, "grad_norm": 0.0, - "learning_rate": 9.824465104623534e-06, - "loss": 0.8787, + "learning_rate": 9.847652875854855e-06, + "loss": 0.8827, "step": 18340 }, { - "epoch": 0.5204597048808173, + "epoch": 0.5197370285358044, "grad_norm": 0.0, - "learning_rate": 9.823546168904117e-06, - "loss": 0.9191, + "learning_rate": 9.846735193746929e-06, + "loss": 0.8931, "step": 18341 }, { - "epoch": 0.5204880817253121, + "epoch": 0.5197653659780668, "grad_norm": 0.0, - "learning_rate": 9.822627234675218e-06, - "loss": 0.9383, + "learning_rate": 9.845817512930012e-06, + "loss": 0.8974, "step": 18342 }, { - "epoch": 0.520516458569807, + "epoch": 0.5197937034203293, "grad_norm": 0.0, - "learning_rate": 9.821708301944596e-06, - "loss": 0.9239, + "learning_rate": 9.84489983341183e-06, + "loss": 0.9174, "step": 18343 }, { - "epoch": 0.520544835414302, + "epoch": 0.5198220408625918, "grad_norm": 0.0, - "learning_rate": 9.820789370720008e-06, - "loss": 0.9498, + "learning_rate": 9.843982155200117e-06, + "loss": 0.9254, "step": 18344 }, { - "epoch": 0.5205732122587968, + "epoch": 0.5198503783048543, "grad_norm": 0.0, - "learning_rate": 9.819870441009222e-06, - "loss": 0.9867, + "learning_rate": 9.843064478302596e-06, + "loss": 0.9695, "step": 18345 }, { - "epoch": 0.5206015891032917, + "epoch": 0.5198787157471166, "grad_norm": 0.0, - "learning_rate": 9.81895151282e-06, - "loss": 0.9637, + "learning_rate": 9.842146802727001e-06, + "loss": 0.8582, "step": 18346 }, { - "epoch": 0.5206299659477867, + "epoch": 0.5199070531893791, "grad_norm": 0.0, - "learning_rate": 9.8180325861601e-06, - "loss": 0.7528, + "learning_rate": 9.841229128481065e-06, + "loss": 0.8543, "step": 18347 }, { - "epoch": 0.5206583427922815, + "epoch": 0.5199353906316416, "grad_norm": 0.0, - "learning_rate": 9.81711366103729e-06, - "loss": 0.8036, + "learning_rate": 9.840311455572515e-06, + "loss": 0.9283, "step": 18348 }, { - "epoch": 0.5206867196367764, + "epoch": 0.519963728073904, "grad_norm": 0.0, - "learning_rate": 9.816194737459328e-06, - "loss": 0.9606, + "learning_rate": 9.839393784009078e-06, + "loss": 0.8756, "step": 18349 }, { - "epoch": 0.5207150964812712, + "epoch": 0.5199920655161665, "grad_norm": 0.0, - "learning_rate": 9.815275815433976e-06, - "loss": 0.9382, + "learning_rate": 9.838476113798492e-06, + "loss": 0.7839, "step": 18350 }, { - "epoch": 0.5207434733257662, + "epoch": 0.520020402958429, "grad_norm": 0.0, - "learning_rate": 9.814356894968998e-06, - "loss": 0.9367, + "learning_rate": 9.837558444948478e-06, + "loss": 0.7851, "step": 18351 }, { - "epoch": 0.5207718501702611, + "epoch": 0.5200487404006914, "grad_norm": 0.0, - "learning_rate": 9.813437976072158e-06, - "loss": 0.8216, + "learning_rate": 9.836640777466771e-06, + "loss": 0.8572, "step": 18352 }, { - "epoch": 0.5208002270147559, + "epoch": 0.5200770778429539, "grad_norm": 0.0, - "learning_rate": 9.812519058751211e-06, - "loss": 0.8745, + "learning_rate": 9.835723111361096e-06, + "loss": 0.8802, "step": 18353 }, { - "epoch": 0.5208286038592509, + "epoch": 0.5201054152852164, "grad_norm": 0.0, - "learning_rate": 9.811600143013928e-06, - "loss": 0.8716, + "learning_rate": 9.834805446639187e-06, + "loss": 0.8477, "step": 18354 }, { - "epoch": 0.5208569807037458, + "epoch": 0.5201337527274789, "grad_norm": 0.0, - "learning_rate": 9.810681228868067e-06, - "loss": 0.9013, + "learning_rate": 9.833887783308778e-06, + "loss": 0.9015, "step": 18355 }, { - "epoch": 0.5208853575482406, + "epoch": 0.5201620901697412, "grad_norm": 0.0, - "learning_rate": 9.809762316321388e-06, - "loss": 0.8627, + "learning_rate": 9.83297012137759e-06, + "loss": 0.8438, "step": 18356 }, { - "epoch": 0.5209137343927355, + "epoch": 0.5201904276120037, "grad_norm": 0.0, - "learning_rate": 9.808843405381653e-06, - "loss": 0.7993, + "learning_rate": 9.832052460853356e-06, + "loss": 0.9023, "step": 18357 }, { - "epoch": 0.5209421112372304, + "epoch": 0.5202187650542662, "grad_norm": 0.0, - "learning_rate": 9.80792449605663e-06, - "loss": 0.8837, + "learning_rate": 9.83113480174381e-06, + "loss": 0.8376, "step": 18358 }, { - "epoch": 0.5209704880817253, + "epoch": 0.5202471024965286, "grad_norm": 0.0, - "learning_rate": 9.807005588354077e-06, - "loss": 0.8181, + "learning_rate": 9.830217144056675e-06, + "loss": 0.8712, "step": 18359 }, { - "epoch": 0.5209988649262202, + "epoch": 0.5202754399387911, "grad_norm": 0.0, - "learning_rate": 9.806086682281759e-06, - "loss": 0.847, + "learning_rate": 9.829299487799686e-06, + "loss": 0.7423, "step": 18360 }, { - "epoch": 0.5210272417707151, + "epoch": 0.5203037773810536, "grad_norm": 0.0, - "learning_rate": 9.80516777784743e-06, - "loss": 0.8707, + "learning_rate": 9.82838183298057e-06, + "loss": 0.7809, "step": 18361 }, { - "epoch": 0.52105561861521, + "epoch": 0.5203321148233161, "grad_norm": 0.0, - "learning_rate": 9.804248875058862e-06, - "loss": 0.8962, + "learning_rate": 9.827464179607055e-06, + "loss": 0.8202, "step": 18362 }, { - "epoch": 0.5210839954597049, + "epoch": 0.5203604522655785, "grad_norm": 0.0, - "learning_rate": 9.803329973923811e-06, - "loss": 0.8588, + "learning_rate": 9.826546527686878e-06, + "loss": 0.9244, "step": 18363 }, { - "epoch": 0.5211123723041998, + "epoch": 0.520388789707841, "grad_norm": 0.0, - "learning_rate": 9.80241107445004e-06, - "loss": 0.8588, + "learning_rate": 9.825628877227762e-06, + "loss": 0.8063, "step": 18364 }, { - "epoch": 0.5211407491486947, + "epoch": 0.5204171271501035, "grad_norm": 0.0, - "learning_rate": 9.801492176645313e-06, - "loss": 0.9223, + "learning_rate": 9.824711228237437e-06, + "loss": 0.9116, "step": 18365 }, { - "epoch": 0.5211691259931895, + "epoch": 0.5204454645923658, "grad_norm": 0.0, - "learning_rate": 9.80057328051739e-06, - "loss": 0.8344, + "learning_rate": 9.823793580723637e-06, + "loss": 0.8627, "step": 18366 }, { - "epoch": 0.5211975028376844, + "epoch": 0.5204738020346283, "grad_norm": 0.0, - "learning_rate": 9.799654386074032e-06, - "loss": 0.9504, + "learning_rate": 9.822875934694086e-06, + "loss": 0.8401, "step": 18367 }, { - "epoch": 0.5212258796821794, + "epoch": 0.5205021394768908, "grad_norm": 0.0, - "learning_rate": 9.798735493323004e-06, - "loss": 0.8609, + "learning_rate": 9.821958290156522e-06, + "loss": 0.8698, "step": 18368 }, { - "epoch": 0.5212542565266742, + "epoch": 0.5205304769191533, "grad_norm": 0.0, - "learning_rate": 9.797816602272067e-06, - "loss": 0.9586, + "learning_rate": 9.821040647118666e-06, + "loss": 0.8873, "step": 18369 }, { - "epoch": 0.5212826333711691, + "epoch": 0.5205588143614157, "grad_norm": 0.0, - "learning_rate": 9.79689771292898e-06, - "loss": 0.9474, + "learning_rate": 9.82012300558825e-06, + "loss": 0.7708, "step": 18370 }, { - "epoch": 0.5213110102156641, + "epoch": 0.5205871518036782, "grad_norm": 0.0, - "learning_rate": 9.79597882530151e-06, - "loss": 0.9009, + "learning_rate": 9.819205365573009e-06, + "loss": 0.995, "step": 18371 }, { - "epoch": 0.5213393870601589, + "epoch": 0.5206154892459407, "grad_norm": 0.0, - "learning_rate": 9.79505993939741e-06, - "loss": 0.8562, + "learning_rate": 9.818287727080663e-06, + "loss": 0.8961, "step": 18372 }, { - "epoch": 0.5213677639046538, + "epoch": 0.5206438266882031, "grad_norm": 0.0, - "learning_rate": 9.794141055224451e-06, - "loss": 0.9456, + "learning_rate": 9.81737009011895e-06, + "loss": 0.9628, "step": 18373 }, { - "epoch": 0.5213961407491486, + "epoch": 0.5206721641304656, "grad_norm": 0.0, - "learning_rate": 9.793222172790396e-06, - "loss": 0.7801, + "learning_rate": 9.816452454695596e-06, + "loss": 0.9417, "step": 18374 }, { - "epoch": 0.5214245175936436, + "epoch": 0.5207005015727281, "grad_norm": 0.0, - "learning_rate": 9.792303292102997e-06, - "loss": 0.816, + "learning_rate": 9.815534820818329e-06, + "loss": 0.8824, "step": 18375 }, { - "epoch": 0.5214528944381385, + "epoch": 0.5207288390149905, "grad_norm": 0.0, - "learning_rate": 9.791384413170026e-06, - "loss": 0.7755, + "learning_rate": 9.814617188494886e-06, + "loss": 0.9837, "step": 18376 }, { - "epoch": 0.5214812712826333, + "epoch": 0.520757176457253, "grad_norm": 0.0, - "learning_rate": 9.790465535999239e-06, - "loss": 0.9108, + "learning_rate": 9.813699557732986e-06, + "loss": 0.8647, "step": 18377 }, { - "epoch": 0.5215096481271283, + "epoch": 0.5207855138995154, "grad_norm": 0.0, - "learning_rate": 9.789546660598396e-06, - "loss": 0.9195, + "learning_rate": 9.812781928540365e-06, + "loss": 0.8599, "step": 18378 }, { - "epoch": 0.5215380249716232, + "epoch": 0.5208138513417779, "grad_norm": 0.0, - "learning_rate": 9.788627786975264e-06, - "loss": 0.9489, + "learning_rate": 9.811864300924753e-06, + "loss": 0.9096, "step": 18379 }, { - "epoch": 0.521566401816118, + "epoch": 0.5208421887840403, "grad_norm": 0.0, - "learning_rate": 9.787708915137604e-06, - "loss": 0.8878, + "learning_rate": 9.810946674893876e-06, + "loss": 0.9073, "step": 18380 }, { - "epoch": 0.521594778660613, + "epoch": 0.5208705262263028, "grad_norm": 0.0, - "learning_rate": 9.786790045093175e-06, - "loss": 0.9722, + "learning_rate": 9.81002905045547e-06, + "loss": 0.8666, "step": 18381 }, { - "epoch": 0.5216231555051078, + "epoch": 0.5208988636685653, "grad_norm": 0.0, - "learning_rate": 9.785871176849739e-06, - "loss": 0.9323, + "learning_rate": 9.809111427617254e-06, + "loss": 0.887, "step": 18382 }, { - "epoch": 0.5216515323496027, + "epoch": 0.5209272011108277, "grad_norm": 0.0, - "learning_rate": 9.784952310415062e-06, - "loss": 0.8795, + "learning_rate": 9.808193806386965e-06, + "loss": 0.9882, "step": 18383 }, { - "epoch": 0.5216799091940976, + "epoch": 0.5209555385530902, "grad_norm": 0.0, - "learning_rate": 9.7840334457969e-06, - "loss": 0.9181, + "learning_rate": 9.807276186772335e-06, + "loss": 0.8747, "step": 18384 }, { - "epoch": 0.5217082860385925, + "epoch": 0.5209838759953527, "grad_norm": 0.0, - "learning_rate": 9.783114583003018e-06, - "loss": 0.9913, + "learning_rate": 9.806358568781084e-06, + "loss": 0.7921, "step": 18385 }, { - "epoch": 0.5217366628830874, + "epoch": 0.5210122134376152, "grad_norm": 0.0, - "learning_rate": 9.782195722041176e-06, - "loss": 0.9266, + "learning_rate": 9.805440952420946e-06, + "loss": 0.7456, "step": 18386 }, { - "epoch": 0.5217650397275823, + "epoch": 0.5210405508798776, "grad_norm": 0.0, - "learning_rate": 9.781276862919133e-06, - "loss": 0.9327, + "learning_rate": 9.804523337699654e-06, + "loss": 1.0082, "step": 18387 }, { - "epoch": 0.5217934165720772, + "epoch": 0.52106888832214, "grad_norm": 0.0, - "learning_rate": 9.780358005644657e-06, - "loss": 0.8894, + "learning_rate": 9.803605724624932e-06, + "loss": 0.9795, "step": 18388 }, { - "epoch": 0.5218217934165721, + "epoch": 0.5210972257644025, "grad_norm": 0.0, - "learning_rate": 9.77943915022551e-06, - "loss": 0.807, + "learning_rate": 9.802688113204518e-06, + "loss": 0.8102, "step": 18389 }, { - "epoch": 0.521850170261067, + "epoch": 0.5211255632066649, "grad_norm": 0.0, - "learning_rate": 9.778520296669449e-06, - "loss": 0.8576, + "learning_rate": 9.801770503446129e-06, + "loss": 0.8647, "step": 18390 }, { - "epoch": 0.5218785471055618, + "epoch": 0.5211539006489274, "grad_norm": 0.0, - "learning_rate": 9.777601444984234e-06, - "loss": 0.8094, + "learning_rate": 9.800852895357502e-06, + "loss": 0.8478, "step": 18391 }, { - "epoch": 0.5219069239500568, + "epoch": 0.5211822380911899, "grad_norm": 0.0, - "learning_rate": 9.776682595177633e-06, - "loss": 0.8884, + "learning_rate": 9.799935288946365e-06, + "loss": 0.811, "step": 18392 }, { - "epoch": 0.5219353007945516, + "epoch": 0.5212105755334524, "grad_norm": 0.0, - "learning_rate": 9.775763747257404e-06, - "loss": 0.7649, + "learning_rate": 9.799017684220449e-06, + "loss": 0.822, "step": 18393 }, { - "epoch": 0.5219636776390465, + "epoch": 0.5212389129757148, "grad_norm": 0.0, - "learning_rate": 9.774844901231306e-06, - "loss": 0.8853, + "learning_rate": 9.79810008118748e-06, + "loss": 1.0204, "step": 18394 }, { - "epoch": 0.5219920544835415, + "epoch": 0.5212672504179773, "grad_norm": 0.0, - "learning_rate": 9.773926057107106e-06, - "loss": 0.9014, + "learning_rate": 9.797182479855192e-06, + "loss": 0.8682, "step": 18395 }, { - "epoch": 0.5220204313280363, + "epoch": 0.5212955878602398, "grad_norm": 0.0, - "learning_rate": 9.773007214892562e-06, - "loss": 0.8902, + "learning_rate": 9.796264880231307e-06, + "loss": 0.9262, "step": 18396 }, { - "epoch": 0.5220488081725312, + "epoch": 0.5213239253025022, "grad_norm": 0.0, - "learning_rate": 9.772088374595435e-06, - "loss": 0.7845, + "learning_rate": 9.795347282323563e-06, + "loss": 0.7912, "step": 18397 }, { - "epoch": 0.5220771850170262, + "epoch": 0.5213522627447646, "grad_norm": 0.0, - "learning_rate": 9.77116953622349e-06, - "loss": 0.9053, + "learning_rate": 9.794429686139683e-06, + "loss": 1.007, "step": 18398 }, { - "epoch": 0.522105561861521, + "epoch": 0.5213806001870271, "grad_norm": 0.0, - "learning_rate": 9.770250699784486e-06, - "loss": 0.8398, + "learning_rate": 9.793512091687396e-06, + "loss": 0.8981, "step": 18399 }, { - "epoch": 0.5221339387060159, + "epoch": 0.5214089376292895, "grad_norm": 0.0, - "learning_rate": 9.769331865286185e-06, - "loss": 0.9464, + "learning_rate": 9.792594498974436e-06, + "loss": 0.9372, "step": 18400 }, { - "epoch": 0.5221623155505107, + "epoch": 0.521437275071552, "grad_norm": 0.0, - "learning_rate": 9.768413032736345e-06, - "loss": 0.899, + "learning_rate": 9.791676908008526e-06, + "loss": 0.9301, "step": 18401 }, { - "epoch": 0.5221906923950057, + "epoch": 0.5214656125138145, "grad_norm": 0.0, - "learning_rate": 9.767494202142734e-06, - "loss": 0.881, + "learning_rate": 9.790759318797405e-06, + "loss": 0.9211, "step": 18402 }, { - "epoch": 0.5222190692395006, + "epoch": 0.521493949956077, "grad_norm": 0.0, - "learning_rate": 9.766575373513102e-06, - "loss": 0.8183, + "learning_rate": 9.78984173134879e-06, + "loss": 0.7759, "step": 18403 }, { - "epoch": 0.5222474460839954, + "epoch": 0.5215222873983394, "grad_norm": 0.0, - "learning_rate": 9.765656546855226e-06, - "loss": 0.9882, + "learning_rate": 9.788924145670418e-06, + "loss": 0.8738, "step": 18404 }, { - "epoch": 0.5222758229284904, + "epoch": 0.5215506248406019, "grad_norm": 0.0, - "learning_rate": 9.764737722176858e-06, - "loss": 0.8921, + "learning_rate": 9.788006561770018e-06, + "loss": 0.9268, "step": 18405 }, { - "epoch": 0.5223041997729853, + "epoch": 0.5215789622828644, "grad_norm": 0.0, - "learning_rate": 9.763818899485761e-06, - "loss": 0.8555, + "learning_rate": 9.787088979655314e-06, + "loss": 0.934, "step": 18406 }, { - "epoch": 0.5223325766174801, + "epoch": 0.5216072997251268, "grad_norm": 0.0, - "learning_rate": 9.762900078789698e-06, - "loss": 0.8608, + "learning_rate": 9.786171399334039e-06, + "loss": 0.8706, "step": 18407 }, { - "epoch": 0.522360953461975, + "epoch": 0.5216356371673893, "grad_norm": 0.0, - "learning_rate": 9.761981260096425e-06, - "loss": 0.9078, + "learning_rate": 9.785253820813927e-06, + "loss": 0.7573, "step": 18408 }, { - "epoch": 0.52238933030647, + "epoch": 0.5216639746096517, "grad_norm": 0.0, - "learning_rate": 9.761062443413711e-06, - "loss": 0.8233, + "learning_rate": 9.784336244102697e-06, + "loss": 0.9558, "step": 18409 }, { - "epoch": 0.5224177071509648, + "epoch": 0.5216923120519142, "grad_norm": 0.0, - "learning_rate": 9.760143628749312e-06, - "loss": 0.8926, + "learning_rate": 9.783418669208086e-06, + "loss": 0.9018, "step": 18410 }, { - "epoch": 0.5224460839954597, + "epoch": 0.5217206494941766, "grad_norm": 0.0, - "learning_rate": 9.75922481611099e-06, - "loss": 0.87, + "learning_rate": 9.782501096137817e-06, + "loss": 0.9489, "step": 18411 }, { - "epoch": 0.5224744608399546, + "epoch": 0.5217489869364391, "grad_norm": 0.0, - "learning_rate": 9.758306005506508e-06, - "loss": 0.9109, + "learning_rate": 9.781583524899622e-06, + "loss": 0.8942, "step": 18412 }, { - "epoch": 0.5225028376844495, + "epoch": 0.5217773243787016, "grad_norm": 0.0, - "learning_rate": 9.757387196943626e-06, - "loss": 0.7986, + "learning_rate": 9.780665955501232e-06, + "loss": 0.9099, "step": 18413 }, { - "epoch": 0.5225312145289444, + "epoch": 0.521805661820964, "grad_norm": 0.0, - "learning_rate": 9.756468390430101e-06, - "loss": 0.8987, + "learning_rate": 9.779748387950372e-06, + "loss": 0.9468, "step": 18414 }, { - "epoch": 0.5225595913734393, + "epoch": 0.5218339992632265, "grad_norm": 0.0, - "learning_rate": 9.755549585973704e-06, - "loss": 0.9217, + "learning_rate": 9.778830822254773e-06, + "loss": 0.9881, "step": 18415 }, { - "epoch": 0.5225879682179342, + "epoch": 0.521862336705489, "grad_norm": 0.0, - "learning_rate": 9.754630783582189e-06, - "loss": 0.9472, + "learning_rate": 9.777913258422168e-06, + "loss": 0.8478, "step": 18416 }, { - "epoch": 0.522616345062429, + "epoch": 0.5218906741477515, "grad_norm": 0.0, - "learning_rate": 9.753711983263316e-06, - "loss": 0.7732, + "learning_rate": 9.776995696460279e-06, + "loss": 0.9325, "step": 18417 }, { - "epoch": 0.5226447219069239, + "epoch": 0.5219190115900139, "grad_norm": 0.0, - "learning_rate": 9.75279318502485e-06, - "loss": 0.9807, + "learning_rate": 9.77607813637684e-06, + "loss": 0.8265, "step": 18418 }, { - "epoch": 0.5226730987514189, + "epoch": 0.5219473490322764, "grad_norm": 0.0, - "learning_rate": 9.75187438887455e-06, - "loss": 0.7318, + "learning_rate": 9.775160578179575e-06, + "loss": 0.9169, "step": 18419 }, { - "epoch": 0.5227014755959137, + "epoch": 0.5219756864745388, "grad_norm": 0.0, - "learning_rate": 9.750955594820182e-06, - "loss": 0.9137, + "learning_rate": 9.774243021876216e-06, + "loss": 0.8239, "step": 18420 }, { - "epoch": 0.5227298524404086, + "epoch": 0.5220040239168012, "grad_norm": 0.0, - "learning_rate": 9.750036802869503e-06, - "loss": 0.9619, + "learning_rate": 9.773325467474497e-06, + "loss": 0.8537, "step": 18421 }, { - "epoch": 0.5227582292849036, + "epoch": 0.5220323613590637, "grad_norm": 0.0, - "learning_rate": 9.74911801303027e-06, - "loss": 0.8776, + "learning_rate": 9.772407914982136e-06, + "loss": 0.9013, "step": 18422 }, { - "epoch": 0.5227866061293984, + "epoch": 0.5220606988013262, "grad_norm": 0.0, - "learning_rate": 9.748199225310254e-06, - "loss": 0.9589, + "learning_rate": 9.77149036440687e-06, + "loss": 0.928, "step": 18423 }, { - "epoch": 0.5228149829738933, + "epoch": 0.5220890362435886, "grad_norm": 0.0, - "learning_rate": 9.747280439717208e-06, - "loss": 0.9189, + "learning_rate": 9.770572815756428e-06, + "loss": 0.9141, "step": 18424 }, { - "epoch": 0.5228433598183881, + "epoch": 0.5221173736858511, "grad_norm": 0.0, - "learning_rate": 9.746361656258893e-06, - "loss": 0.9008, + "learning_rate": 9.76965526903853e-06, + "loss": 0.9977, "step": 18425 }, { - "epoch": 0.5228717366628831, + "epoch": 0.5221457111281136, "grad_norm": 0.0, - "learning_rate": 9.745442874943076e-06, - "loss": 0.9414, + "learning_rate": 9.768737724260919e-06, + "loss": 0.9297, "step": 18426 }, { - "epoch": 0.522900113507378, + "epoch": 0.5221740485703761, "grad_norm": 0.0, - "learning_rate": 9.744524095777514e-06, - "loss": 0.7492, + "learning_rate": 9.76782018143131e-06, + "loss": 0.9052, "step": 18427 }, { - "epoch": 0.5229284903518728, + "epoch": 0.5222023860126385, "grad_norm": 0.0, - "learning_rate": 9.743605318769967e-06, - "loss": 0.925, + "learning_rate": 9.766902640557438e-06, + "loss": 0.8534, "step": 18428 }, { - "epoch": 0.5229568671963678, + "epoch": 0.522230723454901, "grad_norm": 0.0, - "learning_rate": 9.7426865439282e-06, - "loss": 0.8651, + "learning_rate": 9.765985101647037e-06, + "loss": 0.9196, "step": 18429 }, { - "epoch": 0.5229852440408627, + "epoch": 0.5222590608971635, "grad_norm": 0.0, - "learning_rate": 9.74176777125997e-06, + "learning_rate": 9.765067564707825e-06, "loss": 0.8659, "step": 18430 }, { - "epoch": 0.5230136208853575, + "epoch": 0.5222873983394258, "grad_norm": 0.0, - "learning_rate": 9.740849000773037e-06, - "loss": 0.9188, + "learning_rate": 9.764150029747538e-06, + "loss": 0.9008, "step": 18431 }, { - "epoch": 0.5230419977298525, + "epoch": 0.5223157357816883, "grad_norm": 0.0, - "learning_rate": 9.739930232475167e-06, - "loss": 0.802, + "learning_rate": 9.763232496773903e-06, + "loss": 0.8922, "step": 18432 }, { - "epoch": 0.5230703745743474, + "epoch": 0.5223440732239508, "grad_norm": 0.0, - "learning_rate": 9.739011466374113e-06, - "loss": 0.7975, + "learning_rate": 9.762314965794645e-06, + "loss": 0.8545, "step": 18433 }, { - "epoch": 0.5230987514188422, + "epoch": 0.5223724106662133, "grad_norm": 0.0, - "learning_rate": 9.738092702477646e-06, - "loss": 0.7908, + "learning_rate": 9.761397436817504e-06, + "loss": 0.8367, "step": 18434 }, { - "epoch": 0.5231271282633371, + "epoch": 0.5224007481084757, "grad_norm": 0.0, - "learning_rate": 9.737173940793518e-06, - "loss": 0.9357, + "learning_rate": 9.760479909850196e-06, + "loss": 0.8948, "step": 18435 }, { - "epoch": 0.523155505107832, + "epoch": 0.5224290855507382, "grad_norm": 0.0, - "learning_rate": 9.736255181329499e-06, - "loss": 0.836, + "learning_rate": 9.759562384900453e-06, + "loss": 1.0221, "step": 18436 }, { - "epoch": 0.5231838819523269, + "epoch": 0.5224574229930007, "grad_norm": 0.0, - "learning_rate": 9.735336424093342e-06, - "loss": 0.8747, + "learning_rate": 9.758644861976012e-06, + "loss": 0.8836, "step": 18437 }, { - "epoch": 0.5232122587968218, + "epoch": 0.5224857604352631, "grad_norm": 0.0, - "learning_rate": 9.734417669092807e-06, - "loss": 0.9264, + "learning_rate": 9.757727341084588e-06, + "loss": 0.9335, "step": 18438 }, { - "epoch": 0.5232406356413167, + "epoch": 0.5225140978775256, "grad_norm": 0.0, - "learning_rate": 9.733498916335662e-06, - "loss": 0.8411, + "learning_rate": 9.75680982223392e-06, + "loss": 0.8962, "step": 18439 }, { - "epoch": 0.5232690124858116, + "epoch": 0.5225424353197881, "grad_norm": 0.0, - "learning_rate": 9.732580165829663e-06, - "loss": 0.9378, + "learning_rate": 9.755892305431733e-06, + "loss": 0.9178, "step": 18440 }, { - "epoch": 0.5232973893303065, + "epoch": 0.5225707727620506, "grad_norm": 0.0, - "learning_rate": 9.731661417582571e-06, - "loss": 0.7687, + "learning_rate": 9.754974790685754e-06, + "loss": 0.8474, "step": 18441 }, { - "epoch": 0.5233257661748013, + "epoch": 0.5225991102043129, "grad_norm": 0.0, - "learning_rate": 9.730742671602144e-06, - "loss": 0.7922, + "learning_rate": 9.754057278003717e-06, + "loss": 0.8879, "step": 18442 }, { - "epoch": 0.5233541430192963, + "epoch": 0.5226274476465754, "grad_norm": 0.0, - "learning_rate": 9.72982392789615e-06, - "loss": 0.7329, + "learning_rate": 9.753139767393342e-06, + "loss": 0.9783, "step": 18443 }, { - "epoch": 0.5233825198637911, + "epoch": 0.5226557850888379, "grad_norm": 0.0, - "learning_rate": 9.728905186472345e-06, - "loss": 0.8499, + "learning_rate": 9.752222258862364e-06, + "loss": 0.8522, "step": 18444 }, { - "epoch": 0.523410896708286, + "epoch": 0.5226841225311003, "grad_norm": 0.0, - "learning_rate": 9.727986447338487e-06, - "loss": 0.9369, + "learning_rate": 9.751304752418512e-06, + "loss": 0.8349, "step": 18445 }, { - "epoch": 0.523439273552781, + "epoch": 0.5227124599733628, "grad_norm": 0.0, - "learning_rate": 9.727067710502341e-06, - "loss": 0.9478, + "learning_rate": 9.75038724806951e-06, + "loss": 0.8217, "step": 18446 }, { - "epoch": 0.5234676503972758, + "epoch": 0.5227407974156253, "grad_norm": 0.0, - "learning_rate": 9.726148975971668e-06, - "loss": 0.9974, + "learning_rate": 9.749469745823092e-06, + "loss": 0.7465, "step": 18447 }, { - "epoch": 0.5234960272417707, + "epoch": 0.5227691348578877, "grad_norm": 0.0, - "learning_rate": 9.725230243754224e-06, - "loss": 0.9037, + "learning_rate": 9.74855224568698e-06, + "loss": 0.8414, "step": 18448 }, { - "epoch": 0.5235244040862657, + "epoch": 0.5227974723001502, "grad_norm": 0.0, - "learning_rate": 9.724311513857772e-06, - "loss": 0.9457, + "learning_rate": 9.747634747668906e-06, + "loss": 0.8835, "step": 18449 }, { - "epoch": 0.5235527809307605, + "epoch": 0.5228258097424127, "grad_norm": 0.0, - "learning_rate": 9.723392786290076e-06, - "loss": 0.8402, + "learning_rate": 9.746717251776602e-06, + "loss": 0.9307, "step": 18450 }, { - "epoch": 0.5235811577752554, + "epoch": 0.5228541471846752, "grad_norm": 0.0, - "learning_rate": 9.722474061058893e-06, - "loss": 0.932, + "learning_rate": 9.74579975801779e-06, + "loss": 0.8292, "step": 18451 }, { - "epoch": 0.5236095346197502, + "epoch": 0.5228824846269375, "grad_norm": 0.0, - "learning_rate": 9.721555338171982e-06, - "loss": 1.0179, + "learning_rate": 9.7448822664002e-06, + "loss": 0.9458, "step": 18452 }, { - "epoch": 0.5236379114642452, + "epoch": 0.5229108220692, "grad_norm": 0.0, - "learning_rate": 9.720636617637108e-06, - "loss": 0.8917, + "learning_rate": 9.743964776931562e-06, + "loss": 0.856, "step": 18453 }, { - "epoch": 0.5236662883087401, + "epoch": 0.5229391595114625, "grad_norm": 0.0, - "learning_rate": 9.71971789946203e-06, - "loss": 1.0358, + "learning_rate": 9.743047289619604e-06, + "loss": 0.8695, "step": 18454 }, { - "epoch": 0.5236946651532349, + "epoch": 0.5229674969537249, "grad_norm": 0.0, - "learning_rate": 9.718799183654505e-06, - "loss": 0.7701, + "learning_rate": 9.742129804472056e-06, + "loss": 0.9664, "step": 18455 }, { - "epoch": 0.5237230419977299, + "epoch": 0.5229958343959874, "grad_norm": 0.0, - "learning_rate": 9.717880470222298e-06, - "loss": 0.9081, + "learning_rate": 9.741212321496643e-06, + "loss": 0.8926, "step": 18456 }, { - "epoch": 0.5237514188422248, + "epoch": 0.5230241718382499, "grad_norm": 0.0, - "learning_rate": 9.716961759173166e-06, - "loss": 0.9325, + "learning_rate": 9.740294840701094e-06, + "loss": 0.8427, "step": 18457 }, { - "epoch": 0.5237797956867196, + "epoch": 0.5230525092805124, "grad_norm": 0.0, - "learning_rate": 9.716043050514869e-06, - "loss": 0.9191, + "learning_rate": 9.739377362093138e-06, + "loss": 0.882, "step": 18458 }, { - "epoch": 0.5238081725312145, + "epoch": 0.5230808467227748, "grad_norm": 0.0, - "learning_rate": 9.715124344255173e-06, - "loss": 0.9516, + "learning_rate": 9.738459885680502e-06, + "loss": 0.7879, "step": 18459 }, { - "epoch": 0.5238365493757094, + "epoch": 0.5231091841650373, "grad_norm": 0.0, - "learning_rate": 9.714205640401833e-06, - "loss": 0.8904, + "learning_rate": 9.73754241147092e-06, + "loss": 0.8672, "step": 18460 }, { - "epoch": 0.5238649262202043, + "epoch": 0.5231375216072998, "grad_norm": 0.0, - "learning_rate": 9.71328693896261e-06, - "loss": 0.935, + "learning_rate": 9.736624939472112e-06, + "loss": 1.0504, "step": 18461 }, { - "epoch": 0.5238933030646992, + "epoch": 0.5231658590495621, "grad_norm": 0.0, - "learning_rate": 9.712368239945265e-06, - "loss": 1.0248, + "learning_rate": 9.73570746969181e-06, + "loss": 0.9199, "step": 18462 }, { - "epoch": 0.5239216799091941, + "epoch": 0.5231941964918246, "grad_norm": 0.0, - "learning_rate": 9.71144954335756e-06, - "loss": 0.943, + "learning_rate": 9.734790002137743e-06, + "loss": 0.7955, "step": 18463 }, { - "epoch": 0.523950056753689, + "epoch": 0.5232225339340871, "grad_norm": 0.0, - "learning_rate": 9.710530849207249e-06, - "loss": 0.933, + "learning_rate": 9.733872536817637e-06, + "loss": 1.0709, "step": 18464 }, { - "epoch": 0.5239784335981839, + "epoch": 0.5232508713763496, "grad_norm": 0.0, - "learning_rate": 9.709612157502103e-06, - "loss": 0.9818, + "learning_rate": 9.732955073739222e-06, + "loss": 0.9605, "step": 18465 }, { - "epoch": 0.5240068104426788, + "epoch": 0.523279208818612, "grad_norm": 0.0, - "learning_rate": 9.70869346824987e-06, - "loss": 0.9786, + "learning_rate": 9.73203761291023e-06, + "loss": 0.8827, "step": 18466 }, { - "epoch": 0.5240351872871737, + "epoch": 0.5233075462608745, "grad_norm": 0.0, - "learning_rate": 9.70777478145832e-06, - "loss": 0.9313, + "learning_rate": 9.73112015433838e-06, + "loss": 0.8711, "step": 18467 }, { - "epoch": 0.5240635641316685, + "epoch": 0.523335883703137, "grad_norm": 0.0, - "learning_rate": 9.706856097135212e-06, - "loss": 0.9065, + "learning_rate": 9.73020269803141e-06, + "loss": 0.8239, "step": 18468 }, { - "epoch": 0.5240919409761634, + "epoch": 0.5233642211453994, "grad_norm": 0.0, - "learning_rate": 9.7059374152883e-06, - "loss": 0.7469, + "learning_rate": 9.729285243997037e-06, + "loss": 0.8457, "step": 18469 }, { - "epoch": 0.5241203178206584, + "epoch": 0.5233925585876619, "grad_norm": 0.0, - "learning_rate": 9.70501873592535e-06, - "loss": 0.8259, + "learning_rate": 9.728367792242993e-06, + "loss": 0.9075, "step": 18470 }, { - "epoch": 0.5241486946651532, + "epoch": 0.5234208960299244, "grad_norm": 0.0, - "learning_rate": 9.70410005905412e-06, - "loss": 0.7993, + "learning_rate": 9.727450342777015e-06, + "loss": 0.816, "step": 18471 }, { - "epoch": 0.5241770715096481, + "epoch": 0.5234492334721867, "grad_norm": 0.0, - "learning_rate": 9.703181384682368e-06, - "loss": 0.8332, + "learning_rate": 9.726532895606819e-06, + "loss": 0.9017, "step": 18472 }, { - "epoch": 0.5242054483541431, + "epoch": 0.5234775709144492, "grad_norm": 0.0, - "learning_rate": 9.702262712817857e-06, - "loss": 0.9486, + "learning_rate": 9.725615450740138e-06, + "loss": 0.797, "step": 18473 }, { - "epoch": 0.5242338251986379, + "epoch": 0.5235059083567117, "grad_norm": 0.0, - "learning_rate": 9.701344043468349e-06, - "loss": 0.916, + "learning_rate": 9.724698008184705e-06, + "loss": 0.878, "step": 18474 }, { - "epoch": 0.5242622020431328, + "epoch": 0.5235342457989742, "grad_norm": 0.0, - "learning_rate": 9.700425376641599e-06, - "loss": 0.8719, + "learning_rate": 9.723780567948236e-06, + "loss": 0.9417, "step": 18475 }, { - "epoch": 0.5242905788876276, + "epoch": 0.5235625832412366, "grad_norm": 0.0, - "learning_rate": 9.699506712345368e-06, - "loss": 0.9215, + "learning_rate": 9.722863130038472e-06, + "loss": 0.9236, "step": 18476 }, { - "epoch": 0.5243189557321226, + "epoch": 0.5235909206834991, "grad_norm": 0.0, - "learning_rate": 9.69858805058742e-06, - "loss": 0.7822, + "learning_rate": 9.721945694463129e-06, + "loss": 0.7834, "step": 18477 }, { - "epoch": 0.5243473325766175, + "epoch": 0.5236192581257616, "grad_norm": 0.0, - "learning_rate": 9.697669391375509e-06, - "loss": 0.8975, + "learning_rate": 9.721028261229944e-06, + "loss": 0.8209, "step": 18478 }, { - "epoch": 0.5243757094211123, + "epoch": 0.523647595568024, "grad_norm": 0.0, - "learning_rate": 9.696750734717397e-06, - "loss": 0.815, + "learning_rate": 9.720110830346643e-06, + "loss": 0.9326, "step": 18479 }, { - "epoch": 0.5244040862656073, + "epoch": 0.5236759330102865, "grad_norm": 0.0, - "learning_rate": 9.695832080620849e-06, - "loss": 0.9897, + "learning_rate": 9.71919340182095e-06, + "loss": 0.9695, "step": 18480 }, { - "epoch": 0.5244324631101022, + "epoch": 0.523704270452549, "grad_norm": 0.0, - "learning_rate": 9.694913429093621e-06, - "loss": 0.8671, + "learning_rate": 9.718275975660594e-06, + "loss": 0.8856, "step": 18481 }, { - "epoch": 0.524460839954597, + "epoch": 0.5237326078948115, "grad_norm": 0.0, - "learning_rate": 9.693994780143475e-06, - "loss": 0.7849, + "learning_rate": 9.71735855187331e-06, + "loss": 0.8824, "step": 18482 }, { - "epoch": 0.524489216799092, + "epoch": 0.5237609453370738, "grad_norm": 0.0, - "learning_rate": 9.693076133778164e-06, - "loss": 0.9747, + "learning_rate": 9.716441130466814e-06, + "loss": 0.8435, "step": 18483 }, { - "epoch": 0.5245175936435869, + "epoch": 0.5237892827793363, "grad_norm": 0.0, - "learning_rate": 9.692157490005456e-06, - "loss": 0.9043, + "learning_rate": 9.715523711448842e-06, + "loss": 0.9704, "step": 18484 }, { - "epoch": 0.5245459704880817, + "epoch": 0.5238176202215988, "grad_norm": 0.0, - "learning_rate": 9.691238848833106e-06, - "loss": 0.8195, + "learning_rate": 9.714606294827119e-06, + "loss": 0.8329, "step": 18485 }, { - "epoch": 0.5245743473325766, + "epoch": 0.5238459576638612, "grad_norm": 0.0, - "learning_rate": 9.690320210268876e-06, - "loss": 0.9544, + "learning_rate": 9.71368888060937e-06, + "loss": 0.8616, "step": 18486 }, { - "epoch": 0.5246027241770715, + "epoch": 0.5238742951061237, "grad_norm": 0.0, - "learning_rate": 9.689401574320526e-06, - "loss": 0.8552, + "learning_rate": 9.712771468803333e-06, + "loss": 0.8823, "step": 18487 }, { - "epoch": 0.5246311010215664, + "epoch": 0.5239026325483862, "grad_norm": 0.0, - "learning_rate": 9.688482940995814e-06, - "loss": 1.0516, + "learning_rate": 9.711854059416722e-06, + "loss": 0.9285, "step": 18488 }, { - "epoch": 0.5246594778660613, + "epoch": 0.5239309699906487, "grad_norm": 0.0, - "learning_rate": 9.687564310302499e-06, - "loss": 0.7356, + "learning_rate": 9.710936652457276e-06, + "loss": 0.9011, "step": 18489 }, { - "epoch": 0.5246878547105562, + "epoch": 0.5239593074329111, "grad_norm": 0.0, - "learning_rate": 9.686645682248345e-06, - "loss": 0.9023, + "learning_rate": 9.710019247932714e-06, + "loss": 0.8225, "step": 18490 }, { - "epoch": 0.5247162315550511, + "epoch": 0.5239876448751736, "grad_norm": 0.0, - "learning_rate": 9.685727056841108e-06, - "loss": 0.9425, + "learning_rate": 9.709101845850768e-06, + "loss": 0.9403, "step": 18491 }, { - "epoch": 0.524744608399546, + "epoch": 0.5240159823174361, "grad_norm": 0.0, - "learning_rate": 9.684808434088547e-06, - "loss": 0.92, + "learning_rate": 9.708184446219168e-06, + "loss": 0.9052, "step": 18492 }, { - "epoch": 0.5247729852440408, + "epoch": 0.5240443197596985, "grad_norm": 0.0, - "learning_rate": 9.683889813998426e-06, - "loss": 0.8678, + "learning_rate": 9.707267049045636e-06, + "loss": 0.7624, "step": 18493 }, { - "epoch": 0.5248013620885358, + "epoch": 0.524072657201961, "grad_norm": 0.0, - "learning_rate": 9.682971196578501e-06, - "loss": 0.9389, + "learning_rate": 9.706349654337903e-06, + "loss": 0.8781, "step": 18494 }, { - "epoch": 0.5248297389330306, + "epoch": 0.5241009946442234, "grad_norm": 0.0, - "learning_rate": 9.682052581836529e-06, - "loss": 0.7892, + "learning_rate": 9.7054322621037e-06, + "loss": 0.8156, "step": 18495 }, { - "epoch": 0.5248581157775255, + "epoch": 0.5241293320864858, "grad_norm": 0.0, - "learning_rate": 9.681133969780274e-06, - "loss": 0.8814, + "learning_rate": 9.704514872350745e-06, + "loss": 0.8683, "step": 18496 }, { - "epoch": 0.5248864926220205, + "epoch": 0.5241576695287483, "grad_norm": 0.0, - "learning_rate": 9.680215360417498e-06, - "loss": 0.8412, + "learning_rate": 9.703597485086775e-06, + "loss": 0.8691, "step": 18497 }, { - "epoch": 0.5249148694665153, + "epoch": 0.5241860069710108, "grad_norm": 0.0, - "learning_rate": 9.679296753755958e-06, - "loss": 0.8696, + "learning_rate": 9.702680100319509e-06, + "loss": 0.9482, "step": 18498 }, { - "epoch": 0.5249432463110102, + "epoch": 0.5242143444132733, "grad_norm": 0.0, - "learning_rate": 9.67837814980341e-06, - "loss": 0.9228, + "learning_rate": 9.70176271805668e-06, + "loss": 0.9097, "step": 18499 }, { - "epoch": 0.5249716231555052, + "epoch": 0.5242426818555357, "grad_norm": 0.0, - "learning_rate": 9.677459548567617e-06, - "loss": 0.8714, + "learning_rate": 9.700845338306018e-06, + "loss": 0.9216, "step": 18500 }, { - "epoch": 0.525, + "epoch": 0.5242710192977982, "grad_norm": 0.0, - "learning_rate": 9.676540950056339e-06, - "loss": 0.9515, + "learning_rate": 9.699927961075245e-06, + "loss": 0.9424, "step": 18501 }, { - "epoch": 0.5250283768444949, + "epoch": 0.5242993567400607, "grad_norm": 0.0, - "learning_rate": 9.67562235427733e-06, - "loss": 0.9873, + "learning_rate": 9.699010586372087e-06, + "loss": 0.7969, "step": 18502 }, { - "epoch": 0.5250567536889897, + "epoch": 0.5243276941823231, "grad_norm": 0.0, - "learning_rate": 9.674703761238358e-06, - "loss": 0.8462, + "learning_rate": 9.698093214204279e-06, + "loss": 0.8524, "step": 18503 }, { - "epoch": 0.5250851305334847, + "epoch": 0.5243560316245856, "grad_norm": 0.0, - "learning_rate": 9.673785170947177e-06, - "loss": 1.0433, + "learning_rate": 9.69717584457954e-06, + "loss": 0.81, "step": 18504 }, { - "epoch": 0.5251135073779796, + "epoch": 0.524384369066848, "grad_norm": 0.0, - "learning_rate": 9.672866583411546e-06, - "loss": 0.8385, + "learning_rate": 9.696258477505606e-06, + "loss": 0.9054, "step": 18505 }, { - "epoch": 0.5251418842224744, + "epoch": 0.5244127065091105, "grad_norm": 0.0, - "learning_rate": 9.671947998639229e-06, - "loss": 0.8948, + "learning_rate": 9.695341112990196e-06, + "loss": 0.8307, "step": 18506 }, { - "epoch": 0.5251702610669694, + "epoch": 0.5244410439513729, "grad_norm": 0.0, - "learning_rate": 9.671029416637979e-06, - "loss": 0.9905, + "learning_rate": 9.69442375104104e-06, + "loss": 0.9188, "step": 18507 }, { - "epoch": 0.5251986379114643, + "epoch": 0.5244693813936354, "grad_norm": 0.0, - "learning_rate": 9.670110837415559e-06, - "loss": 0.9026, + "learning_rate": 9.693506391665873e-06, + "loss": 0.8836, "step": 18508 }, { - "epoch": 0.5252270147559591, + "epoch": 0.5244977188358979, "grad_norm": 0.0, - "learning_rate": 9.669192260979727e-06, - "loss": 0.7955, + "learning_rate": 9.692589034872408e-06, + "loss": 0.907, "step": 18509 }, { - "epoch": 0.525255391600454, + "epoch": 0.5245260562781603, "grad_norm": 0.0, - "learning_rate": 9.668273687338239e-06, - "loss": 0.8012, + "learning_rate": 9.691671680668381e-06, + "loss": 0.8822, "step": 18510 }, { - "epoch": 0.525283768444949, + "epoch": 0.5245543937204228, "grad_norm": 0.0, - "learning_rate": 9.667355116498864e-06, - "loss": 0.9235, + "learning_rate": 9.69075432906152e-06, + "loss": 0.8203, "step": 18511 }, { - "epoch": 0.5253121452894438, + "epoch": 0.5245827311626853, "grad_norm": 0.0, - "learning_rate": 9.666436548469354e-06, - "loss": 0.7898, + "learning_rate": 9.68983698005955e-06, + "loss": 0.9308, "step": 18512 }, { - "epoch": 0.5253405221339387, + "epoch": 0.5246110686049478, "grad_norm": 0.0, - "learning_rate": 9.665517983257469e-06, - "loss": 0.88, + "learning_rate": 9.6889196336702e-06, + "loss": 0.9039, "step": 18513 }, { - "epoch": 0.5253688989784336, + "epoch": 0.5246394060472102, "grad_norm": 0.0, - "learning_rate": 9.664599420870967e-06, - "loss": 0.8767, + "learning_rate": 9.688002289901192e-06, + "loss": 0.892, "step": 18514 }, { - "epoch": 0.5253972758229285, + "epoch": 0.5246677434894726, "grad_norm": 0.0, - "learning_rate": 9.663680861317613e-06, - "loss": 0.8875, + "learning_rate": 9.687084948760256e-06, + "loss": 0.9963, "step": 18515 }, { - "epoch": 0.5254256526674234, + "epoch": 0.5246960809317351, "grad_norm": 0.0, - "learning_rate": 9.662762304605158e-06, - "loss": 0.9292, + "learning_rate": 9.686167610255123e-06, + "loss": 0.8452, "step": 18516 }, { - "epoch": 0.5254540295119183, + "epoch": 0.5247244183739975, "grad_norm": 0.0, - "learning_rate": 9.661843750741368e-06, - "loss": 0.8819, + "learning_rate": 9.685250274393515e-06, + "loss": 0.9101, "step": 18517 }, { - "epoch": 0.5254824063564132, + "epoch": 0.52475275581626, "grad_norm": 0.0, - "learning_rate": 9.660925199733997e-06, - "loss": 0.9803, + "learning_rate": 9.684332941183164e-06, + "loss": 0.8788, "step": 18518 }, { - "epoch": 0.525510783200908, + "epoch": 0.5247810932585225, "grad_norm": 0.0, - "learning_rate": 9.660006651590805e-06, - "loss": 0.9017, + "learning_rate": 9.683415610631788e-06, + "loss": 0.8767, "step": 18519 }, { - "epoch": 0.5255391600454029, + "epoch": 0.5248094307007849, "grad_norm": 0.0, - "learning_rate": 9.659088106319554e-06, - "loss": 0.8832, + "learning_rate": 9.682498282747122e-06, + "loss": 0.8575, "step": 18520 }, { - "epoch": 0.5255675368898979, + "epoch": 0.5248377681430474, "grad_norm": 0.0, - "learning_rate": 9.658169563928002e-06, - "loss": 0.8427, + "learning_rate": 9.681580957536895e-06, + "loss": 0.865, "step": 18521 }, { - "epoch": 0.5255959137343927, + "epoch": 0.5248661055853099, "grad_norm": 0.0, - "learning_rate": 9.657251024423904e-06, - "loss": 0.7617, + "learning_rate": 9.680663635008824e-06, + "loss": 0.9682, "step": 18522 }, { - "epoch": 0.5256242905788876, + "epoch": 0.5248944430275724, "grad_norm": 0.0, - "learning_rate": 9.656332487815026e-06, - "loss": 0.9081, + "learning_rate": 9.679746315170643e-06, + "loss": 0.8391, "step": 18523 }, { - "epoch": 0.5256526674233826, + "epoch": 0.5249227804698348, "grad_norm": 0.0, - "learning_rate": 9.655413954109121e-06, - "loss": 0.9166, + "learning_rate": 9.678828998030081e-06, + "loss": 0.8602, "step": 18524 }, { - "epoch": 0.5256810442678774, + "epoch": 0.5249511179120973, "grad_norm": 0.0, - "learning_rate": 9.654495423313945e-06, - "loss": 0.905, + "learning_rate": 9.677911683594858e-06, + "loss": 0.8969, "step": 18525 }, { - "epoch": 0.5257094211123723, + "epoch": 0.5249794553543597, "grad_norm": 0.0, - "learning_rate": 9.653576895437268e-06, - "loss": 0.8318, + "learning_rate": 9.676994371872708e-06, + "loss": 0.8588, "step": 18526 }, { - "epoch": 0.5257377979568671, + "epoch": 0.5250077927966221, "grad_norm": 0.0, - "learning_rate": 9.65265837048684e-06, - "loss": 0.8657, + "learning_rate": 9.676077062871352e-06, + "loss": 0.8522, "step": 18527 }, { - "epoch": 0.5257661748013621, + "epoch": 0.5250361302388846, "grad_norm": 0.0, - "learning_rate": 9.651739848470423e-06, - "loss": 0.8503, + "learning_rate": 9.675159756598519e-06, + "loss": 0.7846, "step": 18528 }, { - "epoch": 0.525794551645857, + "epoch": 0.5250644676811471, "grad_norm": 0.0, - "learning_rate": 9.650821329395777e-06, - "loss": 0.9355, + "learning_rate": 9.674242453061938e-06, + "loss": 0.8871, "step": 18529 }, { - "epoch": 0.5258229284903518, + "epoch": 0.5250928051234096, "grad_norm": 0.0, - "learning_rate": 9.649902813270655e-06, - "loss": 0.8817, + "learning_rate": 9.67332515226933e-06, + "loss": 0.8539, "step": 18530 }, { - "epoch": 0.5258513053348468, + "epoch": 0.525121142565672, "grad_norm": 0.0, - "learning_rate": 9.648984300102823e-06, - "loss": 0.9171, + "learning_rate": 9.672407854228428e-06, + "loss": 0.9196, "step": 18531 }, { - "epoch": 0.5258796821793417, + "epoch": 0.5251494800079345, "grad_norm": 0.0, - "learning_rate": 9.648065789900035e-06, - "loss": 0.8213, + "learning_rate": 9.671490558946957e-06, + "loss": 0.9252, "step": 18532 }, { - "epoch": 0.5259080590238365, + "epoch": 0.525177817450197, "grad_norm": 0.0, - "learning_rate": 9.647147282670051e-06, - "loss": 0.822, + "learning_rate": 9.670573266432643e-06, + "loss": 0.9104, "step": 18533 }, { - "epoch": 0.5259364358683314, + "epoch": 0.5252061548924594, "grad_norm": 0.0, - "learning_rate": 9.646228778420632e-06, - "loss": 0.8966, + "learning_rate": 9.669655976693214e-06, + "loss": 0.8929, "step": 18534 }, { - "epoch": 0.5259648127128264, + "epoch": 0.5252344923347219, "grad_norm": 0.0, - "learning_rate": 9.645310277159532e-06, - "loss": 0.8985, + "learning_rate": 9.668738689736393e-06, + "loss": 0.9232, "step": 18535 }, { - "epoch": 0.5259931895573212, + "epoch": 0.5252628297769844, "grad_norm": 0.0, - "learning_rate": 9.64439177889451e-06, - "loss": 0.7957, + "learning_rate": 9.667821405569907e-06, + "loss": 0.8862, "step": 18536 }, { - "epoch": 0.5260215664018161, + "epoch": 0.5252911672192468, "grad_norm": 0.0, - "learning_rate": 9.64347328363333e-06, - "loss": 0.9116, + "learning_rate": 9.666904124201488e-06, + "loss": 0.9656, "step": 18537 }, { - "epoch": 0.526049943246311, + "epoch": 0.5253195046615092, "grad_norm": 0.0, - "learning_rate": 9.642554791383747e-06, - "loss": 0.8367, + "learning_rate": 9.665986845638857e-06, + "loss": 0.9294, "step": 18538 }, { - "epoch": 0.5260783200908059, + "epoch": 0.5253478421037717, "grad_norm": 0.0, - "learning_rate": 9.641636302153518e-06, - "loss": 0.9667, + "learning_rate": 9.665069569889742e-06, + "loss": 0.8985, "step": 18539 }, { - "epoch": 0.5261066969353008, + "epoch": 0.5253761795460342, "grad_norm": 0.0, - "learning_rate": 9.640717815950406e-06, - "loss": 0.9252, + "learning_rate": 9.664152296961876e-06, + "loss": 0.8814, "step": 18540 }, { - "epoch": 0.5261350737797957, + "epoch": 0.5254045169882966, "grad_norm": 0.0, - "learning_rate": 9.639799332782161e-06, - "loss": 0.8495, + "learning_rate": 9.663235026862976e-06, + "loss": 0.7881, "step": 18541 }, { - "epoch": 0.5261634506242906, + "epoch": 0.5254328544305591, "grad_norm": 0.0, - "learning_rate": 9.638880852656552e-06, - "loss": 0.9232, + "learning_rate": 9.662317759600774e-06, + "loss": 0.8811, "step": 18542 }, { - "epoch": 0.5261918274687855, + "epoch": 0.5254611918728216, "grad_norm": 0.0, - "learning_rate": 9.637962375581332e-06, - "loss": 0.827, + "learning_rate": 9.661400495182991e-06, + "loss": 0.894, "step": 18543 }, { - "epoch": 0.5262202043132803, + "epoch": 0.525489529315084, "grad_norm": 0.0, - "learning_rate": 9.63704390156426e-06, - "loss": 0.8559, + "learning_rate": 9.660483233617359e-06, + "loss": 0.8412, "step": 18544 }, { - "epoch": 0.5262485811577753, + "epoch": 0.5255178667573465, "grad_norm": 0.0, - "learning_rate": 9.636125430613096e-06, - "loss": 0.8364, + "learning_rate": 9.659565974911608e-06, + "loss": 0.9306, "step": 18545 }, { - "epoch": 0.5262769580022701, + "epoch": 0.525546204199609, "grad_norm": 0.0, - "learning_rate": 9.635206962735593e-06, - "loss": 0.8773, + "learning_rate": 9.658648719073454e-06, + "loss": 0.8509, "step": 18546 }, { - "epoch": 0.526305334846765, + "epoch": 0.5255745416418715, "grad_norm": 0.0, - "learning_rate": 9.634288497939518e-06, - "loss": 0.873, + "learning_rate": 9.657731466110632e-06, + "loss": 0.9077, "step": 18547 }, { - "epoch": 0.52633371169126, + "epoch": 0.5256028790841338, "grad_norm": 0.0, - "learning_rate": 9.633370036232622e-06, - "loss": 0.8372, + "learning_rate": 9.656814216030861e-06, + "loss": 0.7979, "step": 18548 }, { - "epoch": 0.5263620885357548, + "epoch": 0.5256312165263963, "grad_norm": 0.0, - "learning_rate": 9.632451577622667e-06, - "loss": 0.7963, + "learning_rate": 9.655896968841873e-06, + "loss": 0.8521, "step": 18549 }, { - "epoch": 0.5263904653802497, + "epoch": 0.5256595539686588, "grad_norm": 0.0, - "learning_rate": 9.631533122117409e-06, - "loss": 0.8279, + "learning_rate": 9.654979724551393e-06, + "loss": 0.9602, "step": 18550 }, { - "epoch": 0.5264188422247446, + "epoch": 0.5256878914109212, "grad_norm": 0.0, - "learning_rate": 9.630614669724607e-06, - "loss": 0.8531, + "learning_rate": 9.654062483167145e-06, + "loss": 0.9834, "step": 18551 }, { - "epoch": 0.5264472190692395, + "epoch": 0.5257162288531837, "grad_norm": 0.0, - "learning_rate": 9.629696220452021e-06, - "loss": 1.0054, + "learning_rate": 9.653145244696857e-06, + "loss": 0.8793, "step": 18552 }, { - "epoch": 0.5264755959137344, + "epoch": 0.5257445662954462, "grad_norm": 0.0, - "learning_rate": 9.628777774307404e-06, - "loss": 0.9925, + "learning_rate": 9.65222800914826e-06, + "loss": 0.8373, "step": 18553 }, { - "epoch": 0.5265039727582292, + "epoch": 0.5257729037377087, "grad_norm": 0.0, - "learning_rate": 9.627859331298522e-06, - "loss": 0.921, + "learning_rate": 9.651310776529072e-06, + "loss": 0.7558, "step": 18554 }, { - "epoch": 0.5265323496027242, + "epoch": 0.5258012411799711, "grad_norm": 0.0, - "learning_rate": 9.626940891433127e-06, - "loss": 0.8098, + "learning_rate": 9.650393546847023e-06, + "loss": 0.9371, "step": 18555 }, { - "epoch": 0.5265607264472191, + "epoch": 0.5258295786222336, "grad_norm": 0.0, - "learning_rate": 9.626022454718976e-06, - "loss": 0.8781, + "learning_rate": 9.649476320109838e-06, + "loss": 0.8538, "step": 18556 }, { - "epoch": 0.5265891032917139, + "epoch": 0.5258579160644961, "grad_norm": 0.0, - "learning_rate": 9.625104021163833e-06, - "loss": 0.8079, + "learning_rate": 9.648559096325245e-06, + "loss": 0.8924, "step": 18557 }, { - "epoch": 0.5266174801362089, + "epoch": 0.5258862535067584, "grad_norm": 0.0, - "learning_rate": 9.624185590775454e-06, - "loss": 0.9802, + "learning_rate": 9.647641875500972e-06, + "loss": 0.9312, "step": 18558 }, { - "epoch": 0.5266458569807038, + "epoch": 0.5259145909490209, "grad_norm": 0.0, - "learning_rate": 9.623267163561596e-06, - "loss": 0.8821, + "learning_rate": 9.64672465764474e-06, + "loss": 0.924, "step": 18559 }, { - "epoch": 0.5266742338251986, + "epoch": 0.5259429283912834, "grad_norm": 0.0, - "learning_rate": 9.622348739530017e-06, - "loss": 0.783, + "learning_rate": 9.645807442764277e-06, + "loss": 0.8282, "step": 18560 }, { - "epoch": 0.5267026106696935, + "epoch": 0.5259712658335458, "grad_norm": 0.0, - "learning_rate": 9.621430318688475e-06, - "loss": 0.8461, + "learning_rate": 9.644890230867313e-06, + "loss": 0.8418, "step": 18561 }, { - "epoch": 0.5267309875141885, + "epoch": 0.5259996032758083, "grad_norm": 0.0, - "learning_rate": 9.620511901044729e-06, - "loss": 0.9664, + "learning_rate": 9.643973021961566e-06, + "loss": 0.8681, "step": 18562 }, { - "epoch": 0.5267593643586833, + "epoch": 0.5260279407180708, "grad_norm": 0.0, - "learning_rate": 9.619593486606533e-06, - "loss": 0.9869, + "learning_rate": 9.643055816054768e-06, + "loss": 0.8835, "step": 18563 }, { - "epoch": 0.5267877412031782, + "epoch": 0.5260562781603333, "grad_norm": 0.0, - "learning_rate": 9.618675075381651e-06, - "loss": 0.9446, + "learning_rate": 9.642138613154643e-06, + "loss": 0.9112, "step": 18564 }, { - "epoch": 0.5268161180476731, + "epoch": 0.5260846156025957, "grad_norm": 0.0, - "learning_rate": 9.61775666737784e-06, - "loss": 0.8359, + "learning_rate": 9.641221413268916e-06, + "loss": 0.8484, "step": 18565 }, { - "epoch": 0.526844494892168, + "epoch": 0.5261129530448582, "grad_norm": 0.0, - "learning_rate": 9.61683826260285e-06, - "loss": 0.9503, + "learning_rate": 9.640304216405318e-06, + "loss": 0.8394, "step": 18566 }, { - "epoch": 0.5268728717366629, + "epoch": 0.5261412904871207, "grad_norm": 0.0, - "learning_rate": 9.615919861064447e-06, - "loss": 0.9066, + "learning_rate": 9.639387022571569e-06, + "loss": 0.9259, "step": 18567 }, { - "epoch": 0.5269012485811577, + "epoch": 0.526169627929383, "grad_norm": 0.0, - "learning_rate": 9.615001462770387e-06, - "loss": 0.8576, + "learning_rate": 9.638469831775395e-06, + "loss": 0.8472, "step": 18568 }, { - "epoch": 0.5269296254256527, + "epoch": 0.5261979653716455, "grad_norm": 0.0, - "learning_rate": 9.614083067728426e-06, - "loss": 0.8341, + "learning_rate": 9.637552644024526e-06, + "loss": 0.8904, "step": 18569 }, { - "epoch": 0.5269580022701476, + "epoch": 0.526226302813908, "grad_norm": 0.0, - "learning_rate": 9.613164675946323e-06, - "loss": 0.9213, + "learning_rate": 9.636635459326685e-06, + "loss": 0.7771, "step": 18570 }, { - "epoch": 0.5269863791146424, + "epoch": 0.5262546402561705, "grad_norm": 0.0, - "learning_rate": 9.612246287431832e-06, - "loss": 0.8843, + "learning_rate": 9.635718277689602e-06, + "loss": 0.8835, "step": 18571 }, { - "epoch": 0.5270147559591374, + "epoch": 0.5262829776984329, "grad_norm": 0.0, - "learning_rate": 9.611327902192718e-06, - "loss": 0.8939, + "learning_rate": 9.634801099120996e-06, + "loss": 0.9076, "step": 18572 }, { - "epoch": 0.5270431328036322, + "epoch": 0.5263113151406954, "grad_norm": 0.0, - "learning_rate": 9.610409520236735e-06, - "loss": 0.8524, + "learning_rate": 9.633883923628595e-06, + "loss": 0.9077, "step": 18573 }, { - "epoch": 0.5270715096481271, + "epoch": 0.5263396525829579, "grad_norm": 0.0, - "learning_rate": 9.609491141571637e-06, - "loss": 0.8886, + "learning_rate": 9.632966751220127e-06, + "loss": 0.9386, "step": 18574 }, { - "epoch": 0.5270998864926221, + "epoch": 0.5263679900252203, "grad_norm": 0.0, - "learning_rate": 9.608572766205189e-06, - "loss": 0.9706, + "learning_rate": 9.632049581903315e-06, + "loss": 0.8947, "step": 18575 }, { - "epoch": 0.5271282633371169, + "epoch": 0.5263963274674828, "grad_norm": 0.0, - "learning_rate": 9.607654394145142e-06, - "loss": 0.9975, + "learning_rate": 9.63113241568589e-06, + "loss": 0.8261, "step": 18576 }, { - "epoch": 0.5271566401816118, + "epoch": 0.5264246649097453, "grad_norm": 0.0, - "learning_rate": 9.606736025399256e-06, - "loss": 0.8194, + "learning_rate": 9.63021525257557e-06, + "loss": 0.8234, "step": 18577 }, { - "epoch": 0.5271850170261067, + "epoch": 0.5264530023520078, "grad_norm": 0.0, - "learning_rate": 9.605817659975291e-06, - "loss": 0.697, + "learning_rate": 9.629298092580083e-06, + "loss": 0.8191, "step": 18578 }, { - "epoch": 0.5272133938706016, + "epoch": 0.5264813397942701, "grad_norm": 0.0, - "learning_rate": 9.604899297881e-06, - "loss": 0.9135, + "learning_rate": 9.62838093570716e-06, + "loss": 0.8199, "step": 18579 }, { - "epoch": 0.5272417707150965, + "epoch": 0.5265096772365326, "grad_norm": 0.0, - "learning_rate": 9.603980939124141e-06, - "loss": 0.8633, + "learning_rate": 9.627463781964521e-06, + "loss": 0.9205, "step": 18580 }, { - "epoch": 0.5272701475595913, + "epoch": 0.5265380146787951, "grad_norm": 0.0, - "learning_rate": 9.603062583712475e-06, - "loss": 0.9123, + "learning_rate": 9.62654663135989e-06, + "loss": 0.8981, "step": 18581 }, { - "epoch": 0.5272985244040863, + "epoch": 0.5265663521210575, "grad_norm": 0.0, - "learning_rate": 9.602144231653758e-06, - "loss": 0.9114, + "learning_rate": 9.625629483901e-06, + "loss": 0.8886, "step": 18582 }, { - "epoch": 0.5273269012485812, + "epoch": 0.52659468956332, "grad_norm": 0.0, - "learning_rate": 9.601225882955742e-06, - "loss": 0.861, + "learning_rate": 9.624712339595567e-06, + "loss": 0.8848, "step": 18583 }, { - "epoch": 0.527355278093076, + "epoch": 0.5266230270055825, "grad_norm": 0.0, - "learning_rate": 9.600307537626194e-06, - "loss": 0.8607, + "learning_rate": 9.623795198451326e-06, + "loss": 1.0312, "step": 18584 }, { - "epoch": 0.5273836549375709, + "epoch": 0.5266513644478449, "grad_norm": 0.0, - "learning_rate": 9.599389195672864e-06, - "loss": 0.8054, + "learning_rate": 9.622878060475995e-06, + "loss": 0.9328, "step": 18585 }, { - "epoch": 0.5274120317820659, + "epoch": 0.5266797018901074, "grad_norm": 0.0, - "learning_rate": 9.598470857103511e-06, - "loss": 0.9432, + "learning_rate": 9.621960925677301e-06, + "loss": 0.8988, "step": 18586 }, { - "epoch": 0.5274404086265607, + "epoch": 0.5267080393323699, "grad_norm": 0.0, - "learning_rate": 9.59755252192589e-06, - "loss": 0.8153, + "learning_rate": 9.621043794062974e-06, + "loss": 0.9519, "step": 18587 }, { - "epoch": 0.5274687854710556, + "epoch": 0.5267363767746324, "grad_norm": 0.0, - "learning_rate": 9.596634190147765e-06, - "loss": 0.8846, + "learning_rate": 9.620126665640732e-06, + "loss": 0.7772, "step": 18588 }, { - "epoch": 0.5274971623155505, + "epoch": 0.5267647142168947, "grad_norm": 0.0, - "learning_rate": 9.595715861776887e-06, - "loss": 0.7458, + "learning_rate": 9.619209540418307e-06, + "loss": 0.9358, "step": 18589 }, { - "epoch": 0.5275255391600454, + "epoch": 0.5267930516591572, "grad_norm": 0.0, - "learning_rate": 9.594797536821018e-06, - "loss": 0.8471, + "learning_rate": 9.618292418403422e-06, + "loss": 0.9441, "step": 18590 }, { - "epoch": 0.5275539160045403, + "epoch": 0.5268213891014197, "grad_norm": 0.0, - "learning_rate": 9.59387921528791e-06, - "loss": 0.8804, + "learning_rate": 9.617375299603799e-06, + "loss": 0.9311, "step": 18591 }, { - "epoch": 0.5275822928490352, + "epoch": 0.5268497265436821, "grad_norm": 0.0, - "learning_rate": 9.592960897185324e-06, - "loss": 0.9037, + "learning_rate": 9.616458184027171e-06, + "loss": 0.8064, "step": 18592 }, { - "epoch": 0.5276106696935301, + "epoch": 0.5268780639859446, "grad_norm": 0.0, - "learning_rate": 9.592042582521016e-06, - "loss": 0.886, + "learning_rate": 9.615541071681253e-06, + "loss": 1.013, "step": 18593 }, { - "epoch": 0.527639046538025, + "epoch": 0.5269064014282071, "grad_norm": 0.0, - "learning_rate": 9.591124271302741e-06, - "loss": 0.8953, + "learning_rate": 9.614623962573776e-06, + "loss": 0.9071, "step": 18594 }, { - "epoch": 0.5276674233825198, + "epoch": 0.5269347388704696, "grad_norm": 0.0, - "learning_rate": 9.590205963538259e-06, - "loss": 0.8629, + "learning_rate": 9.613706856712466e-06, + "loss": 1.0063, "step": 18595 }, { - "epoch": 0.5276958002270148, + "epoch": 0.526963076312732, "grad_norm": 0.0, - "learning_rate": 9.589287659235325e-06, - "loss": 0.89, + "learning_rate": 9.612789754105044e-06, + "loss": 0.8814, "step": 18596 }, { - "epoch": 0.5277241770715096, + "epoch": 0.5269914137549945, "grad_norm": 0.0, - "learning_rate": 9.588369358401696e-06, - "loss": 0.8188, + "learning_rate": 9.611872654759242e-06, + "loss": 0.8171, "step": 18597 }, { - "epoch": 0.5277525539160045, + "epoch": 0.527019751197257, "grad_norm": 0.0, - "learning_rate": 9.587451061045131e-06, - "loss": 0.9151, + "learning_rate": 9.610955558682778e-06, + "loss": 0.8444, "step": 18598 }, { - "epoch": 0.5277809307604995, + "epoch": 0.5270480886395194, "grad_norm": 0.0, - "learning_rate": 9.586532767173385e-06, - "loss": 0.8776, + "learning_rate": 9.610038465883378e-06, + "loss": 0.8535, "step": 18599 }, { - "epoch": 0.5278093076049943, + "epoch": 0.5270764260817818, "grad_norm": 0.0, - "learning_rate": 9.585614476794214e-06, - "loss": 0.8868, + "learning_rate": 9.609121376368772e-06, + "loss": 0.9162, "step": 18600 }, { - "epoch": 0.5278376844494892, + "epoch": 0.5271047635240443, "grad_norm": 0.0, - "learning_rate": 9.584696189915379e-06, - "loss": 0.951, + "learning_rate": 9.60820429014668e-06, + "loss": 0.8909, "step": 18601 }, { - "epoch": 0.5278660612939841, + "epoch": 0.5271331009663068, "grad_norm": 0.0, - "learning_rate": 9.583777906544628e-06, - "loss": 0.9617, + "learning_rate": 9.607287207224828e-06, + "loss": 0.9493, "step": 18602 }, { - "epoch": 0.527894438138479, + "epoch": 0.5271614384085692, "grad_norm": 0.0, - "learning_rate": 9.582859626689727e-06, - "loss": 0.8196, + "learning_rate": 9.606370127610946e-06, + "loss": 0.9156, "step": 18603 }, { - "epoch": 0.5279228149829739, + "epoch": 0.5271897758508317, "grad_norm": 0.0, - "learning_rate": 9.58194135035843e-06, - "loss": 0.9825, + "learning_rate": 9.605453051312749e-06, + "loss": 0.8934, "step": 18604 }, { - "epoch": 0.5279511918274687, + "epoch": 0.5272181132930942, "grad_norm": 0.0, - "learning_rate": 9.581023077558494e-06, - "loss": 0.7813, + "learning_rate": 9.604535978337974e-06, + "loss": 0.9213, "step": 18605 }, { - "epoch": 0.5279795686719637, + "epoch": 0.5272464507353566, "grad_norm": 0.0, - "learning_rate": 9.580104808297676e-06, - "loss": 0.9461, + "learning_rate": 9.603618908694335e-06, + "loss": 0.8987, "step": 18606 }, { - "epoch": 0.5280079455164586, + "epoch": 0.5272747881776191, "grad_norm": 0.0, - "learning_rate": 9.579186542583729e-06, - "loss": 0.8763, + "learning_rate": 9.60270184238956e-06, + "loss": 0.8683, "step": 18607 }, { - "epoch": 0.5280363223609534, + "epoch": 0.5273031256198816, "grad_norm": 0.0, - "learning_rate": 9.578268280424413e-06, - "loss": 0.9924, + "learning_rate": 9.601784779431376e-06, + "loss": 0.8224, "step": 18608 }, { - "epoch": 0.5280646992054484, + "epoch": 0.527331463062144, "grad_norm": 0.0, - "learning_rate": 9.577350021827486e-06, - "loss": 0.8908, + "learning_rate": 9.600867719827507e-06, + "loss": 0.8638, "step": 18609 }, { - "epoch": 0.5280930760499433, + "epoch": 0.5273598005044065, "grad_norm": 0.0, - "learning_rate": 9.576431766800697e-06, - "loss": 0.9817, + "learning_rate": 9.599950663585677e-06, + "loss": 0.8582, "step": 18610 }, { - "epoch": 0.5281214528944381, + "epoch": 0.527388137946669, "grad_norm": 0.0, - "learning_rate": 9.575513515351813e-06, - "loss": 0.8917, + "learning_rate": 9.599033610713614e-06, + "loss": 0.9348, "step": 18611 }, { - "epoch": 0.528149829738933, + "epoch": 0.5274164753889314, "grad_norm": 0.0, - "learning_rate": 9.574595267488584e-06, - "loss": 0.8089, + "learning_rate": 9.598116561219036e-06, + "loss": 0.9323, "step": 18612 }, { - "epoch": 0.528178206583428, + "epoch": 0.5274448128311938, "grad_norm": 0.0, - "learning_rate": 9.573677023218765e-06, - "loss": 0.8338, + "learning_rate": 9.597199515109674e-06, + "loss": 0.8093, "step": 18613 }, { - "epoch": 0.5282065834279228, + "epoch": 0.5274731502734563, "grad_norm": 0.0, - "learning_rate": 9.57275878255012e-06, - "loss": 0.915, + "learning_rate": 9.596282472393248e-06, + "loss": 0.8371, "step": 18614 }, { - "epoch": 0.5282349602724177, + "epoch": 0.5275014877157188, "grad_norm": 0.0, - "learning_rate": 9.5718405454904e-06, - "loss": 0.9749, + "learning_rate": 9.595365433077484e-06, + "loss": 0.8332, "step": 18615 }, { - "epoch": 0.5282633371169126, + "epoch": 0.5275298251579812, "grad_norm": 0.0, - "learning_rate": 9.57092231204736e-06, - "loss": 0.8868, + "learning_rate": 9.594448397170112e-06, + "loss": 0.9137, "step": 18616 }, { - "epoch": 0.5282917139614075, + "epoch": 0.5275581626002437, "grad_norm": 0.0, - "learning_rate": 9.570004082228757e-06, - "loss": 0.9218, + "learning_rate": 9.593531364678848e-06, + "loss": 0.8383, "step": 18617 }, { - "epoch": 0.5283200908059024, + "epoch": 0.5275865000425062, "grad_norm": 0.0, - "learning_rate": 9.569085856042349e-06, - "loss": 0.9377, + "learning_rate": 9.592614335611418e-06, + "loss": 0.8146, "step": 18618 }, { - "epoch": 0.5283484676503972, + "epoch": 0.5276148374847687, "grad_norm": 0.0, - "learning_rate": 9.568167633495895e-06, - "loss": 0.897, + "learning_rate": 9.591697309975556e-06, + "loss": 0.7436, "step": 18619 }, { - "epoch": 0.5283768444948922, + "epoch": 0.5276431749270311, "grad_norm": 0.0, - "learning_rate": 9.567249414597148e-06, - "loss": 0.9045, + "learning_rate": 9.590780287778973e-06, + "loss": 0.9753, "step": 18620 }, { - "epoch": 0.528405221339387, + "epoch": 0.5276715123692935, "grad_norm": 0.0, - "learning_rate": 9.566331199353865e-06, - "loss": 0.9788, + "learning_rate": 9.589863269029402e-06, + "loss": 0.818, "step": 18621 }, { - "epoch": 0.5284335981838819, + "epoch": 0.527699849811556, "grad_norm": 0.0, - "learning_rate": 9.565412987773802e-06, - "loss": 0.9243, + "learning_rate": 9.588946253734564e-06, + "loss": 0.9741, "step": 18622 }, { - "epoch": 0.5284619750283769, + "epoch": 0.5277281872538184, "grad_norm": 0.0, - "learning_rate": 9.564494779864713e-06, - "loss": 0.8755, + "learning_rate": 9.588029241902182e-06, + "loss": 0.8474, "step": 18623 }, { - "epoch": 0.5284903518728717, + "epoch": 0.5277565246960809, "grad_norm": 0.0, - "learning_rate": 9.563576575634357e-06, - "loss": 0.8622, + "learning_rate": 9.587112233539988e-06, + "loss": 0.9324, "step": 18624 }, { - "epoch": 0.5285187287173666, + "epoch": 0.5277848621383434, "grad_norm": 0.0, - "learning_rate": 9.56265837509049e-06, - "loss": 0.9003, + "learning_rate": 9.586195228655698e-06, + "loss": 0.9247, "step": 18625 }, { - "epoch": 0.5285471055618616, + "epoch": 0.5278131995806059, "grad_norm": 0.0, - "learning_rate": 9.561740178240868e-06, - "loss": 0.7183, + "learning_rate": 9.585278227257042e-06, + "loss": 0.9302, "step": 18626 }, { - "epoch": 0.5285754824063564, + "epoch": 0.5278415370228683, "grad_norm": 0.0, - "learning_rate": 9.560821985093245e-06, - "loss": 0.8712, + "learning_rate": 9.584361229351735e-06, + "loss": 0.7748, "step": 18627 }, { - "epoch": 0.5286038592508513, + "epoch": 0.5278698744651308, "grad_norm": 0.0, - "learning_rate": 9.559903795655379e-06, - "loss": 0.838, + "learning_rate": 9.583444234947513e-06, + "loss": 0.8642, "step": 18628 }, { - "epoch": 0.5286322360953462, + "epoch": 0.5278982119073933, "grad_norm": 0.0, - "learning_rate": 9.558985609935027e-06, - "loss": 0.8824, + "learning_rate": 9.582527244052095e-06, + "loss": 0.9861, "step": 18629 }, { - "epoch": 0.5286606129398411, + "epoch": 0.5279265493496557, "grad_norm": 0.0, - "learning_rate": 9.55806742793994e-06, - "loss": 0.9171, + "learning_rate": 9.581610256673205e-06, + "loss": 0.858, "step": 18630 }, { - "epoch": 0.528688989784336, + "epoch": 0.5279548867919182, "grad_norm": 0.0, - "learning_rate": 9.55714924967788e-06, - "loss": 0.922, + "learning_rate": 9.580693272818564e-06, + "loss": 0.8362, "step": 18631 }, { - "epoch": 0.5287173666288308, + "epoch": 0.5279832242341806, "grad_norm": 0.0, - "learning_rate": 9.5562310751566e-06, - "loss": 0.8366, + "learning_rate": 9.579776292495903e-06, + "loss": 0.9164, "step": 18632 }, { - "epoch": 0.5287457434733258, + "epoch": 0.528011561676443, "grad_norm": 0.0, - "learning_rate": 9.555312904383852e-06, - "loss": 0.9241, + "learning_rate": 9.578859315712939e-06, + "loss": 0.8608, "step": 18633 }, { - "epoch": 0.5287741203178207, + "epoch": 0.5280398991187055, "grad_norm": 0.0, - "learning_rate": 9.554394737367398e-06, - "loss": 0.97, + "learning_rate": 9.577942342477403e-06, + "loss": 0.9725, "step": 18634 }, { - "epoch": 0.5288024971623155, + "epoch": 0.528068236560968, "grad_norm": 0.0, - "learning_rate": 9.553476574114993e-06, - "loss": 0.9435, + "learning_rate": 9.577025372797012e-06, + "loss": 0.7521, "step": 18635 }, { - "epoch": 0.5288308740068104, + "epoch": 0.5280965740032305, "grad_norm": 0.0, - "learning_rate": 9.552558414634392e-06, - "loss": 0.906, + "learning_rate": 9.576108406679493e-06, + "loss": 0.9554, "step": 18636 }, { - "epoch": 0.5288592508513054, + "epoch": 0.5281249114454929, "grad_norm": 0.0, - "learning_rate": 9.55164025893335e-06, - "loss": 0.9086, + "learning_rate": 9.575191444132574e-06, + "loss": 0.8547, "step": 18637 }, { - "epoch": 0.5288876276958002, + "epoch": 0.5281532488877554, "grad_norm": 0.0, - "learning_rate": 9.550722107019621e-06, - "loss": 0.924, + "learning_rate": 9.574274485163972e-06, + "loss": 0.6726, "step": 18638 }, { - "epoch": 0.5289160045402951, + "epoch": 0.5281815863300179, "grad_norm": 0.0, - "learning_rate": 9.549803958900966e-06, - "loss": 0.8533, + "learning_rate": 9.573357529781413e-06, + "loss": 0.9159, "step": 18639 }, { - "epoch": 0.52894438138479, + "epoch": 0.5282099237722803, "grad_norm": 0.0, - "learning_rate": 9.548885814585134e-06, - "loss": 0.9297, + "learning_rate": 9.572440577992626e-06, + "loss": 0.8517, "step": 18640 }, { - "epoch": 0.5289727582292849, + "epoch": 0.5282382612145428, "grad_norm": 0.0, - "learning_rate": 9.547967674079884e-06, - "loss": 0.8862, + "learning_rate": 9.571523629805328e-06, + "loss": 0.9135, "step": 18641 }, { - "epoch": 0.5290011350737798, + "epoch": 0.5282665986568053, "grad_norm": 0.0, - "learning_rate": 9.547049537392973e-06, - "loss": 0.8773, + "learning_rate": 9.57060668522725e-06, + "loss": 0.9599, "step": 18642 }, { - "epoch": 0.5290295119182747, + "epoch": 0.5282949360990677, "grad_norm": 0.0, - "learning_rate": 9.546131404532154e-06, - "loss": 0.9032, + "learning_rate": 9.569689744266107e-06, + "loss": 0.8801, "step": 18643 }, { - "epoch": 0.5290578887627696, + "epoch": 0.5283232735413301, "grad_norm": 0.0, - "learning_rate": 9.545213275505183e-06, - "loss": 0.773, + "learning_rate": 9.568772806929626e-06, + "loss": 0.8496, "step": 18644 }, { - "epoch": 0.5290862656072645, + "epoch": 0.5283516109835926, "grad_norm": 0.0, - "learning_rate": 9.544295150319817e-06, - "loss": 0.9277, + "learning_rate": 9.567855873225536e-06, + "loss": 0.9051, "step": 18645 }, { - "epoch": 0.5291146424517593, + "epoch": 0.5283799484258551, "grad_norm": 0.0, - "learning_rate": 9.543377028983812e-06, - "loss": 0.7964, + "learning_rate": 9.566938943161555e-06, + "loss": 0.7904, "step": 18646 }, { - "epoch": 0.5291430192962543, + "epoch": 0.5284082858681175, "grad_norm": 0.0, - "learning_rate": 9.542458911504917e-06, - "loss": 0.87, + "learning_rate": 9.566022016745406e-06, + "loss": 0.8706, "step": 18647 }, { - "epoch": 0.5291713961407492, + "epoch": 0.52843662331038, "grad_norm": 0.0, - "learning_rate": 9.541540797890892e-06, - "loss": 0.9286, + "learning_rate": 9.565105093984815e-06, + "loss": 0.8602, "step": 18648 }, { - "epoch": 0.529199772985244, + "epoch": 0.5284649607526425, "grad_norm": 0.0, - "learning_rate": 9.540622688149495e-06, - "loss": 1.0072, + "learning_rate": 9.564188174887505e-06, + "loss": 1.0027, "step": 18649 }, { - "epoch": 0.529228149829739, + "epoch": 0.528493298194905, "grad_norm": 0.0, - "learning_rate": 9.53970458228848e-06, - "loss": 0.8436, + "learning_rate": 9.563271259461206e-06, + "loss": 0.9341, "step": 18650 }, { - "epoch": 0.5292565266742338, + "epoch": 0.5285216356371674, "grad_norm": 0.0, - "learning_rate": 9.538786480315599e-06, - "loss": 0.8957, + "learning_rate": 9.562354347713628e-06, + "loss": 0.8687, "step": 18651 }, { - "epoch": 0.5292849035187287, + "epoch": 0.5285499730794299, "grad_norm": 0.0, - "learning_rate": 9.537868382238611e-06, - "loss": 0.968, + "learning_rate": 9.561437439652503e-06, + "loss": 0.8931, "step": 18652 }, { - "epoch": 0.5293132803632236, + "epoch": 0.5285783105216924, "grad_norm": 0.0, - "learning_rate": 9.53695028806527e-06, - "loss": 0.9896, + "learning_rate": 9.560520535285555e-06, + "loss": 0.8589, "step": 18653 }, { - "epoch": 0.5293416572077185, + "epoch": 0.5286066479639547, "grad_norm": 0.0, - "learning_rate": 9.536032197803329e-06, - "loss": 0.9742, + "learning_rate": 9.559603634620505e-06, + "loss": 0.9202, "step": 18654 }, { - "epoch": 0.5293700340522134, + "epoch": 0.5286349854062172, "grad_norm": 0.0, - "learning_rate": 9.535114111460547e-06, - "loss": 0.8907, + "learning_rate": 9.55868673766508e-06, + "loss": 0.829, "step": 18655 }, { - "epoch": 0.5293984108967082, + "epoch": 0.5286633228484797, "grad_norm": 0.0, - "learning_rate": 9.534196029044677e-06, - "loss": 0.8742, + "learning_rate": 9.557769844426997e-06, + "loss": 0.8473, "step": 18656 }, { - "epoch": 0.5294267877412032, + "epoch": 0.5286916602907421, "grad_norm": 0.0, - "learning_rate": 9.533277950563473e-06, - "loss": 0.7749, + "learning_rate": 9.556852954913983e-06, + "loss": 0.8293, "step": 18657 }, { - "epoch": 0.5294551645856981, + "epoch": 0.5287199977330046, "grad_norm": 0.0, - "learning_rate": 9.53235987602469e-06, - "loss": 0.8783, + "learning_rate": 9.555936069133765e-06, + "loss": 1.0084, "step": 18658 }, { - "epoch": 0.5294835414301929, + "epoch": 0.5287483351752671, "grad_norm": 0.0, - "learning_rate": 9.531441805436086e-06, - "loss": 0.9336, + "learning_rate": 9.555019187094058e-06, + "loss": 0.8669, "step": 18659 }, { - "epoch": 0.5295119182746879, + "epoch": 0.5287766726175296, "grad_norm": 0.0, - "learning_rate": 9.530523738805412e-06, - "loss": 0.7603, + "learning_rate": 9.55410230880259e-06, + "loss": 0.8041, "step": 18660 }, { - "epoch": 0.5295402951191828, + "epoch": 0.528805010059792, "grad_norm": 0.0, - "learning_rate": 9.529605676140426e-06, - "loss": 0.8298, + "learning_rate": 9.553185434267084e-06, + "loss": 0.8428, "step": 18661 }, { - "epoch": 0.5295686719636776, + "epoch": 0.5288333475020545, "grad_norm": 0.0, - "learning_rate": 9.528687617448884e-06, - "loss": 0.8556, + "learning_rate": 9.552268563495264e-06, + "loss": 0.8628, "step": 18662 }, { - "epoch": 0.5295970488081725, + "epoch": 0.528861684944317, "grad_norm": 0.0, - "learning_rate": 9.527769562738533e-06, - "loss": 0.9199, + "learning_rate": 9.551351696494854e-06, + "loss": 0.9615, "step": 18663 }, { - "epoch": 0.5296254256526675, + "epoch": 0.5288900223865793, "grad_norm": 0.0, - "learning_rate": 9.526851512017139e-06, - "loss": 0.8939, + "learning_rate": 9.550434833273572e-06, + "loss": 0.8487, "step": 18664 }, { - "epoch": 0.5296538024971623, + "epoch": 0.5289183598288418, "grad_norm": 0.0, - "learning_rate": 9.525933465292449e-06, - "loss": 0.8511, + "learning_rate": 9.549517973839143e-06, + "loss": 0.8026, "step": 18665 }, { - "epoch": 0.5296821793416572, + "epoch": 0.5289466972711043, "grad_norm": 0.0, - "learning_rate": 9.525015422572222e-06, - "loss": 0.9554, + "learning_rate": 9.548601118199297e-06, + "loss": 0.8409, "step": 18666 }, { - "epoch": 0.5297105561861521, + "epoch": 0.5289750347133668, "grad_norm": 0.0, - "learning_rate": 9.524097383864211e-06, - "loss": 0.8786, + "learning_rate": 9.547684266361746e-06, + "loss": 0.9625, "step": 18667 }, { - "epoch": 0.529738933030647, + "epoch": 0.5290033721556292, "grad_norm": 0.0, - "learning_rate": 9.52317934917617e-06, - "loss": 0.9705, + "learning_rate": 9.546767418334219e-06, + "loss": 0.9515, "step": 18668 }, { - "epoch": 0.5297673098751419, + "epoch": 0.5290317095978917, "grad_norm": 0.0, - "learning_rate": 9.522261318515856e-06, - "loss": 0.8426, + "learning_rate": 9.545850574124444e-06, + "loss": 0.9243, "step": 18669 }, { - "epoch": 0.5297956867196367, + "epoch": 0.5290600470401542, "grad_norm": 0.0, - "learning_rate": 9.521343291891022e-06, - "loss": 0.9349, + "learning_rate": 9.544933733740133e-06, + "loss": 0.9027, "step": 18670 }, { - "epoch": 0.5298240635641317, + "epoch": 0.5290883844824166, "grad_norm": 0.0, - "learning_rate": 9.52042526930942e-06, - "loss": 0.8138, + "learning_rate": 9.544016897189018e-06, + "loss": 0.8997, "step": 18671 }, { - "epoch": 0.5298524404086266, + "epoch": 0.5291167219246791, "grad_norm": 0.0, - "learning_rate": 9.51950725077881e-06, - "loss": 0.7675, + "learning_rate": 9.543100064478815e-06, + "loss": 0.9024, "step": 18672 }, { - "epoch": 0.5298808172531214, + "epoch": 0.5291450593669416, "grad_norm": 0.0, - "learning_rate": 9.518589236306945e-06, - "loss": 0.8356, + "learning_rate": 9.54218323561725e-06, + "loss": 0.821, "step": 18673 }, { - "epoch": 0.5299091940976164, + "epoch": 0.529173396809204, "grad_norm": 0.0, - "learning_rate": 9.517671225901574e-06, - "loss": 0.8964, + "learning_rate": 9.541266410612045e-06, + "loss": 0.8692, "step": 18674 }, { - "epoch": 0.5299375709421112, + "epoch": 0.5292017342514664, "grad_norm": 0.0, - "learning_rate": 9.516753219570459e-06, - "loss": 0.8487, + "learning_rate": 9.540349589470923e-06, + "loss": 0.9198, "step": 18675 }, { - "epoch": 0.5299659477866061, + "epoch": 0.5292300716937289, "grad_norm": 0.0, - "learning_rate": 9.515835217321351e-06, - "loss": 0.911, + "learning_rate": 9.539432772201606e-06, + "loss": 0.823, "step": 18676 }, { - "epoch": 0.5299943246311011, + "epoch": 0.5292584091359914, "grad_norm": 0.0, - "learning_rate": 9.514917219162004e-06, - "loss": 0.8476, + "learning_rate": 9.538515958811824e-06, + "loss": 0.7651, "step": 18677 }, { - "epoch": 0.5300227014755959, + "epoch": 0.5292867465782538, "grad_norm": 0.0, - "learning_rate": 9.513999225100173e-06, - "loss": 0.9198, + "learning_rate": 9.537599149309288e-06, + "loss": 0.8811, "step": 18678 }, { - "epoch": 0.5300510783200908, + "epoch": 0.5293150840205163, "grad_norm": 0.0, - "learning_rate": 9.51308123514361e-06, - "loss": 0.8723, + "learning_rate": 9.53668234370173e-06, + "loss": 0.9056, "step": 18679 }, { - "epoch": 0.5300794551645857, + "epoch": 0.5293434214627788, "grad_norm": 0.0, - "learning_rate": 9.512163249300074e-06, - "loss": 0.8384, + "learning_rate": 9.535765541996864e-06, + "loss": 0.8998, "step": 18680 }, { - "epoch": 0.5301078320090806, + "epoch": 0.5293717589050412, "grad_norm": 0.0, - "learning_rate": 9.51124526757732e-06, - "loss": 0.9274, + "learning_rate": 9.53484874420242e-06, + "loss": 0.8866, "step": 18681 }, { - "epoch": 0.5301362088535755, + "epoch": 0.5294000963473037, "grad_norm": 0.0, - "learning_rate": 9.510327289983095e-06, - "loss": 0.9294, + "learning_rate": 9.533931950326118e-06, + "loss": 0.8107, "step": 18682 }, { - "epoch": 0.5301645856980703, + "epoch": 0.5294284337895662, "grad_norm": 0.0, - "learning_rate": 9.509409316525159e-06, - "loss": 0.7343, + "learning_rate": 9.53301516037568e-06, + "loss": 0.8589, "step": 18683 }, { - "epoch": 0.5301929625425653, + "epoch": 0.5294567712318287, "grad_norm": 0.0, - "learning_rate": 9.508491347211266e-06, - "loss": 0.8856, + "learning_rate": 9.532098374358828e-06, + "loss": 0.867, "step": 18684 }, { - "epoch": 0.5302213393870602, + "epoch": 0.529485108674091, "grad_norm": 0.0, - "learning_rate": 9.507573382049166e-06, - "loss": 0.763, + "learning_rate": 9.531181592283285e-06, + "loss": 0.9006, "step": 18685 }, { - "epoch": 0.530249716231555, + "epoch": 0.5295134461163535, "grad_norm": 0.0, - "learning_rate": 9.506655421046617e-06, - "loss": 0.7407, + "learning_rate": 9.530264814156772e-06, + "loss": 0.9967, "step": 18686 }, { - "epoch": 0.5302780930760499, + "epoch": 0.529541783558616, "grad_norm": 0.0, - "learning_rate": 9.505737464211373e-06, - "loss": 0.9021, + "learning_rate": 9.52934803998702e-06, + "loss": 0.8327, "step": 18687 }, { - "epoch": 0.5303064699205449, + "epoch": 0.5295701210008784, "grad_norm": 0.0, - "learning_rate": 9.504819511551184e-06, - "loss": 0.8513, + "learning_rate": 9.528431269781737e-06, + "loss": 0.8651, "step": 18688 }, { - "epoch": 0.5303348467650397, + "epoch": 0.5295984584431409, "grad_norm": 0.0, - "learning_rate": 9.50390156307381e-06, - "loss": 0.9028, + "learning_rate": 9.52751450354865e-06, + "loss": 0.8224, "step": 18689 }, { - "epoch": 0.5303632236095346, + "epoch": 0.5296267958854034, "grad_norm": 0.0, - "learning_rate": 9.502983618787001e-06, - "loss": 0.8755, + "learning_rate": 9.526597741295493e-06, + "loss": 0.7591, "step": 18690 }, { - "epoch": 0.5303916004540296, + "epoch": 0.5296551333276659, "grad_norm": 0.0, - "learning_rate": 9.50206567869851e-06, - "loss": 0.8987, + "learning_rate": 9.525680983029973e-06, + "loss": 0.8896, "step": 18691 }, { - "epoch": 0.5304199772985244, + "epoch": 0.5296834707699283, "grad_norm": 0.0, - "learning_rate": 9.501147742816094e-06, - "loss": 0.8068, + "learning_rate": 9.524764228759822e-06, + "loss": 0.853, "step": 18692 }, { - "epoch": 0.5304483541430193, + "epoch": 0.5297118082121908, "grad_norm": 0.0, - "learning_rate": 9.500229811147506e-06, - "loss": 0.9306, + "learning_rate": 9.523847478492754e-06, + "loss": 0.8806, "step": 18693 }, { - "epoch": 0.5304767309875141, + "epoch": 0.5297401456544533, "grad_norm": 0.0, - "learning_rate": 9.499311883700494e-06, - "loss": 0.8975, + "learning_rate": 9.522930732236497e-06, + "loss": 0.7728, "step": 18694 }, { - "epoch": 0.5305051078320091, + "epoch": 0.5297684830967156, "grad_norm": 0.0, - "learning_rate": 9.49839396048282e-06, - "loss": 0.8624, + "learning_rate": 9.522013989998773e-06, + "loss": 0.9777, "step": 18695 }, { - "epoch": 0.530533484676504, + "epoch": 0.5297968205389781, "grad_norm": 0.0, - "learning_rate": 9.497476041502237e-06, - "loss": 0.9981, + "learning_rate": 9.521097251787302e-06, + "loss": 0.9363, "step": 18696 }, { - "epoch": 0.5305618615209988, + "epoch": 0.5298251579812406, "grad_norm": 0.0, - "learning_rate": 9.496558126766497e-06, - "loss": 0.8748, + "learning_rate": 9.520180517609806e-06, + "loss": 0.8676, "step": 18697 }, { - "epoch": 0.5305902383654938, + "epoch": 0.5298534954235031, "grad_norm": 0.0, - "learning_rate": 9.495640216283352e-06, - "loss": 0.8572, + "learning_rate": 9.519263787474008e-06, + "loss": 0.7851, "step": 18698 }, { - "epoch": 0.5306186152099887, + "epoch": 0.5298818328657655, "grad_norm": 0.0, - "learning_rate": 9.494722310060555e-06, - "loss": 0.8924, + "learning_rate": 9.518347061387629e-06, + "loss": 0.8578, "step": 18699 }, { - "epoch": 0.5306469920544835, + "epoch": 0.529910170308028, "grad_norm": 0.0, - "learning_rate": 9.493804408105863e-06, - "loss": 0.9556, + "learning_rate": 9.517430339358395e-06, + "loss": 0.8782, "step": 18700 }, { - "epoch": 0.5306753688989785, + "epoch": 0.5299385077502905, "grad_norm": 0.0, - "learning_rate": 9.49288651042703e-06, - "loss": 0.9344, + "learning_rate": 9.51651362139402e-06, + "loss": 0.8526, "step": 18701 }, { - "epoch": 0.5307037457434733, + "epoch": 0.5299668451925529, "grad_norm": 0.0, - "learning_rate": 9.491968617031803e-06, - "loss": 0.8165, + "learning_rate": 9.515596907502231e-06, + "loss": 0.8157, "step": 18702 }, { - "epoch": 0.5307321225879682, + "epoch": 0.5299951826348154, "grad_norm": 0.0, - "learning_rate": 9.491050727927943e-06, - "loss": 0.93, + "learning_rate": 9.514680197690753e-06, + "loss": 0.8619, "step": 18703 }, { - "epoch": 0.5307604994324631, + "epoch": 0.5300235200770779, "grad_norm": 0.0, - "learning_rate": 9.4901328431232e-06, - "loss": 0.9358, + "learning_rate": 9.5137634919673e-06, + "loss": 1.0096, "step": 18704 }, { - "epoch": 0.530788876276958, + "epoch": 0.5300518575193403, "grad_norm": 0.0, - "learning_rate": 9.489214962625327e-06, - "loss": 0.883, + "learning_rate": 9.512846790339598e-06, + "loss": 0.9353, "step": 18705 }, { - "epoch": 0.5308172531214529, + "epoch": 0.5300801949616027, "grad_norm": 0.0, - "learning_rate": 9.48829708644208e-06, - "loss": 0.9268, + "learning_rate": 9.51193009281537e-06, + "loss": 0.8702, "step": 18706 }, { - "epoch": 0.5308456299659478, + "epoch": 0.5301085324038652, "grad_norm": 0.0, - "learning_rate": 9.487379214581209e-06, - "loss": 0.7964, + "learning_rate": 9.511013399402333e-06, + "loss": 0.8893, "step": 18707 }, { - "epoch": 0.5308740068104427, + "epoch": 0.5301368698461277, "grad_norm": 0.0, - "learning_rate": 9.486461347050466e-06, - "loss": 0.6948, + "learning_rate": 9.510096710108218e-06, + "loss": 0.7776, "step": 18708 }, { - "epoch": 0.5309023836549376, + "epoch": 0.5301652072883901, "grad_norm": 0.0, - "learning_rate": 9.485543483857607e-06, - "loss": 0.863, + "learning_rate": 9.509180024940735e-06, + "loss": 0.9663, "step": 18709 }, { - "epoch": 0.5309307604994324, + "epoch": 0.5301935447306526, "grad_norm": 0.0, - "learning_rate": 9.484625625010388e-06, - "loss": 0.8576, + "learning_rate": 9.508263343907612e-06, + "loss": 0.9742, "step": 18710 }, { - "epoch": 0.5309591373439273, + "epoch": 0.5302218821729151, "grad_norm": 0.0, - "learning_rate": 9.483707770516561e-06, - "loss": 0.7971, + "learning_rate": 9.50734666701657e-06, + "loss": 0.8568, "step": 18711 }, { - "epoch": 0.5309875141884223, + "epoch": 0.5302502196151775, "grad_norm": 0.0, - "learning_rate": 9.482789920383876e-06, - "loss": 0.9563, + "learning_rate": 9.506429994275328e-06, + "loss": 0.8598, "step": 18712 }, { - "epoch": 0.5310158910329171, + "epoch": 0.53027855705744, "grad_norm": 0.0, - "learning_rate": 9.481872074620089e-06, - "loss": 0.9242, + "learning_rate": 9.505513325691615e-06, + "loss": 0.9174, "step": 18713 }, { - "epoch": 0.531044267877412, + "epoch": 0.5303068944997025, "grad_norm": 0.0, - "learning_rate": 9.480954233232952e-06, - "loss": 0.8511, + "learning_rate": 9.504596661273141e-06, + "loss": 0.9138, "step": 18714 }, { - "epoch": 0.531072644721907, + "epoch": 0.530335231941965, "grad_norm": 0.0, - "learning_rate": 9.480036396230216e-06, - "loss": 0.7632, + "learning_rate": 9.503680001027633e-06, + "loss": 0.853, "step": 18715 }, { - "epoch": 0.5311010215664018, + "epoch": 0.5303635693842274, "grad_norm": 0.0, - "learning_rate": 9.479118563619638e-06, - "loss": 0.8846, + "learning_rate": 9.502763344962818e-06, + "loss": 0.9001, "step": 18716 }, { - "epoch": 0.5311293984108967, + "epoch": 0.5303919068264898, "grad_norm": 0.0, - "learning_rate": 9.478200735408967e-06, - "loss": 0.8289, + "learning_rate": 9.501846693086408e-06, + "loss": 0.8167, "step": 18717 }, { - "epoch": 0.5311577752553917, + "epoch": 0.5304202442687523, "grad_norm": 0.0, - "learning_rate": 9.477282911605959e-06, - "loss": 0.832, + "learning_rate": 9.500930045406129e-06, + "loss": 0.9376, "step": 18718 }, { - "epoch": 0.5311861520998865, + "epoch": 0.5304485817110147, "grad_norm": 0.0, - "learning_rate": 9.476365092218365e-06, - "loss": 0.9509, + "learning_rate": 9.500013401929701e-06, + "loss": 0.944, "step": 18719 }, { - "epoch": 0.5312145289443814, + "epoch": 0.5304769191532772, "grad_norm": 0.0, - "learning_rate": 9.475447277253939e-06, - "loss": 0.8806, + "learning_rate": 9.499096762664845e-06, + "loss": 0.9143, "step": 18720 }, { - "epoch": 0.5312429057888762, + "epoch": 0.5305052565955397, "grad_norm": 0.0, - "learning_rate": 9.474529466720432e-06, - "loss": 1.0161, + "learning_rate": 9.498180127619288e-06, + "loss": 0.8075, "step": 18721 }, { - "epoch": 0.5312712826333712, + "epoch": 0.5305335940378022, "grad_norm": 0.0, - "learning_rate": 9.4736116606256e-06, - "loss": 0.7302, + "learning_rate": 9.497263496800741e-06, + "loss": 0.836, "step": 18722 }, { - "epoch": 0.5312996594778661, + "epoch": 0.5305619314800646, "grad_norm": 0.0, - "learning_rate": 9.472693858977195e-06, - "loss": 0.8754, + "learning_rate": 9.496346870216932e-06, + "loss": 0.9312, "step": 18723 }, { - "epoch": 0.5313280363223609, + "epoch": 0.5305902689223271, "grad_norm": 0.0, - "learning_rate": 9.471776061782968e-06, - "loss": 0.8988, + "learning_rate": 9.49543024787558e-06, + "loss": 0.9044, "step": 18724 }, { - "epoch": 0.5313564131668559, + "epoch": 0.5306186063645896, "grad_norm": 0.0, - "learning_rate": 9.470858269050669e-06, - "loss": 0.8251, + "learning_rate": 9.494513629784405e-06, + "loss": 0.7126, "step": 18725 }, { - "epoch": 0.5313847900113507, + "epoch": 0.530646943806852, "grad_norm": 0.0, - "learning_rate": 9.469940480788056e-06, - "loss": 0.8823, + "learning_rate": 9.49359701595113e-06, + "loss": 0.9209, "step": 18726 }, { - "epoch": 0.5314131668558456, + "epoch": 0.5306752812491144, "grad_norm": 0.0, - "learning_rate": 9.46902269700288e-06, - "loss": 0.9134, + "learning_rate": 9.49268040638348e-06, + "loss": 0.8435, "step": 18727 }, { - "epoch": 0.5314415437003405, + "epoch": 0.5307036186913769, "grad_norm": 0.0, - "learning_rate": 9.468104917702894e-06, - "loss": 0.9697, + "learning_rate": 9.491763801089167e-06, + "loss": 0.844, "step": 18728 }, { - "epoch": 0.5314699205448354, + "epoch": 0.5307319561336393, "grad_norm": 0.0, - "learning_rate": 9.467187142895848e-06, - "loss": 0.9284, + "learning_rate": 9.490847200075919e-06, + "loss": 0.8872, "step": 18729 }, { - "epoch": 0.5314982973893303, + "epoch": 0.5307602935759018, "grad_norm": 0.0, - "learning_rate": 9.466269372589498e-06, - "loss": 0.9482, + "learning_rate": 9.48993060335145e-06, + "loss": 0.843, "step": 18730 }, { - "epoch": 0.5315266742338252, + "epoch": 0.5307886310181643, "grad_norm": 0.0, - "learning_rate": 9.465351606791595e-06, - "loss": 0.8927, + "learning_rate": 9.489014010923486e-06, + "loss": 0.8723, "step": 18731 }, { - "epoch": 0.5315550510783201, + "epoch": 0.5308169684604268, "grad_norm": 0.0, - "learning_rate": 9.464433845509888e-06, - "loss": 0.7615, + "learning_rate": 9.48809742279975e-06, + "loss": 0.9957, "step": 18732 }, { - "epoch": 0.531583427922815, + "epoch": 0.5308453059026892, "grad_norm": 0.0, - "learning_rate": 9.463516088752135e-06, - "loss": 0.9078, + "learning_rate": 9.487180838987955e-06, + "loss": 0.973, "step": 18733 }, { - "epoch": 0.5316118047673098, + "epoch": 0.5308736433449517, "grad_norm": 0.0, - "learning_rate": 9.462598336526086e-06, - "loss": 0.9131, + "learning_rate": 9.486264259495827e-06, + "loss": 0.8672, "step": 18734 }, { - "epoch": 0.5316401816118048, + "epoch": 0.5309019807872142, "grad_norm": 0.0, - "learning_rate": 9.46168058883949e-06, - "loss": 0.8253, + "learning_rate": 9.48534768433109e-06, + "loss": 0.8797, "step": 18735 }, { - "epoch": 0.5316685584562997, + "epoch": 0.5309303182294766, "grad_norm": 0.0, - "learning_rate": 9.460762845700106e-06, - "loss": 0.8536, + "learning_rate": 9.484431113501458e-06, + "loss": 0.8512, "step": 18736 }, { - "epoch": 0.5316969353007945, + "epoch": 0.530958655671739, "grad_norm": 0.0, - "learning_rate": 9.459845107115682e-06, - "loss": 0.9332, + "learning_rate": 9.483514547014653e-06, + "loss": 0.9558, "step": 18737 }, { - "epoch": 0.5317253121452894, + "epoch": 0.5309869931140015, "grad_norm": 0.0, - "learning_rate": 9.458927373093967e-06, - "loss": 0.8429, + "learning_rate": 9.482597984878398e-06, + "loss": 0.8631, "step": 18738 }, { - "epoch": 0.5317536889897844, + "epoch": 0.531015330556264, "grad_norm": 0.0, - "learning_rate": 9.458009643642721e-06, - "loss": 0.877, + "learning_rate": 9.481681427100411e-06, + "loss": 0.8006, "step": 18739 }, { - "epoch": 0.5317820658342792, + "epoch": 0.5310436679985264, "grad_norm": 0.0, - "learning_rate": 9.457091918769685e-06, - "loss": 0.9725, + "learning_rate": 9.480764873688417e-06, + "loss": 0.8104, "step": 18740 }, { - "epoch": 0.5318104426787741, + "epoch": 0.5310720054407889, "grad_norm": 0.0, - "learning_rate": 9.456174198482623e-06, - "loss": 0.8599, + "learning_rate": 9.47984832465013e-06, + "loss": 0.9218, "step": 18741 }, { - "epoch": 0.5318388195232691, + "epoch": 0.5311003428830514, "grad_norm": 0.0, - "learning_rate": 9.455256482789285e-06, - "loss": 0.8795, + "learning_rate": 9.47893177999328e-06, + "loss": 0.9103, "step": 18742 }, { - "epoch": 0.5318671963677639, + "epoch": 0.5311286803253138, "grad_norm": 0.0, - "learning_rate": 9.454338771697414e-06, - "loss": 0.8784, + "learning_rate": 9.478015239725573e-06, + "loss": 0.7729, "step": 18743 }, { - "epoch": 0.5318955732122588, + "epoch": 0.5311570177675763, "grad_norm": 0.0, - "learning_rate": 9.453421065214773e-06, - "loss": 0.7863, + "learning_rate": 9.47709870385474e-06, + "loss": 0.9715, "step": 18744 }, { - "epoch": 0.5319239500567536, + "epoch": 0.5311853552098388, "grad_norm": 0.0, - "learning_rate": 9.452503363349107e-06, - "loss": 0.8207, + "learning_rate": 9.476182172388501e-06, + "loss": 0.9179, "step": 18745 }, { - "epoch": 0.5319523269012486, + "epoch": 0.5312136926521012, "grad_norm": 0.0, - "learning_rate": 9.451585666108168e-06, - "loss": 0.8234, + "learning_rate": 9.47526564533457e-06, + "loss": 0.9172, "step": 18746 }, { - "epoch": 0.5319807037457435, + "epoch": 0.5312420300943637, "grad_norm": 0.0, - "learning_rate": 9.450667973499712e-06, - "loss": 0.8861, + "learning_rate": 9.474349122700672e-06, + "loss": 0.8864, "step": 18747 }, { - "epoch": 0.5320090805902383, + "epoch": 0.5312703675366262, "grad_norm": 0.0, - "learning_rate": 9.449750285531488e-06, - "loss": 0.9239, + "learning_rate": 9.473432604494532e-06, + "loss": 0.8438, "step": 18748 }, { - "epoch": 0.5320374574347333, + "epoch": 0.5312987049788886, "grad_norm": 0.0, - "learning_rate": 9.448832602211246e-06, - "loss": 0.7606, + "learning_rate": 9.47251609072386e-06, + "loss": 0.9788, "step": 18749 }, { - "epoch": 0.5320658342792282, + "epoch": 0.531327042421151, "grad_norm": 0.0, - "learning_rate": 9.447914923546741e-06, - "loss": 0.9082, + "learning_rate": 9.471599581396383e-06, + "loss": 0.9257, "step": 18750 }, { - "epoch": 0.532094211123723, + "epoch": 0.5313553798634135, "grad_norm": 0.0, - "learning_rate": 9.446997249545725e-06, - "loss": 0.9042, + "learning_rate": 9.470683076519815e-06, + "loss": 0.8349, "step": 18751 }, { - "epoch": 0.532122587968218, + "epoch": 0.531383717305676, "grad_norm": 0.0, - "learning_rate": 9.446079580215946e-06, - "loss": 0.9689, + "learning_rate": 9.469766576101882e-06, + "loss": 0.9345, "step": 18752 }, { - "epoch": 0.5321509648127128, + "epoch": 0.5314120547479384, "grad_norm": 0.0, - "learning_rate": 9.445161915565159e-06, - "loss": 0.9357, + "learning_rate": 9.468850080150306e-06, + "loss": 0.8999, "step": 18753 }, { - "epoch": 0.5321793416572077, + "epoch": 0.5314403921902009, "grad_norm": 0.0, - "learning_rate": 9.444244255601115e-06, - "loss": 0.774, + "learning_rate": 9.467933588672799e-06, + "loss": 0.868, "step": 18754 }, { - "epoch": 0.5322077185017026, + "epoch": 0.5314687296324634, "grad_norm": 0.0, - "learning_rate": 9.443326600331562e-06, - "loss": 0.8575, + "learning_rate": 9.467017101677084e-06, + "loss": 0.9183, "step": 18755 }, { - "epoch": 0.5322360953461975, + "epoch": 0.5314970670747259, "grad_norm": 0.0, - "learning_rate": 9.44240894976425e-06, - "loss": 0.8917, + "learning_rate": 9.466100619170887e-06, + "loss": 0.9424, "step": 18756 }, { - "epoch": 0.5322644721906924, + "epoch": 0.5315254045169883, "grad_norm": 0.0, - "learning_rate": 9.441491303906942e-06, - "loss": 0.9226, + "learning_rate": 9.465184141161918e-06, + "loss": 0.9097, "step": 18757 }, { - "epoch": 0.5322928490351873, + "epoch": 0.5315537419592508, "grad_norm": 0.0, - "learning_rate": 9.440573662767382e-06, - "loss": 0.8099, + "learning_rate": 9.464267667657903e-06, + "loss": 0.8818, "step": 18758 }, { - "epoch": 0.5323212258796822, + "epoch": 0.5315820794015133, "grad_norm": 0.0, - "learning_rate": 9.439656026353318e-06, - "loss": 0.8332, + "learning_rate": 9.46335119866656e-06, + "loss": 0.8978, "step": 18759 }, { - "epoch": 0.5323496027241771, + "epoch": 0.5316104168437756, "grad_norm": 0.0, - "learning_rate": 9.438738394672507e-06, - "loss": 0.9683, + "learning_rate": 9.462434734195608e-06, + "loss": 0.8596, "step": 18760 }, { - "epoch": 0.5323779795686719, + "epoch": 0.5316387542860381, "grad_norm": 0.0, - "learning_rate": 9.437820767732699e-06, - "loss": 0.7871, + "learning_rate": 9.461518274252772e-06, + "loss": 0.8996, "step": 18761 }, { - "epoch": 0.5324063564131668, + "epoch": 0.5316670917283006, "grad_norm": 0.0, - "learning_rate": 9.436903145541642e-06, - "loss": 0.9181, + "learning_rate": 9.460601818845764e-06, + "loss": 0.8487, "step": 18762 }, { - "epoch": 0.5324347332576618, + "epoch": 0.5316954291705631, "grad_norm": 0.0, - "learning_rate": 9.43598552810709e-06, - "loss": 0.8481, + "learning_rate": 9.459685367982308e-06, + "loss": 0.8834, "step": 18763 }, { - "epoch": 0.5324631101021566, + "epoch": 0.5317237666128255, "grad_norm": 0.0, - "learning_rate": 9.435067915436796e-06, - "loss": 0.8082, + "learning_rate": 9.458768921670124e-06, + "loss": 0.9652, "step": 18764 }, { - "epoch": 0.5324914869466515, + "epoch": 0.531752104055088, "grad_norm": 0.0, - "learning_rate": 9.434150307538507e-06, - "loss": 0.9184, + "learning_rate": 9.457852479916927e-06, + "loss": 0.9358, "step": 18765 }, { - "epoch": 0.5325198637911465, + "epoch": 0.5317804414973505, "grad_norm": 0.0, - "learning_rate": 9.433232704419974e-06, - "loss": 0.7525, + "learning_rate": 9.456936042730445e-06, + "loss": 0.8842, "step": 18766 }, { - "epoch": 0.5325482406356413, + "epoch": 0.5318087789396129, "grad_norm": 0.0, - "learning_rate": 9.432315106088951e-06, - "loss": 0.8082, + "learning_rate": 9.456019610118388e-06, + "loss": 0.9457, "step": 18767 }, { - "epoch": 0.5325766174801362, + "epoch": 0.5318371163818754, "grad_norm": 0.0, - "learning_rate": 9.431397512553188e-06, - "loss": 0.8714, + "learning_rate": 9.45510318208848e-06, + "loss": 0.8254, "step": 18768 }, { - "epoch": 0.5326049943246312, + "epoch": 0.5318654538241379, "grad_norm": 0.0, - "learning_rate": 9.430479923820434e-06, - "loss": 0.9151, + "learning_rate": 9.454186758648444e-06, + "loss": 0.9063, "step": 18769 }, { - "epoch": 0.532633371169126, + "epoch": 0.5318937912664002, "grad_norm": 0.0, - "learning_rate": 9.429562339898446e-06, - "loss": 0.8275, + "learning_rate": 9.453270339805992e-06, + "loss": 0.951, "step": 18770 }, { - "epoch": 0.5326617480136209, + "epoch": 0.5319221287086627, "grad_norm": 0.0, - "learning_rate": 9.428644760794965e-06, - "loss": 0.7993, + "learning_rate": 9.452353925568849e-06, + "loss": 0.9572, "step": 18771 }, { - "epoch": 0.5326901248581157, + "epoch": 0.5319504661509252, "grad_norm": 0.0, - "learning_rate": 9.42772718651775e-06, - "loss": 0.9077, + "learning_rate": 9.451437515944731e-06, + "loss": 0.8939, "step": 18772 }, { - "epoch": 0.5327185017026107, + "epoch": 0.5319788035931877, "grad_norm": 0.0, - "learning_rate": 9.426809617074548e-06, - "loss": 0.8345, + "learning_rate": 9.450521110941356e-06, + "loss": 0.7906, "step": 18773 }, { - "epoch": 0.5327468785471056, + "epoch": 0.5320071410354501, "grad_norm": 0.0, - "learning_rate": 9.425892052473114e-06, - "loss": 0.8527, + "learning_rate": 9.449604710566452e-06, + "loss": 0.8386, "step": 18774 }, { - "epoch": 0.5327752553916004, + "epoch": 0.5320354784777126, "grad_norm": 0.0, - "learning_rate": 9.424974492721194e-06, - "loss": 0.915, + "learning_rate": 9.448688314827727e-06, + "loss": 0.8027, "step": 18775 }, { - "epoch": 0.5328036322360954, + "epoch": 0.5320638159199751, "grad_norm": 0.0, - "learning_rate": 9.424056937826539e-06, - "loss": 0.8425, + "learning_rate": 9.447771923732906e-06, + "loss": 0.8354, "step": 18776 }, { - "epoch": 0.5328320090805903, + "epoch": 0.5320921533622375, "grad_norm": 0.0, - "learning_rate": 9.423139387796904e-06, - "loss": 0.7743, + "learning_rate": 9.446855537289708e-06, + "loss": 0.869, "step": 18777 }, { - "epoch": 0.5328603859250851, + "epoch": 0.5321204908045, "grad_norm": 0.0, - "learning_rate": 9.422221842640035e-06, - "loss": 0.8466, + "learning_rate": 9.44593915550585e-06, + "loss": 0.8876, "step": 18778 }, { - "epoch": 0.53288876276958, + "epoch": 0.5321488282467625, "grad_norm": 0.0, - "learning_rate": 9.421304302363683e-06, - "loss": 0.9115, + "learning_rate": 9.445022778389057e-06, + "loss": 0.7986, "step": 18779 }, { - "epoch": 0.5329171396140749, + "epoch": 0.532177165689025, "grad_norm": 0.0, - "learning_rate": 9.420386766975602e-06, - "loss": 0.8299, + "learning_rate": 9.444106405947038e-06, + "loss": 0.9072, "step": 18780 }, { - "epoch": 0.5329455164585698, + "epoch": 0.5322055031312873, "grad_norm": 0.0, - "learning_rate": 9.419469236483539e-06, - "loss": 0.7765, + "learning_rate": 9.443190038187517e-06, + "loss": 1.0038, "step": 18781 }, { - "epoch": 0.5329738933030647, + "epoch": 0.5322338405735498, "grad_norm": 0.0, - "learning_rate": 9.418551710895243e-06, - "loss": 0.8546, + "learning_rate": 9.442273675118218e-06, + "loss": 0.8272, "step": 18782 }, { - "epoch": 0.5330022701475596, + "epoch": 0.5322621780158123, "grad_norm": 0.0, - "learning_rate": 9.41763419021847e-06, - "loss": 0.918, + "learning_rate": 9.441357316746849e-06, + "loss": 0.9501, "step": 18783 }, { - "epoch": 0.5330306469920545, + "epoch": 0.5322905154580747, "grad_norm": 0.0, - "learning_rate": 9.416716674460966e-06, - "loss": 0.9901, + "learning_rate": 9.440440963081136e-06, + "loss": 0.9523, "step": 18784 }, { - "epoch": 0.5330590238365494, + "epoch": 0.5323188529003372, "grad_norm": 0.0, - "learning_rate": 9.415799163630482e-06, - "loss": 0.9661, + "learning_rate": 9.4395246141288e-06, + "loss": 0.8608, "step": 18785 }, { - "epoch": 0.5330874006810443, + "epoch": 0.5323471903425997, "grad_norm": 0.0, - "learning_rate": 9.414881657734768e-06, - "loss": 0.871, + "learning_rate": 9.438608269897552e-06, + "loss": 0.9751, "step": 18786 }, { - "epoch": 0.5331157775255392, + "epoch": 0.5323755277848622, "grad_norm": 0.0, - "learning_rate": 9.413964156781575e-06, - "loss": 0.8559, + "learning_rate": 9.437691930395121e-06, + "loss": 1.0103, "step": 18787 }, { - "epoch": 0.533144154370034, + "epoch": 0.5324038652271246, "grad_norm": 0.0, - "learning_rate": 9.413046660778654e-06, - "loss": 0.8922, + "learning_rate": 9.436775595629214e-06, + "loss": 0.8579, "step": 18788 }, { - "epoch": 0.5331725312145289, + "epoch": 0.5324322026693871, "grad_norm": 0.0, - "learning_rate": 9.412129169733756e-06, - "loss": 0.931, + "learning_rate": 9.435859265607555e-06, + "loss": 0.8704, "step": 18789 }, { - "epoch": 0.5332009080590239, + "epoch": 0.5324605401116496, "grad_norm": 0.0, - "learning_rate": 9.411211683654626e-06, - "loss": 0.848, + "learning_rate": 9.434942940337867e-06, + "loss": 0.9011, "step": 18790 }, { - "epoch": 0.5332292849035187, + "epoch": 0.5324888775539119, "grad_norm": 0.0, - "learning_rate": 9.410294202549019e-06, - "loss": 0.8336, + "learning_rate": 9.434026619827861e-06, + "loss": 0.9499, "step": 18791 }, { - "epoch": 0.5332576617480136, + "epoch": 0.5325172149961744, "grad_norm": 0.0, - "learning_rate": 9.409376726424686e-06, - "loss": 0.7962, + "learning_rate": 9.433110304085259e-06, + "loss": 0.8759, "step": 18792 }, { - "epoch": 0.5332860385925086, + "epoch": 0.5325455524384369, "grad_norm": 0.0, - "learning_rate": 9.40845925528937e-06, - "loss": 0.9471, + "learning_rate": 9.432193993117784e-06, + "loss": 1.0055, "step": 18793 }, { - "epoch": 0.5333144154370034, + "epoch": 0.5325738898806993, "grad_norm": 0.0, - "learning_rate": 9.407541789150829e-06, - "loss": 0.8018, + "learning_rate": 9.431277686933145e-06, + "loss": 0.8213, "step": 18794 }, { - "epoch": 0.5333427922814983, + "epoch": 0.5326022273229618, "grad_norm": 0.0, - "learning_rate": 9.406624328016807e-06, - "loss": 0.8757, + "learning_rate": 9.430361385539069e-06, + "loss": 0.9024, "step": 18795 }, { - "epoch": 0.5333711691259931, + "epoch": 0.5326305647652243, "grad_norm": 0.0, - "learning_rate": 9.405706871895055e-06, - "loss": 0.9409, + "learning_rate": 9.429445088943267e-06, + "loss": 0.9345, "step": 18796 }, { - "epoch": 0.5333995459704881, + "epoch": 0.5326589022074868, "grad_norm": 0.0, - "learning_rate": 9.404789420793327e-06, - "loss": 0.9562, + "learning_rate": 9.428528797153462e-06, + "loss": 0.8009, "step": 18797 }, { - "epoch": 0.533427922814983, + "epoch": 0.5326872396497492, "grad_norm": 0.0, - "learning_rate": 9.403871974719368e-06, - "loss": 0.8625, + "learning_rate": 9.427612510177374e-06, + "loss": 0.9678, "step": 18798 }, { - "epoch": 0.5334562996594778, + "epoch": 0.5327155770920117, "grad_norm": 0.0, - "learning_rate": 9.402954533680927e-06, - "loss": 0.8089, + "learning_rate": 9.426696228022714e-06, + "loss": 0.9184, "step": 18799 }, { - "epoch": 0.5334846765039728, + "epoch": 0.5327439145342742, "grad_norm": 0.0, - "learning_rate": 9.402037097685759e-06, - "loss": 0.8371, + "learning_rate": 9.42577995069721e-06, + "loss": 0.8546, "step": 18800 }, { - "epoch": 0.5335130533484677, + "epoch": 0.5327722519765365, "grad_norm": 0.0, - "learning_rate": 9.40111966674161e-06, - "loss": 0.8194, + "learning_rate": 9.424863678208568e-06, + "loss": 0.839, "step": 18801 }, { - "epoch": 0.5335414301929625, + "epoch": 0.532800589418799, "grad_norm": 0.0, - "learning_rate": 9.400202240856225e-06, - "loss": 0.9808, + "learning_rate": 9.423947410564516e-06, + "loss": 0.9247, "step": 18802 }, { - "epoch": 0.5335698070374575, + "epoch": 0.5328289268610615, "grad_norm": 0.0, - "learning_rate": 9.399284820037361e-06, - "loss": 0.945, + "learning_rate": 9.42303114777277e-06, + "loss": 0.9522, "step": 18803 }, { - "epoch": 0.5335981838819523, + "epoch": 0.532857264303324, "grad_norm": 0.0, - "learning_rate": 9.398367404292769e-06, - "loss": 0.8474, + "learning_rate": 9.422114889841045e-06, + "loss": 0.8212, "step": 18804 }, { - "epoch": 0.5336265607264472, + "epoch": 0.5328856017455864, "grad_norm": 0.0, - "learning_rate": 9.397449993630193e-06, - "loss": 0.8386, + "learning_rate": 9.421198636777059e-06, + "loss": 0.8353, "step": 18805 }, { - "epoch": 0.5336549375709421, + "epoch": 0.5329139391878489, "grad_norm": 0.0, - "learning_rate": 9.396532588057384e-06, - "loss": 0.9619, + "learning_rate": 9.420282388588539e-06, + "loss": 0.922, "step": 18806 }, { - "epoch": 0.533683314415437, + "epoch": 0.5329422766301114, "grad_norm": 0.0, - "learning_rate": 9.39561518758209e-06, - "loss": 0.8281, + "learning_rate": 9.419366145283188e-06, + "loss": 0.891, "step": 18807 }, { - "epoch": 0.5337116912599319, + "epoch": 0.5329706140723738, "grad_norm": 0.0, - "learning_rate": 9.394697792212064e-06, - "loss": 0.7737, + "learning_rate": 9.418449906868736e-06, + "loss": 0.905, "step": 18808 }, { - "epoch": 0.5337400681044268, + "epoch": 0.5329989515146363, "grad_norm": 0.0, - "learning_rate": 9.393780401955053e-06, - "loss": 0.9036, + "learning_rate": 9.417533673352895e-06, + "loss": 0.9189, "step": 18809 }, { - "epoch": 0.5337684449489217, + "epoch": 0.5330272889568988, "grad_norm": 0.0, - "learning_rate": 9.392863016818803e-06, - "loss": 0.8912, + "learning_rate": 9.416617444743382e-06, + "loss": 0.9219, "step": 18810 }, { - "epoch": 0.5337968217934166, + "epoch": 0.5330556263991613, "grad_norm": 0.0, - "learning_rate": 9.391945636811068e-06, - "loss": 0.8642, + "learning_rate": 9.415701221047923e-06, + "loss": 0.8972, "step": 18811 }, { - "epoch": 0.5338251986379114, + "epoch": 0.5330839638414236, "grad_norm": 0.0, - "learning_rate": 9.391028261939598e-06, - "loss": 0.9692, + "learning_rate": 9.414785002274225e-06, + "loss": 0.9362, "step": 18812 }, { - "epoch": 0.5338535754824063, + "epoch": 0.5331123012836861, "grad_norm": 0.0, - "learning_rate": 9.390110892212135e-06, - "loss": 0.8824, + "learning_rate": 9.41386878843001e-06, + "loss": 0.9163, "step": 18813 }, { - "epoch": 0.5338819523269013, + "epoch": 0.5331406387259486, "grad_norm": 0.0, - "learning_rate": 9.389193527636436e-06, - "loss": 0.778, + "learning_rate": 9.412952579523e-06, + "loss": 0.8846, "step": 18814 }, { - "epoch": 0.5339103291713961, + "epoch": 0.533168976168211, "grad_norm": 0.0, - "learning_rate": 9.388276168220248e-06, - "loss": 0.9566, + "learning_rate": 9.412036375560903e-06, + "loss": 0.9919, "step": 18815 }, { - "epoch": 0.533938706015891, + "epoch": 0.5331973136104735, "grad_norm": 0.0, - "learning_rate": 9.387358813971314e-06, - "loss": 0.955, + "learning_rate": 9.411120176551445e-06, + "loss": 0.8109, "step": 18816 }, { - "epoch": 0.533967082860386, + "epoch": 0.533225651052736, "grad_norm": 0.0, - "learning_rate": 9.386441464897387e-06, - "loss": 0.9772, + "learning_rate": 9.410203982502338e-06, + "loss": 0.8682, "step": 18817 }, { - "epoch": 0.5339954597048808, + "epoch": 0.5332539884949984, "grad_norm": 0.0, - "learning_rate": 9.385524121006221e-06, - "loss": 0.8459, + "learning_rate": 9.409287793421302e-06, + "loss": 0.9149, "step": 18818 }, { - "epoch": 0.5340238365493757, + "epoch": 0.5332823259372609, "grad_norm": 0.0, - "learning_rate": 9.38460678230556e-06, - "loss": 0.8191, + "learning_rate": 9.40837160931606e-06, + "loss": 0.8965, "step": 18819 }, { - "epoch": 0.5340522133938707, + "epoch": 0.5333106633795234, "grad_norm": 0.0, - "learning_rate": 9.383689448803151e-06, - "loss": 0.8171, + "learning_rate": 9.407455430194318e-06, + "loss": 0.8468, "step": 18820 }, { - "epoch": 0.5340805902383655, + "epoch": 0.5333390008217859, "grad_norm": 0.0, - "learning_rate": 9.382772120506747e-06, - "loss": 0.7345, + "learning_rate": 9.4065392560638e-06, + "loss": 1.0217, "step": 18821 }, { - "epoch": 0.5341089670828604, + "epoch": 0.5333673382640483, "grad_norm": 0.0, - "learning_rate": 9.381854797424094e-06, - "loss": 0.7916, + "learning_rate": 9.405623086932223e-06, + "loss": 0.8091, "step": 18822 }, { - "epoch": 0.5341373439273552, + "epoch": 0.5333956757063107, "grad_norm": 0.0, - "learning_rate": 9.380937479562941e-06, - "loss": 0.8869, + "learning_rate": 9.404706922807301e-06, + "loss": 0.8665, "step": 18823 }, { - "epoch": 0.5341657207718502, + "epoch": 0.5334240131485732, "grad_norm": 0.0, - "learning_rate": 9.380020166931037e-06, - "loss": 0.8544, + "learning_rate": 9.40379076369676e-06, + "loss": 0.8207, "step": 18824 }, { - "epoch": 0.5341940976163451, + "epoch": 0.5334523505908356, "grad_norm": 0.0, - "learning_rate": 9.379102859536131e-06, - "loss": 0.8158, + "learning_rate": 9.402874609608304e-06, + "loss": 0.9673, "step": 18825 }, { - "epoch": 0.5342224744608399, + "epoch": 0.5334806880330981, "grad_norm": 0.0, - "learning_rate": 9.37818555738597e-06, - "loss": 0.9271, + "learning_rate": 9.401958460549658e-06, + "loss": 0.9257, "step": 18826 }, { - "epoch": 0.5342508513053349, + "epoch": 0.5335090254753606, "grad_norm": 0.0, - "learning_rate": 9.377268260488305e-06, - "loss": 0.7924, + "learning_rate": 9.401042316528542e-06, + "loss": 0.9156, "step": 18827 }, { - "epoch": 0.5342792281498298, + "epoch": 0.5335373629176231, "grad_norm": 0.0, - "learning_rate": 9.376350968850883e-06, - "loss": 0.8823, + "learning_rate": 9.400126177552666e-06, + "loss": 0.8289, "step": 18828 }, { - "epoch": 0.5343076049943246, + "epoch": 0.5335657003598855, "grad_norm": 0.0, - "learning_rate": 9.375433682481451e-06, - "loss": 0.7688, + "learning_rate": 9.399210043629751e-06, + "loss": 0.8977, "step": 18829 }, { - "epoch": 0.5343359818388195, + "epoch": 0.533594037802148, "grad_norm": 0.0, - "learning_rate": 9.37451640138776e-06, - "loss": 0.8975, + "learning_rate": 9.398293914767511e-06, + "loss": 0.9302, "step": 18830 }, { - "epoch": 0.5343643586833144, + "epoch": 0.5336223752444105, "grad_norm": 0.0, - "learning_rate": 9.373599125577557e-06, - "loss": 0.8086, + "learning_rate": 9.397377790973664e-06, + "loss": 0.8858, "step": 18831 }, { - "epoch": 0.5343927355278093, + "epoch": 0.5336507126866729, "grad_norm": 0.0, - "learning_rate": 9.372681855058587e-06, - "loss": 0.8561, + "learning_rate": 9.396461672255934e-06, + "loss": 0.9523, "step": 18832 }, { - "epoch": 0.5344211123723042, + "epoch": 0.5336790501289354, "grad_norm": 0.0, - "learning_rate": 9.371764589838605e-06, - "loss": 0.8992, + "learning_rate": 9.395545558622025e-06, + "loss": 0.9158, "step": 18833 }, { - "epoch": 0.5344494892167991, + "epoch": 0.5337073875711978, "grad_norm": 0.0, - "learning_rate": 9.370847329925354e-06, - "loss": 0.7994, + "learning_rate": 9.394629450079661e-06, + "loss": 0.886, "step": 18834 }, { - "epoch": 0.534477866061294, + "epoch": 0.5337357250134603, "grad_norm": 0.0, - "learning_rate": 9.369930075326586e-06, - "loss": 0.9971, + "learning_rate": 9.39371334663656e-06, + "loss": 0.8664, "step": 18835 }, { - "epoch": 0.5345062429057889, + "epoch": 0.5337640624557227, "grad_norm": 0.0, - "learning_rate": 9.369012826050046e-06, - "loss": 0.9794, + "learning_rate": 9.392797248300435e-06, + "loss": 0.8401, "step": 18836 }, { - "epoch": 0.5345346197502838, + "epoch": 0.5337923998979852, "grad_norm": 0.0, - "learning_rate": 9.368095582103482e-06, - "loss": 0.9159, + "learning_rate": 9.391881155079007e-06, + "loss": 0.8554, "step": 18837 }, { - "epoch": 0.5345629965947787, + "epoch": 0.5338207373402477, "grad_norm": 0.0, - "learning_rate": 9.367178343494644e-06, - "loss": 0.9811, + "learning_rate": 9.390965066979987e-06, + "loss": 0.8497, "step": 18838 }, { - "epoch": 0.5345913734392735, + "epoch": 0.5338490747825101, "grad_norm": 0.0, - "learning_rate": 9.36626111023128e-06, - "loss": 0.867, + "learning_rate": 9.390048984011095e-06, + "loss": 0.8703, "step": 18839 }, { - "epoch": 0.5346197502837684, + "epoch": 0.5338774122247726, "grad_norm": 0.0, - "learning_rate": 9.365343882321134e-06, - "loss": 0.771, + "learning_rate": 9.38913290618005e-06, + "loss": 0.7805, "step": 18840 }, { - "epoch": 0.5346481271282634, + "epoch": 0.5339057496670351, "grad_norm": 0.0, - "learning_rate": 9.36442665977196e-06, - "loss": 0.8448, + "learning_rate": 9.388216833494562e-06, + "loss": 0.8673, "step": 18841 }, { - "epoch": 0.5346765039727582, + "epoch": 0.5339340871092975, "grad_norm": 0.0, - "learning_rate": 9.363509442591502e-06, - "loss": 0.7228, + "learning_rate": 9.38730076596235e-06, + "loss": 1.0151, "step": 18842 }, { - "epoch": 0.5347048808172531, + "epoch": 0.53396242455156, "grad_norm": 0.0, - "learning_rate": 9.362592230787507e-06, - "loss": 0.8186, + "learning_rate": 9.386384703591133e-06, + "loss": 0.8026, "step": 18843 }, { - "epoch": 0.5347332576617481, + "epoch": 0.5339907619938224, "grad_norm": 0.0, - "learning_rate": 9.361675024367724e-06, - "loss": 0.8709, + "learning_rate": 9.385468646388624e-06, + "loss": 1.0521, "step": 18844 }, { - "epoch": 0.5347616345062429, + "epoch": 0.5340190994360849, "grad_norm": 0.0, - "learning_rate": 9.360757823339904e-06, - "loss": 0.8866, + "learning_rate": 9.384552594362545e-06, + "loss": 0.9384, "step": 18845 }, { - "epoch": 0.5347900113507378, + "epoch": 0.5340474368783473, "grad_norm": 0.0, - "learning_rate": 9.359840627711788e-06, - "loss": 0.8922, + "learning_rate": 9.383636547520604e-06, + "loss": 0.8319, "step": 18846 }, { - "epoch": 0.5348183881952326, + "epoch": 0.5340757743206098, "grad_norm": 0.0, - "learning_rate": 9.358923437491127e-06, - "loss": 0.8518, + "learning_rate": 9.382720505870521e-06, + "loss": 0.7997, "step": 18847 }, { - "epoch": 0.5348467650397276, + "epoch": 0.5341041117628723, "grad_norm": 0.0, - "learning_rate": 9.358006252685667e-06, - "loss": 0.8796, + "learning_rate": 9.381804469420015e-06, + "loss": 0.7847, "step": 18848 }, { - "epoch": 0.5348751418842225, + "epoch": 0.5341324492051347, "grad_norm": 0.0, - "learning_rate": 9.35708907330316e-06, - "loss": 0.8169, + "learning_rate": 9.380888438176797e-06, + "loss": 0.8305, "step": 18849 }, { - "epoch": 0.5349035187287173, + "epoch": 0.5341607866473972, "grad_norm": 0.0, - "learning_rate": 9.356171899351351e-06, - "loss": 0.8351, + "learning_rate": 9.379972412148585e-06, + "loss": 0.823, "step": 18850 }, { - "epoch": 0.5349318955732123, + "epoch": 0.5341891240896597, "grad_norm": 0.0, - "learning_rate": 9.355254730837983e-06, - "loss": 0.8573, + "learning_rate": 9.3790563913431e-06, + "loss": 0.8796, "step": 18851 }, { - "epoch": 0.5349602724177072, + "epoch": 0.5342174615319222, "grad_norm": 0.0, - "learning_rate": 9.354337567770812e-06, - "loss": 0.844, + "learning_rate": 9.37814037576805e-06, + "loss": 0.8347, "step": 18852 }, { - "epoch": 0.534988649262202, + "epoch": 0.5342457989741846, "grad_norm": 0.0, - "learning_rate": 9.353420410157579e-06, - "loss": 0.9974, + "learning_rate": 9.377224365431158e-06, + "loss": 0.8756, "step": 18853 }, { - "epoch": 0.5350170261066969, + "epoch": 0.534274136416447, "grad_norm": 0.0, - "learning_rate": 9.352503258006031e-06, - "loss": 0.8398, + "learning_rate": 9.376308360340132e-06, + "loss": 0.8455, "step": 18854 }, { - "epoch": 0.5350454029511919, + "epoch": 0.5343024738587095, "grad_norm": 0.0, - "learning_rate": 9.351586111323921e-06, - "loss": 0.794, + "learning_rate": 9.375392360502694e-06, + "loss": 0.9026, "step": 18855 }, { - "epoch": 0.5350737797956867, + "epoch": 0.5343308113009719, "grad_norm": 0.0, - "learning_rate": 9.35066897011899e-06, - "loss": 0.8525, + "learning_rate": 9.374476365926559e-06, + "loss": 0.8993, "step": 18856 }, { - "epoch": 0.5351021566401816, + "epoch": 0.5343591487432344, "grad_norm": 0.0, - "learning_rate": 9.349751834398986e-06, - "loss": 0.9659, + "learning_rate": 9.37356037661944e-06, + "loss": 0.7322, "step": 18857 }, { - "epoch": 0.5351305334846765, + "epoch": 0.5343874861854969, "grad_norm": 0.0, - "learning_rate": 9.34883470417166e-06, - "loss": 0.8988, + "learning_rate": 9.372644392589059e-06, + "loss": 0.9596, "step": 18858 }, { - "epoch": 0.5351589103291714, + "epoch": 0.5344158236277594, "grad_norm": 0.0, - "learning_rate": 9.347917579444755e-06, - "loss": 0.8306, + "learning_rate": 9.371728413843122e-06, + "loss": 0.8976, "step": 18859 }, { - "epoch": 0.5351872871736663, + "epoch": 0.5344441610700218, "grad_norm": 0.0, - "learning_rate": 9.34700046022602e-06, - "loss": 0.8837, + "learning_rate": 9.370812440389351e-06, + "loss": 0.9409, "step": 18860 }, { - "epoch": 0.5352156640181612, + "epoch": 0.5344724985122843, "grad_norm": 0.0, - "learning_rate": 9.3460833465232e-06, - "loss": 0.7925, + "learning_rate": 9.369896472235463e-06, + "loss": 0.9226, "step": 18861 }, { - "epoch": 0.5352440408626561, + "epoch": 0.5345008359545468, "grad_norm": 0.0, - "learning_rate": 9.345166238344047e-06, - "loss": 0.9166, + "learning_rate": 9.368980509389167e-06, + "loss": 0.9453, "step": 18862 }, { - "epoch": 0.535272417707151, + "epoch": 0.5345291733968092, "grad_norm": 0.0, - "learning_rate": 9.344249135696298e-06, - "loss": 0.9068, + "learning_rate": 9.368064551858183e-06, + "loss": 0.8316, "step": 18863 }, { - "epoch": 0.5353007945516458, + "epoch": 0.5345575108390717, "grad_norm": 0.0, - "learning_rate": 9.343332038587707e-06, - "loss": 0.8889, + "learning_rate": 9.367148599650231e-06, + "loss": 0.941, "step": 18864 }, { - "epoch": 0.5353291713961408, + "epoch": 0.5345858482813342, "grad_norm": 0.0, - "learning_rate": 9.342414947026026e-06, - "loss": 0.9598, + "learning_rate": 9.366232652773016e-06, + "loss": 0.8458, "step": 18865 }, { - "epoch": 0.5353575482406356, + "epoch": 0.5346141857235965, "grad_norm": 0.0, - "learning_rate": 9.341497861018991e-06, - "loss": 0.9057, + "learning_rate": 9.365316711234264e-06, + "loss": 0.8564, "step": 18866 }, { - "epoch": 0.5353859250851305, + "epoch": 0.534642523165859, "grad_norm": 0.0, - "learning_rate": 9.340580780574355e-06, - "loss": 0.8765, + "learning_rate": 9.36440077504168e-06, + "loss": 0.9056, "step": 18867 }, { - "epoch": 0.5354143019296255, + "epoch": 0.5346708606081215, "grad_norm": 0.0, - "learning_rate": 9.339663705699862e-06, - "loss": 0.8871, + "learning_rate": 9.363484844202985e-06, + "loss": 0.8738, "step": 18868 }, { - "epoch": 0.5354426787741203, + "epoch": 0.534699198050384, "grad_norm": 0.0, - "learning_rate": 9.33874663640326e-06, - "loss": 0.9327, + "learning_rate": 9.362568918725895e-06, + "loss": 0.9518, "step": 18869 }, { - "epoch": 0.5354710556186152, + "epoch": 0.5347275354926464, "grad_norm": 0.0, - "learning_rate": 9.337829572692293e-06, - "loss": 0.8138, + "learning_rate": 9.361652998618124e-06, + "loss": 0.9193, "step": 18870 }, { - "epoch": 0.53549943246311, + "epoch": 0.5347558729349089, "grad_norm": 0.0, - "learning_rate": 9.336912514574712e-06, - "loss": 0.9031, + "learning_rate": 9.360737083887385e-06, + "loss": 0.9607, "step": 18871 }, { - "epoch": 0.535527809307605, + "epoch": 0.5347842103771714, "grad_norm": 0.0, - "learning_rate": 9.33599546205826e-06, - "loss": 0.951, + "learning_rate": 9.359821174541399e-06, + "loss": 0.8807, "step": 18872 }, { - "epoch": 0.5355561861520999, + "epoch": 0.5348125478194338, "grad_norm": 0.0, - "learning_rate": 9.335078415150685e-06, - "loss": 0.9407, + "learning_rate": 9.358905270587873e-06, + "loss": 0.7605, "step": 18873 }, { - "epoch": 0.5355845629965947, + "epoch": 0.5348408852616963, "grad_norm": 0.0, - "learning_rate": 9.334161373859729e-06, - "loss": 0.9152, + "learning_rate": 9.357989372034527e-06, + "loss": 0.7926, "step": 18874 }, { - "epoch": 0.5356129398410897, + "epoch": 0.5348692227039588, "grad_norm": 0.0, - "learning_rate": 9.333244338193145e-06, - "loss": 0.9382, + "learning_rate": 9.357073478889074e-06, + "loss": 0.8672, "step": 18875 }, { - "epoch": 0.5356413166855846, + "epoch": 0.5348975601462213, "grad_norm": 0.0, - "learning_rate": 9.332327308158676e-06, - "loss": 0.8939, + "learning_rate": 9.35615759115923e-06, + "loss": 0.9236, "step": 18876 }, { - "epoch": 0.5356696935300794, + "epoch": 0.5349258975884836, "grad_norm": 0.0, - "learning_rate": 9.331410283764066e-06, - "loss": 0.8524, + "learning_rate": 9.355241708852712e-06, + "loss": 0.8078, "step": 18877 }, { - "epoch": 0.5356980703745744, + "epoch": 0.5349542350307461, "grad_norm": 0.0, - "learning_rate": 9.330493265017062e-06, - "loss": 0.8151, + "learning_rate": 9.35432583197723e-06, + "loss": 0.8643, "step": 18878 }, { - "epoch": 0.5357264472190693, + "epoch": 0.5349825724730086, "grad_norm": 0.0, - "learning_rate": 9.329576251925416e-06, - "loss": 0.8642, + "learning_rate": 9.353409960540506e-06, + "loss": 0.8883, "step": 18879 }, { - "epoch": 0.5357548240635641, + "epoch": 0.535010909915271, "grad_norm": 0.0, - "learning_rate": 9.328659244496867e-06, - "loss": 0.8675, + "learning_rate": 9.352494094550243e-06, + "loss": 0.8774, "step": 18880 }, { - "epoch": 0.535783200908059, + "epoch": 0.5350392473575335, "grad_norm": 0.0, - "learning_rate": 9.327742242739164e-06, - "loss": 1.0232, + "learning_rate": 9.351578234014167e-06, + "loss": 0.8742, "step": 18881 }, { - "epoch": 0.535811577752554, + "epoch": 0.535067584799796, "grad_norm": 0.0, - "learning_rate": 9.326825246660053e-06, - "loss": 0.8118, + "learning_rate": 9.350662378939987e-06, + "loss": 1.0509, "step": 18882 }, { - "epoch": 0.5358399545970488, + "epoch": 0.5350959222420585, "grad_norm": 0.0, - "learning_rate": 9.32590825626728e-06, - "loss": 0.8843, + "learning_rate": 9.349746529335418e-06, + "loss": 0.8609, "step": 18883 }, { - "epoch": 0.5358683314415437, + "epoch": 0.5351242596843209, "grad_norm": 0.0, - "learning_rate": 9.324991271568589e-06, - "loss": 0.674, + "learning_rate": 9.348830685208175e-06, + "loss": 0.9595, "step": 18884 }, { - "epoch": 0.5358967082860386, + "epoch": 0.5351525971265834, "grad_norm": 0.0, - "learning_rate": 9.324074292571727e-06, - "loss": 0.9505, + "learning_rate": 9.347914846565979e-06, + "loss": 0.8946, "step": 18885 }, { - "epoch": 0.5359250851305335, + "epoch": 0.5351809345688459, "grad_norm": 0.0, - "learning_rate": 9.323157319284441e-06, - "loss": 0.8083, + "learning_rate": 9.346999013416532e-06, + "loss": 0.9355, "step": 18886 }, { - "epoch": 0.5359534619750284, + "epoch": 0.5352092720111082, "grad_norm": 0.0, - "learning_rate": 9.322240351714475e-06, - "loss": 0.935, + "learning_rate": 9.346083185767556e-06, + "loss": 0.9176, "step": 18887 }, { - "epoch": 0.5359818388195232, + "epoch": 0.5352376094533707, "grad_norm": 0.0, - "learning_rate": 9.321323389869575e-06, - "loss": 0.963, + "learning_rate": 9.345167363626765e-06, + "loss": 0.7569, "step": 18888 }, { - "epoch": 0.5360102156640182, + "epoch": 0.5352659468956332, "grad_norm": 0.0, - "learning_rate": 9.320406433757488e-06, - "loss": 0.8453, + "learning_rate": 9.344251547001872e-06, + "loss": 0.8103, "step": 18889 }, { - "epoch": 0.536038592508513, + "epoch": 0.5352942843378956, "grad_norm": 0.0, - "learning_rate": 9.319489483385956e-06, - "loss": 0.8744, + "learning_rate": 9.343335735900593e-06, + "loss": 0.8398, "step": 18890 }, { - "epoch": 0.5360669693530079, + "epoch": 0.5353226217801581, "grad_norm": 0.0, - "learning_rate": 9.31857253876273e-06, - "loss": 0.9159, + "learning_rate": 9.342419930330639e-06, + "loss": 0.9517, "step": 18891 }, { - "epoch": 0.5360953461975029, + "epoch": 0.5353509592224206, "grad_norm": 0.0, - "learning_rate": 9.317655599895551e-06, - "loss": 0.8266, + "learning_rate": 9.341504130299727e-06, + "loss": 0.8085, "step": 18892 }, { - "epoch": 0.5361237230419977, + "epoch": 0.5353792966646831, "grad_norm": 0.0, - "learning_rate": 9.316738666792165e-06, - "loss": 0.8795, + "learning_rate": 9.340588335815574e-06, + "loss": 1.0306, "step": 18893 }, { - "epoch": 0.5361520998864926, + "epoch": 0.5354076341069455, "grad_norm": 0.0, - "learning_rate": 9.315821739460318e-06, - "loss": 0.8294, + "learning_rate": 9.339672546885885e-06, + "loss": 0.8288, "step": 18894 }, { - "epoch": 0.5361804767309876, + "epoch": 0.535435971549208, "grad_norm": 0.0, - "learning_rate": 9.314904817907754e-06, - "loss": 0.7741, + "learning_rate": 9.338756763518381e-06, + "loss": 0.8979, "step": 18895 }, { - "epoch": 0.5362088535754824, + "epoch": 0.5354643089914705, "grad_norm": 0.0, - "learning_rate": 9.313987902142222e-06, - "loss": 0.8928, + "learning_rate": 9.337840985720774e-06, + "loss": 0.8392, "step": 18896 }, { - "epoch": 0.5362372304199773, + "epoch": 0.5354926464337328, "grad_norm": 0.0, - "learning_rate": 9.313070992171467e-06, - "loss": 0.9273, + "learning_rate": 9.336925213500777e-06, + "loss": 0.8848, "step": 18897 }, { - "epoch": 0.5362656072644721, + "epoch": 0.5355209838759953, "grad_norm": 0.0, - "learning_rate": 9.31215408800323e-06, - "loss": 0.901, + "learning_rate": 9.33600944686611e-06, + "loss": 0.7927, "step": 18898 }, { - "epoch": 0.5362939841089671, + "epoch": 0.5355493213182578, "grad_norm": 0.0, - "learning_rate": 9.311237189645259e-06, - "loss": 0.7767, + "learning_rate": 9.335093685824479e-06, + "loss": 0.6856, "step": 18899 }, { - "epoch": 0.536322360953462, + "epoch": 0.5355776587605203, "grad_norm": 0.0, - "learning_rate": 9.310320297105298e-06, - "loss": 0.8875, + "learning_rate": 9.334177930383598e-06, + "loss": 0.826, "step": 18900 }, { - "epoch": 0.5363507377979568, + "epoch": 0.5356059962027827, "grad_norm": 0.0, - "learning_rate": 9.30940341039109e-06, - "loss": 0.9694, + "learning_rate": 9.333262180551187e-06, + "loss": 0.8183, "step": 18901 }, { - "epoch": 0.5363791146424518, + "epoch": 0.5356343336450452, "grad_norm": 0.0, - "learning_rate": 9.308486529510386e-06, - "loss": 0.8824, + "learning_rate": 9.332346436334953e-06, + "loss": 0.888, "step": 18902 }, { - "epoch": 0.5364074914869467, + "epoch": 0.5356626710873077, "grad_norm": 0.0, - "learning_rate": 9.307569654470929e-06, - "loss": 0.903, + "learning_rate": 9.33143069774262e-06, + "loss": 0.9545, "step": 18903 }, { - "epoch": 0.5364358683314415, + "epoch": 0.5356910085295701, "grad_norm": 0.0, - "learning_rate": 9.306652785280456e-06, - "loss": 0.8986, + "learning_rate": 9.330514964781889e-06, + "loss": 0.9823, "step": 18904 }, { - "epoch": 0.5364642451759364, + "epoch": 0.5357193459718326, "grad_norm": 0.0, - "learning_rate": 9.305735921946724e-06, - "loss": 0.9288, + "learning_rate": 9.329599237460478e-06, + "loss": 0.8704, "step": 18905 }, { - "epoch": 0.5364926220204314, + "epoch": 0.5357476834140951, "grad_norm": 0.0, - "learning_rate": 9.30481906447747e-06, - "loss": 0.8685, + "learning_rate": 9.328683515786103e-06, + "loss": 0.8075, "step": 18906 }, { - "epoch": 0.5365209988649262, + "epoch": 0.5357760208563576, "grad_norm": 0.0, - "learning_rate": 9.30390221288044e-06, - "loss": 0.8358, + "learning_rate": 9.327767799766478e-06, + "loss": 0.9082, "step": 18907 }, { - "epoch": 0.5365493757094211, + "epoch": 0.5358043582986199, "grad_norm": 0.0, - "learning_rate": 9.30298536716338e-06, - "loss": 0.7988, + "learning_rate": 9.326852089409314e-06, + "loss": 0.9343, "step": 18908 }, { - "epoch": 0.536577752553916, + "epoch": 0.5358326957408824, "grad_norm": 0.0, - "learning_rate": 9.30206852733403e-06, - "loss": 0.8162, + "learning_rate": 9.325936384722322e-06, + "loss": 0.8754, "step": 18909 }, { - "epoch": 0.5366061293984109, + "epoch": 0.5358610331831449, "grad_norm": 0.0, - "learning_rate": 9.301151693400142e-06, - "loss": 0.9294, + "learning_rate": 9.325020685713218e-06, + "loss": 0.8655, "step": 18910 }, { - "epoch": 0.5366345062429058, + "epoch": 0.5358893706254073, "grad_norm": 0.0, - "learning_rate": 9.300234865369455e-06, - "loss": 0.9087, + "learning_rate": 9.32410499238972e-06, + "loss": 0.8411, "step": 18911 }, { - "epoch": 0.5366628830874007, + "epoch": 0.5359177080676698, "grad_norm": 0.0, - "learning_rate": 9.29931804324972e-06, - "loss": 0.8916, + "learning_rate": 9.323189304759533e-06, + "loss": 0.7901, "step": 18912 }, { - "epoch": 0.5366912599318956, + "epoch": 0.5359460455099323, "grad_norm": 0.0, - "learning_rate": 9.298401227048674e-06, - "loss": 0.8756, + "learning_rate": 9.322273622830373e-06, + "loss": 0.9223, "step": 18913 }, { - "epoch": 0.5367196367763905, + "epoch": 0.5359743829521947, "grad_norm": 0.0, - "learning_rate": 9.297484416774066e-06, - "loss": 0.84, + "learning_rate": 9.321357946609957e-06, + "loss": 0.9384, "step": 18914 }, { - "epoch": 0.5367480136208853, + "epoch": 0.5360027203944572, "grad_norm": 0.0, - "learning_rate": 9.296567612433635e-06, - "loss": 0.7975, + "learning_rate": 9.320442276105993e-06, + "loss": 0.9139, "step": 18915 }, { - "epoch": 0.5367763904653803, + "epoch": 0.5360310578367197, "grad_norm": 0.0, - "learning_rate": 9.295650814035134e-06, - "loss": 0.8746, + "learning_rate": 9.3195266113262e-06, + "loss": 0.8751, "step": 18916 }, { - "epoch": 0.5368047673098751, + "epoch": 0.5360593952789822, "grad_norm": 0.0, - "learning_rate": 9.2947340215863e-06, - "loss": 0.902, + "learning_rate": 9.318610952278282e-06, + "loss": 0.8549, "step": 18917 }, { - "epoch": 0.53683314415437, + "epoch": 0.5360877327212445, "grad_norm": 0.0, - "learning_rate": 9.29381723509488e-06, - "loss": 0.9002, + "learning_rate": 9.317695298969957e-06, + "loss": 0.8246, "step": 18918 }, { - "epoch": 0.536861520998865, + "epoch": 0.536116070163507, "grad_norm": 0.0, - "learning_rate": 9.292900454568615e-06, - "loss": 0.8082, + "learning_rate": 9.316779651408941e-06, + "loss": 0.8351, "step": 18919 }, { - "epoch": 0.5368898978433598, + "epoch": 0.5361444076057695, "grad_norm": 0.0, - "learning_rate": 9.291983680015255e-06, - "loss": 0.8893, + "learning_rate": 9.315864009602941e-06, + "loss": 0.9448, "step": 18920 }, { - "epoch": 0.5369182746878547, + "epoch": 0.5361727450480319, "grad_norm": 0.0, - "learning_rate": 9.291066911442537e-06, - "loss": 0.8304, + "learning_rate": 9.314948373559672e-06, + "loss": 0.9927, "step": 18921 }, { - "epoch": 0.5369466515323496, + "epoch": 0.5362010824902944, "grad_norm": 0.0, - "learning_rate": 9.290150148858212e-06, - "loss": 0.7903, + "learning_rate": 9.314032743286853e-06, + "loss": 0.9252, "step": 18922 }, { - "epoch": 0.5369750283768445, + "epoch": 0.5362294199325569, "grad_norm": 0.0, - "learning_rate": 9.28923339227002e-06, - "loss": 0.9127, + "learning_rate": 9.313117118792185e-06, + "loss": 0.9878, "step": 18923 }, { - "epoch": 0.5370034052213394, + "epoch": 0.5362577573748194, "grad_norm": 0.0, - "learning_rate": 9.288316641685702e-06, - "loss": 0.7555, + "learning_rate": 9.312201500083392e-06, + "loss": 0.9608, "step": 18924 }, { - "epoch": 0.5370317820658342, + "epoch": 0.5362860948170818, "grad_norm": 0.0, - "learning_rate": 9.287399897113007e-06, - "loss": 0.8984, + "learning_rate": 9.311285887168176e-06, + "loss": 0.9279, "step": 18925 }, { - "epoch": 0.5370601589103292, + "epoch": 0.5363144322593443, "grad_norm": 0.0, - "learning_rate": 9.286483158559679e-06, - "loss": 0.8486, + "learning_rate": 9.310370280054255e-06, + "loss": 0.9929, "step": 18926 }, { - "epoch": 0.5370885357548241, + "epoch": 0.5363427697016068, "grad_norm": 0.0, - "learning_rate": 9.285566426033459e-06, - "loss": 0.8486, + "learning_rate": 9.309454678749343e-06, + "loss": 0.8512, "step": 18927 }, { - "epoch": 0.5371169125993189, + "epoch": 0.5363711071438692, "grad_norm": 0.0, - "learning_rate": 9.284649699542092e-06, - "loss": 0.8652, + "learning_rate": 9.30853908326115e-06, + "loss": 0.9464, "step": 18928 }, { - "epoch": 0.5371452894438139, + "epoch": 0.5363994445861316, "grad_norm": 0.0, - "learning_rate": 9.283732979093322e-06, - "loss": 0.8064, + "learning_rate": 9.307623493597388e-06, + "loss": 0.9221, "step": 18929 }, { - "epoch": 0.5371736662883088, + "epoch": 0.5364277820283941, "grad_norm": 0.0, - "learning_rate": 9.28281626469489e-06, - "loss": 0.8492, + "learning_rate": 9.306707909765775e-06, + "loss": 0.786, "step": 18930 }, { - "epoch": 0.5372020431328036, + "epoch": 0.5364561194706566, "grad_norm": 0.0, - "learning_rate": 9.28189955635454e-06, - "loss": 0.8028, + "learning_rate": 9.305792331774015e-06, + "loss": 0.8129, "step": 18931 }, { - "epoch": 0.5372304199772985, + "epoch": 0.536484456912919, "grad_norm": 0.0, - "learning_rate": 9.280982854080021e-06, - "loss": 0.9093, + "learning_rate": 9.304876759629824e-06, + "loss": 0.7249, "step": 18932 }, { - "epoch": 0.5372587968217934, + "epoch": 0.5365127943551815, "grad_norm": 0.0, - "learning_rate": 9.280066157879069e-06, - "loss": 0.8267, + "learning_rate": 9.303961193340914e-06, + "loss": 0.8327, "step": 18933 }, { - "epoch": 0.5372871736662883, + "epoch": 0.536541131797444, "grad_norm": 0.0, - "learning_rate": 9.27914946775943e-06, - "loss": 0.9755, + "learning_rate": 9.303045632914997e-06, + "loss": 0.7038, "step": 18934 }, { - "epoch": 0.5373155505107832, + "epoch": 0.5365694692397064, "grad_norm": 0.0, - "learning_rate": 9.27823278372885e-06, - "loss": 0.8502, + "learning_rate": 9.30213007835979e-06, + "loss": 0.8872, "step": 18935 }, { - "epoch": 0.5373439273552781, + "epoch": 0.5365978066819689, "grad_norm": 0.0, - "learning_rate": 9.277316105795071e-06, - "loss": 0.954, + "learning_rate": 9.301214529682995e-06, + "loss": 0.832, "step": 18936 }, { - "epoch": 0.537372304199773, + "epoch": 0.5366261441242314, "grad_norm": 0.0, - "learning_rate": 9.276399433965835e-06, - "loss": 0.9427, + "learning_rate": 9.300298986892335e-06, + "loss": 0.7767, "step": 18937 }, { - "epoch": 0.5374006810442679, + "epoch": 0.5366544815664938, "grad_norm": 0.0, - "learning_rate": 9.275482768248882e-06, - "loss": 0.8099, + "learning_rate": 9.299383449995511e-06, + "loss": 0.8048, "step": 18938 }, { - "epoch": 0.5374290578887627, + "epoch": 0.5366828190087563, "grad_norm": 0.0, - "learning_rate": 9.274566108651962e-06, - "loss": 0.9059, + "learning_rate": 9.29846791900024e-06, + "loss": 0.829, "step": 18939 }, { - "epoch": 0.5374574347332577, + "epoch": 0.5367111564510187, "grad_norm": 0.0, - "learning_rate": 9.27364945518281e-06, - "loss": 0.9047, + "learning_rate": 9.297552393914238e-06, + "loss": 0.9865, "step": 18940 }, { - "epoch": 0.5374858115777525, + "epoch": 0.5367394938932812, "grad_norm": 0.0, - "learning_rate": 9.272732807849177e-06, - "loss": 0.8946, + "learning_rate": 9.296636874745211e-06, + "loss": 0.8326, "step": 18941 }, { - "epoch": 0.5375141884222474, + "epoch": 0.5367678313355436, "grad_norm": 0.0, - "learning_rate": 9.271816166658801e-06, - "loss": 0.9216, + "learning_rate": 9.29572136150087e-06, + "loss": 0.8929, "step": 18942 }, { - "epoch": 0.5375425652667424, + "epoch": 0.5367961687778061, "grad_norm": 0.0, - "learning_rate": 9.27089953161943e-06, - "loss": 0.9731, + "learning_rate": 9.294805854188937e-06, + "loss": 0.874, "step": 18943 }, { - "epoch": 0.5375709421112372, + "epoch": 0.5368245062200686, "grad_norm": 0.0, - "learning_rate": 9.269982902738803e-06, - "loss": 0.8383, + "learning_rate": 9.29389035281711e-06, + "loss": 1.0827, "step": 18944 }, { - "epoch": 0.5375993189557321, + "epoch": 0.536852843662331, "grad_norm": 0.0, - "learning_rate": 9.26906628002466e-06, - "loss": 0.8072, + "learning_rate": 9.292974857393112e-06, + "loss": 0.8052, "step": 18945 }, { - "epoch": 0.5376276958002271, + "epoch": 0.5368811811045935, "grad_norm": 0.0, - "learning_rate": 9.268149663484749e-06, - "loss": 0.8477, + "learning_rate": 9.292059367924644e-06, + "loss": 0.9217, "step": 18946 }, { - "epoch": 0.5376560726447219, + "epoch": 0.536909518546856, "grad_norm": 0.0, - "learning_rate": 9.267233053126811e-06, - "loss": 0.8019, + "learning_rate": 9.291143884419424e-06, + "loss": 0.9712, "step": 18947 }, { - "epoch": 0.5376844494892168, + "epoch": 0.5369378559891185, "grad_norm": 0.0, - "learning_rate": 9.266316448958587e-06, - "loss": 0.8885, + "learning_rate": 9.290228406885166e-06, + "loss": 0.8729, "step": 18948 }, { - "epoch": 0.5377128263337116, + "epoch": 0.5369661934313809, "grad_norm": 0.0, - "learning_rate": 9.265399850987824e-06, - "loss": 0.9408, + "learning_rate": 9.289312935329573e-06, + "loss": 0.7329, "step": 18949 }, { - "epoch": 0.5377412031782066, + "epoch": 0.5369945308736433, "grad_norm": 0.0, - "learning_rate": 9.26448325922226e-06, - "loss": 0.9113, + "learning_rate": 9.288397469760362e-06, + "loss": 0.857, "step": 18950 }, { - "epoch": 0.5377695800227015, + "epoch": 0.5370228683159058, "grad_norm": 0.0, - "learning_rate": 9.263566673669637e-06, - "loss": 0.927, + "learning_rate": 9.287482010185247e-06, + "loss": 0.9359, "step": 18951 }, { - "epoch": 0.5377979568671963, + "epoch": 0.5370512057581682, "grad_norm": 0.0, - "learning_rate": 9.262650094337702e-06, - "loss": 0.8702, + "learning_rate": 9.286566556611932e-06, + "loss": 0.8574, "step": 18952 }, { - "epoch": 0.5378263337116913, + "epoch": 0.5370795432004307, "grad_norm": 0.0, - "learning_rate": 9.261733521234194e-06, - "loss": 0.9349, + "learning_rate": 9.285651109048134e-06, + "loss": 0.8648, "step": 18953 }, { - "epoch": 0.5378547105561862, + "epoch": 0.5371078806426932, "grad_norm": 0.0, - "learning_rate": 9.260816954366853e-06, - "loss": 1.0676, + "learning_rate": 9.284735667501558e-06, + "loss": 0.8226, "step": 18954 }, { - "epoch": 0.537883087400681, + "epoch": 0.5371362180849556, "grad_norm": 0.0, - "learning_rate": 9.259900393743427e-06, - "loss": 0.7872, + "learning_rate": 9.28382023197992e-06, + "loss": 0.8129, "step": 18955 }, { - "epoch": 0.5379114642451759, + "epoch": 0.5371645555272181, "grad_norm": 0.0, - "learning_rate": 9.258983839371655e-06, - "loss": 0.943, + "learning_rate": 9.282904802490937e-06, + "loss": 0.939, "step": 18956 }, { - "epoch": 0.5379398410896709, + "epoch": 0.5371928929694806, "grad_norm": 0.0, - "learning_rate": 9.25806729125928e-06, - "loss": 0.8956, + "learning_rate": 9.281989379042306e-06, + "loss": 0.868, "step": 18957 }, { - "epoch": 0.5379682179341657, + "epoch": 0.5372212304117431, "grad_norm": 0.0, - "learning_rate": 9.257150749414045e-06, - "loss": 0.9065, + "learning_rate": 9.281073961641746e-06, + "loss": 0.8597, "step": 18958 }, { - "epoch": 0.5379965947786606, + "epoch": 0.5372495678540055, "grad_norm": 0.0, - "learning_rate": 9.25623421384369e-06, - "loss": 0.8174, + "learning_rate": 9.28015855029697e-06, + "loss": 0.8313, "step": 18959 }, { - "epoch": 0.5380249716231555, + "epoch": 0.537277905296268, "grad_norm": 0.0, - "learning_rate": 9.25531768455596e-06, - "loss": 0.896, + "learning_rate": 9.279243145015681e-06, + "loss": 0.8235, "step": 18960 }, { - "epoch": 0.5380533484676504, + "epoch": 0.5373062427385304, "grad_norm": 0.0, - "learning_rate": 9.254401161558594e-06, - "loss": 0.7695, + "learning_rate": 9.278327745805603e-06, + "loss": 0.8104, "step": 18961 }, { - "epoch": 0.5380817253121453, + "epoch": 0.5373345801807928, "grad_norm": 0.0, - "learning_rate": 9.253484644859334e-06, - "loss": 0.865, + "learning_rate": 9.277412352674429e-06, + "loss": 0.8651, "step": 18962 }, { - "epoch": 0.5381101021566402, + "epoch": 0.5373629176230553, "grad_norm": 0.0, - "learning_rate": 9.252568134465924e-06, - "loss": 0.8578, + "learning_rate": 9.276496965629883e-06, + "loss": 0.8617, "step": 18963 }, { - "epoch": 0.5381384790011351, + "epoch": 0.5373912550653178, "grad_norm": 0.0, - "learning_rate": 9.251651630386103e-06, - "loss": 0.9774, + "learning_rate": 9.275581584679675e-06, + "loss": 0.9639, "step": 18964 }, { - "epoch": 0.53816685584563, + "epoch": 0.5374195925075803, "grad_norm": 0.0, - "learning_rate": 9.250735132627614e-06, - "loss": 0.8918, + "learning_rate": 9.274666209831508e-06, + "loss": 0.9209, "step": 18965 }, { - "epoch": 0.5381952326901248, + "epoch": 0.5374479299498427, "grad_norm": 0.0, - "learning_rate": 9.249818641198202e-06, - "loss": 0.9575, + "learning_rate": 9.273750841093099e-06, + "loss": 0.9622, "step": 18966 }, { - "epoch": 0.5382236095346198, + "epoch": 0.5374762673921052, "grad_norm": 0.0, - "learning_rate": 9.248902156105605e-06, - "loss": 0.9254, + "learning_rate": 9.272835478472154e-06, + "loss": 0.9135, "step": 18967 }, { - "epoch": 0.5382519863791146, + "epoch": 0.5375046048343677, "grad_norm": 0.0, - "learning_rate": 9.247985677357563e-06, - "loss": 0.8549, + "learning_rate": 9.271920121976385e-06, + "loss": 0.9636, "step": 18968 }, { - "epoch": 0.5382803632236095, + "epoch": 0.5375329422766301, "grad_norm": 0.0, - "learning_rate": 9.24706920496182e-06, - "loss": 0.7987, + "learning_rate": 9.271004771613509e-06, + "loss": 0.9062, "step": 18969 }, { - "epoch": 0.5383087400681045, + "epoch": 0.5375612797188926, "grad_norm": 0.0, - "learning_rate": 9.246152738926115e-06, - "loss": 0.9274, + "learning_rate": 9.270089427391225e-06, + "loss": 0.8646, "step": 18970 }, { - "epoch": 0.5383371169125993, + "epoch": 0.537589617161155, "grad_norm": 0.0, - "learning_rate": 9.245236279258197e-06, - "loss": 0.9366, + "learning_rate": 9.26917408931725e-06, + "loss": 0.9151, "step": 18971 }, { - "epoch": 0.5383654937570942, + "epoch": 0.5376179546034175, "grad_norm": 0.0, - "learning_rate": 9.244319825965797e-06, - "loss": 0.8458, + "learning_rate": 9.268258757399295e-06, + "loss": 0.9042, "step": 18972 }, { - "epoch": 0.538393870601589, + "epoch": 0.5376462920456799, "grad_norm": 0.0, - "learning_rate": 9.243403379056666e-06, - "loss": 0.9368, + "learning_rate": 9.267343431645065e-06, + "loss": 0.8196, "step": 18973 }, { - "epoch": 0.538422247446084, + "epoch": 0.5376746294879424, "grad_norm": 0.0, - "learning_rate": 9.24248693853854e-06, - "loss": 0.8773, + "learning_rate": 9.266428112062277e-06, + "loss": 0.9769, "step": 18974 }, { - "epoch": 0.5384506242905789, + "epoch": 0.5377029669302049, "grad_norm": 0.0, - "learning_rate": 9.241570504419158e-06, - "loss": 0.8805, + "learning_rate": 9.265512798658636e-06, + "loss": 0.7658, "step": 18975 }, { - "epoch": 0.5384790011350737, + "epoch": 0.5377313043724673, "grad_norm": 0.0, - "learning_rate": 9.240654076706267e-06, - "loss": 0.7917, + "learning_rate": 9.264597491441851e-06, + "loss": 0.8623, "step": 18976 }, { - "epoch": 0.5385073779795687, + "epoch": 0.5377596418147298, "grad_norm": 0.0, - "learning_rate": 9.239737655407605e-06, - "loss": 0.8056, + "learning_rate": 9.263682190419639e-06, + "loss": 0.8351, "step": 18977 }, { - "epoch": 0.5385357548240636, + "epoch": 0.5377879792569923, "grad_norm": 0.0, - "learning_rate": 9.238821240530914e-06, - "loss": 0.8764, + "learning_rate": 9.262766895599701e-06, + "loss": 0.9636, "step": 18978 }, { - "epoch": 0.5385641316685584, + "epoch": 0.5378163166992547, "grad_norm": 0.0, - "learning_rate": 9.23790483208393e-06, - "loss": 0.865, + "learning_rate": 9.261851606989754e-06, + "loss": 0.9698, "step": 18979 }, { - "epoch": 0.5385925085130534, + "epoch": 0.5378446541415172, "grad_norm": 0.0, - "learning_rate": 9.236988430074401e-06, - "loss": 0.8236, + "learning_rate": 9.260936324597504e-06, + "loss": 0.9361, "step": 18980 }, { - "epoch": 0.5386208853575483, + "epoch": 0.5378729915837797, "grad_norm": 0.0, - "learning_rate": 9.236072034510067e-06, - "loss": 1.0663, + "learning_rate": 9.26002104843066e-06, + "loss": 0.8792, "step": 18981 }, { - "epoch": 0.5386492622020431, + "epoch": 0.5379013290260422, "grad_norm": 0.0, - "learning_rate": 9.235155645398663e-06, - "loss": 0.8608, + "learning_rate": 9.259105778496938e-06, + "loss": 0.8918, "step": 18982 }, { - "epoch": 0.538677639046538, + "epoch": 0.5379296664683045, "grad_norm": 0.0, - "learning_rate": 9.234239262747935e-06, - "loss": 0.8858, + "learning_rate": 9.25819051480404e-06, + "loss": 0.9802, "step": 18983 }, { - "epoch": 0.538706015891033, + "epoch": 0.537958003910567, "grad_norm": 0.0, - "learning_rate": 9.233322886565624e-06, - "loss": 0.8559, + "learning_rate": 9.257275257359679e-06, + "loss": 0.9602, "step": 18984 }, { - "epoch": 0.5387343927355278, + "epoch": 0.5379863413528295, "grad_norm": 0.0, - "learning_rate": 9.232406516859465e-06, - "loss": 0.7684, + "learning_rate": 9.256360006171564e-06, + "loss": 0.993, "step": 18985 }, { - "epoch": 0.5387627695800227, + "epoch": 0.5380146787950919, "grad_norm": 0.0, - "learning_rate": 9.231490153637203e-06, - "loss": 0.9146, + "learning_rate": 9.255444761247403e-06, + "loss": 0.903, "step": 18986 }, { - "epoch": 0.5387911464245176, + "epoch": 0.5380430162373544, "grad_norm": 0.0, - "learning_rate": 9.23057379690658e-06, - "loss": 0.8306, + "learning_rate": 9.254529522594909e-06, + "loss": 0.9982, "step": 18987 }, { - "epoch": 0.5388195232690125, + "epoch": 0.5380713536796169, "grad_norm": 0.0, - "learning_rate": 9.229657446675337e-06, - "loss": 0.8591, + "learning_rate": 9.253614290221794e-06, + "loss": 0.8685, "step": 18988 }, { - "epoch": 0.5388479001135074, + "epoch": 0.5380996911218794, "grad_norm": 0.0, - "learning_rate": 9.22874110295121e-06, - "loss": 0.8907, + "learning_rate": 9.252699064135759e-06, + "loss": 0.9049, "step": 18989 }, { - "epoch": 0.5388762769580022, + "epoch": 0.5381280285641418, "grad_norm": 0.0, - "learning_rate": 9.227824765741942e-06, - "loss": 0.8715, + "learning_rate": 9.251783844344519e-06, + "loss": 0.9201, "step": 18990 }, { - "epoch": 0.5389046538024972, + "epoch": 0.5381563660064043, "grad_norm": 0.0, - "learning_rate": 9.226908435055273e-06, - "loss": 0.8062, + "learning_rate": 9.250868630855779e-06, + "loss": 0.864, "step": 18991 }, { - "epoch": 0.538933030646992, + "epoch": 0.5381847034486668, "grad_norm": 0.0, - "learning_rate": 9.225992110898941e-06, - "loss": 0.8982, + "learning_rate": 9.249953423677252e-06, + "loss": 0.9604, "step": 18992 }, { - "epoch": 0.5389614074914869, + "epoch": 0.5382130408909291, "grad_norm": 0.0, - "learning_rate": 9.225075793280693e-06, - "loss": 0.8817, + "learning_rate": 9.249038222816645e-06, + "loss": 0.8834, "step": 18993 }, { - "epoch": 0.5389897843359819, + "epoch": 0.5382413783331916, "grad_norm": 0.0, - "learning_rate": 9.22415948220826e-06, - "loss": 0.8459, + "learning_rate": 9.248123028281668e-06, + "loss": 0.8906, "step": 18994 }, { - "epoch": 0.5390181611804767, + "epoch": 0.5382697157754541, "grad_norm": 0.0, - "learning_rate": 9.223243177689389e-06, - "loss": 0.849, + "learning_rate": 9.247207840080034e-06, + "loss": 0.7995, "step": 18995 }, { - "epoch": 0.5390465380249716, + "epoch": 0.5382980532177166, "grad_norm": 0.0, - "learning_rate": 9.222326879731818e-06, - "loss": 0.8884, + "learning_rate": 9.246292658219442e-06, + "loss": 0.9371, "step": 18996 }, { - "epoch": 0.5390749148694666, + "epoch": 0.538326390659979, "grad_norm": 0.0, - "learning_rate": 9.221410588343288e-06, - "loss": 0.8753, + "learning_rate": 9.245377482707609e-06, + "loss": 0.9091, "step": 18997 }, { - "epoch": 0.5391032917139614, + "epoch": 0.5383547281022415, "grad_norm": 0.0, - "learning_rate": 9.220494303531534e-06, - "loss": 0.8848, + "learning_rate": 9.244462313552241e-06, + "loss": 0.9282, "step": 18998 }, { - "epoch": 0.5391316685584563, + "epoch": 0.538383065544504, "grad_norm": 0.0, - "learning_rate": 9.219578025304303e-06, - "loss": 0.8575, + "learning_rate": 9.243547150761047e-06, + "loss": 0.9237, "step": 18999 }, { - "epoch": 0.5391600454029511, + "epoch": 0.5384114029867664, "grad_norm": 0.0, - "learning_rate": 9.21866175366933e-06, - "loss": 0.8784, + "learning_rate": 9.242631994341738e-06, + "loss": 0.8516, "step": 19000 }, { - "epoch": 0.5391884222474461, + "epoch": 0.5384397404290289, "grad_norm": 0.0, - "learning_rate": 9.217745488634354e-06, - "loss": 0.8799, + "learning_rate": 9.241716844302024e-06, + "loss": 0.8842, "step": 19001 }, { - "epoch": 0.539216799091941, + "epoch": 0.5384680778712914, "grad_norm": 0.0, - "learning_rate": 9.21682923020712e-06, - "loss": 0.9063, + "learning_rate": 9.240801700649605e-06, + "loss": 0.916, "step": 19002 }, { - "epoch": 0.5392451759364358, + "epoch": 0.5384964153135537, "grad_norm": 0.0, - "learning_rate": 9.215912978395364e-06, - "loss": 0.909, + "learning_rate": 9.2398865633922e-06, + "loss": 0.8512, "step": 19003 }, { - "epoch": 0.5392735527809308, + "epoch": 0.5385247527558162, "grad_norm": 0.0, - "learning_rate": 9.214996733206827e-06, - "loss": 0.9062, + "learning_rate": 9.23897143253751e-06, + "loss": 0.8365, "step": 19004 }, { - "epoch": 0.5393019296254257, + "epoch": 0.5385530901980787, "grad_norm": 0.0, - "learning_rate": 9.21408049464925e-06, - "loss": 0.8359, + "learning_rate": 9.238056308093244e-06, + "loss": 0.824, "step": 19005 }, { - "epoch": 0.5393303064699205, + "epoch": 0.5385814276403412, "grad_norm": 0.0, - "learning_rate": 9.213164262730365e-06, - "loss": 0.9296, + "learning_rate": 9.237141190067116e-06, + "loss": 1.0206, "step": 19006 }, { - "epoch": 0.5393586833144154, + "epoch": 0.5386097650826036, "grad_norm": 0.0, - "learning_rate": 9.21224803745792e-06, - "loss": 0.9093, + "learning_rate": 9.23622607846683e-06, + "loss": 0.8007, "step": 19007 }, { - "epoch": 0.5393870601589104, + "epoch": 0.5386381025248661, "grad_norm": 0.0, - "learning_rate": 9.211331818839653e-06, - "loss": 0.8884, + "learning_rate": 9.235310973300093e-06, + "loss": 0.7979, "step": 19008 }, { - "epoch": 0.5394154370034052, + "epoch": 0.5386664399671286, "grad_norm": 0.0, - "learning_rate": 9.210415606883298e-06, - "loss": 0.8065, + "learning_rate": 9.23439587457462e-06, + "loss": 0.9063, "step": 19009 }, { - "epoch": 0.5394438138479001, + "epoch": 0.538694777409391, "grad_norm": 0.0, - "learning_rate": 9.2094994015966e-06, - "loss": 0.9412, + "learning_rate": 9.233480782298111e-06, + "loss": 0.9087, "step": 19010 }, { - "epoch": 0.539472190692395, + "epoch": 0.5387231148516535, "grad_norm": 0.0, - "learning_rate": 9.208583202987297e-06, - "loss": 0.8578, + "learning_rate": 9.232565696478282e-06, + "loss": 1.0254, "step": 19011 }, { - "epoch": 0.5395005675368899, + "epoch": 0.538751452293916, "grad_norm": 0.0, - "learning_rate": 9.207667011063124e-06, - "loss": 0.8313, + "learning_rate": 9.231650617122833e-06, + "loss": 1.0083, "step": 19012 }, { - "epoch": 0.5395289443813848, + "epoch": 0.5387797897361785, "grad_norm": 0.0, - "learning_rate": 9.206750825831826e-06, - "loss": 0.8835, + "learning_rate": 9.230735544239477e-06, + "loss": 0.7862, "step": 19013 }, { - "epoch": 0.5395573212258796, + "epoch": 0.5388081271784408, "grad_norm": 0.0, - "learning_rate": 9.205834647301139e-06, - "loss": 0.8222, + "learning_rate": 9.229820477835926e-06, + "loss": 0.8765, "step": 19014 }, { - "epoch": 0.5395856980703746, + "epoch": 0.5388364646207033, "grad_norm": 0.0, - "learning_rate": 9.2049184754788e-06, - "loss": 0.855, + "learning_rate": 9.228905417919879e-06, + "loss": 0.8517, "step": 19015 }, { - "epoch": 0.5396140749148695, + "epoch": 0.5388648020629658, "grad_norm": 0.0, - "learning_rate": 9.204002310372551e-06, - "loss": 0.8469, + "learning_rate": 9.227990364499046e-06, + "loss": 0.811, "step": 19016 }, { - "epoch": 0.5396424517593643, + "epoch": 0.5388931395052282, "grad_norm": 0.0, - "learning_rate": 9.203086151990132e-06, - "loss": 0.8203, + "learning_rate": 9.227075317581141e-06, + "loss": 0.9535, "step": 19017 }, { - "epoch": 0.5396708286038593, + "epoch": 0.5389214769474907, "grad_norm": 0.0, - "learning_rate": 9.20217000033928e-06, - "loss": 0.8763, + "learning_rate": 9.226160277173867e-06, + "loss": 0.9043, "step": 19018 }, { - "epoch": 0.5396992054483541, + "epoch": 0.5389498143897532, "grad_norm": 0.0, - "learning_rate": 9.201253855427734e-06, - "loss": 0.9607, + "learning_rate": 9.225245243284931e-06, + "loss": 0.9072, "step": 19019 }, { - "epoch": 0.539727582292849, + "epoch": 0.5389781518320157, "grad_norm": 0.0, - "learning_rate": 9.200337717263231e-06, - "loss": 0.8797, + "learning_rate": 9.224330215922042e-06, + "loss": 0.8941, "step": 19020 }, { - "epoch": 0.539755959137344, + "epoch": 0.5390064892742781, "grad_norm": 0.0, - "learning_rate": 9.199421585853514e-06, - "loss": 0.9303, + "learning_rate": 9.223415195092906e-06, + "loss": 0.8128, "step": 19021 }, { - "epoch": 0.5397843359818388, + "epoch": 0.5390348267165406, "grad_norm": 0.0, - "learning_rate": 9.198505461206318e-06, - "loss": 0.9078, + "learning_rate": 9.222500180805237e-06, + "loss": 0.9013, "step": 19022 }, { - "epoch": 0.5398127128263337, + "epoch": 0.5390631641588031, "grad_norm": 0.0, - "learning_rate": 9.19758934332938e-06, - "loss": 0.8566, + "learning_rate": 9.221585173066735e-06, + "loss": 0.8691, "step": 19023 }, { - "epoch": 0.5398410896708286, + "epoch": 0.5390915016010654, "grad_norm": 0.0, - "learning_rate": 9.196673232230444e-06, - "loss": 0.8727, + "learning_rate": 9.22067017188511e-06, + "loss": 1.0891, "step": 19024 }, { - "epoch": 0.5398694665153235, + "epoch": 0.5391198390433279, "grad_norm": 0.0, - "learning_rate": 9.195757127917245e-06, - "loss": 1.0159, + "learning_rate": 9.219755177268068e-06, + "loss": 0.8344, "step": 19025 }, { - "epoch": 0.5398978433598184, + "epoch": 0.5391481764855904, "grad_norm": 0.0, - "learning_rate": 9.194841030397518e-06, - "loss": 0.8075, + "learning_rate": 9.218840189223318e-06, + "loss": 0.9383, "step": 19026 }, { - "epoch": 0.5399262202043132, + "epoch": 0.5391765139278528, "grad_norm": 0.0, - "learning_rate": 9.19392493967901e-06, - "loss": 0.8144, + "learning_rate": 9.217925207758571e-06, + "loss": 0.95, "step": 19027 }, { - "epoch": 0.5399545970488082, + "epoch": 0.5392048513701153, "grad_norm": 0.0, - "learning_rate": 9.193008855769452e-06, - "loss": 0.7903, + "learning_rate": 9.217010232881527e-06, + "loss": 0.9455, "step": 19028 }, { - "epoch": 0.5399829738933031, + "epoch": 0.5392331888123778, "grad_norm": 0.0, - "learning_rate": 9.192092778676583e-06, - "loss": 0.9615, + "learning_rate": 9.216095264599895e-06, + "loss": 0.9061, "step": 19029 }, { - "epoch": 0.5400113507377979, + "epoch": 0.5392615262546403, "grad_norm": 0.0, - "learning_rate": 9.191176708408145e-06, - "loss": 0.8595, + "learning_rate": 9.215180302921387e-06, + "loss": 0.8618, "step": 19030 }, { - "epoch": 0.5400397275822928, + "epoch": 0.5392898636969027, "grad_norm": 0.0, - "learning_rate": 9.190260644971874e-06, - "loss": 0.869, + "learning_rate": 9.214265347853706e-06, + "loss": 0.7029, "step": 19031 }, { - "epoch": 0.5400681044267878, + "epoch": 0.5393182011391652, "grad_norm": 0.0, - "learning_rate": 9.189344588375503e-06, - "loss": 0.9708, + "learning_rate": 9.213350399404563e-06, + "loss": 0.9572, "step": 19032 }, { - "epoch": 0.5400964812712826, + "epoch": 0.5393465385814277, "grad_norm": 0.0, - "learning_rate": 9.188428538626777e-06, - "loss": 0.8371, + "learning_rate": 9.212435457581656e-06, + "loss": 0.8169, "step": 19033 }, { - "epoch": 0.5401248581157775, + "epoch": 0.53937487602369, "grad_norm": 0.0, - "learning_rate": 9.187512495733433e-06, - "loss": 0.8701, + "learning_rate": 9.2115205223927e-06, + "loss": 0.8231, "step": 19034 }, { - "epoch": 0.5401532349602725, + "epoch": 0.5394032134659525, "grad_norm": 0.0, - "learning_rate": 9.186596459703209e-06, - "loss": 0.9135, + "learning_rate": 9.210605593845402e-06, + "loss": 0.8549, "step": 19035 }, { - "epoch": 0.5401816118047673, + "epoch": 0.539431550908215, "grad_norm": 0.0, - "learning_rate": 9.185680430543838e-06, - "loss": 0.8733, + "learning_rate": 9.209690671947463e-06, + "loss": 0.827, "step": 19036 }, { - "epoch": 0.5402099886492622, + "epoch": 0.5394598883504775, "grad_norm": 0.0, - "learning_rate": 9.184764408263062e-06, - "loss": 0.899, + "learning_rate": 9.208775756706591e-06, + "loss": 0.8807, "step": 19037 }, { - "epoch": 0.5402383654937571, + "epoch": 0.5394882257927399, "grad_norm": 0.0, - "learning_rate": 9.183848392868619e-06, - "loss": 0.9335, + "learning_rate": 9.207860848130498e-06, + "loss": 0.9135, "step": 19038 }, { - "epoch": 0.540266742338252, + "epoch": 0.5395165632350024, "grad_norm": 0.0, - "learning_rate": 9.182932384368245e-06, - "loss": 0.8724, + "learning_rate": 9.206945946226885e-06, + "loss": 0.8884, "step": 19039 }, { - "epoch": 0.5402951191827469, + "epoch": 0.5395449006772649, "grad_norm": 0.0, - "learning_rate": 9.182016382769678e-06, - "loss": 0.9158, + "learning_rate": 9.206031051003465e-06, + "loss": 0.8585, "step": 19040 }, { - "epoch": 0.5403234960272417, + "epoch": 0.5395732381195273, "grad_norm": 0.0, - "learning_rate": 9.181100388080655e-06, - "loss": 0.9107, + "learning_rate": 9.205116162467938e-06, + "loss": 0.8905, "step": 19041 }, { - "epoch": 0.5403518728717367, + "epoch": 0.5396015755617898, "grad_norm": 0.0, - "learning_rate": 9.180184400308913e-06, - "loss": 0.8482, + "learning_rate": 9.204201280628011e-06, + "loss": 0.9799, "step": 19042 }, { - "epoch": 0.5403802497162316, + "epoch": 0.5396299130040523, "grad_norm": 0.0, - "learning_rate": 9.17926841946219e-06, - "loss": 0.8887, + "learning_rate": 9.203286405491393e-06, + "loss": 0.8916, "step": 19043 }, { - "epoch": 0.5404086265607264, + "epoch": 0.5396582504463148, "grad_norm": 0.0, - "learning_rate": 9.178352445548226e-06, - "loss": 0.9437, + "learning_rate": 9.202371537065788e-06, + "loss": 0.9645, "step": 19044 }, { - "epoch": 0.5404370034052214, + "epoch": 0.5396865878885772, "grad_norm": 0.0, - "learning_rate": 9.177436478574755e-06, - "loss": 0.7646, + "learning_rate": 9.201456675358904e-06, + "loss": 0.6554, "step": 19045 }, { - "epoch": 0.5404653802497162, + "epoch": 0.5397149253308396, "grad_norm": 0.0, - "learning_rate": 9.176520518549514e-06, - "loss": 0.7206, + "learning_rate": 9.200541820378452e-06, + "loss": 0.9208, "step": 19046 }, { - "epoch": 0.5404937570942111, + "epoch": 0.5397432627731021, "grad_norm": 0.0, - "learning_rate": 9.175604565480237e-06, - "loss": 0.7976, + "learning_rate": 9.199626972132128e-06, + "loss": 0.8903, "step": 19047 }, { - "epoch": 0.540522133938706, + "epoch": 0.5397716002153645, "grad_norm": 0.0, - "learning_rate": 9.174688619374671e-06, - "loss": 0.8637, + "learning_rate": 9.19871213062765e-06, + "loss": 0.8723, "step": 19048 }, { - "epoch": 0.5405505107832009, + "epoch": 0.539799937657627, "grad_norm": 0.0, - "learning_rate": 9.173772680240547e-06, - "loss": 0.8939, + "learning_rate": 9.197797295872709e-06, + "loss": 0.7637, "step": 19049 }, { - "epoch": 0.5405788876276958, + "epoch": 0.5398282750998895, "grad_norm": 0.0, - "learning_rate": 9.1728567480856e-06, - "loss": 0.7755, + "learning_rate": 9.19688246787502e-06, + "loss": 0.968, "step": 19050 }, { - "epoch": 0.5406072644721907, + "epoch": 0.5398566125421519, "grad_norm": 0.0, - "learning_rate": 9.17194082291757e-06, - "loss": 0.7976, + "learning_rate": 9.195967646642294e-06, + "loss": 0.8031, "step": 19051 }, { - "epoch": 0.5406356413166856, + "epoch": 0.5398849499844144, "grad_norm": 0.0, - "learning_rate": 9.171024904744195e-06, - "loss": 0.8833, + "learning_rate": 9.195052832182225e-06, + "loss": 0.9057, "step": 19052 }, { - "epoch": 0.5406640181611805, + "epoch": 0.5399132874266769, "grad_norm": 0.0, - "learning_rate": 9.170108993573207e-06, - "loss": 0.855, + "learning_rate": 9.19413802450253e-06, + "loss": 0.7683, "step": 19053 }, { - "epoch": 0.5406923950056753, + "epoch": 0.5399416248689394, "grad_norm": 0.0, - "learning_rate": 9.16919308941235e-06, - "loss": 0.8321, + "learning_rate": 9.193223223610906e-06, + "loss": 0.9136, "step": 19054 }, { - "epoch": 0.5407207718501703, + "epoch": 0.5399699623112018, "grad_norm": 0.0, - "learning_rate": 9.168277192269354e-06, - "loss": 0.9999, + "learning_rate": 9.192308429515063e-06, + "loss": 0.8498, "step": 19055 }, { - "epoch": 0.5407491486946652, + "epoch": 0.5399982997534643, "grad_norm": 0.0, - "learning_rate": 9.167361302151956e-06, - "loss": 0.8717, + "learning_rate": 9.191393642222707e-06, + "loss": 0.9279, "step": 19056 }, { - "epoch": 0.54077752553916, + "epoch": 0.5400266371957267, "grad_norm": 0.0, - "learning_rate": 9.166445419067896e-06, - "loss": 0.9366, + "learning_rate": 9.190478861741542e-06, + "loss": 0.8813, "step": 19057 }, { - "epoch": 0.5408059023836549, + "epoch": 0.5400549746379891, "grad_norm": 0.0, - "learning_rate": 9.165529543024911e-06, - "loss": 0.9513, + "learning_rate": 9.189564088079272e-06, + "loss": 0.8192, "step": 19058 }, { - "epoch": 0.5408342792281499, + "epoch": 0.5400833120802516, "grad_norm": 0.0, - "learning_rate": 9.164613674030731e-06, - "loss": 0.9039, + "learning_rate": 9.18864932124361e-06, + "loss": 0.936, "step": 19059 }, { - "epoch": 0.5408626560726447, + "epoch": 0.5401116495225141, "grad_norm": 0.0, - "learning_rate": 9.163697812093101e-06, - "loss": 0.7799, + "learning_rate": 9.187734561242252e-06, + "loss": 0.876, "step": 19060 }, { - "epoch": 0.5408910329171396, + "epoch": 0.5401399869647766, "grad_norm": 0.0, - "learning_rate": 9.162781957219753e-06, - "loss": 0.8425, + "learning_rate": 9.186819808082912e-06, + "loss": 0.8635, "step": 19061 }, { - "epoch": 0.5409194097616346, + "epoch": 0.540168324407039, "grad_norm": 0.0, - "learning_rate": 9.161866109418418e-06, - "loss": 0.9025, + "learning_rate": 9.185905061773286e-06, + "loss": 0.8436, "step": 19062 }, { - "epoch": 0.5409477866061294, + "epoch": 0.5401966618493015, "grad_norm": 0.0, - "learning_rate": 9.160950268696843e-06, - "loss": 0.8063, + "learning_rate": 9.184990322321083e-06, + "loss": 0.7198, "step": 19063 }, { - "epoch": 0.5409761634506243, + "epoch": 0.540224999291564, "grad_norm": 0.0, - "learning_rate": 9.160034435062756e-06, - "loss": 0.8549, + "learning_rate": 9.184075589734013e-06, + "loss": 0.8389, "step": 19064 }, { - "epoch": 0.5410045402951191, + "epoch": 0.5402533367338264, "grad_norm": 0.0, - "learning_rate": 9.159118608523898e-06, - "loss": 0.8418, + "learning_rate": 9.183160864019774e-06, + "loss": 0.8549, "step": 19065 }, { - "epoch": 0.5410329171396141, + "epoch": 0.5402816741760889, "grad_norm": 0.0, - "learning_rate": 9.158202789088001e-06, - "loss": 0.9834, + "learning_rate": 9.182246145186076e-06, + "loss": 0.833, "step": 19066 }, { - "epoch": 0.541061293984109, + "epoch": 0.5403100116183513, "grad_norm": 0.0, - "learning_rate": 9.157286976762805e-06, - "loss": 0.9026, + "learning_rate": 9.181331433240625e-06, + "loss": 0.8657, "step": 19067 }, { - "epoch": 0.5410896708286038, + "epoch": 0.5403383490606138, "grad_norm": 0.0, - "learning_rate": 9.156371171556044e-06, - "loss": 0.8502, + "learning_rate": 9.18041672819112e-06, + "loss": 0.8402, "step": 19068 }, { - "epoch": 0.5411180476730988, + "epoch": 0.5403666865028762, "grad_norm": 0.0, - "learning_rate": 9.155455373475452e-06, - "loss": 0.7534, + "learning_rate": 9.17950203004527e-06, + "loss": 0.9174, "step": 19069 }, { - "epoch": 0.5411464245175936, + "epoch": 0.5403950239451387, "grad_norm": 0.0, - "learning_rate": 9.154539582528766e-06, - "loss": 0.7884, + "learning_rate": 9.178587338810778e-06, + "loss": 0.9341, "step": 19070 }, { - "epoch": 0.5411748013620885, + "epoch": 0.5404233613874012, "grad_norm": 0.0, - "learning_rate": 9.153623798723724e-06, - "loss": 0.8676, + "learning_rate": 9.17767265449535e-06, + "loss": 0.8161, "step": 19071 }, { - "epoch": 0.5412031782065835, + "epoch": 0.5404516988296636, "grad_norm": 0.0, - "learning_rate": 9.15270802206806e-06, - "loss": 0.8196, + "learning_rate": 9.176757977106693e-06, + "loss": 0.8596, "step": 19072 }, { - "epoch": 0.5412315550510783, + "epoch": 0.5404800362719261, "grad_norm": 0.0, - "learning_rate": 9.151792252569506e-06, - "loss": 0.855, + "learning_rate": 9.175843306652507e-06, + "loss": 0.856, "step": 19073 }, { - "epoch": 0.5412599318955732, + "epoch": 0.5405083737141886, "grad_norm": 0.0, - "learning_rate": 9.150876490235805e-06, - "loss": 0.8886, + "learning_rate": 9.1749286431405e-06, + "loss": 0.9231, "step": 19074 }, { - "epoch": 0.5412883087400681, + "epoch": 0.540536711156451, "grad_norm": 0.0, - "learning_rate": 9.149960735074688e-06, - "loss": 0.8713, + "learning_rate": 9.174013986578377e-06, + "loss": 0.9208, "step": 19075 }, { - "epoch": 0.541316685584563, + "epoch": 0.5405650485987135, "grad_norm": 0.0, - "learning_rate": 9.149044987093887e-06, - "loss": 0.8932, + "learning_rate": 9.173099336973838e-06, + "loss": 0.8538, "step": 19076 }, { - "epoch": 0.5413450624290579, + "epoch": 0.540593386040976, "grad_norm": 0.0, - "learning_rate": 9.148129246301146e-06, - "loss": 0.92, + "learning_rate": 9.172184694334592e-06, + "loss": 0.7841, "step": 19077 }, { - "epoch": 0.5413734392735527, + "epoch": 0.5406217234832384, "grad_norm": 0.0, - "learning_rate": 9.14721351270419e-06, - "loss": 0.9411, + "learning_rate": 9.17127005866834e-06, + "loss": 0.8911, "step": 19078 }, { - "epoch": 0.5414018161180477, + "epoch": 0.5406500609255008, "grad_norm": 0.0, - "learning_rate": 9.146297786310764e-06, - "loss": 0.903, + "learning_rate": 9.170355429982787e-06, + "loss": 0.8568, "step": 19079 }, { - "epoch": 0.5414301929625426, + "epoch": 0.5406783983677633, "grad_norm": 0.0, - "learning_rate": 9.145382067128596e-06, - "loss": 0.8716, + "learning_rate": 9.169440808285644e-06, + "loss": 0.7318, "step": 19080 }, { - "epoch": 0.5414585698070374, + "epoch": 0.5407067358100258, "grad_norm": 0.0, - "learning_rate": 9.144466355165428e-06, - "loss": 0.9564, + "learning_rate": 9.168526193584604e-06, + "loss": 0.9446, "step": 19081 }, { - "epoch": 0.5414869466515323, + "epoch": 0.5407350732522882, "grad_norm": 0.0, - "learning_rate": 9.14355065042899e-06, - "loss": 0.8925, + "learning_rate": 9.16761158588738e-06, + "loss": 0.9069, "step": 19082 }, { - "epoch": 0.5415153234960273, + "epoch": 0.5407634106945507, "grad_norm": 0.0, - "learning_rate": 9.142634952927017e-06, - "loss": 0.8228, + "learning_rate": 9.16669698520167e-06, + "loss": 0.8156, "step": 19083 }, { - "epoch": 0.5415437003405221, + "epoch": 0.5407917481368132, "grad_norm": 0.0, - "learning_rate": 9.141719262667246e-06, - "loss": 0.9626, + "learning_rate": 9.165782391535181e-06, + "loss": 0.8522, "step": 19084 }, { - "epoch": 0.541572077185017, + "epoch": 0.5408200855790757, "grad_norm": 0.0, - "learning_rate": 9.140803579657413e-06, - "loss": 0.8006, + "learning_rate": 9.16486780489562e-06, + "loss": 0.7717, "step": 19085 }, { - "epoch": 0.541600454029512, + "epoch": 0.5408484230213381, "grad_norm": 0.0, - "learning_rate": 9.139887903905248e-06, - "loss": 0.8855, + "learning_rate": 9.163953225290685e-06, + "loss": 0.7999, "step": 19086 }, { - "epoch": 0.5416288308740068, + "epoch": 0.5408767604636006, "grad_norm": 0.0, - "learning_rate": 9.138972235418488e-06, - "loss": 0.8447, + "learning_rate": 9.163038652728081e-06, + "loss": 0.9002, "step": 19087 }, { - "epoch": 0.5416572077185017, + "epoch": 0.540905097905863, "grad_norm": 0.0, - "learning_rate": 9.13805657420487e-06, - "loss": 0.8727, + "learning_rate": 9.162124087215519e-06, + "loss": 0.8923, "step": 19088 }, { - "epoch": 0.5416855845629966, + "epoch": 0.5409334353481254, "grad_norm": 0.0, - "learning_rate": 9.137140920272127e-06, - "loss": 0.9728, + "learning_rate": 9.161209528760691e-06, + "loss": 0.9847, "step": 19089 }, { - "epoch": 0.5417139614074915, + "epoch": 0.5409617727903879, "grad_norm": 0.0, - "learning_rate": 9.13622527362799e-06, - "loss": 0.9563, + "learning_rate": 9.160294977371309e-06, + "loss": 0.9586, "step": 19090 }, { - "epoch": 0.5417423382519864, + "epoch": 0.5409901102326504, "grad_norm": 0.0, - "learning_rate": 9.135309634280202e-06, - "loss": 0.8516, + "learning_rate": 9.159380433055074e-06, + "loss": 0.9078, "step": 19091 }, { - "epoch": 0.5417707150964812, + "epoch": 0.5410184476749129, "grad_norm": 0.0, - "learning_rate": 9.134394002236491e-06, - "loss": 0.8055, + "learning_rate": 9.158465895819687e-06, + "loss": 0.8215, "step": 19092 }, { - "epoch": 0.5417990919409762, + "epoch": 0.5410467851171753, "grad_norm": 0.0, - "learning_rate": 9.133478377504588e-06, - "loss": 0.8595, + "learning_rate": 9.157551365672859e-06, + "loss": 0.8571, "step": 19093 }, { - "epoch": 0.5418274687854711, + "epoch": 0.5410751225594378, "grad_norm": 0.0, - "learning_rate": 9.132562760092234e-06, - "loss": 0.915, + "learning_rate": 9.156636842622286e-06, + "loss": 0.8121, "step": 19094 }, { - "epoch": 0.5418558456299659, + "epoch": 0.5411034600017003, "grad_norm": 0.0, - "learning_rate": 9.131647150007164e-06, - "loss": 0.7977, + "learning_rate": 9.155722326675672e-06, + "loss": 0.813, "step": 19095 }, { - "epoch": 0.5418842224744609, + "epoch": 0.5411317974439627, "grad_norm": 0.0, - "learning_rate": 9.13073154725711e-06, - "loss": 0.7327, + "learning_rate": 9.154807817840726e-06, + "loss": 0.8088, "step": 19096 }, { - "epoch": 0.5419125993189557, + "epoch": 0.5411601348862252, "grad_norm": 0.0, - "learning_rate": 9.129815951849803e-06, - "loss": 0.9688, + "learning_rate": 9.153893316125145e-06, + "loss": 1.0493, "step": 19097 }, { - "epoch": 0.5419409761634506, + "epoch": 0.5411884723284877, "grad_norm": 0.0, - "learning_rate": 9.128900363792982e-06, - "loss": 0.9763, + "learning_rate": 9.15297882153664e-06, + "loss": 0.8172, "step": 19098 }, { - "epoch": 0.5419693530079455, + "epoch": 0.54121680977075, "grad_norm": 0.0, - "learning_rate": 9.12798478309438e-06, - "loss": 0.7897, + "learning_rate": 9.152064334082905e-06, + "loss": 0.9935, "step": 19099 }, { - "epoch": 0.5419977298524404, + "epoch": 0.5412451472130125, "grad_norm": 0.0, - "learning_rate": 9.127069209761726e-06, - "loss": 0.8336, + "learning_rate": 9.151149853771645e-06, + "loss": 0.8231, "step": 19100 }, { - "epoch": 0.5420261066969353, + "epoch": 0.541273484655275, "grad_norm": 0.0, - "learning_rate": 9.126153643802759e-06, - "loss": 0.9284, + "learning_rate": 9.15023538061057e-06, + "loss": 0.9749, "step": 19101 }, { - "epoch": 0.5420544835414302, + "epoch": 0.5413018220975375, "grad_norm": 0.0, - "learning_rate": 9.125238085225214e-06, - "loss": 1.0554, + "learning_rate": 9.149320914607374e-06, + "loss": 0.8945, "step": 19102 }, { - "epoch": 0.5420828603859251, + "epoch": 0.5413301595397999, "grad_norm": 0.0, - "learning_rate": 9.124322534036816e-06, - "loss": 0.8873, + "learning_rate": 9.148406455769762e-06, + "loss": 1.0204, "step": 19103 }, { - "epoch": 0.54211123723042, + "epoch": 0.5413584969820624, "grad_norm": 0.0, - "learning_rate": 9.123406990245312e-06, - "loss": 0.9184, + "learning_rate": 9.147492004105443e-06, + "loss": 0.8569, "step": 19104 }, { - "epoch": 0.5421396140749148, + "epoch": 0.5413868344243249, "grad_norm": 0.0, - "learning_rate": 9.122491453858425e-06, - "loss": 0.8265, + "learning_rate": 9.146577559622114e-06, + "loss": 0.816, "step": 19105 }, { - "epoch": 0.5421679909194098, + "epoch": 0.5414151718665873, "grad_norm": 0.0, - "learning_rate": 9.121575924883892e-06, - "loss": 0.8978, + "learning_rate": 9.145663122327482e-06, + "loss": 0.9294, "step": 19106 }, { - "epoch": 0.5421963677639047, + "epoch": 0.5414435093088498, "grad_norm": 0.0, - "learning_rate": 9.120660403329447e-06, - "loss": 0.9907, + "learning_rate": 9.144748692229242e-06, + "loss": 0.8549, "step": 19107 }, { - "epoch": 0.5422247446083995, + "epoch": 0.5414718467511123, "grad_norm": 0.0, - "learning_rate": 9.11974488920282e-06, - "loss": 0.8438, + "learning_rate": 9.143834269335102e-06, + "loss": 0.8561, "step": 19108 }, { - "epoch": 0.5422531214528944, + "epoch": 0.5415001841933748, "grad_norm": 0.0, - "learning_rate": 9.118829382511752e-06, - "loss": 0.8441, + "learning_rate": 9.142919853652766e-06, + "loss": 0.9355, "step": 19109 }, { - "epoch": 0.5422814982973894, + "epoch": 0.5415285216356371, "grad_norm": 0.0, - "learning_rate": 9.117913883263972e-06, - "loss": 0.9654, + "learning_rate": 9.142005445189933e-06, + "loss": 0.9566, "step": 19110 }, { - "epoch": 0.5423098751418842, + "epoch": 0.5415568590778996, "grad_norm": 0.0, - "learning_rate": 9.11699839146721e-06, - "loss": 0.9829, + "learning_rate": 9.14109104395431e-06, + "loss": 1.0236, "step": 19111 }, { - "epoch": 0.5423382519863791, + "epoch": 0.5415851965201621, "grad_norm": 0.0, - "learning_rate": 9.116082907129204e-06, - "loss": 0.9754, + "learning_rate": 9.140176649953592e-06, + "loss": 1.0427, "step": 19112 }, { - "epoch": 0.542366628830874, + "epoch": 0.5416135339624245, "grad_norm": 0.0, - "learning_rate": 9.115167430257687e-06, - "loss": 0.821, + "learning_rate": 9.139262263195485e-06, + "loss": 0.8355, "step": 19113 }, { - "epoch": 0.5423950056753689, + "epoch": 0.541641871404687, "grad_norm": 0.0, - "learning_rate": 9.114251960860387e-06, - "loss": 0.8936, + "learning_rate": 9.138347883687695e-06, + "loss": 0.7882, "step": 19114 }, { - "epoch": 0.5424233825198638, + "epoch": 0.5416702088469495, "grad_norm": 0.0, - "learning_rate": 9.113336498945045e-06, - "loss": 0.8981, + "learning_rate": 9.13743351143792e-06, + "loss": 0.8131, "step": 19115 }, { - "epoch": 0.5424517593643586, + "epoch": 0.541698546289212, "grad_norm": 0.0, - "learning_rate": 9.112421044519388e-06, - "loss": 0.7289, + "learning_rate": 9.13651914645386e-06, + "loss": 0.9188, "step": 19116 }, { - "epoch": 0.5424801362088536, + "epoch": 0.5417268837314744, "grad_norm": 0.0, - "learning_rate": 9.111505597591147e-06, - "loss": 0.9229, + "learning_rate": 9.135604788743222e-06, + "loss": 0.8885, "step": 19117 }, { - "epoch": 0.5425085130533485, + "epoch": 0.5417552211737369, "grad_norm": 0.0, - "learning_rate": 9.110590158168061e-06, - "loss": 0.9721, + "learning_rate": 9.134690438313704e-06, + "loss": 1.0302, "step": 19118 }, { - "epoch": 0.5425368898978433, + "epoch": 0.5417835586159994, "grad_norm": 0.0, - "learning_rate": 9.10967472625786e-06, - "loss": 0.7985, + "learning_rate": 9.133776095173015e-06, + "loss": 0.9708, "step": 19119 }, { - "epoch": 0.5425652667423383, + "epoch": 0.5418118960582617, "grad_norm": 0.0, - "learning_rate": 9.108759301868274e-06, - "loss": 0.7576, + "learning_rate": 9.132861759328845e-06, + "loss": 0.91, "step": 19120 }, { - "epoch": 0.5425936435868332, + "epoch": 0.5418402335005242, "grad_norm": 0.0, - "learning_rate": 9.107843885007042e-06, - "loss": 0.9683, + "learning_rate": 9.131947430788905e-06, + "loss": 0.8287, "step": 19121 }, { - "epoch": 0.542622020431328, + "epoch": 0.5418685709427867, "grad_norm": 0.0, - "learning_rate": 9.106928475681893e-06, - "loss": 0.9393, + "learning_rate": 9.131033109560896e-06, + "loss": 0.8252, "step": 19122 }, { - "epoch": 0.542650397275823, + "epoch": 0.5418969083850491, "grad_norm": 0.0, - "learning_rate": 9.106013073900554e-06, - "loss": 0.9173, + "learning_rate": 9.130118795652515e-06, + "loss": 0.8691, "step": 19123 }, { - "epoch": 0.5426787741203178, + "epoch": 0.5419252458273116, "grad_norm": 0.0, - "learning_rate": 9.105097679670764e-06, - "loss": 0.8495, + "learning_rate": 9.129204489071465e-06, + "loss": 0.8625, "step": 19124 }, { - "epoch": 0.5427071509648127, + "epoch": 0.5419535832695741, "grad_norm": 0.0, - "learning_rate": 9.104182293000256e-06, - "loss": 0.9763, + "learning_rate": 9.128290189825457e-06, + "loss": 0.9363, "step": 19125 }, { - "epoch": 0.5427355278093076, + "epoch": 0.5419819207118366, "grad_norm": 0.0, - "learning_rate": 9.103266913896763e-06, - "loss": 0.7969, + "learning_rate": 9.127375897922177e-06, + "loss": 0.6759, "step": 19126 }, { - "epoch": 0.5427639046538025, + "epoch": 0.542010258154099, "grad_norm": 0.0, - "learning_rate": 9.102351542368012e-06, - "loss": 0.9658, + "learning_rate": 9.126461613369337e-06, + "loss": 0.8742, "step": 19127 }, { - "epoch": 0.5427922814982974, + "epoch": 0.5420385955963615, "grad_norm": 0.0, - "learning_rate": 9.101436178421736e-06, - "loss": 0.8451, + "learning_rate": 9.125547336174634e-06, + "loss": 0.8589, "step": 19128 }, { - "epoch": 0.5428206583427923, + "epoch": 0.542066933038624, "grad_norm": 0.0, - "learning_rate": 9.100520822065672e-06, - "loss": 0.8388, + "learning_rate": 9.12463306634577e-06, + "loss": 0.8816, "step": 19129 }, { - "epoch": 0.5428490351872872, + "epoch": 0.5420952704808863, "grad_norm": 0.0, - "learning_rate": 9.09960547330755e-06, - "loss": 0.8024, + "learning_rate": 9.123718803890449e-06, + "loss": 0.8548, "step": 19130 }, { - "epoch": 0.5428774120317821, + "epoch": 0.5421236079231488, "grad_norm": 0.0, - "learning_rate": 9.098690132155098e-06, - "loss": 0.8905, + "learning_rate": 9.122804548816366e-06, + "loss": 0.9081, "step": 19131 }, { - "epoch": 0.5429057888762769, + "epoch": 0.5421519453654113, "grad_norm": 0.0, - "learning_rate": 9.097774798616053e-06, - "loss": 0.8684, + "learning_rate": 9.121890301131227e-06, + "loss": 0.9503, "step": 19132 }, { - "epoch": 0.5429341657207718, + "epoch": 0.5421802828076738, "grad_norm": 0.0, - "learning_rate": 9.096859472698145e-06, - "loss": 0.8488, + "learning_rate": 9.120976060842736e-06, + "loss": 0.9996, "step": 19133 }, { - "epoch": 0.5429625425652668, + "epoch": 0.5422086202499362, "grad_norm": 0.0, - "learning_rate": 9.095944154409103e-06, - "loss": 0.8751, + "learning_rate": 9.120061827958586e-06, + "loss": 0.8256, "step": 19134 }, { - "epoch": 0.5429909194097616, + "epoch": 0.5422369576921987, "grad_norm": 0.0, - "learning_rate": 9.095028843756663e-06, - "loss": 0.9444, + "learning_rate": 9.119147602486484e-06, + "loss": 0.8341, "step": 19135 }, { - "epoch": 0.5430192962542565, + "epoch": 0.5422652951344612, "grad_norm": 0.0, - "learning_rate": 9.094113540748556e-06, - "loss": 0.8676, + "learning_rate": 9.118233384434128e-06, + "loss": 0.8374, "step": 19136 }, { - "epoch": 0.5430476730987515, + "epoch": 0.5422936325767236, "grad_norm": 0.0, - "learning_rate": 9.093198245392512e-06, - "loss": 0.8798, + "learning_rate": 9.117319173809218e-06, + "loss": 0.8731, "step": 19137 }, { - "epoch": 0.5430760499432463, + "epoch": 0.5423219700189861, "grad_norm": 0.0, - "learning_rate": 9.092282957696264e-06, - "loss": 0.8371, + "learning_rate": 9.116404970619461e-06, + "loss": 0.8202, "step": 19138 }, { - "epoch": 0.5431044267877412, + "epoch": 0.5423503074612486, "grad_norm": 0.0, - "learning_rate": 9.091367677667538e-06, - "loss": 0.8644, + "learning_rate": 9.115490774872549e-06, + "loss": 0.7892, "step": 19139 }, { - "epoch": 0.5431328036322361, + "epoch": 0.542378644903511, "grad_norm": 0.0, - "learning_rate": 9.090452405314074e-06, - "loss": 0.7919, + "learning_rate": 9.11457658657619e-06, + "loss": 0.8861, "step": 19140 }, { - "epoch": 0.543161180476731, + "epoch": 0.5424069823457734, "grad_norm": 0.0, - "learning_rate": 9.089537140643598e-06, - "loss": 0.8469, + "learning_rate": 9.113662405738077e-06, + "loss": 0.8704, "step": 19141 }, { - "epoch": 0.5431895573212259, + "epoch": 0.5424353197880359, "grad_norm": 0.0, - "learning_rate": 9.088621883663845e-06, - "loss": 0.7928, + "learning_rate": 9.112748232365916e-06, + "loss": 0.836, "step": 19142 }, { - "epoch": 0.5432179341657207, + "epoch": 0.5424636572302984, "grad_norm": 0.0, - "learning_rate": 9.087706634382543e-06, - "loss": 0.7958, + "learning_rate": 9.11183406646741e-06, + "loss": 0.8021, "step": 19143 }, { - "epoch": 0.5432463110102157, + "epoch": 0.5424919946725608, "grad_norm": 0.0, - "learning_rate": 9.08679139280742e-06, - "loss": 0.9119, + "learning_rate": 9.110919908050251e-06, + "loss": 0.9319, "step": 19144 }, { - "epoch": 0.5432746878547106, + "epoch": 0.5425203321148233, "grad_norm": 0.0, - "learning_rate": 9.085876158946217e-06, - "loss": 0.8098, + "learning_rate": 9.110005757122144e-06, + "loss": 0.9536, "step": 19145 }, { - "epoch": 0.5433030646992054, + "epoch": 0.5425486695570858, "grad_norm": 0.0, - "learning_rate": 9.084960932806659e-06, - "loss": 0.8382, + "learning_rate": 9.109091613690794e-06, + "loss": 0.8345, "step": 19146 }, { - "epoch": 0.5433314415437004, + "epoch": 0.5425770069993482, "grad_norm": 0.0, - "learning_rate": 9.084045714396473e-06, - "loss": 1.0353, + "learning_rate": 9.108177477763891e-06, + "loss": 0.8147, "step": 19147 }, { - "epoch": 0.5433598183881952, + "epoch": 0.5426053444416107, "grad_norm": 0.0, - "learning_rate": 9.083130503723397e-06, - "loss": 0.8686, + "learning_rate": 9.107263349349143e-06, + "loss": 1.0311, "step": 19148 }, { - "epoch": 0.5433881952326901, + "epoch": 0.5426336818838732, "grad_norm": 0.0, - "learning_rate": 9.082215300795159e-06, - "loss": 0.9493, + "learning_rate": 9.106349228454242e-06, + "loss": 0.97, "step": 19149 }, { - "epoch": 0.543416572077185, + "epoch": 0.5426620193261357, "grad_norm": 0.0, - "learning_rate": 9.081300105619488e-06, - "loss": 0.9078, + "learning_rate": 9.105435115086898e-06, + "loss": 0.7299, "step": 19150 }, { - "epoch": 0.5434449489216799, + "epoch": 0.542690356768398, "grad_norm": 0.0, - "learning_rate": 9.080384918204118e-06, - "loss": 0.9303, + "learning_rate": 9.104521009254807e-06, + "loss": 0.808, "step": 19151 }, { - "epoch": 0.5434733257661748, + "epoch": 0.5427186942106605, "grad_norm": 0.0, - "learning_rate": 9.079469738556777e-06, - "loss": 0.8534, + "learning_rate": 9.103606910965666e-06, + "loss": 0.951, "step": 19152 }, { - "epoch": 0.5435017026106697, + "epoch": 0.542747031652923, "grad_norm": 0.0, - "learning_rate": 9.078554566685198e-06, - "loss": 0.9335, + "learning_rate": 9.102692820227177e-06, + "loss": 0.9226, "step": 19153 }, { - "epoch": 0.5435300794551646, + "epoch": 0.5427753690951854, "grad_norm": 0.0, - "learning_rate": 9.077639402597105e-06, - "loss": 0.7507, + "learning_rate": 9.10177873704704e-06, + "loss": 0.8763, "step": 19154 }, { - "epoch": 0.5435584562996595, + "epoch": 0.5428037065374479, "grad_norm": 0.0, - "learning_rate": 9.076724246300237e-06, - "loss": 0.858, + "learning_rate": 9.100864661432952e-06, + "loss": 0.9031, "step": 19155 }, { - "epoch": 0.5435868331441543, + "epoch": 0.5428320439797104, "grad_norm": 0.0, - "learning_rate": 9.075809097802319e-06, - "loss": 0.7967, + "learning_rate": 9.099950593392622e-06, + "loss": 0.8284, "step": 19156 }, { - "epoch": 0.5436152099886493, + "epoch": 0.5428603814219729, "grad_norm": 0.0, - "learning_rate": 9.074893957111087e-06, - "loss": 0.8538, + "learning_rate": 9.099036532933736e-06, + "loss": 0.8513, "step": 19157 }, { - "epoch": 0.5436435868331442, + "epoch": 0.5428887188642353, "grad_norm": 0.0, - "learning_rate": 9.073978824234262e-06, - "loss": 0.6791, + "learning_rate": 9.098122480064e-06, + "loss": 0.7908, "step": 19158 }, { - "epoch": 0.543671963677639, + "epoch": 0.5429170563064978, "grad_norm": 0.0, - "learning_rate": 9.073063699179586e-06, - "loss": 0.9633, + "learning_rate": 9.097208434791118e-06, + "loss": 0.8872, "step": 19159 }, { - "epoch": 0.5437003405221339, + "epoch": 0.5429453937487603, "grad_norm": 0.0, - "learning_rate": 9.072148581954778e-06, - "loss": 0.8938, + "learning_rate": 9.09629439712278e-06, + "loss": 0.8247, "step": 19160 }, { - "epoch": 0.5437287173666289, + "epoch": 0.5429737311910227, "grad_norm": 0.0, - "learning_rate": 9.071233472567573e-06, - "loss": 0.8811, + "learning_rate": 9.095380367066691e-06, + "loss": 0.9287, "step": 19161 }, { - "epoch": 0.5437570942111237, + "epoch": 0.5430020686332852, "grad_norm": 0.0, - "learning_rate": 9.070318371025702e-06, - "loss": 1.0215, + "learning_rate": 9.094466344630548e-06, + "loss": 0.9552, "step": 19162 }, { - "epoch": 0.5437854710556186, + "epoch": 0.5430304060755476, "grad_norm": 0.0, - "learning_rate": 9.069403277336895e-06, - "loss": 0.8431, + "learning_rate": 9.09355232982205e-06, + "loss": 0.8787, "step": 19163 }, { - "epoch": 0.5438138479001136, + "epoch": 0.54305874351781, "grad_norm": 0.0, - "learning_rate": 9.068488191508876e-06, - "loss": 0.8289, + "learning_rate": 9.092638322648904e-06, + "loss": 0.8409, "step": 19164 }, { - "epoch": 0.5438422247446084, + "epoch": 0.5430870809600725, "grad_norm": 0.0, - "learning_rate": 9.067573113549383e-06, - "loss": 0.8029, + "learning_rate": 9.091724323118797e-06, + "loss": 0.8908, "step": 19165 }, { - "epoch": 0.5438706015891033, + "epoch": 0.543115418402335, "grad_norm": 0.0, - "learning_rate": 9.06665804346614e-06, - "loss": 0.7655, + "learning_rate": 9.090810331239432e-06, + "loss": 0.9077, "step": 19166 }, { - "epoch": 0.5438989784335981, + "epoch": 0.5431437558445975, "grad_norm": 0.0, - "learning_rate": 9.065742981266878e-06, - "loss": 0.8951, + "learning_rate": 9.089896347018512e-06, + "loss": 0.9323, "step": 19167 }, { - "epoch": 0.5439273552780931, + "epoch": 0.5431720932868599, "grad_norm": 0.0, - "learning_rate": 9.064827926959329e-06, - "loss": 0.9877, + "learning_rate": 9.088982370463732e-06, + "loss": 0.8208, "step": 19168 }, { - "epoch": 0.543955732122588, + "epoch": 0.5432004307291224, "grad_norm": 0.0, - "learning_rate": 9.06391288055122e-06, - "loss": 0.9365, + "learning_rate": 9.088068401582795e-06, + "loss": 0.8611, "step": 19169 }, { - "epoch": 0.5439841089670828, + "epoch": 0.5432287681713849, "grad_norm": 0.0, - "learning_rate": 9.062997842050276e-06, - "loss": 0.8614, + "learning_rate": 9.087154440383394e-06, + "loss": 0.8391, "step": 19170 }, { - "epoch": 0.5440124858115778, + "epoch": 0.5432571056136473, "grad_norm": 0.0, - "learning_rate": 9.062082811464236e-06, - "loss": 0.8598, + "learning_rate": 9.086240486873227e-06, + "loss": 0.7651, "step": 19171 }, { - "epoch": 0.5440408626560727, + "epoch": 0.5432854430559098, "grad_norm": 0.0, - "learning_rate": 9.061167788800825e-06, - "loss": 0.9216, + "learning_rate": 9.085326541060002e-06, + "loss": 0.8974, "step": 19172 }, { - "epoch": 0.5440692395005675, + "epoch": 0.5433137804981722, "grad_norm": 0.0, - "learning_rate": 9.060252774067771e-06, - "loss": 0.8325, + "learning_rate": 9.084412602951406e-06, + "loss": 0.8091, "step": 19173 }, { - "epoch": 0.5440976163450624, + "epoch": 0.5433421179404347, "grad_norm": 0.0, - "learning_rate": 9.059337767272806e-06, - "loss": 0.9138, + "learning_rate": 9.083498672555144e-06, + "loss": 0.8664, "step": 19174 }, { - "epoch": 0.5441259931895573, + "epoch": 0.5433704553826971, "grad_norm": 0.0, - "learning_rate": 9.058422768423653e-06, - "loss": 0.8152, + "learning_rate": 9.082584749878915e-06, + "loss": 0.8225, "step": 19175 }, { - "epoch": 0.5441543700340522, + "epoch": 0.5433987928249596, "grad_norm": 0.0, - "learning_rate": 9.057507777528048e-06, - "loss": 0.9669, + "learning_rate": 9.081670834930413e-06, + "loss": 0.7696, "step": 19176 }, { - "epoch": 0.5441827468785471, + "epoch": 0.5434271302672221, "grad_norm": 0.0, - "learning_rate": 9.056592794593719e-06, - "loss": 0.8909, + "learning_rate": 9.080756927717343e-06, + "loss": 0.89, "step": 19177 }, { - "epoch": 0.544211123723042, + "epoch": 0.5434554677094845, "grad_norm": 0.0, - "learning_rate": 9.05567781962839e-06, - "loss": 0.8464, + "learning_rate": 9.079843028247393e-06, + "loss": 0.8595, "step": 19178 }, { - "epoch": 0.5442395005675369, + "epoch": 0.543483805151747, "grad_norm": 0.0, - "learning_rate": 9.054762852639793e-06, - "loss": 0.845, + "learning_rate": 9.078929136528267e-06, + "loss": 0.9078, "step": 19179 }, { - "epoch": 0.5442678774120318, + "epoch": 0.5435121425940095, "grad_norm": 0.0, - "learning_rate": 9.053847893635658e-06, - "loss": 0.8257, + "learning_rate": 9.078015252567667e-06, + "loss": 0.9083, "step": 19180 }, { - "epoch": 0.5442962542565267, + "epoch": 0.543540480036272, "grad_norm": 0.0, - "learning_rate": 9.05293294262371e-06, - "loss": 0.8441, + "learning_rate": 9.077101376373282e-06, + "loss": 0.9372, "step": 19181 }, { - "epoch": 0.5443246311010216, + "epoch": 0.5435688174785344, "grad_norm": 0.0, - "learning_rate": 9.052017999611684e-06, - "loss": 0.9375, + "learning_rate": 9.076187507952816e-06, + "loss": 0.8673, "step": 19182 }, { - "epoch": 0.5443530079455164, + "epoch": 0.5435971549207969, "grad_norm": 0.0, - "learning_rate": 9.051103064607303e-06, - "loss": 0.9018, + "learning_rate": 9.075273647313971e-06, + "loss": 0.8641, "step": 19183 }, { - "epoch": 0.5443813847900113, + "epoch": 0.5436254923630593, "grad_norm": 0.0, - "learning_rate": 9.050188137618296e-06, - "loss": 0.9399, + "learning_rate": 9.074359794464436e-06, + "loss": 0.9071, "step": 19184 }, { - "epoch": 0.5444097616345063, + "epoch": 0.5436538298053217, "grad_norm": 0.0, - "learning_rate": 9.04927321865239e-06, - "loss": 0.9546, + "learning_rate": 9.073445949411914e-06, + "loss": 0.9497, "step": 19185 }, { - "epoch": 0.5444381384790011, + "epoch": 0.5436821672475842, "grad_norm": 0.0, - "learning_rate": 9.048358307717318e-06, - "loss": 0.8854, + "learning_rate": 9.072532112164097e-06, + "loss": 0.785, "step": 19186 }, { - "epoch": 0.544466515323496, + "epoch": 0.5437105046898467, "grad_norm": 0.0, - "learning_rate": 9.047443404820808e-06, - "loss": 0.8989, + "learning_rate": 9.071618282728689e-06, + "loss": 0.8655, "step": 19187 }, { - "epoch": 0.544494892167991, + "epoch": 0.5437388421321091, "grad_norm": 0.0, - "learning_rate": 9.046528509970584e-06, - "loss": 0.9343, + "learning_rate": 9.070704461113385e-06, + "loss": 0.8475, "step": 19188 }, { - "epoch": 0.5445232690124858, + "epoch": 0.5437671795743716, "grad_norm": 0.0, - "learning_rate": 9.045613623174378e-06, - "loss": 0.8012, + "learning_rate": 9.069790647325879e-06, + "loss": 0.8906, "step": 19189 }, { - "epoch": 0.5445516458569807, + "epoch": 0.5437955170166341, "grad_norm": 0.0, - "learning_rate": 9.044698744439918e-06, - "loss": 0.927, + "learning_rate": 9.068876841373878e-06, + "loss": 0.829, "step": 19190 }, { - "epoch": 0.5445800227014755, + "epoch": 0.5438238544588966, "grad_norm": 0.0, - "learning_rate": 9.04378387377493e-06, - "loss": 0.7549, + "learning_rate": 9.067963043265068e-06, + "loss": 0.8631, "step": 19191 }, { - "epoch": 0.5446083995459705, + "epoch": 0.543852191901159, "grad_norm": 0.0, - "learning_rate": 9.042869011187142e-06, - "loss": 0.8592, + "learning_rate": 9.067049253007151e-06, + "loss": 0.9027, "step": 19192 }, { - "epoch": 0.5446367763904654, + "epoch": 0.5438805293434215, "grad_norm": 0.0, - "learning_rate": 9.041954156684282e-06, - "loss": 0.8726, + "learning_rate": 9.066135470607828e-06, + "loss": 0.8652, "step": 19193 }, { - "epoch": 0.5446651532349602, + "epoch": 0.543908866785684, "grad_norm": 0.0, - "learning_rate": 9.04103931027408e-06, - "loss": 0.9171, + "learning_rate": 9.06522169607479e-06, + "loss": 0.8374, "step": 19194 }, { - "epoch": 0.5446935300794552, + "epoch": 0.5439372042279463, "grad_norm": 0.0, - "learning_rate": 9.040124471964258e-06, - "loss": 0.8691, + "learning_rate": 9.064307929415737e-06, + "loss": 0.8768, "step": 19195 }, { - "epoch": 0.5447219069239501, + "epoch": 0.5439655416702088, "grad_norm": 0.0, - "learning_rate": 9.039209641762552e-06, - "loss": 0.8309, + "learning_rate": 9.063394170638369e-06, + "loss": 0.7771, "step": 19196 }, { - "epoch": 0.5447502837684449, + "epoch": 0.5439938791124713, "grad_norm": 0.0, - "learning_rate": 9.038294819676685e-06, - "loss": 0.8291, + "learning_rate": 9.062480419750377e-06, + "loss": 0.8914, "step": 19197 }, { - "epoch": 0.5447786606129399, + "epoch": 0.5440222165547338, "grad_norm": 0.0, - "learning_rate": 9.037380005714382e-06, - "loss": 0.913, + "learning_rate": 9.061566676759464e-06, + "loss": 0.8618, "step": 19198 }, { - "epoch": 0.5448070374574348, + "epoch": 0.5440505539969962, "grad_norm": 0.0, - "learning_rate": 9.036465199883375e-06, - "loss": 0.9312, + "learning_rate": 9.060652941673317e-06, + "loss": 0.9419, "step": 19199 }, { - "epoch": 0.5448354143019296, + "epoch": 0.5440788914392587, "grad_norm": 0.0, - "learning_rate": 9.035550402191386e-06, - "loss": 0.7952, + "learning_rate": 9.059739214499643e-06, + "loss": 0.9036, "step": 19200 }, { - "epoch": 0.5448637911464245, + "epoch": 0.5441072288815212, "grad_norm": 0.0, - "learning_rate": 9.034635612646151e-06, - "loss": 0.9943, + "learning_rate": 9.058825495246134e-06, + "loss": 0.8318, "step": 19201 }, { - "epoch": 0.5448921679909194, + "epoch": 0.5441355663237836, "grad_norm": 0.0, - "learning_rate": 9.033720831255391e-06, - "loss": 0.8314, + "learning_rate": 9.057911783920487e-06, + "loss": 0.8492, "step": 19202 }, { - "epoch": 0.5449205448354143, + "epoch": 0.5441639037660461, "grad_norm": 0.0, - "learning_rate": 9.032806058026835e-06, - "loss": 0.8747, + "learning_rate": 9.056998080530398e-06, + "loss": 0.8617, "step": 19203 }, { - "epoch": 0.5449489216799092, + "epoch": 0.5441922412083086, "grad_norm": 0.0, - "learning_rate": 9.03189129296821e-06, - "loss": 0.8282, + "learning_rate": 9.056084385083569e-06, + "loss": 0.857, "step": 19204 }, { - "epoch": 0.5449772985244041, + "epoch": 0.544220578650571, "grad_norm": 0.0, - "learning_rate": 9.030976536087242e-06, - "loss": 0.8795, + "learning_rate": 9.055170697587688e-06, + "loss": 0.9603, "step": 19205 }, { - "epoch": 0.545005675368899, + "epoch": 0.5442489160928334, "grad_norm": 0.0, - "learning_rate": 9.03006178739166e-06, - "loss": 0.8564, + "learning_rate": 9.054257018050456e-06, + "loss": 0.9676, "step": 19206 }, { - "epoch": 0.5450340522133938, + "epoch": 0.5442772535350959, "grad_norm": 0.0, - "learning_rate": 9.02914704688919e-06, - "loss": 0.8494, + "learning_rate": 9.053343346479567e-06, + "loss": 0.8506, "step": 19207 }, { - "epoch": 0.5450624290578887, + "epoch": 0.5443055909773584, "grad_norm": 0.0, - "learning_rate": 9.028232314587556e-06, - "loss": 0.865, + "learning_rate": 9.052429682882717e-06, + "loss": 0.7857, "step": 19208 }, { - "epoch": 0.5450908059023837, + "epoch": 0.5443339284196208, "grad_norm": 0.0, - "learning_rate": 9.027317590494488e-06, - "loss": 0.8082, + "learning_rate": 9.05151602726761e-06, + "loss": 0.8793, "step": 19209 }, { - "epoch": 0.5451191827468785, + "epoch": 0.5443622658618833, "grad_norm": 0.0, - "learning_rate": 9.026402874617713e-06, - "loss": 0.7982, + "learning_rate": 9.05060237964193e-06, + "loss": 0.9413, "step": 19210 }, { - "epoch": 0.5451475595913734, + "epoch": 0.5443906033041458, "grad_norm": 0.0, - "learning_rate": 9.025488166964955e-06, - "loss": 0.8539, + "learning_rate": 9.04968874001338e-06, + "loss": 0.9991, "step": 19211 }, { - "epoch": 0.5451759364358684, + "epoch": 0.5444189407464082, "grad_norm": 0.0, - "learning_rate": 9.024573467543945e-06, - "loss": 0.9075, + "learning_rate": 9.048775108389658e-06, + "loss": 0.7775, "step": 19212 }, { - "epoch": 0.5452043132803632, + "epoch": 0.5444472781886707, "grad_norm": 0.0, - "learning_rate": 9.023658776362406e-06, - "loss": 0.8153, + "learning_rate": 9.047861484778454e-06, + "loss": 0.8744, "step": 19213 }, { - "epoch": 0.5452326901248581, + "epoch": 0.5444756156309332, "grad_norm": 0.0, - "learning_rate": 9.022744093428062e-06, - "loss": 0.8484, + "learning_rate": 9.046947869187465e-06, + "loss": 0.8415, "step": 19214 }, { - "epoch": 0.5452610669693531, + "epoch": 0.5445039530731957, "grad_norm": 0.0, - "learning_rate": 9.021829418748647e-06, - "loss": 0.8574, + "learning_rate": 9.046034261624389e-06, + "loss": 0.9655, "step": 19215 }, { - "epoch": 0.5452894438138479, + "epoch": 0.544532290515458, "grad_norm": 0.0, - "learning_rate": 9.020914752331877e-06, - "loss": 0.8716, + "learning_rate": 9.045120662096917e-06, + "loss": 0.8693, "step": 19216 }, { - "epoch": 0.5453178206583428, + "epoch": 0.5445606279577205, "grad_norm": 0.0, - "learning_rate": 9.02000009418549e-06, - "loss": 0.7579, + "learning_rate": 9.044207070612756e-06, + "loss": 0.9432, "step": 19217 }, { - "epoch": 0.5453461975028376, + "epoch": 0.544588965399983, "grad_norm": 0.0, - "learning_rate": 9.019085444317204e-06, - "loss": 0.9003, + "learning_rate": 9.043293487179588e-06, + "loss": 0.9944, "step": 19218 }, { - "epoch": 0.5453745743473326, + "epoch": 0.5446173028422454, "grad_norm": 0.0, - "learning_rate": 9.018170802734748e-06, - "loss": 0.9728, + "learning_rate": 9.042379911805117e-06, + "loss": 0.8711, "step": 19219 }, { - "epoch": 0.5454029511918275, + "epoch": 0.5446456402845079, "grad_norm": 0.0, - "learning_rate": 9.017256169445847e-06, - "loss": 0.8507, + "learning_rate": 9.041466344497033e-06, + "loss": 0.7771, "step": 19220 }, { - "epoch": 0.5454313280363223, + "epoch": 0.5446739777267704, "grad_norm": 0.0, - "learning_rate": 9.016341544458227e-06, - "loss": 0.8932, + "learning_rate": 9.040552785263036e-06, + "loss": 0.8731, "step": 19221 }, { - "epoch": 0.5454597048808173, + "epoch": 0.5447023151690329, "grad_norm": 0.0, - "learning_rate": 9.015426927779613e-06, - "loss": 0.8718, + "learning_rate": 9.03963923411082e-06, + "loss": 0.995, "step": 19222 }, { - "epoch": 0.5454880817253122, + "epoch": 0.5447306526112953, "grad_norm": 0.0, - "learning_rate": 9.014512319417735e-06, - "loss": 0.8837, + "learning_rate": 9.038725691048076e-06, + "loss": 0.9212, "step": 19223 }, { - "epoch": 0.545516458569807, + "epoch": 0.5447589900535578, "grad_norm": 0.0, - "learning_rate": 9.013597719380316e-06, - "loss": 0.8271, + "learning_rate": 9.037812156082503e-06, + "loss": 0.8711, "step": 19224 }, { - "epoch": 0.5455448354143019, + "epoch": 0.5447873274958203, "grad_norm": 0.0, - "learning_rate": 9.012683127675078e-06, - "loss": 0.9717, + "learning_rate": 9.0368986292218e-06, + "loss": 0.8529, "step": 19225 }, { - "epoch": 0.5455732122587968, + "epoch": 0.5448156649380826, "grad_norm": 0.0, - "learning_rate": 9.011768544309753e-06, - "loss": 0.8884, + "learning_rate": 9.035985110473654e-06, + "loss": 0.804, "step": 19226 }, { - "epoch": 0.5456015891032917, + "epoch": 0.5448440023803451, "grad_norm": 0.0, - "learning_rate": 9.010853969292064e-06, - "loss": 0.9561, + "learning_rate": 9.035071599845763e-06, + "loss": 0.7707, "step": 19227 }, { - "epoch": 0.5456299659477866, + "epoch": 0.5448723398226076, "grad_norm": 0.0, - "learning_rate": 9.009939402629732e-06, - "loss": 0.8687, + "learning_rate": 9.034158097345823e-06, + "loss": 0.8937, "step": 19228 }, { - "epoch": 0.5456583427922815, + "epoch": 0.5449006772648701, "grad_norm": 0.0, - "learning_rate": 9.009024844330489e-06, - "loss": 0.8799, + "learning_rate": 9.033244602981527e-06, + "loss": 0.9774, "step": 19229 }, { - "epoch": 0.5456867196367764, + "epoch": 0.5449290147071325, "grad_norm": 0.0, - "learning_rate": 9.008110294402057e-06, - "loss": 0.9245, + "learning_rate": 9.032331116760574e-06, + "loss": 0.9165, "step": 19230 }, { - "epoch": 0.5457150964812713, + "epoch": 0.544957352149395, "grad_norm": 0.0, - "learning_rate": 9.00719575285216e-06, - "loss": 0.8914, + "learning_rate": 9.031417638690653e-06, + "loss": 0.8997, "step": 19231 }, { - "epoch": 0.5457434733257662, + "epoch": 0.5449856895916575, "grad_norm": 0.0, - "learning_rate": 9.006281219688527e-06, - "loss": 0.921, + "learning_rate": 9.030504168779458e-06, + "loss": 0.7305, "step": 19232 }, { - "epoch": 0.5457718501702611, + "epoch": 0.5450140270339199, "grad_norm": 0.0, - "learning_rate": 9.005366694918882e-06, - "loss": 0.8666, + "learning_rate": 9.029590707034691e-06, + "loss": 0.9009, "step": 19233 }, { - "epoch": 0.545800227014756, + "epoch": 0.5450423644761824, "grad_norm": 0.0, - "learning_rate": 9.004452178550949e-06, - "loss": 0.9274, + "learning_rate": 9.02867725346404e-06, + "loss": 0.7335, "step": 19234 }, { - "epoch": 0.5458286038592508, + "epoch": 0.5450707019184449, "grad_norm": 0.0, - "learning_rate": 9.003537670592455e-06, - "loss": 0.8861, + "learning_rate": 9.027763808075202e-06, + "loss": 0.8549, "step": 19235 }, { - "epoch": 0.5458569807037458, + "epoch": 0.5450990393607072, "grad_norm": 0.0, - "learning_rate": 9.002623171051118e-06, - "loss": 0.8442, + "learning_rate": 9.026850370875871e-06, + "loss": 0.9325, "step": 19236 }, { - "epoch": 0.5458853575482406, + "epoch": 0.5451273768029697, "grad_norm": 0.0, - "learning_rate": 9.001708679934672e-06, - "loss": 0.8033, + "learning_rate": 9.025936941873737e-06, + "loss": 0.8956, "step": 19237 }, { - "epoch": 0.5459137343927355, + "epoch": 0.5451557142452322, "grad_norm": 0.0, - "learning_rate": 9.000794197250836e-06, - "loss": 0.8568, + "learning_rate": 9.025023521076504e-06, + "loss": 0.831, "step": 19238 }, { - "epoch": 0.5459421112372305, + "epoch": 0.5451840516874947, "grad_norm": 0.0, - "learning_rate": 8.999879723007338e-06, - "loss": 0.8805, + "learning_rate": 9.024110108491855e-06, + "loss": 0.9569, "step": 19239 }, { - "epoch": 0.5459704880817253, + "epoch": 0.5452123891297571, "grad_norm": 0.0, - "learning_rate": 8.9989652572119e-06, - "loss": 0.8413, + "learning_rate": 9.023196704127489e-06, + "loss": 0.7976, "step": 19240 }, { - "epoch": 0.5459988649262202, + "epoch": 0.5452407265720196, "grad_norm": 0.0, - "learning_rate": 8.998050799872248e-06, - "loss": 0.8804, + "learning_rate": 9.022283307991102e-06, + "loss": 0.9286, "step": 19241 }, { - "epoch": 0.546027241770715, + "epoch": 0.5452690640142821, "grad_norm": 0.0, - "learning_rate": 8.997136350996104e-06, - "loss": 0.9562, + "learning_rate": 9.021369920090384e-06, + "loss": 0.8755, "step": 19242 }, { - "epoch": 0.54605561861521, + "epoch": 0.5452974014565445, "grad_norm": 0.0, - "learning_rate": 8.996221910591196e-06, - "loss": 0.866, + "learning_rate": 9.020456540433033e-06, + "loss": 0.839, "step": 19243 }, { - "epoch": 0.5460839954597049, + "epoch": 0.545325738898807, "grad_norm": 0.0, - "learning_rate": 8.995307478665246e-06, - "loss": 0.9385, + "learning_rate": 9.019543169026739e-06, + "loss": 0.9516, "step": 19244 }, { - "epoch": 0.5461123723041997, + "epoch": 0.5453540763410695, "grad_norm": 0.0, - "learning_rate": 8.99439305522598e-06, - "loss": 0.8493, + "learning_rate": 9.018629805879194e-06, + "loss": 0.8652, "step": 19245 }, { - "epoch": 0.5461407491486947, + "epoch": 0.545382413783332, "grad_norm": 0.0, - "learning_rate": 8.993478640281117e-06, - "loss": 0.9023, + "learning_rate": 9.017716450998099e-06, + "loss": 0.8698, "step": 19246 }, { - "epoch": 0.5461691259931896, + "epoch": 0.5454107512255943, "grad_norm": 0.0, - "learning_rate": 8.992564233838388e-06, - "loss": 0.8663, + "learning_rate": 9.01680310439114e-06, + "loss": 0.8302, "step": 19247 }, { - "epoch": 0.5461975028376844, + "epoch": 0.5454390886678568, "grad_norm": 0.0, - "learning_rate": 8.991649835905516e-06, - "loss": 0.8561, + "learning_rate": 9.015889766066018e-06, + "loss": 0.888, "step": 19248 }, { - "epoch": 0.5462258796821794, + "epoch": 0.5454674261101193, "grad_norm": 0.0, - "learning_rate": 8.990735446490219e-06, - "loss": 0.9911, + "learning_rate": 9.014976436030417e-06, + "loss": 0.9019, "step": 19249 }, { - "epoch": 0.5462542565266743, + "epoch": 0.5454957635523817, "grad_norm": 0.0, - "learning_rate": 8.98982106560023e-06, - "loss": 0.8635, + "learning_rate": 9.014063114292037e-06, + "loss": 0.7733, "step": 19250 }, { - "epoch": 0.5462826333711691, + "epoch": 0.5455241009946442, "grad_norm": 0.0, - "learning_rate": 8.988906693243266e-06, - "loss": 0.881, + "learning_rate": 9.01314980085857e-06, + "loss": 0.8416, "step": 19251 }, { - "epoch": 0.546311010215664, + "epoch": 0.5455524384369067, "grad_norm": 0.0, - "learning_rate": 8.987992329427052e-06, - "loss": 0.9285, + "learning_rate": 9.012236495737708e-06, + "loss": 0.8511, "step": 19252 }, { - "epoch": 0.5463393870601589, + "epoch": 0.5455807758791692, "grad_norm": 0.0, - "learning_rate": 8.987077974159312e-06, - "loss": 0.8428, + "learning_rate": 9.011323198937144e-06, + "loss": 0.7916, "step": 19253 }, { - "epoch": 0.5463677639046538, + "epoch": 0.5456091133214316, "grad_norm": 0.0, - "learning_rate": 8.986163627447773e-06, - "loss": 0.9141, + "learning_rate": 9.010409910464575e-06, + "loss": 1.0084, "step": 19254 }, { - "epoch": 0.5463961407491487, + "epoch": 0.5456374507636941, "grad_norm": 0.0, - "learning_rate": 8.985249289300152e-06, - "loss": 0.769, + "learning_rate": 9.009496630327687e-06, + "loss": 0.8701, "step": 19255 }, { - "epoch": 0.5464245175936436, + "epoch": 0.5456657882059566, "grad_norm": 0.0, - "learning_rate": 8.984334959724177e-06, - "loss": 0.8918, + "learning_rate": 9.008583358534182e-06, + "loss": 0.9141, "step": 19256 }, { - "epoch": 0.5464528944381385, + "epoch": 0.545694125648219, "grad_norm": 0.0, - "learning_rate": 8.98342063872757e-06, - "loss": 0.853, + "learning_rate": 9.007670095091744e-06, + "loss": 0.8467, "step": 19257 }, { - "epoch": 0.5464812712826334, + "epoch": 0.5457224630904814, "grad_norm": 0.0, - "learning_rate": 8.982506326318053e-06, - "loss": 0.8728, + "learning_rate": 9.006756840008065e-06, + "loss": 0.7868, "step": 19258 }, { - "epoch": 0.5465096481271282, + "epoch": 0.5457508005327439, "grad_norm": 0.0, - "learning_rate": 8.981592022503353e-06, - "loss": 0.8625, + "learning_rate": 9.005843593290849e-06, + "loss": 0.9759, "step": 19259 }, { - "epoch": 0.5465380249716232, + "epoch": 0.5457791379750063, "grad_norm": 0.0, - "learning_rate": 8.980677727291192e-06, - "loss": 0.9317, + "learning_rate": 9.004930354947777e-06, + "loss": 0.9286, "step": 19260 }, { - "epoch": 0.546566401816118, + "epoch": 0.5458074754172688, "grad_norm": 0.0, - "learning_rate": 8.979763440689291e-06, - "loss": 0.7645, + "learning_rate": 9.004017124986547e-06, + "loss": 0.7766, "step": 19261 }, { - "epoch": 0.5465947786606129, + "epoch": 0.5458358128595313, "grad_norm": 0.0, - "learning_rate": 8.97884916270537e-06, - "loss": 0.8373, + "learning_rate": 9.003103903414855e-06, + "loss": 0.8367, "step": 19262 }, { - "epoch": 0.5466231555051079, + "epoch": 0.5458641503017938, "grad_norm": 0.0, - "learning_rate": 8.977934893347158e-06, - "loss": 0.8727, + "learning_rate": 9.002190690240384e-06, + "loss": 1.0292, "step": 19263 }, { - "epoch": 0.5466515323496027, + "epoch": 0.5458924877440562, "grad_norm": 0.0, - "learning_rate": 8.977020632622378e-06, - "loss": 0.9019, + "learning_rate": 9.001277485470834e-06, + "loss": 0.8143, "step": 19264 }, { - "epoch": 0.5466799091940976, + "epoch": 0.5459208251863187, "grad_norm": 0.0, - "learning_rate": 8.976106380538752e-06, - "loss": 0.8339, + "learning_rate": 9.000364289113893e-06, + "loss": 0.942, "step": 19265 }, { - "epoch": 0.5467082860385926, + "epoch": 0.5459491626285812, "grad_norm": 0.0, - "learning_rate": 8.975192137103997e-06, - "loss": 0.9397, + "learning_rate": 8.999451101177252e-06, + "loss": 0.7691, "step": 19266 }, { - "epoch": 0.5467366628830874, + "epoch": 0.5459775000708436, "grad_norm": 0.0, - "learning_rate": 8.974277902325844e-06, - "loss": 0.9233, + "learning_rate": 8.998537921668613e-06, + "loss": 0.89, "step": 19267 }, { - "epoch": 0.5467650397275823, + "epoch": 0.546005837513106, "grad_norm": 0.0, - "learning_rate": 8.973363676212011e-06, - "loss": 0.8583, + "learning_rate": 8.997624750595657e-06, + "loss": 0.844, "step": 19268 }, { - "epoch": 0.5467934165720771, + "epoch": 0.5460341749553685, "grad_norm": 0.0, - "learning_rate": 8.97244945877022e-06, - "loss": 0.9139, + "learning_rate": 8.996711587966079e-06, + "loss": 0.9474, "step": 19269 }, { - "epoch": 0.5468217934165721, + "epoch": 0.546062512397631, "grad_norm": 0.0, - "learning_rate": 8.971535250008196e-06, - "loss": 0.8462, + "learning_rate": 8.995798433787576e-06, + "loss": 0.9096, "step": 19270 }, { - "epoch": 0.546850170261067, + "epoch": 0.5460908498398934, "grad_norm": 0.0, - "learning_rate": 8.970621049933659e-06, - "loss": 0.9584, + "learning_rate": 8.994885288067831e-06, + "loss": 0.7394, "step": 19271 }, { - "epoch": 0.5468785471055618, + "epoch": 0.5461191872821559, "grad_norm": 0.0, - "learning_rate": 8.969706858554331e-06, - "loss": 0.8848, + "learning_rate": 8.993972150814542e-06, + "loss": 0.9055, "step": 19272 }, { - "epoch": 0.5469069239500568, + "epoch": 0.5461475247244184, "grad_norm": 0.0, - "learning_rate": 8.968792675877937e-06, - "loss": 0.8692, + "learning_rate": 8.9930590220354e-06, + "loss": 0.9434, "step": 19273 }, { - "epoch": 0.5469353007945517, + "epoch": 0.5461758621666808, "grad_norm": 0.0, - "learning_rate": 8.9678785019122e-06, - "loss": 0.9776, + "learning_rate": 8.992145901738092e-06, + "loss": 0.8766, "step": 19274 }, { - "epoch": 0.5469636776390465, + "epoch": 0.5462041996089433, "grad_norm": 0.0, - "learning_rate": 8.966964336664835e-06, - "loss": 0.7894, + "learning_rate": 8.99123278993032e-06, + "loss": 0.7427, "step": 19275 }, { - "epoch": 0.5469920544835414, + "epoch": 0.5462325370512058, "grad_norm": 0.0, - "learning_rate": 8.966050180143571e-06, - "loss": 0.8096, + "learning_rate": 8.990319686619764e-06, + "loss": 0.8456, "step": 19276 }, { - "epoch": 0.5470204313280363, + "epoch": 0.5462608744934683, "grad_norm": 0.0, - "learning_rate": 8.965136032356125e-06, - "loss": 0.8318, + "learning_rate": 8.989406591814123e-06, + "loss": 0.8722, "step": 19277 }, { - "epoch": 0.5470488081725312, + "epoch": 0.5462892119357307, "grad_norm": 0.0, - "learning_rate": 8.964221893310223e-06, - "loss": 0.8846, + "learning_rate": 8.988493505521082e-06, + "loss": 0.9291, "step": 19278 }, { - "epoch": 0.5470771850170261, + "epoch": 0.5463175493779932, "grad_norm": 0.0, - "learning_rate": 8.963307763013588e-06, - "loss": 0.9034, + "learning_rate": 8.987580427748335e-06, + "loss": 0.9786, "step": 19279 }, { - "epoch": 0.547105561861521, + "epoch": 0.5463458868202556, "grad_norm": 0.0, - "learning_rate": 8.962393641473936e-06, - "loss": 0.9773, + "learning_rate": 8.98666735850358e-06, + "loss": 0.8663, "step": 19280 }, { - "epoch": 0.5471339387060159, + "epoch": 0.546374224262518, "grad_norm": 0.0, - "learning_rate": 8.961479528698994e-06, - "loss": 0.8852, + "learning_rate": 8.9857542977945e-06, + "loss": 0.8923, "step": 19281 }, { - "epoch": 0.5471623155505108, + "epoch": 0.5464025617047805, "grad_norm": 0.0, - "learning_rate": 8.96056542469648e-06, - "loss": 0.8569, + "learning_rate": 8.984841245628785e-06, + "loss": 0.8028, "step": 19282 }, { - "epoch": 0.5471906923950057, + "epoch": 0.546430899147043, "grad_norm": 0.0, - "learning_rate": 8.959651329474115e-06, - "loss": 0.9369, + "learning_rate": 8.983928202014135e-06, + "loss": 0.9121, "step": 19283 }, { - "epoch": 0.5472190692395006, + "epoch": 0.5464592365893054, "grad_norm": 0.0, - "learning_rate": 8.958737243039623e-06, - "loss": 0.9047, + "learning_rate": 8.983015166958228e-06, + "loss": 0.7885, "step": 19284 }, { - "epoch": 0.5472474460839954, + "epoch": 0.5464875740315679, "grad_norm": 0.0, - "learning_rate": 8.957823165400727e-06, - "loss": 0.8506, + "learning_rate": 8.982102140468766e-06, + "loss": 0.9195, "step": 19285 }, { - "epoch": 0.5472758229284903, + "epoch": 0.5465159114738304, "grad_norm": 0.0, - "learning_rate": 8.956909096565141e-06, - "loss": 0.7947, + "learning_rate": 8.981189122553436e-06, + "loss": 0.8057, "step": 19286 }, { - "epoch": 0.5473041997729853, + "epoch": 0.5465442489160929, "grad_norm": 0.0, - "learning_rate": 8.955995036540593e-06, - "loss": 0.8152, + "learning_rate": 8.980276113219925e-06, + "loss": 0.9158, "step": 19287 }, { - "epoch": 0.5473325766174801, + "epoch": 0.5465725863583553, "grad_norm": 0.0, - "learning_rate": 8.955080985334803e-06, - "loss": 0.7698, + "learning_rate": 8.979363112475933e-06, + "loss": 0.8679, "step": 19288 }, { - "epoch": 0.547360953461975, + "epoch": 0.5466009238006178, "grad_norm": 0.0, - "learning_rate": 8.954166942955488e-06, - "loss": 0.8356, + "learning_rate": 8.978450120329138e-06, + "loss": 0.8982, "step": 19289 }, { - "epoch": 0.54738933030647, + "epoch": 0.5466292612428802, "grad_norm": 0.0, - "learning_rate": 8.953252909410374e-06, - "loss": 0.8133, + "learning_rate": 8.977537136787238e-06, + "loss": 0.8008, "step": 19290 }, { - "epoch": 0.5474177071509648, + "epoch": 0.5466575986851426, "grad_norm": 0.0, - "learning_rate": 8.952338884707182e-06, - "loss": 0.8437, + "learning_rate": 8.976624161857925e-06, + "loss": 0.9946, "step": 19291 }, { - "epoch": 0.5474460839954597, + "epoch": 0.5466859361274051, "grad_norm": 0.0, - "learning_rate": 8.951424868853623e-06, - "loss": 0.901, + "learning_rate": 8.975711195548885e-06, + "loss": 0.8662, "step": 19292 }, { - "epoch": 0.5474744608399545, + "epoch": 0.5467142735696676, "grad_norm": 0.0, - "learning_rate": 8.950510861857429e-06, - "loss": 0.8284, + "learning_rate": 8.974798237867814e-06, + "loss": 0.9701, "step": 19293 }, { - "epoch": 0.5475028376844495, + "epoch": 0.5467426110119301, "grad_norm": 0.0, - "learning_rate": 8.949596863726319e-06, - "loss": 0.952, + "learning_rate": 8.973885288822393e-06, + "loss": 0.7863, "step": 19294 }, { - "epoch": 0.5475312145289444, + "epoch": 0.5467709484541925, "grad_norm": 0.0, - "learning_rate": 8.948682874468011e-06, - "loss": 0.8351, + "learning_rate": 8.972972348420318e-06, + "loss": 0.9109, "step": 19295 }, { - "epoch": 0.5475595913734392, + "epoch": 0.546799285896455, "grad_norm": 0.0, - "learning_rate": 8.947768894090224e-06, - "loss": 0.9402, + "learning_rate": 8.972059416669282e-06, + "loss": 0.9281, "step": 19296 }, { - "epoch": 0.5475879682179342, + "epoch": 0.5468276233387175, "grad_norm": 0.0, - "learning_rate": 8.946854922600683e-06, - "loss": 0.9059, + "learning_rate": 8.97114649357697e-06, + "loss": 1.0071, "step": 19297 }, { - "epoch": 0.5476163450624291, + "epoch": 0.5468559607809799, "grad_norm": 0.0, - "learning_rate": 8.945940960007106e-06, - "loss": 0.8542, + "learning_rate": 8.97023357915107e-06, + "loss": 0.8331, "step": 19298 }, { - "epoch": 0.5476447219069239, + "epoch": 0.5468842982232424, "grad_norm": 0.0, - "learning_rate": 8.945027006317211e-06, - "loss": 0.9167, + "learning_rate": 8.969320673399276e-06, + "loss": 0.9302, "step": 19299 }, { - "epoch": 0.5476730987514189, + "epoch": 0.5469126356655049, "grad_norm": 0.0, - "learning_rate": 8.944113061538721e-06, - "loss": 0.8936, + "learning_rate": 8.968407776329277e-06, + "loss": 0.8939, "step": 19300 }, { - "epoch": 0.5477014755959138, + "epoch": 0.5469409731077673, "grad_norm": 0.0, - "learning_rate": 8.943199125679357e-06, - "loss": 0.8932, + "learning_rate": 8.967494887948766e-06, + "loss": 0.9319, "step": 19301 }, { - "epoch": 0.5477298524404086, + "epoch": 0.5469693105500297, "grad_norm": 0.0, - "learning_rate": 8.942285198746836e-06, - "loss": 0.9318, + "learning_rate": 8.966582008265424e-06, + "loss": 0.9264, "step": 19302 }, { - "epoch": 0.5477582292849035, + "epoch": 0.5469976479922922, "grad_norm": 0.0, - "learning_rate": 8.941371280748878e-06, - "loss": 0.9431, + "learning_rate": 8.965669137286946e-06, + "loss": 0.9873, "step": 19303 }, { - "epoch": 0.5477866061293984, + "epoch": 0.5470259854345547, "grad_norm": 0.0, - "learning_rate": 8.940457371693208e-06, - "loss": 0.8829, + "learning_rate": 8.964756275021024e-06, + "loss": 0.886, "step": 19304 }, { - "epoch": 0.5478149829738933, + "epoch": 0.5470543228768171, "grad_norm": 0.0, - "learning_rate": 8.93954347158754e-06, - "loss": 0.8886, + "learning_rate": 8.96384342147534e-06, + "loss": 0.8367, "step": 19305 }, { - "epoch": 0.5478433598183882, + "epoch": 0.5470826603190796, "grad_norm": 0.0, - "learning_rate": 8.938629580439595e-06, - "loss": 0.9369, + "learning_rate": 8.962930576657593e-06, + "loss": 0.8797, "step": 19306 }, { - "epoch": 0.5478717366628831, + "epoch": 0.5471109977613421, "grad_norm": 0.0, - "learning_rate": 8.937715698257096e-06, - "loss": 0.8761, + "learning_rate": 8.962017740575464e-06, + "loss": 0.9678, "step": 19307 }, { - "epoch": 0.547900113507378, + "epoch": 0.5471393352036045, "grad_norm": 0.0, - "learning_rate": 8.936801825047755e-06, - "loss": 0.8468, + "learning_rate": 8.961104913236643e-06, + "loss": 0.8652, "step": 19308 }, { - "epoch": 0.5479284903518729, + "epoch": 0.547167672645867, "grad_norm": 0.0, - "learning_rate": 8.935887960819301e-06, - "loss": 0.9435, + "learning_rate": 8.960192094648828e-06, + "loss": 0.9259, "step": 19309 }, { - "epoch": 0.5479568671963677, + "epoch": 0.5471960100881295, "grad_norm": 0.0, - "learning_rate": 8.934974105579449e-06, - "loss": 0.8627, + "learning_rate": 8.959279284819694e-06, + "loss": 0.8037, "step": 19310 }, { - "epoch": 0.5479852440408627, + "epoch": 0.547224347530392, "grad_norm": 0.0, - "learning_rate": 8.934060259335919e-06, - "loss": 0.9064, + "learning_rate": 8.958366483756938e-06, + "loss": 0.8631, "step": 19311 }, { - "epoch": 0.5480136208853575, + "epoch": 0.5472526849726543, "grad_norm": 0.0, - "learning_rate": 8.93314642209643e-06, - "loss": 0.8775, + "learning_rate": 8.95745369146825e-06, + "loss": 0.9383, "step": 19312 }, { - "epoch": 0.5480419977298524, + "epoch": 0.5472810224149168, "grad_norm": 0.0, - "learning_rate": 8.932232593868699e-06, - "loss": 0.7481, + "learning_rate": 8.956540907961315e-06, + "loss": 0.7943, "step": 19313 }, { - "epoch": 0.5480703745743474, + "epoch": 0.5473093598571793, "grad_norm": 0.0, - "learning_rate": 8.93131877466045e-06, - "loss": 0.7797, + "learning_rate": 8.955628133243828e-06, + "loss": 0.836, "step": 19314 }, { - "epoch": 0.5480987514188422, + "epoch": 0.5473376972994417, "grad_norm": 0.0, - "learning_rate": 8.9304049644794e-06, - "loss": 0.9434, + "learning_rate": 8.954715367323468e-06, + "loss": 0.929, "step": 19315 }, { - "epoch": 0.5481271282633371, + "epoch": 0.5473660347417042, "grad_norm": 0.0, - "learning_rate": 8.929491163333263e-06, - "loss": 0.8517, + "learning_rate": 8.953802610207928e-06, + "loss": 0.9034, "step": 19316 }, { - "epoch": 0.5481555051078321, + "epoch": 0.5473943721839667, "grad_norm": 0.0, - "learning_rate": 8.928577371229767e-06, - "loss": 0.8674, + "learning_rate": 8.9528898619049e-06, + "loss": 0.9087, "step": 19317 }, { - "epoch": 0.5481838819523269, + "epoch": 0.5474227096262292, "grad_norm": 0.0, - "learning_rate": 8.927663588176626e-06, - "loss": 0.8688, + "learning_rate": 8.951977122422067e-06, + "loss": 0.7883, "step": 19318 }, { - "epoch": 0.5482122587968218, + "epoch": 0.5474510470684916, "grad_norm": 0.0, - "learning_rate": 8.926749814181555e-06, - "loss": 0.8461, + "learning_rate": 8.95106439176712e-06, + "loss": 0.88, "step": 19319 }, { - "epoch": 0.5482406356413166, + "epoch": 0.5474793845107541, "grad_norm": 0.0, - "learning_rate": 8.92583604925228e-06, - "loss": 0.9, + "learning_rate": 8.950151669947754e-06, + "loss": 0.8939, "step": 19320 }, { - "epoch": 0.5482690124858116, + "epoch": 0.5475077219530166, "grad_norm": 0.0, - "learning_rate": 8.924922293396514e-06, - "loss": 0.8752, + "learning_rate": 8.949238956971642e-06, + "loss": 0.9134, "step": 19321 }, { - "epoch": 0.5482973893303065, + "epoch": 0.5475360593952789, "grad_norm": 0.0, - "learning_rate": 8.924008546621977e-06, - "loss": 0.9302, + "learning_rate": 8.948326252846487e-06, + "loss": 0.9183, "step": 19322 }, { - "epoch": 0.5483257661748013, + "epoch": 0.5475643968375414, "grad_norm": 0.0, - "learning_rate": 8.923094808936388e-06, - "loss": 0.923, + "learning_rate": 8.947413557579965e-06, + "loss": 0.8048, "step": 19323 }, { - "epoch": 0.5483541430192963, + "epoch": 0.5475927342798039, "grad_norm": 0.0, - "learning_rate": 8.922181080347465e-06, - "loss": 0.8325, + "learning_rate": 8.946500871179771e-06, + "loss": 1.0196, "step": 19324 }, { - "epoch": 0.5483825198637912, + "epoch": 0.5476210717220664, "grad_norm": 0.0, - "learning_rate": 8.92126736086293e-06, - "loss": 0.8349, + "learning_rate": 8.945588193653592e-06, + "loss": 0.795, "step": 19325 }, { - "epoch": 0.548410896708286, + "epoch": 0.5476494091643288, "grad_norm": 0.0, - "learning_rate": 8.920353650490499e-06, - "loss": 0.9031, + "learning_rate": 8.944675525009114e-06, + "loss": 0.7587, "step": 19326 }, { - "epoch": 0.5484392735527809, + "epoch": 0.5476777466065913, "grad_norm": 0.0, - "learning_rate": 8.919439949237885e-06, - "loss": 0.908, + "learning_rate": 8.943762865254025e-06, + "loss": 0.8146, "step": 19327 }, { - "epoch": 0.5484676503972759, + "epoch": 0.5477060840488538, "grad_norm": 0.0, - "learning_rate": 8.918526257112815e-06, - "loss": 0.9939, + "learning_rate": 8.94285021439602e-06, + "loss": 0.8178, "step": 19328 }, { - "epoch": 0.5484960272417707, + "epoch": 0.5477344214911162, "grad_norm": 0.0, - "learning_rate": 8.917612574123e-06, - "loss": 0.8372, + "learning_rate": 8.941937572442773e-06, + "loss": 0.8121, "step": 19329 }, { - "epoch": 0.5485244040862656, + "epoch": 0.5477627589333787, "grad_norm": 0.0, - "learning_rate": 8.916698900276158e-06, - "loss": 1.0022, + "learning_rate": 8.941024939401984e-06, + "loss": 0.744, "step": 19330 }, { - "epoch": 0.5485527809307605, + "epoch": 0.5477910963756412, "grad_norm": 0.0, - "learning_rate": 8.915785235580013e-06, - "loss": 0.817, + "learning_rate": 8.94011231528133e-06, + "loss": 0.8879, "step": 19331 }, { - "epoch": 0.5485811577752554, + "epoch": 0.5478194338179035, "grad_norm": 0.0, - "learning_rate": 8.914871580042278e-06, - "loss": 0.7761, + "learning_rate": 8.939199700088506e-06, + "loss": 0.8185, "step": 19332 }, { - "epoch": 0.5486095346197503, + "epoch": 0.547847771260166, "grad_norm": 0.0, - "learning_rate": 8.913957933670669e-06, - "loss": 0.9235, + "learning_rate": 8.9382870938312e-06, + "loss": 1.0032, "step": 19333 }, { - "epoch": 0.5486379114642451, + "epoch": 0.5478761087024285, "grad_norm": 0.0, - "learning_rate": 8.913044296472908e-06, - "loss": 0.9623, + "learning_rate": 8.937374496517092e-06, + "loss": 0.9122, "step": 19334 }, { - "epoch": 0.5486662883087401, + "epoch": 0.547904446144691, "grad_norm": 0.0, - "learning_rate": 8.912130668456712e-06, - "loss": 0.7334, + "learning_rate": 8.936461908153877e-06, + "loss": 0.9014, "step": 19335 }, { - "epoch": 0.548694665153235, + "epoch": 0.5479327835869534, "grad_norm": 0.0, - "learning_rate": 8.911217049629795e-06, - "loss": 0.9263, + "learning_rate": 8.935549328749235e-06, + "loss": 0.8545, "step": 19336 }, { - "epoch": 0.5487230419977298, + "epoch": 0.5479611210292159, "grad_norm": 0.0, - "learning_rate": 8.910303439999877e-06, - "loss": 0.7969, + "learning_rate": 8.934636758310855e-06, + "loss": 0.8046, "step": 19337 }, { - "epoch": 0.5487514188422248, + "epoch": 0.5479894584714784, "grad_norm": 0.0, - "learning_rate": 8.909389839574676e-06, - "loss": 0.7548, + "learning_rate": 8.933724196846428e-06, + "loss": 0.9376, "step": 19338 }, { - "epoch": 0.5487797956867196, + "epoch": 0.5480177959137408, "grad_norm": 0.0, - "learning_rate": 8.908476248361904e-06, - "loss": 0.8854, + "learning_rate": 8.932811644363635e-06, + "loss": 0.8183, "step": 19339 }, { - "epoch": 0.5488081725312145, + "epoch": 0.5480461333560033, "grad_norm": 0.0, - "learning_rate": 8.907562666369283e-06, - "loss": 0.8557, + "learning_rate": 8.931899100870168e-06, + "loss": 0.8099, "step": 19340 }, { - "epoch": 0.5488365493757095, + "epoch": 0.5480744707982658, "grad_norm": 0.0, - "learning_rate": 8.906649093604533e-06, - "loss": 0.8012, + "learning_rate": 8.930986566373715e-06, + "loss": 0.9519, "step": 19341 }, { - "epoch": 0.5488649262202043, + "epoch": 0.5481028082405283, "grad_norm": 0.0, - "learning_rate": 8.905735530075367e-06, - "loss": 0.7417, + "learning_rate": 8.930074040881954e-06, + "loss": 0.8645, "step": 19342 }, { - "epoch": 0.5488933030646992, + "epoch": 0.5481311456827906, "grad_norm": 0.0, - "learning_rate": 8.904821975789501e-06, - "loss": 0.854, + "learning_rate": 8.92916152440258e-06, + "loss": 0.8475, "step": 19343 }, { - "epoch": 0.548921679909194, + "epoch": 0.5481594831250531, "grad_norm": 0.0, - "learning_rate": 8.903908430754651e-06, - "loss": 0.8503, + "learning_rate": 8.92824901694327e-06, + "loss": 0.9481, "step": 19344 }, { - "epoch": 0.548950056753689, + "epoch": 0.5481878205673156, "grad_norm": 0.0, - "learning_rate": 8.902994894978539e-06, - "loss": 0.7797, + "learning_rate": 8.92733651851172e-06, + "loss": 0.9341, "step": 19345 }, { - "epoch": 0.5489784335981839, + "epoch": 0.548216158009578, "grad_norm": 0.0, - "learning_rate": 8.902081368468879e-06, - "loss": 0.7214, + "learning_rate": 8.926424029115618e-06, + "loss": 0.9872, "step": 19346 }, { - "epoch": 0.5490068104426787, + "epoch": 0.5482444954518405, "grad_norm": 0.0, - "learning_rate": 8.901167851233384e-06, - "loss": 0.8075, + "learning_rate": 8.925511548762639e-06, + "loss": 0.954, "step": 19347 }, { - "epoch": 0.5490351872871737, + "epoch": 0.548272832894103, "grad_norm": 0.0, - "learning_rate": 8.900254343279774e-06, - "loss": 0.8996, + "learning_rate": 8.924599077460477e-06, + "loss": 0.9052, "step": 19348 }, { - "epoch": 0.5490635641316686, + "epoch": 0.5483011703363654, "grad_norm": 0.0, - "learning_rate": 8.899340844615767e-06, - "loss": 0.9286, + "learning_rate": 8.923686615216818e-06, + "loss": 0.9464, "step": 19349 }, { - "epoch": 0.5490919409761634, + "epoch": 0.5483295077786279, "grad_norm": 0.0, - "learning_rate": 8.898427355249076e-06, - "loss": 0.9009, + "learning_rate": 8.922774162039343e-06, + "loss": 1.018, "step": 19350 }, { - "epoch": 0.5491203178206583, + "epoch": 0.5483578452208904, "grad_norm": 0.0, - "learning_rate": 8.897513875187419e-06, - "loss": 0.8497, + "learning_rate": 8.921861717935744e-06, + "loss": 0.807, "step": 19351 }, { - "epoch": 0.5491486946651533, + "epoch": 0.5483861826631529, "grad_norm": 0.0, - "learning_rate": 8.896600404438512e-06, - "loss": 0.8545, + "learning_rate": 8.920949282913702e-06, + "loss": 0.8615, "step": 19352 }, { - "epoch": 0.5491770715096481, + "epoch": 0.5484145201054152, "grad_norm": 0.0, - "learning_rate": 8.89568694301007e-06, - "loss": 0.8658, + "learning_rate": 8.920036856980905e-06, + "loss": 0.9031, "step": 19353 }, { - "epoch": 0.549205448354143, + "epoch": 0.5484428575476777, "grad_norm": 0.0, - "learning_rate": 8.894773490909806e-06, - "loss": 0.889, + "learning_rate": 8.919124440145041e-06, + "loss": 0.8201, "step": 19354 }, { - "epoch": 0.549233825198638, + "epoch": 0.5484711949899402, "grad_norm": 0.0, - "learning_rate": 8.893860048145446e-06, - "loss": 1.0027, + "learning_rate": 8.918212032413791e-06, + "loss": 0.9056, "step": 19355 }, { - "epoch": 0.5492622020431328, + "epoch": 0.5484995324322026, "grad_norm": 0.0, - "learning_rate": 8.892946614724698e-06, - "loss": 0.8431, + "learning_rate": 8.917299633794843e-06, + "loss": 0.9132, "step": 19356 }, { - "epoch": 0.5492905788876277, + "epoch": 0.5485278698744651, "grad_norm": 0.0, - "learning_rate": 8.892033190655281e-06, - "loss": 0.9206, + "learning_rate": 8.916387244295884e-06, + "loss": 0.8869, "step": 19357 }, { - "epoch": 0.5493189557321226, + "epoch": 0.5485562073167276, "grad_norm": 0.0, - "learning_rate": 8.891119775944908e-06, - "loss": 0.8974, + "learning_rate": 8.915474863924594e-06, + "loss": 0.9493, "step": 19358 }, { - "epoch": 0.5493473325766175, + "epoch": 0.5485845447589901, "grad_norm": 0.0, - "learning_rate": 8.890206370601299e-06, - "loss": 0.8103, + "learning_rate": 8.914562492688667e-06, + "loss": 0.9036, "step": 19359 }, { - "epoch": 0.5493757094211124, + "epoch": 0.5486128822012525, "grad_norm": 0.0, - "learning_rate": 8.889292974632162e-06, - "loss": 0.9094, + "learning_rate": 8.91365013059578e-06, + "loss": 0.7536, "step": 19360 }, { - "epoch": 0.5494040862656072, + "epoch": 0.548641219643515, "grad_norm": 0.0, - "learning_rate": 8.888379588045221e-06, - "loss": 0.888, + "learning_rate": 8.91273777765362e-06, + "loss": 0.9763, "step": 19361 }, { - "epoch": 0.5494324631101022, + "epoch": 0.5486695570857775, "grad_norm": 0.0, - "learning_rate": 8.887466210848188e-06, - "loss": 0.8347, + "learning_rate": 8.911825433869876e-06, + "loss": 0.8269, "step": 19362 }, { - "epoch": 0.549460839954597, + "epoch": 0.5486978945280399, "grad_norm": 0.0, - "learning_rate": 8.886552843048773e-06, - "loss": 0.9486, + "learning_rate": 8.91091309925223e-06, + "loss": 0.889, "step": 19363 }, { - "epoch": 0.5494892167990919, + "epoch": 0.5487262319703023, "grad_norm": 0.0, - "learning_rate": 8.8856394846547e-06, - "loss": 0.7573, + "learning_rate": 8.91000077380837e-06, + "loss": 0.9431, "step": 19364 }, { - "epoch": 0.5495175936435869, + "epoch": 0.5487545694125648, "grad_norm": 0.0, - "learning_rate": 8.884726135673681e-06, - "loss": 0.8349, + "learning_rate": 8.909088457545973e-06, + "loss": 0.7585, "step": 19365 }, { - "epoch": 0.5495459704880817, + "epoch": 0.5487829068548273, "grad_norm": 0.0, - "learning_rate": 8.883812796113429e-06, - "loss": 0.8576, + "learning_rate": 8.90817615047273e-06, + "loss": 0.9089, "step": 19366 }, { - "epoch": 0.5495743473325766, + "epoch": 0.5488112442970897, "grad_norm": 0.0, - "learning_rate": 8.882899465981662e-06, - "loss": 0.9069, + "learning_rate": 8.90726385259633e-06, + "loss": 0.989, "step": 19367 }, { - "epoch": 0.5496027241770715, + "epoch": 0.5488395817393522, "grad_norm": 0.0, - "learning_rate": 8.881986145286092e-06, - "loss": 0.8468, + "learning_rate": 8.906351563924446e-06, + "loss": 0.937, "step": 19368 }, { - "epoch": 0.5496311010215664, + "epoch": 0.5488679191816147, "grad_norm": 0.0, - "learning_rate": 8.881072834034433e-06, - "loss": 0.8075, + "learning_rate": 8.90543928446477e-06, + "loss": 0.9919, "step": 19369 }, { - "epoch": 0.5496594778660613, + "epoch": 0.5488962566238771, "grad_norm": 0.0, - "learning_rate": 8.880159532234404e-06, - "loss": 0.9739, + "learning_rate": 8.904527014224988e-06, + "loss": 0.9078, "step": 19370 }, { - "epoch": 0.5496878547105561, + "epoch": 0.5489245940661396, "grad_norm": 0.0, - "learning_rate": 8.879246239893717e-06, - "loss": 0.7743, + "learning_rate": 8.90361475321278e-06, + "loss": 0.9741, "step": 19371 }, { - "epoch": 0.5497162315550511, + "epoch": 0.5489529315084021, "grad_norm": 0.0, - "learning_rate": 8.87833295702009e-06, - "loss": 0.8541, + "learning_rate": 8.902702501435835e-06, + "loss": 0.862, "step": 19372 }, { - "epoch": 0.549744608399546, + "epoch": 0.5489812689506645, "grad_norm": 0.0, - "learning_rate": 8.877419683621233e-06, - "loss": 0.9797, + "learning_rate": 8.90179025890183e-06, + "loss": 0.8238, "step": 19373 }, { - "epoch": 0.5497729852440408, + "epoch": 0.549009606392927, "grad_norm": 0.0, - "learning_rate": 8.876506419704863e-06, - "loss": 0.8545, + "learning_rate": 8.900878025618453e-06, + "loss": 0.8581, "step": 19374 }, { - "epoch": 0.5498013620885358, + "epoch": 0.5490379438351894, "grad_norm": 0.0, - "learning_rate": 8.875593165278694e-06, - "loss": 0.8317, + "learning_rate": 8.899965801593393e-06, + "loss": 0.8033, "step": 19375 }, { - "epoch": 0.5498297389330307, + "epoch": 0.5490662812774519, "grad_norm": 0.0, - "learning_rate": 8.874679920350441e-06, - "loss": 0.875, + "learning_rate": 8.899053586834325e-06, + "loss": 0.8597, "step": 19376 }, { - "epoch": 0.5498581157775255, + "epoch": 0.5490946187197143, "grad_norm": 0.0, - "learning_rate": 8.873766684927814e-06, - "loss": 0.9749, + "learning_rate": 8.89814138134894e-06, + "loss": 0.7073, "step": 19377 }, { - "epoch": 0.5498864926220204, + "epoch": 0.5491229561619768, "grad_norm": 0.0, - "learning_rate": 8.872853459018534e-06, - "loss": 0.8756, + "learning_rate": 8.897229185144922e-06, + "loss": 0.8136, "step": 19378 }, { - "epoch": 0.5499148694665154, + "epoch": 0.5491512936042393, "grad_norm": 0.0, - "learning_rate": 8.871940242630311e-06, - "loss": 0.8355, + "learning_rate": 8.896316998229946e-06, + "loss": 0.9101, "step": 19379 }, { - "epoch": 0.5499432463110102, + "epoch": 0.5491796310465017, "grad_norm": 0.0, - "learning_rate": 8.871027035770856e-06, - "loss": 0.8626, + "learning_rate": 8.89540482061171e-06, + "loss": 0.8006, "step": 19380 }, { - "epoch": 0.5499716231555051, + "epoch": 0.5492079684887642, "grad_norm": 0.0, - "learning_rate": 8.87011383844789e-06, - "loss": 0.8741, + "learning_rate": 8.894492652297883e-06, + "loss": 0.8757, "step": 19381 }, { - "epoch": 0.55, + "epoch": 0.5492363059310267, "grad_norm": 0.0, - "learning_rate": 8.869200650669122e-06, - "loss": 0.8893, + "learning_rate": 8.893580493296155e-06, + "loss": 0.8835, "step": 19382 }, { - "epoch": 0.5500283768444949, + "epoch": 0.5492646433732892, "grad_norm": 0.0, - "learning_rate": 8.868287472442268e-06, - "loss": 0.9266, + "learning_rate": 8.89266834361421e-06, + "loss": 0.8169, "step": 19383 }, { - "epoch": 0.5500567536889898, + "epoch": 0.5492929808155516, "grad_norm": 0.0, - "learning_rate": 8.867374303775039e-06, - "loss": 0.8613, + "learning_rate": 8.89175620325973e-06, + "loss": 0.9503, "step": 19384 }, { - "epoch": 0.5500851305334846, + "epoch": 0.549321318257814, "grad_norm": 0.0, - "learning_rate": 8.866461144675147e-06, - "loss": 0.9244, + "learning_rate": 8.8908440722404e-06, + "loss": 0.8201, "step": 19385 }, { - "epoch": 0.5501135073779796, + "epoch": 0.5493496557000765, "grad_norm": 0.0, - "learning_rate": 8.865547995150314e-06, - "loss": 0.9906, + "learning_rate": 8.889931950563907e-06, + "loss": 0.8948, "step": 19386 }, { - "epoch": 0.5501418842224745, + "epoch": 0.5493779931423389, "grad_norm": 0.0, - "learning_rate": 8.864634855208248e-06, - "loss": 0.9178, + "learning_rate": 8.889019838237922e-06, + "loss": 0.8447, "step": 19387 }, { - "epoch": 0.5501702610669693, + "epoch": 0.5494063305846014, "grad_norm": 0.0, - "learning_rate": 8.86372172485666e-06, - "loss": 0.8911, + "learning_rate": 8.88810773527014e-06, + "loss": 0.8027, "step": 19388 }, { - "epoch": 0.5501986379114643, + "epoch": 0.5494346680268639, "grad_norm": 0.0, - "learning_rate": 8.862808604103267e-06, - "loss": 0.7946, + "learning_rate": 8.887195641668235e-06, + "loss": 0.8199, "step": 19389 }, { - "epoch": 0.5502270147559591, + "epoch": 0.5494630054691264, "grad_norm": 0.0, - "learning_rate": 8.861895492955781e-06, - "loss": 0.946, + "learning_rate": 8.886283557439898e-06, + "loss": 0.7821, "step": 19390 }, { - "epoch": 0.550255391600454, + "epoch": 0.5494913429113888, "grad_norm": 0.0, - "learning_rate": 8.860982391421914e-06, - "loss": 0.8894, + "learning_rate": 8.885371482592809e-06, + "loss": 0.8614, "step": 19391 }, { - "epoch": 0.550283768444949, + "epoch": 0.5495196803536513, "grad_norm": 0.0, - "learning_rate": 8.86006929950938e-06, - "loss": 0.8651, + "learning_rate": 8.884459417134648e-06, + "loss": 0.813, "step": 19392 }, { - "epoch": 0.5503121452894438, + "epoch": 0.5495480177959138, "grad_norm": 0.0, - "learning_rate": 8.859156217225893e-06, - "loss": 0.884, + "learning_rate": 8.883547361073102e-06, + "loss": 0.9132, "step": 19393 }, { - "epoch": 0.5503405221339387, + "epoch": 0.5495763552381762, "grad_norm": 0.0, - "learning_rate": 8.858243144579163e-06, - "loss": 0.9277, + "learning_rate": 8.882635314415848e-06, + "loss": 0.87, "step": 19394 }, { - "epoch": 0.5503688989784336, + "epoch": 0.5496046926804387, "grad_norm": 0.0, - "learning_rate": 8.857330081576906e-06, - "loss": 0.8349, + "learning_rate": 8.881723277170573e-06, + "loss": 0.9271, "step": 19395 }, { - "epoch": 0.5503972758229285, + "epoch": 0.5496330301227011, "grad_norm": 0.0, - "learning_rate": 8.856417028226833e-06, - "loss": 0.8487, + "learning_rate": 8.880811249344958e-06, + "loss": 0.8798, "step": 19396 }, { - "epoch": 0.5504256526674234, + "epoch": 0.5496613675649635, "grad_norm": 0.0, - "learning_rate": 8.855503984536656e-06, - "loss": 0.9048, + "learning_rate": 8.879899230946684e-06, + "loss": 0.9301, "step": 19397 }, { - "epoch": 0.5504540295119182, + "epoch": 0.549689705007226, "grad_norm": 0.0, - "learning_rate": 8.85459095051409e-06, - "loss": 0.8393, + "learning_rate": 8.878987221983434e-06, + "loss": 0.9029, "step": 19398 }, { - "epoch": 0.5504824063564132, + "epoch": 0.5497180424494885, "grad_norm": 0.0, - "learning_rate": 8.853677926166845e-06, - "loss": 0.8397, + "learning_rate": 8.878075222462896e-06, + "loss": 0.9179, "step": 19399 }, { - "epoch": 0.5505107832009081, + "epoch": 0.549746379891751, "grad_norm": 0.0, - "learning_rate": 8.85276491150263e-06, - "loss": 0.8461, + "learning_rate": 8.877163232392743e-06, + "loss": 0.9056, "step": 19400 }, { - "epoch": 0.5505391600454029, + "epoch": 0.5497747173340134, "grad_norm": 0.0, - "learning_rate": 8.851851906529163e-06, - "loss": 0.8869, + "learning_rate": 8.876251251780663e-06, + "loss": 0.83, "step": 19401 }, { - "epoch": 0.5505675368898978, + "epoch": 0.5498030547762759, "grad_norm": 0.0, - "learning_rate": 8.850938911254156e-06, - "loss": 0.8878, + "learning_rate": 8.875339280634334e-06, + "loss": 0.8427, "step": 19402 }, { - "epoch": 0.5505959137343928, + "epoch": 0.5498313922185384, "grad_norm": 0.0, - "learning_rate": 8.850025925685322e-06, - "loss": 0.937, + "learning_rate": 8.874427318961439e-06, + "loss": 0.8856, "step": 19403 }, { - "epoch": 0.5506242905788876, + "epoch": 0.5498597296608008, "grad_norm": 0.0, - "learning_rate": 8.849112949830368e-06, - "loss": 0.7959, + "learning_rate": 8.873515366769666e-06, + "loss": 0.8343, "step": 19404 }, { - "epoch": 0.5506526674233825, + "epoch": 0.5498880671030633, "grad_norm": 0.0, - "learning_rate": 8.84819998369701e-06, - "loss": 0.968, + "learning_rate": 8.872603424066686e-06, + "loss": 0.8279, "step": 19405 }, { - "epoch": 0.5506810442678775, + "epoch": 0.5499164045453258, "grad_norm": 0.0, - "learning_rate": 8.847287027292959e-06, - "loss": 0.8059, + "learning_rate": 8.871691490860188e-06, + "loss": 0.9107, "step": 19406 }, { - "epoch": 0.5507094211123723, + "epoch": 0.5499447419875882, "grad_norm": 0.0, - "learning_rate": 8.846374080625923e-06, - "loss": 0.858, + "learning_rate": 8.870779567157853e-06, + "loss": 0.8032, "step": 19407 }, { - "epoch": 0.5507377979568672, + "epoch": 0.5499730794298506, "grad_norm": 0.0, - "learning_rate": 8.84546114370362e-06, - "loss": 0.7984, + "learning_rate": 8.86986765296736e-06, + "loss": 0.8917, "step": 19408 }, { - "epoch": 0.5507661748013621, + "epoch": 0.5500014168721131, "grad_norm": 0.0, - "learning_rate": 8.844548216533759e-06, - "loss": 0.949, + "learning_rate": 8.868955748296391e-06, + "loss": 0.7643, "step": 19409 }, { - "epoch": 0.550794551645857, + "epoch": 0.5500297543143756, "grad_norm": 0.0, - "learning_rate": 8.843635299124051e-06, - "loss": 0.9501, + "learning_rate": 8.868043853152626e-06, + "loss": 0.7545, "step": 19410 }, { - "epoch": 0.5508229284903519, + "epoch": 0.550058091756638, "grad_norm": 0.0, - "learning_rate": 8.842722391482205e-06, - "loss": 0.7955, + "learning_rate": 8.867131967543748e-06, + "loss": 0.8237, "step": 19411 }, { - "epoch": 0.5508513053348467, + "epoch": 0.5500864291989005, "grad_norm": 0.0, - "learning_rate": 8.841809493615938e-06, - "loss": 0.7487, + "learning_rate": 8.866220091477444e-06, + "loss": 0.8523, "step": 19412 }, { - "epoch": 0.5508796821793417, + "epoch": 0.550114766641163, "grad_norm": 0.0, - "learning_rate": 8.840896605532957e-06, - "loss": 0.9536, + "learning_rate": 8.865308224961381e-06, + "loss": 0.9022, "step": 19413 }, { - "epoch": 0.5509080590238365, + "epoch": 0.5501431040834255, "grad_norm": 0.0, - "learning_rate": 8.839983727240972e-06, - "loss": 0.7586, + "learning_rate": 8.864396368003252e-06, + "loss": 0.863, "step": 19414 }, { - "epoch": 0.5509364358683314, + "epoch": 0.5501714415256879, "grad_norm": 0.0, - "learning_rate": 8.839070858747697e-06, - "loss": 0.8847, + "learning_rate": 8.863484520610736e-06, + "loss": 0.9167, "step": 19415 }, { - "epoch": 0.5509648127128264, + "epoch": 0.5501997789679504, "grad_norm": 0.0, - "learning_rate": 8.838158000060845e-06, - "loss": 0.8988, + "learning_rate": 8.862572682791508e-06, + "loss": 0.8568, "step": 19416 }, { - "epoch": 0.5509931895573212, + "epoch": 0.5502281164102129, "grad_norm": 0.0, - "learning_rate": 8.837245151188126e-06, - "loss": 0.7838, + "learning_rate": 8.861660854553257e-06, + "loss": 0.8479, "step": 19417 }, { - "epoch": 0.5510215664018161, + "epoch": 0.5502564538524752, "grad_norm": 0.0, - "learning_rate": 8.836332312137246e-06, - "loss": 0.9364, + "learning_rate": 8.860749035903657e-06, + "loss": 0.933, "step": 19418 }, { - "epoch": 0.551049943246311, + "epoch": 0.5502847912947377, "grad_norm": 0.0, - "learning_rate": 8.835419482915922e-06, - "loss": 0.8747, + "learning_rate": 8.85983722685039e-06, + "loss": 0.9304, "step": 19419 }, { - "epoch": 0.5510783200908059, + "epoch": 0.5503131287370002, "grad_norm": 0.0, - "learning_rate": 8.834506663531861e-06, - "loss": 0.889, + "learning_rate": 8.858925427401142e-06, + "loss": 0.7782, "step": 19420 }, { - "epoch": 0.5511066969353008, + "epoch": 0.5503414661792626, "grad_norm": 0.0, - "learning_rate": 8.833593853992773e-06, - "loss": 0.813, + "learning_rate": 8.858013637563583e-06, + "loss": 0.8673, "step": 19421 }, { - "epoch": 0.5511350737797956, + "epoch": 0.5503698036215251, "grad_norm": 0.0, - "learning_rate": 8.832681054306373e-06, - "loss": 0.8358, + "learning_rate": 8.857101857345402e-06, + "loss": 0.8755, "step": 19422 }, { - "epoch": 0.5511634506242906, + "epoch": 0.5503981410637876, "grad_norm": 0.0, - "learning_rate": 8.831768264480368e-06, - "loss": 0.937, + "learning_rate": 8.856190086754274e-06, + "loss": 0.8714, "step": 19423 }, { - "epoch": 0.5511918274687855, + "epoch": 0.5504264785060501, "grad_norm": 0.0, - "learning_rate": 8.830855484522467e-06, - "loss": 0.9424, + "learning_rate": 8.855278325797884e-06, + "loss": 0.7117, "step": 19424 }, { - "epoch": 0.5512202043132803, + "epoch": 0.5504548159483125, "grad_norm": 0.0, - "learning_rate": 8.829942714440385e-06, - "loss": 0.8913, + "learning_rate": 8.854366574483913e-06, + "loss": 0.8992, "step": 19425 }, { - "epoch": 0.5512485811577753, + "epoch": 0.550483153390575, "grad_norm": 0.0, - "learning_rate": 8.829029954241828e-06, - "loss": 0.9424, + "learning_rate": 8.853454832820033e-06, + "loss": 0.8633, "step": 19426 }, { - "epoch": 0.5512769580022702, + "epoch": 0.5505114908328375, "grad_norm": 0.0, - "learning_rate": 8.828117203934506e-06, - "loss": 0.9242, + "learning_rate": 8.852543100813927e-06, + "loss": 0.852, "step": 19427 }, { - "epoch": 0.551305334846765, + "epoch": 0.5505398282750998, "grad_norm": 0.0, - "learning_rate": 8.827204463526133e-06, - "loss": 0.8162, + "learning_rate": 8.85163137847328e-06, + "loss": 0.7926, "step": 19428 }, { - "epoch": 0.5513337116912599, + "epoch": 0.5505681657173623, "grad_norm": 0.0, - "learning_rate": 8.826291733024417e-06, - "loss": 0.8233, + "learning_rate": 8.850719665805768e-06, + "loss": 0.8167, "step": 19429 }, { - "epoch": 0.5513620885357549, + "epoch": 0.5505965031596248, "grad_norm": 0.0, - "learning_rate": 8.825379012437066e-06, - "loss": 0.8882, + "learning_rate": 8.849807962819072e-06, + "loss": 0.8621, "step": 19430 }, { - "epoch": 0.5513904653802497, + "epoch": 0.5506248406018873, "grad_norm": 0.0, - "learning_rate": 8.82446630177179e-06, - "loss": 0.7865, + "learning_rate": 8.84889626952087e-06, + "loss": 0.8723, "step": 19431 }, { - "epoch": 0.5514188422247446, + "epoch": 0.5506531780441497, "grad_norm": 0.0, - "learning_rate": 8.8235536010363e-06, - "loss": 0.7842, + "learning_rate": 8.847984585918838e-06, + "loss": 0.8399, "step": 19432 }, { - "epoch": 0.5514472190692395, + "epoch": 0.5506815154864122, "grad_norm": 0.0, - "learning_rate": 8.822640910238309e-06, - "loss": 0.8916, + "learning_rate": 8.847072912020668e-06, + "loss": 0.8977, "step": 19433 }, { - "epoch": 0.5514755959137344, + "epoch": 0.5507098529286747, "grad_norm": 0.0, - "learning_rate": 8.821728229385522e-06, - "loss": 0.8525, + "learning_rate": 8.846161247834024e-06, + "loss": 0.8823, "step": 19434 }, { - "epoch": 0.5515039727582293, + "epoch": 0.5507381903709371, "grad_norm": 0.0, - "learning_rate": 8.820815558485647e-06, - "loss": 0.9256, + "learning_rate": 8.845249593366594e-06, + "loss": 0.9057, "step": 19435 }, { - "epoch": 0.5515323496027241, + "epoch": 0.5507665278131996, "grad_norm": 0.0, - "learning_rate": 8.8199028975464e-06, - "loss": 0.8479, + "learning_rate": 8.844337948626056e-06, + "loss": 0.8437, "step": 19436 }, { - "epoch": 0.5515607264472191, + "epoch": 0.5507948652554621, "grad_norm": 0.0, - "learning_rate": 8.818990246575485e-06, - "loss": 0.8907, + "learning_rate": 8.843426313620087e-06, + "loss": 0.834, "step": 19437 }, { - "epoch": 0.551589103291714, + "epoch": 0.5508232026977246, "grad_norm": 0.0, - "learning_rate": 8.81807760558061e-06, - "loss": 0.877, + "learning_rate": 8.842514688356373e-06, + "loss": 0.9301, "step": 19438 }, { - "epoch": 0.5516174801362088, + "epoch": 0.5508515401399869, "grad_norm": 0.0, - "learning_rate": 8.81716497456949e-06, - "loss": 0.8748, + "learning_rate": 8.841603072842582e-06, + "loss": 0.9161, "step": 19439 }, { - "epoch": 0.5516458569807038, + "epoch": 0.5508798775822494, "grad_norm": 0.0, - "learning_rate": 8.816252353549829e-06, - "loss": 0.9903, + "learning_rate": 8.840691467086399e-06, + "loss": 0.8249, "step": 19440 }, { - "epoch": 0.5516742338251986, + "epoch": 0.5509082150245119, "grad_norm": 0.0, - "learning_rate": 8.815339742529336e-06, - "loss": 0.9205, + "learning_rate": 8.839779871095504e-06, + "loss": 0.9073, "step": 19441 }, { - "epoch": 0.5517026106696935, + "epoch": 0.5509365524667743, "grad_norm": 0.0, - "learning_rate": 8.814427141515724e-06, - "loss": 0.8602, + "learning_rate": 8.838868284877573e-06, + "loss": 0.8109, "step": 19442 }, { - "epoch": 0.5517309875141885, + "epoch": 0.5509648899090368, "grad_norm": 0.0, - "learning_rate": 8.8135145505167e-06, - "loss": 0.9395, + "learning_rate": 8.83795670844029e-06, + "loss": 0.8071, "step": 19443 }, { - "epoch": 0.5517593643586833, + "epoch": 0.5509932273512993, "grad_norm": 0.0, - "learning_rate": 8.812601969539968e-06, - "loss": 0.857, + "learning_rate": 8.837045141791323e-06, + "loss": 0.7335, "step": 19444 }, { - "epoch": 0.5517877412031782, + "epoch": 0.5510215647935617, "grad_norm": 0.0, - "learning_rate": 8.811689398593245e-06, - "loss": 0.8773, + "learning_rate": 8.836133584938358e-06, + "loss": 0.8837, "step": 19445 }, { - "epoch": 0.551816118047673, + "epoch": 0.5510499022358242, "grad_norm": 0.0, - "learning_rate": 8.810776837684229e-06, - "loss": 0.8515, + "learning_rate": 8.835222037889074e-06, + "loss": 0.8871, "step": 19446 }, { - "epoch": 0.551844494892168, + "epoch": 0.5510782396780867, "grad_norm": 0.0, - "learning_rate": 8.809864286820638e-06, - "loss": 0.8876, + "learning_rate": 8.834310500651146e-06, + "loss": 0.8642, "step": 19447 }, { - "epoch": 0.5518728717366629, + "epoch": 0.5511065771203492, "grad_norm": 0.0, - "learning_rate": 8.808951746010176e-06, - "loss": 0.7905, + "learning_rate": 8.833398973232253e-06, + "loss": 0.8283, "step": 19448 }, { - "epoch": 0.5519012485811577, + "epoch": 0.5511349145626115, "grad_norm": 0.0, - "learning_rate": 8.808039215260555e-06, - "loss": 0.9038, + "learning_rate": 8.832487455640074e-06, + "loss": 0.9105, "step": 19449 }, { - "epoch": 0.5519296254256527, + "epoch": 0.551163252004874, "grad_norm": 0.0, - "learning_rate": 8.80712669457948e-06, - "loss": 0.8455, + "learning_rate": 8.831575947882288e-06, + "loss": 0.9509, "step": 19450 }, { - "epoch": 0.5519580022701476, + "epoch": 0.5511915894471365, "grad_norm": 0.0, - "learning_rate": 8.806214183974659e-06, - "loss": 0.8887, + "learning_rate": 8.830664449966573e-06, + "loss": 0.8066, "step": 19451 }, { - "epoch": 0.5519863791146424, + "epoch": 0.5512199268893989, "grad_norm": 0.0, - "learning_rate": 8.805301683453797e-06, - "loss": 0.8938, + "learning_rate": 8.829752961900602e-06, + "loss": 0.898, "step": 19452 }, { - "epoch": 0.5520147559591373, + "epoch": 0.5512482643316614, "grad_norm": 0.0, - "learning_rate": 8.804389193024609e-06, - "loss": 0.9216, + "learning_rate": 8.828841483692057e-06, + "loss": 0.8813, "step": 19453 }, { - "epoch": 0.5520431328036323, + "epoch": 0.5512766017739239, "grad_norm": 0.0, - "learning_rate": 8.8034767126948e-06, - "loss": 0.8082, + "learning_rate": 8.827930015348616e-06, + "loss": 0.8101, "step": 19454 }, { - "epoch": 0.5520715096481271, + "epoch": 0.5513049392161864, "grad_norm": 0.0, - "learning_rate": 8.802564242472074e-06, - "loss": 0.8424, + "learning_rate": 8.827018556877955e-06, + "loss": 0.9052, "step": 19455 }, { - "epoch": 0.552099886492622, + "epoch": 0.5513332766584488, "grad_norm": 0.0, - "learning_rate": 8.801651782364143e-06, - "loss": 0.871, + "learning_rate": 8.82610710828775e-06, + "loss": 0.9174, "step": 19456 }, { - "epoch": 0.552128263337117, + "epoch": 0.5513616141007113, "grad_norm": 0.0, - "learning_rate": 8.800739332378714e-06, - "loss": 0.8347, + "learning_rate": 8.825195669585687e-06, + "loss": 0.9059, "step": 19457 }, { - "epoch": 0.5521566401816118, + "epoch": 0.5513899515429738, "grad_norm": 0.0, - "learning_rate": 8.799826892523492e-06, - "loss": 0.9276, + "learning_rate": 8.824284240779433e-06, + "loss": 0.8469, "step": 19458 }, { - "epoch": 0.5521850170261067, + "epoch": 0.5514182889852361, "grad_norm": 0.0, - "learning_rate": 8.798914462806188e-06, - "loss": 0.8515, + "learning_rate": 8.823372821876673e-06, + "loss": 0.8791, "step": 19459 }, { - "epoch": 0.5522133938706016, + "epoch": 0.5514466264274986, "grad_norm": 0.0, - "learning_rate": 8.798002043234506e-06, - "loss": 0.8029, + "learning_rate": 8.822461412885076e-06, + "loss": 0.9056, "step": 19460 }, { - "epoch": 0.5522417707150965, + "epoch": 0.5514749638697611, "grad_norm": 0.0, - "learning_rate": 8.797089633816153e-06, - "loss": 0.8234, + "learning_rate": 8.821550013812324e-06, + "loss": 0.8079, "step": 19461 }, { - "epoch": 0.5522701475595914, + "epoch": 0.5515033013120236, "grad_norm": 0.0, - "learning_rate": 8.796177234558838e-06, - "loss": 0.9348, + "learning_rate": 8.820638624666096e-06, + "loss": 0.9089, "step": 19462 }, { - "epoch": 0.5522985244040862, + "epoch": 0.551531638754286, "grad_norm": 0.0, - "learning_rate": 8.795264845470272e-06, - "loss": 0.8202, + "learning_rate": 8.819727245454065e-06, + "loss": 0.9305, "step": 19463 }, { - "epoch": 0.5523269012485812, + "epoch": 0.5515599761965485, "grad_norm": 0.0, - "learning_rate": 8.794352466558157e-06, - "loss": 0.8276, + "learning_rate": 8.81881587618391e-06, + "loss": 0.8433, "step": 19464 }, { - "epoch": 0.552355278093076, + "epoch": 0.551588313638811, "grad_norm": 0.0, - "learning_rate": 8.793440097830199e-06, - "loss": 0.9943, + "learning_rate": 8.817904516863311e-06, + "loss": 0.7939, "step": 19465 }, { - "epoch": 0.5523836549375709, + "epoch": 0.5516166510810734, "grad_norm": 0.0, - "learning_rate": 8.792527739294109e-06, - "loss": 0.843, + "learning_rate": 8.816993167499938e-06, + "loss": 0.8615, "step": 19466 }, { - "epoch": 0.5524120317820659, + "epoch": 0.5516449885233359, "grad_norm": 0.0, - "learning_rate": 8.791615390957591e-06, - "loss": 0.7955, + "learning_rate": 8.816081828101471e-06, + "loss": 0.9121, "step": 19467 }, { - "epoch": 0.5524404086265607, + "epoch": 0.5516733259655984, "grad_norm": 0.0, - "learning_rate": 8.79070305282835e-06, - "loss": 0.9273, + "learning_rate": 8.815170498675585e-06, + "loss": 0.8794, "step": 19468 }, { - "epoch": 0.5524687854710556, + "epoch": 0.5517016634078608, "grad_norm": 0.0, - "learning_rate": 8.789790724914098e-06, - "loss": 0.856, + "learning_rate": 8.81425917922996e-06, + "loss": 0.8944, "step": 19469 }, { - "epoch": 0.5524971623155505, + "epoch": 0.5517300008501232, "grad_norm": 0.0, - "learning_rate": 8.788878407222538e-06, - "loss": 0.8561, + "learning_rate": 8.813347869772273e-06, + "loss": 0.9254, "step": 19470 }, { - "epoch": 0.5525255391600454, + "epoch": 0.5517583382923857, "grad_norm": 0.0, - "learning_rate": 8.787966099761372e-06, - "loss": 0.7327, + "learning_rate": 8.812436570310193e-06, + "loss": 0.9409, "step": 19471 }, { - "epoch": 0.5525539160045403, + "epoch": 0.5517866757346482, "grad_norm": 0.0, - "learning_rate": 8.787053802538316e-06, - "loss": 0.9099, + "learning_rate": 8.811525280851402e-06, + "loss": 0.8817, "step": 19472 }, { - "epoch": 0.5525822928490352, + "epoch": 0.5518150131769106, "grad_norm": 0.0, - "learning_rate": 8.786141515561068e-06, - "loss": 0.7951, + "learning_rate": 8.810614001403574e-06, + "loss": 0.9383, "step": 19473 }, { - "epoch": 0.5526106696935301, + "epoch": 0.5518433506191731, "grad_norm": 0.0, - "learning_rate": 8.785229238837338e-06, - "loss": 0.8814, + "learning_rate": 8.809702731974387e-06, + "loss": 0.8636, "step": 19474 }, { - "epoch": 0.552639046538025, + "epoch": 0.5518716880614356, "grad_norm": 0.0, - "learning_rate": 8.784316972374832e-06, - "loss": 0.8471, + "learning_rate": 8.80879147257152e-06, + "loss": 0.8929, "step": 19475 }, { - "epoch": 0.5526674233825198, + "epoch": 0.551900025503698, "grad_norm": 0.0, - "learning_rate": 8.783404716181255e-06, - "loss": 0.9711, + "learning_rate": 8.807880223202639e-06, + "loss": 0.9059, "step": 19476 }, { - "epoch": 0.5526958002270148, + "epoch": 0.5519283629459605, "grad_norm": 0.0, - "learning_rate": 8.78249247026431e-06, - "loss": 0.9028, + "learning_rate": 8.806968983875424e-06, + "loss": 0.9068, "step": 19477 }, { - "epoch": 0.5527241770715097, + "epoch": 0.551956700388223, "grad_norm": 0.0, - "learning_rate": 8.781580234631709e-06, - "loss": 0.8508, + "learning_rate": 8.806057754597559e-06, + "loss": 0.8542, "step": 19478 }, { - "epoch": 0.5527525539160045, + "epoch": 0.5519850378304855, "grad_norm": 0.0, - "learning_rate": 8.780668009291151e-06, - "loss": 0.8768, + "learning_rate": 8.805146535376709e-06, + "loss": 0.8235, "step": 19479 }, { - "epoch": 0.5527809307604994, + "epoch": 0.5520133752727479, "grad_norm": 0.0, - "learning_rate": 8.77975579425035e-06, - "loss": 1.0, + "learning_rate": 8.804235326220554e-06, + "loss": 0.7358, "step": 19480 }, { - "epoch": 0.5528093076049944, + "epoch": 0.5520417127150103, "grad_norm": 0.0, - "learning_rate": 8.778843589517005e-06, - "loss": 0.7831, + "learning_rate": 8.803324127136767e-06, + "loss": 0.8953, "step": 19481 }, { - "epoch": 0.5528376844494892, + "epoch": 0.5520700501572728, "grad_norm": 0.0, - "learning_rate": 8.77793139509882e-06, - "loss": 0.8816, + "learning_rate": 8.802412938133026e-06, + "loss": 0.9469, "step": 19482 }, { - "epoch": 0.5528660612939841, + "epoch": 0.5520983875995352, "grad_norm": 0.0, - "learning_rate": 8.777019211003508e-06, - "loss": 0.8551, + "learning_rate": 8.801501759217011e-06, + "loss": 0.8665, "step": 19483 }, { - "epoch": 0.552894438138479, + "epoch": 0.5521267250417977, "grad_norm": 0.0, - "learning_rate": 8.77610703723877e-06, - "loss": 0.8086, + "learning_rate": 8.800590590396385e-06, + "loss": 0.8988, "step": 19484 }, { - "epoch": 0.5529228149829739, + "epoch": 0.5521550624840602, "grad_norm": 0.0, - "learning_rate": 8.775194873812308e-06, - "loss": 0.8645, + "learning_rate": 8.799679431678831e-06, + "loss": 0.8799, "step": 19485 }, { - "epoch": 0.5529511918274688, + "epoch": 0.5521833999263227, "grad_norm": 0.0, - "learning_rate": 8.77428272073183e-06, - "loss": 0.8579, + "learning_rate": 8.798768283072025e-06, + "loss": 0.8587, "step": 19486 }, { - "epoch": 0.5529795686719636, + "epoch": 0.5522117373685851, "grad_norm": 0.0, - "learning_rate": 8.773370578005042e-06, - "loss": 0.9158, + "learning_rate": 8.797857144583637e-06, + "loss": 0.9222, "step": 19487 }, { - "epoch": 0.5530079455164586, + "epoch": 0.5522400748108476, "grad_norm": 0.0, - "learning_rate": 8.772458445639647e-06, - "loss": 0.9431, + "learning_rate": 8.79694601622135e-06, + "loss": 0.8822, "step": 19488 }, { - "epoch": 0.5530363223609535, + "epoch": 0.5522684122531101, "grad_norm": 0.0, - "learning_rate": 8.771546323643352e-06, - "loss": 0.9249, + "learning_rate": 8.79603489799283e-06, + "loss": 0.7423, "step": 19489 }, { - "epoch": 0.5530646992054483, + "epoch": 0.5522967496953725, "grad_norm": 0.0, - "learning_rate": 8.770634212023861e-06, - "loss": 0.7697, + "learning_rate": 8.795123789905753e-06, + "loss": 0.869, "step": 19490 }, { - "epoch": 0.5530930760499433, + "epoch": 0.552325087137635, "grad_norm": 0.0, - "learning_rate": 8.769722110788876e-06, - "loss": 0.8387, + "learning_rate": 8.7942126919678e-06, + "loss": 0.8435, "step": 19491 }, { - "epoch": 0.5531214528944381, + "epoch": 0.5523534245798974, "grad_norm": 0.0, - "learning_rate": 8.768810019946102e-06, - "loss": 0.8918, + "learning_rate": 8.793301604186638e-06, + "loss": 0.7852, "step": 19492 }, { - "epoch": 0.553149829738933, + "epoch": 0.5523817620221598, "grad_norm": 0.0, - "learning_rate": 8.767897939503246e-06, - "loss": 0.8972, + "learning_rate": 8.792390526569944e-06, + "loss": 0.8863, "step": 19493 }, { - "epoch": 0.553178206583428, + "epoch": 0.5524100994644223, "grad_norm": 0.0, - "learning_rate": 8.766985869468014e-06, - "loss": 0.8867, + "learning_rate": 8.791479459125396e-06, + "loss": 0.8724, "step": 19494 }, { - "epoch": 0.5532065834279228, + "epoch": 0.5524384369066848, "grad_norm": 0.0, - "learning_rate": 8.766073809848108e-06, - "loss": 0.8594, + "learning_rate": 8.790568401860663e-06, + "loss": 0.8489, "step": 19495 }, { - "epoch": 0.5532349602724177, + "epoch": 0.5524667743489473, "grad_norm": 0.0, - "learning_rate": 8.765161760651228e-06, - "loss": 0.9193, + "learning_rate": 8.789657354783425e-06, + "loss": 0.9114, "step": 19496 }, { - "epoch": 0.5532633371169126, + "epoch": 0.5524951117912097, "grad_norm": 0.0, - "learning_rate": 8.764249721885087e-06, - "loss": 0.835, + "learning_rate": 8.788746317901349e-06, + "loss": 0.8965, "step": 19497 }, { - "epoch": 0.5532917139614075, + "epoch": 0.5525234492334722, "grad_norm": 0.0, - "learning_rate": 8.763337693557383e-06, - "loss": 0.9465, + "learning_rate": 8.787835291222113e-06, + "loss": 0.8963, "step": 19498 }, { - "epoch": 0.5533200908059024, + "epoch": 0.5525517866757347, "grad_norm": 0.0, - "learning_rate": 8.762425675675818e-06, - "loss": 0.8129, + "learning_rate": 8.786924274753391e-06, + "loss": 0.9195, "step": 19499 }, { - "epoch": 0.5533484676503972, + "epoch": 0.5525801241179971, "grad_norm": 0.0, - "learning_rate": 8.761513668248103e-06, - "loss": 0.9034, + "learning_rate": 8.786013268502855e-06, + "loss": 0.8991, "step": 19500 }, { - "epoch": 0.5533768444948922, + "epoch": 0.5526084615602596, "grad_norm": 0.0, - "learning_rate": 8.760601671281937e-06, - "loss": 0.8797, + "learning_rate": 8.785102272478185e-06, + "loss": 0.942, "step": 19501 }, { - "epoch": 0.5534052213393871, + "epoch": 0.552636799002522, "grad_norm": 0.0, - "learning_rate": 8.759689684785021e-06, - "loss": 0.9532, + "learning_rate": 8.784191286687044e-06, + "loss": 0.778, "step": 19502 }, { - "epoch": 0.5534335981838819, + "epoch": 0.5526651364447845, "grad_norm": 0.0, - "learning_rate": 8.758777708765065e-06, - "loss": 0.9394, + "learning_rate": 8.783280311137114e-06, + "loss": 0.87, "step": 19503 }, { - "epoch": 0.5534619750283768, + "epoch": 0.5526934738870469, "grad_norm": 0.0, - "learning_rate": 8.75786574322977e-06, - "loss": 0.9513, + "learning_rate": 8.782369345836067e-06, + "loss": 0.9515, "step": 19504 }, { - "epoch": 0.5534903518728718, + "epoch": 0.5527218113293094, "grad_norm": 0.0, - "learning_rate": 8.756953788186837e-06, - "loss": 0.8695, + "learning_rate": 8.781458390791573e-06, + "loss": 0.846, "step": 19505 }, { - "epoch": 0.5535187287173666, + "epoch": 0.5527501487715719, "grad_norm": 0.0, - "learning_rate": 8.756041843643971e-06, - "loss": 0.8514, + "learning_rate": 8.780547446011306e-06, + "loss": 0.843, "step": 19506 }, { - "epoch": 0.5535471055618615, + "epoch": 0.5527784862138343, "grad_norm": 0.0, - "learning_rate": 8.755129909608873e-06, - "loss": 0.8431, + "learning_rate": 8.779636511502944e-06, + "loss": 0.8853, "step": 19507 }, { - "epoch": 0.5535754824063565, + "epoch": 0.5528068236560968, "grad_norm": 0.0, - "learning_rate": 8.754217986089253e-06, - "loss": 0.9209, + "learning_rate": 8.778725587274152e-06, + "loss": 0.7456, "step": 19508 }, { - "epoch": 0.5536038592508513, + "epoch": 0.5528351610983593, "grad_norm": 0.0, - "learning_rate": 8.753306073092807e-06, - "loss": 0.9261, + "learning_rate": 8.777814673332615e-06, + "loss": 0.91, "step": 19509 }, { - "epoch": 0.5536322360953462, + "epoch": 0.5528634985406218, "grad_norm": 0.0, - "learning_rate": 8.752394170627243e-06, - "loss": 0.8102, + "learning_rate": 8.776903769685994e-06, + "loss": 0.9152, "step": 19510 }, { - "epoch": 0.553660612939841, + "epoch": 0.5528918359828842, "grad_norm": 0.0, - "learning_rate": 8.751482278700262e-06, - "loss": 0.8707, + "learning_rate": 8.775992876341966e-06, + "loss": 0.837, "step": 19511 }, { - "epoch": 0.553688989784336, + "epoch": 0.5529201734251467, "grad_norm": 0.0, - "learning_rate": 8.750570397319562e-06, - "loss": 0.8496, + "learning_rate": 8.775081993308208e-06, + "loss": 0.8644, "step": 19512 }, { - "epoch": 0.5537173666288309, + "epoch": 0.5529485108674091, "grad_norm": 0.0, - "learning_rate": 8.749658526492855e-06, - "loss": 1.0442, + "learning_rate": 8.774171120592386e-06, + "loss": 0.9011, "step": 19513 }, { - "epoch": 0.5537457434733257, + "epoch": 0.5529768483096715, "grad_norm": 0.0, - "learning_rate": 8.748746666227838e-06, - "loss": 1.001, + "learning_rate": 8.773260258202177e-06, + "loss": 0.8887, "step": 19514 }, { - "epoch": 0.5537741203178207, + "epoch": 0.553005185751934, "grad_norm": 0.0, - "learning_rate": 8.747834816532212e-06, - "loss": 0.7853, + "learning_rate": 8.772349406145256e-06, + "loss": 0.9498, "step": 19515 }, { - "epoch": 0.5538024971623156, + "epoch": 0.5530335231941965, "grad_norm": 0.0, - "learning_rate": 8.746922977413684e-06, - "loss": 0.8769, + "learning_rate": 8.771438564429286e-06, + "loss": 0.8567, "step": 19516 }, { - "epoch": 0.5538308740068104, + "epoch": 0.5530618606364589, "grad_norm": 0.0, - "learning_rate": 8.746011148879951e-06, - "loss": 0.9165, + "learning_rate": 8.770527733061951e-06, + "loss": 0.7411, "step": 19517 }, { - "epoch": 0.5538592508513054, + "epoch": 0.5530901980787214, "grad_norm": 0.0, - "learning_rate": 8.745099330938721e-06, - "loss": 0.7956, + "learning_rate": 8.769616912050914e-06, + "loss": 0.8858, "step": 19518 }, { - "epoch": 0.5538876276958002, + "epoch": 0.5531185355209839, "grad_norm": 0.0, - "learning_rate": 8.744187523597692e-06, - "loss": 0.8507, + "learning_rate": 8.76870610140385e-06, + "loss": 0.8687, "step": 19519 }, { - "epoch": 0.5539160045402951, + "epoch": 0.5531468729632464, "grad_norm": 0.0, - "learning_rate": 8.743275726864567e-06, - "loss": 0.8294, + "learning_rate": 8.767795301128433e-06, + "loss": 0.8537, "step": 19520 }, { - "epoch": 0.55394438138479, + "epoch": 0.5531752104055088, "grad_norm": 0.0, - "learning_rate": 8.74236394074705e-06, - "loss": 0.8061, + "learning_rate": 8.766884511232333e-06, + "loss": 0.8283, "step": 19521 }, { - "epoch": 0.5539727582292849, + "epoch": 0.5532035478477713, "grad_norm": 0.0, - "learning_rate": 8.741452165252838e-06, - "loss": 0.8387, + "learning_rate": 8.765973731723221e-06, + "loss": 0.9852, "step": 19522 }, { - "epoch": 0.5540011350737798, + "epoch": 0.5532318852900338, "grad_norm": 0.0, - "learning_rate": 8.740540400389635e-06, - "loss": 0.859, + "learning_rate": 8.765062962608775e-06, + "loss": 0.8156, "step": 19523 }, { - "epoch": 0.5540295119182747, + "epoch": 0.5532602227322961, "grad_norm": 0.0, - "learning_rate": 8.739628646165149e-06, - "loss": 0.8051, + "learning_rate": 8.764152203896658e-06, + "loss": 0.9386, "step": 19524 }, { - "epoch": 0.5540578887627696, + "epoch": 0.5532885601745586, "grad_norm": 0.0, - "learning_rate": 8.738716902587074e-06, - "loss": 0.8661, + "learning_rate": 8.763241455594548e-06, + "loss": 0.82, "step": 19525 }, { - "epoch": 0.5540862656072645, + "epoch": 0.5533168976168211, "grad_norm": 0.0, - "learning_rate": 8.737805169663113e-06, - "loss": 0.7349, + "learning_rate": 8.762330717710113e-06, + "loss": 0.9936, "step": 19526 }, { - "epoch": 0.5541146424517593, + "epoch": 0.5533452350590836, "grad_norm": 0.0, - "learning_rate": 8.736893447400973e-06, - "loss": 0.9619, + "learning_rate": 8.761419990251027e-06, + "loss": 0.8754, "step": 19527 }, { - "epoch": 0.5541430192962542, + "epoch": 0.553373572501346, "grad_norm": 0.0, - "learning_rate": 8.735981735808348e-06, - "loss": 0.9231, + "learning_rate": 8.760509273224963e-06, + "loss": 0.8449, "step": 19528 }, { - "epoch": 0.5541713961407492, + "epoch": 0.5534019099436085, "grad_norm": 0.0, - "learning_rate": 8.735070034892941e-06, - "loss": 0.93, + "learning_rate": 8.759598566639586e-06, + "loss": 0.8247, "step": 19529 }, { - "epoch": 0.554199772985244, + "epoch": 0.553430247385871, "grad_norm": 0.0, - "learning_rate": 8.734158344662455e-06, - "loss": 0.9185, + "learning_rate": 8.758687870502576e-06, + "loss": 0.7886, "step": 19530 }, { - "epoch": 0.5542281498297389, + "epoch": 0.5534585848281334, "grad_norm": 0.0, - "learning_rate": 8.733246665124592e-06, - "loss": 0.8041, + "learning_rate": 8.757777184821593e-06, + "loss": 0.7742, "step": 19531 }, { - "epoch": 0.5542565266742339, + "epoch": 0.5534869222703959, "grad_norm": 0.0, - "learning_rate": 8.732334996287048e-06, - "loss": 0.8572, + "learning_rate": 8.756866509604314e-06, + "loss": 0.9561, "step": 19532 }, { - "epoch": 0.5542849035187287, + "epoch": 0.5535152597126584, "grad_norm": 0.0, - "learning_rate": 8.731423338157532e-06, - "loss": 0.9395, + "learning_rate": 8.755955844858415e-06, + "loss": 0.8453, "step": 19533 }, { - "epoch": 0.5543132803632236, + "epoch": 0.5535435971549209, "grad_norm": 0.0, - "learning_rate": 8.730511690743738e-06, - "loss": 0.7961, + "learning_rate": 8.755045190591557e-06, + "loss": 0.8501, "step": 19534 }, { - "epoch": 0.5543416572077186, + "epoch": 0.5535719345971832, "grad_norm": 0.0, - "learning_rate": 8.729600054053367e-06, - "loss": 0.984, + "learning_rate": 8.754134546811416e-06, + "loss": 0.8121, "step": 19535 }, { - "epoch": 0.5543700340522134, + "epoch": 0.5536002720394457, "grad_norm": 0.0, - "learning_rate": 8.728688428094123e-06, - "loss": 0.8672, + "learning_rate": 8.753223913525668e-06, + "loss": 0.8893, "step": 19536 }, { - "epoch": 0.5543984108967083, + "epoch": 0.5536286094817082, "grad_norm": 0.0, - "learning_rate": 8.727776812873705e-06, - "loss": 0.9472, + "learning_rate": 8.752313290741972e-06, + "loss": 0.787, "step": 19537 }, { - "epoch": 0.5544267877412031, + "epoch": 0.5536569469239706, "grad_norm": 0.0, - "learning_rate": 8.72686520839981e-06, - "loss": 0.8846, + "learning_rate": 8.751402678468008e-06, + "loss": 0.8563, "step": 19538 }, { - "epoch": 0.5544551645856981, + "epoch": 0.5536852843662331, "grad_norm": 0.0, - "learning_rate": 8.725953614680147e-06, - "loss": 0.9589, + "learning_rate": 8.750492076711439e-06, + "loss": 0.8661, "step": 19539 }, { - "epoch": 0.554483541430193, + "epoch": 0.5537136218084956, "grad_norm": 0.0, - "learning_rate": 8.725042031722406e-06, - "loss": 0.8945, + "learning_rate": 8.74958148547994e-06, + "loss": 1.0074, "step": 19540 }, { - "epoch": 0.5545119182746878, + "epoch": 0.553741959250758, "grad_norm": 0.0, - "learning_rate": 8.724130459534297e-06, - "loss": 0.8028, + "learning_rate": 8.748670904781186e-06, + "loss": 0.8843, "step": 19541 }, { - "epoch": 0.5545402951191828, + "epoch": 0.5537702966930205, "grad_norm": 0.0, - "learning_rate": 8.723218898123514e-06, - "loss": 0.8504, + "learning_rate": 8.747760334622838e-06, + "loss": 0.8325, "step": 19542 }, { - "epoch": 0.5545686719636777, + "epoch": 0.553798634135283, "grad_norm": 0.0, - "learning_rate": 8.722307347497757e-06, - "loss": 0.8547, + "learning_rate": 8.746849775012566e-06, + "loss": 0.9036, "step": 19543 }, { - "epoch": 0.5545970488081725, + "epoch": 0.5538269715775455, "grad_norm": 0.0, - "learning_rate": 8.72139580766473e-06, - "loss": 0.8482, + "learning_rate": 8.745939225958052e-06, + "loss": 0.8479, "step": 19544 }, { - "epoch": 0.5546254256526674, + "epoch": 0.5538553090198078, "grad_norm": 0.0, - "learning_rate": 8.720484278632129e-06, - "loss": 0.9151, + "learning_rate": 8.74502868746695e-06, + "loss": 0.9237, "step": 19545 }, { - "epoch": 0.5546538024971623, + "epoch": 0.5538836464620703, "grad_norm": 0.0, - "learning_rate": 8.719572760407652e-06, - "loss": 0.7136, + "learning_rate": 8.744118159546942e-06, + "loss": 0.9677, "step": 19546 }, { - "epoch": 0.5546821793416572, + "epoch": 0.5539119839043328, "grad_norm": 0.0, - "learning_rate": 8.718661252999004e-06, - "loss": 0.8641, + "learning_rate": 8.743207642205688e-06, + "loss": 0.805, "step": 19547 }, { - "epoch": 0.5547105561861521, + "epoch": 0.5539403213465952, "grad_norm": 0.0, - "learning_rate": 8.717749756413883e-06, - "loss": 0.8158, + "learning_rate": 8.742297135450866e-06, + "loss": 0.9515, "step": 19548 }, { - "epoch": 0.554738933030647, + "epoch": 0.5539686587888577, "grad_norm": 0.0, - "learning_rate": 8.716838270659985e-06, - "loss": 0.8089, + "learning_rate": 8.741386639290145e-06, + "loss": 0.7907, "step": 19549 }, { - "epoch": 0.5547673098751419, + "epoch": 0.5539969962311202, "grad_norm": 0.0, - "learning_rate": 8.715926795745013e-06, - "loss": 0.8174, + "learning_rate": 8.740476153731187e-06, + "loss": 0.8274, "step": 19550 }, { - "epoch": 0.5547956867196367, + "epoch": 0.5540253336733827, "grad_norm": 0.0, - "learning_rate": 8.715015331676667e-06, - "loss": 0.8344, + "learning_rate": 8.739565678781668e-06, + "loss": 0.8535, "step": 19551 }, { - "epoch": 0.5548240635641317, + "epoch": 0.5540536711156451, "grad_norm": 0.0, - "learning_rate": 8.71410387846264e-06, - "loss": 0.8387, + "learning_rate": 8.738655214449256e-06, + "loss": 0.8779, "step": 19552 }, { - "epoch": 0.5548524404086266, + "epoch": 0.5540820085579076, "grad_norm": 0.0, - "learning_rate": 8.713192436110635e-06, - "loss": 0.7627, + "learning_rate": 8.737744760741616e-06, + "loss": 0.9266, "step": 19553 }, { - "epoch": 0.5548808172531214, + "epoch": 0.5541103460001701, "grad_norm": 0.0, - "learning_rate": 8.712281004628355e-06, - "loss": 0.8746, + "learning_rate": 8.736834317666428e-06, + "loss": 0.8167, "step": 19554 }, { - "epoch": 0.5549091940976163, + "epoch": 0.5541386834424324, "grad_norm": 0.0, - "learning_rate": 8.711369584023494e-06, - "loss": 0.7925, + "learning_rate": 8.735923885231348e-06, + "loss": 0.8107, "step": 19555 }, { - "epoch": 0.5549375709421113, + "epoch": 0.5541670208846949, "grad_norm": 0.0, - "learning_rate": 8.71045817430375e-06, - "loss": 0.8283, + "learning_rate": 8.735013463444049e-06, + "loss": 0.9451, "step": 19556 }, { - "epoch": 0.5549659477866061, + "epoch": 0.5541953583269574, "grad_norm": 0.0, - "learning_rate": 8.709546775476828e-06, - "loss": 0.9032, + "learning_rate": 8.734103052312207e-06, + "loss": 0.8593, "step": 19557 }, { - "epoch": 0.554994324631101, + "epoch": 0.5542236957692198, "grad_norm": 0.0, - "learning_rate": 8.708635387550422e-06, - "loss": 0.8655, + "learning_rate": 8.73319265184348e-06, + "loss": 0.8815, "step": 19558 }, { - "epoch": 0.555022701475596, + "epoch": 0.5542520332114823, "grad_norm": 0.0, - "learning_rate": 8.70772401053223e-06, - "loss": 0.818, + "learning_rate": 8.732282262045546e-06, + "loss": 0.8808, "step": 19559 }, { - "epoch": 0.5550510783200908, + "epoch": 0.5542803706537448, "grad_norm": 0.0, - "learning_rate": 8.70681264442995e-06, - "loss": 0.9539, + "learning_rate": 8.731371882926065e-06, + "loss": 0.8198, "step": 19560 }, { - "epoch": 0.5550794551645857, + "epoch": 0.5543087080960073, "grad_norm": 0.0, - "learning_rate": 8.705901289251282e-06, - "loss": 0.9478, + "learning_rate": 8.73046151449271e-06, + "loss": 0.9712, "step": 19561 }, { - "epoch": 0.5551078320090805, + "epoch": 0.5543370455382697, "grad_norm": 0.0, - "learning_rate": 8.704989945003926e-06, - "loss": 0.8457, + "learning_rate": 8.729551156753155e-06, + "loss": 0.8787, "step": 19562 }, { - "epoch": 0.5551362088535755, + "epoch": 0.5543653829805322, "grad_norm": 0.0, - "learning_rate": 8.704078611695575e-06, - "loss": 0.8195, + "learning_rate": 8.728640809715057e-06, + "loss": 0.8008, "step": 19563 }, { - "epoch": 0.5551645856980704, + "epoch": 0.5543937204227947, "grad_norm": 0.0, - "learning_rate": 8.703167289333932e-06, - "loss": 0.8347, + "learning_rate": 8.727730473386089e-06, + "loss": 0.9333, "step": 19564 }, { - "epoch": 0.5551929625425652, + "epoch": 0.554422057865057, "grad_norm": 0.0, - "learning_rate": 8.702255977926695e-06, - "loss": 0.8997, + "learning_rate": 8.726820147773923e-06, + "loss": 0.9018, "step": 19565 }, { - "epoch": 0.5552213393870602, + "epoch": 0.5544503953073195, "grad_norm": 0.0, - "learning_rate": 8.701344677481555e-06, - "loss": 0.8885, + "learning_rate": 8.72590983288622e-06, + "loss": 0.8288, "step": 19566 }, { - "epoch": 0.5552497162315551, + "epoch": 0.554478732749582, "grad_norm": 0.0, - "learning_rate": 8.700433388006219e-06, - "loss": 0.8911, + "learning_rate": 8.724999528730657e-06, + "loss": 0.9313, "step": 19567 }, { - "epoch": 0.5552780930760499, + "epoch": 0.5545070701918445, "grad_norm": 0.0, - "learning_rate": 8.699522109508382e-06, - "loss": 0.8822, + "learning_rate": 8.72408923531489e-06, + "loss": 0.9135, "step": 19568 }, { - "epoch": 0.5553064699205449, + "epoch": 0.5545354076341069, "grad_norm": 0.0, - "learning_rate": 8.698610841995735e-06, - "loss": 0.93, + "learning_rate": 8.723178952646597e-06, + "loss": 0.9488, "step": 19569 }, { - "epoch": 0.5553348467650397, + "epoch": 0.5545637450763694, "grad_norm": 0.0, - "learning_rate": 8.69769958547598e-06, - "loss": 0.7557, + "learning_rate": 8.722268680733443e-06, + "loss": 0.9076, "step": 19570 }, { - "epoch": 0.5553632236095346, + "epoch": 0.5545920825186319, "grad_norm": 0.0, - "learning_rate": 8.696788339956819e-06, - "loss": 0.8637, + "learning_rate": 8.72135841958309e-06, + "loss": 0.9283, "step": 19571 }, { - "epoch": 0.5553916004540295, + "epoch": 0.5546204199608943, "grad_norm": 0.0, - "learning_rate": 8.695877105445947e-06, - "loss": 0.8382, + "learning_rate": 8.720448169203213e-06, + "loss": 0.8778, "step": 19572 }, { - "epoch": 0.5554199772985244, + "epoch": 0.5546487574031568, "grad_norm": 0.0, - "learning_rate": 8.694965881951057e-06, - "loss": 0.7915, + "learning_rate": 8.719537929601476e-06, + "loss": 0.8752, "step": 19573 }, { - "epoch": 0.5554483541430193, + "epoch": 0.5546770948454193, "grad_norm": 0.0, - "learning_rate": 8.69405466947985e-06, - "loss": 0.8901, + "learning_rate": 8.718627700785545e-06, + "loss": 0.9283, "step": 19574 }, { - "epoch": 0.5554767309875142, + "epoch": 0.5547054322876818, "grad_norm": 0.0, - "learning_rate": 8.693143468040023e-06, - "loss": 0.8703, + "learning_rate": 8.717717482763092e-06, + "loss": 0.8569, "step": 19575 }, { - "epoch": 0.5555051078320091, + "epoch": 0.5547337697299441, "grad_norm": 0.0, - "learning_rate": 8.692232277639269e-06, - "loss": 0.8344, + "learning_rate": 8.716807275541778e-06, + "loss": 0.8287, "step": 19576 }, { - "epoch": 0.555533484676504, + "epoch": 0.5547621071722066, "grad_norm": 0.0, - "learning_rate": 8.69132109828529e-06, - "loss": 0.9222, + "learning_rate": 8.715897079129272e-06, + "loss": 0.8944, "step": 19577 }, { - "epoch": 0.5555618615209988, + "epoch": 0.5547904446144691, "grad_norm": 0.0, - "learning_rate": 8.690409929985781e-06, - "loss": 0.9062, + "learning_rate": 8.714986893533244e-06, + "loss": 0.8905, "step": 19578 }, { - "epoch": 0.5555902383654937, + "epoch": 0.5548187820567315, "grad_norm": 0.0, - "learning_rate": 8.689498772748436e-06, - "loss": 0.8453, + "learning_rate": 8.714076718761357e-06, + "loss": 0.8239, "step": 19579 }, { - "epoch": 0.5556186152099887, + "epoch": 0.554847119498994, "grad_norm": 0.0, - "learning_rate": 8.688587626580954e-06, - "loss": 0.8466, + "learning_rate": 8.713166554821277e-06, + "loss": 0.8618, "step": 19580 }, { - "epoch": 0.5556469920544835, + "epoch": 0.5548754569412565, "grad_norm": 0.0, - "learning_rate": 8.687676491491033e-06, - "loss": 0.8376, + "learning_rate": 8.71225640172068e-06, + "loss": 0.9694, "step": 19581 }, { - "epoch": 0.5556753688989784, + "epoch": 0.5549037943835189, "grad_norm": 0.0, - "learning_rate": 8.686765367486364e-06, - "loss": 0.7915, + "learning_rate": 8.71134625946722e-06, + "loss": 0.8875, "step": 19582 }, { - "epoch": 0.5557037457434734, + "epoch": 0.5549321318257814, "grad_norm": 0.0, - "learning_rate": 8.685854254574653e-06, - "loss": 0.9378, + "learning_rate": 8.710436128068572e-06, + "loss": 0.7554, "step": 19583 }, { - "epoch": 0.5557321225879682, + "epoch": 0.5549604692680439, "grad_norm": 0.0, - "learning_rate": 8.684943152763583e-06, - "loss": 0.8593, + "learning_rate": 8.709526007532396e-06, + "loss": 0.8511, "step": 19584 }, { - "epoch": 0.5557604994324631, + "epoch": 0.5549888067103064, "grad_norm": 0.0, - "learning_rate": 8.684032062060862e-06, - "loss": 0.9423, + "learning_rate": 8.708615897866363e-06, + "loss": 0.8599, "step": 19585 }, { - "epoch": 0.555788876276958, + "epoch": 0.5550171441525688, "grad_norm": 0.0, - "learning_rate": 8.683120982474181e-06, - "loss": 0.8212, + "learning_rate": 8.70770579907814e-06, + "loss": 0.8938, "step": 19586 }, { - "epoch": 0.5558172531214529, + "epoch": 0.5550454815948312, "grad_norm": 0.0, - "learning_rate": 8.682209914011233e-06, - "loss": 0.8889, + "learning_rate": 8.706795711175389e-06, + "loss": 0.9626, "step": 19587 }, { - "epoch": 0.5558456299659478, + "epoch": 0.5550738190370937, "grad_norm": 0.0, - "learning_rate": 8.681298856679721e-06, - "loss": 0.8146, + "learning_rate": 8.70588563416578e-06, + "loss": 0.8213, "step": 19588 }, { - "epoch": 0.5558740068104426, + "epoch": 0.5551021564793561, "grad_norm": 0.0, - "learning_rate": 8.680387810487337e-06, - "loss": 0.9177, + "learning_rate": 8.704975568056975e-06, + "loss": 0.8747, "step": 19589 }, { - "epoch": 0.5559023836549376, + "epoch": 0.5551304939216186, "grad_norm": 0.0, - "learning_rate": 8.679476775441773e-06, - "loss": 0.8908, + "learning_rate": 8.70406551285664e-06, + "loss": 0.7842, "step": 19590 }, { - "epoch": 0.5559307604994325, + "epoch": 0.5551588313638811, "grad_norm": 0.0, - "learning_rate": 8.67856575155073e-06, - "loss": 0.7923, + "learning_rate": 8.703155468572443e-06, + "loss": 0.9324, "step": 19591 }, { - "epoch": 0.5559591373439273, + "epoch": 0.5551871688061436, "grad_norm": 0.0, - "learning_rate": 8.677654738821904e-06, - "loss": 0.9255, + "learning_rate": 8.702245435212051e-06, + "loss": 0.8395, "step": 19592 }, { - "epoch": 0.5559875141884223, + "epoch": 0.555215506248406, "grad_norm": 0.0, - "learning_rate": 8.676743737262985e-06, - "loss": 0.7881, + "learning_rate": 8.701335412783124e-06, + "loss": 0.9778, "step": 19593 }, { - "epoch": 0.5560158910329172, + "epoch": 0.5552438436906685, "grad_norm": 0.0, - "learning_rate": 8.675832746881672e-06, - "loss": 0.9211, + "learning_rate": 8.700425401293338e-06, + "loss": 0.8368, "step": 19594 }, { - "epoch": 0.556044267877412, + "epoch": 0.555272181132931, "grad_norm": 0.0, - "learning_rate": 8.67492176768566e-06, - "loss": 0.8319, + "learning_rate": 8.699515400750345e-06, + "loss": 0.8688, "step": 19595 }, { - "epoch": 0.5560726447219069, + "epoch": 0.5553005185751934, "grad_norm": 0.0, - "learning_rate": 8.674010799682641e-06, - "loss": 0.8793, + "learning_rate": 8.69860541116182e-06, + "loss": 0.9688, "step": 19596 }, { - "epoch": 0.5561010215664018, + "epoch": 0.5553288560174559, "grad_norm": 0.0, - "learning_rate": 8.673099842880315e-06, - "loss": 0.8905, + "learning_rate": 8.697695432535424e-06, + "loss": 0.9474, "step": 19597 }, { - "epoch": 0.5561293984108967, + "epoch": 0.5553571934597183, "grad_norm": 0.0, - "learning_rate": 8.672188897286374e-06, - "loss": 0.7727, + "learning_rate": 8.69678546487882e-06, + "loss": 0.8326, "step": 19598 }, { - "epoch": 0.5561577752553916, + "epoch": 0.5553855309019808, "grad_norm": 0.0, - "learning_rate": 8.671277962908508e-06, - "loss": 0.8856, + "learning_rate": 8.695875508199683e-06, + "loss": 0.8218, "step": 19599 }, { - "epoch": 0.5561861520998865, + "epoch": 0.5554138683442432, "grad_norm": 0.0, - "learning_rate": 8.670367039754421e-06, - "loss": 0.8502, + "learning_rate": 8.694965562505664e-06, + "loss": 0.9697, "step": 19600 }, { - "epoch": 0.5562145289443814, + "epoch": 0.5554422057865057, "grad_norm": 0.0, - "learning_rate": 8.669456127831802e-06, - "loss": 0.9182, + "learning_rate": 8.694055627804438e-06, + "loss": 0.892, "step": 19601 }, { - "epoch": 0.5562429057888763, + "epoch": 0.5554705432287682, "grad_norm": 0.0, - "learning_rate": 8.66854522714835e-06, - "loss": 0.9554, + "learning_rate": 8.693145704103669e-06, + "loss": 0.7434, "step": 19602 }, { - "epoch": 0.5562712826333712, + "epoch": 0.5554988806710306, "grad_norm": 0.0, - "learning_rate": 8.667634337711754e-06, - "loss": 0.8701, + "learning_rate": 8.692235791411013e-06, + "loss": 0.9844, "step": 19603 }, { - "epoch": 0.5562996594778661, + "epoch": 0.5555272181132931, "grad_norm": 0.0, - "learning_rate": 8.66672345952971e-06, - "loss": 0.8095, + "learning_rate": 8.691325889734144e-06, + "loss": 0.8747, "step": 19604 }, { - "epoch": 0.5563280363223609, + "epoch": 0.5555555555555556, "grad_norm": 0.0, - "learning_rate": 8.665812592609914e-06, - "loss": 0.9363, + "learning_rate": 8.690415999080721e-06, + "loss": 0.8754, "step": 19605 }, { - "epoch": 0.5563564131668558, + "epoch": 0.555583892997818, "grad_norm": 0.0, - "learning_rate": 8.66490173696006e-06, - "loss": 0.9165, + "learning_rate": 8.68950611945841e-06, + "loss": 0.8607, "step": 19606 }, { - "epoch": 0.5563847900113508, + "epoch": 0.5556122304400805, "grad_norm": 0.0, - "learning_rate": 8.663990892587839e-06, - "loss": 0.7769, + "learning_rate": 8.688596250874882e-06, + "loss": 0.8925, "step": 19607 }, { - "epoch": 0.5564131668558456, + "epoch": 0.555640567882343, "grad_norm": 0.0, - "learning_rate": 8.66308005950095e-06, - "loss": 0.8594, + "learning_rate": 8.687686393337789e-06, + "loss": 0.9747, "step": 19608 }, { - "epoch": 0.5564415437003405, + "epoch": 0.5556689053246054, "grad_norm": 0.0, - "learning_rate": 8.662169237707083e-06, - "loss": 0.8712, + "learning_rate": 8.6867765468548e-06, + "loss": 0.7631, "step": 19609 }, { - "epoch": 0.5564699205448355, + "epoch": 0.5556972427668678, "grad_norm": 0.0, - "learning_rate": 8.66125842721393e-06, - "loss": 0.8195, + "learning_rate": 8.685866711433582e-06, + "loss": 0.8122, "step": 19610 }, { - "epoch": 0.5564982973893303, + "epoch": 0.5557255802091303, "grad_norm": 0.0, - "learning_rate": 8.660347628029188e-06, - "loss": 0.8619, + "learning_rate": 8.684956887081795e-06, + "loss": 0.7976, "step": 19611 }, { - "epoch": 0.5565266742338252, + "epoch": 0.5557539176513928, "grad_norm": 0.0, - "learning_rate": 8.659436840160553e-06, - "loss": 0.8636, + "learning_rate": 8.684047073807109e-06, + "loss": 0.7998, "step": 19612 }, { - "epoch": 0.55655505107832, + "epoch": 0.5557822550936552, "grad_norm": 0.0, - "learning_rate": 8.658526063615711e-06, - "loss": 0.7801, + "learning_rate": 8.683137271617179e-06, + "loss": 0.9053, "step": 19613 }, { - "epoch": 0.556583427922815, + "epoch": 0.5558105925359177, "grad_norm": 0.0, - "learning_rate": 8.657615298402363e-06, - "loss": 1.0146, + "learning_rate": 8.682227480519672e-06, + "loss": 0.8429, "step": 19614 }, { - "epoch": 0.5566118047673099, + "epoch": 0.5558389299781802, "grad_norm": 0.0, - "learning_rate": 8.656704544528192e-06, - "loss": 0.89, + "learning_rate": 8.681317700522257e-06, + "loss": 0.8831, "step": 19615 }, { - "epoch": 0.5566401816118047, + "epoch": 0.5558672674204427, "grad_norm": 0.0, - "learning_rate": 8.655793802000905e-06, - "loss": 0.8994, + "learning_rate": 8.680407931632589e-06, + "loss": 0.8401, "step": 19616 }, { - "epoch": 0.5566685584562997, + "epoch": 0.5558956048627051, "grad_norm": 0.0, - "learning_rate": 8.654883070828184e-06, - "loss": 0.9828, + "learning_rate": 8.679498173858335e-06, + "loss": 0.8808, "step": 19617 }, { - "epoch": 0.5566969353007946, + "epoch": 0.5559239423049676, "grad_norm": 0.0, - "learning_rate": 8.653972351017732e-06, - "loss": 0.8644, + "learning_rate": 8.67858842720716e-06, + "loss": 0.8077, "step": 19618 }, { - "epoch": 0.5567253121452894, + "epoch": 0.55595227974723, "grad_norm": 0.0, - "learning_rate": 8.653061642577232e-06, - "loss": 0.8209, + "learning_rate": 8.677678691686722e-06, + "loss": 0.8915, "step": 19619 }, { - "epoch": 0.5567536889897844, + "epoch": 0.5559806171894924, "grad_norm": 0.0, - "learning_rate": 8.652150945514381e-06, - "loss": 0.8776, + "learning_rate": 8.676768967304692e-06, + "loss": 0.8818, "step": 19620 }, { - "epoch": 0.5567820658342792, + "epoch": 0.5560089546317549, "grad_norm": 0.0, - "learning_rate": 8.651240259836873e-06, - "loss": 1.0142, + "learning_rate": 8.675859254068726e-06, + "loss": 0.7964, "step": 19621 }, { - "epoch": 0.5568104426787741, + "epoch": 0.5560372920740174, "grad_norm": 0.0, - "learning_rate": 8.6503295855524e-06, - "loss": 0.7823, + "learning_rate": 8.674949551986487e-06, + "loss": 0.9084, "step": 19622 }, { - "epoch": 0.556838819523269, + "epoch": 0.5560656295162799, "grad_norm": 0.0, - "learning_rate": 8.649418922668649e-06, - "loss": 0.8523, + "learning_rate": 8.674039861065644e-06, + "loss": 0.8459, "step": 19623 }, { - "epoch": 0.5568671963677639, + "epoch": 0.5560939669585423, "grad_norm": 0.0, - "learning_rate": 8.648508271193323e-06, - "loss": 0.8893, + "learning_rate": 8.673130181313852e-06, + "loss": 0.8803, "step": 19624 }, { - "epoch": 0.5568955732122588, + "epoch": 0.5561223044008048, "grad_norm": 0.0, - "learning_rate": 8.647597631134105e-06, - "loss": 0.8614, + "learning_rate": 8.672220512738783e-06, + "loss": 0.8605, "step": 19625 }, { - "epoch": 0.5569239500567537, + "epoch": 0.5561506418430673, "grad_norm": 0.0, - "learning_rate": 8.646687002498692e-06, - "loss": 0.9216, + "learning_rate": 8.671310855348089e-06, + "loss": 0.8434, "step": 19626 }, { - "epoch": 0.5569523269012486, + "epoch": 0.5561789792853297, "grad_norm": 0.0, - "learning_rate": 8.645776385294773e-06, - "loss": 0.8928, + "learning_rate": 8.670401209149435e-06, + "loss": 0.8798, "step": 19627 }, { - "epoch": 0.5569807037457435, + "epoch": 0.5562073167275922, "grad_norm": 0.0, - "learning_rate": 8.644865779530042e-06, - "loss": 0.9486, + "learning_rate": 8.669491574150493e-06, + "loss": 0.8827, "step": 19628 }, { - "epoch": 0.5570090805902383, + "epoch": 0.5562356541698547, "grad_norm": 0.0, - "learning_rate": 8.643955185212195e-06, - "loss": 0.9393, + "learning_rate": 8.66858195035891e-06, + "loss": 0.8832, "step": 19629 }, { - "epoch": 0.5570374574347332, + "epoch": 0.556263991612117, "grad_norm": 0.0, - "learning_rate": 8.64304460234891e-06, - "loss": 0.9255, + "learning_rate": 8.667672337782359e-06, + "loss": 0.8323, "step": 19630 }, { - "epoch": 0.5570658342792282, + "epoch": 0.5562923290543795, "grad_norm": 0.0, - "learning_rate": 8.642134030947893e-06, - "loss": 0.8802, + "learning_rate": 8.666762736428497e-06, + "loss": 0.8152, "step": 19631 }, { - "epoch": 0.557094211123723, + "epoch": 0.556320666496642, "grad_norm": 0.0, - "learning_rate": 8.641223471016834e-06, - "loss": 0.8992, + "learning_rate": 8.665853146304988e-06, + "loss": 0.8896, "step": 19632 }, { - "epoch": 0.5571225879682179, + "epoch": 0.5563490039389045, "grad_norm": 0.0, - "learning_rate": 8.64031292256342e-06, - "loss": 0.9656, + "learning_rate": 8.664943567419497e-06, + "loss": 0.8505, "step": 19633 }, { - "epoch": 0.5571509648127129, + "epoch": 0.5563773413811669, "grad_norm": 0.0, - "learning_rate": 8.639402385595342e-06, - "loss": 0.8546, + "learning_rate": 8.664033999779677e-06, + "loss": 0.9211, "step": 19634 }, { - "epoch": 0.5571793416572077, + "epoch": 0.5564056788234294, "grad_norm": 0.0, - "learning_rate": 8.638491860120295e-06, - "loss": 0.8018, + "learning_rate": 8.663124443393195e-06, + "loss": 0.9059, "step": 19635 }, { - "epoch": 0.5572077185017026, + "epoch": 0.5564340162656919, "grad_norm": 0.0, - "learning_rate": 8.637581346145968e-06, - "loss": 0.9366, + "learning_rate": 8.662214898267715e-06, + "loss": 0.9011, "step": 19636 }, { - "epoch": 0.5572360953461976, + "epoch": 0.5564623537079543, "grad_norm": 0.0, - "learning_rate": 8.636670843680051e-06, - "loss": 0.7515, + "learning_rate": 8.661305364410894e-06, + "loss": 0.9471, "step": 19637 }, { - "epoch": 0.5572644721906924, + "epoch": 0.5564906911502168, "grad_norm": 0.0, - "learning_rate": 8.635760352730239e-06, - "loss": 0.8666, + "learning_rate": 8.660395841830395e-06, + "loss": 0.8143, "step": 19638 }, { - "epoch": 0.5572928490351873, + "epoch": 0.5565190285924793, "grad_norm": 0.0, - "learning_rate": 8.634849873304221e-06, - "loss": 0.8746, + "learning_rate": 8.659486330533883e-06, + "loss": 0.8693, "step": 19639 }, { - "epoch": 0.5573212258796821, + "epoch": 0.5565473660347418, "grad_norm": 0.0, - "learning_rate": 8.633939405409685e-06, - "loss": 0.767, + "learning_rate": 8.658576830529011e-06, + "loss": 0.8837, "step": 19640 }, { - "epoch": 0.5573496027241771, + "epoch": 0.5565757034770041, "grad_norm": 0.0, - "learning_rate": 8.633028949054325e-06, - "loss": 0.7549, + "learning_rate": 8.657667341823449e-06, + "loss": 0.9102, "step": 19641 }, { - "epoch": 0.557377979568672, + "epoch": 0.5566040409192666, "grad_norm": 0.0, - "learning_rate": 8.632118504245833e-06, - "loss": 0.9644, + "learning_rate": 8.656757864424848e-06, + "loss": 0.802, "step": 19642 }, { - "epoch": 0.5574063564131668, + "epoch": 0.5566323783615291, "grad_norm": 0.0, - "learning_rate": 8.631208070991896e-06, - "loss": 0.7953, + "learning_rate": 8.655848398340876e-06, + "loss": 0.8761, "step": 19643 }, { - "epoch": 0.5574347332576618, + "epoch": 0.5566607158037915, "grad_norm": 0.0, - "learning_rate": 8.630297649300205e-06, - "loss": 0.9016, + "learning_rate": 8.654938943579194e-06, + "loss": 0.8623, "step": 19644 }, { - "epoch": 0.5574631101021567, + "epoch": 0.556689053246054, "grad_norm": 0.0, - "learning_rate": 8.62938723917845e-06, - "loss": 0.8247, + "learning_rate": 8.654029500147458e-06, + "loss": 0.8741, "step": 19645 }, { - "epoch": 0.5574914869466515, + "epoch": 0.5567173906883165, "grad_norm": 0.0, - "learning_rate": 8.628476840634326e-06, - "loss": 0.9053, + "learning_rate": 8.653120068053336e-06, + "loss": 0.7093, "step": 19646 }, { - "epoch": 0.5575198637911464, + "epoch": 0.556745728130579, "grad_norm": 0.0, - "learning_rate": 8.62756645367552e-06, - "loss": 0.8666, + "learning_rate": 8.65221064730448e-06, + "loss": 0.8995, "step": 19647 }, { - "epoch": 0.5575482406356413, + "epoch": 0.5567740655728414, "grad_norm": 0.0, - "learning_rate": 8.626656078309718e-06, - "loss": 0.9041, + "learning_rate": 8.651301237908552e-06, + "loss": 0.8705, "step": 19648 }, { - "epoch": 0.5575766174801362, + "epoch": 0.5568024030151039, "grad_norm": 0.0, - "learning_rate": 8.625745714544618e-06, - "loss": 1.0588, + "learning_rate": 8.65039183987322e-06, + "loss": 0.9645, "step": 19649 }, { - "epoch": 0.5576049943246311, + "epoch": 0.5568307404573664, "grad_norm": 0.0, - "learning_rate": 8.624835362387905e-06, - "loss": 0.947, + "learning_rate": 8.649482453206134e-06, + "loss": 0.8955, "step": 19650 }, { - "epoch": 0.557633371169126, + "epoch": 0.5568590778996287, "grad_norm": 0.0, - "learning_rate": 8.623925021847266e-06, - "loss": 0.8692, + "learning_rate": 8.64857307791496e-06, + "loss": 0.8614, "step": 19651 }, { - "epoch": 0.5576617480136209, + "epoch": 0.5568874153418912, "grad_norm": 0.0, - "learning_rate": 8.623014692930399e-06, - "loss": 0.8179, + "learning_rate": 8.64766371400736e-06, + "loss": 0.9167, "step": 19652 }, { - "epoch": 0.5576901248581158, + "epoch": 0.5569157527841537, "grad_norm": 0.0, - "learning_rate": 8.622104375644986e-06, - "loss": 0.8592, + "learning_rate": 8.646754361490988e-06, + "loss": 0.8075, "step": 19653 }, { - "epoch": 0.5577185017026107, + "epoch": 0.5569440902264161, "grad_norm": 0.0, - "learning_rate": 8.621194069998718e-06, - "loss": 0.7654, + "learning_rate": 8.645845020373508e-06, + "loss": 0.8748, "step": 19654 }, { - "epoch": 0.5577468785471056, + "epoch": 0.5569724276686786, "grad_norm": 0.0, - "learning_rate": 8.62028377599929e-06, - "loss": 0.8228, + "learning_rate": 8.644935690662578e-06, + "loss": 0.8225, "step": 19655 }, { - "epoch": 0.5577752553916004, + "epoch": 0.5570007651109411, "grad_norm": 0.0, - "learning_rate": 8.619373493654383e-06, - "loss": 0.8874, + "learning_rate": 8.644026372365855e-06, + "loss": 0.8781, "step": 19656 }, { - "epoch": 0.5578036322360953, + "epoch": 0.5570291025532036, "grad_norm": 0.0, - "learning_rate": 8.618463222971692e-06, - "loss": 0.9047, + "learning_rate": 8.643117065491005e-06, + "loss": 0.8263, "step": 19657 }, { - "epoch": 0.5578320090805903, + "epoch": 0.557057439995466, "grad_norm": 0.0, - "learning_rate": 8.617552963958904e-06, - "loss": 0.9363, + "learning_rate": 8.64220777004568e-06, + "loss": 0.855, "step": 19658 }, { - "epoch": 0.5578603859250851, + "epoch": 0.5570857774377285, "grad_norm": 0.0, - "learning_rate": 8.616642716623709e-06, - "loss": 0.877, + "learning_rate": 8.641298486037543e-06, + "loss": 0.8974, "step": 19659 }, { - "epoch": 0.55788876276958, + "epoch": 0.557114114879991, "grad_norm": 0.0, - "learning_rate": 8.615732480973794e-06, - "loss": 0.9332, + "learning_rate": 8.640389213474259e-06, + "loss": 0.9587, "step": 19660 }, { - "epoch": 0.557917139614075, + "epoch": 0.5571424523222533, "grad_norm": 0.0, - "learning_rate": 8.614822257016845e-06, - "loss": 0.8665, + "learning_rate": 8.639479952363478e-06, + "loss": 0.8653, "step": 19661 }, { - "epoch": 0.5579455164585698, + "epoch": 0.5571707897645158, "grad_norm": 0.0, - "learning_rate": 8.61391204476056e-06, - "loss": 0.785, + "learning_rate": 8.638570702712863e-06, + "loss": 0.7566, "step": 19662 }, { - "epoch": 0.5579738933030647, + "epoch": 0.5571991272067783, "grad_norm": 0.0, - "learning_rate": 8.613001844212622e-06, - "loss": 0.832, + "learning_rate": 8.637661464530072e-06, + "loss": 0.8612, "step": 19663 }, { - "epoch": 0.5580022701475595, + "epoch": 0.5572274646490408, "grad_norm": 0.0, - "learning_rate": 8.612091655380718e-06, - "loss": 0.894, + "learning_rate": 8.636752237822762e-06, + "loss": 0.8454, "step": 19664 }, { - "epoch": 0.5580306469920545, + "epoch": 0.5572558020913032, "grad_norm": 0.0, - "learning_rate": 8.611181478272539e-06, - "loss": 0.868, + "learning_rate": 8.6358430225986e-06, + "loss": 0.8791, "step": 19665 }, { - "epoch": 0.5580590238365494, + "epoch": 0.5572841395335657, "grad_norm": 0.0, - "learning_rate": 8.610271312895773e-06, - "loss": 0.7769, + "learning_rate": 8.634933818865235e-06, + "loss": 0.9322, "step": 19666 }, { - "epoch": 0.5580874006810442, + "epoch": 0.5573124769758282, "grad_norm": 0.0, - "learning_rate": 8.609361159258109e-06, - "loss": 1.0015, + "learning_rate": 8.634024626630329e-06, + "loss": 0.8297, "step": 19667 }, { - "epoch": 0.5581157775255392, + "epoch": 0.5573408144180906, "grad_norm": 0.0, - "learning_rate": 8.608451017367231e-06, - "loss": 0.881, + "learning_rate": 8.633115445901545e-06, + "loss": 0.8479, "step": 19668 }, { - "epoch": 0.5581441543700341, + "epoch": 0.5573691518603531, "grad_norm": 0.0, - "learning_rate": 8.607540887230832e-06, - "loss": 0.83, + "learning_rate": 8.632206276686533e-06, + "loss": 0.8947, "step": 19669 }, { - "epoch": 0.5581725312145289, + "epoch": 0.5573974893026156, "grad_norm": 0.0, - "learning_rate": 8.606630768856596e-06, - "loss": 0.9262, + "learning_rate": 8.631297118992957e-06, + "loss": 0.8743, "step": 19670 }, { - "epoch": 0.5582009080590238, + "epoch": 0.5574258267448781, "grad_norm": 0.0, - "learning_rate": 8.605720662252213e-06, - "loss": 0.8589, + "learning_rate": 8.630387972828472e-06, + "loss": 0.9894, "step": 19671 }, { - "epoch": 0.5582292849035188, + "epoch": 0.5574541641871404, "grad_norm": 0.0, - "learning_rate": 8.60481056742537e-06, - "loss": 0.8919, + "learning_rate": 8.629478838200737e-06, + "loss": 0.7568, "step": 19672 }, { - "epoch": 0.5582576617480136, + "epoch": 0.5574825016294029, "grad_norm": 0.0, - "learning_rate": 8.603900484383757e-06, - "loss": 0.8763, + "learning_rate": 8.628569715117416e-06, + "loss": 0.9399, "step": 19673 }, { - "epoch": 0.5582860385925085, + "epoch": 0.5575108390716654, "grad_norm": 0.0, - "learning_rate": 8.602990413135056e-06, - "loss": 0.8905, + "learning_rate": 8.627660603586157e-06, + "loss": 0.8499, "step": 19674 }, { - "epoch": 0.5583144154370034, + "epoch": 0.5575391765139278, "grad_norm": 0.0, - "learning_rate": 8.602080353686961e-06, - "loss": 0.9446, + "learning_rate": 8.626751503614624e-06, + "loss": 0.8653, "step": 19675 }, { - "epoch": 0.5583427922814983, + "epoch": 0.5575675139561903, "grad_norm": 0.0, - "learning_rate": 8.60117030604715e-06, - "loss": 0.8485, + "learning_rate": 8.625842415210471e-06, + "loss": 0.9237, "step": 19676 }, { - "epoch": 0.5583711691259932, + "epoch": 0.5575958513984528, "grad_norm": 0.0, - "learning_rate": 8.600260270223322e-06, - "loss": 0.7754, + "learning_rate": 8.624933338381358e-06, + "loss": 0.8422, "step": 19677 }, { - "epoch": 0.5583995459704881, + "epoch": 0.5576241888407152, "grad_norm": 0.0, - "learning_rate": 8.599350246223156e-06, - "loss": 0.8344, + "learning_rate": 8.624024273134947e-06, + "loss": 0.8455, "step": 19678 }, { - "epoch": 0.558427922814983, + "epoch": 0.5576525262829777, "grad_norm": 0.0, - "learning_rate": 8.598440234054345e-06, - "loss": 0.8897, + "learning_rate": 8.623115219478884e-06, + "loss": 0.9776, "step": 19679 }, { - "epoch": 0.5584562996594779, + "epoch": 0.5576808637252402, "grad_norm": 0.0, - "learning_rate": 8.59753023372457e-06, - "loss": 0.9573, + "learning_rate": 8.622206177420836e-06, + "loss": 0.7797, "step": 19680 }, { - "epoch": 0.5584846765039727, + "epoch": 0.5577092011675027, "grad_norm": 0.0, - "learning_rate": 8.596620245241518e-06, - "loss": 0.8648, + "learning_rate": 8.62129714696846e-06, + "loss": 0.7962, "step": 19681 }, { - "epoch": 0.5585130533484677, + "epoch": 0.557737538609765, "grad_norm": 0.0, - "learning_rate": 8.595710268612881e-06, - "loss": 0.9361, + "learning_rate": 8.620388128129404e-06, + "loss": 0.8054, "step": 19682 }, { - "epoch": 0.5585414301929625, + "epoch": 0.5577658760520275, "grad_norm": 0.0, - "learning_rate": 8.594800303846342e-06, - "loss": 0.8633, + "learning_rate": 8.619479120911334e-06, + "loss": 0.8758, "step": 19683 }, { - "epoch": 0.5585698070374574, + "epoch": 0.55779421349429, "grad_norm": 0.0, - "learning_rate": 8.593890350949587e-06, - "loss": 0.9591, + "learning_rate": 8.618570125321903e-06, + "loss": 0.9296, "step": 19684 }, { - "epoch": 0.5585981838819524, + "epoch": 0.5578225509365524, "grad_norm": 0.0, - "learning_rate": 8.592980409930305e-06, - "loss": 0.778, + "learning_rate": 8.617661141368768e-06, + "loss": 0.8507, "step": 19685 }, { - "epoch": 0.5586265607264472, + "epoch": 0.5578508883788149, "grad_norm": 0.0, - "learning_rate": 8.59207048079618e-06, - "loss": 0.7578, + "learning_rate": 8.616752169059591e-06, + "loss": 0.7974, "step": 19686 }, { - "epoch": 0.5586549375709421, + "epoch": 0.5578792258210774, "grad_norm": 0.0, - "learning_rate": 8.591160563554898e-06, - "loss": 0.8404, + "learning_rate": 8.615843208402019e-06, + "loss": 0.8764, "step": 19687 }, { - "epoch": 0.558683314415437, + "epoch": 0.5579075632633399, "grad_norm": 0.0, - "learning_rate": 8.590250658214148e-06, - "loss": 0.9672, + "learning_rate": 8.614934259403716e-06, + "loss": 0.9954, "step": 19688 }, { - "epoch": 0.5587116912599319, + "epoch": 0.5579359007056023, "grad_norm": 0.0, - "learning_rate": 8.589340764781613e-06, - "loss": 0.8436, + "learning_rate": 8.614025322072338e-06, + "loss": 0.8922, "step": 19689 }, { - "epoch": 0.5587400681044268, + "epoch": 0.5579642381478648, "grad_norm": 0.0, - "learning_rate": 8.588430883264977e-06, - "loss": 0.9593, + "learning_rate": 8.613116396415534e-06, + "loss": 0.9474, "step": 19690 }, { - "epoch": 0.5587684449489216, + "epoch": 0.5579925755901273, "grad_norm": 0.0, - "learning_rate": 8.587521013671931e-06, - "loss": 0.9273, + "learning_rate": 8.612207482440972e-06, + "loss": 0.8733, "step": 19691 }, { - "epoch": 0.5587968217934166, + "epoch": 0.5580209130323897, "grad_norm": 0.0, - "learning_rate": 8.586611156010159e-06, - "loss": 0.8643, + "learning_rate": 8.611298580156297e-06, + "loss": 0.7581, "step": 19692 }, { - "epoch": 0.5588251986379115, + "epoch": 0.5580492504746521, "grad_norm": 0.0, - "learning_rate": 8.585701310287346e-06, - "loss": 0.9116, + "learning_rate": 8.610389689569171e-06, + "loss": 0.7532, "step": 19693 }, { - "epoch": 0.5588535754824063, + "epoch": 0.5580775879169146, "grad_norm": 0.0, - "learning_rate": 8.584791476511178e-06, - "loss": 0.7449, + "learning_rate": 8.60948081068725e-06, + "loss": 0.866, "step": 19694 }, { - "epoch": 0.5588819523269013, + "epoch": 0.5581059253591771, "grad_norm": 0.0, - "learning_rate": 8.583881654689338e-06, - "loss": 0.9044, + "learning_rate": 8.608571943518187e-06, + "loss": 0.7892, "step": 19695 }, { - "epoch": 0.5589103291713962, + "epoch": 0.5581342628014395, "grad_norm": 0.0, - "learning_rate": 8.582971844829518e-06, - "loss": 0.9501, + "learning_rate": 8.607663088069639e-06, + "loss": 0.8737, "step": 19696 }, { - "epoch": 0.558938706015891, + "epoch": 0.558162600243702, "grad_norm": 0.0, - "learning_rate": 8.582062046939396e-06, - "loss": 0.8102, + "learning_rate": 8.606754244349264e-06, + "loss": 0.91, "step": 19697 }, { - "epoch": 0.5589670828603859, + "epoch": 0.5581909376859645, "grad_norm": 0.0, - "learning_rate": 8.581152261026657e-06, - "loss": 0.8457, + "learning_rate": 8.605845412364712e-06, + "loss": 0.7344, "step": 19698 }, { - "epoch": 0.5589954597048808, + "epoch": 0.5582192751282269, "grad_norm": 0.0, - "learning_rate": 8.580242487098993e-06, - "loss": 0.9282, + "learning_rate": 8.604936592123647e-06, + "loss": 0.8845, "step": 19699 }, { - "epoch": 0.5590238365493757, + "epoch": 0.5582476125704894, "grad_norm": 0.0, - "learning_rate": 8.579332725164081e-06, - "loss": 0.9496, + "learning_rate": 8.604027783633713e-06, + "loss": 0.8664, "step": 19700 }, { - "epoch": 0.5590522133938706, + "epoch": 0.5582759500127519, "grad_norm": 0.0, - "learning_rate": 8.57842297522961e-06, - "loss": 0.8716, + "learning_rate": 8.603118986902574e-06, + "loss": 0.8678, "step": 19701 }, { - "epoch": 0.5590805902383655, + "epoch": 0.5583042874550143, "grad_norm": 0.0, - "learning_rate": 8.577513237303266e-06, - "loss": 0.7608, + "learning_rate": 8.602210201937884e-06, + "loss": 0.9076, "step": 19702 }, { - "epoch": 0.5591089670828604, + "epoch": 0.5583326248972768, "grad_norm": 0.0, - "learning_rate": 8.576603511392732e-06, - "loss": 0.8238, + "learning_rate": 8.601301428747293e-06, + "loss": 0.862, "step": 19703 }, { - "epoch": 0.5591373439273553, + "epoch": 0.5583609623395392, "grad_norm": 0.0, - "learning_rate": 8.575693797505688e-06, - "loss": 0.9429, + "learning_rate": 8.600392667338465e-06, + "loss": 0.8092, "step": 19704 }, { - "epoch": 0.5591657207718501, + "epoch": 0.5583892997818017, "grad_norm": 0.0, - "learning_rate": 8.574784095649826e-06, - "loss": 0.9139, + "learning_rate": 8.599483917719044e-06, + "loss": 0.853, "step": 19705 }, { - "epoch": 0.5591940976163451, + "epoch": 0.5584176372240641, "grad_norm": 0.0, - "learning_rate": 8.573874405832828e-06, - "loss": 0.9466, + "learning_rate": 8.59857517989669e-06, + "loss": 0.9447, "step": 19706 }, { - "epoch": 0.55922247446084, + "epoch": 0.5584459746663266, "grad_norm": 0.0, - "learning_rate": 8.57296472806237e-06, - "loss": 0.8595, + "learning_rate": 8.597666453879062e-06, + "loss": 0.9642, "step": 19707 }, { - "epoch": 0.5592508513053348, + "epoch": 0.5584743121085891, "grad_norm": 0.0, - "learning_rate": 8.572055062346149e-06, - "loss": 0.8753, + "learning_rate": 8.596757739673806e-06, + "loss": 0.8963, "step": 19708 }, { - "epoch": 0.5592792281498298, + "epoch": 0.5585026495508515, "grad_norm": 0.0, - "learning_rate": 8.571145408691842e-06, - "loss": 0.891, + "learning_rate": 8.595849037288581e-06, + "loss": 0.8751, "step": 19709 }, { - "epoch": 0.5593076049943246, + "epoch": 0.558530986993114, "grad_norm": 0.0, - "learning_rate": 8.570235767107134e-06, - "loss": 0.993, + "learning_rate": 8.594940346731047e-06, + "loss": 0.9431, "step": 19710 }, { - "epoch": 0.5593359818388195, + "epoch": 0.5585593244353765, "grad_norm": 0.0, - "learning_rate": 8.56932613759971e-06, - "loss": 0.8449, + "learning_rate": 8.594031668008845e-06, + "loss": 0.9643, "step": 19711 }, { - "epoch": 0.5593643586833145, + "epoch": 0.558587661877639, "grad_norm": 0.0, - "learning_rate": 8.568416520177249e-06, - "loss": 0.9619, + "learning_rate": 8.593123001129642e-06, + "loss": 1.0517, "step": 19712 }, { - "epoch": 0.5593927355278093, + "epoch": 0.5586159993199014, "grad_norm": 0.0, - "learning_rate": 8.56750691484744e-06, - "loss": 0.9083, + "learning_rate": 8.592214346101083e-06, + "loss": 0.7392, "step": 19713 }, { - "epoch": 0.5594211123723042, + "epoch": 0.5586443367621639, "grad_norm": 0.0, - "learning_rate": 8.566597321617967e-06, - "loss": 0.7984, + "learning_rate": 8.591305702930824e-06, + "loss": 0.9266, "step": 19714 }, { - "epoch": 0.559449489216799, + "epoch": 0.5586726742044263, "grad_norm": 0.0, - "learning_rate": 8.565687740496507e-06, - "loss": 0.9401, + "learning_rate": 8.590397071626522e-06, + "loss": 0.8799, "step": 19715 }, { - "epoch": 0.559477866061294, + "epoch": 0.5587010116466887, "grad_norm": 0.0, - "learning_rate": 8.564778171490749e-06, - "loss": 0.8146, + "learning_rate": 8.589488452195829e-06, + "loss": 0.88, "step": 19716 }, { - "epoch": 0.5595062429057889, + "epoch": 0.5587293490889512, "grad_norm": 0.0, - "learning_rate": 8.563868614608376e-06, - "loss": 0.8837, + "learning_rate": 8.588579844646397e-06, + "loss": 0.9504, "step": 19717 }, { - "epoch": 0.5595346197502837, + "epoch": 0.5587576865312137, "grad_norm": 0.0, - "learning_rate": 8.562959069857064e-06, - "loss": 0.7965, + "learning_rate": 8.587671248985885e-06, + "loss": 0.8549, "step": 19718 }, { - "epoch": 0.5595629965947787, + "epoch": 0.5587860239734762, "grad_norm": 0.0, - "learning_rate": 8.562049537244505e-06, - "loss": 0.8958, + "learning_rate": 8.58676266522194e-06, + "loss": 0.8775, "step": 19719 }, { - "epoch": 0.5595913734392736, + "epoch": 0.5588143614157386, "grad_norm": 0.0, - "learning_rate": 8.56114001677838e-06, - "loss": 0.7689, + "learning_rate": 8.585854093362219e-06, + "loss": 0.9792, "step": 19720 }, { - "epoch": 0.5596197502837684, + "epoch": 0.5588426988580011, "grad_norm": 0.0, - "learning_rate": 8.560230508466367e-06, - "loss": 0.8762, + "learning_rate": 8.58494553341437e-06, + "loss": 0.8903, "step": 19721 }, { - "epoch": 0.5596481271282633, + "epoch": 0.5588710363002636, "grad_norm": 0.0, - "learning_rate": 8.559321012316149e-06, - "loss": 0.8172, + "learning_rate": 8.584036985386053e-06, + "loss": 0.7887, "step": 19722 }, { - "epoch": 0.5596765039727583, + "epoch": 0.558899373742526, "grad_norm": 0.0, - "learning_rate": 8.558411528335414e-06, - "loss": 0.918, + "learning_rate": 8.583128449284921e-06, + "loss": 0.9289, "step": 19723 }, { - "epoch": 0.5597048808172531, + "epoch": 0.5589277111847885, "grad_norm": 0.0, - "learning_rate": 8.557502056531844e-06, - "loss": 0.8676, + "learning_rate": 8.58221992511862e-06, + "loss": 0.8874, "step": 19724 }, { - "epoch": 0.559733257661748, + "epoch": 0.558956048627051, "grad_norm": 0.0, - "learning_rate": 8.556592596913115e-06, - "loss": 0.8188, + "learning_rate": 8.581311412894811e-06, + "loss": 0.8046, "step": 19725 }, { - "epoch": 0.5597616345062429, + "epoch": 0.5589843860693133, "grad_norm": 0.0, - "learning_rate": 8.555683149486916e-06, - "loss": 0.8626, + "learning_rate": 8.58040291262114e-06, + "loss": 0.9169, "step": 19726 }, { - "epoch": 0.5597900113507378, + "epoch": 0.5590127235115758, "grad_norm": 0.0, - "learning_rate": 8.554773714260927e-06, - "loss": 0.8607, + "learning_rate": 8.579494424305261e-06, + "loss": 0.795, "step": 19727 }, { - "epoch": 0.5598183881952327, + "epoch": 0.5590410609538383, "grad_norm": 0.0, - "learning_rate": 8.553864291242827e-06, - "loss": 0.8007, + "learning_rate": 8.578585947954831e-06, + "loss": 0.9054, "step": 19728 }, { - "epoch": 0.5598467650397276, + "epoch": 0.5590693983961008, "grad_norm": 0.0, - "learning_rate": 8.552954880440303e-06, - "loss": 0.8333, + "learning_rate": 8.577677483577498e-06, + "loss": 0.8096, "step": 19729 }, { - "epoch": 0.5598751418842225, + "epoch": 0.5590977358383632, "grad_norm": 0.0, - "learning_rate": 8.552045481861033e-06, - "loss": 0.8572, + "learning_rate": 8.576769031180913e-06, + "loss": 0.8237, "step": 19730 }, { - "epoch": 0.5599035187287174, + "epoch": 0.5591260732806257, "grad_norm": 0.0, - "learning_rate": 8.551136095512698e-06, - "loss": 0.9197, + "learning_rate": 8.575860590772737e-06, + "loss": 0.855, "step": 19731 }, { - "epoch": 0.5599318955732122, + "epoch": 0.5591544107228882, "grad_norm": 0.0, - "learning_rate": 8.550226721402985e-06, - "loss": 0.9348, + "learning_rate": 8.574952162360612e-06, + "loss": 0.9134, "step": 19732 }, { - "epoch": 0.5599602724177072, + "epoch": 0.5591827481651506, "grad_norm": 0.0, - "learning_rate": 8.549317359539571e-06, - "loss": 0.8363, + "learning_rate": 8.574043745952196e-06, + "loss": 0.9199, "step": 19733 }, { - "epoch": 0.559988649262202, + "epoch": 0.5592110856074131, "grad_norm": 0.0, - "learning_rate": 8.548408009930138e-06, - "loss": 0.8768, + "learning_rate": 8.573135341555138e-06, + "loss": 0.8288, "step": 19734 }, { - "epoch": 0.5600170261066969, + "epoch": 0.5592394230496756, "grad_norm": 0.0, - "learning_rate": 8.547498672582367e-06, - "loss": 0.8532, + "learning_rate": 8.57222694917709e-06, + "loss": 0.9725, "step": 19735 }, { - "epoch": 0.5600454029511919, + "epoch": 0.559267760491938, "grad_norm": 0.0, - "learning_rate": 8.546589347503939e-06, - "loss": 0.8808, + "learning_rate": 8.571318568825709e-06, + "loss": 0.9464, "step": 19736 }, { - "epoch": 0.5600737797956867, + "epoch": 0.5592960979342004, "grad_norm": 0.0, - "learning_rate": 8.545680034702535e-06, - "loss": 0.8121, + "learning_rate": 8.570410200508637e-06, + "loss": 0.8083, "step": 19737 }, { - "epoch": 0.5601021566401816, + "epoch": 0.5593244353764629, "grad_norm": 0.0, - "learning_rate": 8.54477073418584e-06, - "loss": 0.8786, + "learning_rate": 8.569501844233531e-06, + "loss": 0.8778, "step": 19738 }, { - "epoch": 0.5601305334846765, + "epoch": 0.5593527728187254, "grad_norm": 0.0, - "learning_rate": 8.543861445961532e-06, - "loss": 0.9344, + "learning_rate": 8.568593500008047e-06, + "loss": 0.9805, "step": 19739 }, { - "epoch": 0.5601589103291714, + "epoch": 0.5593811102609878, "grad_norm": 0.0, - "learning_rate": 8.542952170037292e-06, - "loss": 0.8518, + "learning_rate": 8.567685167839827e-06, + "loss": 0.9217, "step": 19740 }, { - "epoch": 0.5601872871736663, + "epoch": 0.5594094477032503, "grad_norm": 0.0, - "learning_rate": 8.5420429064208e-06, - "loss": 0.7996, + "learning_rate": 8.566776847736528e-06, + "loss": 0.9224, "step": 19741 }, { - "epoch": 0.5602156640181611, + "epoch": 0.5594377851455128, "grad_norm": 0.0, - "learning_rate": 8.541133655119736e-06, - "loss": 0.8873, + "learning_rate": 8.5658685397058e-06, + "loss": 0.9254, "step": 19742 }, { - "epoch": 0.5602440408626561, + "epoch": 0.5594661225877752, "grad_norm": 0.0, - "learning_rate": 8.540224416141784e-06, - "loss": 0.8652, + "learning_rate": 8.564960243755292e-06, + "loss": 0.8292, "step": 19743 }, { - "epoch": 0.560272417707151, + "epoch": 0.5594944600300377, "grad_norm": 0.0, - "learning_rate": 8.539315189494622e-06, - "loss": 0.801, + "learning_rate": 8.564051959892662e-06, + "loss": 0.9016, "step": 19744 }, { - "epoch": 0.5603007945516458, + "epoch": 0.5595227974723002, "grad_norm": 0.0, - "learning_rate": 8.538405975185927e-06, - "loss": 0.9385, + "learning_rate": 8.56314368812555e-06, + "loss": 0.8398, "step": 19745 }, { - "epoch": 0.5603291713961408, + "epoch": 0.5595511349145627, "grad_norm": 0.0, - "learning_rate": 8.537496773223386e-06, - "loss": 0.8461, + "learning_rate": 8.562235428461614e-06, + "loss": 0.931, "step": 19746 }, { - "epoch": 0.5603575482406357, + "epoch": 0.559579472356825, "grad_norm": 0.0, - "learning_rate": 8.536587583614674e-06, - "loss": 0.9406, + "learning_rate": 8.561327180908503e-06, + "loss": 0.9113, "step": 19747 }, { - "epoch": 0.5603859250851305, + "epoch": 0.5596078097990875, "grad_norm": 0.0, - "learning_rate": 8.535678406367472e-06, - "loss": 0.8384, + "learning_rate": 8.560418945473866e-06, + "loss": 0.9031, "step": 19748 }, { - "epoch": 0.5604143019296254, + "epoch": 0.55963614724135, "grad_norm": 0.0, - "learning_rate": 8.53476924148946e-06, - "loss": 0.7628, + "learning_rate": 8.55951072216536e-06, + "loss": 0.829, "step": 19749 }, { - "epoch": 0.5604426787741204, + "epoch": 0.5596644846836124, "grad_norm": 0.0, - "learning_rate": 8.533860088988321e-06, - "loss": 0.8678, + "learning_rate": 8.558602510990625e-06, + "loss": 0.8037, "step": 19750 }, { - "epoch": 0.5604710556186152, + "epoch": 0.5596928221258749, "grad_norm": 0.0, - "learning_rate": 8.532950948871728e-06, - "loss": 0.8736, + "learning_rate": 8.557694311957316e-06, + "loss": 0.8379, "step": 19751 }, { - "epoch": 0.5604994324631101, + "epoch": 0.5597211595681374, "grad_norm": 0.0, - "learning_rate": 8.532041821147366e-06, - "loss": 0.9065, + "learning_rate": 8.556786125073089e-06, + "loss": 0.8511, "step": 19752 }, { - "epoch": 0.560527809307605, + "epoch": 0.5597494970103999, "grad_norm": 0.0, - "learning_rate": 8.531132705822908e-06, - "loss": 0.8091, + "learning_rate": 8.555877950345584e-06, + "loss": 0.9447, "step": 19753 }, { - "epoch": 0.5605561861520999, + "epoch": 0.5597778344526623, "grad_norm": 0.0, - "learning_rate": 8.530223602906044e-06, - "loss": 0.7849, + "learning_rate": 8.554969787782456e-06, + "loss": 1.0066, "step": 19754 }, { - "epoch": 0.5605845629965948, + "epoch": 0.5598061718949248, "grad_norm": 0.0, - "learning_rate": 8.529314512404448e-06, - "loss": 0.9231, + "learning_rate": 8.554061637391353e-06, + "loss": 0.8525, "step": 19755 }, { - "epoch": 0.5606129398410896, + "epoch": 0.5598345093371873, "grad_norm": 0.0, - "learning_rate": 8.528405434325795e-06, - "loss": 0.773, + "learning_rate": 8.553153499179926e-06, + "loss": 0.8653, "step": 19756 }, { - "epoch": 0.5606413166855846, + "epoch": 0.5598628467794496, "grad_norm": 0.0, - "learning_rate": 8.527496368677768e-06, - "loss": 0.8946, + "learning_rate": 8.552245373155827e-06, + "loss": 0.8253, "step": 19757 }, { - "epoch": 0.5606696935300794, + "epoch": 0.5598911842217121, "grad_norm": 0.0, - "learning_rate": 8.526587315468048e-06, - "loss": 0.8746, + "learning_rate": 8.5513372593267e-06, + "loss": 0.9061, "step": 19758 }, { - "epoch": 0.5606980703745743, + "epoch": 0.5599195216639746, "grad_norm": 0.0, - "learning_rate": 8.525678274704308e-06, - "loss": 0.8947, + "learning_rate": 8.550429157700196e-06, + "loss": 0.882, "step": 19759 }, { - "epoch": 0.5607264472190693, + "epoch": 0.5599478591062371, "grad_norm": 0.0, - "learning_rate": 8.524769246394233e-06, - "loss": 0.9676, + "learning_rate": 8.549521068283968e-06, + "loss": 1.0286, "step": 19760 }, { - "epoch": 0.5607548240635641, + "epoch": 0.5599761965484995, "grad_norm": 0.0, - "learning_rate": 8.523860230545496e-06, - "loss": 0.8654, + "learning_rate": 8.548612991085661e-06, + "loss": 0.8782, "step": 19761 }, { - "epoch": 0.560783200908059, + "epoch": 0.560004533990762, "grad_norm": 0.0, - "learning_rate": 8.522951227165779e-06, - "loss": 0.7633, + "learning_rate": 8.547704926112931e-06, + "loss": 0.9083, "step": 19762 }, { - "epoch": 0.560811577752554, + "epoch": 0.5600328714330245, "grad_norm": 0.0, - "learning_rate": 8.52204223626276e-06, - "loss": 0.8397, + "learning_rate": 8.546796873373415e-06, + "loss": 0.8174, "step": 19763 }, { - "epoch": 0.5608399545970488, + "epoch": 0.5600612088752869, "grad_norm": 0.0, - "learning_rate": 8.521133257844116e-06, - "loss": 0.8551, + "learning_rate": 8.54588883287477e-06, + "loss": 0.9403, "step": 19764 }, { - "epoch": 0.5608683314415437, + "epoch": 0.5600895463175494, "grad_norm": 0.0, - "learning_rate": 8.520224291917524e-06, - "loss": 0.8907, + "learning_rate": 8.54498080462465e-06, + "loss": 0.8325, "step": 19765 }, { - "epoch": 0.5608967082860385, + "epoch": 0.5601178837598119, "grad_norm": 0.0, - "learning_rate": 8.519315338490667e-06, - "loss": 0.8532, + "learning_rate": 8.544072788630688e-06, + "loss": 0.8894, "step": 19766 }, { - "epoch": 0.5609250851305335, + "epoch": 0.5601462212020742, "grad_norm": 0.0, - "learning_rate": 8.518406397571219e-06, - "loss": 0.796, + "learning_rate": 8.543164784900544e-06, + "loss": 0.899, "step": 19767 }, { - "epoch": 0.5609534619750284, + "epoch": 0.5601745586443367, "grad_norm": 0.0, - "learning_rate": 8.517497469166855e-06, - "loss": 0.8224, + "learning_rate": 8.542256793441866e-06, + "loss": 0.9634, "step": 19768 }, { - "epoch": 0.5609818388195232, + "epoch": 0.5602028960865992, "grad_norm": 0.0, - "learning_rate": 8.516588553285258e-06, - "loss": 0.9475, + "learning_rate": 8.541348814262298e-06, + "loss": 0.8012, "step": 19769 }, { - "epoch": 0.5610102156640182, + "epoch": 0.5602312335288617, "grad_norm": 0.0, - "learning_rate": 8.515679649934106e-06, - "loss": 0.8914, + "learning_rate": 8.540440847369495e-06, + "loss": 0.8893, "step": 19770 }, { - "epoch": 0.5610385925085131, + "epoch": 0.5602595709711241, "grad_norm": 0.0, - "learning_rate": 8.514770759121076e-06, - "loss": 0.776, + "learning_rate": 8.539532892771098e-06, + "loss": 0.8979, "step": 19771 }, { - "epoch": 0.5610669693530079, + "epoch": 0.5602879084133866, "grad_norm": 0.0, - "learning_rate": 8.513861880853843e-06, - "loss": 0.9432, + "learning_rate": 8.538624950474756e-06, + "loss": 0.8155, "step": 19772 }, { - "epoch": 0.5610953461975028, + "epoch": 0.5603162458556491, "grad_norm": 0.0, - "learning_rate": 8.512953015140085e-06, - "loss": 0.8304, + "learning_rate": 8.53771702048812e-06, + "loss": 0.7782, "step": 19773 }, { - "epoch": 0.5611237230419978, + "epoch": 0.5603445832979115, "grad_norm": 0.0, - "learning_rate": 8.51204416198748e-06, - "loss": 0.9437, + "learning_rate": 8.536809102818836e-06, + "loss": 0.9873, "step": 19774 }, { - "epoch": 0.5611520998864926, + "epoch": 0.560372920740174, "grad_norm": 0.0, - "learning_rate": 8.511135321403705e-06, - "loss": 0.8545, + "learning_rate": 8.535901197474553e-06, + "loss": 0.9285, "step": 19775 }, { - "epoch": 0.5611804767309875, + "epoch": 0.5604012581824365, "grad_norm": 0.0, - "learning_rate": 8.510226493396436e-06, - "loss": 0.9092, + "learning_rate": 8.53499330446292e-06, + "loss": 0.896, "step": 19776 }, { - "epoch": 0.5612088535754824, + "epoch": 0.560429595624699, "grad_norm": 0.0, - "learning_rate": 8.509317677973351e-06, - "loss": 0.8788, + "learning_rate": 8.534085423791579e-06, + "loss": 0.8851, "step": 19777 }, { - "epoch": 0.5612372304199773, + "epoch": 0.5604579330669613, "grad_norm": 0.0, - "learning_rate": 8.50840887514213e-06, - "loss": 0.9969, + "learning_rate": 8.533177555468185e-06, + "loss": 0.7649, "step": 19778 }, { - "epoch": 0.5612656072644722, + "epoch": 0.5604862705092238, "grad_norm": 0.0, - "learning_rate": 8.507500084910441e-06, - "loss": 0.7974, + "learning_rate": 8.532269699500377e-06, + "loss": 0.9567, "step": 19779 }, { - "epoch": 0.5612939841089671, + "epoch": 0.5605146079514863, "grad_norm": 0.0, - "learning_rate": 8.50659130728597e-06, - "loss": 0.7878, + "learning_rate": 8.531361855895806e-06, + "loss": 0.846, "step": 19780 }, { - "epoch": 0.561322360953462, + "epoch": 0.5605429453937487, "grad_norm": 0.0, - "learning_rate": 8.505682542276388e-06, - "loss": 0.8099, + "learning_rate": 8.530454024662123e-06, + "loss": 0.8729, "step": 19781 }, { - "epoch": 0.5613507377979569, + "epoch": 0.5605712828360112, "grad_norm": 0.0, - "learning_rate": 8.504773789889372e-06, - "loss": 0.9943, + "learning_rate": 8.529546205806967e-06, + "loss": 0.9756, "step": 19782 }, { - "epoch": 0.5613791146424517, + "epoch": 0.5605996202782737, "grad_norm": 0.0, - "learning_rate": 8.503865050132596e-06, - "loss": 0.7934, + "learning_rate": 8.528638399337997e-06, + "loss": 0.8797, "step": 19783 }, { - "epoch": 0.5614074914869467, + "epoch": 0.5606279577205362, "grad_norm": 0.0, - "learning_rate": 8.502956323013742e-06, - "loss": 0.8611, + "learning_rate": 8.527730605262846e-06, + "loss": 0.7649, "step": 19784 }, { - "epoch": 0.5614358683314415, + "epoch": 0.5606562951627986, "grad_norm": 0.0, - "learning_rate": 8.502047608540485e-06, - "loss": 0.8788, + "learning_rate": 8.526822823589165e-06, + "loss": 0.9621, "step": 19785 }, { - "epoch": 0.5614642451759364, + "epoch": 0.5606846326050611, "grad_norm": 0.0, - "learning_rate": 8.501138906720496e-06, - "loss": 0.923, + "learning_rate": 8.525915054324607e-06, + "loss": 0.793, "step": 19786 }, { - "epoch": 0.5614926220204314, + "epoch": 0.5607129700473236, "grad_norm": 0.0, - "learning_rate": 8.500230217561459e-06, - "loss": 0.8631, + "learning_rate": 8.52500729747681e-06, + "loss": 0.8161, "step": 19787 }, { - "epoch": 0.5615209988649262, + "epoch": 0.560741307489586, "grad_norm": 0.0, - "learning_rate": 8.499321541071041e-06, - "loss": 0.895, + "learning_rate": 8.524099553053425e-06, + "loss": 0.8471, "step": 19788 }, { - "epoch": 0.5615493757094211, + "epoch": 0.5607696449318484, "grad_norm": 0.0, - "learning_rate": 8.49841287725692e-06, - "loss": 0.869, + "learning_rate": 8.523191821062103e-06, + "loss": 0.8021, "step": 19789 }, { - "epoch": 0.561577752553916, + "epoch": 0.5607979823741109, "grad_norm": 0.0, - "learning_rate": 8.497504226126777e-06, - "loss": 0.886, + "learning_rate": 8.52228410151048e-06, + "loss": 0.7796, "step": 19790 }, { - "epoch": 0.5616061293984109, + "epoch": 0.5608263198163733, "grad_norm": 0.0, - "learning_rate": 8.496595587688281e-06, - "loss": 0.8309, + "learning_rate": 8.52137639440621e-06, + "loss": 0.8185, "step": 19791 }, { - "epoch": 0.5616345062429058, + "epoch": 0.5608546572586358, "grad_norm": 0.0, - "learning_rate": 8.49568696194911e-06, - "loss": 0.7988, + "learning_rate": 8.520468699756932e-06, + "loss": 0.8503, "step": 19792 }, { - "epoch": 0.5616628830874006, + "epoch": 0.5608829947008983, "grad_norm": 0.0, - "learning_rate": 8.49477834891694e-06, - "loss": 0.8547, + "learning_rate": 8.519561017570295e-06, + "loss": 0.7972, "step": 19793 }, { - "epoch": 0.5616912599318956, + "epoch": 0.5609113321431608, "grad_norm": 0.0, - "learning_rate": 8.493869748599445e-06, - "loss": 0.8859, + "learning_rate": 8.518653347853948e-06, + "loss": 0.7953, "step": 19794 }, { - "epoch": 0.5617196367763905, + "epoch": 0.5609396695854232, "grad_norm": 0.0, - "learning_rate": 8.492961161004297e-06, - "loss": 0.8727, + "learning_rate": 8.517745690615531e-06, + "loss": 0.9381, "step": 19795 }, { - "epoch": 0.5617480136208853, + "epoch": 0.5609680070276857, "grad_norm": 0.0, - "learning_rate": 8.492052586139176e-06, - "loss": 0.9245, + "learning_rate": 8.516838045862694e-06, + "loss": 0.8841, "step": 19796 }, { - "epoch": 0.5617763904653803, + "epoch": 0.5609963444699482, "grad_norm": 0.0, - "learning_rate": 8.491144024011755e-06, - "loss": 0.9084, + "learning_rate": 8.515930413603084e-06, + "loss": 0.8351, "step": 19797 }, { - "epoch": 0.5618047673098752, + "epoch": 0.5610246819122106, "grad_norm": 0.0, - "learning_rate": 8.490235474629707e-06, - "loss": 0.9013, + "learning_rate": 8.51502279384434e-06, + "loss": 0.7927, "step": 19798 }, { - "epoch": 0.56183314415437, + "epoch": 0.561053019354473, "grad_norm": 0.0, - "learning_rate": 8.489326938000708e-06, - "loss": 0.992, + "learning_rate": 8.51411518659411e-06, + "loss": 0.87, "step": 19799 }, { - "epoch": 0.5618615209988649, + "epoch": 0.5610813567967355, "grad_norm": 0.0, - "learning_rate": 8.48841841413243e-06, - "loss": 0.9526, + "learning_rate": 8.51320759186004e-06, + "loss": 0.9357, "step": 19800 }, { - "epoch": 0.5618898978433599, + "epoch": 0.561109694238998, "grad_norm": 0.0, - "learning_rate": 8.487509903032553e-06, - "loss": 0.8531, + "learning_rate": 8.512300009649774e-06, + "loss": 0.7653, "step": 19801 }, { - "epoch": 0.5619182746878547, + "epoch": 0.5611380316812604, "grad_norm": 0.0, - "learning_rate": 8.486601404708748e-06, - "loss": 0.9333, + "learning_rate": 8.511392439970962e-06, + "loss": 0.9143, "step": 19802 }, { - "epoch": 0.5619466515323496, + "epoch": 0.5611663691235229, "grad_norm": 0.0, - "learning_rate": 8.485692919168686e-06, - "loss": 0.9146, + "learning_rate": 8.510484882831238e-06, + "loss": 0.8906, "step": 19803 }, { - "epoch": 0.5619750283768445, + "epoch": 0.5611947065657854, "grad_norm": 0.0, - "learning_rate": 8.484784446420048e-06, - "loss": 0.8835, + "learning_rate": 8.509577338238255e-06, + "loss": 0.8681, "step": 19804 }, { - "epoch": 0.5620034052213394, + "epoch": 0.5612230440080478, "grad_norm": 0.0, - "learning_rate": 8.483875986470502e-06, - "loss": 0.8526, + "learning_rate": 8.508669806199658e-06, + "loss": 0.8747, "step": 19805 }, { - "epoch": 0.5620317820658343, + "epoch": 0.5612513814503103, "grad_norm": 0.0, - "learning_rate": 8.482967539327722e-06, - "loss": 0.8557, + "learning_rate": 8.507762286723088e-06, + "loss": 0.9234, "step": 19806 }, { - "epoch": 0.5620601589103291, + "epoch": 0.5612797188925728, "grad_norm": 0.0, - "learning_rate": 8.482059104999387e-06, - "loss": 0.8978, + "learning_rate": 8.506854779816191e-06, + "loss": 0.9687, "step": 19807 }, { - "epoch": 0.5620885357548241, + "epoch": 0.5613080563348353, "grad_norm": 0.0, - "learning_rate": 8.481150683493166e-06, - "loss": 0.8737, + "learning_rate": 8.505947285486608e-06, + "loss": 0.8878, "step": 19808 }, { - "epoch": 0.562116912599319, + "epoch": 0.5613363937770977, "grad_norm": 0.0, - "learning_rate": 8.48024227481673e-06, - "loss": 0.783, + "learning_rate": 8.505039803741985e-06, + "loss": 0.8037, "step": 19809 }, { - "epoch": 0.5621452894438138, + "epoch": 0.5613647312193601, "grad_norm": 0.0, - "learning_rate": 8.47933387897776e-06, - "loss": 0.8373, + "learning_rate": 8.504132334589972e-06, + "loss": 0.8472, "step": 19810 }, { - "epoch": 0.5621736662883088, + "epoch": 0.5613930686616226, "grad_norm": 0.0, - "learning_rate": 8.478425495983923e-06, - "loss": 0.8556, + "learning_rate": 8.503224878038203e-06, + "loss": 0.7028, "step": 19811 }, { - "epoch": 0.5622020431328036, + "epoch": 0.561421406103885, "grad_norm": 0.0, - "learning_rate": 8.477517125842893e-06, - "loss": 0.7558, + "learning_rate": 8.502317434094331e-06, + "loss": 0.8926, "step": 19812 }, { - "epoch": 0.5622304199772985, + "epoch": 0.5614497435461475, "grad_norm": 0.0, - "learning_rate": 8.476608768562348e-06, - "loss": 0.8472, + "learning_rate": 8.501410002765991e-06, + "loss": 0.864, "step": 19813 }, { - "epoch": 0.5622587968217935, + "epoch": 0.56147808098841, "grad_norm": 0.0, - "learning_rate": 8.47570042414995e-06, - "loss": 0.7172, + "learning_rate": 8.500502584060832e-06, + "loss": 0.8973, "step": 19814 }, { - "epoch": 0.5622871736662883, + "epoch": 0.5615064184306724, "grad_norm": 0.0, - "learning_rate": 8.474792092613387e-06, - "loss": 0.8539, + "learning_rate": 8.4995951779865e-06, + "loss": 0.8075, "step": 19815 }, { - "epoch": 0.5623155505107832, + "epoch": 0.5615347558729349, "grad_norm": 0.0, - "learning_rate": 8.47388377396032e-06, - "loss": 0.8128, + "learning_rate": 8.498687784550632e-06, + "loss": 0.8341, "step": 19816 }, { - "epoch": 0.562343927355278, + "epoch": 0.5615630933151974, "grad_norm": 0.0, - "learning_rate": 8.472975468198425e-06, - "loss": 0.8016, + "learning_rate": 8.497780403760872e-06, + "loss": 0.8583, "step": 19817 }, { - "epoch": 0.562372304199773, + "epoch": 0.5615914307574599, "grad_norm": 0.0, - "learning_rate": 8.472067175335377e-06, - "loss": 0.864, + "learning_rate": 8.49687303562487e-06, + "loss": 0.83, "step": 19818 }, { - "epoch": 0.5624006810442679, + "epoch": 0.5616197681997223, "grad_norm": 0.0, - "learning_rate": 8.471158895378846e-06, - "loss": 0.8561, + "learning_rate": 8.49596568015026e-06, + "loss": 0.8059, "step": 19819 }, { - "epoch": 0.5624290578887627, + "epoch": 0.5616481056419848, "grad_norm": 0.0, - "learning_rate": 8.470250628336503e-06, - "loss": 0.7776, + "learning_rate": 8.495058337344698e-06, + "loss": 0.9564, "step": 19820 }, { - "epoch": 0.5624574347332577, + "epoch": 0.5616764430842472, "grad_norm": 0.0, - "learning_rate": 8.469342374216022e-06, - "loss": 0.7507, + "learning_rate": 8.494151007215811e-06, + "loss": 0.8661, "step": 19821 }, { - "epoch": 0.5624858115777526, + "epoch": 0.5617047805265096, "grad_norm": 0.0, - "learning_rate": 8.468434133025077e-06, - "loss": 0.7964, + "learning_rate": 8.49324368977125e-06, + "loss": 0.9678, "step": 19822 }, { - "epoch": 0.5625141884222474, + "epoch": 0.5617331179687721, "grad_norm": 0.0, - "learning_rate": 8.467525904771336e-06, - "loss": 0.7957, + "learning_rate": 8.49233638501866e-06, + "loss": 0.837, "step": 19823 }, { - "epoch": 0.5625425652667423, + "epoch": 0.5617614554110346, "grad_norm": 0.0, - "learning_rate": 8.466617689462474e-06, - "loss": 0.8693, + "learning_rate": 8.491429092965677e-06, + "loss": 0.8876, "step": 19824 }, { - "epoch": 0.5625709421112373, + "epoch": 0.5617897928532971, "grad_norm": 0.0, - "learning_rate": 8.46570948710616e-06, - "loss": 0.9539, + "learning_rate": 8.490521813619947e-06, + "loss": 0.9327, "step": 19825 }, { - "epoch": 0.5625993189557321, + "epoch": 0.5618181302955595, "grad_norm": 0.0, - "learning_rate": 8.464801297710065e-06, - "loss": 0.7131, + "learning_rate": 8.489614546989116e-06, + "loss": 0.8748, "step": 19826 }, { - "epoch": 0.562627695800227, + "epoch": 0.561846467737822, "grad_norm": 0.0, - "learning_rate": 8.463893121281865e-06, - "loss": 0.864, + "learning_rate": 8.48870729308082e-06, + "loss": 0.9498, "step": 19827 }, { - "epoch": 0.562656072644722, + "epoch": 0.5618748051800845, "grad_norm": 0.0, - "learning_rate": 8.46298495782923e-06, - "loss": 0.926, + "learning_rate": 8.487800051902706e-06, + "loss": 0.8609, "step": 19828 }, { - "epoch": 0.5626844494892168, + "epoch": 0.5619031426223469, "grad_norm": 0.0, - "learning_rate": 8.462076807359824e-06, - "loss": 0.8138, + "learning_rate": 8.48689282346241e-06, + "loss": 0.8569, "step": 19829 }, { - "epoch": 0.5627128263337117, + "epoch": 0.5619314800646094, "grad_norm": 0.0, - "learning_rate": 8.461168669881328e-06, - "loss": 0.7892, + "learning_rate": 8.485985607767578e-06, + "loss": 0.8546, "step": 19830 }, { - "epoch": 0.5627412031782065, + "epoch": 0.5619598175068719, "grad_norm": 0.0, - "learning_rate": 8.460260545401411e-06, - "loss": 0.8935, + "learning_rate": 8.485078404825854e-06, + "loss": 0.8463, "step": 19831 }, { - "epoch": 0.5627695800227015, + "epoch": 0.5619881549491343, "grad_norm": 0.0, - "learning_rate": 8.459352433927743e-06, - "loss": 0.8269, + "learning_rate": 8.484171214644876e-06, + "loss": 0.821, "step": 19832 }, { - "epoch": 0.5627979568671964, + "epoch": 0.5620164923913967, "grad_norm": 0.0, - "learning_rate": 8.45844433546799e-06, - "loss": 0.8772, + "learning_rate": 8.483264037232284e-06, + "loss": 0.8277, "step": 19833 }, { - "epoch": 0.5628263337116912, + "epoch": 0.5620448298336592, "grad_norm": 0.0, - "learning_rate": 8.457536250029832e-06, - "loss": 0.9023, + "learning_rate": 8.482356872595729e-06, + "loss": 0.8242, "step": 19834 }, { - "epoch": 0.5628547105561862, + "epoch": 0.5620731672759217, "grad_norm": 0.0, - "learning_rate": 8.456628177620933e-06, - "loss": 0.8639, + "learning_rate": 8.48144972074284e-06, + "loss": 0.9982, "step": 19835 }, { - "epoch": 0.562883087400681, + "epoch": 0.5621015047181841, "grad_norm": 0.0, - "learning_rate": 8.455720118248963e-06, - "loss": 0.9303, + "learning_rate": 8.480542581681268e-06, + "loss": 0.9182, "step": 19836 }, { - "epoch": 0.5629114642451759, + "epoch": 0.5621298421604466, "grad_norm": 0.0, - "learning_rate": 8.454812071921597e-06, - "loss": 0.9576, + "learning_rate": 8.479635455418647e-06, + "loss": 0.819, "step": 19837 }, { - "epoch": 0.5629398410896709, + "epoch": 0.5621581796027091, "grad_norm": 0.0, - "learning_rate": 8.453904038646501e-06, - "loss": 0.8823, + "learning_rate": 8.478728341962619e-06, + "loss": 0.8687, "step": 19838 }, { - "epoch": 0.5629682179341657, + "epoch": 0.5621865170449715, "grad_norm": 0.0, - "learning_rate": 8.452996018431348e-06, - "loss": 0.8387, + "learning_rate": 8.477821241320831e-06, + "loss": 0.8372, "step": 19839 }, { - "epoch": 0.5629965947786606, + "epoch": 0.562214854487234, "grad_norm": 0.0, - "learning_rate": 8.452088011283807e-06, - "loss": 0.8776, + "learning_rate": 8.476914153500917e-06, + "loss": 0.8998, "step": 19840 }, { - "epoch": 0.5630249716231555, + "epoch": 0.5622431919294965, "grad_norm": 0.0, - "learning_rate": 8.45118001721155e-06, - "loss": 0.7779, + "learning_rate": 8.476007078510526e-06, + "loss": 0.9085, "step": 19841 }, { - "epoch": 0.5630533484676504, + "epoch": 0.562271529371759, "grad_norm": 0.0, - "learning_rate": 8.450272036222245e-06, - "loss": 0.8405, + "learning_rate": 8.475100016357288e-06, + "loss": 0.8696, "step": 19842 }, { - "epoch": 0.5630817253121453, + "epoch": 0.5622998668140213, "grad_norm": 0.0, - "learning_rate": 8.449364068323559e-06, - "loss": 0.8416, + "learning_rate": 8.47419296704885e-06, + "loss": 0.8713, "step": 19843 }, { - "epoch": 0.5631101021566401, + "epoch": 0.5623282042562838, "grad_norm": 0.0, - "learning_rate": 8.448456113523166e-06, - "loss": 0.7819, + "learning_rate": 8.473285930592852e-06, + "loss": 0.9647, "step": 19844 }, { - "epoch": 0.5631384790011351, + "epoch": 0.5623565416985463, "grad_norm": 0.0, - "learning_rate": 8.44754817182873e-06, - "loss": 0.8366, + "learning_rate": 8.472378906996932e-06, + "loss": 0.7881, "step": 19845 }, { - "epoch": 0.56316685584563, + "epoch": 0.5623848791408087, "grad_norm": 0.0, - "learning_rate": 8.44664024324793e-06, - "loss": 0.8827, + "learning_rate": 8.471471896268732e-06, + "loss": 0.9216, "step": 19846 }, { - "epoch": 0.5631952326901248, + "epoch": 0.5624132165830712, "grad_norm": 0.0, - "learning_rate": 8.445732327788425e-06, - "loss": 0.8926, + "learning_rate": 8.470564898415897e-06, + "loss": 0.8582, "step": 19847 }, { - "epoch": 0.5632236095346197, + "epoch": 0.5624415540253337, "grad_norm": 0.0, - "learning_rate": 8.444824425457893e-06, - "loss": 0.8144, + "learning_rate": 8.469657913446055e-06, + "loss": 0.8893, "step": 19848 }, { - "epoch": 0.5632519863791147, + "epoch": 0.5624698914675962, "grad_norm": 0.0, - "learning_rate": 8.443916536263999e-06, - "loss": 0.8171, + "learning_rate": 8.468750941366858e-06, + "loss": 0.6564, "step": 19849 }, { - "epoch": 0.5632803632236095, + "epoch": 0.5624982289098586, "grad_norm": 0.0, - "learning_rate": 8.443008660214409e-06, - "loss": 0.8343, + "learning_rate": 8.467843982185937e-06, + "loss": 0.9567, "step": 19850 }, { - "epoch": 0.5633087400681044, + "epoch": 0.5625265663521211, "grad_norm": 0.0, - "learning_rate": 8.442100797316797e-06, - "loss": 0.9446, + "learning_rate": 8.466937035910934e-06, + "loss": 0.9444, "step": 19851 }, { - "epoch": 0.5633371169125994, + "epoch": 0.5625549037943836, "grad_norm": 0.0, - "learning_rate": 8.441192947578829e-06, - "loss": 0.8028, + "learning_rate": 8.466030102549493e-06, + "loss": 0.6908, "step": 19852 }, { - "epoch": 0.5633654937570942, + "epoch": 0.5625832412366459, "grad_norm": 0.0, - "learning_rate": 8.440285111008172e-06, - "loss": 0.8004, + "learning_rate": 8.465123182109247e-06, + "loss": 0.9272, "step": 19853 }, { - "epoch": 0.5633938706015891, + "epoch": 0.5626115786789084, "grad_norm": 0.0, - "learning_rate": 8.4393772876125e-06, - "loss": 0.9549, + "learning_rate": 8.464216274597839e-06, + "loss": 0.9267, "step": 19854 }, { - "epoch": 0.563422247446084, + "epoch": 0.5626399161211709, "grad_norm": 0.0, - "learning_rate": 8.43846947739948e-06, - "loss": 0.9068, + "learning_rate": 8.463309380022911e-06, + "loss": 0.9084, "step": 19855 }, { - "epoch": 0.5634506242905789, + "epoch": 0.5626682535634334, "grad_norm": 0.0, - "learning_rate": 8.437561680376774e-06, - "loss": 1.0605, + "learning_rate": 8.462402498392095e-06, + "loss": 0.7834, "step": 19856 }, { - "epoch": 0.5634790011350738, + "epoch": 0.5626965910056958, "grad_norm": 0.0, - "learning_rate": 8.436653896552057e-06, - "loss": 0.8735, + "learning_rate": 8.461495629713036e-06, + "loss": 0.8237, "step": 19857 }, { - "epoch": 0.5635073779795686, + "epoch": 0.5627249284479583, "grad_norm": 0.0, - "learning_rate": 8.435746125932995e-06, - "loss": 0.9928, + "learning_rate": 8.460588773993368e-06, + "loss": 0.9051, "step": 19858 }, { - "epoch": 0.5635357548240636, + "epoch": 0.5627532658902208, "grad_norm": 0.0, - "learning_rate": 8.434838368527254e-06, - "loss": 0.8646, + "learning_rate": 8.459681931240734e-06, + "loss": 0.8462, "step": 19859 }, { - "epoch": 0.5635641316685585, + "epoch": 0.5627816033324832, "grad_norm": 0.0, - "learning_rate": 8.433930624342503e-06, - "loss": 0.9256, + "learning_rate": 8.458775101462773e-06, + "loss": 0.787, "step": 19860 }, { - "epoch": 0.5635925085130533, + "epoch": 0.5628099407747457, "grad_norm": 0.0, - "learning_rate": 8.43302289338641e-06, - "loss": 0.8656, + "learning_rate": 8.45786828466712e-06, + "loss": 0.8765, "step": 19861 }, { - "epoch": 0.5636208853575483, + "epoch": 0.5628382782170082, "grad_norm": 0.0, - "learning_rate": 8.432115175666647e-06, - "loss": 0.8992, + "learning_rate": 8.456961480861413e-06, + "loss": 0.8828, "step": 19862 }, { - "epoch": 0.5636492622020431, + "epoch": 0.5628666156592705, "grad_norm": 0.0, - "learning_rate": 8.431207471190877e-06, - "loss": 0.893, + "learning_rate": 8.456054690053296e-06, + "loss": 0.8455, "step": 19863 }, { - "epoch": 0.563677639046538, + "epoch": 0.562894953101533, "grad_norm": 0.0, - "learning_rate": 8.430299779966764e-06, - "loss": 0.844, + "learning_rate": 8.455147912250401e-06, + "loss": 0.9138, "step": 19864 }, { - "epoch": 0.5637060158910329, + "epoch": 0.5629232905437955, "grad_norm": 0.0, - "learning_rate": 8.429392102001983e-06, - "loss": 0.9126, + "learning_rate": 8.45424114746037e-06, + "loss": 0.8014, "step": 19865 }, { - "epoch": 0.5637343927355278, + "epoch": 0.562951627986058, "grad_norm": 0.0, - "learning_rate": 8.428484437304198e-06, - "loss": 0.8303, + "learning_rate": 8.453334395690839e-06, + "loss": 0.7997, "step": 19866 }, { - "epoch": 0.5637627695800227, + "epoch": 0.5629799654283204, "grad_norm": 0.0, - "learning_rate": 8.427576785881073e-06, - "loss": 0.8573, + "learning_rate": 8.452427656949446e-06, + "loss": 0.9088, "step": 19867 }, { - "epoch": 0.5637911464245176, + "epoch": 0.5630083028705829, "grad_norm": 0.0, - "learning_rate": 8.42666914774028e-06, - "loss": 0.8613, + "learning_rate": 8.451520931243833e-06, + "loss": 0.8637, "step": 19868 }, { - "epoch": 0.5638195232690125, + "epoch": 0.5630366403128454, "grad_norm": 0.0, - "learning_rate": 8.425761522889483e-06, - "loss": 0.9066, + "learning_rate": 8.450614218581631e-06, + "loss": 0.8893, "step": 19869 }, { - "epoch": 0.5638479001135074, + "epoch": 0.5630649777551078, "grad_norm": 0.0, - "learning_rate": 8.424853911336347e-06, - "loss": 0.9131, + "learning_rate": 8.449707518970482e-06, + "loss": 0.8123, "step": 19870 }, { - "epoch": 0.5638762769580022, + "epoch": 0.5630933151973703, "grad_norm": 0.0, - "learning_rate": 8.423946313088543e-06, - "loss": 0.87, + "learning_rate": 8.448800832418022e-06, + "loss": 0.9363, "step": 19871 }, { - "epoch": 0.5639046538024972, + "epoch": 0.5631216526396328, "grad_norm": 0.0, - "learning_rate": 8.423038728153734e-06, - "loss": 0.823, + "learning_rate": 8.447894158931888e-06, + "loss": 0.8635, "step": 19872 }, { - "epoch": 0.5639330306469921, + "epoch": 0.5631499900818953, "grad_norm": 0.0, - "learning_rate": 8.422131156539588e-06, - "loss": 0.9033, + "learning_rate": 8.446987498519722e-06, + "loss": 0.857, "step": 19873 }, { - "epoch": 0.5639614074914869, + "epoch": 0.5631783275241576, "grad_norm": 0.0, - "learning_rate": 8.421223598253773e-06, - "loss": 0.8775, + "learning_rate": 8.44608085118915e-06, + "loss": 0.8079, "step": 19874 }, { - "epoch": 0.5639897843359818, + "epoch": 0.5632066649664201, "grad_norm": 0.0, - "learning_rate": 8.420316053303948e-06, - "loss": 0.8117, + "learning_rate": 8.445174216947819e-06, + "loss": 0.8874, "step": 19875 }, { - "epoch": 0.5640181611804768, + "epoch": 0.5632350024086826, "grad_norm": 0.0, - "learning_rate": 8.419408521697788e-06, - "loss": 0.9188, + "learning_rate": 8.444267595803368e-06, + "loss": 0.9382, "step": 19876 }, { - "epoch": 0.5640465380249716, + "epoch": 0.563263339850945, "grad_norm": 0.0, - "learning_rate": 8.418501003442954e-06, - "loss": 0.8893, + "learning_rate": 8.443360987763421e-06, + "loss": 0.7484, "step": 19877 }, { - "epoch": 0.5640749148694665, + "epoch": 0.5632916772932075, "grad_norm": 0.0, - "learning_rate": 8.417593498547116e-06, - "loss": 0.8691, + "learning_rate": 8.442454392835627e-06, + "loss": 0.8079, "step": 19878 }, { - "epoch": 0.5641032917139615, + "epoch": 0.56332001473547, "grad_norm": 0.0, - "learning_rate": 8.416686007017936e-06, - "loss": 0.8377, + "learning_rate": 8.441547811027615e-06, + "loss": 0.8747, "step": 19879 }, { - "epoch": 0.5641316685584563, + "epoch": 0.5633483521777325, "grad_norm": 0.0, - "learning_rate": 8.415778528863077e-06, - "loss": 0.8453, + "learning_rate": 8.440641242347025e-06, + "loss": 0.7348, "step": 19880 }, { - "epoch": 0.5641600454029512, + "epoch": 0.5633766896199949, "grad_norm": 0.0, - "learning_rate": 8.414871064090211e-06, - "loss": 0.909, + "learning_rate": 8.439734686801498e-06, + "loss": 0.7422, "step": 19881 }, { - "epoch": 0.564188422247446, + "epoch": 0.5634050270622574, "grad_norm": 0.0, - "learning_rate": 8.413963612707001e-06, - "loss": 0.8221, + "learning_rate": 8.43882814439866e-06, + "loss": 0.7562, "step": 19882 }, { - "epoch": 0.564216799091941, + "epoch": 0.5634333645045199, "grad_norm": 0.0, - "learning_rate": 8.413056174721112e-06, - "loss": 0.8843, + "learning_rate": 8.437921615146152e-06, + "loss": 0.9724, "step": 19883 }, { - "epoch": 0.5642451759364359, + "epoch": 0.5634617019467822, "grad_norm": 0.0, - "learning_rate": 8.412148750140206e-06, - "loss": 0.8856, + "learning_rate": 8.437015099051613e-06, + "loss": 0.7251, "step": 19884 }, { - "epoch": 0.5642735527809307, + "epoch": 0.5634900393890447, "grad_norm": 0.0, - "learning_rate": 8.411241338971954e-06, - "loss": 0.7622, + "learning_rate": 8.436108596122673e-06, + "loss": 0.8575, "step": 19885 }, { - "epoch": 0.5643019296254257, + "epoch": 0.5635183768313072, "grad_norm": 0.0, - "learning_rate": 8.410333941224017e-06, - "loss": 0.8659, + "learning_rate": 8.435202106366976e-06, + "loss": 0.9316, "step": 19886 }, { - "epoch": 0.5643303064699206, + "epoch": 0.5635467142735696, "grad_norm": 0.0, - "learning_rate": 8.409426556904058e-06, - "loss": 0.8849, + "learning_rate": 8.434295629792149e-06, + "loss": 0.9139, "step": 19887 }, { - "epoch": 0.5643586833144154, + "epoch": 0.5635750517158321, "grad_norm": 0.0, - "learning_rate": 8.408519186019748e-06, - "loss": 0.9202, + "learning_rate": 8.433389166405829e-06, + "loss": 0.9692, "step": 19888 }, { - "epoch": 0.5643870601589104, + "epoch": 0.5636033891580946, "grad_norm": 0.0, - "learning_rate": 8.407611828578746e-06, - "loss": 0.9243, + "learning_rate": 8.432482716215663e-06, + "loss": 0.9109, "step": 19889 }, { - "epoch": 0.5644154370034052, + "epoch": 0.5636317266003571, "grad_norm": 0.0, - "learning_rate": 8.406704484588718e-06, - "loss": 0.8162, + "learning_rate": 8.431576279229268e-06, + "loss": 0.7918, "step": 19890 }, { - "epoch": 0.5644438138479001, + "epoch": 0.5636600640426195, "grad_norm": 0.0, - "learning_rate": 8.405797154057326e-06, - "loss": 0.8379, + "learning_rate": 8.43066985545429e-06, + "loss": 0.913, "step": 19891 }, { - "epoch": 0.564472190692395, + "epoch": 0.563688401484882, "grad_norm": 0.0, - "learning_rate": 8.404889836992242e-06, - "loss": 0.8654, + "learning_rate": 8.429763444898364e-06, + "loss": 0.9398, "step": 19892 }, { - "epoch": 0.5645005675368899, + "epoch": 0.5637167389271445, "grad_norm": 0.0, - "learning_rate": 8.403982533401123e-06, - "loss": 0.8545, + "learning_rate": 8.428857047569124e-06, + "loss": 0.8838, "step": 19893 }, { - "epoch": 0.5645289443813848, + "epoch": 0.5637450763694069, "grad_norm": 0.0, - "learning_rate": 8.403075243291636e-06, - "loss": 0.8046, + "learning_rate": 8.427950663474207e-06, + "loss": 0.9069, "step": 19894 }, { - "epoch": 0.5645573212258796, + "epoch": 0.5637734138116693, "grad_norm": 0.0, - "learning_rate": 8.402167966671445e-06, - "loss": 0.8818, + "learning_rate": 8.427044292621241e-06, + "loss": 0.9057, "step": 19895 }, { - "epoch": 0.5645856980703746, + "epoch": 0.5638017512539318, "grad_norm": 0.0, - "learning_rate": 8.401260703548213e-06, - "loss": 0.8468, + "learning_rate": 8.426137935017865e-06, + "loss": 0.8936, "step": 19896 }, { - "epoch": 0.5646140749148695, + "epoch": 0.5638300886961943, "grad_norm": 0.0, - "learning_rate": 8.400353453929601e-06, - "loss": 0.884, + "learning_rate": 8.425231590671716e-06, + "loss": 0.8669, "step": 19897 }, { - "epoch": 0.5646424517593643, + "epoch": 0.5638584261384567, "grad_norm": 0.0, - "learning_rate": 8.39944621782328e-06, - "loss": 0.8641, + "learning_rate": 8.424325259590425e-06, + "loss": 0.8955, "step": 19898 }, { - "epoch": 0.5646708286038592, + "epoch": 0.5638867635807192, "grad_norm": 0.0, - "learning_rate": 8.398538995236906e-06, - "loss": 0.8894, + "learning_rate": 8.42341894178163e-06, + "loss": 0.864, "step": 19899 }, { - "epoch": 0.5646992054483542, + "epoch": 0.5639151010229817, "grad_norm": 0.0, - "learning_rate": 8.397631786178143e-06, - "loss": 0.9414, + "learning_rate": 8.422512637252958e-06, + "loss": 0.8604, "step": 19900 }, { - "epoch": 0.564727582292849, + "epoch": 0.5639434384652441, "grad_norm": 0.0, - "learning_rate": 8.39672459065466e-06, - "loss": 0.8551, + "learning_rate": 8.42160634601205e-06, + "loss": 0.8734, "step": 19901 }, { - "epoch": 0.5647559591373439, + "epoch": 0.5639717759075066, "grad_norm": 0.0, - "learning_rate": 8.395817408674117e-06, - "loss": 0.877, + "learning_rate": 8.42070006806654e-06, + "loss": 0.8132, "step": 19902 }, { - "epoch": 0.5647843359818389, + "epoch": 0.5640001133497691, "grad_norm": 0.0, - "learning_rate": 8.394910240244172e-06, - "loss": 0.7134, + "learning_rate": 8.419793803424057e-06, + "loss": 0.8892, "step": 19903 }, { - "epoch": 0.5648127128263337, + "epoch": 0.5640284507920316, "grad_norm": 0.0, - "learning_rate": 8.394003085372496e-06, - "loss": 0.9119, + "learning_rate": 8.418887552092237e-06, + "loss": 0.8866, "step": 19904 }, { - "epoch": 0.5648410896708286, + "epoch": 0.564056788234294, "grad_norm": 0.0, - "learning_rate": 8.393095944066749e-06, - "loss": 0.9209, + "learning_rate": 8.417981314078717e-06, + "loss": 0.8929, "step": 19905 }, { - "epoch": 0.5648694665153235, + "epoch": 0.5640851256765564, "grad_norm": 0.0, - "learning_rate": 8.392188816334587e-06, - "loss": 0.93, + "learning_rate": 8.417075089391125e-06, + "loss": 0.9778, "step": 19906 }, { - "epoch": 0.5648978433598184, + "epoch": 0.5641134631188189, "grad_norm": 0.0, - "learning_rate": 8.391281702183683e-06, - "loss": 0.9215, + "learning_rate": 8.416168878037103e-06, + "loss": 0.7999, "step": 19907 }, { - "epoch": 0.5649262202043133, + "epoch": 0.5641418005610813, "grad_norm": 0.0, - "learning_rate": 8.39037460162169e-06, - "loss": 0.8487, + "learning_rate": 8.415262680024272e-06, + "loss": 0.8072, "step": 19908 }, { - "epoch": 0.5649545970488081, + "epoch": 0.5641701380033438, "grad_norm": 0.0, - "learning_rate": 8.389467514656282e-06, - "loss": 0.7769, + "learning_rate": 8.414356495360273e-06, + "loss": 1.0126, "step": 19909 }, { - "epoch": 0.5649829738933031, + "epoch": 0.5641984754456063, "grad_norm": 0.0, - "learning_rate": 8.388560441295111e-06, - "loss": 0.9885, + "learning_rate": 8.41345032405274e-06, + "loss": 0.9385, "step": 19910 }, { - "epoch": 0.565011350737798, + "epoch": 0.5642268128878687, "grad_norm": 0.0, - "learning_rate": 8.387653381545841e-06, - "loss": 0.9109, + "learning_rate": 8.412544166109304e-06, + "loss": 0.9756, "step": 19911 }, { - "epoch": 0.5650397275822928, + "epoch": 0.5642551503301312, "grad_norm": 0.0, - "learning_rate": 8.38674633541614e-06, - "loss": 0.7969, + "learning_rate": 8.411638021537596e-06, + "loss": 0.9008, "step": 19912 }, { - "epoch": 0.5650681044267878, + "epoch": 0.5642834877723937, "grad_norm": 0.0, - "learning_rate": 8.385839302913661e-06, - "loss": 0.9092, + "learning_rate": 8.410731890345256e-06, + "loss": 0.8227, "step": 19913 }, { - "epoch": 0.5650964812712826, + "epoch": 0.5643118252146562, "grad_norm": 0.0, - "learning_rate": 8.38493228404607e-06, - "loss": 0.8912, + "learning_rate": 8.409825772539905e-06, + "loss": 0.9421, "step": 19914 }, { - "epoch": 0.5651248581157775, + "epoch": 0.5643401626569186, "grad_norm": 0.0, - "learning_rate": 8.384025278821032e-06, - "loss": 0.9272, + "learning_rate": 8.408919668129186e-06, + "loss": 0.88, "step": 19915 }, { - "epoch": 0.5651532349602724, + "epoch": 0.564368500099181, "grad_norm": 0.0, - "learning_rate": 8.383118287246203e-06, - "loss": 0.7954, + "learning_rate": 8.408013577120729e-06, + "loss": 0.862, "step": 19916 }, { - "epoch": 0.5651816118047673, + "epoch": 0.5643968375414435, "grad_norm": 0.0, - "learning_rate": 8.382211309329245e-06, - "loss": 0.8774, + "learning_rate": 8.407107499522158e-06, + "loss": 0.8888, "step": 19917 }, { - "epoch": 0.5652099886492622, + "epoch": 0.5644251749837059, "grad_norm": 0.0, - "learning_rate": 8.381304345077823e-06, - "loss": 0.894, + "learning_rate": 8.406201435341118e-06, + "loss": 0.9592, "step": 19918 }, { - "epoch": 0.5652383654937571, + "epoch": 0.5644535124259684, "grad_norm": 0.0, - "learning_rate": 8.380397394499597e-06, - "loss": 0.8989, + "learning_rate": 8.405295384585232e-06, + "loss": 0.8001, "step": 19919 }, { - "epoch": 0.565266742338252, + "epoch": 0.5644818498682309, "grad_norm": 0.0, - "learning_rate": 8.379490457602224e-06, - "loss": 0.9002, + "learning_rate": 8.404389347262136e-06, + "loss": 0.8431, "step": 19920 }, { - "epoch": 0.5652951191827469, + "epoch": 0.5645101873104934, "grad_norm": 0.0, - "learning_rate": 8.37858353439337e-06, - "loss": 0.9642, + "learning_rate": 8.403483323379465e-06, + "loss": 0.8653, "step": 19921 }, { - "epoch": 0.5653234960272417, + "epoch": 0.5645385247527558, "grad_norm": 0.0, - "learning_rate": 8.377676624880688e-06, - "loss": 0.9666, + "learning_rate": 8.402577312944842e-06, + "loss": 0.9525, "step": 19922 }, { - "epoch": 0.5653518728717367, + "epoch": 0.5645668621950183, "grad_norm": 0.0, - "learning_rate": 8.376769729071852e-06, - "loss": 0.9099, + "learning_rate": 8.401671315965905e-06, + "loss": 0.8567, "step": 19923 }, { - "epoch": 0.5653802497162316, + "epoch": 0.5645951996372808, "grad_norm": 0.0, - "learning_rate": 8.375862846974512e-06, - "loss": 0.9139, + "learning_rate": 8.400765332450283e-06, + "loss": 1.0071, "step": 19924 }, { - "epoch": 0.5654086265607264, + "epoch": 0.5646235370795432, "grad_norm": 0.0, - "learning_rate": 8.374955978596331e-06, - "loss": 0.8281, + "learning_rate": 8.399859362405606e-06, + "loss": 0.9345, "step": 19925 }, { - "epoch": 0.5654370034052213, + "epoch": 0.5646518745218057, "grad_norm": 0.0, - "learning_rate": 8.374049123944971e-06, - "loss": 0.8182, + "learning_rate": 8.398953405839516e-06, + "loss": 0.8534, "step": 19926 }, { - "epoch": 0.5654653802497163, + "epoch": 0.5646802119640681, "grad_norm": 0.0, - "learning_rate": 8.373142283028093e-06, - "loss": 0.9622, + "learning_rate": 8.39804746275963e-06, + "loss": 0.9607, "step": 19927 }, { - "epoch": 0.5654937570942111, + "epoch": 0.5647085494063306, "grad_norm": 0.0, - "learning_rate": 8.372235455853352e-06, - "loss": 0.8069, + "learning_rate": 8.397141533173586e-06, + "loss": 0.7987, "step": 19928 }, { - "epoch": 0.565522133938706, + "epoch": 0.564736886848593, "grad_norm": 0.0, - "learning_rate": 8.371328642428414e-06, - "loss": 0.8171, + "learning_rate": 8.396235617089013e-06, + "loss": 0.9061, "step": 19929 }, { - "epoch": 0.565550510783201, + "epoch": 0.5647652242908555, "grad_norm": 0.0, - "learning_rate": 8.370421842760934e-06, - "loss": 0.8738, + "learning_rate": 8.395329714513543e-06, + "loss": 0.8855, "step": 19930 }, { - "epoch": 0.5655788876276958, + "epoch": 0.564793561733118, "grad_norm": 0.0, - "learning_rate": 8.369515056858575e-06, - "loss": 0.825, + "learning_rate": 8.394423825454812e-06, + "loss": 0.8225, "step": 19931 }, { - "epoch": 0.5656072644721907, + "epoch": 0.5648218991753804, "grad_norm": 0.0, - "learning_rate": 8.368608284728997e-06, - "loss": 0.8676, + "learning_rate": 8.393517949920438e-06, + "loss": 0.9412, "step": 19932 }, { - "epoch": 0.5656356413166855, + "epoch": 0.5648502366176429, "grad_norm": 0.0, - "learning_rate": 8.367701526379857e-06, - "loss": 0.8926, + "learning_rate": 8.392612087918062e-06, + "loss": 0.8792, "step": 19933 }, { - "epoch": 0.5656640181611805, + "epoch": 0.5648785740599054, "grad_norm": 0.0, - "learning_rate": 8.366794781818813e-06, - "loss": 0.7923, + "learning_rate": 8.391706239455316e-06, + "loss": 0.7867, "step": 19934 }, { - "epoch": 0.5656923950056754, + "epoch": 0.5649069115021678, "grad_norm": 0.0, - "learning_rate": 8.36588805105353e-06, - "loss": 0.8445, + "learning_rate": 8.390800404539818e-06, + "loss": 0.8261, "step": 19935 }, { - "epoch": 0.5657207718501702, + "epoch": 0.5649352489444303, "grad_norm": 0.0, - "learning_rate": 8.364981334091665e-06, - "loss": 0.9124, + "learning_rate": 8.38989458317921e-06, + "loss": 0.7963, "step": 19936 }, { - "epoch": 0.5657491486946652, + "epoch": 0.5649635863866928, "grad_norm": 0.0, - "learning_rate": 8.364074630940869e-06, - "loss": 0.8511, + "learning_rate": 8.388988775381115e-06, + "loss": 0.7863, "step": 19937 }, { - "epoch": 0.56577752553916, + "epoch": 0.5649919238289552, "grad_norm": 0.0, - "learning_rate": 8.363167941608814e-06, - "loss": 0.8529, + "learning_rate": 8.388082981153167e-06, + "loss": 0.8316, "step": 19938 }, { - "epoch": 0.5658059023836549, + "epoch": 0.5650202612712176, "grad_norm": 0.0, - "learning_rate": 8.362261266103152e-06, - "loss": 0.9039, + "learning_rate": 8.387177200502996e-06, + "loss": 0.7887, "step": 19939 }, { - "epoch": 0.5658342792281499, + "epoch": 0.5650485987134801, "grad_norm": 0.0, - "learning_rate": 8.361354604431544e-06, - "loss": 0.7566, + "learning_rate": 8.386271433438228e-06, + "loss": 0.8955, "step": 19940 }, { - "epoch": 0.5658626560726447, + "epoch": 0.5650769361557426, "grad_norm": 0.0, - "learning_rate": 8.360447956601645e-06, - "loss": 0.8686, + "learning_rate": 8.385365679966495e-06, + "loss": 0.822, "step": 19941 }, { - "epoch": 0.5658910329171396, + "epoch": 0.565105273598005, "grad_norm": 0.0, - "learning_rate": 8.359541322621118e-06, - "loss": 0.8919, + "learning_rate": 8.38445994009543e-06, + "loss": 0.8955, "step": 19942 }, { - "epoch": 0.5659194097616345, + "epoch": 0.5651336110402675, "grad_norm": 0.0, - "learning_rate": 8.358634702497618e-06, - "loss": 0.8405, + "learning_rate": 8.383554213832654e-06, + "loss": 0.8125, "step": 19943 }, { - "epoch": 0.5659477866061294, + "epoch": 0.56516194848253, "grad_norm": 0.0, - "learning_rate": 8.357728096238804e-06, + "learning_rate": 8.382648501185806e-06, "loss": 0.8572, "step": 19944 }, { - "epoch": 0.5659761634506243, + "epoch": 0.5651902859247925, "grad_norm": 0.0, - "learning_rate": 8.356821503852338e-06, - "loss": 0.9198, + "learning_rate": 8.381742802162506e-06, + "loss": 0.871, "step": 19945 }, { - "epoch": 0.5660045402951192, + "epoch": 0.5652186233670549, "grad_norm": 0.0, - "learning_rate": 8.355914925345872e-06, - "loss": 0.9045, + "learning_rate": 8.380837116770389e-06, + "loss": 0.8725, "step": 19946 }, { - "epoch": 0.5660329171396141, + "epoch": 0.5652469608093174, "grad_norm": 0.0, - "learning_rate": 8.355008360727065e-06, - "loss": 0.8857, + "learning_rate": 8.379931445017086e-06, + "loss": 0.7688, "step": 19947 }, { - "epoch": 0.566061293984109, + "epoch": 0.5652752982515798, "grad_norm": 0.0, - "learning_rate": 8.354101810003578e-06, - "loss": 0.8644, + "learning_rate": 8.379025786910217e-06, + "loss": 0.8757, "step": 19948 }, { - "epoch": 0.5660896708286038, + "epoch": 0.5653036356938422, "grad_norm": 0.0, - "learning_rate": 8.353195273183068e-06, - "loss": 0.8066, + "learning_rate": 8.378120142457415e-06, + "loss": 0.9586, "step": 19949 }, { - "epoch": 0.5661180476730987, + "epoch": 0.5653319731361047, "grad_norm": 0.0, - "learning_rate": 8.352288750273192e-06, - "loss": 0.9557, + "learning_rate": 8.377214511666313e-06, + "loss": 0.9087, "step": 19950 }, { - "epoch": 0.5661464245175937, + "epoch": 0.5653603105783672, "grad_norm": 0.0, - "learning_rate": 8.351382241281604e-06, - "loss": 0.7772, + "learning_rate": 8.376308894544533e-06, + "loss": 0.9109, "step": 19951 }, { - "epoch": 0.5661748013620885, + "epoch": 0.5653886480206296, "grad_norm": 0.0, - "learning_rate": 8.350475746215963e-06, - "loss": 0.8605, + "learning_rate": 8.37540329109971e-06, + "loss": 0.7876, "step": 19952 }, { - "epoch": 0.5662031782065834, + "epoch": 0.5654169854628921, "grad_norm": 0.0, - "learning_rate": 8.34956926508393e-06, - "loss": 0.7649, + "learning_rate": 8.374497701339466e-06, + "loss": 0.8636, "step": 19953 }, { - "epoch": 0.5662315550510784, + "epoch": 0.5654453229051546, "grad_norm": 0.0, - "learning_rate": 8.34866279789316e-06, - "loss": 0.7937, + "learning_rate": 8.37359212527143e-06, + "loss": 0.8623, "step": 19954 }, { - "epoch": 0.5662599318955732, + "epoch": 0.5654736603474171, "grad_norm": 0.0, - "learning_rate": 8.34775634465131e-06, - "loss": 0.8639, + "learning_rate": 8.372686562903233e-06, + "loss": 0.8955, "step": 19955 }, { - "epoch": 0.5662883087400681, + "epoch": 0.5655019977896795, "grad_norm": 0.0, - "learning_rate": 8.346849905366036e-06, - "loss": 0.8669, + "learning_rate": 8.3717810142425e-06, + "loss": 0.7799, "step": 19956 }, { - "epoch": 0.566316685584563, + "epoch": 0.565530335231942, "grad_norm": 0.0, - "learning_rate": 8.345943480044996e-06, - "loss": 0.8065, + "learning_rate": 8.370875479296864e-06, + "loss": 1.0375, "step": 19957 }, { - "epoch": 0.5663450624290579, + "epoch": 0.5655586726742045, "grad_norm": 0.0, - "learning_rate": 8.345037068695844e-06, - "loss": 0.8139, + "learning_rate": 8.369969958073945e-06, + "loss": 0.882, "step": 19958 }, { - "epoch": 0.5663734392735528, + "epoch": 0.5655870101164668, "grad_norm": 0.0, - "learning_rate": 8.344130671326241e-06, - "loss": 0.8512, + "learning_rate": 8.369064450581374e-06, + "loss": 0.9233, "step": 19959 }, { - "epoch": 0.5664018161180476, + "epoch": 0.5656153475587293, "grad_norm": 0.0, - "learning_rate": 8.343224287943838e-06, - "loss": 0.8489, + "learning_rate": 8.368158956826783e-06, + "loss": 1.0023, "step": 19960 }, { - "epoch": 0.5664301929625426, + "epoch": 0.5656436850009918, "grad_norm": 0.0, - "learning_rate": 8.342317918556294e-06, - "loss": 0.7688, + "learning_rate": 8.36725347681779e-06, + "loss": 0.819, "step": 19961 }, { - "epoch": 0.5664585698070375, + "epoch": 0.5656720224432543, "grad_norm": 0.0, - "learning_rate": 8.341411563171266e-06, - "loss": 0.844, + "learning_rate": 8.36634801056203e-06, + "loss": 0.9952, "step": 19962 }, { - "epoch": 0.5664869466515323, + "epoch": 0.5657003598855167, "grad_norm": 0.0, - "learning_rate": 8.34050522179641e-06, - "loss": 0.7928, + "learning_rate": 8.365442558067127e-06, + "loss": 1.0118, "step": 19963 }, { - "epoch": 0.5665153234960273, + "epoch": 0.5657286973277792, "grad_norm": 0.0, - "learning_rate": 8.339598894439379e-06, - "loss": 0.8721, + "learning_rate": 8.364537119340705e-06, + "loss": 0.7759, "step": 19964 }, { - "epoch": 0.5665437003405221, + "epoch": 0.5657570347700417, "grad_norm": 0.0, - "learning_rate": 8.338692581107833e-06, - "loss": 0.9645, + "learning_rate": 8.363631694390402e-06, + "loss": 0.8291, "step": 19965 }, { - "epoch": 0.566572077185017, + "epoch": 0.5657853722123041, "grad_norm": 0.0, - "learning_rate": 8.337786281809425e-06, - "loss": 0.8783, + "learning_rate": 8.36272628322383e-06, + "loss": 0.914, "step": 19966 }, { - "epoch": 0.5666004540295119, + "epoch": 0.5658137096545666, "grad_norm": 0.0, - "learning_rate": 8.336879996551807e-06, - "loss": 0.8816, + "learning_rate": 8.361820885848623e-06, + "loss": 0.8696, "step": 19967 }, { - "epoch": 0.5666288308740068, + "epoch": 0.5658420470968291, "grad_norm": 0.0, - "learning_rate": 8.335973725342642e-06, - "loss": 0.7732, + "learning_rate": 8.36091550227241e-06, + "loss": 0.8431, "step": 19968 }, { - "epoch": 0.5666572077185017, + "epoch": 0.5658703845390916, "grad_norm": 0.0, - "learning_rate": 8.33506746818958e-06, - "loss": 0.8365, + "learning_rate": 8.360010132502811e-06, + "loss": 0.8836, "step": 19969 }, { - "epoch": 0.5666855845629966, + "epoch": 0.5658987219813539, "grad_norm": 0.0, - "learning_rate": 8.334161225100279e-06, - "loss": 0.8846, + "learning_rate": 8.359104776547458e-06, + "loss": 0.851, "step": 19970 }, { - "epoch": 0.5667139614074915, + "epoch": 0.5659270594236164, "grad_norm": 0.0, - "learning_rate": 8.333254996082394e-06, - "loss": 0.9567, + "learning_rate": 8.358199434413977e-06, + "loss": 0.7944, "step": 19971 }, { - "epoch": 0.5667423382519864, + "epoch": 0.5659553968658789, "grad_norm": 0.0, - "learning_rate": 8.332348781143579e-06, - "loss": 0.8184, + "learning_rate": 8.357294106109988e-06, + "loss": 0.9524, "step": 19972 }, { - "epoch": 0.5667707150964812, + "epoch": 0.5659837343081413, "grad_norm": 0.0, - "learning_rate": 8.331442580291487e-06, - "loss": 0.9537, + "learning_rate": 8.356388791643126e-06, + "loss": 0.9264, "step": 19973 }, { - "epoch": 0.5667990919409762, + "epoch": 0.5660120717504038, "grad_norm": 0.0, - "learning_rate": 8.330536393533777e-06, - "loss": 0.8842, + "learning_rate": 8.355483491021007e-06, + "loss": 0.9222, "step": 19974 }, { - "epoch": 0.5668274687854711, + "epoch": 0.5660404091926663, "grad_norm": 0.0, - "learning_rate": 8.329630220878098e-06, - "loss": 0.8582, + "learning_rate": 8.35457820425126e-06, + "loss": 0.9421, "step": 19975 }, { - "epoch": 0.5668558456299659, + "epoch": 0.5660687466349287, "grad_norm": 0.0, - "learning_rate": 8.32872406233211e-06, - "loss": 0.8296, + "learning_rate": 8.353672931341514e-06, + "loss": 0.8901, "step": 19976 }, { - "epoch": 0.5668842224744608, + "epoch": 0.5660970840771912, "grad_norm": 0.0, - "learning_rate": 8.327817917903464e-06, - "loss": 0.8771, + "learning_rate": 8.35276767229939e-06, + "loss": 0.8833, "step": 19977 }, { - "epoch": 0.5669125993189558, + "epoch": 0.5661254215194537, "grad_norm": 0.0, - "learning_rate": 8.326911787599815e-06, - "loss": 0.9328, + "learning_rate": 8.351862427132516e-06, + "loss": 0.7935, "step": 19978 }, { - "epoch": 0.5669409761634506, + "epoch": 0.5661537589617162, "grad_norm": 0.0, - "learning_rate": 8.32600567142882e-06, - "loss": 0.8569, + "learning_rate": 8.350957195848521e-06, + "loss": 0.8047, "step": 19979 }, { - "epoch": 0.5669693530079455, + "epoch": 0.5661820964039785, "grad_norm": 0.0, - "learning_rate": 8.325099569398127e-06, - "loss": 0.8486, + "learning_rate": 8.350051978455023e-06, + "loss": 0.7519, "step": 19980 }, { - "epoch": 0.5669977298524405, + "epoch": 0.566210433846241, "grad_norm": 0.0, - "learning_rate": 8.324193481515394e-06, - "loss": 0.8053, + "learning_rate": 8.349146774959651e-06, + "loss": 0.8166, "step": 19981 }, { - "epoch": 0.5670261066969353, + "epoch": 0.5662387712885035, "grad_norm": 0.0, - "learning_rate": 8.323287407788274e-06, - "loss": 0.838, + "learning_rate": 8.348241585370026e-06, + "loss": 0.9521, "step": 19982 }, { - "epoch": 0.5670544835414302, + "epoch": 0.5662671087307659, "grad_norm": 0.0, - "learning_rate": 8.32238134822442e-06, - "loss": 0.8872, + "learning_rate": 8.347336409693776e-06, + "loss": 0.9518, "step": 19983 }, { - "epoch": 0.567082860385925, + "epoch": 0.5662954461730284, "grad_norm": 0.0, - "learning_rate": 8.321475302831487e-06, - "loss": 0.879, + "learning_rate": 8.34643124793853e-06, + "loss": 0.7933, "step": 19984 }, { - "epoch": 0.56711123723042, + "epoch": 0.5663237836152909, "grad_norm": 0.0, - "learning_rate": 8.320569271617127e-06, - "loss": 0.9255, + "learning_rate": 8.345526100111903e-06, + "loss": 1.0188, "step": 19985 }, { - "epoch": 0.5671396140749149, + "epoch": 0.5663521210575534, "grad_norm": 0.0, - "learning_rate": 8.319663254588995e-06, - "loss": 0.9389, + "learning_rate": 8.344620966221528e-06, + "loss": 0.8581, "step": 19986 }, { - "epoch": 0.5671679909194097, + "epoch": 0.5663804584998158, "grad_norm": 0.0, - "learning_rate": 8.318757251754743e-06, - "loss": 0.8835, + "learning_rate": 8.34371584627502e-06, + "loss": 0.7828, "step": 19987 }, { - "epoch": 0.5671963677639047, + "epoch": 0.5664087959420783, "grad_norm": 0.0, - "learning_rate": 8.317851263122023e-06, - "loss": 0.942, + "learning_rate": 8.34281074028001e-06, + "loss": 0.8157, "step": 19988 }, { - "epoch": 0.5672247446083996, + "epoch": 0.5664371333843408, "grad_norm": 0.0, - "learning_rate": 8.31694528869849e-06, - "loss": 0.7413, + "learning_rate": 8.341905648244122e-06, + "loss": 0.9095, "step": 19989 }, { - "epoch": 0.5672531214528944, + "epoch": 0.5664654708266031, "grad_norm": 0.0, - "learning_rate": 8.316039328491796e-06, - "loss": 0.9742, + "learning_rate": 8.341000570174977e-06, + "loss": 0.9547, "step": 19990 }, { - "epoch": 0.5672814982973893, + "epoch": 0.5664938082688656, "grad_norm": 0.0, - "learning_rate": 8.315133382509594e-06, - "loss": 0.8716, + "learning_rate": 8.340095506080198e-06, + "loss": 0.8902, "step": 19991 }, { - "epoch": 0.5673098751418842, + "epoch": 0.5665221457111281, "grad_norm": 0.0, - "learning_rate": 8.314227450759535e-06, - "loss": 0.9016, + "learning_rate": 8.339190455967418e-06, + "loss": 0.8424, "step": 19992 }, { - "epoch": 0.5673382519863791, + "epoch": 0.5665504831533906, "grad_norm": 0.0, - "learning_rate": 8.313321533249273e-06, - "loss": 0.8933, + "learning_rate": 8.338285419844249e-06, + "loss": 0.9204, "step": 19993 }, { - "epoch": 0.567366628830874, + "epoch": 0.566578820595653, "grad_norm": 0.0, - "learning_rate": 8.31241562998646e-06, - "loss": 0.8871, + "learning_rate": 8.33738039771832e-06, + "loss": 0.8438, "step": 19994 }, { - "epoch": 0.5673950056753689, + "epoch": 0.5666071580379155, "grad_norm": 0.0, - "learning_rate": 8.311509740978747e-06, - "loss": 1.0187, + "learning_rate": 8.336475389597252e-06, + "loss": 0.8146, "step": 19995 }, { - "epoch": 0.5674233825198638, + "epoch": 0.566635495480178, "grad_norm": 0.0, - "learning_rate": 8.310603866233788e-06, - "loss": 0.9285, + "learning_rate": 8.335570395488668e-06, + "loss": 0.8368, "step": 19996 }, { - "epoch": 0.5674517593643587, + "epoch": 0.5666638329224404, "grad_norm": 0.0, - "learning_rate": 8.309698005759233e-06, - "loss": 0.7852, + "learning_rate": 8.3346654154002e-06, + "loss": 0.8961, "step": 19997 }, { - "epoch": 0.5674801362088536, + "epoch": 0.5666921703647029, "grad_norm": 0.0, - "learning_rate": 8.308792159562732e-06, - "loss": 0.7735, + "learning_rate": 8.333760449339456e-06, + "loss": 0.9328, "step": 19998 }, { - "epoch": 0.5675085130533485, + "epoch": 0.5667205078069654, "grad_norm": 0.0, - "learning_rate": 8.307886327651943e-06, - "loss": 0.8926, + "learning_rate": 8.332855497314068e-06, + "loss": 0.8289, "step": 19999 }, { - "epoch": 0.5675368898978433, + "epoch": 0.5667488452492278, "grad_norm": 0.0, - "learning_rate": 8.306980510034515e-06, - "loss": 0.8493, + "learning_rate": 8.33195055933166e-06, + "loss": 0.8723, "step": 20000 }, { - "epoch": 0.5675652667423382, + "epoch": 0.5667771826914902, "grad_norm": 0.0, - "learning_rate": 8.3060747067181e-06, - "loss": 0.8758, + "learning_rate": 8.33104563539985e-06, + "loss": 0.942, "step": 20001 }, { - "epoch": 0.5675936435868332, + "epoch": 0.5668055201337527, "grad_norm": 0.0, - "learning_rate": 8.305168917710344e-06, - "loss": 0.8884, + "learning_rate": 8.330140725526264e-06, + "loss": 0.8542, "step": 20002 }, { - "epoch": 0.567622020431328, + "epoch": 0.5668338575760152, "grad_norm": 0.0, - "learning_rate": 8.304263143018906e-06, - "loss": 0.8239, + "learning_rate": 8.329235829718519e-06, + "loss": 0.9136, "step": 20003 }, { - "epoch": 0.5676503972758229, + "epoch": 0.5668621950182776, "grad_norm": 0.0, - "learning_rate": 8.303357382651432e-06, - "loss": 0.9161, + "learning_rate": 8.328330947984243e-06, + "loss": 0.9463, "step": 20004 }, { - "epoch": 0.5676787741203179, + "epoch": 0.5668905324605401, "grad_norm": 0.0, - "learning_rate": 8.302451636615574e-06, - "loss": 0.9279, + "learning_rate": 8.327426080331058e-06, + "loss": 0.8587, "step": 20005 }, { - "epoch": 0.5677071509648127, + "epoch": 0.5669188699028026, "grad_norm": 0.0, - "learning_rate": 8.301545904918986e-06, - "loss": 0.8919, + "learning_rate": 8.326521226766583e-06, + "loss": 0.8349, "step": 20006 }, { - "epoch": 0.5677355278093076, + "epoch": 0.566947207345065, "grad_norm": 0.0, - "learning_rate": 8.300640187569313e-06, - "loss": 0.8434, + "learning_rate": 8.32561638729844e-06, + "loss": 0.8399, "step": 20007 }, { - "epoch": 0.5677639046538024, + "epoch": 0.5669755447873275, "grad_norm": 0.0, - "learning_rate": 8.29973448457421e-06, - "loss": 0.875, + "learning_rate": 8.324711561934253e-06, + "loss": 0.8997, "step": 20008 }, { - "epoch": 0.5677922814982974, + "epoch": 0.56700388222959, "grad_norm": 0.0, - "learning_rate": 8.298828795941327e-06, - "loss": 0.8843, + "learning_rate": 8.323806750681641e-06, + "loss": 0.8822, "step": 20009 }, { - "epoch": 0.5678206583427923, + "epoch": 0.5670322196718525, "grad_norm": 0.0, - "learning_rate": 8.297923121678313e-06, - "loss": 0.9752, + "learning_rate": 8.322901953548232e-06, + "loss": 0.9327, "step": 20010 }, { - "epoch": 0.5678490351872871, + "epoch": 0.5670605571141148, "grad_norm": 0.0, - "learning_rate": 8.297017461792817e-06, - "loss": 0.9785, + "learning_rate": 8.321997170541638e-06, + "loss": 0.8558, "step": 20011 }, { - "epoch": 0.5678774120317821, + "epoch": 0.5670888945563773, "grad_norm": 0.0, - "learning_rate": 8.296111816292494e-06, - "loss": 0.9004, + "learning_rate": 8.321092401669484e-06, + "loss": 0.8271, "step": 20012 }, { - "epoch": 0.567905788876277, + "epoch": 0.5671172319986398, "grad_norm": 0.0, - "learning_rate": 8.29520618518499e-06, - "loss": 0.9974, + "learning_rate": 8.320187646939397e-06, + "loss": 0.9402, "step": 20013 }, { - "epoch": 0.5679341657207718, + "epoch": 0.5671455694409022, "grad_norm": 0.0, - "learning_rate": 8.294300568477951e-06, - "loss": 0.9089, + "learning_rate": 8.31928290635899e-06, + "loss": 0.9096, "step": 20014 }, { - "epoch": 0.5679625425652668, + "epoch": 0.5671739068831647, "grad_norm": 0.0, - "learning_rate": 8.293394966179039e-06, - "loss": 0.8406, + "learning_rate": 8.318378179935888e-06, + "loss": 0.8872, "step": 20015 }, { - "epoch": 0.5679909194097617, + "epoch": 0.5672022443254272, "grad_norm": 0.0, - "learning_rate": 8.292489378295892e-06, - "loss": 0.9082, + "learning_rate": 8.317473467677711e-06, + "loss": 0.8403, "step": 20016 }, { - "epoch": 0.5680192962542565, + "epoch": 0.5672305817676897, "grad_norm": 0.0, - "learning_rate": 8.291583804836167e-06, - "loss": 0.8114, + "learning_rate": 8.316568769592078e-06, + "loss": 0.8235, "step": 20017 }, { - "epoch": 0.5680476730987514, + "epoch": 0.5672589192099521, "grad_norm": 0.0, - "learning_rate": 8.29067824580751e-06, - "loss": 0.8204, + "learning_rate": 8.315664085686614e-06, + "loss": 0.8157, "step": 20018 }, { - "epoch": 0.5680760499432463, + "epoch": 0.5672872566522146, "grad_norm": 0.0, - "learning_rate": 8.289772701217567e-06, - "loss": 0.8165, + "learning_rate": 8.314759415968936e-06, + "loss": 0.8582, "step": 20019 }, { - "epoch": 0.5681044267877412, + "epoch": 0.5673155940944771, "grad_norm": 0.0, - "learning_rate": 8.288867171073995e-06, - "loss": 0.8362, + "learning_rate": 8.313854760446664e-06, + "loss": 0.8611, "step": 20020 }, { - "epoch": 0.5681328036322361, + "epoch": 0.5673439315367395, "grad_norm": 0.0, - "learning_rate": 8.287961655384439e-06, - "loss": 0.998, + "learning_rate": 8.312950119127422e-06, + "loss": 0.8653, "step": 20021 }, { - "epoch": 0.568161180476731, + "epoch": 0.567372268979002, "grad_norm": 0.0, - "learning_rate": 8.287056154156546e-06, - "loss": 0.8272, + "learning_rate": 8.312045492018822e-06, + "loss": 0.8925, "step": 20022 }, { - "epoch": 0.5681895573212259, + "epoch": 0.5674006064212644, "grad_norm": 0.0, - "learning_rate": 8.286150667397968e-06, - "loss": 0.9354, + "learning_rate": 8.311140879128498e-06, + "loss": 0.8199, "step": 20023 }, { - "epoch": 0.5682179341657208, + "epoch": 0.5674289438635268, "grad_norm": 0.0, - "learning_rate": 8.285245195116351e-06, - "loss": 0.8433, + "learning_rate": 8.310236280464056e-06, + "loss": 0.8774, "step": 20024 }, { - "epoch": 0.5682463110102156, + "epoch": 0.5674572813057893, "grad_norm": 0.0, - "learning_rate": 8.284339737319344e-06, - "loss": 0.7349, + "learning_rate": 8.309331696033122e-06, + "loss": 0.925, "step": 20025 }, { - "epoch": 0.5682746878547106, + "epoch": 0.5674856187480518, "grad_norm": 0.0, - "learning_rate": 8.2834342940146e-06, - "loss": 0.8345, + "learning_rate": 8.308427125843318e-06, + "loss": 0.8288, "step": 20026 }, { - "epoch": 0.5683030646992054, + "epoch": 0.5675139561903143, "grad_norm": 0.0, - "learning_rate": 8.282528865209762e-06, - "loss": 0.8341, + "learning_rate": 8.307522569902256e-06, + "loss": 0.9013, "step": 20027 }, { - "epoch": 0.5683314415437003, + "epoch": 0.5675422936325767, "grad_norm": 0.0, - "learning_rate": 8.281623450912479e-06, - "loss": 0.9253, + "learning_rate": 8.306618028217561e-06, + "loss": 0.9302, "step": 20028 }, { - "epoch": 0.5683598183881953, + "epoch": 0.5675706310748392, "grad_norm": 0.0, - "learning_rate": 8.280718051130396e-06, - "loss": 0.875, + "learning_rate": 8.305713500796852e-06, + "loss": 0.8981, "step": 20029 }, { - "epoch": 0.5683881952326901, + "epoch": 0.5675989685171017, "grad_norm": 0.0, - "learning_rate": 8.279812665871171e-06, - "loss": 0.9682, + "learning_rate": 8.304808987647747e-06, + "loss": 0.8474, "step": 20030 }, { - "epoch": 0.568416572077185, + "epoch": 0.5676273059593641, "grad_norm": 0.0, - "learning_rate": 8.278907295142444e-06, - "loss": 0.8743, + "learning_rate": 8.303904488777868e-06, + "loss": 0.9597, "step": 20031 }, { - "epoch": 0.56844494892168, + "epoch": 0.5676556434016266, "grad_norm": 0.0, - "learning_rate": 8.278001938951865e-06, - "loss": 0.8647, + "learning_rate": 8.303000004194829e-06, + "loss": 0.8696, "step": 20032 }, { - "epoch": 0.5684733257661748, + "epoch": 0.567683980843889, "grad_norm": 0.0, - "learning_rate": 8.27709659730708e-06, - "loss": 0.9445, + "learning_rate": 8.302095533906248e-06, + "loss": 0.7109, "step": 20033 }, { - "epoch": 0.5685017026106697, + "epoch": 0.5677123182861515, "grad_norm": 0.0, - "learning_rate": 8.276191270215739e-06, - "loss": 0.8042, + "learning_rate": 8.301191077919753e-06, + "loss": 0.8972, "step": 20034 }, { - "epoch": 0.5685300794551645, + "epoch": 0.5677406557284139, "grad_norm": 0.0, - "learning_rate": 8.275285957685488e-06, - "loss": 0.871, + "learning_rate": 8.300286636242951e-06, + "loss": 0.9033, "step": 20035 }, { - "epoch": 0.5685584562996595, + "epoch": 0.5677689931706764, "grad_norm": 0.0, - "learning_rate": 8.274380659723971e-06, - "loss": 0.8389, + "learning_rate": 8.299382208883473e-06, + "loss": 0.8852, "step": 20036 }, { - "epoch": 0.5685868331441544, + "epoch": 0.5677973306129389, "grad_norm": 0.0, - "learning_rate": 8.27347537633884e-06, - "loss": 0.8382, + "learning_rate": 8.298477795848922e-06, + "loss": 0.8148, "step": 20037 }, { - "epoch": 0.5686152099886492, + "epoch": 0.5678256680552013, "grad_norm": 0.0, - "learning_rate": 8.272570107537742e-06, - "loss": 0.8459, + "learning_rate": 8.297573397146928e-06, + "loss": 0.8777, "step": 20038 }, { - "epoch": 0.5686435868331442, + "epoch": 0.5678540054974638, "grad_norm": 0.0, - "learning_rate": 8.271664853328319e-06, - "loss": 0.7855, + "learning_rate": 8.296669012785105e-06, + "loss": 0.9162, "step": 20039 }, { - "epoch": 0.5686719636776391, + "epoch": 0.5678823429397263, "grad_norm": 0.0, - "learning_rate": 8.270759613718222e-06, - "loss": 0.9059, + "learning_rate": 8.295764642771072e-06, + "loss": 0.8539, "step": 20040 }, { - "epoch": 0.5687003405221339, + "epoch": 0.5679106803819888, "grad_norm": 0.0, - "learning_rate": 8.269854388715098e-06, - "loss": 0.7835, + "learning_rate": 8.294860287112444e-06, + "loss": 0.9138, "step": 20041 }, { - "epoch": 0.5687287173666288, + "epoch": 0.5679390178242512, "grad_norm": 0.0, - "learning_rate": 8.268949178326589e-06, - "loss": 0.7972, + "learning_rate": 8.293955945816846e-06, + "loss": 0.8994, "step": 20042 }, { - "epoch": 0.5687570942111237, + "epoch": 0.5679673552665137, "grad_norm": 0.0, - "learning_rate": 8.268043982560345e-06, - "loss": 0.8425, + "learning_rate": 8.293051618891885e-06, + "loss": 0.8705, "step": 20043 }, { - "epoch": 0.5687854710556186, + "epoch": 0.5679956927087761, "grad_norm": 0.0, - "learning_rate": 8.267138801424007e-06, - "loss": 0.8173, + "learning_rate": 8.292147306345191e-06, + "loss": 0.8798, "step": 20044 }, { - "epoch": 0.5688138479001135, + "epoch": 0.5680240301510385, "grad_norm": 0.0, - "learning_rate": 8.266233634925232e-06, - "loss": 0.9437, + "learning_rate": 8.291243008184368e-06, + "loss": 0.8749, "step": 20045 }, { - "epoch": 0.5688422247446084, + "epoch": 0.568052367593301, "grad_norm": 0.0, - "learning_rate": 8.265328483071655e-06, - "loss": 0.9696, + "learning_rate": 8.29033872441704e-06, + "loss": 0.7817, "step": 20046 }, { - "epoch": 0.5688706015891033, + "epoch": 0.5680807050355635, "grad_norm": 0.0, - "learning_rate": 8.264423345870928e-06, - "loss": 0.9821, + "learning_rate": 8.289434455050826e-06, + "loss": 0.9671, "step": 20047 }, { - "epoch": 0.5688989784335982, + "epoch": 0.5681090424778259, "grad_norm": 0.0, - "learning_rate": 8.263518223330698e-06, - "loss": 0.9003, + "learning_rate": 8.28853020009334e-06, + "loss": 0.9596, "step": 20048 }, { - "epoch": 0.5689273552780931, + "epoch": 0.5681373799200884, "grad_norm": 0.0, - "learning_rate": 8.262613115458604e-06, - "loss": 0.8299, + "learning_rate": 8.2876259595522e-06, + "loss": 0.9246, "step": 20049 }, { - "epoch": 0.568955732122588, + "epoch": 0.5681657173623509, "grad_norm": 0.0, - "learning_rate": 8.261708022262295e-06, - "loss": 0.897, + "learning_rate": 8.286721733435023e-06, + "loss": 0.8526, "step": 20050 }, { - "epoch": 0.5689841089670828, + "epoch": 0.5681940548046134, "grad_norm": 0.0, - "learning_rate": 8.26080294374942e-06, - "loss": 0.9781, + "learning_rate": 8.285817521749423e-06, + "loss": 0.8784, "step": 20051 }, { - "epoch": 0.5690124858115777, + "epoch": 0.5682223922468758, "grad_norm": 0.0, - "learning_rate": 8.259897879927616e-06, - "loss": 0.8192, + "learning_rate": 8.284913324503021e-06, + "loss": 0.9891, "step": 20052 }, { - "epoch": 0.5690408626560727, + "epoch": 0.5682507296891383, "grad_norm": 0.0, - "learning_rate": 8.258992830804538e-06, - "loss": 0.7958, + "learning_rate": 8.28400914170343e-06, + "loss": 0.7858, "step": 20053 }, { - "epoch": 0.5690692395005675, + "epoch": 0.5682790671314008, "grad_norm": 0.0, - "learning_rate": 8.258087796387823e-06, - "loss": 0.8515, + "learning_rate": 8.283104973358265e-06, + "loss": 0.8528, "step": 20054 }, { - "epoch": 0.5690976163450624, + "epoch": 0.5683074045736631, "grad_norm": 0.0, - "learning_rate": 8.257182776685118e-06, - "loss": 0.6908, + "learning_rate": 8.28220081947515e-06, + "loss": 0.8151, "step": 20055 }, { - "epoch": 0.5691259931895574, + "epoch": 0.5683357420159256, "grad_norm": 0.0, - "learning_rate": 8.256277771704069e-06, - "loss": 0.8519, + "learning_rate": 8.28129668006169e-06, + "loss": 0.889, "step": 20056 }, { - "epoch": 0.5691543700340522, + "epoch": 0.5683640794581881, "grad_norm": 0.0, - "learning_rate": 8.255372781452322e-06, - "loss": 0.8407, + "learning_rate": 8.280392555125506e-06, + "loss": 0.8216, "step": 20057 }, { - "epoch": 0.5691827468785471, + "epoch": 0.5683924169004506, "grad_norm": 0.0, - "learning_rate": 8.254467805937518e-06, - "loss": 0.8293, + "learning_rate": 8.279488444674221e-06, + "loss": 0.861, "step": 20058 }, { - "epoch": 0.569211123723042, + "epoch": 0.568420754342713, "grad_norm": 0.0, - "learning_rate": 8.2535628451673e-06, - "loss": 0.812, + "learning_rate": 8.278584348715436e-06, + "loss": 0.8285, "step": 20059 }, { - "epoch": 0.5692395005675369, + "epoch": 0.5684490917849755, "grad_norm": 0.0, - "learning_rate": 8.252657899149316e-06, - "loss": 0.9187, + "learning_rate": 8.27768026725678e-06, + "loss": 0.8779, "step": 20060 }, { - "epoch": 0.5692678774120318, + "epoch": 0.568477429227238, "grad_norm": 0.0, - "learning_rate": 8.251752967891211e-06, - "loss": 0.9028, + "learning_rate": 8.276776200305858e-06, + "loss": 0.8524, "step": 20061 }, { - "epoch": 0.5692962542565266, + "epoch": 0.5685057666695004, "grad_norm": 0.0, - "learning_rate": 8.250848051400628e-06, - "loss": 0.9074, + "learning_rate": 8.27587214787029e-06, + "loss": 0.8834, "step": 20062 }, { - "epoch": 0.5693246311010216, + "epoch": 0.5685341041117629, "grad_norm": 0.0, - "learning_rate": 8.249943149685209e-06, - "loss": 0.8756, + "learning_rate": 8.274968109957696e-06, + "loss": 0.9357, "step": 20063 }, { - "epoch": 0.5693530079455165, + "epoch": 0.5685624415540254, "grad_norm": 0.0, - "learning_rate": 8.2490382627526e-06, - "loss": 0.8967, + "learning_rate": 8.274064086575682e-06, + "loss": 0.9681, "step": 20064 }, { - "epoch": 0.5693813847900113, + "epoch": 0.5685907789962878, "grad_norm": 0.0, - "learning_rate": 8.248133390610445e-06, - "loss": 0.9267, + "learning_rate": 8.27316007773187e-06, + "loss": 0.6922, "step": 20065 }, { - "epoch": 0.5694097616345063, + "epoch": 0.5686191164385502, "grad_norm": 0.0, - "learning_rate": 8.247228533266381e-06, - "loss": 0.9227, + "learning_rate": 8.272256083433869e-06, + "loss": 0.8337, "step": 20066 }, { - "epoch": 0.5694381384790012, + "epoch": 0.5686474538808127, "grad_norm": 0.0, - "learning_rate": 8.24632369072806e-06, - "loss": 0.8578, + "learning_rate": 8.271352103689296e-06, + "loss": 0.9109, "step": 20067 }, { - "epoch": 0.569466515323496, + "epoch": 0.5686757913230752, "grad_norm": 0.0, - "learning_rate": 8.245418863003123e-06, - "loss": 0.9749, + "learning_rate": 8.270448138505771e-06, + "loss": 0.999, "step": 20068 }, { - "epoch": 0.5694948921679909, + "epoch": 0.5687041287653376, "grad_norm": 0.0, - "learning_rate": 8.244514050099207e-06, - "loss": 0.8823, + "learning_rate": 8.2695441878909e-06, + "loss": 0.8709, "step": 20069 }, { - "epoch": 0.5695232690124858, + "epoch": 0.5687324662076001, "grad_norm": 0.0, - "learning_rate": 8.243609252023964e-06, - "loss": 0.9039, + "learning_rate": 8.2686402518523e-06, + "loss": 0.786, "step": 20070 }, { - "epoch": 0.5695516458569807, + "epoch": 0.5687608036498626, "grad_norm": 0.0, - "learning_rate": 8.242704468785032e-06, - "loss": 0.9978, + "learning_rate": 8.26773633039759e-06, + "loss": 0.8656, "step": 20071 }, { - "epoch": 0.5695800227014756, + "epoch": 0.568789141092125, "grad_norm": 0.0, - "learning_rate": 8.241799700390052e-06, - "loss": 0.81, + "learning_rate": 8.266832423534376e-06, + "loss": 0.882, "step": 20072 }, { - "epoch": 0.5696083995459705, + "epoch": 0.5688174785343875, "grad_norm": 0.0, - "learning_rate": 8.24089494684667e-06, - "loss": 0.8529, + "learning_rate": 8.265928531270279e-06, + "loss": 0.9589, "step": 20073 }, { - "epoch": 0.5696367763904654, + "epoch": 0.56884581597665, "grad_norm": 0.0, - "learning_rate": 8.23999020816253e-06, - "loss": 0.806, + "learning_rate": 8.265024653612908e-06, + "loss": 0.9117, "step": 20074 }, { - "epoch": 0.5696651532349603, + "epoch": 0.5688741534189125, "grad_norm": 0.0, - "learning_rate": 8.239085484345265e-06, - "loss": 0.9036, + "learning_rate": 8.264120790569877e-06, + "loss": 0.8674, "step": 20075 }, { - "epoch": 0.5696935300794551, + "epoch": 0.5689024908611748, "grad_norm": 0.0, - "learning_rate": 8.23818077540253e-06, - "loss": 0.8818, + "learning_rate": 8.263216942148806e-06, + "loss": 0.9855, "step": 20076 }, { - "epoch": 0.5697219069239501, + "epoch": 0.5689308283034373, "grad_norm": 0.0, - "learning_rate": 8.237276081341959e-06, - "loss": 0.7996, + "learning_rate": 8.2623131083573e-06, + "loss": 0.9244, "step": 20077 }, { - "epoch": 0.5697502837684449, + "epoch": 0.5689591657456998, "grad_norm": 0.0, - "learning_rate": 8.236371402171198e-06, - "loss": 0.8601, + "learning_rate": 8.261409289202976e-06, + "loss": 0.8968, "step": 20078 }, { - "epoch": 0.5697786606129398, + "epoch": 0.5689875031879622, "grad_norm": 0.0, - "learning_rate": 8.235466737897886e-06, - "loss": 0.9178, + "learning_rate": 8.260505484693449e-06, + "loss": 0.8937, "step": 20079 }, { - "epoch": 0.5698070374574348, + "epoch": 0.5690158406302247, "grad_norm": 0.0, - "learning_rate": 8.234562088529665e-06, - "loss": 0.8898, + "learning_rate": 8.259601694836328e-06, + "loss": 0.8347, "step": 20080 }, { - "epoch": 0.5698354143019296, + "epoch": 0.5690441780724872, "grad_norm": 0.0, - "learning_rate": 8.23365745407418e-06, - "loss": 0.8932, + "learning_rate": 8.258697919639234e-06, + "loss": 0.7694, "step": 20081 }, { - "epoch": 0.5698637911464245, + "epoch": 0.5690725155147497, "grad_norm": 0.0, - "learning_rate": 8.232752834539068e-06, - "loss": 0.9774, + "learning_rate": 8.257794159109768e-06, + "loss": 0.8257, "step": 20082 }, { - "epoch": 0.5698921679909195, + "epoch": 0.5691008529570121, "grad_norm": 0.0, - "learning_rate": 8.23184822993197e-06, - "loss": 0.9531, + "learning_rate": 8.256890413255549e-06, + "loss": 0.8455, "step": 20083 }, { - "epoch": 0.5699205448354143, + "epoch": 0.5691291903992746, "grad_norm": 0.0, - "learning_rate": 8.230943640260533e-06, - "loss": 0.935, + "learning_rate": 8.255986682084194e-06, + "loss": 0.8802, "step": 20084 }, { - "epoch": 0.5699489216799092, + "epoch": 0.5691575278415371, "grad_norm": 0.0, - "learning_rate": 8.230039065532395e-06, - "loss": 0.817, + "learning_rate": 8.255082965603307e-06, + "loss": 0.7696, "step": 20085 }, { - "epoch": 0.569977298524404, + "epoch": 0.5691858652837994, "grad_norm": 0.0, - "learning_rate": 8.229134505755192e-06, - "loss": 0.9216, + "learning_rate": 8.254179263820503e-06, + "loss": 0.8628, "step": 20086 }, { - "epoch": 0.570005675368899, + "epoch": 0.5692142027260619, "grad_norm": 0.0, - "learning_rate": 8.228229960936574e-06, - "loss": 0.9606, + "learning_rate": 8.253275576743397e-06, + "loss": 0.8012, "step": 20087 }, { - "epoch": 0.5700340522133939, + "epoch": 0.5692425401683244, "grad_norm": 0.0, - "learning_rate": 8.227325431084176e-06, - "loss": 0.9147, + "learning_rate": 8.252371904379598e-06, + "loss": 0.8811, "step": 20088 }, { - "epoch": 0.5700624290578887, + "epoch": 0.5692708776105869, "grad_norm": 0.0, - "learning_rate": 8.22642091620564e-06, - "loss": 0.7551, + "learning_rate": 8.251468246736725e-06, + "loss": 0.8235, "step": 20089 }, { - "epoch": 0.5700908059023837, + "epoch": 0.5692992150528493, "grad_norm": 0.0, - "learning_rate": 8.225516416308601e-06, - "loss": 0.9154, + "learning_rate": 8.250564603822378e-06, + "loss": 0.8578, "step": 20090 }, { - "epoch": 0.5701191827468786, + "epoch": 0.5693275524951118, "grad_norm": 0.0, - "learning_rate": 8.22461193140071e-06, - "loss": 0.7389, + "learning_rate": 8.249660975644176e-06, + "loss": 0.8073, "step": 20091 }, { - "epoch": 0.5701475595913734, + "epoch": 0.5693558899373743, "grad_norm": 0.0, - "learning_rate": 8.223707461489602e-06, - "loss": 0.7355, + "learning_rate": 8.248757362209732e-06, + "loss": 0.8676, "step": 20092 }, { - "epoch": 0.5701759364358683, + "epoch": 0.5693842273796367, "grad_norm": 0.0, - "learning_rate": 8.222803006582915e-06, - "loss": 0.9361, + "learning_rate": 8.247853763526652e-06, + "loss": 0.7514, "step": 20093 }, { - "epoch": 0.5702043132803633, + "epoch": 0.5694125648218992, "grad_norm": 0.0, - "learning_rate": 8.221898566688294e-06, - "loss": 0.8297, + "learning_rate": 8.246950179602554e-06, + "loss": 0.8861, "step": 20094 }, { - "epoch": 0.5702326901248581, + "epoch": 0.5694409022641617, "grad_norm": 0.0, - "learning_rate": 8.220994141813373e-06, - "loss": 0.9845, + "learning_rate": 8.246046610445044e-06, + "loss": 0.9029, "step": 20095 }, { - "epoch": 0.570261066969353, + "epoch": 0.569469239706424, "grad_norm": 0.0, - "learning_rate": 8.220089731965794e-06, - "loss": 0.7707, + "learning_rate": 8.245143056061732e-06, + "loss": 0.8008, "step": 20096 }, { - "epoch": 0.5702894438138479, + "epoch": 0.5694975771486865, "grad_norm": 0.0, - "learning_rate": 8.219185337153199e-06, - "loss": 0.8861, + "learning_rate": 8.244239516460238e-06, + "loss": 0.818, "step": 20097 }, { - "epoch": 0.5703178206583428, + "epoch": 0.569525914590949, "grad_norm": 0.0, - "learning_rate": 8.218280957383225e-06, - "loss": 0.8417, + "learning_rate": 8.243335991648163e-06, + "loss": 0.7404, "step": 20098 }, { - "epoch": 0.5703461975028377, + "epoch": 0.5695542520332115, "grad_norm": 0.0, - "learning_rate": 8.217376592663513e-06, - "loss": 0.8711, + "learning_rate": 8.242432481633119e-06, + "loss": 0.8756, "step": 20099 }, { - "epoch": 0.5703745743473326, + "epoch": 0.5695825894754739, "grad_norm": 0.0, - "learning_rate": 8.2164722430017e-06, - "loss": 0.7962, + "learning_rate": 8.241528986422722e-06, + "loss": 0.9243, "step": 20100 }, { - "epoch": 0.5704029511918275, + "epoch": 0.5696109269177364, "grad_norm": 0.0, - "learning_rate": 8.215567908405424e-06, - "loss": 0.8359, + "learning_rate": 8.240625506024576e-06, + "loss": 1.0107, "step": 20101 }, { - "epoch": 0.5704313280363223, + "epoch": 0.5696392643599989, "grad_norm": 0.0, - "learning_rate": 8.21466358888233e-06, - "loss": 0.8777, + "learning_rate": 8.239722040446301e-06, + "loss": 0.7875, "step": 20102 }, { - "epoch": 0.5704597048808172, + "epoch": 0.5696676018022613, "grad_norm": 0.0, - "learning_rate": 8.213759284440049e-06, - "loss": 0.7977, + "learning_rate": 8.238818589695496e-06, + "loss": 0.7709, "step": 20103 }, { - "epoch": 0.5704880817253122, + "epoch": 0.5696959392445238, "grad_norm": 0.0, - "learning_rate": 8.212854995086225e-06, - "loss": 0.826, + "learning_rate": 8.237915153779774e-06, + "loss": 0.8543, "step": 20104 }, { - "epoch": 0.570516458569807, + "epoch": 0.5697242766867863, "grad_norm": 0.0, - "learning_rate": 8.211950720828496e-06, - "loss": 0.8206, + "learning_rate": 8.237011732706751e-06, + "loss": 0.837, "step": 20105 }, { - "epoch": 0.5705448354143019, + "epoch": 0.5697526141290488, "grad_norm": 0.0, - "learning_rate": 8.211046461674497e-06, - "loss": 0.8243, + "learning_rate": 8.23610832648403e-06, + "loss": 0.9477, "step": 20106 }, { - "epoch": 0.5705732122587969, + "epoch": 0.5697809515713111, "grad_norm": 0.0, - "learning_rate": 8.21014221763187e-06, - "loss": 0.8226, + "learning_rate": 8.235204935119223e-06, + "loss": 0.8574, "step": 20107 }, { - "epoch": 0.5706015891032917, + "epoch": 0.5698092890135736, "grad_norm": 0.0, - "learning_rate": 8.209237988708254e-06, - "loss": 0.7995, + "learning_rate": 8.234301558619948e-06, + "loss": 0.7752, "step": 20108 }, { - "epoch": 0.5706299659477866, + "epoch": 0.5698376264558361, "grad_norm": 0.0, - "learning_rate": 8.208333774911284e-06, - "loss": 0.8886, + "learning_rate": 8.2333981969938e-06, + "loss": 0.8258, "step": 20109 }, { - "epoch": 0.5706583427922814, + "epoch": 0.5698659638980985, "grad_norm": 0.0, - "learning_rate": 8.2074295762486e-06, - "loss": 0.8375, + "learning_rate": 8.232494850248398e-06, + "loss": 0.9484, "step": 20110 }, { - "epoch": 0.5706867196367764, + "epoch": 0.569894301340361, "grad_norm": 0.0, - "learning_rate": 8.20652539272784e-06, - "loss": 0.8236, + "learning_rate": 8.231591518391345e-06, + "loss": 0.9123, "step": 20111 }, { - "epoch": 0.5707150964812713, + "epoch": 0.5699226387826235, "grad_norm": 0.0, - "learning_rate": 8.20562122435664e-06, - "loss": 0.9503, + "learning_rate": 8.230688201430253e-06, + "loss": 0.8665, "step": 20112 }, { - "epoch": 0.5707434733257661, + "epoch": 0.569950976224886, "grad_norm": 0.0, - "learning_rate": 8.204717071142635e-06, - "loss": 0.9234, + "learning_rate": 8.229784899372734e-06, + "loss": 0.7996, "step": 20113 }, { - "epoch": 0.5707718501702611, + "epoch": 0.5699793136671484, "grad_norm": 0.0, - "learning_rate": 8.203812933093469e-06, - "loss": 0.9425, + "learning_rate": 8.228881612226391e-06, + "loss": 0.95, "step": 20114 }, { - "epoch": 0.570800227014756, + "epoch": 0.5700076511094109, "grad_norm": 0.0, - "learning_rate": 8.202908810216775e-06, - "loss": 0.9304, + "learning_rate": 8.227978339998838e-06, + "loss": 0.9644, "step": 20115 }, { - "epoch": 0.5708286038592508, + "epoch": 0.5700359885516734, "grad_norm": 0.0, - "learning_rate": 8.20200470252019e-06, - "loss": 0.8814, + "learning_rate": 8.227075082697685e-06, + "loss": 0.8384, "step": 20116 }, { - "epoch": 0.5708569807037458, + "epoch": 0.5700643259939357, "grad_norm": 0.0, - "learning_rate": 8.201100610011355e-06, - "loss": 0.746, + "learning_rate": 8.226171840330531e-06, + "loss": 0.802, "step": 20117 }, { - "epoch": 0.5708853575482407, + "epoch": 0.5700926634361982, "grad_norm": 0.0, - "learning_rate": 8.200196532697901e-06, - "loss": 0.8648, + "learning_rate": 8.225268612904994e-06, + "loss": 0.7895, "step": 20118 }, { - "epoch": 0.5709137343927355, + "epoch": 0.5701210008784607, "grad_norm": 0.0, - "learning_rate": 8.199292470587468e-06, - "loss": 0.8534, + "learning_rate": 8.224365400428676e-06, + "loss": 0.8523, "step": 20119 }, { - "epoch": 0.5709421112372304, + "epoch": 0.5701493383207231, "grad_norm": 0.0, - "learning_rate": 8.198388423687694e-06, - "loss": 0.8586, + "learning_rate": 8.223462202909187e-06, + "loss": 0.8878, "step": 20120 }, { - "epoch": 0.5709704880817253, + "epoch": 0.5701776757629856, "grad_norm": 0.0, - "learning_rate": 8.19748439200621e-06, - "loss": 0.7806, + "learning_rate": 8.22255902035414e-06, + "loss": 0.9877, "step": 20121 }, { - "epoch": 0.5709988649262202, + "epoch": 0.5702060132052481, "grad_norm": 0.0, - "learning_rate": 8.19658037555066e-06, - "loss": 0.9643, + "learning_rate": 8.221655852771134e-06, + "loss": 0.8687, "step": 20122 }, { - "epoch": 0.5710272417707151, + "epoch": 0.5702343506475106, "grad_norm": 0.0, - "learning_rate": 8.195676374328679e-06, - "loss": 0.8096, + "learning_rate": 8.220752700167786e-06, + "loss": 0.8983, "step": 20123 }, { - "epoch": 0.57105561861521, + "epoch": 0.570262688089773, "grad_norm": 0.0, - "learning_rate": 8.194772388347896e-06, - "loss": 0.87, + "learning_rate": 8.219849562551695e-06, + "loss": 0.8181, "step": 20124 }, { - "epoch": 0.5710839954597049, + "epoch": 0.5702910255320355, "grad_norm": 0.0, - "learning_rate": 8.193868417615954e-06, - "loss": 0.8695, + "learning_rate": 8.21894643993047e-06, + "loss": 0.8033, "step": 20125 }, { - "epoch": 0.5711123723041998, + "epoch": 0.570319362974298, "grad_norm": 0.0, - "learning_rate": 8.192964462140487e-06, - "loss": 0.8813, + "learning_rate": 8.218043332311724e-06, + "loss": 0.882, "step": 20126 }, { - "epoch": 0.5711407491486946, + "epoch": 0.5703477004165604, "grad_norm": 0.0, - "learning_rate": 8.19206052192913e-06, - "loss": 0.8589, + "learning_rate": 8.217140239703059e-06, + "loss": 0.7478, "step": 20127 }, { - "epoch": 0.5711691259931896, + "epoch": 0.5703760378588228, "grad_norm": 0.0, - "learning_rate": 8.19115659698952e-06, - "loss": 0.8864, + "learning_rate": 8.216237162112085e-06, + "loss": 0.8309, "step": 20128 }, { - "epoch": 0.5711975028376844, + "epoch": 0.5704043753010853, "grad_norm": 0.0, - "learning_rate": 8.19025268732929e-06, - "loss": 1.0099, + "learning_rate": 8.215334099546411e-06, + "loss": 0.8864, "step": 20129 }, { - "epoch": 0.5712258796821793, + "epoch": 0.5704327127433478, "grad_norm": 0.0, - "learning_rate": 8.189348792956076e-06, - "loss": 0.9211, + "learning_rate": 8.214431052013636e-06, + "loss": 0.8551, "step": 20130 }, { - "epoch": 0.5712542565266743, + "epoch": 0.5704610501856102, "grad_norm": 0.0, - "learning_rate": 8.188444913877518e-06, - "loss": 1.0181, + "learning_rate": 8.213528019521372e-06, + "loss": 0.8267, "step": 20131 }, { - "epoch": 0.5712826333711691, + "epoch": 0.5704893876278727, "grad_norm": 0.0, - "learning_rate": 8.187541050101245e-06, - "loss": 0.8907, + "learning_rate": 8.212625002077223e-06, + "loss": 0.8381, "step": 20132 }, { - "epoch": 0.571311010215664, + "epoch": 0.5705177250701352, "grad_norm": 0.0, - "learning_rate": 8.186637201634892e-06, - "loss": 0.8091, + "learning_rate": 8.2117219996888e-06, + "loss": 0.7677, "step": 20133 }, { - "epoch": 0.571339387060159, + "epoch": 0.5705460625123976, "grad_norm": 0.0, - "learning_rate": 8.185733368486099e-06, - "loss": 0.8372, + "learning_rate": 8.21081901236371e-06, + "loss": 0.8886, "step": 20134 }, { - "epoch": 0.5713677639046538, + "epoch": 0.5705743999546601, "grad_norm": 0.0, - "learning_rate": 8.184829550662498e-06, - "loss": 1.0092, + "learning_rate": 8.20991604010955e-06, + "loss": 0.8383, "step": 20135 }, { - "epoch": 0.5713961407491487, + "epoch": 0.5706027373969226, "grad_norm": 0.0, - "learning_rate": 8.18392574817172e-06, - "loss": 0.8702, + "learning_rate": 8.209013082933932e-06, + "loss": 0.8495, "step": 20136 }, { - "epoch": 0.5714245175936435, + "epoch": 0.570631074839185, "grad_norm": 0.0, - "learning_rate": 8.183021961021402e-06, - "loss": 0.9171, + "learning_rate": 8.208110140844467e-06, + "loss": 0.8083, "step": 20137 }, { - "epoch": 0.5714528944381385, + "epoch": 0.5706594122814475, "grad_norm": 0.0, - "learning_rate": 8.182118189219183e-06, - "loss": 0.8976, + "learning_rate": 8.20720721384875e-06, + "loss": 0.8174, "step": 20138 }, { - "epoch": 0.5714812712826334, + "epoch": 0.57068774972371, "grad_norm": 0.0, - "learning_rate": 8.181214432772694e-06, - "loss": 0.8696, + "learning_rate": 8.206304301954397e-06, + "loss": 0.7749, "step": 20139 }, { - "epoch": 0.5715096481271282, + "epoch": 0.5707160871659724, "grad_norm": 0.0, - "learning_rate": 8.180310691689569e-06, - "loss": 0.8896, + "learning_rate": 8.205401405169007e-06, + "loss": 0.8742, "step": 20140 }, { - "epoch": 0.5715380249716232, + "epoch": 0.5707444246082348, "grad_norm": 0.0, - "learning_rate": 8.179406965977438e-06, - "loss": 0.89, + "learning_rate": 8.204498523500185e-06, + "loss": 0.9115, "step": 20141 }, { - "epoch": 0.5715664018161181, + "epoch": 0.5707727620504973, "grad_norm": 0.0, - "learning_rate": 8.17850325564394e-06, - "loss": 0.8839, + "learning_rate": 8.203595656955546e-06, + "loss": 0.9522, "step": 20142 }, { - "epoch": 0.5715947786606129, + "epoch": 0.5708010994927598, "grad_norm": 0.0, - "learning_rate": 8.177599560696707e-06, - "loss": 0.8628, + "learning_rate": 8.20269280554268e-06, + "loss": 0.9793, "step": 20143 }, { - "epoch": 0.5716231555051078, + "epoch": 0.5708294369350222, "grad_norm": 0.0, - "learning_rate": 8.176695881143371e-06, - "loss": 0.8739, + "learning_rate": 8.201789969269202e-06, + "loss": 0.9137, "step": 20144 }, { - "epoch": 0.5716515323496028, + "epoch": 0.5708577743772847, "grad_norm": 0.0, - "learning_rate": 8.17579221699157e-06, - "loss": 0.888, + "learning_rate": 8.200887148142716e-06, + "loss": 0.9247, "step": 20145 }, { - "epoch": 0.5716799091940976, + "epoch": 0.5708861118195472, "grad_norm": 0.0, - "learning_rate": 8.174888568248931e-06, - "loss": 0.8631, + "learning_rate": 8.199984342170823e-06, + "loss": 0.8739, "step": 20146 }, { - "epoch": 0.5717082860385925, + "epoch": 0.5709144492618097, "grad_norm": 0.0, - "learning_rate": 8.173984934923091e-06, - "loss": 0.9469, + "learning_rate": 8.199081551361137e-06, + "loss": 0.7416, "step": 20147 }, { - "epoch": 0.5717366628830874, + "epoch": 0.5709427867040721, "grad_norm": 0.0, - "learning_rate": 8.173081317021684e-06, - "loss": 0.6925, + "learning_rate": 8.198178775721249e-06, + "loss": 0.8636, "step": 20148 }, { - "epoch": 0.5717650397275823, + "epoch": 0.5709711241463346, "grad_norm": 0.0, - "learning_rate": 8.17217771455234e-06, - "loss": 0.9021, + "learning_rate": 8.197276015258773e-06, + "loss": 0.79, "step": 20149 }, { - "epoch": 0.5717934165720772, + "epoch": 0.570999461588597, "grad_norm": 0.0, - "learning_rate": 8.171274127522692e-06, - "loss": 0.8752, + "learning_rate": 8.196373269981311e-06, + "loss": 0.8669, "step": 20150 }, { - "epoch": 0.571821793416572, + "epoch": 0.5710277990308594, "grad_norm": 0.0, - "learning_rate": 8.170370555940375e-06, - "loss": 0.9186, + "learning_rate": 8.195470539896464e-06, + "loss": 0.8178, "step": 20151 }, { - "epoch": 0.571850170261067, + "epoch": 0.5710561364731219, "grad_norm": 0.0, - "learning_rate": 8.169466999813016e-06, - "loss": 0.8066, + "learning_rate": 8.194567825011843e-06, + "loss": 0.832, "step": 20152 }, { - "epoch": 0.5718785471055619, + "epoch": 0.5710844739153844, "grad_norm": 0.0, - "learning_rate": 8.168563459148257e-06, - "loss": 0.7665, + "learning_rate": 8.193665125335043e-06, + "loss": 0.9144, "step": 20153 }, { - "epoch": 0.5719069239500567, + "epoch": 0.5711128113576469, "grad_norm": 0.0, - "learning_rate": 8.167659933953723e-06, - "loss": 0.9104, + "learning_rate": 8.192762440873675e-06, + "loss": 0.8036, "step": 20154 }, { - "epoch": 0.5719353007945517, + "epoch": 0.5711411487999093, "grad_norm": 0.0, - "learning_rate": 8.166756424237047e-06, - "loss": 0.8349, + "learning_rate": 8.191859771635343e-06, + "loss": 0.9486, "step": 20155 }, { - "epoch": 0.5719636776390465, + "epoch": 0.5711694862421718, "grad_norm": 0.0, - "learning_rate": 8.165852930005865e-06, - "loss": 0.7948, + "learning_rate": 8.190957117627642e-06, + "loss": 0.9444, "step": 20156 }, { - "epoch": 0.5719920544835414, + "epoch": 0.5711978236844343, "grad_norm": 0.0, - "learning_rate": 8.1649494512678e-06, - "loss": 0.8519, + "learning_rate": 8.190054478858183e-06, + "loss": 0.9266, "step": 20157 }, { - "epoch": 0.5720204313280364, + "epoch": 0.5712261611266967, "grad_norm": 0.0, - "learning_rate": 8.164045988030495e-06, - "loss": 0.8513, + "learning_rate": 8.189151855334569e-06, + "loss": 0.8922, "step": 20158 }, { - "epoch": 0.5720488081725312, + "epoch": 0.5712544985689592, "grad_norm": 0.0, - "learning_rate": 8.163142540301575e-06, - "loss": 0.8241, + "learning_rate": 8.188249247064398e-06, + "loss": 0.7789, "step": 20159 }, { - "epoch": 0.5720771850170261, + "epoch": 0.5712828360112217, "grad_norm": 0.0, - "learning_rate": 8.162239108088671e-06, - "loss": 0.8732, + "learning_rate": 8.187346654055282e-06, + "loss": 0.9788, "step": 20160 }, { - "epoch": 0.572105561861521, + "epoch": 0.571311173453484, "grad_norm": 0.0, - "learning_rate": 8.161335691399417e-06, - "loss": 0.7513, + "learning_rate": 8.186444076314813e-06, + "loss": 0.8893, "step": 20161 }, { - "epoch": 0.5721339387060159, + "epoch": 0.5713395108957465, "grad_norm": 0.0, - "learning_rate": 8.160432290241443e-06, - "loss": 0.7617, + "learning_rate": 8.1855415138506e-06, + "loss": 0.8319, "step": 20162 }, { - "epoch": 0.5721623155505108, + "epoch": 0.571367848338009, "grad_norm": 0.0, - "learning_rate": 8.159528904622379e-06, - "loss": 0.8395, + "learning_rate": 8.184638966670246e-06, + "loss": 0.8682, "step": 20163 }, { - "epoch": 0.5721906923950056, + "epoch": 0.5713961857802715, "grad_norm": 0.0, - "learning_rate": 8.158625534549859e-06, - "loss": 0.904, + "learning_rate": 8.183736434781349e-06, + "loss": 0.8297, "step": 20164 }, { - "epoch": 0.5722190692395006, + "epoch": 0.5714245232225339, "grad_norm": 0.0, - "learning_rate": 8.157722180031512e-06, - "loss": 0.8971, + "learning_rate": 8.182833918191515e-06, + "loss": 0.8545, "step": 20165 }, { - "epoch": 0.5722474460839955, + "epoch": 0.5714528606647964, "grad_norm": 0.0, - "learning_rate": 8.156818841074967e-06, - "loss": 0.8804, + "learning_rate": 8.181931416908351e-06, + "loss": 0.8609, "step": 20166 }, { - "epoch": 0.5722758229284903, + "epoch": 0.5714811981070589, "grad_norm": 0.0, - "learning_rate": 8.155915517687854e-06, - "loss": 0.9313, + "learning_rate": 8.181028930939448e-06, + "loss": 0.9221, "step": 20167 }, { - "epoch": 0.5723041997729852, + "epoch": 0.5715095355493213, "grad_norm": 0.0, - "learning_rate": 8.155012209877807e-06, - "loss": 0.8781, + "learning_rate": 8.18012646029242e-06, + "loss": 0.9209, "step": 20168 }, { - "epoch": 0.5723325766174802, + "epoch": 0.5715378729915838, "grad_norm": 0.0, - "learning_rate": 8.154108917652456e-06, - "loss": 0.8781, + "learning_rate": 8.179224004974857e-06, + "loss": 0.9679, "step": 20169 }, { - "epoch": 0.572360953461975, + "epoch": 0.5715662104338463, "grad_norm": 0.0, - "learning_rate": 8.15320564101943e-06, - "loss": 0.8812, + "learning_rate": 8.178321564994368e-06, + "loss": 0.86, "step": 20170 }, { - "epoch": 0.5723893303064699, + "epoch": 0.5715945478761087, "grad_norm": 0.0, - "learning_rate": 8.152302379986358e-06, - "loss": 0.7483, + "learning_rate": 8.177419140358553e-06, + "loss": 0.8364, "step": 20171 }, { - "epoch": 0.5724177071509648, + "epoch": 0.5716228853183711, "grad_norm": 0.0, - "learning_rate": 8.151399134560872e-06, - "loss": 0.8851, + "learning_rate": 8.176516731075012e-06, + "loss": 0.9361, "step": 20172 }, { - "epoch": 0.5724460839954597, + "epoch": 0.5716512227606336, "grad_norm": 0.0, - "learning_rate": 8.150495904750601e-06, - "loss": 0.7915, + "learning_rate": 8.175614337151348e-06, + "loss": 0.8323, "step": 20173 }, { - "epoch": 0.5724744608399546, + "epoch": 0.5716795602028961, "grad_norm": 0.0, - "learning_rate": 8.149592690563172e-06, - "loss": 0.894, + "learning_rate": 8.174711958595165e-06, + "loss": 1.0352, "step": 20174 }, { - "epoch": 0.5725028376844495, + "epoch": 0.5717078976451585, "grad_norm": 0.0, - "learning_rate": 8.148689492006218e-06, - "loss": 0.9236, + "learning_rate": 8.173809595414057e-06, + "loss": 0.8885, "step": 20175 }, { - "epoch": 0.5725312145289444, + "epoch": 0.571736235087421, "grad_norm": 0.0, - "learning_rate": 8.14778630908737e-06, - "loss": 0.9329, + "learning_rate": 8.172907247615632e-06, + "loss": 0.9755, "step": 20176 }, { - "epoch": 0.5725595913734393, + "epoch": 0.5717645725296835, "grad_norm": 0.0, - "learning_rate": 8.146883141814249e-06, - "loss": 0.9032, + "learning_rate": 8.172004915207485e-06, + "loss": 0.7521, "step": 20177 }, { - "epoch": 0.5725879682179341, + "epoch": 0.571792909971946, "grad_norm": 0.0, - "learning_rate": 8.145979990194492e-06, - "loss": 0.9201, + "learning_rate": 8.17110259819722e-06, + "loss": 0.8837, "step": 20178 }, { - "epoch": 0.5726163450624291, + "epoch": 0.5718212474142084, "grad_norm": 0.0, - "learning_rate": 8.145076854235728e-06, - "loss": 0.8516, + "learning_rate": 8.17020029659244e-06, + "loss": 0.8986, "step": 20179 }, { - "epoch": 0.572644721906924, + "epoch": 0.5718495848564709, "grad_norm": 0.0, - "learning_rate": 8.144173733945579e-06, - "loss": 0.8123, + "learning_rate": 8.169298010400739e-06, + "loss": 0.8721, "step": 20180 }, { - "epoch": 0.5726730987514188, + "epoch": 0.5718779222987334, "grad_norm": 0.0, - "learning_rate": 8.143270629331678e-06, - "loss": 0.8035, + "learning_rate": 8.168395739629726e-06, + "loss": 0.9695, "step": 20181 }, { - "epoch": 0.5727014755959138, + "epoch": 0.5719062597409957, "grad_norm": 0.0, - "learning_rate": 8.142367540401652e-06, - "loss": 0.8622, + "learning_rate": 8.16749348428699e-06, + "loss": 0.8073, "step": 20182 }, { - "epoch": 0.5727298524404086, + "epoch": 0.5719345971832582, "grad_norm": 0.0, - "learning_rate": 8.141464467163134e-06, - "loss": 0.8101, + "learning_rate": 8.166591244380138e-06, + "loss": 0.9124, "step": 20183 }, { - "epoch": 0.5727582292849035, + "epoch": 0.5719629346255207, "grad_norm": 0.0, - "learning_rate": 8.14056140962375e-06, - "loss": 0.9348, + "learning_rate": 8.165689019916769e-06, + "loss": 0.8987, "step": 20184 }, { - "epoch": 0.5727866061293984, + "epoch": 0.5719912720677831, "grad_norm": 0.0, - "learning_rate": 8.139658367791126e-06, - "loss": 0.8362, + "learning_rate": 8.164786810904482e-06, + "loss": 0.8755, "step": 20185 }, { - "epoch": 0.5728149829738933, + "epoch": 0.5720196095100456, "grad_norm": 0.0, - "learning_rate": 8.138755341672892e-06, - "loss": 0.796, + "learning_rate": 8.163884617350876e-06, + "loss": 0.8799, "step": 20186 }, { - "epoch": 0.5728433598183882, + "epoch": 0.5720479469523081, "grad_norm": 0.0, - "learning_rate": 8.137852331276677e-06, - "loss": 0.8872, + "learning_rate": 8.162982439263558e-06, + "loss": 0.7938, "step": 20187 }, { - "epoch": 0.572871736662883, + "epoch": 0.5720762843945706, "grad_norm": 0.0, - "learning_rate": 8.136949336610103e-06, - "loss": 0.8356, + "learning_rate": 8.162080276650115e-06, + "loss": 0.8867, "step": 20188 }, { - "epoch": 0.572900113507378, + "epoch": 0.572104621836833, "grad_norm": 0.0, - "learning_rate": 8.136046357680804e-06, - "loss": 1.0266, + "learning_rate": 8.161178129518155e-06, + "loss": 0.7921, "step": 20189 }, { - "epoch": 0.5729284903518729, + "epoch": 0.5721329592790955, "grad_norm": 0.0, - "learning_rate": 8.135143394496407e-06, - "loss": 0.8691, + "learning_rate": 8.160275997875272e-06, + "loss": 0.91, "step": 20190 }, { - "epoch": 0.5729568671963677, + "epoch": 0.572161296721358, "grad_norm": 0.0, - "learning_rate": 8.134240447064534e-06, - "loss": 0.9032, + "learning_rate": 8.159373881729068e-06, + "loss": 0.827, "step": 20191 }, { - "epoch": 0.5729852440408627, + "epoch": 0.5721896341636203, "grad_norm": 0.0, - "learning_rate": 8.133337515392818e-06, - "loss": 0.8373, + "learning_rate": 8.158471781087145e-06, + "loss": 1.0001, "step": 20192 }, { - "epoch": 0.5730136208853576, + "epoch": 0.5722179716058828, "grad_norm": 0.0, - "learning_rate": 8.132434599488883e-06, - "loss": 0.8913, + "learning_rate": 8.157569695957094e-06, + "loss": 0.8127, "step": 20193 }, { - "epoch": 0.5730419977298524, + "epoch": 0.5722463090481453, "grad_norm": 0.0, - "learning_rate": 8.131531699360357e-06, - "loss": 0.796, + "learning_rate": 8.156667626346518e-06, + "loss": 0.7936, "step": 20194 }, { - "epoch": 0.5730703745743473, + "epoch": 0.5722746464904078, "grad_norm": 0.0, - "learning_rate": 8.130628815014868e-06, - "loss": 0.8968, + "learning_rate": 8.15576557226302e-06, + "loss": 1.0005, "step": 20195 }, { - "epoch": 0.5730987514188423, + "epoch": 0.5723029839326702, "grad_norm": 0.0, - "learning_rate": 8.129725946460039e-06, - "loss": 0.905, + "learning_rate": 8.154863533714189e-06, + "loss": 0.9027, "step": 20196 }, { - "epoch": 0.5731271282633371, + "epoch": 0.5723313213749327, "grad_norm": 0.0, - "learning_rate": 8.1288230937035e-06, - "loss": 0.9098, + "learning_rate": 8.153961510707628e-06, + "loss": 0.8671, "step": 20197 }, { - "epoch": 0.573155505107832, + "epoch": 0.5723596588171952, "grad_norm": 0.0, - "learning_rate": 8.127920256752873e-06, - "loss": 0.8363, + "learning_rate": 8.153059503250934e-06, + "loss": 0.9321, "step": 20198 }, { - "epoch": 0.573183881952327, + "epoch": 0.5723879962594576, "grad_norm": 0.0, - "learning_rate": 8.127017435615792e-06, - "loss": 0.8013, + "learning_rate": 8.152157511351704e-06, + "loss": 0.9756, "step": 20199 }, { - "epoch": 0.5732122587968218, + "epoch": 0.5724163337017201, "grad_norm": 0.0, - "learning_rate": 8.12611463029988e-06, - "loss": 0.8103, + "learning_rate": 8.151255535017544e-06, + "loss": 0.9598, "step": 20200 }, { - "epoch": 0.5732406356413167, + "epoch": 0.5724446711439826, "grad_norm": 0.0, - "learning_rate": 8.125211840812759e-06, - "loss": 0.8649, + "learning_rate": 8.15035357425604e-06, + "loss": 0.8513, "step": 20201 }, { - "epoch": 0.5732690124858115, + "epoch": 0.5724730085862451, "grad_norm": 0.0, - "learning_rate": 8.12430906716206e-06, - "loss": 0.9514, + "learning_rate": 8.149451629074793e-06, + "loss": 0.8624, "step": 20202 }, { - "epoch": 0.5732973893303065, + "epoch": 0.5725013460285074, "grad_norm": 0.0, - "learning_rate": 8.123406309355406e-06, - "loss": 0.7747, + "learning_rate": 8.148549699481406e-06, + "loss": 0.8591, "step": 20203 }, { - "epoch": 0.5733257661748014, + "epoch": 0.5725296834707699, "grad_norm": 0.0, - "learning_rate": 8.122503567400423e-06, - "loss": 0.7734, + "learning_rate": 8.147647785483471e-06, + "loss": 0.9716, "step": 20204 }, { - "epoch": 0.5733541430192962, + "epoch": 0.5725580209130324, "grad_norm": 0.0, - "learning_rate": 8.121600841304738e-06, - "loss": 0.9877, + "learning_rate": 8.146745887088589e-06, + "loss": 0.9685, "step": 20205 }, { - "epoch": 0.5733825198637912, + "epoch": 0.5725863583552948, "grad_norm": 0.0, - "learning_rate": 8.120698131075975e-06, - "loss": 0.9026, + "learning_rate": 8.145844004304352e-06, + "loss": 0.9526, "step": 20206 }, { - "epoch": 0.573410896708286, + "epoch": 0.5726146957975573, "grad_norm": 0.0, - "learning_rate": 8.119795436721757e-06, - "loss": 0.761, + "learning_rate": 8.144942137138358e-06, + "loss": 0.7905, "step": 20207 }, { - "epoch": 0.5734392735527809, + "epoch": 0.5726430332398198, "grad_norm": 0.0, - "learning_rate": 8.118892758249713e-06, - "loss": 0.8307, + "learning_rate": 8.14404028559821e-06, + "loss": 0.9262, "step": 20208 }, { - "epoch": 0.5734676503972759, + "epoch": 0.5726713706820822, "grad_norm": 0.0, - "learning_rate": 8.117990095667467e-06, - "loss": 0.9376, + "learning_rate": 8.143138449691495e-06, + "loss": 0.9029, "step": 20209 }, { - "epoch": 0.5734960272417707, + "epoch": 0.5726997081243447, "grad_norm": 0.0, - "learning_rate": 8.117087448982643e-06, - "loss": 0.7968, + "learning_rate": 8.142236629425817e-06, + "loss": 0.8929, "step": 20210 }, { - "epoch": 0.5735244040862656, + "epoch": 0.5727280455666072, "grad_norm": 0.0, - "learning_rate": 8.116184818202864e-06, - "loss": 0.9187, + "learning_rate": 8.141334824808769e-06, + "loss": 0.8019, "step": 20211 }, { - "epoch": 0.5735527809307605, + "epoch": 0.5727563830088697, "grad_norm": 0.0, - "learning_rate": 8.115282203335759e-06, - "loss": 0.9812, + "learning_rate": 8.140433035847947e-06, + "loss": 0.8914, "step": 20212 }, { - "epoch": 0.5735811577752554, + "epoch": 0.572784720451132, "grad_norm": 0.0, - "learning_rate": 8.114379604388946e-06, - "loss": 0.9149, + "learning_rate": 8.139531262550952e-06, + "loss": 0.8634, "step": 20213 }, { - "epoch": 0.5736095346197503, + "epoch": 0.5728130578933945, "grad_norm": 0.0, - "learning_rate": 8.113477021370057e-06, - "loss": 0.8294, + "learning_rate": 8.138629504925372e-06, + "loss": 0.9673, "step": 20214 }, { - "epoch": 0.5736379114642451, + "epoch": 0.572841395335657, "grad_norm": 0.0, - "learning_rate": 8.11257445428671e-06, - "loss": 0.8097, + "learning_rate": 8.137727762978807e-06, + "loss": 0.8146, "step": 20215 }, { - "epoch": 0.5736662883087401, + "epoch": 0.5728697327779194, "grad_norm": 0.0, - "learning_rate": 8.111671903146533e-06, - "loss": 0.9123, + "learning_rate": 8.136826036718854e-06, + "loss": 0.9119, "step": 20216 }, { - "epoch": 0.573694665153235, + "epoch": 0.5728980702201819, "grad_norm": 0.0, - "learning_rate": 8.11076936795715e-06, - "loss": 0.86, + "learning_rate": 8.135924326153106e-06, + "loss": 0.9136, "step": 20217 }, { - "epoch": 0.5737230419977298, + "epoch": 0.5729264076624444, "grad_norm": 0.0, - "learning_rate": 8.10986684872618e-06, - "loss": 0.8356, + "learning_rate": 8.135022631289164e-06, + "loss": 0.8894, "step": 20218 }, { - "epoch": 0.5737514188422247, + "epoch": 0.5729547451047069, "grad_norm": 0.0, - "learning_rate": 8.108964345461252e-06, - "loss": 0.8224, + "learning_rate": 8.134120952134613e-06, + "loss": 0.8891, "step": 20219 }, { - "epoch": 0.5737797956867197, + "epoch": 0.5729830825469693, "grad_norm": 0.0, - "learning_rate": 8.108061858169988e-06, - "loss": 0.8672, + "learning_rate": 8.133219288697056e-06, + "loss": 0.9371, "step": 20220 }, { - "epoch": 0.5738081725312145, + "epoch": 0.5730114199892318, "grad_norm": 0.0, - "learning_rate": 8.107159386860007e-06, - "loss": 0.8489, + "learning_rate": 8.132317640984088e-06, + "loss": 0.9378, "step": 20221 }, { - "epoch": 0.5738365493757094, + "epoch": 0.5730397574314943, "grad_norm": 0.0, - "learning_rate": 8.106256931538938e-06, - "loss": 0.8668, + "learning_rate": 8.131416009003301e-06, + "loss": 0.8827, "step": 20222 }, { - "epoch": 0.5738649262202044, + "epoch": 0.5730680948737567, "grad_norm": 0.0, - "learning_rate": 8.105354492214403e-06, - "loss": 0.9054, + "learning_rate": 8.130514392762289e-06, + "loss": 0.8846, "step": 20223 }, { - "epoch": 0.5738933030646992, + "epoch": 0.5730964323160191, "grad_norm": 0.0, - "learning_rate": 8.104452068894022e-06, - "loss": 0.7875, + "learning_rate": 8.12961279226865e-06, + "loss": 0.9551, "step": 20224 }, { - "epoch": 0.5739216799091941, + "epoch": 0.5731247697582816, "grad_norm": 0.0, - "learning_rate": 8.10354966158542e-06, - "loss": 0.8937, + "learning_rate": 8.128711207529976e-06, + "loss": 0.8836, "step": 20225 }, { - "epoch": 0.573950056753689, + "epoch": 0.5731531072005441, "grad_norm": 0.0, - "learning_rate": 8.10264727029622e-06, - "loss": 0.8986, + "learning_rate": 8.127809638553868e-06, + "loss": 0.911, "step": 20226 }, { - "epoch": 0.5739784335981839, + "epoch": 0.5731814446428065, "grad_norm": 0.0, - "learning_rate": 8.101744895034041e-06, - "loss": 0.8572, + "learning_rate": 8.126908085347907e-06, + "loss": 0.9218, "step": 20227 }, { - "epoch": 0.5740068104426788, + "epoch": 0.573209782085069, "grad_norm": 0.0, - "learning_rate": 8.100842535806509e-06, - "loss": 0.8803, + "learning_rate": 8.126006547919697e-06, + "loss": 0.9387, "step": 20228 }, { - "epoch": 0.5740351872871736, + "epoch": 0.5732381195273315, "grad_norm": 0.0, - "learning_rate": 8.099940192621245e-06, - "loss": 0.8245, + "learning_rate": 8.125105026276832e-06, + "loss": 0.9098, "step": 20229 }, { - "epoch": 0.5740635641316686, + "epoch": 0.5732664569695939, "grad_norm": 0.0, - "learning_rate": 8.099037865485873e-06, - "loss": 0.8925, + "learning_rate": 8.1242035204269e-06, + "loss": 0.9479, "step": 20230 }, { - "epoch": 0.5740919409761635, + "epoch": 0.5732947944118564, "grad_norm": 0.0, - "learning_rate": 8.098135554408015e-06, - "loss": 0.8547, + "learning_rate": 8.1233020303775e-06, + "loss": 0.8884, "step": 20231 }, { - "epoch": 0.5741203178206583, + "epoch": 0.5733231318541189, "grad_norm": 0.0, - "learning_rate": 8.09723325939529e-06, - "loss": 0.9179, + "learning_rate": 8.122400556136226e-06, + "loss": 0.8202, "step": 20232 }, { - "epoch": 0.5741486946651533, + "epoch": 0.5733514692963813, "grad_norm": 0.0, - "learning_rate": 8.096330980455322e-06, - "loss": 0.916, + "learning_rate": 8.121499097710667e-06, + "loss": 0.8336, "step": 20233 }, { - "epoch": 0.5741770715096481, + "epoch": 0.5733798067386437, "grad_norm": 0.0, - "learning_rate": 8.095428717595732e-06, - "loss": 0.7593, + "learning_rate": 8.120597655108422e-06, + "loss": 0.9592, "step": 20234 }, { - "epoch": 0.574205448354143, + "epoch": 0.5734081441809062, "grad_norm": 0.0, - "learning_rate": 8.09452647082414e-06, - "loss": 0.8735, + "learning_rate": 8.119696228337077e-06, + "loss": 0.7797, "step": 20235 }, { - "epoch": 0.5742338251986379, + "epoch": 0.5734364816231687, "grad_norm": 0.0, - "learning_rate": 8.093624240148168e-06, - "loss": 0.9258, + "learning_rate": 8.118794817404229e-06, + "loss": 0.8797, "step": 20236 }, { - "epoch": 0.5742622020431328, + "epoch": 0.5734648190654311, "grad_norm": 0.0, - "learning_rate": 8.09272202557544e-06, - "loss": 0.8757, + "learning_rate": 8.117893422317473e-06, + "loss": 0.8319, "step": 20237 }, { - "epoch": 0.5742905788876277, + "epoch": 0.5734931565076936, "grad_norm": 0.0, - "learning_rate": 8.091819827113571e-06, - "loss": 0.8333, + "learning_rate": 8.116992043084397e-06, + "loss": 0.9018, "step": 20238 }, { - "epoch": 0.5743189557321225, + "epoch": 0.5735214939499561, "grad_norm": 0.0, - "learning_rate": 8.090917644770191e-06, - "loss": 0.8902, + "learning_rate": 8.116090679712601e-06, + "loss": 0.9724, "step": 20239 }, { - "epoch": 0.5743473325766175, + "epoch": 0.5735498313922185, "grad_norm": 0.0, - "learning_rate": 8.090015478552914e-06, - "loss": 0.7787, + "learning_rate": 8.115189332209667e-06, + "loss": 0.8044, "step": 20240 }, { - "epoch": 0.5743757094211124, + "epoch": 0.573578168834481, "grad_norm": 0.0, - "learning_rate": 8.089113328469357e-06, - "loss": 0.9034, + "learning_rate": 8.114288000583194e-06, + "loss": 0.9366, "step": 20241 }, { - "epoch": 0.5744040862656072, + "epoch": 0.5736065062767435, "grad_norm": 0.0, - "learning_rate": 8.08821119452715e-06, - "loss": 0.955, + "learning_rate": 8.113386684840777e-06, + "loss": 0.8863, "step": 20242 }, { - "epoch": 0.5744324631101022, + "epoch": 0.573634843719006, "grad_norm": 0.0, - "learning_rate": 8.087309076733912e-06, - "loss": 0.9628, + "learning_rate": 8.112485384990001e-06, + "loss": 0.8732, "step": 20243 }, { - "epoch": 0.5744608399545971, + "epoch": 0.5736631811612684, "grad_norm": 0.0, - "learning_rate": 8.086406975097252e-06, - "loss": 0.8324, + "learning_rate": 8.111584101038462e-06, + "loss": 0.8932, "step": 20244 }, { - "epoch": 0.5744892167990919, + "epoch": 0.5736915186035308, "grad_norm": 0.0, - "learning_rate": 8.085504889624803e-06, - "loss": 0.8791, + "learning_rate": 8.110682832993757e-06, + "loss": 0.8979, "step": 20245 }, { - "epoch": 0.5745175936435868, + "epoch": 0.5737198560457933, "grad_norm": 0.0, - "learning_rate": 8.08460282032418e-06, - "loss": 0.8959, + "learning_rate": 8.109781580863465e-06, + "loss": 0.8813, "step": 20246 }, { - "epoch": 0.5745459704880818, + "epoch": 0.5737481934880557, "grad_norm": 0.0, - "learning_rate": 8.083700767203004e-06, - "loss": 0.7561, + "learning_rate": 8.10888034465519e-06, + "loss": 0.8199, "step": 20247 }, { - "epoch": 0.5745743473325766, + "epoch": 0.5737765309303182, "grad_norm": 0.0, - "learning_rate": 8.082798730268894e-06, - "loss": 0.7995, + "learning_rate": 8.107979124376516e-06, + "loss": 0.8289, "step": 20248 }, { - "epoch": 0.5746027241770715, + "epoch": 0.5738048683725807, "grad_norm": 0.0, - "learning_rate": 8.081896709529466e-06, - "loss": 0.8593, + "learning_rate": 8.107077920035032e-06, + "loss": 0.9484, "step": 20249 }, { - "epoch": 0.5746311010215664, + "epoch": 0.5738332058148432, "grad_norm": 0.0, - "learning_rate": 8.080994704992347e-06, - "loss": 0.782, + "learning_rate": 8.106176731638338e-06, + "loss": 0.765, "step": 20250 }, { - "epoch": 0.5746594778660613, + "epoch": 0.5738615432571056, "grad_norm": 0.0, - "learning_rate": 8.080092716665151e-06, - "loss": 0.9934, + "learning_rate": 8.10527555919402e-06, + "loss": 0.9308, "step": 20251 }, { - "epoch": 0.5746878547105562, + "epoch": 0.5738898806993681, "grad_norm": 0.0, - "learning_rate": 8.079190744555494e-06, - "loss": 0.8324, + "learning_rate": 8.104374402709669e-06, + "loss": 0.8516, "step": 20252 }, { - "epoch": 0.574716231555051, + "epoch": 0.5739182181416306, "grad_norm": 0.0, - "learning_rate": 8.078288788671004e-06, - "loss": 0.9148, + "learning_rate": 8.10347326219288e-06, + "loss": 0.9194, "step": 20253 }, { - "epoch": 0.574744608399546, + "epoch": 0.573946555583893, "grad_norm": 0.0, - "learning_rate": 8.077386849019295e-06, - "loss": 0.895, + "learning_rate": 8.102572137651234e-06, + "loss": 0.8835, "step": 20254 }, { - "epoch": 0.5747729852440409, + "epoch": 0.5739748930261555, "grad_norm": 0.0, - "learning_rate": 8.076484925607983e-06, - "loss": 0.9509, + "learning_rate": 8.101671029092332e-06, + "loss": 0.865, "step": 20255 }, { - "epoch": 0.5748013620885357, + "epoch": 0.574003230468418, "grad_norm": 0.0, - "learning_rate": 8.075583018444691e-06, - "loss": 0.8869, + "learning_rate": 8.100769936523758e-06, + "loss": 0.8484, "step": 20256 }, { - "epoch": 0.5748297389330307, + "epoch": 0.5740315679106803, "grad_norm": 0.0, - "learning_rate": 8.074681127537035e-06, - "loss": 0.8802, + "learning_rate": 8.099868859953101e-06, + "loss": 0.9257, "step": 20257 }, { - "epoch": 0.5748581157775255, + "epoch": 0.5740599053529428, "grad_norm": 0.0, - "learning_rate": 8.073779252892633e-06, - "loss": 0.9035, + "learning_rate": 8.098967799387962e-06, + "loss": 0.8112, "step": 20258 }, { - "epoch": 0.5748864926220204, + "epoch": 0.5740882427952053, "grad_norm": 0.0, - "learning_rate": 8.072877394519103e-06, - "loss": 0.8394, + "learning_rate": 8.098066754835916e-06, + "loss": 0.8759, "step": 20259 }, { - "epoch": 0.5749148694665154, + "epoch": 0.5741165802374678, "grad_norm": 0.0, - "learning_rate": 8.071975552424066e-06, - "loss": 0.7764, + "learning_rate": 8.09716572630456e-06, + "loss": 0.7632, "step": 20260 }, { - "epoch": 0.5749432463110102, + "epoch": 0.5741449176797302, "grad_norm": 0.0, - "learning_rate": 8.07107372661514e-06, - "loss": 0.7901, + "learning_rate": 8.096264713801489e-06, + "loss": 0.8262, "step": 20261 }, { - "epoch": 0.5749716231555051, + "epoch": 0.5741732551219927, "grad_norm": 0.0, - "learning_rate": 8.070171917099938e-06, - "loss": 0.9426, + "learning_rate": 8.095363717334284e-06, + "loss": 0.8532, "step": 20262 }, { - "epoch": 0.575, + "epoch": 0.5742015925642552, "grad_norm": 0.0, - "learning_rate": 8.069270123886084e-06, - "loss": 0.9412, + "learning_rate": 8.09446273691054e-06, + "loss": 0.8043, "step": 20263 }, { - "epoch": 0.5750283768444949, + "epoch": 0.5742299300065176, "grad_norm": 0.0, - "learning_rate": 8.068368346981192e-06, - "loss": 0.8607, + "learning_rate": 8.093561772537841e-06, + "loss": 0.8122, "step": 20264 }, { - "epoch": 0.5750567536889898, + "epoch": 0.5742582674487801, "grad_norm": 0.0, - "learning_rate": 8.067466586392875e-06, - "loss": 0.9323, + "learning_rate": 8.09266082422378e-06, + "loss": 0.8788, "step": 20265 }, { - "epoch": 0.5750851305334846, + "epoch": 0.5742866048910426, "grad_norm": 0.0, - "learning_rate": 8.066564842128758e-06, - "loss": 0.8704, + "learning_rate": 8.091759891975948e-06, + "loss": 0.9344, "step": 20266 }, { - "epoch": 0.5751135073779796, + "epoch": 0.574314942333305, "grad_norm": 0.0, - "learning_rate": 8.065663114196455e-06, - "loss": 0.8752, + "learning_rate": 8.090858975801927e-06, + "loss": 0.9901, "step": 20267 }, { - "epoch": 0.5751418842224745, + "epoch": 0.5743432797755674, "grad_norm": 0.0, - "learning_rate": 8.064761402603582e-06, - "loss": 0.8701, + "learning_rate": 8.089958075709311e-06, + "loss": 0.7279, "step": 20268 }, { - "epoch": 0.5751702610669693, + "epoch": 0.5743716172178299, "grad_norm": 0.0, - "learning_rate": 8.063859707357757e-06, - "loss": 0.8613, + "learning_rate": 8.089057191705686e-06, + "loss": 0.8247, "step": 20269 }, { - "epoch": 0.5751986379114642, + "epoch": 0.5743999546600924, "grad_norm": 0.0, - "learning_rate": 8.062958028466596e-06, - "loss": 0.911, + "learning_rate": 8.088156323798644e-06, + "loss": 0.7894, "step": 20270 }, { - "epoch": 0.5752270147559592, + "epoch": 0.5744282921023548, "grad_norm": 0.0, - "learning_rate": 8.062056365937713e-06, - "loss": 0.8755, + "learning_rate": 8.087255471995774e-06, + "loss": 0.8704, "step": 20271 }, { - "epoch": 0.575255391600454, + "epoch": 0.5744566295446173, "grad_norm": 0.0, - "learning_rate": 8.06115471977873e-06, - "loss": 0.9802, + "learning_rate": 8.086354636304657e-06, + "loss": 0.9273, "step": 20272 }, { - "epoch": 0.5752837684449489, + "epoch": 0.5744849669868798, "grad_norm": 0.0, - "learning_rate": 8.06025308999726e-06, - "loss": 0.9394, + "learning_rate": 8.085453816732885e-06, + "loss": 0.8724, "step": 20273 }, { - "epoch": 0.5753121452894439, + "epoch": 0.5745133044291423, "grad_norm": 0.0, - "learning_rate": 8.059351476600913e-06, - "loss": 0.9458, + "learning_rate": 8.084553013288048e-06, + "loss": 0.8083, "step": 20274 }, { - "epoch": 0.5753405221339387, + "epoch": 0.5745416418714047, "grad_norm": 0.0, - "learning_rate": 8.058449879597318e-06, - "loss": 0.8622, + "learning_rate": 8.083652225977734e-06, + "loss": 0.7606, "step": 20275 }, { - "epoch": 0.5753688989784336, + "epoch": 0.5745699793136672, "grad_norm": 0.0, - "learning_rate": 8.057548298994082e-06, - "loss": 0.8866, + "learning_rate": 8.082751454809529e-06, + "loss": 0.8664, "step": 20276 }, { - "epoch": 0.5753972758229285, + "epoch": 0.5745983167559297, "grad_norm": 0.0, - "learning_rate": 8.056646734798824e-06, - "loss": 0.8331, + "learning_rate": 8.081850699791017e-06, + "loss": 0.9473, "step": 20277 }, { - "epoch": 0.5754256526674234, + "epoch": 0.574626654198192, "grad_norm": 0.0, - "learning_rate": 8.05574518701916e-06, - "loss": 0.9035, + "learning_rate": 8.08094996092979e-06, + "loss": 0.852, "step": 20278 }, { - "epoch": 0.5754540295119183, + "epoch": 0.5746549916404545, "grad_norm": 0.0, - "learning_rate": 8.0548436556627e-06, - "loss": 0.8967, + "learning_rate": 8.080049238233439e-06, + "loss": 0.7577, "step": 20279 }, { - "epoch": 0.5754824063564131, + "epoch": 0.574683329082717, "grad_norm": 0.0, - "learning_rate": 8.053942140737066e-06, - "loss": 0.7774, + "learning_rate": 8.07914853170954e-06, + "loss": 0.8882, "step": 20280 }, { - "epoch": 0.5755107832009081, + "epoch": 0.5747116665249794, "grad_norm": 0.0, - "learning_rate": 8.053040642249871e-06, - "loss": 0.8401, + "learning_rate": 8.078247841365686e-06, + "loss": 0.9027, "step": 20281 }, { - "epoch": 0.575539160045403, + "epoch": 0.5747400039672419, "grad_norm": 0.0, - "learning_rate": 8.052139160208725e-06, - "loss": 0.7717, + "learning_rate": 8.077347167209467e-06, + "loss": 0.8337, "step": 20282 }, { - "epoch": 0.5755675368898978, + "epoch": 0.5747683414095044, "grad_norm": 0.0, - "learning_rate": 8.051237694621252e-06, - "loss": 0.9108, + "learning_rate": 8.076446509248466e-06, + "loss": 0.8577, "step": 20283 }, { - "epoch": 0.5755959137343928, + "epoch": 0.5747966788517669, "grad_norm": 0.0, - "learning_rate": 8.05033624549506e-06, - "loss": 0.8416, + "learning_rate": 8.075545867490272e-06, + "loss": 0.8611, "step": 20284 }, { - "epoch": 0.5756242905788876, + "epoch": 0.5748250162940293, "grad_norm": 0.0, - "learning_rate": 8.049434812837762e-06, - "loss": 0.9293, + "learning_rate": 8.074645241942466e-06, + "loss": 0.9415, "step": 20285 }, { - "epoch": 0.5756526674233825, + "epoch": 0.5748533537362918, "grad_norm": 0.0, - "learning_rate": 8.04853339665698e-06, - "loss": 0.8487, + "learning_rate": 8.07374463261264e-06, + "loss": 0.8835, "step": 20286 }, { - "epoch": 0.5756810442678774, + "epoch": 0.5748816911785543, "grad_norm": 0.0, - "learning_rate": 8.047631996960324e-06, - "loss": 0.9078, + "learning_rate": 8.07284403950838e-06, + "loss": 0.9471, "step": 20287 }, { - "epoch": 0.5757094211123723, + "epoch": 0.5749100286208166, "grad_norm": 0.0, - "learning_rate": 8.046730613755404e-06, - "loss": 0.8046, + "learning_rate": 8.071943462637267e-06, + "loss": 0.9094, "step": 20288 }, { - "epoch": 0.5757377979568672, + "epoch": 0.5749383660630791, "grad_norm": 0.0, - "learning_rate": 8.045829247049844e-06, - "loss": 0.7823, + "learning_rate": 8.071042902006896e-06, + "loss": 0.9158, "step": 20289 }, { - "epoch": 0.575766174801362, + "epoch": 0.5749667035053416, "grad_norm": 0.0, - "learning_rate": 8.044927896851245e-06, - "loss": 0.7875, + "learning_rate": 8.070142357624841e-06, + "loss": 0.8896, "step": 20290 }, { - "epoch": 0.575794551645857, + "epoch": 0.5749950409476041, "grad_norm": 0.0, - "learning_rate": 8.044026563167233e-06, - "loss": 0.8126, + "learning_rate": 8.069241829498694e-06, + "loss": 0.8636, "step": 20291 }, { - "epoch": 0.5758229284903519, + "epoch": 0.5750233783898665, "grad_norm": 0.0, - "learning_rate": 8.043125246005416e-06, - "loss": 0.905, + "learning_rate": 8.068341317636045e-06, + "loss": 0.8152, "step": 20292 }, { - "epoch": 0.5758513053348467, + "epoch": 0.575051715832129, "grad_norm": 0.0, - "learning_rate": 8.042223945373407e-06, - "loss": 0.8305, + "learning_rate": 8.06744082204447e-06, + "loss": 0.7594, "step": 20293 }, { - "epoch": 0.5758796821793417, + "epoch": 0.5750800532743915, "grad_norm": 0.0, - "learning_rate": 8.041322661278822e-06, - "loss": 0.8751, + "learning_rate": 8.066540342731558e-06, + "loss": 0.9757, "step": 20294 }, { - "epoch": 0.5759080590238366, + "epoch": 0.5751083907166539, "grad_norm": 0.0, - "learning_rate": 8.040421393729273e-06, - "loss": 0.9402, + "learning_rate": 8.065639879704896e-06, + "loss": 0.8637, "step": 20295 }, { - "epoch": 0.5759364358683314, + "epoch": 0.5751367281589164, "grad_norm": 0.0, - "learning_rate": 8.039520142732371e-06, - "loss": 0.7424, + "learning_rate": 8.064739432972068e-06, + "loss": 0.8349, "step": 20296 }, { - "epoch": 0.5759648127128263, + "epoch": 0.5751650656011789, "grad_norm": 0.0, - "learning_rate": 8.03861890829573e-06, - "loss": 0.8575, + "learning_rate": 8.06383900254066e-06, + "loss": 0.881, "step": 20297 }, { - "epoch": 0.5759931895573213, + "epoch": 0.5751934030434414, "grad_norm": 0.0, - "learning_rate": 8.037717690426966e-06, - "loss": 0.951, + "learning_rate": 8.062938588418251e-06, + "loss": 0.7747, "step": 20298 }, { - "epoch": 0.5760215664018161, + "epoch": 0.5752217404857037, "grad_norm": 0.0, - "learning_rate": 8.036816489133685e-06, - "loss": 0.8846, + "learning_rate": 8.062038190612431e-06, + "loss": 0.8571, "step": 20299 }, { - "epoch": 0.576049943246311, + "epoch": 0.5752500779279662, "grad_norm": 0.0, - "learning_rate": 8.035915304423507e-06, - "loss": 0.8192, + "learning_rate": 8.061137809130785e-06, + "loss": 0.9153, "step": 20300 }, { - "epoch": 0.576078320090806, + "epoch": 0.5752784153702287, "grad_norm": 0.0, - "learning_rate": 8.03501413630404e-06, - "loss": 0.8606, + "learning_rate": 8.060237443980892e-06, + "loss": 0.8571, "step": 20301 }, { - "epoch": 0.5761066969353008, + "epoch": 0.5753067528124911, "grad_norm": 0.0, - "learning_rate": 8.034112984782894e-06, - "loss": 0.9625, + "learning_rate": 8.05933709517034e-06, + "loss": 0.9071, "step": 20302 }, { - "epoch": 0.5761350737797957, + "epoch": 0.5753350902547536, "grad_norm": 0.0, - "learning_rate": 8.033211849867688e-06, - "loss": 0.8557, + "learning_rate": 8.058436762706718e-06, + "loss": 0.8623, "step": 20303 }, { - "epoch": 0.5761634506242905, + "epoch": 0.5753634276970161, "grad_norm": 0.0, - "learning_rate": 8.032310731566029e-06, - "loss": 0.8281, + "learning_rate": 8.057536446597598e-06, + "loss": 0.7812, "step": 20304 }, { - "epoch": 0.5761918274687855, + "epoch": 0.5753917651392785, "grad_norm": 0.0, - "learning_rate": 8.031409629885525e-06, - "loss": 0.8009, + "learning_rate": 8.056636146850575e-06, + "loss": 0.7919, "step": 20305 }, { - "epoch": 0.5762202043132804, + "epoch": 0.575420102581541, "grad_norm": 0.0, - "learning_rate": 8.030508544833793e-06, - "loss": 0.8048, + "learning_rate": 8.055735863473222e-06, + "loss": 0.796, "step": 20306 }, { - "epoch": 0.5762485811577752, + "epoch": 0.5754484400238035, "grad_norm": 0.0, - "learning_rate": 8.029607476418448e-06, - "loss": 0.8867, + "learning_rate": 8.05483559647313e-06, + "loss": 0.9329, "step": 20307 }, { - "epoch": 0.5762769580022702, + "epoch": 0.575476777466066, "grad_norm": 0.0, - "learning_rate": 8.028706424647098e-06, - "loss": 0.8692, + "learning_rate": 8.053935345857879e-06, + "loss": 0.9212, "step": 20308 }, { - "epoch": 0.576305334846765, + "epoch": 0.5755051149083283, "grad_norm": 0.0, - "learning_rate": 8.02780538952735e-06, - "loss": 0.8413, + "learning_rate": 8.053035111635054e-06, + "loss": 0.8068, "step": 20309 }, { - "epoch": 0.5763337116912599, + "epoch": 0.5755334523505908, "grad_norm": 0.0, - "learning_rate": 8.02690437106682e-06, - "loss": 0.8692, + "learning_rate": 8.052134893812236e-06, + "loss": 0.8036, "step": 20310 }, { - "epoch": 0.5763620885357549, + "epoch": 0.5755617897928533, "grad_norm": 0.0, - "learning_rate": 8.02600336927312e-06, - "loss": 0.9427, + "learning_rate": 8.051234692397013e-06, + "loss": 0.8473, "step": 20311 }, { - "epoch": 0.5763904653802497, + "epoch": 0.5755901272351157, "grad_norm": 0.0, - "learning_rate": 8.025102384153854e-06, - "loss": 0.8345, + "learning_rate": 8.05033450739696e-06, + "loss": 0.8158, "step": 20312 }, { - "epoch": 0.5764188422247446, + "epoch": 0.5756184646773782, "grad_norm": 0.0, - "learning_rate": 8.02420141571664e-06, - "loss": 0.8928, + "learning_rate": 8.049434338819666e-06, + "loss": 0.8215, "step": 20313 }, { - "epoch": 0.5764472190692395, + "epoch": 0.5756468021196407, "grad_norm": 0.0, - "learning_rate": 8.023300463969083e-06, - "loss": 0.8824, + "learning_rate": 8.048534186672708e-06, + "loss": 0.879, "step": 20314 }, { - "epoch": 0.5764755959137344, + "epoch": 0.5756751395619032, "grad_norm": 0.0, - "learning_rate": 8.022399528918799e-06, - "loss": 0.8979, + "learning_rate": 8.04763405096367e-06, + "loss": 0.8699, "step": 20315 }, { - "epoch": 0.5765039727582293, + "epoch": 0.5757034770041656, "grad_norm": 0.0, - "learning_rate": 8.021498610573392e-06, - "loss": 0.7806, + "learning_rate": 8.046733931700142e-06, + "loss": 0.935, "step": 20316 }, { - "epoch": 0.5765323496027241, + "epoch": 0.5757318144464281, "grad_norm": 0.0, - "learning_rate": 8.020597708940477e-06, - "loss": 0.86, + "learning_rate": 8.045833828889695e-06, + "loss": 0.9318, "step": 20317 }, { - "epoch": 0.5765607264472191, + "epoch": 0.5757601518886906, "grad_norm": 0.0, - "learning_rate": 8.019696824027662e-06, - "loss": 0.779, + "learning_rate": 8.044933742539919e-06, + "loss": 0.8642, "step": 20318 }, { - "epoch": 0.576589103291714, + "epoch": 0.575788489330953, "grad_norm": 0.0, - "learning_rate": 8.018795955842556e-06, - "loss": 0.7599, + "learning_rate": 8.044033672658387e-06, + "loss": 0.7888, "step": 20319 }, { - "epoch": 0.5766174801362088, + "epoch": 0.5758168267732154, "grad_norm": 0.0, - "learning_rate": 8.017895104392765e-06, - "loss": 0.9, + "learning_rate": 8.043133619252687e-06, + "loss": 0.8723, "step": 20320 }, { - "epoch": 0.5766458569807037, + "epoch": 0.5758451642154779, "grad_norm": 0.0, - "learning_rate": 8.016994269685909e-06, - "loss": 0.8196, + "learning_rate": 8.0422335823304e-06, + "loss": 0.8257, "step": 20321 }, { - "epoch": 0.5766742338251987, + "epoch": 0.5758735016577404, "grad_norm": 0.0, - "learning_rate": 8.016093451729591e-06, - "loss": 0.8704, + "learning_rate": 8.041333561899105e-06, + "loss": 0.9614, "step": 20322 }, { - "epoch": 0.5767026106696935, + "epoch": 0.5759018391000028, "grad_norm": 0.0, - "learning_rate": 8.015192650531421e-06, - "loss": 0.7377, + "learning_rate": 8.040433557966385e-06, + "loss": 0.867, "step": 20323 }, { - "epoch": 0.5767309875141884, + "epoch": 0.5759301765422653, "grad_norm": 0.0, - "learning_rate": 8.014291866099008e-06, - "loss": 0.8941, + "learning_rate": 8.039533570539826e-06, + "loss": 0.9544, "step": 20324 }, { - "epoch": 0.5767593643586834, + "epoch": 0.5759585139845278, "grad_norm": 0.0, - "learning_rate": 8.01339109843996e-06, - "loss": 0.8274, + "learning_rate": 8.038633599626998e-06, + "loss": 0.9271, "step": 20325 }, { - "epoch": 0.5767877412031782, + "epoch": 0.5759868514267902, "grad_norm": 0.0, - "learning_rate": 8.012490347561887e-06, - "loss": 0.7896, + "learning_rate": 8.03773364523549e-06, + "loss": 0.92, "step": 20326 }, { - "epoch": 0.5768161180476731, + "epoch": 0.5760151888690527, "grad_norm": 0.0, - "learning_rate": 8.011589613472397e-06, - "loss": 0.8056, + "learning_rate": 8.036833707372879e-06, + "loss": 0.9173, "step": 20327 }, { - "epoch": 0.5768444948921679, + "epoch": 0.5760435263113152, "grad_norm": 0.0, - "learning_rate": 8.010688896179101e-06, - "loss": 0.9501, + "learning_rate": 8.035933786046745e-06, + "loss": 0.9386, "step": 20328 }, { - "epoch": 0.5768728717366629, + "epoch": 0.5760718637535776, "grad_norm": 0.0, - "learning_rate": 8.009788195689601e-06, - "loss": 0.8667, + "learning_rate": 8.035033881264676e-06, + "loss": 0.8041, "step": 20329 }, { - "epoch": 0.5769012485811578, + "epoch": 0.57610020119584, "grad_norm": 0.0, - "learning_rate": 8.008887512011515e-06, - "loss": 0.8654, + "learning_rate": 8.034133993034241e-06, + "loss": 0.7388, "step": 20330 }, { - "epoch": 0.5769296254256526, + "epoch": 0.5761285386381025, "grad_norm": 0.0, - "learning_rate": 8.007986845152443e-06, - "loss": 0.8423, + "learning_rate": 8.033234121363026e-06, + "loss": 0.8945, "step": 20331 }, { - "epoch": 0.5769580022701476, + "epoch": 0.576156876080365, "grad_norm": 0.0, - "learning_rate": 8.007086195119994e-06, - "loss": 0.8337, + "learning_rate": 8.032334266258614e-06, + "loss": 0.8384, "step": 20332 }, { - "epoch": 0.5769863791146425, + "epoch": 0.5761852135226274, "grad_norm": 0.0, - "learning_rate": 8.00618556192178e-06, - "loss": 0.7859, + "learning_rate": 8.031434427728576e-06, + "loss": 0.8487, "step": 20333 }, { - "epoch": 0.5770147559591373, + "epoch": 0.5762135509648899, "grad_norm": 0.0, - "learning_rate": 8.005284945565406e-06, - "loss": 0.9524, + "learning_rate": 8.0305346057805e-06, + "loss": 0.9329, "step": 20334 }, { - "epoch": 0.5770431328036323, + "epoch": 0.5762418884071524, "grad_norm": 0.0, - "learning_rate": 8.004384346058477e-06, - "loss": 0.855, + "learning_rate": 8.02963480042196e-06, + "loss": 0.8503, "step": 20335 }, { - "epoch": 0.5770715096481271, + "epoch": 0.5762702258494148, "grad_norm": 0.0, - "learning_rate": 8.003483763408604e-06, - "loss": 0.8179, + "learning_rate": 8.028735011660537e-06, + "loss": 0.9162, "step": 20336 }, { - "epoch": 0.577099886492622, + "epoch": 0.5762985632916773, "grad_norm": 0.0, - "learning_rate": 8.002583197623392e-06, - "loss": 0.8402, + "learning_rate": 8.027835239503818e-06, + "loss": 0.8868, "step": 20337 }, { - "epoch": 0.5771282633371169, + "epoch": 0.5763269007339398, "grad_norm": 0.0, - "learning_rate": 8.001682648710455e-06, - "loss": 0.8695, + "learning_rate": 8.026935483959368e-06, + "loss": 0.8362, "step": 20338 }, { - "epoch": 0.5771566401816118, + "epoch": 0.5763552381762023, "grad_norm": 0.0, - "learning_rate": 8.000782116677391e-06, - "loss": 0.8704, + "learning_rate": 8.026035745034774e-06, + "loss": 0.8619, "step": 20339 }, { - "epoch": 0.5771850170261067, + "epoch": 0.5763835756184646, "grad_norm": 0.0, - "learning_rate": 7.999881601531809e-06, - "loss": 0.8781, + "learning_rate": 8.025136022737618e-06, + "loss": 0.8491, "step": 20340 }, { - "epoch": 0.5772133938706016, + "epoch": 0.5764119130607271, "grad_norm": 0.0, - "learning_rate": 7.99898110328132e-06, - "loss": 0.8179, + "learning_rate": 8.02423631707547e-06, + "loss": 0.8434, "step": 20341 }, { - "epoch": 0.5772417707150965, + "epoch": 0.5764402505029896, "grad_norm": 0.0, - "learning_rate": 7.998080621933529e-06, - "loss": 0.8676, + "learning_rate": 8.023336628055918e-06, + "loss": 0.8241, "step": 20342 }, { - "epoch": 0.5772701475595914, + "epoch": 0.576468587945252, "grad_norm": 0.0, - "learning_rate": 7.997180157496036e-06, - "loss": 0.8966, + "learning_rate": 8.022436955686532e-06, + "loss": 0.7849, "step": 20343 }, { - "epoch": 0.5772985244040862, + "epoch": 0.5764969253875145, "grad_norm": 0.0, - "learning_rate": 7.996279709976456e-06, - "loss": 0.8316, + "learning_rate": 8.021537299974893e-06, + "loss": 0.9095, "step": 20344 }, { - "epoch": 0.5773269012485811, + "epoch": 0.576525262829777, "grad_norm": 0.0, - "learning_rate": 7.99537927938239e-06, - "loss": 0.8874, + "learning_rate": 8.020637660928586e-06, + "loss": 0.8626, "step": 20345 }, { - "epoch": 0.5773552780930761, + "epoch": 0.5765536002720394, "grad_norm": 0.0, - "learning_rate": 7.994478865721444e-06, - "loss": 0.9379, + "learning_rate": 8.019738038555176e-06, + "loss": 0.8993, "step": 20346 }, { - "epoch": 0.5773836549375709, + "epoch": 0.5765819377143019, "grad_norm": 0.0, - "learning_rate": 7.993578469001227e-06, - "loss": 0.8267, + "learning_rate": 8.018838432862251e-06, + "loss": 0.8257, "step": 20347 }, { - "epoch": 0.5774120317820658, + "epoch": 0.5766102751565644, "grad_norm": 0.0, - "learning_rate": 7.992678089229345e-06, - "loss": 0.8527, + "learning_rate": 8.017938843857384e-06, + "loss": 0.9274, "step": 20348 }, { - "epoch": 0.5774404086265608, + "epoch": 0.5766386125988269, "grad_norm": 0.0, - "learning_rate": 7.991777726413396e-06, - "loss": 0.866, + "learning_rate": 8.017039271548154e-06, + "loss": 0.8603, "step": 20349 }, { - "epoch": 0.5774687854710556, + "epoch": 0.5766669500410893, "grad_norm": 0.0, - "learning_rate": 7.990877380560996e-06, - "loss": 0.8638, + "learning_rate": 8.016139715942143e-06, + "loss": 0.898, "step": 20350 }, { - "epoch": 0.5774971623155505, + "epoch": 0.5766952874833517, "grad_norm": 0.0, - "learning_rate": 7.989977051679738e-06, - "loss": 0.876, + "learning_rate": 8.01524017704692e-06, + "loss": 0.8076, "step": 20351 }, { - "epoch": 0.5775255391600455, + "epoch": 0.5767236249256142, "grad_norm": 0.0, - "learning_rate": 7.98907673977724e-06, - "loss": 0.9142, + "learning_rate": 8.014340654870065e-06, + "loss": 0.8777, "step": 20352 }, { - "epoch": 0.5775539160045403, + "epoch": 0.5767519623678766, "grad_norm": 0.0, - "learning_rate": 7.988176444861098e-06, - "loss": 0.8407, + "learning_rate": 8.013441149419159e-06, + "loss": 0.9185, "step": 20353 }, { - "epoch": 0.5775822928490352, + "epoch": 0.5767802998101391, "grad_norm": 0.0, - "learning_rate": 7.987276166938923e-06, - "loss": 0.9324, + "learning_rate": 8.012541660701774e-06, + "loss": 0.7645, "step": 20354 }, { - "epoch": 0.57761066969353, + "epoch": 0.5768086372524016, "grad_norm": 0.0, - "learning_rate": 7.986375906018316e-06, - "loss": 0.8806, + "learning_rate": 8.011642188725491e-06, + "loss": 0.9405, "step": 20355 }, { - "epoch": 0.577639046538025, + "epoch": 0.5768369746946641, "grad_norm": 0.0, - "learning_rate": 7.985475662106885e-06, - "loss": 0.8498, + "learning_rate": 8.010742733497882e-06, + "loss": 0.9107, "step": 20356 }, { - "epoch": 0.5776674233825199, + "epoch": 0.5768653121369265, "grad_norm": 0.0, - "learning_rate": 7.984575435212226e-06, - "loss": 0.8871, + "learning_rate": 8.009843295026524e-06, + "loss": 0.9668, "step": 20357 }, { - "epoch": 0.5776958002270147, + "epoch": 0.576893649579189, "grad_norm": 0.0, - "learning_rate": 7.983675225341953e-06, - "loss": 0.7791, + "learning_rate": 8.008943873319e-06, + "loss": 0.8784, "step": 20358 }, { - "epoch": 0.5777241770715097, + "epoch": 0.5769219870214515, "grad_norm": 0.0, - "learning_rate": 7.982775032503665e-06, - "loss": 0.8077, + "learning_rate": 8.008044468382878e-06, + "loss": 0.924, "step": 20359 }, { - "epoch": 0.5777525539160046, + "epoch": 0.5769503244637139, "grad_norm": 0.0, - "learning_rate": 7.981874856704965e-06, - "loss": 0.8324, + "learning_rate": 8.007145080225736e-06, + "loss": 0.8262, "step": 20360 }, { - "epoch": 0.5777809307604994, + "epoch": 0.5769786619059764, "grad_norm": 0.0, - "learning_rate": 7.98097469795346e-06, - "loss": 0.7534, + "learning_rate": 8.006245708855152e-06, + "loss": 0.8811, "step": 20361 }, { - "epoch": 0.5778093076049943, + "epoch": 0.5770069993482388, "grad_norm": 0.0, - "learning_rate": 7.980074556256756e-06, - "loss": 0.8987, + "learning_rate": 8.0053463542787e-06, + "loss": 0.9001, "step": 20362 }, { - "epoch": 0.5778376844494892, + "epoch": 0.5770353367905013, "grad_norm": 0.0, - "learning_rate": 7.979174431622447e-06, - "loss": 0.9591, + "learning_rate": 8.004447016503962e-06, + "loss": 0.9251, "step": 20363 }, { - "epoch": 0.5778660612939841, + "epoch": 0.5770636742327637, "grad_norm": 0.0, - "learning_rate": 7.978274324058146e-06, - "loss": 0.9201, + "learning_rate": 8.0035476955385e-06, + "loss": 0.8856, "step": 20364 }, { - "epoch": 0.577894438138479, + "epoch": 0.5770920116750262, "grad_norm": 0.0, - "learning_rate": 7.977374233571453e-06, - "loss": 0.7973, + "learning_rate": 8.0026483913899e-06, + "loss": 0.8508, "step": 20365 }, { - "epoch": 0.5779228149829739, + "epoch": 0.5771203491172887, "grad_norm": 0.0, - "learning_rate": 7.976474160169967e-06, - "loss": 0.8241, + "learning_rate": 8.001749104065735e-06, + "loss": 0.937, "step": 20366 }, { - "epoch": 0.5779511918274688, + "epoch": 0.5771486865595511, "grad_norm": 0.0, - "learning_rate": 7.975574103861295e-06, - "loss": 0.8762, + "learning_rate": 8.000849833573579e-06, + "loss": 0.8925, "step": 20367 }, { - "epoch": 0.5779795686719637, + "epoch": 0.5771770240018136, "grad_norm": 0.0, - "learning_rate": 7.974674064653045e-06, - "loss": 0.7628, + "learning_rate": 7.999950579921005e-06, + "loss": 0.8812, "step": 20368 }, { - "epoch": 0.5780079455164586, + "epoch": 0.5772053614440761, "grad_norm": 0.0, - "learning_rate": 7.973774042552811e-06, - "loss": 0.7731, + "learning_rate": 7.999051343115595e-06, + "loss": 0.7614, "step": 20369 }, { - "epoch": 0.5780363223609535, + "epoch": 0.5772336988863385, "grad_norm": 0.0, - "learning_rate": 7.972874037568197e-06, - "loss": 0.7144, + "learning_rate": 7.998152123164916e-06, + "loss": 0.8902, "step": 20370 }, { - "epoch": 0.5780646992054483, + "epoch": 0.577262036328601, "grad_norm": 0.0, - "learning_rate": 7.97197404970681e-06, - "loss": 0.8449, + "learning_rate": 7.997252920076543e-06, + "loss": 0.8851, "step": 20371 }, { - "epoch": 0.5780930760499432, + "epoch": 0.5772903737708635, "grad_norm": 0.0, - "learning_rate": 7.97107407897625e-06, - "loss": 0.8702, + "learning_rate": 7.996353733858055e-06, + "loss": 0.9841, "step": 20372 }, { - "epoch": 0.5781214528944382, + "epoch": 0.577318711213126, "grad_norm": 0.0, - "learning_rate": 7.970174125384116e-06, - "loss": 0.7612, + "learning_rate": 7.995454564517023e-06, + "loss": 0.9434, "step": 20373 }, { - "epoch": 0.578149829738933, + "epoch": 0.5773470486553883, "grad_norm": 0.0, - "learning_rate": 7.969274188938013e-06, - "loss": 0.9021, + "learning_rate": 7.994555412061022e-06, + "loss": 0.7696, "step": 20374 }, { - "epoch": 0.5781782065834279, + "epoch": 0.5773753860976508, "grad_norm": 0.0, - "learning_rate": 7.968374269645545e-06, - "loss": 0.9194, + "learning_rate": 7.993656276497623e-06, + "loss": 0.8453, "step": 20375 }, { - "epoch": 0.5782065834279229, + "epoch": 0.5774037235399133, "grad_norm": 0.0, - "learning_rate": 7.967474367514306e-06, - "loss": 0.8855, + "learning_rate": 7.992757157834408e-06, + "loss": 0.9798, "step": 20376 }, { - "epoch": 0.5782349602724177, + "epoch": 0.5774320609821757, "grad_norm": 0.0, - "learning_rate": 7.966574482551905e-06, - "loss": 0.861, + "learning_rate": 7.991858056078938e-06, + "loss": 0.8141, "step": 20377 }, { - "epoch": 0.5782633371169126, + "epoch": 0.5774603984244382, "grad_norm": 0.0, - "learning_rate": 7.965674614765942e-06, - "loss": 0.7874, + "learning_rate": 7.990958971238796e-06, + "loss": 0.9291, "step": 20378 }, { - "epoch": 0.5782917139614074, + "epoch": 0.5774887358667007, "grad_norm": 0.0, - "learning_rate": 7.964774764164013e-06, - "loss": 0.9459, + "learning_rate": 7.990059903321554e-06, + "loss": 0.8394, "step": 20379 }, { - "epoch": 0.5783200908059024, + "epoch": 0.5775170733089632, "grad_norm": 0.0, - "learning_rate": 7.963874930753728e-06, - "loss": 0.7858, + "learning_rate": 7.98916085233478e-06, + "loss": 0.7889, "step": 20380 }, { - "epoch": 0.5783484676503973, + "epoch": 0.5775454107512256, "grad_norm": 0.0, - "learning_rate": 7.962975114542681e-06, - "loss": 0.8117, + "learning_rate": 7.988261818286051e-06, + "loss": 0.8298, "step": 20381 }, { - "epoch": 0.5783768444948921, + "epoch": 0.5775737481934881, "grad_norm": 0.0, - "learning_rate": 7.962075315538471e-06, - "loss": 0.9214, + "learning_rate": 7.987362801182946e-06, + "loss": 0.8378, "step": 20382 }, { - "epoch": 0.5784052213393871, + "epoch": 0.5776020856357506, "grad_norm": 0.0, - "learning_rate": 7.961175533748707e-06, - "loss": 0.8633, + "learning_rate": 7.986463801033027e-06, + "loss": 1.008, "step": 20383 }, { - "epoch": 0.578433598183882, + "epoch": 0.5776304230780129, "grad_norm": 0.0, - "learning_rate": 7.960275769180982e-06, - "loss": 0.8758, + "learning_rate": 7.985564817843872e-06, + "loss": 0.8472, "step": 20384 }, { - "epoch": 0.5784619750283768, + "epoch": 0.5776587605202754, "grad_norm": 0.0, - "learning_rate": 7.959376021842903e-06, - "loss": 0.9141, + "learning_rate": 7.984665851623052e-06, + "loss": 1.0144, "step": 20385 }, { - "epoch": 0.5784903518728718, + "epoch": 0.5776870979625379, "grad_norm": 0.0, - "learning_rate": 7.958476291742065e-06, - "loss": 0.7746, + "learning_rate": 7.983766902378138e-06, + "loss": 0.916, "step": 20386 }, { - "epoch": 0.5785187287173666, + "epoch": 0.5777154354048004, "grad_norm": 0.0, - "learning_rate": 7.957576578886068e-06, - "loss": 0.9052, + "learning_rate": 7.98286797011671e-06, + "loss": 0.7313, "step": 20387 }, { - "epoch": 0.5785471055618615, + "epoch": 0.5777437728470628, "grad_norm": 0.0, - "learning_rate": 7.956676883282514e-06, - "loss": 0.9079, + "learning_rate": 7.981969054846328e-06, + "loss": 0.7873, "step": 20388 }, { - "epoch": 0.5785754824063564, + "epoch": 0.5777721102893253, "grad_norm": 0.0, - "learning_rate": 7.955777204939003e-06, - "loss": 0.8353, + "learning_rate": 7.981070156574572e-06, + "loss": 0.8828, "step": 20389 }, { - "epoch": 0.5786038592508513, + "epoch": 0.5778004477315878, "grad_norm": 0.0, - "learning_rate": 7.954877543863133e-06, - "loss": 0.942, + "learning_rate": 7.980171275309014e-06, + "loss": 0.9628, "step": 20390 }, { - "epoch": 0.5786322360953462, + "epoch": 0.5778287851738502, "grad_norm": 0.0, - "learning_rate": 7.953977900062506e-06, - "loss": 0.8486, + "learning_rate": 7.979272411057222e-06, + "loss": 0.9089, "step": 20391 }, { - "epoch": 0.5786606129398411, + "epoch": 0.5778571226161127, "grad_norm": 0.0, - "learning_rate": 7.953078273544718e-06, - "loss": 0.8202, + "learning_rate": 7.978373563826769e-06, + "loss": 0.834, "step": 20392 }, { - "epoch": 0.578688989784336, + "epoch": 0.5778854600583752, "grad_norm": 0.0, - "learning_rate": 7.952178664317371e-06, - "loss": 0.8244, + "learning_rate": 7.977474733625224e-06, + "loss": 0.8855, "step": 20393 }, { - "epoch": 0.5787173666288309, + "epoch": 0.5779137975006375, "grad_norm": 0.0, - "learning_rate": 7.951279072388063e-06, - "loss": 0.8154, + "learning_rate": 7.97657592046016e-06, + "loss": 0.7095, "step": 20394 }, { - "epoch": 0.5787457434733257, + "epoch": 0.5779421349429, "grad_norm": 0.0, - "learning_rate": 7.950379497764392e-06, - "loss": 0.8604, + "learning_rate": 7.975677124339154e-06, + "loss": 0.9634, "step": 20395 }, { - "epoch": 0.5787741203178206, + "epoch": 0.5779704723851625, "grad_norm": 0.0, - "learning_rate": 7.949479940453958e-06, - "loss": 0.9414, + "learning_rate": 7.974778345269767e-06, + "loss": 0.8892, "step": 20396 }, { - "epoch": 0.5788024971623156, + "epoch": 0.577998809827425, "grad_norm": 0.0, - "learning_rate": 7.948580400464356e-06, - "loss": 0.8789, + "learning_rate": 7.973879583259573e-06, + "loss": 0.9116, "step": 20397 }, { - "epoch": 0.5788308740068104, + "epoch": 0.5780271472696874, "grad_norm": 0.0, - "learning_rate": 7.947680877803188e-06, - "loss": 0.7947, + "learning_rate": 7.972980838316146e-06, + "loss": 0.9527, "step": 20398 }, { - "epoch": 0.5788592508513053, + "epoch": 0.5780554847119499, "grad_norm": 0.0, - "learning_rate": 7.946781372478056e-06, - "loss": 0.9802, + "learning_rate": 7.972082110447052e-06, + "loss": 0.8921, "step": 20399 }, { - "epoch": 0.5788876276958003, + "epoch": 0.5780838221542124, "grad_norm": 0.0, - "learning_rate": 7.945881884496553e-06, - "loss": 0.8536, + "learning_rate": 7.971183399659868e-06, + "loss": 0.9074, "step": 20400 }, { - "epoch": 0.5789160045402951, + "epoch": 0.5781121595964748, "grad_norm": 0.0, - "learning_rate": 7.944982413866276e-06, - "loss": 0.9072, + "learning_rate": 7.970284705962156e-06, + "loss": 0.8551, "step": 20401 }, { - "epoch": 0.57894438138479, + "epoch": 0.5781404970387373, "grad_norm": 0.0, - "learning_rate": 7.944082960594825e-06, - "loss": 0.8603, + "learning_rate": 7.96938602936149e-06, + "loss": 0.8471, "step": 20402 }, { - "epoch": 0.578972758229285, + "epoch": 0.5781688344809998, "grad_norm": 0.0, - "learning_rate": 7.943183524689801e-06, - "loss": 0.877, + "learning_rate": 7.96848736986544e-06, + "loss": 0.9019, "step": 20403 }, { - "epoch": 0.5790011350737798, + "epoch": 0.5781971719232623, "grad_norm": 0.0, - "learning_rate": 7.942284106158795e-06, - "loss": 0.9605, + "learning_rate": 7.967588727481574e-06, + "loss": 0.845, "step": 20404 }, { - "epoch": 0.5790295119182747, + "epoch": 0.5782255093655246, "grad_norm": 0.0, - "learning_rate": 7.94138470500941e-06, - "loss": 0.8574, + "learning_rate": 7.966690102217467e-06, + "loss": 0.8604, "step": 20405 }, { - "epoch": 0.5790578887627695, + "epoch": 0.5782538468077871, "grad_norm": 0.0, - "learning_rate": 7.94048532124924e-06, - "loss": 0.873, + "learning_rate": 7.965791494080679e-06, + "loss": 0.7824, "step": 20406 }, { - "epoch": 0.5790862656072645, + "epoch": 0.5782821842500496, "grad_norm": 0.0, - "learning_rate": 7.93958595488588e-06, - "loss": 0.9494, + "learning_rate": 7.964892903078785e-06, + "loss": 0.9293, "step": 20407 }, { - "epoch": 0.5791146424517594, + "epoch": 0.578310521692312, "grad_norm": 0.0, - "learning_rate": 7.938686605926934e-06, - "loss": 0.9167, + "learning_rate": 7.963994329219359e-06, + "loss": 0.9559, "step": 20408 }, { - "epoch": 0.5791430192962542, + "epoch": 0.5783388591345745, "grad_norm": 0.0, - "learning_rate": 7.937787274379994e-06, - "loss": 0.8163, + "learning_rate": 7.96309577250996e-06, + "loss": 0.8872, "step": 20409 }, { - "epoch": 0.5791713961407492, + "epoch": 0.578367196576837, "grad_norm": 0.0, - "learning_rate": 7.936887960252658e-06, - "loss": 0.7696, + "learning_rate": 7.962197232958162e-06, + "loss": 0.9031, "step": 20410 }, { - "epoch": 0.579199772985244, + "epoch": 0.5783955340190995, "grad_norm": 0.0, - "learning_rate": 7.935988663552521e-06, - "loss": 0.8704, + "learning_rate": 7.961298710571536e-06, + "loss": 1.0499, "step": 20411 }, { - "epoch": 0.5792281498297389, + "epoch": 0.5784238714613619, "grad_norm": 0.0, - "learning_rate": 7.935089384287177e-06, - "loss": 0.9411, + "learning_rate": 7.960400205357645e-06, + "loss": 0.9348, "step": 20412 }, { - "epoch": 0.5792565266742338, + "epoch": 0.5784522089036244, "grad_norm": 0.0, - "learning_rate": 7.934190122464232e-06, - "loss": 0.8611, + "learning_rate": 7.959501717324065e-06, + "loss": 0.877, "step": 20413 }, { - "epoch": 0.5792849035187287, + "epoch": 0.5784805463458869, "grad_norm": 0.0, - "learning_rate": 7.93329087809127e-06, - "loss": 0.8381, + "learning_rate": 7.958603246478355e-06, + "loss": 0.8056, "step": 20414 }, { - "epoch": 0.5793132803632236, + "epoch": 0.5785088837881492, "grad_norm": 0.0, - "learning_rate": 7.932391651175898e-06, - "loss": 0.7414, + "learning_rate": 7.957704792828088e-06, + "loss": 0.8304, "step": 20415 }, { - "epoch": 0.5793416572077185, + "epoch": 0.5785372212304117, "grad_norm": 0.0, - "learning_rate": 7.931492441725707e-06, - "loss": 0.859, + "learning_rate": 7.956806356380837e-06, + "loss": 0.8227, "step": 20416 }, { - "epoch": 0.5793700340522134, + "epoch": 0.5785655586726742, "grad_norm": 0.0, - "learning_rate": 7.930593249748289e-06, - "loss": 0.8323, + "learning_rate": 7.95590793714416e-06, + "loss": 0.9271, "step": 20417 }, { - "epoch": 0.5793984108967083, + "epoch": 0.5785938961149366, "grad_norm": 0.0, - "learning_rate": 7.929694075251244e-06, - "loss": 0.892, + "learning_rate": 7.95500953512563e-06, + "loss": 0.8767, "step": 20418 }, { - "epoch": 0.5794267877412032, + "epoch": 0.5786222335571991, "grad_norm": 0.0, - "learning_rate": 7.928794918242168e-06, - "loss": 0.7605, + "learning_rate": 7.954111150332814e-06, + "loss": 0.9249, "step": 20419 }, { - "epoch": 0.5794551645856981, + "epoch": 0.5786505709994616, "grad_norm": 0.0, - "learning_rate": 7.927895778728651e-06, - "loss": 0.8312, + "learning_rate": 7.95321278277328e-06, + "loss": 0.8179, "step": 20420 }, { - "epoch": 0.579483541430193, + "epoch": 0.5786789084417241, "grad_norm": 0.0, - "learning_rate": 7.926996656718296e-06, - "loss": 0.6455, + "learning_rate": 7.952314432454599e-06, + "loss": 0.8259, "step": 20421 }, { - "epoch": 0.5795119182746878, + "epoch": 0.5787072458839865, "grad_norm": 0.0, - "learning_rate": 7.926097552218692e-06, - "loss": 0.879, + "learning_rate": 7.951416099384328e-06, + "loss": 0.8139, "step": 20422 }, { - "epoch": 0.5795402951191827, + "epoch": 0.578735583326249, "grad_norm": 0.0, - "learning_rate": 7.925198465237433e-06, - "loss": 0.8868, + "learning_rate": 7.950517783570041e-06, + "loss": 0.8398, "step": 20423 }, { - "epoch": 0.5795686719636777, + "epoch": 0.5787639207685115, "grad_norm": 0.0, - "learning_rate": 7.924299395782116e-06, - "loss": 0.8291, + "learning_rate": 7.949619485019307e-06, + "loss": 0.8241, "step": 20424 }, { - "epoch": 0.5795970488081725, + "epoch": 0.5787922582107738, "grad_norm": 0.0, - "learning_rate": 7.923400343860338e-06, - "loss": 0.8065, + "learning_rate": 7.948721203739686e-06, + "loss": 0.8957, "step": 20425 }, { - "epoch": 0.5796254256526674, + "epoch": 0.5788205956530363, "grad_norm": 0.0, - "learning_rate": 7.92250130947969e-06, - "loss": 0.8772, + "learning_rate": 7.947822939738747e-06, + "loss": 0.8297, "step": 20426 }, { - "epoch": 0.5796538024971624, + "epoch": 0.5788489330952988, "grad_norm": 0.0, - "learning_rate": 7.921602292647763e-06, - "loss": 0.8142, + "learning_rate": 7.946924693024062e-06, + "loss": 0.8487, "step": 20427 }, { - "epoch": 0.5796821793416572, + "epoch": 0.5788772705375613, "grad_norm": 0.0, - "learning_rate": 7.920703293372153e-06, - "loss": 0.9304, + "learning_rate": 7.94602646360319e-06, + "loss": 0.8642, "step": 20428 }, { - "epoch": 0.5797105561861521, + "epoch": 0.5789056079798237, "grad_norm": 0.0, - "learning_rate": 7.919804311660463e-06, - "loss": 0.7099, + "learning_rate": 7.945128251483704e-06, + "loss": 0.8878, "step": 20429 }, { - "epoch": 0.5797389330306469, + "epoch": 0.5789339454220862, "grad_norm": 0.0, - "learning_rate": 7.918905347520278e-06, - "loss": 0.9284, + "learning_rate": 7.944230056673162e-06, + "loss": 0.8599, "step": 20430 }, { - "epoch": 0.5797673098751419, + "epoch": 0.5789622828643487, "grad_norm": 0.0, - "learning_rate": 7.918006400959191e-06, - "loss": 0.821, + "learning_rate": 7.94333187917913e-06, + "loss": 0.895, "step": 20431 }, { - "epoch": 0.5797956867196368, + "epoch": 0.5789906203066111, "grad_norm": 0.0, - "learning_rate": 7.917107471984798e-06, - "loss": 0.919, + "learning_rate": 7.942433719009183e-06, + "loss": 0.7976, "step": 20432 }, { - "epoch": 0.5798240635641316, + "epoch": 0.5790189577488736, "grad_norm": 0.0, - "learning_rate": 7.916208560604693e-06, - "loss": 0.8577, + "learning_rate": 7.941535576170878e-06, + "loss": 0.9123, "step": 20433 }, { - "epoch": 0.5798524404086266, + "epoch": 0.5790472951911361, "grad_norm": 0.0, - "learning_rate": 7.915309666826466e-06, - "loss": 0.8473, + "learning_rate": 7.940637450671787e-06, + "loss": 0.8282, "step": 20434 }, { - "epoch": 0.5798808172531215, + "epoch": 0.5790756326333986, "grad_norm": 0.0, - "learning_rate": 7.914410790657715e-06, - "loss": 0.8344, + "learning_rate": 7.939739342519468e-06, + "loss": 0.889, "step": 20435 }, { - "epoch": 0.5799091940976163, + "epoch": 0.579103970075661, "grad_norm": 0.0, - "learning_rate": 7.913511932106028e-06, - "loss": 0.6851, + "learning_rate": 7.938841251721488e-06, + "loss": 0.9103, "step": 20436 }, { - "epoch": 0.5799375709421113, + "epoch": 0.5791323075179234, "grad_norm": 0.0, - "learning_rate": 7.912613091178997e-06, - "loss": 0.813, + "learning_rate": 7.937943178285416e-06, + "loss": 0.8748, "step": 20437 }, { - "epoch": 0.5799659477866062, + "epoch": 0.5791606449601859, "grad_norm": 0.0, - "learning_rate": 7.911714267884221e-06, - "loss": 0.8151, + "learning_rate": 7.937045122218813e-06, + "loss": 0.7664, "step": 20438 }, { - "epoch": 0.579994324631101, + "epoch": 0.5791889824024483, "grad_norm": 0.0, - "learning_rate": 7.91081546222929e-06, - "loss": 0.8841, + "learning_rate": 7.936147083529245e-06, + "loss": 0.9009, "step": 20439 }, { - "epoch": 0.5800227014755959, + "epoch": 0.5792173198447108, "grad_norm": 0.0, - "learning_rate": 7.90991667422179e-06, - "loss": 0.8773, + "learning_rate": 7.935249062224281e-06, + "loss": 0.9462, "step": 20440 }, { - "epoch": 0.5800510783200908, + "epoch": 0.5792456572869733, "grad_norm": 0.0, - "learning_rate": 7.909017903869319e-06, - "loss": 0.7977, + "learning_rate": 7.934351058311475e-06, + "loss": 0.8209, "step": 20441 }, { - "epoch": 0.5800794551645857, + "epoch": 0.5792739947292357, "grad_norm": 0.0, - "learning_rate": 7.90811915117947e-06, - "loss": 0.8834, + "learning_rate": 7.933453071798403e-06, + "loss": 0.8958, "step": 20442 }, { - "epoch": 0.5801078320090806, + "epoch": 0.5793023321714982, "grad_norm": 0.0, - "learning_rate": 7.907220416159827e-06, - "loss": 0.8677, + "learning_rate": 7.932555102692619e-06, + "loss": 0.8041, "step": 20443 }, { - "epoch": 0.5801362088535755, + "epoch": 0.5793306696137607, "grad_norm": 0.0, - "learning_rate": 7.906321698817992e-06, - "loss": 0.8262, + "learning_rate": 7.93165715100169e-06, + "loss": 0.997, "step": 20444 }, { - "epoch": 0.5801645856980704, + "epoch": 0.5793590070560232, "grad_norm": 0.0, - "learning_rate": 7.90542299916155e-06, - "loss": 0.9297, + "learning_rate": 7.930759216733183e-06, + "loss": 0.9239, "step": 20445 }, { - "epoch": 0.5801929625425652, + "epoch": 0.5793873444982856, "grad_norm": 0.0, - "learning_rate": 7.904524317198095e-06, - "loss": 1.0075, + "learning_rate": 7.929861299894658e-06, + "loss": 0.8626, "step": 20446 }, { - "epoch": 0.5802213393870601, + "epoch": 0.579415681940548, "grad_norm": 0.0, - "learning_rate": 7.903625652935217e-06, - "loss": 0.9464, + "learning_rate": 7.92896340049368e-06, + "loss": 0.8432, "step": 20447 }, { - "epoch": 0.5802497162315551, + "epoch": 0.5794440193828105, "grad_norm": 0.0, - "learning_rate": 7.902727006380506e-06, - "loss": 0.8506, + "learning_rate": 7.928065518537816e-06, + "loss": 0.8095, "step": 20448 }, { - "epoch": 0.5802780930760499, + "epoch": 0.5794723568250729, "grad_norm": 0.0, - "learning_rate": 7.901828377541558e-06, - "loss": 0.9095, + "learning_rate": 7.927167654034622e-06, + "loss": 0.8943, "step": 20449 }, { - "epoch": 0.5803064699205448, + "epoch": 0.5795006942673354, "grad_norm": 0.0, - "learning_rate": 7.900929766425958e-06, - "loss": 0.922, + "learning_rate": 7.926269806991666e-06, + "loss": 0.9465, "step": 20450 }, { - "epoch": 0.5803348467650398, + "epoch": 0.5795290317095979, "grad_norm": 0.0, - "learning_rate": 7.900031173041296e-06, - "loss": 0.8779, + "learning_rate": 7.925371977416508e-06, + "loss": 0.9107, "step": 20451 }, { - "epoch": 0.5803632236095346, + "epoch": 0.5795573691518604, "grad_norm": 0.0, - "learning_rate": 7.89913259739517e-06, - "loss": 0.8561, + "learning_rate": 7.924474165316712e-06, + "loss": 0.8152, "step": 20452 }, { - "epoch": 0.5803916004540295, + "epoch": 0.5795857065941228, "grad_norm": 0.0, - "learning_rate": 7.898234039495162e-06, - "loss": 0.864, + "learning_rate": 7.923576370699845e-06, + "loss": 0.8665, "step": 20453 }, { - "epoch": 0.5804199772985245, + "epoch": 0.5796140440363853, "grad_norm": 0.0, - "learning_rate": 7.897335499348866e-06, - "loss": 0.8493, + "learning_rate": 7.922678593573462e-06, + "loss": 0.8246, "step": 20454 }, { - "epoch": 0.5804483541430193, + "epoch": 0.5796423814786478, "grad_norm": 0.0, - "learning_rate": 7.896436976963872e-06, - "loss": 0.8592, + "learning_rate": 7.921780833945127e-06, + "loss": 0.8022, "step": 20455 }, { - "epoch": 0.5804767309875142, + "epoch": 0.5796707189209102, "grad_norm": 0.0, - "learning_rate": 7.895538472347772e-06, - "loss": 0.9069, + "learning_rate": 7.92088309182241e-06, + "loss": 0.9211, "step": 20456 }, { - "epoch": 0.580505107832009, + "epoch": 0.5796990563631726, "grad_norm": 0.0, - "learning_rate": 7.894639985508147e-06, - "loss": 0.7862, + "learning_rate": 7.919985367212861e-06, + "loss": 0.797, "step": 20457 }, { - "epoch": 0.580533484676504, + "epoch": 0.5797273938054351, "grad_norm": 0.0, - "learning_rate": 7.893741516452594e-06, - "loss": 0.8285, + "learning_rate": 7.91908766012405e-06, + "loss": 0.8889, "step": 20458 }, { - "epoch": 0.5805618615209989, + "epoch": 0.5797557312476976, "grad_norm": 0.0, - "learning_rate": 7.892843065188705e-06, - "loss": 0.8695, + "learning_rate": 7.918189970563534e-06, + "loss": 0.8575, "step": 20459 }, { - "epoch": 0.5805902383654937, + "epoch": 0.57978406868996, "grad_norm": 0.0, - "learning_rate": 7.891944631724064e-06, - "loss": 0.8283, + "learning_rate": 7.917292298538877e-06, + "loss": 0.9062, "step": 20460 }, { - "epoch": 0.5806186152099887, + "epoch": 0.5798124061322225, "grad_norm": 0.0, - "learning_rate": 7.891046216066259e-06, - "loss": 0.8875, + "learning_rate": 7.916394644057645e-06, + "loss": 0.8676, "step": 20461 }, { - "epoch": 0.5806469920544836, + "epoch": 0.579840743574485, "grad_norm": 0.0, - "learning_rate": 7.890147818222885e-06, - "loss": 0.8945, + "learning_rate": 7.91549700712739e-06, + "loss": 0.8608, "step": 20462 }, { - "epoch": 0.5806753688989784, + "epoch": 0.5798690810167474, "grad_norm": 0.0, - "learning_rate": 7.889249438201526e-06, - "loss": 0.8857, + "learning_rate": 7.91459938775568e-06, + "loss": 0.7848, "step": 20463 }, { - "epoch": 0.5807037457434733, + "epoch": 0.5798974184590099, "grad_norm": 0.0, - "learning_rate": 7.888351076009772e-06, - "loss": 0.863, + "learning_rate": 7.913701785950072e-06, + "loss": 0.8786, "step": 20464 }, { - "epoch": 0.5807321225879682, + "epoch": 0.5799257559012724, "grad_norm": 0.0, - "learning_rate": 7.88745273165521e-06, - "loss": 0.7932, + "learning_rate": 7.912804201718129e-06, + "loss": 0.888, "step": 20465 }, { - "epoch": 0.5807604994324631, + "epoch": 0.5799540933435348, "grad_norm": 0.0, - "learning_rate": 7.886554405145433e-06, - "loss": 0.8401, + "learning_rate": 7.911906635067414e-06, + "loss": 0.8607, "step": 20466 }, { - "epoch": 0.580788876276958, + "epoch": 0.5799824307857973, "grad_norm": 0.0, - "learning_rate": 7.885656096488023e-06, - "loss": 0.9511, + "learning_rate": 7.911009086005481e-06, + "loss": 0.8974, "step": 20467 }, { - "epoch": 0.5808172531214529, + "epoch": 0.5800107682280597, "grad_norm": 0.0, - "learning_rate": 7.884757805690572e-06, - "loss": 0.8232, + "learning_rate": 7.910111554539895e-06, + "loss": 0.8812, "step": 20468 }, { - "epoch": 0.5808456299659478, + "epoch": 0.5800391056703222, "grad_norm": 0.0, - "learning_rate": 7.883859532760666e-06, - "loss": 0.8253, + "learning_rate": 7.90921404067822e-06, + "loss": 0.9213, "step": 20469 }, { - "epoch": 0.5808740068104427, + "epoch": 0.5800674431125846, "grad_norm": 0.0, - "learning_rate": 7.882961277705897e-06, - "loss": 0.8486, + "learning_rate": 7.908316544428007e-06, + "loss": 0.8246, "step": 20470 }, { - "epoch": 0.5809023836549376, + "epoch": 0.5800957805548471, "grad_norm": 0.0, - "learning_rate": 7.882063040533844e-06, - "loss": 0.9529, + "learning_rate": 7.907419065796822e-06, + "loss": 0.7867, "step": 20471 }, { - "epoch": 0.5809307604994325, + "epoch": 0.5801241179971096, "grad_norm": 0.0, - "learning_rate": 7.881164821252103e-06, - "loss": 0.7759, + "learning_rate": 7.906521604792221e-06, + "loss": 0.8316, "step": 20472 }, { - "epoch": 0.5809591373439273, + "epoch": 0.580152455439372, "grad_norm": 0.0, - "learning_rate": 7.880266619868257e-06, - "loss": 0.9476, + "learning_rate": 7.905624161421767e-06, + "loss": 0.8327, "step": 20473 }, { - "epoch": 0.5809875141884222, + "epoch": 0.5801807928816345, "grad_norm": 0.0, - "learning_rate": 7.87936843638989e-06, - "loss": 0.7982, + "learning_rate": 7.904726735693021e-06, + "loss": 0.791, "step": 20474 }, { - "epoch": 0.5810158910329172, + "epoch": 0.580209130323897, "grad_norm": 0.0, - "learning_rate": 7.8784702708246e-06, - "loss": 0.8846, + "learning_rate": 7.903829327613536e-06, + "loss": 0.9965, "step": 20475 }, { - "epoch": 0.581044267877412, + "epoch": 0.5802374677661595, "grad_norm": 0.0, - "learning_rate": 7.877572123179964e-06, - "loss": 0.8721, + "learning_rate": 7.902931937190877e-06, + "loss": 0.8877, "step": 20476 }, { - "epoch": 0.5810726447219069, + "epoch": 0.5802658052084219, "grad_norm": 0.0, - "learning_rate": 7.876673993463573e-06, - "loss": 0.8563, + "learning_rate": 7.902034564432601e-06, + "loss": 0.7837, "step": 20477 }, { - "epoch": 0.5811010215664019, + "epoch": 0.5802941426506844, "grad_norm": 0.0, - "learning_rate": 7.87577588168301e-06, - "loss": 0.9176, + "learning_rate": 7.901137209346266e-06, + "loss": 0.8055, "step": 20478 }, { - "epoch": 0.5811293984108967, + "epoch": 0.5803224800929468, "grad_norm": 0.0, - "learning_rate": 7.874877787845865e-06, - "loss": 0.8466, + "learning_rate": 7.900239871939435e-06, + "loss": 0.9064, "step": 20479 }, { - "epoch": 0.5811577752553916, + "epoch": 0.5803508175352092, "grad_norm": 0.0, - "learning_rate": 7.873979711959724e-06, - "loss": 0.9225, + "learning_rate": 7.89934255221966e-06, + "loss": 0.941, "step": 20480 }, { - "epoch": 0.5811861520998864, + "epoch": 0.5803791549774717, "grad_norm": 0.0, - "learning_rate": 7.87308165403217e-06, - "loss": 0.8636, + "learning_rate": 7.8984452501945e-06, + "loss": 0.9108, "step": 20481 }, { - "epoch": 0.5812145289443814, + "epoch": 0.5804074924197342, "grad_norm": 0.0, - "learning_rate": 7.872183614070791e-06, - "loss": 0.8902, + "learning_rate": 7.897547965871521e-06, + "loss": 0.8509, "step": 20482 }, { - "epoch": 0.5812429057888763, + "epoch": 0.5804358298619967, "grad_norm": 0.0, - "learning_rate": 7.871285592083175e-06, - "loss": 0.8265, + "learning_rate": 7.896650699258277e-06, + "loss": 0.9956, "step": 20483 }, { - "epoch": 0.5812712826333711, + "epoch": 0.5804641673042591, "grad_norm": 0.0, - "learning_rate": 7.870387588076902e-06, - "loss": 0.9123, + "learning_rate": 7.89575345036232e-06, + "loss": 0.9476, "step": 20484 }, { - "epoch": 0.5812996594778661, + "epoch": 0.5804925047465216, "grad_norm": 0.0, - "learning_rate": 7.869489602059565e-06, - "loss": 0.8697, + "learning_rate": 7.894856219191218e-06, + "loss": 0.936, "step": 20485 }, { - "epoch": 0.581328036322361, + "epoch": 0.5805208421887841, "grad_norm": 0.0, - "learning_rate": 7.868591634038742e-06, - "loss": 0.8526, + "learning_rate": 7.89395900575252e-06, + "loss": 0.8944, "step": 20486 }, { - "epoch": 0.5813564131668558, + "epoch": 0.5805491796310465, "grad_norm": 0.0, - "learning_rate": 7.867693684022022e-06, - "loss": 0.9309, + "learning_rate": 7.893061810053792e-06, + "loss": 0.8306, "step": 20487 }, { - "epoch": 0.5813847900113507, + "epoch": 0.580577517073309, "grad_norm": 0.0, - "learning_rate": 7.86679575201699e-06, - "loss": 0.8592, + "learning_rate": 7.89216463210258e-06, + "loss": 0.8639, "step": 20488 }, { - "epoch": 0.5814131668558457, + "epoch": 0.5806058545155715, "grad_norm": 0.0, - "learning_rate": 7.865897838031225e-06, - "loss": 0.8849, + "learning_rate": 7.891267471906453e-06, + "loss": 0.978, "step": 20489 }, { - "epoch": 0.5814415437003405, + "epoch": 0.5806341919578338, "grad_norm": 0.0, - "learning_rate": 7.864999942072321e-06, - "loss": 0.8382, + "learning_rate": 7.890370329472963e-06, + "loss": 0.9354, "step": 20490 }, { - "epoch": 0.5814699205448354, + "epoch": 0.5806625294000963, "grad_norm": 0.0, - "learning_rate": 7.86410206414786e-06, - "loss": 0.8449, + "learning_rate": 7.889473204809664e-06, + "loss": 0.9135, "step": 20491 }, { - "epoch": 0.5814982973893303, + "epoch": 0.5806908668423588, "grad_norm": 0.0, - "learning_rate": 7.863204204265423e-06, - "loss": 0.9323, + "learning_rate": 7.88857609792412e-06, + "loss": 0.8829, "step": 20492 }, { - "epoch": 0.5815266742338252, + "epoch": 0.5807192042846213, "grad_norm": 0.0, - "learning_rate": 7.862306362432597e-06, - "loss": 0.9717, + "learning_rate": 7.887679008823881e-06, + "loss": 0.8361, "step": 20493 }, { - "epoch": 0.5815550510783201, + "epoch": 0.5807475417268837, "grad_norm": 0.0, - "learning_rate": 7.861408538656966e-06, - "loss": 0.8113, + "learning_rate": 7.886781937516505e-06, + "loss": 0.8053, "step": 20494 }, { - "epoch": 0.581583427922815, + "epoch": 0.5807758791691462, "grad_norm": 0.0, - "learning_rate": 7.860510732946111e-06, - "loss": 0.7923, + "learning_rate": 7.885884884009552e-06, + "loss": 0.8817, "step": 20495 }, { - "epoch": 0.5816118047673099, + "epoch": 0.5808042166114087, "grad_norm": 0.0, - "learning_rate": 7.85961294530762e-06, - "loss": 0.8912, + "learning_rate": 7.884987848310574e-06, + "loss": 0.9282, "step": 20496 }, { - "epoch": 0.5816401816118048, + "epoch": 0.5808325540536711, "grad_norm": 0.0, - "learning_rate": 7.858715175749075e-06, - "loss": 0.8187, + "learning_rate": 7.88409083042713e-06, + "loss": 0.9096, "step": 20497 }, { - "epoch": 0.5816685584562996, + "epoch": 0.5808608914959336, "grad_norm": 0.0, - "learning_rate": 7.857817424278056e-06, - "loss": 0.8578, + "learning_rate": 7.883193830366775e-06, + "loss": 0.8303, "step": 20498 }, { - "epoch": 0.5816969353007946, + "epoch": 0.5808892289381961, "grad_norm": 0.0, - "learning_rate": 7.856919690902152e-06, - "loss": 0.9483, + "learning_rate": 7.882296848137063e-06, + "loss": 0.9326, "step": 20499 }, { - "epoch": 0.5817253121452894, + "epoch": 0.5809175663804585, "grad_norm": 0.0, - "learning_rate": 7.856021975628945e-06, - "loss": 0.8841, + "learning_rate": 7.881399883745555e-06, + "loss": 0.853, "step": 20500 }, { - "epoch": 0.5817536889897843, + "epoch": 0.5809459038227209, "grad_norm": 0.0, - "learning_rate": 7.855124278466013e-06, - "loss": 0.8773, + "learning_rate": 7.880502937199798e-06, + "loss": 0.7246, "step": 20501 }, { - "epoch": 0.5817820658342793, + "epoch": 0.5809742412649834, "grad_norm": 0.0, - "learning_rate": 7.854226599420947e-06, - "loss": 0.8631, + "learning_rate": 7.879606008507351e-06, + "loss": 0.9113, "step": 20502 }, { - "epoch": 0.5818104426787741, + "epoch": 0.5810025787072459, "grad_norm": 0.0, - "learning_rate": 7.853328938501323e-06, - "loss": 0.7227, + "learning_rate": 7.878709097675775e-06, + "loss": 0.9129, "step": 20503 }, { - "epoch": 0.581838819523269, + "epoch": 0.5810309161495083, "grad_norm": 0.0, - "learning_rate": 7.852431295714722e-06, - "loss": 0.8115, + "learning_rate": 7.877812204712614e-06, + "loss": 0.9511, "step": 20504 }, { - "epoch": 0.5818671963677639, + "epoch": 0.5810592535917708, "grad_norm": 0.0, - "learning_rate": 7.851533671068737e-06, - "loss": 0.7968, + "learning_rate": 7.876915329625431e-06, + "loss": 0.783, "step": 20505 }, { - "epoch": 0.5818955732122588, + "epoch": 0.5810875910340333, "grad_norm": 0.0, - "learning_rate": 7.850636064570939e-06, - "loss": 0.8645, + "learning_rate": 7.876018472421782e-06, + "loss": 0.8132, "step": 20506 }, { - "epoch": 0.5819239500567537, + "epoch": 0.5811159284762958, "grad_norm": 0.0, - "learning_rate": 7.849738476228916e-06, - "loss": 0.7883, + "learning_rate": 7.875121633109214e-06, + "loss": 0.8586, "step": 20507 }, { - "epoch": 0.5819523269012485, + "epoch": 0.5811442659185582, "grad_norm": 0.0, - "learning_rate": 7.848840906050252e-06, - "loss": 0.8465, + "learning_rate": 7.874224811695287e-06, + "loss": 0.8546, "step": 20508 }, { - "epoch": 0.5819807037457435, + "epoch": 0.5811726033608207, "grad_norm": 0.0, - "learning_rate": 7.84794335404252e-06, - "loss": 0.7854, + "learning_rate": 7.873328008187554e-06, + "loss": 0.8845, "step": 20509 }, { - "epoch": 0.5820090805902384, + "epoch": 0.5812009408030832, "grad_norm": 0.0, - "learning_rate": 7.847045820213312e-06, - "loss": 0.8836, + "learning_rate": 7.872431222593568e-06, + "loss": 0.9106, "step": 20510 }, { - "epoch": 0.5820374574347332, + "epoch": 0.5812292782453455, "grad_norm": 0.0, - "learning_rate": 7.846148304570204e-06, - "loss": 0.8013, + "learning_rate": 7.871534454920886e-06, + "loss": 0.9613, "step": 20511 }, { - "epoch": 0.5820658342792282, + "epoch": 0.581257615687608, "grad_norm": 0.0, - "learning_rate": 7.845250807120776e-06, - "loss": 0.809, + "learning_rate": 7.870637705177058e-06, + "loss": 0.8496, "step": 20512 }, { - "epoch": 0.5820942111237231, + "epoch": 0.5812859531298705, "grad_norm": 0.0, - "learning_rate": 7.844353327872614e-06, - "loss": 0.8216, + "learning_rate": 7.869740973369639e-06, + "loss": 0.8271, "step": 20513 }, { - "epoch": 0.5821225879682179, + "epoch": 0.5813142905721329, "grad_norm": 0.0, - "learning_rate": 7.843455866833295e-06, - "loss": 0.9932, + "learning_rate": 7.868844259506186e-06, + "loss": 0.8737, "step": 20514 }, { - "epoch": 0.5821509648127128, + "epoch": 0.5813426280143954, "grad_norm": 0.0, - "learning_rate": 7.8425584240104e-06, - "loss": 0.8821, + "learning_rate": 7.867947563594246e-06, + "loss": 0.8714, "step": 20515 }, { - "epoch": 0.5821793416572077, + "epoch": 0.5813709654566579, "grad_norm": 0.0, - "learning_rate": 7.841660999411513e-06, - "loss": 0.8551, + "learning_rate": 7.867050885641376e-06, + "loss": 0.9069, "step": 20516 }, { - "epoch": 0.5822077185017026, + "epoch": 0.5813993028989204, "grad_norm": 0.0, - "learning_rate": 7.84076359304421e-06, - "loss": 0.8478, + "learning_rate": 7.866154225655127e-06, + "loss": 0.74, "step": 20517 }, { - "epoch": 0.5822360953461975, + "epoch": 0.5814276403411828, "grad_norm": 0.0, - "learning_rate": 7.839866204916077e-06, - "loss": 0.7141, + "learning_rate": 7.865257583643053e-06, + "loss": 0.7842, "step": 20518 }, { - "epoch": 0.5822644721906924, + "epoch": 0.5814559777834453, "grad_norm": 0.0, - "learning_rate": 7.83896883503469e-06, - "loss": 0.9181, + "learning_rate": 7.864360959612714e-06, + "loss": 0.7869, "step": 20519 }, { - "epoch": 0.5822928490351873, + "epoch": 0.5814843152257078, "grad_norm": 0.0, - "learning_rate": 7.838071483407627e-06, - "loss": 0.9892, + "learning_rate": 7.863464353571649e-06, + "loss": 0.8112, "step": 20520 }, { - "epoch": 0.5823212258796822, + "epoch": 0.5815126526679701, "grad_norm": 0.0, - "learning_rate": 7.837174150042475e-06, - "loss": 0.9624, + "learning_rate": 7.862567765527418e-06, + "loss": 0.7986, "step": 20521 }, { - "epoch": 0.582349602724177, + "epoch": 0.5815409901102326, "grad_norm": 0.0, - "learning_rate": 7.83627683494681e-06, - "loss": 0.8049, + "learning_rate": 7.861671195487573e-06, + "loss": 0.8907, "step": 20522 }, { - "epoch": 0.582377979568672, + "epoch": 0.5815693275524951, "grad_norm": 0.0, - "learning_rate": 7.83537953812821e-06, - "loss": 0.8082, + "learning_rate": 7.860774643459664e-06, + "loss": 0.9337, "step": 20523 }, { - "epoch": 0.5824063564131668, + "epoch": 0.5815976649947576, "grad_norm": 0.0, - "learning_rate": 7.83448225959426e-06, - "loss": 0.8449, + "learning_rate": 7.85987810945125e-06, + "loss": 0.8746, "step": 20524 }, { - "epoch": 0.5824347332576617, + "epoch": 0.58162600243702, "grad_norm": 0.0, - "learning_rate": 7.833584999352533e-06, - "loss": 0.8913, + "learning_rate": 7.858981593469872e-06, + "loss": 0.7758, "step": 20525 }, { - "epoch": 0.5824631101021567, + "epoch": 0.5816543398792825, "grad_norm": 0.0, - "learning_rate": 7.832687757410612e-06, - "loss": 0.8581, + "learning_rate": 7.858085095523088e-06, + "loss": 0.8881, "step": 20526 }, { - "epoch": 0.5824914869466515, + "epoch": 0.581682677321545, "grad_norm": 0.0, - "learning_rate": 7.831790533776077e-06, - "loss": 0.8417, + "learning_rate": 7.857188615618452e-06, + "loss": 0.9058, "step": 20527 }, { - "epoch": 0.5825198637911464, + "epoch": 0.5817110147638074, "grad_norm": 0.0, - "learning_rate": 7.830893328456501e-06, - "loss": 0.9824, + "learning_rate": 7.856292153763508e-06, + "loss": 0.7858, "step": 20528 }, { - "epoch": 0.5825482406356414, + "epoch": 0.5817393522060699, "grad_norm": 0.0, - "learning_rate": 7.829996141459468e-06, - "loss": 0.8613, + "learning_rate": 7.855395709965814e-06, + "loss": 0.8549, "step": 20529 }, { - "epoch": 0.5825766174801362, + "epoch": 0.5817676896483324, "grad_norm": 0.0, - "learning_rate": 7.829098972792555e-06, - "loss": 0.7548, + "learning_rate": 7.854499284232915e-06, + "loss": 0.8862, "step": 20530 }, { - "epoch": 0.5826049943246311, + "epoch": 0.5817960270905949, "grad_norm": 0.0, - "learning_rate": 7.828201822463342e-06, - "loss": 0.7976, + "learning_rate": 7.853602876572367e-06, + "loss": 0.8077, "step": 20531 }, { - "epoch": 0.582633371169126, + "epoch": 0.5818243645328572, "grad_norm": 0.0, - "learning_rate": 7.827304690479403e-06, - "loss": 0.8427, + "learning_rate": 7.852706486991722e-06, + "loss": 0.8447, "step": 20532 }, { - "epoch": 0.5826617480136209, + "epoch": 0.5818527019751197, "grad_norm": 0.0, - "learning_rate": 7.82640757684832e-06, - "loss": 0.8083, + "learning_rate": 7.851810115498523e-06, + "loss": 0.9259, "step": 20533 }, { - "epoch": 0.5826901248581158, + "epoch": 0.5818810394173822, "grad_norm": 0.0, - "learning_rate": 7.825510481577672e-06, - "loss": 0.7791, + "learning_rate": 7.850913762100325e-06, + "loss": 0.9838, "step": 20534 }, { - "epoch": 0.5827185017026106, + "epoch": 0.5819093768596446, "grad_norm": 0.0, - "learning_rate": 7.82461340467503e-06, - "loss": 0.8383, + "learning_rate": 7.850017426804682e-06, + "loss": 0.944, "step": 20535 }, { - "epoch": 0.5827468785471056, + "epoch": 0.5819377143019071, "grad_norm": 0.0, - "learning_rate": 7.823716346147977e-06, - "loss": 0.8662, + "learning_rate": 7.849121109619138e-06, + "loss": 0.7425, "step": 20536 }, { - "epoch": 0.5827752553916005, + "epoch": 0.5819660517441696, "grad_norm": 0.0, - "learning_rate": 7.822819306004094e-06, - "loss": 0.7683, + "learning_rate": 7.84822481055125e-06, + "loss": 0.8608, "step": 20537 }, { - "epoch": 0.5828036322360953, + "epoch": 0.581994389186432, "grad_norm": 0.0, - "learning_rate": 7.821922284250954e-06, - "loss": 0.9096, + "learning_rate": 7.847328529608558e-06, + "loss": 0.9205, "step": 20538 }, { - "epoch": 0.5828320090805902, + "epoch": 0.5820227266286945, "grad_norm": 0.0, - "learning_rate": 7.82102528089613e-06, - "loss": 0.857, + "learning_rate": 7.846432266798618e-06, + "loss": 0.7801, "step": 20539 }, { - "epoch": 0.5828603859250852, + "epoch": 0.582051064070957, "grad_norm": 0.0, - "learning_rate": 7.820128295947206e-06, - "loss": 0.8341, + "learning_rate": 7.845536022128983e-06, + "loss": 0.9493, "step": 20540 }, { - "epoch": 0.58288876276958, + "epoch": 0.5820794015132195, "grad_norm": 0.0, - "learning_rate": 7.819231329411758e-06, - "loss": 0.9157, + "learning_rate": 7.844639795607195e-06, + "loss": 0.7402, "step": 20541 }, { - "epoch": 0.5829171396140749, + "epoch": 0.5821077389554818, "grad_norm": 0.0, - "learning_rate": 7.818334381297359e-06, - "loss": 0.8804, + "learning_rate": 7.843743587240804e-06, + "loss": 0.8894, "step": 20542 }, { - "epoch": 0.5829455164585698, + "epoch": 0.5821360763977443, "grad_norm": 0.0, - "learning_rate": 7.81743745161159e-06, - "loss": 0.8737, + "learning_rate": 7.842847397037366e-06, + "loss": 0.7059, "step": 20543 }, { - "epoch": 0.5829738933030647, + "epoch": 0.5821644138400068, "grad_norm": 0.0, - "learning_rate": 7.816540540362022e-06, - "loss": 0.8661, + "learning_rate": 7.84195122500442e-06, + "loss": 0.8869, "step": 20544 }, { - "epoch": 0.5830022701475596, + "epoch": 0.5821927512822692, "grad_norm": 0.0, - "learning_rate": 7.815643647556235e-06, - "loss": 0.9256, + "learning_rate": 7.841055071149526e-06, + "loss": 0.8615, "step": 20545 }, { - "epoch": 0.5830306469920545, + "epoch": 0.5822210887245317, "grad_norm": 0.0, - "learning_rate": 7.814746773201804e-06, - "loss": 0.8851, + "learning_rate": 7.840158935480224e-06, + "loss": 0.9582, "step": 20546 }, { - "epoch": 0.5830590238365494, + "epoch": 0.5822494261667942, "grad_norm": 0.0, - "learning_rate": 7.813849917306308e-06, - "loss": 0.9232, + "learning_rate": 7.839262818004065e-06, + "loss": 0.8779, "step": 20547 }, { - "epoch": 0.5830874006810443, + "epoch": 0.5822777636090567, "grad_norm": 0.0, - "learning_rate": 7.812953079877317e-06, - "loss": 0.94, + "learning_rate": 7.838366718728599e-06, + "loss": 0.8409, "step": 20548 }, { - "epoch": 0.5831157775255391, + "epoch": 0.5823061010513191, "grad_norm": 0.0, - "learning_rate": 7.81205626092241e-06, - "loss": 0.8519, + "learning_rate": 7.83747063766137e-06, + "loss": 0.8456, "step": 20549 }, { - "epoch": 0.5831441543700341, + "epoch": 0.5823344384935816, "grad_norm": 0.0, - "learning_rate": 7.811159460449164e-06, - "loss": 0.722, + "learning_rate": 7.836574574809935e-06, + "loss": 0.8928, "step": 20550 }, { - "epoch": 0.5831725312145289, + "epoch": 0.5823627759358441, "grad_norm": 0.0, - "learning_rate": 7.810262678465147e-06, - "loss": 0.9491, + "learning_rate": 7.83567853018183e-06, + "loss": 0.9651, "step": 20551 }, { - "epoch": 0.5832009080590238, + "epoch": 0.5823911133781065, "grad_norm": 0.0, - "learning_rate": 7.809365914977945e-06, - "loss": 0.8839, + "learning_rate": 7.83478250378461e-06, + "loss": 0.8662, "step": 20552 }, { - "epoch": 0.5832292849035188, + "epoch": 0.582419450820369, "grad_norm": 0.0, - "learning_rate": 7.808469169995125e-06, - "loss": 0.7572, + "learning_rate": 7.833886495625825e-06, + "loss": 0.8968, "step": 20553 }, { - "epoch": 0.5832576617480136, + "epoch": 0.5824477882626314, "grad_norm": 0.0, - "learning_rate": 7.807572443524266e-06, - "loss": 0.8164, + "learning_rate": 7.832990505713012e-06, + "loss": 0.8134, "step": 20554 }, { - "epoch": 0.5832860385925085, + "epoch": 0.5824761257048938, "grad_norm": 0.0, - "learning_rate": 7.806675735572941e-06, - "loss": 0.8696, + "learning_rate": 7.832094534053725e-06, + "loss": 0.8573, "step": 20555 }, { - "epoch": 0.5833144154370034, + "epoch": 0.5825044631471563, "grad_norm": 0.0, - "learning_rate": 7.805779046148723e-06, - "loss": 0.8987, + "learning_rate": 7.831198580655515e-06, + "loss": 0.8659, "step": 20556 }, { - "epoch": 0.5833427922814983, + "epoch": 0.5825328005894188, "grad_norm": 0.0, - "learning_rate": 7.80488237525919e-06, - "loss": 0.8587, + "learning_rate": 7.83030264552592e-06, + "loss": 0.7722, "step": 20557 }, { - "epoch": 0.5833711691259932, + "epoch": 0.5825611380316813, "grad_norm": 0.0, - "learning_rate": 7.803985722911915e-06, - "loss": 0.9725, + "learning_rate": 7.829406728672498e-06, + "loss": 0.9091, "step": 20558 }, { - "epoch": 0.583399545970488, + "epoch": 0.5825894754739437, "grad_norm": 0.0, - "learning_rate": 7.80308908911447e-06, - "loss": 0.8967, + "learning_rate": 7.828510830102785e-06, + "loss": 0.8528, "step": 20559 }, { - "epoch": 0.583427922814983, + "epoch": 0.5826178129162062, "grad_norm": 0.0, - "learning_rate": 7.80219247387443e-06, - "loss": 0.8238, + "learning_rate": 7.82761494982433e-06, + "loss": 0.7479, "step": 20560 }, { - "epoch": 0.5834562996594779, + "epoch": 0.5826461503584687, "grad_norm": 0.0, - "learning_rate": 7.80129587719937e-06, - "loss": 0.9679, + "learning_rate": 7.826719087844684e-06, + "loss": 0.8, "step": 20561 }, { - "epoch": 0.5834846765039727, + "epoch": 0.5826744878007311, "grad_norm": 0.0, - "learning_rate": 7.80039929909686e-06, - "loss": 0.848, + "learning_rate": 7.82582324417139e-06, + "loss": 0.8876, "step": 20562 }, { - "epoch": 0.5835130533484677, + "epoch": 0.5827028252429935, "grad_norm": 0.0, - "learning_rate": 7.79950273957448e-06, - "loss": 0.9015, + "learning_rate": 7.82492741881199e-06, + "loss": 0.8642, "step": 20563 }, { - "epoch": 0.5835414301929626, + "epoch": 0.582731162685256, "grad_norm": 0.0, - "learning_rate": 7.798606198639797e-06, - "loss": 0.9289, + "learning_rate": 7.824031611774042e-06, + "loss": 0.8565, "step": 20564 }, { - "epoch": 0.5835698070374574, + "epoch": 0.5827595001275185, "grad_norm": 0.0, - "learning_rate": 7.797709676300385e-06, - "loss": 0.8531, + "learning_rate": 7.823135823065076e-06, + "loss": 0.9332, "step": 20565 }, { - "epoch": 0.5835981838819523, + "epoch": 0.5827878375697809, "grad_norm": 0.0, - "learning_rate": 7.796813172563817e-06, - "loss": 0.8227, + "learning_rate": 7.822240052692653e-06, + "loss": 0.8412, "step": 20566 }, { - "epoch": 0.5836265607264473, + "epoch": 0.5828161750120434, "grad_norm": 0.0, - "learning_rate": 7.79591668743767e-06, - "loss": 0.8639, + "learning_rate": 7.821344300664304e-06, + "loss": 0.8719, "step": 20567 }, { - "epoch": 0.5836549375709421, + "epoch": 0.5828445124543059, "grad_norm": 0.0, - "learning_rate": 7.795020220929515e-06, - "loss": 0.903, + "learning_rate": 7.820448566987582e-06, + "loss": 0.9824, "step": 20568 }, { - "epoch": 0.583683314415437, + "epoch": 0.5828728498965683, "grad_norm": 0.0, - "learning_rate": 7.794123773046923e-06, - "loss": 0.9792, + "learning_rate": 7.819552851670033e-06, + "loss": 0.8185, "step": 20569 }, { - "epoch": 0.5837116912599319, + "epoch": 0.5829011873388308, "grad_norm": 0.0, - "learning_rate": 7.793227343797465e-06, - "loss": 0.8349, + "learning_rate": 7.818657154719198e-06, + "loss": 0.9125, "step": 20570 }, { - "epoch": 0.5837400681044268, + "epoch": 0.5829295247810933, "grad_norm": 0.0, - "learning_rate": 7.792330933188716e-06, - "loss": 0.89, + "learning_rate": 7.817761476142629e-06, + "loss": 0.8997, "step": 20571 }, { - "epoch": 0.5837684449489217, + "epoch": 0.5829578622233558, "grad_norm": 0.0, - "learning_rate": 7.791434541228247e-06, - "loss": 0.9221, + "learning_rate": 7.81686581594786e-06, + "loss": 0.8698, "step": 20572 }, { - "epoch": 0.5837968217934165, + "epoch": 0.5829861996656182, "grad_norm": 0.0, - "learning_rate": 7.790538167923628e-06, - "loss": 0.826, + "learning_rate": 7.815970174142441e-06, + "loss": 0.8897, "step": 20573 }, { - "epoch": 0.5838251986379115, + "epoch": 0.5830145371078806, "grad_norm": 0.0, - "learning_rate": 7.789641813282433e-06, - "loss": 0.9189, + "learning_rate": 7.815074550733919e-06, + "loss": 0.9209, "step": 20574 }, { - "epoch": 0.5838535754824064, + "epoch": 0.5830428745501431, "grad_norm": 0.0, - "learning_rate": 7.788745477312236e-06, - "loss": 0.8278, + "learning_rate": 7.814178945729833e-06, + "loss": 0.9683, "step": 20575 }, { - "epoch": 0.5838819523269012, + "epoch": 0.5830712119924055, "grad_norm": 0.0, - "learning_rate": 7.787849160020601e-06, - "loss": 0.877, + "learning_rate": 7.813283359137728e-06, + "loss": 0.8767, "step": 20576 }, { - "epoch": 0.5839103291713962, + "epoch": 0.583099549434668, "grad_norm": 0.0, - "learning_rate": 7.786952861415106e-06, - "loss": 0.9238, + "learning_rate": 7.812387790965156e-06, + "loss": 0.8509, "step": 20577 }, { - "epoch": 0.583938706015891, + "epoch": 0.5831278868769305, "grad_norm": 0.0, - "learning_rate": 7.78605658150332e-06, - "loss": 0.8933, + "learning_rate": 7.811492241219648e-06, + "loss": 0.9914, "step": 20578 }, { - "epoch": 0.5839670828603859, + "epoch": 0.5831562243191929, "grad_norm": 0.0, - "learning_rate": 7.785160320292812e-06, - "loss": 0.9211, + "learning_rate": 7.810596709908759e-06, + "loss": 0.7637, "step": 20579 }, { - "epoch": 0.5839954597048809, + "epoch": 0.5831845617614554, "grad_norm": 0.0, - "learning_rate": 7.784264077791156e-06, - "loss": 0.8736, + "learning_rate": 7.809701197040021e-06, + "loss": 0.9434, "step": 20580 }, { - "epoch": 0.5840238365493757, + "epoch": 0.5832128992037179, "grad_norm": 0.0, - "learning_rate": 7.783367854005916e-06, - "loss": 0.8741, + "learning_rate": 7.808805702620985e-06, + "loss": 0.8976, "step": 20581 }, { - "epoch": 0.5840522133938706, + "epoch": 0.5832412366459804, "grad_norm": 0.0, - "learning_rate": 7.782471648944673e-06, - "loss": 0.8621, + "learning_rate": 7.807910226659194e-06, + "loss": 0.8109, "step": 20582 }, { - "epoch": 0.5840805902383654, + "epoch": 0.5832695740882428, "grad_norm": 0.0, - "learning_rate": 7.781575462614988e-06, - "loss": 0.9549, + "learning_rate": 7.807014769162186e-06, + "loss": 0.9412, "step": 20583 }, { - "epoch": 0.5841089670828604, + "epoch": 0.5832979115305053, "grad_norm": 0.0, - "learning_rate": 7.780679295024438e-06, - "loss": 0.9334, + "learning_rate": 7.806119330137507e-06, + "loss": 0.7774, "step": 20584 }, { - "epoch": 0.5841373439273553, + "epoch": 0.5833262489727677, "grad_norm": 0.0, - "learning_rate": 7.77978314618059e-06, - "loss": 0.9026, + "learning_rate": 7.805223909592706e-06, + "loss": 0.771, "step": 20585 }, { - "epoch": 0.5841657207718501, + "epoch": 0.5833545864150301, "grad_norm": 0.0, - "learning_rate": 7.77888701609101e-06, - "loss": 0.8934, + "learning_rate": 7.804328507535312e-06, + "loss": 0.8575, "step": 20586 }, { - "epoch": 0.5841940976163451, + "epoch": 0.5833829238572926, "grad_norm": 0.0, - "learning_rate": 7.777990904763274e-06, - "loss": 0.8259, + "learning_rate": 7.803433123972878e-06, + "loss": 0.8557, "step": 20587 }, { - "epoch": 0.58422247446084, + "epoch": 0.5834112612995551, "grad_norm": 0.0, - "learning_rate": 7.777094812204949e-06, - "loss": 0.8147, + "learning_rate": 7.80253775891294e-06, + "loss": 0.8726, "step": 20588 }, { - "epoch": 0.5842508513053348, + "epoch": 0.5834395987418176, "grad_norm": 0.0, - "learning_rate": 7.7761987384236e-06, - "loss": 0.7515, + "learning_rate": 7.801642412363042e-06, + "loss": 0.8792, "step": 20589 }, { - "epoch": 0.5842792281498297, + "epoch": 0.58346793618408, "grad_norm": 0.0, - "learning_rate": 7.775302683426806e-06, - "loss": 0.9382, + "learning_rate": 7.80074708433073e-06, + "loss": 0.7443, "step": 20590 }, { - "epoch": 0.5843076049943247, + "epoch": 0.5834962736263425, "grad_norm": 0.0, - "learning_rate": 7.774406647222128e-06, - "loss": 0.8028, + "learning_rate": 7.79985177482354e-06, + "loss": 0.8292, "step": 20591 }, { - "epoch": 0.5843359818388195, + "epoch": 0.583524611068605, "grad_norm": 0.0, - "learning_rate": 7.773510629817137e-06, - "loss": 0.8823, + "learning_rate": 7.798956483849013e-06, + "loss": 0.788, "step": 20592 }, { - "epoch": 0.5843643586833144, + "epoch": 0.5835529485108674, "grad_norm": 0.0, - "learning_rate": 7.772614631219402e-06, - "loss": 0.8289, + "learning_rate": 7.798061211414696e-06, + "loss": 0.9008, "step": 20593 }, { - "epoch": 0.5843927355278093, + "epoch": 0.5835812859531299, "grad_norm": 0.0, - "learning_rate": 7.77171865143649e-06, - "loss": 0.8669, + "learning_rate": 7.797165957528127e-06, + "loss": 0.9216, "step": 20594 }, { - "epoch": 0.5844211123723042, + "epoch": 0.5836096233953924, "grad_norm": 0.0, - "learning_rate": 7.770822690475973e-06, - "loss": 0.9511, + "learning_rate": 7.796270722196848e-06, + "loss": 0.985, "step": 20595 }, { - "epoch": 0.5844494892167991, + "epoch": 0.5836379608376548, "grad_norm": 0.0, - "learning_rate": 7.769926748345414e-06, - "loss": 0.8609, + "learning_rate": 7.795375505428398e-06, + "loss": 0.7849, "step": 20596 }, { - "epoch": 0.584477866061294, + "epoch": 0.5836662982799172, "grad_norm": 0.0, - "learning_rate": 7.769030825052383e-06, - "loss": 0.7398, + "learning_rate": 7.794480307230317e-06, + "loss": 0.7839, "step": 20597 }, { - "epoch": 0.5845062429057889, + "epoch": 0.5836946357221797, "grad_norm": 0.0, - "learning_rate": 7.768134920604453e-06, - "loss": 0.8617, + "learning_rate": 7.79358512761015e-06, + "loss": 0.9059, "step": 20598 }, { - "epoch": 0.5845346197502838, + "epoch": 0.5837229731644422, "grad_norm": 0.0, - "learning_rate": 7.767239035009187e-06, - "loss": 0.9198, + "learning_rate": 7.792689966575433e-06, + "loss": 0.9353, "step": 20599 }, { - "epoch": 0.5845629965947786, + "epoch": 0.5837513106067046, "grad_norm": 0.0, - "learning_rate": 7.76634316827415e-06, - "loss": 0.9769, + "learning_rate": 7.791794824133709e-06, + "loss": 0.8885, "step": 20600 }, { - "epoch": 0.5845913734392736, + "epoch": 0.5837796480489671, "grad_norm": 0.0, - "learning_rate": 7.765447320406916e-06, - "loss": 0.806, + "learning_rate": 7.790899700292516e-06, + "loss": 0.8879, "step": 20601 }, { - "epoch": 0.5846197502837684, + "epoch": 0.5838079854912296, "grad_norm": 0.0, - "learning_rate": 7.76455149141505e-06, - "loss": 0.9078, + "learning_rate": 7.790004595059395e-06, + "loss": 0.9424, "step": 20602 }, { - "epoch": 0.5846481271282633, + "epoch": 0.583836322933492, "grad_norm": 0.0, - "learning_rate": 7.763655681306113e-06, - "loss": 0.8592, + "learning_rate": 7.78910950844189e-06, + "loss": 0.8981, "step": 20603 }, { - "epoch": 0.5846765039727583, + "epoch": 0.5838646603757545, "grad_norm": 0.0, - "learning_rate": 7.762759890087682e-06, - "loss": 0.8215, + "learning_rate": 7.788214440447532e-06, + "loss": 0.9129, "step": 20604 }, { - "epoch": 0.5847048808172531, + "epoch": 0.583892997818017, "grad_norm": 0.0, - "learning_rate": 7.761864117767316e-06, - "loss": 0.9222, + "learning_rate": 7.787319391083864e-06, + "loss": 0.9407, "step": 20605 }, { - "epoch": 0.584733257661748, + "epoch": 0.5839213352602795, "grad_norm": 0.0, - "learning_rate": 7.760968364352584e-06, - "loss": 0.8903, + "learning_rate": 7.78642436035843e-06, + "loss": 0.985, "step": 20606 }, { - "epoch": 0.5847616345062429, + "epoch": 0.5839496727025418, "grad_norm": 0.0, - "learning_rate": 7.760072629851056e-06, - "loss": 0.9681, + "learning_rate": 7.785529348278765e-06, + "loss": 0.873, "step": 20607 }, { - "epoch": 0.5847900113507378, + "epoch": 0.5839780101448043, "grad_norm": 0.0, - "learning_rate": 7.759176914270293e-06, - "loss": 0.9519, + "learning_rate": 7.784634354852411e-06, + "loss": 0.877, "step": 20608 }, { - "epoch": 0.5848183881952327, + "epoch": 0.5840063475870668, "grad_norm": 0.0, - "learning_rate": 7.758281217617863e-06, - "loss": 0.7813, + "learning_rate": 7.7837393800869e-06, + "loss": 0.835, "step": 20609 }, { - "epoch": 0.5848467650397275, + "epoch": 0.5840346850293292, "grad_norm": 0.0, - "learning_rate": 7.757385539901333e-06, - "loss": 0.9415, + "learning_rate": 7.782844423989777e-06, + "loss": 0.8256, "step": 20610 }, { - "epoch": 0.5848751418842225, + "epoch": 0.5840630224715917, "grad_norm": 0.0, - "learning_rate": 7.756489881128269e-06, - "loss": 0.9231, + "learning_rate": 7.781949486568581e-06, + "loss": 0.8307, "step": 20611 }, { - "epoch": 0.5849035187287174, + "epoch": 0.5840913599138542, "grad_norm": 0.0, - "learning_rate": 7.755594241306232e-06, - "loss": 0.8939, + "learning_rate": 7.781054567830845e-06, + "loss": 0.9453, "step": 20612 }, { - "epoch": 0.5849318955732122, + "epoch": 0.5841196973561167, "grad_norm": 0.0, - "learning_rate": 7.754698620442794e-06, - "loss": 0.9205, + "learning_rate": 7.78015966778411e-06, + "loss": 0.8368, "step": 20613 }, { - "epoch": 0.5849602724177072, + "epoch": 0.5841480347983791, "grad_norm": 0.0, - "learning_rate": 7.753803018545517e-06, - "loss": 0.8847, + "learning_rate": 7.779264786435916e-06, + "loss": 0.8247, "step": 20614 }, { - "epoch": 0.5849886492622021, + "epoch": 0.5841763722406416, "grad_norm": 0.0, - "learning_rate": 7.752907435621968e-06, - "loss": 0.8136, + "learning_rate": 7.778369923793799e-06, + "loss": 0.757, "step": 20615 }, { - "epoch": 0.5850170261066969, + "epoch": 0.584204709682904, "grad_norm": 0.0, - "learning_rate": 7.752011871679712e-06, - "loss": 0.9323, + "learning_rate": 7.777475079865298e-06, + "loss": 0.9014, "step": 20616 }, { - "epoch": 0.5850454029511918, + "epoch": 0.5842330471251664, "grad_norm": 0.0, - "learning_rate": 7.75111632672631e-06, - "loss": 0.8537, + "learning_rate": 7.776580254657948e-06, + "loss": 0.7402, "step": 20617 }, { - "epoch": 0.5850737797956868, + "epoch": 0.5842613845674289, "grad_norm": 0.0, - "learning_rate": 7.750220800769333e-06, - "loss": 0.8598, + "learning_rate": 7.775685448179288e-06, + "loss": 0.7643, "step": 20618 }, { - "epoch": 0.5851021566401816, + "epoch": 0.5842897220096914, "grad_norm": 0.0, - "learning_rate": 7.74932529381634e-06, - "loss": 0.8409, + "learning_rate": 7.774790660436857e-06, + "loss": 0.8171, "step": 20619 }, { - "epoch": 0.5851305334846765, + "epoch": 0.5843180594519539, "grad_norm": 0.0, - "learning_rate": 7.748429805874896e-06, - "loss": 0.7671, + "learning_rate": 7.773895891438189e-06, + "loss": 0.8989, "step": 20620 }, { - "epoch": 0.5851589103291714, + "epoch": 0.5843463968942163, "grad_norm": 0.0, - "learning_rate": 7.747534336952569e-06, - "loss": 0.9221, + "learning_rate": 7.773001141190822e-06, + "loss": 0.7724, "step": 20621 }, { - "epoch": 0.5851872871736663, + "epoch": 0.5843747343364788, "grad_norm": 0.0, - "learning_rate": 7.74663888705692e-06, - "loss": 0.8465, + "learning_rate": 7.772106409702297e-06, + "loss": 0.8551, "step": 20622 }, { - "epoch": 0.5852156640181612, + "epoch": 0.5844030717787413, "grad_norm": 0.0, - "learning_rate": 7.745743456195513e-06, - "loss": 0.941, + "learning_rate": 7.771211696980145e-06, + "loss": 0.8902, "step": 20623 }, { - "epoch": 0.585244040862656, + "epoch": 0.5844314092210037, "grad_norm": 0.0, - "learning_rate": 7.744848044375913e-06, - "loss": 0.8263, + "learning_rate": 7.770317003031908e-06, + "loss": 0.9498, "step": 20624 }, { - "epoch": 0.585272417707151, + "epoch": 0.5844597466632662, "grad_norm": 0.0, - "learning_rate": 7.743952651605683e-06, - "loss": 0.7967, + "learning_rate": 7.769422327865113e-06, + "loss": 0.8647, "step": 20625 }, { - "epoch": 0.5853007945516459, + "epoch": 0.5844880841055287, "grad_norm": 0.0, - "learning_rate": 7.743057277892385e-06, - "loss": 0.7754, + "learning_rate": 7.768527671487304e-06, + "loss": 0.8533, "step": 20626 }, { - "epoch": 0.5853291713961407, + "epoch": 0.584516421547791, "grad_norm": 0.0, - "learning_rate": 7.742161923243581e-06, - "loss": 0.9583, + "learning_rate": 7.767633033906016e-06, + "loss": 0.7842, "step": 20627 }, { - "epoch": 0.5853575482406357, + "epoch": 0.5845447589900535, "grad_norm": 0.0, - "learning_rate": 7.741266587666841e-06, - "loss": 0.8561, + "learning_rate": 7.766738415128781e-06, + "loss": 0.8055, "step": 20628 }, { - "epoch": 0.5853859250851305, + "epoch": 0.584573096432316, "grad_norm": 0.0, - "learning_rate": 7.740371271169724e-06, - "loss": 0.9095, + "learning_rate": 7.765843815163143e-06, + "loss": 0.8388, "step": 20629 }, { - "epoch": 0.5854143019296254, + "epoch": 0.5846014338745785, "grad_norm": 0.0, - "learning_rate": 7.73947597375979e-06, - "loss": 0.8628, + "learning_rate": 7.76494923401663e-06, + "loss": 0.7443, "step": 20630 }, { - "epoch": 0.5854426787741204, + "epoch": 0.5846297713168409, "grad_norm": 0.0, - "learning_rate": 7.738580695444607e-06, - "loss": 0.8704, + "learning_rate": 7.764054671696776e-06, + "loss": 0.7879, "step": 20631 }, { - "epoch": 0.5854710556186152, + "epoch": 0.5846581087591034, "grad_norm": 0.0, - "learning_rate": 7.737685436231734e-06, - "loss": 0.9052, + "learning_rate": 7.763160128211123e-06, + "loss": 0.8762, "step": 20632 }, { - "epoch": 0.5854994324631101, + "epoch": 0.5846864462013659, "grad_norm": 0.0, - "learning_rate": 7.736790196128732e-06, - "loss": 0.7552, + "learning_rate": 7.762265603567202e-06, + "loss": 0.7718, "step": 20633 }, { - "epoch": 0.585527809307605, + "epoch": 0.5847147836436283, "grad_norm": 0.0, - "learning_rate": 7.735894975143166e-06, - "loss": 0.807, + "learning_rate": 7.761371097772548e-06, + "loss": 0.8451, "step": 20634 }, { - "epoch": 0.5855561861520999, + "epoch": 0.5847431210858908, "grad_norm": 0.0, - "learning_rate": 7.734999773282597e-06, - "loss": 0.9009, + "learning_rate": 7.7604766108347e-06, + "loss": 0.8581, "step": 20635 }, { - "epoch": 0.5855845629965948, + "epoch": 0.5847714585281533, "grad_norm": 0.0, - "learning_rate": 7.734104590554588e-06, - "loss": 0.8668, + "learning_rate": 7.759582142761186e-06, + "loss": 0.8817, "step": 20636 }, { - "epoch": 0.5856129398410896, + "epoch": 0.5847997959704158, "grad_norm": 0.0, - "learning_rate": 7.733209426966697e-06, - "loss": 0.8829, + "learning_rate": 7.758687693559547e-06, + "loss": 0.9878, "step": 20637 }, { - "epoch": 0.5856413166855846, + "epoch": 0.5848281334126781, "grad_norm": 0.0, - "learning_rate": 7.73231428252649e-06, - "loss": 0.8726, + "learning_rate": 7.75779326323731e-06, + "loss": 0.7733, "step": 20638 }, { - "epoch": 0.5856696935300795, + "epoch": 0.5848564708549406, "grad_norm": 0.0, - "learning_rate": 7.731419157241525e-06, - "loss": 0.8631, + "learning_rate": 7.756898851802014e-06, + "loss": 0.8047, "step": 20639 }, { - "epoch": 0.5856980703745743, + "epoch": 0.5848848082972031, "grad_norm": 0.0, - "learning_rate": 7.730524051119364e-06, - "loss": 0.924, + "learning_rate": 7.756004459261192e-06, + "loss": 0.7798, "step": 20640 }, { - "epoch": 0.5857264472190692, + "epoch": 0.5849131457394655, "grad_norm": 0.0, - "learning_rate": 7.729628964167568e-06, - "loss": 0.9215, + "learning_rate": 7.755110085622377e-06, + "loss": 0.918, "step": 20641 }, { - "epoch": 0.5857548240635642, + "epoch": 0.584941483181728, "grad_norm": 0.0, - "learning_rate": 7.7287338963937e-06, - "loss": 0.9213, + "learning_rate": 7.754215730893103e-06, + "loss": 0.8379, "step": 20642 }, { - "epoch": 0.585783200908059, + "epoch": 0.5849698206239905, "grad_norm": 0.0, - "learning_rate": 7.727838847805314e-06, - "loss": 0.8369, + "learning_rate": 7.753321395080907e-06, + "loss": 0.9251, "step": 20643 }, { - "epoch": 0.5858115777525539, + "epoch": 0.584998158066253, "grad_norm": 0.0, - "learning_rate": 7.726943818409978e-06, - "loss": 0.8313, + "learning_rate": 7.752427078193316e-06, + "loss": 0.8275, "step": 20644 }, { - "epoch": 0.5858399545970489, + "epoch": 0.5850264955085154, "grad_norm": 0.0, - "learning_rate": 7.726048808215252e-06, - "loss": 0.8493, + "learning_rate": 7.75153278023787e-06, + "loss": 0.8497, "step": 20645 }, { - "epoch": 0.5858683314415437, + "epoch": 0.5850548329507779, "grad_norm": 0.0, - "learning_rate": 7.725153817228694e-06, - "loss": 0.9528, + "learning_rate": 7.750638501222093e-06, + "loss": 0.7917, "step": 20646 }, { - "epoch": 0.5858967082860386, + "epoch": 0.5850831703930404, "grad_norm": 0.0, - "learning_rate": 7.72425884545786e-06, - "loss": 0.9157, + "learning_rate": 7.749744241153524e-06, + "loss": 1.0211, "step": 20647 }, { - "epoch": 0.5859250851305334, + "epoch": 0.5851115078353027, "grad_norm": 0.0, - "learning_rate": 7.723363892910319e-06, - "loss": 0.7974, + "learning_rate": 7.748850000039702e-06, + "loss": 0.8651, "step": 20648 }, { - "epoch": 0.5859534619750284, + "epoch": 0.5851398452775652, "grad_norm": 0.0, - "learning_rate": 7.722468959593623e-06, - "loss": 0.8577, + "learning_rate": 7.747955777888145e-06, + "loss": 0.8203, "step": 20649 }, { - "epoch": 0.5859818388195233, + "epoch": 0.5851681827198277, "grad_norm": 0.0, - "learning_rate": 7.721574045515334e-06, - "loss": 0.92, + "learning_rate": 7.747061574706394e-06, + "loss": 0.9174, "step": 20650 }, { - "epoch": 0.5860102156640181, + "epoch": 0.5851965201620901, "grad_norm": 0.0, - "learning_rate": 7.720679150683012e-06, - "loss": 0.8323, + "learning_rate": 7.746167390501984e-06, + "loss": 0.9349, "step": 20651 }, { - "epoch": 0.5860385925085131, + "epoch": 0.5852248576043526, "grad_norm": 0.0, - "learning_rate": 7.719784275104216e-06, - "loss": 0.8624, + "learning_rate": 7.745273225282439e-06, + "loss": 0.7849, "step": 20652 }, { - "epoch": 0.586066969353008, + "epoch": 0.5852531950466151, "grad_norm": 0.0, - "learning_rate": 7.718889418786504e-06, - "loss": 0.8238, + "learning_rate": 7.744379079055297e-06, + "loss": 0.9429, "step": 20653 }, { - "epoch": 0.5860953461975028, + "epoch": 0.5852815324888776, "grad_norm": 0.0, - "learning_rate": 7.717994581737436e-06, - "loss": 0.759, + "learning_rate": 7.743484951828085e-06, + "loss": 0.8461, "step": 20654 }, { - "epoch": 0.5861237230419978, + "epoch": 0.58530986993114, "grad_norm": 0.0, - "learning_rate": 7.71709976396457e-06, - "loss": 0.9726, + "learning_rate": 7.742590843608337e-06, + "loss": 0.8692, "step": 20655 }, { - "epoch": 0.5861520998864926, + "epoch": 0.5853382073734025, "grad_norm": 0.0, - "learning_rate": 7.716204965475465e-06, - "loss": 0.7716, + "learning_rate": 7.74169675440359e-06, + "loss": 0.9087, "step": 20656 }, { - "epoch": 0.5861804767309875, + "epoch": 0.585366544815665, "grad_norm": 0.0, - "learning_rate": 7.715310186277681e-06, - "loss": 0.845, + "learning_rate": 7.740802684221364e-06, + "loss": 0.8465, "step": 20657 }, { - "epoch": 0.5862088535754824, + "epoch": 0.5853948822579274, "grad_norm": 0.0, - "learning_rate": 7.714415426378767e-06, - "loss": 0.8265, + "learning_rate": 7.7399086330692e-06, + "loss": 0.9457, "step": 20658 }, { - "epoch": 0.5862372304199773, + "epoch": 0.5854232197001898, "grad_norm": 0.0, - "learning_rate": 7.713520685786297e-06, - "loss": 0.7903, + "learning_rate": 7.739014600954623e-06, + "loss": 0.9176, "step": 20659 }, { - "epoch": 0.5862656072644722, + "epoch": 0.5854515571424523, "grad_norm": 0.0, - "learning_rate": 7.712625964507818e-06, - "loss": 0.9239, + "learning_rate": 7.738120587885163e-06, + "loss": 0.8501, "step": 20660 }, { - "epoch": 0.586293984108967, + "epoch": 0.5854798945847148, "grad_norm": 0.0, - "learning_rate": 7.71173126255089e-06, - "loss": 0.9889, + "learning_rate": 7.737226593868359e-06, + "loss": 0.8775, "step": 20661 }, { - "epoch": 0.586322360953462, + "epoch": 0.5855082320269772, "grad_norm": 0.0, - "learning_rate": 7.710836579923072e-06, - "loss": 0.8296, + "learning_rate": 7.736332618911731e-06, + "loss": 0.8067, "step": 20662 }, { - "epoch": 0.5863507377979569, + "epoch": 0.5855365694692397, "grad_norm": 0.0, - "learning_rate": 7.70994191663192e-06, - "loss": 0.8961, + "learning_rate": 7.735438663022815e-06, + "loss": 0.8111, "step": 20663 }, { - "epoch": 0.5863791146424517, + "epoch": 0.5855649069115022, "grad_norm": 0.0, - "learning_rate": 7.70904727268499e-06, - "loss": 0.8578, + "learning_rate": 7.734544726209143e-06, + "loss": 0.7663, "step": 20664 }, { - "epoch": 0.5864074914869466, + "epoch": 0.5855932443537646, "grad_norm": 0.0, - "learning_rate": 7.708152648089843e-06, - "loss": 0.7699, + "learning_rate": 7.733650808478239e-06, + "loss": 0.7805, "step": 20665 }, { - "epoch": 0.5864358683314416, + "epoch": 0.5856215817960271, "grad_norm": 0.0, - "learning_rate": 7.707258042854033e-06, - "loss": 0.8404, + "learning_rate": 7.732756909837636e-06, + "loss": 0.7344, "step": 20666 }, { - "epoch": 0.5864642451759364, + "epoch": 0.5856499192382896, "grad_norm": 0.0, - "learning_rate": 7.706363456985115e-06, - "loss": 0.8472, + "learning_rate": 7.731863030294864e-06, + "loss": 0.8135, "step": 20667 }, { - "epoch": 0.5864926220204313, + "epoch": 0.5856782566805521, "grad_norm": 0.0, - "learning_rate": 7.70546889049065e-06, - "loss": 0.9245, + "learning_rate": 7.73096916985745e-06, + "loss": 0.9237, "step": 20668 }, { - "epoch": 0.5865209988649263, + "epoch": 0.5857065941228145, "grad_norm": 0.0, - "learning_rate": 7.704574343378193e-06, - "loss": 0.882, + "learning_rate": 7.73007532853293e-06, + "loss": 0.859, "step": 20669 }, { - "epoch": 0.5865493757094211, + "epoch": 0.5857349315650769, "grad_norm": 0.0, - "learning_rate": 7.703679815655297e-06, - "loss": 0.9363, + "learning_rate": 7.729181506328825e-06, + "loss": 0.8864, "step": 20670 }, { - "epoch": 0.586577752553916, + "epoch": 0.5857632690073394, "grad_norm": 0.0, - "learning_rate": 7.702785307329526e-06, - "loss": 0.9634, + "learning_rate": 7.728287703252667e-06, + "loss": 0.8261, "step": 20671 }, { - "epoch": 0.586606129398411, + "epoch": 0.5857916064496018, "grad_norm": 0.0, - "learning_rate": 7.701890818408427e-06, - "loss": 0.7543, + "learning_rate": 7.727393919311986e-06, + "loss": 0.8724, "step": 20672 }, { - "epoch": 0.5866345062429058, + "epoch": 0.5858199438918643, "grad_norm": 0.0, - "learning_rate": 7.700996348899558e-06, - "loss": 0.7591, + "learning_rate": 7.726500154514308e-06, + "loss": 0.8543, "step": 20673 }, { - "epoch": 0.5866628830874007, + "epoch": 0.5858482813341268, "grad_norm": 0.0, - "learning_rate": 7.700101898810476e-06, - "loss": 0.8389, + "learning_rate": 7.725606408867168e-06, + "loss": 0.853, "step": 20674 }, { - "epoch": 0.5866912599318955, + "epoch": 0.5858766187763892, "grad_norm": 0.0, - "learning_rate": 7.69920746814874e-06, - "loss": 0.8237, + "learning_rate": 7.724712682378088e-06, + "loss": 0.7986, "step": 20675 }, { - "epoch": 0.5867196367763905, + "epoch": 0.5859049562186517, "grad_norm": 0.0, - "learning_rate": 7.698313056921903e-06, - "loss": 0.8579, + "learning_rate": 7.723818975054596e-06, + "loss": 0.9624, "step": 20676 }, { - "epoch": 0.5867480136208854, + "epoch": 0.5859332936609142, "grad_norm": 0.0, - "learning_rate": 7.697418665137517e-06, - "loss": 0.81, + "learning_rate": 7.722925286904225e-06, + "loss": 0.8868, "step": 20677 }, { - "epoch": 0.5867763904653802, + "epoch": 0.5859616311031767, "grad_norm": 0.0, - "learning_rate": 7.696524292803138e-06, - "loss": 0.8797, + "learning_rate": 7.722031617934497e-06, + "loss": 0.7766, "step": 20678 }, { - "epoch": 0.5868047673098752, + "epoch": 0.585989968545439, "grad_norm": 0.0, - "learning_rate": 7.695629939926323e-06, - "loss": 0.8975, + "learning_rate": 7.721137968152944e-06, + "loss": 0.8373, "step": 20679 }, { - "epoch": 0.58683314415437, + "epoch": 0.5860183059877015, "grad_norm": 0.0, - "learning_rate": 7.694735606514627e-06, - "loss": 0.7729, + "learning_rate": 7.720244337567092e-06, + "loss": 0.8215, "step": 20680 }, { - "epoch": 0.5868615209988649, + "epoch": 0.586046643429964, "grad_norm": 0.0, - "learning_rate": 7.6938412925756e-06, - "loss": 0.7555, + "learning_rate": 7.719350726184467e-06, + "loss": 0.8599, "step": 20681 }, { - "epoch": 0.5868898978433598, + "epoch": 0.5860749808722264, "grad_norm": 0.0, - "learning_rate": 7.692946998116801e-06, - "loss": 0.8355, + "learning_rate": 7.718457134012601e-06, + "loss": 0.9114, "step": 20682 }, { - "epoch": 0.5869182746878547, + "epoch": 0.5861033183144889, "grad_norm": 0.0, - "learning_rate": 7.692052723145782e-06, - "loss": 0.874, + "learning_rate": 7.717563561059016e-06, + "loss": 0.9081, "step": 20683 }, { - "epoch": 0.5869466515323496, + "epoch": 0.5861316557567514, "grad_norm": 0.0, - "learning_rate": 7.691158467670097e-06, - "loss": 0.8413, + "learning_rate": 7.716670007331238e-06, + "loss": 0.9418, "step": 20684 }, { - "epoch": 0.5869750283768445, + "epoch": 0.5861599931990139, "grad_norm": 0.0, - "learning_rate": 7.6902642316973e-06, - "loss": 0.7984, + "learning_rate": 7.715776472836801e-06, + "loss": 0.8418, "step": 20685 }, { - "epoch": 0.5870034052213394, + "epoch": 0.5861883306412763, "grad_norm": 0.0, - "learning_rate": 7.689370015234947e-06, - "loss": 0.7054, + "learning_rate": 7.714882957583222e-06, + "loss": 0.9027, "step": 20686 }, { - "epoch": 0.5870317820658343, + "epoch": 0.5862166680835388, "grad_norm": 0.0, - "learning_rate": 7.688475818290588e-06, - "loss": 0.8509, + "learning_rate": 7.713989461578039e-06, + "loss": 0.8623, "step": 20687 }, { - "epoch": 0.5870601589103291, + "epoch": 0.5862450055258013, "grad_norm": 0.0, - "learning_rate": 7.687581640871778e-06, - "loss": 0.8884, + "learning_rate": 7.713095984828767e-06, + "loss": 0.9047, "step": 20688 }, { - "epoch": 0.5870885357548241, + "epoch": 0.5862733429680637, "grad_norm": 0.0, - "learning_rate": 7.686687482986066e-06, - "loss": 0.8278, + "learning_rate": 7.712202527342937e-06, + "loss": 0.827, "step": 20689 }, { - "epoch": 0.587116912599319, + "epoch": 0.5863016804103262, "grad_norm": 0.0, - "learning_rate": 7.685793344641012e-06, - "loss": 0.7854, + "learning_rate": 7.711309089128078e-06, + "loss": 0.9229, "step": 20690 }, { - "epoch": 0.5871452894438138, + "epoch": 0.5863300178525886, "grad_norm": 0.0, - "learning_rate": 7.684899225844165e-06, - "loss": 0.8812, + "learning_rate": 7.710415670191708e-06, + "loss": 0.9014, "step": 20691 }, { - "epoch": 0.5871736662883087, + "epoch": 0.5863583552948511, "grad_norm": 0.0, - "learning_rate": 7.684005126603079e-06, - "loss": 0.8461, + "learning_rate": 7.70952227054136e-06, + "loss": 0.7594, "step": 20692 }, { - "epoch": 0.5872020431328037, + "epoch": 0.5863866927371135, "grad_norm": 0.0, - "learning_rate": 7.683111046925306e-06, - "loss": 0.7788, + "learning_rate": 7.708628890184556e-06, + "loss": 0.8437, "step": 20693 }, { - "epoch": 0.5872304199772985, + "epoch": 0.586415030179376, "grad_norm": 0.0, - "learning_rate": 7.682216986818398e-06, - "loss": 0.892, + "learning_rate": 7.707735529128819e-06, + "loss": 0.9212, "step": 20694 }, { - "epoch": 0.5872587968217934, + "epoch": 0.5864433676216385, "grad_norm": 0.0, - "learning_rate": 7.681322946289909e-06, - "loss": 0.8289, + "learning_rate": 7.706842187381683e-06, + "loss": 0.7452, "step": 20695 }, { - "epoch": 0.5872871736662884, + "epoch": 0.5864717050639009, "grad_norm": 0.0, - "learning_rate": 7.680428925347388e-06, - "loss": 0.8925, + "learning_rate": 7.70594886495066e-06, + "loss": 0.7848, "step": 20696 }, { - "epoch": 0.5873155505107832, + "epoch": 0.5865000425061634, "grad_norm": 0.0, - "learning_rate": 7.679534923998386e-06, - "loss": 0.9033, + "learning_rate": 7.705055561843285e-06, + "loss": 0.8233, "step": 20697 }, { - "epoch": 0.5873439273552781, + "epoch": 0.5865283799484259, "grad_norm": 0.0, - "learning_rate": 7.67864094225046e-06, - "loss": 0.8646, + "learning_rate": 7.704162278067077e-06, + "loss": 0.9022, "step": 20698 }, { - "epoch": 0.5873723041997729, + "epoch": 0.5865567173906883, "grad_norm": 0.0, - "learning_rate": 7.677746980111158e-06, - "loss": 1.0038, + "learning_rate": 7.703269013629565e-06, + "loss": 0.9388, "step": 20699 }, { - "epoch": 0.5874006810442679, + "epoch": 0.5865850548329508, "grad_norm": 0.0, - "learning_rate": 7.676853037588028e-06, - "loss": 0.8421, + "learning_rate": 7.702375768538268e-06, + "loss": 0.7928, "step": 20700 }, { - "epoch": 0.5874290578887628, + "epoch": 0.5866133922752133, "grad_norm": 0.0, - "learning_rate": 7.675959114688629e-06, - "loss": 0.9631, + "learning_rate": 7.701482542800718e-06, + "loss": 0.7987, "step": 20701 }, { - "epoch": 0.5874574347332576, + "epoch": 0.5866417297174757, "grad_norm": 0.0, - "learning_rate": 7.675065211420508e-06, - "loss": 0.8925, + "learning_rate": 7.700589336424431e-06, + "loss": 0.8966, "step": 20702 }, { - "epoch": 0.5874858115777526, + "epoch": 0.5866700671597381, "grad_norm": 0.0, - "learning_rate": 7.674171327791213e-06, - "loss": 0.8505, + "learning_rate": 7.699696149416935e-06, + "loss": 0.9799, "step": 20703 }, { - "epoch": 0.5875141884222475, + "epoch": 0.5866984046020006, "grad_norm": 0.0, - "learning_rate": 7.673277463808295e-06, - "loss": 0.9533, + "learning_rate": 7.698802981785752e-06, + "loss": 0.86, "step": 20704 }, { - "epoch": 0.5875425652667423, + "epoch": 0.5867267420442631, "grad_norm": 0.0, - "learning_rate": 7.67238361947931e-06, - "loss": 0.9582, + "learning_rate": 7.697909833538405e-06, + "loss": 0.814, "step": 20705 }, { - "epoch": 0.5875709421112373, + "epoch": 0.5867550794865255, "grad_norm": 0.0, - "learning_rate": 7.671489794811805e-06, - "loss": 0.9002, + "learning_rate": 7.697016704682421e-06, + "loss": 0.7786, "step": 20706 }, { - "epoch": 0.5875993189557321, + "epoch": 0.586783416928788, "grad_norm": 0.0, - "learning_rate": 7.670595989813333e-06, - "loss": 1.0194, + "learning_rate": 7.696123595225316e-06, + "loss": 0.8893, "step": 20707 }, { - "epoch": 0.587627695800227, + "epoch": 0.5868117543710505, "grad_norm": 0.0, - "learning_rate": 7.669702204491435e-06, - "loss": 0.7722, + "learning_rate": 7.69523050517462e-06, + "loss": 0.7841, "step": 20708 }, { - "epoch": 0.5876560726447219, + "epoch": 0.586840091813313, "grad_norm": 0.0, - "learning_rate": 7.668808438853674e-06, - "loss": 0.8944, + "learning_rate": 7.694337434537856e-06, + "loss": 0.9286, "step": 20709 }, { - "epoch": 0.5876844494892168, + "epoch": 0.5868684292555754, "grad_norm": 0.0, - "learning_rate": 7.667914692907591e-06, - "loss": 0.9013, + "learning_rate": 7.69344438332254e-06, + "loss": 1.0008, "step": 20710 }, { - "epoch": 0.5877128263337117, + "epoch": 0.5868967666978379, "grad_norm": 0.0, - "learning_rate": 7.667020966660736e-06, - "loss": 0.7423, + "learning_rate": 7.692551351536202e-06, + "loss": 0.7912, "step": 20711 }, { - "epoch": 0.5877412031782066, + "epoch": 0.5869251041401004, "grad_norm": 0.0, - "learning_rate": 7.66612726012066e-06, - "loss": 0.8067, + "learning_rate": 7.691658339186356e-06, + "loss": 0.8822, "step": 20712 }, { - "epoch": 0.5877695800227015, + "epoch": 0.5869534415823627, "grad_norm": 0.0, - "learning_rate": 7.665233573294914e-06, - "loss": 0.9231, + "learning_rate": 7.690765346280532e-06, + "loss": 0.9097, "step": 20713 }, { - "epoch": 0.5877979568671964, + "epoch": 0.5869817790246252, "grad_norm": 0.0, - "learning_rate": 7.664339906191042e-06, - "loss": 0.9174, + "learning_rate": 7.68987237282625e-06, + "loss": 0.8411, "step": 20714 }, { - "epoch": 0.5878263337116912, + "epoch": 0.5870101164668877, "grad_norm": 0.0, - "learning_rate": 7.663446258816598e-06, - "loss": 0.7735, + "learning_rate": 7.68897941883103e-06, + "loss": 0.804, "step": 20715 }, { - "epoch": 0.5878547105561861, + "epoch": 0.5870384539091502, "grad_norm": 0.0, - "learning_rate": 7.662552631179127e-06, - "loss": 0.8355, + "learning_rate": 7.688086484302394e-06, + "loss": 0.8506, "step": 20716 }, { - "epoch": 0.5878830874006811, + "epoch": 0.5870667913514126, "grad_norm": 0.0, - "learning_rate": 7.661659023286179e-06, - "loss": 0.9005, + "learning_rate": 7.687193569247863e-06, + "loss": 0.8515, "step": 20717 }, { - "epoch": 0.5879114642451759, + "epoch": 0.5870951287936751, "grad_norm": 0.0, - "learning_rate": 7.660765435145305e-06, - "loss": 0.8443, + "learning_rate": 7.686300673674959e-06, + "loss": 0.8681, "step": 20718 }, { - "epoch": 0.5879398410896708, + "epoch": 0.5871234662359376, "grad_norm": 0.0, - "learning_rate": 7.659871866764044e-06, - "loss": 0.9252, + "learning_rate": 7.685407797591207e-06, + "loss": 0.9163, "step": 20719 }, { - "epoch": 0.5879682179341658, + "epoch": 0.5871518036782, "grad_norm": 0.0, - "learning_rate": 7.658978318149956e-06, - "loss": 0.7971, + "learning_rate": 7.684514941004121e-06, + "loss": 0.773, "step": 20720 }, { - "epoch": 0.5879965947786606, + "epoch": 0.5871801411204625, "grad_norm": 0.0, - "learning_rate": 7.658084789310584e-06, - "loss": 0.8168, + "learning_rate": 7.683622103921228e-06, + "loss": 0.9025, "step": 20721 }, { - "epoch": 0.5880249716231555, + "epoch": 0.587208478562725, "grad_norm": 0.0, - "learning_rate": 7.65719128025347e-06, - "loss": 0.8999, + "learning_rate": 7.682729286350048e-06, + "loss": 0.8958, "step": 20722 }, { - "epoch": 0.5880533484676504, + "epoch": 0.5872368160049873, "grad_norm": 0.0, - "learning_rate": 7.656297790986173e-06, - "loss": 0.7923, + "learning_rate": 7.681836488298096e-06, + "loss": 0.9268, "step": 20723 }, { - "epoch": 0.5880817253121453, + "epoch": 0.5872651534472498, "grad_norm": 0.0, - "learning_rate": 7.655404321516231e-06, - "loss": 0.6993, + "learning_rate": 7.680943709772899e-06, + "loss": 0.8335, "step": 20724 }, { - "epoch": 0.5881101021566402, + "epoch": 0.5872934908895123, "grad_norm": 0.0, - "learning_rate": 7.654510871851192e-06, - "loss": 0.9496, + "learning_rate": 7.68005095078197e-06, + "loss": 0.837, "step": 20725 }, { - "epoch": 0.588138479001135, + "epoch": 0.5873218283317748, "grad_norm": 0.0, - "learning_rate": 7.65361744199861e-06, - "loss": 1.0424, + "learning_rate": 7.679158211332834e-06, + "loss": 0.8301, "step": 20726 }, { - "epoch": 0.58816685584563, + "epoch": 0.5873501657740372, "grad_norm": 0.0, - "learning_rate": 7.652724031966023e-06, - "loss": 0.8684, + "learning_rate": 7.678265491433015e-06, + "loss": 0.8705, "step": 20727 }, { - "epoch": 0.5881952326901249, + "epoch": 0.5873785032162997, "grad_norm": 0.0, - "learning_rate": 7.651830641760983e-06, - "loss": 0.833, + "learning_rate": 7.677372791090025e-06, + "loss": 0.998, "step": 20728 }, { - "epoch": 0.5882236095346197, + "epoch": 0.5874068406585622, "grad_norm": 0.0, - "learning_rate": 7.650937271391035e-06, - "loss": 0.7294, + "learning_rate": 7.676480110311385e-06, + "loss": 0.874, "step": 20729 }, { - "epoch": 0.5882519863791147, + "epoch": 0.5874351781008246, "grad_norm": 0.0, - "learning_rate": 7.650043920863726e-06, - "loss": 0.915, + "learning_rate": 7.675587449104618e-06, + "loss": 0.8596, "step": 20730 }, { - "epoch": 0.5882803632236095, + "epoch": 0.5874635155430871, "grad_norm": 0.0, - "learning_rate": 7.6491505901866e-06, - "loss": 0.8556, + "learning_rate": 7.674694807477239e-06, + "loss": 0.7406, "step": 20731 }, { - "epoch": 0.5883087400681044, + "epoch": 0.5874918529853496, "grad_norm": 0.0, - "learning_rate": 7.648257279367206e-06, - "loss": 0.894, + "learning_rate": 7.673802185436774e-06, + "loss": 0.7995, "step": 20732 }, { - "epoch": 0.5883371169125993, + "epoch": 0.587520190427612, "grad_norm": 0.0, - "learning_rate": 7.64736398841309e-06, - "loss": 0.8428, + "learning_rate": 7.672909582990731e-06, + "loss": 0.8641, "step": 20733 }, { - "epoch": 0.5883654937570942, + "epoch": 0.5875485278698744, "grad_norm": 0.0, - "learning_rate": 7.646470717331791e-06, - "loss": 0.9013, + "learning_rate": 7.672017000146637e-06, + "loss": 0.9331, "step": 20734 }, { - "epoch": 0.5883938706015891, + "epoch": 0.5875768653121369, "grad_norm": 0.0, - "learning_rate": 7.645577466130861e-06, - "loss": 0.7911, + "learning_rate": 7.671124436912012e-06, + "loss": 0.8638, "step": 20735 }, { - "epoch": 0.588422247446084, + "epoch": 0.5876052027543994, "grad_norm": 0.0, - "learning_rate": 7.644684234817846e-06, - "loss": 0.833, + "learning_rate": 7.670231893294365e-06, + "loss": 0.8568, "step": 20736 }, { - "epoch": 0.5884506242905789, + "epoch": 0.5876335401966618, "grad_norm": 0.0, - "learning_rate": 7.64379102340029e-06, - "loss": 0.7298, + "learning_rate": 7.669339369301221e-06, + "loss": 0.8915, "step": 20737 }, { - "epoch": 0.5884790011350738, + "epoch": 0.5876618776389243, "grad_norm": 0.0, - "learning_rate": 7.642897831885734e-06, - "loss": 0.7033, + "learning_rate": 7.6684468649401e-06, + "loss": 0.7796, "step": 20738 }, { - "epoch": 0.5885073779795686, + "epoch": 0.5876902150811868, "grad_norm": 0.0, - "learning_rate": 7.64200466028173e-06, - "loss": 0.8706, + "learning_rate": 7.667554380218513e-06, + "loss": 0.8768, "step": 20739 }, { - "epoch": 0.5885357548240636, + "epoch": 0.5877185525234492, "grad_norm": 0.0, - "learning_rate": 7.641111508595818e-06, - "loss": 0.8864, + "learning_rate": 7.666661915143985e-06, + "loss": 0.7978, "step": 20740 }, { - "epoch": 0.5885641316685585, + "epoch": 0.5877468899657117, "grad_norm": 0.0, - "learning_rate": 7.64021837683554e-06, - "loss": 0.8554, + "learning_rate": 7.665769469724029e-06, + "loss": 0.8543, "step": 20741 }, { - "epoch": 0.5885925085130533, + "epoch": 0.5877752274079742, "grad_norm": 0.0, - "learning_rate": 7.639325265008445e-06, - "loss": 0.9426, + "learning_rate": 7.664877043966162e-06, + "loss": 0.8633, "step": 20742 }, { - "epoch": 0.5886208853575482, + "epoch": 0.5878035648502367, "grad_norm": 0.0, - "learning_rate": 7.638432173122078e-06, - "loss": 0.8846, + "learning_rate": 7.663984637877903e-06, + "loss": 0.8413, "step": 20743 }, { - "epoch": 0.5886492622020432, + "epoch": 0.587831902292499, "grad_norm": 0.0, - "learning_rate": 7.63753910118398e-06, - "loss": 0.9348, + "learning_rate": 7.66309225146677e-06, + "loss": 0.9718, "step": 20744 }, { - "epoch": 0.588677639046538, + "epoch": 0.5878602397347615, "grad_norm": 0.0, - "learning_rate": 7.636646049201692e-06, - "loss": 0.8136, + "learning_rate": 7.66219988474028e-06, + "loss": 0.9424, "step": 20745 }, { - "epoch": 0.5887060158910329, + "epoch": 0.587888577177024, "grad_norm": 0.0, - "learning_rate": 7.635753017182763e-06, - "loss": 0.8117, + "learning_rate": 7.661307537705947e-06, + "loss": 0.8442, "step": 20746 }, { - "epoch": 0.5887343927355279, + "epoch": 0.5879169146192864, "grad_norm": 0.0, - "learning_rate": 7.634860005134737e-06, - "loss": 0.9208, + "learning_rate": 7.660415210371288e-06, + "loss": 0.9278, "step": 20747 }, { - "epoch": 0.5887627695800227, + "epoch": 0.5879452520615489, "grad_norm": 0.0, - "learning_rate": 7.63396701306515e-06, - "loss": 0.8945, + "learning_rate": 7.659522902743824e-06, + "loss": 0.8986, "step": 20748 }, { - "epoch": 0.5887911464245176, + "epoch": 0.5879735895038114, "grad_norm": 0.0, - "learning_rate": 7.633074040981553e-06, - "loss": 0.8572, + "learning_rate": 7.658630614831066e-06, + "loss": 0.8194, "step": 20749 }, { - "epoch": 0.5888195232690124, + "epoch": 0.5880019269460739, "grad_norm": 0.0, - "learning_rate": 7.632181088891482e-06, - "loss": 0.9326, + "learning_rate": 7.65773834664053e-06, + "loss": 0.767, "step": 20750 }, { - "epoch": 0.5888479001135074, + "epoch": 0.5880302643883363, "grad_norm": 0.0, - "learning_rate": 7.631288156802488e-06, - "loss": 0.9252, + "learning_rate": 7.656846098179735e-06, + "loss": 0.8939, "step": 20751 }, { - "epoch": 0.5888762769580023, + "epoch": 0.5880586018305988, "grad_norm": 0.0, - "learning_rate": 7.630395244722108e-06, - "loss": 0.8336, + "learning_rate": 7.655953869456194e-06, + "loss": 0.8337, "step": 20752 }, { - "epoch": 0.5889046538024971, + "epoch": 0.5880869392728613, "grad_norm": 0.0, - "learning_rate": 7.629502352657888e-06, - "loss": 0.8332, + "learning_rate": 7.655061660477428e-06, + "loss": 0.9072, "step": 20753 }, { - "epoch": 0.5889330306469921, + "epoch": 0.5881152767151236, "grad_norm": 0.0, - "learning_rate": 7.628609480617366e-06, - "loss": 0.7161, + "learning_rate": 7.654169471250945e-06, + "loss": 0.8211, "step": 20754 }, { - "epoch": 0.588961407491487, + "epoch": 0.5881436141573861, "grad_norm": 0.0, - "learning_rate": 7.627716628608086e-06, - "loss": 0.8579, + "learning_rate": 7.653277301784262e-06, + "loss": 0.8682, "step": 20755 }, { - "epoch": 0.5889897843359818, + "epoch": 0.5881719515996486, "grad_norm": 0.0, - "learning_rate": 7.626823796637592e-06, - "loss": 0.9052, + "learning_rate": 7.652385152084898e-06, + "loss": 0.8692, "step": 20756 }, { - "epoch": 0.5890181611804768, + "epoch": 0.5882002890419111, "grad_norm": 0.0, - "learning_rate": 7.625930984713424e-06, - "loss": 0.7452, + "learning_rate": 7.651493022160366e-06, + "loss": 0.8358, "step": 20757 }, { - "epoch": 0.5890465380249716, + "epoch": 0.5882286264841735, "grad_norm": 0.0, - "learning_rate": 7.625038192843122e-06, - "loss": 0.781, + "learning_rate": 7.650600912018178e-06, + "loss": 0.8384, "step": 20758 }, { - "epoch": 0.5890749148694665, + "epoch": 0.588256963926436, "grad_norm": 0.0, - "learning_rate": 7.624145421034231e-06, - "loss": 0.8607, + "learning_rate": 7.649708821665856e-06, + "loss": 0.8404, "step": 20759 }, { - "epoch": 0.5891032917139614, + "epoch": 0.5882853013686985, "grad_norm": 0.0, - "learning_rate": 7.623252669294291e-06, - "loss": 0.9167, + "learning_rate": 7.648816751110905e-06, + "loss": 0.8606, "step": 20760 }, { - "epoch": 0.5891316685584563, + "epoch": 0.5883136388109609, "grad_norm": 0.0, - "learning_rate": 7.622359937630839e-06, - "loss": 0.8612, + "learning_rate": 7.647924700360847e-06, + "loss": 0.9376, "step": 20761 }, { - "epoch": 0.5891600454029512, + "epoch": 0.5883419762532234, "grad_norm": 0.0, - "learning_rate": 7.621467226051422e-06, - "loss": 0.8431, + "learning_rate": 7.64703266942319e-06, + "loss": 0.9575, "step": 20762 }, { - "epoch": 0.589188422247446, + "epoch": 0.5883703136954859, "grad_norm": 0.0, - "learning_rate": 7.62057453456358e-06, - "loss": 0.7938, + "learning_rate": 7.64614065830545e-06, + "loss": 0.8435, "step": 20763 }, { - "epoch": 0.589216799091941, + "epoch": 0.5883986511377483, "grad_norm": 0.0, - "learning_rate": 7.619681863174848e-06, - "loss": 0.8831, + "learning_rate": 7.645248667015143e-06, + "loss": 0.8471, "step": 20764 }, { - "epoch": 0.5892451759364359, + "epoch": 0.5884269885800107, "grad_norm": 0.0, - "learning_rate": 7.618789211892771e-06, - "loss": 0.9523, + "learning_rate": 7.644356695559779e-06, + "loss": 0.9319, "step": 20765 }, { - "epoch": 0.5892735527809307, + "epoch": 0.5884553260222732, "grad_norm": 0.0, - "learning_rate": 7.6178965807248885e-06, - "loss": 0.9559, + "learning_rate": 7.643464743946873e-06, + "loss": 0.9155, "step": 20766 }, { - "epoch": 0.5893019296254256, + "epoch": 0.5884836634645357, "grad_norm": 0.0, - "learning_rate": 7.617003969678742e-06, - "loss": 0.948, + "learning_rate": 7.642572812183944e-06, + "loss": 0.8423, "step": 20767 }, { - "epoch": 0.5893303064699206, + "epoch": 0.5885120009067981, "grad_norm": 0.0, - "learning_rate": 7.616111378761872e-06, - "loss": 0.7966, + "learning_rate": 7.641680900278494e-06, + "loss": 0.8265, "step": 20768 }, { - "epoch": 0.5893586833144154, + "epoch": 0.5885403383490606, "grad_norm": 0.0, - "learning_rate": 7.6152188079818125e-06, - "loss": 0.7903, + "learning_rate": 7.640789008238044e-06, + "loss": 0.8973, "step": 20769 }, { - "epoch": 0.5893870601589103, + "epoch": 0.5885686757913231, "grad_norm": 0.0, - "learning_rate": 7.6143262573461095e-06, - "loss": 0.8867, + "learning_rate": 7.639897136070103e-06, + "loss": 0.8578, "step": 20770 }, { - "epoch": 0.5894154370034053, + "epoch": 0.5885970132335855, "grad_norm": 0.0, - "learning_rate": 7.613433726862299e-06, - "loss": 0.8854, + "learning_rate": 7.639005283782183e-06, + "loss": 0.8364, "step": 20771 }, { - "epoch": 0.5894438138479001, + "epoch": 0.588625350675848, "grad_norm": 0.0, - "learning_rate": 7.612541216537921e-06, - "loss": 0.911, + "learning_rate": 7.638113451381804e-06, + "loss": 0.8457, "step": 20772 }, { - "epoch": 0.589472190692395, + "epoch": 0.5886536881181105, "grad_norm": 0.0, - "learning_rate": 7.611648726380515e-06, - "loss": 0.9291, + "learning_rate": 7.637221638876468e-06, + "loss": 0.9034, "step": 20773 }, { - "epoch": 0.58950056753689, + "epoch": 0.588682025560373, "grad_norm": 0.0, - "learning_rate": 7.61075625639762e-06, - "loss": 0.8098, + "learning_rate": 7.636329846273695e-06, + "loss": 0.9323, "step": 20774 }, { - "epoch": 0.5895289443813848, + "epoch": 0.5887103630026354, "grad_norm": 0.0, - "learning_rate": 7.609863806596772e-06, - "loss": 0.8321, + "learning_rate": 7.63543807358099e-06, + "loss": 0.8608, "step": 20775 }, { - "epoch": 0.5895573212258797, + "epoch": 0.5887387004448978, "grad_norm": 0.0, - "learning_rate": 7.608971376985514e-06, - "loss": 0.805, + "learning_rate": 7.634546320805869e-06, + "loss": 0.8161, "step": 20776 }, { - "epoch": 0.5895856980703745, + "epoch": 0.5887670378871603, "grad_norm": 0.0, - "learning_rate": 7.608078967571384e-06, - "loss": 0.8267, + "learning_rate": 7.633654587955844e-06, + "loss": 0.8033, "step": 20777 }, { - "epoch": 0.5896140749148695, + "epoch": 0.5887953753294227, "grad_norm": 0.0, - "learning_rate": 7.607186578361915e-06, - "loss": 0.8344, + "learning_rate": 7.632762875038422e-06, + "loss": 0.9235, "step": 20778 }, { - "epoch": 0.5896424517593644, + "epoch": 0.5888237127716852, "grad_norm": 0.0, - "learning_rate": 7.606294209364651e-06, - "loss": 0.9921, + "learning_rate": 7.631871182061117e-06, + "loss": 0.8665, "step": 20779 }, { - "epoch": 0.5896708286038592, + "epoch": 0.5888520502139477, "grad_norm": 0.0, - "learning_rate": 7.6054018605871264e-06, - "loss": 0.7673, + "learning_rate": 7.630979509031446e-06, + "loss": 0.7597, "step": 20780 }, { - "epoch": 0.5896992054483542, + "epoch": 0.5888803876562102, "grad_norm": 0.0, - "learning_rate": 7.6045095320368765e-06, - "loss": 0.8495, + "learning_rate": 7.630087855956911e-06, + "loss": 0.8799, "step": 20781 }, { - "epoch": 0.589727582292849, + "epoch": 0.5889087250984726, "grad_norm": 0.0, - "learning_rate": 7.603617223721445e-06, - "loss": 0.8515, + "learning_rate": 7.629196222845027e-06, + "loss": 0.9403, "step": 20782 }, { - "epoch": 0.5897559591373439, + "epoch": 0.5889370625407351, "grad_norm": 0.0, - "learning_rate": 7.602724935648367e-06, - "loss": 0.937, + "learning_rate": 7.628304609703301e-06, + "loss": 0.7808, "step": 20783 }, { - "epoch": 0.5897843359818388, + "epoch": 0.5889653999829976, "grad_norm": 0.0, - "learning_rate": 7.601832667825179e-06, - "loss": 0.8857, + "learning_rate": 7.627413016539247e-06, + "loss": 0.884, "step": 20784 }, { - "epoch": 0.5898127128263337, + "epoch": 0.58899373742526, "grad_norm": 0.0, - "learning_rate": 7.600940420259421e-06, - "loss": 0.9161, + "learning_rate": 7.6265214433603775e-06, + "loss": 0.8336, "step": 20785 }, { - "epoch": 0.5898410896708286, + "epoch": 0.5890220748675224, "grad_norm": 0.0, - "learning_rate": 7.600048192958622e-06, - "loss": 0.9274, + "learning_rate": 7.625629890174196e-06, + "loss": 0.8644, "step": 20786 }, { - "epoch": 0.5898694665153235, + "epoch": 0.5890504123097849, "grad_norm": 0.0, - "learning_rate": 7.599155985930328e-06, - "loss": 0.7952, + "learning_rate": 7.624738356988215e-06, + "loss": 0.8872, "step": 20787 }, { - "epoch": 0.5898978433598184, + "epoch": 0.5890787497520473, "grad_norm": 0.0, - "learning_rate": 7.598263799182071e-06, - "loss": 0.8862, + "learning_rate": 7.6238468438099485e-06, + "loss": 0.8052, "step": 20788 }, { - "epoch": 0.5899262202043133, + "epoch": 0.5891070871943098, "grad_norm": 0.0, - "learning_rate": 7.597371632721384e-06, - "loss": 0.8998, + "learning_rate": 7.622955350646899e-06, + "loss": 0.8727, "step": 20789 }, { - "epoch": 0.5899545970488081, + "epoch": 0.5891354246365723, "grad_norm": 0.0, - "learning_rate": 7.5964794865558115e-06, - "loss": 0.9453, + "learning_rate": 7.622063877506581e-06, + "loss": 0.7951, "step": 20790 }, { - "epoch": 0.5899829738933031, + "epoch": 0.5891637620788348, "grad_norm": 0.0, - "learning_rate": 7.595587360692883e-06, - "loss": 0.8091, + "learning_rate": 7.621172424396501e-06, + "loss": 0.9012, "step": 20791 }, { - "epoch": 0.590011350737798, + "epoch": 0.5891920995210972, "grad_norm": 0.0, - "learning_rate": 7.5946952551401346e-06, - "loss": 0.8695, + "learning_rate": 7.620280991324167e-06, + "loss": 0.8158, "step": 20792 }, { - "epoch": 0.5900397275822928, + "epoch": 0.5892204369633597, "grad_norm": 0.0, - "learning_rate": 7.5938031699051055e-06, - "loss": 0.8129, + "learning_rate": 7.619389578297096e-06, + "loss": 0.9153, "step": 20793 }, { - "epoch": 0.5900681044267877, + "epoch": 0.5892487744056222, "grad_norm": 0.0, - "learning_rate": 7.592911104995328e-06, - "loss": 0.7762, + "learning_rate": 7.618498185322786e-06, + "loss": 0.8657, "step": 20794 }, { - "epoch": 0.5900964812712827, + "epoch": 0.5892771118478846, "grad_norm": 0.0, - "learning_rate": 7.592019060418339e-06, - "loss": 0.839, + "learning_rate": 7.617606812408749e-06, + "loss": 0.8409, "step": 20795 }, { - "epoch": 0.5901248581157775, + "epoch": 0.589305449290147, "grad_norm": 0.0, - "learning_rate": 7.591127036181669e-06, - "loss": 0.9293, + "learning_rate": 7.616715459562498e-06, + "loss": 0.9767, "step": 20796 }, { - "epoch": 0.5901532349602724, + "epoch": 0.5893337867324095, "grad_norm": 0.0, - "learning_rate": 7.590235032292863e-06, - "loss": 0.7899, + "learning_rate": 7.615824126791534e-06, + "loss": 0.7975, "step": 20797 }, { - "epoch": 0.5901816118047674, + "epoch": 0.589362124174672, "grad_norm": 0.0, - "learning_rate": 7.589343048759449e-06, - "loss": 0.7667, + "learning_rate": 7.614932814103375e-06, + "loss": 0.7439, "step": 20798 }, { - "epoch": 0.5902099886492622, + "epoch": 0.5893904616169344, "grad_norm": 0.0, - "learning_rate": 7.58845108558896e-06, - "loss": 0.7715, + "learning_rate": 7.614041521505517e-06, + "loss": 0.8284, "step": 20799 }, { - "epoch": 0.5902383654937571, + "epoch": 0.5894187990591969, "grad_norm": 0.0, - "learning_rate": 7.587559142788936e-06, - "loss": 0.8532, + "learning_rate": 7.613150249005473e-06, + "loss": 0.8529, "step": 20800 }, { - "epoch": 0.5902667423382519, + "epoch": 0.5894471365014594, "grad_norm": 0.0, - "learning_rate": 7.586667220366909e-06, - "loss": 0.8395, + "learning_rate": 7.612258996610756e-06, + "loss": 0.8864, "step": 20801 }, { - "epoch": 0.5902951191827469, + "epoch": 0.5894754739437218, "grad_norm": 0.0, - "learning_rate": 7.58577531833041e-06, - "loss": 0.9548, + "learning_rate": 7.611367764328863e-06, + "loss": 0.8833, "step": 20802 }, { - "epoch": 0.5903234960272418, + "epoch": 0.5895038113859843, "grad_norm": 0.0, - "learning_rate": 7.584883436686976e-06, - "loss": 0.8558, + "learning_rate": 7.610476552167309e-06, + "loss": 0.9573, "step": 20803 }, { - "epoch": 0.5903518728717366, + "epoch": 0.5895321488282468, "grad_norm": 0.0, - "learning_rate": 7.583991575444143e-06, - "loss": 0.7777, + "learning_rate": 7.609585360133596e-06, + "loss": 0.8763, "step": 20804 }, { - "epoch": 0.5903802497162316, + "epoch": 0.5895604862705093, "grad_norm": 0.0, - "learning_rate": 7.583099734609437e-06, - "loss": 0.957, + "learning_rate": 7.608694188235234e-06, + "loss": 0.8092, "step": 20805 }, { - "epoch": 0.5904086265607265, + "epoch": 0.5895888237127717, "grad_norm": 0.0, - "learning_rate": 7.582207914190399e-06, - "loss": 0.773, + "learning_rate": 7.607803036479731e-06, + "loss": 0.9286, "step": 20806 }, { - "epoch": 0.5904370034052213, + "epoch": 0.5896171611550342, "grad_norm": 0.0, - "learning_rate": 7.5813161141945605e-06, - "loss": 0.7786, + "learning_rate": 7.60691190487459e-06, + "loss": 0.9704, "step": 20807 }, { - "epoch": 0.5904653802497162, + "epoch": 0.5896454985972966, "grad_norm": 0.0, - "learning_rate": 7.58042433462945e-06, - "loss": 0.9084, + "learning_rate": 7.606020793427316e-06, + "loss": 0.9303, "step": 20808 }, { - "epoch": 0.5904937570942111, + "epoch": 0.589673836039559, "grad_norm": 0.0, - "learning_rate": 7.5795325755026075e-06, - "loss": 0.8975, + "learning_rate": 7.605129702145422e-06, + "loss": 0.9067, "step": 20809 }, { - "epoch": 0.590522133938706, + "epoch": 0.5897021734818215, "grad_norm": 0.0, - "learning_rate": 7.578640836821561e-06, - "loss": 0.8089, + "learning_rate": 7.6042386310364055e-06, + "loss": 0.8183, "step": 20810 }, { - "epoch": 0.5905505107832009, + "epoch": 0.589730510924084, "grad_norm": 0.0, - "learning_rate": 7.57774911859384e-06, - "loss": 0.8759, + "learning_rate": 7.603347580107782e-06, + "loss": 0.8244, "step": 20811 }, { - "epoch": 0.5905788876276958, + "epoch": 0.5897588483663464, "grad_norm": 0.0, - "learning_rate": 7.576857420826987e-06, - "loss": 0.8652, + "learning_rate": 7.6024565493670485e-06, + "loss": 1.0576, "step": 20812 }, { - "epoch": 0.5906072644721907, + "epoch": 0.5897871858086089, "grad_norm": 0.0, - "learning_rate": 7.575965743528524e-06, - "loss": 0.8363, + "learning_rate": 7.601565538821714e-06, + "loss": 0.8936, "step": 20813 }, { - "epoch": 0.5906356413166856, + "epoch": 0.5898155232508714, "grad_norm": 0.0, - "learning_rate": 7.57507408670599e-06, - "loss": 0.8278, + "learning_rate": 7.6006745484792855e-06, + "loss": 0.7064, "step": 20814 }, { - "epoch": 0.5906640181611805, + "epoch": 0.5898438606931339, "grad_norm": 0.0, - "learning_rate": 7.574182450366915e-06, + "learning_rate": 7.599783578347264e-06, "loss": 0.882, "step": 20815 }, { - "epoch": 0.5906923950056754, + "epoch": 0.5898721981353963, "grad_norm": 0.0, - "learning_rate": 7.5732908345188276e-06, - "loss": 0.7231, + "learning_rate": 7.598892628433157e-06, + "loss": 0.8793, "step": 20816 }, { - "epoch": 0.5907207718501702, + "epoch": 0.5899005355776588, "grad_norm": 0.0, - "learning_rate": 7.572399239169263e-06, - "loss": 0.8848, + "learning_rate": 7.598001698744469e-06, + "loss": 0.9536, "step": 20817 }, { - "epoch": 0.5907491486946651, + "epoch": 0.5899288730199213, "grad_norm": 0.0, - "learning_rate": 7.571507664325751e-06, - "loss": 0.8855, + "learning_rate": 7.597110789288704e-06, + "loss": 0.7734, "step": 20818 }, { - "epoch": 0.5907775255391601, + "epoch": 0.5899572104621836, "grad_norm": 0.0, - "learning_rate": 7.570616109995821e-06, - "loss": 0.8632, + "learning_rate": 7.596219900073372e-06, + "loss": 0.8584, "step": 20819 }, { - "epoch": 0.5908059023836549, + "epoch": 0.5899855479044461, "grad_norm": 0.0, - "learning_rate": 7.569724576187009e-06, - "loss": 0.8293, + "learning_rate": 7.595329031105967e-06, + "loss": 0.9441, "step": 20820 }, { - "epoch": 0.5908342792281498, + "epoch": 0.5900138853467086, "grad_norm": 0.0, - "learning_rate": 7.568833062906842e-06, - "loss": 0.8539, + "learning_rate": 7.5944381823939985e-06, + "loss": 0.8907, "step": 20821 }, { - "epoch": 0.5908626560726448, + "epoch": 0.5900422227889711, "grad_norm": 0.0, - "learning_rate": 7.567941570162849e-06, - "loss": 0.8243, + "learning_rate": 7.593547353944972e-06, + "loss": 0.8948, "step": 20822 }, { - "epoch": 0.5908910329171396, + "epoch": 0.5900705602312335, "grad_norm": 0.0, - "learning_rate": 7.567050097962566e-06, - "loss": 0.8956, + "learning_rate": 7.592656545766389e-06, + "loss": 0.9529, "step": 20823 }, { - "epoch": 0.5909194097616345, + "epoch": 0.590098897673496, "grad_norm": 0.0, - "learning_rate": 7.56615864631352e-06, - "loss": 0.935, + "learning_rate": 7.591765757865753e-06, + "loss": 0.8663, "step": 20824 }, { - "epoch": 0.5909477866061293, + "epoch": 0.5901272351157585, "grad_norm": 0.0, - "learning_rate": 7.565267215223238e-06, - "loss": 0.7227, + "learning_rate": 7.590874990250574e-06, + "loss": 0.8869, "step": 20825 }, { - "epoch": 0.5909761634506243, + "epoch": 0.5901555725580209, "grad_norm": 0.0, - "learning_rate": 7.564375804699257e-06, - "loss": 0.8675, + "learning_rate": 7.5899842429283434e-06, + "loss": 0.9651, "step": 20826 }, { - "epoch": 0.5910045402951192, + "epoch": 0.5901839100002834, "grad_norm": 0.0, - "learning_rate": 7.563484414749097e-06, - "loss": 0.9464, + "learning_rate": 7.589093515906574e-06, + "loss": 0.8936, "step": 20827 }, { - "epoch": 0.591032917139614, + "epoch": 0.5902122474425459, "grad_norm": 0.0, - "learning_rate": 7.5625930453802995e-06, - "loss": 0.8721, + "learning_rate": 7.588202809192762e-06, + "loss": 0.844, "step": 20828 }, { - "epoch": 0.591061293984109, + "epoch": 0.5902405848848084, "grad_norm": 0.0, - "learning_rate": 7.561701696600387e-06, - "loss": 0.8905, + "learning_rate": 7.587312122794414e-06, + "loss": 0.896, "step": 20829 }, { - "epoch": 0.5910896708286039, + "epoch": 0.5902689223270707, "grad_norm": 0.0, - "learning_rate": 7.5608103684168885e-06, - "loss": 0.8239, + "learning_rate": 7.586421456719037e-06, + "loss": 0.8343, "step": 20830 }, { - "epoch": 0.5911180476730987, + "epoch": 0.5902972597693332, "grad_norm": 0.0, - "learning_rate": 7.559919060837337e-06, - "loss": 0.7335, + "learning_rate": 7.585530810974122e-06, + "loss": 0.8658, "step": 20831 }, { - "epoch": 0.5911464245175937, + "epoch": 0.5903255972115957, "grad_norm": 0.0, - "learning_rate": 7.559027773869259e-06, - "loss": 0.7597, + "learning_rate": 7.584640185567184e-06, + "loss": 0.808, "step": 20832 }, { - "epoch": 0.5911748013620886, + "epoch": 0.5903539346538581, "grad_norm": 0.0, - "learning_rate": 7.55813650752018e-06, - "loss": 0.8552, + "learning_rate": 7.583749580505712e-06, + "loss": 0.8158, "step": 20833 }, { - "epoch": 0.5912031782065834, + "epoch": 0.5903822720961206, "grad_norm": 0.0, - "learning_rate": 7.557245261797633e-06, - "loss": 0.9158, + "learning_rate": 7.582858995797217e-06, + "loss": 0.7782, "step": 20834 }, { - "epoch": 0.5912315550510783, + "epoch": 0.5904106095383831, "grad_norm": 0.0, - "learning_rate": 7.556354036709147e-06, - "loss": 0.9702, + "learning_rate": 7.5819684314491984e-06, + "loss": 0.8481, "step": 20835 }, { - "epoch": 0.5912599318955732, + "epoch": 0.5904389469806455, "grad_norm": 0.0, - "learning_rate": 7.5554628322622446e-06, - "loss": 0.9269, + "learning_rate": 7.581077887469157e-06, + "loss": 0.8911, "step": 20836 }, { - "epoch": 0.5912883087400681, + "epoch": 0.590467284422908, "grad_norm": 0.0, - "learning_rate": 7.554571648464461e-06, - "loss": 0.8737, + "learning_rate": 7.580187363864593e-06, + "loss": 0.8871, "step": 20837 }, { - "epoch": 0.591316685584563, + "epoch": 0.5904956218651705, "grad_norm": 0.0, - "learning_rate": 7.5536804853233206e-06, - "loss": 0.872, + "learning_rate": 7.579296860643015e-06, + "loss": 0.8884, "step": 20838 }, { - "epoch": 0.5913450624290579, + "epoch": 0.590523959307433, "grad_norm": 0.0, - "learning_rate": 7.552789342846347e-06, - "loss": 0.8032, + "learning_rate": 7.578406377811914e-06, + "loss": 0.8976, "step": 20839 }, { - "epoch": 0.5913734392735528, + "epoch": 0.5905522967496953, "grad_norm": 0.0, - "learning_rate": 7.551898221041076e-06, - "loss": 0.8933, + "learning_rate": 7.577515915378798e-06, + "loss": 0.8324, "step": 20840 }, { - "epoch": 0.5914018161180477, + "epoch": 0.5905806341919578, "grad_norm": 0.0, - "learning_rate": 7.5510071199150305e-06, - "loss": 0.9422, + "learning_rate": 7.576625473351162e-06, + "loss": 0.9006, "step": 20841 }, { - "epoch": 0.5914301929625425, + "epoch": 0.5906089716342203, "grad_norm": 0.0, - "learning_rate": 7.550116039475734e-06, - "loss": 0.8059, + "learning_rate": 7.57573505173651e-06, + "loss": 0.7461, "step": 20842 }, { - "epoch": 0.5914585698070375, + "epoch": 0.5906373090764827, "grad_norm": 0.0, - "learning_rate": 7.549224979730718e-06, - "loss": 0.9683, + "learning_rate": 7.574844650542346e-06, + "loss": 0.873, "step": 20843 }, { - "epoch": 0.5914869466515323, + "epoch": 0.5906656465187452, "grad_norm": 0.0, - "learning_rate": 7.548333940687512e-06, - "loss": 0.8167, + "learning_rate": 7.5739542697761615e-06, + "loss": 0.8686, "step": 20844 }, { - "epoch": 0.5915153234960272, + "epoch": 0.5906939839610077, "grad_norm": 0.0, - "learning_rate": 7.547442922353639e-06, - "loss": 0.7832, + "learning_rate": 7.573063909445462e-06, + "loss": 0.8, "step": 20845 }, { - "epoch": 0.5915437003405222, + "epoch": 0.5907223214032702, "grad_norm": 0.0, - "learning_rate": 7.546551924736625e-06, - "loss": 0.9702, + "learning_rate": 7.5721735695577494e-06, + "loss": 0.851, "step": 20846 }, { - "epoch": 0.591572077185017, + "epoch": 0.5907506588455326, "grad_norm": 0.0, - "learning_rate": 7.5456609478439975e-06, - "loss": 0.7591, + "learning_rate": 7.5712832501205165e-06, + "loss": 0.8532, "step": 20847 }, { - "epoch": 0.5916004540295119, + "epoch": 0.5907789962877951, "grad_norm": 0.0, - "learning_rate": 7.544769991683283e-06, - "loss": 0.867, + "learning_rate": 7.57039295114127e-06, + "loss": 0.934, "step": 20848 }, { - "epoch": 0.5916288308740069, + "epoch": 0.5908073337300576, "grad_norm": 0.0, - "learning_rate": 7.543879056262004e-06, - "loss": 0.8952, + "learning_rate": 7.569502672627502e-06, + "loss": 0.8831, "step": 20849 }, { - "epoch": 0.5916572077185017, + "epoch": 0.5908356711723199, "grad_norm": 0.0, - "learning_rate": 7.542988141587692e-06, - "loss": 0.9204, + "learning_rate": 7.568612414586717e-06, + "loss": 0.885, "step": 20850 }, { - "epoch": 0.5916855845629966, + "epoch": 0.5908640086145824, "grad_norm": 0.0, - "learning_rate": 7.54209724766787e-06, - "loss": 0.762, + "learning_rate": 7.5677221770264154e-06, + "loss": 0.8648, "step": 20851 }, { - "epoch": 0.5917139614074914, + "epoch": 0.5908923460568449, "grad_norm": 0.0, - "learning_rate": 7.5412063745100624e-06, - "loss": 0.8412, + "learning_rate": 7.56683195995409e-06, + "loss": 0.8387, "step": 20852 }, { - "epoch": 0.5917423382519864, + "epoch": 0.5909206834991074, "grad_norm": 0.0, - "learning_rate": 7.540315522121794e-06, - "loss": 0.9115, + "learning_rate": 7.565941763377244e-06, + "loss": 0.8165, "step": 20853 }, { - "epoch": 0.5917707150964813, + "epoch": 0.5909490209413698, "grad_norm": 0.0, - "learning_rate": 7.539424690510592e-06, - "loss": 0.8677, + "learning_rate": 7.565051587303373e-06, + "loss": 0.844, "step": 20854 }, { - "epoch": 0.5917990919409761, + "epoch": 0.5909773583836323, "grad_norm": 0.0, - "learning_rate": 7.5385338796839805e-06, - "loss": 0.9275, + "learning_rate": 7.5641614317399755e-06, + "loss": 0.7967, "step": 20855 }, { - "epoch": 0.5918274687854711, + "epoch": 0.5910056958258948, "grad_norm": 0.0, - "learning_rate": 7.537643089649482e-06, - "loss": 0.8121, + "learning_rate": 7.563271296694555e-06, + "loss": 0.8825, "step": 20856 }, { - "epoch": 0.591855845629966, + "epoch": 0.5910340332681572, "grad_norm": 0.0, - "learning_rate": 7.5367523204146195e-06, - "loss": 0.8481, + "learning_rate": 7.562381182174603e-06, + "loss": 0.9282, "step": 20857 }, { - "epoch": 0.5918842224744608, + "epoch": 0.5910623707104197, "grad_norm": 0.0, - "learning_rate": 7.535861571986927e-06, - "loss": 0.9692, + "learning_rate": 7.561491088187618e-06, + "loss": 0.816, "step": 20858 }, { - "epoch": 0.5919125993189557, + "epoch": 0.5910907081526822, "grad_norm": 0.0, - "learning_rate": 7.534970844373922e-06, - "loss": 0.8258, + "learning_rate": 7.560601014741103e-06, + "loss": 0.9816, "step": 20859 }, { - "epoch": 0.5919409761634506, + "epoch": 0.5911190455949445, "grad_norm": 0.0, - "learning_rate": 7.534080137583128e-06, - "loss": 0.8904, + "learning_rate": 7.559710961842548e-06, + "loss": 0.7255, "step": 20860 }, { - "epoch": 0.5919693530079455, + "epoch": 0.591147383037207, "grad_norm": 0.0, - "learning_rate": 7.5331894516220696e-06, - "loss": 0.759, + "learning_rate": 7.558820929499455e-06, + "loss": 0.8162, "step": 20861 }, { - "epoch": 0.5919977298524404, + "epoch": 0.5911757204794695, "grad_norm": 0.0, - "learning_rate": 7.5322987864982725e-06, - "loss": 0.7676, + "learning_rate": 7.5579309177193185e-06, + "loss": 0.9028, "step": 20862 }, { - "epoch": 0.5920261066969353, + "epoch": 0.591204057921732, "grad_norm": 0.0, - "learning_rate": 7.531408142219257e-06, - "loss": 0.8097, + "learning_rate": 7.557040926509637e-06, + "loss": 0.835, "step": 20863 }, { - "epoch": 0.5920544835414302, + "epoch": 0.5912323953639944, "grad_norm": 0.0, - "learning_rate": 7.530517518792548e-06, - "loss": 0.749, + "learning_rate": 7.55615095587791e-06, + "loss": 0.8181, "step": 20864 }, { - "epoch": 0.5920828603859251, + "epoch": 0.5912607328062569, "grad_norm": 0.0, - "learning_rate": 7.52962691622567e-06, - "loss": 0.8023, + "learning_rate": 7.555261005831628e-06, + "loss": 0.9179, "step": 20865 }, { - "epoch": 0.59211123723042, + "epoch": 0.5912890702485194, "grad_norm": 0.0, - "learning_rate": 7.528736334526141e-06, - "loss": 0.8226, + "learning_rate": 7.55437107637829e-06, + "loss": 0.8472, "step": 20866 }, { - "epoch": 0.5921396140749149, + "epoch": 0.5913174076907818, "grad_norm": 0.0, - "learning_rate": 7.52784577370149e-06, - "loss": 0.7241, + "learning_rate": 7.553481167525394e-06, + "loss": 0.9258, "step": 20867 }, { - "epoch": 0.5921679909194097, + "epoch": 0.5913457451330443, "grad_norm": 0.0, - "learning_rate": 7.526955233759238e-06, - "loss": 0.9722, + "learning_rate": 7.552591279280434e-06, + "loss": 0.8284, "step": 20868 }, { - "epoch": 0.5921963677639046, + "epoch": 0.5913740825753068, "grad_norm": 0.0, - "learning_rate": 7.5260647147069036e-06, - "loss": 1.0038, + "learning_rate": 7.5517014116509094e-06, + "loss": 0.7781, "step": 20869 }, { - "epoch": 0.5922247446083996, + "epoch": 0.5914024200175693, "grad_norm": 0.0, - "learning_rate": 7.525174216552014e-06, - "loss": 0.8048, + "learning_rate": 7.550811564644309e-06, + "loss": 0.9194, "step": 20870 }, { - "epoch": 0.5922531214528944, + "epoch": 0.5914307574598316, "grad_norm": 0.0, - "learning_rate": 7.524283739302087e-06, - "loss": 0.8434, + "learning_rate": 7.549921738268132e-06, + "loss": 0.9077, "step": 20871 }, { - "epoch": 0.5922814982973893, + "epoch": 0.5914590949020941, "grad_norm": 0.0, - "learning_rate": 7.523393282964646e-06, - "loss": 0.8045, + "learning_rate": 7.549031932529879e-06, + "loss": 0.8909, "step": 20872 }, { - "epoch": 0.5923098751418843, + "epoch": 0.5914874323443566, "grad_norm": 0.0, - "learning_rate": 7.522502847547214e-06, - "loss": 0.7911, + "learning_rate": 7.5481421474370354e-06, + "loss": 0.7972, "step": 20873 }, { - "epoch": 0.5923382519863791, + "epoch": 0.591515769786619, "grad_norm": 0.0, - "learning_rate": 7.521612433057312e-06, - "loss": 0.7433, + "learning_rate": 7.547252382997101e-06, + "loss": 0.9335, "step": 20874 }, { - "epoch": 0.592366628830874, + "epoch": 0.5915441072288815, "grad_norm": 0.0, - "learning_rate": 7.5207220395024615e-06, - "loss": 0.9131, + "learning_rate": 7.546362639217572e-06, + "loss": 0.9583, "step": 20875 }, { - "epoch": 0.5923950056753688, + "epoch": 0.591572444671144, "grad_norm": 0.0, - "learning_rate": 7.519831666890185e-06, - "loss": 0.8573, + "learning_rate": 7.545472916105941e-06, + "loss": 0.8175, "step": 20876 }, { - "epoch": 0.5924233825198638, + "epoch": 0.5916007821134065, "grad_norm": 0.0, - "learning_rate": 7.518941315227999e-06, - "loss": 0.8642, + "learning_rate": 7.544583213669707e-06, + "loss": 0.8376, "step": 20877 }, { - "epoch": 0.5924517593643587, + "epoch": 0.5916291195556689, "grad_norm": 0.0, - "learning_rate": 7.518050984523429e-06, - "loss": 0.8768, + "learning_rate": 7.5436935319163565e-06, + "loss": 0.9164, "step": 20878 }, { - "epoch": 0.5924801362088535, + "epoch": 0.5916574569979314, "grad_norm": 0.0, - "learning_rate": 7.517160674783994e-06, - "loss": 0.8656, + "learning_rate": 7.5428038708533856e-06, + "loss": 0.8825, "step": 20879 }, { - "epoch": 0.5925085130533485, + "epoch": 0.5916857944401939, "grad_norm": 0.0, - "learning_rate": 7.516270386017214e-06, - "loss": 0.9116, + "learning_rate": 7.541914230488294e-06, + "loss": 0.8863, "step": 20880 }, { - "epoch": 0.5925368898978434, + "epoch": 0.5917141318824563, "grad_norm": 0.0, - "learning_rate": 7.51538011823061e-06, - "loss": 0.9028, + "learning_rate": 7.541024610828569e-06, + "loss": 0.7763, "step": 20881 }, { - "epoch": 0.5925652667423382, + "epoch": 0.5917424693247187, "grad_norm": 0.0, - "learning_rate": 7.514489871431703e-06, - "loss": 0.9087, + "learning_rate": 7.5401350118817106e-06, + "loss": 0.7616, "step": 20882 }, { - "epoch": 0.5925936435868332, + "epoch": 0.5917708067669812, "grad_norm": 0.0, - "learning_rate": 7.513599645628008e-06, - "loss": 0.8121, + "learning_rate": 7.539245433655204e-06, + "loss": 0.8621, "step": 20883 }, { - "epoch": 0.5926220204313281, + "epoch": 0.5917991442092436, "grad_norm": 0.0, - "learning_rate": 7.512709440827052e-06, - "loss": 0.8709, + "learning_rate": 7.538355876156549e-06, + "loss": 0.9023, "step": 20884 }, { - "epoch": 0.5926503972758229, + "epoch": 0.5918274816515061, "grad_norm": 0.0, - "learning_rate": 7.51181925703635e-06, - "loss": 0.8362, + "learning_rate": 7.537466339393239e-06, + "loss": 0.9203, "step": 20885 }, { - "epoch": 0.5926787741203178, + "epoch": 0.5918558190937686, "grad_norm": 0.0, - "learning_rate": 7.5109290942634216e-06, - "loss": 0.8174, + "learning_rate": 7.536576823372761e-06, + "loss": 0.8472, "step": 20886 }, { - "epoch": 0.5927071509648127, + "epoch": 0.5918841565360311, "grad_norm": 0.0, - "learning_rate": 7.510038952515789e-06, - "loss": 0.8511, + "learning_rate": 7.53568732810261e-06, + "loss": 0.7974, "step": 20887 }, { - "epoch": 0.5927355278093076, + "epoch": 0.5919124939782935, "grad_norm": 0.0, - "learning_rate": 7.509148831800965e-06, - "loss": 0.8742, + "learning_rate": 7.534797853590283e-06, + "loss": 0.9174, "step": 20888 }, { - "epoch": 0.5927639046538025, + "epoch": 0.591940831420556, "grad_norm": 0.0, - "learning_rate": 7.508258732126477e-06, - "loss": 0.8158, + "learning_rate": 7.533908399843266e-06, + "loss": 0.9325, "step": 20889 }, { - "epoch": 0.5927922814982974, + "epoch": 0.5919691688628185, "grad_norm": 0.0, - "learning_rate": 7.507368653499838e-06, - "loss": 0.7519, + "learning_rate": 7.53301896686906e-06, + "loss": 0.7692, "step": 20890 }, { - "epoch": 0.5928206583427923, + "epoch": 0.5919975063050809, "grad_norm": 0.0, - "learning_rate": 7.506478595928569e-06, - "loss": 0.9208, + "learning_rate": 7.532129554675146e-06, + "loss": 0.7506, "step": 20891 }, { - "epoch": 0.5928490351872872, + "epoch": 0.5920258437473433, "grad_norm": 0.0, - "learning_rate": 7.505588559420188e-06, - "loss": 0.7386, + "learning_rate": 7.531240163269021e-06, + "loss": 0.7999, "step": 20892 }, { - "epoch": 0.592877412031782, + "epoch": 0.5920541811896058, "grad_norm": 0.0, - "learning_rate": 7.504698543982213e-06, - "loss": 0.8918, + "learning_rate": 7.5303507926581795e-06, + "loss": 0.8855, "step": 20893 }, { - "epoch": 0.592905788876277, + "epoch": 0.5920825186318683, "grad_norm": 0.0, - "learning_rate": 7.503808549622158e-06, - "loss": 0.7762, + "learning_rate": 7.529461442850108e-06, + "loss": 0.8923, "step": 20894 }, { - "epoch": 0.5929341657207718, + "epoch": 0.5921108560741307, "grad_norm": 0.0, - "learning_rate": 7.502918576347548e-06, - "loss": 0.9306, + "learning_rate": 7.528572113852301e-06, + "loss": 0.8331, "step": 20895 }, { - "epoch": 0.5929625425652667, + "epoch": 0.5921391935163932, "grad_norm": 0.0, - "learning_rate": 7.502028624165896e-06, - "loss": 0.9123, + "learning_rate": 7.527682805672252e-06, + "loss": 0.8489, "step": 20896 }, { - "epoch": 0.5929909194097617, + "epoch": 0.5921675309586557, "grad_norm": 0.0, - "learning_rate": 7.50113869308472e-06, - "loss": 0.8835, + "learning_rate": 7.526793518317446e-06, + "loss": 0.8753, "step": 20897 }, { - "epoch": 0.5930192962542565, + "epoch": 0.5921958684009181, "grad_norm": 0.0, - "learning_rate": 7.5002487831115375e-06, - "loss": 0.781, + "learning_rate": 7.52590425179538e-06, + "loss": 0.8374, "step": 20898 }, { - "epoch": 0.5930476730987514, + "epoch": 0.5922242058431806, "grad_norm": 0.0, - "learning_rate": 7.4993588942538675e-06, - "loss": 0.8309, + "learning_rate": 7.525015006113537e-06, + "loss": 0.8554, "step": 20899 }, { - "epoch": 0.5930760499432464, + "epoch": 0.5922525432854431, "grad_norm": 0.0, - "learning_rate": 7.498469026519223e-06, - "loss": 0.8178, + "learning_rate": 7.52412578127941e-06, + "loss": 0.7732, "step": 20900 }, { - "epoch": 0.5931044267877412, + "epoch": 0.5922808807277056, "grad_norm": 0.0, - "learning_rate": 7.497579179915124e-06, - "loss": 0.832, + "learning_rate": 7.5232365773004945e-06, + "loss": 0.8831, "step": 20901 }, { - "epoch": 0.5931328036322361, + "epoch": 0.592309218169968, "grad_norm": 0.0, - "learning_rate": 7.496689354449088e-06, - "loss": 0.8967, + "learning_rate": 7.522347394184275e-06, + "loss": 0.9818, "step": 20902 }, { - "epoch": 0.5931611804767309, + "epoch": 0.5923375556122304, "grad_norm": 0.0, - "learning_rate": 7.495799550128625e-06, - "loss": 0.8451, + "learning_rate": 7.521458231938244e-06, + "loss": 0.936, "step": 20903 }, { - "epoch": 0.5931895573212259, + "epoch": 0.5923658930544929, "grad_norm": 0.0, - "learning_rate": 7.494909766961258e-06, - "loss": 0.9629, + "learning_rate": 7.520569090569894e-06, + "loss": 0.8288, "step": 20904 }, { - "epoch": 0.5932179341657208, + "epoch": 0.5923942304967553, "grad_norm": 0.0, - "learning_rate": 7.494020004954501e-06, - "loss": 0.7716, + "learning_rate": 7.5196799700867075e-06, + "loss": 0.8355, "step": 20905 }, { - "epoch": 0.5932463110102156, + "epoch": 0.5924225679390178, "grad_norm": 0.0, - "learning_rate": 7.493130264115871e-06, - "loss": 0.8198, + "learning_rate": 7.518790870496178e-06, + "loss": 0.9052, "step": 20906 }, { - "epoch": 0.5932746878547106, + "epoch": 0.5924509053812803, "grad_norm": 0.0, - "learning_rate": 7.492240544452881e-06, - "loss": 0.8392, + "learning_rate": 7.517901791805795e-06, + "loss": 0.7949, "step": 20907 }, { - "epoch": 0.5933030646992055, + "epoch": 0.5924792428235427, "grad_norm": 0.0, - "learning_rate": 7.491350845973049e-06, - "loss": 0.8736, + "learning_rate": 7.517012734023046e-06, + "loss": 0.7575, "step": 20908 }, { - "epoch": 0.5933314415437003, + "epoch": 0.5925075802658052, "grad_norm": 0.0, - "learning_rate": 7.490461168683889e-06, - "loss": 0.8327, + "learning_rate": 7.516123697155424e-06, + "loss": 0.8926, "step": 20909 }, { - "epoch": 0.5933598183881952, + "epoch": 0.5925359177080677, "grad_norm": 0.0, - "learning_rate": 7.489571512592915e-06, - "loss": 0.9576, + "learning_rate": 7.515234681210412e-06, + "loss": 0.7611, "step": 20910 }, { - "epoch": 0.5933881952326902, + "epoch": 0.5925642551503302, "grad_norm": 0.0, - "learning_rate": 7.488681877707645e-06, - "loss": 0.844, + "learning_rate": 7.514345686195503e-06, + "loss": 0.8729, "step": 20911 }, { - "epoch": 0.593416572077185, + "epoch": 0.5925925925925926, "grad_norm": 0.0, - "learning_rate": 7.487792264035593e-06, - "loss": 0.8908, + "learning_rate": 7.513456712118181e-06, + "loss": 0.9888, "step": 20912 }, { - "epoch": 0.5934449489216799, + "epoch": 0.592620930034855, "grad_norm": 0.0, - "learning_rate": 7.486902671584268e-06, - "loss": 0.7574, + "learning_rate": 7.512567758985936e-06, + "loss": 0.861, "step": 20913 }, { - "epoch": 0.5934733257661748, + "epoch": 0.5926492674771175, "grad_norm": 0.0, - "learning_rate": 7.486013100361193e-06, - "loss": 0.8467, + "learning_rate": 7.511678826806258e-06, + "loss": 0.9471, "step": 20914 }, { - "epoch": 0.5935017026106697, + "epoch": 0.5926776049193799, "grad_norm": 0.0, - "learning_rate": 7.485123550373879e-06, - "loss": 0.9418, + "learning_rate": 7.510789915586631e-06, + "loss": 0.8539, "step": 20915 }, { - "epoch": 0.5935300794551646, + "epoch": 0.5927059423616424, "grad_norm": 0.0, - "learning_rate": 7.484234021629837e-06, - "loss": 0.8704, + "learning_rate": 7.509901025334546e-06, + "loss": 0.8508, "step": 20916 }, { - "epoch": 0.5935584562996595, + "epoch": 0.5927342798039049, "grad_norm": 0.0, - "learning_rate": 7.483344514136584e-06, - "loss": 0.7982, + "learning_rate": 7.5090121560574924e-06, + "loss": 0.8153, "step": 20917 }, { - "epoch": 0.5935868331441544, + "epoch": 0.5927626172461674, "grad_norm": 0.0, - "learning_rate": 7.482455027901635e-06, - "loss": 0.9128, + "learning_rate": 7.5081233077629515e-06, + "loss": 0.8807, "step": 20918 }, { - "epoch": 0.5936152099886493, + "epoch": 0.5927909546884298, "grad_norm": 0.0, - "learning_rate": 7.481565562932496e-06, - "loss": 0.8603, + "learning_rate": 7.507234480458414e-06, + "loss": 0.8423, "step": 20919 }, { - "epoch": 0.5936435868331441, + "epoch": 0.5928192921306923, "grad_norm": 0.0, - "learning_rate": 7.48067611923669e-06, - "loss": 0.8893, + "learning_rate": 7.5063456741513655e-06, + "loss": 0.8549, "step": 20920 }, { - "epoch": 0.5936719636776391, + "epoch": 0.5928476295729548, "grad_norm": 0.0, - "learning_rate": 7.479786696821724e-06, - "loss": 0.9282, + "learning_rate": 7.505456888849292e-06, + "loss": 0.8251, "step": 20921 }, { - "epoch": 0.5937003405221339, + "epoch": 0.5928759670152172, "grad_norm": 0.0, - "learning_rate": 7.4788972956951164e-06, - "loss": 0.9231, + "learning_rate": 7.504568124559686e-06, + "loss": 0.7721, "step": 20922 }, { - "epoch": 0.5937287173666288, + "epoch": 0.5929043044574797, "grad_norm": 0.0, - "learning_rate": 7.478007915864376e-06, - "loss": 0.8376, + "learning_rate": 7.503679381290025e-06, + "loss": 0.7095, "step": 20923 }, { - "epoch": 0.5937570942111238, + "epoch": 0.5929326418997422, "grad_norm": 0.0, - "learning_rate": 7.4771185573370135e-06, - "loss": 0.8916, + "learning_rate": 7.5027906590478e-06, + "loss": 0.7625, "step": 20924 }, { - "epoch": 0.5937854710556186, + "epoch": 0.5929609793420046, "grad_norm": 0.0, - "learning_rate": 7.4762292201205466e-06, - "loss": 0.8597, + "learning_rate": 7.501901957840501e-06, + "loss": 0.8251, "step": 20925 }, { - "epoch": 0.5938138479001135, + "epoch": 0.592989316784267, "grad_norm": 0.0, - "learning_rate": 7.475339904222485e-06, - "loss": 0.8554, + "learning_rate": 7.501013277675605e-06, + "loss": 0.8774, "step": 20926 }, { - "epoch": 0.5938422247446083, + "epoch": 0.5930176542265295, "grad_norm": 0.0, - "learning_rate": 7.474450609650339e-06, - "loss": 0.8421, + "learning_rate": 7.500124618560605e-06, + "loss": 0.8015, "step": 20927 }, { - "epoch": 0.5938706015891033, + "epoch": 0.593045991668792, "grad_norm": 0.0, - "learning_rate": 7.4735613364116235e-06, - "loss": 0.8516, + "learning_rate": 7.49923598050298e-06, + "loss": 0.8044, "step": 20928 }, { - "epoch": 0.5938989784335982, + "epoch": 0.5930743291110544, "grad_norm": 0.0, - "learning_rate": 7.472672084513848e-06, - "loss": 0.912, + "learning_rate": 7.49834736351022e-06, + "loss": 0.8356, "step": 20929 }, { - "epoch": 0.593927355278093, + "epoch": 0.5931026665533169, "grad_norm": 0.0, - "learning_rate": 7.471782853964525e-06, - "loss": 0.821, + "learning_rate": 7.4974587675898134e-06, + "loss": 0.7896, "step": 20930 }, { - "epoch": 0.593955732122588, + "epoch": 0.5931310039955794, "grad_norm": 0.0, - "learning_rate": 7.470893644771167e-06, - "loss": 0.7538, + "learning_rate": 7.496570192749235e-06, + "loss": 0.9369, "step": 20931 }, { - "epoch": 0.5939841089670829, + "epoch": 0.5931593414378418, "grad_norm": 0.0, - "learning_rate": 7.4700044569412845e-06, - "loss": 0.8364, + "learning_rate": 7.4956816389959775e-06, + "loss": 0.7812, "step": 20932 }, { - "epoch": 0.5940124858115777, + "epoch": 0.5931876788801043, "grad_norm": 0.0, - "learning_rate": 7.469115290482385e-06, - "loss": 0.9086, + "learning_rate": 7.494793106337526e-06, + "loss": 0.9196, "step": 20933 }, { - "epoch": 0.5940408626560727, + "epoch": 0.5932160163223668, "grad_norm": 0.0, - "learning_rate": 7.4682261454019826e-06, - "loss": 0.8193, + "learning_rate": 7.493904594781358e-06, + "loss": 0.8386, "step": 20934 }, { - "epoch": 0.5940692395005676, + "epoch": 0.5932443537646293, "grad_norm": 0.0, - "learning_rate": 7.467337021707588e-06, - "loss": 0.8923, + "learning_rate": 7.493016104334968e-06, + "loss": 0.8703, "step": 20935 }, { - "epoch": 0.5940976163450624, + "epoch": 0.5932726912068916, "grad_norm": 0.0, - "learning_rate": 7.466447919406714e-06, - "loss": 0.9046, + "learning_rate": 7.49212763500583e-06, + "loss": 0.7663, "step": 20936 }, { - "epoch": 0.5941259931895573, + "epoch": 0.5933010286491541, "grad_norm": 0.0, - "learning_rate": 7.465558838506868e-06, - "loss": 0.9825, + "learning_rate": 7.491239186801431e-06, + "loss": 0.9019, "step": 20937 }, { - "epoch": 0.5941543700340522, + "epoch": 0.5933293660914166, "grad_norm": 0.0, - "learning_rate": 7.464669779015559e-06, - "loss": 0.8197, + "learning_rate": 7.490350759729259e-06, + "loss": 0.8859, "step": 20938 }, { - "epoch": 0.5941827468785471, + "epoch": 0.593357703533679, "grad_norm": 0.0, - "learning_rate": 7.463780740940299e-06, - "loss": 0.7193, + "learning_rate": 7.489462353796792e-06, + "loss": 0.8123, "step": 20939 }, { - "epoch": 0.594211123723042, + "epoch": 0.5933860409759415, "grad_norm": 0.0, - "learning_rate": 7.462891724288599e-06, - "loss": 0.8285, + "learning_rate": 7.488573969011521e-06, + "loss": 0.7866, "step": 20940 }, { - "epoch": 0.5942395005675369, + "epoch": 0.593414378418204, "grad_norm": 0.0, - "learning_rate": 7.462002729067963e-06, - "loss": 0.8834, + "learning_rate": 7.48768560538092e-06, + "loss": 0.8318, "step": 20941 }, { - "epoch": 0.5942678774120318, + "epoch": 0.5934427158604665, "grad_norm": 0.0, - "learning_rate": 7.461113755285907e-06, - "loss": 0.7433, + "learning_rate": 7.486797262912475e-06, + "loss": 0.7871, "step": 20942 }, { - "epoch": 0.5942962542565267, + "epoch": 0.5934710533027289, "grad_norm": 0.0, - "learning_rate": 7.460224802949936e-06, - "loss": 0.8969, + "learning_rate": 7.485908941613675e-06, + "loss": 0.9534, "step": 20943 }, { - "epoch": 0.5943246311010215, + "epoch": 0.5934993907449914, "grad_norm": 0.0, - "learning_rate": 7.459335872067559e-06, - "loss": 0.9919, + "learning_rate": 7.485020641491993e-06, + "loss": 0.7682, "step": 20944 }, { - "epoch": 0.5943530079455165, + "epoch": 0.5935277281872539, "grad_norm": 0.0, - "learning_rate": 7.458446962646288e-06, - "loss": 0.8304, + "learning_rate": 7.484132362554915e-06, + "loss": 0.7838, "step": 20945 }, { - "epoch": 0.5943813847900113, + "epoch": 0.5935560656295162, "grad_norm": 0.0, - "learning_rate": 7.457558074693631e-06, - "loss": 0.809, + "learning_rate": 7.483244104809928e-06, + "loss": 0.8253, "step": 20946 }, { - "epoch": 0.5944097616345062, + "epoch": 0.5935844030717787, "grad_norm": 0.0, - "learning_rate": 7.456669208217091e-06, - "loss": 0.8582, + "learning_rate": 7.482355868264508e-06, + "loss": 0.7892, "step": 20947 }, { - "epoch": 0.5944381384790012, + "epoch": 0.5936127405140412, "grad_norm": 0.0, - "learning_rate": 7.455780363224184e-06, - "loss": 0.8869, + "learning_rate": 7.4814676529261435e-06, + "loss": 0.7322, "step": 20948 }, { - "epoch": 0.594466515323496, + "epoch": 0.5936410779563036, "grad_norm": 0.0, - "learning_rate": 7.454891539722411e-06, - "loss": 1.0362, + "learning_rate": 7.4805794588023086e-06, + "loss": 0.86, "step": 20949 }, { - "epoch": 0.5944948921679909, + "epoch": 0.5936694153985661, "grad_norm": 0.0, - "learning_rate": 7.454002737719286e-06, - "loss": 0.8524, + "learning_rate": 7.479691285900487e-06, + "loss": 0.7937, "step": 20950 }, { - "epoch": 0.5945232690124859, + "epoch": 0.5936977528408286, "grad_norm": 0.0, - "learning_rate": 7.4531139572223135e-06, - "loss": 0.8473, + "learning_rate": 7.4788031342281644e-06, + "loss": 0.7714, "step": 20951 }, { - "epoch": 0.5945516458569807, + "epoch": 0.5937260902830911, "grad_norm": 0.0, - "learning_rate": 7.452225198239004e-06, - "loss": 0.8952, + "learning_rate": 7.477915003792817e-06, + "loss": 0.8015, "step": 20952 }, { - "epoch": 0.5945800227014756, + "epoch": 0.5937544277253535, "grad_norm": 0.0, - "learning_rate": 7.451336460776861e-06, - "loss": 0.8352, + "learning_rate": 7.477026894601929e-06, + "loss": 0.7916, "step": 20953 }, { - "epoch": 0.5946083995459704, + "epoch": 0.593782765167616, "grad_norm": 0.0, - "learning_rate": 7.450447744843394e-06, - "loss": 0.8347, + "learning_rate": 7.476138806662983e-06, + "loss": 0.8138, "step": 20954 }, { - "epoch": 0.5946367763904654, + "epoch": 0.5938111026098785, "grad_norm": 0.0, - "learning_rate": 7.44955905044611e-06, - "loss": 0.7711, + "learning_rate": 7.475250739983454e-06, + "loss": 0.8341, "step": 20955 }, { - "epoch": 0.5946651532349603, + "epoch": 0.5938394400521408, "grad_norm": 0.0, - "learning_rate": 7.448670377592515e-06, - "loss": 0.8222, + "learning_rate": 7.4743626945708294e-06, + "loss": 0.8561, "step": 20956 }, { - "epoch": 0.5946935300794551, + "epoch": 0.5938677774944033, "grad_norm": 0.0, - "learning_rate": 7.447781726290116e-06, - "loss": 0.8159, + "learning_rate": 7.473474670432581e-06, + "loss": 0.878, "step": 20957 }, { - "epoch": 0.5947219069239501, + "epoch": 0.5938961149366658, "grad_norm": 0.0, - "learning_rate": 7.4468930965464194e-06, - "loss": 0.8575, + "learning_rate": 7.472586667576194e-06, + "loss": 0.8846, "step": 20958 }, { - "epoch": 0.594750283768445, + "epoch": 0.5939244523789283, "grad_norm": 0.0, - "learning_rate": 7.446004488368933e-06, - "loss": 0.8314, + "learning_rate": 7.47169868600915e-06, + "loss": 0.96, "step": 20959 }, { - "epoch": 0.5947786606129398, + "epoch": 0.5939527898211907, "grad_norm": 0.0, - "learning_rate": 7.4451159017651605e-06, - "loss": 0.8713, + "learning_rate": 7.4708107257389265e-06, + "loss": 0.8298, "step": 20960 }, { - "epoch": 0.5948070374574347, + "epoch": 0.5939811272634532, "grad_norm": 0.0, - "learning_rate": 7.444227336742608e-06, - "loss": 0.9538, + "learning_rate": 7.469922786773e-06, + "loss": 0.876, "step": 20961 }, { - "epoch": 0.5948354143019297, + "epoch": 0.5940094647057157, "grad_norm": 0.0, - "learning_rate": 7.443338793308783e-06, - "loss": 0.8707, + "learning_rate": 7.469034869118861e-06, + "loss": 0.9106, "step": 20962 }, { - "epoch": 0.5948637911464245, + "epoch": 0.5940378021479781, "grad_norm": 0.0, - "learning_rate": 7.442450271471191e-06, - "loss": 0.887, + "learning_rate": 7.468146972783976e-06, + "loss": 0.855, "step": 20963 }, { - "epoch": 0.5948921679909194, + "epoch": 0.5940661395902406, "grad_norm": 0.0, - "learning_rate": 7.441561771237333e-06, - "loss": 0.8658, + "learning_rate": 7.4672590977758295e-06, + "loss": 0.8038, "step": 20964 }, { - "epoch": 0.5949205448354143, + "epoch": 0.5940944770325031, "grad_norm": 0.0, - "learning_rate": 7.440673292614717e-06, - "loss": 0.9253, + "learning_rate": 7.4663712441019e-06, + "loss": 0.8936, "step": 20965 }, { - "epoch": 0.5949489216799092, + "epoch": 0.5941228144747656, "grad_norm": 0.0, - "learning_rate": 7.439784835610853e-06, - "loss": 0.9403, + "learning_rate": 7.465483411769665e-06, + "loss": 0.7889, "step": 20966 }, { - "epoch": 0.5949772985244041, + "epoch": 0.5941511519170279, "grad_norm": 0.0, - "learning_rate": 7.4388964002332395e-06, - "loss": 0.8617, + "learning_rate": 7.4645956007866105e-06, + "loss": 0.9669, "step": 20967 }, { - "epoch": 0.5950056753688989, + "epoch": 0.5941794893592904, "grad_norm": 0.0, - "learning_rate": 7.438007986489384e-06, - "loss": 0.9445, + "learning_rate": 7.4637078111602034e-06, + "loss": 0.9264, "step": 20968 }, { - "epoch": 0.5950340522133939, + "epoch": 0.5942078268015529, "grad_norm": 0.0, - "learning_rate": 7.43711959438679e-06, - "loss": 0.7961, + "learning_rate": 7.462820042897932e-06, + "loss": 0.7775, "step": 20969 }, { - "epoch": 0.5950624290578888, + "epoch": 0.5942361642438153, "grad_norm": 0.0, - "learning_rate": 7.436231223932964e-06, - "loss": 0.9029, + "learning_rate": 7.461932296007264e-06, + "loss": 0.8229, "step": 20970 }, { - "epoch": 0.5950908059023836, + "epoch": 0.5942645016860778, "grad_norm": 0.0, - "learning_rate": 7.435342875135405e-06, - "loss": 0.894, + "learning_rate": 7.461044570495684e-06, + "loss": 0.9145, "step": 20971 }, { - "epoch": 0.5951191827468786, + "epoch": 0.5942928391283403, "grad_norm": 0.0, - "learning_rate": 7.434454548001622e-06, - "loss": 0.8247, + "learning_rate": 7.4601568663706694e-06, + "loss": 0.9325, "step": 20972 }, { - "epoch": 0.5951475595913734, + "epoch": 0.5943211765706027, "grad_norm": 0.0, - "learning_rate": 7.433566242539117e-06, - "loss": 0.9065, + "learning_rate": 7.459269183639695e-06, + "loss": 0.6732, "step": 20973 }, { - "epoch": 0.5951759364358683, + "epoch": 0.5943495140128652, "grad_norm": 0.0, - "learning_rate": 7.432677958755391e-06, - "loss": 0.8362, + "learning_rate": 7.4583815223102395e-06, + "loss": 0.7182, "step": 20974 }, { - "epoch": 0.5952043132803633, + "epoch": 0.5943778514551277, "grad_norm": 0.0, - "learning_rate": 7.431789696657952e-06, - "loss": 0.8802, + "learning_rate": 7.457493882389786e-06, + "loss": 0.8335, "step": 20975 }, { - "epoch": 0.5952326901248581, + "epoch": 0.5944061888973902, "grad_norm": 0.0, - "learning_rate": 7.4309014562543e-06, - "loss": 0.8882, + "learning_rate": 7.456606263885799e-06, + "loss": 0.7996, "step": 20976 }, { - "epoch": 0.595261066969353, + "epoch": 0.5944345263396525, "grad_norm": 0.0, - "learning_rate": 7.430013237551938e-06, - "loss": 0.7493, + "learning_rate": 7.455718666805766e-06, + "loss": 0.8472, "step": 20977 }, { - "epoch": 0.5952894438138479, + "epoch": 0.594462863781915, "grad_norm": 0.0, - "learning_rate": 7.429125040558372e-06, - "loss": 0.844, + "learning_rate": 7.454831091157156e-06, + "loss": 0.882, "step": 20978 }, { - "epoch": 0.5953178206583428, + "epoch": 0.5944912012241775, "grad_norm": 0.0, - "learning_rate": 7.428236865281102e-06, - "loss": 0.6879, + "learning_rate": 7.45394353694745e-06, + "loss": 0.8934, "step": 20979 }, { - "epoch": 0.5953461975028377, + "epoch": 0.5945195386664399, "grad_norm": 0.0, - "learning_rate": 7.427348711727627e-06, - "loss": 0.7707, + "learning_rate": 7.453056004184127e-06, + "loss": 0.8558, "step": 20980 }, { - "epoch": 0.5953745743473325, + "epoch": 0.5945478761087024, "grad_norm": 0.0, - "learning_rate": 7.4264605799054556e-06, - "loss": 0.9234, + "learning_rate": 7.452168492874654e-06, + "loss": 0.8139, "step": 20981 }, { - "epoch": 0.5954029511918275, + "epoch": 0.5945762135509649, "grad_norm": 0.0, - "learning_rate": 7.425572469822087e-06, - "loss": 0.91, + "learning_rate": 7.451281003026514e-06, + "loss": 0.8821, "step": 20982 }, { - "epoch": 0.5954313280363224, + "epoch": 0.5946045509932274, "grad_norm": 0.0, - "learning_rate": 7.424684381485023e-06, - "loss": 0.8552, + "learning_rate": 7.450393534647183e-06, + "loss": 0.9099, "step": 20983 }, { - "epoch": 0.5954597048808172, + "epoch": 0.5946328884354898, "grad_norm": 0.0, - "learning_rate": 7.423796314901769e-06, - "loss": 0.8568, + "learning_rate": 7.44950608774413e-06, + "loss": 0.9648, "step": 20984 }, { - "epoch": 0.5954880817253121, + "epoch": 0.5946612258777523, "grad_norm": 0.0, - "learning_rate": 7.4229082700798196e-06, - "loss": 0.8685, + "learning_rate": 7.448618662324836e-06, + "loss": 0.8512, "step": 20985 }, { - "epoch": 0.5955164585698071, + "epoch": 0.5946895633200148, "grad_norm": 0.0, - "learning_rate": 7.422020247026682e-06, - "loss": 0.8534, + "learning_rate": 7.4477312583967735e-06, + "loss": 0.837, "step": 20986 }, { - "epoch": 0.5955448354143019, + "epoch": 0.5947179007622772, "grad_norm": 0.0, - "learning_rate": 7.421132245749856e-06, - "loss": 0.9041, + "learning_rate": 7.446843875967418e-06, + "loss": 0.8615, "step": 20987 }, { - "epoch": 0.5955732122587968, + "epoch": 0.5947462382045396, "grad_norm": 0.0, - "learning_rate": 7.42024426625684e-06, - "loss": 0.9379, + "learning_rate": 7.4459565150442484e-06, + "loss": 0.7519, "step": 20988 }, { - "epoch": 0.5956015891032918, + "epoch": 0.5947745756468021, "grad_norm": 0.0, - "learning_rate": 7.419356308555137e-06, - "loss": 0.7881, + "learning_rate": 7.4450691756347315e-06, + "loss": 0.8659, "step": 20989 }, { - "epoch": 0.5956299659477866, + "epoch": 0.5948029130890646, "grad_norm": 0.0, - "learning_rate": 7.418468372652249e-06, - "loss": 0.8698, + "learning_rate": 7.444181857746344e-06, + "loss": 0.9791, "step": 20990 }, { - "epoch": 0.5956583427922815, + "epoch": 0.594831250531327, "grad_norm": 0.0, - "learning_rate": 7.417580458555671e-06, - "loss": 0.8489, + "learning_rate": 7.443294561386567e-06, + "loss": 0.952, "step": 20991 }, { - "epoch": 0.5956867196367764, + "epoch": 0.5948595879735895, "grad_norm": 0.0, - "learning_rate": 7.416692566272911e-06, - "loss": 0.901, + "learning_rate": 7.442407286562865e-06, + "loss": 0.8992, "step": 20992 }, { - "epoch": 0.5957150964812713, + "epoch": 0.594887925415852, "grad_norm": 0.0, - "learning_rate": 7.4158046958114635e-06, - "loss": 0.8755, + "learning_rate": 7.441520033282721e-06, + "loss": 0.8627, "step": 20993 }, { - "epoch": 0.5957434733257662, + "epoch": 0.5949162628581144, "grad_norm": 0.0, - "learning_rate": 7.414916847178828e-06, - "loss": 0.9541, + "learning_rate": 7.4406328015536e-06, + "loss": 0.8622, "step": 20994 }, { - "epoch": 0.595771850170261, + "epoch": 0.5949446003003769, "grad_norm": 0.0, - "learning_rate": 7.414029020382505e-06, - "loss": 0.8213, + "learning_rate": 7.439745591382978e-06, + "loss": 0.995, "step": 20995 }, { - "epoch": 0.595800227014756, + "epoch": 0.5949729377426394, "grad_norm": 0.0, - "learning_rate": 7.413141215429998e-06, - "loss": 0.8539, + "learning_rate": 7.438858402778336e-06, + "loss": 0.9557, "step": 20996 }, { - "epoch": 0.5958286038592508, + "epoch": 0.5950012751849018, "grad_norm": 0.0, - "learning_rate": 7.4122534323288044e-06, - "loss": 0.914, + "learning_rate": 7.437971235747135e-06, + "loss": 0.9544, "step": 20997 }, { - "epoch": 0.5958569807037457, + "epoch": 0.5950296126271643, "grad_norm": 0.0, - "learning_rate": 7.41136567108642e-06, - "loss": 0.8411, + "learning_rate": 7.437084090296856e-06, + "loss": 0.8012, "step": 20998 }, { - "epoch": 0.5958853575482407, + "epoch": 0.5950579500694267, "grad_norm": 0.0, - "learning_rate": 7.410477931710348e-06, - "loss": 0.8358, + "learning_rate": 7.436196966434968e-06, + "loss": 0.7935, "step": 20999 }, { - "epoch": 0.5959137343927355, + "epoch": 0.5950862875116892, "grad_norm": 0.0, - "learning_rate": 7.409590214208087e-06, - "loss": 0.8481, + "learning_rate": 7.435309864168945e-06, + "loss": 0.9178, "step": 21000 }, { - "epoch": 0.5959421112372304, + "epoch": 0.5951146249539516, "grad_norm": 0.0, - "learning_rate": 7.408702518587132e-06, - "loss": 0.8689, + "learning_rate": 7.434422783506264e-06, + "loss": 0.9324, "step": 21001 }, { - "epoch": 0.5959704880817253, + "epoch": 0.5951429623962141, "grad_norm": 0.0, - "learning_rate": 7.407814844854981e-06, - "loss": 0.8039, + "learning_rate": 7.433535724454386e-06, + "loss": 0.9334, "step": 21002 }, { - "epoch": 0.5959988649262202, + "epoch": 0.5951712998384766, "grad_norm": 0.0, - "learning_rate": 7.406927193019138e-06, - "loss": 0.9202, + "learning_rate": 7.432648687020791e-06, + "loss": 0.8958, "step": 21003 }, { - "epoch": 0.5960272417707151, + "epoch": 0.595199637280739, "grad_norm": 0.0, - "learning_rate": 7.4060395630870965e-06, - "loss": 0.9106, + "learning_rate": 7.431761671212952e-06, + "loss": 0.8418, "step": 21004 }, { - "epoch": 0.59605561861521, + "epoch": 0.5952279747230015, "grad_norm": 0.0, - "learning_rate": 7.405151955066353e-06, - "loss": 0.8987, + "learning_rate": 7.430874677038336e-06, + "loss": 0.9499, "step": 21005 }, { - "epoch": 0.5960839954597049, + "epoch": 0.595256312165264, "grad_norm": 0.0, - "learning_rate": 7.404264368964411e-06, - "loss": 0.8525, + "learning_rate": 7.42998770450442e-06, + "loss": 0.8568, "step": 21006 }, { - "epoch": 0.5961123723041998, + "epoch": 0.5952846496075265, "grad_norm": 0.0, - "learning_rate": 7.403376804788764e-06, - "loss": 0.8427, + "learning_rate": 7.429100753618668e-06, + "loss": 0.8832, "step": 21007 }, { - "epoch": 0.5961407491486946, + "epoch": 0.5953129870497889, "grad_norm": 0.0, - "learning_rate": 7.402489262546908e-06, - "loss": 0.9354, + "learning_rate": 7.428213824388556e-06, + "loss": 0.8765, "step": 21008 }, { - "epoch": 0.5961691259931896, + "epoch": 0.5953413244920513, "grad_norm": 0.0, - "learning_rate": 7.401601742246343e-06, - "loss": 0.8809, + "learning_rate": 7.427326916821557e-06, + "loss": 0.8841, "step": 21009 }, { - "epoch": 0.5961975028376845, + "epoch": 0.5953696619343138, "grad_norm": 0.0, - "learning_rate": 7.400714243894565e-06, - "loss": 0.799, + "learning_rate": 7.426440030925135e-06, + "loss": 0.8543, "step": 21010 }, { - "epoch": 0.5962258796821793, + "epoch": 0.5953979993765762, "grad_norm": 0.0, - "learning_rate": 7.399826767499068e-06, - "loss": 0.8955, + "learning_rate": 7.4255531667067645e-06, + "loss": 0.7916, "step": 21011 }, { - "epoch": 0.5962542565266742, + "epoch": 0.5954263368188387, "grad_norm": 0.0, - "learning_rate": 7.398939313067353e-06, - "loss": 0.8341, + "learning_rate": 7.424666324173917e-06, + "loss": 0.8011, "step": 21012 }, { - "epoch": 0.5962826333711692, + "epoch": 0.5954546742611012, "grad_norm": 0.0, - "learning_rate": 7.3980518806069155e-06, - "loss": 0.9062, + "learning_rate": 7.423779503334061e-06, + "loss": 0.9041, "step": 21013 }, { - "epoch": 0.596311010215664, + "epoch": 0.5954830117033637, "grad_norm": 0.0, - "learning_rate": 7.397164470125251e-06, - "loss": 0.8022, + "learning_rate": 7.422892704194669e-06, + "loss": 0.8391, "step": 21014 }, { - "epoch": 0.5963393870601589, + "epoch": 0.5955113491456261, "grad_norm": 0.0, - "learning_rate": 7.396277081629852e-06, - "loss": 0.9981, + "learning_rate": 7.422005926763205e-06, + "loss": 0.8185, "step": 21015 }, { - "epoch": 0.5963677639046538, + "epoch": 0.5955396865878886, "grad_norm": 0.0, - "learning_rate": 7.395389715128223e-06, - "loss": 0.8552, + "learning_rate": 7.421119171047144e-06, + "loss": 0.9247, "step": 21016 }, { - "epoch": 0.5963961407491487, + "epoch": 0.5955680240301511, "grad_norm": 0.0, - "learning_rate": 7.394502370627852e-06, - "loss": 0.8787, + "learning_rate": 7.420232437053954e-06, + "loss": 0.9392, "step": 21017 }, { - "epoch": 0.5964245175936436, + "epoch": 0.5955963614724135, "grad_norm": 0.0, - "learning_rate": 7.393615048136234e-06, - "loss": 0.9266, + "learning_rate": 7.419345724791103e-06, + "loss": 0.9253, "step": 21018 }, { - "epoch": 0.5964528944381384, + "epoch": 0.595624698914676, "grad_norm": 0.0, - "learning_rate": 7.392727747660869e-06, - "loss": 0.9401, + "learning_rate": 7.418459034266061e-06, + "loss": 0.8521, "step": 21019 }, { - "epoch": 0.5964812712826334, + "epoch": 0.5956530363569384, "grad_norm": 0.0, - "learning_rate": 7.3918404692092504e-06, - "loss": 0.8739, + "learning_rate": 7.4175723654863015e-06, + "loss": 0.8316, "step": 21020 }, { - "epoch": 0.5965096481271283, + "epoch": 0.5956813737992008, "grad_norm": 0.0, - "learning_rate": 7.390953212788872e-06, - "loss": 0.8987, + "learning_rate": 7.416685718459285e-06, + "loss": 0.8973, "step": 21021 }, { - "epoch": 0.5965380249716231, + "epoch": 0.5957097112414633, "grad_norm": 0.0, - "learning_rate": 7.390065978407228e-06, - "loss": 0.8746, + "learning_rate": 7.4157990931924884e-06, + "loss": 0.8822, "step": 21022 }, { - "epoch": 0.5965664018161181, + "epoch": 0.5957380486837258, "grad_norm": 0.0, - "learning_rate": 7.389178766071816e-06, - "loss": 0.9202, + "learning_rate": 7.414912489693371e-06, + "loss": 0.7479, "step": 21023 }, { - "epoch": 0.596594778660613, + "epoch": 0.5957663861259883, "grad_norm": 0.0, - "learning_rate": 7.388291575790127e-06, - "loss": 0.8384, + "learning_rate": 7.414025907969404e-06, + "loss": 0.7661, "step": 21024 }, { - "epoch": 0.5966231555051078, + "epoch": 0.5957947235682507, "grad_norm": 0.0, - "learning_rate": 7.387404407569657e-06, - "loss": 0.8267, + "learning_rate": 7.4131393480280624e-06, + "loss": 0.9715, "step": 21025 }, { - "epoch": 0.5966515323496028, + "epoch": 0.5958230610105132, "grad_norm": 0.0, - "learning_rate": 7.386517261417896e-06, - "loss": 0.8141, + "learning_rate": 7.412252809876804e-06, + "loss": 0.8095, "step": 21026 }, { - "epoch": 0.5966799091940976, + "epoch": 0.5958513984527757, "grad_norm": 0.0, - "learning_rate": 7.385630137342346e-06, - "loss": 0.7858, + "learning_rate": 7.411366293523107e-06, + "loss": 0.8062, "step": 21027 }, { - "epoch": 0.5967082860385925, + "epoch": 0.5958797358950381, "grad_norm": 0.0, - "learning_rate": 7.384743035350495e-06, - "loss": 0.9184, + "learning_rate": 7.410479798974428e-06, + "loss": 0.8663, "step": 21028 }, { - "epoch": 0.5967366628830874, + "epoch": 0.5959080733373006, "grad_norm": 0.0, - "learning_rate": 7.383855955449835e-06, - "loss": 0.909, + "learning_rate": 7.409593326238239e-06, + "loss": 0.8574, "step": 21029 }, { - "epoch": 0.5967650397275823, + "epoch": 0.595936410779563, "grad_norm": 0.0, - "learning_rate": 7.382968897647862e-06, - "loss": 0.8335, + "learning_rate": 7.408706875322009e-06, + "loss": 0.9246, "step": 21030 }, { - "epoch": 0.5967934165720772, + "epoch": 0.5959647482218255, "grad_norm": 0.0, - "learning_rate": 7.38208186195207e-06, - "loss": 0.7595, + "learning_rate": 7.407820446233203e-06, + "loss": 0.9066, "step": 21031 }, { - "epoch": 0.596821793416572, + "epoch": 0.5959930856640879, "grad_norm": 0.0, - "learning_rate": 7.381194848369948e-06, - "loss": 0.883, + "learning_rate": 7.406934038979286e-06, + "loss": 0.9341, "step": 21032 }, { - "epoch": 0.596850170261067, + "epoch": 0.5960214231063504, "grad_norm": 0.0, - "learning_rate": 7.3803078569089926e-06, - "loss": 0.933, + "learning_rate": 7.406047653567731e-06, + "loss": 0.8574, "step": 21033 }, { - "epoch": 0.5968785471055619, + "epoch": 0.5960497605486129, "grad_norm": 0.0, - "learning_rate": 7.379420887576696e-06, - "loss": 0.9257, + "learning_rate": 7.405161290005998e-06, + "loss": 0.9027, "step": 21034 }, { - "epoch": 0.5969069239500567, + "epoch": 0.5960780979908753, "grad_norm": 0.0, - "learning_rate": 7.378533940380545e-06, - "loss": 0.9138, + "learning_rate": 7.404274948301558e-06, + "loss": 0.8296, "step": 21035 }, { - "epoch": 0.5969353007945516, + "epoch": 0.5961064354331378, "grad_norm": 0.0, - "learning_rate": 7.3776470153280375e-06, - "loss": 0.8559, + "learning_rate": 7.4033886284618675e-06, + "loss": 0.889, "step": 21036 }, { - "epoch": 0.5969636776390466, + "epoch": 0.5961347728754003, "grad_norm": 0.0, - "learning_rate": 7.376760112426665e-06, - "loss": 0.9651, + "learning_rate": 7.402502330494401e-06, + "loss": 0.9236, "step": 21037 }, { - "epoch": 0.5969920544835414, + "epoch": 0.5961631103176628, "grad_norm": 0.0, - "learning_rate": 7.3758732316839155e-06, - "loss": 0.8026, + "learning_rate": 7.401616054406624e-06, + "loss": 1.0203, "step": 21038 }, { - "epoch": 0.5970204313280363, + "epoch": 0.5961914477599252, "grad_norm": 0.0, - "learning_rate": 7.3749863731072845e-06, - "loss": 0.881, + "learning_rate": 7.4007298002059965e-06, + "loss": 0.8587, "step": 21039 }, { - "epoch": 0.5970488081725313, + "epoch": 0.5962197852021877, "grad_norm": 0.0, - "learning_rate": 7.374099536704261e-06, - "loss": 0.8934, + "learning_rate": 7.399843567899988e-06, + "loss": 0.836, "step": 21040 }, { - "epoch": 0.5970771850170261, + "epoch": 0.5962481226444502, "grad_norm": 0.0, - "learning_rate": 7.373212722482334e-06, - "loss": 0.9094, + "learning_rate": 7.398957357496067e-06, + "loss": 0.8788, "step": 21041 }, { - "epoch": 0.597105561861521, + "epoch": 0.5962764600867125, "grad_norm": 0.0, - "learning_rate": 7.372325930449e-06, - "loss": 0.9732, + "learning_rate": 7.39807116900169e-06, + "loss": 0.9363, "step": 21042 }, { - "epoch": 0.5971339387060159, + "epoch": 0.596304797528975, "grad_norm": 0.0, - "learning_rate": 7.371439160611745e-06, - "loss": 0.7283, + "learning_rate": 7.397185002424328e-06, + "loss": 0.8589, "step": 21043 }, { - "epoch": 0.5971623155505108, + "epoch": 0.5963331349712375, "grad_norm": 0.0, - "learning_rate": 7.370552412978065e-06, - "loss": 0.9147, + "learning_rate": 7.39629885777144e-06, + "loss": 0.855, "step": 21044 }, { - "epoch": 0.5971906923950057, + "epoch": 0.5963614724134999, "grad_norm": 0.0, - "learning_rate": 7.369665687555445e-06, - "loss": 0.9657, + "learning_rate": 7.395412735050493e-06, + "loss": 0.9249, "step": 21045 }, { - "epoch": 0.5972190692395005, + "epoch": 0.5963898098557624, "grad_norm": 0.0, - "learning_rate": 7.368778984351377e-06, - "loss": 0.864, + "learning_rate": 7.394526634268958e-06, + "loss": 0.7954, "step": 21046 }, { - "epoch": 0.5972474460839955, + "epoch": 0.5964181472980249, "grad_norm": 0.0, - "learning_rate": 7.367892303373352e-06, - "loss": 0.8335, + "learning_rate": 7.393640555434287e-06, + "loss": 0.9251, "step": 21047 }, { - "epoch": 0.5972758229284904, + "epoch": 0.5964464847402874, "grad_norm": 0.0, - "learning_rate": 7.36700564462886e-06, - "loss": 0.9912, + "learning_rate": 7.392754498553952e-06, + "loss": 0.857, "step": 21048 }, { - "epoch": 0.5973041997729852, + "epoch": 0.5964748221825498, "grad_norm": 0.0, - "learning_rate": 7.366119008125386e-06, - "loss": 0.869, + "learning_rate": 7.391868463635414e-06, + "loss": 0.8441, "step": 21049 }, { - "epoch": 0.5973325766174802, + "epoch": 0.5965031596248123, "grad_norm": 0.0, - "learning_rate": 7.3652323938704275e-06, - "loss": 0.982, + "learning_rate": 7.390982450686134e-06, + "loss": 0.8852, "step": 21050 }, { - "epoch": 0.597360953461975, + "epoch": 0.5965314970670748, "grad_norm": 0.0, - "learning_rate": 7.364345801871468e-06, - "loss": 0.7779, + "learning_rate": 7.390096459713583e-06, + "loss": 0.877, "step": 21051 }, { - "epoch": 0.5973893303064699, + "epoch": 0.5965598345093371, "grad_norm": 0.0, - "learning_rate": 7.363459232135996e-06, - "loss": 1.0058, + "learning_rate": 7.3892104907252136e-06, + "loss": 0.8984, "step": 21052 }, { - "epoch": 0.5974177071509648, + "epoch": 0.5965881719515996, "grad_norm": 0.0, - "learning_rate": 7.362572684671504e-06, - "loss": 0.8296, + "learning_rate": 7.388324543728493e-06, + "loss": 0.7563, "step": 21053 }, { - "epoch": 0.5974460839954597, + "epoch": 0.5966165093938621, "grad_norm": 0.0, - "learning_rate": 7.361686159485479e-06, - "loss": 0.8203, + "learning_rate": 7.387438618730891e-06, + "loss": 0.8265, "step": 21054 }, { - "epoch": 0.5974744608399546, + "epoch": 0.5966448468361246, "grad_norm": 0.0, - "learning_rate": 7.360799656585409e-06, - "loss": 0.7727, + "learning_rate": 7.386552715739857e-06, + "loss": 0.7781, "step": 21055 }, { - "epoch": 0.5975028376844495, + "epoch": 0.596673184278387, "grad_norm": 0.0, - "learning_rate": 7.3599131759787835e-06, - "loss": 0.8243, + "learning_rate": 7.385666834762863e-06, + "loss": 0.8206, "step": 21056 }, { - "epoch": 0.5975312145289444, + "epoch": 0.5967015217206495, "grad_norm": 0.0, - "learning_rate": 7.3590267176730875e-06, - "loss": 0.8835, + "learning_rate": 7.384780975807367e-06, + "loss": 0.9418, "step": 21057 }, { - "epoch": 0.5975595913734393, + "epoch": 0.596729859162912, "grad_norm": 0.0, - "learning_rate": 7.358140281675814e-06, - "loss": 0.8774, + "learning_rate": 7.383895138880833e-06, + "loss": 0.8154, "step": 21058 }, { - "epoch": 0.5975879682179341, + "epoch": 0.5967581966051744, "grad_norm": 0.0, - "learning_rate": 7.3572538679944475e-06, - "loss": 0.9201, + "learning_rate": 7.383009323990723e-06, + "loss": 0.8949, "step": 21059 }, { - "epoch": 0.5976163450624291, + "epoch": 0.5967865340474369, "grad_norm": 0.0, - "learning_rate": 7.356367476636478e-06, - "loss": 0.8416, + "learning_rate": 7.382123531144494e-06, + "loss": 0.9004, "step": 21060 }, { - "epoch": 0.597644721906924, + "epoch": 0.5968148714896994, "grad_norm": 0.0, - "learning_rate": 7.355481107609391e-06, - "loss": 0.7586, + "learning_rate": 7.381237760349611e-06, + "loss": 0.7742, "step": 21061 }, { - "epoch": 0.5976730987514188, + "epoch": 0.5968432089319619, "grad_norm": 0.0, - "learning_rate": 7.354594760920673e-06, - "loss": 0.8672, + "learning_rate": 7.380352011613537e-06, + "loss": 0.8932, "step": 21062 }, { - "epoch": 0.5977014755959137, + "epoch": 0.5968715463742242, "grad_norm": 0.0, - "learning_rate": 7.353708436577813e-06, - "loss": 0.9131, + "learning_rate": 7.379466284943728e-06, + "loss": 0.8681, "step": 21063 }, { - "epoch": 0.5977298524404087, + "epoch": 0.5968998838164867, "grad_norm": 0.0, - "learning_rate": 7.352822134588297e-06, - "loss": 0.8806, + "learning_rate": 7.378580580347652e-06, + "loss": 0.8073, "step": 21064 }, { - "epoch": 0.5977582292849035, + "epoch": 0.5969282212587492, "grad_norm": 0.0, - "learning_rate": 7.351935854959608e-06, - "loss": 0.8114, + "learning_rate": 7.377694897832761e-06, + "loss": 0.8369, "step": 21065 }, { - "epoch": 0.5977866061293984, + "epoch": 0.5969565587010116, "grad_norm": 0.0, - "learning_rate": 7.35104959769924e-06, - "loss": 0.9058, + "learning_rate": 7.3768092374065205e-06, + "loss": 0.8833, "step": 21066 }, { - "epoch": 0.5978149829738933, + "epoch": 0.5969848961432741, "grad_norm": 0.0, - "learning_rate": 7.350163362814675e-06, - "loss": 0.7173, + "learning_rate": 7.375923599076394e-06, + "loss": 0.9163, "step": 21067 }, { - "epoch": 0.5978433598183882, + "epoch": 0.5970132335855366, "grad_norm": 0.0, - "learning_rate": 7.349277150313398e-06, - "loss": 0.8953, + "learning_rate": 7.375037982849833e-06, + "loss": 0.7639, "step": 21068 }, { - "epoch": 0.5978717366628831, + "epoch": 0.597041571027799, "grad_norm": 0.0, - "learning_rate": 7.348390960202896e-06, - "loss": 0.8376, + "learning_rate": 7.3741523887343015e-06, + "loss": 0.8048, "step": 21069 }, { - "epoch": 0.5979001135073779, + "epoch": 0.5970699084700615, "grad_norm": 0.0, - "learning_rate": 7.347504792490654e-06, - "loss": 0.9524, + "learning_rate": 7.373266816737261e-06, + "loss": 0.9137, "step": 21070 }, { - "epoch": 0.5979284903518729, + "epoch": 0.597098245912324, "grad_norm": 0.0, - "learning_rate": 7.346618647184161e-06, - "loss": 0.934, + "learning_rate": 7.372381266866169e-06, + "loss": 0.7991, "step": 21071 }, { - "epoch": 0.5979568671963678, + "epoch": 0.5971265833545865, "grad_norm": 0.0, - "learning_rate": 7.345732524290894e-06, - "loss": 0.8901, + "learning_rate": 7.371495739128488e-06, + "loss": 0.7918, "step": 21072 }, { - "epoch": 0.5979852440408626, + "epoch": 0.5971549207968488, "grad_norm": 0.0, - "learning_rate": 7.3448464238183455e-06, - "loss": 0.8495, + "learning_rate": 7.370610233531671e-06, + "loss": 0.8611, "step": 21073 }, { - "epoch": 0.5980136208853576, + "epoch": 0.5971832582391113, "grad_norm": 0.0, - "learning_rate": 7.343960345774001e-06, - "loss": 0.9805, + "learning_rate": 7.3697247500831805e-06, + "loss": 0.8279, "step": 21074 }, { - "epoch": 0.5980419977298524, + "epoch": 0.5972115956813738, "grad_norm": 0.0, - "learning_rate": 7.343074290165343e-06, - "loss": 0.949, + "learning_rate": 7.368839288790477e-06, + "loss": 0.928, "step": 21075 }, { - "epoch": 0.5980703745743473, + "epoch": 0.5972399331236362, "grad_norm": 0.0, - "learning_rate": 7.342188256999853e-06, - "loss": 0.7699, + "learning_rate": 7.3679538496610146e-06, + "loss": 0.91, "step": 21076 }, { - "epoch": 0.5980987514188423, + "epoch": 0.5972682705658987, "grad_norm": 0.0, - "learning_rate": 7.341302246285022e-06, - "loss": 0.8001, + "learning_rate": 7.3670684327022555e-06, + "loss": 0.8716, "step": 21077 }, { - "epoch": 0.5981271282633371, + "epoch": 0.5972966080081612, "grad_norm": 0.0, - "learning_rate": 7.3404162580283285e-06, - "loss": 0.8087, + "learning_rate": 7.366183037921659e-06, + "loss": 0.8531, "step": 21078 }, { - "epoch": 0.598155505107832, + "epoch": 0.5973249454504237, "grad_norm": 0.0, - "learning_rate": 7.339530292237258e-06, - "loss": 0.8258, + "learning_rate": 7.3652976653266785e-06, + "loss": 0.867, "step": 21079 }, { - "epoch": 0.5981838819523269, + "epoch": 0.5973532828926861, "grad_norm": 0.0, - "learning_rate": 7.338644348919296e-06, - "loss": 0.856, + "learning_rate": 7.3644123149247784e-06, + "loss": 0.916, "step": 21080 }, { - "epoch": 0.5982122587968218, + "epoch": 0.5973816203349486, "grad_norm": 0.0, - "learning_rate": 7.337758428081925e-06, - "loss": 0.7743, + "learning_rate": 7.363526986723406e-06, + "loss": 0.8421, "step": 21081 }, { - "epoch": 0.5982406356413167, + "epoch": 0.5974099577772111, "grad_norm": 0.0, - "learning_rate": 7.336872529732626e-06, - "loss": 1.0128, + "learning_rate": 7.362641680730027e-06, + "loss": 0.8211, "step": 21082 }, { - "epoch": 0.5982690124858115, + "epoch": 0.5974382952194734, "grad_norm": 0.0, - "learning_rate": 7.335986653878887e-06, - "loss": 0.94, + "learning_rate": 7.361756396952097e-06, + "loss": 0.861, "step": 21083 }, { - "epoch": 0.5982973893303065, + "epoch": 0.5974666326617359, "grad_norm": 0.0, - "learning_rate": 7.3351008005281875e-06, - "loss": 0.766, + "learning_rate": 7.360871135397072e-06, + "loss": 0.8562, "step": 21084 }, { - "epoch": 0.5983257661748014, + "epoch": 0.5974949701039984, "grad_norm": 0.0, - "learning_rate": 7.3342149696880106e-06, - "loss": 0.9022, + "learning_rate": 7.359985896072412e-06, + "loss": 0.8366, "step": 21085 }, { - "epoch": 0.5983541430192962, + "epoch": 0.5975233075462609, "grad_norm": 0.0, - "learning_rate": 7.333329161365841e-06, - "loss": 0.7765, + "learning_rate": 7.359100678985568e-06, + "loss": 0.8056, "step": 21086 }, { - "epoch": 0.5983825198637911, + "epoch": 0.5975516449885233, "grad_norm": 0.0, - "learning_rate": 7.332443375569156e-06, - "loss": 0.7822, + "learning_rate": 7.358215484144e-06, + "loss": 0.8159, "step": 21087 }, { - "epoch": 0.5984108967082861, + "epoch": 0.5975799824307858, "grad_norm": 0.0, - "learning_rate": 7.331557612305445e-06, - "loss": 0.8618, + "learning_rate": 7.357330311555164e-06, + "loss": 0.8441, "step": 21088 }, { - "epoch": 0.5984392735527809, + "epoch": 0.5976083198730483, "grad_norm": 0.0, - "learning_rate": 7.330671871582188e-06, - "loss": 0.7782, + "learning_rate": 7.356445161226516e-06, + "loss": 0.7843, "step": 21089 }, { - "epoch": 0.5984676503972758, + "epoch": 0.5976366573153107, "grad_norm": 0.0, - "learning_rate": 7.3297861534068636e-06, - "loss": 0.7588, + "learning_rate": 7.355560033165512e-06, + "loss": 0.8783, "step": 21090 }, { - "epoch": 0.5984960272417708, + "epoch": 0.5976649947575732, "grad_norm": 0.0, - "learning_rate": 7.328900457786956e-06, - "loss": 0.7634, + "learning_rate": 7.354674927379612e-06, + "loss": 0.8549, "step": 21091 }, { - "epoch": 0.5985244040862656, + "epoch": 0.5976933321998357, "grad_norm": 0.0, - "learning_rate": 7.328014784729949e-06, - "loss": 0.8538, + "learning_rate": 7.353789843876263e-06, + "loss": 0.9441, "step": 21092 }, { - "epoch": 0.5985527809307605, + "epoch": 0.597721669642098, "grad_norm": 0.0, - "learning_rate": 7.327129134243317e-06, - "loss": 0.9666, + "learning_rate": 7.352904782662927e-06, + "loss": 0.9156, "step": 21093 }, { - "epoch": 0.5985811577752554, + "epoch": 0.5977500070843605, "grad_norm": 0.0, - "learning_rate": 7.326243506334549e-06, - "loss": 0.8461, + "learning_rate": 7.352019743747055e-06, + "loss": 0.8582, "step": 21094 }, { - "epoch": 0.5986095346197503, + "epoch": 0.597778344526623, "grad_norm": 0.0, - "learning_rate": 7.3253579010111205e-06, - "loss": 0.8222, + "learning_rate": 7.351134727136105e-06, + "loss": 0.7925, "step": 21095 }, { - "epoch": 0.5986379114642452, + "epoch": 0.5978066819688855, "grad_norm": 0.0, - "learning_rate": 7.324472318280513e-06, - "loss": 0.8793, + "learning_rate": 7.35024973283753e-06, + "loss": 0.798, "step": 21096 }, { - "epoch": 0.59866628830874, + "epoch": 0.5978350194111479, "grad_norm": 0.0, - "learning_rate": 7.3235867581502095e-06, - "loss": 0.8083, + "learning_rate": 7.349364760858785e-06, + "loss": 0.8068, "step": 21097 }, { - "epoch": 0.598694665153235, + "epoch": 0.5978633568534104, "grad_norm": 0.0, - "learning_rate": 7.32270122062769e-06, - "loss": 0.9134, + "learning_rate": 7.3484798112073255e-06, + "loss": 0.8895, "step": 21098 }, { - "epoch": 0.5987230419977299, + "epoch": 0.5978916942956729, "grad_norm": 0.0, - "learning_rate": 7.321815705720431e-06, - "loss": 0.91, + "learning_rate": 7.347594883890608e-06, + "loss": 0.8729, "step": 21099 }, { - "epoch": 0.5987514188422247, + "epoch": 0.5979200317379353, "grad_norm": 0.0, - "learning_rate": 7.320930213435917e-06, - "loss": 0.8846, + "learning_rate": 7.34670997891608e-06, + "loss": 0.972, "step": 21100 }, { - "epoch": 0.5987797956867197, + "epoch": 0.5979483691801978, "grad_norm": 0.0, - "learning_rate": 7.320044743781627e-06, - "loss": 0.7945, + "learning_rate": 7.345825096291201e-06, + "loss": 0.8468, "step": 21101 }, { - "epoch": 0.5988081725312145, + "epoch": 0.5979767066224603, "grad_norm": 0.0, - "learning_rate": 7.319159296765036e-06, - "loss": 0.7524, + "learning_rate": 7.344940236023421e-06, + "loss": 0.8296, "step": 21102 }, { - "epoch": 0.5988365493757094, + "epoch": 0.5980050440647228, "grad_norm": 0.0, - "learning_rate": 7.3182738723936255e-06, - "loss": 0.7951, + "learning_rate": 7.3440553981201956e-06, + "loss": 0.9029, "step": 21103 }, { - "epoch": 0.5988649262202043, + "epoch": 0.5980333815069852, "grad_norm": 0.0, - "learning_rate": 7.31738847067488e-06, - "loss": 0.782, + "learning_rate": 7.343170582588981e-06, + "loss": 0.8564, "step": 21104 }, { - "epoch": 0.5988933030646992, + "epoch": 0.5980617189492476, "grad_norm": 0.0, - "learning_rate": 7.3165030916162745e-06, - "loss": 0.8727, + "learning_rate": 7.342285789437225e-06, + "loss": 0.8222, "step": 21105 }, { - "epoch": 0.5989216799091941, + "epoch": 0.5980900563915101, "grad_norm": 0.0, - "learning_rate": 7.315617735225287e-06, - "loss": 0.8523, + "learning_rate": 7.3414010186723805e-06, + "loss": 0.8487, "step": 21106 }, { - "epoch": 0.598950056753689, + "epoch": 0.5981183938337725, "grad_norm": 0.0, - "learning_rate": 7.314732401509399e-06, - "loss": 0.8695, + "learning_rate": 7.340516270301908e-06, + "loss": 0.8793, "step": 21107 }, { - "epoch": 0.5989784335981839, + "epoch": 0.598146731276035, "grad_norm": 0.0, - "learning_rate": 7.313847090476086e-06, - "loss": 0.8361, + "learning_rate": 7.33963154433325e-06, + "loss": 0.7911, "step": 21108 }, { - "epoch": 0.5990068104426788, + "epoch": 0.5981750687182975, "grad_norm": 0.0, - "learning_rate": 7.3129618021328286e-06, - "loss": 0.7537, + "learning_rate": 7.338746840773866e-06, + "loss": 0.8086, "step": 21109 }, { - "epoch": 0.5990351872871736, + "epoch": 0.59820340616056, "grad_norm": 0.0, - "learning_rate": 7.3120765364871e-06, - "loss": 0.8471, + "learning_rate": 7.337862159631203e-06, + "loss": 0.8718, "step": 21110 }, { - "epoch": 0.5990635641316686, + "epoch": 0.5982317436028224, "grad_norm": 0.0, - "learning_rate": 7.3111912935463854e-06, - "loss": 0.9148, + "learning_rate": 7.336977500912716e-06, + "loss": 0.8755, "step": 21111 }, { - "epoch": 0.5990919409761635, + "epoch": 0.5982600810450849, "grad_norm": 0.0, - "learning_rate": 7.310306073318159e-06, - "loss": 0.8537, + "learning_rate": 7.3360928646258586e-06, + "loss": 0.8441, "step": 21112 }, { - "epoch": 0.5991203178206583, + "epoch": 0.5982884184873474, "grad_norm": 0.0, - "learning_rate": 7.309420875809895e-06, - "loss": 0.8574, + "learning_rate": 7.335208250778078e-06, + "loss": 0.8482, "step": 21113 }, { - "epoch": 0.5991486946651532, + "epoch": 0.5983167559296098, "grad_norm": 0.0, - "learning_rate": 7.3085357010290755e-06, - "loss": 0.9522, + "learning_rate": 7.3343236593768295e-06, + "loss": 0.7911, "step": 21114 }, { - "epoch": 0.5991770715096482, + "epoch": 0.5983450933718722, "grad_norm": 0.0, - "learning_rate": 7.3076505489831775e-06, - "loss": 0.9428, + "learning_rate": 7.333439090429562e-06, + "loss": 0.7921, "step": 21115 }, { - "epoch": 0.599205448354143, + "epoch": 0.5983734308141347, "grad_norm": 0.0, - "learning_rate": 7.306765419679674e-06, - "loss": 0.8979, + "learning_rate": 7.332554543943725e-06, + "loss": 0.8876, "step": 21116 }, { - "epoch": 0.5992338251986379, + "epoch": 0.5984017682563971, "grad_norm": 0.0, - "learning_rate": 7.305880313126045e-06, - "loss": 0.8057, + "learning_rate": 7.331670019926778e-06, + "loss": 0.8469, "step": 21117 }, { - "epoch": 0.5992622020431329, + "epoch": 0.5984301056986596, "grad_norm": 0.0, - "learning_rate": 7.3049952293297625e-06, - "loss": 0.8787, + "learning_rate": 7.3307855183861585e-06, + "loss": 0.8794, "step": 21118 }, { - "epoch": 0.5992905788876277, + "epoch": 0.5984584431409221, "grad_norm": 0.0, - "learning_rate": 7.30411016829831e-06, - "loss": 0.7379, + "learning_rate": 7.3299010393293255e-06, + "loss": 0.8539, "step": 21119 }, { - "epoch": 0.5993189557321226, + "epoch": 0.5984867805831846, "grad_norm": 0.0, - "learning_rate": 7.30322513003916e-06, - "loss": 0.8953, + "learning_rate": 7.3290165827637305e-06, + "loss": 0.8337, "step": 21120 }, { - "epoch": 0.5993473325766174, + "epoch": 0.598515118025447, "grad_norm": 0.0, - "learning_rate": 7.302340114559788e-06, - "loss": 0.8508, + "learning_rate": 7.328132148696818e-06, + "loss": 0.9172, "step": 21121 }, { - "epoch": 0.5993757094211124, + "epoch": 0.5985434554677095, "grad_norm": 0.0, - "learning_rate": 7.301455121867672e-06, - "loss": 0.7212, + "learning_rate": 7.327247737136042e-06, + "loss": 0.8107, "step": 21122 }, { - "epoch": 0.5994040862656073, + "epoch": 0.598571792909972, "grad_norm": 0.0, - "learning_rate": 7.300570151970283e-06, - "loss": 0.9375, + "learning_rate": 7.326363348088848e-06, + "loss": 0.9212, "step": 21123 }, { - "epoch": 0.5994324631101021, + "epoch": 0.5986001303522344, "grad_norm": 0.0, - "learning_rate": 7.299685204875102e-06, - "loss": 0.7833, + "learning_rate": 7.325478981562689e-06, + "loss": 0.8275, "step": 21124 }, { - "epoch": 0.5994608399545971, + "epoch": 0.5986284677944969, "grad_norm": 0.0, - "learning_rate": 7.2988002805896015e-06, - "loss": 0.8205, + "learning_rate": 7.324594637565019e-06, + "loss": 0.8805, "step": 21125 }, { - "epoch": 0.599489216799092, + "epoch": 0.5986568052367593, "grad_norm": 0.0, - "learning_rate": 7.2979153791212545e-06, - "loss": 0.8802, + "learning_rate": 7.323710316103277e-06, + "loss": 0.7664, "step": 21126 }, { - "epoch": 0.5995175936435868, + "epoch": 0.5986851426790218, "grad_norm": 0.0, - "learning_rate": 7.297030500477538e-06, - "loss": 0.8168, + "learning_rate": 7.322826017184915e-06, + "loss": 0.9555, "step": 21127 }, { - "epoch": 0.5995459704880817, + "epoch": 0.5987134801212842, "grad_norm": 0.0, - "learning_rate": 7.296145644665928e-06, - "loss": 0.8755, + "learning_rate": 7.321941740817388e-06, + "loss": 0.9551, "step": 21128 }, { - "epoch": 0.5995743473325766, + "epoch": 0.5987418175635467, "grad_norm": 0.0, - "learning_rate": 7.295260811693896e-06, - "loss": 0.7581, + "learning_rate": 7.321057487008136e-06, + "loss": 0.9134, "step": 21129 }, { - "epoch": 0.5996027241770715, + "epoch": 0.5987701550058092, "grad_norm": 0.0, - "learning_rate": 7.2943760015689195e-06, - "loss": 0.9557, + "learning_rate": 7.320173255764617e-06, + "loss": 0.781, "step": 21130 }, { - "epoch": 0.5996311010215664, + "epoch": 0.5987984924480716, "grad_norm": 0.0, - "learning_rate": 7.293491214298469e-06, - "loss": 0.8449, + "learning_rate": 7.31928904709427e-06, + "loss": 0.8687, "step": 21131 }, { - "epoch": 0.5996594778660613, + "epoch": 0.5988268298903341, "grad_norm": 0.0, - "learning_rate": 7.2926064498900184e-06, - "loss": 0.8464, + "learning_rate": 7.318404861004547e-06, + "loss": 0.8031, "step": 21132 }, { - "epoch": 0.5996878547105562, + "epoch": 0.5988551673325966, "grad_norm": 0.0, - "learning_rate": 7.291721708351045e-06, - "loss": 0.8626, + "learning_rate": 7.3175206975028985e-06, + "loss": 0.6363, "step": 21133 }, { - "epoch": 0.599716231555051, + "epoch": 0.598883504774859, "grad_norm": 0.0, - "learning_rate": 7.2908369896890155e-06, - "loss": 0.7998, + "learning_rate": 7.316636556596766e-06, + "loss": 0.8275, "step": 21134 }, { - "epoch": 0.599744608399546, + "epoch": 0.5989118422171215, "grad_norm": 0.0, - "learning_rate": 7.289952293911412e-06, - "loss": 0.8968, + "learning_rate": 7.315752438293602e-06, + "loss": 0.8733, "step": 21135 }, { - "epoch": 0.5997729852440409, + "epoch": 0.598940179659384, "grad_norm": 0.0, - "learning_rate": 7.289067621025704e-06, - "loss": 0.773, + "learning_rate": 7.31486834260085e-06, + "loss": 0.9866, "step": 21136 }, { - "epoch": 0.5998013620885357, + "epoch": 0.5989685171016464, "grad_norm": 0.0, - "learning_rate": 7.288182971039362e-06, - "loss": 0.8781, + "learning_rate": 7.31398426952596e-06, + "loss": 0.9101, "step": 21137 }, { - "epoch": 0.5998297389330306, + "epoch": 0.5989968545439088, "grad_norm": 0.0, - "learning_rate": 7.287298343959861e-06, - "loss": 0.8994, + "learning_rate": 7.313100219076381e-06, + "loss": 0.8856, "step": 21138 }, { - "epoch": 0.5998581157775256, + "epoch": 0.5990251919861713, "grad_norm": 0.0, - "learning_rate": 7.286413739794673e-06, - "loss": 0.9065, + "learning_rate": 7.312216191259552e-06, + "loss": 0.8939, "step": 21139 }, { - "epoch": 0.5998864926220204, + "epoch": 0.5990535294284338, "grad_norm": 0.0, - "learning_rate": 7.285529158551268e-06, - "loss": 0.8924, + "learning_rate": 7.311332186082925e-06, + "loss": 0.8037, "step": 21140 }, { - "epoch": 0.5999148694665153, + "epoch": 0.5990818668706962, "grad_norm": 0.0, - "learning_rate": 7.284644600237123e-06, - "loss": 0.8922, + "learning_rate": 7.310448203553947e-06, + "loss": 0.8353, "step": 21141 }, { - "epoch": 0.5999432463110103, + "epoch": 0.5991102043129587, "grad_norm": 0.0, - "learning_rate": 7.2837600648597075e-06, - "loss": 0.8911, + "learning_rate": 7.309564243680061e-06, + "loss": 0.9166, "step": 21142 }, { - "epoch": 0.5999716231555051, + "epoch": 0.5991385417552212, "grad_norm": 0.0, - "learning_rate": 7.282875552426489e-06, - "loss": 0.7999, + "learning_rate": 7.308680306468719e-06, + "loss": 0.9172, "step": 21143 }, { - "epoch": 0.6, + "epoch": 0.5991668791974837, "grad_norm": 0.0, - "learning_rate": 7.2819910629449464e-06, - "loss": 0.7873, + "learning_rate": 7.307796391927356e-06, + "loss": 0.8909, "step": 21144 }, { - "epoch": 0.6000283768444948, + "epoch": 0.5991952166397461, "grad_norm": 0.0, - "learning_rate": 7.281106596422547e-06, - "loss": 0.9166, + "learning_rate": 7.306912500063425e-06, + "loss": 1.0015, "step": 21145 }, { - "epoch": 0.6000567536889898, + "epoch": 0.5992235540820086, "grad_norm": 0.0, - "learning_rate": 7.2802221528667604e-06, - "loss": 0.8091, + "learning_rate": 7.306028630884374e-06, + "loss": 0.8562, "step": 21146 }, { - "epoch": 0.6000851305334847, + "epoch": 0.599251891524271, "grad_norm": 0.0, - "learning_rate": 7.279337732285062e-06, - "loss": 0.9368, + "learning_rate": 7.305144784397641e-06, + "loss": 0.8033, "step": 21147 }, { - "epoch": 0.6001135073779795, + "epoch": 0.5992802289665334, "grad_norm": 0.0, - "learning_rate": 7.27845333468492e-06, - "loss": 0.9756, + "learning_rate": 7.304260960610674e-06, + "loss": 0.8346, "step": 21148 }, { - "epoch": 0.6001418842224745, + "epoch": 0.5993085664087959, "grad_norm": 0.0, - "learning_rate": 7.277568960073801e-06, - "loss": 0.8963, + "learning_rate": 7.303377159530919e-06, + "loss": 0.7252, "step": 21149 }, { - "epoch": 0.6001702610669694, + "epoch": 0.5993369038510584, "grad_norm": 0.0, - "learning_rate": 7.2766846084591835e-06, - "loss": 0.8415, + "learning_rate": 7.302493381165818e-06, + "loss": 0.8158, "step": 21150 }, { - "epoch": 0.6001986379114642, + "epoch": 0.5993652412933209, "grad_norm": 0.0, - "learning_rate": 7.27580027984853e-06, - "loss": 0.895, + "learning_rate": 7.301609625522821e-06, + "loss": 0.8497, "step": 21151 }, { - "epoch": 0.6002270147559592, + "epoch": 0.5993935787355833, "grad_norm": 0.0, - "learning_rate": 7.274915974249317e-06, - "loss": 0.8767, + "learning_rate": 7.300725892609364e-06, + "loss": 0.9134, "step": 21152 }, { - "epoch": 0.600255391600454, + "epoch": 0.5994219161778458, "grad_norm": 0.0, - "learning_rate": 7.274031691669012e-06, - "loss": 0.919, + "learning_rate": 7.299842182432895e-06, + "loss": 0.9518, "step": 21153 }, { - "epoch": 0.6002837684449489, + "epoch": 0.5994502536201083, "grad_norm": 0.0, - "learning_rate": 7.273147432115082e-06, - "loss": 0.8601, + "learning_rate": 7.29895849500086e-06, + "loss": 0.8347, "step": 21154 }, { - "epoch": 0.6003121452894438, + "epoch": 0.5994785910623707, "grad_norm": 0.0, - "learning_rate": 7.272263195594999e-06, - "loss": 0.7171, + "learning_rate": 7.298074830320699e-06, + "loss": 0.837, "step": 21155 }, { - "epoch": 0.6003405221339387, + "epoch": 0.5995069285046332, "grad_norm": 0.0, - "learning_rate": 7.271378982116232e-06, - "loss": 0.7924, + "learning_rate": 7.297191188399857e-06, + "loss": 0.8274, "step": 21156 }, { - "epoch": 0.6003688989784336, + "epoch": 0.5995352659468957, "grad_norm": 0.0, - "learning_rate": 7.270494791686247e-06, - "loss": 0.843, + "learning_rate": 7.296307569245782e-06, + "loss": 0.9137, "step": 21157 }, { - "epoch": 0.6003972758229285, + "epoch": 0.599563603389158, "grad_norm": 0.0, - "learning_rate": 7.269610624312517e-06, - "loss": 0.7198, + "learning_rate": 7.295423972865907e-06, + "loss": 0.8833, "step": 21158 }, { - "epoch": 0.6004256526674234, + "epoch": 0.5995919408314205, "grad_norm": 0.0, - "learning_rate": 7.2687264800025105e-06, - "loss": 0.8486, + "learning_rate": 7.294540399267682e-06, + "loss": 0.8522, "step": 21159 }, { - "epoch": 0.6004540295119183, + "epoch": 0.599620278273683, "grad_norm": 0.0, - "learning_rate": 7.267842358763691e-06, - "loss": 0.8672, + "learning_rate": 7.2936568484585475e-06, + "loss": 0.7804, "step": 21160 }, { - "epoch": 0.6004824063564131, + "epoch": 0.5996486157159455, "grad_norm": 0.0, - "learning_rate": 7.266958260603532e-06, - "loss": 0.8941, + "learning_rate": 7.292773320445947e-06, + "loss": 0.8851, "step": 21161 }, { - "epoch": 0.600510783200908, + "epoch": 0.5996769531582079, "grad_norm": 0.0, - "learning_rate": 7.266074185529499e-06, - "loss": 0.9014, + "learning_rate": 7.291889815237323e-06, + "loss": 0.7653, "step": 21162 }, { - "epoch": 0.600539160045403, + "epoch": 0.5997052906004704, "grad_norm": 0.0, - "learning_rate": 7.265190133549059e-06, - "loss": 0.9025, + "learning_rate": 7.291006332840113e-06, + "loss": 0.7999, "step": 21163 }, { - "epoch": 0.6005675368898978, + "epoch": 0.5997336280427329, "grad_norm": 0.0, - "learning_rate": 7.264306104669678e-06, - "loss": 0.9616, + "learning_rate": 7.290122873261769e-06, + "loss": 0.8756, "step": 21164 }, { - "epoch": 0.6005959137343927, + "epoch": 0.5997619654849953, "grad_norm": 0.0, - "learning_rate": 7.26342209889883e-06, - "loss": 0.8903, + "learning_rate": 7.289239436509721e-06, + "loss": 0.875, "step": 21165 }, { - "epoch": 0.6006242905788877, + "epoch": 0.5997903029272578, "grad_norm": 0.0, - "learning_rate": 7.26253811624398e-06, - "loss": 0.9706, + "learning_rate": 7.2883560225914165e-06, + "loss": 0.8364, "step": 21166 }, { - "epoch": 0.6006526674233825, + "epoch": 0.5998186403695203, "grad_norm": 0.0, - "learning_rate": 7.261654156712591e-06, - "loss": 0.7505, + "learning_rate": 7.287472631514298e-06, + "loss": 0.8582, "step": 21167 }, { - "epoch": 0.6006810442678774, + "epoch": 0.5998469778117828, "grad_norm": 0.0, - "learning_rate": 7.260770220312134e-06, - "loss": 0.8137, + "learning_rate": 7.286589263285801e-06, + "loss": 0.7542, "step": 21168 }, { - "epoch": 0.6007094211123724, + "epoch": 0.5998753152540451, "grad_norm": 0.0, - "learning_rate": 7.259886307050075e-06, - "loss": 0.7732, + "learning_rate": 7.285705917913372e-06, + "loss": 0.8047, "step": 21169 }, { - "epoch": 0.6007377979568672, + "epoch": 0.5999036526963076, "grad_norm": 0.0, - "learning_rate": 7.259002416933876e-06, - "loss": 0.8921, + "learning_rate": 7.284822595404455e-06, + "loss": 0.756, "step": 21170 }, { - "epoch": 0.6007661748013621, + "epoch": 0.5999319901385701, "grad_norm": 0.0, - "learning_rate": 7.258118549971011e-06, - "loss": 0.8734, + "learning_rate": 7.28393929576648e-06, + "loss": 0.8108, "step": 21171 }, { - "epoch": 0.6007945516458569, + "epoch": 0.5999603275808325, "grad_norm": 0.0, - "learning_rate": 7.257234706168941e-06, - "loss": 0.7867, + "learning_rate": 7.283056019006895e-06, + "loss": 0.7771, "step": 21172 }, { - "epoch": 0.6008229284903519, + "epoch": 0.599988665023095, "grad_norm": 0.0, - "learning_rate": 7.256350885535129e-06, - "loss": 0.854, + "learning_rate": 7.2821727651331355e-06, + "loss": 0.8617, "step": 21173 }, { - "epoch": 0.6008513053348468, + "epoch": 0.6000170024653575, "grad_norm": 0.0, - "learning_rate": 7.255467088077048e-06, - "loss": 0.8592, + "learning_rate": 7.281289534152644e-06, + "loss": 0.9615, "step": 21174 }, { - "epoch": 0.6008796821793416, + "epoch": 0.60004533990762, "grad_norm": 0.0, - "learning_rate": 7.2545833138021596e-06, - "loss": 0.838, + "learning_rate": 7.280406326072866e-06, + "loss": 0.9448, "step": 21175 }, { - "epoch": 0.6009080590238366, + "epoch": 0.6000736773498824, "grad_norm": 0.0, - "learning_rate": 7.2536995627179295e-06, - "loss": 0.937, + "learning_rate": 7.27952314090123e-06, + "loss": 0.8074, "step": 21176 }, { - "epoch": 0.6009364358683315, + "epoch": 0.6001020147921449, "grad_norm": 0.0, - "learning_rate": 7.252815834831821e-06, - "loss": 0.9591, + "learning_rate": 7.2786399786451825e-06, + "loss": 0.9363, "step": 21177 }, { - "epoch": 0.6009648127128263, + "epoch": 0.6001303522344074, "grad_norm": 0.0, - "learning_rate": 7.251932130151302e-06, - "loss": 0.8478, + "learning_rate": 7.2777568393121645e-06, + "loss": 0.8219, "step": 21178 }, { - "epoch": 0.6009931895573212, + "epoch": 0.6001586896766697, "grad_norm": 0.0, - "learning_rate": 7.251048448683836e-06, - "loss": 0.7682, + "learning_rate": 7.276873722909606e-06, + "loss": 0.8878, "step": 21179 }, { - "epoch": 0.6010215664018161, + "epoch": 0.6001870271189322, "grad_norm": 0.0, - "learning_rate": 7.2501647904368835e-06, - "loss": 0.8465, + "learning_rate": 7.275990629444954e-06, + "loss": 0.8648, "step": 21180 }, { - "epoch": 0.601049943246311, + "epoch": 0.6002153645611947, "grad_norm": 0.0, - "learning_rate": 7.249281155417914e-06, - "loss": 0.846, + "learning_rate": 7.275107558925642e-06, + "loss": 0.8974, "step": 21181 }, { - "epoch": 0.6010783200908059, + "epoch": 0.6002437020034571, "grad_norm": 0.0, - "learning_rate": 7.248397543634394e-06, - "loss": 0.7777, + "learning_rate": 7.274224511359112e-06, + "loss": 0.8353, "step": 21182 }, { - "epoch": 0.6011066969353008, + "epoch": 0.6002720394457196, "grad_norm": 0.0, - "learning_rate": 7.247513955093781e-06, - "loss": 0.8004, + "learning_rate": 7.273341486752805e-06, + "loss": 0.7133, "step": 21183 }, { - "epoch": 0.6011350737797957, + "epoch": 0.6003003768879821, "grad_norm": 0.0, - "learning_rate": 7.246630389803541e-06, - "loss": 0.9038, + "learning_rate": 7.272458485114151e-06, + "loss": 0.8553, "step": 21184 }, { - "epoch": 0.6011634506242906, + "epoch": 0.6003287143302446, "grad_norm": 0.0, - "learning_rate": 7.24574684777114e-06, - "loss": 0.909, + "learning_rate": 7.2715755064505926e-06, + "loss": 0.9384, "step": 21185 }, { - "epoch": 0.6011918274687855, + "epoch": 0.600357051772507, "grad_norm": 0.0, - "learning_rate": 7.24486332900404e-06, - "loss": 0.8389, + "learning_rate": 7.270692550769567e-06, + "loss": 0.7985, "step": 21186 }, { - "epoch": 0.6012202043132804, + "epoch": 0.6003853892147695, "grad_norm": 0.0, - "learning_rate": 7.243979833509699e-06, - "loss": 0.8184, + "learning_rate": 7.269809618078512e-06, + "loss": 0.9115, "step": 21187 }, { - "epoch": 0.6012485811577752, + "epoch": 0.600413726657032, "grad_norm": 0.0, - "learning_rate": 7.243096361295587e-06, - "loss": 0.8061, + "learning_rate": 7.268926708384867e-06, + "loss": 0.9262, "step": 21188 }, { - "epoch": 0.6012769580022701, + "epoch": 0.6004420640992943, "grad_norm": 0.0, - "learning_rate": 7.242212912369164e-06, - "loss": 0.9154, + "learning_rate": 7.268043821696062e-06, + "loss": 0.856, "step": 21189 }, { - "epoch": 0.6013053348467651, + "epoch": 0.6004704015415568, "grad_norm": 0.0, - "learning_rate": 7.241329486737891e-06, - "loss": 0.9451, + "learning_rate": 7.267160958019539e-06, + "loss": 0.8778, "step": 21190 }, { - "epoch": 0.6013337116912599, + "epoch": 0.6004987389838193, "grad_norm": 0.0, - "learning_rate": 7.240446084409234e-06, - "loss": 0.769, + "learning_rate": 7.266278117362737e-06, + "loss": 0.8133, "step": 21191 }, { - "epoch": 0.6013620885357548, + "epoch": 0.6005270764260818, "grad_norm": 0.0, - "learning_rate": 7.239562705390652e-06, - "loss": 0.8432, + "learning_rate": 7.265395299733088e-06, + "loss": 0.8456, "step": 21192 }, { - "epoch": 0.6013904653802498, + "epoch": 0.6005554138683442, "grad_norm": 0.0, - "learning_rate": 7.238679349689608e-06, - "loss": 0.9834, + "learning_rate": 7.26451250513803e-06, + "loss": 0.7882, "step": 21193 }, { - "epoch": 0.6014188422247446, + "epoch": 0.6005837513106067, "grad_norm": 0.0, - "learning_rate": 7.237796017313563e-06, - "loss": 0.7874, + "learning_rate": 7.263629733584998e-06, + "loss": 0.7835, "step": 21194 }, { - "epoch": 0.6014472190692395, + "epoch": 0.6006120887528692, "grad_norm": 0.0, - "learning_rate": 7.2369127082699766e-06, - "loss": 0.9206, + "learning_rate": 7.262746985081428e-06, + "loss": 0.8167, "step": 21195 }, { - "epoch": 0.6014755959137343, + "epoch": 0.6006404261951316, "grad_norm": 0.0, - "learning_rate": 7.236029422566318e-06, - "loss": 0.7209, + "learning_rate": 7.261864259634761e-06, + "loss": 0.8278, "step": 21196 }, { - "epoch": 0.6015039727582293, + "epoch": 0.6006687636373941, "grad_norm": 0.0, - "learning_rate": 7.235146160210041e-06, - "loss": 0.7507, + "learning_rate": 7.260981557252425e-06, + "loss": 0.7933, "step": 21197 }, { - "epoch": 0.6015323496027242, + "epoch": 0.6006971010796566, "grad_norm": 0.0, - "learning_rate": 7.234262921208607e-06, - "loss": 1.0271, + "learning_rate": 7.260098877941857e-06, + "loss": 0.8073, "step": 21198 }, { - "epoch": 0.601560726447219, + "epoch": 0.6007254385219191, "grad_norm": 0.0, - "learning_rate": 7.233379705569481e-06, - "loss": 0.9118, + "learning_rate": 7.259216221710496e-06, + "loss": 0.9083, "step": 21199 }, { - "epoch": 0.601589103291714, + "epoch": 0.6007537759641814, "grad_norm": 0.0, - "learning_rate": 7.23249651330012e-06, - "loss": 0.9785, + "learning_rate": 7.258333588565771e-06, + "loss": 0.9651, "step": 21200 }, { - "epoch": 0.6016174801362089, + "epoch": 0.6007821134064439, "grad_norm": 0.0, - "learning_rate": 7.231613344407984e-06, - "loss": 0.7809, + "learning_rate": 7.257450978515127e-06, + "loss": 0.8283, "step": 21201 }, { - "epoch": 0.6016458569807037, + "epoch": 0.6008104508487064, "grad_norm": 0.0, - "learning_rate": 7.230730198900537e-06, - "loss": 0.9162, + "learning_rate": 7.256568391565987e-06, + "loss": 0.8903, "step": 21202 }, { - "epoch": 0.6016742338251987, + "epoch": 0.6008387882909688, "grad_norm": 0.0, - "learning_rate": 7.229847076785237e-06, - "loss": 0.8361, + "learning_rate": 7.255685827725789e-06, + "loss": 0.8172, "step": 21203 }, { - "epoch": 0.6017026106696935, + "epoch": 0.6008671257332313, "grad_norm": 0.0, - "learning_rate": 7.22896397806954e-06, - "loss": 0.8326, + "learning_rate": 7.254803287001975e-06, + "loss": 0.9525, "step": 21204 }, { - "epoch": 0.6017309875141884, + "epoch": 0.6008954631754938, "grad_norm": 0.0, - "learning_rate": 7.228080902760911e-06, - "loss": 0.923, + "learning_rate": 7.253920769401965e-06, + "loss": 0.8161, "step": 21205 }, { - "epoch": 0.6017593643586833, + "epoch": 0.6009238006177562, "grad_norm": 0.0, - "learning_rate": 7.227197850866807e-06, - "loss": 0.8599, + "learning_rate": 7.253038274933202e-06, + "loss": 0.8331, "step": 21206 }, { - "epoch": 0.6017877412031782, + "epoch": 0.6009521380600187, "grad_norm": 0.0, - "learning_rate": 7.226314822394686e-06, - "loss": 0.8636, + "learning_rate": 7.25215580360312e-06, + "loss": 0.897, "step": 21207 }, { - "epoch": 0.6018161180476731, + "epoch": 0.6009804755022812, "grad_norm": 0.0, - "learning_rate": 7.225431817352011e-06, - "loss": 0.8563, + "learning_rate": 7.251273355419147e-06, + "loss": 0.8564, "step": 21208 }, { - "epoch": 0.601844494892168, + "epoch": 0.6010088129445437, "grad_norm": 0.0, - "learning_rate": 7.224548835746238e-06, - "loss": 0.8003, + "learning_rate": 7.250390930388725e-06, + "loss": 0.9273, "step": 21209 }, { - "epoch": 0.6018728717366629, + "epoch": 0.601037150386806, "grad_norm": 0.0, - "learning_rate": 7.22366587758482e-06, - "loss": 0.8195, + "learning_rate": 7.249508528519275e-06, + "loss": 0.962, "step": 21210 }, { - "epoch": 0.6019012485811578, + "epoch": 0.6010654878290685, "grad_norm": 0.0, - "learning_rate": 7.222782942875225e-06, - "loss": 0.7956, + "learning_rate": 7.248626149818237e-06, + "loss": 0.8509, "step": 21211 }, { - "epoch": 0.6019296254256526, + "epoch": 0.601093825271331, "grad_norm": 0.0, - "learning_rate": 7.221900031624909e-06, - "loss": 0.8064, + "learning_rate": 7.247743794293044e-06, + "loss": 0.814, "step": 21212 }, { - "epoch": 0.6019580022701475, + "epoch": 0.6011221627135934, "grad_norm": 0.0, - "learning_rate": 7.221017143841328e-06, - "loss": 0.7986, + "learning_rate": 7.2468614619511255e-06, + "loss": 0.9143, "step": 21213 }, { - "epoch": 0.6019863791146425, + "epoch": 0.6011505001558559, "grad_norm": 0.0, - "learning_rate": 7.2201342795319384e-06, - "loss": 0.8767, + "learning_rate": 7.245979152799915e-06, + "loss": 0.9299, "step": 21214 }, { - "epoch": 0.6020147559591373, + "epoch": 0.6011788375981184, "grad_norm": 0.0, - "learning_rate": 7.2192514387042e-06, - "loss": 0.7388, + "learning_rate": 7.2450968668468506e-06, + "loss": 0.7833, "step": 21215 }, { - "epoch": 0.6020431328036322, + "epoch": 0.6012071750403809, "grad_norm": 0.0, - "learning_rate": 7.218368621365572e-06, - "loss": 0.9205, + "learning_rate": 7.244214604099351e-06, + "loss": 0.7811, "step": 21216 }, { - "epoch": 0.6020715096481272, + "epoch": 0.6012355124826433, "grad_norm": 0.0, - "learning_rate": 7.217485827523507e-06, - "loss": 0.8319, + "learning_rate": 7.2433323645648615e-06, + "loss": 0.8219, "step": 21217 }, { - "epoch": 0.602099886492622, + "epoch": 0.6012638499249058, "grad_norm": 0.0, - "learning_rate": 7.216603057185465e-06, - "loss": 0.903, + "learning_rate": 7.242450148250804e-06, + "loss": 0.8865, "step": 21218 }, { - "epoch": 0.6021282633371169, + "epoch": 0.6012921873671683, "grad_norm": 0.0, - "learning_rate": 7.215720310358903e-06, - "loss": 0.989, + "learning_rate": 7.24156795516461e-06, + "loss": 0.7319, "step": 21219 }, { - "epoch": 0.6021566401816119, + "epoch": 0.6013205248094307, "grad_norm": 0.0, - "learning_rate": 7.214837587051275e-06, - "loss": 0.8794, + "learning_rate": 7.240685785313717e-06, + "loss": 0.8235, "step": 21220 }, { - "epoch": 0.6021850170261067, + "epoch": 0.6013488622516932, "grad_norm": 0.0, - "learning_rate": 7.213954887270039e-06, - "loss": 0.8331, + "learning_rate": 7.23980363870555e-06, + "loss": 0.8893, "step": 21221 }, { - "epoch": 0.6022133938706016, + "epoch": 0.6013771996939556, "grad_norm": 0.0, - "learning_rate": 7.213072211022653e-06, - "loss": 0.7623, + "learning_rate": 7.238921515347547e-06, + "loss": 0.8844, "step": 21222 }, { - "epoch": 0.6022417707150964, + "epoch": 0.6014055371362181, "grad_norm": 0.0, - "learning_rate": 7.2121895583165716e-06, - "loss": 0.8834, + "learning_rate": 7.238039415247129e-06, + "loss": 0.7287, "step": 21223 }, { - "epoch": 0.6022701475595914, + "epoch": 0.6014338745784805, "grad_norm": 0.0, - "learning_rate": 7.211306929159247e-06, - "loss": 0.8581, + "learning_rate": 7.237157338411729e-06, + "loss": 0.7193, "step": 21224 }, { - "epoch": 0.6022985244040863, + "epoch": 0.601462212020743, "grad_norm": 0.0, - "learning_rate": 7.21042432355814e-06, - "loss": 0.8463, + "learning_rate": 7.2362752848487814e-06, + "loss": 0.825, "step": 21225 }, { - "epoch": 0.6023269012485811, + "epoch": 0.6014905494630055, "grad_norm": 0.0, - "learning_rate": 7.2095417415207015e-06, - "loss": 0.8024, + "learning_rate": 7.235393254565713e-06, + "loss": 0.8336, "step": 21226 }, { - "epoch": 0.6023552780930761, + "epoch": 0.6015188869052679, "grad_norm": 0.0, - "learning_rate": 7.208659183054393e-06, - "loss": 0.9088, + "learning_rate": 7.234511247569953e-06, + "loss": 0.7847, "step": 21227 }, { - "epoch": 0.602383654937571, + "epoch": 0.6015472243475304, "grad_norm": 0.0, - "learning_rate": 7.207776648166664e-06, - "loss": 0.7353, + "learning_rate": 7.233629263868935e-06, + "loss": 0.8127, "step": 21228 }, { - "epoch": 0.6024120317820658, + "epoch": 0.6015755617897929, "grad_norm": 0.0, - "learning_rate": 7.2068941368649726e-06, - "loss": 0.8925, + "learning_rate": 7.232747303470082e-06, + "loss": 0.8657, "step": 21229 }, { - "epoch": 0.6024404086265607, + "epoch": 0.6016038992320553, "grad_norm": 0.0, - "learning_rate": 7.206011649156773e-06, - "loss": 0.8733, + "learning_rate": 7.231865366380828e-06, + "loss": 0.8196, "step": 21230 }, { - "epoch": 0.6024687854710556, + "epoch": 0.6016322366743178, "grad_norm": 0.0, - "learning_rate": 7.205129185049515e-06, - "loss": 0.8575, + "learning_rate": 7.230983452608598e-06, + "loss": 0.8936, "step": 21231 }, { - "epoch": 0.6024971623155505, + "epoch": 0.6016605741165802, "grad_norm": 0.0, - "learning_rate": 7.20424674455066e-06, - "loss": 0.8327, + "learning_rate": 7.230101562160822e-06, + "loss": 0.8956, "step": 21232 }, { - "epoch": 0.6025255391600454, + "epoch": 0.6016889115588427, "grad_norm": 0.0, - "learning_rate": 7.203364327667657e-06, - "loss": 0.749, + "learning_rate": 7.229219695044931e-06, + "loss": 0.8853, "step": 21233 }, { - "epoch": 0.6025539160045403, + "epoch": 0.6017172490011051, "grad_norm": 0.0, - "learning_rate": 7.20248193440796e-06, - "loss": 0.778, + "learning_rate": 7.22833785126835e-06, + "loss": 0.8807, "step": 21234 }, { - "epoch": 0.6025822928490352, + "epoch": 0.6017455864433676, "grad_norm": 0.0, - "learning_rate": 7.201599564779026e-06, - "loss": 0.7676, + "learning_rate": 7.2274560308385065e-06, + "loss": 0.8906, "step": 21235 }, { - "epoch": 0.60261066969353, + "epoch": 0.6017739238856301, "grad_norm": 0.0, - "learning_rate": 7.200717218788306e-06, - "loss": 0.9243, + "learning_rate": 7.2265742337628374e-06, + "loss": 0.8487, "step": 21236 }, { - "epoch": 0.602639046538025, + "epoch": 0.6018022613278925, "grad_norm": 0.0, - "learning_rate": 7.199834896443252e-06, - "loss": 0.8583, + "learning_rate": 7.225692460048756e-06, + "loss": 0.7817, "step": 21237 }, { - "epoch": 0.6026674233825199, + "epoch": 0.601830598770155, "grad_norm": 0.0, - "learning_rate": 7.19895259775132e-06, - "loss": 0.7879, + "learning_rate": 7.224810709703703e-06, + "loss": 0.8667, "step": 21238 }, { - "epoch": 0.6026958002270147, + "epoch": 0.6018589362124175, "grad_norm": 0.0, - "learning_rate": 7.198070322719961e-06, - "loss": 0.8063, + "learning_rate": 7.223928982735096e-06, + "loss": 0.8784, "step": 21239 }, { - "epoch": 0.6027241770715096, + "epoch": 0.60188727365468, "grad_norm": 0.0, - "learning_rate": 7.197188071356627e-06, - "loss": 0.9085, + "learning_rate": 7.2230472791503655e-06, + "loss": 0.9157, "step": 21240 }, { - "epoch": 0.6027525539160046, + "epoch": 0.6019156110969424, "grad_norm": 0.0, - "learning_rate": 7.196305843668771e-06, - "loss": 0.8369, + "learning_rate": 7.222165598956943e-06, + "loss": 0.8989, "step": 21241 }, { - "epoch": 0.6027809307604994, + "epoch": 0.6019439485392049, "grad_norm": 0.0, - "learning_rate": 7.195423639663845e-06, - "loss": 0.8403, + "learning_rate": 7.2212839421622485e-06, + "loss": 0.877, "step": 21242 }, { - "epoch": 0.6028093076049943, + "epoch": 0.6019722859814673, "grad_norm": 0.0, - "learning_rate": 7.194541459349304e-06, - "loss": 0.8521, + "learning_rate": 7.220402308773711e-06, + "loss": 0.9024, "step": 21243 }, { - "epoch": 0.6028376844494893, + "epoch": 0.6020006234237297, "grad_norm": 0.0, - "learning_rate": 7.1936593027325965e-06, - "loss": 0.8782, + "learning_rate": 7.21952069879876e-06, + "loss": 0.9552, "step": 21244 }, { - "epoch": 0.6028660612939841, + "epoch": 0.6020289608659922, "grad_norm": 0.0, - "learning_rate": 7.192777169821174e-06, - "loss": 0.67, + "learning_rate": 7.218639112244815e-06, + "loss": 0.8555, "step": 21245 }, { - "epoch": 0.602894438138479, + "epoch": 0.6020572983082547, "grad_norm": 0.0, - "learning_rate": 7.191895060622491e-06, - "loss": 0.8178, + "learning_rate": 7.217757549119308e-06, + "loss": 0.9829, "step": 21246 }, { - "epoch": 0.6029228149829738, + "epoch": 0.6020856357505172, "grad_norm": 0.0, - "learning_rate": 7.191012975143996e-06, - "loss": 0.9714, + "learning_rate": 7.21687600942966e-06, + "loss": 0.9144, "step": 21247 }, { - "epoch": 0.6029511918274688, + "epoch": 0.6021139731927796, "grad_norm": 0.0, - "learning_rate": 7.19013091339314e-06, - "loss": 0.9531, + "learning_rate": 7.215994493183298e-06, + "loss": 0.8131, "step": 21248 }, { - "epoch": 0.6029795686719637, + "epoch": 0.6021423106350421, "grad_norm": 0.0, - "learning_rate": 7.1892488753773744e-06, - "loss": 0.8558, + "learning_rate": 7.215113000387654e-06, + "loss": 0.7248, "step": 21249 }, { - "epoch": 0.6030079455164585, + "epoch": 0.6021706480773046, "grad_norm": 0.0, - "learning_rate": 7.1883668611041525e-06, - "loss": 0.824, + "learning_rate": 7.214231531050142e-06, + "loss": 0.7444, "step": 21250 }, { - "epoch": 0.6030363223609535, + "epoch": 0.602198985519567, "grad_norm": 0.0, - "learning_rate": 7.187484870580919e-06, - "loss": 0.8105, + "learning_rate": 7.213350085178195e-06, + "loss": 0.9135, "step": 21251 }, { - "epoch": 0.6030646992054484, + "epoch": 0.6022273229618295, "grad_norm": 0.0, - "learning_rate": 7.186602903815128e-06, - "loss": 0.8213, + "learning_rate": 7.212468662779233e-06, + "loss": 0.8024, "step": 21252 }, { - "epoch": 0.6030930760499432, + "epoch": 0.602255660404092, "grad_norm": 0.0, - "learning_rate": 7.185720960814229e-06, - "loss": 0.9028, + "learning_rate": 7.211587263860682e-06, + "loss": 0.8317, "step": 21253 }, { - "epoch": 0.6031214528944382, + "epoch": 0.6022839978463543, "grad_norm": 0.0, - "learning_rate": 7.184839041585671e-06, - "loss": 0.8503, + "learning_rate": 7.210705888429972e-06, + "loss": 0.8649, "step": 21254 }, { - "epoch": 0.603149829738933, + "epoch": 0.6023123352886168, "grad_norm": 0.0, - "learning_rate": 7.1839571461369054e-06, - "loss": 0.8607, + "learning_rate": 7.2098245364945165e-06, + "loss": 0.7661, "step": 21255 }, { - "epoch": 0.6031782065834279, + "epoch": 0.6023406727308793, "grad_norm": 0.0, - "learning_rate": 7.183075274475376e-06, - "loss": 0.8687, + "learning_rate": 7.208943208061746e-06, + "loss": 0.986, "step": 21256 }, { - "epoch": 0.6032065834279228, + "epoch": 0.6023690101731418, "grad_norm": 0.0, - "learning_rate": 7.182193426608542e-06, - "loss": 0.9248, + "learning_rate": 7.208061903139087e-06, + "loss": 0.7608, "step": 21257 }, { - "epoch": 0.6032349602724177, + "epoch": 0.6023973476154042, "grad_norm": 0.0, - "learning_rate": 7.181311602543845e-06, - "loss": 0.9283, + "learning_rate": 7.207180621733956e-06, + "loss": 0.9004, "step": 21258 }, { - "epoch": 0.6032633371169126, + "epoch": 0.6024256850576667, "grad_norm": 0.0, - "learning_rate": 7.180429802288733e-06, - "loss": 0.9468, + "learning_rate": 7.2062993638537815e-06, + "loss": 0.845, "step": 21259 }, { - "epoch": 0.6032917139614075, + "epoch": 0.6024540224999292, "grad_norm": 0.0, - "learning_rate": 7.17954802585066e-06, - "loss": 0.9947, + "learning_rate": 7.205418129505982e-06, + "loss": 0.8632, "step": 21260 }, { - "epoch": 0.6033200908059024, + "epoch": 0.6024823599421916, "grad_norm": 0.0, - "learning_rate": 7.178666273237073e-06, - "loss": 0.7768, + "learning_rate": 7.2045369186979845e-06, + "loss": 0.7163, "step": 21261 }, { - "epoch": 0.6033484676503973, + "epoch": 0.6025106973844541, "grad_norm": 0.0, - "learning_rate": 7.177784544455415e-06, - "loss": 0.9004, + "learning_rate": 7.203655731437214e-06, + "loss": 0.8701, "step": 21262 }, { - "epoch": 0.6033768444948922, + "epoch": 0.6025390348267166, "grad_norm": 0.0, - "learning_rate": 7.1769028395131415e-06, - "loss": 0.9076, + "learning_rate": 7.202774567731086e-06, + "loss": 0.9841, "step": 21263 }, { - "epoch": 0.603405221339387, + "epoch": 0.602567372268979, "grad_norm": 0.0, - "learning_rate": 7.176021158417695e-06, - "loss": 0.9137, + "learning_rate": 7.201893427587026e-06, + "loss": 0.8422, "step": 21264 }, { - "epoch": 0.603433598183882, + "epoch": 0.6025957097112414, "grad_norm": 0.0, - "learning_rate": 7.175139501176524e-06, - "loss": 0.8085, + "learning_rate": 7.201012311012459e-06, + "loss": 0.8689, "step": 21265 }, { - "epoch": 0.6034619750283768, + "epoch": 0.6026240471535039, "grad_norm": 0.0, - "learning_rate": 7.174257867797079e-06, - "loss": 0.8396, + "learning_rate": 7.200131218014803e-06, + "loss": 0.8101, "step": 21266 }, { - "epoch": 0.6034903518728717, + "epoch": 0.6026523845957664, "grad_norm": 0.0, - "learning_rate": 7.173376258286803e-06, - "loss": 0.8898, + "learning_rate": 7.199250148601485e-06, + "loss": 0.864, "step": 21267 }, { - "epoch": 0.6035187287173667, + "epoch": 0.6026807220380288, "grad_norm": 0.0, - "learning_rate": 7.172494672653143e-06, - "loss": 0.8261, + "learning_rate": 7.198369102779919e-06, + "loss": 0.8362, "step": 21268 }, { - "epoch": 0.6035471055618615, + "epoch": 0.6027090594802913, "grad_norm": 0.0, - "learning_rate": 7.17161311090355e-06, - "loss": 0.9125, + "learning_rate": 7.197488080557531e-06, + "loss": 0.8651, "step": 21269 }, { - "epoch": 0.6035754824063564, + "epoch": 0.6027373969225538, "grad_norm": 0.0, - "learning_rate": 7.1707315730454695e-06, - "loss": 0.7594, + "learning_rate": 7.196607081941742e-06, + "loss": 0.8458, "step": 21270 }, { - "epoch": 0.6036038592508514, + "epoch": 0.6027657343648163, "grad_norm": 0.0, - "learning_rate": 7.169850059086343e-06, - "loss": 0.7921, + "learning_rate": 7.1957261069399745e-06, + "loss": 0.7986, "step": 21271 }, { - "epoch": 0.6036322360953462, + "epoch": 0.6027940718070787, "grad_norm": 0.0, - "learning_rate": 7.168968569033619e-06, - "loss": 0.7434, + "learning_rate": 7.1948451555596445e-06, + "loss": 0.7279, "step": 21272 }, { - "epoch": 0.6036606129398411, + "epoch": 0.6028224092493412, "grad_norm": 0.0, - "learning_rate": 7.16808710289475e-06, - "loss": 0.9014, + "learning_rate": 7.193964227808177e-06, + "loss": 0.8889, "step": 21273 }, { - "epoch": 0.6036889897843359, + "epoch": 0.6028507466916037, "grad_norm": 0.0, - "learning_rate": 7.167205660677174e-06, - "loss": 0.9541, + "learning_rate": 7.193083323692989e-06, + "loss": 0.8197, "step": 21274 }, { - "epoch": 0.6037173666288309, + "epoch": 0.602879084133866, "grad_norm": 0.0, - "learning_rate": 7.166324242388338e-06, - "loss": 0.8297, + "learning_rate": 7.192202443221508e-06, + "loss": 0.7793, "step": 21275 }, { - "epoch": 0.6037457434733258, + "epoch": 0.6029074215761285, "grad_norm": 0.0, - "learning_rate": 7.165442848035691e-06, - "loss": 0.9094, + "learning_rate": 7.191321586401143e-06, + "loss": 0.8215, "step": 21276 }, { - "epoch": 0.6037741203178206, + "epoch": 0.602935759018391, "grad_norm": 0.0, - "learning_rate": 7.1645614776266755e-06, - "loss": 0.7791, + "learning_rate": 7.1904407532393196e-06, + "loss": 0.9083, "step": 21277 }, { - "epoch": 0.6038024971623156, + "epoch": 0.6029640964606534, "grad_norm": 0.0, - "learning_rate": 7.1636801311687355e-06, - "loss": 0.9365, + "learning_rate": 7.189559943743458e-06, + "loss": 0.8323, "step": 21278 }, { - "epoch": 0.6038308740068105, + "epoch": 0.6029924339029159, "grad_norm": 0.0, - "learning_rate": 7.162798808669318e-06, - "loss": 0.9394, + "learning_rate": 7.188679157920977e-06, + "loss": 0.9594, "step": 21279 }, { - "epoch": 0.6038592508513053, + "epoch": 0.6030207713451784, "grad_norm": 0.0, - "learning_rate": 7.161917510135866e-06, - "loss": 0.906, + "learning_rate": 7.187798395779298e-06, + "loss": 0.8465, "step": 21280 }, { - "epoch": 0.6038876276958002, + "epoch": 0.6030491087874409, "grad_norm": 0.0, - "learning_rate": 7.161036235575822e-06, - "loss": 0.8247, + "learning_rate": 7.186917657325833e-06, + "loss": 0.8116, "step": 21281 }, { - "epoch": 0.6039160045402951, + "epoch": 0.6030774462297033, "grad_norm": 0.0, - "learning_rate": 7.160154984996637e-06, - "loss": 0.8709, + "learning_rate": 7.186036942568004e-06, + "loss": 0.939, "step": 21282 }, { - "epoch": 0.60394438138479, + "epoch": 0.6031057836719658, "grad_norm": 0.0, - "learning_rate": 7.159273758405749e-06, - "loss": 0.9442, + "learning_rate": 7.185156251513236e-06, + "loss": 0.8894, "step": 21283 }, { - "epoch": 0.6039727582292849, + "epoch": 0.6031341211142283, "grad_norm": 0.0, - "learning_rate": 7.158392555810603e-06, - "loss": 0.9098, + "learning_rate": 7.1842755841689385e-06, + "loss": 0.857, "step": 21284 }, { - "epoch": 0.6040011350737798, + "epoch": 0.6031624585564906, "grad_norm": 0.0, - "learning_rate": 7.157511377218641e-06, - "loss": 0.8535, + "learning_rate": 7.183394940542532e-06, + "loss": 0.8823, "step": 21285 }, { - "epoch": 0.6040295119182747, + "epoch": 0.6031907959987531, "grad_norm": 0.0, - "learning_rate": 7.156630222637311e-06, - "loss": 0.8593, + "learning_rate": 7.1825143206414425e-06, + "loss": 0.7734, "step": 21286 }, { - "epoch": 0.6040578887627696, + "epoch": 0.6032191334410156, "grad_norm": 0.0, - "learning_rate": 7.1557490920740465e-06, - "loss": 0.9282, + "learning_rate": 7.181633724473075e-06, + "loss": 0.7884, "step": 21287 }, { - "epoch": 0.6040862656072645, + "epoch": 0.6032474708832781, "grad_norm": 0.0, - "learning_rate": 7.154867985536305e-06, - "loss": 0.8088, + "learning_rate": 7.180753152044859e-06, + "loss": 0.9077, "step": 21288 }, { - "epoch": 0.6041146424517594, + "epoch": 0.6032758083255405, "grad_norm": 0.0, - "learning_rate": 7.153986903031518e-06, - "loss": 0.9367, + "learning_rate": 7.1798726033642e-06, + "loss": 0.8425, "step": 21289 }, { - "epoch": 0.6041430192962542, + "epoch": 0.603304145767803, "grad_norm": 0.0, - "learning_rate": 7.153105844567133e-06, - "loss": 0.921, + "learning_rate": 7.178992078438522e-06, + "loss": 0.8261, "step": 21290 }, { - "epoch": 0.6041713961407491, + "epoch": 0.6033324832100655, "grad_norm": 0.0, - "learning_rate": 7.152224810150592e-06, - "loss": 0.885, + "learning_rate": 7.178111577275244e-06, + "loss": 0.8292, "step": 21291 }, { - "epoch": 0.6041997729852441, + "epoch": 0.6033608206523279, "grad_norm": 0.0, - "learning_rate": 7.151343799789332e-06, - "loss": 0.9265, + "learning_rate": 7.177231099881778e-06, + "loss": 0.9546, "step": 21292 }, { - "epoch": 0.6042281498297389, + "epoch": 0.6033891580945904, "grad_norm": 0.0, - "learning_rate": 7.150462813490804e-06, - "loss": 0.903, + "learning_rate": 7.176350646265542e-06, + "loss": 0.8924, "step": 21293 }, { - "epoch": 0.6042565266742338, + "epoch": 0.6034174955368529, "grad_norm": 0.0, - "learning_rate": 7.149581851262442e-06, - "loss": 0.8936, + "learning_rate": 7.1754702164339575e-06, + "loss": 0.8716, "step": 21294 }, { - "epoch": 0.6042849035187288, + "epoch": 0.6034458329791154, "grad_norm": 0.0, - "learning_rate": 7.14870091311169e-06, - "loss": 0.8967, + "learning_rate": 7.174589810394432e-06, + "loss": 0.8254, "step": 21295 }, { - "epoch": 0.6043132803632236, + "epoch": 0.6034741704213777, "grad_norm": 0.0, - "learning_rate": 7.14781999904599e-06, - "loss": 0.9042, + "learning_rate": 7.17370942815439e-06, + "loss": 0.7878, "step": 21296 }, { - "epoch": 0.6043416572077185, + "epoch": 0.6035025078636402, "grad_norm": 0.0, - "learning_rate": 7.1469391090727834e-06, - "loss": 0.7987, + "learning_rate": 7.172829069721238e-06, + "loss": 0.9081, "step": 21297 }, { - "epoch": 0.6043700340522133, + "epoch": 0.6035308453059027, "grad_norm": 0.0, - "learning_rate": 7.14605824319951e-06, - "loss": 0.7943, + "learning_rate": 7.171948735102396e-06, + "loss": 0.8417, "step": 21298 }, { - "epoch": 0.6043984108967083, + "epoch": 0.6035591827481651, "grad_norm": 0.0, - "learning_rate": 7.145177401433611e-06, - "loss": 0.8652, + "learning_rate": 7.171068424305286e-06, + "loss": 0.8, "step": 21299 }, { - "epoch": 0.6044267877412032, + "epoch": 0.6035875201904276, "grad_norm": 0.0, - "learning_rate": 7.1442965837825275e-06, - "loss": 0.8795, + "learning_rate": 7.170188137337313e-06, + "loss": 0.8336, "step": 21300 }, { - "epoch": 0.604455164585698, + "epoch": 0.6036158576326901, "grad_norm": 0.0, - "learning_rate": 7.143415790253696e-06, - "loss": 0.6523, + "learning_rate": 7.169307874205896e-06, + "loss": 0.7831, "step": 21301 }, { - "epoch": 0.604483541430193, + "epoch": 0.6036441950749525, "grad_norm": 0.0, - "learning_rate": 7.142535020854562e-06, - "loss": 0.8213, + "learning_rate": 7.168427634918453e-06, + "loss": 0.8453, "step": 21302 }, { - "epoch": 0.6045119182746879, + "epoch": 0.603672532517215, "grad_norm": 0.0, - "learning_rate": 7.141654275592561e-06, - "loss": 0.8232, + "learning_rate": 7.167547419482393e-06, + "loss": 0.8806, "step": 21303 }, { - "epoch": 0.6045402951191827, + "epoch": 0.6037008699594775, "grad_norm": 0.0, - "learning_rate": 7.140773554475138e-06, - "loss": 0.8616, + "learning_rate": 7.1666672279051345e-06, + "loss": 0.7646, "step": 21304 }, { - "epoch": 0.6045686719636776, + "epoch": 0.60372920740174, "grad_norm": 0.0, - "learning_rate": 7.139892857509729e-06, - "loss": 0.8898, + "learning_rate": 7.165787060194087e-06, + "loss": 0.7922, "step": 21305 }, { - "epoch": 0.6045970488081726, + "epoch": 0.6037575448440023, "grad_norm": 0.0, - "learning_rate": 7.139012184703771e-06, - "loss": 0.8494, + "learning_rate": 7.1649069163566685e-06, + "loss": 0.8542, "step": 21306 }, { - "epoch": 0.6046254256526674, + "epoch": 0.6037858822862648, "grad_norm": 0.0, - "learning_rate": 7.138131536064709e-06, - "loss": 0.8903, + "learning_rate": 7.1640267964002965e-06, + "loss": 0.8293, "step": 21307 }, { - "epoch": 0.6046538024971623, + "epoch": 0.6038142197285273, "grad_norm": 0.0, - "learning_rate": 7.137250911599978e-06, - "loss": 0.8665, + "learning_rate": 7.163146700332374e-06, + "loss": 0.8633, "step": 21308 }, { - "epoch": 0.6046821793416572, + "epoch": 0.6038425571707897, "grad_norm": 0.0, - "learning_rate": 7.1363703113170135e-06, - "loss": 0.8648, + "learning_rate": 7.1622666281603235e-06, + "loss": 0.9361, "step": 21309 }, { - "epoch": 0.6047105561861521, + "epoch": 0.6038708946130522, "grad_norm": 0.0, - "learning_rate": 7.135489735223262e-06, - "loss": 0.837, + "learning_rate": 7.161386579891552e-06, + "loss": 0.8785, "step": 21310 }, { - "epoch": 0.604738933030647, + "epoch": 0.6038992320553147, "grad_norm": 0.0, - "learning_rate": 7.134609183326156e-06, - "loss": 0.7659, + "learning_rate": 7.160506555533476e-06, + "loss": 0.9375, "step": 21311 }, { - "epoch": 0.6047673098751419, + "epoch": 0.6039275694975772, "grad_norm": 0.0, - "learning_rate": 7.1337286556331345e-06, - "loss": 0.8533, + "learning_rate": 7.159626555093513e-06, + "loss": 0.7942, "step": 21312 }, { - "epoch": 0.6047956867196368, + "epoch": 0.6039559069398396, "grad_norm": 0.0, - "learning_rate": 7.1328481521516366e-06, - "loss": 0.8034, + "learning_rate": 7.158746578579065e-06, + "loss": 0.7924, "step": 21313 }, { - "epoch": 0.6048240635641317, + "epoch": 0.6039842443821021, "grad_norm": 0.0, - "learning_rate": 7.131967672889101e-06, - "loss": 0.8889, + "learning_rate": 7.157866625997549e-06, + "loss": 0.8082, "step": 21314 }, { - "epoch": 0.6048524404086265, + "epoch": 0.6040125818243646, "grad_norm": 0.0, - "learning_rate": 7.131087217852959e-06, - "loss": 0.8537, + "learning_rate": 7.156986697356383e-06, + "loss": 0.8736, "step": 21315 }, { - "epoch": 0.6048808172531215, + "epoch": 0.604040919266627, "grad_norm": 0.0, - "learning_rate": 7.130206787050656e-06, - "loss": 0.9971, + "learning_rate": 7.156106792662969e-06, + "loss": 0.8202, "step": 21316 }, { - "epoch": 0.6049091940976163, + "epoch": 0.6040692567088894, "grad_norm": 0.0, - "learning_rate": 7.129326380489625e-06, - "loss": 0.7704, + "learning_rate": 7.155226911924727e-06, + "loss": 0.9435, "step": 21317 }, { - "epoch": 0.6049375709421112, + "epoch": 0.6040975941511519, "grad_norm": 0.0, - "learning_rate": 7.128445998177298e-06, - "loss": 0.8088, + "learning_rate": 7.154347055149061e-06, + "loss": 0.85, "step": 21318 }, { - "epoch": 0.6049659477866062, + "epoch": 0.6041259315934144, "grad_norm": 0.0, - "learning_rate": 7.12756564012112e-06, - "loss": 0.854, + "learning_rate": 7.153467222343386e-06, + "loss": 0.9477, "step": 21319 }, { - "epoch": 0.604994324631101, + "epoch": 0.6041542690356768, "grad_norm": 0.0, - "learning_rate": 7.126685306328526e-06, - "loss": 0.8322, + "learning_rate": 7.1525874135151204e-06, + "loss": 0.8734, "step": 21320 }, { - "epoch": 0.6050227014755959, + "epoch": 0.6041826064779393, "grad_norm": 0.0, - "learning_rate": 7.12580499680695e-06, - "loss": 0.7535, + "learning_rate": 7.151707628671662e-06, + "loss": 0.8113, "step": 21321 }, { - "epoch": 0.6050510783200908, + "epoch": 0.6042109439202018, "grad_norm": 0.0, - "learning_rate": 7.124924711563826e-06, - "loss": 0.8298, + "learning_rate": 7.15082786782043e-06, + "loss": 0.8286, "step": 21322 }, { - "epoch": 0.6050794551645857, + "epoch": 0.6042392813624642, "grad_norm": 0.0, - "learning_rate": 7.124044450606595e-06, - "loss": 0.9696, + "learning_rate": 7.1499481309688336e-06, + "loss": 0.972, "step": 21323 }, { - "epoch": 0.6051078320090806, + "epoch": 0.6042676188047267, "grad_norm": 0.0, - "learning_rate": 7.1231642139426904e-06, - "loss": 0.8212, + "learning_rate": 7.149068418124281e-06, + "loss": 0.821, "step": 21324 }, { - "epoch": 0.6051362088535754, + "epoch": 0.6042959562469892, "grad_norm": 0.0, - "learning_rate": 7.122284001579546e-06, - "loss": 0.8362, + "learning_rate": 7.148188729294188e-06, + "loss": 0.9415, "step": 21325 }, { - "epoch": 0.6051645856980704, + "epoch": 0.6043242936892516, "grad_norm": 0.0, - "learning_rate": 7.121403813524596e-06, - "loss": 0.8478, + "learning_rate": 7.1473090644859555e-06, + "loss": 0.9353, "step": 21326 }, { - "epoch": 0.6051929625425653, + "epoch": 0.604352631131514, "grad_norm": 0.0, - "learning_rate": 7.1205236497852796e-06, - "loss": 0.8035, + "learning_rate": 7.146429423706998e-06, + "loss": 0.742, "step": 21327 }, { - "epoch": 0.6052213393870601, + "epoch": 0.6043809685737765, "grad_norm": 0.0, - "learning_rate": 7.119643510369029e-06, - "loss": 0.9833, + "learning_rate": 7.14554980696473e-06, + "loss": 0.8657, "step": 21328 }, { - "epoch": 0.6052497162315551, + "epoch": 0.604409306016039, "grad_norm": 0.0, - "learning_rate": 7.118763395283277e-06, - "loss": 0.8063, + "learning_rate": 7.144670214266551e-06, + "loss": 0.8691, "step": 21329 }, { - "epoch": 0.60527809307605, + "epoch": 0.6044376434583014, "grad_norm": 0.0, - "learning_rate": 7.1178833045354625e-06, - "loss": 0.7915, + "learning_rate": 7.143790645619875e-06, + "loss": 0.8381, "step": 21330 }, { - "epoch": 0.6053064699205448, + "epoch": 0.6044659809005639, "grad_norm": 0.0, - "learning_rate": 7.117003238133018e-06, - "loss": 0.8024, + "learning_rate": 7.142911101032114e-06, + "loss": 0.9166, "step": 21331 }, { - "epoch": 0.6053348467650397, + "epoch": 0.6044943183428264, "grad_norm": 0.0, - "learning_rate": 7.116123196083373e-06, - "loss": 0.8358, + "learning_rate": 7.142031580510671e-06, + "loss": 0.8548, "step": 21332 }, { - "epoch": 0.6053632236095347, + "epoch": 0.6045226557850888, "grad_norm": 0.0, - "learning_rate": 7.115243178393965e-06, - "loss": 0.7629, + "learning_rate": 7.141152084062962e-06, + "loss": 0.9295, "step": 21333 }, { - "epoch": 0.6053916004540295, + "epoch": 0.6045509932273513, "grad_norm": 0.0, - "learning_rate": 7.1143631850722305e-06, - "loss": 0.7326, + "learning_rate": 7.140272611696386e-06, + "loss": 0.782, "step": 21334 }, { - "epoch": 0.6054199772985244, + "epoch": 0.6045793306696138, "grad_norm": 0.0, - "learning_rate": 7.1134832161256004e-06, - "loss": 0.8188, + "learning_rate": 7.139393163418355e-06, + "loss": 0.8457, "step": 21335 }, { - "epoch": 0.6054483541430193, + "epoch": 0.6046076681118763, "grad_norm": 0.0, - "learning_rate": 7.112603271561505e-06, - "loss": 0.8048, + "learning_rate": 7.138513739236281e-06, + "loss": 0.8552, "step": 21336 }, { - "epoch": 0.6054767309875142, + "epoch": 0.6046360055541387, "grad_norm": 0.0, - "learning_rate": 7.111723351387381e-06, - "loss": 0.7927, + "learning_rate": 7.137634339157566e-06, + "loss": 0.7963, "step": 21337 }, { - "epoch": 0.6055051078320091, + "epoch": 0.6046643429964011, "grad_norm": 0.0, - "learning_rate": 7.110843455610661e-06, - "loss": 0.8793, + "learning_rate": 7.136754963189625e-06, + "loss": 0.7577, "step": 21338 }, { - "epoch": 0.6055334846765039, + "epoch": 0.6046926804386636, "grad_norm": 0.0, - "learning_rate": 7.109963584238772e-06, - "loss": 0.7774, + "learning_rate": 7.1358756113398545e-06, + "loss": 0.7423, "step": 21339 }, { - "epoch": 0.6055618615209989, + "epoch": 0.604721017880926, "grad_norm": 0.0, - "learning_rate": 7.109083737279154e-06, - "loss": 0.9086, + "learning_rate": 7.134996283615667e-06, + "loss": 0.8485, "step": 21340 }, { - "epoch": 0.6055902383654937, + "epoch": 0.6047493553231885, "grad_norm": 0.0, - "learning_rate": 7.108203914739236e-06, - "loss": 0.9783, + "learning_rate": 7.134116980024474e-06, + "loss": 0.8356, "step": 21341 }, { - "epoch": 0.6056186152099886, + "epoch": 0.604777692765451, "grad_norm": 0.0, - "learning_rate": 7.107324116626447e-06, - "loss": 0.7855, + "learning_rate": 7.133237700573676e-06, + "loss": 0.8393, "step": 21342 }, { - "epoch": 0.6056469920544836, + "epoch": 0.6048060302077134, "grad_norm": 0.0, - "learning_rate": 7.106444342948224e-06, - "loss": 0.7524, + "learning_rate": 7.132358445270679e-06, + "loss": 0.8674, "step": 21343 }, { - "epoch": 0.6056753688989784, + "epoch": 0.6048343676499759, "grad_norm": 0.0, - "learning_rate": 7.1055645937119956e-06, - "loss": 0.8112, + "learning_rate": 7.131479214122894e-06, + "loss": 0.923, "step": 21344 }, { - "epoch": 0.6057037457434733, + "epoch": 0.6048627050922384, "grad_norm": 0.0, - "learning_rate": 7.10468486892519e-06, - "loss": 0.8678, + "learning_rate": 7.130600007137724e-06, + "loss": 0.8161, "step": 21345 }, { - "epoch": 0.6057321225879683, + "epoch": 0.6048910425345009, "grad_norm": 0.0, - "learning_rate": 7.103805168595244e-06, - "loss": 0.708, + "learning_rate": 7.129720824322579e-06, + "loss": 0.7991, "step": 21346 }, { - "epoch": 0.6057604994324631, + "epoch": 0.6049193799767633, "grad_norm": 0.0, - "learning_rate": 7.102925492729587e-06, - "loss": 0.9151, + "learning_rate": 7.128841665684856e-06, + "loss": 0.7769, "step": 21347 }, { - "epoch": 0.605788876276958, + "epoch": 0.6049477174190258, "grad_norm": 0.0, - "learning_rate": 7.102045841335643e-06, - "loss": 0.7823, + "learning_rate": 7.1279625312319675e-06, + "loss": 0.8516, "step": 21348 }, { - "epoch": 0.6058172531214528, + "epoch": 0.6049760548612882, "grad_norm": 0.0, - "learning_rate": 7.101166214420851e-06, - "loss": 0.8808, + "learning_rate": 7.127083420971319e-06, + "loss": 0.9318, "step": 21349 }, { - "epoch": 0.6058456299659478, + "epoch": 0.6050043923035506, "grad_norm": 0.0, - "learning_rate": 7.10028661199264e-06, - "loss": 0.9509, + "learning_rate": 7.126204334910312e-06, + "loss": 0.8467, "step": 21350 }, { - "epoch": 0.6058740068104427, + "epoch": 0.6050327297458131, "grad_norm": 0.0, - "learning_rate": 7.099407034058438e-06, - "loss": 0.9219, + "learning_rate": 7.125325273056351e-06, + "loss": 0.9298, "step": 21351 }, { - "epoch": 0.6059023836549375, + "epoch": 0.6050610671880756, "grad_norm": 0.0, - "learning_rate": 7.098527480625676e-06, - "loss": 0.947, + "learning_rate": 7.124446235416849e-06, + "loss": 0.794, "step": 21352 }, { - "epoch": 0.6059307604994325, + "epoch": 0.6050894046303381, "grad_norm": 0.0, - "learning_rate": 7.097647951701779e-06, - "loss": 0.8027, + "learning_rate": 7.123567221999199e-06, + "loss": 0.8698, "step": 21353 }, { - "epoch": 0.6059591373439274, + "epoch": 0.6051177420726005, "grad_norm": 0.0, - "learning_rate": 7.096768447294185e-06, - "loss": 0.8353, + "learning_rate": 7.122688232810815e-06, + "loss": 0.832, "step": 21354 }, { - "epoch": 0.6059875141884222, + "epoch": 0.605146079514863, "grad_norm": 0.0, - "learning_rate": 7.095888967410316e-06, - "loss": 0.7769, + "learning_rate": 7.121809267859092e-06, + "loss": 0.849, "step": 21355 }, { - "epoch": 0.6060158910329171, + "epoch": 0.6051744169571255, "grad_norm": 0.0, - "learning_rate": 7.095009512057602e-06, - "loss": 0.9371, + "learning_rate": 7.120930327151439e-06, + "loss": 0.834, "step": 21356 }, { - "epoch": 0.6060442678774121, + "epoch": 0.6052027543993879, "grad_norm": 0.0, - "learning_rate": 7.094130081243475e-06, - "loss": 0.8512, + "learning_rate": 7.1200514106952586e-06, + "loss": 0.8884, "step": 21357 }, { - "epoch": 0.6060726447219069, + "epoch": 0.6052310918416504, "grad_norm": 0.0, - "learning_rate": 7.093250674975363e-06, - "loss": 0.8736, + "learning_rate": 7.1191725184979554e-06, + "loss": 0.8912, "step": 21358 }, { - "epoch": 0.6061010215664018, + "epoch": 0.6052594292839129, "grad_norm": 0.0, - "learning_rate": 7.092371293260691e-06, - "loss": 0.8368, + "learning_rate": 7.118293650566931e-06, + "loss": 0.9155, "step": 21359 }, { - "epoch": 0.6061293984108967, + "epoch": 0.6052877667261753, "grad_norm": 0.0, - "learning_rate": 7.0914919361068915e-06, - "loss": 0.8214, + "learning_rate": 7.117414806909593e-06, + "loss": 0.9268, "step": 21360 }, { - "epoch": 0.6061577752553916, + "epoch": 0.6053161041684377, "grad_norm": 0.0, - "learning_rate": 7.09061260352139e-06, - "loss": 0.8335, + "learning_rate": 7.1165359875333374e-06, + "loss": 0.8896, "step": 21361 }, { - "epoch": 0.6061861520998865, + "epoch": 0.6053444416107002, "grad_norm": 0.0, - "learning_rate": 7.0897332955116115e-06, - "loss": 0.7861, + "learning_rate": 7.115657192445571e-06, + "loss": 0.7774, "step": 21362 }, { - "epoch": 0.6062145289443814, + "epoch": 0.6053727790529627, "grad_norm": 0.0, - "learning_rate": 7.08885401208499e-06, - "loss": 0.7756, + "learning_rate": 7.114778421653693e-06, + "loss": 0.9168, "step": 21363 }, { - "epoch": 0.6062429057888763, + "epoch": 0.6054011164952251, "grad_norm": 0.0, - "learning_rate": 7.0879747532489455e-06, - "loss": 0.8982, + "learning_rate": 7.113899675165108e-06, + "loss": 0.8763, "step": 21364 }, { - "epoch": 0.6062712826333712, + "epoch": 0.6054294539374876, "grad_norm": 0.0, - "learning_rate": 7.087095519010914e-06, - "loss": 0.792, + "learning_rate": 7.113020952987222e-06, + "loss": 0.9931, "step": 21365 }, { - "epoch": 0.606299659477866, + "epoch": 0.6054577913797501, "grad_norm": 0.0, - "learning_rate": 7.086216309378315e-06, - "loss": 0.8905, + "learning_rate": 7.112142255127427e-06, + "loss": 0.8709, "step": 21366 }, { - "epoch": 0.606328036322361, + "epoch": 0.6054861288220125, "grad_norm": 0.0, - "learning_rate": 7.085337124358578e-06, - "loss": 0.8217, + "learning_rate": 7.111263581593137e-06, + "loss": 0.913, "step": 21367 }, { - "epoch": 0.6063564131668558, + "epoch": 0.605514466264275, "grad_norm": 0.0, - "learning_rate": 7.084457963959131e-06, - "loss": 0.9547, + "learning_rate": 7.1103849323917406e-06, + "loss": 0.9552, "step": 21368 }, { - "epoch": 0.6063847900113507, + "epoch": 0.6055428037065375, "grad_norm": 0.0, - "learning_rate": 7.0835788281873985e-06, - "loss": 0.8212, + "learning_rate": 7.109506307530646e-06, + "loss": 0.9327, "step": 21369 }, { - "epoch": 0.6064131668558457, + "epoch": 0.6055711411488, "grad_norm": 0.0, - "learning_rate": 7.082699717050804e-06, - "loss": 0.8744, + "learning_rate": 7.108627707017255e-06, + "loss": 0.8772, "step": 21370 }, { - "epoch": 0.6064415437003405, + "epoch": 0.6055994785910623, "grad_norm": 0.0, - "learning_rate": 7.0818206305567795e-06, - "loss": 0.8185, + "learning_rate": 7.107749130858963e-06, + "loss": 0.9135, "step": 21371 }, { - "epoch": 0.6064699205448354, + "epoch": 0.6056278160333248, "grad_norm": 0.0, - "learning_rate": 7.0809415687127455e-06, - "loss": 0.8585, + "learning_rate": 7.1068705790631766e-06, + "loss": 0.8642, "step": 21372 }, { - "epoch": 0.6064982973893303, + "epoch": 0.6056561534755873, "grad_norm": 0.0, - "learning_rate": 7.0800625315261285e-06, - "loss": 0.9144, + "learning_rate": 7.105992051637296e-06, + "loss": 0.7909, "step": 21373 }, { - "epoch": 0.6065266742338252, + "epoch": 0.6056844909178497, "grad_norm": 0.0, - "learning_rate": 7.079183519004355e-06, - "loss": 0.8592, + "learning_rate": 7.1051135485887146e-06, + "loss": 0.8729, "step": 21374 }, { - "epoch": 0.6065550510783201, + "epoch": 0.6057128283601122, "grad_norm": 0.0, - "learning_rate": 7.078304531154853e-06, - "loss": 0.8557, + "learning_rate": 7.1042350699248394e-06, + "loss": 0.9041, "step": 21375 }, { - "epoch": 0.6065834279228149, + "epoch": 0.6057411658023747, "grad_norm": 0.0, - "learning_rate": 7.077425567985039e-06, - "loss": 0.906, + "learning_rate": 7.103356615653065e-06, + "loss": 0.743, "step": 21376 }, { - "epoch": 0.6066118047673099, + "epoch": 0.6057695032446372, "grad_norm": 0.0, - "learning_rate": 7.076546629502345e-06, - "loss": 0.8928, + "learning_rate": 7.102478185780794e-06, + "loss": 0.9404, "step": 21377 }, { - "epoch": 0.6066401816118048, + "epoch": 0.6057978406868996, "grad_norm": 0.0, - "learning_rate": 7.075667715714194e-06, - "loss": 0.9694, + "learning_rate": 7.10159978031543e-06, + "loss": 0.9211, "step": 21378 }, { - "epoch": 0.6066685584562996, + "epoch": 0.6058261781291621, "grad_norm": 0.0, - "learning_rate": 7.074788826628005e-06, - "loss": 0.8969, + "learning_rate": 7.100721399264363e-06, + "loss": 0.8971, "step": 21379 }, { - "epoch": 0.6066969353007946, + "epoch": 0.6058545155714246, "grad_norm": 0.0, - "learning_rate": 7.0739099622512085e-06, - "loss": 0.8332, + "learning_rate": 7.0998430426349955e-06, + "loss": 0.8593, "step": 21380 }, { - "epoch": 0.6067253121452895, + "epoch": 0.6058828530136869, "grad_norm": 0.0, - "learning_rate": 7.073031122591227e-06, - "loss": 0.8382, + "learning_rate": 7.0989647104347306e-06, + "loss": 0.8298, "step": 21381 }, { - "epoch": 0.6067536889897843, + "epoch": 0.6059111904559494, "grad_norm": 0.0, - "learning_rate": 7.072152307655485e-06, - "loss": 0.8228, + "learning_rate": 7.0980864026709605e-06, + "loss": 0.8155, "step": 21382 }, { - "epoch": 0.6067820658342792, + "epoch": 0.6059395278982119, "grad_norm": 0.0, - "learning_rate": 7.0712735174514e-06, - "loss": 0.822, + "learning_rate": 7.097208119351089e-06, + "loss": 0.8816, "step": 21383 }, { - "epoch": 0.6068104426787742, + "epoch": 0.6059678653404744, "grad_norm": 0.0, - "learning_rate": 7.070394751986402e-06, - "loss": 0.8498, + "learning_rate": 7.096329860482507e-06, + "loss": 0.8392, "step": 21384 }, { - "epoch": 0.606838819523269, + "epoch": 0.6059962027827368, "grad_norm": 0.0, - "learning_rate": 7.069516011267912e-06, - "loss": 0.8364, + "learning_rate": 7.095451626072618e-06, + "loss": 0.8501, "step": 21385 }, { - "epoch": 0.6068671963677639, + "epoch": 0.6060245402249993, "grad_norm": 0.0, - "learning_rate": 7.068637295303349e-06, - "loss": 0.7627, + "learning_rate": 7.094573416128823e-06, + "loss": 0.9145, "step": 21386 }, { - "epoch": 0.6068955732122588, + "epoch": 0.6060528776672618, "grad_norm": 0.0, - "learning_rate": 7.067758604100141e-06, - "loss": 0.9527, + "learning_rate": 7.093695230658511e-06, + "loss": 0.7761, "step": 21387 }, { - "epoch": 0.6069239500567537, + "epoch": 0.6060812151095242, "grad_norm": 0.0, - "learning_rate": 7.066879937665708e-06, - "loss": 0.776, + "learning_rate": 7.092817069669082e-06, + "loss": 0.8777, "step": 21388 }, { - "epoch": 0.6069523269012486, + "epoch": 0.6061095525517867, "grad_norm": 0.0, - "learning_rate": 7.066001296007469e-06, - "loss": 0.8446, + "learning_rate": 7.0919389331679365e-06, + "loss": 0.8083, "step": 21389 }, { - "epoch": 0.6069807037457434, + "epoch": 0.6061378899940492, "grad_norm": 0.0, - "learning_rate": 7.065122679132853e-06, - "loss": 0.8067, + "learning_rate": 7.091060821162468e-06, + "loss": 0.8801, "step": 21390 }, { - "epoch": 0.6070090805902384, + "epoch": 0.6061662274363115, "grad_norm": 0.0, - "learning_rate": 7.0642440870492766e-06, - "loss": 0.8739, + "learning_rate": 7.0901827336600795e-06, + "loss": 0.8805, "step": 21391 }, { - "epoch": 0.6070374574347333, + "epoch": 0.606194564878574, "grad_norm": 0.0, - "learning_rate": 7.063365519764163e-06, - "loss": 0.9392, + "learning_rate": 7.089304670668158e-06, + "loss": 0.8949, "step": 21392 }, { - "epoch": 0.6070658342792281, + "epoch": 0.6062229023208365, "grad_norm": 0.0, - "learning_rate": 7.062486977284929e-06, - "loss": 0.8183, + "learning_rate": 7.088426632194103e-06, + "loss": 0.879, "step": 21393 }, { - "epoch": 0.6070942111237231, + "epoch": 0.606251239763099, "grad_norm": 0.0, - "learning_rate": 7.061608459618998e-06, - "loss": 0.9487, + "learning_rate": 7.087548618245314e-06, + "loss": 0.7933, "step": 21394 }, { - "epoch": 0.6071225879682179, + "epoch": 0.6062795772053614, "grad_norm": 0.0, - "learning_rate": 7.060729966773798e-06, - "loss": 0.7769, + "learning_rate": 7.086670628829182e-06, + "loss": 0.8806, "step": 21395 }, { - "epoch": 0.6071509648127128, + "epoch": 0.6063079146476239, "grad_norm": 0.0, - "learning_rate": 7.059851498756743e-06, - "loss": 0.9258, + "learning_rate": 7.0857926639531104e-06, + "loss": 0.8423, "step": 21396 }, { - "epoch": 0.6071793416572078, + "epoch": 0.6063362520898864, "grad_norm": 0.0, - "learning_rate": 7.058973055575253e-06, - "loss": 0.9033, + "learning_rate": 7.084914723624483e-06, + "loss": 0.8282, "step": 21397 }, { - "epoch": 0.6072077185017026, + "epoch": 0.6063645895321488, "grad_norm": 0.0, - "learning_rate": 7.058094637236752e-06, - "loss": 0.8617, + "learning_rate": 7.084036807850704e-06, + "loss": 0.7643, "step": 21398 }, { - "epoch": 0.6072360953461975, + "epoch": 0.6063929269744113, "grad_norm": 0.0, - "learning_rate": 7.0572162437486565e-06, - "loss": 0.9134, + "learning_rate": 7.083158916639169e-06, + "loss": 0.9376, "step": 21399 }, { - "epoch": 0.6072644721906924, + "epoch": 0.6064212644166738, "grad_norm": 0.0, - "learning_rate": 7.056337875118386e-06, - "loss": 0.9118, + "learning_rate": 7.082281049997265e-06, + "loss": 0.8425, "step": 21400 }, { - "epoch": 0.6072928490351873, + "epoch": 0.6064496018589363, "grad_norm": 0.0, - "learning_rate": 7.0554595313533655e-06, - "loss": 0.7776, + "learning_rate": 7.081403207932391e-06, + "loss": 0.8712, "step": 21401 }, { - "epoch": 0.6073212258796822, + "epoch": 0.6064779393011986, "grad_norm": 0.0, - "learning_rate": 7.054581212461009e-06, - "loss": 0.7936, + "learning_rate": 7.080525390451945e-06, + "loss": 0.8267, "step": 21402 }, { - "epoch": 0.607349602724177, + "epoch": 0.6065062767434611, "grad_norm": 0.0, - "learning_rate": 7.053702918448737e-06, - "loss": 0.8448, + "learning_rate": 7.079647597563315e-06, + "loss": 0.7535, "step": 21403 }, { - "epoch": 0.607377979568672, + "epoch": 0.6065346141857236, "grad_norm": 0.0, - "learning_rate": 7.052824649323969e-06, - "loss": 0.7991, + "learning_rate": 7.078769829273901e-06, + "loss": 0.7426, "step": 21404 }, { - "epoch": 0.6074063564131669, + "epoch": 0.606562951627986, "grad_norm": 0.0, - "learning_rate": 7.0519464050941256e-06, - "loss": 0.9001, + "learning_rate": 7.0778920855910905e-06, + "loss": 0.8039, "step": 21405 }, { - "epoch": 0.6074347332576617, + "epoch": 0.6065912890702485, "grad_norm": 0.0, - "learning_rate": 7.051068185766619e-06, - "loss": 0.7578, + "learning_rate": 7.077014366522279e-06, + "loss": 0.8559, "step": 21406 }, { - "epoch": 0.6074631101021566, + "epoch": 0.606619626512511, "grad_norm": 0.0, - "learning_rate": 7.050189991348877e-06, - "loss": 0.8686, + "learning_rate": 7.076136672074865e-06, + "loss": 0.8902, "step": 21407 }, { - "epoch": 0.6074914869466516, + "epoch": 0.6066479639547735, "grad_norm": 0.0, - "learning_rate": 7.04931182184831e-06, - "loss": 0.8129, + "learning_rate": 7.0752590022562325e-06, + "loss": 0.8293, "step": 21408 }, { - "epoch": 0.6075198637911464, + "epoch": 0.6066763013970359, "grad_norm": 0.0, - "learning_rate": 7.048433677272338e-06, - "loss": 0.8517, + "learning_rate": 7.074381357073782e-06, + "loss": 0.9153, "step": 21409 }, { - "epoch": 0.6075482406356413, + "epoch": 0.6067046388392984, "grad_norm": 0.0, - "learning_rate": 7.0475555576283796e-06, - "loss": 0.9618, + "learning_rate": 7.0735037365349065e-06, + "loss": 0.8671, "step": 21410 }, { - "epoch": 0.6075766174801362, + "epoch": 0.6067329762815609, "grad_norm": 0.0, - "learning_rate": 7.0466774629238525e-06, - "loss": 0.7693, + "learning_rate": 7.072626140646992e-06, + "loss": 0.7565, "step": 21411 }, { - "epoch": 0.6076049943246311, + "epoch": 0.6067613137238232, "grad_norm": 0.0, - "learning_rate": 7.045799393166174e-06, - "loss": 0.8985, + "learning_rate": 7.071748569417439e-06, + "loss": 0.8155, "step": 21412 }, { - "epoch": 0.607633371169126, + "epoch": 0.6067896511660857, "grad_norm": 0.0, - "learning_rate": 7.044921348362761e-06, - "loss": 0.8145, + "learning_rate": 7.070871022853632e-06, + "loss": 0.8442, "step": 21413 }, { - "epoch": 0.6076617480136209, + "epoch": 0.6068179886083482, "grad_norm": 0.0, - "learning_rate": 7.04404332852103e-06, - "loss": 0.9194, + "learning_rate": 7.069993500962964e-06, + "loss": 0.8918, "step": 21414 }, { - "epoch": 0.6076901248581158, + "epoch": 0.6068463260506106, "grad_norm": 0.0, - "learning_rate": 7.043165333648399e-06, - "loss": 0.9042, + "learning_rate": 7.069116003752831e-06, + "loss": 0.8155, "step": 21415 }, { - "epoch": 0.6077185017026107, + "epoch": 0.6068746634928731, "grad_norm": 0.0, - "learning_rate": 7.0422873637522825e-06, - "loss": 0.8492, + "learning_rate": 7.068238531230622e-06, + "loss": 0.9714, "step": 21416 }, { - "epoch": 0.6077468785471055, + "epoch": 0.6069030009351356, "grad_norm": 0.0, - "learning_rate": 7.041409418840096e-06, - "loss": 0.9067, + "learning_rate": 7.067361083403732e-06, + "loss": 0.7798, "step": 21417 }, { - "epoch": 0.6077752553916005, + "epoch": 0.6069313383773981, "grad_norm": 0.0, - "learning_rate": 7.04053149891926e-06, - "loss": 0.8027, + "learning_rate": 7.066483660279544e-06, + "loss": 0.8776, "step": 21418 }, { - "epoch": 0.6078036322360953, + "epoch": 0.6069596758196605, "grad_norm": 0.0, - "learning_rate": 7.039653603997187e-06, - "loss": 0.9025, + "learning_rate": 7.065606261865453e-06, + "loss": 0.8025, "step": 21419 }, { - "epoch": 0.6078320090805902, + "epoch": 0.606988013261923, "grad_norm": 0.0, - "learning_rate": 7.038775734081291e-06, - "loss": 0.8436, + "learning_rate": 7.064728888168853e-06, + "loss": 0.8838, "step": 21420 }, { - "epoch": 0.6078603859250852, + "epoch": 0.6070163507041855, "grad_norm": 0.0, - "learning_rate": 7.037897889178991e-06, - "loss": 0.8836, + "learning_rate": 7.063851539197128e-06, + "loss": 0.8818, "step": 21421 }, { - "epoch": 0.60788876276958, + "epoch": 0.6070446881464479, "grad_norm": 0.0, - "learning_rate": 7.037020069297702e-06, - "loss": 0.9933, + "learning_rate": 7.062974214957674e-06, + "loss": 0.8647, "step": 21422 }, { - "epoch": 0.6079171396140749, + "epoch": 0.6070730255887103, "grad_norm": 0.0, - "learning_rate": 7.036142274444834e-06, - "loss": 0.8848, + "learning_rate": 7.062096915457881e-06, + "loss": 0.8743, "step": 21423 }, { - "epoch": 0.6079455164585698, + "epoch": 0.6071013630309728, "grad_norm": 0.0, - "learning_rate": 7.0352645046278075e-06, - "loss": 0.7268, + "learning_rate": 7.061219640705135e-06, + "loss": 0.8383, "step": 21424 }, { - "epoch": 0.6079738933030647, + "epoch": 0.6071297004732353, "grad_norm": 0.0, - "learning_rate": 7.034386759854033e-06, - "loss": 0.9641, + "learning_rate": 7.060342390706829e-06, + "loss": 0.9049, "step": 21425 }, { - "epoch": 0.6080022701475596, + "epoch": 0.6071580379154977, "grad_norm": 0.0, - "learning_rate": 7.0335090401309295e-06, - "loss": 0.935, + "learning_rate": 7.059465165470347e-06, + "loss": 0.9825, "step": 21426 }, { - "epoch": 0.6080306469920544, + "epoch": 0.6071863753577602, "grad_norm": 0.0, - "learning_rate": 7.032631345465905e-06, - "loss": 0.8532, + "learning_rate": 7.058587965003083e-06, + "loss": 0.8144, "step": 21427 }, { - "epoch": 0.6080590238365494, + "epoch": 0.6072147128000227, "grad_norm": 0.0, - "learning_rate": 7.031753675866382e-06, - "loss": 0.9177, + "learning_rate": 7.057710789312427e-06, + "loss": 0.8834, "step": 21428 }, { - "epoch": 0.6080874006810443, + "epoch": 0.6072430502422851, "grad_norm": 0.0, - "learning_rate": 7.030876031339768e-06, - "loss": 0.9937, + "learning_rate": 7.056833638405762e-06, + "loss": 0.8844, "step": 21429 }, { - "epoch": 0.6081157775255391, + "epoch": 0.6072713876845476, "grad_norm": 0.0, - "learning_rate": 7.029998411893475e-06, - "loss": 0.8699, + "learning_rate": 7.05595651229048e-06, + "loss": 0.8864, "step": 21430 }, { - "epoch": 0.6081441543700341, + "epoch": 0.6072997251268101, "grad_norm": 0.0, - "learning_rate": 7.0291208175349206e-06, - "loss": 0.847, + "learning_rate": 7.055079410973975e-06, + "loss": 0.925, "step": 21431 }, { - "epoch": 0.608172531214529, + "epoch": 0.6073280625690726, "grad_norm": 0.0, - "learning_rate": 7.0282432482715165e-06, - "loss": 0.8461, + "learning_rate": 7.0542023344636255e-06, + "loss": 0.8743, "step": 21432 }, { - "epoch": 0.6082009080590238, + "epoch": 0.607356400011335, "grad_norm": 0.0, - "learning_rate": 7.027365704110676e-06, - "loss": 0.9038, + "learning_rate": 7.053325282766826e-06, + "loss": 0.8851, "step": 21433 }, { - "epoch": 0.6082292849035187, + "epoch": 0.6073847374535974, "grad_norm": 0.0, - "learning_rate": 7.026488185059808e-06, - "loss": 0.8497, + "learning_rate": 7.052448255890958e-06, + "loss": 0.8377, "step": 21434 }, { - "epoch": 0.6082576617480137, + "epoch": 0.6074130748958599, "grad_norm": 0.0, - "learning_rate": 7.025610691126331e-06, - "loss": 0.9485, + "learning_rate": 7.051571253843415e-06, + "loss": 0.8511, "step": 21435 }, { - "epoch": 0.6082860385925085, + "epoch": 0.6074414123381223, "grad_norm": 0.0, - "learning_rate": 7.024733222317655e-06, - "loss": 0.9268, + "learning_rate": 7.050694276631584e-06, + "loss": 0.8428, "step": 21436 }, { - "epoch": 0.6083144154370034, + "epoch": 0.6074697497803848, "grad_norm": 0.0, - "learning_rate": 7.023855778641186e-06, - "loss": 0.7743, + "learning_rate": 7.049817324262848e-06, + "loss": 0.8754, "step": 21437 }, { - "epoch": 0.6083427922814983, + "epoch": 0.6074980872226473, "grad_norm": 0.0, - "learning_rate": 7.022978360104347e-06, - "loss": 0.8722, + "learning_rate": 7.048940396744596e-06, + "loss": 0.9609, "step": 21438 }, { - "epoch": 0.6083711691259932, + "epoch": 0.6075264246649097, "grad_norm": 0.0, - "learning_rate": 7.0221009667145405e-06, - "loss": 0.9257, + "learning_rate": 7.048063494084218e-06, + "loss": 0.8513, "step": 21439 }, { - "epoch": 0.6083995459704881, + "epoch": 0.6075547621071722, "grad_norm": 0.0, - "learning_rate": 7.0212235984791785e-06, - "loss": 0.9016, + "learning_rate": 7.047186616289095e-06, + "loss": 0.886, "step": 21440 }, { - "epoch": 0.6084279228149829, + "epoch": 0.6075830995494347, "grad_norm": 0.0, - "learning_rate": 7.0203462554056755e-06, - "loss": 0.8693, + "learning_rate": 7.046309763366617e-06, + "loss": 0.9509, "step": 21441 }, { - "epoch": 0.6084562996594779, + "epoch": 0.6076114369916972, "grad_norm": 0.0, - "learning_rate": 7.019468937501445e-06, - "loss": 0.7343, + "learning_rate": 7.0454329353241655e-06, + "loss": 0.8664, "step": 21442 }, { - "epoch": 0.6084846765039728, + "epoch": 0.6076397744339596, "grad_norm": 0.0, - "learning_rate": 7.018591644773893e-06, - "loss": 0.8316, + "learning_rate": 7.0445561321691304e-06, + "loss": 0.8564, "step": 21443 }, { - "epoch": 0.6085130533484676, + "epoch": 0.607668111876222, "grad_norm": 0.0, - "learning_rate": 7.017714377230432e-06, - "loss": 0.8506, + "learning_rate": 7.043679353908901e-06, + "loss": 0.9538, "step": 21444 }, { - "epoch": 0.6085414301929626, + "epoch": 0.6076964493184845, "grad_norm": 0.0, - "learning_rate": 7.016837134878471e-06, - "loss": 0.7858, + "learning_rate": 7.042802600550853e-06, + "loss": 0.8476, "step": 21445 }, { - "epoch": 0.6085698070374574, + "epoch": 0.6077247867607469, "grad_norm": 0.0, - "learning_rate": 7.0159599177254215e-06, - "loss": 0.8649, + "learning_rate": 7.04192587210238e-06, + "loss": 0.9415, "step": 21446 }, { - "epoch": 0.6085981838819523, + "epoch": 0.6077531242030094, "grad_norm": 0.0, - "learning_rate": 7.01508272577869e-06, - "loss": 0.7576, + "learning_rate": 7.041049168570862e-06, + "loss": 0.8673, "step": 21447 }, { - "epoch": 0.6086265607264473, + "epoch": 0.6077814616452719, "grad_norm": 0.0, - "learning_rate": 7.014205559045692e-06, - "loss": 0.8705, + "learning_rate": 7.040172489963683e-06, + "loss": 0.8533, "step": 21448 }, { - "epoch": 0.6086549375709421, + "epoch": 0.6078097990875344, "grad_norm": 0.0, - "learning_rate": 7.013328417533834e-06, - "loss": 0.9221, + "learning_rate": 7.039295836288238e-06, + "loss": 0.9504, "step": 21449 }, { - "epoch": 0.608683314415437, + "epoch": 0.6078381365297968, "grad_norm": 0.0, - "learning_rate": 7.012451301250523e-06, - "loss": 0.7613, + "learning_rate": 7.038419207551896e-06, + "loss": 0.8603, "step": 21450 }, { - "epoch": 0.6087116912599319, + "epoch": 0.6078664739720593, "grad_norm": 0.0, - "learning_rate": 7.011574210203173e-06, - "loss": 0.9301, + "learning_rate": 7.037542603762051e-06, + "loss": 0.8724, "step": 21451 }, { - "epoch": 0.6087400681044268, + "epoch": 0.6078948114143218, "grad_norm": 0.0, - "learning_rate": 7.0106971443991874e-06, - "loss": 0.9135, + "learning_rate": 7.0366660249260885e-06, + "loss": 0.8746, "step": 21452 }, { - "epoch": 0.6087684449489217, + "epoch": 0.6079231488565842, "grad_norm": 0.0, - "learning_rate": 7.0098201038459775e-06, - "loss": 0.878, + "learning_rate": 7.0357894710513845e-06, + "loss": 0.7957, "step": 21453 }, { - "epoch": 0.6087968217934165, + "epoch": 0.6079514862988467, "grad_norm": 0.0, - "learning_rate": 7.008943088550954e-06, - "loss": 0.946, + "learning_rate": 7.034912942145329e-06, + "loss": 0.675, "step": 21454 }, { - "epoch": 0.6088251986379115, + "epoch": 0.6079798237411091, "grad_norm": 0.0, - "learning_rate": 7.008066098521522e-06, - "loss": 0.9799, + "learning_rate": 7.034036438215299e-06, + "loss": 0.8675, "step": 21455 }, { - "epoch": 0.6088535754824064, + "epoch": 0.6080081611833716, "grad_norm": 0.0, - "learning_rate": 7.007189133765086e-06, - "loss": 0.8891, + "learning_rate": 7.033159959268683e-06, + "loss": 0.8449, "step": 21456 }, { - "epoch": 0.6088819523269012, + "epoch": 0.608036498625634, "grad_norm": 0.0, - "learning_rate": 7.006312194289061e-06, - "loss": 0.8754, + "learning_rate": 7.032283505312865e-06, + "loss": 0.8377, "step": 21457 }, { - "epoch": 0.6089103291713961, + "epoch": 0.6080648360678965, "grad_norm": 0.0, - "learning_rate": 7.005435280100849e-06, - "loss": 0.8727, + "learning_rate": 7.0314070763552236e-06, + "loss": 0.8846, "step": 21458 }, { - "epoch": 0.6089387060158911, + "epoch": 0.608093173510159, "grad_norm": 0.0, - "learning_rate": 7.0045583912078655e-06, - "loss": 0.9204, + "learning_rate": 7.0305306724031396e-06, + "loss": 0.9116, "step": 21459 }, { - "epoch": 0.6089670828603859, + "epoch": 0.6081215109524214, "grad_norm": 0.0, - "learning_rate": 7.003681527617508e-06, - "loss": 0.9112, + "learning_rate": 7.029654293464004e-06, + "loss": 0.7807, "step": 21460 }, { - "epoch": 0.6089954597048808, + "epoch": 0.6081498483946839, "grad_norm": 0.0, - "learning_rate": 7.002804689337188e-06, - "loss": 0.8666, + "learning_rate": 7.028777939545189e-06, + "loss": 0.8307, "step": 21461 }, { - "epoch": 0.6090238365493758, + "epoch": 0.6081781858369464, "grad_norm": 0.0, - "learning_rate": 7.001927876374313e-06, - "loss": 0.891, + "learning_rate": 7.0279016106540846e-06, + "loss": 0.8018, "step": 21462 }, { - "epoch": 0.6090522133938706, + "epoch": 0.6082065232792088, "grad_norm": 0.0, - "learning_rate": 7.001051088736286e-06, - "loss": 0.8149, + "learning_rate": 7.027025306798065e-06, + "loss": 0.9066, "step": 21463 }, { - "epoch": 0.6090805902383655, + "epoch": 0.6082348607214713, "grad_norm": 0.0, - "learning_rate": 7.000174326430515e-06, - "loss": 0.821, + "learning_rate": 7.0261490279845145e-06, + "loss": 0.8571, "step": 21464 }, { - "epoch": 0.6091089670828603, + "epoch": 0.6082631981637338, "grad_norm": 0.0, - "learning_rate": 6.999297589464409e-06, - "loss": 0.8346, + "learning_rate": 7.025272774220821e-06, + "loss": 0.8177, "step": 21465 }, { - "epoch": 0.6091373439273553, + "epoch": 0.6082915356059962, "grad_norm": 0.0, - "learning_rate": 6.9984208778453685e-06, - "loss": 0.8101, + "learning_rate": 7.024396545514354e-06, + "loss": 0.8282, "step": 21466 }, { - "epoch": 0.6091657207718502, + "epoch": 0.6083198730482586, "grad_norm": 0.0, - "learning_rate": 6.997544191580803e-06, - "loss": 0.8327, + "learning_rate": 7.0235203418725004e-06, + "loss": 0.8513, "step": 21467 }, { - "epoch": 0.609194097616345, + "epoch": 0.6083482104905211, "grad_norm": 0.0, - "learning_rate": 6.996667530678116e-06, - "loss": 0.8116, + "learning_rate": 7.022644163302641e-06, + "loss": 0.8175, "step": 21468 }, { - "epoch": 0.60922247446084, + "epoch": 0.6083765479327836, "grad_norm": 0.0, - "learning_rate": 6.9957908951447136e-06, - "loss": 0.8978, + "learning_rate": 7.021768009812155e-06, + "loss": 0.7902, "step": 21469 }, { - "epoch": 0.6092508513053349, + "epoch": 0.608404885375046, "grad_norm": 0.0, - "learning_rate": 6.9949142849880015e-06, - "loss": 0.8051, + "learning_rate": 7.020891881408427e-06, + "loss": 0.8102, "step": 21470 }, { - "epoch": 0.6092792281498297, + "epoch": 0.6084332228173085, "grad_norm": 0.0, - "learning_rate": 6.994037700215378e-06, - "loss": 0.7544, + "learning_rate": 7.0200157780988275e-06, + "loss": 0.844, "step": 21471 }, { - "epoch": 0.6093076049943247, + "epoch": 0.608461560259571, "grad_norm": 0.0, - "learning_rate": 6.993161140834259e-06, - "loss": 0.7725, + "learning_rate": 7.019139699890743e-06, + "loss": 0.8751, "step": 21472 }, { - "epoch": 0.6093359818388195, + "epoch": 0.6084898977018335, "grad_norm": 0.0, - "learning_rate": 6.9922846068520425e-06, - "loss": 0.8087, + "learning_rate": 7.018263646791555e-06, + "loss": 0.797, "step": 21473 }, { - "epoch": 0.6093643586833144, + "epoch": 0.6085182351440959, "grad_norm": 0.0, - "learning_rate": 6.991408098276134e-06, - "loss": 0.8629, + "learning_rate": 7.017387618808634e-06, + "loss": 0.8528, "step": 21474 }, { - "epoch": 0.6093927355278093, + "epoch": 0.6085465725863584, "grad_norm": 0.0, - "learning_rate": 6.990531615113934e-06, - "loss": 0.8734, + "learning_rate": 7.016511615949371e-06, + "loss": 0.9619, "step": 21475 }, { - "epoch": 0.6094211123723042, + "epoch": 0.6085749100286209, "grad_norm": 0.0, - "learning_rate": 6.9896551573728495e-06, - "loss": 0.8142, + "learning_rate": 7.015635638221134e-06, + "loss": 0.946, "step": 21476 }, { - "epoch": 0.6094494892167991, + "epoch": 0.6086032474708832, "grad_norm": 0.0, - "learning_rate": 6.988778725060285e-06, - "loss": 0.7237, + "learning_rate": 7.0147596856313076e-06, + "loss": 0.8119, "step": 21477 }, { - "epoch": 0.609477866061294, + "epoch": 0.6086315849131457, "grad_norm": 0.0, - "learning_rate": 6.987902318183639e-06, - "loss": 0.841, + "learning_rate": 7.013883758187271e-06, + "loss": 0.8677, "step": 21478 }, { - "epoch": 0.6095062429057889, + "epoch": 0.6086599223554082, "grad_norm": 0.0, - "learning_rate": 6.98702593675032e-06, - "loss": 0.849, + "learning_rate": 7.013007855896396e-06, + "loss": 0.8141, "step": 21479 }, { - "epoch": 0.6095346197502838, + "epoch": 0.6086882597976707, "grad_norm": 0.0, - "learning_rate": 6.986149580767727e-06, - "loss": 0.7331, + "learning_rate": 7.012131978766067e-06, + "loss": 0.9112, "step": 21480 }, { - "epoch": 0.6095629965947786, + "epoch": 0.6087165972399331, "grad_norm": 0.0, - "learning_rate": 6.9852732502432645e-06, - "loss": 0.7743, + "learning_rate": 7.01125612680366e-06, + "loss": 0.9142, "step": 21481 }, { - "epoch": 0.6095913734392735, + "epoch": 0.6087449346821956, "grad_norm": 0.0, - "learning_rate": 6.984396945184335e-06, - "loss": 0.9094, + "learning_rate": 7.010380300016553e-06, + "loss": 0.8057, "step": 21482 }, { - "epoch": 0.6096197502837685, + "epoch": 0.6087732721244581, "grad_norm": 0.0, - "learning_rate": 6.98352066559834e-06, - "loss": 0.8539, + "learning_rate": 7.009504498412125e-06, + "loss": 0.8746, "step": 21483 }, { - "epoch": 0.6096481271282633, + "epoch": 0.6088016095667205, "grad_norm": 0.0, - "learning_rate": 6.9826444114926785e-06, - "loss": 0.8852, + "learning_rate": 7.008628721997747e-06, + "loss": 0.879, "step": 21484 }, { - "epoch": 0.6096765039727582, + "epoch": 0.608829947008983, "grad_norm": 0.0, - "learning_rate": 6.9817681828747595e-06, - "loss": 0.9469, + "learning_rate": 7.0077529707808e-06, + "loss": 0.8701, "step": 21485 }, { - "epoch": 0.6097048808172532, + "epoch": 0.6088582844512455, "grad_norm": 0.0, - "learning_rate": 6.980891979751974e-06, - "loss": 0.9614, + "learning_rate": 7.006877244768664e-06, + "loss": 0.8544, "step": 21486 }, { - "epoch": 0.609733257661748, + "epoch": 0.6088866218935078, "grad_norm": 0.0, - "learning_rate": 6.980015802131735e-06, - "loss": 0.7786, + "learning_rate": 7.00600154396871e-06, + "loss": 0.9196, "step": 21487 }, { - "epoch": 0.6097616345062429, + "epoch": 0.6089149593357703, "grad_norm": 0.0, - "learning_rate": 6.979139650021436e-06, - "loss": 0.9517, + "learning_rate": 7.005125868388316e-06, + "loss": 0.834, "step": 21488 }, { - "epoch": 0.6097900113507378, + "epoch": 0.6089432967780328, "grad_norm": 0.0, - "learning_rate": 6.978263523428483e-06, - "loss": 0.8544, + "learning_rate": 7.0042502180348635e-06, + "loss": 0.8634, "step": 21489 }, { - "epoch": 0.6098183881952327, + "epoch": 0.6089716342202953, "grad_norm": 0.0, - "learning_rate": 6.977387422360273e-06, - "loss": 0.8748, + "learning_rate": 7.00337459291572e-06, + "loss": 0.9182, "step": 21490 }, { - "epoch": 0.6098467650397276, + "epoch": 0.6089999716625577, "grad_norm": 0.0, - "learning_rate": 6.976511346824205e-06, - "loss": 0.9496, + "learning_rate": 7.002498993038267e-06, + "loss": 0.8267, "step": 21491 }, { - "epoch": 0.6098751418842224, + "epoch": 0.6090283091048202, "grad_norm": 0.0, - "learning_rate": 6.975635296827685e-06, - "loss": 0.769, + "learning_rate": 7.001623418409878e-06, + "loss": 0.7981, "step": 21492 }, { - "epoch": 0.6099035187287174, + "epoch": 0.6090566465470827, "grad_norm": 0.0, - "learning_rate": 6.974759272378109e-06, - "loss": 0.8872, + "learning_rate": 7.000747869037927e-06, + "loss": 0.8086, "step": 21493 }, { - "epoch": 0.6099318955732123, + "epoch": 0.6090849839893451, "grad_norm": 0.0, - "learning_rate": 6.973883273482874e-06, - "loss": 0.7651, + "learning_rate": 6.999872344929791e-06, + "loss": 0.8396, "step": 21494 }, { - "epoch": 0.6099602724177071, + "epoch": 0.6091133214316076, "grad_norm": 0.0, - "learning_rate": 6.973007300149386e-06, - "loss": 0.8373, + "learning_rate": 6.9989968460928425e-06, + "loss": 0.7802, "step": 21495 }, { - "epoch": 0.6099886492622021, + "epoch": 0.6091416588738701, "grad_norm": 0.0, - "learning_rate": 6.972131352385042e-06, - "loss": 0.8526, + "learning_rate": 6.998121372534459e-06, + "loss": 0.8696, "step": 21496 }, { - "epoch": 0.610017026106697, + "epoch": 0.6091699963161326, "grad_norm": 0.0, - "learning_rate": 6.971255430197238e-06, - "loss": 0.8986, + "learning_rate": 6.997245924262018e-06, + "loss": 0.825, "step": 21497 }, { - "epoch": 0.6100454029511918, + "epoch": 0.6091983337583949, "grad_norm": 0.0, - "learning_rate": 6.9703795335933775e-06, - "loss": 0.896, + "learning_rate": 6.996370501282885e-06, + "loss": 0.9277, "step": 21498 }, { - "epoch": 0.6100737797956867, + "epoch": 0.6092266712006574, "grad_norm": 0.0, - "learning_rate": 6.969503662580858e-06, - "loss": 0.9045, + "learning_rate": 6.995495103604442e-06, + "loss": 0.9522, "step": 21499 }, { - "epoch": 0.6101021566401816, + "epoch": 0.6092550086429199, "grad_norm": 0.0, - "learning_rate": 6.9686278171670765e-06, - "loss": 0.8325, + "learning_rate": 6.994619731234056e-06, + "loss": 0.7384, "step": 21500 }, { - "epoch": 0.6101305334846765, + "epoch": 0.6092833460851823, "grad_norm": 0.0, - "learning_rate": 6.96775199735943e-06, - "loss": 0.99, + "learning_rate": 6.993744384179103e-06, + "loss": 0.9238, "step": 21501 }, { - "epoch": 0.6101589103291714, + "epoch": 0.6093116835274448, "grad_norm": 0.0, - "learning_rate": 6.966876203165317e-06, - "loss": 0.814, + "learning_rate": 6.992869062446963e-06, + "loss": 0.836, "step": 21502 }, { - "epoch": 0.6101872871736663, + "epoch": 0.6093400209697073, "grad_norm": 0.0, - "learning_rate": 6.966000434592142e-06, - "loss": 0.8884, + "learning_rate": 6.991993766045e-06, + "loss": 0.8551, "step": 21503 }, { - "epoch": 0.6102156640181612, + "epoch": 0.6093683584119698, "grad_norm": 0.0, - "learning_rate": 6.965124691647296e-06, - "loss": 0.9712, + "learning_rate": 6.991118494980591e-06, + "loss": 0.9189, "step": 21504 }, { - "epoch": 0.610244040862656, + "epoch": 0.6093966958542322, "grad_norm": 0.0, - "learning_rate": 6.964248974338177e-06, - "loss": 0.8665, + "learning_rate": 6.9902432492611065e-06, + "loss": 0.7758, "step": 21505 }, { - "epoch": 0.610272417707151, + "epoch": 0.6094250332964947, "grad_norm": 0.0, - "learning_rate": 6.963373282672185e-06, - "loss": 0.7423, + "learning_rate": 6.989368028893921e-06, + "loss": 0.7625, "step": 21506 }, { - "epoch": 0.6103007945516459, + "epoch": 0.6094533707387572, "grad_norm": 0.0, - "learning_rate": 6.962497616656716e-06, - "loss": 0.8393, + "learning_rate": 6.988492833886411e-06, + "loss": 0.9173, "step": 21507 }, { - "epoch": 0.6103291713961407, + "epoch": 0.6094817081810195, "grad_norm": 0.0, - "learning_rate": 6.961621976299163e-06, - "loss": 0.7869, + "learning_rate": 6.987617664245941e-06, + "loss": 0.8006, "step": 21508 }, { - "epoch": 0.6103575482406356, + "epoch": 0.609510045623282, "grad_norm": 0.0, - "learning_rate": 6.96074636160693e-06, - "loss": 0.7604, + "learning_rate": 6.9867425199798834e-06, + "loss": 0.916, "step": 21509 }, { - "epoch": 0.6103859250851306, + "epoch": 0.6095383830655445, "grad_norm": 0.0, - "learning_rate": 6.959870772587407e-06, - "loss": 0.8982, + "learning_rate": 6.985867401095618e-06, + "loss": 0.8531, "step": 21510 }, { - "epoch": 0.6104143019296254, + "epoch": 0.6095667205078069, "grad_norm": 0.0, - "learning_rate": 6.9589952092479906e-06, - "loss": 0.8823, + "learning_rate": 6.984992307600508e-06, + "loss": 0.8398, "step": 21511 }, { - "epoch": 0.6104426787741203, + "epoch": 0.6095950579500694, "grad_norm": 0.0, - "learning_rate": 6.958119671596081e-06, - "loss": 0.9207, + "learning_rate": 6.984117239501928e-06, + "loss": 0.851, "step": 21512 }, { - "epoch": 0.6104710556186153, + "epoch": 0.6096233953923319, "grad_norm": 0.0, - "learning_rate": 6.957244159639072e-06, - "loss": 0.8079, + "learning_rate": 6.983242196807246e-06, + "loss": 0.9086, "step": 21513 }, { - "epoch": 0.6104994324631101, + "epoch": 0.6096517328345944, "grad_norm": 0.0, - "learning_rate": 6.956368673384355e-06, - "loss": 0.9998, + "learning_rate": 6.982367179523836e-06, + "loss": 0.8586, "step": 21514 }, { - "epoch": 0.610527809307605, + "epoch": 0.6096800702768568, "grad_norm": 0.0, - "learning_rate": 6.95549321283933e-06, - "loss": 0.8422, + "learning_rate": 6.981492187659071e-06, + "loss": 0.7759, "step": 21515 }, { - "epoch": 0.6105561861520998, + "epoch": 0.6097084077191193, "grad_norm": 0.0, - "learning_rate": 6.954617778011392e-06, - "loss": 0.8752, + "learning_rate": 6.980617221220316e-06, + "loss": 0.9672, "step": 21516 }, { - "epoch": 0.6105845629965948, + "epoch": 0.6097367451613818, "grad_norm": 0.0, - "learning_rate": 6.95374236890793e-06, - "loss": 0.7756, + "learning_rate": 6.979742280214942e-06, + "loss": 0.7968, "step": 21517 }, { - "epoch": 0.6106129398410897, + "epoch": 0.6097650826036441, "grad_norm": 0.0, - "learning_rate": 6.952866985536347e-06, - "loss": 0.8356, + "learning_rate": 6.978867364650322e-06, + "loss": 0.8246, "step": 21518 }, { - "epoch": 0.6106413166855845, + "epoch": 0.6097934200459066, "grad_norm": 0.0, - "learning_rate": 6.951991627904032e-06, - "loss": 0.9219, + "learning_rate": 6.977992474533823e-06, + "loss": 0.91, "step": 21519 }, { - "epoch": 0.6106696935300795, + "epoch": 0.6098217574881691, "grad_norm": 0.0, - "learning_rate": 6.951116296018383e-06, - "loss": 0.9769, + "learning_rate": 6.977117609872819e-06, + "loss": 0.8582, "step": 21520 }, { - "epoch": 0.6106980703745744, + "epoch": 0.6098500949304316, "grad_norm": 0.0, - "learning_rate": 6.950240989886792e-06, - "loss": 0.8102, + "learning_rate": 6.976242770674673e-06, + "loss": 0.8267, "step": 21521 }, { - "epoch": 0.6107264472190692, + "epoch": 0.609878432372694, "grad_norm": 0.0, - "learning_rate": 6.9493657095166485e-06, - "loss": 0.9641, + "learning_rate": 6.9753679569467545e-06, + "loss": 0.8733, "step": 21522 }, { - "epoch": 0.6107548240635642, + "epoch": 0.6099067698149565, "grad_norm": 0.0, - "learning_rate": 6.9484904549153546e-06, - "loss": 0.7446, + "learning_rate": 6.974493168696441e-06, + "loss": 0.9104, "step": 21523 }, { - "epoch": 0.610783200908059, + "epoch": 0.609935107257219, "grad_norm": 0.0, - "learning_rate": 6.947615226090297e-06, - "loss": 0.8967, + "learning_rate": 6.973618405931091e-06, + "loss": 0.8302, "step": 21524 }, { - "epoch": 0.6108115777525539, + "epoch": 0.6099634446994814, "grad_norm": 0.0, - "learning_rate": 6.94674002304887e-06, - "loss": 0.9534, + "learning_rate": 6.972743668658075e-06, + "loss": 0.9308, "step": 21525 }, { - "epoch": 0.6108399545970488, + "epoch": 0.6099917821417439, "grad_norm": 0.0, - "learning_rate": 6.945864845798469e-06, - "loss": 0.8827, + "learning_rate": 6.971868956884767e-06, + "loss": 0.7569, "step": 21526 }, { - "epoch": 0.6108683314415437, + "epoch": 0.6100201195840064, "grad_norm": 0.0, - "learning_rate": 6.9449896943464844e-06, - "loss": 0.8079, + "learning_rate": 6.970994270618529e-06, + "loss": 0.7948, "step": 21527 }, { - "epoch": 0.6108967082860386, + "epoch": 0.6100484570262689, "grad_norm": 0.0, - "learning_rate": 6.944114568700308e-06, - "loss": 0.7892, + "learning_rate": 6.970119609866736e-06, + "loss": 0.7424, "step": 21528 }, { - "epoch": 0.6109250851305335, + "epoch": 0.6100767944685312, "grad_norm": 0.0, - "learning_rate": 6.943239468867334e-06, - "loss": 0.7919, + "learning_rate": 6.969244974636745e-06, + "loss": 0.8934, "step": 21529 }, { - "epoch": 0.6109534619750284, + "epoch": 0.6101051319107937, "grad_norm": 0.0, - "learning_rate": 6.9423643948549544e-06, - "loss": 0.8544, + "learning_rate": 6.9683703649359305e-06, + "loss": 0.8737, "step": 21530 }, { - "epoch": 0.6109818388195233, + "epoch": 0.6101334693530562, "grad_norm": 0.0, - "learning_rate": 6.941489346670558e-06, - "loss": 0.8255, + "learning_rate": 6.967495780771658e-06, + "loss": 0.8012, "step": 21531 }, { - "epoch": 0.6110102156640181, + "epoch": 0.6101618067953186, "grad_norm": 0.0, - "learning_rate": 6.940614324321537e-06, - "loss": 0.8627, + "learning_rate": 6.966621222151294e-06, + "loss": 0.7927, "step": 21532 }, { - "epoch": 0.611038592508513, + "epoch": 0.6101901442375811, "grad_norm": 0.0, - "learning_rate": 6.939739327815288e-06, - "loss": 0.8802, + "learning_rate": 6.96574668908221e-06, + "loss": 0.8698, "step": 21533 }, { - "epoch": 0.611066969353008, + "epoch": 0.6102184816798436, "grad_norm": 0.0, - "learning_rate": 6.938864357159198e-06, - "loss": 0.9267, + "learning_rate": 6.964872181571765e-06, + "loss": 0.8212, "step": 21534 }, { - "epoch": 0.6110953461975028, + "epoch": 0.610246819122106, "grad_norm": 0.0, - "learning_rate": 6.937989412360657e-06, - "loss": 0.8172, + "learning_rate": 6.963997699627327e-06, + "loss": 0.8726, "step": 21535 }, { - "epoch": 0.6111237230419977, + "epoch": 0.6102751565643685, "grad_norm": 0.0, - "learning_rate": 6.93711449342706e-06, - "loss": 0.7756, + "learning_rate": 6.963123243256269e-06, + "loss": 0.8454, "step": 21536 }, { - "epoch": 0.6111520998864927, + "epoch": 0.610303494006631, "grad_norm": 0.0, - "learning_rate": 6.936239600365793e-06, - "loss": 0.7947, + "learning_rate": 6.962248812465947e-06, + "loss": 0.7734, "step": 21537 }, { - "epoch": 0.6111804767309875, + "epoch": 0.6103318314488935, "grad_norm": 0.0, - "learning_rate": 6.935364733184247e-06, - "loss": 0.7931, + "learning_rate": 6.961374407263732e-06, + "loss": 0.8141, "step": 21538 }, { - "epoch": 0.6112088535754824, + "epoch": 0.6103601688911559, "grad_norm": 0.0, - "learning_rate": 6.934489891889813e-06, - "loss": 0.8264, + "learning_rate": 6.96050002765699e-06, + "loss": 0.8401, "step": 21539 }, { - "epoch": 0.6112372304199774, + "epoch": 0.6103885063334183, "grad_norm": 0.0, - "learning_rate": 6.933615076489882e-06, - "loss": 0.8849, + "learning_rate": 6.959625673653083e-06, + "loss": 0.9084, "step": 21540 }, { - "epoch": 0.6112656072644722, + "epoch": 0.6104168437756808, "grad_norm": 0.0, - "learning_rate": 6.932740286991843e-06, - "loss": 0.7395, + "learning_rate": 6.958751345259383e-06, + "loss": 0.8279, "step": 21541 }, { - "epoch": 0.6112939841089671, + "epoch": 0.6104451812179432, "grad_norm": 0.0, - "learning_rate": 6.931865523403082e-06, - "loss": 0.885, + "learning_rate": 6.9578770424832444e-06, + "loss": 0.8773, "step": 21542 }, { - "epoch": 0.6113223609534619, + "epoch": 0.6104735186602057, "grad_norm": 0.0, - "learning_rate": 6.930990785730992e-06, - "loss": 0.8394, + "learning_rate": 6.957002765332037e-06, + "loss": 0.8186, "step": 21543 }, { - "epoch": 0.6113507377979569, + "epoch": 0.6105018561024682, "grad_norm": 0.0, - "learning_rate": 6.930116073982962e-06, - "loss": 0.8496, + "learning_rate": 6.9561285138131285e-06, + "loss": 0.9145, "step": 21544 }, { - "epoch": 0.6113791146424518, + "epoch": 0.6105301935447307, "grad_norm": 0.0, - "learning_rate": 6.929241388166377e-06, - "loss": 0.9869, + "learning_rate": 6.955254287933877e-06, + "loss": 0.9124, "step": 21545 }, { - "epoch": 0.6114074914869466, + "epoch": 0.6105585309869931, "grad_norm": 0.0, - "learning_rate": 6.92836672828863e-06, - "loss": 0.833, + "learning_rate": 6.95438008770165e-06, + "loss": 0.8414, "step": 21546 }, { - "epoch": 0.6114358683314416, + "epoch": 0.6105868684292556, "grad_norm": 0.0, - "learning_rate": 6.927492094357108e-06, - "loss": 0.9192, + "learning_rate": 6.953505913123814e-06, + "loss": 0.8676, "step": 21547 }, { - "epoch": 0.6114642451759364, + "epoch": 0.6106152058715181, "grad_norm": 0.0, - "learning_rate": 6.926617486379194e-06, - "loss": 0.7984, + "learning_rate": 6.952631764207724e-06, + "loss": 0.9002, "step": 21548 }, { - "epoch": 0.6114926220204313, + "epoch": 0.6106435433137805, "grad_norm": 0.0, - "learning_rate": 6.925742904362281e-06, - "loss": 0.8514, + "learning_rate": 6.9517576409607545e-06, + "loss": 0.8399, "step": 21549 }, { - "epoch": 0.6115209988649262, + "epoch": 0.610671880756043, "grad_norm": 0.0, - "learning_rate": 6.924868348313759e-06, - "loss": 0.8217, + "learning_rate": 6.950883543390257e-06, + "loss": 0.802, "step": 21550 }, { - "epoch": 0.6115493757094211, + "epoch": 0.6107002181983054, "grad_norm": 0.0, - "learning_rate": 6.9239938182410126e-06, - "loss": 0.9314, + "learning_rate": 6.950009471503601e-06, + "loss": 0.8055, "step": 21551 }, { - "epoch": 0.611577752553916, + "epoch": 0.6107285556405678, "grad_norm": 0.0, - "learning_rate": 6.923119314151426e-06, - "loss": 0.8603, + "learning_rate": 6.949135425308147e-06, + "loss": 0.876, "step": 21552 }, { - "epoch": 0.6116061293984109, + "epoch": 0.6107568930828303, "grad_norm": 0.0, - "learning_rate": 6.922244836052392e-06, - "loss": 0.9512, + "learning_rate": 6.948261404811259e-06, + "loss": 0.9002, "step": 21553 }, { - "epoch": 0.6116345062429058, + "epoch": 0.6107852305250928, "grad_norm": 0.0, - "learning_rate": 6.9213703839512935e-06, - "loss": 0.7126, + "learning_rate": 6.947387410020296e-06, + "loss": 0.8119, "step": 21554 }, { - "epoch": 0.6116628830874007, + "epoch": 0.6108135679673553, "grad_norm": 0.0, - "learning_rate": 6.920495957855516e-06, - "loss": 0.8371, + "learning_rate": 6.946513440942628e-06, + "loss": 0.8452, "step": 21555 }, { - "epoch": 0.6116912599318955, + "epoch": 0.6108419054096177, "grad_norm": 0.0, - "learning_rate": 6.919621557772449e-06, - "loss": 1.0488, + "learning_rate": 6.945639497585608e-06, + "loss": 0.8885, "step": 21556 }, { - "epoch": 0.6117196367763905, + "epoch": 0.6108702428518802, "grad_norm": 0.0, - "learning_rate": 6.9187471837094774e-06, - "loss": 0.8683, + "learning_rate": 6.944765579956601e-06, + "loss": 0.9052, "step": 21557 }, { - "epoch": 0.6117480136208854, + "epoch": 0.6108985802941427, "grad_norm": 0.0, - "learning_rate": 6.917872835673984e-06, - "loss": 0.9533, + "learning_rate": 6.943891688062967e-06, + "loss": 0.858, "step": 21558 }, { - "epoch": 0.6117763904653802, + "epoch": 0.6109269177364051, "grad_norm": 0.0, - "learning_rate": 6.916998513673361e-06, - "loss": 0.8144, + "learning_rate": 6.943017821912068e-06, + "loss": 0.8437, "step": 21559 }, { - "epoch": 0.6118047673098751, + "epoch": 0.6109552551786676, "grad_norm": 0.0, - "learning_rate": 6.916124217714989e-06, - "loss": 0.7463, + "learning_rate": 6.942143981511269e-06, + "loss": 0.8914, "step": 21560 }, { - "epoch": 0.6118331441543701, + "epoch": 0.61098359262093, "grad_norm": 0.0, - "learning_rate": 6.915249947806253e-06, - "loss": 0.8373, + "learning_rate": 6.941270166867922e-06, + "loss": 0.8953, "step": 21561 }, { - "epoch": 0.6118615209988649, + "epoch": 0.6110119300631925, "grad_norm": 0.0, - "learning_rate": 6.914375703954541e-06, - "loss": 0.8905, + "learning_rate": 6.9403963779893975e-06, + "loss": 0.9315, "step": 21562 }, { - "epoch": 0.6118898978433598, + "epoch": 0.6110402675054549, "grad_norm": 0.0, - "learning_rate": 6.913501486167231e-06, - "loss": 0.868, + "learning_rate": 6.939522614883045e-06, + "loss": 0.8272, "step": 21563 }, { - "epoch": 0.6119182746878548, + "epoch": 0.6110686049477174, "grad_norm": 0.0, - "learning_rate": 6.912627294451719e-06, - "loss": 0.8889, + "learning_rate": 6.938648877556231e-06, + "loss": 0.8292, "step": 21564 }, { - "epoch": 0.6119466515323496, + "epoch": 0.6110969423899799, "grad_norm": 0.0, - "learning_rate": 6.911753128815382e-06, - "loss": 0.834, + "learning_rate": 6.937775166016316e-06, + "loss": 0.8121, "step": 21565 }, { - "epoch": 0.6119750283768445, + "epoch": 0.6111252798322423, "grad_norm": 0.0, - "learning_rate": 6.910878989265603e-06, - "loss": 0.8778, + "learning_rate": 6.9369014802706566e-06, + "loss": 0.778, "step": 21566 }, { - "epoch": 0.6120034052213393, + "epoch": 0.6111536172745048, "grad_norm": 0.0, - "learning_rate": 6.91000487580977e-06, - "loss": 0.9139, + "learning_rate": 6.936027820326613e-06, + "loss": 0.879, "step": 21567 }, { - "epoch": 0.6120317820658343, + "epoch": 0.6111819547167673, "grad_norm": 0.0, - "learning_rate": 6.909130788455265e-06, - "loss": 0.7812, + "learning_rate": 6.935154186191549e-06, + "loss": 0.8058, "step": 21568 }, { - "epoch": 0.6120601589103292, + "epoch": 0.6112102921590298, "grad_norm": 0.0, - "learning_rate": 6.908256727209469e-06, - "loss": 0.828, + "learning_rate": 6.934280577872814e-06, + "loss": 0.9384, "step": 21569 }, { - "epoch": 0.612088535754824, + "epoch": 0.6112386296012922, "grad_norm": 0.0, - "learning_rate": 6.907382692079769e-06, - "loss": 0.7914, + "learning_rate": 6.933406995377776e-06, + "loss": 0.8818, "step": 21570 }, { - "epoch": 0.612116912599319, + "epoch": 0.6112669670435547, "grad_norm": 0.0, - "learning_rate": 6.906508683073547e-06, - "loss": 0.9219, + "learning_rate": 6.932533438713787e-06, + "loss": 0.8597, "step": 21571 }, { - "epoch": 0.6121452894438139, + "epoch": 0.6112953044858171, "grad_norm": 0.0, - "learning_rate": 6.9056347001981825e-06, - "loss": 0.8656, + "learning_rate": 6.931659907888208e-06, + "loss": 0.9718, "step": 21572 }, { - "epoch": 0.6121736662883087, + "epoch": 0.6113236419280795, "grad_norm": 0.0, - "learning_rate": 6.9047607434610645e-06, - "loss": 0.8865, + "learning_rate": 6.930786402908401e-06, + "loss": 0.827, "step": 21573 }, { - "epoch": 0.6122020431328037, + "epoch": 0.611351979370342, "grad_norm": 0.0, - "learning_rate": 6.90388681286957e-06, - "loss": 0.8133, + "learning_rate": 6.929912923781716e-06, + "loss": 0.8101, "step": 21574 }, { - "epoch": 0.6122304199772985, + "epoch": 0.6113803168126045, "grad_norm": 0.0, - "learning_rate": 6.903012908431082e-06, - "loss": 0.8858, + "learning_rate": 6.929039470515513e-06, + "loss": 0.8827, "step": 21575 }, { - "epoch": 0.6122587968217934, + "epoch": 0.6114086542548669, "grad_norm": 0.0, - "learning_rate": 6.902139030152983e-06, - "loss": 0.8684, + "learning_rate": 6.928166043117157e-06, + "loss": 0.916, "step": 21576 }, { - "epoch": 0.6122871736662883, + "epoch": 0.6114369916971294, "grad_norm": 0.0, - "learning_rate": 6.901265178042657e-06, - "loss": 1.0041, + "learning_rate": 6.927292641593993e-06, + "loss": 0.9407, "step": 21577 }, { - "epoch": 0.6123155505107832, + "epoch": 0.6114653291393919, "grad_norm": 0.0, - "learning_rate": 6.900391352107478e-06, - "loss": 0.8051, + "learning_rate": 6.926419265953388e-06, + "loss": 0.9169, "step": 21578 }, { - "epoch": 0.6123439273552781, + "epoch": 0.6114936665816544, "grad_norm": 0.0, - "learning_rate": 6.899517552354836e-06, - "loss": 0.8818, + "learning_rate": 6.925545916202692e-06, + "loss": 0.8964, "step": 21579 }, { - "epoch": 0.612372304199773, + "epoch": 0.6115220040239168, "grad_norm": 0.0, - "learning_rate": 6.89864377879211e-06, - "loss": 0.7497, + "learning_rate": 6.924672592349264e-06, + "loss": 0.8304, "step": 21580 }, { - "epoch": 0.6124006810442679, + "epoch": 0.6115503414661793, "grad_norm": 0.0, - "learning_rate": 6.897770031426679e-06, - "loss": 0.7986, + "learning_rate": 6.923799294400466e-06, + "loss": 0.7581, "step": 21581 }, { - "epoch": 0.6124290578887628, + "epoch": 0.6115786789084418, "grad_norm": 0.0, - "learning_rate": 6.896896310265925e-06, - "loss": 0.899, + "learning_rate": 6.922926022363644e-06, + "loss": 0.8259, "step": 21582 }, { - "epoch": 0.6124574347332576, + "epoch": 0.6116070163507041, "grad_norm": 0.0, - "learning_rate": 6.896022615317224e-06, - "loss": 0.8756, + "learning_rate": 6.92205277624616e-06, + "loss": 0.8955, "step": 21583 }, { - "epoch": 0.6124858115777525, + "epoch": 0.6116353537929666, "grad_norm": 0.0, - "learning_rate": 6.895148946587962e-06, - "loss": 0.7413, + "learning_rate": 6.921179556055369e-06, + "loss": 0.8589, "step": 21584 }, { - "epoch": 0.6125141884222475, + "epoch": 0.6116636912352291, "grad_norm": 0.0, - "learning_rate": 6.894275304085517e-06, - "loss": 0.8698, + "learning_rate": 6.9203063617986235e-06, + "loss": 0.7011, "step": 21585 }, { - "epoch": 0.6125425652667423, + "epoch": 0.6116920286774916, "grad_norm": 0.0, - "learning_rate": 6.893401687817266e-06, - "loss": 0.8634, + "learning_rate": 6.919433193483287e-06, + "loss": 0.847, "step": 21586 }, { - "epoch": 0.6125709421112372, + "epoch": 0.611720366119754, "grad_norm": 0.0, - "learning_rate": 6.892528097790592e-06, - "loss": 0.8616, + "learning_rate": 6.918560051116703e-06, + "loss": 0.8343, "step": 21587 }, { - "epoch": 0.6125993189557322, + "epoch": 0.6117487035620165, "grad_norm": 0.0, - "learning_rate": 6.891654534012872e-06, - "loss": 0.7838, + "learning_rate": 6.9176869347062325e-06, + "loss": 0.9184, "step": 21588 }, { - "epoch": 0.612627695800227, + "epoch": 0.611777041004279, "grad_norm": 0.0, - "learning_rate": 6.890780996491486e-06, - "loss": 0.8192, + "learning_rate": 6.916813844259234e-06, + "loss": 0.7975, "step": 21589 }, { - "epoch": 0.6126560726447219, + "epoch": 0.6118053784465414, "grad_norm": 0.0, - "learning_rate": 6.889907485233813e-06, - "loss": 0.8657, + "learning_rate": 6.915940779783052e-06, + "loss": 0.8076, "step": 21590 }, { - "epoch": 0.6126844494892169, + "epoch": 0.6118337158888039, "grad_norm": 0.0, - "learning_rate": 6.889034000247232e-06, - "loss": 0.8092, + "learning_rate": 6.9150677412850485e-06, + "loss": 0.7723, "step": 21591 }, { - "epoch": 0.6127128263337117, + "epoch": 0.6118620533310664, "grad_norm": 0.0, - "learning_rate": 6.88816054153912e-06, - "loss": 0.7754, + "learning_rate": 6.914194728772574e-06, + "loss": 0.8491, "step": 21592 }, { - "epoch": 0.6127412031782066, + "epoch": 0.6118903907733289, "grad_norm": 0.0, - "learning_rate": 6.887287109116856e-06, - "loss": 0.8979, + "learning_rate": 6.913321742252983e-06, + "loss": 0.8971, "step": 21593 }, { - "epoch": 0.6127695800227014, + "epoch": 0.6119187282155912, "grad_norm": 0.0, - "learning_rate": 6.886413702987814e-06, - "loss": 0.8468, + "learning_rate": 6.912448781733633e-06, + "loss": 0.9283, "step": 21594 }, { - "epoch": 0.6127979568671964, + "epoch": 0.6119470656578537, "grad_norm": 0.0, - "learning_rate": 6.88554032315938e-06, - "loss": 0.9541, + "learning_rate": 6.9115758472218695e-06, + "loss": 0.8051, "step": 21595 }, { - "epoch": 0.6128263337116913, + "epoch": 0.6119754031001162, "grad_norm": 0.0, - "learning_rate": 6.884666969638925e-06, - "loss": 0.8035, + "learning_rate": 6.910702938725049e-06, + "loss": 0.8872, "step": 21596 }, { - "epoch": 0.6128547105561861, + "epoch": 0.6120037405423786, "grad_norm": 0.0, - "learning_rate": 6.88379364243383e-06, - "loss": 0.7909, + "learning_rate": 6.909830056250527e-06, + "loss": 0.8036, "step": 21597 }, { - "epoch": 0.6128830874006811, + "epoch": 0.6120320779846411, "grad_norm": 0.0, - "learning_rate": 6.882920341551469e-06, - "loss": 0.8599, + "learning_rate": 6.9089571998056525e-06, + "loss": 0.9722, "step": 21598 }, { - "epoch": 0.612911464245176, + "epoch": 0.6120604154269036, "grad_norm": 0.0, - "learning_rate": 6.882047066999218e-06, - "loss": 0.9262, + "learning_rate": 6.908084369397783e-06, + "loss": 0.879, "step": 21599 }, { - "epoch": 0.6129398410896708, + "epoch": 0.612088752869166, "grad_norm": 0.0, - "learning_rate": 6.881173818784459e-06, - "loss": 0.9066, + "learning_rate": 6.907211565034262e-06, + "loss": 0.7285, "step": 21600 }, { - "epoch": 0.6129682179341657, + "epoch": 0.6121170903114285, "grad_norm": 0.0, - "learning_rate": 6.880300596914564e-06, - "loss": 0.8157, + "learning_rate": 6.906338786722448e-06, + "loss": 0.7732, "step": 21601 }, { - "epoch": 0.6129965947786606, + "epoch": 0.612145427753691, "grad_norm": 0.0, - "learning_rate": 6.879427401396909e-06, - "loss": 0.9379, + "learning_rate": 6.905466034469695e-06, + "loss": 0.8997, "step": 21602 }, { - "epoch": 0.6130249716231555, + "epoch": 0.6121737651959535, "grad_norm": 0.0, - "learning_rate": 6.878554232238872e-06, - "loss": 0.7927, + "learning_rate": 6.904593308283345e-06, + "loss": 0.8498, "step": 21603 }, { - "epoch": 0.6130533484676504, + "epoch": 0.6122021026382158, "grad_norm": 0.0, - "learning_rate": 6.877681089447828e-06, - "loss": 0.8109, + "learning_rate": 6.903720608170757e-06, + "loss": 0.9739, "step": 21604 }, { - "epoch": 0.6130817253121453, + "epoch": 0.6122304400804783, "grad_norm": 0.0, - "learning_rate": 6.87680797303115e-06, - "loss": 0.8605, + "learning_rate": 6.902847934139281e-06, + "loss": 0.82, "step": 21605 }, { - "epoch": 0.6131101021566402, + "epoch": 0.6122587775227408, "grad_norm": 0.0, - "learning_rate": 6.875934882996218e-06, - "loss": 0.861, + "learning_rate": 6.901975286196265e-06, + "loss": 0.9373, "step": 21606 }, { - "epoch": 0.613138479001135, + "epoch": 0.6122871149650032, "grad_norm": 0.0, - "learning_rate": 6.875061819350405e-06, - "loss": 0.8629, + "learning_rate": 6.901102664349067e-06, + "loss": 0.8571, "step": 21607 }, { - "epoch": 0.61316685584563, + "epoch": 0.6123154524072657, "grad_norm": 0.0, - "learning_rate": 6.8741887821010844e-06, - "loss": 0.9906, + "learning_rate": 6.900230068605027e-06, + "loss": 0.9172, "step": 21608 }, { - "epoch": 0.6131952326901249, + "epoch": 0.6123437898495282, "grad_norm": 0.0, - "learning_rate": 6.873315771255627e-06, - "loss": 0.7856, + "learning_rate": 6.8993574989714995e-06, + "loss": 0.8964, "step": 21609 }, { - "epoch": 0.6132236095346197, + "epoch": 0.6123721272917907, "grad_norm": 0.0, - "learning_rate": 6.872442786821415e-06, - "loss": 0.9232, + "learning_rate": 6.898484955455837e-06, + "loss": 0.8369, "step": 21610 }, { - "epoch": 0.6132519863791146, + "epoch": 0.6124004647340531, "grad_norm": 0.0, - "learning_rate": 6.871569828805822e-06, - "loss": 0.8402, + "learning_rate": 6.897612438065388e-06, + "loss": 0.86, "step": 21611 }, { - "epoch": 0.6132803632236096, + "epoch": 0.6124288021763156, "grad_norm": 0.0, - "learning_rate": 6.870696897216218e-06, - "loss": 0.9071, + "learning_rate": 6.896739946807499e-06, + "loss": 0.7956, "step": 21612 }, { - "epoch": 0.6133087400681044, + "epoch": 0.6124571396185781, "grad_norm": 0.0, - "learning_rate": 6.869823992059976e-06, - "loss": 0.8017, + "learning_rate": 6.895867481689527e-06, + "loss": 0.7894, "step": 21613 }, { - "epoch": 0.6133371169125993, + "epoch": 0.6124854770608404, "grad_norm": 0.0, - "learning_rate": 6.868951113344473e-06, - "loss": 0.783, + "learning_rate": 6.8949950427188104e-06, + "loss": 0.8548, "step": 21614 }, { - "epoch": 0.6133654937570943, + "epoch": 0.6125138145031029, "grad_norm": 0.0, - "learning_rate": 6.868078261077083e-06, - "loss": 0.8225, + "learning_rate": 6.894122629902706e-06, + "loss": 0.9375, "step": 21615 }, { - "epoch": 0.6133938706015891, + "epoch": 0.6125421519453654, "grad_norm": 0.0, - "learning_rate": 6.867205435265172e-06, - "loss": 0.8951, + "learning_rate": 6.893250243248559e-06, + "loss": 0.817, "step": 21616 }, { - "epoch": 0.613422247446084, + "epoch": 0.6125704893876279, "grad_norm": 0.0, - "learning_rate": 6.866332635916122e-06, - "loss": 0.8277, + "learning_rate": 6.892377882763719e-06, + "loss": 0.8649, "step": 21617 }, { - "epoch": 0.6134506242905788, + "epoch": 0.6125988268298903, "grad_norm": 0.0, - "learning_rate": 6.8654598630372995e-06, - "loss": 0.819, + "learning_rate": 6.891505548455538e-06, + "loss": 0.8522, "step": 21618 }, { - "epoch": 0.6134790011350738, + "epoch": 0.6126271642721528, "grad_norm": 0.0, - "learning_rate": 6.864587116636077e-06, - "loss": 0.8503, + "learning_rate": 6.890633240331355e-06, + "loss": 0.8629, "step": 21619 }, { - "epoch": 0.6135073779795687, + "epoch": 0.6126555017144153, "grad_norm": 0.0, - "learning_rate": 6.863714396719829e-06, - "loss": 0.8385, + "learning_rate": 6.889760958398528e-06, + "loss": 0.8563, "step": 21620 }, { - "epoch": 0.6135357548240635, + "epoch": 0.6126838391566777, "grad_norm": 0.0, - "learning_rate": 6.862841703295928e-06, - "loss": 0.9062, + "learning_rate": 6.888888702664396e-06, + "loss": 0.8895, "step": 21621 }, { - "epoch": 0.6135641316685585, + "epoch": 0.6127121765989402, "grad_norm": 0.0, - "learning_rate": 6.86196903637174e-06, - "loss": 0.806, + "learning_rate": 6.888016473136308e-06, + "loss": 0.813, "step": 21622 }, { - "epoch": 0.6135925085130534, + "epoch": 0.6127405140412027, "grad_norm": 0.0, - "learning_rate": 6.861096395954645e-06, - "loss": 0.7801, + "learning_rate": 6.887144269821615e-06, + "loss": 0.8081, "step": 21623 }, { - "epoch": 0.6136208853575482, + "epoch": 0.612768851483465, "grad_norm": 0.0, - "learning_rate": 6.8602237820520045e-06, - "loss": 0.8999, + "learning_rate": 6.88627209272766e-06, + "loss": 0.8224, "step": 21624 }, { - "epoch": 0.6136492622020431, + "epoch": 0.6127971889257275, "grad_norm": 0.0, - "learning_rate": 6.8593511946712e-06, - "loss": 0.8933, + "learning_rate": 6.885399941861792e-06, + "loss": 0.9416, "step": 21625 }, { - "epoch": 0.613677639046538, + "epoch": 0.61282552636799, "grad_norm": 0.0, - "learning_rate": 6.858478633819596e-06, - "loss": 0.9169, + "learning_rate": 6.8845278172313614e-06, + "loss": 0.8345, "step": 21626 }, { - "epoch": 0.6137060158910329, + "epoch": 0.6128538638102525, "grad_norm": 0.0, - "learning_rate": 6.857606099504563e-06, - "loss": 0.8533, + "learning_rate": 6.8836557188437045e-06, + "loss": 0.8866, "step": 21627 }, { - "epoch": 0.6137343927355278, + "epoch": 0.6128822012525149, "grad_norm": 0.0, - "learning_rate": 6.856733591733473e-06, - "loss": 0.8474, + "learning_rate": 6.8827836467061745e-06, + "loss": 0.9442, "step": 21628 }, { - "epoch": 0.6137627695800227, + "epoch": 0.6129105386947774, "grad_norm": 0.0, - "learning_rate": 6.855861110513697e-06, - "loss": 0.7967, + "learning_rate": 6.8819116008261145e-06, + "loss": 0.8575, "step": 21629 }, { - "epoch": 0.6137911464245176, + "epoch": 0.6129388761370399, "grad_norm": 0.0, - "learning_rate": 6.854988655852601e-06, - "loss": 0.9407, + "learning_rate": 6.881039581210871e-06, + "loss": 0.8428, "step": 21630 }, { - "epoch": 0.6138195232690125, + "epoch": 0.6129672135793023, "grad_norm": 0.0, - "learning_rate": 6.8541162277575615e-06, - "loss": 0.869, + "learning_rate": 6.880167587867792e-06, + "loss": 0.8397, "step": 21631 }, { - "epoch": 0.6138479001135074, + "epoch": 0.6129955510215648, "grad_norm": 0.0, - "learning_rate": 6.8532438262359404e-06, - "loss": 0.9136, + "learning_rate": 6.879295620804217e-06, + "loss": 0.9321, "step": 21632 }, { - "epoch": 0.6138762769580023, + "epoch": 0.6130238884638273, "grad_norm": 0.0, - "learning_rate": 6.85237145129511e-06, - "loss": 0.8147, + "learning_rate": 6.878423680027494e-06, + "loss": 0.8571, "step": 21633 }, { - "epoch": 0.6139046538024971, + "epoch": 0.6130522259060898, "grad_norm": 0.0, - "learning_rate": 6.85149910294244e-06, - "loss": 0.9608, + "learning_rate": 6.877551765544972e-06, + "loss": 0.7731, "step": 21634 }, { - "epoch": 0.613933030646992, + "epoch": 0.6130805633483521, "grad_norm": 0.0, - "learning_rate": 6.8506267811853e-06, - "loss": 0.8505, + "learning_rate": 6.876679877363986e-06, + "loss": 0.9044, "step": 21635 }, { - "epoch": 0.613961407491487, + "epoch": 0.6131089007906146, "grad_norm": 0.0, - "learning_rate": 6.849754486031055e-06, - "loss": 0.9139, + "learning_rate": 6.8758080154918875e-06, + "loss": 0.7974, "step": 21636 }, { - "epoch": 0.6139897843359818, + "epoch": 0.6131372382328771, "grad_norm": 0.0, - "learning_rate": 6.848882217487077e-06, - "loss": 0.7708, + "learning_rate": 6.8749361799360155e-06, + "loss": 0.8383, "step": 21637 }, { - "epoch": 0.6140181611804767, + "epoch": 0.6131655756751395, "grad_norm": 0.0, - "learning_rate": 6.848009975560732e-06, - "loss": 0.7945, + "learning_rate": 6.874064370703717e-06, + "loss": 0.7987, "step": 21638 }, { - "epoch": 0.6140465380249717, + "epoch": 0.613193913117402, "grad_norm": 0.0, - "learning_rate": 6.847137760259388e-06, - "loss": 0.9077, + "learning_rate": 6.87319258780234e-06, + "loss": 0.8224, "step": 21639 }, { - "epoch": 0.6140749148694665, + "epoch": 0.6132222505596645, "grad_norm": 0.0, - "learning_rate": 6.846265571590409e-06, - "loss": 0.9622, + "learning_rate": 6.872320831239217e-06, + "loss": 0.8151, "step": 21640 }, { - "epoch": 0.6141032917139614, + "epoch": 0.613250588001927, "grad_norm": 0.0, - "learning_rate": 6.845393409561172e-06, - "loss": 0.933, + "learning_rate": 6.8714491010216985e-06, + "loss": 0.7989, "step": 21641 }, { - "epoch": 0.6141316685584562, + "epoch": 0.6132789254441894, "grad_norm": 0.0, - "learning_rate": 6.844521274179039e-06, - "loss": 0.7786, + "learning_rate": 6.870577397157128e-06, + "loss": 0.8678, "step": 21642 }, { - "epoch": 0.6141600454029512, + "epoch": 0.6133072628864519, "grad_norm": 0.0, - "learning_rate": 6.843649165451374e-06, - "loss": 0.8407, + "learning_rate": 6.869705719652844e-06, + "loss": 0.8613, "step": 21643 }, { - "epoch": 0.6141884222474461, + "epoch": 0.6133356003287144, "grad_norm": 0.0, - "learning_rate": 6.842777083385548e-06, - "loss": 0.8903, + "learning_rate": 6.868834068516195e-06, + "loss": 0.8439, "step": 21644 }, { - "epoch": 0.6142167990919409, + "epoch": 0.6133639377709768, "grad_norm": 0.0, - "learning_rate": 6.8419050279889264e-06, - "loss": 0.876, + "learning_rate": 6.8679624437545145e-06, + "loss": 0.7928, "step": 21645 }, { - "epoch": 0.6142451759364359, + "epoch": 0.6133922752132392, "grad_norm": 0.0, - "learning_rate": 6.841032999268873e-06, - "loss": 0.8382, + "learning_rate": 6.8670908453751505e-06, + "loss": 0.7563, "step": 21646 }, { - "epoch": 0.6142735527809308, + "epoch": 0.6134206126555017, "grad_norm": 0.0, - "learning_rate": 6.840160997232758e-06, - "loss": 0.8696, + "learning_rate": 6.866219273385449e-06, + "loss": 0.9113, "step": 21647 }, { - "epoch": 0.6143019296254256, + "epoch": 0.6134489500977641, "grad_norm": 0.0, - "learning_rate": 6.839289021887945e-06, - "loss": 0.899, + "learning_rate": 6.865347727792739e-06, + "loss": 0.7249, "step": 21648 }, { - "epoch": 0.6143303064699206, + "epoch": 0.6134772875400266, "grad_norm": 0.0, - "learning_rate": 6.838417073241798e-06, - "loss": 0.9136, + "learning_rate": 6.8644762086043734e-06, + "loss": 0.8223, "step": 21649 }, { - "epoch": 0.6143586833144155, + "epoch": 0.6135056249822891, "grad_norm": 0.0, - "learning_rate": 6.837545151301685e-06, - "loss": 0.8674, + "learning_rate": 6.863604715827685e-06, + "loss": 0.8282, "step": 21650 }, { - "epoch": 0.6143870601589103, + "epoch": 0.6135339624245516, "grad_norm": 0.0, - "learning_rate": 6.8366732560749706e-06, - "loss": 0.7972, + "learning_rate": 6.862733249470021e-06, + "loss": 0.9074, "step": 21651 }, { - "epoch": 0.6144154370034052, + "epoch": 0.613562299866814, "grad_norm": 0.0, - "learning_rate": 6.835801387569021e-06, - "loss": 0.914, + "learning_rate": 6.861861809538723e-06, + "loss": 0.8266, "step": 21652 }, { - "epoch": 0.6144438138479001, + "epoch": 0.6135906373090765, "grad_norm": 0.0, - "learning_rate": 6.834929545791196e-06, - "loss": 0.8584, + "learning_rate": 6.860990396041125e-06, + "loss": 0.782, "step": 21653 }, { - "epoch": 0.614472190692395, + "epoch": 0.613618974751339, "grad_norm": 0.0, - "learning_rate": 6.8340577307488645e-06, - "loss": 0.8889, + "learning_rate": 6.860119008984569e-06, + "loss": 0.9241, "step": 21654 }, { - "epoch": 0.6145005675368899, + "epoch": 0.6136473121936014, "grad_norm": 0.0, - "learning_rate": 6.8331859424493875e-06, - "loss": 0.8499, + "learning_rate": 6.859247648376399e-06, + "loss": 0.7877, "step": 21655 }, { - "epoch": 0.6145289443813848, + "epoch": 0.6136756496358639, "grad_norm": 0.0, - "learning_rate": 6.8323141809001334e-06, - "loss": 0.9184, + "learning_rate": 6.858376314223951e-06, + "loss": 0.8799, "step": 21656 }, { - "epoch": 0.6145573212258797, + "epoch": 0.6137039870781263, "grad_norm": 0.0, - "learning_rate": 6.831442446108463e-06, - "loss": 0.9015, + "learning_rate": 6.857505006534571e-06, + "loss": 0.8606, "step": 21657 }, { - "epoch": 0.6145856980703746, + "epoch": 0.6137323245203888, "grad_norm": 0.0, - "learning_rate": 6.830570738081744e-06, - "loss": 0.9643, + "learning_rate": 6.856633725315587e-06, + "loss": 0.8658, "step": 21658 }, { - "epoch": 0.6146140749148694, + "epoch": 0.6137606619626512, "grad_norm": 0.0, - "learning_rate": 6.8296990568273335e-06, - "loss": 0.8445, + "learning_rate": 6.855762470574345e-06, + "loss": 0.8737, "step": 21659 }, { - "epoch": 0.6146424517593644, + "epoch": 0.6137889994049137, "grad_norm": 0.0, - "learning_rate": 6.828827402352597e-06, - "loss": 0.7532, + "learning_rate": 6.854891242318189e-06, + "loss": 0.9573, "step": 21660 }, { - "epoch": 0.6146708286038592, + "epoch": 0.6138173368471762, "grad_norm": 0.0, - "learning_rate": 6.827955774664901e-06, - "loss": 0.8854, + "learning_rate": 6.8540200405544455e-06, + "loss": 0.8697, "step": 21661 }, { - "epoch": 0.6146992054483541, + "epoch": 0.6138456742894386, "grad_norm": 0.0, - "learning_rate": 6.827084173771603e-06, - "loss": 0.8908, + "learning_rate": 6.853148865290461e-06, + "loss": 0.8127, "step": 21662 }, { - "epoch": 0.6147275822928491, + "epoch": 0.6138740117317011, "grad_norm": 0.0, - "learning_rate": 6.826212599680066e-06, - "loss": 0.8222, + "learning_rate": 6.852277716533573e-06, + "loss": 0.84, "step": 21663 }, { - "epoch": 0.6147559591373439, + "epoch": 0.6139023491739636, "grad_norm": 0.0, - "learning_rate": 6.825341052397657e-06, - "loss": 0.8582, + "learning_rate": 6.851406594291118e-06, + "loss": 0.7726, "step": 21664 }, { - "epoch": 0.6147843359818388, + "epoch": 0.6139306866162261, "grad_norm": 0.0, - "learning_rate": 6.824469531931735e-06, - "loss": 0.7489, + "learning_rate": 6.850535498570438e-06, + "loss": 0.7218, "step": 21665 }, { - "epoch": 0.6148127128263338, + "epoch": 0.6139590240584885, "grad_norm": 0.0, - "learning_rate": 6.823598038289658e-06, - "loss": 0.8732, + "learning_rate": 6.849664429378863e-06, + "loss": 0.9227, "step": 21666 }, { - "epoch": 0.6148410896708286, + "epoch": 0.613987361500751, "grad_norm": 0.0, - "learning_rate": 6.8227265714787945e-06, - "loss": 0.8395, + "learning_rate": 6.848793386723734e-06, + "loss": 0.8729, "step": 21667 }, { - "epoch": 0.6148694665153235, + "epoch": 0.6140156989430134, "grad_norm": 0.0, - "learning_rate": 6.821855131506503e-06, - "loss": 0.8886, + "learning_rate": 6.84792237061239e-06, + "loss": 0.8579, "step": 21668 }, { - "epoch": 0.6148978433598183, + "epoch": 0.6140440363852758, "grad_norm": 0.0, - "learning_rate": 6.820983718380141e-06, - "loss": 0.8226, + "learning_rate": 6.847051381052165e-06, + "loss": 0.827, "step": 21669 }, { - "epoch": 0.6149262202043133, + "epoch": 0.6140723738275383, "grad_norm": 0.0, - "learning_rate": 6.820112332107073e-06, - "loss": 0.832, + "learning_rate": 6.846180418050397e-06, + "loss": 0.8737, "step": 21670 }, { - "epoch": 0.6149545970488082, + "epoch": 0.6141007112698008, "grad_norm": 0.0, - "learning_rate": 6.819240972694658e-06, - "loss": 0.927, + "learning_rate": 6.845309481614427e-06, + "loss": 0.7441, "step": 21671 }, { - "epoch": 0.614982973893303, + "epoch": 0.6141290487120632, "grad_norm": 0.0, - "learning_rate": 6.818369640150261e-06, - "loss": 0.9421, + "learning_rate": 6.844438571751583e-06, + "loss": 0.8772, "step": 21672 }, { - "epoch": 0.615011350737798, + "epoch": 0.6141573861543257, "grad_norm": 0.0, - "learning_rate": 6.817498334481239e-06, - "loss": 0.7523, + "learning_rate": 6.8435676884692085e-06, + "loss": 1.0026, "step": 21673 }, { - "epoch": 0.6150397275822929, + "epoch": 0.6141857235965882, "grad_norm": 0.0, - "learning_rate": 6.816627055694946e-06, - "loss": 0.9383, + "learning_rate": 6.84269683177463e-06, + "loss": 0.9254, "step": 21674 }, { - "epoch": 0.6150681044267877, + "epoch": 0.6142140610388507, "grad_norm": 0.0, - "learning_rate": 6.815755803798754e-06, - "loss": 0.7988, + "learning_rate": 6.8418260016751895e-06, + "loss": 0.828, "step": 21675 }, { - "epoch": 0.6150964812712826, + "epoch": 0.6142423984811131, "grad_norm": 0.0, - "learning_rate": 6.814884578800013e-06, - "loss": 0.8475, + "learning_rate": 6.840955198178223e-06, + "loss": 0.8313, "step": 21676 }, { - "epoch": 0.6151248581157776, + "epoch": 0.6142707359233756, "grad_norm": 0.0, - "learning_rate": 6.8140133807060834e-06, - "loss": 0.8425, + "learning_rate": 6.840084421291062e-06, + "loss": 0.8513, "step": 21677 }, { - "epoch": 0.6151532349602724, + "epoch": 0.614299073365638, "grad_norm": 0.0, - "learning_rate": 6.813142209524328e-06, - "loss": 0.8377, + "learning_rate": 6.839213671021048e-06, + "loss": 0.8531, "step": 21678 }, { - "epoch": 0.6151816118047673, + "epoch": 0.6143274108079004, "grad_norm": 0.0, - "learning_rate": 6.812271065262106e-06, - "loss": 0.8753, + "learning_rate": 6.838342947375507e-06, + "loss": 0.7253, "step": 21679 }, { - "epoch": 0.6152099886492622, + "epoch": 0.6143557482501629, "grad_norm": 0.0, - "learning_rate": 6.811399947926769e-06, - "loss": 0.8337, + "learning_rate": 6.837472250361776e-06, + "loss": 0.7784, "step": 21680 }, { - "epoch": 0.6152383654937571, + "epoch": 0.6143840856924254, "grad_norm": 0.0, - "learning_rate": 6.8105288575256824e-06, - "loss": 0.7702, + "learning_rate": 6.836601579987195e-06, + "loss": 0.8776, "step": 21681 }, { - "epoch": 0.615266742338252, + "epoch": 0.6144124231346879, "grad_norm": 0.0, - "learning_rate": 6.809657794066203e-06, - "loss": 0.8103, + "learning_rate": 6.83573093625909e-06, + "loss": 0.9547, "step": 21682 }, { - "epoch": 0.6152951191827469, + "epoch": 0.6144407605769503, "grad_norm": 0.0, - "learning_rate": 6.808786757555684e-06, - "loss": 0.9079, + "learning_rate": 6.834860319184797e-06, + "loss": 0.9609, "step": 21683 }, { - "epoch": 0.6153234960272418, + "epoch": 0.6144690980192128, "grad_norm": 0.0, - "learning_rate": 6.80791574800149e-06, - "loss": 0.815, + "learning_rate": 6.833989728771657e-06, + "loss": 0.881, "step": 21684 }, { - "epoch": 0.6153518728717366, + "epoch": 0.6144974354614753, "grad_norm": 0.0, - "learning_rate": 6.807044765410975e-06, - "loss": 0.7233, + "learning_rate": 6.833119165026993e-06, + "loss": 0.8616, "step": 21685 }, { - "epoch": 0.6153802497162315, + "epoch": 0.6145257729037377, "grad_norm": 0.0, - "learning_rate": 6.806173809791492e-06, - "loss": 0.8658, + "learning_rate": 6.832248627958146e-06, + "loss": 0.7432, "step": 21686 }, { - "epoch": 0.6154086265607265, + "epoch": 0.6145541103460002, "grad_norm": 0.0, - "learning_rate": 6.805302881150404e-06, - "loss": 0.8622, + "learning_rate": 6.831378117572441e-06, + "loss": 0.8206, "step": 21687 }, { - "epoch": 0.6154370034052213, + "epoch": 0.6145824477882627, "grad_norm": 0.0, - "learning_rate": 6.80443197949507e-06, - "loss": 0.8616, + "learning_rate": 6.8305076338772146e-06, + "loss": 0.8003, "step": 21688 }, { - "epoch": 0.6154653802497162, + "epoch": 0.6146107852305251, "grad_norm": 0.0, - "learning_rate": 6.803561104832841e-06, - "loss": 0.7868, + "learning_rate": 6.829637176879802e-06, + "loss": 0.7951, "step": 21689 }, { - "epoch": 0.6154937570942112, + "epoch": 0.6146391226727875, "grad_norm": 0.0, - "learning_rate": 6.802690257171075e-06, - "loss": 0.916, + "learning_rate": 6.828766746587529e-06, + "loss": 0.8389, "step": 21690 }, { - "epoch": 0.615522133938706, + "epoch": 0.61466746011505, "grad_norm": 0.0, - "learning_rate": 6.801819436517128e-06, - "loss": 0.869, + "learning_rate": 6.827896343007734e-06, + "loss": 0.8617, "step": 21691 }, { - "epoch": 0.6155505107832009, + "epoch": 0.6146957975573125, "grad_norm": 0.0, - "learning_rate": 6.800948642878356e-06, - "loss": 0.8312, + "learning_rate": 6.8270259661477475e-06, + "loss": 0.7684, "step": 21692 }, { - "epoch": 0.6155788876276957, + "epoch": 0.6147241349995749, "grad_norm": 0.0, - "learning_rate": 6.8000778762621145e-06, - "loss": 0.84, + "learning_rate": 6.826155616014897e-06, + "loss": 0.8629, "step": 21693 }, { - "epoch": 0.6156072644721907, + "epoch": 0.6147524724418374, "grad_norm": 0.0, - "learning_rate": 6.799207136675758e-06, - "loss": 0.857, + "learning_rate": 6.825285292616517e-06, + "loss": 0.8631, "step": 21694 }, { - "epoch": 0.6156356413166856, + "epoch": 0.6147808098840999, "grad_norm": 0.0, - "learning_rate": 6.798336424126645e-06, - "loss": 0.79, + "learning_rate": 6.824414995959938e-06, + "loss": 0.91, "step": 21695 }, { - "epoch": 0.6156640181611804, + "epoch": 0.6148091473263623, "grad_norm": 0.0, - "learning_rate": 6.797465738622127e-06, + "learning_rate": 6.823544726052489e-06, "loss": 0.8512, "step": 21696 }, { - "epoch": 0.6156923950056754, + "epoch": 0.6148374847686248, "grad_norm": 0.0, - "learning_rate": 6.796595080169559e-06, - "loss": 0.8549, + "learning_rate": 6.822674482901507e-06, + "loss": 0.8941, "step": 21697 }, { - "epoch": 0.6157207718501703, + "epoch": 0.6148658222108873, "grad_norm": 0.0, - "learning_rate": 6.795724448776298e-06, - "loss": 0.8511, + "learning_rate": 6.821804266514314e-06, + "loss": 0.9989, "step": 21698 }, { - "epoch": 0.6157491486946651, + "epoch": 0.6148941596531498, "grad_norm": 0.0, - "learning_rate": 6.7948538444496955e-06, - "loss": 0.8559, + "learning_rate": 6.820934076898247e-06, + "loss": 0.802, "step": 21699 }, { - "epoch": 0.6157775255391601, + "epoch": 0.6149224970954121, "grad_norm": 0.0, - "learning_rate": 6.793983267197106e-06, - "loss": 0.8986, + "learning_rate": 6.820063914060628e-06, + "loss": 0.8484, "step": 21700 }, { - "epoch": 0.615805902383655, + "epoch": 0.6149508345376746, "grad_norm": 0.0, - "learning_rate": 6.793112717025882e-06, - "loss": 0.9767, + "learning_rate": 6.819193778008794e-06, + "loss": 0.7723, "step": 21701 }, { - "epoch": 0.6158342792281498, + "epoch": 0.6149791719799371, "grad_norm": 0.0, - "learning_rate": 6.792242193943382e-06, - "loss": 0.9325, + "learning_rate": 6.818323668750073e-06, + "loss": 0.9022, "step": 21702 }, { - "epoch": 0.6158626560726447, + "epoch": 0.6150075094221995, "grad_norm": 0.0, - "learning_rate": 6.791371697956957e-06, - "loss": 0.9371, + "learning_rate": 6.8174535862917905e-06, + "loss": 0.805, "step": 21703 }, { - "epoch": 0.6158910329171396, + "epoch": 0.615035846864462, "grad_norm": 0.0, - "learning_rate": 6.790501229073958e-06, - "loss": 0.6561, + "learning_rate": 6.816583530641279e-06, + "loss": 0.9368, "step": 21704 }, { - "epoch": 0.6159194097616345, + "epoch": 0.6150641843067245, "grad_norm": 0.0, - "learning_rate": 6.789630787301741e-06, - "loss": 0.8029, + "learning_rate": 6.815713501805869e-06, + "loss": 0.9263, "step": 21705 }, { - "epoch": 0.6159477866061294, + "epoch": 0.615092521748987, "grad_norm": 0.0, - "learning_rate": 6.788760372647657e-06, - "loss": 0.887, + "learning_rate": 6.8148434997928846e-06, + "loss": 0.7793, "step": 21706 }, { - "epoch": 0.6159761634506243, + "epoch": 0.6151208591912494, "grad_norm": 0.0, - "learning_rate": 6.787889985119056e-06, - "loss": 0.7422, + "learning_rate": 6.8139735246096575e-06, + "loss": 0.922, "step": 21707 }, { - "epoch": 0.6160045402951192, + "epoch": 0.6151491966335119, "grad_norm": 0.0, - "learning_rate": 6.7870196247232954e-06, - "loss": 0.7759, + "learning_rate": 6.813103576263512e-06, + "loss": 0.9753, "step": 21708 }, { - "epoch": 0.6160329171396141, + "epoch": 0.6151775340757744, "grad_norm": 0.0, - "learning_rate": 6.786149291467724e-06, - "loss": 0.7945, + "learning_rate": 6.812233654761779e-06, + "loss": 0.8215, "step": 21709 }, { - "epoch": 0.6160612939841089, + "epoch": 0.6152058715180367, "grad_norm": 0.0, - "learning_rate": 6.785278985359692e-06, - "loss": 0.8864, + "learning_rate": 6.81136376011179e-06, + "loss": 0.9412, "step": 21710 }, { - "epoch": 0.6160896708286039, + "epoch": 0.6152342089602992, "grad_norm": 0.0, - "learning_rate": 6.784408706406555e-06, - "loss": 0.9153, + "learning_rate": 6.810493892320864e-06, + "loss": 0.9744, "step": 21711 }, { - "epoch": 0.6161180476730987, + "epoch": 0.6152625464025617, "grad_norm": 0.0, - "learning_rate": 6.783538454615664e-06, - "loss": 0.8265, + "learning_rate": 6.809624051396331e-06, + "loss": 0.9399, "step": 21712 }, { - "epoch": 0.6161464245175936, + "epoch": 0.6152908838448242, "grad_norm": 0.0, - "learning_rate": 6.7826682299943635e-06, - "loss": 0.9571, + "learning_rate": 6.808754237345525e-06, + "loss": 0.9285, "step": 21713 }, { - "epoch": 0.6161748013620886, + "epoch": 0.6153192212870866, "grad_norm": 0.0, - "learning_rate": 6.781798032550013e-06, - "loss": 0.7077, + "learning_rate": 6.8078844501757625e-06, + "loss": 0.7243, "step": 21714 }, { - "epoch": 0.6162031782065834, + "epoch": 0.6153475587293491, "grad_norm": 0.0, - "learning_rate": 6.780927862289957e-06, - "loss": 0.7887, + "learning_rate": 6.807014689894376e-06, + "loss": 0.7041, "step": 21715 }, { - "epoch": 0.6162315550510783, + "epoch": 0.6153758961716116, "grad_norm": 0.0, - "learning_rate": 6.780057719221551e-06, - "loss": 0.7607, + "learning_rate": 6.806144956508689e-06, + "loss": 0.8393, "step": 21716 }, { - "epoch": 0.6162599318955733, + "epoch": 0.615404233613874, "grad_norm": 0.0, - "learning_rate": 6.779187603352136e-06, - "loss": 0.8354, + "learning_rate": 6.805275250026029e-06, + "loss": 0.8675, "step": 21717 }, { - "epoch": 0.6162883087400681, + "epoch": 0.6154325710561365, "grad_norm": 0.0, - "learning_rate": 6.77831751468907e-06, - "loss": 0.8844, + "learning_rate": 6.804405570453727e-06, + "loss": 0.8879, "step": 21718 }, { - "epoch": 0.616316685584563, + "epoch": 0.615460908498399, "grad_norm": 0.0, - "learning_rate": 6.777447453239704e-06, - "loss": 0.952, + "learning_rate": 6.8035359177990976e-06, + "loss": 0.8442, "step": 21719 }, { - "epoch": 0.6163450624290578, + "epoch": 0.6154892459406613, "grad_norm": 0.0, - "learning_rate": 6.776577419011384e-06, - "loss": 0.8538, + "learning_rate": 6.802666292069473e-06, + "loss": 0.8903, "step": 21720 }, { - "epoch": 0.6163734392735528, + "epoch": 0.6155175833829238, "grad_norm": 0.0, - "learning_rate": 6.7757074120114565e-06, - "loss": 0.888, + "learning_rate": 6.80179669327218e-06, + "loss": 0.8279, "step": 21721 }, { - "epoch": 0.6164018161180477, + "epoch": 0.6155459208251863, "grad_norm": 0.0, - "learning_rate": 6.774837432247276e-06, - "loss": 0.8986, + "learning_rate": 6.800927121414539e-06, + "loss": 0.9725, "step": 21722 }, { - "epoch": 0.6164301929625425, + "epoch": 0.6155742582674488, "grad_norm": 0.0, - "learning_rate": 6.773967479726189e-06, - "loss": 0.8589, + "learning_rate": 6.80005757650388e-06, + "loss": 0.8375, "step": 21723 }, { - "epoch": 0.6164585698070375, + "epoch": 0.6156025957097112, "grad_norm": 0.0, - "learning_rate": 6.773097554455543e-06, - "loss": 0.8125, + "learning_rate": 6.799188058547521e-06, + "loss": 0.8615, "step": 21724 }, { - "epoch": 0.6164869466515324, + "epoch": 0.6156309331519737, "grad_norm": 0.0, - "learning_rate": 6.772227656442687e-06, - "loss": 0.8556, + "learning_rate": 6.79831856755279e-06, + "loss": 0.859, "step": 21725 }, { - "epoch": 0.6165153234960272, + "epoch": 0.6156592705942362, "grad_norm": 0.0, - "learning_rate": 6.77135778569497e-06, - "loss": 0.7918, + "learning_rate": 6.7974491035270115e-06, + "loss": 0.9277, "step": 21726 }, { - "epoch": 0.6165437003405221, + "epoch": 0.6156876080364986, "grad_norm": 0.0, - "learning_rate": 6.770487942219737e-06, - "loss": 0.936, + "learning_rate": 6.796579666477507e-06, + "loss": 0.8269, "step": 21727 }, { - "epoch": 0.616572077185017, + "epoch": 0.6157159454787611, "grad_norm": 0.0, - "learning_rate": 6.769618126024338e-06, - "loss": 0.8525, + "learning_rate": 6.7957102564116054e-06, + "loss": 0.8189, "step": 21728 }, { - "epoch": 0.6166004540295119, + "epoch": 0.6157442829210236, "grad_norm": 0.0, - "learning_rate": 6.768748337116121e-06, - "loss": 0.6876, + "learning_rate": 6.794840873336622e-06, + "loss": 0.8158, "step": 21729 }, { - "epoch": 0.6166288308740068, + "epoch": 0.6157726203632861, "grad_norm": 0.0, - "learning_rate": 6.76787857550243e-06, - "loss": 0.9297, + "learning_rate": 6.793971517259885e-06, + "loss": 0.9286, "step": 21730 }, { - "epoch": 0.6166572077185017, + "epoch": 0.6158009578055484, "grad_norm": 0.0, - "learning_rate": 6.767008841190616e-06, - "loss": 0.8934, + "learning_rate": 6.793102188188719e-06, + "loss": 0.9096, "step": 21731 }, { - "epoch": 0.6166855845629966, + "epoch": 0.6158292952478109, "grad_norm": 0.0, - "learning_rate": 6.766139134188019e-06, - "loss": 0.9762, + "learning_rate": 6.7922328861304406e-06, + "loss": 0.7622, "step": 21732 }, { - "epoch": 0.6167139614074915, + "epoch": 0.6158576326900734, "grad_norm": 0.0, - "learning_rate": 6.765269454501995e-06, - "loss": 0.7708, + "learning_rate": 6.791363611092377e-06, + "loss": 0.8066, "step": 21733 }, { - "epoch": 0.6167423382519864, + "epoch": 0.6158859701323358, "grad_norm": 0.0, - "learning_rate": 6.764399802139885e-06, - "loss": 0.8425, + "learning_rate": 6.79049436308185e-06, + "loss": 0.7819, "step": 21734 }, { - "epoch": 0.6167707150964813, + "epoch": 0.6159143075745983, "grad_norm": 0.0, - "learning_rate": 6.7635301771090345e-06, - "loss": 0.904, + "learning_rate": 6.78962514210618e-06, + "loss": 0.8226, "step": 21735 }, { - "epoch": 0.6167990919409762, + "epoch": 0.6159426450168608, "grad_norm": 0.0, - "learning_rate": 6.762660579416791e-06, - "loss": 0.8302, + "learning_rate": 6.788755948172691e-06, + "loss": 0.9131, "step": 21736 }, { - "epoch": 0.616827468785471, + "epoch": 0.6159709824591232, "grad_norm": 0.0, - "learning_rate": 6.761791009070499e-06, - "loss": 0.8495, + "learning_rate": 6.787886781288702e-06, + "loss": 0.821, "step": 21737 }, { - "epoch": 0.616855845629966, + "epoch": 0.6159993199013857, "grad_norm": 0.0, - "learning_rate": 6.7609214660775015e-06, - "loss": 0.8908, + "learning_rate": 6.787017641461534e-06, + "loss": 0.9477, "step": 21738 }, { - "epoch": 0.6168842224744608, + "epoch": 0.6160276573436482, "grad_norm": 0.0, - "learning_rate": 6.760051950445149e-06, - "loss": 0.7386, + "learning_rate": 6.786148528698512e-06, + "loss": 0.8181, "step": 21739 }, { - "epoch": 0.6169125993189557, + "epoch": 0.6160559947859107, "grad_norm": 0.0, - "learning_rate": 6.759182462180782e-06, - "loss": 0.8295, + "learning_rate": 6.785279443006951e-06, + "loss": 0.762, "step": 21740 }, { - "epoch": 0.6169409761634507, + "epoch": 0.616084332228173, "grad_norm": 0.0, - "learning_rate": 6.7583130012917455e-06, - "loss": 0.8107, + "learning_rate": 6.784410384394176e-06, + "loss": 0.8702, "step": 21741 }, { - "epoch": 0.6169693530079455, + "epoch": 0.6161126696704355, "grad_norm": 0.0, - "learning_rate": 6.757443567785387e-06, - "loss": 0.8655, + "learning_rate": 6.783541352867511e-06, + "loss": 0.925, "step": 21742 }, { - "epoch": 0.6169977298524404, + "epoch": 0.616141007112698, "grad_norm": 0.0, - "learning_rate": 6.756574161669048e-06, - "loss": 0.8141, + "learning_rate": 6.782672348434267e-06, + "loss": 0.7847, "step": 21743 }, { - "epoch": 0.6170261066969353, + "epoch": 0.6161693445549604, "grad_norm": 0.0, - "learning_rate": 6.755704782950071e-06, - "loss": 0.8284, + "learning_rate": 6.781803371101774e-06, + "loss": 0.8134, "step": 21744 }, { - "epoch": 0.6170544835414302, + "epoch": 0.6161976819972229, "grad_norm": 0.0, - "learning_rate": 6.754835431635804e-06, - "loss": 0.8484, + "learning_rate": 6.780934420877341e-06, + "loss": 0.6497, "step": 21745 }, { - "epoch": 0.6170828603859251, + "epoch": 0.6162260194394854, "grad_norm": 0.0, - "learning_rate": 6.753966107733587e-06, - "loss": 0.7929, + "learning_rate": 6.780065497768291e-06, + "loss": 0.9847, "step": 21746 }, { - "epoch": 0.6171112372304199, + "epoch": 0.6162543568817479, "grad_norm": 0.0, - "learning_rate": 6.753096811250761e-06, - "loss": 0.8843, + "learning_rate": 6.7791966017819496e-06, + "loss": 0.8364, "step": 21747 }, { - "epoch": 0.6171396140749149, + "epoch": 0.6162826943240103, "grad_norm": 0.0, - "learning_rate": 6.752227542194674e-06, - "loss": 0.892, + "learning_rate": 6.7783277329256285e-06, + "loss": 0.8429, "step": 21748 }, { - "epoch": 0.6171679909194098, + "epoch": 0.6163110317662728, "grad_norm": 0.0, - "learning_rate": 6.751358300572668e-06, - "loss": 0.8229, + "learning_rate": 6.777458891206649e-06, + "loss": 0.7762, "step": 21749 }, { - "epoch": 0.6171963677639046, + "epoch": 0.6163393692085353, "grad_norm": 0.0, - "learning_rate": 6.750489086392084e-06, - "loss": 0.854, + "learning_rate": 6.776590076632334e-06, + "loss": 0.8622, "step": 21750 }, { - "epoch": 0.6172247446083996, + "epoch": 0.6163677066507977, "grad_norm": 0.0, - "learning_rate": 6.749619899660265e-06, - "loss": 0.8112, + "learning_rate": 6.775721289209994e-06, + "loss": 0.7729, "step": 21751 }, { - "epoch": 0.6172531214528945, + "epoch": 0.6163960440930601, "grad_norm": 0.0, - "learning_rate": 6.748750740384553e-06, - "loss": 0.9041, + "learning_rate": 6.774852528946951e-06, + "loss": 0.7718, "step": 21752 }, { - "epoch": 0.6172814982973893, + "epoch": 0.6164243815353226, "grad_norm": 0.0, - "learning_rate": 6.747881608572291e-06, - "loss": 0.9017, + "learning_rate": 6.773983795850523e-06, + "loss": 0.8417, "step": 21753 }, { - "epoch": 0.6173098751418842, + "epoch": 0.6164527189775851, "grad_norm": 0.0, - "learning_rate": 6.747012504230817e-06, - "loss": 0.8288, + "learning_rate": 6.7731150899280275e-06, + "loss": 0.8853, "step": 21754 }, { - "epoch": 0.6173382519863791, + "epoch": 0.6164810564198475, "grad_norm": 0.0, - "learning_rate": 6.7461434273674765e-06, - "loss": 0.7885, + "learning_rate": 6.772246411186784e-06, + "loss": 0.7951, "step": 21755 }, { - "epoch": 0.617366628830874, + "epoch": 0.61650939386211, "grad_norm": 0.0, - "learning_rate": 6.745274377989608e-06, - "loss": 0.8775, + "learning_rate": 6.771377759634105e-06, + "loss": 0.7812, "step": 21756 }, { - "epoch": 0.6173950056753689, + "epoch": 0.6165377313043725, "grad_norm": 0.0, - "learning_rate": 6.744405356104554e-06, - "loss": 0.8707, + "learning_rate": 6.770509135277315e-06, + "loss": 0.9404, "step": 21757 }, { - "epoch": 0.6174233825198638, + "epoch": 0.6165660687466349, "grad_norm": 0.0, - "learning_rate": 6.743536361719651e-06, - "loss": 0.8927, + "learning_rate": 6.769640538123721e-06, + "loss": 0.9042, "step": 21758 }, { - "epoch": 0.6174517593643587, + "epoch": 0.6165944061888974, "grad_norm": 0.0, - "learning_rate": 6.742667394842247e-06, - "loss": 0.8824, + "learning_rate": 6.768771968180643e-06, + "loss": 0.8251, "step": 21759 }, { - "epoch": 0.6174801362088536, + "epoch": 0.6166227436311599, "grad_norm": 0.0, - "learning_rate": 6.741798455479677e-06, - "loss": 0.9137, + "learning_rate": 6.767903425455402e-06, + "loss": 0.8502, "step": 21760 }, { - "epoch": 0.6175085130533484, + "epoch": 0.6166510810734223, "grad_norm": 0.0, - "learning_rate": 6.740929543639279e-06, - "loss": 0.8259, + "learning_rate": 6.7670349099553075e-06, + "loss": 0.7558, "step": 21761 }, { - "epoch": 0.6175368898978434, + "epoch": 0.6166794185156848, "grad_norm": 0.0, - "learning_rate": 6.740060659328398e-06, - "loss": 0.8312, + "learning_rate": 6.766166421687679e-06, + "loss": 0.8524, "step": 21762 }, { - "epoch": 0.6175652667423382, + "epoch": 0.6167077559579472, "grad_norm": 0.0, - "learning_rate": 6.739191802554368e-06, - "loss": 0.8054, + "learning_rate": 6.765297960659836e-06, + "loss": 0.7726, "step": 21763 }, { - "epoch": 0.6175936435868331, + "epoch": 0.6167360934002097, "grad_norm": 0.0, - "learning_rate": 6.7383229733245345e-06, - "loss": 0.8301, + "learning_rate": 6.764429526879086e-06, + "loss": 0.8788, "step": 21764 }, { - "epoch": 0.6176220204313281, + "epoch": 0.6167644308424721, "grad_norm": 0.0, - "learning_rate": 6.737454171646231e-06, - "loss": 0.9967, + "learning_rate": 6.763561120352748e-06, + "loss": 0.7434, "step": 21765 }, { - "epoch": 0.6176503972758229, + "epoch": 0.6167927682847346, "grad_norm": 0.0, - "learning_rate": 6.736585397526802e-06, - "loss": 0.8056, + "learning_rate": 6.762692741088136e-06, + "loss": 0.7928, "step": 21766 }, { - "epoch": 0.6176787741203178, + "epoch": 0.6168211057269971, "grad_norm": 0.0, - "learning_rate": 6.735716650973583e-06, - "loss": 0.7994, + "learning_rate": 6.761824389092564e-06, + "loss": 0.8654, "step": 21767 }, { - "epoch": 0.6177071509648128, + "epoch": 0.6168494431692595, "grad_norm": 0.0, - "learning_rate": 6.73484793199391e-06, - "loss": 0.9818, + "learning_rate": 6.760956064373352e-06, + "loss": 0.8548, "step": 21768 }, { - "epoch": 0.6177355278093076, + "epoch": 0.616877780611522, "grad_norm": 0.0, - "learning_rate": 6.733979240595124e-06, - "loss": 0.7628, + "learning_rate": 6.760087766937806e-06, + "loss": 0.8395, "step": 21769 }, { - "epoch": 0.6177639046538025, + "epoch": 0.6169061180537845, "grad_norm": 0.0, - "learning_rate": 6.733110576784564e-06, - "loss": 0.7924, + "learning_rate": 6.759219496793245e-06, + "loss": 0.7989, "step": 21770 }, { - "epoch": 0.6177922814982973, + "epoch": 0.616934455496047, "grad_norm": 0.0, - "learning_rate": 6.7322419405695625e-06, - "loss": 0.8381, + "learning_rate": 6.758351253946984e-06, + "loss": 0.8536, "step": 21771 }, { - "epoch": 0.6178206583427923, + "epoch": 0.6169627929383094, "grad_norm": 0.0, - "learning_rate": 6.731373331957463e-06, - "loss": 0.8478, + "learning_rate": 6.757483038406331e-06, + "loss": 0.9484, "step": 21772 }, { - "epoch": 0.6178490351872872, + "epoch": 0.6169911303805719, "grad_norm": 0.0, - "learning_rate": 6.7305047509555995e-06, - "loss": 0.7826, + "learning_rate": 6.756614850178603e-06, + "loss": 0.8868, "step": 21773 }, { - "epoch": 0.617877412031782, + "epoch": 0.6170194678228343, "grad_norm": 0.0, - "learning_rate": 6.7296361975713074e-06, - "loss": 0.7965, + "learning_rate": 6.755746689271112e-06, + "loss": 0.8163, "step": 21774 }, { - "epoch": 0.617905788876277, + "epoch": 0.6170478052650967, "grad_norm": 0.0, - "learning_rate": 6.728767671811927e-06, - "loss": 0.8416, + "learning_rate": 6.754878555691171e-06, + "loss": 0.872, "step": 21775 }, { - "epoch": 0.6179341657207719, + "epoch": 0.6170761427073592, "grad_norm": 0.0, - "learning_rate": 6.727899173684793e-06, - "loss": 0.7947, + "learning_rate": 6.754010449446098e-06, + "loss": 0.8406, "step": 21776 }, { - "epoch": 0.6179625425652667, + "epoch": 0.6171044801496217, "grad_norm": 0.0, - "learning_rate": 6.727030703197241e-06, - "loss": 0.9538, + "learning_rate": 6.7531423705431945e-06, + "loss": 0.9298, "step": 21777 }, { - "epoch": 0.6179909194097616, + "epoch": 0.6171328175918842, "grad_norm": 0.0, - "learning_rate": 6.726162260356607e-06, - "loss": 0.8752, + "learning_rate": 6.752274318989779e-06, + "loss": 0.7172, "step": 21778 }, { - "epoch": 0.6180192962542566, + "epoch": 0.6171611550341466, "grad_norm": 0.0, - "learning_rate": 6.725293845170227e-06, - "loss": 0.9504, + "learning_rate": 6.7514062947931655e-06, + "loss": 0.9642, "step": 21779 }, { - "epoch": 0.6180476730987514, + "epoch": 0.6171894924764091, "grad_norm": 0.0, - "learning_rate": 6.724425457645441e-06, - "loss": 0.8848, + "learning_rate": 6.75053829796066e-06, + "loss": 0.8731, "step": 21780 }, { - "epoch": 0.6180760499432463, + "epoch": 0.6172178299186716, "grad_norm": 0.0, - "learning_rate": 6.723557097789578e-06, - "loss": 0.9665, + "learning_rate": 6.7496703284995824e-06, + "loss": 0.8431, "step": 21781 }, { - "epoch": 0.6181044267877412, + "epoch": 0.617246167360934, "grad_norm": 0.0, - "learning_rate": 6.7226887656099745e-06, - "loss": 0.7786, + "learning_rate": 6.7488023864172345e-06, + "loss": 0.7916, "step": 21782 }, { - "epoch": 0.6181328036322361, + "epoch": 0.6172745048031965, "grad_norm": 0.0, - "learning_rate": 6.7218204611139685e-06, - "loss": 0.79, + "learning_rate": 6.7479344717209305e-06, + "loss": 0.8325, "step": 21783 }, { - "epoch": 0.618161180476731, + "epoch": 0.617302842245459, "grad_norm": 0.0, - "learning_rate": 6.7209521843088925e-06, - "loss": 0.905, + "learning_rate": 6.747066584417987e-06, + "loss": 0.8492, "step": 21784 }, { - "epoch": 0.6181895573212258, + "epoch": 0.6173311796877213, "grad_norm": 0.0, - "learning_rate": 6.720083935202078e-06, - "loss": 0.7908, + "learning_rate": 6.746198724515705e-06, + "loss": 0.8571, "step": 21785 }, { - "epoch": 0.6182179341657208, + "epoch": 0.6173595171299838, "grad_norm": 0.0, - "learning_rate": 6.719215713800864e-06, - "loss": 0.7605, + "learning_rate": 6.745330892021402e-06, + "loss": 0.9452, "step": 21786 }, { - "epoch": 0.6182463110102157, + "epoch": 0.6173878545722463, "grad_norm": 0.0, - "learning_rate": 6.7183475201125835e-06, - "loss": 0.7424, + "learning_rate": 6.744463086942383e-06, + "loss": 0.8674, "step": 21787 }, { - "epoch": 0.6182746878547105, + "epoch": 0.6174161920145088, "grad_norm": 0.0, - "learning_rate": 6.717479354144567e-06, - "loss": 0.9286, + "learning_rate": 6.74359530928596e-06, + "loss": 0.9906, "step": 21788 }, { - "epoch": 0.6183030646992055, + "epoch": 0.6174445294567712, "grad_norm": 0.0, - "learning_rate": 6.716611215904151e-06, - "loss": 0.8433, + "learning_rate": 6.742727559059448e-06, + "loss": 0.8524, "step": 21789 }, { - "epoch": 0.6183314415437003, + "epoch": 0.6174728668990337, "grad_norm": 0.0, - "learning_rate": 6.715743105398669e-06, - "loss": 0.823, + "learning_rate": 6.741859836270146e-06, + "loss": 0.8357, "step": 21790 }, { - "epoch": 0.6183598183881952, + "epoch": 0.6175012043412962, "grad_norm": 0.0, - "learning_rate": 6.7148750226354485e-06, - "loss": 0.8434, + "learning_rate": 6.7409921409253685e-06, + "loss": 0.8522, "step": 21791 }, { - "epoch": 0.6183881952326902, + "epoch": 0.6175295417835586, "grad_norm": 0.0, - "learning_rate": 6.714006967621829e-06, - "loss": 0.8887, + "learning_rate": 6.740124473032428e-06, + "loss": 0.9457, "step": 21792 }, { - "epoch": 0.618416572077185, + "epoch": 0.6175578792258211, "grad_norm": 0.0, - "learning_rate": 6.7131389403651385e-06, - "loss": 0.9185, + "learning_rate": 6.739256832598626e-06, + "loss": 0.8098, "step": 21793 }, { - "epoch": 0.6184449489216799, + "epoch": 0.6175862166680836, "grad_norm": 0.0, - "learning_rate": 6.712270940872713e-06, - "loss": 0.9094, + "learning_rate": 6.73838921963128e-06, + "loss": 0.7956, "step": 21794 }, { - "epoch": 0.6184733257661748, + "epoch": 0.617614554110346, "grad_norm": 0.0, - "learning_rate": 6.711402969151881e-06, - "loss": 0.7816, + "learning_rate": 6.737521634137687e-06, + "loss": 0.9779, "step": 21795 }, { - "epoch": 0.6185017026106697, + "epoch": 0.6176428915526084, "grad_norm": 0.0, - "learning_rate": 6.710535025209978e-06, - "loss": 0.9648, + "learning_rate": 6.736654076125162e-06, + "loss": 0.791, "step": 21796 }, { - "epoch": 0.6185300794551646, + "epoch": 0.6176712289948709, "grad_norm": 0.0, - "learning_rate": 6.709667109054334e-06, - "loss": 0.7929, + "learning_rate": 6.735786545601015e-06, + "loss": 0.8185, "step": 21797 }, { - "epoch": 0.6185584562996594, + "epoch": 0.6176995664371334, "grad_norm": 0.0, - "learning_rate": 6.708799220692279e-06, - "loss": 0.7789, + "learning_rate": 6.734919042572548e-06, + "loss": 0.8718, "step": 21798 }, { - "epoch": 0.6185868331441544, + "epoch": 0.6177279038793958, "grad_norm": 0.0, - "learning_rate": 6.707931360131144e-06, - "loss": 0.7372, + "learning_rate": 6.734051567047068e-06, + "loss": 0.9011, "step": 21799 }, { - "epoch": 0.6186152099886493, + "epoch": 0.6177562413216583, "grad_norm": 0.0, - "learning_rate": 6.707063527378261e-06, - "loss": 0.8544, + "learning_rate": 6.7331841190318856e-06, + "loss": 0.7667, "step": 21800 }, { - "epoch": 0.6186435868331441, + "epoch": 0.6177845787639208, "grad_norm": 0.0, - "learning_rate": 6.706195722440963e-06, - "loss": 0.936, + "learning_rate": 6.732316698534307e-06, + "loss": 0.8333, "step": 21801 }, { - "epoch": 0.618671963677639, + "epoch": 0.6178129162061833, "grad_norm": 0.0, - "learning_rate": 6.7053279453265744e-06, - "loss": 0.9677, + "learning_rate": 6.731449305561641e-06, + "loss": 0.7618, "step": 21802 }, { - "epoch": 0.618700340522134, + "epoch": 0.6178412536484457, "grad_norm": 0.0, - "learning_rate": 6.70446019604243e-06, - "loss": 0.8933, + "learning_rate": 6.730581940121188e-06, + "loss": 0.8626, "step": 21803 }, { - "epoch": 0.6187287173666288, + "epoch": 0.6178695910907082, "grad_norm": 0.0, - "learning_rate": 6.703592474595859e-06, - "loss": 0.847, + "learning_rate": 6.729714602220256e-06, + "loss": 0.8688, "step": 21804 }, { - "epoch": 0.6187570942111237, + "epoch": 0.6178979285329707, "grad_norm": 0.0, - "learning_rate": 6.702724780994189e-06, - "loss": 0.8133, + "learning_rate": 6.728847291866156e-06, + "loss": 0.8596, "step": 21805 }, { - "epoch": 0.6187854710556187, + "epoch": 0.617926265975233, "grad_norm": 0.0, - "learning_rate": 6.701857115244752e-06, - "loss": 0.8166, + "learning_rate": 6.727980009066186e-06, + "loss": 0.784, "step": 21806 }, { - "epoch": 0.6188138479001135, + "epoch": 0.6179546034174955, "grad_norm": 0.0, - "learning_rate": 6.700989477354878e-06, - "loss": 0.9488, + "learning_rate": 6.727112753827658e-06, + "loss": 0.8125, "step": 21807 }, { - "epoch": 0.6188422247446084, + "epoch": 0.617982940859758, "grad_norm": 0.0, - "learning_rate": 6.70012186733189e-06, - "loss": 0.8416, + "learning_rate": 6.726245526157877e-06, + "loss": 0.9078, "step": 21808 }, { - "epoch": 0.6188706015891033, + "epoch": 0.6180112783020204, "grad_norm": 0.0, - "learning_rate": 6.699254285183121e-06, - "loss": 0.9091, + "learning_rate": 6.725378326064141e-06, + "loss": 0.869, "step": 21809 }, { - "epoch": 0.6188989784335982, + "epoch": 0.6180396157442829, "grad_norm": 0.0, - "learning_rate": 6.698386730915903e-06, - "loss": 0.8333, + "learning_rate": 6.7245111535537654e-06, + "loss": 0.7299, "step": 21810 }, { - "epoch": 0.6189273552780931, + "epoch": 0.6180679531865454, "grad_norm": 0.0, - "learning_rate": 6.697519204537559e-06, - "loss": 0.9325, + "learning_rate": 6.723644008634043e-06, + "loss": 0.8289, "step": 21811 }, { - "epoch": 0.6189557321225879, + "epoch": 0.6180962906288079, "grad_norm": 0.0, - "learning_rate": 6.6966517060554185e-06, - "loss": 0.813, + "learning_rate": 6.722776891312284e-06, + "loss": 0.7378, "step": 21812 }, { - "epoch": 0.6189841089670829, + "epoch": 0.6181246280710703, "grad_norm": 0.0, - "learning_rate": 6.695784235476811e-06, - "loss": 0.8438, + "learning_rate": 6.721909801595794e-06, + "loss": 0.8479, "step": 21813 }, { - "epoch": 0.6190124858115778, + "epoch": 0.6181529655133328, "grad_norm": 0.0, - "learning_rate": 6.694916792809063e-06, - "loss": 0.8625, + "learning_rate": 6.721042739491874e-06, + "loss": 0.8678, "step": 21814 }, { - "epoch": 0.6190408626560726, + "epoch": 0.6181813029555953, "grad_norm": 0.0, - "learning_rate": 6.694049378059498e-06, - "loss": 0.8224, + "learning_rate": 6.720175705007832e-06, + "loss": 0.9035, "step": 21815 }, { - "epoch": 0.6190692395005676, + "epoch": 0.6182096403978576, "grad_norm": 0.0, - "learning_rate": 6.69318199123545e-06, - "loss": 0.7755, + "learning_rate": 6.7193086981509635e-06, + "loss": 0.8481, "step": 21816 }, { - "epoch": 0.6190976163450624, + "epoch": 0.6182379778401201, "grad_norm": 0.0, - "learning_rate": 6.692314632344241e-06, - "loss": 0.8492, + "learning_rate": 6.718441718928577e-06, + "loss": 0.9373, "step": 21817 }, { - "epoch": 0.6191259931895573, + "epoch": 0.6182663152823826, "grad_norm": 0.0, - "learning_rate": 6.691447301393199e-06, - "loss": 0.8649, + "learning_rate": 6.717574767347977e-06, + "loss": 0.8214, "step": 21818 }, { - "epoch": 0.6191543700340522, + "epoch": 0.6182946527246451, "grad_norm": 0.0, - "learning_rate": 6.69057999838965e-06, - "loss": 0.8091, + "learning_rate": 6.71670784341646e-06, + "loss": 0.8687, "step": 21819 }, { - "epoch": 0.6191827468785471, + "epoch": 0.6183229901669075, "grad_norm": 0.0, - "learning_rate": 6.689712723340923e-06, - "loss": 0.8208, + "learning_rate": 6.715840947141332e-06, + "loss": 0.9056, "step": 21820 }, { - "epoch": 0.619211123723042, + "epoch": 0.61835132760917, "grad_norm": 0.0, - "learning_rate": 6.688845476254337e-06, - "loss": 1.0013, + "learning_rate": 6.714974078529901e-06, + "loss": 0.9038, "step": 21821 }, { - "epoch": 0.6192395005675368, + "epoch": 0.6183796650514325, "grad_norm": 0.0, - "learning_rate": 6.6879782571372265e-06, - "loss": 0.8771, + "learning_rate": 6.7141072375894575e-06, + "loss": 0.8541, "step": 21822 }, { - "epoch": 0.6192678774120318, + "epoch": 0.6184080024936949, "grad_norm": 0.0, - "learning_rate": 6.687111065996912e-06, - "loss": 0.7353, + "learning_rate": 6.713240424327314e-06, + "loss": 0.8835, "step": 21823 }, { - "epoch": 0.6192962542565267, + "epoch": 0.6184363399359574, "grad_norm": 0.0, - "learning_rate": 6.686243902840715e-06, - "loss": 0.8721, + "learning_rate": 6.712373638750762e-06, + "loss": 0.8074, "step": 21824 }, { - "epoch": 0.6193246311010215, + "epoch": 0.6184646773782199, "grad_norm": 0.0, - "learning_rate": 6.6853767676759675e-06, - "loss": 0.818, + "learning_rate": 6.711506880867109e-06, + "loss": 0.8269, "step": 21825 }, { - "epoch": 0.6193530079455165, + "epoch": 0.6184930148204824, "grad_norm": 0.0, - "learning_rate": 6.68450966050999e-06, - "loss": 0.8019, + "learning_rate": 6.710640150683656e-06, + "loss": 0.8765, "step": 21826 }, { - "epoch": 0.6193813847900114, + "epoch": 0.6185213522627447, "grad_norm": 0.0, - "learning_rate": 6.68364258135011e-06, - "loss": 0.8452, + "learning_rate": 6.7097734482077e-06, + "loss": 0.9168, "step": 21827 }, { - "epoch": 0.6194097616345062, + "epoch": 0.6185496897050072, "grad_norm": 0.0, - "learning_rate": 6.682775530203651e-06, - "loss": 0.8079, + "learning_rate": 6.708906773446544e-06, + "loss": 0.9141, "step": 21828 }, { - "epoch": 0.6194381384790011, + "epoch": 0.6185780271472697, "grad_norm": 0.0, - "learning_rate": 6.681908507077935e-06, - "loss": 0.9027, + "learning_rate": 6.708040126407493e-06, + "loss": 0.7822, "step": 21829 }, { - "epoch": 0.6194665153234961, + "epoch": 0.6186063645895321, "grad_norm": 0.0, - "learning_rate": 6.681041511980289e-06, - "loss": 0.9117, + "learning_rate": 6.7071735070978396e-06, + "loss": 0.7991, "step": 21830 }, { - "epoch": 0.6194948921679909, + "epoch": 0.6186347020317946, "grad_norm": 0.0, - "learning_rate": 6.680174544918032e-06, - "loss": 0.8896, + "learning_rate": 6.706306915524887e-06, + "loss": 0.8788, "step": 21831 }, { - "epoch": 0.6195232690124858, + "epoch": 0.6186630394740571, "grad_norm": 0.0, - "learning_rate": 6.67930760589849e-06, - "loss": 0.8987, + "learning_rate": 6.705440351695932e-06, + "loss": 0.888, "step": 21832 }, { - "epoch": 0.6195516458569807, + "epoch": 0.6186913769163195, "grad_norm": 0.0, - "learning_rate": 6.678440694928987e-06, - "loss": 0.7624, + "learning_rate": 6.704573815618277e-06, + "loss": 0.8702, "step": 21833 }, { - "epoch": 0.6195800227014756, + "epoch": 0.618719714358582, "grad_norm": 0.0, - "learning_rate": 6.677573812016846e-06, - "loss": 0.8248, + "learning_rate": 6.703707307299224e-06, + "loss": 0.7954, "step": 21834 }, { - "epoch": 0.6196083995459705, + "epoch": 0.6187480518008445, "grad_norm": 0.0, - "learning_rate": 6.676706957169385e-06, - "loss": 0.9006, + "learning_rate": 6.702840826746065e-06, + "loss": 0.8258, "step": 21835 }, { - "epoch": 0.6196367763904653, + "epoch": 0.618776389243107, "grad_norm": 0.0, - "learning_rate": 6.675840130393933e-06, - "loss": 0.7712, + "learning_rate": 6.7019743739661025e-06, + "loss": 0.8583, "step": 21836 }, { - "epoch": 0.6196651532349603, + "epoch": 0.6188047266853693, "grad_norm": 0.0, - "learning_rate": 6.674973331697808e-06, - "loss": 0.7288, + "learning_rate": 6.701107948966635e-06, + "loss": 0.8464, "step": 21837 }, { - "epoch": 0.6196935300794552, + "epoch": 0.6188330641276318, "grad_norm": 0.0, - "learning_rate": 6.674106561088332e-06, - "loss": 0.877, + "learning_rate": 6.70024155175496e-06, + "loss": 0.8337, "step": 21838 }, { - "epoch": 0.61972190692395, + "epoch": 0.6188614015698943, "grad_norm": 0.0, - "learning_rate": 6.673239818572826e-06, - "loss": 0.9152, + "learning_rate": 6.699375182338379e-06, + "loss": 0.7678, "step": 21839 }, { - "epoch": 0.619750283768445, + "epoch": 0.6188897390121567, "grad_norm": 0.0, - "learning_rate": 6.672373104158612e-06, - "loss": 0.9267, + "learning_rate": 6.698508840724182e-06, + "loss": 0.9094, "step": 21840 }, { - "epoch": 0.6197786606129398, + "epoch": 0.6189180764544192, "grad_norm": 0.0, - "learning_rate": 6.671506417853015e-06, - "loss": 0.8803, + "learning_rate": 6.697642526919671e-06, + "loss": 1.0568, "step": 21841 }, { - "epoch": 0.6198070374574347, + "epoch": 0.6189464138966817, "grad_norm": 0.0, - "learning_rate": 6.670639759663353e-06, - "loss": 0.8581, + "learning_rate": 6.696776240932148e-06, + "loss": 0.9548, "step": 21842 }, { - "epoch": 0.6198354143019297, + "epoch": 0.6189747513389442, "grad_norm": 0.0, - "learning_rate": 6.669773129596945e-06, - "loss": 0.8199, + "learning_rate": 6.6959099827689e-06, + "loss": 0.8733, "step": 21843 }, { - "epoch": 0.6198637911464245, + "epoch": 0.6190030887812066, "grad_norm": 0.0, - "learning_rate": 6.668906527661115e-06, - "loss": 0.8551, + "learning_rate": 6.695043752437234e-06, + "loss": 0.8359, "step": 21844 }, { - "epoch": 0.6198921679909194, + "epoch": 0.6190314262234691, "grad_norm": 0.0, - "learning_rate": 6.6680399538631816e-06, - "loss": 0.7591, + "learning_rate": 6.694177549944436e-06, + "loss": 0.7554, "step": 21845 }, { - "epoch": 0.6199205448354143, + "epoch": 0.6190597636657316, "grad_norm": 0.0, - "learning_rate": 6.667173408210461e-06, - "loss": 0.8793, + "learning_rate": 6.693311375297811e-06, + "loss": 0.8691, "step": 21846 }, { - "epoch": 0.6199489216799092, + "epoch": 0.619088101107994, "grad_norm": 0.0, - "learning_rate": 6.666306890710279e-06, - "loss": 0.89, + "learning_rate": 6.692445228504656e-06, + "loss": 0.7212, "step": 21847 }, { - "epoch": 0.6199772985244041, + "epoch": 0.6191164385502564, "grad_norm": 0.0, - "learning_rate": 6.665440401369954e-06, - "loss": 0.9237, + "learning_rate": 6.691579109572257e-06, + "loss": 0.7998, "step": 21848 }, { - "epoch": 0.620005675368899, + "epoch": 0.6191447759925189, "grad_norm": 0.0, - "learning_rate": 6.6645739401968e-06, - "loss": 0.9152, + "learning_rate": 6.690713018507917e-06, + "loss": 0.9198, "step": 21849 }, { - "epoch": 0.6200340522133939, + "epoch": 0.6191731134347814, "grad_norm": 0.0, - "learning_rate": 6.663707507198143e-06, - "loss": 0.9438, + "learning_rate": 6.6898469553189325e-06, + "loss": 0.6979, "step": 21850 }, { - "epoch": 0.6200624290578888, + "epoch": 0.6192014508770438, "grad_norm": 0.0, - "learning_rate": 6.662841102381298e-06, - "loss": 0.9296, + "learning_rate": 6.688980920012593e-06, + "loss": 0.8806, "step": 21851 }, { - "epoch": 0.6200908059023836, + "epoch": 0.6192297883193063, "grad_norm": 0.0, - "learning_rate": 6.661974725753582e-06, - "loss": 0.7682, + "learning_rate": 6.688114912596202e-06, + "loss": 0.934, "step": 21852 }, { - "epoch": 0.6201191827468785, + "epoch": 0.6192581257615688, "grad_norm": 0.0, - "learning_rate": 6.661108377322319e-06, - "loss": 0.8877, + "learning_rate": 6.687248933077045e-06, + "loss": 0.9032, "step": 21853 }, { - "epoch": 0.6201475595913735, + "epoch": 0.6192864632038312, "grad_norm": 0.0, - "learning_rate": 6.660242057094821e-06, - "loss": 0.8409, + "learning_rate": 6.686382981462421e-06, + "loss": 0.89, "step": 21854 }, { - "epoch": 0.6201759364358683, + "epoch": 0.6193148006460937, "grad_norm": 0.0, - "learning_rate": 6.659375765078407e-06, - "loss": 0.9195, + "learning_rate": 6.685517057759625e-06, + "loss": 0.8848, "step": 21855 }, { - "epoch": 0.6202043132803632, + "epoch": 0.6193431380883562, "grad_norm": 0.0, - "learning_rate": 6.658509501280397e-06, - "loss": 0.9978, + "learning_rate": 6.684651161975948e-06, + "loss": 0.8676, "step": 21856 }, { - "epoch": 0.6202326901248582, + "epoch": 0.6193714755306186, "grad_norm": 0.0, - "learning_rate": 6.6576432657081095e-06, - "loss": 0.8621, + "learning_rate": 6.683785294118684e-06, + "loss": 0.8298, "step": 21857 }, { - "epoch": 0.620261066969353, + "epoch": 0.619399812972881, "grad_norm": 0.0, - "learning_rate": 6.65677705836886e-06, - "loss": 0.867, + "learning_rate": 6.6829194541951315e-06, + "loss": 0.7756, "step": 21858 }, { - "epoch": 0.6202894438138479, + "epoch": 0.6194281504151435, "grad_norm": 0.0, - "learning_rate": 6.655910879269962e-06, - "loss": 0.8328, + "learning_rate": 6.682053642212576e-06, + "loss": 0.8694, "step": 21859 }, { - "epoch": 0.6203178206583428, + "epoch": 0.619456487857406, "grad_norm": 0.0, - "learning_rate": 6.655044728418738e-06, - "loss": 0.9285, + "learning_rate": 6.681187858178321e-06, + "loss": 0.8441, "step": 21860 }, { - "epoch": 0.6203461975028377, + "epoch": 0.6194848252996684, "grad_norm": 0.0, - "learning_rate": 6.654178605822502e-06, - "loss": 0.7657, + "learning_rate": 6.680322102099648e-06, + "loss": 0.8742, "step": 21861 }, { - "epoch": 0.6203745743473326, + "epoch": 0.6195131627419309, "grad_norm": 0.0, - "learning_rate": 6.653312511488567e-06, - "loss": 0.8338, + "learning_rate": 6.679456373983854e-06, + "loss": 0.92, "step": 21862 }, { - "epoch": 0.6204029511918274, + "epoch": 0.6195415001841934, "grad_norm": 0.0, - "learning_rate": 6.652446445424254e-06, - "loss": 0.9123, + "learning_rate": 6.678590673838234e-06, + "loss": 0.909, "step": 21863 }, { - "epoch": 0.6204313280363224, + "epoch": 0.6195698376264558, "grad_norm": 0.0, - "learning_rate": 6.651580407636876e-06, - "loss": 0.8001, + "learning_rate": 6.677725001670078e-06, + "loss": 0.9313, "step": 21864 }, { - "epoch": 0.6204597048808173, + "epoch": 0.6195981750687183, "grad_norm": 0.0, - "learning_rate": 6.650714398133749e-06, - "loss": 0.8969, + "learning_rate": 6.676859357486676e-06, + "loss": 0.8924, "step": 21865 }, { - "epoch": 0.6204880817253121, + "epoch": 0.6196265125109808, "grad_norm": 0.0, - "learning_rate": 6.6498484169221865e-06, - "loss": 0.8915, + "learning_rate": 6.675993741295327e-06, + "loss": 0.9262, "step": 21866 }, { - "epoch": 0.6205164585698071, + "epoch": 0.6196548499532433, "grad_norm": 0.0, - "learning_rate": 6.6489824640095055e-06, - "loss": 0.9219, + "learning_rate": 6.6751281531033116e-06, + "loss": 0.7947, "step": 21867 }, { - "epoch": 0.6205448354143019, + "epoch": 0.6196831873955057, "grad_norm": 0.0, - "learning_rate": 6.648116539403022e-06, - "loss": 0.8929, + "learning_rate": 6.674262592917933e-06, + "loss": 0.7587, "step": 21868 }, { - "epoch": 0.6205732122587968, + "epoch": 0.6197115248377681, "grad_norm": 0.0, - "learning_rate": 6.647250643110046e-06, - "loss": 0.8701, + "learning_rate": 6.673397060746469e-06, + "loss": 0.9097, "step": 21869 }, { - "epoch": 0.6206015891032917, + "epoch": 0.6197398622800306, "grad_norm": 0.0, - "learning_rate": 6.646384775137893e-06, - "loss": 0.8338, + "learning_rate": 6.672531556596218e-06, + "loss": 0.8405, "step": 21870 }, { - "epoch": 0.6206299659477866, + "epoch": 0.619768199722293, "grad_norm": 0.0, - "learning_rate": 6.645518935493882e-06, - "loss": 0.8836, + "learning_rate": 6.671666080474471e-06, + "loss": 0.929, "step": 21871 }, { - "epoch": 0.6206583427922815, + "epoch": 0.6197965371645555, "grad_norm": 0.0, - "learning_rate": 6.644653124185323e-06, - "loss": 0.8082, + "learning_rate": 6.670800632388514e-06, + "loss": 0.8633, "step": 21872 }, { - "epoch": 0.6206867196367764, + "epoch": 0.619824874606818, "grad_norm": 0.0, - "learning_rate": 6.6437873412195284e-06, - "loss": 0.7879, + "learning_rate": 6.669935212345645e-06, + "loss": 0.8013, "step": 21873 }, { - "epoch": 0.6207150964812713, + "epoch": 0.6198532120490805, "grad_norm": 0.0, - "learning_rate": 6.642921586603815e-06, - "loss": 0.7525, + "learning_rate": 6.6690698203531446e-06, + "loss": 0.8435, "step": 21874 }, { - "epoch": 0.6207434733257662, + "epoch": 0.6198815494913429, "grad_norm": 0.0, - "learning_rate": 6.642055860345494e-06, - "loss": 0.9025, + "learning_rate": 6.668204456418304e-06, + "loss": 0.8921, "step": 21875 }, { - "epoch": 0.620771850170261, + "epoch": 0.6199098869336054, "grad_norm": 0.0, - "learning_rate": 6.641190162451876e-06, - "loss": 0.8716, + "learning_rate": 6.6673391205484175e-06, + "loss": 0.8635, "step": 21876 }, { - "epoch": 0.620800227014756, + "epoch": 0.6199382243758679, "grad_norm": 0.0, - "learning_rate": 6.640324492930277e-06, - "loss": 0.785, + "learning_rate": 6.666473812750769e-06, + "loss": 0.9371, "step": 21877 }, { - "epoch": 0.6208286038592509, + "epoch": 0.6199665618181303, "grad_norm": 0.0, - "learning_rate": 6.63945885178801e-06, - "loss": 0.7694, + "learning_rate": 6.66560853303265e-06, + "loss": 0.864, "step": 21878 }, { - "epoch": 0.6208569807037457, + "epoch": 0.6199948992603928, "grad_norm": 0.0, - "learning_rate": 6.63859323903238e-06, - "loss": 0.8674, + "learning_rate": 6.664743281401351e-06, + "loss": 0.8972, "step": 21879 }, { - "epoch": 0.6208853575482406, + "epoch": 0.6200232367026552, "grad_norm": 0.0, - "learning_rate": 6.637727654670709e-06, - "loss": 0.901, + "learning_rate": 6.663878057864155e-06, + "loss": 0.8909, "step": 21880 }, { - "epoch": 0.6209137343927356, + "epoch": 0.6200515741449176, "grad_norm": 0.0, - "learning_rate": 6.636862098710302e-06, - "loss": 0.8235, + "learning_rate": 6.663012862428357e-06, + "loss": 0.9342, "step": 21881 }, { - "epoch": 0.6209421112372304, + "epoch": 0.6200799115871801, "grad_norm": 0.0, - "learning_rate": 6.6359965711584706e-06, - "loss": 0.8701, + "learning_rate": 6.662147695101237e-06, + "loss": 0.8369, "step": 21882 }, { - "epoch": 0.6209704880817253, + "epoch": 0.6201082490294426, "grad_norm": 0.0, - "learning_rate": 6.63513107202253e-06, - "loss": 0.7714, + "learning_rate": 6.661282555890086e-06, + "loss": 0.8967, "step": 21883 }, { - "epoch": 0.6209988649262203, + "epoch": 0.6201365864717051, "grad_norm": 0.0, - "learning_rate": 6.634265601309787e-06, - "loss": 0.8569, + "learning_rate": 6.660417444802194e-06, + "loss": 0.8528, "step": 21884 }, { - "epoch": 0.6210272417707151, + "epoch": 0.6201649239139675, "grad_norm": 0.0, - "learning_rate": 6.633400159027551e-06, - "loss": 0.8047, + "learning_rate": 6.659552361844844e-06, + "loss": 0.8251, "step": 21885 }, { - "epoch": 0.62105561861521, + "epoch": 0.62019326135623, "grad_norm": 0.0, - "learning_rate": 6.632534745183139e-06, - "loss": 0.7981, + "learning_rate": 6.658687307025325e-06, + "loss": 0.9893, "step": 21886 }, { - "epoch": 0.6210839954597048, + "epoch": 0.6202215987984925, "grad_norm": 0.0, - "learning_rate": 6.631669359783854e-06, - "loss": 0.9154, + "learning_rate": 6.657822280350927e-06, + "loss": 0.9036, "step": 21887 }, { - "epoch": 0.6211123723041998, + "epoch": 0.6202499362407549, "grad_norm": 0.0, - "learning_rate": 6.630804002837013e-06, - "loss": 0.8375, + "learning_rate": 6.65695728182893e-06, + "loss": 0.961, "step": 21888 }, { - "epoch": 0.6211407491486947, + "epoch": 0.6202782736830174, "grad_norm": 0.0, - "learning_rate": 6.629938674349921e-06, - "loss": 0.7627, + "learning_rate": 6.656092311466624e-06, + "loss": 0.8634, "step": 21889 }, { - "epoch": 0.6211691259931895, + "epoch": 0.6203066111252798, "grad_norm": 0.0, - "learning_rate": 6.629073374329889e-06, - "loss": 0.9248, + "learning_rate": 6.6552273692712935e-06, + "loss": 0.8221, "step": 21890 }, { - "epoch": 0.6211975028376845, + "epoch": 0.6203349485675423, "grad_norm": 0.0, - "learning_rate": 6.628208102784225e-06, - "loss": 0.8515, + "learning_rate": 6.654362455250224e-06, + "loss": 0.8793, "step": 21891 }, { - "epoch": 0.6212258796821793, + "epoch": 0.6203632860098047, "grad_norm": 0.0, - "learning_rate": 6.6273428597202395e-06, - "loss": 0.7264, + "learning_rate": 6.653497569410706e-06, + "loss": 0.9656, "step": 21892 }, { - "epoch": 0.6212542565266742, + "epoch": 0.6203916234520672, "grad_norm": 0.0, - "learning_rate": 6.626477645145238e-06, - "loss": 0.8512, + "learning_rate": 6.652632711760017e-06, + "loss": 0.7755, "step": 21893 }, { - "epoch": 0.6212826333711692, + "epoch": 0.6204199608943297, "grad_norm": 0.0, - "learning_rate": 6.6256124590665346e-06, - "loss": 0.715, + "learning_rate": 6.651767882305447e-06, + "loss": 0.7862, "step": 21894 }, { - "epoch": 0.621311010215664, + "epoch": 0.6204482983365921, "grad_norm": 0.0, - "learning_rate": 6.624747301491433e-06, - "loss": 0.8176, + "learning_rate": 6.650903081054281e-06, + "loss": 0.81, "step": 21895 }, { - "epoch": 0.6213393870601589, + "epoch": 0.6204766357788546, "grad_norm": 0.0, - "learning_rate": 6.623882172427242e-06, - "loss": 0.8073, + "learning_rate": 6.6500383080137985e-06, + "loss": 0.8678, "step": 21896 }, { - "epoch": 0.6213677639046538, + "epoch": 0.6205049732211171, "grad_norm": 0.0, - "learning_rate": 6.6230170718812714e-06, - "loss": 0.7603, + "learning_rate": 6.64917356319129e-06, + "loss": 0.7718, "step": 21897 }, { - "epoch": 0.6213961407491487, + "epoch": 0.6205333106633796, "grad_norm": 0.0, - "learning_rate": 6.622151999860828e-06, - "loss": 1.0014, + "learning_rate": 6.648308846594035e-06, + "loss": 0.8405, "step": 21898 }, { - "epoch": 0.6214245175936436, + "epoch": 0.620561648105642, "grad_norm": 0.0, - "learning_rate": 6.621286956373216e-06, - "loss": 0.8062, + "learning_rate": 6.647444158229319e-06, + "loss": 0.8725, "step": 21899 }, { - "epoch": 0.6214528944381384, + "epoch": 0.6205899855479045, "grad_norm": 0.0, - "learning_rate": 6.620421941425747e-06, - "loss": 0.8747, + "learning_rate": 6.64657949810443e-06, + "loss": 0.8677, "step": 21900 }, { - "epoch": 0.6214812712826334, + "epoch": 0.620618322990167, "grad_norm": 0.0, - "learning_rate": 6.619556955025721e-06, - "loss": 0.8851, + "learning_rate": 6.645714866226642e-06, + "loss": 0.9006, "step": 21901 }, { - "epoch": 0.6215096481271283, + "epoch": 0.6206466604324293, "grad_norm": 0.0, - "learning_rate": 6.618691997180456e-06, - "loss": 0.9279, + "learning_rate": 6.644850262603247e-06, + "loss": 0.7658, "step": 21902 }, { - "epoch": 0.6215380249716231, + "epoch": 0.6206749978746918, "grad_norm": 0.0, - "learning_rate": 6.617827067897246e-06, - "loss": 0.9361, + "learning_rate": 6.643985687241521e-06, + "loss": 0.8839, "step": 21903 }, { - "epoch": 0.621566401816118, + "epoch": 0.6207033353169543, "grad_norm": 0.0, - "learning_rate": 6.616962167183407e-06, - "loss": 0.8331, + "learning_rate": 6.643121140148749e-06, + "loss": 0.8336, "step": 21904 }, { - "epoch": 0.621594778660613, + "epoch": 0.6207316727592167, "grad_norm": 0.0, - "learning_rate": 6.61609729504624e-06, - "loss": 0.8775, + "learning_rate": 6.642256621332219e-06, + "loss": 0.734, "step": 21905 }, { - "epoch": 0.6216231555051078, + "epoch": 0.6207600102014792, "grad_norm": 0.0, - "learning_rate": 6.615232451493052e-06, - "loss": 0.8448, + "learning_rate": 6.641392130799205e-06, + "loss": 0.7512, "step": 21906 }, { - "epoch": 0.6216515323496027, + "epoch": 0.6207883476437417, "grad_norm": 0.0, - "learning_rate": 6.614367636531145e-06, - "loss": 0.9127, + "learning_rate": 6.640527668556993e-06, + "loss": 0.6954, "step": 21907 }, { - "epoch": 0.6216799091940977, + "epoch": 0.6208166850860042, "grad_norm": 0.0, - "learning_rate": 6.613502850167829e-06, - "loss": 0.8253, + "learning_rate": 6.639663234612865e-06, + "loss": 1.0048, "step": 21908 }, { - "epoch": 0.6217082860385925, + "epoch": 0.6208450225282666, "grad_norm": 0.0, - "learning_rate": 6.612638092410406e-06, - "loss": 0.9014, + "learning_rate": 6.6387988289741e-06, + "loss": 0.8153, "step": 21909 }, { - "epoch": 0.6217366628830874, + "epoch": 0.6208733599705291, "grad_norm": 0.0, - "learning_rate": 6.611773363266181e-06, - "loss": 0.9005, + "learning_rate": 6.637934451647983e-06, + "loss": 0.8421, "step": 21910 }, { - "epoch": 0.6217650397275823, + "epoch": 0.6209016974127916, "grad_norm": 0.0, - "learning_rate": 6.610908662742459e-06, - "loss": 0.8542, + "learning_rate": 6.637070102641788e-06, + "loss": 0.8313, "step": 21911 }, { - "epoch": 0.6217934165720772, + "epoch": 0.6209300348550539, "grad_norm": 0.0, - "learning_rate": 6.610043990846545e-06, - "loss": 0.9145, + "learning_rate": 6.636205781962803e-06, + "loss": 0.8474, "step": 21912 }, { - "epoch": 0.6218217934165721, + "epoch": 0.6209583722973164, "grad_norm": 0.0, - "learning_rate": 6.609179347585739e-06, - "loss": 0.8648, + "learning_rate": 6.635341489618308e-06, + "loss": 0.8788, "step": 21913 }, { - "epoch": 0.6218501702610669, + "epoch": 0.6209867097395789, "grad_norm": 0.0, - "learning_rate": 6.60831473296735e-06, - "loss": 0.753, + "learning_rate": 6.6344772256155766e-06, + "loss": 0.8979, "step": 21914 }, { - "epoch": 0.6218785471055619, + "epoch": 0.6210150471818414, "grad_norm": 0.0, - "learning_rate": 6.60745014699868e-06, - "loss": 0.8029, + "learning_rate": 6.633612989961895e-06, + "loss": 0.961, "step": 21915 }, { - "epoch": 0.6219069239500568, + "epoch": 0.6210433846241038, "grad_norm": 0.0, - "learning_rate": 6.606585589687025e-06, - "loss": 0.7819, + "learning_rate": 6.632748782664542e-06, + "loss": 0.7932, "step": 21916 }, { - "epoch": 0.6219353007945516, + "epoch": 0.6210717220663663, "grad_norm": 0.0, - "learning_rate": 6.605721061039696e-06, - "loss": 0.7637, + "learning_rate": 6.631884603730796e-06, + "loss": 0.876, "step": 21917 }, { - "epoch": 0.6219636776390466, + "epoch": 0.6211000595086288, "grad_norm": 0.0, - "learning_rate": 6.604856561063997e-06, - "loss": 0.8241, + "learning_rate": 6.631020453167939e-06, + "loss": 0.9641, "step": 21918 }, { - "epoch": 0.6219920544835414, + "epoch": 0.6211283969508912, "grad_norm": 0.0, - "learning_rate": 6.603992089767227e-06, - "loss": 0.9087, + "learning_rate": 6.630156330983244e-06, + "loss": 0.8362, "step": 21919 }, { - "epoch": 0.6220204313280363, + "epoch": 0.6211567343931537, "grad_norm": 0.0, - "learning_rate": 6.603127647156686e-06, - "loss": 0.8828, + "learning_rate": 6.629292237183995e-06, + "loss": 0.8698, "step": 21920 }, { - "epoch": 0.6220488081725312, + "epoch": 0.6211850718354162, "grad_norm": 0.0, - "learning_rate": 6.602263233239681e-06, - "loss": 0.8319, + "learning_rate": 6.628428171777473e-06, + "loss": 0.8328, "step": 21921 }, { - "epoch": 0.6220771850170261, + "epoch": 0.6212134092776787, "grad_norm": 0.0, - "learning_rate": 6.601398848023511e-06, - "loss": 0.7794, + "learning_rate": 6.627564134770946e-06, + "loss": 0.901, "step": 21922 }, { - "epoch": 0.622105561861521, + "epoch": 0.621241746719941, "grad_norm": 0.0, - "learning_rate": 6.600534491515476e-06, - "loss": 0.98, + "learning_rate": 6.6267001261717015e-06, + "loss": 0.7656, "step": 21923 }, { - "epoch": 0.6221339387060159, + "epoch": 0.6212700841622035, "grad_norm": 0.0, - "learning_rate": 6.599670163722881e-06, - "loss": 0.8722, + "learning_rate": 6.625836145987015e-06, + "loss": 0.8952, "step": 21924 }, { - "epoch": 0.6221623155505108, + "epoch": 0.621298421604466, "grad_norm": 0.0, - "learning_rate": 6.5988058646530255e-06, - "loss": 0.8359, + "learning_rate": 6.624972194224162e-06, + "loss": 0.8269, "step": 21925 }, { - "epoch": 0.6221906923950057, + "epoch": 0.6213267590467284, "grad_norm": 0.0, - "learning_rate": 6.597941594313206e-06, - "loss": 0.7853, + "learning_rate": 6.624108270890425e-06, + "loss": 0.9047, "step": 21926 }, { - "epoch": 0.6222190692395005, + "epoch": 0.6213550964889909, "grad_norm": 0.0, - "learning_rate": 6.59707735271073e-06, - "loss": 0.8045, + "learning_rate": 6.623244375993074e-06, + "loss": 0.8806, "step": 21927 }, { - "epoch": 0.6222474460839955, + "epoch": 0.6213834339312534, "grad_norm": 0.0, - "learning_rate": 6.5962131398528935e-06, - "loss": 0.7588, + "learning_rate": 6.62238050953939e-06, + "loss": 0.8864, "step": 21928 }, { - "epoch": 0.6222758229284904, + "epoch": 0.6214117713735158, "grad_norm": 0.0, - "learning_rate": 6.5953489557469975e-06, - "loss": 0.7002, + "learning_rate": 6.62151667153665e-06, + "loss": 0.9359, "step": 21929 }, { - "epoch": 0.6223041997729852, + "epoch": 0.6214401088157783, "grad_norm": 0.0, - "learning_rate": 6.5944848004003426e-06, - "loss": 0.9127, + "learning_rate": 6.620652861992129e-06, + "loss": 0.8543, "step": 21930 }, { - "epoch": 0.6223325766174801, + "epoch": 0.6214684462580408, "grad_norm": 0.0, - "learning_rate": 6.593620673820225e-06, - "loss": 0.8165, + "learning_rate": 6.619789080913106e-06, + "loss": 0.9016, "step": 21931 }, { - "epoch": 0.6223609534619751, + "epoch": 0.6214967837003033, "grad_norm": 0.0, - "learning_rate": 6.592756576013949e-06, - "loss": 0.8403, + "learning_rate": 6.618925328306854e-06, + "loss": 0.8724, "step": 21932 }, { - "epoch": 0.6223893303064699, + "epoch": 0.6215251211425656, "grad_norm": 0.0, - "learning_rate": 6.591892506988813e-06, - "loss": 0.866, + "learning_rate": 6.618061604180645e-06, + "loss": 0.8337, "step": 21933 }, { - "epoch": 0.6224177071509648, + "epoch": 0.6215534585848281, "grad_norm": 0.0, - "learning_rate": 6.59102846675211e-06, - "loss": 0.9634, + "learning_rate": 6.617197908541767e-06, + "loss": 0.9091, "step": 21934 }, { - "epoch": 0.6224460839954598, + "epoch": 0.6215817960270906, "grad_norm": 0.0, - "learning_rate": 6.590164455311147e-06, - "loss": 0.7262, + "learning_rate": 6.616334241397482e-06, + "loss": 0.8894, "step": 21935 }, { - "epoch": 0.6224744608399546, + "epoch": 0.621610133469353, "grad_norm": 0.0, - "learning_rate": 6.5893004726732165e-06, - "loss": 0.8379, + "learning_rate": 6.61547060275507e-06, + "loss": 0.8134, "step": 21936 }, { - "epoch": 0.6225028376844495, + "epoch": 0.6216384709116155, "grad_norm": 0.0, - "learning_rate": 6.588436518845617e-06, - "loss": 0.778, + "learning_rate": 6.614606992621807e-06, + "loss": 0.9072, "step": 21937 }, { - "epoch": 0.6225312145289443, + "epoch": 0.621666808353878, "grad_norm": 0.0, - "learning_rate": 6.587572593835649e-06, - "loss": 0.8159, + "learning_rate": 6.613743411004964e-06, + "loss": 0.8396, "step": 21938 }, { - "epoch": 0.6225595913734393, + "epoch": 0.6216951457961405, "grad_norm": 0.0, - "learning_rate": 6.58670869765061e-06, - "loss": 0.9203, + "learning_rate": 6.612879857911825e-06, + "loss": 0.7241, "step": 21939 }, { - "epoch": 0.6225879682179342, + "epoch": 0.6217234832384029, "grad_norm": 0.0, - "learning_rate": 6.585844830297793e-06, - "loss": 0.7311, + "learning_rate": 6.61201633334965e-06, + "loss": 0.7987, "step": 21940 }, { - "epoch": 0.622616345062429, + "epoch": 0.6217518206806654, "grad_norm": 0.0, - "learning_rate": 6.5849809917845e-06, - "loss": 0.7626, + "learning_rate": 6.611152837325721e-06, + "loss": 0.8741, "step": 21941 }, { - "epoch": 0.622644721906924, + "epoch": 0.6217801581229279, "grad_norm": 0.0, - "learning_rate": 6.5841171821180265e-06, - "loss": 0.9216, + "learning_rate": 6.610289369847311e-06, + "loss": 0.8516, "step": 21942 }, { - "epoch": 0.6226730987514189, + "epoch": 0.6218084955651902, "grad_norm": 0.0, - "learning_rate": 6.583253401305667e-06, - "loss": 1.0009, + "learning_rate": 6.60942593092169e-06, + "loss": 0.8792, "step": 21943 }, { - "epoch": 0.6227014755959137, + "epoch": 0.6218368330074527, "grad_norm": 0.0, - "learning_rate": 6.582389649354721e-06, - "loss": 0.9331, + "learning_rate": 6.608562520556134e-06, + "loss": 0.8286, "step": 21944 }, { - "epoch": 0.6227298524404086, + "epoch": 0.6218651704497152, "grad_norm": 0.0, - "learning_rate": 6.581525926272484e-06, - "loss": 0.8962, + "learning_rate": 6.6076991387579195e-06, + "loss": 0.8791, "step": 21945 }, { - "epoch": 0.6227582292849035, + "epoch": 0.6218935078919776, "grad_norm": 0.0, - "learning_rate": 6.580662232066249e-06, - "loss": 0.7675, + "learning_rate": 6.6068357855343115e-06, + "loss": 0.8138, "step": 21946 }, { - "epoch": 0.6227866061293984, + "epoch": 0.6219218453342401, "grad_norm": 0.0, - "learning_rate": 6.579798566743314e-06, - "loss": 0.8174, + "learning_rate": 6.605972460892586e-06, + "loss": 0.8603, "step": 21947 }, { - "epoch": 0.6228149829738933, + "epoch": 0.6219501827765026, "grad_norm": 0.0, - "learning_rate": 6.578934930310974e-06, - "loss": 0.8868, + "learning_rate": 6.605109164840013e-06, + "loss": 0.8016, "step": 21948 }, { - "epoch": 0.6228433598183882, + "epoch": 0.6219785202187651, "grad_norm": 0.0, - "learning_rate": 6.578071322776526e-06, - "loss": 0.7882, + "learning_rate": 6.6042458973838696e-06, + "loss": 0.889, "step": 21949 }, { - "epoch": 0.6228717366628831, + "epoch": 0.6220068576610275, "grad_norm": 0.0, - "learning_rate": 6.577207744147262e-06, - "loss": 0.8222, + "learning_rate": 6.603382658531423e-06, + "loss": 0.8313, "step": 21950 }, { - "epoch": 0.622900113507378, + "epoch": 0.62203519510329, "grad_norm": 0.0, - "learning_rate": 6.576344194430479e-06, - "loss": 0.8197, + "learning_rate": 6.602519448289944e-06, + "loss": 0.8982, "step": 21951 }, { - "epoch": 0.6229284903518729, + "epoch": 0.6220635325455525, "grad_norm": 0.0, - "learning_rate": 6.575480673633472e-06, - "loss": 0.9275, + "learning_rate": 6.601656266666705e-06, + "loss": 0.7794, "step": 21952 }, { - "epoch": 0.6229568671963678, + "epoch": 0.6220918699878148, "grad_norm": 0.0, - "learning_rate": 6.574617181763532e-06, - "loss": 0.9277, + "learning_rate": 6.600793113668982e-06, + "loss": 0.8891, "step": 21953 }, { - "epoch": 0.6229852440408626, + "epoch": 0.6221202074300773, "grad_norm": 0.0, - "learning_rate": 6.573753718827953e-06, - "loss": 0.8925, + "learning_rate": 6.599929989304034e-06, + "loss": 0.8249, "step": 21954 }, { - "epoch": 0.6230136208853575, + "epoch": 0.6221485448723398, "grad_norm": 0.0, - "learning_rate": 6.572890284834034e-06, - "loss": 0.875, + "learning_rate": 6.5990668935791445e-06, + "loss": 0.8239, "step": 21955 }, { - "epoch": 0.6230419977298525, + "epoch": 0.6221768823146023, "grad_norm": 0.0, - "learning_rate": 6.572026879789064e-06, - "loss": 0.988, + "learning_rate": 6.598203826501572e-06, + "loss": 0.8475, "step": 21956 }, { - "epoch": 0.6230703745743473, + "epoch": 0.6222052197568647, "grad_norm": 0.0, - "learning_rate": 6.571163503700334e-06, - "loss": 0.8547, + "learning_rate": 6.597340788078594e-06, + "loss": 0.8497, "step": 21957 }, { - "epoch": 0.6230987514188422, + "epoch": 0.6222335571991272, "grad_norm": 0.0, - "learning_rate": 6.570300156575143e-06, - "loss": 0.9959, + "learning_rate": 6.5964777783174814e-06, + "loss": 0.7928, "step": 21958 }, { - "epoch": 0.6231271282633372, + "epoch": 0.6222618946413897, "grad_norm": 0.0, - "learning_rate": 6.569436838420781e-06, - "loss": 0.8621, + "learning_rate": 6.595614797225497e-06, + "loss": 0.8483, "step": 21959 }, { - "epoch": 0.623155505107832, + "epoch": 0.6222902320836521, "grad_norm": 0.0, - "learning_rate": 6.5685735492445365e-06, - "loss": 0.8429, + "learning_rate": 6.5947518448099144e-06, + "loss": 0.8172, "step": 21960 }, { - "epoch": 0.6231838819523269, + "epoch": 0.6223185695259146, "grad_norm": 0.0, - "learning_rate": 6.5677102890537105e-06, - "loss": 0.8409, + "learning_rate": 6.593888921078e-06, + "loss": 0.9715, "step": 21961 }, { - "epoch": 0.6232122587968217, + "epoch": 0.6223469069681771, "grad_norm": 0.0, - "learning_rate": 6.566847057855583e-06, - "loss": 0.903, + "learning_rate": 6.593026026037023e-06, + "loss": 0.8372, "step": 21962 }, { - "epoch": 0.6232406356413167, + "epoch": 0.6223752444104396, "grad_norm": 0.0, - "learning_rate": 6.565983855657458e-06, - "loss": 0.8491, + "learning_rate": 6.592163159694258e-06, + "loss": 0.8651, "step": 21963 }, { - "epoch": 0.6232690124858116, + "epoch": 0.622403581852702, "grad_norm": 0.0, - "learning_rate": 6.565120682466621e-06, - "loss": 0.859, + "learning_rate": 6.591300322056964e-06, + "loss": 0.8609, "step": 21964 }, { - "epoch": 0.6232973893303064, + "epoch": 0.6224319192949644, "grad_norm": 0.0, - "learning_rate": 6.564257538290364e-06, - "loss": 0.7121, + "learning_rate": 6.590437513132414e-06, + "loss": 0.958, "step": 21965 }, { - "epoch": 0.6233257661748014, + "epoch": 0.6224602567372269, "grad_norm": 0.0, - "learning_rate": 6.563394423135978e-06, - "loss": 0.9209, + "learning_rate": 6.589574732927878e-06, + "loss": 0.7694, "step": 21966 }, { - "epoch": 0.6233541430192963, + "epoch": 0.6224885941794893, "grad_norm": 0.0, - "learning_rate": 6.562531337010754e-06, - "loss": 0.8604, + "learning_rate": 6.588711981450616e-06, + "loss": 0.8752, "step": 21967 }, { - "epoch": 0.6233825198637911, + "epoch": 0.6225169316217518, "grad_norm": 0.0, - "learning_rate": 6.561668279921982e-06, - "loss": 0.8548, + "learning_rate": 6.587849258707903e-06, + "loss": 0.9095, "step": 21968 }, { - "epoch": 0.6234108967082861, + "epoch": 0.6225452690640143, "grad_norm": 0.0, - "learning_rate": 6.560805251876954e-06, - "loss": 0.8094, + "learning_rate": 6.5869865647069995e-06, + "loss": 0.8052, "step": 21969 }, { - "epoch": 0.623439273552781, + "epoch": 0.6225736065062767, "grad_norm": 0.0, - "learning_rate": 6.559942252882956e-06, - "loss": 0.932, + "learning_rate": 6.586123899455177e-06, + "loss": 0.9008, "step": 21970 }, { - "epoch": 0.6234676503972758, + "epoch": 0.6226019439485392, "grad_norm": 0.0, - "learning_rate": 6.559079282947282e-06, - "loss": 0.9187, + "learning_rate": 6.585261262959703e-06, + "loss": 0.8258, "step": 21971 }, { - "epoch": 0.6234960272417707, + "epoch": 0.6226302813908017, "grad_norm": 0.0, - "learning_rate": 6.558216342077222e-06, - "loss": 0.7619, + "learning_rate": 6.584398655227838e-06, + "loss": 0.7756, "step": 21972 }, { - "epoch": 0.6235244040862656, + "epoch": 0.6226586188330642, "grad_norm": 0.0, - "learning_rate": 6.55735343028006e-06, - "loss": 0.8379, + "learning_rate": 6.583536076266852e-06, + "loss": 0.9214, "step": 21973 }, { - "epoch": 0.6235527809307605, + "epoch": 0.6226869562753266, "grad_norm": 0.0, - "learning_rate": 6.556490547563089e-06, - "loss": 0.9478, + "learning_rate": 6.582673526084012e-06, + "loss": 0.887, "step": 21974 }, { - "epoch": 0.6235811577752554, + "epoch": 0.622715293717589, "grad_norm": 0.0, - "learning_rate": 6.555627693933598e-06, - "loss": 0.8565, + "learning_rate": 6.58181100468658e-06, + "loss": 0.894, "step": 21975 }, { - "epoch": 0.6236095346197503, + "epoch": 0.6227436311598515, "grad_norm": 0.0, - "learning_rate": 6.554764869398875e-06, - "loss": 0.7547, + "learning_rate": 6.5809485120818265e-06, + "loss": 0.8692, "step": 21976 }, { - "epoch": 0.6236379114642452, + "epoch": 0.6227719686021139, "grad_norm": 0.0, - "learning_rate": 6.553902073966204e-06, - "loss": 0.8672, + "learning_rate": 6.58008604827701e-06, + "loss": 0.8664, "step": 21977 }, { - "epoch": 0.62366628830874, + "epoch": 0.6228003060443764, "grad_norm": 0.0, - "learning_rate": 6.553039307642879e-06, - "loss": 0.8603, + "learning_rate": 6.5792236132793985e-06, + "loss": 0.8606, "step": 21978 }, { - "epoch": 0.6236946651532349, + "epoch": 0.6228286434866389, "grad_norm": 0.0, - "learning_rate": 6.552176570436188e-06, - "loss": 0.8163, + "learning_rate": 6.578361207096261e-06, + "loss": 0.8468, "step": 21979 }, { - "epoch": 0.6237230419977299, + "epoch": 0.6228569809289014, "grad_norm": 0.0, - "learning_rate": 6.551313862353417e-06, - "loss": 0.8085, + "learning_rate": 6.577498829734853e-06, + "loss": 0.8048, "step": 21980 }, { - "epoch": 0.6237514188422247, + "epoch": 0.6228853183711638, "grad_norm": 0.0, - "learning_rate": 6.55045118340185e-06, - "loss": 0.8051, + "learning_rate": 6.5766364812024455e-06, + "loss": 0.9307, "step": 21981 }, { - "epoch": 0.6237797956867196, + "epoch": 0.6229136558134263, "grad_norm": 0.0, - "learning_rate": 6.54958853358878e-06, - "loss": 0.8144, + "learning_rate": 6.575774161506298e-06, + "loss": 0.8556, "step": 21982 }, { - "epoch": 0.6238081725312146, + "epoch": 0.6229419932556888, "grad_norm": 0.0, - "learning_rate": 6.548725912921489e-06, - "loss": 0.8455, + "learning_rate": 6.574911870653678e-06, + "loss": 0.8567, "step": 21983 }, { - "epoch": 0.6238365493757094, + "epoch": 0.6229703306979512, "grad_norm": 0.0, - "learning_rate": 6.547863321407265e-06, - "loss": 0.8251, + "learning_rate": 6.574049608651849e-06, + "loss": 0.8457, "step": 21984 }, { - "epoch": 0.6238649262202043, + "epoch": 0.6229986681402137, "grad_norm": 0.0, - "learning_rate": 6.547000759053397e-06, - "loss": 0.8505, + "learning_rate": 6.57318737550807e-06, + "loss": 0.8788, "step": 21985 }, { - "epoch": 0.6238933030646993, + "epoch": 0.6230270055824761, "grad_norm": 0.0, - "learning_rate": 6.546138225867167e-06, - "loss": 0.892, + "learning_rate": 6.572325171229606e-06, + "loss": 0.9436, "step": 21986 }, { - "epoch": 0.6239216799091941, + "epoch": 0.6230553430247386, "grad_norm": 0.0, - "learning_rate": 6.545275721855862e-06, - "loss": 0.6456, + "learning_rate": 6.571462995823721e-06, + "loss": 0.7778, "step": 21987 }, { - "epoch": 0.623950056753689, + "epoch": 0.623083680467001, "grad_norm": 0.0, - "learning_rate": 6.5444132470267695e-06, - "loss": 0.8966, + "learning_rate": 6.570600849297674e-06, + "loss": 0.9355, "step": 21988 }, { - "epoch": 0.6239784335981838, + "epoch": 0.6231120179092635, "grad_norm": 0.0, - "learning_rate": 6.543550801387174e-06, - "loss": 0.8629, + "learning_rate": 6.569738731658735e-06, + "loss": 0.875, "step": 21989 }, { - "epoch": 0.6240068104426788, + "epoch": 0.623140355351526, "grad_norm": 0.0, - "learning_rate": 6.542688384944358e-06, - "loss": 0.8671, + "learning_rate": 6.568876642914155e-06, + "loss": 0.7958, "step": 21990 }, { - "epoch": 0.6240351872871737, + "epoch": 0.6231686927937884, "grad_norm": 0.0, - "learning_rate": 6.541825997705611e-06, - "loss": 0.8745, + "learning_rate": 6.568014583071201e-06, + "loss": 0.9011, "step": 21991 }, { - "epoch": 0.6240635641316685, + "epoch": 0.6231970302360509, "grad_norm": 0.0, - "learning_rate": 6.540963639678215e-06, - "loss": 0.8721, + "learning_rate": 6.567152552137139e-06, + "loss": 0.9072, "step": 21992 }, { - "epoch": 0.6240919409761635, + "epoch": 0.6232253676783134, "grad_norm": 0.0, - "learning_rate": 6.540101310869451e-06, - "loss": 0.8615, + "learning_rate": 6.566290550119223e-06, + "loss": 0.8459, "step": 21993 }, { - "epoch": 0.6241203178206584, + "epoch": 0.6232537051205758, "grad_norm": 0.0, - "learning_rate": 6.539239011286611e-06, - "loss": 0.8791, + "learning_rate": 6.565428577024716e-06, + "loss": 0.8636, "step": 21994 }, { - "epoch": 0.6241486946651532, + "epoch": 0.6232820425628383, "grad_norm": 0.0, - "learning_rate": 6.538376740936972e-06, - "loss": 0.7399, + "learning_rate": 6.564566632860883e-06, + "loss": 0.825, "step": 21995 }, { - "epoch": 0.6241770715096481, + "epoch": 0.6233103800051008, "grad_norm": 0.0, - "learning_rate": 6.537514499827822e-06, - "loss": 0.8382, + "learning_rate": 6.563704717634975e-06, + "loss": 0.8694, "step": 21996 }, { - "epoch": 0.624205448354143, + "epoch": 0.6233387174473632, "grad_norm": 0.0, - "learning_rate": 6.536652287966443e-06, - "loss": 0.9365, + "learning_rate": 6.562842831354266e-06, + "loss": 0.7512, "step": 21997 }, { - "epoch": 0.6242338251986379, + "epoch": 0.6233670548896256, "grad_norm": 0.0, - "learning_rate": 6.535790105360116e-06, - "loss": 0.9037, + "learning_rate": 6.561980974026003e-06, + "loss": 0.8705, "step": 21998 }, { - "epoch": 0.6242622020431328, + "epoch": 0.6233953923318881, "grad_norm": 0.0, - "learning_rate": 6.534927952016128e-06, - "loss": 0.7987, + "learning_rate": 6.561119145657451e-06, + "loss": 0.8268, "step": 21999 }, { - "epoch": 0.6242905788876277, + "epoch": 0.6234237297741506, "grad_norm": 0.0, - "learning_rate": 6.534065827941759e-06, - "loss": 0.8, + "learning_rate": 6.5602573462558715e-06, + "loss": 0.8626, "step": 22000 }, { - "epoch": 0.6243189557321226, + "epoch": 0.623452067216413, "grad_norm": 0.0, - "learning_rate": 6.53320373314429e-06, - "loss": 0.9473, + "learning_rate": 6.5593955758285185e-06, + "loss": 0.7225, "step": 22001 }, { - "epoch": 0.6243473325766175, + "epoch": 0.6234804046586755, "grad_norm": 0.0, - "learning_rate": 6.532341667631006e-06, - "loss": 0.7542, + "learning_rate": 6.558533834382655e-06, + "loss": 0.7695, "step": 22002 }, { - "epoch": 0.6243757094211124, + "epoch": 0.623508742100938, "grad_norm": 0.0, - "learning_rate": 6.5314796314091885e-06, - "loss": 0.7164, + "learning_rate": 6.5576721219255435e-06, + "loss": 0.7918, "step": 22003 }, { - "epoch": 0.6244040862656073, + "epoch": 0.6235370795432005, "grad_norm": 0.0, - "learning_rate": 6.5306176244861175e-06, - "loss": 0.9421, + "learning_rate": 6.556810438464434e-06, + "loss": 0.8224, "step": 22004 }, { - "epoch": 0.6244324631101021, + "epoch": 0.6235654169854629, "grad_norm": 0.0, - "learning_rate": 6.529755646869076e-06, - "loss": 0.8183, + "learning_rate": 6.555948784006592e-06, + "loss": 0.812, "step": 22005 }, { - "epoch": 0.624460839954597, + "epoch": 0.6235937544277254, "grad_norm": 0.0, - "learning_rate": 6.5288936985653455e-06, - "loss": 0.7902, + "learning_rate": 6.555087158559268e-06, + "loss": 0.8969, "step": 22006 }, { - "epoch": 0.624489216799092, + "epoch": 0.6236220918699878, "grad_norm": 0.0, - "learning_rate": 6.528031779582202e-06, - "loss": 0.8992, + "learning_rate": 6.554225562129726e-06, + "loss": 0.8224, "step": 22007 }, { - "epoch": 0.6245175936435868, + "epoch": 0.6236504293122502, "grad_norm": 0.0, - "learning_rate": 6.52716988992693e-06, - "loss": 0.9358, + "learning_rate": 6.553363994725221e-06, + "loss": 0.8701, "step": 22008 }, { - "epoch": 0.6245459704880817, + "epoch": 0.6236787667545127, "grad_norm": 0.0, - "learning_rate": 6.5263080296068134e-06, - "loss": 0.7867, + "learning_rate": 6.552502456353011e-06, + "loss": 0.8239, "step": 22009 }, { - "epoch": 0.6245743473325767, + "epoch": 0.6237071041967752, "grad_norm": 0.0, - "learning_rate": 6.52544619862913e-06, - "loss": 0.8409, + "learning_rate": 6.551640947020356e-06, + "loss": 0.8777, "step": 22010 }, { - "epoch": 0.6246027241770715, + "epoch": 0.6237354416390377, "grad_norm": 0.0, - "learning_rate": 6.524584397001155e-06, - "loss": 0.8961, + "learning_rate": 6.550779466734507e-06, + "loss": 0.9135, "step": 22011 }, { - "epoch": 0.6246311010215664, + "epoch": 0.6237637790813001, "grad_norm": 0.0, - "learning_rate": 6.523722624730175e-06, - "loss": 0.9226, + "learning_rate": 6.549918015502722e-06, + "loss": 0.7738, "step": 22012 }, { - "epoch": 0.6246594778660612, + "epoch": 0.6237921165235626, "grad_norm": 0.0, - "learning_rate": 6.5228608818234665e-06, - "loss": 0.9106, + "learning_rate": 6.5490565933322615e-06, + "loss": 0.8528, "step": 22013 }, { - "epoch": 0.6246878547105562, + "epoch": 0.6238204539658251, "grad_norm": 0.0, - "learning_rate": 6.521999168288308e-06, - "loss": 0.8416, + "learning_rate": 6.548195200230376e-06, + "loss": 0.716, "step": 22014 }, { - "epoch": 0.6247162315550511, + "epoch": 0.6238487914080875, "grad_norm": 0.0, - "learning_rate": 6.521137484131976e-06, - "loss": 0.8085, + "learning_rate": 6.547333836204326e-06, + "loss": 0.897, "step": 22015 }, { - "epoch": 0.6247446083995459, + "epoch": 0.62387712885035, "grad_norm": 0.0, - "learning_rate": 6.5202758293617554e-06, - "loss": 0.9467, + "learning_rate": 6.546472501261367e-06, + "loss": 0.8115, "step": 22016 }, { - "epoch": 0.6247729852440409, + "epoch": 0.6239054662926125, "grad_norm": 0.0, - "learning_rate": 6.519414203984922e-06, - "loss": 0.8792, + "learning_rate": 6.54561119540875e-06, + "loss": 0.8147, "step": 22017 }, { - "epoch": 0.6248013620885358, + "epoch": 0.6239338037348748, "grad_norm": 0.0, - "learning_rate": 6.518552608008749e-06, - "loss": 0.8654, + "learning_rate": 6.544749918653737e-06, + "loss": 0.9722, "step": 22018 }, { - "epoch": 0.6248297389330306, + "epoch": 0.6239621411771373, "grad_norm": 0.0, - "learning_rate": 6.517691041440522e-06, - "loss": 0.9341, + "learning_rate": 6.543888671003573e-06, + "loss": 0.7839, "step": 22019 }, { - "epoch": 0.6248581157775256, + "epoch": 0.6239904786193998, "grad_norm": 0.0, - "learning_rate": 6.516829504287514e-06, - "loss": 0.9102, + "learning_rate": 6.543027452465518e-06, + "loss": 0.7726, "step": 22020 }, { - "epoch": 0.6248864926220205, + "epoch": 0.6240188160616623, "grad_norm": 0.0, - "learning_rate": 6.515967996557003e-06, - "loss": 0.7811, + "learning_rate": 6.54216626304683e-06, + "loss": 0.816, "step": 22021 }, { - "epoch": 0.6249148694665153, + "epoch": 0.6240471535039247, "grad_norm": 0.0, - "learning_rate": 6.515106518256269e-06, - "loss": 0.8358, + "learning_rate": 6.541305102754756e-06, + "loss": 0.7727, "step": 22022 }, { - "epoch": 0.6249432463110102, + "epoch": 0.6240754909461872, "grad_norm": 0.0, - "learning_rate": 6.514245069392583e-06, - "loss": 0.8622, + "learning_rate": 6.540443971596555e-06, + "loss": 0.9452, "step": 22023 }, { - "epoch": 0.6249716231555051, + "epoch": 0.6241038283884497, "grad_norm": 0.0, - "learning_rate": 6.513383649973229e-06, - "loss": 0.8458, + "learning_rate": 6.539582869579482e-06, + "loss": 0.7729, "step": 22024 }, { - "epoch": 0.625, + "epoch": 0.6241321658307121, "grad_norm": 0.0, - "learning_rate": 6.512522260005478e-06, - "loss": 0.8791, + "learning_rate": 6.538721796710784e-06, + "loss": 0.7805, "step": 22025 }, { - "epoch": 0.6250283768444949, + "epoch": 0.6241605032729746, "grad_norm": 0.0, - "learning_rate": 6.51166089949661e-06, - "loss": 0.8644, + "learning_rate": 6.53786075299772e-06, + "loss": 0.7509, "step": 22026 }, { - "epoch": 0.6250567536889898, + "epoch": 0.6241888407152371, "grad_norm": 0.0, - "learning_rate": 6.5107995684539e-06, - "loss": 0.8516, + "learning_rate": 6.536999738447538e-06, + "loss": 0.8236, "step": 22027 }, { - "epoch": 0.6250851305334847, + "epoch": 0.6242171781574996, "grad_norm": 0.0, - "learning_rate": 6.5099382668846215e-06, - "loss": 0.8843, + "learning_rate": 6.5361387530674935e-06, + "loss": 0.9585, "step": 22028 }, { - "epoch": 0.6251135073779795, + "epoch": 0.6242455155997619, "grad_norm": 0.0, - "learning_rate": 6.509076994796052e-06, - "loss": 0.8737, + "learning_rate": 6.535277796864842e-06, + "loss": 0.7586, "step": 22029 }, { - "epoch": 0.6251418842224744, + "epoch": 0.6242738530420244, "grad_norm": 0.0, - "learning_rate": 6.508215752195466e-06, - "loss": 0.866, + "learning_rate": 6.534416869846828e-06, + "loss": 0.7489, "step": 22030 }, { - "epoch": 0.6251702610669694, + "epoch": 0.6243021904842869, "grad_norm": 0.0, - "learning_rate": 6.507354539090138e-06, - "loss": 0.8495, + "learning_rate": 6.533555972020709e-06, + "loss": 0.9109, "step": 22031 }, { - "epoch": 0.6251986379114642, + "epoch": 0.6243305279265493, "grad_norm": 0.0, - "learning_rate": 6.506493355487345e-06, - "loss": 0.872, + "learning_rate": 6.532695103393738e-06, + "loss": 0.7543, "step": 22032 }, { - "epoch": 0.6252270147559591, + "epoch": 0.6243588653688118, "grad_norm": 0.0, - "learning_rate": 6.505632201394358e-06, - "loss": 0.8134, + "learning_rate": 6.5318342639731606e-06, + "loss": 0.9298, "step": 22033 }, { - "epoch": 0.6252553916004541, + "epoch": 0.6243872028110743, "grad_norm": 0.0, - "learning_rate": 6.504771076818451e-06, - "loss": 0.8517, + "learning_rate": 6.530973453766232e-06, + "loss": 0.8513, "step": 22034 }, { - "epoch": 0.6252837684449489, + "epoch": 0.6244155402533368, "grad_norm": 0.0, - "learning_rate": 6.503909981766903e-06, - "loss": 0.8552, + "learning_rate": 6.5301126727802e-06, + "loss": 0.8283, "step": 22035 }, { - "epoch": 0.6253121452894438, + "epoch": 0.6244438776955992, "grad_norm": 0.0, - "learning_rate": 6.503048916246983e-06, - "loss": 0.8196, + "learning_rate": 6.529251921022318e-06, + "loss": 0.8767, "step": 22036 }, { - "epoch": 0.6253405221339388, + "epoch": 0.6244722151378617, "grad_norm": 0.0, - "learning_rate": 6.502187880265969e-06, - "loss": 0.6911, + "learning_rate": 6.528391198499841e-06, + "loss": 0.8189, "step": 22037 }, { - "epoch": 0.6253688989784336, + "epoch": 0.6245005525801242, "grad_norm": 0.0, - "learning_rate": 6.501326873831126e-06, - "loss": 0.9079, + "learning_rate": 6.527530505220009e-06, + "loss": 0.9068, "step": 22038 }, { - "epoch": 0.6253972758229285, + "epoch": 0.6245288900223865, "grad_norm": 0.0, - "learning_rate": 6.500465896949732e-06, - "loss": 0.8582, + "learning_rate": 6.526669841190078e-06, + "loss": 0.7849, "step": 22039 }, { - "epoch": 0.6254256526674233, + "epoch": 0.624557227464649, "grad_norm": 0.0, - "learning_rate": 6.499604949629064e-06, - "loss": 0.9003, + "learning_rate": 6.5258092064172976e-06, + "loss": 0.8145, "step": 22040 }, { - "epoch": 0.6254540295119183, + "epoch": 0.6245855649069115, "grad_norm": 0.0, - "learning_rate": 6.49874403187639e-06, - "loss": 0.9602, + "learning_rate": 6.524948600908914e-06, + "loss": 0.8975, "step": 22041 }, { - "epoch": 0.6254824063564132, + "epoch": 0.6246139023491739, "grad_norm": 0.0, - "learning_rate": 6.49788314369898e-06, - "loss": 0.7602, + "learning_rate": 6.524088024672184e-06, + "loss": 0.8628, "step": 22042 }, { - "epoch": 0.625510783200908, + "epoch": 0.6246422397914364, "grad_norm": 0.0, - "learning_rate": 6.4970222851041106e-06, - "loss": 0.7972, + "learning_rate": 6.523227477714347e-06, + "loss": 0.7469, "step": 22043 }, { - "epoch": 0.625539160045403, + "epoch": 0.6246705772336989, "grad_norm": 0.0, - "learning_rate": 6.496161456099052e-06, - "loss": 0.7805, + "learning_rate": 6.522366960042654e-06, + "loss": 0.9883, "step": 22044 }, { - "epoch": 0.6255675368898979, + "epoch": 0.6246989146759614, "grad_norm": 0.0, - "learning_rate": 6.495300656691072e-06, - "loss": 0.8552, + "learning_rate": 6.521506471664363e-06, + "loss": 0.8422, "step": 22045 }, { - "epoch": 0.6255959137343927, + "epoch": 0.6247272521182238, "grad_norm": 0.0, - "learning_rate": 6.494439886887448e-06, - "loss": 0.8336, + "learning_rate": 6.520646012586709e-06, + "loss": 0.8078, "step": 22046 }, { - "epoch": 0.6256242905788876, + "epoch": 0.6247555895604863, "grad_norm": 0.0, - "learning_rate": 6.493579146695448e-06, - "loss": 0.8722, + "learning_rate": 6.519785582816947e-06, + "loss": 0.8773, "step": 22047 }, { - "epoch": 0.6256526674233825, + "epoch": 0.6247839270027488, "grad_norm": 0.0, - "learning_rate": 6.49271843612234e-06, - "loss": 0.9042, + "learning_rate": 6.518925182362321e-06, + "loss": 0.8349, "step": 22048 }, { - "epoch": 0.6256810442678774, + "epoch": 0.6248122644450111, "grad_norm": 0.0, - "learning_rate": 6.491857755175399e-06, - "loss": 0.8153, + "learning_rate": 6.518064811230083e-06, + "loss": 0.8234, "step": 22049 }, { - "epoch": 0.6257094211123723, + "epoch": 0.6248406018872736, "grad_norm": 0.0, - "learning_rate": 6.490997103861894e-06, - "loss": 0.7897, + "learning_rate": 6.517204469427481e-06, + "loss": 0.8871, "step": 22050 }, { - "epoch": 0.6257377979568672, + "epoch": 0.6248689393295361, "grad_norm": 0.0, - "learning_rate": 6.490136482189091e-06, - "loss": 0.9839, + "learning_rate": 6.516344156961754e-06, + "loss": 0.7627, "step": 22051 }, { - "epoch": 0.6257661748013621, + "epoch": 0.6248972767717986, "grad_norm": 0.0, - "learning_rate": 6.489275890164265e-06, - "loss": 0.8987, + "learning_rate": 6.515483873840155e-06, + "loss": 0.9404, "step": 22052 }, { - "epoch": 0.625794551645857, + "epoch": 0.624925614214061, "grad_norm": 0.0, - "learning_rate": 6.4884153277946836e-06, - "loss": 0.8486, + "learning_rate": 6.514623620069931e-06, + "loss": 0.8591, "step": 22053 }, { - "epoch": 0.6258229284903519, + "epoch": 0.6249539516563235, "grad_norm": 0.0, - "learning_rate": 6.487554795087612e-06, - "loss": 0.9037, + "learning_rate": 6.513763395658325e-06, + "loss": 0.7568, "step": 22054 }, { - "epoch": 0.6258513053348468, + "epoch": 0.624982289098586, "grad_norm": 0.0, - "learning_rate": 6.4866942920503274e-06, - "loss": 0.8293, + "learning_rate": 6.512903200612588e-06, + "loss": 0.8108, "step": 22055 }, { - "epoch": 0.6258796821793416, + "epoch": 0.6250106265408484, "grad_norm": 0.0, - "learning_rate": 6.485833818690092e-06, - "loss": 0.9354, + "learning_rate": 6.512043034939959e-06, + "loss": 0.8673, "step": 22056 }, { - "epoch": 0.6259080590238365, + "epoch": 0.6250389639831109, "grad_norm": 0.0, - "learning_rate": 6.484973375014177e-06, - "loss": 0.9177, + "learning_rate": 6.5111828986476855e-06, + "loss": 0.7851, "step": 22057 }, { - "epoch": 0.6259364358683315, + "epoch": 0.6250673014253734, "grad_norm": 0.0, - "learning_rate": 6.484112961029851e-06, - "loss": 0.869, + "learning_rate": 6.510322791743016e-06, + "loss": 0.859, "step": 22058 }, { - "epoch": 0.6259648127128263, + "epoch": 0.6250956388676359, "grad_norm": 0.0, - "learning_rate": 6.483252576744379e-06, - "loss": 0.9366, + "learning_rate": 6.509462714233194e-06, + "loss": 0.9427, "step": 22059 }, { - "epoch": 0.6259931895573212, + "epoch": 0.6251239763098982, "grad_norm": 0.0, - "learning_rate": 6.4823922221650324e-06, - "loss": 0.8288, + "learning_rate": 6.508602666125462e-06, + "loss": 0.9786, "step": 22060 }, { - "epoch": 0.6260215664018162, + "epoch": 0.6251523137521607, "grad_norm": 0.0, - "learning_rate": 6.481531897299076e-06, - "loss": 0.884, + "learning_rate": 6.507742647427068e-06, + "loss": 0.8879, "step": 22061 }, { - "epoch": 0.626049943246311, + "epoch": 0.6251806511944232, "grad_norm": 0.0, - "learning_rate": 6.480671602153778e-06, - "loss": 0.7447, + "learning_rate": 6.5068826581452525e-06, + "loss": 0.9602, "step": 22062 }, { - "epoch": 0.6260783200908059, + "epoch": 0.6252089886366856, "grad_norm": 0.0, - "learning_rate": 6.479811336736406e-06, - "loss": 0.8781, + "learning_rate": 6.506022698287265e-06, + "loss": 0.9711, "step": 22063 }, { - "epoch": 0.6261066969353007, + "epoch": 0.6252373260789481, "grad_norm": 0.0, - "learning_rate": 6.478951101054225e-06, - "loss": 0.8362, + "learning_rate": 6.5051627678603425e-06, + "loss": 0.8242, "step": 22064 }, { - "epoch": 0.6261350737797957, + "epoch": 0.6252656635212106, "grad_norm": 0.0, - "learning_rate": 6.478090895114501e-06, - "loss": 0.7631, + "learning_rate": 6.504302866871732e-06, + "loss": 0.8129, "step": 22065 }, { - "epoch": 0.6261634506242906, + "epoch": 0.625294000963473, "grad_norm": 0.0, - "learning_rate": 6.477230718924503e-06, - "loss": 0.8213, + "learning_rate": 6.503442995328678e-06, + "loss": 0.9211, "step": 22066 }, { - "epoch": 0.6261918274687854, + "epoch": 0.6253223384057355, "grad_norm": 0.0, - "learning_rate": 6.476370572491496e-06, - "loss": 0.7247, + "learning_rate": 6.50258315323842e-06, + "loss": 0.9034, "step": 22067 }, { - "epoch": 0.6262202043132804, + "epoch": 0.625350675847998, "grad_norm": 0.0, - "learning_rate": 6.475510455822743e-06, - "loss": 0.9434, + "learning_rate": 6.501723340608207e-06, + "loss": 0.8569, "step": 22068 }, { - "epoch": 0.6262485811577753, + "epoch": 0.6253790132902605, "grad_norm": 0.0, - "learning_rate": 6.47465036892551e-06, - "loss": 0.8447, + "learning_rate": 6.500863557445274e-06, + "loss": 0.9265, "step": 22069 }, { - "epoch": 0.6262769580022701, + "epoch": 0.6254073507325228, "grad_norm": 0.0, - "learning_rate": 6.473790311807066e-06, - "loss": 0.8735, + "learning_rate": 6.5000038037568645e-06, + "loss": 0.8278, "step": 22070 }, { - "epoch": 0.6263053348467651, + "epoch": 0.6254356881747853, "grad_norm": 0.0, - "learning_rate": 6.472930284474677e-06, - "loss": 0.9474, + "learning_rate": 6.499144079550227e-06, + "loss": 0.7957, "step": 22071 }, { - "epoch": 0.62633371169126, + "epoch": 0.6254640256170478, "grad_norm": 0.0, - "learning_rate": 6.4720702869356015e-06, - "loss": 0.8097, + "learning_rate": 6.498284384832596e-06, + "loss": 0.8696, "step": 22072 }, { - "epoch": 0.6263620885357548, + "epoch": 0.6254923630593102, "grad_norm": 0.0, - "learning_rate": 6.471210319197108e-06, - "loss": 0.8951, + "learning_rate": 6.497424719611216e-06, + "loss": 0.861, "step": 22073 }, { - "epoch": 0.6263904653802497, + "epoch": 0.6255207005015727, "grad_norm": 0.0, - "learning_rate": 6.470350381266459e-06, - "loss": 0.8428, + "learning_rate": 6.496565083893333e-06, + "loss": 0.8442, "step": 22074 }, { - "epoch": 0.6264188422247446, + "epoch": 0.6255490379438352, "grad_norm": 0.0, - "learning_rate": 6.469490473150917e-06, - "loss": 0.9504, + "learning_rate": 6.495705477686179e-06, + "loss": 0.8784, "step": 22075 }, { - "epoch": 0.6264472190692395, + "epoch": 0.6255773753860977, "grad_norm": 0.0, - "learning_rate": 6.46863059485775e-06, - "loss": 0.8927, + "learning_rate": 6.494845900997002e-06, + "loss": 0.8158, "step": 22076 }, { - "epoch": 0.6264755959137344, + "epoch": 0.6256057128283601, "grad_norm": 0.0, - "learning_rate": 6.46777074639422e-06, - "loss": 0.8062, + "learning_rate": 6.493986353833035e-06, + "loss": 0.7668, "step": 22077 }, { - "epoch": 0.6265039727582293, + "epoch": 0.6256340502706226, "grad_norm": 0.0, - "learning_rate": 6.466910927767589e-06, - "loss": 0.8218, + "learning_rate": 6.4931268362015245e-06, + "loss": 0.8396, "step": 22078 }, { - "epoch": 0.6265323496027242, + "epoch": 0.6256623877128851, "grad_norm": 0.0, - "learning_rate": 6.466051138985117e-06, - "loss": 0.8451, + "learning_rate": 6.492267348109711e-06, + "loss": 0.8741, "step": 22079 }, { - "epoch": 0.626560726447219, + "epoch": 0.6256907251551475, "grad_norm": 0.0, - "learning_rate": 6.465191380054075e-06, - "loss": 0.7443, + "learning_rate": 6.491407889564829e-06, + "loss": 0.8783, "step": 22080 }, { - "epoch": 0.6265891032917139, + "epoch": 0.62571906259741, "grad_norm": 0.0, - "learning_rate": 6.464331650981717e-06, - "loss": 0.837, + "learning_rate": 6.490548460574122e-06, + "loss": 0.8329, "step": 22081 }, { - "epoch": 0.6266174801362089, + "epoch": 0.6257474000396724, "grad_norm": 0.0, - "learning_rate": 6.4634719517753075e-06, - "loss": 0.8255, + "learning_rate": 6.489689061144832e-06, + "loss": 0.8103, "step": 22082 }, { - "epoch": 0.6266458569807037, + "epoch": 0.6257757374819349, "grad_norm": 0.0, - "learning_rate": 6.4626122824421114e-06, - "loss": 0.8988, + "learning_rate": 6.48882969128419e-06, + "loss": 0.8572, "step": 22083 }, { - "epoch": 0.6266742338251986, + "epoch": 0.6258040749241973, "grad_norm": 0.0, - "learning_rate": 6.461752642989389e-06, - "loss": 0.8489, + "learning_rate": 6.4879703509994444e-06, + "loss": 0.7563, "step": 22084 }, { - "epoch": 0.6267026106696936, + "epoch": 0.6258324123664598, "grad_norm": 0.0, - "learning_rate": 6.4608930334243956e-06, - "loss": 0.8785, + "learning_rate": 6.487111040297825e-06, + "loss": 0.8546, "step": 22085 }, { - "epoch": 0.6267309875141884, + "epoch": 0.6258607498087223, "grad_norm": 0.0, - "learning_rate": 6.4600334537544015e-06, - "loss": 0.8077, + "learning_rate": 6.486251759186573e-06, + "loss": 0.8125, "step": 22086 }, { - "epoch": 0.6267593643586833, + "epoch": 0.6258890872509847, "grad_norm": 0.0, - "learning_rate": 6.459173903986665e-06, - "loss": 0.8618, + "learning_rate": 6.485392507672931e-06, + "loss": 0.895, "step": 22087 }, { - "epoch": 0.6267877412031783, + "epoch": 0.6259174246932472, "grad_norm": 0.0, - "learning_rate": 6.458314384128447e-06, - "loss": 0.8525, + "learning_rate": 6.4845332857641294e-06, + "loss": 0.8721, "step": 22088 }, { - "epoch": 0.6268161180476731, + "epoch": 0.6259457621355097, "grad_norm": 0.0, - "learning_rate": 6.457454894187003e-06, - "loss": 0.8883, + "learning_rate": 6.483674093467409e-06, + "loss": 0.8167, "step": 22089 }, { - "epoch": 0.626844494892168, + "epoch": 0.6259740995777721, "grad_norm": 0.0, - "learning_rate": 6.456595434169599e-06, - "loss": 0.8127, + "learning_rate": 6.482814930790014e-06, + "loss": 0.8662, "step": 22090 }, { - "epoch": 0.6268728717366628, + "epoch": 0.6260024370200346, "grad_norm": 0.0, - "learning_rate": 6.455736004083494e-06, - "loss": 0.9352, + "learning_rate": 6.481955797739168e-06, + "loss": 0.8429, "step": 22091 }, { - "epoch": 0.6269012485811578, + "epoch": 0.626030774462297, "grad_norm": 0.0, - "learning_rate": 6.454876603935942e-06, - "loss": 0.8081, + "learning_rate": 6.481096694322118e-06, + "loss": 0.9415, "step": 22092 }, { - "epoch": 0.6269296254256527, + "epoch": 0.6260591119045595, "grad_norm": 0.0, - "learning_rate": 6.454017233734211e-06, - "loss": 0.7631, + "learning_rate": 6.480237620546095e-06, + "loss": 0.7961, "step": 22093 }, { - "epoch": 0.6269580022701475, + "epoch": 0.6260874493468219, "grad_norm": 0.0, - "learning_rate": 6.453157893485556e-06, - "loss": 0.924, + "learning_rate": 6.4793785764183356e-06, + "loss": 0.9003, "step": 22094 }, { - "epoch": 0.6269863791146425, + "epoch": 0.6261157867890844, "grad_norm": 0.0, - "learning_rate": 6.452298583197232e-06, - "loss": 0.8332, + "learning_rate": 6.478519561946085e-06, + "loss": 0.9276, "step": 22095 }, { - "epoch": 0.6270147559591374, + "epoch": 0.6261441242313469, "grad_norm": 0.0, - "learning_rate": 6.451439302876503e-06, - "loss": 0.8832, + "learning_rate": 6.4776605771365666e-06, + "loss": 0.7667, "step": 22096 }, { - "epoch": 0.6270431328036322, + "epoch": 0.6261724616736093, "grad_norm": 0.0, - "learning_rate": 6.450580052530626e-06, - "loss": 0.8557, + "learning_rate": 6.476801621997022e-06, + "loss": 0.8327, "step": 22097 }, { - "epoch": 0.6270715096481271, + "epoch": 0.6262007991158718, "grad_norm": 0.0, - "learning_rate": 6.449720832166859e-06, - "loss": 0.7948, + "learning_rate": 6.475942696534685e-06, + "loss": 0.8425, "step": 22098 }, { - "epoch": 0.627099886492622, + "epoch": 0.6262291365581343, "grad_norm": 0.0, - "learning_rate": 6.44886164179246e-06, - "loss": 0.8953, + "learning_rate": 6.4750838007567915e-06, + "loss": 0.8559, "step": 22099 }, { - "epoch": 0.6271282633371169, + "epoch": 0.6262574740003968, "grad_norm": 0.0, - "learning_rate": 6.448002481414681e-06, - "loss": 0.83, + "learning_rate": 6.474224934670579e-06, + "loss": 0.8328, "step": 22100 }, { - "epoch": 0.6271566401816118, + "epoch": 0.6262858114426592, "grad_norm": 0.0, - "learning_rate": 6.447143351040792e-06, - "loss": 0.7251, + "learning_rate": 6.473366098283276e-06, + "loss": 0.9341, "step": 22101 }, { - "epoch": 0.6271850170261067, + "epoch": 0.6263141488849217, "grad_norm": 0.0, - "learning_rate": 6.446284250678041e-06, - "loss": 0.8189, + "learning_rate": 6.472507291602119e-06, + "loss": 0.9023, "step": 22102 }, { - "epoch": 0.6272133938706016, + "epoch": 0.6263424863271841, "grad_norm": 0.0, - "learning_rate": 6.445425180333684e-06, - "loss": 0.9162, + "learning_rate": 6.471648514634348e-06, + "loss": 0.7003, "step": 22103 }, { - "epoch": 0.6272417707150965, + "epoch": 0.6263708237694465, "grad_norm": 0.0, - "learning_rate": 6.444566140014983e-06, - "loss": 0.9149, + "learning_rate": 6.470789767387188e-06, + "loss": 0.8238, "step": 22104 }, { - "epoch": 0.6272701475595914, + "epoch": 0.626399161211709, "grad_norm": 0.0, - "learning_rate": 6.443707129729192e-06, - "loss": 0.8082, + "learning_rate": 6.469931049867877e-06, + "loss": 0.9143, "step": 22105 }, { - "epoch": 0.6272985244040863, + "epoch": 0.6264274986539715, "grad_norm": 0.0, - "learning_rate": 6.442848149483565e-06, - "loss": 0.7504, + "learning_rate": 6.469072362083647e-06, + "loss": 0.8943, "step": 22106 }, { - "epoch": 0.6273269012485811, + "epoch": 0.626455836096234, "grad_norm": 0.0, - "learning_rate": 6.441989199285361e-06, - "loss": 0.893, + "learning_rate": 6.468213704041731e-06, + "loss": 0.7717, "step": 22107 }, { - "epoch": 0.627355278093076, + "epoch": 0.6264841735384964, "grad_norm": 0.0, - "learning_rate": 6.441130279141834e-06, - "loss": 0.7832, + "learning_rate": 6.4673550757493665e-06, + "loss": 0.813, "step": 22108 }, { - "epoch": 0.627383654937571, + "epoch": 0.6265125109807589, "grad_norm": 0.0, - "learning_rate": 6.440271389060238e-06, - "loss": 0.849, + "learning_rate": 6.466496477213777e-06, + "loss": 0.8424, "step": 22109 }, { - "epoch": 0.6274120317820658, + "epoch": 0.6265408484230214, "grad_norm": 0.0, - "learning_rate": 6.439412529047831e-06, - "loss": 0.9526, + "learning_rate": 6.4656379084422014e-06, + "loss": 0.8309, "step": 22110 }, { - "epoch": 0.6274404086265607, + "epoch": 0.6265691858652838, "grad_norm": 0.0, - "learning_rate": 6.438553699111867e-06, - "loss": 0.8603, + "learning_rate": 6.464779369441871e-06, + "loss": 0.8663, "step": 22111 }, { - "epoch": 0.6274687854710557, + "epoch": 0.6265975233075463, "grad_norm": 0.0, - "learning_rate": 6.437694899259597e-06, - "loss": 0.7393, + "learning_rate": 6.463920860220017e-06, + "loss": 0.9136, "step": 22112 }, { - "epoch": 0.6274971623155505, + "epoch": 0.6266258607498087, "grad_norm": 0.0, - "learning_rate": 6.4368361294982816e-06, - "loss": 0.8306, + "learning_rate": 6.4630623807838726e-06, + "loss": 0.8402, "step": 22113 }, { - "epoch": 0.6275255391600454, + "epoch": 0.6266541981920711, "grad_norm": 0.0, - "learning_rate": 6.435977389835171e-06, - "loss": 0.8438, + "learning_rate": 6.462203931140662e-06, + "loss": 0.8566, "step": 22114 }, { - "epoch": 0.6275539160045402, + "epoch": 0.6266825356343336, "grad_norm": 0.0, - "learning_rate": 6.4351186802775155e-06, - "loss": 0.9876, + "learning_rate": 6.461345511297624e-06, + "loss": 0.8667, "step": 22115 }, { - "epoch": 0.6275822928490352, + "epoch": 0.6267108730765961, "grad_norm": 0.0, - "learning_rate": 6.434260000832575e-06, - "loss": 0.8833, + "learning_rate": 6.46048712126199e-06, + "loss": 0.8545, "step": 22116 }, { - "epoch": 0.6276106696935301, + "epoch": 0.6267392105188586, "grad_norm": 0.0, - "learning_rate": 6.433401351507602e-06, - "loss": 0.9991, + "learning_rate": 6.459628761040983e-06, + "loss": 0.8671, "step": 22117 }, { - "epoch": 0.6276390465380249, + "epoch": 0.626767547961121, "grad_norm": 0.0, - "learning_rate": 6.43254273230985e-06, - "loss": 0.8318, + "learning_rate": 6.458770430641839e-06, + "loss": 0.8145, "step": 22118 }, { - "epoch": 0.6276674233825199, + "epoch": 0.6267958854033835, "grad_norm": 0.0, - "learning_rate": 6.431684143246568e-06, - "loss": 0.8035, + "learning_rate": 6.457912130071786e-06, + "loss": 0.8627, "step": 22119 }, { - "epoch": 0.6276958002270148, + "epoch": 0.626824222845646, "grad_norm": 0.0, - "learning_rate": 6.430825584325009e-06, - "loss": 0.8388, + "learning_rate": 6.457053859338054e-06, + "loss": 0.7608, "step": 22120 }, { - "epoch": 0.6277241770715096, + "epoch": 0.6268525602879084, "grad_norm": 0.0, - "learning_rate": 6.429967055552429e-06, - "loss": 0.8277, + "learning_rate": 6.456195618447877e-06, + "loss": 0.7687, "step": 22121 }, { - "epoch": 0.6277525539160045, + "epoch": 0.6268808977301709, "grad_norm": 0.0, - "learning_rate": 6.4291085569360765e-06, - "loss": 0.7572, + "learning_rate": 6.455337407408476e-06, + "loss": 1.0024, "step": 22122 }, { - "epoch": 0.6277809307604995, + "epoch": 0.6269092351724334, "grad_norm": 0.0, - "learning_rate": 6.4282500884832045e-06, - "loss": 0.736, + "learning_rate": 6.454479226227084e-06, + "loss": 0.8331, "step": 22123 }, { - "epoch": 0.6278093076049943, + "epoch": 0.6269375726146958, "grad_norm": 0.0, - "learning_rate": 6.427391650201065e-06, - "loss": 0.8538, + "learning_rate": 6.453621074910933e-06, + "loss": 0.8741, "step": 22124 }, { - "epoch": 0.6278376844494892, + "epoch": 0.6269659100569582, "grad_norm": 0.0, - "learning_rate": 6.426533242096911e-06, - "loss": 0.912, + "learning_rate": 6.452762953467246e-06, + "loss": 0.9116, "step": 22125 }, { - "epoch": 0.6278660612939841, + "epoch": 0.6269942474992207, "grad_norm": 0.0, - "learning_rate": 6.425674864177987e-06, - "loss": 0.8525, + "learning_rate": 6.451904861903258e-06, + "loss": 0.8795, "step": 22126 }, { - "epoch": 0.627894438138479, + "epoch": 0.6270225849414832, "grad_norm": 0.0, - "learning_rate": 6.424816516451551e-06, - "loss": 0.8309, + "learning_rate": 6.451046800226189e-06, + "loss": 0.8697, "step": 22127 }, { - "epoch": 0.6279228149829739, + "epoch": 0.6270509223837456, "grad_norm": 0.0, - "learning_rate": 6.42395819892485e-06, - "loss": 0.8102, + "learning_rate": 6.4501887684432706e-06, + "loss": 0.9317, "step": 22128 }, { - "epoch": 0.6279511918274688, + "epoch": 0.6270792598260081, "grad_norm": 0.0, - "learning_rate": 6.423099911605133e-06, - "loss": 0.8703, + "learning_rate": 6.449330766561735e-06, + "loss": 0.8857, "step": 22129 }, { - "epoch": 0.6279795686719637, + "epoch": 0.6271075972682706, "grad_norm": 0.0, - "learning_rate": 6.422241654499654e-06, - "loss": 0.8011, + "learning_rate": 6.4484727945888e-06, + "loss": 0.678, "step": 22130 }, { - "epoch": 0.6280079455164586, + "epoch": 0.627135934710533, "grad_norm": 0.0, - "learning_rate": 6.421383427615658e-06, - "loss": 0.68, + "learning_rate": 6.447614852531697e-06, + "loss": 0.9471, "step": 22131 }, { - "epoch": 0.6280363223609534, + "epoch": 0.6271642721527955, "grad_norm": 0.0, - "learning_rate": 6.420525230960399e-06, - "loss": 0.7879, + "learning_rate": 6.446756940397656e-06, + "loss": 0.9006, "step": 22132 }, { - "epoch": 0.6280646992054484, + "epoch": 0.627192609595058, "grad_norm": 0.0, - "learning_rate": 6.419667064541123e-06, - "loss": 0.892, + "learning_rate": 6.445899058193899e-06, + "loss": 1.0351, "step": 22133 }, { - "epoch": 0.6280930760499432, + "epoch": 0.6272209470373205, "grad_norm": 0.0, - "learning_rate": 6.418808928365083e-06, - "loss": 0.8525, + "learning_rate": 6.445041205927658e-06, + "loss": 0.8563, "step": 22134 }, { - "epoch": 0.6281214528944381, + "epoch": 0.6272492844795828, "grad_norm": 0.0, - "learning_rate": 6.417950822439524e-06, - "loss": 0.8526, + "learning_rate": 6.444183383606151e-06, + "loss": 0.9044, "step": 22135 }, { - "epoch": 0.6281498297389331, + "epoch": 0.6272776219218453, "grad_norm": 0.0, - "learning_rate": 6.417092746771693e-06, - "loss": 0.8351, + "learning_rate": 6.443325591236607e-06, + "loss": 0.7975, "step": 22136 }, { - "epoch": 0.6281782065834279, + "epoch": 0.6273059593641078, "grad_norm": 0.0, - "learning_rate": 6.416234701368844e-06, - "loss": 0.8845, + "learning_rate": 6.4424678288262556e-06, + "loss": 0.8807, "step": 22137 }, { - "epoch": 0.6282065834279228, + "epoch": 0.6273342968063702, "grad_norm": 0.0, - "learning_rate": 6.41537668623822e-06, - "loss": 0.9374, + "learning_rate": 6.441610096382316e-06, + "loss": 0.91, "step": 22138 }, { - "epoch": 0.6282349602724177, + "epoch": 0.6273626342486327, "grad_norm": 0.0, - "learning_rate": 6.414518701387069e-06, - "loss": 0.9329, + "learning_rate": 6.4407523939120154e-06, + "loss": 0.8879, "step": 22139 }, { - "epoch": 0.6282633371169126, + "epoch": 0.6273909716908952, "grad_norm": 0.0, - "learning_rate": 6.413660746822643e-06, - "loss": 0.7615, + "learning_rate": 6.439894721422584e-06, + "loss": 0.8718, "step": 22140 }, { - "epoch": 0.6282917139614075, + "epoch": 0.6274193091331577, "grad_norm": 0.0, - "learning_rate": 6.412802822552184e-06, - "loss": 0.7844, + "learning_rate": 6.439037078921235e-06, + "loss": 0.8879, "step": 22141 }, { - "epoch": 0.6283200908059023, + "epoch": 0.6274476465754201, "grad_norm": 0.0, - "learning_rate": 6.411944928582941e-06, - "loss": 0.8631, + "learning_rate": 6.4381794664152065e-06, + "loss": 0.8911, "step": 22142 }, { - "epoch": 0.6283484676503973, + "epoch": 0.6274759840176826, "grad_norm": 0.0, - "learning_rate": 6.4110870649221605e-06, - "loss": 0.9153, + "learning_rate": 6.437321883911709e-06, + "loss": 0.8178, "step": 22143 }, { - "epoch": 0.6283768444948922, + "epoch": 0.6275043214599451, "grad_norm": 0.0, - "learning_rate": 6.410229231577089e-06, - "loss": 0.8927, + "learning_rate": 6.436464331417973e-06, + "loss": 0.7959, "step": 22144 }, { - "epoch": 0.628405221339387, + "epoch": 0.6275326589022074, "grad_norm": 0.0, - "learning_rate": 6.4093714285549736e-06, - "loss": 0.7762, + "learning_rate": 6.435606808941223e-06, + "loss": 0.898, "step": 22145 }, { - "epoch": 0.628433598183882, + "epoch": 0.6275609963444699, "grad_norm": 0.0, - "learning_rate": 6.408513655863054e-06, - "loss": 0.8963, + "learning_rate": 6.434749316488678e-06, + "loss": 0.9694, "step": 22146 }, { - "epoch": 0.6284619750283769, + "epoch": 0.6275893337867324, "grad_norm": 0.0, - "learning_rate": 6.407655913508583e-06, - "loss": 0.874, + "learning_rate": 6.433891854067564e-06, + "loss": 0.8854, "step": 22147 }, { - "epoch": 0.6284903518728717, + "epoch": 0.6276176712289949, "grad_norm": 0.0, - "learning_rate": 6.4067982014988075e-06, - "loss": 0.6956, + "learning_rate": 6.433034421685107e-06, + "loss": 0.8467, "step": 22148 }, { - "epoch": 0.6285187287173666, + "epoch": 0.6276460086712573, "grad_norm": 0.0, - "learning_rate": 6.405940519840967e-06, - "loss": 0.9509, + "learning_rate": 6.432177019348521e-06, + "loss": 0.7858, "step": 22149 }, { - "epoch": 0.6285471055618616, + "epoch": 0.6276743461135198, "grad_norm": 0.0, - "learning_rate": 6.4050828685423094e-06, - "loss": 0.8449, + "learning_rate": 6.4313196470650356e-06, + "loss": 0.8937, "step": 22150 }, { - "epoch": 0.6285754824063564, + "epoch": 0.6277026835557823, "grad_norm": 0.0, - "learning_rate": 6.404225247610078e-06, - "loss": 0.9135, + "learning_rate": 6.430462304841868e-06, + "loss": 0.8212, "step": 22151 }, { - "epoch": 0.6286038592508513, + "epoch": 0.6277310209980447, "grad_norm": 0.0, - "learning_rate": 6.403367657051518e-06, - "loss": 0.8725, + "learning_rate": 6.429604992686241e-06, + "loss": 0.8391, "step": 22152 }, { - "epoch": 0.6286322360953462, + "epoch": 0.6277593584403072, "grad_norm": 0.0, - "learning_rate": 6.4025100968738715e-06, - "loss": 0.8676, + "learning_rate": 6.428747710605382e-06, + "loss": 0.9353, "step": 22153 }, { - "epoch": 0.6286606129398411, + "epoch": 0.6277876958825697, "grad_norm": 0.0, - "learning_rate": 6.401652567084386e-06, - "loss": 0.8593, + "learning_rate": 6.4278904586065025e-06, + "loss": 0.9535, "step": 22154 }, { - "epoch": 0.628688989784336, + "epoch": 0.627816033324832, "grad_norm": 0.0, - "learning_rate": 6.400795067690303e-06, - "loss": 0.7749, + "learning_rate": 6.427033236696833e-06, + "loss": 0.9786, "step": 22155 }, { - "epoch": 0.6287173666288308, + "epoch": 0.6278443707670945, "grad_norm": 0.0, - "learning_rate": 6.399937598698865e-06, - "loss": 0.9548, + "learning_rate": 6.426176044883585e-06, + "loss": 0.7816, "step": 22156 }, { - "epoch": 0.6287457434733258, + "epoch": 0.627872708209357, "grad_norm": 0.0, - "learning_rate": 6.399080160117314e-06, - "loss": 0.8771, + "learning_rate": 6.425318883173983e-06, + "loss": 0.8832, "step": 22157 }, { - "epoch": 0.6287741203178207, + "epoch": 0.6279010456516195, "grad_norm": 0.0, - "learning_rate": 6.3982227519528986e-06, - "loss": 0.9594, + "learning_rate": 6.4244617515752505e-06, + "loss": 0.8278, "step": 22158 }, { - "epoch": 0.6288024971623155, + "epoch": 0.6279293830938819, "grad_norm": 0.0, - "learning_rate": 6.397365374212854e-06, - "loss": 0.7335, + "learning_rate": 6.423604650094601e-06, + "loss": 0.8595, "step": 22159 }, { - "epoch": 0.6288308740068105, + "epoch": 0.6279577205361444, "grad_norm": 0.0, - "learning_rate": 6.3965080269044285e-06, - "loss": 0.7917, + "learning_rate": 6.422747578739258e-06, + "loss": 0.819, "step": 22160 }, { - "epoch": 0.6288592508513053, + "epoch": 0.6279860579784069, "grad_norm": 0.0, - "learning_rate": 6.395650710034858e-06, - "loss": 0.8578, + "learning_rate": 6.421890537516444e-06, + "loss": 0.7841, "step": 22161 }, { - "epoch": 0.6288876276958002, + "epoch": 0.6280143954206693, "grad_norm": 0.0, - "learning_rate": 6.3947934236113915e-06, - "loss": 0.8864, + "learning_rate": 6.4210335264333716e-06, + "loss": 0.7917, "step": 22162 }, { - "epoch": 0.6289160045402952, + "epoch": 0.6280427328629318, "grad_norm": 0.0, - "learning_rate": 6.393936167641266e-06, - "loss": 0.8579, + "learning_rate": 6.420176545497265e-06, + "loss": 0.8407, "step": 22163 }, { - "epoch": 0.62894438138479, + "epoch": 0.6280710703051943, "grad_norm": 0.0, - "learning_rate": 6.393078942131723e-06, - "loss": 0.799, + "learning_rate": 6.419319594715338e-06, + "loss": 0.8229, "step": 22164 }, { - "epoch": 0.6289727582292849, + "epoch": 0.6280994077474568, "grad_norm": 0.0, - "learning_rate": 6.392221747090006e-06, - "loss": 0.8735, + "learning_rate": 6.418462674094812e-06, + "loss": 0.9639, "step": 22165 }, { - "epoch": 0.6290011350737797, + "epoch": 0.6281277451897191, "grad_norm": 0.0, - "learning_rate": 6.391364582523355e-06, - "loss": 0.763, + "learning_rate": 6.417605783642909e-06, + "loss": 0.7734, "step": 22166 }, { - "epoch": 0.6290295119182747, + "epoch": 0.6281560826319816, "grad_norm": 0.0, - "learning_rate": 6.390507448439007e-06, - "loss": 0.7325, + "learning_rate": 6.4167489233668386e-06, + "loss": 0.8116, "step": 22167 }, { - "epoch": 0.6290578887627696, + "epoch": 0.6281844200742441, "grad_norm": 0.0, - "learning_rate": 6.389650344844206e-06, - "loss": 0.8667, + "learning_rate": 6.415892093273824e-06, + "loss": 0.8796, "step": 22168 }, { - "epoch": 0.6290862656072644, + "epoch": 0.6282127575165065, "grad_norm": 0.0, - "learning_rate": 6.388793271746192e-06, - "loss": 0.8715, + "learning_rate": 6.415035293371081e-06, + "loss": 0.7122, "step": 22169 }, { - "epoch": 0.6291146424517594, + "epoch": 0.628241094958769, "grad_norm": 0.0, - "learning_rate": 6.3879362291522e-06, - "loss": 0.8365, + "learning_rate": 6.4141785236658285e-06, + "loss": 0.9216, "step": 22170 }, { - "epoch": 0.6291430192962543, + "epoch": 0.6282694324010315, "grad_norm": 0.0, - "learning_rate": 6.387079217069476e-06, - "loss": 0.889, + "learning_rate": 6.413321784165281e-06, + "loss": 0.7541, "step": 22171 }, { - "epoch": 0.6291713961407491, + "epoch": 0.628297769843294, "grad_norm": 0.0, - "learning_rate": 6.386222235505257e-06, - "loss": 0.9462, + "learning_rate": 6.412465074876653e-06, + "loss": 0.8471, "step": 22172 }, { - "epoch": 0.629199772985244, + "epoch": 0.6283261072855564, "grad_norm": 0.0, - "learning_rate": 6.385365284466779e-06, - "loss": 0.9829, + "learning_rate": 6.4116083958071654e-06, + "loss": 0.8905, "step": 22173 }, { - "epoch": 0.629228149829739, + "epoch": 0.6283544447278189, "grad_norm": 0.0, - "learning_rate": 6.384508363961284e-06, - "loss": 0.8425, + "learning_rate": 6.410751746964037e-06, + "loss": 0.9004, "step": 22174 }, { - "epoch": 0.6292565266742338, + "epoch": 0.6283827821700814, "grad_norm": 0.0, - "learning_rate": 6.383651473996011e-06, - "loss": 0.747, + "learning_rate": 6.409895128354475e-06, + "loss": 0.8577, "step": 22175 }, { - "epoch": 0.6292849035187287, + "epoch": 0.6284111196123437, "grad_norm": 0.0, - "learning_rate": 6.382794614578193e-06, - "loss": 0.8527, + "learning_rate": 6.409038539985699e-06, + "loss": 0.9154, "step": 22176 }, { - "epoch": 0.6293132803632236, + "epoch": 0.6284394570546062, "grad_norm": 0.0, - "learning_rate": 6.381937785715069e-06, - "loss": 0.9043, + "learning_rate": 6.408181981864927e-06, + "loss": 0.892, "step": 22177 }, { - "epoch": 0.6293416572077185, + "epoch": 0.6284677944968687, "grad_norm": 0.0, - "learning_rate": 6.381080987413884e-06, - "loss": 0.8114, + "learning_rate": 6.4073254539993705e-06, + "loss": 0.8915, "step": 22178 }, { - "epoch": 0.6293700340522134, + "epoch": 0.6284961319391311, "grad_norm": 0.0, - "learning_rate": 6.380224219681872e-06, - "loss": 0.8454, + "learning_rate": 6.4064689563962505e-06, + "loss": 0.9774, "step": 22179 }, { - "epoch": 0.6293984108967083, + "epoch": 0.6285244693813936, "grad_norm": 0.0, - "learning_rate": 6.379367482526264e-06, - "loss": 0.7773, + "learning_rate": 6.405612489062771e-06, + "loss": 0.8045, "step": 22180 }, { - "epoch": 0.6294267877412032, + "epoch": 0.6285528068236561, "grad_norm": 0.0, - "learning_rate": 6.378510775954305e-06, - "loss": 0.9052, + "learning_rate": 6.404756052006153e-06, + "loss": 0.8452, "step": 22181 }, { - "epoch": 0.6294551645856981, + "epoch": 0.6285811442659186, "grad_norm": 0.0, - "learning_rate": 6.377654099973229e-06, - "loss": 0.885, + "learning_rate": 6.403899645233612e-06, + "loss": 0.9069, "step": 22182 }, { - "epoch": 0.6294835414301929, + "epoch": 0.628609481708181, "grad_norm": 0.0, - "learning_rate": 6.376797454590269e-06, - "loss": 0.7931, + "learning_rate": 6.403043268752358e-06, + "loss": 0.8199, "step": 22183 }, { - "epoch": 0.6295119182746879, + "epoch": 0.6286378191504435, "grad_norm": 0.0, - "learning_rate": 6.375940839812667e-06, - "loss": 0.901, + "learning_rate": 6.402186922569609e-06, + "loss": 0.7831, "step": 22184 }, { - "epoch": 0.6295402951191827, + "epoch": 0.628666156592706, "grad_norm": 0.0, - "learning_rate": 6.375084255647654e-06, - "loss": 0.894, + "learning_rate": 6.4013306066925725e-06, + "loss": 0.9072, "step": 22185 }, { - "epoch": 0.6295686719636776, + "epoch": 0.6286944940349684, "grad_norm": 0.0, - "learning_rate": 6.374227702102467e-06, - "loss": 0.844, + "learning_rate": 6.400474321128465e-06, + "loss": 0.864, "step": 22186 }, { - "epoch": 0.6295970488081726, + "epoch": 0.6287228314772308, "grad_norm": 0.0, - "learning_rate": 6.37337117918434e-06, - "loss": 0.8312, + "learning_rate": 6.3996180658845035e-06, + "loss": 0.8589, "step": 22187 }, { - "epoch": 0.6296254256526674, + "epoch": 0.6287511689194933, "grad_norm": 0.0, - "learning_rate": 6.372514686900513e-06, - "loss": 0.8874, + "learning_rate": 6.398761840967891e-06, + "loss": 0.7152, "step": 22188 }, { - "epoch": 0.6296538024971623, + "epoch": 0.6287795063617558, "grad_norm": 0.0, - "learning_rate": 6.371658225258216e-06, - "loss": 0.8105, + "learning_rate": 6.397905646385844e-06, + "loss": 0.8708, "step": 22189 }, { - "epoch": 0.6296821793416572, + "epoch": 0.6288078438040182, "grad_norm": 0.0, - "learning_rate": 6.370801794264683e-06, - "loss": 0.8303, + "learning_rate": 6.397049482145578e-06, + "loss": 0.8517, "step": 22190 }, { - "epoch": 0.6297105561861521, + "epoch": 0.6288361812462807, "grad_norm": 0.0, - "learning_rate": 6.3699453939271525e-06, - "loss": 0.9252, + "learning_rate": 6.3961933482543e-06, + "loss": 0.9457, "step": 22191 }, { - "epoch": 0.629738933030647, + "epoch": 0.6288645186885432, "grad_norm": 0.0, - "learning_rate": 6.3690890242528534e-06, - "loss": 0.8686, + "learning_rate": 6.395337244719229e-06, + "loss": 0.9185, "step": 22192 }, { - "epoch": 0.6297673098751418, + "epoch": 0.6288928561308056, "grad_norm": 0.0, - "learning_rate": 6.368232685249026e-06, - "loss": 0.7939, + "learning_rate": 6.394481171547566e-06, + "loss": 0.855, "step": 22193 }, { - "epoch": 0.6297956867196368, + "epoch": 0.6289211935730681, "grad_norm": 0.0, - "learning_rate": 6.367376376922898e-06, - "loss": 0.9327, + "learning_rate": 6.393625128746527e-06, + "loss": 0.895, "step": 22194 }, { - "epoch": 0.6298240635641317, + "epoch": 0.6289495310153306, "grad_norm": 0.0, - "learning_rate": 6.366520099281708e-06, - "loss": 0.7712, + "learning_rate": 6.392769116323325e-06, + "loss": 0.8512, "step": 22195 }, { - "epoch": 0.6298524404086265, + "epoch": 0.6289778684575931, "grad_norm": 0.0, - "learning_rate": 6.365663852332685e-06, - "loss": 0.8973, + "learning_rate": 6.391913134285166e-06, + "loss": 0.9281, "step": 22196 }, { - "epoch": 0.6298808172531215, + "epoch": 0.6290062058998555, "grad_norm": 0.0, - "learning_rate": 6.3648076360830615e-06, - "loss": 0.9028, + "learning_rate": 6.391057182639262e-06, + "loss": 0.8748, "step": 22197 }, { - "epoch": 0.6299091940976164, + "epoch": 0.629034543342118, "grad_norm": 0.0, - "learning_rate": 6.3639514505400735e-06, - "loss": 0.7932, + "learning_rate": 6.390201261392828e-06, + "loss": 0.94, "step": 22198 }, { - "epoch": 0.6299375709421112, + "epoch": 0.6290628807843804, "grad_norm": 0.0, - "learning_rate": 6.36309529571095e-06, - "loss": 0.7864, + "learning_rate": 6.389345370553065e-06, + "loss": 0.9703, "step": 22199 }, { - "epoch": 0.6299659477866061, + "epoch": 0.6290912182266428, "grad_norm": 0.0, - "learning_rate": 6.362239171602923e-06, - "loss": 0.8865, + "learning_rate": 6.3884895101271915e-06, + "loss": 0.7815, "step": 22200 }, { - "epoch": 0.629994324631101, + "epoch": 0.6291195556689053, "grad_norm": 0.0, - "learning_rate": 6.361383078223227e-06, - "loss": 0.858, + "learning_rate": 6.387633680122407e-06, + "loss": 0.7595, "step": 22201 }, { - "epoch": 0.6300227014755959, + "epoch": 0.6291478931111678, "grad_norm": 0.0, - "learning_rate": 6.3605270155790925e-06, - "loss": 0.8957, + "learning_rate": 6.386777880545924e-06, + "loss": 0.8605, "step": 22202 }, { - "epoch": 0.6300510783200908, + "epoch": 0.6291762305534302, "grad_norm": 0.0, - "learning_rate": 6.3596709836777455e-06, - "loss": 0.8607, + "learning_rate": 6.385922111404958e-06, + "loss": 0.9363, "step": 22203 }, { - "epoch": 0.6300794551645857, + "epoch": 0.6292045679956927, "grad_norm": 0.0, - "learning_rate": 6.3588149825264246e-06, - "loss": 0.9182, + "learning_rate": 6.385066372706707e-06, + "loss": 0.8321, "step": 22204 }, { - "epoch": 0.6301078320090806, + "epoch": 0.6292329054379552, "grad_norm": 0.0, - "learning_rate": 6.357959012132357e-06, - "loss": 0.8235, + "learning_rate": 6.3842106644583875e-06, + "loss": 0.882, "step": 22205 }, { - "epoch": 0.6301362088535755, + "epoch": 0.6292612428802177, "grad_norm": 0.0, - "learning_rate": 6.357103072502772e-06, - "loss": 0.7928, + "learning_rate": 6.383354986667206e-06, + "loss": 0.7922, "step": 22206 }, { - "epoch": 0.6301645856980703, + "epoch": 0.6292895803224801, "grad_norm": 0.0, - "learning_rate": 6.3562471636449016e-06, - "loss": 0.8156, + "learning_rate": 6.382499339340365e-06, + "loss": 0.8642, "step": 22207 }, { - "epoch": 0.6301929625425653, + "epoch": 0.6293179177647426, "grad_norm": 0.0, - "learning_rate": 6.355391285565975e-06, - "loss": 0.7854, + "learning_rate": 6.381643722485078e-06, + "loss": 0.8347, "step": 22208 }, { - "epoch": 0.6302213393870602, + "epoch": 0.629346255207005, "grad_norm": 0.0, - "learning_rate": 6.354535438273224e-06, - "loss": 0.8448, + "learning_rate": 6.3807881361085465e-06, + "loss": 0.7362, "step": 22209 }, { - "epoch": 0.630249716231555, + "epoch": 0.6293745926492674, "grad_norm": 0.0, - "learning_rate": 6.353679621773875e-06, - "loss": 0.96, + "learning_rate": 6.379932580217981e-06, + "loss": 0.9014, "step": 22210 }, { - "epoch": 0.63027809307605, + "epoch": 0.6294029300915299, "grad_norm": 0.0, - "learning_rate": 6.352823836075157e-06, - "loss": 0.9124, + "learning_rate": 6.379077054820592e-06, + "loss": 0.8125, "step": 22211 }, { - "epoch": 0.6303064699205448, + "epoch": 0.6294312675337924, "grad_norm": 0.0, - "learning_rate": 6.351968081184301e-06, - "loss": 0.7666, + "learning_rate": 6.378221559923576e-06, + "loss": 0.867, "step": 22212 }, { - "epoch": 0.6303348467650397, + "epoch": 0.6294596049760549, "grad_norm": 0.0, - "learning_rate": 6.351112357108535e-06, - "loss": 0.9066, + "learning_rate": 6.377366095534152e-06, + "loss": 0.7677, "step": 22213 }, { - "epoch": 0.6303632236095347, + "epoch": 0.6294879424183173, "grad_norm": 0.0, - "learning_rate": 6.350256663855086e-06, - "loss": 0.8039, + "learning_rate": 6.376510661659512e-06, + "loss": 0.8319, "step": 22214 }, { - "epoch": 0.6303916004540295, + "epoch": 0.6295162798605798, "grad_norm": 0.0, - "learning_rate": 6.349401001431185e-06, - "loss": 0.819, + "learning_rate": 6.375655258306869e-06, + "loss": 0.7818, "step": 22215 }, { - "epoch": 0.6304199772985244, + "epoch": 0.6295446173028423, "grad_norm": 0.0, - "learning_rate": 6.3485453698440565e-06, - "loss": 0.8717, + "learning_rate": 6.3747998854834295e-06, + "loss": 0.7577, "step": 22216 }, { - "epoch": 0.6304483541430193, + "epoch": 0.6295729547451047, "grad_norm": 0.0, - "learning_rate": 6.347689769100929e-06, - "loss": 0.9197, + "learning_rate": 6.373944543196394e-06, + "loss": 0.9365, "step": 22217 }, { - "epoch": 0.6304767309875142, + "epoch": 0.6296012921873672, "grad_norm": 0.0, - "learning_rate": 6.346834199209031e-06, - "loss": 0.8308, + "learning_rate": 6.373089231452972e-06, + "loss": 0.8455, "step": 22218 }, { - "epoch": 0.6305051078320091, + "epoch": 0.6296296296296297, "grad_norm": 0.0, - "learning_rate": 6.345978660175589e-06, - "loss": 0.7084, + "learning_rate": 6.372233950260368e-06, + "loss": 0.92, "step": 22219 }, { - "epoch": 0.6305334846765039, + "epoch": 0.6296579670718921, "grad_norm": 0.0, - "learning_rate": 6.345123152007827e-06, - "loss": 0.7356, + "learning_rate": 6.371378699625781e-06, + "loss": 0.8647, "step": 22220 }, { - "epoch": 0.6305618615209989, + "epoch": 0.6296863045141545, "grad_norm": 0.0, - "learning_rate": 6.344267674712976e-06, - "loss": 0.8578, + "learning_rate": 6.370523479556422e-06, + "loss": 0.8479, "step": 22221 }, { - "epoch": 0.6305902383654938, + "epoch": 0.629714641956417, "grad_norm": 0.0, - "learning_rate": 6.343412228298262e-06, - "loss": 0.7877, + "learning_rate": 6.369668290059489e-06, + "loss": 0.8346, "step": 22222 }, { - "epoch": 0.6306186152099886, + "epoch": 0.6297429793986795, "grad_norm": 0.0, - "learning_rate": 6.342556812770904e-06, - "loss": 0.8898, + "learning_rate": 6.368813131142187e-06, + "loss": 0.8788, "step": 22223 }, { - "epoch": 0.6306469920544835, + "epoch": 0.6297713168409419, "grad_norm": 0.0, - "learning_rate": 6.341701428138135e-06, - "loss": 0.9014, + "learning_rate": 6.367958002811726e-06, + "loss": 0.8802, "step": 22224 }, { - "epoch": 0.6306753688989785, + "epoch": 0.6297996542832044, "grad_norm": 0.0, - "learning_rate": 6.340846074407181e-06, - "loss": 0.8575, + "learning_rate": 6.367102905075299e-06, + "loss": 1.0052, "step": 22225 }, { - "epoch": 0.6307037457434733, + "epoch": 0.6298279917254669, "grad_norm": 0.0, - "learning_rate": 6.3399907515852645e-06, - "loss": 0.8442, + "learning_rate": 6.3662478379401125e-06, + "loss": 0.8415, "step": 22226 }, { - "epoch": 0.6307321225879682, + "epoch": 0.6298563291677293, "grad_norm": 0.0, - "learning_rate": 6.339135459679611e-06, - "loss": 0.8222, + "learning_rate": 6.365392801413375e-06, + "loss": 0.8046, "step": 22227 }, { - "epoch": 0.6307604994324632, + "epoch": 0.6298846666099918, "grad_norm": 0.0, - "learning_rate": 6.338280198697442e-06, - "loss": 0.9388, + "learning_rate": 6.364537795502278e-06, + "loss": 0.8148, "step": 22228 }, { - "epoch": 0.630788876276958, + "epoch": 0.6299130040522543, "grad_norm": 0.0, - "learning_rate": 6.337424968645987e-06, - "loss": 0.7813, + "learning_rate": 6.363682820214032e-06, + "loss": 0.8291, "step": 22229 }, { - "epoch": 0.6308172531214529, + "epoch": 0.6299413414945167, "grad_norm": 0.0, - "learning_rate": 6.336569769532468e-06, - "loss": 0.8235, + "learning_rate": 6.362827875555835e-06, + "loss": 0.8428, "step": 22230 }, { - "epoch": 0.6308456299659478, + "epoch": 0.6299696789367791, "grad_norm": 0.0, - "learning_rate": 6.3357146013641065e-06, - "loss": 0.7905, + "learning_rate": 6.361972961534888e-06, + "loss": 0.7999, "step": 22231 }, { - "epoch": 0.6308740068104427, + "epoch": 0.6299980163790416, "grad_norm": 0.0, - "learning_rate": 6.334859464148131e-06, - "loss": 0.9272, + "learning_rate": 6.361118078158398e-06, + "loss": 0.7719, "step": 22232 }, { - "epoch": 0.6309023836549376, + "epoch": 0.6300263538213041, "grad_norm": 0.0, - "learning_rate": 6.334004357891763e-06, - "loss": 0.9406, + "learning_rate": 6.360263225433559e-06, + "loss": 0.84, "step": 22233 }, { - "epoch": 0.6309307604994324, + "epoch": 0.6300546912635665, "grad_norm": 0.0, - "learning_rate": 6.333149282602222e-06, - "loss": 0.917, + "learning_rate": 6.359408403367572e-06, + "loss": 0.8921, "step": 22234 }, { - "epoch": 0.6309591373439274, + "epoch": 0.630083028705829, "grad_norm": 0.0, - "learning_rate": 6.332294238286736e-06, - "loss": 0.859, + "learning_rate": 6.358553611967644e-06, + "loss": 0.9025, "step": 22235 }, { - "epoch": 0.6309875141884222, + "epoch": 0.6301113661480915, "grad_norm": 0.0, - "learning_rate": 6.331439224952525e-06, - "loss": 0.829, + "learning_rate": 6.357698851240968e-06, + "loss": 0.836, "step": 22236 }, { - "epoch": 0.6310158910329171, + "epoch": 0.630139703590354, "grad_norm": 0.0, - "learning_rate": 6.3305842426068095e-06, - "loss": 0.9536, + "learning_rate": 6.356844121194751e-06, + "loss": 0.8401, "step": 22237 }, { - "epoch": 0.6310442678774121, + "epoch": 0.6301680410326164, "grad_norm": 0.0, - "learning_rate": 6.329729291256814e-06, - "loss": 0.8813, + "learning_rate": 6.355989421836185e-06, + "loss": 0.8639, "step": 22238 }, { - "epoch": 0.6310726447219069, + "epoch": 0.6301963784748789, "grad_norm": 0.0, - "learning_rate": 6.328874370909762e-06, - "loss": 0.9761, + "learning_rate": 6.355134753172474e-06, + "loss": 0.8057, "step": 22239 }, { - "epoch": 0.6311010215664018, + "epoch": 0.6302247159171414, "grad_norm": 0.0, - "learning_rate": 6.328019481572875e-06, - "loss": 0.8198, + "learning_rate": 6.35428011521082e-06, + "loss": 0.9663, "step": 22240 }, { - "epoch": 0.6311293984108967, + "epoch": 0.6302530533594037, "grad_norm": 0.0, - "learning_rate": 6.327164623253369e-06, - "loss": 0.7994, + "learning_rate": 6.353425507958414e-06, + "loss": 0.8499, "step": 22241 }, { - "epoch": 0.6311577752553916, + "epoch": 0.6302813908016662, "grad_norm": 0.0, - "learning_rate": 6.326309795958472e-06, - "loss": 0.8984, + "learning_rate": 6.3525709314224616e-06, + "loss": 0.9131, "step": 22242 }, { - "epoch": 0.6311861520998865, + "epoch": 0.6303097282439287, "grad_norm": 0.0, - "learning_rate": 6.325454999695399e-06, - "loss": 0.775, + "learning_rate": 6.351716385610156e-06, + "loss": 0.8854, "step": 22243 }, { - "epoch": 0.6312145289443813, + "epoch": 0.6303380656861912, "grad_norm": 0.0, - "learning_rate": 6.324600234471372e-06, - "loss": 0.791, + "learning_rate": 6.3508618705287e-06, + "loss": 0.9048, "step": 22244 }, { - "epoch": 0.6312429057888763, + "epoch": 0.6303664031284536, "grad_norm": 0.0, - "learning_rate": 6.323745500293614e-06, - "loss": 0.6732, + "learning_rate": 6.350007386185291e-06, + "loss": 0.9817, "step": 22245 }, { - "epoch": 0.6312712826333712, + "epoch": 0.6303947405707161, "grad_norm": 0.0, - "learning_rate": 6.322890797169344e-06, - "loss": 0.886, + "learning_rate": 6.349152932587122e-06, + "loss": 0.8416, "step": 22246 }, { - "epoch": 0.631299659477866, + "epoch": 0.6304230780129786, "grad_norm": 0.0, - "learning_rate": 6.322036125105779e-06, - "loss": 0.8795, + "learning_rate": 6.348298509741394e-06, + "loss": 0.7815, "step": 22247 }, { - "epoch": 0.631328036322361, + "epoch": 0.630451415455241, "grad_norm": 0.0, - "learning_rate": 6.3211814841101404e-06, - "loss": 0.8882, + "learning_rate": 6.347444117655306e-06, + "loss": 0.8347, "step": 22248 }, { - "epoch": 0.6313564131668559, + "epoch": 0.6304797528975035, "grad_norm": 0.0, - "learning_rate": 6.320326874189648e-06, - "loss": 0.8614, + "learning_rate": 6.34658975633605e-06, + "loss": 0.8439, "step": 22249 }, { - "epoch": 0.6313847900113507, + "epoch": 0.630508090339766, "grad_norm": 0.0, - "learning_rate": 6.319472295351517e-06, - "loss": 0.8184, + "learning_rate": 6.345735425790829e-06, + "loss": 0.867, "step": 22250 }, { - "epoch": 0.6314131668558456, + "epoch": 0.6305364277820283, "grad_norm": 0.0, - "learning_rate": 6.318617747602973e-06, - "loss": 0.7575, + "learning_rate": 6.344881126026832e-06, + "loss": 0.84, "step": 22251 }, { - "epoch": 0.6314415437003406, + "epoch": 0.6305647652242908, "grad_norm": 0.0, - "learning_rate": 6.317763230951228e-06, - "loss": 0.9316, + "learning_rate": 6.344026857051257e-06, + "loss": 0.8002, "step": 22252 }, { - "epoch": 0.6314699205448354, + "epoch": 0.6305931026665533, "grad_norm": 0.0, - "learning_rate": 6.3169087454035005e-06, - "loss": 0.8894, + "learning_rate": 6.343172618871307e-06, + "loss": 0.8192, "step": 22253 }, { - "epoch": 0.6314982973893303, + "epoch": 0.6306214401088158, "grad_norm": 0.0, - "learning_rate": 6.316054290967013e-06, - "loss": 0.7932, + "learning_rate": 6.3423184114941686e-06, + "loss": 0.8781, "step": 22254 }, { - "epoch": 0.6315266742338252, + "epoch": 0.6306497775510782, "grad_norm": 0.0, - "learning_rate": 6.31519986764898e-06, - "loss": 0.879, + "learning_rate": 6.341464234927039e-06, + "loss": 0.8406, "step": 22255 }, { - "epoch": 0.6315550510783201, + "epoch": 0.6306781149933407, "grad_norm": 0.0, - "learning_rate": 6.31434547545662e-06, - "loss": 0.9011, + "learning_rate": 6.3406100891771175e-06, + "loss": 0.798, "step": 22256 }, { - "epoch": 0.631583427922815, + "epoch": 0.6307064524356032, "grad_norm": 0.0, - "learning_rate": 6.31349111439715e-06, - "loss": 0.7681, + "learning_rate": 6.339755974251594e-06, + "loss": 0.8644, "step": 22257 }, { - "epoch": 0.6316118047673098, + "epoch": 0.6307347898778656, "grad_norm": 0.0, - "learning_rate": 6.312636784477783e-06, - "loss": 0.8327, + "learning_rate": 6.3389018901576695e-06, + "loss": 0.7942, "step": 22258 }, { - "epoch": 0.6316401816118048, + "epoch": 0.6307631273201281, "grad_norm": 0.0, - "learning_rate": 6.311782485705744e-06, - "loss": 0.8181, + "learning_rate": 6.338047836902528e-06, + "loss": 0.7166, "step": 22259 }, { - "epoch": 0.6316685584562997, + "epoch": 0.6307914647623906, "grad_norm": 0.0, - "learning_rate": 6.310928218088241e-06, - "loss": 0.7396, + "learning_rate": 6.337193814493371e-06, + "loss": 0.8883, "step": 22260 }, { - "epoch": 0.6316969353007945, + "epoch": 0.6308198022046531, "grad_norm": 0.0, - "learning_rate": 6.310073981632493e-06, - "loss": 0.8927, + "learning_rate": 6.3363398229373915e-06, + "loss": 0.8247, "step": 22261 }, { - "epoch": 0.6317253121452895, + "epoch": 0.6308481396469154, "grad_norm": 0.0, - "learning_rate": 6.309219776345718e-06, - "loss": 0.8686, + "learning_rate": 6.335485862241782e-06, + "loss": 0.833, "step": 22262 }, { - "epoch": 0.6317536889897843, + "epoch": 0.6308764770891779, "grad_norm": 0.0, - "learning_rate": 6.308365602235128e-06, - "loss": 0.9436, + "learning_rate": 6.33463193241374e-06, + "loss": 0.9013, "step": 22263 }, { - "epoch": 0.6317820658342792, + "epoch": 0.6309048145314404, "grad_norm": 0.0, - "learning_rate": 6.3075114593079375e-06, - "loss": 0.8765, + "learning_rate": 6.3337780334604485e-06, + "loss": 0.8349, "step": 22264 }, { - "epoch": 0.6318104426787742, + "epoch": 0.6309331519737028, "grad_norm": 0.0, - "learning_rate": 6.306657347571367e-06, - "loss": 0.8997, + "learning_rate": 6.332924165389106e-06, + "loss": 0.7822, "step": 22265 }, { - "epoch": 0.631838819523269, + "epoch": 0.6309614894159653, "grad_norm": 0.0, - "learning_rate": 6.3058032670326285e-06, - "loss": 0.7998, + "learning_rate": 6.33207032820691e-06, + "loss": 0.8139, "step": 22266 }, { - "epoch": 0.6318671963677639, + "epoch": 0.6309898268582278, "grad_norm": 0.0, - "learning_rate": 6.304949217698932e-06, - "loss": 0.8043, + "learning_rate": 6.331216521921044e-06, + "loss": 0.8678, "step": 22267 }, { - "epoch": 0.6318955732122588, + "epoch": 0.6310181643004903, "grad_norm": 0.0, - "learning_rate": 6.3040951995775e-06, - "loss": 0.8702, + "learning_rate": 6.330362746538703e-06, + "loss": 0.862, "step": 22268 }, { - "epoch": 0.6319239500567537, + "epoch": 0.6310465017427527, "grad_norm": 0.0, - "learning_rate": 6.303241212675536e-06, - "loss": 0.8519, + "learning_rate": 6.32950900206708e-06, + "loss": 0.8617, "step": 22269 }, { - "epoch": 0.6319523269012486, + "epoch": 0.6310748391850152, "grad_norm": 0.0, - "learning_rate": 6.302387257000264e-06, - "loss": 0.885, + "learning_rate": 6.328655288513366e-06, + "loss": 0.8307, "step": 22270 }, { - "epoch": 0.6319807037457434, + "epoch": 0.6311031766272777, "grad_norm": 0.0, - "learning_rate": 6.3015333325588934e-06, - "loss": 0.7937, + "learning_rate": 6.327801605884754e-06, + "loss": 0.9498, "step": 22271 }, { - "epoch": 0.6320090805902384, + "epoch": 0.63113151406954, "grad_norm": 0.0, - "learning_rate": 6.300679439358635e-06, - "loss": 0.8032, + "learning_rate": 6.32694795418843e-06, + "loss": 0.9684, "step": 22272 }, { - "epoch": 0.6320374574347333, + "epoch": 0.6311598515118025, "grad_norm": 0.0, - "learning_rate": 6.2998255774067056e-06, - "loss": 0.8414, + "learning_rate": 6.326094333431587e-06, + "loss": 0.7509, "step": 22273 }, { - "epoch": 0.6320658342792281, + "epoch": 0.631188188954065, "grad_norm": 0.0, - "learning_rate": 6.298971746710316e-06, - "loss": 0.8444, + "learning_rate": 6.3252407436214165e-06, + "loss": 0.7736, "step": 22274 }, { - "epoch": 0.632094211123723, + "epoch": 0.6312165263963274, "grad_norm": 0.0, - "learning_rate": 6.298117947276676e-06, - "loss": 0.8255, + "learning_rate": 6.324387184765108e-06, + "loss": 0.8365, "step": 22275 }, { - "epoch": 0.632122587968218, + "epoch": 0.6312448638385899, "grad_norm": 0.0, - "learning_rate": 6.297264179113001e-06, - "loss": 0.8847, + "learning_rate": 6.323533656869849e-06, + "loss": 0.772, "step": 22276 }, { - "epoch": 0.6321509648127128, + "epoch": 0.6312732012808524, "grad_norm": 0.0, - "learning_rate": 6.296410442226504e-06, - "loss": 0.9059, + "learning_rate": 6.322680159942838e-06, + "loss": 0.7411, "step": 22277 }, { - "epoch": 0.6321793416572077, + "epoch": 0.6313015387231149, "grad_norm": 0.0, - "learning_rate": 6.295556736624392e-06, - "loss": 0.9274, + "learning_rate": 6.321826693991251e-06, + "loss": 0.8665, "step": 22278 }, { - "epoch": 0.6322077185017027, + "epoch": 0.6313298761653773, "grad_norm": 0.0, - "learning_rate": 6.29470306231388e-06, - "loss": 0.7677, + "learning_rate": 6.320973259022286e-06, + "loss": 0.8331, "step": 22279 }, { - "epoch": 0.6322360953461975, + "epoch": 0.6313582136076398, "grad_norm": 0.0, - "learning_rate": 6.293849419302179e-06, - "loss": 0.7995, + "learning_rate": 6.32011985504313e-06, + "loss": 0.8866, "step": 22280 }, { - "epoch": 0.6322644721906924, + "epoch": 0.6313865510499023, "grad_norm": 0.0, - "learning_rate": 6.292995807596496e-06, - "loss": 0.9567, + "learning_rate": 6.3192664820609685e-06, + "loss": 0.8569, "step": 22281 }, { - "epoch": 0.6322928490351872, + "epoch": 0.6314148884921646, "grad_norm": 0.0, - "learning_rate": 6.292142227204045e-06, - "loss": 0.825, + "learning_rate": 6.318413140082995e-06, + "loss": 0.9138, "step": 22282 }, { - "epoch": 0.6323212258796822, + "epoch": 0.6314432259344271, "grad_norm": 0.0, - "learning_rate": 6.291288678132037e-06, - "loss": 0.7341, + "learning_rate": 6.317559829116392e-06, + "loss": 0.8637, "step": 22283 }, { - "epoch": 0.6323496027241771, + "epoch": 0.6314715633766896, "grad_norm": 0.0, - "learning_rate": 6.2904351603876756e-06, - "loss": 0.9837, + "learning_rate": 6.31670654916835e-06, + "loss": 0.844, "step": 22284 }, { - "epoch": 0.6323779795686719, + "epoch": 0.6314999008189521, "grad_norm": 0.0, - "learning_rate": 6.289581673978177e-06, - "loss": 0.9197, + "learning_rate": 6.315853300246061e-06, + "loss": 0.7311, "step": 22285 }, { - "epoch": 0.6324063564131669, + "epoch": 0.6315282382612145, "grad_norm": 0.0, - "learning_rate": 6.288728218910751e-06, - "loss": 0.8753, + "learning_rate": 6.315000082356704e-06, + "loss": 0.8584, "step": 22286 }, { - "epoch": 0.6324347332576618, + "epoch": 0.631556575703477, "grad_norm": 0.0, - "learning_rate": 6.287874795192603e-06, - "loss": 0.7837, + "learning_rate": 6.31414689550747e-06, + "loss": 0.8708, "step": 22287 }, { - "epoch": 0.6324631101021566, + "epoch": 0.6315849131457395, "grad_norm": 0.0, - "learning_rate": 6.287021402830944e-06, - "loss": 0.684, + "learning_rate": 6.313293739705545e-06, + "loss": 0.8027, "step": 22288 }, { - "epoch": 0.6324914869466516, + "epoch": 0.6316132505880019, "grad_norm": 0.0, - "learning_rate": 6.286168041832983e-06, - "loss": 0.7476, + "learning_rate": 6.3124406149581154e-06, + "loss": 0.8668, "step": 22289 }, { - "epoch": 0.6325198637911464, + "epoch": 0.6316415880302644, "grad_norm": 0.0, - "learning_rate": 6.285314712205928e-06, - "loss": 0.8328, + "learning_rate": 6.311587521272372e-06, + "loss": 0.871, "step": 22290 }, { - "epoch": 0.6325482406356413, + "epoch": 0.6316699254725269, "grad_norm": 0.0, - "learning_rate": 6.284461413956984e-06, - "loss": 0.78, + "learning_rate": 6.3107344586554915e-06, + "loss": 0.8318, "step": 22291 }, { - "epoch": 0.6325766174801362, + "epoch": 0.6316982629147894, "grad_norm": 0.0, - "learning_rate": 6.283608147093363e-06, - "loss": 0.9043, + "learning_rate": 6.309881427114668e-06, + "loss": 0.8255, "step": 22292 }, { - "epoch": 0.6326049943246311, + "epoch": 0.6317266003570517, "grad_norm": 0.0, - "learning_rate": 6.282754911622271e-06, - "loss": 0.7618, + "learning_rate": 6.309028426657082e-06, + "loss": 0.7355, "step": 22293 }, { - "epoch": 0.632633371169126, + "epoch": 0.6317549377993142, "grad_norm": 0.0, - "learning_rate": 6.281901707550917e-06, - "loss": 0.7795, + "learning_rate": 6.308175457289918e-06, + "loss": 0.8301, "step": 22294 }, { - "epoch": 0.6326617480136209, + "epoch": 0.6317832752415767, "grad_norm": 0.0, - "learning_rate": 6.281048534886503e-06, - "loss": 0.8342, + "learning_rate": 6.307322519020369e-06, + "loss": 0.7811, "step": 22295 }, { - "epoch": 0.6326901248581158, + "epoch": 0.6318116126838391, "grad_norm": 0.0, - "learning_rate": 6.280195393636241e-06, - "loss": 0.8994, + "learning_rate": 6.306469611855609e-06, + "loss": 0.811, "step": 22296 }, { - "epoch": 0.6327185017026107, + "epoch": 0.6318399501261016, "grad_norm": 0.0, - "learning_rate": 6.279342283807337e-06, - "loss": 0.9462, + "learning_rate": 6.305616735802827e-06, + "loss": 0.9092, "step": 22297 }, { - "epoch": 0.6327468785471055, + "epoch": 0.6318682875683641, "grad_norm": 0.0, - "learning_rate": 6.278489205406992e-06, - "loss": 0.8893, + "learning_rate": 6.304763890869211e-06, + "loss": 0.853, "step": 22298 }, { - "epoch": 0.6327752553916004, + "epoch": 0.6318966250106265, "grad_norm": 0.0, - "learning_rate": 6.2776361584424205e-06, - "loss": 0.8999, + "learning_rate": 6.303911077061937e-06, + "loss": 0.8306, "step": 22299 }, { - "epoch": 0.6328036322360954, + "epoch": 0.631924962452889, "grad_norm": 0.0, - "learning_rate": 6.276783142920818e-06, - "loss": 0.9097, + "learning_rate": 6.303058294388195e-06, + "loss": 0.7934, "step": 22300 }, { - "epoch": 0.6328320090805902, + "epoch": 0.6319532998951515, "grad_norm": 0.0, - "learning_rate": 6.2759301588494e-06, - "loss": 0.8419, + "learning_rate": 6.302205542855163e-06, + "loss": 0.8802, "step": 22301 }, { - "epoch": 0.6328603859250851, + "epoch": 0.631981637337414, "grad_norm": 0.0, - "learning_rate": 6.275077206235366e-06, - "loss": 0.8675, + "learning_rate": 6.301352822470027e-06, + "loss": 0.7993, "step": 22302 }, { - "epoch": 0.6328887627695801, + "epoch": 0.6320099747796764, "grad_norm": 0.0, - "learning_rate": 6.274224285085924e-06, - "loss": 0.8924, + "learning_rate": 6.300500133239974e-06, + "loss": 0.8666, "step": 22303 }, { - "epoch": 0.6329171396140749, + "epoch": 0.6320383122219388, "grad_norm": 0.0, - "learning_rate": 6.273371395408276e-06, - "loss": 0.7766, + "learning_rate": 6.299647475172178e-06, + "loss": 0.8737, "step": 22304 }, { - "epoch": 0.6329455164585698, + "epoch": 0.6320666496642013, "grad_norm": 0.0, - "learning_rate": 6.2725185372096266e-06, - "loss": 0.8461, + "learning_rate": 6.298794848273826e-06, + "loss": 0.7951, "step": 22305 }, { - "epoch": 0.6329738933030647, + "epoch": 0.6320949871064637, "grad_norm": 0.0, - "learning_rate": 6.271665710497182e-06, - "loss": 0.851, + "learning_rate": 6.297942252552101e-06, + "loss": 0.7897, "step": 22306 }, { - "epoch": 0.6330022701475596, + "epoch": 0.6321233245487262, "grad_norm": 0.0, - "learning_rate": 6.270812915278145e-06, - "loss": 0.9565, + "learning_rate": 6.2970896880141794e-06, + "loss": 0.9289, "step": 22307 }, { - "epoch": 0.6330306469920545, + "epoch": 0.6321516619909887, "grad_norm": 0.0, - "learning_rate": 6.269960151559718e-06, - "loss": 0.8432, + "learning_rate": 6.296237154667253e-06, + "loss": 0.7888, "step": 22308 }, { - "epoch": 0.6330590238365493, + "epoch": 0.6321799994332512, "grad_norm": 0.0, - "learning_rate": 6.269107419349105e-06, - "loss": 0.8237, + "learning_rate": 6.295384652518491e-06, + "loss": 0.8324, "step": 22309 }, { - "epoch": 0.6330874006810443, + "epoch": 0.6322083368755136, "grad_norm": 0.0, - "learning_rate": 6.2682547186535105e-06, - "loss": 0.9132, + "learning_rate": 6.29453218157508e-06, + "loss": 0.9084, "step": 22310 }, { - "epoch": 0.6331157775255392, + "epoch": 0.6322366743177761, "grad_norm": 0.0, - "learning_rate": 6.2674020494801335e-06, - "loss": 0.9189, + "learning_rate": 6.293679741844205e-06, + "loss": 0.7559, "step": 22311 }, { - "epoch": 0.633144154370034, + "epoch": 0.6322650117600386, "grad_norm": 0.0, - "learning_rate": 6.266549411836182e-06, - "loss": 0.8053, + "learning_rate": 6.292827333333037e-06, + "loss": 0.9102, "step": 22312 }, { - "epoch": 0.633172531214529, + "epoch": 0.632293349202301, "grad_norm": 0.0, - "learning_rate": 6.265696805728856e-06, - "loss": 0.8555, + "learning_rate": 6.2919749560487606e-06, + "loss": 0.871, "step": 22313 }, { - "epoch": 0.6332009080590238, + "epoch": 0.6323216866445635, "grad_norm": 0.0, - "learning_rate": 6.264844231165354e-06, - "loss": 0.7873, + "learning_rate": 6.291122609998559e-06, + "loss": 0.7963, "step": 22314 }, { - "epoch": 0.6332292849035187, + "epoch": 0.632350024086826, "grad_norm": 0.0, - "learning_rate": 6.263991688152881e-06, - "loss": 0.8676, + "learning_rate": 6.290270295189607e-06, + "loss": 0.8168, "step": 22315 }, { - "epoch": 0.6332576617480136, + "epoch": 0.6323783615290884, "grad_norm": 0.0, - "learning_rate": 6.263139176698638e-06, - "loss": 0.9449, + "learning_rate": 6.289418011629089e-06, + "loss": 0.8248, "step": 22316 }, { - "epoch": 0.6332860385925085, + "epoch": 0.6324066989713508, "grad_norm": 0.0, - "learning_rate": 6.262286696809827e-06, - "loss": 0.7717, + "learning_rate": 6.288565759324179e-06, + "loss": 0.9098, "step": 22317 }, { - "epoch": 0.6333144154370034, + "epoch": 0.6324350364136133, "grad_norm": 0.0, - "learning_rate": 6.26143424849365e-06, - "loss": 0.9086, + "learning_rate": 6.287713538282057e-06, + "loss": 0.8451, "step": 22318 }, { - "epoch": 0.6333427922814983, + "epoch": 0.6324633738558758, "grad_norm": 0.0, - "learning_rate": 6.260581831757304e-06, - "loss": 0.9547, + "learning_rate": 6.286861348509903e-06, + "loss": 0.9047, "step": 22319 }, { - "epoch": 0.6333711691259932, + "epoch": 0.6324917112981382, "grad_norm": 0.0, - "learning_rate": 6.259729446607994e-06, - "loss": 0.8962, + "learning_rate": 6.2860091900148935e-06, + "loss": 0.8292, "step": 22320 }, { - "epoch": 0.6333995459704881, + "epoch": 0.6325200487404007, "grad_norm": 0.0, - "learning_rate": 6.258877093052915e-06, - "loss": 0.9138, + "learning_rate": 6.285157062804213e-06, + "loss": 0.8363, "step": 22321 }, { - "epoch": 0.633427922814983, + "epoch": 0.6325483861826632, "grad_norm": 0.0, - "learning_rate": 6.258024771099269e-06, - "loss": 0.8507, + "learning_rate": 6.284304966885027e-06, + "loss": 0.8997, "step": 22322 }, { - "epoch": 0.6334562996594779, + "epoch": 0.6325767236249256, "grad_norm": 0.0, - "learning_rate": 6.257172480754259e-06, - "loss": 0.8559, + "learning_rate": 6.283452902264522e-06, + "loss": 0.8425, "step": 22323 }, { - "epoch": 0.6334846765039728, + "epoch": 0.6326050610671881, "grad_norm": 0.0, - "learning_rate": 6.256320222025079e-06, - "loss": 0.953, + "learning_rate": 6.282600868949877e-06, + "loss": 0.8928, "step": 22324 }, { - "epoch": 0.6335130533484676, + "epoch": 0.6326333985094506, "grad_norm": 0.0, - "learning_rate": 6.25546799491893e-06, - "loss": 0.91, + "learning_rate": 6.281748866948262e-06, + "loss": 0.9257, "step": 22325 }, { - "epoch": 0.6335414301929625, + "epoch": 0.632661735951713, "grad_norm": 0.0, - "learning_rate": 6.254615799443013e-06, - "loss": 0.8343, + "learning_rate": 6.280896896266857e-06, + "loss": 0.7826, "step": 22326 }, { - "epoch": 0.6335698070374575, + "epoch": 0.6326900733939754, "grad_norm": 0.0, - "learning_rate": 6.253763635604524e-06, - "loss": 0.9492, + "learning_rate": 6.28004495691284e-06, + "loss": 0.9311, "step": 22327 }, { - "epoch": 0.6335981838819523, + "epoch": 0.6327184108362379, "grad_norm": 0.0, - "learning_rate": 6.252911503410662e-06, - "loss": 0.828, + "learning_rate": 6.279193048893384e-06, + "loss": 0.782, "step": 22328 }, { - "epoch": 0.6336265607264472, + "epoch": 0.6327467482785004, "grad_norm": 0.0, - "learning_rate": 6.252059402868624e-06, - "loss": 0.883, + "learning_rate": 6.278341172215669e-06, + "loss": 0.8193, "step": 22329 }, { - "epoch": 0.6336549375709422, + "epoch": 0.6327750857207628, "grad_norm": 0.0, - "learning_rate": 6.2512073339856065e-06, - "loss": 0.8402, + "learning_rate": 6.277489326886866e-06, + "loss": 0.876, "step": 22330 }, { - "epoch": 0.633683314415437, + "epoch": 0.6328034231630253, "grad_norm": 0.0, - "learning_rate": 6.250355296768813e-06, - "loss": 0.8641, + "learning_rate": 6.276637512914152e-06, + "loss": 0.894, "step": 22331 }, { - "epoch": 0.6337116912599319, + "epoch": 0.6328317606052878, "grad_norm": 0.0, - "learning_rate": 6.249503291225434e-06, - "loss": 0.8705, + "learning_rate": 6.2757857303047055e-06, + "loss": 0.9033, "step": 22332 }, { - "epoch": 0.6337400681044267, + "epoch": 0.6328600980475503, "grad_norm": 0.0, - "learning_rate": 6.248651317362673e-06, - "loss": 0.8322, + "learning_rate": 6.274933979065696e-06, + "loss": 0.9079, "step": 22333 }, { - "epoch": 0.6337684449489217, + "epoch": 0.6328884354898127, "grad_norm": 0.0, - "learning_rate": 6.247799375187721e-06, - "loss": 0.9561, + "learning_rate": 6.274082259204302e-06, + "loss": 0.7741, "step": 22334 }, { - "epoch": 0.6337968217934166, + "epoch": 0.6329167729320752, "grad_norm": 0.0, - "learning_rate": 6.246947464707778e-06, - "loss": 0.9139, + "learning_rate": 6.2732305707277e-06, + "loss": 0.7737, "step": 22335 }, { - "epoch": 0.6338251986379114, + "epoch": 0.6329451103743376, "grad_norm": 0.0, - "learning_rate": 6.246095585930034e-06, - "loss": 0.8534, + "learning_rate": 6.272378913643057e-06, + "loss": 0.9423, "step": 22336 }, { - "epoch": 0.6338535754824064, + "epoch": 0.6329734478166, "grad_norm": 0.0, - "learning_rate": 6.245243738861693e-06, - "loss": 0.8099, + "learning_rate": 6.271527287957555e-06, + "loss": 0.887, "step": 22337 }, { - "epoch": 0.6338819523269013, + "epoch": 0.6330017852588625, "grad_norm": 0.0, - "learning_rate": 6.244391923509948e-06, - "loss": 0.9053, + "learning_rate": 6.270675693678359e-06, + "loss": 0.8067, "step": 22338 }, { - "epoch": 0.6339103291713961, + "epoch": 0.633030122701125, "grad_norm": 0.0, - "learning_rate": 6.24354013988199e-06, - "loss": 0.8479, + "learning_rate": 6.269824130812645e-06, + "loss": 0.8459, "step": 22339 }, { - "epoch": 0.6339387060158911, + "epoch": 0.6330584601433874, "grad_norm": 0.0, - "learning_rate": 6.2426883879850196e-06, - "loss": 0.8303, + "learning_rate": 6.268972599367591e-06, + "loss": 0.921, "step": 22340 }, { - "epoch": 0.6339670828603859, + "epoch": 0.6330867975856499, "grad_norm": 0.0, - "learning_rate": 6.241836667826228e-06, - "loss": 0.8471, + "learning_rate": 6.268121099350364e-06, + "loss": 0.7581, "step": 22341 }, { - "epoch": 0.6339954597048808, + "epoch": 0.6331151350279124, "grad_norm": 0.0, - "learning_rate": 6.2409849794128115e-06, - "loss": 0.9632, + "learning_rate": 6.267269630768138e-06, + "loss": 0.8309, "step": 22342 }, { - "epoch": 0.6340238365493757, + "epoch": 0.6331434724701749, "grad_norm": 0.0, - "learning_rate": 6.240133322751965e-06, - "loss": 0.8482, + "learning_rate": 6.266418193628092e-06, + "loss": 0.8219, "step": 22343 }, { - "epoch": 0.6340522133938706, + "epoch": 0.6331718099124373, "grad_norm": 0.0, - "learning_rate": 6.239281697850881e-06, - "loss": 0.9033, + "learning_rate": 6.265566787937386e-06, + "loss": 0.734, "step": 22344 }, { - "epoch": 0.6340805902383655, + "epoch": 0.6332001473546998, "grad_norm": 0.0, - "learning_rate": 6.23843010471675e-06, - "loss": 0.6837, + "learning_rate": 6.264715413703201e-06, + "loss": 0.7977, "step": 22345 }, { - "epoch": 0.6341089670828604, + "epoch": 0.6332284847969623, "grad_norm": 0.0, - "learning_rate": 6.237578543356769e-06, - "loss": 0.8434, + "learning_rate": 6.263864070932702e-06, + "loss": 0.7978, "step": 22346 }, { - "epoch": 0.6341373439273553, + "epoch": 0.6332568222392246, "grad_norm": 0.0, - "learning_rate": 6.236727013778134e-06, - "loss": 0.852, + "learning_rate": 6.263012759633063e-06, + "loss": 0.8712, "step": 22347 }, { - "epoch": 0.6341657207718502, + "epoch": 0.6332851596814871, "grad_norm": 0.0, - "learning_rate": 6.235875515988037e-06, - "loss": 0.7368, + "learning_rate": 6.2621614798114615e-06, + "loss": 0.7636, "step": 22348 }, { - "epoch": 0.634194097616345, + "epoch": 0.6333134971237496, "grad_norm": 0.0, - "learning_rate": 6.235024049993665e-06, - "loss": 0.8298, + "learning_rate": 6.261310231475055e-06, + "loss": 0.898, "step": 22349 }, { - "epoch": 0.6342224744608399, + "epoch": 0.6333418345660121, "grad_norm": 0.0, - "learning_rate": 6.234172615802216e-06, - "loss": 0.699, + "learning_rate": 6.260459014631027e-06, + "loss": 0.8145, "step": 22350 }, { - "epoch": 0.6342508513053349, + "epoch": 0.6333701720082745, "grad_norm": 0.0, - "learning_rate": 6.233321213420882e-06, - "loss": 0.8694, + "learning_rate": 6.259607829286537e-06, + "loss": 0.8293, "step": 22351 }, { - "epoch": 0.6342792281498297, + "epoch": 0.633398509450537, "grad_norm": 0.0, - "learning_rate": 6.23246984285685e-06, - "loss": 0.7871, + "learning_rate": 6.258756675448759e-06, + "loss": 0.9812, "step": 22352 }, { - "epoch": 0.6343076049943246, + "epoch": 0.6334268468927995, "grad_norm": 0.0, - "learning_rate": 6.231618504117316e-06, - "loss": 0.9174, + "learning_rate": 6.257905553124864e-06, + "loss": 1.0024, "step": 22353 }, { - "epoch": 0.6343359818388196, + "epoch": 0.6334551843350619, "grad_norm": 0.0, - "learning_rate": 6.23076719720947e-06, - "loss": 0.7521, + "learning_rate": 6.257054462322019e-06, + "loss": 0.876, "step": 22354 }, { - "epoch": 0.6343643586833144, + "epoch": 0.6334835217773244, "grad_norm": 0.0, - "learning_rate": 6.229915922140502e-06, - "loss": 0.8665, + "learning_rate": 6.256203403047394e-06, + "loss": 0.7786, "step": 22355 }, { - "epoch": 0.6343927355278093, + "epoch": 0.6335118592195869, "grad_norm": 0.0, - "learning_rate": 6.2290646789176045e-06, - "loss": 0.8675, + "learning_rate": 6.255352375308162e-06, + "loss": 0.8001, "step": 22356 }, { - "epoch": 0.6344211123723043, + "epoch": 0.6335401966618494, "grad_norm": 0.0, - "learning_rate": 6.228213467547967e-06, - "loss": 0.8572, + "learning_rate": 6.254501379111483e-06, + "loss": 0.8141, "step": 22357 }, { - "epoch": 0.6344494892167991, + "epoch": 0.6335685341041117, "grad_norm": 0.0, - "learning_rate": 6.227362288038778e-06, - "loss": 0.812, + "learning_rate": 6.253650414464531e-06, + "loss": 0.6763, "step": 22358 }, { - "epoch": 0.634477866061294, + "epoch": 0.6335968715463742, "grad_norm": 0.0, - "learning_rate": 6.226511140397232e-06, - "loss": 0.868, + "learning_rate": 6.252799481374472e-06, + "loss": 0.9032, "step": 22359 }, { - "epoch": 0.6345062429057888, + "epoch": 0.6336252089886367, "grad_norm": 0.0, - "learning_rate": 6.2256600246305154e-06, - "loss": 0.868, + "learning_rate": 6.251948579848475e-06, + "loss": 0.7761, "step": 22360 }, { - "epoch": 0.6345346197502838, + "epoch": 0.6336535464308991, "grad_norm": 0.0, - "learning_rate": 6.224808940745814e-06, - "loss": 0.7796, + "learning_rate": 6.25109770989371e-06, + "loss": 0.75, "step": 22361 }, { - "epoch": 0.6345629965947787, + "epoch": 0.6336818838731616, "grad_norm": 0.0, - "learning_rate": 6.223957888750327e-06, - "loss": 0.8184, + "learning_rate": 6.250246871517338e-06, + "loss": 0.889, "step": 22362 }, { - "epoch": 0.6345913734392735, + "epoch": 0.6337102213154241, "grad_norm": 0.0, - "learning_rate": 6.223106868651234e-06, - "loss": 0.8774, + "learning_rate": 6.249396064726527e-06, + "loss": 0.7396, "step": 22363 }, { - "epoch": 0.6346197502837685, + "epoch": 0.6337385587576865, "grad_norm": 0.0, - "learning_rate": 6.22225588045573e-06, - "loss": 0.8289, + "learning_rate": 6.248545289528452e-06, + "loss": 0.9053, "step": 22364 }, { - "epoch": 0.6346481271282634, + "epoch": 0.633766896199949, "grad_norm": 0.0, - "learning_rate": 6.221404924171e-06, - "loss": 0.856, + "learning_rate": 6.247694545930267e-06, + "loss": 0.8086, "step": 22365 }, { - "epoch": 0.6346765039727582, + "epoch": 0.6337952336422115, "grad_norm": 0.0, - "learning_rate": 6.22055399980423e-06, - "loss": 0.8736, + "learning_rate": 6.246843833939148e-06, + "loss": 0.8184, "step": 22366 }, { - "epoch": 0.6347048808172531, + "epoch": 0.633823571084474, "grad_norm": 0.0, - "learning_rate": 6.2197031073626126e-06, - "loss": 0.8485, + "learning_rate": 6.2459931535622554e-06, + "loss": 0.8567, "step": 22367 }, { - "epoch": 0.634733257661748, + "epoch": 0.6338519085267363, "grad_norm": 0.0, - "learning_rate": 6.218852246853333e-06, - "loss": 0.8154, + "learning_rate": 6.245142504806755e-06, + "loss": 0.7433, "step": 22368 }, { - "epoch": 0.6347616345062429, + "epoch": 0.6338802459689988, "grad_norm": 0.0, - "learning_rate": 6.218001418283577e-06, - "loss": 0.8521, + "learning_rate": 6.244291887679819e-06, + "loss": 0.8401, "step": 22369 }, { - "epoch": 0.6347900113507378, + "epoch": 0.6339085834112613, "grad_norm": 0.0, - "learning_rate": 6.217150621660532e-06, - "loss": 0.8099, + "learning_rate": 6.243441302188601e-06, + "loss": 0.7254, "step": 22370 }, { - "epoch": 0.6348183881952327, + "epoch": 0.6339369208535237, "grad_norm": 0.0, - "learning_rate": 6.21629985699139e-06, - "loss": 0.8166, + "learning_rate": 6.2425907483402735e-06, + "loss": 0.9436, "step": 22371 }, { - "epoch": 0.6348467650397276, + "epoch": 0.6339652582957862, "grad_norm": 0.0, - "learning_rate": 6.215449124283328e-06, - "loss": 0.7437, + "learning_rate": 6.241740226142002e-06, + "loss": 0.8777, "step": 22372 }, { - "epoch": 0.6348751418842224, + "epoch": 0.6339935957380487, "grad_norm": 0.0, - "learning_rate": 6.214598423543541e-06, - "loss": 0.7459, + "learning_rate": 6.240889735600943e-06, + "loss": 0.8345, "step": 22373 }, { - "epoch": 0.6349035187287174, + "epoch": 0.6340219331803112, "grad_norm": 0.0, - "learning_rate": 6.2137477547792116e-06, - "loss": 0.8632, + "learning_rate": 6.240039276724273e-06, + "loss": 0.8724, "step": 22374 }, { - "epoch": 0.6349318955732123, + "epoch": 0.6340502706225736, "grad_norm": 0.0, - "learning_rate": 6.212897117997523e-06, - "loss": 0.7712, + "learning_rate": 6.239188849519142e-06, + "loss": 0.7078, "step": 22375 }, { - "epoch": 0.6349602724177071, + "epoch": 0.6340786080648361, "grad_norm": 0.0, - "learning_rate": 6.212046513205662e-06, - "loss": 0.912, + "learning_rate": 6.2383384539927214e-06, + "loss": 0.8817, "step": 22376 }, { - "epoch": 0.634988649262202, + "epoch": 0.6341069455070986, "grad_norm": 0.0, - "learning_rate": 6.211195940410814e-06, - "loss": 0.872, + "learning_rate": 6.237488090152177e-06, + "loss": 0.8213, "step": 22377 }, { - "epoch": 0.635017026106697, + "epoch": 0.634135282949361, "grad_norm": 0.0, - "learning_rate": 6.210345399620167e-06, - "loss": 0.78, + "learning_rate": 6.2366377580046634e-06, + "loss": 0.915, "step": 22378 }, { - "epoch": 0.6350454029511918, + "epoch": 0.6341636203916234, "grad_norm": 0.0, - "learning_rate": 6.2094948908409024e-06, - "loss": 1.0054, + "learning_rate": 6.23578745755735e-06, + "loss": 0.8171, "step": 22379 }, { - "epoch": 0.6350737797956867, + "epoch": 0.6341919578338859, "grad_norm": 0.0, - "learning_rate": 6.208644414080203e-06, - "loss": 0.8244, + "learning_rate": 6.234937188817393e-06, + "loss": 0.8698, "step": 22380 }, { - "epoch": 0.6351021566401817, + "epoch": 0.6342202952761484, "grad_norm": 0.0, - "learning_rate": 6.207793969345257e-06, - "loss": 0.839, + "learning_rate": 6.234086951791959e-06, + "loss": 0.9381, "step": 22381 }, { - "epoch": 0.6351305334846765, + "epoch": 0.6342486327184108, "grad_norm": 0.0, - "learning_rate": 6.206943556643246e-06, - "loss": 0.817, + "learning_rate": 6.233236746488213e-06, + "loss": 0.8281, "step": 22382 }, { - "epoch": 0.6351589103291714, + "epoch": 0.6342769701606733, "grad_norm": 0.0, - "learning_rate": 6.20609317598135e-06, - "loss": 0.812, + "learning_rate": 6.23238657291331e-06, + "loss": 0.9281, "step": 22383 }, { - "epoch": 0.6351872871736662, + "epoch": 0.6343053076029358, "grad_norm": 0.0, - "learning_rate": 6.2052428273667576e-06, - "loss": 0.7986, + "learning_rate": 6.231536431074411e-06, + "loss": 0.6666, "step": 22384 }, { - "epoch": 0.6352156640181612, + "epoch": 0.6343336450451982, "grad_norm": 0.0, - "learning_rate": 6.204392510806652e-06, - "loss": 0.8493, + "learning_rate": 6.230686320978684e-06, + "loss": 0.8066, "step": 22385 }, { - "epoch": 0.6352440408626561, + "epoch": 0.6343619824874607, "grad_norm": 0.0, - "learning_rate": 6.203542226308208e-06, - "loss": 0.8297, + "learning_rate": 6.2298362426332825e-06, + "loss": 0.7854, "step": 22386 }, { - "epoch": 0.6352724177071509, + "epoch": 0.6343903199297232, "grad_norm": 0.0, - "learning_rate": 6.202691973878618e-06, - "loss": 0.8558, + "learning_rate": 6.228986196045377e-06, + "loss": 0.845, "step": 22387 }, { - "epoch": 0.6353007945516459, + "epoch": 0.6344186573719856, "grad_norm": 0.0, - "learning_rate": 6.201841753525059e-06, - "loss": 0.7636, + "learning_rate": 6.228136181222115e-06, + "loss": 0.8856, "step": 22388 }, { - "epoch": 0.6353291713961408, + "epoch": 0.634446994814248, "grad_norm": 0.0, - "learning_rate": 6.200991565254711e-06, - "loss": 0.8862, + "learning_rate": 6.227286198170663e-06, + "loss": 0.8857, "step": 22389 }, { - "epoch": 0.6353575482406356, + "epoch": 0.6344753322565105, "grad_norm": 0.0, - "learning_rate": 6.20014140907476e-06, - "loss": 0.9001, + "learning_rate": 6.226436246898184e-06, + "loss": 0.8398, "step": 22390 }, { - "epoch": 0.6353859250851306, + "epoch": 0.634503669698773, "grad_norm": 0.0, - "learning_rate": 6.199291284992386e-06, - "loss": 0.7754, + "learning_rate": 6.22558632741183e-06, + "loss": 0.7742, "step": 22391 }, { - "epoch": 0.6354143019296254, + "epoch": 0.6345320071410354, "grad_norm": 0.0, - "learning_rate": 6.198441193014765e-06, - "loss": 0.8879, + "learning_rate": 6.224736439718764e-06, + "loss": 0.8649, "step": 22392 }, { - "epoch": 0.6354426787741203, + "epoch": 0.6345603445832979, "grad_norm": 0.0, - "learning_rate": 6.197591133149083e-06, - "loss": 0.9203, + "learning_rate": 6.223886583826147e-06, + "loss": 0.8202, "step": 22393 }, { - "epoch": 0.6354710556186152, + "epoch": 0.6345886820255604, "grad_norm": 0.0, - "learning_rate": 6.196741105402524e-06, - "loss": 0.8558, + "learning_rate": 6.223036759741133e-06, + "loss": 0.7968, "step": 22394 }, { - "epoch": 0.6354994324631101, + "epoch": 0.6346170194678228, "grad_norm": 0.0, - "learning_rate": 6.195891109782261e-06, - "loss": 0.8079, + "learning_rate": 6.222186967470888e-06, + "loss": 0.9445, "step": 22395 }, { - "epoch": 0.635527809307605, + "epoch": 0.6346453569100853, "grad_norm": 0.0, - "learning_rate": 6.195041146295477e-06, - "loss": 0.8329, + "learning_rate": 6.221337207022561e-06, + "loss": 0.7812, "step": 22396 }, { - "epoch": 0.6355561861520999, + "epoch": 0.6346736943523478, "grad_norm": 0.0, - "learning_rate": 6.194191214949354e-06, - "loss": 0.7489, + "learning_rate": 6.220487478403313e-06, + "loss": 0.8654, "step": 22397 }, { - "epoch": 0.6355845629965948, + "epoch": 0.6347020317946103, "grad_norm": 0.0, - "learning_rate": 6.193341315751067e-06, - "loss": 0.8001, + "learning_rate": 6.2196377816203045e-06, + "loss": 0.7878, "step": 22398 }, { - "epoch": 0.6356129398410897, + "epoch": 0.6347303692368726, "grad_norm": 0.0, - "learning_rate": 6.192491448707796e-06, - "loss": 0.8766, + "learning_rate": 6.218788116680689e-06, + "loss": 0.9779, "step": 22399 }, { - "epoch": 0.6356413166855845, + "epoch": 0.6347587066791351, "grad_norm": 0.0, - "learning_rate": 6.191641613826723e-06, - "loss": 0.7907, + "learning_rate": 6.2179384835916256e-06, + "loss": 0.7271, "step": 22400 }, { - "epoch": 0.6356696935300794, + "epoch": 0.6347870441213976, "grad_norm": 0.0, - "learning_rate": 6.190791811115025e-06, - "loss": 0.8657, + "learning_rate": 6.217088882360274e-06, + "loss": 0.8263, "step": 22401 }, { - "epoch": 0.6356980703745744, + "epoch": 0.63481538156366, "grad_norm": 0.0, - "learning_rate": 6.189942040579879e-06, - "loss": 0.8439, + "learning_rate": 6.216239312993783e-06, + "loss": 0.7711, "step": 22402 }, { - "epoch": 0.6357264472190692, + "epoch": 0.6348437190059225, "grad_norm": 0.0, - "learning_rate": 6.189092302228461e-06, - "loss": 0.9291, + "learning_rate": 6.215389775499319e-06, + "loss": 0.8097, "step": 22403 }, { - "epoch": 0.6357548240635641, + "epoch": 0.634872056448185, "grad_norm": 0.0, - "learning_rate": 6.188242596067955e-06, - "loss": 0.8246, + "learning_rate": 6.214540269884026e-06, + "loss": 0.8271, "step": 22404 }, { - "epoch": 0.6357832009080591, + "epoch": 0.6349003938904475, "grad_norm": 0.0, - "learning_rate": 6.187392922105535e-06, - "loss": 0.8795, + "learning_rate": 6.2136907961550676e-06, + "loss": 0.8895, "step": 22405 }, { - "epoch": 0.6358115777525539, + "epoch": 0.6349287313327099, "grad_norm": 0.0, - "learning_rate": 6.1865432803483765e-06, - "loss": 0.8298, + "learning_rate": 6.212841354319603e-06, + "loss": 0.855, "step": 22406 }, { - "epoch": 0.6358399545970488, + "epoch": 0.6349570687749724, "grad_norm": 0.0, - "learning_rate": 6.185693670803654e-06, - "loss": 0.8887, + "learning_rate": 6.211991944384776e-06, + "loss": 0.8771, "step": 22407 }, { - "epoch": 0.6358683314415438, + "epoch": 0.6349854062172349, "grad_norm": 0.0, - "learning_rate": 6.184844093478554e-06, - "loss": 0.861, + "learning_rate": 6.211142566357753e-06, + "loss": 0.8764, "step": 22408 }, { - "epoch": 0.6358967082860386, + "epoch": 0.6350137436594973, "grad_norm": 0.0, - "learning_rate": 6.183994548380249e-06, - "loss": 0.8917, + "learning_rate": 6.210293220245678e-06, + "loss": 0.8748, "step": 22409 }, { - "epoch": 0.6359250851305335, + "epoch": 0.6350420811017597, "grad_norm": 0.0, - "learning_rate": 6.183145035515909e-06, - "loss": 0.7939, + "learning_rate": 6.20944390605571e-06, + "loss": 0.8628, "step": 22410 }, { - "epoch": 0.6359534619750283, + "epoch": 0.6350704185440222, "grad_norm": 0.0, - "learning_rate": 6.182295554892719e-06, - "loss": 0.9211, + "learning_rate": 6.208594623795007e-06, + "loss": 0.7544, "step": 22411 }, { - "epoch": 0.6359818388195233, + "epoch": 0.6350987559862846, "grad_norm": 0.0, - "learning_rate": 6.18144610651785e-06, - "loss": 0.8679, + "learning_rate": 6.207745373470717e-06, + "loss": 0.9001, "step": 22412 }, { - "epoch": 0.6360102156640182, + "epoch": 0.6351270934285471, "grad_norm": 0.0, - "learning_rate": 6.180596690398474e-06, - "loss": 0.7776, + "learning_rate": 6.206896155089995e-06, + "loss": 0.8413, "step": 22413 }, { - "epoch": 0.636038592508513, + "epoch": 0.6351554308708096, "grad_norm": 0.0, - "learning_rate": 6.179747306541771e-06, - "loss": 0.8197, + "learning_rate": 6.20604696866e-06, + "loss": 0.9223, "step": 22414 }, { - "epoch": 0.636066969353008, + "epoch": 0.6351837683130721, "grad_norm": 0.0, - "learning_rate": 6.1788979549549155e-06, - "loss": 0.8372, + "learning_rate": 6.2051978141878755e-06, + "loss": 0.8862, "step": 22415 }, { - "epoch": 0.6360953461975029, + "epoch": 0.6352121057553345, "grad_norm": 0.0, - "learning_rate": 6.178048635645078e-06, - "loss": 0.7858, + "learning_rate": 6.204348691680781e-06, + "loss": 0.9129, "step": 22416 }, { - "epoch": 0.6361237230419977, + "epoch": 0.635240443197597, "grad_norm": 0.0, - "learning_rate": 6.177199348619438e-06, - "loss": 0.9256, + "learning_rate": 6.203499601145867e-06, + "loss": 0.8947, "step": 22417 }, { - "epoch": 0.6361520998864926, + "epoch": 0.6352687806398595, "grad_norm": 0.0, - "learning_rate": 6.176350093885166e-06, - "loss": 1.0205, + "learning_rate": 6.202650542590284e-06, + "loss": 0.7961, "step": 22418 }, { - "epoch": 0.6361804767309875, + "epoch": 0.6352971180821219, "grad_norm": 0.0, - "learning_rate": 6.1755008714494356e-06, - "loss": 0.8804, + "learning_rate": 6.20180151602119e-06, + "loss": 0.9257, "step": 22419 }, { - "epoch": 0.6362088535754824, + "epoch": 0.6353254555243844, "grad_norm": 0.0, - "learning_rate": 6.174651681319424e-06, - "loss": 0.9155, + "learning_rate": 6.200952521445728e-06, + "loss": 0.8748, "step": 22420 }, { - "epoch": 0.6362372304199773, + "epoch": 0.6353537929666468, "grad_norm": 0.0, - "learning_rate": 6.1738025235023e-06, - "loss": 0.8161, + "learning_rate": 6.200103558871054e-06, + "loss": 0.8901, "step": 22421 }, { - "epoch": 0.6362656072644722, + "epoch": 0.6353821304089093, "grad_norm": 0.0, - "learning_rate": 6.172953398005234e-06, - "loss": 0.8632, + "learning_rate": 6.199254628304323e-06, + "loss": 0.8132, "step": 22422 }, { - "epoch": 0.6362939841089671, + "epoch": 0.6354104678511717, "grad_norm": 0.0, - "learning_rate": 6.1721043048354075e-06, - "loss": 0.807, + "learning_rate": 6.198405729752677e-06, + "loss": 0.8784, "step": 22423 }, { - "epoch": 0.636322360953462, + "epoch": 0.6354388052934342, "grad_norm": 0.0, - "learning_rate": 6.171255243999988e-06, - "loss": 0.8704, + "learning_rate": 6.197556863223273e-06, + "loss": 0.8747, "step": 22424 }, { - "epoch": 0.6363507377979569, + "epoch": 0.6354671427356967, "grad_norm": 0.0, - "learning_rate": 6.170406215506146e-06, - "loss": 0.8918, + "learning_rate": 6.196708028723257e-06, + "loss": 0.8694, "step": 22425 }, { - "epoch": 0.6363791146424518, + "epoch": 0.6354954801779591, "grad_norm": 0.0, - "learning_rate": 6.169557219361056e-06, - "loss": 0.8048, + "learning_rate": 6.195859226259784e-06, + "loss": 0.9125, "step": 22426 }, { - "epoch": 0.6364074914869466, + "epoch": 0.6355238176202216, "grad_norm": 0.0, - "learning_rate": 6.168708255571887e-06, - "loss": 0.7415, + "learning_rate": 6.195010455840003e-06, + "loss": 0.9148, "step": 22427 }, { - "epoch": 0.6364358683314415, + "epoch": 0.6355521550624841, "grad_norm": 0.0, - "learning_rate": 6.167859324145813e-06, - "loss": 0.8584, + "learning_rate": 6.194161717471059e-06, + "loss": 0.9079, "step": 22428 }, { - "epoch": 0.6364642451759365, + "epoch": 0.6355804925047466, "grad_norm": 0.0, - "learning_rate": 6.1670104250900045e-06, - "loss": 0.8141, + "learning_rate": 6.193313011160104e-06, + "loss": 0.8843, "step": 22429 }, { - "epoch": 0.6364926220204313, + "epoch": 0.635608829947009, "grad_norm": 0.0, - "learning_rate": 6.1661615584116276e-06, - "loss": 0.8336, + "learning_rate": 6.192464336914289e-06, + "loss": 0.8386, "step": 22430 }, { - "epoch": 0.6365209988649262, + "epoch": 0.6356371673892715, "grad_norm": 0.0, - "learning_rate": 6.1653127241178605e-06, - "loss": 0.7585, + "learning_rate": 6.191615694740758e-06, + "loss": 0.8441, "step": 22431 }, { - "epoch": 0.6365493757094212, + "epoch": 0.635665504831534, "grad_norm": 0.0, - "learning_rate": 6.164463922215867e-06, - "loss": 0.7613, + "learning_rate": 6.190767084646667e-06, + "loss": 0.7898, "step": 22432 }, { - "epoch": 0.636577752553916, + "epoch": 0.6356938422737963, "grad_norm": 0.0, - "learning_rate": 6.1636151527128185e-06, - "loss": 0.8752, + "learning_rate": 6.1899185066391565e-06, + "loss": 0.8814, "step": 22433 }, { - "epoch": 0.6366061293984109, + "epoch": 0.6357221797160588, "grad_norm": 0.0, - "learning_rate": 6.162766415615887e-06, - "loss": 0.7701, + "learning_rate": 6.189069960725375e-06, + "loss": 0.8246, "step": 22434 }, { - "epoch": 0.6366345062429057, + "epoch": 0.6357505171583213, "grad_norm": 0.0, - "learning_rate": 6.1619177109322406e-06, - "loss": 0.7826, + "learning_rate": 6.188221446912478e-06, + "loss": 0.8047, "step": 22435 }, { - "epoch": 0.6366628830874007, + "epoch": 0.6357788546005837, "grad_norm": 0.0, - "learning_rate": 6.161069038669045e-06, - "loss": 0.8583, + "learning_rate": 6.187372965207603e-06, + "loss": 0.8742, "step": 22436 }, { - "epoch": 0.6366912599318956, + "epoch": 0.6358071920428462, "grad_norm": 0.0, - "learning_rate": 6.160220398833474e-06, - "loss": 0.7549, + "learning_rate": 6.186524515617902e-06, + "loss": 0.905, "step": 22437 }, { - "epoch": 0.6367196367763904, + "epoch": 0.6358355294851087, "grad_norm": 0.0, - "learning_rate": 6.15937179143269e-06, - "loss": 0.835, + "learning_rate": 6.1856760981505205e-06, + "loss": 0.8189, "step": 22438 }, { - "epoch": 0.6367480136208854, + "epoch": 0.6358638669273712, "grad_norm": 0.0, - "learning_rate": 6.158523216473868e-06, - "loss": 0.8389, + "learning_rate": 6.184827712812605e-06, + "loss": 0.7761, "step": 22439 }, { - "epoch": 0.6367763904653803, + "epoch": 0.6358922043696336, "grad_norm": 0.0, - "learning_rate": 6.157674673964174e-06, - "loss": 0.8163, + "learning_rate": 6.183979359611308e-06, + "loss": 0.8152, "step": 22440 }, { - "epoch": 0.6368047673098751, + "epoch": 0.6359205418118961, "grad_norm": 0.0, - "learning_rate": 6.156826163910774e-06, - "loss": 0.8471, + "learning_rate": 6.183131038553763e-06, + "loss": 0.8253, "step": 22441 }, { - "epoch": 0.63683314415437, + "epoch": 0.6359488792541586, "grad_norm": 0.0, - "learning_rate": 6.155977686320837e-06, - "loss": 0.8275, + "learning_rate": 6.182282749647124e-06, + "loss": 0.8146, "step": 22442 }, { - "epoch": 0.636861520998865, + "epoch": 0.6359772166964209, "grad_norm": 0.0, - "learning_rate": 6.15512924120153e-06, - "loss": 0.8338, + "learning_rate": 6.181434492898537e-06, + "loss": 0.7491, "step": 22443 }, { - "epoch": 0.6368898978433598, + "epoch": 0.6360055541386834, "grad_norm": 0.0, - "learning_rate": 6.154280828560017e-06, - "loss": 0.8736, + "learning_rate": 6.180586268315144e-06, + "loss": 0.7813, "step": 22444 }, { - "epoch": 0.6369182746878547, + "epoch": 0.6360338915809459, "grad_norm": 0.0, - "learning_rate": 6.1534324484034666e-06, - "loss": 0.8416, + "learning_rate": 6.179738075904095e-06, + "loss": 0.7823, "step": 22445 }, { - "epoch": 0.6369466515323496, + "epoch": 0.6360622290232084, "grad_norm": 0.0, - "learning_rate": 6.152584100739048e-06, - "loss": 0.7876, + "learning_rate": 6.178889915672526e-06, + "loss": 0.8586, "step": 22446 }, { - "epoch": 0.6369750283768445, + "epoch": 0.6360905664654708, "grad_norm": 0.0, - "learning_rate": 6.151735785573921e-06, - "loss": 0.8611, + "learning_rate": 6.178041787627587e-06, + "loss": 0.7582, "step": 22447 }, { - "epoch": 0.6370034052213394, + "epoch": 0.6361189039077333, "grad_norm": 0.0, - "learning_rate": 6.150887502915257e-06, - "loss": 0.8289, + "learning_rate": 6.177193691776424e-06, + "loss": 0.8793, "step": 22448 }, { - "epoch": 0.6370317820658343, + "epoch": 0.6361472413499958, "grad_norm": 0.0, - "learning_rate": 6.150039252770219e-06, - "loss": 0.8385, + "learning_rate": 6.176345628126176e-06, + "loss": 0.8223, "step": 22449 }, { - "epoch": 0.6370601589103292, + "epoch": 0.6361755787922582, "grad_norm": 0.0, - "learning_rate": 6.149191035145972e-06, - "loss": 0.8476, + "learning_rate": 6.175497596683988e-06, + "loss": 0.8041, "step": 22450 }, { - "epoch": 0.637088535754824, + "epoch": 0.6362039162345207, "grad_norm": 0.0, - "learning_rate": 6.148342850049682e-06, - "loss": 0.7965, + "learning_rate": 6.174649597457005e-06, + "loss": 0.8164, "step": 22451 }, { - "epoch": 0.6371169125993189, + "epoch": 0.6362322536767832, "grad_norm": 0.0, - "learning_rate": 6.147494697488514e-06, - "loss": 0.8331, + "learning_rate": 6.1738016304523675e-06, + "loss": 0.9121, "step": 22452 }, { - "epoch": 0.6371452894438139, + "epoch": 0.6362605911190456, "grad_norm": 0.0, - "learning_rate": 6.146646577469626e-06, - "loss": 0.8691, + "learning_rate": 6.172953695677224e-06, + "loss": 0.8694, "step": 22453 }, { - "epoch": 0.6371736662883087, + "epoch": 0.636288928561308, "grad_norm": 0.0, - "learning_rate": 6.145798490000191e-06, - "loss": 0.8051, + "learning_rate": 6.1721057931387075e-06, + "loss": 0.9329, "step": 22454 }, { - "epoch": 0.6372020431328036, + "epoch": 0.6363172660035705, "grad_norm": 0.0, - "learning_rate": 6.144950435087371e-06, - "loss": 0.7708, + "learning_rate": 6.171257922843968e-06, + "loss": 0.8648, "step": 22455 }, { - "epoch": 0.6372304199772986, + "epoch": 0.636345603445833, "grad_norm": 0.0, - "learning_rate": 6.144102412738327e-06, - "loss": 0.953, + "learning_rate": 6.1704100848001446e-06, + "loss": 0.8448, "step": 22456 }, { - "epoch": 0.6372587968217934, + "epoch": 0.6363739408880954, "grad_norm": 0.0, - "learning_rate": 6.143254422960223e-06, - "loss": 0.8967, + "learning_rate": 6.169562279014376e-06, + "loss": 1.0143, "step": 22457 }, { - "epoch": 0.6372871736662883, + "epoch": 0.6364022783303579, "grad_norm": 0.0, - "learning_rate": 6.142406465760223e-06, - "loss": 0.8049, + "learning_rate": 6.1687145054938095e-06, + "loss": 0.8163, "step": 22458 }, { - "epoch": 0.6373155505107831, + "epoch": 0.6364306157726204, "grad_norm": 0.0, - "learning_rate": 6.141558541145489e-06, - "loss": 0.7628, + "learning_rate": 6.167866764245586e-06, + "loss": 0.8019, "step": 22459 }, { - "epoch": 0.6373439273552781, + "epoch": 0.6364589532148828, "grad_norm": 0.0, - "learning_rate": 6.140710649123183e-06, - "loss": 0.8617, + "learning_rate": 6.16701905527684e-06, + "loss": 0.8824, "step": 22460 }, { - "epoch": 0.637372304199773, + "epoch": 0.6364872906571453, "grad_norm": 0.0, - "learning_rate": 6.139862789700468e-06, - "loss": 0.9392, + "learning_rate": 6.16617137859472e-06, + "loss": 0.7146, "step": 22461 }, { - "epoch": 0.6374006810442678, + "epoch": 0.6365156280994078, "grad_norm": 0.0, - "learning_rate": 6.139014962884506e-06, - "loss": 0.8982, + "learning_rate": 6.1653237342063575e-06, + "loss": 0.9413, "step": 22462 }, { - "epoch": 0.6374290578887628, + "epoch": 0.6365439655416703, "grad_norm": 0.0, - "learning_rate": 6.138167168682456e-06, - "loss": 0.781, + "learning_rate": 6.1644761221188984e-06, + "loss": 0.7882, "step": 22463 }, { - "epoch": 0.6374574347332577, + "epoch": 0.6365723029839326, "grad_norm": 0.0, - "learning_rate": 6.137319407101484e-06, - "loss": 0.9218, + "learning_rate": 6.163628542339482e-06, + "loss": 0.8656, "step": 22464 }, { - "epoch": 0.6374858115777525, + "epoch": 0.6366006404261951, "grad_norm": 0.0, - "learning_rate": 6.136471678148748e-06, - "loss": 0.7121, + "learning_rate": 6.162780994875246e-06, + "loss": 0.9232, "step": 22465 }, { - "epoch": 0.6375141884222475, + "epoch": 0.6366289778684576, "grad_norm": 0.0, - "learning_rate": 6.1356239818314075e-06, - "loss": 0.8858, + "learning_rate": 6.161933479733333e-06, + "loss": 0.9099, "step": 22466 }, { - "epoch": 0.6375425652667424, + "epoch": 0.63665731531072, "grad_norm": 0.0, - "learning_rate": 6.134776318156627e-06, - "loss": 0.8281, + "learning_rate": 6.161085996920877e-06, + "loss": 0.7876, "step": 22467 }, { - "epoch": 0.6375709421112372, + "epoch": 0.6366856527529825, "grad_norm": 0.0, - "learning_rate": 6.1339286871315606e-06, - "loss": 0.8412, + "learning_rate": 6.160238546445019e-06, + "loss": 0.7876, "step": 22468 }, { - "epoch": 0.6375993189557321, + "epoch": 0.636713990195245, "grad_norm": 0.0, - "learning_rate": 6.133081088763377e-06, - "loss": 0.8727, + "learning_rate": 6.159391128312899e-06, + "loss": 0.8031, "step": 22469 }, { - "epoch": 0.637627695800227, + "epoch": 0.6367423276375075, "grad_norm": 0.0, - "learning_rate": 6.132233523059232e-06, - "loss": 0.9073, + "learning_rate": 6.158543742531652e-06, + "loss": 0.8878, "step": 22470 }, { - "epoch": 0.6376560726447219, + "epoch": 0.6367706650797699, "grad_norm": 0.0, - "learning_rate": 6.1313859900262805e-06, - "loss": 0.8202, + "learning_rate": 6.1576963891084175e-06, + "loss": 0.9208, "step": 22471 }, { - "epoch": 0.6376844494892168, + "epoch": 0.6367990025220324, "grad_norm": 0.0, - "learning_rate": 6.130538489671689e-06, - "loss": 0.851, + "learning_rate": 6.156849068050336e-06, + "loss": 0.8974, "step": 22472 }, { - "epoch": 0.6377128263337117, + "epoch": 0.6368273399642949, "grad_norm": 0.0, - "learning_rate": 6.12969102200261e-06, - "loss": 0.7993, + "learning_rate": 6.15600177936454e-06, + "loss": 0.7932, "step": 22473 }, { - "epoch": 0.6377412031782066, + "epoch": 0.6368556774065572, "grad_norm": 0.0, - "learning_rate": 6.128843587026207e-06, - "loss": 0.8268, + "learning_rate": 6.155154523058172e-06, + "loss": 0.8085, "step": 22474 }, { - "epoch": 0.6377695800227015, + "epoch": 0.6368840148488197, "grad_norm": 0.0, - "learning_rate": 6.127996184749636e-06, - "loss": 0.8143, + "learning_rate": 6.15430729913836e-06, + "loss": 0.8072, "step": 22475 }, { - "epoch": 0.6377979568671963, + "epoch": 0.6369123522910822, "grad_norm": 0.0, - "learning_rate": 6.127148815180055e-06, - "loss": 0.7884, + "learning_rate": 6.153460107612248e-06, + "loss": 0.8309, "step": 22476 }, { - "epoch": 0.6378263337116913, + "epoch": 0.6369406897333447, "grad_norm": 0.0, - "learning_rate": 6.126301478324621e-06, - "loss": 0.8048, + "learning_rate": 6.152612948486969e-06, + "loss": 0.905, "step": 22477 }, { - "epoch": 0.6378547105561861, + "epoch": 0.6369690271756071, "grad_norm": 0.0, - "learning_rate": 6.125454174190493e-06, - "loss": 0.8667, + "learning_rate": 6.1517658217696596e-06, + "loss": 0.8017, "step": 22478 }, { - "epoch": 0.637883087400681, + "epoch": 0.6369973646178696, "grad_norm": 0.0, - "learning_rate": 6.124606902784829e-06, - "loss": 0.7983, + "learning_rate": 6.150918727467455e-06, + "loss": 0.9927, "step": 22479 }, { - "epoch": 0.637911464245176, + "epoch": 0.6370257020601321, "grad_norm": 0.0, - "learning_rate": 6.123759664114783e-06, - "loss": 0.8739, + "learning_rate": 6.150071665587496e-06, + "loss": 0.8134, "step": 22480 }, { - "epoch": 0.6379398410896708, + "epoch": 0.6370540395023945, "grad_norm": 0.0, - "learning_rate": 6.122912458187516e-06, - "loss": 0.7595, + "learning_rate": 6.149224636136908e-06, + "loss": 0.8559, "step": 22481 }, { - "epoch": 0.6379682179341657, + "epoch": 0.637082376944657, "grad_norm": 0.0, - "learning_rate": 6.122065285010179e-06, - "loss": 0.9049, + "learning_rate": 6.1483776391228335e-06, + "loss": 0.8978, "step": 22482 }, { - "epoch": 0.6379965947786607, + "epoch": 0.6371107143869195, "grad_norm": 0.0, - "learning_rate": 6.121218144589931e-06, - "loss": 0.7959, + "learning_rate": 6.147530674552402e-06, + "loss": 0.808, "step": 22483 }, { - "epoch": 0.6380249716231555, + "epoch": 0.6371390518291818, "grad_norm": 0.0, - "learning_rate": 6.120371036933928e-06, - "loss": 0.7564, + "learning_rate": 6.1466837424327505e-06, + "loss": 0.7672, "step": 22484 }, { - "epoch": 0.6380533484676504, + "epoch": 0.6371673892714443, "grad_norm": 0.0, - "learning_rate": 6.119523962049323e-06, - "loss": 0.8603, + "learning_rate": 6.145836842771018e-06, + "loss": 0.8511, "step": 22485 }, { - "epoch": 0.6380817253121452, + "epoch": 0.6371957267137068, "grad_norm": 0.0, - "learning_rate": 6.118676919943277e-06, - "loss": 0.7702, + "learning_rate": 6.144989975574327e-06, + "loss": 0.9761, "step": 22486 }, { - "epoch": 0.6381101021566402, + "epoch": 0.6372240641559693, "grad_norm": 0.0, - "learning_rate": 6.117829910622942e-06, - "loss": 0.7907, + "learning_rate": 6.1441431408498175e-06, + "loss": 0.8099, "step": 22487 }, { - "epoch": 0.6381384790011351, + "epoch": 0.6372524015982317, "grad_norm": 0.0, - "learning_rate": 6.11698293409547e-06, - "loss": 0.8261, + "learning_rate": 6.143296338604626e-06, + "loss": 0.7717, "step": 22488 }, { - "epoch": 0.6381668558456299, + "epoch": 0.6372807390404942, "grad_norm": 0.0, - "learning_rate": 6.11613599036802e-06, - "loss": 0.8398, + "learning_rate": 6.1424495688458785e-06, + "loss": 0.8074, "step": 22489 }, { - "epoch": 0.6381952326901249, + "epoch": 0.6373090764827567, "grad_norm": 0.0, - "learning_rate": 6.115289079447743e-06, - "loss": 0.8611, + "learning_rate": 6.141602831580712e-06, + "loss": 0.8446, "step": 22490 }, { - "epoch": 0.6382236095346198, + "epoch": 0.6373374139250191, "grad_norm": 0.0, - "learning_rate": 6.114442201341792e-06, - "loss": 0.8115, + "learning_rate": 6.140756126816256e-06, + "loss": 0.8508, "step": 22491 }, { - "epoch": 0.6382519863791146, + "epoch": 0.6373657513672816, "grad_norm": 0.0, - "learning_rate": 6.1135953560573245e-06, - "loss": 0.8109, + "learning_rate": 6.139909454559644e-06, + "loss": 0.7495, "step": 22492 }, { - "epoch": 0.6382803632236095, + "epoch": 0.6373940888095441, "grad_norm": 0.0, - "learning_rate": 6.112748543601492e-06, - "loss": 0.8164, + "learning_rate": 6.139062814818012e-06, + "loss": 0.8856, "step": 22493 }, { - "epoch": 0.6383087400681045, + "epoch": 0.6374224262518066, "grad_norm": 0.0, - "learning_rate": 6.111901763981445e-06, - "loss": 0.8705, + "learning_rate": 6.138216207598484e-06, + "loss": 0.8185, "step": 22494 }, { - "epoch": 0.6383371169125993, + "epoch": 0.637450763694069, "grad_norm": 0.0, - "learning_rate": 6.11105501720434e-06, - "loss": 0.801, + "learning_rate": 6.137369632908197e-06, + "loss": 0.9044, "step": 22495 }, { - "epoch": 0.6383654937570942, + "epoch": 0.6374791011363314, "grad_norm": 0.0, - "learning_rate": 6.110208303277329e-06, - "loss": 0.8068, + "learning_rate": 6.136523090754277e-06, + "loss": 0.8935, "step": 22496 }, { - "epoch": 0.6383938706015891, + "epoch": 0.6375074385785939, "grad_norm": 0.0, - "learning_rate": 6.109361622207561e-06, - "loss": 0.8074, + "learning_rate": 6.135676581143859e-06, + "loss": 0.7762, "step": 22497 }, { - "epoch": 0.638422247446084, + "epoch": 0.6375357760208563, "grad_norm": 0.0, - "learning_rate": 6.108514974002193e-06, - "loss": 0.8088, + "learning_rate": 6.134830104084075e-06, + "loss": 0.9546, "step": 22498 }, { - "epoch": 0.6384506242905789, + "epoch": 0.6375641134631188, "grad_norm": 0.0, - "learning_rate": 6.107668358668368e-06, - "loss": 0.8864, + "learning_rate": 6.133983659582048e-06, + "loss": 0.8586, "step": 22499 }, { - "epoch": 0.6384790011350738, + "epoch": 0.6375924509053813, "grad_norm": 0.0, - "learning_rate": 6.10682177621325e-06, - "loss": 0.7859, + "learning_rate": 6.133137247644914e-06, + "loss": 0.7573, "step": 22500 }, { - "epoch": 0.6385073779795687, + "epoch": 0.6376207883476438, "grad_norm": 0.0, - "learning_rate": 6.105975226643979e-06, - "loss": 0.9326, + "learning_rate": 6.132290868279803e-06, + "loss": 0.7776, "step": 22501 }, { - "epoch": 0.6385357548240636, + "epoch": 0.6376491257899062, "grad_norm": 0.0, - "learning_rate": 6.105128709967714e-06, - "loss": 0.8448, + "learning_rate": 6.131444521493839e-06, + "loss": 0.899, "step": 22502 }, { - "epoch": 0.6385641316685584, + "epoch": 0.6376774632321687, "grad_norm": 0.0, - "learning_rate": 6.104282226191601e-06, - "loss": 0.7061, + "learning_rate": 6.130598207294156e-06, + "loss": 0.8148, "step": 22503 }, { - "epoch": 0.6385925085130534, + "epoch": 0.6377058006744312, "grad_norm": 0.0, - "learning_rate": 6.1034357753227904e-06, - "loss": 0.7853, + "learning_rate": 6.1297519256878815e-06, + "loss": 0.8305, "step": 22504 }, { - "epoch": 0.6386208853575482, + "epoch": 0.6377341381166935, "grad_norm": 0.0, - "learning_rate": 6.102589357368434e-06, - "loss": 0.7807, + "learning_rate": 6.128905676682141e-06, + "loss": 0.846, "step": 22505 }, { - "epoch": 0.6386492622020431, + "epoch": 0.637762475558956, "grad_norm": 0.0, - "learning_rate": 6.10174297233568e-06, - "loss": 0.8284, + "learning_rate": 6.12805946028407e-06, + "loss": 0.857, "step": 22506 }, { - "epoch": 0.6386776390465381, + "epoch": 0.6377908130012185, "grad_norm": 0.0, - "learning_rate": 6.1008966202316766e-06, - "loss": 0.8844, + "learning_rate": 6.127213276500789e-06, + "loss": 0.8434, "step": 22507 }, { - "epoch": 0.6387060158910329, + "epoch": 0.6378191504434809, "grad_norm": 0.0, - "learning_rate": 6.100050301063577e-06, - "loss": 0.8172, + "learning_rate": 6.126367125339428e-06, + "loss": 0.9032, "step": 22508 }, { - "epoch": 0.6387343927355278, + "epoch": 0.6378474878857434, "grad_norm": 0.0, - "learning_rate": 6.099204014838528e-06, - "loss": 0.7675, + "learning_rate": 6.125521006807116e-06, + "loss": 0.9016, "step": 22509 }, { - "epoch": 0.6387627695800226, + "epoch": 0.6378758253280059, "grad_norm": 0.0, - "learning_rate": 6.098357761563678e-06, - "loss": 0.857, + "learning_rate": 6.124674920910979e-06, + "loss": 0.8227, "step": 22510 }, { - "epoch": 0.6387911464245176, + "epoch": 0.6379041627702684, "grad_norm": 0.0, - "learning_rate": 6.097511541246173e-06, - "loss": 0.8745, + "learning_rate": 6.123828867658148e-06, + "loss": 0.8299, "step": 22511 }, { - "epoch": 0.6388195232690125, + "epoch": 0.6379325002125308, "grad_norm": 0.0, - "learning_rate": 6.096665353893165e-06, - "loss": 0.8519, + "learning_rate": 6.1229828470557405e-06, + "loss": 0.9016, "step": 22512 }, { - "epoch": 0.6388479001135073, + "epoch": 0.6379608376547933, "grad_norm": 0.0, - "learning_rate": 6.095819199511801e-06, - "loss": 0.8832, + "learning_rate": 6.1221368591108895e-06, + "loss": 0.9101, "step": 22513 }, { - "epoch": 0.6388762769580023, + "epoch": 0.6379891750970558, "grad_norm": 0.0, - "learning_rate": 6.094973078109223e-06, - "loss": 0.8897, + "learning_rate": 6.1212909038307215e-06, + "loss": 0.8067, "step": 22514 }, { - "epoch": 0.6389046538024972, + "epoch": 0.6380175125393182, "grad_norm": 0.0, - "learning_rate": 6.094126989692586e-06, - "loss": 0.9424, + "learning_rate": 6.120444981222359e-06, + "loss": 0.9126, "step": 22515 }, { - "epoch": 0.638933030646992, + "epoch": 0.6380458499815806, "grad_norm": 0.0, - "learning_rate": 6.093280934269036e-06, - "loss": 0.6844, + "learning_rate": 6.11959909129293e-06, + "loss": 0.8717, "step": 22516 }, { - "epoch": 0.638961407491487, + "epoch": 0.6380741874238431, "grad_norm": 0.0, - "learning_rate": 6.092434911845717e-06, - "loss": 0.8891, + "learning_rate": 6.118753234049559e-06, + "loss": 0.805, "step": 22517 }, { - "epoch": 0.6389897843359819, + "epoch": 0.6381025248661056, "grad_norm": 0.0, - "learning_rate": 6.091588922429774e-06, - "loss": 0.8698, + "learning_rate": 6.1179074094993695e-06, + "loss": 0.7874, "step": 22518 }, { - "epoch": 0.6390181611804767, + "epoch": 0.638130862308368, "grad_norm": 0.0, - "learning_rate": 6.090742966028357e-06, - "loss": 0.8103, + "learning_rate": 6.1170616176494916e-06, + "loss": 0.9473, "step": 22519 }, { - "epoch": 0.6390465380249716, + "epoch": 0.6381591997506305, "grad_norm": 0.0, - "learning_rate": 6.0898970426486095e-06, - "loss": 0.8348, + "learning_rate": 6.11621585850704e-06, + "loss": 0.8539, "step": 22520 }, { - "epoch": 0.6390749148694665, + "epoch": 0.638187537192893, "grad_norm": 0.0, - "learning_rate": 6.089051152297676e-06, - "loss": 0.7298, + "learning_rate": 6.1153701320791455e-06, + "loss": 0.9067, "step": 22521 }, { - "epoch": 0.6391032917139614, + "epoch": 0.6382158746351554, "grad_norm": 0.0, - "learning_rate": 6.088205294982705e-06, - "loss": 0.8323, + "learning_rate": 6.114524438372933e-06, + "loss": 0.8741, "step": 22522 }, { - "epoch": 0.6391316685584563, + "epoch": 0.6382442120774179, "grad_norm": 0.0, - "learning_rate": 6.087359470710841e-06, - "loss": 0.8778, + "learning_rate": 6.113678777395522e-06, + "loss": 0.7831, "step": 22523 }, { - "epoch": 0.6391600454029512, + "epoch": 0.6382725495196804, "grad_norm": 0.0, - "learning_rate": 6.086513679489224e-06, - "loss": 0.7892, + "learning_rate": 6.112833149154042e-06, + "loss": 0.7641, "step": 22524 }, { - "epoch": 0.6391884222474461, + "epoch": 0.6383008869619429, "grad_norm": 0.0, - "learning_rate": 6.085667921325005e-06, - "loss": 0.8753, + "learning_rate": 6.111987553655607e-06, + "loss": 0.7165, "step": 22525 }, { - "epoch": 0.639216799091941, + "epoch": 0.6383292244042053, "grad_norm": 0.0, - "learning_rate": 6.084822196225323e-06, - "loss": 0.8701, + "learning_rate": 6.111141990907346e-06, + "loss": 0.8968, "step": 22526 }, { - "epoch": 0.6392451759364358, + "epoch": 0.6383575618464677, "grad_norm": 0.0, - "learning_rate": 6.083976504197323e-06, - "loss": 0.9416, + "learning_rate": 6.1102964609163804e-06, + "loss": 0.818, "step": 22527 }, { - "epoch": 0.6392735527809308, + "epoch": 0.6383858992887302, "grad_norm": 0.0, - "learning_rate": 6.083130845248152e-06, - "loss": 0.7599, + "learning_rate": 6.109450963689831e-06, + "loss": 0.9341, "step": 22528 }, { - "epoch": 0.6393019296254256, + "epoch": 0.6384142367309926, "grad_norm": 0.0, - "learning_rate": 6.08228521938495e-06, - "loss": 0.8162, + "learning_rate": 6.108605499234821e-06, + "loss": 0.6427, "step": 22529 }, { - "epoch": 0.6393303064699205, + "epoch": 0.6384425741732551, "grad_norm": 0.0, - "learning_rate": 6.081439626614858e-06, - "loss": 0.7779, + "learning_rate": 6.107760067558476e-06, + "loss": 0.9236, "step": 22530 }, { - "epoch": 0.6393586833144155, + "epoch": 0.6384709116155176, "grad_norm": 0.0, - "learning_rate": 6.080594066945025e-06, - "loss": 0.8957, + "learning_rate": 6.106914668667909e-06, + "loss": 0.8951, "step": 22531 }, { - "epoch": 0.6393870601589103, + "epoch": 0.63849924905778, "grad_norm": 0.0, - "learning_rate": 6.079748540382588e-06, - "loss": 0.8291, + "learning_rate": 6.10606930257025e-06, + "loss": 0.8456, "step": 22532 }, { - "epoch": 0.6394154370034052, + "epoch": 0.6385275865000425, "grad_norm": 0.0, - "learning_rate": 6.078903046934692e-06, - "loss": 0.7704, + "learning_rate": 6.10522396927261e-06, + "loss": 0.8579, "step": 22533 }, { - "epoch": 0.6394438138479002, + "epoch": 0.638555923942305, "grad_norm": 0.0, - "learning_rate": 6.078057586608481e-06, - "loss": 0.8566, + "learning_rate": 6.104378668782116e-06, + "loss": 0.8745, "step": 22534 }, { - "epoch": 0.639472190692395, + "epoch": 0.6385842613845675, "grad_norm": 0.0, - "learning_rate": 6.07721215941109e-06, - "loss": 0.7609, + "learning_rate": 6.103533401105888e-06, + "loss": 0.8693, "step": 22535 }, { - "epoch": 0.6395005675368899, + "epoch": 0.6386125988268299, "grad_norm": 0.0, - "learning_rate": 6.076366765349666e-06, - "loss": 0.7949, + "learning_rate": 6.102688166251044e-06, + "loss": 0.9093, "step": 22536 }, { - "epoch": 0.6395289443813847, + "epoch": 0.6386409362690924, "grad_norm": 0.0, - "learning_rate": 6.0755214044313505e-06, - "loss": 0.9227, + "learning_rate": 6.1018429642247045e-06, + "loss": 0.7548, "step": 22537 }, { - "epoch": 0.6395573212258797, + "epoch": 0.6386692737113548, "grad_norm": 0.0, - "learning_rate": 6.074676076663277e-06, - "loss": 0.7714, + "learning_rate": 6.1009977950339926e-06, + "loss": 0.865, "step": 22538 }, { - "epoch": 0.6395856980703746, + "epoch": 0.6386976111536172, "grad_norm": 0.0, - "learning_rate": 6.073830782052595e-06, - "loss": 0.9054, + "learning_rate": 6.10015265868602e-06, + "loss": 0.7569, "step": 22539 }, { - "epoch": 0.6396140749148694, + "epoch": 0.6387259485958797, "grad_norm": 0.0, - "learning_rate": 6.07298552060644e-06, - "loss": 0.9229, + "learning_rate": 6.099307555187913e-06, + "loss": 0.8556, "step": 22540 }, { - "epoch": 0.6396424517593644, + "epoch": 0.6387542860381422, "grad_norm": 0.0, - "learning_rate": 6.072140292331951e-06, - "loss": 0.8667, + "learning_rate": 6.098462484546785e-06, + "loss": 0.8999, "step": 22541 }, { - "epoch": 0.6396708286038593, + "epoch": 0.6387826234804047, "grad_norm": 0.0, - "learning_rate": 6.071295097236274e-06, - "loss": 0.8455, + "learning_rate": 6.097617446769755e-06, + "loss": 0.8736, "step": 22542 }, { - "epoch": 0.6396992054483541, + "epoch": 0.6388109609226671, "grad_norm": 0.0, - "learning_rate": 6.070449935326541e-06, - "loss": 0.8182, + "learning_rate": 6.0967724418639474e-06, + "loss": 0.8438, "step": 22543 }, { - "epoch": 0.639727582292849, + "epoch": 0.6388392983649296, "grad_norm": 0.0, - "learning_rate": 6.0696048066098935e-06, - "loss": 0.7535, + "learning_rate": 6.095927469836471e-06, + "loss": 0.915, "step": 22544 }, { - "epoch": 0.639755959137344, + "epoch": 0.6388676358071921, "grad_norm": 0.0, - "learning_rate": 6.068759711093468e-06, - "loss": 0.8565, + "learning_rate": 6.09508253069445e-06, + "loss": 0.9162, "step": 22545 }, { - "epoch": 0.6397843359818388, + "epoch": 0.6388959732494545, "grad_norm": 0.0, - "learning_rate": 6.06791464878441e-06, - "loss": 0.9566, + "learning_rate": 6.0942376244449965e-06, + "loss": 0.8084, "step": 22546 }, { - "epoch": 0.6398127128263337, + "epoch": 0.638924310691717, "grad_norm": 0.0, - "learning_rate": 6.067069619689854e-06, - "loss": 0.9212, + "learning_rate": 6.093392751095228e-06, + "loss": 0.8058, "step": 22547 }, { - "epoch": 0.6398410896708286, + "epoch": 0.6389526481339795, "grad_norm": 0.0, - "learning_rate": 6.066224623816937e-06, - "loss": 0.9188, + "learning_rate": 6.092547910652267e-06, + "loss": 0.9222, "step": 22548 }, { - "epoch": 0.6398694665153235, + "epoch": 0.6389809855762418, "grad_norm": 0.0, - "learning_rate": 6.065379661172797e-06, - "loss": 0.8584, + "learning_rate": 6.091703103123223e-06, + "loss": 0.8311, "step": 22549 }, { - "epoch": 0.6398978433598184, + "epoch": 0.6390093230185043, "grad_norm": 0.0, - "learning_rate": 6.0645347317645735e-06, - "loss": 0.9199, + "learning_rate": 6.0908583285152154e-06, + "loss": 0.9202, "step": 22550 }, { - "epoch": 0.6399262202043133, + "epoch": 0.6390376604607668, "grad_norm": 0.0, - "learning_rate": 6.063689835599401e-06, - "loss": 0.7768, + "learning_rate": 6.0900135868353635e-06, + "loss": 0.7928, "step": 22551 }, { - "epoch": 0.6399545970488082, + "epoch": 0.6390659979030293, "grad_norm": 0.0, - "learning_rate": 6.062844972684416e-06, - "loss": 0.8558, + "learning_rate": 6.089168878090776e-06, + "loss": 0.8931, "step": 22552 }, { - "epoch": 0.639982973893303, + "epoch": 0.6390943353452917, "grad_norm": 0.0, - "learning_rate": 6.0620001430267585e-06, - "loss": 0.7978, + "learning_rate": 6.0883242022885716e-06, + "loss": 0.8331, "step": 22553 }, { - "epoch": 0.6400113507377979, + "epoch": 0.6391226727875542, "grad_norm": 0.0, - "learning_rate": 6.061155346633563e-06, - "loss": 0.9729, + "learning_rate": 6.0874795594358635e-06, + "loss": 0.8906, "step": 22554 }, { - "epoch": 0.6400397275822929, + "epoch": 0.6391510102298167, "grad_norm": 0.0, - "learning_rate": 6.060310583511964e-06, - "loss": 0.8365, + "learning_rate": 6.086634949539769e-06, + "loss": 0.892, "step": 22555 }, { - "epoch": 0.6400681044267877, + "epoch": 0.6391793476720791, "grad_norm": 0.0, - "learning_rate": 6.059465853669098e-06, - "loss": 0.7974, + "learning_rate": 6.085790372607404e-06, + "loss": 0.9627, "step": 22556 }, { - "epoch": 0.6400964812712826, + "epoch": 0.6392076851143416, "grad_norm": 0.0, - "learning_rate": 6.058621157112103e-06, - "loss": 0.8024, + "learning_rate": 6.084945828645878e-06, + "loss": 0.8511, "step": 22557 }, { - "epoch": 0.6401248581157776, + "epoch": 0.639236022556604, "grad_norm": 0.0, - "learning_rate": 6.057776493848111e-06, - "loss": 0.8321, + "learning_rate": 6.0841013176623056e-06, + "loss": 0.8148, "step": 22558 }, { - "epoch": 0.6401532349602724, + "epoch": 0.6392643599988665, "grad_norm": 0.0, - "learning_rate": 6.056931863884259e-06, - "loss": 0.8799, + "learning_rate": 6.083256839663807e-06, + "loss": 0.6747, "step": 22559 }, { - "epoch": 0.6401816118047673, + "epoch": 0.6392926974411289, "grad_norm": 0.0, - "learning_rate": 6.056087267227677e-06, - "loss": 0.8485, + "learning_rate": 6.082412394657485e-06, + "loss": 0.8281, "step": 22560 }, { - "epoch": 0.6402099886492622, + "epoch": 0.6393210348833914, "grad_norm": 0.0, - "learning_rate": 6.0552427038855065e-06, - "loss": 0.8722, + "learning_rate": 6.08156798265046e-06, + "loss": 0.8918, "step": 22561 }, { - "epoch": 0.6402383654937571, + "epoch": 0.6393493723256539, "grad_norm": 0.0, - "learning_rate": 6.0543981738648774e-06, - "loss": 0.8451, + "learning_rate": 6.080723603649843e-06, + "loss": 0.7901, "step": 22562 }, { - "epoch": 0.640266742338252, + "epoch": 0.6393777097679163, "grad_norm": 0.0, - "learning_rate": 6.0535536771729255e-06, - "loss": 0.8879, + "learning_rate": 6.079879257662746e-06, + "loss": 0.9155, "step": 22563 }, { - "epoch": 0.6402951191827468, + "epoch": 0.6394060472101788, "grad_norm": 0.0, - "learning_rate": 6.052709213816783e-06, - "loss": 0.8149, + "learning_rate": 6.079034944696285e-06, + "loss": 0.766, "step": 22564 }, { - "epoch": 0.6403234960272418, + "epoch": 0.6394343846524413, "grad_norm": 0.0, - "learning_rate": 6.051864783803583e-06, - "loss": 0.8487, + "learning_rate": 6.078190664757564e-06, + "loss": 0.8865, "step": 22565 }, { - "epoch": 0.6403518728717367, + "epoch": 0.6394627220947038, "grad_norm": 0.0, - "learning_rate": 6.051020387140458e-06, - "loss": 0.953, + "learning_rate": 6.0773464178537e-06, + "loss": 0.8566, "step": 22566 }, { - "epoch": 0.6403802497162315, + "epoch": 0.6394910595369662, "grad_norm": 0.0, - "learning_rate": 6.050176023834544e-06, - "loss": 0.7773, + "learning_rate": 6.076502203991808e-06, + "loss": 0.7186, "step": 22567 }, { - "epoch": 0.6404086265607265, + "epoch": 0.6395193969792287, "grad_norm": 0.0, - "learning_rate": 6.049331693892966e-06, - "loss": 0.8942, + "learning_rate": 6.07565802317899e-06, + "loss": 0.8786, "step": 22568 }, { - "epoch": 0.6404370034052214, + "epoch": 0.6395477344214912, "grad_norm": 0.0, - "learning_rate": 6.048487397322864e-06, - "loss": 0.7854, + "learning_rate": 6.0748138754223665e-06, + "loss": 0.7912, "step": 22569 }, { - "epoch": 0.6404653802497162, + "epoch": 0.6395760718637535, "grad_norm": 0.0, - "learning_rate": 6.047643134131367e-06, - "loss": 0.8733, + "learning_rate": 6.073969760729039e-06, + "loss": 0.8561, "step": 22570 }, { - "epoch": 0.6404937570942111, + "epoch": 0.639604409306016, "grad_norm": 0.0, - "learning_rate": 6.046798904325603e-06, - "loss": 0.8473, + "learning_rate": 6.073125679106122e-06, + "loss": 0.927, "step": 22571 }, { - "epoch": 0.640522133938706, + "epoch": 0.6396327467482785, "grad_norm": 0.0, - "learning_rate": 6.04595470791271e-06, - "loss": 0.8526, + "learning_rate": 6.0722816305607315e-06, + "loss": 0.8762, "step": 22572 }, { - "epoch": 0.6405505107832009, + "epoch": 0.6396610841905409, "grad_norm": 0.0, - "learning_rate": 6.045110544899815e-06, - "loss": 0.8428, + "learning_rate": 6.071437615099966e-06, + "loss": 0.8074, "step": 22573 }, { - "epoch": 0.6405788876276958, + "epoch": 0.6396894216328034, "grad_norm": 0.0, - "learning_rate": 6.044266415294046e-06, - "loss": 0.8827, + "learning_rate": 6.070593632730941e-06, + "loss": 0.8734, "step": 22574 }, { - "epoch": 0.6406072644721907, + "epoch": 0.6397177590750659, "grad_norm": 0.0, - "learning_rate": 6.043422319102539e-06, - "loss": 0.9241, + "learning_rate": 6.069749683460765e-06, + "loss": 0.8902, "step": 22575 }, { - "epoch": 0.6406356413166856, + "epoch": 0.6397460965173284, "grad_norm": 0.0, - "learning_rate": 6.042578256332417e-06, - "loss": 0.8161, + "learning_rate": 6.068905767296547e-06, + "loss": 0.946, "step": 22576 }, { - "epoch": 0.6406640181611805, + "epoch": 0.6397744339595908, "grad_norm": 0.0, - "learning_rate": 6.041734226990819e-06, - "loss": 0.8232, + "learning_rate": 6.068061884245398e-06, + "loss": 0.9108, "step": 22577 }, { - "epoch": 0.6406923950056753, + "epoch": 0.6398027714018533, "grad_norm": 0.0, - "learning_rate": 6.040890231084869e-06, - "loss": 0.8253, + "learning_rate": 6.0672180343144204e-06, + "loss": 0.7775, "step": 22578 }, { - "epoch": 0.6407207718501703, + "epoch": 0.6398311088441158, "grad_norm": 0.0, - "learning_rate": 6.040046268621696e-06, - "loss": 0.8205, + "learning_rate": 6.066374217510725e-06, + "loss": 0.9016, "step": 22579 }, { - "epoch": 0.6407491486946651, + "epoch": 0.6398594462863781, "grad_norm": 0.0, - "learning_rate": 6.039202339608432e-06, - "loss": 0.8576, + "learning_rate": 6.065530433841424e-06, + "loss": 0.7682, "step": 22580 }, { - "epoch": 0.64077752553916, + "epoch": 0.6398877837286406, "grad_norm": 0.0, - "learning_rate": 6.038358444052204e-06, - "loss": 0.791, + "learning_rate": 6.064686683313619e-06, + "loss": 0.8654, "step": 22581 }, { - "epoch": 0.640805902383655, + "epoch": 0.6399161211709031, "grad_norm": 0.0, - "learning_rate": 6.037514581960139e-06, - "loss": 0.7798, + "learning_rate": 6.0638429659344215e-06, + "loss": 0.8913, "step": 22582 }, { - "epoch": 0.6408342792281498, + "epoch": 0.6399444586131656, "grad_norm": 0.0, - "learning_rate": 6.036670753339367e-06, - "loss": 0.8577, + "learning_rate": 6.062999281710934e-06, + "loss": 0.7636, "step": 22583 }, { - "epoch": 0.6408626560726447, + "epoch": 0.639972796055428, "grad_norm": 0.0, - "learning_rate": 6.035826958197016e-06, - "loss": 0.8774, + "learning_rate": 6.062155630650265e-06, + "loss": 0.9593, "step": 22584 }, { - "epoch": 0.6408910329171397, + "epoch": 0.6400011334976905, "grad_norm": 0.0, - "learning_rate": 6.034983196540212e-06, - "loss": 0.8335, + "learning_rate": 6.061312012759526e-06, + "loss": 0.8336, "step": 22585 }, { - "epoch": 0.6409194097616345, + "epoch": 0.640029470939953, "grad_norm": 0.0, - "learning_rate": 6.0341394683760835e-06, - "loss": 0.8559, + "learning_rate": 6.0604684280458135e-06, + "loss": 0.7606, "step": 22586 }, { - "epoch": 0.6409477866061294, + "epoch": 0.6400578083822154, "grad_norm": 0.0, - "learning_rate": 6.0332957737117585e-06, - "loss": 0.8294, + "learning_rate": 6.059624876516239e-06, + "loss": 0.8391, "step": 22587 }, { - "epoch": 0.6409761634506242, + "epoch": 0.6400861458244779, "grad_norm": 0.0, - "learning_rate": 6.032452112554359e-06, - "loss": 0.8967, + "learning_rate": 6.058781358177909e-06, + "loss": 0.9048, "step": 22588 }, { - "epoch": 0.6410045402951192, + "epoch": 0.6401144832667404, "grad_norm": 0.0, - "learning_rate": 6.031608484911018e-06, - "loss": 0.8971, + "learning_rate": 6.057937873037925e-06, + "loss": 0.7471, "step": 22589 }, { - "epoch": 0.6410329171396141, + "epoch": 0.6401428207090029, "grad_norm": 0.0, - "learning_rate": 6.030764890788857e-06, - "loss": 0.7183, + "learning_rate": 6.057094421103398e-06, + "loss": 0.8887, "step": 22590 }, { - "epoch": 0.6410612939841089, + "epoch": 0.6401711581512652, "grad_norm": 0.0, - "learning_rate": 6.029921330195001e-06, - "loss": 0.8318, + "learning_rate": 6.0562510023814256e-06, + "loss": 0.8817, "step": 22591 }, { - "epoch": 0.6410896708286039, + "epoch": 0.6401994955935277, "grad_norm": 0.0, - "learning_rate": 6.02907780313658e-06, - "loss": 0.7826, + "learning_rate": 6.055407616879115e-06, + "loss": 0.8784, "step": 22592 }, { - "epoch": 0.6411180476730988, + "epoch": 0.6402278330357902, "grad_norm": 0.0, - "learning_rate": 6.028234309620718e-06, - "loss": 0.8448, + "learning_rate": 6.054564264603573e-06, + "loss": 0.8398, "step": 22593 }, { - "epoch": 0.6411464245175936, + "epoch": 0.6402561704780526, "grad_norm": 0.0, - "learning_rate": 6.02739084965454e-06, - "loss": 0.9076, + "learning_rate": 6.053720945561901e-06, + "loss": 0.8462, "step": 22594 }, { - "epoch": 0.6411748013620885, + "epoch": 0.6402845079203151, "grad_norm": 0.0, - "learning_rate": 6.026547423245171e-06, - "loss": 0.724, + "learning_rate": 6.0528776597612e-06, + "loss": 0.8501, "step": 22595 }, { - "epoch": 0.6412031782065835, + "epoch": 0.6403128453625776, "grad_norm": 0.0, - "learning_rate": 6.02570403039973e-06, - "loss": 0.9382, + "learning_rate": 6.052034407208582e-06, + "loss": 0.7558, "step": 22596 }, { - "epoch": 0.6412315550510783, + "epoch": 0.64034118280484, "grad_norm": 0.0, - "learning_rate": 6.02486067112535e-06, - "loss": 0.8791, + "learning_rate": 6.051191187911138e-06, + "loss": 0.8915, "step": 22597 }, { - "epoch": 0.6412599318955732, + "epoch": 0.6403695202471025, "grad_norm": 0.0, - "learning_rate": 6.024017345429149e-06, - "loss": 0.8645, + "learning_rate": 6.050348001875983e-06, + "loss": 0.8427, "step": 22598 }, { - "epoch": 0.6412883087400681, + "epoch": 0.640397857689365, "grad_norm": 0.0, - "learning_rate": 6.023174053318252e-06, - "loss": 0.869, + "learning_rate": 6.04950484911021e-06, + "loss": 0.823, "step": 22599 }, { - "epoch": 0.641316685584563, + "epoch": 0.6404261951316275, "grad_norm": 0.0, - "learning_rate": 6.022330794799784e-06, - "loss": 0.804, + "learning_rate": 6.048661729620924e-06, + "loss": 0.8723, "step": 22600 }, { - "epoch": 0.6413450624290579, + "epoch": 0.6404545325738898, "grad_norm": 0.0, - "learning_rate": 6.021487569880866e-06, - "loss": 0.8863, + "learning_rate": 6.047818643415229e-06, + "loss": 0.8431, "step": 22601 }, { - "epoch": 0.6413734392735527, + "epoch": 0.6404828700161523, "grad_norm": 0.0, - "learning_rate": 6.020644378568621e-06, - "loss": 0.8538, + "learning_rate": 6.046975590500223e-06, + "loss": 0.703, "step": 22602 }, { - "epoch": 0.6414018161180477, + "epoch": 0.6405112074584148, "grad_norm": 0.0, - "learning_rate": 6.0198012208701715e-06, - "loss": 0.8179, + "learning_rate": 6.046132570883015e-06, + "loss": 0.7347, "step": 22603 }, { - "epoch": 0.6414301929625426, + "epoch": 0.6405395449006772, "grad_norm": 0.0, - "learning_rate": 6.018958096792642e-06, - "loss": 0.8455, + "learning_rate": 6.045289584570695e-06, + "loss": 0.8593, "step": 22604 }, { - "epoch": 0.6414585698070374, + "epoch": 0.6405678823429397, "grad_norm": 0.0, - "learning_rate": 6.018115006343148e-06, - "loss": 0.9094, + "learning_rate": 6.0444466315703695e-06, + "loss": 0.8349, "step": 22605 }, { - "epoch": 0.6414869466515324, + "epoch": 0.6405962197852022, "grad_norm": 0.0, - "learning_rate": 6.017271949528816e-06, - "loss": 0.9425, + "learning_rate": 6.043603711889141e-06, + "loss": 0.7545, "step": 22606 }, { - "epoch": 0.6415153234960272, + "epoch": 0.6406245572274647, "grad_norm": 0.0, - "learning_rate": 6.01642892635677e-06, - "loss": 0.9013, + "learning_rate": 6.0427608255341064e-06, + "loss": 0.868, "step": 22607 }, { - "epoch": 0.6415437003405221, + "epoch": 0.6406528946697271, "grad_norm": 0.0, - "learning_rate": 6.015585936834127e-06, - "loss": 0.8851, + "learning_rate": 6.041917972512367e-06, + "loss": 0.827, "step": 22608 }, { - "epoch": 0.6415720771850171, + "epoch": 0.6406812321119896, "grad_norm": 0.0, - "learning_rate": 6.014742980968008e-06, - "loss": 0.8485, + "learning_rate": 6.041075152831025e-06, + "loss": 0.8335, "step": 22609 }, { - "epoch": 0.6416004540295119, + "epoch": 0.6407095695542521, "grad_norm": 0.0, - "learning_rate": 6.013900058765536e-06, - "loss": 0.8228, + "learning_rate": 6.040232366497174e-06, + "loss": 0.9903, "step": 22610 }, { - "epoch": 0.6416288308740068, + "epoch": 0.6407379069965145, "grad_norm": 0.0, - "learning_rate": 6.013057170233829e-06, - "loss": 0.743, + "learning_rate": 6.0393896135179205e-06, + "loss": 0.7785, "step": 22611 }, { - "epoch": 0.6416572077185017, + "epoch": 0.6407662444387769, "grad_norm": 0.0, - "learning_rate": 6.012214315380005e-06, - "loss": 0.8268, + "learning_rate": 6.038546893900354e-06, + "loss": 0.948, "step": 22612 }, { - "epoch": 0.6416855845629966, + "epoch": 0.6407945818810394, "grad_norm": 0.0, - "learning_rate": 6.011371494211189e-06, - "loss": 0.8695, + "learning_rate": 6.037704207651578e-06, + "loss": 0.7975, "step": 22613 }, { - "epoch": 0.6417139614074915, + "epoch": 0.6408229193233019, "grad_norm": 0.0, - "learning_rate": 6.010528706734495e-06, - "loss": 0.7918, + "learning_rate": 6.036861554778695e-06, + "loss": 0.8745, "step": 22614 }, { - "epoch": 0.6417423382519863, + "epoch": 0.6408512567655643, "grad_norm": 0.0, - "learning_rate": 6.009685952957045e-06, - "loss": 0.8756, + "learning_rate": 6.036018935288794e-06, + "loss": 0.7764, "step": 22615 }, { - "epoch": 0.6417707150964813, + "epoch": 0.6408795942078268, "grad_norm": 0.0, - "learning_rate": 6.0088432328859584e-06, - "loss": 0.7568, + "learning_rate": 6.035176349188978e-06, + "loss": 0.7312, "step": 22616 }, { - "epoch": 0.6417990919409762, + "epoch": 0.6409079316500893, "grad_norm": 0.0, - "learning_rate": 6.008000546528352e-06, - "loss": 0.802, + "learning_rate": 6.034333796486349e-06, + "loss": 0.9398, "step": 22617 }, { - "epoch": 0.641827468785471, + "epoch": 0.6409362690923517, "grad_norm": 0.0, - "learning_rate": 6.007157893891344e-06, - "loss": 0.8275, + "learning_rate": 6.033491277187995e-06, + "loss": 0.6857, "step": 22618 }, { - "epoch": 0.6418558456299659, + "epoch": 0.6409646065346142, "grad_norm": 0.0, - "learning_rate": 6.0063152749820506e-06, - "loss": 0.8162, + "learning_rate": 6.032648791301019e-06, + "loss": 0.985, "step": 22619 }, { - "epoch": 0.6418842224744609, + "epoch": 0.6409929439768767, "grad_norm": 0.0, - "learning_rate": 6.005472689807596e-06, - "loss": 0.7769, + "learning_rate": 6.0318063388325134e-06, + "loss": 0.9353, "step": 22620 }, { - "epoch": 0.6419125993189557, + "epoch": 0.641021281419139, "grad_norm": 0.0, - "learning_rate": 6.0046301383750915e-06, - "loss": 0.8896, + "learning_rate": 6.030963919789575e-06, + "loss": 0.8219, "step": 22621 }, { - "epoch": 0.6419409761634506, + "epoch": 0.6410496188614015, "grad_norm": 0.0, - "learning_rate": 6.003787620691651e-06, - "loss": 0.9875, + "learning_rate": 6.030121534179307e-06, + "loss": 0.8858, "step": 22622 }, { - "epoch": 0.6419693530079456, + "epoch": 0.641077956303664, "grad_norm": 0.0, - "learning_rate": 6.002945136764399e-06, - "loss": 0.8343, + "learning_rate": 6.029279182008795e-06, + "loss": 0.7829, "step": 22623 }, { - "epoch": 0.6419977298524404, + "epoch": 0.6411062937459265, "grad_norm": 0.0, - "learning_rate": 6.002102686600451e-06, - "loss": 0.7955, + "learning_rate": 6.0284368632851386e-06, + "loss": 0.8745, "step": 22624 }, { - "epoch": 0.6420261066969353, + "epoch": 0.6411346311881889, "grad_norm": 0.0, - "learning_rate": 6.001260270206921e-06, - "loss": 0.8394, + "learning_rate": 6.0275945780154365e-06, + "loss": 0.8672, "step": 22625 }, { - "epoch": 0.6420544835414302, + "epoch": 0.6411629686304514, "grad_norm": 0.0, - "learning_rate": 6.000417887590924e-06, - "loss": 0.9341, + "learning_rate": 6.026752326206777e-06, + "loss": 0.7994, "step": 22626 }, { - "epoch": 0.6420828603859251, + "epoch": 0.6411913060727139, "grad_norm": 0.0, - "learning_rate": 5.999575538759579e-06, - "loss": 0.9082, + "learning_rate": 6.025910107866263e-06, + "loss": 0.9336, "step": 22627 }, { - "epoch": 0.64211123723042, + "epoch": 0.6412196435149763, "grad_norm": 0.0, - "learning_rate": 5.998733223719998e-06, - "loss": 0.8025, + "learning_rate": 6.02506792300098e-06, + "loss": 0.8084, "step": 22628 }, { - "epoch": 0.6421396140749148, + "epoch": 0.6412479809572388, "grad_norm": 0.0, - "learning_rate": 5.997890942479296e-06, - "loss": 0.8094, + "learning_rate": 6.024225771618024e-06, + "loss": 0.8177, "step": 22629 }, { - "epoch": 0.6421679909194098, + "epoch": 0.6412763183995013, "grad_norm": 0.0, - "learning_rate": 5.997048695044592e-06, - "loss": 0.8004, + "learning_rate": 6.0233836537244975e-06, + "loss": 0.8899, "step": 22630 }, { - "epoch": 0.6421963677639047, + "epoch": 0.6413046558417638, "grad_norm": 0.0, - "learning_rate": 5.996206481422997e-06, - "loss": 0.8403, + "learning_rate": 6.022541569327481e-06, + "loss": 0.9047, "step": 22631 }, { - "epoch": 0.6422247446083995, + "epoch": 0.6413329932840262, "grad_norm": 0.0, - "learning_rate": 5.9953643016216225e-06, - "loss": 0.8861, + "learning_rate": 6.021699518434077e-06, + "loss": 0.869, "step": 22632 }, { - "epoch": 0.6422531214528945, + "epoch": 0.6413613307262886, "grad_norm": 0.0, - "learning_rate": 5.99452215564759e-06, - "loss": 0.86, + "learning_rate": 6.0208575010513735e-06, + "loss": 0.9428, "step": 22633 }, { - "epoch": 0.6422814982973893, + "epoch": 0.6413896681685511, "grad_norm": 0.0, - "learning_rate": 5.993680043508008e-06, - "loss": 0.7783, + "learning_rate": 6.0200155171864635e-06, + "loss": 0.8303, "step": 22634 }, { - "epoch": 0.6423098751418842, + "epoch": 0.6414180056108135, "grad_norm": 0.0, - "learning_rate": 5.992837965209988e-06, - "loss": 0.9112, + "learning_rate": 6.019173566846446e-06, + "loss": 0.7404, "step": 22635 }, { - "epoch": 0.6423382519863791, + "epoch": 0.641446343053076, "grad_norm": 0.0, - "learning_rate": 5.991995920760648e-06, - "loss": 0.937, + "learning_rate": 6.0183316500384035e-06, + "loss": 0.7889, "step": 22636 }, { - "epoch": 0.642366628830874, + "epoch": 0.6414746804953385, "grad_norm": 0.0, - "learning_rate": 5.991153910167096e-06, - "loss": 0.8208, + "learning_rate": 6.017489766769432e-06, + "loss": 0.7188, "step": 22637 }, { - "epoch": 0.6423950056753689, + "epoch": 0.641503017937601, "grad_norm": 0.0, - "learning_rate": 5.99031193343645e-06, - "loss": 0.9305, + "learning_rate": 6.016647917046625e-06, + "loss": 0.9417, "step": 22638 }, { - "epoch": 0.6424233825198638, + "epoch": 0.6415313553798634, "grad_norm": 0.0, - "learning_rate": 5.9894699905758195e-06, - "loss": 0.8515, + "learning_rate": 6.015806100877069e-06, + "loss": 0.84, "step": 22639 }, { - "epoch": 0.6424517593643587, + "epoch": 0.6415596928221259, "grad_norm": 0.0, - "learning_rate": 5.9886280815923135e-06, - "loss": 0.9775, + "learning_rate": 6.014964318267863e-06, + "loss": 0.7878, "step": 22640 }, { - "epoch": 0.6424801362088536, + "epoch": 0.6415880302643884, "grad_norm": 0.0, - "learning_rate": 5.987786206493049e-06, - "loss": 0.8965, + "learning_rate": 6.014122569226088e-06, + "loss": 0.8533, "step": 22641 }, { - "epoch": 0.6425085130533484, + "epoch": 0.6416163677066508, "grad_norm": 0.0, - "learning_rate": 5.986944365285136e-06, - "loss": 0.8162, + "learning_rate": 6.013280853758839e-06, + "loss": 0.7908, "step": 22642 }, { - "epoch": 0.6425368898978434, + "epoch": 0.6416447051489133, "grad_norm": 0.0, - "learning_rate": 5.986102557975681e-06, - "loss": 0.9292, + "learning_rate": 6.012439171873209e-06, + "loss": 0.9259, "step": 22643 }, { - "epoch": 0.6425652667423383, + "epoch": 0.6416730425911757, "grad_norm": 0.0, - "learning_rate": 5.9852607845718e-06, - "loss": 0.7806, + "learning_rate": 6.01159752357628e-06, + "loss": 0.9253, "step": 22644 }, { - "epoch": 0.6425936435868331, + "epoch": 0.6417013800334381, "grad_norm": 0.0, - "learning_rate": 5.984419045080603e-06, - "loss": 0.8303, + "learning_rate": 6.0107559088751475e-06, + "loss": 0.785, "step": 22645 }, { - "epoch": 0.642622020431328, + "epoch": 0.6417297174757006, "grad_norm": 0.0, - "learning_rate": 5.983577339509195e-06, - "loss": 0.887, + "learning_rate": 6.009914327776901e-06, + "loss": 0.8609, "step": 22646 }, { - "epoch": 0.642650397275823, + "epoch": 0.6417580549179631, "grad_norm": 0.0, - "learning_rate": 5.982735667864694e-06, - "loss": 0.8851, + "learning_rate": 6.009072780288626e-06, + "loss": 0.824, "step": 22647 }, { - "epoch": 0.6426787741203178, + "epoch": 0.6417863923602256, "grad_norm": 0.0, - "learning_rate": 5.981894030154203e-06, - "loss": 0.8407, + "learning_rate": 6.008231266417417e-06, + "loss": 0.8675, "step": 22648 }, { - "epoch": 0.6427071509648127, + "epoch": 0.641814729802488, "grad_norm": 0.0, - "learning_rate": 5.9810524263848335e-06, - "loss": 0.7805, + "learning_rate": 6.007389786170355e-06, + "loss": 0.8021, "step": 22649 }, { - "epoch": 0.6427355278093076, + "epoch": 0.6418430672447505, "grad_norm": 0.0, - "learning_rate": 5.9802108565636965e-06, - "loss": 0.8508, + "learning_rate": 6.00654833955453e-06, + "loss": 0.7954, "step": 22650 }, { - "epoch": 0.6427639046538025, + "epoch": 0.641871404687013, "grad_norm": 0.0, - "learning_rate": 5.979369320697899e-06, - "loss": 0.9327, + "learning_rate": 6.005706926577033e-06, + "loss": 0.7844, "step": 22651 }, { - "epoch": 0.6427922814982974, + "epoch": 0.6418997421292754, "grad_norm": 0.0, - "learning_rate": 5.978527818794545e-06, - "loss": 0.8713, + "learning_rate": 6.004865547244949e-06, + "loss": 0.786, "step": 22652 }, { - "epoch": 0.6428206583427922, + "epoch": 0.6419280795715379, "grad_norm": 0.0, - "learning_rate": 5.97768635086075e-06, - "loss": 0.832, + "learning_rate": 6.004024201565366e-06, + "loss": 0.8409, "step": 22653 }, { - "epoch": 0.6428490351872872, + "epoch": 0.6419564170138004, "grad_norm": 0.0, - "learning_rate": 5.97684491690362e-06, - "loss": 0.7953, + "learning_rate": 6.003182889545374e-06, + "loss": 0.8383, "step": 22654 }, { - "epoch": 0.6428774120317821, + "epoch": 0.6419847544560628, "grad_norm": 0.0, - "learning_rate": 5.976003516930263e-06, - "loss": 0.9447, + "learning_rate": 6.002341611192053e-06, + "loss": 0.8643, "step": 22655 }, { - "epoch": 0.6429057888762769, + "epoch": 0.6420130918983252, "grad_norm": 0.0, - "learning_rate": 5.975162150947783e-06, - "loss": 0.7877, + "learning_rate": 6.001500366512498e-06, + "loss": 0.8505, "step": 22656 }, { - "epoch": 0.6429341657207719, + "epoch": 0.6420414293405877, "grad_norm": 0.0, - "learning_rate": 5.97432081896329e-06, - "loss": 0.8447, + "learning_rate": 6.000659155513786e-06, + "loss": 0.8874, "step": 22657 }, { - "epoch": 0.6429625425652667, + "epoch": 0.6420697667828502, "grad_norm": 0.0, - "learning_rate": 5.973479520983892e-06, - "loss": 0.9699, + "learning_rate": 5.999817978203006e-06, + "loss": 0.8539, "step": 22658 }, { - "epoch": 0.6429909194097616, + "epoch": 0.6420981042251126, "grad_norm": 0.0, - "learning_rate": 5.972638257016693e-06, - "loss": 0.8817, + "learning_rate": 5.998976834587246e-06, + "loss": 0.8603, "step": 22659 }, { - "epoch": 0.6430192962542566, + "epoch": 0.6421264416673751, "grad_norm": 0.0, - "learning_rate": 5.971797027068797e-06, - "loss": 0.8403, + "learning_rate": 5.998135724673591e-06, + "loss": 0.8493, "step": 22660 }, { - "epoch": 0.6430476730987514, + "epoch": 0.6421547791096376, "grad_norm": 0.0, - "learning_rate": 5.970955831147316e-06, - "loss": 0.8503, + "learning_rate": 5.997294648469128e-06, + "loss": 0.754, "step": 22661 }, { - "epoch": 0.6430760499432463, + "epoch": 0.6421831165519001, "grad_norm": 0.0, - "learning_rate": 5.97011466925935e-06, - "loss": 0.8134, + "learning_rate": 5.996453605980932e-06, + "loss": 0.7707, "step": 22662 }, { - "epoch": 0.6431044267877412, + "epoch": 0.6422114539941625, "grad_norm": 0.0, - "learning_rate": 5.969273541412006e-06, - "loss": 0.8615, + "learning_rate": 5.995612597216096e-06, + "loss": 0.8517, "step": 22663 }, { - "epoch": 0.6431328036322361, + "epoch": 0.642239791436425, "grad_norm": 0.0, - "learning_rate": 5.968432447612391e-06, - "loss": 0.8867, + "learning_rate": 5.994771622181703e-06, + "loss": 0.8126, "step": 22664 }, { - "epoch": 0.643161180476731, + "epoch": 0.6422681288786874, "grad_norm": 0.0, - "learning_rate": 5.967591387867609e-06, - "loss": 0.7966, + "learning_rate": 5.993930680884834e-06, + "loss": 0.7765, "step": 22665 }, { - "epoch": 0.6431895573212258, + "epoch": 0.6422964663209498, "grad_norm": 0.0, - "learning_rate": 5.966750362184761e-06, - "loss": 0.869, + "learning_rate": 5.993089773332577e-06, + "loss": 0.9115, "step": 22666 }, { - "epoch": 0.6432179341657208, + "epoch": 0.6423248037632123, "grad_norm": 0.0, - "learning_rate": 5.965909370570957e-06, - "loss": 0.8941, + "learning_rate": 5.992248899532014e-06, + "loss": 0.8693, "step": 22667 }, { - "epoch": 0.6432463110102157, + "epoch": 0.6423531412054748, "grad_norm": 0.0, - "learning_rate": 5.965068413033292e-06, - "loss": 0.8924, + "learning_rate": 5.991408059490223e-06, + "loss": 0.9438, "step": 22668 }, { - "epoch": 0.6432746878547105, + "epoch": 0.6423814786477372, "grad_norm": 0.0, - "learning_rate": 5.964227489578882e-06, - "loss": 0.8267, + "learning_rate": 5.9905672532142955e-06, + "loss": 0.7922, "step": 22669 }, { - "epoch": 0.6433030646992054, + "epoch": 0.6424098160899997, "grad_norm": 0.0, - "learning_rate": 5.96338660021482e-06, - "loss": 0.7508, + "learning_rate": 5.989726480711304e-06, + "loss": 0.7391, "step": 22670 }, { - "epoch": 0.6433314415437004, + "epoch": 0.6424381535322622, "grad_norm": 0.0, - "learning_rate": 5.962545744948216e-06, - "loss": 0.8955, + "learning_rate": 5.988885741988336e-06, + "loss": 0.8326, "step": 22671 }, { - "epoch": 0.6433598183881952, + "epoch": 0.6424664909745247, "grad_norm": 0.0, - "learning_rate": 5.961704923786169e-06, - "loss": 0.9201, + "learning_rate": 5.9880450370524744e-06, + "loss": 0.7913, "step": 22672 }, { - "epoch": 0.6433881952326901, + "epoch": 0.6424948284167871, "grad_norm": 0.0, - "learning_rate": 5.96086413673578e-06, - "loss": 0.8706, + "learning_rate": 5.987204365910798e-06, + "loss": 0.9471, "step": 22673 }, { - "epoch": 0.6434165720771851, + "epoch": 0.6425231658590496, "grad_norm": 0.0, - "learning_rate": 5.9600233838041565e-06, - "loss": 0.8465, + "learning_rate": 5.98636372857039e-06, + "loss": 0.7513, "step": 22674 }, { - "epoch": 0.6434449489216799, + "epoch": 0.642551503301312, "grad_norm": 0.0, - "learning_rate": 5.9591826649983955e-06, - "loss": 0.8712, + "learning_rate": 5.985523125038333e-06, + "loss": 0.8769, "step": 22675 }, { - "epoch": 0.6434733257661748, + "epoch": 0.6425798407435744, "grad_norm": 0.0, - "learning_rate": 5.958341980325598e-06, - "loss": 0.7838, + "learning_rate": 5.984682555321702e-06, + "loss": 0.7896, "step": 22676 }, { - "epoch": 0.6435017026106697, + "epoch": 0.6426081781858369, "grad_norm": 0.0, - "learning_rate": 5.95750132979287e-06, - "loss": 0.9062, + "learning_rate": 5.983842019427583e-06, + "loss": 0.7218, "step": 22677 }, { - "epoch": 0.6435300794551646, + "epoch": 0.6426365156280994, "grad_norm": 0.0, - "learning_rate": 5.95666071340731e-06, - "loss": 0.8878, + "learning_rate": 5.983001517363053e-06, + "loss": 0.8828, "step": 22678 }, { - "epoch": 0.6435584562996595, + "epoch": 0.6426648530703619, "grad_norm": 0.0, - "learning_rate": 5.955820131176018e-06, - "loss": 0.8298, + "learning_rate": 5.982161049135191e-06, + "loss": 0.9589, "step": 22679 }, { - "epoch": 0.6435868331441543, + "epoch": 0.6426931905126243, "grad_norm": 0.0, - "learning_rate": 5.954979583106095e-06, - "loss": 0.9327, + "learning_rate": 5.981320614751085e-06, + "loss": 0.8207, "step": 22680 }, { - "epoch": 0.6436152099886493, + "epoch": 0.6427215279548868, "grad_norm": 0.0, - "learning_rate": 5.954139069204643e-06, - "loss": 0.9721, + "learning_rate": 5.980480214217801e-06, + "loss": 0.8981, "step": 22681 }, { - "epoch": 0.6436435868331442, + "epoch": 0.6427498653971493, "grad_norm": 0.0, - "learning_rate": 5.9532985894787575e-06, - "loss": 0.9151, + "learning_rate": 5.979639847542427e-06, + "loss": 0.937, "step": 22682 }, { - "epoch": 0.643671963677639, + "epoch": 0.6427782028394117, "grad_norm": 0.0, - "learning_rate": 5.95245814393554e-06, - "loss": 0.9443, + "learning_rate": 5.978799514732042e-06, + "loss": 0.8481, "step": 22683 }, { - "epoch": 0.643700340522134, + "epoch": 0.6428065402816742, "grad_norm": 0.0, - "learning_rate": 5.9516177325820915e-06, - "loss": 0.8425, + "learning_rate": 5.977959215793718e-06, + "loss": 0.9326, "step": 22684 }, { - "epoch": 0.6437287173666288, + "epoch": 0.6428348777239367, "grad_norm": 0.0, - "learning_rate": 5.950777355425511e-06, - "loss": 0.8531, + "learning_rate": 5.97711895073454e-06, + "loss": 0.7931, "step": 22685 }, { - "epoch": 0.6437570942111237, + "epoch": 0.6428632151661992, "grad_norm": 0.0, - "learning_rate": 5.949937012472897e-06, - "loss": 0.8563, + "learning_rate": 5.976278719561581e-06, + "loss": 0.7885, "step": 22686 }, { - "epoch": 0.6437854710556186, + "epoch": 0.6428915526084615, "grad_norm": 0.0, - "learning_rate": 5.949096703731345e-06, - "loss": 0.8883, + "learning_rate": 5.97543852228192e-06, + "loss": 0.7956, "step": 22687 }, { - "epoch": 0.6438138479001135, + "epoch": 0.642919890050724, "grad_norm": 0.0, - "learning_rate": 5.948256429207957e-06, - "loss": 0.7917, + "learning_rate": 5.974598358902639e-06, + "loss": 0.8537, "step": 22688 }, { - "epoch": 0.6438422247446084, + "epoch": 0.6429482274929865, "grad_norm": 0.0, - "learning_rate": 5.947416188909829e-06, - "loss": 0.7715, + "learning_rate": 5.973758229430806e-06, + "loss": 0.8214, "step": 22689 }, { - "epoch": 0.6438706015891033, + "epoch": 0.6429765649352489, "grad_norm": 0.0, - "learning_rate": 5.946575982844058e-06, - "loss": 0.7678, + "learning_rate": 5.972918133873506e-06, + "loss": 0.8855, "step": 22690 }, { - "epoch": 0.6438989784335982, + "epoch": 0.6430049023775114, "grad_norm": 0.0, - "learning_rate": 5.945735811017742e-06, - "loss": 0.8916, + "learning_rate": 5.972078072237808e-06, + "loss": 0.8312, "step": 22691 }, { - "epoch": 0.6439273552780931, + "epoch": 0.6430332398197739, "grad_norm": 0.0, - "learning_rate": 5.944895673437978e-06, - "loss": 0.8324, + "learning_rate": 5.971238044530794e-06, + "loss": 0.7873, "step": 22692 }, { - "epoch": 0.6439557321225879, + "epoch": 0.6430615772620363, "grad_norm": 0.0, - "learning_rate": 5.944055570111862e-06, - "loss": 0.7987, + "learning_rate": 5.97039805075954e-06, + "loss": 0.9144, "step": 22693 }, { - "epoch": 0.6439841089670829, + "epoch": 0.6430899147042988, "grad_norm": 0.0, - "learning_rate": 5.943215501046492e-06, - "loss": 0.8732, + "learning_rate": 5.969558090931118e-06, + "loss": 0.7454, "step": 22694 }, { - "epoch": 0.6440124858115778, + "epoch": 0.6431182521465613, "grad_norm": 0.0, - "learning_rate": 5.942375466248964e-06, - "loss": 0.8884, + "learning_rate": 5.968718165052604e-06, + "loss": 0.7321, "step": 22695 }, { - "epoch": 0.6440408626560726, + "epoch": 0.6431465895888238, "grad_norm": 0.0, - "learning_rate": 5.941535465726369e-06, - "loss": 0.8082, + "learning_rate": 5.967878273131078e-06, + "loss": 0.8087, "step": 22696 }, { - "epoch": 0.6440692395005675, + "epoch": 0.6431749270310861, "grad_norm": 0.0, - "learning_rate": 5.940695499485809e-06, - "loss": 0.8354, + "learning_rate": 5.967038415173605e-06, + "loss": 0.7836, "step": 22697 }, { - "epoch": 0.6440976163450625, + "epoch": 0.6432032644733486, "grad_norm": 0.0, - "learning_rate": 5.9398555675343736e-06, - "loss": 0.9352, + "learning_rate": 5.966198591187269e-06, + "loss": 0.8678, "step": 22698 }, { - "epoch": 0.6441259931895573, + "epoch": 0.6432316019156111, "grad_norm": 0.0, - "learning_rate": 5.9390156698791645e-06, - "loss": 0.799, + "learning_rate": 5.965358801179138e-06, + "loss": 0.7941, "step": 22699 }, { - "epoch": 0.6441543700340522, + "epoch": 0.6432599393578735, "grad_norm": 0.0, - "learning_rate": 5.938175806527271e-06, - "loss": 0.9588, + "learning_rate": 5.964519045156286e-06, + "loss": 0.8523, "step": 22700 }, { - "epoch": 0.6441827468785472, + "epoch": 0.643288276800136, "grad_norm": 0.0, - "learning_rate": 5.937335977485789e-06, - "loss": 0.7784, + "learning_rate": 5.963679323125795e-06, + "loss": 0.8796, "step": 22701 }, { - "epoch": 0.644211123723042, + "epoch": 0.6433166142423985, "grad_norm": 0.0, - "learning_rate": 5.9364961827618136e-06, - "loss": 0.8641, + "learning_rate": 5.962839635094726e-06, + "loss": 0.9088, "step": 22702 }, { - "epoch": 0.6442395005675369, + "epoch": 0.643344951684661, "grad_norm": 0.0, - "learning_rate": 5.935656422362438e-06, - "loss": 0.8498, + "learning_rate": 5.961999981070159e-06, + "loss": 0.9044, "step": 22703 }, { - "epoch": 0.6442678774120317, + "epoch": 0.6433732891269234, "grad_norm": 0.0, - "learning_rate": 5.934816696294753e-06, - "loss": 0.9305, + "learning_rate": 5.961160361059168e-06, + "loss": 0.7755, "step": 22704 }, { - "epoch": 0.6442962542565267, + "epoch": 0.6434016265691859, "grad_norm": 0.0, - "learning_rate": 5.933977004565856e-06, - "loss": 0.9663, + "learning_rate": 5.960320775068821e-06, + "loss": 0.8923, "step": 22705 }, { - "epoch": 0.6443246311010216, + "epoch": 0.6434299640114484, "grad_norm": 0.0, - "learning_rate": 5.933137347182838e-06, - "loss": 0.8686, + "learning_rate": 5.959481223106196e-06, + "loss": 0.6872, "step": 22706 }, { - "epoch": 0.6443530079455164, + "epoch": 0.6434583014537107, "grad_norm": 0.0, - "learning_rate": 5.932297724152791e-06, - "loss": 0.8478, + "learning_rate": 5.958641705178356e-06, + "loss": 0.8778, "step": 22707 }, { - "epoch": 0.6443813847900114, + "epoch": 0.6434866388959732, "grad_norm": 0.0, - "learning_rate": 5.931458135482808e-06, - "loss": 1.0003, + "learning_rate": 5.957802221292379e-06, + "loss": 0.7938, "step": 22708 }, { - "epoch": 0.6444097616345063, + "epoch": 0.6435149763382357, "grad_norm": 0.0, - "learning_rate": 5.9306185811799835e-06, - "loss": 0.8179, + "learning_rate": 5.956962771455338e-06, + "loss": 0.8431, "step": 22709 }, { - "epoch": 0.6444381384790011, + "epoch": 0.6435433137804982, "grad_norm": 0.0, - "learning_rate": 5.929779061251402e-06, - "loss": 0.8833, + "learning_rate": 5.956123355674297e-06, + "loss": 0.8423, "step": 22710 }, { - "epoch": 0.6444665153234961, + "epoch": 0.6435716512227606, "grad_norm": 0.0, - "learning_rate": 5.928939575704163e-06, - "loss": 0.99, + "learning_rate": 5.955283973956332e-06, + "loss": 0.7718, "step": 22711 }, { - "epoch": 0.6444948921679909, + "epoch": 0.6435999886650231, "grad_norm": 0.0, - "learning_rate": 5.9281001245453555e-06, - "loss": 0.7844, + "learning_rate": 5.954444626308513e-06, + "loss": 0.9233, "step": 22712 }, { - "epoch": 0.6445232690124858, + "epoch": 0.6436283261072856, "grad_norm": 0.0, - "learning_rate": 5.927260707782065e-06, - "loss": 0.8734, + "learning_rate": 5.953605312737907e-06, + "loss": 0.836, "step": 22713 }, { - "epoch": 0.6445516458569807, + "epoch": 0.643656663549548, "grad_norm": 0.0, - "learning_rate": 5.926421325421385e-06, - "loss": 0.7433, + "learning_rate": 5.95276603325159e-06, + "loss": 0.8731, "step": 22714 }, { - "epoch": 0.6445800227014756, + "epoch": 0.6436850009918105, "grad_norm": 0.0, - "learning_rate": 5.925581977470412e-06, - "loss": 0.9075, + "learning_rate": 5.9519267878566235e-06, + "loss": 0.8175, "step": 22715 }, { - "epoch": 0.6446083995459705, + "epoch": 0.643713338434073, "grad_norm": 0.0, - "learning_rate": 5.924742663936232e-06, - "loss": 0.8409, + "learning_rate": 5.951087576560081e-06, + "loss": 0.8231, "step": 22716 }, { - "epoch": 0.6446367763904653, + "epoch": 0.6437416758763354, "grad_norm": 0.0, - "learning_rate": 5.9239033848259305e-06, - "loss": 0.8963, + "learning_rate": 5.950248399369034e-06, + "loss": 0.8302, "step": 22717 }, { - "epoch": 0.6446651532349603, + "epoch": 0.6437700133185978, "grad_norm": 0.0, - "learning_rate": 5.923064140146603e-06, - "loss": 0.8644, + "learning_rate": 5.949409256290546e-06, + "loss": 0.9914, "step": 22718 }, { - "epoch": 0.6446935300794552, + "epoch": 0.6437983507608603, "grad_norm": 0.0, - "learning_rate": 5.922224929905336e-06, - "loss": 0.9103, + "learning_rate": 5.9485701473316925e-06, + "loss": 0.7914, "step": 22719 }, { - "epoch": 0.64472190692395, + "epoch": 0.6438266882031228, "grad_norm": 0.0, - "learning_rate": 5.921385754109214e-06, - "loss": 0.7641, + "learning_rate": 5.947731072499533e-06, + "loss": 0.818, "step": 22720 }, { - "epoch": 0.6447502837684449, + "epoch": 0.6438550256453852, "grad_norm": 0.0, - "learning_rate": 5.9205466127653345e-06, - "loss": 0.8038, + "learning_rate": 5.946892031801139e-06, + "loss": 0.865, "step": 22721 }, { - "epoch": 0.6447786606129399, + "epoch": 0.6438833630876477, "grad_norm": 0.0, - "learning_rate": 5.91970750588078e-06, - "loss": 0.8529, + "learning_rate": 5.946053025243584e-06, + "loss": 0.7211, "step": 22722 }, { - "epoch": 0.6448070374574347, + "epoch": 0.6439117005299102, "grad_norm": 0.0, - "learning_rate": 5.918868433462639e-06, - "loss": 0.872, + "learning_rate": 5.945214052833923e-06, + "loss": 0.8202, "step": 22723 }, { - "epoch": 0.6448354143019296, + "epoch": 0.6439400379721726, "grad_norm": 0.0, - "learning_rate": 5.918029395518001e-06, - "loss": 0.878, + "learning_rate": 5.944375114579232e-06, + "loss": 0.746, "step": 22724 }, { - "epoch": 0.6448637911464246, + "epoch": 0.6439683754144351, "grad_norm": 0.0, - "learning_rate": 5.917190392053953e-06, - "loss": 0.8034, + "learning_rate": 5.943536210486577e-06, + "loss": 0.7875, "step": 22725 }, { - "epoch": 0.6448921679909194, + "epoch": 0.6439967128566976, "grad_norm": 0.0, - "learning_rate": 5.91635142307758e-06, - "loss": 0.8135, + "learning_rate": 5.942697340563019e-06, + "loss": 0.8196, "step": 22726 }, { - "epoch": 0.6449205448354143, + "epoch": 0.6440250502989601, "grad_norm": 0.0, - "learning_rate": 5.915512488595968e-06, - "loss": 0.838, + "learning_rate": 5.941858504815634e-06, + "loss": 0.9056, "step": 22727 }, { - "epoch": 0.6449489216799092, + "epoch": 0.6440533877412224, "grad_norm": 0.0, - "learning_rate": 5.914673588616209e-06, - "loss": 0.8464, + "learning_rate": 5.9410197032514785e-06, + "loss": 0.8107, "step": 22728 }, { - "epoch": 0.6449772985244041, + "epoch": 0.6440817251834849, "grad_norm": 0.0, - "learning_rate": 5.9138347231453795e-06, - "loss": 0.8301, + "learning_rate": 5.94018093587762e-06, + "loss": 0.9, "step": 22729 }, { - "epoch": 0.645005675368899, + "epoch": 0.6441100626257474, "grad_norm": 0.0, - "learning_rate": 5.912995892190578e-06, - "loss": 0.7507, + "learning_rate": 5.939342202701126e-06, + "loss": 0.8747, "step": 22730 }, { - "epoch": 0.6450340522133938, + "epoch": 0.6441384000680098, "grad_norm": 0.0, - "learning_rate": 5.912157095758881e-06, - "loss": 0.7786, + "learning_rate": 5.93850350372906e-06, + "loss": 0.8754, "step": 22731 }, { - "epoch": 0.6450624290578888, + "epoch": 0.6441667375102723, "grad_norm": 0.0, - "learning_rate": 5.9113183338573786e-06, - "loss": 0.777, + "learning_rate": 5.937664838968487e-06, + "loss": 0.9167, "step": 22732 }, { - "epoch": 0.6450908059023837, + "epoch": 0.6441950749525348, "grad_norm": 0.0, - "learning_rate": 5.910479606493156e-06, - "loss": 0.7812, + "learning_rate": 5.936826208426475e-06, + "loss": 0.8706, "step": 22733 }, { - "epoch": 0.6451191827468785, + "epoch": 0.6442234123947972, "grad_norm": 0.0, - "learning_rate": 5.909640913673291e-06, - "loss": 0.8961, + "learning_rate": 5.935987612110081e-06, + "loss": 0.8357, "step": 22734 }, { - "epoch": 0.6451475595913735, + "epoch": 0.6442517498370597, "grad_norm": 0.0, - "learning_rate": 5.908802255404877e-06, - "loss": 0.7137, + "learning_rate": 5.935149050026374e-06, + "loss": 0.9044, "step": 22735 }, { - "epoch": 0.6451759364358683, + "epoch": 0.6442800872793222, "grad_norm": 0.0, - "learning_rate": 5.907963631694994e-06, - "loss": 0.8022, + "learning_rate": 5.934310522182415e-06, + "loss": 0.9314, "step": 22736 }, { - "epoch": 0.6452043132803632, + "epoch": 0.6443084247215847, "grad_norm": 0.0, - "learning_rate": 5.907125042550723e-06, - "loss": 0.8466, + "learning_rate": 5.93347202858527e-06, + "loss": 0.7881, "step": 22737 }, { - "epoch": 0.6452326901248581, + "epoch": 0.644336762163847, "grad_norm": 0.0, - "learning_rate": 5.9062864879791535e-06, - "loss": 0.8698, + "learning_rate": 5.932633569242e-06, + "loss": 0.9097, "step": 22738 }, { - "epoch": 0.645261066969353, + "epoch": 0.6443650996061095, "grad_norm": 0.0, - "learning_rate": 5.905447967987368e-06, - "loss": 0.8527, + "learning_rate": 5.9317951441596656e-06, + "loss": 0.9255, "step": 22739 }, { - "epoch": 0.6452894438138479, + "epoch": 0.644393437048372, "grad_norm": 0.0, - "learning_rate": 5.904609482582443e-06, - "loss": 0.8413, + "learning_rate": 5.930956753345332e-06, + "loss": 0.876, "step": 22740 }, { - "epoch": 0.6453178206583428, + "epoch": 0.6444217744906344, "grad_norm": 0.0, - "learning_rate": 5.903771031771468e-06, - "loss": 0.934, + "learning_rate": 5.930118396806064e-06, + "loss": 0.9145, "step": 22741 }, { - "epoch": 0.6453461975028377, + "epoch": 0.6444501119328969, "grad_norm": 0.0, - "learning_rate": 5.9029326155615245e-06, - "loss": 0.9124, + "learning_rate": 5.929280074548915e-06, + "loss": 0.9534, "step": 22742 }, { - "epoch": 0.6453745743473326, + "epoch": 0.6444784493751594, "grad_norm": 0.0, - "learning_rate": 5.9020942339596895e-06, - "loss": 0.8289, + "learning_rate": 5.928441786580957e-06, + "loss": 0.8628, "step": 22743 }, { - "epoch": 0.6454029511918274, + "epoch": 0.6445067868174219, "grad_norm": 0.0, - "learning_rate": 5.901255886973051e-06, - "loss": 0.9, + "learning_rate": 5.927603532909241e-06, + "loss": 0.8197, "step": 22744 }, { - "epoch": 0.6454313280363224, + "epoch": 0.6445351242596843, "grad_norm": 0.0, - "learning_rate": 5.900417574608687e-06, - "loss": 0.9222, + "learning_rate": 5.926765313540832e-06, + "loss": 0.8092, "step": 22745 }, { - "epoch": 0.6454597048808173, + "epoch": 0.6445634617019468, "grad_norm": 0.0, - "learning_rate": 5.899579296873682e-06, - "loss": 0.9125, + "learning_rate": 5.9259271284827965e-06, + "loss": 0.8627, "step": 22746 }, { - "epoch": 0.6454880817253121, + "epoch": 0.6445917991442093, "grad_norm": 0.0, - "learning_rate": 5.898741053775115e-06, - "loss": 0.9691, + "learning_rate": 5.925088977742186e-06, + "loss": 0.8521, "step": 22747 }, { - "epoch": 0.645516458569807, + "epoch": 0.6446201365864717, "grad_norm": 0.0, - "learning_rate": 5.897902845320065e-06, - "loss": 0.7818, + "learning_rate": 5.924250861326066e-06, + "loss": 0.8177, "step": 22748 }, { - "epoch": 0.645544835414302, + "epoch": 0.6446484740287342, "grad_norm": 0.0, - "learning_rate": 5.897064671515616e-06, - "loss": 0.7422, + "learning_rate": 5.923412779241493e-06, + "loss": 0.9145, "step": 22749 }, { - "epoch": 0.6455732122587968, + "epoch": 0.6446768114709966, "grad_norm": 0.0, - "learning_rate": 5.896226532368847e-06, - "loss": 0.7868, + "learning_rate": 5.922574731495528e-06, + "loss": 0.8644, "step": 22750 }, { - "epoch": 0.6456015891032917, + "epoch": 0.6447051489132591, "grad_norm": 0.0, - "learning_rate": 5.895388427886833e-06, - "loss": 0.8779, + "learning_rate": 5.921736718095232e-06, + "loss": 0.8698, "step": 22751 }, { - "epoch": 0.6456299659477867, + "epoch": 0.6447334863555215, "grad_norm": 0.0, - "learning_rate": 5.894550358076661e-06, - "loss": 0.8754, + "learning_rate": 5.92089873904766e-06, + "loss": 0.7928, "step": 22752 }, { - "epoch": 0.6456583427922815, + "epoch": 0.644761823797784, "grad_norm": 0.0, - "learning_rate": 5.893712322945406e-06, - "loss": 0.8523, + "learning_rate": 5.920060794359872e-06, + "loss": 0.859, "step": 22753 }, { - "epoch": 0.6456867196367764, + "epoch": 0.6447901612400465, "grad_norm": 0.0, - "learning_rate": 5.8928743225001465e-06, - "loss": 0.8131, + "learning_rate": 5.919222884038932e-06, + "loss": 0.8843, "step": 22754 }, { - "epoch": 0.6457150964812712, + "epoch": 0.6448184986823089, "grad_norm": 0.0, - "learning_rate": 5.892036356747963e-06, - "loss": 0.7874, + "learning_rate": 5.9183850080918885e-06, + "loss": 0.8425, "step": 22755 }, { - "epoch": 0.6457434733257662, + "epoch": 0.6448468361245714, "grad_norm": 0.0, - "learning_rate": 5.891198425695934e-06, - "loss": 0.8792, + "learning_rate": 5.917547166525806e-06, + "loss": 0.9469, "step": 22756 }, { - "epoch": 0.6457718501702611, + "epoch": 0.6448751735668339, "grad_norm": 0.0, - "learning_rate": 5.890360529351133e-06, - "loss": 0.8485, + "learning_rate": 5.916709359347737e-06, + "loss": 0.7899, "step": 22757 }, { - "epoch": 0.6458002270147559, + "epoch": 0.6449035110090963, "grad_norm": 0.0, - "learning_rate": 5.889522667720647e-06, - "loss": 0.8466, + "learning_rate": 5.915871586564741e-06, + "loss": 0.8485, "step": 22758 }, { - "epoch": 0.6458286038592509, + "epoch": 0.6449318484513588, "grad_norm": 0.0, - "learning_rate": 5.888684840811545e-06, - "loss": 0.8485, + "learning_rate": 5.91503384818388e-06, + "loss": 0.7884, "step": 22759 }, { - "epoch": 0.6458569807037458, + "epoch": 0.6449601858936213, "grad_norm": 0.0, - "learning_rate": 5.8878470486309025e-06, - "loss": 0.8563, + "learning_rate": 5.914196144212201e-06, + "loss": 0.9317, "step": 22760 }, { - "epoch": 0.6458853575482406, + "epoch": 0.6449885233358837, "grad_norm": 0.0, - "learning_rate": 5.887009291185803e-06, - "loss": 0.8274, + "learning_rate": 5.913358474656766e-06, + "loss": 0.7666, "step": 22761 }, { - "epoch": 0.6459137343927355, + "epoch": 0.6450168607781461, "grad_norm": 0.0, - "learning_rate": 5.8861715684833245e-06, - "loss": 0.8375, + "learning_rate": 5.9125208395246315e-06, + "loss": 0.819, "step": 22762 }, { - "epoch": 0.6459421112372304, + "epoch": 0.6450451982204086, "grad_norm": 0.0, - "learning_rate": 5.885333880530539e-06, - "loss": 0.7975, + "learning_rate": 5.911683238822851e-06, + "loss": 0.8214, "step": 22763 }, { - "epoch": 0.6459704880817253, + "epoch": 0.6450735356626711, "grad_norm": 0.0, - "learning_rate": 5.8844962273345205e-06, - "loss": 0.9034, + "learning_rate": 5.910845672558483e-06, + "loss": 0.8681, "step": 22764 }, { - "epoch": 0.6459988649262202, + "epoch": 0.6451018731049335, "grad_norm": 0.0, - "learning_rate": 5.883658608902349e-06, - "loss": 0.8309, + "learning_rate": 5.910008140738578e-06, + "loss": 0.9241, "step": 22765 }, { - "epoch": 0.6460272417707151, + "epoch": 0.645130210547196, "grad_norm": 0.0, - "learning_rate": 5.8828210252411e-06, - "loss": 0.8124, + "learning_rate": 5.909170643370192e-06, + "loss": 0.8841, "step": 22766 }, { - "epoch": 0.64605561861521, + "epoch": 0.6451585479894585, "grad_norm": 0.0, - "learning_rate": 5.881983476357846e-06, - "loss": 0.8743, + "learning_rate": 5.9083331804603865e-06, + "loss": 0.8526, "step": 22767 }, { - "epoch": 0.6460839954597049, + "epoch": 0.645186885431721, "grad_norm": 0.0, - "learning_rate": 5.8811459622596605e-06, - "loss": 0.8555, + "learning_rate": 5.907495752016203e-06, + "loss": 0.7688, "step": 22768 }, { - "epoch": 0.6461123723041998, + "epoch": 0.6452152228739834, "grad_norm": 0.0, - "learning_rate": 5.880308482953623e-06, - "loss": 0.7437, + "learning_rate": 5.906658358044704e-06, + "loss": 0.8059, "step": 22769 }, { - "epoch": 0.6461407491486947, + "epoch": 0.6452435603162459, "grad_norm": 0.0, - "learning_rate": 5.879471038446804e-06, - "loss": 0.811, + "learning_rate": 5.905820998552944e-06, + "loss": 0.8459, "step": 22770 }, { - "epoch": 0.6461691259931895, + "epoch": 0.6452718977585084, "grad_norm": 0.0, - "learning_rate": 5.878633628746276e-06, - "loss": 0.9419, + "learning_rate": 5.9049836735479725e-06, + "loss": 0.8045, "step": 22771 }, { - "epoch": 0.6461975028376844, + "epoch": 0.6453002352007707, "grad_norm": 0.0, - "learning_rate": 5.877796253859118e-06, - "loss": 0.7723, + "learning_rate": 5.904146383036849e-06, + "loss": 0.8515, "step": 22772 }, { - "epoch": 0.6462258796821794, + "epoch": 0.6453285726430332, "grad_norm": 0.0, - "learning_rate": 5.876958913792401e-06, - "loss": 0.8994, + "learning_rate": 5.903309127026615e-06, + "loss": 0.8468, "step": 22773 }, { - "epoch": 0.6462542565266742, + "epoch": 0.6453569100852957, "grad_norm": 0.0, - "learning_rate": 5.876121608553194e-06, - "loss": 0.9119, + "learning_rate": 5.902471905524331e-06, + "loss": 0.8117, "step": 22774 }, { - "epoch": 0.6462826333711691, + "epoch": 0.6453852475275582, "grad_norm": 0.0, - "learning_rate": 5.875284338148571e-06, - "loss": 0.8911, + "learning_rate": 5.901634718537048e-06, + "loss": 0.8751, "step": 22775 }, { - "epoch": 0.6463110102156641, + "epoch": 0.6454135849698206, "grad_norm": 0.0, - "learning_rate": 5.874447102585612e-06, - "loss": 0.7552, + "learning_rate": 5.900797566071818e-06, + "loss": 0.8468, "step": 22776 }, { - "epoch": 0.6463393870601589, + "epoch": 0.6454419224120831, "grad_norm": 0.0, - "learning_rate": 5.873609901871382e-06, - "loss": 0.8364, + "learning_rate": 5.8999604481356955e-06, + "loss": 0.81, "step": 22777 }, { - "epoch": 0.6463677639046538, + "epoch": 0.6454702598543456, "grad_norm": 0.0, - "learning_rate": 5.872772736012955e-06, - "loss": 0.8356, + "learning_rate": 5.899123364735724e-06, + "loss": 0.9099, "step": 22778 }, { - "epoch": 0.6463961407491486, + "epoch": 0.645498597296608, "grad_norm": 0.0, - "learning_rate": 5.871935605017402e-06, - "loss": 0.8656, + "learning_rate": 5.8982863158789605e-06, + "loss": 0.8295, "step": 22779 }, { - "epoch": 0.6464245175936436, + "epoch": 0.6455269347388705, "grad_norm": 0.0, - "learning_rate": 5.871098508891795e-06, - "loss": 0.8923, + "learning_rate": 5.8974493015724576e-06, + "loss": 0.9638, "step": 22780 }, { - "epoch": 0.6464528944381385, + "epoch": 0.645555272181133, "grad_norm": 0.0, - "learning_rate": 5.870261447643204e-06, - "loss": 0.7957, + "learning_rate": 5.896612321823258e-06, + "loss": 0.8484, "step": 22781 }, { - "epoch": 0.6464812712826333, + "epoch": 0.6455836096233953, "grad_norm": 0.0, - "learning_rate": 5.8694244212787e-06, - "loss": 0.8238, + "learning_rate": 5.895775376638417e-06, + "loss": 0.8275, "step": 22782 }, { - "epoch": 0.6465096481271283, + "epoch": 0.6456119470656578, "grad_norm": 0.0, - "learning_rate": 5.868587429805355e-06, - "loss": 0.7936, + "learning_rate": 5.894938466024986e-06, + "loss": 0.9358, "step": 22783 }, { - "epoch": 0.6465380249716232, + "epoch": 0.6456402845079203, "grad_norm": 0.0, - "learning_rate": 5.867750473230236e-06, - "loss": 0.8994, + "learning_rate": 5.894101589990011e-06, + "loss": 0.8509, "step": 22784 }, { - "epoch": 0.646566401816118, + "epoch": 0.6456686219501828, "grad_norm": 0.0, - "learning_rate": 5.866913551560416e-06, - "loss": 0.8843, + "learning_rate": 5.893264748540548e-06, + "loss": 0.8495, "step": 22785 }, { - "epoch": 0.646594778660613, + "epoch": 0.6456969593924452, "grad_norm": 0.0, - "learning_rate": 5.866076664802962e-06, - "loss": 0.8086, + "learning_rate": 5.892427941683636e-06, + "loss": 0.6925, "step": 22786 }, { - "epoch": 0.6466231555051078, + "epoch": 0.6457252968347077, "grad_norm": 0.0, - "learning_rate": 5.865239812964944e-06, - "loss": 0.8217, + "learning_rate": 5.8915911694263296e-06, + "loss": 0.8377, "step": 22787 }, { - "epoch": 0.6466515323496027, + "epoch": 0.6457536342769702, "grad_norm": 0.0, - "learning_rate": 5.864402996053432e-06, - "loss": 0.7903, + "learning_rate": 5.890754431775676e-06, + "loss": 0.8547, "step": 22788 }, { - "epoch": 0.6466799091940976, + "epoch": 0.6457819717192326, "grad_norm": 0.0, - "learning_rate": 5.863566214075495e-06, - "loss": 0.8319, + "learning_rate": 5.889917728738725e-06, + "loss": 0.8532, "step": 22789 }, { - "epoch": 0.6467082860385925, + "epoch": 0.6458103091614951, "grad_norm": 0.0, - "learning_rate": 5.862729467038195e-06, - "loss": 0.7774, + "learning_rate": 5.889081060322521e-06, + "loss": 0.8059, "step": 22790 }, { - "epoch": 0.6467366628830874, + "epoch": 0.6458386466037576, "grad_norm": 0.0, - "learning_rate": 5.8618927549486095e-06, - "loss": 0.8859, + "learning_rate": 5.888244426534118e-06, + "loss": 0.8423, "step": 22791 }, { - "epoch": 0.6467650397275823, + "epoch": 0.64586698404602, "grad_norm": 0.0, - "learning_rate": 5.861056077813799e-06, - "loss": 0.9761, + "learning_rate": 5.887407827380556e-06, + "loss": 0.7263, "step": 22792 }, { - "epoch": 0.6467934165720772, + "epoch": 0.6458953214882824, "grad_norm": 0.0, - "learning_rate": 5.860219435640837e-06, - "loss": 0.7805, + "learning_rate": 5.886571262868888e-06, + "loss": 0.9137, "step": 22793 }, { - "epoch": 0.6468217934165721, + "epoch": 0.6459236589305449, "grad_norm": 0.0, - "learning_rate": 5.859382828436788e-06, - "loss": 0.8508, + "learning_rate": 5.8857347330061545e-06, + "loss": 0.8671, "step": 22794 }, { - "epoch": 0.646850170261067, + "epoch": 0.6459519963728074, "grad_norm": 0.0, - "learning_rate": 5.858546256208715e-06, - "loss": 0.8134, + "learning_rate": 5.884898237799405e-06, + "loss": 0.8964, "step": 22795 }, { - "epoch": 0.6468785471055618, + "epoch": 0.6459803338150698, "grad_norm": 0.0, - "learning_rate": 5.857709718963691e-06, - "loss": 0.9216, + "learning_rate": 5.884061777255688e-06, + "loss": 0.9017, "step": 22796 }, { - "epoch": 0.6469069239500568, + "epoch": 0.6460086712573323, "grad_norm": 0.0, - "learning_rate": 5.8568732167087786e-06, - "loss": 0.8506, + "learning_rate": 5.883225351382044e-06, + "loss": 0.9044, "step": 22797 }, { - "epoch": 0.6469353007945516, + "epoch": 0.6460370086995948, "grad_norm": 0.0, - "learning_rate": 5.856036749451043e-06, - "loss": 0.8203, + "learning_rate": 5.882388960185522e-06, + "loss": 0.9329, "step": 22798 }, { - "epoch": 0.6469636776390465, + "epoch": 0.6460653461418573, "grad_norm": 0.0, - "learning_rate": 5.855200317197552e-06, - "loss": 0.8809, + "learning_rate": 5.881552603673171e-06, + "loss": 0.8552, "step": 22799 }, { - "epoch": 0.6469920544835415, + "epoch": 0.6460936835841197, "grad_norm": 0.0, - "learning_rate": 5.854363919955371e-06, - "loss": 0.831, + "learning_rate": 5.880716281852028e-06, + "loss": 0.7964, "step": 22800 }, { - "epoch": 0.6470204313280363, + "epoch": 0.6461220210263822, "grad_norm": 0.0, - "learning_rate": 5.853527557731563e-06, - "loss": 0.9261, + "learning_rate": 5.879879994729143e-06, + "loss": 0.8964, "step": 22801 }, { - "epoch": 0.6470488081725312, + "epoch": 0.6461503584686447, "grad_norm": 0.0, - "learning_rate": 5.852691230533196e-06, - "loss": 0.9099, + "learning_rate": 5.879043742311556e-06, + "loss": 0.7261, "step": 22802 }, { - "epoch": 0.6470771850170262, + "epoch": 0.646178695910907, "grad_norm": 0.0, - "learning_rate": 5.8518549383673316e-06, - "loss": 0.8016, + "learning_rate": 5.878207524606316e-06, + "loss": 0.8391, "step": 22803 }, { - "epoch": 0.647105561861521, + "epoch": 0.6462070333531695, "grad_norm": 0.0, - "learning_rate": 5.851018681241034e-06, - "loss": 0.8732, + "learning_rate": 5.877371341620468e-06, + "loss": 0.8768, "step": 22804 }, { - "epoch": 0.6471339387060159, + "epoch": 0.646235370795432, "grad_norm": 0.0, - "learning_rate": 5.850182459161369e-06, - "loss": 0.9015, + "learning_rate": 5.8765351933610474e-06, + "loss": 0.8396, "step": 22805 }, { - "epoch": 0.6471623155505107, + "epoch": 0.6462637082376944, "grad_norm": 0.0, - "learning_rate": 5.849346272135397e-06, - "loss": 0.8553, + "learning_rate": 5.875699079835107e-06, + "loss": 0.7616, "step": 22806 }, { - "epoch": 0.6471906923950057, + "epoch": 0.6462920456799569, "grad_norm": 0.0, - "learning_rate": 5.848510120170185e-06, - "loss": 0.8821, + "learning_rate": 5.8748630010496795e-06, + "loss": 0.8158, "step": 22807 }, { - "epoch": 0.6472190692395006, + "epoch": 0.6463203831222194, "grad_norm": 0.0, - "learning_rate": 5.847674003272798e-06, - "loss": 0.924, + "learning_rate": 5.874026957011814e-06, + "loss": 0.8793, "step": 22808 }, { - "epoch": 0.6472474460839954, + "epoch": 0.6463487205644819, "grad_norm": 0.0, - "learning_rate": 5.84683792145029e-06, - "loss": 0.8509, + "learning_rate": 5.873190947728552e-06, + "loss": 0.936, "step": 22809 }, { - "epoch": 0.6472758229284904, + "epoch": 0.6463770580067443, "grad_norm": 0.0, - "learning_rate": 5.846001874709733e-06, - "loss": 0.8868, + "learning_rate": 5.872354973206934e-06, + "loss": 0.7799, "step": 22810 }, { - "epoch": 0.6473041997729853, + "epoch": 0.6464053954490068, "grad_norm": 0.0, - "learning_rate": 5.845165863058184e-06, - "loss": 0.8455, + "learning_rate": 5.871519033454003e-06, + "loss": 0.8604, "step": 22811 }, { - "epoch": 0.6473325766174801, + "epoch": 0.6464337328912693, "grad_norm": 0.0, - "learning_rate": 5.844329886502704e-06, - "loss": 0.8338, + "learning_rate": 5.870683128476804e-06, + "loss": 0.8753, "step": 22812 }, { - "epoch": 0.647360953461975, + "epoch": 0.6464620703335316, "grad_norm": 0.0, - "learning_rate": 5.843493945050359e-06, - "loss": 0.8479, + "learning_rate": 5.8698472582823705e-06, + "loss": 0.8923, "step": 22813 }, { - "epoch": 0.64738933030647, + "epoch": 0.6464904077757941, "grad_norm": 0.0, - "learning_rate": 5.842658038708206e-06, - "loss": 0.8402, + "learning_rate": 5.869011422877748e-06, + "loss": 0.8031, "step": 22814 }, { - "epoch": 0.6474177071509648, + "epoch": 0.6465187452180566, "grad_norm": 0.0, - "learning_rate": 5.841822167483306e-06, - "loss": 0.8128, + "learning_rate": 5.868175622269976e-06, + "loss": 0.756, "step": 22815 }, { - "epoch": 0.6474460839954597, + "epoch": 0.6465470826603191, "grad_norm": 0.0, - "learning_rate": 5.840986331382724e-06, - "loss": 0.8785, + "learning_rate": 5.8673398564660946e-06, + "loss": 0.8988, "step": 22816 }, { - "epoch": 0.6474744608399546, + "epoch": 0.6465754201025815, "grad_norm": 0.0, - "learning_rate": 5.840150530413518e-06, - "loss": 0.8736, + "learning_rate": 5.866504125473149e-06, + "loss": 0.9424, "step": 22817 }, { - "epoch": 0.6475028376844495, + "epoch": 0.646603757544844, "grad_norm": 0.0, - "learning_rate": 5.839314764582743e-06, - "loss": 0.868, + "learning_rate": 5.8656684292981685e-06, + "loss": 0.8852, "step": 22818 }, { - "epoch": 0.6475312145289444, + "epoch": 0.6466320949871065, "grad_norm": 0.0, - "learning_rate": 5.838479033897464e-06, - "loss": 0.9304, + "learning_rate": 5.8648327679481984e-06, + "loss": 0.8169, "step": 22819 }, { - "epoch": 0.6475595913734393, + "epoch": 0.6466604324293689, "grad_norm": 0.0, - "learning_rate": 5.837643338364744e-06, - "loss": 0.9498, + "learning_rate": 5.863997141430282e-06, + "loss": 0.8064, "step": 22820 }, { - "epoch": 0.6475879682179342, + "epoch": 0.6466887698716314, "grad_norm": 0.0, - "learning_rate": 5.83680767799163e-06, - "loss": 0.7584, + "learning_rate": 5.8631615497514506e-06, + "loss": 0.8453, "step": 22821 }, { - "epoch": 0.647616345062429, + "epoch": 0.6467171073138939, "grad_norm": 0.0, - "learning_rate": 5.835972052785197e-06, - "loss": 0.7798, + "learning_rate": 5.8623259929187445e-06, + "loss": 0.89, "step": 22822 }, { - "epoch": 0.6476447219069239, + "epoch": 0.6467454447561564, "grad_norm": 0.0, - "learning_rate": 5.835136462752491e-06, - "loss": 0.8827, + "learning_rate": 5.861490470939204e-06, + "loss": 0.8572, "step": 22823 }, { - "epoch": 0.6476730987514189, + "epoch": 0.6467737821984187, "grad_norm": 0.0, - "learning_rate": 5.834300907900574e-06, - "loss": 0.8395, + "learning_rate": 5.860654983819865e-06, + "loss": 0.9245, "step": 22824 }, { - "epoch": 0.6477014755959137, + "epoch": 0.6468021196406812, "grad_norm": 0.0, - "learning_rate": 5.833465388236509e-06, - "loss": 0.8581, + "learning_rate": 5.85981953156777e-06, + "loss": 0.8912, "step": 22825 }, { - "epoch": 0.6477298524404086, + "epoch": 0.6468304570829437, "grad_norm": 0.0, - "learning_rate": 5.832629903767345e-06, - "loss": 0.8143, + "learning_rate": 5.85898411418995e-06, + "loss": 0.7847, "step": 22826 }, { - "epoch": 0.6477582292849036, + "epoch": 0.6468587945252061, "grad_norm": 0.0, - "learning_rate": 5.8317944545001435e-06, - "loss": 0.7516, + "learning_rate": 5.858148731693445e-06, + "loss": 0.9228, "step": 22827 }, { - "epoch": 0.6477866061293984, + "epoch": 0.6468871319674686, "grad_norm": 0.0, - "learning_rate": 5.830959040441966e-06, - "loss": 0.8085, + "learning_rate": 5.8573133840852895e-06, + "loss": 0.7625, "step": 22828 }, { - "epoch": 0.6478149829738933, + "epoch": 0.6469154694097311, "grad_norm": 0.0, - "learning_rate": 5.83012366159986e-06, - "loss": 0.7662, + "learning_rate": 5.856478071372521e-06, + "loss": 0.8477, "step": 22829 }, { - "epoch": 0.6478433598183881, + "epoch": 0.6469438068519935, "grad_norm": 0.0, - "learning_rate": 5.829288317980888e-06, - "loss": 0.8085, + "learning_rate": 5.855642793562182e-06, + "loss": 0.792, "step": 22830 }, { - "epoch": 0.6478717366628831, + "epoch": 0.646972144294256, "grad_norm": 0.0, - "learning_rate": 5.828453009592108e-06, - "loss": 0.8669, + "learning_rate": 5.854807550661296e-06, + "loss": 0.7133, "step": 22831 }, { - "epoch": 0.647900113507378, + "epoch": 0.6470004817365185, "grad_norm": 0.0, - "learning_rate": 5.82761773644057e-06, - "loss": 0.8123, + "learning_rate": 5.853972342676908e-06, + "loss": 0.837, "step": 22832 }, { - "epoch": 0.6479284903518728, + "epoch": 0.647028819178781, "grad_norm": 0.0, - "learning_rate": 5.826782498533332e-06, - "loss": 0.8626, + "learning_rate": 5.853137169616054e-06, + "loss": 0.8135, "step": 22833 }, { - "epoch": 0.6479568671963678, + "epoch": 0.6470571566210434, "grad_norm": 0.0, - "learning_rate": 5.825947295877455e-06, - "loss": 0.9269, + "learning_rate": 5.852302031485762e-06, + "loss": 0.7645, "step": 22834 }, { - "epoch": 0.6479852440408627, + "epoch": 0.6470854940633058, "grad_norm": 0.0, - "learning_rate": 5.825112128479982e-06, - "loss": 0.9161, + "learning_rate": 5.85146692829307e-06, + "loss": 0.7252, "step": 22835 }, { - "epoch": 0.6480136208853575, + "epoch": 0.6471138315055683, "grad_norm": 0.0, - "learning_rate": 5.8242769963479775e-06, - "loss": 0.8078, + "learning_rate": 5.850631860045013e-06, + "loss": 0.893, "step": 22836 }, { - "epoch": 0.6480419977298525, + "epoch": 0.6471421689478307, "grad_norm": 0.0, - "learning_rate": 5.823441899488491e-06, - "loss": 0.8059, + "learning_rate": 5.849796826748623e-06, + "loss": 0.7925, "step": 22837 }, { - "epoch": 0.6480703745743474, + "epoch": 0.6471705063900932, "grad_norm": 0.0, - "learning_rate": 5.8226068379085784e-06, - "loss": 0.8487, + "learning_rate": 5.848961828410939e-06, + "loss": 0.8354, "step": 22838 }, { - "epoch": 0.6480987514188422, + "epoch": 0.6471988438323557, "grad_norm": 0.0, - "learning_rate": 5.8217718116152985e-06, - "loss": 0.9036, + "learning_rate": 5.84812686503899e-06, + "loss": 0.8323, "step": 22839 }, { - "epoch": 0.6481271282633371, + "epoch": 0.6472271812746182, "grad_norm": 0.0, - "learning_rate": 5.820936820615697e-06, - "loss": 0.7987, + "learning_rate": 5.8472919366398075e-06, + "loss": 0.8162, "step": 22840 }, { - "epoch": 0.648155505107832, + "epoch": 0.6472555187168806, "grad_norm": 0.0, - "learning_rate": 5.820101864916827e-06, - "loss": 0.8936, + "learning_rate": 5.84645704322043e-06, + "loss": 0.9224, "step": 22841 }, { - "epoch": 0.6481838819523269, + "epoch": 0.6472838561591431, "grad_norm": 0.0, - "learning_rate": 5.819266944525752e-06, - "loss": 0.8513, + "learning_rate": 5.845622184787885e-06, + "loss": 0.8375, "step": 22842 }, { - "epoch": 0.6482122587968218, + "epoch": 0.6473121936014056, "grad_norm": 0.0, - "learning_rate": 5.818432059449511e-06, - "loss": 0.8467, + "learning_rate": 5.844787361349211e-06, + "loss": 0.7974, "step": 22843 }, { - "epoch": 0.6482406356413167, + "epoch": 0.647340531043668, "grad_norm": 0.0, - "learning_rate": 5.817597209695163e-06, - "loss": 0.7819, + "learning_rate": 5.843952572911432e-06, + "loss": 0.8499, "step": 22844 }, { - "epoch": 0.6482690124858116, + "epoch": 0.6473688684859304, "grad_norm": 0.0, - "learning_rate": 5.816762395269763e-06, - "loss": 0.8584, + "learning_rate": 5.843117819481584e-06, + "loss": 0.8436, "step": 22845 }, { - "epoch": 0.6482973893303065, + "epoch": 0.6473972059281929, "grad_norm": 0.0, - "learning_rate": 5.815927616180354e-06, - "loss": 0.8222, + "learning_rate": 5.8422831010667e-06, + "loss": 0.8195, "step": 22846 }, { - "epoch": 0.6483257661748013, + "epoch": 0.6474255433704554, "grad_norm": 0.0, - "learning_rate": 5.815092872433994e-06, - "loss": 0.9135, + "learning_rate": 5.84144841767381e-06, + "loss": 0.8544, "step": 22847 }, { - "epoch": 0.6483541430192963, + "epoch": 0.6474538808127178, "grad_norm": 0.0, - "learning_rate": 5.814258164037736e-06, - "loss": 0.8254, + "learning_rate": 5.840613769309942e-06, + "loss": 0.9016, "step": 22848 }, { - "epoch": 0.6483825198637911, + "epoch": 0.6474822182549803, "grad_norm": 0.0, - "learning_rate": 5.813423490998624e-06, - "loss": 1.0046, + "learning_rate": 5.839779155982131e-06, + "loss": 0.9496, "step": 22849 }, { - "epoch": 0.648410896708286, + "epoch": 0.6475105556972428, "grad_norm": 0.0, - "learning_rate": 5.812588853323713e-06, - "loss": 0.8512, + "learning_rate": 5.838944577697401e-06, + "loss": 0.8959, "step": 22850 }, { - "epoch": 0.648439273552781, + "epoch": 0.6475388931395052, "grad_norm": 0.0, - "learning_rate": 5.8117542510200545e-06, - "loss": 0.8314, + "learning_rate": 5.8381100344627915e-06, + "loss": 0.838, "step": 22851 }, { - "epoch": 0.6484676503972758, + "epoch": 0.6475672305817677, "grad_norm": 0.0, - "learning_rate": 5.810919684094689e-06, - "loss": 0.9276, + "learning_rate": 5.837275526285323e-06, + "loss": 0.7475, "step": 22852 }, { - "epoch": 0.6484960272417707, + "epoch": 0.6475955680240302, "grad_norm": 0.0, - "learning_rate": 5.810085152554681e-06, - "loss": 0.8615, + "learning_rate": 5.8364410531720285e-06, + "loss": 0.866, "step": 22853 }, { - "epoch": 0.6485244040862657, + "epoch": 0.6476239054662926, "grad_norm": 0.0, - "learning_rate": 5.809250656407067e-06, - "loss": 0.7124, + "learning_rate": 5.83560661512994e-06, + "loss": 0.7586, "step": 22854 }, { - "epoch": 0.6485527809307605, + "epoch": 0.647652242908555, "grad_norm": 0.0, - "learning_rate": 5.8084161956589016e-06, - "loss": 0.7984, + "learning_rate": 5.834772212166081e-06, + "loss": 0.8596, "step": 22855 }, { - "epoch": 0.6485811577752554, + "epoch": 0.6476805803508175, "grad_norm": 0.0, - "learning_rate": 5.807581770317237e-06, - "loss": 0.9153, + "learning_rate": 5.833937844287482e-06, + "loss": 0.9268, "step": 22856 }, { - "epoch": 0.6486095346197502, + "epoch": 0.64770891779308, "grad_norm": 0.0, - "learning_rate": 5.806747380389115e-06, - "loss": 0.9127, + "learning_rate": 5.833103511501171e-06, + "loss": 0.8298, "step": 22857 }, { - "epoch": 0.6486379114642452, + "epoch": 0.6477372552353424, "grad_norm": 0.0, - "learning_rate": 5.805913025881586e-06, - "loss": 0.856, + "learning_rate": 5.832269213814177e-06, + "loss": 0.9272, "step": 22858 }, { - "epoch": 0.6486662883087401, + "epoch": 0.6477655926776049, "grad_norm": 0.0, - "learning_rate": 5.805078706801701e-06, - "loss": 0.7581, + "learning_rate": 5.83143495123353e-06, + "loss": 0.8178, "step": 22859 }, { - "epoch": 0.6486946651532349, + "epoch": 0.6477939301198674, "grad_norm": 0.0, - "learning_rate": 5.804244423156502e-06, - "loss": 0.9723, + "learning_rate": 5.830600723766251e-06, + "loss": 0.846, "step": 22860 }, { - "epoch": 0.6487230419977299, + "epoch": 0.6478222675621298, "grad_norm": 0.0, - "learning_rate": 5.803410174953037e-06, - "loss": 0.9231, + "learning_rate": 5.82976653141937e-06, + "loss": 0.801, "step": 22861 }, { - "epoch": 0.6487514188422248, + "epoch": 0.6478506050043923, "grad_norm": 0.0, - "learning_rate": 5.80257596219836e-06, - "loss": 0.8357, + "learning_rate": 5.8289323741999185e-06, + "loss": 0.7885, "step": 22862 }, { - "epoch": 0.6487797956867196, + "epoch": 0.6478789424466548, "grad_norm": 0.0, - "learning_rate": 5.80174178489951e-06, - "loss": 0.8093, + "learning_rate": 5.8280982521149154e-06, + "loss": 0.8597, "step": 22863 }, { - "epoch": 0.6488081725312145, + "epoch": 0.6479072798889173, "grad_norm": 0.0, - "learning_rate": 5.800907643063534e-06, - "loss": 0.8275, + "learning_rate": 5.827264165171393e-06, + "loss": 0.7116, "step": 22864 }, { - "epoch": 0.6488365493757094, + "epoch": 0.6479356173311797, "grad_norm": 0.0, - "learning_rate": 5.8000735366974845e-06, - "loss": 0.8813, + "learning_rate": 5.826430113376369e-06, + "loss": 0.9195, "step": 22865 }, { - "epoch": 0.6488649262202043, + "epoch": 0.6479639547734422, "grad_norm": 0.0, - "learning_rate": 5.7992394658083996e-06, - "loss": 0.7861, + "learning_rate": 5.825596096736876e-06, + "loss": 0.7863, "step": 22866 }, { - "epoch": 0.6488933030646992, + "epoch": 0.6479922922157046, "grad_norm": 0.0, - "learning_rate": 5.7984054304033264e-06, - "loss": 0.7688, + "learning_rate": 5.82476211525994e-06, + "loss": 0.8704, "step": 22867 }, { - "epoch": 0.6489216799091941, + "epoch": 0.648020629657967, "grad_norm": 0.0, - "learning_rate": 5.7975714304893116e-06, - "loss": 0.8802, + "learning_rate": 5.823928168952579e-06, + "loss": 0.8474, "step": 22868 }, { - "epoch": 0.648950056753689, + "epoch": 0.6480489671002295, "grad_norm": 0.0, - "learning_rate": 5.796737466073401e-06, - "loss": 1.0013, + "learning_rate": 5.823094257821822e-06, + "loss": 0.9605, "step": 22869 }, { - "epoch": 0.6489784335981839, + "epoch": 0.648077304542492, "grad_norm": 0.0, - "learning_rate": 5.795903537162641e-06, - "loss": 0.8345, + "learning_rate": 5.822260381874694e-06, + "loss": 0.9452, "step": 22870 }, { - "epoch": 0.6490068104426788, + "epoch": 0.6481056419847545, "grad_norm": 0.0, - "learning_rate": 5.7950696437640685e-06, - "loss": 0.7841, + "learning_rate": 5.821426541118218e-06, + "loss": 0.8087, "step": 22871 }, { - "epoch": 0.6490351872871737, + "epoch": 0.6481339794270169, "grad_norm": 0.0, - "learning_rate": 5.794235785884732e-06, - "loss": 0.9678, + "learning_rate": 5.820592735559421e-06, + "loss": 0.784, "step": 22872 }, { - "epoch": 0.6490635641316685, + "epoch": 0.6481623168692794, "grad_norm": 0.0, - "learning_rate": 5.79340196353168e-06, - "loss": 0.9371, + "learning_rate": 5.81975896520532e-06, + "loss": 0.8359, "step": 22873 }, { - "epoch": 0.6490919409761634, + "epoch": 0.6481906543115419, "grad_norm": 0.0, - "learning_rate": 5.792568176711945e-06, - "loss": 0.7872, + "learning_rate": 5.818925230062941e-06, + "loss": 0.8896, "step": 22874 }, { - "epoch": 0.6491203178206584, + "epoch": 0.6482189917538043, "grad_norm": 0.0, - "learning_rate": 5.791734425432576e-06, - "loss": 0.7201, + "learning_rate": 5.818091530139311e-06, + "loss": 0.8891, "step": 22875 }, { - "epoch": 0.6491486946651532, + "epoch": 0.6482473291960668, "grad_norm": 0.0, - "learning_rate": 5.79090070970062e-06, - "loss": 0.782, + "learning_rate": 5.817257865441446e-06, + "loss": 0.8153, "step": 22876 }, { - "epoch": 0.6491770715096481, + "epoch": 0.6482756666383293, "grad_norm": 0.0, - "learning_rate": 5.790067029523111e-06, - "loss": 0.8587, + "learning_rate": 5.8164242359763705e-06, + "loss": 0.7588, "step": 22877 }, { - "epoch": 0.6492054483541431, + "epoch": 0.6483040040805916, "grad_norm": 0.0, - "learning_rate": 5.789233384907095e-06, - "loss": 0.8169, + "learning_rate": 5.8155906417511125e-06, + "loss": 0.8057, "step": 22878 }, { - "epoch": 0.6492338251986379, + "epoch": 0.6483323415228541, "grad_norm": 0.0, - "learning_rate": 5.788399775859617e-06, - "loss": 0.8663, + "learning_rate": 5.814757082772683e-06, + "loss": 0.946, "step": 22879 }, { - "epoch": 0.6492622020431328, + "epoch": 0.6483606789651166, "grad_norm": 0.0, - "learning_rate": 5.7875662023877135e-06, - "loss": 0.8907, + "learning_rate": 5.813923559048114e-06, + "loss": 0.849, "step": 22880 }, { - "epoch": 0.6492905788876276, + "epoch": 0.6483890164073791, "grad_norm": 0.0, - "learning_rate": 5.7867326644984254e-06, - "loss": 0.8589, + "learning_rate": 5.813090070584415e-06, + "loss": 0.9043, "step": 22881 }, { - "epoch": 0.6493189557321226, + "epoch": 0.6484173538496415, "grad_norm": 0.0, - "learning_rate": 5.785899162198801e-06, - "loss": 0.85, + "learning_rate": 5.812256617388614e-06, + "loss": 0.8554, "step": 22882 }, { - "epoch": 0.6493473325766175, + "epoch": 0.648445691291904, "grad_norm": 0.0, - "learning_rate": 5.785065695495868e-06, - "loss": 0.7414, + "learning_rate": 5.8114231994677315e-06, + "loss": 0.8577, "step": 22883 }, { - "epoch": 0.6493757094211123, + "epoch": 0.6484740287341665, "grad_norm": 0.0, - "learning_rate": 5.784232264396682e-06, - "loss": 0.926, + "learning_rate": 5.810589816828786e-06, + "loss": 0.7392, "step": 22884 }, { - "epoch": 0.6494040862656073, + "epoch": 0.6485023661764289, "grad_norm": 0.0, - "learning_rate": 5.783398868908272e-06, - "loss": 0.8122, + "learning_rate": 5.809756469478804e-06, + "loss": 0.8073, "step": 22885 }, { - "epoch": 0.6494324631101022, + "epoch": 0.6485307036186914, "grad_norm": 0.0, - "learning_rate": 5.782565509037681e-06, - "loss": 0.8623, + "learning_rate": 5.8089231574247926e-06, + "loss": 0.8898, "step": 22886 }, { - "epoch": 0.649460839954597, + "epoch": 0.6485590410609539, "grad_norm": 0.0, - "learning_rate": 5.781732184791953e-06, - "loss": 0.8893, + "learning_rate": 5.808089880673779e-06, + "loss": 0.7988, "step": 22887 }, { - "epoch": 0.649489216799092, + "epoch": 0.6485873785032163, "grad_norm": 0.0, - "learning_rate": 5.7808988961781186e-06, - "loss": 0.8924, + "learning_rate": 5.807256639232785e-06, + "loss": 0.7439, "step": 22888 }, { - "epoch": 0.6495175936435869, + "epoch": 0.6486157159454787, "grad_norm": 0.0, - "learning_rate": 5.780065643203221e-06, - "loss": 0.9455, + "learning_rate": 5.806423433108822e-06, + "loss": 0.7628, "step": 22889 }, { - "epoch": 0.6495459704880817, + "epoch": 0.6486440533877412, "grad_norm": 0.0, - "learning_rate": 5.779232425874303e-06, - "loss": 0.8288, + "learning_rate": 5.805590262308911e-06, + "loss": 0.7893, "step": 22890 }, { - "epoch": 0.6495743473325766, + "epoch": 0.6486723908300037, "grad_norm": 0.0, - "learning_rate": 5.7783992441983936e-06, - "loss": 0.9302, + "learning_rate": 5.804757126840075e-06, + "loss": 0.8775, "step": 22891 }, { - "epoch": 0.6496027241770715, + "epoch": 0.6487007282722661, "grad_norm": 0.0, - "learning_rate": 5.7775660981825365e-06, - "loss": 0.739, + "learning_rate": 5.803924026709323e-06, + "loss": 0.9193, "step": 22892 }, { - "epoch": 0.6496311010215664, + "epoch": 0.6487290657145286, "grad_norm": 0.0, - "learning_rate": 5.7767329878337705e-06, - "loss": 0.8113, + "learning_rate": 5.803090961923682e-06, + "loss": 0.8597, "step": 22893 }, { - "epoch": 0.6496594778660613, + "epoch": 0.6487574031567911, "grad_norm": 0.0, - "learning_rate": 5.775899913159129e-06, - "loss": 0.7565, + "learning_rate": 5.80225793249016e-06, + "loss": 0.8957, "step": 22894 }, { - "epoch": 0.6496878547105562, + "epoch": 0.6487857405990536, "grad_norm": 0.0, - "learning_rate": 5.775066874165649e-06, - "loss": 0.7387, + "learning_rate": 5.801424938415778e-06, + "loss": 0.6831, "step": 22895 }, { - "epoch": 0.6497162315550511, + "epoch": 0.648814078041316, "grad_norm": 0.0, - "learning_rate": 5.774233870860375e-06, - "loss": 0.8873, + "learning_rate": 5.800591979707553e-06, + "loss": 0.8301, "step": 22896 }, { - "epoch": 0.649744608399546, + "epoch": 0.6488424154835785, "grad_norm": 0.0, - "learning_rate": 5.773400903250332e-06, - "loss": 0.8955, + "learning_rate": 5.7997590563725e-06, + "loss": 0.8134, "step": 22897 }, { - "epoch": 0.6497729852440408, + "epoch": 0.648870752925841, "grad_norm": 0.0, - "learning_rate": 5.7725679713425575e-06, - "loss": 0.7718, + "learning_rate": 5.798926168417638e-06, + "loss": 0.7987, "step": 22898 }, { - "epoch": 0.6498013620885358, + "epoch": 0.6488990903681033, "grad_norm": 0.0, - "learning_rate": 5.7717350751441e-06, - "loss": 0.8861, + "learning_rate": 5.798093315849984e-06, + "loss": 0.8442, "step": 22899 }, { - "epoch": 0.6498297389330306, + "epoch": 0.6489274278103658, "grad_norm": 0.0, - "learning_rate": 5.770902214661983e-06, - "loss": 0.8377, + "learning_rate": 5.7972604986765456e-06, + "loss": 0.8512, "step": 22900 }, { - "epoch": 0.6498581157775255, + "epoch": 0.6489557652526283, "grad_norm": 0.0, - "learning_rate": 5.7700693899032444e-06, - "loss": 0.8001, + "learning_rate": 5.796427716904347e-06, + "loss": 0.8521, "step": 22901 }, { - "epoch": 0.6498864926220205, + "epoch": 0.6489841026948907, "grad_norm": 0.0, - "learning_rate": 5.769236600874924e-06, - "loss": 0.7772, + "learning_rate": 5.795594970540395e-06, + "loss": 0.9499, "step": 22902 }, { - "epoch": 0.6499148694665153, + "epoch": 0.6490124401371532, "grad_norm": 0.0, - "learning_rate": 5.768403847584048e-06, - "loss": 0.8884, + "learning_rate": 5.794762259591709e-06, + "loss": 0.849, "step": 22903 }, { - "epoch": 0.6499432463110102, + "epoch": 0.6490407775794157, "grad_norm": 0.0, - "learning_rate": 5.767571130037655e-06, - "loss": 0.8248, + "learning_rate": 5.793929584065306e-06, + "loss": 0.8329, "step": 22904 }, { - "epoch": 0.6499716231555052, + "epoch": 0.6490691150216782, "grad_norm": 0.0, - "learning_rate": 5.766738448242783e-06, - "loss": 0.8969, + "learning_rate": 5.793096943968191e-06, + "loss": 0.7378, "step": 22905 }, { - "epoch": 0.65, + "epoch": 0.6490974524639406, "grad_norm": 0.0, - "learning_rate": 5.765905802206457e-06, - "loss": 0.8817, + "learning_rate": 5.792264339307382e-06, + "loss": 0.7989, "step": 22906 }, { - "epoch": 0.6500283768444949, + "epoch": 0.6491257899062031, "grad_norm": 0.0, - "learning_rate": 5.76507319193572e-06, - "loss": 0.8077, + "learning_rate": 5.791431770089897e-06, + "loss": 0.8459, "step": 22907 }, { - "epoch": 0.6500567536889897, + "epoch": 0.6491541273484656, "grad_norm": 0.0, - "learning_rate": 5.764240617437595e-06, - "loss": 0.8165, + "learning_rate": 5.790599236322743e-06, + "loss": 0.8138, "step": 22908 }, { - "epoch": 0.6500851305334847, + "epoch": 0.6491824647907279, "grad_norm": 0.0, - "learning_rate": 5.763408078719121e-06, - "loss": 0.8967, + "learning_rate": 5.789766738012932e-06, + "loss": 0.8116, "step": 22909 }, { - "epoch": 0.6501135073779796, + "epoch": 0.6492108022329904, "grad_norm": 0.0, - "learning_rate": 5.762575575787333e-06, - "loss": 0.9297, + "learning_rate": 5.788934275167482e-06, + "loss": 0.8833, "step": 22910 }, { - "epoch": 0.6501418842224744, + "epoch": 0.6492391396752529, "grad_norm": 0.0, - "learning_rate": 5.761743108649256e-06, - "loss": 0.7974, + "learning_rate": 5.788101847793399e-06, + "loss": 0.9381, "step": 22911 }, { - "epoch": 0.6501702610669694, + "epoch": 0.6492674771175154, "grad_norm": 0.0, - "learning_rate": 5.760910677311925e-06, - "loss": 0.9186, + "learning_rate": 5.787269455897705e-06, + "loss": 0.7884, "step": 22912 }, { - "epoch": 0.6501986379114643, + "epoch": 0.6492958145597778, "grad_norm": 0.0, - "learning_rate": 5.760078281782372e-06, - "loss": 0.7738, + "learning_rate": 5.786437099487401e-06, + "loss": 0.9291, "step": 22913 }, { - "epoch": 0.6502270147559591, + "epoch": 0.6493241520020403, "grad_norm": 0.0, - "learning_rate": 5.759245922067629e-06, - "loss": 0.8266, + "learning_rate": 5.785604778569505e-06, + "loss": 0.8843, "step": 22914 }, { - "epoch": 0.650255391600454, + "epoch": 0.6493524894443028, "grad_norm": 0.0, - "learning_rate": 5.758413598174726e-06, - "loss": 0.8733, + "learning_rate": 5.784772493151019e-06, + "loss": 0.8812, "step": 22915 }, { - "epoch": 0.650283768444949, + "epoch": 0.6493808268865652, "grad_norm": 0.0, - "learning_rate": 5.757581310110697e-06, - "loss": 0.8904, + "learning_rate": 5.783940243238963e-06, + "loss": 0.8548, "step": 22916 }, { - "epoch": 0.6503121452894438, + "epoch": 0.6494091643288277, "grad_norm": 0.0, - "learning_rate": 5.756749057882567e-06, - "loss": 0.8742, + "learning_rate": 5.783108028840345e-06, + "loss": 0.8021, "step": 22917 }, { - "epoch": 0.6503405221339387, + "epoch": 0.6494375017710902, "grad_norm": 0.0, - "learning_rate": 5.7559168414973665e-06, - "loss": 0.8857, + "learning_rate": 5.7822758499621715e-06, + "loss": 0.7787, "step": 22918 }, { - "epoch": 0.6503688989784336, + "epoch": 0.6494658392133527, "grad_norm": 0.0, - "learning_rate": 5.755084660962134e-06, - "loss": 0.7559, + "learning_rate": 5.781443706611455e-06, + "loss": 0.8185, "step": 22919 }, { - "epoch": 0.6503972758229285, + "epoch": 0.649494176655615, "grad_norm": 0.0, - "learning_rate": 5.754252516283886e-06, - "loss": 0.9029, + "learning_rate": 5.780611598795207e-06, + "loss": 0.7133, "step": 22920 }, { - "epoch": 0.6504256526674234, + "epoch": 0.6495225140978775, "grad_norm": 0.0, - "learning_rate": 5.7534204074696584e-06, - "loss": 0.9128, + "learning_rate": 5.779779526520433e-06, + "loss": 0.8437, "step": 22921 }, { - "epoch": 0.6504540295119183, + "epoch": 0.64955085154014, "grad_norm": 0.0, - "learning_rate": 5.7525883345264835e-06, - "loss": 0.9063, + "learning_rate": 5.778947489794141e-06, + "loss": 0.8655, "step": 22922 }, { - "epoch": 0.6504824063564132, + "epoch": 0.6495791889824024, "grad_norm": 0.0, - "learning_rate": 5.751756297461381e-06, - "loss": 0.897, + "learning_rate": 5.778115488623343e-06, + "loss": 0.791, "step": 22923 }, { - "epoch": 0.650510783200908, + "epoch": 0.6496075264246649, "grad_norm": 0.0, - "learning_rate": 5.750924296281386e-06, - "loss": 0.7817, + "learning_rate": 5.777283523015045e-06, + "loss": 0.823, "step": 22924 }, { - "epoch": 0.6505391600454029, + "epoch": 0.6496358638669274, "grad_norm": 0.0, - "learning_rate": 5.750092330993527e-06, - "loss": 0.9395, + "learning_rate": 5.77645159297626e-06, + "loss": 0.8754, "step": 22925 }, { - "epoch": 0.6505675368898979, + "epoch": 0.6496642013091898, "grad_norm": 0.0, - "learning_rate": 5.7492604016048256e-06, - "loss": 0.8975, + "learning_rate": 5.7756196985139875e-06, + "loss": 0.8744, "step": 22926 }, { - "epoch": 0.6505959137343927, + "epoch": 0.6496925387514523, "grad_norm": 0.0, - "learning_rate": 5.748428508122312e-06, - "loss": 0.9081, + "learning_rate": 5.774787839635241e-06, + "loss": 0.8004, "step": 22927 }, { - "epoch": 0.6506242905788876, + "epoch": 0.6497208761937148, "grad_norm": 0.0, - "learning_rate": 5.74759665055302e-06, - "loss": 0.7841, + "learning_rate": 5.7739560163470275e-06, + "loss": 0.8029, "step": 22928 }, { - "epoch": 0.6506526674233826, + "epoch": 0.6497492136359773, "grad_norm": 0.0, - "learning_rate": 5.74676482890396e-06, - "loss": 0.8844, + "learning_rate": 5.773124228656348e-06, + "loss": 0.8264, "step": 22929 }, { - "epoch": 0.6506810442678774, + "epoch": 0.6497775510782396, "grad_norm": 0.0, - "learning_rate": 5.745933043182177e-06, - "loss": 0.7087, + "learning_rate": 5.772292476570218e-06, + "loss": 0.8656, "step": 22930 }, { - "epoch": 0.6507094211123723, + "epoch": 0.6498058885205021, "grad_norm": 0.0, - "learning_rate": 5.745101293394686e-06, - "loss": 0.8043, + "learning_rate": 5.771460760095633e-06, + "loss": 0.9036, "step": 22931 }, { - "epoch": 0.6507377979568671, + "epoch": 0.6498342259627646, "grad_norm": 0.0, - "learning_rate": 5.744269579548515e-06, - "loss": 0.7049, + "learning_rate": 5.770629079239605e-06, + "loss": 0.918, "step": 22932 }, { - "epoch": 0.6507661748013621, + "epoch": 0.649862563405027, "grad_norm": 0.0, - "learning_rate": 5.743437901650695e-06, - "loss": 0.8675, + "learning_rate": 5.769797434009141e-06, + "loss": 0.9077, "step": 22933 }, { - "epoch": 0.650794551645857, + "epoch": 0.6498909008472895, "grad_norm": 0.0, - "learning_rate": 5.742606259708241e-06, - "loss": 0.8636, + "learning_rate": 5.768965824411242e-06, + "loss": 0.9136, "step": 22934 }, { - "epoch": 0.6508229284903518, + "epoch": 0.649919238289552, "grad_norm": 0.0, - "learning_rate": 5.7417746537281825e-06, - "loss": 0.8256, + "learning_rate": 5.768134250452915e-06, + "loss": 0.8923, "step": 22935 }, { - "epoch": 0.6508513053348468, + "epoch": 0.6499475757318145, "grad_norm": 0.0, - "learning_rate": 5.740943083717551e-06, - "loss": 0.7616, + "learning_rate": 5.767302712141164e-06, + "loss": 0.8738, "step": 22936 }, { - "epoch": 0.6508796821793417, + "epoch": 0.6499759131740769, "grad_norm": 0.0, - "learning_rate": 5.7401115496833605e-06, - "loss": 0.8422, + "learning_rate": 5.766471209482994e-06, + "loss": 0.8753, "step": 22937 }, { - "epoch": 0.6509080590238365, + "epoch": 0.6500042506163394, "grad_norm": 0.0, - "learning_rate": 5.739280051632639e-06, - "loss": 0.8096, + "learning_rate": 5.765639742485414e-06, + "loss": 0.8537, "step": 22938 }, { - "epoch": 0.6509364358683314, + "epoch": 0.6500325880586019, "grad_norm": 0.0, - "learning_rate": 5.738448589572414e-06, - "loss": 0.9067, + "learning_rate": 5.764808311155419e-06, + "loss": 0.8237, "step": 22939 }, { - "epoch": 0.6509648127128264, + "epoch": 0.6500609255008643, "grad_norm": 0.0, - "learning_rate": 5.737617163509701e-06, - "loss": 0.7782, + "learning_rate": 5.763976915500013e-06, + "loss": 0.909, "step": 22940 }, { - "epoch": 0.6509931895573212, + "epoch": 0.6500892629431267, "grad_norm": 0.0, - "learning_rate": 5.736785773451528e-06, - "loss": 0.8239, + "learning_rate": 5.763145555526211e-06, + "loss": 0.8711, "step": 22941 }, { - "epoch": 0.6510215664018161, + "epoch": 0.6501176003853892, "grad_norm": 0.0, - "learning_rate": 5.735954419404921e-06, - "loss": 0.8533, + "learning_rate": 5.762314231241001e-06, + "loss": 0.8682, "step": 22942 }, { - "epoch": 0.651049943246311, + "epoch": 0.6501459378276516, "grad_norm": 0.0, - "learning_rate": 5.735123101376895e-06, - "loss": 0.8941, + "learning_rate": 5.761482942651395e-06, + "loss": 0.8882, "step": 22943 }, { - "epoch": 0.6510783200908059, + "epoch": 0.6501742752699141, "grad_norm": 0.0, - "learning_rate": 5.734291819374475e-06, - "loss": 0.8118, + "learning_rate": 5.76065168976439e-06, + "loss": 0.8224, "step": 22944 }, { - "epoch": 0.6511066969353008, + "epoch": 0.6502026127121766, "grad_norm": 0.0, - "learning_rate": 5.7334605734046855e-06, - "loss": 0.8061, + "learning_rate": 5.759820472586989e-06, + "loss": 0.7342, "step": 22945 }, { - "epoch": 0.6511350737797957, + "epoch": 0.6502309501544391, "grad_norm": 0.0, - "learning_rate": 5.732629363474544e-06, - "loss": 0.7721, + "learning_rate": 5.758989291126199e-06, + "loss": 0.8589, "step": 22946 }, { - "epoch": 0.6511634506242906, + "epoch": 0.6502592875967015, "grad_norm": 0.0, - "learning_rate": 5.73179818959108e-06, - "loss": 0.8549, + "learning_rate": 5.758158145389012e-06, + "loss": 0.7042, "step": 22947 }, { - "epoch": 0.6511918274687855, + "epoch": 0.650287625038964, "grad_norm": 0.0, - "learning_rate": 5.730967051761305e-06, - "loss": 0.8355, + "learning_rate": 5.757327035382436e-06, + "loss": 0.9609, "step": 22948 }, { - "epoch": 0.6512202043132803, + "epoch": 0.6503159624812265, "grad_norm": 0.0, - "learning_rate": 5.730135949992242e-06, - "loss": 0.8842, + "learning_rate": 5.7564959611134685e-06, + "loss": 0.8152, "step": 22949 }, { - "epoch": 0.6512485811577753, + "epoch": 0.6503442999234889, "grad_norm": 0.0, - "learning_rate": 5.729304884290916e-06, - "loss": 0.7839, + "learning_rate": 5.7556649225891125e-06, + "loss": 0.8165, "step": 22950 }, { - "epoch": 0.6512769580022701, + "epoch": 0.6503726373657513, "grad_norm": 0.0, - "learning_rate": 5.72847385466434e-06, - "loss": 0.8758, + "learning_rate": 5.75483391981637e-06, + "loss": 0.7804, "step": 22951 }, { - "epoch": 0.651305334846765, + "epoch": 0.6504009748080138, "grad_norm": 0.0, - "learning_rate": 5.727642861119538e-06, - "loss": 0.8692, + "learning_rate": 5.754002952802233e-06, + "loss": 0.7874, "step": 22952 }, { - "epoch": 0.65133371169126, + "epoch": 0.6504293122502763, "grad_norm": 0.0, - "learning_rate": 5.726811903663533e-06, - "loss": 0.7716, + "learning_rate": 5.753172021553708e-06, + "loss": 0.8639, "step": 22953 }, { - "epoch": 0.6513620885357548, + "epoch": 0.6504576496925387, "grad_norm": 0.0, - "learning_rate": 5.725980982303335e-06, - "loss": 0.907, + "learning_rate": 5.752341126077795e-06, + "loss": 0.9188, "step": 22954 }, { - "epoch": 0.6513904653802497, + "epoch": 0.6504859871348012, "grad_norm": 0.0, - "learning_rate": 5.725150097045968e-06, - "loss": 0.8801, + "learning_rate": 5.7515102663814855e-06, + "loss": 0.792, "step": 22955 }, { - "epoch": 0.6514188422247446, + "epoch": 0.6505143245770637, "grad_norm": 0.0, - "learning_rate": 5.724319247898456e-06, - "loss": 0.9284, + "learning_rate": 5.750679442471783e-06, + "loss": 0.8335, "step": 22956 }, { - "epoch": 0.6514472190692395, + "epoch": 0.6505426620193261, "grad_norm": 0.0, - "learning_rate": 5.723488434867805e-06, - "loss": 0.7834, + "learning_rate": 5.7498486543556896e-06, + "loss": 0.7662, "step": 22957 }, { - "epoch": 0.6514755959137344, + "epoch": 0.6505709994615886, "grad_norm": 0.0, - "learning_rate": 5.722657657961041e-06, - "loss": 0.9018, + "learning_rate": 5.749017902040196e-06, + "loss": 0.8817, "step": 22958 }, { - "epoch": 0.6515039727582292, + "epoch": 0.6505993369038511, "grad_norm": 0.0, - "learning_rate": 5.721826917185179e-06, - "loss": 0.9201, + "learning_rate": 5.748187185532306e-06, + "loss": 0.7761, "step": 22959 }, { - "epoch": 0.6515323496027242, + "epoch": 0.6506276743461136, "grad_norm": 0.0, - "learning_rate": 5.7209962125472385e-06, - "loss": 0.7391, + "learning_rate": 5.7473565048390115e-06, + "loss": 0.9093, "step": 22960 }, { - "epoch": 0.6515607264472191, + "epoch": 0.650656011788376, "grad_norm": 0.0, - "learning_rate": 5.720165544054239e-06, - "loss": 0.7924, + "learning_rate": 5.7465258599673115e-06, + "loss": 0.9086, "step": 22961 }, { - "epoch": 0.6515891032917139, + "epoch": 0.6506843492306384, "grad_norm": 0.0, - "learning_rate": 5.719334911713189e-06, - "loss": 0.8139, + "learning_rate": 5.745695250924204e-06, + "loss": 0.814, "step": 22962 }, { - "epoch": 0.6516174801362089, + "epoch": 0.6507126866729009, "grad_norm": 0.0, - "learning_rate": 5.71850431553111e-06, - "loss": 0.8272, + "learning_rate": 5.744864677716684e-06, + "loss": 0.8713, "step": 22963 }, { - "epoch": 0.6516458569807038, + "epoch": 0.6507410241151633, "grad_norm": 0.0, - "learning_rate": 5.717673755515021e-06, - "loss": 0.8769, + "learning_rate": 5.74403414035175e-06, + "loss": 0.7885, "step": 22964 }, { - "epoch": 0.6516742338251986, + "epoch": 0.6507693615574258, "grad_norm": 0.0, - "learning_rate": 5.71684323167193e-06, - "loss": 0.7729, + "learning_rate": 5.743203638836401e-06, + "loss": 0.9006, "step": 22965 }, { - "epoch": 0.6517026106696935, + "epoch": 0.6507976989996883, "grad_norm": 0.0, - "learning_rate": 5.716012744008858e-06, - "loss": 0.8352, + "learning_rate": 5.742373173177625e-06, + "loss": 0.8987, "step": 22966 }, { - "epoch": 0.6517309875141885, + "epoch": 0.6508260364419507, "grad_norm": 0.0, - "learning_rate": 5.7151822925328215e-06, - "loss": 0.8576, + "learning_rate": 5.741542743382422e-06, + "loss": 0.7313, "step": 22967 }, { - "epoch": 0.6517593643586833, + "epoch": 0.6508543738842132, "grad_norm": 0.0, - "learning_rate": 5.71435187725083e-06, - "loss": 0.8508, + "learning_rate": 5.740712349457785e-06, + "loss": 0.8148, "step": 22968 }, { - "epoch": 0.6517877412031782, + "epoch": 0.6508827113264757, "grad_norm": 0.0, - "learning_rate": 5.713521498169901e-06, - "loss": 0.869, + "learning_rate": 5.739881991410707e-06, + "loss": 0.7987, "step": 22969 }, { - "epoch": 0.6518161180476731, + "epoch": 0.6509110487687382, "grad_norm": 0.0, - "learning_rate": 5.7126911552970525e-06, - "loss": 0.865, + "learning_rate": 5.7390516692481905e-06, + "loss": 0.7944, "step": 22970 }, { - "epoch": 0.651844494892168, + "epoch": 0.6509393862110006, "grad_norm": 0.0, - "learning_rate": 5.711860848639289e-06, - "loss": 0.8598, + "learning_rate": 5.7382213829772205e-06, + "loss": 0.8259, "step": 22971 }, { - "epoch": 0.6518728717366629, + "epoch": 0.650967723653263, "grad_norm": 0.0, - "learning_rate": 5.711030578203632e-06, - "loss": 0.733, + "learning_rate": 5.737391132604797e-06, + "loss": 0.8073, "step": 22972 }, { - "epoch": 0.6519012485811577, + "epoch": 0.6509960610955255, "grad_norm": 0.0, - "learning_rate": 5.710200343997094e-06, - "loss": 0.9398, + "learning_rate": 5.736560918137908e-06, + "loss": 0.8797, "step": 22973 }, { - "epoch": 0.6519296254256527, + "epoch": 0.6510243985377879, "grad_norm": 0.0, - "learning_rate": 5.709370146026684e-06, - "loss": 0.7581, + "learning_rate": 5.73573073958355e-06, + "loss": 0.8326, "step": 22974 }, { - "epoch": 0.6519580022701476, + "epoch": 0.6510527359800504, "grad_norm": 0.0, - "learning_rate": 5.7085399842994175e-06, - "loss": 0.8077, + "learning_rate": 5.734900596948713e-06, + "loss": 0.8007, "step": 22975 }, { - "epoch": 0.6519863791146424, + "epoch": 0.6510810734223129, "grad_norm": 0.0, - "learning_rate": 5.707709858822306e-06, - "loss": 0.8489, + "learning_rate": 5.734070490240393e-06, + "loss": 0.9255, "step": 22976 }, { - "epoch": 0.6520147559591374, + "epoch": 0.6511094108645754, "grad_norm": 0.0, - "learning_rate": 5.706879769602362e-06, - "loss": 0.9412, + "learning_rate": 5.7332404194655824e-06, + "loss": 0.8319, "step": 22977 }, { - "epoch": 0.6520431328036322, + "epoch": 0.6511377483068378, "grad_norm": 0.0, - "learning_rate": 5.7060497166466025e-06, - "loss": 0.9082, + "learning_rate": 5.732410384631276e-06, + "loss": 0.9471, "step": 22978 }, { - "epoch": 0.6520715096481271, + "epoch": 0.6511660857491003, "grad_norm": 0.0, - "learning_rate": 5.705219699962028e-06, - "loss": 0.8754, + "learning_rate": 5.731580385744457e-06, + "loss": 0.7667, "step": 22979 }, { - "epoch": 0.6520998864926221, + "epoch": 0.6511944231913628, "grad_norm": 0.0, - "learning_rate": 5.704389719555658e-06, - "loss": 0.8662, + "learning_rate": 5.730750422812126e-06, + "loss": 0.8148, "step": 22980 }, { - "epoch": 0.6521282633371169, + "epoch": 0.6512227606336252, "grad_norm": 0.0, - "learning_rate": 5.703559775434504e-06, - "loss": 0.8255, + "learning_rate": 5.729920495841266e-06, + "loss": 0.9673, "step": 22981 }, { - "epoch": 0.6521566401816118, + "epoch": 0.6512510980758877, "grad_norm": 0.0, - "learning_rate": 5.702729867605571e-06, - "loss": 0.9294, + "learning_rate": 5.72909060483887e-06, + "loss": 0.8413, "step": 22982 }, { - "epoch": 0.6521850170261067, + "epoch": 0.6512794355181502, "grad_norm": 0.0, - "learning_rate": 5.70189999607587e-06, - "loss": 0.7138, + "learning_rate": 5.728260749811936e-06, + "loss": 0.855, "step": 22983 }, { - "epoch": 0.6522133938706016, + "epoch": 0.6513077729604126, "grad_norm": 0.0, - "learning_rate": 5.701070160852419e-06, - "loss": 0.8773, + "learning_rate": 5.727430930767441e-06, + "loss": 0.8806, "step": 22984 }, { - "epoch": 0.6522417707150965, + "epoch": 0.651336110402675, "grad_norm": 0.0, - "learning_rate": 5.700240361942216e-06, - "loss": 0.9182, + "learning_rate": 5.726601147712384e-06, + "loss": 0.8706, "step": 22985 }, { - "epoch": 0.6522701475595913, + "epoch": 0.6513644478449375, "grad_norm": 0.0, - "learning_rate": 5.699410599352278e-06, - "loss": 0.8302, + "learning_rate": 5.725771400653756e-06, + "loss": 0.8399, "step": 22986 }, { - "epoch": 0.6522985244040863, + "epoch": 0.6513927852872, "grad_norm": 0.0, - "learning_rate": 5.698580873089614e-06, - "loss": 0.8047, + "learning_rate": 5.724941689598538e-06, + "loss": 0.8966, "step": 22987 }, { - "epoch": 0.6523269012485812, + "epoch": 0.6514211227294624, "grad_norm": 0.0, - "learning_rate": 5.697751183161227e-06, - "loss": 0.8532, + "learning_rate": 5.724112014553725e-06, + "loss": 0.9018, "step": 22988 }, { - "epoch": 0.652355278093076, + "epoch": 0.6514494601717249, "grad_norm": 0.0, - "learning_rate": 5.69692152957413e-06, - "loss": 0.8673, + "learning_rate": 5.723282375526302e-06, + "loss": 0.8515, "step": 22989 }, { - "epoch": 0.6523836549375709, + "epoch": 0.6514777976139874, "grad_norm": 0.0, - "learning_rate": 5.69609191233533e-06, - "loss": 0.8304, + "learning_rate": 5.7224527725232614e-06, + "loss": 0.8968, "step": 22990 }, { - "epoch": 0.6524120317820659, + "epoch": 0.6515061350562498, "grad_norm": 0.0, - "learning_rate": 5.695262331451834e-06, - "loss": 0.747, + "learning_rate": 5.721623205551594e-06, + "loss": 0.8529, "step": 22991 }, { - "epoch": 0.6524404086265607, + "epoch": 0.6515344724985123, "grad_norm": 0.0, - "learning_rate": 5.694432786930656e-06, - "loss": 0.8356, + "learning_rate": 5.720793674618278e-06, + "loss": 0.7927, "step": 22992 }, { - "epoch": 0.6524687854710556, + "epoch": 0.6515628099407748, "grad_norm": 0.0, - "learning_rate": 5.6936032787787945e-06, - "loss": 0.8076, + "learning_rate": 5.719964179730306e-06, + "loss": 0.8624, "step": 22993 }, { - "epoch": 0.6524971623155505, + "epoch": 0.6515911473830373, "grad_norm": 0.0, - "learning_rate": 5.692773807003258e-06, - "loss": 0.8367, + "learning_rate": 5.71913472089467e-06, + "loss": 0.8434, "step": 22994 }, { - "epoch": 0.6525255391600454, + "epoch": 0.6516194848252996, "grad_norm": 0.0, - "learning_rate": 5.691944371611059e-06, - "loss": 0.9833, + "learning_rate": 5.718305298118347e-06, + "loss": 0.7674, "step": 22995 }, { - "epoch": 0.6525539160045403, + "epoch": 0.6516478222675621, "grad_norm": 0.0, - "learning_rate": 5.6911149726091955e-06, - "loss": 0.8853, + "learning_rate": 5.717475911408332e-06, + "loss": 0.8356, "step": 22996 }, { - "epoch": 0.6525822928490352, + "epoch": 0.6516761597098246, "grad_norm": 0.0, - "learning_rate": 5.690285610004678e-06, - "loss": 0.86, + "learning_rate": 5.716646560771605e-06, + "loss": 0.878, "step": 22997 }, { - "epoch": 0.6526106696935301, + "epoch": 0.651704497152087, "grad_norm": 0.0, - "learning_rate": 5.6894562838045155e-06, - "loss": 0.8846, + "learning_rate": 5.715817246215154e-06, + "loss": 0.8523, "step": 22998 }, { - "epoch": 0.652639046538025, + "epoch": 0.6517328345943495, "grad_norm": 0.0, - "learning_rate": 5.688626994015706e-06, - "loss": 0.7998, + "learning_rate": 5.714987967745969e-06, + "loss": 0.7879, "step": 22999 }, { - "epoch": 0.6526674233825198, + "epoch": 0.651761172036612, "grad_norm": 0.0, - "learning_rate": 5.687797740645258e-06, - "loss": 0.9127, + "learning_rate": 5.714158725371027e-06, + "loss": 0.8118, "step": 23000 }, { - "epoch": 0.6526958002270148, + "epoch": 0.6517895094788745, "grad_norm": 0.0, - "learning_rate": 5.68696852370018e-06, - "loss": 0.7982, + "learning_rate": 5.713329519097319e-06, + "loss": 0.8863, "step": 23001 }, { - "epoch": 0.6527241770715096, + "epoch": 0.6518178469211369, "grad_norm": 0.0, - "learning_rate": 5.686139343187468e-06, - "loss": 0.8024, + "learning_rate": 5.712500348931828e-06, + "loss": 0.8689, "step": 23002 }, { - "epoch": 0.6527525539160045, + "epoch": 0.6518461843633994, "grad_norm": 0.0, - "learning_rate": 5.685310199114132e-06, - "loss": 0.7455, + "learning_rate": 5.7116712148815375e-06, + "loss": 0.8705, "step": 23003 }, { - "epoch": 0.6527809307604995, + "epoch": 0.6518745218056619, "grad_norm": 0.0, - "learning_rate": 5.6844810914871774e-06, - "loss": 0.8069, + "learning_rate": 5.7108421169534376e-06, + "loss": 0.7594, "step": 23004 }, { - "epoch": 0.6528093076049943, + "epoch": 0.6519028592479242, "grad_norm": 0.0, - "learning_rate": 5.683652020313599e-06, - "loss": 0.9141, + "learning_rate": 5.710013055154503e-06, + "loss": 0.8108, "step": 23005 }, { - "epoch": 0.6528376844494892, + "epoch": 0.6519311966901867, "grad_norm": 0.0, - "learning_rate": 5.682822985600409e-06, - "loss": 0.8457, + "learning_rate": 5.709184029491721e-06, + "loss": 0.8862, "step": 23006 }, { - "epoch": 0.6528660612939841, + "epoch": 0.6519595341324492, "grad_norm": 0.0, - "learning_rate": 5.681993987354611e-06, - "loss": 0.9523, + "learning_rate": 5.708355039972081e-06, + "loss": 0.7383, "step": 23007 }, { - "epoch": 0.652894438138479, + "epoch": 0.6519878715747117, "grad_norm": 0.0, - "learning_rate": 5.6811650255831995e-06, - "loss": 0.8296, + "learning_rate": 5.707526086602555e-06, + "loss": 0.7904, "step": 23008 }, { - "epoch": 0.6529228149829739, + "epoch": 0.6520162090169741, "grad_norm": 0.0, - "learning_rate": 5.680336100293182e-06, - "loss": 0.7873, + "learning_rate": 5.706697169390134e-06, + "loss": 0.8372, "step": 23009 }, { - "epoch": 0.6529511918274687, + "epoch": 0.6520445464592366, "grad_norm": 0.0, - "learning_rate": 5.679507211491564e-06, - "loss": 0.7647, + "learning_rate": 5.705868288341795e-06, + "loss": 0.9494, "step": 23010 }, { - "epoch": 0.6529795686719637, + "epoch": 0.6520728839014991, "grad_norm": 0.0, - "learning_rate": 5.678678359185339e-06, - "loss": 0.8586, + "learning_rate": 5.705039443464521e-06, + "loss": 0.8244, "step": 23011 }, { - "epoch": 0.6530079455164586, + "epoch": 0.6521012213437615, "grad_norm": 0.0, - "learning_rate": 5.67784954338151e-06, - "loss": 0.8284, + "learning_rate": 5.704210634765295e-06, + "loss": 0.9152, "step": 23012 }, { - "epoch": 0.6530363223609534, + "epoch": 0.652129558786024, "grad_norm": 0.0, - "learning_rate": 5.677020764087086e-06, - "loss": 0.8146, + "learning_rate": 5.7033818622511e-06, + "loss": 0.8528, "step": 23013 }, { - "epoch": 0.6530646992054484, + "epoch": 0.6521578962282865, "grad_norm": 0.0, - "learning_rate": 5.676192021309058e-06, - "loss": 0.9294, + "learning_rate": 5.7025531259289134e-06, + "loss": 0.7973, "step": 23014 }, { - "epoch": 0.6530930760499433, + "epoch": 0.6521862336705488, "grad_norm": 0.0, - "learning_rate": 5.675363315054434e-06, - "loss": 0.811, + "learning_rate": 5.701724425805717e-06, + "loss": 0.9018, "step": 23015 }, { - "epoch": 0.6531214528944381, + "epoch": 0.6522145711128113, "grad_norm": 0.0, - "learning_rate": 5.674534645330207e-06, - "loss": 0.8038, + "learning_rate": 5.700895761888492e-06, + "loss": 0.9352, "step": 23016 }, { - "epoch": 0.653149829738933, + "epoch": 0.6522429085550738, "grad_norm": 0.0, - "learning_rate": 5.67370601214338e-06, - "loss": 0.7924, + "learning_rate": 5.7000671341842215e-06, + "loss": 0.8098, "step": 23017 }, { - "epoch": 0.653178206583428, + "epoch": 0.6522712459973363, "grad_norm": 0.0, - "learning_rate": 5.672877415500957e-06, - "loss": 0.7789, + "learning_rate": 5.699238542699879e-06, + "loss": 0.9016, "step": 23018 }, { - "epoch": 0.6532065834279228, + "epoch": 0.6522995834395987, "grad_norm": 0.0, - "learning_rate": 5.672048855409928e-06, - "loss": 0.7681, + "learning_rate": 5.698409987442448e-06, + "loss": 0.8266, "step": 23019 }, { - "epoch": 0.6532349602724177, + "epoch": 0.6523279208818612, "grad_norm": 0.0, - "learning_rate": 5.671220331877298e-06, - "loss": 0.7809, + "learning_rate": 5.697581468418909e-06, + "loss": 0.8771, "step": 23020 }, { - "epoch": 0.6532633371169126, + "epoch": 0.6523562583241237, "grad_norm": 0.0, - "learning_rate": 5.670391844910064e-06, - "loss": 0.8832, + "learning_rate": 5.696752985636237e-06, + "loss": 0.9121, "step": 23021 }, { - "epoch": 0.6532917139614075, + "epoch": 0.6523845957663861, "grad_norm": 0.0, - "learning_rate": 5.669563394515223e-06, - "loss": 0.8627, + "learning_rate": 5.695924539101412e-06, + "loss": 0.8389, "step": 23022 }, { - "epoch": 0.6533200908059024, + "epoch": 0.6524129332086486, "grad_norm": 0.0, - "learning_rate": 5.668734980699777e-06, - "loss": 0.8144, + "learning_rate": 5.695096128821417e-06, + "loss": 0.8447, "step": 23023 }, { - "epoch": 0.6533484676503972, + "epoch": 0.6524412706509111, "grad_norm": 0.0, - "learning_rate": 5.667906603470724e-06, - "loss": 0.7915, + "learning_rate": 5.694267754803221e-06, + "loss": 0.8156, "step": 23024 }, { - "epoch": 0.6533768444948922, + "epoch": 0.6524696080931736, "grad_norm": 0.0, - "learning_rate": 5.6670782628350555e-06, - "loss": 0.8617, + "learning_rate": 5.6934394170538075e-06, + "loss": 0.8174, "step": 23025 }, { - "epoch": 0.653405221339387, + "epoch": 0.6524979455354359, "grad_norm": 0.0, - "learning_rate": 5.666249958799772e-06, - "loss": 0.8131, + "learning_rate": 5.692611115580153e-06, + "loss": 0.8687, "step": 23026 }, { - "epoch": 0.6534335981838819, + "epoch": 0.6525262829776984, "grad_norm": 0.0, - "learning_rate": 5.665421691371873e-06, - "loss": 0.8469, + "learning_rate": 5.691782850389233e-06, + "loss": 0.8092, "step": 23027 }, { - "epoch": 0.6534619750283769, + "epoch": 0.6525546204199609, "grad_norm": 0.0, - "learning_rate": 5.6645934605583475e-06, - "loss": 0.7798, + "learning_rate": 5.690954621488031e-06, + "loss": 0.8545, "step": 23028 }, { - "epoch": 0.6534903518728717, + "epoch": 0.6525829578622233, "grad_norm": 0.0, - "learning_rate": 5.663765266366197e-06, - "loss": 0.9162, + "learning_rate": 5.690126428883516e-06, + "loss": 0.9595, "step": 23029 }, { - "epoch": 0.6535187287173666, + "epoch": 0.6526112953044858, "grad_norm": 0.0, - "learning_rate": 5.66293710880242e-06, - "loss": 0.8223, + "learning_rate": 5.6892982725826684e-06, + "loss": 0.7885, "step": 23030 }, { - "epoch": 0.6535471055618616, + "epoch": 0.6526396327467483, "grad_norm": 0.0, - "learning_rate": 5.662108987874004e-06, - "loss": 0.758, + "learning_rate": 5.688470152592459e-06, + "loss": 0.8671, "step": 23031 }, { - "epoch": 0.6535754824063564, + "epoch": 0.6526679701890108, "grad_norm": 0.0, - "learning_rate": 5.661280903587949e-06, - "loss": 0.7556, + "learning_rate": 5.687642068919866e-06, + "loss": 0.8071, "step": 23032 }, { - "epoch": 0.6536038592508513, + "epoch": 0.6526963076312732, "grad_norm": 0.0, - "learning_rate": 5.660452855951253e-06, - "loss": 0.8076, + "learning_rate": 5.6868140215718695e-06, + "loss": 0.8212, "step": 23033 }, { - "epoch": 0.6536322360953462, + "epoch": 0.6527246450735357, "grad_norm": 0.0, - "learning_rate": 5.659624844970901e-06, - "loss": 0.8921, + "learning_rate": 5.685986010555437e-06, + "loss": 0.8916, "step": 23034 }, { - "epoch": 0.6536606129398411, + "epoch": 0.6527529825157982, "grad_norm": 0.0, - "learning_rate": 5.658796870653896e-06, - "loss": 0.8382, + "learning_rate": 5.6851580358775445e-06, + "loss": 0.8247, "step": 23035 }, { - "epoch": 0.653688989784336, + "epoch": 0.6527813199580605, "grad_norm": 0.0, - "learning_rate": 5.6579689330072275e-06, - "loss": 0.7361, + "learning_rate": 5.684330097545174e-06, + "loss": 0.738, "step": 23036 }, { - "epoch": 0.6537173666288308, + "epoch": 0.652809657400323, "grad_norm": 0.0, - "learning_rate": 5.65714103203789e-06, - "loss": 0.7688, + "learning_rate": 5.683502195565289e-06, + "loss": 0.872, "step": 23037 }, { - "epoch": 0.6537457434733258, + "epoch": 0.6528379948425855, "grad_norm": 0.0, - "learning_rate": 5.656313167752882e-06, - "loss": 0.7251, + "learning_rate": 5.682674329944867e-06, + "loss": 0.917, "step": 23038 }, { - "epoch": 0.6537741203178207, + "epoch": 0.6528663322848479, "grad_norm": 0.0, - "learning_rate": 5.655485340159188e-06, - "loss": 0.7868, + "learning_rate": 5.681846500690884e-06, + "loss": 0.8312, "step": 23039 }, { - "epoch": 0.6538024971623155, + "epoch": 0.6528946697271104, "grad_norm": 0.0, - "learning_rate": 5.6546575492638045e-06, - "loss": 0.8444, + "learning_rate": 5.68101870781031e-06, + "loss": 0.8306, "step": 23040 }, { - "epoch": 0.6538308740068104, + "epoch": 0.6529230071693729, "grad_norm": 0.0, - "learning_rate": 5.653829795073729e-06, - "loss": 0.9377, + "learning_rate": 5.6801909513101235e-06, + "loss": 0.7673, "step": 23041 }, { - "epoch": 0.6538592508513054, + "epoch": 0.6529513446116354, "grad_norm": 0.0, - "learning_rate": 5.653002077595944e-06, - "loss": 0.9454, + "learning_rate": 5.679363231197289e-06, + "loss": 0.8614, "step": 23042 }, { - "epoch": 0.6538876276958002, + "epoch": 0.6529796820538978, "grad_norm": 0.0, - "learning_rate": 5.652174396837444e-06, - "loss": 0.8172, + "learning_rate": 5.678535547478783e-06, + "loss": 0.7861, "step": 23043 }, { - "epoch": 0.6539160045402951, + "epoch": 0.6530080194961603, "grad_norm": 0.0, - "learning_rate": 5.651346752805229e-06, - "loss": 0.8733, + "learning_rate": 5.67770790016158e-06, + "loss": 0.8464, "step": 23044 }, { - "epoch": 0.65394438138479, + "epoch": 0.6530363569384228, "grad_norm": 0.0, - "learning_rate": 5.650519145506278e-06, - "loss": 0.9438, + "learning_rate": 5.676880289252644e-06, + "loss": 0.8571, "step": 23045 }, { - "epoch": 0.6539727582292849, + "epoch": 0.6530646943806852, "grad_norm": 0.0, - "learning_rate": 5.649691574947588e-06, - "loss": 0.8466, + "learning_rate": 5.676052714758955e-06, + "loss": 0.9189, "step": 23046 }, { - "epoch": 0.6540011350737798, + "epoch": 0.6530930318229476, "grad_norm": 0.0, - "learning_rate": 5.648864041136153e-06, - "loss": 0.744, + "learning_rate": 5.675225176687477e-06, + "loss": 0.9058, "step": 23047 }, { - "epoch": 0.6540295119182747, + "epoch": 0.6531213692652101, "grad_norm": 0.0, - "learning_rate": 5.648036544078954e-06, - "loss": 0.9188, + "learning_rate": 5.674397675045182e-06, + "loss": 0.818, "step": 23048 }, { - "epoch": 0.6540578887627696, + "epoch": 0.6531497067074726, "grad_norm": 0.0, - "learning_rate": 5.647209083782986e-06, - "loss": 0.8593, + "learning_rate": 5.6735702098390454e-06, + "loss": 0.8049, "step": 23049 }, { - "epoch": 0.6540862656072645, + "epoch": 0.653178044149735, "grad_norm": 0.0, - "learning_rate": 5.646381660255242e-06, - "loss": 0.8424, + "learning_rate": 5.6727427810760305e-06, + "loss": 0.7722, "step": 23050 }, { - "epoch": 0.6541146424517593, + "epoch": 0.6532063815919975, "grad_norm": 0.0, - "learning_rate": 5.6455542735027024e-06, - "loss": 0.784, + "learning_rate": 5.67191538876311e-06, + "loss": 0.8562, "step": 23051 }, { - "epoch": 0.6541430192962543, + "epoch": 0.65323471903426, "grad_norm": 0.0, - "learning_rate": 5.644726923532368e-06, - "loss": 0.9277, + "learning_rate": 5.671088032907252e-06, + "loss": 0.8539, "step": 23052 }, { - "epoch": 0.6541713961407492, + "epoch": 0.6532630564765224, "grad_norm": 0.0, - "learning_rate": 5.643899610351217e-06, - "loss": 0.8657, + "learning_rate": 5.670260713515429e-06, + "loss": 0.7446, "step": 23053 }, { - "epoch": 0.654199772985244, + "epoch": 0.6532913939187849, "grad_norm": 0.0, - "learning_rate": 5.6430723339662415e-06, - "loss": 0.8657, + "learning_rate": 5.669433430594611e-06, + "loss": 0.9697, "step": 23054 }, { - "epoch": 0.654228149829739, + "epoch": 0.6533197313610474, "grad_norm": 0.0, - "learning_rate": 5.642245094384435e-06, - "loss": 0.7157, + "learning_rate": 5.6686061841517585e-06, + "loss": 0.831, "step": 23055 }, { - "epoch": 0.6542565266742338, + "epoch": 0.6533480688033099, "grad_norm": 0.0, - "learning_rate": 5.641417891612775e-06, - "loss": 0.8413, + "learning_rate": 5.667778974193845e-06, + "loss": 0.8784, "step": 23056 }, { - "epoch": 0.6542849035187287, + "epoch": 0.6533764062455722, "grad_norm": 0.0, - "learning_rate": 5.640590725658255e-06, - "loss": 0.9262, + "learning_rate": 5.6669518007278425e-06, + "loss": 0.8282, "step": 23057 }, { - "epoch": 0.6543132803632236, + "epoch": 0.6534047436878347, "grad_norm": 0.0, - "learning_rate": 5.639763596527863e-06, - "loss": 0.8102, + "learning_rate": 5.6661246637607085e-06, + "loss": 0.7471, "step": 23058 }, { - "epoch": 0.6543416572077185, + "epoch": 0.6534330811300972, "grad_norm": 0.0, - "learning_rate": 5.638936504228583e-06, - "loss": 0.8115, + "learning_rate": 5.6652975632994214e-06, + "loss": 0.89, "step": 23059 }, { - "epoch": 0.6543700340522134, + "epoch": 0.6534614185723596, "grad_norm": 0.0, - "learning_rate": 5.638109448767399e-06, - "loss": 0.9497, + "learning_rate": 5.664470499350938e-06, + "loss": 0.7586, "step": 23060 }, { - "epoch": 0.6543984108967082, + "epoch": 0.6534897560146221, "grad_norm": 0.0, - "learning_rate": 5.637282430151307e-06, - "loss": 0.9134, + "learning_rate": 5.663643471922229e-06, + "loss": 0.819, "step": 23061 }, { - "epoch": 0.6544267877412032, + "epoch": 0.6535180934568846, "grad_norm": 0.0, - "learning_rate": 5.636455448387282e-06, - "loss": 0.9431, + "learning_rate": 5.6628164810202655e-06, + "loss": 0.8713, "step": 23062 }, { - "epoch": 0.6544551645856981, + "epoch": 0.653546430899147, "grad_norm": 0.0, - "learning_rate": 5.635628503482313e-06, - "loss": 0.8102, + "learning_rate": 5.661989526652007e-06, + "loss": 0.8683, "step": 23063 }, { - "epoch": 0.6544835414301929, + "epoch": 0.6535747683414095, "grad_norm": 0.0, - "learning_rate": 5.634801595443391e-06, - "loss": 0.9539, + "learning_rate": 5.66116260882442e-06, + "loss": 0.8402, "step": 23064 }, { - "epoch": 0.6545119182746879, + "epoch": 0.653603105783672, "grad_norm": 0.0, - "learning_rate": 5.633974724277493e-06, - "loss": 0.772, + "learning_rate": 5.660335727544472e-06, + "loss": 0.8332, "step": 23065 }, { - "epoch": 0.6545402951191828, + "epoch": 0.6536314432259345, "grad_norm": 0.0, - "learning_rate": 5.633147889991606e-06, - "loss": 0.8847, + "learning_rate": 5.659508882819127e-06, + "loss": 0.891, "step": 23066 }, { - "epoch": 0.6545686719636776, + "epoch": 0.6536597806681969, "grad_norm": 0.0, - "learning_rate": 5.632321092592714e-06, - "loss": 0.9161, + "learning_rate": 5.658682074655355e-06, + "loss": 0.7632, "step": 23067 }, { - "epoch": 0.6545970488081725, + "epoch": 0.6536881181104593, "grad_norm": 0.0, - "learning_rate": 5.631494332087804e-06, - "loss": 0.8668, + "learning_rate": 5.657855303060112e-06, + "loss": 0.8176, "step": 23068 }, { - "epoch": 0.6546254256526675, + "epoch": 0.6537164555527218, "grad_norm": 0.0, - "learning_rate": 5.63066760848386e-06, - "loss": 0.8462, + "learning_rate": 5.657028568040366e-06, + "loss": 0.8459, "step": 23069 }, { - "epoch": 0.6546538024971623, + "epoch": 0.6537447929949842, "grad_norm": 0.0, - "learning_rate": 5.6298409217878605e-06, - "loss": 0.8665, + "learning_rate": 5.656201869603087e-06, + "loss": 0.8387, "step": 23070 }, { - "epoch": 0.6546821793416572, + "epoch": 0.6537731304372467, "grad_norm": 0.0, - "learning_rate": 5.62901427200679e-06, - "loss": 0.8321, + "learning_rate": 5.655375207755226e-06, + "loss": 0.8616, "step": 23071 }, { - "epoch": 0.6547105561861521, + "epoch": 0.6538014678795092, "grad_norm": 0.0, - "learning_rate": 5.628187659147637e-06, - "loss": 0.8833, + "learning_rate": 5.6545485825037534e-06, + "loss": 0.8383, "step": 23072 }, { - "epoch": 0.654738933030647, + "epoch": 0.6538298053217717, "grad_norm": 0.0, - "learning_rate": 5.627361083217375e-06, - "loss": 0.7435, + "learning_rate": 5.6537219938556365e-06, + "loss": 0.8196, "step": 23073 }, { - "epoch": 0.6547673098751419, + "epoch": 0.6538581427640341, "grad_norm": 0.0, - "learning_rate": 5.626534544222993e-06, - "loss": 0.8707, + "learning_rate": 5.652895441817829e-06, + "loss": 0.8119, "step": 23074 }, { - "epoch": 0.6547956867196367, + "epoch": 0.6538864802062966, "grad_norm": 0.0, - "learning_rate": 5.625708042171472e-06, - "loss": 0.9257, + "learning_rate": 5.6520689263973014e-06, + "loss": 0.8773, "step": 23075 }, { - "epoch": 0.6548240635641317, + "epoch": 0.6539148176485591, "grad_norm": 0.0, - "learning_rate": 5.624881577069788e-06, - "loss": 0.8516, + "learning_rate": 5.6512424476010085e-06, + "loss": 0.7895, "step": 23076 }, { - "epoch": 0.6548524404086266, + "epoch": 0.6539431550908215, "grad_norm": 0.0, - "learning_rate": 5.624055148924925e-06, - "loss": 0.9374, + "learning_rate": 5.650416005435915e-06, + "loss": 0.8454, "step": 23077 }, { - "epoch": 0.6548808172531214, + "epoch": 0.653971492533084, "grad_norm": 0.0, - "learning_rate": 5.62322875774387e-06, - "loss": 0.8942, + "learning_rate": 5.6495895999089826e-06, + "loss": 0.8744, "step": 23078 }, { - "epoch": 0.6549091940976164, + "epoch": 0.6539998299753464, "grad_norm": 0.0, - "learning_rate": 5.622402403533594e-06, - "loss": 0.9496, + "learning_rate": 5.648763231027171e-06, + "loss": 0.7627, "step": 23079 }, { - "epoch": 0.6549375709421112, + "epoch": 0.6540281674176089, "grad_norm": 0.0, - "learning_rate": 5.6215760863010825e-06, - "loss": 0.8294, + "learning_rate": 5.647936898797444e-06, + "loss": 0.9123, "step": 23080 }, { - "epoch": 0.6549659477866061, + "epoch": 0.6540565048598713, "grad_norm": 0.0, - "learning_rate": 5.620749806053318e-06, - "loss": 0.7496, + "learning_rate": 5.647110603226764e-06, + "loss": 0.8069, "step": 23081 }, { - "epoch": 0.6549943246311011, + "epoch": 0.6540848423021338, "grad_norm": 0.0, - "learning_rate": 5.61992356279727e-06, - "loss": 0.7905, + "learning_rate": 5.646284344322084e-06, + "loss": 0.8675, "step": 23082 }, { - "epoch": 0.6550227014755959, + "epoch": 0.6541131797443963, "grad_norm": 0.0, - "learning_rate": 5.619097356539931e-06, - "loss": 0.956, + "learning_rate": 5.645458122090372e-06, + "loss": 0.8132, "step": 23083 }, { - "epoch": 0.6550510783200908, + "epoch": 0.6541415171866587, "grad_norm": 0.0, - "learning_rate": 5.618271187288269e-06, - "loss": 0.9422, + "learning_rate": 5.644631936538578e-06, + "loss": 0.8503, "step": 23084 }, { - "epoch": 0.6550794551645857, + "epoch": 0.6541698546289212, "grad_norm": 0.0, - "learning_rate": 5.617445055049269e-06, - "loss": 0.7958, + "learning_rate": 5.643805787673668e-06, + "loss": 0.961, "step": 23085 }, { - "epoch": 0.6551078320090806, + "epoch": 0.6541981920711837, "grad_norm": 0.0, - "learning_rate": 5.616618959829911e-06, - "loss": 0.8466, + "learning_rate": 5.642979675502603e-06, + "loss": 0.8343, "step": 23086 }, { - "epoch": 0.6551362088535755, + "epoch": 0.6542265295134461, "grad_norm": 0.0, - "learning_rate": 5.6157929016371645e-06, - "loss": 0.7709, + "learning_rate": 5.642153600032334e-06, + "loss": 0.8542, "step": 23087 }, { - "epoch": 0.6551645856980703, + "epoch": 0.6542548669557086, "grad_norm": 0.0, - "learning_rate": 5.614966880478014e-06, - "loss": 0.7476, + "learning_rate": 5.641327561269828e-06, + "loss": 0.8353, "step": 23088 }, { - "epoch": 0.6551929625425653, + "epoch": 0.654283204397971, "grad_norm": 0.0, - "learning_rate": 5.614140896359439e-06, - "loss": 0.8837, + "learning_rate": 5.640501559222034e-06, + "loss": 0.8341, "step": 23089 }, { - "epoch": 0.6552213393870602, + "epoch": 0.6543115418402335, "grad_norm": 0.0, - "learning_rate": 5.613314949288409e-06, - "loss": 0.8557, + "learning_rate": 5.639675593895915e-06, + "loss": 0.7728, "step": 23090 }, { - "epoch": 0.655249716231555, + "epoch": 0.6543398792824959, "grad_norm": 0.0, - "learning_rate": 5.612489039271904e-06, - "loss": 0.9433, + "learning_rate": 5.638849665298427e-06, + "loss": 0.8019, "step": 23091 }, { - "epoch": 0.6552780930760499, + "epoch": 0.6543682167247584, "grad_norm": 0.0, - "learning_rate": 5.611663166316908e-06, - "loss": 0.8189, + "learning_rate": 5.6380237734365275e-06, + "loss": 0.8392, "step": 23092 }, { - "epoch": 0.6553064699205449, + "epoch": 0.6543965541670209, "grad_norm": 0.0, - "learning_rate": 5.610837330430383e-06, - "loss": 0.7764, + "learning_rate": 5.637197918317174e-06, + "loss": 0.7889, "step": 23093 }, { - "epoch": 0.6553348467650397, + "epoch": 0.6544248916092833, "grad_norm": 0.0, - "learning_rate": 5.610011531619315e-06, - "loss": 0.7304, + "learning_rate": 5.636372099947327e-06, + "loss": 0.8005, "step": 23094 }, { - "epoch": 0.6553632236095346, + "epoch": 0.6544532290515458, "grad_norm": 0.0, - "learning_rate": 5.60918576989068e-06, - "loss": 0.864, + "learning_rate": 5.635546318333933e-06, + "loss": 0.8816, "step": 23095 }, { - "epoch": 0.6553916004540296, + "epoch": 0.6544815664938083, "grad_norm": 0.0, - "learning_rate": 5.608360045251445e-06, - "loss": 0.9005, + "learning_rate": 5.634720573483957e-06, + "loss": 0.9515, "step": 23096 }, { - "epoch": 0.6554199772985244, + "epoch": 0.6545099039360708, "grad_norm": 0.0, - "learning_rate": 5.607534357708591e-06, - "loss": 0.8665, + "learning_rate": 5.633894865404348e-06, + "loss": 0.9091, "step": 23097 }, { - "epoch": 0.6554483541430193, + "epoch": 0.6545382413783332, "grad_norm": 0.0, - "learning_rate": 5.606708707269093e-06, - "loss": 0.7354, + "learning_rate": 5.633069194102063e-06, + "loss": 0.6696, "step": 23098 }, { - "epoch": 0.6554767309875141, + "epoch": 0.6545665788205957, "grad_norm": 0.0, - "learning_rate": 5.605883093939922e-06, - "loss": 0.7336, + "learning_rate": 5.632243559584061e-06, + "loss": 0.8004, "step": 23099 }, { - "epoch": 0.6555051078320091, + "epoch": 0.6545949162628582, "grad_norm": 0.0, - "learning_rate": 5.6050575177280586e-06, - "loss": 0.812, + "learning_rate": 5.631417961857291e-06, + "loss": 0.7814, "step": 23100 }, { - "epoch": 0.655533484676504, + "epoch": 0.6546232537051205, "grad_norm": 0.0, - "learning_rate": 5.604231978640469e-06, - "loss": 0.8748, + "learning_rate": 5.630592400928709e-06, + "loss": 0.8404, "step": 23101 }, { - "epoch": 0.6555618615209988, + "epoch": 0.654651591147383, "grad_norm": 0.0, - "learning_rate": 5.603406476684129e-06, - "loss": 0.7516, + "learning_rate": 5.629766876805274e-06, + "loss": 0.9209, "step": 23102 }, { - "epoch": 0.6555902383654938, + "epoch": 0.6546799285896455, "grad_norm": 0.0, - "learning_rate": 5.602581011866015e-06, - "loss": 0.82, + "learning_rate": 5.628941389493932e-06, + "loss": 0.8408, "step": 23103 }, { - "epoch": 0.6556186152099887, + "epoch": 0.654708266031908, "grad_norm": 0.0, - "learning_rate": 5.601755584193094e-06, - "loss": 0.9094, + "learning_rate": 5.628115939001637e-06, + "loss": 0.8176, "step": 23104 }, { - "epoch": 0.6556469920544835, + "epoch": 0.6547366034741704, "grad_norm": 0.0, - "learning_rate": 5.6009301936723415e-06, - "loss": 0.7922, + "learning_rate": 5.627290525335347e-06, + "loss": 0.8541, "step": 23105 }, { - "epoch": 0.6556753688989785, + "epoch": 0.6547649409164329, "grad_norm": 0.0, - "learning_rate": 5.600104840310733e-06, - "loss": 0.8448, + "learning_rate": 5.626465148502014e-06, + "loss": 0.8644, "step": 23106 }, { - "epoch": 0.6557037457434733, + "epoch": 0.6547932783586954, "grad_norm": 0.0, - "learning_rate": 5.5992795241152335e-06, - "loss": 0.8294, + "learning_rate": 5.625639808508592e-06, + "loss": 0.887, "step": 23107 }, { - "epoch": 0.6557321225879682, + "epoch": 0.6548216158009578, "grad_norm": 0.0, - "learning_rate": 5.598454245092816e-06, - "loss": 0.8749, + "learning_rate": 5.624814505362025e-06, + "loss": 0.9205, "step": 23108 }, { - "epoch": 0.6557604994324631, + "epoch": 0.6548499532432203, "grad_norm": 0.0, - "learning_rate": 5.597629003250457e-06, - "loss": 0.8482, + "learning_rate": 5.623989239069275e-06, + "loss": 0.8113, "step": 23109 }, { - "epoch": 0.655788876276958, + "epoch": 0.6548782906854828, "grad_norm": 0.0, - "learning_rate": 5.596803798595123e-06, - "loss": 0.7824, + "learning_rate": 5.6231640096372856e-06, + "loss": 0.8967, "step": 23110 }, { - "epoch": 0.6558172531214529, + "epoch": 0.6549066281277451, "grad_norm": 0.0, - "learning_rate": 5.5959786311337825e-06, - "loss": 0.9406, + "learning_rate": 5.62233881707301e-06, + "loss": 0.8086, "step": 23111 }, { - "epoch": 0.6558456299659478, + "epoch": 0.6549349655700076, "grad_norm": 0.0, - "learning_rate": 5.5951535008734135e-06, - "loss": 0.6753, + "learning_rate": 5.621513661383404e-06, + "loss": 0.8723, "step": 23112 }, { - "epoch": 0.6558740068104427, + "epoch": 0.6549633030122701, "grad_norm": 0.0, - "learning_rate": 5.5943284078209725e-06, - "loss": 0.8263, + "learning_rate": 5.62068854257541e-06, + "loss": 0.7288, "step": 23113 }, { - "epoch": 0.6559023836549376, + "epoch": 0.6549916404545326, "grad_norm": 0.0, - "learning_rate": 5.5935033519834415e-06, - "loss": 0.6933, + "learning_rate": 5.619863460655983e-06, + "loss": 0.7469, "step": 23114 }, { - "epoch": 0.6559307604994324, + "epoch": 0.655019977896795, "grad_norm": 0.0, - "learning_rate": 5.592678333367789e-06, - "loss": 0.8845, + "learning_rate": 5.619038415632078e-06, + "loss": 0.805, "step": 23115 }, { - "epoch": 0.6559591373439273, + "epoch": 0.6550483153390575, "grad_norm": 0.0, - "learning_rate": 5.591853351980978e-06, - "loss": 0.884, + "learning_rate": 5.618213407510633e-06, + "loss": 0.7984, "step": 23116 }, { - "epoch": 0.6559875141884223, + "epoch": 0.65507665278132, "grad_norm": 0.0, - "learning_rate": 5.591028407829979e-06, - "loss": 0.7694, + "learning_rate": 5.617388436298605e-06, + "loss": 0.8368, "step": 23117 }, { - "epoch": 0.6560158910329171, + "epoch": 0.6551049902235824, "grad_norm": 0.0, - "learning_rate": 5.590203500921767e-06, - "loss": 0.8447, + "learning_rate": 5.616563502002942e-06, + "loss": 0.7711, "step": 23118 }, { - "epoch": 0.656044267877412, + "epoch": 0.6551333276658449, "grad_norm": 0.0, - "learning_rate": 5.589378631263298e-06, - "loss": 0.8097, + "learning_rate": 5.615738604630592e-06, + "loss": 0.8075, "step": 23119 }, { - "epoch": 0.656072644721907, + "epoch": 0.6551616651081074, "grad_norm": 0.0, - "learning_rate": 5.5885537988615465e-06, - "loss": 0.7938, + "learning_rate": 5.614913744188508e-06, + "loss": 0.8084, "step": 23120 }, { - "epoch": 0.6561010215664018, + "epoch": 0.6551900025503699, "grad_norm": 0.0, - "learning_rate": 5.587729003723484e-06, - "loss": 0.8471, + "learning_rate": 5.614088920683629e-06, + "loss": 0.7842, "step": 23121 }, { - "epoch": 0.6561293984108967, + "epoch": 0.6552183399926322, "grad_norm": 0.0, - "learning_rate": 5.586904245856068e-06, + "learning_rate": 5.6132641341229085e-06, "loss": 0.8065, "step": 23122 }, { - "epoch": 0.6561577752553917, + "epoch": 0.6552466774348947, "grad_norm": 0.0, - "learning_rate": 5.586079525266276e-06, - "loss": 1.0267, + "learning_rate": 5.6124393845132964e-06, + "loss": 0.8434, "step": 23123 }, { - "epoch": 0.6561861520998865, + "epoch": 0.6552750148771572, "grad_norm": 0.0, - "learning_rate": 5.585254841961063e-06, - "loss": 0.8212, + "learning_rate": 5.611614671861733e-06, + "loss": 0.819, "step": 23124 }, { - "epoch": 0.6562145289443814, + "epoch": 0.6553033523194196, "grad_norm": 0.0, - "learning_rate": 5.584430195947402e-06, - "loss": 0.7854, + "learning_rate": 5.610789996175172e-06, + "loss": 0.8412, "step": 23125 }, { - "epoch": 0.6562429057888762, + "epoch": 0.6553316897616821, "grad_norm": 0.0, - "learning_rate": 5.583605587232262e-06, - "loss": 0.8302, + "learning_rate": 5.609965357460554e-06, + "loss": 0.9042, "step": 23126 }, { - "epoch": 0.6562712826333712, + "epoch": 0.6553600272039446, "grad_norm": 0.0, - "learning_rate": 5.582781015822598e-06, - "loss": 0.8235, + "learning_rate": 5.609140755724829e-06, + "loss": 0.9582, "step": 23127 }, { - "epoch": 0.6562996594778661, + "epoch": 0.655388364646207, "grad_norm": 0.0, - "learning_rate": 5.581956481725379e-06, - "loss": 0.8782, + "learning_rate": 5.608316190974944e-06, + "loss": 0.8207, "step": 23128 }, { - "epoch": 0.6563280363223609, + "epoch": 0.6554167020884695, "grad_norm": 0.0, - "learning_rate": 5.58113198494758e-06, - "loss": 0.8962, + "learning_rate": 5.607491663217839e-06, + "loss": 0.9238, "step": 23129 }, { - "epoch": 0.6563564131668559, + "epoch": 0.655445039530732, "grad_norm": 0.0, - "learning_rate": 5.580307525496154e-06, - "loss": 0.7436, + "learning_rate": 5.606667172460462e-06, + "loss": 0.7588, "step": 23130 }, { - "epoch": 0.6563847900113507, + "epoch": 0.6554733769729945, "grad_norm": 0.0, - "learning_rate": 5.579483103378067e-06, - "loss": 0.901, + "learning_rate": 5.605842718709759e-06, + "loss": 0.7971, "step": 23131 }, { - "epoch": 0.6564131668558456, + "epoch": 0.6555017144152568, "grad_norm": 0.0, - "learning_rate": 5.578658718600291e-06, - "loss": 0.8361, + "learning_rate": 5.605018301972677e-06, + "loss": 0.891, "step": 23132 }, { - "epoch": 0.6564415437003405, + "epoch": 0.6555300518575193, "grad_norm": 0.0, - "learning_rate": 5.577834371169779e-06, - "loss": 0.837, + "learning_rate": 5.60419392225616e-06, + "loss": 0.8469, "step": 23133 }, { - "epoch": 0.6564699205448354, + "epoch": 0.6555583892997818, "grad_norm": 0.0, - "learning_rate": 5.5770100610935e-06, - "loss": 0.9694, + "learning_rate": 5.6033695795671465e-06, + "loss": 0.91, "step": 23134 }, { - "epoch": 0.6564982973893303, + "epoch": 0.6555867267420442, "grad_norm": 0.0, - "learning_rate": 5.576185788378418e-06, - "loss": 0.7487, + "learning_rate": 5.602545273912583e-06, + "loss": 0.7542, "step": 23135 }, { - "epoch": 0.6565266742338252, + "epoch": 0.6556150641843067, "grad_norm": 0.0, - "learning_rate": 5.57536155303149e-06, - "loss": 0.8847, + "learning_rate": 5.601721005299418e-06, + "loss": 0.887, "step": 23136 }, { - "epoch": 0.6565550510783201, + "epoch": 0.6556434016265692, "grad_norm": 0.0, - "learning_rate": 5.574537355059682e-06, - "loss": 0.7939, + "learning_rate": 5.600896773734585e-06, + "loss": 0.9162, "step": 23137 }, { - "epoch": 0.656583427922815, + "epoch": 0.6556717390688317, "grad_norm": 0.0, - "learning_rate": 5.573713194469961e-06, - "loss": 0.7648, + "learning_rate": 5.600072579225038e-06, + "loss": 0.8535, "step": 23138 }, { - "epoch": 0.6566118047673098, + "epoch": 0.6557000765110941, "grad_norm": 0.0, - "learning_rate": 5.572889071269279e-06, - "loss": 0.8782, + "learning_rate": 5.5992484217777074e-06, + "loss": 0.8609, "step": 23139 }, { - "epoch": 0.6566401816118048, + "epoch": 0.6557284139533566, "grad_norm": 0.0, - "learning_rate": 5.5720649854646e-06, - "loss": 0.8937, + "learning_rate": 5.598424301399543e-06, + "loss": 0.8788, "step": 23140 }, { - "epoch": 0.6566685584562997, + "epoch": 0.6557567513956191, "grad_norm": 0.0, - "learning_rate": 5.571240937062893e-06, - "loss": 0.8658, + "learning_rate": 5.597600218097489e-06, + "loss": 0.909, "step": 23141 }, { - "epoch": 0.6566969353007945, + "epoch": 0.6557850888378814, "grad_norm": 0.0, - "learning_rate": 5.570416926071109e-06, - "loss": 0.9265, + "learning_rate": 5.5967761718784795e-06, + "loss": 0.8413, "step": 23142 }, { - "epoch": 0.6567253121452894, + "epoch": 0.6558134262801439, "grad_norm": 0.0, - "learning_rate": 5.569592952496212e-06, - "loss": 0.7489, + "learning_rate": 5.595952162749459e-06, + "loss": 0.7628, "step": 23143 }, { - "epoch": 0.6567536889897844, + "epoch": 0.6558417637224064, "grad_norm": 0.0, - "learning_rate": 5.568769016345163e-06, - "loss": 0.8845, + "learning_rate": 5.595128190717369e-06, + "loss": 0.8969, "step": 23144 }, { - "epoch": 0.6567820658342792, + "epoch": 0.6558701011646689, "grad_norm": 0.0, - "learning_rate": 5.56794511762492e-06, - "loss": 0.7537, + "learning_rate": 5.59430425578915e-06, + "loss": 0.9005, "step": 23145 }, { - "epoch": 0.6568104426787741, + "epoch": 0.6558984386069313, "grad_norm": 0.0, - "learning_rate": 5.567121256342449e-06, - "loss": 0.7813, + "learning_rate": 5.5934803579717455e-06, + "loss": 0.7923, "step": 23146 }, { - "epoch": 0.6568388195232691, + "epoch": 0.6559267760491938, "grad_norm": 0.0, - "learning_rate": 5.566297432504699e-06, - "loss": 0.8355, + "learning_rate": 5.59265649727209e-06, + "loss": 0.8592, "step": 23147 }, { - "epoch": 0.6568671963677639, + "epoch": 0.6559551134914563, "grad_norm": 0.0, - "learning_rate": 5.565473646118635e-06, - "loss": 0.8256, + "learning_rate": 5.591832673697124e-06, + "loss": 0.8652, "step": 23148 }, { - "epoch": 0.6568955732122588, + "epoch": 0.6559834509337187, "grad_norm": 0.0, - "learning_rate": 5.564649897191218e-06, - "loss": 0.7393, + "learning_rate": 5.591008887253792e-06, + "loss": 0.7237, "step": 23149 }, { - "epoch": 0.6569239500567536, + "epoch": 0.6560117883759812, "grad_norm": 0.0, - "learning_rate": 5.563826185729398e-06, - "loss": 0.6977, + "learning_rate": 5.590185137949027e-06, + "loss": 0.8219, "step": 23150 }, { - "epoch": 0.6569523269012486, + "epoch": 0.6560401258182437, "grad_norm": 0.0, - "learning_rate": 5.5630025117401385e-06, - "loss": 0.9395, + "learning_rate": 5.589361425789769e-06, + "loss": 0.8213, "step": 23151 }, { - "epoch": 0.6569807037457435, + "epoch": 0.656068463260506, "grad_norm": 0.0, - "learning_rate": 5.562178875230401e-06, - "loss": 0.8932, + "learning_rate": 5.588537750782961e-06, + "loss": 0.6799, "step": 23152 }, { - "epoch": 0.6570090805902383, + "epoch": 0.6560968007027685, "grad_norm": 0.0, - "learning_rate": 5.561355276207132e-06, - "loss": 0.9515, + "learning_rate": 5.587714112935535e-06, + "loss": 0.7655, "step": 23153 }, { - "epoch": 0.6570374574347333, + "epoch": 0.656125138145031, "grad_norm": 0.0, - "learning_rate": 5.560531714677296e-06, - "loss": 0.8303, + "learning_rate": 5.5868905122544344e-06, + "loss": 0.8544, "step": 23154 }, { - "epoch": 0.6570658342792282, + "epoch": 0.6561534755872935, "grad_norm": 0.0, - "learning_rate": 5.559708190647852e-06, - "loss": 0.8294, + "learning_rate": 5.5860669487465915e-06, + "loss": 0.8286, "step": 23155 }, { - "epoch": 0.657094211123723, + "epoch": 0.6561818130295559, "grad_norm": 0.0, - "learning_rate": 5.558884704125749e-06, - "loss": 0.8998, + "learning_rate": 5.585243422418945e-06, + "loss": 0.738, "step": 23156 }, { - "epoch": 0.657122587968218, + "epoch": 0.6562101504718184, "grad_norm": 0.0, - "learning_rate": 5.558061255117946e-06, - "loss": 0.8887, + "learning_rate": 5.584419933278431e-06, + "loss": 0.8212, "step": 23157 }, { - "epoch": 0.6571509648127128, + "epoch": 0.6562384879140809, "grad_norm": 0.0, - "learning_rate": 5.557237843631402e-06, - "loss": 0.845, + "learning_rate": 5.583596481331987e-06, + "loss": 0.7737, "step": 23158 }, { - "epoch": 0.6571793416572077, + "epoch": 0.6562668253563433, "grad_norm": 0.0, - "learning_rate": 5.556414469673064e-06, - "loss": 0.876, + "learning_rate": 5.582773066586553e-06, + "loss": 0.703, "step": 23159 }, { - "epoch": 0.6572077185017026, + "epoch": 0.6562951627986058, "grad_norm": 0.0, - "learning_rate": 5.555591133249898e-06, - "loss": 0.8512, + "learning_rate": 5.581949689049064e-06, + "loss": 0.8953, "step": 23160 }, { - "epoch": 0.6572360953461975, + "epoch": 0.6563235002408683, "grad_norm": 0.0, - "learning_rate": 5.554767834368851e-06, - "loss": 0.789, + "learning_rate": 5.58112634872645e-06, + "loss": 0.8868, "step": 23161 }, { - "epoch": 0.6572644721906924, + "epoch": 0.6563518376831308, "grad_norm": 0.0, - "learning_rate": 5.55394457303688e-06, - "loss": 0.8524, + "learning_rate": 5.580303045625652e-06, + "loss": 0.7894, "step": 23162 }, { - "epoch": 0.6572928490351873, + "epoch": 0.6563801751253932, "grad_norm": 0.0, - "learning_rate": 5.553121349260943e-06, - "loss": 0.8708, + "learning_rate": 5.5794797797536e-06, + "loss": 0.8148, "step": 23163 }, { - "epoch": 0.6573212258796822, + "epoch": 0.6564085125676556, "grad_norm": 0.0, - "learning_rate": 5.5522981630479865e-06, - "loss": 0.895, + "learning_rate": 5.578656551117232e-06, + "loss": 0.8071, "step": 23164 }, { - "epoch": 0.6573496027241771, + "epoch": 0.6564368500099181, "grad_norm": 0.0, - "learning_rate": 5.551475014404966e-06, - "loss": 0.8634, + "learning_rate": 5.577833359723485e-06, + "loss": 0.9266, "step": 23165 }, { - "epoch": 0.6573779795686719, + "epoch": 0.6564651874521805, "grad_norm": 0.0, - "learning_rate": 5.550651903338842e-06, - "loss": 0.9642, + "learning_rate": 5.577010205579285e-06, + "loss": 0.7932, "step": 23166 }, { - "epoch": 0.6574063564131668, + "epoch": 0.656493524894443, "grad_norm": 0.0, - "learning_rate": 5.549828829856555e-06, - "loss": 0.7637, + "learning_rate": 5.576187088691576e-06, + "loss": 0.8637, "step": 23167 }, { - "epoch": 0.6574347332576618, + "epoch": 0.6565218623367055, "grad_norm": 0.0, - "learning_rate": 5.549005793965065e-06, - "loss": 0.9725, + "learning_rate": 5.575364009067281e-06, + "loss": 0.82, "step": 23168 }, { - "epoch": 0.6574631101021566, + "epoch": 0.656550199778968, "grad_norm": 0.0, - "learning_rate": 5.548182795671328e-06, - "loss": 0.8324, + "learning_rate": 5.574540966713338e-06, + "loss": 0.6781, "step": 23169 }, { - "epoch": 0.6574914869466515, + "epoch": 0.6565785372212304, "grad_norm": 0.0, - "learning_rate": 5.547359834982286e-06, - "loss": 0.7942, + "learning_rate": 5.57371796163668e-06, + "loss": 0.7947, "step": 23170 }, { - "epoch": 0.6575198637911465, + "epoch": 0.6566068746634929, "grad_norm": 0.0, - "learning_rate": 5.546536911904896e-06, - "loss": 0.8253, + "learning_rate": 5.57289499384424e-06, + "loss": 0.8794, "step": 23171 }, { - "epoch": 0.6575482406356413, + "epoch": 0.6566352121057554, "grad_norm": 0.0, - "learning_rate": 5.545714026446112e-06, - "loss": 0.8732, + "learning_rate": 5.572072063342948e-06, + "loss": 0.7768, "step": 23172 }, { - "epoch": 0.6575766174801362, + "epoch": 0.6566635495480178, "grad_norm": 0.0, - "learning_rate": 5.544891178612879e-06, - "loss": 0.8623, + "learning_rate": 5.571249170139742e-06, + "loss": 0.8528, "step": 23173 }, { - "epoch": 0.6576049943246312, + "epoch": 0.6566918869902802, "grad_norm": 0.0, - "learning_rate": 5.54406836841215e-06, - "loss": 0.8814, + "learning_rate": 5.570426314241544e-06, + "loss": 0.8896, "step": 23174 }, { - "epoch": 0.657633371169126, + "epoch": 0.6567202244325427, "grad_norm": 0.0, - "learning_rate": 5.543245595850876e-06, - "loss": 0.8238, + "learning_rate": 5.569603495655296e-06, + "loss": 0.8723, "step": 23175 }, { - "epoch": 0.6576617480136209, + "epoch": 0.6567485618748051, "grad_norm": 0.0, - "learning_rate": 5.542422860936006e-06, - "loss": 0.7987, + "learning_rate": 5.568780714387917e-06, + "loss": 0.9019, "step": 23176 }, { - "epoch": 0.6576901248581157, + "epoch": 0.6567768993170676, "grad_norm": 0.0, - "learning_rate": 5.541600163674497e-06, - "loss": 0.751, + "learning_rate": 5.567957970446344e-06, + "loss": 0.7492, "step": 23177 }, { - "epoch": 0.6577185017026107, + "epoch": 0.6568052367593301, "grad_norm": 0.0, - "learning_rate": 5.5407775040732845e-06, - "loss": 0.7821, + "learning_rate": 5.567135263837511e-06, + "loss": 0.8448, "step": 23178 }, { - "epoch": 0.6577468785471056, + "epoch": 0.6568335742015926, "grad_norm": 0.0, - "learning_rate": 5.539954882139327e-06, - "loss": 0.9067, + "learning_rate": 5.56631259456834e-06, + "loss": 0.9129, "step": 23179 }, { - "epoch": 0.6577752553916004, + "epoch": 0.656861911643855, "grad_norm": 0.0, - "learning_rate": 5.539132297879575e-06, - "loss": 0.9527, + "learning_rate": 5.5654899626457645e-06, + "loss": 0.9888, "step": 23180 }, { - "epoch": 0.6578036322360954, + "epoch": 0.6568902490861175, "grad_norm": 0.0, - "learning_rate": 5.538309751300967e-06, - "loss": 0.9691, + "learning_rate": 5.564667368076717e-06, + "loss": 0.8492, "step": 23181 }, { - "epoch": 0.6578320090805903, + "epoch": 0.65691858652838, "grad_norm": 0.0, - "learning_rate": 5.5374872424104595e-06, - "loss": 0.8539, + "learning_rate": 5.5638448108681195e-06, + "loss": 0.8794, "step": 23182 }, { - "epoch": 0.6578603859250851, + "epoch": 0.6569469239706424, "grad_norm": 0.0, - "learning_rate": 5.536664771215002e-06, - "loss": 0.8567, + "learning_rate": 5.563022291026905e-06, + "loss": 0.9305, "step": 23183 }, { - "epoch": 0.65788876276958, + "epoch": 0.6569752614129049, "grad_norm": 0.0, - "learning_rate": 5.535842337721531e-06, - "loss": 0.7775, + "learning_rate": 5.562199808560001e-06, + "loss": 0.8692, "step": 23184 }, { - "epoch": 0.6579171396140749, + "epoch": 0.6570035988551673, "grad_norm": 0.0, - "learning_rate": 5.535019941937003e-06, - "loss": 0.8828, + "learning_rate": 5.561377363474336e-06, + "loss": 0.9046, "step": 23185 }, { - "epoch": 0.6579455164585698, + "epoch": 0.6570319362974298, "grad_norm": 0.0, - "learning_rate": 5.534197583868366e-06, - "loss": 0.7823, + "learning_rate": 5.560554955776843e-06, + "loss": 0.9234, "step": 23186 }, { - "epoch": 0.6579738933030647, + "epoch": 0.6570602737396922, "grad_norm": 0.0, - "learning_rate": 5.533375263522558e-06, - "loss": 0.8758, + "learning_rate": 5.559732585474438e-06, + "loss": 0.9326, "step": 23187 }, { - "epoch": 0.6580022701475596, + "epoch": 0.6570886111819547, "grad_norm": 0.0, - "learning_rate": 5.532552980906529e-06, - "loss": 0.7599, + "learning_rate": 5.558910252574054e-06, + "loss": 0.9013, "step": 23188 }, { - "epoch": 0.6580306469920545, + "epoch": 0.6571169486242172, "grad_norm": 0.0, - "learning_rate": 5.5317307360272275e-06, - "loss": 0.7431, + "learning_rate": 5.558087957082624e-06, + "loss": 0.9432, "step": 23189 }, { - "epoch": 0.6580590238365494, + "epoch": 0.6571452860664796, "grad_norm": 0.0, - "learning_rate": 5.5309085288915965e-06, - "loss": 0.8134, + "learning_rate": 5.557265699007064e-06, + "loss": 0.8482, "step": 23190 }, { - "epoch": 0.6580874006810443, + "epoch": 0.6571736235087421, "grad_norm": 0.0, - "learning_rate": 5.530086359506587e-06, - "loss": 0.8077, + "learning_rate": 5.556443478354309e-06, + "loss": 0.8298, "step": 23191 }, { - "epoch": 0.6581157775255392, + "epoch": 0.6572019609510046, "grad_norm": 0.0, - "learning_rate": 5.529264227879134e-06, - "loss": 0.8443, + "learning_rate": 5.555621295131276e-06, + "loss": 0.8527, "step": 23192 }, { - "epoch": 0.658144154370034, + "epoch": 0.6572302983932671, "grad_norm": 0.0, - "learning_rate": 5.528442134016188e-06, - "loss": 0.8318, + "learning_rate": 5.554799149344896e-06, + "loss": 0.7162, "step": 23193 }, { - "epoch": 0.6581725312145289, + "epoch": 0.6572586358355295, "grad_norm": 0.0, - "learning_rate": 5.527620077924696e-06, - "loss": 0.7833, + "learning_rate": 5.553977041002097e-06, + "loss": 0.8598, "step": 23194 }, { - "epoch": 0.6582009080590239, + "epoch": 0.657286973277792, "grad_norm": 0.0, - "learning_rate": 5.526798059611593e-06, - "loss": 0.8751, + "learning_rate": 5.553154970109795e-06, + "loss": 0.8951, "step": 23195 }, { - "epoch": 0.6582292849035187, + "epoch": 0.6573153107200544, "grad_norm": 0.0, - "learning_rate": 5.5259760790838294e-06, - "loss": 0.9629, + "learning_rate": 5.552332936674922e-06, + "loss": 0.8578, "step": 23196 }, { - "epoch": 0.6582576617480136, + "epoch": 0.6573436481623168, "grad_norm": 0.0, - "learning_rate": 5.52515413634835e-06, - "loss": 0.6862, + "learning_rate": 5.5515109407044e-06, + "loss": 0.7899, "step": 23197 }, { - "epoch": 0.6582860385925086, + "epoch": 0.6573719856045793, "grad_norm": 0.0, - "learning_rate": 5.52433223141209e-06, - "loss": 0.8814, + "learning_rate": 5.550688982205152e-06, + "loss": 0.7688, "step": 23198 }, { - "epoch": 0.6583144154370034, + "epoch": 0.6574003230468418, "grad_norm": 0.0, - "learning_rate": 5.523510364281999e-06, - "loss": 0.8609, + "learning_rate": 5.549867061184108e-06, + "loss": 0.7832, "step": 23199 }, { - "epoch": 0.6583427922814983, + "epoch": 0.6574286604891042, "grad_norm": 0.0, - "learning_rate": 5.52268853496502e-06, - "loss": 0.839, + "learning_rate": 5.549045177648182e-06, + "loss": 0.866, "step": 23200 }, { - "epoch": 0.6583711691259931, + "epoch": 0.6574569979313667, "grad_norm": 0.0, - "learning_rate": 5.52186674346809e-06, - "loss": 0.8906, + "learning_rate": 5.548223331604302e-06, + "loss": 0.6911, "step": 23201 }, { - "epoch": 0.6583995459704881, + "epoch": 0.6574853353736292, "grad_norm": 0.0, - "learning_rate": 5.52104498979815e-06, - "loss": 0.8653, + "learning_rate": 5.547401523059393e-06, + "loss": 0.698, "step": 23202 }, { - "epoch": 0.658427922814983, + "epoch": 0.6575136728158917, "grad_norm": 0.0, - "learning_rate": 5.52022327396215e-06, - "loss": 0.9359, + "learning_rate": 5.546579752020371e-06, + "loss": 0.9396, "step": 23203 }, { - "epoch": 0.6584562996594778, + "epoch": 0.6575420102581541, "grad_norm": 0.0, - "learning_rate": 5.519401595967021e-06, - "loss": 0.9284, + "learning_rate": 5.545758018494165e-06, + "loss": 0.8969, "step": 23204 }, { - "epoch": 0.6584846765039728, + "epoch": 0.6575703477004166, "grad_norm": 0.0, - "learning_rate": 5.518579955819707e-06, - "loss": 0.7828, + "learning_rate": 5.5449363224876905e-06, + "loss": 0.8533, "step": 23205 }, { - "epoch": 0.6585130533484677, + "epoch": 0.657598685142679, "grad_norm": 0.0, - "learning_rate": 5.517758353527152e-06, - "loss": 0.8615, + "learning_rate": 5.5441146640078705e-06, + "loss": 0.8432, "step": 23206 }, { - "epoch": 0.6585414301929625, + "epoch": 0.6576270225849414, "grad_norm": 0.0, - "learning_rate": 5.516936789096292e-06, - "loss": 0.8117, + "learning_rate": 5.54329304306163e-06, + "loss": 0.8607, "step": 23207 }, { - "epoch": 0.6585698070374575, + "epoch": 0.6576553600272039, "grad_norm": 0.0, - "learning_rate": 5.516115262534073e-06, - "loss": 0.8958, + "learning_rate": 5.542471459655886e-06, + "loss": 0.7587, "step": 23208 }, { - "epoch": 0.6585981838819523, + "epoch": 0.6576836974694664, "grad_norm": 0.0, - "learning_rate": 5.515293773847426e-06, - "loss": 0.8684, + "learning_rate": 5.541649913797559e-06, + "loss": 0.8338, "step": 23209 }, { - "epoch": 0.6586265607264472, + "epoch": 0.6577120349117289, "grad_norm": 0.0, - "learning_rate": 5.5144723230432935e-06, - "loss": 0.7311, + "learning_rate": 5.54082840549357e-06, + "loss": 0.8899, "step": 23210 }, { - "epoch": 0.6586549375709421, + "epoch": 0.6577403723539913, "grad_norm": 0.0, - "learning_rate": 5.51365091012862e-06, - "loss": 0.8837, + "learning_rate": 5.540006934750839e-06, + "loss": 0.824, "step": 23211 }, { - "epoch": 0.658683314415437, + "epoch": 0.6577687097962538, "grad_norm": 0.0, - "learning_rate": 5.512829535110334e-06, - "loss": 0.8369, + "learning_rate": 5.539185501576291e-06, + "loss": 0.8142, "step": 23212 }, { - "epoch": 0.6587116912599319, + "epoch": 0.6577970472385163, "grad_norm": 0.0, - "learning_rate": 5.512008197995379e-06, - "loss": 0.7863, + "learning_rate": 5.538364105976835e-06, + "loss": 0.8943, "step": 23213 }, { - "epoch": 0.6587400681044268, + "epoch": 0.6578253846807787, "grad_norm": 0.0, - "learning_rate": 5.511186898790697e-06, - "loss": 0.7865, + "learning_rate": 5.5375427479593945e-06, + "loss": 0.837, "step": 23214 }, { - "epoch": 0.6587684449489217, + "epoch": 0.6578537221230412, "grad_norm": 0.0, - "learning_rate": 5.510365637503216e-06, - "loss": 0.8059, + "learning_rate": 5.536721427530894e-06, + "loss": 0.911, "step": 23215 }, { - "epoch": 0.6587968217934166, + "epoch": 0.6578820595653037, "grad_norm": 0.0, - "learning_rate": 5.509544414139878e-06, - "loss": 0.808, + "learning_rate": 5.535900144698241e-06, + "loss": 0.7876, "step": 23216 }, { - "epoch": 0.6588251986379114, + "epoch": 0.6579103970075662, "grad_norm": 0.0, - "learning_rate": 5.508723228707625e-06, - "loss": 0.7288, + "learning_rate": 5.535078899468359e-06, + "loss": 0.7131, "step": 23217 }, { - "epoch": 0.6588535754824063, + "epoch": 0.6579387344498285, "grad_norm": 0.0, - "learning_rate": 5.507902081213385e-06, - "loss": 0.8045, + "learning_rate": 5.53425769184817e-06, + "loss": 0.8504, "step": 23218 }, { - "epoch": 0.6588819523269013, + "epoch": 0.657967071892091, "grad_norm": 0.0, - "learning_rate": 5.507080971664098e-06, - "loss": 0.8106, + "learning_rate": 5.533436521844582e-06, + "loss": 0.8527, "step": 23219 }, { - "epoch": 0.6589103291713961, + "epoch": 0.6579954093343535, "grad_norm": 0.0, - "learning_rate": 5.5062599000666995e-06, - "loss": 0.8308, + "learning_rate": 5.532615389464522e-06, + "loss": 0.8183, "step": 23220 }, { - "epoch": 0.658938706015891, + "epoch": 0.6580237467766159, "grad_norm": 0.0, - "learning_rate": 5.505438866428125e-06, - "loss": 0.8468, + "learning_rate": 5.531794294714896e-06, + "loss": 0.8232, "step": 23221 }, { - "epoch": 0.658967082860386, + "epoch": 0.6580520842188784, "grad_norm": 0.0, - "learning_rate": 5.504617870755313e-06, - "loss": 0.8062, + "learning_rate": 5.530973237602627e-06, + "loss": 0.9154, "step": 23222 }, { - "epoch": 0.6589954597048808, + "epoch": 0.6580804216611409, "grad_norm": 0.0, - "learning_rate": 5.503796913055197e-06, - "loss": 0.8196, + "learning_rate": 5.53015221813463e-06, + "loss": 0.7372, "step": 23223 }, { - "epoch": 0.6590238365493757, + "epoch": 0.6581087591034033, "grad_norm": 0.0, - "learning_rate": 5.5029759933347094e-06, - "loss": 0.8318, + "learning_rate": 5.5293312363178194e-06, + "loss": 0.8388, "step": 23224 }, { - "epoch": 0.6590522133938707, + "epoch": 0.6581370965456658, "grad_norm": 0.0, - "learning_rate": 5.5021551116007835e-06, - "loss": 0.8786, + "learning_rate": 5.528510292159117e-06, + "loss": 0.8791, "step": 23225 }, { - "epoch": 0.6590805902383655, + "epoch": 0.6581654339879283, "grad_norm": 0.0, - "learning_rate": 5.501334267860361e-06, - "loss": 0.8752, + "learning_rate": 5.527689385665429e-06, + "loss": 0.8137, "step": 23226 }, { - "epoch": 0.6591089670828604, + "epoch": 0.6581937714301908, "grad_norm": 0.0, - "learning_rate": 5.500513462120365e-06, - "loss": 0.7933, + "learning_rate": 5.526868516843673e-06, + "loss": 0.8867, "step": 23227 }, { - "epoch": 0.6591373439273552, + "epoch": 0.6582221088724531, "grad_norm": 0.0, - "learning_rate": 5.499692694387735e-06, - "loss": 0.8688, + "learning_rate": 5.526047685700771e-06, + "loss": 0.9226, "step": 23228 }, { - "epoch": 0.6591657207718502, + "epoch": 0.6582504463147156, "grad_norm": 0.0, - "learning_rate": 5.498871964669405e-06, - "loss": 0.7535, + "learning_rate": 5.525226892243623e-06, + "loss": 0.8207, "step": 23229 }, { - "epoch": 0.6591940976163451, + "epoch": 0.6582787837569781, "grad_norm": 0.0, - "learning_rate": 5.498051272972304e-06, - "loss": 0.8217, + "learning_rate": 5.524406136479153e-06, + "loss": 1.0178, "step": 23230 }, { - "epoch": 0.6592224744608399, + "epoch": 0.6583071211992405, "grad_norm": 0.0, - "learning_rate": 5.49723061930337e-06, - "loss": 0.9169, + "learning_rate": 5.5235854184142755e-06, + "loss": 0.8893, "step": 23231 }, { - "epoch": 0.6592508513053349, + "epoch": 0.658335458641503, "grad_norm": 0.0, - "learning_rate": 5.4964100036695274e-06, - "loss": 0.7943, + "learning_rate": 5.522764738055897e-06, + "loss": 0.83, "step": 23232 }, { - "epoch": 0.6592792281498298, + "epoch": 0.6583637960837655, "grad_norm": 0.0, - "learning_rate": 5.495589426077712e-06, - "loss": 0.9485, + "learning_rate": 5.521944095410936e-06, + "loss": 0.9121, "step": 23233 }, { - "epoch": 0.6593076049943246, + "epoch": 0.658392133526028, "grad_norm": 0.0, - "learning_rate": 5.494768886534858e-06, - "loss": 0.8702, + "learning_rate": 5.521123490486301e-06, + "loss": 0.8707, "step": 23234 }, { - "epoch": 0.6593359818388195, + "epoch": 0.6584204709682904, "grad_norm": 0.0, - "learning_rate": 5.493948385047887e-06, - "loss": 0.7466, + "learning_rate": 5.520302923288905e-06, + "loss": 0.8352, "step": 23235 }, { - "epoch": 0.6593643586833144, + "epoch": 0.6584488084105529, "grad_norm": 0.0, - "learning_rate": 5.49312792162374e-06, - "loss": 0.7292, + "learning_rate": 5.51948239382566e-06, + "loss": 0.7978, "step": 23236 }, { - "epoch": 0.6593927355278093, + "epoch": 0.6584771458528154, "grad_norm": 0.0, - "learning_rate": 5.492307496269349e-06, - "loss": 0.81, + "learning_rate": 5.518661902103479e-06, + "loss": 0.9237, "step": 23237 }, { - "epoch": 0.6594211123723042, + "epoch": 0.6585054832950777, "grad_norm": 0.0, - "learning_rate": 5.4914871089916334e-06, - "loss": 0.9491, + "learning_rate": 5.5178414481292745e-06, + "loss": 0.9124, "step": 23238 }, { - "epoch": 0.6594494892167991, + "epoch": 0.6585338207373402, "grad_norm": 0.0, - "learning_rate": 5.490666759797529e-06, - "loss": 0.8751, + "learning_rate": 5.5170210319099595e-06, + "loss": 0.8892, "step": 23239 }, { - "epoch": 0.659477866061294, + "epoch": 0.6585621581796027, "grad_norm": 0.0, - "learning_rate": 5.489846448693972e-06, - "loss": 0.8458, + "learning_rate": 5.5162006534524346e-06, + "loss": 0.8419, "step": 23240 }, { - "epoch": 0.6595062429057889, + "epoch": 0.6585904956218652, "grad_norm": 0.0, - "learning_rate": 5.489026175687878e-06, - "loss": 0.8426, + "learning_rate": 5.5153803127636225e-06, + "loss": 0.9222, "step": 23241 }, { - "epoch": 0.6595346197502838, + "epoch": 0.6586188330641276, "grad_norm": 0.0, - "learning_rate": 5.488205940786184e-06, - "loss": 0.8286, + "learning_rate": 5.514560009850423e-06, + "loss": 0.8024, "step": 23242 }, { - "epoch": 0.6595629965947787, + "epoch": 0.6586471705063901, "grad_norm": 0.0, - "learning_rate": 5.487385743995821e-06, - "loss": 0.7582, + "learning_rate": 5.513739744719751e-06, + "loss": 0.7844, "step": 23243 }, { - "epoch": 0.6595913734392735, + "epoch": 0.6586755079486526, "grad_norm": 0.0, - "learning_rate": 5.486565585323709e-06, - "loss": 0.8205, + "learning_rate": 5.5129195173785184e-06, + "loss": 0.7712, "step": 23244 }, { - "epoch": 0.6596197502837684, + "epoch": 0.658703845390915, "grad_norm": 0.0, - "learning_rate": 5.4857454647767805e-06, - "loss": 0.8405, + "learning_rate": 5.512099327833627e-06, + "loss": 0.8749, "step": 23245 }, { - "epoch": 0.6596481271282634, + "epoch": 0.6587321828331775, "grad_norm": 0.0, - "learning_rate": 5.484925382361967e-06, - "loss": 0.6484, + "learning_rate": 5.511279176091989e-06, + "loss": 0.8749, "step": 23246 }, { - "epoch": 0.6596765039727582, + "epoch": 0.65876052027544, "grad_norm": 0.0, - "learning_rate": 5.4841053380861875e-06, - "loss": 0.8131, + "learning_rate": 5.5104590621605135e-06, + "loss": 0.8734, "step": 23247 }, { - "epoch": 0.6597048808172531, + "epoch": 0.6587888577177023, "grad_norm": 0.0, - "learning_rate": 5.483285331956374e-06, - "loss": 0.859, + "learning_rate": 5.509638986046109e-06, + "loss": 0.9249, "step": 23248 }, { - "epoch": 0.6597332576617481, + "epoch": 0.6588171951599648, "grad_norm": 0.0, - "learning_rate": 5.482465363979457e-06, - "loss": 0.7975, + "learning_rate": 5.508818947755687e-06, + "loss": 0.8003, "step": 23249 }, { - "epoch": 0.6597616345062429, + "epoch": 0.6588455326022273, "grad_norm": 0.0, - "learning_rate": 5.481645434162353e-06, - "loss": 0.841, + "learning_rate": 5.507998947296147e-06, + "loss": 0.8706, "step": 23250 }, { - "epoch": 0.6597900113507378, + "epoch": 0.6588738700444898, "grad_norm": 0.0, - "learning_rate": 5.480825542511991e-06, - "loss": 0.7891, + "learning_rate": 5.507178984674399e-06, + "loss": 0.7957, "step": 23251 }, { - "epoch": 0.6598183881952326, + "epoch": 0.6589022074867522, "grad_norm": 0.0, - "learning_rate": 5.4800056890353025e-06, - "loss": 0.9199, + "learning_rate": 5.5063590598973545e-06, + "loss": 0.7733, "step": 23252 }, { - "epoch": 0.6598467650397276, + "epoch": 0.6589305449290147, "grad_norm": 0.0, - "learning_rate": 5.479185873739208e-06, - "loss": 0.7986, + "learning_rate": 5.505539172971912e-06, + "loss": 0.8243, "step": 23253 }, { - "epoch": 0.6598751418842225, + "epoch": 0.6589588823712772, "grad_norm": 0.0, - "learning_rate": 5.478366096630636e-06, - "loss": 0.8994, + "learning_rate": 5.504719323904988e-06, + "loss": 0.9214, "step": 23254 }, { - "epoch": 0.6599035187287173, + "epoch": 0.6589872198135396, "grad_norm": 0.0, - "learning_rate": 5.477546357716508e-06, - "loss": 0.9705, + "learning_rate": 5.503899512703477e-06, + "loss": 0.9048, "step": 23255 }, { - "epoch": 0.6599318955732123, + "epoch": 0.6590155572558021, "grad_norm": 0.0, - "learning_rate": 5.476726657003746e-06, - "loss": 0.8134, + "learning_rate": 5.5030797393742905e-06, + "loss": 0.7772, "step": 23256 }, { - "epoch": 0.6599602724177072, + "epoch": 0.6590438946980646, "grad_norm": 0.0, - "learning_rate": 5.475906994499283e-06, - "loss": 0.7606, + "learning_rate": 5.502260003924337e-06, + "loss": 0.8851, "step": 23257 }, { - "epoch": 0.659988649262202, + "epoch": 0.6590722321403271, "grad_norm": 0.0, - "learning_rate": 5.475087370210033e-06, - "loss": 0.8022, + "learning_rate": 5.501440306360512e-06, + "loss": 0.9037, "step": 23258 }, { - "epoch": 0.6600170261066969, + "epoch": 0.6591005695825894, "grad_norm": 0.0, - "learning_rate": 5.474267784142923e-06, - "loss": 0.8536, + "learning_rate": 5.50062064668973e-06, + "loss": 0.8618, "step": 23259 }, { - "epoch": 0.6600454029511919, + "epoch": 0.6591289070248519, "grad_norm": 0.0, - "learning_rate": 5.47344823630488e-06, - "loss": 0.813, + "learning_rate": 5.499801024918887e-06, + "loss": 0.7934, "step": 23260 }, { - "epoch": 0.6600737797956867, + "epoch": 0.6591572444671144, "grad_norm": 0.0, - "learning_rate": 5.47262872670282e-06, - "loss": 0.833, + "learning_rate": 5.498981441054894e-06, + "loss": 0.8999, "step": 23261 }, { - "epoch": 0.6601021566401816, + "epoch": 0.6591855819093768, "grad_norm": 0.0, - "learning_rate": 5.4718092553436684e-06, - "loss": 0.8784, + "learning_rate": 5.498161895104653e-06, + "loss": 0.8594, "step": 23262 }, { - "epoch": 0.6601305334846765, + "epoch": 0.6592139193516393, "grad_norm": 0.0, - "learning_rate": 5.4709898222343515e-06, - "loss": 0.9144, + "learning_rate": 5.4973423870750645e-06, + "loss": 0.7918, "step": 23263 }, { - "epoch": 0.6601589103291714, + "epoch": 0.6592422567939018, "grad_norm": 0.0, - "learning_rate": 5.4701704273817825e-06, - "loss": 0.8745, + "learning_rate": 5.49652291697303e-06, + "loss": 0.769, "step": 23264 }, { - "epoch": 0.6601872871736663, + "epoch": 0.6592705942361643, "grad_norm": 0.0, - "learning_rate": 5.469351070792888e-06, - "loss": 0.7761, + "learning_rate": 5.495703484805462e-06, + "loss": 0.8216, "step": 23265 }, { - "epoch": 0.6602156640181612, + "epoch": 0.6592989316784267, "grad_norm": 0.0, - "learning_rate": 5.468531752474591e-06, - "loss": 0.7957, + "learning_rate": 5.494884090579252e-06, + "loss": 0.8608, "step": 23266 }, { - "epoch": 0.6602440408626561, + "epoch": 0.6593272691206892, "grad_norm": 0.0, - "learning_rate": 5.467712472433807e-06, - "loss": 0.8888, + "learning_rate": 5.4940647343013055e-06, + "loss": 0.852, "step": 23267 }, { - "epoch": 0.660272417707151, + "epoch": 0.6593556065629517, "grad_norm": 0.0, - "learning_rate": 5.466893230677463e-06, - "loss": 0.8364, + "learning_rate": 5.49324541597853e-06, + "loss": 0.8529, "step": 23268 }, { - "epoch": 0.6603007945516458, + "epoch": 0.659383944005214, "grad_norm": 0.0, - "learning_rate": 5.466074027212475e-06, - "loss": 0.8611, + "learning_rate": 5.492426135617816e-06, + "loss": 0.7756, "step": 23269 }, { - "epoch": 0.6603291713961408, + "epoch": 0.6594122814474765, "grad_norm": 0.0, - "learning_rate": 5.465254862045761e-06, - "loss": 0.7588, + "learning_rate": 5.491606893226076e-06, + "loss": 0.8843, "step": 23270 }, { - "epoch": 0.6603575482406356, + "epoch": 0.659440618889739, "grad_norm": 0.0, - "learning_rate": 5.464435735184248e-06, - "loss": 0.7821, + "learning_rate": 5.490787688810202e-06, + "loss": 0.957, "step": 23271 }, { - "epoch": 0.6603859250851305, + "epoch": 0.6594689563320014, "grad_norm": 0.0, - "learning_rate": 5.463616646634846e-06, - "loss": 0.8861, + "learning_rate": 5.489968522377096e-06, + "loss": 0.8247, "step": 23272 }, { - "epoch": 0.6604143019296255, + "epoch": 0.6594972937742639, "grad_norm": 0.0, - "learning_rate": 5.462797596404477e-06, - "loss": 0.8581, + "learning_rate": 5.48914939393366e-06, + "loss": 0.749, "step": 23273 }, { - "epoch": 0.6604426787741203, + "epoch": 0.6595256312165264, "grad_norm": 0.0, - "learning_rate": 5.461978584500066e-06, - "loss": 0.7791, + "learning_rate": 5.488330303486795e-06, + "loss": 0.8391, "step": 23274 }, { - "epoch": 0.6604710556186152, + "epoch": 0.6595539686587889, "grad_norm": 0.0, - "learning_rate": 5.461159610928521e-06, - "loss": 0.8391, + "learning_rate": 5.487511251043398e-06, + "loss": 0.8844, "step": 23275 }, { - "epoch": 0.66049943246311, + "epoch": 0.6595823061010513, "grad_norm": 0.0, - "learning_rate": 5.460340675696766e-06, - "loss": 0.9114, + "learning_rate": 5.486692236610373e-06, + "loss": 0.7613, "step": 23276 }, { - "epoch": 0.660527809307605, + "epoch": 0.6596106435433138, "grad_norm": 0.0, - "learning_rate": 5.45952177881172e-06, - "loss": 0.7905, + "learning_rate": 5.485873260194614e-06, + "loss": 0.7878, "step": 23277 }, { - "epoch": 0.6605561861520999, + "epoch": 0.6596389809855763, "grad_norm": 0.0, - "learning_rate": 5.4587029202802946e-06, - "loss": 0.8424, + "learning_rate": 5.485054321803023e-06, + "loss": 0.7061, "step": 23278 }, { - "epoch": 0.6605845629965947, + "epoch": 0.6596673184278387, "grad_norm": 0.0, - "learning_rate": 5.45788410010941e-06, - "loss": 0.8396, + "learning_rate": 5.484235421442492e-06, + "loss": 0.7536, "step": 23279 }, { - "epoch": 0.6606129398410897, + "epoch": 0.6596956558701011, "grad_norm": 0.0, - "learning_rate": 5.457065318305985e-06, - "loss": 0.8344, + "learning_rate": 5.483416559119923e-06, + "loss": 0.7133, "step": 23280 }, { - "epoch": 0.6606413166855846, + "epoch": 0.6597239933123636, "grad_norm": 0.0, - "learning_rate": 5.4562465748769266e-06, - "loss": 0.7803, + "learning_rate": 5.482597734842217e-06, + "loss": 0.9392, "step": 23281 }, { - "epoch": 0.6606696935300794, + "epoch": 0.6597523307546261, "grad_norm": 0.0, - "learning_rate": 5.455427869829166e-06, - "loss": 0.8279, + "learning_rate": 5.481778948616264e-06, + "loss": 0.876, "step": 23282 }, { - "epoch": 0.6606980703745744, + "epoch": 0.6597806681968885, "grad_norm": 0.0, - "learning_rate": 5.454609203169606e-06, - "loss": 0.8506, + "learning_rate": 5.480960200448969e-06, + "loss": 0.8557, "step": 23283 }, { - "epoch": 0.6607264472190693, + "epoch": 0.659809005639151, "grad_norm": 0.0, - "learning_rate": 5.453790574905167e-06, - "loss": 0.8819, + "learning_rate": 5.480141490347221e-06, + "loss": 0.7025, "step": 23284 }, { - "epoch": 0.6607548240635641, + "epoch": 0.6598373430814135, "grad_norm": 0.0, - "learning_rate": 5.452971985042767e-06, - "loss": 0.8774, + "learning_rate": 5.479322818317918e-06, + "loss": 0.849, "step": 23285 }, { - "epoch": 0.660783200908059, + "epoch": 0.6598656805236759, "grad_norm": 0.0, - "learning_rate": 5.452153433589313e-06, - "loss": 0.8792, + "learning_rate": 5.478504184367959e-06, + "loss": 0.8554, "step": 23286 }, { - "epoch": 0.660811577752554, + "epoch": 0.6598940179659384, "grad_norm": 0.0, - "learning_rate": 5.451334920551723e-06, - "loss": 0.8016, + "learning_rate": 5.477685588504238e-06, + "loss": 0.7514, "step": 23287 }, { - "epoch": 0.6608399545970488, + "epoch": 0.6599223554082009, "grad_norm": 0.0, - "learning_rate": 5.450516445936915e-06, - "loss": 0.9156, + "learning_rate": 5.47686703073365e-06, + "loss": 0.7408, "step": 23288 }, { - "epoch": 0.6608683314415437, + "epoch": 0.6599506928504634, "grad_norm": 0.0, - "learning_rate": 5.449698009751796e-06, - "loss": 0.8651, + "learning_rate": 5.4760485110630956e-06, + "loss": 0.9654, "step": 23289 }, { - "epoch": 0.6608967082860386, + "epoch": 0.6599790302927258, "grad_norm": 0.0, - "learning_rate": 5.448879612003283e-06, - "loss": 0.8666, + "learning_rate": 5.47523002949946e-06, + "loss": 0.8084, "step": 23290 }, { - "epoch": 0.6609250851305335, + "epoch": 0.6600073677349882, "grad_norm": 0.0, - "learning_rate": 5.4480612526982915e-06, - "loss": 0.7929, + "learning_rate": 5.474411586049646e-06, + "loss": 0.8576, "step": 23291 }, { - "epoch": 0.6609534619750284, + "epoch": 0.6600357051772507, "grad_norm": 0.0, - "learning_rate": 5.447242931843728e-06, - "loss": 0.806, + "learning_rate": 5.47359318072054e-06, + "loss": 0.9352, "step": 23292 }, { - "epoch": 0.6609818388195232, + "epoch": 0.6600640426195131, "grad_norm": 0.0, - "learning_rate": 5.446424649446507e-06, - "loss": 0.9184, + "learning_rate": 5.472774813519039e-06, + "loss": 0.8966, "step": 23293 }, { - "epoch": 0.6610102156640182, + "epoch": 0.6600923800617756, "grad_norm": 0.0, - "learning_rate": 5.445606405513546e-06, - "loss": 0.7726, + "learning_rate": 5.4719564844520416e-06, + "loss": 0.837, "step": 23294 }, { - "epoch": 0.661038592508513, + "epoch": 0.6601207175040381, "grad_norm": 0.0, - "learning_rate": 5.444788200051749e-06, - "loss": 0.8369, + "learning_rate": 5.471138193526431e-06, + "loss": 0.8963, "step": 23295 }, { - "epoch": 0.6610669693530079, + "epoch": 0.6601490549463005, "grad_norm": 0.0, - "learning_rate": 5.443970033068031e-06, - "loss": 0.8171, + "learning_rate": 5.470319940749107e-06, + "loss": 0.8255, "step": 23296 }, { - "epoch": 0.6610953461975029, + "epoch": 0.660177392388563, "grad_norm": 0.0, - "learning_rate": 5.443151904569302e-06, - "loss": 0.8098, + "learning_rate": 5.469501726126962e-06, + "loss": 0.8583, "step": 23297 }, { - "epoch": 0.6611237230419977, + "epoch": 0.6602057298308255, "grad_norm": 0.0, - "learning_rate": 5.442333814562475e-06, - "loss": 0.8132, + "learning_rate": 5.4686835496668845e-06, + "loss": 0.807, "step": 23298 }, { - "epoch": 0.6611520998864926, + "epoch": 0.660234067273088, "grad_norm": 0.0, - "learning_rate": 5.441515763054462e-06, - "loss": 0.7907, + "learning_rate": 5.467865411375766e-06, + "loss": 0.7254, "step": 23299 }, { - "epoch": 0.6611804767309876, + "epoch": 0.6602624047153504, "grad_norm": 0.0, - "learning_rate": 5.440697750052166e-06, - "loss": 0.8757, + "learning_rate": 5.467047311260501e-06, + "loss": 0.7419, "step": 23300 }, { - "epoch": 0.6612088535754824, + "epoch": 0.6602907421576129, "grad_norm": 0.0, - "learning_rate": 5.439879775562502e-06, - "loss": 0.8507, + "learning_rate": 5.466229249327981e-06, + "loss": 0.7665, "step": 23301 }, { - "epoch": 0.6612372304199773, + "epoch": 0.6603190795998753, "grad_norm": 0.0, - "learning_rate": 5.439061839592382e-06, - "loss": 0.8794, + "learning_rate": 5.465411225585098e-06, + "loss": 0.8132, "step": 23302 }, { - "epoch": 0.6612656072644721, + "epoch": 0.6603474170421377, "grad_norm": 0.0, - "learning_rate": 5.438243942148709e-06, - "loss": 0.8473, + "learning_rate": 5.464593240038736e-06, + "loss": 0.7108, "step": 23303 }, { - "epoch": 0.6612939841089671, + "epoch": 0.6603757544844002, "grad_norm": 0.0, - "learning_rate": 5.437426083238393e-06, - "loss": 1.0122, + "learning_rate": 5.46377529269579e-06, + "loss": 0.7985, "step": 23304 }, { - "epoch": 0.661322360953462, + "epoch": 0.6604040919266627, "grad_norm": 0.0, - "learning_rate": 5.436608262868348e-06, - "loss": 0.8208, + "learning_rate": 5.462957383563154e-06, + "loss": 0.833, "step": 23305 }, { - "epoch": 0.6613507377979568, + "epoch": 0.6604324293689252, "grad_norm": 0.0, - "learning_rate": 5.435790481045474e-06, - "loss": 0.8976, + "learning_rate": 5.462139512647708e-06, + "loss": 0.8859, "step": 23306 }, { - "epoch": 0.6613791146424518, + "epoch": 0.6604607668111876, "grad_norm": 0.0, - "learning_rate": 5.4349727377766845e-06, - "loss": 0.8163, + "learning_rate": 5.461321679956349e-06, + "loss": 0.7009, "step": 23307 }, { - "epoch": 0.6614074914869467, + "epoch": 0.6604891042534501, "grad_norm": 0.0, - "learning_rate": 5.434155033068889e-06, - "loss": 0.8721, + "learning_rate": 5.46050388549596e-06, + "loss": 0.965, "step": 23308 }, { - "epoch": 0.6614358683314415, + "epoch": 0.6605174416957126, "grad_norm": 0.0, - "learning_rate": 5.4333373669289855e-06, - "loss": 0.8148, + "learning_rate": 5.459686129273433e-06, + "loss": 0.763, "step": 23309 }, { - "epoch": 0.6614642451759364, + "epoch": 0.660545779137975, "grad_norm": 0.0, - "learning_rate": 5.4325197393638885e-06, - "loss": 0.9265, + "learning_rate": 5.458868411295659e-06, + "loss": 0.8099, "step": 23310 }, { - "epoch": 0.6614926220204314, + "epoch": 0.6605741165802375, "grad_norm": 0.0, - "learning_rate": 5.4317021503805045e-06, - "loss": 0.7874, + "learning_rate": 5.458050731569517e-06, + "loss": 0.7804, "step": 23311 }, { - "epoch": 0.6615209988649262, + "epoch": 0.6606024540225, "grad_norm": 0.0, - "learning_rate": 5.430884599985732e-06, - "loss": 0.869, + "learning_rate": 5.4572330901019036e-06, + "loss": 0.7895, "step": 23312 }, { - "epoch": 0.6615493757094211, + "epoch": 0.6606307914647624, "grad_norm": 0.0, - "learning_rate": 5.4300670881864895e-06, - "loss": 0.8651, + "learning_rate": 5.456415486899701e-06, + "loss": 0.7589, "step": 23313 }, { - "epoch": 0.661577752553916, + "epoch": 0.6606591289070248, "grad_norm": 0.0, - "learning_rate": 5.429249614989671e-06, - "loss": 0.7969, + "learning_rate": 5.455597921969797e-06, + "loss": 0.811, "step": 23314 }, { - "epoch": 0.6616061293984109, + "epoch": 0.6606874663492873, "grad_norm": 0.0, - "learning_rate": 5.428432180402188e-06, - "loss": 0.7478, + "learning_rate": 5.454780395319086e-06, + "loss": 0.7819, "step": 23315 }, { - "epoch": 0.6616345062429058, + "epoch": 0.6607158037915498, "grad_norm": 0.0, - "learning_rate": 5.427614784430948e-06, - "loss": 0.981, + "learning_rate": 5.453962906954442e-06, + "loss": 0.8593, "step": 23316 }, { - "epoch": 0.6616628830874007, + "epoch": 0.6607441412338122, "grad_norm": 0.0, - "learning_rate": 5.426797427082846e-06, - "loss": 0.8726, + "learning_rate": 5.453145456882756e-06, + "loss": 0.8064, "step": 23317 }, { - "epoch": 0.6616912599318956, + "epoch": 0.6607724786760747, "grad_norm": 0.0, - "learning_rate": 5.425980108364793e-06, - "loss": 0.7819, + "learning_rate": 5.452328045110918e-06, + "loss": 0.8719, "step": 23318 }, { - "epoch": 0.6617196367763905, + "epoch": 0.6608008161183372, "grad_norm": 0.0, - "learning_rate": 5.4251628282836955e-06, - "loss": 0.8975, + "learning_rate": 5.451510671645806e-06, + "loss": 0.9217, "step": 23319 }, { - "epoch": 0.6617480136208853, + "epoch": 0.6608291535605996, "grad_norm": 0.0, - "learning_rate": 5.424345586846449e-06, - "loss": 0.8643, + "learning_rate": 5.450693336494314e-06, + "loss": 0.7717, "step": 23320 }, { - "epoch": 0.6617763904653803, + "epoch": 0.6608574910028621, "grad_norm": 0.0, - "learning_rate": 5.423528384059961e-06, - "loss": 0.8189, + "learning_rate": 5.449876039663316e-06, + "loss": 0.7778, "step": 23321 }, { - "epoch": 0.6618047673098751, + "epoch": 0.6608858284451246, "grad_norm": 0.0, - "learning_rate": 5.422711219931138e-06, - "loss": 0.8166, + "learning_rate": 5.449058781159702e-06, + "loss": 0.8344, "step": 23322 }, { - "epoch": 0.66183314415437, + "epoch": 0.660914165887387, "grad_norm": 0.0, - "learning_rate": 5.421894094466875e-06, - "loss": 0.8149, + "learning_rate": 5.448241560990361e-06, + "loss": 0.8748, "step": 23323 }, { - "epoch": 0.661861520998865, + "epoch": 0.6609425033296494, "grad_norm": 0.0, - "learning_rate": 5.42107700767408e-06, - "loss": 0.8252, + "learning_rate": 5.447424379162165e-06, + "loss": 0.7722, "step": 23324 }, { - "epoch": 0.6618898978433598, + "epoch": 0.6609708407719119, "grad_norm": 0.0, - "learning_rate": 5.420259959559655e-06, - "loss": 0.8286, + "learning_rate": 5.446607235682007e-06, + "loss": 0.8074, "step": 23325 }, { - "epoch": 0.6619182746878547, + "epoch": 0.6609991782141744, "grad_norm": 0.0, - "learning_rate": 5.419442950130498e-06, - "loss": 0.7693, + "learning_rate": 5.445790130556765e-06, + "loss": 0.929, "step": 23326 }, { - "epoch": 0.6619466515323496, + "epoch": 0.6610275156564368, "grad_norm": 0.0, - "learning_rate": 5.418625979393508e-06, - "loss": 0.8511, + "learning_rate": 5.444973063793324e-06, + "loss": 0.6883, "step": 23327 }, { - "epoch": 0.6619750283768445, + "epoch": 0.6610558530986993, "grad_norm": 0.0, - "learning_rate": 5.4178090473555975e-06, - "loss": 0.8159, + "learning_rate": 5.4441560353985715e-06, + "loss": 0.8594, "step": 23328 }, { - "epoch": 0.6620034052213394, + "epoch": 0.6610841905409618, "grad_norm": 0.0, - "learning_rate": 5.416992154023656e-06, - "loss": 0.8376, + "learning_rate": 5.44333904537938e-06, + "loss": 0.7588, "step": 23329 }, { - "epoch": 0.6620317820658342, + "epoch": 0.6611125279832243, "grad_norm": 0.0, - "learning_rate": 5.416175299404588e-06, - "loss": 0.847, + "learning_rate": 5.442522093742636e-06, + "loss": 0.8243, "step": 23330 }, { - "epoch": 0.6620601589103292, + "epoch": 0.6611408654254867, "grad_norm": 0.0, - "learning_rate": 5.415358483505298e-06, - "loss": 0.9901, + "learning_rate": 5.441705180495225e-06, + "loss": 0.8782, "step": 23331 }, { - "epoch": 0.6620885357548241, + "epoch": 0.6611692028677492, "grad_norm": 0.0, - "learning_rate": 5.4145417063326755e-06, - "loss": 0.7943, + "learning_rate": 5.440888305644019e-06, + "loss": 0.8487, "step": 23332 }, { - "epoch": 0.6621169125993189, + "epoch": 0.6611975403100117, "grad_norm": 0.0, - "learning_rate": 5.4137249678936265e-06, - "loss": 0.8687, + "learning_rate": 5.440071469195905e-06, + "loss": 0.6465, "step": 23333 }, { - "epoch": 0.6621452894438139, + "epoch": 0.661225877752274, "grad_norm": 0.0, - "learning_rate": 5.412908268195052e-06, - "loss": 0.8445, + "learning_rate": 5.439254671157764e-06, + "loss": 0.8994, "step": 23334 }, { - "epoch": 0.6621736662883088, + "epoch": 0.6612542151945365, "grad_norm": 0.0, - "learning_rate": 5.412091607243845e-06, - "loss": 0.8261, + "learning_rate": 5.438437911536473e-06, + "loss": 0.8382, "step": 23335 }, { - "epoch": 0.6622020431328036, + "epoch": 0.661282552636799, "grad_norm": 0.0, - "learning_rate": 5.411274985046906e-06, - "loss": 0.8385, + "learning_rate": 5.437621190338915e-06, + "loss": 0.8903, "step": 23336 }, { - "epoch": 0.6622304199772985, + "epoch": 0.6613108900790614, "grad_norm": 0.0, - "learning_rate": 5.410458401611136e-06, - "loss": 0.8512, + "learning_rate": 5.436804507571966e-06, + "loss": 0.7559, "step": 23337 }, { - "epoch": 0.6622587968217934, + "epoch": 0.6613392275213239, "grad_norm": 0.0, - "learning_rate": 5.409641856943429e-06, - "loss": 0.9017, + "learning_rate": 5.435987863242507e-06, + "loss": 0.7562, "step": 23338 }, { - "epoch": 0.6622871736662883, + "epoch": 0.6613675649635864, "grad_norm": 0.0, - "learning_rate": 5.408825351050684e-06, - "loss": 0.8563, + "learning_rate": 5.435171257357417e-06, + "loss": 0.6937, "step": 23339 }, { - "epoch": 0.6623155505107832, + "epoch": 0.6613959024058489, "grad_norm": 0.0, - "learning_rate": 5.408008883939796e-06, - "loss": 0.845, + "learning_rate": 5.434354689923574e-06, + "loss": 0.7945, "step": 23340 }, { - "epoch": 0.6623439273552781, + "epoch": 0.6614242398481113, "grad_norm": 0.0, - "learning_rate": 5.407192455617662e-06, - "loss": 0.778, + "learning_rate": 5.43353816094786e-06, + "loss": 0.8156, "step": 23341 }, { - "epoch": 0.662372304199773, + "epoch": 0.6614525772903738, "grad_norm": 0.0, - "learning_rate": 5.4063760660911855e-06, - "loss": 0.8496, + "learning_rate": 5.432721670437147e-06, + "loss": 0.6832, "step": 23342 }, { - "epoch": 0.6624006810442679, + "epoch": 0.6614809147326363, "grad_norm": 0.0, - "learning_rate": 5.405559715367248e-06, - "loss": 0.8284, + "learning_rate": 5.431905218398314e-06, + "loss": 0.8976, "step": 23343 }, { - "epoch": 0.6624290578887627, + "epoch": 0.6615092521748986, "grad_norm": 0.0, - "learning_rate": 5.404743403452758e-06, - "loss": 0.8203, + "learning_rate": 5.431088804838246e-06, + "loss": 0.9025, "step": 23344 }, { - "epoch": 0.6624574347332577, + "epoch": 0.6615375896171611, "grad_norm": 0.0, - "learning_rate": 5.403927130354611e-06, - "loss": 0.8189, + "learning_rate": 5.430272429763808e-06, + "loss": 0.8161, "step": 23345 }, { - "epoch": 0.6624858115777525, + "epoch": 0.6615659270594236, "grad_norm": 0.0, - "learning_rate": 5.403110896079695e-06, - "loss": 0.8959, + "learning_rate": 5.429456093181883e-06, + "loss": 0.8357, "step": 23346 }, { - "epoch": 0.6625141884222474, + "epoch": 0.6615942645016861, "grad_norm": 0.0, - "learning_rate": 5.402294700634907e-06, - "loss": 0.8347, + "learning_rate": 5.42863979509935e-06, + "loss": 0.8423, "step": 23347 }, { - "epoch": 0.6625425652667424, + "epoch": 0.6616226019439485, "grad_norm": 0.0, - "learning_rate": 5.401478544027146e-06, - "loss": 0.8248, + "learning_rate": 5.427823535523078e-06, + "loss": 0.7613, "step": 23348 }, { - "epoch": 0.6625709421112372, + "epoch": 0.661650939386211, "grad_norm": 0.0, - "learning_rate": 5.400662426263299e-06, - "loss": 0.9256, + "learning_rate": 5.427007314459949e-06, + "loss": 0.7953, "step": 23349 }, { - "epoch": 0.6625993189557321, + "epoch": 0.6616792768284735, "grad_norm": 0.0, - "learning_rate": 5.399846347350263e-06, - "loss": 0.8333, + "learning_rate": 5.4261911319168335e-06, + "loss": 0.8389, "step": 23350 }, { - "epoch": 0.6626276958002271, + "epoch": 0.6617076142707359, "grad_norm": 0.0, - "learning_rate": 5.399030307294937e-06, - "loss": 0.8567, + "learning_rate": 5.425374987900607e-06, + "loss": 0.8153, "step": 23351 }, { - "epoch": 0.6626560726447219, + "epoch": 0.6617359517129984, "grad_norm": 0.0, - "learning_rate": 5.398214306104204e-06, - "loss": 0.858, + "learning_rate": 5.424558882418146e-06, + "loss": 0.9834, "step": 23352 }, { - "epoch": 0.6626844494892168, + "epoch": 0.6617642891552609, "grad_norm": 0.0, - "learning_rate": 5.3973983437849605e-06, - "loss": 0.8388, + "learning_rate": 5.423742815476325e-06, + "loss": 0.8808, "step": 23353 }, { - "epoch": 0.6627128263337116, + "epoch": 0.6617926265975234, "grad_norm": 0.0, - "learning_rate": 5.396582420344105e-06, - "loss": 0.7433, + "learning_rate": 5.422926787082017e-06, + "loss": 0.8777, "step": 23354 }, { - "epoch": 0.6627412031782066, + "epoch": 0.6618209640397857, "grad_norm": 0.0, - "learning_rate": 5.39576653578852e-06, - "loss": 0.7738, + "learning_rate": 5.422110797242102e-06, + "loss": 0.8361, "step": 23355 }, { - "epoch": 0.6627695800227015, + "epoch": 0.6618493014820482, "grad_norm": 0.0, - "learning_rate": 5.394950690125104e-06, - "loss": 0.8045, + "learning_rate": 5.4212948459634414e-06, + "loss": 0.8971, "step": 23356 }, { - "epoch": 0.6627979568671963, + "epoch": 0.6618776389243107, "grad_norm": 0.0, - "learning_rate": 5.394134883360749e-06, - "loss": 0.8984, + "learning_rate": 5.4204789332529195e-06, + "loss": 0.7741, "step": 23357 }, { - "epoch": 0.6628263337116913, + "epoch": 0.6619059763665731, "grad_norm": 0.0, - "learning_rate": 5.393319115502336e-06, - "loss": 0.9272, + "learning_rate": 5.419663059117401e-06, + "loss": 0.7863, "step": 23358 }, { - "epoch": 0.6628547105561862, + "epoch": 0.6619343138088356, "grad_norm": 0.0, - "learning_rate": 5.392503386556771e-06, - "loss": 0.8769, + "learning_rate": 5.418847223563761e-06, + "loss": 0.7833, "step": 23359 }, { - "epoch": 0.662883087400681, + "epoch": 0.6619626512510981, "grad_norm": 0.0, - "learning_rate": 5.391687696530934e-06, - "loss": 0.8428, + "learning_rate": 5.418031426598875e-06, + "loss": 0.8676, "step": 23360 }, { - "epoch": 0.6629114642451759, + "epoch": 0.6619909886933605, "grad_norm": 0.0, - "learning_rate": 5.390872045431718e-06, - "loss": 0.8962, + "learning_rate": 5.417215668229609e-06, + "loss": 0.8774, "step": 23361 }, { - "epoch": 0.6629398410896709, + "epoch": 0.662019326135623, "grad_norm": 0.0, - "learning_rate": 5.390056433266017e-06, - "loss": 0.9384, + "learning_rate": 5.416399948462836e-06, + "loss": 0.8564, "step": 23362 }, { - "epoch": 0.6629682179341657, + "epoch": 0.6620476635778855, "grad_norm": 0.0, - "learning_rate": 5.389240860040712e-06, - "loss": 0.8892, + "learning_rate": 5.415584267305433e-06, + "loss": 0.765, "step": 23363 }, { - "epoch": 0.6629965947786606, + "epoch": 0.662076001020148, "grad_norm": 0.0, - "learning_rate": 5.388425325762696e-06, - "loss": 0.8568, + "learning_rate": 5.414768624764262e-06, + "loss": 0.9439, "step": 23364 }, { - "epoch": 0.6630249716231555, + "epoch": 0.6621043384624103, "grad_norm": 0.0, - "learning_rate": 5.387609830438865e-06, - "loss": 0.9018, + "learning_rate": 5.413953020846197e-06, + "loss": 0.8418, "step": 23365 }, { - "epoch": 0.6630533484676504, + "epoch": 0.6621326759046728, "grad_norm": 0.0, - "learning_rate": 5.386794374076096e-06, - "loss": 0.9013, + "learning_rate": 5.413137455558111e-06, + "loss": 0.8265, "step": 23366 }, { - "epoch": 0.6630817253121453, + "epoch": 0.6621610133469353, "grad_norm": 0.0, - "learning_rate": 5.385978956681281e-06, - "loss": 0.8021, + "learning_rate": 5.41232192890687e-06, + "loss": 0.8532, "step": 23367 }, { - "epoch": 0.6631101021566402, + "epoch": 0.6621893507891977, "grad_norm": 0.0, - "learning_rate": 5.385163578261314e-06, - "loss": 0.8218, + "learning_rate": 5.411506440899348e-06, + "loss": 0.8403, "step": 23368 }, { - "epoch": 0.6631384790011351, + "epoch": 0.6622176882314602, "grad_norm": 0.0, - "learning_rate": 5.384348238823074e-06, - "loss": 0.8263, + "learning_rate": 5.4106909915424075e-06, + "loss": 0.6882, "step": 23369 }, { - "epoch": 0.66316685584563, + "epoch": 0.6622460256737227, "grad_norm": 0.0, - "learning_rate": 5.383532938373453e-06, - "loss": 1.0063, + "learning_rate": 5.409875580842925e-06, + "loss": 0.8047, "step": 23370 }, { - "epoch": 0.6631952326901248, + "epoch": 0.6622743631159852, "grad_norm": 0.0, - "learning_rate": 5.382717676919338e-06, - "loss": 0.8708, + "learning_rate": 5.409060208807761e-06, + "loss": 0.6572, "step": 23371 }, { - "epoch": 0.6632236095346198, + "epoch": 0.6623027005582476, "grad_norm": 0.0, - "learning_rate": 5.381902454467612e-06, - "loss": 0.8569, + "learning_rate": 5.408244875443789e-06, + "loss": 0.8237, "step": 23372 }, { - "epoch": 0.6632519863791146, + "epoch": 0.6623310380005101, "grad_norm": 0.0, - "learning_rate": 5.381087271025164e-06, - "loss": 0.8683, + "learning_rate": 5.407429580757879e-06, + "loss": 0.8918, "step": 23373 }, { - "epoch": 0.6632803632236095, + "epoch": 0.6623593754427726, "grad_norm": 0.0, - "learning_rate": 5.380272126598879e-06, - "loss": 0.7867, + "learning_rate": 5.40661432475689e-06, + "loss": 0.9016, "step": 23374 }, { - "epoch": 0.6633087400681045, + "epoch": 0.662387712885035, "grad_norm": 0.0, - "learning_rate": 5.379457021195642e-06, - "loss": 0.8553, + "learning_rate": 5.405799107447695e-06, + "loss": 0.8013, "step": 23375 }, { - "epoch": 0.6633371169125993, + "epoch": 0.6624160503272974, "grad_norm": 0.0, - "learning_rate": 5.378641954822345e-06, - "loss": 0.8885, + "learning_rate": 5.404983928837164e-06, + "loss": 0.9261, "step": 23376 }, { - "epoch": 0.6633654937570942, + "epoch": 0.6624443877695599, "grad_norm": 0.0, - "learning_rate": 5.377826927485863e-06, - "loss": 0.9333, + "learning_rate": 5.404168788932156e-06, + "loss": 0.9099, "step": 23377 }, { - "epoch": 0.663393870601589, + "epoch": 0.6624727252118224, "grad_norm": 0.0, - "learning_rate": 5.377011939193084e-06, - "loss": 0.9035, + "learning_rate": 5.403353687739541e-06, + "loss": 0.8714, "step": 23378 }, { - "epoch": 0.663422247446084, + "epoch": 0.6625010626540848, "grad_norm": 0.0, - "learning_rate": 5.376196989950897e-06, - "loss": 0.842, + "learning_rate": 5.402538625266184e-06, + "loss": 0.8234, "step": 23379 }, { - "epoch": 0.6634506242905789, + "epoch": 0.6625294000963473, "grad_norm": 0.0, - "learning_rate": 5.375382079766177e-06, - "loss": 0.9102, + "learning_rate": 5.4017236015189515e-06, + "loss": 0.8724, "step": 23380 }, { - "epoch": 0.6634790011350737, + "epoch": 0.6625577375386098, "grad_norm": 0.0, - "learning_rate": 5.374567208645813e-06, - "loss": 0.785, + "learning_rate": 5.400908616504713e-06, + "loss": 0.8755, "step": 23381 }, { - "epoch": 0.6635073779795687, + "epoch": 0.6625860749808722, "grad_norm": 0.0, - "learning_rate": 5.3737523765966925e-06, - "loss": 0.8593, + "learning_rate": 5.400093670230324e-06, + "loss": 0.7884, "step": 23382 }, { - "epoch": 0.6635357548240636, + "epoch": 0.6626144124231347, "grad_norm": 0.0, - "learning_rate": 5.37293758362569e-06, - "loss": 0.7912, + "learning_rate": 5.399278762702655e-06, + "loss": 0.857, "step": 23383 }, { - "epoch": 0.6635641316685584, + "epoch": 0.6626427498653972, "grad_norm": 0.0, - "learning_rate": 5.37212282973969e-06, - "loss": 0.8473, + "learning_rate": 5.398463893928574e-06, + "loss": 0.7815, "step": 23384 }, { - "epoch": 0.6635925085130534, + "epoch": 0.6626710873076596, "grad_norm": 0.0, - "learning_rate": 5.371308114945581e-06, - "loss": 0.6895, + "learning_rate": 5.397649063914936e-06, + "loss": 0.8816, "step": 23385 }, { - "epoch": 0.6636208853575483, + "epoch": 0.662699424749922, "grad_norm": 0.0, - "learning_rate": 5.370493439250237e-06, - "loss": 0.8733, + "learning_rate": 5.396834272668614e-06, + "loss": 0.8144, "step": 23386 }, { - "epoch": 0.6636492622020431, + "epoch": 0.6627277621921845, "grad_norm": 0.0, - "learning_rate": 5.369678802660544e-06, - "loss": 0.8501, + "learning_rate": 5.39601952019646e-06, + "loss": 0.8257, "step": 23387 }, { - "epoch": 0.663677639046538, + "epoch": 0.662756099634447, "grad_norm": 0.0, - "learning_rate": 5.368864205183385e-06, - "loss": 0.8757, + "learning_rate": 5.395204806505345e-06, + "loss": 0.8833, "step": 23388 }, { - "epoch": 0.663706015891033, + "epoch": 0.6627844370767094, "grad_norm": 0.0, - "learning_rate": 5.36804964682563e-06, - "loss": 0.8329, + "learning_rate": 5.394390131602133e-06, + "loss": 0.7748, "step": 23389 }, { - "epoch": 0.6637343927355278, + "epoch": 0.6628127745189719, "grad_norm": 0.0, - "learning_rate": 5.367235127594177e-06, - "loss": 0.7396, + "learning_rate": 5.393575495493679e-06, + "loss": 0.8779, "step": 23390 }, { - "epoch": 0.6637627695800227, + "epoch": 0.6628411119612344, "grad_norm": 0.0, - "learning_rate": 5.366420647495892e-06, - "loss": 0.833, + "learning_rate": 5.392760898186851e-06, + "loss": 0.8441, "step": 23391 }, { - "epoch": 0.6637911464245176, + "epoch": 0.6628694494034968, "grad_norm": 0.0, - "learning_rate": 5.3656062065376616e-06, - "loss": 0.9665, + "learning_rate": 5.391946339688506e-06, + "loss": 0.8673, "step": 23392 }, { - "epoch": 0.6638195232690125, + "epoch": 0.6628977868457593, "grad_norm": 0.0, - "learning_rate": 5.364791804726366e-06, - "loss": 0.9064, + "learning_rate": 5.3911318200055105e-06, + "loss": 0.8019, "step": 23393 }, { - "epoch": 0.6638479001135074, + "epoch": 0.6629261242880218, "grad_norm": 0.0, - "learning_rate": 5.36397744206888e-06, - "loss": 0.773, + "learning_rate": 5.390317339144726e-06, + "loss": 0.8647, "step": 23394 }, { - "epoch": 0.6638762769580022, + "epoch": 0.6629544617302843, "grad_norm": 0.0, - "learning_rate": 5.3631631185720835e-06, - "loss": 0.9017, + "learning_rate": 5.389502897113006e-06, + "loss": 0.7612, "step": 23395 }, { - "epoch": 0.6639046538024972, + "epoch": 0.6629827991725467, "grad_norm": 0.0, - "learning_rate": 5.362348834242861e-06, - "loss": 0.7392, + "learning_rate": 5.388688493917216e-06, + "loss": 0.922, "step": 23396 }, { - "epoch": 0.663933030646992, + "epoch": 0.6630111366148091, "grad_norm": 0.0, - "learning_rate": 5.361534589088083e-06, - "loss": 0.8293, + "learning_rate": 5.387874129564219e-06, + "loss": 0.808, "step": 23397 }, { - "epoch": 0.6639614074914869, + "epoch": 0.6630394740570716, "grad_norm": 0.0, - "learning_rate": 5.360720383114631e-06, - "loss": 0.888, + "learning_rate": 5.387059804060866e-06, + "loss": 0.8276, "step": 23398 }, { - "epoch": 0.6639897843359819, + "epoch": 0.663067811499334, "grad_norm": 0.0, - "learning_rate": 5.3599062163293855e-06, - "loss": 0.8945, + "learning_rate": 5.386245517414026e-06, + "loss": 0.6897, "step": 23399 }, { - "epoch": 0.6640181611804767, + "epoch": 0.6630961489415965, "grad_norm": 0.0, - "learning_rate": 5.359092088739218e-06, - "loss": 0.9214, + "learning_rate": 5.38543126963055e-06, + "loss": 0.7312, "step": 23400 }, { - "epoch": 0.6640465380249716, + "epoch": 0.663124486383859, "grad_norm": 0.0, - "learning_rate": 5.358278000351007e-06, - "loss": 0.8715, + "learning_rate": 5.3846170607172985e-06, + "loss": 0.7778, "step": 23401 }, { - "epoch": 0.6640749148694666, + "epoch": 0.6631528238261215, "grad_norm": 0.0, - "learning_rate": 5.357463951171635e-06, - "loss": 0.8377, + "learning_rate": 5.383802890681136e-06, + "loss": 0.9092, "step": 23402 }, { - "epoch": 0.6641032917139614, + "epoch": 0.6631811612683839, "grad_norm": 0.0, - "learning_rate": 5.35664994120797e-06, - "loss": 0.7698, + "learning_rate": 5.382988759528912e-06, + "loss": 0.8226, "step": 23403 }, { - "epoch": 0.6641316685584563, + "epoch": 0.6632094987106464, "grad_norm": 0.0, - "learning_rate": 5.355835970466891e-06, - "loss": 0.8427, + "learning_rate": 5.382174667267488e-06, + "loss": 0.8541, "step": 23404 }, { - "epoch": 0.6641600454029511, + "epoch": 0.6632378361529089, "grad_norm": 0.0, - "learning_rate": 5.3550220389552745e-06, - "loss": 0.7957, + "learning_rate": 5.381360613903722e-06, + "loss": 0.8768, "step": 23405 }, { - "epoch": 0.6641884222474461, + "epoch": 0.6632661735951713, "grad_norm": 0.0, - "learning_rate": 5.3542081466799955e-06, - "loss": 0.8273, + "learning_rate": 5.380546599444471e-06, + "loss": 0.7639, "step": 23406 }, { - "epoch": 0.664216799091941, + "epoch": 0.6632945110374338, "grad_norm": 0.0, - "learning_rate": 5.353394293647933e-06, - "loss": 0.8639, + "learning_rate": 5.379732623896594e-06, + "loss": 0.8474, "step": 23407 }, { - "epoch": 0.6642451759364358, + "epoch": 0.6633228484796962, "grad_norm": 0.0, - "learning_rate": 5.352580479865954e-06, - "loss": 0.8428, + "learning_rate": 5.378918687266942e-06, + "loss": 0.8709, "step": 23408 }, { - "epoch": 0.6642735527809308, + "epoch": 0.6633511859219586, "grad_norm": 0.0, - "learning_rate": 5.3517667053409375e-06, - "loss": 0.8459, + "learning_rate": 5.378104789562373e-06, + "loss": 0.8562, "step": 23409 }, { - "epoch": 0.6643019296254257, + "epoch": 0.6633795233642211, "grad_norm": 0.0, - "learning_rate": 5.350952970079759e-06, - "loss": 0.7951, + "learning_rate": 5.377290930789749e-06, + "loss": 0.8369, "step": 23410 }, { - "epoch": 0.6643303064699205, + "epoch": 0.6634078608064836, "grad_norm": 0.0, - "learning_rate": 5.350139274089288e-06, - "loss": 0.8503, + "learning_rate": 5.376477110955915e-06, + "loss": 0.7971, "step": 23411 }, { - "epoch": 0.6643586833144154, + "epoch": 0.6634361982487461, "grad_norm": 0.0, - "learning_rate": 5.3493256173763976e-06, - "loss": 0.7296, + "learning_rate": 5.375663330067731e-06, + "loss": 0.7934, "step": 23412 }, { - "epoch": 0.6643870601589104, + "epoch": 0.6634645356910085, "grad_norm": 0.0, - "learning_rate": 5.348511999947968e-06, - "loss": 0.8829, + "learning_rate": 5.374849588132056e-06, + "loss": 0.7704, "step": 23413 }, { - "epoch": 0.6644154370034052, + "epoch": 0.663492873133271, "grad_norm": 0.0, - "learning_rate": 5.347698421810861e-06, - "loss": 0.7579, + "learning_rate": 5.374035885155737e-06, + "loss": 0.7813, "step": 23414 }, { - "epoch": 0.6644438138479001, + "epoch": 0.6635212105755335, "grad_norm": 0.0, - "learning_rate": 5.3468848829719555e-06, - "loss": 0.842, + "learning_rate": 5.373222221145635e-06, + "loss": 0.8813, "step": 23415 }, { - "epoch": 0.664472190692395, + "epoch": 0.6635495480177959, "grad_norm": 0.0, - "learning_rate": 5.346071383438127e-06, - "loss": 0.8282, + "learning_rate": 5.372408596108598e-06, + "loss": 0.8718, "step": 23416 }, { - "epoch": 0.6645005675368899, + "epoch": 0.6635778854600584, "grad_norm": 0.0, - "learning_rate": 5.3452579232162384e-06, - "loss": 0.7068, + "learning_rate": 5.37159501005148e-06, + "loss": 0.8144, "step": 23417 }, { - "epoch": 0.6645289443813848, + "epoch": 0.6636062229023209, "grad_norm": 0.0, - "learning_rate": 5.344444502313164e-06, - "loss": 0.7573, + "learning_rate": 5.370781462981136e-06, + "loss": 0.7909, "step": 23418 }, { - "epoch": 0.6645573212258796, + "epoch": 0.6636345603445833, "grad_norm": 0.0, - "learning_rate": 5.343631120735778e-06, - "loss": 0.7081, + "learning_rate": 5.36996795490442e-06, + "loss": 0.9869, "step": 23419 }, { - "epoch": 0.6645856980703746, + "epoch": 0.6636628977868457, "grad_norm": 0.0, - "learning_rate": 5.342817778490949e-06, - "loss": 0.7942, + "learning_rate": 5.369154485828187e-06, + "loss": 0.8407, "step": 23420 }, { - "epoch": 0.6646140749148695, + "epoch": 0.6636912352291082, "grad_norm": 0.0, - "learning_rate": 5.3420044755855514e-06, - "loss": 0.9412, + "learning_rate": 5.368341055759281e-06, + "loss": 0.8581, "step": 23421 }, { - "epoch": 0.6646424517593643, + "epoch": 0.6637195726713707, "grad_norm": 0.0, - "learning_rate": 5.3411912120264474e-06, - "loss": 0.9532, + "learning_rate": 5.367527664704557e-06, + "loss": 0.6804, "step": 23422 }, { - "epoch": 0.6646708286038593, + "epoch": 0.6637479101136331, "grad_norm": 0.0, - "learning_rate": 5.340377987820511e-06, - "loss": 0.8372, + "learning_rate": 5.366714312670873e-06, + "loss": 0.7005, "step": 23423 }, { - "epoch": 0.6646992054483541, + "epoch": 0.6637762475558956, "grad_norm": 0.0, - "learning_rate": 5.339564802974615e-06, - "loss": 0.7484, + "learning_rate": 5.3659009996650704e-06, + "loss": 0.8829, "step": 23424 }, { - "epoch": 0.664727582292849, + "epoch": 0.6638045849981581, "grad_norm": 0.0, - "learning_rate": 5.33875165749562e-06, - "loss": 0.8363, + "learning_rate": 5.3650877256940045e-06, + "loss": 0.9654, "step": 23425 }, { - "epoch": 0.664755959137344, + "epoch": 0.6638329224404206, "grad_norm": 0.0, - "learning_rate": 5.337938551390399e-06, - "loss": 0.8187, + "learning_rate": 5.36427449076453e-06, + "loss": 0.767, "step": 23426 }, { - "epoch": 0.6647843359818388, + "epoch": 0.663861259882683, "grad_norm": 0.0, - "learning_rate": 5.337125484665826e-06, - "loss": 0.6669, + "learning_rate": 5.36346129488349e-06, + "loss": 0.8701, "step": 23427 }, { - "epoch": 0.6648127128263337, + "epoch": 0.6638895973249455, "grad_norm": 0.0, - "learning_rate": 5.336312457328759e-06, - "loss": 0.8405, + "learning_rate": 5.36264813805774e-06, + "loss": 0.7912, "step": 23428 }, { - "epoch": 0.6648410896708286, + "epoch": 0.663917934767208, "grad_norm": 0.0, - "learning_rate": 5.33549946938607e-06, - "loss": 0.8953, + "learning_rate": 5.3618350202941225e-06, + "loss": 0.9457, "step": 23429 }, { - "epoch": 0.6648694665153235, + "epoch": 0.6639462722094703, "grad_norm": 0.0, - "learning_rate": 5.334686520844631e-06, - "loss": 0.8338, + "learning_rate": 5.361021941599492e-06, + "loss": 0.7649, "step": 23430 }, { - "epoch": 0.6648978433598184, + "epoch": 0.6639746096517328, "grad_norm": 0.0, - "learning_rate": 5.3338736117113e-06, - "loss": 0.7547, + "learning_rate": 5.3602089019806955e-06, + "loss": 0.7897, "step": 23431 }, { - "epoch": 0.6649262202043132, + "epoch": 0.6640029470939953, "grad_norm": 0.0, - "learning_rate": 5.333060741992949e-06, - "loss": 0.9165, + "learning_rate": 5.359395901444583e-06, + "loss": 0.831, "step": 23432 }, { - "epoch": 0.6649545970488082, + "epoch": 0.6640312845362577, "grad_norm": 0.0, - "learning_rate": 5.332247911696448e-06, - "loss": 0.7409, + "learning_rate": 5.358582939998001e-06, + "loss": 0.77, "step": 23433 }, { - "epoch": 0.6649829738933031, + "epoch": 0.6640596219785202, "grad_norm": 0.0, - "learning_rate": 5.331435120828654e-06, - "loss": 0.8332, + "learning_rate": 5.357770017647803e-06, + "loss": 0.8407, "step": 23434 }, { - "epoch": 0.6650113507377979, + "epoch": 0.6640879594207827, "grad_norm": 0.0, - "learning_rate": 5.330622369396435e-06, - "loss": 0.7419, + "learning_rate": 5.356957134400829e-06, + "loss": 0.8631, "step": 23435 }, { - "epoch": 0.6650397275822928, + "epoch": 0.6641162968630452, "grad_norm": 0.0, - "learning_rate": 5.329809657406664e-06, - "loss": 0.7571, + "learning_rate": 5.356144290263932e-06, + "loss": 0.8835, "step": 23436 }, { - "epoch": 0.6650681044267878, + "epoch": 0.6641446343053076, "grad_norm": 0.0, - "learning_rate": 5.328996984866199e-06, - "loss": 0.8026, + "learning_rate": 5.355331485243953e-06, + "loss": 0.8442, "step": 23437 }, { - "epoch": 0.6650964812712826, + "epoch": 0.6641729717475701, "grad_norm": 0.0, - "learning_rate": 5.328184351781905e-06, - "loss": 0.6869, + "learning_rate": 5.354518719347741e-06, + "loss": 0.8319, "step": 23438 }, { - "epoch": 0.6651248581157775, + "epoch": 0.6642013091898326, "grad_norm": 0.0, - "learning_rate": 5.327371758160654e-06, - "loss": 0.6095, + "learning_rate": 5.353705992582147e-06, + "loss": 0.8512, "step": 23439 }, { - "epoch": 0.6651532349602725, + "epoch": 0.6642296466320949, "grad_norm": 0.0, - "learning_rate": 5.326559204009298e-06, - "loss": 0.8556, + "learning_rate": 5.352893304954008e-06, + "loss": 0.8772, "step": 23440 }, { - "epoch": 0.6651816118047673, + "epoch": 0.6642579840743574, "grad_norm": 0.0, - "learning_rate": 5.325746689334707e-06, - "loss": 0.792, + "learning_rate": 5.352080656470175e-06, + "loss": 0.8854, "step": 23441 }, { - "epoch": 0.6652099886492622, + "epoch": 0.6642863215166199, "grad_norm": 0.0, - "learning_rate": 5.324934214143747e-06, - "loss": 0.7927, + "learning_rate": 5.351268047137497e-06, + "loss": 0.8497, "step": 23442 }, { - "epoch": 0.6652383654937571, + "epoch": 0.6643146589588824, "grad_norm": 0.0, - "learning_rate": 5.324121778443274e-06, - "loss": 0.7566, + "learning_rate": 5.35045547696281e-06, + "loss": 0.8559, "step": 23443 }, { - "epoch": 0.665266742338252, + "epoch": 0.6643429964011448, "grad_norm": 0.0, - "learning_rate": 5.323309382240156e-06, - "loss": 0.8121, + "learning_rate": 5.349642945952964e-06, + "loss": 0.8813, "step": 23444 }, { - "epoch": 0.6652951191827469, + "epoch": 0.6643713338434073, "grad_norm": 0.0, - "learning_rate": 5.3224970255412566e-06, - "loss": 0.8728, + "learning_rate": 5.348830454114802e-06, + "loss": 0.8455, "step": 23445 }, { - "epoch": 0.6653234960272417, + "epoch": 0.6643996712856698, "grad_norm": 0.0, - "learning_rate": 5.3216847083534315e-06, - "loss": 0.8006, + "learning_rate": 5.348018001455167e-06, + "loss": 0.8819, "step": 23446 }, { - "epoch": 0.6653518728717367, + "epoch": 0.6644280087279322, "grad_norm": 0.0, - "learning_rate": 5.32087243068355e-06, - "loss": 0.8331, + "learning_rate": 5.347205587980909e-06, + "loss": 0.8011, "step": 23447 }, { - "epoch": 0.6653802497162316, + "epoch": 0.6644563461701947, "grad_norm": 0.0, - "learning_rate": 5.320060192538465e-06, - "loss": 0.8164, + "learning_rate": 5.3463932136988615e-06, + "loss": 0.8306, "step": 23448 }, { - "epoch": 0.6654086265607264, + "epoch": 0.6644846836124572, "grad_norm": 0.0, - "learning_rate": 5.319247993925043e-06, - "loss": 0.7165, + "learning_rate": 5.345580878615877e-06, + "loss": 0.8757, "step": 23449 }, { - "epoch": 0.6654370034052214, + "epoch": 0.6645130210547197, "grad_norm": 0.0, - "learning_rate": 5.3184358348501415e-06, - "loss": 0.8942, + "learning_rate": 5.344768582738789e-06, + "loss": 0.8369, "step": 23450 }, { - "epoch": 0.6654653802497162, + "epoch": 0.664541358496982, "grad_norm": 0.0, - "learning_rate": 5.317623715320626e-06, - "loss": 0.7505, + "learning_rate": 5.343956326074442e-06, + "loss": 0.7826, "step": 23451 }, { - "epoch": 0.6654937570942111, + "epoch": 0.6645696959392445, "grad_norm": 0.0, - "learning_rate": 5.31681163534335e-06, - "loss": 0.875, + "learning_rate": 5.343144108629685e-06, + "loss": 0.8247, "step": 23452 }, { - "epoch": 0.665522133938706, + "epoch": 0.664598033381507, "grad_norm": 0.0, - "learning_rate": 5.315999594925183e-06, - "loss": 0.7559, + "learning_rate": 5.342331930411352e-06, + "loss": 0.8209, "step": 23453 }, { - "epoch": 0.6655505107832009, + "epoch": 0.6646263708237694, "grad_norm": 0.0, - "learning_rate": 5.315187594072974e-06, - "loss": 0.8406, + "learning_rate": 5.341519791426285e-06, + "loss": 0.8594, "step": 23454 }, { - "epoch": 0.6655788876276958, + "epoch": 0.6646547082660319, "grad_norm": 0.0, - "learning_rate": 5.314375632793584e-06, - "loss": 0.776, + "learning_rate": 5.340707691681332e-06, + "loss": 0.8231, "step": 23455 }, { - "epoch": 0.6656072644721907, + "epoch": 0.6646830457082944, "grad_norm": 0.0, - "learning_rate": 5.31356371109388e-06, - "loss": 1.0055, + "learning_rate": 5.339895631183323e-06, + "loss": 0.8827, "step": 23456 }, { - "epoch": 0.6656356413166856, + "epoch": 0.6647113831505568, "grad_norm": 0.0, - "learning_rate": 5.312751828980709e-06, - "loss": 1.0386, + "learning_rate": 5.339083609939104e-06, + "loss": 0.8614, "step": 23457 }, { - "epoch": 0.6656640181611805, + "epoch": 0.6647397205928193, "grad_norm": 0.0, - "learning_rate": 5.311939986460934e-06, - "loss": 0.7793, + "learning_rate": 5.338271627955515e-06, + "loss": 0.8477, "step": 23458 }, { - "epoch": 0.6656923950056753, + "epoch": 0.6647680580350818, "grad_norm": 0.0, - "learning_rate": 5.311128183541416e-06, - "loss": 0.8021, + "learning_rate": 5.337459685239395e-06, + "loss": 0.8971, "step": 23459 }, { - "epoch": 0.6657207718501703, + "epoch": 0.6647963954773443, "grad_norm": 0.0, - "learning_rate": 5.310316420229007e-06, - "loss": 0.8206, + "learning_rate": 5.336647781797586e-06, + "loss": 0.9438, "step": 23460 }, { - "epoch": 0.6657491486946652, + "epoch": 0.6648247329196066, "grad_norm": 0.0, - "learning_rate": 5.309504696530565e-06, - "loss": 0.8969, + "learning_rate": 5.335835917636923e-06, + "loss": 0.7644, "step": 23461 }, { - "epoch": 0.66577752553916, + "epoch": 0.6648530703618691, "grad_norm": 0.0, - "learning_rate": 5.30869301245295e-06, - "loss": 0.8471, + "learning_rate": 5.335024092764243e-06, + "loss": 0.9191, "step": 23462 }, { - "epoch": 0.6658059023836549, + "epoch": 0.6648814078041316, "grad_norm": 0.0, - "learning_rate": 5.3078813680030135e-06, - "loss": 0.8106, + "learning_rate": 5.334212307186394e-06, + "loss": 0.8723, "step": 23463 }, { - "epoch": 0.6658342792281499, + "epoch": 0.664909745246394, "grad_norm": 0.0, - "learning_rate": 5.307069763187615e-06, - "loss": 0.887, + "learning_rate": 5.333400560910202e-06, + "loss": 0.7869, "step": 23464 }, { - "epoch": 0.6658626560726447, + "epoch": 0.6649380826886565, "grad_norm": 0.0, - "learning_rate": 5.3062581980136115e-06, - "loss": 0.9604, + "learning_rate": 5.332588853942515e-06, + "loss": 0.9605, "step": 23465 }, { - "epoch": 0.6658910329171396, + "epoch": 0.664966420130919, "grad_norm": 0.0, - "learning_rate": 5.305446672487849e-06, - "loss": 0.8928, + "learning_rate": 5.33177718629016e-06, + "loss": 0.8151, "step": 23466 }, { - "epoch": 0.6659194097616346, + "epoch": 0.6649947575731815, "grad_norm": 0.0, - "learning_rate": 5.304635186617198e-06, - "loss": 0.8365, + "learning_rate": 5.3309655579599795e-06, + "loss": 0.7334, "step": 23467 }, { - "epoch": 0.6659477866061294, + "epoch": 0.6650230950154439, "grad_norm": 0.0, - "learning_rate": 5.3038237404084995e-06, - "loss": 0.918, + "learning_rate": 5.330153968958811e-06, + "loss": 0.9053, "step": 23468 }, { - "epoch": 0.6659761634506243, + "epoch": 0.6650514324577064, "grad_norm": 0.0, - "learning_rate": 5.303012333868613e-06, - "loss": 0.712, + "learning_rate": 5.329342419293488e-06, + "loss": 0.8336, "step": 23469 }, { - "epoch": 0.6660045402951191, + "epoch": 0.6650797698999689, "grad_norm": 0.0, - "learning_rate": 5.302200967004396e-06, - "loss": 0.771, + "learning_rate": 5.3285309089708545e-06, + "loss": 0.8475, "step": 23470 }, { - "epoch": 0.6660329171396141, + "epoch": 0.6651081073422312, "grad_norm": 0.0, - "learning_rate": 5.301389639822696e-06, - "loss": 0.8624, + "learning_rate": 5.327719437997734e-06, + "loss": 0.8822, "step": 23471 }, { - "epoch": 0.666061293984109, + "epoch": 0.6651364447844937, "grad_norm": 0.0, - "learning_rate": 5.300578352330367e-06, - "loss": 0.8331, + "learning_rate": 5.3269080063809685e-06, + "loss": 0.7067, "step": 23472 }, { - "epoch": 0.6660896708286038, + "epoch": 0.6651647822267562, "grad_norm": 0.0, - "learning_rate": 5.29976710453427e-06, - "loss": 0.9041, + "learning_rate": 5.326096614127396e-06, + "loss": 0.8904, "step": 23473 }, { - "epoch": 0.6661180476730988, + "epoch": 0.6651931196690187, "grad_norm": 0.0, - "learning_rate": 5.298955896441247e-06, - "loss": 0.9068, + "learning_rate": 5.325285261243843e-06, + "loss": 0.8453, "step": 23474 }, { - "epoch": 0.6661464245175936, + "epoch": 0.6652214571112811, "grad_norm": 0.0, - "learning_rate": 5.2981447280581545e-06, - "loss": 0.9069, + "learning_rate": 5.324473947737149e-06, + "loss": 0.7978, "step": 23475 }, { - "epoch": 0.6661748013620885, + "epoch": 0.6652497945535436, "grad_norm": 0.0, - "learning_rate": 5.29733359939185e-06, - "loss": 0.8053, + "learning_rate": 5.3236626736141505e-06, + "loss": 0.8782, "step": 23476 }, { - "epoch": 0.6662031782065835, + "epoch": 0.6652781319958061, "grad_norm": 0.0, - "learning_rate": 5.296522510449174e-06, - "loss": 0.8699, + "learning_rate": 5.322851438881673e-06, + "loss": 0.8217, "step": 23477 }, { - "epoch": 0.6662315550510783, + "epoch": 0.6653064694380685, "grad_norm": 0.0, - "learning_rate": 5.295711461236985e-06, - "loss": 0.7173, + "learning_rate": 5.32204024354656e-06, + "loss": 0.906, "step": 23478 }, { - "epoch": 0.6662599318955732, + "epoch": 0.665334806880331, "grad_norm": 0.0, - "learning_rate": 5.2949004517621365e-06, - "loss": 0.7578, + "learning_rate": 5.321229087615635e-06, + "loss": 0.799, "step": 23479 }, { - "epoch": 0.6662883087400681, + "epoch": 0.6653631443225935, "grad_norm": 0.0, - "learning_rate": 5.294089482031471e-06, - "loss": 0.8233, + "learning_rate": 5.320417971095735e-06, + "loss": 0.87, "step": 23480 }, { - "epoch": 0.666316685584563, + "epoch": 0.6653914817648559, "grad_norm": 0.0, - "learning_rate": 5.293278552051843e-06, - "loss": 0.8198, + "learning_rate": 5.319606893993692e-06, + "loss": 0.9475, "step": 23481 }, { - "epoch": 0.6663450624290579, + "epoch": 0.6654198192071183, "grad_norm": 0.0, - "learning_rate": 5.292467661830104e-06, - "loss": 0.9634, + "learning_rate": 5.318795856316336e-06, + "loss": 0.8925, "step": 23482 }, { - "epoch": 0.6663734392735527, + "epoch": 0.6654481566493808, "grad_norm": 0.0, - "learning_rate": 5.291656811373102e-06, - "loss": 0.8583, + "learning_rate": 5.317984858070504e-06, + "loss": 0.8215, "step": 23483 }, { - "epoch": 0.6664018161180477, + "epoch": 0.6654764940916433, "grad_norm": 0.0, - "learning_rate": 5.290846000687689e-06, - "loss": 0.848, + "learning_rate": 5.3171738992630266e-06, + "loss": 0.8446, "step": 23484 }, { - "epoch": 0.6664301929625426, + "epoch": 0.6655048315339057, "grad_norm": 0.0, - "learning_rate": 5.2900352297807085e-06, - "loss": 0.7696, + "learning_rate": 5.316362979900729e-06, + "loss": 0.6868, "step": 23485 }, { - "epoch": 0.6664585698070374, + "epoch": 0.6655331689761682, "grad_norm": 0.0, - "learning_rate": 5.2892244986590135e-06, - "loss": 0.8221, + "learning_rate": 5.315552099990449e-06, + "loss": 0.8663, "step": 23486 }, { - "epoch": 0.6664869466515323, + "epoch": 0.6655615064184307, "grad_norm": 0.0, - "learning_rate": 5.288413807329453e-06, - "loss": 0.8896, + "learning_rate": 5.314741259539009e-06, + "loss": 0.9344, "step": 23487 }, { - "epoch": 0.6665153234960273, + "epoch": 0.6655898438606931, "grad_norm": 0.0, - "learning_rate": 5.2876031557988695e-06, - "loss": 0.8101, + "learning_rate": 5.313930458553242e-06, + "loss": 0.9315, "step": 23488 }, { - "epoch": 0.6665437003405221, + "epoch": 0.6656181813029556, "grad_norm": 0.0, - "learning_rate": 5.2867925440741145e-06, - "loss": 0.7863, + "learning_rate": 5.313119697039985e-06, + "loss": 0.7509, "step": 23489 }, { - "epoch": 0.666572077185017, + "epoch": 0.6656465187452181, "grad_norm": 0.0, - "learning_rate": 5.2859819721620375e-06, - "loss": 0.8277, + "learning_rate": 5.3123089750060554e-06, + "loss": 0.8989, "step": 23490 }, { - "epoch": 0.666600454029512, + "epoch": 0.6656748561874806, "grad_norm": 0.0, - "learning_rate": 5.28517144006948e-06, - "loss": 0.759, + "learning_rate": 5.311498292458291e-06, + "loss": 0.8138, "step": 23491 }, { - "epoch": 0.6666288308740068, + "epoch": 0.665703193629743, "grad_norm": 0.0, - "learning_rate": 5.284360947803291e-06, - "loss": 0.7749, + "learning_rate": 5.310687649403518e-06, + "loss": 0.8266, "step": 23492 }, { - "epoch": 0.6666572077185017, + "epoch": 0.6657315310720054, "grad_norm": 0.0, - "learning_rate": 5.28355049537032e-06, - "loss": 0.7425, + "learning_rate": 5.309877045848561e-06, + "loss": 0.8751, "step": 23493 }, { - "epoch": 0.6666855845629966, + "epoch": 0.6657598685142679, "grad_norm": 0.0, - "learning_rate": 5.282740082777407e-06, - "loss": 0.9719, + "learning_rate": 5.309066481800253e-06, + "loss": 0.8394, "step": 23494 }, { - "epoch": 0.6667139614074915, + "epoch": 0.6657882059565303, "grad_norm": 0.0, - "learning_rate": 5.2819297100314e-06, - "loss": 0.766, + "learning_rate": 5.308255957265418e-06, + "loss": 0.8203, "step": 23495 }, { - "epoch": 0.6667423382519864, + "epoch": 0.6658165433987928, "grad_norm": 0.0, - "learning_rate": 5.281119377139146e-06, - "loss": 0.7395, + "learning_rate": 5.307445472250887e-06, + "loss": 0.8261, "step": 23496 }, { - "epoch": 0.6667707150964812, + "epoch": 0.6658448808410553, "grad_norm": 0.0, - "learning_rate": 5.280309084107488e-06, - "loss": 0.8923, + "learning_rate": 5.306635026763489e-06, + "loss": 0.9204, "step": 23497 }, { - "epoch": 0.6667990919409762, + "epoch": 0.6658732182833178, "grad_norm": 0.0, - "learning_rate": 5.279498830943275e-06, - "loss": 0.825, + "learning_rate": 5.305824620810043e-06, + "loss": 0.7321, "step": 23498 }, { - "epoch": 0.6668274687854711, + "epoch": 0.6659015557255802, "grad_norm": 0.0, - "learning_rate": 5.278688617653345e-06, - "loss": 0.8633, + "learning_rate": 5.305014254397378e-06, + "loss": 0.8232, "step": 23499 }, { - "epoch": 0.6668558456299659, + "epoch": 0.6659298931678427, "grad_norm": 0.0, - "learning_rate": 5.277878444244542e-06, - "loss": 0.8511, + "learning_rate": 5.304203927532327e-06, + "loss": 0.7622, "step": 23500 }, { - "epoch": 0.6668842224744609, + "epoch": 0.6659582306101052, "grad_norm": 0.0, - "learning_rate": 5.277068310723717e-06, - "loss": 0.7997, + "learning_rate": 5.3033936402217055e-06, + "loss": 0.8122, "step": 23501 }, { - "epoch": 0.6669125993189557, + "epoch": 0.6659865680523676, "grad_norm": 0.0, - "learning_rate": 5.276258217097705e-06, - "loss": 0.9385, + "learning_rate": 5.302583392472347e-06, + "loss": 0.8891, "step": 23502 }, { - "epoch": 0.6669409761634506, + "epoch": 0.66601490549463, "grad_norm": 0.0, - "learning_rate": 5.2754481633733525e-06, - "loss": 0.7237, + "learning_rate": 5.301773184291069e-06, + "loss": 0.8072, "step": 23503 }, { - "epoch": 0.6669693530079455, + "epoch": 0.6660432429368925, "grad_norm": 0.0, - "learning_rate": 5.274638149557505e-06, - "loss": 0.8114, + "learning_rate": 5.300963015684701e-06, + "loss": 0.8333, "step": 23504 }, { - "epoch": 0.6669977298524404, + "epoch": 0.6660715803791549, "grad_norm": 0.0, - "learning_rate": 5.273828175656997e-06, - "loss": 0.7202, + "learning_rate": 5.300152886660068e-06, + "loss": 0.726, "step": 23505 }, { - "epoch": 0.6670261066969353, + "epoch": 0.6660999178214174, "grad_norm": 0.0, - "learning_rate": 5.2730182416786756e-06, - "loss": 0.9573, + "learning_rate": 5.299342797223992e-06, + "loss": 0.8304, "step": 23506 }, { - "epoch": 0.6670544835414302, + "epoch": 0.6661282552636799, "grad_norm": 0.0, - "learning_rate": 5.272208347629386e-06, - "loss": 0.8277, + "learning_rate": 5.298532747383295e-06, + "loss": 0.7721, "step": 23507 }, { - "epoch": 0.6670828603859251, + "epoch": 0.6661565927059424, "grad_norm": 0.0, - "learning_rate": 5.27139849351596e-06, - "loss": 0.723, + "learning_rate": 5.297722737144803e-06, + "loss": 0.821, "step": 23508 }, { - "epoch": 0.66711123723042, + "epoch": 0.6661849301482048, "grad_norm": 0.0, - "learning_rate": 5.270588679345244e-06, - "loss": 0.7543, + "learning_rate": 5.296912766515338e-06, + "loss": 0.7915, "step": 23509 }, { - "epoch": 0.6671396140749148, + "epoch": 0.6662132675904673, "grad_norm": 0.0, - "learning_rate": 5.269778905124082e-06, - "loss": 0.8571, + "learning_rate": 5.296102835501728e-06, + "loss": 0.8034, "step": 23510 }, { - "epoch": 0.6671679909194098, + "epoch": 0.6662416050327298, "grad_norm": 0.0, - "learning_rate": 5.268969170859306e-06, - "loss": 0.8174, + "learning_rate": 5.295292944110786e-06, + "loss": 0.8649, "step": 23511 }, { - "epoch": 0.6671963677639047, + "epoch": 0.6662699424749922, "grad_norm": 0.0, - "learning_rate": 5.268159476557762e-06, - "loss": 0.8107, + "learning_rate": 5.294483092349338e-06, + "loss": 0.8271, "step": 23512 }, { - "epoch": 0.6672247446083995, + "epoch": 0.6662982799172547, "grad_norm": 0.0, - "learning_rate": 5.267349822226284e-06, - "loss": 0.873, + "learning_rate": 5.293673280224212e-06, + "loss": 0.9304, "step": 23513 }, { - "epoch": 0.6672531214528944, + "epoch": 0.6663266173595171, "grad_norm": 0.0, - "learning_rate": 5.266540207871717e-06, - "loss": 0.902, + "learning_rate": 5.292863507742218e-06, + "loss": 0.9075, "step": 23514 }, { - "epoch": 0.6672814982973894, + "epoch": 0.6663549548017796, "grad_norm": 0.0, - "learning_rate": 5.265730633500902e-06, - "loss": 0.8715, + "learning_rate": 5.2920537749101885e-06, + "loss": 0.8129, "step": 23515 }, { - "epoch": 0.6673098751418842, + "epoch": 0.666383292244042, "grad_norm": 0.0, - "learning_rate": 5.264921099120668e-06, - "loss": 0.7896, + "learning_rate": 5.291244081734933e-06, + "loss": 0.8866, "step": 23516 }, { - "epoch": 0.6673382519863791, + "epoch": 0.6664116296863045, "grad_norm": 0.0, - "learning_rate": 5.264111604737859e-06, - "loss": 0.7962, + "learning_rate": 5.290434428223276e-06, + "loss": 0.7765, "step": 23517 }, { - "epoch": 0.667366628830874, + "epoch": 0.666439967128567, "grad_norm": 0.0, - "learning_rate": 5.263302150359316e-06, - "loss": 0.8736, + "learning_rate": 5.289624814382046e-06, + "loss": 0.854, "step": 23518 }, { - "epoch": 0.6673950056753689, + "epoch": 0.6664683045708294, "grad_norm": 0.0, - "learning_rate": 5.262492735991868e-06, - "loss": 0.9385, + "learning_rate": 5.288815240218048e-06, + "loss": 0.8801, "step": 23519 }, { - "epoch": 0.6674233825198638, + "epoch": 0.6664966420130919, "grad_norm": 0.0, - "learning_rate": 5.261683361642358e-06, - "loss": 0.7918, + "learning_rate": 5.28800570573811e-06, + "loss": 0.9048, "step": 23520 }, { - "epoch": 0.6674517593643586, + "epoch": 0.6665249794553544, "grad_norm": 0.0, - "learning_rate": 5.260874027317626e-06, - "loss": 0.8657, + "learning_rate": 5.287196210949051e-06, + "loss": 0.8615, "step": 23521 }, { - "epoch": 0.6674801362088536, + "epoch": 0.6665533168976169, "grad_norm": 0.0, - "learning_rate": 5.260064733024499e-06, - "loss": 0.9034, + "learning_rate": 5.286386755857686e-06, + "loss": 0.7559, "step": 23522 }, { - "epoch": 0.6675085130533485, + "epoch": 0.6665816543398793, "grad_norm": 0.0, - "learning_rate": 5.259255478769818e-06, - "loss": 0.7534, + "learning_rate": 5.285577340470842e-06, + "loss": 0.8492, "step": 23523 }, { - "epoch": 0.6675368898978433, + "epoch": 0.6666099917821418, "grad_norm": 0.0, - "learning_rate": 5.258446264560424e-06, - "loss": 0.8812, + "learning_rate": 5.284767964795325e-06, + "loss": 0.9524, "step": 23524 }, { - "epoch": 0.6675652667423383, + "epoch": 0.6666383292244042, "grad_norm": 0.0, - "learning_rate": 5.257637090403143e-06, - "loss": 0.7948, + "learning_rate": 5.2839586288379595e-06, + "loss": 0.8031, "step": 23525 }, { - "epoch": 0.6675936435868332, + "epoch": 0.6666666666666666, "grad_norm": 0.0, - "learning_rate": 5.256827956304815e-06, - "loss": 0.7643, + "learning_rate": 5.2831493326055634e-06, + "loss": 0.8064, "step": 23526 }, { - "epoch": 0.667622020431328, + "epoch": 0.6666950041089291, "grad_norm": 0.0, - "learning_rate": 5.256018862272274e-06, - "loss": 0.7836, + "learning_rate": 5.28234007610495e-06, + "loss": 0.8697, "step": 23527 }, { - "epoch": 0.667650397275823, + "epoch": 0.6667233415511916, "grad_norm": 0.0, - "learning_rate": 5.255209808312356e-06, - "loss": 0.9055, + "learning_rate": 5.281530859342938e-06, + "loss": 0.8807, "step": 23528 }, { - "epoch": 0.6676787741203178, + "epoch": 0.666751678993454, "grad_norm": 0.0, - "learning_rate": 5.254400794431897e-06, - "loss": 0.9042, + "learning_rate": 5.280721682326349e-06, + "loss": 0.8767, "step": 23529 }, { - "epoch": 0.6677071509648127, + "epoch": 0.6667800164357165, "grad_norm": 0.0, - "learning_rate": 5.253591820637725e-06, - "loss": 0.8842, + "learning_rate": 5.279912545061987e-06, + "loss": 0.8004, "step": 23530 }, { - "epoch": 0.6677355278093076, + "epoch": 0.666808353877979, "grad_norm": 0.0, - "learning_rate": 5.252782886936675e-06, - "loss": 0.8416, + "learning_rate": 5.27910344755668e-06, + "loss": 0.8065, "step": 23531 }, { - "epoch": 0.6677639046538025, + "epoch": 0.6668366913202415, "grad_norm": 0.0, - "learning_rate": 5.251973993335586e-06, - "loss": 0.7511, + "learning_rate": 5.278294389817233e-06, + "loss": 0.8137, "step": 23532 }, { - "epoch": 0.6677922814982974, + "epoch": 0.6668650287625039, "grad_norm": 0.0, - "learning_rate": 5.251165139841281e-06, - "loss": 0.9773, + "learning_rate": 5.277485371850466e-06, + "loss": 0.8932, "step": 23533 }, { - "epoch": 0.6678206583427923, + "epoch": 0.6668933662047664, "grad_norm": 0.0, - "learning_rate": 5.2503563264605995e-06, - "loss": 0.8447, + "learning_rate": 5.276676393663194e-06, + "loss": 0.8324, "step": 23534 }, { - "epoch": 0.6678490351872872, + "epoch": 0.6669217036470289, "grad_norm": 0.0, - "learning_rate": 5.249547553200374e-06, - "loss": 0.829, + "learning_rate": 5.27586745526223e-06, + "loss": 0.9048, "step": 23535 }, { - "epoch": 0.6678774120317821, + "epoch": 0.6669500410892912, "grad_norm": 0.0, - "learning_rate": 5.248738820067431e-06, - "loss": 0.8686, + "learning_rate": 5.275058556654394e-06, + "loss": 0.8437, "step": 23536 }, { - "epoch": 0.6679057888762769, + "epoch": 0.6669783785315537, "grad_norm": 0.0, - "learning_rate": 5.247930127068604e-06, - "loss": 0.8663, + "learning_rate": 5.274249697846487e-06, + "loss": 0.9232, "step": 23537 }, { - "epoch": 0.6679341657207718, + "epoch": 0.6670067159738162, "grad_norm": 0.0, - "learning_rate": 5.247121474210728e-06, - "loss": 0.814, + "learning_rate": 5.2734408788453326e-06, + "loss": 0.8259, "step": 23538 }, { - "epoch": 0.6679625425652668, + "epoch": 0.6670350534160787, "grad_norm": 0.0, - "learning_rate": 5.246312861500629e-06, - "loss": 0.865, + "learning_rate": 5.272632099657744e-06, + "loss": 0.8006, "step": 23539 }, { - "epoch": 0.6679909194097616, + "epoch": 0.6670633908583411, "grad_norm": 0.0, - "learning_rate": 5.245504288945137e-06, - "loss": 0.7403, + "learning_rate": 5.2718233602905265e-06, + "loss": 0.8781, "step": 23540 }, { - "epoch": 0.6680192962542565, + "epoch": 0.6670917283006036, "grad_norm": 0.0, - "learning_rate": 5.244695756551088e-06, - "loss": 0.775, + "learning_rate": 5.271014660750498e-06, + "loss": 0.9402, "step": 23541 }, { - "epoch": 0.6680476730987515, + "epoch": 0.6671200657428661, "grad_norm": 0.0, - "learning_rate": 5.2438872643253e-06, - "loss": 0.7762, + "learning_rate": 5.270206001044471e-06, + "loss": 0.8158, "step": 23542 }, { - "epoch": 0.6680760499432463, + "epoch": 0.6671484031851285, "grad_norm": 0.0, - "learning_rate": 5.243078812274615e-06, - "loss": 0.7492, + "learning_rate": 5.269397381179253e-06, + "loss": 0.8833, "step": 23543 }, { - "epoch": 0.6681044267877412, + "epoch": 0.667176740627391, "grad_norm": 0.0, - "learning_rate": 5.24227040040586e-06, - "loss": 0.7792, + "learning_rate": 5.268588801161661e-06, + "loss": 0.8706, "step": 23544 }, { - "epoch": 0.6681328036322361, + "epoch": 0.6672050780696535, "grad_norm": 0.0, - "learning_rate": 5.241462028725858e-06, - "loss": 0.8945, + "learning_rate": 5.2677802609984974e-06, + "loss": 0.7124, "step": 23545 }, { - "epoch": 0.668161180476731, + "epoch": 0.6672334155119158, "grad_norm": 0.0, - "learning_rate": 5.240653697241439e-06, - "loss": 0.8089, + "learning_rate": 5.2669717606965785e-06, + "loss": 0.8363, "step": 23546 }, { - "epoch": 0.6681895573212259, + "epoch": 0.6672617529541783, "grad_norm": 0.0, - "learning_rate": 5.239845405959436e-06, - "loss": 0.8364, + "learning_rate": 5.266163300262714e-06, + "loss": 0.8104, "step": 23547 }, { - "epoch": 0.6682179341657207, + "epoch": 0.6672900903964408, "grad_norm": 0.0, - "learning_rate": 5.23903715488667e-06, - "loss": 0.8234, + "learning_rate": 5.265354879703715e-06, + "loss": 0.7294, "step": 23548 }, { - "epoch": 0.6682463110102157, + "epoch": 0.6673184278387033, "grad_norm": 0.0, - "learning_rate": 5.238228944029971e-06, - "loss": 0.8571, + "learning_rate": 5.264546499026388e-06, + "loss": 0.7785, "step": 23549 }, { - "epoch": 0.6682746878547106, + "epoch": 0.6673467652809657, "grad_norm": 0.0, - "learning_rate": 5.237420773396169e-06, - "loss": 0.7653, + "learning_rate": 5.26373815823755e-06, + "loss": 0.8287, "step": 23550 }, { - "epoch": 0.6683030646992054, + "epoch": 0.6673751027232282, "grad_norm": 0.0, - "learning_rate": 5.236612642992085e-06, - "loss": 0.8765, + "learning_rate": 5.262929857344e-06, + "loss": 0.8125, "step": 23551 }, { - "epoch": 0.6683314415437004, + "epoch": 0.6674034401654907, "grad_norm": 0.0, - "learning_rate": 5.235804552824548e-06, - "loss": 0.7989, + "learning_rate": 5.262121596352554e-06, + "loss": 0.8103, "step": 23552 }, { - "epoch": 0.6683598183881952, + "epoch": 0.6674317776077531, "grad_norm": 0.0, - "learning_rate": 5.234996502900387e-06, - "loss": 0.9182, + "learning_rate": 5.2613133752700145e-06, + "loss": 0.8617, "step": 23553 }, { - "epoch": 0.6683881952326901, + "epoch": 0.6674601150500156, "grad_norm": 0.0, - "learning_rate": 5.234188493226422e-06, - "loss": 0.8865, + "learning_rate": 5.260505194103191e-06, + "loss": 0.7888, "step": 23554 }, { - "epoch": 0.668416572077185, + "epoch": 0.6674884524922781, "grad_norm": 0.0, - "learning_rate": 5.2333805238094835e-06, - "loss": 0.9138, + "learning_rate": 5.259697052858896e-06, + "loss": 0.8651, "step": 23555 }, { - "epoch": 0.6684449489216799, + "epoch": 0.6675167899345406, "grad_norm": 0.0, - "learning_rate": 5.232572594656392e-06, - "loss": 0.7136, + "learning_rate": 5.25888895154393e-06, + "loss": 0.7974, "step": 23556 }, { - "epoch": 0.6684733257661748, + "epoch": 0.6675451273768029, "grad_norm": 0.0, - "learning_rate": 5.231764705773973e-06, - "loss": 0.8262, + "learning_rate": 5.258080890165102e-06, + "loss": 0.7761, "step": 23557 }, { - "epoch": 0.6685017026106697, + "epoch": 0.6675734648190654, "grad_norm": 0.0, - "learning_rate": 5.230956857169052e-06, - "loss": 0.852, + "learning_rate": 5.257272868729225e-06, + "loss": 0.7886, "step": 23558 }, { - "epoch": 0.6685300794551646, + "epoch": 0.6676018022613279, "grad_norm": 0.0, - "learning_rate": 5.230149048848453e-06, - "loss": 0.8658, + "learning_rate": 5.256464887243095e-06, + "loss": 0.7823, "step": 23559 }, { - "epoch": 0.6685584562996595, + "epoch": 0.6676301397035903, "grad_norm": 0.0, - "learning_rate": 5.229341280818999e-06, - "loss": 0.9817, + "learning_rate": 5.255656945713523e-06, + "loss": 0.9011, "step": 23560 }, { - "epoch": 0.6685868331441543, + "epoch": 0.6676584771458528, "grad_norm": 0.0, - "learning_rate": 5.228533553087518e-06, - "loss": 0.8883, + "learning_rate": 5.254849044147313e-06, + "loss": 0.8492, "step": 23561 }, { - "epoch": 0.6686152099886493, + "epoch": 0.6676868145881153, "grad_norm": 0.0, - "learning_rate": 5.227725865660825e-06, - "loss": 0.7903, + "learning_rate": 5.2540411825512724e-06, + "loss": 0.8255, "step": 23562 }, { - "epoch": 0.6686435868331442, + "epoch": 0.6677151520303778, "grad_norm": 0.0, - "learning_rate": 5.2269182185457456e-06, - "loss": 0.8203, + "learning_rate": 5.2532333609322096e-06, + "loss": 0.8958, "step": 23563 }, { - "epoch": 0.668671963677639, + "epoch": 0.6677434894726402, "grad_norm": 0.0, - "learning_rate": 5.226110611749107e-06, - "loss": 0.898, + "learning_rate": 5.25242557929692e-06, + "loss": 0.7846, "step": 23564 }, { - "epoch": 0.6687003405221339, + "epoch": 0.6677718269149027, "grad_norm": 0.0, - "learning_rate": 5.225303045277721e-06, - "loss": 0.7175, + "learning_rate": 5.251617837652219e-06, + "loss": 0.8226, "step": 23565 }, { - "epoch": 0.6687287173666289, + "epoch": 0.6678001643571652, "grad_norm": 0.0, - "learning_rate": 5.224495519138417e-06, - "loss": 0.9117, + "learning_rate": 5.250810136004898e-06, + "loss": 0.8074, "step": 23566 }, { - "epoch": 0.6687570942111237, + "epoch": 0.6678285017994275, "grad_norm": 0.0, - "learning_rate": 5.223688033338017e-06, - "loss": 0.872, + "learning_rate": 5.250002474361766e-06, + "loss": 0.827, "step": 23567 }, { - "epoch": 0.6687854710556186, + "epoch": 0.66785683924169, "grad_norm": 0.0, - "learning_rate": 5.222880587883335e-06, - "loss": 0.833, + "learning_rate": 5.249194852729633e-06, + "loss": 0.8847, "step": 23568 }, { - "epoch": 0.6688138479001136, + "epoch": 0.6678851766839525, "grad_norm": 0.0, - "learning_rate": 5.2220731827811955e-06, - "loss": 0.7766, + "learning_rate": 5.248387271115292e-06, + "loss": 0.7825, "step": 23569 }, { - "epoch": 0.6688422247446084, + "epoch": 0.6679135141262149, "grad_norm": 0.0, - "learning_rate": 5.221265818038422e-06, - "loss": 0.8368, + "learning_rate": 5.247579729525548e-06, + "loss": 0.7253, "step": 23570 }, { - "epoch": 0.6688706015891033, + "epoch": 0.6679418515684774, "grad_norm": 0.0, - "learning_rate": 5.220458493661829e-06, - "loss": 0.9729, + "learning_rate": 5.24677222796721e-06, + "loss": 0.7648, "step": 23571 }, { - "epoch": 0.6688989784335981, + "epoch": 0.6679701890107399, "grad_norm": 0.0, - "learning_rate": 5.219651209658235e-06, - "loss": 0.8012, + "learning_rate": 5.245964766447069e-06, + "loss": 0.8432, "step": 23572 }, { - "epoch": 0.6689273552780931, + "epoch": 0.6679985264530024, "grad_norm": 0.0, - "learning_rate": 5.218843966034464e-06, - "loss": 0.7401, + "learning_rate": 5.245157344971934e-06, + "loss": 0.9088, "step": 23573 }, { - "epoch": 0.668955732122588, + "epoch": 0.6680268638952648, "grad_norm": 0.0, - "learning_rate": 5.2180367627973324e-06, - "loss": 0.8134, + "learning_rate": 5.244349963548603e-06, + "loss": 0.9187, "step": 23574 }, { - "epoch": 0.6689841089670828, + "epoch": 0.6680552013375273, "grad_norm": 0.0, - "learning_rate": 5.2172295999536635e-06, - "loss": 0.8592, + "learning_rate": 5.2435426221838795e-06, + "loss": 0.9318, "step": 23575 }, { - "epoch": 0.6690124858115778, + "epoch": 0.6680835387797898, "grad_norm": 0.0, - "learning_rate": 5.216422477510267e-06, - "loss": 0.8763, + "learning_rate": 5.242735320884566e-06, + "loss": 0.8442, "step": 23576 }, { - "epoch": 0.6690408626560727, + "epoch": 0.6681118762220521, "grad_norm": 0.0, - "learning_rate": 5.2156153954739655e-06, - "loss": 0.8539, + "learning_rate": 5.241928059657455e-06, + "loss": 0.8757, "step": 23577 }, { - "epoch": 0.6690692395005675, + "epoch": 0.6681402136643146, "grad_norm": 0.0, - "learning_rate": 5.2148083538515796e-06, - "loss": 0.8524, + "learning_rate": 5.2411208385093515e-06, + "loss": 0.8121, "step": 23578 }, { - "epoch": 0.6690976163450624, + "epoch": 0.6681685511065771, "grad_norm": 0.0, - "learning_rate": 5.214001352649918e-06, - "loss": 0.9075, + "learning_rate": 5.240313657447058e-06, + "loss": 0.7363, "step": 23579 }, { - "epoch": 0.6691259931895573, + "epoch": 0.6681968885488396, "grad_norm": 0.0, - "learning_rate": 5.213194391875802e-06, - "loss": 0.7653, + "learning_rate": 5.2395065164773665e-06, + "loss": 0.9318, "step": 23580 }, { - "epoch": 0.6691543700340522, + "epoch": 0.668225225991102, "grad_norm": 0.0, - "learning_rate": 5.212387471536053e-06, - "loss": 0.8388, + "learning_rate": 5.2386994156070825e-06, + "loss": 0.8033, "step": 23581 }, { - "epoch": 0.6691827468785471, + "epoch": 0.6682535634333645, "grad_norm": 0.0, - "learning_rate": 5.211580591637478e-06, - "loss": 0.733, + "learning_rate": 5.237892354842998e-06, + "loss": 0.814, "step": 23582 }, { - "epoch": 0.669211123723042, + "epoch": 0.668281900875627, "grad_norm": 0.0, - "learning_rate": 5.210773752186895e-06, - "loss": 0.7982, + "learning_rate": 5.2370853341919145e-06, + "loss": 0.8253, "step": 23583 }, { - "epoch": 0.6692395005675369, + "epoch": 0.6683102383178894, "grad_norm": 0.0, - "learning_rate": 5.209966953191128e-06, - "loss": 0.9126, + "learning_rate": 5.236278353660634e-06, + "loss": 0.9191, "step": 23584 }, { - "epoch": 0.6692678774120318, + "epoch": 0.6683385757601519, "grad_norm": 0.0, - "learning_rate": 5.20916019465698e-06, - "loss": 0.8473, + "learning_rate": 5.235471413255946e-06, + "loss": 0.7823, "step": 23585 }, { - "epoch": 0.6692962542565267, + "epoch": 0.6683669132024144, "grad_norm": 0.0, - "learning_rate": 5.208353476591272e-06, - "loss": 0.827, + "learning_rate": 5.2346645129846504e-06, + "loss": 0.8998, "step": 23586 }, { - "epoch": 0.6693246311010216, + "epoch": 0.6683952506446769, "grad_norm": 0.0, - "learning_rate": 5.207546799000822e-06, - "loss": 0.7996, + "learning_rate": 5.233857652853547e-06, + "loss": 0.9571, "step": 23587 }, { - "epoch": 0.6693530079455164, + "epoch": 0.6684235880869392, "grad_norm": 0.0, - "learning_rate": 5.206740161892432e-06, - "loss": 0.7912, + "learning_rate": 5.23305083286943e-06, + "loss": 0.8065, "step": 23588 }, { - "epoch": 0.6693813847900113, + "epoch": 0.6684519255292017, "grad_norm": 0.0, - "learning_rate": 5.205933565272929e-06, - "loss": 0.8461, + "learning_rate": 5.232244053039099e-06, + "loss": 0.7567, "step": 23589 }, { - "epoch": 0.6694097616345063, + "epoch": 0.6684802629714642, "grad_norm": 0.0, - "learning_rate": 5.205127009149118e-06, - "loss": 0.8777, + "learning_rate": 5.231437313369343e-06, + "loss": 0.9603, "step": 23590 }, { - "epoch": 0.6694381384790011, + "epoch": 0.6685086004137266, "grad_norm": 0.0, - "learning_rate": 5.204320493527815e-06, - "loss": 0.9042, + "learning_rate": 5.230630613866962e-06, + "loss": 0.8503, "step": 23591 }, { - "epoch": 0.669466515323496, + "epoch": 0.6685369378559891, "grad_norm": 0.0, - "learning_rate": 5.2035140184158365e-06, - "loss": 0.7756, + "learning_rate": 5.229823954538754e-06, + "loss": 0.8599, "step": 23592 }, { - "epoch": 0.669494892167991, + "epoch": 0.6685652752982516, "grad_norm": 0.0, - "learning_rate": 5.202707583819986e-06, - "loss": 0.7926, + "learning_rate": 5.229017335391506e-06, + "loss": 0.7991, "step": 23593 }, { - "epoch": 0.6695232690124858, + "epoch": 0.668593612740514, "grad_norm": 0.0, - "learning_rate": 5.201901189747081e-06, - "loss": 0.868, + "learning_rate": 5.228210756432021e-06, + "loss": 0.8332, "step": 23594 }, { - "epoch": 0.6695516458569807, + "epoch": 0.6686219501827765, "grad_norm": 0.0, - "learning_rate": 5.201094836203935e-06, - "loss": 0.8756, + "learning_rate": 5.227404217667085e-06, + "loss": 0.8324, "step": 23595 }, { - "epoch": 0.6695800227014755, + "epoch": 0.668650287625039, "grad_norm": 0.0, - "learning_rate": 5.200288523197354e-06, - "loss": 0.7624, + "learning_rate": 5.226597719103495e-06, + "loss": 0.8307, "step": 23596 }, { - "epoch": 0.6696083995459705, + "epoch": 0.6686786250673015, "grad_norm": 0.0, - "learning_rate": 5.199482250734151e-06, - "loss": 0.8109, + "learning_rate": 5.22579126074805e-06, + "loss": 0.8668, "step": 23597 }, { - "epoch": 0.6696367763904654, + "epoch": 0.6687069625095639, "grad_norm": 0.0, - "learning_rate": 5.198676018821142e-06, - "loss": 0.7811, + "learning_rate": 5.2249848426075325e-06, + "loss": 0.8052, "step": 23598 }, { - "epoch": 0.6696651532349602, + "epoch": 0.6687352999518263, "grad_norm": 0.0, - "learning_rate": 5.1978698274651276e-06, - "loss": 0.8859, + "learning_rate": 5.224178464688742e-06, + "loss": 0.7452, "step": 23599 }, { - "epoch": 0.6696935300794552, + "epoch": 0.6687636373940888, "grad_norm": 0.0, - "learning_rate": 5.1970636766729225e-06, - "loss": 0.9515, + "learning_rate": 5.2233721269984695e-06, + "loss": 0.7018, "step": 23600 }, { - "epoch": 0.6697219069239501, + "epoch": 0.6687919748363512, "grad_norm": 0.0, - "learning_rate": 5.196257566451342e-06, - "loss": 0.7103, + "learning_rate": 5.2225658295435065e-06, + "loss": 0.8467, "step": 23601 }, { - "epoch": 0.6697502837684449, + "epoch": 0.6688203122786137, "grad_norm": 0.0, - "learning_rate": 5.195451496807184e-06, - "loss": 0.7103, + "learning_rate": 5.22175957233065e-06, + "loss": 0.8535, "step": 23602 }, { - "epoch": 0.6697786606129399, + "epoch": 0.6688486497208762, "grad_norm": 0.0, - "learning_rate": 5.1946454677472635e-06, - "loss": 0.9054, + "learning_rate": 5.220953355366684e-06, + "loss": 0.7446, "step": 23603 }, { - "epoch": 0.6698070374574348, + "epoch": 0.6688769871631387, "grad_norm": 0.0, - "learning_rate": 5.193839479278388e-06, - "loss": 0.8771, + "learning_rate": 5.220147178658401e-06, + "loss": 0.8227, "step": 23604 }, { - "epoch": 0.6698354143019296, + "epoch": 0.6689053246054011, "grad_norm": 0.0, - "learning_rate": 5.1930335314073685e-06, - "loss": 0.8892, + "learning_rate": 5.219341042212598e-06, + "loss": 0.7496, "step": 23605 }, { - "epoch": 0.6698637911464245, + "epoch": 0.6689336620476636, "grad_norm": 0.0, - "learning_rate": 5.192227624141014e-06, - "loss": 0.85, + "learning_rate": 5.218534946036057e-06, + "loss": 0.8725, "step": 23606 }, { - "epoch": 0.6698921679909194, + "epoch": 0.6689619994899261, "grad_norm": 0.0, - "learning_rate": 5.191421757486125e-06, - "loss": 0.9474, + "learning_rate": 5.2177288901355714e-06, + "loss": 0.991, "step": 23607 }, { - "epoch": 0.6699205448354143, + "epoch": 0.6689903369321885, "grad_norm": 0.0, - "learning_rate": 5.190615931449512e-06, - "loss": 0.8403, + "learning_rate": 5.216922874517935e-06, + "loss": 0.8498, "step": 23608 }, { - "epoch": 0.6699489216799092, + "epoch": 0.669018674374451, "grad_norm": 0.0, - "learning_rate": 5.189810146037987e-06, - "loss": 0.8329, + "learning_rate": 5.216116899189929e-06, + "loss": 0.8183, "step": 23609 }, { - "epoch": 0.6699772985244041, + "epoch": 0.6690470118167134, "grad_norm": 0.0, - "learning_rate": 5.1890044012583476e-06, - "loss": 0.8391, + "learning_rate": 5.215310964158351e-06, + "loss": 0.85, "step": 23610 }, { - "epoch": 0.670005675368899, + "epoch": 0.6690753492589759, "grad_norm": 0.0, - "learning_rate": 5.188198697117403e-06, - "loss": 0.9051, + "learning_rate": 5.214505069429982e-06, + "loss": 0.8079, "step": 23611 }, { - "epoch": 0.6700340522133938, + "epoch": 0.6691036867012383, "grad_norm": 0.0, - "learning_rate": 5.187393033621967e-06, - "loss": 0.8118, + "learning_rate": 5.213699215011614e-06, + "loss": 0.8908, "step": 23612 }, { - "epoch": 0.6700624290578887, + "epoch": 0.6691320241435008, "grad_norm": 0.0, - "learning_rate": 5.1865874107788336e-06, - "loss": 0.7512, + "learning_rate": 5.2128934009100356e-06, + "loss": 0.9901, "step": 23613 }, { - "epoch": 0.6700908059023837, + "epoch": 0.6691603615857633, "grad_norm": 0.0, - "learning_rate": 5.185781828594811e-06, - "loss": 0.886, + "learning_rate": 5.212087627132032e-06, + "loss": 0.8448, "step": 23614 }, { - "epoch": 0.6701191827468785, + "epoch": 0.6691886990280257, "grad_norm": 0.0, - "learning_rate": 5.184976287076712e-06, - "loss": 0.8412, + "learning_rate": 5.211281893684392e-06, + "loss": 0.7396, "step": 23615 }, { - "epoch": 0.6701475595913734, + "epoch": 0.6692170364702882, "grad_norm": 0.0, - "learning_rate": 5.184170786231328e-06, - "loss": 0.8605, + "learning_rate": 5.2104762005739084e-06, + "loss": 0.8341, "step": 23616 }, { - "epoch": 0.6701759364358684, + "epoch": 0.6692453739125507, "grad_norm": 0.0, - "learning_rate": 5.183365326065472e-06, - "loss": 0.8742, + "learning_rate": 5.209670547807357e-06, + "loss": 0.8026, "step": 23617 }, { - "epoch": 0.6702043132803632, + "epoch": 0.6692737113548131, "grad_norm": 0.0, - "learning_rate": 5.18255990658595e-06, - "loss": 0.8434, + "learning_rate": 5.208864935391536e-06, + "loss": 0.8462, "step": 23618 }, { - "epoch": 0.6702326901248581, + "epoch": 0.6693020487970756, "grad_norm": 0.0, - "learning_rate": 5.181754527799552e-06, - "loss": 0.9024, + "learning_rate": 5.208059363333218e-06, + "loss": 0.8812, "step": 23619 }, { - "epoch": 0.6702610669693531, + "epoch": 0.669330386239338, "grad_norm": 0.0, - "learning_rate": 5.180949189713098e-06, - "loss": 0.9323, + "learning_rate": 5.2072538316391965e-06, + "loss": 0.8881, "step": 23620 }, { - "epoch": 0.6702894438138479, + "epoch": 0.6693587236816005, "grad_norm": 0.0, - "learning_rate": 5.1801438923333805e-06, - "loss": 0.8898, + "learning_rate": 5.2064483403162595e-06, + "loss": 0.9528, "step": 23621 }, { - "epoch": 0.6703178206583428, + "epoch": 0.6693870611238629, "grad_norm": 0.0, - "learning_rate": 5.179338635667201e-06, - "loss": 0.8032, + "learning_rate": 5.205642889371184e-06, + "loss": 0.8599, "step": 23622 }, { - "epoch": 0.6703461975028376, + "epoch": 0.6694153985661254, "grad_norm": 0.0, - "learning_rate": 5.178533419721371e-06, - "loss": 0.7794, + "learning_rate": 5.2048374788107625e-06, + "loss": 0.8675, "step": 23623 }, { - "epoch": 0.6703745743473326, + "epoch": 0.6694437360083879, "grad_norm": 0.0, - "learning_rate": 5.177728244502682e-06, - "loss": 0.9131, + "learning_rate": 5.204032108641772e-06, + "loss": 0.8242, "step": 23624 }, { - "epoch": 0.6704029511918275, + "epoch": 0.6694720734506503, "grad_norm": 0.0, - "learning_rate": 5.176923110017938e-06, - "loss": 0.8162, + "learning_rate": 5.203226778871e-06, + "loss": 0.8475, "step": 23625 }, { - "epoch": 0.6704313280363223, + "epoch": 0.6695004108929128, "grad_norm": 0.0, - "learning_rate": 5.176118016273947e-06, - "loss": 0.8331, + "learning_rate": 5.202421489505231e-06, + "loss": 0.9568, "step": 23626 }, { - "epoch": 0.6704597048808173, + "epoch": 0.6695287483351753, "grad_norm": 0.0, - "learning_rate": 5.175312963277499e-06, - "loss": 0.9911, + "learning_rate": 5.2016162405512464e-06, + "loss": 0.8136, "step": 23627 }, { - "epoch": 0.6704880817253122, + "epoch": 0.6695570857774378, "grad_norm": 0.0, - "learning_rate": 5.174507951035399e-06, - "loss": 0.814, + "learning_rate": 5.20081103201583e-06, + "loss": 0.8313, "step": 23628 }, { - "epoch": 0.670516458569807, + "epoch": 0.6695854232197002, "grad_norm": 0.0, - "learning_rate": 5.173702979554452e-06, - "loss": 0.8305, + "learning_rate": 5.200005863905768e-06, + "loss": 0.8134, "step": 23629 }, { - "epoch": 0.6705448354143019, + "epoch": 0.6696137606619627, "grad_norm": 0.0, - "learning_rate": 5.172898048841448e-06, - "loss": 0.8704, + "learning_rate": 5.199200736227836e-06, + "loss": 0.7627, "step": 23630 }, { - "epoch": 0.6705732122587968, + "epoch": 0.6696420981042251, "grad_norm": 0.0, - "learning_rate": 5.172093158903191e-06, - "loss": 0.841, + "learning_rate": 5.198395648988823e-06, + "loss": 0.8902, "step": 23631 }, { - "epoch": 0.6706015891032917, + "epoch": 0.6696704355464875, "grad_norm": 0.0, - "learning_rate": 5.171288309746485e-06, - "loss": 0.8203, + "learning_rate": 5.1975906021955016e-06, + "loss": 0.9271, "step": 23632 }, { - "epoch": 0.6706299659477866, + "epoch": 0.66969877298875, "grad_norm": 0.0, - "learning_rate": 5.17048350137812e-06, - "loss": 0.8043, + "learning_rate": 5.196785595854659e-06, + "loss": 0.8417, "step": 23633 }, { - "epoch": 0.6706583427922815, + "epoch": 0.6697271104310125, "grad_norm": 0.0, - "learning_rate": 5.169678733804893e-06, - "loss": 0.7727, + "learning_rate": 5.195980629973077e-06, + "loss": 0.8773, "step": 23634 }, { - "epoch": 0.6706867196367764, + "epoch": 0.669755447873275, "grad_norm": 0.0, - "learning_rate": 5.168874007033615e-06, - "loss": 0.8447, + "learning_rate": 5.195175704557531e-06, + "loss": 0.8593, "step": 23635 }, { - "epoch": 0.6707150964812713, + "epoch": 0.6697837853155374, "grad_norm": 0.0, - "learning_rate": 5.168069321071072e-06, - "loss": 0.7327, + "learning_rate": 5.194370819614806e-06, + "loss": 0.8795, "step": 23636 }, { - "epoch": 0.6707434733257662, + "epoch": 0.6698121227577999, "grad_norm": 0.0, - "learning_rate": 5.167264675924068e-06, - "loss": 0.8083, + "learning_rate": 5.193565975151682e-06, + "loss": 0.8241, "step": 23637 }, { - "epoch": 0.6707718501702611, + "epoch": 0.6698404602000624, "grad_norm": 0.0, - "learning_rate": 5.166460071599394e-06, - "loss": 0.8907, + "learning_rate": 5.192761171174934e-06, + "loss": 0.7895, "step": 23638 }, { - "epoch": 0.670800227014756, + "epoch": 0.6698687976423248, "grad_norm": 0.0, - "learning_rate": 5.165655508103848e-06, - "loss": 0.8237, + "learning_rate": 5.191956407691343e-06, + "loss": 0.8492, "step": 23639 }, { - "epoch": 0.6708286038592508, + "epoch": 0.6698971350845873, "grad_norm": 0.0, - "learning_rate": 5.1648509854442305e-06, - "loss": 0.8507, + "learning_rate": 5.191151684707689e-06, + "loss": 0.8823, "step": 23640 }, { - "epoch": 0.6708569807037458, + "epoch": 0.6699254725268498, "grad_norm": 0.0, - "learning_rate": 5.164046503627331e-06, - "loss": 0.851, + "learning_rate": 5.190347002230749e-06, + "loss": 0.8301, "step": 23641 }, { - "epoch": 0.6708853575482406, + "epoch": 0.6699538099691121, "grad_norm": 0.0, - "learning_rate": 5.163242062659947e-06, - "loss": 0.874, + "learning_rate": 5.189542360267308e-06, + "loss": 0.8726, "step": 23642 }, { - "epoch": 0.6709137343927355, + "epoch": 0.6699821474113746, "grad_norm": 0.0, - "learning_rate": 5.16243766254888e-06, - "loss": 0.8995, + "learning_rate": 5.188737758824132e-06, + "loss": 0.8409, "step": 23643 }, { - "epoch": 0.6709421112372305, + "epoch": 0.6700104848536371, "grad_norm": 0.0, - "learning_rate": 5.161633303300916e-06, - "loss": 0.7488, + "learning_rate": 5.187933197908006e-06, + "loss": 0.7854, "step": 23644 }, { - "epoch": 0.6709704880817253, + "epoch": 0.6700388222958996, "grad_norm": 0.0, - "learning_rate": 5.1608289849228525e-06, - "loss": 0.797, + "learning_rate": 5.187128677525709e-06, + "loss": 0.818, "step": 23645 }, { - "epoch": 0.6709988649262202, + "epoch": 0.670067159738162, "grad_norm": 0.0, - "learning_rate": 5.1600247074214875e-06, - "loss": 0.8419, + "learning_rate": 5.18632419768401e-06, + "loss": 0.8979, "step": 23646 }, { - "epoch": 0.671027241770715, + "epoch": 0.6700954971804245, "grad_norm": 0.0, - "learning_rate": 5.159220470803608e-06, - "loss": 0.8064, + "learning_rate": 5.185519758389694e-06, + "loss": 0.8598, "step": 23647 }, { - "epoch": 0.67105561861521, + "epoch": 0.670123834622687, "grad_norm": 0.0, - "learning_rate": 5.1584162750760105e-06, - "loss": 0.7853, + "learning_rate": 5.18471535964953e-06, + "loss": 0.8822, "step": 23648 }, { - "epoch": 0.6710839954597049, + "epoch": 0.6701521720649494, "grad_norm": 0.0, - "learning_rate": 5.1576121202454925e-06, - "loss": 0.8147, + "learning_rate": 5.183911001470296e-06, + "loss": 0.8907, "step": 23649 }, { - "epoch": 0.6711123723041997, + "epoch": 0.6701805095072119, "grad_norm": 0.0, - "learning_rate": 5.156808006318834e-06, - "loss": 0.9078, + "learning_rate": 5.183106683858772e-06, + "loss": 0.7942, "step": 23650 }, { - "epoch": 0.6711407491486947, + "epoch": 0.6702088469494744, "grad_norm": 0.0, - "learning_rate": 5.15600393330284e-06, - "loss": 0.8575, + "learning_rate": 5.182302406821724e-06, + "loss": 0.7835, "step": 23651 }, { - "epoch": 0.6711691259931896, + "epoch": 0.6702371843917369, "grad_norm": 0.0, - "learning_rate": 5.155199901204303e-06, - "loss": 0.8737, + "learning_rate": 5.181498170365933e-06, + "loss": 0.8405, "step": 23652 }, { - "epoch": 0.6711975028376844, + "epoch": 0.6702655218339992, "grad_norm": 0.0, - "learning_rate": 5.154395910030005e-06, - "loss": 0.8806, + "learning_rate": 5.180693974498172e-06, + "loss": 0.8855, "step": 23653 }, { - "epoch": 0.6712258796821794, + "epoch": 0.6702938592762617, "grad_norm": 0.0, - "learning_rate": 5.153591959786745e-06, - "loss": 0.9112, + "learning_rate": 5.179889819225215e-06, + "loss": 0.8628, "step": 23654 }, { - "epoch": 0.6712542565266743, + "epoch": 0.6703221967185242, "grad_norm": 0.0, - "learning_rate": 5.152788050481314e-06, - "loss": 0.9117, + "learning_rate": 5.179085704553841e-06, + "loss": 0.763, "step": 23655 }, { - "epoch": 0.6712826333711691, + "epoch": 0.6703505341607866, "grad_norm": 0.0, - "learning_rate": 5.151984182120496e-06, - "loss": 0.8222, + "learning_rate": 5.178281630490814e-06, + "loss": 0.8284, "step": 23656 }, { - "epoch": 0.671311010215664, + "epoch": 0.6703788716030491, "grad_norm": 0.0, - "learning_rate": 5.151180354711087e-06, - "loss": 0.7693, + "learning_rate": 5.177477597042911e-06, + "loss": 0.8312, "step": 23657 }, { - "epoch": 0.6713393870601589, + "epoch": 0.6704072090453116, "grad_norm": 0.0, - "learning_rate": 5.150376568259879e-06, - "loss": 0.8096, + "learning_rate": 5.176673604216908e-06, + "loss": 0.7937, "step": 23658 }, { - "epoch": 0.6713677639046538, + "epoch": 0.6704355464875741, "grad_norm": 0.0, - "learning_rate": 5.149572822773652e-06, - "loss": 0.8841, + "learning_rate": 5.175869652019571e-06, + "loss": 0.9702, "step": 23659 }, { - "epoch": 0.6713961407491487, + "epoch": 0.6704638839298365, "grad_norm": 0.0, - "learning_rate": 5.148769118259204e-06, - "loss": 0.8185, + "learning_rate": 5.175065740457679e-06, + "loss": 0.7901, "step": 23660 }, { - "epoch": 0.6714245175936436, + "epoch": 0.670492221372099, "grad_norm": 0.0, - "learning_rate": 5.1479654547233246e-06, - "loss": 0.7502, + "learning_rate": 5.174261869537997e-06, + "loss": 0.714, "step": 23661 }, { - "epoch": 0.6714528944381385, + "epoch": 0.6705205588143615, "grad_norm": 0.0, - "learning_rate": 5.147161832172795e-06, - "loss": 0.8371, + "learning_rate": 5.173458039267299e-06, + "loss": 0.7576, "step": 23662 }, { - "epoch": 0.6714812712826334, + "epoch": 0.6705488962566238, "grad_norm": 0.0, - "learning_rate": 5.146358250614412e-06, - "loss": 0.8679, + "learning_rate": 5.172654249652359e-06, + "loss": 0.8451, "step": 23663 }, { - "epoch": 0.6715096481271282, + "epoch": 0.6705772336988863, "grad_norm": 0.0, - "learning_rate": 5.145554710054954e-06, - "loss": 0.859, + "learning_rate": 5.171850500699942e-06, + "loss": 0.7811, "step": 23664 }, { - "epoch": 0.6715380249716232, + "epoch": 0.6706055711411488, "grad_norm": 0.0, - "learning_rate": 5.14475121050121e-06, - "loss": 0.8531, + "learning_rate": 5.17104679241682e-06, + "loss": 0.8963, "step": 23665 }, { - "epoch": 0.671566401816118, + "epoch": 0.6706339085834112, "grad_norm": 0.0, - "learning_rate": 5.143947751959979e-06, - "loss": 0.8892, + "learning_rate": 5.170243124809766e-06, + "loss": 0.7973, "step": 23666 }, { - "epoch": 0.6715947786606129, + "epoch": 0.6706622460256737, "grad_norm": 0.0, - "learning_rate": 5.143144334438035e-06, - "loss": 0.8964, + "learning_rate": 5.169439497885546e-06, + "loss": 0.7609, "step": 23667 }, { - "epoch": 0.6716231555051079, + "epoch": 0.6706905834679362, "grad_norm": 0.0, - "learning_rate": 5.14234095794217e-06, - "loss": 0.8127, + "learning_rate": 5.168635911650937e-06, + "loss": 0.7733, "step": 23668 }, { - "epoch": 0.6716515323496027, + "epoch": 0.6707189209101987, "grad_norm": 0.0, - "learning_rate": 5.1415376224791734e-06, - "loss": 0.8514, + "learning_rate": 5.167832366112695e-06, + "loss": 0.8119, "step": 23669 }, { - "epoch": 0.6716799091940976, + "epoch": 0.6707472583524611, "grad_norm": 0.0, - "learning_rate": 5.140734328055822e-06, - "loss": 0.8279, + "learning_rate": 5.167028861277597e-06, + "loss": 0.8006, "step": 23670 }, { - "epoch": 0.6717082860385926, + "epoch": 0.6707755957947236, "grad_norm": 0.0, - "learning_rate": 5.139931074678907e-06, - "loss": 0.8813, + "learning_rate": 5.166225397152414e-06, + "loss": 0.8847, "step": 23671 }, { - "epoch": 0.6717366628830874, + "epoch": 0.6708039332369861, "grad_norm": 0.0, - "learning_rate": 5.139127862355215e-06, - "loss": 0.8216, + "learning_rate": 5.165421973743905e-06, + "loss": 0.9137, "step": 23672 }, { - "epoch": 0.6717650397275823, + "epoch": 0.6708322706792484, "grad_norm": 0.0, - "learning_rate": 5.138324691091527e-06, - "loss": 0.7129, + "learning_rate": 5.164618591058845e-06, + "loss": 0.8882, "step": 23673 }, { - "epoch": 0.6717934165720771, + "epoch": 0.6708606081215109, "grad_norm": 0.0, - "learning_rate": 5.1375215608946275e-06, - "loss": 0.8909, + "learning_rate": 5.163815249103994e-06, + "loss": 0.8681, "step": 23674 }, { - "epoch": 0.6718217934165721, + "epoch": 0.6708889455637734, "grad_norm": 0.0, - "learning_rate": 5.136718471771307e-06, - "loss": 0.7866, + "learning_rate": 5.163011947886125e-06, + "loss": 0.8794, "step": 23675 }, { - "epoch": 0.671850170261067, + "epoch": 0.6709172830060359, "grad_norm": 0.0, - "learning_rate": 5.135915423728339e-06, - "loss": 0.7991, + "learning_rate": 5.162208687412005e-06, + "loss": 0.8226, "step": 23676 }, { - "epoch": 0.6718785471055618, + "epoch": 0.6709456204482983, "grad_norm": 0.0, - "learning_rate": 5.135112416772512e-06, - "loss": 0.7838, + "learning_rate": 5.161405467688394e-06, + "loss": 0.7866, "step": 23677 }, { - "epoch": 0.6719069239500568, + "epoch": 0.6709739578905608, "grad_norm": 0.0, - "learning_rate": 5.134309450910613e-06, - "loss": 0.8842, + "learning_rate": 5.160602288722062e-06, + "loss": 0.8721, "step": 23678 }, { - "epoch": 0.6719353007945517, + "epoch": 0.6710022953328233, "grad_norm": 0.0, - "learning_rate": 5.133506526149418e-06, - "loss": 0.8447, + "learning_rate": 5.159799150519773e-06, + "loss": 0.8616, "step": 23679 }, { - "epoch": 0.6719636776390465, + "epoch": 0.6710306327750857, "grad_norm": 0.0, - "learning_rate": 5.132703642495711e-06, - "loss": 0.8811, + "learning_rate": 5.158996053088294e-06, + "loss": 0.8755, "step": 23680 }, { - "epoch": 0.6719920544835414, + "epoch": 0.6710589702173482, "grad_norm": 0.0, - "learning_rate": 5.131900799956276e-06, - "loss": 0.8298, + "learning_rate": 5.158192996434393e-06, + "loss": 0.7607, "step": 23681 }, { - "epoch": 0.6720204313280363, + "epoch": 0.6710873076596107, "grad_norm": 0.0, - "learning_rate": 5.131097998537893e-06, - "loss": 0.7886, + "learning_rate": 5.157389980564827e-06, + "loss": 0.8445, "step": 23682 }, { - "epoch": 0.6720488081725312, + "epoch": 0.6711156451018732, "grad_norm": 0.0, - "learning_rate": 5.130295238247349e-06, - "loss": 0.8911, + "learning_rate": 5.156587005486362e-06, + "loss": 0.8591, "step": 23683 }, { - "epoch": 0.6720771850170261, + "epoch": 0.6711439825441355, "grad_norm": 0.0, - "learning_rate": 5.129492519091415e-06, - "loss": 0.9789, + "learning_rate": 5.155784071205767e-06, + "loss": 0.6894, "step": 23684 }, { - "epoch": 0.672105561861521, + "epoch": 0.671172319986398, "grad_norm": 0.0, - "learning_rate": 5.128689841076877e-06, - "loss": 0.8597, + "learning_rate": 5.154981177729799e-06, + "loss": 0.8534, "step": 23685 }, { - "epoch": 0.6721339387060159, + "epoch": 0.6712006574286605, "grad_norm": 0.0, - "learning_rate": 5.12788720421052e-06, - "loss": 0.765, + "learning_rate": 5.154178325065222e-06, + "loss": 0.8455, "step": 23686 }, { - "epoch": 0.6721623155505108, + "epoch": 0.6712289948709229, "grad_norm": 0.0, - "learning_rate": 5.127084608499113e-06, - "loss": 0.7917, + "learning_rate": 5.153375513218806e-06, + "loss": 0.9374, "step": 23687 }, { - "epoch": 0.6721906923950057, + "epoch": 0.6712573323131854, "grad_norm": 0.0, - "learning_rate": 5.126282053949443e-06, - "loss": 0.9498, + "learning_rate": 5.152572742197304e-06, + "loss": 0.8261, "step": 23688 }, { - "epoch": 0.6722190692395006, + "epoch": 0.6712856697554479, "grad_norm": 0.0, - "learning_rate": 5.12547954056829e-06, - "loss": 0.883, + "learning_rate": 5.15177001200748e-06, + "loss": 0.8587, "step": 23689 }, { - "epoch": 0.6722474460839954, + "epoch": 0.6713140071977103, "grad_norm": 0.0, - "learning_rate": 5.124677068362427e-06, - "loss": 0.8419, + "learning_rate": 5.150967322656103e-06, + "loss": 0.8346, "step": 23690 }, { - "epoch": 0.6722758229284903, + "epoch": 0.6713423446399728, "grad_norm": 0.0, - "learning_rate": 5.123874637338637e-06, - "loss": 0.8912, + "learning_rate": 5.150164674149925e-06, + "loss": 0.8252, "step": 23691 }, { - "epoch": 0.6723041997729853, + "epoch": 0.6713706820822353, "grad_norm": 0.0, - "learning_rate": 5.1230722475037e-06, - "loss": 0.8881, + "learning_rate": 5.149362066495709e-06, + "loss": 0.9026, "step": 23692 }, { - "epoch": 0.6723325766174801, + "epoch": 0.6713990195244978, "grad_norm": 0.0, - "learning_rate": 5.122269898864387e-06, - "loss": 0.7569, + "learning_rate": 5.14855949970022e-06, + "loss": 0.8337, "step": 23693 }, { - "epoch": 0.672360953461975, + "epoch": 0.6714273569667601, "grad_norm": 0.0, - "learning_rate": 5.1214675914274805e-06, - "loss": 0.8192, + "learning_rate": 5.147756973770215e-06, + "loss": 0.7999, "step": 23694 }, { - "epoch": 0.67238933030647, + "epoch": 0.6714556944090226, "grad_norm": 0.0, - "learning_rate": 5.120665325199759e-06, - "loss": 0.8718, + "learning_rate": 5.146954488712458e-06, + "loss": 0.8798, "step": 23695 }, { - "epoch": 0.6724177071509648, + "epoch": 0.6714840318512851, "grad_norm": 0.0, - "learning_rate": 5.119863100187989e-06, - "loss": 0.7985, + "learning_rate": 5.146152044533702e-06, + "loss": 0.958, "step": 23696 }, { - "epoch": 0.6724460839954597, + "epoch": 0.6715123692935475, "grad_norm": 0.0, - "learning_rate": 5.1190609163989635e-06, - "loss": 0.9054, + "learning_rate": 5.145349641240714e-06, + "loss": 0.8501, "step": 23697 }, { - "epoch": 0.6724744608399545, + "epoch": 0.67154070673581, "grad_norm": 0.0, - "learning_rate": 5.118258773839446e-06, - "loss": 0.8506, + "learning_rate": 5.144547278840244e-06, + "loss": 0.8731, "step": 23698 }, { - "epoch": 0.6725028376844495, + "epoch": 0.6715690441780725, "grad_norm": 0.0, - "learning_rate": 5.117456672516217e-06, - "loss": 0.897, + "learning_rate": 5.143744957339056e-06, + "loss": 0.8388, "step": 23699 }, { - "epoch": 0.6725312145289444, + "epoch": 0.671597381620335, "grad_norm": 0.0, - "learning_rate": 5.116654612436053e-06, - "loss": 0.8047, + "learning_rate": 5.1429426767439115e-06, + "loss": 0.8605, "step": 23700 }, { - "epoch": 0.6725595913734392, + "epoch": 0.6716257190625974, "grad_norm": 0.0, - "learning_rate": 5.115852593605723e-06, - "loss": 0.8014, + "learning_rate": 5.14214043706156e-06, + "loss": 0.8231, "step": 23701 }, { - "epoch": 0.6725879682179342, + "epoch": 0.6716540565048599, "grad_norm": 0.0, - "learning_rate": 5.115050616032007e-06, - "loss": 0.871, + "learning_rate": 5.141338238298766e-06, + "loss": 0.7256, "step": 23702 }, { - "epoch": 0.6726163450624291, + "epoch": 0.6716823939471224, "grad_norm": 0.0, - "learning_rate": 5.1142486797216816e-06, - "loss": 0.903, + "learning_rate": 5.1405360804622815e-06, + "loss": 0.8894, "step": 23703 }, { - "epoch": 0.6726447219069239, + "epoch": 0.6717107313893848, "grad_norm": 0.0, - "learning_rate": 5.113446784681513e-06, - "loss": 0.972, + "learning_rate": 5.139733963558866e-06, + "loss": 0.7604, "step": 23704 }, { - "epoch": 0.6726730987514189, + "epoch": 0.6717390688316472, "grad_norm": 0.0, - "learning_rate": 5.11264493091828e-06, - "loss": 0.7807, + "learning_rate": 5.1389318875952806e-06, + "loss": 0.9774, "step": 23705 }, { - "epoch": 0.6727014755959138, + "epoch": 0.6717674062739097, "grad_norm": 0.0, - "learning_rate": 5.111843118438757e-06, - "loss": 0.9317, + "learning_rate": 5.138129852578273e-06, + "loss": 0.8363, "step": 23706 }, { - "epoch": 0.6727298524404086, + "epoch": 0.6717957437161722, "grad_norm": 0.0, - "learning_rate": 5.111041347249712e-06, - "loss": 0.8609, + "learning_rate": 5.137327858514605e-06, + "loss": 0.8169, "step": 23707 }, { - "epoch": 0.6727582292849035, + "epoch": 0.6718240811584346, "grad_norm": 0.0, - "learning_rate": 5.110239617357921e-06, - "loss": 0.8554, + "learning_rate": 5.136525905411031e-06, + "loss": 0.865, "step": 23708 }, { - "epoch": 0.6727866061293984, + "epoch": 0.6718524186006971, "grad_norm": 0.0, - "learning_rate": 5.10943792877016e-06, - "loss": 0.8553, + "learning_rate": 5.135723993274304e-06, + "loss": 0.8225, "step": 23709 }, { - "epoch": 0.6728149829738933, + "epoch": 0.6718807560429596, "grad_norm": 0.0, - "learning_rate": 5.108636281493191e-06, - "loss": 0.8687, + "learning_rate": 5.1349221221111826e-06, + "loss": 0.8094, "step": 23710 }, { - "epoch": 0.6728433598183882, + "epoch": 0.671909093485222, "grad_norm": 0.0, - "learning_rate": 5.1078346755337935e-06, - "loss": 0.7935, + "learning_rate": 5.134120291928415e-06, + "loss": 0.8459, "step": 23711 }, { - "epoch": 0.6728717366628831, + "epoch": 0.6719374309274845, "grad_norm": 0.0, - "learning_rate": 5.107033110898735e-06, - "loss": 0.8519, + "learning_rate": 5.133318502732759e-06, + "loss": 0.7923, "step": 23712 }, { - "epoch": 0.672900113507378, + "epoch": 0.671965768369747, "grad_norm": 0.0, - "learning_rate": 5.106231587594788e-06, - "loss": 0.851, + "learning_rate": 5.132516754530973e-06, + "loss": 0.8695, "step": 23713 }, { - "epoch": 0.6729284903518729, + "epoch": 0.6719941058120094, "grad_norm": 0.0, - "learning_rate": 5.105430105628726e-06, - "loss": 0.7863, + "learning_rate": 5.131715047329802e-06, + "loss": 0.8437, "step": 23714 }, { - "epoch": 0.6729568671963677, + "epoch": 0.6720224432542719, "grad_norm": 0.0, - "learning_rate": 5.104628665007313e-06, - "loss": 0.7769, + "learning_rate": 5.130913381136003e-06, + "loss": 0.8755, "step": 23715 }, { - "epoch": 0.6729852440408627, + "epoch": 0.6720507806965343, "grad_norm": 0.0, - "learning_rate": 5.103827265737322e-06, - "loss": 0.771, + "learning_rate": 5.130111755956327e-06, + "loss": 0.9099, "step": 23716 }, { - "epoch": 0.6730136208853575, + "epoch": 0.6720791181387968, "grad_norm": 0.0, - "learning_rate": 5.103025907825525e-06, - "loss": 0.7711, + "learning_rate": 5.1293101717975305e-06, + "loss": 0.8287, "step": 23717 }, { - "epoch": 0.6730419977298524, + "epoch": 0.6721074555810592, "grad_norm": 0.0, - "learning_rate": 5.102224591278685e-06, - "loss": 0.7329, + "learning_rate": 5.128508628666365e-06, + "loss": 0.8539, "step": 23718 }, { - "epoch": 0.6730703745743474, + "epoch": 0.6721357930233217, "grad_norm": 0.0, - "learning_rate": 5.101423316103573e-06, - "loss": 0.791, + "learning_rate": 5.127707126569577e-06, + "loss": 0.8521, "step": 23719 }, { - "epoch": 0.6730987514188422, + "epoch": 0.6721641304655842, "grad_norm": 0.0, - "learning_rate": 5.100622082306964e-06, - "loss": 0.7924, + "learning_rate": 5.126905665513922e-06, + "loss": 0.7702, "step": 23720 }, { - "epoch": 0.6731271282633371, + "epoch": 0.6721924679078466, "grad_norm": 0.0, - "learning_rate": 5.099820889895616e-06, - "loss": 0.7959, + "learning_rate": 5.126104245506153e-06, + "loss": 0.9623, "step": 23721 }, { - "epoch": 0.6731555051078321, + "epoch": 0.6722208053501091, "grad_norm": 0.0, - "learning_rate": 5.0990197388763005e-06, - "loss": 0.8723, + "learning_rate": 5.125302866553015e-06, + "loss": 0.825, "step": 23722 }, { - "epoch": 0.6731838819523269, + "epoch": 0.6722491427923716, "grad_norm": 0.0, - "learning_rate": 5.0982186292557915e-06, - "loss": 0.7873, + "learning_rate": 5.1245015286612586e-06, + "loss": 0.8795, "step": 23723 }, { - "epoch": 0.6732122587968218, + "epoch": 0.6722774802346341, "grad_norm": 0.0, - "learning_rate": 5.097417561040844e-06, - "loss": 0.7515, + "learning_rate": 5.123700231837643e-06, + "loss": 0.8543, "step": 23724 }, { - "epoch": 0.6732406356413166, + "epoch": 0.6723058176768965, "grad_norm": 0.0, - "learning_rate": 5.096616534238232e-06, - "loss": 0.8246, + "learning_rate": 5.122898976088906e-06, + "loss": 0.9529, "step": 23725 }, { - "epoch": 0.6732690124858116, + "epoch": 0.672334155119159, "grad_norm": 0.0, - "learning_rate": 5.0958155488547186e-06, - "loss": 0.803, + "learning_rate": 5.122097761421806e-06, + "loss": 0.8269, "step": 23726 }, { - "epoch": 0.6732973893303065, + "epoch": 0.6723624925614214, "grad_norm": 0.0, - "learning_rate": 5.095014604897075e-06, - "loss": 0.84, + "learning_rate": 5.121296587843084e-06, + "loss": 0.7075, "step": 23727 }, { - "epoch": 0.6733257661748013, + "epoch": 0.6723908300036838, "grad_norm": 0.0, - "learning_rate": 5.094213702372065e-06, - "loss": 0.8549, + "learning_rate": 5.120495455359493e-06, + "loss": 0.8622, "step": 23728 }, { - "epoch": 0.6733541430192963, + "epoch": 0.6724191674459463, "grad_norm": 0.0, - "learning_rate": 5.093412841286449e-06, - "loss": 0.9103, + "learning_rate": 5.11969436397778e-06, + "loss": 0.9746, "step": 23729 }, { - "epoch": 0.6733825198637912, + "epoch": 0.6724475048882088, "grad_norm": 0.0, - "learning_rate": 5.0926120216469956e-06, - "loss": 0.7629, + "learning_rate": 5.118893313704693e-06, + "loss": 0.8323, "step": 23730 }, { - "epoch": 0.673410896708286, + "epoch": 0.6724758423304712, "grad_norm": 0.0, - "learning_rate": 5.091811243460473e-06, - "loss": 0.8178, + "learning_rate": 5.118092304546987e-06, + "loss": 0.8831, "step": 23731 }, { - "epoch": 0.6734392735527809, + "epoch": 0.6725041797727337, "grad_norm": 0.0, - "learning_rate": 5.091010506733637e-06, - "loss": 0.8074, + "learning_rate": 5.117291336511396e-06, + "loss": 0.7449, "step": 23732 }, { - "epoch": 0.6734676503972759, + "epoch": 0.6725325172149962, "grad_norm": 0.0, - "learning_rate": 5.090209811473257e-06, - "loss": 0.8576, + "learning_rate": 5.116490409604674e-06, + "loss": 0.8734, "step": 23733 }, { - "epoch": 0.6734960272417707, + "epoch": 0.6725608546572587, "grad_norm": 0.0, - "learning_rate": 5.089409157686098e-06, - "loss": 0.9092, + "learning_rate": 5.11568952383357e-06, + "loss": 0.8476, "step": 23734 }, { - "epoch": 0.6735244040862656, + "epoch": 0.6725891920995211, "grad_norm": 0.0, - "learning_rate": 5.088608545378917e-06, - "loss": 0.861, + "learning_rate": 5.114888679204824e-06, + "loss": 0.8572, "step": 23735 }, { - "epoch": 0.6735527809307605, + "epoch": 0.6726175295417836, "grad_norm": 0.0, - "learning_rate": 5.087807974558481e-06, - "loss": 0.8418, + "learning_rate": 5.114087875725185e-06, + "loss": 0.8215, "step": 23736 }, { - "epoch": 0.6735811577752554, + "epoch": 0.672645866984046, "grad_norm": 0.0, - "learning_rate": 5.0870074452315555e-06, - "loss": 0.8074, + "learning_rate": 5.1132871134013996e-06, + "loss": 0.8629, "step": 23737 }, { - "epoch": 0.6736095346197503, + "epoch": 0.6726742044263084, "grad_norm": 0.0, - "learning_rate": 5.0862069574048956e-06, - "loss": 0.7659, + "learning_rate": 5.1124863922402104e-06, + "loss": 0.8567, "step": 23738 }, { - "epoch": 0.6736379114642451, + "epoch": 0.6727025418685709, "grad_norm": 0.0, - "learning_rate": 5.085406511085265e-06, - "loss": 0.8237, + "learning_rate": 5.111685712248364e-06, + "loss": 0.8715, "step": 23739 }, { - "epoch": 0.6736662883087401, + "epoch": 0.6727308793108334, "grad_norm": 0.0, - "learning_rate": 5.084606106279431e-06, - "loss": 0.913, + "learning_rate": 5.1108850734326035e-06, + "loss": 0.9169, "step": 23740 }, { - "epoch": 0.673694665153235, + "epoch": 0.6727592167530959, "grad_norm": 0.0, - "learning_rate": 5.083805742994147e-06, - "loss": 0.8466, + "learning_rate": 5.110084475799671e-06, + "loss": 0.7758, "step": 23741 }, { - "epoch": 0.6737230419977298, + "epoch": 0.6727875541953583, "grad_norm": 0.0, - "learning_rate": 5.083005421236173e-06, - "loss": 0.8282, + "learning_rate": 5.109283919356315e-06, + "loss": 0.8125, "step": 23742 }, { - "epoch": 0.6737514188422248, + "epoch": 0.6728158916376208, "grad_norm": 0.0, - "learning_rate": 5.08220514101228e-06, - "loss": 0.7394, + "learning_rate": 5.108483404109275e-06, + "loss": 0.6767, "step": 23743 }, { - "epoch": 0.6737797956867196, + "epoch": 0.6728442290798833, "grad_norm": 0.0, - "learning_rate": 5.081404902329219e-06, - "loss": 0.8926, + "learning_rate": 5.107682930065296e-06, + "loss": 0.8989, "step": 23744 }, { - "epoch": 0.6738081725312145, + "epoch": 0.6728725665221457, "grad_norm": 0.0, - "learning_rate": 5.080604705193754e-06, - "loss": 0.9991, + "learning_rate": 5.1068824972311245e-06, + "loss": 0.9017, "step": 23745 }, { - "epoch": 0.6738365493757095, + "epoch": 0.6729009039644082, "grad_norm": 0.0, - "learning_rate": 5.079804549612639e-06, - "loss": 0.8223, + "learning_rate": 5.106082105613496e-06, + "loss": 0.745, "step": 23746 }, { - "epoch": 0.6738649262202043, + "epoch": 0.6729292414066707, "grad_norm": 0.0, - "learning_rate": 5.079004435592636e-06, - "loss": 0.9263, + "learning_rate": 5.105281755219158e-06, + "loss": 0.8447, "step": 23747 }, { - "epoch": 0.6738933030646992, + "epoch": 0.6729575788489331, "grad_norm": 0.0, - "learning_rate": 5.078204363140507e-06, - "loss": 0.8792, + "learning_rate": 5.104481446054845e-06, + "loss": 0.7881, "step": 23748 }, { - "epoch": 0.673921679909194, + "epoch": 0.6729859162911955, "grad_norm": 0.0, - "learning_rate": 5.077404332263005e-06, - "loss": 0.81, + "learning_rate": 5.103681178127303e-06, + "loss": 0.803, "step": 23749 }, { - "epoch": 0.673950056753689, + "epoch": 0.673014253733458, "grad_norm": 0.0, - "learning_rate": 5.076604342966889e-06, - "loss": 0.8853, + "learning_rate": 5.102880951443277e-06, + "loss": 0.8111, "step": 23750 }, { - "epoch": 0.6739784335981839, + "epoch": 0.6730425911757205, "grad_norm": 0.0, - "learning_rate": 5.075804395258921e-06, - "loss": 0.7675, + "learning_rate": 5.102080766009499e-06, + "loss": 0.9313, "step": 23751 }, { - "epoch": 0.6740068104426787, + "epoch": 0.6730709286179829, "grad_norm": 0.0, - "learning_rate": 5.075004489145851e-06, - "loss": 0.8118, + "learning_rate": 5.1012806218327135e-06, + "loss": 0.8624, "step": 23752 }, { - "epoch": 0.6740351872871737, + "epoch": 0.6730992660602454, "grad_norm": 0.0, - "learning_rate": 5.074204624634439e-06, - "loss": 0.8565, + "learning_rate": 5.100480518919665e-06, + "loss": 0.7888, "step": 23753 }, { - "epoch": 0.6740635641316686, + "epoch": 0.6731276035025079, "grad_norm": 0.0, - "learning_rate": 5.073404801731446e-06, - "loss": 0.8061, + "learning_rate": 5.099680457277083e-06, + "loss": 0.9362, "step": 23754 }, { - "epoch": 0.6740919409761634, + "epoch": 0.6731559409447703, "grad_norm": 0.0, - "learning_rate": 5.07260502044362e-06, - "loss": 0.7482, + "learning_rate": 5.098880436911714e-06, + "loss": 0.7591, "step": 23755 }, { - "epoch": 0.6741203178206583, + "epoch": 0.6731842783870328, "grad_norm": 0.0, - "learning_rate": 5.071805280777722e-06, - "loss": 0.8912, + "learning_rate": 5.098080457830295e-06, + "loss": 0.8662, "step": 23756 }, { - "epoch": 0.6741486946651533, + "epoch": 0.6732126158292953, "grad_norm": 0.0, - "learning_rate": 5.0710055827405045e-06, - "loss": 0.7436, + "learning_rate": 5.097280520039562e-06, + "loss": 0.9429, "step": 23757 }, { - "epoch": 0.6741770715096481, + "epoch": 0.6732409532715578, "grad_norm": 0.0, - "learning_rate": 5.0702059263387275e-06, - "loss": 0.7355, + "learning_rate": 5.0964806235462625e-06, + "loss": 0.9145, "step": 23758 }, { - "epoch": 0.674205448354143, + "epoch": 0.6732692907138201, "grad_norm": 0.0, - "learning_rate": 5.069406311579139e-06, - "loss": 0.9204, + "learning_rate": 5.095680768357123e-06, + "loss": 0.7919, "step": 23759 }, { - "epoch": 0.674233825198638, + "epoch": 0.6732976281560826, "grad_norm": 0.0, - "learning_rate": 5.068606738468503e-06, - "loss": 0.8514, + "learning_rate": 5.094880954478889e-06, + "loss": 0.6976, "step": 23760 }, { - "epoch": 0.6742622020431328, + "epoch": 0.6733259655983451, "grad_norm": 0.0, - "learning_rate": 5.067807207013564e-06, - "loss": 0.9054, + "learning_rate": 5.094081181918291e-06, + "loss": 0.8129, "step": 23761 }, { - "epoch": 0.6742905788876277, + "epoch": 0.6733543030406075, "grad_norm": 0.0, - "learning_rate": 5.067007717221079e-06, - "loss": 0.8273, + "learning_rate": 5.0932814506820685e-06, + "loss": 0.8389, "step": 23762 }, { - "epoch": 0.6743189557321226, + "epoch": 0.67338264048287, "grad_norm": 0.0, - "learning_rate": 5.0662082690978045e-06, - "loss": 0.8763, + "learning_rate": 5.092481760776962e-06, + "loss": 0.8444, "step": 23763 }, { - "epoch": 0.6743473325766175, + "epoch": 0.6734109779251325, "grad_norm": 0.0, - "learning_rate": 5.065408862650487e-06, - "loss": 0.8468, + "learning_rate": 5.0916821122097e-06, + "loss": 0.8969, "step": 23764 }, { - "epoch": 0.6743757094211124, + "epoch": 0.673439315367395, "grad_norm": 0.0, - "learning_rate": 5.064609497885883e-06, - "loss": 0.8454, + "learning_rate": 5.090882504987023e-06, + "loss": 0.8774, "step": 23765 }, { - "epoch": 0.6744040862656072, + "epoch": 0.6734676528096574, "grad_norm": 0.0, - "learning_rate": 5.063810174810748e-06, - "loss": 0.7878, + "learning_rate": 5.090082939115668e-06, + "loss": 0.8121, "step": 23766 }, { - "epoch": 0.6744324631101022, + "epoch": 0.6734959902519199, "grad_norm": 0.0, - "learning_rate": 5.063010893431827e-06, - "loss": 0.7511, + "learning_rate": 5.0892834146023665e-06, + "loss": 0.8059, "step": 23767 }, { - "epoch": 0.674460839954597, + "epoch": 0.6735243276941824, "grad_norm": 0.0, - "learning_rate": 5.062211653755874e-06, - "loss": 0.7321, + "learning_rate": 5.088483931453851e-06, + "loss": 0.8377, "step": 23768 }, { - "epoch": 0.6744892167990919, + "epoch": 0.6735526651364447, "grad_norm": 0.0, - "learning_rate": 5.061412455789645e-06, - "loss": 0.9576, + "learning_rate": 5.087684489676862e-06, + "loss": 0.8483, "step": 23769 }, { - "epoch": 0.6745175936435869, + "epoch": 0.6735810025787072, "grad_norm": 0.0, - "learning_rate": 5.060613299539885e-06, - "loss": 0.8989, + "learning_rate": 5.08688508927813e-06, + "loss": 0.7325, "step": 23770 }, { - "epoch": 0.6745459704880817, + "epoch": 0.6736093400209697, "grad_norm": 0.0, - "learning_rate": 5.059814185013348e-06, - "loss": 0.8297, + "learning_rate": 5.086085730264392e-06, + "loss": 0.9034, "step": 23771 }, { - "epoch": 0.6745743473325766, + "epoch": 0.6736376774632322, "grad_norm": 0.0, - "learning_rate": 5.059015112216776e-06, - "loss": 0.8802, + "learning_rate": 5.085286412642376e-06, + "loss": 0.8206, "step": 23772 }, { - "epoch": 0.6746027241770715, + "epoch": 0.6736660149054946, "grad_norm": 0.0, - "learning_rate": 5.058216081156929e-06, - "loss": 0.8581, + "learning_rate": 5.084487136418816e-06, + "loss": 0.7859, "step": 23773 }, { - "epoch": 0.6746311010215664, + "epoch": 0.6736943523477571, "grad_norm": 0.0, - "learning_rate": 5.057417091840558e-06, - "loss": 0.7391, + "learning_rate": 5.083687901600452e-06, + "loss": 0.8461, "step": 23774 }, { - "epoch": 0.6746594778660613, + "epoch": 0.6737226897900196, "grad_norm": 0.0, - "learning_rate": 5.056618144274401e-06, - "loss": 0.8099, + "learning_rate": 5.082888708194006e-06, + "loss": 0.881, "step": 23775 }, { - "epoch": 0.6746878547105561, + "epoch": 0.673751027232282, "grad_norm": 0.0, - "learning_rate": 5.055819238465214e-06, - "loss": 0.8511, + "learning_rate": 5.082089556206218e-06, + "loss": 0.9337, "step": 23776 }, { - "epoch": 0.6747162315550511, + "epoch": 0.6737793646745445, "grad_norm": 0.0, - "learning_rate": 5.055020374419748e-06, - "loss": 0.8813, + "learning_rate": 5.081290445643812e-06, + "loss": 0.8087, "step": 23777 }, { - "epoch": 0.674744608399546, + "epoch": 0.673807702116807, "grad_norm": 0.0, - "learning_rate": 5.0542215521447445e-06, - "loss": 0.9001, + "learning_rate": 5.080491376513523e-06, + "loss": 0.8075, "step": 23778 }, { - "epoch": 0.6747729852440408, + "epoch": 0.6738360395590693, "grad_norm": 0.0, - "learning_rate": 5.053422771646952e-06, - "loss": 0.8074, + "learning_rate": 5.079692348822085e-06, + "loss": 0.8691, "step": 23779 }, { - "epoch": 0.6748013620885358, + "epoch": 0.6738643770013318, "grad_norm": 0.0, - "learning_rate": 5.052624032933124e-06, - "loss": 0.862, + "learning_rate": 5.078893362576223e-06, + "loss": 0.914, "step": 23780 }, { - "epoch": 0.6748297389330307, + "epoch": 0.6738927144435943, "grad_norm": 0.0, - "learning_rate": 5.05182533601e-06, - "loss": 0.8387, + "learning_rate": 5.078094417782669e-06, + "loss": 0.9131, "step": 23781 }, { - "epoch": 0.6748581157775255, + "epoch": 0.6739210518858568, "grad_norm": 0.0, - "learning_rate": 5.05102668088433e-06, - "loss": 0.8973, + "learning_rate": 5.077295514448154e-06, + "loss": 0.8436, "step": 23782 }, { - "epoch": 0.6748864926220204, + "epoch": 0.6739493893281192, "grad_norm": 0.0, - "learning_rate": 5.0502280675628625e-06, - "loss": 0.8526, + "learning_rate": 5.076496652579406e-06, + "loss": 0.8149, "step": 23783 }, { - "epoch": 0.6749148694665154, + "epoch": 0.6739777267703817, "grad_norm": 0.0, - "learning_rate": 5.049429496052338e-06, - "loss": 0.8123, + "learning_rate": 5.07569783218316e-06, + "loss": 0.8433, "step": 23784 }, { - "epoch": 0.6749432463110102, + "epoch": 0.6740060642126442, "grad_norm": 0.0, - "learning_rate": 5.048630966359505e-06, - "loss": 0.8505, + "learning_rate": 5.0748990532661345e-06, + "loss": 0.8699, "step": 23785 }, { - "epoch": 0.6749716231555051, + "epoch": 0.6740344016549066, "grad_norm": 0.0, - "learning_rate": 5.047832478491113e-06, - "loss": 0.8225, + "learning_rate": 5.074100315835064e-06, + "loss": 0.7818, "step": 23786 }, { - "epoch": 0.675, + "epoch": 0.6740627390971691, "grad_norm": 0.0, - "learning_rate": 5.0470340324538995e-06, - "loss": 0.7423, + "learning_rate": 5.073301619896679e-06, + "loss": 0.9143, "step": 23787 }, { - "epoch": 0.6750283768444949, + "epoch": 0.6740910765394316, "grad_norm": 0.0, - "learning_rate": 5.04623562825461e-06, - "loss": 0.8984, + "learning_rate": 5.072502965457701e-06, + "loss": 0.8952, "step": 23788 }, { - "epoch": 0.6750567536889898, + "epoch": 0.6741194139816941, "grad_norm": 0.0, - "learning_rate": 5.045437265899993e-06, - "loss": 0.7851, + "learning_rate": 5.071704352524863e-06, + "loss": 0.9445, "step": 23789 }, { - "epoch": 0.6750851305334846, + "epoch": 0.6741477514239564, "grad_norm": 0.0, - "learning_rate": 5.044638945396789e-06, - "loss": 0.8659, + "learning_rate": 5.070905781104887e-06, + "loss": 0.8851, "step": 23790 }, { - "epoch": 0.6751135073779796, + "epoch": 0.6741760888662189, "grad_norm": 0.0, - "learning_rate": 5.043840666751747e-06, - "loss": 0.8409, + "learning_rate": 5.0701072512045e-06, + "loss": 0.7864, "step": 23791 }, { - "epoch": 0.6751418842224745, + "epoch": 0.6742044263084814, "grad_norm": 0.0, - "learning_rate": 5.0430424299716005e-06, - "loss": 0.8529, + "learning_rate": 5.069308762830436e-06, + "loss": 0.7656, "step": 23792 }, { - "epoch": 0.6751702610669693, + "epoch": 0.6742327637507438, "grad_norm": 0.0, - "learning_rate": 5.042244235063098e-06, - "loss": 0.8716, + "learning_rate": 5.06851031598941e-06, + "loss": 0.8435, "step": 23793 }, { - "epoch": 0.6751986379114643, + "epoch": 0.6742611011930063, "grad_norm": 0.0, - "learning_rate": 5.041446082032986e-06, - "loss": 0.8172, + "learning_rate": 5.067711910688153e-06, + "loss": 0.8306, "step": 23794 }, { - "epoch": 0.6752270147559591, + "epoch": 0.6742894386352688, "grad_norm": 0.0, - "learning_rate": 5.040647970887998e-06, - "loss": 0.8062, + "learning_rate": 5.066913546933392e-06, + "loss": 0.8668, "step": 23795 }, { - "epoch": 0.675255391600454, + "epoch": 0.6743177760775313, "grad_norm": 0.0, - "learning_rate": 5.039849901634879e-06, - "loss": 0.8671, + "learning_rate": 5.066115224731848e-06, + "loss": 0.9763, "step": 23796 }, { - "epoch": 0.675283768444949, + "epoch": 0.6743461135197937, "grad_norm": 0.0, - "learning_rate": 5.039051874280374e-06, - "loss": 0.8515, + "learning_rate": 5.065316944090253e-06, + "loss": 0.8571, "step": 23797 }, { - "epoch": 0.6753121452894438, + "epoch": 0.6743744509620562, "grad_norm": 0.0, - "learning_rate": 5.038253888831219e-06, - "loss": 0.8509, + "learning_rate": 5.06451870501532e-06, + "loss": 0.8877, "step": 23798 }, { - "epoch": 0.6753405221339387, + "epoch": 0.6744027884043187, "grad_norm": 0.0, - "learning_rate": 5.0374559452941544e-06, - "loss": 0.8891, + "learning_rate": 5.063720507513781e-06, + "loss": 0.7797, "step": 23799 }, { - "epoch": 0.6753688989784336, + "epoch": 0.674431125846581, "grad_norm": 0.0, - "learning_rate": 5.036658043675928e-06, - "loss": 0.907, + "learning_rate": 5.062922351592359e-06, + "loss": 0.7705, "step": 23800 }, { - "epoch": 0.6753972758229285, + "epoch": 0.6744594632888435, "grad_norm": 0.0, - "learning_rate": 5.03586018398327e-06, - "loss": 0.808, + "learning_rate": 5.062124237257772e-06, + "loss": 0.8712, "step": 23801 }, { - "epoch": 0.6754256526674234, + "epoch": 0.674487800731106, "grad_norm": 0.0, - "learning_rate": 5.0350623662229225e-06, - "loss": 0.8745, + "learning_rate": 5.061326164516747e-06, + "loss": 0.7686, "step": 23802 }, { - "epoch": 0.6754540295119182, + "epoch": 0.6745161381733684, "grad_norm": 0.0, - "learning_rate": 5.034264590401627e-06, - "loss": 0.8736, + "learning_rate": 5.060528133376009e-06, + "loss": 0.8627, "step": 23803 }, { - "epoch": 0.6754824063564132, + "epoch": 0.6745444756156309, "grad_norm": 0.0, - "learning_rate": 5.033466856526124e-06, - "loss": 0.8402, + "learning_rate": 5.059730143842273e-06, + "loss": 0.8971, "step": 23804 }, { - "epoch": 0.6755107832009081, + "epoch": 0.6745728130578934, "grad_norm": 0.0, - "learning_rate": 5.032669164603152e-06, - "loss": 0.762, + "learning_rate": 5.058932195922268e-06, + "loss": 0.8422, "step": 23805 }, { - "epoch": 0.6755391600454029, + "epoch": 0.6746011505001559, "grad_norm": 0.0, - "learning_rate": 5.031871514639443e-06, - "loss": 0.8497, + "learning_rate": 5.058134289622709e-06, + "loss": 0.9543, "step": 23806 }, { - "epoch": 0.6755675368898978, + "epoch": 0.6746294879424183, "grad_norm": 0.0, - "learning_rate": 5.031073906641738e-06, - "loss": 0.8243, + "learning_rate": 5.057336424950319e-06, + "loss": 0.8662, "step": 23807 }, { - "epoch": 0.6755959137343928, + "epoch": 0.6746578253846808, "grad_norm": 0.0, - "learning_rate": 5.03027634061678e-06, - "loss": 0.8261, + "learning_rate": 5.05653860191182e-06, + "loss": 0.885, "step": 23808 }, { - "epoch": 0.6756242905788876, + "epoch": 0.6746861628269433, "grad_norm": 0.0, - "learning_rate": 5.029478816571296e-06, - "loss": 0.7689, + "learning_rate": 5.055740820513932e-06, + "loss": 0.7328, "step": 23809 }, { - "epoch": 0.6756526674233825, + "epoch": 0.6747145002692057, "grad_norm": 0.0, - "learning_rate": 5.028681334512029e-06, - "loss": 0.896, + "learning_rate": 5.054943080763376e-06, + "loss": 0.7733, "step": 23810 }, { - "epoch": 0.6756810442678775, + "epoch": 0.6747428377114681, "grad_norm": 0.0, - "learning_rate": 5.027883894445717e-06, - "loss": 0.8026, + "learning_rate": 5.054145382666874e-06, + "loss": 0.8948, "step": 23811 }, { - "epoch": 0.6757094211123723, + "epoch": 0.6747711751537306, "grad_norm": 0.0, - "learning_rate": 5.027086496379091e-06, - "loss": 0.888, + "learning_rate": 5.0533477262311384e-06, + "loss": 0.8032, "step": 23812 }, { - "epoch": 0.6757377979568672, + "epoch": 0.6747995125959931, "grad_norm": 0.0, - "learning_rate": 5.026289140318889e-06, - "loss": 0.869, + "learning_rate": 5.052550111462895e-06, + "loss": 0.844, "step": 23813 }, { - "epoch": 0.6757661748013621, + "epoch": 0.6748278500382555, "grad_norm": 0.0, - "learning_rate": 5.025491826271849e-06, - "loss": 0.8315, + "learning_rate": 5.051752538368855e-06, + "loss": 0.8369, "step": 23814 }, { - "epoch": 0.675794551645857, + "epoch": 0.674856187480518, "grad_norm": 0.0, - "learning_rate": 5.024694554244699e-06, - "loss": 0.8192, + "learning_rate": 5.050955006955742e-06, + "loss": 0.8134, "step": 23815 }, { - "epoch": 0.6758229284903519, + "epoch": 0.6748845249227805, "grad_norm": 0.0, - "learning_rate": 5.023897324244178e-06, - "loss": 0.7905, + "learning_rate": 5.050157517230276e-06, + "loss": 0.8497, "step": 23816 }, { - "epoch": 0.6758513053348467, + "epoch": 0.6749128623650429, "grad_norm": 0.0, - "learning_rate": 5.023100136277024e-06, - "loss": 0.8328, + "learning_rate": 5.049360069199167e-06, + "loss": 0.849, "step": 23817 }, { - "epoch": 0.6758796821793417, + "epoch": 0.6749411998073054, "grad_norm": 0.0, - "learning_rate": 5.0223029903499595e-06, - "loss": 0.902, + "learning_rate": 5.048562662869142e-06, + "loss": 0.9566, "step": 23818 }, { - "epoch": 0.6759080590238365, + "epoch": 0.6749695372495679, "grad_norm": 0.0, - "learning_rate": 5.021505886469733e-06, - "loss": 0.8749, + "learning_rate": 5.047765298246907e-06, + "loss": 0.8498, "step": 23819 }, { - "epoch": 0.6759364358683314, + "epoch": 0.6749978746918304, "grad_norm": 0.0, - "learning_rate": 5.020708824643066e-06, - "loss": 0.9185, + "learning_rate": 5.046967975339184e-06, + "loss": 0.7258, "step": 23820 }, { - "epoch": 0.6759648127128264, + "epoch": 0.6750262121340928, "grad_norm": 0.0, - "learning_rate": 5.019911804876696e-06, - "loss": 0.8863, + "learning_rate": 5.04617069415269e-06, + "loss": 0.8142, "step": 23821 }, { - "epoch": 0.6759931895573212, + "epoch": 0.6750545495763552, "grad_norm": 0.0, - "learning_rate": 5.019114827177358e-06, - "loss": 0.7816, + "learning_rate": 5.045373454694139e-06, + "loss": 0.8282, "step": 23822 }, { - "epoch": 0.6760215664018161, + "epoch": 0.6750828870186177, "grad_norm": 0.0, - "learning_rate": 5.018317891551777e-06, - "loss": 0.8681, + "learning_rate": 5.044576256970247e-06, + "loss": 0.8635, "step": 23823 }, { - "epoch": 0.676049943246311, + "epoch": 0.6751112244608801, "grad_norm": 0.0, - "learning_rate": 5.01752099800669e-06, - "loss": 0.8, + "learning_rate": 5.0437791009877335e-06, + "loss": 0.7673, "step": 23824 }, { - "epoch": 0.6760783200908059, + "epoch": 0.6751395619031426, "grad_norm": 0.0, - "learning_rate": 5.016724146548829e-06, - "loss": 0.7757, + "learning_rate": 5.042981986753306e-06, + "loss": 0.8812, "step": 23825 }, { - "epoch": 0.6761066969353008, + "epoch": 0.6751678993454051, "grad_norm": 0.0, - "learning_rate": 5.01592733718492e-06, - "loss": 0.7443, + "learning_rate": 5.042184914273685e-06, + "loss": 0.8266, "step": 23826 }, { - "epoch": 0.6761350737797956, + "epoch": 0.6751962367876675, "grad_norm": 0.0, - "learning_rate": 5.015130569921698e-06, - "loss": 0.9156, + "learning_rate": 5.041387883555577e-06, + "loss": 0.8131, "step": 23827 }, { - "epoch": 0.6761634506242906, + "epoch": 0.67522457422993, "grad_norm": 0.0, - "learning_rate": 5.014333844765895e-06, - "loss": 0.9457, + "learning_rate": 5.040590894605701e-06, + "loss": 0.8377, "step": 23828 }, { - "epoch": 0.6761918274687855, + "epoch": 0.6752529116721925, "grad_norm": 0.0, - "learning_rate": 5.013537161724234e-06, - "loss": 0.7917, + "learning_rate": 5.039793947430774e-06, + "loss": 0.8146, "step": 23829 }, { - "epoch": 0.6762202043132803, + "epoch": 0.675281249114455, "grad_norm": 0.0, - "learning_rate": 5.01274052080345e-06, - "loss": 0.9008, + "learning_rate": 5.0389970420375e-06, + "loss": 0.7974, "step": 23830 }, { - "epoch": 0.6762485811577753, + "epoch": 0.6753095865567174, "grad_norm": 0.0, - "learning_rate": 5.011943922010274e-06, - "loss": 0.8549, + "learning_rate": 5.0382001784325966e-06, + "loss": 0.8352, "step": 23831 }, { - "epoch": 0.6762769580022702, + "epoch": 0.6753379239989798, "grad_norm": 0.0, - "learning_rate": 5.011147365351427e-06, - "loss": 0.8673, + "learning_rate": 5.037403356622779e-06, + "loss": 0.8939, "step": 23832 }, { - "epoch": 0.676305334846765, + "epoch": 0.6753662614412423, "grad_norm": 0.0, - "learning_rate": 5.010350850833645e-06, - "loss": 0.9442, + "learning_rate": 5.036606576614751e-06, + "loss": 0.8337, "step": 23833 }, { - "epoch": 0.6763337116912599, + "epoch": 0.6753945988835047, "grad_norm": 0.0, - "learning_rate": 5.009554378463652e-06, - "loss": 0.8607, + "learning_rate": 5.035809838415231e-06, + "loss": 0.8076, "step": 23834 }, { - "epoch": 0.6763620885357549, + "epoch": 0.6754229363257672, "grad_norm": 0.0, - "learning_rate": 5.008757948248177e-06, - "loss": 0.8736, + "learning_rate": 5.0350131420309265e-06, + "loss": 0.8427, "step": 23835 }, { - "epoch": 0.6763904653802497, + "epoch": 0.6754512737680297, "grad_norm": 0.0, - "learning_rate": 5.007961560193952e-06, - "loss": 0.7517, + "learning_rate": 5.034216487468551e-06, + "loss": 0.8815, "step": 23836 }, { - "epoch": 0.6764188422247446, + "epoch": 0.6754796112102922, "grad_norm": 0.0, - "learning_rate": 5.007165214307699e-06, - "loss": 0.8498, + "learning_rate": 5.033419874734816e-06, + "loss": 0.8548, "step": 23837 }, { - "epoch": 0.6764472190692395, + "epoch": 0.6755079486525546, "grad_norm": 0.0, - "learning_rate": 5.006368910596142e-06, - "loss": 0.8514, + "learning_rate": 5.032623303836428e-06, + "loss": 0.813, "step": 23838 }, { - "epoch": 0.6764755959137344, + "epoch": 0.6755362860948171, "grad_norm": 0.0, - "learning_rate": 5.005572649066017e-06, - "loss": 0.8353, + "learning_rate": 5.031826774780098e-06, + "loss": 0.743, "step": 23839 }, { - "epoch": 0.6765039727582293, + "epoch": 0.6755646235370796, "grad_norm": 0.0, - "learning_rate": 5.004776429724041e-06, - "loss": 0.9066, + "learning_rate": 5.031030287572538e-06, + "loss": 0.8103, "step": 23840 }, { - "epoch": 0.6765323496027241, + "epoch": 0.675592960979342, "grad_norm": 0.0, - "learning_rate": 5.003980252576942e-06, - "loss": 0.7499, + "learning_rate": 5.030233842220453e-06, + "loss": 0.8505, "step": 23841 }, { - "epoch": 0.6765607264472191, + "epoch": 0.6756212984216045, "grad_norm": 0.0, - "learning_rate": 5.003184117631451e-06, - "loss": 0.8356, + "learning_rate": 5.029437438730558e-06, + "loss": 0.8787, "step": 23842 }, { - "epoch": 0.676589103291714, + "epoch": 0.675649635863867, "grad_norm": 0.0, - "learning_rate": 5.002388024894285e-06, - "loss": 0.7999, + "learning_rate": 5.028641077109553e-06, + "loss": 0.8056, "step": 23843 }, { - "epoch": 0.6766174801362088, + "epoch": 0.6756779733061294, "grad_norm": 0.0, - "learning_rate": 5.001591974372171e-06, - "loss": 0.7255, + "learning_rate": 5.02784475736415e-06, + "loss": 0.775, "step": 23844 }, { - "epoch": 0.6766458569807038, + "epoch": 0.6757063107483918, "grad_norm": 0.0, - "learning_rate": 5.000795966071837e-06, - "loss": 0.8762, + "learning_rate": 5.0270484795010595e-06, + "loss": 0.7693, "step": 23845 }, { - "epoch": 0.6766742338251986, + "epoch": 0.6757346481906543, "grad_norm": 0.0, - "learning_rate": 5.000000000000003e-06, - "loss": 0.871, + "learning_rate": 5.026252243526984e-06, + "loss": 0.8539, "step": 23846 }, { - "epoch": 0.6767026106696935, + "epoch": 0.6757629856329168, "grad_norm": 0.0, - "learning_rate": 4.999204076163392e-06, - "loss": 0.7698, + "learning_rate": 5.025456049448634e-06, + "loss": 0.8804, "step": 23847 }, { - "epoch": 0.6767309875141885, + "epoch": 0.6757913230751792, "grad_norm": 0.0, - "learning_rate": 4.998408194568734e-06, - "loss": 0.8016, + "learning_rate": 5.0246598972727125e-06, + "loss": 0.7795, "step": 23848 }, { - "epoch": 0.6767593643586833, + "epoch": 0.6758196605174417, "grad_norm": 0.0, - "learning_rate": 4.997612355222738e-06, - "loss": 0.7672, + "learning_rate": 5.0238637870059296e-06, + "loss": 0.7794, "step": 23849 }, { - "epoch": 0.6767877412031782, + "epoch": 0.6758479979597042, "grad_norm": 0.0, - "learning_rate": 4.996816558132139e-06, - "loss": 0.8046, + "learning_rate": 5.023067718654994e-06, + "loss": 0.8549, "step": 23850 }, { - "epoch": 0.676816118047673, + "epoch": 0.6758763354019666, "grad_norm": 0.0, - "learning_rate": 4.996020803303659e-06, - "loss": 0.8351, + "learning_rate": 5.022271692226602e-06, + "loss": 0.793, "step": 23851 }, { - "epoch": 0.676844494892168, + "epoch": 0.6759046728442291, "grad_norm": 0.0, - "learning_rate": 4.995225090744014e-06, - "loss": 0.8246, + "learning_rate": 5.021475707727466e-06, + "loss": 0.7927, "step": 23852 }, { - "epoch": 0.6768728717366629, + "epoch": 0.6759330102864916, "grad_norm": 0.0, - "learning_rate": 4.994429420459931e-06, - "loss": 0.8277, + "learning_rate": 5.02067976516429e-06, + "loss": 0.8199, "step": 23853 }, { - "epoch": 0.6769012485811577, + "epoch": 0.675961347728754, "grad_norm": 0.0, - "learning_rate": 4.993633792458123e-06, - "loss": 0.833, + "learning_rate": 5.019883864543776e-06, + "loss": 0.9016, "step": 23854 }, { - "epoch": 0.6769296254256527, + "epoch": 0.6759896851710164, "grad_norm": 0.0, - "learning_rate": 4.992838206745316e-06, - "loss": 0.8465, + "learning_rate": 5.019088005872632e-06, + "loss": 0.7827, "step": 23855 }, { - "epoch": 0.6769580022701476, + "epoch": 0.6760180226132789, "grad_norm": 0.0, - "learning_rate": 4.992042663328234e-06, - "loss": 0.7373, + "learning_rate": 5.018292189157555e-06, + "loss": 0.8128, "step": 23856 }, { - "epoch": 0.6769863791146424, + "epoch": 0.6760463600555414, "grad_norm": 0.0, - "learning_rate": 4.991247162213588e-06, - "loss": 0.9135, + "learning_rate": 5.017496414405254e-06, + "loss": 0.8806, "step": 23857 }, { - "epoch": 0.6770147559591373, + "epoch": 0.6760746974978038, "grad_norm": 0.0, - "learning_rate": 4.990451703408104e-06, - "loss": 0.8677, + "learning_rate": 5.016700681622434e-06, + "loss": 0.8531, "step": 23858 }, { - "epoch": 0.6770431328036323, + "epoch": 0.6761030349400663, "grad_norm": 0.0, - "learning_rate": 4.989656286918502e-06, - "loss": 0.8861, + "learning_rate": 5.015904990815792e-06, + "loss": 0.8354, "step": 23859 }, { - "epoch": 0.6770715096481271, + "epoch": 0.6761313723823288, "grad_norm": 0.0, - "learning_rate": 4.988860912751494e-06, - "loss": 0.8144, + "learning_rate": 5.015109341992032e-06, + "loss": 0.8177, "step": 23860 }, { - "epoch": 0.677099886492622, + "epoch": 0.6761597098245913, "grad_norm": 0.0, - "learning_rate": 4.988065580913804e-06, - "loss": 0.7518, + "learning_rate": 5.014313735157856e-06, + "loss": 0.8887, "step": 23861 }, { - "epoch": 0.677128263337117, + "epoch": 0.6761880472668537, "grad_norm": 0.0, - "learning_rate": 4.987270291412154e-06, - "loss": 0.8728, + "learning_rate": 5.013518170319968e-06, + "loss": 0.866, "step": 23862 }, { - "epoch": 0.6771566401816118, + "epoch": 0.6762163847091162, "grad_norm": 0.0, - "learning_rate": 4.986475044253253e-06, - "loss": 0.8013, + "learning_rate": 5.012722647485073e-06, + "loss": 0.9042, "step": 23863 }, { - "epoch": 0.6771850170261067, + "epoch": 0.6762447221513787, "grad_norm": 0.0, - "learning_rate": 4.985679839443819e-06, - "loss": 0.8428, + "learning_rate": 5.011927166659862e-06, + "loss": 0.8563, "step": 23864 }, { - "epoch": 0.6772133938706016, + "epoch": 0.676273059593641, "grad_norm": 0.0, - "learning_rate": 4.98488467699058e-06, - "loss": 0.8931, + "learning_rate": 5.011131727851042e-06, + "loss": 0.7808, "step": 23865 }, { - "epoch": 0.6772417707150965, + "epoch": 0.6763013970359035, "grad_norm": 0.0, - "learning_rate": 4.984089556900241e-06, - "loss": 0.8259, + "learning_rate": 5.0103363310653154e-06, + "loss": 0.8464, "step": 23866 }, { - "epoch": 0.6772701475595914, + "epoch": 0.676329734478166, "grad_norm": 0.0, - "learning_rate": 4.983294479179525e-06, - "loss": 0.8504, + "learning_rate": 5.0095409763093725e-06, + "loss": 0.8112, "step": 23867 }, { - "epoch": 0.6772985244040862, + "epoch": 0.6763580719204285, "grad_norm": 0.0, - "learning_rate": 4.982499443835148e-06, - "loss": 0.8342, + "learning_rate": 5.008745663589922e-06, + "loss": 0.7828, "step": 23868 }, { - "epoch": 0.6773269012485812, + "epoch": 0.6763864093626909, "grad_norm": 0.0, - "learning_rate": 4.981704450873821e-06, - "loss": 0.7015, + "learning_rate": 5.007950392913663e-06, + "loss": 0.8453, "step": 23869 }, { - "epoch": 0.677355278093076, + "epoch": 0.6764147468049534, "grad_norm": 0.0, - "learning_rate": 4.980909500302261e-06, - "loss": 0.8713, + "learning_rate": 5.007155164287289e-06, + "loss": 0.8742, "step": 23870 }, { - "epoch": 0.6773836549375709, + "epoch": 0.6764430842472159, "grad_norm": 0.0, - "learning_rate": 4.980114592127188e-06, - "loss": 0.8015, + "learning_rate": 5.006359977717503e-06, + "loss": 0.9038, "step": 23871 }, { - "epoch": 0.6774120317820659, + "epoch": 0.6764714216894783, "grad_norm": 0.0, - "learning_rate": 4.9793197263553105e-06, - "loss": 0.8378, + "learning_rate": 5.005564833210998e-06, + "loss": 0.7279, "step": 23872 }, { - "epoch": 0.6774404086265607, + "epoch": 0.6764997591317408, "grad_norm": 0.0, - "learning_rate": 4.978524902993343e-06, - "loss": 0.8518, + "learning_rate": 5.004769730774476e-06, + "loss": 0.8173, "step": 23873 }, { - "epoch": 0.6774687854710556, + "epoch": 0.6765280965740033, "grad_norm": 0.0, - "learning_rate": 4.977730122048007e-06, - "loss": 0.8181, + "learning_rate": 5.003974670414633e-06, + "loss": 0.6814, "step": 23874 }, { - "epoch": 0.6774971623155505, + "epoch": 0.6765564340162656, "grad_norm": 0.0, - "learning_rate": 4.976935383526006e-06, - "loss": 0.8254, + "learning_rate": 5.003179652138166e-06, + "loss": 0.7803, "step": 23875 }, { - "epoch": 0.6775255391600454, + "epoch": 0.6765847714585281, "grad_norm": 0.0, - "learning_rate": 4.9761406874340565e-06, - "loss": 0.8663, + "learning_rate": 5.002384675951777e-06, + "loss": 0.8645, "step": 23876 }, { - "epoch": 0.6775539160045403, + "epoch": 0.6766131089007906, "grad_norm": 0.0, - "learning_rate": 4.975346033778877e-06, - "loss": 0.8137, + "learning_rate": 5.001589741862153e-06, + "loss": 0.8442, "step": 23877 }, { - "epoch": 0.6775822928490352, + "epoch": 0.6766414463430531, "grad_norm": 0.0, - "learning_rate": 4.974551422567172e-06, - "loss": 0.8473, + "learning_rate": 5.0007948498759954e-06, + "loss": 0.8901, "step": 23878 }, { - "epoch": 0.6776106696935301, + "epoch": 0.6766697837853155, "grad_norm": 0.0, - "learning_rate": 4.973756853805661e-06, - "loss": 0.9009, + "learning_rate": 5.000000000000003e-06, + "loss": 0.8893, "step": 23879 }, { - "epoch": 0.677639046538025, + "epoch": 0.676698121227578, "grad_norm": 0.0, - "learning_rate": 4.9729623275010444e-06, - "loss": 0.848, + "learning_rate": 4.999205192240863e-06, + "loss": 0.8224, "step": 23880 }, { - "epoch": 0.6776674233825198, + "epoch": 0.6767264586698405, "grad_norm": 0.0, - "learning_rate": 4.9721678436600444e-06, - "loss": 0.8439, + "learning_rate": 4.998410426605274e-06, + "loss": 0.9288, "step": 23881 }, { - "epoch": 0.6776958002270148, + "epoch": 0.6767547961121029, "grad_norm": 0.0, - "learning_rate": 4.971373402289371e-06, - "loss": 0.7664, + "learning_rate": 4.9976157030999365e-06, + "loss": 0.855, "step": 23882 }, { - "epoch": 0.6777241770715097, + "epoch": 0.6767831335543654, "grad_norm": 0.0, - "learning_rate": 4.97057900339573e-06, - "loss": 0.827, + "learning_rate": 4.996821021731535e-06, + "loss": 0.7868, "step": 23883 }, { - "epoch": 0.6777525539160045, + "epoch": 0.6768114709966279, "grad_norm": 0.0, - "learning_rate": 4.969784646985834e-06, - "loss": 0.7586, + "learning_rate": 4.9960263825067725e-06, + "loss": 0.7689, "step": 23884 }, { - "epoch": 0.6777809307604994, + "epoch": 0.6768398084388904, "grad_norm": 0.0, - "learning_rate": 4.968990333066398e-06, - "loss": 0.8526, + "learning_rate": 4.995231785432333e-06, + "loss": 0.8869, "step": 23885 }, { - "epoch": 0.6778093076049944, + "epoch": 0.6768681458811527, "grad_norm": 0.0, - "learning_rate": 4.968196061644123e-06, - "loss": 0.8571, + "learning_rate": 4.994437230514916e-06, + "loss": 0.6864, "step": 23886 }, { - "epoch": 0.6778376844494892, + "epoch": 0.6768964833234152, "grad_norm": 0.0, - "learning_rate": 4.96740183272572e-06, - "loss": 0.7934, + "learning_rate": 4.993642717761211e-06, + "loss": 0.8025, "step": 23887 }, { - "epoch": 0.6778660612939841, + "epoch": 0.6769248207656777, "grad_norm": 0.0, - "learning_rate": 4.966607646317906e-06, - "loss": 0.8574, + "learning_rate": 4.992848247177913e-06, + "loss": 0.8622, "step": 23888 }, { - "epoch": 0.677894438138479, + "epoch": 0.6769531582079401, "grad_norm": 0.0, - "learning_rate": 4.965813502427378e-06, - "loss": 0.8533, + "learning_rate": 4.992053818771715e-06, + "loss": 0.8494, "step": 23889 }, { - "epoch": 0.6779228149829739, + "epoch": 0.6769814956502026, "grad_norm": 0.0, - "learning_rate": 4.965019401060851e-06, - "loss": 0.8401, + "learning_rate": 4.99125943254931e-06, + "loss": 0.9398, "step": 23890 }, { - "epoch": 0.6779511918274688, + "epoch": 0.6770098330924651, "grad_norm": 0.0, - "learning_rate": 4.964225342225034e-06, - "loss": 0.9224, + "learning_rate": 4.990465088517384e-06, + "loss": 0.9159, "step": 23891 }, { - "epoch": 0.6779795686719636, + "epoch": 0.6770381705347276, "grad_norm": 0.0, - "learning_rate": 4.9634313259266284e-06, - "loss": 0.8269, + "learning_rate": 4.989670786682634e-06, + "loss": 0.7686, "step": 23892 }, { - "epoch": 0.6780079455164586, + "epoch": 0.67706650797699, "grad_norm": 0.0, - "learning_rate": 4.9626373521723455e-06, - "loss": 0.784, + "learning_rate": 4.988876527051743e-06, + "loss": 0.7874, "step": 23893 }, { - "epoch": 0.6780363223609535, + "epoch": 0.6770948454192525, "grad_norm": 0.0, - "learning_rate": 4.961843420968895e-06, - "loss": 0.8447, + "learning_rate": 4.988082309631407e-06, + "loss": 0.9088, "step": 23894 }, { - "epoch": 0.6780646992054483, + "epoch": 0.677123182861515, "grad_norm": 0.0, - "learning_rate": 4.961049532322971e-06, - "loss": 0.9069, + "learning_rate": 4.987288134428318e-06, + "loss": 0.7418, "step": 23895 }, { - "epoch": 0.6780930760499433, + "epoch": 0.6771515203037773, "grad_norm": 0.0, - "learning_rate": 4.960255686241298e-06, - "loss": 0.8072, + "learning_rate": 4.986494001449159e-06, + "loss": 0.8119, "step": 23896 }, { - "epoch": 0.6781214528944381, + "epoch": 0.6771798577460398, "grad_norm": 0.0, - "learning_rate": 4.959461882730566e-06, - "loss": 0.9069, + "learning_rate": 4.985699910700622e-06, + "loss": 0.8231, "step": 23897 }, { - "epoch": 0.678149829738933, + "epoch": 0.6772081951883023, "grad_norm": 0.0, - "learning_rate": 4.958668121797486e-06, - "loss": 0.7395, + "learning_rate": 4.984905862189402e-06, + "loss": 0.856, "step": 23898 }, { - "epoch": 0.678178206583428, + "epoch": 0.6772365326305647, "grad_norm": 0.0, - "learning_rate": 4.957874403448768e-06, - "loss": 0.8581, + "learning_rate": 4.984111855922177e-06, + "loss": 0.7423, "step": 23899 }, { - "epoch": 0.6782065834279228, + "epoch": 0.6772648700728272, "grad_norm": 0.0, - "learning_rate": 4.957080727691107e-06, - "loss": 0.8395, + "learning_rate": 4.98331789190564e-06, + "loss": 0.8306, "step": 23900 }, { - "epoch": 0.6782349602724177, + "epoch": 0.6772932075150897, "grad_norm": 0.0, - "learning_rate": 4.956287094531211e-06, - "loss": 0.7485, + "learning_rate": 4.982523970146481e-06, + "loss": 0.7983, "step": 23901 }, { - "epoch": 0.6782633371169126, + "epoch": 0.6773215449573522, "grad_norm": 0.0, - "learning_rate": 4.9554935039757905e-06, - "loss": 0.8384, + "learning_rate": 4.981730090651384e-06, + "loss": 0.8806, "step": 23902 }, { - "epoch": 0.6782917139614075, + "epoch": 0.6773498823996146, "grad_norm": 0.0, - "learning_rate": 4.954699956031538e-06, - "loss": 0.8432, + "learning_rate": 4.980936253427044e-06, + "loss": 0.8526, "step": 23903 }, { - "epoch": 0.6783200908059024, + "epoch": 0.6773782198418771, "grad_norm": 0.0, - "learning_rate": 4.953906450705161e-06, - "loss": 0.8853, + "learning_rate": 4.980142458480136e-06, + "loss": 1.0063, "step": 23904 }, { - "epoch": 0.6783484676503972, + "epoch": 0.6774065572841396, "grad_norm": 0.0, - "learning_rate": 4.953112988003368e-06, - "loss": 0.8038, + "learning_rate": 4.979348705817356e-06, + "loss": 0.8943, "step": 23905 }, { - "epoch": 0.6783768444948922, + "epoch": 0.677434894726402, "grad_norm": 0.0, - "learning_rate": 4.952319567932853e-06, - "loss": 0.8613, + "learning_rate": 4.978554995445384e-06, + "loss": 0.9427, "step": 23906 }, { - "epoch": 0.6784052213393871, + "epoch": 0.6774632321686644, "grad_norm": 0.0, - "learning_rate": 4.951526190500321e-06, - "loss": 0.8395, + "learning_rate": 4.977761327370907e-06, + "loss": 0.9232, "step": 23907 }, { - "epoch": 0.6784335981838819, + "epoch": 0.6774915696109269, "grad_norm": 0.0, - "learning_rate": 4.950732855712478e-06, - "loss": 0.7348, + "learning_rate": 4.9769677016006145e-06, + "loss": 0.9119, "step": 23908 }, { - "epoch": 0.6784619750283768, + "epoch": 0.6775199070531894, "grad_norm": 0.0, - "learning_rate": 4.9499395635760174e-06, - "loss": 0.9005, + "learning_rate": 4.9761741181411845e-06, + "loss": 0.6942, "step": 23909 }, { - "epoch": 0.6784903518728718, + "epoch": 0.6775482444954518, "grad_norm": 0.0, - "learning_rate": 4.949146314097645e-06, - "loss": 0.8155, + "learning_rate": 4.975380576999307e-06, + "loss": 0.8709, "step": 23910 }, { - "epoch": 0.6785187287173666, + "epoch": 0.6775765819377143, "grad_norm": 0.0, - "learning_rate": 4.94835310728406e-06, - "loss": 0.8196, + "learning_rate": 4.974587078181667e-06, + "loss": 0.845, "step": 23911 }, { - "epoch": 0.6785471055618615, + "epoch": 0.6776049193799768, "grad_norm": 0.0, - "learning_rate": 4.947559943141964e-06, - "loss": 0.7896, + "learning_rate": 4.973793621694943e-06, + "loss": 0.8713, "step": 23912 }, { - "epoch": 0.6785754824063565, + "epoch": 0.6776332568222392, "grad_norm": 0.0, - "learning_rate": 4.94676682167806e-06, - "loss": 0.7996, + "learning_rate": 4.973000207545821e-06, + "loss": 0.8066, "step": 23913 }, { - "epoch": 0.6786038592508513, + "epoch": 0.6776615942645017, "grad_norm": 0.0, - "learning_rate": 4.9459737428990395e-06, - "loss": 0.8713, + "learning_rate": 4.972206835740986e-06, + "loss": 0.8007, "step": 23914 }, { - "epoch": 0.6786322360953462, + "epoch": 0.6776899317067642, "grad_norm": 0.0, - "learning_rate": 4.9451807068116064e-06, - "loss": 0.8707, + "learning_rate": 4.97141350628712e-06, + "loss": 0.7807, "step": 23915 }, { - "epoch": 0.678660612939841, + "epoch": 0.6777182691490267, "grad_norm": 0.0, - "learning_rate": 4.944387713422463e-06, - "loss": 0.864, + "learning_rate": 4.970620219190908e-06, + "loss": 0.7748, "step": 23916 }, { - "epoch": 0.678688989784336, + "epoch": 0.677746606591289, "grad_norm": 0.0, - "learning_rate": 4.9435947627383e-06, - "loss": 0.7876, + "learning_rate": 4.969826974459027e-06, + "loss": 0.8939, "step": 23917 }, { - "epoch": 0.6787173666288309, + "epoch": 0.6777749440335515, "grad_norm": 0.0, - "learning_rate": 4.94280185476582e-06, - "loss": 0.8082, + "learning_rate": 4.96903377209816e-06, + "loss": 0.8871, "step": 23918 }, { - "epoch": 0.6787457434733257, + "epoch": 0.677803281475814, "grad_norm": 0.0, - "learning_rate": 4.942008989511723e-06, - "loss": 0.7047, + "learning_rate": 4.968240612114995e-06, + "loss": 0.7778, "step": 23919 }, { - "epoch": 0.6787741203178207, + "epoch": 0.6778316189180764, "grad_norm": 0.0, - "learning_rate": 4.9412161669826995e-06, - "loss": 0.7767, + "learning_rate": 4.967447494516203e-06, + "loss": 0.881, "step": 23920 }, { - "epoch": 0.6788024971623156, + "epoch": 0.6778599563603389, "grad_norm": 0.0, - "learning_rate": 4.94042338718545e-06, - "loss": 0.8085, + "learning_rate": 4.9666544193084735e-06, + "loss": 0.7727, "step": 23921 }, { - "epoch": 0.6788308740068104, + "epoch": 0.6778882938026014, "grad_norm": 0.0, - "learning_rate": 4.9396306501266764e-06, - "loss": 0.819, + "learning_rate": 4.965861386498479e-06, + "loss": 0.8541, "step": 23922 }, { - "epoch": 0.6788592508513054, + "epoch": 0.6779166312448638, "grad_norm": 0.0, - "learning_rate": 4.938837955813066e-06, - "loss": 0.7859, + "learning_rate": 4.965068396092904e-06, + "loss": 0.8653, "step": 23923 }, { - "epoch": 0.6788876276958002, + "epoch": 0.6779449686871263, "grad_norm": 0.0, - "learning_rate": 4.938045304251319e-06, - "loss": 0.8704, + "learning_rate": 4.964275448098427e-06, + "loss": 0.9411, "step": 23924 }, { - "epoch": 0.6789160045402951, + "epoch": 0.6779733061293888, "grad_norm": 0.0, - "learning_rate": 4.937252695448135e-06, - "loss": 0.9087, + "learning_rate": 4.963482542521728e-06, + "loss": 0.967, "step": 23925 }, { - "epoch": 0.67894438138479, + "epoch": 0.6780016435716513, "grad_norm": 0.0, - "learning_rate": 4.936460129410197e-06, - "loss": 0.8982, + "learning_rate": 4.9626896793694905e-06, + "loss": 0.7994, "step": 23926 }, { - "epoch": 0.6789727582292849, + "epoch": 0.6780299810139137, "grad_norm": 0.0, - "learning_rate": 4.9356676061442154e-06, - "loss": 0.7449, + "learning_rate": 4.961896858648383e-06, + "loss": 0.9133, "step": 23927 }, { - "epoch": 0.6790011350737798, + "epoch": 0.6780583184561761, "grad_norm": 0.0, - "learning_rate": 4.9348751256568735e-06, - "loss": 0.8231, + "learning_rate": 4.96110408036509e-06, + "loss": 0.8373, "step": 23928 }, { - "epoch": 0.6790295119182747, + "epoch": 0.6780866558984386, "grad_norm": 0.0, - "learning_rate": 4.93408268795487e-06, - "loss": 0.8276, + "learning_rate": 4.9603113445262915e-06, + "loss": 0.7892, "step": 23929 }, { - "epoch": 0.6790578887627696, + "epoch": 0.678114993340701, "grad_norm": 0.0, - "learning_rate": 4.9332902930449e-06, - "loss": 0.8685, + "learning_rate": 4.95951865113866e-06, + "loss": 0.751, "step": 23930 }, { - "epoch": 0.6790862656072645, + "epoch": 0.6781433307829635, "grad_norm": 0.0, - "learning_rate": 4.932497940933653e-06, - "loss": 0.8289, + "learning_rate": 4.958726000208872e-06, + "loss": 0.7974, "step": 23931 }, { - "epoch": 0.6791146424517593, + "epoch": 0.678171668225226, "grad_norm": 0.0, - "learning_rate": 4.931705631627821e-06, - "loss": 0.7536, + "learning_rate": 4.957933391743614e-06, + "loss": 0.9037, "step": 23932 }, { - "epoch": 0.6791430192962542, + "epoch": 0.6782000056674885, "grad_norm": 0.0, - "learning_rate": 4.9309133651341046e-06, - "loss": 0.8182, + "learning_rate": 4.957140825749549e-06, + "loss": 0.8177, "step": 23933 }, { - "epoch": 0.6791713961407492, + "epoch": 0.6782283431097509, "grad_norm": 0.0, - "learning_rate": 4.9301211414591875e-06, - "loss": 0.8555, + "learning_rate": 4.956348302233364e-06, + "loss": 0.8484, "step": 23934 }, { - "epoch": 0.679199772985244, + "epoch": 0.6782566805520134, "grad_norm": 0.0, - "learning_rate": 4.929328960609764e-06, - "loss": 0.8343, + "learning_rate": 4.955555821201726e-06, + "loss": 0.7705, "step": 23935 }, { - "epoch": 0.6792281498297389, + "epoch": 0.6782850179942759, "grad_norm": 0.0, - "learning_rate": 4.928536822592531e-06, - "loss": 0.9438, + "learning_rate": 4.954763382661315e-06, + "loss": 0.8147, "step": 23936 }, { - "epoch": 0.6792565266742339, + "epoch": 0.6783133554365383, "grad_norm": 0.0, - "learning_rate": 4.927744727414172e-06, - "loss": 0.7287, + "learning_rate": 4.953970986618806e-06, + "loss": 0.9164, "step": 23937 }, { - "epoch": 0.6792849035187287, + "epoch": 0.6783416928788008, "grad_norm": 0.0, - "learning_rate": 4.926952675081381e-06, - "loss": 0.8457, + "learning_rate": 4.9531786330808715e-06, + "loss": 0.9014, "step": 23938 }, { - "epoch": 0.6793132803632236, + "epoch": 0.6783700303210632, "grad_norm": 0.0, - "learning_rate": 4.926160665600853e-06, - "loss": 0.7692, + "learning_rate": 4.952386322054189e-06, + "loss": 0.8257, "step": 23939 }, { - "epoch": 0.6793416572077186, + "epoch": 0.6783983677633256, "grad_norm": 0.0, - "learning_rate": 4.925368698979269e-06, - "loss": 0.7421, + "learning_rate": 4.951594053545435e-06, + "loss": 0.8479, "step": 23940 }, { - "epoch": 0.6793700340522134, + "epoch": 0.6784267052055881, "grad_norm": 0.0, - "learning_rate": 4.924576775223326e-06, - "loss": 0.9438, + "learning_rate": 4.950801827561274e-06, + "loss": 0.7943, "step": 23941 }, { - "epoch": 0.6793984108967083, + "epoch": 0.6784550426478506, "grad_norm": 0.0, - "learning_rate": 4.923784894339708e-06, - "loss": 0.7565, + "learning_rate": 4.950009644108388e-06, + "loss": 0.8367, "step": 23942 }, { - "epoch": 0.6794267877412031, + "epoch": 0.6784833800901131, "grad_norm": 0.0, - "learning_rate": 4.9229930563351095e-06, - "loss": 0.8852, + "learning_rate": 4.949217503193443e-06, + "loss": 0.8725, "step": 23943 }, { - "epoch": 0.6794551645856981, + "epoch": 0.6785117175323755, "grad_norm": 0.0, - "learning_rate": 4.92220126121622e-06, - "loss": 0.8732, + "learning_rate": 4.948425404823114e-06, + "loss": 0.811, "step": 23944 }, { - "epoch": 0.679483541430193, + "epoch": 0.678540054974638, "grad_norm": 0.0, - "learning_rate": 4.921409508989721e-06, - "loss": 0.958, + "learning_rate": 4.947633349004077e-06, + "loss": 0.7941, "step": 23945 }, { - "epoch": 0.6795119182746878, + "epoch": 0.6785683924169005, "grad_norm": 0.0, - "learning_rate": 4.920617799662304e-06, - "loss": 0.7738, + "learning_rate": 4.946841335742998e-06, + "loss": 0.8074, "step": 23946 }, { - "epoch": 0.6795402951191828, + "epoch": 0.6785967298591629, "grad_norm": 0.0, - "learning_rate": 4.91982613324066e-06, - "loss": 0.8776, + "learning_rate": 4.94604936504655e-06, + "loss": 0.7444, "step": 23947 }, { - "epoch": 0.6795686719636777, + "epoch": 0.6786250673014254, "grad_norm": 0.0, - "learning_rate": 4.919034509731469e-06, - "loss": 0.8743, + "learning_rate": 4.945257436921409e-06, + "loss": 0.8504, "step": 23948 }, { - "epoch": 0.6795970488081725, + "epoch": 0.6786534047436878, "grad_norm": 0.0, - "learning_rate": 4.918242929141423e-06, - "loss": 0.7738, + "learning_rate": 4.944465551374238e-06, + "loss": 0.8286, "step": 23949 }, { - "epoch": 0.6796254256526674, + "epoch": 0.6786817421859503, "grad_norm": 0.0, - "learning_rate": 4.917451391477212e-06, - "loss": 0.7701, + "learning_rate": 4.94367370841171e-06, + "loss": 0.7104, "step": 23950 }, { - "epoch": 0.6796538024971623, + "epoch": 0.6787100796282127, "grad_norm": 0.0, - "learning_rate": 4.916659896745513e-06, - "loss": 0.8357, + "learning_rate": 4.942881908040498e-06, + "loss": 0.8195, "step": 23951 }, { - "epoch": 0.6796821793416572, + "epoch": 0.6787384170704752, "grad_norm": 0.0, - "learning_rate": 4.9158684449530155e-06, - "loss": 0.7835, + "learning_rate": 4.942090150267268e-06, + "loss": 0.8861, "step": 23952 }, { - "epoch": 0.6797105561861521, + "epoch": 0.6787667545127377, "grad_norm": 0.0, - "learning_rate": 4.91507703610641e-06, - "loss": 0.8189, + "learning_rate": 4.9412984350986945e-06, + "loss": 0.85, "step": 23953 }, { - "epoch": 0.679738933030647, + "epoch": 0.6787950919550001, "grad_norm": 0.0, - "learning_rate": 4.9142856702123745e-06, - "loss": 0.7577, + "learning_rate": 4.94050676254144e-06, + "loss": 0.7703, "step": 23954 }, { - "epoch": 0.6797673098751419, + "epoch": 0.6788234293972626, "grad_norm": 0.0, - "learning_rate": 4.913494347277595e-06, - "loss": 0.8112, + "learning_rate": 4.939715132602178e-06, + "loss": 0.8552, "step": 23955 }, { - "epoch": 0.6797956867196367, + "epoch": 0.6788517668395251, "grad_norm": 0.0, - "learning_rate": 4.9127030673087585e-06, - "loss": 0.8071, + "learning_rate": 4.938923545287572e-06, + "loss": 0.8643, "step": 23956 }, { - "epoch": 0.6798240635641317, + "epoch": 0.6788801042817876, "grad_norm": 0.0, - "learning_rate": 4.911911830312548e-06, - "loss": 0.8659, + "learning_rate": 4.938132000604293e-06, + "loss": 0.9381, "step": 23957 }, { - "epoch": 0.6798524404086266, + "epoch": 0.67890844172405, "grad_norm": 0.0, - "learning_rate": 4.911120636295647e-06, - "loss": 0.6601, + "learning_rate": 4.9373404985590116e-06, + "loss": 0.8744, "step": 23958 }, { - "epoch": 0.6798808172531214, + "epoch": 0.6789367791663125, "grad_norm": 0.0, - "learning_rate": 4.910329485264741e-06, - "loss": 0.7847, + "learning_rate": 4.936549039158386e-06, + "loss": 0.8323, "step": 23959 }, { - "epoch": 0.6799091940976163, + "epoch": 0.678965116608575, "grad_norm": 0.0, - "learning_rate": 4.909538377226508e-06, - "loss": 0.7713, + "learning_rate": 4.935757622409089e-06, + "loss": 0.8203, "step": 23960 }, { - "epoch": 0.6799375709421113, + "epoch": 0.6789934540508373, "grad_norm": 0.0, - "learning_rate": 4.908747312187637e-06, - "loss": 0.9011, + "learning_rate": 4.934966248317789e-06, + "loss": 0.8016, "step": 23961 }, { - "epoch": 0.6799659477866061, + "epoch": 0.6790217914930998, "grad_norm": 0.0, - "learning_rate": 4.907956290154803e-06, - "loss": 0.8597, + "learning_rate": 4.934174916891146e-06, + "loss": 0.7558, "step": 23962 }, { - "epoch": 0.679994324631101, + "epoch": 0.6790501289353623, "grad_norm": 0.0, - "learning_rate": 4.90716531113469e-06, - "loss": 0.8889, + "learning_rate": 4.933383628135828e-06, + "loss": 0.7949, "step": 23963 }, { - "epoch": 0.680022701475596, + "epoch": 0.6790784663776247, "grad_norm": 0.0, - "learning_rate": 4.906374375133985e-06, - "loss": 0.9119, + "learning_rate": 4.932592382058503e-06, + "loss": 0.6873, "step": 23964 }, { - "epoch": 0.6800510783200908, + "epoch": 0.6791068038198872, "grad_norm": 0.0, - "learning_rate": 4.905583482159361e-06, - "loss": 0.8665, + "learning_rate": 4.931801178665833e-06, + "loss": 0.9239, "step": 23965 }, { - "epoch": 0.6800794551645857, + "epoch": 0.6791351412621497, "grad_norm": 0.0, - "learning_rate": 4.904792632217502e-06, - "loss": 0.8865, + "learning_rate": 4.931010017964487e-06, + "loss": 0.8102, "step": 23966 }, { - "epoch": 0.6801078320090805, + "epoch": 0.6791634787044122, "grad_norm": 0.0, - "learning_rate": 4.904001825315092e-06, - "loss": 0.8492, + "learning_rate": 4.930218899961123e-06, + "loss": 0.8685, "step": 23967 }, { - "epoch": 0.6801362088535755, + "epoch": 0.6791918161466746, "grad_norm": 0.0, - "learning_rate": 4.903211061458805e-06, - "loss": 0.7888, + "learning_rate": 4.929427824662408e-06, + "loss": 0.8658, "step": 23968 }, { - "epoch": 0.6801645856980704, + "epoch": 0.6792201535889371, "grad_norm": 0.0, - "learning_rate": 4.902420340655323e-06, - "loss": 0.802, + "learning_rate": 4.9286367920750075e-06, + "loss": 0.7927, "step": 23969 }, { - "epoch": 0.6801929625425652, + "epoch": 0.6792484910311996, "grad_norm": 0.0, - "learning_rate": 4.9016296629113296e-06, - "loss": 0.9424, + "learning_rate": 4.927845802205581e-06, + "loss": 0.9024, "step": 23970 }, { - "epoch": 0.6802213393870602, + "epoch": 0.6792768284734619, "grad_norm": 0.0, - "learning_rate": 4.900839028233494e-06, - "loss": 0.8203, + "learning_rate": 4.927054855060794e-06, + "loss": 0.9369, "step": 23971 }, { - "epoch": 0.6802497162315551, + "epoch": 0.6793051659157244, "grad_norm": 0.0, - "learning_rate": 4.900048436628498e-06, - "loss": 0.907, + "learning_rate": 4.9262639506473064e-06, + "loss": 0.8184, "step": 23972 }, { - "epoch": 0.6802780930760499, + "epoch": 0.6793335033579869, "grad_norm": 0.0, - "learning_rate": 4.899257888103029e-06, - "loss": 0.9477, + "learning_rate": 4.925473088971781e-06, + "loss": 0.7938, "step": 23973 }, { - "epoch": 0.6803064699205449, + "epoch": 0.6793618408002494, "grad_norm": 0.0, - "learning_rate": 4.898467382663756e-06, - "loss": 0.9076, + "learning_rate": 4.924682270040883e-06, + "loss": 0.7781, "step": 23974 }, { - "epoch": 0.6803348467650397, + "epoch": 0.6793901782425118, "grad_norm": 0.0, - "learning_rate": 4.897676920317357e-06, - "loss": 0.7551, + "learning_rate": 4.923891493861268e-06, + "loss": 0.7449, "step": 23975 }, { - "epoch": 0.6803632236095346, + "epoch": 0.6794185156847743, "grad_norm": 0.0, - "learning_rate": 4.8968865010705135e-06, - "loss": 0.8585, + "learning_rate": 4.9231007604396e-06, + "loss": 0.818, "step": 23976 }, { - "epoch": 0.6803916004540295, + "epoch": 0.6794468531270368, "grad_norm": 0.0, - "learning_rate": 4.896096124929897e-06, - "loss": 0.9225, + "learning_rate": 4.9223100697825385e-06, + "loss": 0.8565, "step": 23977 }, { - "epoch": 0.6804199772985244, + "epoch": 0.6794751905692992, "grad_norm": 0.0, - "learning_rate": 4.895305791902184e-06, - "loss": 0.7509, + "learning_rate": 4.921519421896746e-06, + "loss": 0.7773, "step": 23978 }, { - "epoch": 0.6804483541430193, + "epoch": 0.6795035280115617, "grad_norm": 0.0, - "learning_rate": 4.894515501994057e-06, - "loss": 0.9226, + "learning_rate": 4.920728816788885e-06, + "loss": 0.8595, "step": 23979 }, { - "epoch": 0.6804767309875142, + "epoch": 0.6795318654538242, "grad_norm": 0.0, - "learning_rate": 4.8937252552121835e-06, - "loss": 0.9074, + "learning_rate": 4.919938254465606e-06, + "loss": 0.843, "step": 23980 }, { - "epoch": 0.6805051078320091, + "epoch": 0.6795602028960867, "grad_norm": 0.0, - "learning_rate": 4.892935051563243e-06, - "loss": 0.8031, + "learning_rate": 4.919147734933575e-06, + "loss": 0.8306, "step": 23981 }, { - "epoch": 0.680533484676504, + "epoch": 0.679588540338349, "grad_norm": 0.0, - "learning_rate": 4.892144891053912e-06, - "loss": 0.746, + "learning_rate": 4.9183572581994525e-06, + "loss": 0.8082, "step": 23982 }, { - "epoch": 0.6805618615209988, + "epoch": 0.6796168777806115, "grad_norm": 0.0, - "learning_rate": 4.891354773690861e-06, - "loss": 0.8511, + "learning_rate": 4.91756682426989e-06, + "loss": 0.942, "step": 23983 }, { - "epoch": 0.6805902383654937, + "epoch": 0.679645215222874, "grad_norm": 0.0, - "learning_rate": 4.890564699480764e-06, - "loss": 0.8709, + "learning_rate": 4.916776433151553e-06, + "loss": 0.7736, "step": 23984 }, { - "epoch": 0.6806186152099887, + "epoch": 0.6796735526651364, "grad_norm": 0.0, - "learning_rate": 4.889774668430301e-06, - "loss": 0.9465, + "learning_rate": 4.9159860848510936e-06, + "loss": 0.8521, "step": 23985 }, { - "epoch": 0.6806469920544835, + "epoch": 0.6797018901073989, "grad_norm": 0.0, - "learning_rate": 4.888984680546137e-06, - "loss": 0.9562, + "learning_rate": 4.9151957793751695e-06, + "loss": 0.7866, "step": 23986 }, { - "epoch": 0.6806753688989784, + "epoch": 0.6797302275496614, "grad_norm": 0.0, - "learning_rate": 4.88819473583495e-06, - "loss": 0.7712, + "learning_rate": 4.914405516730444e-06, + "loss": 0.8261, "step": 23987 }, { - "epoch": 0.6807037457434734, + "epoch": 0.6797585649919238, "grad_norm": 0.0, - "learning_rate": 4.887404834303412e-06, - "loss": 0.8564, + "learning_rate": 4.913615296923566e-06, + "loss": 0.8481, "step": 23988 }, { - "epoch": 0.6807321225879682, + "epoch": 0.6797869024341863, "grad_norm": 0.0, - "learning_rate": 4.886614975958194e-06, - "loss": 0.8529, + "learning_rate": 4.912825119961194e-06, + "loss": 0.8627, "step": 23989 }, { - "epoch": 0.6807604994324631, + "epoch": 0.6798152398764488, "grad_norm": 0.0, - "learning_rate": 4.8858251608059735e-06, - "loss": 0.7801, + "learning_rate": 4.9120349858499864e-06, + "loss": 0.8191, "step": 23990 }, { - "epoch": 0.680788876276958, + "epoch": 0.6798435773187113, "grad_norm": 0.0, - "learning_rate": 4.885035388853414e-06, - "loss": 0.7812, + "learning_rate": 4.911244894596596e-06, + "loss": 0.8471, "step": 23991 }, { - "epoch": 0.6808172531214529, + "epoch": 0.6798719147609736, "grad_norm": 0.0, - "learning_rate": 4.884245660107191e-06, - "loss": 0.902, + "learning_rate": 4.910454846207685e-06, + "loss": 0.7612, "step": 23992 }, { - "epoch": 0.6808456299659478, + "epoch": 0.6799002522032361, "grad_norm": 0.0, - "learning_rate": 4.883455974573979e-06, - "loss": 0.886, + "learning_rate": 4.9096648406898974e-06, + "loss": 0.7385, "step": 23993 }, { - "epoch": 0.6808740068104426, + "epoch": 0.6799285896454986, "grad_norm": 0.0, - "learning_rate": 4.882666332260438e-06, - "loss": 0.931, + "learning_rate": 4.908874878049894e-06, + "loss": 0.8247, "step": 23994 }, { - "epoch": 0.6809023836549376, + "epoch": 0.679956927087761, "grad_norm": 0.0, - "learning_rate": 4.881876733173248e-06, - "loss": 0.9155, + "learning_rate": 4.9080849582943324e-06, + "loss": 0.8939, "step": 23995 }, { - "epoch": 0.6809307604994325, + "epoch": 0.6799852645300235, "grad_norm": 0.0, - "learning_rate": 4.8810871773190774e-06, - "loss": 0.7317, + "learning_rate": 4.907295081429857e-06, + "loss": 0.9035, "step": 23996 }, { - "epoch": 0.6809591373439273, + "epoch": 0.680013601972286, "grad_norm": 0.0, - "learning_rate": 4.880297664704589e-06, - "loss": 0.7905, + "learning_rate": 4.906505247463127e-06, + "loss": 0.8488, "step": 23997 }, { - "epoch": 0.6809875141884223, + "epoch": 0.6800419394145485, "grad_norm": 0.0, - "learning_rate": 4.879508195336457e-06, - "loss": 0.781, + "learning_rate": 4.905715456400798e-06, + "loss": 0.796, "step": 23998 }, { - "epoch": 0.6810158910329172, + "epoch": 0.6800702768568109, "grad_norm": 0.0, - "learning_rate": 4.878718769221354e-06, - "loss": 0.8555, + "learning_rate": 4.904925708249516e-06, + "loss": 0.783, "step": 23999 }, { - "epoch": 0.681044267877412, + "epoch": 0.6800986142990734, "grad_norm": 0.0, - "learning_rate": 4.877929386365939e-06, - "loss": 0.8019, + "learning_rate": 4.90413600301594e-06, + "loss": 0.8371, "step": 24000 }, { - "epoch": 0.6810726447219069, + "epoch": 0.6801269517413359, "grad_norm": 0.0, - "learning_rate": 4.877140046776885e-06, - "loss": 0.9298, + "learning_rate": 4.903346340706716e-06, + "loss": 0.7429, "step": 24001 }, { - "epoch": 0.6811010215664018, + "epoch": 0.6801552891835982, "grad_norm": 0.0, - "learning_rate": 4.8763507504608595e-06, - "loss": 0.972, + "learning_rate": 4.9025567213284975e-06, + "loss": 0.8316, "step": 24002 }, { - "epoch": 0.6811293984108967, + "epoch": 0.6801836266258607, "grad_norm": 0.0, - "learning_rate": 4.8755614974245295e-06, - "loss": 0.9087, + "learning_rate": 4.901767144887937e-06, + "loss": 0.8022, "step": 24003 }, { - "epoch": 0.6811577752553916, + "epoch": 0.6802119640681232, "grad_norm": 0.0, - "learning_rate": 4.874772287674566e-06, - "loss": 0.873, + "learning_rate": 4.900977611391685e-06, + "loss": 0.8837, "step": 24004 }, { - "epoch": 0.6811861520998865, + "epoch": 0.6802403015103857, "grad_norm": 0.0, - "learning_rate": 4.873983121217627e-06, - "loss": 0.8539, + "learning_rate": 4.900188120846392e-06, + "loss": 0.8866, "step": 24005 }, { - "epoch": 0.6812145289443814, + "epoch": 0.6802686389526481, "grad_norm": 0.0, - "learning_rate": 4.873193998060382e-06, - "loss": 0.8004, + "learning_rate": 4.899398673258712e-06, + "loss": 0.8366, "step": 24006 }, { - "epoch": 0.6812429057888763, + "epoch": 0.6802969763949106, "grad_norm": 0.0, - "learning_rate": 4.872404918209504e-06, - "loss": 0.8187, + "learning_rate": 4.898609268635287e-06, + "loss": 0.9085, "step": 24007 }, { - "epoch": 0.6812712826333712, + "epoch": 0.6803253138371731, "grad_norm": 0.0, - "learning_rate": 4.8716158816716476e-06, - "loss": 0.845, + "learning_rate": 4.897819906982775e-06, + "loss": 0.8561, "step": 24008 }, { - "epoch": 0.6812996594778661, + "epoch": 0.6803536512794355, "grad_norm": 0.0, - "learning_rate": 4.870826888453481e-06, - "loss": 0.8703, + "learning_rate": 4.897030588307816e-06, + "loss": 0.8448, "step": 24009 }, { - "epoch": 0.6813280363223609, + "epoch": 0.680381988721698, "grad_norm": 0.0, - "learning_rate": 4.870037938561676e-06, - "loss": 0.8686, + "learning_rate": 4.896241312617064e-06, + "loss": 0.9072, "step": 24010 }, { - "epoch": 0.6813564131668558, + "epoch": 0.6804103261639605, "grad_norm": 0.0, - "learning_rate": 4.8692490320028866e-06, - "loss": 0.7912, + "learning_rate": 4.89545207991717e-06, + "loss": 0.8889, "step": 24011 }, { - "epoch": 0.6813847900113508, + "epoch": 0.6804386636062228, "grad_norm": 0.0, - "learning_rate": 4.86846016878378e-06, - "loss": 0.9021, + "learning_rate": 4.894662890214775e-06, + "loss": 0.8458, "step": 24012 }, { - "epoch": 0.6814131668558456, + "epoch": 0.6804670010484853, "grad_norm": 0.0, - "learning_rate": 4.867671348911026e-06, - "loss": 0.8897, + "learning_rate": 4.893873743516534e-06, + "loss": 0.8191, "step": 24013 }, { - "epoch": 0.6814415437003405, + "epoch": 0.6804953384907478, "grad_norm": 0.0, - "learning_rate": 4.866882572391279e-06, - "loss": 0.8923, + "learning_rate": 4.8930846398290865e-06, + "loss": 0.9117, "step": 24014 }, { - "epoch": 0.6814699205448355, + "epoch": 0.6805236759330103, "grad_norm": 0.0, - "learning_rate": 4.866093839231205e-06, - "loss": 0.7966, + "learning_rate": 4.8922955791590845e-06, + "loss": 0.7569, "step": 24015 }, { - "epoch": 0.6814982973893303, + "epoch": 0.6805520133752727, "grad_norm": 0.0, - "learning_rate": 4.865305149437471e-06, - "loss": 0.8882, + "learning_rate": 4.891506561513172e-06, + "loss": 0.7976, "step": 24016 }, { - "epoch": 0.6815266742338252, + "epoch": 0.6805803508175352, "grad_norm": 0.0, - "learning_rate": 4.864516503016733e-06, - "loss": 0.7567, + "learning_rate": 4.890717586897997e-06, + "loss": 0.8845, "step": 24017 }, { - "epoch": 0.68155505107832, + "epoch": 0.6806086882597977, "grad_norm": 0.0, - "learning_rate": 4.863727899975653e-06, - "loss": 0.884, + "learning_rate": 4.889928655320206e-06, + "loss": 0.8996, "step": 24018 }, { - "epoch": 0.681583427922815, + "epoch": 0.6806370257020601, "grad_norm": 0.0, - "learning_rate": 4.8629393403208955e-06, - "loss": 0.8486, + "learning_rate": 4.889139766786447e-06, + "loss": 0.8578, "step": 24019 }, { - "epoch": 0.6816118047673099, + "epoch": 0.6806653631443226, "grad_norm": 0.0, - "learning_rate": 4.86215082405912e-06, - "loss": 0.8331, + "learning_rate": 4.888350921303358e-06, + "loss": 0.8021, "step": 24020 }, { - "epoch": 0.6816401816118047, + "epoch": 0.6806937005865851, "grad_norm": 0.0, - "learning_rate": 4.861362351196991e-06, - "loss": 0.8162, + "learning_rate": 4.887562118877591e-06, + "loss": 0.8662, "step": 24021 }, { - "epoch": 0.6816685584562997, + "epoch": 0.6807220380288476, "grad_norm": 0.0, - "learning_rate": 4.860573921741162e-06, - "loss": 0.8248, + "learning_rate": 4.886773359515783e-06, + "loss": 0.8758, "step": 24022 }, { - "epoch": 0.6816969353007946, + "epoch": 0.68075037547111, "grad_norm": 0.0, - "learning_rate": 4.859785535698296e-06, - "loss": 0.8305, + "learning_rate": 4.885984643224581e-06, + "loss": 0.6956, "step": 24023 }, { - "epoch": 0.6817253121452894, + "epoch": 0.6807787129133724, "grad_norm": 0.0, - "learning_rate": 4.8589971930750554e-06, - "loss": 0.9451, + "learning_rate": 4.885195970010634e-06, + "loss": 0.9027, "step": 24024 }, { - "epoch": 0.6817536889897844, + "epoch": 0.6808070503556349, "grad_norm": 0.0, - "learning_rate": 4.858208893878095e-06, - "loss": 0.9411, + "learning_rate": 4.884407339880577e-06, + "loss": 0.8958, "step": 24025 }, { - "epoch": 0.6817820658342792, + "epoch": 0.6808353877978973, "grad_norm": 0.0, - "learning_rate": 4.857420638114073e-06, - "loss": 0.8359, + "learning_rate": 4.883618752841056e-06, + "loss": 0.8293, "step": 24026 }, { - "epoch": 0.6818104426787741, + "epoch": 0.6808637252401598, "grad_norm": 0.0, - "learning_rate": 4.8566324257896545e-06, - "loss": 0.8981, + "learning_rate": 4.882830208898718e-06, + "loss": 0.8577, "step": 24027 }, { - "epoch": 0.681838819523269, + "epoch": 0.6808920626824223, "grad_norm": 0.0, - "learning_rate": 4.855844256911489e-06, - "loss": 0.858, + "learning_rate": 4.882041708060198e-06, + "loss": 0.8615, "step": 24028 }, { - "epoch": 0.6818671963677639, + "epoch": 0.6809204001246848, "grad_norm": 0.0, - "learning_rate": 4.85505613148624e-06, - "loss": 0.7618, + "learning_rate": 4.881253250332141e-06, + "loss": 0.8814, "step": 24029 }, { - "epoch": 0.6818955732122588, + "epoch": 0.6809487375669472, "grad_norm": 0.0, - "learning_rate": 4.854268049520565e-06, - "loss": 0.7906, + "learning_rate": 4.880464835721188e-06, + "loss": 0.8144, "step": 24030 }, { - "epoch": 0.6819239500567537, + "epoch": 0.6809770750092097, "grad_norm": 0.0, - "learning_rate": 4.853480011021116e-06, - "loss": 0.8678, + "learning_rate": 4.879676464233982e-06, + "loss": 0.9545, "step": 24031 }, { - "epoch": 0.6819523269012486, + "epoch": 0.6810054124514722, "grad_norm": 0.0, - "learning_rate": 4.8526920159945536e-06, - "loss": 0.747, + "learning_rate": 4.878888135877166e-06, + "loss": 0.7251, "step": 24032 }, { - "epoch": 0.6819807037457435, + "epoch": 0.6810337498937346, "grad_norm": 0.0, - "learning_rate": 4.851904064447531e-06, - "loss": 0.8899, + "learning_rate": 4.878099850657373e-06, + "loss": 0.9009, "step": 24033 }, { - "epoch": 0.6820090805902383, + "epoch": 0.681062087335997, "grad_norm": 0.0, - "learning_rate": 4.851116156386708e-06, - "loss": 0.7891, + "learning_rate": 4.877311608581246e-06, + "loss": 0.9192, "step": 24034 }, { - "epoch": 0.6820374574347332, + "epoch": 0.6810904247782595, "grad_norm": 0.0, - "learning_rate": 4.850328291818742e-06, - "loss": 0.8651, + "learning_rate": 4.87652340965543e-06, + "loss": 0.8029, "step": 24035 }, { - "epoch": 0.6820658342792282, + "epoch": 0.6811187622205219, "grad_norm": 0.0, - "learning_rate": 4.84954047075028e-06, - "loss": 0.836, + "learning_rate": 4.875735253886557e-06, + "loss": 0.8564, "step": 24036 }, { - "epoch": 0.682094211123723, + "epoch": 0.6811470996627844, "grad_norm": 0.0, - "learning_rate": 4.8487526931879815e-06, - "loss": 0.8702, + "learning_rate": 4.874947141281271e-06, + "loss": 0.7873, "step": 24037 }, { - "epoch": 0.6821225879682179, + "epoch": 0.6811754371050469, "grad_norm": 0.0, - "learning_rate": 4.847964959138504e-06, - "loss": 0.7987, + "learning_rate": 4.874159071846206e-06, + "loss": 0.83, "step": 24038 }, { - "epoch": 0.6821509648127129, + "epoch": 0.6812037745473094, "grad_norm": 0.0, - "learning_rate": 4.847177268608494e-06, - "loss": 0.8107, + "learning_rate": 4.873371045588002e-06, + "loss": 0.9092, "step": 24039 }, { - "epoch": 0.6821793416572077, + "epoch": 0.6812321119895718, "grad_norm": 0.0, - "learning_rate": 4.84638962160461e-06, - "loss": 0.8491, + "learning_rate": 4.872583062513301e-06, + "loss": 0.8048, "step": 24040 }, { - "epoch": 0.6822077185017026, + "epoch": 0.6812604494318343, "grad_norm": 0.0, - "learning_rate": 4.845602018133509e-06, - "loss": 0.9258, + "learning_rate": 4.871795122628733e-06, + "loss": 0.8793, "step": 24041 }, { - "epoch": 0.6822360953461976, + "epoch": 0.6812887868740968, "grad_norm": 0.0, - "learning_rate": 4.844814458201834e-06, - "loss": 0.8366, + "learning_rate": 4.87100722594094e-06, + "loss": 0.7502, "step": 24042 }, { - "epoch": 0.6822644721906924, + "epoch": 0.6813171243163592, "grad_norm": 0.0, - "learning_rate": 4.844026941816243e-06, - "loss": 0.7732, + "learning_rate": 4.8702193724565575e-06, + "loss": 0.8972, "step": 24043 }, { - "epoch": 0.6822928490351873, + "epoch": 0.6813454617586217, "grad_norm": 0.0, - "learning_rate": 4.843239468983394e-06, - "loss": 0.7964, + "learning_rate": 4.869431562182221e-06, + "loss": 0.893, "step": 24044 }, { - "epoch": 0.6823212258796821, + "epoch": 0.6813737992008841, "grad_norm": 0.0, - "learning_rate": 4.842452039709927e-06, - "loss": 0.7888, + "learning_rate": 4.868643795124572e-06, + "loss": 0.8245, "step": 24045 }, { - "epoch": 0.6823496027241771, + "epoch": 0.6814021366431466, "grad_norm": 0.0, - "learning_rate": 4.8416646540025e-06, - "loss": 0.8596, + "learning_rate": 4.8678560712902375e-06, + "loss": 0.8752, "step": 24046 }, { - "epoch": 0.682377979568672, + "epoch": 0.681430474085409, "grad_norm": 0.0, - "learning_rate": 4.840877311867768e-06, - "loss": 0.7705, + "learning_rate": 4.867068390685858e-06, + "loss": 0.8782, "step": 24047 }, { - "epoch": 0.6824063564131668, + "epoch": 0.6814588115276715, "grad_norm": 0.0, - "learning_rate": 4.8400900133123744e-06, - "loss": 0.7083, + "learning_rate": 4.866280753318071e-06, + "loss": 0.7127, "step": 24048 }, { - "epoch": 0.6824347332576618, + "epoch": 0.681487148969934, "grad_norm": 0.0, - "learning_rate": 4.839302758342971e-06, - "loss": 0.7454, + "learning_rate": 4.865493159193504e-06, + "loss": 0.8276, "step": 24049 }, { - "epoch": 0.6824631101021567, + "epoch": 0.6815154864121964, "grad_norm": 0.0, - "learning_rate": 4.838515546966209e-06, - "loss": 0.7754, + "learning_rate": 4.864705608318798e-06, + "loss": 0.9233, "step": 24050 }, { - "epoch": 0.6824914869466515, + "epoch": 0.6815438238544589, "grad_norm": 0.0, - "learning_rate": 4.837728379188738e-06, - "loss": 0.7735, + "learning_rate": 4.863918100700581e-06, + "loss": 0.7449, "step": 24051 }, { - "epoch": 0.6825198637911464, + "epoch": 0.6815721612967214, "grad_norm": 0.0, - "learning_rate": 4.836941255017212e-06, - "loss": 0.8694, + "learning_rate": 4.863130636345488e-06, + "loss": 0.8041, "step": 24052 }, { - "epoch": 0.6825482406356413, + "epoch": 0.6816004987389839, "grad_norm": 0.0, - "learning_rate": 4.8361541744582715e-06, - "loss": 0.7831, + "learning_rate": 4.862343215260157e-06, + "loss": 0.8294, "step": 24053 }, { - "epoch": 0.6825766174801362, + "epoch": 0.6816288361812463, "grad_norm": 0.0, - "learning_rate": 4.83536713751857e-06, - "loss": 0.8234, + "learning_rate": 4.861555837451213e-06, + "loss": 0.8535, "step": 24054 }, { - "epoch": 0.6826049943246311, + "epoch": 0.6816571736235087, "grad_norm": 0.0, - "learning_rate": 4.834580144204757e-06, - "loss": 0.8303, + "learning_rate": 4.8607685029252924e-06, + "loss": 0.8114, "step": 24055 }, { - "epoch": 0.682633371169126, + "epoch": 0.6816855110657712, "grad_norm": 0.0, - "learning_rate": 4.833793194523473e-06, - "loss": 0.709, + "learning_rate": 4.859981211689028e-06, + "loss": 0.9002, "step": 24056 }, { - "epoch": 0.6826617480136209, + "epoch": 0.6817138485080336, "grad_norm": 0.0, - "learning_rate": 4.8330062884813714e-06, - "loss": 0.9254, + "learning_rate": 4.859193963749049e-06, + "loss": 0.8227, "step": 24057 }, { - "epoch": 0.6826901248581158, + "epoch": 0.6817421859502961, "grad_norm": 0.0, - "learning_rate": 4.832219426085101e-06, - "loss": 0.8201, + "learning_rate": 4.858406759111993e-06, + "loss": 0.8291, "step": 24058 }, { - "epoch": 0.6827185017026107, + "epoch": 0.6817705233925586, "grad_norm": 0.0, - "learning_rate": 4.831432607341302e-06, - "loss": 0.8524, + "learning_rate": 4.8576195977844835e-06, + "loss": 0.9728, "step": 24059 }, { - "epoch": 0.6827468785471056, + "epoch": 0.681798860834821, "grad_norm": 0.0, - "learning_rate": 4.830645832256624e-06, - "loss": 0.7066, + "learning_rate": 4.856832479773152e-06, + "loss": 0.7981, "step": 24060 }, { - "epoch": 0.6827752553916004, + "epoch": 0.6818271982770835, "grad_norm": 0.0, - "learning_rate": 4.829859100837717e-06, - "loss": 0.8419, + "learning_rate": 4.856045405084634e-06, + "loss": 0.8779, "step": 24061 }, { - "epoch": 0.6828036322360953, + "epoch": 0.681855535719346, "grad_norm": 0.0, - "learning_rate": 4.829072413091219e-06, - "loss": 0.8118, + "learning_rate": 4.855258373725554e-06, + "loss": 0.7804, "step": 24062 }, { - "epoch": 0.6828320090805903, + "epoch": 0.6818838731616085, "grad_norm": 0.0, - "learning_rate": 4.828285769023778e-06, - "loss": 0.7042, + "learning_rate": 4.854471385702541e-06, + "loss": 0.8627, "step": 24063 }, { - "epoch": 0.6828603859250851, + "epoch": 0.6819122106038709, "grad_norm": 0.0, - "learning_rate": 4.82749916864204e-06, - "loss": 0.8531, + "learning_rate": 4.853684441022231e-06, + "loss": 0.8733, "step": 24064 }, { - "epoch": 0.68288876276958, + "epoch": 0.6819405480461334, "grad_norm": 0.0, - "learning_rate": 4.8267126119526495e-06, - "loss": 0.8695, + "learning_rate": 4.8528975396912435e-06, + "loss": 0.8572, "step": 24065 }, { - "epoch": 0.682917139614075, + "epoch": 0.6819688854883958, "grad_norm": 0.0, - "learning_rate": 4.82592609896225e-06, - "loss": 0.746, + "learning_rate": 4.852110681716215e-06, + "loss": 0.8568, "step": 24066 }, { - "epoch": 0.6829455164585698, + "epoch": 0.6819972229306582, "grad_norm": 0.0, - "learning_rate": 4.8251396296774886e-06, - "loss": 0.808, + "learning_rate": 4.8513238671037665e-06, + "loss": 0.8616, "step": 24067 }, { - "epoch": 0.6829738933030647, + "epoch": 0.6820255603729207, "grad_norm": 0.0, - "learning_rate": 4.8243532041050024e-06, - "loss": 0.8863, + "learning_rate": 4.85053709586053e-06, + "loss": 0.7588, "step": 24068 }, { - "epoch": 0.6830022701475595, + "epoch": 0.6820538978151832, "grad_norm": 0.0, - "learning_rate": 4.823566822251441e-06, - "loss": 0.8835, + "learning_rate": 4.84975036799313e-06, + "loss": 0.7938, "step": 24069 }, { - "epoch": 0.6830306469920545, + "epoch": 0.6820822352574457, "grad_norm": 0.0, - "learning_rate": 4.822780484123438e-06, - "loss": 0.8758, + "learning_rate": 4.848963683508196e-06, + "loss": 0.8779, "step": 24070 }, { - "epoch": 0.6830590238365494, + "epoch": 0.6821105726997081, "grad_norm": 0.0, - "learning_rate": 4.8219941897276425e-06, - "loss": 0.751, + "learning_rate": 4.8481770424123585e-06, + "loss": 0.9055, "step": 24071 }, { - "epoch": 0.6830874006810442, + "epoch": 0.6821389101419706, "grad_norm": 0.0, - "learning_rate": 4.821207939070699e-06, - "loss": 0.9182, + "learning_rate": 4.847390444712234e-06, + "loss": 0.8816, "step": 24072 }, { - "epoch": 0.6831157775255392, + "epoch": 0.6821672475842331, "grad_norm": 0.0, - "learning_rate": 4.82042173215924e-06, - "loss": 0.8255, + "learning_rate": 4.846603890414453e-06, + "loss": 0.7994, "step": 24073 }, { - "epoch": 0.6831441543700341, + "epoch": 0.6821955850264955, "grad_norm": 0.0, - "learning_rate": 4.819635568999912e-06, - "loss": 0.9062, + "learning_rate": 4.845817379525646e-06, + "loss": 0.8929, "step": 24074 }, { - "epoch": 0.6831725312145289, + "epoch": 0.682223922468758, "grad_norm": 0.0, - "learning_rate": 4.8188494495993584e-06, - "loss": 0.7957, + "learning_rate": 4.845030912052428e-06, + "loss": 0.8902, "step": 24075 }, { - "epoch": 0.6832009080590238, + "epoch": 0.6822522599110205, "grad_norm": 0.0, - "learning_rate": 4.818063373964214e-06, - "loss": 0.8438, + "learning_rate": 4.8442444880014295e-06, + "loss": 0.8871, "step": 24076 }, { - "epoch": 0.6832292849035188, + "epoch": 0.682280597353283, "grad_norm": 0.0, - "learning_rate": 4.81727734210112e-06, - "loss": 0.9398, + "learning_rate": 4.843458107379278e-06, + "loss": 0.9139, "step": 24077 }, { - "epoch": 0.6832576617480136, + "epoch": 0.6823089347955453, "grad_norm": 0.0, - "learning_rate": 4.8164913540167214e-06, - "loss": 0.8351, + "learning_rate": 4.84267177019259e-06, + "loss": 0.8502, "step": 24078 }, { - "epoch": 0.6832860385925085, + "epoch": 0.6823372722378078, "grad_norm": 0.0, - "learning_rate": 4.815705409717644e-06, - "loss": 0.8562, + "learning_rate": 4.841885476447996e-06, + "loss": 0.827, "step": 24079 }, { - "epoch": 0.6833144154370034, + "epoch": 0.6823656096800703, "grad_norm": 0.0, - "learning_rate": 4.8149195092105425e-06, - "loss": 0.7858, + "learning_rate": 4.841099226152113e-06, + "loss": 0.7851, "step": 24080 }, { - "epoch": 0.6833427922814983, + "epoch": 0.6823939471223327, "grad_norm": 0.0, - "learning_rate": 4.814133652502052e-06, - "loss": 0.8471, + "learning_rate": 4.840313019311567e-06, + "loss": 0.899, "step": 24081 }, { - "epoch": 0.6833711691259932, + "epoch": 0.6824222845645952, "grad_norm": 0.0, - "learning_rate": 4.813347839598805e-06, - "loss": 0.8566, + "learning_rate": 4.8395268559329785e-06, + "loss": 0.8804, "step": 24082 }, { - "epoch": 0.6833995459704881, + "epoch": 0.6824506220068577, "grad_norm": 0.0, - "learning_rate": 4.81256207050744e-06, - "loss": 0.7817, + "learning_rate": 4.838740736022974e-06, + "loss": 0.8065, "step": 24083 }, { - "epoch": 0.683427922814983, + "epoch": 0.6824789594491201, "grad_norm": 0.0, - "learning_rate": 4.8117763452346e-06, - "loss": 0.8525, + "learning_rate": 4.837954659588172e-06, + "loss": 0.855, "step": 24084 }, { - "epoch": 0.6834562996594779, + "epoch": 0.6825072968913826, "grad_norm": 0.0, - "learning_rate": 4.8109906637869155e-06, - "loss": 0.8942, + "learning_rate": 4.837168626635198e-06, + "loss": 0.8564, "step": 24085 }, { - "epoch": 0.6834846765039727, + "epoch": 0.6825356343336451, "grad_norm": 0.0, - "learning_rate": 4.8102050261710265e-06, - "loss": 0.8494, + "learning_rate": 4.8363826371706665e-06, + "loss": 0.7947, "step": 24086 }, { - "epoch": 0.6835130533484677, + "epoch": 0.6825639717759076, "grad_norm": 0.0, - "learning_rate": 4.8094194323935716e-06, - "loss": 0.849, + "learning_rate": 4.835596691201207e-06, + "loss": 0.8441, "step": 24087 }, { - "epoch": 0.6835414301929625, + "epoch": 0.6825923092181699, "grad_norm": 0.0, - "learning_rate": 4.808633882461181e-06, - "loss": 0.911, + "learning_rate": 4.8348107887334285e-06, + "loss": 0.8288, "step": 24088 }, { - "epoch": 0.6835698070374574, + "epoch": 0.6826206466604324, "grad_norm": 0.0, - "learning_rate": 4.807848376380494e-06, - "loss": 0.8474, + "learning_rate": 4.834024929773956e-06, + "loss": 0.7657, "step": 24089 }, { - "epoch": 0.6835981838819524, + "epoch": 0.6826489841026949, "grad_norm": 0.0, - "learning_rate": 4.807062914158148e-06, - "loss": 0.8189, + "learning_rate": 4.833239114329415e-06, + "loss": 0.8801, "step": 24090 }, { - "epoch": 0.6836265607264472, + "epoch": 0.6826773215449573, "grad_norm": 0.0, - "learning_rate": 4.806277495800772e-06, - "loss": 0.9261, + "learning_rate": 4.832453342406416e-06, + "loss": 0.8021, "step": 24091 }, { - "epoch": 0.6836549375709421, + "epoch": 0.6827056589872198, "grad_norm": 0.0, - "learning_rate": 4.805492121315003e-06, - "loss": 0.8639, + "learning_rate": 4.831667614011582e-06, + "loss": 0.8478, "step": 24092 }, { - "epoch": 0.683683314415437, + "epoch": 0.6827339964294823, "grad_norm": 0.0, - "learning_rate": 4.804706790707479e-06, - "loss": 0.8829, + "learning_rate": 4.830881929151533e-06, + "loss": 0.7739, "step": 24093 }, { - "epoch": 0.6837116912599319, + "epoch": 0.6827623338717448, "grad_norm": 0.0, - "learning_rate": 4.803921503984828e-06, - "loss": 0.807, + "learning_rate": 4.830096287832882e-06, + "loss": 0.7556, "step": 24094 }, { - "epoch": 0.6837400681044268, + "epoch": 0.6827906713140072, "grad_norm": 0.0, - "learning_rate": 4.803136261153684e-06, - "loss": 0.744, + "learning_rate": 4.82931069006225e-06, + "loss": 0.7685, "step": 24095 }, { - "epoch": 0.6837684449489216, + "epoch": 0.6828190087562697, "grad_norm": 0.0, - "learning_rate": 4.802351062220681e-06, - "loss": 0.8894, + "learning_rate": 4.8285251358462535e-06, + "loss": 0.839, "step": 24096 }, { - "epoch": 0.6837968217934166, + "epoch": 0.6828473461985322, "grad_norm": 0.0, - "learning_rate": 4.8015659071924535e-06, - "loss": 0.8966, + "learning_rate": 4.827739625191511e-06, + "loss": 0.7725, "step": 24097 }, { - "epoch": 0.6838251986379115, + "epoch": 0.6828756836407945, "grad_norm": 0.0, - "learning_rate": 4.800780796075637e-06, - "loss": 0.8887, + "learning_rate": 4.826954158104641e-06, + "loss": 0.9173, "step": 24098 }, { - "epoch": 0.6838535754824063, + "epoch": 0.682904021083057, "grad_norm": 0.0, - "learning_rate": 4.799995728876854e-06, - "loss": 0.8298, + "learning_rate": 4.826168734592254e-06, + "loss": 0.9032, "step": 24099 }, { - "epoch": 0.6838819523269013, + "epoch": 0.6829323585253195, "grad_norm": 0.0, - "learning_rate": 4.79921070560274e-06, - "loss": 0.96, + "learning_rate": 4.825383354660973e-06, + "loss": 0.7468, "step": 24100 }, { - "epoch": 0.6839103291713962, + "epoch": 0.682960695967582, "grad_norm": 0.0, - "learning_rate": 4.798425726259933e-06, - "loss": 0.8313, + "learning_rate": 4.824598018317406e-06, + "loss": 0.7773, "step": 24101 }, { - "epoch": 0.683938706015891, + "epoch": 0.6829890334098444, "grad_norm": 0.0, - "learning_rate": 4.797640790855053e-06, - "loss": 0.7635, + "learning_rate": 4.823812725568171e-06, + "loss": 0.7787, "step": 24102 }, { - "epoch": 0.6839670828603859, + "epoch": 0.6830173708521069, "grad_norm": 0.0, - "learning_rate": 4.796855899394734e-06, - "loss": 0.8326, + "learning_rate": 4.823027476419887e-06, + "loss": 0.7645, "step": 24103 }, { - "epoch": 0.6839954597048808, + "epoch": 0.6830457082943694, "grad_norm": 0.0, - "learning_rate": 4.796071051885611e-06, - "loss": 0.8119, + "learning_rate": 4.822242270879161e-06, + "loss": 0.7855, "step": 24104 }, { - "epoch": 0.6840238365493757, + "epoch": 0.6830740457366318, "grad_norm": 0.0, - "learning_rate": 4.7952862483343075e-06, - "loss": 0.8437, + "learning_rate": 4.821457108952613e-06, + "loss": 0.7197, "step": 24105 }, { - "epoch": 0.6840522133938706, + "epoch": 0.6831023831788943, "grad_norm": 0.0, - "learning_rate": 4.794501488747454e-06, - "loss": 0.8857, + "learning_rate": 4.820671990646857e-06, + "loss": 0.7884, "step": 24106 }, { - "epoch": 0.6840805902383655, + "epoch": 0.6831307206211568, "grad_norm": 0.0, - "learning_rate": 4.793716773131685e-06, - "loss": 0.8252, + "learning_rate": 4.819886915968501e-06, + "loss": 0.8858, "step": 24107 }, { - "epoch": 0.6841089670828604, + "epoch": 0.6831590580634191, "grad_norm": 0.0, - "learning_rate": 4.792932101493619e-06, - "loss": 0.8014, + "learning_rate": 4.819101884924161e-06, + "loss": 0.8335, "step": 24108 }, { - "epoch": 0.6841373439273553, + "epoch": 0.6831873955056816, "grad_norm": 0.0, - "learning_rate": 4.792147473839891e-06, - "loss": 0.8024, + "learning_rate": 4.81831689752045e-06, + "loss": 0.8596, "step": 24109 }, { - "epoch": 0.6841657207718501, + "epoch": 0.6832157329479441, "grad_norm": 0.0, - "learning_rate": 4.791362890177127e-06, - "loss": 0.8638, + "learning_rate": 4.817531953763979e-06, + "loss": 0.8699, "step": 24110 }, { - "epoch": 0.6841940976163451, + "epoch": 0.6832440703902066, "grad_norm": 0.0, - "learning_rate": 4.790578350511955e-06, - "loss": 0.6911, + "learning_rate": 4.8167470536613645e-06, + "loss": 0.8582, "step": 24111 }, { - "epoch": 0.68422247446084, + "epoch": 0.683272407832469, "grad_norm": 0.0, - "learning_rate": 4.789793854851006e-06, - "loss": 0.7966, + "learning_rate": 4.815962197219211e-06, + "loss": 0.9058, "step": 24112 }, { - "epoch": 0.6842508513053348, + "epoch": 0.6833007452747315, "grad_norm": 0.0, - "learning_rate": 4.789009403200898e-06, - "loss": 0.8753, + "learning_rate": 4.815177384444133e-06, + "loss": 0.7606, "step": 24113 }, { - "epoch": 0.6842792281498298, + "epoch": 0.683329082716994, "grad_norm": 0.0, - "learning_rate": 4.788224995568263e-06, - "loss": 0.7505, + "learning_rate": 4.814392615342746e-06, + "loss": 0.7985, "step": 24114 }, { - "epoch": 0.6843076049943246, + "epoch": 0.6833574201592564, "grad_norm": 0.0, - "learning_rate": 4.787440631959728e-06, - "loss": 0.9443, + "learning_rate": 4.813607889921651e-06, + "loss": 0.8734, "step": 24115 }, { - "epoch": 0.6843359818388195, + "epoch": 0.6833857576015189, "grad_norm": 0.0, - "learning_rate": 4.786656312381913e-06, - "loss": 0.7936, + "learning_rate": 4.8128232081874656e-06, + "loss": 0.7871, "step": 24116 }, { - "epoch": 0.6843643586833145, + "epoch": 0.6834140950437814, "grad_norm": 0.0, - "learning_rate": 4.785872036841447e-06, - "loss": 0.8493, + "learning_rate": 4.812038570146794e-06, + "loss": 0.7431, "step": 24117 }, { - "epoch": 0.6843927355278093, + "epoch": 0.6834424324860439, "grad_norm": 0.0, - "learning_rate": 4.7850878053449566e-06, - "loss": 0.8306, + "learning_rate": 4.811253975806247e-06, + "loss": 0.9622, "step": 24118 }, { - "epoch": 0.6844211123723042, + "epoch": 0.6834707699283062, "grad_norm": 0.0, - "learning_rate": 4.784303617899062e-06, - "loss": 0.8186, + "learning_rate": 4.810469425172439e-06, + "loss": 0.8524, "step": 24119 }, { - "epoch": 0.684449489216799, + "epoch": 0.6834991073705687, "grad_norm": 0.0, - "learning_rate": 4.783519474510388e-06, - "loss": 0.8595, + "learning_rate": 4.80968491825197e-06, + "loss": 0.7399, "step": 24120 }, { - "epoch": 0.684477866061294, + "epoch": 0.6835274448128312, "grad_norm": 0.0, - "learning_rate": 4.782735375185565e-06, - "loss": 0.9005, + "learning_rate": 4.8089004550514525e-06, + "loss": 0.9526, "step": 24121 }, { - "epoch": 0.6845062429057889, + "epoch": 0.6835557822550936, "grad_norm": 0.0, - "learning_rate": 4.781951319931205e-06, - "loss": 0.7891, + "learning_rate": 4.808116035577495e-06, + "loss": 0.773, "step": 24122 }, { - "epoch": 0.6845346197502837, + "epoch": 0.6835841196973561, "grad_norm": 0.0, - "learning_rate": 4.781167308753938e-06, - "loss": 0.8733, + "learning_rate": 4.807331659836703e-06, + "loss": 0.8172, "step": 24123 }, { - "epoch": 0.6845629965947787, + "epoch": 0.6836124571396186, "grad_norm": 0.0, - "learning_rate": 4.780383341660389e-06, - "loss": 0.7963, + "learning_rate": 4.8065473278356885e-06, + "loss": 0.9573, "step": 24124 }, { - "epoch": 0.6845913734392736, + "epoch": 0.683640794581881, "grad_norm": 0.0, - "learning_rate": 4.779599418657168e-06, - "loss": 0.788, + "learning_rate": 4.80576303958105e-06, + "loss": 0.7693, "step": 24125 }, { - "epoch": 0.6846197502837684, + "epoch": 0.6836691320241435, "grad_norm": 0.0, - "learning_rate": 4.778815539750913e-06, - "loss": 0.8387, + "learning_rate": 4.8049787950794e-06, + "loss": 0.8392, "step": 24126 }, { - "epoch": 0.6846481271282633, + "epoch": 0.683697469466406, "grad_norm": 0.0, - "learning_rate": 4.778031704948235e-06, - "loss": 0.8246, + "learning_rate": 4.8041945943373455e-06, + "loss": 0.9366, "step": 24127 }, { - "epoch": 0.6846765039727583, + "epoch": 0.6837258069086685, "grad_norm": 0.0, - "learning_rate": 4.777247914255757e-06, - "loss": 0.7695, + "learning_rate": 4.803410437361485e-06, + "loss": 0.7871, "step": 24128 }, { - "epoch": 0.6847048808172531, + "epoch": 0.6837541443509308, "grad_norm": 0.0, - "learning_rate": 4.776464167680106e-06, - "loss": 0.8148, + "learning_rate": 4.802626324158432e-06, + "loss": 0.8183, "step": 24129 }, { - "epoch": 0.684733257661748, + "epoch": 0.6837824817931933, "grad_norm": 0.0, - "learning_rate": 4.775680465227892e-06, - "loss": 0.8288, + "learning_rate": 4.8018422547347855e-06, + "loss": 0.8721, "step": 24130 }, { - "epoch": 0.6847616345062429, + "epoch": 0.6838108192354558, "grad_norm": 0.0, - "learning_rate": 4.77489680690574e-06, - "loss": 0.9236, + "learning_rate": 4.801058229097151e-06, + "loss": 0.9084, "step": 24131 }, { - "epoch": 0.6847900113507378, + "epoch": 0.6838391566777182, "grad_norm": 0.0, - "learning_rate": 4.774113192720273e-06, - "loss": 0.8598, + "learning_rate": 4.800274247252137e-06, + "loss": 0.7543, "step": 24132 }, { - "epoch": 0.6848183881952327, + "epoch": 0.6838674941199807, "grad_norm": 0.0, - "learning_rate": 4.773329622678105e-06, - "loss": 0.8577, + "learning_rate": 4.79949030920634e-06, + "loss": 0.8033, "step": 24133 }, { - "epoch": 0.6848467650397276, + "epoch": 0.6838958315622432, "grad_norm": 0.0, - "learning_rate": 4.772546096785854e-06, - "loss": 0.8792, + "learning_rate": 4.798706414966367e-06, + "loss": 0.9229, "step": 24134 }, { - "epoch": 0.6848751418842225, + "epoch": 0.6839241690045057, "grad_norm": 0.0, - "learning_rate": 4.771762615050146e-06, - "loss": 0.891, + "learning_rate": 4.797922564538822e-06, + "loss": 0.7639, "step": 24135 }, { - "epoch": 0.6849035187287174, + "epoch": 0.6839525064467681, "grad_norm": 0.0, - "learning_rate": 4.77097917747759e-06, - "loss": 0.8637, + "learning_rate": 4.7971387579303065e-06, + "loss": 0.9434, "step": 24136 }, { - "epoch": 0.6849318955732122, + "epoch": 0.6839808438890306, "grad_norm": 0.0, - "learning_rate": 4.7701957840748096e-06, - "loss": 0.8525, + "learning_rate": 4.796354995147428e-06, + "loss": 0.7726, "step": 24137 }, { - "epoch": 0.6849602724177072, + "epoch": 0.6840091813312931, "grad_norm": 0.0, - "learning_rate": 4.7694124348484225e-06, - "loss": 0.7862, + "learning_rate": 4.795571276196779e-06, + "loss": 0.839, "step": 24138 }, { - "epoch": 0.684988649262202, + "epoch": 0.6840375187735555, "grad_norm": 0.0, - "learning_rate": 4.768629129805041e-06, - "loss": 0.9348, + "learning_rate": 4.7947876010849655e-06, + "loss": 0.9142, "step": 24139 }, { - "epoch": 0.6850170261066969, + "epoch": 0.684065856215818, "grad_norm": 0.0, - "learning_rate": 4.767845868951284e-06, - "loss": 0.9711, + "learning_rate": 4.7940039698185935e-06, + "loss": 0.8753, "step": 24140 }, { - "epoch": 0.6850454029511919, + "epoch": 0.6840941936580804, "grad_norm": 0.0, - "learning_rate": 4.767062652293768e-06, - "loss": 0.9073, + "learning_rate": 4.7932203824042555e-06, + "loss": 0.8285, "step": 24141 }, { - "epoch": 0.6850737797956867, + "epoch": 0.6841225311003429, "grad_norm": 0.0, - "learning_rate": 4.766279479839109e-06, - "loss": 0.9061, + "learning_rate": 4.792436838848555e-06, + "loss": 0.8478, "step": 24142 }, { - "epoch": 0.6851021566401816, + "epoch": 0.6841508685426053, "grad_norm": 0.0, - "learning_rate": 4.765496351593927e-06, - "loss": 0.7869, + "learning_rate": 4.7916533391580975e-06, + "loss": 0.8353, "step": 24143 }, { - "epoch": 0.6851305334846765, + "epoch": 0.6841792059848678, "grad_norm": 0.0, - "learning_rate": 4.764713267564828e-06, - "loss": 0.8046, + "learning_rate": 4.790869883339473e-06, + "loss": 0.8615, "step": 24144 }, { - "epoch": 0.6851589103291714, + "epoch": 0.6842075434271303, "grad_norm": 0.0, - "learning_rate": 4.763930227758431e-06, - "loss": 0.8285, + "learning_rate": 4.790086471399287e-06, + "loss": 0.8832, "step": 24145 }, { - "epoch": 0.6851872871736663, + "epoch": 0.6842358808693927, "grad_norm": 0.0, - "learning_rate": 4.763147232181355e-06, - "loss": 0.7836, + "learning_rate": 4.789303103344138e-06, + "loss": 0.8529, "step": 24146 }, { - "epoch": 0.6852156640181611, + "epoch": 0.6842642183116552, "grad_norm": 0.0, - "learning_rate": 4.762364280840207e-06, - "loss": 0.882, + "learning_rate": 4.7885197791806245e-06, + "loss": 0.8456, "step": 24147 }, { - "epoch": 0.6852440408626561, + "epoch": 0.6842925557539177, "grad_norm": 0.0, - "learning_rate": 4.7615813737416014e-06, - "loss": 0.8145, + "learning_rate": 4.787736498915343e-06, + "loss": 0.9223, "step": 24148 }, { - "epoch": 0.685272417707151, + "epoch": 0.6843208931961801, "grad_norm": 0.0, - "learning_rate": 4.76079851089216e-06, - "loss": 0.8415, + "learning_rate": 4.786953262554892e-06, + "loss": 0.7055, "step": 24149 }, { - "epoch": 0.6853007945516458, + "epoch": 0.6843492306384426, "grad_norm": 0.0, - "learning_rate": 4.760015692298483e-06, - "loss": 0.8046, + "learning_rate": 4.78617007010587e-06, + "loss": 0.8262, "step": 24150 }, { - "epoch": 0.6853291713961408, + "epoch": 0.684377568080705, "grad_norm": 0.0, - "learning_rate": 4.75923291796719e-06, - "loss": 0.8024, + "learning_rate": 4.7853869215748764e-06, + "loss": 0.7906, "step": 24151 }, { - "epoch": 0.6853575482406357, + "epoch": 0.6844059055229675, "grad_norm": 0.0, - "learning_rate": 4.758450187904895e-06, - "loss": 0.8682, + "learning_rate": 4.784603816968502e-06, + "loss": 0.7743, "step": 24152 }, { - "epoch": 0.6853859250851305, + "epoch": 0.6844342429652299, "grad_norm": 0.0, - "learning_rate": 4.757667502118203e-06, - "loss": 0.8312, + "learning_rate": 4.783820756293349e-06, + "loss": 0.8898, "step": 24153 }, { - "epoch": 0.6854143019296254, + "epoch": 0.6844625804074924, "grad_norm": 0.0, - "learning_rate": 4.7568848606137294e-06, - "loss": 0.8454, + "learning_rate": 4.783037739556008e-06, + "loss": 0.8356, "step": 24154 }, { - "epoch": 0.6854426787741204, + "epoch": 0.6844909178497549, "grad_norm": 0.0, - "learning_rate": 4.756102263398091e-06, - "loss": 0.8774, + "learning_rate": 4.782254766763078e-06, + "loss": 0.7936, "step": 24155 }, { - "epoch": 0.6854710556186152, + "epoch": 0.6845192552920173, "grad_norm": 0.0, - "learning_rate": 4.755319710477882e-06, - "loss": 0.8788, + "learning_rate": 4.781471837921157e-06, + "loss": 0.8603, "step": 24156 }, { - "epoch": 0.6854994324631101, + "epoch": 0.6845475927342798, "grad_norm": 0.0, - "learning_rate": 4.754537201859732e-06, - "loss": 0.8079, + "learning_rate": 4.780688953036831e-06, + "loss": 0.8816, "step": 24157 }, { - "epoch": 0.685527809307605, + "epoch": 0.6845759301765423, "grad_norm": 0.0, - "learning_rate": 4.753754737550239e-06, - "loss": 0.8917, + "learning_rate": 4.779906112116702e-06, + "loss": 0.8677, "step": 24158 }, { - "epoch": 0.6855561861520999, + "epoch": 0.6846042676188048, "grad_norm": 0.0, - "learning_rate": 4.752972317556015e-06, - "loss": 0.8587, + "learning_rate": 4.779123315167362e-06, + "loss": 0.7912, "step": 24159 }, { - "epoch": 0.6855845629965948, + "epoch": 0.6846326050610672, "grad_norm": 0.0, - "learning_rate": 4.752189941883673e-06, - "loss": 0.9322, + "learning_rate": 4.778340562195405e-06, + "loss": 0.9097, "step": 24160 }, { - "epoch": 0.6856129398410896, + "epoch": 0.6846609425033297, "grad_norm": 0.0, - "learning_rate": 4.751407610539815e-06, - "loss": 0.7917, + "learning_rate": 4.7775578532074275e-06, + "loss": 0.8354, "step": 24161 }, { - "epoch": 0.6856413166855846, + "epoch": 0.6846892799455921, "grad_norm": 0.0, - "learning_rate": 4.750625323531053e-06, - "loss": 0.9932, + "learning_rate": 4.776775188210017e-06, + "loss": 0.8957, "step": 24162 }, { - "epoch": 0.6856696935300794, + "epoch": 0.6847176173878545, "grad_norm": 0.0, - "learning_rate": 4.749843080863998e-06, - "loss": 0.7599, + "learning_rate": 4.775992567209767e-06, + "loss": 0.8668, "step": 24163 }, { - "epoch": 0.6856980703745743, + "epoch": 0.684745954830117, "grad_norm": 0.0, - "learning_rate": 4.749060882545251e-06, - "loss": 0.859, + "learning_rate": 4.775209990213277e-06, + "loss": 0.82, "step": 24164 }, { - "epoch": 0.6857264472190693, + "epoch": 0.6847742922723795, "grad_norm": 0.0, - "learning_rate": 4.748278728581424e-06, - "loss": 0.7824, + "learning_rate": 4.774427457227129e-06, + "loss": 0.7543, "step": 24165 }, { - "epoch": 0.6857548240635641, + "epoch": 0.684802629714642, "grad_norm": 0.0, - "learning_rate": 4.747496618979125e-06, - "loss": 0.7673, + "learning_rate": 4.773644968257922e-06, + "loss": 0.9684, "step": 24166 }, { - "epoch": 0.685783200908059, + "epoch": 0.6848309671569044, "grad_norm": 0.0, - "learning_rate": 4.746714553744956e-06, - "loss": 0.8741, + "learning_rate": 4.772862523312242e-06, + "loss": 0.7618, "step": 24167 }, { - "epoch": 0.685811577752554, + "epoch": 0.6848593045991669, "grad_norm": 0.0, - "learning_rate": 4.745932532885523e-06, - "loss": 0.8219, + "learning_rate": 4.772080122396681e-06, + "loss": 1.0272, "step": 24168 }, { - "epoch": 0.6858399545970488, + "epoch": 0.6848876420414294, "grad_norm": 0.0, - "learning_rate": 4.7451505564074395e-06, - "loss": 0.8402, + "learning_rate": 4.771297765517834e-06, + "loss": 0.8102, "step": 24169 }, { - "epoch": 0.6858683314415437, + "epoch": 0.6849159794836918, "grad_norm": 0.0, - "learning_rate": 4.7443686243173015e-06, - "loss": 0.8679, + "learning_rate": 4.770515452682284e-06, + "loss": 0.823, "step": 24170 }, { - "epoch": 0.6858967082860385, + "epoch": 0.6849443169259543, "grad_norm": 0.0, - "learning_rate": 4.743586736621714e-06, - "loss": 0.7867, + "learning_rate": 4.769733183896624e-06, + "loss": 0.9285, "step": 24171 }, { - "epoch": 0.6859250851305335, + "epoch": 0.6849726543682167, "grad_norm": 0.0, - "learning_rate": 4.742804893327293e-06, - "loss": 0.8337, + "learning_rate": 4.768950959167444e-06, + "loss": 0.758, "step": 24172 }, { - "epoch": 0.6859534619750284, + "epoch": 0.6850009918104791, "grad_norm": 0.0, - "learning_rate": 4.7420230944406306e-06, - "loss": 0.9249, + "learning_rate": 4.768168778501333e-06, + "loss": 0.7692, "step": 24173 }, { - "epoch": 0.6859818388195232, + "epoch": 0.6850293292527416, "grad_norm": 0.0, - "learning_rate": 4.741241339968338e-06, - "loss": 0.807, + "learning_rate": 4.767386641904883e-06, + "loss": 0.8163, "step": 24174 }, { - "epoch": 0.6860102156640182, + "epoch": 0.6850576666950041, "grad_norm": 0.0, - "learning_rate": 4.740459629917018e-06, - "loss": 0.8282, + "learning_rate": 4.766604549384674e-06, + "loss": 0.7888, "step": 24175 }, { - "epoch": 0.6860385925085131, + "epoch": 0.6850860041372666, "grad_norm": 0.0, - "learning_rate": 4.739677964293269e-06, - "loss": 0.8905, + "learning_rate": 4.765822500947298e-06, + "loss": 0.9234, "step": 24176 }, { - "epoch": 0.6860669693530079, + "epoch": 0.685114341579529, "grad_norm": 0.0, - "learning_rate": 4.7388963431037e-06, - "loss": 0.9263, + "learning_rate": 4.765040496599347e-06, + "loss": 0.8261, "step": 24177 }, { - "epoch": 0.6860953461975028, + "epoch": 0.6851426790217915, "grad_norm": 0.0, - "learning_rate": 4.738114766354907e-06, - "loss": 0.775, + "learning_rate": 4.7642585363474e-06, + "loss": 0.855, "step": 24178 }, { - "epoch": 0.6861237230419978, + "epoch": 0.685171016464054, "grad_norm": 0.0, - "learning_rate": 4.737333234053494e-06, - "loss": 0.685, + "learning_rate": 4.763476620198048e-06, + "loss": 0.7923, "step": 24179 }, { - "epoch": 0.6861520998864926, + "epoch": 0.6851993539063164, "grad_norm": 0.0, - "learning_rate": 4.7365517462060685e-06, - "loss": 0.7821, + "learning_rate": 4.76269474815788e-06, + "loss": 0.8832, "step": 24180 }, { - "epoch": 0.6861804767309875, + "epoch": 0.6852276913485789, "grad_norm": 0.0, - "learning_rate": 4.735770302819223e-06, - "loss": 0.9268, + "learning_rate": 4.761912920233476e-06, + "loss": 0.8601, "step": 24181 }, { - "epoch": 0.6862088535754824, + "epoch": 0.6852560287908414, "grad_norm": 0.0, - "learning_rate": 4.734988903899562e-06, - "loss": 0.8595, + "learning_rate": 4.761131136431427e-06, + "loss": 0.8006, "step": 24182 }, { - "epoch": 0.6862372304199773, + "epoch": 0.6852843662331038, "grad_norm": 0.0, - "learning_rate": 4.734207549453691e-06, - "loss": 0.9205, + "learning_rate": 4.760349396758314e-06, + "loss": 0.8379, "step": 24183 }, { - "epoch": 0.6862656072644722, + "epoch": 0.6853127036753662, "grad_norm": 0.0, - "learning_rate": 4.733426239488201e-06, - "loss": 0.7666, + "learning_rate": 4.759567701220722e-06, + "loss": 0.798, "step": 24184 }, { - "epoch": 0.6862939841089671, + "epoch": 0.6853410411176287, "grad_norm": 0.0, - "learning_rate": 4.732644974009697e-06, - "loss": 0.7633, + "learning_rate": 4.758786049825238e-06, + "loss": 0.7586, "step": 24185 }, { - "epoch": 0.686322360953462, + "epoch": 0.6853693785598912, "grad_norm": 0.0, - "learning_rate": 4.7318637530247805e-06, - "loss": 0.6691, + "learning_rate": 4.758004442578445e-06, + "loss": 0.8079, "step": 24186 }, { - "epoch": 0.6863507377979569, + "epoch": 0.6853977160021536, "grad_norm": 0.0, - "learning_rate": 4.731082576540042e-06, - "loss": 0.8706, + "learning_rate": 4.757222879486931e-06, + "loss": 0.9299, "step": 24187 }, { - "epoch": 0.6863791146424517, + "epoch": 0.6854260534444161, "grad_norm": 0.0, - "learning_rate": 4.730301444562088e-06, - "loss": 0.8469, + "learning_rate": 4.756441360557272e-06, + "loss": 0.8266, "step": 24188 }, { - "epoch": 0.6864074914869467, + "epoch": 0.6854543908866786, "grad_norm": 0.0, - "learning_rate": 4.729520357097518e-06, - "loss": 0.8353, + "learning_rate": 4.755659885796054e-06, + "loss": 0.7807, "step": 24189 }, { - "epoch": 0.6864358683314415, + "epoch": 0.6854827283289411, "grad_norm": 0.0, - "learning_rate": 4.7287393141529236e-06, - "loss": 0.839, + "learning_rate": 4.754878455209866e-06, + "loss": 0.8491, "step": 24190 }, { - "epoch": 0.6864642451759364, + "epoch": 0.6855110657712035, "grad_norm": 0.0, - "learning_rate": 4.727958315734904e-06, - "loss": 0.8948, + "learning_rate": 4.754097068805279e-06, + "loss": 0.7752, "step": 24191 }, { - "epoch": 0.6864926220204314, + "epoch": 0.685539403213466, "grad_norm": 0.0, - "learning_rate": 4.7271773618500625e-06, - "loss": 0.8422, + "learning_rate": 4.7533157265888806e-06, + "loss": 0.8579, "step": 24192 }, { - "epoch": 0.6865209988649262, + "epoch": 0.6855677406557285, "grad_norm": 0.0, - "learning_rate": 4.726396452504986e-06, - "loss": 0.8532, + "learning_rate": 4.752534428567256e-06, + "loss": 0.7679, "step": 24193 }, { - "epoch": 0.6865493757094211, + "epoch": 0.6855960780979908, "grad_norm": 0.0, - "learning_rate": 4.725615587706278e-06, - "loss": 0.7395, + "learning_rate": 4.7517531747469795e-06, + "loss": 0.662, "step": 24194 }, { - "epoch": 0.686577752553916, + "epoch": 0.6856244155402533, "grad_norm": 0.0, - "learning_rate": 4.724834767460534e-06, - "loss": 0.872, + "learning_rate": 4.750971965134637e-06, + "loss": 0.9403, "step": 24195 }, { - "epoch": 0.6866061293984109, + "epoch": 0.6856527529825158, "grad_norm": 0.0, - "learning_rate": 4.724053991774345e-06, - "loss": 0.7767, + "learning_rate": 4.7501907997368035e-06, + "loss": 0.966, "step": 24196 }, { - "epoch": 0.6866345062429058, + "epoch": 0.6856810904247782, "grad_norm": 0.0, - "learning_rate": 4.7232732606543085e-06, - "loss": 0.833, + "learning_rate": 4.749409678560063e-06, + "loss": 0.8289, "step": 24197 }, { - "epoch": 0.6866628830874006, + "epoch": 0.6857094278670407, "grad_norm": 0.0, - "learning_rate": 4.722492574107024e-06, - "loss": 0.8548, + "learning_rate": 4.748628601610995e-06, + "loss": 0.8668, "step": 24198 }, { - "epoch": 0.6866912599318956, + "epoch": 0.6857377653093032, "grad_norm": 0.0, - "learning_rate": 4.721711932139078e-06, - "loss": 0.8533, + "learning_rate": 4.747847568896178e-06, + "loss": 0.8804, "step": 24199 }, { - "epoch": 0.6867196367763905, + "epoch": 0.6857661027515657, "grad_norm": 0.0, - "learning_rate": 4.720931334757068e-06, - "loss": 0.8865, + "learning_rate": 4.74706658042219e-06, + "loss": 0.8676, "step": 24200 }, { - "epoch": 0.6867480136208853, + "epoch": 0.6857944401938281, "grad_norm": 0.0, - "learning_rate": 4.720150781967594e-06, - "loss": 0.8557, + "learning_rate": 4.746285636195615e-06, + "loss": 0.854, "step": 24201 }, { - "epoch": 0.6867763904653803, + "epoch": 0.6858227776360906, "grad_norm": 0.0, - "learning_rate": 4.719370273777235e-06, - "loss": 0.8924, + "learning_rate": 4.7455047362230246e-06, + "loss": 0.7765, "step": 24202 }, { - "epoch": 0.6868047673098752, + "epoch": 0.6858511150783531, "grad_norm": 0.0, - "learning_rate": 4.7185898101926e-06, - "loss": 0.7809, + "learning_rate": 4.744723880511002e-06, + "loss": 0.7293, "step": 24203 }, { - "epoch": 0.68683314415437, + "epoch": 0.6858794525206154, "grad_norm": 0.0, - "learning_rate": 4.717809391220271e-06, - "loss": 0.8109, + "learning_rate": 4.743943069066118e-06, + "loss": 0.9052, "step": 24204 }, { - "epoch": 0.6868615209988649, + "epoch": 0.6859077899628779, "grad_norm": 0.0, - "learning_rate": 4.7170290168668435e-06, - "loss": 0.9051, + "learning_rate": 4.743162301894952e-06, + "loss": 0.9845, "step": 24205 }, { - "epoch": 0.6868898978433599, + "epoch": 0.6859361274051404, "grad_norm": 0.0, - "learning_rate": 4.7162486871389125e-06, - "loss": 0.8057, + "learning_rate": 4.7423815790040885e-06, + "loss": 0.8587, "step": 24206 }, { - "epoch": 0.6869182746878547, + "epoch": 0.6859644648474029, "grad_norm": 0.0, - "learning_rate": 4.715468402043063e-06, - "loss": 0.9261, + "learning_rate": 4.741600900400092e-06, + "loss": 0.7735, "step": 24207 }, { - "epoch": 0.6869466515323496, + "epoch": 0.6859928022896653, "grad_norm": 0.0, - "learning_rate": 4.71468816158589e-06, - "loss": 0.8485, + "learning_rate": 4.740820266089547e-06, + "loss": 0.7731, "step": 24208 }, { - "epoch": 0.6869750283768445, + "epoch": 0.6860211397319278, "grad_norm": 0.0, - "learning_rate": 4.713907965773986e-06, - "loss": 0.8708, + "learning_rate": 4.740039676079022e-06, + "loss": 0.8626, "step": 24209 }, { - "epoch": 0.6870034052213394, + "epoch": 0.6860494771741903, "grad_norm": 0.0, - "learning_rate": 4.7131278146139355e-06, - "loss": 0.8546, + "learning_rate": 4.739259130375097e-06, + "loss": 0.8629, "step": 24210 }, { - "epoch": 0.6870317820658343, + "epoch": 0.6860778146164527, "grad_norm": 0.0, - "learning_rate": 4.712347708112334e-06, - "loss": 0.867, + "learning_rate": 4.738478628984345e-06, + "loss": 0.8017, "step": 24211 }, { - "epoch": 0.6870601589103291, + "epoch": 0.6861061520587152, "grad_norm": 0.0, - "learning_rate": 4.711567646275771e-06, - "loss": 0.773, + "learning_rate": 4.737698171913343e-06, + "loss": 0.8099, "step": 24212 }, { - "epoch": 0.6870885357548241, + "epoch": 0.6861344895009777, "grad_norm": 0.0, - "learning_rate": 4.7107876291108315e-06, - "loss": 0.8275, + "learning_rate": 4.736917759168662e-06, + "loss": 0.7354, "step": 24213 }, { - "epoch": 0.687116912599319, + "epoch": 0.6861628269432402, "grad_norm": 0.0, - "learning_rate": 4.7100076566241045e-06, - "loss": 0.8342, + "learning_rate": 4.7361373907568804e-06, + "loss": 0.842, "step": 24214 }, { - "epoch": 0.6871452894438138, + "epoch": 0.6861911643855025, "grad_norm": 0.0, - "learning_rate": 4.7092277288221865e-06, - "loss": 0.69, + "learning_rate": 4.7353570666845664e-06, + "loss": 0.8198, "step": 24215 }, { - "epoch": 0.6871736662883088, + "epoch": 0.686219501827765, "grad_norm": 0.0, - "learning_rate": 4.7084478457116545e-06, - "loss": 0.9053, + "learning_rate": 4.734576786958297e-06, + "loss": 0.8415, "step": 24216 }, { - "epoch": 0.6872020431328036, + "epoch": 0.6862478392700275, "grad_norm": 0.0, - "learning_rate": 4.707668007299102e-06, - "loss": 0.8405, + "learning_rate": 4.7337965515846384e-06, + "loss": 0.8094, "step": 24217 }, { - "epoch": 0.6872304199772985, + "epoch": 0.6862761767122899, "grad_norm": 0.0, - "learning_rate": 4.7068882135911165e-06, - "loss": 0.7963, + "learning_rate": 4.733016360570169e-06, + "loss": 0.7919, "step": 24218 }, { - "epoch": 0.6872587968217935, + "epoch": 0.6863045141545524, "grad_norm": 0.0, - "learning_rate": 4.706108464594283e-06, - "loss": 0.8799, + "learning_rate": 4.73223621392146e-06, + "loss": 0.8598, "step": 24219 }, { - "epoch": 0.6872871736662883, + "epoch": 0.6863328515968149, "grad_norm": 0.0, - "learning_rate": 4.7053287603151935e-06, - "loss": 0.7707, + "learning_rate": 4.73145611164508e-06, + "loss": 0.7272, "step": 24220 }, { - "epoch": 0.6873155505107832, + "epoch": 0.6863611890390773, "grad_norm": 0.0, - "learning_rate": 4.704549100760426e-06, - "loss": 0.7807, + "learning_rate": 4.7306760537476e-06, + "loss": 0.9652, "step": 24221 }, { - "epoch": 0.687343927355278, + "epoch": 0.6863895264813398, "grad_norm": 0.0, - "learning_rate": 4.703769485936571e-06, - "loss": 0.8336, + "learning_rate": 4.7298960402355966e-06, + "loss": 0.7898, "step": 24222 }, { - "epoch": 0.687372304199773, + "epoch": 0.6864178639236023, "grad_norm": 0.0, - "learning_rate": 4.702989915850217e-06, - "loss": 0.8181, + "learning_rate": 4.729116071115632e-06, + "loss": 0.8241, "step": 24223 }, { - "epoch": 0.6874006810442679, + "epoch": 0.6864462013658648, "grad_norm": 0.0, - "learning_rate": 4.7022103905079405e-06, - "loss": 0.8651, + "learning_rate": 4.72833614639428e-06, + "loss": 0.8371, "step": 24224 }, { - "epoch": 0.6874290578887627, + "epoch": 0.6864745388081271, "grad_norm": 0.0, - "learning_rate": 4.701430909916331e-06, - "loss": 0.868, + "learning_rate": 4.727556266078111e-06, + "loss": 0.7949, "step": 24225 }, { - "epoch": 0.6874574347332577, + "epoch": 0.6865028762503896, "grad_norm": 0.0, - "learning_rate": 4.700651474081977e-06, - "loss": 0.7698, + "learning_rate": 4.726776430173693e-06, + "loss": 0.8602, "step": 24226 }, { - "epoch": 0.6874858115777526, + "epoch": 0.6865312136926521, "grad_norm": 0.0, - "learning_rate": 4.6998720830114554e-06, - "loss": 0.8431, + "learning_rate": 4.7259966386875985e-06, + "loss": 0.9072, "step": 24227 }, { - "epoch": 0.6875141884222474, + "epoch": 0.6865595511349145, "grad_norm": 0.0, - "learning_rate": 4.699092736711351e-06, - "loss": 0.7903, + "learning_rate": 4.72521689162639e-06, + "loss": 0.859, "step": 24228 }, { - "epoch": 0.6875425652667423, + "epoch": 0.686587888577177, "grad_norm": 0.0, - "learning_rate": 4.698313435188254e-06, - "loss": 0.7959, + "learning_rate": 4.7244371889966374e-06, + "loss": 0.8314, "step": 24229 }, { - "epoch": 0.6875709421112373, + "epoch": 0.6866162260194395, "grad_norm": 0.0, - "learning_rate": 4.697534178448737e-06, - "loss": 0.8242, + "learning_rate": 4.7236575308049135e-06, + "loss": 0.7849, "step": 24230 }, { - "epoch": 0.6875993189557321, + "epoch": 0.686644563461702, "grad_norm": 0.0, - "learning_rate": 4.696754966499387e-06, - "loss": 0.828, + "learning_rate": 4.722877917057777e-06, + "loss": 0.8805, "step": 24231 }, { - "epoch": 0.687627695800227, + "epoch": 0.6866729009039644, "grad_norm": 0.0, - "learning_rate": 4.69597579934679e-06, - "loss": 0.9081, + "learning_rate": 4.722098347761805e-06, + "loss": 0.7892, "step": 24232 }, { - "epoch": 0.687656072644722, + "epoch": 0.6867012383462269, "grad_norm": 0.0, - "learning_rate": 4.695196676997517e-06, - "loss": 0.8763, + "learning_rate": 4.721318822923553e-06, + "loss": 0.8146, "step": 24233 }, { - "epoch": 0.6876844494892168, + "epoch": 0.6867295757884894, "grad_norm": 0.0, - "learning_rate": 4.694417599458163e-06, - "loss": 0.894, + "learning_rate": 4.720539342549594e-06, + "loss": 0.8144, "step": 24234 }, { - "epoch": 0.6877128263337117, + "epoch": 0.6867579132307517, "grad_norm": 0.0, - "learning_rate": 4.693638566735298e-06, - "loss": 0.7652, + "learning_rate": 4.719759906646496e-06, + "loss": 0.782, "step": 24235 }, { - "epoch": 0.6877412031782065, + "epoch": 0.6867862506730142, "grad_norm": 0.0, - "learning_rate": 4.692859578835507e-06, - "loss": 0.7284, + "learning_rate": 4.718980515220817e-06, + "loss": 0.8105, "step": 24236 }, { - "epoch": 0.6877695800227015, + "epoch": 0.6868145881152767, "grad_norm": 0.0, - "learning_rate": 4.6920806357653736e-06, - "loss": 0.8068, + "learning_rate": 4.718201168279126e-06, + "loss": 0.7726, "step": 24237 }, { - "epoch": 0.6877979568671964, + "epoch": 0.6868429255575392, "grad_norm": 0.0, - "learning_rate": 4.691301737531469e-06, - "loss": 0.9149, + "learning_rate": 4.717421865827988e-06, + "loss": 0.8595, "step": 24238 }, { - "epoch": 0.6878263337116912, + "epoch": 0.6868712629998016, "grad_norm": 0.0, - "learning_rate": 4.690522884140379e-06, - "loss": 0.7652, + "learning_rate": 4.716642607873968e-06, + "loss": 0.8409, "step": 24239 }, { - "epoch": 0.6878547105561862, + "epoch": 0.6868996004420641, "grad_norm": 0.0, - "learning_rate": 4.689744075598684e-06, - "loss": 0.7539, + "learning_rate": 4.715863394423632e-06, + "loss": 0.9597, "step": 24240 }, { - "epoch": 0.687883087400681, + "epoch": 0.6869279378843266, "grad_norm": 0.0, - "learning_rate": 4.688965311912955e-06, - "loss": 0.8197, + "learning_rate": 4.715084225483538e-06, + "loss": 0.841, "step": 24241 }, { - "epoch": 0.6879114642451759, + "epoch": 0.686956275326589, "grad_norm": 0.0, - "learning_rate": 4.6881865930897756e-06, - "loss": 0.8546, + "learning_rate": 4.714305101060252e-06, + "loss": 0.7516, "step": 24242 }, { - "epoch": 0.6879398410896709, + "epoch": 0.6869846127688515, "grad_norm": 0.0, - "learning_rate": 4.687407919135726e-06, - "loss": 0.7869, + "learning_rate": 4.713526021160339e-06, + "loss": 0.9067, "step": 24243 }, { - "epoch": 0.6879682179341657, + "epoch": 0.687012950211114, "grad_norm": 0.0, - "learning_rate": 4.686629290057377e-06, - "loss": 0.7508, + "learning_rate": 4.712746985790357e-06, + "loss": 0.889, "step": 24244 }, { - "epoch": 0.6879965947786606, + "epoch": 0.6870412876533764, "grad_norm": 0.0, - "learning_rate": 4.685850705861309e-06, - "loss": 0.7769, + "learning_rate": 4.711967994956875e-06, + "loss": 0.894, "step": 24245 }, { - "epoch": 0.6880249716231555, + "epoch": 0.6870696250956388, "grad_norm": 0.0, - "learning_rate": 4.685072166554102e-06, - "loss": 0.7557, + "learning_rate": 4.7111890486664455e-06, + "loss": 0.8953, "step": 24246 }, { - "epoch": 0.6880533484676504, + "epoch": 0.6870979625379013, "grad_norm": 0.0, - "learning_rate": 4.684293672142327e-06, - "loss": 0.8953, + "learning_rate": 4.710410146925635e-06, + "loss": 0.9018, "step": 24247 }, { - "epoch": 0.6880817253121453, + "epoch": 0.6871262999801638, "grad_norm": 0.0, - "learning_rate": 4.683515222632562e-06, - "loss": 0.8795, + "learning_rate": 4.709631289741008e-06, + "loss": 0.9211, "step": 24248 }, { - "epoch": 0.6881101021566401, + "epoch": 0.6871546374224262, "grad_norm": 0.0, - "learning_rate": 4.682736818031382e-06, - "loss": 0.8963, + "learning_rate": 4.708852477119117e-06, + "loss": 0.8577, "step": 24249 }, { - "epoch": 0.6881384790011351, + "epoch": 0.6871829748646887, "grad_norm": 0.0, - "learning_rate": 4.681958458345365e-06, - "loss": 0.8152, + "learning_rate": 4.708073709066526e-06, + "loss": 0.8003, "step": 24250 }, { - "epoch": 0.68816685584563, + "epoch": 0.6872113123069512, "grad_norm": 0.0, - "learning_rate": 4.681180143581086e-06, - "loss": 0.8012, + "learning_rate": 4.707294985589796e-06, + "loss": 0.8696, "step": 24251 }, { - "epoch": 0.6881952326901248, + "epoch": 0.6872396497492136, "grad_norm": 0.0, - "learning_rate": 4.680401873745114e-06, - "loss": 0.7954, + "learning_rate": 4.7065163066954854e-06, + "loss": 0.8381, "step": 24252 }, { - "epoch": 0.6882236095346197, + "epoch": 0.6872679871914761, "grad_norm": 0.0, - "learning_rate": 4.679623648844027e-06, - "loss": 0.7565, + "learning_rate": 4.705737672390159e-06, + "loss": 0.6476, "step": 24253 }, { - "epoch": 0.6882519863791147, + "epoch": 0.6872963246337386, "grad_norm": 0.0, - "learning_rate": 4.678845468884402e-06, - "loss": 0.8512, + "learning_rate": 4.704959082680363e-06, + "loss": 0.7766, "step": 24254 }, { - "epoch": 0.6882803632236095, + "epoch": 0.6873246620760011, "grad_norm": 0.0, - "learning_rate": 4.678067333872804e-06, - "loss": 0.9457, + "learning_rate": 4.704180537572666e-06, + "loss": 0.8715, "step": 24255 }, { - "epoch": 0.6883087400681044, + "epoch": 0.6873529995182635, "grad_norm": 0.0, - "learning_rate": 4.6772892438158115e-06, - "loss": 0.838, + "learning_rate": 4.703402037073624e-06, + "loss": 0.7944, "step": 24256 }, { - "epoch": 0.6883371169125994, + "epoch": 0.687381336960526, "grad_norm": 0.0, - "learning_rate": 4.6765111987199985e-06, - "loss": 0.9105, + "learning_rate": 4.7026235811897925e-06, + "loss": 0.764, "step": 24257 }, { - "epoch": 0.6883654937570942, + "epoch": 0.6874096744027884, "grad_norm": 0.0, - "learning_rate": 4.6757331985919315e-06, - "loss": 0.8977, + "learning_rate": 4.7018451699277275e-06, + "loss": 0.8207, "step": 24258 }, { - "epoch": 0.6883938706015891, + "epoch": 0.6874380118450508, "grad_norm": 0.0, - "learning_rate": 4.674955243438186e-06, - "loss": 0.9675, + "learning_rate": 4.701066803293993e-06, + "loss": 0.8804, "step": 24259 }, { - "epoch": 0.688422247446084, + "epoch": 0.6874663492873133, "grad_norm": 0.0, - "learning_rate": 4.674177333265336e-06, - "loss": 0.8146, + "learning_rate": 4.7002884812951365e-06, + "loss": 0.8894, "step": 24260 }, { - "epoch": 0.6884506242905789, + "epoch": 0.6874946867295758, "grad_norm": 0.0, - "learning_rate": 4.6733994680799466e-06, - "loss": 0.7207, + "learning_rate": 4.699510203937722e-06, + "loss": 0.7827, "step": 24261 }, { - "epoch": 0.6884790011350738, + "epoch": 0.6875230241718383, "grad_norm": 0.0, - "learning_rate": 4.672621647888591e-06, - "loss": 0.8395, + "learning_rate": 4.698731971228298e-06, + "loss": 0.7539, "step": 24262 }, { - "epoch": 0.6885073779795686, + "epoch": 0.6875513616141007, "grad_norm": 0.0, - "learning_rate": 4.67184387269784e-06, - "loss": 0.8878, + "learning_rate": 4.697953783173423e-06, + "loss": 0.8484, "step": 24263 }, { - "epoch": 0.6885357548240636, + "epoch": 0.6875796990563632, "grad_norm": 0.0, - "learning_rate": 4.671066142514262e-06, - "loss": 0.873, + "learning_rate": 4.6971756397796506e-06, + "loss": 0.8604, "step": 24264 }, { - "epoch": 0.6885641316685585, + "epoch": 0.6876080364986257, "grad_norm": 0.0, - "learning_rate": 4.670288457344433e-06, - "loss": 0.7715, + "learning_rate": 4.6963975410535375e-06, + "loss": 0.9099, "step": 24265 }, { - "epoch": 0.6885925085130533, + "epoch": 0.6876363739408881, "grad_norm": 0.0, - "learning_rate": 4.669510817194913e-06, - "loss": 0.8116, + "learning_rate": 4.695619487001643e-06, + "loss": 0.7751, "step": 24266 }, { - "epoch": 0.6886208853575483, + "epoch": 0.6876647113831506, "grad_norm": 0.0, - "learning_rate": 4.668733222072275e-06, - "loss": 0.8907, + "learning_rate": 4.694841477630509e-06, + "loss": 0.8027, "step": 24267 }, { - "epoch": 0.6886492622020431, + "epoch": 0.687693048825413, "grad_norm": 0.0, - "learning_rate": 4.66795567198309e-06, - "loss": 0.8536, + "learning_rate": 4.694063512946697e-06, + "loss": 0.8697, "step": 24268 }, { - "epoch": 0.688677639046538, + "epoch": 0.6877213862676754, "grad_norm": 0.0, - "learning_rate": 4.66717816693392e-06, - "loss": 0.8836, + "learning_rate": 4.693285592956761e-06, + "loss": 0.7953, "step": 24269 }, { - "epoch": 0.6887060158910329, + "epoch": 0.6877497237099379, "grad_norm": 0.0, - "learning_rate": 4.666400706931335e-06, - "loss": 0.8797, + "learning_rate": 4.692507717667249e-06, + "loss": 0.8117, "step": 24270 }, { - "epoch": 0.6887343927355278, + "epoch": 0.6877780611522004, "grad_norm": 0.0, - "learning_rate": 4.6656232919819074e-06, - "loss": 0.7445, + "learning_rate": 4.6917298870847135e-06, + "loss": 0.9424, "step": 24271 }, { - "epoch": 0.6887627695800227, + "epoch": 0.6878063985944629, "grad_norm": 0.0, - "learning_rate": 4.664845922092196e-06, - "loss": 0.7738, + "learning_rate": 4.690952101215713e-06, + "loss": 0.8283, "step": 24272 }, { - "epoch": 0.6887911464245176, + "epoch": 0.6878347360367253, "grad_norm": 0.0, - "learning_rate": 4.664068597268771e-06, - "loss": 0.8442, + "learning_rate": 4.69017436006679e-06, + "loss": 0.9224, "step": 24273 }, { - "epoch": 0.6888195232690125, + "epoch": 0.6878630734789878, "grad_norm": 0.0, - "learning_rate": 4.663291317518202e-06, - "loss": 0.831, + "learning_rate": 4.6893966636445055e-06, + "loss": 0.7808, "step": 24274 }, { - "epoch": 0.6888479001135074, + "epoch": 0.6878914109212503, "grad_norm": 0.0, - "learning_rate": 4.662514082847047e-06, - "loss": 0.8378, + "learning_rate": 4.6886190119554e-06, + "loss": 0.7868, "step": 24275 }, { - "epoch": 0.6888762769580022, + "epoch": 0.6879197483635127, "grad_norm": 0.0, - "learning_rate": 4.661736893261876e-06, - "loss": 0.8158, + "learning_rate": 4.687841405006029e-06, + "loss": 0.839, "step": 24276 }, { - "epoch": 0.6889046538024972, + "epoch": 0.6879480858057752, "grad_norm": 0.0, - "learning_rate": 4.660959748769257e-06, - "loss": 0.8974, + "learning_rate": 4.687063842802943e-06, + "loss": 0.8636, "step": 24277 }, { - "epoch": 0.6889330306469921, + "epoch": 0.6879764232480376, "grad_norm": 0.0, - "learning_rate": 4.660182649375747e-06, - "loss": 0.9133, + "learning_rate": 4.686286325352689e-06, + "loss": 0.8336, "step": 24278 }, { - "epoch": 0.6889614074914869, + "epoch": 0.6880047606903001, "grad_norm": 0.0, - "learning_rate": 4.65940559508791e-06, - "loss": 0.8252, + "learning_rate": 4.6855088526618205e-06, + "loss": 0.8952, "step": 24279 }, { - "epoch": 0.6889897843359818, + "epoch": 0.6880330981325625, "grad_norm": 0.0, - "learning_rate": 4.658628585912323e-06, - "loss": 0.8643, + "learning_rate": 4.684731424736888e-06, + "loss": 0.835, "step": 24280 }, { - "epoch": 0.6890181611804768, + "epoch": 0.688061435574825, "grad_norm": 0.0, - "learning_rate": 4.657851621855536e-06, - "loss": 0.9136, + "learning_rate": 4.683954041584432e-06, + "loss": 0.752, "step": 24281 }, { - "epoch": 0.6890465380249716, + "epoch": 0.6880897730170875, "grad_norm": 0.0, - "learning_rate": 4.657074702924116e-06, - "loss": 0.7288, + "learning_rate": 4.68317670321101e-06, + "loss": 0.8727, "step": 24282 }, { - "epoch": 0.6890749148694665, + "epoch": 0.6881181104593499, "grad_norm": 0.0, - "learning_rate": 4.656297829124631e-06, - "loss": 0.8925, + "learning_rate": 4.682399409623161e-06, + "loss": 0.7572, "step": 24283 }, { - "epoch": 0.6891032917139615, + "epoch": 0.6881464479016124, "grad_norm": 0.0, - "learning_rate": 4.655521000463633e-06, - "loss": 0.8541, + "learning_rate": 4.681622160827436e-06, + "loss": 0.9151, "step": 24284 }, { - "epoch": 0.6891316685584563, + "epoch": 0.6881747853438749, "grad_norm": 0.0, - "learning_rate": 4.654744216947695e-06, - "loss": 0.8833, + "learning_rate": 4.680844956830386e-06, + "loss": 0.841, "step": 24285 }, { - "epoch": 0.6891600454029512, + "epoch": 0.6882031227861374, "grad_norm": 0.0, - "learning_rate": 4.6539674785833675e-06, - "loss": 0.8349, + "learning_rate": 4.68006779763855e-06, + "loss": 0.8044, "step": 24286 }, { - "epoch": 0.689188422247446, + "epoch": 0.6882314602283998, "grad_norm": 0.0, - "learning_rate": 4.653190785377218e-06, - "loss": 0.7977, + "learning_rate": 4.679290683258479e-06, + "loss": 0.8169, "step": 24287 }, { - "epoch": 0.689216799091941, + "epoch": 0.6882597976706623, "grad_norm": 0.0, - "learning_rate": 4.65241413733581e-06, - "loss": 0.7903, + "learning_rate": 4.678513613696724e-06, + "loss": 0.8112, "step": 24288 }, { - "epoch": 0.6892451759364359, + "epoch": 0.6882881351129247, "grad_norm": 0.0, - "learning_rate": 4.651637534465696e-06, - "loss": 0.8264, + "learning_rate": 4.677736588959818e-06, + "loss": 0.7622, "step": 24289 }, { - "epoch": 0.6892735527809307, + "epoch": 0.6883164725551871, "grad_norm": 0.0, - "learning_rate": 4.650860976773442e-06, - "loss": 0.8895, + "learning_rate": 4.676959609054315e-06, + "loss": 0.8423, "step": 24290 }, { - "epoch": 0.6893019296254257, + "epoch": 0.6883448099974496, "grad_norm": 0.0, - "learning_rate": 4.650084464265608e-06, - "loss": 0.7936, + "learning_rate": 4.676182673986757e-06, + "loss": 0.7739, "step": 24291 }, { - "epoch": 0.6893303064699206, + "epoch": 0.6883731474397121, "grad_norm": 0.0, - "learning_rate": 4.649307996948747e-06, - "loss": 0.7474, + "learning_rate": 4.6754057837636905e-06, + "loss": 0.7945, "step": 24292 }, { - "epoch": 0.6893586833144154, + "epoch": 0.6884014848819745, "grad_norm": 0.0, - "learning_rate": 4.6485315748294215e-06, - "loss": 0.8392, + "learning_rate": 4.674628938391661e-06, + "loss": 0.8766, "step": 24293 }, { - "epoch": 0.6893870601589104, + "epoch": 0.688429822324237, "grad_norm": 0.0, - "learning_rate": 4.6477551979141914e-06, - "loss": 0.8096, + "learning_rate": 4.6738521378772066e-06, + "loss": 0.8645, "step": 24294 }, { - "epoch": 0.6894154370034052, + "epoch": 0.6884581597664995, "grad_norm": 0.0, - "learning_rate": 4.646978866209613e-06, - "loss": 0.7904, + "learning_rate": 4.673075382226876e-06, + "loss": 0.8612, "step": 24295 }, { - "epoch": 0.6894438138479001, + "epoch": 0.688486497208762, "grad_norm": 0.0, - "learning_rate": 4.6462025797222445e-06, - "loss": 0.8667, + "learning_rate": 4.672298671447206e-06, + "loss": 0.788, "step": 24296 }, { - "epoch": 0.689472190692395, + "epoch": 0.6885148346510244, "grad_norm": 0.0, - "learning_rate": 4.645426338458648e-06, - "loss": 0.8227, + "learning_rate": 4.671522005544743e-06, + "loss": 0.7174, "step": 24297 }, { - "epoch": 0.6895005675368899, + "epoch": 0.6885431720932869, "grad_norm": 0.0, - "learning_rate": 4.644650142425372e-06, - "loss": 0.8597, + "learning_rate": 4.670745384526033e-06, + "loss": 0.8892, "step": 24298 }, { - "epoch": 0.6895289443813848, + "epoch": 0.6885715095355494, "grad_norm": 0.0, - "learning_rate": 4.643873991628977e-06, - "loss": 0.8835, + "learning_rate": 4.669968808397609e-06, + "loss": 0.7827, "step": 24299 }, { - "epoch": 0.6895573212258796, + "epoch": 0.6885998469778117, "grad_norm": 0.0, - "learning_rate": 4.6430978860760236e-06, - "loss": 0.7703, + "learning_rate": 4.669192277166018e-06, + "loss": 0.885, "step": 24300 }, { - "epoch": 0.6895856980703746, + "epoch": 0.6886281844200742, "grad_norm": 0.0, - "learning_rate": 4.6423218257730585e-06, - "loss": 0.7694, + "learning_rate": 4.668415790837804e-06, + "loss": 0.8758, "step": 24301 }, { - "epoch": 0.6896140749148695, + "epoch": 0.6886565218623367, "grad_norm": 0.0, - "learning_rate": 4.641545810726642e-06, - "loss": 0.7766, + "learning_rate": 4.6676393494194985e-06, + "loss": 0.8857, "step": 24302 }, { - "epoch": 0.6896424517593643, + "epoch": 0.6886848593045992, "grad_norm": 0.0, - "learning_rate": 4.6407698409433325e-06, - "loss": 0.7259, + "learning_rate": 4.666862952917647e-06, + "loss": 0.7411, "step": 24303 }, { - "epoch": 0.6896708286038592, + "epoch": 0.6887131967468616, "grad_norm": 0.0, - "learning_rate": 4.639993916429677e-06, - "loss": 0.9052, + "learning_rate": 4.6660866013387896e-06, + "loss": 0.8948, "step": 24304 }, { - "epoch": 0.6896992054483542, + "epoch": 0.6887415341891241, "grad_norm": 0.0, - "learning_rate": 4.639218037192235e-06, - "loss": 0.8891, + "learning_rate": 4.665310294689466e-06, + "loss": 0.9198, "step": 24305 }, { - "epoch": 0.689727582292849, + "epoch": 0.6887698716313866, "grad_norm": 0.0, - "learning_rate": 4.638442203237562e-06, - "loss": 0.8902, + "learning_rate": 4.664534032976218e-06, + "loss": 0.8653, "step": 24306 }, { - "epoch": 0.6897559591373439, + "epoch": 0.688798209073649, "grad_norm": 0.0, - "learning_rate": 4.637666414572205e-06, - "loss": 0.9772, + "learning_rate": 4.663757816205577e-06, + "loss": 0.8152, "step": 24307 }, { - "epoch": 0.6897843359818389, + "epoch": 0.6888265465159115, "grad_norm": 0.0, - "learning_rate": 4.636890671202725e-06, - "loss": 0.8117, + "learning_rate": 4.662981644384087e-06, + "loss": 0.8855, "step": 24308 }, { - "epoch": 0.6898127128263337, + "epoch": 0.688854883958174, "grad_norm": 0.0, - "learning_rate": 4.636114973135663e-06, - "loss": 0.8992, + "learning_rate": 4.662205517518286e-06, + "loss": 0.8686, "step": 24309 }, { - "epoch": 0.6898410896708286, + "epoch": 0.6888832214004365, "grad_norm": 0.0, - "learning_rate": 4.635339320377582e-06, - "loss": 0.7975, + "learning_rate": 4.661429435614708e-06, + "loss": 0.795, "step": 24310 }, { - "epoch": 0.6898694665153235, + "epoch": 0.6889115588426988, "grad_norm": 0.0, - "learning_rate": 4.634563712935036e-06, - "loss": 0.8696, + "learning_rate": 4.660653398679896e-06, + "loss": 0.8072, "step": 24311 }, { - "epoch": 0.6898978433598184, + "epoch": 0.6889398962849613, "grad_norm": 0.0, - "learning_rate": 4.633788150814566e-06, - "loss": 0.6868, + "learning_rate": 4.659877406720379e-06, + "loss": 0.8971, "step": 24312 }, { - "epoch": 0.6899262202043133, + "epoch": 0.6889682337272238, "grad_norm": 0.0, - "learning_rate": 4.633012634022731e-06, - "loss": 0.8413, + "learning_rate": 4.6591014597426974e-06, + "loss": 0.875, "step": 24313 }, { - "epoch": 0.6899545970488081, + "epoch": 0.6889965711694862, "grad_norm": 0.0, - "learning_rate": 4.6322371625660825e-06, - "loss": 0.8156, + "learning_rate": 4.658325557753391e-06, + "loss": 0.9238, "step": 24314 }, { - "epoch": 0.6899829738933031, + "epoch": 0.6890249086117487, "grad_norm": 0.0, - "learning_rate": 4.6314617364511625e-06, - "loss": 0.9067, + "learning_rate": 4.657549700758989e-06, + "loss": 0.8309, "step": 24315 }, { - "epoch": 0.690011350737798, + "epoch": 0.6890532460540112, "grad_norm": 0.0, - "learning_rate": 4.630686355684528e-06, - "loss": 0.8559, + "learning_rate": 4.65677388876603e-06, + "loss": 0.8571, "step": 24316 }, { - "epoch": 0.6900397275822928, + "epoch": 0.6890815834962736, "grad_norm": 0.0, - "learning_rate": 4.629911020272731e-06, - "loss": 0.7659, + "learning_rate": 4.655998121781048e-06, + "loss": 0.9662, "step": 24317 }, { - "epoch": 0.6900681044267878, + "epoch": 0.6891099209385361, "grad_norm": 0.0, - "learning_rate": 4.629135730222314e-06, - "loss": 0.9131, + "learning_rate": 4.655222399810579e-06, + "loss": 0.763, "step": 24318 }, { - "epoch": 0.6900964812712826, + "epoch": 0.6891382583807986, "grad_norm": 0.0, - "learning_rate": 4.628360485539828e-06, - "loss": 0.9184, + "learning_rate": 4.654446722861159e-06, + "loss": 0.8693, "step": 24319 }, { - "epoch": 0.6901248581157775, + "epoch": 0.6891665958230611, "grad_norm": 0.0, - "learning_rate": 4.627585286231826e-06, - "loss": 0.8614, + "learning_rate": 4.6536710909393155e-06, + "loss": 0.771, "step": 24320 }, { - "epoch": 0.6901532349602724, + "epoch": 0.6891949332653234, "grad_norm": 0.0, - "learning_rate": 4.626810132304848e-06, - "loss": 0.8427, + "learning_rate": 4.652895504051587e-06, + "loss": 0.8216, "step": 24321 }, { - "epoch": 0.6901816118047673, + "epoch": 0.6892232707075859, "grad_norm": 0.0, - "learning_rate": 4.626035023765448e-06, - "loss": 0.7521, + "learning_rate": 4.652119962204508e-06, + "loss": 0.8935, "step": 24322 }, { - "epoch": 0.6902099886492622, + "epoch": 0.6892516081498484, "grad_norm": 0.0, - "learning_rate": 4.6252599606201755e-06, - "loss": 0.7317, + "learning_rate": 4.6513444654046044e-06, + "loss": 0.8814, "step": 24323 }, { - "epoch": 0.6902383654937571, + "epoch": 0.6892799455921108, "grad_norm": 0.0, - "learning_rate": 4.624484942875569e-06, - "loss": 0.8704, + "learning_rate": 4.650569013658417e-06, + "loss": 0.7324, "step": 24324 }, { - "epoch": 0.690266742338252, + "epoch": 0.6893082830343733, "grad_norm": 0.0, - "learning_rate": 4.623709970538181e-06, - "loss": 0.9418, + "learning_rate": 4.64979360697247e-06, + "loss": 0.8114, "step": 24325 }, { - "epoch": 0.6902951191827469, + "epoch": 0.6893366204766358, "grad_norm": 0.0, - "learning_rate": 4.622935043614555e-06, - "loss": 0.7611, + "learning_rate": 4.649018245353297e-06, + "loss": 0.7992, "step": 24326 }, { - "epoch": 0.6903234960272417, + "epoch": 0.6893649579188983, "grad_norm": 0.0, - "learning_rate": 4.622160162111239e-06, - "loss": 0.9066, + "learning_rate": 4.648242928807435e-06, + "loss": 0.8498, "step": 24327 }, { - "epoch": 0.6903518728717367, + "epoch": 0.6893932953611607, "grad_norm": 0.0, - "learning_rate": 4.6213853260347816e-06, - "loss": 0.8243, + "learning_rate": 4.647467657341407e-06, + "loss": 0.7241, "step": 24328 }, { - "epoch": 0.6903802497162316, + "epoch": 0.6894216328034232, "grad_norm": 0.0, - "learning_rate": 4.620610535391721e-06, - "loss": 0.7875, + "learning_rate": 4.646692430961745e-06, + "loss": 0.8326, "step": 24329 }, { - "epoch": 0.6904086265607264, + "epoch": 0.6894499702456857, "grad_norm": 0.0, - "learning_rate": 4.619835790188605e-06, - "loss": 0.7596, + "learning_rate": 4.645917249674982e-06, + "loss": 0.9173, "step": 24330 }, { - "epoch": 0.6904370034052213, + "epoch": 0.689478307687948, "grad_norm": 0.0, - "learning_rate": 4.619061090431981e-06, - "loss": 0.7799, + "learning_rate": 4.645142113487645e-06, + "loss": 0.7828, "step": 24331 }, { - "epoch": 0.6904653802497163, + "epoch": 0.6895066451302105, "grad_norm": 0.0, - "learning_rate": 4.618286436128386e-06, - "loss": 0.8789, + "learning_rate": 4.644367022406268e-06, + "loss": 0.8623, "step": 24332 }, { - "epoch": 0.6904937570942111, + "epoch": 0.689534982572473, "grad_norm": 0.0, - "learning_rate": 4.617511827284368e-06, - "loss": 0.8221, + "learning_rate": 4.6435919764373735e-06, + "loss": 0.8642, "step": 24333 }, { - "epoch": 0.690522133938706, + "epoch": 0.6895633200147354, "grad_norm": 0.0, - "learning_rate": 4.616737263906473e-06, - "loss": 0.8486, + "learning_rate": 4.642816975587493e-06, + "loss": 0.8818, "step": 24334 }, { - "epoch": 0.690550510783201, + "epoch": 0.6895916574569979, "grad_norm": 0.0, - "learning_rate": 4.615962746001237e-06, - "loss": 0.8449, + "learning_rate": 4.642042019863158e-06, + "loss": 0.7787, "step": 24335 }, { - "epoch": 0.6905788876276958, + "epoch": 0.6896199948992604, "grad_norm": 0.0, - "learning_rate": 4.615188273575205e-06, - "loss": 0.7157, + "learning_rate": 4.641267109270889e-06, + "loss": 0.7617, "step": 24336 }, { - "epoch": 0.6906072644721907, + "epoch": 0.6896483323415229, "grad_norm": 0.0, - "learning_rate": 4.614413846634924e-06, - "loss": 0.7895, + "learning_rate": 4.640492243817216e-06, + "loss": 0.8826, "step": 24337 }, { - "epoch": 0.6906356413166855, + "epoch": 0.6896766697837853, "grad_norm": 0.0, - "learning_rate": 4.613639465186928e-06, - "loss": 0.9937, + "learning_rate": 4.639717423508672e-06, + "loss": 0.8513, "step": 24338 }, { - "epoch": 0.6906640181611805, + "epoch": 0.6897050072260478, "grad_norm": 0.0, - "learning_rate": 4.61286512923776e-06, - "loss": 0.8522, + "learning_rate": 4.638942648351774e-06, + "loss": 0.7795, "step": 24339 }, { - "epoch": 0.6906923950056754, + "epoch": 0.6897333446683103, "grad_norm": 0.0, - "learning_rate": 4.612090838793964e-06, - "loss": 0.8907, + "learning_rate": 4.638167918353057e-06, + "loss": 0.8754, "step": 24340 }, { - "epoch": 0.6907207718501702, + "epoch": 0.6897616821105726, "grad_norm": 0.0, - "learning_rate": 4.6113165938620785e-06, - "loss": 0.7831, + "learning_rate": 4.637393233519038e-06, + "loss": 0.7512, "step": 24341 }, { - "epoch": 0.6907491486946652, + "epoch": 0.6897900195528351, "grad_norm": 0.0, - "learning_rate": 4.6105423944486475e-06, - "loss": 0.8131, + "learning_rate": 4.636618593856249e-06, + "loss": 0.7931, "step": 24342 }, { - "epoch": 0.69077752553916, + "epoch": 0.6898183569950976, "grad_norm": 0.0, - "learning_rate": 4.609768240560204e-06, - "loss": 0.7277, + "learning_rate": 4.635843999371212e-06, + "loss": 0.952, "step": 24343 }, { - "epoch": 0.6908059023836549, + "epoch": 0.6898466944373601, "grad_norm": 0.0, - "learning_rate": 4.608994132203289e-06, - "loss": 0.8275, + "learning_rate": 4.635069450070453e-06, + "loss": 0.9039, "step": 24344 }, { - "epoch": 0.6908342792281499, + "epoch": 0.6898750318796225, "grad_norm": 0.0, - "learning_rate": 4.608220069384448e-06, - "loss": 0.8874, + "learning_rate": 4.634294945960497e-06, + "loss": 0.8186, "step": 24345 }, { - "epoch": 0.6908626560726447, + "epoch": 0.689903369321885, "grad_norm": 0.0, - "learning_rate": 4.607446052110211e-06, - "loss": 0.6726, + "learning_rate": 4.63352048704787e-06, + "loss": 0.7511, "step": 24346 }, { - "epoch": 0.6908910329171396, + "epoch": 0.6899317067641475, "grad_norm": 0.0, - "learning_rate": 4.606672080387118e-06, - "loss": 0.7673, + "learning_rate": 4.63274607333909e-06, + "loss": 0.9048, "step": 24347 }, { - "epoch": 0.6909194097616345, + "epoch": 0.6899600442064099, "grad_norm": 0.0, - "learning_rate": 4.605898154221713e-06, - "loss": 0.7489, + "learning_rate": 4.631971704840685e-06, + "loss": 0.841, "step": 24348 }, { - "epoch": 0.6909477866061294, + "epoch": 0.6899883816486724, "grad_norm": 0.0, - "learning_rate": 4.605124273620526e-06, - "loss": 0.7696, + "learning_rate": 4.631197381559173e-06, + "loss": 0.7938, "step": 24349 }, { - "epoch": 0.6909761634506243, + "epoch": 0.6900167190909349, "grad_norm": 0.0, - "learning_rate": 4.604350438590095e-06, - "loss": 0.8122, + "learning_rate": 4.6304231035010795e-06, + "loss": 0.8701, "step": 24350 }, { - "epoch": 0.6910045402951192, + "epoch": 0.6900450565331974, "grad_norm": 0.0, - "learning_rate": 4.603576649136964e-06, - "loss": 0.7394, + "learning_rate": 4.6296488706729306e-06, + "loss": 0.9123, "step": 24351 }, { - "epoch": 0.6910329171396141, + "epoch": 0.6900733939754597, "grad_norm": 0.0, - "learning_rate": 4.602802905267658e-06, - "loss": 0.7427, + "learning_rate": 4.6288746830812385e-06, + "loss": 0.8026, "step": 24352 }, { - "epoch": 0.691061293984109, + "epoch": 0.6901017314177222, "grad_norm": 0.0, - "learning_rate": 4.60202920698872e-06, - "loss": 0.9177, + "learning_rate": 4.628100540732533e-06, + "loss": 0.7885, "step": 24353 }, { - "epoch": 0.6910896708286038, + "epoch": 0.6901300688599847, "grad_norm": 0.0, - "learning_rate": 4.601255554306686e-06, - "loss": 0.8435, + "learning_rate": 4.627326443633327e-06, + "loss": 0.783, "step": 24354 }, { - "epoch": 0.6911180476730987, + "epoch": 0.6901584063022471, "grad_norm": 0.0, - "learning_rate": 4.600481947228084e-06, - "loss": 0.8192, + "learning_rate": 4.6265523917901476e-06, + "loss": 0.8809, "step": 24355 }, { - "epoch": 0.6911464245175937, + "epoch": 0.6901867437445096, "grad_norm": 0.0, - "learning_rate": 4.5997083857594595e-06, - "loss": 0.8119, + "learning_rate": 4.6257783852095116e-06, + "loss": 0.8126, "step": 24356 }, { - "epoch": 0.6911748013620885, + "epoch": 0.6902150811867721, "grad_norm": 0.0, - "learning_rate": 4.598934869907337e-06, - "loss": 0.7984, + "learning_rate": 4.62500442389794e-06, + "loss": 0.8472, "step": 24357 }, { - "epoch": 0.6912031782065834, + "epoch": 0.6902434186290345, "grad_norm": 0.0, - "learning_rate": 4.5981613996782546e-06, - "loss": 0.7974, + "learning_rate": 4.624230507861952e-06, + "loss": 0.8799, "step": 24358 }, { - "epoch": 0.6912315550510784, + "epoch": 0.690271756071297, "grad_norm": 0.0, - "learning_rate": 4.597387975078751e-06, - "loss": 0.8704, + "learning_rate": 4.62345663710807e-06, + "loss": 0.8579, "step": 24359 }, { - "epoch": 0.6912599318955732, + "epoch": 0.6903000935135595, "grad_norm": 0.0, - "learning_rate": 4.596614596115348e-06, - "loss": 0.8401, + "learning_rate": 4.622682811642807e-06, + "loss": 0.7657, "step": 24360 }, { - "epoch": 0.6912883087400681, + "epoch": 0.690328430955822, "grad_norm": 0.0, - "learning_rate": 4.595841262794586e-06, - "loss": 0.9305, + "learning_rate": 4.621909031472687e-06, + "loss": 0.8133, "step": 24361 }, { - "epoch": 0.691316685584563, + "epoch": 0.6903567683980844, "grad_norm": 0.0, - "learning_rate": 4.595067975122998e-06, - "loss": 0.8496, + "learning_rate": 4.621135296604219e-06, + "loss": 0.857, "step": 24362 }, { - "epoch": 0.6913450624290579, + "epoch": 0.6903851058403468, "grad_norm": 0.0, - "learning_rate": 4.594294733107112e-06, - "loss": 0.7664, + "learning_rate": 4.620361607043927e-06, + "loss": 0.8423, "step": 24363 }, { - "epoch": 0.6913734392735528, + "epoch": 0.6904134432826093, "grad_norm": 0.0, - "learning_rate": 4.5935215367534616e-06, - "loss": 0.8935, + "learning_rate": 4.61958796279833e-06, + "loss": 0.7735, "step": 24364 }, { - "epoch": 0.6914018161180476, + "epoch": 0.6904417807248717, "grad_norm": 0.0, - "learning_rate": 4.592748386068579e-06, - "loss": 0.7684, + "learning_rate": 4.618814363873938e-06, + "loss": 0.8072, "step": 24365 }, { - "epoch": 0.6914301929625426, + "epoch": 0.6904701181671342, "grad_norm": 0.0, - "learning_rate": 4.591975281058992e-06, - "loss": 0.8017, + "learning_rate": 4.618040810277271e-06, + "loss": 0.8395, "step": 24366 }, { - "epoch": 0.6914585698070375, + "epoch": 0.6904984556093967, "grad_norm": 0.0, - "learning_rate": 4.591202221731232e-06, - "loss": 0.851, + "learning_rate": 4.617267302014845e-06, + "loss": 0.8852, "step": 24367 }, { - "epoch": 0.6914869466515323, + "epoch": 0.6905267930516592, "grad_norm": 0.0, - "learning_rate": 4.5904292080918355e-06, - "loss": 0.7464, + "learning_rate": 4.616493839093179e-06, + "loss": 0.8546, "step": 24368 }, { - "epoch": 0.6915153234960273, + "epoch": 0.6905551304939216, "grad_norm": 0.0, - "learning_rate": 4.589656240147321e-06, - "loss": 0.7827, + "learning_rate": 4.61572042151878e-06, + "loss": 0.7315, "step": 24369 }, { - "epoch": 0.6915437003405221, + "epoch": 0.6905834679361841, "grad_norm": 0.0, - "learning_rate": 4.588883317904224e-06, + "learning_rate": 4.614947049298169e-06, "loss": 0.8081, "step": 24370 }, { - "epoch": 0.691572077185017, + "epoch": 0.6906118053784466, "grad_norm": 0.0, - "learning_rate": 4.588110441369074e-06, - "loss": 0.8595, + "learning_rate": 4.614173722437857e-06, + "loss": 0.9023, "step": 24371 }, { - "epoch": 0.6916004540295119, + "epoch": 0.690640142820709, "grad_norm": 0.0, - "learning_rate": 4.587337610548397e-06, - "loss": 0.8294, + "learning_rate": 4.613400440944364e-06, + "loss": 0.9157, "step": 24372 }, { - "epoch": 0.6916288308740068, + "epoch": 0.6906684802629715, "grad_norm": 0.0, - "learning_rate": 4.586564825448726e-06, - "loss": 0.8238, + "learning_rate": 4.612627204824196e-06, + "loss": 0.8587, "step": 24373 }, { - "epoch": 0.6916572077185017, + "epoch": 0.690696817705234, "grad_norm": 0.0, - "learning_rate": 4.5857920860765825e-06, - "loss": 0.7737, + "learning_rate": 4.611854014083868e-06, + "loss": 0.967, "step": 24374 }, { - "epoch": 0.6916855845629966, + "epoch": 0.6907251551474964, "grad_norm": 0.0, - "learning_rate": 4.585019392438496e-06, - "loss": 0.8418, + "learning_rate": 4.6110808687299e-06, + "loss": 0.8281, "step": 24375 }, { - "epoch": 0.6917139614074915, + "epoch": 0.6907534925897588, "grad_norm": 0.0, - "learning_rate": 4.584246744540998e-06, - "loss": 0.8697, + "learning_rate": 4.610307768768796e-06, + "loss": 0.7585, "step": 24376 }, { - "epoch": 0.6917423382519864, + "epoch": 0.6907818300320213, "grad_norm": 0.0, - "learning_rate": 4.583474142390608e-06, - "loss": 0.8895, + "learning_rate": 4.609534714207073e-06, + "loss": 0.7978, "step": 24377 }, { - "epoch": 0.6917707150964812, + "epoch": 0.6908101674742838, "grad_norm": 0.0, - "learning_rate": 4.582701585993855e-06, - "loss": 0.8264, + "learning_rate": 4.608761705051238e-06, + "loss": 0.8387, "step": 24378 }, { - "epoch": 0.6917990919409762, + "epoch": 0.6908385049165462, "grad_norm": 0.0, - "learning_rate": 4.581929075357269e-06, - "loss": 0.7989, + "learning_rate": 4.607988741307804e-06, + "loss": 0.856, "step": 24379 }, { - "epoch": 0.6918274687854711, + "epoch": 0.6908668423588087, "grad_norm": 0.0, - "learning_rate": 4.581156610487367e-06, - "loss": 0.8775, + "learning_rate": 4.607215822983284e-06, + "loss": 0.8561, "step": 24380 }, { - "epoch": 0.6918558456299659, + "epoch": 0.6908951798010712, "grad_norm": 0.0, - "learning_rate": 4.58038419139068e-06, - "loss": 0.8412, + "learning_rate": 4.606442950084188e-06, + "loss": 0.8262, "step": 24381 }, { - "epoch": 0.6918842224744608, + "epoch": 0.6909235172433336, "grad_norm": 0.0, - "learning_rate": 4.579611818073735e-06, - "loss": 0.7499, + "learning_rate": 4.605670122617028e-06, + "loss": 0.8621, "step": 24382 }, { - "epoch": 0.6919125993189558, + "epoch": 0.6909518546855961, "grad_norm": 0.0, - "learning_rate": 4.578839490543049e-06, - "loss": 0.7641, + "learning_rate": 4.60489734058831e-06, + "loss": 0.8127, "step": 24383 }, { - "epoch": 0.6919409761634506, + "epoch": 0.6909801921278586, "grad_norm": 0.0, - "learning_rate": 4.578067208805148e-06, - "loss": 0.7593, + "learning_rate": 4.604124604004544e-06, + "loss": 0.8774, "step": 24384 }, { - "epoch": 0.6919693530079455, + "epoch": 0.691008529570121, "grad_norm": 0.0, - "learning_rate": 4.577294972866563e-06, - "loss": 0.8444, + "learning_rate": 4.603351912872245e-06, + "loss": 0.8757, "step": 24385 }, { - "epoch": 0.6919977298524405, + "epoch": 0.6910368670123834, "grad_norm": 0.0, - "learning_rate": 4.576522782733802e-06, - "loss": 0.8126, + "learning_rate": 4.602579267197912e-06, + "loss": 0.8348, "step": 24386 }, { - "epoch": 0.6920261066969353, + "epoch": 0.6910652044546459, "grad_norm": 0.0, - "learning_rate": 4.5757506384134005e-06, - "loss": 0.9033, + "learning_rate": 4.601806666988058e-06, + "loss": 0.895, "step": 24387 }, { - "epoch": 0.6920544835414302, + "epoch": 0.6910935418969084, "grad_norm": 0.0, - "learning_rate": 4.574978539911881e-06, - "loss": 0.7732, + "learning_rate": 4.601034112249195e-06, + "loss": 0.8602, "step": 24388 }, { - "epoch": 0.692082860385925, + "epoch": 0.6911218793391708, "grad_norm": 0.0, - "learning_rate": 4.5742064872357594e-06, - "loss": 0.8048, + "learning_rate": 4.6002616029878235e-06, + "loss": 0.7779, "step": 24389 }, { - "epoch": 0.69211123723042, + "epoch": 0.6911502167814333, "grad_norm": 0.0, - "learning_rate": 4.573434480391559e-06, - "loss": 0.8249, + "learning_rate": 4.599489139210457e-06, + "loss": 0.7506, "step": 24390 }, { - "epoch": 0.6921396140749149, + "epoch": 0.6911785542236958, "grad_norm": 0.0, - "learning_rate": 4.572662519385804e-06, - "loss": 0.8611, + "learning_rate": 4.598716720923595e-06, + "loss": 0.8923, "step": 24391 }, { - "epoch": 0.6921679909194097, + "epoch": 0.6912068916659583, "grad_norm": 0.0, - "learning_rate": 4.57189060422501e-06, - "loss": 0.8614, + "learning_rate": 4.597944348133747e-06, + "loss": 0.8405, "step": 24392 }, { - "epoch": 0.6921963677639047, + "epoch": 0.6912352291082207, "grad_norm": 0.0, - "learning_rate": 4.571118734915704e-06, - "loss": 0.8539, + "learning_rate": 4.597172020847421e-06, + "loss": 0.8204, "step": 24393 }, { - "epoch": 0.6922247446083996, + "epoch": 0.6912635665504832, "grad_norm": 0.0, - "learning_rate": 4.570346911464397e-06, - "loss": 0.8394, + "learning_rate": 4.596399739071121e-06, + "loss": 0.8488, "step": 24394 }, { - "epoch": 0.6922531214528944, + "epoch": 0.6912919039927456, "grad_norm": 0.0, - "learning_rate": 4.569575133877615e-06, - "loss": 0.8328, + "learning_rate": 4.595627502811351e-06, + "loss": 0.6458, "step": 24395 }, { - "epoch": 0.6922814982973893, + "epoch": 0.691320241435008, "grad_norm": 0.0, - "learning_rate": 4.5688034021618795e-06, - "loss": 0.9449, + "learning_rate": 4.594855312074624e-06, + "loss": 0.937, "step": 24396 }, { - "epoch": 0.6923098751418842, + "epoch": 0.6913485788772705, "grad_norm": 0.0, - "learning_rate": 4.568031716323702e-06, - "loss": 0.7913, + "learning_rate": 4.594083166867433e-06, + "loss": 0.82, "step": 24397 }, { - "epoch": 0.6923382519863791, + "epoch": 0.691376916319533, "grad_norm": 0.0, - "learning_rate": 4.567260076369605e-06, - "loss": 0.9145, + "learning_rate": 4.59331106719629e-06, + "loss": 0.7493, "step": 24398 }, { - "epoch": 0.692366628830874, + "epoch": 0.6914052537617955, "grad_norm": 0.0, - "learning_rate": 4.56648848230611e-06, - "loss": 0.7998, + "learning_rate": 4.592539013067692e-06, + "loss": 0.8642, "step": 24399 }, { - "epoch": 0.6923950056753689, + "epoch": 0.6914335912040579, "grad_norm": 0.0, - "learning_rate": 4.5657169341397265e-06, - "loss": 0.8862, + "learning_rate": 4.591767004488147e-06, + "loss": 0.7804, "step": 24400 }, { - "epoch": 0.6924233825198638, + "epoch": 0.6914619286463204, "grad_norm": 0.0, - "learning_rate": 4.564945431876973e-06, - "loss": 0.9194, + "learning_rate": 4.590995041464159e-06, + "loss": 0.8186, "step": 24401 }, { - "epoch": 0.6924517593643587, + "epoch": 0.6914902660885829, "grad_norm": 0.0, - "learning_rate": 4.564173975524377e-06, - "loss": 0.8455, + "learning_rate": 4.590223124002225e-06, + "loss": 0.9201, "step": 24402 }, { - "epoch": 0.6924801362088536, + "epoch": 0.6915186035308453, "grad_norm": 0.0, - "learning_rate": 4.563402565088445e-06, - "loss": 0.8862, + "learning_rate": 4.589451252108851e-06, + "loss": 0.8602, "step": 24403 }, { - "epoch": 0.6925085130533485, + "epoch": 0.6915469409731078, "grad_norm": 0.0, - "learning_rate": 4.562631200575696e-06, - "loss": 0.8411, + "learning_rate": 4.588679425790542e-06, + "loss": 0.7563, "step": 24404 }, { - "epoch": 0.6925368898978433, + "epoch": 0.6915752784153703, "grad_norm": 0.0, - "learning_rate": 4.561859881992649e-06, - "loss": 0.8013, + "learning_rate": 4.587907645053792e-06, + "loss": 0.7919, "step": 24405 }, { - "epoch": 0.6925652667423382, + "epoch": 0.6916036158576326, "grad_norm": 0.0, - "learning_rate": 4.561088609345812e-06, - "loss": 0.7323, + "learning_rate": 4.587135909905105e-06, + "loss": 0.7411, "step": 24406 }, { - "epoch": 0.6925936435868332, + "epoch": 0.6916319532998951, "grad_norm": 0.0, - "learning_rate": 4.560317382641704e-06, - "loss": 0.8537, + "learning_rate": 4.5863642203509826e-06, + "loss": 0.806, "step": 24407 }, { - "epoch": 0.692622020431328, + "epoch": 0.6916602907421576, "grad_norm": 0.0, - "learning_rate": 4.559546201886843e-06, - "loss": 0.8818, + "learning_rate": 4.585592576397925e-06, + "loss": 0.7857, "step": 24408 }, { - "epoch": 0.6926503972758229, + "epoch": 0.6916886281844201, "grad_norm": 0.0, - "learning_rate": 4.558775067087736e-06, - "loss": 0.8904, + "learning_rate": 4.584820978052434e-06, + "loss": 0.7303, "step": 24409 }, { - "epoch": 0.6926787741203179, + "epoch": 0.6917169656266825, "grad_norm": 0.0, - "learning_rate": 4.558003978250901e-06, - "loss": 0.8557, + "learning_rate": 4.584049425321006e-06, + "loss": 0.7886, "step": 24410 }, { - "epoch": 0.6927071509648127, + "epoch": 0.691745303068945, "grad_norm": 0.0, - "learning_rate": 4.5572329353828546e-06, - "loss": 0.8774, + "learning_rate": 4.583277918210142e-06, + "loss": 0.7387, "step": 24411 }, { - "epoch": 0.6927355278093076, + "epoch": 0.6917736405112075, "grad_norm": 0.0, - "learning_rate": 4.5564619384901035e-06, - "loss": 0.8756, + "learning_rate": 4.582506456726337e-06, + "loss": 0.7961, "step": 24412 }, { - "epoch": 0.6927639046538024, + "epoch": 0.6918019779534699, "grad_norm": 0.0, - "learning_rate": 4.555690987579162e-06, - "loss": 0.8257, + "learning_rate": 4.581735040876091e-06, + "loss": 0.9285, "step": 24413 }, { - "epoch": 0.6927922814982974, + "epoch": 0.6918303153957324, "grad_norm": 0.0, - "learning_rate": 4.554920082656548e-06, - "loss": 0.951, + "learning_rate": 4.580963670665906e-06, + "loss": 0.852, "step": 24414 }, { - "epoch": 0.6928206583427923, + "epoch": 0.6918586528379949, "grad_norm": 0.0, - "learning_rate": 4.554149223728764e-06, - "loss": 0.8599, + "learning_rate": 4.580192346102275e-06, + "loss": 0.8676, "step": 24415 }, { - "epoch": 0.6928490351872871, + "epoch": 0.6918869902802574, "grad_norm": 0.0, - "learning_rate": 4.553378410802331e-06, - "loss": 0.8266, + "learning_rate": 4.579421067191695e-06, + "loss": 0.8324, "step": 24416 }, { - "epoch": 0.6928774120317821, + "epoch": 0.6919153277225197, "grad_norm": 0.0, - "learning_rate": 4.5526076438837486e-06, - "loss": 0.7777, + "learning_rate": 4.578649833940667e-06, + "loss": 0.9248, "step": 24417 }, { - "epoch": 0.692905788876277, + "epoch": 0.6919436651647822, "grad_norm": 0.0, - "learning_rate": 4.551836922979537e-06, - "loss": 0.8059, + "learning_rate": 4.577878646355682e-06, + "loss": 0.8715, "step": 24418 }, { - "epoch": 0.6929341657207718, + "epoch": 0.6919720026070447, "grad_norm": 0.0, - "learning_rate": 4.551066248096208e-06, - "loss": 0.8177, + "learning_rate": 4.577107504443239e-06, + "loss": 0.8615, "step": 24419 }, { - "epoch": 0.6929625425652668, + "epoch": 0.6920003400493071, "grad_norm": 0.0, - "learning_rate": 4.550295619240262e-06, - "loss": 0.8975, + "learning_rate": 4.5763364082098326e-06, + "loss": 0.8206, "step": 24420 }, { - "epoch": 0.6929909194097617, + "epoch": 0.6920286774915696, "grad_norm": 0.0, - "learning_rate": 4.549525036418217e-06, - "loss": 0.7323, + "learning_rate": 4.575565357661958e-06, + "loss": 0.7541, "step": 24421 }, { - "epoch": 0.6930192962542565, + "epoch": 0.6920570149338321, "grad_norm": 0.0, - "learning_rate": 4.54875449963658e-06, - "loss": 0.8222, + "learning_rate": 4.574794352806116e-06, + "loss": 0.9009, "step": 24422 }, { - "epoch": 0.6930476730987514, + "epoch": 0.6920853523760946, "grad_norm": 0.0, - "learning_rate": 4.547984008901855e-06, - "loss": 0.8169, + "learning_rate": 4.574023393648791e-06, + "loss": 0.7546, "step": 24423 }, { - "epoch": 0.6930760499432463, + "epoch": 0.692113689818357, "grad_norm": 0.0, - "learning_rate": 4.547213564220556e-06, - "loss": 0.6929, + "learning_rate": 4.5732524801964815e-06, + "loss": 0.9113, "step": 24424 }, { - "epoch": 0.6931044267877412, + "epoch": 0.6921420272606195, "grad_norm": 0.0, - "learning_rate": 4.546443165599193e-06, - "loss": 0.8123, + "learning_rate": 4.5724816124556856e-06, + "loss": 0.7569, "step": 24425 }, { - "epoch": 0.6931328036322361, + "epoch": 0.692170364702882, "grad_norm": 0.0, - "learning_rate": 4.545672813044264e-06, - "loss": 0.8244, + "learning_rate": 4.571710790432889e-06, + "loss": 0.7384, "step": 24426 }, { - "epoch": 0.693161180476731, + "epoch": 0.6921987021451443, "grad_norm": 0.0, - "learning_rate": 4.544902506562283e-06, - "loss": 0.879, + "learning_rate": 4.570940014134593e-06, + "loss": 0.8245, "step": 24427 }, { - "epoch": 0.6931895573212259, + "epoch": 0.6922270395874068, "grad_norm": 0.0, - "learning_rate": 4.54413224615976e-06, - "loss": 0.9296, + "learning_rate": 4.57016928356728e-06, + "loss": 0.8021, "step": 24428 }, { - "epoch": 0.6932179341657208, + "epoch": 0.6922553770296693, "grad_norm": 0.0, - "learning_rate": 4.543362031843193e-06, - "loss": 0.9436, + "learning_rate": 4.569398598737448e-06, + "loss": 0.8049, "step": 24429 }, { - "epoch": 0.6932463110102156, + "epoch": 0.6922837144719317, "grad_norm": 0.0, - "learning_rate": 4.542591863619092e-06, - "loss": 0.849, + "learning_rate": 4.568627959651593e-06, + "loss": 0.8147, "step": 24430 }, { - "epoch": 0.6932746878547106, + "epoch": 0.6923120519141942, "grad_norm": 0.0, - "learning_rate": 4.541821741493967e-06, - "loss": 0.7616, + "learning_rate": 4.567857366316196e-06, + "loss": 0.8178, "step": 24431 }, { - "epoch": 0.6933030646992054, + "epoch": 0.6923403893564567, "grad_norm": 0.0, - "learning_rate": 4.541051665474311e-06, - "loss": 0.8444, + "learning_rate": 4.567086818737754e-06, + "loss": 0.866, "step": 24432 }, { - "epoch": 0.6933314415437003, + "epoch": 0.6923687267987192, "grad_norm": 0.0, - "learning_rate": 4.540281635566645e-06, - "loss": 0.9787, + "learning_rate": 4.566316316922758e-06, + "loss": 0.9218, "step": 24433 }, { - "epoch": 0.6933598183881953, + "epoch": 0.6923970642409816, "grad_norm": 0.0, - "learning_rate": 4.539511651777461e-06, - "loss": 0.6743, + "learning_rate": 4.565545860877697e-06, + "loss": 0.8558, "step": 24434 }, { - "epoch": 0.6933881952326901, + "epoch": 0.6924254016832441, "grad_norm": 0.0, - "learning_rate": 4.538741714113268e-06, - "loss": 0.868, + "learning_rate": 4.5647754506090645e-06, + "loss": 0.9095, "step": 24435 }, { - "epoch": 0.693416572077185, + "epoch": 0.6924537391255066, "grad_norm": 0.0, - "learning_rate": 4.537971822580573e-06, - "loss": 0.7874, + "learning_rate": 4.564005086123343e-06, + "loss": 0.8143, "step": 24436 }, { - "epoch": 0.69344494892168, + "epoch": 0.692482076567769, "grad_norm": 0.0, - "learning_rate": 4.537201977185872e-06, - "loss": 0.8689, + "learning_rate": 4.563234767427026e-06, + "loss": 0.8952, "step": 24437 }, { - "epoch": 0.6934733257661748, + "epoch": 0.6925104140100314, "grad_norm": 0.0, - "learning_rate": 4.536432177935672e-06, - "loss": 0.8045, + "learning_rate": 4.562464494526605e-06, + "loss": 0.9515, "step": 24438 }, { - "epoch": 0.6935017026106697, + "epoch": 0.6925387514522939, "grad_norm": 0.0, - "learning_rate": 4.535662424836478e-06, - "loss": 0.787, + "learning_rate": 4.56169426742856e-06, + "loss": 0.9528, "step": 24439 }, { - "epoch": 0.6935300794551645, + "epoch": 0.6925670888945564, "grad_norm": 0.0, - "learning_rate": 4.534892717894785e-06, - "loss": 0.8185, + "learning_rate": 4.560924086139389e-06, + "loss": 0.7548, "step": 24440 }, { - "epoch": 0.6935584562996595, + "epoch": 0.6925954263368188, "grad_norm": 0.0, - "learning_rate": 4.5341230571171e-06, - "loss": 0.7905, + "learning_rate": 4.560153950665569e-06, + "loss": 0.8006, "step": 24441 }, { - "epoch": 0.6935868331441544, + "epoch": 0.6926237637790813, "grad_norm": 0.0, - "learning_rate": 4.5333534425099265e-06, - "loss": 0.8249, + "learning_rate": 4.559383861013593e-06, + "loss": 0.7903, "step": 24442 }, { - "epoch": 0.6936152099886492, + "epoch": 0.6926521012213438, "grad_norm": 0.0, - "learning_rate": 4.532583874079758e-06, - "loss": 0.8335, + "learning_rate": 4.558613817189951e-06, + "loss": 0.8899, "step": 24443 }, { - "epoch": 0.6936435868331442, + "epoch": 0.6926804386636062, "grad_norm": 0.0, - "learning_rate": 4.531814351833099e-06, - "loss": 0.9423, + "learning_rate": 4.557843819201121e-06, + "loss": 0.8517, "step": 24444 }, { - "epoch": 0.6936719636776391, + "epoch": 0.6927087761058687, "grad_norm": 0.0, - "learning_rate": 4.5310448757764545e-06, - "loss": 0.7805, + "learning_rate": 4.5570738670535944e-06, + "loss": 0.8, "step": 24445 }, { - "epoch": 0.6937003405221339, + "epoch": 0.6927371135481312, "grad_norm": 0.0, - "learning_rate": 4.530275445916317e-06, - "loss": 0.8772, + "learning_rate": 4.556303960753855e-06, + "loss": 0.7974, "step": 24446 }, { - "epoch": 0.6937287173666288, + "epoch": 0.6927654509903937, "grad_norm": 0.0, - "learning_rate": 4.529506062259187e-06, - "loss": 0.8464, + "learning_rate": 4.55553410030839e-06, + "loss": 0.8634, "step": 24447 }, { - "epoch": 0.6937570942111237, + "epoch": 0.692793788432656, "grad_norm": 0.0, - "learning_rate": 4.528736724811565e-06, - "loss": 0.8745, + "learning_rate": 4.554764285723685e-06, + "loss": 0.6914, "step": 24448 }, { - "epoch": 0.6937854710556186, + "epoch": 0.6928221258749185, "grad_norm": 0.0, - "learning_rate": 4.527967433579952e-06, - "loss": 0.8876, + "learning_rate": 4.553994517006219e-06, + "loss": 0.7563, "step": 24449 }, { - "epoch": 0.6938138479001135, + "epoch": 0.692850463317181, "grad_norm": 0.0, - "learning_rate": 4.527198188570846e-06, - "loss": 0.8629, + "learning_rate": 4.553224794162481e-06, + "loss": 0.7893, "step": 24450 }, { - "epoch": 0.6938422247446084, + "epoch": 0.6928788007594434, "grad_norm": 0.0, - "learning_rate": 4.5264289897907396e-06, - "loss": 0.8666, + "learning_rate": 4.552455117198955e-06, + "loss": 0.7944, "step": 24451 }, { - "epoch": 0.6938706015891033, + "epoch": 0.6929071382017059, "grad_norm": 0.0, - "learning_rate": 4.525659837246134e-06, - "loss": 0.7817, + "learning_rate": 4.55168548612212e-06, + "loss": 0.8533, "step": 24452 }, { - "epoch": 0.6938989784335982, + "epoch": 0.6929354756439684, "grad_norm": 0.0, - "learning_rate": 4.524890730943528e-06, - "loss": 0.8021, + "learning_rate": 4.55091590093846e-06, + "loss": 0.7368, "step": 24453 }, { - "epoch": 0.6939273552780931, + "epoch": 0.6929638130862308, "grad_norm": 0.0, - "learning_rate": 4.524121670889414e-06, - "loss": 0.9449, + "learning_rate": 4.550146361654465e-06, + "loss": 0.8181, "step": 24454 }, { - "epoch": 0.693955732122588, + "epoch": 0.6929921505284933, "grad_norm": 0.0, - "learning_rate": 4.523352657090291e-06, - "loss": 0.7597, + "learning_rate": 4.549376868276606e-06, + "loss": 0.8238, "step": 24455 }, { - "epoch": 0.6939841089670828, + "epoch": 0.6930204879707558, "grad_norm": 0.0, - "learning_rate": 4.522583689552656e-06, - "loss": 0.8236, + "learning_rate": 4.548607420811373e-06, + "loss": 0.7987, "step": 24456 }, { - "epoch": 0.6940124858115777, + "epoch": 0.6930488254130183, "grad_norm": 0.0, - "learning_rate": 4.521814768283e-06, - "loss": 0.8132, + "learning_rate": 4.547838019265241e-06, + "loss": 0.7644, "step": 24457 }, { - "epoch": 0.6940408626560727, + "epoch": 0.6930771628552806, "grad_norm": 0.0, - "learning_rate": 4.521045893287821e-06, - "loss": 0.8601, + "learning_rate": 4.5470686636446924e-06, + "loss": 0.8901, "step": 24458 }, { - "epoch": 0.6940692395005675, + "epoch": 0.6931055002975431, "grad_norm": 0.0, - "learning_rate": 4.520277064573617e-06, - "loss": 0.8408, + "learning_rate": 4.546299353956211e-06, + "loss": 0.8289, "step": 24459 }, { - "epoch": 0.6940976163450624, + "epoch": 0.6931338377398056, "grad_norm": 0.0, - "learning_rate": 4.519508282146876e-06, - "loss": 0.9135, + "learning_rate": 4.545530090206274e-06, + "loss": 0.821, "step": 24460 }, { - "epoch": 0.6941259931895574, + "epoch": 0.693162175182068, "grad_norm": 0.0, - "learning_rate": 4.518739546014095e-06, - "loss": 0.7995, + "learning_rate": 4.544760872401364e-06, + "loss": 0.7477, "step": 24461 }, { - "epoch": 0.6941543700340522, + "epoch": 0.6931905126243305, "grad_norm": 0.0, - "learning_rate": 4.517970856181771e-06, - "loss": 0.7827, + "learning_rate": 4.543991700547962e-06, + "loss": 0.8624, "step": 24462 }, { - "epoch": 0.6941827468785471, + "epoch": 0.693218850066593, "grad_norm": 0.0, - "learning_rate": 4.517202212656385e-06, - "loss": 0.8127, + "learning_rate": 4.54322257465254e-06, + "loss": 0.8956, "step": 24463 }, { - "epoch": 0.694211123723042, + "epoch": 0.6932471875088555, "grad_norm": 0.0, - "learning_rate": 4.516433615444447e-06, - "loss": 0.7937, + "learning_rate": 4.542453494721583e-06, + "loss": 0.8037, "step": 24464 }, { - "epoch": 0.6942395005675369, + "epoch": 0.6932755249511179, "grad_norm": 0.0, - "learning_rate": 4.515665064552437e-06, - "loss": 0.9203, + "learning_rate": 4.541684460761565e-06, + "loss": 0.8761, "step": 24465 }, { - "epoch": 0.6942678774120318, + "epoch": 0.6933038623933804, "grad_norm": 0.0, - "learning_rate": 4.51489655998685e-06, - "loss": 0.9509, + "learning_rate": 4.540915472778965e-06, + "loss": 0.7361, "step": 24466 }, { - "epoch": 0.6942962542565266, + "epoch": 0.6933321998356429, "grad_norm": 0.0, - "learning_rate": 4.514128101754183e-06, - "loss": 0.8124, + "learning_rate": 4.540146530780266e-06, + "loss": 0.7799, "step": 24467 }, { - "epoch": 0.6943246311010216, + "epoch": 0.6933605372779053, "grad_norm": 0.0, - "learning_rate": 4.513359689860918e-06, - "loss": 0.8089, + "learning_rate": 4.539377634771935e-06, + "loss": 0.7227, "step": 24468 }, { - "epoch": 0.6943530079455165, + "epoch": 0.6933888747201677, "grad_norm": 0.0, - "learning_rate": 4.51259132431355e-06, - "loss": 0.8096, + "learning_rate": 4.538608784760459e-06, + "loss": 0.8911, "step": 24469 }, { - "epoch": 0.6943813847900113, + "epoch": 0.6934172121624302, "grad_norm": 0.0, - "learning_rate": 4.511823005118574e-06, - "loss": 0.8342, + "learning_rate": 4.537839980752305e-06, + "loss": 0.6876, "step": 24470 }, { - "epoch": 0.6944097616345063, + "epoch": 0.6934455496046927, "grad_norm": 0.0, - "learning_rate": 4.511054732282472e-06, - "loss": 0.6955, + "learning_rate": 4.537071222753953e-06, + "loss": 0.7824, "step": 24471 }, { - "epoch": 0.6944381384790012, + "epoch": 0.6934738870469551, "grad_norm": 0.0, - "learning_rate": 4.510286505811738e-06, - "loss": 0.9353, + "learning_rate": 4.5363025107718775e-06, + "loss": 0.838, "step": 24472 }, { - "epoch": 0.694466515323496, + "epoch": 0.6935022244892176, "grad_norm": 0.0, - "learning_rate": 4.509518325712865e-06, - "loss": 0.874, + "learning_rate": 4.5355338448125566e-06, + "loss": 0.8769, "step": 24473 }, { - "epoch": 0.6944948921679909, + "epoch": 0.6935305619314801, "grad_norm": 0.0, - "learning_rate": 4.508750191992332e-06, - "loss": 0.88, + "learning_rate": 4.534765224882463e-06, + "loss": 0.8855, "step": 24474 }, { - "epoch": 0.6945232690124858, + "epoch": 0.6935588993737425, "grad_norm": 0.0, - "learning_rate": 4.5079821046566345e-06, - "loss": 0.816, + "learning_rate": 4.533996650988074e-06, + "loss": 0.9066, "step": 24475 }, { - "epoch": 0.6945516458569807, + "epoch": 0.693587236816005, "grad_norm": 0.0, - "learning_rate": 4.507214063712262e-06, - "loss": 0.8455, + "learning_rate": 4.533228123135858e-06, + "loss": 0.7601, "step": 24476 }, { - "epoch": 0.6945800227014756, + "epoch": 0.6936155742582675, "grad_norm": 0.0, - "learning_rate": 4.506446069165696e-06, - "loss": 0.8824, + "learning_rate": 4.532459641332295e-06, + "loss": 0.8024, "step": 24477 }, { - "epoch": 0.6946083995459705, + "epoch": 0.6936439117005299, "grad_norm": 0.0, - "learning_rate": 4.505678121023426e-06, - "loss": 0.6996, + "learning_rate": 4.531691205583852e-06, + "loss": 0.7986, "step": 24478 }, { - "epoch": 0.6946367763904654, + "epoch": 0.6936722491427924, "grad_norm": 0.0, - "learning_rate": 4.504910219291941e-06, - "loss": 0.7949, + "learning_rate": 4.530922815897003e-06, + "loss": 0.8666, "step": 24479 }, { - "epoch": 0.6946651532349603, + "epoch": 0.6937005865850548, "grad_norm": 0.0, - "learning_rate": 4.5041423639777245e-06, - "loss": 0.861, + "learning_rate": 4.530154472278228e-06, + "loss": 0.808, "step": 24480 }, { - "epoch": 0.6946935300794551, + "epoch": 0.6937289240273173, "grad_norm": 0.0, - "learning_rate": 4.50337455508727e-06, - "loss": 0.9327, + "learning_rate": 4.529386174733987e-06, + "loss": 0.8114, "step": 24481 }, { - "epoch": 0.6947219069239501, + "epoch": 0.6937572614695797, "grad_norm": 0.0, - "learning_rate": 4.502606792627053e-06, - "loss": 0.8269, + "learning_rate": 4.528617923270758e-06, + "loss": 0.8641, "step": 24482 }, { - "epoch": 0.6947502837684449, + "epoch": 0.6937855989118422, "grad_norm": 0.0, - "learning_rate": 4.501839076603565e-06, - "loss": 0.9028, + "learning_rate": 4.527849717895017e-06, + "loss": 0.8674, "step": 24483 }, { - "epoch": 0.6947786606129398, + "epoch": 0.6938139363541047, "grad_norm": 0.0, - "learning_rate": 4.501071407023292e-06, - "loss": 0.8774, + "learning_rate": 4.527081558613225e-06, + "loss": 0.7909, "step": 24484 }, { - "epoch": 0.6948070374574348, + "epoch": 0.6938422737963671, "grad_norm": 0.0, - "learning_rate": 4.500303783892712e-06, - "loss": 0.8599, + "learning_rate": 4.526313445431858e-06, + "loss": 0.8614, "step": 24485 }, { - "epoch": 0.6948354143019296, + "epoch": 0.6938706112386296, "grad_norm": 0.0, - "learning_rate": 4.499536207218315e-06, - "loss": 0.8085, + "learning_rate": 4.525545378357385e-06, + "loss": 0.7701, "step": 24486 }, { - "epoch": 0.6948637911464245, + "epoch": 0.6938989486808921, "grad_norm": 0.0, - "learning_rate": 4.4987686770065855e-06, - "loss": 0.8689, + "learning_rate": 4.524777357396277e-06, + "loss": 0.9305, "step": 24487 }, { - "epoch": 0.6948921679909195, + "epoch": 0.6939272861231546, "grad_norm": 0.0, - "learning_rate": 4.498001193264e-06, - "loss": 0.797, + "learning_rate": 4.524009382555005e-06, + "loss": 0.8597, "step": 24488 }, { - "epoch": 0.6949205448354143, + "epoch": 0.693955623565417, "grad_norm": 0.0, - "learning_rate": 4.4972337559970455e-06, - "loss": 0.7678, + "learning_rate": 4.5232414538400336e-06, + "loss": 0.7855, "step": 24489 }, { - "epoch": 0.6949489216799092, + "epoch": 0.6939839610076795, "grad_norm": 0.0, - "learning_rate": 4.49646636521221e-06, - "loss": 0.8046, + "learning_rate": 4.522473571257836e-06, + "loss": 0.7554, "step": 24490 }, { - "epoch": 0.694977298524404, + "epoch": 0.694012298449942, "grad_norm": 0.0, - "learning_rate": 4.495699020915966e-06, - "loss": 0.8966, + "learning_rate": 4.521705734814873e-06, + "loss": 0.8151, "step": 24491 }, { - "epoch": 0.695005675368899, + "epoch": 0.6940406358922043, "grad_norm": 0.0, - "learning_rate": 4.494931723114799e-06, - "loss": 0.8445, + "learning_rate": 4.520937944517617e-06, + "loss": 0.8307, "step": 24492 }, { - "epoch": 0.6950340522133939, + "epoch": 0.6940689733344668, "grad_norm": 0.0, - "learning_rate": 4.4941644718151935e-06, - "loss": 0.7987, + "learning_rate": 4.52017020037254e-06, + "loss": 0.8589, "step": 24493 }, { - "epoch": 0.6950624290578887, + "epoch": 0.6940973107767293, "grad_norm": 0.0, - "learning_rate": 4.493397267023626e-06, - "loss": 0.8957, + "learning_rate": 4.5194025023861e-06, + "loss": 0.7667, "step": 24494 }, { - "epoch": 0.6950908059023837, + "epoch": 0.6941256482189918, "grad_norm": 0.0, - "learning_rate": 4.49263010874658e-06, - "loss": 0.8783, + "learning_rate": 4.518634850564768e-06, + "loss": 0.7919, "step": 24495 }, { - "epoch": 0.6951191827468786, + "epoch": 0.6941539856612542, "grad_norm": 0.0, - "learning_rate": 4.491862996990539e-06, - "loss": 0.8494, + "learning_rate": 4.517867244915014e-06, + "loss": 0.8017, "step": 24496 }, { - "epoch": 0.6951475595913734, + "epoch": 0.6941823231035167, "grad_norm": 0.0, - "learning_rate": 4.491095931761975e-06, - "loss": 0.7955, + "learning_rate": 4.5170996854432955e-06, + "loss": 0.7976, "step": 24497 }, { - "epoch": 0.6951759364358683, + "epoch": 0.6942106605457792, "grad_norm": 0.0, - "learning_rate": 4.490328913067376e-06, - "loss": 0.8374, + "learning_rate": 4.5163321721560824e-06, + "loss": 0.8747, "step": 24498 }, { - "epoch": 0.6952043132803633, + "epoch": 0.6942389979880416, "grad_norm": 0.0, - "learning_rate": 4.4895619409132105e-06, - "loss": 0.7818, + "learning_rate": 4.515564705059841e-06, + "loss": 0.8651, "step": 24499 }, { - "epoch": 0.6952326901248581, + "epoch": 0.6942673354303041, "grad_norm": 0.0, - "learning_rate": 4.488795015305965e-06, - "loss": 0.8346, + "learning_rate": 4.514797284161033e-06, + "loss": 0.8446, "step": 24500 }, { - "epoch": 0.695261066969353, + "epoch": 0.6942956728725665, "grad_norm": 0.0, - "learning_rate": 4.488028136252118e-06, - "loss": 0.8715, + "learning_rate": 4.51402990946613e-06, + "loss": 0.8749, "step": 24501 }, { - "epoch": 0.6952894438138479, + "epoch": 0.6943240103148289, "grad_norm": 0.0, - "learning_rate": 4.487261303758143e-06, - "loss": 0.803, + "learning_rate": 4.513262580981586e-06, + "loss": 0.7871, "step": 24502 }, { - "epoch": 0.6953178206583428, + "epoch": 0.6943523477570914, "grad_norm": 0.0, - "learning_rate": 4.486494517830519e-06, - "loss": 0.8199, + "learning_rate": 4.512495298713867e-06, + "loss": 0.8164, "step": 24503 }, { - "epoch": 0.6953461975028377, + "epoch": 0.6943806851993539, "grad_norm": 0.0, - "learning_rate": 4.485727778475726e-06, - "loss": 0.7972, + "learning_rate": 4.511728062669443e-06, + "loss": 0.8527, "step": 24504 }, { - "epoch": 0.6953745743473326, + "epoch": 0.6944090226416164, "grad_norm": 0.0, - "learning_rate": 4.484961085700236e-06, - "loss": 0.926, + "learning_rate": 4.510960872854767e-06, + "loss": 0.8697, "step": 24505 }, { - "epoch": 0.6954029511918275, + "epoch": 0.6944373600838788, "grad_norm": 0.0, - "learning_rate": 4.484194439510527e-06, - "loss": 0.8151, + "learning_rate": 4.510193729276311e-06, + "loss": 0.8209, "step": 24506 }, { - "epoch": 0.6954313280363223, + "epoch": 0.6944656975261413, "grad_norm": 0.0, - "learning_rate": 4.4834278399130795e-06, - "loss": 0.8493, + "learning_rate": 4.509426631940527e-06, + "loss": 0.8811, "step": 24507 }, { - "epoch": 0.6954597048808172, + "epoch": 0.6944940349684038, "grad_norm": 0.0, - "learning_rate": 4.482661286914362e-06, - "loss": 0.8485, + "learning_rate": 4.508659580853881e-06, + "loss": 0.8425, "step": 24508 }, { - "epoch": 0.6954880817253122, + "epoch": 0.6945223724106662, "grad_norm": 0.0, - "learning_rate": 4.481894780520848e-06, - "loss": 0.8624, + "learning_rate": 4.507892576022838e-06, + "loss": 0.8857, "step": 24509 }, { - "epoch": 0.695516458569807, + "epoch": 0.6945507098529287, "grad_norm": 0.0, - "learning_rate": 4.481128320739024e-06, - "loss": 0.8226, + "learning_rate": 4.5071256174538535e-06, + "loss": 0.6905, "step": 24510 }, { - "epoch": 0.6955448354143019, + "epoch": 0.6945790472951912, "grad_norm": 0.0, - "learning_rate": 4.480361907575354e-06, - "loss": 0.8853, + "learning_rate": 4.5063587051533885e-06, + "loss": 0.8197, "step": 24511 }, { - "epoch": 0.6955732122587969, + "epoch": 0.6946073847374536, "grad_norm": 0.0, - "learning_rate": 4.479595541036316e-06, - "loss": 0.9114, + "learning_rate": 4.505591839127904e-06, + "loss": 0.7452, "step": 24512 }, { - "epoch": 0.6956015891032917, + "epoch": 0.694635722179716, "grad_norm": 0.0, - "learning_rate": 4.478829221128384e-06, - "loss": 0.8302, + "learning_rate": 4.504825019383861e-06, + "loss": 0.8799, "step": 24513 }, { - "epoch": 0.6956299659477866, + "epoch": 0.6946640596219785, "grad_norm": 0.0, - "learning_rate": 4.478062947858028e-06, - "loss": 0.918, + "learning_rate": 4.5040582459277194e-06, + "loss": 0.8501, "step": 24514 }, { - "epoch": 0.6956583427922814, + "epoch": 0.694692397064241, "grad_norm": 0.0, - "learning_rate": 4.477296721231722e-06, - "loss": 0.8701, + "learning_rate": 4.5032915187659334e-06, + "loss": 0.7217, "step": 24515 }, { - "epoch": 0.6956867196367764, + "epoch": 0.6947207345065034, "grad_norm": 0.0, - "learning_rate": 4.476530541255942e-06, - "loss": 0.7974, + "learning_rate": 4.502524837904964e-06, + "loss": 0.8811, "step": 24516 }, { - "epoch": 0.6957150964812713, + "epoch": 0.6947490719487659, "grad_norm": 0.0, - "learning_rate": 4.4757644079371544e-06, - "loss": 0.8703, + "learning_rate": 4.501758203351272e-06, + "loss": 0.7791, "step": 24517 }, { - "epoch": 0.6957434733257661, + "epoch": 0.6947774093910284, "grad_norm": 0.0, - "learning_rate": 4.474998321281833e-06, - "loss": 0.956, + "learning_rate": 4.50099161511131e-06, + "loss": 0.7669, "step": 24518 }, { - "epoch": 0.6957718501702611, + "epoch": 0.6948057468332909, "grad_norm": 0.0, - "learning_rate": 4.474232281296451e-06, - "loss": 0.7368, + "learning_rate": 4.50022507319154e-06, + "loss": 0.8418, "step": 24519 }, { - "epoch": 0.695800227014756, + "epoch": 0.6948340842755533, "grad_norm": 0.0, - "learning_rate": 4.473466287987476e-06, - "loss": 0.7393, + "learning_rate": 4.499458577598413e-06, + "loss": 0.8506, "step": 24520 }, { - "epoch": 0.6958286038592508, + "epoch": 0.6948624217178158, "grad_norm": 0.0, - "learning_rate": 4.472700341361378e-06, - "loss": 0.8727, + "learning_rate": 4.49869212833839e-06, + "loss": 0.77, "step": 24521 }, { - "epoch": 0.6958569807037458, + "epoch": 0.6948907591600783, "grad_norm": 0.0, - "learning_rate": 4.471934441424634e-06, - "loss": 0.8548, + "learning_rate": 4.49792572541793e-06, + "loss": 0.9339, "step": 24522 }, { - "epoch": 0.6958853575482407, + "epoch": 0.6949190966023406, "grad_norm": 0.0, - "learning_rate": 4.471168588183703e-06, - "loss": 0.8346, + "learning_rate": 4.49715936884348e-06, + "loss": 0.8961, "step": 24523 }, { - "epoch": 0.6959137343927355, + "epoch": 0.6949474340446031, "grad_norm": 0.0, - "learning_rate": 4.470402781645059e-06, - "loss": 0.8155, + "learning_rate": 4.4963930586215e-06, + "loss": 0.8488, "step": 24524 }, { - "epoch": 0.6959421112372304, + "epoch": 0.6949757714868656, "grad_norm": 0.0, - "learning_rate": 4.469637021815173e-06, - "loss": 0.8916, + "learning_rate": 4.495626794758445e-06, + "loss": 0.8519, "step": 24525 }, { - "epoch": 0.6959704880817253, + "epoch": 0.695004108929128, "grad_norm": 0.0, - "learning_rate": 4.468871308700511e-06, - "loss": 0.7562, + "learning_rate": 4.49486057726077e-06, + "loss": 0.7975, "step": 24526 }, { - "epoch": 0.6959988649262202, + "epoch": 0.6950324463713905, "grad_norm": 0.0, - "learning_rate": 4.468105642307545e-06, - "loss": 0.823, + "learning_rate": 4.4940944061349325e-06, + "loss": 0.8126, "step": 24527 }, { - "epoch": 0.6960272417707151, + "epoch": 0.695060783813653, "grad_norm": 0.0, - "learning_rate": 4.467340022642736e-06, - "loss": 0.8558, + "learning_rate": 4.493328281387378e-06, + "loss": 0.7981, "step": 24528 }, { - "epoch": 0.69605561861521, + "epoch": 0.6950891212559155, "grad_norm": 0.0, - "learning_rate": 4.466574449712555e-06, - "loss": 0.9101, + "learning_rate": 4.492562203024565e-06, + "loss": 0.9049, "step": 24529 }, { - "epoch": 0.6960839954597049, + "epoch": 0.6951174586981779, "grad_norm": 0.0, - "learning_rate": 4.465808923523471e-06, - "loss": 0.9674, + "learning_rate": 4.4917961710529486e-06, + "loss": 0.8376, "step": 24530 }, { - "epoch": 0.6961123723041998, + "epoch": 0.6951457961404404, "grad_norm": 0.0, - "learning_rate": 4.465043444081945e-06, - "loss": 0.8639, + "learning_rate": 4.491030185478976e-06, + "loss": 0.9393, "step": 24531 }, { - "epoch": 0.6961407491486946, + "epoch": 0.6951741335827029, "grad_norm": 0.0, - "learning_rate": 4.464278011394445e-06, - "loss": 0.7336, + "learning_rate": 4.4902642463091e-06, + "loss": 0.8826, "step": 24532 }, { - "epoch": 0.6961691259931896, + "epoch": 0.6952024710249652, "grad_norm": 0.0, - "learning_rate": 4.463512625467442e-06, - "loss": 0.7607, + "learning_rate": 4.4894983535497805e-06, + "loss": 0.7919, "step": 24533 }, { - "epoch": 0.6961975028376844, + "epoch": 0.6952308084672277, "grad_norm": 0.0, - "learning_rate": 4.462747286307393e-06, - "loss": 0.8563, + "learning_rate": 4.488732507207457e-06, + "loss": 0.8884, "step": 24534 }, { - "epoch": 0.6962258796821793, + "epoch": 0.6952591459094902, "grad_norm": 0.0, - "learning_rate": 4.461981993920766e-06, - "loss": 0.9151, + "learning_rate": 4.4879667072885906e-06, + "loss": 0.7818, "step": 24535 }, { - "epoch": 0.6962542565266743, + "epoch": 0.6952874833517527, "grad_norm": 0.0, - "learning_rate": 4.46121674831403e-06, - "loss": 0.8999, + "learning_rate": 4.487200953799623e-06, + "loss": 0.7338, "step": 24536 }, { - "epoch": 0.6962826333711691, + "epoch": 0.6953158207940151, "grad_norm": 0.0, - "learning_rate": 4.460451549493642e-06, - "loss": 0.8165, + "learning_rate": 4.4864352467470095e-06, + "loss": 0.9207, "step": 24537 }, { - "epoch": 0.696311010215664, + "epoch": 0.6953441582362776, "grad_norm": 0.0, - "learning_rate": 4.459686397466068e-06, - "loss": 0.7058, + "learning_rate": 4.485669586137199e-06, + "loss": 0.838, "step": 24538 }, { - "epoch": 0.696339387060159, + "epoch": 0.6953724956785401, "grad_norm": 0.0, - "learning_rate": 4.458921292237773e-06, - "loss": 0.8294, + "learning_rate": 4.484903971976642e-06, + "loss": 0.8319, "step": 24539 }, { - "epoch": 0.6963677639046538, + "epoch": 0.6954008331208025, "grad_norm": 0.0, - "learning_rate": 4.458156233815218e-06, - "loss": 0.7716, + "learning_rate": 4.4841384042717866e-06, + "loss": 0.792, "step": 24540 }, { - "epoch": 0.6963961407491487, + "epoch": 0.695429170563065, "grad_norm": 0.0, - "learning_rate": 4.45739122220487e-06, - "loss": 0.9002, + "learning_rate": 4.483372883029085e-06, + "loss": 0.862, "step": 24541 }, { - "epoch": 0.6964245175936435, + "epoch": 0.6954575080053275, "grad_norm": 0.0, - "learning_rate": 4.456626257413185e-06, - "loss": 0.784, + "learning_rate": 4.482607408254978e-06, + "loss": 0.8603, "step": 24542 }, { - "epoch": 0.6964528944381385, + "epoch": 0.6954858454475898, "grad_norm": 0.0, - "learning_rate": 4.4558613394466265e-06, - "loss": 0.6948, + "learning_rate": 4.481841979955922e-06, + "loss": 0.7835, "step": 24543 }, { - "epoch": 0.6964812712826334, + "epoch": 0.6955141828898523, "grad_norm": 0.0, - "learning_rate": 4.455096468311659e-06, - "loss": 0.7554, + "learning_rate": 4.481076598138356e-06, + "loss": 0.8945, "step": 24544 }, { - "epoch": 0.6965096481271282, + "epoch": 0.6955425203321148, "grad_norm": 0.0, - "learning_rate": 4.454331644014739e-06, - "loss": 0.7063, + "learning_rate": 4.480311262808732e-06, + "loss": 0.8767, "step": 24545 }, { - "epoch": 0.6965380249716232, + "epoch": 0.6955708577743773, "grad_norm": 0.0, - "learning_rate": 4.453566866562328e-06, - "loss": 0.8858, + "learning_rate": 4.479545973973499e-06, + "loss": 0.8864, "step": 24546 }, { - "epoch": 0.6965664018161181, + "epoch": 0.6955991952166397, "grad_norm": 0.0, - "learning_rate": 4.452802135960891e-06, - "loss": 0.768, + "learning_rate": 4.478780731639096e-06, + "loss": 0.7808, "step": 24547 }, { - "epoch": 0.6965947786606129, + "epoch": 0.6956275326589022, "grad_norm": 0.0, - "learning_rate": 4.45203745221688e-06, - "loss": 0.7877, + "learning_rate": 4.478015535811978e-06, + "loss": 0.8685, "step": 24548 }, { - "epoch": 0.6966231555051078, + "epoch": 0.6956558701011647, "grad_norm": 0.0, - "learning_rate": 4.451272815336758e-06, - "loss": 0.876, + "learning_rate": 4.477250386498582e-06, + "loss": 0.7406, "step": 24549 }, { - "epoch": 0.6966515323496028, + "epoch": 0.6956842075434271, "grad_norm": 0.0, - "learning_rate": 4.450508225326987e-06, - "loss": 0.8973, + "learning_rate": 4.476485283705356e-06, + "loss": 0.8248, "step": 24550 }, { - "epoch": 0.6966799091940976, + "epoch": 0.6957125449856896, "grad_norm": 0.0, - "learning_rate": 4.4497436821940195e-06, - "loss": 0.7611, + "learning_rate": 4.475720227438745e-06, + "loss": 0.7753, "step": 24551 }, { - "epoch": 0.6967082860385925, + "epoch": 0.6957408824279521, "grad_norm": 0.0, - "learning_rate": 4.448979185944317e-06, - "loss": 0.7723, + "learning_rate": 4.474955217705196e-06, + "loss": 0.9244, "step": 24552 }, { - "epoch": 0.6967366628830874, + "epoch": 0.6957692198702146, "grad_norm": 0.0, - "learning_rate": 4.448214736584339e-06, - "loss": 0.7502, + "learning_rate": 4.4741902545111495e-06, + "loss": 0.8272, "step": 24553 }, { - "epoch": 0.6967650397275823, + "epoch": 0.6957975573124769, "grad_norm": 0.0, - "learning_rate": 4.4474503341205385e-06, - "loss": 0.9936, + "learning_rate": 4.473425337863055e-06, + "loss": 0.7881, "step": 24554 }, { - "epoch": 0.6967934165720772, + "epoch": 0.6958258947547394, "grad_norm": 0.0, - "learning_rate": 4.446685978559375e-06, - "loss": 0.8228, + "learning_rate": 4.472660467767346e-06, + "loss": 0.8138, "step": 24555 }, { - "epoch": 0.696821793416572, + "epoch": 0.6958542321970019, "grad_norm": 0.0, - "learning_rate": 4.445921669907303e-06, - "loss": 0.8339, + "learning_rate": 4.471895644230475e-06, + "loss": 0.8782, "step": 24556 }, { - "epoch": 0.696850170261067, + "epoch": 0.6958825696392643, "grad_norm": 0.0, - "learning_rate": 4.445157408170781e-06, - "loss": 0.8812, + "learning_rate": 4.471130867258876e-06, + "loss": 0.8637, "step": 24557 }, { - "epoch": 0.6968785471055619, + "epoch": 0.6959109070815268, "grad_norm": 0.0, - "learning_rate": 4.444393193356269e-06, - "loss": 0.8986, + "learning_rate": 4.470366136858994e-06, + "loss": 0.7236, "step": 24558 }, { - "epoch": 0.6969069239500567, + "epoch": 0.6959392445237893, "grad_norm": 0.0, - "learning_rate": 4.443629025470213e-06, - "loss": 0.8216, + "learning_rate": 4.469601453037277e-06, + "loss": 0.7946, "step": 24559 }, { - "epoch": 0.6969353007945517, + "epoch": 0.6959675819660518, "grad_norm": 0.0, - "learning_rate": 4.442864904519072e-06, - "loss": 0.8317, + "learning_rate": 4.468836815800155e-06, + "loss": 0.6792, "step": 24560 }, { - "epoch": 0.6969636776390465, + "epoch": 0.6959959194083142, "grad_norm": 0.0, - "learning_rate": 4.442100830509305e-06, - "loss": 0.9721, + "learning_rate": 4.468072225154075e-06, + "loss": 0.7965, "step": 24561 }, { - "epoch": 0.6969920544835414, + "epoch": 0.6960242568505767, "grad_norm": 0.0, - "learning_rate": 4.441336803447358e-06, - "loss": 0.8317, + "learning_rate": 4.467307681105481e-06, + "loss": 0.8366, "step": 24562 }, { - "epoch": 0.6970204313280364, + "epoch": 0.6960525942928392, "grad_norm": 0.0, - "learning_rate": 4.440572823339689e-06, - "loss": 0.9218, + "learning_rate": 4.466543183660805e-06, + "loss": 0.8383, "step": 24563 }, { - "epoch": 0.6970488081725312, + "epoch": 0.6960809317351015, "grad_norm": 0.0, - "learning_rate": 4.439808890192755e-06, - "loss": 0.8501, + "learning_rate": 4.46577873282649e-06, + "loss": 0.8536, "step": 24564 }, { - "epoch": 0.6970771850170261, + "epoch": 0.696109269177364, "grad_norm": 0.0, - "learning_rate": 4.4390450040130015e-06, - "loss": 0.757, + "learning_rate": 4.465014328608977e-06, + "loss": 0.8724, "step": 24565 }, { - "epoch": 0.697105561861521, + "epoch": 0.6961376066196265, "grad_norm": 0.0, - "learning_rate": 4.438281164806885e-06, - "loss": 0.8777, + "learning_rate": 4.4642499710147025e-06, + "loss": 0.8051, "step": 24566 }, { - "epoch": 0.6971339387060159, + "epoch": 0.6961659440618889, "grad_norm": 0.0, - "learning_rate": 4.4375173725808615e-06, - "loss": 0.8314, + "learning_rate": 4.463485660050111e-06, + "loss": 0.7919, "step": 24567 }, { - "epoch": 0.6971623155505108, + "epoch": 0.6961942815041514, "grad_norm": 0.0, - "learning_rate": 4.436753627341375e-06, - "loss": 0.8046, + "learning_rate": 4.462721395721631e-06, + "loss": 0.8541, "step": 24568 }, { - "epoch": 0.6971906923950056, + "epoch": 0.6962226189464139, "grad_norm": 0.0, - "learning_rate": 4.43598992909488e-06, - "loss": 0.6321, + "learning_rate": 4.461957178035705e-06, + "loss": 0.9417, "step": 24569 }, { - "epoch": 0.6972190692395006, + "epoch": 0.6962509563886764, "grad_norm": 0.0, - "learning_rate": 4.435226277847828e-06, - "loss": 0.7631, + "learning_rate": 4.4611930069987755e-06, + "loss": 0.8119, "step": 24570 }, { - "epoch": 0.6972474460839955, + "epoch": 0.6962792938309388, "grad_norm": 0.0, - "learning_rate": 4.434462673606671e-06, - "loss": 0.9323, + "learning_rate": 4.4604288826172685e-06, + "loss": 0.8275, "step": 24571 }, { - "epoch": 0.6972758229284903, + "epoch": 0.6963076312732013, "grad_norm": 0.0, - "learning_rate": 4.433699116377861e-06, - "loss": 0.8151, + "learning_rate": 4.459664804897631e-06, + "loss": 0.7439, "step": 24572 }, { - "epoch": 0.6973041997729852, + "epoch": 0.6963359687154638, "grad_norm": 0.0, - "learning_rate": 4.432935606167842e-06, - "loss": 0.8422, + "learning_rate": 4.45890077384629e-06, + "loss": 0.8039, "step": 24573 }, { - "epoch": 0.6973325766174802, + "epoch": 0.6963643061577262, "grad_norm": 0.0, - "learning_rate": 4.432172142983065e-06, - "loss": 0.8207, + "learning_rate": 4.458136789469685e-06, + "loss": 0.8746, "step": 24574 }, { - "epoch": 0.697360953461975, + "epoch": 0.6963926435999886, "grad_norm": 0.0, - "learning_rate": 4.431408726829984e-06, - "loss": 0.9038, + "learning_rate": 4.457372851774256e-06, + "loss": 0.8275, "step": 24575 }, { - "epoch": 0.6973893303064699, + "epoch": 0.6964209810422511, "grad_norm": 0.0, - "learning_rate": 4.430645357715041e-06, - "loss": 0.9134, + "learning_rate": 4.456608960766429e-06, + "loss": 0.8202, "step": 24576 }, { - "epoch": 0.6974177071509648, + "epoch": 0.6964493184845136, "grad_norm": 0.0, - "learning_rate": 4.429882035644686e-06, - "loss": 0.8374, + "learning_rate": 4.455845116452644e-06, + "loss": 0.8445, "step": 24577 }, { - "epoch": 0.6974460839954597, + "epoch": 0.696477655926776, "grad_norm": 0.0, - "learning_rate": 4.429118760625373e-06, - "loss": 0.8154, + "learning_rate": 4.455081318839335e-06, + "loss": 0.9229, "step": 24578 }, { - "epoch": 0.6974744608399546, + "epoch": 0.6965059933690385, "grad_norm": 0.0, - "learning_rate": 4.428355532663538e-06, - "loss": 0.8968, + "learning_rate": 4.4543175679329345e-06, + "loss": 0.8429, "step": 24579 }, { - "epoch": 0.6975028376844495, + "epoch": 0.696534330811301, "grad_norm": 0.0, - "learning_rate": 4.427592351765637e-06, - "loss": 0.8948, + "learning_rate": 4.453553863739879e-06, + "loss": 0.8931, "step": 24580 }, { - "epoch": 0.6975312145289444, + "epoch": 0.6965626682535634, "grad_norm": 0.0, - "learning_rate": 4.426829217938117e-06, - "loss": 0.6896, + "learning_rate": 4.452790206266597e-06, + "loss": 0.7299, "step": 24581 }, { - "epoch": 0.6975595913734393, + "epoch": 0.6965910056958259, "grad_norm": 0.0, - "learning_rate": 4.426066131187416e-06, - "loss": 0.7888, + "learning_rate": 4.452026595519522e-06, + "loss": 0.7606, "step": 24582 }, { - "epoch": 0.6975879682179341, + "epoch": 0.6966193431380884, "grad_norm": 0.0, - "learning_rate": 4.425303091519986e-06, - "loss": 0.9174, + "learning_rate": 4.451263031505091e-06, + "loss": 0.7391, "step": 24583 }, { - "epoch": 0.6976163450624291, + "epoch": 0.6966476805803509, "grad_norm": 0.0, - "learning_rate": 4.424540098942275e-06, - "loss": 0.8548, + "learning_rate": 4.4504995142297294e-06, + "loss": 0.8667, "step": 24584 }, { - "epoch": 0.697644721906924, + "epoch": 0.6966760180226133, "grad_norm": 0.0, - "learning_rate": 4.423777153460717e-06, - "loss": 0.8335, + "learning_rate": 4.449736043699872e-06, + "loss": 0.8023, "step": 24585 }, { - "epoch": 0.6976730987514188, + "epoch": 0.6967043554648757, "grad_norm": 0.0, - "learning_rate": 4.423014255081771e-06, - "loss": 0.7575, + "learning_rate": 4.448972619921949e-06, + "loss": 0.8112, "step": 24586 }, { - "epoch": 0.6977014755959138, + "epoch": 0.6967326929071382, "grad_norm": 0.0, - "learning_rate": 4.422251403811872e-06, - "loss": 0.9205, + "learning_rate": 4.4482092429023886e-06, + "loss": 0.7942, "step": 24587 }, { - "epoch": 0.6977298524404086, + "epoch": 0.6967610303494006, "grad_norm": 0.0, - "learning_rate": 4.421488599657464e-06, - "loss": 0.7898, + "learning_rate": 4.447445912647623e-06, + "loss": 0.7939, "step": 24588 }, { - "epoch": 0.6977582292849035, + "epoch": 0.6967893677916631, "grad_norm": 0.0, - "learning_rate": 4.420725842624998e-06, - "loss": 0.896, + "learning_rate": 4.446682629164088e-06, + "loss": 0.9494, "step": 24589 }, { - "epoch": 0.6977866061293984, + "epoch": 0.6968177052339256, "grad_norm": 0.0, - "learning_rate": 4.419963132720907e-06, - "loss": 0.8784, + "learning_rate": 4.445919392458203e-06, + "loss": 0.9299, "step": 24590 }, { - "epoch": 0.6978149829738933, + "epoch": 0.696846042676188, "grad_norm": 0.0, - "learning_rate": 4.419200469951639e-06, - "loss": 0.7543, + "learning_rate": 4.4451562025364e-06, + "loss": 0.8042, "step": 24591 }, { - "epoch": 0.6978433598183882, + "epoch": 0.6968743801184505, "grad_norm": 0.0, - "learning_rate": 4.418437854323638e-06, - "loss": 0.8211, + "learning_rate": 4.44439305940511e-06, + "loss": 0.818, "step": 24592 }, { - "epoch": 0.697871736662883, + "epoch": 0.696902717560713, "grad_norm": 0.0, - "learning_rate": 4.41767528584334e-06, - "loss": 0.8828, + "learning_rate": 4.443629963070765e-06, + "loss": 0.9069, "step": 24593 }, { - "epoch": 0.697900113507378, + "epoch": 0.6969310550029755, "grad_norm": 0.0, - "learning_rate": 4.416912764517191e-06, - "loss": 0.9041, + "learning_rate": 4.442866913539783e-06, + "loss": 0.8891, "step": 24594 }, { - "epoch": 0.6979284903518729, + "epoch": 0.6969593924452379, "grad_norm": 0.0, - "learning_rate": 4.416150290351634e-06, - "loss": 0.7702, + "learning_rate": 4.442103910818597e-06, + "loss": 0.8502, "step": 24595 }, { - "epoch": 0.6979568671963677, + "epoch": 0.6969877298875004, "grad_norm": 0.0, - "learning_rate": 4.4153878633531024e-06, - "loss": 0.7717, + "learning_rate": 4.441340954913636e-06, + "loss": 0.8652, "step": 24596 }, { - "epoch": 0.6979852440408627, + "epoch": 0.6970160673297628, "grad_norm": 0.0, - "learning_rate": 4.4146254835280415e-06, - "loss": 0.8586, + "learning_rate": 4.440578045831323e-06, + "loss": 0.9218, "step": 24597 }, { - "epoch": 0.6980136208853576, + "epoch": 0.6970444047720252, "grad_norm": 0.0, - "learning_rate": 4.413863150882892e-06, - "loss": 0.7728, + "learning_rate": 4.439815183578085e-06, + "loss": 0.8043, "step": 24598 }, { - "epoch": 0.6980419977298524, + "epoch": 0.6970727422142877, "grad_norm": 0.0, - "learning_rate": 4.413100865424089e-06, - "loss": 0.912, + "learning_rate": 4.439052368160351e-06, + "loss": 0.7767, "step": 24599 }, { - "epoch": 0.6980703745743473, + "epoch": 0.6971010796565502, "grad_norm": 0.0, - "learning_rate": 4.412338627158075e-06, - "loss": 0.728, + "learning_rate": 4.438289599584541e-06, + "loss": 0.8297, "step": 24600 }, { - "epoch": 0.6980987514188423, + "epoch": 0.6971294170988127, "grad_norm": 0.0, - "learning_rate": 4.411576436091286e-06, - "loss": 0.7742, + "learning_rate": 4.437526877857083e-06, + "loss": 0.8363, "step": 24601 }, { - "epoch": 0.6981271282633371, + "epoch": 0.6971577545410751, "grad_norm": 0.0, - "learning_rate": 4.410814292230163e-06, - "loss": 0.872, + "learning_rate": 4.436764202984401e-06, + "loss": 0.7273, "step": 24602 }, { - "epoch": 0.698155505107832, + "epoch": 0.6971860919833376, "grad_norm": 0.0, - "learning_rate": 4.410052195581144e-06, - "loss": 0.8421, + "learning_rate": 4.436001574972921e-06, + "loss": 0.7772, "step": 24603 }, { - "epoch": 0.698183881952327, + "epoch": 0.6972144294256001, "grad_norm": 0.0, - "learning_rate": 4.409290146150668e-06, - "loss": 0.7837, + "learning_rate": 4.43523899382907e-06, + "loss": 0.8403, "step": 24604 }, { - "epoch": 0.6982122587968218, + "epoch": 0.6972427668678625, "grad_norm": 0.0, - "learning_rate": 4.4085281439451655e-06, - "loss": 0.816, + "learning_rate": 4.434476459559262e-06, + "loss": 0.7233, "step": 24605 }, { - "epoch": 0.6982406356413167, + "epoch": 0.697271104310125, "grad_norm": 0.0, - "learning_rate": 4.407766188971081e-06, - "loss": 0.8428, + "learning_rate": 4.43371397216993e-06, + "loss": 0.8792, "step": 24606 }, { - "epoch": 0.6982690124858115, + "epoch": 0.6972994417523874, "grad_norm": 0.0, - "learning_rate": 4.407004281234843e-06, - "loss": 0.7712, + "learning_rate": 4.4329515316674884e-06, + "loss": 0.7743, "step": 24607 }, { - "epoch": 0.6982973893303065, + "epoch": 0.6973277791946499, "grad_norm": 0.0, - "learning_rate": 4.406242420742892e-06, - "loss": 0.8063, + "learning_rate": 4.432189138058364e-06, + "loss": 0.8066, "step": 24608 }, { - "epoch": 0.6983257661748014, + "epoch": 0.6973561166369123, "grad_norm": 0.0, - "learning_rate": 4.405480607501666e-06, - "loss": 0.9367, + "learning_rate": 4.431426791348981e-06, + "loss": 0.7999, "step": 24609 }, { - "epoch": 0.6983541430192962, + "epoch": 0.6973844540791748, "grad_norm": 0.0, - "learning_rate": 4.4047188415175935e-06, - "loss": 0.8784, + "learning_rate": 4.430664491545754e-06, + "loss": 0.9255, "step": 24610 }, { - "epoch": 0.6983825198637912, + "epoch": 0.6974127915214373, "grad_norm": 0.0, - "learning_rate": 4.403957122797111e-06, - "loss": 0.8784, + "learning_rate": 4.429902238655108e-06, + "loss": 0.8956, "step": 24611 }, { - "epoch": 0.698410896708286, + "epoch": 0.6974411289636997, "grad_norm": 0.0, - "learning_rate": 4.40319545134666e-06, - "loss": 0.8726, + "learning_rate": 4.429140032683469e-06, + "loss": 0.8758, "step": 24612 }, { - "epoch": 0.6984392735527809, + "epoch": 0.6974694664059622, "grad_norm": 0.0, - "learning_rate": 4.402433827172664e-06, - "loss": 0.8826, + "learning_rate": 4.428377873637247e-06, + "loss": 0.8797, "step": 24613 }, { - "epoch": 0.6984676503972759, + "epoch": 0.6974978038482247, "grad_norm": 0.0, - "learning_rate": 4.401672250281561e-06, - "loss": 0.8644, + "learning_rate": 4.427615761522868e-06, + "loss": 0.8062, "step": 24614 }, { - "epoch": 0.6984960272417707, + "epoch": 0.6975261412904871, "grad_norm": 0.0, - "learning_rate": 4.4009107206797876e-06, - "loss": 0.9023, + "learning_rate": 4.42685369634675e-06, + "loss": 0.8589, "step": 24615 }, { - "epoch": 0.6985244040862656, + "epoch": 0.6975544787327496, "grad_norm": 0.0, - "learning_rate": 4.4001492383737665e-06, - "loss": 0.9016, + "learning_rate": 4.426091678115313e-06, + "loss": 0.7282, "step": 24616 }, { - "epoch": 0.6985527809307605, + "epoch": 0.697582816175012, "grad_norm": 0.0, - "learning_rate": 4.399387803369939e-06, - "loss": 0.8348, + "learning_rate": 4.42532970683498e-06, + "loss": 0.8515, "step": 24617 }, { - "epoch": 0.6985811577752554, + "epoch": 0.6976111536172745, "grad_norm": 0.0, - "learning_rate": 4.398626415674739e-06, - "loss": 0.8391, + "learning_rate": 4.42456778251216e-06, + "loss": 0.7915, "step": 24618 }, { - "epoch": 0.6986095346197503, + "epoch": 0.6976394910595369, "grad_norm": 0.0, - "learning_rate": 4.39786507529459e-06, - "loss": 0.8717, + "learning_rate": 4.423805905153278e-06, + "loss": 0.8488, "step": 24619 }, { - "epoch": 0.6986379114642451, + "epoch": 0.6976678285017994, "grad_norm": 0.0, - "learning_rate": 4.397103782235925e-06, - "loss": 0.9046, + "learning_rate": 4.423044074764752e-06, + "loss": 0.8187, "step": 24620 }, { - "epoch": 0.6986662883087401, + "epoch": 0.6976961659440619, "grad_norm": 0.0, - "learning_rate": 4.396342536505181e-06, - "loss": 0.8119, + "learning_rate": 4.422282291352993e-06, + "loss": 0.8026, "step": 24621 }, { - "epoch": 0.698694665153235, + "epoch": 0.6977245033863243, "grad_norm": 0.0, - "learning_rate": 4.395581338108781e-06, - "loss": 0.8915, + "learning_rate": 4.421520554924424e-06, + "loss": 0.8553, "step": 24622 }, { - "epoch": 0.6987230419977298, + "epoch": 0.6977528408285868, "grad_norm": 0.0, - "learning_rate": 4.394820187053156e-06, - "loss": 0.7803, + "learning_rate": 4.420758865485457e-06, + "loss": 0.7931, "step": 24623 }, { - "epoch": 0.6987514188422247, + "epoch": 0.6977811782708493, "grad_norm": 0.0, - "learning_rate": 4.394059083344741e-06, - "loss": 0.874, + "learning_rate": 4.419997223042509e-06, + "loss": 0.8494, "step": 24624 }, { - "epoch": 0.6987797956867197, + "epoch": 0.6978095157131118, "grad_norm": 0.0, - "learning_rate": 4.393298026989957e-06, - "loss": 0.8001, + "learning_rate": 4.419235627602001e-06, + "loss": 0.8155, "step": 24625 }, { - "epoch": 0.6988081725312145, + "epoch": 0.6978378531553742, "grad_norm": 0.0, - "learning_rate": 4.392537017995236e-06, - "loss": 0.8395, + "learning_rate": 4.41847407917034e-06, + "loss": 0.7861, "step": 24626 }, { - "epoch": 0.6988365493757094, + "epoch": 0.6978661905976367, "grad_norm": 0.0, - "learning_rate": 4.391776056367012e-06, - "loss": 0.8581, + "learning_rate": 4.4177125777539435e-06, + "loss": 0.7644, "step": 24627 }, { - "epoch": 0.6988649262202044, + "epoch": 0.6978945280398992, "grad_norm": 0.0, - "learning_rate": 4.391015142111703e-06, - "loss": 0.6972, + "learning_rate": 4.416951123359227e-06, + "loss": 0.7575, "step": 24628 }, { - "epoch": 0.6988933030646992, + "epoch": 0.6979228654821615, "grad_norm": 0.0, - "learning_rate": 4.3902542752357415e-06, - "loss": 0.7326, + "learning_rate": 4.416189715992605e-06, + "loss": 0.8683, "step": 24629 }, { - "epoch": 0.6989216799091941, + "epoch": 0.697951202924424, "grad_norm": 0.0, - "learning_rate": 4.389493455745558e-06, - "loss": 0.8339, + "learning_rate": 4.415428355660494e-06, + "loss": 0.8049, "step": 24630 }, { - "epoch": 0.698950056753689, + "epoch": 0.6979795403666865, "grad_norm": 0.0, - "learning_rate": 4.388732683647569e-06, - "loss": 0.8596, + "learning_rate": 4.4146670423692995e-06, + "loss": 0.8425, "step": 24631 }, { - "epoch": 0.6989784335981839, + "epoch": 0.698007877808949, "grad_norm": 0.0, - "learning_rate": 4.387971958948213e-06, - "loss": 0.9109, + "learning_rate": 4.413905776125439e-06, + "loss": 0.9549, "step": 24632 }, { - "epoch": 0.6990068104426788, + "epoch": 0.6980362152512114, "grad_norm": 0.0, - "learning_rate": 4.387211281653907e-06, - "loss": 0.8448, + "learning_rate": 4.413144556935328e-06, + "loss": 0.8247, "step": 24633 }, { - "epoch": 0.6990351872871736, + "epoch": 0.6980645526934739, "grad_norm": 0.0, - "learning_rate": 4.3864506517710804e-06, - "loss": 0.8313, + "learning_rate": 4.412383384805372e-06, + "loss": 0.7675, "step": 24634 }, { - "epoch": 0.6990635641316686, + "epoch": 0.6980928901357364, "grad_norm": 0.0, - "learning_rate": 4.38569006930616e-06, - "loss": 0.753, + "learning_rate": 4.411622259741989e-06, + "loss": 0.8303, "step": 24635 }, { - "epoch": 0.6990919409761635, + "epoch": 0.6981212275779988, "grad_norm": 0.0, - "learning_rate": 4.384929534265565e-06, - "loss": 0.7812, + "learning_rate": 4.4108611817515834e-06, + "loss": 0.81, "step": 24636 }, { - "epoch": 0.6991203178206583, + "epoch": 0.6981495650202613, "grad_norm": 0.0, - "learning_rate": 4.384169046655723e-06, - "loss": 0.8305, + "learning_rate": 4.4101001508405695e-06, + "loss": 0.7238, "step": 24637 }, { - "epoch": 0.6991486946651533, + "epoch": 0.6981779024625238, "grad_norm": 0.0, - "learning_rate": 4.383408606483061e-06, - "loss": 0.8398, + "learning_rate": 4.409339167015361e-06, + "loss": 0.8193, "step": 24638 }, { - "epoch": 0.6991770715096481, + "epoch": 0.6982062399047861, "grad_norm": 0.0, - "learning_rate": 4.382648213753994e-06, - "loss": 0.8907, + "learning_rate": 4.408578230282361e-06, + "loss": 0.9564, "step": 24639 }, { - "epoch": 0.699205448354143, + "epoch": 0.6982345773470486, "grad_norm": 0.0, - "learning_rate": 4.381887868474951e-06, - "loss": 0.8146, + "learning_rate": 4.407817340647983e-06, + "loss": 0.9365, "step": 24640 }, { - "epoch": 0.6992338251986379, + "epoch": 0.6982629147893111, "grad_norm": 0.0, - "learning_rate": 4.381127570652358e-06, - "loss": 0.8505, + "learning_rate": 4.4070564981186355e-06, + "loss": 0.8884, "step": 24641 }, { - "epoch": 0.6992622020431328, + "epoch": 0.6982912522315736, "grad_norm": 0.0, - "learning_rate": 4.380367320292629e-06, - "loss": 0.8612, + "learning_rate": 4.40629570270073e-06, + "loss": 0.7447, "step": 24642 }, { - "epoch": 0.6992905788876277, + "epoch": 0.698319589673836, "grad_norm": 0.0, - "learning_rate": 4.379607117402189e-06, - "loss": 0.7893, + "learning_rate": 4.405534954400675e-06, + "loss": 0.8044, "step": 24643 }, { - "epoch": 0.6993189557321225, + "epoch": 0.6983479271160985, "grad_norm": 0.0, - "learning_rate": 4.378846961987465e-06, - "loss": 0.796, + "learning_rate": 4.404774253224874e-06, + "loss": 0.7544, "step": 24644 }, { - "epoch": 0.6993473325766175, + "epoch": 0.698376264558361, "grad_norm": 0.0, - "learning_rate": 4.3780868540548695e-06, - "loss": 0.7549, + "learning_rate": 4.4040135991797366e-06, + "loss": 0.8387, "step": 24645 }, { - "epoch": 0.6993757094211124, + "epoch": 0.6984046020006234, "grad_norm": 0.0, - "learning_rate": 4.3773267936108265e-06, - "loss": 0.9508, + "learning_rate": 4.4032529922716735e-06, + "loss": 0.7936, "step": 24646 }, { - "epoch": 0.6994040862656072, + "epoch": 0.6984329394428859, "grad_norm": 0.0, - "learning_rate": 4.376566780661757e-06, - "loss": 0.7754, + "learning_rate": 4.402492432507086e-06, + "loss": 0.7814, "step": 24647 }, { - "epoch": 0.6994324631101022, + "epoch": 0.6984612768851484, "grad_norm": 0.0, - "learning_rate": 4.375806815214082e-06, - "loss": 0.8644, + "learning_rate": 4.401731919892384e-06, + "loss": 0.813, "step": 24648 }, { - "epoch": 0.6994608399545971, + "epoch": 0.6984896143274109, "grad_norm": 0.0, - "learning_rate": 4.3750468972742225e-06, - "loss": 0.7809, + "learning_rate": 4.4009714544339755e-06, + "loss": 0.792, "step": 24649 }, { - "epoch": 0.6994892167990919, + "epoch": 0.6985179517696732, "grad_norm": 0.0, - "learning_rate": 4.3742870268485906e-06, - "loss": 0.8932, + "learning_rate": 4.40021103613826e-06, + "loss": 0.8001, "step": 24650 }, { - "epoch": 0.6995175936435868, + "epoch": 0.6985462892119357, "grad_norm": 0.0, - "learning_rate": 4.373527203943609e-06, - "loss": 0.9189, + "learning_rate": 4.39945066501165e-06, + "loss": 0.8591, "step": 24651 }, { - "epoch": 0.6995459704880818, + "epoch": 0.6985746266541982, "grad_norm": 0.0, - "learning_rate": 4.372767428565701e-06, - "loss": 0.7742, + "learning_rate": 4.398690341060543e-06, + "loss": 0.8873, "step": 24652 }, { - "epoch": 0.6995743473325766, + "epoch": 0.6986029640964606, "grad_norm": 0.0, - "learning_rate": 4.372007700721275e-06, - "loss": 0.8286, + "learning_rate": 4.397930064291348e-06, + "loss": 0.8932, "step": 24653 }, { - "epoch": 0.6996027241770715, + "epoch": 0.6986313015387231, "grad_norm": 0.0, - "learning_rate": 4.371248020416752e-06, - "loss": 0.8757, + "learning_rate": 4.397169834710467e-06, + "loss": 0.7339, "step": 24654 }, { - "epoch": 0.6996311010215664, + "epoch": 0.6986596389809856, "grad_norm": 0.0, - "learning_rate": 4.370488387658555e-06, - "loss": 0.8627, + "learning_rate": 4.396409652324306e-06, + "loss": 0.8646, "step": 24655 }, { - "epoch": 0.6996594778660613, + "epoch": 0.6986879764232481, "grad_norm": 0.0, - "learning_rate": 4.369728802453091e-06, - "loss": 0.8186, + "learning_rate": 4.395649517139267e-06, + "loss": 0.8601, "step": 24656 }, { - "epoch": 0.6996878547105562, + "epoch": 0.6987163138655105, "grad_norm": 0.0, - "learning_rate": 4.368969264806781e-06, - "loss": 0.7877, + "learning_rate": 4.3948894291617584e-06, + "loss": 0.7379, "step": 24657 }, { - "epoch": 0.699716231555051, + "epoch": 0.698744651307773, "grad_norm": 0.0, - "learning_rate": 4.368209774726045e-06, - "loss": 0.9369, + "learning_rate": 4.394129388398172e-06, + "loss": 0.7652, "step": 24658 }, { - "epoch": 0.699744608399546, + "epoch": 0.6987729887500355, "grad_norm": 0.0, - "learning_rate": 4.367450332217291e-06, - "loss": 0.7722, + "learning_rate": 4.39336939485492e-06, + "loss": 0.7471, "step": 24659 }, { - "epoch": 0.6997729852440409, + "epoch": 0.6988013261922978, "grad_norm": 0.0, - "learning_rate": 4.366690937286936e-06, - "loss": 0.8128, + "learning_rate": 4.392609448538395e-06, + "loss": 0.8861, "step": 24660 }, { - "epoch": 0.6998013620885357, + "epoch": 0.6988296636345603, "grad_norm": 0.0, - "learning_rate": 4.3659315899414e-06, - "loss": 0.9592, + "learning_rate": 4.391849549455004e-06, + "loss": 0.8224, "step": 24661 }, { - "epoch": 0.6998297389330307, + "epoch": 0.6988580010768228, "grad_norm": 0.0, - "learning_rate": 4.365172290187086e-06, - "loss": 0.9352, + "learning_rate": 4.391089697611151e-06, + "loss": 0.8033, "step": 24662 }, { - "epoch": 0.6998581157775255, + "epoch": 0.6988863385190852, "grad_norm": 0.0, - "learning_rate": 4.364413038030423e-06, - "loss": 0.842, + "learning_rate": 4.3903298930132265e-06, + "loss": 0.7438, "step": 24663 }, { - "epoch": 0.6998864926220204, + "epoch": 0.6989146759613477, "grad_norm": 0.0, - "learning_rate": 4.363653833477812e-06, - "loss": 0.8501, + "learning_rate": 4.389570135667642e-06, + "loss": 0.8491, "step": 24664 }, { - "epoch": 0.6999148694665154, + "epoch": 0.6989430134036102, "grad_norm": 0.0, - "learning_rate": 4.36289467653567e-06, - "loss": 0.8722, + "learning_rate": 4.388810425580786e-06, + "loss": 0.8322, "step": 24665 }, { - "epoch": 0.6999432463110102, + "epoch": 0.6989713508458727, "grad_norm": 0.0, - "learning_rate": 4.362135567210415e-06, - "loss": 0.9048, + "learning_rate": 4.3880507627590655e-06, + "loss": 0.8118, "step": 24666 }, { - "epoch": 0.6999716231555051, + "epoch": 0.6989996882881351, "grad_norm": 0.0, - "learning_rate": 4.3613765055084485e-06, - "loss": 0.8504, + "learning_rate": 4.387291147208876e-06, + "loss": 0.8397, "step": 24667 }, { - "epoch": 0.7, + "epoch": 0.6990280257303976, "grad_norm": 0.0, - "learning_rate": 4.36061749143619e-06, - "loss": 0.7967, + "learning_rate": 4.386531578936618e-06, + "loss": 0.7633, "step": 24668 }, { - "epoch": 0.7000283768444949, + "epoch": 0.6990563631726601, "grad_norm": 0.0, - "learning_rate": 4.359858525000052e-06, - "loss": 0.8144, + "learning_rate": 4.38577205794869e-06, + "loss": 0.7586, "step": 24669 }, { - "epoch": 0.7000567536889898, + "epoch": 0.6990847006149224, "grad_norm": 0.0, - "learning_rate": 4.35909960620644e-06, - "loss": 0.8864, + "learning_rate": 4.38501258425149e-06, + "loss": 0.8877, "step": 24670 }, { - "epoch": 0.7000851305334846, + "epoch": 0.6991130380571849, "grad_norm": 0.0, - "learning_rate": 4.358340735061766e-06, - "loss": 0.8333, + "learning_rate": 4.384253157851413e-06, + "loss": 0.9002, "step": 24671 }, { - "epoch": 0.7001135073779796, + "epoch": 0.6991413754994474, "grad_norm": 0.0, - "learning_rate": 4.357581911572445e-06, - "loss": 0.7327, + "learning_rate": 4.3834937787548585e-06, + "loss": 0.7793, "step": 24672 }, { - "epoch": 0.7001418842224745, + "epoch": 0.6991697129417099, "grad_norm": 0.0, - "learning_rate": 4.35682313574488e-06, - "loss": 0.9391, + "learning_rate": 4.382734446968219e-06, + "loss": 0.9, "step": 24673 }, { - "epoch": 0.7001702610669693, + "epoch": 0.6991980503839723, "grad_norm": 0.0, - "learning_rate": 4.356064407585484e-06, - "loss": 0.8226, + "learning_rate": 4.381975162497892e-06, + "loss": 0.7506, "step": 24674 }, { - "epoch": 0.7001986379114642, + "epoch": 0.6992263878262348, "grad_norm": 0.0, - "learning_rate": 4.355305727100668e-06, - "loss": 0.961, + "learning_rate": 4.381215925350279e-06, + "loss": 0.8496, "step": 24675 }, { - "epoch": 0.7002270147559592, + "epoch": 0.6992547252684973, "grad_norm": 0.0, - "learning_rate": 4.354547094296836e-06, - "loss": 0.7304, + "learning_rate": 4.380456735531767e-06, + "loss": 0.7908, "step": 24676 }, { - "epoch": 0.700255391600454, + "epoch": 0.6992830627107597, "grad_norm": 0.0, - "learning_rate": 4.353788509180398e-06, - "loss": 0.8344, + "learning_rate": 4.379697593048755e-06, + "loss": 0.8105, "step": 24677 }, { - "epoch": 0.7002837684449489, + "epoch": 0.6993114001530222, "grad_norm": 0.0, - "learning_rate": 4.353029971757762e-06, - "loss": 0.8344, + "learning_rate": 4.3789384979076414e-06, + "loss": 0.9423, "step": 24678 }, { - "epoch": 0.7003121452894439, + "epoch": 0.6993397375952847, "grad_norm": 0.0, - "learning_rate": 4.352271482035336e-06, - "loss": 0.9312, + "learning_rate": 4.3781794501148116e-06, + "loss": 0.878, "step": 24679 }, { - "epoch": 0.7003405221339387, + "epoch": 0.6993680750375472, "grad_norm": 0.0, - "learning_rate": 4.3515130400195305e-06, - "loss": 0.8436, + "learning_rate": 4.377420449676664e-06, + "loss": 0.8918, "step": 24680 }, { - "epoch": 0.7003688989784336, + "epoch": 0.6993964124798095, "grad_norm": 0.0, - "learning_rate": 4.3507546457167445e-06, - "loss": 0.746, + "learning_rate": 4.376661496599593e-06, + "loss": 0.8072, "step": 24681 }, { - "epoch": 0.7003972758229285, + "epoch": 0.699424749922072, "grad_norm": 0.0, - "learning_rate": 4.3499962991333874e-06, - "loss": 0.8048, + "learning_rate": 4.375902590889988e-06, + "loss": 0.7816, "step": 24682 }, { - "epoch": 0.7004256526674234, + "epoch": 0.6994530873643345, "grad_norm": 0.0, - "learning_rate": 4.349238000275871e-06, - "loss": 0.7961, + "learning_rate": 4.375143732554249e-06, + "loss": 0.8117, "step": 24683 }, { - "epoch": 0.7004540295119183, + "epoch": 0.6994814248065969, "grad_norm": 0.0, - "learning_rate": 4.348479749150589e-06, - "loss": 0.8039, + "learning_rate": 4.37438492159876e-06, + "loss": 0.8687, "step": 24684 }, { - "epoch": 0.7004824063564131, + "epoch": 0.6995097622488594, "grad_norm": 0.0, - "learning_rate": 4.347721545763955e-06, - "loss": 0.8082, + "learning_rate": 4.373626158029915e-06, + "loss": 0.8828, "step": 24685 }, { - "epoch": 0.7005107832009081, + "epoch": 0.6995380996911219, "grad_norm": 0.0, - "learning_rate": 4.346963390122373e-06, - "loss": 0.8329, + "learning_rate": 4.372867441854109e-06, + "loss": 0.8407, "step": 24686 }, { - "epoch": 0.700539160045403, + "epoch": 0.6995664371333843, "grad_norm": 0.0, - "learning_rate": 4.346205282232242e-06, - "loss": 0.733, + "learning_rate": 4.3721087730777275e-06, + "loss": 0.8846, "step": 24687 }, { - "epoch": 0.7005675368898978, + "epoch": 0.6995947745756468, "grad_norm": 0.0, - "learning_rate": 4.34544722209997e-06, - "loss": 0.8129, + "learning_rate": 4.371350151707168e-06, + "loss": 0.7708, "step": 24688 }, { - "epoch": 0.7005959137343928, + "epoch": 0.6996231120179093, "grad_norm": 0.0, - "learning_rate": 4.344689209731963e-06, - "loss": 0.8329, + "learning_rate": 4.370591577748811e-06, + "loss": 0.9166, "step": 24689 }, { - "epoch": 0.7006242905788876, + "epoch": 0.6996514494601718, "grad_norm": 0.0, - "learning_rate": 4.343931245134616e-06, - "loss": 0.8298, + "learning_rate": 4.3698330512090535e-06, + "loss": 0.775, "step": 24690 }, { - "epoch": 0.7006526674233825, + "epoch": 0.6996797869024342, "grad_norm": 0.0, - "learning_rate": 4.3431733283143365e-06, - "loss": 0.8414, + "learning_rate": 4.369074572094285e-06, + "loss": 0.9018, "step": 24691 }, { - "epoch": 0.7006810442678774, + "epoch": 0.6997081243446966, "grad_norm": 0.0, - "learning_rate": 4.34241545927753e-06, - "loss": 0.8267, + "learning_rate": 4.36831614041089e-06, + "loss": 0.8062, "step": 24692 }, { - "epoch": 0.7007094211123723, + "epoch": 0.6997364617869591, "grad_norm": 0.0, - "learning_rate": 4.341657638030587e-06, - "loss": 0.8172, + "learning_rate": 4.367557756165259e-06, + "loss": 0.868, "step": 24693 }, { - "epoch": 0.7007377979568672, + "epoch": 0.6997647992292215, "grad_norm": 0.0, - "learning_rate": 4.340899864579924e-06, - "loss": 0.8276, + "learning_rate": 4.3667994193637794e-06, + "loss": 0.8501, "step": 24694 }, { - "epoch": 0.700766174801362, + "epoch": 0.699793136671484, "grad_norm": 0.0, - "learning_rate": 4.34014213893193e-06, - "loss": 0.8629, + "learning_rate": 4.366041130012841e-06, + "loss": 0.9832, "step": 24695 }, { - "epoch": 0.700794551645857, + "epoch": 0.6998214741137465, "grad_norm": 0.0, - "learning_rate": 4.33938446109301e-06, - "loss": 0.7901, + "learning_rate": 4.365282888118834e-06, + "loss": 0.8693, "step": 24696 }, { - "epoch": 0.7008229284903519, + "epoch": 0.699849811556009, "grad_norm": 0.0, - "learning_rate": 4.3386268310695675e-06, - "loss": 0.8572, + "learning_rate": 4.364524693688138e-06, + "loss": 0.7939, "step": 24697 }, { - "epoch": 0.7008513053348467, + "epoch": 0.6998781489982714, "grad_norm": 0.0, - "learning_rate": 4.337869248867995e-06, - "loss": 0.8784, + "learning_rate": 4.363766546727143e-06, + "loss": 0.8161, "step": 24698 }, { - "epoch": 0.7008796821793417, + "epoch": 0.6999064864405339, "grad_norm": 0.0, - "learning_rate": 4.337111714494696e-06, - "loss": 0.8506, + "learning_rate": 4.36300844724224e-06, + "loss": 0.8661, "step": 24699 }, { - "epoch": 0.7009080590238366, + "epoch": 0.6999348238827964, "grad_norm": 0.0, - "learning_rate": 4.336354227956072e-06, - "loss": 0.8776, + "learning_rate": 4.362250395239805e-06, + "loss": 0.8652, "step": 24700 }, { - "epoch": 0.7009364358683314, + "epoch": 0.6999631613250588, "grad_norm": 0.0, - "learning_rate": 4.335596789258515e-06, - "loss": 0.8533, + "learning_rate": 4.361492390726233e-06, + "loss": 0.8326, "step": 24701 }, { - "epoch": 0.7009648127128263, + "epoch": 0.6999914987673213, "grad_norm": 0.0, - "learning_rate": 4.334839398408426e-06, - "loss": 0.96, + "learning_rate": 4.3607344337079e-06, + "loss": 0.906, "step": 24702 }, { - "epoch": 0.7009931895573213, + "epoch": 0.7000198362095837, "grad_norm": 0.0, - "learning_rate": 4.334082055412207e-06, - "loss": 0.9534, + "learning_rate": 4.359976524191195e-06, + "loss": 0.8272, "step": 24703 }, { - "epoch": 0.7010215664018161, + "epoch": 0.7000481736518462, "grad_norm": 0.0, - "learning_rate": 4.3333247602762485e-06, - "loss": 0.8842, + "learning_rate": 4.359218662182506e-06, + "loss": 0.7063, "step": 24704 }, { - "epoch": 0.701049943246311, + "epoch": 0.7000765110941086, "grad_norm": 0.0, - "learning_rate": 4.332567513006951e-06, - "loss": 0.8137, + "learning_rate": 4.35846084768821e-06, + "loss": 0.8027, "step": 24705 }, { - "epoch": 0.701078320090806, + "epoch": 0.7001048485363711, "grad_norm": 0.0, - "learning_rate": 4.331810313610713e-06, - "loss": 0.7428, + "learning_rate": 4.3577030807146925e-06, + "loss": 0.8402, "step": 24706 }, { - "epoch": 0.7011066969353008, + "epoch": 0.7001331859786336, "grad_norm": 0.0, - "learning_rate": 4.331053162093924e-06, - "loss": 0.8613, + "learning_rate": 4.356945361268337e-06, + "loss": 0.798, "step": 24707 }, { - "epoch": 0.7011350737797957, + "epoch": 0.700161523420896, "grad_norm": 0.0, - "learning_rate": 4.330296058462982e-06, - "loss": 0.8881, + "learning_rate": 4.3561876893555264e-06, + "loss": 0.8911, "step": 24708 }, { - "epoch": 0.7011634506242905, + "epoch": 0.7001898608631585, "grad_norm": 0.0, - "learning_rate": 4.32953900272429e-06, - "loss": 0.8657, + "learning_rate": 4.355430064982647e-06, + "loss": 0.8899, "step": 24709 }, { - "epoch": 0.7011918274687855, + "epoch": 0.700218198305421, "grad_norm": 0.0, - "learning_rate": 4.3287819948842334e-06, - "loss": 0.9183, + "learning_rate": 4.354672488156071e-06, + "loss": 0.8425, "step": 24710 }, { - "epoch": 0.7012202043132804, + "epoch": 0.7002465357476834, "grad_norm": 0.0, - "learning_rate": 4.328025034949211e-06, - "loss": 0.8133, + "learning_rate": 4.353914958882186e-06, + "loss": 0.7869, "step": 24711 }, { - "epoch": 0.7012485811577752, + "epoch": 0.7002748731899459, "grad_norm": 0.0, - "learning_rate": 4.32726812292562e-06, - "loss": 0.8626, + "learning_rate": 4.353157477167375e-06, + "loss": 0.8389, "step": 24712 }, { - "epoch": 0.7012769580022702, + "epoch": 0.7003032106322084, "grad_norm": 0.0, - "learning_rate": 4.326511258819846e-06, - "loss": 0.7636, + "learning_rate": 4.3524000430180125e-06, + "loss": 0.9022, "step": 24713 }, { - "epoch": 0.701305334846765, + "epoch": 0.7003315480744708, "grad_norm": 0.0, - "learning_rate": 4.325754442638289e-06, - "loss": 0.8256, + "learning_rate": 4.351642656440482e-06, + "loss": 0.9137, "step": 24714 }, { - "epoch": 0.7013337116912599, + "epoch": 0.7003598855167332, "grad_norm": 0.0, - "learning_rate": 4.324997674387337e-06, - "loss": 0.9475, + "learning_rate": 4.350885317441166e-06, + "loss": 0.8029, "step": 24715 }, { - "epoch": 0.7013620885357549, + "epoch": 0.7003882229589957, "grad_norm": 0.0, - "learning_rate": 4.324240954073383e-06, - "loss": 0.8175, + "learning_rate": 4.350128026026437e-06, + "loss": 0.7618, "step": 24716 }, { - "epoch": 0.7013904653802497, + "epoch": 0.7004165604012582, "grad_norm": 0.0, - "learning_rate": 4.323484281702827e-06, - "loss": 0.8831, + "learning_rate": 4.349370782202681e-06, + "loss": 0.8442, "step": 24717 }, { - "epoch": 0.7014188422247446, + "epoch": 0.7004448978435206, "grad_norm": 0.0, - "learning_rate": 4.322727657282048e-06, - "loss": 0.8914, + "learning_rate": 4.3486135859762705e-06, + "loss": 0.8896, "step": 24718 }, { - "epoch": 0.7014472190692395, + "epoch": 0.7004732352857831, "grad_norm": 0.0, - "learning_rate": 4.3219710808174465e-06, - "loss": 0.7773, + "learning_rate": 4.347856437353584e-06, + "loss": 0.7712, "step": 24719 }, { - "epoch": 0.7014755959137344, + "epoch": 0.7005015727280456, "grad_norm": 0.0, - "learning_rate": 4.321214552315413e-06, - "loss": 0.7747, + "learning_rate": 4.347099336341004e-06, + "loss": 0.8077, "step": 24720 }, { - "epoch": 0.7015039727582293, + "epoch": 0.7005299101703081, "grad_norm": 0.0, - "learning_rate": 4.320458071782331e-06, - "loss": 0.8088, + "learning_rate": 4.346342282944905e-06, + "loss": 0.8614, "step": 24721 }, { - "epoch": 0.7015323496027241, + "epoch": 0.7005582476125705, "grad_norm": 0.0, - "learning_rate": 4.319701639224596e-06, - "loss": 0.787, + "learning_rate": 4.3455852771716675e-06, + "loss": 0.725, "step": 24722 }, { - "epoch": 0.7015607264472191, + "epoch": 0.700586585054833, "grad_norm": 0.0, - "learning_rate": 4.3189452546486e-06, - "loss": 0.8452, + "learning_rate": 4.344828319027662e-06, + "loss": 0.8218, "step": 24723 }, { - "epoch": 0.701589103291714, + "epoch": 0.7006149224970954, "grad_norm": 0.0, - "learning_rate": 4.318188918060721e-06, - "loss": 0.754, + "learning_rate": 4.344071408519267e-06, + "loss": 0.8876, "step": 24724 }, { - "epoch": 0.7016174801362088, + "epoch": 0.7006432599393578, "grad_norm": 0.0, - "learning_rate": 4.31743262946736e-06, - "loss": 0.6851, + "learning_rate": 4.343314545652863e-06, + "loss": 0.8359, "step": 24725 }, { - "epoch": 0.7016458569807037, + "epoch": 0.7006715973816203, "grad_norm": 0.0, - "learning_rate": 4.316676388874904e-06, - "loss": 0.9111, + "learning_rate": 4.342557730434818e-06, + "loss": 0.8204, "step": 24726 }, { - "epoch": 0.7016742338251987, + "epoch": 0.7006999348238828, "grad_norm": 0.0, - "learning_rate": 4.315920196289735e-06, - "loss": 0.8405, + "learning_rate": 4.341800962871508e-06, + "loss": 0.7468, "step": 24727 }, { - "epoch": 0.7017026106696935, + "epoch": 0.7007282722661452, "grad_norm": 0.0, - "learning_rate": 4.315164051718243e-06, - "loss": 0.8559, + "learning_rate": 4.341044242969315e-06, + "loss": 0.7929, "step": 24728 }, { - "epoch": 0.7017309875141884, + "epoch": 0.7007566097084077, "grad_norm": 0.0, - "learning_rate": 4.3144079551668205e-06, - "loss": 0.844, + "learning_rate": 4.340287570734604e-06, + "loss": 0.894, "step": 24729 }, { - "epoch": 0.7017593643586834, + "epoch": 0.7007849471506702, "grad_norm": 0.0, - "learning_rate": 4.313651906641845e-06, - "loss": 0.8258, + "learning_rate": 4.339530946173754e-06, + "loss": 0.7406, "step": 24730 }, { - "epoch": 0.7017877412031782, + "epoch": 0.7008132845929327, "grad_norm": 0.0, - "learning_rate": 4.312895906149708e-06, - "loss": 0.7691, + "learning_rate": 4.3387743692931365e-06, + "loss": 0.8473, "step": 24731 }, { - "epoch": 0.7018161180476731, + "epoch": 0.7008416220351951, "grad_norm": 0.0, - "learning_rate": 4.312139953696797e-06, - "loss": 0.866, + "learning_rate": 4.3380178400991225e-06, + "loss": 0.8008, "step": 24732 }, { - "epoch": 0.7018444948921679, + "epoch": 0.7008699594774576, "grad_norm": 0.0, - "learning_rate": 4.311384049289495e-06, - "loss": 0.8041, + "learning_rate": 4.337261358598087e-06, + "loss": 0.911, "step": 24733 }, { - "epoch": 0.7018728717366629, + "epoch": 0.70089829691972, "grad_norm": 0.0, - "learning_rate": 4.3106281929341855e-06, - "loss": 0.7866, + "learning_rate": 4.336504924796402e-06, + "loss": 0.7617, "step": 24734 }, { - "epoch": 0.7019012485811578, + "epoch": 0.7009266343619824, "grad_norm": 0.0, - "learning_rate": 4.309872384637259e-06, - "loss": 0.905, + "learning_rate": 4.335748538700439e-06, + "loss": 0.8669, "step": 24735 }, { - "epoch": 0.7019296254256526, + "epoch": 0.7009549718042449, "grad_norm": 0.0, - "learning_rate": 4.309116624405093e-06, - "loss": 0.8184, + "learning_rate": 4.334992200316573e-06, + "loss": 0.7324, "step": 24736 }, { - "epoch": 0.7019580022701476, + "epoch": 0.7009833092465074, "grad_norm": 0.0, - "learning_rate": 4.308360912244074e-06, - "loss": 0.7883, + "learning_rate": 4.334235909651169e-06, + "loss": 0.8788, "step": 24737 }, { - "epoch": 0.7019863791146425, + "epoch": 0.7010116466887699, "grad_norm": 0.0, - "learning_rate": 4.307605248160591e-06, - "loss": 0.8121, + "learning_rate": 4.333479666710603e-06, + "loss": 0.8039, "step": 24738 }, { - "epoch": 0.7020147559591373, + "epoch": 0.7010399841310323, "grad_norm": 0.0, - "learning_rate": 4.306849632161015e-06, - "loss": 0.8872, + "learning_rate": 4.332723471501238e-06, + "loss": 0.9565, "step": 24739 }, { - "epoch": 0.7020431328036323, + "epoch": 0.7010683215732948, "grad_norm": 0.0, - "learning_rate": 4.306094064251742e-06, - "loss": 0.7977, + "learning_rate": 4.331967324029447e-06, + "loss": 0.8183, "step": 24740 }, { - "epoch": 0.7020715096481271, + "epoch": 0.7010966590155573, "grad_norm": 0.0, - "learning_rate": 4.305338544439146e-06, - "loss": 0.7802, + "learning_rate": 4.331211224301605e-06, + "loss": 0.7677, "step": 24741 }, { - "epoch": 0.702099886492622, + "epoch": 0.7011249964578197, "grad_norm": 0.0, - "learning_rate": 4.304583072729611e-06, - "loss": 0.8788, + "learning_rate": 4.3304551723240705e-06, + "loss": 0.7738, "step": 24742 }, { - "epoch": 0.7021282633371169, + "epoch": 0.7011533339000822, "grad_norm": 0.0, - "learning_rate": 4.303827649129522e-06, - "loss": 0.759, + "learning_rate": 4.329699168103218e-06, + "loss": 0.8636, "step": 24743 }, { - "epoch": 0.7021566401816118, + "epoch": 0.7011816713423447, "grad_norm": 0.0, - "learning_rate": 4.3030722736452545e-06, - "loss": 0.9512, + "learning_rate": 4.32894321164542e-06, + "loss": 0.7869, "step": 24744 }, { - "epoch": 0.7021850170261067, + "epoch": 0.7012100087846072, "grad_norm": 0.0, - "learning_rate": 4.302316946283192e-06, - "loss": 0.8904, + "learning_rate": 4.328187302957034e-06, + "loss": 0.82, "step": 24745 }, { - "epoch": 0.7022133938706016, + "epoch": 0.7012383462268695, "grad_norm": 0.0, - "learning_rate": 4.301561667049716e-06, - "loss": 0.8293, + "learning_rate": 4.327431442044434e-06, + "loss": 0.816, "step": 24746 }, { - "epoch": 0.7022417707150965, + "epoch": 0.701266683669132, "grad_norm": 0.0, - "learning_rate": 4.300806435951203e-06, - "loss": 0.9927, + "learning_rate": 4.326675628913985e-06, + "loss": 0.8785, "step": 24747 }, { - "epoch": 0.7022701475595914, + "epoch": 0.7012950211113945, "grad_norm": 0.0, - "learning_rate": 4.300051252994032e-06, - "loss": 0.933, + "learning_rate": 4.325919863572052e-06, + "loss": 0.8598, "step": 24748 }, { - "epoch": 0.7022985244040862, + "epoch": 0.7013233585536569, "grad_norm": 0.0, - "learning_rate": 4.299296118184589e-06, - "loss": 0.8892, + "learning_rate": 4.325164146025009e-06, + "loss": 0.8462, "step": 24749 }, { - "epoch": 0.7023269012485811, + "epoch": 0.7013516959959194, "grad_norm": 0.0, - "learning_rate": 4.2985410315292455e-06, - "loss": 0.9164, + "learning_rate": 4.324408476279211e-06, + "loss": 0.8169, "step": 24750 }, { - "epoch": 0.7023552780930761, + "epoch": 0.7013800334381819, "grad_norm": 0.0, - "learning_rate": 4.297785993034381e-06, - "loss": 0.9053, + "learning_rate": 4.323652854341032e-06, + "loss": 0.6307, "step": 24751 }, { - "epoch": 0.7023836549375709, + "epoch": 0.7014083708804443, "grad_norm": 0.0, - "learning_rate": 4.297031002706377e-06, - "loss": 0.7591, + "learning_rate": 4.322897280216829e-06, + "loss": 0.763, "step": 24752 }, { - "epoch": 0.7024120317820658, + "epoch": 0.7014367083227068, "grad_norm": 0.0, - "learning_rate": 4.296276060551607e-06, - "loss": 0.8099, + "learning_rate": 4.322141753912971e-06, + "loss": 0.8719, "step": 24753 }, { - "epoch": 0.7024404086265608, + "epoch": 0.7014650457649693, "grad_norm": 0.0, - "learning_rate": 4.29552116657645e-06, - "loss": 0.7871, + "learning_rate": 4.321386275435824e-06, + "loss": 0.8416, "step": 24754 }, { - "epoch": 0.7024687854710556, + "epoch": 0.7014933832072318, "grad_norm": 0.0, - "learning_rate": 4.2947663207872804e-06, - "loss": 0.7962, + "learning_rate": 4.320630844791746e-06, + "loss": 0.823, "step": 24755 }, { - "epoch": 0.7024971623155505, + "epoch": 0.7015217206494941, "grad_norm": 0.0, - "learning_rate": 4.294011523190477e-06, - "loss": 0.8552, + "learning_rate": 4.319875461987103e-06, + "loss": 0.8489, "step": 24756 }, { - "epoch": 0.7025255391600455, + "epoch": 0.7015500580917566, "grad_norm": 0.0, - "learning_rate": 4.293256773792418e-06, - "loss": 0.8033, + "learning_rate": 4.319120127028263e-06, + "loss": 0.8095, "step": 24757 }, { - "epoch": 0.7025539160045403, + "epoch": 0.7015783955340191, "grad_norm": 0.0, - "learning_rate": 4.292502072599471e-06, - "loss": 0.915, + "learning_rate": 4.318364839921579e-06, + "loss": 0.9977, "step": 24758 }, { - "epoch": 0.7025822928490352, + "epoch": 0.7016067329762815, "grad_norm": 0.0, - "learning_rate": 4.291747419618017e-06, - "loss": 0.8728, + "learning_rate": 4.317609600673418e-06, + "loss": 0.9116, "step": 24759 }, { - "epoch": 0.70261066969353, + "epoch": 0.701635070418544, "grad_norm": 0.0, - "learning_rate": 4.290992814854432e-06, - "loss": 0.8915, + "learning_rate": 4.316854409290141e-06, + "loss": 0.827, "step": 24760 }, { - "epoch": 0.702639046538025, + "epoch": 0.7016634078608065, "grad_norm": 0.0, - "learning_rate": 4.290238258315085e-06, - "loss": 0.8623, + "learning_rate": 4.316099265778111e-06, + "loss": 0.9016, "step": 24761 }, { - "epoch": 0.7026674233825199, + "epoch": 0.701691745303069, "grad_norm": 0.0, - "learning_rate": 4.28948375000635e-06, - "loss": 0.7996, + "learning_rate": 4.315344170143691e-06, + "loss": 0.7761, "step": 24762 }, { - "epoch": 0.7026958002270147, + "epoch": 0.7017200827453314, "grad_norm": 0.0, - "learning_rate": 4.288729289934608e-06, - "loss": 0.8275, + "learning_rate": 4.314589122393232e-06, + "loss": 0.8273, "step": 24763 }, { - "epoch": 0.7027241770715097, + "epoch": 0.7017484201875939, "grad_norm": 0.0, - "learning_rate": 4.287974878106222e-06, - "loss": 0.8493, + "learning_rate": 4.313834122533102e-06, + "loss": 0.8924, "step": 24764 }, { - "epoch": 0.7027525539160046, + "epoch": 0.7017767576298564, "grad_norm": 0.0, - "learning_rate": 4.287220514527569e-06, - "loss": 0.7458, + "learning_rate": 4.3130791705696626e-06, + "loss": 0.8748, "step": 24765 }, { - "epoch": 0.7027809307604994, + "epoch": 0.7018050950721187, "grad_norm": 0.0, - "learning_rate": 4.286466199205025e-06, - "loss": 0.7979, + "learning_rate": 4.312324266509265e-06, + "loss": 0.7044, "step": 24766 }, { - "epoch": 0.7028093076049943, + "epoch": 0.7018334325143812, "grad_norm": 0.0, - "learning_rate": 4.2857119321449536e-06, - "loss": 0.9006, + "learning_rate": 4.3115694103582764e-06, + "loss": 0.8157, "step": 24767 }, { - "epoch": 0.7028376844494892, + "epoch": 0.7018617699566437, "grad_norm": 0.0, - "learning_rate": 4.284957713353731e-06, - "loss": 0.8017, + "learning_rate": 4.3108146021230465e-06, + "loss": 0.8399, "step": 24768 }, { - "epoch": 0.7028660612939841, + "epoch": 0.7018901073989062, "grad_norm": 0.0, - "learning_rate": 4.284203542837732e-06, - "loss": 0.9023, + "learning_rate": 4.310059841809938e-06, + "loss": 0.9192, "step": 24769 }, { - "epoch": 0.702894438138479, + "epoch": 0.7019184448411686, "grad_norm": 0.0, - "learning_rate": 4.283449420603312e-06, - "loss": 0.8898, + "learning_rate": 4.309305129425312e-06, + "loss": 0.9233, "step": 24770 }, { - "epoch": 0.7029228149829739, + "epoch": 0.7019467822834311, "grad_norm": 0.0, - "learning_rate": 4.2826953466568626e-06, - "loss": 0.8231, + "learning_rate": 4.308550464975518e-06, + "loss": 0.8326, "step": 24771 }, { - "epoch": 0.7029511918274688, + "epoch": 0.7019751197256936, "grad_norm": 0.0, - "learning_rate": 4.281941321004738e-06, - "loss": 0.8453, + "learning_rate": 4.307795848466918e-06, + "loss": 0.866, "step": 24772 }, { - "epoch": 0.7029795686719637, + "epoch": 0.702003457167956, "grad_norm": 0.0, - "learning_rate": 4.2811873436533116e-06, - "loss": 0.7674, + "learning_rate": 4.307041279905867e-06, + "loss": 0.867, "step": 24773 }, { - "epoch": 0.7030079455164586, + "epoch": 0.7020317946102185, "grad_norm": 0.0, - "learning_rate": 4.2804334146089566e-06, - "loss": 0.7903, + "learning_rate": 4.306286759298721e-06, + "loss": 0.8818, "step": 24774 }, { - "epoch": 0.7030363223609535, + "epoch": 0.702060132052481, "grad_norm": 0.0, - "learning_rate": 4.2796795338780336e-06, - "loss": 0.8443, + "learning_rate": 4.30553228665184e-06, + "loss": 0.9192, "step": 24775 }, { - "epoch": 0.7030646992054483, + "epoch": 0.7020884694947434, "grad_norm": 0.0, - "learning_rate": 4.278925701466915e-06, - "loss": 0.7853, + "learning_rate": 4.30477786197157e-06, + "loss": 0.8319, "step": 24776 }, { - "epoch": 0.7030930760499432, + "epoch": 0.7021168069370058, "grad_norm": 0.0, - "learning_rate": 4.2781719173819725e-06, - "loss": 0.8109, + "learning_rate": 4.304023485264273e-06, + "loss": 0.8362, "step": 24777 }, { - "epoch": 0.7031214528944382, + "epoch": 0.7021451443792683, "grad_norm": 0.0, - "learning_rate": 4.2774181816295645e-06, - "loss": 0.7489, + "learning_rate": 4.3032691565363034e-06, + "loss": 0.7917, "step": 24778 }, { - "epoch": 0.703149829738933, + "epoch": 0.7021734818215308, "grad_norm": 0.0, - "learning_rate": 4.276664494216063e-06, - "loss": 0.8493, + "learning_rate": 4.30251487579401e-06, + "loss": 0.8275, "step": 24779 }, { - "epoch": 0.7031782065834279, + "epoch": 0.7022018192637932, "grad_norm": 0.0, - "learning_rate": 4.275910855147837e-06, - "loss": 0.8453, + "learning_rate": 4.301760643043754e-06, + "loss": 0.7965, "step": 24780 }, { - "epoch": 0.7032065834279229, + "epoch": 0.7022301567060557, "grad_norm": 0.0, - "learning_rate": 4.275157264431246e-06, - "loss": 0.8415, + "learning_rate": 4.301006458291879e-06, + "loss": 0.776, "step": 24781 }, { - "epoch": 0.7032349602724177, + "epoch": 0.7022584941483182, "grad_norm": 0.0, - "learning_rate": 4.274403722072658e-06, - "loss": 0.8193, + "learning_rate": 4.300252321544744e-06, + "loss": 0.7542, "step": 24782 }, { - "epoch": 0.7032633371169126, + "epoch": 0.7022868315905806, "grad_norm": 0.0, - "learning_rate": 4.273650228078444e-06, - "loss": 0.8791, + "learning_rate": 4.299498232808704e-06, + "loss": 0.8538, "step": 24783 }, { - "epoch": 0.7032917139614074, + "epoch": 0.7023151690328431, "grad_norm": 0.0, - "learning_rate": 4.27289678245496e-06, - "loss": 0.8417, + "learning_rate": 4.298744192090103e-06, + "loss": 0.9374, "step": 24784 }, { - "epoch": 0.7033200908059024, + "epoch": 0.7023435064751056, "grad_norm": 0.0, - "learning_rate": 4.272143385208574e-06, - "loss": 0.9188, + "learning_rate": 4.2979901993952975e-06, + "loss": 0.7764, "step": 24785 }, { - "epoch": 0.7033484676503973, + "epoch": 0.7023718439173681, "grad_norm": 0.0, - "learning_rate": 4.271390036345651e-06, - "loss": 0.8309, + "learning_rate": 4.297236254730637e-06, + "loss": 0.8104, "step": 24786 }, { - "epoch": 0.7033768444948921, + "epoch": 0.7024001813596304, "grad_norm": 0.0, - "learning_rate": 4.270636735872553e-06, - "loss": 0.8011, + "learning_rate": 4.296482358102474e-06, + "loss": 0.7859, "step": 24787 }, { - "epoch": 0.7034052213393871, + "epoch": 0.7024285188018929, "grad_norm": 0.0, - "learning_rate": 4.2698834837956484e-06, - "loss": 0.9146, + "learning_rate": 4.29572850951716e-06, + "loss": 0.8721, "step": 24788 }, { - "epoch": 0.703433598183882, + "epoch": 0.7024568562441554, "grad_norm": 0.0, - "learning_rate": 4.269130280121291e-06, - "loss": 0.8353, + "learning_rate": 4.294974708981041e-06, + "loss": 0.8511, "step": 24789 }, { - "epoch": 0.7034619750283768, + "epoch": 0.7024851936864178, "grad_norm": 0.0, - "learning_rate": 4.268377124855849e-06, - "loss": 0.9107, + "learning_rate": 4.294220956500469e-06, + "loss": 0.8437, "step": 24790 }, { - "epoch": 0.7034903518728718, + "epoch": 0.7025135311286803, "grad_norm": 0.0, - "learning_rate": 4.267624018005686e-06, - "loss": 0.7966, + "learning_rate": 4.2934672520817944e-06, + "loss": 0.7963, "step": 24791 }, { - "epoch": 0.7035187287173666, + "epoch": 0.7025418685709428, "grad_norm": 0.0, - "learning_rate": 4.266870959577157e-06, - "loss": 0.8712, + "learning_rate": 4.292713595731363e-06, + "loss": 0.816, "step": 24792 }, { - "epoch": 0.7035471055618615, + "epoch": 0.7025702060132053, "grad_norm": 0.0, - "learning_rate": 4.266117949576627e-06, - "loss": 0.8501, + "learning_rate": 4.291959987455522e-06, + "loss": 0.797, "step": 24793 }, { - "epoch": 0.7035754824063564, + "epoch": 0.7025985434554677, "grad_norm": 0.0, - "learning_rate": 4.2653649880104595e-06, - "loss": 0.8966, + "learning_rate": 4.2912064272606255e-06, + "loss": 0.7771, "step": 24794 }, { - "epoch": 0.7036038592508513, + "epoch": 0.7026268808977302, "grad_norm": 0.0, - "learning_rate": 4.264612074885008e-06, - "loss": 0.8168, + "learning_rate": 4.290452915153015e-06, + "loss": 0.7818, "step": 24795 }, { - "epoch": 0.7036322360953462, + "epoch": 0.7026552183399927, "grad_norm": 0.0, - "learning_rate": 4.263859210206637e-06, - "loss": 0.83, + "learning_rate": 4.289699451139043e-06, + "loss": 0.8118, "step": 24796 }, { - "epoch": 0.7036606129398411, + "epoch": 0.702683555782255, "grad_norm": 0.0, - "learning_rate": 4.263106393981708e-06, - "loss": 0.8477, + "learning_rate": 4.288946035225049e-06, + "loss": 0.8533, "step": 24797 }, { - "epoch": 0.703688989784336, + "epoch": 0.7027118932245175, "grad_norm": 0.0, - "learning_rate": 4.262353626216575e-06, - "loss": 0.8787, + "learning_rate": 4.288192667417384e-06, + "loss": 0.9007, "step": 24798 }, { - "epoch": 0.7037173666288309, + "epoch": 0.70274023066678, "grad_norm": 0.0, - "learning_rate": 4.261600906917596e-06, - "loss": 0.8133, + "learning_rate": 4.2874393477223915e-06, + "loss": 0.8212, "step": 24799 }, { - "epoch": 0.7037457434733257, + "epoch": 0.7027685681090424, "grad_norm": 0.0, - "learning_rate": 4.260848236091135e-06, - "loss": 0.8557, + "learning_rate": 4.2866860761464205e-06, + "loss": 0.803, "step": 24800 }, { - "epoch": 0.7037741203178206, + "epoch": 0.7027969055513049, "grad_norm": 0.0, - "learning_rate": 4.260095613743546e-06, - "loss": 0.8428, + "learning_rate": 4.2859328526958165e-06, + "loss": 0.7882, "step": 24801 }, { - "epoch": 0.7038024971623156, + "epoch": 0.7028252429935674, "grad_norm": 0.0, - "learning_rate": 4.25934303988119e-06, - "loss": 0.8896, + "learning_rate": 4.285179677376919e-06, + "loss": 0.8109, "step": 24802 }, { - "epoch": 0.7038308740068104, + "epoch": 0.7028535804358299, "grad_norm": 0.0, - "learning_rate": 4.258590514510419e-06, - "loss": 0.8691, + "learning_rate": 4.2844265501960745e-06, + "loss": 0.79, "step": 24803 }, { - "epoch": 0.7038592508513053, + "epoch": 0.7028819178780923, "grad_norm": 0.0, - "learning_rate": 4.257838037637591e-06, - "loss": 0.8141, + "learning_rate": 4.283673471159632e-06, + "loss": 0.7631, "step": 24804 }, { - "epoch": 0.7038876276958003, + "epoch": 0.7029102553203548, "grad_norm": 0.0, - "learning_rate": 4.2570856092690686e-06, - "loss": 0.7505, + "learning_rate": 4.282920440273927e-06, + "loss": 0.8741, "step": 24805 }, { - "epoch": 0.7039160045402951, + "epoch": 0.7029385927626173, "grad_norm": 0.0, - "learning_rate": 4.256333229411197e-06, - "loss": 0.7638, + "learning_rate": 4.282167457545306e-06, + "loss": 0.8008, "step": 24806 }, { - "epoch": 0.70394438138479, + "epoch": 0.7029669302048797, "grad_norm": 0.0, - "learning_rate": 4.255580898070337e-06, - "loss": 0.7949, + "learning_rate": 4.2814145229801155e-06, + "loss": 0.7778, "step": 24807 }, { - "epoch": 0.703972758229285, + "epoch": 0.7029952676471422, "grad_norm": 0.0, - "learning_rate": 4.2548286152528474e-06, - "loss": 0.716, + "learning_rate": 4.28066163658469e-06, + "loss": 0.8914, "step": 24808 }, { - "epoch": 0.7040011350737798, + "epoch": 0.7030236050894046, "grad_norm": 0.0, - "learning_rate": 4.2540763809650745e-06, - "loss": 0.7796, + "learning_rate": 4.279908798365379e-06, + "loss": 0.7858, "step": 24809 }, { - "epoch": 0.7040295119182747, + "epoch": 0.7030519425316671, "grad_norm": 0.0, - "learning_rate": 4.253324195213377e-06, - "loss": 0.7687, + "learning_rate": 4.279156008328517e-06, + "loss": 0.7952, "step": 24810 }, { - "epoch": 0.7040578887627695, + "epoch": 0.7030802799739295, "grad_norm": 0.0, - "learning_rate": 4.252572058004112e-06, - "loss": 0.8872, + "learning_rate": 4.2784032664804474e-06, + "loss": 0.8152, "step": 24811 }, { - "epoch": 0.7040862656072645, + "epoch": 0.703108617416192, "grad_norm": 0.0, - "learning_rate": 4.251819969343626e-06, - "loss": 0.9587, + "learning_rate": 4.277650572827513e-06, + "loss": 0.832, "step": 24812 }, { - "epoch": 0.7041146424517594, + "epoch": 0.7031369548584545, "grad_norm": 0.0, - "learning_rate": 4.251067929238275e-06, - "loss": 0.8067, + "learning_rate": 4.2768979273760524e-06, + "loss": 0.7571, "step": 24813 }, { - "epoch": 0.7041430192962542, + "epoch": 0.7031652923007169, "grad_norm": 0.0, - "learning_rate": 4.250315937694415e-06, - "loss": 0.8564, + "learning_rate": 4.276145330132405e-06, + "loss": 0.8126, "step": 24814 }, { - "epoch": 0.7041713961407492, + "epoch": 0.7031936297429794, "grad_norm": 0.0, - "learning_rate": 4.249563994718391e-06, - "loss": 0.9404, + "learning_rate": 4.275392781102916e-06, + "loss": 0.8651, "step": 24815 }, { - "epoch": 0.704199772985244, + "epoch": 0.7032219671852419, "grad_norm": 0.0, - "learning_rate": 4.248812100316555e-06, - "loss": 0.8499, + "learning_rate": 4.274640280293915e-06, + "loss": 0.8272, "step": 24816 }, { - "epoch": 0.7042281498297389, + "epoch": 0.7032503046275044, "grad_norm": 0.0, - "learning_rate": 4.248060254495269e-06, - "loss": 0.8423, + "learning_rate": 4.273887827711749e-06, + "loss": 0.7985, "step": 24817 }, { - "epoch": 0.7042565266742338, + "epoch": 0.7032786420697668, "grad_norm": 0.0, - "learning_rate": 4.247308457260874e-06, - "loss": 0.8063, + "learning_rate": 4.273135423362748e-06, + "loss": 0.7643, "step": 24818 }, { - "epoch": 0.7042849035187287, + "epoch": 0.7033069795120293, "grad_norm": 0.0, - "learning_rate": 4.246556708619721e-06, - "loss": 0.7664, + "learning_rate": 4.272383067253254e-06, + "loss": 0.8918, "step": 24819 }, { - "epoch": 0.7043132803632236, + "epoch": 0.7033353169542917, "grad_norm": 0.0, - "learning_rate": 4.2458050085781665e-06, - "loss": 0.8201, + "learning_rate": 4.271630759389607e-06, + "loss": 0.9901, "step": 24820 }, { - "epoch": 0.7043416572077185, + "epoch": 0.7033636543965541, "grad_norm": 0.0, - "learning_rate": 4.2450533571425534e-06, - "loss": 0.8587, + "learning_rate": 4.27087849977814e-06, + "loss": 0.8229, "step": 24821 }, { - "epoch": 0.7043700340522134, + "epoch": 0.7033919918388166, "grad_norm": 0.0, - "learning_rate": 4.244301754319235e-06, - "loss": 0.876, + "learning_rate": 4.270126288425189e-06, + "loss": 0.8522, "step": 24822 }, { - "epoch": 0.7043984108967083, + "epoch": 0.7034203292810791, "grad_norm": 0.0, - "learning_rate": 4.243550200114555e-06, - "loss": 0.7728, + "learning_rate": 4.269374125337092e-06, + "loss": 0.8003, "step": 24823 }, { - "epoch": 0.7044267877412032, + "epoch": 0.7034486667233415, "grad_norm": 0.0, - "learning_rate": 4.2427986945348666e-06, - "loss": 0.8392, + "learning_rate": 4.268622010520186e-06, + "loss": 0.8548, "step": 24824 }, { - "epoch": 0.7044551645856981, + "epoch": 0.703477004165604, "grad_norm": 0.0, - "learning_rate": 4.24204723758652e-06, - "loss": 0.8182, + "learning_rate": 4.267869943980808e-06, + "loss": 0.7651, "step": 24825 }, { - "epoch": 0.704483541430193, + "epoch": 0.7035053416078665, "grad_norm": 0.0, - "learning_rate": 4.2412958292758544e-06, - "loss": 0.9066, + "learning_rate": 4.267117925725287e-06, + "loss": 0.8203, "step": 24826 }, { - "epoch": 0.7045119182746878, + "epoch": 0.703533679050129, "grad_norm": 0.0, - "learning_rate": 4.240544469609222e-06, - "loss": 0.8713, + "learning_rate": 4.26636595575996e-06, + "loss": 0.8772, "step": 24827 }, { - "epoch": 0.7045402951191827, + "epoch": 0.7035620164923914, "grad_norm": 0.0, - "learning_rate": 4.239793158592974e-06, - "loss": 0.7767, + "learning_rate": 4.2656140340911655e-06, + "loss": 0.7842, "step": 24828 }, { - "epoch": 0.7045686719636777, + "epoch": 0.7035903539346539, "grad_norm": 0.0, - "learning_rate": 4.239041896233448e-06, - "loss": 0.8398, + "learning_rate": 4.264862160725229e-06, + "loss": 0.7758, "step": 24829 }, { - "epoch": 0.7045970488081725, + "epoch": 0.7036186913769163, "grad_norm": 0.0, - "learning_rate": 4.238290682536994e-06, - "loss": 0.8007, + "learning_rate": 4.264110335668493e-06, + "loss": 0.8822, "step": 24830 }, { - "epoch": 0.7046254256526674, + "epoch": 0.7036470288191787, "grad_norm": 0.0, - "learning_rate": 4.237539517509958e-06, - "loss": 0.8651, + "learning_rate": 4.263358558927281e-06, + "loss": 0.8189, "step": 24831 }, { - "epoch": 0.7046538024971624, + "epoch": 0.7036753662614412, "grad_norm": 0.0, - "learning_rate": 4.2367884011586836e-06, - "loss": 0.86, + "learning_rate": 4.26260683050793e-06, + "loss": 0.7778, "step": 24832 }, { - "epoch": 0.7046821793416572, + "epoch": 0.7037037037037037, "grad_norm": 0.0, - "learning_rate": 4.236037333489518e-06, - "loss": 0.9024, + "learning_rate": 4.2618551504167774e-06, + "loss": 0.836, "step": 24833 }, { - "epoch": 0.7047105561861521, + "epoch": 0.7037320411459662, "grad_norm": 0.0, - "learning_rate": 4.235286314508808e-06, - "loss": 0.8617, + "learning_rate": 4.2611035186601445e-06, + "loss": 0.7625, "step": 24834 }, { - "epoch": 0.7047389330306469, + "epoch": 0.7037603785882286, "grad_norm": 0.0, - "learning_rate": 4.2345353442228876e-06, - "loss": 0.7605, + "learning_rate": 4.260351935244369e-06, + "loss": 0.8168, "step": 24835 }, { - "epoch": 0.7047673098751419, + "epoch": 0.7037887160304911, "grad_norm": 0.0, - "learning_rate": 4.2337844226381085e-06, - "loss": 0.8418, + "learning_rate": 4.259600400175779e-06, + "loss": 0.794, "step": 24836 }, { - "epoch": 0.7047956867196368, + "epoch": 0.7038170534727536, "grad_norm": 0.0, - "learning_rate": 4.233033549760815e-06, - "loss": 0.8129, + "learning_rate": 4.258848913460708e-06, + "loss": 0.7599, "step": 24837 }, { - "epoch": 0.7048240635641316, + "epoch": 0.703845390915016, "grad_norm": 0.0, - "learning_rate": 4.232282725597342e-06, - "loss": 0.8273, + "learning_rate": 4.258097475105487e-06, + "loss": 0.8143, "step": 24838 }, { - "epoch": 0.7048524404086266, + "epoch": 0.7038737283572785, "grad_norm": 0.0, - "learning_rate": 4.2315319501540365e-06, - "loss": 0.9304, + "learning_rate": 4.257346085116441e-06, + "loss": 0.9501, "step": 24839 }, { - "epoch": 0.7048808172531215, + "epoch": 0.703902065799541, "grad_norm": 0.0, - "learning_rate": 4.230781223437244e-06, - "loss": 0.9097, + "learning_rate": 4.2565947434999e-06, + "loss": 0.8868, "step": 24840 }, { - "epoch": 0.7049091940976163, + "epoch": 0.7039304032418034, "grad_norm": 0.0, - "learning_rate": 4.230030545453298e-06, - "loss": 0.8294, + "learning_rate": 4.255843450262198e-06, + "loss": 0.8165, "step": 24841 }, { - "epoch": 0.7049375709421113, + "epoch": 0.7039587406840658, "grad_norm": 0.0, - "learning_rate": 4.229279916208542e-06, - "loss": 0.7429, + "learning_rate": 4.255092205409657e-06, + "loss": 0.7723, "step": 24842 }, { - "epoch": 0.7049659477866062, + "epoch": 0.7039870781263283, "grad_norm": 0.0, - "learning_rate": 4.228529335709323e-06, - "loss": 0.8531, + "learning_rate": 4.2543410089486055e-06, + "loss": 0.7618, "step": 24843 }, { - "epoch": 0.704994324631101, + "epoch": 0.7040154155685908, "grad_norm": 0.0, - "learning_rate": 4.227778803961972e-06, - "loss": 0.898, + "learning_rate": 4.2535898608853784e-06, + "loss": 0.7982, "step": 24844 }, { - "epoch": 0.7050227014755959, + "epoch": 0.7040437530108532, "grad_norm": 0.0, - "learning_rate": 4.227028320972832e-06, - "loss": 0.7675, + "learning_rate": 4.252838761226295e-06, + "loss": 0.7669, "step": 24845 }, { - "epoch": 0.7050510783200908, + "epoch": 0.7040720904531157, "grad_norm": 0.0, - "learning_rate": 4.226277886748245e-06, - "loss": 0.8352, + "learning_rate": 4.252087709977687e-06, + "loss": 0.8829, "step": 24846 }, { - "epoch": 0.7050794551645857, + "epoch": 0.7041004278953782, "grad_norm": 0.0, - "learning_rate": 4.225527501294548e-06, - "loss": 0.7336, + "learning_rate": 4.251336707145876e-06, + "loss": 0.7014, "step": 24847 }, { - "epoch": 0.7051078320090806, + "epoch": 0.7041287653376406, "grad_norm": 0.0, - "learning_rate": 4.224777164618084e-06, - "loss": 0.723, + "learning_rate": 4.250585752737189e-06, + "loss": 0.7773, "step": 24848 }, { - "epoch": 0.7051362088535755, + "epoch": 0.7041571027799031, "grad_norm": 0.0, - "learning_rate": 4.2240268767251815e-06, - "loss": 0.824, + "learning_rate": 4.2498348467579555e-06, + "loss": 0.8284, "step": 24849 }, { - "epoch": 0.7051645856980704, + "epoch": 0.7041854402221656, "grad_norm": 0.0, - "learning_rate": 4.223276637622184e-06, - "loss": 0.8238, + "learning_rate": 4.2490839892144975e-06, + "loss": 0.866, "step": 24850 }, { - "epoch": 0.7051929625425652, + "epoch": 0.704213777664428, "grad_norm": 0.0, - "learning_rate": 4.222526447315432e-06, - "loss": 0.8593, + "learning_rate": 4.24833318011314e-06, + "loss": 0.9954, "step": 24851 }, { - "epoch": 0.7052213393870601, + "epoch": 0.7042421151066904, "grad_norm": 0.0, - "learning_rate": 4.221776305811256e-06, - "loss": 0.8347, + "learning_rate": 4.247582419460212e-06, + "loss": 0.9324, "step": 24852 }, { - "epoch": 0.7052497162315551, + "epoch": 0.7042704525489529, "grad_norm": 0.0, - "learning_rate": 4.221026213115995e-06, - "loss": 0.8464, + "learning_rate": 4.24683170726203e-06, + "loss": 0.8566, "step": 24853 }, { - "epoch": 0.7052780930760499, + "epoch": 0.7042987899912154, "grad_norm": 0.0, - "learning_rate": 4.22027616923599e-06, - "loss": 0.7955, + "learning_rate": 4.246081043524925e-06, + "loss": 0.8768, "step": 24854 }, { - "epoch": 0.7053064699205448, + "epoch": 0.7043271274334778, "grad_norm": 0.0, - "learning_rate": 4.219526174177566e-06, - "loss": 0.8325, + "learning_rate": 4.245330428255211e-06, + "loss": 0.7853, "step": 24855 }, { - "epoch": 0.7053348467650398, + "epoch": 0.7043554648757403, "grad_norm": 0.0, - "learning_rate": 4.2187762279470654e-06, - "loss": 0.8123, + "learning_rate": 4.244579861459217e-06, + "loss": 0.7987, "step": 24856 }, { - "epoch": 0.7053632236095346, + "epoch": 0.7043838023180028, "grad_norm": 0.0, - "learning_rate": 4.218026330550826e-06, - "loss": 0.7885, + "learning_rate": 4.2438293431432665e-06, + "loss": 0.9213, "step": 24857 }, { - "epoch": 0.7053916004540295, + "epoch": 0.7044121397602653, "grad_norm": 0.0, - "learning_rate": 4.217276481995175e-06, - "loss": 0.7289, + "learning_rate": 4.243078873313677e-06, + "loss": 0.8595, "step": 24858 }, { - "epoch": 0.7054199772985245, + "epoch": 0.7044404772025277, "grad_norm": 0.0, - "learning_rate": 4.216526682286448e-06, - "loss": 0.749, + "learning_rate": 4.242328451976774e-06, + "loss": 0.8976, "step": 24859 }, { - "epoch": 0.7054483541430193, + "epoch": 0.7044688146447902, "grad_norm": 0.0, - "learning_rate": 4.2157769314309846e-06, - "loss": 0.7544, + "learning_rate": 4.241578079138873e-06, + "loss": 0.8337, "step": 24860 }, { - "epoch": 0.7054767309875142, + "epoch": 0.7044971520870527, "grad_norm": 0.0, - "learning_rate": 4.2150272294351105e-06, - "loss": 0.8022, + "learning_rate": 4.240827754806299e-06, + "loss": 0.8382, "step": 24861 }, { - "epoch": 0.705505107832009, + "epoch": 0.704525489529315, "grad_norm": 0.0, - "learning_rate": 4.21427757630516e-06, - "loss": 0.8228, + "learning_rate": 4.2400774789853705e-06, + "loss": 0.8465, "step": 24862 }, { - "epoch": 0.705533484676504, + "epoch": 0.7045538269715775, "grad_norm": 0.0, - "learning_rate": 4.2135279720474675e-06, - "loss": 0.7549, + "learning_rate": 4.239327251682409e-06, + "loss": 0.8416, "step": 24863 }, { - "epoch": 0.7055618615209989, + "epoch": 0.70458216441384, "grad_norm": 0.0, - "learning_rate": 4.212778416668364e-06, - "loss": 0.8918, + "learning_rate": 4.2385770729037336e-06, + "loss": 0.8178, "step": 24864 }, { - "epoch": 0.7055902383654937, + "epoch": 0.7046105018561025, "grad_norm": 0.0, - "learning_rate": 4.212028910174186e-06, - "loss": 0.8555, + "learning_rate": 4.237826942655666e-06, + "loss": 0.9171, "step": 24865 }, { - "epoch": 0.7056186152099887, + "epoch": 0.7046388392983649, "grad_norm": 0.0, - "learning_rate": 4.211279452571255e-06, - "loss": 0.7519, + "learning_rate": 4.237076860944518e-06, + "loss": 0.8958, "step": 24866 }, { - "epoch": 0.7056469920544836, + "epoch": 0.7046671767406274, "grad_norm": 0.0, - "learning_rate": 4.210530043865908e-06, - "loss": 0.7874, + "learning_rate": 4.236326827776615e-06, + "loss": 1.0288, "step": 24867 }, { - "epoch": 0.7056753688989784, + "epoch": 0.7046955141828899, "grad_norm": 0.0, - "learning_rate": 4.2097806840644776e-06, - "loss": 0.8879, + "learning_rate": 4.235576843158269e-06, + "loss": 0.7317, "step": 24868 }, { - "epoch": 0.7057037457434733, + "epoch": 0.7047238516251523, "grad_norm": 0.0, - "learning_rate": 4.209031373173284e-06, - "loss": 0.8129, + "learning_rate": 4.2348269070957986e-06, + "loss": 0.738, "step": 24869 }, { - "epoch": 0.7057321225879682, + "epoch": 0.7047521890674148, "grad_norm": 0.0, - "learning_rate": 4.208282111198666e-06, - "loss": 0.8369, + "learning_rate": 4.2340770195955264e-06, + "loss": 0.7833, "step": 24870 }, { - "epoch": 0.7057604994324631, + "epoch": 0.7047805265096773, "grad_norm": 0.0, - "learning_rate": 4.207532898146951e-06, - "loss": 0.7519, + "learning_rate": 4.23332718066376e-06, + "loss": 0.8192, "step": 24871 }, { - "epoch": 0.705788876276958, + "epoch": 0.7048088639519396, "grad_norm": 0.0, - "learning_rate": 4.206783734024463e-06, - "loss": 0.7334, + "learning_rate": 4.232577390306821e-06, + "loss": 0.8474, "step": 24872 }, { - "epoch": 0.7058172531214529, + "epoch": 0.7048372013942021, "grad_norm": 0.0, - "learning_rate": 4.2060346188375335e-06, - "loss": 0.8439, + "learning_rate": 4.231827648531028e-06, + "loss": 0.784, "step": 24873 }, { - "epoch": 0.7058456299659478, + "epoch": 0.7048655388364646, "grad_norm": 0.0, - "learning_rate": 4.205285552592493e-06, - "loss": 0.7225, + "learning_rate": 4.231077955342688e-06, + "loss": 0.8419, "step": 24874 }, { - "epoch": 0.7058740068104427, + "epoch": 0.7048938762787271, "grad_norm": 0.0, - "learning_rate": 4.204536535295662e-06, - "loss": 0.8448, + "learning_rate": 4.230328310748122e-06, + "loss": 0.8747, "step": 24875 }, { - "epoch": 0.7059023836549376, + "epoch": 0.7049222137209895, "grad_norm": 0.0, - "learning_rate": 4.203787566953372e-06, - "loss": 0.8283, + "learning_rate": 4.229578714753642e-06, + "loss": 0.8645, "step": 24876 }, { - "epoch": 0.7059307604994325, + "epoch": 0.704950551163252, "grad_norm": 0.0, - "learning_rate": 4.20303864757195e-06, - "loss": 0.901, + "learning_rate": 4.228829167365565e-06, + "loss": 0.8596, "step": 24877 }, { - "epoch": 0.7059591373439273, + "epoch": 0.7049788886055145, "grad_norm": 0.0, - "learning_rate": 4.20228977715772e-06, - "loss": 0.861, + "learning_rate": 4.228079668590205e-06, + "loss": 0.8337, "step": 24878 }, { - "epoch": 0.7059875141884222, + "epoch": 0.7050072260477769, "grad_norm": 0.0, - "learning_rate": 4.201540955717012e-06, - "loss": 0.8726, + "learning_rate": 4.22733021843387e-06, + "loss": 0.8579, "step": 24879 }, { - "epoch": 0.7060158910329172, + "epoch": 0.7050355634900394, "grad_norm": 0.0, - "learning_rate": 4.200792183256145e-06, - "loss": 0.8374, + "learning_rate": 4.226580816902876e-06, + "loss": 0.7639, "step": 24880 }, { - "epoch": 0.706044267877412, + "epoch": 0.7050639009323019, "grad_norm": 0.0, - "learning_rate": 4.200043459781448e-06, - "loss": 0.8159, + "learning_rate": 4.225831464003541e-06, + "loss": 0.7856, "step": 24881 }, { - "epoch": 0.7060726447219069, + "epoch": 0.7050922383745644, "grad_norm": 0.0, - "learning_rate": 4.199294785299247e-06, - "loss": 0.8303, + "learning_rate": 4.225082159742166e-06, + "loss": 0.8726, "step": 24882 }, { - "epoch": 0.7061010215664019, + "epoch": 0.7051205758168267, "grad_norm": 0.0, - "learning_rate": 4.19854615981586e-06, - "loss": 0.8046, + "learning_rate": 4.224332904125072e-06, + "loss": 0.7828, "step": 24883 }, { - "epoch": 0.7061293984108967, + "epoch": 0.7051489132590892, "grad_norm": 0.0, - "learning_rate": 4.197797583337616e-06, - "loss": 0.811, + "learning_rate": 4.223583697158564e-06, + "loss": 0.761, "step": 24884 }, { - "epoch": 0.7061577752553916, + "epoch": 0.7051772507013517, "grad_norm": 0.0, - "learning_rate": 4.197049055870838e-06, - "loss": 0.8981, + "learning_rate": 4.222834538848956e-06, + "loss": 0.7578, "step": 24885 }, { - "epoch": 0.7061861520998864, + "epoch": 0.7052055881436141, "grad_norm": 0.0, - "learning_rate": 4.196300577421847e-06, - "loss": 0.6969, + "learning_rate": 4.222085429202561e-06, + "loss": 0.7129, "step": 24886 }, { - "epoch": 0.7062145289443814, + "epoch": 0.7052339255858766, "grad_norm": 0.0, - "learning_rate": 4.195552147996963e-06, - "loss": 0.7833, + "learning_rate": 4.221336368225682e-06, + "loss": 0.7348, "step": 24887 }, { - "epoch": 0.7062429057888763, + "epoch": 0.7052622630281391, "grad_norm": 0.0, - "learning_rate": 4.1948037676025156e-06, - "loss": 0.7277, + "learning_rate": 4.220587355924634e-06, + "loss": 0.8367, "step": 24888 }, { - "epoch": 0.7062712826333711, + "epoch": 0.7052906004704016, "grad_norm": 0.0, - "learning_rate": 4.194055436244818e-06, - "loss": 0.8657, + "learning_rate": 4.219838392305723e-06, + "loss": 0.8071, "step": 24889 }, { - "epoch": 0.7062996594778661, + "epoch": 0.705318937912664, "grad_norm": 0.0, - "learning_rate": 4.1933071539301965e-06, - "loss": 0.7457, + "learning_rate": 4.219089477375261e-06, + "loss": 0.8255, "step": 24890 }, { - "epoch": 0.706328036322361, + "epoch": 0.7053472753549265, "grad_norm": 0.0, - "learning_rate": 4.192558920664972e-06, - "loss": 0.7506, + "learning_rate": 4.218340611139559e-06, + "loss": 0.9721, "step": 24891 }, { - "epoch": 0.7063564131668558, + "epoch": 0.705375612797189, "grad_norm": 0.0, - "learning_rate": 4.1918107364554575e-06, - "loss": 0.7736, + "learning_rate": 4.217591793604916e-06, + "loss": 0.962, "step": 24892 }, { - "epoch": 0.7063847900113507, + "epoch": 0.7054039502394513, "grad_norm": 0.0, - "learning_rate": 4.191062601307984e-06, - "loss": 0.8056, + "learning_rate": 4.216843024777645e-06, + "loss": 0.8925, "step": 24893 }, { - "epoch": 0.7064131668558457, + "epoch": 0.7054322876817138, "grad_norm": 0.0, - "learning_rate": 4.190314515228865e-06, - "loss": 0.9357, + "learning_rate": 4.216094304664056e-06, + "loss": 0.8984, "step": 24894 }, { - "epoch": 0.7064415437003405, + "epoch": 0.7054606251239763, "grad_norm": 0.0, - "learning_rate": 4.189566478224419e-06, - "loss": 0.83, + "learning_rate": 4.215345633270449e-06, + "loss": 0.7894, "step": 24895 }, { - "epoch": 0.7064699205448354, + "epoch": 0.7054889625662387, "grad_norm": 0.0, - "learning_rate": 4.18881849030097e-06, - "loss": 0.7952, + "learning_rate": 4.2145970106031385e-06, + "loss": 0.9135, "step": 24896 }, { - "epoch": 0.7064982973893303, + "epoch": 0.7055173000085012, "grad_norm": 0.0, - "learning_rate": 4.188070551464829e-06, - "loss": 0.8801, + "learning_rate": 4.213848436668421e-06, + "loss": 0.7743, "step": 24897 }, { - "epoch": 0.7065266742338252, + "epoch": 0.7055456374507637, "grad_norm": 0.0, - "learning_rate": 4.187322661722317e-06, - "loss": 0.938, + "learning_rate": 4.213099911472607e-06, + "loss": 0.8415, "step": 24898 }, { - "epoch": 0.7065550510783201, + "epoch": 0.7055739748930262, "grad_norm": 0.0, - "learning_rate": 4.186574821079755e-06, - "loss": 0.7955, + "learning_rate": 4.212351435022005e-06, + "loss": 0.8461, "step": 24899 }, { - "epoch": 0.706583427922815, + "epoch": 0.7056023123352886, "grad_norm": 0.0, - "learning_rate": 4.185827029543454e-06, - "loss": 0.8246, + "learning_rate": 4.211603007322913e-06, + "loss": 0.711, "step": 24900 }, { - "epoch": 0.7066118047673099, + "epoch": 0.7056306497775511, "grad_norm": 0.0, - "learning_rate": 4.185079287119733e-06, - "loss": 0.9884, + "learning_rate": 4.210854628381637e-06, + "loss": 0.6815, "step": 24901 }, { - "epoch": 0.7066401816118048, + "epoch": 0.7056589872198136, "grad_norm": 0.0, - "learning_rate": 4.184331593814913e-06, - "loss": 0.8181, + "learning_rate": 4.210106298204483e-06, + "loss": 0.9104, "step": 24902 }, { - "epoch": 0.7066685584562996, + "epoch": 0.705687324662076, "grad_norm": 0.0, - "learning_rate": 4.183583949635301e-06, - "loss": 0.8907, + "learning_rate": 4.209358016797754e-06, + "loss": 0.8468, "step": 24903 }, { - "epoch": 0.7066969353007946, + "epoch": 0.7057156621043384, "grad_norm": 0.0, - "learning_rate": 4.182836354587218e-06, - "loss": 0.9069, + "learning_rate": 4.208609784167756e-06, + "loss": 0.764, "step": 24904 }, { - "epoch": 0.7067253121452894, + "epoch": 0.7057439995466009, "grad_norm": 0.0, - "learning_rate": 4.18208880867698e-06, - "loss": 0.8701, + "learning_rate": 4.207861600320785e-06, + "loss": 0.8036, "step": 24905 }, { - "epoch": 0.7067536889897843, + "epoch": 0.7057723369888634, "grad_norm": 0.0, - "learning_rate": 4.181341311910897e-06, - "loss": 0.7074, + "learning_rate": 4.207113465263146e-06, + "loss": 0.7853, "step": 24906 }, { - "epoch": 0.7067820658342793, + "epoch": 0.7058006744311258, "grad_norm": 0.0, - "learning_rate": 4.180593864295285e-06, - "loss": 0.8113, + "learning_rate": 4.206365379001146e-06, + "loss": 0.8973, "step": 24907 }, { - "epoch": 0.7068104426787741, + "epoch": 0.7058290118733883, "grad_norm": 0.0, - "learning_rate": 4.1798464658364566e-06, - "loss": 0.771, + "learning_rate": 4.205617341541078e-06, + "loss": 0.896, "step": 24908 }, { - "epoch": 0.706838819523269, + "epoch": 0.7058573493156508, "grad_norm": 0.0, - "learning_rate": 4.179099116540729e-06, - "loss": 0.8236, + "learning_rate": 4.204869352889246e-06, + "loss": 0.8276, "step": 24909 }, { - "epoch": 0.7068671963677639, + "epoch": 0.7058856867579132, "grad_norm": 0.0, - "learning_rate": 4.178351816414415e-06, - "loss": 0.7687, + "learning_rate": 4.204121413051956e-06, + "loss": 0.8494, "step": 24910 }, { - "epoch": 0.7068955732122588, + "epoch": 0.7059140242001757, "grad_norm": 0.0, - "learning_rate": 4.177604565463822e-06, - "loss": 0.7803, + "learning_rate": 4.2033735220355e-06, + "loss": 0.7709, "step": 24911 }, { - "epoch": 0.7069239500567537, + "epoch": 0.7059423616424382, "grad_norm": 0.0, - "learning_rate": 4.176857363695266e-06, - "loss": 0.7634, + "learning_rate": 4.202625679846184e-06, + "loss": 0.7474, "step": 24912 }, { - "epoch": 0.7069523269012485, + "epoch": 0.7059706990847007, "grad_norm": 0.0, - "learning_rate": 4.17611021111506e-06, - "loss": 0.7208, + "learning_rate": 4.201877886490301e-06, + "loss": 0.788, "step": 24913 }, { - "epoch": 0.7069807037457435, + "epoch": 0.705999036526963, "grad_norm": 0.0, - "learning_rate": 4.175363107729509e-06, - "loss": 0.8798, + "learning_rate": 4.201130141974154e-06, + "loss": 0.9567, "step": 24914 }, { - "epoch": 0.7070090805902384, + "epoch": 0.7060273739692255, "grad_norm": 0.0, - "learning_rate": 4.174616053544928e-06, - "loss": 0.7421, + "learning_rate": 4.200382446304042e-06, + "loss": 0.7646, "step": 24915 }, { - "epoch": 0.7070374574347332, + "epoch": 0.706055711411488, "grad_norm": 0.0, - "learning_rate": 4.17386904856763e-06, - "loss": 0.863, + "learning_rate": 4.199634799486262e-06, + "loss": 0.8382, "step": 24916 }, { - "epoch": 0.7070658342792282, + "epoch": 0.7060840488537504, "grad_norm": 0.0, - "learning_rate": 4.17312209280392e-06, - "loss": 0.742, + "learning_rate": 4.198887201527114e-06, + "loss": 0.7964, "step": 24917 }, { - "epoch": 0.7070942111237231, + "epoch": 0.7061123862960129, "grad_norm": 0.0, - "learning_rate": 4.172375186260108e-06, - "loss": 0.9367, + "learning_rate": 4.198139652432892e-06, + "loss": 0.876, "step": 24918 }, { - "epoch": 0.7071225879682179, + "epoch": 0.7061407237382754, "grad_norm": 0.0, - "learning_rate": 4.17162832894251e-06, - "loss": 0.8701, + "learning_rate": 4.197392152209892e-06, + "loss": 0.7601, "step": 24919 }, { - "epoch": 0.7071509648127128, + "epoch": 0.7061690611805378, "grad_norm": 0.0, - "learning_rate": 4.170881520857425e-06, - "loss": 0.8361, + "learning_rate": 4.196644700864419e-06, + "loss": 0.82, "step": 24920 }, { - "epoch": 0.7071793416572077, + "epoch": 0.7061973986228003, "grad_norm": 0.0, - "learning_rate": 4.170134762011165e-06, - "loss": 0.8498, + "learning_rate": 4.195897298402757e-06, + "loss": 0.7822, "step": 24921 }, { - "epoch": 0.7072077185017026, + "epoch": 0.7062257360650628, "grad_norm": 0.0, - "learning_rate": 4.169388052410044e-06, - "loss": 0.9369, + "learning_rate": 4.195149944831208e-06, + "loss": 0.8112, "step": 24922 }, { - "epoch": 0.7072360953461975, + "epoch": 0.7062540735073253, "grad_norm": 0.0, - "learning_rate": 4.168641392060357e-06, - "loss": 0.8333, + "learning_rate": 4.19440264015607e-06, + "loss": 0.8271, "step": 24923 }, { - "epoch": 0.7072644721906924, + "epoch": 0.7062824109495877, "grad_norm": 0.0, - "learning_rate": 4.16789478096842e-06, - "loss": 0.7742, + "learning_rate": 4.193655384383631e-06, + "loss": 0.9294, "step": 24924 }, { - "epoch": 0.7072928490351873, + "epoch": 0.7063107483918502, "grad_norm": 0.0, - "learning_rate": 4.167148219140543e-06, - "loss": 0.8277, + "learning_rate": 4.192908177520192e-06, + "loss": 0.8134, "step": 24925 }, { - "epoch": 0.7073212258796822, + "epoch": 0.7063390858341126, "grad_norm": 0.0, - "learning_rate": 4.1664017065830235e-06, - "loss": 0.8214, + "learning_rate": 4.19216101957204e-06, + "loss": 0.9067, "step": 24926 }, { - "epoch": 0.707349602724177, + "epoch": 0.706367423276375, "grad_norm": 0.0, - "learning_rate": 4.16565524330217e-06, - "loss": 0.842, + "learning_rate": 4.191413910545473e-06, + "loss": 0.7872, "step": 24927 }, { - "epoch": 0.707377979568672, + "epoch": 0.7063957607186375, "grad_norm": 0.0, - "learning_rate": 4.1649088293042935e-06, - "loss": 0.8428, + "learning_rate": 4.190666850446784e-06, + "loss": 0.7328, "step": 24928 }, { - "epoch": 0.7074063564131668, + "epoch": 0.7064240981609, "grad_norm": 0.0, - "learning_rate": 4.164162464595691e-06, - "loss": 0.9395, + "learning_rate": 4.189919839282265e-06, + "loss": 0.7841, "step": 24929 }, { - "epoch": 0.7074347332576617, + "epoch": 0.7064524356031625, "grad_norm": 0.0, - "learning_rate": 4.163416149182674e-06, - "loss": 0.9489, + "learning_rate": 4.1891728770582075e-06, + "loss": 0.8871, "step": 24930 }, { - "epoch": 0.7074631101021567, + "epoch": 0.7064807730454249, "grad_norm": 0.0, - "learning_rate": 4.16266988307154e-06, - "loss": 0.9139, + "learning_rate": 4.18842596378091e-06, + "loss": 0.7925, "step": 24931 }, { - "epoch": 0.7074914869466515, + "epoch": 0.7065091104876874, "grad_norm": 0.0, - "learning_rate": 4.161923666268595e-06, - "loss": 0.8734, + "learning_rate": 4.187679099456654e-06, + "loss": 0.877, "step": 24932 }, { - "epoch": 0.7075198637911464, + "epoch": 0.7065374479299499, "grad_norm": 0.0, - "learning_rate": 4.1611774987801465e-06, - "loss": 0.9036, + "learning_rate": 4.186932284091739e-06, + "loss": 0.8156, "step": 24933 }, { - "epoch": 0.7075482406356414, + "epoch": 0.7065657853722123, "grad_norm": 0.0, - "learning_rate": 4.160431380612492e-06, - "loss": 0.7336, + "learning_rate": 4.186185517692449e-06, + "loss": 0.7819, "step": 24934 }, { - "epoch": 0.7075766174801362, + "epoch": 0.7065941228144748, "grad_norm": 0.0, - "learning_rate": 4.159685311771935e-06, - "loss": 0.8104, + "learning_rate": 4.185438800265077e-06, + "loss": 0.7048, "step": 24935 }, { - "epoch": 0.7076049943246311, + "epoch": 0.7066224602567373, "grad_norm": 0.0, - "learning_rate": 4.1589392922647816e-06, - "loss": 0.9355, + "learning_rate": 4.1846921318159175e-06, + "loss": 0.7705, "step": 24936 }, { - "epoch": 0.707633371169126, + "epoch": 0.7066507976989996, "grad_norm": 0.0, - "learning_rate": 4.158193322097328e-06, - "loss": 0.7704, + "learning_rate": 4.183945512351251e-06, + "loss": 0.8531, "step": 24937 }, { - "epoch": 0.7076617480136209, + "epoch": 0.7066791351412621, "grad_norm": 0.0, - "learning_rate": 4.157447401275875e-06, - "loss": 0.8474, + "learning_rate": 4.1831989418773714e-06, + "loss": 0.8024, "step": 24938 }, { - "epoch": 0.7076901248581158, + "epoch": 0.7067074725835246, "grad_norm": 0.0, - "learning_rate": 4.156701529806732e-06, - "loss": 0.8632, + "learning_rate": 4.182452420400571e-06, + "loss": 0.8215, "step": 24939 }, { - "epoch": 0.7077185017026106, + "epoch": 0.7067358100257871, "grad_norm": 0.0, - "learning_rate": 4.155955707696192e-06, - "loss": 0.8551, + "learning_rate": 4.181705947927131e-06, + "loss": 0.8742, "step": 24940 }, { - "epoch": 0.7077468785471056, + "epoch": 0.7067641474680495, "grad_norm": 0.0, - "learning_rate": 4.155209934950556e-06, - "loss": 0.8226, + "learning_rate": 4.18095952446334e-06, + "loss": 0.7616, "step": 24941 }, { - "epoch": 0.7077752553916005, + "epoch": 0.706792484910312, "grad_norm": 0.0, - "learning_rate": 4.154464211576128e-06, - "loss": 0.8504, + "learning_rate": 4.18021315001549e-06, + "loss": 0.828, "step": 24942 }, { - "epoch": 0.7078036322360953, + "epoch": 0.7068208223525745, "grad_norm": 0.0, - "learning_rate": 4.1537185375792e-06, - "loss": 0.8326, + "learning_rate": 4.1794668245898664e-06, + "loss": 0.8581, "step": 24943 }, { - "epoch": 0.7078320090805902, + "epoch": 0.7068491597948369, "grad_norm": 0.0, - "learning_rate": 4.152972912966075e-06, - "loss": 0.7945, + "learning_rate": 4.1787205481927575e-06, + "loss": 0.8822, "step": 24944 }, { - "epoch": 0.7078603859250852, + "epoch": 0.7068774972370994, "grad_norm": 0.0, - "learning_rate": 4.152227337743053e-06, - "loss": 0.8759, + "learning_rate": 4.1779743208304435e-06, + "loss": 0.7949, "step": 24945 }, { - "epoch": 0.70788876276958, + "epoch": 0.7069058346793619, "grad_norm": 0.0, - "learning_rate": 4.151481811916427e-06, - "loss": 0.9427, + "learning_rate": 4.177228142509218e-06, + "loss": 0.8066, "step": 24946 }, { - "epoch": 0.7079171396140749, + "epoch": 0.7069341721216243, "grad_norm": 0.0, - "learning_rate": 4.150736335492496e-06, - "loss": 0.8049, + "learning_rate": 4.176482013235357e-06, + "loss": 0.7504, "step": 24947 }, { - "epoch": 0.7079455164585698, + "epoch": 0.7069625095638867, "grad_norm": 0.0, - "learning_rate": 4.149990908477564e-06, - "loss": 0.816, + "learning_rate": 4.175735933015151e-06, + "loss": 0.697, "step": 24948 }, { - "epoch": 0.7079738933030647, + "epoch": 0.7069908470061492, "grad_norm": 0.0, - "learning_rate": 4.1492455308779156e-06, - "loss": 0.9193, + "learning_rate": 4.174989901854889e-06, + "loss": 0.8763, "step": 24949 }, { - "epoch": 0.7080022701475596, + "epoch": 0.7070191844484117, "grad_norm": 0.0, - "learning_rate": 4.148500202699854e-06, - "loss": 0.7766, + "learning_rate": 4.174243919760845e-06, + "loss": 0.8291, "step": 24950 }, { - "epoch": 0.7080306469920545, + "epoch": 0.7070475218906741, "grad_norm": 0.0, - "learning_rate": 4.1477549239496785e-06, - "loss": 0.8555, + "learning_rate": 4.173497986739309e-06, + "loss": 0.8715, "step": 24951 }, { - "epoch": 0.7080590238365494, + "epoch": 0.7070758593329366, "grad_norm": 0.0, - "learning_rate": 4.147009694633676e-06, - "loss": 0.7796, + "learning_rate": 4.172752102796565e-06, + "loss": 0.8381, "step": 24952 }, { - "epoch": 0.7080874006810443, + "epoch": 0.7071041967751991, "grad_norm": 0.0, - "learning_rate": 4.1462645147581456e-06, - "loss": 0.8418, + "learning_rate": 4.172006267938893e-06, + "loss": 0.8091, "step": 24953 }, { - "epoch": 0.7081157775255391, + "epoch": 0.7071325342174616, "grad_norm": 0.0, - "learning_rate": 4.145519384329383e-06, - "loss": 0.889, + "learning_rate": 4.171260482172574e-06, + "loss": 0.8624, "step": 24954 }, { - "epoch": 0.7081441543700341, + "epoch": 0.707160871659724, "grad_norm": 0.0, - "learning_rate": 4.1447743033536805e-06, - "loss": 0.829, + "learning_rate": 4.170514745503893e-06, + "loss": 0.8698, "step": 24955 }, { - "epoch": 0.7081725312145289, + "epoch": 0.7071892091019865, "grad_norm": 0.0, - "learning_rate": 4.1440292718373366e-06, - "loss": 0.8264, + "learning_rate": 4.169769057939132e-06, + "loss": 0.8749, "step": 24956 }, { - "epoch": 0.7082009080590238, + "epoch": 0.707217546544249, "grad_norm": 0.0, - "learning_rate": 4.143284289786637e-06, - "loss": 0.7478, + "learning_rate": 4.169023419484574e-06, + "loss": 0.8483, "step": 24957 }, { - "epoch": 0.7082292849035188, + "epoch": 0.7072458839865113, "grad_norm": 0.0, - "learning_rate": 4.142539357207877e-06, - "loss": 0.9053, + "learning_rate": 4.168277830146493e-06, + "loss": 0.8497, "step": 24958 }, { - "epoch": 0.7082576617480136, + "epoch": 0.7072742214287738, "grad_norm": 0.0, - "learning_rate": 4.1417944741073555e-06, - "loss": 0.8149, + "learning_rate": 4.167532289931175e-06, + "loss": 0.7722, "step": 24959 }, { - "epoch": 0.7082860385925085, + "epoch": 0.7073025588710363, "grad_norm": 0.0, - "learning_rate": 4.141049640491356e-06, - "loss": 0.8606, + "learning_rate": 4.1667867988448995e-06, + "loss": 0.8762, "step": 24960 }, { - "epoch": 0.7083144154370034, + "epoch": 0.7073308963132987, "grad_norm": 0.0, - "learning_rate": 4.140304856366172e-06, - "loss": 0.8357, + "learning_rate": 4.166041356893943e-06, + "loss": 0.7983, "step": 24961 }, { - "epoch": 0.7083427922814983, + "epoch": 0.7073592337555612, "grad_norm": 0.0, - "learning_rate": 4.139560121738101e-06, - "loss": 0.7704, + "learning_rate": 4.1652959640845906e-06, + "loss": 0.8345, "step": 24962 }, { - "epoch": 0.7083711691259932, + "epoch": 0.7073875711978237, "grad_norm": 0.0, - "learning_rate": 4.138815436613426e-06, - "loss": 0.847, + "learning_rate": 4.164550620423112e-06, + "loss": 0.8035, "step": 24963 }, { - "epoch": 0.708399545970488, + "epoch": 0.7074159086400862, "grad_norm": 0.0, - "learning_rate": 4.1380708009984394e-06, - "loss": 0.8876, + "learning_rate": 4.16380532591579e-06, + "loss": 0.7188, "step": 24964 }, { - "epoch": 0.708427922814983, + "epoch": 0.7074442460823486, "grad_norm": 0.0, - "learning_rate": 4.137326214899436e-06, - "loss": 0.7992, + "learning_rate": 4.163060080568908e-06, + "loss": 0.8347, "step": 24965 }, { - "epoch": 0.7084562996594779, + "epoch": 0.7074725835246111, "grad_norm": 0.0, - "learning_rate": 4.1365816783226985e-06, - "loss": 0.8831, + "learning_rate": 4.162314884388735e-06, + "loss": 0.9629, "step": 24966 }, { - "epoch": 0.7084846765039727, + "epoch": 0.7075009209668736, "grad_norm": 0.0, - "learning_rate": 4.1358371912745185e-06, - "loss": 0.7676, + "learning_rate": 4.161569737381551e-06, + "loss": 0.9005, "step": 24967 }, { - "epoch": 0.7085130533484677, + "epoch": 0.7075292584091359, "grad_norm": 0.0, - "learning_rate": 4.1350927537611894e-06, - "loss": 0.9089, + "learning_rate": 4.160824639553634e-06, + "loss": 0.8211, "step": 24968 }, { - "epoch": 0.7085414301929626, + "epoch": 0.7075575958513984, "grad_norm": 0.0, - "learning_rate": 4.134348365788988e-06, - "loss": 0.9698, + "learning_rate": 4.160079590911257e-06, + "loss": 0.7914, "step": 24969 }, { - "epoch": 0.7085698070374574, + "epoch": 0.7075859332936609, "grad_norm": 0.0, - "learning_rate": 4.133604027364217e-06, - "loss": 0.8127, + "learning_rate": 4.159334591460703e-06, + "loss": 0.7917, "step": 24970 }, { - "epoch": 0.7085981838819523, + "epoch": 0.7076142707359234, "grad_norm": 0.0, - "learning_rate": 4.132859738493154e-06, - "loss": 0.817, + "learning_rate": 4.158589641208239e-06, + "loss": 0.9084, "step": 24971 }, { - "epoch": 0.7086265607264473, + "epoch": 0.7076426081781858, "grad_norm": 0.0, - "learning_rate": 4.132115499182088e-06, - "loss": 0.8345, + "learning_rate": 4.1578447401601455e-06, + "loss": 0.7555, "step": 24972 }, { - "epoch": 0.7086549375709421, + "epoch": 0.7076709456204483, "grad_norm": 0.0, - "learning_rate": 4.131371309437309e-06, - "loss": 0.7084, + "learning_rate": 4.157099888322697e-06, + "loss": 0.8551, "step": 24973 }, { - "epoch": 0.708683314415437, + "epoch": 0.7076992830627108, "grad_norm": 0.0, - "learning_rate": 4.130627169265096e-06, - "loss": 0.8259, + "learning_rate": 4.156355085702162e-06, + "loss": 0.7333, "step": 24974 }, { - "epoch": 0.7087116912599319, + "epoch": 0.7077276205049732, "grad_norm": 0.0, - "learning_rate": 4.129883078671741e-06, - "loss": 0.8481, + "learning_rate": 4.155610332304823e-06, + "loss": 0.8838, "step": 24975 }, { - "epoch": 0.7087400681044268, + "epoch": 0.7077559579472357, "grad_norm": 0.0, - "learning_rate": 4.12913903766353e-06, - "loss": 0.8958, + "learning_rate": 4.154865628136942e-06, + "loss": 0.758, "step": 24976 }, { - "epoch": 0.7087684449489217, + "epoch": 0.7077842953894982, "grad_norm": 0.0, - "learning_rate": 4.1283950462467426e-06, - "loss": 0.8887, + "learning_rate": 4.154120973204802e-06, + "loss": 0.8335, "step": 24977 }, { - "epoch": 0.7087968217934165, + "epoch": 0.7078126328317607, "grad_norm": 0.0, - "learning_rate": 4.127651104427666e-06, - "loss": 0.9391, + "learning_rate": 4.1533763675146736e-06, + "loss": 0.7853, "step": 24978 }, { - "epoch": 0.7088251986379115, + "epoch": 0.707840970274023, "grad_norm": 0.0, - "learning_rate": 4.126907212212587e-06, - "loss": 0.8145, + "learning_rate": 4.152631811072822e-06, + "loss": 0.8702, "step": 24979 }, { - "epoch": 0.7088535754824064, + "epoch": 0.7078693077162855, "grad_norm": 0.0, - "learning_rate": 4.126163369607784e-06, - "loss": 0.9265, + "learning_rate": 4.151887303885527e-06, + "loss": 0.8813, "step": 24980 }, { - "epoch": 0.7088819523269012, + "epoch": 0.707897645158548, "grad_norm": 0.0, - "learning_rate": 4.125419576619544e-06, - "loss": 0.7635, + "learning_rate": 4.151142845959055e-06, + "loss": 0.8445, "step": 24981 }, { - "epoch": 0.7089103291713962, + "epoch": 0.7079259826008104, "grad_norm": 0.0, - "learning_rate": 4.124675833254152e-06, - "loss": 0.8701, + "learning_rate": 4.15039843729968e-06, + "loss": 0.8649, "step": 24982 }, { - "epoch": 0.708938706015891, + "epoch": 0.7079543200430729, "grad_norm": 0.0, - "learning_rate": 4.123932139517882e-06, - "loss": 0.8071, + "learning_rate": 4.149654077913674e-06, + "loss": 0.7882, "step": 24983 }, { - "epoch": 0.7089670828603859, + "epoch": 0.7079826574853354, "grad_norm": 0.0, - "learning_rate": 4.123188495417023e-06, - "loss": 0.8021, + "learning_rate": 4.1489097678073e-06, + "loss": 0.8065, "step": 24984 }, { - "epoch": 0.7089954597048809, + "epoch": 0.7080109949275978, "grad_norm": 0.0, - "learning_rate": 4.122444900957855e-06, - "loss": 0.822, + "learning_rate": 4.148165506986834e-06, + "loss": 0.7724, "step": 24985 }, { - "epoch": 0.7090238365493757, + "epoch": 0.7080393323698603, "grad_norm": 0.0, - "learning_rate": 4.121701356146659e-06, - "loss": 0.8543, + "learning_rate": 4.147421295458543e-06, + "loss": 0.7323, "step": 24986 }, { - "epoch": 0.7090522133938706, + "epoch": 0.7080676698121228, "grad_norm": 0.0, - "learning_rate": 4.120957860989719e-06, - "loss": 0.7998, + "learning_rate": 4.146677133228695e-06, + "loss": 0.8705, "step": 24987 }, { - "epoch": 0.7090805902383654, + "epoch": 0.7080960072543853, "grad_norm": 0.0, - "learning_rate": 4.120214415493309e-06, - "loss": 0.815, + "learning_rate": 4.145933020303558e-06, + "loss": 0.8278, "step": 24988 }, { - "epoch": 0.7091089670828604, + "epoch": 0.7081243446966476, "grad_norm": 0.0, - "learning_rate": 4.119471019663712e-06, - "loss": 0.8285, + "learning_rate": 4.145188956689405e-06, + "loss": 0.7708, "step": 24989 }, { - "epoch": 0.7091373439273553, + "epoch": 0.7081526821389101, "grad_norm": 0.0, - "learning_rate": 4.118727673507213e-06, - "loss": 0.8381, + "learning_rate": 4.144444942392496e-06, + "loss": 0.7783, "step": 24990 }, { - "epoch": 0.7091657207718501, + "epoch": 0.7081810195811726, "grad_norm": 0.0, - "learning_rate": 4.11798437703008e-06, - "loss": 0.8866, + "learning_rate": 4.143700977419105e-06, + "loss": 0.8538, "step": 24991 }, { - "epoch": 0.7091940976163451, + "epoch": 0.708209357023435, "grad_norm": 0.0, - "learning_rate": 4.117241130238597e-06, - "loss": 0.7555, + "learning_rate": 4.142957061775491e-06, + "loss": 0.853, "step": 24992 }, { - "epoch": 0.70922247446084, + "epoch": 0.7082376944656975, "grad_norm": 0.0, - "learning_rate": 4.1164979331390476e-06, - "loss": 0.7908, + "learning_rate": 4.142213195467926e-06, + "loss": 0.848, "step": 24993 }, { - "epoch": 0.7092508513053348, + "epoch": 0.70826603190796, "grad_norm": 0.0, - "learning_rate": 4.1157547857377e-06, - "loss": 0.7733, + "learning_rate": 4.141469378502674e-06, + "loss": 0.7613, "step": 24994 }, { - "epoch": 0.7092792281498297, + "epoch": 0.7082943693502225, "grad_norm": 0.0, - "learning_rate": 4.115011688040838e-06, - "loss": 0.8806, + "learning_rate": 4.140725610886e-06, + "loss": 0.8904, "step": 24995 }, { - "epoch": 0.7093076049943247, + "epoch": 0.7083227067924849, "grad_norm": 0.0, - "learning_rate": 4.114268640054738e-06, - "loss": 0.6926, + "learning_rate": 4.139981892624172e-06, + "loss": 0.7244, "step": 24996 }, { - "epoch": 0.7093359818388195, + "epoch": 0.7083510442347474, "grad_norm": 0.0, - "learning_rate": 4.113525641785673e-06, - "loss": 0.7792, + "learning_rate": 4.139238223723454e-06, + "loss": 0.7555, "step": 24997 }, { - "epoch": 0.7093643586833144, + "epoch": 0.7083793816770099, "grad_norm": 0.0, - "learning_rate": 4.112782693239922e-06, - "loss": 0.9111, + "learning_rate": 4.138494604190106e-06, + "loss": 0.8929, "step": 24998 }, { - "epoch": 0.7093927355278093, + "epoch": 0.7084077191192723, "grad_norm": 0.0, - "learning_rate": 4.112039794423764e-06, - "loss": 0.8698, + "learning_rate": 4.1377510340304e-06, + "loss": 0.9534, "step": 24999 }, { - "epoch": 0.7094211123723042, + "epoch": 0.7084360565615347, "grad_norm": 0.0, - "learning_rate": 4.111296945343462e-06, - "loss": 0.871, + "learning_rate": 4.137007513250587e-06, + "loss": 0.8159, "step": 25000 }, { - "epoch": 0.7094494892167991, + "epoch": 0.7084643940037972, "grad_norm": 0.0, - "learning_rate": 4.110554146005307e-06, - "loss": 0.8495, + "learning_rate": 4.136264041856939e-06, + "loss": 0.8632, "step": 25001 }, { - "epoch": 0.709477866061294, + "epoch": 0.7084927314460597, "grad_norm": 0.0, - "learning_rate": 4.109811396415563e-06, - "loss": 0.84, + "learning_rate": 4.135520619855719e-06, + "loss": 0.848, "step": 25002 }, { - "epoch": 0.7095062429057889, + "epoch": 0.7085210688883221, "grad_norm": 0.0, - "learning_rate": 4.109068696580506e-06, - "loss": 0.8145, + "learning_rate": 4.134777247253184e-06, + "loss": 0.8098, "step": 25003 }, { - "epoch": 0.7095346197502838, + "epoch": 0.7085494063305846, "grad_norm": 0.0, - "learning_rate": 4.108326046506415e-06, - "loss": 0.8193, + "learning_rate": 4.134033924055601e-06, + "loss": 0.8265, "step": 25004 }, { - "epoch": 0.7095629965947786, + "epoch": 0.7085777437728471, "grad_norm": 0.0, - "learning_rate": 4.107583446199555e-06, - "loss": 0.8616, + "learning_rate": 4.133290650269226e-06, + "loss": 0.8097, "step": 25005 }, { - "epoch": 0.7095913734392736, + "epoch": 0.7086060812151095, "grad_norm": 0.0, - "learning_rate": 4.1068408956662024e-06, - "loss": 0.8307, + "learning_rate": 4.132547425900322e-06, + "loss": 0.8422, "step": 25006 }, { - "epoch": 0.7096197502837684, + "epoch": 0.708634418657372, "grad_norm": 0.0, - "learning_rate": 4.106098394912632e-06, - "loss": 0.9084, + "learning_rate": 4.131804250955149e-06, + "loss": 0.7621, "step": 25007 }, { - "epoch": 0.7096481271282633, + "epoch": 0.7086627560996345, "grad_norm": 0.0, - "learning_rate": 4.105355943945112e-06, - "loss": 0.8344, + "learning_rate": 4.131061125439969e-06, + "loss": 0.7948, "step": 25008 }, { - "epoch": 0.7096765039727583, + "epoch": 0.7086910935418969, "grad_norm": 0.0, - "learning_rate": 4.104613542769914e-06, - "loss": 0.9044, + "learning_rate": 4.130318049361039e-06, + "loss": 0.7708, "step": 25009 }, { - "epoch": 0.7097048808172531, + "epoch": 0.7087194309841593, "grad_norm": 0.0, - "learning_rate": 4.103871191393314e-06, - "loss": 0.8746, + "learning_rate": 4.1295750227246245e-06, + "loss": 0.96, "step": 25010 }, { - "epoch": 0.709733257661748, + "epoch": 0.7087477684264218, "grad_norm": 0.0, - "learning_rate": 4.103128889821576e-06, - "loss": 0.7872, + "learning_rate": 4.128832045536976e-06, + "loss": 0.762, "step": 25011 }, { - "epoch": 0.7097616345062429, + "epoch": 0.7087761058686843, "grad_norm": 0.0, - "learning_rate": 4.102386638060974e-06, - "loss": 0.7577, + "learning_rate": 4.128089117804359e-06, + "loss": 0.6761, "step": 25012 }, { - "epoch": 0.7097900113507378, + "epoch": 0.7088044433109467, "grad_norm": 0.0, - "learning_rate": 4.101644436117779e-06, - "loss": 0.8409, + "learning_rate": 4.127346239533023e-06, + "loss": 0.7932, "step": 25013 }, { - "epoch": 0.7098183881952327, + "epoch": 0.7088327807532092, "grad_norm": 0.0, - "learning_rate": 4.100902283998256e-06, - "loss": 0.8752, + "learning_rate": 4.126603410729232e-06, + "loss": 0.861, "step": 25014 }, { - "epoch": 0.7098467650397275, + "epoch": 0.7088611181954717, "grad_norm": 0.0, - "learning_rate": 4.1001601817086765e-06, - "loss": 0.8092, + "learning_rate": 4.1258606313992445e-06, + "loss": 0.959, "step": 25015 }, { - "epoch": 0.7098751418842225, + "epoch": 0.7088894556377341, "grad_norm": 0.0, - "learning_rate": 4.099418129255309e-06, - "loss": 0.8465, + "learning_rate": 4.12511790154931e-06, + "loss": 0.7225, "step": 25016 }, { - "epoch": 0.7099035187287174, + "epoch": 0.7089177930799966, "grad_norm": 0.0, - "learning_rate": 4.098676126644422e-06, - "loss": 0.8301, + "learning_rate": 4.1243752211856904e-06, + "loss": 0.7553, "step": 25017 }, { - "epoch": 0.7099318955732122, + "epoch": 0.7089461305222591, "grad_norm": 0.0, - "learning_rate": 4.097934173882287e-06, - "loss": 0.8137, + "learning_rate": 4.123632590314643e-06, + "loss": 0.8143, "step": 25018 }, { - "epoch": 0.7099602724177072, + "epoch": 0.7089744679645216, "grad_norm": 0.0, - "learning_rate": 4.097192270975163e-06, - "loss": 0.8657, + "learning_rate": 4.122890008942417e-06, + "loss": 0.8595, "step": 25019 }, { - "epoch": 0.7099886492622021, + "epoch": 0.709002805406784, "grad_norm": 0.0, - "learning_rate": 4.096450417929322e-06, - "loss": 0.8166, + "learning_rate": 4.12214747707527e-06, + "loss": 0.7589, "step": 25020 }, { - "epoch": 0.7100170261066969, + "epoch": 0.7090311428490464, "grad_norm": 0.0, - "learning_rate": 4.0957086147510325e-06, - "loss": 0.8995, + "learning_rate": 4.121404994719458e-06, + "loss": 0.9237, "step": 25021 }, { - "epoch": 0.7100454029511918, + "epoch": 0.7090594802913089, "grad_norm": 0.0, - "learning_rate": 4.094966861446555e-06, - "loss": 0.9897, + "learning_rate": 4.120662561881235e-06, + "loss": 0.7935, "step": 25022 }, { - "epoch": 0.7100737797956868, + "epoch": 0.7090878177335713, "grad_norm": 0.0, - "learning_rate": 4.094225158022157e-06, - "loss": 0.8005, + "learning_rate": 4.119920178566858e-06, + "loss": 0.8662, "step": 25023 }, { - "epoch": 0.7101021566401816, + "epoch": 0.7091161551758338, "grad_norm": 0.0, - "learning_rate": 4.093483504484109e-06, - "loss": 0.7685, + "learning_rate": 4.119177844782573e-06, + "loss": 0.8229, "step": 25024 }, { - "epoch": 0.7101305334846765, + "epoch": 0.7091444926180963, "grad_norm": 0.0, - "learning_rate": 4.092741900838667e-06, - "loss": 0.8885, + "learning_rate": 4.118435560534636e-06, + "loss": 0.8568, "step": 25025 }, { - "epoch": 0.7101589103291714, + "epoch": 0.7091728300603588, "grad_norm": 0.0, - "learning_rate": 4.0920003470921e-06, - "loss": 0.8173, + "learning_rate": 4.117693325829305e-06, + "loss": 0.7318, "step": 25026 }, { - "epoch": 0.7101872871736663, + "epoch": 0.7092011675026212, "grad_norm": 0.0, - "learning_rate": 4.091258843250675e-06, - "loss": 0.7865, + "learning_rate": 4.116951140672824e-06, + "loss": 0.7428, "step": 25027 }, { - "epoch": 0.7102156640181612, + "epoch": 0.7092295049448837, "grad_norm": 0.0, - "learning_rate": 4.09051738932065e-06, - "loss": 0.8946, + "learning_rate": 4.116209005071451e-06, + "loss": 0.736, "step": 25028 }, { - "epoch": 0.710244040862656, + "epoch": 0.7092578423871462, "grad_norm": 0.0, - "learning_rate": 4.0897759853082885e-06, - "loss": 0.856, + "learning_rate": 4.1154669190314315e-06, + "loss": 0.7632, "step": 25029 }, { - "epoch": 0.710272417707151, + "epoch": 0.7092861798294086, "grad_norm": 0.0, - "learning_rate": 4.089034631219856e-06, - "loss": 0.9408, + "learning_rate": 4.114724882559019e-06, + "loss": 0.7636, "step": 25030 }, { - "epoch": 0.7103007945516459, + "epoch": 0.709314517271671, "grad_norm": 0.0, - "learning_rate": 4.0882933270616135e-06, - "loss": 0.8546, + "learning_rate": 4.113982895660467e-06, + "loss": 0.7917, "step": 25031 }, { - "epoch": 0.7103291713961407, + "epoch": 0.7093428547139335, "grad_norm": 0.0, - "learning_rate": 4.087552072839822e-06, - "loss": 0.8521, + "learning_rate": 4.113240958342022e-06, + "loss": 0.7877, "step": 25032 }, { - "epoch": 0.7103575482406357, + "epoch": 0.7093711921561959, "grad_norm": 0.0, - "learning_rate": 4.086810868560749e-06, - "loss": 0.8624, + "learning_rate": 4.112499070609932e-06, + "loss": 0.8065, "step": 25033 }, { - "epoch": 0.7103859250851305, + "epoch": 0.7093995295984584, "grad_norm": 0.0, - "learning_rate": 4.086069714230646e-06, - "loss": 0.9126, + "learning_rate": 4.111757232470449e-06, + "loss": 0.8821, "step": 25034 }, { - "epoch": 0.7104143019296254, + "epoch": 0.7094278670407209, "grad_norm": 0.0, - "learning_rate": 4.085328609855777e-06, - "loss": 0.7354, + "learning_rate": 4.1110154439298214e-06, + "loss": 0.8117, "step": 25035 }, { - "epoch": 0.7104426787741204, + "epoch": 0.7094562044829834, "grad_norm": 0.0, - "learning_rate": 4.084587555442408e-06, - "loss": 0.9144, + "learning_rate": 4.110273704994301e-06, + "loss": 0.8641, "step": 25036 }, { - "epoch": 0.7104710556186152, + "epoch": 0.7094845419252458, "grad_norm": 0.0, - "learning_rate": 4.08384655099679e-06, - "loss": 0.8441, + "learning_rate": 4.109532015670129e-06, + "loss": 0.7982, "step": 25037 }, { - "epoch": 0.7104994324631101, + "epoch": 0.7095128793675083, "grad_norm": 0.0, - "learning_rate": 4.083105596525191e-06, - "loss": 0.7558, + "learning_rate": 4.108790375963556e-06, + "loss": 0.8281, "step": 25038 }, { - "epoch": 0.710527809307605, + "epoch": 0.7095412168097708, "grad_norm": 0.0, - "learning_rate": 4.08236469203386e-06, - "loss": 0.848, + "learning_rate": 4.1080487858808335e-06, + "loss": 0.7965, "step": 25039 }, { - "epoch": 0.7105561861520999, + "epoch": 0.7095695542520332, "grad_norm": 0.0, - "learning_rate": 4.081623837529061e-06, - "loss": 0.9035, + "learning_rate": 4.1073072454282e-06, + "loss": 0.8551, "step": 25040 }, { - "epoch": 0.7105845629965948, + "epoch": 0.7095978916942957, "grad_norm": 0.0, - "learning_rate": 4.080883033017055e-06, - "loss": 0.8059, + "learning_rate": 4.10656575461191e-06, + "loss": 0.8454, "step": 25041 }, { - "epoch": 0.7106129398410896, + "epoch": 0.7096262291365582, "grad_norm": 0.0, - "learning_rate": 4.080142278504091e-06, - "loss": 0.8228, + "learning_rate": 4.1058243134382e-06, + "loss": 0.7649, "step": 25042 }, { - "epoch": 0.7106413166855846, + "epoch": 0.7096545665788206, "grad_norm": 0.0, - "learning_rate": 4.079401573996433e-06, - "loss": 0.848, + "learning_rate": 4.105082921913322e-06, + "loss": 0.7878, "step": 25043 }, { - "epoch": 0.7106696935300795, + "epoch": 0.709682904021083, "grad_norm": 0.0, - "learning_rate": 4.078660919500339e-06, - "loss": 0.8895, + "learning_rate": 4.104341580043518e-06, + "loss": 0.7456, "step": 25044 }, { - "epoch": 0.7106980703745743, + "epoch": 0.7097112414633455, "grad_norm": 0.0, - "learning_rate": 4.077920315022059e-06, - "loss": 0.7785, + "learning_rate": 4.1036002878350354e-06, + "loss": 0.7373, "step": 25045 }, { - "epoch": 0.7107264472190692, + "epoch": 0.709739578905608, "grad_norm": 0.0, - "learning_rate": 4.077179760567848e-06, - "loss": 0.8332, + "learning_rate": 4.102859045294121e-06, + "loss": 0.78, "step": 25046 }, { - "epoch": 0.7107548240635642, + "epoch": 0.7097679163478704, "grad_norm": 0.0, - "learning_rate": 4.076439256143975e-06, - "loss": 0.8132, + "learning_rate": 4.10211785242701e-06, + "loss": 0.8562, "step": 25047 }, { - "epoch": 0.710783200908059, + "epoch": 0.7097962537901329, "grad_norm": 0.0, - "learning_rate": 4.075698801756681e-06, - "loss": 0.8192, + "learning_rate": 4.101376709239951e-06, + "loss": 0.889, "step": 25048 }, { - "epoch": 0.7108115777525539, + "epoch": 0.7098245912323954, "grad_norm": 0.0, - "learning_rate": 4.074958397412225e-06, - "loss": 0.8303, + "learning_rate": 4.10063561573919e-06, + "loss": 0.8229, "step": 25049 }, { - "epoch": 0.7108399545970489, + "epoch": 0.7098529286746579, "grad_norm": 0.0, - "learning_rate": 4.074218043116866e-06, - "loss": 0.8387, + "learning_rate": 4.099894571930962e-06, + "loss": 0.8992, "step": 25050 }, { - "epoch": 0.7108683314415437, + "epoch": 0.7098812661169203, "grad_norm": 0.0, - "learning_rate": 4.07347773887685e-06, - "loss": 0.8103, + "learning_rate": 4.0991535778215145e-06, + "loss": 0.7535, "step": 25051 }, { - "epoch": 0.7108967082860386, + "epoch": 0.7099096035591828, "grad_norm": 0.0, - "learning_rate": 4.072737484698435e-06, - "loss": 0.8107, + "learning_rate": 4.098412633417089e-06, + "loss": 0.7981, "step": 25052 }, { - "epoch": 0.7109250851305334, + "epoch": 0.7099379410014452, "grad_norm": 0.0, - "learning_rate": 4.071997280587875e-06, - "loss": 0.7479, + "learning_rate": 4.097671738723923e-06, + "loss": 0.8545, "step": 25053 }, { - "epoch": 0.7109534619750284, + "epoch": 0.7099662784437076, "grad_norm": 0.0, - "learning_rate": 4.071257126551417e-06, - "loss": 0.7209, + "learning_rate": 4.096930893748264e-06, + "loss": 0.7875, "step": 25054 }, { - "epoch": 0.7109818388195233, + "epoch": 0.7099946158859701, "grad_norm": 0.0, - "learning_rate": 4.070517022595316e-06, - "loss": 0.8438, + "learning_rate": 4.096190098496343e-06, + "loss": 0.6873, "step": 25055 }, { - "epoch": 0.7110102156640181, + "epoch": 0.7100229533282326, "grad_norm": 0.0, - "learning_rate": 4.069776968725829e-06, - "loss": 0.8491, + "learning_rate": 4.095449352974406e-06, + "loss": 0.8641, "step": 25056 }, { - "epoch": 0.7110385925085131, + "epoch": 0.710051290770495, "grad_norm": 0.0, - "learning_rate": 4.069036964949199e-06, - "loss": 0.923, + "learning_rate": 4.094708657188692e-06, + "loss": 0.7929, "step": 25057 }, { - "epoch": 0.711066969353008, + "epoch": 0.7100796282127575, "grad_norm": 0.0, - "learning_rate": 4.06829701127168e-06, - "loss": 0.8665, + "learning_rate": 4.093968011145438e-06, + "loss": 0.7877, "step": 25058 }, { - "epoch": 0.7110953461975028, + "epoch": 0.71010796565502, "grad_norm": 0.0, - "learning_rate": 4.067557107699526e-06, - "loss": 0.8187, + "learning_rate": 4.093227414850887e-06, + "loss": 0.8027, "step": 25059 }, { - "epoch": 0.7111237230419978, + "epoch": 0.7101363030972825, "grad_norm": 0.0, - "learning_rate": 4.066817254238981e-06, - "loss": 0.8789, + "learning_rate": 4.092486868311277e-06, + "loss": 0.71, "step": 25060 }, { - "epoch": 0.7111520998864926, + "epoch": 0.7101646405395449, "grad_norm": 0.0, - "learning_rate": 4.0660774508962964e-06, - "loss": 0.8126, + "learning_rate": 4.091746371532841e-06, + "loss": 0.8219, "step": 25061 }, { - "epoch": 0.7111804767309875, + "epoch": 0.7101929779818074, "grad_norm": 0.0, - "learning_rate": 4.065337697677723e-06, - "loss": 0.7914, + "learning_rate": 4.0910059245218225e-06, + "loss": 0.8658, "step": 25062 }, { - "epoch": 0.7112088535754824, + "epoch": 0.7102213154240699, "grad_norm": 0.0, - "learning_rate": 4.064597994589509e-06, - "loss": 0.8026, + "learning_rate": 4.090265527284452e-06, + "loss": 0.9857, "step": 25063 }, { - "epoch": 0.7112372304199773, + "epoch": 0.7102496528663322, "grad_norm": 0.0, - "learning_rate": 4.063858341637905e-06, - "loss": 0.7674, + "learning_rate": 4.089525179826969e-06, + "loss": 0.8752, "step": 25064 }, { - "epoch": 0.7112656072644722, + "epoch": 0.7102779903085947, "grad_norm": 0.0, - "learning_rate": 4.063118738829154e-06, - "loss": 0.8644, + "learning_rate": 4.088784882155614e-06, + "loss": 0.858, "step": 25065 }, { - "epoch": 0.711293984108967, + "epoch": 0.7103063277508572, "grad_norm": 0.0, - "learning_rate": 4.0623791861695065e-06, - "loss": 0.8011, + "learning_rate": 4.088044634276616e-06, + "loss": 0.7544, "step": 25066 }, { - "epoch": 0.711322360953462, + "epoch": 0.7103346651931197, "grad_norm": 0.0, - "learning_rate": 4.061639683665211e-06, - "loss": 0.849, + "learning_rate": 4.087304436196212e-06, + "loss": 0.7622, "step": 25067 }, { - "epoch": 0.7113507377979569, + "epoch": 0.7103630026353821, "grad_norm": 0.0, - "learning_rate": 4.060900231322509e-06, - "loss": 0.7663, + "learning_rate": 4.086564287920643e-06, + "loss": 0.8228, "step": 25068 }, { - "epoch": 0.7113791146424517, + "epoch": 0.7103913400776446, "grad_norm": 0.0, - "learning_rate": 4.0601608291476495e-06, - "loss": 0.8008, + "learning_rate": 4.085824189456136e-06, + "loss": 0.9228, "step": 25069 }, { - "epoch": 0.7114074914869466, + "epoch": 0.7104196775199071, "grad_norm": 0.0, - "learning_rate": 4.059421477146882e-06, - "loss": 0.8708, + "learning_rate": 4.085084140808927e-06, + "loss": 0.7871, "step": 25070 }, { - "epoch": 0.7114358683314416, + "epoch": 0.7104480149621695, "grad_norm": 0.0, - "learning_rate": 4.058682175326446e-06, - "loss": 1.0414, + "learning_rate": 4.08434414198525e-06, + "loss": 0.7722, "step": 25071 }, { - "epoch": 0.7114642451759364, + "epoch": 0.710476352404432, "grad_norm": 0.0, - "learning_rate": 4.057942923692587e-06, - "loss": 0.7423, + "learning_rate": 4.08360419299134e-06, + "loss": 0.913, "step": 25072 }, { - "epoch": 0.7114926220204313, + "epoch": 0.7105046898466945, "grad_norm": 0.0, - "learning_rate": 4.057203722251556e-06, - "loss": 0.9587, + "learning_rate": 4.082864293833433e-06, + "loss": 0.9001, "step": 25073 }, { - "epoch": 0.7115209988649263, + "epoch": 0.710533027288957, "grad_norm": 0.0, - "learning_rate": 4.056464571009589e-06, - "loss": 0.8287, + "learning_rate": 4.0821244445177535e-06, + "loss": 0.8034, "step": 25074 }, { - "epoch": 0.7115493757094211, + "epoch": 0.7105613647312193, "grad_norm": 0.0, - "learning_rate": 4.055725469972932e-06, - "loss": 0.8024, + "learning_rate": 4.081384645050537e-06, + "loss": 0.8115, "step": 25075 }, { - "epoch": 0.711577752553916, + "epoch": 0.7105897021734818, "grad_norm": 0.0, - "learning_rate": 4.054986419147829e-06, - "loss": 0.8858, + "learning_rate": 4.080644895438019e-06, + "loss": 0.8838, "step": 25076 }, { - "epoch": 0.711606129398411, + "epoch": 0.7106180396157443, "grad_norm": 0.0, - "learning_rate": 4.054247418540523e-06, - "loss": 0.8431, + "learning_rate": 4.079905195686424e-06, + "loss": 0.8216, "step": 25077 }, { - "epoch": 0.7116345062429058, + "epoch": 0.7106463770580067, "grad_norm": 0.0, - "learning_rate": 4.053508468157261e-06, - "loss": 0.8381, + "learning_rate": 4.079165545801988e-06, + "loss": 0.9032, "step": 25078 }, { - "epoch": 0.7116628830874007, + "epoch": 0.7106747145002692, "grad_norm": 0.0, - "learning_rate": 4.052769568004277e-06, - "loss": 0.8834, + "learning_rate": 4.078425945790937e-06, + "loss": 0.7825, "step": 25079 }, { - "epoch": 0.7116912599318955, + "epoch": 0.7107030519425317, "grad_norm": 0.0, - "learning_rate": 4.052030718087814e-06, - "loss": 0.8175, + "learning_rate": 4.077686395659504e-06, + "loss": 0.7755, "step": 25080 }, { - "epoch": 0.7117196367763905, + "epoch": 0.7107313893847941, "grad_norm": 0.0, - "learning_rate": 4.05129191841412e-06, - "loss": 0.8468, + "learning_rate": 4.076946895413919e-06, + "loss": 0.8889, "step": 25081 }, { - "epoch": 0.7117480136208854, + "epoch": 0.7107597268270566, "grad_norm": 0.0, - "learning_rate": 4.050553168989426e-06, - "loss": 0.8906, + "learning_rate": 4.0762074450604085e-06, + "loss": 0.8291, "step": 25082 }, { - "epoch": 0.7117763904653802, + "epoch": 0.7107880642693191, "grad_norm": 0.0, - "learning_rate": 4.049814469819977e-06, - "loss": 0.8383, + "learning_rate": 4.075468044605201e-06, + "loss": 0.8774, "step": 25083 }, { - "epoch": 0.7118047673098752, + "epoch": 0.7108164017115816, "grad_norm": 0.0, - "learning_rate": 4.049075820912016e-06, - "loss": 0.7393, + "learning_rate": 4.074728694054526e-06, + "loss": 0.8293, "step": 25084 }, { - "epoch": 0.71183314415437, + "epoch": 0.7108447391538439, "grad_norm": 0.0, - "learning_rate": 4.048337222271777e-06, - "loss": 0.8739, + "learning_rate": 4.073989393414613e-06, + "loss": 0.8308, "step": 25085 }, { - "epoch": 0.7118615209988649, + "epoch": 0.7108730765961064, "grad_norm": 0.0, - "learning_rate": 4.047598673905499e-06, - "loss": 0.9004, + "learning_rate": 4.073250142691691e-06, + "loss": 0.7718, "step": 25086 }, { - "epoch": 0.7118898978433598, + "epoch": 0.7109014140383689, "grad_norm": 0.0, - "learning_rate": 4.046860175819427e-06, - "loss": 0.871, + "learning_rate": 4.072510941891978e-06, + "loss": 0.8297, "step": 25087 }, { - "epoch": 0.7119182746878547, + "epoch": 0.7109297514806313, "grad_norm": 0.0, - "learning_rate": 4.046121728019792e-06, - "loss": 0.8117, + "learning_rate": 4.071771791021709e-06, + "loss": 0.7753, "step": 25088 }, { - "epoch": 0.7119466515323496, + "epoch": 0.7109580889228938, "grad_norm": 0.0, - "learning_rate": 4.045383330512833e-06, - "loss": 0.8866, + "learning_rate": 4.071032690087111e-06, + "loss": 0.9356, "step": 25089 }, { - "epoch": 0.7119750283768445, + "epoch": 0.7109864263651563, "grad_norm": 0.0, - "learning_rate": 4.0446449833047925e-06, - "loss": 0.9099, + "learning_rate": 4.070293639094401e-06, + "loss": 0.7845, "step": 25090 }, { - "epoch": 0.7120034052213394, + "epoch": 0.7110147638074188, "grad_norm": 0.0, - "learning_rate": 4.0439066864019005e-06, - "loss": 0.7223, + "learning_rate": 4.069554638049815e-06, + "loss": 0.7407, "step": 25091 }, { - "epoch": 0.7120317820658343, + "epoch": 0.7110431012496812, "grad_norm": 0.0, - "learning_rate": 4.043168439810396e-06, - "loss": 0.7024, + "learning_rate": 4.068815686959568e-06, + "loss": 0.9319, "step": 25092 }, { - "epoch": 0.7120601589103291, + "epoch": 0.7110714386919437, "grad_norm": 0.0, - "learning_rate": 4.042430243536517e-06, - "loss": 0.8858, + "learning_rate": 4.06807678582989e-06, + "loss": 0.8133, "step": 25093 }, { - "epoch": 0.7120885357548241, + "epoch": 0.7110997761342062, "grad_norm": 0.0, - "learning_rate": 4.041692097586496e-06, - "loss": 0.8406, + "learning_rate": 4.067337934667007e-06, + "loss": 0.825, "step": 25094 }, { - "epoch": 0.712116912599319, + "epoch": 0.7111281135764685, "grad_norm": 0.0, - "learning_rate": 4.040954001966574e-06, - "loss": 0.8754, + "learning_rate": 4.066599133477136e-06, + "loss": 0.7741, "step": 25095 }, { - "epoch": 0.7121452894438138, + "epoch": 0.711156451018731, "grad_norm": 0.0, - "learning_rate": 4.040215956682977e-06, - "loss": 0.8561, + "learning_rate": 4.065860382266504e-06, + "loss": 0.9644, "step": 25096 }, { - "epoch": 0.7121736662883087, + "epoch": 0.7111847884609935, "grad_norm": 0.0, - "learning_rate": 4.039477961741944e-06, - "loss": 0.7405, + "learning_rate": 4.065121681041333e-06, + "loss": 0.8818, "step": 25097 }, { - "epoch": 0.7122020431328037, + "epoch": 0.711213125903256, "grad_norm": 0.0, - "learning_rate": 4.038740017149713e-06, - "loss": 0.8844, + "learning_rate": 4.0643830298078456e-06, + "loss": 0.8067, "step": 25098 }, { - "epoch": 0.7122304199772985, + "epoch": 0.7112414633455184, "grad_norm": 0.0, - "learning_rate": 4.038002122912509e-06, - "loss": 0.8354, + "learning_rate": 4.063644428572268e-06, + "loss": 0.9167, "step": 25099 }, { - "epoch": 0.7122587968217934, + "epoch": 0.7112698007877809, "grad_norm": 0.0, - "learning_rate": 4.037264279036568e-06, - "loss": 0.8368, + "learning_rate": 4.062905877340816e-06, + "loss": 0.7546, "step": 25100 }, { - "epoch": 0.7122871736662884, + "epoch": 0.7112981382300434, "grad_norm": 0.0, - "learning_rate": 4.036526485528128e-06, - "loss": 0.9451, + "learning_rate": 4.06216737611971e-06, + "loss": 0.9047, "step": 25101 }, { - "epoch": 0.7123155505107832, + "epoch": 0.7113264756723058, "grad_norm": 0.0, - "learning_rate": 4.035788742393414e-06, - "loss": 0.7999, + "learning_rate": 4.0614289249151785e-06, + "loss": 0.7573, "step": 25102 }, { - "epoch": 0.7123439273552781, + "epoch": 0.7113548131145683, "grad_norm": 0.0, - "learning_rate": 4.03505104963866e-06, - "loss": 0.7925, + "learning_rate": 4.060690523733432e-06, + "loss": 0.9403, "step": 25103 }, { - "epoch": 0.7123723041997729, + "epoch": 0.7113831505568308, "grad_norm": 0.0, - "learning_rate": 4.0343134072701015e-06, - "loss": 0.7746, + "learning_rate": 4.059952172580694e-06, + "loss": 0.8961, "step": 25104 }, { - "epoch": 0.7124006810442679, + "epoch": 0.7114114879990932, "grad_norm": 0.0, - "learning_rate": 4.033575815293962e-06, - "loss": 0.7188, + "learning_rate": 4.059213871463189e-06, + "loss": 0.7896, "step": 25105 }, { - "epoch": 0.7124290578887628, + "epoch": 0.7114398254413556, "grad_norm": 0.0, - "learning_rate": 4.032838273716476e-06, - "loss": 0.7766, + "learning_rate": 4.058475620387129e-06, + "loss": 0.875, "step": 25106 }, { - "epoch": 0.7124574347332576, + "epoch": 0.7114681628836181, "grad_norm": 0.0, - "learning_rate": 4.032100782543874e-06, - "loss": 0.7993, + "learning_rate": 4.057737419358737e-06, + "loss": 0.8067, "step": 25107 }, { - "epoch": 0.7124858115777526, + "epoch": 0.7114965003258806, "grad_norm": 0.0, - "learning_rate": 4.031363341782385e-06, - "loss": 0.7372, + "learning_rate": 4.056999268384227e-06, + "loss": 0.7674, "step": 25108 }, { - "epoch": 0.7125141884222475, + "epoch": 0.711524837768143, "grad_norm": 0.0, - "learning_rate": 4.03062595143824e-06, - "loss": 0.9232, + "learning_rate": 4.0562611674698186e-06, + "loss": 0.8129, "step": 25109 }, { - "epoch": 0.7125425652667423, + "epoch": 0.7115531752104055, "grad_norm": 0.0, - "learning_rate": 4.029888611517664e-06, - "loss": 0.9024, + "learning_rate": 4.055523116621729e-06, + "loss": 0.8802, "step": 25110 }, { - "epoch": 0.7125709421112373, + "epoch": 0.711581512652668, "grad_norm": 0.0, - "learning_rate": 4.029151322026887e-06, - "loss": 0.7658, + "learning_rate": 4.054785115846176e-06, + "loss": 0.8515, "step": 25111 }, { - "epoch": 0.7125993189557321, + "epoch": 0.7116098500949304, "grad_norm": 0.0, - "learning_rate": 4.028414082972141e-06, - "loss": 0.8574, + "learning_rate": 4.05404716514938e-06, + "loss": 0.7795, "step": 25112 }, { - "epoch": 0.712627695800227, + "epoch": 0.7116381875371929, "grad_norm": 0.0, - "learning_rate": 4.027676894359646e-06, - "loss": 0.862, + "learning_rate": 4.053309264537549e-06, + "loss": 0.8111, "step": 25113 }, { - "epoch": 0.7126560726447219, + "epoch": 0.7116665249794554, "grad_norm": 0.0, - "learning_rate": 4.026939756195632e-06, - "loss": 0.8022, + "learning_rate": 4.0525714140169015e-06, + "loss": 0.7897, "step": 25114 }, { - "epoch": 0.7126844494892168, + "epoch": 0.7116948624217179, "grad_norm": 0.0, - "learning_rate": 4.0262026684863295e-06, - "loss": 0.7246, + "learning_rate": 4.051833613593657e-06, + "loss": 0.8812, "step": 25115 }, { - "epoch": 0.7127128263337117, + "epoch": 0.7117231998639802, "grad_norm": 0.0, - "learning_rate": 4.025465631237959e-06, - "loss": 0.7937, + "learning_rate": 4.051095863274024e-06, + "loss": 0.8329, "step": 25116 }, { - "epoch": 0.7127412031782066, + "epoch": 0.7117515373062427, "grad_norm": 0.0, - "learning_rate": 4.024728644456749e-06, - "loss": 0.8695, + "learning_rate": 4.050358163064219e-06, + "loss": 0.7572, "step": 25117 }, { - "epoch": 0.7127695800227015, + "epoch": 0.7117798747485052, "grad_norm": 0.0, - "learning_rate": 4.023991708148928e-06, - "loss": 0.8639, + "learning_rate": 4.04962051297046e-06, + "loss": 0.8323, "step": 25118 }, { - "epoch": 0.7127979568671964, + "epoch": 0.7118082121907676, "grad_norm": 0.0, - "learning_rate": 4.0232548223207145e-06, - "loss": 0.8173, + "learning_rate": 4.0488829129989536e-06, + "loss": 0.7494, "step": 25119 }, { - "epoch": 0.7128263337116912, + "epoch": 0.7118365496330301, "grad_norm": 0.0, - "learning_rate": 4.022517986978336e-06, - "loss": 0.7706, + "learning_rate": 4.048145363155921e-06, + "loss": 0.755, "step": 25120 }, { - "epoch": 0.7128547105561861, + "epoch": 0.7118648870752926, "grad_norm": 0.0, - "learning_rate": 4.02178120212802e-06, - "loss": 0.9403, + "learning_rate": 4.047407863447565e-06, + "loss": 0.812, "step": 25121 }, { - "epoch": 0.7128830874006811, + "epoch": 0.711893224517555, "grad_norm": 0.0, - "learning_rate": 4.021044467775979e-06, - "loss": 0.884, + "learning_rate": 4.0466704138801035e-06, + "loss": 0.7305, "step": 25122 }, { - "epoch": 0.7129114642451759, + "epoch": 0.7119215619598175, "grad_norm": 0.0, - "learning_rate": 4.020307783928453e-06, - "loss": 0.8557, + "learning_rate": 4.0459330144597485e-06, + "loss": 0.9158, "step": 25123 }, { - "epoch": 0.7129398410896708, + "epoch": 0.71194989940208, "grad_norm": 0.0, - "learning_rate": 4.019571150591653e-06, - "loss": 0.8105, + "learning_rate": 4.045195665192711e-06, + "loss": 0.7751, "step": 25124 }, { - "epoch": 0.7129682179341658, + "epoch": 0.7119782368443425, "grad_norm": 0.0, - "learning_rate": 4.018834567771802e-06, - "loss": 0.853, + "learning_rate": 4.0444583660852e-06, + "loss": 0.7676, "step": 25125 }, { - "epoch": 0.7129965947786606, + "epoch": 0.7120065742866049, "grad_norm": 0.0, - "learning_rate": 4.018098035475129e-06, - "loss": 0.7693, + "learning_rate": 4.043721117143432e-06, + "loss": 0.8278, "step": 25126 }, { - "epoch": 0.7130249716231555, + "epoch": 0.7120349117288673, "grad_norm": 0.0, - "learning_rate": 4.017361553707847e-06, - "loss": 0.9135, + "learning_rate": 4.04298391837361e-06, + "loss": 0.8934, "step": 25127 }, { - "epoch": 0.7130533484676504, + "epoch": 0.7120632491711298, "grad_norm": 0.0, - "learning_rate": 4.016625122476181e-06, - "loss": 0.6693, + "learning_rate": 4.042246769781949e-06, + "loss": 0.8303, "step": 25128 }, { - "epoch": 0.7130817253121453, + "epoch": 0.7120915866133922, "grad_norm": 0.0, - "learning_rate": 4.015888741786355e-06, - "loss": 0.8734, + "learning_rate": 4.041509671374653e-06, + "loss": 0.8898, "step": 25129 }, { - "epoch": 0.7131101021566402, + "epoch": 0.7121199240556547, "grad_norm": 0.0, - "learning_rate": 4.015152411644583e-06, - "loss": 0.7366, + "learning_rate": 4.040772623157933e-06, + "loss": 0.8108, "step": 25130 }, { - "epoch": 0.713138479001135, + "epoch": 0.7121482614979172, "grad_norm": 0.0, - "learning_rate": 4.014416132057086e-06, - "loss": 0.7929, + "learning_rate": 4.040035625138002e-06, + "loss": 0.8242, "step": 25131 }, { - "epoch": 0.71316685584563, + "epoch": 0.7121765989401797, "grad_norm": 0.0, - "learning_rate": 4.01367990303009e-06, - "loss": 0.8354, + "learning_rate": 4.039298677321062e-06, + "loss": 0.808, "step": 25132 }, { - "epoch": 0.7131952326901249, + "epoch": 0.7122049363824421, "grad_norm": 0.0, - "learning_rate": 4.012943724569806e-06, - "loss": 0.8119, + "learning_rate": 4.0385617797133205e-06, + "loss": 0.8903, "step": 25133 }, { - "epoch": 0.7132236095346197, + "epoch": 0.7122332738247046, "grad_norm": 0.0, - "learning_rate": 4.012207596682454e-06, - "loss": 0.7793, + "learning_rate": 4.0378249323209915e-06, + "loss": 0.8704, "step": 25134 }, { - "epoch": 0.7132519863791147, + "epoch": 0.7122616112669671, "grad_norm": 0.0, - "learning_rate": 4.0114715193742574e-06, - "loss": 0.8181, + "learning_rate": 4.0370881351502735e-06, + "loss": 0.7846, "step": 25135 }, { - "epoch": 0.7132803632236095, + "epoch": 0.7122899487092295, "grad_norm": 0.0, - "learning_rate": 4.010735492651427e-06, - "loss": 0.823, + "learning_rate": 4.036351388207376e-06, + "loss": 0.8854, "step": 25136 }, { - "epoch": 0.7133087400681044, + "epoch": 0.712318286151492, "grad_norm": 0.0, - "learning_rate": 4.0099995165201825e-06, - "loss": 0.9111, + "learning_rate": 4.035614691498505e-06, + "loss": 0.7664, "step": 25137 }, { - "epoch": 0.7133371169125993, + "epoch": 0.7123466235937544, "grad_norm": 0.0, - "learning_rate": 4.009263590986742e-06, - "loss": 0.8845, + "learning_rate": 4.034878045029867e-06, + "loss": 0.7613, "step": 25138 }, { - "epoch": 0.7133654937570942, + "epoch": 0.7123749610360169, "grad_norm": 0.0, - "learning_rate": 4.008527716057321e-06, - "loss": 0.7607, + "learning_rate": 4.03414144880767e-06, + "loss": 0.9572, "step": 25139 }, { - "epoch": 0.7133938706015891, + "epoch": 0.7124032984782793, "grad_norm": 0.0, - "learning_rate": 4.007791891738135e-06, - "loss": 0.9296, + "learning_rate": 4.0334049028381116e-06, + "loss": 0.8197, "step": 25140 }, { - "epoch": 0.713422247446084, + "epoch": 0.7124316359205418, "grad_norm": 0.0, - "learning_rate": 4.007056118035405e-06, - "loss": 0.8656, + "learning_rate": 4.032668407127403e-06, + "loss": 0.8439, "step": 25141 }, { - "epoch": 0.7134506242905789, + "epoch": 0.7124599733628043, "grad_norm": 0.0, - "learning_rate": 4.006320394955337e-06, - "loss": 0.7912, + "learning_rate": 4.031931961681738e-06, + "loss": 0.7784, "step": 25142 }, { - "epoch": 0.7134790011350738, + "epoch": 0.7124883108050667, "grad_norm": 0.0, - "learning_rate": 4.00558472250415e-06, - "loss": 0.8477, + "learning_rate": 4.03119556650733e-06, + "loss": 0.8644, "step": 25143 }, { - "epoch": 0.7135073779795686, + "epoch": 0.7125166482473292, "grad_norm": 0.0, - "learning_rate": 4.004849100688063e-06, - "loss": 0.6605, + "learning_rate": 4.0304592216103795e-06, + "loss": 0.7996, "step": 25144 }, { - "epoch": 0.7135357548240636, + "epoch": 0.7125449856895917, "grad_norm": 0.0, - "learning_rate": 4.0041135295132805e-06, - "loss": 0.8931, + "learning_rate": 4.029722926997085e-06, + "loss": 0.8612, "step": 25145 }, { - "epoch": 0.7135641316685585, + "epoch": 0.7125733231318541, "grad_norm": 0.0, - "learning_rate": 4.003378008986024e-06, - "loss": 0.8046, + "learning_rate": 4.028986682673651e-06, + "loss": 0.8612, "step": 25146 }, { - "epoch": 0.7135925085130533, + "epoch": 0.7126016605741166, "grad_norm": 0.0, - "learning_rate": 4.0026425391125e-06, - "loss": 0.8295, + "learning_rate": 4.028250488646284e-06, + "loss": 0.7989, "step": 25147 }, { - "epoch": 0.7136208853575482, + "epoch": 0.712629998016379, "grad_norm": 0.0, - "learning_rate": 4.0019071198989246e-06, - "loss": 0.8263, + "learning_rate": 4.027514344921175e-06, + "loss": 0.8505, "step": 25148 }, { - "epoch": 0.7136492622020432, + "epoch": 0.7126583354586415, "grad_norm": 0.0, - "learning_rate": 4.001171751351512e-06, - "loss": 0.8493, + "learning_rate": 4.026778251504533e-06, + "loss": 0.7387, "step": 25149 }, { - "epoch": 0.713677639046538, + "epoch": 0.7126866729009039, "grad_norm": 0.0, - "learning_rate": 4.000436433476468e-06, - "loss": 0.8467, + "learning_rate": 4.026042208402554e-06, + "loss": 0.8828, "step": 25150 }, { - "epoch": 0.7137060158910329, + "epoch": 0.7127150103431664, "grad_norm": 0.0, - "learning_rate": 3.999701166280008e-06, - "loss": 0.8119, + "learning_rate": 4.0253062156214406e-06, + "loss": 0.8198, "step": 25151 }, { - "epoch": 0.7137343927355279, + "epoch": 0.7127433477854289, "grad_norm": 0.0, - "learning_rate": 3.998965949768344e-06, - "loss": 0.8415, + "learning_rate": 4.024570273167395e-06, + "loss": 0.7533, "step": 25152 }, { - "epoch": 0.7137627695800227, + "epoch": 0.7127716852276913, "grad_norm": 0.0, - "learning_rate": 3.998230783947679e-06, - "loss": 0.8081, + "learning_rate": 4.023834381046609e-06, + "loss": 0.8051, "step": 25153 }, { - "epoch": 0.7137911464245176, + "epoch": 0.7128000226699538, "grad_norm": 0.0, - "learning_rate": 3.99749566882423e-06, - "loss": 0.8392, + "learning_rate": 4.023098539265285e-06, + "loss": 0.8174, "step": 25154 }, { - "epoch": 0.7138195232690124, + "epoch": 0.7128283601122163, "grad_norm": 0.0, - "learning_rate": 3.9967606044042094e-06, - "loss": 0.828, + "learning_rate": 4.022362747829627e-06, + "loss": 0.8852, "step": 25155 }, { - "epoch": 0.7138479001135074, + "epoch": 0.7128566975544788, "grad_norm": 0.0, - "learning_rate": 3.9960255906938185e-06, - "loss": 0.7581, + "learning_rate": 4.0216270067458215e-06, + "loss": 0.8529, "step": 25156 }, { - "epoch": 0.7138762769580023, + "epoch": 0.7128850349967412, "grad_norm": 0.0, - "learning_rate": 3.9952906276992686e-06, - "loss": 0.8922, + "learning_rate": 4.0208913160200765e-06, + "loss": 0.853, "step": 25157 }, { - "epoch": 0.7139046538024971, + "epoch": 0.7129133724390037, "grad_norm": 0.0, - "learning_rate": 3.9945557154267736e-06, - "loss": 0.7979, + "learning_rate": 4.02015567565858e-06, + "loss": 0.9067, "step": 25158 }, { - "epoch": 0.7139330306469921, + "epoch": 0.7129417098812662, "grad_norm": 0.0, - "learning_rate": 3.993820853882532e-06, - "loss": 0.7693, + "learning_rate": 4.019420085667534e-06, + "loss": 0.9476, "step": 25159 }, { - "epoch": 0.713961407491487, + "epoch": 0.7129700473235285, "grad_norm": 0.0, - "learning_rate": 3.993086043072756e-06, - "loss": 0.7158, + "learning_rate": 4.018684546053137e-06, + "loss": 0.8427, "step": 25160 }, { - "epoch": 0.7139897843359818, + "epoch": 0.712998384765791, "grad_norm": 0.0, - "learning_rate": 3.992351283003655e-06, - "loss": 0.8416, + "learning_rate": 4.017949056821576e-06, + "loss": 0.8687, "step": 25161 }, { - "epoch": 0.7140181611804768, + "epoch": 0.7130267222080535, "grad_norm": 0.0, - "learning_rate": 3.9916165736814294e-06, - "loss": 0.9009, + "learning_rate": 4.017213617979052e-06, + "loss": 0.7848, "step": 25162 }, { - "epoch": 0.7140465380249716, + "epoch": 0.713055059650316, "grad_norm": 0.0, - "learning_rate": 3.990881915112289e-06, - "loss": 0.7649, + "learning_rate": 4.01647822953176e-06, + "loss": 0.9075, "step": 25163 }, { - "epoch": 0.7140749148694665, + "epoch": 0.7130833970925784, "grad_norm": 0.0, - "learning_rate": 3.990147307302443e-06, - "loss": 0.7009, + "learning_rate": 4.015742891485893e-06, + "loss": 0.8586, "step": 25164 }, { - "epoch": 0.7141032917139614, + "epoch": 0.7131117345348409, "grad_norm": 0.0, - "learning_rate": 3.989412750258089e-06, - "loss": 0.7803, + "learning_rate": 4.015007603847651e-06, + "loss": 0.9027, "step": 25165 }, { - "epoch": 0.7141316685584563, + "epoch": 0.7131400719771034, "grad_norm": 0.0, - "learning_rate": 3.988678243985437e-06, - "loss": 0.8477, + "learning_rate": 4.014272366623216e-06, + "loss": 0.8019, "step": 25166 }, { - "epoch": 0.7141600454029512, + "epoch": 0.7131684094193658, "grad_norm": 0.0, - "learning_rate": 3.987943788490692e-06, - "loss": 0.9101, + "learning_rate": 4.0135371798187895e-06, + "loss": 0.9007, "step": 25167 }, { - "epoch": 0.714188422247446, + "epoch": 0.7131967468616283, "grad_norm": 0.0, - "learning_rate": 3.987209383780048e-06, - "loss": 0.7694, + "learning_rate": 4.012802043440565e-06, + "loss": 0.839, "step": 25168 }, { - "epoch": 0.714216799091941, + "epoch": 0.7132250843038908, "grad_norm": 0.0, - "learning_rate": 3.9864750298597255e-06, - "loss": 0.8063, + "learning_rate": 4.01206695749473e-06, + "loss": 0.7747, "step": 25169 }, { - "epoch": 0.7142451759364359, + "epoch": 0.7132534217461531, "grad_norm": 0.0, - "learning_rate": 3.985740726735915e-06, - "loss": 0.8345, + "learning_rate": 4.011331921987481e-06, + "loss": 0.7573, "step": 25170 }, { - "epoch": 0.7142735527809307, + "epoch": 0.7132817591884156, "grad_norm": 0.0, - "learning_rate": 3.985006474414823e-06, - "loss": 0.8343, + "learning_rate": 4.010596936925005e-06, + "loss": 0.8031, "step": 25171 }, { - "epoch": 0.7143019296254256, + "epoch": 0.7133100966306781, "grad_norm": 0.0, - "learning_rate": 3.984272272902655e-06, - "loss": 0.8813, + "learning_rate": 4.009862002313494e-06, + "loss": 0.7992, "step": 25172 }, { - "epoch": 0.7143303064699206, + "epoch": 0.7133384340729406, "grad_norm": 0.0, - "learning_rate": 3.9835381222056055e-06, - "loss": 0.902, + "learning_rate": 4.009127118159143e-06, + "loss": 0.8125, "step": 25173 }, { - "epoch": 0.7143586833144154, + "epoch": 0.713366771515203, "grad_norm": 0.0, - "learning_rate": 3.982804022329881e-06, - "loss": 0.7118, + "learning_rate": 4.008392284468136e-06, + "loss": 0.9113, "step": 25174 }, { - "epoch": 0.7143870601589103, + "epoch": 0.7133951089574655, "grad_norm": 0.0, - "learning_rate": 3.982069973281685e-06, - "loss": 0.8214, + "learning_rate": 4.007657501246666e-06, + "loss": 0.8522, "step": 25175 }, { - "epoch": 0.7144154370034053, + "epoch": 0.713423446399728, "grad_norm": 0.0, - "learning_rate": 3.98133597506721e-06, - "loss": 0.8681, + "learning_rate": 4.00692276850092e-06, + "loss": 0.8699, "step": 25176 }, { - "epoch": 0.7144438138479001, + "epoch": 0.7134517838419904, "grad_norm": 0.0, - "learning_rate": 3.980602027692661e-06, - "loss": 0.8637, + "learning_rate": 4.006188086237091e-06, + "loss": 0.7188, "step": 25177 }, { - "epoch": 0.714472190692395, + "epoch": 0.7134801212842529, "grad_norm": 0.0, - "learning_rate": 3.979868131164241e-06, - "loss": 0.833, + "learning_rate": 4.005453454461369e-06, + "loss": 0.8094, "step": 25178 }, { - "epoch": 0.71450056753689, + "epoch": 0.7135084587265154, "grad_norm": 0.0, - "learning_rate": 3.979134285488141e-06, - "loss": 0.8228, + "learning_rate": 4.0047188731799345e-06, + "loss": 0.7692, "step": 25179 }, { - "epoch": 0.7145289443813848, + "epoch": 0.7135367961687779, "grad_norm": 0.0, - "learning_rate": 3.978400490670565e-06, - "loss": 0.7511, + "learning_rate": 4.00398434239898e-06, + "loss": 0.731, "step": 25180 }, { - "epoch": 0.7145573212258797, + "epoch": 0.7135651336110402, "grad_norm": 0.0, - "learning_rate": 3.977666746717714e-06, - "loss": 0.878, + "learning_rate": 4.003249862124694e-06, + "loss": 0.8112, "step": 25181 }, { - "epoch": 0.7145856980703745, + "epoch": 0.7135934710533027, "grad_norm": 0.0, - "learning_rate": 3.97693305363578e-06, - "loss": 0.7244, + "learning_rate": 4.002515432363259e-06, + "loss": 0.83, "step": 25182 }, { - "epoch": 0.7146140749148695, + "epoch": 0.7136218084955652, "grad_norm": 0.0, - "learning_rate": 3.976199411430962e-06, - "loss": 0.9389, + "learning_rate": 4.001781053120863e-06, + "loss": 0.8869, "step": 25183 }, { - "epoch": 0.7146424517593644, + "epoch": 0.7136501459378276, "grad_norm": 0.0, - "learning_rate": 3.97546582010946e-06, - "loss": 0.7372, + "learning_rate": 4.001046724403697e-06, + "loss": 0.9385, "step": 25184 }, { - "epoch": 0.7146708286038592, + "epoch": 0.7136784833800901, "grad_norm": 0.0, - "learning_rate": 3.974732279677468e-06, - "loss": 0.887, + "learning_rate": 4.000312446217937e-06, + "loss": 0.8116, "step": 25185 }, { - "epoch": 0.7146992054483542, + "epoch": 0.7137068208223526, "grad_norm": 0.0, - "learning_rate": 3.973998790141187e-06, - "loss": 0.7823, + "learning_rate": 3.999578218569777e-06, + "loss": 0.8383, "step": 25186 }, { - "epoch": 0.714727582292849, + "epoch": 0.7137351582646151, "grad_norm": 0.0, - "learning_rate": 3.973265351506805e-06, - "loss": 0.8965, + "learning_rate": 3.998844041465395e-06, + "loss": 0.818, "step": 25187 }, { - "epoch": 0.7147559591373439, + "epoch": 0.7137634957068775, "grad_norm": 0.0, - "learning_rate": 3.972531963780523e-06, - "loss": 0.8834, + "learning_rate": 3.998109914910978e-06, + "loss": 0.8108, "step": 25188 }, { - "epoch": 0.7147843359818388, + "epoch": 0.71379183314914, "grad_norm": 0.0, - "learning_rate": 3.971798626968536e-06, - "loss": 0.8266, + "learning_rate": 3.99737583891271e-06, + "loss": 0.7324, "step": 25189 }, { - "epoch": 0.7148127128263337, + "epoch": 0.7138201705914025, "grad_norm": 0.0, - "learning_rate": 3.971065341077035e-06, - "loss": 0.7808, + "learning_rate": 3.9966418134767745e-06, + "loss": 0.8867, "step": 25190 }, { - "epoch": 0.7148410896708286, + "epoch": 0.7138485080336648, "grad_norm": 0.0, - "learning_rate": 3.970332106112216e-06, - "loss": 0.8784, + "learning_rate": 3.995907838609354e-06, + "loss": 0.8343, "step": 25191 }, { - "epoch": 0.7148694665153235, + "epoch": 0.7138768454759273, "grad_norm": 0.0, - "learning_rate": 3.969598922080275e-06, - "loss": 0.742, + "learning_rate": 3.995173914316635e-06, + "loss": 0.8649, "step": 25192 }, { - "epoch": 0.7148978433598184, + "epoch": 0.7139051829181898, "grad_norm": 0.0, - "learning_rate": 3.9688657889874e-06, - "loss": 0.8686, + "learning_rate": 3.994440040604792e-06, + "loss": 0.7997, "step": 25193 }, { - "epoch": 0.7149262202043133, + "epoch": 0.7139335203604522, "grad_norm": 0.0, - "learning_rate": 3.968132706839788e-06, - "loss": 0.8053, + "learning_rate": 3.993706217480015e-06, + "loss": 0.8255, "step": 25194 }, { - "epoch": 0.7149545970488081, + "epoch": 0.7139618578027147, "grad_norm": 0.0, - "learning_rate": 3.9673996756436325e-06, - "loss": 1.017, + "learning_rate": 3.992972444948476e-06, + "loss": 0.8035, "step": 25195 }, { - "epoch": 0.7149829738933031, + "epoch": 0.7139901952449772, "grad_norm": 0.0, - "learning_rate": 3.96666669540512e-06, - "loss": 0.8528, + "learning_rate": 3.992238723016363e-06, + "loss": 0.8644, "step": 25196 }, { - "epoch": 0.715011350737798, + "epoch": 0.7140185326872397, "grad_norm": 0.0, - "learning_rate": 3.965933766130445e-06, - "loss": 0.9469, + "learning_rate": 3.9915050516898554e-06, + "loss": 0.8081, "step": 25197 }, { - "epoch": 0.7150397275822928, + "epoch": 0.7140468701295021, "grad_norm": 0.0, - "learning_rate": 3.965200887825802e-06, - "loss": 0.7795, + "learning_rate": 3.99077143097513e-06, + "loss": 0.7404, "step": 25198 }, { - "epoch": 0.7150681044267877, + "epoch": 0.7140752075717646, "grad_norm": 0.0, - "learning_rate": 3.9644680604973715e-06, - "loss": 0.8525, + "learning_rate": 3.990037860878371e-06, + "loss": 0.915, "step": 25199 }, { - "epoch": 0.7150964812712827, + "epoch": 0.7141035450140271, "grad_norm": 0.0, - "learning_rate": 3.963735284151358e-06, - "loss": 0.8517, + "learning_rate": 3.989304341405752e-06, + "loss": 0.8997, "step": 25200 }, { - "epoch": 0.7151248581157775, + "epoch": 0.7141318824562894, "grad_norm": 0.0, - "learning_rate": 3.96300255879394e-06, - "loss": 0.7611, + "learning_rate": 3.988570872563454e-06, + "loss": 0.9245, "step": 25201 }, { - "epoch": 0.7151532349602724, + "epoch": 0.7141602198985519, "grad_norm": 0.0, - "learning_rate": 3.9622698844313115e-06, - "loss": 0.9531, + "learning_rate": 3.987837454357656e-06, + "loss": 0.8386, "step": 25202 }, { - "epoch": 0.7151816118047674, + "epoch": 0.7141885573408144, "grad_norm": 0.0, - "learning_rate": 3.961537261069663e-06, - "loss": 0.843, + "learning_rate": 3.987104086794536e-06, + "loss": 0.7986, "step": 25203 }, { - "epoch": 0.7152099886492622, + "epoch": 0.7142168947830769, "grad_norm": 0.0, - "learning_rate": 3.960804688715178e-06, - "loss": 0.941, + "learning_rate": 3.98637076988027e-06, + "loss": 0.8504, "step": 25204 }, { - "epoch": 0.7152383654937571, + "epoch": 0.7142452322253393, "grad_norm": 0.0, - "learning_rate": 3.960072167374047e-06, - "loss": 0.8699, + "learning_rate": 3.98563750362104e-06, + "loss": 0.7336, "step": 25205 }, { - "epoch": 0.7152667423382519, + "epoch": 0.7142735696676018, "grad_norm": 0.0, - "learning_rate": 3.9593396970524625e-06, - "loss": 0.8165, + "learning_rate": 3.984904288023016e-06, + "loss": 0.7671, "step": 25206 }, { - "epoch": 0.7152951191827469, + "epoch": 0.7143019071098643, "grad_norm": 0.0, - "learning_rate": 3.958607277756602e-06, - "loss": 0.876, + "learning_rate": 3.984171123092379e-06, + "loss": 0.7566, "step": 25207 }, { - "epoch": 0.7153234960272418, + "epoch": 0.7143302445521267, "grad_norm": 0.0, - "learning_rate": 3.957874909492658e-06, - "loss": 0.851, + "learning_rate": 3.983438008835301e-06, + "loss": 0.8113, "step": 25208 }, { - "epoch": 0.7153518728717366, + "epoch": 0.7143585819943892, "grad_norm": 0.0, - "learning_rate": 3.9571425922668206e-06, - "loss": 0.8138, + "learning_rate": 3.982704945257957e-06, + "loss": 0.8218, "step": 25209 }, { - "epoch": 0.7153802497162316, + "epoch": 0.7143869194366517, "grad_norm": 0.0, - "learning_rate": 3.956410326085267e-06, - "loss": 0.7497, + "learning_rate": 3.9819719323665285e-06, + "loss": 0.8379, "step": 25210 }, { - "epoch": 0.7154086265607265, + "epoch": 0.7144152568789142, "grad_norm": 0.0, - "learning_rate": 3.9556781109541865e-06, - "loss": 0.9859, + "learning_rate": 3.98123897016718e-06, + "loss": 0.9229, "step": 25211 }, { - "epoch": 0.7154370034052213, + "epoch": 0.7144435943211765, "grad_norm": 0.0, - "learning_rate": 3.954945946879769e-06, - "loss": 0.7762, + "learning_rate": 3.980506058666092e-06, + "loss": 0.909, "step": 25212 }, { - "epoch": 0.7154653802497162, + "epoch": 0.714471931763439, "grad_norm": 0.0, - "learning_rate": 3.954213833868191e-06, - "loss": 0.8129, + "learning_rate": 3.979773197869441e-06, + "loss": 0.8745, "step": 25213 }, { - "epoch": 0.7154937570942111, + "epoch": 0.7145002692057015, "grad_norm": 0.0, - "learning_rate": 3.953481771925641e-06, - "loss": 0.8972, + "learning_rate": 3.979040387783391e-06, + "loss": 0.8804, "step": 25214 }, { - "epoch": 0.715522133938706, + "epoch": 0.7145286066479639, "grad_norm": 0.0, - "learning_rate": 3.952749761058302e-06, - "loss": 0.8886, + "learning_rate": 3.97830762841412e-06, + "loss": 0.7994, "step": 25215 }, { - "epoch": 0.7155505107832009, + "epoch": 0.7145569440902264, "grad_norm": 0.0, - "learning_rate": 3.952017801272358e-06, - "loss": 0.9041, + "learning_rate": 3.977574919767801e-06, + "loss": 0.7664, "step": 25216 }, { - "epoch": 0.7155788876276958, + "epoch": 0.7145852815324889, "grad_norm": 0.0, - "learning_rate": 3.951285892573994e-06, - "loss": 0.7469, + "learning_rate": 3.976842261850603e-06, + "loss": 0.9216, "step": 25217 }, { - "epoch": 0.7156072644721907, + "epoch": 0.7146136189747513, "grad_norm": 0.0, - "learning_rate": 3.9505540349693875e-06, - "loss": 0.8033, + "learning_rate": 3.976109654668704e-06, + "loss": 0.8345, "step": 25218 }, { - "epoch": 0.7156356413166856, + "epoch": 0.7146419564170138, "grad_norm": 0.0, - "learning_rate": 3.9498222284647225e-06, - "loss": 0.8786, + "learning_rate": 3.975377098228266e-06, + "loss": 0.9031, "step": 25219 }, { - "epoch": 0.7156640181611805, + "epoch": 0.7146702938592763, "grad_norm": 0.0, - "learning_rate": 3.949090473066185e-06, - "loss": 0.8046, + "learning_rate": 3.974644592535464e-06, + "loss": 0.889, "step": 25220 }, { - "epoch": 0.7156923950056754, + "epoch": 0.7146986313015388, "grad_norm": 0.0, - "learning_rate": 3.9483587687799485e-06, - "loss": 0.7987, + "learning_rate": 3.973912137596472e-06, + "loss": 0.7455, "step": 25221 }, { - "epoch": 0.7157207718501702, + "epoch": 0.7147269687438011, "grad_norm": 0.0, - "learning_rate": 3.947627115612197e-06, - "loss": 0.8441, + "learning_rate": 3.973179733417453e-06, + "loss": 0.7297, "step": 25222 }, { - "epoch": 0.7157491486946651, + "epoch": 0.7147553061860636, "grad_norm": 0.0, - "learning_rate": 3.946895513569116e-06, - "loss": 0.802, + "learning_rate": 3.972447380004581e-06, + "loss": 0.8102, "step": 25223 }, { - "epoch": 0.7157775255391601, + "epoch": 0.7147836436283261, "grad_norm": 0.0, - "learning_rate": 3.946163962656876e-06, - "loss": 0.7275, + "learning_rate": 3.97171507736402e-06, + "loss": 0.8138, "step": 25224 }, { - "epoch": 0.7158059023836549, + "epoch": 0.7148119810705885, "grad_norm": 0.0, - "learning_rate": 3.945432462881662e-06, - "loss": 0.8584, + "learning_rate": 3.970982825501942e-06, + "loss": 0.885, "step": 25225 }, { - "epoch": 0.7158342792281498, + "epoch": 0.714840318512851, "grad_norm": 0.0, - "learning_rate": 3.9447010142496555e-06, - "loss": 0.8556, + "learning_rate": 3.970250624424517e-06, + "loss": 0.8147, "step": 25226 }, { - "epoch": 0.7158626560726448, + "epoch": 0.7148686559551135, "grad_norm": 0.0, - "learning_rate": 3.943969616767027e-06, - "loss": 0.9222, + "learning_rate": 3.969518474137908e-06, + "loss": 0.7577, "step": 25227 }, { - "epoch": 0.7158910329171396, + "epoch": 0.714896993397376, "grad_norm": 0.0, - "learning_rate": 3.943238270439961e-06, - "loss": 0.826, + "learning_rate": 3.968786374648283e-06, + "loss": 0.9041, "step": 25228 }, { - "epoch": 0.7159194097616345, + "epoch": 0.7149253308396384, "grad_norm": 0.0, - "learning_rate": 3.942506975274637e-06, - "loss": 0.7981, + "learning_rate": 3.9680543259618105e-06, + "loss": 0.7018, "step": 25229 }, { - "epoch": 0.7159477866061293, + "epoch": 0.7149536682819009, "grad_norm": 0.0, - "learning_rate": 3.941775731277221e-06, - "loss": 0.8837, + "learning_rate": 3.967322328084657e-06, + "loss": 0.781, "step": 25230 }, { - "epoch": 0.7159761634506243, + "epoch": 0.7149820057241634, "grad_norm": 0.0, - "learning_rate": 3.941044538453905e-06, - "loss": 0.6938, + "learning_rate": 3.96659038102299e-06, + "loss": 0.8702, "step": 25231 }, { - "epoch": 0.7160045402951192, + "epoch": 0.7150103431664258, "grad_norm": 0.0, - "learning_rate": 3.940313396810855e-06, - "loss": 0.7471, + "learning_rate": 3.96585848478297e-06, + "loss": 0.7905, "step": 25232 }, { - "epoch": 0.716032917139614, + "epoch": 0.7150386806086882, "grad_norm": 0.0, - "learning_rate": 3.939582306354251e-06, - "loss": 0.8688, + "learning_rate": 3.965126639370764e-06, + "loss": 0.7757, "step": 25233 }, { - "epoch": 0.716061293984109, + "epoch": 0.7150670180509507, "grad_norm": 0.0, - "learning_rate": 3.938851267090269e-06, - "loss": 0.7667, + "learning_rate": 3.964394844792542e-06, + "loss": 0.7582, "step": 25234 }, { - "epoch": 0.7160896708286039, + "epoch": 0.7150953554932132, "grad_norm": 0.0, - "learning_rate": 3.938120279025081e-06, - "loss": 0.7205, + "learning_rate": 3.963663101054459e-06, + "loss": 0.8359, "step": 25235 }, { - "epoch": 0.7161180476730987, + "epoch": 0.7151236929354756, "grad_norm": 0.0, - "learning_rate": 3.9373893421648625e-06, - "loss": 0.9573, + "learning_rate": 3.9629314081626864e-06, + "loss": 0.7924, "step": 25236 }, { - "epoch": 0.7161464245175937, + "epoch": 0.7151520303777381, "grad_norm": 0.0, - "learning_rate": 3.936658456515793e-06, - "loss": 0.8881, + "learning_rate": 3.962199766123382e-06, + "loss": 0.8651, "step": 25237 }, { - "epoch": 0.7161748013620886, + "epoch": 0.7151803678200006, "grad_norm": 0.0, - "learning_rate": 3.935927622084038e-06, - "loss": 0.803, + "learning_rate": 3.9614681749427105e-06, + "loss": 0.8179, "step": 25238 }, { - "epoch": 0.7162031782065834, + "epoch": 0.715208705262263, "grad_norm": 0.0, - "learning_rate": 3.935196838875776e-06, - "loss": 0.811, + "learning_rate": 3.960736634626838e-06, + "loss": 0.7392, "step": 25239 }, { - "epoch": 0.7162315550510783, + "epoch": 0.7152370427045255, "grad_norm": 0.0, - "learning_rate": 3.934466106897181e-06, - "loss": 0.9278, + "learning_rate": 3.960005145181921e-06, + "loss": 0.7914, "step": 25240 }, { - "epoch": 0.7162599318955732, + "epoch": 0.715265380146788, "grad_norm": 0.0, - "learning_rate": 3.933735426154421e-06, - "loss": 0.8925, + "learning_rate": 3.959273706614123e-06, + "loss": 0.8658, "step": 25241 }, { - "epoch": 0.7162883087400681, + "epoch": 0.7152937175890504, "grad_norm": 0.0, - "learning_rate": 3.933004796653671e-06, - "loss": 0.8005, + "learning_rate": 3.958542318929606e-06, + "loss": 0.7976, "step": 25242 }, { - "epoch": 0.716316685584563, + "epoch": 0.7153220550313129, "grad_norm": 0.0, - "learning_rate": 3.932274218401104e-06, - "loss": 0.9656, + "learning_rate": 3.95781098213453e-06, + "loss": 0.8598, "step": 25243 }, { - "epoch": 0.7163450624290579, + "epoch": 0.7153503924735753, "grad_norm": 0.0, - "learning_rate": 3.931543691402887e-06, - "loss": 0.8565, + "learning_rate": 3.957079696235059e-06, + "loss": 0.8001, "step": 25244 }, { - "epoch": 0.7163734392735528, + "epoch": 0.7153787299158378, "grad_norm": 0.0, - "learning_rate": 3.93081321566519e-06, - "loss": 0.871, + "learning_rate": 3.956348461237347e-06, + "loss": 0.8344, "step": 25245 }, { - "epoch": 0.7164018161180477, + "epoch": 0.7154070673581002, "grad_norm": 0.0, - "learning_rate": 3.930082791194193e-06, - "loss": 0.8786, + "learning_rate": 3.9556172771475554e-06, + "loss": 0.8975, "step": 25246 }, { - "epoch": 0.7164301929625425, + "epoch": 0.7154354048003627, "grad_norm": 0.0, - "learning_rate": 3.929352417996056e-06, - "loss": 0.8149, + "learning_rate": 3.954886143971848e-06, + "loss": 0.8208, "step": 25247 }, { - "epoch": 0.7164585698070375, + "epoch": 0.7154637422426252, "grad_norm": 0.0, - "learning_rate": 3.928622096076953e-06, - "loss": 0.7961, + "learning_rate": 3.954155061716376e-06, + "loss": 0.7851, "step": 25248 }, { - "epoch": 0.7164869466515323, + "epoch": 0.7154920796848876, "grad_norm": 0.0, - "learning_rate": 3.9278918254430546e-06, - "loss": 0.7898, + "learning_rate": 3.953424030387301e-06, + "loss": 0.7638, "step": 25249 }, { - "epoch": 0.7165153234960272, + "epoch": 0.7155204171271501, "grad_norm": 0.0, - "learning_rate": 3.9271616061005235e-06, - "loss": 0.8612, + "learning_rate": 3.952693049990784e-06, + "loss": 0.841, "step": 25250 }, { - "epoch": 0.7165437003405222, + "epoch": 0.7155487545694126, "grad_norm": 0.0, - "learning_rate": 3.926431438055532e-06, - "loss": 0.8961, + "learning_rate": 3.951962120532975e-06, + "loss": 0.8132, "step": 25251 }, { - "epoch": 0.716572077185017, + "epoch": 0.7155770920116751, "grad_norm": 0.0, - "learning_rate": 3.9257013213142505e-06, - "loss": 0.9271, + "learning_rate": 3.951231242020039e-06, + "loss": 0.7917, "step": 25252 }, { - "epoch": 0.7166004540295119, + "epoch": 0.7156054294539375, "grad_norm": 0.0, - "learning_rate": 3.924971255882839e-06, - "loss": 0.8691, + "learning_rate": 3.950500414458126e-06, + "loss": 0.8697, "step": 25253 }, { - "epoch": 0.7166288308740069, + "epoch": 0.7156337668962, "grad_norm": 0.0, - "learning_rate": 3.924241241767472e-06, - "loss": 0.875, + "learning_rate": 3.949769637853393e-06, + "loss": 0.7736, "step": 25254 }, { - "epoch": 0.7166572077185017, + "epoch": 0.7156621043384624, "grad_norm": 0.0, - "learning_rate": 3.923511278974309e-06, - "loss": 0.859, + "learning_rate": 3.9490389122119974e-06, + "loss": 0.7441, "step": 25255 }, { - "epoch": 0.7166855845629966, + "epoch": 0.7156904417807248, "grad_norm": 0.0, - "learning_rate": 3.92278136750952e-06, - "loss": 0.8366, + "learning_rate": 3.948308237540094e-06, + "loss": 0.8141, "step": 25256 }, { - "epoch": 0.7167139614074914, + "epoch": 0.7157187792229873, "grad_norm": 0.0, - "learning_rate": 3.922051507379272e-06, - "loss": 0.8886, + "learning_rate": 3.9475776138438414e-06, + "loss": 0.7726, "step": 25257 }, { - "epoch": 0.7167423382519864, + "epoch": 0.7157471166652498, "grad_norm": 0.0, - "learning_rate": 3.921321698589725e-06, - "loss": 0.8568, + "learning_rate": 3.946847041129386e-06, + "loss": 0.8421, "step": 25258 }, { - "epoch": 0.7167707150964813, + "epoch": 0.7157754541075123, "grad_norm": 0.0, - "learning_rate": 3.920591941147047e-06, - "loss": 0.9045, + "learning_rate": 3.946116519402886e-06, + "loss": 0.7924, "step": 25259 }, { - "epoch": 0.7167990919409761, + "epoch": 0.7158037915497747, "grad_norm": 0.0, - "learning_rate": 3.919862235057407e-06, - "loss": 0.864, + "learning_rate": 3.9453860486704975e-06, + "loss": 0.7825, "step": 25260 }, { - "epoch": 0.7168274687854711, + "epoch": 0.7158321289920372, "grad_norm": 0.0, - "learning_rate": 3.919132580326955e-06, - "loss": 0.7862, + "learning_rate": 3.944655628938369e-06, + "loss": 0.7776, "step": 25261 }, { - "epoch": 0.716855845629966, + "epoch": 0.7158604664342997, "grad_norm": 0.0, - "learning_rate": 3.918402976961868e-06, - "loss": 0.7779, + "learning_rate": 3.943925260212653e-06, + "loss": 0.8086, "step": 25262 }, { - "epoch": 0.7168842224744608, + "epoch": 0.7158888038765621, "grad_norm": 0.0, - "learning_rate": 3.917673424968308e-06, - "loss": 0.8302, + "learning_rate": 3.9431949424995075e-06, + "loss": 0.8311, "step": 25263 }, { - "epoch": 0.7169125993189557, + "epoch": 0.7159171413188246, "grad_norm": 0.0, - "learning_rate": 3.916943924352431e-06, - "loss": 0.7524, + "learning_rate": 3.942464675805077e-06, + "loss": 0.8917, "step": 25264 }, { - "epoch": 0.7169409761634506, + "epoch": 0.715945478761087, "grad_norm": 0.0, - "learning_rate": 3.9162144751204015e-06, - "loss": 0.843, + "learning_rate": 3.941734460135514e-06, + "loss": 0.849, "step": 25265 }, { - "epoch": 0.7169693530079455, + "epoch": 0.7159738162033494, "grad_norm": 0.0, - "learning_rate": 3.915485077278385e-06, - "loss": 0.8116, + "learning_rate": 3.941004295496977e-06, + "loss": 0.8582, "step": 25266 }, { - "epoch": 0.7169977298524404, + "epoch": 0.7160021536456119, "grad_norm": 0.0, - "learning_rate": 3.9147557308325355e-06, - "loss": 0.7487, + "learning_rate": 3.940274181895607e-06, + "loss": 0.7599, "step": 25267 }, { - "epoch": 0.7170261066969353, + "epoch": 0.7160304910878744, "grad_norm": 0.0, - "learning_rate": 3.914026435789019e-06, - "loss": 0.8095, + "learning_rate": 3.939544119337557e-06, + "loss": 0.8455, "step": 25268 }, { - "epoch": 0.7170544835414302, + "epoch": 0.7160588285301369, "grad_norm": 0.0, - "learning_rate": 3.9132971921539985e-06, - "loss": 0.783, + "learning_rate": 3.9388141078289775e-06, + "loss": 0.825, "step": 25269 }, { - "epoch": 0.7170828603859251, + "epoch": 0.7160871659723993, "grad_norm": 0.0, - "learning_rate": 3.912567999933626e-06, - "loss": 0.9013, + "learning_rate": 3.938084147376018e-06, + "loss": 0.8221, "step": 25270 }, { - "epoch": 0.71711123723042, + "epoch": 0.7161155034146618, "grad_norm": 0.0, - "learning_rate": 3.911838859134066e-06, - "loss": 0.7396, + "learning_rate": 3.9373542379848305e-06, + "loss": 0.7876, "step": 25271 }, { - "epoch": 0.7171396140749149, + "epoch": 0.7161438408569243, "grad_norm": 0.0, - "learning_rate": 3.91110976976148e-06, - "loss": 0.8431, + "learning_rate": 3.936624379661556e-06, + "loss": 0.7593, "step": 25272 }, { - "epoch": 0.7171679909194097, + "epoch": 0.7161721782991867, "grad_norm": 0.0, - "learning_rate": 3.91038073182202e-06, - "loss": 0.7603, + "learning_rate": 3.9358945724123484e-06, + "loss": 0.6998, "step": 25273 }, { - "epoch": 0.7171963677639046, + "epoch": 0.7162005157414492, "grad_norm": 0.0, - "learning_rate": 3.909651745321848e-06, - "loss": 0.8985, + "learning_rate": 3.9351648162433495e-06, + "loss": 0.7514, "step": 25274 }, { - "epoch": 0.7172247446083996, + "epoch": 0.7162288531837117, "grad_norm": 0.0, - "learning_rate": 3.908922810267124e-06, - "loss": 0.7992, + "learning_rate": 3.93443511116071e-06, + "loss": 0.8277, "step": 25275 }, { - "epoch": 0.7172531214528944, + "epoch": 0.7162571906259741, "grad_norm": 0.0, - "learning_rate": 3.908193926663995e-06, - "loss": 0.8365, + "learning_rate": 3.933705457170579e-06, + "loss": 0.8486, "step": 25276 }, { - "epoch": 0.7172814982973893, + "epoch": 0.7162855280682365, "grad_norm": 0.0, - "learning_rate": 3.907465094518636e-06, - "loss": 0.8076, + "learning_rate": 3.932975854279097e-06, + "loss": 0.9141, "step": 25277 }, { - "epoch": 0.7173098751418843, + "epoch": 0.716313865510499, "grad_norm": 0.0, - "learning_rate": 3.906736313837187e-06, - "loss": 0.8325, + "learning_rate": 3.932246302492411e-06, + "loss": 0.8163, "step": 25278 }, { - "epoch": 0.7173382519863791, + "epoch": 0.7163422029527615, "grad_norm": 0.0, - "learning_rate": 3.906007584625811e-06, - "loss": 0.779, + "learning_rate": 3.931516801816668e-06, + "loss": 0.7481, "step": 25279 }, { - "epoch": 0.717366628830874, + "epoch": 0.7163705403950239, "grad_norm": 0.0, - "learning_rate": 3.905278906890666e-06, - "loss": 0.7949, + "learning_rate": 3.930787352258013e-06, + "loss": 0.7662, "step": 25280 }, { - "epoch": 0.7173950056753688, + "epoch": 0.7163988778372864, "grad_norm": 0.0, - "learning_rate": 3.904550280637901e-06, - "loss": 0.7863, + "learning_rate": 3.930057953822594e-06, + "loss": 0.7766, "step": 25281 }, { - "epoch": 0.7174233825198638, + "epoch": 0.7164272152795489, "grad_norm": 0.0, - "learning_rate": 3.903821705873674e-06, - "loss": 0.846, + "learning_rate": 3.929328606516546e-06, + "loss": 0.926, "step": 25282 }, { - "epoch": 0.7174517593643587, + "epoch": 0.7164555527218114, "grad_norm": 0.0, - "learning_rate": 3.903093182604143e-06, - "loss": 0.8276, + "learning_rate": 3.928599310346017e-06, + "loss": 0.8127, "step": 25283 }, { - "epoch": 0.7174801362088535, + "epoch": 0.7164838901640738, "grad_norm": 0.0, - "learning_rate": 3.902364710835453e-06, - "loss": 0.834, + "learning_rate": 3.927870065317156e-06, + "loss": 0.7736, "step": 25284 }, { - "epoch": 0.7175085130533485, + "epoch": 0.7165122276063363, "grad_norm": 0.0, - "learning_rate": 3.901636290573763e-06, - "loss": 0.9386, + "learning_rate": 3.927140871436095e-06, + "loss": 0.8632, "step": 25285 }, { - "epoch": 0.7175368898978434, + "epoch": 0.7165405650485988, "grad_norm": 0.0, - "learning_rate": 3.90090792182523e-06, - "loss": 0.773, + "learning_rate": 3.9264117287089865e-06, + "loss": 0.9079, "step": 25286 }, { - "epoch": 0.7175652667423382, + "epoch": 0.7165689024908611, "grad_norm": 0.0, - "learning_rate": 3.900179604595998e-06, - "loss": 0.8171, + "learning_rate": 3.9256826371419635e-06, + "loss": 0.8718, "step": 25287 }, { - "epoch": 0.7175936435868332, + "epoch": 0.7165972399331236, "grad_norm": 0.0, - "learning_rate": 3.899451338892223e-06, - "loss": 0.8561, + "learning_rate": 3.92495359674117e-06, + "loss": 0.8418, "step": 25288 }, { - "epoch": 0.7176220204313281, + "epoch": 0.7166255773753861, "grad_norm": 0.0, - "learning_rate": 3.898723124720059e-06, - "loss": 0.7695, + "learning_rate": 3.9242246075127536e-06, + "loss": 0.8187, "step": 25289 }, { - "epoch": 0.7176503972758229, + "epoch": 0.7166539148176485, "grad_norm": 0.0, - "learning_rate": 3.897994962085653e-06, - "loss": 0.7538, + "learning_rate": 3.923495669462844e-06, + "loss": 0.7919, "step": 25290 }, { - "epoch": 0.7176787741203178, + "epoch": 0.716682252259911, "grad_norm": 0.0, - "learning_rate": 3.897266850995158e-06, - "loss": 0.7658, + "learning_rate": 3.922766782597588e-06, + "loss": 0.8373, "step": 25291 }, { - "epoch": 0.7177071509648127, + "epoch": 0.7167105897021735, "grad_norm": 0.0, - "learning_rate": 3.896538791454723e-06, - "loss": 0.8168, + "learning_rate": 3.922037946923124e-06, + "loss": 0.9172, "step": 25292 }, { - "epoch": 0.7177355278093076, + "epoch": 0.716738927144436, "grad_norm": 0.0, - "learning_rate": 3.8958107834705e-06, - "loss": 0.9921, + "learning_rate": 3.921309162445591e-06, + "loss": 0.8798, "step": 25293 }, { - "epoch": 0.7177639046538025, + "epoch": 0.7167672645866984, "grad_norm": 0.0, - "learning_rate": 3.895082827048641e-06, - "loss": 0.845, + "learning_rate": 3.920580429171132e-06, + "loss": 0.7483, "step": 25294 }, { - "epoch": 0.7177922814982974, + "epoch": 0.7167956020289609, "grad_norm": 0.0, - "learning_rate": 3.894354922195288e-06, - "loss": 0.9273, + "learning_rate": 3.919851747105879e-06, + "loss": 0.86, "step": 25295 }, { - "epoch": 0.7178206583427923, + "epoch": 0.7168239394712234, "grad_norm": 0.0, - "learning_rate": 3.8936270689165945e-06, - "loss": 0.9013, + "learning_rate": 3.9191231162559715e-06, + "loss": 0.8668, "step": 25296 }, { - "epoch": 0.7178490351872872, + "epoch": 0.7168522769134857, "grad_norm": 0.0, - "learning_rate": 3.89289926721871e-06, - "loss": 0.9023, + "learning_rate": 3.9183945366275524e-06, + "loss": 0.8997, "step": 25297 }, { - "epoch": 0.717877412031782, + "epoch": 0.7168806143557482, "grad_norm": 0.0, - "learning_rate": 3.892171517107777e-06, - "loss": 0.8507, + "learning_rate": 3.91766600822675e-06, + "loss": 0.808, "step": 25298 }, { - "epoch": 0.717905788876277, + "epoch": 0.7169089517980107, "grad_norm": 0.0, - "learning_rate": 3.891443818589946e-06, - "loss": 0.7839, + "learning_rate": 3.916937531059706e-06, + "loss": 0.8499, "step": 25299 }, { - "epoch": 0.7179341657207718, + "epoch": 0.7169372892402732, "grad_norm": 0.0, - "learning_rate": 3.890716171671367e-06, - "loss": 0.8179, + "learning_rate": 3.916209105132559e-06, + "loss": 0.7916, "step": 25300 }, { - "epoch": 0.7179625425652667, + "epoch": 0.7169656266825356, "grad_norm": 0.0, - "learning_rate": 3.889988576358179e-06, - "loss": 0.7959, + "learning_rate": 3.915480730451438e-06, + "loss": 0.8275, "step": 25301 }, { - "epoch": 0.7179909194097617, + "epoch": 0.7169939641247981, "grad_norm": 0.0, - "learning_rate": 3.889261032656533e-06, - "loss": 0.7565, + "learning_rate": 3.914752407022487e-06, + "loss": 0.9285, "step": 25302 }, { - "epoch": 0.7180192962542565, + "epoch": 0.7170223015670606, "grad_norm": 0.0, - "learning_rate": 3.888533540572577e-06, - "loss": 0.7995, + "learning_rate": 3.914024134851833e-06, + "loss": 0.8262, "step": 25303 }, { - "epoch": 0.7180476730987514, + "epoch": 0.717050639009323, "grad_norm": 0.0, - "learning_rate": 3.887806100112449e-06, - "loss": 0.9327, + "learning_rate": 3.9132959139456125e-06, + "loss": 0.8784, "step": 25304 }, { - "epoch": 0.7180760499432464, + "epoch": 0.7170789764515855, "grad_norm": 0.0, - "learning_rate": 3.8870787112822974e-06, - "loss": 0.8463, + "learning_rate": 3.912567744309961e-06, + "loss": 0.8057, "step": 25305 }, { - "epoch": 0.7181044267877412, + "epoch": 0.717107313893848, "grad_norm": 0.0, - "learning_rate": 3.886351374088271e-06, - "loss": 0.7232, + "learning_rate": 3.911839625951012e-06, + "loss": 0.7997, "step": 25306 }, { - "epoch": 0.7181328036322361, + "epoch": 0.7171356513361105, "grad_norm": 0.0, - "learning_rate": 3.885624088536501e-06, - "loss": 0.8679, + "learning_rate": 3.911111558874898e-06, + "loss": 0.8064, "step": 25307 }, { - "epoch": 0.7181611804767309, + "epoch": 0.7171639887783728, "grad_norm": 0.0, - "learning_rate": 3.8848968546331475e-06, - "loss": 0.7796, + "learning_rate": 3.910383543087757e-06, + "loss": 0.8696, "step": 25308 }, { - "epoch": 0.7181895573212259, + "epoch": 0.7171923262206353, "grad_norm": 0.0, - "learning_rate": 3.884169672384342e-06, - "loss": 0.8734, + "learning_rate": 3.909655578595714e-06, + "loss": 0.8445, "step": 25309 }, { - "epoch": 0.7182179341657208, + "epoch": 0.7172206636628978, "grad_norm": 0.0, - "learning_rate": 3.88344254179623e-06, - "loss": 0.9233, + "learning_rate": 3.9089276654049045e-06, + "loss": 0.7503, "step": 25310 }, { - "epoch": 0.7182463110102156, + "epoch": 0.7172490011051602, "grad_norm": 0.0, - "learning_rate": 3.882715462874957e-06, - "loss": 0.8718, + "learning_rate": 3.9081998035214576e-06, + "loss": 0.9069, "step": 25311 }, { - "epoch": 0.7182746878547106, + "epoch": 0.7172773385474227, "grad_norm": 0.0, - "learning_rate": 3.881988435626658e-06, - "loss": 0.9598, + "learning_rate": 3.907471992951505e-06, + "loss": 0.7502, "step": 25312 }, { - "epoch": 0.7183030646992055, + "epoch": 0.7173056759896852, "grad_norm": 0.0, - "learning_rate": 3.881261460057477e-06, - "loss": 0.8085, + "learning_rate": 3.9067442337011816e-06, + "loss": 0.8666, "step": 25313 }, { - "epoch": 0.7183314415437003, + "epoch": 0.7173340134319476, "grad_norm": 0.0, - "learning_rate": 3.88053453617356e-06, - "loss": 0.8652, + "learning_rate": 3.9060165257766116e-06, + "loss": 0.8911, "step": 25314 }, { - "epoch": 0.7183598183881952, + "epoch": 0.7173623508742101, "grad_norm": 0.0, - "learning_rate": 3.879807663981039e-06, - "loss": 0.9068, + "learning_rate": 3.9052888691839305e-06, + "loss": 0.6581, "step": 25315 }, { - "epoch": 0.7183881952326902, + "epoch": 0.7173906883164726, "grad_norm": 0.0, - "learning_rate": 3.879080843486057e-06, - "loss": 0.9095, + "learning_rate": 3.904561263929261e-06, + "loss": 0.8192, "step": 25316 }, { - "epoch": 0.718416572077185, + "epoch": 0.7174190257587351, "grad_norm": 0.0, - "learning_rate": 3.878354074694759e-06, - "loss": 0.8729, + "learning_rate": 3.903833710018735e-06, + "loss": 0.8827, "step": 25317 }, { - "epoch": 0.7184449489216799, + "epoch": 0.7174473632009974, "grad_norm": 0.0, - "learning_rate": 3.877627357613276e-06, - "loss": 0.7679, + "learning_rate": 3.903106207458482e-06, + "loss": 0.8973, "step": 25318 }, { - "epoch": 0.7184733257661748, + "epoch": 0.7174757006432599, "grad_norm": 0.0, - "learning_rate": 3.876900692247749e-06, - "loss": 0.7149, + "learning_rate": 3.902378756254629e-06, + "loss": 0.8601, "step": 25319 }, { - "epoch": 0.7185017026106697, + "epoch": 0.7175040380855224, "grad_norm": 0.0, - "learning_rate": 3.876174078604321e-06, - "loss": 0.8078, + "learning_rate": 3.901651356413304e-06, + "loss": 0.7753, "step": 25320 }, { - "epoch": 0.7185300794551646, + "epoch": 0.7175323755277848, "grad_norm": 0.0, - "learning_rate": 3.8754475166891225e-06, - "loss": 0.6939, + "learning_rate": 3.900924007940638e-06, + "loss": 0.8154, "step": 25321 }, { - "epoch": 0.7185584562996595, + "epoch": 0.7175607129700473, "grad_norm": 0.0, - "learning_rate": 3.874721006508293e-06, - "loss": 0.7739, + "learning_rate": 3.900196710842751e-06, + "loss": 0.7994, "step": 25322 }, { - "epoch": 0.7185868331441544, + "epoch": 0.7175890504123098, "grad_norm": 0.0, - "learning_rate": 3.873994548067972e-06, - "loss": 0.8407, + "learning_rate": 3.899469465125774e-06, + "loss": 0.8758, "step": 25323 }, { - "epoch": 0.7186152099886493, + "epoch": 0.7176173878545723, "grad_norm": 0.0, - "learning_rate": 3.873268141374293e-06, - "loss": 0.6963, + "learning_rate": 3.898742270795829e-06, + "loss": 0.8552, "step": 25324 }, { - "epoch": 0.7186435868331441, + "epoch": 0.7176457252968347, "grad_norm": 0.0, - "learning_rate": 3.872541786433398e-06, - "loss": 0.9436, + "learning_rate": 3.898015127859043e-06, + "loss": 0.7925, "step": 25325 }, { - "epoch": 0.7186719636776391, + "epoch": 0.7176740627390972, "grad_norm": 0.0, - "learning_rate": 3.871815483251414e-06, - "loss": 0.7693, + "learning_rate": 3.897288036321545e-06, + "loss": 0.8209, "step": 25326 }, { - "epoch": 0.7187003405221339, + "epoch": 0.7177024001813597, "grad_norm": 0.0, - "learning_rate": 3.8710892318344804e-06, - "loss": 0.7878, + "learning_rate": 3.896560996189454e-06, + "loss": 0.9202, "step": 25327 }, { - "epoch": 0.7187287173666288, + "epoch": 0.717730737623622, "grad_norm": 0.0, - "learning_rate": 3.870363032188735e-06, - "loss": 0.8222, + "learning_rate": 3.895834007468894e-06, + "loss": 0.8266, "step": 25328 }, { - "epoch": 0.7187570942111238, + "epoch": 0.7177590750658845, "grad_norm": 0.0, - "learning_rate": 3.869636884320306e-06, - "loss": 0.8405, + "learning_rate": 3.895107070165995e-06, + "loss": 0.8151, "step": 25329 }, { - "epoch": 0.7187854710556186, + "epoch": 0.717787412508147, "grad_norm": 0.0, - "learning_rate": 3.868910788235328e-06, - "loss": 0.7901, + "learning_rate": 3.894380184286874e-06, + "loss": 0.8499, "step": 25330 }, { - "epoch": 0.7188138479001135, + "epoch": 0.7178157499504094, "grad_norm": 0.0, - "learning_rate": 3.868184743939941e-06, - "loss": 0.7673, + "learning_rate": 3.8936533498376535e-06, + "loss": 0.7503, "step": 25331 }, { - "epoch": 0.7188422247446083, + "epoch": 0.7178440873926719, "grad_norm": 0.0, - "learning_rate": 3.86745875144027e-06, - "loss": 0.931, + "learning_rate": 3.89292656682446e-06, + "loss": 0.7229, "step": 25332 }, { - "epoch": 0.7188706015891033, + "epoch": 0.7178724248349344, "grad_norm": 0.0, - "learning_rate": 3.86673281074245e-06, - "loss": 0.8325, + "learning_rate": 3.892199835253413e-06, + "loss": 0.8606, "step": 25333 }, { - "epoch": 0.7188989784335982, + "epoch": 0.7179007622771969, "grad_norm": 0.0, - "learning_rate": 3.8660069218526165e-06, - "loss": 0.742, + "learning_rate": 3.89147315513064e-06, + "loss": 0.7603, "step": 25334 }, { - "epoch": 0.718927355278093, + "epoch": 0.7179290997194593, "grad_norm": 0.0, - "learning_rate": 3.865281084776895e-06, - "loss": 0.7761, + "learning_rate": 3.890746526462252e-06, + "loss": 0.7909, "step": 25335 }, { - "epoch": 0.718955732122588, + "epoch": 0.7179574371617218, "grad_norm": 0.0, - "learning_rate": 3.864555299521421e-06, - "loss": 0.7412, + "learning_rate": 3.890019949254378e-06, + "loss": 0.9529, "step": 25336 }, { - "epoch": 0.7189841089670829, + "epoch": 0.7179857746039843, "grad_norm": 0.0, - "learning_rate": 3.863829566092323e-06, - "loss": 0.8517, + "learning_rate": 3.889293423513132e-06, + "loss": 0.8305, "step": 25337 }, { - "epoch": 0.7190124858115777, + "epoch": 0.7180141120462467, "grad_norm": 0.0, - "learning_rate": 3.863103884495732e-06, - "loss": 0.8424, + "learning_rate": 3.8885669492446364e-06, + "loss": 0.7921, "step": 25338 }, { - "epoch": 0.7190408626560727, + "epoch": 0.7180424494885091, "grad_norm": 0.0, - "learning_rate": 3.862378254737783e-06, - "loss": 0.8241, + "learning_rate": 3.887840526455014e-06, + "loss": 0.8626, "step": 25339 }, { - "epoch": 0.7190692395005676, + "epoch": 0.7180707869307716, "grad_norm": 0.0, - "learning_rate": 3.861652676824597e-06, - "loss": 0.7425, + "learning_rate": 3.887114155150377e-06, + "loss": 0.8374, "step": 25340 }, { - "epoch": 0.7190976163450624, + "epoch": 0.7180991243730341, "grad_norm": 0.0, - "learning_rate": 3.860927150762307e-06, - "loss": 0.8264, + "learning_rate": 3.886387835336849e-06, + "loss": 0.788, "step": 25341 }, { - "epoch": 0.7191259931895573, + "epoch": 0.7181274618152965, "grad_norm": 0.0, - "learning_rate": 3.860201676557045e-06, - "loss": 0.8074, + "learning_rate": 3.885661567020549e-06, + "loss": 0.8862, "step": 25342 }, { - "epoch": 0.7191543700340522, + "epoch": 0.718155799257559, "grad_norm": 0.0, - "learning_rate": 3.859476254214933e-06, - "loss": 0.9444, + "learning_rate": 3.884935350207588e-06, + "loss": 0.8098, "step": 25343 }, { - "epoch": 0.7191827468785471, + "epoch": 0.7181841366998215, "grad_norm": 0.0, - "learning_rate": 3.858750883742098e-06, - "loss": 0.9549, + "learning_rate": 3.884209184904088e-06, + "loss": 0.7806, "step": 25344 }, { - "epoch": 0.719211123723042, + "epoch": 0.7182124741420839, "grad_norm": 0.0, - "learning_rate": 3.858025565144676e-06, - "loss": 0.8806, + "learning_rate": 3.883483071116165e-06, + "loss": 0.8551, "step": 25345 }, { - "epoch": 0.7192395005675369, + "epoch": 0.7182408115843464, "grad_norm": 0.0, - "learning_rate": 3.8573002984287845e-06, - "loss": 0.8863, + "learning_rate": 3.882757008849936e-06, + "loss": 0.7328, "step": 25346 }, { - "epoch": 0.7192678774120318, + "epoch": 0.7182691490266089, "grad_norm": 0.0, - "learning_rate": 3.856575083600553e-06, - "loss": 0.8355, + "learning_rate": 3.882030998111518e-06, + "loss": 0.8282, "step": 25347 }, { - "epoch": 0.7192962542565267, + "epoch": 0.7182974864688714, "grad_norm": 0.0, - "learning_rate": 3.855849920666111e-06, - "loss": 0.9126, + "learning_rate": 3.881305038907023e-06, + "loss": 0.8528, "step": 25348 }, { - "epoch": 0.7193246311010215, + "epoch": 0.7183258239111338, "grad_norm": 0.0, - "learning_rate": 3.8551248096315785e-06, - "loss": 0.7867, + "learning_rate": 3.880579131242567e-06, + "loss": 0.8967, "step": 25349 }, { - "epoch": 0.7193530079455165, + "epoch": 0.7183541613533962, "grad_norm": 0.0, - "learning_rate": 3.8543997505030826e-06, - "loss": 0.9125, + "learning_rate": 3.879853275124269e-06, + "loss": 0.8871, "step": 25350 }, { - "epoch": 0.7193813847900113, + "epoch": 0.7183824987956587, "grad_norm": 0.0, - "learning_rate": 3.853674743286752e-06, - "loss": 0.8604, + "learning_rate": 3.879127470558236e-06, + "loss": 0.8216, "step": 25351 }, { - "epoch": 0.7194097616345062, + "epoch": 0.7184108362379211, "grad_norm": 0.0, - "learning_rate": 3.852949787988704e-06, - "loss": 0.8495, + "learning_rate": 3.8784017175505886e-06, + "loss": 0.7688, "step": 25352 }, { - "epoch": 0.7194381384790012, + "epoch": 0.7184391736801836, "grad_norm": 0.0, - "learning_rate": 3.852224884615061e-06, - "loss": 0.8326, + "learning_rate": 3.877676016107433e-06, + "loss": 0.8034, "step": 25353 }, { - "epoch": 0.719466515323496, + "epoch": 0.7184675111224461, "grad_norm": 0.0, - "learning_rate": 3.851500033171958e-06, - "loss": 0.8169, + "learning_rate": 3.8769503662348865e-06, + "loss": 0.7658, "step": 25354 }, { - "epoch": 0.7194948921679909, + "epoch": 0.7184958485647085, "grad_norm": 0.0, - "learning_rate": 3.850775233665507e-06, - "loss": 0.8631, + "learning_rate": 3.876224767939064e-06, + "loss": 0.7475, "step": 25355 }, { - "epoch": 0.7195232690124859, + "epoch": 0.718524186006971, "grad_norm": 0.0, - "learning_rate": 3.850050486101834e-06, - "loss": 0.6999, + "learning_rate": 3.8754992212260714e-06, + "loss": 0.7917, "step": 25356 }, { - "epoch": 0.7195516458569807, + "epoch": 0.7185525234492335, "grad_norm": 0.0, - "learning_rate": 3.8493257904870654e-06, - "loss": 0.962, + "learning_rate": 3.874773726102022e-06, + "loss": 0.78, "step": 25357 }, { - "epoch": 0.7195800227014756, + "epoch": 0.718580860891496, "grad_norm": 0.0, - "learning_rate": 3.8486011468273145e-06, - "loss": 0.7861, + "learning_rate": 3.874048282573029e-06, + "loss": 0.794, "step": 25358 }, { - "epoch": 0.7196083995459704, + "epoch": 0.7186091983337584, "grad_norm": 0.0, - "learning_rate": 3.847876555128706e-06, - "loss": 0.9084, + "learning_rate": 3.873322890645202e-06, + "loss": 0.838, "step": 25359 }, { - "epoch": 0.7196367763904654, + "epoch": 0.7186375357760209, "grad_norm": 0.0, - "learning_rate": 3.847152015397363e-06, - "loss": 0.7371, + "learning_rate": 3.872597550324654e-06, + "loss": 0.735, "step": 25360 }, { - "epoch": 0.7196651532349603, + "epoch": 0.7186658732182833, "grad_norm": 0.0, - "learning_rate": 3.846427527639401e-06, - "loss": 0.9031, + "learning_rate": 3.871872261617489e-06, + "loss": 0.7533, "step": 25361 }, { - "epoch": 0.7196935300794551, + "epoch": 0.7186942106605457, "grad_norm": 0.0, - "learning_rate": 3.845703091860947e-06, - "loss": 0.7877, + "learning_rate": 3.8711470245298195e-06, + "loss": 0.8858, "step": 25362 }, { - "epoch": 0.7197219069239501, + "epoch": 0.7187225481028082, "grad_norm": 0.0, - "learning_rate": 3.8449787080681115e-06, - "loss": 0.846, + "learning_rate": 3.870421839067759e-06, + "loss": 0.8969, "step": 25363 }, { - "epoch": 0.719750283768445, + "epoch": 0.7187508855450707, "grad_norm": 0.0, - "learning_rate": 3.844254376267017e-06, - "loss": 0.8186, + "learning_rate": 3.869696705237407e-06, + "loss": 0.7277, "step": 25364 }, { - "epoch": 0.7197786606129398, + "epoch": 0.7187792229873332, "grad_norm": 0.0, - "learning_rate": 3.843530096463786e-06, - "loss": 0.8643, + "learning_rate": 3.868971623044881e-06, + "loss": 0.763, "step": 25365 }, { - "epoch": 0.7198070374574347, + "epoch": 0.7188075604295956, "grad_norm": 0.0, - "learning_rate": 3.84280586866453e-06, - "loss": 0.7289, + "learning_rate": 3.8682465924962776e-06, + "loss": 0.7481, "step": 25366 }, { - "epoch": 0.7198354143019297, + "epoch": 0.7188358978718581, "grad_norm": 0.0, - "learning_rate": 3.842081692875369e-06, - "loss": 0.8043, + "learning_rate": 3.867521613597712e-06, + "loss": 0.7876, "step": 25367 }, { - "epoch": 0.7198637911464245, + "epoch": 0.7188642353141206, "grad_norm": 0.0, - "learning_rate": 3.841357569102421e-06, - "loss": 0.9029, + "learning_rate": 3.866796686355292e-06, + "loss": 0.9061, "step": 25368 }, { - "epoch": 0.7198921679909194, + "epoch": 0.718892572756383, "grad_norm": 0.0, - "learning_rate": 3.840633497351802e-06, - "loss": 0.8248, + "learning_rate": 3.866071810775118e-06, + "loss": 0.8193, "step": 25369 }, { - "epoch": 0.7199205448354143, + "epoch": 0.7189209101986455, "grad_norm": 0.0, - "learning_rate": 3.83990947762963e-06, - "loss": 0.9258, + "learning_rate": 3.8653469868632986e-06, + "loss": 0.812, "step": 25370 }, { - "epoch": 0.7199489216799092, + "epoch": 0.718949247640908, "grad_norm": 0.0, - "learning_rate": 3.839185509942023e-06, - "loss": 0.8736, + "learning_rate": 3.86462221462594e-06, + "loss": 0.8245, "step": 25371 }, { - "epoch": 0.7199772985244041, + "epoch": 0.7189775850831704, "grad_norm": 0.0, - "learning_rate": 3.8384615942950885e-06, - "loss": 0.7949, + "learning_rate": 3.863897494069147e-06, + "loss": 0.8608, "step": 25372 }, { - "epoch": 0.7200056753688989, + "epoch": 0.7190059225254328, "grad_norm": 0.0, - "learning_rate": 3.837737730694946e-06, - "loss": 0.8441, + "learning_rate": 3.863172825199026e-06, + "loss": 0.8206, "step": 25373 }, { - "epoch": 0.7200340522133939, + "epoch": 0.7190342599676953, "grad_norm": 0.0, - "learning_rate": 3.837013919147714e-06, - "loss": 0.8639, + "learning_rate": 3.862448208021677e-06, + "loss": 0.9344, "step": 25374 }, { - "epoch": 0.7200624290578888, + "epoch": 0.7190625974099578, "grad_norm": 0.0, - "learning_rate": 3.836290159659498e-06, - "loss": 0.8333, + "learning_rate": 3.861723642543206e-06, + "loss": 0.8201, "step": 25375 }, { - "epoch": 0.7200908059023836, + "epoch": 0.7190909348522202, "grad_norm": 0.0, - "learning_rate": 3.835566452236416e-06, - "loss": 0.8177, + "learning_rate": 3.860999128769719e-06, + "loss": 0.9171, "step": 25376 }, { - "epoch": 0.7201191827468786, + "epoch": 0.7191192722944827, "grad_norm": 0.0, - "learning_rate": 3.834842796884586e-06, - "loss": 0.8101, + "learning_rate": 3.860274666707312e-06, + "loss": 0.8531, "step": 25377 }, { - "epoch": 0.7201475595913734, + "epoch": 0.7191476097367452, "grad_norm": 0.0, - "learning_rate": 3.834119193610112e-06, - "loss": 0.7855, + "learning_rate": 3.8595502563620924e-06, + "loss": 0.8717, "step": 25378 }, { - "epoch": 0.7201759364358683, + "epoch": 0.7191759471790076, "grad_norm": 0.0, - "learning_rate": 3.833395642419111e-06, - "loss": 0.7764, + "learning_rate": 3.858825897740164e-06, + "loss": 0.7526, "step": 25379 }, { - "epoch": 0.7202043132803633, + "epoch": 0.7192042846212701, "grad_norm": 0.0, - "learning_rate": 3.8326721433176975e-06, - "loss": 0.9037, + "learning_rate": 3.858101590847623e-06, + "loss": 0.7155, "step": 25380 }, { - "epoch": 0.7202326901248581, + "epoch": 0.7192326220635326, "grad_norm": 0.0, - "learning_rate": 3.831948696311974e-06, - "loss": 0.8238, + "learning_rate": 3.857377335690575e-06, + "loss": 0.7943, "step": 25381 }, { - "epoch": 0.720261066969353, + "epoch": 0.719260959505795, "grad_norm": 0.0, - "learning_rate": 3.83122530140806e-06, - "loss": 0.8806, + "learning_rate": 3.856653132275117e-06, + "loss": 0.825, "step": 25382 }, { - "epoch": 0.7202894438138479, + "epoch": 0.7192892969480574, "grad_norm": 0.0, - "learning_rate": 3.8305019586120615e-06, - "loss": 0.778, + "learning_rate": 3.85592898060735e-06, + "loss": 0.8064, "step": 25383 }, { - "epoch": 0.7203178206583428, + "epoch": 0.7193176343903199, "grad_norm": 0.0, - "learning_rate": 3.82977866793009e-06, - "loss": 0.8747, + "learning_rate": 3.855204880693374e-06, + "loss": 0.8164, "step": 25384 }, { - "epoch": 0.7203461975028377, + "epoch": 0.7193459718325824, "grad_norm": 0.0, - "learning_rate": 3.8290554293682614e-06, - "loss": 0.754, + "learning_rate": 3.85448083253929e-06, + "loss": 0.6662, "step": 25385 }, { - "epoch": 0.7203745743473325, + "epoch": 0.7193743092748448, "grad_norm": 0.0, - "learning_rate": 3.828332242932673e-06, - "loss": 0.8888, + "learning_rate": 3.853756836151197e-06, + "loss": 0.7895, "step": 25386 }, { - "epoch": 0.7204029511918275, + "epoch": 0.7194026467171073, "grad_norm": 0.0, - "learning_rate": 3.827609108629441e-06, - "loss": 0.8067, + "learning_rate": 3.8530328915351946e-06, + "loss": 0.7607, "step": 25387 }, { - "epoch": 0.7204313280363224, + "epoch": 0.7194309841593698, "grad_norm": 0.0, - "learning_rate": 3.826886026464676e-06, - "loss": 0.8584, + "learning_rate": 3.852308998697375e-06, + "loss": 0.7904, "step": 25388 }, { - "epoch": 0.7204597048808172, + "epoch": 0.7194593216016323, "grad_norm": 0.0, - "learning_rate": 3.826162996444477e-06, - "loss": 0.8275, + "learning_rate": 3.851585157643845e-06, + "loss": 0.8665, "step": 25389 }, { - "epoch": 0.7204880817253121, + "epoch": 0.7194876590438947, "grad_norm": 0.0, - "learning_rate": 3.825440018574958e-06, - "loss": 0.7658, + "learning_rate": 3.850861368380691e-06, + "loss": 0.7955, "step": 25390 }, { - "epoch": 0.7205164585698071, + "epoch": 0.7195159964861572, "grad_norm": 0.0, - "learning_rate": 3.824717092862228e-06, - "loss": 0.793, + "learning_rate": 3.850137630914015e-06, + "loss": 0.9097, "step": 25391 }, { - "epoch": 0.7205448354143019, + "epoch": 0.7195443339284197, "grad_norm": 0.0, - "learning_rate": 3.823994219312387e-06, - "loss": 0.7652, + "learning_rate": 3.849413945249918e-06, + "loss": 0.7656, "step": 25392 }, { - "epoch": 0.7205732122587968, + "epoch": 0.719572671370682, "grad_norm": 0.0, - "learning_rate": 3.823271397931544e-06, - "loss": 0.9409, + "learning_rate": 3.848690311394487e-06, + "loss": 0.7932, "step": 25393 }, { - "epoch": 0.7206015891032918, + "epoch": 0.7196010088129445, "grad_norm": 0.0, - "learning_rate": 3.82254862872581e-06, - "loss": 0.9435, + "learning_rate": 3.847966729353826e-06, + "loss": 0.7746, "step": 25394 }, { - "epoch": 0.7206299659477866, + "epoch": 0.719629346255207, "grad_norm": 0.0, - "learning_rate": 3.82182591170128e-06, - "loss": 0.8818, + "learning_rate": 3.847243199134022e-06, + "loss": 0.7977, "step": 25395 }, { - "epoch": 0.7206583427922815, + "epoch": 0.7196576836974695, "grad_norm": 0.0, - "learning_rate": 3.821103246864065e-06, - "loss": 0.8035, + "learning_rate": 3.846519720741173e-06, + "loss": 0.7488, "step": 25396 }, { - "epoch": 0.7206867196367764, + "epoch": 0.7196860211397319, "grad_norm": 0.0, - "learning_rate": 3.820380634220272e-06, - "loss": 0.862, + "learning_rate": 3.845796294181374e-06, + "loss": 0.8185, "step": 25397 }, { - "epoch": 0.7207150964812713, + "epoch": 0.7197143585819944, "grad_norm": 0.0, - "learning_rate": 3.819658073775998e-06, - "loss": 0.8205, + "learning_rate": 3.845072919460717e-06, + "loss": 0.7934, "step": 25398 }, { - "epoch": 0.7207434733257662, + "epoch": 0.7197426960242569, "grad_norm": 0.0, - "learning_rate": 3.81893556553735e-06, - "loss": 0.8755, + "learning_rate": 3.844349596585299e-06, + "loss": 0.9194, "step": 25399 }, { - "epoch": 0.720771850170261, + "epoch": 0.7197710334665193, "grad_norm": 0.0, - "learning_rate": 3.818213109510432e-06, - "loss": 0.7698, + "learning_rate": 3.843626325561211e-06, + "loss": 0.6618, "step": 25400 }, { - "epoch": 0.720800227014756, + "epoch": 0.7197993709087818, "grad_norm": 0.0, - "learning_rate": 3.817490705701346e-06, - "loss": 0.8004, + "learning_rate": 3.842903106394542e-06, + "loss": 0.8481, "step": 25401 }, { - "epoch": 0.7208286038592508, + "epoch": 0.7198277083510443, "grad_norm": 0.0, - "learning_rate": 3.816768354116196e-06, - "loss": 0.8611, + "learning_rate": 3.842179939091389e-06, + "loss": 0.7711, "step": 25402 }, { - "epoch": 0.7208569807037457, + "epoch": 0.7198560457933066, "grad_norm": 0.0, - "learning_rate": 3.8160460547610785e-06, - "loss": 0.8779, + "learning_rate": 3.841456823657839e-06, + "loss": 0.7678, "step": 25403 }, { - "epoch": 0.7208853575482407, + "epoch": 0.7198843832355691, "grad_norm": 0.0, - "learning_rate": 3.8153238076420995e-06, - "loss": 0.8561, + "learning_rate": 3.840733760099985e-06, + "loss": 0.9597, "step": 25404 }, { - "epoch": 0.7209137343927355, + "epoch": 0.7199127206778316, "grad_norm": 0.0, - "learning_rate": 3.8146016127653605e-06, - "loss": 0.8655, + "learning_rate": 3.84001074842392e-06, + "loss": 0.7599, "step": 25405 }, { - "epoch": 0.7209421112372304, + "epoch": 0.7199410581200941, "grad_norm": 0.0, - "learning_rate": 3.8138794701369562e-06, - "loss": 0.8322, + "learning_rate": 3.8392877886357296e-06, + "loss": 0.8021, "step": 25406 }, { - "epoch": 0.7209704880817253, + "epoch": 0.7199693955623565, "grad_norm": 0.0, - "learning_rate": 3.8131573797629906e-06, - "loss": 0.8502, + "learning_rate": 3.838564880741506e-06, + "loss": 0.8451, "step": 25407 }, { - "epoch": 0.7209988649262202, + "epoch": 0.719997733004619, "grad_norm": 0.0, - "learning_rate": 3.8124353416495653e-06, - "loss": 0.7471, + "learning_rate": 3.837842024747341e-06, + "loss": 0.9204, "step": 25408 }, { - "epoch": 0.7210272417707151, + "epoch": 0.7200260704468815, "grad_norm": 0.0, - "learning_rate": 3.8117133558027743e-06, - "loss": 0.8066, + "learning_rate": 3.837119220659318e-06, + "loss": 0.8494, "step": 25409 }, { - "epoch": 0.72105561861521, + "epoch": 0.7200544078891439, "grad_norm": 0.0, - "learning_rate": 3.810991422228718e-06, - "loss": 0.8997, + "learning_rate": 3.836396468483528e-06, + "loss": 0.8271, "step": 25410 }, { - "epoch": 0.7210839954597049, + "epoch": 0.7200827453314064, "grad_norm": 0.0, - "learning_rate": 3.8102695409335e-06, - "loss": 0.7708, + "learning_rate": 3.835673768226059e-06, + "loss": 0.752, "step": 25411 }, { - "epoch": 0.7211123723041998, + "epoch": 0.7201110827736689, "grad_norm": 0.0, - "learning_rate": 3.8095477119232092e-06, - "loss": 0.9176, + "learning_rate": 3.834951119893e-06, + "loss": 0.9171, "step": 25412 }, { - "epoch": 0.7211407491486946, + "epoch": 0.7201394202159314, "grad_norm": 0.0, - "learning_rate": 3.808825935203947e-06, - "loss": 0.8525, + "learning_rate": 3.83422852349044e-06, + "loss": 0.8375, "step": 25413 }, { - "epoch": 0.7211691259931896, + "epoch": 0.7201677576581937, "grad_norm": 0.0, - "learning_rate": 3.80810421078181e-06, - "loss": 0.9028, + "learning_rate": 3.8335059790244585e-06, + "loss": 0.8471, "step": 25414 }, { - "epoch": 0.7211975028376845, + "epoch": 0.7201960951004562, "grad_norm": 0.0, - "learning_rate": 3.807382538662896e-06, - "loss": 0.8686, + "learning_rate": 3.8327834865011475e-06, + "loss": 0.8956, "step": 25415 }, { - "epoch": 0.7212258796821793, + "epoch": 0.7202244325427187, "grad_norm": 0.0, - "learning_rate": 3.8066609188533033e-06, - "loss": 0.8374, + "learning_rate": 3.832061045926594e-06, + "loss": 0.8236, "step": 25416 }, { - "epoch": 0.7212542565266742, + "epoch": 0.7202527699849811, "grad_norm": 0.0, - "learning_rate": 3.8059393513591203e-06, - "loss": 0.8454, + "learning_rate": 3.831338657306877e-06, + "loss": 0.8432, "step": 25417 }, { - "epoch": 0.7212826333711692, + "epoch": 0.7202811074272436, "grad_norm": 0.0, - "learning_rate": 3.8052178361864467e-06, - "loss": 0.8446, + "learning_rate": 3.830616320648089e-06, + "loss": 0.9287, "step": 25418 }, { - "epoch": 0.721311010215664, + "epoch": 0.7203094448695061, "grad_norm": 0.0, - "learning_rate": 3.8044963733413797e-06, - "loss": 0.8345, + "learning_rate": 3.829894035956306e-06, + "loss": 0.8253, "step": 25419 }, { - "epoch": 0.7213393870601589, + "epoch": 0.7203377823117686, "grad_norm": 0.0, - "learning_rate": 3.803774962830007e-06, - "loss": 0.8116, + "learning_rate": 3.829171803237618e-06, + "loss": 0.851, "step": 25420 }, { - "epoch": 0.7213677639046538, + "epoch": 0.720366119754031, "grad_norm": 0.0, - "learning_rate": 3.8030536046584253e-06, - "loss": 0.9447, + "learning_rate": 3.8284496224981116e-06, + "loss": 0.839, "step": 25421 }, { - "epoch": 0.7213961407491487, + "epoch": 0.7203944571962935, "grad_norm": 0.0, - "learning_rate": 3.8023322988327325e-06, - "loss": 0.7523, + "learning_rate": 3.827727493743861e-06, + "loss": 0.8826, "step": 25422 }, { - "epoch": 0.7214245175936436, + "epoch": 0.720422794638556, "grad_norm": 0.0, - "learning_rate": 3.801611045359014e-06, - "loss": 0.8134, + "learning_rate": 3.827005416980955e-06, + "loss": 0.8314, "step": 25423 }, { - "epoch": 0.7214528944381384, + "epoch": 0.7204511320808183, "grad_norm": 0.0, - "learning_rate": 3.800889844243365e-06, - "loss": 0.8263, + "learning_rate": 3.826283392215473e-06, + "loss": 0.8381, "step": 25424 }, { - "epoch": 0.7214812712826334, + "epoch": 0.7204794695230808, "grad_norm": 0.0, - "learning_rate": 3.8001686954918815e-06, - "loss": 0.8946, + "learning_rate": 3.8255614194535e-06, + "loss": 0.8942, "step": 25425 }, { - "epoch": 0.7215096481271283, + "epoch": 0.7205078069653433, "grad_norm": 0.0, - "learning_rate": 3.7994475991106496e-06, - "loss": 0.8389, + "learning_rate": 3.824839498701118e-06, + "loss": 0.804, "step": 25426 }, { - "epoch": 0.7215380249716231, + "epoch": 0.7205361444076057, "grad_norm": 0.0, - "learning_rate": 3.7987265551057606e-06, - "loss": 0.8978, + "learning_rate": 3.824117629964404e-06, + "loss": 0.784, "step": 25427 }, { - "epoch": 0.7215664018161181, + "epoch": 0.7205644818498682, "grad_norm": 0.0, - "learning_rate": 3.798005563483312e-06, - "loss": 0.8049, + "learning_rate": 3.823395813249439e-06, + "loss": 0.7995, "step": 25428 }, { - "epoch": 0.721594778660613, + "epoch": 0.7205928192921307, "grad_norm": 0.0, - "learning_rate": 3.7972846242493823e-06, - "loss": 0.9158, + "learning_rate": 3.822674048562309e-06, + "loss": 0.9778, "step": 25429 }, { - "epoch": 0.7216231555051078, + "epoch": 0.7206211567343932, "grad_norm": 0.0, - "learning_rate": 3.796563737410074e-06, - "loss": 0.8361, + "learning_rate": 3.821952335909086e-06, + "loss": 0.8684, "step": 25430 }, { - "epoch": 0.7216515323496028, + "epoch": 0.7206494941766556, "grad_norm": 0.0, - "learning_rate": 3.795842902971468e-06, - "loss": 0.9321, + "learning_rate": 3.821230675295856e-06, + "loss": 0.7611, "step": 25431 }, { - "epoch": 0.7216799091940976, + "epoch": 0.7206778316189181, "grad_norm": 0.0, - "learning_rate": 3.795122120939656e-06, - "loss": 0.799, + "learning_rate": 3.820509066728691e-06, + "loss": 0.8024, "step": 25432 }, { - "epoch": 0.7217082860385925, + "epoch": 0.7207061690611806, "grad_norm": 0.0, - "learning_rate": 3.79440139132073e-06, - "loss": 0.7635, + "learning_rate": 3.819787510213673e-06, + "loss": 0.8854, "step": 25433 }, { - "epoch": 0.7217366628830874, + "epoch": 0.720734506503443, "grad_norm": 0.0, - "learning_rate": 3.79368071412077e-06, - "loss": 0.8005, + "learning_rate": 3.819066005756883e-06, + "loss": 0.9162, "step": 25434 }, { - "epoch": 0.7217650397275823, + "epoch": 0.7207628439457054, "grad_norm": 0.0, - "learning_rate": 3.7929600893458684e-06, - "loss": 0.7845, + "learning_rate": 3.818344553364392e-06, + "loss": 0.7787, "step": 25435 }, { - "epoch": 0.7217934165720772, + "epoch": 0.7207911813879679, "grad_norm": 0.0, - "learning_rate": 3.7922395170021163e-06, - "loss": 0.8175, + "learning_rate": 3.817623153042281e-06, + "loss": 0.8629, "step": 25436 }, { - "epoch": 0.721821793416572, + "epoch": 0.7208195188302304, "grad_norm": 0.0, - "learning_rate": 3.7915189970955913e-06, - "loss": 0.7751, + "learning_rate": 3.816901804796624e-06, + "loss": 0.8081, "step": 25437 }, { - "epoch": 0.721850170261067, + "epoch": 0.7208478562724928, "grad_norm": 0.0, - "learning_rate": 3.7907985296323857e-06, - "loss": 0.8562, + "learning_rate": 3.8161805086335e-06, + "loss": 0.7637, "step": 25438 }, { - "epoch": 0.7218785471055619, + "epoch": 0.7208761937147553, "grad_norm": 0.0, - "learning_rate": 3.790078114618586e-06, - "loss": 0.8151, + "learning_rate": 3.815459264558988e-06, + "loss": 0.8999, "step": 25439 }, { - "epoch": 0.7219069239500567, + "epoch": 0.7209045311570178, "grad_norm": 0.0, - "learning_rate": 3.7893577520602733e-06, - "loss": 0.793, + "learning_rate": 3.814738072579156e-06, + "loss": 0.8361, "step": 25440 }, { - "epoch": 0.7219353007945516, + "epoch": 0.7209328685992802, "grad_norm": 0.0, - "learning_rate": 3.788637441963534e-06, - "loss": 0.8346, + "learning_rate": 3.814016932700081e-06, + "loss": 0.7152, "step": 25441 }, { - "epoch": 0.7219636776390466, + "epoch": 0.7209612060415427, "grad_norm": 0.0, - "learning_rate": 3.7879171843344576e-06, - "loss": 0.7983, + "learning_rate": 3.813295844927842e-06, + "loss": 0.8287, "step": 25442 }, { - "epoch": 0.7219920544835414, + "epoch": 0.7209895434838052, "grad_norm": 0.0, - "learning_rate": 3.787196979179121e-06, - "loss": 0.8746, + "learning_rate": 3.8125748092685066e-06, + "loss": 0.8747, "step": 25443 }, { - "epoch": 0.7220204313280363, + "epoch": 0.7210178809260677, "grad_norm": 0.0, - "learning_rate": 3.78647682650361e-06, - "loss": 0.7422, + "learning_rate": 3.811853825728151e-06, + "loss": 0.7555, "step": 25444 }, { - "epoch": 0.7220488081725313, + "epoch": 0.72104621836833, "grad_norm": 0.0, - "learning_rate": 3.7857567263140084e-06, - "loss": 0.7918, + "learning_rate": 3.8111328943128524e-06, + "loss": 0.9672, "step": 25445 }, { - "epoch": 0.7220771850170261, + "epoch": 0.7210745558105925, "grad_norm": 0.0, - "learning_rate": 3.7850366786163995e-06, - "loss": 0.8452, + "learning_rate": 3.810412015028676e-06, + "loss": 0.833, "step": 25446 }, { - "epoch": 0.722105561861521, + "epoch": 0.721102893252855, "grad_norm": 0.0, - "learning_rate": 3.784316683416869e-06, - "loss": 0.8644, + "learning_rate": 3.809691187881701e-06, + "loss": 0.8504, "step": 25447 }, { - "epoch": 0.7221339387060159, + "epoch": 0.7211312306951174, "grad_norm": 0.0, - "learning_rate": 3.7835967407214913e-06, - "loss": 0.8234, + "learning_rate": 3.8089704128779913e-06, + "loss": 0.8659, "step": 25448 }, { - "epoch": 0.7221623155505108, + "epoch": 0.7211595681373799, "grad_norm": 0.0, - "learning_rate": 3.782876850536351e-06, - "loss": 0.8561, + "learning_rate": 3.8082496900236244e-06, + "loss": 0.7809, "step": 25449 }, { - "epoch": 0.7221906923950057, + "epoch": 0.7211879055796424, "grad_norm": 0.0, - "learning_rate": 3.7821570128675323e-06, - "loss": 0.7919, + "learning_rate": 3.8075290193246685e-06, + "loss": 0.7847, "step": 25450 }, { - "epoch": 0.7222190692395005, + "epoch": 0.7212162430219048, "grad_norm": 0.0, - "learning_rate": 3.781437227721111e-06, - "loss": 0.7308, + "learning_rate": 3.8068084007871965e-06, + "loss": 0.7328, "step": 25451 }, { - "epoch": 0.7222474460839955, + "epoch": 0.7212445804641673, "grad_norm": 0.0, - "learning_rate": 3.780717495103168e-06, - "loss": 0.8889, + "learning_rate": 3.8060878344172793e-06, + "loss": 0.7584, "step": 25452 }, { - "epoch": 0.7222758229284904, + "epoch": 0.7212729179064298, "grad_norm": 0.0, - "learning_rate": 3.7799978150197867e-06, - "loss": 0.8182, + "learning_rate": 3.805367320220981e-06, + "loss": 0.8385, "step": 25453 }, { - "epoch": 0.7223041997729852, + "epoch": 0.7213012553486923, "grad_norm": 0.0, - "learning_rate": 3.77927818747704e-06, - "loss": 0.757, + "learning_rate": 3.804646858204375e-06, + "loss": 0.8528, "step": 25454 }, { - "epoch": 0.7223325766174802, + "epoch": 0.7213295927909547, "grad_norm": 0.0, - "learning_rate": 3.7785586124810113e-06, - "loss": 0.949, + "learning_rate": 3.8039264483735317e-06, + "loss": 0.8782, "step": 25455 }, { - "epoch": 0.722360953461975, + "epoch": 0.7213579302332171, "grad_norm": 0.0, - "learning_rate": 3.7778390900377804e-06, - "loss": 0.8183, + "learning_rate": 3.8032060907345137e-06, + "loss": 0.6863, "step": 25456 }, { - "epoch": 0.7223893303064699, + "epoch": 0.7213862676754796, "grad_norm": 0.0, - "learning_rate": 3.77711962015342e-06, - "loss": 0.85, + "learning_rate": 3.8024857852933915e-06, + "loss": 0.8196, "step": 25457 }, { - "epoch": 0.7224177071509648, + "epoch": 0.721414605117742, "grad_norm": 0.0, - "learning_rate": 3.7764002028340086e-06, - "loss": 0.7765, + "learning_rate": 3.8017655320562363e-06, + "loss": 0.7551, "step": 25458 }, { - "epoch": 0.7224460839954597, + "epoch": 0.7214429425600045, "grad_norm": 0.0, - "learning_rate": 3.775680838085628e-06, - "loss": 0.8144, + "learning_rate": 3.8010453310291086e-06, + "loss": 0.7667, "step": 25459 }, { - "epoch": 0.7224744608399546, + "epoch": 0.721471280002267, "grad_norm": 0.0, - "learning_rate": 3.7749615259143445e-06, - "loss": 0.7186, + "learning_rate": 3.800325182218082e-06, + "loss": 0.7127, "step": 25460 }, { - "epoch": 0.7225028376844495, + "epoch": 0.7214996174445295, "grad_norm": 0.0, - "learning_rate": 3.7742422663262445e-06, - "loss": 0.8745, + "learning_rate": 3.7996050856292143e-06, + "loss": 0.9542, "step": 25461 }, { - "epoch": 0.7225312145289444, + "epoch": 0.7215279548867919, "grad_norm": 0.0, - "learning_rate": 3.773523059327403e-06, - "loss": 0.8628, + "learning_rate": 3.798885041268575e-06, + "loss": 0.9118, "step": 25462 }, { - "epoch": 0.7225595913734393, + "epoch": 0.7215562923290544, "grad_norm": 0.0, - "learning_rate": 3.7728039049238896e-06, - "loss": 0.8097, + "learning_rate": 3.7981650491422305e-06, + "loss": 0.7828, "step": 25463 }, { - "epoch": 0.7225879682179341, + "epoch": 0.7215846297713169, "grad_norm": 0.0, - "learning_rate": 3.7720848031217804e-06, - "loss": 0.8365, + "learning_rate": 3.7974451092562447e-06, + "loss": 0.857, "step": 25464 }, { - "epoch": 0.7226163450624291, + "epoch": 0.7216129672135793, "grad_norm": 0.0, - "learning_rate": 3.7713657539271553e-06, - "loss": 0.7262, + "learning_rate": 3.7967252216166827e-06, + "loss": 0.6856, "step": 25465 }, { - "epoch": 0.722644721906924, + "epoch": 0.7216413046558418, "grad_norm": 0.0, - "learning_rate": 3.770646757346079e-06, - "loss": 0.8112, + "learning_rate": 3.79600538622961e-06, + "loss": 0.791, "step": 25466 }, { - "epoch": 0.7226730987514188, + "epoch": 0.7216696420981042, "grad_norm": 0.0, - "learning_rate": 3.7699278133846306e-06, - "loss": 0.9166, + "learning_rate": 3.795285603101085e-06, + "loss": 0.8726, "step": 25467 }, { - "epoch": 0.7227014755959137, + "epoch": 0.7216979795403667, "grad_norm": 0.0, - "learning_rate": 3.769208922048886e-06, - "loss": 0.7807, + "learning_rate": 3.7945658722371768e-06, + "loss": 0.9086, "step": 25468 }, { - "epoch": 0.7227298524404087, + "epoch": 0.7217263169826291, "grad_norm": 0.0, - "learning_rate": 3.768490083344909e-06, - "loss": 0.8814, + "learning_rate": 3.793846193643941e-06, + "loss": 0.7799, "step": 25469 }, { - "epoch": 0.7227582292849035, + "epoch": 0.7217546544248916, "grad_norm": 0.0, - "learning_rate": 3.7677712972787807e-06, - "loss": 0.9192, + "learning_rate": 3.793126567327443e-06, + "loss": 0.7441, "step": 25470 }, { - "epoch": 0.7227866061293984, + "epoch": 0.7217829918671541, "grad_norm": 0.0, - "learning_rate": 3.7670525638565636e-06, - "loss": 0.7991, + "learning_rate": 3.792406993293748e-06, + "loss": 0.7984, "step": 25471 }, { - "epoch": 0.7228149829738933, + "epoch": 0.7218113293094165, "grad_norm": 0.0, - "learning_rate": 3.7663338830843353e-06, - "loss": 0.8321, + "learning_rate": 3.7916874715489117e-06, + "loss": 0.8309, "step": 25472 }, { - "epoch": 0.7228433598183882, + "epoch": 0.721839666751679, "grad_norm": 0.0, - "learning_rate": 3.7656152549681668e-06, - "loss": 0.9136, + "learning_rate": 3.790968002098997e-06, + "loss": 0.8179, "step": 25473 }, { - "epoch": 0.7228717366628831, + "epoch": 0.7218680041939415, "grad_norm": 0.0, - "learning_rate": 3.764896679514124e-06, - "loss": 0.917, + "learning_rate": 3.790248584950067e-06, + "loss": 0.758, "step": 25474 }, { - "epoch": 0.7229001135073779, + "epoch": 0.7218963416362039, "grad_norm": 0.0, - "learning_rate": 3.7641781567282767e-06, - "loss": 0.7642, + "learning_rate": 3.789529220108176e-06, + "loss": 0.7924, "step": 25475 }, { - "epoch": 0.7229284903518729, + "epoch": 0.7219246790784664, "grad_norm": 0.0, - "learning_rate": 3.7634596866167015e-06, - "loss": 0.8276, + "learning_rate": 3.788809907579387e-06, + "loss": 0.9009, "step": 25476 }, { - "epoch": 0.7229568671963678, + "epoch": 0.7219530165207289, "grad_norm": 0.0, - "learning_rate": 3.7627412691854613e-06, - "loss": 0.833, + "learning_rate": 3.788090647369759e-06, + "loss": 0.7622, "step": 25477 }, { - "epoch": 0.7229852440408626, + "epoch": 0.7219813539629913, "grad_norm": 0.0, - "learning_rate": 3.7620229044406255e-06, - "loss": 0.9004, + "learning_rate": 3.787371439485349e-06, + "loss": 0.8817, "step": 25478 }, { - "epoch": 0.7230136208853576, + "epoch": 0.7220096914052537, "grad_norm": 0.0, - "learning_rate": 3.761304592388265e-06, - "loss": 0.8518, + "learning_rate": 3.7866522839322207e-06, + "loss": 0.8478, "step": 25479 }, { - "epoch": 0.7230419977298524, + "epoch": 0.7220380288475162, "grad_norm": 0.0, - "learning_rate": 3.760586333034443e-06, - "loss": 0.8917, + "learning_rate": 3.7859331807164236e-06, + "loss": 0.9488, "step": 25480 }, { - "epoch": 0.7230703745743473, + "epoch": 0.7220663662897787, "grad_norm": 0.0, - "learning_rate": 3.7598681263852276e-06, - "loss": 0.8375, + "learning_rate": 3.7852141298440227e-06, + "loss": 0.7889, "step": 25481 }, { - "epoch": 0.7230987514188423, + "epoch": 0.7220947037320411, "grad_norm": 0.0, - "learning_rate": 3.7591499724466906e-06, - "loss": 0.8416, + "learning_rate": 3.7844951313210663e-06, + "loss": 0.9106, "step": 25482 }, { - "epoch": 0.7231271282633371, + "epoch": 0.7221230411743036, "grad_norm": 0.0, - "learning_rate": 3.7584318712248915e-06, - "loss": 0.8394, + "learning_rate": 3.783776185153617e-06, + "loss": 0.7728, "step": 25483 }, { - "epoch": 0.723155505107832, + "epoch": 0.7221513786165661, "grad_norm": 0.0, - "learning_rate": 3.757713822725898e-06, - "loss": 0.8691, + "learning_rate": 3.7830572913477325e-06, + "loss": 0.8377, "step": 25484 }, { - "epoch": 0.7231838819523269, + "epoch": 0.7221797160588286, "grad_norm": 0.0, - "learning_rate": 3.7569958269557806e-06, - "loss": 0.9334, + "learning_rate": 3.7823384499094608e-06, + "loss": 0.8006, "step": 25485 }, { - "epoch": 0.7232122587968218, + "epoch": 0.722208053501091, "grad_norm": 0.0, - "learning_rate": 3.756277883920596e-06, - "loss": 0.8469, + "learning_rate": 3.7816196608448617e-06, + "loss": 0.8338, "step": 25486 }, { - "epoch": 0.7232406356413167, + "epoch": 0.7222363909433535, "grad_norm": 0.0, - "learning_rate": 3.755559993626413e-06, - "loss": 0.8536, + "learning_rate": 3.7809009241599927e-06, + "loss": 0.8025, "step": 25487 }, { - "epoch": 0.7232690124858115, + "epoch": 0.722264728385616, "grad_norm": 0.0, - "learning_rate": 3.7548421560792993e-06, - "loss": 0.7047, + "learning_rate": 3.7801822398609013e-06, + "loss": 0.9322, "step": 25488 }, { - "epoch": 0.7232973893303065, + "epoch": 0.7222930658278783, "grad_norm": 0.0, - "learning_rate": 3.7541243712853105e-06, - "loss": 0.7857, + "learning_rate": 3.779463607953644e-06, + "loss": 0.8139, "step": 25489 }, { - "epoch": 0.7233257661748014, + "epoch": 0.7223214032701408, "grad_norm": 0.0, - "learning_rate": 3.753406639250514e-06, - "loss": 0.7817, + "learning_rate": 3.778745028444275e-06, + "loss": 0.7935, "step": 25490 }, { - "epoch": 0.7233541430192962, + "epoch": 0.7223497407124033, "grad_norm": 0.0, - "learning_rate": 3.7526889599809725e-06, - "loss": 0.8794, + "learning_rate": 3.7780265013388475e-06, + "loss": 0.8061, "step": 25491 }, { - "epoch": 0.7233825198637911, + "epoch": 0.7223780781546658, "grad_norm": 0.0, - "learning_rate": 3.751971333482749e-06, - "loss": 0.9122, + "learning_rate": 3.777308026643416e-06, + "loss": 0.8547, "step": 25492 }, { - "epoch": 0.7234108967082861, + "epoch": 0.7224064155969282, "grad_norm": 0.0, - "learning_rate": 3.751253759761907e-06, - "loss": 0.764, + "learning_rate": 3.776589604364027e-06, + "loss": 0.8118, "step": 25493 }, { - "epoch": 0.7234392735527809, + "epoch": 0.7224347530391907, "grad_norm": 0.0, - "learning_rate": 3.7505362388245026e-06, - "loss": 0.806, + "learning_rate": 3.775871234506734e-06, + "loss": 0.87, "step": 25494 }, { - "epoch": 0.7234676503972758, + "epoch": 0.7224630904814532, "grad_norm": 0.0, - "learning_rate": 3.7498187706765987e-06, - "loss": 0.882, + "learning_rate": 3.7751529170775925e-06, + "loss": 0.8482, "step": 25495 }, { - "epoch": 0.7234960272417708, + "epoch": 0.7224914279237156, "grad_norm": 0.0, - "learning_rate": 3.7491013553242605e-06, - "loss": 0.9651, + "learning_rate": 3.774434652082646e-06, + "loss": 0.8357, "step": 25496 }, { - "epoch": 0.7235244040862656, + "epoch": 0.7225197653659781, "grad_norm": 0.0, - "learning_rate": 3.748383992773541e-06, - "loss": 0.8041, + "learning_rate": 3.7737164395279514e-06, + "loss": 0.8852, "step": 25497 }, { - "epoch": 0.7235527809307605, + "epoch": 0.7225481028082406, "grad_norm": 0.0, - "learning_rate": 3.747666683030503e-06, - "loss": 0.8912, + "learning_rate": 3.772998279419553e-06, + "loss": 0.7622, "step": 25498 }, { - "epoch": 0.7235811577752554, + "epoch": 0.7225764402505029, "grad_norm": 0.0, - "learning_rate": 3.7469494261012086e-06, - "loss": 0.9097, + "learning_rate": 3.7722801717635016e-06, + "loss": 0.8, "step": 25499 }, { - "epoch": 0.7236095346197503, + "epoch": 0.7226047776927654, "grad_norm": 0.0, - "learning_rate": 3.74623222199171e-06, - "loss": 0.9041, + "learning_rate": 3.7715621165658466e-06, + "loss": 0.7889, "step": 25500 }, { - "epoch": 0.7236379114642452, + "epoch": 0.7226331151350279, "grad_norm": 0.0, - "learning_rate": 3.745515070708069e-06, - "loss": 0.876, + "learning_rate": 3.770844113832637e-06, + "loss": 0.8131, "step": 25501 }, { - "epoch": 0.72366628830874, + "epoch": 0.7226614525772904, "grad_norm": 0.0, - "learning_rate": 3.7447979722563464e-06, - "loss": 0.8946, + "learning_rate": 3.7701261635699205e-06, + "loss": 0.83, "step": 25502 }, { - "epoch": 0.723694665153235, + "epoch": 0.7226897900195528, "grad_norm": 0.0, - "learning_rate": 3.744080926642594e-06, - "loss": 0.8454, + "learning_rate": 3.7694082657837495e-06, + "loss": 0.8341, "step": 25503 }, { - "epoch": 0.7237230419977299, + "epoch": 0.7227181274618153, "grad_norm": 0.0, - "learning_rate": 3.7433639338728712e-06, - "loss": 0.7555, + "learning_rate": 3.768690420480161e-06, + "loss": 0.7022, "step": 25504 }, { - "epoch": 0.7237514188422247, + "epoch": 0.7227464649040778, "grad_norm": 0.0, - "learning_rate": 3.7426469939532374e-06, - "loss": 0.7565, + "learning_rate": 3.767972627665212e-06, + "loss": 0.9263, "step": 25505 }, { - "epoch": 0.7237797956867197, + "epoch": 0.7227748023463402, "grad_norm": 0.0, - "learning_rate": 3.7419301068897398e-06, - "loss": 0.8708, + "learning_rate": 3.7672548873449388e-06, + "loss": 0.7941, "step": 25506 }, { - "epoch": 0.7238081725312145, + "epoch": 0.7228031397886027, "grad_norm": 0.0, - "learning_rate": 3.7412132726884452e-06, - "loss": 0.8557, + "learning_rate": 3.766537199525393e-06, + "loss": 0.8699, "step": 25507 }, { - "epoch": 0.7238365493757094, + "epoch": 0.7228314772308652, "grad_norm": 0.0, - "learning_rate": 3.740496491355401e-06, - "loss": 0.8187, + "learning_rate": 3.765819564212623e-06, + "loss": 0.8845, "step": 25508 }, { - "epoch": 0.7238649262202043, + "epoch": 0.7228598146731277, "grad_norm": 0.0, - "learning_rate": 3.739779762896664e-06, - "loss": 0.7972, + "learning_rate": 3.7651019814126656e-06, + "loss": 0.758, "step": 25509 }, { - "epoch": 0.7238933030646992, + "epoch": 0.72288815211539, "grad_norm": 0.0, - "learning_rate": 3.739063087318292e-06, - "loss": 0.9059, + "learning_rate": 3.764384451131573e-06, + "loss": 0.8044, "step": 25510 }, { - "epoch": 0.7239216799091941, + "epoch": 0.7229164895576525, "grad_norm": 0.0, - "learning_rate": 3.7383464646263324e-06, - "loss": 0.881, + "learning_rate": 3.763666973375383e-06, + "loss": 0.8695, "step": 25511 }, { - "epoch": 0.723950056753689, + "epoch": 0.722944826999915, "grad_norm": 0.0, - "learning_rate": 3.737629894826842e-06, - "loss": 0.8707, + "learning_rate": 3.762949548150141e-06, + "loss": 0.7318, "step": 25512 }, { - "epoch": 0.7239784335981839, + "epoch": 0.7229731644421774, "grad_norm": 0.0, - "learning_rate": 3.736913377925877e-06, - "loss": 0.7188, + "learning_rate": 3.762232175461892e-06, + "loss": 0.8179, "step": 25513 }, { - "epoch": 0.7240068104426788, + "epoch": 0.7230015018844399, "grad_norm": 0.0, - "learning_rate": 3.736196913929482e-06, - "loss": 0.7973, + "learning_rate": 3.761514855316677e-06, + "loss": 0.837, "step": 25514 }, { - "epoch": 0.7240351872871736, + "epoch": 0.7230298393267024, "grad_norm": 0.0, - "learning_rate": 3.7354805028437137e-06, - "loss": 0.7623, + "learning_rate": 3.760797587720538e-06, + "loss": 0.7783, "step": 25515 }, { - "epoch": 0.7240635641316686, + "epoch": 0.7230581767689649, "grad_norm": 0.0, - "learning_rate": 3.7347641446746263e-06, - "loss": 0.7191, + "learning_rate": 3.760080372679523e-06, + "loss": 0.8323, "step": 25516 }, { - "epoch": 0.7240919409761635, + "epoch": 0.7230865142112273, "grad_norm": 0.0, - "learning_rate": 3.7340478394282655e-06, - "loss": 0.8607, + "learning_rate": 3.759363210199665e-06, + "loss": 0.8494, "step": 25517 }, { - "epoch": 0.7241203178206583, + "epoch": 0.7231148516534898, "grad_norm": 0.0, - "learning_rate": 3.7333315871106833e-06, - "loss": 0.8317, + "learning_rate": 3.75864610028701e-06, + "loss": 0.8007, "step": 25518 }, { - "epoch": 0.7241486946651532, + "epoch": 0.7231431890957523, "grad_norm": 0.0, - "learning_rate": 3.732615387727934e-06, - "loss": 0.8729, + "learning_rate": 3.7579290429475933e-06, + "loss": 0.8617, "step": 25519 }, { - "epoch": 0.7241770715096482, + "epoch": 0.7231715265380146, "grad_norm": 0.0, - "learning_rate": 3.731899241286061e-06, - "loss": 0.7741, + "learning_rate": 3.757212038187459e-06, + "loss": 0.8515, "step": 25520 }, { - "epoch": 0.724205448354143, + "epoch": 0.7231998639802771, "grad_norm": 0.0, - "learning_rate": 3.7311831477911177e-06, - "loss": 0.8259, + "learning_rate": 3.756495086012648e-06, + "loss": 0.8503, "step": 25521 }, { - "epoch": 0.7242338251986379, + "epoch": 0.7232282014225396, "grad_norm": 0.0, - "learning_rate": 3.7304671072491517e-06, - "loss": 0.7681, + "learning_rate": 3.7557781864291954e-06, + "loss": 0.7979, "step": 25522 }, { - "epoch": 0.7242622020431329, + "epoch": 0.723256538864802, "grad_norm": 0.0, - "learning_rate": 3.729751119666212e-06, - "loss": 0.8356, + "learning_rate": 3.7550613394431413e-06, + "loss": 0.9012, "step": 25523 }, { - "epoch": 0.7242905788876277, + "epoch": 0.7232848763070645, "grad_norm": 0.0, - "learning_rate": 3.7290351850483497e-06, - "loss": 0.858, + "learning_rate": 3.754344545060529e-06, + "loss": 0.7919, "step": 25524 }, { - "epoch": 0.7243189557321226, + "epoch": 0.723313213749327, "grad_norm": 0.0, - "learning_rate": 3.728319303401605e-06, - "loss": 0.8499, + "learning_rate": 3.753627803287386e-06, + "loss": 0.791, "step": 25525 }, { - "epoch": 0.7243473325766174, + "epoch": 0.7233415511915895, "grad_norm": 0.0, - "learning_rate": 3.72760347473203e-06, - "loss": 0.8158, + "learning_rate": 3.7529111141297582e-06, + "loss": 0.8268, "step": 25526 }, { - "epoch": 0.7243757094211124, + "epoch": 0.7233698886338519, "grad_norm": 0.0, - "learning_rate": 3.7268876990456735e-06, - "loss": 0.7705, + "learning_rate": 3.7521944775936782e-06, + "loss": 0.7338, "step": 25527 }, { - "epoch": 0.7244040862656073, + "epoch": 0.7233982260761144, "grad_norm": 0.0, - "learning_rate": 3.726171976348575e-06, - "loss": 0.884, + "learning_rate": 3.7514778936851846e-06, + "loss": 0.8963, "step": 25528 }, { - "epoch": 0.7244324631101021, + "epoch": 0.7234265635183769, "grad_norm": 0.0, - "learning_rate": 3.725456306646784e-06, - "loss": 0.8708, + "learning_rate": 3.7507613624103167e-06, + "loss": 0.8047, "step": 25529 }, { - "epoch": 0.7244608399545971, + "epoch": 0.7234549009606392, "grad_norm": 0.0, - "learning_rate": 3.724740689946349e-06, - "loss": 0.8355, + "learning_rate": 3.7500448837751024e-06, + "loss": 0.8091, "step": 25530 }, { - "epoch": 0.724489216799092, + "epoch": 0.7234832384029017, "grad_norm": 0.0, - "learning_rate": 3.7240251262533087e-06, - "loss": 0.861, + "learning_rate": 3.7493284577855816e-06, + "loss": 0.7816, "step": 25531 }, { - "epoch": 0.7245175936435868, + "epoch": 0.7235115758451642, "grad_norm": 0.0, - "learning_rate": 3.7233096155737092e-06, - "loss": 0.8034, + "learning_rate": 3.7486120844477914e-06, + "loss": 0.8206, "step": 25532 }, { - "epoch": 0.7245459704880817, + "epoch": 0.7235399132874267, "grad_norm": 0.0, - "learning_rate": 3.7225941579136004e-06, - "loss": 0.8597, + "learning_rate": 3.7478957637677593e-06, + "loss": 0.8794, "step": 25533 }, { - "epoch": 0.7245743473325766, + "epoch": 0.7235682507296891, "grad_norm": 0.0, - "learning_rate": 3.7218787532790167e-06, - "loss": 0.8176, + "learning_rate": 3.747179495751527e-06, + "loss": 0.7966, "step": 25534 }, { - "epoch": 0.7246027241770715, + "epoch": 0.7235965881719516, "grad_norm": 0.0, - "learning_rate": 3.7211634016760045e-06, - "loss": 0.7951, + "learning_rate": 3.7464632804051204e-06, + "loss": 0.7625, "step": 25535 }, { - "epoch": 0.7246311010215664, + "epoch": 0.7236249256142141, "grad_norm": 0.0, - "learning_rate": 3.720448103110612e-06, - "loss": 0.8433, + "learning_rate": 3.745747117734575e-06, + "loss": 0.9256, "step": 25536 }, { - "epoch": 0.7246594778660613, + "epoch": 0.7236532630564765, "grad_norm": 0.0, - "learning_rate": 3.719732857588869e-06, - "loss": 0.8429, + "learning_rate": 3.7450310077459274e-06, + "loss": 0.7854, "step": 25537 }, { - "epoch": 0.7246878547105562, + "epoch": 0.723681600498739, "grad_norm": 0.0, - "learning_rate": 3.719017665116833e-06, - "loss": 0.8353, + "learning_rate": 3.7443149504452036e-06, + "loss": 0.7628, "step": 25538 }, { - "epoch": 0.724716231555051, + "epoch": 0.7237099379410015, "grad_norm": 0.0, - "learning_rate": 3.7183025257005324e-06, - "loss": 0.9055, + "learning_rate": 3.743598945838438e-06, + "loss": 0.9047, "step": 25539 }, { - "epoch": 0.724744608399546, + "epoch": 0.7237382753832639, "grad_norm": 0.0, - "learning_rate": 3.717587439346013e-06, - "loss": 0.8192, + "learning_rate": 3.7428829939316615e-06, + "loss": 0.7528, "step": 25540 }, { - "epoch": 0.7247729852440409, + "epoch": 0.7237666128255263, "grad_norm": 0.0, - "learning_rate": 3.7168724060593186e-06, - "loss": 0.7605, + "learning_rate": 3.7421670947309054e-06, + "loss": 0.784, "step": 25541 }, { - "epoch": 0.7248013620885357, + "epoch": 0.7237949502677888, "grad_norm": 0.0, - "learning_rate": 3.7161574258464817e-06, - "loss": 0.8816, + "learning_rate": 3.7414512482422026e-06, + "loss": 0.8374, "step": 25542 }, { - "epoch": 0.7248297389330306, + "epoch": 0.7238232877100513, "grad_norm": 0.0, - "learning_rate": 3.7154424987135463e-06, - "loss": 0.7397, + "learning_rate": 3.740735454471577e-06, + "loss": 0.8699, "step": 25543 }, { - "epoch": 0.7248581157775256, + "epoch": 0.7238516251523137, "grad_norm": 0.0, - "learning_rate": 3.714727624666553e-06, - "loss": 0.752, + "learning_rate": 3.7400197134250606e-06, + "loss": 0.8563, "step": 25544 }, { - "epoch": 0.7248864926220204, + "epoch": 0.7238799625945762, "grad_norm": 0.0, - "learning_rate": 3.714012803711535e-06, - "loss": 0.7585, + "learning_rate": 3.7393040251086854e-06, + "loss": 0.8396, "step": 25545 }, { - "epoch": 0.7249148694665153, + "epoch": 0.7239083000368387, "grad_norm": 0.0, - "learning_rate": 3.7132980358545333e-06, - "loss": 0.7966, + "learning_rate": 3.7385883895284747e-06, + "loss": 0.8111, "step": 25546 }, { - "epoch": 0.7249432463110103, + "epoch": 0.7239366374791011, "grad_norm": 0.0, - "learning_rate": 3.7125833211015895e-06, - "loss": 0.8606, + "learning_rate": 3.7378728066904624e-06, + "loss": 0.8832, "step": 25547 }, { - "epoch": 0.7249716231555051, + "epoch": 0.7239649749213636, "grad_norm": 0.0, - "learning_rate": 3.711868659458734e-06, - "loss": 0.8127, + "learning_rate": 3.7371572766006705e-06, + "loss": 0.8614, "step": 25548 }, { - "epoch": 0.725, + "epoch": 0.7239933123636261, "grad_norm": 0.0, - "learning_rate": 3.7111540509320053e-06, - "loss": 0.8062, + "learning_rate": 3.736441799265127e-06, + "loss": 0.7997, "step": 25549 }, { - "epoch": 0.7250283768444948, + "epoch": 0.7240216498058886, "grad_norm": 0.0, - "learning_rate": 3.710439495527446e-06, - "loss": 0.8076, + "learning_rate": 3.735726374689864e-06, + "loss": 0.8374, "step": 25550 }, { - "epoch": 0.7250567536889898, + "epoch": 0.724049987248151, "grad_norm": 0.0, - "learning_rate": 3.7097249932510828e-06, - "loss": 0.7963, + "learning_rate": 3.7350110028809004e-06, + "loss": 0.7078, "step": 25551 }, { - "epoch": 0.7250851305334847, + "epoch": 0.7240783246904134, "grad_norm": 0.0, - "learning_rate": 3.709010544108954e-06, - "loss": 0.8741, + "learning_rate": 3.7342956838442658e-06, + "loss": 0.7569, "step": 25552 }, { - "epoch": 0.7251135073779795, + "epoch": 0.7241066621326759, "grad_norm": 0.0, - "learning_rate": 3.7082961481070977e-06, - "loss": 0.8351, + "learning_rate": 3.7335804175859856e-06, + "loss": 0.7597, "step": 25553 }, { - "epoch": 0.7251418842224745, + "epoch": 0.7241349995749383, "grad_norm": 0.0, - "learning_rate": 3.7075818052515466e-06, - "loss": 0.8656, + "learning_rate": 3.732865204112084e-06, + "loss": 0.7938, "step": 25554 }, { - "epoch": 0.7251702610669694, + "epoch": 0.7241633370172008, "grad_norm": 0.0, - "learning_rate": 3.706867515548338e-06, - "loss": 0.8845, + "learning_rate": 3.7321500434285895e-06, + "loss": 0.7309, "step": 25555 }, { - "epoch": 0.7251986379114642, + "epoch": 0.7241916744594633, "grad_norm": 0.0, - "learning_rate": 3.706153279003498e-06, - "loss": 0.9102, + "learning_rate": 3.7314349355415193e-06, + "loss": 0.7868, "step": 25556 }, { - "epoch": 0.7252270147559592, + "epoch": 0.7242200119017258, "grad_norm": 0.0, - "learning_rate": 3.7054390956230658e-06, - "loss": 0.854, + "learning_rate": 3.7307198804569013e-06, + "loss": 0.9165, "step": 25557 }, { - "epoch": 0.725255391600454, + "epoch": 0.7242483493439882, "grad_norm": 0.0, - "learning_rate": 3.704724965413075e-06, - "loss": 0.7562, + "learning_rate": 3.7300048781807598e-06, + "loss": 0.7546, "step": 25558 }, { - "epoch": 0.7252837684449489, + "epoch": 0.7242766867862507, "grad_norm": 0.0, - "learning_rate": 3.7040108883795522e-06, - "loss": 0.7141, + "learning_rate": 3.729289928719113e-06, + "loss": 0.9181, "step": 25559 }, { - "epoch": 0.7253121452894438, + "epoch": 0.7243050242285132, "grad_norm": 0.0, - "learning_rate": 3.703296864528532e-06, - "loss": 0.898, + "learning_rate": 3.7285750320779857e-06, + "loss": 0.8793, "step": 25560 }, { - "epoch": 0.7253405221339387, + "epoch": 0.7243333616707756, "grad_norm": 0.0, - "learning_rate": 3.70258289386605e-06, - "loss": 0.8872, + "learning_rate": 3.7278601882634026e-06, + "loss": 0.7757, "step": 25561 }, { - "epoch": 0.7253688989784336, + "epoch": 0.724361699113038, "grad_norm": 0.0, - "learning_rate": 3.70186897639813e-06, - "loss": 0.7741, + "learning_rate": 3.72714539728138e-06, + "loss": 0.8867, "step": 25562 }, { - "epoch": 0.7253972758229285, + "epoch": 0.7243900365553005, "grad_norm": 0.0, - "learning_rate": 3.7011551121308065e-06, - "loss": 0.8863, + "learning_rate": 3.726430659137943e-06, + "loss": 0.9992, "step": 25563 }, { - "epoch": 0.7254256526674234, + "epoch": 0.7244183739975629, "grad_norm": 0.0, - "learning_rate": 3.7004413010701114e-06, - "loss": 0.861, + "learning_rate": 3.725715973839108e-06, + "loss": 0.7459, "step": 25564 }, { - "epoch": 0.7254540295119183, + "epoch": 0.7244467114398254, "grad_norm": 0.0, - "learning_rate": 3.699727543222069e-06, - "loss": 0.7999, + "learning_rate": 3.725001341390896e-06, + "loss": 0.8133, "step": 25565 }, { - "epoch": 0.7254824063564131, + "epoch": 0.7244750488820879, "grad_norm": 0.0, - "learning_rate": 3.699013838592711e-06, - "loss": 0.8342, + "learning_rate": 3.7242867617993283e-06, + "loss": 0.8824, "step": 25566 }, { - "epoch": 0.725510783200908, + "epoch": 0.7245033863243504, "grad_norm": 0.0, - "learning_rate": 3.6983001871880665e-06, - "loss": 0.8675, + "learning_rate": 3.723572235070424e-06, + "loss": 0.7807, "step": 25567 }, { - "epoch": 0.725539160045403, + "epoch": 0.7245317237666128, "grad_norm": 0.0, - "learning_rate": 3.6975865890141627e-06, - "loss": 0.8566, + "learning_rate": 3.7228577612102047e-06, + "loss": 0.736, "step": 25568 }, { - "epoch": 0.7255675368898978, + "epoch": 0.7245600612088753, "grad_norm": 0.0, - "learning_rate": 3.6968730440770294e-06, - "loss": 0.8417, + "learning_rate": 3.722143340224682e-06, + "loss": 0.7706, "step": 25569 }, { - "epoch": 0.7255959137343927, + "epoch": 0.7245883986511378, "grad_norm": 0.0, - "learning_rate": 3.6961595523826955e-06, - "loss": 0.7403, + "learning_rate": 3.721428972119878e-06, + "loss": 0.7727, "step": 25570 }, { - "epoch": 0.7256242905788877, + "epoch": 0.7246167360934002, "grad_norm": 0.0, - "learning_rate": 3.6954461139371823e-06, - "loss": 0.8014, + "learning_rate": 3.7207146569018114e-06, + "loss": 0.7787, "step": 25571 }, { - "epoch": 0.7256526674233825, + "epoch": 0.7246450735356627, "grad_norm": 0.0, - "learning_rate": 3.6947327287465195e-06, - "loss": 0.9447, + "learning_rate": 3.720000394576494e-06, + "loss": 0.8673, "step": 25572 }, { - "epoch": 0.7256810442678774, + "epoch": 0.7246734109779251, "grad_norm": 0.0, - "learning_rate": 3.694019396816736e-06, - "loss": 0.8667, + "learning_rate": 3.7192861851499463e-06, + "loss": 0.7845, "step": 25573 }, { - "epoch": 0.7257094211123724, + "epoch": 0.7247017484201876, "grad_norm": 0.0, - "learning_rate": 3.69330611815385e-06, - "loss": 0.8698, + "learning_rate": 3.7185720286281855e-06, + "loss": 0.8259, "step": 25574 }, { - "epoch": 0.7257377979568672, + "epoch": 0.72473008586245, "grad_norm": 0.0, - "learning_rate": 3.692592892763891e-06, - "loss": 0.8101, + "learning_rate": 3.717857925017222e-06, + "loss": 0.6802, "step": 25575 }, { - "epoch": 0.7257661748013621, + "epoch": 0.7247584233047125, "grad_norm": 0.0, - "learning_rate": 3.691879720652888e-06, - "loss": 0.8556, + "learning_rate": 3.717143874323077e-06, + "loss": 0.7677, "step": 25576 }, { - "epoch": 0.7257945516458569, + "epoch": 0.724786760746975, "grad_norm": 0.0, - "learning_rate": 3.6911666018268556e-06, - "loss": 0.7301, + "learning_rate": 3.7164298765517594e-06, + "loss": 0.8788, "step": 25577 }, { - "epoch": 0.7258229284903519, + "epoch": 0.7248150981892374, "grad_norm": 0.0, - "learning_rate": 3.690453536291827e-06, - "loss": 0.8347, + "learning_rate": 3.715715931709286e-06, + "loss": 0.8348, "step": 25578 }, { - "epoch": 0.7258513053348468, + "epoch": 0.7248434356314999, "grad_norm": 0.0, - "learning_rate": 3.6897405240538163e-06, - "loss": 0.8292, + "learning_rate": 3.7150020398016717e-06, + "loss": 0.9135, "step": 25579 }, { - "epoch": 0.7258796821793416, + "epoch": 0.7248717730737624, "grad_norm": 0.0, - "learning_rate": 3.6890275651188524e-06, - "loss": 0.7069, + "learning_rate": 3.7142882008349277e-06, + "loss": 0.8042, "step": 25580 }, { - "epoch": 0.7259080590238366, + "epoch": 0.7249001105160249, "grad_norm": 0.0, - "learning_rate": 3.68831465949296e-06, - "loss": 0.8147, + "learning_rate": 3.7135744148150698e-06, + "loss": 0.9196, "step": 25581 }, { - "epoch": 0.7259364358683315, + "epoch": 0.7249284479582873, "grad_norm": 0.0, - "learning_rate": 3.6876018071821527e-06, - "loss": 0.8499, + "learning_rate": 3.712860681748112e-06, + "loss": 0.8806, "step": 25582 }, { - "epoch": 0.7259648127128263, + "epoch": 0.7249567854005498, "grad_norm": 0.0, - "learning_rate": 3.6868890081924537e-06, - "loss": 0.765, + "learning_rate": 3.7121470016400585e-06, + "loss": 0.8356, "step": 25583 }, { - "epoch": 0.7259931895573212, + "epoch": 0.7249851228428122, "grad_norm": 0.0, - "learning_rate": 3.6861762625298937e-06, - "loss": 0.9778, + "learning_rate": 3.7114333744969312e-06, + "loss": 0.8871, "step": 25584 }, { - "epoch": 0.7260215664018161, + "epoch": 0.7250134602850746, "grad_norm": 0.0, - "learning_rate": 3.6854635702004837e-06, - "loss": 0.8372, + "learning_rate": 3.7107198003247314e-06, + "loss": 0.866, "step": 25585 }, { - "epoch": 0.726049943246311, + "epoch": 0.7250417977273371, "grad_norm": 0.0, - "learning_rate": 3.684750931210247e-06, - "loss": 0.8114, + "learning_rate": 3.7100062791294746e-06, + "loss": 0.8787, "step": 25586 }, { - "epoch": 0.7260783200908059, + "epoch": 0.7250701351695996, "grad_norm": 0.0, - "learning_rate": 3.6840383455652063e-06, - "loss": 0.7799, + "learning_rate": 3.7092928109171734e-06, + "loss": 0.7182, "step": 25587 }, { - "epoch": 0.7261066969353008, + "epoch": 0.725098472611862, "grad_norm": 0.0, - "learning_rate": 3.6833258132713736e-06, - "loss": 0.769, + "learning_rate": 3.708579395693831e-06, + "loss": 0.763, "step": 25588 }, { - "epoch": 0.7261350737797957, + "epoch": 0.7251268100541245, "grad_norm": 0.0, - "learning_rate": 3.6826133343347716e-06, - "loss": 0.8428, + "learning_rate": 3.7078660334654616e-06, + "loss": 0.8615, "step": 25589 }, { - "epoch": 0.7261634506242906, + "epoch": 0.725155147496387, "grad_norm": 0.0, - "learning_rate": 3.681900908761423e-06, - "loss": 0.8303, + "learning_rate": 3.7071527242380767e-06, + "loss": 0.8326, "step": 25590 }, { - "epoch": 0.7261918274687855, + "epoch": 0.7251834849386495, "grad_norm": 0.0, - "learning_rate": 3.6811885365573374e-06, - "loss": 0.7485, + "learning_rate": 3.7064394680176774e-06, + "loss": 0.7224, "step": 25591 }, { - "epoch": 0.7262202043132804, + "epoch": 0.7252118223809119, "grad_norm": 0.0, - "learning_rate": 3.680476217728537e-06, - "loss": 0.8571, + "learning_rate": 3.7057262648102755e-06, + "loss": 0.8044, "step": 25592 }, { - "epoch": 0.7262485811577752, + "epoch": 0.7252401598231744, "grad_norm": 0.0, - "learning_rate": 3.6797639522810413e-06, - "loss": 0.7429, + "learning_rate": 3.705013114621878e-06, + "loss": 0.7938, "step": 25593 }, { - "epoch": 0.7262769580022701, + "epoch": 0.7252684972654369, "grad_norm": 0.0, - "learning_rate": 3.679051740220859e-06, - "loss": 0.8516, + "learning_rate": 3.7043000174584932e-06, + "loss": 0.8547, "step": 25594 }, { - "epoch": 0.7263053348467651, + "epoch": 0.7252968347076992, "grad_norm": 0.0, - "learning_rate": 3.6783395815540124e-06, - "loss": 0.9957, + "learning_rate": 3.703586973326131e-06, + "loss": 0.8883, "step": 25595 }, { - "epoch": 0.7263337116912599, + "epoch": 0.7253251721499617, "grad_norm": 0.0, - "learning_rate": 3.677627476286518e-06, - "loss": 0.8201, + "learning_rate": 3.7028739822307902e-06, + "loss": 0.8617, "step": 25596 }, { - "epoch": 0.7263620885357548, + "epoch": 0.7253535095922242, "grad_norm": 0.0, - "learning_rate": 3.6769154244243855e-06, - "loss": 0.796, + "learning_rate": 3.702161044178484e-06, + "loss": 0.8053, "step": 25597 }, { - "epoch": 0.7263904653802498, + "epoch": 0.7253818470344867, "grad_norm": 0.0, - "learning_rate": 3.6762034259736323e-06, - "loss": 0.8056, + "learning_rate": 3.7014481591752093e-06, + "loss": 0.7933, "step": 25598 }, { - "epoch": 0.7264188422247446, + "epoch": 0.7254101844767491, "grad_norm": 0.0, - "learning_rate": 3.675491480940274e-06, - "loss": 0.698, + "learning_rate": 3.7007353272269764e-06, + "loss": 0.7148, "step": 25599 }, { - "epoch": 0.7264472190692395, + "epoch": 0.7254385219190116, "grad_norm": 0.0, - "learning_rate": 3.674779589330323e-06, - "loss": 0.8085, + "learning_rate": 3.700022548339792e-06, + "loss": 0.8287, "step": 25600 }, { - "epoch": 0.7264755959137343, + "epoch": 0.7254668593612741, "grad_norm": 0.0, - "learning_rate": 3.6740677511497958e-06, - "loss": 0.8412, + "learning_rate": 3.6993098225196544e-06, + "loss": 0.9185, "step": 25601 }, { - "epoch": 0.7265039727582293, + "epoch": 0.7254951968035365, "grad_norm": 0.0, - "learning_rate": 3.6733559664047005e-06, - "loss": 0.8607, + "learning_rate": 3.6985971497725705e-06, + "loss": 0.9228, "step": 25602 }, { - "epoch": 0.7265323496027242, + "epoch": 0.725523534245799, "grad_norm": 0.0, - "learning_rate": 3.6726442351010504e-06, - "loss": 0.8657, + "learning_rate": 3.6978845301045452e-06, + "loss": 0.9017, "step": 25603 }, { - "epoch": 0.726560726447219, + "epoch": 0.7255518716880615, "grad_norm": 0.0, - "learning_rate": 3.6719325572448626e-06, - "loss": 0.8167, + "learning_rate": 3.6971719635215753e-06, + "loss": 0.7758, "step": 25604 }, { - "epoch": 0.726589103291714, + "epoch": 0.725580209130324, "grad_norm": 0.0, - "learning_rate": 3.6712209328421424e-06, - "loss": 0.8173, + "learning_rate": 3.696459450029666e-06, + "loss": 0.8337, "step": 25605 }, { - "epoch": 0.7266174801362089, + "epoch": 0.7256085465725863, "grad_norm": 0.0, - "learning_rate": 3.670509361898903e-06, - "loss": 0.7629, + "learning_rate": 3.695746989634821e-06, + "loss": 0.9484, "step": 25606 }, { - "epoch": 0.7266458569807037, + "epoch": 0.7256368840148488, "grad_norm": 0.0, - "learning_rate": 3.669797844421159e-06, - "loss": 0.7861, + "learning_rate": 3.695034582343039e-06, + "loss": 0.8218, "step": 25607 }, { - "epoch": 0.7266742338251987, + "epoch": 0.7256652214571113, "grad_norm": 0.0, - "learning_rate": 3.6690863804149135e-06, - "loss": 0.8489, + "learning_rate": 3.6943222281603252e-06, + "loss": 0.8634, "step": 25608 }, { - "epoch": 0.7267026106696935, + "epoch": 0.7256935588993737, "grad_norm": 0.0, - "learning_rate": 3.6683749698861803e-06, - "loss": 0.807, + "learning_rate": 3.693609927092674e-06, + "loss": 0.8205, "step": 25609 }, { - "epoch": 0.7267309875141884, + "epoch": 0.7257218963416362, "grad_norm": 0.0, - "learning_rate": 3.6676636128409713e-06, - "loss": 0.7926, + "learning_rate": 3.692897679146088e-06, + "loss": 0.8152, "step": 25610 }, { - "epoch": 0.7267593643586833, + "epoch": 0.7257502337838987, "grad_norm": 0.0, - "learning_rate": 3.66695230928529e-06, - "loss": 0.8072, + "learning_rate": 3.6921854843265704e-06, + "loss": 0.8511, "step": 25611 }, { - "epoch": 0.7267877412031782, + "epoch": 0.7257785712261611, "grad_norm": 0.0, - "learning_rate": 3.666241059225146e-06, - "loss": 0.8132, + "learning_rate": 3.691473342640114e-06, + "loss": 0.711, "step": 25612 }, { - "epoch": 0.7268161180476731, + "epoch": 0.7258069086684236, "grad_norm": 0.0, - "learning_rate": 3.6655298626665493e-06, - "loss": 0.8064, + "learning_rate": 3.6907612540927227e-06, + "loss": 0.7704, "step": 25613 }, { - "epoch": 0.726844494892168, + "epoch": 0.7258352461106861, "grad_norm": 0.0, - "learning_rate": 3.664818719615506e-06, - "loss": 0.7856, + "learning_rate": 3.6900492186903893e-06, + "loss": 0.9429, "step": 25614 }, { - "epoch": 0.7268728717366629, + "epoch": 0.7258635835529486, "grad_norm": 0.0, - "learning_rate": 3.6641076300780275e-06, - "loss": 0.8226, + "learning_rate": 3.6893372364391145e-06, + "loss": 0.7647, "step": 25615 }, { - "epoch": 0.7269012485811578, + "epoch": 0.7258919209952109, "grad_norm": 0.0, - "learning_rate": 3.6633965940601136e-06, - "loss": 0.8414, + "learning_rate": 3.6886253073448984e-06, + "loss": 0.8686, "step": 25616 }, { - "epoch": 0.7269296254256526, + "epoch": 0.7259202584374734, "grad_norm": 0.0, - "learning_rate": 3.6626856115677722e-06, - "loss": 0.8424, + "learning_rate": 3.687913431413732e-06, + "loss": 0.8996, "step": 25617 }, { - "epoch": 0.7269580022701475, + "epoch": 0.7259485958797359, "grad_norm": 0.0, - "learning_rate": 3.6619746826070157e-06, - "loss": 0.8194, + "learning_rate": 3.6872016086516148e-06, + "loss": 0.8498, "step": 25618 }, { - "epoch": 0.7269863791146425, + "epoch": 0.7259769333219983, "grad_norm": 0.0, - "learning_rate": 3.6612638071838393e-06, - "loss": 0.7529, + "learning_rate": 3.6864898390645434e-06, + "loss": 0.8876, "step": 25619 }, { - "epoch": 0.7270147559591373, + "epoch": 0.7260052707642608, "grad_norm": 0.0, - "learning_rate": 3.660552985304253e-06, - "loss": 0.8553, + "learning_rate": 3.685778122658511e-06, + "loss": 0.9671, "step": 25620 }, { - "epoch": 0.7270431328036322, + "epoch": 0.7260336082065233, "grad_norm": 0.0, - "learning_rate": 3.659842216974263e-06, - "loss": 0.7726, + "learning_rate": 3.685066459439518e-06, + "loss": 0.8271, "step": 25621 }, { - "epoch": 0.7270715096481272, + "epoch": 0.7260619456487858, "grad_norm": 0.0, - "learning_rate": 3.6591315021998684e-06, - "loss": 0.8721, + "learning_rate": 3.6843548494135527e-06, + "loss": 0.7942, "step": 25622 }, { - "epoch": 0.727099886492622, + "epoch": 0.7260902830910482, "grad_norm": 0.0, - "learning_rate": 3.658420840987075e-06, - "loss": 0.7597, + "learning_rate": 3.6836432925866107e-06, + "loss": 0.8008, "step": 25623 }, { - "epoch": 0.7271282633371169, + "epoch": 0.7261186205333107, "grad_norm": 0.0, - "learning_rate": 3.657710233341889e-06, - "loss": 0.7972, + "learning_rate": 3.682931788964691e-06, + "loss": 0.7757, "step": 25624 }, { - "epoch": 0.7271566401816119, + "epoch": 0.7261469579755732, "grad_norm": 0.0, - "learning_rate": 3.6569996792703065e-06, - "loss": 0.8197, + "learning_rate": 3.682220338553779e-06, + "loss": 0.9131, "step": 25625 }, { - "epoch": 0.7271850170261067, + "epoch": 0.7261752954178355, "grad_norm": 0.0, - "learning_rate": 3.6562891787783326e-06, - "loss": 0.8053, + "learning_rate": 3.6815089413598746e-06, + "loss": 0.7909, "step": 25626 }, { - "epoch": 0.7272133938706016, + "epoch": 0.726203632860098, "grad_norm": 0.0, - "learning_rate": 3.655578731871973e-06, - "loss": 0.8132, + "learning_rate": 3.680797597388963e-06, + "loss": 0.8005, "step": 25627 }, { - "epoch": 0.7272417707150964, + "epoch": 0.7262319703023605, "grad_norm": 0.0, - "learning_rate": 3.6548683385572215e-06, - "loss": 0.8173, + "learning_rate": 3.6800863066470406e-06, + "loss": 0.8765, "step": 25628 }, { - "epoch": 0.7272701475595914, + "epoch": 0.726260307744623, "grad_norm": 0.0, - "learning_rate": 3.654157998840082e-06, - "loss": 0.7716, + "learning_rate": 3.6793750691400996e-06, + "loss": 0.8338, "step": 25629 }, { - "epoch": 0.7272985244040863, + "epoch": 0.7262886451868854, "grad_norm": 0.0, - "learning_rate": 3.6534477127265555e-06, - "loss": 0.7709, + "learning_rate": 3.6786638848741273e-06, + "loss": 0.8036, "step": 25630 }, { - "epoch": 0.7273269012485811, + "epoch": 0.7263169826291479, "grad_norm": 0.0, - "learning_rate": 3.652737480222641e-06, - "loss": 0.9015, + "learning_rate": 3.6779527538551173e-06, + "loss": 0.7961, "step": 25631 }, { - "epoch": 0.7273552780930761, + "epoch": 0.7263453200714104, "grad_norm": 0.0, - "learning_rate": 3.6520273013343423e-06, - "loss": 0.8358, + "learning_rate": 3.6772416760890584e-06, + "loss": 0.7928, "step": 25632 }, { - "epoch": 0.727383654937571, + "epoch": 0.7263736575136728, "grad_norm": 0.0, - "learning_rate": 3.6513171760676514e-06, - "loss": 0.8304, + "learning_rate": 3.6765306515819398e-06, + "loss": 0.7697, "step": 25633 }, { - "epoch": 0.7274120317820658, + "epoch": 0.7264019949559353, "grad_norm": 0.0, - "learning_rate": 3.6506071044285684e-06, - "loss": 0.8199, + "learning_rate": 3.675819680339756e-06, + "loss": 0.8113, "step": 25634 }, { - "epoch": 0.7274404086265607, + "epoch": 0.7264303323981978, "grad_norm": 0.0, - "learning_rate": 3.6498970864230966e-06, - "loss": 0.7303, + "learning_rate": 3.6751087623684877e-06, + "loss": 0.9429, "step": 25635 }, { - "epoch": 0.7274687854710556, + "epoch": 0.7264586698404601, "grad_norm": 0.0, - "learning_rate": 3.649187122057226e-06, - "loss": 0.7787, + "learning_rate": 3.6743978976741267e-06, + "loss": 0.9535, "step": 25636 }, { - "epoch": 0.7274971623155505, + "epoch": 0.7264870072827226, "grad_norm": 0.0, - "learning_rate": 3.648477211336958e-06, - "loss": 0.8922, + "learning_rate": 3.673687086262665e-06, + "loss": 0.8932, "step": 25637 }, { - "epoch": 0.7275255391600454, + "epoch": 0.7265153447249851, "grad_norm": 0.0, - "learning_rate": 3.6477673542682913e-06, - "loss": 0.9139, + "learning_rate": 3.672976328140083e-06, + "loss": 0.8467, "step": 25638 }, { - "epoch": 0.7275539160045403, + "epoch": 0.7265436821672476, "grad_norm": 0.0, - "learning_rate": 3.6470575508572157e-06, - "loss": 0.7715, + "learning_rate": 3.672265623312371e-06, + "loss": 0.8172, "step": 25639 }, { - "epoch": 0.7275822928490352, + "epoch": 0.72657201960951, "grad_norm": 0.0, - "learning_rate": 3.646347801109731e-06, - "loss": 0.6953, + "learning_rate": 3.671554971785518e-06, + "loss": 0.8488, "step": 25640 }, { - "epoch": 0.72761066969353, + "epoch": 0.7266003570517725, "grad_norm": 0.0, - "learning_rate": 3.6456381050318357e-06, - "loss": 0.779, + "learning_rate": 3.6708443735655054e-06, + "loss": 0.8903, "step": 25641 }, { - "epoch": 0.727639046538025, + "epoch": 0.726628694494035, "grad_norm": 0.0, - "learning_rate": 3.6449284626295167e-06, - "loss": 0.7208, + "learning_rate": 3.6701338286583234e-06, + "loss": 0.8171, "step": 25642 }, { - "epoch": 0.7276674233825199, + "epoch": 0.7266570319362974, "grad_norm": 0.0, - "learning_rate": 3.6442188739087735e-06, - "loss": 0.8565, + "learning_rate": 3.6694233370699508e-06, + "loss": 0.7446, "step": 25643 }, { - "epoch": 0.7276958002270147, + "epoch": 0.7266853693785599, "grad_norm": 0.0, - "learning_rate": 3.6435093388755993e-06, - "loss": 0.8129, + "learning_rate": 3.6687128988063768e-06, + "loss": 0.7875, "step": 25644 }, { - "epoch": 0.7277241770715096, + "epoch": 0.7267137068208224, "grad_norm": 0.0, - "learning_rate": 3.6427998575359867e-06, - "loss": 0.8886, + "learning_rate": 3.6680025138735853e-06, + "loss": 0.7607, "step": 25645 }, { - "epoch": 0.7277525539160046, + "epoch": 0.7267420442630849, "grad_norm": 0.0, - "learning_rate": 3.6420904298959336e-06, - "loss": 0.8557, + "learning_rate": 3.66729218227756e-06, + "loss": 0.9339, "step": 25646 }, { - "epoch": 0.7277809307604994, + "epoch": 0.7267703817053472, "grad_norm": 0.0, - "learning_rate": 3.641381055961424e-06, - "loss": 0.8494, + "learning_rate": 3.6665819040242867e-06, + "loss": 0.863, "step": 25647 }, { - "epoch": 0.7278093076049943, + "epoch": 0.7267987191476097, "grad_norm": 0.0, - "learning_rate": 3.640671735738456e-06, - "loss": 0.8246, + "learning_rate": 3.6658716791197436e-06, + "loss": 0.6953, "step": 25648 }, { - "epoch": 0.7278376844494893, + "epoch": 0.7268270565898722, "grad_norm": 0.0, - "learning_rate": 3.6399624692330217e-06, - "loss": 0.8396, + "learning_rate": 3.665161507569914e-06, + "loss": 0.8357, "step": 25649 }, { - "epoch": 0.7278660612939841, + "epoch": 0.7268553940321346, "grad_norm": 0.0, - "learning_rate": 3.639253256451107e-06, - "loss": 0.7625, + "learning_rate": 3.6644513893807866e-06, + "loss": 0.8233, "step": 25650 }, { - "epoch": 0.727894438138479, + "epoch": 0.7268837314743971, "grad_norm": 0.0, - "learning_rate": 3.638544097398706e-06, - "loss": 0.7995, + "learning_rate": 3.663741324558333e-06, + "loss": 0.7356, "step": 25651 }, { - "epoch": 0.7279228149829738, + "epoch": 0.7269120689166596, "grad_norm": 0.0, - "learning_rate": 3.6378349920818137e-06, - "loss": 0.7413, + "learning_rate": 3.6630313131085404e-06, + "loss": 0.8119, "step": 25652 }, { - "epoch": 0.7279511918274688, + "epoch": 0.7269404063589221, "grad_norm": 0.0, - "learning_rate": 3.637125940506411e-06, - "loss": 0.7852, + "learning_rate": 3.6623213550373903e-06, + "loss": 0.7773, "step": 25653 }, { - "epoch": 0.7279795686719637, + "epoch": 0.7269687438011845, "grad_norm": 0.0, - "learning_rate": 3.636416942678491e-06, - "loss": 0.8363, + "learning_rate": 3.661611450350858e-06, + "loss": 0.8887, "step": 25654 }, { - "epoch": 0.7280079455164585, + "epoch": 0.726997081243447, "grad_norm": 0.0, - "learning_rate": 3.6357079986040465e-06, - "loss": 0.8716, + "learning_rate": 3.66090159905493e-06, + "loss": 0.7913, "step": 25655 }, { - "epoch": 0.7280363223609535, + "epoch": 0.7270254186857095, "grad_norm": 0.0, - "learning_rate": 3.6349991082890602e-06, - "loss": 0.9382, + "learning_rate": 3.6601918011555783e-06, + "loss": 0.731, "step": 25656 }, { - "epoch": 0.7280646992054484, + "epoch": 0.7270537561279719, "grad_norm": 0.0, - "learning_rate": 3.6342902717395225e-06, - "loss": 0.7499, + "learning_rate": 3.6594820566587854e-06, + "loss": 0.8457, "step": 25657 }, { - "epoch": 0.7280930760499432, + "epoch": 0.7270820935702343, "grad_norm": 0.0, - "learning_rate": 3.633581488961424e-06, - "loss": 0.8375, + "learning_rate": 3.658772365570529e-06, + "loss": 0.905, "step": 25658 }, { - "epoch": 0.7281214528944382, + "epoch": 0.7271104310124968, "grad_norm": 0.0, - "learning_rate": 3.6328727599607416e-06, - "loss": 0.8441, + "learning_rate": 3.6580627278967883e-06, + "loss": 0.9763, "step": 25659 }, { - "epoch": 0.728149829738933, + "epoch": 0.7271387684547592, "grad_norm": 0.0, - "learning_rate": 3.632164084743477e-06, - "loss": 0.7648, + "learning_rate": 3.6573531436435395e-06, + "loss": 0.8227, "step": 25660 }, { - "epoch": 0.7281782065834279, + "epoch": 0.7271671058970217, "grad_norm": 0.0, - "learning_rate": 3.6314554633156043e-06, - "loss": 0.8043, + "learning_rate": 3.6566436128167647e-06, + "loss": 0.775, "step": 25661 }, { - "epoch": 0.7282065834279228, + "epoch": 0.7271954433392842, "grad_norm": 0.0, - "learning_rate": 3.6307468956831136e-06, - "loss": 0.8307, + "learning_rate": 3.655934135422432e-06, + "loss": 0.8144, "step": 25662 }, { - "epoch": 0.7282349602724177, + "epoch": 0.7272237807815467, "grad_norm": 0.0, - "learning_rate": 3.6300383818519946e-06, - "loss": 0.8151, + "learning_rate": 3.6552247114665264e-06, + "loss": 0.8392, "step": 25663 }, { - "epoch": 0.7282633371169126, + "epoch": 0.7272521182238091, "grad_norm": 0.0, - "learning_rate": 3.629329921828224e-06, - "loss": 0.784, + "learning_rate": 3.654515340955015e-06, + "loss": 0.7952, "step": 25664 }, { - "epoch": 0.7282917139614075, + "epoch": 0.7272804556660716, "grad_norm": 0.0, - "learning_rate": 3.6286215156177895e-06, - "loss": 0.903, + "learning_rate": 3.6538060238938777e-06, + "loss": 0.8141, "step": 25665 }, { - "epoch": 0.7283200908059024, + "epoch": 0.7273087931083341, "grad_norm": 0.0, - "learning_rate": 3.6279131632266783e-06, - "loss": 0.8097, + "learning_rate": 3.6530967602890923e-06, + "loss": 0.7624, "step": 25666 }, { - "epoch": 0.7283484676503973, + "epoch": 0.7273371305505965, "grad_norm": 0.0, - "learning_rate": 3.627204864660869e-06, - "loss": 0.9209, + "learning_rate": 3.6523875501466255e-06, + "loss": 0.7942, "step": 25667 }, { - "epoch": 0.7283768444948922, + "epoch": 0.727365467992859, "grad_norm": 0.0, - "learning_rate": 3.626496619926346e-06, - "loss": 0.7882, + "learning_rate": 3.6516783934724563e-06, + "loss": 0.753, "step": 25668 }, { - "epoch": 0.728405221339387, + "epoch": 0.7273938054351214, "grad_norm": 0.0, - "learning_rate": 3.6257884290290955e-06, - "loss": 0.9157, + "learning_rate": 3.65096929027256e-06, + "loss": 0.8079, "step": 25669 }, { - "epoch": 0.728433598183882, + "epoch": 0.7274221428773839, "grad_norm": 0.0, - "learning_rate": 3.625080291975095e-06, - "loss": 0.803, + "learning_rate": 3.6502602405529044e-06, + "loss": 0.7184, "step": 25670 }, { - "epoch": 0.7284619750283768, + "epoch": 0.7274504803196463, "grad_norm": 0.0, - "learning_rate": 3.624372208770326e-06, - "loss": 0.7831, + "learning_rate": 3.6495512443194647e-06, + "loss": 0.831, "step": 25671 }, { - "epoch": 0.7284903518728717, + "epoch": 0.7274788177619088, "grad_norm": 0.0, - "learning_rate": 3.6236641794207762e-06, - "loss": 0.7827, + "learning_rate": 3.6488423015782128e-06, + "loss": 0.8488, "step": 25672 }, { - "epoch": 0.7285187287173667, + "epoch": 0.7275071552041713, "grad_norm": 0.0, - "learning_rate": 3.622956203932417e-06, - "loss": 0.8236, + "learning_rate": 3.6481334123351196e-06, + "loss": 0.87, "step": 25673 }, { - "epoch": 0.7285471055618615, + "epoch": 0.7275354926464337, "grad_norm": 0.0, - "learning_rate": 3.622248282311235e-06, - "loss": 0.8313, + "learning_rate": 3.6474245765961623e-06, + "loss": 0.7637, "step": 25674 }, { - "epoch": 0.7285754824063564, + "epoch": 0.7275638300886962, "grad_norm": 0.0, - "learning_rate": 3.6215404145632073e-06, - "loss": 0.7497, + "learning_rate": 3.6467157943673028e-06, + "loss": 0.785, "step": 25675 }, { - "epoch": 0.7286038592508514, + "epoch": 0.7275921675309587, "grad_norm": 0.0, - "learning_rate": 3.620832600694314e-06, - "loss": 0.7448, + "learning_rate": 3.646007065654519e-06, + "loss": 0.7625, "step": 25676 }, { - "epoch": 0.7286322360953462, + "epoch": 0.7276205049732212, "grad_norm": 0.0, - "learning_rate": 3.620124840710536e-06, - "loss": 0.7248, + "learning_rate": 3.6452983904637738e-06, + "loss": 0.8841, "step": 25677 }, { - "epoch": 0.7286606129398411, + "epoch": 0.7276488424154836, "grad_norm": 0.0, - "learning_rate": 3.619417134617853e-06, - "loss": 0.813, + "learning_rate": 3.6445897688010403e-06, + "loss": 0.84, "step": 25678 }, { - "epoch": 0.7286889897843359, + "epoch": 0.727677179857746, "grad_norm": 0.0, - "learning_rate": 3.6187094824222367e-06, - "loss": 0.7135, + "learning_rate": 3.643881200672289e-06, + "loss": 0.7947, "step": 25679 }, { - "epoch": 0.7287173666288309, + "epoch": 0.7277055173000085, "grad_norm": 0.0, - "learning_rate": 3.6180018841296684e-06, - "loss": 0.8183, + "learning_rate": 3.6431726860834848e-06, + "loss": 0.7665, "step": 25680 }, { - "epoch": 0.7287457434733258, + "epoch": 0.7277338547422709, "grad_norm": 0.0, - "learning_rate": 3.617294339746128e-06, - "loss": 0.6773, + "learning_rate": 3.6424642250405974e-06, + "loss": 0.865, "step": 25681 }, { - "epoch": 0.7287741203178206, + "epoch": 0.7277621921845334, "grad_norm": 0.0, - "learning_rate": 3.616586849277587e-06, - "loss": 0.8448, + "learning_rate": 3.6417558175495983e-06, + "loss": 0.8333, "step": 25682 }, { - "epoch": 0.7288024971623156, + "epoch": 0.7277905296267959, "grad_norm": 0.0, - "learning_rate": 3.6158794127300224e-06, - "loss": 0.7271, + "learning_rate": 3.6410474636164463e-06, + "loss": 0.7903, "step": 25683 }, { - "epoch": 0.7288308740068105, + "epoch": 0.7278188670690583, "grad_norm": 0.0, - "learning_rate": 3.615172030109415e-06, - "loss": 0.8232, + "learning_rate": 3.6403391632471142e-06, + "loss": 0.8064, "step": 25684 }, { - "epoch": 0.7288592508513053, + "epoch": 0.7278472045113208, "grad_norm": 0.0, - "learning_rate": 3.614464701421734e-06, - "loss": 0.84, + "learning_rate": 3.6396309164475665e-06, + "loss": 0.7737, "step": 25685 }, { - "epoch": 0.7288876276958002, + "epoch": 0.7278755419535833, "grad_norm": 0.0, - "learning_rate": 3.6137574266729604e-06, - "loss": 0.7738, + "learning_rate": 3.638922723223769e-06, + "loss": 0.7423, "step": 25686 }, { - "epoch": 0.7289160045402951, + "epoch": 0.7279038793958458, "grad_norm": 0.0, - "learning_rate": 3.61305020586906e-06, - "loss": 0.7633, + "learning_rate": 3.6382145835816916e-06, + "loss": 0.9021, "step": 25687 }, { - "epoch": 0.72894438138479, + "epoch": 0.7279322168381082, "grad_norm": 0.0, - "learning_rate": 3.612343039016013e-06, - "loss": 0.7815, + "learning_rate": 3.637506497527291e-06, + "loss": 0.8279, "step": 25688 }, { - "epoch": 0.7289727582292849, + "epoch": 0.7279605542803707, "grad_norm": 0.0, - "learning_rate": 3.611635926119794e-06, - "loss": 0.7691, + "learning_rate": 3.636798465066537e-06, + "loss": 0.8441, "step": 25689 }, { - "epoch": 0.7290011350737798, + "epoch": 0.7279888917226331, "grad_norm": 0.0, - "learning_rate": 3.6109288671863674e-06, - "loss": 0.9419, + "learning_rate": 3.6360904862053947e-06, + "loss": 0.7867, "step": 25690 }, { - "epoch": 0.7290295119182747, + "epoch": 0.7280172291648955, "grad_norm": 0.0, - "learning_rate": 3.6102218622217143e-06, - "loss": 0.8209, + "learning_rate": 3.6353825609498227e-06, + "loss": 0.8168, "step": 25691 }, { - "epoch": 0.7290578887627696, + "epoch": 0.728045566607158, "grad_norm": 0.0, - "learning_rate": 3.6095149112318083e-06, - "loss": 0.7825, + "learning_rate": 3.6346746893057896e-06, + "loss": 0.835, "step": 25692 }, { - "epoch": 0.7290862656072645, + "epoch": 0.7280739040494205, "grad_norm": 0.0, - "learning_rate": 3.6088080142226144e-06, - "loss": 0.778, + "learning_rate": 3.6339668712792533e-06, + "loss": 0.754, "step": 25693 }, { - "epoch": 0.7291146424517594, + "epoch": 0.728102241491683, "grad_norm": 0.0, - "learning_rate": 3.6081011712001055e-06, - "loss": 0.7986, + "learning_rate": 3.6332591068761756e-06, + "loss": 0.9351, "step": 25694 }, { - "epoch": 0.7291430192962542, + "epoch": 0.7281305789339454, "grad_norm": 0.0, - "learning_rate": 3.607394382170257e-06, - "loss": 0.8123, + "learning_rate": 3.632551396102526e-06, + "loss": 0.799, "step": 25695 }, { - "epoch": 0.7291713961407491, + "epoch": 0.7281589163762079, "grad_norm": 0.0, - "learning_rate": 3.6066876471390323e-06, - "loss": 0.8785, + "learning_rate": 3.631843738964257e-06, + "loss": 0.8311, "step": 25696 }, { - "epoch": 0.7291997729852441, + "epoch": 0.7281872538184704, "grad_norm": 0.0, - "learning_rate": 3.6059809661124047e-06, - "loss": 0.7823, + "learning_rate": 3.6311361354673324e-06, + "loss": 0.8888, "step": 25697 }, { - "epoch": 0.7292281498297389, + "epoch": 0.7282155912607328, "grad_norm": 0.0, - "learning_rate": 3.6052743390963464e-06, - "loss": 0.8266, + "learning_rate": 3.6304285856177134e-06, + "loss": 0.8455, "step": 25698 }, { - "epoch": 0.7292565266742338, + "epoch": 0.7282439287029953, "grad_norm": 0.0, - "learning_rate": 3.604567766096819e-06, - "loss": 0.7143, + "learning_rate": 3.62972108942136e-06, + "loss": 0.8409, "step": 25699 }, { - "epoch": 0.7292849035187288, + "epoch": 0.7282722661452578, "grad_norm": 0.0, - "learning_rate": 3.6038612471197963e-06, - "loss": 0.8492, + "learning_rate": 3.6290136468842343e-06, + "loss": 0.8293, "step": 25700 }, { - "epoch": 0.7293132803632236, + "epoch": 0.7283006035875202, "grad_norm": 0.0, - "learning_rate": 3.6031547821712486e-06, - "loss": 0.8034, + "learning_rate": 3.6283062580122884e-06, + "loss": 0.779, "step": 25701 }, { - "epoch": 0.7293416572077185, + "epoch": 0.7283289410297826, "grad_norm": 0.0, - "learning_rate": 3.6024483712571366e-06, - "loss": 0.7815, + "learning_rate": 3.6275989228114863e-06, + "loss": 0.8199, "step": 25702 }, { - "epoch": 0.7293700340522133, + "epoch": 0.7283572784720451, "grad_norm": 0.0, - "learning_rate": 3.60174201438343e-06, - "loss": 0.891, + "learning_rate": 3.6268916412877875e-06, + "loss": 0.8215, "step": 25703 }, { - "epoch": 0.7293984108967083, + "epoch": 0.7283856159143076, "grad_norm": 0.0, - "learning_rate": 3.6010357115561e-06, - "loss": 0.7284, + "learning_rate": 3.6261844134471434e-06, + "loss": 0.7584, "step": 25704 }, { - "epoch": 0.7294267877412032, + "epoch": 0.72841395335657, "grad_norm": 0.0, - "learning_rate": 3.6003294627811025e-06, - "loss": 0.8345, + "learning_rate": 3.6254772392955183e-06, + "loss": 0.8326, "step": 25705 }, { - "epoch": 0.729455164585698, + "epoch": 0.7284422907988325, "grad_norm": 0.0, - "learning_rate": 3.599623268064416e-06, - "loss": 0.8047, + "learning_rate": 3.6247701188388627e-06, + "loss": 0.8153, "step": 25706 }, { - "epoch": 0.729483541430193, + "epoch": 0.728470628241095, "grad_norm": 0.0, - "learning_rate": 3.598917127411997e-06, - "loss": 0.8029, + "learning_rate": 3.6240630520831354e-06, + "loss": 0.8561, "step": 25707 }, { - "epoch": 0.7295119182746879, + "epoch": 0.7284989656833574, "grad_norm": 0.0, - "learning_rate": 3.5982110408298123e-06, - "loss": 0.8054, + "learning_rate": 3.6233560390342957e-06, + "loss": 0.7949, "step": 25708 }, { - "epoch": 0.7295402951191827, + "epoch": 0.7285273031256199, "grad_norm": 0.0, - "learning_rate": 3.5975050083238294e-06, - "loss": 0.7879, + "learning_rate": 3.622649079698293e-06, + "loss": 0.739, "step": 25709 }, { - "epoch": 0.7295686719636776, + "epoch": 0.7285556405678824, "grad_norm": 0.0, - "learning_rate": 3.5967990299000066e-06, - "loss": 0.7944, + "learning_rate": 3.6219421740810855e-06, + "loss": 0.8008, "step": 25710 }, { - "epoch": 0.7295970488081726, + "epoch": 0.7285839780101449, "grad_norm": 0.0, - "learning_rate": 3.596093105564311e-06, - "loss": 0.9217, + "learning_rate": 3.6212353221886276e-06, + "loss": 0.8262, "step": 25711 }, { - "epoch": 0.7296254256526674, + "epoch": 0.7286123154524072, "grad_norm": 0.0, - "learning_rate": 3.595387235322707e-06, - "loss": 0.8395, + "learning_rate": 3.620528524026873e-06, + "loss": 0.7852, "step": 25712 }, { - "epoch": 0.7296538024971623, + "epoch": 0.7286406528946697, "grad_norm": 0.0, - "learning_rate": 3.594681419181153e-06, - "loss": 0.8568, + "learning_rate": 3.6198217796017788e-06, + "loss": 0.8105, "step": 25713 }, { - "epoch": 0.7296821793416572, + "epoch": 0.7286689903369322, "grad_norm": 0.0, - "learning_rate": 3.593975657145612e-06, - "loss": 0.8133, + "learning_rate": 3.6191150889192915e-06, + "loss": 0.9106, "step": 25714 }, { - "epoch": 0.7297105561861521, + "epoch": 0.7286973277791946, "grad_norm": 0.0, - "learning_rate": 3.5932699492220502e-06, - "loss": 0.7228, + "learning_rate": 3.618408451985368e-06, + "loss": 0.8577, "step": 25715 }, { - "epoch": 0.729738933030647, + "epoch": 0.7287256652214571, "grad_norm": 0.0, - "learning_rate": 3.5925642954164208e-06, - "loss": 0.9444, + "learning_rate": 3.617701868805963e-06, + "loss": 0.842, "step": 25716 }, { - "epoch": 0.7297673098751419, + "epoch": 0.7287540026637196, "grad_norm": 0.0, - "learning_rate": 3.59185869573469e-06, - "loss": 0.7585, + "learning_rate": 3.6169953393870203e-06, + "loss": 0.8186, "step": 25717 }, { - "epoch": 0.7297956867196368, + "epoch": 0.7287823401059821, "grad_norm": 0.0, - "learning_rate": 3.59115315018282e-06, - "loss": 0.8844, + "learning_rate": 3.616288863734498e-06, + "loss": 0.616, "step": 25718 }, { - "epoch": 0.7298240635641317, + "epoch": 0.7288106775482445, "grad_norm": 0.0, - "learning_rate": 3.5904476587667637e-06, - "loss": 0.77, + "learning_rate": 3.6155824418543482e-06, + "loss": 0.8608, "step": 25719 }, { - "epoch": 0.7298524404086265, + "epoch": 0.728839014990507, "grad_norm": 0.0, - "learning_rate": 3.5897422214924847e-06, - "loss": 0.8316, + "learning_rate": 3.6148760737525145e-06, + "loss": 0.8541, "step": 25720 }, { - "epoch": 0.7298808172531215, + "epoch": 0.7288673524327695, "grad_norm": 0.0, - "learning_rate": 3.5890368383659414e-06, - "loss": 0.7681, + "learning_rate": 3.61416975943495e-06, + "loss": 0.8039, "step": 25721 }, { - "epoch": 0.7299091940976163, + "epoch": 0.7288956898750318, "grad_norm": 0.0, - "learning_rate": 3.5883315093930913e-06, - "loss": 0.8878, + "learning_rate": 3.6134634989076065e-06, + "loss": 0.7396, "step": 25722 }, { - "epoch": 0.7299375709421112, + "epoch": 0.7289240273172943, "grad_norm": 0.0, - "learning_rate": 3.5876262345798972e-06, - "loss": 0.8162, + "learning_rate": 3.612757292176434e-06, + "loss": 0.8385, "step": 25723 }, { - "epoch": 0.7299659477866062, + "epoch": 0.7289523647595568, "grad_norm": 0.0, - "learning_rate": 3.5869210139323086e-06, - "loss": 0.9321, + "learning_rate": 3.6120511392473756e-06, + "loss": 0.726, "step": 25724 }, { - "epoch": 0.729994324631101, + "epoch": 0.7289807022018192, "grad_norm": 0.0, - "learning_rate": 3.586215847456287e-06, - "loss": 0.8474, + "learning_rate": 3.6113450401263815e-06, + "loss": 0.8416, "step": 25725 }, { - "epoch": 0.7300227014755959, + "epoch": 0.7290090396440817, "grad_norm": 0.0, - "learning_rate": 3.585510735157791e-06, - "loss": 0.8107, + "learning_rate": 3.610638994819402e-06, + "loss": 0.837, "step": 25726 }, { - "epoch": 0.7300510783200908, + "epoch": 0.7290373770863442, "grad_norm": 0.0, - "learning_rate": 3.584805677042772e-06, - "loss": 0.8942, + "learning_rate": 3.6099330033323854e-06, + "loss": 0.7184, "step": 25727 }, { - "epoch": 0.7300794551645857, + "epoch": 0.7290657145286067, "grad_norm": 0.0, - "learning_rate": 3.584100673117187e-06, - "loss": 0.9887, + "learning_rate": 3.6092270656712723e-06, + "loss": 0.8926, "step": 25728 }, { - "epoch": 0.7301078320090806, + "epoch": 0.7290940519708691, "grad_norm": 0.0, - "learning_rate": 3.5833957233869963e-06, - "loss": 0.8116, + "learning_rate": 3.6085211818420176e-06, + "loss": 0.805, "step": 25729 }, { - "epoch": 0.7301362088535754, + "epoch": 0.7291223894131316, "grad_norm": 0.0, - "learning_rate": 3.5826908278581463e-06, - "loss": 0.8476, + "learning_rate": 3.6078153518505578e-06, + "loss": 0.7212, "step": 25730 }, { - "epoch": 0.7301645856980704, + "epoch": 0.7291507268553941, "grad_norm": 0.0, - "learning_rate": 3.5819859865365957e-06, - "loss": 0.7614, + "learning_rate": 3.607109575702843e-06, + "loss": 0.8494, "step": 25731 }, { - "epoch": 0.7301929625425653, + "epoch": 0.7291790642976564, "grad_norm": 0.0, - "learning_rate": 3.5812811994283005e-06, - "loss": 0.7341, + "learning_rate": 3.606403853404823e-06, + "loss": 0.8952, "step": 25732 }, { - "epoch": 0.7302213393870601, + "epoch": 0.7292074017399189, "grad_norm": 0.0, - "learning_rate": 3.5805764665392094e-06, - "loss": 0.8613, + "learning_rate": 3.605698184962433e-06, + "loss": 0.8364, "step": 25733 }, { - "epoch": 0.7302497162315551, + "epoch": 0.7292357391821814, "grad_norm": 0.0, - "learning_rate": 3.5798717878752765e-06, - "loss": 0.7393, + "learning_rate": 3.604992570381621e-06, + "loss": 0.7719, "step": 25734 }, { - "epoch": 0.73027809307605, + "epoch": 0.7292640766244439, "grad_norm": 0.0, - "learning_rate": 3.5791671634424586e-06, - "loss": 0.6941, + "learning_rate": 3.6042870096683324e-06, + "loss": 0.7917, "step": 25735 }, { - "epoch": 0.7303064699205448, + "epoch": 0.7292924140667063, "grad_norm": 0.0, - "learning_rate": 3.5784625932466978e-06, - "loss": 0.8725, + "learning_rate": 3.603581502828508e-06, + "loss": 0.7293, "step": 25736 }, { - "epoch": 0.7303348467650397, + "epoch": 0.7293207515089688, "grad_norm": 0.0, - "learning_rate": 3.577758077293959e-06, - "loss": 0.8534, + "learning_rate": 3.602876049868097e-06, + "loss": 0.8424, "step": 25737 }, { - "epoch": 0.7303632236095347, + "epoch": 0.7293490889512313, "grad_norm": 0.0, - "learning_rate": 3.5770536155901838e-06, - "loss": 0.7543, + "learning_rate": 3.602170650793032e-06, + "loss": 0.8587, "step": 25738 }, { - "epoch": 0.7303916004540295, + "epoch": 0.7293774263934937, "grad_norm": 0.0, - "learning_rate": 3.5763492081413242e-06, - "loss": 0.7945, + "learning_rate": 3.6014653056092598e-06, + "loss": 0.8432, "step": 25739 }, { - "epoch": 0.7304199772985244, + "epoch": 0.7294057638357562, "grad_norm": 0.0, - "learning_rate": 3.575644854953335e-06, - "loss": 0.8057, + "learning_rate": 3.600760014322725e-06, + "loss": 0.8315, "step": 25740 }, { - "epoch": 0.7304483541430193, + "epoch": 0.7294341012780187, "grad_norm": 0.0, - "learning_rate": 3.5749405560321606e-06, - "loss": 0.8781, + "learning_rate": 3.600054776939361e-06, + "loss": 0.7737, "step": 25741 }, { - "epoch": 0.7304767309875142, + "epoch": 0.7294624387202812, "grad_norm": 0.0, - "learning_rate": 3.574236311383751e-06, - "loss": 0.7694, + "learning_rate": 3.5993495934651157e-06, + "loss": 0.8318, "step": 25742 }, { - "epoch": 0.7305051078320091, + "epoch": 0.7294907761625435, "grad_norm": 0.0, - "learning_rate": 3.5735321210140596e-06, - "loss": 0.8175, + "learning_rate": 3.5986444639059214e-06, + "loss": 0.8063, "step": 25743 }, { - "epoch": 0.7305334846765039, + "epoch": 0.729519113604806, "grad_norm": 0.0, - "learning_rate": 3.572827984929028e-06, - "loss": 0.8049, + "learning_rate": 3.597939388267724e-06, + "loss": 0.8634, "step": 25744 }, { - "epoch": 0.7305618615209989, + "epoch": 0.7295474510470685, "grad_norm": 0.0, - "learning_rate": 3.5721239031346067e-06, - "loss": 0.8047, + "learning_rate": 3.5972343665564625e-06, + "loss": 0.8081, "step": 25745 }, { - "epoch": 0.7305902383654937, + "epoch": 0.7295757884893309, "grad_norm": 0.0, - "learning_rate": 3.571419875636748e-06, - "loss": 0.7794, + "learning_rate": 3.596529398778069e-06, + "loss": 0.7734, "step": 25746 }, { - "epoch": 0.7306186152099886, + "epoch": 0.7296041259315934, "grad_norm": 0.0, - "learning_rate": 3.570715902441391e-06, - "loss": 0.6967, + "learning_rate": 3.595824484938488e-06, + "loss": 0.8627, "step": 25747 }, { - "epoch": 0.7306469920544836, + "epoch": 0.7296324633738559, "grad_norm": 0.0, - "learning_rate": 3.5700119835544856e-06, - "loss": 0.7296, + "learning_rate": 3.595119625043655e-06, + "loss": 0.7954, "step": 25748 }, { - "epoch": 0.7306753688989784, + "epoch": 0.7296608008161183, "grad_norm": 0.0, - "learning_rate": 3.5693081189819813e-06, - "loss": 0.8377, + "learning_rate": 3.5944148190995077e-06, + "loss": 0.9048, "step": 25749 }, { - "epoch": 0.7307037457434733, + "epoch": 0.7296891382583808, "grad_norm": 0.0, - "learning_rate": 3.568604308729817e-06, - "loss": 0.72, + "learning_rate": 3.5937100671119864e-06, + "loss": 0.8785, "step": 25750 }, { - "epoch": 0.7307321225879683, + "epoch": 0.7297174757006433, "grad_norm": 0.0, - "learning_rate": 3.5679005528039423e-06, - "loss": 0.7724, + "learning_rate": 3.5930053690870217e-06, + "loss": 0.6731, "step": 25751 }, { - "epoch": 0.7307604994324631, + "epoch": 0.7297458131429058, "grad_norm": 0.0, - "learning_rate": 3.5671968512102993e-06, - "loss": 0.8563, + "learning_rate": 3.5923007250305507e-06, + "loss": 0.8014, "step": 25752 }, { - "epoch": 0.730788876276958, + "epoch": 0.7297741505851681, "grad_norm": 0.0, - "learning_rate": 3.566493203954834e-06, - "loss": 0.8084, + "learning_rate": 3.591596134948514e-06, + "loss": 0.7245, "step": 25753 }, { - "epoch": 0.7308172531214528, + "epoch": 0.7298024880274306, "grad_norm": 0.0, - "learning_rate": 3.5657896110434942e-06, - "loss": 0.8785, + "learning_rate": 3.5908915988468386e-06, + "loss": 0.8781, "step": 25754 }, { - "epoch": 0.7308456299659478, + "epoch": 0.7298308254696931, "grad_norm": 0.0, - "learning_rate": 3.5650860724822144e-06, - "loss": 0.7968, + "learning_rate": 3.590187116731464e-06, + "loss": 0.7834, "step": 25755 }, { - "epoch": 0.7308740068104427, + "epoch": 0.7298591629119555, "grad_norm": 0.0, - "learning_rate": 3.564382588276942e-06, - "loss": 0.735, + "learning_rate": 3.5894826886083268e-06, + "loss": 0.7641, "step": 25756 }, { - "epoch": 0.7309023836549375, + "epoch": 0.729887500354218, "grad_norm": 0.0, - "learning_rate": 3.5636791584336227e-06, - "loss": 0.7628, + "learning_rate": 3.5887783144833544e-06, + "loss": 0.8021, "step": 25757 }, { - "epoch": 0.7309307604994325, + "epoch": 0.7299158377964805, "grad_norm": 0.0, - "learning_rate": 3.5629757829581912e-06, - "loss": 0.7875, + "learning_rate": 3.5880739943624855e-06, + "loss": 0.8023, "step": 25758 }, { - "epoch": 0.7309591373439274, + "epoch": 0.729944175238743, "grad_norm": 0.0, - "learning_rate": 3.562272461856593e-06, - "loss": 0.8559, + "learning_rate": 3.587369728251647e-06, + "loss": 0.7828, "step": 25759 }, { - "epoch": 0.7309875141884222, + "epoch": 0.7299725126810054, "grad_norm": 0.0, - "learning_rate": 3.561569195134772e-06, - "loss": 0.7475, + "learning_rate": 3.5866655161567754e-06, + "loss": 0.8782, "step": 25760 }, { - "epoch": 0.7310158910329171, + "epoch": 0.7300008501232679, "grad_norm": 0.0, - "learning_rate": 3.5608659827986624e-06, - "loss": 0.7799, + "learning_rate": 3.5859613580838015e-06, + "loss": 0.7994, "step": 25761 }, { - "epoch": 0.7310442678774121, + "epoch": 0.7300291875655304, "grad_norm": 0.0, - "learning_rate": 3.5601628248542065e-06, - "loss": 0.8652, + "learning_rate": 3.5852572540386564e-06, + "loss": 0.826, "step": 25762 }, { - "epoch": 0.7310726447219069, + "epoch": 0.7300575250077928, "grad_norm": 0.0, - "learning_rate": 3.559459721307349e-06, - "loss": 0.899, + "learning_rate": 3.5845532040272758e-06, + "loss": 0.7223, "step": 25763 }, { - "epoch": 0.7311010215664018, + "epoch": 0.7300858624500552, "grad_norm": 0.0, - "learning_rate": 3.558756672164021e-06, - "loss": 0.8884, + "learning_rate": 3.583849208055582e-06, + "loss": 0.8365, "step": 25764 }, { - "epoch": 0.7311293984108967, + "epoch": 0.7301141998923177, "grad_norm": 0.0, - "learning_rate": 3.5580536774301643e-06, - "loss": 0.8119, + "learning_rate": 3.5831452661295085e-06, + "loss": 0.8321, "step": 25765 }, { - "epoch": 0.7311577752553916, + "epoch": 0.7301425373345802, "grad_norm": 0.0, - "learning_rate": 3.5573507371117223e-06, - "loss": 0.8198, + "learning_rate": 3.5824413782549893e-06, + "loss": 0.8318, "step": 25766 }, { - "epoch": 0.7311861520998865, + "epoch": 0.7301708747768426, "grad_norm": 0.0, - "learning_rate": 3.556647851214621e-06, - "loss": 0.8593, + "learning_rate": 3.5817375444379454e-06, + "loss": 0.846, "step": 25767 }, { - "epoch": 0.7312145289443814, + "epoch": 0.7301992122191051, "grad_norm": 0.0, - "learning_rate": 3.555945019744811e-06, - "loss": 0.8029, + "learning_rate": 3.5810337646843096e-06, + "loss": 0.7359, "step": 25768 }, { - "epoch": 0.7312429057888763, + "epoch": 0.7302275496613676, "grad_norm": 0.0, - "learning_rate": 3.55524224270822e-06, - "loss": 0.8431, + "learning_rate": 3.580330039000014e-06, + "loss": 0.8145, "step": 25769 }, { - "epoch": 0.7312712826333712, + "epoch": 0.73025588710363, "grad_norm": 0.0, - "learning_rate": 3.554539520110788e-06, - "loss": 0.8231, + "learning_rate": 3.579626367390978e-06, + "loss": 0.8527, "step": 25770 }, { - "epoch": 0.731299659477866, + "epoch": 0.7302842245458925, "grad_norm": 0.0, - "learning_rate": 3.5538368519584543e-06, - "loss": 0.8671, + "learning_rate": 3.5789227498631376e-06, + "loss": 0.7578, "step": 25771 }, { - "epoch": 0.731328036322361, + "epoch": 0.730312561988155, "grad_norm": 0.0, - "learning_rate": 3.5531342382571467e-06, - "loss": 0.9106, + "learning_rate": 3.5782191864224113e-06, + "loss": 0.738, "step": 25772 }, { - "epoch": 0.7313564131668558, + "epoch": 0.7303408994304174, "grad_norm": 0.0, - "learning_rate": 3.5524316790128032e-06, - "loss": 0.8569, + "learning_rate": 3.57751567707473e-06, + "loss": 0.8239, "step": 25773 }, { - "epoch": 0.7313847900113507, + "epoch": 0.7303692368726799, "grad_norm": 0.0, - "learning_rate": 3.551729174231363e-06, - "loss": 0.8385, + "learning_rate": 3.5768122218260193e-06, + "loss": 0.9188, "step": 25774 }, { - "epoch": 0.7314131668558457, + "epoch": 0.7303975743149423, "grad_norm": 0.0, - "learning_rate": 3.5510267239187525e-06, - "loss": 0.7703, + "learning_rate": 3.5761088206822035e-06, + "loss": 0.8367, "step": 25775 }, { - "epoch": 0.7314415437003405, + "epoch": 0.7304259117572048, "grad_norm": 0.0, - "learning_rate": 3.5503243280809097e-06, - "loss": 0.8301, + "learning_rate": 3.5754054736492096e-06, + "loss": 0.8741, "step": 25776 }, { - "epoch": 0.7314699205448354, + "epoch": 0.7304542491994672, "grad_norm": 0.0, - "learning_rate": 3.5496219867237712e-06, - "loss": 0.8335, + "learning_rate": 3.574702180732964e-06, + "loss": 0.9326, "step": 25777 }, { - "epoch": 0.7314982973893303, + "epoch": 0.7304825866417297, "grad_norm": 0.0, - "learning_rate": 3.5489196998532616e-06, - "loss": 0.8494, + "learning_rate": 3.573998941939384e-06, + "loss": 0.7753, "step": 25778 }, { - "epoch": 0.7315266742338252, + "epoch": 0.7305109240839922, "grad_norm": 0.0, - "learning_rate": 3.548217467475317e-06, - "loss": 0.755, + "learning_rate": 3.573295757274401e-06, + "loss": 0.8791, "step": 25779 }, { - "epoch": 0.7315550510783201, + "epoch": 0.7305392615262546, "grad_norm": 0.0, - "learning_rate": 3.547515289595873e-06, - "loss": 0.8358, + "learning_rate": 3.5725926267439304e-06, + "loss": 0.6748, "step": 25780 }, { - "epoch": 0.7315834279228149, + "epoch": 0.7305675989685171, "grad_norm": 0.0, - "learning_rate": 3.5468131662208547e-06, - "loss": 0.9581, + "learning_rate": 3.5718895503538997e-06, + "loss": 0.7674, "step": 25781 }, { - "epoch": 0.7316118047673099, + "epoch": 0.7305959364107796, "grad_norm": 0.0, - "learning_rate": 3.546111097356192e-06, - "loss": 0.822, + "learning_rate": 3.5711865281102333e-06, + "loss": 0.8139, "step": 25782 }, { - "epoch": 0.7316401816118048, + "epoch": 0.7306242738530421, "grad_norm": 0.0, - "learning_rate": 3.5454090830078257e-06, - "loss": 0.8702, + "learning_rate": 3.570483560018847e-06, + "loss": 0.8177, "step": 25783 }, { - "epoch": 0.7316685584562996, + "epoch": 0.7306526112953045, "grad_norm": 0.0, - "learning_rate": 3.5447071231816753e-06, - "loss": 0.7927, + "learning_rate": 3.5697806460856655e-06, + "loss": 0.8825, "step": 25784 }, { - "epoch": 0.7316969353007946, + "epoch": 0.730680948737567, "grad_norm": 0.0, - "learning_rate": 3.544005217883675e-06, - "loss": 0.8221, + "learning_rate": 3.5690777863166115e-06, + "loss": 0.7308, "step": 25785 }, { - "epoch": 0.7317253121452895, + "epoch": 0.7307092861798294, "grad_norm": 0.0, - "learning_rate": 3.543303367119755e-06, - "loss": 0.8164, + "learning_rate": 3.5683749807176015e-06, + "loss": 0.8382, "step": 25786 }, { - "epoch": 0.7317536889897843, + "epoch": 0.7307376236220918, "grad_norm": 0.0, - "learning_rate": 3.5426015708958396e-06, - "loss": 0.8639, + "learning_rate": 3.5676722292945567e-06, + "loss": 0.7584, "step": 25787 }, { - "epoch": 0.7317820658342792, + "epoch": 0.7307659610643543, "grad_norm": 0.0, - "learning_rate": 3.5418998292178573e-06, - "loss": 0.8023, + "learning_rate": 3.566969532053397e-06, + "loss": 0.7953, "step": 25788 }, { - "epoch": 0.7318104426787742, + "epoch": 0.7307942985066168, "grad_norm": 0.0, - "learning_rate": 3.541198142091742e-06, - "loss": 0.7972, + "learning_rate": 3.5662668890000416e-06, + "loss": 0.8474, "step": 25789 }, { - "epoch": 0.731838819523269, + "epoch": 0.7308226359488793, "grad_norm": 0.0, - "learning_rate": 3.5404965095234136e-06, - "loss": 0.876, + "learning_rate": 3.565564300140414e-06, + "loss": 0.7517, "step": 25790 }, { - "epoch": 0.7318671963677639, + "epoch": 0.7308509733911417, "grad_norm": 0.0, - "learning_rate": 3.5397949315187995e-06, - "loss": 0.7978, + "learning_rate": 3.5648617654804228e-06, + "loss": 0.824, "step": 25791 }, { - "epoch": 0.7318955732122588, + "epoch": 0.7308793108334042, "grad_norm": 0.0, - "learning_rate": 3.5390934080838333e-06, - "loss": 0.885, + "learning_rate": 3.5641592850259943e-06, + "loss": 0.9108, "step": 25792 }, { - "epoch": 0.7319239500567537, + "epoch": 0.7309076482756667, "grad_norm": 0.0, - "learning_rate": 3.5383919392244315e-06, - "loss": 0.8362, + "learning_rate": 3.5634568587830386e-06, + "loss": 0.8123, "step": 25793 }, { - "epoch": 0.7319523269012486, + "epoch": 0.7309359857179291, "grad_norm": 0.0, - "learning_rate": 3.5376905249465267e-06, - "loss": 0.8934, + "learning_rate": 3.562754486757477e-06, + "loss": 0.7462, "step": 25794 }, { - "epoch": 0.7319807037457434, + "epoch": 0.7309643231601916, "grad_norm": 0.0, - "learning_rate": 3.5369891652560375e-06, - "loss": 0.8991, + "learning_rate": 3.562052168955227e-06, + "loss": 0.8199, "step": 25795 }, { - "epoch": 0.7320090805902384, + "epoch": 0.730992660602454, "grad_norm": 0.0, - "learning_rate": 3.5362878601588915e-06, - "loss": 0.8585, + "learning_rate": 3.5613499053821997e-06, + "loss": 0.7394, "step": 25796 }, { - "epoch": 0.7320374574347333, + "epoch": 0.7310209980447164, "grad_norm": 0.0, - "learning_rate": 3.5355866096610115e-06, - "loss": 0.8244, + "learning_rate": 3.5606476960443126e-06, + "loss": 0.8026, "step": 25797 }, { - "epoch": 0.7320658342792281, + "epoch": 0.7310493354869789, "grad_norm": 0.0, - "learning_rate": 3.534885413768323e-06, - "loss": 0.8345, + "learning_rate": 3.5599455409474847e-06, + "loss": 0.9248, "step": 25798 }, { - "epoch": 0.7320942111237231, + "epoch": 0.7310776729292414, "grad_norm": 0.0, - "learning_rate": 3.534184272486747e-06, - "loss": 0.8191, + "learning_rate": 3.559243440097623e-06, + "loss": 0.8516, "step": 25799 }, { - "epoch": 0.7321225879682179, + "epoch": 0.7311060103715039, "grad_norm": 0.0, - "learning_rate": 3.533483185822212e-06, - "loss": 0.8533, + "learning_rate": 3.5585413935006465e-06, + "loss": 0.7289, "step": 25800 }, { - "epoch": 0.7321509648127128, + "epoch": 0.7311343478137663, "grad_norm": 0.0, - "learning_rate": 3.5327821537806305e-06, - "loss": 0.8166, + "learning_rate": 3.5578394011624674e-06, + "loss": 0.795, "step": 25801 }, { - "epoch": 0.7321793416572078, + "epoch": 0.7311626852560288, "grad_norm": 0.0, - "learning_rate": 3.53208117636793e-06, - "loss": 0.9557, + "learning_rate": 3.557137463088999e-06, + "loss": 0.807, "step": 25802 }, { - "epoch": 0.7322077185017026, + "epoch": 0.7311910226982913, "grad_norm": 0.0, - "learning_rate": 3.5313802535900334e-06, - "loss": 0.939, + "learning_rate": 3.5564355792861573e-06, + "loss": 0.8168, "step": 25803 }, { - "epoch": 0.7322360953461975, + "epoch": 0.7312193601405537, "grad_norm": 0.0, - "learning_rate": 3.5306793854528543e-06, - "loss": 0.6973, + "learning_rate": 3.5557337497598487e-06, + "loss": 0.8682, "step": 25804 }, { - "epoch": 0.7322644721906924, + "epoch": 0.7312476975828162, "grad_norm": 0.0, - "learning_rate": 3.529978571962318e-06, - "loss": 0.8763, + "learning_rate": 3.555031974515988e-06, + "loss": 0.8301, "step": 25805 }, { - "epoch": 0.7322928490351873, + "epoch": 0.7312760350250787, "grad_norm": 0.0, - "learning_rate": 3.5292778131243475e-06, - "loss": 0.8249, + "learning_rate": 3.5543302535604897e-06, + "loss": 0.9084, "step": 25806 }, { - "epoch": 0.7323212258796822, + "epoch": 0.7313043724673411, "grad_norm": 0.0, - "learning_rate": 3.5285771089448538e-06, - "loss": 0.8893, + "learning_rate": 3.553628586899257e-06, + "loss": 0.9161, "step": 25807 }, { - "epoch": 0.732349602724177, + "epoch": 0.7313327099096035, "grad_norm": 0.0, - "learning_rate": 3.5278764594297608e-06, - "loss": 0.7897, + "learning_rate": 3.552926974538208e-06, + "loss": 0.8125, "step": 25808 }, { - "epoch": 0.732377979568672, + "epoch": 0.731361047351866, "grad_norm": 0.0, - "learning_rate": 3.5271758645849897e-06, - "loss": 0.8014, + "learning_rate": 3.5522254164832458e-06, + "loss": 0.7974, "step": 25809 }, { - "epoch": 0.7324063564131669, + "epoch": 0.7313893847941285, "grad_norm": 0.0, - "learning_rate": 3.5264753244164517e-06, - "loss": 0.8518, + "learning_rate": 3.5515239127402845e-06, + "loss": 0.8519, "step": 25810 }, { - "epoch": 0.7324347332576617, + "epoch": 0.7314177222363909, "grad_norm": 0.0, - "learning_rate": 3.5257748389300673e-06, - "loss": 0.7736, + "learning_rate": 3.5508224633152333e-06, + "loss": 0.9138, "step": 25811 }, { - "epoch": 0.7324631101021566, + "epoch": 0.7314460596786534, "grad_norm": 0.0, - "learning_rate": 3.5250744081317567e-06, - "loss": 0.8479, + "learning_rate": 3.5501210682139977e-06, + "loss": 0.7748, "step": 25812 }, { - "epoch": 0.7324914869466516, + "epoch": 0.7314743971209159, "grad_norm": 0.0, - "learning_rate": 3.5243740320274277e-06, - "loss": 0.7913, + "learning_rate": 3.549419727442487e-06, + "loss": 0.7746, "step": 25813 }, { - "epoch": 0.7325198637911464, + "epoch": 0.7315027345631784, "grad_norm": 0.0, - "learning_rate": 3.523673710623009e-06, - "loss": 0.8277, + "learning_rate": 3.5487184410066076e-06, + "loss": 0.8485, "step": 25814 }, { - "epoch": 0.7325482406356413, + "epoch": 0.7315310720054408, "grad_norm": 0.0, - "learning_rate": 3.5229734439244068e-06, - "loss": 0.8877, + "learning_rate": 3.54801720891227e-06, + "loss": 0.6954, "step": 25815 }, { - "epoch": 0.7325766174801362, + "epoch": 0.7315594094477033, "grad_norm": 0.0, - "learning_rate": 3.5222732319375385e-06, - "loss": 0.828, + "learning_rate": 3.5473160311653833e-06, + "loss": 0.7725, "step": 25816 }, { - "epoch": 0.7326049943246311, + "epoch": 0.7315877468899658, "grad_norm": 0.0, - "learning_rate": 3.521573074668323e-06, - "loss": 0.8669, + "learning_rate": 3.546614907771845e-06, + "loss": 0.7196, "step": 25817 }, { - "epoch": 0.732633371169126, + "epoch": 0.7316160843322281, "grad_norm": 0.0, - "learning_rate": 3.5208729721226663e-06, - "loss": 0.7274, + "learning_rate": 3.545913838737567e-06, + "loss": 0.9089, "step": 25818 }, { - "epoch": 0.7326617480136209, + "epoch": 0.7316444217744906, "grad_norm": 0.0, - "learning_rate": 3.5201729243064885e-06, - "loss": 0.8565, + "learning_rate": 3.545212824068456e-06, + "loss": 0.8164, "step": 25819 }, { - "epoch": 0.7326901248581158, + "epoch": 0.7316727592167531, "grad_norm": 0.0, - "learning_rate": 3.5194729312257035e-06, - "loss": 0.8158, + "learning_rate": 3.5445118637704112e-06, + "loss": 0.7598, "step": 25820 }, { - "epoch": 0.7327185017026107, + "epoch": 0.7317010966590155, "grad_norm": 0.0, - "learning_rate": 3.51877299288622e-06, - "loss": 0.8571, + "learning_rate": 3.543810957849343e-06, + "loss": 0.8507, "step": 25821 }, { - "epoch": 0.7327468785471055, + "epoch": 0.731729434101278, "grad_norm": 0.0, - "learning_rate": 3.5180731092939514e-06, - "loss": 0.8974, + "learning_rate": 3.5431101063111495e-06, + "loss": 0.9435, "step": 25822 }, { - "epoch": 0.7327752553916005, + "epoch": 0.7317577715435405, "grad_norm": 0.0, - "learning_rate": 3.5173732804548144e-06, - "loss": 0.7795, + "learning_rate": 3.5424093091617375e-06, + "loss": 0.8064, "step": 25823 }, { - "epoch": 0.7328036322360953, + "epoch": 0.731786108985803, "grad_norm": 0.0, - "learning_rate": 3.5166735063747125e-06, - "loss": 0.8244, + "learning_rate": 3.5417085664070127e-06, + "loss": 0.8952, "step": 25824 }, { - "epoch": 0.7328320090805902, + "epoch": 0.7318144464280654, "grad_norm": 0.0, - "learning_rate": 3.5159737870595623e-06, - "loss": 0.8177, + "learning_rate": 3.5410078780528723e-06, + "loss": 0.799, "step": 25825 }, { - "epoch": 0.7328603859250852, + "epoch": 0.7318427838703279, "grad_norm": 0.0, - "learning_rate": 3.5152741225152755e-06, - "loss": 0.9066, + "learning_rate": 3.54030724410522e-06, + "loss": 0.834, "step": 25826 }, { - "epoch": 0.73288876276958, + "epoch": 0.7318711213125904, "grad_norm": 0.0, - "learning_rate": 3.514574512747756e-06, - "loss": 0.8496, + "learning_rate": 3.53960666456996e-06, + "loss": 0.8056, "step": 25827 }, { - "epoch": 0.7329171396140749, + "epoch": 0.7318994587548527, "grad_norm": 0.0, - "learning_rate": 3.5138749577629184e-06, - "loss": 0.7777, + "learning_rate": 3.538906139452991e-06, + "loss": 0.8799, "step": 25828 }, { - "epoch": 0.7329455164585698, + "epoch": 0.7319277961971152, "grad_norm": 0.0, - "learning_rate": 3.5131754575666687e-06, - "loss": 0.9174, + "learning_rate": 3.5382056687602185e-06, + "loss": 0.8271, "step": 25829 }, { - "epoch": 0.7329738933030647, + "epoch": 0.7319561336393777, "grad_norm": 0.0, - "learning_rate": 3.5124760121649195e-06, - "loss": 0.8329, + "learning_rate": 3.537505252497535e-06, + "loss": 0.9871, "step": 25830 }, { - "epoch": 0.7330022701475596, + "epoch": 0.7319844710816402, "grad_norm": 0.0, - "learning_rate": 3.5117766215635795e-06, - "loss": 0.7276, + "learning_rate": 3.536804890670846e-06, + "loss": 0.8384, "step": 25831 }, { - "epoch": 0.7330306469920544, + "epoch": 0.7320128085239026, "grad_norm": 0.0, - "learning_rate": 3.5110772857685505e-06, - "loss": 0.9026, + "learning_rate": 3.536104583286052e-06, + "loss": 0.7978, "step": 25832 }, { - "epoch": 0.7330590238365494, + "epoch": 0.7320411459661651, "grad_norm": 0.0, - "learning_rate": 3.510378004785744e-06, - "loss": 0.8273, + "learning_rate": 3.535404330349046e-06, + "loss": 0.8332, "step": 25833 }, { - "epoch": 0.7330874006810443, + "epoch": 0.7320694834084276, "grad_norm": 0.0, - "learning_rate": 3.5096787786210686e-06, - "loss": 0.7614, + "learning_rate": 3.5347041318657304e-06, + "loss": 0.751, "step": 25834 }, { - "epoch": 0.7331157775255391, + "epoch": 0.73209782085069, "grad_norm": 0.0, - "learning_rate": 3.508979607280426e-06, - "loss": 0.882, + "learning_rate": 3.534003987842005e-06, + "loss": 0.7213, "step": 25835 }, { - "epoch": 0.7331441543700341, + "epoch": 0.7321261582929525, "grad_norm": 0.0, - "learning_rate": 3.508280490769723e-06, - "loss": 0.8601, + "learning_rate": 3.533303898283763e-06, + "loss": 0.9025, "step": 25836 }, { - "epoch": 0.733172531214529, + "epoch": 0.732154495735215, "grad_norm": 0.0, - "learning_rate": 3.507581429094872e-06, - "loss": 0.7929, + "learning_rate": 3.5326038631969064e-06, + "loss": 0.8133, "step": 25837 }, { - "epoch": 0.7332009080590238, + "epoch": 0.7321828331774775, "grad_norm": 0.0, - "learning_rate": 3.506882422261767e-06, - "loss": 0.9541, + "learning_rate": 3.5319038825873254e-06, + "loss": 0.7165, "step": 25838 }, { - "epoch": 0.7332292849035187, + "epoch": 0.7322111706197398, "grad_norm": 0.0, - "learning_rate": 3.50618347027632e-06, - "loss": 0.8878, + "learning_rate": 3.5312039564609203e-06, + "loss": 0.8219, "step": 25839 }, { - "epoch": 0.7332576617480137, + "epoch": 0.7322395080620023, "grad_norm": 0.0, - "learning_rate": 3.505484573144435e-06, - "loss": 0.7949, + "learning_rate": 3.530504084823586e-06, + "loss": 0.7973, "step": 25840 }, { - "epoch": 0.7332860385925085, + "epoch": 0.7322678455042648, "grad_norm": 0.0, - "learning_rate": 3.504785730872011e-06, - "loss": 0.7886, + "learning_rate": 3.5298042676812195e-06, + "loss": 0.7939, "step": 25841 }, { - "epoch": 0.7333144154370034, + "epoch": 0.7322961829465272, "grad_norm": 0.0, - "learning_rate": 3.504086943464954e-06, - "loss": 0.8642, + "learning_rate": 3.529104505039713e-06, + "loss": 0.794, "step": 25842 }, { - "epoch": 0.7333427922814983, + "epoch": 0.7323245203887897, "grad_norm": 0.0, - "learning_rate": 3.5033882109291693e-06, - "loss": 0.8181, + "learning_rate": 3.528404796904966e-06, + "loss": 0.8331, "step": 25843 }, { - "epoch": 0.7333711691259932, + "epoch": 0.7323528578310522, "grad_norm": 0.0, - "learning_rate": 3.5026895332705504e-06, - "loss": 0.8829, + "learning_rate": 3.5277051432828648e-06, + "loss": 0.8925, "step": 25844 }, { - "epoch": 0.7333995459704881, + "epoch": 0.7323811952733146, "grad_norm": 0.0, - "learning_rate": 3.5019909104950124e-06, - "loss": 0.7758, + "learning_rate": 3.5270055441793106e-06, + "loss": 0.8223, "step": 25845 }, { - "epoch": 0.7334279228149829, + "epoch": 0.7324095327155771, "grad_norm": 0.0, - "learning_rate": 3.5012923426084443e-06, - "loss": 0.7906, + "learning_rate": 3.526305999600188e-06, + "loss": 0.7833, "step": 25846 }, { - "epoch": 0.7334562996594779, + "epoch": 0.7324378701578396, "grad_norm": 0.0, - "learning_rate": 3.500593829616753e-06, - "loss": 0.8471, + "learning_rate": 3.5256065095513947e-06, + "loss": 0.9341, "step": 25847 }, { - "epoch": 0.7334846765039728, + "epoch": 0.7324662076001021, "grad_norm": 0.0, - "learning_rate": 3.499895371525841e-06, - "loss": 0.7176, + "learning_rate": 3.5249070740388246e-06, + "loss": 0.7918, "step": 25848 }, { - "epoch": 0.7335130533484676, + "epoch": 0.7324945450423644, "grad_norm": 0.0, - "learning_rate": 3.4991969683416015e-06, - "loss": 0.8539, + "learning_rate": 3.5242076930683644e-06, + "loss": 0.7361, "step": 25849 }, { - "epoch": 0.7335414301929626, + "epoch": 0.7325228824846269, "grad_norm": 0.0, - "learning_rate": 3.498498620069938e-06, - "loss": 0.7727, + "learning_rate": 3.5235083666459104e-06, + "loss": 0.8474, "step": 25850 }, { - "epoch": 0.7335698070374574, + "epoch": 0.7325512199268894, "grad_norm": 0.0, - "learning_rate": 3.4978003267167516e-06, - "loss": 0.938, + "learning_rate": 3.5228090947773473e-06, + "loss": 0.8838, "step": 25851 }, { - "epoch": 0.7335981838819523, + "epoch": 0.7325795573691518, "grad_norm": 0.0, - "learning_rate": 3.4971020882879348e-06, - "loss": 0.8665, + "learning_rate": 3.522109877468568e-06, + "loss": 0.8568, "step": 25852 }, { - "epoch": 0.7336265607264473, + "epoch": 0.7326078948114143, "grad_norm": 0.0, - "learning_rate": 3.4964039047893893e-06, - "loss": 0.8591, + "learning_rate": 3.521410714725463e-06, + "loss": 0.8479, "step": 25853 }, { - "epoch": 0.7336549375709421, + "epoch": 0.7326362322536768, "grad_norm": 0.0, - "learning_rate": 3.495705776227015e-06, - "loss": 0.8517, + "learning_rate": 3.5207116065539214e-06, + "loss": 0.8182, "step": 25854 }, { - "epoch": 0.733683314415437, + "epoch": 0.7326645696959393, "grad_norm": 0.0, - "learning_rate": 3.495007702606704e-06, - "loss": 0.8407, + "learning_rate": 3.520012552959832e-06, + "loss": 0.8316, "step": 25855 }, { - "epoch": 0.7337116912599319, + "epoch": 0.7326929071382017, "grad_norm": 0.0, - "learning_rate": 3.494309683934355e-06, - "loss": 0.7791, + "learning_rate": 3.5193135539490854e-06, + "loss": 0.8605, "step": 25856 }, { - "epoch": 0.7337400681044268, + "epoch": 0.7327212445804642, "grad_norm": 0.0, - "learning_rate": 3.493611720215868e-06, - "loss": 0.8385, + "learning_rate": 3.518614609527565e-06, + "loss": 0.8022, "step": 25857 }, { - "epoch": 0.7337684449489217, + "epoch": 0.7327495820227267, "grad_norm": 0.0, - "learning_rate": 3.4929138114571325e-06, - "loss": 0.9118, + "learning_rate": 3.517915719701164e-06, + "loss": 0.8857, "step": 25858 }, { - "epoch": 0.7337968217934165, + "epoch": 0.732777919464989, "grad_norm": 0.0, - "learning_rate": 3.492215957664046e-06, - "loss": 0.8186, + "learning_rate": 3.5172168844757625e-06, + "loss": 0.8463, "step": 25859 }, { - "epoch": 0.7338251986379115, + "epoch": 0.7328062569072515, "grad_norm": 0.0, - "learning_rate": 3.491518158842503e-06, - "loss": 0.8054, + "learning_rate": 3.5165181038572505e-06, + "loss": 0.7564, "step": 25860 }, { - "epoch": 0.7338535754824064, + "epoch": 0.732834594349514, "grad_norm": 0.0, - "learning_rate": 3.4908204149983993e-06, - "loss": 0.8098, + "learning_rate": 3.515819377851517e-06, + "loss": 0.7805, "step": 25861 }, { - "epoch": 0.7338819523269012, + "epoch": 0.7328629317917765, "grad_norm": 0.0, - "learning_rate": 3.4901227261376326e-06, - "loss": 0.6579, + "learning_rate": 3.515120706464441e-06, + "loss": 0.8261, "step": 25862 }, { - "epoch": 0.7339103291713961, + "epoch": 0.7328912692340389, "grad_norm": 0.0, - "learning_rate": 3.4894250922660865e-06, - "loss": 0.7321, + "learning_rate": 3.5144220897019122e-06, + "loss": 0.8148, "step": 25863 }, { - "epoch": 0.7339387060158911, + "epoch": 0.7329196066763014, "grad_norm": 0.0, - "learning_rate": 3.4887275133896605e-06, - "loss": 0.9133, + "learning_rate": 3.513723527569818e-06, + "loss": 0.7952, "step": 25864 }, { - "epoch": 0.7339670828603859, + "epoch": 0.7329479441185639, "grad_norm": 0.0, - "learning_rate": 3.48802998951425e-06, - "loss": 0.8855, + "learning_rate": 3.5130250200740355e-06, + "loss": 0.7081, "step": 25865 }, { - "epoch": 0.7339954597048808, + "epoch": 0.7329762815608263, "grad_norm": 0.0, - "learning_rate": 3.487332520645739e-06, - "loss": 0.8071, + "learning_rate": 3.512326567220452e-06, + "loss": 0.6999, "step": 25866 }, { - "epoch": 0.7340238365493758, + "epoch": 0.7330046190030888, "grad_norm": 0.0, - "learning_rate": 3.4866351067900216e-06, - "loss": 0.8444, + "learning_rate": 3.5116281690149514e-06, + "loss": 0.7844, "step": 25867 }, { - "epoch": 0.7340522133938706, + "epoch": 0.7330329564453513, "grad_norm": 0.0, - "learning_rate": 3.4859377479529944e-06, - "loss": 0.7224, + "learning_rate": 3.510929825463415e-06, + "loss": 0.7589, "step": 25868 }, { - "epoch": 0.7340805902383655, + "epoch": 0.7330612938876137, "grad_norm": 0.0, - "learning_rate": 3.485240444140541e-06, - "loss": 0.7109, + "learning_rate": 3.510231536571731e-06, + "loss": 0.8419, "step": 25869 }, { - "epoch": 0.7341089670828603, + "epoch": 0.7330896313298761, "grad_norm": 0.0, - "learning_rate": 3.484543195358554e-06, - "loss": 0.7838, + "learning_rate": 3.5095333023457723e-06, + "loss": 0.758, "step": 25870 }, { - "epoch": 0.7341373439273553, + "epoch": 0.7331179687721386, "grad_norm": 0.0, - "learning_rate": 3.4838460016129273e-06, - "loss": 0.8314, + "learning_rate": 3.508835122791425e-06, + "loss": 0.8338, "step": 25871 }, { - "epoch": 0.7341657207718502, + "epoch": 0.7331463062144011, "grad_norm": 0.0, - "learning_rate": 3.4831488629095433e-06, - "loss": 0.7311, + "learning_rate": 3.5081369979145743e-06, + "loss": 0.7645, "step": 25872 }, { - "epoch": 0.734194097616345, + "epoch": 0.7331746436566635, "grad_norm": 0.0, - "learning_rate": 3.4824517792542935e-06, - "loss": 0.8028, + "learning_rate": 3.507438927721092e-06, + "loss": 0.8703, "step": 25873 }, { - "epoch": 0.73422247446084, + "epoch": 0.733202981098926, "grad_norm": 0.0, - "learning_rate": 3.4817547506530656e-06, - "loss": 0.7758, + "learning_rate": 3.5067409122168663e-06, + "loss": 0.8819, "step": 25874 }, { - "epoch": 0.7342508513053349, + "epoch": 0.7332313185411885, "grad_norm": 0.0, - "learning_rate": 3.481057777111749e-06, - "loss": 0.7686, + "learning_rate": 3.5060429514077697e-06, + "loss": 0.807, "step": 25875 }, { - "epoch": 0.7342792281498297, + "epoch": 0.7332596559834509, "grad_norm": 0.0, - "learning_rate": 3.480360858636234e-06, - "loss": 0.8636, + "learning_rate": 3.505345045299684e-06, + "loss": 0.7937, "step": 25876 }, { - "epoch": 0.7343076049943247, + "epoch": 0.7332879934257134, "grad_norm": 0.0, - "learning_rate": 3.4796639952324007e-06, - "loss": 0.8305, + "learning_rate": 3.504647193898494e-06, + "loss": 0.8427, "step": 25877 }, { - "epoch": 0.7343359818388195, + "epoch": 0.7333163308679759, "grad_norm": 0.0, - "learning_rate": 3.4789671869061383e-06, - "loss": 0.8301, + "learning_rate": 3.5039493972100667e-06, + "loss": 0.7725, "step": 25878 }, { - "epoch": 0.7343643586833144, + "epoch": 0.7333446683102384, "grad_norm": 0.0, - "learning_rate": 3.4782704336633365e-06, - "loss": 0.7524, + "learning_rate": 3.5032516552402885e-06, + "loss": 0.8325, "step": 25879 }, { - "epoch": 0.7343927355278093, + "epoch": 0.7333730057525008, "grad_norm": 0.0, - "learning_rate": 3.4775737355098737e-06, - "loss": 0.8666, + "learning_rate": 3.5025539679950326e-06, + "loss": 0.7516, "step": 25880 }, { - "epoch": 0.7344211123723042, + "epoch": 0.7334013431947632, "grad_norm": 0.0, - "learning_rate": 3.4768770924516393e-06, - "loss": 0.7736, + "learning_rate": 3.501856335480177e-06, + "loss": 0.8894, "step": 25881 }, { - "epoch": 0.7344494892167991, + "epoch": 0.7334296806370257, "grad_norm": 0.0, - "learning_rate": 3.4761805044945206e-06, - "loss": 0.7909, + "learning_rate": 3.5011587577016027e-06, + "loss": 0.7808, "step": 25882 }, { - "epoch": 0.734477866061294, + "epoch": 0.7334580180792881, "grad_norm": 0.0, - "learning_rate": 3.4754839716443956e-06, - "loss": 0.7528, + "learning_rate": 3.500461234665178e-06, + "loss": 0.7608, "step": 25883 }, { - "epoch": 0.7345062429057889, + "epoch": 0.7334863555215506, "grad_norm": 0.0, - "learning_rate": 3.4747874939071503e-06, - "loss": 0.8647, + "learning_rate": 3.4997637663767827e-06, + "loss": 0.9291, "step": 25884 }, { - "epoch": 0.7345346197502838, + "epoch": 0.7335146929638131, "grad_norm": 0.0, - "learning_rate": 3.474091071288672e-06, - "loss": 0.9, + "learning_rate": 3.499066352842293e-06, + "loss": 0.7639, "step": 25885 }, { - "epoch": 0.7345629965947786, + "epoch": 0.7335430304060756, "grad_norm": 0.0, - "learning_rate": 3.4733947037948367e-06, - "loss": 0.8804, + "learning_rate": 3.498368994067578e-06, + "loss": 0.744, "step": 25886 }, { - "epoch": 0.7345913734392735, + "epoch": 0.733571367848338, "grad_norm": 0.0, - "learning_rate": 3.4726983914315294e-06, - "loss": 0.7332, + "learning_rate": 3.4976716900585194e-06, + "loss": 0.8599, "step": 25887 }, { - "epoch": 0.7346197502837685, + "epoch": 0.7335997052906005, "grad_norm": 0.0, - "learning_rate": 3.472002134204635e-06, - "loss": 0.8235, + "learning_rate": 3.496974440820984e-06, + "loss": 0.8463, "step": 25888 }, { - "epoch": 0.7346481271282633, + "epoch": 0.733628042732863, "grad_norm": 0.0, - "learning_rate": 3.4713059321200293e-06, - "loss": 0.8656, + "learning_rate": 3.4962772463608463e-06, + "loss": 0.8352, "step": 25889 }, { - "epoch": 0.7346765039727582, + "epoch": 0.7336563801751254, "grad_norm": 0.0, - "learning_rate": 3.470609785183592e-06, - "loss": 0.7643, + "learning_rate": 3.4955801066839847e-06, + "loss": 0.9104, "step": 25890 }, { - "epoch": 0.7347048808172532, + "epoch": 0.7336847176173878, "grad_norm": 0.0, - "learning_rate": 3.4699136934012145e-06, - "loss": 0.8361, + "learning_rate": 3.494883021796264e-06, + "loss": 0.8345, "step": 25891 }, { - "epoch": 0.734733257661748, + "epoch": 0.7337130550596503, "grad_norm": 0.0, - "learning_rate": 3.469217656778766e-06, - "loss": 0.7938, + "learning_rate": 3.494185991703558e-06, + "loss": 0.8315, "step": 25892 }, { - "epoch": 0.7347616345062429, + "epoch": 0.7337413925019127, "grad_norm": 0.0, - "learning_rate": 3.4685216753221295e-06, - "loss": 0.7911, + "learning_rate": 3.4934890164117407e-06, + "loss": 0.8596, "step": 25893 }, { - "epoch": 0.7347900113507378, + "epoch": 0.7337697299441752, "grad_norm": 0.0, - "learning_rate": 3.467825749037188e-06, - "loss": 0.8927, + "learning_rate": 3.4927920959266804e-06, + "loss": 0.847, "step": 25894 }, { - "epoch": 0.7348183881952327, + "epoch": 0.7337980673864377, "grad_norm": 0.0, - "learning_rate": 3.4671298779298123e-06, - "loss": 0.829, + "learning_rate": 3.4920952302542533e-06, + "loss": 0.7815, "step": 25895 }, { - "epoch": 0.7348467650397276, + "epoch": 0.7338264048287002, "grad_norm": 0.0, - "learning_rate": 3.4664340620058844e-06, - "loss": 0.7851, + "learning_rate": 3.4913984194003205e-06, + "loss": 0.7503, "step": 25896 }, { - "epoch": 0.7348751418842224, + "epoch": 0.7338547422709626, "grad_norm": 0.0, - "learning_rate": 3.4657383012712844e-06, - "loss": 0.7832, + "learning_rate": 3.4907016633707557e-06, + "loss": 0.734, "step": 25897 }, { - "epoch": 0.7349035187287174, + "epoch": 0.7338830797132251, "grad_norm": 0.0, - "learning_rate": 3.4650425957318846e-06, - "loss": 0.8512, + "learning_rate": 3.4900049621714315e-06, + "loss": 0.7383, "step": 25898 }, { - "epoch": 0.7349318955732123, + "epoch": 0.7339114171554876, "grad_norm": 0.0, - "learning_rate": 3.4643469453935653e-06, - "loss": 0.9693, + "learning_rate": 3.4893083158082096e-06, + "loss": 0.8352, "step": 25899 }, { - "epoch": 0.7349602724177071, + "epoch": 0.73393975459775, "grad_norm": 0.0, - "learning_rate": 3.4636513502621994e-06, - "loss": 0.7858, + "learning_rate": 3.4886117242869643e-06, + "loss": 0.8415, "step": 25900 }, { - "epoch": 0.7349886492622021, + "epoch": 0.7339680920400125, "grad_norm": 0.0, - "learning_rate": 3.4629558103436634e-06, - "loss": 0.7724, + "learning_rate": 3.4879151876135574e-06, + "loss": 0.6712, "step": 25901 }, { - "epoch": 0.735017026106697, + "epoch": 0.733996429482275, "grad_norm": 0.0, - "learning_rate": 3.4622603256438368e-06, - "loss": 0.8056, + "learning_rate": 3.487218705793859e-06, + "loss": 0.8392, "step": 25902 }, { - "epoch": 0.7350454029511918, + "epoch": 0.7340247669245374, "grad_norm": 0.0, - "learning_rate": 3.461564896168589e-06, - "loss": 0.8724, + "learning_rate": 3.4865222788337393e-06, + "loss": 0.8972, "step": 25903 }, { - "epoch": 0.7350737797956867, + "epoch": 0.7340531043667998, "grad_norm": 0.0, - "learning_rate": 3.4608695219237954e-06, - "loss": 0.7657, + "learning_rate": 3.4858259067390586e-06, + "loss": 0.8767, "step": 25904 }, { - "epoch": 0.7351021566401816, + "epoch": 0.7340814418090623, "grad_norm": 0.0, - "learning_rate": 3.4601742029153306e-06, - "loss": 0.8093, + "learning_rate": 3.4851295895156846e-06, + "loss": 0.8148, "step": 25905 }, { - "epoch": 0.7351305334846765, + "epoch": 0.7341097792513248, "grad_norm": 0.0, - "learning_rate": 3.459478939149069e-06, - "loss": 0.8617, + "learning_rate": 3.484433327169483e-06, + "loss": 0.7446, "step": 25906 }, { - "epoch": 0.7351589103291714, + "epoch": 0.7341381166935872, "grad_norm": 0.0, - "learning_rate": 3.4587837306308823e-06, - "loss": 0.9177, + "learning_rate": 3.48373711970632e-06, + "loss": 0.8096, "step": 25907 }, { - "epoch": 0.7351872871736663, + "epoch": 0.7341664541358497, "grad_norm": 0.0, - "learning_rate": 3.4580885773666475e-06, - "loss": 0.8471, + "learning_rate": 3.483040967132061e-06, + "loss": 0.9123, "step": 25908 }, { - "epoch": 0.7352156640181612, + "epoch": 0.7341947915781122, "grad_norm": 0.0, - "learning_rate": 3.457393479362229e-06, - "loss": 0.841, + "learning_rate": 3.482344869452565e-06, + "loss": 0.8642, "step": 25909 }, { - "epoch": 0.735244040862656, + "epoch": 0.7342231290203747, "grad_norm": 0.0, - "learning_rate": 3.4566984366235023e-06, - "loss": 0.7367, + "learning_rate": 3.4816488266736992e-06, + "loss": 0.9236, "step": 25910 }, { - "epoch": 0.735272417707151, + "epoch": 0.7342514664626371, "grad_norm": 0.0, - "learning_rate": 3.45600344915634e-06, - "loss": 0.8755, + "learning_rate": 3.480952838801328e-06, + "loss": 0.8131, "step": 25911 }, { - "epoch": 0.7353007945516459, + "epoch": 0.7342798039048996, "grad_norm": 0.0, - "learning_rate": 3.4553085169666088e-06, - "loss": 0.8166, + "learning_rate": 3.4802569058413093e-06, + "loss": 0.8617, "step": 25912 }, { - "epoch": 0.7353291713961407, + "epoch": 0.734308141347162, "grad_norm": 0.0, - "learning_rate": 3.4546136400601803e-06, - "loss": 0.805, + "learning_rate": 3.4795610277995075e-06, + "loss": 0.7665, "step": 25913 }, { - "epoch": 0.7353575482406356, + "epoch": 0.7343364787894244, "grad_norm": 0.0, - "learning_rate": 3.4539188184429275e-06, - "loss": 0.8872, + "learning_rate": 3.4788652046817885e-06, + "loss": 0.8354, "step": 25914 }, { - "epoch": 0.7353859250851306, + "epoch": 0.7343648162316869, "grad_norm": 0.0, - "learning_rate": 3.453224052120714e-06, - "loss": 0.7935, + "learning_rate": 3.4781694364940054e-06, + "loss": 0.7868, "step": 25915 }, { - "epoch": 0.7354143019296254, + "epoch": 0.7343931536739494, "grad_norm": 0.0, - "learning_rate": 3.4525293410994108e-06, - "loss": 0.7697, + "learning_rate": 3.4774737232420264e-06, + "loss": 0.8033, "step": 25916 }, { - "epoch": 0.7354426787741203, + "epoch": 0.7344214911162118, "grad_norm": 0.0, - "learning_rate": 3.4518346853848906e-06, - "loss": 0.7645, + "learning_rate": 3.4767780649317053e-06, + "loss": 0.7933, "step": 25917 }, { - "epoch": 0.7354710556186153, + "epoch": 0.7344498285584743, "grad_norm": 0.0, - "learning_rate": 3.4511400849830135e-06, - "loss": 0.9471, + "learning_rate": 3.4760824615689036e-06, + "loss": 0.7028, "step": 25918 }, { - "epoch": 0.7354994324631101, + "epoch": 0.7344781660007368, "grad_norm": 0.0, - "learning_rate": 3.4504455398996495e-06, - "loss": 0.8698, + "learning_rate": 3.4753869131594832e-06, + "loss": 0.8211, "step": 25919 }, { - "epoch": 0.735527809307605, + "epoch": 0.7345065034429993, "grad_norm": 0.0, - "learning_rate": 3.4497510501406673e-06, - "loss": 0.9215, + "learning_rate": 3.474691419709302e-06, + "loss": 0.8347, "step": 25920 }, { - "epoch": 0.7355561861520998, + "epoch": 0.7345348408852617, "grad_norm": 0.0, - "learning_rate": 3.449056615711932e-06, - "loss": 0.8848, + "learning_rate": 3.4739959812242175e-06, + "loss": 0.809, "step": 25921 }, { - "epoch": 0.7355845629965948, + "epoch": 0.7345631783275242, "grad_norm": 0.0, - "learning_rate": 3.4483622366193146e-06, - "loss": 0.7391, + "learning_rate": 3.473300597710091e-06, + "loss": 0.8826, "step": 25922 }, { - "epoch": 0.7356129398410897, + "epoch": 0.7345915157697867, "grad_norm": 0.0, - "learning_rate": 3.4476679128686718e-06, - "loss": 0.7298, + "learning_rate": 3.472605269172774e-06, + "loss": 0.82, "step": 25923 }, { - "epoch": 0.7356413166855845, + "epoch": 0.734619853212049, "grad_norm": 0.0, - "learning_rate": 3.446973644465872e-06, - "loss": 0.8087, + "learning_rate": 3.4719099956181313e-06, + "loss": 0.8896, "step": 25924 }, { - "epoch": 0.7356696935300795, + "epoch": 0.7346481906543115, "grad_norm": 0.0, - "learning_rate": 3.4462794314167846e-06, - "loss": 0.8091, + "learning_rate": 3.47121477705201e-06, + "loss": 0.7729, "step": 25925 }, { - "epoch": 0.7356980703745744, + "epoch": 0.734676528096574, "grad_norm": 0.0, - "learning_rate": 3.4455852737272654e-06, - "loss": 0.7833, + "learning_rate": 3.4705196134802723e-06, + "loss": 0.8434, "step": 25926 }, { - "epoch": 0.7357264472190692, + "epoch": 0.7347048655388365, "grad_norm": 0.0, - "learning_rate": 3.444891171403183e-06, - "loss": 0.8565, + "learning_rate": 3.4698245049087755e-06, + "loss": 0.8494, "step": 25927 }, { - "epoch": 0.7357548240635642, + "epoch": 0.7347332029810989, "grad_norm": 0.0, - "learning_rate": 3.4441971244504023e-06, - "loss": 0.7887, + "learning_rate": 3.469129451343367e-06, + "loss": 0.701, "step": 25928 }, { - "epoch": 0.735783200908059, + "epoch": 0.7347615404233614, "grad_norm": 0.0, - "learning_rate": 3.4435031328747793e-06, - "loss": 0.851, + "learning_rate": 3.4684344527899117e-06, + "loss": 0.7722, "step": 25929 }, { - "epoch": 0.7358115777525539, + "epoch": 0.7347898778656239, "grad_norm": 0.0, - "learning_rate": 3.4428091966821806e-06, - "loss": 0.7951, + "learning_rate": 3.4677395092542542e-06, + "loss": 0.8567, "step": 25930 }, { - "epoch": 0.7358399545970488, + "epoch": 0.7348182153078863, "grad_norm": 0.0, - "learning_rate": 3.4421153158784716e-06, - "loss": 0.7853, + "learning_rate": 3.4670446207422525e-06, + "loss": 0.7685, "step": 25931 }, { - "epoch": 0.7358683314415437, + "epoch": 0.7348465527501488, "grad_norm": 0.0, - "learning_rate": 3.4414214904695045e-06, - "loss": 0.9489, + "learning_rate": 3.4663497872597596e-06, + "loss": 0.9728, "step": 25932 }, { - "epoch": 0.7358967082860386, + "epoch": 0.7348748901924113, "grad_norm": 0.0, - "learning_rate": 3.4407277204611456e-06, - "loss": 0.709, + "learning_rate": 3.4656550088126294e-06, + "loss": 0.8317, "step": 25933 }, { - "epoch": 0.7359250851305335, + "epoch": 0.7349032276346736, "grad_norm": 0.0, - "learning_rate": 3.4400340058592587e-06, - "loss": 0.8476, + "learning_rate": 3.464960285406713e-06, + "loss": 0.7681, "step": 25934 }, { - "epoch": 0.7359534619750284, + "epoch": 0.7349315650769361, "grad_norm": 0.0, - "learning_rate": 3.439340346669695e-06, - "loss": 0.7719, + "learning_rate": 3.464265617047866e-06, + "loss": 0.7317, "step": 25935 }, { - "epoch": 0.7359818388195233, + "epoch": 0.7349599025191986, "grad_norm": 0.0, - "learning_rate": 3.438646742898318e-06, - "loss": 0.8978, + "learning_rate": 3.4635710037419345e-06, + "loss": 0.86, "step": 25936 }, { - "epoch": 0.7360102156640181, + "epoch": 0.7349882399614611, "grad_norm": 0.0, - "learning_rate": 3.437953194550988e-06, - "loss": 0.8167, + "learning_rate": 3.462876445494774e-06, + "loss": 0.7562, "step": 25937 }, { - "epoch": 0.736038592508513, + "epoch": 0.7350165774037235, "grad_norm": 0.0, - "learning_rate": 3.437259701633562e-06, - "loss": 0.9026, + "learning_rate": 3.4621819423122295e-06, + "loss": 0.7946, "step": 25938 }, { - "epoch": 0.736066969353008, + "epoch": 0.735044914845986, "grad_norm": 0.0, - "learning_rate": 3.4365662641519004e-06, - "loss": 0.9208, + "learning_rate": 3.4614874942001543e-06, + "loss": 0.957, "step": 25939 }, { - "epoch": 0.7360953461975028, + "epoch": 0.7350732522882485, "grad_norm": 0.0, - "learning_rate": 3.435872882111857e-06, - "loss": 0.8072, + "learning_rate": 3.4607931011644013e-06, + "loss": 0.8384, "step": 25940 }, { - "epoch": 0.7361237230419977, + "epoch": 0.7351015897305109, "grad_norm": 0.0, - "learning_rate": 3.435179555519289e-06, - "loss": 0.7649, + "learning_rate": 3.460098763210813e-06, + "loss": 0.8239, "step": 25941 }, { - "epoch": 0.7361520998864927, + "epoch": 0.7351299271727734, "grad_norm": 0.0, - "learning_rate": 3.4344862843800587e-06, - "loss": 0.7643, + "learning_rate": 3.459404480345242e-06, + "loss": 0.8275, "step": 25942 }, { - "epoch": 0.7361804767309875, + "epoch": 0.7351582646150359, "grad_norm": 0.0, - "learning_rate": 3.433793068700014e-06, - "loss": 0.7949, + "learning_rate": 3.458710252573535e-06, + "loss": 0.7444, "step": 25943 }, { - "epoch": 0.7362088535754824, + "epoch": 0.7351866020572984, "grad_norm": 0.0, - "learning_rate": 3.433099908485016e-06, - "loss": 0.8095, + "learning_rate": 3.458016079901544e-06, + "loss": 0.8831, "step": 25944 }, { - "epoch": 0.7362372304199774, + "epoch": 0.7352149394995607, "grad_norm": 0.0, - "learning_rate": 3.43240680374092e-06, - "loss": 0.7477, + "learning_rate": 3.4573219623351097e-06, + "loss": 0.8398, "step": 25945 }, { - "epoch": 0.7362656072644722, + "epoch": 0.7352432769418232, "grad_norm": 0.0, - "learning_rate": 3.4317137544735755e-06, - "loss": 0.6721, + "learning_rate": 3.456627899880082e-06, + "loss": 0.9684, "step": 25946 }, { - "epoch": 0.7362939841089671, + "epoch": 0.7352716143840857, "grad_norm": 0.0, - "learning_rate": 3.4310207606888413e-06, - "loss": 0.7711, + "learning_rate": 3.455933892542308e-06, + "loss": 0.759, "step": 25947 }, { - "epoch": 0.7363223609534619, + "epoch": 0.7352999518263481, "grad_norm": 0.0, - "learning_rate": 3.430327822392573e-06, - "loss": 0.7371, + "learning_rate": 3.455239940327635e-06, + "loss": 0.8983, "step": 25948 }, { - "epoch": 0.7363507377979569, + "epoch": 0.7353282892686106, "grad_norm": 0.0, - "learning_rate": 3.429634939590617e-06, - "loss": 0.7376, + "learning_rate": 3.454546043241904e-06, + "loss": 0.7625, "step": 25949 }, { - "epoch": 0.7363791146424518, + "epoch": 0.7353566267108731, "grad_norm": 0.0, - "learning_rate": 3.42894211228883e-06, - "loss": 0.8651, + "learning_rate": 3.4538522012909616e-06, + "loss": 0.847, "step": 25950 }, { - "epoch": 0.7364074914869466, + "epoch": 0.7353849641531356, "grad_norm": 0.0, - "learning_rate": 3.428249340493065e-06, - "loss": 0.8254, + "learning_rate": 3.4531584144806564e-06, + "loss": 0.8079, "step": 25951 }, { - "epoch": 0.7364358683314416, + "epoch": 0.735413301595398, "grad_norm": 0.0, - "learning_rate": 3.427556624209173e-06, - "loss": 0.7034, + "learning_rate": 3.452464682816826e-06, + "loss": 0.8861, "step": 25952 }, { - "epoch": 0.7364642451759364, + "epoch": 0.7354416390376605, "grad_norm": 0.0, - "learning_rate": 3.426863963443009e-06, - "loss": 0.8272, + "learning_rate": 3.45177100630532e-06, + "loss": 0.8539, "step": 25953 }, { - "epoch": 0.7364926220204313, + "epoch": 0.735469976479923, "grad_norm": 0.0, - "learning_rate": 3.4261713582004173e-06, - "loss": 0.7747, + "learning_rate": 3.4510773849519752e-06, + "loss": 0.7264, "step": 25954 }, { - "epoch": 0.7365209988649262, + "epoch": 0.7354983139221853, "grad_norm": 0.0, - "learning_rate": 3.4254788084872513e-06, - "loss": 0.7429, + "learning_rate": 3.450383818762638e-06, + "loss": 0.8147, "step": 25955 }, { - "epoch": 0.7365493757094211, + "epoch": 0.7355266513644478, "grad_norm": 0.0, - "learning_rate": 3.424786314309365e-06, - "loss": 0.8613, + "learning_rate": 3.449690307743149e-06, + "loss": 0.7364, "step": 25956 }, { - "epoch": 0.736577752553916, + "epoch": 0.7355549888067103, "grad_norm": 0.0, - "learning_rate": 3.4240938756726016e-06, - "loss": 0.8687, + "learning_rate": 3.4489968518993513e-06, + "loss": 0.7715, "step": 25957 }, { - "epoch": 0.7366061293984109, + "epoch": 0.7355833262489727, "grad_norm": 0.0, - "learning_rate": 3.4234014925828116e-06, - "loss": 0.8895, + "learning_rate": 3.4483034512370896e-06, + "loss": 0.8167, "step": 25958 }, { - "epoch": 0.7366345062429058, + "epoch": 0.7356116636912352, "grad_norm": 0.0, - "learning_rate": 3.422709165045849e-06, - "loss": 0.8457, + "learning_rate": 3.447610105762197e-06, + "loss": 0.7985, "step": 25959 }, { - "epoch": 0.7366628830874007, + "epoch": 0.7356400011334977, "grad_norm": 0.0, - "learning_rate": 3.4220168930675536e-06, - "loss": 0.8267, + "learning_rate": 3.4469168154805177e-06, + "loss": 0.89, "step": 25960 }, { - "epoch": 0.7366912599318955, + "epoch": 0.7356683385757602, "grad_norm": 0.0, - "learning_rate": 3.4213246766537776e-06, - "loss": 0.8789, + "learning_rate": 3.446223580397895e-06, + "loss": 0.8694, "step": 25961 }, { - "epoch": 0.7367196367763905, + "epoch": 0.7356966760180226, "grad_norm": 0.0, - "learning_rate": 3.4206325158103715e-06, - "loss": 0.686, + "learning_rate": 3.4455304005201617e-06, + "loss": 0.8591, "step": 25962 }, { - "epoch": 0.7367480136208854, + "epoch": 0.7357250134602851, "grad_norm": 0.0, - "learning_rate": 3.4199404105431755e-06, - "loss": 0.8447, + "learning_rate": 3.444837275853159e-06, + "loss": 0.8582, "step": 25963 }, { - "epoch": 0.7367763904653802, + "epoch": 0.7357533509025476, "grad_norm": 0.0, - "learning_rate": 3.4192483608580375e-06, - "loss": 0.7986, + "learning_rate": 3.4441442064027297e-06, + "loss": 0.8042, "step": 25964 }, { - "epoch": 0.7368047673098751, + "epoch": 0.73578168834481, "grad_norm": 0.0, - "learning_rate": 3.418556366760808e-06, - "loss": 0.88, + "learning_rate": 3.443451192174706e-06, + "loss": 0.8201, "step": 25965 }, { - "epoch": 0.7368331441543701, + "epoch": 0.7358100257870724, "grad_norm": 0.0, - "learning_rate": 3.4178644282573227e-06, - "loss": 0.715, + "learning_rate": 3.44275823317493e-06, + "loss": 0.8141, "step": 25966 }, { - "epoch": 0.7368615209988649, + "epoch": 0.7358383632293349, "grad_norm": 0.0, - "learning_rate": 3.4171725453534375e-06, - "loss": 0.757, + "learning_rate": 3.442065329409232e-06, + "loss": 0.8014, "step": 25967 }, { - "epoch": 0.7368898978433598, + "epoch": 0.7358667006715974, "grad_norm": 0.0, - "learning_rate": 3.4164807180549898e-06, - "loss": 0.8422, + "learning_rate": 3.441372480883455e-06, + "loss": 0.8844, "step": 25968 }, { - "epoch": 0.7369182746878548, + "epoch": 0.7358950381138598, "grad_norm": 0.0, - "learning_rate": 3.4157889463678252e-06, - "loss": 0.9041, + "learning_rate": 3.4406796876034323e-06, + "loss": 0.8212, "step": 25969 }, { - "epoch": 0.7369466515323496, + "epoch": 0.7359233755561223, "grad_norm": 0.0, - "learning_rate": 3.4150972302977914e-06, - "loss": 0.8363, + "learning_rate": 3.4399869495749995e-06, + "loss": 0.8208, "step": 25970 }, { - "epoch": 0.7369750283768445, + "epoch": 0.7359517129983848, "grad_norm": 0.0, - "learning_rate": 3.4144055698507227e-06, - "loss": 0.819, + "learning_rate": 3.439294266803993e-06, + "loss": 0.8112, "step": 25971 }, { - "epoch": 0.7370034052213393, + "epoch": 0.7359800504406472, "grad_norm": 0.0, - "learning_rate": 3.413713965032467e-06, - "loss": 0.856, + "learning_rate": 3.4386016392962507e-06, + "loss": 0.847, "step": 25972 }, { - "epoch": 0.7370317820658343, + "epoch": 0.7360083878829097, "grad_norm": 0.0, - "learning_rate": 3.413022415848868e-06, - "loss": 0.8104, + "learning_rate": 3.4379090670576e-06, + "loss": 0.8886, "step": 25973 }, { - "epoch": 0.7370601589103292, + "epoch": 0.7360367253251722, "grad_norm": 0.0, - "learning_rate": 3.412330922305762e-06, - "loss": 0.7335, + "learning_rate": 3.4372165500938813e-06, + "loss": 0.7421, "step": 25974 }, { - "epoch": 0.737088535754824, + "epoch": 0.7360650627674347, "grad_norm": 0.0, - "learning_rate": 3.411639484408993e-06, - "loss": 0.7954, + "learning_rate": 3.4365240884109217e-06, + "loss": 0.8244, "step": 25975 }, { - "epoch": 0.737116912599319, + "epoch": 0.736093400209697, "grad_norm": 0.0, - "learning_rate": 3.410948102164404e-06, - "loss": 0.835, + "learning_rate": 3.4358316820145564e-06, + "loss": 0.8674, "step": 25976 }, { - "epoch": 0.7371452894438139, + "epoch": 0.7361217376519595, "grad_norm": 0.0, - "learning_rate": 3.4102567755778304e-06, - "loss": 0.8441, + "learning_rate": 3.435139330910622e-06, + "loss": 0.7845, "step": 25977 }, { - "epoch": 0.7371736662883087, + "epoch": 0.736150075094222, "grad_norm": 0.0, - "learning_rate": 3.4095655046551137e-06, - "loss": 0.8432, + "learning_rate": 3.434447035104944e-06, + "loss": 0.8256, "step": 25978 }, { - "epoch": 0.7372020431328037, + "epoch": 0.7361784125364844, "grad_norm": 0.0, - "learning_rate": 3.4088742894020966e-06, - "loss": 0.8759, + "learning_rate": 3.4337547946033557e-06, + "loss": 0.8234, "step": 25979 }, { - "epoch": 0.7372304199772985, + "epoch": 0.7362067499787469, "grad_norm": 0.0, - "learning_rate": 3.4081831298246115e-06, - "loss": 0.7245, + "learning_rate": 3.4330626094116927e-06, + "loss": 0.9092, "step": 25980 }, { - "epoch": 0.7372587968217934, + "epoch": 0.7362350874210094, "grad_norm": 0.0, - "learning_rate": 3.4074920259284995e-06, - "loss": 0.8068, + "learning_rate": 3.4323704795357794e-06, + "loss": 0.6948, "step": 25981 }, { - "epoch": 0.7372871736662883, + "epoch": 0.7362634248632718, "grad_norm": 0.0, - "learning_rate": 3.4068009777195987e-06, - "loss": 0.8405, + "learning_rate": 3.431678404981448e-06, + "loss": 0.8849, "step": 25982 }, { - "epoch": 0.7373155505107832, + "epoch": 0.7362917623055343, "grad_norm": 0.0, - "learning_rate": 3.406109985203746e-06, - "loss": 0.7476, + "learning_rate": 3.430986385754528e-06, + "loss": 0.8648, "step": 25983 }, { - "epoch": 0.7373439273552781, + "epoch": 0.7363200997477968, "grad_norm": 0.0, - "learning_rate": 3.4054190483867833e-06, - "loss": 0.8914, + "learning_rate": 3.4302944218608493e-06, + "loss": 0.7755, "step": 25984 }, { - "epoch": 0.737372304199773, + "epoch": 0.7363484371900593, "grad_norm": 0.0, - "learning_rate": 3.404728167274538e-06, - "loss": 0.7889, + "learning_rate": 3.429602513306243e-06, + "loss": 0.8425, "step": 25985 }, { - "epoch": 0.7374006810442679, + "epoch": 0.7363767746323217, "grad_norm": 0.0, - "learning_rate": 3.4040373418728503e-06, - "loss": 0.8567, + "learning_rate": 3.4289106600965317e-06, + "loss": 0.7996, "step": 25986 }, { - "epoch": 0.7374290578887628, + "epoch": 0.7364051120745841, "grad_norm": 0.0, - "learning_rate": 3.403346572187559e-06, - "loss": 0.8417, + "learning_rate": 3.4282188622375488e-06, + "loss": 0.8132, "step": 25987 }, { - "epoch": 0.7374574347332576, + "epoch": 0.7364334495168466, "grad_norm": 0.0, - "learning_rate": 3.4026558582244928e-06, - "loss": 0.8149, + "learning_rate": 3.4275271197351166e-06, + "loss": 0.7923, "step": 25988 }, { - "epoch": 0.7374858115777525, + "epoch": 0.736461786959109, "grad_norm": 0.0, - "learning_rate": 3.4019651999894898e-06, - "loss": 0.7348, + "learning_rate": 3.4268354325950637e-06, + "loss": 0.8479, "step": 25989 }, { - "epoch": 0.7375141884222475, + "epoch": 0.7364901244013715, "grad_norm": 0.0, - "learning_rate": 3.4012745974883852e-06, - "loss": 0.8018, + "learning_rate": 3.4261438008232205e-06, + "loss": 0.8029, "step": 25990 }, { - "epoch": 0.7375425652667423, + "epoch": 0.736518461843634, "grad_norm": 0.0, - "learning_rate": 3.4005840507270084e-06, - "loss": 0.7655, + "learning_rate": 3.4254522244254053e-06, + "loss": 0.827, "step": 25991 }, { - "epoch": 0.7375709421112372, + "epoch": 0.7365467992858965, "grad_norm": 0.0, - "learning_rate": 3.399893559711195e-06, - "loss": 0.8176, + "learning_rate": 3.424760703407447e-06, + "loss": 0.843, "step": 25992 }, { - "epoch": 0.7375993189557322, + "epoch": 0.7365751367281589, "grad_norm": 0.0, - "learning_rate": 3.3992031244467806e-06, - "loss": 0.7223, + "learning_rate": 3.424069237775175e-06, + "loss": 0.8768, "step": 25993 }, { - "epoch": 0.737627695800227, + "epoch": 0.7366034741704214, "grad_norm": 0.0, - "learning_rate": 3.3985127449395893e-06, - "loss": 0.8637, + "learning_rate": 3.4233778275344065e-06, + "loss": 0.8125, "step": 25994 }, { - "epoch": 0.7376560726447219, + "epoch": 0.7366318116126839, "grad_norm": 0.0, - "learning_rate": 3.397822421195459e-06, - "loss": 0.8784, + "learning_rate": 3.4226864726909683e-06, + "loss": 0.7975, "step": 25995 }, { - "epoch": 0.7376844494892169, + "epoch": 0.7366601490549463, "grad_norm": 0.0, - "learning_rate": 3.3971321532202228e-06, - "loss": 0.8859, + "learning_rate": 3.421995173250684e-06, + "loss": 0.807, "step": 25996 }, { - "epoch": 0.7377128263337117, + "epoch": 0.7366884864972087, "grad_norm": 0.0, - "learning_rate": 3.3964419410197013e-06, - "loss": 0.8209, + "learning_rate": 3.4213039292193785e-06, + "loss": 0.8717, "step": 25997 }, { - "epoch": 0.7377412031782066, + "epoch": 0.7367168239394712, "grad_norm": 0.0, - "learning_rate": 3.3957517845997347e-06, - "loss": 0.9488, + "learning_rate": 3.4206127406028744e-06, + "loss": 0.8466, "step": 25998 }, { - "epoch": 0.7377695800227014, + "epoch": 0.7367451613817337, "grad_norm": 0.0, - "learning_rate": 3.3950616839661532e-06, - "loss": 0.7685, + "learning_rate": 3.4199216074069906e-06, + "loss": 0.7768, "step": 25999 }, { - "epoch": 0.7377979568671964, + "epoch": 0.7367734988239961, "grad_norm": 0.0, - "learning_rate": 3.3943716391247796e-06, - "loss": 0.8137, + "learning_rate": 3.4192305296375493e-06, + "loss": 0.8647, "step": 26000 }, { - "epoch": 0.7378263337116913, + "epoch": 0.7368018362662586, "grad_norm": 0.0, - "learning_rate": 3.393681650081445e-06, - "loss": 0.8641, + "learning_rate": 3.4185395073003768e-06, + "loss": 0.8513, "step": 26001 }, { - "epoch": 0.7378547105561861, + "epoch": 0.7368301737085211, "grad_norm": 0.0, - "learning_rate": 3.3929917168419825e-06, - "loss": 0.792, + "learning_rate": 3.4178485404012874e-06, + "loss": 0.7891, "step": 26002 }, { - "epoch": 0.7378830874006811, + "epoch": 0.7368585111507835, "grad_norm": 0.0, - "learning_rate": 3.392301839412212e-06, - "loss": 0.8175, + "learning_rate": 3.4171576289461063e-06, + "loss": 0.8108, "step": 26003 }, { - "epoch": 0.737911464245176, + "epoch": 0.736886848593046, "grad_norm": 0.0, - "learning_rate": 3.391612017797965e-06, - "loss": 0.856, + "learning_rate": 3.4164667729406487e-06, + "loss": 0.9887, "step": 26004 }, { - "epoch": 0.7379398410896708, + "epoch": 0.7369151860353085, "grad_norm": 0.0, - "learning_rate": 3.3909222520050723e-06, - "loss": 0.8401, + "learning_rate": 3.4157759723907347e-06, + "loss": 0.8151, "step": 26005 }, { - "epoch": 0.7379682179341657, + "epoch": 0.7369435234775709, "grad_norm": 0.0, - "learning_rate": 3.390232542039352e-06, - "loss": 0.7745, + "learning_rate": 3.4150852273021896e-06, + "loss": 0.7991, "step": 26006 }, { - "epoch": 0.7379965947786606, + "epoch": 0.7369718609198334, "grad_norm": 0.0, - "learning_rate": 3.3895428879066384e-06, - "loss": 0.8538, + "learning_rate": 3.414394537680823e-06, + "loss": 0.8664, "step": 26007 }, { - "epoch": 0.7380249716231555, + "epoch": 0.7370001983620958, "grad_norm": 0.0, - "learning_rate": 3.38885328961275e-06, - "loss": 0.8448, + "learning_rate": 3.413703903532456e-06, + "loss": 0.8108, "step": 26008 }, { - "epoch": 0.7380533484676504, + "epoch": 0.7370285358043583, "grad_norm": 0.0, - "learning_rate": 3.3881637471635154e-06, - "loss": 0.994, + "learning_rate": 3.413013324862907e-06, + "loss": 0.8617, "step": 26009 }, { - "epoch": 0.7380817253121453, + "epoch": 0.7370568732466207, "grad_norm": 0.0, - "learning_rate": 3.3874742605647613e-06, - "loss": 0.8802, + "learning_rate": 3.412322801677993e-06, + "loss": 0.931, "step": 26010 }, { - "epoch": 0.7381101021566402, + "epoch": 0.7370852106888832, "grad_norm": 0.0, - "learning_rate": 3.386784829822307e-06, - "loss": 0.7772, + "learning_rate": 3.4116323339835344e-06, + "loss": 0.7735, "step": 26011 }, { - "epoch": 0.738138479001135, + "epoch": 0.7371135481311457, "grad_norm": 0.0, - "learning_rate": 3.3860954549419746e-06, - "loss": 0.7852, + "learning_rate": 3.410941921785339e-06, + "loss": 0.8043, "step": 26012 }, { - "epoch": 0.73816685584563, + "epoch": 0.7371418855734081, "grad_norm": 0.0, - "learning_rate": 3.3854061359295985e-06, - "loss": 0.7874, + "learning_rate": 3.4102515650892266e-06, + "loss": 0.7571, "step": 26013 }, { - "epoch": 0.7381952326901249, + "epoch": 0.7371702230156706, "grad_norm": 0.0, - "learning_rate": 3.3847168727909896e-06, - "loss": 0.8182, + "learning_rate": 3.4095612639010158e-06, + "loss": 0.8048, "step": 26014 }, { - "epoch": 0.7382236095346197, + "epoch": 0.7371985604579331, "grad_norm": 0.0, - "learning_rate": 3.384027665531976e-06, - "loss": 0.8126, + "learning_rate": 3.4088710182265138e-06, + "loss": 0.8411, "step": 26015 }, { - "epoch": 0.7382519863791146, + "epoch": 0.7372268979001956, "grad_norm": 0.0, - "learning_rate": 3.3833385141583808e-06, - "loss": 0.8795, + "learning_rate": 3.4081808280715435e-06, + "loss": 0.7796, "step": 26016 }, { - "epoch": 0.7382803632236096, + "epoch": 0.737255235342458, "grad_norm": 0.0, - "learning_rate": 3.3826494186760195e-06, - "loss": 0.8432, + "learning_rate": 3.4074906934419094e-06, + "loss": 0.9478, "step": 26017 }, { - "epoch": 0.7383087400681044, + "epoch": 0.7372835727847205, "grad_norm": 0.0, - "learning_rate": 3.381960379090715e-06, - "loss": 0.7857, + "learning_rate": 3.4068006143434296e-06, + "loss": 0.8084, "step": 26018 }, { - "epoch": 0.7383371169125993, + "epoch": 0.737311910226983, "grad_norm": 0.0, - "learning_rate": 3.381271395408293e-06, - "loss": 0.8122, + "learning_rate": 3.4061105907819202e-06, + "loss": 0.7949, "step": 26019 }, { - "epoch": 0.7383654937570943, + "epoch": 0.7373402476692453, "grad_norm": 0.0, - "learning_rate": 3.3805824676345656e-06, - "loss": 0.7438, + "learning_rate": 3.4054206227631857e-06, + "loss": 0.8687, "step": 26020 }, { - "epoch": 0.7383938706015891, + "epoch": 0.7373685851115078, "grad_norm": 0.0, - "learning_rate": 3.3798935957753555e-06, - "loss": 0.7259, + "learning_rate": 3.4047307102930425e-06, + "loss": 0.7951, "step": 26021 }, { - "epoch": 0.738422247446084, + "epoch": 0.7373969225537703, "grad_norm": 0.0, - "learning_rate": 3.379204779836486e-06, - "loss": 0.8201, + "learning_rate": 3.4040408533773017e-06, + "loss": 0.7644, "step": 26022 }, { - "epoch": 0.7384506242905788, + "epoch": 0.7374252599960328, "grad_norm": 0.0, - "learning_rate": 3.3785160198237663e-06, - "loss": 0.875, + "learning_rate": 3.403351052021775e-06, + "loss": 0.8536, "step": 26023 }, { - "epoch": 0.7384790011350738, + "epoch": 0.7374535974382952, "grad_norm": 0.0, - "learning_rate": 3.377827315743021e-06, - "loss": 0.8328, + "learning_rate": 3.4026613062322743e-06, + "loss": 0.8022, "step": 26024 }, { - "epoch": 0.7385073779795687, + "epoch": 0.7374819348805577, "grad_norm": 0.0, - "learning_rate": 3.3771386676000682e-06, - "loss": 0.8794, + "learning_rate": 3.4019716160146043e-06, + "loss": 0.8329, "step": 26025 }, { - "epoch": 0.7385357548240635, + "epoch": 0.7375102723228202, "grad_norm": 0.0, - "learning_rate": 3.3764500754007203e-06, - "loss": 0.8149, + "learning_rate": 3.401281981374578e-06, + "loss": 0.8508, "step": 26026 }, { - "epoch": 0.7385641316685585, + "epoch": 0.7375386097650826, "grad_norm": 0.0, - "learning_rate": 3.375761539150796e-06, - "loss": 0.8008, + "learning_rate": 3.400592402318006e-06, + "loss": 0.8365, "step": 26027 }, { - "epoch": 0.7385925085130534, + "epoch": 0.7375669472073451, "grad_norm": 0.0, - "learning_rate": 3.375073058856112e-06, - "loss": 0.8178, + "learning_rate": 3.399902878850693e-06, + "loss": 0.8106, "step": 26028 }, { - "epoch": 0.7386208853575482, + "epoch": 0.7375952846496076, "grad_norm": 0.0, - "learning_rate": 3.374384634522484e-06, - "loss": 0.7751, + "learning_rate": 3.399213410978447e-06, + "loss": 0.7682, "step": 26029 }, { - "epoch": 0.7386492622020431, + "epoch": 0.7376236220918699, "grad_norm": 0.0, - "learning_rate": 3.3736962661557293e-06, - "loss": 0.8844, + "learning_rate": 3.398523998707083e-06, + "loss": 0.8368, "step": 26030 }, { - "epoch": 0.738677639046538, + "epoch": 0.7376519595341324, "grad_norm": 0.0, - "learning_rate": 3.3730079537616576e-06, - "loss": 0.8016, + "learning_rate": 3.397834642042398e-06, + "loss": 0.816, "step": 26031 }, { - "epoch": 0.7387060158910329, + "epoch": 0.7376802969763949, "grad_norm": 0.0, - "learning_rate": 3.3723196973460846e-06, - "loss": 0.8365, + "learning_rate": 3.3971453409902067e-06, + "loss": 0.8565, "step": 26032 }, { - "epoch": 0.7387343927355278, + "epoch": 0.7377086344186574, "grad_norm": 0.0, - "learning_rate": 3.371631496914829e-06, - "loss": 0.8649, + "learning_rate": 3.3964560955563097e-06, + "loss": 0.8395, "step": 26033 }, { - "epoch": 0.7387627695800227, + "epoch": 0.7377369718609198, "grad_norm": 0.0, - "learning_rate": 3.3709433524736967e-06, - "loss": 0.8274, + "learning_rate": 3.395766905746515e-06, + "loss": 0.8071, "step": 26034 }, { - "epoch": 0.7387911464245176, + "epoch": 0.7377653093031823, "grad_norm": 0.0, - "learning_rate": 3.370255264028505e-06, - "loss": 0.778, + "learning_rate": 3.3950777715666285e-06, + "loss": 0.757, "step": 26035 }, { - "epoch": 0.7388195232690125, + "epoch": 0.7377936467454448, "grad_norm": 0.0, - "learning_rate": 3.369567231585067e-06, - "loss": 0.8275, + "learning_rate": 3.3943886930224536e-06, + "loss": 0.8353, "step": 26036 }, { - "epoch": 0.7388479001135074, + "epoch": 0.7378219841877072, "grad_norm": 0.0, - "learning_rate": 3.36887925514919e-06, - "loss": 0.8128, + "learning_rate": 3.3936996701197955e-06, + "loss": 0.7597, "step": 26037 }, { - "epoch": 0.7388762769580023, + "epoch": 0.7378503216299697, "grad_norm": 0.0, - "learning_rate": 3.368191334726687e-06, - "loss": 0.8908, + "learning_rate": 3.393010702864462e-06, + "loss": 0.7251, "step": 26038 }, { - "epoch": 0.7389046538024971, + "epoch": 0.7378786590722322, "grad_norm": 0.0, - "learning_rate": 3.3675034703233743e-06, - "loss": 0.7917, + "learning_rate": 3.3923217912622495e-06, + "loss": 0.8294, "step": 26039 }, { - "epoch": 0.738933030646992, + "epoch": 0.7379069965144947, "grad_norm": 0.0, - "learning_rate": 3.366815661945054e-06, - "loss": 0.7289, + "learning_rate": 3.391632935318968e-06, + "loss": 0.8593, "step": 26040 }, { - "epoch": 0.738961407491487, + "epoch": 0.737935333956757, "grad_norm": 0.0, - "learning_rate": 3.366127909597541e-06, - "loss": 0.8628, + "learning_rate": 3.3909441350404125e-06, + "loss": 0.803, "step": 26041 }, { - "epoch": 0.7389897843359818, + "epoch": 0.7379636713990195, "grad_norm": 0.0, - "learning_rate": 3.3654402132866458e-06, - "loss": 0.7908, + "learning_rate": 3.390255390432389e-06, + "loss": 0.8332, "step": 26042 }, { - "epoch": 0.7390181611804767, + "epoch": 0.737992008841282, "grad_norm": 0.0, - "learning_rate": 3.3647525730181695e-06, - "loss": 0.7751, + "learning_rate": 3.3895667015007027e-06, + "loss": 0.7971, "step": 26043 }, { - "epoch": 0.7390465380249717, + "epoch": 0.7380203462835444, "grad_norm": 0.0, - "learning_rate": 3.3640649887979327e-06, - "loss": 0.8969, + "learning_rate": 3.388878068251147e-06, + "loss": 0.8484, "step": 26044 }, { - "epoch": 0.7390749148694665, + "epoch": 0.7380486837258069, "grad_norm": 0.0, - "learning_rate": 3.3633774606317348e-06, - "loss": 0.7889, + "learning_rate": 3.3881894906895287e-06, + "loss": 0.82, "step": 26045 }, { - "epoch": 0.7391032917139614, + "epoch": 0.7380770211680694, "grad_norm": 0.0, - "learning_rate": 3.362689988525385e-06, - "loss": 0.7429, + "learning_rate": 3.387500968821643e-06, + "loss": 0.7809, "step": 26046 }, { - "epoch": 0.7391316685584562, + "epoch": 0.7381053586103319, "grad_norm": 0.0, - "learning_rate": 3.362002572484695e-06, - "loss": 0.901, + "learning_rate": 3.3868125026532917e-06, + "loss": 0.8026, "step": 26047 }, { - "epoch": 0.7391600454029512, + "epoch": 0.7381336960525943, "grad_norm": 0.0, - "learning_rate": 3.3613152125154636e-06, - "loss": 0.8985, + "learning_rate": 3.3861240921902747e-06, + "loss": 0.7395, "step": 26048 }, { - "epoch": 0.7391884222474461, + "epoch": 0.7381620334948568, "grad_norm": 0.0, - "learning_rate": 3.3606279086235015e-06, - "loss": 0.7848, + "learning_rate": 3.3854357374383905e-06, + "loss": 0.796, "step": 26049 }, { - "epoch": 0.7392167990919409, + "epoch": 0.7381903709371193, "grad_norm": 0.0, - "learning_rate": 3.3599406608146178e-06, - "loss": 0.7113, + "learning_rate": 3.3847474384034383e-06, + "loss": 0.7709, "step": 26050 }, { - "epoch": 0.7392451759364359, + "epoch": 0.7382187083793816, "grad_norm": 0.0, - "learning_rate": 3.35925346909461e-06, - "loss": 0.7578, + "learning_rate": 3.3840591950912172e-06, + "loss": 0.7708, "step": 26051 }, { - "epoch": 0.7392735527809308, + "epoch": 0.7382470458216441, "grad_norm": 0.0, - "learning_rate": 3.3585663334692864e-06, - "loss": 0.7825, + "learning_rate": 3.383371007507519e-06, + "loss": 0.8345, "step": 26052 }, { - "epoch": 0.7393019296254256, + "epoch": 0.7382753832639066, "grad_norm": 0.0, - "learning_rate": 3.357879253944455e-06, - "loss": 0.8974, + "learning_rate": 3.3826828756581476e-06, + "loss": 0.8306, "step": 26053 }, { - "epoch": 0.7393303064699206, + "epoch": 0.738303720706169, "grad_norm": 0.0, - "learning_rate": 3.3571922305259132e-06, - "loss": 0.8923, + "learning_rate": 3.381994799548892e-06, + "loss": 0.7842, "step": 26054 }, { - "epoch": 0.7393586833144155, + "epoch": 0.7383320581484315, "grad_norm": 0.0, - "learning_rate": 3.356505263219466e-06, - "loss": 0.8714, + "learning_rate": 3.3813067791855513e-06, + "loss": 0.7913, "step": 26055 }, { - "epoch": 0.7393870601589103, + "epoch": 0.738360395590694, "grad_norm": 0.0, - "learning_rate": 3.355818352030923e-06, - "loss": 0.8006, + "learning_rate": 3.380618814573925e-06, + "loss": 0.8211, "step": 26056 }, { - "epoch": 0.7394154370034052, + "epoch": 0.7383887330329565, "grad_norm": 0.0, - "learning_rate": 3.355131496966075e-06, - "loss": 0.748, + "learning_rate": 3.3799309057198016e-06, + "loss": 0.8382, "step": 26057 }, { - "epoch": 0.7394438138479001, + "epoch": 0.7384170704752189, "grad_norm": 0.0, - "learning_rate": 3.3544446980307322e-06, - "loss": 0.8314, + "learning_rate": 3.379243052628979e-06, + "loss": 0.8374, "step": 26058 }, { - "epoch": 0.739472190692395, + "epoch": 0.7384454079174814, "grad_norm": 0.0, - "learning_rate": 3.3537579552306933e-06, - "loss": 0.7448, + "learning_rate": 3.378555255307252e-06, + "loss": 0.8151, "step": 26059 }, { - "epoch": 0.7395005675368899, + "epoch": 0.7384737453597439, "grad_norm": 0.0, - "learning_rate": 3.353071268571759e-06, - "loss": 0.8351, + "learning_rate": 3.377867513760411e-06, + "loss": 0.715, "step": 26060 }, { - "epoch": 0.7395289443813848, + "epoch": 0.7385020828020062, "grad_norm": 0.0, - "learning_rate": 3.3523846380597347e-06, - "loss": 0.8818, + "learning_rate": 3.3771798279942513e-06, + "loss": 0.8684, "step": 26061 }, { - "epoch": 0.7395573212258797, + "epoch": 0.7385304202442687, "grad_norm": 0.0, - "learning_rate": 3.3516980637004127e-06, - "loss": 0.8707, + "learning_rate": 3.376492198014565e-06, + "loss": 0.749, "step": 26062 }, { - "epoch": 0.7395856980703746, + "epoch": 0.7385587576865312, "grad_norm": 0.0, - "learning_rate": 3.351011545499595e-06, - "loss": 0.8808, + "learning_rate": 3.3758046238271436e-06, + "loss": 0.7476, "step": 26063 }, { - "epoch": 0.7396140749148694, + "epoch": 0.7385870951287937, "grad_norm": 0.0, - "learning_rate": 3.3503250834630864e-06, - "loss": 0.7667, + "learning_rate": 3.375117105437784e-06, + "loss": 0.8554, "step": 26064 }, { - "epoch": 0.7396424517593644, + "epoch": 0.7386154325710561, "grad_norm": 0.0, - "learning_rate": 3.349638677596676e-06, - "loss": 0.8242, + "learning_rate": 3.3744296428522693e-06, + "loss": 0.7609, "step": 26065 }, { - "epoch": 0.7396708286038592, + "epoch": 0.7386437700133186, "grad_norm": 0.0, - "learning_rate": 3.3489523279061677e-06, - "loss": 0.827, + "learning_rate": 3.3737422360763938e-06, + "loss": 0.7339, "step": 26066 }, { - "epoch": 0.7396992054483541, + "epoch": 0.7386721074555811, "grad_norm": 0.0, - "learning_rate": 3.348266034397362e-06, - "loss": 0.8744, + "learning_rate": 3.3730548851159517e-06, + "loss": 0.8185, "step": 26067 }, { - "epoch": 0.7397275822928491, + "epoch": 0.7387004448978435, "grad_norm": 0.0, - "learning_rate": 3.3475797970760473e-06, - "loss": 0.7775, + "learning_rate": 3.372367589976726e-06, + "loss": 0.7019, "step": 26068 }, { - "epoch": 0.7397559591373439, + "epoch": 0.738728782340106, "grad_norm": 0.0, - "learning_rate": 3.346893615948026e-06, - "loss": 0.9059, + "learning_rate": 3.3716803506645125e-06, + "loss": 0.8078, "step": 26069 }, { - "epoch": 0.7397843359818388, + "epoch": 0.7387571197823685, "grad_norm": 0.0, - "learning_rate": 3.3462074910190955e-06, - "loss": 0.8536, + "learning_rate": 3.3709931671850935e-06, + "loss": 0.7923, "step": 26070 }, { - "epoch": 0.7398127128263338, + "epoch": 0.738785457224631, "grad_norm": 0.0, - "learning_rate": 3.345521422295047e-06, - "loss": 0.9263, + "learning_rate": 3.370306039544261e-06, + "loss": 0.8686, "step": 26071 }, { - "epoch": 0.7398410896708286, + "epoch": 0.7388137946668933, "grad_norm": 0.0, - "learning_rate": 3.3448354097816794e-06, - "loss": 0.8663, + "learning_rate": 3.3696189677478053e-06, + "loss": 0.8273, "step": 26072 }, { - "epoch": 0.7398694665153235, + "epoch": 0.7388421321091558, "grad_norm": 0.0, - "learning_rate": 3.3441494534847875e-06, - "loss": 0.8622, + "learning_rate": 3.3689319518015083e-06, + "loss": 0.8522, "step": 26073 }, { - "epoch": 0.7398978433598183, + "epoch": 0.7388704695514183, "grad_norm": 0.0, - "learning_rate": 3.343463553410159e-06, - "loss": 0.8397, + "learning_rate": 3.368244991711159e-06, + "loss": 0.8489, "step": 26074 }, { - "epoch": 0.7399262202043133, + "epoch": 0.7388988069936807, "grad_norm": 0.0, - "learning_rate": 3.3427777095635996e-06, - "loss": 0.8065, + "learning_rate": 3.367558087482545e-06, + "loss": 0.8238, "step": 26075 }, { - "epoch": 0.7399545970488082, + "epoch": 0.7389271444359432, "grad_norm": 0.0, - "learning_rate": 3.342091921950892e-06, - "loss": 0.7934, + "learning_rate": 3.366871239121453e-06, + "loss": 0.7658, "step": 26076 }, { - "epoch": 0.739982973893303, + "epoch": 0.7389554818782057, "grad_norm": 0.0, - "learning_rate": 3.3414061905778337e-06, - "loss": 0.8736, + "learning_rate": 3.36618444663367e-06, + "loss": 0.7682, "step": 26077 }, { - "epoch": 0.740011350737798, + "epoch": 0.7389838193204681, "grad_norm": 0.0, - "learning_rate": 3.340720515450221e-06, - "loss": 0.7987, + "learning_rate": 3.365497710024976e-06, + "loss": 0.8577, "step": 26078 }, { - "epoch": 0.7400397275822929, + "epoch": 0.7390121567627306, "grad_norm": 0.0, - "learning_rate": 3.340034896573837e-06, - "loss": 0.812, + "learning_rate": 3.3648110293011592e-06, + "loss": 0.8181, "step": 26079 }, { - "epoch": 0.7400681044267877, + "epoch": 0.7390404942049931, "grad_norm": 0.0, - "learning_rate": 3.339349333954478e-06, - "loss": 0.8242, + "learning_rate": 3.3641244044680053e-06, + "loss": 0.8946, "step": 26080 }, { - "epoch": 0.7400964812712826, + "epoch": 0.7390688316472556, "grad_norm": 0.0, - "learning_rate": 3.3386638275979376e-06, - "loss": 0.7756, + "learning_rate": 3.3634378355312925e-06, + "loss": 0.7586, "step": 26081 }, { - "epoch": 0.7401248581157776, + "epoch": 0.739097169089518, "grad_norm": 0.0, - "learning_rate": 3.3379783775100005e-06, - "loss": 0.8389, + "learning_rate": 3.362751322496811e-06, + "loss": 0.8041, "step": 26082 }, { - "epoch": 0.7401532349602724, + "epoch": 0.7391255065317804, "grad_norm": 0.0, - "learning_rate": 3.3372929836964583e-06, - "loss": 0.8474, + "learning_rate": 3.362064865370336e-06, + "loss": 0.8302, "step": 26083 }, { - "epoch": 0.7401816118047673, + "epoch": 0.7391538439740429, "grad_norm": 0.0, - "learning_rate": 3.336607646163106e-06, - "loss": 0.7637, + "learning_rate": 3.361378464157654e-06, + "loss": 0.795, "step": 26084 }, { - "epoch": 0.7402099886492622, + "epoch": 0.7391821814163053, "grad_norm": 0.0, - "learning_rate": 3.3359223649157255e-06, - "loss": 0.7868, + "learning_rate": 3.360692118864549e-06, + "loss": 0.7388, "step": 26085 }, { - "epoch": 0.7402383654937571, + "epoch": 0.7392105188585678, "grad_norm": 0.0, - "learning_rate": 3.335237139960108e-06, - "loss": 0.8811, + "learning_rate": 3.3600058294967974e-06, + "loss": 0.8412, "step": 26086 }, { - "epoch": 0.740266742338252, + "epoch": 0.7392388563008303, "grad_norm": 0.0, - "learning_rate": 3.3345519713020445e-06, - "loss": 0.8896, + "learning_rate": 3.3593195960601822e-06, + "loss": 0.8183, "step": 26087 }, { - "epoch": 0.7402951191827469, + "epoch": 0.7392671937430928, "grad_norm": 0.0, - "learning_rate": 3.3338668589473176e-06, - "loss": 0.8096, + "learning_rate": 3.3586334185604828e-06, + "loss": 0.894, "step": 26088 }, { - "epoch": 0.7403234960272418, + "epoch": 0.7392955311853552, "grad_norm": 0.0, - "learning_rate": 3.333181802901717e-06, - "loss": 0.8246, + "learning_rate": 3.357947297003482e-06, + "loss": 0.8541, "step": 26089 }, { - "epoch": 0.7403518728717366, + "epoch": 0.7393238686276177, "grad_norm": 0.0, - "learning_rate": 3.33249680317103e-06, - "loss": 0.9209, + "learning_rate": 3.3572612313949606e-06, + "loss": 0.9086, "step": 26090 }, { - "epoch": 0.7403802497162315, + "epoch": 0.7393522060698802, "grad_norm": 0.0, - "learning_rate": 3.3318118597610417e-06, - "loss": 0.7435, + "learning_rate": 3.35657522174069e-06, + "loss": 0.8422, "step": 26091 }, { - "epoch": 0.7404086265607265, + "epoch": 0.7393805435121426, "grad_norm": 0.0, - "learning_rate": 3.331126972677542e-06, - "loss": 0.8364, + "learning_rate": 3.3558892680464538e-06, + "loss": 0.7365, "step": 26092 }, { - "epoch": 0.7404370034052213, + "epoch": 0.739408880954405, "grad_norm": 0.0, - "learning_rate": 3.330442141926308e-06, - "loss": 0.7618, + "learning_rate": 3.355203370318033e-06, + "loss": 0.8759, "step": 26093 }, { - "epoch": 0.7404653802497162, + "epoch": 0.7394372183966675, "grad_norm": 0.0, - "learning_rate": 3.32975736751313e-06, - "loss": 0.8018, + "learning_rate": 3.3545175285611986e-06, + "loss": 0.7865, "step": 26094 }, { - "epoch": 0.7404937570942112, + "epoch": 0.73946555583893, "grad_norm": 0.0, - "learning_rate": 3.329072649443795e-06, - "loss": 0.7968, + "learning_rate": 3.3538317427817315e-06, + "loss": 0.8346, "step": 26095 }, { - "epoch": 0.740522133938706, + "epoch": 0.7394938932811924, "grad_norm": 0.0, - "learning_rate": 3.328387987724079e-06, - "loss": 0.8088, + "learning_rate": 3.35314601298541e-06, + "loss": 0.7845, "step": 26096 }, { - "epoch": 0.7405505107832009, + "epoch": 0.7395222307234549, "grad_norm": 0.0, - "learning_rate": 3.3277033823597706e-06, - "loss": 0.8027, + "learning_rate": 3.3524603391780043e-06, + "loss": 0.844, "step": 26097 }, { - "epoch": 0.7405788876276957, + "epoch": 0.7395505681657174, "grad_norm": 0.0, - "learning_rate": 3.3270188333566543e-06, - "loss": 0.7285, + "learning_rate": 3.3517747213652973e-06, + "loss": 0.8643, "step": 26098 }, { - "epoch": 0.7406072644721907, + "epoch": 0.7395789056079798, "grad_norm": 0.0, - "learning_rate": 3.326334340720506e-06, - "loss": 0.8925, + "learning_rate": 3.351089159553057e-06, + "loss": 0.7595, "step": 26099 }, { - "epoch": 0.7406356413166856, + "epoch": 0.7396072430502423, "grad_norm": 0.0, - "learning_rate": 3.325649904457112e-06, - "loss": 0.8077, + "learning_rate": 3.350403653747062e-06, + "loss": 0.7286, "step": 26100 }, { - "epoch": 0.7406640181611804, + "epoch": 0.7396355804925048, "grad_norm": 0.0, - "learning_rate": 3.324965524572258e-06, - "loss": 0.707, + "learning_rate": 3.349718203953086e-06, + "loss": 0.8008, "step": 26101 }, { - "epoch": 0.7406923950056754, + "epoch": 0.7396639179347672, "grad_norm": 0.0, - "learning_rate": 3.3242812010717153e-06, - "loss": 0.9134, + "learning_rate": 3.3490328101769044e-06, + "loss": 0.8466, "step": 26102 }, { - "epoch": 0.7407207718501703, + "epoch": 0.7396922553770297, "grad_norm": 0.0, - "learning_rate": 3.3235969339612693e-06, - "loss": 0.7334, + "learning_rate": 3.3483474724242915e-06, + "loss": 0.7221, "step": 26103 }, { - "epoch": 0.7407491486946651, + "epoch": 0.7397205928192921, "grad_norm": 0.0, - "learning_rate": 3.3229127232467005e-06, - "loss": 0.8664, + "learning_rate": 3.3476621907010142e-06, + "loss": 0.87, "step": 26104 }, { - "epoch": 0.7407775255391601, + "epoch": 0.7397489302615546, "grad_norm": 0.0, - "learning_rate": 3.3222285689337886e-06, - "loss": 0.8262, + "learning_rate": 3.346976965012849e-06, + "loss": 0.844, "step": 26105 }, { - "epoch": 0.740805902383655, + "epoch": 0.739777267703817, "grad_norm": 0.0, - "learning_rate": 3.3215444710283116e-06, - "loss": 0.8698, + "learning_rate": 3.3462917953655706e-06, + "loss": 0.8325, "step": 26106 }, { - "epoch": 0.7408342792281498, + "epoch": 0.7398056051460795, "grad_norm": 0.0, - "learning_rate": 3.3208604295360526e-06, - "loss": 0.8588, + "learning_rate": 3.3456066817649446e-06, + "loss": 0.7698, "step": 26107 }, { - "epoch": 0.7408626560726447, + "epoch": 0.739833942588342, "grad_norm": 0.0, - "learning_rate": 3.3201764444627825e-06, - "loss": 0.8144, + "learning_rate": 3.344921624216744e-06, + "loss": 0.8965, "step": 26108 }, { - "epoch": 0.7408910329171396, + "epoch": 0.7398622800306044, "grad_norm": 0.0, - "learning_rate": 3.319492515814282e-06, - "loss": 0.7339, + "learning_rate": 3.344236622726743e-06, + "loss": 0.8485, "step": 26109 }, { - "epoch": 0.7409194097616345, + "epoch": 0.7398906174728669, "grad_norm": 0.0, - "learning_rate": 3.318808643596332e-06, - "loss": 0.8826, + "learning_rate": 3.3435516773007047e-06, + "loss": 0.9396, "step": 26110 }, { - "epoch": 0.7409477866061294, + "epoch": 0.7399189549151294, "grad_norm": 0.0, - "learning_rate": 3.3181248278147017e-06, - "loss": 0.8309, + "learning_rate": 3.3428667879444067e-06, + "loss": 0.8774, "step": 26111 }, { - "epoch": 0.7409761634506243, + "epoch": 0.7399472923573919, "grad_norm": 0.0, - "learning_rate": 3.317441068475171e-06, - "loss": 0.8223, + "learning_rate": 3.3421819546636104e-06, + "loss": 0.9077, "step": 26112 }, { - "epoch": 0.7410045402951192, + "epoch": 0.7399756297996543, "grad_norm": 0.0, - "learning_rate": 3.3167573655835206e-06, - "loss": 0.8367, + "learning_rate": 3.341497177464087e-06, + "loss": 0.7416, "step": 26113 }, { - "epoch": 0.7410329171396141, + "epoch": 0.7400039672419167, "grad_norm": 0.0, - "learning_rate": 3.3160737191455173e-06, - "loss": 0.8194, + "learning_rate": 3.340812456351605e-06, + "loss": 0.7569, "step": 26114 }, { - "epoch": 0.7410612939841089, + "epoch": 0.7400323046841792, "grad_norm": 0.0, - "learning_rate": 3.315390129166943e-06, - "loss": 0.8732, + "learning_rate": 3.340127791331934e-06, + "loss": 0.8505, "step": 26115 }, { - "epoch": 0.7410896708286039, + "epoch": 0.7400606421264416, "grad_norm": 0.0, - "learning_rate": 3.314706595653566e-06, - "loss": 0.8404, + "learning_rate": 3.3394431824108397e-06, + "loss": 0.8188, "step": 26116 }, { - "epoch": 0.7411180476730987, + "epoch": 0.7400889795687041, "grad_norm": 0.0, - "learning_rate": 3.314023118611163e-06, - "loss": 0.7852, + "learning_rate": 3.338758629594091e-06, + "loss": 0.7709, "step": 26117 }, { - "epoch": 0.7411464245175936, + "epoch": 0.7401173170109666, "grad_norm": 0.0, - "learning_rate": 3.313339698045509e-06, - "loss": 0.8159, + "learning_rate": 3.33807413288745e-06, + "loss": 0.8464, "step": 26118 }, { - "epoch": 0.7411748013620886, + "epoch": 0.740145654453229, "grad_norm": 0.0, - "learning_rate": 3.312656333962373e-06, - "loss": 0.7818, + "learning_rate": 3.3373896922966863e-06, + "loss": 0.8655, "step": 26119 }, { - "epoch": 0.7412031782065834, + "epoch": 0.7401739918954915, "grad_norm": 0.0, - "learning_rate": 3.3119730263675263e-06, - "loss": 0.8152, + "learning_rate": 3.3367053078275614e-06, + "loss": 0.7697, "step": 26120 }, { - "epoch": 0.7412315550510783, + "epoch": 0.740202329337754, "grad_norm": 0.0, - "learning_rate": 3.3112897752667504e-06, - "loss": 0.7634, + "learning_rate": 3.336020979485841e-06, + "loss": 0.8719, "step": 26121 }, { - "epoch": 0.7412599318955733, + "epoch": 0.7402306667800165, "grad_norm": 0.0, - "learning_rate": 3.310606580665807e-06, - "loss": 0.8329, + "learning_rate": 3.3353367072772935e-06, + "loss": 0.7299, "step": 26122 }, { - "epoch": 0.7412883087400681, + "epoch": 0.7402590042222789, "grad_norm": 0.0, - "learning_rate": 3.309923442570472e-06, - "loss": 0.92, + "learning_rate": 3.3346524912076774e-06, + "loss": 0.8083, "step": 26123 }, { - "epoch": 0.741316685584563, + "epoch": 0.7402873416645414, "grad_norm": 0.0, - "learning_rate": 3.3092403609865163e-06, - "loss": 0.8218, + "learning_rate": 3.333968331282759e-06, + "loss": 0.8598, "step": 26124 }, { - "epoch": 0.7413450624290578, + "epoch": 0.7403156791068038, "grad_norm": 0.0, - "learning_rate": 3.3085573359197045e-06, - "loss": 0.9051, + "learning_rate": 3.3332842275083023e-06, + "loss": 0.8491, "step": 26125 }, { - "epoch": 0.7413734392735528, + "epoch": 0.7403440165490662, "grad_norm": 0.0, - "learning_rate": 3.3078743673758107e-06, - "loss": 0.833, + "learning_rate": 3.3326001798900664e-06, + "loss": 0.7717, "step": 26126 }, { - "epoch": 0.7414018161180477, + "epoch": 0.7403723539913287, "grad_norm": 0.0, - "learning_rate": 3.3071914553606055e-06, - "loss": 0.9207, + "learning_rate": 3.3319161884338135e-06, + "loss": 0.8896, "step": 26127 }, { - "epoch": 0.7414301929625425, + "epoch": 0.7404006914335912, "grad_norm": 0.0, - "learning_rate": 3.3065085998798516e-06, - "loss": 0.8045, + "learning_rate": 3.331232253145308e-06, + "loss": 0.7987, "step": 26128 }, { - "epoch": 0.7414585698070375, + "epoch": 0.7404290288758537, "grad_norm": 0.0, - "learning_rate": 3.3058258009393207e-06, - "loss": 0.8103, + "learning_rate": 3.330548374030309e-06, + "loss": 0.7968, "step": 26129 }, { - "epoch": 0.7414869466515324, + "epoch": 0.7404573663181161, "grad_norm": 0.0, - "learning_rate": 3.305143058544784e-06, - "loss": 0.8184, + "learning_rate": 3.3298645510945813e-06, + "loss": 0.7838, "step": 26130 }, { - "epoch": 0.7415153234960272, + "epoch": 0.7404857037603786, "grad_norm": 0.0, - "learning_rate": 3.3044603727020007e-06, - "loss": 0.8659, + "learning_rate": 3.3291807843438784e-06, + "loss": 0.9209, "step": 26131 }, { - "epoch": 0.7415437003405221, + "epoch": 0.7405140412026411, "grad_norm": 0.0, - "learning_rate": 3.3037777434167418e-06, - "loss": 0.8069, + "learning_rate": 3.328497073783966e-06, + "loss": 0.8187, "step": 26132 }, { - "epoch": 0.741572077185017, + "epoch": 0.7405423786449035, "grad_norm": 0.0, - "learning_rate": 3.3030951706947777e-06, - "loss": 0.7696, + "learning_rate": 3.327813419420597e-06, + "loss": 0.8694, "step": 26133 }, { - "epoch": 0.7416004540295119, + "epoch": 0.740570716087166, "grad_norm": 0.0, - "learning_rate": 3.3024126545418655e-06, - "loss": 0.8847, + "learning_rate": 3.3271298212595325e-06, + "loss": 0.8366, "step": 26134 }, { - "epoch": 0.7416288308740068, + "epoch": 0.7405990535294285, "grad_norm": 0.0, - "learning_rate": 3.301730194963775e-06, - "loss": 0.7599, + "learning_rate": 3.3264462793065343e-06, + "loss": 0.8032, "step": 26135 }, { - "epoch": 0.7416572077185017, + "epoch": 0.740627390971691, "grad_norm": 0.0, - "learning_rate": 3.30104779196627e-06, - "loss": 0.7539, + "learning_rate": 3.3257627935673554e-06, + "loss": 0.8243, "step": 26136 }, { - "epoch": 0.7416855845629966, + "epoch": 0.7406557284139533, "grad_norm": 0.0, - "learning_rate": 3.300365445555116e-06, - "loss": 0.84, + "learning_rate": 3.3250793640477543e-06, + "loss": 0.7166, "step": 26137 }, { - "epoch": 0.7417139614074915, + "epoch": 0.7406840658562158, "grad_norm": 0.0, - "learning_rate": 3.2996831557360788e-06, - "loss": 0.8279, + "learning_rate": 3.3243959907534917e-06, + "loss": 0.8132, "step": 26138 }, { - "epoch": 0.7417423382519864, + "epoch": 0.7407124032984783, "grad_norm": 0.0, - "learning_rate": 3.2990009225149167e-06, - "loss": 0.8327, + "learning_rate": 3.3237126736903168e-06, + "loss": 0.8865, "step": 26139 }, { - "epoch": 0.7417707150964813, + "epoch": 0.7407407407407407, "grad_norm": 0.0, - "learning_rate": 3.298318745897394e-06, - "loss": 0.8341, + "learning_rate": 3.3230294128639894e-06, + "loss": 0.8822, "step": 26140 }, { - "epoch": 0.7417990919409762, + "epoch": 0.7407690781830032, "grad_norm": 0.0, - "learning_rate": 3.297636625889279e-06, - "loss": 0.8869, + "learning_rate": 3.322346208280265e-06, + "loss": 0.7725, "step": 26141 }, { - "epoch": 0.741827468785471, + "epoch": 0.7407974156252657, "grad_norm": 0.0, - "learning_rate": 3.296954562496324e-06, - "loss": 0.9338, + "learning_rate": 3.3216630599448985e-06, + "loss": 0.877, "step": 26142 }, { - "epoch": 0.741855845629966, + "epoch": 0.7408257530675281, "grad_norm": 0.0, - "learning_rate": 3.296272555724296e-06, - "loss": 0.7819, + "learning_rate": 3.3209799678636466e-06, + "loss": 0.8433, "step": 26143 }, { - "epoch": 0.7418842224744608, + "epoch": 0.7408540905097906, "grad_norm": 0.0, - "learning_rate": 3.295590605578959e-06, - "loss": 0.7501, + "learning_rate": 3.3202969320422586e-06, + "loss": 0.6403, "step": 26144 }, { - "epoch": 0.7419125993189557, + "epoch": 0.7408824279520531, "grad_norm": 0.0, - "learning_rate": 3.294908712066065e-06, - "loss": 0.8063, + "learning_rate": 3.319613952486488e-06, + "loss": 0.7995, "step": 26145 }, { - "epoch": 0.7419409761634507, + "epoch": 0.7409107653943156, "grad_norm": 0.0, - "learning_rate": 3.2942268751913785e-06, - "loss": 0.8285, + "learning_rate": 3.3189310292020948e-06, + "loss": 0.8067, "step": 26146 }, { - "epoch": 0.7419693530079455, + "epoch": 0.7409391028365779, "grad_norm": 0.0, - "learning_rate": 3.293545094960663e-06, - "loss": 0.8368, + "learning_rate": 3.3182481621948225e-06, + "loss": 0.7986, "step": 26147 }, { - "epoch": 0.7419977298524404, + "epoch": 0.7409674402788404, "grad_norm": 0.0, - "learning_rate": 3.29286337137967e-06, - "loss": 0.7989, + "learning_rate": 3.31756535147043e-06, + "loss": 0.8573, "step": 26148 }, { - "epoch": 0.7420261066969353, + "epoch": 0.7409957777211029, "grad_norm": 0.0, - "learning_rate": 3.2921817044541614e-06, - "loss": 0.8143, + "learning_rate": 3.316882597034663e-06, + "loss": 0.8485, "step": 26149 }, { - "epoch": 0.7420544835414302, + "epoch": 0.7410241151633653, "grad_norm": 0.0, - "learning_rate": 3.2915000941898958e-06, - "loss": 0.7589, + "learning_rate": 3.3161998988932762e-06, + "loss": 0.8698, "step": 26150 }, { - "epoch": 0.7420828603859251, + "epoch": 0.7410524526056278, "grad_norm": 0.0, - "learning_rate": 3.29081854059263e-06, - "loss": 0.8161, + "learning_rate": 3.3155172570520223e-06, + "loss": 0.8332, "step": 26151 }, { - "epoch": 0.7421112372304199, + "epoch": 0.7410807900478903, "grad_norm": 0.0, - "learning_rate": 3.290137043668126e-06, - "loss": 0.7545, + "learning_rate": 3.3148346715166457e-06, + "loss": 0.8667, "step": 26152 }, { - "epoch": 0.7421396140749149, + "epoch": 0.7411091274901528, "grad_norm": 0.0, - "learning_rate": 3.2894556034221316e-06, - "loss": 0.8286, + "learning_rate": 3.314152142292899e-06, + "loss": 0.7596, "step": 26153 }, { - "epoch": 0.7421679909194098, + "epoch": 0.7411374649324152, "grad_norm": 0.0, - "learning_rate": 3.288774219860408e-06, - "loss": 0.9023, + "learning_rate": 3.313469669386532e-06, + "loss": 0.8028, "step": 26154 }, { - "epoch": 0.7421963677639046, + "epoch": 0.7411658023746777, "grad_norm": 0.0, - "learning_rate": 3.2880928929887123e-06, - "loss": 0.8088, + "learning_rate": 3.3127872528032924e-06, + "loss": 0.859, "step": 26155 }, { - "epoch": 0.7422247446083996, + "epoch": 0.7411941398169402, "grad_norm": 0.0, - "learning_rate": 3.287411622812796e-06, - "loss": 0.8735, + "learning_rate": 3.312104892548932e-06, + "loss": 0.7053, "step": 26156 }, { - "epoch": 0.7422531214528945, + "epoch": 0.7412224772592025, "grad_norm": 0.0, - "learning_rate": 3.2867304093384145e-06, - "loss": 0.8817, + "learning_rate": 3.311422588629193e-06, + "loss": 0.8131, "step": 26157 }, { - "epoch": 0.7422814982973893, + "epoch": 0.741250814701465, "grad_norm": 0.0, - "learning_rate": 3.2860492525713274e-06, - "loss": 0.8748, + "learning_rate": 3.310740341049825e-06, + "loss": 0.7378, "step": 26158 }, { - "epoch": 0.7423098751418842, + "epoch": 0.7412791521437275, "grad_norm": 0.0, - "learning_rate": 3.2853681525172796e-06, - "loss": 0.847, + "learning_rate": 3.3100581498165783e-06, + "loss": 0.9601, "step": 26159 }, { - "epoch": 0.7423382519863791, + "epoch": 0.74130748958599, "grad_norm": 0.0, - "learning_rate": 3.284687109182029e-06, - "loss": 0.8197, + "learning_rate": 3.3093760149351926e-06, + "loss": 0.823, "step": 26160 }, { - "epoch": 0.742366628830874, + "epoch": 0.7413358270282524, "grad_norm": 0.0, - "learning_rate": 3.2840061225713316e-06, - "loss": 0.8588, + "learning_rate": 3.308693936411421e-06, + "loss": 0.9286, "step": 26161 }, { - "epoch": 0.7423950056753689, + "epoch": 0.7413641644705149, "grad_norm": 0.0, - "learning_rate": 3.2833251926909335e-06, - "loss": 0.9187, + "learning_rate": 3.3080119142510014e-06, + "loss": 0.9536, "step": 26162 }, { - "epoch": 0.7424233825198638, + "epoch": 0.7413925019127774, "grad_norm": 0.0, - "learning_rate": 3.2826443195465896e-06, - "loss": 0.8553, + "learning_rate": 3.3073299484596834e-06, + "loss": 0.8145, "step": 26163 }, { - "epoch": 0.7424517593643587, + "epoch": 0.7414208393550398, "grad_norm": 0.0, - "learning_rate": 3.281963503144053e-06, - "loss": 0.8456, + "learning_rate": 3.3066480390432085e-06, + "loss": 0.7743, "step": 26164 }, { - "epoch": 0.7424801362088536, + "epoch": 0.7414491767973023, "grad_norm": 0.0, - "learning_rate": 3.2812827434890713e-06, - "loss": 0.8052, + "learning_rate": 3.305966186007328e-06, + "loss": 0.8432, "step": 26165 }, { - "epoch": 0.7425085130533484, + "epoch": 0.7414775142395648, "grad_norm": 0.0, - "learning_rate": 3.280602040587395e-06, - "loss": 0.8178, + "learning_rate": 3.3052843893577757e-06, + "loss": 0.7718, "step": 26166 }, { - "epoch": 0.7425368898978434, + "epoch": 0.7415058516818271, "grad_norm": 0.0, - "learning_rate": 3.279921394444776e-06, - "loss": 0.8366, + "learning_rate": 3.3046026491003004e-06, + "loss": 0.842, "step": 26167 }, { - "epoch": 0.7425652667423382, + "epoch": 0.7415341891240896, "grad_norm": 0.0, - "learning_rate": 3.2792408050669634e-06, - "loss": 0.7316, + "learning_rate": 3.303920965240641e-06, + "loss": 0.8181, "step": 26168 }, { - "epoch": 0.7425936435868331, + "epoch": 0.7415625265663521, "grad_norm": 0.0, - "learning_rate": 3.2785602724597078e-06, - "loss": 0.7886, + "learning_rate": 3.303239337784547e-06, + "loss": 0.7871, "step": 26169 }, { - "epoch": 0.7426220204313281, + "epoch": 0.7415908640086146, "grad_norm": 0.0, - "learning_rate": 3.2778797966287534e-06, - "loss": 0.8406, + "learning_rate": 3.3025577667377507e-06, + "loss": 0.8155, "step": 26170 }, { - "epoch": 0.7426503972758229, + "epoch": 0.741619201450877, "grad_norm": 0.0, - "learning_rate": 3.2771993775798507e-06, - "loss": 0.7409, + "learning_rate": 3.3018762521059976e-06, + "loss": 0.7873, "step": 26171 }, { - "epoch": 0.7426787741203178, + "epoch": 0.7416475388931395, "grad_norm": 0.0, - "learning_rate": 3.27651901531875e-06, - "loss": 0.7926, + "learning_rate": 3.3011947938950317e-06, + "loss": 0.7752, "step": 26172 }, { - "epoch": 0.7427071509648128, + "epoch": 0.741675876335402, "grad_norm": 0.0, - "learning_rate": 3.275838709851191e-06, - "loss": 0.8049, + "learning_rate": 3.300513392110586e-06, + "loss": 0.8488, "step": 26173 }, { - "epoch": 0.7427355278093076, + "epoch": 0.7417042137776644, "grad_norm": 0.0, - "learning_rate": 3.275158461182927e-06, - "loss": 0.7798, + "learning_rate": 3.2998320467584034e-06, + "loss": 0.7407, "step": 26174 }, { - "epoch": 0.7427639046538025, + "epoch": 0.7417325512199269, "grad_norm": 0.0, - "learning_rate": 3.274478269319703e-06, - "loss": 0.7572, + "learning_rate": 3.2991507578442272e-06, + "loss": 0.7151, "step": 26175 }, { - "epoch": 0.7427922814982973, + "epoch": 0.7417608886621894, "grad_norm": 0.0, - "learning_rate": 3.273798134267261e-06, - "loss": 0.8415, + "learning_rate": 3.29846952537379e-06, + "loss": 0.7699, "step": 26176 }, { - "epoch": 0.7428206583427923, + "epoch": 0.7417892261044519, "grad_norm": 0.0, - "learning_rate": 3.273118056031349e-06, - "loss": 0.7889, + "learning_rate": 3.2977883493528307e-06, + "loss": 0.7631, "step": 26177 }, { - "epoch": 0.7428490351872872, + "epoch": 0.7418175635467142, "grad_norm": 0.0, - "learning_rate": 3.2724380346177143e-06, - "loss": 0.8445, + "learning_rate": 3.2971072297870897e-06, + "loss": 0.7561, "step": 26178 }, { - "epoch": 0.742877412031782, + "epoch": 0.7418459009889767, "grad_norm": 0.0, - "learning_rate": 3.271758070032094e-06, - "loss": 0.7976, + "learning_rate": 3.296426166682304e-06, + "loss": 0.8104, "step": 26179 }, { - "epoch": 0.742905788876277, + "epoch": 0.7418742384312392, "grad_norm": 0.0, - "learning_rate": 3.271078162280236e-06, - "loss": 0.7642, + "learning_rate": 3.295745160044214e-06, + "loss": 0.8453, "step": 26180 }, { - "epoch": 0.7429341657207719, + "epoch": 0.7419025758735016, "grad_norm": 0.0, - "learning_rate": 3.2703983113678837e-06, - "loss": 0.9387, + "learning_rate": 3.295064209878547e-06, + "loss": 0.812, "step": 26181 }, { - "epoch": 0.7429625425652667, + "epoch": 0.7419309133157641, "grad_norm": 0.0, - "learning_rate": 3.2697185173007784e-06, - "loss": 0.8578, + "learning_rate": 3.294383316191049e-06, + "loss": 0.9092, "step": 26182 }, { - "epoch": 0.7429909194097616, + "epoch": 0.7419592507580266, "grad_norm": 0.0, - "learning_rate": 3.2690387800846677e-06, - "loss": 0.8102, + "learning_rate": 3.2937024789874462e-06, + "loss": 0.9791, "step": 26183 }, { - "epoch": 0.7430192962542566, + "epoch": 0.7419875882002891, "grad_norm": 0.0, - "learning_rate": 3.2683590997252845e-06, - "loss": 0.8773, + "learning_rate": 3.2930216982734775e-06, + "loss": 0.7465, "step": 26184 }, { - "epoch": 0.7430476730987514, + "epoch": 0.7420159256425515, "grad_norm": 0.0, - "learning_rate": 3.267679476228376e-06, - "loss": 0.7878, + "learning_rate": 3.2923409740548805e-06, + "loss": 0.8589, "step": 26185 }, { - "epoch": 0.7430760499432463, + "epoch": 0.742044263084814, "grad_norm": 0.0, - "learning_rate": 3.266999909599684e-06, - "loss": 0.8699, + "learning_rate": 3.291660306337384e-06, + "loss": 0.7761, "step": 26186 }, { - "epoch": 0.7431044267877412, + "epoch": 0.7420726005270765, "grad_norm": 0.0, - "learning_rate": 3.2663203998449433e-06, - "loss": 0.8505, + "learning_rate": 3.290979695126724e-06, + "loss": 0.8689, "step": 26187 }, { - "epoch": 0.7431328036322361, + "epoch": 0.7421009379693388, "grad_norm": 0.0, - "learning_rate": 3.265640946969897e-06, - "loss": 0.8742, + "learning_rate": 3.2902991404286354e-06, + "loss": 0.8085, "step": 26188 }, { - "epoch": 0.743161180476731, + "epoch": 0.7421292754116013, "grad_norm": 0.0, - "learning_rate": 3.264961550980288e-06, - "loss": 0.8658, + "learning_rate": 3.2896186422488463e-06, + "loss": 0.7607, "step": 26189 }, { - "epoch": 0.7431895573212258, + "epoch": 0.7421576128538638, "grad_norm": 0.0, - "learning_rate": 3.2642822118818475e-06, - "loss": 0.8311, + "learning_rate": 3.2889382005930912e-06, + "loss": 0.7593, "step": 26190 }, { - "epoch": 0.7432179341657208, + "epoch": 0.7421859502961262, "grad_norm": 0.0, - "learning_rate": 3.2636029296803186e-06, - "loss": 0.8854, + "learning_rate": 3.2882578154671017e-06, + "loss": 0.772, "step": 26191 }, { - "epoch": 0.7432463110102157, + "epoch": 0.7422142877383887, "grad_norm": 0.0, - "learning_rate": 3.262923704381441e-06, - "loss": 0.7743, + "learning_rate": 3.2875774868766087e-06, + "loss": 0.9029, "step": 26192 }, { - "epoch": 0.7432746878547105, + "epoch": 0.7422426251806512, "grad_norm": 0.0, - "learning_rate": 3.262244535990947e-06, - "loss": 0.8957, + "learning_rate": 3.286897214827347e-06, + "loss": 0.7836, "step": 26193 }, { - "epoch": 0.7433030646992055, + "epoch": 0.7422709626229137, "grad_norm": 0.0, - "learning_rate": 3.2615654245145765e-06, - "loss": 0.786, + "learning_rate": 3.286216999325039e-06, + "loss": 0.9167, "step": 26194 }, { - "epoch": 0.7433314415437003, + "epoch": 0.7422993000651761, "grad_norm": 0.0, - "learning_rate": 3.2608863699580684e-06, - "loss": 0.9054, + "learning_rate": 3.2855368403754185e-06, + "loss": 0.7318, "step": 26195 }, { - "epoch": 0.7433598183881952, + "epoch": 0.7423276375074386, "grad_norm": 0.0, - "learning_rate": 3.2602073723271497e-06, - "loss": 0.8652, + "learning_rate": 3.2848567379842177e-06, + "loss": 0.7206, "step": 26196 }, { - "epoch": 0.7433881952326902, + "epoch": 0.7423559749497011, "grad_norm": 0.0, - "learning_rate": 3.25952843162757e-06, - "loss": 0.9111, + "learning_rate": 3.2841766921571593e-06, + "loss": 0.7099, "step": 26197 }, { - "epoch": 0.743416572077185, + "epoch": 0.7423843123919635, "grad_norm": 0.0, - "learning_rate": 3.2588495478650518e-06, - "loss": 0.781, + "learning_rate": 3.2834967028999777e-06, + "loss": 0.7578, "step": 26198 }, { - "epoch": 0.7434449489216799, + "epoch": 0.742412649834226, "grad_norm": 0.0, - "learning_rate": 3.258170721045335e-06, - "loss": 0.8086, + "learning_rate": 3.2828167702183945e-06, + "loss": 0.9524, "step": 26199 }, { - "epoch": 0.7434733257661748, + "epoch": 0.7424409872764884, "grad_norm": 0.0, - "learning_rate": 3.2574919511741575e-06, - "loss": 0.8557, + "learning_rate": 3.2821368941181396e-06, + "loss": 0.8287, "step": 26200 }, { - "epoch": 0.7435017026106697, + "epoch": 0.7424693247187509, "grad_norm": 0.0, - "learning_rate": 3.2568132382572437e-06, - "loss": 0.8481, + "learning_rate": 3.2814570746049435e-06, + "loss": 0.827, "step": 26201 }, { - "epoch": 0.7435300794551646, + "epoch": 0.7424976621610133, "grad_norm": 0.0, - "learning_rate": 3.2561345823003332e-06, - "loss": 0.8501, + "learning_rate": 3.2807773116845267e-06, + "loss": 0.8531, "step": 26202 }, { - "epoch": 0.7435584562996594, + "epoch": 0.7425259996032758, "grad_norm": 0.0, - "learning_rate": 3.255455983309159e-06, - "loss": 0.9121, + "learning_rate": 3.2800976053626168e-06, + "loss": 0.7614, "step": 26203 }, { - "epoch": 0.7435868331441544, + "epoch": 0.7425543370455383, "grad_norm": 0.0, - "learning_rate": 3.2547774412894485e-06, - "loss": 0.8318, + "learning_rate": 3.27941795564494e-06, + "loss": 0.9561, "step": 26204 }, { - "epoch": 0.7436152099886493, + "epoch": 0.7425826744878007, "grad_norm": 0.0, - "learning_rate": 3.254098956246936e-06, - "loss": 0.8462, + "learning_rate": 3.2787383625372214e-06, + "loss": 0.8697, "step": 26205 }, { - "epoch": 0.7436435868331441, + "epoch": 0.7426110119300632, "grad_norm": 0.0, - "learning_rate": 3.2534205281873565e-06, - "loss": 0.8324, + "learning_rate": 3.2780588260451896e-06, + "loss": 0.8267, "step": 26206 }, { - "epoch": 0.743671963677639, + "epoch": 0.7426393493723257, "grad_norm": 0.0, - "learning_rate": 3.252742157116434e-06, - "loss": 0.8799, + "learning_rate": 3.2773793461745608e-06, + "loss": 0.8924, "step": 26207 }, { - "epoch": 0.743700340522134, + "epoch": 0.7426676868145882, "grad_norm": 0.0, - "learning_rate": 3.252063843039901e-06, - "loss": 0.9026, + "learning_rate": 3.2766999229310627e-06, + "loss": 0.8301, "step": 26208 }, { - "epoch": 0.7437287173666288, + "epoch": 0.7426960242568506, "grad_norm": 0.0, - "learning_rate": 3.2513855859634924e-06, - "loss": 0.8844, + "learning_rate": 3.2760205563204195e-06, + "loss": 0.8431, "step": 26209 }, { - "epoch": 0.7437570942111237, + "epoch": 0.742724361699113, "grad_norm": 0.0, - "learning_rate": 3.2507073858929282e-06, - "loss": 0.8097, + "learning_rate": 3.2753412463483505e-06, + "loss": 0.7377, "step": 26210 }, { - "epoch": 0.7437854710556187, + "epoch": 0.7427526991413755, "grad_norm": 0.0, - "learning_rate": 3.250029242833943e-06, - "loss": 0.8356, + "learning_rate": 3.2746619930205815e-06, + "loss": 0.8406, "step": 26211 }, { - "epoch": 0.7438138479001135, + "epoch": 0.7427810365836379, "grad_norm": 0.0, - "learning_rate": 3.2493511567922655e-06, - "loss": 0.8601, + "learning_rate": 3.2739827963428296e-06, + "loss": 0.9456, "step": 26212 }, { - "epoch": 0.7438422247446084, + "epoch": 0.7428093740259004, "grad_norm": 0.0, - "learning_rate": 3.2486731277736205e-06, - "loss": 0.8716, + "learning_rate": 3.2733036563208165e-06, + "loss": 0.7807, "step": 26213 }, { - "epoch": 0.7438706015891033, + "epoch": 0.7428377114681629, "grad_norm": 0.0, - "learning_rate": 3.2479951557837375e-06, - "loss": 0.8115, + "learning_rate": 3.272624572960269e-06, + "loss": 0.8459, "step": 26214 }, { - "epoch": 0.7438989784335982, + "epoch": 0.7428660489104253, "grad_norm": 0.0, - "learning_rate": 3.247317240828347e-06, - "loss": 0.8974, + "learning_rate": 3.2719455462669003e-06, + "loss": 0.7729, "step": 26215 }, { - "epoch": 0.7439273552780931, + "epoch": 0.7428943863526878, "grad_norm": 0.0, - "learning_rate": 3.246639382913167e-06, - "loss": 0.8384, + "learning_rate": 3.2712665762464313e-06, + "loss": 0.7842, "step": 26216 }, { - "epoch": 0.7439557321225879, + "epoch": 0.7429227237949503, "grad_norm": 0.0, - "learning_rate": 3.2459615820439285e-06, - "loss": 0.8536, + "learning_rate": 3.270587662904584e-06, + "loss": 0.8059, "step": 26217 }, { - "epoch": 0.7439841089670829, + "epoch": 0.7429510612372128, "grad_norm": 0.0, - "learning_rate": 3.2452838382263587e-06, - "loss": 0.7542, + "learning_rate": 3.269908806247074e-06, + "loss": 0.8456, "step": 26218 }, { - "epoch": 0.7440124858115778, + "epoch": 0.7429793986794752, "grad_norm": 0.0, - "learning_rate": 3.2446061514661775e-06, - "loss": 0.8408, + "learning_rate": 3.2692300062796257e-06, + "loss": 0.7174, "step": 26219 }, { - "epoch": 0.7440408626560726, + "epoch": 0.7430077361217376, "grad_norm": 0.0, - "learning_rate": 3.2439285217691106e-06, - "loss": 0.9437, + "learning_rate": 3.2685512630079497e-06, + "loss": 0.7975, "step": 26220 }, { - "epoch": 0.7440692395005676, + "epoch": 0.7430360735640001, "grad_norm": 0.0, - "learning_rate": 3.243250949140887e-06, - "loss": 0.8338, + "learning_rate": 3.267872576437765e-06, + "loss": 0.6948, "step": 26221 }, { - "epoch": 0.7440976163450624, + "epoch": 0.7430644110062625, "grad_norm": 0.0, - "learning_rate": 3.242573433587224e-06, - "loss": 0.8515, + "learning_rate": 3.2671939465747937e-06, + "loss": 0.8484, "step": 26222 }, { - "epoch": 0.7441259931895573, + "epoch": 0.743092748448525, "grad_norm": 0.0, - "learning_rate": 3.241895975113849e-06, - "loss": 0.8353, + "learning_rate": 3.2665153734247436e-06, + "loss": 0.8096, "step": 26223 }, { - "epoch": 0.7441543700340522, + "epoch": 0.7431210858907875, "grad_norm": 0.0, - "learning_rate": 3.24121857372648e-06, - "loss": 0.8127, + "learning_rate": 3.265836856993335e-06, + "loss": 0.8711, "step": 26224 }, { - "epoch": 0.7441827468785471, + "epoch": 0.74314942333305, "grad_norm": 0.0, - "learning_rate": 3.2405412294308413e-06, - "loss": 0.8483, + "learning_rate": 3.2651583972862877e-06, + "loss": 0.8127, "step": 26225 }, { - "epoch": 0.744211123723042, + "epoch": 0.7431777607753124, "grad_norm": 0.0, - "learning_rate": 3.2398639422326583e-06, - "loss": 0.8035, + "learning_rate": 3.2644799943093075e-06, + "loss": 0.8664, "step": 26226 }, { - "epoch": 0.7442395005675368, + "epoch": 0.7432060982175749, "grad_norm": 0.0, - "learning_rate": 3.239186712137642e-06, - "loss": 0.8003, + "learning_rate": 3.263801648068118e-06, + "loss": 0.8847, "step": 26227 }, { - "epoch": 0.7442678774120318, + "epoch": 0.7432344356598374, "grad_norm": 0.0, - "learning_rate": 3.238509539151522e-06, - "loss": 0.7952, + "learning_rate": 3.2631233585684243e-06, + "loss": 0.839, "step": 26228 }, { - "epoch": 0.7442962542565267, + "epoch": 0.7432627731020998, "grad_norm": 0.0, - "learning_rate": 3.2378324232800195e-06, - "loss": 0.8846, + "learning_rate": 3.262445125815945e-06, + "loss": 0.8351, "step": 26229 }, { - "epoch": 0.7443246311010215, + "epoch": 0.7432911105443623, "grad_norm": 0.0, - "learning_rate": 3.2371553645288467e-06, - "loss": 0.8056, + "learning_rate": 3.2617669498163917e-06, + "loss": 0.8573, "step": 26230 }, { - "epoch": 0.7443530079455165, + "epoch": 0.7433194479866247, "grad_norm": 0.0, - "learning_rate": 3.2364783629037277e-06, - "loss": 0.8139, + "learning_rate": 3.2610888305754783e-06, + "loss": 0.7338, "step": 26231 }, { - "epoch": 0.7443813847900114, + "epoch": 0.7433477854288872, "grad_norm": 0.0, - "learning_rate": 3.2358014184103827e-06, - "loss": 0.8237, + "learning_rate": 3.260410768098916e-06, + "loss": 0.8511, "step": 26232 }, { - "epoch": 0.7444097616345062, + "epoch": 0.7433761228711496, "grad_norm": 0.0, - "learning_rate": 3.235124531054523e-06, - "loss": 0.8612, + "learning_rate": 3.2597327623924193e-06, + "loss": 0.8009, "step": 26233 }, { - "epoch": 0.7444381384790011, + "epoch": 0.7434044603134121, "grad_norm": 0.0, - "learning_rate": 3.2344477008418717e-06, - "loss": 0.8165, + "learning_rate": 3.259054813461693e-06, + "loss": 0.791, "step": 26234 }, { - "epoch": 0.7444665153234961, + "epoch": 0.7434327977556746, "grad_norm": 0.0, - "learning_rate": 3.2337709277781484e-06, - "loss": 0.7128, + "learning_rate": 3.258376921312455e-06, + "loss": 0.8208, "step": 26235 }, { - "epoch": 0.7444948921679909, + "epoch": 0.743461135197937, "grad_norm": 0.0, - "learning_rate": 3.2330942118690625e-06, - "loss": 0.7692, + "learning_rate": 3.2576990859504075e-06, + "loss": 0.8232, "step": 26236 }, { - "epoch": 0.7445232690124858, + "epoch": 0.7434894726401995, "grad_norm": 0.0, - "learning_rate": 3.2324175531203327e-06, - "loss": 0.7645, + "learning_rate": 3.257021307381265e-06, + "loss": 0.8598, "step": 26237 }, { - "epoch": 0.7445516458569807, + "epoch": 0.743517810082462, "grad_norm": 0.0, - "learning_rate": 3.2317409515376808e-06, - "loss": 0.8363, + "learning_rate": 3.2563435856107396e-06, + "loss": 0.8388, "step": 26238 }, { - "epoch": 0.7445800227014756, + "epoch": 0.7435461475247244, "grad_norm": 0.0, - "learning_rate": 3.231064407126814e-06, - "loss": 0.8606, + "learning_rate": 3.255665920644533e-06, + "loss": 0.8271, "step": 26239 }, { - "epoch": 0.7446083995459705, + "epoch": 0.7435744849669869, "grad_norm": 0.0, - "learning_rate": 3.2303879198934494e-06, - "loss": 0.7972, + "learning_rate": 3.2549883124883595e-06, + "loss": 0.8759, "step": 26240 }, { - "epoch": 0.7446367763904653, + "epoch": 0.7436028224092494, "grad_norm": 0.0, - "learning_rate": 3.2297114898433067e-06, - "loss": 0.7998, + "learning_rate": 3.254310761147922e-06, + "loss": 0.8632, "step": 26241 }, { - "epoch": 0.7446651532349603, + "epoch": 0.7436311598515118, "grad_norm": 0.0, - "learning_rate": 3.2290351169820888e-06, - "loss": 0.7687, + "learning_rate": 3.253633266628928e-06, + "loss": 0.7767, "step": 26242 }, { - "epoch": 0.7446935300794552, + "epoch": 0.7436594972937742, "grad_norm": 0.0, - "learning_rate": 3.228358801315522e-06, - "loss": 0.9002, + "learning_rate": 3.2529558289370877e-06, + "loss": 0.8418, "step": 26243 }, { - "epoch": 0.74472190692395, + "epoch": 0.7436878347360367, "grad_norm": 0.0, - "learning_rate": 3.2276825428493085e-06, - "loss": 0.9277, + "learning_rate": 3.2522784480781057e-06, + "loss": 0.7964, "step": 26244 }, { - "epoch": 0.744750283768445, + "epoch": 0.7437161721782992, "grad_norm": 0.0, - "learning_rate": 3.2270063415891663e-06, - "loss": 0.731, + "learning_rate": 3.2516011240576874e-06, + "loss": 0.8875, "step": 26245 }, { - "epoch": 0.7447786606129398, + "epoch": 0.7437445096205616, "grad_norm": 0.0, - "learning_rate": 3.226330197540809e-06, - "loss": 0.6864, + "learning_rate": 3.2509238568815426e-06, + "loss": 0.846, "step": 26246 }, { - "epoch": 0.7448070374574347, + "epoch": 0.7437728470628241, "grad_norm": 0.0, - "learning_rate": 3.2256541107099416e-06, - "loss": 0.746, + "learning_rate": 3.2502466465553697e-06, + "loss": 0.8103, "step": 26247 }, { - "epoch": 0.7448354143019297, + "epoch": 0.7438011845050866, "grad_norm": 0.0, - "learning_rate": 3.224978081102279e-06, - "loss": 0.8068, + "learning_rate": 3.2495694930848777e-06, + "loss": 0.9223, "step": 26248 }, { - "epoch": 0.7448637911464245, + "epoch": 0.7438295219473491, "grad_norm": 0.0, - "learning_rate": 3.2243021087235336e-06, - "loss": 0.7185, + "learning_rate": 3.2488923964757656e-06, + "loss": 0.9297, "step": 26249 }, { - "epoch": 0.7448921679909194, + "epoch": 0.7438578593896115, "grad_norm": 0.0, - "learning_rate": 3.22362619357941e-06, - "loss": 0.7812, + "learning_rate": 3.2482153567337405e-06, + "loss": 0.8323, "step": 26250 }, { - "epoch": 0.7449205448354143, + "epoch": 0.743886196831874, "grad_norm": 0.0, - "learning_rate": 3.2229503356756196e-06, - "loss": 0.7336, + "learning_rate": 3.2475383738645072e-06, + "loss": 0.8323, "step": 26251 }, { - "epoch": 0.7449489216799092, + "epoch": 0.7439145342741365, "grad_norm": 0.0, - "learning_rate": 3.2222745350178776e-06, - "loss": 0.7434, + "learning_rate": 3.2468614478737626e-06, + "loss": 0.8413, "step": 26252 }, { - "epoch": 0.7449772985244041, + "epoch": 0.7439428717163988, "grad_norm": 0.0, - "learning_rate": 3.221598791611882e-06, - "loss": 0.7769, + "learning_rate": 3.246184578767212e-06, + "loss": 0.7935, "step": 26253 }, { - "epoch": 0.745005675368899, + "epoch": 0.7439712091586613, "grad_norm": 0.0, - "learning_rate": 3.220923105463347e-06, - "loss": 0.9579, + "learning_rate": 3.2455077665505597e-06, + "loss": 0.8752, "step": 26254 }, { - "epoch": 0.7450340522133939, + "epoch": 0.7439995466009238, "grad_norm": 0.0, - "learning_rate": 3.220247476577982e-06, - "loss": 0.7777, + "learning_rate": 3.2448310112295012e-06, + "loss": 0.7594, "step": 26255 }, { - "epoch": 0.7450624290578888, + "epoch": 0.7440278840431863, "grad_norm": 0.0, - "learning_rate": 3.2195719049614893e-06, - "loss": 0.7685, + "learning_rate": 3.2441543128097386e-06, + "loss": 0.7526, "step": 26256 }, { - "epoch": 0.7450908059023836, + "epoch": 0.7440562214854487, "grad_norm": 0.0, - "learning_rate": 3.218896390619577e-06, - "loss": 0.8353, + "learning_rate": 3.243477671296973e-06, + "loss": 0.8232, "step": 26257 }, { - "epoch": 0.7451191827468785, + "epoch": 0.7440845589277112, "grad_norm": 0.0, - "learning_rate": 3.2182209335579516e-06, - "loss": 0.8424, + "learning_rate": 3.242801086696904e-06, + "loss": 0.7826, "step": 26258 }, { - "epoch": 0.7451475595913735, + "epoch": 0.7441128963699737, "grad_norm": 0.0, - "learning_rate": 3.217545533782319e-06, - "loss": 0.7576, + "learning_rate": 3.242124559015234e-06, + "loss": 0.83, "step": 26259 }, { - "epoch": 0.7451759364358683, + "epoch": 0.7441412338122361, "grad_norm": 0.0, - "learning_rate": 3.216870191298387e-06, - "loss": 0.8036, + "learning_rate": 3.241448088257655e-06, + "loss": 0.8057, "step": 26260 }, { - "epoch": 0.7452043132803632, + "epoch": 0.7441695712544986, "grad_norm": 0.0, - "learning_rate": 3.2161949061118547e-06, - "loss": 0.8326, + "learning_rate": 3.240771674429869e-06, + "loss": 0.7887, "step": 26261 }, { - "epoch": 0.7452326901248582, + "epoch": 0.7441979086967611, "grad_norm": 0.0, - "learning_rate": 3.2155196782284282e-06, - "loss": 0.8957, + "learning_rate": 3.240095317537576e-06, + "loss": 0.77, "step": 26262 }, { - "epoch": 0.745261066969353, + "epoch": 0.7442262461390234, "grad_norm": 0.0, - "learning_rate": 3.214844507653816e-06, - "loss": 0.8967, + "learning_rate": 3.239419017586467e-06, + "loss": 0.7677, "step": 26263 }, { - "epoch": 0.7452894438138479, + "epoch": 0.7442545835812859, "grad_norm": 0.0, - "learning_rate": 3.2141693943937134e-06, - "loss": 0.8509, + "learning_rate": 3.2387427745822453e-06, + "loss": 0.7943, "step": 26264 }, { - "epoch": 0.7453178206583428, + "epoch": 0.7442829210235484, "grad_norm": 0.0, - "learning_rate": 3.2134943384538274e-06, - "loss": 0.7884, + "learning_rate": 3.2380665885306017e-06, + "loss": 0.8052, "step": 26265 }, { - "epoch": 0.7453461975028377, + "epoch": 0.7443112584658109, "grad_norm": 0.0, - "learning_rate": 3.212819339839863e-06, - "loss": 0.7819, + "learning_rate": 3.237390459437233e-06, + "loss": 0.7875, "step": 26266 }, { - "epoch": 0.7453745743473326, + "epoch": 0.7443395959080733, "grad_norm": 0.0, - "learning_rate": 3.212144398557515e-06, - "loss": 0.7802, + "learning_rate": 3.236714387307839e-06, + "loss": 0.7577, "step": 26267 }, { - "epoch": 0.7454029511918274, + "epoch": 0.7443679333503358, "grad_norm": 0.0, - "learning_rate": 3.2114695146124885e-06, - "loss": 0.7511, + "learning_rate": 3.2360383721481082e-06, + "loss": 0.9586, "step": 26268 }, { - "epoch": 0.7454313280363224, + "epoch": 0.7443962707925983, "grad_norm": 0.0, - "learning_rate": 3.210794688010488e-06, - "loss": 0.8334, + "learning_rate": 3.2353624139637383e-06, + "loss": 0.9119, "step": 26269 }, { - "epoch": 0.7454597048808173, + "epoch": 0.7444246082348607, "grad_norm": 0.0, - "learning_rate": 3.2101199187572064e-06, - "loss": 0.8182, + "learning_rate": 3.234686512760422e-06, + "loss": 0.7638, "step": 26270 }, { - "epoch": 0.7454880817253121, + "epoch": 0.7444529456771232, "grad_norm": 0.0, - "learning_rate": 3.209445206858347e-06, - "loss": 0.8534, + "learning_rate": 3.2340106685438545e-06, + "loss": 0.7975, "step": 26271 }, { - "epoch": 0.7455164585698071, + "epoch": 0.7444812831193857, "grad_norm": 0.0, - "learning_rate": 3.2087705523196135e-06, - "loss": 0.7766, + "learning_rate": 3.2333348813197306e-06, + "loss": 0.8454, "step": 26272 }, { - "epoch": 0.7455448354143019, + "epoch": 0.7445096205616482, "grad_norm": 0.0, - "learning_rate": 3.2080959551466926e-06, - "loss": 0.8035, + "learning_rate": 3.2326591510937353e-06, + "loss": 0.7642, "step": 26273 }, { - "epoch": 0.7455732122587968, + "epoch": 0.7445379580039105, "grad_norm": 0.0, - "learning_rate": 3.2074214153452975e-06, - "loss": 0.766, + "learning_rate": 3.2319834778715662e-06, + "loss": 0.7983, "step": 26274 }, { - "epoch": 0.7456015891032917, + "epoch": 0.744566295446173, "grad_norm": 0.0, - "learning_rate": 3.2067469329211154e-06, - "loss": 0.9624, + "learning_rate": 3.2313078616589166e-06, + "loss": 0.8977, "step": 26275 }, { - "epoch": 0.7456299659477866, + "epoch": 0.7445946328884355, "grad_norm": 0.0, - "learning_rate": 3.206072507879847e-06, - "loss": 0.7561, + "learning_rate": 3.2306323024614717e-06, + "loss": 0.6588, "step": 26276 }, { - "epoch": 0.7456583427922815, + "epoch": 0.7446229703306979, "grad_norm": 0.0, - "learning_rate": 3.205398140227194e-06, - "loss": 0.8033, + "learning_rate": 3.229956800284927e-06, + "loss": 0.7842, "step": 26277 }, { - "epoch": 0.7456867196367764, + "epoch": 0.7446513077729604, "grad_norm": 0.0, - "learning_rate": 3.2047238299688443e-06, - "loss": 0.7093, + "learning_rate": 3.2292813551349666e-06, + "loss": 0.7995, "step": 26278 }, { - "epoch": 0.7457150964812713, + "epoch": 0.7446796452152229, "grad_norm": 0.0, - "learning_rate": 3.2040495771104983e-06, - "loss": 0.8244, + "learning_rate": 3.228605967017284e-06, + "loss": 0.8452, "step": 26279 }, { - "epoch": 0.7457434733257662, + "epoch": 0.7447079826574854, "grad_norm": 0.0, - "learning_rate": 3.2033753816578537e-06, - "loss": 0.7847, + "learning_rate": 3.2279306359375718e-06, + "loss": 0.7321, "step": 26280 }, { - "epoch": 0.745771850170261, + "epoch": 0.7447363200997478, "grad_norm": 0.0, - "learning_rate": 3.2027012436166004e-06, - "loss": 0.7898, + "learning_rate": 3.22725536190151e-06, + "loss": 0.9177, "step": 26281 }, { - "epoch": 0.745800227014756, + "epoch": 0.7447646575420103, "grad_norm": 0.0, - "learning_rate": 3.2020271629924347e-06, - "loss": 0.8431, + "learning_rate": 3.2265801449147927e-06, + "loss": 0.8528, "step": 26282 }, { - "epoch": 0.7458286038592509, + "epoch": 0.7447929949842728, "grad_norm": 0.0, - "learning_rate": 3.2013531397910546e-06, - "loss": 0.7986, + "learning_rate": 3.2259049849831047e-06, + "loss": 0.8498, "step": 26283 }, { - "epoch": 0.7458569807037457, + "epoch": 0.7448213324265351, "grad_norm": 0.0, - "learning_rate": 3.2006791740181466e-06, - "loss": 0.8125, + "learning_rate": 3.225229882112135e-06, + "loss": 0.8267, "step": 26284 }, { - "epoch": 0.7458853575482406, + "epoch": 0.7448496698687976, "grad_norm": 0.0, - "learning_rate": 3.2000052656794066e-06, - "loss": 0.8374, + "learning_rate": 3.224554836307573e-06, + "loss": 0.8639, "step": 26285 }, { - "epoch": 0.7459137343927356, + "epoch": 0.7448780073110601, "grad_norm": 0.0, - "learning_rate": 3.1993314147805322e-06, - "loss": 0.8412, + "learning_rate": 3.223879847575099e-06, + "loss": 0.8673, "step": 26286 }, { - "epoch": 0.7459421112372304, + "epoch": 0.7449063447533225, "grad_norm": 0.0, - "learning_rate": 3.198657621327208e-06, - "loss": 0.8715, + "learning_rate": 3.2232049159204005e-06, + "loss": 0.7779, "step": 26287 }, { - "epoch": 0.7459704880817253, + "epoch": 0.744934682195585, "grad_norm": 0.0, - "learning_rate": 3.1979838853251277e-06, - "loss": 0.7561, + "learning_rate": 3.222530041349168e-06, + "loss": 0.7322, "step": 26288 }, { - "epoch": 0.7459988649262203, + "epoch": 0.7449630196378475, "grad_norm": 0.0, - "learning_rate": 3.1973102067799833e-06, - "loss": 0.8327, + "learning_rate": 3.221855223867076e-06, + "loss": 0.831, "step": 26289 }, { - "epoch": 0.7460272417707151, + "epoch": 0.74499135708011, "grad_norm": 0.0, - "learning_rate": 3.1966365856974656e-06, - "loss": 0.8879, + "learning_rate": 3.221180463479817e-06, + "loss": 0.8156, "step": 26290 }, { - "epoch": 0.74605561861521, + "epoch": 0.7450196945223724, "grad_norm": 0.0, - "learning_rate": 3.1959630220832683e-06, - "loss": 0.8772, + "learning_rate": 3.2205057601930744e-06, + "loss": 0.7772, "step": 26291 }, { - "epoch": 0.7460839954597048, + "epoch": 0.7450480319646349, "grad_norm": 0.0, - "learning_rate": 3.195289515943073e-06, - "loss": 0.8665, + "learning_rate": 3.219831114012526e-06, + "loss": 0.7385, "step": 26292 }, { - "epoch": 0.7461123723041998, + "epoch": 0.7450763694068974, "grad_norm": 0.0, - "learning_rate": 3.1946160672825742e-06, - "loss": 0.8373, + "learning_rate": 3.219156524943862e-06, + "loss": 0.8115, "step": 26293 }, { - "epoch": 0.7461407491486947, + "epoch": 0.7451047068491597, "grad_norm": 0.0, - "learning_rate": 3.193942676107462e-06, - "loss": 0.8923, + "learning_rate": 3.2184819929927557e-06, + "loss": 0.8425, "step": 26294 }, { - "epoch": 0.7461691259931895, + "epoch": 0.7451330442914222, "grad_norm": 0.0, - "learning_rate": 3.1932693424234186e-06, - "loss": 0.7994, + "learning_rate": 3.2178075181648947e-06, + "loss": 0.8279, "step": 26295 }, { - "epoch": 0.7461975028376845, + "epoch": 0.7451613817336847, "grad_norm": 0.0, - "learning_rate": 3.1925960662361355e-06, - "loss": 0.7851, + "learning_rate": 3.21713310046596e-06, + "loss": 0.8014, "step": 26296 }, { - "epoch": 0.7462258796821793, + "epoch": 0.7451897191759472, "grad_norm": 0.0, - "learning_rate": 3.191922847551302e-06, - "loss": 0.8637, + "learning_rate": 3.216458739901631e-06, + "loss": 0.714, "step": 26297 }, { - "epoch": 0.7462542565266742, + "epoch": 0.7452180566182096, "grad_norm": 0.0, - "learning_rate": 3.1912496863746e-06, - "loss": 0.8747, + "learning_rate": 3.2157844364775924e-06, + "loss": 0.8363, "step": 26298 }, { - "epoch": 0.7462826333711692, + "epoch": 0.7452463940604721, "grad_norm": 0.0, - "learning_rate": 3.1905765827117173e-06, - "loss": 0.8222, + "learning_rate": 3.2151101901995184e-06, + "loss": 0.8549, "step": 26299 }, { - "epoch": 0.746311010215664, + "epoch": 0.7452747315027346, "grad_norm": 0.0, - "learning_rate": 3.189903536568343e-06, - "loss": 0.8073, + "learning_rate": 3.21443600107309e-06, + "loss": 0.8853, "step": 26300 }, { - "epoch": 0.7463393870601589, + "epoch": 0.745303068944997, "grad_norm": 0.0, - "learning_rate": 3.1892305479501574e-06, - "loss": 0.8938, + "learning_rate": 3.2137618691039908e-06, + "loss": 0.857, "step": 26301 }, { - "epoch": 0.7463677639046538, + "epoch": 0.7453314063872595, "grad_norm": 0.0, - "learning_rate": 3.1885576168628473e-06, - "loss": 0.806, + "learning_rate": 3.213087794297891e-06, + "loss": 0.7012, "step": 26302 }, { - "epoch": 0.7463961407491487, + "epoch": 0.745359743829522, "grad_norm": 0.0, - "learning_rate": 3.1878847433121005e-06, - "loss": 0.8175, + "learning_rate": 3.2124137766604735e-06, + "loss": 0.7343, "step": 26303 }, { - "epoch": 0.7464245175936436, + "epoch": 0.7453880812717845, "grad_norm": 0.0, - "learning_rate": 3.1872119273035905e-06, - "loss": 0.952, + "learning_rate": 3.211739816197419e-06, + "loss": 0.7837, "step": 26304 }, { - "epoch": 0.7464528944381384, + "epoch": 0.7454164187140468, "grad_norm": 0.0, - "learning_rate": 3.1865391688430147e-06, - "loss": 0.7627, + "learning_rate": 3.211065912914397e-06, + "loss": 0.6702, "step": 26305 }, { - "epoch": 0.7464812712826334, + "epoch": 0.7454447561563093, "grad_norm": 0.0, - "learning_rate": 3.185866467936045e-06, - "loss": 0.7404, + "learning_rate": 3.2103920668170916e-06, + "loss": 0.6604, "step": 26306 }, { - "epoch": 0.7465096481271283, + "epoch": 0.7454730935985718, "grad_norm": 0.0, - "learning_rate": 3.1851938245883686e-06, - "loss": 0.8472, + "learning_rate": 3.209718277911171e-06, + "loss": 0.7401, "step": 26307 }, { - "epoch": 0.7465380249716231, + "epoch": 0.7455014310408342, "grad_norm": 0.0, - "learning_rate": 3.184521238805668e-06, - "loss": 0.7607, + "learning_rate": 3.2090445462023156e-06, + "loss": 0.744, "step": 26308 }, { - "epoch": 0.746566401816118, + "epoch": 0.7455297684830967, "grad_norm": 0.0, - "learning_rate": 3.1838487105936212e-06, - "loss": 0.925, + "learning_rate": 3.208370871696199e-06, + "loss": 0.7884, "step": 26309 }, { - "epoch": 0.746594778660613, + "epoch": 0.7455581059253592, "grad_norm": 0.0, - "learning_rate": 3.1831762399579093e-06, - "loss": 0.7825, + "learning_rate": 3.2076972543984975e-06, + "loss": 0.8614, "step": 26310 }, { - "epoch": 0.7466231555051078, + "epoch": 0.7455864433676216, "grad_norm": 0.0, - "learning_rate": 3.1825038269042184e-06, - "loss": 0.8647, + "learning_rate": 3.2070236943148834e-06, + "loss": 0.8351, "step": 26311 }, { - "epoch": 0.7466515323496027, + "epoch": 0.7456147808098841, "grad_norm": 0.0, - "learning_rate": 3.1818314714382202e-06, - "loss": 0.8954, + "learning_rate": 3.2063501914510355e-06, + "loss": 0.8477, "step": 26312 }, { - "epoch": 0.7466799091940977, + "epoch": 0.7456431182521466, "grad_norm": 0.0, - "learning_rate": 3.181159173565599e-06, - "loss": 0.8253, + "learning_rate": 3.205676745812619e-06, + "loss": 0.7808, "step": 26313 }, { - "epoch": 0.7467082860385925, + "epoch": 0.7456714556944091, "grad_norm": 0.0, - "learning_rate": 3.180486933292035e-06, - "loss": 0.8238, + "learning_rate": 3.205003357405313e-06, + "loss": 0.8806, "step": 26314 }, { - "epoch": 0.7467366628830874, + "epoch": 0.7456997931366715, "grad_norm": 0.0, - "learning_rate": 3.1798147506232023e-06, - "loss": 0.8812, + "learning_rate": 3.2043300262347842e-06, + "loss": 0.8377, "step": 26315 }, { - "epoch": 0.7467650397275823, + "epoch": 0.745728130578934, "grad_norm": 0.0, - "learning_rate": 3.1791426255647805e-06, - "loss": 0.8659, + "learning_rate": 3.2036567523067074e-06, + "loss": 0.783, "step": 26316 }, { - "epoch": 0.7467934165720772, + "epoch": 0.7457564680211964, "grad_norm": 0.0, - "learning_rate": 3.1784705581224508e-06, - "loss": 0.8369, + "learning_rate": 3.2029835356267567e-06, + "loss": 0.8602, "step": 26317 }, { - "epoch": 0.7468217934165721, + "epoch": 0.7457848054634588, "grad_norm": 0.0, - "learning_rate": 3.1777985483018835e-06, - "loss": 0.8086, + "learning_rate": 3.202310376200596e-06, + "loss": 0.7524, "step": 26318 }, { - "epoch": 0.7468501702610669, + "epoch": 0.7458131429057213, "grad_norm": 0.0, - "learning_rate": 3.1771265961087548e-06, - "loss": 0.8383, + "learning_rate": 3.2016372740339e-06, + "loss": 0.7508, "step": 26319 }, { - "epoch": 0.7468785471055619, + "epoch": 0.7458414803479838, "grad_norm": 0.0, - "learning_rate": 3.176454701548751e-06, - "loss": 0.848, + "learning_rate": 3.2009642291323397e-06, + "loss": 0.8872, "step": 26320 }, { - "epoch": 0.7469069239500568, + "epoch": 0.7458698177902463, "grad_norm": 0.0, - "learning_rate": 3.175782864627539e-06, - "loss": 0.7825, + "learning_rate": 3.20029124150158e-06, + "loss": 0.8483, "step": 26321 }, { - "epoch": 0.7469353007945516, + "epoch": 0.7458981552325087, "grad_norm": 0.0, - "learning_rate": 3.175111085350795e-06, - "loss": 0.8287, + "learning_rate": 3.199618311147292e-06, + "loss": 0.7885, "step": 26322 }, { - "epoch": 0.7469636776390466, + "epoch": 0.7459264926747712, "grad_norm": 0.0, - "learning_rate": 3.174439363724199e-06, - "loss": 0.8706, + "learning_rate": 3.198945438075144e-06, + "loss": 0.8075, "step": 26323 }, { - "epoch": 0.7469920544835414, + "epoch": 0.7459548301170337, "grad_norm": 0.0, - "learning_rate": 3.173767699753416e-06, - "loss": 0.8994, + "learning_rate": 3.1982726222908046e-06, + "loss": 0.7656, "step": 26324 }, { - "epoch": 0.7470204313280363, + "epoch": 0.7459831675592961, "grad_norm": 0.0, - "learning_rate": 3.1730960934441234e-06, - "loss": 0.8941, + "learning_rate": 3.197599863799944e-06, + "loss": 0.8523, "step": 26325 }, { - "epoch": 0.7470488081725312, + "epoch": 0.7460115050015586, "grad_norm": 0.0, - "learning_rate": 3.1724245448019996e-06, - "loss": 0.8326, + "learning_rate": 3.196927162608222e-06, + "loss": 0.754, "step": 26326 }, { - "epoch": 0.7470771850170261, + "epoch": 0.746039842443821, "grad_norm": 0.0, - "learning_rate": 3.171753053832709e-06, - "loss": 0.8236, + "learning_rate": 3.1962545187213123e-06, + "loss": 0.8722, "step": 26327 }, { - "epoch": 0.747105561861521, + "epoch": 0.7460681798860834, "grad_norm": 0.0, - "learning_rate": 3.171081620541927e-06, - "loss": 0.832, + "learning_rate": 3.1955819321448744e-06, + "loss": 0.7943, "step": 26328 }, { - "epoch": 0.7471339387060159, + "epoch": 0.7460965173283459, "grad_norm": 0.0, - "learning_rate": 3.170410244935329e-06, - "loss": 0.7683, + "learning_rate": 3.194909402884576e-06, + "loss": 0.8173, "step": 26329 }, { - "epoch": 0.7471623155505108, + "epoch": 0.7461248547706084, "grad_norm": 0.0, - "learning_rate": 3.16973892701858e-06, - "loss": 0.8582, + "learning_rate": 3.1942369309460864e-06, + "loss": 0.7786, "step": 26330 }, { - "epoch": 0.7471906923950057, + "epoch": 0.7461531922128709, "grad_norm": 0.0, - "learning_rate": 3.1690676667973563e-06, - "loss": 0.7669, + "learning_rate": 3.1935645163350628e-06, + "loss": 0.8167, "step": 26331 }, { - "epoch": 0.7472190692395005, + "epoch": 0.7461815296551333, "grad_norm": 0.0, - "learning_rate": 3.1683964642773215e-06, - "loss": 0.8346, + "learning_rate": 3.1928921590571726e-06, + "loss": 0.8788, "step": 26332 }, { - "epoch": 0.7472474460839955, + "epoch": 0.7462098670973958, "grad_norm": 0.0, - "learning_rate": 3.167725319464149e-06, - "loss": 0.8098, + "learning_rate": 3.192219859118083e-06, + "loss": 0.7719, "step": 26333 }, { - "epoch": 0.7472758229284904, + "epoch": 0.7462382045396583, "grad_norm": 0.0, - "learning_rate": 3.1670542323635077e-06, - "loss": 0.9046, + "learning_rate": 3.1915476165234505e-06, + "loss": 0.7803, "step": 26334 }, { - "epoch": 0.7473041997729852, + "epoch": 0.7462665419819207, "grad_norm": 0.0, - "learning_rate": 3.166383202981066e-06, - "loss": 0.8513, + "learning_rate": 3.1908754312789412e-06, + "loss": 0.8823, "step": 26335 }, { - "epoch": 0.7473325766174801, + "epoch": 0.7462948794241832, "grad_norm": 0.0, - "learning_rate": 3.165712231322493e-06, - "loss": 0.7909, + "learning_rate": 3.1902033033902156e-06, + "loss": 0.9208, "step": 26336 }, { - "epoch": 0.7473609534619751, + "epoch": 0.7463232168664456, "grad_norm": 0.0, - "learning_rate": 3.1650413173934604e-06, - "loss": 0.8374, + "learning_rate": 3.1895312328629368e-06, + "loss": 0.7367, "step": 26337 }, { - "epoch": 0.7473893303064699, + "epoch": 0.7463515543087081, "grad_norm": 0.0, - "learning_rate": 3.1643704611996262e-06, - "loss": 0.8859, + "learning_rate": 3.188859219702769e-06, + "loss": 0.7116, "step": 26338 }, { - "epoch": 0.7474177071509648, + "epoch": 0.7463798917509705, "grad_norm": 0.0, - "learning_rate": 3.1636996627466622e-06, - "loss": 0.8412, + "learning_rate": 3.1881872639153655e-06, + "loss": 0.7538, "step": 26339 }, { - "epoch": 0.7474460839954598, + "epoch": 0.746408229193233, "grad_norm": 0.0, - "learning_rate": 3.163028922040239e-06, - "loss": 0.8038, + "learning_rate": 3.1875153655063907e-06, + "loss": 0.9167, "step": 26340 }, { - "epoch": 0.7474744608399546, + "epoch": 0.7464365666354955, "grad_norm": 0.0, - "learning_rate": 3.162358239086013e-06, - "loss": 0.8786, + "learning_rate": 3.1868435244815057e-06, + "loss": 0.8076, "step": 26341 }, { - "epoch": 0.7475028376844495, + "epoch": 0.7464649040777579, "grad_norm": 0.0, - "learning_rate": 3.161687613889655e-06, - "loss": 0.7886, + "learning_rate": 3.1861717408463656e-06, + "loss": 0.8445, "step": 26342 }, { - "epoch": 0.7475312145289443, + "epoch": 0.7464932415200204, "grad_norm": 0.0, - "learning_rate": 3.1610170464568325e-06, - "loss": 0.7258, + "learning_rate": 3.185500014606634e-06, + "loss": 0.7734, "step": 26343 }, { - "epoch": 0.7475595913734393, + "epoch": 0.7465215789622829, "grad_norm": 0.0, - "learning_rate": 3.160346536793203e-06, - "loss": 0.8402, + "learning_rate": 3.184828345767963e-06, + "loss": 0.8076, "step": 26344 }, { - "epoch": 0.7475879682179342, + "epoch": 0.7465499164045454, "grad_norm": 0.0, - "learning_rate": 3.159676084904434e-06, - "loss": 0.7585, + "learning_rate": 3.1841567343360136e-06, + "loss": 0.8234, "step": 26345 }, { - "epoch": 0.747616345062429, + "epoch": 0.7465782538468078, "grad_norm": 0.0, - "learning_rate": 3.1590056907961918e-06, - "loss": 0.8353, + "learning_rate": 3.183485180316447e-06, + "loss": 0.9036, "step": 26346 }, { - "epoch": 0.747644721906924, + "epoch": 0.7466065912890703, "grad_norm": 0.0, - "learning_rate": 3.1583353544741322e-06, - "loss": 0.8326, + "learning_rate": 3.1828136837149128e-06, + "loss": 0.8834, "step": 26347 }, { - "epoch": 0.7476730987514189, + "epoch": 0.7466349287313327, "grad_norm": 0.0, - "learning_rate": 3.157665075943922e-06, - "loss": 0.7802, + "learning_rate": 3.1821422445370688e-06, + "loss": 0.8256, "step": 26348 }, { - "epoch": 0.7477014755959137, + "epoch": 0.7466632661735951, "grad_norm": 0.0, - "learning_rate": 3.156994855211226e-06, - "loss": 0.8159, + "learning_rate": 3.1814708627885736e-06, + "loss": 0.8386, "step": 26349 }, { - "epoch": 0.7477298524404086, + "epoch": 0.7466916036158576, "grad_norm": 0.0, - "learning_rate": 3.1563246922816947e-06, - "loss": 0.882, + "learning_rate": 3.180799538475081e-06, + "loss": 0.7784, "step": 26350 }, { - "epoch": 0.7477582292849035, + "epoch": 0.7467199410581201, "grad_norm": 0.0, - "learning_rate": 3.1556545871610034e-06, - "loss": 0.8721, + "learning_rate": 3.1801282716022498e-06, + "loss": 0.7423, "step": 26351 }, { - "epoch": 0.7477866061293984, + "epoch": 0.7467482785003825, "grad_norm": 0.0, - "learning_rate": 3.154984539854803e-06, - "loss": 0.8494, + "learning_rate": 3.179457062175727e-06, + "loss": 0.6747, "step": 26352 }, { - "epoch": 0.7478149829738933, + "epoch": 0.746776615942645, "grad_norm": 0.0, - "learning_rate": 3.1543145503687546e-06, - "loss": 0.6747, + "learning_rate": 3.178785910201171e-06, + "loss": 0.7979, "step": 26353 }, { - "epoch": 0.7478433598183882, + "epoch": 0.7468049533849075, "grad_norm": 0.0, - "learning_rate": 3.1536446187085236e-06, - "loss": 0.8276, + "learning_rate": 3.1781148156842368e-06, + "loss": 0.8829, "step": 26354 }, { - "epoch": 0.7478717366628831, + "epoch": 0.74683329082717, "grad_norm": 0.0, - "learning_rate": 3.1529747448797598e-06, - "loss": 0.7904, + "learning_rate": 3.177443778630571e-06, + "loss": 0.746, "step": 26355 }, { - "epoch": 0.747900113507378, + "epoch": 0.7468616282694324, "grad_norm": 0.0, - "learning_rate": 3.152304928888126e-06, - "loss": 0.7101, + "learning_rate": 3.176772799045834e-06, + "loss": 0.9665, "step": 26356 }, { - "epoch": 0.7479284903518729, + "epoch": 0.7468899657116949, "grad_norm": 0.0, - "learning_rate": 3.1516351707392843e-06, - "loss": 0.8998, + "learning_rate": 3.176101876935669e-06, + "loss": 0.8636, "step": 26357 }, { - "epoch": 0.7479568671963678, + "epoch": 0.7469183031539574, "grad_norm": 0.0, - "learning_rate": 3.150965470438885e-06, - "loss": 0.8059, + "learning_rate": 3.175431012305733e-06, + "loss": 0.8409, "step": 26358 }, { - "epoch": 0.7479852440408626, + "epoch": 0.7469466405962197, "grad_norm": 0.0, - "learning_rate": 3.150295827992588e-06, - "loss": 0.8364, + "learning_rate": 3.1747602051616787e-06, + "loss": 0.9571, "step": 26359 }, { - "epoch": 0.7480136208853575, + "epoch": 0.7469749780384822, "grad_norm": 0.0, - "learning_rate": 3.1496262434060522e-06, - "loss": 0.7401, + "learning_rate": 3.1740894555091504e-06, + "loss": 0.8835, "step": 26360 }, { - "epoch": 0.7480419977298525, + "epoch": 0.7470033154807447, "grad_norm": 0.0, - "learning_rate": 3.14895671668493e-06, - "loss": 0.8256, + "learning_rate": 3.173418763353802e-06, + "loss": 0.8127, "step": 26361 }, { - "epoch": 0.7480703745743473, + "epoch": 0.7470316529230072, "grad_norm": 0.0, - "learning_rate": 3.1482872478348768e-06, - "loss": 0.8109, + "learning_rate": 3.172748128701281e-06, + "loss": 0.8152, "step": 26362 }, { - "epoch": 0.7480987514188422, + "epoch": 0.7470599903652696, "grad_norm": 0.0, - "learning_rate": 3.147617836861554e-06, - "loss": 0.8349, + "learning_rate": 3.172077551557239e-06, + "loss": 0.8197, "step": 26363 }, { - "epoch": 0.7481271282633372, + "epoch": 0.7470883278075321, "grad_norm": 0.0, - "learning_rate": 3.1469484837706065e-06, - "loss": 0.8709, + "learning_rate": 3.171407031927325e-06, + "loss": 0.6929, "step": 26364 }, { - "epoch": 0.748155505107832, + "epoch": 0.7471166652497946, "grad_norm": 0.0, - "learning_rate": 3.1462791885676948e-06, - "loss": 0.9258, + "learning_rate": 3.170736569817183e-06, + "loss": 0.8228, "step": 26365 }, { - "epoch": 0.7481838819523269, + "epoch": 0.747145002692057, "grad_norm": 0.0, - "learning_rate": 3.14560995125847e-06, - "loss": 0.7668, + "learning_rate": 3.170066165232464e-06, + "loss": 0.8213, "step": 26366 }, { - "epoch": 0.7482122587968217, + "epoch": 0.7471733401343195, "grad_norm": 0.0, - "learning_rate": 3.144940771848586e-06, - "loss": 0.796, + "learning_rate": 3.1693958181788154e-06, + "loss": 0.7896, "step": 26367 }, { - "epoch": 0.7482406356413167, + "epoch": 0.747201677576582, "grad_norm": 0.0, - "learning_rate": 3.144271650343699e-06, - "loss": 0.8766, + "learning_rate": 3.16872552866188e-06, + "loss": 0.7833, "step": 26368 }, { - "epoch": 0.7482690124858116, + "epoch": 0.7472300150188445, "grad_norm": 0.0, - "learning_rate": 3.1436025867494545e-06, - "loss": 0.8717, + "learning_rate": 3.1680552966873057e-06, + "loss": 0.744, "step": 26369 }, { - "epoch": 0.7482973893303064, + "epoch": 0.7472583524611068, "grad_norm": 0.0, - "learning_rate": 3.1429335810715067e-06, - "loss": 0.8653, + "learning_rate": 3.167385122260742e-06, + "loss": 0.7665, "step": 26370 }, { - "epoch": 0.7483257661748014, + "epoch": 0.7472866899033693, "grad_norm": 0.0, - "learning_rate": 3.1422646333155103e-06, - "loss": 0.8804, + "learning_rate": 3.166715005387827e-06, + "loss": 0.8706, "step": 26371 }, { - "epoch": 0.7483541430192963, + "epoch": 0.7473150273456318, "grad_norm": 0.0, - "learning_rate": 3.1415957434871105e-06, - "loss": 0.7904, + "learning_rate": 3.1660449460742137e-06, + "loss": 0.9063, "step": 26372 }, { - "epoch": 0.7483825198637911, + "epoch": 0.7473433647878942, "grad_norm": 0.0, - "learning_rate": 3.1409269115919593e-06, - "loss": 0.7702, + "learning_rate": 3.1653749443255367e-06, + "loss": 0.9068, "step": 26373 }, { - "epoch": 0.7484108967082861, + "epoch": 0.7473717022301567, "grad_norm": 0.0, - "learning_rate": 3.14025813763571e-06, - "loss": 0.8281, + "learning_rate": 3.1647050001474454e-06, + "loss": 0.761, "step": 26374 }, { - "epoch": 0.748439273552781, + "epoch": 0.7474000396724192, "grad_norm": 0.0, - "learning_rate": 3.1395894216240054e-06, - "loss": 0.6728, + "learning_rate": 3.1640351135455814e-06, + "loss": 0.827, "step": 26375 }, { - "epoch": 0.7484676503972758, + "epoch": 0.7474283771146816, "grad_norm": 0.0, - "learning_rate": 3.1389207635624974e-06, - "loss": 0.9048, + "learning_rate": 3.163365284525589e-06, + "loss": 0.8561, "step": 26376 }, { - "epoch": 0.7484960272417707, + "epoch": 0.7474567145569441, "grad_norm": 0.0, - "learning_rate": 3.1382521634568365e-06, - "loss": 0.8, + "learning_rate": 3.162695513093109e-06, + "loss": 0.7253, "step": 26377 }, { - "epoch": 0.7485244040862656, + "epoch": 0.7474850519992066, "grad_norm": 0.0, - "learning_rate": 3.1375836213126653e-06, - "loss": 0.8628, + "learning_rate": 3.1620257992537872e-06, + "loss": 0.8561, "step": 26378 }, { - "epoch": 0.7485527809307605, + "epoch": 0.7475133894414691, "grad_norm": 0.0, - "learning_rate": 3.1369151371356343e-06, - "loss": 0.8134, + "learning_rate": 3.161356143013258e-06, + "loss": 0.7798, "step": 26379 }, { - "epoch": 0.7485811577752554, + "epoch": 0.7475417268837314, "grad_norm": 0.0, - "learning_rate": 3.1362467109313898e-06, - "loss": 0.8239, + "learning_rate": 3.1606865443771685e-06, + "loss": 0.7626, "step": 26380 }, { - "epoch": 0.7486095346197503, + "epoch": 0.7475700643259939, "grad_norm": 0.0, - "learning_rate": 3.1355783427055773e-06, - "loss": 0.7785, + "learning_rate": 3.1600170033511525e-06, + "loss": 0.7842, "step": 26381 }, { - "epoch": 0.7486379114642452, + "epoch": 0.7475984017682564, "grad_norm": 0.0, - "learning_rate": 3.134910032463846e-06, - "loss": 0.762, + "learning_rate": 3.159347519940853e-06, + "loss": 0.8311, "step": 26382 }, { - "epoch": 0.74866628830874, + "epoch": 0.7476267392105188, "grad_norm": 0.0, - "learning_rate": 3.1342417802118365e-06, - "loss": 0.8149, + "learning_rate": 3.1586780941519135e-06, + "loss": 0.8532, "step": 26383 }, { - "epoch": 0.7486946651532349, + "epoch": 0.7476550766527813, "grad_norm": 0.0, - "learning_rate": 3.133573585955194e-06, - "loss": 0.9041, + "learning_rate": 3.1580087259899662e-06, + "loss": 0.9638, "step": 26384 }, { - "epoch": 0.7487230419977299, + "epoch": 0.7476834140950438, "grad_norm": 0.0, - "learning_rate": 3.132905449699567e-06, - "loss": 0.8137, + "learning_rate": 3.157339415460654e-06, + "loss": 0.7889, "step": 26385 }, { - "epoch": 0.7487514188422247, + "epoch": 0.7477117515373063, "grad_norm": 0.0, - "learning_rate": 3.132237371450594e-06, - "loss": 0.8282, + "learning_rate": 3.1566701625696108e-06, + "loss": 0.7064, "step": 26386 }, { - "epoch": 0.7487797956867196, + "epoch": 0.7477400889795687, "grad_norm": 0.0, - "learning_rate": 3.13156935121392e-06, - "loss": 0.7822, + "learning_rate": 3.1560009673224758e-06, + "loss": 0.7848, "step": 26387 }, { - "epoch": 0.7488081725312146, + "epoch": 0.7477684264218312, "grad_norm": 0.0, - "learning_rate": 3.130901388995192e-06, - "loss": 0.8076, + "learning_rate": 3.1553318297248847e-06, + "loss": 0.7417, "step": 26388 }, { - "epoch": 0.7488365493757094, + "epoch": 0.7477967638640937, "grad_norm": 0.0, - "learning_rate": 3.130233484800046e-06, - "loss": 0.8082, + "learning_rate": 3.1546627497824767e-06, + "loss": 0.8725, "step": 26389 }, { - "epoch": 0.7488649262202043, + "epoch": 0.747825101306356, "grad_norm": 0.0, - "learning_rate": 3.1295656386341267e-06, - "loss": 0.8095, + "learning_rate": 3.1539937275008857e-06, + "loss": 0.8755, "step": 26390 }, { - "epoch": 0.7488933030646993, + "epoch": 0.7478534387486185, "grad_norm": 0.0, - "learning_rate": 3.1288978505030777e-06, - "loss": 0.7326, + "learning_rate": 3.1533247628857523e-06, + "loss": 0.8289, "step": 26391 }, { - "epoch": 0.7489216799091941, + "epoch": 0.747881776190881, "grad_norm": 0.0, - "learning_rate": 3.1282301204125342e-06, - "loss": 0.7857, + "learning_rate": 3.1526558559427023e-06, + "loss": 0.7409, "step": 26392 }, { - "epoch": 0.748950056753689, + "epoch": 0.7479101136331435, "grad_norm": 0.0, - "learning_rate": 3.12756244836814e-06, - "loss": 0.7939, + "learning_rate": 3.1519870066773783e-06, + "loss": 0.8721, "step": 26393 }, { - "epoch": 0.7489784335981838, + "epoch": 0.7479384510754059, "grad_norm": 0.0, - "learning_rate": 3.126894834375539e-06, - "loss": 0.8559, + "learning_rate": 3.1513182150954067e-06, + "loss": 0.8368, "step": 26394 }, { - "epoch": 0.7490068104426788, + "epoch": 0.7479667885176684, "grad_norm": 0.0, - "learning_rate": 3.1262272784403613e-06, - "loss": 0.871, + "learning_rate": 3.150649481202426e-06, + "loss": 0.857, "step": 26395 }, { - "epoch": 0.7490351872871737, + "epoch": 0.7479951259599309, "grad_norm": 0.0, - "learning_rate": 3.1255597805682515e-06, - "loss": 0.8151, + "learning_rate": 3.1499808050040713e-06, + "loss": 0.8633, "step": 26396 }, { - "epoch": 0.7490635641316685, + "epoch": 0.7480234634021933, "grad_norm": 0.0, - "learning_rate": 3.124892340764848e-06, - "loss": 0.8741, + "learning_rate": 3.1493121865059684e-06, + "loss": 0.7414, "step": 26397 }, { - "epoch": 0.7490919409761635, + "epoch": 0.7480518008444558, "grad_norm": 0.0, - "learning_rate": 3.1242249590357877e-06, - "loss": 0.8159, + "learning_rate": 3.148643625713753e-06, + "loss": 0.9167, "step": 26398 }, { - "epoch": 0.7491203178206584, + "epoch": 0.7480801382867183, "grad_norm": 0.0, - "learning_rate": 3.12355763538671e-06, - "loss": 0.7676, + "learning_rate": 3.1479751226330567e-06, + "loss": 0.8107, "step": 26399 }, { - "epoch": 0.7491486946651532, + "epoch": 0.7481084757289806, "grad_norm": 0.0, - "learning_rate": 3.122890369823248e-06, - "loss": 0.7603, + "learning_rate": 3.1473066772695105e-06, + "loss": 0.7158, "step": 26400 }, { - "epoch": 0.7491770715096481, + "epoch": 0.7481368131712431, "grad_norm": 0.0, - "learning_rate": 3.12222316235104e-06, - "loss": 0.8843, + "learning_rate": 3.1466382896287474e-06, + "loss": 0.9784, "step": 26401 }, { - "epoch": 0.749205448354143, + "epoch": 0.7481651506135056, "grad_norm": 0.0, - "learning_rate": 3.121556012975726e-06, - "loss": 0.7987, + "learning_rate": 3.1459699597163917e-06, + "loss": 0.8781, "step": 26402 }, { - "epoch": 0.7492338251986379, + "epoch": 0.7481934880557681, "grad_norm": 0.0, - "learning_rate": 3.1208889217029336e-06, - "loss": 0.7124, + "learning_rate": 3.145301687538077e-06, + "loss": 0.8453, "step": 26403 }, { - "epoch": 0.7492622020431328, + "epoch": 0.7482218254980305, "grad_norm": 0.0, - "learning_rate": 3.120221888538303e-06, - "loss": 0.9607, + "learning_rate": 3.144633473099434e-06, + "loss": 0.8829, "step": 26404 }, { - "epoch": 0.7492905788876277, + "epoch": 0.748250162940293, "grad_norm": 0.0, - "learning_rate": 3.119554913487469e-06, - "loss": 0.8155, + "learning_rate": 3.143965316406087e-06, + "loss": 0.8037, "step": 26405 }, { - "epoch": 0.7493189557321226, + "epoch": 0.7482785003825555, "grad_norm": 0.0, - "learning_rate": 3.1188879965560614e-06, - "loss": 0.9381, + "learning_rate": 3.1432972174636646e-06, + "loss": 0.9703, "step": 26406 }, { - "epoch": 0.7493473325766175, + "epoch": 0.7483068378248179, "grad_norm": 0.0, - "learning_rate": 3.118221137749716e-06, - "loss": 0.8076, + "learning_rate": 3.1426291762777994e-06, + "loss": 0.8461, "step": 26407 }, { - "epoch": 0.7493757094211124, + "epoch": 0.7483351752670804, "grad_norm": 0.0, - "learning_rate": 3.117554337074069e-06, - "loss": 0.8197, + "learning_rate": 3.141961192854113e-06, + "loss": 0.7662, "step": 26408 }, { - "epoch": 0.7494040862656073, + "epoch": 0.7483635127093429, "grad_norm": 0.0, - "learning_rate": 3.116887594534748e-06, - "loss": 0.7543, + "learning_rate": 3.1412932671982368e-06, + "loss": 0.8395, "step": 26409 }, { - "epoch": 0.7494324631101021, + "epoch": 0.7483918501516054, "grad_norm": 0.0, - "learning_rate": 3.116220910137385e-06, - "loss": 0.7994, + "learning_rate": 3.14062539931579e-06, + "loss": 0.8722, "step": 26410 }, { - "epoch": 0.749460839954597, + "epoch": 0.7484201875938677, "grad_norm": 0.0, - "learning_rate": 3.115554283887614e-06, - "loss": 0.8172, + "learning_rate": 3.1399575892124035e-06, + "loss": 0.7667, "step": 26411 }, { - "epoch": 0.749489216799092, + "epoch": 0.7484485250361302, "grad_norm": 0.0, - "learning_rate": 3.114887715791066e-06, - "loss": 0.7918, + "learning_rate": 3.139289836893702e-06, + "loss": 0.8328, "step": 26412 }, { - "epoch": 0.7495175936435868, + "epoch": 0.7484768624783927, "grad_norm": 0.0, - "learning_rate": 3.1142212058533738e-06, - "loss": 0.748, + "learning_rate": 3.1386221423653096e-06, + "loss": 0.7208, "step": 26413 }, { - "epoch": 0.7495459704880817, + "epoch": 0.7485051999206551, "grad_norm": 0.0, - "learning_rate": 3.1135547540801625e-06, - "loss": 0.8632, + "learning_rate": 3.137954505632854e-06, + "loss": 0.8379, "step": 26414 }, { - "epoch": 0.7495743473325767, + "epoch": 0.7485335373629176, "grad_norm": 0.0, - "learning_rate": 3.112888360477062e-06, - "loss": 0.7155, + "learning_rate": 3.1372869267019525e-06, + "loss": 0.8113, "step": 26415 }, { - "epoch": 0.7496027241770715, + "epoch": 0.7485618748051801, "grad_norm": 0.0, - "learning_rate": 3.112222025049707e-06, - "loss": 0.8603, + "learning_rate": 3.136619405578232e-06, + "loss": 0.7983, "step": 26416 }, { - "epoch": 0.7496311010215664, + "epoch": 0.7485902122474426, "grad_norm": 0.0, - "learning_rate": 3.1115557478037196e-06, - "loss": 0.8072, + "learning_rate": 3.135951942267317e-06, + "loss": 0.785, "step": 26417 }, { - "epoch": 0.7496594778660612, + "epoch": 0.748618549689705, "grad_norm": 0.0, - "learning_rate": 3.1108895287447303e-06, - "loss": 0.789, + "learning_rate": 3.135284536774825e-06, + "loss": 0.7421, "step": 26418 }, { - "epoch": 0.7496878547105562, + "epoch": 0.7486468871319675, "grad_norm": 0.0, - "learning_rate": 3.11022336787837e-06, - "loss": 0.8193, + "learning_rate": 3.13461718910638e-06, + "loss": 0.7493, "step": 26419 }, { - "epoch": 0.7497162315550511, + "epoch": 0.74867522457423, "grad_norm": 0.0, - "learning_rate": 3.109557265210259e-06, - "loss": 0.8514, + "learning_rate": 3.1339498992676087e-06, + "loss": 0.8804, "step": 26420 }, { - "epoch": 0.7497446083995459, + "epoch": 0.7487035620164924, "grad_norm": 0.0, - "learning_rate": 3.1088912207460276e-06, - "loss": 0.8309, + "learning_rate": 3.1332826672641227e-06, + "loss": 0.8482, "step": 26421 }, { - "epoch": 0.7497729852440409, + "epoch": 0.7487318994587548, "grad_norm": 0.0, - "learning_rate": 3.1082252344913045e-06, - "loss": 0.8112, + "learning_rate": 3.1326154931015496e-06, + "loss": 0.8099, "step": 26422 }, { - "epoch": 0.7498013620885358, + "epoch": 0.7487602369010173, "grad_norm": 0.0, - "learning_rate": 3.1075593064517105e-06, - "loss": 0.7551, + "learning_rate": 3.1319483767855042e-06, + "loss": 0.972, "step": 26423 }, { - "epoch": 0.7498297389330306, + "epoch": 0.7487885743432797, "grad_norm": 0.0, - "learning_rate": 3.1068934366328727e-06, - "loss": 0.8665, + "learning_rate": 3.131281318321607e-06, + "loss": 0.9625, "step": 26424 }, { - "epoch": 0.7498581157775256, + "epoch": 0.7488169117855422, "grad_norm": 0.0, - "learning_rate": 3.106227625040419e-06, - "loss": 0.9057, + "learning_rate": 3.130614317715478e-06, + "loss": 0.7817, "step": 26425 }, { - "epoch": 0.7498864926220205, + "epoch": 0.7488452492278047, "grad_norm": 0.0, - "learning_rate": 3.105561871679966e-06, - "loss": 0.8691, + "learning_rate": 3.129947374972736e-06, + "loss": 0.9031, "step": 26426 }, { - "epoch": 0.7499148694665153, + "epoch": 0.7488735866700672, "grad_norm": 0.0, - "learning_rate": 3.104896176557141e-06, - "loss": 0.8053, + "learning_rate": 3.1292804900989983e-06, + "loss": 0.8438, "step": 26427 }, { - "epoch": 0.7499432463110102, + "epoch": 0.7489019241123296, "grad_norm": 0.0, - "learning_rate": 3.1042305396775728e-06, - "loss": 0.9173, + "learning_rate": 3.128613663099885e-06, + "loss": 0.8067, "step": 26428 }, { - "epoch": 0.7499716231555051, + "epoch": 0.7489302615545921, "grad_norm": 0.0, - "learning_rate": 3.1035649610468753e-06, - "loss": 0.8286, + "learning_rate": 3.127946893981009e-06, + "loss": 0.92, "step": 26429 }, { - "epoch": 0.75, + "epoch": 0.7489585989968546, "grad_norm": 0.0, - "learning_rate": 3.1028994406706757e-06, - "loss": 0.8642, + "learning_rate": 3.1272801827479894e-06, + "loss": 0.8387, "step": 26430 }, { - "epoch": 0.7500283768444949, + "epoch": 0.748986936439117, "grad_norm": 0.0, - "learning_rate": 3.1022339785545986e-06, - "loss": 0.8142, + "learning_rate": 3.1266135294064383e-06, + "loss": 0.6937, "step": 26431 }, { - "epoch": 0.7500567536889898, + "epoch": 0.7490152738813795, "grad_norm": 0.0, - "learning_rate": 3.101568574704257e-06, - "loss": 0.8024, + "learning_rate": 3.125946933961974e-06, + "loss": 0.8052, "step": 26432 }, { - "epoch": 0.7500851305334847, + "epoch": 0.749043611323642, "grad_norm": 0.0, - "learning_rate": 3.100903229125275e-06, - "loss": 0.706, + "learning_rate": 3.125280396420214e-06, + "loss": 0.8303, "step": 26433 }, { - "epoch": 0.7501135073779795, + "epoch": 0.7490719487659044, "grad_norm": 0.0, - "learning_rate": 3.1002379418232773e-06, - "loss": 0.8231, + "learning_rate": 3.124613916786767e-06, + "loss": 0.8443, "step": 26434 }, { - "epoch": 0.7501418842224744, + "epoch": 0.7491002862081668, "grad_norm": 0.0, - "learning_rate": 3.0995727128038775e-06, - "loss": 0.8209, + "learning_rate": 3.123947495067251e-06, + "loss": 0.8054, "step": 26435 }, { - "epoch": 0.7501702610669694, + "epoch": 0.7491286236504293, "grad_norm": 0.0, - "learning_rate": 3.0989075420726967e-06, - "loss": 0.7974, + "learning_rate": 3.1232811312672817e-06, + "loss": 0.8101, "step": 26436 }, { - "epoch": 0.7501986379114642, + "epoch": 0.7491569610926918, "grad_norm": 0.0, - "learning_rate": 3.0982424296353576e-06, - "loss": 0.7863, + "learning_rate": 3.122614825392465e-06, + "loss": 0.8535, "step": 26437 }, { - "epoch": 0.7502270147559591, + "epoch": 0.7491852985349542, "grad_norm": 0.0, - "learning_rate": 3.0975773754974705e-06, - "loss": 0.9217, + "learning_rate": 3.121948577448418e-06, + "loss": 0.7896, "step": 26438 }, { - "epoch": 0.7502553916004541, + "epoch": 0.7492136359772167, "grad_norm": 0.0, - "learning_rate": 3.0969123796646617e-06, - "loss": 0.9422, + "learning_rate": 3.1212823874407517e-06, + "loss": 0.778, "step": 26439 }, { - "epoch": 0.7502837684449489, + "epoch": 0.7492419734194792, "grad_norm": 0.0, - "learning_rate": 3.0962474421425413e-06, - "loss": 0.7675, + "learning_rate": 3.1206162553750785e-06, + "loss": 0.7732, "step": 26440 }, { - "epoch": 0.7503121452894438, + "epoch": 0.7492703108617417, "grad_norm": 0.0, - "learning_rate": 3.0955825629367288e-06, - "loss": 0.8298, + "learning_rate": 3.1199501812570133e-06, + "loss": 0.8661, "step": 26441 }, { - "epoch": 0.7503405221339388, + "epoch": 0.7492986483040041, "grad_norm": 0.0, - "learning_rate": 3.094917742052841e-06, - "loss": 0.8341, + "learning_rate": 3.119284165092158e-06, + "loss": 0.7926, "step": 26442 }, { - "epoch": 0.7503688989784336, + "epoch": 0.7493269857462665, "grad_norm": 0.0, - "learning_rate": 3.0942529794964926e-06, - "loss": 0.7798, + "learning_rate": 3.1186182068861306e-06, + "loss": 0.8074, "step": 26443 }, { - "epoch": 0.7503972758229285, + "epoch": 0.749355323188529, "grad_norm": 0.0, - "learning_rate": 3.0935882752733005e-06, - "loss": 0.8249, + "learning_rate": 3.117952306644535e-06, + "loss": 0.7644, "step": 26444 }, { - "epoch": 0.7504256526674233, + "epoch": 0.7493836606307914, "grad_norm": 0.0, - "learning_rate": 3.092923629388882e-06, - "loss": 0.7832, + "learning_rate": 3.1172864643729815e-06, + "loss": 0.7837, "step": 26445 }, { - "epoch": 0.7504540295119183, + "epoch": 0.7494119980730539, "grad_norm": 0.0, - "learning_rate": 3.092259041848845e-06, - "loss": 0.8572, + "learning_rate": 3.1166206800770847e-06, + "loss": 0.7702, "step": 26446 }, { - "epoch": 0.7504824063564132, + "epoch": 0.7494403355153164, "grad_norm": 0.0, - "learning_rate": 3.0915945126588063e-06, - "loss": 0.8484, + "learning_rate": 3.1159549537624434e-06, + "loss": 0.8267, "step": 26447 }, { - "epoch": 0.750510783200908, + "epoch": 0.7494686729575788, "grad_norm": 0.0, - "learning_rate": 3.090930041824383e-06, - "loss": 0.7743, + "learning_rate": 3.1152892854346707e-06, + "loss": 0.9099, "step": 26448 }, { - "epoch": 0.750539160045403, + "epoch": 0.7494970103998413, "grad_norm": 0.0, - "learning_rate": 3.090265629351179e-06, - "loss": 0.7241, + "learning_rate": 3.1146236750993763e-06, + "loss": 0.9514, "step": 26449 }, { - "epoch": 0.7505675368898979, + "epoch": 0.7495253478421038, "grad_norm": 0.0, - "learning_rate": 3.0896012752448134e-06, - "loss": 0.7696, + "learning_rate": 3.1139581227621595e-06, + "loss": 0.8658, "step": 26450 }, { - "epoch": 0.7505959137343927, + "epoch": 0.7495536852843663, "grad_norm": 0.0, - "learning_rate": 3.088936979510899e-06, - "loss": 0.8244, + "learning_rate": 3.113292628428631e-06, + "loss": 0.8081, "step": 26451 }, { - "epoch": 0.7506242905788876, + "epoch": 0.7495820227266287, "grad_norm": 0.0, - "learning_rate": 3.0882727421550428e-06, - "loss": 0.8245, + "learning_rate": 3.112627192104396e-06, + "loss": 0.8371, "step": 26452 }, { - "epoch": 0.7506526674233825, + "epoch": 0.7496103601688912, "grad_norm": 0.0, - "learning_rate": 3.0876085631828568e-06, - "loss": 0.8887, + "learning_rate": 3.1119618137950593e-06, + "loss": 0.8218, "step": 26453 }, { - "epoch": 0.7506810442678774, + "epoch": 0.7496386976111536, "grad_norm": 0.0, - "learning_rate": 3.086944442599954e-06, - "loss": 0.8218, + "learning_rate": 3.1112964935062297e-06, + "loss": 0.8061, "step": 26454 }, { - "epoch": 0.7507094211123723, + "epoch": 0.749667035053416, "grad_norm": 0.0, - "learning_rate": 3.08628038041194e-06, - "loss": 0.8075, + "learning_rate": 3.110631231243505e-06, + "loss": 0.842, "step": 26455 }, { - "epoch": 0.7507377979568672, + "epoch": 0.7496953724956785, "grad_norm": 0.0, - "learning_rate": 3.085616376624426e-06, - "loss": 0.7523, + "learning_rate": 3.1099660270124908e-06, + "loss": 0.7786, "step": 26456 }, { - "epoch": 0.7507661748013621, + "epoch": 0.749723709937941, "grad_norm": 0.0, - "learning_rate": 3.0849524312430223e-06, - "loss": 0.8047, + "learning_rate": 3.1093008808187952e-06, + "loss": 0.7883, "step": 26457 }, { - "epoch": 0.750794551645857, + "epoch": 0.7497520473802035, "grad_norm": 0.0, - "learning_rate": 3.084288544273336e-06, - "loss": 0.8073, + "learning_rate": 3.1086357926680134e-06, + "loss": 0.7974, "step": 26458 }, { - "epoch": 0.7508229284903519, + "epoch": 0.7497803848224659, "grad_norm": 0.0, - "learning_rate": 3.0836247157209775e-06, - "loss": 0.8615, + "learning_rate": 3.107970762565755e-06, + "loss": 0.8358, "step": 26459 }, { - "epoch": 0.7508513053348468, + "epoch": 0.7498087222647284, "grad_norm": 0.0, - "learning_rate": 3.08296094559155e-06, - "loss": 0.8445, + "learning_rate": 3.107305790517614e-06, + "loss": 0.7216, "step": 26460 }, { - "epoch": 0.7508796821793416, + "epoch": 0.7498370597069909, "grad_norm": 0.0, - "learning_rate": 3.082297233890661e-06, - "loss": 0.8721, + "learning_rate": 3.1066408765291966e-06, + "loss": 0.7375, "step": 26461 }, { - "epoch": 0.7509080590238365, + "epoch": 0.7498653971492533, "grad_norm": 0.0, - "learning_rate": 3.081633580623923e-06, - "loss": 0.8781, + "learning_rate": 3.105976020606106e-06, + "loss": 0.871, "step": 26462 }, { - "epoch": 0.7509364358683315, + "epoch": 0.7498937345915158, "grad_norm": 0.0, - "learning_rate": 3.0809699857969323e-06, - "loss": 0.9177, + "learning_rate": 3.105311222753936e-06, + "loss": 0.7946, "step": 26463 }, { - "epoch": 0.7509648127128263, + "epoch": 0.7499220720337783, "grad_norm": 0.0, - "learning_rate": 3.0803064494153002e-06, - "loss": 0.7953, + "learning_rate": 3.1046464829782906e-06, + "loss": 0.7703, "step": 26464 }, { - "epoch": 0.7509931895573212, + "epoch": 0.7499504094760407, "grad_norm": 0.0, - "learning_rate": 3.079642971484633e-06, - "loss": 0.8918, + "learning_rate": 3.1039818012847676e-06, + "loss": 0.8259, "step": 26465 }, { - "epoch": 0.7510215664018162, + "epoch": 0.7499787469183031, "grad_norm": 0.0, - "learning_rate": 3.0789795520105303e-06, - "loss": 0.8409, + "learning_rate": 3.103317177678967e-06, + "loss": 0.7812, "step": 26466 }, { - "epoch": 0.751049943246311, + "epoch": 0.7500070843605656, "grad_norm": 0.0, - "learning_rate": 3.078316190998598e-06, - "loss": 0.7729, + "learning_rate": 3.1026526121664903e-06, + "loss": 0.7534, "step": 26467 }, { - "epoch": 0.7510783200908059, + "epoch": 0.7500354218028281, "grad_norm": 0.0, - "learning_rate": 3.0776528884544433e-06, - "loss": 0.7699, + "learning_rate": 3.1019881047529286e-06, + "loss": 0.8024, "step": 26468 }, { - "epoch": 0.7511066969353007, + "epoch": 0.7500637592450905, "grad_norm": 0.0, - "learning_rate": 3.076989644383663e-06, - "loss": 0.8183, + "learning_rate": 3.101323655443882e-06, + "loss": 0.7974, "step": 26469 }, { - "epoch": 0.7511350737797957, + "epoch": 0.750092096687353, "grad_norm": 0.0, - "learning_rate": 3.0763264587918616e-06, - "loss": 0.7751, + "learning_rate": 3.1006592642449516e-06, + "loss": 0.7015, "step": 26470 }, { - "epoch": 0.7511634506242906, + "epoch": 0.7501204341296155, "grad_norm": 0.0, - "learning_rate": 3.0756633316846463e-06, - "loss": 0.8914, + "learning_rate": 3.0999949311617273e-06, + "loss": 0.7338, "step": 26471 }, { - "epoch": 0.7511918274687854, + "epoch": 0.7501487715718779, "grad_norm": 0.0, - "learning_rate": 3.07500026306761e-06, - "loss": 0.8767, + "learning_rate": 3.0993306561998116e-06, + "loss": 0.8303, "step": 26472 }, { - "epoch": 0.7512202043132804, + "epoch": 0.7501771090141404, "grad_norm": 0.0, - "learning_rate": 3.0743372529463577e-06, - "loss": 0.8775, + "learning_rate": 3.0986664393647925e-06, + "loss": 0.8657, "step": 26473 }, { - "epoch": 0.7512485811577753, + "epoch": 0.7502054464564029, "grad_norm": 0.0, - "learning_rate": 3.0736743013264903e-06, - "loss": 0.8813, + "learning_rate": 3.098002280662268e-06, + "loss": 0.7922, "step": 26474 }, { - "epoch": 0.7512769580022701, + "epoch": 0.7502337838986654, "grad_norm": 0.0, - "learning_rate": 3.0730114082136077e-06, - "loss": 0.75, + "learning_rate": 3.0973381800978374e-06, + "loss": 0.845, "step": 26475 }, { - "epoch": 0.7513053348467651, + "epoch": 0.7502621213409277, "grad_norm": 0.0, - "learning_rate": 3.0723485736133117e-06, - "loss": 0.8332, + "learning_rate": 3.096674137677087e-06, + "loss": 0.8509, "step": 26476 }, { - "epoch": 0.75133371169126, + "epoch": 0.7502904587831902, "grad_norm": 0.0, - "learning_rate": 3.0716857975311953e-06, - "loss": 0.8874, + "learning_rate": 3.096010153405614e-06, + "loss": 0.843, "step": 26477 }, { - "epoch": 0.7513620885357548, + "epoch": 0.7503187962254527, "grad_norm": 0.0, - "learning_rate": 3.0710230799728613e-06, - "loss": 0.8055, + "learning_rate": 3.095346227289011e-06, + "loss": 0.8356, "step": 26478 }, { - "epoch": 0.7513904653802497, + "epoch": 0.7503471336677151, "grad_norm": 0.0, - "learning_rate": 3.0703604209439097e-06, - "loss": 0.7652, + "learning_rate": 3.094682359332871e-06, + "loss": 0.8668, "step": 26479 }, { - "epoch": 0.7514188422247446, + "epoch": 0.7503754711099776, "grad_norm": 0.0, - "learning_rate": 3.0696978204499318e-06, - "loss": 0.8062, + "learning_rate": 3.0940185495427887e-06, + "loss": 0.862, "step": 26480 }, { - "epoch": 0.7514472190692395, + "epoch": 0.7504038085522401, "grad_norm": 0.0, - "learning_rate": 3.0690352784965274e-06, - "loss": 0.9251, + "learning_rate": 3.0933547979243494e-06, + "loss": 0.7942, "step": 26481 }, { - "epoch": 0.7514755959137344, + "epoch": 0.7504321459945026, "grad_norm": 0.0, - "learning_rate": 3.068372795089297e-06, - "loss": 0.782, + "learning_rate": 3.0926911044831476e-06, + "loss": 0.8271, "step": 26482 }, { - "epoch": 0.7515039727582293, + "epoch": 0.750460483436765, "grad_norm": 0.0, - "learning_rate": 3.0677103702338306e-06, - "loss": 0.8082, + "learning_rate": 3.0920274692247765e-06, + "loss": 0.8836, "step": 26483 }, { - "epoch": 0.7515323496027242, + "epoch": 0.7504888208790275, "grad_norm": 0.0, - "learning_rate": 3.0670480039357264e-06, - "loss": 0.7542, + "learning_rate": 3.0913638921548195e-06, + "loss": 0.8419, "step": 26484 }, { - "epoch": 0.751560726447219, + "epoch": 0.75051715832129, "grad_norm": 0.0, - "learning_rate": 3.0663856962005822e-06, - "loss": 0.8415, + "learning_rate": 3.090700373278871e-06, + "loss": 0.761, "step": 26485 }, { - "epoch": 0.7515891032917139, + "epoch": 0.7505454957635523, "grad_norm": 0.0, - "learning_rate": 3.065723447033987e-06, - "loss": 0.8507, + "learning_rate": 3.090036912602522e-06, + "loss": 0.7756, "step": 26486 }, { - "epoch": 0.7516174801362089, + "epoch": 0.7505738332058148, "grad_norm": 0.0, - "learning_rate": 3.065061256441536e-06, - "loss": 0.9136, + "learning_rate": 3.089373510131354e-06, + "loss": 0.7025, "step": 26487 }, { - "epoch": 0.7516458569807037, + "epoch": 0.7506021706480773, "grad_norm": 0.0, - "learning_rate": 3.064399124428826e-06, - "loss": 0.9674, + "learning_rate": 3.088710165870963e-06, + "loss": 0.7255, "step": 26488 }, { - "epoch": 0.7516742338251986, + "epoch": 0.7506305080903398, "grad_norm": 0.0, - "learning_rate": 3.0637370510014474e-06, - "loss": 0.6662, + "learning_rate": 3.0880468798269293e-06, + "loss": 0.8099, "step": 26489 }, { - "epoch": 0.7517026106696936, + "epoch": 0.7506588455326022, "grad_norm": 0.0, - "learning_rate": 3.0630750361649974e-06, - "loss": 0.8803, + "learning_rate": 3.087383652004844e-06, + "loss": 0.7915, "step": 26490 }, { - "epoch": 0.7517309875141884, + "epoch": 0.7506871829748647, "grad_norm": 0.0, - "learning_rate": 3.062413079925062e-06, - "loss": 0.7691, + "learning_rate": 3.0867204824102926e-06, + "loss": 0.7259, "step": 26491 }, { - "epoch": 0.7517593643586833, + "epoch": 0.7507155204171272, "grad_norm": 0.0, - "learning_rate": 3.0617511822872337e-06, - "loss": 0.7426, + "learning_rate": 3.0860573710488616e-06, + "loss": 0.9174, "step": 26492 }, { - "epoch": 0.7517877412031783, + "epoch": 0.7507438578593896, "grad_norm": 0.0, - "learning_rate": 3.0610893432571086e-06, - "loss": 0.8405, + "learning_rate": 3.0853943179261405e-06, + "loss": 0.9349, "step": 26493 }, { - "epoch": 0.7518161180476731, + "epoch": 0.7507721953016521, "grad_norm": 0.0, - "learning_rate": 3.0604275628402713e-06, - "loss": 0.885, + "learning_rate": 3.084731323047707e-06, + "loss": 0.9392, "step": 26494 }, { - "epoch": 0.751844494892168, + "epoch": 0.7508005327439146, "grad_norm": 0.0, - "learning_rate": 3.0597658410423135e-06, - "loss": 0.736, + "learning_rate": 3.084068386419149e-06, + "loss": 0.7043, "step": 26495 }, { - "epoch": 0.7518728717366628, + "epoch": 0.7508288701861769, "grad_norm": 0.0, - "learning_rate": 3.059104177868829e-06, - "loss": 0.7454, + "learning_rate": 3.0834055080460558e-06, + "loss": 0.8461, "step": 26496 }, { - "epoch": 0.7519012485811578, + "epoch": 0.7508572076284394, "grad_norm": 0.0, - "learning_rate": 3.058442573325401e-06, - "loss": 0.7538, + "learning_rate": 3.0827426879340017e-06, + "loss": 0.8472, "step": 26497 }, { - "epoch": 0.7519296254256527, + "epoch": 0.7508855450707019, "grad_norm": 0.0, - "learning_rate": 3.05778102741762e-06, - "loss": 0.9208, + "learning_rate": 3.082079926088576e-06, + "loss": 0.8297, "step": 26498 }, { - "epoch": 0.7519580022701475, + "epoch": 0.7509138825129644, "grad_norm": 0.0, - "learning_rate": 3.0571195401510777e-06, - "loss": 0.8629, + "learning_rate": 3.0814172225153626e-06, + "loss": 0.8779, "step": 26499 }, { - "epoch": 0.7519863791146425, + "epoch": 0.7509422199552268, "grad_norm": 0.0, - "learning_rate": 3.056458111531356e-06, - "loss": 0.9111, + "learning_rate": 3.0807545772199377e-06, + "loss": 0.8387, "step": 26500 }, { - "epoch": 0.7520147559591374, + "epoch": 0.7509705573974893, "grad_norm": 0.0, - "learning_rate": 3.0557967415640456e-06, - "loss": 0.7792, + "learning_rate": 3.0800919902078897e-06, + "loss": 0.7773, "step": 26501 }, { - "epoch": 0.7520431328036322, + "epoch": 0.7509988948397518, "grad_norm": 0.0, - "learning_rate": 3.0551354302547343e-06, - "loss": 0.7464, + "learning_rate": 3.079429461484793e-06, + "loss": 0.8048, "step": 26502 }, { - "epoch": 0.7520715096481271, + "epoch": 0.7510272322820142, "grad_norm": 0.0, - "learning_rate": 3.0544741776089993e-06, - "loss": 0.9159, + "learning_rate": 3.0787669910562323e-06, + "loss": 0.7966, "step": 26503 }, { - "epoch": 0.752099886492622, + "epoch": 0.7510555697242767, "grad_norm": 0.0, - "learning_rate": 3.05381298363244e-06, - "loss": 0.8254, + "learning_rate": 3.0781045789277875e-06, + "loss": 0.898, "step": 26504 }, { - "epoch": 0.7521282633371169, + "epoch": 0.7510839071665392, "grad_norm": 0.0, - "learning_rate": 3.053151848330632e-06, - "loss": 0.8672, + "learning_rate": 3.0774422251050386e-06, + "loss": 0.8036, "step": 26505 }, { - "epoch": 0.7521566401816118, + "epoch": 0.7511122446088017, "grad_norm": 0.0, - "learning_rate": 3.0524907717091636e-06, - "loss": 0.781, + "learning_rate": 3.076779929593563e-06, + "loss": 0.8391, "step": 26506 }, { - "epoch": 0.7521850170261067, + "epoch": 0.751140582051064, "grad_norm": 0.0, - "learning_rate": 3.0518297537736197e-06, - "loss": 0.7557, + "learning_rate": 3.0761176923989456e-06, + "loss": 0.7831, "step": 26507 }, { - "epoch": 0.7522133938706016, + "epoch": 0.7511689194933265, "grad_norm": 0.0, - "learning_rate": 3.0511687945295788e-06, - "loss": 0.7734, + "learning_rate": 3.075455513526756e-06, + "loss": 0.8285, "step": 26508 }, { - "epoch": 0.7522417707150965, + "epoch": 0.751197256935589, "grad_norm": 0.0, - "learning_rate": 3.050507893982628e-06, - "loss": 0.7842, + "learning_rate": 3.074793392982579e-06, + "loss": 0.8391, "step": 26509 }, { - "epoch": 0.7522701475595914, + "epoch": 0.7512255943778514, "grad_norm": 0.0, - "learning_rate": 3.0498470521383527e-06, - "loss": 0.8872, + "learning_rate": 3.0741313307719865e-06, + "loss": 0.799, "step": 26510 }, { - "epoch": 0.7522985244040863, + "epoch": 0.7512539318201139, "grad_norm": 0.0, - "learning_rate": 3.049186269002329e-06, - "loss": 0.8406, + "learning_rate": 3.0734693269005567e-06, + "loss": 0.7972, "step": 26511 }, { - "epoch": 0.7523269012485811, + "epoch": 0.7512822692623764, "grad_norm": 0.0, - "learning_rate": 3.0485255445801397e-06, - "loss": 0.7309, + "learning_rate": 3.072807381373871e-06, + "loss": 0.7682, "step": 26512 }, { - "epoch": 0.752355278093076, + "epoch": 0.7513106067046389, "grad_norm": 0.0, - "learning_rate": 3.0478648788773723e-06, - "loss": 0.9653, + "learning_rate": 3.072145494197497e-06, + "loss": 0.8956, "step": 26513 }, { - "epoch": 0.752383654937571, + "epoch": 0.7513389441469013, "grad_norm": 0.0, - "learning_rate": 3.0472042718996e-06, - "loss": 0.8484, + "learning_rate": 3.0714836653770153e-06, + "loss": 0.7857, "step": 26514 }, { - "epoch": 0.7524120317820658, + "epoch": 0.7513672815891638, "grad_norm": 0.0, - "learning_rate": 3.046543723652404e-06, - "loss": 0.7841, + "learning_rate": 3.0708218949180015e-06, + "loss": 0.8489, "step": 26515 }, { - "epoch": 0.7524404086265607, + "epoch": 0.7513956190314263, "grad_norm": 0.0, - "learning_rate": 3.04588323414137e-06, - "loss": 1.0076, + "learning_rate": 3.0701601828260253e-06, + "loss": 0.7859, "step": 26516 }, { - "epoch": 0.7524687854710557, + "epoch": 0.7514239564736886, "grad_norm": 0.0, - "learning_rate": 3.0452228033720697e-06, - "loss": 0.8499, + "learning_rate": 3.0694985291066627e-06, + "loss": 0.9057, "step": 26517 }, { - "epoch": 0.7524971623155505, + "epoch": 0.7514522939159511, "grad_norm": 0.0, - "learning_rate": 3.0445624313500853e-06, - "loss": 0.8122, + "learning_rate": 3.0688369337654876e-06, + "loss": 0.9119, "step": 26518 }, { - "epoch": 0.7525255391600454, + "epoch": 0.7514806313582136, "grad_norm": 0.0, - "learning_rate": 3.0439021180809946e-06, - "loss": 0.8078, + "learning_rate": 3.068175396808074e-06, + "loss": 0.886, "step": 26519 }, { - "epoch": 0.7525539160045402, + "epoch": 0.751508968800476, "grad_norm": 0.0, - "learning_rate": 3.0432418635703753e-06, - "loss": 0.8397, + "learning_rate": 3.067513918239995e-06, + "loss": 0.7161, "step": 26520 }, { - "epoch": 0.7525822928490352, + "epoch": 0.7515373062427385, "grad_norm": 0.0, - "learning_rate": 3.0425816678238073e-06, - "loss": 0.7811, + "learning_rate": 3.066852498066818e-06, + "loss": 0.7945, "step": 26521 }, { - "epoch": 0.7526106696935301, + "epoch": 0.751565643685001, "grad_norm": 0.0, - "learning_rate": 3.041921530846862e-06, - "loss": 0.8433, + "learning_rate": 3.06619113629412e-06, + "loss": 0.7996, "step": 26522 }, { - "epoch": 0.7526390465380249, + "epoch": 0.7515939811272635, "grad_norm": 0.0, - "learning_rate": 3.041261452645119e-06, - "loss": 0.8448, + "learning_rate": 3.0655298329274663e-06, + "loss": 0.8524, "step": 26523 }, { - "epoch": 0.7526674233825199, + "epoch": 0.7516223185695259, "grad_norm": 0.0, - "learning_rate": 3.040601433224155e-06, - "loss": 0.826, + "learning_rate": 3.0648685879724296e-06, + "loss": 0.6957, "step": 26524 }, { - "epoch": 0.7526958002270148, + "epoch": 0.7516506560117884, "grad_norm": 0.0, - "learning_rate": 3.0399414725895417e-06, - "loss": 0.7486, + "learning_rate": 3.0642074014345824e-06, + "loss": 0.8222, "step": 26525 }, { - "epoch": 0.7527241770715096, + "epoch": 0.7516789934540509, "grad_norm": 0.0, - "learning_rate": 3.039281570746855e-06, - "loss": 0.6954, + "learning_rate": 3.0635462733194886e-06, + "loss": 0.8723, "step": 26526 }, { - "epoch": 0.7527525539160045, + "epoch": 0.7517073308963133, "grad_norm": 0.0, - "learning_rate": 3.0386217277016738e-06, - "loss": 0.9082, + "learning_rate": 3.0628852036327215e-06, + "loss": 0.754, "step": 26527 }, { - "epoch": 0.7527809307604995, + "epoch": 0.7517356683385757, "grad_norm": 0.0, - "learning_rate": 3.037961943459563e-06, - "loss": 0.8443, + "learning_rate": 3.0622241923798513e-06, + "loss": 0.7442, "step": 26528 }, { - "epoch": 0.7528093076049943, + "epoch": 0.7517640057808382, "grad_norm": 0.0, - "learning_rate": 3.0373022180261025e-06, - "loss": 0.8507, + "learning_rate": 3.0615632395664395e-06, + "loss": 0.894, "step": 26529 }, { - "epoch": 0.7528376844494892, + "epoch": 0.7517923432231007, "grad_norm": 0.0, - "learning_rate": 3.0366425514068652e-06, - "loss": 0.7796, + "learning_rate": 3.0609023451980568e-06, + "loss": 0.7904, "step": 26530 }, { - "epoch": 0.7528660612939841, + "epoch": 0.7518206806653631, "grad_norm": 0.0, - "learning_rate": 3.0359829436074182e-06, - "loss": 0.9148, + "learning_rate": 3.0602415092802706e-06, + "loss": 0.7705, "step": 26531 }, { - "epoch": 0.752894438138479, + "epoch": 0.7518490181076256, "grad_norm": 0.0, - "learning_rate": 3.035323394633336e-06, - "loss": 0.8362, + "learning_rate": 3.0595807318186476e-06, + "loss": 0.8509, "step": 26532 }, { - "epoch": 0.7529228149829739, + "epoch": 0.7518773555498881, "grad_norm": 0.0, - "learning_rate": 3.034663904490194e-06, - "loss": 0.7689, + "learning_rate": 3.058920012818756e-06, + "loss": 0.904, "step": 26533 }, { - "epoch": 0.7529511918274688, + "epoch": 0.7519056929921505, "grad_norm": 0.0, - "learning_rate": 3.0340044731835526e-06, - "loss": 0.8528, + "learning_rate": 3.058259352286156e-06, + "loss": 0.8163, "step": 26534 }, { - "epoch": 0.7529795686719637, + "epoch": 0.751934030434413, "grad_norm": 0.0, - "learning_rate": 3.033345100718992e-06, - "loss": 0.7626, + "learning_rate": 3.057598750226415e-06, + "loss": 0.872, "step": 26535 }, { - "epoch": 0.7530079455164586, + "epoch": 0.7519623678766755, "grad_norm": 0.0, - "learning_rate": 3.032685787102081e-06, - "loss": 0.7903, + "learning_rate": 3.0569382066451005e-06, + "loss": 0.7794, "step": 26536 }, { - "epoch": 0.7530363223609534, + "epoch": 0.7519907053189379, "grad_norm": 0.0, - "learning_rate": 3.0320265323383834e-06, - "loss": 0.7753, + "learning_rate": 3.05627772154777e-06, + "loss": 0.878, "step": 26537 }, { - "epoch": 0.7530646992054484, + "epoch": 0.7520190427612004, "grad_norm": 0.0, - "learning_rate": 3.0313673364334716e-06, - "loss": 0.8571, + "learning_rate": 3.0556172949399955e-06, + "loss": 0.8539, "step": 26538 }, { - "epoch": 0.7530930760499432, + "epoch": 0.7520473802034628, "grad_norm": 0.0, - "learning_rate": 3.0307081993929155e-06, - "loss": 0.8191, + "learning_rate": 3.0549569268273316e-06, + "loss": 0.7953, "step": 26539 }, { - "epoch": 0.7531214528944381, + "epoch": 0.7520757176457253, "grad_norm": 0.0, - "learning_rate": 3.0300491212222784e-06, - "loss": 0.8346, + "learning_rate": 3.0542966172153433e-06, + "loss": 0.7458, "step": 26540 }, { - "epoch": 0.7531498297389331, + "epoch": 0.7521040550879877, "grad_norm": 0.0, - "learning_rate": 3.029390101927129e-06, - "loss": 0.788, + "learning_rate": 3.053636366109598e-06, + "loss": 0.7804, "step": 26541 }, { - "epoch": 0.7531782065834279, + "epoch": 0.7521323925302502, "grad_norm": 0.0, - "learning_rate": 3.028731141513037e-06, - "loss": 0.8139, + "learning_rate": 3.0529761735156505e-06, + "loss": 0.7982, "step": 26542 }, { - "epoch": 0.7532065834279228, + "epoch": 0.7521607299725127, "grad_norm": 0.0, - "learning_rate": 3.0280722399855644e-06, - "loss": 0.7508, + "learning_rate": 3.052316039439064e-06, + "loss": 0.7638, "step": 26543 }, { - "epoch": 0.7532349602724177, + "epoch": 0.7521890674147751, "grad_norm": 0.0, - "learning_rate": 3.0274133973502797e-06, - "loss": 0.873, + "learning_rate": 3.051655963885398e-06, + "loss": 0.8297, "step": 26544 }, { - "epoch": 0.7532633371169126, + "epoch": 0.7522174048570376, "grad_norm": 0.0, - "learning_rate": 3.026754613612749e-06, - "loss": 0.8699, + "learning_rate": 3.0509959468602157e-06, + "loss": 0.8915, "step": 26545 }, { - "epoch": 0.7532917139614075, + "epoch": 0.7522457422993001, "grad_norm": 0.0, - "learning_rate": 3.026095888778533e-06, - "loss": 0.7159, + "learning_rate": 3.0503359883690776e-06, + "loss": 0.8823, "step": 26546 }, { - "epoch": 0.7533200908059023, + "epoch": 0.7522740797415626, "grad_norm": 0.0, - "learning_rate": 3.0254372228532015e-06, - "loss": 0.8666, + "learning_rate": 3.049676088417537e-06, + "loss": 0.8085, "step": 26547 }, { - "epoch": 0.7533484676503973, + "epoch": 0.752302417183825, "grad_norm": 0.0, - "learning_rate": 3.024778615842313e-06, - "loss": 0.8126, + "learning_rate": 3.049016247011156e-06, + "loss": 0.7729, "step": 26548 }, { - "epoch": 0.7533768444948922, + "epoch": 0.7523307546260875, "grad_norm": 0.0, - "learning_rate": 3.0241200677514292e-06, - "loss": 0.8062, + "learning_rate": 3.0483564641554953e-06, + "loss": 0.8291, "step": 26549 }, { - "epoch": 0.753405221339387, + "epoch": 0.7523590920683499, "grad_norm": 0.0, - "learning_rate": 3.023461578586123e-06, - "loss": 0.8528, + "learning_rate": 3.0476967398561053e-06, + "loss": 0.7908, "step": 26550 }, { - "epoch": 0.753433598183882, + "epoch": 0.7523874295106123, "grad_norm": 0.0, - "learning_rate": 3.022803148351948e-06, - "loss": 0.7773, + "learning_rate": 3.047037074118552e-06, + "loss": 0.9335, "step": 26551 }, { - "epoch": 0.7534619750283769, + "epoch": 0.7524157669528748, "grad_norm": 0.0, - "learning_rate": 3.0221447770544676e-06, - "loss": 0.7812, + "learning_rate": 3.0463774669483838e-06, + "loss": 0.8369, "step": 26552 }, { - "epoch": 0.7534903518728717, + "epoch": 0.7524441043951373, "grad_norm": 0.0, - "learning_rate": 3.021486464699248e-06, - "loss": 0.9339, + "learning_rate": 3.04571791835116e-06, + "loss": 0.8371, "step": 26553 }, { - "epoch": 0.7535187287173666, + "epoch": 0.7524724418373998, "grad_norm": 0.0, - "learning_rate": 3.0208282112918417e-06, - "loss": 0.8461, + "learning_rate": 3.0450584283324404e-06, + "loss": 0.797, "step": 26554 }, { - "epoch": 0.7535471055618616, + "epoch": 0.7525007792796622, "grad_norm": 0.0, - "learning_rate": 3.0201700168378147e-06, - "loss": 0.7745, + "learning_rate": 3.0443989968977717e-06, + "loss": 0.8178, "step": 26555 }, { - "epoch": 0.7535754824063564, + "epoch": 0.7525291167219247, "grad_norm": 0.0, - "learning_rate": 3.019511881342727e-06, - "loss": 0.9434, + "learning_rate": 3.043739624052715e-06, + "loss": 0.7963, "step": 26556 }, { - "epoch": 0.7536038592508513, + "epoch": 0.7525574541641872, "grad_norm": 0.0, - "learning_rate": 3.018853804812134e-06, - "loss": 0.8444, + "learning_rate": 3.043080309802822e-06, + "loss": 0.7672, "step": 26557 }, { - "epoch": 0.7536322360953462, + "epoch": 0.7525857916064496, "grad_norm": 0.0, - "learning_rate": 3.0181957872515966e-06, - "loss": 0.7816, + "learning_rate": 3.0424210541536456e-06, + "loss": 0.8459, "step": 26558 }, { - "epoch": 0.7536606129398411, + "epoch": 0.752614129048712, "grad_norm": 0.0, - "learning_rate": 3.017537828666676e-06, - "loss": 0.7679, + "learning_rate": 3.0417618571107443e-06, + "loss": 0.8353, "step": 26559 }, { - "epoch": 0.753688989784336, + "epoch": 0.7526424664909745, "grad_norm": 0.0, - "learning_rate": 3.0168799290629246e-06, - "loss": 0.7303, + "learning_rate": 3.041102718679664e-06, + "loss": 0.8531, "step": 26560 }, { - "epoch": 0.7537173666288308, + "epoch": 0.7526708039332369, "grad_norm": 0.0, - "learning_rate": 3.0162220884459015e-06, - "loss": 0.7743, + "learning_rate": 3.0404436388659597e-06, + "loss": 0.7437, "step": 26561 }, { - "epoch": 0.7537457434733258, + "epoch": 0.7526991413754994, "grad_norm": 0.0, - "learning_rate": 3.0155643068211683e-06, - "loss": 0.8517, + "learning_rate": 3.0397846176751853e-06, + "loss": 0.7764, "step": 26562 }, { - "epoch": 0.7537741203178207, + "epoch": 0.7527274788177619, "grad_norm": 0.0, - "learning_rate": 3.0149065841942737e-06, - "loss": 0.729, + "learning_rate": 3.0391256551128877e-06, + "loss": 0.9196, "step": 26563 }, { - "epoch": 0.7538024971623155, + "epoch": 0.7527558162600244, "grad_norm": 0.0, - "learning_rate": 3.014248920570778e-06, - "loss": 0.8714, + "learning_rate": 3.038466751184619e-06, + "loss": 0.7389, "step": 26564 }, { - "epoch": 0.7538308740068105, + "epoch": 0.7527841537022868, "grad_norm": 0.0, - "learning_rate": 3.0135913159562347e-06, - "loss": 0.9473, + "learning_rate": 3.037807905895933e-06, + "loss": 0.8777, "step": 26565 }, { - "epoch": 0.7538592508513053, + "epoch": 0.7528124911445493, "grad_norm": 0.0, - "learning_rate": 3.0129337703562e-06, - "loss": 0.8128, + "learning_rate": 3.037149119252374e-06, + "loss": 0.7788, "step": 26566 }, { - "epoch": 0.7538876276958002, + "epoch": 0.7528408285868118, "grad_norm": 0.0, - "learning_rate": 3.012276283776231e-06, - "loss": 0.7259, + "learning_rate": 3.0364903912594958e-06, + "loss": 0.8211, "step": 26567 }, { - "epoch": 0.7539160045402952, + "epoch": 0.7528691660290742, "grad_norm": 0.0, - "learning_rate": 3.0116188562218764e-06, - "loss": 0.8404, + "learning_rate": 3.0358317219228415e-06, + "loss": 0.8508, "step": 26568 }, { - "epoch": 0.75394438138479, + "epoch": 0.7528975034713367, "grad_norm": 0.0, - "learning_rate": 3.0109614876986893e-06, - "loss": 0.8521, + "learning_rate": 3.035173111247963e-06, + "loss": 0.8065, "step": 26569 }, { - "epoch": 0.7539727582292849, + "epoch": 0.7529258409135992, "grad_norm": 0.0, - "learning_rate": 3.0103041782122287e-06, - "loss": 0.8064, + "learning_rate": 3.0345145592404077e-06, + "loss": 0.7396, "step": 26570 }, { - "epoch": 0.7540011350737797, + "epoch": 0.7529541783558616, "grad_norm": 0.0, - "learning_rate": 3.009646927768041e-06, - "loss": 0.8928, + "learning_rate": 3.0338560659057226e-06, + "loss": 0.9401, "step": 26571 }, { - "epoch": 0.7540295119182747, + "epoch": 0.752982515798124, "grad_norm": 0.0, - "learning_rate": 3.008989736371678e-06, - "loss": 0.8283, + "learning_rate": 3.033197631249456e-06, + "loss": 0.8646, "step": 26572 }, { - "epoch": 0.7540578887627696, + "epoch": 0.7530108532403865, "grad_norm": 0.0, - "learning_rate": 3.0083326040286977e-06, - "loss": 0.8679, + "learning_rate": 3.032539255277155e-06, + "loss": 0.8186, "step": 26573 }, { - "epoch": 0.7540862656072644, + "epoch": 0.753039190682649, "grad_norm": 0.0, - "learning_rate": 3.0076755307446427e-06, - "loss": 0.8361, + "learning_rate": 3.0318809379943594e-06, + "loss": 0.8181, "step": 26574 }, { - "epoch": 0.7541146424517594, + "epoch": 0.7530675281249114, "grad_norm": 0.0, - "learning_rate": 3.0070185165250664e-06, - "loss": 0.8173, + "learning_rate": 3.0312226794066225e-06, + "loss": 0.8981, "step": 26575 }, { - "epoch": 0.7541430192962543, + "epoch": 0.7530958655671739, "grad_norm": 0.0, - "learning_rate": 3.0063615613755214e-06, - "loss": 0.7095, + "learning_rate": 3.030564479519481e-06, + "loss": 0.8619, "step": 26576 }, { - "epoch": 0.7541713961407491, + "epoch": 0.7531242030094364, "grad_norm": 0.0, - "learning_rate": 3.0057046653015532e-06, - "loss": 0.7834, + "learning_rate": 3.029906338338483e-06, + "loss": 0.8473, "step": 26577 }, { - "epoch": 0.754199772985244, + "epoch": 0.7531525404516989, "grad_norm": 0.0, - "learning_rate": 3.0050478283087116e-06, - "loss": 0.7218, + "learning_rate": 3.0292482558691748e-06, + "loss": 0.838, "step": 26578 }, { - "epoch": 0.754228149829739, + "epoch": 0.7531808778939613, "grad_norm": 0.0, - "learning_rate": 3.0043910504025477e-06, - "loss": 0.77, + "learning_rate": 3.0285902321170945e-06, + "loss": 0.8042, "step": 26579 }, { - "epoch": 0.7542565266742338, + "epoch": 0.7532092153362238, "grad_norm": 0.0, - "learning_rate": 3.0037343315886013e-06, - "loss": 0.7565, + "learning_rate": 3.027932267087791e-06, + "loss": 0.703, "step": 26580 }, { - "epoch": 0.7542849035187287, + "epoch": 0.7532375527784863, "grad_norm": 0.0, - "learning_rate": 3.0030776718724318e-06, - "loss": 0.849, + "learning_rate": 3.0272743607867995e-06, + "loss": 0.8057, "step": 26581 }, { - "epoch": 0.7543132803632236, + "epoch": 0.7532658902207486, "grad_norm": 0.0, - "learning_rate": 3.0024210712595768e-06, - "loss": 0.8066, + "learning_rate": 3.0266165132196645e-06, + "loss": 0.7469, "step": 26582 }, { - "epoch": 0.7543416572077185, + "epoch": 0.7532942276630111, "grad_norm": 0.0, - "learning_rate": 3.0017645297555853e-06, - "loss": 0.8589, + "learning_rate": 3.0259587243919286e-06, + "loss": 0.8448, "step": 26583 }, { - "epoch": 0.7543700340522134, + "epoch": 0.7533225651052736, "grad_norm": 0.0, - "learning_rate": 3.001108047366007e-06, - "loss": 0.8712, + "learning_rate": 3.025300994309133e-06, + "loss": 0.8326, "step": 26584 }, { - "epoch": 0.7543984108967083, + "epoch": 0.753350902547536, "grad_norm": 0.0, - "learning_rate": 3.00045162409638e-06, - "loss": 0.8352, + "learning_rate": 3.024643322976816e-06, + "loss": 0.8443, "step": 26585 }, { - "epoch": 0.7544267877412032, + "epoch": 0.7533792399897985, "grad_norm": 0.0, - "learning_rate": 2.999795259952253e-06, - "loss": 0.937, + "learning_rate": 3.0239857104005223e-06, + "loss": 0.8158, "step": 26586 }, { - "epoch": 0.7544551645856981, + "epoch": 0.753407577432061, "grad_norm": 0.0, - "learning_rate": 2.9991389549391735e-06, - "loss": 0.8007, + "learning_rate": 3.0233281565857843e-06, + "loss": 0.7993, "step": 26587 }, { - "epoch": 0.7544835414301929, + "epoch": 0.7534359148743235, "grad_norm": 0.0, - "learning_rate": 2.9984827090626788e-06, - "loss": 0.878, + "learning_rate": 3.022670661538147e-06, + "loss": 0.6771, "step": 26588 }, { - "epoch": 0.7545119182746879, + "epoch": 0.7534642523165859, "grad_norm": 0.0, - "learning_rate": 2.9978265223283152e-06, - "loss": 0.8002, + "learning_rate": 3.022013225263142e-06, + "loss": 0.7406, "step": 26589 }, { - "epoch": 0.7545402951191827, + "epoch": 0.7534925897588484, "grad_norm": 0.0, - "learning_rate": 2.9971703947416284e-06, - "loss": 0.6408, + "learning_rate": 3.021355847766312e-06, + "loss": 0.8322, "step": 26590 }, { - "epoch": 0.7545686719636776, + "epoch": 0.7535209272011109, "grad_norm": 0.0, - "learning_rate": 2.9965143263081554e-06, - "loss": 0.8523, + "learning_rate": 3.0206985290531956e-06, + "loss": 0.8094, "step": 26591 }, { - "epoch": 0.7545970488081726, + "epoch": 0.7535492646433732, "grad_norm": 0.0, - "learning_rate": 2.995858317033441e-06, - "loss": 0.8467, + "learning_rate": 3.0200412691293237e-06, + "loss": 0.8351, "step": 26592 }, { - "epoch": 0.7546254256526674, + "epoch": 0.7535776020856357, "grad_norm": 0.0, - "learning_rate": 2.995202366923029e-06, - "loss": 0.8453, + "learning_rate": 3.0193840680002364e-06, + "loss": 0.8559, "step": 26593 }, { - "epoch": 0.7546538024971623, + "epoch": 0.7536059395278982, "grad_norm": 0.0, - "learning_rate": 2.994546475982455e-06, - "loss": 0.7991, + "learning_rate": 3.0187269256714724e-06, + "loss": 0.8818, "step": 26594 }, { - "epoch": 0.7546821793416572, + "epoch": 0.7536342769701607, "grad_norm": 0.0, - "learning_rate": 2.993890644217261e-06, - "loss": 0.7405, + "learning_rate": 3.018069842148561e-06, + "loss": 0.7343, "step": 26595 }, { - "epoch": 0.7547105561861521, + "epoch": 0.7536626144124231, "grad_norm": 0.0, - "learning_rate": 2.9932348716329873e-06, - "loss": 0.9657, + "learning_rate": 3.0174128174370397e-06, + "loss": 0.7916, "step": 26596 }, { - "epoch": 0.754738933030647, + "epoch": 0.7536909518546856, "grad_norm": 0.0, - "learning_rate": 2.992579158235175e-06, - "loss": 0.8401, + "learning_rate": 3.0167558515424434e-06, + "loss": 0.828, "step": 26597 }, { - "epoch": 0.7547673098751418, + "epoch": 0.7537192892969481, "grad_norm": 0.0, - "learning_rate": 2.991923504029364e-06, - "loss": 0.7579, + "learning_rate": 3.016098944470306e-06, + "loss": 0.7905, "step": 26598 }, { - "epoch": 0.7547956867196368, + "epoch": 0.7537476267392105, "grad_norm": 0.0, - "learning_rate": 2.9912679090210873e-06, - "loss": 0.9191, + "learning_rate": 3.015442096226163e-06, + "loss": 0.8137, "step": 26599 }, { - "epoch": 0.7548240635641317, + "epoch": 0.753775964181473, "grad_norm": 0.0, - "learning_rate": 2.9906123732158842e-06, - "loss": 0.8368, + "learning_rate": 3.014785306815542e-06, + "loss": 0.8697, "step": 26600 }, { - "epoch": 0.7548524404086265, + "epoch": 0.7538043016237355, "grad_norm": 0.0, - "learning_rate": 2.989956896619298e-06, - "loss": 0.9034, + "learning_rate": 3.0141285762439785e-06, + "loss": 0.7065, "step": 26601 }, { - "epoch": 0.7548808172531215, + "epoch": 0.753832639065998, "grad_norm": 0.0, - "learning_rate": 2.989301479236858e-06, - "loss": 0.6929, + "learning_rate": 3.013471904517007e-06, + "loss": 0.7987, "step": 26602 }, { - "epoch": 0.7549091940976164, + "epoch": 0.7538609765082603, "grad_norm": 0.0, - "learning_rate": 2.9886461210741035e-06, - "loss": 0.8142, + "learning_rate": 3.012815291640153e-06, + "loss": 0.807, "step": 26603 }, { - "epoch": 0.7549375709421112, + "epoch": 0.7538893139505228, "grad_norm": 0.0, - "learning_rate": 2.9879908221365726e-06, - "loss": 0.8134, + "learning_rate": 3.0121587376189544e-06, + "loss": 0.9132, "step": 26604 }, { - "epoch": 0.7549659477866061, + "epoch": 0.7539176513927853, "grad_norm": 0.0, - "learning_rate": 2.9873355824297967e-06, - "loss": 0.8935, + "learning_rate": 3.0115022424589336e-06, + "loss": 0.8351, "step": 26605 }, { - "epoch": 0.754994324631101, + "epoch": 0.7539459888350477, "grad_norm": 0.0, - "learning_rate": 2.986680401959311e-06, - "loss": 0.8355, + "learning_rate": 3.010845806165624e-06, + "loss": 1.0003, "step": 26606 }, { - "epoch": 0.7550227014755959, + "epoch": 0.7539743262773102, "grad_norm": 0.0, - "learning_rate": 2.9860252807306554e-06, - "loss": 0.8933, + "learning_rate": 3.01018942874456e-06, + "loss": 0.7714, "step": 26607 }, { - "epoch": 0.7550510783200908, + "epoch": 0.7540026637195727, "grad_norm": 0.0, - "learning_rate": 2.985370218749356e-06, - "loss": 0.7917, + "learning_rate": 3.0095331102012616e-06, + "loss": 0.8602, "step": 26608 }, { - "epoch": 0.7550794551645857, + "epoch": 0.7540310011618351, "grad_norm": 0.0, - "learning_rate": 2.9847152160209494e-06, - "loss": 0.749, + "learning_rate": 3.0088768505412623e-06, + "loss": 0.7573, "step": 26609 }, { - "epoch": 0.7551078320090806, + "epoch": 0.7540593386040976, "grad_norm": 0.0, - "learning_rate": 2.9840602725509727e-06, - "loss": 0.8614, + "learning_rate": 3.0082206497700894e-06, + "loss": 0.8071, "step": 26610 }, { - "epoch": 0.7551362088535755, + "epoch": 0.7540876760463601, "grad_norm": 0.0, - "learning_rate": 2.9834053883449467e-06, - "loss": 0.8248, + "learning_rate": 3.0075645078932703e-06, + "loss": 0.8413, "step": 26611 }, { - "epoch": 0.7551645856980703, + "epoch": 0.7541160134886226, "grad_norm": 0.0, - "learning_rate": 2.9827505634084185e-06, - "loss": 0.7973, + "learning_rate": 3.006908424916335e-06, + "loss": 0.7877, "step": 26612 }, { - "epoch": 0.7551929625425653, + "epoch": 0.7541443509308849, "grad_norm": 0.0, - "learning_rate": 2.982095797746908e-06, - "loss": 0.8084, + "learning_rate": 3.0062524008448036e-06, + "loss": 0.8512, "step": 26613 }, { - "epoch": 0.7552213393870602, + "epoch": 0.7541726883731474, "grad_norm": 0.0, - "learning_rate": 2.9814410913659496e-06, - "loss": 0.7967, + "learning_rate": 3.005596435684206e-06, + "loss": 0.8511, "step": 26614 }, { - "epoch": 0.755249716231555, + "epoch": 0.7542010258154099, "grad_norm": 0.0, - "learning_rate": 2.9807864442710767e-06, - "loss": 0.7759, + "learning_rate": 3.004940529440069e-06, + "loss": 0.7877, "step": 26615 }, { - "epoch": 0.75527809307605, + "epoch": 0.7542293632576723, "grad_norm": 0.0, - "learning_rate": 2.9801318564678138e-06, - "loss": 0.8961, + "learning_rate": 3.004284682117913e-06, + "loss": 0.8972, "step": 26616 }, { - "epoch": 0.7553064699205448, + "epoch": 0.7542577006999348, "grad_norm": 0.0, - "learning_rate": 2.9794773279616916e-06, - "loss": 0.7972, + "learning_rate": 3.0036288937232683e-06, + "loss": 0.7795, "step": 26617 }, { - "epoch": 0.7553348467650397, + "epoch": 0.7542860381421973, "grad_norm": 0.0, - "learning_rate": 2.9788228587582447e-06, - "loss": 0.7897, + "learning_rate": 3.0029731642616522e-06, + "loss": 0.837, "step": 26618 }, { - "epoch": 0.7553632236095347, + "epoch": 0.7543143755844598, "grad_norm": 0.0, - "learning_rate": 2.9781684488629923e-06, - "loss": 0.8122, + "learning_rate": 3.00231749373859e-06, + "loss": 0.8401, "step": 26619 }, { - "epoch": 0.7553916004540295, + "epoch": 0.7543427130267222, "grad_norm": 0.0, - "learning_rate": 2.9775140982814678e-06, - "loss": 0.8501, + "learning_rate": 3.0016618821596077e-06, + "loss": 0.8155, "step": 26620 }, { - "epoch": 0.7554199772985244, + "epoch": 0.7543710504689847, "grad_norm": 0.0, - "learning_rate": 2.976859807019199e-06, - "loss": 0.7623, + "learning_rate": 3.0010063295302262e-06, + "loss": 0.7862, "step": 26621 }, { - "epoch": 0.7554483541430193, + "epoch": 0.7543993879112472, "grad_norm": 0.0, - "learning_rate": 2.9762055750817086e-06, - "loss": 0.8221, + "learning_rate": 3.0003508358559697e-06, + "loss": 0.8157, "step": 26622 }, { - "epoch": 0.7554767309875142, + "epoch": 0.7544277253535095, "grad_norm": 0.0, - "learning_rate": 2.9755514024745257e-06, - "loss": 0.798, + "learning_rate": 2.999695401142354e-06, + "loss": 0.7802, "step": 26623 }, { - "epoch": 0.7555051078320091, + "epoch": 0.754456062795772, "grad_norm": 0.0, - "learning_rate": 2.9748972892031802e-06, - "loss": 0.8699, + "learning_rate": 2.999040025394905e-06, + "loss": 0.7561, "step": 26624 }, { - "epoch": 0.7555334846765039, + "epoch": 0.7544844002380345, "grad_norm": 0.0, - "learning_rate": 2.9742432352731885e-06, - "loss": 0.837, + "learning_rate": 2.998384708619143e-06, + "loss": 0.8274, "step": 26625 }, { - "epoch": 0.7555618615209989, + "epoch": 0.754512737680297, "grad_norm": 0.0, - "learning_rate": 2.9735892406900766e-06, - "loss": 0.7124, + "learning_rate": 2.9977294508205844e-06, + "loss": 0.8453, "step": 26626 }, { - "epoch": 0.7555902383654938, + "epoch": 0.7545410751225594, "grad_norm": 0.0, - "learning_rate": 2.972935305459379e-06, - "loss": 0.8844, + "learning_rate": 2.9970742520047504e-06, + "loss": 0.8264, "step": 26627 }, { - "epoch": 0.7556186152099886, + "epoch": 0.7545694125648219, "grad_norm": 0.0, - "learning_rate": 2.97228142958661e-06, - "loss": 0.9391, + "learning_rate": 2.9964191121771634e-06, + "loss": 0.8526, "step": 26628 }, { - "epoch": 0.7556469920544835, + "epoch": 0.7545977500070844, "grad_norm": 0.0, - "learning_rate": 2.9716276130772983e-06, - "loss": 0.8662, + "learning_rate": 2.9957640313433366e-06, + "loss": 0.7924, "step": 26629 }, { - "epoch": 0.7556753688989785, + "epoch": 0.7546260874493468, "grad_norm": 0.0, - "learning_rate": 2.9709738559369617e-06, - "loss": 0.8082, + "learning_rate": 2.9951090095087897e-06, + "loss": 0.8304, "step": 26630 }, { - "epoch": 0.7557037457434733, + "epoch": 0.7546544248916093, "grad_norm": 0.0, - "learning_rate": 2.9703201581711238e-06, - "loss": 0.8642, + "learning_rate": 2.994454046679045e-06, + "loss": 0.8203, "step": 26631 }, { - "epoch": 0.7557321225879682, + "epoch": 0.7546827623338718, "grad_norm": 0.0, - "learning_rate": 2.9696665197853113e-06, - "loss": 0.8413, + "learning_rate": 2.9937991428596115e-06, + "loss": 0.9019, "step": 26632 }, { - "epoch": 0.7557604994324632, + "epoch": 0.7547110997761342, "grad_norm": 0.0, - "learning_rate": 2.969012940785039e-06, - "loss": 0.7969, + "learning_rate": 2.993144298056009e-06, + "loss": 0.8249, "step": 26633 }, { - "epoch": 0.755788876276958, + "epoch": 0.7547394372183966, "grad_norm": 0.0, - "learning_rate": 2.9683594211758304e-06, - "loss": 0.7462, + "learning_rate": 2.992489512273754e-06, + "loss": 0.823, "step": 26634 }, { - "epoch": 0.7558172531214529, + "epoch": 0.7547677746606591, "grad_norm": 0.0, - "learning_rate": 2.9677059609632086e-06, - "loss": 0.7733, + "learning_rate": 2.9918347855183627e-06, + "loss": 0.7756, "step": 26635 }, { - "epoch": 0.7558456299659478, + "epoch": 0.7547961121029216, "grad_norm": 0.0, - "learning_rate": 2.967052560152687e-06, - "loss": 0.785, + "learning_rate": 2.9911801177953513e-06, + "loss": 0.7382, "step": 26636 }, { - "epoch": 0.7558740068104427, + "epoch": 0.754824449545184, "grad_norm": 0.0, - "learning_rate": 2.9663992187497893e-06, - "loss": 0.7594, + "learning_rate": 2.9905255091102313e-06, + "loss": 0.9416, "step": 26637 }, { - "epoch": 0.7559023836549376, + "epoch": 0.7548527869874465, "grad_norm": 0.0, - "learning_rate": 2.965745936760037e-06, - "loss": 0.7417, + "learning_rate": 2.9898709594685195e-06, + "loss": 0.7806, "step": 26638 }, { - "epoch": 0.7559307604994324, + "epoch": 0.754881124429709, "grad_norm": 0.0, - "learning_rate": 2.9650927141889407e-06, - "loss": 0.7577, + "learning_rate": 2.989216468875725e-06, + "loss": 0.8174, "step": 26639 }, { - "epoch": 0.7559591373439274, + "epoch": 0.7549094618719714, "grad_norm": 0.0, - "learning_rate": 2.9644395510420234e-06, - "loss": 0.8172, + "learning_rate": 2.988562037337364e-06, + "loss": 0.8219, "step": 26640 }, { - "epoch": 0.7559875141884222, + "epoch": 0.7549377993142339, "grad_norm": 0.0, - "learning_rate": 2.963786447324801e-06, - "loss": 0.7169, + "learning_rate": 2.987907664858951e-06, + "loss": 0.8518, "step": 26641 }, { - "epoch": 0.7560158910329171, + "epoch": 0.7549661367564964, "grad_norm": 0.0, - "learning_rate": 2.9631334030427915e-06, - "loss": 0.77, + "learning_rate": 2.987253351445992e-06, + "loss": 0.8537, "step": 26642 }, { - "epoch": 0.7560442678774121, + "epoch": 0.7549944741987589, "grad_norm": 0.0, - "learning_rate": 2.96248041820151e-06, - "loss": 0.8299, + "learning_rate": 2.9865990971040037e-06, + "loss": 0.8892, "step": 26643 }, { - "epoch": 0.7560726447219069, + "epoch": 0.7550228116410213, "grad_norm": 0.0, - "learning_rate": 2.961827492806476e-06, - "loss": 0.7785, + "learning_rate": 2.985944901838498e-06, + "loss": 0.8953, "step": 26644 }, { - "epoch": 0.7561010215664018, + "epoch": 0.7550511490832837, "grad_norm": 0.0, - "learning_rate": 2.961174626863198e-06, - "loss": 0.7254, + "learning_rate": 2.98529076565498e-06, + "loss": 0.7489, "step": 26645 }, { - "epoch": 0.7561293984108967, + "epoch": 0.7550794865255462, "grad_norm": 0.0, - "learning_rate": 2.9605218203771946e-06, - "loss": 0.8017, + "learning_rate": 2.984636688558963e-06, + "loss": 0.8536, "step": 26646 }, { - "epoch": 0.7561577752553916, + "epoch": 0.7551078239678086, "grad_norm": 0.0, - "learning_rate": 2.9598690733539837e-06, - "loss": 0.8756, + "learning_rate": 2.983982670555955e-06, + "loss": 0.8599, "step": 26647 }, { - "epoch": 0.7561861520998865, + "epoch": 0.7551361614100711, "grad_norm": 0.0, - "learning_rate": 2.9592163857990706e-06, - "loss": 0.7329, + "learning_rate": 2.9833287116514674e-06, + "loss": 0.7887, "step": 26648 }, { - "epoch": 0.7562145289443813, + "epoch": 0.7551644988523336, "grad_norm": 0.0, - "learning_rate": 2.958563757717975e-06, - "loss": 0.774, + "learning_rate": 2.9826748118510107e-06, + "loss": 0.9228, "step": 26649 }, { - "epoch": 0.7562429057888763, + "epoch": 0.7551928362945961, "grad_norm": 0.0, - "learning_rate": 2.9579111891162094e-06, - "loss": 0.8613, + "learning_rate": 2.9820209711600858e-06, + "loss": 0.9024, "step": 26650 }, { - "epoch": 0.7562712826333712, + "epoch": 0.7552211737368585, "grad_norm": 0.0, - "learning_rate": 2.957258679999282e-06, - "loss": 0.9105, + "learning_rate": 2.9813671895842057e-06, + "loss": 0.8083, "step": 26651 }, { - "epoch": 0.756299659477866, + "epoch": 0.755249511179121, "grad_norm": 0.0, - "learning_rate": 2.9566062303727073e-06, - "loss": 0.8603, + "learning_rate": 2.9807134671288785e-06, + "loss": 0.7518, "step": 26652 }, { - "epoch": 0.756328036322361, + "epoch": 0.7552778486213835, "grad_norm": 0.0, - "learning_rate": 2.9559538402419994e-06, - "loss": 0.8164, + "learning_rate": 2.9800598037996055e-06, + "loss": 0.7099, "step": 26653 }, { - "epoch": 0.7563564131668559, + "epoch": 0.7553061860636459, "grad_norm": 0.0, - "learning_rate": 2.9553015096126638e-06, - "loss": 0.7772, + "learning_rate": 2.9794061996018973e-06, + "loss": 0.8311, "step": 26654 }, { - "epoch": 0.7563847900113507, + "epoch": 0.7553345235059084, "grad_norm": 0.0, - "learning_rate": 2.954649238490215e-06, - "loss": 0.8115, + "learning_rate": 2.978752654541256e-06, + "loss": 0.9308, "step": 26655 }, { - "epoch": 0.7564131668558456, + "epoch": 0.7553628609481708, "grad_norm": 0.0, - "learning_rate": 2.9539970268801575e-06, - "loss": 0.8378, + "learning_rate": 2.9780991686231887e-06, + "loss": 0.8622, "step": 26656 }, { - "epoch": 0.7564415437003406, + "epoch": 0.7553911983904332, "grad_norm": 0.0, - "learning_rate": 2.953344874788001e-06, - "loss": 0.7207, + "learning_rate": 2.977445741853202e-06, + "loss": 0.7979, "step": 26657 }, { - "epoch": 0.7564699205448354, + "epoch": 0.7554195358326957, "grad_norm": 0.0, - "learning_rate": 2.952692782219264e-06, - "loss": 0.8442, + "learning_rate": 2.9767923742367945e-06, + "loss": 0.9454, "step": 26658 }, { - "epoch": 0.7564982973893303, + "epoch": 0.7554478732749582, "grad_norm": 0.0, - "learning_rate": 2.9520407491794445e-06, - "loss": 0.9066, + "learning_rate": 2.976139065779473e-06, + "loss": 0.8064, "step": 26659 }, { - "epoch": 0.7565266742338252, + "epoch": 0.7554762107172207, "grad_norm": 0.0, - "learning_rate": 2.9513887756740534e-06, - "loss": 0.8249, + "learning_rate": 2.9754858164867394e-06, + "loss": 0.8964, "step": 26660 }, { - "epoch": 0.7565550510783201, + "epoch": 0.7555045481594831, "grad_norm": 0.0, - "learning_rate": 2.950736861708601e-06, - "loss": 0.8587, + "learning_rate": 2.974832626364099e-06, + "loss": 0.9595, "step": 26661 }, { - "epoch": 0.756583427922815, + "epoch": 0.7555328856017456, "grad_norm": 0.0, - "learning_rate": 2.950085007288589e-06, - "loss": 0.7887, + "learning_rate": 2.9741794954170546e-06, + "loss": 0.7353, "step": 26662 }, { - "epoch": 0.7566118047673098, + "epoch": 0.7555612230440081, "grad_norm": 0.0, - "learning_rate": 2.9494332124195247e-06, - "loss": 0.8272, + "learning_rate": 2.9735264236511018e-06, + "loss": 0.8919, "step": 26663 }, { - "epoch": 0.7566401816118048, + "epoch": 0.7555895604862705, "grad_norm": 0.0, - "learning_rate": 2.948781477106919e-06, - "loss": 0.8199, + "learning_rate": 2.972873411071745e-06, + "loss": 0.8719, "step": 26664 }, { - "epoch": 0.7566685584562997, + "epoch": 0.755617897928533, "grad_norm": 0.0, - "learning_rate": 2.94812980135627e-06, - "loss": 0.8889, + "learning_rate": 2.9722204576844883e-06, + "loss": 0.7792, "step": 26665 }, { - "epoch": 0.7566969353007945, + "epoch": 0.7556462353707954, "grad_norm": 0.0, - "learning_rate": 2.947478185173085e-06, - "loss": 0.8465, + "learning_rate": 2.971567563494825e-06, + "loss": 0.8621, "step": 26666 }, { - "epoch": 0.7567253121452895, + "epoch": 0.7556745728130579, "grad_norm": 0.0, - "learning_rate": 2.946826628562872e-06, - "loss": 0.7678, + "learning_rate": 2.970914728508262e-06, + "loss": 0.833, "step": 26667 }, { - "epoch": 0.7567536889897843, + "epoch": 0.7557029102553203, "grad_norm": 0.0, - "learning_rate": 2.946175131531128e-06, - "loss": 0.8724, + "learning_rate": 2.9702619527302913e-06, + "loss": 0.8218, "step": 26668 }, { - "epoch": 0.7567820658342792, + "epoch": 0.7557312476975828, "grad_norm": 0.0, - "learning_rate": 2.945523694083361e-06, - "loss": 0.9465, + "learning_rate": 2.969609236166413e-06, + "loss": 0.7901, "step": 26669 }, { - "epoch": 0.7568104426787742, + "epoch": 0.7557595851398453, "grad_norm": 0.0, - "learning_rate": 2.9448723162250748e-06, - "loss": 0.7409, + "learning_rate": 2.968956578822132e-06, + "loss": 0.7478, "step": 26670 }, { - "epoch": 0.756838819523269, + "epoch": 0.7557879225821077, "grad_norm": 0.0, - "learning_rate": 2.9442209979617665e-06, - "loss": 0.8098, + "learning_rate": 2.9683039807029358e-06, + "loss": 0.9256, "step": 26671 }, { - "epoch": 0.7568671963677639, + "epoch": 0.7558162600243702, "grad_norm": 0.0, - "learning_rate": 2.9435697392989405e-06, - "loss": 0.8268, + "learning_rate": 2.9676514418143276e-06, + "loss": 0.7889, "step": 26672 }, { - "epoch": 0.7568955732122588, + "epoch": 0.7558445974666327, "grad_norm": 0.0, - "learning_rate": 2.942918540242098e-06, - "loss": 0.8738, + "learning_rate": 2.9669989621618023e-06, + "loss": 0.7791, "step": 26673 }, { - "epoch": 0.7569239500567537, + "epoch": 0.7558729349088952, "grad_norm": 0.0, - "learning_rate": 2.94226740079674e-06, - "loss": 0.7948, + "learning_rate": 2.9663465417508554e-06, + "loss": 0.8541, "step": 26674 }, { - "epoch": 0.7569523269012486, + "epoch": 0.7559012723511576, "grad_norm": 0.0, - "learning_rate": 2.9416163209683702e-06, - "loss": 0.8234, + "learning_rate": 2.965694180586988e-06, + "loss": 0.8324, "step": 26675 }, { - "epoch": 0.7569807037457434, + "epoch": 0.75592960979342, "grad_norm": 0.0, - "learning_rate": 2.9409653007624806e-06, - "loss": 0.8896, + "learning_rate": 2.9650418786756863e-06, + "loss": 0.8727, "step": 26676 }, { - "epoch": 0.7570090805902384, + "epoch": 0.7559579472356825, "grad_norm": 0.0, - "learning_rate": 2.940314340184575e-06, - "loss": 0.8551, + "learning_rate": 2.96438963602245e-06, + "loss": 0.825, "step": 26677 }, { - "epoch": 0.7570374574347333, + "epoch": 0.7559862846779449, "grad_norm": 0.0, - "learning_rate": 2.9396634392401534e-06, - "loss": 0.7886, + "learning_rate": 2.9637374526327755e-06, + "loss": 0.8373, "step": 26678 }, { - "epoch": 0.7570658342792281, + "epoch": 0.7560146221202074, "grad_norm": 0.0, - "learning_rate": 2.9390125979347106e-06, - "loss": 0.7983, + "learning_rate": 2.9630853285121506e-06, + "loss": 0.7147, "step": 26679 }, { - "epoch": 0.757094211123723, + "epoch": 0.7560429595624699, "grad_norm": 0.0, - "learning_rate": 2.938361816273745e-06, - "loss": 0.7544, + "learning_rate": 2.96243326366607e-06, + "loss": 0.8006, "step": 26680 }, { - "epoch": 0.757122587968218, + "epoch": 0.7560712970047323, "grad_norm": 0.0, - "learning_rate": 2.9377110942627573e-06, - "loss": 0.8497, + "learning_rate": 2.9617812581000318e-06, + "loss": 0.7584, "step": 26681 }, { - "epoch": 0.7571509648127128, + "epoch": 0.7560996344469948, "grad_norm": 0.0, - "learning_rate": 2.937060431907239e-06, - "loss": 0.8662, + "learning_rate": 2.9611293118195197e-06, + "loss": 0.8683, "step": 26682 }, { - "epoch": 0.7571793416572077, + "epoch": 0.7561279718892573, "grad_norm": 0.0, - "learning_rate": 2.9364098292126886e-06, - "loss": 0.7526, + "learning_rate": 2.960477424830032e-06, + "loss": 0.846, "step": 26683 }, { - "epoch": 0.7572077185017027, + "epoch": 0.7561563093315198, "grad_norm": 0.0, - "learning_rate": 2.935759286184605e-06, - "loss": 0.8644, + "learning_rate": 2.9598255971370538e-06, + "loss": 0.7802, "step": 26684 }, { - "epoch": 0.7572360953461975, + "epoch": 0.7561846467737822, "grad_norm": 0.0, - "learning_rate": 2.935108802828478e-06, - "loss": 0.8076, + "learning_rate": 2.95917382874608e-06, + "loss": 0.8596, "step": 26685 }, { - "epoch": 0.7572644721906924, + "epoch": 0.7562129842160447, "grad_norm": 0.0, - "learning_rate": 2.9344583791498028e-06, - "loss": 0.9047, + "learning_rate": 2.958522119662599e-06, + "loss": 0.846, "step": 26686 }, { - "epoch": 0.7572928490351872, + "epoch": 0.7562413216583072, "grad_norm": 0.0, - "learning_rate": 2.933808015154077e-06, - "loss": 0.8126, + "learning_rate": 2.9578704698921e-06, + "loss": 0.7905, "step": 26687 }, { - "epoch": 0.7573212258796822, + "epoch": 0.7562696591005695, "grad_norm": 0.0, - "learning_rate": 2.9331577108467914e-06, - "loss": 0.8209, + "learning_rate": 2.9572188794400745e-06, + "loss": 0.8088, "step": 26688 }, { - "epoch": 0.7573496027241771, + "epoch": 0.756297996542832, "grad_norm": 0.0, - "learning_rate": 2.932507466233444e-06, - "loss": 0.8206, + "learning_rate": 2.9565673483120126e-06, + "loss": 0.8215, "step": 26689 }, { - "epoch": 0.7573779795686719, + "epoch": 0.7563263339850945, "grad_norm": 0.0, - "learning_rate": 2.93185728131952e-06, - "loss": 0.7816, + "learning_rate": 2.9559158765133955e-06, + "loss": 0.8863, "step": 26690 }, { - "epoch": 0.7574063564131669, + "epoch": 0.756354671427357, "grad_norm": 0.0, - "learning_rate": 2.931207156110516e-06, - "loss": 0.7133, + "learning_rate": 2.9552644640497185e-06, + "loss": 0.7841, "step": 26691 }, { - "epoch": 0.7574347332576618, + "epoch": 0.7563830088696194, "grad_norm": 0.0, - "learning_rate": 2.9305570906119253e-06, - "loss": 0.8481, + "learning_rate": 2.954613110926462e-06, + "loss": 0.8447, "step": 26692 }, { - "epoch": 0.7574631101021566, + "epoch": 0.7564113463118819, "grad_norm": 0.0, - "learning_rate": 2.929907084829234e-06, - "loss": 0.8988, + "learning_rate": 2.9539618171491145e-06, + "loss": 0.7648, "step": 26693 }, { - "epoch": 0.7574914869466516, + "epoch": 0.7564396837541444, "grad_norm": 0.0, - "learning_rate": 2.929257138767936e-06, - "loss": 0.823, + "learning_rate": 2.9533105827231677e-06, + "loss": 0.8541, "step": 26694 }, { - "epoch": 0.7575198637911464, + "epoch": 0.7564680211964068, "grad_norm": 0.0, - "learning_rate": 2.9286072524335227e-06, - "loss": 0.8478, + "learning_rate": 2.9526594076540983e-06, + "loss": 0.9091, "step": 26695 }, { - "epoch": 0.7575482406356413, + "epoch": 0.7564963586386693, "grad_norm": 0.0, - "learning_rate": 2.927957425831479e-06, - "loss": 0.8542, + "learning_rate": 2.952008291947399e-06, + "loss": 0.7866, "step": 26696 }, { - "epoch": 0.7575766174801362, + "epoch": 0.7565246960809318, "grad_norm": 0.0, - "learning_rate": 2.9273076589672976e-06, - "loss": 0.6804, + "learning_rate": 2.9513572356085485e-06, + "loss": 0.8074, "step": 26697 }, { - "epoch": 0.7576049943246311, + "epoch": 0.7565530335231943, "grad_norm": 0.0, - "learning_rate": 2.9266579518464687e-06, - "loss": 0.7722, + "learning_rate": 2.950706238643033e-06, + "loss": 0.9, "step": 26698 }, { - "epoch": 0.757633371169126, + "epoch": 0.7565813709654566, "grad_norm": 0.0, - "learning_rate": 2.926008304474475e-06, - "loss": 0.8291, + "learning_rate": 2.950055301056336e-06, + "loss": 0.8724, "step": 26699 }, { - "epoch": 0.7576617480136209, + "epoch": 0.7566097084077191, "grad_norm": 0.0, - "learning_rate": 2.9253587168568074e-06, - "loss": 0.8245, + "learning_rate": 2.949404422853942e-06, + "loss": 0.9584, "step": 26700 }, { - "epoch": 0.7576901248581158, + "epoch": 0.7566380458499816, "grad_norm": 0.0, - "learning_rate": 2.9247091889989555e-06, - "loss": 0.7778, + "learning_rate": 2.948753604041332e-06, + "loss": 0.7894, "step": 26701 }, { - "epoch": 0.7577185017026107, + "epoch": 0.756666383292244, "grad_norm": 0.0, - "learning_rate": 2.9240597209064003e-06, - "loss": 0.8919, + "learning_rate": 2.948102844623992e-06, + "loss": 0.7971, "step": 26702 }, { - "epoch": 0.7577468785471055, + "epoch": 0.7566947207345065, "grad_norm": 0.0, - "learning_rate": 2.9234103125846314e-06, - "loss": 0.7612, + "learning_rate": 2.9474521446073978e-06, + "loss": 0.9042, "step": 26703 }, { - "epoch": 0.7577752553916004, + "epoch": 0.756723058176769, "grad_norm": 0.0, - "learning_rate": 2.922760964039133e-06, - "loss": 0.8796, + "learning_rate": 2.9468015039970365e-06, + "loss": 0.8268, "step": 26704 }, { - "epoch": 0.7578036322360954, + "epoch": 0.7567513956190314, "grad_norm": 0.0, - "learning_rate": 2.9221116752753908e-06, - "loss": 0.9133, + "learning_rate": 2.946150922798382e-06, + "loss": 0.8683, "step": 26705 }, { - "epoch": 0.7578320090805902, + "epoch": 0.7567797330612939, "grad_norm": 0.0, - "learning_rate": 2.9214624462988928e-06, - "loss": 0.8077, + "learning_rate": 2.9455004010169174e-06, + "loss": 0.7638, "step": 26706 }, { - "epoch": 0.7578603859250851, + "epoch": 0.7568080705035564, "grad_norm": 0.0, - "learning_rate": 2.9208132771151167e-06, - "loss": 0.8448, + "learning_rate": 2.9448499386581254e-06, + "loss": 0.7891, "step": 26707 }, { - "epoch": 0.7578887627695801, + "epoch": 0.7568364079458189, "grad_norm": 0.0, - "learning_rate": 2.920164167729548e-06, - "loss": 0.8994, + "learning_rate": 2.9441995357274787e-06, + "loss": 0.8, "step": 26708 }, { - "epoch": 0.7579171396140749, + "epoch": 0.7568647453880812, "grad_norm": 0.0, - "learning_rate": 2.9195151181476757e-06, - "loss": 0.7884, + "learning_rate": 2.9435491922304603e-06, + "loss": 0.809, "step": 26709 }, { - "epoch": 0.7579455164585698, + "epoch": 0.7568930828303437, "grad_norm": 0.0, - "learning_rate": 2.918866128374973e-06, - "loss": 0.8693, + "learning_rate": 2.94289890817255e-06, + "loss": 0.7583, "step": 26710 }, { - "epoch": 0.7579738933030647, + "epoch": 0.7569214202726062, "grad_norm": 0.0, - "learning_rate": 2.918217198416927e-06, - "loss": 0.7912, + "learning_rate": 2.94224868355922e-06, + "loss": 0.8894, "step": 26711 }, { - "epoch": 0.7580022701475596, + "epoch": 0.7569497577148686, "grad_norm": 0.0, - "learning_rate": 2.9175683282790212e-06, - "loss": 0.84, + "learning_rate": 2.9415985183959505e-06, + "loss": 0.7924, "step": 26712 }, { - "epoch": 0.7580306469920545, + "epoch": 0.7569780951571311, "grad_norm": 0.0, - "learning_rate": 2.916919517966732e-06, - "loss": 0.8407, + "learning_rate": 2.940948412688217e-06, + "loss": 0.7608, "step": 26713 }, { - "epoch": 0.7580590238365493, + "epoch": 0.7570064325993936, "grad_norm": 0.0, - "learning_rate": 2.9162707674855416e-06, - "loss": 0.786, + "learning_rate": 2.9402983664414963e-06, + "loss": 0.8679, "step": 26714 }, { - "epoch": 0.7580874006810443, + "epoch": 0.7570347700416561, "grad_norm": 0.0, - "learning_rate": 2.9156220768409336e-06, - "loss": 0.7179, + "learning_rate": 2.9396483796612685e-06, + "loss": 0.7461, "step": 26715 }, { - "epoch": 0.7581157775255392, + "epoch": 0.7570631074839185, "grad_norm": 0.0, - "learning_rate": 2.9149734460383804e-06, - "loss": 0.6374, + "learning_rate": 2.938998452353e-06, + "loss": 0.8711, "step": 26716 }, { - "epoch": 0.758144154370034, + "epoch": 0.757091444926181, "grad_norm": 0.0, - "learning_rate": 2.914324875083366e-06, - "loss": 0.8856, + "learning_rate": 2.9383485845221695e-06, + "loss": 0.8892, "step": 26717 }, { - "epoch": 0.758172531214529, + "epoch": 0.7571197823684435, "grad_norm": 0.0, - "learning_rate": 2.913676363981368e-06, - "loss": 0.7894, + "learning_rate": 2.9376987761742546e-06, + "loss": 0.8064, "step": 26718 }, { - "epoch": 0.7582009080590238, + "epoch": 0.7571481198107058, "grad_norm": 0.0, - "learning_rate": 2.9130279127378635e-06, - "loss": 0.7537, + "learning_rate": 2.9370490273147224e-06, + "loss": 0.8595, "step": 26719 }, { - "epoch": 0.7582292849035187, + "epoch": 0.7571764572529683, "grad_norm": 0.0, - "learning_rate": 2.912379521358335e-06, - "loss": 0.874, + "learning_rate": 2.9363993379490517e-06, + "loss": 0.8365, "step": 26720 }, { - "epoch": 0.7582576617480136, + "epoch": 0.7572047946952308, "grad_norm": 0.0, - "learning_rate": 2.9117311898482514e-06, - "loss": 0.8847, + "learning_rate": 2.935749708082709e-06, + "loss": 0.796, "step": 26721 }, { - "epoch": 0.7582860385925085, + "epoch": 0.7572331321374932, "grad_norm": 0.0, - "learning_rate": 2.9110829182130928e-06, - "loss": 0.8794, + "learning_rate": 2.9351001377211707e-06, + "loss": 0.8046, "step": 26722 }, { - "epoch": 0.7583144154370034, + "epoch": 0.7572614695797557, "grad_norm": 0.0, - "learning_rate": 2.9104347064583393e-06, - "loss": 0.7425, + "learning_rate": 2.934450626869909e-06, + "loss": 0.8272, "step": 26723 }, { - "epoch": 0.7583427922814983, + "epoch": 0.7572898070220182, "grad_norm": 0.0, - "learning_rate": 2.9097865545894598e-06, - "loss": 0.8505, + "learning_rate": 2.933801175534392e-06, + "loss": 0.8644, "step": 26724 }, { - "epoch": 0.7583711691259932, + "epoch": 0.7573181444642807, "grad_norm": 0.0, - "learning_rate": 2.909138462611931e-06, - "loss": 0.9396, + "learning_rate": 2.9331517837200905e-06, + "loss": 0.926, "step": 26725 }, { - "epoch": 0.7583995459704881, + "epoch": 0.7573464819065431, "grad_norm": 0.0, - "learning_rate": 2.9084904305312324e-06, - "loss": 0.8262, + "learning_rate": 2.9325024514324764e-06, + "loss": 0.8326, "step": 26726 }, { - "epoch": 0.758427922814983, + "epoch": 0.7573748193488056, "grad_norm": 0.0, - "learning_rate": 2.90784245835283e-06, - "loss": 0.8208, + "learning_rate": 2.9318531786770186e-06, + "loss": 0.7754, "step": 26727 }, { - "epoch": 0.7584562996594779, + "epoch": 0.7574031567910681, "grad_norm": 0.0, - "learning_rate": 2.9071945460822014e-06, - "loss": 0.8405, + "learning_rate": 2.9312039654591896e-06, + "loss": 0.7662, "step": 26728 }, { - "epoch": 0.7584846765039728, + "epoch": 0.7574314942333304, "grad_norm": 0.0, - "learning_rate": 2.906546693724822e-06, - "loss": 0.8466, + "learning_rate": 2.930554811784451e-06, + "loss": 0.8111, "step": 26729 }, { - "epoch": 0.7585130533484676, + "epoch": 0.7574598316755929, "grad_norm": 0.0, - "learning_rate": 2.905898901286159e-06, - "loss": 0.7706, + "learning_rate": 2.9299057176582733e-06, + "loss": 0.8507, "step": 26730 }, { - "epoch": 0.7585414301929625, + "epoch": 0.7574881691178554, "grad_norm": 0.0, - "learning_rate": 2.9052511687716867e-06, - "loss": 0.9373, + "learning_rate": 2.9292566830861303e-06, + "loss": 0.868, "step": 26731 }, { - "epoch": 0.7585698070374575, + "epoch": 0.7575165065601179, "grad_norm": 0.0, - "learning_rate": 2.9046034961868797e-06, - "loss": 0.8537, + "learning_rate": 2.9286077080734786e-06, + "loss": 0.7108, "step": 26732 }, { - "epoch": 0.7585981838819523, + "epoch": 0.7575448440023803, "grad_norm": 0.0, - "learning_rate": 2.903955883537201e-06, - "loss": 0.8042, + "learning_rate": 2.9279587926257945e-06, + "loss": 0.8077, "step": 26733 }, { - "epoch": 0.7586265607264472, + "epoch": 0.7575731814446428, "grad_norm": 0.0, - "learning_rate": 2.9033083308281273e-06, - "loss": 0.7321, + "learning_rate": 2.927309936748537e-06, + "loss": 0.9798, "step": 26734 }, { - "epoch": 0.7586549375709422, + "epoch": 0.7576015188869053, "grad_norm": 0.0, - "learning_rate": 2.902660838065131e-06, - "loss": 0.7768, + "learning_rate": 2.9266611404471736e-06, + "loss": 0.7833, "step": 26735 }, { - "epoch": 0.758683314415437, + "epoch": 0.7576298563291677, "grad_norm": 0.0, - "learning_rate": 2.9020134052536742e-06, - "loss": 0.8208, + "learning_rate": 2.926012403727173e-06, + "loss": 0.7053, "step": 26736 }, { - "epoch": 0.7587116912599319, + "epoch": 0.7576581937714302, "grad_norm": 0.0, - "learning_rate": 2.901366032399232e-06, - "loss": 0.7687, + "learning_rate": 2.9253637265939936e-06, + "loss": 0.8428, "step": 26737 }, { - "epoch": 0.7587400681044267, + "epoch": 0.7576865312136927, "grad_norm": 0.0, - "learning_rate": 2.9007187195072684e-06, - "loss": 0.8546, + "learning_rate": 2.9247151090531032e-06, + "loss": 0.7755, "step": 26738 }, { - "epoch": 0.7587684449489217, + "epoch": 0.7577148686559552, "grad_norm": 0.0, - "learning_rate": 2.900071466583252e-06, - "loss": 0.8829, + "learning_rate": 2.9240665511099643e-06, + "loss": 0.8152, "step": 26739 }, { - "epoch": 0.7587968217934166, + "epoch": 0.7577432060982175, "grad_norm": 0.0, - "learning_rate": 2.899424273632654e-06, - "loss": 0.8542, + "learning_rate": 2.9234180527700407e-06, + "loss": 0.879, "step": 26740 }, { - "epoch": 0.7588251986379114, + "epoch": 0.75777154354048, "grad_norm": 0.0, - "learning_rate": 2.8987771406609353e-06, - "loss": 0.8418, + "learning_rate": 2.9227696140387974e-06, + "loss": 0.8525, "step": 26741 }, { - "epoch": 0.7588535754824064, + "epoch": 0.7577998809827425, "grad_norm": 0.0, - "learning_rate": 2.898130067673566e-06, - "loss": 0.7521, + "learning_rate": 2.922121234921691e-06, + "loss": 0.7903, "step": 26742 }, { - "epoch": 0.7588819523269013, + "epoch": 0.7578282184250049, "grad_norm": 0.0, - "learning_rate": 2.8974830546760135e-06, - "loss": 0.8436, + "learning_rate": 2.921472915424186e-06, + "loss": 0.7416, "step": 26743 }, { - "epoch": 0.7589103291713961, + "epoch": 0.7578565558672674, "grad_norm": 0.0, - "learning_rate": 2.896836101673738e-06, - "loss": 0.7698, + "learning_rate": 2.920824655551746e-06, + "loss": 0.7641, "step": 26744 }, { - "epoch": 0.7589387060158911, + "epoch": 0.7578848933095299, "grad_norm": 0.0, - "learning_rate": 2.8961892086722076e-06, - "loss": 0.7623, + "learning_rate": 2.9201764553098254e-06, + "loss": 0.9058, "step": 26745 }, { - "epoch": 0.7589670828603859, + "epoch": 0.7579132307517923, "grad_norm": 0.0, - "learning_rate": 2.8955423756768887e-06, - "loss": 0.8299, + "learning_rate": 2.919528314703891e-06, + "loss": 0.8375, "step": 26746 }, { - "epoch": 0.7589954597048808, + "epoch": 0.7579415681940548, "grad_norm": 0.0, - "learning_rate": 2.8948956026932405e-06, - "loss": 0.739, + "learning_rate": 2.9188802337393953e-06, + "loss": 0.7196, "step": 26747 }, { - "epoch": 0.7590238365493757, + "epoch": 0.7579699056363173, "grad_norm": 0.0, - "learning_rate": 2.8942488897267284e-06, - "loss": 0.728, + "learning_rate": 2.918232212421801e-06, + "loss": 0.907, "step": 26748 }, { - "epoch": 0.7590522133938706, + "epoch": 0.7579982430785798, "grad_norm": 0.0, - "learning_rate": 2.8936022367828153e-06, - "loss": 0.7667, + "learning_rate": 2.91758425075657e-06, + "loss": 0.7833, "step": 26749 }, { - "epoch": 0.7590805902383655, + "epoch": 0.7580265805208422, "grad_norm": 0.0, - "learning_rate": 2.8929556438669626e-06, - "loss": 0.7483, + "learning_rate": 2.916936348749153e-06, + "loss": 0.817, "step": 26750 }, { - "epoch": 0.7591089670828604, + "epoch": 0.7580549179631046, "grad_norm": 0.0, - "learning_rate": 2.8923091109846348e-06, - "loss": 0.7774, + "learning_rate": 2.9162885064050117e-06, + "loss": 0.8343, "step": 26751 }, { - "epoch": 0.7591373439273553, + "epoch": 0.7580832554053671, "grad_norm": 0.0, - "learning_rate": 2.8916626381412927e-06, - "loss": 0.7493, + "learning_rate": 2.915640723729604e-06, + "loss": 0.9245, "step": 26752 }, { - "epoch": 0.7591657207718502, + "epoch": 0.7581115928476295, "grad_norm": 0.0, - "learning_rate": 2.8910162253423947e-06, - "loss": 0.7646, + "learning_rate": 2.914993000728383e-06, + "loss": 0.8463, "step": 26753 }, { - "epoch": 0.759194097616345, + "epoch": 0.758139930289892, "grad_norm": 0.0, - "learning_rate": 2.8903698725934002e-06, - "loss": 0.8743, + "learning_rate": 2.914345337406812e-06, + "loss": 0.8209, "step": 26754 }, { - "epoch": 0.7592224744608399, + "epoch": 0.7581682677321545, "grad_norm": 0.0, - "learning_rate": 2.8897235798997757e-06, - "loss": 0.7899, + "learning_rate": 2.913697733770338e-06, + "loss": 0.7537, "step": 26755 }, { - "epoch": 0.7592508513053349, + "epoch": 0.758196605174417, "grad_norm": 0.0, - "learning_rate": 2.889077347266972e-06, - "loss": 0.7851, + "learning_rate": 2.9130501898244177e-06, + "loss": 0.7744, "step": 26756 }, { - "epoch": 0.7592792281498297, + "epoch": 0.7582249426166794, "grad_norm": 0.0, - "learning_rate": 2.8884311747004513e-06, - "loss": 0.827, + "learning_rate": 2.9124027055745118e-06, + "loss": 0.7706, "step": 26757 }, { - "epoch": 0.7593076049943246, + "epoch": 0.7582532800589419, "grad_norm": 0.0, - "learning_rate": 2.8877850622056756e-06, - "loss": 0.7856, + "learning_rate": 2.9117552810260675e-06, + "loss": 0.6784, "step": 26758 }, { - "epoch": 0.7593359818388196, + "epoch": 0.7582816175012044, "grad_norm": 0.0, - "learning_rate": 2.8871390097880958e-06, - "loss": 0.8297, + "learning_rate": 2.911107916184539e-06, + "loss": 0.8004, "step": 26759 }, { - "epoch": 0.7593643586833144, + "epoch": 0.7583099549434668, "grad_norm": 0.0, - "learning_rate": 2.8864930174531726e-06, - "loss": 0.8243, + "learning_rate": 2.9104606110553844e-06, + "loss": 0.8488, "step": 26760 }, { - "epoch": 0.7593927355278093, + "epoch": 0.7583382923857293, "grad_norm": 0.0, - "learning_rate": 2.885847085206366e-06, - "loss": 0.7756, + "learning_rate": 2.909813365644051e-06, + "loss": 0.7761, "step": 26761 }, { - "epoch": 0.7594211123723043, + "epoch": 0.7583666298279917, "grad_norm": 0.0, - "learning_rate": 2.885201213053126e-06, - "loss": 0.8946, + "learning_rate": 2.9091661799559934e-06, + "loss": 0.9879, "step": 26762 }, { - "epoch": 0.7594494892167991, + "epoch": 0.7583949672702542, "grad_norm": 0.0, - "learning_rate": 2.884555400998914e-06, - "loss": 0.9069, + "learning_rate": 2.9085190539966602e-06, + "loss": 0.9129, "step": 26763 }, { - "epoch": 0.759477866061294, + "epoch": 0.7584233047125166, "grad_norm": 0.0, - "learning_rate": 2.883909649049176e-06, - "loss": 0.8457, + "learning_rate": 2.9078719877715046e-06, + "loss": 0.8429, "step": 26764 }, { - "epoch": 0.7595062429057888, + "epoch": 0.7584516421547791, "grad_norm": 0.0, - "learning_rate": 2.883263957209377e-06, - "loss": 0.8186, + "learning_rate": 2.9072249812859767e-06, + "loss": 0.7552, "step": 26765 }, { - "epoch": 0.7595346197502838, + "epoch": 0.7584799795970416, "grad_norm": 0.0, - "learning_rate": 2.882618325484969e-06, - "loss": 0.9751, + "learning_rate": 2.9065780345455265e-06, + "loss": 0.9059, "step": 26766 }, { - "epoch": 0.7595629965947787, + "epoch": 0.758508317039304, "grad_norm": 0.0, - "learning_rate": 2.8819727538814024e-06, - "loss": 0.8305, + "learning_rate": 2.905931147555604e-06, + "loss": 0.9203, "step": 26767 }, { - "epoch": 0.7595913734392735, + "epoch": 0.7585366544815665, "grad_norm": 0.0, - "learning_rate": 2.8813272424041306e-06, - "loss": 0.8285, + "learning_rate": 2.9052843203216597e-06, + "loss": 0.7653, "step": 26768 }, { - "epoch": 0.7596197502837685, + "epoch": 0.758564991923829, "grad_norm": 0.0, - "learning_rate": 2.8806817910586116e-06, - "loss": 0.8705, + "learning_rate": 2.9046375528491378e-06, + "loss": 0.8005, "step": 26769 }, { - "epoch": 0.7596481271282634, + "epoch": 0.7585933293660914, "grad_norm": 0.0, - "learning_rate": 2.88003639985029e-06, - "loss": 0.7217, + "learning_rate": 2.903990845143492e-06, + "loss": 0.8997, "step": 26770 }, { - "epoch": 0.7596765039727582, + "epoch": 0.7586216668083539, "grad_norm": 0.0, - "learning_rate": 2.8793910687846203e-06, - "loss": 0.7811, + "learning_rate": 2.9033441972101628e-06, + "loss": 0.832, "step": 26771 }, { - "epoch": 0.7597048808172531, + "epoch": 0.7586500042506163, "grad_norm": 0.0, - "learning_rate": 2.8787457978670586e-06, - "loss": 0.8237, + "learning_rate": 2.902697609054601e-06, + "loss": 0.8101, "step": 26772 }, { - "epoch": 0.759733257661748, + "epoch": 0.7586783416928788, "grad_norm": 0.0, - "learning_rate": 2.8781005871030476e-06, - "loss": 0.8383, + "learning_rate": 2.9020510806822555e-06, + "loss": 0.8817, "step": 26773 }, { - "epoch": 0.7597616345062429, + "epoch": 0.7587066791351412, "grad_norm": 0.0, - "learning_rate": 2.877455436498041e-06, - "loss": 0.8618, + "learning_rate": 2.901404612098567e-06, + "loss": 0.8452, "step": 26774 }, { - "epoch": 0.7597900113507378, + "epoch": 0.7587350165774037, "grad_norm": 0.0, - "learning_rate": 2.8768103460574925e-06, - "loss": 0.776, + "learning_rate": 2.9007582033089865e-06, + "loss": 0.7219, "step": 26775 }, { - "epoch": 0.7598183881952327, + "epoch": 0.7587633540196662, "grad_norm": 0.0, - "learning_rate": 2.8761653157868442e-06, - "loss": 0.7675, + "learning_rate": 2.900111854318952e-06, + "loss": 0.7423, "step": 26776 }, { - "epoch": 0.7598467650397276, + "epoch": 0.7587916914619286, "grad_norm": 0.0, - "learning_rate": 2.875520345691546e-06, - "loss": 0.7993, + "learning_rate": 2.899465565133912e-06, + "loss": 0.8068, "step": 26777 }, { - "epoch": 0.7598751418842224, + "epoch": 0.7588200289041911, "grad_norm": 0.0, - "learning_rate": 2.8748754357770525e-06, - "loss": 0.8799, + "learning_rate": 2.898819335759311e-06, + "loss": 0.8183, "step": 26778 }, { - "epoch": 0.7599035187287174, + "epoch": 0.7588483663464536, "grad_norm": 0.0, - "learning_rate": 2.8742305860487994e-06, - "loss": 0.778, + "learning_rate": 2.898173166200591e-06, + "loss": 0.8089, "step": 26779 }, { - "epoch": 0.7599318955732123, + "epoch": 0.7588767037887161, "grad_norm": 0.0, - "learning_rate": 2.873585796512247e-06, - "loss": 0.8336, + "learning_rate": 2.8975270564631963e-06, + "loss": 0.7563, "step": 26780 }, { - "epoch": 0.7599602724177071, + "epoch": 0.7589050412309785, "grad_norm": 0.0, - "learning_rate": 2.872941067172833e-06, - "loss": 0.7548, + "learning_rate": 2.896881006552571e-06, + "loss": 0.808, "step": 26781 }, { - "epoch": 0.759988649262202, + "epoch": 0.758933378673241, "grad_norm": 0.0, - "learning_rate": 2.8722963980360064e-06, - "loss": 0.7872, + "learning_rate": 2.8962350164741515e-06, + "loss": 0.8265, "step": 26782 }, { - "epoch": 0.760017026106697, + "epoch": 0.7589617161155034, "grad_norm": 0.0, - "learning_rate": 2.8716517891072148e-06, - "loss": 0.8295, + "learning_rate": 2.895589086233386e-06, + "loss": 0.7182, "step": 26783 }, { - "epoch": 0.7600454029511918, + "epoch": 0.7589900535577658, "grad_norm": 0.0, - "learning_rate": 2.8710072403918986e-06, - "loss": 0.7719, + "learning_rate": 2.8949432158357083e-06, + "loss": 0.7499, "step": 26784 }, { - "epoch": 0.7600737797956867, + "epoch": 0.7590183910000283, "grad_norm": 0.0, - "learning_rate": 2.8703627518955046e-06, - "loss": 0.8844, + "learning_rate": 2.8942974052865624e-06, + "loss": 0.826, "step": 26785 }, { - "epoch": 0.7601021566401817, + "epoch": 0.7590467284422908, "grad_norm": 0.0, - "learning_rate": 2.8697183236234805e-06, - "loss": 0.6954, + "learning_rate": 2.8936516545913917e-06, + "loss": 0.8432, "step": 26786 }, { - "epoch": 0.7601305334846765, + "epoch": 0.7590750658845533, "grad_norm": 0.0, - "learning_rate": 2.869073955581262e-06, - "loss": 0.7847, + "learning_rate": 2.8930059637556286e-06, + "loss": 0.8381, "step": 26787 }, { - "epoch": 0.7601589103291714, + "epoch": 0.7591034033268157, "grad_norm": 0.0, - "learning_rate": 2.868429647774297e-06, - "loss": 0.8604, + "learning_rate": 2.8923603327847148e-06, + "loss": 0.8524, "step": 26788 }, { - "epoch": 0.7601872871736662, + "epoch": 0.7591317407690782, "grad_norm": 0.0, - "learning_rate": 2.8677854002080307e-06, - "loss": 0.7894, + "learning_rate": 2.8917147616840933e-06, + "loss": 0.8191, "step": 26789 }, { - "epoch": 0.7602156640181612, + "epoch": 0.7591600782113407, "grad_norm": 0.0, - "learning_rate": 2.8671412128878985e-06, - "loss": 0.8091, + "learning_rate": 2.891069250459194e-06, + "loss": 0.8036, "step": 26790 }, { - "epoch": 0.7602440408626561, + "epoch": 0.7591884156536031, "grad_norm": 0.0, - "learning_rate": 2.866497085819344e-06, - "loss": 0.8536, + "learning_rate": 2.8904237991154594e-06, + "loss": 0.8525, "step": 26791 }, { - "epoch": 0.7602724177071509, + "epoch": 0.7592167530958656, "grad_norm": 0.0, - "learning_rate": 2.8658530190078138e-06, - "loss": 0.8045, + "learning_rate": 2.8897784076583237e-06, + "loss": 0.818, "step": 26792 }, { - "epoch": 0.7603007945516459, + "epoch": 0.759245090538128, "grad_norm": 0.0, - "learning_rate": 2.8652090124587405e-06, - "loss": 0.7756, + "learning_rate": 2.889133076093226e-06, + "loss": 0.7514, "step": 26793 }, { - "epoch": 0.7603291713961408, + "epoch": 0.7592734279803904, "grad_norm": 0.0, - "learning_rate": 2.864565066177567e-06, - "loss": 0.7658, + "learning_rate": 2.8884878044256037e-06, + "loss": 0.7791, "step": 26794 }, { - "epoch": 0.7603575482406356, + "epoch": 0.7593017654226529, "grad_norm": 0.0, - "learning_rate": 2.863921180169733e-06, - "loss": 0.8151, + "learning_rate": 2.8878425926608856e-06, + "loss": 0.6821, "step": 26795 }, { - "epoch": 0.7603859250851306, + "epoch": 0.7593301028649154, "grad_norm": 0.0, - "learning_rate": 2.863277354440679e-06, - "loss": 0.8162, + "learning_rate": 2.887197440804511e-06, + "loss": 0.7885, "step": 26796 }, { - "epoch": 0.7604143019296254, + "epoch": 0.7593584403071779, "grad_norm": 0.0, - "learning_rate": 2.8626335889958433e-06, - "loss": 0.8803, + "learning_rate": 2.8865523488619174e-06, + "loss": 0.799, "step": 26797 }, { - "epoch": 0.7604426787741203, + "epoch": 0.7593867777494403, "grad_norm": 0.0, - "learning_rate": 2.86198988384066e-06, - "loss": 0.9233, + "learning_rate": 2.885907316838531e-06, + "loss": 0.7574, "step": 26798 }, { - "epoch": 0.7604710556186152, + "epoch": 0.7594151151917028, "grad_norm": 0.0, - "learning_rate": 2.86134623898057e-06, - "loss": 0.7732, + "learning_rate": 2.885262344739792e-06, + "loss": 0.7204, "step": 26799 }, { - "epoch": 0.7604994324631101, + "epoch": 0.7594434526339653, "grad_norm": 0.0, - "learning_rate": 2.8607026544210115e-06, - "loss": 0.8679, + "learning_rate": 2.8846174325711272e-06, + "loss": 0.854, "step": 26800 }, { - "epoch": 0.760527809307605, + "epoch": 0.7594717900762277, "grad_norm": 0.0, - "learning_rate": 2.8600591301674153e-06, - "loss": 0.7973, + "learning_rate": 2.8839725803379724e-06, + "loss": 0.7798, "step": 26801 }, { - "epoch": 0.7605561861520999, + "epoch": 0.7595001275184902, "grad_norm": 0.0, - "learning_rate": 2.8594156662252205e-06, - "loss": 0.8046, + "learning_rate": 2.8833277880457622e-06, + "loss": 0.7363, "step": 26802 }, { - "epoch": 0.7605845629965948, + "epoch": 0.7595284649607527, "grad_norm": 0.0, - "learning_rate": 2.858772262599866e-06, - "loss": 0.8687, + "learning_rate": 2.8826830556999207e-06, + "loss": 0.878, "step": 26803 }, { - "epoch": 0.7606129398410897, + "epoch": 0.7595568024030152, "grad_norm": 0.0, - "learning_rate": 2.858128919296781e-06, - "loss": 0.7932, + "learning_rate": 2.882038383305884e-06, + "loss": 0.8765, "step": 26804 }, { - "epoch": 0.7606413166855845, + "epoch": 0.7595851398452775, "grad_norm": 0.0, - "learning_rate": 2.857485636321401e-06, - "loss": 0.8303, + "learning_rate": 2.881393770869081e-06, + "loss": 0.8282, "step": 26805 }, { - "epoch": 0.7606696935300794, + "epoch": 0.75961347728754, "grad_norm": 0.0, - "learning_rate": 2.856842413679164e-06, - "loss": 0.7875, + "learning_rate": 2.8807492183949404e-06, + "loss": 0.6954, "step": 26806 }, { - "epoch": 0.7606980703745744, + "epoch": 0.7596418147298025, "grad_norm": 0.0, - "learning_rate": 2.8561992513754967e-06, - "loss": 0.8057, + "learning_rate": 2.880104725888897e-06, + "loss": 0.8761, "step": 26807 }, { - "epoch": 0.7607264472190692, + "epoch": 0.7596701521720649, "grad_norm": 0.0, - "learning_rate": 2.8555561494158367e-06, - "loss": 0.844, + "learning_rate": 2.879460293356372e-06, + "loss": 0.8906, "step": 26808 }, { - "epoch": 0.7607548240635641, + "epoch": 0.7596984896143274, "grad_norm": 0.0, - "learning_rate": 2.854913107805617e-06, - "loss": 0.7656, + "learning_rate": 2.8788159208027975e-06, + "loss": 0.8934, "step": 26809 }, { - "epoch": 0.7607832009080591, + "epoch": 0.7597268270565899, "grad_norm": 0.0, - "learning_rate": 2.8542701265502624e-06, - "loss": 0.949, + "learning_rate": 2.8781716082336042e-06, + "loss": 0.8245, "step": 26810 }, { - "epoch": 0.7608115777525539, + "epoch": 0.7597551644988524, "grad_norm": 0.0, - "learning_rate": 2.853627205655215e-06, - "loss": 0.8012, + "learning_rate": 2.8775273556542116e-06, + "loss": 0.7406, "step": 26811 }, { - "epoch": 0.7608399545970488, + "epoch": 0.7597835019411148, "grad_norm": 0.0, - "learning_rate": 2.8529843451258965e-06, - "loss": 0.7569, + "learning_rate": 2.8768831630700555e-06, + "loss": 0.7799, "step": 26812 }, { - "epoch": 0.7608683314415438, + "epoch": 0.7598118393833773, "grad_norm": 0.0, - "learning_rate": 2.8523415449677404e-06, - "loss": 0.7701, + "learning_rate": 2.876239030486554e-06, + "loss": 0.789, "step": 26813 }, { - "epoch": 0.7608967082860386, + "epoch": 0.7598401768256398, "grad_norm": 0.0, - "learning_rate": 2.8516988051861803e-06, - "loss": 0.8393, + "learning_rate": 2.875594957909136e-06, + "loss": 0.7605, "step": 26814 }, { - "epoch": 0.7609250851305335, + "epoch": 0.7598685142679021, "grad_norm": 0.0, - "learning_rate": 2.8510561257866375e-06, - "loss": 0.7722, + "learning_rate": 2.874950945343231e-06, + "loss": 0.8615, "step": 26815 }, { - "epoch": 0.7609534619750283, + "epoch": 0.7598968517101646, "grad_norm": 0.0, - "learning_rate": 2.8504135067745463e-06, - "loss": 0.8879, + "learning_rate": 2.874306992794257e-06, + "loss": 0.741, "step": 26816 }, { - "epoch": 0.7609818388195233, + "epoch": 0.7599251891524271, "grad_norm": 0.0, - "learning_rate": 2.849770948155336e-06, - "loss": 0.8947, + "learning_rate": 2.873663100267641e-06, + "loss": 0.8712, "step": 26817 }, { - "epoch": 0.7610102156640182, + "epoch": 0.7599535265946895, "grad_norm": 0.0, - "learning_rate": 2.8491284499344287e-06, - "loss": 0.8291, + "learning_rate": 2.873019267768806e-06, + "loss": 0.8089, "step": 26818 }, { - "epoch": 0.761038592508513, + "epoch": 0.759981864036952, "grad_norm": 0.0, - "learning_rate": 2.848486012117255e-06, - "loss": 0.8482, + "learning_rate": 2.872375495303178e-06, + "loss": 0.7132, "step": 26819 }, { - "epoch": 0.761066969353008, + "epoch": 0.7600102014792145, "grad_norm": 0.0, - "learning_rate": 2.8478436347092454e-06, - "loss": 0.8296, + "learning_rate": 2.8717317828761805e-06, + "loss": 0.7898, "step": 26820 }, { - "epoch": 0.7610953461975029, + "epoch": 0.760038538921477, "grad_norm": 0.0, - "learning_rate": 2.8472013177158174e-06, - "loss": 0.8719, + "learning_rate": 2.8710881304932293e-06, + "loss": 0.852, "step": 26821 }, { - "epoch": 0.7611237230419977, + "epoch": 0.7600668763637394, "grad_norm": 0.0, - "learning_rate": 2.846559061142403e-06, - "loss": 0.8185, + "learning_rate": 2.8704445381597513e-06, + "loss": 0.9038, "step": 26822 }, { - "epoch": 0.7611520998864926, + "epoch": 0.7600952138060019, "grad_norm": 0.0, - "learning_rate": 2.845916864994428e-06, - "loss": 0.8835, + "learning_rate": 2.8698010058811686e-06, + "loss": 0.8311, "step": 26823 }, { - "epoch": 0.7611804767309875, + "epoch": 0.7601235512482644, "grad_norm": 0.0, - "learning_rate": 2.845274729277312e-06, - "loss": 0.7951, + "learning_rate": 2.8691575336628973e-06, + "loss": 0.8277, "step": 26824 }, { - "epoch": 0.7612088535754824, + "epoch": 0.7601518886905267, "grad_norm": 0.0, - "learning_rate": 2.8446326539964818e-06, - "loss": 0.8084, + "learning_rate": 2.8685141215103594e-06, + "loss": 0.8202, "step": 26825 }, { - "epoch": 0.7612372304199773, + "epoch": 0.7601802261327892, "grad_norm": 0.0, - "learning_rate": 2.843990639157361e-06, - "loss": 0.8383, + "learning_rate": 2.867870769428979e-06, + "loss": 0.8495, "step": 26826 }, { - "epoch": 0.7612656072644722, + "epoch": 0.7602085635750517, "grad_norm": 0.0, - "learning_rate": 2.8433486847653734e-06, - "loss": 0.8974, + "learning_rate": 2.867227477424168e-06, + "loss": 0.8295, "step": 26827 }, { - "epoch": 0.7612939841089671, + "epoch": 0.7602369010173142, "grad_norm": 0.0, - "learning_rate": 2.842706790825944e-06, - "loss": 0.885, + "learning_rate": 2.8665842455013513e-06, + "loss": 0.7648, "step": 26828 }, { - "epoch": 0.761322360953462, + "epoch": 0.7602652384595766, "grad_norm": 0.0, - "learning_rate": 2.8420649573444893e-06, - "loss": 0.7747, + "learning_rate": 2.865941073665942e-06, + "loss": 0.757, "step": 26829 }, { - "epoch": 0.7613507377979569, + "epoch": 0.7602935759018391, "grad_norm": 0.0, - "learning_rate": 2.841423184326434e-06, - "loss": 0.8879, + "learning_rate": 2.86529796192336e-06, + "loss": 0.8091, "step": 26830 }, { - "epoch": 0.7613791146424518, + "epoch": 0.7603219133441016, "grad_norm": 0.0, - "learning_rate": 2.840781471777201e-06, - "loss": 0.7713, + "learning_rate": 2.8646549102790232e-06, + "loss": 0.7972, "step": 26831 }, { - "epoch": 0.7614074914869466, + "epoch": 0.760350250786364, "grad_norm": 0.0, - "learning_rate": 2.8401398197022067e-06, - "loss": 0.7995, + "learning_rate": 2.8640119187383475e-06, + "loss": 0.8632, "step": 26832 }, { - "epoch": 0.7614358683314415, + "epoch": 0.7603785882286265, "grad_norm": 0.0, - "learning_rate": 2.8394982281068727e-06, - "loss": 0.8446, + "learning_rate": 2.863368987306753e-06, + "loss": 0.904, "step": 26833 }, { - "epoch": 0.7614642451759365, + "epoch": 0.760406925670889, "grad_norm": 0.0, - "learning_rate": 2.838856696996621e-06, - "loss": 0.9771, + "learning_rate": 2.8627261159896467e-06, + "loss": 0.8398, "step": 26834 }, { - "epoch": 0.7614926220204313, + "epoch": 0.7604352631131515, "grad_norm": 0.0, - "learning_rate": 2.838215226376867e-06, - "loss": 0.7708, + "learning_rate": 2.8620833047924502e-06, + "loss": 0.8087, "step": 26835 }, { - "epoch": 0.7615209988649262, + "epoch": 0.7604636005554138, "grad_norm": 0.0, - "learning_rate": 2.837573816253031e-06, - "loss": 0.7672, + "learning_rate": 2.8614405537205793e-06, + "loss": 0.8675, "step": 26836 }, { - "epoch": 0.7615493757094212, + "epoch": 0.7604919379976763, "grad_norm": 0.0, - "learning_rate": 2.836932466630533e-06, - "loss": 0.8624, + "learning_rate": 2.860797862779442e-06, + "loss": 0.7908, "step": 26837 }, { - "epoch": 0.761577752553916, + "epoch": 0.7605202754399388, "grad_norm": 0.0, - "learning_rate": 2.8362911775147863e-06, - "loss": 0.9012, + "learning_rate": 2.8601552319744564e-06, + "loss": 0.7421, "step": 26838 }, { - "epoch": 0.7616061293984109, + "epoch": 0.7605486128822012, "grad_norm": 0.0, - "learning_rate": 2.83564994891121e-06, - "loss": 0.8044, + "learning_rate": 2.859512661311037e-06, + "loss": 0.9404, "step": 26839 }, { - "epoch": 0.7616345062429057, + "epoch": 0.7605769503244637, "grad_norm": 0.0, - "learning_rate": 2.8350087808252236e-06, - "loss": 0.776, + "learning_rate": 2.8588701507945904e-06, + "loss": 0.8952, "step": 26840 }, { - "epoch": 0.7616628830874007, + "epoch": 0.7606052877667262, "grad_norm": 0.0, - "learning_rate": 2.8343676732622336e-06, - "loss": 0.8832, + "learning_rate": 2.8582277004305326e-06, + "loss": 0.7658, "step": 26841 }, { - "epoch": 0.7616912599318956, + "epoch": 0.7606336252089886, "grad_norm": 0.0, - "learning_rate": 2.8337266262276654e-06, - "loss": 0.7384, + "learning_rate": 2.857585310224279e-06, + "loss": 0.9089, "step": 26842 }, { - "epoch": 0.7617196367763904, + "epoch": 0.7606619626512511, "grad_norm": 0.0, - "learning_rate": 2.8330856397269336e-06, - "loss": 0.8274, + "learning_rate": 2.856942980181232e-06, + "loss": 0.7845, "step": 26843 }, { - "epoch": 0.7617480136208854, + "epoch": 0.7606903000935136, "grad_norm": 0.0, - "learning_rate": 2.8324447137654464e-06, - "loss": 0.8136, + "learning_rate": 2.8563007103068075e-06, + "loss": 0.8256, "step": 26844 }, { - "epoch": 0.7617763904653803, + "epoch": 0.7607186375357761, "grad_norm": 0.0, - "learning_rate": 2.8318038483486245e-06, - "loss": 0.7258, + "learning_rate": 2.8556585006064153e-06, + "loss": 0.8076, "step": 26845 }, { - "epoch": 0.7618047673098751, + "epoch": 0.7607469749780384, "grad_norm": 0.0, - "learning_rate": 2.8311630434818736e-06, - "loss": 0.9802, + "learning_rate": 2.8550163510854647e-06, + "loss": 0.787, "step": 26846 }, { - "epoch": 0.76183314415437, + "epoch": 0.7607753124203009, "grad_norm": 0.0, - "learning_rate": 2.8305222991706114e-06, - "loss": 0.8298, + "learning_rate": 2.8543742617493665e-06, + "loss": 0.7976, "step": 26847 }, { - "epoch": 0.761861520998865, + "epoch": 0.7608036498625634, "grad_norm": 0.0, - "learning_rate": 2.8298816154202526e-06, - "loss": 0.8295, + "learning_rate": 2.8537322326035253e-06, + "loss": 0.8303, "step": 26848 }, { - "epoch": 0.7618898978433598, + "epoch": 0.7608319873048258, "grad_norm": 0.0, - "learning_rate": 2.8292409922362028e-06, - "loss": 0.7379, + "learning_rate": 2.853090263653354e-06, + "loss": 0.8739, "step": 26849 }, { - "epoch": 0.7619182746878547, + "epoch": 0.7608603247470883, "grad_norm": 0.0, - "learning_rate": 2.8286004296238767e-06, - "loss": 0.8618, + "learning_rate": 2.8524483549042537e-06, + "loss": 0.8042, "step": 26850 }, { - "epoch": 0.7619466515323496, + "epoch": 0.7608886621893508, "grad_norm": 0.0, - "learning_rate": 2.8279599275886895e-06, - "loss": 0.7612, + "learning_rate": 2.8518065063616353e-06, + "loss": 0.7961, "step": 26851 }, { - "epoch": 0.7619750283768445, + "epoch": 0.7609169996316133, "grad_norm": 0.0, - "learning_rate": 2.827319486136042e-06, - "loss": 0.827, + "learning_rate": 2.8511647180309087e-06, + "loss": 0.8029, "step": 26852 }, { - "epoch": 0.7620034052213394, + "epoch": 0.7609453370738757, "grad_norm": 0.0, - "learning_rate": 2.8266791052713503e-06, - "loss": 0.743, + "learning_rate": 2.8505229899174734e-06, + "loss": 0.8207, "step": 26853 }, { - "epoch": 0.7620317820658343, + "epoch": 0.7609736745161382, "grad_norm": 0.0, - "learning_rate": 2.826038785000026e-06, - "loss": 0.7464, + "learning_rate": 2.8498813220267373e-06, + "loss": 0.8276, "step": 26854 }, { - "epoch": 0.7620601589103292, + "epoch": 0.7610020119584007, "grad_norm": 0.0, - "learning_rate": 2.825398525327472e-06, - "loss": 0.7578, + "learning_rate": 2.849239714364106e-06, + "loss": 0.852, "step": 26855 }, { - "epoch": 0.762088535754824, + "epoch": 0.761030349400663, "grad_norm": 0.0, - "learning_rate": 2.824758326259095e-06, - "loss": 0.9122, + "learning_rate": 2.848598166934984e-06, + "loss": 0.8941, "step": 26856 }, { - "epoch": 0.7621169125993189, + "epoch": 0.7610586868429255, "grad_norm": 0.0, - "learning_rate": 2.8241181878003134e-06, - "loss": 0.6902, + "learning_rate": 2.847956679744779e-06, + "loss": 0.7535, "step": 26857 }, { - "epoch": 0.7621452894438139, + "epoch": 0.761087024285188, "grad_norm": 0.0, - "learning_rate": 2.8234781099565245e-06, - "loss": 0.8574, + "learning_rate": 2.847315252798887e-06, + "loss": 0.8969, "step": 26858 }, { - "epoch": 0.7621736662883087, + "epoch": 0.7611153617274505, "grad_norm": 0.0, - "learning_rate": 2.8228380927331388e-06, - "loss": 0.8783, + "learning_rate": 2.8466738861027143e-06, + "loss": 0.8151, "step": 26859 }, { - "epoch": 0.7622020431328036, + "epoch": 0.7611436991697129, "grad_norm": 0.0, - "learning_rate": 2.8221981361355666e-06, - "loss": 0.8397, + "learning_rate": 2.846032579661667e-06, + "loss": 0.7988, "step": 26860 }, { - "epoch": 0.7622304199772986, + "epoch": 0.7611720366119754, "grad_norm": 0.0, - "learning_rate": 2.8215582401692054e-06, - "loss": 0.7552, + "learning_rate": 2.845391333481141e-06, + "loss": 0.9037, "step": 26861 }, { - "epoch": 0.7622587968217934, + "epoch": 0.7612003740542379, "grad_norm": 0.0, - "learning_rate": 2.8209184048394645e-06, - "loss": 0.844, + "learning_rate": 2.844750147566544e-06, + "loss": 0.7809, "step": 26862 }, { - "epoch": 0.7622871736662883, + "epoch": 0.7612287114965003, "grad_norm": 0.0, - "learning_rate": 2.8202786301517516e-06, - "loss": 0.7234, + "learning_rate": 2.84410902192327e-06, + "loss": 0.8574, "step": 26863 }, { - "epoch": 0.7623155505107831, + "epoch": 0.7612570489387628, "grad_norm": 0.0, - "learning_rate": 2.8196389161114647e-06, - "loss": 0.8923, + "learning_rate": 2.8434679565567236e-06, + "loss": 0.8169, "step": 26864 }, { - "epoch": 0.7623439273552781, + "epoch": 0.7612853863810253, "grad_norm": 0.0, - "learning_rate": 2.8189992627240117e-06, - "loss": 0.8097, + "learning_rate": 2.842826951472306e-06, + "loss": 0.8764, "step": 26865 }, { - "epoch": 0.762372304199773, + "epoch": 0.7613137238232877, "grad_norm": 0.0, - "learning_rate": 2.8183596699947967e-06, - "loss": 0.8487, + "learning_rate": 2.8421860066754126e-06, + "loss": 0.8084, "step": 26866 }, { - "epoch": 0.7624006810442678, + "epoch": 0.7613420612655502, "grad_norm": 0.0, - "learning_rate": 2.8177201379292174e-06, - "loss": 0.7781, + "learning_rate": 2.841545122171445e-06, + "loss": 0.7886, "step": 26867 }, { - "epoch": 0.7624290578887628, + "epoch": 0.7613703987078126, "grad_norm": 0.0, - "learning_rate": 2.8170806665326787e-06, - "loss": 0.7798, + "learning_rate": 2.8409042979657997e-06, + "loss": 0.7565, "step": 26868 }, { - "epoch": 0.7624574347332577, + "epoch": 0.7613987361500751, "grad_norm": 0.0, - "learning_rate": 2.8164412558105856e-06, - "loss": 0.7888, + "learning_rate": 2.8402635340638775e-06, + "loss": 0.8076, "step": 26869 }, { - "epoch": 0.7624858115777525, + "epoch": 0.7614270735923375, "grad_norm": 0.0, - "learning_rate": 2.815801905768334e-06, - "loss": 0.9004, + "learning_rate": 2.839622830471076e-06, + "loss": 0.8498, "step": 26870 }, { - "epoch": 0.7625141884222475, + "epoch": 0.7614554110346, "grad_norm": 0.0, - "learning_rate": 2.8151626164113265e-06, - "loss": 0.7858, + "learning_rate": 2.8389821871927882e-06, + "loss": 0.7803, "step": 26871 }, { - "epoch": 0.7625425652667424, + "epoch": 0.7614837484768625, "grad_norm": 0.0, - "learning_rate": 2.814523387744963e-06, - "loss": 0.814, + "learning_rate": 2.8383416042344114e-06, + "loss": 0.7967, "step": 26872 }, { - "epoch": 0.7625709421112372, + "epoch": 0.7615120859191249, "grad_norm": 0.0, - "learning_rate": 2.813884219774643e-06, - "loss": 0.7769, + "learning_rate": 2.8377010816013463e-06, + "loss": 0.8056, "step": 26873 }, { - "epoch": 0.7625993189557321, + "epoch": 0.7615404233613874, "grad_norm": 0.0, - "learning_rate": 2.8132451125057703e-06, - "loss": 0.8728, + "learning_rate": 2.8370606192989826e-06, + "loss": 0.8004, "step": 26874 }, { - "epoch": 0.762627695800227, + "epoch": 0.7615687608036499, "grad_norm": 0.0, - "learning_rate": 2.8126060659437347e-06, - "loss": 0.7982, + "learning_rate": 2.836420217332716e-06, + "loss": 0.8096, "step": 26875 }, { - "epoch": 0.7626560726447219, + "epoch": 0.7615970982459124, "grad_norm": 0.0, - "learning_rate": 2.8119670800939393e-06, - "loss": 0.892, + "learning_rate": 2.835779875707946e-06, + "loss": 0.8175, "step": 26876 }, { - "epoch": 0.7626844494892168, + "epoch": 0.7616254356881748, "grad_norm": 0.0, - "learning_rate": 2.8113281549617842e-06, - "loss": 0.7635, + "learning_rate": 2.8351395944300586e-06, + "loss": 0.8075, "step": 26877 }, { - "epoch": 0.7627128263337117, + "epoch": 0.7616537731304373, "grad_norm": 0.0, - "learning_rate": 2.8106892905526606e-06, - "loss": 0.7491, + "learning_rate": 2.8344993735044546e-06, + "loss": 0.8321, "step": 26878 }, { - "epoch": 0.7627412031782066, + "epoch": 0.7616821105726997, "grad_norm": 0.0, - "learning_rate": 2.810050486871968e-06, - "loss": 0.8039, + "learning_rate": 2.8338592129365194e-06, + "loss": 0.824, "step": 26879 }, { - "epoch": 0.7627695800227015, + "epoch": 0.7617104480149621, "grad_norm": 0.0, - "learning_rate": 2.8094117439251045e-06, - "loss": 0.7306, + "learning_rate": 2.83321911273165e-06, + "loss": 0.8326, "step": 26880 }, { - "epoch": 0.7627979568671963, + "epoch": 0.7617387854572246, "grad_norm": 0.0, - "learning_rate": 2.8087730617174603e-06, - "loss": 0.8798, + "learning_rate": 2.8325790728952364e-06, + "loss": 0.8308, "step": 26881 }, { - "epoch": 0.7628263337116913, + "epoch": 0.7617671228994871, "grad_norm": 0.0, - "learning_rate": 2.808134440254433e-06, - "loss": 0.8984, + "learning_rate": 2.831939093432672e-06, + "loss": 0.8247, "step": 26882 }, { - "epoch": 0.7628547105561861, + "epoch": 0.7617954603417496, "grad_norm": 0.0, - "learning_rate": 2.8074958795414207e-06, - "loss": 0.9624, + "learning_rate": 2.8312991743493457e-06, + "loss": 0.826, "step": 26883 }, { - "epoch": 0.762883087400681, + "epoch": 0.761823797784012, "grad_norm": 0.0, - "learning_rate": 2.806857379583812e-06, - "loss": 0.8769, + "learning_rate": 2.830659315650651e-06, + "loss": 0.7347, "step": 26884 }, { - "epoch": 0.762911464245176, + "epoch": 0.7618521352262745, "grad_norm": 0.0, - "learning_rate": 2.806218940387001e-06, - "loss": 0.6356, + "learning_rate": 2.830019517341973e-06, + "loss": 0.7853, "step": 26885 }, { - "epoch": 0.7629398410896708, + "epoch": 0.761880472668537, "grad_norm": 0.0, - "learning_rate": 2.8055805619563857e-06, - "loss": 0.7783, + "learning_rate": 2.829379779428706e-06, + "loss": 0.8872, "step": 26886 }, { - "epoch": 0.7629682179341657, + "epoch": 0.7619088101107994, "grad_norm": 0.0, - "learning_rate": 2.8049422442973483e-06, - "loss": 0.787, + "learning_rate": 2.8287401019162332e-06, + "loss": 0.8272, "step": 26887 }, { - "epoch": 0.7629965947786607, + "epoch": 0.7619371475530619, "grad_norm": 0.0, - "learning_rate": 2.8043039874152945e-06, - "loss": 0.8489, + "learning_rate": 2.828100484809945e-06, + "loss": 0.8768, "step": 26888 }, { - "epoch": 0.7630249716231555, + "epoch": 0.7619654849953243, "grad_norm": 0.0, - "learning_rate": 2.803665791315604e-06, - "loss": 0.8475, + "learning_rate": 2.8274609281152322e-06, + "loss": 0.7922, "step": 26889 }, { - "epoch": 0.7630533484676504, + "epoch": 0.7619938224375867, "grad_norm": 0.0, - "learning_rate": 2.803027656003672e-06, - "loss": 0.7482, + "learning_rate": 2.8268214318374764e-06, + "loss": 0.7268, "step": 26890 }, { - "epoch": 0.7630817253121452, + "epoch": 0.7620221598798492, "grad_norm": 0.0, - "learning_rate": 2.8023895814848923e-06, - "loss": 0.6296, + "learning_rate": 2.8261819959820713e-06, + "loss": 0.9297, "step": 26891 }, { - "epoch": 0.7631101021566402, + "epoch": 0.7620504973221117, "grad_norm": 0.0, - "learning_rate": 2.801751567764649e-06, - "loss": 0.7938, + "learning_rate": 2.8255426205543957e-06, + "loss": 0.8552, "step": 26892 }, { - "epoch": 0.7631384790011351, + "epoch": 0.7620788347643742, "grad_norm": 0.0, - "learning_rate": 2.801113614848333e-06, - "loss": 0.8376, + "learning_rate": 2.8249033055598387e-06, + "loss": 0.8178, "step": 26893 }, { - "epoch": 0.7631668558456299, + "epoch": 0.7621071722066366, "grad_norm": 0.0, - "learning_rate": 2.800475722741337e-06, - "loss": 0.8546, + "learning_rate": 2.8242640510037853e-06, + "loss": 0.7923, "step": 26894 }, { - "epoch": 0.7631952326901249, + "epoch": 0.7621355096488991, "grad_norm": 0.0, - "learning_rate": 2.7998378914490433e-06, - "loss": 0.9331, + "learning_rate": 2.8236248568916215e-06, + "loss": 1.0781, "step": 26895 }, { - "epoch": 0.7632236095346198, + "epoch": 0.7621638470911616, "grad_norm": 0.0, - "learning_rate": 2.7992001209768427e-06, - "loss": 0.749, + "learning_rate": 2.8229857232287293e-06, + "loss": 0.832, "step": 26896 }, { - "epoch": 0.7632519863791146, + "epoch": 0.762192184533424, "grad_norm": 0.0, - "learning_rate": 2.798562411330126e-06, - "loss": 0.7929, + "learning_rate": 2.822346650020498e-06, + "loss": 0.8611, "step": 26897 }, { - "epoch": 0.7632803632236095, + "epoch": 0.7622205219756865, "grad_norm": 0.0, - "learning_rate": 2.7979247625142724e-06, - "loss": 0.8845, + "learning_rate": 2.8217076372723017e-06, + "loss": 0.7281, "step": 26898 }, { - "epoch": 0.7633087400681045, + "epoch": 0.762248859417949, "grad_norm": 0.0, - "learning_rate": 2.797287174534672e-06, - "loss": 0.8805, + "learning_rate": 2.821068684989531e-06, + "loss": 0.7729, "step": 26899 }, { - "epoch": 0.7633371169125993, + "epoch": 0.7622771968602114, "grad_norm": 0.0, - "learning_rate": 2.796649647396714e-06, - "loss": 0.8264, + "learning_rate": 2.8204297931775615e-06, + "loss": 0.7008, "step": 26900 }, { - "epoch": 0.7633654937570942, + "epoch": 0.7623055343024738, "grad_norm": 0.0, - "learning_rate": 2.796012181105777e-06, - "loss": 0.7736, + "learning_rate": 2.8197909618417786e-06, + "loss": 0.9041, "step": 26901 }, { - "epoch": 0.7633938706015891, + "epoch": 0.7623338717447363, "grad_norm": 0.0, - "learning_rate": 2.79537477566725e-06, - "loss": 0.7863, + "learning_rate": 2.819152190987565e-06, + "loss": 0.8251, "step": 26902 }, { - "epoch": 0.763422247446084, + "epoch": 0.7623622091869988, "grad_norm": 0.0, - "learning_rate": 2.794737431086515e-06, - "loss": 0.7513, + "learning_rate": 2.818513480620296e-06, + "loss": 0.7669, "step": 26903 }, { - "epoch": 0.7634506242905789, + "epoch": 0.7623905466292612, "grad_norm": 0.0, - "learning_rate": 2.794100147368957e-06, - "loss": 0.8282, + "learning_rate": 2.8178748307453552e-06, + "loss": 0.7689, "step": 26904 }, { - "epoch": 0.7634790011350738, + "epoch": 0.7624188840715237, "grad_norm": 0.0, - "learning_rate": 2.793462924519962e-06, - "loss": 0.7968, + "learning_rate": 2.8172362413681243e-06, + "loss": 0.731, "step": 26905 }, { - "epoch": 0.7635073779795687, + "epoch": 0.7624472215137862, "grad_norm": 0.0, - "learning_rate": 2.792825762544907e-06, - "loss": 0.746, + "learning_rate": 2.816597712493977e-06, + "loss": 0.8451, "step": 26906 }, { - "epoch": 0.7635357548240636, + "epoch": 0.7624755589560487, "grad_norm": 0.0, - "learning_rate": 2.7921886614491765e-06, - "loss": 0.7699, + "learning_rate": 2.8159592441282948e-06, + "loss": 0.876, "step": 26907 }, { - "epoch": 0.7635641316685584, + "epoch": 0.7625038963983111, "grad_norm": 0.0, - "learning_rate": 2.7915516212381554e-06, - "loss": 0.8692, + "learning_rate": 2.815320836276455e-06, + "loss": 0.8327, "step": 26908 }, { - "epoch": 0.7635925085130534, + "epoch": 0.7625322338405736, "grad_norm": 0.0, - "learning_rate": 2.7909146419172184e-06, - "loss": 0.7933, + "learning_rate": 2.814682488943836e-06, + "loss": 0.7852, "step": 26909 }, { - "epoch": 0.7636208853575482, + "epoch": 0.762560571282836, "grad_norm": 0.0, - "learning_rate": 2.79027772349175e-06, - "loss": 0.8393, + "learning_rate": 2.8140442021358185e-06, + "loss": 0.8958, "step": 26910 }, { - "epoch": 0.7636492622020431, + "epoch": 0.7625889087250984, "grad_norm": 0.0, - "learning_rate": 2.7896408659671327e-06, - "loss": 0.7851, + "learning_rate": 2.8134059758577714e-06, + "loss": 0.8584, "step": 26911 }, { - "epoch": 0.7636776390465381, + "epoch": 0.7626172461673609, "grad_norm": 0.0, - "learning_rate": 2.789004069348741e-06, - "loss": 0.7219, + "learning_rate": 2.8127678101150744e-06, + "loss": 0.8098, "step": 26912 }, { - "epoch": 0.7637060158910329, + "epoch": 0.7626455836096234, "grad_norm": 0.0, - "learning_rate": 2.7883673336419547e-06, - "loss": 0.9103, + "learning_rate": 2.8121297049131057e-06, + "loss": 0.7666, "step": 26913 }, { - "epoch": 0.7637343927355278, + "epoch": 0.7626739210518858, "grad_norm": 0.0, - "learning_rate": 2.7877306588521567e-06, - "loss": 0.8396, + "learning_rate": 2.811491660257235e-06, + "loss": 0.7876, "step": 26914 }, { - "epoch": 0.7637627695800226, + "epoch": 0.7627022584941483, "grad_norm": 0.0, - "learning_rate": 2.7870940449847194e-06, - "loss": 0.7073, + "learning_rate": 2.8108536761528426e-06, + "loss": 0.7601, "step": 26915 }, { - "epoch": 0.7637911464245176, + "epoch": 0.7627305959364108, "grad_norm": 0.0, - "learning_rate": 2.786457492045024e-06, - "loss": 0.851, + "learning_rate": 2.8102157526052963e-06, + "loss": 0.8619, "step": 26916 }, { - "epoch": 0.7638195232690125, + "epoch": 0.7627589333786733, "grad_norm": 0.0, - "learning_rate": 2.7858210000384443e-06, - "loss": 0.9055, + "learning_rate": 2.809577889619972e-06, + "loss": 0.7665, "step": 26917 }, { - "epoch": 0.7638479001135073, + "epoch": 0.7627872708209357, "grad_norm": 0.0, - "learning_rate": 2.7851845689703605e-06, - "loss": 0.8585, + "learning_rate": 2.8089400872022475e-06, + "loss": 0.8813, "step": 26918 }, { - "epoch": 0.7638762769580023, + "epoch": 0.7628156082631982, "grad_norm": 0.0, - "learning_rate": 2.78454819884615e-06, - "loss": 0.7521, + "learning_rate": 2.8083023453574867e-06, + "loss": 0.8743, "step": 26919 }, { - "epoch": 0.7639046538024972, + "epoch": 0.7628439457054607, "grad_norm": 0.0, - "learning_rate": 2.7839118896711813e-06, - "loss": 0.8349, + "learning_rate": 2.8076646640910666e-06, + "loss": 0.8001, "step": 26920 }, { - "epoch": 0.763933030646992, + "epoch": 0.762872283147723, "grad_norm": 0.0, - "learning_rate": 2.7832756414508343e-06, - "loss": 0.8244, + "learning_rate": 2.807027043408358e-06, + "loss": 0.8083, "step": 26921 }, { - "epoch": 0.763961407491487, + "epoch": 0.7629006205899855, "grad_norm": 0.0, - "learning_rate": 2.7826394541904846e-06, - "loss": 0.8127, + "learning_rate": 2.8063894833147308e-06, + "loss": 0.8499, "step": 26922 }, { - "epoch": 0.7639897843359819, + "epoch": 0.762928958032248, "grad_norm": 0.0, - "learning_rate": 2.7820033278955016e-06, - "loss": 0.8339, + "learning_rate": 2.80575198381556e-06, + "loss": 0.7451, "step": 26923 }, { - "epoch": 0.7640181611804767, + "epoch": 0.7629572954745105, "grad_norm": 0.0, - "learning_rate": 2.781367262571261e-06, - "loss": 0.8023, + "learning_rate": 2.8051145449162075e-06, + "loss": 0.7696, "step": 26924 }, { - "epoch": 0.7640465380249716, + "epoch": 0.7629856329167729, "grad_norm": 0.0, - "learning_rate": 2.7807312582231373e-06, - "loss": 0.764, + "learning_rate": 2.8044771666220483e-06, + "loss": 0.7453, "step": 26925 }, { - "epoch": 0.7640749148694665, + "epoch": 0.7630139703590354, "grad_norm": 0.0, - "learning_rate": 2.780095314856499e-06, - "loss": 0.92, + "learning_rate": 2.8038398489384522e-06, + "loss": 0.7651, "step": 26926 }, { - "epoch": 0.7641032917139614, + "epoch": 0.7630423078012979, "grad_norm": 0.0, - "learning_rate": 2.77945943247672e-06, - "loss": 0.7685, + "learning_rate": 2.8032025918707828e-06, + "loss": 0.7592, "step": 26927 }, { - "epoch": 0.7641316685584563, + "epoch": 0.7630706452435603, "grad_norm": 0.0, - "learning_rate": 2.7788236110891754e-06, - "loss": 0.8506, + "learning_rate": 2.8025653954244135e-06, + "loss": 0.8349, "step": 26928 }, { - "epoch": 0.7641600454029512, + "epoch": 0.7630989826858228, "grad_norm": 0.0, - "learning_rate": 2.7781878506992288e-06, - "loss": 0.6986, + "learning_rate": 2.801928259604705e-06, + "loss": 0.7732, "step": 26929 }, { - "epoch": 0.7641884222474461, + "epoch": 0.7631273201280853, "grad_norm": 0.0, - "learning_rate": 2.7775521513122537e-06, - "loss": 0.7609, + "learning_rate": 2.8012911844170277e-06, + "loss": 0.8064, "step": 26930 }, { - "epoch": 0.764216799091941, + "epoch": 0.7631556575703476, "grad_norm": 0.0, - "learning_rate": 2.776916512933624e-06, - "loss": 0.8518, + "learning_rate": 2.8006541698667512e-06, + "loss": 0.7159, "step": 26931 }, { - "epoch": 0.7642451759364358, + "epoch": 0.7631839950126101, "grad_norm": 0.0, - "learning_rate": 2.7762809355687013e-06, - "loss": 0.8583, + "learning_rate": 2.8000172159592353e-06, + "loss": 0.8452, "step": 26932 }, { - "epoch": 0.7642735527809308, + "epoch": 0.7632123324548726, "grad_norm": 0.0, - "learning_rate": 2.7756454192228597e-06, - "loss": 0.8114, + "learning_rate": 2.7993803226998485e-06, + "loss": 0.7677, "step": 26933 }, { - "epoch": 0.7643019296254256, + "epoch": 0.7632406698971351, "grad_norm": 0.0, - "learning_rate": 2.775009963901465e-06, - "loss": 0.7521, + "learning_rate": 2.7987434900939537e-06, + "loss": 0.8142, "step": 26934 }, { - "epoch": 0.7643303064699205, + "epoch": 0.7632690073393975, "grad_norm": 0.0, - "learning_rate": 2.7743745696098858e-06, - "loss": 0.7612, + "learning_rate": 2.798106718146918e-06, + "loss": 0.7989, "step": 26935 }, { - "epoch": 0.7643586833144155, + "epoch": 0.76329734478166, "grad_norm": 0.0, - "learning_rate": 2.7737392363534934e-06, - "loss": 0.8027, + "learning_rate": 2.797470006864106e-06, + "loss": 0.8725, "step": 26936 }, { - "epoch": 0.7643870601589103, + "epoch": 0.7633256822239225, "grad_norm": 0.0, - "learning_rate": 2.773103964137647e-06, - "loss": 0.9995, + "learning_rate": 2.7968333562508754e-06, + "loss": 0.7712, "step": 26937 }, { - "epoch": 0.7644154370034052, + "epoch": 0.7633540196661849, "grad_norm": 0.0, - "learning_rate": 2.772468752967715e-06, - "loss": 0.8619, + "learning_rate": 2.7961967663125924e-06, + "loss": 0.801, "step": 26938 }, { - "epoch": 0.7644438138479002, + "epoch": 0.7633823571084474, "grad_norm": 0.0, - "learning_rate": 2.771833602849069e-06, - "loss": 0.7278, + "learning_rate": 2.795560237054623e-06, + "loss": 0.6979, "step": 26939 }, { - "epoch": 0.764472190692395, + "epoch": 0.7634106945507099, "grad_norm": 0.0, - "learning_rate": 2.771198513787066e-06, - "loss": 0.938, + "learning_rate": 2.7949237684823217e-06, + "loss": 0.8677, "step": 26940 }, { - "epoch": 0.7645005675368899, + "epoch": 0.7634390319929724, "grad_norm": 0.0, - "learning_rate": 2.7705634857870747e-06, - "loss": 0.7708, + "learning_rate": 2.7942873606010524e-06, + "loss": 0.7477, "step": 26941 }, { - "epoch": 0.7645289443813847, + "epoch": 0.7634673694352347, "grad_norm": 0.0, - "learning_rate": 2.76992851885446e-06, - "loss": 0.8241, + "learning_rate": 2.79365101341618e-06, + "loss": 0.8114, "step": 26942 }, { - "epoch": 0.7645573212258797, + "epoch": 0.7634957068774972, "grad_norm": 0.0, - "learning_rate": 2.7692936129945823e-06, - "loss": 0.8193, + "learning_rate": 2.7930147269330577e-06, + "loss": 0.869, "step": 26943 }, { - "epoch": 0.7645856980703746, + "epoch": 0.7635240443197597, "grad_norm": 0.0, - "learning_rate": 2.7686587682128062e-06, - "loss": 0.785, + "learning_rate": 2.7923785011570513e-06, + "loss": 0.7404, "step": 26944 }, { - "epoch": 0.7646140749148694, + "epoch": 0.7635523817620221, "grad_norm": 0.0, - "learning_rate": 2.7680239845144986e-06, - "loss": 0.8586, + "learning_rate": 2.7917423360935147e-06, + "loss": 0.7714, "step": 26945 }, { - "epoch": 0.7646424517593644, + "epoch": 0.7635807192042846, "grad_norm": 0.0, - "learning_rate": 2.7673892619050135e-06, - "loss": 0.8009, + "learning_rate": 2.79110623174781e-06, + "loss": 0.9005, "step": 26946 }, { - "epoch": 0.7646708286038593, + "epoch": 0.7636090566465471, "grad_norm": 0.0, - "learning_rate": 2.766754600389716e-06, - "loss": 0.7226, + "learning_rate": 2.7904701881252936e-06, + "loss": 0.9427, "step": 26947 }, { - "epoch": 0.7646992054483541, + "epoch": 0.7636373940888096, "grad_norm": 0.0, - "learning_rate": 2.7661199999739683e-06, - "loss": 0.7578, + "learning_rate": 2.7898342052313233e-06, + "loss": 0.7796, "step": 26948 }, { - "epoch": 0.764727582292849, + "epoch": 0.763665731531072, "grad_norm": 0.0, - "learning_rate": 2.76548546066313e-06, - "loss": 0.8104, + "learning_rate": 2.7891982830712614e-06, + "loss": 0.7212, "step": 26949 }, { - "epoch": 0.764755959137344, + "epoch": 0.7636940689733345, "grad_norm": 0.0, - "learning_rate": 2.7648509824625603e-06, - "loss": 0.8316, + "learning_rate": 2.788562421650456e-06, + "loss": 0.8625, "step": 26950 }, { - "epoch": 0.7647843359818388, + "epoch": 0.763722406415597, "grad_norm": 0.0, - "learning_rate": 2.7642165653776242e-06, - "loss": 0.7984, + "learning_rate": 2.787926620974267e-06, + "loss": 0.8551, "step": 26951 }, { - "epoch": 0.7648127128263337, + "epoch": 0.7637507438578593, "grad_norm": 0.0, - "learning_rate": 2.763582209413672e-06, - "loss": 0.8308, + "learning_rate": 2.787290881048055e-06, + "loss": 0.8, "step": 26952 }, { - "epoch": 0.7648410896708286, + "epoch": 0.7637790813001218, "grad_norm": 0.0, - "learning_rate": 2.7629479145760694e-06, - "loss": 0.9337, + "learning_rate": 2.7866552018771652e-06, + "loss": 0.827, "step": 26953 }, { - "epoch": 0.7648694665153235, + "epoch": 0.7638074187423843, "grad_norm": 0.0, - "learning_rate": 2.762313680870168e-06, - "loss": 0.881, + "learning_rate": 2.786019583466958e-06, + "loss": 0.8557, "step": 26954 }, { - "epoch": 0.7648978433598184, + "epoch": 0.7638357561846467, "grad_norm": 0.0, - "learning_rate": 2.761679508301328e-06, - "loss": 0.7945, + "learning_rate": 2.7853840258227905e-06, + "loss": 0.7764, "step": 26955 }, { - "epoch": 0.7649262202043133, + "epoch": 0.7638640936269092, "grad_norm": 0.0, - "learning_rate": 2.7610453968749108e-06, - "loss": 0.8098, + "learning_rate": 2.7847485289500085e-06, + "loss": 0.7391, "step": 26956 }, { - "epoch": 0.7649545970488082, + "epoch": 0.7638924310691717, "grad_norm": 0.0, - "learning_rate": 2.7604113465962643e-06, - "loss": 0.6985, + "learning_rate": 2.7841130928539716e-06, + "loss": 0.7411, "step": 26957 }, { - "epoch": 0.764982973893303, + "epoch": 0.7639207685114342, "grad_norm": 0.0, - "learning_rate": 2.759777357470749e-06, - "loss": 0.7807, + "learning_rate": 2.783477717540027e-06, + "loss": 0.8417, "step": 26958 }, { - "epoch": 0.7650113507377979, + "epoch": 0.7639491059536966, "grad_norm": 0.0, - "learning_rate": 2.7591434295037236e-06, - "loss": 0.8499, + "learning_rate": 2.7828424030135305e-06, + "loss": 0.9581, "step": 26959 }, { - "epoch": 0.7650397275822929, + "epoch": 0.7639774433959591, "grad_norm": 0.0, - "learning_rate": 2.7585095627005353e-06, - "loss": 0.8213, + "learning_rate": 2.7822071492798307e-06, + "loss": 0.8283, "step": 26960 }, { - "epoch": 0.7650681044267877, + "epoch": 0.7640057808382216, "grad_norm": 0.0, - "learning_rate": 2.7578757570665416e-06, - "loss": 0.8395, + "learning_rate": 2.781571956344282e-06, + "loss": 0.7682, "step": 26961 }, { - "epoch": 0.7650964812712826, + "epoch": 0.764034118280484, "grad_norm": 0.0, - "learning_rate": 2.757242012607101e-06, - "loss": 0.8022, + "learning_rate": 2.7809368242122327e-06, + "loss": 0.8165, "step": 26962 }, { - "epoch": 0.7651248581157776, + "epoch": 0.7640624557227464, "grad_norm": 0.0, - "learning_rate": 2.756608329327557e-06, - "loss": 0.8668, + "learning_rate": 2.780301752889035e-06, + "loss": 0.8147, "step": 26963 }, { - "epoch": 0.7651532349602724, + "epoch": 0.7640907931650089, "grad_norm": 0.0, - "learning_rate": 2.7559747072332697e-06, - "loss": 0.7721, + "learning_rate": 2.779666742380035e-06, + "loss": 0.8795, "step": 26964 }, { - "epoch": 0.7651816118047673, + "epoch": 0.7641191306072714, "grad_norm": 0.0, - "learning_rate": 2.755341146329594e-06, - "loss": 0.8406, + "learning_rate": 2.7790317926905865e-06, + "loss": 0.7748, "step": 26965 }, { - "epoch": 0.7652099886492622, + "epoch": 0.7641474680495338, "grad_norm": 0.0, - "learning_rate": 2.7547076466218734e-06, - "loss": 0.8838, + "learning_rate": 2.7783969038260306e-06, + "loss": 0.8454, "step": 26966 }, { - "epoch": 0.7652383654937571, + "epoch": 0.7641758054917963, "grad_norm": 0.0, - "learning_rate": 2.7540742081154638e-06, - "loss": 0.7775, + "learning_rate": 2.77776207579172e-06, + "loss": 0.8919, "step": 26967 }, { - "epoch": 0.765266742338252, + "epoch": 0.7642041429340588, "grad_norm": 0.0, - "learning_rate": 2.753440830815718e-06, - "loss": 0.8571, + "learning_rate": 2.777127308593004e-06, + "loss": 0.7395, "step": 26968 }, { - "epoch": 0.7652951191827468, + "epoch": 0.7642324803763212, "grad_norm": 0.0, - "learning_rate": 2.75280751472798e-06, - "loss": 0.7406, + "learning_rate": 2.7764926022352232e-06, + "loss": 0.8613, "step": 26969 }, { - "epoch": 0.7653234960272418, + "epoch": 0.7642608178185837, "grad_norm": 0.0, - "learning_rate": 2.752174259857602e-06, - "loss": 0.819, + "learning_rate": 2.7758579567237286e-06, + "loss": 0.9025, "step": 26970 }, { - "epoch": 0.7653518728717367, + "epoch": 0.7642891552608462, "grad_norm": 0.0, - "learning_rate": 2.7515410662099375e-06, - "loss": 0.7959, + "learning_rate": 2.7752233720638678e-06, + "loss": 0.8205, "step": 26971 }, { - "epoch": 0.7653802497162315, + "epoch": 0.7643174927031087, "grad_norm": 0.0, - "learning_rate": 2.750907933790329e-06, - "loss": 0.8415, + "learning_rate": 2.7745888482609796e-06, + "loss": 0.7346, "step": 26972 }, { - "epoch": 0.7654086265607265, + "epoch": 0.764345830145371, "grad_norm": 0.0, - "learning_rate": 2.7502748626041266e-06, - "loss": 0.8074, + "learning_rate": 2.773954385320413e-06, + "loss": 0.7494, "step": 26973 }, { - "epoch": 0.7654370034052214, + "epoch": 0.7643741675876335, "grad_norm": 0.0, - "learning_rate": 2.74964185265668e-06, - "loss": 0.8496, + "learning_rate": 2.773319983247513e-06, + "loss": 0.6812, "step": 26974 }, { - "epoch": 0.7654653802497162, + "epoch": 0.764402505029896, "grad_norm": 0.0, - "learning_rate": 2.749008903953333e-06, - "loss": 0.7859, + "learning_rate": 2.772685642047621e-06, + "loss": 0.7317, "step": 26975 }, { - "epoch": 0.7654937570942111, + "epoch": 0.7644308424721584, "grad_norm": 0.0, - "learning_rate": 2.7483760164994344e-06, - "loss": 0.8239, + "learning_rate": 2.7720513617260857e-06, + "loss": 0.7711, "step": 26976 }, { - "epoch": 0.765522133938706, + "epoch": 0.7644591799144209, "grad_norm": 0.0, - "learning_rate": 2.7477431903003314e-06, - "loss": 0.844, + "learning_rate": 2.771417142288242e-06, + "loss": 0.8067, "step": 26977 }, { - "epoch": 0.7655505107832009, + "epoch": 0.7644875173566834, "grad_norm": 0.0, - "learning_rate": 2.7471104253613645e-06, - "loss": 0.8224, + "learning_rate": 2.7707829837394394e-06, + "loss": 0.8474, "step": 26978 }, { - "epoch": 0.7655788876276958, + "epoch": 0.7645158547989458, "grad_norm": 0.0, - "learning_rate": 2.7464777216878825e-06, - "loss": 0.8941, + "learning_rate": 2.7701488860850134e-06, + "loss": 0.8189, "step": 26979 }, { - "epoch": 0.7656072644721907, + "epoch": 0.7645441922412083, "grad_norm": 0.0, - "learning_rate": 2.7458450792852296e-06, - "loss": 0.8168, + "learning_rate": 2.769514849330308e-06, + "loss": 0.8309, "step": 26980 }, { - "epoch": 0.7656356413166856, + "epoch": 0.7645725296834708, "grad_norm": 0.0, - "learning_rate": 2.745212498158749e-06, - "loss": 0.8879, + "learning_rate": 2.768880873480666e-06, + "loss": 0.8073, "step": 26981 }, { - "epoch": 0.7656640181611805, + "epoch": 0.7646008671257333, "grad_norm": 0.0, - "learning_rate": 2.744579978313787e-06, - "loss": 0.8229, + "learning_rate": 2.768246958541424e-06, + "loss": 0.72, "step": 26982 }, { - "epoch": 0.7656923950056753, + "epoch": 0.7646292045679957, "grad_norm": 0.0, - "learning_rate": 2.743947519755682e-06, - "loss": 0.7864, + "learning_rate": 2.767613104517922e-06, + "loss": 0.8513, "step": 26983 }, { - "epoch": 0.7657207718501703, + "epoch": 0.7646575420102582, "grad_norm": 0.0, - "learning_rate": 2.7433151224897782e-06, - "loss": 0.9086, + "learning_rate": 2.766979311415505e-06, + "loss": 0.8147, "step": 26984 }, { - "epoch": 0.7657491486946651, + "epoch": 0.7646858794525206, "grad_norm": 0.0, - "learning_rate": 2.7426827865214212e-06, - "loss": 0.7885, + "learning_rate": 2.766345579239503e-06, + "loss": 0.9265, "step": 26985 }, { - "epoch": 0.76577752553916, + "epoch": 0.764714216894783, "grad_norm": 0.0, - "learning_rate": 2.7420505118559447e-06, - "loss": 0.8361, + "learning_rate": 2.7657119079952588e-06, + "loss": 0.6989, "step": 26986 }, { - "epoch": 0.765805902383655, + "epoch": 0.7647425543370455, "grad_norm": 0.0, - "learning_rate": 2.7414182984986947e-06, - "loss": 0.8351, + "learning_rate": 2.7650782976881096e-06, + "loss": 0.8923, "step": 26987 }, { - "epoch": 0.7658342792281498, + "epoch": 0.764770891779308, "grad_norm": 0.0, - "learning_rate": 2.7407861464550134e-06, - "loss": 0.7031, + "learning_rate": 2.764444748323393e-06, + "loss": 0.7433, "step": 26988 }, { - "epoch": 0.7658626560726447, + "epoch": 0.7647992292215705, "grad_norm": 0.0, - "learning_rate": 2.7401540557302355e-06, - "loss": 0.7607, + "learning_rate": 2.763811259906447e-06, + "loss": 0.7883, "step": 26989 }, { - "epoch": 0.7658910329171397, + "epoch": 0.7648275666638329, "grad_norm": 0.0, - "learning_rate": 2.739522026329702e-06, - "loss": 0.7565, + "learning_rate": 2.763177832442603e-06, + "loss": 0.8686, "step": 26990 }, { - "epoch": 0.7659194097616345, + "epoch": 0.7648559041060954, "grad_norm": 0.0, - "learning_rate": 2.7388900582587553e-06, - "loss": 0.851, + "learning_rate": 2.7625444659372e-06, + "loss": 0.7798, "step": 26991 }, { - "epoch": 0.7659477866061294, + "epoch": 0.7648842415483579, "grad_norm": 0.0, - "learning_rate": 2.738258151522727e-06, - "loss": 0.7998, + "learning_rate": 2.7619111603955763e-06, + "loss": 0.8181, "step": 26992 }, { - "epoch": 0.7659761634506242, + "epoch": 0.7649125789906203, "grad_norm": 0.0, - "learning_rate": 2.7376263061269594e-06, - "loss": 0.846, + "learning_rate": 2.7612779158230583e-06, + "loss": 0.7403, "step": 26993 }, { - "epoch": 0.7660045402951192, + "epoch": 0.7649409164328828, "grad_norm": 0.0, - "learning_rate": 2.736994522076789e-06, - "loss": 0.8281, + "learning_rate": 2.7606447322249876e-06, + "loss": 0.8081, "step": 26994 }, { - "epoch": 0.7660329171396141, + "epoch": 0.7649692538751452, "grad_norm": 0.0, - "learning_rate": 2.7363627993775522e-06, - "loss": 0.7607, + "learning_rate": 2.760011609606692e-06, + "loss": 0.896, "step": 26995 }, { - "epoch": 0.7660612939841089, + "epoch": 0.7649975913174077, "grad_norm": 0.0, - "learning_rate": 2.7357311380345873e-06, - "loss": 0.8834, + "learning_rate": 2.759378547973507e-06, + "loss": 0.8057, "step": 26996 }, { - "epoch": 0.7660896708286039, + "epoch": 0.7650259287596701, "grad_norm": 0.0, - "learning_rate": 2.7350995380532264e-06, - "loss": 0.7576, + "learning_rate": 2.758745547330769e-06, + "loss": 0.8546, "step": 26997 }, { - "epoch": 0.7661180476730988, + "epoch": 0.7650542662019326, "grad_norm": 0.0, - "learning_rate": 2.7344679994388057e-06, - "loss": 0.8608, + "learning_rate": 2.7581126076838017e-06, + "loss": 0.8987, "step": 26998 }, { - "epoch": 0.7661464245175936, + "epoch": 0.7650826036441951, "grad_norm": 0.0, - "learning_rate": 2.7338365221966634e-06, - "loss": 0.8036, + "learning_rate": 2.757479729037942e-06, + "loss": 0.8495, "step": 26999 }, { - "epoch": 0.7661748013620885, + "epoch": 0.7651109410864575, "grad_norm": 0.0, - "learning_rate": 2.7332051063321284e-06, - "loss": 0.7675, + "learning_rate": 2.7568469113985197e-06, + "loss": 0.7148, "step": 27000 }, { - "epoch": 0.7662031782065835, + "epoch": 0.76513927852872, "grad_norm": 0.0, - "learning_rate": 2.732573751850536e-06, - "loss": 0.806, + "learning_rate": 2.7562141547708663e-06, + "loss": 0.8463, "step": 27001 }, { - "epoch": 0.7662315550510783, + "epoch": 0.7651676159709825, "grad_norm": 0.0, - "learning_rate": 2.731942458757223e-06, - "loss": 0.8765, + "learning_rate": 2.755581459160314e-06, + "loss": 0.7814, "step": 27002 }, { - "epoch": 0.7662599318955732, + "epoch": 0.7651959534132449, "grad_norm": 0.0, - "learning_rate": 2.731311227057515e-06, - "loss": 0.8396, + "learning_rate": 2.7549488245721845e-06, + "loss": 0.8639, "step": 27003 }, { - "epoch": 0.7662883087400681, + "epoch": 0.7652242908555074, "grad_norm": 0.0, - "learning_rate": 2.730680056756748e-06, - "loss": 0.8286, + "learning_rate": 2.7543162510118125e-06, + "loss": 0.9053, "step": 27004 }, { - "epoch": 0.766316685584563, + "epoch": 0.7652526282977699, "grad_norm": 0.0, - "learning_rate": 2.7300489478602567e-06, - "loss": 0.8415, + "learning_rate": 2.7536837384845296e-06, + "loss": 0.8438, "step": 27005 }, { - "epoch": 0.7663450624290579, + "epoch": 0.7652809657400323, "grad_norm": 0.0, - "learning_rate": 2.7294179003733656e-06, - "loss": 0.7499, + "learning_rate": 2.753051286995655e-06, + "loss": 0.8302, "step": 27006 }, { - "epoch": 0.7663734392735527, + "epoch": 0.7653093031822947, "grad_norm": 0.0, - "learning_rate": 2.728786914301409e-06, - "loss": 0.8442, + "learning_rate": 2.752418896550524e-06, + "loss": 0.7483, "step": 27007 }, { - "epoch": 0.7664018161180477, + "epoch": 0.7653376406245572, "grad_norm": 0.0, - "learning_rate": 2.728155989649719e-06, - "loss": 0.7564, + "learning_rate": 2.751786567154456e-06, + "loss": 0.6518, "step": 27008 }, { - "epoch": 0.7664301929625426, + "epoch": 0.7653659780668197, "grad_norm": 0.0, - "learning_rate": 2.727525126423618e-06, - "loss": 0.8572, + "learning_rate": 2.7511542988127815e-06, + "loss": 0.7654, "step": 27009 }, { - "epoch": 0.7664585698070374, + "epoch": 0.7653943155090821, "grad_norm": 0.0, - "learning_rate": 2.7268943246284407e-06, - "loss": 0.8332, + "learning_rate": 2.7505220915308304e-06, + "loss": 0.8078, "step": 27010 }, { - "epoch": 0.7664869466515324, + "epoch": 0.7654226529513446, "grad_norm": 0.0, - "learning_rate": 2.726263584269513e-06, - "loss": 0.9026, + "learning_rate": 2.7498899453139193e-06, + "loss": 0.8265, "step": 27011 }, { - "epoch": 0.7665153234960272, + "epoch": 0.7654509903936071, "grad_norm": 0.0, - "learning_rate": 2.7256329053521646e-06, - "loss": 0.793, + "learning_rate": 2.7492578601673793e-06, + "loss": 0.8463, "step": 27012 }, { - "epoch": 0.7665437003405221, + "epoch": 0.7654793278358696, "grad_norm": 0.0, - "learning_rate": 2.725002287881724e-06, - "loss": 0.7629, + "learning_rate": 2.748625836096531e-06, + "loss": 0.8338, "step": 27013 }, { - "epoch": 0.7665720771850171, + "epoch": 0.765507665278132, "grad_norm": 0.0, - "learning_rate": 2.7243717318635143e-06, - "loss": 0.7748, + "learning_rate": 2.7479938731067e-06, + "loss": 0.8434, "step": 27014 }, { - "epoch": 0.7666004540295119, + "epoch": 0.7655360027203945, "grad_norm": 0.0, - "learning_rate": 2.723741237302863e-06, - "loss": 0.8246, + "learning_rate": 2.747361971203214e-06, + "loss": 0.8719, "step": 27015 }, { - "epoch": 0.7666288308740068, + "epoch": 0.765564340162657, "grad_norm": 0.0, - "learning_rate": 2.723110804205099e-06, - "loss": 0.7797, + "learning_rate": 2.7467301303913874e-06, + "loss": 0.7602, "step": 27016 }, { - "epoch": 0.7666572077185017, + "epoch": 0.7655926776049193, "grad_norm": 0.0, - "learning_rate": 2.7224804325755427e-06, - "loss": 0.8037, + "learning_rate": 2.7460983506765472e-06, + "loss": 0.7467, "step": 27017 }, { - "epoch": 0.7666855845629966, + "epoch": 0.7656210150471818, "grad_norm": 0.0, - "learning_rate": 2.7218501224195217e-06, - "loss": 0.6979, + "learning_rate": 2.7454666320640165e-06, + "loss": 0.7846, "step": 27018 }, { - "epoch": 0.7667139614074915, + "epoch": 0.7656493524894443, "grad_norm": 0.0, - "learning_rate": 2.7212198737423624e-06, - "loss": 0.8588, + "learning_rate": 2.7448349745591108e-06, + "loss": 0.8039, "step": 27019 }, { - "epoch": 0.7667423382519863, + "epoch": 0.7656776899317068, "grad_norm": 0.0, - "learning_rate": 2.720589686549383e-06, - "loss": 0.8245, + "learning_rate": 2.7442033781671553e-06, + "loss": 0.8383, "step": 27020 }, { - "epoch": 0.7667707150964813, + "epoch": 0.7657060273739692, "grad_norm": 0.0, - "learning_rate": 2.7199595608459107e-06, - "loss": 0.7872, + "learning_rate": 2.743571842893471e-06, + "loss": 0.8502, "step": 27021 }, { - "epoch": 0.7667990919409762, + "epoch": 0.7657343648162317, "grad_norm": 0.0, - "learning_rate": 2.7193294966372697e-06, - "loss": 0.8735, + "learning_rate": 2.742940368743373e-06, + "loss": 0.7192, "step": 27022 }, { - "epoch": 0.766827468785471, + "epoch": 0.7657627022584942, "grad_norm": 0.0, - "learning_rate": 2.718699493928776e-06, - "loss": 0.7957, + "learning_rate": 2.742308955722187e-06, + "loss": 0.7508, "step": 27023 }, { - "epoch": 0.7668558456299659, + "epoch": 0.7657910397007566, "grad_norm": 0.0, - "learning_rate": 2.718069552725756e-06, - "loss": 0.9076, + "learning_rate": 2.7416776038352246e-06, + "loss": 0.7594, "step": 27024 }, { - "epoch": 0.7668842224744609, + "epoch": 0.7658193771430191, "grad_norm": 0.0, - "learning_rate": 2.7174396730335296e-06, - "loss": 0.8037, + "learning_rate": 2.7410463130878063e-06, + "loss": 0.7471, "step": 27025 }, { - "epoch": 0.7669125993189557, + "epoch": 0.7658477145852816, "grad_norm": 0.0, - "learning_rate": 2.7168098548574175e-06, - "loss": 0.8169, + "learning_rate": 2.7404150834852506e-06, + "loss": 0.833, "step": 27026 }, { - "epoch": 0.7669409761634506, + "epoch": 0.7658760520275439, "grad_norm": 0.0, - "learning_rate": 2.716180098202742e-06, - "loss": 0.7763, + "learning_rate": 2.7397839150328744e-06, + "loss": 0.831, "step": 27027 }, { - "epoch": 0.7669693530079456, + "epoch": 0.7659043894698064, "grad_norm": 0.0, - "learning_rate": 2.7155504030748193e-06, - "loss": 0.7596, + "learning_rate": 2.7391528077359975e-06, + "loss": 0.8495, "step": 27028 }, { - "epoch": 0.7669977298524404, + "epoch": 0.7659327269120689, "grad_norm": 0.0, - "learning_rate": 2.714920769478969e-06, - "loss": 0.7406, + "learning_rate": 2.7385217615999303e-06, + "loss": 0.8195, "step": 27029 }, { - "epoch": 0.7670261066969353, + "epoch": 0.7659610643543314, "grad_norm": 0.0, - "learning_rate": 2.7142911974205135e-06, - "loss": 0.9037, + "learning_rate": 2.73789077662999e-06, + "loss": 0.7941, "step": 27030 }, { - "epoch": 0.7670544835414302, + "epoch": 0.7659894017965938, "grad_norm": 0.0, - "learning_rate": 2.713661686904765e-06, - "loss": 0.7309, + "learning_rate": 2.7372598528314955e-06, + "loss": 0.9313, "step": 27031 }, { - "epoch": 0.7670828603859251, + "epoch": 0.7660177392388563, "grad_norm": 0.0, - "learning_rate": 2.7130322379370434e-06, - "loss": 0.8065, + "learning_rate": 2.7366289902097555e-06, + "loss": 0.7934, "step": 27032 }, { - "epoch": 0.76711123723042, + "epoch": 0.7660460766811188, "grad_norm": 0.0, - "learning_rate": 2.7124028505226685e-06, - "loss": 0.7455, + "learning_rate": 2.735998188770087e-06, + "loss": 0.874, "step": 27033 }, { - "epoch": 0.7671396140749148, + "epoch": 0.7660744141233812, "grad_norm": 0.0, - "learning_rate": 2.7117735246669517e-06, - "loss": 0.8083, + "learning_rate": 2.735367448517805e-06, + "loss": 0.7773, "step": 27034 }, { - "epoch": 0.7671679909194098, + "epoch": 0.7661027515656437, "grad_norm": 0.0, - "learning_rate": 2.7111442603752125e-06, - "loss": 0.7621, + "learning_rate": 2.7347367694582183e-06, + "loss": 0.931, "step": 27035 }, { - "epoch": 0.7671963677639047, + "epoch": 0.7661310890079062, "grad_norm": 0.0, - "learning_rate": 2.7105150576527672e-06, - "loss": 0.8235, + "learning_rate": 2.734106151596645e-06, + "loss": 0.8207, "step": 27036 }, { - "epoch": 0.7672247446083995, + "epoch": 0.7661594264501687, "grad_norm": 0.0, - "learning_rate": 2.709885916504927e-06, - "loss": 0.7789, + "learning_rate": 2.7334755949383905e-06, + "loss": 0.9189, "step": 27037 }, { - "epoch": 0.7672531214528945, + "epoch": 0.766187763892431, "grad_norm": 0.0, - "learning_rate": 2.7092568369370075e-06, - "loss": 0.8421, + "learning_rate": 2.732845099488769e-06, + "loss": 0.865, "step": 27038 }, { - "epoch": 0.7672814982973893, + "epoch": 0.7662161013346935, "grad_norm": 0.0, - "learning_rate": 2.7086278189543267e-06, - "loss": 0.8564, + "learning_rate": 2.732214665253092e-06, + "loss": 0.8018, "step": 27039 }, { - "epoch": 0.7673098751418842, + "epoch": 0.766244438776956, "grad_norm": 0.0, - "learning_rate": 2.7079988625621877e-06, - "loss": 0.7464, + "learning_rate": 2.7315842922366708e-06, + "loss": 0.8741, "step": 27040 }, { - "epoch": 0.7673382519863791, + "epoch": 0.7662727762192184, "grad_norm": 0.0, - "learning_rate": 2.707369967765917e-06, - "loss": 0.8763, + "learning_rate": 2.7309539804448127e-06, + "loss": 0.772, "step": 27041 }, { - "epoch": 0.767366628830874, + "epoch": 0.7663011136614809, "grad_norm": 0.0, - "learning_rate": 2.706741134570816e-06, - "loss": 0.8447, + "learning_rate": 2.7303237298828323e-06, + "loss": 0.8656, "step": 27042 }, { - "epoch": 0.7673950056753689, + "epoch": 0.7663294511037434, "grad_norm": 0.0, - "learning_rate": 2.7061123629822016e-06, - "loss": 0.8309, + "learning_rate": 2.729693540556032e-06, + "loss": 0.874, "step": 27043 }, { - "epoch": 0.7674233825198638, + "epoch": 0.7663577885460059, "grad_norm": 0.0, - "learning_rate": 2.7054836530053864e-06, - "loss": 0.7458, + "learning_rate": 2.7290634124697248e-06, + "loss": 0.7304, "step": 27044 }, { - "epoch": 0.7674517593643587, + "epoch": 0.7663861259882683, "grad_norm": 0.0, - "learning_rate": 2.704855004645676e-06, - "loss": 0.8937, + "learning_rate": 2.7284333456292135e-06, + "loss": 0.8883, "step": 27045 }, { - "epoch": 0.7674801362088536, + "epoch": 0.7664144634305308, "grad_norm": 0.0, - "learning_rate": 2.7042264179083834e-06, - "loss": 0.7813, + "learning_rate": 2.7278033400398095e-06, + "loss": 0.7863, "step": 27046 }, { - "epoch": 0.7675085130533484, + "epoch": 0.7664428008727933, "grad_norm": 0.0, - "learning_rate": 2.7035978927988205e-06, - "loss": 0.8589, + "learning_rate": 2.7271733957068203e-06, + "loss": 0.8157, "step": 27047 }, { - "epoch": 0.7675368898978434, + "epoch": 0.7664711383150556, "grad_norm": 0.0, - "learning_rate": 2.7029694293222907e-06, - "loss": 0.8725, + "learning_rate": 2.726543512635548e-06, + "loss": 0.8549, "step": 27048 }, { - "epoch": 0.7675652667423383, + "epoch": 0.7664994757573181, "grad_norm": 0.0, - "learning_rate": 2.702341027484108e-06, - "loss": 0.7843, + "learning_rate": 2.7259136908313e-06, + "loss": 0.8409, "step": 27049 }, { - "epoch": 0.7675936435868331, + "epoch": 0.7665278131995806, "grad_norm": 0.0, - "learning_rate": 2.7017126872895805e-06, - "loss": 0.7337, + "learning_rate": 2.725283930299385e-06, + "loss": 0.8435, "step": 27050 }, { - "epoch": 0.767622020431328, + "epoch": 0.766556150641843, "grad_norm": 0.0, - "learning_rate": 2.7010844087440113e-06, - "loss": 0.7615, + "learning_rate": 2.724654231045103e-06, + "loss": 0.8222, "step": 27051 }, { - "epoch": 0.767650397275823, + "epoch": 0.7665844880841055, "grad_norm": 0.0, - "learning_rate": 2.7004561918527096e-06, - "loss": 0.8245, + "learning_rate": 2.7240245930737586e-06, + "loss": 0.8193, "step": 27052 }, { - "epoch": 0.7676787741203178, + "epoch": 0.766612825526368, "grad_norm": 0.0, - "learning_rate": 2.699828036620986e-06, - "loss": 0.7901, + "learning_rate": 2.7233950163906577e-06, + "loss": 0.7025, "step": 27053 }, { - "epoch": 0.7677071509648127, + "epoch": 0.7666411629686305, "grad_norm": 0.0, - "learning_rate": 2.6991999430541405e-06, - "loss": 0.7905, + "learning_rate": 2.7227655010011034e-06, + "loss": 0.7967, "step": 27054 }, { - "epoch": 0.7677355278093076, + "epoch": 0.7666695004108929, "grad_norm": 0.0, - "learning_rate": 2.6985719111574814e-06, - "loss": 0.7891, + "learning_rate": 2.7221360469103997e-06, + "loss": 0.759, "step": 27055 }, { - "epoch": 0.7677639046538025, + "epoch": 0.7666978378531554, "grad_norm": 0.0, - "learning_rate": 2.697943940936313e-06, - "loss": 0.8175, + "learning_rate": 2.7215066541238433e-06, + "loss": 0.9316, "step": 27056 }, { - "epoch": 0.7677922814982974, + "epoch": 0.7667261752954179, "grad_norm": 0.0, - "learning_rate": 2.69731603239594e-06, - "loss": 0.8268, + "learning_rate": 2.7208773226467433e-06, + "loss": 0.8134, "step": 27057 }, { - "epoch": 0.7678206583427922, + "epoch": 0.7667545127376802, "grad_norm": 0.0, - "learning_rate": 2.6966881855416684e-06, - "loss": 0.8898, + "learning_rate": 2.7202480524843924e-06, + "loss": 0.9733, "step": 27058 }, { - "epoch": 0.7678490351872872, + "epoch": 0.7667828501799427, "grad_norm": 0.0, - "learning_rate": 2.6960604003788014e-06, - "loss": 0.8306, + "learning_rate": 2.7196188436420955e-06, + "loss": 0.8744, "step": 27059 }, { - "epoch": 0.7678774120317821, + "epoch": 0.7668111876222052, "grad_norm": 0.0, - "learning_rate": 2.695432676912638e-06, - "loss": 0.7986, + "learning_rate": 2.718989696125157e-06, + "loss": 0.7885, "step": 27060 }, { - "epoch": 0.7679057888762769, + "epoch": 0.7668395250644677, "grad_norm": 0.0, - "learning_rate": 2.6948050151484862e-06, - "loss": 0.8189, + "learning_rate": 2.718360609938868e-06, + "loss": 0.7965, "step": 27061 }, { - "epoch": 0.7679341657207719, + "epoch": 0.7668678625067301, "grad_norm": 0.0, - "learning_rate": 2.694177415091642e-06, - "loss": 0.82, + "learning_rate": 2.717731585088531e-06, + "loss": 0.8539, "step": 27062 }, { - "epoch": 0.7679625425652667, + "epoch": 0.7668961999489926, "grad_norm": 0.0, - "learning_rate": 2.69354987674741e-06, - "loss": 0.7923, + "learning_rate": 2.717102621579449e-06, + "loss": 0.7648, "step": 27063 }, { - "epoch": 0.7679909194097616, + "epoch": 0.7669245373912551, "grad_norm": 0.0, - "learning_rate": 2.692922400121093e-06, - "loss": 0.8463, + "learning_rate": 2.7164737194169132e-06, + "loss": 0.7664, "step": 27064 }, { - "epoch": 0.7680192962542566, + "epoch": 0.7669528748335175, "grad_norm": 0.0, - "learning_rate": 2.692294985217986e-06, - "loss": 0.8687, + "learning_rate": 2.715844878606223e-06, + "loss": 0.8203, "step": 27065 }, { - "epoch": 0.7680476730987514, + "epoch": 0.76698121227578, "grad_norm": 0.0, - "learning_rate": 2.691667632043391e-06, - "loss": 0.8854, + "learning_rate": 2.7152160991526768e-06, + "loss": 0.7223, "step": 27066 }, { - "epoch": 0.7680760499432463, + "epoch": 0.7670095497180425, "grad_norm": 0.0, - "learning_rate": 2.691040340602612e-06, - "loss": 0.8362, + "learning_rate": 2.714587381061571e-06, + "loss": 0.7688, "step": 27067 }, { - "epoch": 0.7681044267877412, + "epoch": 0.767037887160305, "grad_norm": 0.0, - "learning_rate": 2.690413110900941e-06, - "loss": 0.8279, + "learning_rate": 2.7139587243382037e-06, + "loss": 0.8391, "step": 27068 }, { - "epoch": 0.7681328036322361, + "epoch": 0.7670662246025673, "grad_norm": 0.0, - "learning_rate": 2.689785942943679e-06, - "loss": 0.8553, + "learning_rate": 2.7133301289878644e-06, + "loss": 0.8765, "step": 27069 }, { - "epoch": 0.768161180476731, + "epoch": 0.7670945620448298, "grad_norm": 0.0, - "learning_rate": 2.6891588367361265e-06, - "loss": 0.8215, + "learning_rate": 2.712701595015852e-06, + "loss": 0.8645, "step": 27070 }, { - "epoch": 0.7681895573212258, + "epoch": 0.7671228994870923, "grad_norm": 0.0, - "learning_rate": 2.6885317922835717e-06, - "loss": 0.8167, + "learning_rate": 2.7120731224274623e-06, + "loss": 0.7526, "step": 27071 }, { - "epoch": 0.7682179341657208, + "epoch": 0.7671512369293547, "grad_norm": 0.0, - "learning_rate": 2.6879048095913206e-06, - "loss": 0.841, + "learning_rate": 2.711444711227984e-06, + "loss": 0.9221, "step": 27072 }, { - "epoch": 0.7682463110102157, + "epoch": 0.7671795743716172, "grad_norm": 0.0, - "learning_rate": 2.68727788866467e-06, - "loss": 0.7621, + "learning_rate": 2.7108163614227168e-06, + "loss": 0.784, "step": 27073 }, { - "epoch": 0.7682746878547105, + "epoch": 0.7672079118138797, "grad_norm": 0.0, - "learning_rate": 2.6866510295089077e-06, - "loss": 0.8842, + "learning_rate": 2.710188073016947e-06, + "loss": 0.9271, "step": 27074 }, { - "epoch": 0.7683030646992054, + "epoch": 0.7672362492561421, "grad_norm": 0.0, - "learning_rate": 2.686024232129334e-06, - "loss": 0.8448, + "learning_rate": 2.70955984601597e-06, + "loss": 0.819, "step": 27075 }, { - "epoch": 0.7683314415437004, + "epoch": 0.7672645866984046, "grad_norm": 0.0, - "learning_rate": 2.6853974965312446e-06, - "loss": 0.7312, + "learning_rate": 2.7089316804250777e-06, + "loss": 0.7563, "step": 27076 }, { - "epoch": 0.7683598183881952, + "epoch": 0.7672929241406671, "grad_norm": 0.0, - "learning_rate": 2.684770822719929e-06, - "loss": 0.7688, + "learning_rate": 2.708303576249561e-06, + "loss": 0.8537, "step": 27077 }, { - "epoch": 0.7683881952326901, + "epoch": 0.7673212615829296, "grad_norm": 0.0, - "learning_rate": 2.684144210700682e-06, - "loss": 0.8245, + "learning_rate": 2.7076755334947126e-06, + "loss": 0.7807, "step": 27078 }, { - "epoch": 0.7684165720771851, + "epoch": 0.767349599025192, "grad_norm": 0.0, - "learning_rate": 2.6835176604788014e-06, - "loss": 0.84, + "learning_rate": 2.7070475521658226e-06, + "loss": 0.8645, "step": 27079 }, { - "epoch": 0.7684449489216799, + "epoch": 0.7673779364674544, "grad_norm": 0.0, - "learning_rate": 2.682891172059573e-06, - "loss": 0.9411, + "learning_rate": 2.7064196322681767e-06, + "loss": 0.7799, "step": 27080 }, { - "epoch": 0.7684733257661748, + "epoch": 0.7674062739097169, "grad_norm": 0.0, - "learning_rate": 2.682264745448292e-06, - "loss": 0.727, + "learning_rate": 2.705791773807069e-06, + "loss": 0.7095, "step": 27081 }, { - "epoch": 0.7685017026106697, + "epoch": 0.7674346113519793, "grad_norm": 0.0, - "learning_rate": 2.681638380650252e-06, - "loss": 0.7444, + "learning_rate": 2.7051639767877836e-06, + "loss": 0.7765, "step": 27082 }, { - "epoch": 0.7685300794551646, + "epoch": 0.7674629487942418, "grad_norm": 0.0, - "learning_rate": 2.6810120776707395e-06, - "loss": 0.8182, + "learning_rate": 2.7045362412156107e-06, + "loss": 0.7681, "step": 27083 }, { - "epoch": 0.7685584562996595, + "epoch": 0.7674912862365043, "grad_norm": 0.0, - "learning_rate": 2.680385836515046e-06, - "loss": 0.7007, + "learning_rate": 2.703908567095841e-06, + "loss": 0.7939, "step": 27084 }, { - "epoch": 0.7685868331441543, + "epoch": 0.7675196236787668, "grad_norm": 0.0, - "learning_rate": 2.6797596571884663e-06, - "loss": 0.7534, + "learning_rate": 2.7032809544337556e-06, + "loss": 0.8468, "step": 27085 }, { - "epoch": 0.7686152099886493, + "epoch": 0.7675479611210292, "grad_norm": 0.0, - "learning_rate": 2.6791335396962794e-06, - "loss": 0.8722, + "learning_rate": 2.7026534032346472e-06, + "loss": 0.8824, "step": 27086 }, { - "epoch": 0.7686435868331442, + "epoch": 0.7675762985632917, "grad_norm": 0.0, - "learning_rate": 2.6785074840437864e-06, - "loss": 0.8576, + "learning_rate": 2.702025913503796e-06, + "loss": 0.8144, "step": 27087 }, { - "epoch": 0.768671963677639, + "epoch": 0.7676046360055542, "grad_norm": 0.0, - "learning_rate": 2.677881490236267e-06, - "loss": 0.7956, + "learning_rate": 2.7013984852464912e-06, + "loss": 0.7733, "step": 27088 }, { - "epoch": 0.768700340522134, + "epoch": 0.7676329734478166, "grad_norm": 0.0, - "learning_rate": 2.6772555582790128e-06, - "loss": 0.7951, + "learning_rate": 2.7007711184680176e-06, + "loss": 0.8641, "step": 27089 }, { - "epoch": 0.7687287173666288, + "epoch": 0.767661310890079, "grad_norm": 0.0, - "learning_rate": 2.676629688177311e-06, - "loss": 0.9308, + "learning_rate": 2.700143813173658e-06, + "loss": 0.7844, "step": 27090 }, { - "epoch": 0.7687570942111237, + "epoch": 0.7676896483323415, "grad_norm": 0.0, - "learning_rate": 2.6760038799364462e-06, - "loss": 0.909, + "learning_rate": 2.6995165693686986e-06, + "loss": 0.8306, "step": 27091 }, { - "epoch": 0.7687854710556186, + "epoch": 0.767717985774604, "grad_norm": 0.0, - "learning_rate": 2.6753781335617057e-06, - "loss": 0.75, + "learning_rate": 2.698889387058425e-06, + "loss": 0.8434, "step": 27092 }, { - "epoch": 0.7688138479001135, + "epoch": 0.7677463232168664, "grad_norm": 0.0, - "learning_rate": 2.6747524490583775e-06, - "loss": 0.793, + "learning_rate": 2.698262266248115e-06, + "loss": 0.792, "step": 27093 }, { - "epoch": 0.7688422247446084, + "epoch": 0.7677746606591289, "grad_norm": 0.0, - "learning_rate": 2.674126826431742e-06, - "loss": 0.8277, + "learning_rate": 2.6976352069430554e-06, + "loss": 0.9187, "step": 27094 }, { - "epoch": 0.7688706015891033, + "epoch": 0.7678029981013914, "grad_norm": 0.0, - "learning_rate": 2.6735012656870874e-06, - "loss": 0.8655, + "learning_rate": 2.6970082091485228e-06, + "loss": 0.7837, "step": 27095 }, { - "epoch": 0.7688989784335982, + "epoch": 0.7678313355436538, "grad_norm": 0.0, - "learning_rate": 2.672875766829699e-06, - "loss": 0.884, + "learning_rate": 2.6963812728698024e-06, + "loss": 0.7726, "step": 27096 }, { - "epoch": 0.7689273552780931, + "epoch": 0.7678596729859163, "grad_norm": 0.0, - "learning_rate": 2.672250329864855e-06, - "loss": 0.9211, + "learning_rate": 2.695754398112178e-06, + "loss": 0.9157, "step": 27097 }, { - "epoch": 0.7689557321225879, + "epoch": 0.7678880104281788, "grad_norm": 0.0, - "learning_rate": 2.6716249547978424e-06, - "loss": 0.7671, + "learning_rate": 2.695127584880923e-06, + "loss": 0.7495, "step": 27098 }, { - "epoch": 0.7689841089670829, + "epoch": 0.7679163478704412, "grad_norm": 0.0, - "learning_rate": 2.6709996416339468e-06, - "loss": 0.8434, + "learning_rate": 2.694500833181323e-06, + "loss": 0.8438, "step": 27099 }, { - "epoch": 0.7690124858115778, + "epoch": 0.7679446853127037, "grad_norm": 0.0, - "learning_rate": 2.670374390378443e-06, - "loss": 0.6745, + "learning_rate": 2.6938741430186555e-06, + "loss": 0.7631, "step": 27100 }, { - "epoch": 0.7690408626560726, + "epoch": 0.7679730227549662, "grad_norm": 0.0, - "learning_rate": 2.6697492010366165e-06, - "loss": 0.8179, + "learning_rate": 2.6932475143981975e-06, + "loss": 0.9176, "step": 27101 }, { - "epoch": 0.7690692395005675, + "epoch": 0.7680013601972286, "grad_norm": 0.0, - "learning_rate": 2.669124073613748e-06, - "loss": 0.7952, + "learning_rate": 2.6926209473252294e-06, + "loss": 0.9221, "step": 27102 }, { - "epoch": 0.7690976163450625, + "epoch": 0.768029697639491, "grad_norm": 0.0, - "learning_rate": 2.6684990081151174e-06, - "loss": 0.7286, + "learning_rate": 2.691994441805028e-06, + "loss": 0.813, "step": 27103 }, { - "epoch": 0.7691259931895573, + "epoch": 0.7680580350817535, "grad_norm": 0.0, - "learning_rate": 2.6678740045460085e-06, - "loss": 0.8235, + "learning_rate": 2.6913679978428707e-06, + "loss": 0.8479, "step": 27104 }, { - "epoch": 0.7691543700340522, + "epoch": 0.768086372524016, "grad_norm": 0.0, - "learning_rate": 2.6672490629116954e-06, - "loss": 0.8176, + "learning_rate": 2.690741615444039e-06, + "loss": 0.7522, "step": 27105 }, { - "epoch": 0.7691827468785472, + "epoch": 0.7681147099662784, "grad_norm": 0.0, - "learning_rate": 2.6666241832174577e-06, - "loss": 0.7782, + "learning_rate": 2.690115294613801e-06, + "loss": 0.7392, "step": 27106 }, { - "epoch": 0.769211123723042, + "epoch": 0.7681430474085409, "grad_norm": 0.0, - "learning_rate": 2.665999365468579e-06, - "loss": 0.7151, + "learning_rate": 2.6894890353574364e-06, + "loss": 0.7909, "step": 27107 }, { - "epoch": 0.7692395005675369, + "epoch": 0.7681713848508034, "grad_norm": 0.0, - "learning_rate": 2.6653746096703305e-06, - "loss": 0.8196, + "learning_rate": 2.688862837680223e-06, + "loss": 0.8375, "step": 27108 }, { - "epoch": 0.7692678774120317, + "epoch": 0.7681997222930659, "grad_norm": 0.0, - "learning_rate": 2.664749915827992e-06, - "loss": 0.7088, + "learning_rate": 2.6882367015874313e-06, + "loss": 0.7197, "step": 27109 }, { - "epoch": 0.7692962542565267, + "epoch": 0.7682280597353283, "grad_norm": 0.0, - "learning_rate": 2.6641252839468435e-06, - "loss": 0.8242, + "learning_rate": 2.6876106270843382e-06, + "loss": 0.8019, "step": 27110 }, { - "epoch": 0.7693246311010216, + "epoch": 0.7682563971775908, "grad_norm": 0.0, - "learning_rate": 2.6635007140321557e-06, - "loss": 0.8295, + "learning_rate": 2.6869846141762148e-06, + "loss": 0.7038, "step": 27111 }, { - "epoch": 0.7693530079455164, + "epoch": 0.7682847346198532, "grad_norm": 0.0, - "learning_rate": 2.6628762060892076e-06, - "loss": 0.8818, + "learning_rate": 2.6863586628683345e-06, + "loss": 0.899, "step": 27112 }, { - "epoch": 0.7693813847900114, + "epoch": 0.7683130720621156, "grad_norm": 0.0, - "learning_rate": 2.6622517601232766e-06, - "loss": 0.7572, + "learning_rate": 2.685732773165974e-06, + "loss": 0.7659, "step": 27113 }, { - "epoch": 0.7694097616345063, + "epoch": 0.7683414095043781, "grad_norm": 0.0, - "learning_rate": 2.6616273761396315e-06, - "loss": 0.8951, + "learning_rate": 2.6851069450743996e-06, + "loss": 0.746, "step": 27114 }, { - "epoch": 0.7694381384790011, + "epoch": 0.7683697469466406, "grad_norm": 0.0, - "learning_rate": 2.6610030541435504e-06, - "loss": 0.7898, + "learning_rate": 2.6844811785988866e-06, + "loss": 0.7961, "step": 27115 }, { - "epoch": 0.7694665153234961, + "epoch": 0.768398084388903, "grad_norm": 0.0, - "learning_rate": 2.6603787941403092e-06, - "loss": 0.812, + "learning_rate": 2.683855473744704e-06, + "loss": 0.8304, "step": 27116 }, { - "epoch": 0.7694948921679909, + "epoch": 0.7684264218311655, "grad_norm": 0.0, - "learning_rate": 2.6597545961351733e-06, - "loss": 0.7844, + "learning_rate": 2.6832298305171246e-06, + "loss": 0.8567, "step": 27117 }, { - "epoch": 0.7695232690124858, + "epoch": 0.768454759273428, "grad_norm": 0.0, - "learning_rate": 2.6591304601334247e-06, - "loss": 0.6546, + "learning_rate": 2.6826042489214186e-06, + "loss": 0.7952, "step": 27118 }, { - "epoch": 0.7695516458569807, + "epoch": 0.7684830967156905, "grad_norm": 0.0, - "learning_rate": 2.6585063861403293e-06, - "loss": 0.8402, + "learning_rate": 2.681978728962853e-06, + "loss": 0.8721, "step": 27119 }, { - "epoch": 0.7695800227014756, + "epoch": 0.7685114341579529, "grad_norm": 0.0, - "learning_rate": 2.6578823741611593e-06, - "loss": 0.7909, + "learning_rate": 2.6813532706466973e-06, + "loss": 0.8123, "step": 27120 }, { - "epoch": 0.7696083995459705, + "epoch": 0.7685397716002154, "grad_norm": 0.0, - "learning_rate": 2.657258424201191e-06, - "loss": 0.8094, + "learning_rate": 2.6807278739782238e-06, + "loss": 0.8331, "step": 27121 }, { - "epoch": 0.7696367763904653, + "epoch": 0.7685681090424779, "grad_norm": 0.0, - "learning_rate": 2.6566345362656876e-06, - "loss": 0.8309, + "learning_rate": 2.6801025389626945e-06, + "loss": 0.9057, "step": 27122 }, { - "epoch": 0.7696651532349603, + "epoch": 0.7685964464847402, "grad_norm": 0.0, - "learning_rate": 2.656010710359922e-06, - "loss": 0.7928, + "learning_rate": 2.6794772656053824e-06, + "loss": 0.7392, "step": 27123 }, { - "epoch": 0.7696935300794552, + "epoch": 0.7686247839270027, "grad_norm": 0.0, - "learning_rate": 2.655386946489167e-06, - "loss": 0.9464, + "learning_rate": 2.6788520539115492e-06, + "loss": 0.821, "step": 27124 }, { - "epoch": 0.76972190692395, + "epoch": 0.7686531213692652, "grad_norm": 0.0, - "learning_rate": 2.654763244658686e-06, - "loss": 0.8681, + "learning_rate": 2.678226903886464e-06, + "loss": 0.7705, "step": 27125 }, { - "epoch": 0.7697502837684449, + "epoch": 0.7686814588115277, "grad_norm": 0.0, - "learning_rate": 2.65413960487375e-06, - "loss": 0.7304, + "learning_rate": 2.6776018155353946e-06, + "loss": 0.8316, "step": 27126 }, { - "epoch": 0.7697786606129399, + "epoch": 0.7687097962537901, "grad_norm": 0.0, - "learning_rate": 2.65351602713963e-06, - "loss": 0.8241, + "learning_rate": 2.676976788863602e-06, + "loss": 0.8271, "step": 27127 }, { - "epoch": 0.7698070374574347, + "epoch": 0.7687381336960526, "grad_norm": 0.0, - "learning_rate": 2.6528925114615877e-06, - "loss": 0.7967, + "learning_rate": 2.676351823876353e-06, + "loss": 0.8089, "step": 27128 }, { - "epoch": 0.7698354143019296, + "epoch": 0.7687664711383151, "grad_norm": 0.0, - "learning_rate": 2.652269057844892e-06, - "loss": 0.8688, + "learning_rate": 2.6757269205789118e-06, + "loss": 0.8369, "step": 27129 }, { - "epoch": 0.7698637911464246, + "epoch": 0.7687948085805775, "grad_norm": 0.0, - "learning_rate": 2.651645666294813e-06, - "loss": 0.8008, + "learning_rate": 2.6751020789765423e-06, + "loss": 0.9176, "step": 27130 }, { - "epoch": 0.7698921679909194, + "epoch": 0.76882314602284, "grad_norm": 0.0, - "learning_rate": 2.651022336816611e-06, - "loss": 0.7011, + "learning_rate": 2.6744772990745117e-06, + "loss": 0.8175, "step": 27131 }, { - "epoch": 0.7699205448354143, + "epoch": 0.7688514834651025, "grad_norm": 0.0, - "learning_rate": 2.6503990694155522e-06, - "loss": 0.8064, + "learning_rate": 2.6738525808780757e-06, + "loss": 0.8136, "step": 27132 }, { - "epoch": 0.7699489216799092, + "epoch": 0.768879820907365, "grad_norm": 0.0, - "learning_rate": 2.649775864096904e-06, - "loss": 0.8174, + "learning_rate": 2.673227924392501e-06, + "loss": 0.8659, "step": 27133 }, { - "epoch": 0.7699772985244041, + "epoch": 0.7689081583496273, "grad_norm": 0.0, - "learning_rate": 2.6491527208659294e-06, - "loss": 0.8197, + "learning_rate": 2.6726033296230492e-06, + "loss": 0.7881, "step": 27134 }, { - "epoch": 0.770005675368899, + "epoch": 0.7689364957918898, "grad_norm": 0.0, - "learning_rate": 2.648529639727896e-06, - "loss": 0.819, + "learning_rate": 2.671978796574979e-06, + "loss": 0.8563, "step": 27135 }, { - "epoch": 0.7700340522133938, + "epoch": 0.7689648332341523, "grad_norm": 0.0, - "learning_rate": 2.647906620688059e-06, - "loss": 0.6771, + "learning_rate": 2.6713543252535523e-06, + "loss": 0.8868, "step": 27136 }, { - "epoch": 0.7700624290578888, + "epoch": 0.7689931706764147, "grad_norm": 0.0, - "learning_rate": 2.647283663751685e-06, - "loss": 0.8058, + "learning_rate": 2.6707299156640322e-06, + "loss": 0.7954, "step": 27137 }, { - "epoch": 0.7700908059023837, + "epoch": 0.7690215081186772, "grad_norm": 0.0, - "learning_rate": 2.646660768924041e-06, - "loss": 0.9134, + "learning_rate": 2.6701055678116727e-06, + "loss": 0.7613, "step": 27138 }, { - "epoch": 0.7701191827468785, + "epoch": 0.7690498455609397, "grad_norm": 0.0, - "learning_rate": 2.6460379362103794e-06, - "loss": 0.8322, + "learning_rate": 2.669481281701739e-06, + "loss": 0.8693, "step": 27139 }, { - "epoch": 0.7701475595913735, + "epoch": 0.7690781830032021, "grad_norm": 0.0, - "learning_rate": 2.6454151656159666e-06, - "loss": 0.8004, + "learning_rate": 2.6688570573394844e-06, + "loss": 0.8134, "step": 27140 }, { - "epoch": 0.7701759364358683, + "epoch": 0.7691065204454646, "grad_norm": 0.0, - "learning_rate": 2.644792457146066e-06, - "loss": 0.8912, + "learning_rate": 2.6682328947301685e-06, + "loss": 0.8657, "step": 27141 }, { - "epoch": 0.7702043132803632, + "epoch": 0.7691348578877271, "grad_norm": 0.0, - "learning_rate": 2.64416981080593e-06, - "loss": 0.7582, + "learning_rate": 2.6676087938790496e-06, + "loss": 0.8955, "step": 27142 }, { - "epoch": 0.7702326901248581, + "epoch": 0.7691631953299896, "grad_norm": 0.0, - "learning_rate": 2.643547226600823e-06, - "loss": 0.7104, + "learning_rate": 2.666984754791384e-06, + "loss": 0.7366, "step": 27143 }, { - "epoch": 0.770261066969353, + "epoch": 0.7691915327722519, "grad_norm": 0.0, - "learning_rate": 2.6429247045360062e-06, - "loss": 0.7479, + "learning_rate": 2.666360777472432e-06, + "loss": 0.8001, "step": 27144 }, { - "epoch": 0.7702894438138479, + "epoch": 0.7692198702145144, "grad_norm": 0.0, - "learning_rate": 2.6423022446167325e-06, - "loss": 0.828, + "learning_rate": 2.6657368619274447e-06, + "loss": 0.8838, "step": 27145 }, { - "epoch": 0.7703178206583428, + "epoch": 0.7692482076567769, "grad_norm": 0.0, - "learning_rate": 2.641679846848262e-06, - "loss": 0.8878, + "learning_rate": 2.665113008161678e-06, + "loss": 0.9005, "step": 27146 }, { - "epoch": 0.7703461975028377, + "epoch": 0.7692765450990393, "grad_norm": 0.0, - "learning_rate": 2.641057511235856e-06, - "loss": 0.7299, + "learning_rate": 2.6644892161803917e-06, + "loss": 0.89, "step": 27147 }, { - "epoch": 0.7703745743473326, + "epoch": 0.7693048825413018, "grad_norm": 0.0, - "learning_rate": 2.640435237784762e-06, - "loss": 0.8321, + "learning_rate": 2.6638654859888335e-06, + "loss": 0.8573, "step": 27148 }, { - "epoch": 0.7704029511918274, + "epoch": 0.7693332199835643, "grad_norm": 0.0, - "learning_rate": 2.6398130265002476e-06, - "loss": 0.7754, + "learning_rate": 2.6632418175922613e-06, + "loss": 0.8338, "step": 27149 }, { - "epoch": 0.7704313280363224, + "epoch": 0.7693615574258268, "grad_norm": 0.0, - "learning_rate": 2.6391908773875607e-06, - "loss": 0.8629, + "learning_rate": 2.66261821099593e-06, + "loss": 0.8394, "step": 27150 }, { - "epoch": 0.7704597048808173, + "epoch": 0.7693898948680892, "grad_norm": 0.0, - "learning_rate": 2.6385687904519595e-06, - "loss": 0.8216, + "learning_rate": 2.6619946662050866e-06, + "loss": 0.8655, "step": 27151 }, { - "epoch": 0.7704880817253121, + "epoch": 0.7694182323103517, "grad_norm": 0.0, - "learning_rate": 2.6379467656987025e-06, - "loss": 0.8189, + "learning_rate": 2.6613711832249912e-06, + "loss": 0.8218, "step": 27152 }, { - "epoch": 0.770516458569807, + "epoch": 0.7694465697526142, "grad_norm": 0.0, - "learning_rate": 2.6373248031330367e-06, - "loss": 0.7701, + "learning_rate": 2.6607477620608877e-06, + "loss": 0.8754, "step": 27153 }, { - "epoch": 0.770544835414302, + "epoch": 0.7694749071948765, "grad_norm": 0.0, - "learning_rate": 2.6367029027602187e-06, - "loss": 0.8334, + "learning_rate": 2.660124402718032e-06, + "loss": 0.7453, "step": 27154 }, { - "epoch": 0.7705732122587968, + "epoch": 0.769503244637139, "grad_norm": 0.0, - "learning_rate": 2.636081064585505e-06, - "loss": 0.9233, + "learning_rate": 2.659501105201673e-06, + "loss": 0.818, "step": 27155 }, { - "epoch": 0.7706015891032917, + "epoch": 0.7695315820794015, "grad_norm": 0.0, - "learning_rate": 2.635459288614144e-06, - "loss": 0.8252, + "learning_rate": 2.6588778695170625e-06, + "loss": 0.751, "step": 27156 }, { - "epoch": 0.7706299659477867, + "epoch": 0.769559919521664, "grad_norm": 0.0, - "learning_rate": 2.6348375748513887e-06, - "loss": 0.7732, + "learning_rate": 2.65825469566945e-06, + "loss": 0.8011, "step": 27157 }, { - "epoch": 0.7706583427922815, + "epoch": 0.7695882569639264, "grad_norm": 0.0, - "learning_rate": 2.6342159233024945e-06, - "loss": 0.802, + "learning_rate": 2.6576315836640866e-06, + "loss": 0.7693, "step": 27158 }, { - "epoch": 0.7706867196367764, + "epoch": 0.7696165944061889, "grad_norm": 0.0, - "learning_rate": 2.6335943339727053e-06, - "loss": 0.8268, + "learning_rate": 2.6570085335062166e-06, + "loss": 0.7589, "step": 27159 }, { - "epoch": 0.7707150964812712, + "epoch": 0.7696449318484514, "grad_norm": 0.0, - "learning_rate": 2.6329728068672777e-06, - "loss": 0.8485, + "learning_rate": 2.6563855452010933e-06, + "loss": 0.8518, "step": 27160 }, { - "epoch": 0.7707434733257662, + "epoch": 0.7696732692907138, "grad_norm": 0.0, - "learning_rate": 2.6323513419914626e-06, - "loss": 0.8684, + "learning_rate": 2.6557626187539586e-06, + "loss": 0.8072, "step": 27161 }, { - "epoch": 0.7707718501702611, + "epoch": 0.7697016067329763, "grad_norm": 0.0, - "learning_rate": 2.6317299393505035e-06, - "loss": 0.7707, + "learning_rate": 2.655139754170063e-06, + "loss": 0.8668, "step": 27162 }, { - "epoch": 0.7708002270147559, + "epoch": 0.7697299441752388, "grad_norm": 0.0, - "learning_rate": 2.63110859894965e-06, - "loss": 0.8657, + "learning_rate": 2.6545169514546554e-06, + "loss": 0.8259, "step": 27163 }, { - "epoch": 0.7708286038592509, + "epoch": 0.7697582816175011, "grad_norm": 0.0, - "learning_rate": 2.6304873207941584e-06, - "loss": 0.8538, + "learning_rate": 2.6538942106129762e-06, + "loss": 0.8297, "step": 27164 }, { - "epoch": 0.7708569807037458, + "epoch": 0.7697866190597636, "grad_norm": 0.0, - "learning_rate": 2.6298661048892705e-06, - "loss": 0.7897, + "learning_rate": 2.6532715316502734e-06, + "loss": 0.8162, "step": 27165 }, { - "epoch": 0.7708853575482406, + "epoch": 0.7698149565020261, "grad_norm": 0.0, - "learning_rate": 2.629244951240234e-06, - "loss": 0.8631, + "learning_rate": 2.652648914571796e-06, + "loss": 0.7263, "step": 27166 }, { - "epoch": 0.7709137343927355, + "epoch": 0.7698432939442886, "grad_norm": 0.0, - "learning_rate": 2.6286238598522993e-06, - "loss": 0.711, + "learning_rate": 2.652026359382782e-06, + "loss": 0.8579, "step": 27167 }, { - "epoch": 0.7709421112372304, + "epoch": 0.769871631386551, "grad_norm": 0.0, - "learning_rate": 2.628002830730708e-06, - "loss": 0.8413, + "learning_rate": 2.651403866088479e-06, + "loss": 0.7989, "step": 27168 }, { - "epoch": 0.7709704880817253, + "epoch": 0.7698999688288135, "grad_norm": 0.0, - "learning_rate": 2.62738186388071e-06, - "loss": 0.8334, + "learning_rate": 2.65078143469413e-06, + "loss": 0.7563, "step": 27169 }, { - "epoch": 0.7709988649262202, + "epoch": 0.769928306271076, "grad_norm": 0.0, - "learning_rate": 2.626760959307547e-06, - "loss": 0.8353, + "learning_rate": 2.650159065204978e-06, + "loss": 0.7946, "step": 27170 }, { - "epoch": 0.7710272417707151, + "epoch": 0.7699566437133384, "grad_norm": 0.0, - "learning_rate": 2.6261401170164658e-06, - "loss": 0.8169, + "learning_rate": 2.6495367576262687e-06, + "loss": 0.8221, "step": 27171 }, { - "epoch": 0.77105561861521, + "epoch": 0.7699849811556009, "grad_norm": 0.0, - "learning_rate": 2.625519337012713e-06, - "loss": 0.8423, + "learning_rate": 2.6489145119632374e-06, + "loss": 0.916, "step": 27172 }, { - "epoch": 0.7710839954597049, + "epoch": 0.7700133185978634, "grad_norm": 0.0, - "learning_rate": 2.624898619301527e-06, - "loss": 0.8547, + "learning_rate": 2.6482923282211313e-06, + "loss": 0.818, "step": 27173 }, { - "epoch": 0.7711123723041998, + "epoch": 0.7700416560401259, "grad_norm": 0.0, - "learning_rate": 2.6242779638881532e-06, - "loss": 0.7989, + "learning_rate": 2.6476702064051873e-06, + "loss": 0.7312, "step": 27174 }, { - "epoch": 0.7711407491486947, + "epoch": 0.7700699934823882, "grad_norm": 0.0, - "learning_rate": 2.623657370777839e-06, - "loss": 0.8319, + "learning_rate": 2.6470481465206468e-06, + "loss": 0.7601, "step": 27175 }, { - "epoch": 0.7711691259931895, + "epoch": 0.7700983309246507, "grad_norm": 0.0, - "learning_rate": 2.623036839975819e-06, - "loss": 0.7908, + "learning_rate": 2.646426148572753e-06, + "loss": 0.8018, "step": 27176 }, { - "epoch": 0.7711975028376844, + "epoch": 0.7701266683669132, "grad_norm": 0.0, - "learning_rate": 2.6224163714873386e-06, - "loss": 0.821, + "learning_rate": 2.6458042125667393e-06, + "loss": 0.8052, "step": 27177 }, { - "epoch": 0.7712258796821794, + "epoch": 0.7701550058091756, "grad_norm": 0.0, - "learning_rate": 2.6217959653176396e-06, - "loss": 0.7313, + "learning_rate": 2.6451823385078477e-06, + "loss": 0.8479, "step": 27178 }, { - "epoch": 0.7712542565266742, + "epoch": 0.7701833432514381, "grad_norm": 0.0, - "learning_rate": 2.621175621471961e-06, - "loss": 0.8063, + "learning_rate": 2.6445605264013206e-06, + "loss": 0.8426, "step": 27179 }, { - "epoch": 0.7712826333711691, + "epoch": 0.7702116806937006, "grad_norm": 0.0, - "learning_rate": 2.6205553399555428e-06, - "loss": 0.8337, + "learning_rate": 2.6439387762523873e-06, + "loss": 0.8483, "step": 27180 }, { - "epoch": 0.7713110102156641, + "epoch": 0.7702400181359631, "grad_norm": 0.0, - "learning_rate": 2.619935120773629e-06, - "loss": 0.8228, + "learning_rate": 2.6433170880662895e-06, + "loss": 0.9337, "step": 27181 }, { - "epoch": 0.7713393870601589, + "epoch": 0.7702683555782255, "grad_norm": 0.0, - "learning_rate": 2.619314963931452e-06, - "loss": 0.762, + "learning_rate": 2.6426954618482638e-06, + "loss": 0.6728, "step": 27182 }, { - "epoch": 0.7713677639046538, + "epoch": 0.770296693020488, "grad_norm": 0.0, - "learning_rate": 2.618694869434254e-06, - "loss": 0.8497, + "learning_rate": 2.6420738976035463e-06, + "loss": 0.7821, "step": 27183 }, { - "epoch": 0.7713961407491486, + "epoch": 0.7703250304627505, "grad_norm": 0.0, - "learning_rate": 2.6180748372872743e-06, - "loss": 0.8253, + "learning_rate": 2.641452395337376e-06, + "loss": 0.7966, "step": 27184 }, { - "epoch": 0.7714245175936436, + "epoch": 0.7703533679050129, "grad_norm": 0.0, - "learning_rate": 2.6174548674957457e-06, - "loss": 0.868, + "learning_rate": 2.6408309550549817e-06, + "loss": 0.8589, "step": 27185 }, { - "epoch": 0.7714528944381385, + "epoch": 0.7703817053472753, "grad_norm": 0.0, - "learning_rate": 2.616834960064908e-06, - "loss": 0.7862, + "learning_rate": 2.6402095767615997e-06, + "loss": 0.8567, "step": 27186 }, { - "epoch": 0.7714812712826333, + "epoch": 0.7704100427895378, "grad_norm": 0.0, - "learning_rate": 2.616215114999999e-06, - "loss": 0.9896, + "learning_rate": 2.63958826046247e-06, + "loss": 0.7852, "step": 27187 }, { - "epoch": 0.7715096481271283, + "epoch": 0.7704383802318002, "grad_norm": 0.0, - "learning_rate": 2.615595332306251e-06, - "loss": 0.8488, + "learning_rate": 2.638967006162818e-06, + "loss": 0.7017, "step": 27188 }, { - "epoch": 0.7715380249716232, + "epoch": 0.7704667176740627, "grad_norm": 0.0, - "learning_rate": 2.6149756119889013e-06, - "loss": 0.8521, + "learning_rate": 2.638345813867883e-06, + "loss": 0.721, "step": 27189 }, { - "epoch": 0.771566401816118, + "epoch": 0.7704950551163252, "grad_norm": 0.0, - "learning_rate": 2.614355954053187e-06, - "loss": 0.8013, + "learning_rate": 2.637724683582893e-06, + "loss": 0.8036, "step": 27190 }, { - "epoch": 0.771594778660613, + "epoch": 0.7705233925585877, "grad_norm": 0.0, - "learning_rate": 2.6137363585043374e-06, - "loss": 0.7583, + "learning_rate": 2.63710361531308e-06, + "loss": 0.7912, "step": 27191 }, { - "epoch": 0.7716231555051078, + "epoch": 0.7705517300008501, "grad_norm": 0.0, - "learning_rate": 2.6131168253475892e-06, - "loss": 0.7315, + "learning_rate": 2.636482609063682e-06, + "loss": 0.7689, "step": 27192 }, { - "epoch": 0.7716515323496027, + "epoch": 0.7705800674431126, "grad_norm": 0.0, - "learning_rate": 2.612497354588177e-06, - "loss": 0.8946, + "learning_rate": 2.6358616648399216e-06, + "loss": 0.7697, "step": 27193 }, { - "epoch": 0.7716799091940976, + "epoch": 0.7706084048853751, "grad_norm": 0.0, - "learning_rate": 2.611877946231327e-06, - "loss": 0.7252, + "learning_rate": 2.6352407826470338e-06, + "loss": 0.7851, "step": 27194 }, { - "epoch": 0.7717082860385925, + "epoch": 0.7706367423276375, "grad_norm": 0.0, - "learning_rate": 2.6112586002822814e-06, - "loss": 0.8285, + "learning_rate": 2.634619962490247e-06, + "loss": 0.8113, "step": 27195 }, { - "epoch": 0.7717366628830874, + "epoch": 0.7706650797699, "grad_norm": 0.0, - "learning_rate": 2.610639316746263e-06, - "loss": 0.8317, + "learning_rate": 2.633999204374792e-06, + "loss": 0.8994, "step": 27196 }, { - "epoch": 0.7717650397275823, + "epoch": 0.7706934172121624, "grad_norm": 0.0, - "learning_rate": 2.610020095628507e-06, - "loss": 1.0211, + "learning_rate": 2.633378508305899e-06, + "loss": 0.8519, "step": 27197 }, { - "epoch": 0.7717934165720772, + "epoch": 0.7707217546544249, "grad_norm": 0.0, - "learning_rate": 2.6094009369342477e-06, - "loss": 0.7877, + "learning_rate": 2.632757874288793e-06, + "loss": 0.8746, "step": 27198 }, { - "epoch": 0.7718217934165721, + "epoch": 0.7707500920966873, "grad_norm": 0.0, - "learning_rate": 2.608781840668706e-06, - "loss": 0.7776, + "learning_rate": 2.632137302328701e-06, + "loss": 0.8675, "step": 27199 }, { - "epoch": 0.771850170261067, + "epoch": 0.7707784295389498, "grad_norm": 0.0, - "learning_rate": 2.608162806837118e-06, - "loss": 0.8496, + "learning_rate": 2.631516792430857e-06, + "loss": 0.8825, "step": 27200 }, { - "epoch": 0.7718785471055618, + "epoch": 0.7708067669812123, "grad_norm": 0.0, - "learning_rate": 2.6075438354447125e-06, - "loss": 0.8629, + "learning_rate": 2.63089634460048e-06, + "loss": 0.814, "step": 27201 }, { - "epoch": 0.7719069239500568, + "epoch": 0.7708351044234747, "grad_norm": 0.0, - "learning_rate": 2.6069249264967145e-06, - "loss": 0.8516, + "learning_rate": 2.630275958842802e-06, + "loss": 0.8298, "step": 27202 }, { - "epoch": 0.7719353007945516, + "epoch": 0.7708634418657372, "grad_norm": 0.0, - "learning_rate": 2.606306079998354e-06, - "loss": 0.769, + "learning_rate": 2.629655635163044e-06, + "loss": 0.8118, "step": 27203 }, { - "epoch": 0.7719636776390465, + "epoch": 0.7708917793079997, "grad_norm": 0.0, - "learning_rate": 2.6056872959548607e-06, - "loss": 0.8009, + "learning_rate": 2.629035373566433e-06, + "loss": 0.7574, "step": 27204 }, { - "epoch": 0.7719920544835415, + "epoch": 0.7709201167502622, "grad_norm": 0.0, - "learning_rate": 2.6050685743714565e-06, - "loss": 0.8627, + "learning_rate": 2.6284151740581974e-06, + "loss": 0.8696, "step": 27205 }, { - "epoch": 0.7720204313280363, + "epoch": 0.7709484541925246, "grad_norm": 0.0, - "learning_rate": 2.6044499152533708e-06, - "loss": 0.7733, + "learning_rate": 2.6277950366435556e-06, + "loss": 0.8436, "step": 27206 }, { - "epoch": 0.7720488081725312, + "epoch": 0.770976791634787, "grad_norm": 0.0, - "learning_rate": 2.6038313186058317e-06, - "loss": 0.6891, + "learning_rate": 2.6271749613277333e-06, + "loss": 0.834, "step": 27207 }, { - "epoch": 0.7720771850170262, + "epoch": 0.7710051290770495, "grad_norm": 0.0, - "learning_rate": 2.603212784434059e-06, - "loss": 0.8109, + "learning_rate": 2.6265549481159538e-06, + "loss": 0.6682, "step": 27208 }, { - "epoch": 0.772105561861521, + "epoch": 0.7710334665193119, "grad_norm": 0.0, - "learning_rate": 2.6025943127432807e-06, - "loss": 0.757, + "learning_rate": 2.6259349970134406e-06, + "loss": 0.782, "step": 27209 }, { - "epoch": 0.7721339387060159, + "epoch": 0.7710618039615744, "grad_norm": 0.0, - "learning_rate": 2.6019759035387216e-06, - "loss": 0.7387, + "learning_rate": 2.625315108025418e-06, + "loss": 0.8048, "step": 27210 }, { - "epoch": 0.7721623155505107, + "epoch": 0.7710901414038369, "grad_norm": 0.0, - "learning_rate": 2.6013575568256034e-06, - "loss": 0.7802, + "learning_rate": 2.6246952811571015e-06, + "loss": 0.8943, "step": 27211 }, { - "epoch": 0.7721906923950057, + "epoch": 0.7711184788460993, "grad_norm": 0.0, - "learning_rate": 2.600739272609154e-06, - "loss": 0.8686, + "learning_rate": 2.6240755164137156e-06, + "loss": 0.8548, "step": 27212 }, { - "epoch": 0.7722190692395006, + "epoch": 0.7711468162883618, "grad_norm": 0.0, - "learning_rate": 2.6001210508945894e-06, - "loss": 0.8383, + "learning_rate": 2.623455813800484e-06, + "loss": 0.7739, "step": 27213 }, { - "epoch": 0.7722474460839954, + "epoch": 0.7711751537306243, "grad_norm": 0.0, - "learning_rate": 2.5995028916871346e-06, - "loss": 0.9129, + "learning_rate": 2.6228361733226204e-06, + "loss": 0.8743, "step": 27214 }, { - "epoch": 0.7722758229284904, + "epoch": 0.7712034911728868, "grad_norm": 0.0, - "learning_rate": 2.598884794992015e-06, - "loss": 0.8632, + "learning_rate": 2.622216594985346e-06, + "loss": 0.9149, "step": 27215 }, { - "epoch": 0.7723041997729853, + "epoch": 0.7712318286151492, "grad_norm": 0.0, - "learning_rate": 2.5982667608144454e-06, - "loss": 0.875, + "learning_rate": 2.621597078793885e-06, + "loss": 0.8079, "step": 27216 }, { - "epoch": 0.7723325766174801, + "epoch": 0.7712601660574117, "grad_norm": 0.0, - "learning_rate": 2.597648789159648e-06, - "loss": 0.7911, + "learning_rate": 2.620977624753448e-06, + "loss": 0.7951, "step": 27217 }, { - "epoch": 0.772360953461975, + "epoch": 0.7712885034996741, "grad_norm": 0.0, - "learning_rate": 2.597030880032848e-06, - "loss": 0.804, + "learning_rate": 2.62035823286926e-06, + "loss": 0.884, "step": 27218 }, { - "epoch": 0.77238933030647, + "epoch": 0.7713168409419365, "grad_norm": 0.0, - "learning_rate": 2.5964130334392577e-06, - "loss": 0.8277, + "learning_rate": 2.6197389031465328e-06, + "loss": 0.8027, "step": 27219 }, { - "epoch": 0.7724177071509648, + "epoch": 0.771345178384199, "grad_norm": 0.0, - "learning_rate": 2.5957952493841e-06, - "loss": 0.7563, + "learning_rate": 2.6191196355904834e-06, + "loss": 0.7532, "step": 27220 }, { - "epoch": 0.7724460839954597, + "epoch": 0.7713735158264615, "grad_norm": 0.0, - "learning_rate": 2.5951775278725956e-06, - "loss": 0.9776, + "learning_rate": 2.618500430206331e-06, + "loss": 0.9007, "step": 27221 }, { - "epoch": 0.7724744608399546, + "epoch": 0.771401853268724, "grad_norm": 0.0, - "learning_rate": 2.594559868909956e-06, - "loss": 0.8005, + "learning_rate": 2.617881286999291e-06, + "loss": 0.7609, "step": 27222 }, { - "epoch": 0.7725028376844495, + "epoch": 0.7714301907109864, "grad_norm": 0.0, - "learning_rate": 2.593942272501402e-06, - "loss": 0.8807, + "learning_rate": 2.617262205974578e-06, + "loss": 0.7785, "step": 27223 }, { - "epoch": 0.7725312145289444, + "epoch": 0.7714585281532489, "grad_norm": 0.0, - "learning_rate": 2.593324738652151e-06, - "loss": 0.8559, + "learning_rate": 2.6166431871374096e-06, + "loss": 0.7304, "step": 27224 }, { - "epoch": 0.7725595913734393, + "epoch": 0.7714868655955114, "grad_norm": 0.0, - "learning_rate": 2.592707267367418e-06, - "loss": 0.8107, + "learning_rate": 2.6160242304929952e-06, + "loss": 0.7981, "step": 27225 }, { - "epoch": 0.7725879682179342, + "epoch": 0.7715152030377738, "grad_norm": 0.0, - "learning_rate": 2.5920898586524233e-06, - "loss": 0.7212, + "learning_rate": 2.6154053360465536e-06, + "loss": 0.8286, "step": 27226 }, { - "epoch": 0.772616345062429, + "epoch": 0.7715435404800363, "grad_norm": 0.0, - "learning_rate": 2.5914725125123765e-06, - "loss": 0.8502, + "learning_rate": 2.6147865038032915e-06, + "loss": 0.7921, "step": 27227 }, { - "epoch": 0.7726447219069239, + "epoch": 0.7715718779222988, "grad_norm": 0.0, - "learning_rate": 2.5908552289524926e-06, - "loss": 0.817, + "learning_rate": 2.6141677337684245e-06, + "loss": 0.7682, "step": 27228 }, { - "epoch": 0.7726730987514189, + "epoch": 0.7716002153645612, "grad_norm": 0.0, - "learning_rate": 2.590238007977992e-06, - "loss": 0.7779, + "learning_rate": 2.6135490259471695e-06, + "loss": 0.8296, "step": 27229 }, { - "epoch": 0.7727014755959137, + "epoch": 0.7716285528068236, "grad_norm": 0.0, - "learning_rate": 2.58962084959408e-06, - "loss": 0.8183, + "learning_rate": 2.6129303803447302e-06, + "loss": 0.8578, "step": 27230 }, { - "epoch": 0.7727298524404086, + "epoch": 0.7716568902490861, "grad_norm": 0.0, - "learning_rate": 2.5890037538059744e-06, - "loss": 0.8815, + "learning_rate": 2.612311796966325e-06, + "loss": 0.7074, "step": 27231 }, { - "epoch": 0.7727582292849036, + "epoch": 0.7716852276913486, "grad_norm": 0.0, - "learning_rate": 2.5883867206188907e-06, - "loss": 0.8499, + "learning_rate": 2.6116932758171574e-06, + "loss": 0.8486, "step": 27232 }, { - "epoch": 0.7727866061293984, + "epoch": 0.771713565133611, "grad_norm": 0.0, - "learning_rate": 2.5877697500380337e-06, - "loss": 0.7957, + "learning_rate": 2.611074816902441e-06, + "loss": 0.7887, "step": 27233 }, { - "epoch": 0.7728149829738933, + "epoch": 0.7717419025758735, "grad_norm": 0.0, - "learning_rate": 2.5871528420686196e-06, - "loss": 0.8114, + "learning_rate": 2.610456420227386e-06, + "loss": 0.7963, "step": 27234 }, { - "epoch": 0.7728433598183881, + "epoch": 0.771770240018136, "grad_norm": 0.0, - "learning_rate": 2.586535996715861e-06, - "loss": 0.902, + "learning_rate": 2.6098380857972007e-06, + "loss": 0.9099, "step": 27235 }, { - "epoch": 0.7728717366628831, + "epoch": 0.7717985774603984, "grad_norm": 0.0, - "learning_rate": 2.585919213984963e-06, - "loss": 0.9553, + "learning_rate": 2.609219813617092e-06, + "loss": 0.8496, "step": 27236 }, { - "epoch": 0.772900113507378, + "epoch": 0.7718269149026609, "grad_norm": 0.0, - "learning_rate": 2.58530249388114e-06, - "loss": 0.804, + "learning_rate": 2.6086016036922736e-06, + "loss": 0.8759, "step": 27237 }, { - "epoch": 0.7729284903518728, + "epoch": 0.7718552523449234, "grad_norm": 0.0, - "learning_rate": 2.5846858364096017e-06, - "loss": 0.7819, + "learning_rate": 2.607983456027945e-06, + "loss": 0.7226, "step": 27238 }, { - "epoch": 0.7729568671963678, + "epoch": 0.7718835897871859, "grad_norm": 0.0, - "learning_rate": 2.584069241575553e-06, - "loss": 0.7981, + "learning_rate": 2.6073653706293202e-06, + "loss": 0.7867, "step": 27239 }, { - "epoch": 0.7729852440408627, + "epoch": 0.7719119272294482, "grad_norm": 0.0, - "learning_rate": 2.583452709384204e-06, - "loss": 0.7789, + "learning_rate": 2.606747347501598e-06, + "loss": 0.7983, "step": 27240 }, { - "epoch": 0.7730136208853575, + "epoch": 0.7719402646717107, "grad_norm": 0.0, - "learning_rate": 2.582836239840765e-06, - "loss": 0.8091, + "learning_rate": 2.6061293866499894e-06, + "loss": 0.7545, "step": 27241 }, { - "epoch": 0.7730419977298525, + "epoch": 0.7719686021139732, "grad_norm": 0.0, - "learning_rate": 2.5822198329504412e-06, - "loss": 0.8266, + "learning_rate": 2.6055114880797008e-06, + "loss": 0.7902, "step": 27242 }, { - "epoch": 0.7730703745743474, + "epoch": 0.7719969395562356, "grad_norm": 0.0, - "learning_rate": 2.5816034887184414e-06, - "loss": 0.8001, + "learning_rate": 2.604893651795932e-06, + "loss": 0.7396, "step": 27243 }, { - "epoch": 0.7730987514188422, + "epoch": 0.7720252769984981, "grad_norm": 0.0, - "learning_rate": 2.5809872071499687e-06, - "loss": 0.8986, + "learning_rate": 2.60427587780389e-06, + "loss": 0.7402, "step": 27244 }, { - "epoch": 0.7731271282633371, + "epoch": 0.7720536144407606, "grad_norm": 0.0, - "learning_rate": 2.58037098825023e-06, - "loss": 0.8534, + "learning_rate": 2.603658166108782e-06, + "loss": 0.839, "step": 27245 }, { - "epoch": 0.773155505107832, + "epoch": 0.7720819518830231, "grad_norm": 0.0, - "learning_rate": 2.579754832024436e-06, - "loss": 0.748, + "learning_rate": 2.603040516715806e-06, + "loss": 0.7391, "step": 27246 }, { - "epoch": 0.7731838819523269, + "epoch": 0.7721102893252855, "grad_norm": 0.0, - "learning_rate": 2.579138738477781e-06, - "loss": 0.7811, + "learning_rate": 2.602422929630165e-06, + "loss": 0.8553, "step": 27247 }, { - "epoch": 0.7732122587968218, + "epoch": 0.772138626767548, "grad_norm": 0.0, - "learning_rate": 2.578522707615476e-06, - "loss": 0.8419, + "learning_rate": 2.601805404857063e-06, + "loss": 0.789, "step": 27248 }, { - "epoch": 0.7732406356413167, + "epoch": 0.7721669642098105, "grad_norm": 0.0, - "learning_rate": 2.577906739442726e-06, - "loss": 0.8197, + "learning_rate": 2.6011879424017006e-06, + "loss": 0.8086, "step": 27249 }, { - "epoch": 0.7732690124858116, + "epoch": 0.7721953016520728, "grad_norm": 0.0, - "learning_rate": 2.5772908339647286e-06, - "loss": 0.8024, + "learning_rate": 2.600570542269284e-06, + "loss": 0.8964, "step": 27250 }, { - "epoch": 0.7732973893303065, + "epoch": 0.7722236390943353, "grad_norm": 0.0, - "learning_rate": 2.5766749911866897e-06, - "loss": 0.8326, + "learning_rate": 2.5999532044650056e-06, + "loss": 0.8268, "step": 27251 }, { - "epoch": 0.7733257661748013, + "epoch": 0.7722519765365978, "grad_norm": 0.0, - "learning_rate": 2.5760592111138126e-06, - "loss": 0.769, + "learning_rate": 2.599335928994069e-06, + "loss": 0.8696, "step": 27252 }, { - "epoch": 0.7733541430192963, + "epoch": 0.7722803139788603, "grad_norm": 0.0, - "learning_rate": 2.575443493751294e-06, - "loss": 0.7719, + "learning_rate": 2.5987187158616777e-06, + "loss": 0.8139, "step": 27253 }, { - "epoch": 0.7733825198637911, + "epoch": 0.7723086514211227, "grad_norm": 0.0, - "learning_rate": 2.574827839104339e-06, - "loss": 0.8403, + "learning_rate": 2.5981015650730234e-06, + "loss": 0.762, "step": 27254 }, { - "epoch": 0.773410896708286, + "epoch": 0.7723369888633852, "grad_norm": 0.0, - "learning_rate": 2.574212247178147e-06, - "loss": 0.8408, + "learning_rate": 2.597484476633312e-06, + "loss": 0.771, "step": 27255 }, { - "epoch": 0.773439273552781, + "epoch": 0.7723653263056477, "grad_norm": 0.0, - "learning_rate": 2.5735967179779174e-06, - "loss": 0.9781, + "learning_rate": 2.5968674505477342e-06, + "loss": 0.841, "step": 27256 }, { - "epoch": 0.7734676503972758, + "epoch": 0.7723936637479101, "grad_norm": 0.0, - "learning_rate": 2.572981251508854e-06, - "loss": 0.8123, + "learning_rate": 2.596250486821491e-06, + "loss": 0.8098, "step": 27257 }, { - "epoch": 0.7734960272417707, + "epoch": 0.7724220011901726, "grad_norm": 0.0, - "learning_rate": 2.572365847776147e-06, - "loss": 0.8828, + "learning_rate": 2.5956335854597826e-06, + "loss": 0.7704, "step": 27258 }, { - "epoch": 0.7735244040862657, + "epoch": 0.7724503386324351, "grad_norm": 0.0, - "learning_rate": 2.5717505067850012e-06, - "loss": 0.8857, + "learning_rate": 2.595016746467799e-06, + "loss": 0.7568, "step": 27259 }, { - "epoch": 0.7735527809307605, + "epoch": 0.7724786760746974, "grad_norm": 0.0, - "learning_rate": 2.5711352285406155e-06, - "loss": 0.7966, + "learning_rate": 2.5943999698507394e-06, + "loss": 0.7341, "step": 27260 }, { - "epoch": 0.7735811577752554, + "epoch": 0.7725070135169599, "grad_norm": 0.0, - "learning_rate": 2.570520013048181e-06, - "loss": 0.7223, + "learning_rate": 2.5937832556137986e-06, + "loss": 0.7164, "step": 27261 }, { - "epoch": 0.7736095346197502, + "epoch": 0.7725353509592224, "grad_norm": 0.0, - "learning_rate": 2.5699048603128986e-06, - "loss": 0.8299, + "learning_rate": 2.5931666037621718e-06, + "loss": 0.8514, "step": 27262 }, { - "epoch": 0.7736379114642452, + "epoch": 0.7725636884014849, "grad_norm": 0.0, - "learning_rate": 2.5692897703399665e-06, - "loss": 0.8049, + "learning_rate": 2.592550014301055e-06, + "loss": 0.8326, "step": 27263 }, { - "epoch": 0.7736662883087401, + "epoch": 0.7725920258437473, "grad_norm": 0.0, - "learning_rate": 2.568674743134575e-06, - "loss": 0.8332, + "learning_rate": 2.5919334872356384e-06, + "loss": 0.7695, "step": 27264 }, { - "epoch": 0.7736946651532349, + "epoch": 0.7726203632860098, "grad_norm": 0.0, - "learning_rate": 2.5680597787019214e-06, - "loss": 0.8245, + "learning_rate": 2.591317022571116e-06, + "loss": 0.7376, "step": 27265 }, { - "epoch": 0.7737230419977299, + "epoch": 0.7726487007282723, "grad_norm": 0.0, - "learning_rate": 2.567444877047205e-06, - "loss": 0.829, + "learning_rate": 2.590700620312685e-06, + "loss": 0.7328, "step": 27266 }, { - "epoch": 0.7737514188422248, + "epoch": 0.7726770381705347, "grad_norm": 0.0, - "learning_rate": 2.5668300381756116e-06, - "loss": 0.8681, + "learning_rate": 2.590084280465531e-06, + "loss": 0.8178, "step": 27267 }, { - "epoch": 0.7737797956867196, + "epoch": 0.7727053756127972, "grad_norm": 0.0, - "learning_rate": 2.56621526209234e-06, - "loss": 0.8402, + "learning_rate": 2.5894680030348516e-06, + "loss": 0.778, "step": 27268 }, { - "epoch": 0.7738081725312145, + "epoch": 0.7727337130550597, "grad_norm": 0.0, - "learning_rate": 2.565600548802585e-06, - "loss": 0.928, + "learning_rate": 2.5888517880258323e-06, + "loss": 0.7776, "step": 27269 }, { - "epoch": 0.7738365493757094, + "epoch": 0.7727620504973222, "grad_norm": 0.0, - "learning_rate": 2.5649858983115293e-06, - "loss": 0.8237, + "learning_rate": 2.588235635443667e-06, + "loss": 0.8596, "step": 27270 }, { - "epoch": 0.7738649262202043, + "epoch": 0.7727903879395845, "grad_norm": 0.0, - "learning_rate": 2.5643713106243762e-06, - "loss": 0.854, + "learning_rate": 2.587619545293547e-06, + "loss": 0.7837, "step": 27271 }, { - "epoch": 0.7738933030646992, + "epoch": 0.772818725381847, "grad_norm": 0.0, - "learning_rate": 2.5637567857463154e-06, - "loss": 0.8331, + "learning_rate": 2.5870035175806584e-06, + "loss": 0.7695, "step": 27272 }, { - "epoch": 0.7739216799091941, + "epoch": 0.7728470628241095, "grad_norm": 0.0, - "learning_rate": 2.5631423236825328e-06, - "loss": 0.7534, + "learning_rate": 2.586387552310191e-06, + "loss": 0.737, "step": 27273 }, { - "epoch": 0.773950056753689, + "epoch": 0.7728754002663719, "grad_norm": 0.0, - "learning_rate": 2.5625279244382206e-06, - "loss": 0.7828, + "learning_rate": 2.5857716494873343e-06, + "loss": 0.8598, "step": 27274 }, { - "epoch": 0.7739784335981839, + "epoch": 0.7729037377086344, "grad_norm": 0.0, - "learning_rate": 2.561913588018573e-06, - "loss": 0.8203, + "learning_rate": 2.5851558091172767e-06, + "loss": 0.7751, "step": 27275 }, { - "epoch": 0.7740068104426788, + "epoch": 0.7729320751508969, "grad_norm": 0.0, - "learning_rate": 2.5612993144287725e-06, - "loss": 0.7902, + "learning_rate": 2.5845400312052073e-06, + "loss": 0.8211, "step": 27276 }, { - "epoch": 0.7740351872871737, + "epoch": 0.7729604125931594, "grad_norm": 0.0, - "learning_rate": 2.560685103674013e-06, - "loss": 0.8714, + "learning_rate": 2.5839243157563087e-06, + "loss": 0.872, "step": 27277 }, { - "epoch": 0.7740635641316685, + "epoch": 0.7729887500354218, "grad_norm": 0.0, - "learning_rate": 2.5600709557594794e-06, - "loss": 0.7766, + "learning_rate": 2.5833086627757684e-06, + "loss": 0.7363, "step": 27278 }, { - "epoch": 0.7740919409761634, + "epoch": 0.7730170874776843, "grad_norm": 0.0, - "learning_rate": 2.55945687069036e-06, - "loss": 0.8669, + "learning_rate": 2.582693072268778e-06, + "loss": 0.7525, "step": 27279 }, { - "epoch": 0.7741203178206584, + "epoch": 0.7730454249199468, "grad_norm": 0.0, - "learning_rate": 2.5588428484718464e-06, - "loss": 0.7491, + "learning_rate": 2.5820775442405142e-06, + "loss": 0.7977, "step": 27280 }, { - "epoch": 0.7741486946651532, + "epoch": 0.7730737623622091, "grad_norm": 0.0, - "learning_rate": 2.558228889109119e-06, - "loss": 0.797, + "learning_rate": 2.5814620786961654e-06, + "loss": 0.6359, "step": 27281 }, { - "epoch": 0.7741770715096481, + "epoch": 0.7731020998044716, "grad_norm": 0.0, - "learning_rate": 2.557614992607366e-06, - "loss": 0.8979, + "learning_rate": 2.58084667564092e-06, + "loss": 0.8377, "step": 27282 }, { - "epoch": 0.7742054483541431, + "epoch": 0.7731304372467341, "grad_norm": 0.0, - "learning_rate": 2.5570011589717768e-06, - "loss": 0.8578, + "learning_rate": 2.580231335079956e-06, + "loss": 0.794, "step": 27283 }, { - "epoch": 0.7742338251986379, + "epoch": 0.7731587746889965, "grad_norm": 0.0, - "learning_rate": 2.556387388207531e-06, - "loss": 0.9291, + "learning_rate": 2.57961605701846e-06, + "loss": 0.7228, "step": 27284 }, { - "epoch": 0.7742622020431328, + "epoch": 0.773187112131259, "grad_norm": 0.0, - "learning_rate": 2.555773680319815e-06, - "loss": 0.9015, + "learning_rate": 2.5790008414616107e-06, + "loss": 0.8378, "step": 27285 }, { - "epoch": 0.7742905788876276, + "epoch": 0.7732154495735215, "grad_norm": 0.0, - "learning_rate": 2.555160035313814e-06, - "loss": 0.8319, + "learning_rate": 2.578385688414594e-06, + "loss": 0.8221, "step": 27286 }, { - "epoch": 0.7743189557321226, + "epoch": 0.773243787015784, "grad_norm": 0.0, - "learning_rate": 2.5545464531947097e-06, - "loss": 0.8338, + "learning_rate": 2.5777705978825894e-06, + "loss": 0.8249, "step": 27287 }, { - "epoch": 0.7743473325766175, + "epoch": 0.7732721244580464, "grad_norm": 0.0, - "learning_rate": 2.5539329339676866e-06, - "loss": 0.8144, + "learning_rate": 2.5771555698707805e-06, + "loss": 0.7414, "step": 27288 }, { - "epoch": 0.7743757094211123, + "epoch": 0.7733004619003089, "grad_norm": 0.0, - "learning_rate": 2.55331947763793e-06, - "loss": 0.8762, + "learning_rate": 2.576540604384349e-06, + "loss": 0.8325, "step": 27289 }, { - "epoch": 0.7744040862656073, + "epoch": 0.7733287993425714, "grad_norm": 0.0, - "learning_rate": 2.5527060842106154e-06, - "loss": 0.9209, + "learning_rate": 2.575925701428469e-06, + "loss": 0.8379, "step": 27290 }, { - "epoch": 0.7744324631101022, + "epoch": 0.7733571367848338, "grad_norm": 0.0, - "learning_rate": 2.5520927536909277e-06, - "loss": 0.7883, + "learning_rate": 2.5753108610083243e-06, + "loss": 0.7365, "step": 27291 }, { - "epoch": 0.774460839954597, + "epoch": 0.7733854742270962, "grad_norm": 0.0, - "learning_rate": 2.5514794860840498e-06, - "loss": 0.8117, + "learning_rate": 2.5746960831290967e-06, + "loss": 0.7921, "step": 27292 }, { - "epoch": 0.774489216799092, + "epoch": 0.7734138116693587, "grad_norm": 0.0, - "learning_rate": 2.550866281395157e-06, - "loss": 0.97, + "learning_rate": 2.5740813677959576e-06, + "loss": 0.8515, "step": 27293 }, { - "epoch": 0.7745175936435869, + "epoch": 0.7734421491116212, "grad_norm": 0.0, - "learning_rate": 2.5502531396294318e-06, - "loss": 0.8203, + "learning_rate": 2.573466715014089e-06, + "loss": 0.7863, "step": 27294 }, { - "epoch": 0.7745459704880817, + "epoch": 0.7734704865538836, "grad_norm": 0.0, - "learning_rate": 2.5496400607920556e-06, - "loss": 0.9292, + "learning_rate": 2.572852124788672e-06, + "loss": 0.7498, "step": 27295 }, { - "epoch": 0.7745743473325766, + "epoch": 0.7734988239961461, "grad_norm": 0.0, - "learning_rate": 2.5490270448882016e-06, - "loss": 0.8009, + "learning_rate": 2.5722375971248747e-06, + "loss": 0.8651, "step": 27296 }, { - "epoch": 0.7746027241770715, + "epoch": 0.7735271614384086, "grad_norm": 0.0, - "learning_rate": 2.5484140919230516e-06, - "loss": 0.8, + "learning_rate": 2.5716231320278794e-06, + "loss": 0.7852, "step": 27297 }, { - "epoch": 0.7746311010215664, + "epoch": 0.773555498880671, "grad_norm": 0.0, - "learning_rate": 2.547801201901785e-06, - "loss": 0.8591, + "learning_rate": 2.571008729502861e-06, + "loss": 0.8303, "step": 27298 }, { - "epoch": 0.7746594778660613, + "epoch": 0.7735838363229335, "grad_norm": 0.0, - "learning_rate": 2.5471883748295747e-06, - "loss": 0.7981, + "learning_rate": 2.5703943895549974e-06, + "loss": 0.7635, "step": 27299 }, { - "epoch": 0.7746878547105562, + "epoch": 0.773612173765196, "grad_norm": 0.0, - "learning_rate": 2.546575610711598e-06, - "loss": 0.7997, + "learning_rate": 2.569780112189458e-06, + "loss": 0.7582, "step": 27300 }, { - "epoch": 0.7747162315550511, + "epoch": 0.7736405112074585, "grad_norm": 0.0, - "learning_rate": 2.5459629095530336e-06, - "loss": 0.756, + "learning_rate": 2.5691658974114197e-06, + "loss": 0.872, "step": 27301 }, { - "epoch": 0.774744608399546, + "epoch": 0.7736688486497209, "grad_norm": 0.0, - "learning_rate": 2.545350271359055e-06, - "loss": 0.7789, + "learning_rate": 2.5685517452260566e-06, + "loss": 0.8386, "step": 27302 }, { - "epoch": 0.7747729852440408, + "epoch": 0.7736971860919833, "grad_norm": 0.0, - "learning_rate": 2.5447376961348404e-06, - "loss": 0.7964, + "learning_rate": 2.567937655638545e-06, + "loss": 0.8004, "step": 27303 }, { - "epoch": 0.7748013620885358, + "epoch": 0.7737255235342458, "grad_norm": 0.0, - "learning_rate": 2.5441251838855584e-06, - "loss": 0.8704, + "learning_rate": 2.5673236286540506e-06, + "loss": 0.7637, "step": 27304 }, { - "epoch": 0.7748297389330306, + "epoch": 0.7737538609765082, "grad_norm": 0.0, - "learning_rate": 2.543512734616387e-06, - "loss": 0.861, + "learning_rate": 2.566709664277752e-06, + "loss": 0.759, "step": 27305 }, { - "epoch": 0.7748581157775255, + "epoch": 0.7737821984187707, "grad_norm": 0.0, - "learning_rate": 2.542900348332501e-06, - "loss": 0.7905, + "learning_rate": 2.5660957625148164e-06, + "loss": 0.7845, "step": 27306 }, { - "epoch": 0.7748864926220205, + "epoch": 0.7738105358610332, "grad_norm": 0.0, - "learning_rate": 2.542288025039068e-06, - "loss": 0.8271, + "learning_rate": 2.5654819233704164e-06, + "loss": 0.7842, "step": 27307 }, { - "epoch": 0.7749148694665153, + "epoch": 0.7738388733032956, "grad_norm": 0.0, - "learning_rate": 2.541675764741264e-06, - "loss": 0.8649, + "learning_rate": 2.564868146849726e-06, + "loss": 0.6569, "step": 27308 }, { - "epoch": 0.7749432463110102, + "epoch": 0.7738672107455581, "grad_norm": 0.0, - "learning_rate": 2.5410635674442618e-06, - "loss": 0.7948, + "learning_rate": 2.5642544329579088e-06, + "loss": 0.8211, "step": 27309 }, { - "epoch": 0.7749716231555052, + "epoch": 0.7738955481878206, "grad_norm": 0.0, - "learning_rate": 2.540451433153229e-06, - "loss": 0.8147, + "learning_rate": 2.5636407817001374e-06, + "loss": 0.8884, "step": 27310 }, { - "epoch": 0.775, + "epoch": 0.7739238856300831, "grad_norm": 0.0, - "learning_rate": 2.5398393618733386e-06, - "loss": 0.7438, + "learning_rate": 2.563027193081582e-06, + "loss": 0.789, "step": 27311 }, { - "epoch": 0.7750283768444949, + "epoch": 0.7739522230723455, "grad_norm": 0.0, - "learning_rate": 2.5392273536097623e-06, - "loss": 0.8601, + "learning_rate": 2.5624136671074096e-06, + "loss": 0.7077, "step": 27312 }, { - "epoch": 0.7750567536889897, + "epoch": 0.773980560514608, "grad_norm": 0.0, - "learning_rate": 2.5386154083676662e-06, - "loss": 0.7029, + "learning_rate": 2.5618002037827916e-06, + "loss": 0.8411, "step": 27313 }, { - "epoch": 0.7750851305334847, + "epoch": 0.7740088979568704, "grad_norm": 0.0, - "learning_rate": 2.5380035261522207e-06, - "loss": 0.8181, + "learning_rate": 2.5611868031128894e-06, + "loss": 0.8574, "step": 27314 }, { - "epoch": 0.7751135073779796, + "epoch": 0.7740372353991328, "grad_norm": 0.0, - "learning_rate": 2.5373917069685972e-06, - "loss": 0.9202, + "learning_rate": 2.5605734651028737e-06, + "loss": 0.8324, "step": 27315 }, { - "epoch": 0.7751418842224744, + "epoch": 0.7740655728413953, "grad_norm": 0.0, - "learning_rate": 2.536779950821956e-06, - "loss": 0.8121, + "learning_rate": 2.5599601897579128e-06, + "loss": 0.801, "step": 27316 }, { - "epoch": 0.7751702610669694, + "epoch": 0.7740939102836578, "grad_norm": 0.0, - "learning_rate": 2.536168257717476e-06, - "loss": 0.8805, + "learning_rate": 2.559346977083168e-06, + "loss": 1.0109, "step": 27317 }, { - "epoch": 0.7751986379114643, + "epoch": 0.7741222477259203, "grad_norm": 0.0, - "learning_rate": 2.535556627660315e-06, - "loss": 0.7292, + "learning_rate": 2.558733827083809e-06, + "loss": 0.8192, "step": 27318 }, { - "epoch": 0.7752270147559591, + "epoch": 0.7741505851681827, "grad_norm": 0.0, - "learning_rate": 2.5349450606556425e-06, - "loss": 0.8135, + "learning_rate": 2.5581207397649953e-06, + "loss": 0.7735, "step": 27319 }, { - "epoch": 0.775255391600454, + "epoch": 0.7741789226104452, "grad_norm": 0.0, - "learning_rate": 2.534333556708628e-06, - "loss": 0.8583, + "learning_rate": 2.557507715131894e-06, + "loss": 0.8659, "step": 27320 }, { - "epoch": 0.775283768444949, + "epoch": 0.7742072600527077, "grad_norm": 0.0, - "learning_rate": 2.5337221158244306e-06, - "loss": 0.724, + "learning_rate": 2.5568947531896714e-06, + "loss": 0.7072, "step": 27321 }, { - "epoch": 0.7753121452894438, + "epoch": 0.7742355974949701, "grad_norm": 0.0, - "learning_rate": 2.533110738008219e-06, - "loss": 0.7728, + "learning_rate": 2.5562818539434864e-06, + "loss": 0.8158, "step": 27322 }, { - "epoch": 0.7753405221339387, + "epoch": 0.7742639349372326, "grad_norm": 0.0, - "learning_rate": 2.532499423265159e-06, - "loss": 0.8618, + "learning_rate": 2.555669017398502e-06, + "loss": 0.7994, "step": 27323 }, { - "epoch": 0.7753688989784336, + "epoch": 0.774292272379495, "grad_norm": 0.0, - "learning_rate": 2.5318881716004085e-06, - "loss": 0.7449, + "learning_rate": 2.5550562435598834e-06, + "loss": 0.7058, "step": 27324 }, { - "epoch": 0.7753972758229285, + "epoch": 0.7743206098217574, "grad_norm": 0.0, - "learning_rate": 2.5312769830191343e-06, - "loss": 0.9393, + "learning_rate": 2.5544435324327898e-06, + "loss": 0.7987, "step": 27325 }, { - "epoch": 0.7754256526674234, + "epoch": 0.7743489472640199, "grad_norm": 0.0, - "learning_rate": 2.530665857526503e-06, - "loss": 0.8293, + "learning_rate": 2.5538308840223856e-06, + "loss": 0.8268, "step": 27326 }, { - "epoch": 0.7754540295119183, + "epoch": 0.7743772847062824, "grad_norm": 0.0, - "learning_rate": 2.5300547951276688e-06, - "loss": 0.7575, + "learning_rate": 2.553218298333827e-06, + "loss": 0.8448, "step": 27327 }, { - "epoch": 0.7754824063564132, + "epoch": 0.7744056221485449, "grad_norm": 0.0, - "learning_rate": 2.5294437958277985e-06, - "loss": 0.9161, + "learning_rate": 2.5526057753722754e-06, + "loss": 0.6967, "step": 27328 }, { - "epoch": 0.775510783200908, + "epoch": 0.7744339595908073, "grad_norm": 0.0, - "learning_rate": 2.5288328596320557e-06, - "loss": 0.8569, + "learning_rate": 2.5519933151428943e-06, + "loss": 0.7997, "step": 27329 }, { - "epoch": 0.7755391600454029, + "epoch": 0.7744622970330698, "grad_norm": 0.0, - "learning_rate": 2.5282219865455935e-06, - "loss": 0.8695, + "learning_rate": 2.5513809176508364e-06, + "loss": 0.7656, "step": 27330 }, { - "epoch": 0.7755675368898979, + "epoch": 0.7744906344753323, "grad_norm": 0.0, - "learning_rate": 2.527611176573577e-06, - "loss": 0.9368, + "learning_rate": 2.5507685829012616e-06, + "loss": 0.8837, "step": 27331 }, { - "epoch": 0.7755959137343927, + "epoch": 0.7745189719175947, "grad_norm": 0.0, - "learning_rate": 2.5270004297211637e-06, - "loss": 0.7476, + "learning_rate": 2.5501563108993346e-06, + "loss": 0.6979, "step": 27332 }, { - "epoch": 0.7756242905788876, + "epoch": 0.7745473093598572, "grad_norm": 0.0, - "learning_rate": 2.5263897459935138e-06, - "loss": 0.9504, + "learning_rate": 2.549544101650202e-06, + "loss": 0.7846, "step": 27333 }, { - "epoch": 0.7756526674233826, + "epoch": 0.7745756468021197, "grad_norm": 0.0, - "learning_rate": 2.5257791253957897e-06, - "loss": 0.7202, + "learning_rate": 2.5489319551590307e-06, + "loss": 0.7956, "step": 27334 }, { - "epoch": 0.7756810442678774, + "epoch": 0.7746039842443821, "grad_norm": 0.0, - "learning_rate": 2.525168567933142e-06, - "loss": 0.7977, + "learning_rate": 2.5483198714309687e-06, + "loss": 0.8527, "step": 27335 }, { - "epoch": 0.7757094211123723, + "epoch": 0.7746323216866445, "grad_norm": 0.0, - "learning_rate": 2.5245580736107313e-06, - "loss": 0.8791, + "learning_rate": 2.547707850471176e-06, + "loss": 0.7409, "step": 27336 }, { - "epoch": 0.7757377979568671, + "epoch": 0.774660659128907, "grad_norm": 0.0, - "learning_rate": 2.523947642433717e-06, - "loss": 0.8145, + "learning_rate": 2.5470958922848064e-06, + "loss": 0.7998, "step": 27337 }, { - "epoch": 0.7757661748013621, + "epoch": 0.7746889965711695, "grad_norm": 0.0, - "learning_rate": 2.5233372744072504e-06, - "loss": 0.8265, + "learning_rate": 2.546483996877017e-06, + "loss": 0.787, "step": 27338 }, { - "epoch": 0.775794551645857, + "epoch": 0.7747173340134319, "grad_norm": 0.0, - "learning_rate": 2.522726969536491e-06, - "loss": 0.7919, + "learning_rate": 2.5458721642529637e-06, + "loss": 0.7489, "step": 27339 }, { - "epoch": 0.7758229284903518, + "epoch": 0.7747456714556944, "grad_norm": 0.0, - "learning_rate": 2.5221167278265946e-06, - "loss": 0.9254, + "learning_rate": 2.545260394417793e-06, + "loss": 0.7974, "step": 27340 }, { - "epoch": 0.7758513053348468, + "epoch": 0.7747740088979569, "grad_norm": 0.0, - "learning_rate": 2.521506549282712e-06, - "loss": 0.7724, + "learning_rate": 2.544648687376663e-06, + "loss": 0.9048, "step": 27341 }, { - "epoch": 0.7758796821793417, + "epoch": 0.7748023463402194, "grad_norm": 0.0, - "learning_rate": 2.5208964339099996e-06, - "loss": 0.884, + "learning_rate": 2.544037043134728e-06, + "loss": 0.7188, "step": 27342 }, { - "epoch": 0.7759080590238365, + "epoch": 0.7748306837824818, "grad_norm": 0.0, - "learning_rate": 2.5202863817136137e-06, - "loss": 0.7358, + "learning_rate": 2.5434254616971356e-06, + "loss": 0.7513, "step": 27343 }, { - "epoch": 0.7759364358683314, + "epoch": 0.7748590212247443, "grad_norm": 0.0, - "learning_rate": 2.519676392698703e-06, - "loss": 0.8664, + "learning_rate": 2.542813943069039e-06, + "loss": 0.785, "step": 27344 }, { - "epoch": 0.7759648127128264, + "epoch": 0.7748873586670068, "grad_norm": 0.0, - "learning_rate": 2.519066466870421e-06, - "loss": 0.7854, + "learning_rate": 2.542202487255593e-06, + "loss": 0.8709, "step": 27345 }, { - "epoch": 0.7759931895573212, + "epoch": 0.7749156961092691, "grad_norm": 0.0, - "learning_rate": 2.5184566042339253e-06, - "loss": 0.8029, + "learning_rate": 2.5415910942619416e-06, + "loss": 0.7994, "step": 27346 }, { - "epoch": 0.7760215664018161, + "epoch": 0.7749440335515316, "grad_norm": 0.0, - "learning_rate": 2.517846804794356e-06, - "loss": 0.9194, + "learning_rate": 2.540979764093241e-06, + "loss": 0.8105, "step": 27347 }, { - "epoch": 0.776049943246311, + "epoch": 0.7749723709937941, "grad_norm": 0.0, - "learning_rate": 2.517237068556877e-06, - "loss": 0.8403, + "learning_rate": 2.540368496754634e-06, + "loss": 0.8225, "step": 27348 }, { - "epoch": 0.7760783200908059, + "epoch": 0.7750007084360565, "grad_norm": 0.0, - "learning_rate": 2.5166273955266297e-06, - "loss": 0.8285, + "learning_rate": 2.5397572922512735e-06, + "loss": 0.7648, "step": 27349 }, { - "epoch": 0.7761066969353008, + "epoch": 0.775029045878319, "grad_norm": 0.0, - "learning_rate": 2.5160177857087677e-06, - "loss": 0.7669, + "learning_rate": 2.5391461505883087e-06, + "loss": 0.8256, "step": 27350 }, { - "epoch": 0.7761350737797957, + "epoch": 0.7750573833205815, "grad_norm": 0.0, - "learning_rate": 2.5154082391084435e-06, - "loss": 0.8416, + "learning_rate": 2.5385350717708857e-06, + "loss": 0.7415, "step": 27351 }, { - "epoch": 0.7761634506242906, + "epoch": 0.775085720762844, "grad_norm": 0.0, - "learning_rate": 2.5147987557307983e-06, - "loss": 0.8742, + "learning_rate": 2.5379240558041527e-06, + "loss": 0.8433, "step": 27352 }, { - "epoch": 0.7761918274687855, + "epoch": 0.7751140582051064, "grad_norm": 0.0, - "learning_rate": 2.5141893355809845e-06, - "loss": 0.801, + "learning_rate": 2.5373131026932585e-06, + "loss": 0.7924, "step": 27353 }, { - "epoch": 0.7762202043132803, + "epoch": 0.7751423956473689, "grad_norm": 0.0, - "learning_rate": 2.5135799786641534e-06, - "loss": 0.7745, + "learning_rate": 2.536702212443345e-06, + "loss": 0.8972, "step": 27354 }, { - "epoch": 0.7762485811577753, + "epoch": 0.7751707330896314, "grad_norm": 0.0, - "learning_rate": 2.512970684985445e-06, - "loss": 0.6505, + "learning_rate": 2.5360913850595635e-06, + "loss": 0.8043, "step": 27355 }, { - "epoch": 0.7762769580022701, + "epoch": 0.7751990705318937, "grad_norm": 0.0, - "learning_rate": 2.5123614545500107e-06, - "loss": 0.7181, + "learning_rate": 2.535480620547053e-06, + "loss": 0.8488, "step": 27356 }, { - "epoch": 0.776305334846765, + "epoch": 0.7752274079741562, "grad_norm": 0.0, - "learning_rate": 2.5117522873629986e-06, - "loss": 0.8577, + "learning_rate": 2.5348699189109606e-06, + "loss": 0.8779, "step": 27357 }, { - "epoch": 0.77633371169126, + "epoch": 0.7752557454164187, "grad_norm": 0.0, - "learning_rate": 2.5111431834295486e-06, - "loss": 0.8782, + "learning_rate": 2.5342592801564336e-06, + "loss": 0.7386, "step": 27358 }, { - "epoch": 0.7763620885357548, + "epoch": 0.7752840828586812, "grad_norm": 0.0, - "learning_rate": 2.510534142754808e-06, - "loss": 0.7754, + "learning_rate": 2.5336487042886106e-06, + "loss": 0.7924, "step": 27359 }, { - "epoch": 0.7763904653802497, + "epoch": 0.7753124203009436, "grad_norm": 0.0, - "learning_rate": 2.509925165343926e-06, - "loss": 0.7737, + "learning_rate": 2.5330381913126368e-06, + "loss": 0.7841, "step": 27360 }, { - "epoch": 0.7764188422247446, + "epoch": 0.7753407577432061, "grad_norm": 0.0, - "learning_rate": 2.50931625120204e-06, - "loss": 0.7109, + "learning_rate": 2.5324277412336585e-06, + "loss": 0.6884, "step": 27361 }, { - "epoch": 0.7764472190692395, + "epoch": 0.7753690951854686, "grad_norm": 0.0, - "learning_rate": 2.508707400334296e-06, - "loss": 0.8688, + "learning_rate": 2.53181735405681e-06, + "loss": 0.8085, "step": 27362 }, { - "epoch": 0.7764755959137344, + "epoch": 0.775397432627731, "grad_norm": 0.0, - "learning_rate": 2.5080986127458373e-06, - "loss": 0.7428, + "learning_rate": 2.531207029787239e-06, + "loss": 0.8027, "step": 27363 }, { - "epoch": 0.7765039727582292, + "epoch": 0.7754257700699935, "grad_norm": 0.0, - "learning_rate": 2.5074898884418063e-06, - "loss": 0.8352, + "learning_rate": 2.5305967684300836e-06, + "loss": 0.7956, "step": 27364 }, { - "epoch": 0.7765323496027242, + "epoch": 0.775454107512256, "grad_norm": 0.0, - "learning_rate": 2.506881227427348e-06, - "loss": 0.8994, + "learning_rate": 2.529986569990486e-06, + "loss": 0.7408, "step": 27365 }, { - "epoch": 0.7765607264472191, + "epoch": 0.7754824449545185, "grad_norm": 0.0, - "learning_rate": 2.506272629707598e-06, - "loss": 0.9462, + "learning_rate": 2.529376434473588e-06, + "loss": 0.8527, "step": 27366 }, { - "epoch": 0.7765891032917139, + "epoch": 0.7755107823967808, "grad_norm": 0.0, - "learning_rate": 2.5056640952876997e-06, - "loss": 0.8214, + "learning_rate": 2.5287663618845236e-06, + "loss": 0.8809, "step": 27367 }, { - "epoch": 0.7766174801362089, + "epoch": 0.7755391198390433, "grad_norm": 0.0, - "learning_rate": 2.5050556241727963e-06, - "loss": 1.015, + "learning_rate": 2.528156352228437e-06, + "loss": 0.801, "step": 27368 }, { - "epoch": 0.7766458569807038, + "epoch": 0.7755674572813058, "grad_norm": 0.0, - "learning_rate": 2.5044472163680234e-06, - "loss": 0.8439, + "learning_rate": 2.5275464055104615e-06, + "loss": 0.8192, "step": 27369 }, { - "epoch": 0.7766742338251986, + "epoch": 0.7755957947235682, "grad_norm": 0.0, - "learning_rate": 2.50383887187852e-06, - "loss": 0.8589, + "learning_rate": 2.5269365217357376e-06, + "loss": 0.8581, "step": 27370 }, { - "epoch": 0.7767026106696935, + "epoch": 0.7756241321658307, "grad_norm": 0.0, - "learning_rate": 2.503230590709431e-06, - "loss": 0.8269, + "learning_rate": 2.526326700909405e-06, + "loss": 0.8395, "step": 27371 }, { - "epoch": 0.7767309875141885, + "epoch": 0.7756524696080932, "grad_norm": 0.0, - "learning_rate": 2.5026223728658873e-06, - "loss": 0.8698, + "learning_rate": 2.5257169430365956e-06, + "loss": 0.7807, "step": 27372 }, { - "epoch": 0.7767593643586833, + "epoch": 0.7756808070503556, "grad_norm": 0.0, - "learning_rate": 2.502014218353028e-06, - "loss": 0.8612, + "learning_rate": 2.525107248122447e-06, + "loss": 0.7753, "step": 27373 }, { - "epoch": 0.7767877412031782, + "epoch": 0.7757091444926181, "grad_norm": 0.0, - "learning_rate": 2.501406127175996e-06, - "loss": 0.828, + "learning_rate": 2.5244976161720993e-06, + "loss": 0.8293, "step": 27374 }, { - "epoch": 0.7768161180476731, + "epoch": 0.7757374819348806, "grad_norm": 0.0, - "learning_rate": 2.5007980993399197e-06, - "loss": 0.8826, + "learning_rate": 2.523888047190681e-06, + "loss": 0.813, "step": 27375 }, { - "epoch": 0.776844494892168, + "epoch": 0.7757658193771431, "grad_norm": 0.0, - "learning_rate": 2.500190134849939e-06, - "loss": 0.7339, + "learning_rate": 2.5232785411833306e-06, + "loss": 0.743, "step": 27376 }, { - "epoch": 0.7768728717366629, + "epoch": 0.7757941568194054, "grad_norm": 0.0, - "learning_rate": 2.499582233711193e-06, - "loss": 0.7319, + "learning_rate": 2.5226690981551806e-06, + "loss": 0.7951, "step": 27377 }, { - "epoch": 0.7769012485811577, + "epoch": 0.7758224942616679, "grad_norm": 0.0, - "learning_rate": 2.498974395928807e-06, - "loss": 0.8377, + "learning_rate": 2.522059718111366e-06, + "loss": 0.7457, "step": 27378 }, { - "epoch": 0.7769296254256527, + "epoch": 0.7758508317039304, "grad_norm": 0.0, - "learning_rate": 2.498366621507925e-06, - "loss": 0.8913, + "learning_rate": 2.5214504010570217e-06, + "loss": 0.8579, "step": 27379 }, { - "epoch": 0.7769580022701476, + "epoch": 0.7758791691461928, "grad_norm": 0.0, - "learning_rate": 2.497758910453679e-06, - "loss": 0.8461, + "learning_rate": 2.520841146997275e-06, + "loss": 0.8159, "step": 27380 }, { - "epoch": 0.7769863791146424, + "epoch": 0.7759075065884553, "grad_norm": 0.0, - "learning_rate": 2.497151262771198e-06, - "loss": 0.8152, + "learning_rate": 2.52023195593726e-06, + "loss": 0.7419, "step": 27381 }, { - "epoch": 0.7770147559591374, + "epoch": 0.7759358440307178, "grad_norm": 0.0, - "learning_rate": 2.4965436784656182e-06, - "loss": 0.8712, + "learning_rate": 2.5196228278821123e-06, + "loss": 0.8276, "step": 27382 }, { - "epoch": 0.7770431328036322, + "epoch": 0.7759641814729803, "grad_norm": 0.0, - "learning_rate": 2.495936157542074e-06, - "loss": 0.8122, + "learning_rate": 2.519013762836957e-06, + "loss": 0.892, "step": 27383 }, { - "epoch": 0.7770715096481271, + "epoch": 0.7759925189152427, "grad_norm": 0.0, - "learning_rate": 2.495328700005691e-06, - "loss": 0.8625, + "learning_rate": 2.5184047608069283e-06, + "loss": 0.8462, "step": 27384 }, { - "epoch": 0.7770998864926221, + "epoch": 0.7760208563575052, "grad_norm": 0.0, - "learning_rate": 2.494721305861607e-06, - "loss": 0.8951, + "learning_rate": 2.517795821797153e-06, + "loss": 0.8279, "step": 27385 }, { - "epoch": 0.7771282633371169, + "epoch": 0.7760491937997677, "grad_norm": 0.0, - "learning_rate": 2.494113975114947e-06, - "loss": 0.7995, + "learning_rate": 2.5171869458127605e-06, + "loss": 0.8799, "step": 27386 }, { - "epoch": 0.7771566401816118, + "epoch": 0.77607753124203, "grad_norm": 0.0, - "learning_rate": 2.4935067077708443e-06, - "loss": 0.7566, + "learning_rate": 2.5165781328588855e-06, + "loss": 0.8706, "step": 27387 }, { - "epoch": 0.7771850170261067, + "epoch": 0.7761058686842925, "grad_norm": 0.0, - "learning_rate": 2.4928995038344294e-06, - "loss": 0.7663, + "learning_rate": 2.5159693829406485e-06, + "loss": 0.7798, "step": 27388 }, { - "epoch": 0.7772133938706016, + "epoch": 0.776134206126555, "grad_norm": 0.0, - "learning_rate": 2.4922923633108274e-06, - "loss": 0.8538, + "learning_rate": 2.515360696063179e-06, + "loss": 0.7584, "step": 27389 }, { - "epoch": 0.7772417707150965, + "epoch": 0.7761625435688175, "grad_norm": 0.0, - "learning_rate": 2.4916852862051687e-06, - "loss": 0.8003, + "learning_rate": 2.514752072231608e-06, + "loss": 0.7691, "step": 27390 }, { - "epoch": 0.7772701475595913, + "epoch": 0.7761908810110799, "grad_norm": 0.0, - "learning_rate": 2.4910782725225856e-06, - "loss": 0.8904, + "learning_rate": 2.5141435114510583e-06, + "loss": 0.828, "step": 27391 }, { - "epoch": 0.7772985244040863, + "epoch": 0.7762192184533424, "grad_norm": 0.0, - "learning_rate": 2.4904713222682e-06, - "loss": 0.6858, + "learning_rate": 2.513535013726661e-06, + "loss": 0.8008, "step": 27392 }, { - "epoch": 0.7773269012485812, + "epoch": 0.7762475558956049, "grad_norm": 0.0, - "learning_rate": 2.489864435447136e-06, - "loss": 0.8552, + "learning_rate": 2.5129265790635347e-06, + "loss": 0.8054, "step": 27393 }, { - "epoch": 0.777355278093076, + "epoch": 0.7762758933378673, "grad_norm": 0.0, - "learning_rate": 2.489257612064531e-06, - "loss": 0.7856, + "learning_rate": 2.5123182074668098e-06, + "loss": 0.7901, "step": 27394 }, { - "epoch": 0.7773836549375709, + "epoch": 0.7763042307801298, "grad_norm": 0.0, - "learning_rate": 2.4886508521255015e-06, - "loss": 0.7894, + "learning_rate": 2.51170989894161e-06, + "loss": 0.7754, "step": 27395 }, { - "epoch": 0.7774120317820659, + "epoch": 0.7763325682223923, "grad_norm": 0.0, - "learning_rate": 2.4880441556351764e-06, - "loss": 0.862, + "learning_rate": 2.5111016534930565e-06, + "loss": 0.755, "step": 27396 }, { - "epoch": 0.7774404086265607, + "epoch": 0.7763609056646547, "grad_norm": 0.0, - "learning_rate": 2.4874375225986834e-06, - "loss": 0.8119, + "learning_rate": 2.5104934711262774e-06, + "loss": 0.8028, "step": 27397 }, { - "epoch": 0.7774687854710556, + "epoch": 0.7763892431069171, "grad_norm": 0.0, - "learning_rate": 2.48683095302114e-06, - "loss": 0.847, + "learning_rate": 2.5098853518463907e-06, + "loss": 0.87, "step": 27398 }, { - "epoch": 0.7774971623155505, + "epoch": 0.7764175805491796, "grad_norm": 0.0, - "learning_rate": 2.4862244469076724e-06, - "loss": 0.7478, + "learning_rate": 2.509277295658521e-06, + "loss": 0.7579, "step": 27399 }, { - "epoch": 0.7775255391600454, + "epoch": 0.7764459179914421, "grad_norm": 0.0, - "learning_rate": 2.4856180042634082e-06, - "loss": 0.842, + "learning_rate": 2.508669302567792e-06, + "loss": 0.7911, "step": 27400 }, { - "epoch": 0.7775539160045403, + "epoch": 0.7764742554337045, "grad_norm": 0.0, - "learning_rate": 2.4850116250934642e-06, - "loss": 0.7153, + "learning_rate": 2.5080613725793212e-06, + "loss": 0.8052, "step": 27401 }, { - "epoch": 0.7775822928490352, + "epoch": 0.776502592875967, "grad_norm": 0.0, - "learning_rate": 2.484405309402964e-06, - "loss": 0.8568, + "learning_rate": 2.507453505698232e-06, + "loss": 0.7323, "step": 27402 }, { - "epoch": 0.7776106696935301, + "epoch": 0.7765309303182295, "grad_norm": 0.0, - "learning_rate": 2.483799057197034e-06, - "loss": 0.8461, + "learning_rate": 2.506845701929643e-06, + "loss": 0.7838, "step": 27403 }, { - "epoch": 0.777639046538025, + "epoch": 0.7765592677604919, "grad_norm": 0.0, - "learning_rate": 2.483192868480787e-06, - "loss": 0.8514, + "learning_rate": 2.5062379612786757e-06, + "loss": 0.7775, "step": 27404 }, { - "epoch": 0.7776674233825198, + "epoch": 0.7765876052027544, "grad_norm": 0.0, - "learning_rate": 2.482586743259349e-06, - "loss": 0.9535, + "learning_rate": 2.505630283750452e-06, + "loss": 0.7557, "step": 27405 }, { - "epoch": 0.7776958002270148, + "epoch": 0.7766159426450169, "grad_norm": 0.0, - "learning_rate": 2.48198068153784e-06, - "loss": 0.8294, + "learning_rate": 2.5050226693500843e-06, + "loss": 0.7914, "step": 27406 }, { - "epoch": 0.7777241770715096, + "epoch": 0.7766442800872794, "grad_norm": 0.0, - "learning_rate": 2.481374683321376e-06, - "loss": 0.8038, + "learning_rate": 2.5044151180826947e-06, + "loss": 0.8148, "step": 27407 }, { - "epoch": 0.7777525539160045, + "epoch": 0.7766726175295418, "grad_norm": 0.0, - "learning_rate": 2.480768748615079e-06, - "loss": 0.7553, + "learning_rate": 2.503807629953402e-06, + "loss": 0.717, "step": 27408 }, { - "epoch": 0.7777809307604995, + "epoch": 0.7767009549718042, "grad_norm": 0.0, - "learning_rate": 2.480162877424065e-06, - "loss": 0.8644, + "learning_rate": 2.5032002049673175e-06, + "loss": 0.7716, "step": 27409 }, { - "epoch": 0.7778093076049943, + "epoch": 0.7767292924140667, "grad_norm": 0.0, - "learning_rate": 2.4795570697534544e-06, - "loss": 0.7628, + "learning_rate": 2.5025928431295634e-06, + "loss": 0.8113, "step": 27410 }, { - "epoch": 0.7778376844494892, + "epoch": 0.7767576298563291, "grad_norm": 0.0, - "learning_rate": 2.478951325608365e-06, - "loss": 0.7887, + "learning_rate": 2.5019855444452556e-06, + "loss": 0.8386, "step": 27411 }, { - "epoch": 0.7778660612939841, + "epoch": 0.7767859672985916, "grad_norm": 0.0, - "learning_rate": 2.47834564499391e-06, - "loss": 0.8279, + "learning_rate": 2.5013783089195055e-06, + "loss": 0.8658, "step": 27412 }, { - "epoch": 0.777894438138479, + "epoch": 0.7768143047408541, "grad_norm": 0.0, - "learning_rate": 2.4777400279152064e-06, - "loss": 0.7864, + "learning_rate": 2.5007711365574326e-06, + "loss": 0.8289, "step": 27413 }, { - "epoch": 0.7779228149829739, + "epoch": 0.7768426421831166, "grad_norm": 0.0, - "learning_rate": 2.477134474377375e-06, - "loss": 0.9189, + "learning_rate": 2.500164027364147e-06, + "loss": 0.8303, "step": 27414 }, { - "epoch": 0.7779511918274687, + "epoch": 0.776870979625379, "grad_norm": 0.0, - "learning_rate": 2.4765289843855233e-06, - "loss": 0.8206, + "learning_rate": 2.499556981344764e-06, + "loss": 0.7456, "step": 27415 }, { - "epoch": 0.7779795686719637, + "epoch": 0.7768993170676415, "grad_norm": 0.0, - "learning_rate": 2.475923557944769e-06, - "loss": 0.8513, + "learning_rate": 2.4989499985043986e-06, + "loss": 0.8287, "step": 27416 }, { - "epoch": 0.7780079455164586, + "epoch": 0.776927654509904, "grad_norm": 0.0, - "learning_rate": 2.475318195060231e-06, - "loss": 0.7208, + "learning_rate": 2.498343078848162e-06, + "loss": 0.8696, "step": 27417 }, { - "epoch": 0.7780363223609534, + "epoch": 0.7769559919521664, "grad_norm": 0.0, - "learning_rate": 2.474712895737015e-06, - "loss": 0.8563, + "learning_rate": 2.4977362223811684e-06, + "loss": 0.7905, "step": 27418 }, { - "epoch": 0.7780646992054484, + "epoch": 0.7769843293944289, "grad_norm": 0.0, - "learning_rate": 2.474107659980236e-06, - "loss": 0.8654, + "learning_rate": 2.4971294291085313e-06, + "loss": 0.7772, "step": 27419 }, { - "epoch": 0.7780930760499433, + "epoch": 0.7770126668366913, "grad_norm": 0.0, - "learning_rate": 2.473502487795012e-06, - "loss": 0.767, + "learning_rate": 2.496522699035355e-06, + "loss": 0.7574, "step": 27420 }, { - "epoch": 0.7781214528944381, + "epoch": 0.7770410042789537, "grad_norm": 0.0, - "learning_rate": 2.4728973791864485e-06, - "loss": 0.8081, + "learning_rate": 2.4959160321667586e-06, + "loss": 0.812, "step": 27421 }, { - "epoch": 0.778149829738933, + "epoch": 0.7770693417212162, "grad_norm": 0.0, - "learning_rate": 2.4722923341596585e-06, - "loss": 0.8466, + "learning_rate": 2.495309428507844e-06, + "loss": 0.7974, "step": 27422 }, { - "epoch": 0.778178206583428, + "epoch": 0.7770976791634787, "grad_norm": 0.0, - "learning_rate": 2.471687352719756e-06, - "loss": 0.8413, + "learning_rate": 2.494702888063726e-06, + "loss": 0.7312, "step": 27423 }, { - "epoch": 0.7782065834279228, + "epoch": 0.7771260166057412, "grad_norm": 0.0, - "learning_rate": 2.471082434871844e-06, - "loss": 0.8693, + "learning_rate": 2.494096410839515e-06, + "loss": 0.7508, "step": 27424 }, { - "epoch": 0.7782349602724177, + "epoch": 0.7771543540480036, "grad_norm": 0.0, - "learning_rate": 2.4704775806210423e-06, - "loss": 0.8075, + "learning_rate": 2.4934899968403135e-06, + "loss": 0.7784, "step": 27425 }, { - "epoch": 0.7782633371169126, + "epoch": 0.7771826914902661, "grad_norm": 0.0, - "learning_rate": 2.469872789972453e-06, - "loss": 0.8655, + "learning_rate": 2.492883646071237e-06, + "loss": 0.8121, "step": 27426 }, { - "epoch": 0.7782917139614075, + "epoch": 0.7772110289325286, "grad_norm": 0.0, - "learning_rate": 2.469268062931186e-06, - "loss": 0.8369, + "learning_rate": 2.492277358537386e-06, + "loss": 0.831, "step": 27427 }, { - "epoch": 0.7783200908059024, + "epoch": 0.777239366374791, "grad_norm": 0.0, - "learning_rate": 2.4686633995023525e-06, - "loss": 0.8242, + "learning_rate": 2.4916711342438717e-06, + "loss": 0.8517, "step": 27428 }, { - "epoch": 0.7783484676503972, + "epoch": 0.7772677038170535, "grad_norm": 0.0, - "learning_rate": 2.4680587996910555e-06, - "loss": 0.9178, + "learning_rate": 2.491064973195798e-06, + "loss": 0.7989, "step": 27429 }, { - "epoch": 0.7783768444948922, + "epoch": 0.777296041259316, "grad_norm": 0.0, - "learning_rate": 2.4674542635024048e-06, + "learning_rate": 2.4904588753982738e-06, "loss": 0.8296, "step": 27430 }, { - "epoch": 0.778405221339387, + "epoch": 0.7773243787015784, "grad_norm": 0.0, - "learning_rate": 2.466849790941509e-06, - "loss": 0.8018, + "learning_rate": 2.4898528408564025e-06, + "loss": 0.8321, "step": 27431 }, { - "epoch": 0.7784335981838819, + "epoch": 0.7773527161438408, "grad_norm": 0.0, - "learning_rate": 2.466245382013469e-06, - "loss": 0.7868, + "learning_rate": 2.4892468695752924e-06, + "loss": 0.8116, "step": 27432 }, { - "epoch": 0.7784619750283769, + "epoch": 0.7773810535861033, "grad_norm": 0.0, - "learning_rate": 2.4656410367233928e-06, - "loss": 0.9031, + "learning_rate": 2.4886409615600425e-06, + "loss": 0.8293, "step": 27433 }, { - "epoch": 0.7784903518728717, + "epoch": 0.7774093910283658, "grad_norm": 0.0, - "learning_rate": 2.4650367550763877e-06, - "loss": 0.7445, + "learning_rate": 2.4880351168157614e-06, + "loss": 0.7636, "step": 27434 }, { - "epoch": 0.7785187287173666, + "epoch": 0.7774377284706282, "grad_norm": 0.0, - "learning_rate": 2.4644325370775533e-06, - "loss": 0.7609, + "learning_rate": 2.4874293353475477e-06, + "loss": 0.8172, "step": 27435 }, { - "epoch": 0.7785471055618616, + "epoch": 0.7774660659128907, "grad_norm": 0.0, - "learning_rate": 2.4638283827319964e-06, - "loss": 0.7868, + "learning_rate": 2.486823617160505e-06, + "loss": 0.8473, "step": 27436 }, { - "epoch": 0.7785754824063564, + "epoch": 0.7774944033551532, "grad_norm": 0.0, - "learning_rate": 2.4632242920448233e-06, - "loss": 0.7484, + "learning_rate": 2.4862179622597415e-06, + "loss": 0.8235, "step": 27437 }, { - "epoch": 0.7786038592508513, + "epoch": 0.7775227407974157, "grad_norm": 0.0, - "learning_rate": 2.4626202650211293e-06, - "loss": 0.8391, + "learning_rate": 2.4856123706503508e-06, + "loss": 0.7724, "step": 27438 }, { - "epoch": 0.7786322360953462, + "epoch": 0.7775510782396781, "grad_norm": 0.0, - "learning_rate": 2.4620163016660216e-06, - "loss": 0.7798, + "learning_rate": 2.4850068423374376e-06, + "loss": 0.8174, "step": 27439 }, { - "epoch": 0.7786606129398411, + "epoch": 0.7775794156819406, "grad_norm": 0.0, - "learning_rate": 2.4614124019846007e-06, - "loss": 0.9446, + "learning_rate": 2.4844013773261044e-06, + "loss": 0.828, "step": 27440 }, { - "epoch": 0.778688989784336, + "epoch": 0.777607753124203, "grad_norm": 0.0, - "learning_rate": 2.460808565981969e-06, - "loss": 0.7184, + "learning_rate": 2.483795975621448e-06, + "loss": 0.8001, "step": 27441 }, { - "epoch": 0.7787173666288308, + "epoch": 0.7776360905664654, "grad_norm": 0.0, - "learning_rate": 2.4602047936632277e-06, - "loss": 0.8201, + "learning_rate": 2.4831906372285676e-06, + "loss": 0.7811, "step": 27442 }, { - "epoch": 0.7787457434733258, + "epoch": 0.7776644280087279, "grad_norm": 0.0, - "learning_rate": 2.459601085033474e-06, - "loss": 0.8572, + "learning_rate": 2.482585362152564e-06, + "loss": 0.7292, "step": 27443 }, { - "epoch": 0.7787741203178207, + "epoch": 0.7776927654509904, "grad_norm": 0.0, - "learning_rate": 2.458997440097808e-06, - "loss": 0.7744, + "learning_rate": 2.4819801503985365e-06, + "loss": 0.812, "step": 27444 }, { - "epoch": 0.7788024971623155, + "epoch": 0.7777211028932528, "grad_norm": 0.0, - "learning_rate": 2.458393858861332e-06, - "loss": 0.7655, + "learning_rate": 2.4813750019715844e-06, + "loss": 0.8347, "step": 27445 }, { - "epoch": 0.7788308740068104, + "epoch": 0.7777494403355153, "grad_norm": 0.0, - "learning_rate": 2.4577903413291405e-06, - "loss": 0.8322, + "learning_rate": 2.4807699168767995e-06, + "loss": 0.779, "step": 27446 }, { - "epoch": 0.7788592508513054, + "epoch": 0.7777777777777778, "grad_norm": 0.0, - "learning_rate": 2.457186887506332e-06, - "loss": 0.932, + "learning_rate": 2.4801648951192815e-06, + "loss": 0.724, "step": 27447 }, { - "epoch": 0.7788876276958002, + "epoch": 0.7778061152200403, "grad_norm": 0.0, - "learning_rate": 2.4565834973980086e-06, - "loss": 0.7778, + "learning_rate": 2.47955993670413e-06, + "loss": 0.7924, "step": 27448 }, { - "epoch": 0.7789160045402951, + "epoch": 0.7778344526623027, "grad_norm": 0.0, - "learning_rate": 2.4559801710092602e-06, - "loss": 0.8215, + "learning_rate": 2.478955041636435e-06, + "loss": 0.8624, "step": 27449 }, { - "epoch": 0.77894438138479, + "epoch": 0.7778627901045652, "grad_norm": 0.0, - "learning_rate": 2.455376908345186e-06, - "loss": 0.7675, + "learning_rate": 2.4783502099212974e-06, + "loss": 0.7629, "step": 27450 }, { - "epoch": 0.7789727582292849, + "epoch": 0.7778911275468277, "grad_norm": 0.0, - "learning_rate": 2.454773709410886e-06, - "loss": 0.8405, + "learning_rate": 2.4777454415638067e-06, + "loss": 0.8789, "step": 27451 }, { - "epoch": 0.7790011350737798, + "epoch": 0.77791946498909, "grad_norm": 0.0, - "learning_rate": 2.4541705742114486e-06, - "loss": 0.8646, + "learning_rate": 2.477140736569059e-06, + "loss": 0.745, "step": 27452 }, { - "epoch": 0.7790295119182747, + "epoch": 0.7779478024313525, "grad_norm": 0.0, - "learning_rate": 2.453567502751971e-06, - "loss": 1.0068, + "learning_rate": 2.476536094942151e-06, + "loss": 0.7658, "step": 27453 }, { - "epoch": 0.7790578887627696, + "epoch": 0.777976139873615, "grad_norm": 0.0, - "learning_rate": 2.4529644950375474e-06, - "loss": 0.8542, + "learning_rate": 2.4759315166881713e-06, + "loss": 0.853, "step": 27454 }, { - "epoch": 0.7790862656072645, + "epoch": 0.7780044773158775, "grad_norm": 0.0, - "learning_rate": 2.4523615510732734e-06, - "loss": 0.7959, + "learning_rate": 2.4753270018122142e-06, + "loss": 0.8347, "step": 27455 }, { - "epoch": 0.7791146424517593, + "epoch": 0.7780328147581399, "grad_norm": 0.0, - "learning_rate": 2.4517586708642427e-06, - "loss": 0.7569, + "learning_rate": 2.4747225503193717e-06, + "loss": 0.8758, "step": 27456 }, { - "epoch": 0.7791430192962543, + "epoch": 0.7780611522004024, "grad_norm": 0.0, - "learning_rate": 2.4511558544155434e-06, - "loss": 0.9086, + "learning_rate": 2.474118162214735e-06, + "loss": 0.7337, "step": 27457 }, { - "epoch": 0.7791713961407492, + "epoch": 0.7780894896426649, "grad_norm": 0.0, - "learning_rate": 2.4505531017322705e-06, - "loss": 0.8408, + "learning_rate": 2.4735138375033995e-06, + "loss": 0.7796, "step": 27458 }, { - "epoch": 0.779199772985244, + "epoch": 0.7781178270849273, "grad_norm": 0.0, - "learning_rate": 2.449950412819516e-06, - "loss": 0.8616, + "learning_rate": 2.4729095761904487e-06, + "loss": 0.8302, "step": 27459 }, { - "epoch": 0.779228149829739, + "epoch": 0.7781461645271898, "grad_norm": 0.0, - "learning_rate": 2.449347787682368e-06, - "loss": 0.8647, + "learning_rate": 2.472305378280976e-06, + "loss": 0.8055, "step": 27460 }, { - "epoch": 0.7792565266742338, + "epoch": 0.7781745019694523, "grad_norm": 0.0, - "learning_rate": 2.448745226325919e-06, - "loss": 0.8783, + "learning_rate": 2.4717012437800724e-06, + "loss": 0.8061, "step": 27461 }, { - "epoch": 0.7792849035187287, + "epoch": 0.7782028394117148, "grad_norm": 0.0, - "learning_rate": 2.4481427287552606e-06, - "loss": 0.7701, + "learning_rate": 2.4710971726928224e-06, + "loss": 0.7748, "step": 27462 }, { - "epoch": 0.7793132803632236, + "epoch": 0.7782311768539771, "grad_norm": 0.0, - "learning_rate": 2.447540294975478e-06, - "loss": 0.7599, + "learning_rate": 2.470493165024319e-06, + "loss": 0.7437, "step": 27463 }, { - "epoch": 0.7793416572077185, + "epoch": 0.7782595142962396, "grad_norm": 0.0, - "learning_rate": 2.4469379249916614e-06, - "loss": 0.8557, + "learning_rate": 2.4698892207796453e-06, + "loss": 0.7357, "step": 27464 }, { - "epoch": 0.7793700340522134, + "epoch": 0.7782878517385021, "grad_norm": 0.0, - "learning_rate": 2.4463356188089025e-06, - "loss": 0.9209, + "learning_rate": 2.469285339963892e-06, + "loss": 0.9117, "step": 27465 }, { - "epoch": 0.7793984108967082, + "epoch": 0.7783161891807645, "grad_norm": 0.0, - "learning_rate": 2.445733376432284e-06, - "loss": 0.8372, + "learning_rate": 2.4686815225821457e-06, + "loss": 0.834, "step": 27466 }, { - "epoch": 0.7794267877412032, + "epoch": 0.778344526623027, "grad_norm": 0.0, - "learning_rate": 2.4451311978668947e-06, - "loss": 0.6985, + "learning_rate": 2.4680777686394895e-06, + "loss": 0.8761, "step": 27467 }, { - "epoch": 0.7794551645856981, + "epoch": 0.7783728640652895, "grad_norm": 0.0, - "learning_rate": 2.4445290831178246e-06, - "loss": 0.8313, + "learning_rate": 2.4674740781410122e-06, + "loss": 0.892, "step": 27468 }, { - "epoch": 0.7794835414301929, + "epoch": 0.7784012015075519, "grad_norm": 0.0, - "learning_rate": 2.443927032190154e-06, - "loss": 0.8324, + "learning_rate": 2.466870451091796e-06, + "loss": 0.808, "step": 27469 }, { - "epoch": 0.7795119182746879, + "epoch": 0.7784295389498144, "grad_norm": 0.0, - "learning_rate": 2.4433250450889724e-06, - "loss": 0.8911, + "learning_rate": 2.4662668874969287e-06, + "loss": 0.9325, "step": 27470 }, { - "epoch": 0.7795402951191828, + "epoch": 0.7784578763920769, "grad_norm": 0.0, - "learning_rate": 2.4427231218193628e-06, - "loss": 0.743, + "learning_rate": 2.4656633873614953e-06, + "loss": 0.8305, "step": 27471 }, { - "epoch": 0.7795686719636776, + "epoch": 0.7784862138343394, "grad_norm": 0.0, - "learning_rate": 2.442121262386411e-06, - "loss": 0.7963, + "learning_rate": 2.4650599506905746e-06, + "loss": 0.8156, "step": 27472 }, { - "epoch": 0.7795970488081725, + "epoch": 0.7785145512766017, "grad_norm": 0.0, - "learning_rate": 2.4415194667952038e-06, - "loss": 0.7627, + "learning_rate": 2.464456577489253e-06, + "loss": 0.8323, "step": 27473 }, { - "epoch": 0.7796254256526675, + "epoch": 0.7785428887188642, "grad_norm": 0.0, - "learning_rate": 2.4409177350508184e-06, - "loss": 0.7916, + "learning_rate": 2.4638532677626124e-06, + "loss": 0.8374, "step": 27474 }, { - "epoch": 0.7796538024971623, + "epoch": 0.7785712261611267, "grad_norm": 0.0, - "learning_rate": 2.4403160671583402e-06, - "loss": 0.9249, + "learning_rate": 2.463250021515733e-06, + "loss": 0.8293, "step": 27475 }, { - "epoch": 0.7796821793416572, + "epoch": 0.7785995636033891, "grad_norm": 0.0, - "learning_rate": 2.439714463122855e-06, - "loss": 0.8108, + "learning_rate": 2.462646838753696e-06, + "loss": 0.797, "step": 27476 }, { - "epoch": 0.7797105561861521, + "epoch": 0.7786279010456516, "grad_norm": 0.0, - "learning_rate": 2.4391129229494402e-06, - "loss": 0.9347, + "learning_rate": 2.462043719481587e-06, + "loss": 0.9395, "step": 27477 }, { - "epoch": 0.779738933030647, + "epoch": 0.7786562384879141, "grad_norm": 0.0, - "learning_rate": 2.438511446643177e-06, - "loss": 0.7884, + "learning_rate": 2.4614406637044808e-06, + "loss": 0.8499, "step": 27478 }, { - "epoch": 0.7797673098751419, + "epoch": 0.7786845759301766, "grad_norm": 0.0, - "learning_rate": 2.4379100342091512e-06, - "loss": 0.7951, + "learning_rate": 2.4608376714274617e-06, + "loss": 0.8799, "step": 27479 }, { - "epoch": 0.7797956867196367, + "epoch": 0.778712913372439, "grad_norm": 0.0, - "learning_rate": 2.437308685652435e-06, - "loss": 0.7465, + "learning_rate": 2.4602347426556037e-06, + "loss": 0.7471, "step": 27480 }, { - "epoch": 0.7798240635641317, + "epoch": 0.7787412508147015, "grad_norm": 0.0, - "learning_rate": 2.4367074009781135e-06, - "loss": 0.8725, + "learning_rate": 2.4596318773939885e-06, + "loss": 0.7945, "step": 27481 }, { - "epoch": 0.7798524404086266, + "epoch": 0.778769588256964, "grad_norm": 0.0, - "learning_rate": 2.4361061801912666e-06, - "loss": 0.8197, + "learning_rate": 2.4590290756476933e-06, + "loss": 0.8744, "step": 27482 }, { - "epoch": 0.7798808172531214, + "epoch": 0.7787979256992263, "grad_norm": 0.0, - "learning_rate": 2.435505023296969e-06, - "loss": 0.7955, + "learning_rate": 2.4584263374217963e-06, + "loss": 0.8316, "step": 27483 }, { - "epoch": 0.7799091940976164, + "epoch": 0.7788262631414888, "grad_norm": 0.0, - "learning_rate": 2.4349039303002997e-06, - "loss": 0.6739, + "learning_rate": 2.457823662721379e-06, + "loss": 0.9414, "step": 27484 }, { - "epoch": 0.7799375709421112, + "epoch": 0.7788546005837513, "grad_norm": 0.0, - "learning_rate": 2.434302901206338e-06, - "loss": 0.8372, + "learning_rate": 2.4572210515515093e-06, + "loss": 0.8941, "step": 27485 }, { - "epoch": 0.7799659477866061, + "epoch": 0.7788829380260138, "grad_norm": 0.0, - "learning_rate": 2.4337019360201585e-06, - "loss": 0.8117, + "learning_rate": 2.4566185039172687e-06, + "loss": 0.8203, "step": 27486 }, { - "epoch": 0.7799943246311011, + "epoch": 0.7789112754682762, "grad_norm": 0.0, - "learning_rate": 2.4331010347468397e-06, - "loss": 0.7798, + "learning_rate": 2.4560160198237337e-06, + "loss": 0.8393, "step": 27487 }, { - "epoch": 0.7800227014755959, + "epoch": 0.7789396129105387, "grad_norm": 0.0, - "learning_rate": 2.4325001973914586e-06, - "loss": 0.8712, + "learning_rate": 2.4554135992759753e-06, + "loss": 0.7803, "step": 27488 }, { - "epoch": 0.7800510783200908, + "epoch": 0.7789679503528012, "grad_norm": 0.0, - "learning_rate": 2.4318994239590866e-06, - "loss": 0.8396, + "learning_rate": 2.4548112422790695e-06, + "loss": 0.8033, "step": 27489 }, { - "epoch": 0.7800794551645857, + "epoch": 0.7789962877950636, "grad_norm": 0.0, - "learning_rate": 2.4312987144548005e-06, - "loss": 0.7719, + "learning_rate": 2.4542089488380925e-06, + "loss": 0.8664, "step": 27490 }, { - "epoch": 0.7801078320090806, + "epoch": 0.7790246252373261, "grad_norm": 0.0, - "learning_rate": 2.430698068883677e-06, - "loss": 0.7347, + "learning_rate": 2.453606718958114e-06, + "loss": 0.7804, "step": 27491 }, { - "epoch": 0.7801362088535755, + "epoch": 0.7790529626795886, "grad_norm": 0.0, - "learning_rate": 2.4300974872507833e-06, - "loss": 0.8513, + "learning_rate": 2.4530045526442105e-06, + "loss": 0.89, "step": 27492 }, { - "epoch": 0.7801645856980703, + "epoch": 0.779081300121851, "grad_norm": 0.0, - "learning_rate": 2.4294969695612e-06, - "loss": 0.8054, + "learning_rate": 2.4524024499014488e-06, + "loss": 0.8454, "step": 27493 }, { - "epoch": 0.7801929625425653, + "epoch": 0.7791096375641134, "grad_norm": 0.0, - "learning_rate": 2.428896515819994e-06, - "loss": 0.8, + "learning_rate": 2.451800410734905e-06, + "loss": 0.8537, "step": 27494 }, { - "epoch": 0.7802213393870602, + "epoch": 0.7791379750063759, "grad_norm": 0.0, - "learning_rate": 2.428296126032239e-06, - "loss": 0.7174, + "learning_rate": 2.4511984351496477e-06, + "loss": 0.8444, "step": 27495 }, { - "epoch": 0.780249716231555, + "epoch": 0.7791663124486384, "grad_norm": 0.0, - "learning_rate": 2.4276958002030093e-06, - "loss": 0.7118, + "learning_rate": 2.45059652315075e-06, + "loss": 0.7712, "step": 27496 }, { - "epoch": 0.7802780930760499, + "epoch": 0.7791946498909008, "grad_norm": 0.0, - "learning_rate": 2.4270955383373717e-06, - "loss": 0.8947, + "learning_rate": 2.4499946747432814e-06, + "loss": 0.7674, "step": 27497 }, { - "epoch": 0.7803064699205449, + "epoch": 0.7792229873331633, "grad_norm": 0.0, - "learning_rate": 2.426495340440398e-06, - "loss": 0.8293, + "learning_rate": 2.449392889932315e-06, + "loss": 0.8188, "step": 27498 }, { - "epoch": 0.7803348467650397, + "epoch": 0.7792513247754258, "grad_norm": 0.0, - "learning_rate": 2.4258952065171603e-06, - "loss": 0.8757, + "learning_rate": 2.4487911687229116e-06, + "loss": 0.8887, "step": 27499 }, { - "epoch": 0.7803632236095346, + "epoch": 0.7792796622176882, "grad_norm": 0.0, - "learning_rate": 2.4252951365727218e-06, - "loss": 0.889, + "learning_rate": 2.448189511120148e-06, + "loss": 0.765, "step": 27500 }, { - "epoch": 0.7803916004540296, + "epoch": 0.7793079996599507, "grad_norm": 0.0, - "learning_rate": 2.424695130612159e-06, - "loss": 0.784, + "learning_rate": 2.4475879171290863e-06, + "loss": 0.8183, "step": 27501 }, { - "epoch": 0.7804199772985244, + "epoch": 0.7793363371022132, "grad_norm": 0.0, - "learning_rate": 2.4240951886405396e-06, - "loss": 0.8265, + "learning_rate": 2.446986386754795e-06, + "loss": 0.8897, "step": 27502 }, { - "epoch": 0.7804483541430193, + "epoch": 0.7793646745444757, "grad_norm": 0.0, - "learning_rate": 2.4234953106629256e-06, - "loss": 0.7767, + "learning_rate": 2.446384920002345e-06, + "loss": 0.9255, "step": 27503 }, { - "epoch": 0.7804767309875141, + "epoch": 0.779393011986738, "grad_norm": 0.0, - "learning_rate": 2.4228954966843864e-06, - "loss": 0.8055, + "learning_rate": 2.4457835168767975e-06, + "loss": 0.6553, "step": 27504 }, { - "epoch": 0.7805051078320091, + "epoch": 0.7794213494290005, "grad_norm": 0.0, - "learning_rate": 2.4222957467099937e-06, - "loss": 0.7767, + "learning_rate": 2.4451821773832197e-06, + "loss": 0.9094, "step": 27505 }, { - "epoch": 0.780533484676504, + "epoch": 0.779449686871263, "grad_norm": 0.0, - "learning_rate": 2.421696060744806e-06, - "loss": 0.9547, + "learning_rate": 2.444580901526682e-06, + "loss": 0.8049, "step": 27506 }, { - "epoch": 0.7805618615209988, + "epoch": 0.7794780243135254, "grad_norm": 0.0, - "learning_rate": 2.421096438793893e-06, - "loss": 0.7995, + "learning_rate": 2.4439796893122415e-06, + "loss": 0.717, "step": 27507 }, { - "epoch": 0.7805902383654938, + "epoch": 0.7795063617557879, "grad_norm": 0.0, - "learning_rate": 2.4204968808623207e-06, - "loss": 0.7224, + "learning_rate": 2.443378540744965e-06, + "loss": 0.7226, "step": 27508 }, { - "epoch": 0.7806186152099887, + "epoch": 0.7795346991980504, "grad_norm": 0.0, - "learning_rate": 2.41989738695515e-06, - "loss": 0.8677, + "learning_rate": 2.442777455829919e-06, + "loss": 0.8981, "step": 27509 }, { - "epoch": 0.7806469920544835, + "epoch": 0.7795630366403129, "grad_norm": 0.0, - "learning_rate": 2.4192979570774467e-06, - "loss": 0.74, + "learning_rate": 2.442176434572163e-06, + "loss": 0.8265, "step": 27510 }, { - "epoch": 0.7806753688989785, + "epoch": 0.7795913740825753, "grad_norm": 0.0, - "learning_rate": 2.4186985912342773e-06, - "loss": 0.791, + "learning_rate": 2.4415754769767654e-06, + "loss": 0.8063, "step": 27511 }, { - "epoch": 0.7807037457434733, + "epoch": 0.7796197115248378, "grad_norm": 0.0, - "learning_rate": 2.4180992894306985e-06, - "loss": 0.7564, + "learning_rate": 2.440974583048781e-06, + "loss": 0.7294, "step": 27512 }, { - "epoch": 0.7807321225879682, + "epoch": 0.7796480489671003, "grad_norm": 0.0, - "learning_rate": 2.4175000516717763e-06, - "loss": 0.892, + "learning_rate": 2.440373752793278e-06, + "loss": 0.692, "step": 27513 }, { - "epoch": 0.7807604994324631, + "epoch": 0.7796763864093627, "grad_norm": 0.0, - "learning_rate": 2.416900877962575e-06, - "loss": 0.8417, + "learning_rate": 2.4397729862153107e-06, + "loss": 0.7637, "step": 27514 }, { - "epoch": 0.780788876276958, + "epoch": 0.7797047238516251, "grad_norm": 0.0, - "learning_rate": 2.4163017683081503e-06, - "loss": 0.7145, + "learning_rate": 2.4391722833199436e-06, + "loss": 0.796, "step": 27515 }, { - "epoch": 0.7808172531214529, + "epoch": 0.7797330612938876, "grad_norm": 0.0, - "learning_rate": 2.415702722713564e-06, - "loss": 0.7684, + "learning_rate": 2.4385716441122385e-06, + "loss": 0.8748, "step": 27516 }, { - "epoch": 0.7808456299659478, + "epoch": 0.77976139873615, "grad_norm": 0.0, - "learning_rate": 2.4151037411838784e-06, - "loss": 0.7252, + "learning_rate": 2.437971068597249e-06, + "loss": 0.7331, "step": 27517 }, { - "epoch": 0.7808740068104427, + "epoch": 0.7797897361784125, "grad_norm": 0.0, - "learning_rate": 2.414504823724153e-06, - "loss": 0.8244, + "learning_rate": 2.437370556780038e-06, + "loss": 0.7761, "step": 27518 }, { - "epoch": 0.7809023836549376, + "epoch": 0.779818073620675, "grad_norm": 0.0, - "learning_rate": 2.4139059703394486e-06, - "loss": 0.8143, + "learning_rate": 2.4367701086656625e-06, + "loss": 0.8519, "step": 27519 }, { - "epoch": 0.7809307604994324, + "epoch": 0.7798464110629375, "grad_norm": 0.0, - "learning_rate": 2.4133071810348198e-06, - "loss": 0.7704, + "learning_rate": 2.4361697242591844e-06, + "loss": 0.8687, "step": 27520 }, { - "epoch": 0.7809591373439273, + "epoch": 0.7798747485051999, "grad_norm": 0.0, - "learning_rate": 2.4127084558153248e-06, - "loss": 0.8251, + "learning_rate": 2.435569403565654e-06, + "loss": 0.8615, "step": 27521 }, { - "epoch": 0.7809875141884223, + "epoch": 0.7799030859474624, "grad_norm": 0.0, - "learning_rate": 2.4121097946860263e-06, - "loss": 0.796, + "learning_rate": 2.4349691465901313e-06, + "loss": 0.8986, "step": 27522 }, { - "epoch": 0.7810158910329171, + "epoch": 0.7799314233897249, "grad_norm": 0.0, - "learning_rate": 2.4115111976519735e-06, - "loss": 0.7093, + "learning_rate": 2.4343689533376734e-06, + "loss": 0.69, "step": 27523 }, { - "epoch": 0.781044267877412, + "epoch": 0.7799597608319873, "grad_norm": 0.0, - "learning_rate": 2.410912664718228e-06, - "loss": 0.8153, + "learning_rate": 2.433768823813337e-06, + "loss": 0.8712, "step": 27524 }, { - "epoch": 0.781072644721907, + "epoch": 0.7799880982742498, "grad_norm": 0.0, - "learning_rate": 2.410314195889847e-06, - "loss": 0.7803, + "learning_rate": 2.4331687580221743e-06, + "loss": 0.7674, "step": 27525 }, { - "epoch": 0.7811010215664018, + "epoch": 0.7800164357165122, "grad_norm": 0.0, - "learning_rate": 2.4097157911718793e-06, - "loss": 0.8488, + "learning_rate": 2.43256875596924e-06, + "loss": 0.7971, "step": 27526 }, { - "epoch": 0.7811293984108967, + "epoch": 0.7800447731587747, "grad_norm": 0.0, - "learning_rate": 2.409117450569385e-06, - "loss": 0.8626, + "learning_rate": 2.4319688176595922e-06, + "loss": 0.6126, "step": 27527 }, { - "epoch": 0.7811577752553917, + "epoch": 0.7800731106010371, "grad_norm": 0.0, - "learning_rate": 2.4085191740874182e-06, - "loss": 0.811, + "learning_rate": 2.4313689430982777e-06, + "loss": 0.7747, "step": 27528 }, { - "epoch": 0.7811861520998865, + "epoch": 0.7801014480432996, "grad_norm": 0.0, - "learning_rate": 2.4079209617310296e-06, - "loss": 0.7262, + "learning_rate": 2.430769132290357e-06, + "loss": 0.8734, "step": 27529 }, { - "epoch": 0.7812145289443814, + "epoch": 0.7801297854855621, "grad_norm": 0.0, - "learning_rate": 2.407322813505273e-06, - "loss": 0.8274, + "learning_rate": 2.4301693852408757e-06, + "loss": 0.8374, "step": 27530 }, { - "epoch": 0.7812429057888762, + "epoch": 0.7801581229278245, "grad_norm": 0.0, - "learning_rate": 2.406724729415203e-06, - "loss": 0.8796, + "learning_rate": 2.429569701954889e-06, + "loss": 0.7324, "step": 27531 }, { - "epoch": 0.7812712826333712, + "epoch": 0.780186460370087, "grad_norm": 0.0, - "learning_rate": 2.4061267094658693e-06, - "loss": 0.8259, + "learning_rate": 2.4289700824374473e-06, + "loss": 0.7983, "step": 27532 }, { - "epoch": 0.7812996594778661, + "epoch": 0.7802147978123495, "grad_norm": 0.0, - "learning_rate": 2.405528753662328e-06, - "loss": 0.769, + "learning_rate": 2.4283705266936018e-06, + "loss": 0.8305, "step": 27533 }, { - "epoch": 0.7813280363223609, + "epoch": 0.7802431352546119, "grad_norm": 0.0, - "learning_rate": 2.4049308620096244e-06, - "loss": 0.9118, + "learning_rate": 2.4277710347284035e-06, + "loss": 0.7948, "step": 27534 }, { - "epoch": 0.7813564131668559, + "epoch": 0.7802714726968744, "grad_norm": 0.0, - "learning_rate": 2.40433303451281e-06, - "loss": 0.8746, + "learning_rate": 2.427171606546904e-06, + "loss": 0.8138, "step": 27535 }, { - "epoch": 0.7813847900113507, + "epoch": 0.7802998101391369, "grad_norm": 0.0, - "learning_rate": 2.40373527117694e-06, - "loss": 0.7445, + "learning_rate": 2.4265722421541482e-06, + "loss": 0.7781, "step": 27536 }, { - "epoch": 0.7814131668558456, + "epoch": 0.7803281475813993, "grad_norm": 0.0, - "learning_rate": 2.403137572007056e-06, - "loss": 0.8535, + "learning_rate": 2.425972941555189e-06, + "loss": 0.7348, "step": 27537 }, { - "epoch": 0.7814415437003405, + "epoch": 0.7803564850236617, "grad_norm": 0.0, - "learning_rate": 2.4025399370082114e-06, - "loss": 0.8383, + "learning_rate": 2.4253737047550697e-06, + "loss": 0.8481, "step": 27538 }, { - "epoch": 0.7814699205448354, + "epoch": 0.7803848224659242, "grad_norm": 0.0, - "learning_rate": 2.4019423661854557e-06, - "loss": 0.8137, + "learning_rate": 2.42477453175884e-06, + "loss": 0.7261, "step": 27539 }, { - "epoch": 0.7814982973893303, + "epoch": 0.7804131599081867, "grad_norm": 0.0, - "learning_rate": 2.4013448595438316e-06, - "loss": 0.7654, + "learning_rate": 2.4241754225715507e-06, + "loss": 0.807, "step": 27540 }, { - "epoch": 0.7815266742338252, + "epoch": 0.7804414973504491, "grad_norm": 0.0, - "learning_rate": 2.4007474170883893e-06, - "loss": 0.7693, + "learning_rate": 2.4235763771982414e-06, + "loss": 0.8019, "step": 27541 }, { - "epoch": 0.7815550510783201, + "epoch": 0.7804698347927116, "grad_norm": 0.0, - "learning_rate": 2.400150038824177e-06, - "loss": 0.7634, + "learning_rate": 2.422977395643965e-06, + "loss": 0.8888, "step": 27542 }, { - "epoch": 0.781583427922815, + "epoch": 0.7804981722349741, "grad_norm": 0.0, - "learning_rate": 2.399552724756237e-06, - "loss": 0.8715, + "learning_rate": 2.4223784779137614e-06, + "loss": 0.7316, "step": 27543 }, { - "epoch": 0.7816118047673098, + "epoch": 0.7805265096772366, "grad_norm": 0.0, - "learning_rate": 2.398955474889617e-06, - "loss": 0.7338, + "learning_rate": 2.4217796240126767e-06, + "loss": 0.7155, "step": 27544 }, { - "epoch": 0.7816401816118048, + "epoch": 0.780554847119499, "grad_norm": 0.0, - "learning_rate": 2.3983582892293642e-06, - "loss": 0.7808, + "learning_rate": 2.4211808339457576e-06, + "loss": 0.8316, "step": 27545 }, { - "epoch": 0.7816685584562997, + "epoch": 0.7805831845617615, "grad_norm": 0.0, - "learning_rate": 2.3977611677805146e-06, - "loss": 0.7773, + "learning_rate": 2.4205821077180457e-06, + "loss": 0.984, "step": 27546 }, { - "epoch": 0.7816969353007945, + "epoch": 0.780611522004024, "grad_norm": 0.0, - "learning_rate": 2.397164110548125e-06, - "loss": 0.7991, + "learning_rate": 2.419983445334586e-06, + "loss": 0.9177, "step": 27547 }, { - "epoch": 0.7817253121452894, + "epoch": 0.7806398594462863, "grad_norm": 0.0, - "learning_rate": 2.3965671175372274e-06, - "loss": 0.8501, + "learning_rate": 2.4193848468004224e-06, + "loss": 0.8052, "step": 27548 }, { - "epoch": 0.7817536889897844, + "epoch": 0.7806681968885488, "grad_norm": 0.0, - "learning_rate": 2.3959701887528707e-06, - "loss": 0.887, + "learning_rate": 2.4187863121205933e-06, + "loss": 0.741, "step": 27549 }, { - "epoch": 0.7817820658342792, + "epoch": 0.7806965343308113, "grad_norm": 0.0, - "learning_rate": 2.3953733242000977e-06, - "loss": 0.8336, + "learning_rate": 2.4181878413001447e-06, + "loss": 0.768, "step": 27550 }, { - "epoch": 0.7818104426787741, + "epoch": 0.7807248717730738, "grad_norm": 0.0, - "learning_rate": 2.394776523883946e-06, - "loss": 0.8802, + "learning_rate": 2.417589434344112e-06, + "loss": 0.9, "step": 27551 }, { - "epoch": 0.7818388195232691, + "epoch": 0.7807532092153362, "grad_norm": 0.0, - "learning_rate": 2.3941797878094576e-06, - "loss": 0.8407, + "learning_rate": 2.4169910912575402e-06, + "loss": 0.8936, "step": 27552 }, { - "epoch": 0.7818671963677639, + "epoch": 0.7807815466575987, "grad_norm": 0.0, - "learning_rate": 2.393583115981678e-06, - "loss": 0.7924, + "learning_rate": 2.4163928120454705e-06, + "loss": 0.8428, "step": 27553 }, { - "epoch": 0.7818955732122588, + "epoch": 0.7808098840998612, "grad_norm": 0.0, - "learning_rate": 2.3929865084056415e-06, - "loss": 0.7857, + "learning_rate": 2.415794596712937e-06, + "loss": 0.9125, "step": 27554 }, { - "epoch": 0.7819239500567536, + "epoch": 0.7808382215421236, "grad_norm": 0.0, - "learning_rate": 2.392389965086389e-06, - "loss": 0.8277, + "learning_rate": 2.4151964452649823e-06, + "loss": 0.7683, "step": 27555 }, { - "epoch": 0.7819523269012486, + "epoch": 0.7808665589843861, "grad_norm": 0.0, - "learning_rate": 2.391793486028964e-06, - "loss": 0.9047, + "learning_rate": 2.414598357706648e-06, + "loss": 0.8478, "step": 27556 }, { - "epoch": 0.7819807037457435, + "epoch": 0.7808948964266486, "grad_norm": 0.0, - "learning_rate": 2.391197071238398e-06, - "loss": 0.7626, + "learning_rate": 2.414000334042965e-06, + "loss": 0.758, "step": 27557 }, { - "epoch": 0.7820090805902383, + "epoch": 0.7809232338689109, "grad_norm": 0.0, - "learning_rate": 2.3906007207197325e-06, - "loss": 0.8416, + "learning_rate": 2.4134023742789745e-06, + "loss": 0.8331, "step": 27558 }, { - "epoch": 0.7820374574347333, + "epoch": 0.7809515713111734, "grad_norm": 0.0, - "learning_rate": 2.3900044344780084e-06, - "loss": 0.844, + "learning_rate": 2.4128044784197124e-06, + "loss": 0.836, "step": 27559 }, { - "epoch": 0.7820658342792282, + "epoch": 0.7809799087534359, "grad_norm": 0.0, - "learning_rate": 2.389408212518255e-06, - "loss": 0.7634, + "learning_rate": 2.4122066464702165e-06, + "loss": 0.7946, "step": 27560 }, { - "epoch": 0.782094211123723, + "epoch": 0.7810082461956984, "grad_norm": 0.0, - "learning_rate": 2.3888120548455127e-06, - "loss": 0.8728, + "learning_rate": 2.411608878435524e-06, + "loss": 0.8046, "step": 27561 }, { - "epoch": 0.782122587968218, + "epoch": 0.7810365836379608, "grad_norm": 0.0, - "learning_rate": 2.388215961464817e-06, - "loss": 0.7502, + "learning_rate": 2.4110111743206655e-06, + "loss": 0.8582, "step": 27562 }, { - "epoch": 0.7821509648127128, + "epoch": 0.7810649210802233, "grad_norm": 0.0, - "learning_rate": 2.3876199323812043e-06, - "loss": 0.756, + "learning_rate": 2.4104135341306777e-06, + "loss": 0.8674, "step": 27563 }, { - "epoch": 0.7821793416572077, + "epoch": 0.7810932585224858, "grad_norm": 0.0, - "learning_rate": 2.38702396759971e-06, - "loss": 0.8145, + "learning_rate": 2.409815957870597e-06, + "loss": 0.7782, "step": 27564 }, { - "epoch": 0.7822077185017026, + "epoch": 0.7811215959647482, "grad_norm": 0.0, - "learning_rate": 2.3864280671253647e-06, - "loss": 0.9206, + "learning_rate": 2.4092184455454537e-06, + "loss": 0.8782, "step": 27565 }, { - "epoch": 0.7822360953461975, + "epoch": 0.7811499334070107, "grad_norm": 0.0, - "learning_rate": 2.385832230963203e-06, - "loss": 0.8768, + "learning_rate": 2.4086209971602836e-06, + "loss": 0.8141, "step": 27566 }, { - "epoch": 0.7822644721906924, + "epoch": 0.7811782708492732, "grad_norm": 0.0, - "learning_rate": 2.385236459118262e-06, - "loss": 0.6989, + "learning_rate": 2.4080236127201162e-06, + "loss": 0.8442, "step": 27567 }, { - "epoch": 0.7822928490351873, + "epoch": 0.7812066082915357, "grad_norm": 0.0, - "learning_rate": 2.384640751595567e-06, - "loss": 0.7576, + "learning_rate": 2.407426292229984e-06, + "loss": 0.7345, "step": 27568 }, { - "epoch": 0.7823212258796822, + "epoch": 0.781234945733798, "grad_norm": 0.0, - "learning_rate": 2.384045108400155e-06, - "loss": 0.7151, + "learning_rate": 2.406829035694923e-06, + "loss": 0.8305, "step": 27569 }, { - "epoch": 0.7823496027241771, + "epoch": 0.7812632831760605, "grad_norm": 0.0, - "learning_rate": 2.3834495295370586e-06, - "loss": 0.7782, + "learning_rate": 2.4062318431199584e-06, + "loss": 0.7723, "step": 27570 }, { - "epoch": 0.7823779795686719, + "epoch": 0.781291620618323, "grad_norm": 0.0, - "learning_rate": 2.3828540150113043e-06, - "loss": 0.7952, + "learning_rate": 2.405634714510122e-06, + "loss": 0.8622, "step": 27571 }, { - "epoch": 0.7824063564131668, + "epoch": 0.7813199580605854, "grad_norm": 0.0, - "learning_rate": 2.382258564827924e-06, - "loss": 0.8745, + "learning_rate": 2.405037649870444e-06, + "loss": 0.8659, "step": 27572 }, { - "epoch": 0.7824347332576618, + "epoch": 0.7813482955028479, "grad_norm": 0.0, - "learning_rate": 2.381663178991951e-06, - "loss": 0.7855, + "learning_rate": 2.404440649205956e-06, + "loss": 0.8, "step": 27573 }, { - "epoch": 0.7824631101021566, + "epoch": 0.7813766329451104, "grad_norm": 0.0, - "learning_rate": 2.381067857508409e-06, - "loss": 0.7903, + "learning_rate": 2.4038437125216862e-06, + "loss": 0.8236, "step": 27574 }, { - "epoch": 0.7824914869466515, + "epoch": 0.7814049703873729, "grad_norm": 0.0, - "learning_rate": 2.3804726003823287e-06, - "loss": 0.8437, + "learning_rate": 2.4032468398226595e-06, + "loss": 0.7759, "step": 27575 }, { - "epoch": 0.7825198637911465, + "epoch": 0.7814333078296353, "grad_norm": 0.0, - "learning_rate": 2.3798774076187412e-06, - "loss": 0.7059, + "learning_rate": 2.4026500311139056e-06, + "loss": 0.8407, "step": 27576 }, { - "epoch": 0.7825482406356413, + "epoch": 0.7814616452718978, "grad_norm": 0.0, - "learning_rate": 2.3792822792226676e-06, - "loss": 0.8068, + "learning_rate": 2.4020532864004543e-06, + "loss": 0.7338, "step": 27577 }, { - "epoch": 0.7825766174801362, + "epoch": 0.7814899827141603, "grad_norm": 0.0, - "learning_rate": 2.3786872151991435e-06, - "loss": 0.7927, + "learning_rate": 2.401456605687327e-06, + "loss": 0.7724, "step": 27578 }, { - "epoch": 0.7826049943246312, + "epoch": 0.7815183201564226, "grad_norm": 0.0, - "learning_rate": 2.3780922155531896e-06, - "loss": 0.8153, + "learning_rate": 2.400859988979555e-06, + "loss": 0.7957, "step": 27579 }, { - "epoch": 0.782633371169126, + "epoch": 0.7815466575986851, "grad_norm": 0.0, - "learning_rate": 2.377497280289832e-06, - "loss": 0.785, + "learning_rate": 2.4002634362821573e-06, + "loss": 0.7863, "step": 27580 }, { - "epoch": 0.7826617480136209, + "epoch": 0.7815749950409476, "grad_norm": 0.0, - "learning_rate": 2.3769024094141015e-06, - "loss": 0.8503, + "learning_rate": 2.3996669476001645e-06, + "loss": 0.777, "step": 27581 }, { - "epoch": 0.7826901248581157, + "epoch": 0.78160333248321, "grad_norm": 0.0, - "learning_rate": 2.376307602931016e-06, - "loss": 0.8555, + "learning_rate": 2.3990705229386015e-06, + "loss": 0.9422, "step": 27582 }, { - "epoch": 0.7827185017026107, + "epoch": 0.7816316699254725, "grad_norm": 0.0, - "learning_rate": 2.3757128608456028e-06, - "loss": 0.7725, + "learning_rate": 2.398474162302488e-06, + "loss": 0.753, "step": 27583 }, { - "epoch": 0.7827468785471056, + "epoch": 0.781660007367735, "grad_norm": 0.0, - "learning_rate": 2.375118183162889e-06, - "loss": 0.7789, + "learning_rate": 2.3978778656968472e-06, + "loss": 0.8007, "step": 27584 }, { - "epoch": 0.7827752553916004, + "epoch": 0.7816883448099975, "grad_norm": 0.0, - "learning_rate": 2.374523569887891e-06, - "loss": 0.7931, + "learning_rate": 2.3972816331267056e-06, + "loss": 0.8011, "step": 27585 }, { - "epoch": 0.7828036322360954, + "epoch": 0.7817166822522599, "grad_norm": 0.0, - "learning_rate": 2.373929021025635e-06, - "loss": 0.7587, + "learning_rate": 2.3966854645970838e-06, + "loss": 0.7269, "step": 27586 }, { - "epoch": 0.7828320090805903, + "epoch": 0.7817450196945224, "grad_norm": 0.0, - "learning_rate": 2.3733345365811465e-06, - "loss": 0.7836, + "learning_rate": 2.3960893601130075e-06, + "loss": 0.8297, "step": 27587 }, { - "epoch": 0.7828603859250851, + "epoch": 0.7817733571367849, "grad_norm": 0.0, - "learning_rate": 2.372740116559442e-06, - "loss": 0.8496, + "learning_rate": 2.3954933196794905e-06, + "loss": 0.8787, "step": 27588 }, { - "epoch": 0.78288876276958, + "epoch": 0.7818016945790472, "grad_norm": 0.0, - "learning_rate": 2.372145760965544e-06, - "loss": 0.9076, + "learning_rate": 2.3948973433015564e-06, + "loss": 0.9092, "step": 27589 }, { - "epoch": 0.7829171396140749, + "epoch": 0.7818300320213097, "grad_norm": 0.0, - "learning_rate": 2.3715514698044762e-06, - "loss": 0.7965, + "learning_rate": 2.3943014309842294e-06, + "loss": 0.8394, "step": 27590 }, { - "epoch": 0.7829455164585698, + "epoch": 0.7818583694635722, "grad_norm": 0.0, - "learning_rate": 2.370957243081253e-06, - "loss": 0.8714, + "learning_rate": 2.393705582732524e-06, + "loss": 0.823, "step": 27591 }, { - "epoch": 0.7829738933030647, + "epoch": 0.7818867069058347, "grad_norm": 0.0, - "learning_rate": 2.370363080800898e-06, - "loss": 0.8611, + "learning_rate": 2.3931097985514627e-06, + "loss": 0.9499, "step": 27592 }, { - "epoch": 0.7830022701475596, + "epoch": 0.7819150443480971, "grad_norm": 0.0, - "learning_rate": 2.369768982968429e-06, - "loss": 0.7718, + "learning_rate": 2.39251407844606e-06, + "loss": 0.8085, "step": 27593 }, { - "epoch": 0.7830306469920545, + "epoch": 0.7819433817903596, "grad_norm": 0.0, - "learning_rate": 2.369174949588864e-06, - "loss": 0.8116, + "learning_rate": 2.3919184224213354e-06, + "loss": 0.7068, "step": 27594 }, { - "epoch": 0.7830590238365494, + "epoch": 0.7819717192326221, "grad_norm": 0.0, - "learning_rate": 2.368580980667221e-06, - "loss": 0.8237, + "learning_rate": 2.391322830482311e-06, + "loss": 0.8722, "step": 27595 }, { - "epoch": 0.7830874006810443, + "epoch": 0.7820000566748845, "grad_norm": 0.0, - "learning_rate": 2.3679870762085198e-06, - "loss": 0.8474, + "learning_rate": 2.390727302633996e-06, + "loss": 0.7644, "step": 27596 }, { - "epoch": 0.7831157775255392, + "epoch": 0.782028394117147, "grad_norm": 0.0, - "learning_rate": 2.367393236217773e-06, - "loss": 0.7947, + "learning_rate": 2.390131838881411e-06, + "loss": 0.8512, "step": 27597 }, { - "epoch": 0.783144154370034, + "epoch": 0.7820567315594095, "grad_norm": 0.0, - "learning_rate": 2.3667994607000013e-06, - "loss": 0.9276, + "learning_rate": 2.3895364392295717e-06, + "loss": 0.7827, "step": 27598 }, { - "epoch": 0.7831725312145289, + "epoch": 0.782085069001672, "grad_norm": 0.0, - "learning_rate": 2.3662057496602142e-06, - "loss": 0.8121, + "learning_rate": 2.388941103683493e-06, + "loss": 0.6978, "step": 27599 }, { - "epoch": 0.7832009080590239, + "epoch": 0.7821134064439343, "grad_norm": 0.0, - "learning_rate": 2.365612103103432e-06, - "loss": 0.7814, + "learning_rate": 2.3883458322481924e-06, + "loss": 0.7328, "step": 27600 }, { - "epoch": 0.7832292849035187, + "epoch": 0.7821417438861968, "grad_norm": 0.0, - "learning_rate": 2.3650185210346686e-06, - "loss": 0.8612, + "learning_rate": 2.3877506249286787e-06, + "loss": 0.7743, "step": 27601 }, { - "epoch": 0.7832576617480136, + "epoch": 0.7821700813284593, "grad_norm": 0.0, - "learning_rate": 2.3644250034589342e-06, - "loss": 0.7408, + "learning_rate": 2.387155481729967e-06, + "loss": 0.8029, "step": 27602 }, { - "epoch": 0.7832860385925086, + "epoch": 0.7821984187707217, "grad_norm": 0.0, - "learning_rate": 2.3638315503812457e-06, - "loss": 0.8016, + "learning_rate": 2.3865604026570755e-06, + "loss": 0.7813, "step": 27603 }, { - "epoch": 0.7833144154370034, + "epoch": 0.7822267562129842, "grad_norm": 0.0, - "learning_rate": 2.363238161806618e-06, - "loss": 0.7655, + "learning_rate": 2.38596538771501e-06, + "loss": 0.7388, "step": 27604 }, { - "epoch": 0.7833427922814983, + "epoch": 0.7822550936552467, "grad_norm": 0.0, - "learning_rate": 2.362644837740059e-06, - "loss": 0.7998, + "learning_rate": 2.385370436908786e-06, + "loss": 0.8267, "step": 27605 }, { - "epoch": 0.7833711691259931, + "epoch": 0.7822834310975091, "grad_norm": 0.0, - "learning_rate": 2.3620515781865803e-06, - "loss": 0.8592, + "learning_rate": 2.3847755502434166e-06, + "loss": 0.7637, "step": 27606 }, { - "epoch": 0.7833995459704881, + "epoch": 0.7823117685397716, "grad_norm": 0.0, - "learning_rate": 2.3614583831512006e-06, - "loss": 0.8704, + "learning_rate": 2.3841807277239083e-06, + "loss": 0.8279, "step": 27607 }, { - "epoch": 0.783427922814983, + "epoch": 0.7823401059820341, "grad_norm": 0.0, - "learning_rate": 2.3608652526389177e-06, - "loss": 0.7385, + "learning_rate": 2.3835859693552754e-06, + "loss": 0.7292, "step": 27608 }, { - "epoch": 0.7834562996594778, + "epoch": 0.7823684434242966, "grad_norm": 0.0, - "learning_rate": 2.360272186654753e-06, - "loss": 0.8209, + "learning_rate": 2.3829912751425244e-06, + "loss": 0.8709, "step": 27609 }, { - "epoch": 0.7834846765039728, + "epoch": 0.782396780866559, "grad_norm": 0.0, - "learning_rate": 2.359679185203715e-06, - "loss": 0.7538, + "learning_rate": 2.3823966450906667e-06, + "loss": 0.8214, "step": 27610 }, { - "epoch": 0.7835130533484677, + "epoch": 0.7824251183088214, "grad_norm": 0.0, - "learning_rate": 2.3590862482908085e-06, - "loss": 0.9343, + "learning_rate": 2.381802079204709e-06, + "loss": 0.8642, "step": 27611 }, { - "epoch": 0.7835414301929625, + "epoch": 0.7824534557510839, "grad_norm": 0.0, - "learning_rate": 2.3584933759210426e-06, - "loss": 0.784, + "learning_rate": 2.381207577489664e-06, + "loss": 0.8639, "step": 27612 }, { - "epoch": 0.7835698070374575, + "epoch": 0.7824817931933463, "grad_norm": 0.0, - "learning_rate": 2.3579005680994305e-06, - "loss": 0.7528, + "learning_rate": 2.380613139950535e-06, + "loss": 0.7994, "step": 27613 }, { - "epoch": 0.7835981838819523, + "epoch": 0.7825101306356088, "grad_norm": 0.0, - "learning_rate": 2.3573078248309722e-06, - "loss": 0.828, + "learning_rate": 2.3800187665923337e-06, + "loss": 0.809, "step": 27614 }, { - "epoch": 0.7836265607264472, + "epoch": 0.7825384680778713, "grad_norm": 0.0, - "learning_rate": 2.3567151461206792e-06, - "loss": 0.8519, + "learning_rate": 2.3794244574200607e-06, + "loss": 0.7676, "step": 27615 }, { - "epoch": 0.7836549375709421, + "epoch": 0.7825668055201338, "grad_norm": 0.0, - "learning_rate": 2.356122531973559e-06, - "loss": 0.8255, + "learning_rate": 2.3788302124387295e-06, + "loss": 0.7754, "step": 27616 }, { - "epoch": 0.783683314415437, + "epoch": 0.7825951429623962, "grad_norm": 0.0, - "learning_rate": 2.355529982394613e-06, - "loss": 0.804, + "learning_rate": 2.378236031653338e-06, + "loss": 0.7556, "step": 27617 }, { - "epoch": 0.7837116912599319, + "epoch": 0.7826234804046587, "grad_norm": 0.0, - "learning_rate": 2.3549374973888483e-06, - "loss": 0.7765, + "learning_rate": 2.3776419150688947e-06, + "loss": 0.8349, "step": 27618 }, { - "epoch": 0.7837400681044268, + "epoch": 0.7826518178469212, "grad_norm": 0.0, - "learning_rate": 2.354345076961274e-06, - "loss": 0.8103, + "learning_rate": 2.377047862690407e-06, + "loss": 0.7538, "step": 27619 }, { - "epoch": 0.7837684449489217, + "epoch": 0.7826801552891836, "grad_norm": 0.0, - "learning_rate": 2.3537527211168877e-06, - "loss": 0.8684, + "learning_rate": 2.376453874522873e-06, + "loss": 0.7768, "step": 27620 }, { - "epoch": 0.7837968217934166, + "epoch": 0.782708492731446, "grad_norm": 0.0, - "learning_rate": 2.353160429860695e-06, - "loss": 0.8977, + "learning_rate": 2.375859950571302e-06, + "loss": 0.8001, "step": 27621 }, { - "epoch": 0.7838251986379114, + "epoch": 0.7827368301737085, "grad_norm": 0.0, - "learning_rate": 2.3525682031977027e-06, - "loss": 0.7946, + "learning_rate": 2.37526609084069e-06, + "loss": 0.8628, "step": 27622 }, { - "epoch": 0.7838535754824063, + "epoch": 0.782765167615971, "grad_norm": 0.0, - "learning_rate": 2.3519760411329053e-06, - "loss": 0.874, + "learning_rate": 2.3746722953360437e-06, + "loss": 0.7625, "step": 27623 }, { - "epoch": 0.7838819523269013, + "epoch": 0.7827935050582334, "grad_norm": 0.0, - "learning_rate": 2.351383943671316e-06, - "loss": 0.8394, + "learning_rate": 2.374078564062364e-06, + "loss": 0.8074, "step": 27624 }, { - "epoch": 0.7839103291713961, + "epoch": 0.7828218425004959, "grad_norm": 0.0, - "learning_rate": 2.3507919108179277e-06, - "loss": 0.8408, + "learning_rate": 2.3734848970246537e-06, + "loss": 0.8839, "step": 27625 }, { - "epoch": 0.783938706015891, + "epoch": 0.7828501799427584, "grad_norm": 0.0, - "learning_rate": 2.3501999425777433e-06, - "loss": 0.7593, + "learning_rate": 2.3728912942279105e-06, + "loss": 0.8219, "step": 27626 }, { - "epoch": 0.783967082860386, + "epoch": 0.7828785173850208, "grad_norm": 0.0, - "learning_rate": 2.3496080389557675e-06, - "loss": 0.8268, + "learning_rate": 2.37229775567714e-06, + "loss": 0.8412, "step": 27627 }, { - "epoch": 0.7839954597048808, + "epoch": 0.7829068548272833, "grad_norm": 0.0, - "learning_rate": 2.3490161999569925e-06, - "loss": 0.8033, + "learning_rate": 2.371704281377335e-06, + "loss": 0.7377, "step": 27628 }, { - "epoch": 0.7840238365493757, + "epoch": 0.7829351922695458, "grad_norm": 0.0, - "learning_rate": 2.348424425586422e-06, - "loss": 0.7753, + "learning_rate": 2.3711108713334995e-06, + "loss": 0.8034, "step": 27629 }, { - "epoch": 0.7840522133938707, + "epoch": 0.7829635297118082, "grad_norm": 0.0, - "learning_rate": 2.3478327158490578e-06, - "loss": 0.9019, + "learning_rate": 2.3705175255506285e-06, + "loss": 0.8694, "step": 27630 }, { - "epoch": 0.7840805902383655, + "epoch": 0.7829918671540707, "grad_norm": 0.0, - "learning_rate": 2.3472410707498917e-06, - "loss": 0.785, + "learning_rate": 2.369924244033721e-06, + "loss": 0.8463, "step": 27631 }, { - "epoch": 0.7841089670828604, + "epoch": 0.7830202045963331, "grad_norm": 0.0, - "learning_rate": 2.346649490293924e-06, - "loss": 0.7982, + "learning_rate": 2.369331026787778e-06, + "loss": 0.6942, "step": 27632 }, { - "epoch": 0.7841373439273552, + "epoch": 0.7830485420385956, "grad_norm": 0.0, - "learning_rate": 2.3460579744861547e-06, - "loss": 0.9077, + "learning_rate": 2.368737873817789e-06, + "loss": 0.8897, "step": 27633 }, { - "epoch": 0.7841657207718502, + "epoch": 0.783076879480858, "grad_norm": 0.0, - "learning_rate": 2.3454665233315753e-06, - "loss": 0.8653, + "learning_rate": 2.3681447851287566e-06, + "loss": 0.8447, "step": 27634 }, { - "epoch": 0.7841940976163451, + "epoch": 0.7831052169231205, "grad_norm": 0.0, - "learning_rate": 2.344875136835184e-06, - "loss": 0.7516, + "learning_rate": 2.367551760725677e-06, + "loss": 0.8709, "step": 27635 }, { - "epoch": 0.7842224744608399, + "epoch": 0.783133554365383, "grad_norm": 0.0, - "learning_rate": 2.34428381500198e-06, - "loss": 0.9759, + "learning_rate": 2.3669588006135403e-06, + "loss": 0.8883, "step": 27636 }, { - "epoch": 0.7842508513053349, + "epoch": 0.7831618918076454, "grad_norm": 0.0, - "learning_rate": 2.343692557836951e-06, - "loss": 0.8254, + "learning_rate": 2.366365904797343e-06, + "loss": 0.7403, "step": 27637 }, { - "epoch": 0.7842792281498298, + "epoch": 0.7831902292499079, "grad_norm": 0.0, - "learning_rate": 2.343101365345095e-06, - "loss": 0.8015, + "learning_rate": 2.365773073282082e-06, + "loss": 0.8279, "step": 27638 }, { - "epoch": 0.7843076049943246, + "epoch": 0.7832185666921704, "grad_norm": 0.0, - "learning_rate": 2.3425102375314066e-06, - "loss": 0.7611, + "learning_rate": 2.3651803060727484e-06, + "loss": 0.7922, "step": 27639 }, { - "epoch": 0.7843359818388195, + "epoch": 0.7832469041344329, "grad_norm": 0.0, - "learning_rate": 2.3419191744008794e-06, - "loss": 0.7115, + "learning_rate": 2.3645876031743387e-06, + "loss": 0.7306, "step": 27640 }, { - "epoch": 0.7843643586833144, + "epoch": 0.7832752415766953, "grad_norm": 0.0, - "learning_rate": 2.3413281759585073e-06, - "loss": 0.7456, + "learning_rate": 2.3639949645918415e-06, + "loss": 0.8559, "step": 27641 }, { - "epoch": 0.7843927355278093, + "epoch": 0.7833035790189578, "grad_norm": 0.0, - "learning_rate": 2.3407372422092765e-06, - "loss": 0.8618, + "learning_rate": 2.3634023903302485e-06, + "loss": 0.7919, "step": 27642 }, { - "epoch": 0.7844211123723042, + "epoch": 0.7833319164612202, "grad_norm": 0.0, - "learning_rate": 2.3401463731581832e-06, - "loss": 0.7267, + "learning_rate": 2.3628098803945576e-06, + "loss": 0.7988, "step": 27643 }, { - "epoch": 0.7844494892167991, + "epoch": 0.7833602539034826, "grad_norm": 0.0, - "learning_rate": 2.339555568810221e-06, - "loss": 0.8365, + "learning_rate": 2.362217434789751e-06, + "loss": 0.7939, "step": 27644 }, { - "epoch": 0.784477866061294, + "epoch": 0.7833885913457451, "grad_norm": 0.0, - "learning_rate": 2.338964829170375e-06, - "loss": 0.8058, + "learning_rate": 2.3616250535208263e-06, + "loss": 0.729, "step": 27645 }, { - "epoch": 0.7845062429057889, + "epoch": 0.7834169287880076, "grad_norm": 0.0, - "learning_rate": 2.3383741542436354e-06, - "loss": 0.8665, + "learning_rate": 2.3610327365927677e-06, + "loss": 0.7434, "step": 27646 }, { - "epoch": 0.7845346197502838, + "epoch": 0.7834452662302701, "grad_norm": 0.0, - "learning_rate": 2.3377835440349985e-06, - "loss": 0.8464, + "learning_rate": 2.3604404840105665e-06, + "loss": 0.7939, "step": 27647 }, { - "epoch": 0.7845629965947787, + "epoch": 0.7834736036725325, "grad_norm": 0.0, - "learning_rate": 2.3371929985494445e-06, - "loss": 0.8506, + "learning_rate": 2.3598482957792146e-06, + "loss": 0.8377, "step": 27648 }, { - "epoch": 0.7845913734392735, + "epoch": 0.783501941114795, "grad_norm": 0.0, - "learning_rate": 2.336602517791966e-06, - "loss": 0.8405, + "learning_rate": 2.3592561719036956e-06, + "loss": 0.8018, "step": 27649 }, { - "epoch": 0.7846197502837684, + "epoch": 0.7835302785570575, "grad_norm": 0.0, - "learning_rate": 2.336012101767554e-06, - "loss": 0.7728, + "learning_rate": 2.3586641123889984e-06, + "loss": 0.8894, "step": 27650 }, { - "epoch": 0.7846481271282634, + "epoch": 0.7835586159993199, "grad_norm": 0.0, - "learning_rate": 2.3354217504811893e-06, - "loss": 0.9036, + "learning_rate": 2.358072117240112e-06, + "loss": 0.8306, "step": 27651 }, { - "epoch": 0.7846765039727582, + "epoch": 0.7835869534415824, "grad_norm": 0.0, - "learning_rate": 2.334831463937861e-06, - "loss": 0.78, + "learning_rate": 2.357480186462021e-06, + "loss": 0.705, "step": 27652 }, { - "epoch": 0.7847048808172531, + "epoch": 0.7836152908838449, "grad_norm": 0.0, - "learning_rate": 2.3342412421425585e-06, - "loss": 0.85, + "learning_rate": 2.356888320059716e-06, + "loss": 0.8229, "step": 27653 }, { - "epoch": 0.7847332576617481, + "epoch": 0.7836436283261072, "grad_norm": 0.0, - "learning_rate": 2.3336510851002604e-06, - "loss": 0.9516, + "learning_rate": 2.3562965180381746e-06, + "loss": 0.838, "step": 27654 }, { - "epoch": 0.7847616345062429, + "epoch": 0.7836719657683697, "grad_norm": 0.0, - "learning_rate": 2.333060992815961e-06, - "loss": 0.8115, + "learning_rate": 2.355704780402387e-06, + "loss": 0.7015, "step": 27655 }, { - "epoch": 0.7847900113507378, + "epoch": 0.7837003032106322, "grad_norm": 0.0, - "learning_rate": 2.3324709652946376e-06, - "loss": 0.8081, + "learning_rate": 2.3551131071573397e-06, + "loss": 0.7659, "step": 27656 }, { - "epoch": 0.7848183881952326, + "epoch": 0.7837286406528947, "grad_norm": 0.0, - "learning_rate": 2.3318810025412765e-06, - "loss": 0.7123, + "learning_rate": 2.354521498308011e-06, + "loss": 0.8265, "step": 27657 }, { - "epoch": 0.7848467650397276, + "epoch": 0.7837569780951571, "grad_norm": 0.0, - "learning_rate": 2.3312911045608645e-06, - "loss": 0.7263, + "learning_rate": 2.3539299538593883e-06, + "loss": 0.783, "step": 27658 }, { - "epoch": 0.7848751418842225, + "epoch": 0.7837853155374196, "grad_norm": 0.0, - "learning_rate": 2.3307012713583776e-06, - "loss": 0.9203, + "learning_rate": 2.353338473816451e-06, + "loss": 0.7982, "step": 27659 }, { - "epoch": 0.7849035187287173, + "epoch": 0.7838136529796821, "grad_norm": 0.0, - "learning_rate": 2.330111502938802e-06, - "loss": 0.8109, + "learning_rate": 2.3527470581841837e-06, + "loss": 0.9108, "step": 27660 }, { - "epoch": 0.7849318955732123, + "epoch": 0.7838419904219445, "grad_norm": 0.0, - "learning_rate": 2.329521799307123e-06, - "loss": 0.8094, + "learning_rate": 2.3521557069675693e-06, + "loss": 0.9449, "step": 27661 }, { - "epoch": 0.7849602724177072, + "epoch": 0.783870327864207, "grad_norm": 0.0, - "learning_rate": 2.3289321604683135e-06, - "loss": 0.9184, + "learning_rate": 2.3515644201715858e-06, + "loss": 0.8467, "step": 27662 }, { - "epoch": 0.784988649262202, + "epoch": 0.7838986653064695, "grad_norm": 0.0, - "learning_rate": 2.3283425864273602e-06, - "loss": 0.7679, + "learning_rate": 2.350973197801214e-06, + "loss": 0.777, "step": 27663 }, { - "epoch": 0.7850170261066969, + "epoch": 0.783927002748732, "grad_norm": 0.0, - "learning_rate": 2.3277530771892453e-06, - "loss": 0.8082, + "learning_rate": 2.3503820398614365e-06, + "loss": 0.7768, "step": 27664 }, { - "epoch": 0.7850454029511919, + "epoch": 0.7839553401909943, "grad_norm": 0.0, - "learning_rate": 2.327163632758941e-06, - "loss": 0.794, + "learning_rate": 2.349790946357231e-06, + "loss": 0.8707, "step": 27665 }, { - "epoch": 0.7850737797956867, + "epoch": 0.7839836776332568, "grad_norm": 0.0, - "learning_rate": 2.3265742531414316e-06, - "loss": 0.8514, + "learning_rate": 2.3491999172935807e-06, + "loss": 0.7836, "step": 27666 }, { - "epoch": 0.7851021566401816, + "epoch": 0.7840120150755193, "grad_norm": 0.0, - "learning_rate": 2.3259849383416965e-06, - "loss": 0.9447, + "learning_rate": 2.3486089526754572e-06, + "loss": 0.7652, "step": 27667 }, { - "epoch": 0.7851305334846765, + "epoch": 0.7840403525177817, "grad_norm": 0.0, - "learning_rate": 2.325395688364709e-06, - "loss": 0.8928, + "learning_rate": 2.348018052507842e-06, + "loss": 0.8693, "step": 27668 }, { - "epoch": 0.7851589103291714, + "epoch": 0.7840686899600442, "grad_norm": 0.0, - "learning_rate": 2.324806503215449e-06, - "loss": 0.771, + "learning_rate": 2.3474272167957144e-06, + "loss": 0.8177, "step": 27669 }, { - "epoch": 0.7851872871736663, + "epoch": 0.7840970274023067, "grad_norm": 0.0, - "learning_rate": 2.3242173828988932e-06, - "loss": 0.8211, + "learning_rate": 2.3468364455440472e-06, + "loss": 0.7978, "step": 27670 }, { - "epoch": 0.7852156640181612, + "epoch": 0.7841253648445692, "grad_norm": 0.0, - "learning_rate": 2.3236283274200187e-06, - "loss": 0.7669, + "learning_rate": 2.3462457387578185e-06, + "loss": 0.8986, "step": 27671 }, { - "epoch": 0.7852440408626561, + "epoch": 0.7841537022868316, "grad_norm": 0.0, - "learning_rate": 2.323039336783803e-06, - "loss": 0.7382, + "learning_rate": 2.3456550964420068e-06, + "loss": 0.8312, "step": 27672 }, { - "epoch": 0.785272417707151, + "epoch": 0.7841820397290941, "grad_norm": 0.0, - "learning_rate": 2.3224504109952172e-06, - "loss": 0.7458, + "learning_rate": 2.3450645186015806e-06, + "loss": 0.8106, "step": 27673 }, { - "epoch": 0.7853007945516458, + "epoch": 0.7842103771713566, "grad_norm": 0.0, - "learning_rate": 2.321861550059238e-06, - "loss": 0.7977, + "learning_rate": 2.3444740052415228e-06, + "loss": 0.768, "step": 27674 }, { - "epoch": 0.7853291713961408, + "epoch": 0.7842387146136189, "grad_norm": 0.0, - "learning_rate": 2.321272753980841e-06, - "loss": 0.7846, + "learning_rate": 2.3438835563667993e-06, + "loss": 0.7443, "step": 27675 }, { - "epoch": 0.7853575482406356, + "epoch": 0.7842670520558814, "grad_norm": 0.0, - "learning_rate": 2.3206840227649965e-06, - "loss": 0.8246, + "learning_rate": 2.343293171982388e-06, + "loss": 0.6809, "step": 27676 }, { - "epoch": 0.7853859250851305, + "epoch": 0.7842953894981439, "grad_norm": 0.0, - "learning_rate": 2.3200953564166785e-06, - "loss": 0.8015, + "learning_rate": 2.3427028520932628e-06, + "loss": 0.8743, "step": 27677 }, { - "epoch": 0.7854143019296255, + "epoch": 0.7843237269404063, "grad_norm": 0.0, - "learning_rate": 2.3195067549408636e-06, - "loss": 0.8185, + "learning_rate": 2.3421125967043934e-06, + "loss": 0.8554, "step": 27678 }, { - "epoch": 0.7854426787741203, + "epoch": 0.7843520643826688, "grad_norm": 0.0, - "learning_rate": 2.3189182183425174e-06, - "loss": 0.8904, + "learning_rate": 2.3415224058207565e-06, + "loss": 0.806, "step": 27679 }, { - "epoch": 0.7854710556186152, + "epoch": 0.7843804018249313, "grad_norm": 0.0, - "learning_rate": 2.3183297466266142e-06, - "loss": 0.8295, + "learning_rate": 2.340932279447318e-06, + "loss": 0.7967, "step": 27680 }, { - "epoch": 0.78549943246311, + "epoch": 0.7844087392671938, "grad_norm": 0.0, - "learning_rate": 2.3177413397981286e-06, - "loss": 0.831, + "learning_rate": 2.3403422175890513e-06, + "loss": 0.8356, "step": 27681 }, { - "epoch": 0.785527809307605, + "epoch": 0.7844370767094562, "grad_norm": 0.0, - "learning_rate": 2.3171529978620234e-06, - "loss": 0.7941, + "learning_rate": 2.3397522202509284e-06, + "loss": 0.8753, "step": 27682 }, { - "epoch": 0.7855561861520999, + "epoch": 0.7844654141517187, "grad_norm": 0.0, - "learning_rate": 2.3165647208232723e-06, - "loss": 0.7909, + "learning_rate": 2.339162287437914e-06, + "loss": 0.8019, "step": 27683 }, { - "epoch": 0.7855845629965947, + "epoch": 0.7844937515939812, "grad_norm": 0.0, - "learning_rate": 2.315976508686848e-06, - "loss": 0.9534, + "learning_rate": 2.3385724191549807e-06, + "loss": 0.8907, "step": 27684 }, { - "epoch": 0.7856129398410897, + "epoch": 0.7845220890362435, "grad_norm": 0.0, - "learning_rate": 2.315388361457709e-06, - "loss": 0.809, + "learning_rate": 2.3379826154071006e-06, + "loss": 0.787, "step": 27685 }, { - "epoch": 0.7856413166855846, + "epoch": 0.784550426478506, "grad_norm": 0.0, - "learning_rate": 2.3148002791408363e-06, - "loss": 0.7557, + "learning_rate": 2.3373928761992347e-06, + "loss": 0.8446, "step": 27686 }, { - "epoch": 0.7856696935300794, + "epoch": 0.7845787639207685, "grad_norm": 0.0, - "learning_rate": 2.314212261741188e-06, - "loss": 0.7527, + "learning_rate": 2.3368032015363564e-06, + "loss": 0.736, "step": 27687 }, { - "epoch": 0.7856980703745744, + "epoch": 0.784607101363031, "grad_norm": 0.0, - "learning_rate": 2.3136243092637345e-06, - "loss": 0.8401, + "learning_rate": 2.3362135914234284e-06, + "loss": 0.7096, "step": 27688 }, { - "epoch": 0.7857264472190693, + "epoch": 0.7846354388052934, "grad_norm": 0.0, - "learning_rate": 2.3130364217134436e-06, - "loss": 0.8043, + "learning_rate": 2.335624045865419e-06, + "loss": 0.8513, "step": 27689 }, { - "epoch": 0.7857548240635641, + "epoch": 0.7846637762475559, "grad_norm": 0.0, - "learning_rate": 2.3124485990952784e-06, - "loss": 0.8286, + "learning_rate": 2.3350345648672945e-06, + "loss": 0.8098, "step": 27690 }, { - "epoch": 0.785783200908059, + "epoch": 0.7846921136898184, "grad_norm": 0.0, - "learning_rate": 2.311860841414204e-06, - "loss": 0.6899, + "learning_rate": 2.334445148434019e-06, + "loss": 0.7664, "step": 27691 }, { - "epoch": 0.785811577752554, + "epoch": 0.7847204511320808, "grad_norm": 0.0, - "learning_rate": 2.3112731486751905e-06, - "loss": 0.8272, + "learning_rate": 2.33385579657056e-06, + "loss": 0.8127, "step": 27692 }, { - "epoch": 0.7858399545970488, + "epoch": 0.7847487885743433, "grad_norm": 0.0, - "learning_rate": 2.310685520883196e-06, - "loss": 0.7574, + "learning_rate": 2.333266509281883e-06, + "loss": 0.7906, "step": 27693 }, { - "epoch": 0.7858683314415437, + "epoch": 0.7847771260166058, "grad_norm": 0.0, - "learning_rate": 2.310097958043185e-06, - "loss": 0.8363, + "learning_rate": 2.332677286572945e-06, + "loss": 0.8631, "step": 27694 }, { - "epoch": 0.7858967082860386, + "epoch": 0.7848054634588683, "grad_norm": 0.0, - "learning_rate": 2.309510460160126e-06, - "loss": 0.8835, + "learning_rate": 2.332088128448717e-06, + "loss": 0.8105, "step": 27695 }, { - "epoch": 0.7859250851305335, + "epoch": 0.7848338009011306, "grad_norm": 0.0, - "learning_rate": 2.308923027238975e-06, - "loss": 0.9025, + "learning_rate": 2.331499034914156e-06, + "loss": 0.7371, "step": 27696 }, { - "epoch": 0.7859534619750284, + "epoch": 0.7848621383433931, "grad_norm": 0.0, - "learning_rate": 2.3083356592846974e-06, - "loss": 0.7514, + "learning_rate": 2.330910005974226e-06, + "loss": 0.7923, "step": 27697 }, { - "epoch": 0.7859818388195232, + "epoch": 0.7848904757856556, "grad_norm": 0.0, - "learning_rate": 2.307748356302256e-06, - "loss": 0.9778, + "learning_rate": 2.330321041633892e-06, + "loss": 0.7711, "step": 27698 }, { - "epoch": 0.7860102156640182, + "epoch": 0.784918813227918, "grad_norm": 0.0, - "learning_rate": 2.307161118296608e-06, - "loss": 0.8808, + "learning_rate": 2.3297321418981077e-06, + "loss": 0.8709, "step": 27699 }, { - "epoch": 0.786038592508513, + "epoch": 0.7849471506701805, "grad_norm": 0.0, - "learning_rate": 2.306573945272713e-06, - "loss": 0.8946, + "learning_rate": 2.3291433067718385e-06, + "loss": 0.7805, "step": 27700 }, { - "epoch": 0.7860669693530079, + "epoch": 0.784975488112443, "grad_norm": 0.0, - "learning_rate": 2.3059868372355387e-06, - "loss": 0.872, + "learning_rate": 2.328554536260047e-06, + "loss": 0.8282, "step": 27701 }, { - "epoch": 0.7860953461975029, + "epoch": 0.7850038255547054, "grad_norm": 0.0, - "learning_rate": 2.3053997941900376e-06, - "loss": 0.8522, + "learning_rate": 2.3279658303676868e-06, + "loss": 0.9275, "step": 27702 }, { - "epoch": 0.7861237230419977, + "epoch": 0.7850321629969679, "grad_norm": 0.0, - "learning_rate": 2.3048128161411697e-06, - "loss": 0.724, + "learning_rate": 2.3273771890997187e-06, + "loss": 0.787, "step": 27703 }, { - "epoch": 0.7861520998864926, + "epoch": 0.7850605004392304, "grad_norm": 0.0, - "learning_rate": 2.304225903093896e-06, - "loss": 0.8282, + "learning_rate": 2.3267886124611015e-06, + "loss": 0.6927, "step": 27704 }, { - "epoch": 0.7861804767309876, + "epoch": 0.7850888378814929, "grad_norm": 0.0, - "learning_rate": 2.3036390550531705e-06, - "loss": 0.8484, + "learning_rate": 2.3262001004567936e-06, + "loss": 0.8659, "step": 27705 }, { - "epoch": 0.7862088535754824, + "epoch": 0.7851171753237552, "grad_norm": 0.0, - "learning_rate": 2.3030522720239546e-06, - "loss": 0.7764, + "learning_rate": 2.325611653091755e-06, + "loss": 0.793, "step": 27706 }, { - "epoch": 0.7862372304199773, + "epoch": 0.7851455127660177, "grad_norm": 0.0, - "learning_rate": 2.3024655540111984e-06, - "loss": 0.8518, + "learning_rate": 2.3250232703709353e-06, + "loss": 0.7717, "step": 27707 }, { - "epoch": 0.7862656072644721, + "epoch": 0.7851738502082802, "grad_norm": 0.0, - "learning_rate": 2.301878901019863e-06, - "loss": 0.756, + "learning_rate": 2.324434952299298e-06, + "loss": 0.8262, "step": 27708 }, { - "epoch": 0.7862939841089671, + "epoch": 0.7852021876505426, "grad_norm": 0.0, - "learning_rate": 2.3012923130549037e-06, - "loss": 0.8118, + "learning_rate": 2.3238466988817934e-06, + "loss": 0.7922, "step": 27709 }, { - "epoch": 0.786322360953462, + "epoch": 0.7852305250928051, "grad_norm": 0.0, - "learning_rate": 2.3007057901212726e-06, - "loss": 0.881, + "learning_rate": 2.3232585101233775e-06, + "loss": 0.8392, "step": 27710 }, { - "epoch": 0.7863507377979568, + "epoch": 0.7852588625350676, "grad_norm": 0.0, - "learning_rate": 2.3001193322239255e-06, - "loss": 0.8183, + "learning_rate": 2.322670386029009e-06, + "loss": 0.822, "step": 27711 }, { - "epoch": 0.7863791146424518, + "epoch": 0.7852871999773301, "grad_norm": 0.0, - "learning_rate": 2.29953293936782e-06, - "loss": 0.6979, + "learning_rate": 2.322082326603636e-06, + "loss": 0.6776, "step": 27712 }, { - "epoch": 0.7864074914869467, + "epoch": 0.7853155374195925, "grad_norm": 0.0, - "learning_rate": 2.298946611557903e-06, - "loss": 0.9088, + "learning_rate": 2.3214943318522143e-06, + "loss": 0.7844, "step": 27713 }, { - "epoch": 0.7864358683314415, + "epoch": 0.785343874861855, "grad_norm": 0.0, - "learning_rate": 2.298360348799129e-06, - "loss": 0.8038, + "learning_rate": 2.3209064017797014e-06, + "loss": 0.7267, "step": 27714 }, { - "epoch": 0.7864642451759364, + "epoch": 0.7853722123041175, "grad_norm": 0.0, - "learning_rate": 2.2977741510964523e-06, - "loss": 0.7709, + "learning_rate": 2.3203185363910408e-06, + "loss": 0.8209, "step": 27715 }, { - "epoch": 0.7864926220204314, + "epoch": 0.7854005497463799, "grad_norm": 0.0, - "learning_rate": 2.2971880184548234e-06, - "loss": 0.8907, + "learning_rate": 2.31973073569119e-06, + "loss": 0.8119, "step": 27716 }, { - "epoch": 0.7865209988649262, + "epoch": 0.7854288871886423, "grad_norm": 0.0, - "learning_rate": 2.2966019508791938e-06, - "loss": 0.8371, + "learning_rate": 2.319142999685099e-06, + "loss": 0.81, "step": 27717 }, { - "epoch": 0.7865493757094211, + "epoch": 0.7854572246309048, "grad_norm": 0.0, - "learning_rate": 2.296015948374516e-06, - "loss": 0.7518, + "learning_rate": 2.3185553283777185e-06, + "loss": 0.7736, "step": 27718 }, { - "epoch": 0.786577752553916, + "epoch": 0.7854855620731672, "grad_norm": 0.0, - "learning_rate": 2.2954300109457363e-06, - "loss": 0.8381, + "learning_rate": 2.3179677217740015e-06, + "loss": 0.7933, "step": 27719 }, { - "epoch": 0.7866061293984109, + "epoch": 0.7855138995154297, "grad_norm": 0.0, - "learning_rate": 2.294844138597804e-06, - "loss": 0.7982, + "learning_rate": 2.317380179878893e-06, + "loss": 0.8244, "step": 27720 }, { - "epoch": 0.7866345062429058, + "epoch": 0.7855422369576922, "grad_norm": 0.0, - "learning_rate": 2.2942583313356736e-06, - "loss": 0.772, + "learning_rate": 2.3167927026973436e-06, + "loss": 0.7208, "step": 27721 }, { - "epoch": 0.7866628830874007, + "epoch": 0.7855705743999547, "grad_norm": 0.0, - "learning_rate": 2.2936725891642864e-06, - "loss": 0.7742, + "learning_rate": 2.3162052902343044e-06, + "loss": 0.8596, "step": 27722 }, { - "epoch": 0.7866912599318956, + "epoch": 0.7855989118422171, "grad_norm": 0.0, - "learning_rate": 2.2930869120885925e-06, - "loss": 0.7008, + "learning_rate": 2.31561794249472e-06, + "loss": 0.8975, "step": 27723 }, { - "epoch": 0.7867196367763905, + "epoch": 0.7856272492844796, "grad_norm": 0.0, - "learning_rate": 2.292501300113543e-06, - "loss": 0.7397, + "learning_rate": 2.3150306594835413e-06, + "loss": 0.7167, "step": 27724 }, { - "epoch": 0.7867480136208853, + "epoch": 0.7856555867267421, "grad_norm": 0.0, - "learning_rate": 2.2919157532440796e-06, - "loss": 0.8661, + "learning_rate": 2.3144434412057106e-06, + "loss": 0.8966, "step": 27725 }, { - "epoch": 0.7867763904653803, + "epoch": 0.7856839241690045, "grad_norm": 0.0, - "learning_rate": 2.2913302714851493e-06, - "loss": 0.7402, + "learning_rate": 2.3138562876661765e-06, + "loss": 0.7815, "step": 27726 }, { - "epoch": 0.7868047673098751, + "epoch": 0.785712261611267, "grad_norm": 0.0, - "learning_rate": 2.2907448548417024e-06, - "loss": 0.8121, + "learning_rate": 2.3132691988698874e-06, + "loss": 0.6661, "step": 27727 }, { - "epoch": 0.78683314415437, + "epoch": 0.7857405990535294, "grad_norm": 0.0, - "learning_rate": 2.2901595033186765e-06, - "loss": 0.7519, + "learning_rate": 2.3126821748217844e-06, + "loss": 0.8576, "step": 27728 }, { - "epoch": 0.786861520998865, + "epoch": 0.7857689364957919, "grad_norm": 0.0, - "learning_rate": 2.289574216921021e-06, - "loss": 0.8021, + "learning_rate": 2.312095215526814e-06, + "loss": 0.7391, "step": 27729 }, { - "epoch": 0.7868898978433598, + "epoch": 0.7857972739380543, "grad_norm": 0.0, - "learning_rate": 2.2889889956536803e-06, - "loss": 0.8408, + "learning_rate": 2.3115083209899193e-06, + "loss": 0.7028, "step": 27730 }, { - "epoch": 0.7869182746878547, + "epoch": 0.7858256113803168, "grad_norm": 0.0, - "learning_rate": 2.2884038395215915e-06, - "loss": 0.7938, + "learning_rate": 2.3109214912160462e-06, + "loss": 0.7176, "step": 27731 }, { - "epoch": 0.7869466515323496, + "epoch": 0.7858539488225793, "grad_norm": 0.0, - "learning_rate": 2.2878187485297075e-06, - "loss": 0.8346, + "learning_rate": 2.3103347262101392e-06, + "loss": 0.7652, "step": 27732 }, { - "epoch": 0.7869750283768445, + "epoch": 0.7858822862648417, "grad_norm": 0.0, - "learning_rate": 2.2872337226829623e-06, - "loss": 0.7786, + "learning_rate": 2.3097480259771352e-06, + "loss": 0.7875, "step": 27733 }, { - "epoch": 0.7870034052213394, + "epoch": 0.7859106237071042, "grad_norm": 0.0, - "learning_rate": 2.2866487619863e-06, - "loss": 0.8314, + "learning_rate": 2.309161390521979e-06, + "loss": 0.718, "step": 27734 }, { - "epoch": 0.7870317820658342, + "epoch": 0.7859389611493667, "grad_norm": 0.0, - "learning_rate": 2.2860638664446665e-06, - "loss": 0.8943, + "learning_rate": 2.308574819849616e-06, + "loss": 0.9418, "step": 27735 }, { - "epoch": 0.7870601589103292, + "epoch": 0.7859672985916292, "grad_norm": 0.0, - "learning_rate": 2.285479036062994e-06, - "loss": 0.8012, + "learning_rate": 2.3079883139649806e-06, + "loss": 0.8501, "step": 27736 }, { - "epoch": 0.7870885357548241, + "epoch": 0.7859956360338916, "grad_norm": 0.0, - "learning_rate": 2.2848942708462275e-06, - "loss": 0.8954, + "learning_rate": 2.3074018728730175e-06, + "loss": 0.8933, "step": 27737 }, { - "epoch": 0.7871169125993189, + "epoch": 0.786023973476154, "grad_norm": 0.0, - "learning_rate": 2.284309570799309e-06, - "loss": 0.7519, + "learning_rate": 2.3068154965786637e-06, + "loss": 0.8646, "step": 27738 }, { - "epoch": 0.7871452894438139, + "epoch": 0.7860523109184165, "grad_norm": 0.0, - "learning_rate": 2.2837249359271718e-06, - "loss": 0.7789, + "learning_rate": 2.306229185086859e-06, + "loss": 0.7315, "step": 27739 }, { - "epoch": 0.7871736662883088, + "epoch": 0.7860806483606789, "grad_norm": 0.0, - "learning_rate": 2.2831403662347563e-06, - "loss": 0.8394, + "learning_rate": 2.3056429384025424e-06, + "loss": 0.8324, "step": 27740 }, { - "epoch": 0.7872020431328036, + "epoch": 0.7861089858029414, "grad_norm": 0.0, - "learning_rate": 2.282555861727004e-06, - "loss": 0.7812, + "learning_rate": 2.305056756530657e-06, + "loss": 0.8242, "step": 27741 }, { - "epoch": 0.7872304199772985, + "epoch": 0.7861373232452039, "grad_norm": 0.0, - "learning_rate": 2.281971422408846e-06, - "loss": 0.9066, + "learning_rate": 2.3044706394761316e-06, + "loss": 0.6463, "step": 27742 }, { - "epoch": 0.7872587968217934, + "epoch": 0.7861656606874663, "grad_norm": 0.0, - "learning_rate": 2.281387048285223e-06, - "loss": 0.8159, + "learning_rate": 2.303884587243909e-06, + "loss": 0.7773, "step": 27743 }, { - "epoch": 0.7872871736662883, + "epoch": 0.7861939981297288, "grad_norm": 0.0, - "learning_rate": 2.2808027393610733e-06, - "loss": 0.8792, + "learning_rate": 2.3032985998389236e-06, + "loss": 0.8306, "step": 27744 }, { - "epoch": 0.7873155505107832, + "epoch": 0.7862223355719913, "grad_norm": 0.0, - "learning_rate": 2.2802184956413277e-06, - "loss": 0.8209, + "learning_rate": 2.3027126772661146e-06, + "loss": 0.8609, "step": 27745 }, { - "epoch": 0.7873439273552781, + "epoch": 0.7862506730142538, "grad_norm": 0.0, - "learning_rate": 2.279634317130922e-06, - "loss": 0.7006, + "learning_rate": 2.3021268195304124e-06, + "loss": 0.8503, "step": 27746 }, { - "epoch": 0.787372304199773, + "epoch": 0.7862790104565162, "grad_norm": 0.0, - "learning_rate": 2.279050203834794e-06, - "loss": 0.8237, + "learning_rate": 2.3015410266367543e-06, + "loss": 0.892, "step": 27747 }, { - "epoch": 0.7874006810442679, + "epoch": 0.7863073478987787, "grad_norm": 0.0, - "learning_rate": 2.2784661557578747e-06, - "loss": 0.7712, + "learning_rate": 2.3009552985900786e-06, + "loss": 0.7899, "step": 27748 }, { - "epoch": 0.7874290578887627, + "epoch": 0.7863356853410411, "grad_norm": 0.0, - "learning_rate": 2.2778821729051017e-06, - "loss": 0.8431, + "learning_rate": 2.300369635395312e-06, + "loss": 0.817, "step": 27749 }, { - "epoch": 0.7874574347332577, + "epoch": 0.7863640227833035, "grad_norm": 0.0, - "learning_rate": 2.277298255281403e-06, - "loss": 0.8457, + "learning_rate": 2.299784037057392e-06, + "loss": 0.729, "step": 27750 }, { - "epoch": 0.7874858115777525, + "epoch": 0.786392360225566, "grad_norm": 0.0, - "learning_rate": 2.276714402891713e-06, - "loss": 0.8241, + "learning_rate": 2.2991985035812523e-06, + "loss": 0.8152, "step": 27751 }, { - "epoch": 0.7875141884222474, + "epoch": 0.7864206976678285, "grad_norm": 0.0, - "learning_rate": 2.2761306157409657e-06, - "loss": 0.9194, + "learning_rate": 2.2986130349718203e-06, + "loss": 0.7179, "step": 27752 }, { - "epoch": 0.7875425652667424, + "epoch": 0.786449035110091, "grad_norm": 0.0, - "learning_rate": 2.2755468938340884e-06, - "loss": 0.8593, + "learning_rate": 2.298027631234031e-06, + "loss": 0.8984, "step": 27753 }, { - "epoch": 0.7875709421112372, + "epoch": 0.7864773725523534, "grad_norm": 0.0, - "learning_rate": 2.274963237176013e-06, - "loss": 0.7116, + "learning_rate": 2.2974422923728155e-06, + "loss": 0.6956, "step": 27754 }, { - "epoch": 0.7875993189557321, + "epoch": 0.7865057099946159, "grad_norm": 0.0, - "learning_rate": 2.2743796457716736e-06, - "loss": 0.813, + "learning_rate": 2.2968570183931026e-06, + "loss": 0.7459, "step": 27755 }, { - "epoch": 0.7876276958002271, + "epoch": 0.7865340474368784, "grad_norm": 0.0, - "learning_rate": 2.273796119625994e-06, - "loss": 0.7659, + "learning_rate": 2.296271809299828e-06, + "loss": 0.7996, "step": 27756 }, { - "epoch": 0.7876560726447219, + "epoch": 0.7865623848791408, "grad_norm": 0.0, - "learning_rate": 2.273212658743905e-06, - "loss": 0.8613, + "learning_rate": 2.2956866650979125e-06, + "loss": 0.821, "step": 27757 }, { - "epoch": 0.7876844494892168, + "epoch": 0.7865907223214033, "grad_norm": 0.0, - "learning_rate": 2.27262926313034e-06, - "loss": 0.8402, + "learning_rate": 2.2951015857922896e-06, + "loss": 0.8072, "step": 27758 }, { - "epoch": 0.7877128263337116, + "epoch": 0.7866190597636658, "grad_norm": 0.0, - "learning_rate": 2.27204593279022e-06, - "loss": 0.8266, + "learning_rate": 2.29451657138789e-06, + "loss": 0.8392, "step": 27759 }, { - "epoch": 0.7877412031782066, + "epoch": 0.7866473972059282, "grad_norm": 0.0, - "learning_rate": 2.2714626677284746e-06, - "loss": 0.785, + "learning_rate": 2.2939316218896357e-06, + "loss": 0.8399, "step": 27760 }, { - "epoch": 0.7877695800227015, + "epoch": 0.7866757346481906, "grad_norm": 0.0, - "learning_rate": 2.2708794679500325e-06, - "loss": 0.7951, + "learning_rate": 2.29334673730246e-06, + "loss": 0.7421, "step": 27761 }, { - "epoch": 0.7877979568671963, + "epoch": 0.7867040720904531, "grad_norm": 0.0, - "learning_rate": 2.2702963334598184e-06, - "loss": 0.7919, + "learning_rate": 2.292761917631283e-06, + "loss": 0.8859, "step": 27762 }, { - "epoch": 0.7878263337116913, + "epoch": 0.7867324095327156, "grad_norm": 0.0, - "learning_rate": 2.269713264262762e-06, - "loss": 0.9326, + "learning_rate": 2.292177162881035e-06, + "loss": 0.7326, "step": 27763 }, { - "epoch": 0.7878547105561862, + "epoch": 0.786760746974978, "grad_norm": 0.0, - "learning_rate": 2.269130260363781e-06, - "loss": 0.756, + "learning_rate": 2.291592473056644e-06, + "loss": 0.8481, "step": 27764 }, { - "epoch": 0.787883087400681, + "epoch": 0.7867890844172405, "grad_norm": 0.0, - "learning_rate": 2.2685473217678057e-06, - "loss": 0.839, + "learning_rate": 2.2910078481630295e-06, + "loss": 0.8109, "step": 27765 }, { - "epoch": 0.7879114642451759, + "epoch": 0.786817421859503, "grad_norm": 0.0, - "learning_rate": 2.2679644484797602e-06, - "loss": 0.8671, + "learning_rate": 2.2904232882051182e-06, + "loss": 0.7915, "step": 27766 }, { - "epoch": 0.7879398410896709, + "epoch": 0.7868457593017654, "grad_norm": 0.0, - "learning_rate": 2.2673816405045644e-06, - "loss": 0.7638, + "learning_rate": 2.289838793187834e-06, + "loss": 0.8456, "step": 27767 }, { - "epoch": 0.7879682179341657, + "epoch": 0.7868740967440279, "grad_norm": 0.0, - "learning_rate": 2.2667988978471432e-06, - "loss": 0.7873, + "learning_rate": 2.289254363116101e-06, + "loss": 0.8554, "step": 27768 }, { - "epoch": 0.7879965947786606, + "epoch": 0.7869024341862904, "grad_norm": 0.0, - "learning_rate": 2.2662162205124227e-06, - "loss": 0.7356, + "learning_rate": 2.2886699979948445e-06, + "loss": 0.8539, "step": 27769 }, { - "epoch": 0.7880249716231555, + "epoch": 0.7869307716285528, "grad_norm": 0.0, - "learning_rate": 2.2656336085053186e-06, - "loss": 0.8861, + "learning_rate": 2.2880856978289813e-06, + "loss": 0.8998, "step": 27770 }, { - "epoch": 0.7880533484676504, + "epoch": 0.7869591090708152, "grad_norm": 0.0, - "learning_rate": 2.2650510618307555e-06, - "loss": 0.8637, + "learning_rate": 2.287501462623436e-06, + "loss": 0.7713, "step": 27771 }, { - "epoch": 0.7880817253121453, + "epoch": 0.7869874465130777, "grad_norm": 0.0, - "learning_rate": 2.264468580493655e-06, - "loss": 0.8405, + "learning_rate": 2.286917292383133e-06, + "loss": 0.7118, "step": 27772 }, { - "epoch": 0.7881101021566402, + "epoch": 0.7870157839553402, "grad_norm": 0.0, - "learning_rate": 2.2638861644989353e-06, - "loss": 0.7426, + "learning_rate": 2.286333187112987e-06, + "loss": 0.7968, "step": 27773 }, { - "epoch": 0.7881384790011351, + "epoch": 0.7870441213976026, "grad_norm": 0.0, - "learning_rate": 2.263303813851515e-06, - "loss": 0.7687, + "learning_rate": 2.285749146817924e-06, + "loss": 0.7195, "step": 27774 }, { - "epoch": 0.78816685584563, + "epoch": 0.7870724588398651, "grad_norm": 0.0, - "learning_rate": 2.26272152855632e-06, - "loss": 0.9241, + "learning_rate": 2.2851651715028565e-06, + "loss": 0.8528, "step": 27775 }, { - "epoch": 0.7881952326901248, + "epoch": 0.7871007962821276, "grad_norm": 0.0, - "learning_rate": 2.2621393086182597e-06, - "loss": 0.7956, + "learning_rate": 2.284581261172709e-06, + "loss": 0.8428, "step": 27776 }, { - "epoch": 0.7882236095346198, + "epoch": 0.7871291337243901, "grad_norm": 0.0, - "learning_rate": 2.2615571540422567e-06, - "loss": 0.8425, + "learning_rate": 2.2839974158324006e-06, + "loss": 0.8504, "step": 27777 }, { - "epoch": 0.7882519863791146, + "epoch": 0.7871574711666525, "grad_norm": 0.0, - "learning_rate": 2.2609750648332286e-06, - "loss": 0.8536, + "learning_rate": 2.2834136354868454e-06, + "loss": 0.8683, "step": 27778 }, { - "epoch": 0.7882803632236095, + "epoch": 0.787185808608915, "grad_norm": 0.0, - "learning_rate": 2.2603930409960927e-06, - "loss": 0.8129, + "learning_rate": 2.282829920140962e-06, + "loss": 0.8557, "step": 27779 }, { - "epoch": 0.7883087400681045, + "epoch": 0.7872141460511775, "grad_norm": 0.0, - "learning_rate": 2.2598110825357667e-06, - "loss": 0.7499, + "learning_rate": 2.2822462697996672e-06, + "loss": 0.7354, "step": 27780 }, { - "epoch": 0.7883371169125993, + "epoch": 0.7872424834934398, "grad_norm": 0.0, - "learning_rate": 2.2592291894571617e-06, - "loss": 0.7492, + "learning_rate": 2.2816626844678792e-06, + "loss": 0.807, "step": 27781 }, { - "epoch": 0.7883654937570942, + "epoch": 0.7872708209357023, "grad_norm": 0.0, - "learning_rate": 2.258647361765196e-06, - "loss": 0.8865, + "learning_rate": 2.2810791641505147e-06, + "loss": 0.8528, "step": 27782 }, { - "epoch": 0.788393870601589, + "epoch": 0.7872991583779648, "grad_norm": 0.0, - "learning_rate": 2.258065599464786e-06, - "loss": 0.7702, + "learning_rate": 2.2804957088524837e-06, + "loss": 0.8797, "step": 27783 }, { - "epoch": 0.788422247446084, + "epoch": 0.7873274958202273, "grad_norm": 0.0, - "learning_rate": 2.257483902560842e-06, - "loss": 0.9125, + "learning_rate": 2.2799123185787043e-06, + "loss": 0.8013, "step": 27784 }, { - "epoch": 0.7884506242905789, + "epoch": 0.7873558332624897, "grad_norm": 0.0, - "learning_rate": 2.256902271058279e-06, - "loss": 0.8761, + "learning_rate": 2.279328993334092e-06, + "loss": 0.808, "step": 27785 }, { - "epoch": 0.7884790011350737, + "epoch": 0.7873841707047522, "grad_norm": 0.0, - "learning_rate": 2.256320704962014e-06, - "loss": 0.9013, + "learning_rate": 2.278745733123557e-06, + "loss": 0.7298, "step": 27786 }, { - "epoch": 0.7885073779795687, + "epoch": 0.7874125081470147, "grad_norm": 0.0, - "learning_rate": 2.255739204276952e-06, - "loss": 0.9221, + "learning_rate": 2.2781625379520146e-06, + "loss": 0.7767, "step": 27787 }, { - "epoch": 0.7885357548240636, + "epoch": 0.7874408455892771, "grad_norm": 0.0, - "learning_rate": 2.255157769008011e-06, - "loss": 0.8748, + "learning_rate": 2.2775794078243786e-06, + "loss": 0.7676, "step": 27788 }, { - "epoch": 0.7885641316685584, + "epoch": 0.7874691830315396, "grad_norm": 0.0, - "learning_rate": 2.2545763991601024e-06, - "loss": 0.7984, + "learning_rate": 2.2769963427455555e-06, + "loss": 0.8017, "step": 27789 }, { - "epoch": 0.7885925085130534, + "epoch": 0.7874975204738021, "grad_norm": 0.0, - "learning_rate": 2.2539950947381318e-06, - "loss": 0.7965, + "learning_rate": 2.2764133427204628e-06, + "loss": 0.8115, "step": 27790 }, { - "epoch": 0.7886208853575483, + "epoch": 0.7875258579160644, "grad_norm": 0.0, - "learning_rate": 2.253413855747013e-06, - "loss": 0.8426, + "learning_rate": 2.275830407754006e-06, + "loss": 0.8305, "step": 27791 }, { - "epoch": 0.7886492622020431, + "epoch": 0.7875541953583269, "grad_norm": 0.0, - "learning_rate": 2.2528326821916556e-06, - "loss": 0.9473, + "learning_rate": 2.2752475378510985e-06, + "loss": 0.8613, "step": 27792 }, { - "epoch": 0.788677639046538, + "epoch": 0.7875825328005894, "grad_norm": 0.0, - "learning_rate": 2.252251574076969e-06, - "loss": 0.7852, + "learning_rate": 2.274664733016649e-06, + "loss": 0.8097, "step": 27793 }, { - "epoch": 0.788706015891033, + "epoch": 0.7876108702428519, "grad_norm": 0.0, - "learning_rate": 2.2516705314078645e-06, - "loss": 0.7882, + "learning_rate": 2.274081993255568e-06, + "loss": 0.7579, "step": 27794 }, { - "epoch": 0.7887343927355278, + "epoch": 0.7876392076851143, "grad_norm": 0.0, - "learning_rate": 2.2510895541892453e-06, - "loss": 0.7788, + "learning_rate": 2.273499318572766e-06, + "loss": 0.6508, "step": 27795 }, { - "epoch": 0.7887627695800227, + "epoch": 0.7876675451273768, "grad_norm": 0.0, - "learning_rate": 2.2505086424260204e-06, - "loss": 0.8721, + "learning_rate": 2.272916708973145e-06, + "loss": 0.8677, "step": 27796 }, { - "epoch": 0.7887911464245176, + "epoch": 0.7876958825696393, "grad_norm": 0.0, - "learning_rate": 2.249927796123099e-06, - "loss": 0.814, + "learning_rate": 2.272334164461616e-06, + "loss": 0.8095, "step": 27797 }, { - "epoch": 0.7888195232690125, + "epoch": 0.7877242200119017, "grad_norm": 0.0, - "learning_rate": 2.2493470152853846e-06, - "loss": 0.864, + "learning_rate": 2.2717516850430885e-06, + "loss": 0.8111, "step": 27798 }, { - "epoch": 0.7888479001135074, + "epoch": 0.7877525574541642, "grad_norm": 0.0, - "learning_rate": 2.248766299917784e-06, - "loss": 0.7656, + "learning_rate": 2.271169270722464e-06, + "loss": 0.8914, "step": 27799 }, { - "epoch": 0.7888762769580022, + "epoch": 0.7877808948964267, "grad_norm": 0.0, - "learning_rate": 2.2481856500252053e-06, - "loss": 0.8556, + "learning_rate": 2.2705869215046506e-06, + "loss": 0.7763, "step": 27800 }, { - "epoch": 0.7889046538024972, + "epoch": 0.7878092323386892, "grad_norm": 0.0, - "learning_rate": 2.247605065612548e-06, - "loss": 0.8001, + "learning_rate": 2.2700046373945573e-06, + "loss": 0.9079, "step": 27801 }, { - "epoch": 0.788933030646992, + "epoch": 0.7878375697809515, "grad_norm": 0.0, - "learning_rate": 2.2470245466847196e-06, - "loss": 0.8455, + "learning_rate": 2.2694224183970815e-06, + "loss": 0.8079, "step": 27802 }, { - "epoch": 0.7889614074914869, + "epoch": 0.787865907223214, "grad_norm": 0.0, - "learning_rate": 2.2464440932466267e-06, - "loss": 0.7606, + "learning_rate": 2.268840264517135e-06, + "loss": 0.8582, "step": 27803 }, { - "epoch": 0.7889897843359819, + "epoch": 0.7878942446654765, "grad_norm": 0.0, - "learning_rate": 2.2458637053031653e-06, - "loss": 0.7805, + "learning_rate": 2.2682581757596144e-06, + "loss": 0.8407, "step": 27804 }, { - "epoch": 0.7890181611804767, + "epoch": 0.7879225821077389, "grad_norm": 0.0, - "learning_rate": 2.2452833828592425e-06, - "loss": 0.898, + "learning_rate": 2.2676761521294264e-06, + "loss": 0.851, "step": 27805 }, { - "epoch": 0.7890465380249716, + "epoch": 0.7879509195500014, "grad_norm": 0.0, - "learning_rate": 2.2447031259197614e-06, - "loss": 0.8407, + "learning_rate": 2.267094193631474e-06, + "loss": 0.8746, "step": 27806 }, { - "epoch": 0.7890749148694666, + "epoch": 0.7879792569922639, "grad_norm": 0.0, - "learning_rate": 2.2441229344896175e-06, - "loss": 0.9026, + "learning_rate": 2.266512300270658e-06, + "loss": 0.7885, "step": 27807 }, { - "epoch": 0.7891032917139614, + "epoch": 0.7880075944345264, "grad_norm": 0.0, - "learning_rate": 2.2435428085737175e-06, - "loss": 0.8928, + "learning_rate": 2.2659304720518814e-06, + "loss": 0.7238, "step": 27808 }, { - "epoch": 0.7891316685584563, + "epoch": 0.7880359318767888, "grad_norm": 0.0, - "learning_rate": 2.2429627481769633e-06, - "loss": 0.7342, + "learning_rate": 2.265348708980046e-06, + "loss": 0.8072, "step": 27809 }, { - "epoch": 0.7891600454029511, + "epoch": 0.7880642693190513, "grad_norm": 0.0, - "learning_rate": 2.2423827533042486e-06, - "loss": 0.7493, + "learning_rate": 2.2647670110600493e-06, + "loss": 0.8365, "step": 27810 }, { - "epoch": 0.7891884222474461, + "epoch": 0.7880926067613138, "grad_norm": 0.0, - "learning_rate": 2.241802823960475e-06, - "loss": 0.9021, + "learning_rate": 2.264185378296795e-06, + "loss": 0.8773, "step": 27811 }, { - "epoch": 0.789216799091941, + "epoch": 0.7881209442035761, "grad_norm": 0.0, - "learning_rate": 2.2412229601505454e-06, - "loss": 0.8722, + "learning_rate": 2.2636038106951763e-06, + "loss": 0.6928, "step": 27812 }, { - "epoch": 0.7892451759364358, + "epoch": 0.7881492816458386, "grad_norm": 0.0, - "learning_rate": 2.24064316187935e-06, - "loss": 0.75, + "learning_rate": 2.2630223082600965e-06, + "loss": 0.9026, "step": 27813 }, { - "epoch": 0.7892735527809308, + "epoch": 0.7881776190881011, "grad_norm": 0.0, - "learning_rate": 2.2400634291517952e-06, - "loss": 0.7601, + "learning_rate": 2.262440870996455e-06, + "loss": 0.8035, "step": 27814 }, { - "epoch": 0.7893019296254257, + "epoch": 0.7882059565303635, "grad_norm": 0.0, - "learning_rate": 2.2394837619727692e-06, - "loss": 0.8101, + "learning_rate": 2.2618594989091447e-06, + "loss": 0.843, "step": 27815 }, { - "epoch": 0.7893303064699205, + "epoch": 0.788234293972626, "grad_norm": 0.0, - "learning_rate": 2.238904160347174e-06, - "loss": 0.8352, + "learning_rate": 2.2612781920030658e-06, + "loss": 0.7859, "step": 27816 }, { - "epoch": 0.7893586833144154, + "epoch": 0.7882626314148885, "grad_norm": 0.0, - "learning_rate": 2.238324624279906e-06, - "loss": 0.8302, + "learning_rate": 2.2606969502831165e-06, + "loss": 0.9004, "step": 27817 }, { - "epoch": 0.7893870601589104, + "epoch": 0.788290968857151, "grad_norm": 0.0, - "learning_rate": 2.2377451537758567e-06, - "loss": 0.7888, + "learning_rate": 2.260115773754188e-06, + "loss": 0.8237, "step": 27818 }, { - "epoch": 0.7894154370034052, + "epoch": 0.7883193062994134, "grad_norm": 0.0, - "learning_rate": 2.2371657488399223e-06, - "loss": 0.7602, + "learning_rate": 2.259534662421179e-06, + "loss": 0.8349, "step": 27819 }, { - "epoch": 0.7894438138479001, + "epoch": 0.7883476437416759, "grad_norm": 0.0, - "learning_rate": 2.2365864094770007e-06, - "loss": 0.8483, + "learning_rate": 2.2589536162889824e-06, + "loss": 0.8462, "step": 27820 }, { - "epoch": 0.789472190692395, + "epoch": 0.7883759811839384, "grad_norm": 0.0, - "learning_rate": 2.236007135691981e-06, - "loss": 0.8457, + "learning_rate": 2.2583726353624956e-06, + "loss": 0.9072, "step": 27821 }, { - "epoch": 0.7895005675368899, + "epoch": 0.7884043186262008, "grad_norm": 0.0, - "learning_rate": 2.2354279274897574e-06, - "loss": 0.7822, + "learning_rate": 2.257791719646614e-06, + "loss": 0.8437, "step": 27822 }, { - "epoch": 0.7895289443813848, + "epoch": 0.7884326560684632, "grad_norm": 0.0, - "learning_rate": 2.234848784875222e-06, - "loss": 0.8543, + "learning_rate": 2.2572108691462234e-06, + "loss": 0.8454, "step": 27823 }, { - "epoch": 0.7895573212258796, + "epoch": 0.7884609935107257, "grad_norm": 0.0, - "learning_rate": 2.2342697078532695e-06, - "loss": 0.6795, + "learning_rate": 2.2566300838662247e-06, + "loss": 0.8285, "step": 27824 }, { - "epoch": 0.7895856980703746, + "epoch": 0.7884893309529882, "grad_norm": 0.0, - "learning_rate": 2.2336906964287897e-06, - "loss": 0.7699, + "learning_rate": 2.2560493638115034e-06, + "loss": 0.9254, "step": 27825 }, { - "epoch": 0.7896140749148695, + "epoch": 0.7885176683952506, "grad_norm": 0.0, - "learning_rate": 2.233111750606676e-06, - "loss": 0.8643, + "learning_rate": 2.2554687089869533e-06, + "loss": 0.8199, "step": 27826 }, { - "epoch": 0.7896424517593643, + "epoch": 0.7885460058375131, "grad_norm": 0.0, - "learning_rate": 2.2325328703918135e-06, - "loss": 0.7017, + "learning_rate": 2.2548881193974694e-06, + "loss": 0.9413, "step": 27827 }, { - "epoch": 0.7896708286038593, + "epoch": 0.7885743432797756, "grad_norm": 0.0, - "learning_rate": 2.231954055789095e-06, - "loss": 0.864, + "learning_rate": 2.2543075950479356e-06, + "loss": 0.7939, "step": 27828 }, { - "epoch": 0.7896992054483541, + "epoch": 0.788602680722038, "grad_norm": 0.0, - "learning_rate": 2.231375306803414e-06, - "loss": 0.7552, + "learning_rate": 2.2537271359432457e-06, + "loss": 0.7579, "step": 27829 }, { - "epoch": 0.789727582292849, + "epoch": 0.7886310181643005, "grad_norm": 0.0, - "learning_rate": 2.2307966234396504e-06, - "loss": 0.9255, + "learning_rate": 2.2531467420882913e-06, + "loss": 0.8076, "step": 27830 }, { - "epoch": 0.789755959137344, + "epoch": 0.788659355606563, "grad_norm": 0.0, - "learning_rate": 2.230218005702698e-06, - "loss": 0.7682, + "learning_rate": 2.2525664134879565e-06, + "loss": 0.7624, "step": 27831 }, { - "epoch": 0.7897843359818388, + "epoch": 0.7886876930488255, "grad_norm": 0.0, - "learning_rate": 2.2296394535974455e-06, - "loss": 0.7489, + "learning_rate": 2.2519861501471306e-06, + "loss": 0.834, "step": 27832 }, { - "epoch": 0.7898127128263337, + "epoch": 0.7887160304910878, "grad_norm": 0.0, - "learning_rate": 2.2290609671287756e-06, - "loss": 0.8476, + "learning_rate": 2.2514059520707033e-06, + "loss": 0.8336, "step": 27833 }, { - "epoch": 0.7898410896708286, + "epoch": 0.7887443679333503, "grad_norm": 0.0, - "learning_rate": 2.2284825463015767e-06, - "loss": 0.704, + "learning_rate": 2.2508258192635614e-06, + "loss": 0.8461, "step": 27834 }, { - "epoch": 0.7898694665153235, + "epoch": 0.7887727053756128, "grad_norm": 0.0, - "learning_rate": 2.2279041911207377e-06, - "loss": 0.7423, + "learning_rate": 2.250245751730593e-06, + "loss": 0.8293, "step": 27835 }, { - "epoch": 0.7898978433598184, + "epoch": 0.7888010428178752, "grad_norm": 0.0, - "learning_rate": 2.2273259015911397e-06, - "loss": 0.7254, + "learning_rate": 2.2496657494766805e-06, + "loss": 0.7599, "step": 27836 }, { - "epoch": 0.7899262202043132, + "epoch": 0.7888293802601377, "grad_norm": 0.0, - "learning_rate": 2.2267476777176676e-06, - "loss": 0.8473, + "learning_rate": 2.2490858125067103e-06, + "loss": 0.8573, "step": 27837 }, { - "epoch": 0.7899545970488082, + "epoch": 0.7888577177024002, "grad_norm": 0.0, - "learning_rate": 2.226169519505209e-06, - "loss": 0.7789, + "learning_rate": 2.2485059408255726e-06, + "loss": 0.9152, "step": 27838 }, { - "epoch": 0.7899829738933031, + "epoch": 0.7888860551446626, "grad_norm": 0.0, - "learning_rate": 2.2255914269586456e-06, - "loss": 0.8038, + "learning_rate": 2.247926134438144e-06, + "loss": 0.7666, "step": 27839 }, { - "epoch": 0.7900113507377979, + "epoch": 0.7889143925869251, "grad_norm": 0.0, - "learning_rate": 2.225013400082863e-06, - "loss": 0.8704, + "learning_rate": 2.2473463933493157e-06, + "loss": 0.9085, "step": 27840 }, { - "epoch": 0.7900397275822928, + "epoch": 0.7889427300291876, "grad_norm": 0.0, - "learning_rate": 2.224435438882738e-06, - "loss": 0.8531, + "learning_rate": 2.246766717563964e-06, + "loss": 0.8306, "step": 27841 }, { - "epoch": 0.7900681044267878, + "epoch": 0.7889710674714501, "grad_norm": 0.0, - "learning_rate": 2.223857543363158e-06, - "loss": 0.872, + "learning_rate": 2.246187107086977e-06, + "loss": 0.9183, "step": 27842 }, { - "epoch": 0.7900964812712826, + "epoch": 0.7889994049137125, "grad_norm": 0.0, - "learning_rate": 2.223279713529005e-06, - "loss": 0.7883, + "learning_rate": 2.2456075619232366e-06, + "loss": 0.8498, "step": 27843 }, { - "epoch": 0.7901248581157775, + "epoch": 0.789027742355975, "grad_norm": 0.0, - "learning_rate": 2.2227019493851553e-06, - "loss": 0.8343, + "learning_rate": 2.2450280820776205e-06, + "loss": 0.7895, "step": 27844 }, { - "epoch": 0.7901532349602725, + "epoch": 0.7890560797982374, "grad_norm": 0.0, - "learning_rate": 2.2221242509364914e-06, - "loss": 0.774, + "learning_rate": 2.2444486675550125e-06, + "loss": 0.8776, "step": 27845 }, { - "epoch": 0.7901816118047673, + "epoch": 0.7890844172404998, "grad_norm": 0.0, - "learning_rate": 2.2215466181878975e-06, - "loss": 0.8678, + "learning_rate": 2.2438693183602945e-06, + "loss": 0.8309, "step": 27846 }, { - "epoch": 0.7902099886492622, + "epoch": 0.7891127546827623, "grad_norm": 0.0, - "learning_rate": 2.220969051144245e-06, - "loss": 0.644, + "learning_rate": 2.2432900344983445e-06, + "loss": 0.8368, "step": 27847 }, { - "epoch": 0.7902383654937571, + "epoch": 0.7891410921250248, "grad_norm": 0.0, - "learning_rate": 2.220391549810418e-06, - "loss": 0.8313, + "learning_rate": 2.242710815974045e-06, + "loss": 0.8792, "step": 27848 }, { - "epoch": 0.790266742338252, + "epoch": 0.7891694295672873, "grad_norm": 0.0, - "learning_rate": 2.2198141141912944e-06, - "loss": 0.8638, + "learning_rate": 2.242131662792272e-06, + "loss": 0.9001, "step": 27849 }, { - "epoch": 0.7902951191827469, + "epoch": 0.7891977670095497, "grad_norm": 0.0, - "learning_rate": 2.2192367442917484e-06, - "loss": 0.864, + "learning_rate": 2.2415525749579036e-06, + "loss": 0.9062, "step": 27850 }, { - "epoch": 0.7903234960272417, + "epoch": 0.7892261044518122, "grad_norm": 0.0, - "learning_rate": 2.2186594401166604e-06, - "loss": 0.8184, + "learning_rate": 2.240973552475821e-06, + "loss": 0.9113, "step": 27851 }, { - "epoch": 0.7903518728717367, + "epoch": 0.7892544418940747, "grad_norm": 0.0, - "learning_rate": 2.2180822016709082e-06, - "loss": 0.8188, + "learning_rate": 2.2403945953508975e-06, + "loss": 0.8533, "step": 27852 }, { - "epoch": 0.7903802497162316, + "epoch": 0.7892827793363371, "grad_norm": 0.0, - "learning_rate": 2.2175050289593592e-06, - "loss": 0.8783, + "learning_rate": 2.2398157035880154e-06, + "loss": 0.8185, "step": 27853 }, { - "epoch": 0.7904086265607264, + "epoch": 0.7893111167785996, "grad_norm": 0.0, - "learning_rate": 2.2169279219869012e-06, - "loss": 0.8094, + "learning_rate": 2.2392368771920435e-06, + "loss": 0.869, "step": 27854 }, { - "epoch": 0.7904370034052214, + "epoch": 0.789339454220862, "grad_norm": 0.0, - "learning_rate": 2.2163508807584e-06, - "loss": 0.8708, + "learning_rate": 2.238658116167861e-06, + "loss": 0.8642, "step": 27855 }, { - "epoch": 0.7904653802497162, + "epoch": 0.7893677916631245, "grad_norm": 0.0, - "learning_rate": 2.215773905278733e-06, - "loss": 0.698, + "learning_rate": 2.2380794205203473e-06, + "loss": 0.7887, "step": 27856 }, { - "epoch": 0.7904937570942111, + "epoch": 0.7893961291053869, "grad_norm": 0.0, - "learning_rate": 2.2151969955527764e-06, - "loss": 0.7957, + "learning_rate": 2.237500790254369e-06, + "loss": 0.7719, "step": 27857 }, { - "epoch": 0.790522133938706, + "epoch": 0.7894244665476494, "grad_norm": 0.0, - "learning_rate": 2.214620151585397e-06, - "loss": 0.8725, + "learning_rate": 2.2369222253748046e-06, + "loss": 0.8893, "step": 27858 }, { - "epoch": 0.7905505107832009, + "epoch": 0.7894528039899119, "grad_norm": 0.0, - "learning_rate": 2.2140433733814727e-06, - "loss": 0.8019, + "learning_rate": 2.2363437258865273e-06, + "loss": 0.866, "step": 27859 }, { - "epoch": 0.7905788876276958, + "epoch": 0.7894811414321743, "grad_norm": 0.0, - "learning_rate": 2.2134666609458764e-06, - "loss": 0.8522, + "learning_rate": 2.2357652917944083e-06, + "loss": 0.8224, "step": 27860 }, { - "epoch": 0.7906072644721907, + "epoch": 0.7895094788744368, "grad_norm": 0.0, - "learning_rate": 2.212890014283474e-06, - "loss": 0.7788, + "learning_rate": 2.2351869231033265e-06, + "loss": 0.7415, "step": 27861 }, { - "epoch": 0.7906356413166856, + "epoch": 0.7895378163166993, "grad_norm": 0.0, - "learning_rate": 2.2123134333991405e-06, - "loss": 0.7804, + "learning_rate": 2.234608619818144e-06, + "loss": 0.7966, "step": 27862 }, { - "epoch": 0.7906640181611805, + "epoch": 0.7895661537589617, "grad_norm": 0.0, - "learning_rate": 2.2117369182977476e-06, - "loss": 0.7431, + "learning_rate": 2.234030381943737e-06, + "loss": 0.9384, "step": 27863 }, { - "epoch": 0.7906923950056753, + "epoch": 0.7895944912012242, "grad_norm": 0.0, - "learning_rate": 2.2111604689841602e-06, - "loss": 0.7342, + "learning_rate": 2.2334522094849798e-06, + "loss": 0.7777, "step": 27864 }, { - "epoch": 0.7907207718501703, + "epoch": 0.7896228286434867, "grad_norm": 0.0, - "learning_rate": 2.210584085463251e-06, - "loss": 0.8754, + "learning_rate": 2.232874102446735e-06, + "loss": 0.9527, "step": 27865 }, { - "epoch": 0.7907491486946652, + "epoch": 0.7896511660857491, "grad_norm": 0.0, - "learning_rate": 2.21000776773989e-06, - "loss": 0.806, + "learning_rate": 2.2322960608338763e-06, + "loss": 0.7546, "step": 27866 }, { - "epoch": 0.79077752553916, + "epoch": 0.7896795035280115, "grad_norm": 0.0, - "learning_rate": 2.2094315158189416e-06, - "loss": 0.8622, + "learning_rate": 2.2317180846512744e-06, + "loss": 0.7455, "step": 27867 }, { - "epoch": 0.7908059023836549, + "epoch": 0.789707840970274, "grad_norm": 0.0, - "learning_rate": 2.208855329705275e-06, - "loss": 0.8165, + "learning_rate": 2.231140173903793e-06, + "loss": 0.7388, "step": 27868 }, { - "epoch": 0.7908342792281499, + "epoch": 0.7897361784125365, "grad_norm": 0.0, - "learning_rate": 2.2082792094037585e-06, - "loss": 0.8546, + "learning_rate": 2.230562328596306e-06, + "loss": 0.8696, "step": 27869 }, { - "epoch": 0.7908626560726447, + "epoch": 0.7897645158547989, "grad_norm": 0.0, - "learning_rate": 2.207703154919257e-06, - "loss": 0.7801, + "learning_rate": 2.2299845487336746e-06, + "loss": 0.7573, "step": 27870 }, { - "epoch": 0.7908910329171396, + "epoch": 0.7897928532970614, "grad_norm": 0.0, - "learning_rate": 2.2071271662566396e-06, - "loss": 0.7816, + "learning_rate": 2.22940683432077e-06, + "loss": 0.8859, "step": 27871 }, { - "epoch": 0.7909194097616346, + "epoch": 0.7898211907393239, "grad_norm": 0.0, - "learning_rate": 2.2065512434207683e-06, - "loss": 0.8424, + "learning_rate": 2.2288291853624556e-06, + "loss": 0.7762, "step": 27872 }, { - "epoch": 0.7909477866061294, + "epoch": 0.7898495281815864, "grad_norm": 0.0, - "learning_rate": 2.205975386416507e-06, - "loss": 0.7793, + "learning_rate": 2.2282516018635992e-06, + "loss": 0.8092, "step": 27873 }, { - "epoch": 0.7909761634506243, + "epoch": 0.7898778656238488, "grad_norm": 0.0, - "learning_rate": 2.205399595248726e-06, - "loss": 0.8113, + "learning_rate": 2.2276740838290678e-06, + "loss": 0.8544, "step": 27874 }, { - "epoch": 0.7910045402951191, + "epoch": 0.7899062030661113, "grad_norm": 0.0, - "learning_rate": 2.2048238699222814e-06, - "loss": 0.7973, + "learning_rate": 2.227096631263722e-06, + "loss": 0.7191, "step": 27875 }, { - "epoch": 0.7910329171396141, + "epoch": 0.7899345405083738, "grad_norm": 0.0, - "learning_rate": 2.2042482104420404e-06, - "loss": 0.7593, + "learning_rate": 2.2265192441724272e-06, + "loss": 0.8557, "step": 27876 }, { - "epoch": 0.791061293984109, + "epoch": 0.7899628779506361, "grad_norm": 0.0, - "learning_rate": 2.2036726168128687e-06, - "loss": 0.7167, + "learning_rate": 2.2259419225600497e-06, + "loss": 0.7273, "step": 27877 }, { - "epoch": 0.7910896708286038, + "epoch": 0.7899912153928986, "grad_norm": 0.0, - "learning_rate": 2.203097089039621e-06, - "loss": 0.8314, + "learning_rate": 2.2253646664314488e-06, + "loss": 0.9011, "step": 27878 }, { - "epoch": 0.7911180476730988, + "epoch": 0.7900195528351611, "grad_norm": 0.0, - "learning_rate": 2.2025216271271632e-06, - "loss": 0.7186, + "learning_rate": 2.2247874757914865e-06, + "loss": 0.8932, "step": 27879 }, { - "epoch": 0.7911464245175936, + "epoch": 0.7900478902774236, "grad_norm": 0.0, - "learning_rate": 2.2019462310803586e-06, - "loss": 0.7602, + "learning_rate": 2.2242103506450307e-06, + "loss": 0.8802, "step": 27880 }, { - "epoch": 0.7911748013620885, + "epoch": 0.790076227719686, "grad_norm": 0.0, - "learning_rate": 2.2013709009040617e-06, - "loss": 0.7185, + "learning_rate": 2.2236332909969362e-06, + "loss": 0.8753, "step": 27881 }, { - "epoch": 0.7912031782065835, + "epoch": 0.7901045651619485, "grad_norm": 0.0, - "learning_rate": 2.2007956366031358e-06, - "loss": 0.7602, + "learning_rate": 2.2230562968520675e-06, + "loss": 0.781, "step": 27882 }, { - "epoch": 0.7912315550510783, + "epoch": 0.790132902604211, "grad_norm": 0.0, - "learning_rate": 2.200220438182442e-06, - "loss": 0.6907, + "learning_rate": 2.222479368215281e-06, + "loss": 0.7885, "step": 27883 }, { - "epoch": 0.7912599318955732, + "epoch": 0.7901612400464734, "grad_norm": 0.0, - "learning_rate": 2.1996453056468316e-06, - "loss": 0.752, + "learning_rate": 2.22190250509144e-06, + "loss": 0.7742, "step": 27884 }, { - "epoch": 0.7912883087400681, + "epoch": 0.7901895774887359, "grad_norm": 0.0, - "learning_rate": 2.199070239001173e-06, - "loss": 0.8402, + "learning_rate": 2.221325707485401e-06, + "loss": 0.7878, "step": 27885 }, { - "epoch": 0.791316685584563, + "epoch": 0.7902179149309984, "grad_norm": 0.0, - "learning_rate": 2.1984952382503165e-06, - "loss": 0.8687, + "learning_rate": 2.220748975402025e-06, + "loss": 0.8234, "step": 27886 }, { - "epoch": 0.7913450624290579, + "epoch": 0.7902462523732607, "grad_norm": 0.0, - "learning_rate": 2.1979203033991205e-06, - "loss": 0.8783, + "learning_rate": 2.2201723088461693e-06, + "loss": 0.8032, "step": 27887 }, { - "epoch": 0.7913734392735527, + "epoch": 0.7902745898155232, "grad_norm": 0.0, - "learning_rate": 2.197345434452446e-06, - "loss": 0.8953, + "learning_rate": 2.2195957078226935e-06, + "loss": 0.7184, "step": 27888 }, { - "epoch": 0.7914018161180477, + "epoch": 0.7903029272577857, "grad_norm": 0.0, - "learning_rate": 2.196770631415143e-06, - "loss": 0.7441, + "learning_rate": 2.2190191723364495e-06, + "loss": 0.9767, "step": 27889 }, { - "epoch": 0.7914301929625426, + "epoch": 0.7903312647000482, "grad_norm": 0.0, - "learning_rate": 2.196195894292068e-06, - "loss": 0.9143, + "learning_rate": 2.2184427023922994e-06, + "loss": 0.6841, "step": 27890 }, { - "epoch": 0.7914585698070374, + "epoch": 0.7903596021423106, "grad_norm": 0.0, - "learning_rate": 2.1956212230880815e-06, - "loss": 0.8761, + "learning_rate": 2.217866297995094e-06, + "loss": 0.7687, "step": 27891 }, { - "epoch": 0.7914869466515323, + "epoch": 0.7903879395845731, "grad_norm": 0.0, - "learning_rate": 2.19504661780803e-06, - "loss": 0.7731, + "learning_rate": 2.21728995914969e-06, + "loss": 0.9306, "step": 27892 }, { - "epoch": 0.7915153234960273, + "epoch": 0.7904162770268356, "grad_norm": 0.0, - "learning_rate": 2.1944720784567698e-06, - "loss": 0.9314, + "learning_rate": 2.216713685860945e-06, + "loss": 0.7979, "step": 27893 }, { - "epoch": 0.7915437003405221, + "epoch": 0.790444614469098, "grad_norm": 0.0, - "learning_rate": 2.193897605039158e-06, - "loss": 0.8818, + "learning_rate": 2.2161374781337084e-06, + "loss": 0.8131, "step": 27894 }, { - "epoch": 0.791572077185017, + "epoch": 0.7904729519113605, "grad_norm": 0.0, - "learning_rate": 2.1933231975600412e-06, - "loss": 0.8509, + "learning_rate": 2.2155613359728356e-06, + "loss": 0.7997, "step": 27895 }, { - "epoch": 0.791600454029512, + "epoch": 0.790501289353623, "grad_norm": 0.0, - "learning_rate": 2.192748856024275e-06, - "loss": 0.7965, + "learning_rate": 2.2149852593831845e-06, + "loss": 0.8899, "step": 27896 }, { - "epoch": 0.7916288308740068, + "epoch": 0.7905296267958855, "grad_norm": 0.0, - "learning_rate": 2.1921745804367125e-06, - "loss": 0.8141, + "learning_rate": 2.2144092483696e-06, + "loss": 0.809, "step": 27897 }, { - "epoch": 0.7916572077185017, + "epoch": 0.7905579642381478, "grad_norm": 0.0, - "learning_rate": 2.1916003708021993e-06, - "loss": 0.6858, + "learning_rate": 2.2138333029369376e-06, + "loss": 0.8841, "step": 27898 }, { - "epoch": 0.7916855845629966, + "epoch": 0.7905863016804103, "grad_norm": 0.0, - "learning_rate": 2.191026227125589e-06, - "loss": 0.8831, + "learning_rate": 2.2132574230900484e-06, + "loss": 0.7757, "step": 27899 }, { - "epoch": 0.7917139614074915, + "epoch": 0.7906146391226728, "grad_norm": 0.0, - "learning_rate": 2.1904521494117304e-06, - "loss": 0.7945, + "learning_rate": 2.2126816088337834e-06, + "loss": 0.8903, "step": 27900 }, { - "epoch": 0.7917423382519864, + "epoch": 0.7906429765649352, "grad_norm": 0.0, - "learning_rate": 2.189878137665474e-06, - "loss": 0.7465, + "learning_rate": 2.212105860172996e-06, + "loss": 0.8128, "step": 27901 }, { - "epoch": 0.7917707150964812, + "epoch": 0.7906713140071977, "grad_norm": 0.0, - "learning_rate": 2.189304191891671e-06, - "loss": 0.9045, + "learning_rate": 2.2115301771125296e-06, + "loss": 0.7341, "step": 27902 }, { - "epoch": 0.7917990919409762, + "epoch": 0.7906996514494602, "grad_norm": 0.0, - "learning_rate": 2.188730312095165e-06, - "loss": 0.8739, + "learning_rate": 2.21095455965724e-06, + "loss": 0.7405, "step": 27903 }, { - "epoch": 0.7918274687854711, + "epoch": 0.7907279888917227, "grad_norm": 0.0, - "learning_rate": 2.1881564982808033e-06, - "loss": 0.8546, + "learning_rate": 2.2103790078119703e-06, + "loss": 0.7598, "step": 27904 }, { - "epoch": 0.7918558456299659, + "epoch": 0.7907563263339851, "grad_norm": 0.0, - "learning_rate": 2.1875827504534387e-06, - "loss": 0.8496, + "learning_rate": 2.2098035215815694e-06, + "loss": 0.7874, "step": 27905 }, { - "epoch": 0.7918842224744609, + "epoch": 0.7907846637762476, "grad_norm": 0.0, - "learning_rate": 2.1870090686179112e-06, - "loss": 0.7439, + "learning_rate": 2.2092281009708906e-06, + "loss": 0.8682, "step": 27906 }, { - "epoch": 0.7919125993189557, + "epoch": 0.7908130012185101, "grad_norm": 0.0, - "learning_rate": 2.186435452779069e-06, - "loss": 0.7932, + "learning_rate": 2.208652745984773e-06, + "loss": 0.8647, "step": 27907 }, { - "epoch": 0.7919409761634506, + "epoch": 0.7908413386607724, "grad_norm": 0.0, - "learning_rate": 2.1858619029417606e-06, - "loss": 0.7209, + "learning_rate": 2.208077456628066e-06, + "loss": 0.7244, "step": 27908 }, { - "epoch": 0.7919693530079455, + "epoch": 0.7908696761030349, "grad_norm": 0.0, - "learning_rate": 2.1852884191108246e-06, - "loss": 0.8166, + "learning_rate": 2.2075022329056193e-06, + "loss": 0.7907, "step": 27909 }, { - "epoch": 0.7919977298524404, + "epoch": 0.7908980135452974, "grad_norm": 0.0, - "learning_rate": 2.184715001291109e-06, - "loss": 0.8239, + "learning_rate": 2.2069270748222726e-06, + "loss": 0.9384, "step": 27910 }, { - "epoch": 0.7920261066969353, + "epoch": 0.7909263509875598, "grad_norm": 0.0, - "learning_rate": 2.18414164948746e-06, - "loss": 0.807, + "learning_rate": 2.2063519823828727e-06, + "loss": 0.8403, "step": 27911 }, { - "epoch": 0.7920544835414302, + "epoch": 0.7909546884298223, "grad_norm": 0.0, - "learning_rate": 2.1835683637047156e-06, - "loss": 0.7823, + "learning_rate": 2.2057769555922637e-06, + "loss": 0.8554, "step": 27912 }, { - "epoch": 0.7920828603859251, + "epoch": 0.7909830258720848, "grad_norm": 0.0, - "learning_rate": 2.1829951439477193e-06, - "loss": 0.7821, + "learning_rate": 2.2052019944552903e-06, + "loss": 0.7872, "step": 27913 }, { - "epoch": 0.79211123723042, + "epoch": 0.7910113633143473, "grad_norm": 0.0, - "learning_rate": 2.1824219902213183e-06, - "loss": 0.8123, + "learning_rate": 2.2046270989767983e-06, + "loss": 0.8434, "step": 27914 }, { - "epoch": 0.7921396140749148, + "epoch": 0.7910397007566097, "grad_norm": 0.0, - "learning_rate": 2.181848902530345e-06, - "loss": 0.8134, + "learning_rate": 2.204052269161623e-06, + "loss": 0.854, "step": 27915 }, { - "epoch": 0.7921679909194098, + "epoch": 0.7910680381988722, "grad_norm": 0.0, - "learning_rate": 2.181275880879646e-06, - "loss": 0.7359, + "learning_rate": 2.2034775050146107e-06, + "loss": 0.7727, "step": 27916 }, { - "epoch": 0.7921963677639047, + "epoch": 0.7910963756411347, "grad_norm": 0.0, - "learning_rate": 2.1807029252740665e-06, - "loss": 0.7766, + "learning_rate": 2.2029028065406056e-06, + "loss": 0.8009, "step": 27917 }, { - "epoch": 0.7922247446083995, + "epoch": 0.791124713083397, "grad_norm": 0.0, - "learning_rate": 2.1801300357184363e-06, - "loss": 0.7926, + "learning_rate": 2.2023281737444434e-06, + "loss": 0.8385, "step": 27918 }, { - "epoch": 0.7922531214528944, + "epoch": 0.7911530505256595, "grad_norm": 0.0, - "learning_rate": 2.1795572122176003e-06, - "loss": 0.8169, + "learning_rate": 2.2017536066309687e-06, + "loss": 0.806, "step": 27919 }, { - "epoch": 0.7922814982973894, + "epoch": 0.791181387967922, "grad_norm": 0.0, - "learning_rate": 2.1789844547763984e-06, - "loss": 0.8545, + "learning_rate": 2.201179105205016e-06, + "loss": 0.7695, "step": 27920 }, { - "epoch": 0.7923098751418842, + "epoch": 0.7912097254101845, "grad_norm": 0.0, - "learning_rate": 2.1784117633996638e-06, - "loss": 0.8611, + "learning_rate": 2.2006046694714277e-06, + "loss": 0.7815, "step": 27921 }, { - "epoch": 0.7923382519863791, + "epoch": 0.7912380628524469, "grad_norm": 0.0, - "learning_rate": 2.1778391380922394e-06, - "loss": 1.0455, + "learning_rate": 2.2000302994350463e-06, + "loss": 0.8451, "step": 27922 }, { - "epoch": 0.792366628830874, + "epoch": 0.7912664002947094, "grad_norm": 0.0, - "learning_rate": 2.1772665788589566e-06, - "loss": 0.822, + "learning_rate": 2.1994559951007033e-06, + "loss": 0.792, "step": 27923 }, { - "epoch": 0.7923950056753689, + "epoch": 0.7912947377369719, "grad_norm": 0.0, - "learning_rate": 2.176694085704654e-06, - "loss": 0.8355, + "learning_rate": 2.198881756473238e-06, + "loss": 0.7505, "step": 27924 }, { - "epoch": 0.7924233825198638, + "epoch": 0.7913230751792343, "grad_norm": 0.0, - "learning_rate": 2.1761216586341717e-06, - "loss": 0.8295, + "learning_rate": 2.19830758355749e-06, + "loss": 0.7986, "step": 27925 }, { - "epoch": 0.7924517593643586, + "epoch": 0.7913514126214968, "grad_norm": 0.0, - "learning_rate": 2.1755492976523384e-06, - "loss": 0.844, + "learning_rate": 2.1977334763582924e-06, + "loss": 0.8455, "step": 27926 }, { - "epoch": 0.7924801362088536, + "epoch": 0.7913797500637593, "grad_norm": 0.0, - "learning_rate": 2.1749770027639917e-06, - "loss": 0.8235, + "learning_rate": 2.197159434880487e-06, + "loss": 0.8228, "step": 27927 }, { - "epoch": 0.7925085130533485, + "epoch": 0.7914080875060217, "grad_norm": 0.0, - "learning_rate": 2.1744047739739695e-06, - "loss": 0.8059, + "learning_rate": 2.1965854591289025e-06, + "loss": 0.7402, "step": 27928 }, { - "epoch": 0.7925368898978433, + "epoch": 0.7914364249482841, "grad_norm": 0.0, - "learning_rate": 2.173832611287099e-06, - "loss": 0.826, + "learning_rate": 2.1960115491083754e-06, + "loss": 0.802, "step": 27929 }, { - "epoch": 0.7925652667423383, + "epoch": 0.7914647623905466, "grad_norm": 0.0, - "learning_rate": 2.1732605147082133e-06, - "loss": 0.8327, + "learning_rate": 2.1954377048237452e-06, + "loss": 0.8233, "step": 27930 }, { - "epoch": 0.7925936435868332, + "epoch": 0.7914930998328091, "grad_norm": 0.0, - "learning_rate": 2.1726884842421524e-06, - "loss": 0.7752, + "learning_rate": 2.194863926279838e-06, + "loss": 0.9305, "step": 27931 }, { - "epoch": 0.792622020431328, + "epoch": 0.7915214372750715, "grad_norm": 0.0, - "learning_rate": 2.172116519893742e-06, - "loss": 0.897, + "learning_rate": 2.1942902134814924e-06, + "loss": 0.7208, "step": 27932 }, { - "epoch": 0.792650397275823, + "epoch": 0.791549774717334, "grad_norm": 0.0, - "learning_rate": 2.171544621667816e-06, - "loss": 0.812, + "learning_rate": 2.193716566433537e-06, + "loss": 0.7525, "step": 27933 }, { - "epoch": 0.7926787741203178, + "epoch": 0.7915781121595965, "grad_norm": 0.0, - "learning_rate": 2.1709727895692056e-06, - "loss": 0.858, + "learning_rate": 2.193142985140806e-06, + "loss": 0.8333, "step": 27934 }, { - "epoch": 0.7927071509648127, + "epoch": 0.7916064496018589, "grad_norm": 0.0, - "learning_rate": 2.1704010236027385e-06, - "loss": 0.7684, + "learning_rate": 2.1925694696081325e-06, + "loss": 0.8998, "step": 27935 }, { - "epoch": 0.7927355278093076, + "epoch": 0.7916347870441214, "grad_norm": 0.0, - "learning_rate": 2.169829323773246e-06, - "loss": 0.8952, + "learning_rate": 2.1919960198403435e-06, + "loss": 0.9037, "step": 27936 }, { - "epoch": 0.7927639046538025, + "epoch": 0.7916631244863839, "grad_norm": 0.0, - "learning_rate": 2.169257690085559e-06, - "loss": 0.7898, + "learning_rate": 2.191422635842271e-06, + "loss": 0.8275, "step": 27937 }, { - "epoch": 0.7927922814982974, + "epoch": 0.7916914619286464, "grad_norm": 0.0, - "learning_rate": 2.1686861225445024e-06, - "loss": 0.827, + "learning_rate": 2.190849317618745e-06, + "loss": 0.7641, "step": 27938 }, { - "epoch": 0.7928206583427923, + "epoch": 0.7917197993709088, "grad_norm": 0.0, - "learning_rate": 2.1681146211549054e-06, - "loss": 0.8034, + "learning_rate": 2.190276065174596e-06, + "loss": 0.7998, "step": 27939 }, { - "epoch": 0.7928490351872872, + "epoch": 0.7917481368131712, "grad_norm": 0.0, - "learning_rate": 2.167543185921599e-06, - "loss": 0.8806, + "learning_rate": 2.1897028785146534e-06, + "loss": 0.8275, "step": 27940 }, { - "epoch": 0.7928774120317821, + "epoch": 0.7917764742554337, "grad_norm": 0.0, - "learning_rate": 2.166971816849406e-06, - "loss": 0.9624, + "learning_rate": 2.189129757643742e-06, + "loss": 0.8189, "step": 27941 }, { - "epoch": 0.7929057888762769, + "epoch": 0.7918048116976961, "grad_norm": 0.0, - "learning_rate": 2.166400513943152e-06, - "loss": 0.7841, + "learning_rate": 2.188556702566691e-06, + "loss": 0.8196, "step": 27942 }, { - "epoch": 0.7929341657207718, + "epoch": 0.7918331491399586, "grad_norm": 0.0, - "learning_rate": 2.1658292772076695e-06, - "loss": 0.8958, + "learning_rate": 2.1879837132883298e-06, + "loss": 0.9447, "step": 27943 }, { - "epoch": 0.7929625425652668, + "epoch": 0.7918614865822211, "grad_norm": 0.0, - "learning_rate": 2.165258106647776e-06, - "loss": 0.7919, + "learning_rate": 2.18741078981348e-06, + "loss": 0.8427, "step": 27944 }, { - "epoch": 0.7929909194097616, + "epoch": 0.7918898240244836, "grad_norm": 0.0, - "learning_rate": 2.1646870022682997e-06, - "loss": 0.8881, + "learning_rate": 2.186837932146971e-06, + "loss": 0.7379, "step": 27945 }, { - "epoch": 0.7930192962542565, + "epoch": 0.791918161466746, "grad_norm": 0.0, - "learning_rate": 2.1641159640740638e-06, - "loss": 0.7542, + "learning_rate": 2.18626514029363e-06, + "loss": 0.8244, "step": 27946 }, { - "epoch": 0.7930476730987515, + "epoch": 0.7919464989090085, "grad_norm": 0.0, - "learning_rate": 2.1635449920698936e-06, - "loss": 0.8547, + "learning_rate": 2.185692414258276e-06, + "loss": 0.7392, "step": 27947 }, { - "epoch": 0.7930760499432463, + "epoch": 0.791974836351271, "grad_norm": 0.0, - "learning_rate": 2.162974086260613e-06, - "loss": 0.8508, + "learning_rate": 2.1851197540457393e-06, + "loss": 0.8717, "step": 27948 }, { - "epoch": 0.7931044267877412, + "epoch": 0.7920031737935334, "grad_norm": 0.0, - "learning_rate": 2.1624032466510405e-06, - "loss": 0.7796, + "learning_rate": 2.1845471596608382e-06, + "loss": 0.784, "step": 27949 }, { - "epoch": 0.7931328036322361, + "epoch": 0.7920315112357958, "grad_norm": 0.0, - "learning_rate": 2.1618324732459993e-06, - "loss": 0.834, + "learning_rate": 2.1839746311083988e-06, + "loss": 0.9454, "step": 27950 }, { - "epoch": 0.793161180476731, + "epoch": 0.7920598486780583, "grad_norm": 0.0, - "learning_rate": 2.1612617660503154e-06, - "loss": 0.7683, + "learning_rate": 2.183402168393244e-06, + "loss": 0.797, "step": 27951 }, { - "epoch": 0.7931895573212259, + "epoch": 0.7920881861203207, "grad_norm": 0.0, - "learning_rate": 2.160691125068802e-06, - "loss": 0.7383, + "learning_rate": 2.182829771520194e-06, + "loss": 0.7384, "step": 27952 }, { - "epoch": 0.7932179341657207, + "epoch": 0.7921165235625832, "grad_norm": 0.0, - "learning_rate": 2.160120550306284e-06, - "loss": 0.8544, + "learning_rate": 2.182257440494073e-06, + "loss": 0.8344, "step": 27953 }, { - "epoch": 0.7932463110102157, + "epoch": 0.7921448610048457, "grad_norm": 0.0, - "learning_rate": 2.159550041767582e-06, - "loss": 0.874, + "learning_rate": 2.1816851753197023e-06, + "loss": 0.8105, "step": 27954 }, { - "epoch": 0.7932746878547106, + "epoch": 0.7921731984471082, "grad_norm": 0.0, - "learning_rate": 2.1589795994575102e-06, - "loss": 0.8377, + "learning_rate": 2.181112976001899e-06, + "loss": 0.818, "step": 27955 }, { - "epoch": 0.7933030646992054, + "epoch": 0.7922015358893706, "grad_norm": 0.0, - "learning_rate": 2.1584092233808904e-06, - "loss": 0.7005, + "learning_rate": 2.1805408425454865e-06, + "loss": 0.7314, "step": 27956 }, { - "epoch": 0.7933314415437004, + "epoch": 0.7922298733316331, "grad_norm": 0.0, - "learning_rate": 2.157838913542545e-06, - "loss": 0.875, + "learning_rate": 2.17996877495528e-06, + "loss": 0.6973, "step": 27957 }, { - "epoch": 0.7933598183881952, + "epoch": 0.7922582107738956, "grad_norm": 0.0, - "learning_rate": 2.157268669947282e-06, - "loss": 0.8059, + "learning_rate": 2.1793967732360997e-06, + "loss": 0.8205, "step": 27958 }, { - "epoch": 0.7933881952326901, + "epoch": 0.792286548216158, "grad_norm": 0.0, - "learning_rate": 2.1566984925999234e-06, - "loss": 0.8279, + "learning_rate": 2.178824837392768e-06, + "loss": 0.8308, "step": 27959 }, { - "epoch": 0.793416572077185, + "epoch": 0.7923148856584205, "grad_norm": 0.0, - "learning_rate": 2.156128381505289e-06, - "loss": 0.7502, + "learning_rate": 2.1782529674300955e-06, + "loss": 0.8321, "step": 27960 }, { - "epoch": 0.7934449489216799, + "epoch": 0.792343223100683, "grad_norm": 0.0, - "learning_rate": 2.1555583366681843e-06, - "loss": 0.8273, + "learning_rate": 2.177681163352906e-06, + "loss": 0.854, "step": 27961 }, { - "epoch": 0.7934733257661748, + "epoch": 0.7923715605429454, "grad_norm": 0.0, - "learning_rate": 2.154988358093437e-06, - "loss": 0.7992, + "learning_rate": 2.1771094251660096e-06, + "loss": 0.8181, "step": 27962 }, { - "epoch": 0.7935017026106697, + "epoch": 0.7923998979852078, "grad_norm": 0.0, - "learning_rate": 2.1544184457858517e-06, - "loss": 0.8531, + "learning_rate": 2.176537752874226e-06, + "loss": 0.769, "step": 27963 }, { - "epoch": 0.7935300794551646, + "epoch": 0.7924282354274703, "grad_norm": 0.0, - "learning_rate": 2.153848599750248e-06, - "loss": 0.7672, + "learning_rate": 2.17596614648237e-06, + "loss": 0.7024, "step": 27964 }, { - "epoch": 0.7935584562996595, + "epoch": 0.7924565728697328, "grad_norm": 0.0, - "learning_rate": 2.15327881999144e-06, - "loss": 0.8652, + "learning_rate": 2.1753946059952567e-06, + "loss": 0.8717, "step": 27965 }, { - "epoch": 0.7935868331441543, + "epoch": 0.7924849103119952, "grad_norm": 0.0, - "learning_rate": 2.1527091065142358e-06, - "loss": 0.8182, + "learning_rate": 2.1748231314177004e-06, + "loss": 0.7634, "step": 27966 }, { - "epoch": 0.7936152099886493, + "epoch": 0.7925132477542577, "grad_norm": 0.0, - "learning_rate": 2.152139459323451e-06, - "loss": 0.7155, + "learning_rate": 2.1742517227545167e-06, + "loss": 0.746, "step": 27967 }, { - "epoch": 0.7936435868331442, + "epoch": 0.7925415851965202, "grad_norm": 0.0, - "learning_rate": 2.151569878423899e-06, - "loss": 0.8325, + "learning_rate": 2.1736803800105142e-06, + "loss": 0.8543, "step": 27968 }, { - "epoch": 0.793671963677639, + "epoch": 0.7925699226387827, "grad_norm": 0.0, - "learning_rate": 2.151000363820387e-06, - "loss": 0.7749, + "learning_rate": 2.1731091031905118e-06, + "loss": 0.9109, "step": 27969 }, { - "epoch": 0.7937003405221339, + "epoch": 0.7925982600810451, "grad_norm": 0.0, - "learning_rate": 2.150430915517727e-06, - "loss": 0.8856, + "learning_rate": 2.1725378922993133e-06, + "loss": 0.937, "step": 27970 }, { - "epoch": 0.7937287173666289, + "epoch": 0.7926265975233076, "grad_norm": 0.0, - "learning_rate": 2.149861533520734e-06, - "loss": 0.8567, + "learning_rate": 2.171966747341736e-06, + "loss": 0.9096, "step": 27971 }, { - "epoch": 0.7937570942111237, + "epoch": 0.79265493496557, "grad_norm": 0.0, - "learning_rate": 2.1492922178342092e-06, - "loss": 0.8324, + "learning_rate": 2.171395668322592e-06, + "loss": 0.8247, "step": 27972 }, { - "epoch": 0.7937854710556186, + "epoch": 0.7926832724078324, "grad_norm": 0.0, - "learning_rate": 2.148722968462967e-06, - "loss": 0.8601, + "learning_rate": 2.170824655246687e-06, + "loss": 0.7822, "step": 27973 }, { - "epoch": 0.7938138479001136, + "epoch": 0.7927116098500949, "grad_norm": 0.0, - "learning_rate": 2.1481537854118172e-06, - "loss": 0.7646, + "learning_rate": 2.1702537081188336e-06, + "loss": 0.7903, "step": 27974 }, { - "epoch": 0.7938422247446084, + "epoch": 0.7927399472923574, "grad_norm": 0.0, - "learning_rate": 2.147584668685563e-06, - "loss": 0.8418, + "learning_rate": 2.16968282694384e-06, + "loss": 0.6878, "step": 27975 }, { - "epoch": 0.7938706015891033, + "epoch": 0.7927682847346198, "grad_norm": 0.0, - "learning_rate": 2.147015618289013e-06, - "loss": 0.7826, + "learning_rate": 2.169112011726515e-06, + "loss": 0.8662, "step": 27976 }, { - "epoch": 0.7938989784335981, + "epoch": 0.7927966221768823, "grad_norm": 0.0, - "learning_rate": 2.1464466342269763e-06, - "loss": 0.7221, + "learning_rate": 2.1685412624716716e-06, + "loss": 0.8123, "step": 27977 }, { - "epoch": 0.7939273552780931, + "epoch": 0.7928249596191448, "grad_norm": 0.0, - "learning_rate": 2.1458777165042566e-06, - "loss": 0.8259, + "learning_rate": 2.1679705791841097e-06, + "loss": 0.8361, "step": 27978 }, { - "epoch": 0.793955732122588, + "epoch": 0.7928532970614073, "grad_norm": 0.0, - "learning_rate": 2.145308865125665e-06, - "loss": 0.8202, + "learning_rate": 2.1673999618686403e-06, + "loss": 0.8322, "step": 27979 }, { - "epoch": 0.7939841089670828, + "epoch": 0.7928816345036697, "grad_norm": 0.0, - "learning_rate": 2.1447400800959994e-06, - "loss": 0.8408, + "learning_rate": 2.1668294105300723e-06, + "loss": 0.8895, "step": 27980 }, { - "epoch": 0.7940124858115778, + "epoch": 0.7929099719459322, "grad_norm": 0.0, - "learning_rate": 2.1441713614200664e-06, - "loss": 0.7983, + "learning_rate": 2.166258925173206e-06, + "loss": 0.8355, "step": 27981 }, { - "epoch": 0.7940408626560727, + "epoch": 0.7929383093881947, "grad_norm": 0.0, - "learning_rate": 2.1436027091026745e-06, - "loss": 0.83, + "learning_rate": 2.16568850580285e-06, + "loss": 0.865, "step": 27982 }, { - "epoch": 0.7940692395005675, + "epoch": 0.792966646830457, "grad_norm": 0.0, - "learning_rate": 2.143034123148622e-06, - "loss": 0.8505, + "learning_rate": 2.1651181524238117e-06, + "loss": 0.8279, "step": 27983 }, { - "epoch": 0.7940976163450624, + "epoch": 0.7929949842727195, "grad_norm": 0.0, - "learning_rate": 2.1424656035627124e-06, - "loss": 0.8664, + "learning_rate": 2.164547865040889e-06, + "loss": 0.7778, "step": 27984 }, { - "epoch": 0.7941259931895573, + "epoch": 0.793023321714982, "grad_norm": 0.0, - "learning_rate": 2.141897150349752e-06, - "loss": 0.7388, + "learning_rate": 2.1639776436588932e-06, + "loss": 0.8519, "step": 27985 }, { - "epoch": 0.7941543700340522, + "epoch": 0.7930516591572445, "grad_norm": 0.0, - "learning_rate": 2.1413287635145365e-06, - "loss": 0.7571, + "learning_rate": 2.1634074882826206e-06, + "loss": 0.8224, "step": 27986 }, { - "epoch": 0.7941827468785471, + "epoch": 0.7930799965995069, "grad_norm": 0.0, - "learning_rate": 2.14076044306187e-06, - "loss": 0.9072, + "learning_rate": 2.162837398916876e-06, + "loss": 0.9296, "step": 27987 }, { - "epoch": 0.794211123723042, + "epoch": 0.7931083340417694, "grad_norm": 0.0, - "learning_rate": 2.140192188996557e-06, - "loss": 0.7317, + "learning_rate": 2.1622673755664634e-06, + "loss": 0.7418, "step": 27988 }, { - "epoch": 0.7942395005675369, + "epoch": 0.7931366714840319, "grad_norm": 0.0, - "learning_rate": 2.1396240013233903e-06, - "loss": 0.8087, + "learning_rate": 2.1616974182361826e-06, + "loss": 0.7942, "step": 27989 }, { - "epoch": 0.7942678774120318, + "epoch": 0.7931650089262943, "grad_norm": 0.0, - "learning_rate": 2.139055880047174e-06, - "loss": 0.8426, + "learning_rate": 2.161127526930837e-06, + "loss": 0.9018, "step": 27990 }, { - "epoch": 0.7942962542565267, + "epoch": 0.7931933463685568, "grad_norm": 0.0, - "learning_rate": 2.138487825172706e-06, - "loss": 0.7904, + "learning_rate": 2.160557701655224e-06, + "loss": 0.7703, "step": 27991 }, { - "epoch": 0.7943246311010216, + "epoch": 0.7932216838108193, "grad_norm": 0.0, - "learning_rate": 2.1379198367047838e-06, - "loss": 0.7792, + "learning_rate": 2.1599879424141434e-06, + "loss": 0.8326, "step": 27992 }, { - "epoch": 0.7943530079455164, + "epoch": 0.7932500212530817, "grad_norm": 0.0, - "learning_rate": 2.1373519146482092e-06, - "loss": 0.7539, + "learning_rate": 2.1594182492123995e-06, + "loss": 0.7484, "step": 27993 }, { - "epoch": 0.7943813847900113, + "epoch": 0.7932783586953441, "grad_norm": 0.0, - "learning_rate": 2.1367840590077735e-06, - "loss": 0.7743, + "learning_rate": 2.158848622054783e-06, + "loss": 0.8048, "step": 27994 }, { - "epoch": 0.7944097616345063, + "epoch": 0.7933066961376066, "grad_norm": 0.0, - "learning_rate": 2.1362162697882773e-06, - "loss": 0.8275, + "learning_rate": 2.158279060946097e-06, + "loss": 0.8238, "step": 27995 }, { - "epoch": 0.7944381384790011, + "epoch": 0.7933350335798691, "grad_norm": 0.0, - "learning_rate": 2.1356485469945175e-06, - "loss": 0.8074, + "learning_rate": 2.1577095658911417e-06, + "loss": 0.8194, "step": 27996 }, { - "epoch": 0.794466515323496, + "epoch": 0.7933633710221315, "grad_norm": 0.0, - "learning_rate": 2.135080890631286e-06, - "loss": 0.7688, + "learning_rate": 2.1571401368947077e-06, + "loss": 0.7649, "step": 27997 }, { - "epoch": 0.794494892167991, + "epoch": 0.793391708464394, "grad_norm": 0.0, - "learning_rate": 2.134513300703379e-06, - "loss": 0.8053, + "learning_rate": 2.156570773961597e-06, + "loss": 0.7087, "step": 27998 }, { - "epoch": 0.7945232690124858, + "epoch": 0.7934200459066565, "grad_norm": 0.0, - "learning_rate": 2.1339457772155957e-06, - "loss": 0.8484, + "learning_rate": 2.156001477096601e-06, + "loss": 0.7731, "step": 27999 }, { - "epoch": 0.7945516458569807, + "epoch": 0.7934483833489189, "grad_norm": 0.0, - "learning_rate": 2.133378320172722e-06, - "loss": 0.8196, + "learning_rate": 2.1554322463045173e-06, + "loss": 0.8659, "step": 28000 }, { - "epoch": 0.7945800227014755, + "epoch": 0.7934767207911814, "grad_norm": 0.0, - "learning_rate": 2.1328109295795563e-06, - "loss": 0.7378, + "learning_rate": 2.1548630815901407e-06, + "loss": 0.8644, "step": 28001 }, { - "epoch": 0.7946083995459705, + "epoch": 0.7935050582334439, "grad_norm": 0.0, - "learning_rate": 2.1322436054408923e-06, - "loss": 0.8456, + "learning_rate": 2.1542939829582656e-06, + "loss": 0.8676, "step": 28002 }, { - "epoch": 0.7946367763904654, + "epoch": 0.7935333956757064, "grad_norm": 0.0, - "learning_rate": 2.1316763477615175e-06, - "loss": 0.8169, + "learning_rate": 2.1537249504136857e-06, + "loss": 0.7978, "step": 28003 }, { - "epoch": 0.7946651532349602, + "epoch": 0.7935617331179687, "grad_norm": 0.0, - "learning_rate": 2.131109156546226e-06, - "loss": 0.7251, + "learning_rate": 2.153155983961197e-06, + "loss": 0.7462, "step": 28004 }, { - "epoch": 0.7946935300794552, + "epoch": 0.7935900705602312, "grad_norm": 0.0, - "learning_rate": 2.130542031799813e-06, - "loss": 0.7467, + "learning_rate": 2.1525870836055873e-06, + "loss": 0.8111, "step": 28005 }, { - "epoch": 0.7947219069239501, + "epoch": 0.7936184080024937, "grad_norm": 0.0, - "learning_rate": 2.1299749735270613e-06, - "loss": 0.8536, + "learning_rate": 2.152018249351653e-06, + "loss": 0.7925, "step": 28006 }, { - "epoch": 0.7947502837684449, + "epoch": 0.7936467454447561, "grad_norm": 0.0, - "learning_rate": 2.129407981732766e-06, - "loss": 0.8356, + "learning_rate": 2.15144948120418e-06, + "loss": 0.7549, "step": 28007 }, { - "epoch": 0.7947786606129399, + "epoch": 0.7936750828870186, "grad_norm": 0.0, - "learning_rate": 2.1288410564217145e-06, - "loss": 0.7962, + "learning_rate": 2.150880779167962e-06, + "loss": 0.8176, "step": 28008 }, { - "epoch": 0.7948070374574348, + "epoch": 0.7937034203292811, "grad_norm": 0.0, - "learning_rate": 2.1282741975986964e-06, - "loss": 0.8664, + "learning_rate": 2.1503121432477936e-06, + "loss": 0.861, "step": 28009 }, { - "epoch": 0.7948354143019296, + "epoch": 0.7937317577715436, "grad_norm": 0.0, - "learning_rate": 2.127707405268503e-06, - "loss": 0.8542, + "learning_rate": 2.1497435734484585e-06, + "loss": 0.7678, "step": 28010 }, { - "epoch": 0.7948637911464245, + "epoch": 0.793760095213806, "grad_norm": 0.0, - "learning_rate": 2.127140679435916e-06, - "loss": 0.7982, + "learning_rate": 2.149175069774747e-06, + "loss": 0.9372, "step": 28011 }, { - "epoch": 0.7948921679909194, + "epoch": 0.7937884326560685, "grad_norm": 0.0, - "learning_rate": 2.126574020105726e-06, - "loss": 0.7806, + "learning_rate": 2.1486066322314526e-06, + "loss": 0.7945, "step": 28012 }, { - "epoch": 0.7949205448354143, + "epoch": 0.793816770098331, "grad_norm": 0.0, - "learning_rate": 2.1260074272827227e-06, - "loss": 0.8553, + "learning_rate": 2.1480382608233574e-06, + "loss": 0.7381, "step": 28013 }, { - "epoch": 0.7949489216799092, + "epoch": 0.7938451075405933, "grad_norm": 0.0, - "learning_rate": 2.1254409009716837e-06, - "loss": 0.7828, + "learning_rate": 2.1474699555552527e-06, + "loss": 0.7241, "step": 28014 }, { - "epoch": 0.7949772985244041, + "epoch": 0.7938734449828558, "grad_norm": 0.0, - "learning_rate": 2.1248744411774014e-06, - "loss": 0.7774, + "learning_rate": 2.146901716431923e-06, + "loss": 0.986, "step": 28015 }, { - "epoch": 0.795005675368899, + "epoch": 0.7939017824251183, "grad_norm": 0.0, - "learning_rate": 2.1243080479046606e-06, - "loss": 0.8355, + "learning_rate": 2.1463335434581566e-06, + "loss": 0.7848, "step": 28016 }, { - "epoch": 0.7950340522133938, + "epoch": 0.7939301198673808, "grad_norm": 0.0, - "learning_rate": 2.123741721158241e-06, - "loss": 0.8107, + "learning_rate": 2.1457654366387427e-06, + "loss": 0.9301, "step": 28017 }, { - "epoch": 0.7950624290578887, + "epoch": 0.7939584573096432, "grad_norm": 0.0, - "learning_rate": 2.123175460942929e-06, - "loss": 0.7677, + "learning_rate": 2.14519739597846e-06, + "loss": 0.8928, "step": 28018 }, { - "epoch": 0.7950908059023837, + "epoch": 0.7939867947519057, "grad_norm": 0.0, - "learning_rate": 2.122609267263511e-06, - "loss": 0.7904, + "learning_rate": 2.1446294214820995e-06, + "loss": 0.9571, "step": 28019 }, { - "epoch": 0.7951191827468785, + "epoch": 0.7940151321941682, "grad_norm": 0.0, - "learning_rate": 2.122043140124764e-06, - "loss": 0.8893, + "learning_rate": 2.1440615131544395e-06, + "loss": 0.7284, "step": 28020 }, { - "epoch": 0.7951475595913734, + "epoch": 0.7940434696364306, "grad_norm": 0.0, - "learning_rate": 2.121477079531472e-06, - "loss": 0.727, + "learning_rate": 2.1434936710002663e-06, + "loss": 0.7268, "step": 28021 }, { - "epoch": 0.7951759364358684, + "epoch": 0.7940718070786931, "grad_norm": 0.0, - "learning_rate": 2.1209110854884184e-06, - "loss": 0.7927, + "learning_rate": 2.142925895024366e-06, + "loss": 0.8307, "step": 28022 }, { - "epoch": 0.7952043132803632, + "epoch": 0.7941001445209556, "grad_norm": 0.0, - "learning_rate": 2.120345158000382e-06, - "loss": 0.8681, + "learning_rate": 2.1423581852315156e-06, + "loss": 0.8959, "step": 28023 }, { - "epoch": 0.7952326901248581, + "epoch": 0.794128481963218, "grad_norm": 0.0, - "learning_rate": 2.1197792970721454e-06, - "loss": 0.7431, + "learning_rate": 2.1417905416265006e-06, + "loss": 0.8685, "step": 28024 }, { - "epoch": 0.7952610669693531, + "epoch": 0.7941568194054804, "grad_norm": 0.0, - "learning_rate": 2.1192135027084893e-06, - "loss": 0.8414, + "learning_rate": 2.1412229642141047e-06, + "loss": 0.8206, "step": 28025 }, { - "epoch": 0.7952894438138479, + "epoch": 0.7941851568477429, "grad_norm": 0.0, - "learning_rate": 2.118647774914189e-06, - "loss": 0.8222, + "learning_rate": 2.140655452999103e-06, + "loss": 0.7806, "step": 28026 }, { - "epoch": 0.7953178206583428, + "epoch": 0.7942134942900054, "grad_norm": 0.0, - "learning_rate": 2.1180821136940244e-06, - "loss": 0.8484, + "learning_rate": 2.1400880079862795e-06, + "loss": 0.818, "step": 28027 }, { - "epoch": 0.7953461975028376, + "epoch": 0.7942418317322678, "grad_norm": 0.0, - "learning_rate": 2.117516519052777e-06, - "loss": 0.9273, + "learning_rate": 2.1395206291804127e-06, + "loss": 0.9071, "step": 28028 }, { - "epoch": 0.7953745743473326, + "epoch": 0.7942701691745303, "grad_norm": 0.0, - "learning_rate": 2.11695099099522e-06, - "loss": 0.8832, + "learning_rate": 2.138953316586283e-06, + "loss": 0.8687, "step": 28029 }, { - "epoch": 0.7954029511918275, + "epoch": 0.7942985066167928, "grad_norm": 0.0, - "learning_rate": 2.116385529526135e-06, - "loss": 0.8226, + "learning_rate": 2.138386070208671e-06, + "loss": 0.8513, "step": 28030 }, { - "epoch": 0.7954313280363223, + "epoch": 0.7943268440590552, "grad_norm": 0.0, - "learning_rate": 2.1158201346502927e-06, - "loss": 0.8211, + "learning_rate": 2.13781889005235e-06, + "loss": 0.9061, "step": 28031 }, { - "epoch": 0.7954597048808173, + "epoch": 0.7943551815013177, "grad_norm": 0.0, - "learning_rate": 2.1152548063724722e-06, - "loss": 0.7286, + "learning_rate": 2.1372517761221e-06, + "loss": 0.8025, "step": 28032 }, { - "epoch": 0.7954880817253122, + "epoch": 0.7943835189435802, "grad_norm": 0.0, - "learning_rate": 2.114689544697451e-06, - "loss": 0.8586, + "learning_rate": 2.136684728422701e-06, + "loss": 0.8127, "step": 28033 }, { - "epoch": 0.795516458569807, + "epoch": 0.7944118563858427, "grad_norm": 0.0, - "learning_rate": 2.11412434963e-06, - "loss": 0.8387, + "learning_rate": 2.1361177469589234e-06, + "loss": 0.8839, "step": 28034 }, { - "epoch": 0.7955448354143019, + "epoch": 0.794440193828105, "grad_norm": 0.0, - "learning_rate": 2.1135592211748947e-06, - "loss": 0.7379, + "learning_rate": 2.135550831735549e-06, + "loss": 0.7402, "step": 28035 }, { - "epoch": 0.7955732122587968, + "epoch": 0.7944685312703675, "grad_norm": 0.0, - "learning_rate": 2.112994159336912e-06, - "loss": 0.8265, + "learning_rate": 2.134983982757347e-06, + "loss": 0.8909, "step": 28036 }, { - "epoch": 0.7956015891032917, + "epoch": 0.79449686871263, "grad_norm": 0.0, - "learning_rate": 2.112429164120816e-06, - "loss": 0.8206, + "learning_rate": 2.1344172000290964e-06, + "loss": 0.809, "step": 28037 }, { - "epoch": 0.7956299659477866, + "epoch": 0.7945252061548924, "grad_norm": 0.0, - "learning_rate": 2.111864235531389e-06, - "loss": 0.8069, + "learning_rate": 2.133850483555573e-06, + "loss": 0.8506, "step": 28038 }, { - "epoch": 0.7956583427922815, + "epoch": 0.7945535435971549, "grad_norm": 0.0, - "learning_rate": 2.111299373573402e-06, - "loss": 0.6778, + "learning_rate": 2.133283833341545e-06, + "loss": 0.7338, "step": 28039 }, { - "epoch": 0.7956867196367764, + "epoch": 0.7945818810394174, "grad_norm": 0.0, - "learning_rate": 2.1107345782516207e-06, - "loss": 0.7874, + "learning_rate": 2.1327172493917893e-06, + "loss": 0.8938, "step": 28040 }, { - "epoch": 0.7957150964812713, + "epoch": 0.7946102184816799, "grad_norm": 0.0, - "learning_rate": 2.110169849570819e-06, - "loss": 0.7967, + "learning_rate": 2.132150731711078e-06, + "loss": 0.755, "step": 28041 }, { - "epoch": 0.7957434733257662, + "epoch": 0.7946385559239423, "grad_norm": 0.0, - "learning_rate": 2.1096051875357703e-06, - "loss": 0.8579, + "learning_rate": 2.1315842803041807e-06, + "loss": 0.8201, "step": 28042 }, { - "epoch": 0.7957718501702611, + "epoch": 0.7946668933662048, "grad_norm": 0.0, - "learning_rate": 2.109040592151239e-06, - "loss": 0.7252, + "learning_rate": 2.131017895175875e-06, + "loss": 0.8864, "step": 28043 }, { - "epoch": 0.795800227014756, + "epoch": 0.7946952308084673, "grad_norm": 0.0, - "learning_rate": 2.108476063421996e-06, - "loss": 0.7036, + "learning_rate": 2.130451576330925e-06, + "loss": 0.9208, "step": 28044 }, { - "epoch": 0.7958286038592508, + "epoch": 0.7947235682507297, "grad_norm": 0.0, - "learning_rate": 2.107911601352813e-06, - "loss": 0.7614, + "learning_rate": 2.1298853237741034e-06, + "loss": 0.7283, "step": 28045 }, { - "epoch": 0.7958569807037458, + "epoch": 0.7947519056929921, "grad_norm": 0.0, - "learning_rate": 2.1073472059484534e-06, - "loss": 0.7868, + "learning_rate": 2.129319137510183e-06, + "loss": 0.8738, "step": 28046 }, { - "epoch": 0.7958853575482406, + "epoch": 0.7947802431352546, "grad_norm": 0.0, - "learning_rate": 2.1067828772136856e-06, - "loss": 1.0014, + "learning_rate": 2.1287530175439277e-06, + "loss": 0.9072, "step": 28047 }, { - "epoch": 0.7959137343927355, + "epoch": 0.794808580577517, "grad_norm": 0.0, - "learning_rate": 2.1062186151532813e-06, - "loss": 0.8203, + "learning_rate": 2.1281869638801113e-06, + "loss": 0.7947, "step": 28048 }, { - "epoch": 0.7959421112372305, + "epoch": 0.7948369180197795, "grad_norm": 0.0, - "learning_rate": 2.1056544197720006e-06, - "loss": 0.8329, + "learning_rate": 2.1276209765234956e-06, + "loss": 0.8203, "step": 28049 }, { - "epoch": 0.7959704880817253, + "epoch": 0.794865255462042, "grad_norm": 0.0, - "learning_rate": 2.1050902910746118e-06, - "loss": 0.9742, + "learning_rate": 2.1270550554788528e-06, + "loss": 0.8383, "step": 28050 }, { - "epoch": 0.7959988649262202, + "epoch": 0.7948935929043045, "grad_norm": 0.0, - "learning_rate": 2.104526229065883e-06, - "loss": 0.6776, + "learning_rate": 2.1264892007509507e-06, + "loss": 0.7493, "step": 28051 }, { - "epoch": 0.796027241770715, + "epoch": 0.7949219303465669, "grad_norm": 0.0, - "learning_rate": 2.1039622337505726e-06, - "loss": 0.9295, + "learning_rate": 2.1259234123445515e-06, + "loss": 0.7345, "step": 28052 }, { - "epoch": 0.79605561861521, + "epoch": 0.7949502677888294, "grad_norm": 0.0, - "learning_rate": 2.1033983051334485e-06, - "loss": 0.8938, + "learning_rate": 2.1253576902644234e-06, + "loss": 0.8624, "step": 28053 }, { - "epoch": 0.7960839954597049, + "epoch": 0.7949786052310919, "grad_norm": 0.0, - "learning_rate": 2.102834443219275e-06, - "loss": 0.9515, + "learning_rate": 2.124792034515333e-06, + "loss": 0.8993, "step": 28054 }, { - "epoch": 0.7961123723041997, + "epoch": 0.7950069426733543, "grad_norm": 0.0, - "learning_rate": 2.102270648012812e-06, - "loss": 0.8186, + "learning_rate": 2.1242264451020412e-06, + "loss": 0.8651, "step": 28055 }, { - "epoch": 0.7961407491486947, + "epoch": 0.7950352801156167, "grad_norm": 0.0, - "learning_rate": 2.101706919518828e-06, - "loss": 0.8318, + "learning_rate": 2.123660922029319e-06, + "loss": 0.7969, "step": 28056 }, { - "epoch": 0.7961691259931896, + "epoch": 0.7950636175578792, "grad_norm": 0.0, - "learning_rate": 2.1011432577420777e-06, - "loss": 0.7952, + "learning_rate": 2.123095465301922e-06, + "loss": 0.7926, "step": 28057 }, { - "epoch": 0.7961975028376844, + "epoch": 0.7950919550001417, "grad_norm": 0.0, - "learning_rate": 2.1005796626873253e-06, - "loss": 0.8855, + "learning_rate": 2.1225300749246182e-06, + "loss": 0.7918, "step": 28058 }, { - "epoch": 0.7962258796821794, + "epoch": 0.7951202924424041, "grad_norm": 0.0, - "learning_rate": 2.100016134359334e-06, - "loss": 0.8647, + "learning_rate": 2.12196475090217e-06, + "loss": 0.7844, "step": 28059 }, { - "epoch": 0.7962542565266743, + "epoch": 0.7951486298846666, "grad_norm": 0.0, - "learning_rate": 2.0994526727628593e-06, - "loss": 0.8015, + "learning_rate": 2.1213994932393367e-06, + "loss": 0.8258, "step": 28060 }, { - "epoch": 0.7962826333711691, + "epoch": 0.7951769673269291, "grad_norm": 0.0, - "learning_rate": 2.0988892779026626e-06, - "loss": 0.9025, + "learning_rate": 2.1208343019408807e-06, + "loss": 0.7704, "step": 28061 }, { - "epoch": 0.796311010215664, + "epoch": 0.7952053047691915, "grad_norm": 0.0, - "learning_rate": 2.098325949783506e-06, - "loss": 0.8825, + "learning_rate": 2.120269177011566e-06, + "loss": 0.7249, "step": 28062 }, { - "epoch": 0.7963393870601589, + "epoch": 0.795233642211454, "grad_norm": 0.0, - "learning_rate": 2.0977626884101433e-06, - "loss": 0.7247, + "learning_rate": 2.119704118456146e-06, + "loss": 0.8212, "step": 28063 }, { - "epoch": 0.7963677639046538, + "epoch": 0.7952619796537165, "grad_norm": 0.0, - "learning_rate": 2.0971994937873342e-06, - "loss": 0.785, + "learning_rate": 2.119139126279389e-06, + "loss": 0.6883, "step": 28064 }, { - "epoch": 0.7963961407491487, + "epoch": 0.795290317095979, "grad_norm": 0.0, - "learning_rate": 2.0966363659198385e-06, - "loss": 0.7773, + "learning_rate": 2.1185742004860465e-06, + "loss": 0.8581, "step": 28065 }, { - "epoch": 0.7964245175936436, + "epoch": 0.7953186545382414, "grad_norm": 0.0, - "learning_rate": 2.0960733048124082e-06, - "loss": 0.8859, + "learning_rate": 2.11800934108088e-06, + "loss": 0.7789, "step": 28066 }, { - "epoch": 0.7964528944381385, + "epoch": 0.7953469919805038, "grad_norm": 0.0, - "learning_rate": 2.0955103104698026e-06, - "loss": 0.6983, + "learning_rate": 2.117444548068648e-06, + "loss": 0.7367, "step": 28067 }, { - "epoch": 0.7964812712826334, + "epoch": 0.7953753294227663, "grad_norm": 0.0, - "learning_rate": 2.094947382896776e-06, - "loss": 0.6953, + "learning_rate": 2.1168798214541075e-06, + "loss": 0.8672, "step": 28068 }, { - "epoch": 0.7965096481271282, + "epoch": 0.7954036668650287, "grad_norm": 0.0, - "learning_rate": 2.094384522098084e-06, - "loss": 0.891, + "learning_rate": 2.1163151612420153e-06, + "loss": 0.7834, "step": 28069 }, { - "epoch": 0.7965380249716232, + "epoch": 0.7954320043072912, "grad_norm": 0.0, - "learning_rate": 2.0938217280784846e-06, - "loss": 0.759, + "learning_rate": 2.1157505674371305e-06, + "loss": 0.8042, "step": 28070 }, { - "epoch": 0.796566401816118, + "epoch": 0.7954603417495537, "grad_norm": 0.0, - "learning_rate": 2.0932590008427256e-06, - "loss": 0.788, + "learning_rate": 2.115186040044205e-06, + "loss": 0.8503, "step": 28071 }, { - "epoch": 0.7965947786606129, + "epoch": 0.7954886791918161, "grad_norm": 0.0, - "learning_rate": 2.0926963403955637e-06, - "loss": 0.8058, + "learning_rate": 2.114621579067997e-06, + "loss": 0.7169, "step": 28072 }, { - "epoch": 0.7966231555051079, + "epoch": 0.7955170166340786, "grad_norm": 0.0, - "learning_rate": 2.0921337467417536e-06, - "loss": 0.7253, + "learning_rate": 2.114057184513256e-06, + "loss": 0.8415, "step": 28073 }, { - "epoch": 0.7966515323496027, + "epoch": 0.7955453540763411, "grad_norm": 0.0, - "learning_rate": 2.0915712198860417e-06, - "loss": 0.8843, + "learning_rate": 2.113492856384741e-06, + "loss": 0.834, "step": 28074 }, { - "epoch": 0.7966799091940976, + "epoch": 0.7955736915186036, "grad_norm": 0.0, - "learning_rate": 2.091008759833184e-06, - "loss": 0.7838, + "learning_rate": 2.112928594687208e-06, + "loss": 0.7303, "step": 28075 }, { - "epoch": 0.7967082860385926, + "epoch": 0.795602028960866, "grad_norm": 0.0, - "learning_rate": 2.090446366587934e-06, - "loss": 0.8925, + "learning_rate": 2.1123643994254016e-06, + "loss": 0.8035, "step": 28076 }, { - "epoch": 0.7967366628830874, + "epoch": 0.7956303664031285, "grad_norm": 0.0, - "learning_rate": 2.0898840401550356e-06, - "loss": 0.7518, + "learning_rate": 2.111800270604083e-06, + "loss": 0.7039, "step": 28077 }, { - "epoch": 0.7967650397275823, + "epoch": 0.795658703845391, "grad_norm": 0.0, - "learning_rate": 2.089321780539243e-06, - "loss": 0.7919, + "learning_rate": 2.1112362082279957e-06, + "loss": 0.8556, "step": 28078 }, { - "epoch": 0.7967934165720771, + "epoch": 0.7956870412876533, "grad_norm": 0.0, - "learning_rate": 2.088759587745307e-06, - "loss": 0.7871, + "learning_rate": 2.1106722123018965e-06, + "loss": 0.7682, "step": 28079 }, { - "epoch": 0.7968217934165721, + "epoch": 0.7957153787299158, "grad_norm": 0.0, - "learning_rate": 2.088197461777972e-06, - "loss": 0.8213, + "learning_rate": 2.110108282830534e-06, + "loss": 0.7594, "step": 28080 }, { - "epoch": 0.796850170261067, + "epoch": 0.7957437161721783, "grad_norm": 0.0, - "learning_rate": 2.0876354026419887e-06, - "loss": 0.7914, + "learning_rate": 2.109544419818661e-06, + "loss": 0.8502, "step": 28081 }, { - "epoch": 0.7968785471055618, + "epoch": 0.7957720536144408, "grad_norm": 0.0, - "learning_rate": 2.0870734103421075e-06, - "loss": 0.8437, + "learning_rate": 2.108980623271024e-06, + "loss": 0.7769, "step": 28082 }, { - "epoch": 0.7969069239500568, + "epoch": 0.7958003910567032, "grad_norm": 0.0, - "learning_rate": 2.086511484883067e-06, - "loss": 0.7804, + "learning_rate": 2.1084168931923766e-06, + "loss": 0.8229, "step": 28083 }, { - "epoch": 0.7969353007945517, + "epoch": 0.7958287284989657, "grad_norm": 0.0, - "learning_rate": 2.0859496262696254e-06, - "loss": 0.7683, + "learning_rate": 2.107853229587461e-06, + "loss": 0.8048, "step": 28084 }, { - "epoch": 0.7969636776390465, + "epoch": 0.7958570659412282, "grad_norm": 0.0, - "learning_rate": 2.0853878345065203e-06, - "loss": 0.762, + "learning_rate": 2.1072896324610305e-06, + "loss": 0.8282, "step": 28085 }, { - "epoch": 0.7969920544835414, + "epoch": 0.7958854033834906, "grad_norm": 0.0, - "learning_rate": 2.0848261095985e-06, - "loss": 0.7509, + "learning_rate": 2.1067261018178287e-06, + "loss": 0.7545, "step": 28086 }, { - "epoch": 0.7970204313280363, + "epoch": 0.7959137408257531, "grad_norm": 0.0, - "learning_rate": 2.084264451550313e-06, - "loss": 0.7758, + "learning_rate": 2.106162637662603e-06, + "loss": 0.8895, "step": 28087 }, { - "epoch": 0.7970488081725312, + "epoch": 0.7959420782680156, "grad_norm": 0.0, - "learning_rate": 2.0837028603666965e-06, - "loss": 0.8207, + "learning_rate": 2.1055992400001046e-06, + "loss": 0.7329, "step": 28088 }, { - "epoch": 0.7970771850170261, + "epoch": 0.795970415710278, "grad_norm": 0.0, - "learning_rate": 2.083141336052398e-06, - "loss": 0.8813, + "learning_rate": 2.1050359088350724e-06, + "loss": 0.866, "step": 28089 }, { - "epoch": 0.797105561861521, + "epoch": 0.7959987531525404, "grad_norm": 0.0, - "learning_rate": 2.0825798786121632e-06, - "loss": 0.7508, + "learning_rate": 2.104472644172254e-06, + "loss": 0.8306, "step": 28090 }, { - "epoch": 0.7971339387060159, + "epoch": 0.7960270905948029, "grad_norm": 0.0, - "learning_rate": 2.0820184880507287e-06, - "loss": 0.7859, + "learning_rate": 2.1039094460163978e-06, + "loss": 0.8162, "step": 28091 }, { - "epoch": 0.7971623155505108, + "epoch": 0.7960554280370654, "grad_norm": 0.0, - "learning_rate": 2.081457164372841e-06, - "loss": 0.9124, + "learning_rate": 2.103346314372241e-06, + "loss": 0.7989, "step": 28092 }, { - "epoch": 0.7971906923950057, + "epoch": 0.7960837654793278, "grad_norm": 0.0, - "learning_rate": 2.0808959075832423e-06, - "loss": 0.9303, + "learning_rate": 2.102783249244531e-06, + "loss": 0.8901, "step": 28093 }, { - "epoch": 0.7972190692395006, + "epoch": 0.7961121029215903, "grad_norm": 0.0, - "learning_rate": 2.080334717686671e-06, - "loss": 0.7578, + "learning_rate": 2.1022202506380097e-06, + "loss": 0.6738, "step": 28094 }, { - "epoch": 0.7972474460839954, + "epoch": 0.7961404403638528, "grad_norm": 0.0, - "learning_rate": 2.0797735946878663e-06, - "loss": 0.9327, + "learning_rate": 2.1016573185574206e-06, + "loss": 0.7461, "step": 28095 }, { - "epoch": 0.7972758229284903, + "epoch": 0.7961687778061152, "grad_norm": 0.0, - "learning_rate": 2.079212538591574e-06, - "loss": 0.9008, + "learning_rate": 2.1010944530075074e-06, + "loss": 0.8838, "step": 28096 }, { - "epoch": 0.7973041997729853, + "epoch": 0.7961971152483777, "grad_norm": 0.0, - "learning_rate": 2.078651549402526e-06, - "loss": 0.7726, + "learning_rate": 2.1005316539930064e-06, + "loss": 0.7732, "step": 28097 }, { - "epoch": 0.7973325766174801, + "epoch": 0.7962254526906402, "grad_norm": 0.0, - "learning_rate": 2.0780906271254643e-06, - "loss": 0.827, + "learning_rate": 2.0999689215186603e-06, + "loss": 0.8648, "step": 28098 }, { - "epoch": 0.797360953461975, + "epoch": 0.7962537901329027, "grad_norm": 0.0, - "learning_rate": 2.0775297717651275e-06, - "loss": 0.814, + "learning_rate": 2.0994062555892123e-06, + "loss": 0.7302, "step": 28099 }, { - "epoch": 0.79738933030647, + "epoch": 0.796282127575165, "grad_norm": 0.0, - "learning_rate": 2.076968983326253e-06, - "loss": 0.8335, + "learning_rate": 2.098843656209396e-06, + "loss": 0.8085, "step": 28100 }, { - "epoch": 0.7974177071509648, + "epoch": 0.7963104650174275, "grad_norm": 0.0, - "learning_rate": 2.0764082618135806e-06, - "loss": 0.777, + "learning_rate": 2.098281123383957e-06, + "loss": 0.8076, "step": 28101 }, { - "epoch": 0.7974460839954597, + "epoch": 0.79633880245969, "grad_norm": 0.0, - "learning_rate": 2.075847607231841e-06, - "loss": 0.8844, + "learning_rate": 2.097718657117628e-06, + "loss": 0.8088, "step": 28102 }, { - "epoch": 0.7974744608399545, + "epoch": 0.7963671399019524, "grad_norm": 0.0, - "learning_rate": 2.075287019585773e-06, - "loss": 0.8071, + "learning_rate": 2.0971562574151483e-06, + "loss": 0.7762, "step": 28103 }, { - "epoch": 0.7975028376844495, + "epoch": 0.7963954773442149, "grad_norm": 0.0, - "learning_rate": 2.0747264988801142e-06, - "loss": 0.7609, + "learning_rate": 2.0965939242812594e-06, + "loss": 0.8867, "step": 28104 }, { - "epoch": 0.7975312145289444, + "epoch": 0.7964238147864774, "grad_norm": 0.0, - "learning_rate": 2.0741660451195942e-06, - "loss": 0.8577, + "learning_rate": 2.096031657720692e-06, + "loss": 0.8228, "step": 28105 }, { - "epoch": 0.7975595913734392, + "epoch": 0.7964521522287399, "grad_norm": 0.0, - "learning_rate": 2.0736056583089505e-06, - "loss": 0.7584, + "learning_rate": 2.095469457738185e-06, + "loss": 0.7543, "step": 28106 }, { - "epoch": 0.7975879682179342, + "epoch": 0.7964804896710023, "grad_norm": 0.0, - "learning_rate": 2.073045338452918e-06, - "loss": 0.8415, + "learning_rate": 2.094907324338473e-06, + "loss": 0.6374, "step": 28107 }, { - "epoch": 0.7976163450624291, + "epoch": 0.7965088271132648, "grad_norm": 0.0, - "learning_rate": 2.072485085556225e-06, - "loss": 0.7102, + "learning_rate": 2.0943452575262935e-06, + "loss": 0.9035, "step": 28108 }, { - "epoch": 0.7976447219069239, + "epoch": 0.7965371645555273, "grad_norm": 0.0, - "learning_rate": 2.0719248996236075e-06, - "loss": 0.8514, + "learning_rate": 2.0937832573063823e-06, + "loss": 0.7816, "step": 28109 }, { - "epoch": 0.7976730987514189, + "epoch": 0.7965655019977896, "grad_norm": 0.0, - "learning_rate": 2.071364780659799e-06, - "loss": 0.757, + "learning_rate": 2.0932213236834663e-06, + "loss": 0.7554, "step": 28110 }, { - "epoch": 0.7977014755959138, + "epoch": 0.7965938394400521, "grad_norm": 0.0, - "learning_rate": 2.0708047286695265e-06, - "loss": 0.7997, + "learning_rate": 2.0926594566622847e-06, + "loss": 0.7658, "step": 28111 }, { - "epoch": 0.7977298524404086, + "epoch": 0.7966221768823146, "grad_norm": 0.0, - "learning_rate": 2.0702447436575223e-06, - "loss": 0.8235, + "learning_rate": 2.0920976562475714e-06, + "loss": 0.8488, "step": 28112 }, { - "epoch": 0.7977582292849035, + "epoch": 0.796650514324577, "grad_norm": 0.0, - "learning_rate": 2.0696848256285206e-06, - "loss": 0.8605, + "learning_rate": 2.0915359224440535e-06, + "loss": 0.7847, "step": 28113 }, { - "epoch": 0.7977866061293984, + "epoch": 0.7966788517668395, "grad_norm": 0.0, - "learning_rate": 2.069124974587242e-06, - "loss": 0.8441, + "learning_rate": 2.090974255256467e-06, + "loss": 0.7645, "step": 28114 }, { - "epoch": 0.7978149829738933, + "epoch": 0.796707189209102, "grad_norm": 0.0, - "learning_rate": 2.0685651905384264e-06, - "loss": 0.8462, + "learning_rate": 2.0904126546895385e-06, + "loss": 0.7967, "step": 28115 }, { - "epoch": 0.7978433598183882, + "epoch": 0.7967355266513645, "grad_norm": 0.0, - "learning_rate": 2.068005473486795e-06, - "loss": 0.8734, + "learning_rate": 2.089851120748002e-06, + "loss": 0.776, "step": 28116 }, { - "epoch": 0.7978717366628831, + "epoch": 0.7967638640936269, "grad_norm": 0.0, - "learning_rate": 2.0674458234370777e-06, - "loss": 0.8097, + "learning_rate": 2.08928965343659e-06, + "loss": 0.844, "step": 28117 }, { - "epoch": 0.797900113507378, + "epoch": 0.7967922015358894, "grad_norm": 0.0, - "learning_rate": 2.066886240394004e-06, - "loss": 0.8307, + "learning_rate": 2.088728252760026e-06, + "loss": 0.8049, "step": 28118 }, { - "epoch": 0.7979284903518729, + "epoch": 0.7968205389781519, "grad_norm": 0.0, - "learning_rate": 2.0663267243622963e-06, - "loss": 0.7994, + "learning_rate": 2.0881669187230415e-06, + "loss": 0.8976, "step": 28119 }, { - "epoch": 0.7979568671963677, + "epoch": 0.7968488764204142, "grad_norm": 0.0, - "learning_rate": 2.065767275346684e-06, - "loss": 0.8316, + "learning_rate": 2.0876056513303644e-06, + "loss": 0.8206, "step": 28120 }, { - "epoch": 0.7979852440408627, + "epoch": 0.7968772138626767, "grad_norm": 0.0, - "learning_rate": 2.065207893351895e-06, - "loss": 0.6845, + "learning_rate": 2.087044450586724e-06, + "loss": 0.7701, "step": 28121 }, { - "epoch": 0.7980136208853575, + "epoch": 0.7969055513049392, "grad_norm": 0.0, - "learning_rate": 2.0646485783826488e-06, - "loss": 0.8457, + "learning_rate": 2.086483316496849e-06, + "loss": 0.8652, "step": 28122 }, { - "epoch": 0.7980419977298524, + "epoch": 0.7969338887472017, "grad_norm": 0.0, - "learning_rate": 2.064089330443673e-06, - "loss": 0.9491, + "learning_rate": 2.0859222490654608e-06, + "loss": 0.8466, "step": 28123 }, { - "epoch": 0.7980703745743474, + "epoch": 0.7969622261894641, "grad_norm": 0.0, - "learning_rate": 2.063530149539694e-06, - "loss": 0.8669, + "learning_rate": 2.0853612482972887e-06, + "loss": 0.7194, "step": 28124 }, { - "epoch": 0.7980987514188422, + "epoch": 0.7969905636317266, "grad_norm": 0.0, - "learning_rate": 2.0629710356754296e-06, - "loss": 0.6781, + "learning_rate": 2.0848003141970597e-06, + "loss": 0.8566, "step": 28125 }, { - "epoch": 0.7981271282633371, + "epoch": 0.7970189010739891, "grad_norm": 0.0, - "learning_rate": 2.062411988855606e-06, - "loss": 0.8762, + "learning_rate": 2.0842394467694947e-06, + "loss": 0.8454, "step": 28126 }, { - "epoch": 0.7981555051078321, + "epoch": 0.7970472385162515, "grad_norm": 0.0, - "learning_rate": 2.0618530090849475e-06, - "loss": 0.9263, + "learning_rate": 2.0836786460193203e-06, + "loss": 0.8079, "step": 28127 }, { - "epoch": 0.7981838819523269, + "epoch": 0.797075575958514, "grad_norm": 0.0, - "learning_rate": 2.0612940963681703e-06, - "loss": 0.8432, + "learning_rate": 2.0831179119512623e-06, + "loss": 0.7901, "step": 28128 }, { - "epoch": 0.7982122587968218, + "epoch": 0.7971039134007765, "grad_norm": 0.0, - "learning_rate": 2.0607352507099963e-06, - "loss": 0.8727, + "learning_rate": 2.0825572445700406e-06, + "loss": 0.8611, "step": 28129 }, { - "epoch": 0.7982406356413166, + "epoch": 0.797132250843039, "grad_norm": 0.0, - "learning_rate": 2.060176472115155e-06, - "loss": 0.8395, + "learning_rate": 2.0819966438803806e-06, + "loss": 0.8606, "step": 28130 }, { - "epoch": 0.7982690124858116, + "epoch": 0.7971605882853013, "grad_norm": 0.0, - "learning_rate": 2.059617760588356e-06, - "loss": 0.8218, + "learning_rate": 2.0814361098870016e-06, + "loss": 0.9009, "step": 28131 }, { - "epoch": 0.7982973893303065, + "epoch": 0.7971889257275638, "grad_norm": 0.0, - "learning_rate": 2.059059116134323e-06, - "loss": 0.8971, + "learning_rate": 2.0808756425946262e-06, + "loss": 0.7625, "step": 28132 }, { - "epoch": 0.7983257661748013, + "epoch": 0.7972172631698263, "grad_norm": 0.0, - "learning_rate": 2.0585005387577784e-06, - "loss": 0.75, + "learning_rate": 2.0803152420079763e-06, + "loss": 0.8191, "step": 28133 }, { - "epoch": 0.7983541430192963, + "epoch": 0.7972456006120887, "grad_norm": 0.0, - "learning_rate": 2.057942028463433e-06, - "loss": 0.8324, + "learning_rate": 2.0797549081317724e-06, + "loss": 0.8284, "step": 28134 }, { - "epoch": 0.7983825198637912, + "epoch": 0.7972739380543512, "grad_norm": 0.0, - "learning_rate": 2.0573835852560086e-06, - "loss": 0.8337, + "learning_rate": 2.0791946409707353e-06, + "loss": 0.8239, "step": 28135 }, { - "epoch": 0.798410896708286, + "epoch": 0.7973022754966137, "grad_norm": 0.0, - "learning_rate": 2.056825209140224e-06, - "loss": 0.8413, + "learning_rate": 2.0786344405295822e-06, + "loss": 0.8809, "step": 28136 }, { - "epoch": 0.7984392735527809, + "epoch": 0.7973306129388761, "grad_norm": 0.0, - "learning_rate": 2.0562669001207923e-06, - "loss": 0.7867, + "learning_rate": 2.0780743068130316e-06, + "loss": 0.8584, "step": 28137 }, { - "epoch": 0.7984676503972759, + "epoch": 0.7973589503811386, "grad_norm": 0.0, - "learning_rate": 2.055708658202433e-06, - "loss": 0.7985, + "learning_rate": 2.077514239825805e-06, + "loss": 0.8122, "step": 28138 }, { - "epoch": 0.7984960272417707, + "epoch": 0.7973872878234011, "grad_norm": 0.0, - "learning_rate": 2.0551504833898582e-06, - "loss": 0.885, + "learning_rate": 2.076954239572616e-06, + "loss": 0.8515, "step": 28139 }, { - "epoch": 0.7985244040862656, + "epoch": 0.7974156252656636, "grad_norm": 0.0, - "learning_rate": 2.0545923756877837e-06, - "loss": 0.8052, + "learning_rate": 2.0763943060581836e-06, + "loss": 0.7602, "step": 28140 }, { - "epoch": 0.7985527809307605, + "epoch": 0.797443962707926, "grad_norm": 0.0, - "learning_rate": 2.0540343351009272e-06, - "loss": 0.8732, + "learning_rate": 2.0758344392872265e-06, + "loss": 0.7893, "step": 28141 }, { - "epoch": 0.7985811577752554, + "epoch": 0.7974723001501884, "grad_norm": 0.0, - "learning_rate": 2.053476361633997e-06, - "loss": 0.8627, + "learning_rate": 2.0752746392644563e-06, + "loss": 0.8811, "step": 28142 }, { - "epoch": 0.7986095346197503, + "epoch": 0.7975006375924509, "grad_norm": 0.0, - "learning_rate": 2.05291845529171e-06, - "loss": 0.9613, + "learning_rate": 2.0747149059945937e-06, + "loss": 0.7294, "step": 28143 }, { - "epoch": 0.7986379114642451, + "epoch": 0.7975289750347133, "grad_norm": 0.0, - "learning_rate": 2.0523606160787803e-06, - "loss": 0.8018, + "learning_rate": 2.074155239482347e-06, + "loss": 0.7651, "step": 28144 }, { - "epoch": 0.7986662883087401, + "epoch": 0.7975573124769758, "grad_norm": 0.0, - "learning_rate": 2.051802843999913e-06, - "loss": 0.8622, + "learning_rate": 2.0735956397324344e-06, + "loss": 0.796, "step": 28145 }, { - "epoch": 0.798694665153235, + "epoch": 0.7975856499192383, "grad_norm": 0.0, - "learning_rate": 2.0512451390598254e-06, - "loss": 0.8751, + "learning_rate": 2.0730361067495685e-06, + "loss": 0.731, "step": 28146 }, { - "epoch": 0.7987230419977298, + "epoch": 0.7976139873615008, "grad_norm": 0.0, - "learning_rate": 2.050687501263231e-06, - "loss": 0.7453, + "learning_rate": 2.072476640538463e-06, + "loss": 0.7866, "step": 28147 }, { - "epoch": 0.7987514188422248, + "epoch": 0.7976423248037632, "grad_norm": 0.0, - "learning_rate": 2.050129930614835e-06, - "loss": 0.8897, + "learning_rate": 2.071917241103831e-06, + "loss": 0.8329, "step": 28148 }, { - "epoch": 0.7987797956867196, + "epoch": 0.7976706622460257, "grad_norm": 0.0, - "learning_rate": 2.0495724271193472e-06, - "loss": 0.8199, + "learning_rate": 2.0713579084503877e-06, + "loss": 0.8065, "step": 28149 }, { - "epoch": 0.7988081725312145, + "epoch": 0.7976989996882882, "grad_norm": 0.0, - "learning_rate": 2.0490149907814825e-06, - "loss": 0.8038, + "learning_rate": 2.0707986425828363e-06, + "loss": 0.6681, "step": 28150 }, { - "epoch": 0.7988365493757095, + "epoch": 0.7977273371305506, "grad_norm": 0.0, - "learning_rate": 2.0484576216059414e-06, - "loss": 0.8394, + "learning_rate": 2.070239443505897e-06, + "loss": 0.7018, "step": 28151 }, { - "epoch": 0.7988649262202043, + "epoch": 0.797755674572813, "grad_norm": 0.0, - "learning_rate": 2.0479003195974376e-06, - "loss": 0.8398, + "learning_rate": 2.0696803112242716e-06, + "loss": 0.8727, "step": 28152 }, { - "epoch": 0.7988933030646992, + "epoch": 0.7977840120150755, "grad_norm": 0.0, - "learning_rate": 2.04734308476068e-06, - "loss": 0.7646, + "learning_rate": 2.0691212457426748e-06, + "loss": 0.8461, "step": 28153 }, { - "epoch": 0.798921679909194, + "epoch": 0.797812349457338, "grad_norm": 0.0, - "learning_rate": 2.046785917100369e-06, - "loss": 0.6956, + "learning_rate": 2.068562247065816e-06, + "loss": 0.764, "step": 28154 }, { - "epoch": 0.798950056753689, + "epoch": 0.7978406868996004, "grad_norm": 0.0, - "learning_rate": 2.0462288166212164e-06, - "loss": 0.8389, + "learning_rate": 2.068003315198401e-06, + "loss": 0.8519, "step": 28155 }, { - "epoch": 0.7989784335981839, + "epoch": 0.7978690243418629, "grad_norm": 0.0, - "learning_rate": 2.0456717833279284e-06, - "loss": 0.8366, + "learning_rate": 2.067444450145142e-06, + "loss": 0.7843, "step": 28156 }, { - "epoch": 0.7990068104426787, + "epoch": 0.7978973617841254, "grad_norm": 0.0, - "learning_rate": 2.0451148172252057e-06, - "loss": 0.7478, + "learning_rate": 2.0668856519107415e-06, + "loss": 0.7012, "step": 28157 }, { - "epoch": 0.7990351872871737, + "epoch": 0.7979256992263878, "grad_norm": 0.0, - "learning_rate": 2.0445579183177554e-06, - "loss": 0.8315, + "learning_rate": 2.0663269204999094e-06, + "loss": 0.8944, "step": 28158 }, { - "epoch": 0.7990635641316686, + "epoch": 0.7979540366686503, "grad_norm": 0.0, - "learning_rate": 2.0440010866102844e-06, - "loss": 0.7617, + "learning_rate": 2.065768255917351e-06, + "loss": 0.795, "step": 28159 }, { - "epoch": 0.7990919409761634, + "epoch": 0.7979823741109128, "grad_norm": 0.0, - "learning_rate": 2.0434443221074896e-06, - "loss": 0.8612, + "learning_rate": 2.065209658167773e-06, + "loss": 0.7629, "step": 28160 }, { - "epoch": 0.7991203178206583, + "epoch": 0.7980107115531752, "grad_norm": 0.0, - "learning_rate": 2.042887624814083e-06, - "loss": 0.845, + "learning_rate": 2.064651127255881e-06, + "loss": 0.8751, "step": 28161 }, { - "epoch": 0.7991486946651533, + "epoch": 0.7980390489954376, "grad_norm": 0.0, - "learning_rate": 2.0423309947347603e-06, - "loss": 0.8556, + "learning_rate": 2.064092663186381e-06, + "loss": 0.7558, "step": 28162 }, { - "epoch": 0.7991770715096481, + "epoch": 0.7980673864377001, "grad_norm": 0.0, - "learning_rate": 2.041774431874225e-06, - "loss": 0.843, + "learning_rate": 2.0635342659639734e-06, + "loss": 0.8123, "step": 28163 }, { - "epoch": 0.799205448354143, + "epoch": 0.7980957238799626, "grad_norm": 0.0, - "learning_rate": 2.041217936237181e-06, - "loss": 0.7334, + "learning_rate": 2.0629759355933665e-06, + "loss": 0.7497, "step": 28164 }, { - "epoch": 0.799233825198638, + "epoch": 0.798124061322225, "grad_norm": 0.0, - "learning_rate": 2.040661507828324e-06, - "loss": 0.7235, + "learning_rate": 2.0624176720792587e-06, + "loss": 0.7443, "step": 28165 }, { - "epoch": 0.7992622020431328, + "epoch": 0.7981523987644875, "grad_norm": 0.0, - "learning_rate": 2.040105146652358e-06, - "loss": 0.8637, + "learning_rate": 2.0618594754263534e-06, + "loss": 0.7894, "step": 28166 }, { - "epoch": 0.7992905788876277, + "epoch": 0.79818073620675, "grad_norm": 0.0, - "learning_rate": 2.039548852713984e-06, - "loss": 0.7504, + "learning_rate": 2.061301345639356e-06, + "loss": 0.9195, "step": 28167 }, { - "epoch": 0.7993189557321226, + "epoch": 0.7982090736490124, "grad_norm": 0.0, - "learning_rate": 2.038992626017896e-06, - "loss": 0.7665, + "learning_rate": 2.0607432827229635e-06, + "loss": 0.8908, "step": 28168 }, { - "epoch": 0.7993473325766175, + "epoch": 0.7982374110912749, "grad_norm": 0.0, - "learning_rate": 2.038436466568795e-06, - "loss": 0.7969, + "learning_rate": 2.0601852866818784e-06, + "loss": 0.8413, "step": 28169 }, { - "epoch": 0.7993757094211124, + "epoch": 0.7982657485335374, "grad_norm": 0.0, - "learning_rate": 2.0378803743713826e-06, - "loss": 0.8026, + "learning_rate": 2.0596273575208035e-06, + "loss": 0.8235, "step": 28170 }, { - "epoch": 0.7994040862656072, + "epoch": 0.7982940859757999, "grad_norm": 0.0, - "learning_rate": 2.0373243494303483e-06, - "loss": 0.8712, + "learning_rate": 2.0590694952444333e-06, + "loss": 0.6608, "step": 28171 }, { - "epoch": 0.7994324631101022, + "epoch": 0.7983224234180623, "grad_norm": 0.0, - "learning_rate": 2.0367683917503945e-06, - "loss": 0.7891, + "learning_rate": 2.0585116998574693e-06, + "loss": 0.8655, "step": 28172 }, { - "epoch": 0.799460839954597, + "epoch": 0.7983507608603247, "grad_norm": 0.0, - "learning_rate": 2.0362125013362187e-06, - "loss": 0.7601, + "learning_rate": 2.05795397136461e-06, + "loss": 0.8109, "step": 28173 }, { - "epoch": 0.7994892167990919, + "epoch": 0.7983790983025872, "grad_norm": 0.0, - "learning_rate": 2.0356566781925103e-06, - "loss": 0.8078, + "learning_rate": 2.057396309770554e-06, + "loss": 0.851, "step": 28174 }, { - "epoch": 0.7995175936435869, + "epoch": 0.7984074357448496, "grad_norm": 0.0, - "learning_rate": 2.0351009223239693e-06, - "loss": 0.892, + "learning_rate": 2.056838715080001e-06, + "loss": 0.8013, "step": 28175 }, { - "epoch": 0.7995459704880817, + "epoch": 0.7984357731871121, "grad_norm": 0.0, - "learning_rate": 2.034545233735288e-06, - "loss": 0.7358, + "learning_rate": 2.056281187297643e-06, + "loss": 0.7916, "step": 28176 }, { - "epoch": 0.7995743473325766, + "epoch": 0.7984641106293746, "grad_norm": 0.0, - "learning_rate": 2.033989612431161e-06, - "loss": 0.8436, + "learning_rate": 2.0557237264281772e-06, + "loss": 0.7334, "step": 28177 }, { - "epoch": 0.7996027241770715, + "epoch": 0.7984924480716371, "grad_norm": 0.0, - "learning_rate": 2.033434058416285e-06, - "loss": 0.8746, + "learning_rate": 2.0551663324763037e-06, + "loss": 0.8475, "step": 28178 }, { - "epoch": 0.7996311010215664, + "epoch": 0.7985207855138995, "grad_norm": 0.0, - "learning_rate": 2.032878571695347e-06, - "loss": 0.9033, + "learning_rate": 2.0546090054467118e-06, + "loss": 0.8448, "step": 28179 }, { - "epoch": 0.7996594778660613, + "epoch": 0.798549122956162, "grad_norm": 0.0, - "learning_rate": 2.032323152273041e-06, - "loss": 0.8784, + "learning_rate": 2.054051745344101e-06, + "loss": 0.8131, "step": 28180 }, { - "epoch": 0.7996878547105561, + "epoch": 0.7985774603984245, "grad_norm": 0.0, - "learning_rate": 2.031767800154063e-06, - "loss": 0.7133, + "learning_rate": 2.0534945521731607e-06, + "loss": 0.7973, "step": 28181 }, { - "epoch": 0.7997162315550511, + "epoch": 0.7986057978406869, "grad_norm": 0.0, - "learning_rate": 2.0312125153430974e-06, - "loss": 0.8566, + "learning_rate": 2.052937425938587e-06, + "loss": 0.8139, "step": 28182 }, { - "epoch": 0.799744608399546, + "epoch": 0.7986341352829494, "grad_norm": 0.0, - "learning_rate": 2.030657297844838e-06, - "loss": 0.7455, + "learning_rate": 2.0523803666450746e-06, + "loss": 0.8239, "step": 28183 }, { - "epoch": 0.7997729852440408, + "epoch": 0.7986624727252118, "grad_norm": 0.0, - "learning_rate": 2.0301021476639782e-06, - "loss": 0.8058, + "learning_rate": 2.0518233742973114e-06, + "loss": 0.7664, "step": 28184 }, { - "epoch": 0.7998013620885358, + "epoch": 0.7986908101674742, "grad_norm": 0.0, - "learning_rate": 2.0295470648051995e-06, - "loss": 0.8438, + "learning_rate": 2.051266448899991e-06, + "loss": 0.7428, "step": 28185 }, { - "epoch": 0.7998297389330307, + "epoch": 0.7987191476097367, "grad_norm": 0.0, - "learning_rate": 2.0289920492731963e-06, - "loss": 0.7278, + "learning_rate": 2.0507095904578043e-06, + "loss": 0.7445, "step": 28186 }, { - "epoch": 0.7998581157775255, + "epoch": 0.7987474850519992, "grad_norm": 0.0, - "learning_rate": 2.0284371010726578e-06, - "loss": 0.8385, + "learning_rate": 2.0501527989754444e-06, + "loss": 0.8344, "step": 28187 }, { - "epoch": 0.7998864926220204, + "epoch": 0.7987758224942617, "grad_norm": 0.0, - "learning_rate": 2.027882220208267e-06, - "loss": 0.7923, + "learning_rate": 2.0495960744576017e-06, + "loss": 0.8431, "step": 28188 }, { - "epoch": 0.7999148694665154, + "epoch": 0.7988041599365241, "grad_norm": 0.0, - "learning_rate": 2.0273274066847136e-06, - "loss": 0.8201, + "learning_rate": 2.04903941690896e-06, + "loss": 0.8042, "step": 28189 }, { - "epoch": 0.7999432463110102, + "epoch": 0.7988324973787866, "grad_norm": 0.0, - "learning_rate": 2.0267726605066864e-06, - "loss": 0.759, + "learning_rate": 2.0484828263342114e-06, + "loss": 0.8106, "step": 28190 }, { - "epoch": 0.7999716231555051, + "epoch": 0.7988608348210491, "grad_norm": 0.0, - "learning_rate": 2.026217981678864e-06, - "loss": 0.8291, + "learning_rate": 2.0479263027380493e-06, + "loss": 0.8661, "step": 28191 }, { - "epoch": 0.8, + "epoch": 0.7988891722633115, "grad_norm": 0.0, - "learning_rate": 2.0256633702059415e-06, - "loss": 0.8047, + "learning_rate": 2.0473698461251523e-06, + "loss": 0.8172, "step": 28192 }, { - "epoch": 0.8000283768444949, + "epoch": 0.798917509705574, "grad_norm": 0.0, - "learning_rate": 2.0251088260925967e-06, - "loss": 0.7194, + "learning_rate": 2.0468134565002163e-06, + "loss": 0.7657, "step": 28193 }, { - "epoch": 0.8000567536889898, + "epoch": 0.7989458471478365, "grad_norm": 0.0, - "learning_rate": 2.024554349343516e-06, - "loss": 0.8768, + "learning_rate": 2.0462571338679204e-06, + "loss": 0.7907, "step": 28194 }, { - "epoch": 0.8000851305334846, + "epoch": 0.798974184590099, "grad_norm": 0.0, - "learning_rate": 2.023999939963386e-06, - "loss": 0.7004, + "learning_rate": 2.0457008782329546e-06, + "loss": 0.8459, "step": 28195 }, { - "epoch": 0.8001135073779796, + "epoch": 0.7990025220323613, "grad_norm": 0.0, - "learning_rate": 2.0234455979568847e-06, - "loss": 0.7628, + "learning_rate": 2.0451446896000038e-06, + "loss": 0.7632, "step": 28196 }, { - "epoch": 0.8001418842224745, + "epoch": 0.7990308594746238, "grad_norm": 0.0, - "learning_rate": 2.0228913233286963e-06, - "loss": 0.7894, + "learning_rate": 2.0445885679737533e-06, + "loss": 0.788, "step": 28197 }, { - "epoch": 0.8001702610669693, + "epoch": 0.7990591969168863, "grad_norm": 0.0, - "learning_rate": 2.022337116083507e-06, - "loss": 0.8619, + "learning_rate": 2.04403251335889e-06, + "loss": 0.8492, "step": 28198 }, { - "epoch": 0.8001986379114643, + "epoch": 0.7990875343591487, "grad_norm": 0.0, - "learning_rate": 2.021782976225991e-06, - "loss": 0.8398, + "learning_rate": 2.043476525760093e-06, + "loss": 0.7338, "step": 28199 }, { - "epoch": 0.8002270147559591, + "epoch": 0.7991158718014112, "grad_norm": 0.0, - "learning_rate": 2.0212289037608335e-06, - "loss": 0.7888, + "learning_rate": 2.042920605182048e-06, + "loss": 0.7321, "step": 28200 }, { - "epoch": 0.800255391600454, + "epoch": 0.7991442092436737, "grad_norm": 0.0, - "learning_rate": 2.020674898692716e-06, - "loss": 0.7026, + "learning_rate": 2.0423647516294398e-06, + "loss": 0.8003, "step": 28201 }, { - "epoch": 0.800283768444949, + "epoch": 0.7991725466859362, "grad_norm": 0.0, - "learning_rate": 2.0201209610263153e-06, - "loss": 0.7733, + "learning_rate": 2.041808965106945e-06, + "loss": 0.8971, "step": 28202 }, { - "epoch": 0.8003121452894438, + "epoch": 0.7992008841281986, "grad_norm": 0.0, - "learning_rate": 2.0195670907663102e-06, - "loss": 0.7694, + "learning_rate": 2.0412532456192492e-06, + "loss": 0.8134, "step": 28203 }, { - "epoch": 0.8003405221339387, + "epoch": 0.7992292215704611, "grad_norm": 0.0, - "learning_rate": 2.019013287917384e-06, - "loss": 0.7461, + "learning_rate": 2.040697593171036e-06, + "loss": 0.8448, "step": 28204 }, { - "epoch": 0.8003688989784336, + "epoch": 0.7992575590127236, "grad_norm": 0.0, - "learning_rate": 2.0184595524842077e-06, - "loss": 0.7274, + "learning_rate": 2.0401420077669788e-06, + "loss": 0.8366, "step": 28205 }, { - "epoch": 0.8003972758229285, + "epoch": 0.7992858964549859, "grad_norm": 0.0, - "learning_rate": 2.0179058844714627e-06, - "loss": 0.8725, + "learning_rate": 2.0395864894117613e-06, + "loss": 0.6802, "step": 28206 }, { - "epoch": 0.8004256526674234, + "epoch": 0.7993142338972484, "grad_norm": 0.0, - "learning_rate": 2.0173522838838265e-06, - "loss": 0.9259, + "learning_rate": 2.0390310381100664e-06, + "loss": 0.8021, "step": 28207 }, { - "epoch": 0.8004540295119182, + "epoch": 0.7993425713395109, "grad_norm": 0.0, - "learning_rate": 2.0167987507259735e-06, - "loss": 0.8276, + "learning_rate": 2.038475653866566e-06, + "loss": 0.8995, "step": 28208 }, { - "epoch": 0.8004824063564132, + "epoch": 0.7993709087817733, "grad_norm": 0.0, - "learning_rate": 2.0162452850025827e-06, - "loss": 0.7975, + "learning_rate": 2.0379203366859413e-06, + "loss": 0.8059, "step": 28209 }, { - "epoch": 0.8005107832009081, + "epoch": 0.7993992462240358, "grad_norm": 0.0, - "learning_rate": 2.015691886718325e-06, - "loss": 0.7948, + "learning_rate": 2.037365086572871e-06, + "loss": 0.8449, "step": 28210 }, { - "epoch": 0.8005391600454029, + "epoch": 0.7994275836662983, "grad_norm": 0.0, - "learning_rate": 2.015138555877877e-06, - "loss": 0.8011, + "learning_rate": 2.036809903532031e-06, + "loss": 0.743, "step": 28211 }, { - "epoch": 0.8005675368898978, + "epoch": 0.7994559211085608, "grad_norm": 0.0, - "learning_rate": 2.0145852924859146e-06, - "loss": 0.8547, + "learning_rate": 2.0362547875681006e-06, + "loss": 0.8348, "step": 28212 }, { - "epoch": 0.8005959137343928, + "epoch": 0.7994842585508232, "grad_norm": 0.0, - "learning_rate": 2.014032096547107e-06, - "loss": 0.9126, + "learning_rate": 2.0356997386857515e-06, + "loss": 0.735, "step": 28213 }, { - "epoch": 0.8006242905788876, + "epoch": 0.7995125959930857, "grad_norm": 0.0, - "learning_rate": 2.013478968066128e-06, - "loss": 0.7729, + "learning_rate": 2.035144756889663e-06, + "loss": 0.8539, "step": 28214 }, { - "epoch": 0.8006526674233825, + "epoch": 0.7995409334353482, "grad_norm": 0.0, - "learning_rate": 2.012925907047656e-06, - "loss": 0.8805, + "learning_rate": 2.0345898421845056e-06, + "loss": 0.8313, "step": 28215 }, { - "epoch": 0.8006810442678775, + "epoch": 0.7995692708776105, "grad_norm": 0.0, - "learning_rate": 2.012372913496353e-06, - "loss": 0.7643, + "learning_rate": 2.034034994574956e-06, + "loss": 0.9464, "step": 28216 }, { - "epoch": 0.8007094211123723, + "epoch": 0.799597608319873, "grad_norm": 0.0, - "learning_rate": 2.0118199874168966e-06, - "loss": 0.8506, + "learning_rate": 2.0334802140656896e-06, + "loss": 0.7657, "step": 28217 }, { - "epoch": 0.8007377979568672, + "epoch": 0.7996259457621355, "grad_norm": 0.0, - "learning_rate": 2.0112671288139583e-06, - "loss": 0.8684, + "learning_rate": 2.032925500661376e-06, + "loss": 0.7319, "step": 28218 }, { - "epoch": 0.8007661748013621, + "epoch": 0.799654283204398, "grad_norm": 0.0, - "learning_rate": 2.0107143376922024e-06, - "loss": 0.8085, + "learning_rate": 2.0323708543666888e-06, + "loss": 0.8215, "step": 28219 }, { - "epoch": 0.800794551645857, + "epoch": 0.7996826206466604, "grad_norm": 0.0, - "learning_rate": 2.010161614056302e-06, - "loss": 0.8309, + "learning_rate": 2.0318162751863037e-06, + "loss": 0.7924, "step": 28220 }, { - "epoch": 0.8008229284903519, + "epoch": 0.7997109580889229, "grad_norm": 0.0, - "learning_rate": 2.009608957910929e-06, - "loss": 0.7286, + "learning_rate": 2.0312617631248855e-06, + "loss": 0.817, "step": 28221 }, { - "epoch": 0.8008513053348467, + "epoch": 0.7997392955311854, "grad_norm": 0.0, - "learning_rate": 2.009056369260742e-06, - "loss": 0.8369, + "learning_rate": 2.0307073181871095e-06, + "loss": 0.7407, "step": 28222 }, { - "epoch": 0.8008796821793417, + "epoch": 0.7997676329734478, "grad_norm": 0.0, - "learning_rate": 2.0085038481104204e-06, - "loss": 0.75, + "learning_rate": 2.030152940377644e-06, + "loss": 0.8474, "step": 28223 }, { - "epoch": 0.8009080590238365, + "epoch": 0.7997959704157103, "grad_norm": 0.0, - "learning_rate": 2.0079513944646236e-06, - "loss": 0.6989, + "learning_rate": 2.0295986297011603e-06, + "loss": 0.8151, "step": 28224 }, { - "epoch": 0.8009364358683314, + "epoch": 0.7998243078579728, "grad_norm": 0.0, - "learning_rate": 2.0073990083280214e-06, - "loss": 0.8657, + "learning_rate": 2.029044386162331e-06, + "loss": 0.7105, "step": 28225 }, { - "epoch": 0.8009648127128264, + "epoch": 0.7998526453002353, "grad_norm": 0.0, - "learning_rate": 2.0068466897052806e-06, - "loss": 0.8586, + "learning_rate": 2.0284902097658166e-06, + "loss": 0.7275, "step": 28226 }, { - "epoch": 0.8009931895573212, + "epoch": 0.7998809827424976, "grad_norm": 0.0, - "learning_rate": 2.006294438601063e-06, - "loss": 0.8293, + "learning_rate": 2.02793610051629e-06, + "loss": 0.7628, "step": 28227 }, { - "epoch": 0.8010215664018161, + "epoch": 0.7999093201847601, "grad_norm": 0.0, - "learning_rate": 2.0057422550200355e-06, - "loss": 0.7447, + "learning_rate": 2.02738205841842e-06, + "loss": 0.8118, "step": 28228 }, { - "epoch": 0.801049943246311, + "epoch": 0.7999376576270226, "grad_norm": 0.0, - "learning_rate": 2.0051901389668647e-06, - "loss": 0.8622, + "learning_rate": 2.0268280834768695e-06, + "loss": 0.8629, "step": 28229 }, { - "epoch": 0.8010783200908059, + "epoch": 0.799965995069285, "grad_norm": 0.0, - "learning_rate": 2.0046380904462103e-06, - "loss": 0.7515, + "learning_rate": 2.026274175696309e-06, + "loss": 0.764, "step": 28230 }, { - "epoch": 0.8011066969353008, + "epoch": 0.7999943325115475, "grad_norm": 0.0, - "learning_rate": 2.0040861094627374e-06, - "loss": 0.8096, + "learning_rate": 2.025720335081399e-06, + "loss": 0.8286, "step": 28231 }, { - "epoch": 0.8011350737797956, + "epoch": 0.80002266995381, "grad_norm": 0.0, - "learning_rate": 2.003534196021111e-06, - "loss": 0.723, + "learning_rate": 2.025166561636809e-06, + "loss": 0.7882, "step": 28232 }, { - "epoch": 0.8011634506242906, + "epoch": 0.8000510073960724, "grad_norm": 0.0, - "learning_rate": 2.0029823501259884e-06, - "loss": 0.8505, + "learning_rate": 2.024612855367205e-06, + "loss": 0.8938, "step": 28233 }, { - "epoch": 0.8011918274687855, + "epoch": 0.8000793448383349, "grad_norm": 0.0, - "learning_rate": 2.0024305717820326e-06, - "loss": 0.8176, + "learning_rate": 2.0240592162772454e-06, + "loss": 0.8009, "step": 28234 }, { - "epoch": 0.8012202043132803, + "epoch": 0.8001076822805974, "grad_norm": 0.0, - "learning_rate": 2.0018788609939087e-06, - "loss": 0.7864, + "learning_rate": 2.0235056443715962e-06, + "loss": 0.8517, "step": 28235 }, { - "epoch": 0.8012485811577753, + "epoch": 0.8001360197228599, "grad_norm": 0.0, - "learning_rate": 2.0013272177662702e-06, - "loss": 0.7569, + "learning_rate": 2.022952139654922e-06, + "loss": 0.7931, "step": 28236 }, { - "epoch": 0.8012769580022702, + "epoch": 0.8001643571651222, "grad_norm": 0.0, - "learning_rate": 2.0007756421037784e-06, - "loss": 0.8979, + "learning_rate": 2.0223987021318835e-06, + "loss": 0.7972, "step": 28237 }, { - "epoch": 0.801305334846765, + "epoch": 0.8001926946073847, "grad_norm": 0.0, - "learning_rate": 2.0002241340110983e-06, - "loss": 0.7587, + "learning_rate": 2.0218453318071462e-06, + "loss": 0.7682, "step": 28238 }, { - "epoch": 0.8013337116912599, + "epoch": 0.8002210320496472, "grad_norm": 0.0, - "learning_rate": 1.9996726934928834e-06, - "loss": 0.84, + "learning_rate": 2.0212920286853656e-06, + "loss": 0.8135, "step": 28239 }, { - "epoch": 0.8013620885357549, + "epoch": 0.8002493694919096, "grad_norm": 0.0, - "learning_rate": 1.9991213205537917e-06, - "loss": 0.7737, + "learning_rate": 2.020738792771204e-06, + "loss": 0.7517, "step": 28240 }, { - "epoch": 0.8013904653802497, + "epoch": 0.8002777069341721, "grad_norm": 0.0, - "learning_rate": 1.9985700151984856e-06, - "loss": 0.721, + "learning_rate": 2.0201856240693264e-06, + "loss": 0.7712, "step": 28241 }, { - "epoch": 0.8014188422247446, + "epoch": 0.8003060443764346, "grad_norm": 0.0, - "learning_rate": 1.998018777431615e-06, - "loss": 0.8646, + "learning_rate": 2.019632522584386e-06, + "loss": 0.842, "step": 28242 }, { - "epoch": 0.8014472190692395, + "epoch": 0.8003343818186971, "grad_norm": 0.0, - "learning_rate": 1.9974676072578405e-06, - "loss": 0.761, + "learning_rate": 2.0190794883210462e-06, + "loss": 0.7464, "step": 28243 }, { - "epoch": 0.8014755959137344, + "epoch": 0.8003627192609595, "grad_norm": 0.0, - "learning_rate": 1.9969165046818184e-06, - "loss": 0.7226, + "learning_rate": 2.0185265212839612e-06, + "loss": 0.8402, "step": 28244 }, { - "epoch": 0.8015039727582293, + "epoch": 0.800391056703222, "grad_norm": 0.0, - "learning_rate": 1.9963654697082e-06, - "loss": 0.8469, + "learning_rate": 2.0179736214777913e-06, + "loss": 0.8063, "step": 28245 }, { - "epoch": 0.8015323496027241, + "epoch": 0.8004193941454845, "grad_norm": 0.0, - "learning_rate": 1.9958145023416443e-06, - "loss": 0.7163, + "learning_rate": 2.017420788907196e-06, + "loss": 0.8833, "step": 28246 }, { - "epoch": 0.8015607264472191, + "epoch": 0.8004477315877468, "grad_norm": 0.0, - "learning_rate": 1.9952636025868012e-06, - "loss": 0.7665, + "learning_rate": 2.0168680235768267e-06, + "loss": 0.8431, "step": 28247 }, { - "epoch": 0.801589103291714, + "epoch": 0.8004760690300093, "grad_norm": 0.0, - "learning_rate": 1.994712770448327e-06, - "loss": 0.7884, + "learning_rate": 2.0163153254913436e-06, + "loss": 0.7838, "step": 28248 }, { - "epoch": 0.8016174801362088, + "epoch": 0.8005044064722718, "grad_norm": 0.0, - "learning_rate": 1.9941620059308753e-06, - "loss": 0.8087, + "learning_rate": 2.0157626946554e-06, + "loss": 0.8373, "step": 28249 }, { - "epoch": 0.8016458569807038, + "epoch": 0.8005327439145343, "grad_norm": 0.0, - "learning_rate": 1.9936113090390953e-06, - "loss": 0.7946, + "learning_rate": 2.0152101310736527e-06, + "loss": 0.7799, "step": 28250 }, { - "epoch": 0.8016742338251986, + "epoch": 0.8005610813567967, "grad_norm": 0.0, - "learning_rate": 1.9930606797776396e-06, - "loss": 0.8376, + "learning_rate": 2.0146576347507586e-06, + "loss": 0.7894, "step": 28251 }, { - "epoch": 0.8017026106696935, + "epoch": 0.8005894187990592, "grad_norm": 0.0, - "learning_rate": 1.992510118151162e-06, - "loss": 0.8622, + "learning_rate": 2.0141052056913644e-06, + "loss": 0.8429, "step": 28252 }, { - "epoch": 0.8017309875141885, + "epoch": 0.8006177562413217, "grad_norm": 0.0, - "learning_rate": 1.991959624164309e-06, - "loss": 0.8746, + "learning_rate": 2.0135528439001294e-06, + "loss": 0.8737, "step": 28253 }, { - "epoch": 0.8017593643586833, + "epoch": 0.8006460936835841, "grad_norm": 0.0, - "learning_rate": 1.9914091978217354e-06, - "loss": 0.8061, + "learning_rate": 2.0130005493817063e-06, + "loss": 0.8359, "step": 28254 }, { - "epoch": 0.8017877412031782, + "epoch": 0.8006744311258466, "grad_norm": 0.0, - "learning_rate": 1.9908588391280893e-06, - "loss": 0.7234, + "learning_rate": 2.012448322140742e-06, + "loss": 0.7909, "step": 28255 }, { - "epoch": 0.801816118047673, + "epoch": 0.8007027685681091, "grad_norm": 0.0, - "learning_rate": 1.990308548088017e-06, - "loss": 0.8158, + "learning_rate": 2.0118961621818923e-06, + "loss": 0.7097, "step": 28256 }, { - "epoch": 0.801844494892168, + "epoch": 0.8007311060103715, "grad_norm": 0.0, - "learning_rate": 1.9897583247061667e-06, - "loss": 0.8067, + "learning_rate": 2.0113440695098107e-06, + "loss": 0.8443, "step": 28257 }, { - "epoch": 0.8018728717366629, + "epoch": 0.800759443452634, "grad_norm": 0.0, - "learning_rate": 1.9892081689871923e-06, - "loss": 0.8354, + "learning_rate": 2.010792044129142e-06, + "loss": 0.874, "step": 28258 }, { - "epoch": 0.8019012485811577, + "epoch": 0.8007877808948964, "grad_norm": 0.0, - "learning_rate": 1.9886580809357335e-06, - "loss": 0.8334, + "learning_rate": 2.01024008604454e-06, + "loss": 0.8446, "step": 28259 }, { - "epoch": 0.8019296254256527, + "epoch": 0.8008161183371589, "grad_norm": 0.0, - "learning_rate": 1.9881080605564406e-06, - "loss": 0.6847, + "learning_rate": 2.0096881952606507e-06, + "loss": 0.7935, "step": 28260 }, { - "epoch": 0.8019580022701476, + "epoch": 0.8008444557794213, "grad_norm": 0.0, - "learning_rate": 1.987558107853961e-06, - "loss": 0.8396, + "learning_rate": 2.0091363717821255e-06, + "loss": 0.7846, "step": 28261 }, { - "epoch": 0.8019863791146424, + "epoch": 0.8008727932216838, "grad_norm": 0.0, - "learning_rate": 1.9870082228329355e-06, - "loss": 0.8438, + "learning_rate": 2.0085846156136113e-06, + "loss": 0.7994, "step": 28262 }, { - "epoch": 0.8020147559591373, + "epoch": 0.8009011306639463, "grad_norm": 0.0, - "learning_rate": 1.986458405498012e-06, - "loss": 0.7503, + "learning_rate": 2.0080329267597567e-06, + "loss": 0.8354, "step": 28263 }, { - "epoch": 0.8020431328036323, + "epoch": 0.8009294681062087, "grad_norm": 0.0, - "learning_rate": 1.9859086558538377e-06, - "loss": 0.7804, + "learning_rate": 2.0074813052252075e-06, + "loss": 0.8149, "step": 28264 }, { - "epoch": 0.8020715096481271, + "epoch": 0.8009578055484712, "grad_norm": 0.0, - "learning_rate": 1.9853589739050494e-06, - "loss": 0.7085, + "learning_rate": 2.0069297510146145e-06, + "loss": 0.777, "step": 28265 }, { - "epoch": 0.802099886492622, + "epoch": 0.8009861429907337, "grad_norm": 0.0, - "learning_rate": 1.9848093596562945e-06, - "loss": 0.8141, + "learning_rate": 2.006378264132618e-06, + "loss": 0.7332, "step": 28266 }, { - "epoch": 0.802128263337117, + "epoch": 0.8010144804329962, "grad_norm": 0.0, - "learning_rate": 1.984259813112219e-06, - "loss": 0.6905, + "learning_rate": 2.005826844583868e-06, + "loss": 0.7744, "step": 28267 }, { - "epoch": 0.8021566401816118, + "epoch": 0.8010428178752586, "grad_norm": 0.0, - "learning_rate": 1.9837103342774543e-06, - "loss": 0.8422, + "learning_rate": 2.0052754923730055e-06, + "loss": 0.791, "step": 28268 }, { - "epoch": 0.8021850170261067, + "epoch": 0.801071155317521, "grad_norm": 0.0, - "learning_rate": 1.983160923156655e-06, - "loss": 0.8794, + "learning_rate": 2.004724207504675e-06, + "loss": 0.7703, "step": 28269 }, { - "epoch": 0.8022133938706016, + "epoch": 0.8010994927597835, "grad_norm": 0.0, - "learning_rate": 1.982611579754453e-06, - "loss": 0.871, + "learning_rate": 2.004172989983525e-06, + "loss": 0.7831, "step": 28270 }, { - "epoch": 0.8022417707150965, + "epoch": 0.8011278302020459, "grad_norm": 0.0, - "learning_rate": 1.9820623040754915e-06, - "loss": 0.8439, + "learning_rate": 2.0036218398141917e-06, + "loss": 0.74, "step": 28271 }, { - "epoch": 0.8022701475595914, + "epoch": 0.8011561676443084, "grad_norm": 0.0, - "learning_rate": 1.9815130961244135e-06, - "loss": 0.8263, + "learning_rate": 2.003070757001324e-06, + "loss": 0.7873, "step": 28272 }, { - "epoch": 0.8022985244040862, + "epoch": 0.8011845050865709, "grad_norm": 0.0, - "learning_rate": 1.9809639559058513e-06, - "loss": 0.8385, + "learning_rate": 2.0025197415495578e-06, + "loss": 0.8224, "step": 28273 }, { - "epoch": 0.8023269012485812, + "epoch": 0.8012128425288334, "grad_norm": 0.0, - "learning_rate": 1.9804148834244465e-06, - "loss": 0.7914, + "learning_rate": 2.0019687934635378e-06, + "loss": 0.8797, "step": 28274 }, { - "epoch": 0.802355278093076, + "epoch": 0.8012411799710958, "grad_norm": 0.0, - "learning_rate": 1.9798658786848414e-06, - "loss": 0.8269, + "learning_rate": 2.001417912747905e-06, + "loss": 0.8646, "step": 28275 }, { - "epoch": 0.8023836549375709, + "epoch": 0.8012695174133583, "grad_norm": 0.0, - "learning_rate": 1.979316941691667e-06, - "loss": 0.767, + "learning_rate": 2.0008670994073e-06, + "loss": 0.7499, "step": 28276 }, { - "epoch": 0.8024120317820659, + "epoch": 0.8012978548556208, "grad_norm": 0.0, - "learning_rate": 1.9787680724495617e-06, - "loss": 0.8667, + "learning_rate": 2.000316353446361e-06, + "loss": 0.743, "step": 28277 }, { - "epoch": 0.8024404086265607, + "epoch": 0.8013261922978832, "grad_norm": 0.0, - "learning_rate": 1.9782192709631677e-06, - "loss": 0.7616, + "learning_rate": 1.99976567486973e-06, + "loss": 0.8867, "step": 28278 }, { - "epoch": 0.8024687854710556, + "epoch": 0.8013545297401456, "grad_norm": 0.0, - "learning_rate": 1.977670537237112e-06, - "loss": 0.8263, + "learning_rate": 1.999215063682042e-06, + "loss": 0.7451, "step": 28279 }, { - "epoch": 0.8024971623155505, + "epoch": 0.8013828671824081, "grad_norm": 0.0, - "learning_rate": 1.9771218712760344e-06, - "loss": 0.8495, + "learning_rate": 1.9986645198879385e-06, + "loss": 0.7792, "step": 28280 }, { - "epoch": 0.8025255391600454, + "epoch": 0.8014112046246705, "grad_norm": 0.0, - "learning_rate": 1.976573273084571e-06, - "loss": 0.8228, + "learning_rate": 1.998114043492053e-06, + "loss": 0.8606, "step": 28281 }, { - "epoch": 0.8025539160045403, + "epoch": 0.801439542066933, "grad_norm": 0.0, - "learning_rate": 1.976024742667352e-06, - "loss": 0.9527, + "learning_rate": 1.9975636344990233e-06, + "loss": 0.7161, "step": 28282 }, { - "epoch": 0.8025822928490352, + "epoch": 0.8014678795091955, "grad_norm": 0.0, - "learning_rate": 1.975476280029013e-06, - "loss": 0.9011, + "learning_rate": 1.99701329291349e-06, + "loss": 0.8242, "step": 28283 }, { - "epoch": 0.8026106696935301, + "epoch": 0.801496216951458, "grad_norm": 0.0, - "learning_rate": 1.974927885174186e-06, - "loss": 0.8233, + "learning_rate": 1.9964630187400834e-06, + "loss": 0.8103, "step": 28284 }, { - "epoch": 0.802639046538025, + "epoch": 0.8015245543937204, "grad_norm": 0.0, - "learning_rate": 1.974379558107503e-06, - "loss": 0.7814, + "learning_rate": 1.99591281198344e-06, + "loss": 0.8122, "step": 28285 }, { - "epoch": 0.8026674233825198, + "epoch": 0.8015528918359829, "grad_norm": 0.0, - "learning_rate": 1.9738312988336006e-06, - "loss": 0.8143, + "learning_rate": 1.995362672648198e-06, + "loss": 0.7589, "step": 28286 }, { - "epoch": 0.8026958002270148, + "epoch": 0.8015812292782454, "grad_norm": 0.0, - "learning_rate": 1.9732831073571036e-06, - "loss": 0.6979, + "learning_rate": 1.9948126007389866e-06, + "loss": 0.8677, "step": 28287 }, { - "epoch": 0.8027241770715097, + "epoch": 0.8016095667205078, "grad_norm": 0.0, - "learning_rate": 1.972734983682644e-06, - "loss": 0.8828, + "learning_rate": 1.994262596260441e-06, + "loss": 0.7821, "step": 28288 }, { - "epoch": 0.8027525539160045, + "epoch": 0.8016379041627703, "grad_norm": 0.0, - "learning_rate": 1.9721869278148563e-06, - "loss": 0.8702, + "learning_rate": 1.993712659217194e-06, + "loss": 0.9125, "step": 28289 }, { - "epoch": 0.8027809307604994, + "epoch": 0.8016662416050327, "grad_norm": 0.0, - "learning_rate": 1.9716389397583636e-06, - "loss": 0.7965, + "learning_rate": 1.9931627896138785e-06, + "loss": 0.8001, "step": 28290 }, { - "epoch": 0.8028093076049944, + "epoch": 0.8016945790472952, "grad_norm": 0.0, - "learning_rate": 1.971091019517797e-06, - "loss": 0.8712, + "learning_rate": 1.9926129874551294e-06, + "loss": 0.8516, "step": 28291 }, { - "epoch": 0.8028376844494892, + "epoch": 0.8017229164895576, "grad_norm": 0.0, - "learning_rate": 1.970543167097789e-06, - "loss": 0.9088, + "learning_rate": 1.9920632527455707e-06, + "loss": 0.7959, "step": 28292 }, { - "epoch": 0.8028660612939841, + "epoch": 0.8017512539318201, "grad_norm": 0.0, - "learning_rate": 1.9699953825029607e-06, - "loss": 0.7793, + "learning_rate": 1.9915135854898383e-06, + "loss": 0.8265, "step": 28293 }, { - "epoch": 0.802894438138479, + "epoch": 0.8017795913740826, "grad_norm": 0.0, - "learning_rate": 1.9694476657379425e-06, - "loss": 0.8291, + "learning_rate": 1.9909639856925623e-06, + "loss": 0.8286, "step": 28294 }, { - "epoch": 0.8029228149829739, + "epoch": 0.801807928816345, "grad_norm": 0.0, - "learning_rate": 1.9689000168073634e-06, - "loss": 0.7697, + "learning_rate": 1.9904144533583693e-06, + "loss": 0.8275, "step": 28295 }, { - "epoch": 0.8029511918274688, + "epoch": 0.8018362662586075, "grad_norm": 0.0, - "learning_rate": 1.9683524357158445e-06, - "loss": 0.7923, + "learning_rate": 1.989864988491891e-06, + "loss": 0.7905, "step": 28296 }, { - "epoch": 0.8029795686719636, + "epoch": 0.80186460370087, "grad_norm": 0.0, - "learning_rate": 1.967804922468014e-06, - "loss": 0.7838, + "learning_rate": 1.9893155910977523e-06, + "loss": 0.9146, "step": 28297 }, { - "epoch": 0.8030079455164586, + "epoch": 0.8018929411431325, "grad_norm": 0.0, - "learning_rate": 1.967257477068495e-06, - "loss": 0.8791, + "learning_rate": 1.9887662611805834e-06, + "loss": 0.7398, "step": 28298 }, { - "epoch": 0.8030363223609535, + "epoch": 0.8019212785853949, "grad_norm": 0.0, - "learning_rate": 1.9667100995219147e-06, - "loss": 0.7813, + "learning_rate": 1.988216998745014e-06, + "loss": 0.8267, "step": 28299 }, { - "epoch": 0.8030646992054483, + "epoch": 0.8019496160276574, "grad_norm": 0.0, - "learning_rate": 1.966162789832897e-06, - "loss": 0.772, + "learning_rate": 1.9876678037956645e-06, + "loss": 0.7058, "step": 28300 }, { - "epoch": 0.8030930760499433, + "epoch": 0.8019779534699198, "grad_norm": 0.0, - "learning_rate": 1.9656155480060613e-06, - "loss": 0.8652, + "learning_rate": 1.987118676337164e-06, + "loss": 0.871, "step": 28301 }, { - "epoch": 0.8031214528944381, + "epoch": 0.8020062909121822, "grad_norm": 0.0, - "learning_rate": 1.965068374046032e-06, - "loss": 0.8086, + "learning_rate": 1.98656961637414e-06, + "loss": 0.8767, "step": 28302 }, { - "epoch": 0.803149829738933, + "epoch": 0.8020346283544447, "grad_norm": 0.0, - "learning_rate": 1.9645212679574345e-06, - "loss": 0.7845, + "learning_rate": 1.9860206239112144e-06, + "loss": 0.717, "step": 28303 }, { - "epoch": 0.803178206583428, + "epoch": 0.8020629657967072, "grad_norm": 0.0, - "learning_rate": 1.963974229744884e-06, - "loss": 0.788, + "learning_rate": 1.9854716989530166e-06, + "loss": 0.7603, "step": 28304 }, { - "epoch": 0.8032065834279228, + "epoch": 0.8020913032389696, "grad_norm": 0.0, - "learning_rate": 1.963427259413003e-06, - "loss": 0.7694, + "learning_rate": 1.9849228415041633e-06, + "loss": 0.7244, "step": 28305 }, { - "epoch": 0.8032349602724177, + "epoch": 0.8021196406812321, "grad_norm": 0.0, - "learning_rate": 1.9628803569664178e-06, - "loss": 0.8, + "learning_rate": 1.9843740515692824e-06, + "loss": 0.7087, "step": 28306 }, { - "epoch": 0.8032633371169126, + "epoch": 0.8021479781234946, "grad_norm": 0.0, - "learning_rate": 1.9623335224097404e-06, - "loss": 0.8514, + "learning_rate": 1.9838253291529973e-06, + "loss": 0.8484, "step": 28307 }, { - "epoch": 0.8032917139614075, + "epoch": 0.8021763155657571, "grad_norm": 0.0, - "learning_rate": 1.9617867557475933e-06, - "loss": 0.8063, + "learning_rate": 1.983276674259925e-06, + "loss": 0.8324, "step": 28308 }, { - "epoch": 0.8033200908059024, + "epoch": 0.8022046530080195, "grad_norm": 0.0, - "learning_rate": 1.961240056984597e-06, - "loss": 0.7997, + "learning_rate": 1.982728086894694e-06, + "loss": 0.8551, "step": 28309 }, { - "epoch": 0.8033484676503972, + "epoch": 0.802232990450282, "grad_norm": 0.0, - "learning_rate": 1.960693426125364e-06, - "loss": 0.7519, + "learning_rate": 1.982179567061918e-06, + "loss": 0.7818, "step": 28310 }, { - "epoch": 0.8033768444948922, + "epoch": 0.8022613278925445, "grad_norm": 0.0, - "learning_rate": 1.960146863174516e-06, - "loss": 0.7844, + "learning_rate": 1.9816311147662216e-06, + "loss": 0.8183, "step": 28311 }, { - "epoch": 0.8034052213393871, + "epoch": 0.8022896653348068, "grad_norm": 0.0, - "learning_rate": 1.9596003681366716e-06, - "loss": 0.8421, + "learning_rate": 1.981082730012226e-06, + "loss": 0.8663, "step": 28312 }, { - "epoch": 0.8034335981838819, + "epoch": 0.8023180027770693, "grad_norm": 0.0, - "learning_rate": 1.959053941016441e-06, - "loss": 0.7446, + "learning_rate": 1.9805344128045456e-06, + "loss": 0.8116, "step": 28313 }, { - "epoch": 0.8034619750283768, + "epoch": 0.8023463402193318, "grad_norm": 0.0, - "learning_rate": 1.958507581818444e-06, - "loss": 0.8226, + "learning_rate": 1.9799861631478013e-06, + "loss": 0.8186, "step": 28314 }, { - "epoch": 0.8034903518728718, + "epoch": 0.8023746776615943, "grad_norm": 0.0, - "learning_rate": 1.9579612905472944e-06, - "loss": 0.8708, + "learning_rate": 1.9794379810466123e-06, + "loss": 0.8362, "step": 28315 }, { - "epoch": 0.8035187287173666, + "epoch": 0.8024030151038567, "grad_norm": 0.0, - "learning_rate": 1.9574150672076076e-06, - "loss": 0.767, + "learning_rate": 1.9788898665055958e-06, + "loss": 0.7553, "step": 28316 }, { - "epoch": 0.8035471055618615, + "epoch": 0.8024313525461192, "grad_norm": 0.0, - "learning_rate": 1.9568689118039997e-06, - "loss": 0.8082, + "learning_rate": 1.97834181952937e-06, + "loss": 0.7835, "step": 28317 }, { - "epoch": 0.8035754824063565, + "epoch": 0.8024596899883817, "grad_norm": 0.0, - "learning_rate": 1.9563228243410794e-06, - "loss": 0.7885, + "learning_rate": 1.9777938401225483e-06, + "loss": 0.8012, "step": 28318 }, { - "epoch": 0.8036038592508513, + "epoch": 0.8024880274306441, "grad_norm": 0.0, - "learning_rate": 1.9557768048234606e-06, - "loss": 0.746, + "learning_rate": 1.9772459282897484e-06, + "loss": 0.8078, "step": 28319 }, { - "epoch": 0.8036322360953462, + "epoch": 0.8025163648729066, "grad_norm": 0.0, - "learning_rate": 1.9552308532557607e-06, - "loss": 0.8171, + "learning_rate": 1.9766980840355876e-06, + "loss": 0.7942, "step": 28320 }, { - "epoch": 0.803660612939841, + "epoch": 0.8025447023151691, "grad_norm": 0.0, - "learning_rate": 1.954684969642584e-06, - "loss": 0.7519, + "learning_rate": 1.976150307364675e-06, + "loss": 0.7371, "step": 28321 }, { - "epoch": 0.803688989784336, + "epoch": 0.8025730397574314, "grad_norm": 0.0, - "learning_rate": 1.9541391539885456e-06, - "loss": 0.7892, + "learning_rate": 1.9756025982816284e-06, + "loss": 0.6754, "step": 28322 }, { - "epoch": 0.8037173666288309, + "epoch": 0.8026013771996939, "grad_norm": 0.0, - "learning_rate": 1.9535934062982575e-06, - "loss": 0.8204, + "learning_rate": 1.9750549567910627e-06, + "loss": 0.8174, "step": 28323 }, { - "epoch": 0.8037457434733257, + "epoch": 0.8026297146419564, "grad_norm": 0.0, - "learning_rate": 1.9530477265763258e-06, - "loss": 0.8042, + "learning_rate": 1.974507382897588e-06, + "loss": 0.6738, "step": 28324 }, { - "epoch": 0.8037741203178207, + "epoch": 0.8026580520842189, "grad_norm": 0.0, - "learning_rate": 1.9525021148273604e-06, - "loss": 0.7813, + "learning_rate": 1.973959876605819e-06, + "loss": 0.8439, "step": 28325 }, { - "epoch": 0.8038024971623156, + "epoch": 0.8026863895264813, "grad_norm": 0.0, - "learning_rate": 1.951956571055975e-06, - "loss": 0.7207, + "learning_rate": 1.9734124379203645e-06, + "loss": 0.8873, "step": 28326 }, { - "epoch": 0.8038308740068104, + "epoch": 0.8027147269687438, "grad_norm": 0.0, - "learning_rate": 1.951411095266772e-06, - "loss": 0.8729, + "learning_rate": 1.9728650668458373e-06, + "loss": 0.7632, "step": 28327 }, { - "epoch": 0.8038592508513054, + "epoch": 0.8027430644110063, "grad_norm": 0.0, - "learning_rate": 1.9508656874643604e-06, - "loss": 0.8593, + "learning_rate": 1.9723177633868483e-06, + "loss": 0.8242, "step": 28328 }, { - "epoch": 0.8038876276958002, + "epoch": 0.8027714018532687, "grad_norm": 0.0, - "learning_rate": 1.9503203476533483e-06, - "loss": 0.7731, + "learning_rate": 1.971770527548008e-06, + "loss": 0.8072, "step": 28329 }, { - "epoch": 0.8039160045402951, + "epoch": 0.8027997392955312, "grad_norm": 0.0, - "learning_rate": 1.9497750758383427e-06, - "loss": 0.8042, + "learning_rate": 1.971223359333929e-06, + "loss": 0.7491, "step": 28330 }, { - "epoch": 0.80394438138479, + "epoch": 0.8028280767377937, "grad_norm": 0.0, - "learning_rate": 1.949229872023951e-06, - "loss": 0.8718, + "learning_rate": 1.9706762587492134e-06, + "loss": 0.8146, "step": 28331 }, { - "epoch": 0.8039727582292849, + "epoch": 0.8028564141800562, "grad_norm": 0.0, - "learning_rate": 1.9486847362147743e-06, - "loss": 0.9501, + "learning_rate": 1.970129225798474e-06, + "loss": 0.8583, "step": 28332 }, { - "epoch": 0.8040011350737798, + "epoch": 0.8028847516223185, "grad_norm": 0.0, - "learning_rate": 1.9481396684154188e-06, - "loss": 0.7988, + "learning_rate": 1.9695822604863203e-06, + "loss": 0.7586, "step": 28333 }, { - "epoch": 0.8040295119182747, + "epoch": 0.802913089064581, "grad_norm": 0.0, - "learning_rate": 1.9475946686304927e-06, - "loss": 0.7325, + "learning_rate": 1.969035362817354e-06, + "loss": 0.8511, "step": 28334 }, { - "epoch": 0.8040578887627696, + "epoch": 0.8029414265068435, "grad_norm": 0.0, - "learning_rate": 1.9470497368645934e-06, - "loss": 0.7753, + "learning_rate": 1.9684885327961866e-06, + "loss": 0.7618, "step": 28335 }, { - "epoch": 0.8040862656072645, + "epoch": 0.8029697639491059, "grad_norm": 0.0, - "learning_rate": 1.946504873122327e-06, - "loss": 0.5911, + "learning_rate": 1.9679417704274238e-06, + "loss": 0.8634, "step": 28336 }, { - "epoch": 0.8041146424517593, + "epoch": 0.8029981013913684, "grad_norm": 0.0, - "learning_rate": 1.9459600774082987e-06, - "loss": 0.7887, + "learning_rate": 1.967395075715668e-06, + "loss": 0.7762, "step": 28337 }, { - "epoch": 0.8041430192962542, + "epoch": 0.8030264388336309, "grad_norm": 0.0, - "learning_rate": 1.9454153497271055e-06, - "loss": 0.8739, + "learning_rate": 1.9668484486655294e-06, + "loss": 0.8931, "step": 28338 }, { - "epoch": 0.8041713961407492, + "epoch": 0.8030547762758934, "grad_norm": 0.0, - "learning_rate": 1.94487069008335e-06, - "loss": 0.7355, + "learning_rate": 1.966301889281607e-06, + "loss": 0.8044, "step": 28339 }, { - "epoch": 0.804199772985244, + "epoch": 0.8030831137181558, "grad_norm": 0.0, - "learning_rate": 1.944326098481638e-06, - "loss": 0.7527, + "learning_rate": 1.9657553975685063e-06, + "loss": 0.9227, "step": 28340 }, { - "epoch": 0.8042281498297389, + "epoch": 0.8031114511604183, "grad_norm": 0.0, - "learning_rate": 1.943781574926562e-06, - "loss": 0.8094, + "learning_rate": 1.965208973530831e-06, + "loss": 0.7324, "step": 28341 }, { - "epoch": 0.8042565266742339, + "epoch": 0.8031397886026808, "grad_norm": 0.0, - "learning_rate": 1.943237119422725e-06, - "loss": 0.7401, + "learning_rate": 1.964662617173184e-06, + "loss": 0.7412, "step": 28342 }, { - "epoch": 0.8042849035187287, + "epoch": 0.8031681260449431, "grad_norm": 0.0, - "learning_rate": 1.942692731974729e-06, - "loss": 0.8464, + "learning_rate": 1.9641163285001686e-06, + "loss": 0.8055, "step": 28343 }, { - "epoch": 0.8043132803632236, + "epoch": 0.8031964634872056, "grad_norm": 0.0, - "learning_rate": 1.942148412587165e-06, - "loss": 0.706, + "learning_rate": 1.9635701075163884e-06, + "loss": 0.7581, "step": 28344 }, { - "epoch": 0.8043416572077186, + "epoch": 0.8032248009294681, "grad_norm": 0.0, - "learning_rate": 1.9416041612646376e-06, - "loss": 0.9081, + "learning_rate": 1.9630239542264373e-06, + "loss": 0.8138, "step": 28345 }, { - "epoch": 0.8043700340522134, + "epoch": 0.8032531383717305, "grad_norm": 0.0, - "learning_rate": 1.9410599780117443e-06, - "loss": 0.8746, + "learning_rate": 1.9624778686349232e-06, + "loss": 0.8341, "step": 28346 }, { - "epoch": 0.8043984108967083, + "epoch": 0.803281475813993, "grad_norm": 0.0, - "learning_rate": 1.9405158628330777e-06, - "loss": 0.9046, + "learning_rate": 1.96193185074644e-06, + "loss": 0.7397, "step": 28347 }, { - "epoch": 0.8044267877412031, + "epoch": 0.8033098132562555, "grad_norm": 0.0, - "learning_rate": 1.9399718157332358e-06, - "loss": 0.7686, + "learning_rate": 1.96138590056559e-06, + "loss": 0.8689, "step": 28348 }, { - "epoch": 0.8044551645856981, + "epoch": 0.803338150698518, "grad_norm": 0.0, - "learning_rate": 1.9394278367168175e-06, - "loss": 0.7908, + "learning_rate": 1.9608400180969743e-06, + "loss": 0.7206, "step": 28349 }, { - "epoch": 0.804483541430193, + "epoch": 0.8033664881407804, "grad_norm": 0.0, - "learning_rate": 1.938883925788411e-06, - "loss": 0.8315, + "learning_rate": 1.9602942033451853e-06, + "loss": 0.7699, "step": 28350 }, { - "epoch": 0.8045119182746878, + "epoch": 0.8033948255830429, "grad_norm": 0.0, - "learning_rate": 1.9383400829526144e-06, - "loss": 0.8454, + "learning_rate": 1.959748456314824e-06, + "loss": 0.7613, "step": 28351 }, { - "epoch": 0.8045402951191828, + "epoch": 0.8034231630253054, "grad_norm": 0.0, - "learning_rate": 1.937796308214025e-06, - "loss": 0.7716, + "learning_rate": 1.9592027770104905e-06, + "loss": 0.7489, "step": 28352 }, { - "epoch": 0.8045686719636777, + "epoch": 0.8034515004675677, "grad_norm": 0.0, - "learning_rate": 1.9372526015772296e-06, - "loss": 0.7305, + "learning_rate": 1.9586571654367737e-06, + "loss": 0.8121, "step": 28353 }, { - "epoch": 0.8045970488081725, + "epoch": 0.8034798379098302, "grad_norm": 0.0, - "learning_rate": 1.936708963046826e-06, - "loss": 0.8169, + "learning_rate": 1.9581116215982743e-06, + "loss": 0.7269, "step": 28354 }, { - "epoch": 0.8046254256526674, + "epoch": 0.8035081753520927, "grad_norm": 0.0, - "learning_rate": 1.9361653926274016e-06, - "loss": 0.8695, + "learning_rate": 1.9575661454995877e-06, + "loss": 0.7795, "step": 28355 }, { - "epoch": 0.8046538024971623, + "epoch": 0.8035365127943552, "grad_norm": 0.0, - "learning_rate": 1.9356218903235515e-06, - "loss": 0.7559, + "learning_rate": 1.9570207371453075e-06, + "loss": 0.9281, "step": 28356 }, { - "epoch": 0.8046821793416572, + "epoch": 0.8035648502366176, "grad_norm": 0.0, - "learning_rate": 1.9350784561398685e-06, - "loss": 0.8021, + "learning_rate": 1.956475396540031e-06, + "loss": 0.7192, "step": 28357 }, { - "epoch": 0.8047105561861521, + "epoch": 0.8035931876788801, "grad_norm": 0.0, - "learning_rate": 1.934535090080937e-06, - "loss": 0.7853, + "learning_rate": 1.9559301236883454e-06, + "loss": 0.8565, "step": 28358 }, { - "epoch": 0.804738933030647, + "epoch": 0.8036215251211426, "grad_norm": 0.0, - "learning_rate": 1.9339917921513494e-06, - "loss": 0.7955, + "learning_rate": 1.9553849185948514e-06, + "loss": 0.6497, "step": 28359 }, { - "epoch": 0.8047673098751419, + "epoch": 0.803649862563405, "grad_norm": 0.0, - "learning_rate": 1.9334485623556977e-06, - "loss": 0.8368, + "learning_rate": 1.954839781264135e-06, + "loss": 0.7893, "step": 28360 }, { - "epoch": 0.8047956867196367, + "epoch": 0.8036782000056675, "grad_norm": 0.0, - "learning_rate": 1.932905400698567e-06, - "loss": 0.8009, + "learning_rate": 1.9542947117007894e-06, + "loss": 0.9075, "step": 28361 }, { - "epoch": 0.8048240635641317, + "epoch": 0.80370653744793, "grad_norm": 0.0, - "learning_rate": 1.932362307184548e-06, - "loss": 0.8131, + "learning_rate": 1.9537497099094104e-06, + "loss": 0.7892, "step": 28362 }, { - "epoch": 0.8048524404086266, + "epoch": 0.8037348748901925, "grad_norm": 0.0, - "learning_rate": 1.931819281818229e-06, - "loss": 0.8371, + "learning_rate": 1.953204775894584e-06, + "loss": 0.8381, "step": 28363 }, { - "epoch": 0.8048808172531214, + "epoch": 0.8037632123324548, "grad_norm": 0.0, - "learning_rate": 1.9312763246041934e-06, - "loss": 0.7793, + "learning_rate": 1.9526599096609e-06, + "loss": 0.8076, "step": 28364 }, { - "epoch": 0.8049091940976163, + "epoch": 0.8037915497747173, "grad_norm": 0.0, - "learning_rate": 1.9307334355470285e-06, - "loss": 0.7682, + "learning_rate": 1.952115111212953e-06, + "loss": 0.8724, "step": 28365 }, { - "epoch": 0.8049375709421113, + "epoch": 0.8038198872169798, "grad_norm": 0.0, - "learning_rate": 1.930190614651325e-06, - "loss": 0.8564, + "learning_rate": 1.9515703805553277e-06, + "loss": 0.8456, "step": 28366 }, { - "epoch": 0.8049659477866061, + "epoch": 0.8038482246592422, "grad_norm": 0.0, - "learning_rate": 1.9296478619216608e-06, - "loss": 0.7962, + "learning_rate": 1.9510257176926117e-06, + "loss": 0.8114, "step": 28367 }, { - "epoch": 0.804994324631101, + "epoch": 0.8038765621015047, "grad_norm": 0.0, - "learning_rate": 1.9291051773626236e-06, - "loss": 0.7649, + "learning_rate": 1.9504811226293964e-06, + "loss": 0.8231, "step": 28368 }, { - "epoch": 0.805022701475596, + "epoch": 0.8039048995437672, "grad_norm": 0.0, - "learning_rate": 1.9285625609788005e-06, - "loss": 0.8343, + "learning_rate": 1.9499365953702678e-06, + "loss": 0.7719, "step": 28369 }, { - "epoch": 0.8050510783200908, + "epoch": 0.8039332369860296, "grad_norm": 0.0, - "learning_rate": 1.9280200127747706e-06, - "loss": 0.7987, + "learning_rate": 1.949392135919814e-06, + "loss": 0.733, "step": 28370 }, { - "epoch": 0.8050794551645857, + "epoch": 0.8039615744282921, "grad_norm": 0.0, - "learning_rate": 1.9274775327551176e-06, - "loss": 0.8013, + "learning_rate": 1.9488477442826183e-06, + "loss": 0.8317, "step": 28371 }, { - "epoch": 0.8051078320090805, + "epoch": 0.8039899118705546, "grad_norm": 0.0, - "learning_rate": 1.926935120924427e-06, - "loss": 0.7952, + "learning_rate": 1.948303420463268e-06, + "loss": 0.8102, "step": 28372 }, { - "epoch": 0.8051362088535755, + "epoch": 0.8040182493128171, "grad_norm": 0.0, - "learning_rate": 1.9263927772872757e-06, - "loss": 0.7396, + "learning_rate": 1.9477591644663496e-06, + "loss": 0.9002, "step": 28373 }, { - "epoch": 0.8051645856980704, + "epoch": 0.8040465867550795, "grad_norm": 0.0, - "learning_rate": 1.925850501848249e-06, - "loss": 0.8996, + "learning_rate": 1.947214976296443e-06, + "loss": 0.8601, "step": 28374 }, { - "epoch": 0.8051929625425652, + "epoch": 0.804074924197342, "grad_norm": 0.0, - "learning_rate": 1.9253082946119252e-06, - "loss": 0.7275, + "learning_rate": 1.946670855958138e-06, + "loss": 0.7625, "step": 28375 }, { - "epoch": 0.8052213393870602, + "epoch": 0.8041032616396044, "grad_norm": 0.0, - "learning_rate": 1.9247661555828844e-06, - "loss": 0.8087, + "learning_rate": 1.946126803456013e-06, + "loss": 0.7308, "step": 28376 }, { - "epoch": 0.8052497162315551, + "epoch": 0.8041315990818668, "grad_norm": 0.0, - "learning_rate": 1.9242240847657113e-06, - "loss": 0.8684, + "learning_rate": 1.945582818794652e-06, + "loss": 0.8467, "step": 28377 }, { - "epoch": 0.8052780930760499, + "epoch": 0.8041599365241293, "grad_norm": 0.0, - "learning_rate": 1.923682082164976e-06, - "loss": 0.6896, + "learning_rate": 1.9450389019786407e-06, + "loss": 0.8591, "step": 28378 }, { - "epoch": 0.8053064699205449, + "epoch": 0.8041882739663918, "grad_norm": 0.0, - "learning_rate": 1.9231401477852617e-06, - "loss": 0.8017, + "learning_rate": 1.944495053012555e-06, + "loss": 0.7797, "step": 28379 }, { - "epoch": 0.8053348467650397, + "epoch": 0.8042166114086543, "grad_norm": 0.0, - "learning_rate": 1.9225982816311484e-06, - "loss": 0.7514, + "learning_rate": 1.943951271900979e-06, + "loss": 0.9085, "step": 28380 }, { - "epoch": 0.8053632236095346, + "epoch": 0.8042449488509167, "grad_norm": 0.0, - "learning_rate": 1.9220564837072086e-06, - "loss": 0.8227, + "learning_rate": 1.9434075586484922e-06, + "loss": 0.8677, "step": 28381 }, { - "epoch": 0.8053916004540295, + "epoch": 0.8042732862931792, "grad_norm": 0.0, - "learning_rate": 1.9215147540180203e-06, - "loss": 0.7982, + "learning_rate": 1.9428639132596774e-06, + "loss": 0.7141, "step": 28382 }, { - "epoch": 0.8054199772985244, + "epoch": 0.8043016237354417, "grad_norm": 0.0, - "learning_rate": 1.9209730925681623e-06, - "loss": 0.8592, + "learning_rate": 1.942320335739112e-06, + "loss": 0.8048, "step": 28383 }, { - "epoch": 0.8054483541430193, + "epoch": 0.8043299611777041, "grad_norm": 0.0, - "learning_rate": 1.920431499362205e-06, - "loss": 0.8997, + "learning_rate": 1.9417768260913726e-06, + "loss": 0.9076, "step": 28384 }, { - "epoch": 0.8054767309875142, + "epoch": 0.8043582986199665, "grad_norm": 0.0, - "learning_rate": 1.919889974404727e-06, - "loss": 0.7743, + "learning_rate": 1.941233384321041e-06, + "loss": 0.7601, "step": 28385 }, { - "epoch": 0.8055051078320091, + "epoch": 0.804386636062229, "grad_norm": 0.0, - "learning_rate": 1.9193485177003037e-06, - "loss": 0.8297, + "learning_rate": 1.9406900104326944e-06, + "loss": 0.8434, "step": 28386 }, { - "epoch": 0.805533484676504, + "epoch": 0.8044149735044915, "grad_norm": 0.0, - "learning_rate": 1.918807129253504e-06, - "loss": 0.8935, + "learning_rate": 1.9401467044309054e-06, + "loss": 0.7891, "step": 28387 }, { - "epoch": 0.8055618615209988, + "epoch": 0.8044433109467539, "grad_norm": 0.0, - "learning_rate": 1.9182658090689043e-06, - "loss": 0.8513, + "learning_rate": 1.939603466320257e-06, + "loss": 0.7388, "step": 28388 }, { - "epoch": 0.8055902383654937, + "epoch": 0.8044716483890164, "grad_norm": 0.0, - "learning_rate": 1.9177245571510784e-06, - "loss": 0.7709, + "learning_rate": 1.9390602961053194e-06, + "loss": 0.761, "step": 28389 }, { - "epoch": 0.8056186152099887, + "epoch": 0.8044999858312789, "grad_norm": 0.0, - "learning_rate": 1.917183373504592e-06, - "loss": 0.7971, + "learning_rate": 1.93851719379067e-06, + "loss": 0.8247, "step": 28390 }, { - "epoch": 0.8056469920544835, + "epoch": 0.8045283232735413, "grad_norm": 0.0, - "learning_rate": 1.916642258134026e-06, - "loss": 0.8261, + "learning_rate": 1.9379741593808865e-06, + "loss": 0.7778, "step": 28391 }, { - "epoch": 0.8056753688989784, + "epoch": 0.8045566607158038, "grad_norm": 0.0, - "learning_rate": 1.9161012110439424e-06, - "loss": 0.9006, + "learning_rate": 1.937431192880537e-06, + "loss": 0.7858, "step": 28392 }, { - "epoch": 0.8057037457434734, + "epoch": 0.8045849981580663, "grad_norm": 0.0, - "learning_rate": 1.9155602322389167e-06, - "loss": 0.8347, + "learning_rate": 1.9368882942941992e-06, + "loss": 0.8848, "step": 28393 }, { - "epoch": 0.8057321225879682, + "epoch": 0.8046133356003287, "grad_norm": 0.0, - "learning_rate": 1.915019321723519e-06, - "loss": 0.8166, + "learning_rate": 1.9363454636264455e-06, + "loss": 0.7411, "step": 28394 }, { - "epoch": 0.8057604994324631, + "epoch": 0.8046416730425912, "grad_norm": 0.0, - "learning_rate": 1.914478479502315e-06, - "loss": 0.8437, + "learning_rate": 1.935802700881848e-06, + "loss": 0.8004, "step": 28395 }, { - "epoch": 0.805788876276958, + "epoch": 0.8046700104848536, "grad_norm": 0.0, - "learning_rate": 1.9139377055798736e-06, - "loss": 0.8032, + "learning_rate": 1.935260006064983e-06, + "loss": 0.7778, "step": 28396 }, { - "epoch": 0.8058172531214529, + "epoch": 0.8046983479271161, "grad_norm": 0.0, - "learning_rate": 1.913396999960767e-06, - "loss": 0.842, + "learning_rate": 1.934717379180413e-06, + "loss": 0.8773, "step": 28397 }, { - "epoch": 0.8058456299659478, + "epoch": 0.8047266853693785, "grad_norm": 0.0, - "learning_rate": 1.9128563626495568e-06, - "loss": 0.9083, + "learning_rate": 1.934174820232715e-06, + "loss": 0.887, "step": 28398 }, { - "epoch": 0.8058740068104426, + "epoch": 0.804755022811641, "grad_norm": 0.0, - "learning_rate": 1.912315793650813e-06, - "loss": 0.7935, + "learning_rate": 1.933632329226459e-06, + "loss": 0.7593, "step": 28399 }, { - "epoch": 0.8059023836549376, + "epoch": 0.8047833602539035, "grad_norm": 0.0, - "learning_rate": 1.9117752929691037e-06, - "loss": 0.7944, + "learning_rate": 1.933089906166212e-06, + "loss": 0.843, "step": 28400 }, { - "epoch": 0.8059307604994325, + "epoch": 0.8048116976961659, "grad_norm": 0.0, - "learning_rate": 1.911234860608988e-06, - "loss": 0.9423, + "learning_rate": 1.932547551056544e-06, + "loss": 0.8336, "step": 28401 }, { - "epoch": 0.8059591373439273, + "epoch": 0.8048400351384284, "grad_norm": 0.0, - "learning_rate": 1.9106944965750364e-06, - "loss": 0.8023, + "learning_rate": 1.9320052639020257e-06, + "loss": 0.7788, "step": 28402 }, { - "epoch": 0.8059875141884223, + "epoch": 0.8048683725806909, "grad_norm": 0.0, - "learning_rate": 1.9101542008718144e-06, - "loss": 0.8336, + "learning_rate": 1.9314630447072202e-06, + "loss": 0.7853, "step": 28403 }, { - "epoch": 0.8060158910329172, + "epoch": 0.8048967100229534, "grad_norm": 0.0, - "learning_rate": 1.909613973503881e-06, - "loss": 0.8469, + "learning_rate": 1.930920893476701e-06, + "loss": 0.7485, "step": 28404 }, { - "epoch": 0.806044267877412, + "epoch": 0.8049250474652158, "grad_norm": 0.0, - "learning_rate": 1.9090738144758027e-06, - "loss": 0.8691, + "learning_rate": 1.930378810215029e-06, + "loss": 0.7955, "step": 28405 }, { - "epoch": 0.8060726447219069, + "epoch": 0.8049533849074783, "grad_norm": 0.0, - "learning_rate": 1.9085337237921398e-06, - "loss": 0.8799, + "learning_rate": 1.9298367949267726e-06, + "loss": 0.6941, "step": 28406 }, { - "epoch": 0.8061010215664018, + "epoch": 0.8049817223497407, "grad_norm": 0.0, - "learning_rate": 1.907993701457458e-06, - "loss": 0.801, + "learning_rate": 1.9292948476164976e-06, + "loss": 0.8331, "step": 28407 }, { - "epoch": 0.8061293984108967, + "epoch": 0.8050100597920031, "grad_norm": 0.0, - "learning_rate": 1.907453747476319e-06, - "loss": 0.7741, + "learning_rate": 1.9287529682887685e-06, + "loss": 0.8215, "step": 28408 }, { - "epoch": 0.8061577752553916, + "epoch": 0.8050383972342656, "grad_norm": 0.0, - "learning_rate": 1.906913861853279e-06, - "loss": 0.9736, + "learning_rate": 1.928211156948151e-06, + "loss": 0.8037, "step": 28409 }, { - "epoch": 0.8061861520998865, + "epoch": 0.8050667346765281, "grad_norm": 0.0, - "learning_rate": 1.9063740445929025e-06, - "loss": 0.9189, + "learning_rate": 1.9276694135992115e-06, + "loss": 0.9016, "step": 28410 }, { - "epoch": 0.8062145289443814, + "epoch": 0.8050950721187906, "grad_norm": 0.0, - "learning_rate": 1.90583429569975e-06, - "loss": 0.8704, + "learning_rate": 1.927127738246507e-06, + "loss": 0.8015, "step": 28411 }, { - "epoch": 0.8062429057888763, + "epoch": 0.805123409561053, "grad_norm": 0.0, - "learning_rate": 1.905294615178377e-06, - "loss": 0.7907, + "learning_rate": 1.9265861308946055e-06, + "loss": 0.8407, "step": 28412 }, { - "epoch": 0.8062712826333712, + "epoch": 0.8051517470033155, "grad_norm": 0.0, - "learning_rate": 1.904755003033344e-06, - "loss": 0.6819, + "learning_rate": 1.926044591548064e-06, + "loss": 0.8726, "step": 28413 }, { - "epoch": 0.8062996594778661, + "epoch": 0.805180084445578, "grad_norm": 0.0, - "learning_rate": 1.9042154592692118e-06, - "loss": 0.7981, + "learning_rate": 1.925503120211448e-06, + "loss": 0.8087, "step": 28414 }, { - "epoch": 0.8063280363223609, + "epoch": 0.8052084218878404, "grad_norm": 0.0, - "learning_rate": 1.9036759838905329e-06, - "loss": 0.746, + "learning_rate": 1.924961716889321e-06, + "loss": 0.8925, "step": 28415 }, { - "epoch": 0.8063564131668558, + "epoch": 0.8052367593301029, "grad_norm": 0.0, - "learning_rate": 1.9031365769018673e-06, - "loss": 0.7727, + "learning_rate": 1.924420381586236e-06, + "loss": 0.7489, "step": 28416 }, { - "epoch": 0.8063847900113508, + "epoch": 0.8052650967723654, "grad_norm": 0.0, - "learning_rate": 1.902597238307774e-06, - "loss": 0.7661, + "learning_rate": 1.923879114306757e-06, + "loss": 0.8089, "step": 28417 }, { - "epoch": 0.8064131668558456, + "epoch": 0.8052934342146277, "grad_norm": 0.0, - "learning_rate": 1.9020579681128027e-06, - "loss": 0.8552, + "learning_rate": 1.9233379150554466e-06, + "loss": 0.8566, "step": 28418 }, { - "epoch": 0.8064415437003405, + "epoch": 0.8053217716568902, "grad_norm": 0.0, - "learning_rate": 1.9015187663215117e-06, - "loss": 0.9178, + "learning_rate": 1.9227967838368566e-06, + "loss": 0.8283, "step": 28419 }, { - "epoch": 0.8064699205448355, + "epoch": 0.8053501090991527, "grad_norm": 0.0, - "learning_rate": 1.9009796329384578e-06, - "loss": 0.7872, + "learning_rate": 1.9222557206555494e-06, + "loss": 0.8073, "step": 28420 }, { - "epoch": 0.8064982973893303, + "epoch": 0.8053784465414152, "grad_norm": 0.0, - "learning_rate": 1.9004405679681893e-06, - "loss": 0.7833, + "learning_rate": 1.9217147255160816e-06, + "loss": 0.8315, "step": 28421 }, { - "epoch": 0.8065266742338252, + "epoch": 0.8054067839836776, "grad_norm": 0.0, - "learning_rate": 1.899901571415268e-06, - "loss": 0.7548, + "learning_rate": 1.9211737984230107e-06, + "loss": 0.9609, "step": 28422 }, { - "epoch": 0.80655505107832, + "epoch": 0.8054351214259401, "grad_norm": 0.0, - "learning_rate": 1.8993626432842394e-06, - "loss": 0.7731, + "learning_rate": 1.9206329393808955e-06, + "loss": 0.7244, "step": 28423 }, { - "epoch": 0.806583427922815, + "epoch": 0.8054634588682026, "grad_norm": 0.0, - "learning_rate": 1.8988237835796586e-06, - "loss": 0.8263, + "learning_rate": 1.920092148394287e-06, + "loss": 0.7892, "step": 28424 }, { - "epoch": 0.8066118047673099, + "epoch": 0.805491796310465, "grad_norm": 0.0, - "learning_rate": 1.8982849923060797e-06, - "loss": 0.713, + "learning_rate": 1.919551425467744e-06, + "loss": 0.7275, "step": 28425 }, { - "epoch": 0.8066401816118047, + "epoch": 0.8055201337527275, "grad_norm": 0.0, - "learning_rate": 1.8977462694680493e-06, - "loss": 0.6675, + "learning_rate": 1.919010770605818e-06, + "loss": 0.8242, "step": 28426 }, { - "epoch": 0.8066685584562997, + "epoch": 0.80554847119499, "grad_norm": 0.0, - "learning_rate": 1.8972076150701212e-06, - "loss": 0.7801, + "learning_rate": 1.918470183813066e-06, + "loss": 0.8296, "step": 28427 }, { - "epoch": 0.8066969353007946, + "epoch": 0.8055768086372525, "grad_norm": 0.0, - "learning_rate": 1.8966690291168467e-06, - "loss": 0.7284, + "learning_rate": 1.9179296650940425e-06, + "loss": 0.8089, "step": 28428 }, { - "epoch": 0.8067253121452894, + "epoch": 0.8056051460795148, "grad_norm": 0.0, - "learning_rate": 1.8961305116127705e-06, - "loss": 0.8225, + "learning_rate": 1.9173892144532957e-06, + "loss": 0.8465, "step": 28429 }, { - "epoch": 0.8067536889897844, + "epoch": 0.8056334835217773, "grad_norm": 0.0, - "learning_rate": 1.8955920625624435e-06, - "loss": 0.7871, + "learning_rate": 1.9168488318953814e-06, + "loss": 0.7544, "step": 28430 }, { - "epoch": 0.8067820658342792, + "epoch": 0.8056618209640398, "grad_norm": 0.0, - "learning_rate": 1.895053681970419e-06, - "loss": 0.8055, + "learning_rate": 1.9163085174248506e-06, + "loss": 0.8003, "step": 28431 }, { - "epoch": 0.8068104426787741, + "epoch": 0.8056901584063022, "grad_norm": 0.0, - "learning_rate": 1.8945153698412367e-06, - "loss": 0.8816, + "learning_rate": 1.9157682710462553e-06, + "loss": 0.8391, "step": 28432 }, { - "epoch": 0.806838819523269, + "epoch": 0.8057184958485647, "grad_norm": 0.0, - "learning_rate": 1.8939771261794471e-06, - "loss": 0.769, + "learning_rate": 1.915228092764149e-06, + "loss": 0.7722, "step": 28433 }, { - "epoch": 0.8068671963677639, + "epoch": 0.8057468332908272, "grad_norm": 0.0, - "learning_rate": 1.8934389509896e-06, - "loss": 0.8602, + "learning_rate": 1.9146879825830753e-06, + "loss": 0.8777, "step": 28434 }, { - "epoch": 0.8068955732122588, + "epoch": 0.8057751707330897, "grad_norm": 0.0, - "learning_rate": 1.8929008442762365e-06, - "loss": 0.9251, + "learning_rate": 1.914147940507587e-06, + "loss": 0.7726, "step": 28435 }, { - "epoch": 0.8069239500567537, + "epoch": 0.8058035081753521, "grad_norm": 0.0, - "learning_rate": 1.8923628060439037e-06, - "loss": 0.8784, + "learning_rate": 1.913607966542236e-06, + "loss": 0.7516, "step": 28436 }, { - "epoch": 0.8069523269012486, + "epoch": 0.8058318456176146, "grad_norm": 0.0, - "learning_rate": 1.8918248362971459e-06, - "loss": 0.7883, + "learning_rate": 1.9130680606915653e-06, + "loss": 0.7173, "step": 28437 }, { - "epoch": 0.8069807037457435, + "epoch": 0.805860183059877, "grad_norm": 0.0, - "learning_rate": 1.8912869350405095e-06, - "loss": 0.8581, + "learning_rate": 1.9125282229601284e-06, + "loss": 0.8951, "step": 28438 }, { - "epoch": 0.8070090805902383, + "epoch": 0.8058885205021394, "grad_norm": 0.0, - "learning_rate": 1.8907491022785385e-06, - "loss": 0.8131, + "learning_rate": 1.911988453352467e-06, + "loss": 0.8766, "step": 28439 }, { - "epoch": 0.8070374574347332, + "epoch": 0.8059168579444019, "grad_norm": 0.0, - "learning_rate": 1.8902113380157715e-06, - "loss": 0.7904, + "learning_rate": 1.9114487518731296e-06, + "loss": 0.9544, "step": 28440 }, { - "epoch": 0.8070658342792282, + "epoch": 0.8059451953866644, "grad_norm": 0.0, - "learning_rate": 1.8896736422567552e-06, - "loss": 0.7719, + "learning_rate": 1.910909118526666e-06, + "loss": 0.7445, "step": 28441 }, { - "epoch": 0.807094211123723, + "epoch": 0.8059735328289268, "grad_norm": 0.0, - "learning_rate": 1.8891360150060323e-06, - "loss": 0.8795, + "learning_rate": 1.9103695533176157e-06, + "loss": 0.7459, "step": 28442 }, { - "epoch": 0.8071225879682179, + "epoch": 0.8060018702711893, "grad_norm": 0.0, - "learning_rate": 1.8885984562681392e-06, - "loss": 0.7653, + "learning_rate": 1.9098300562505266e-06, + "loss": 0.8805, "step": 28443 }, { - "epoch": 0.8071509648127129, + "epoch": 0.8060302077134518, "grad_norm": 0.0, - "learning_rate": 1.888060966047619e-06, - "loss": 0.7522, + "learning_rate": 1.9092906273299427e-06, + "loss": 0.9172, "step": 28444 }, { - "epoch": 0.8071793416572077, + "epoch": 0.8060585451557143, "grad_norm": 0.0, - "learning_rate": 1.887523544349016e-06, - "loss": 0.8265, + "learning_rate": 1.908751266560409e-06, + "loss": 0.801, "step": 28445 }, { - "epoch": 0.8072077185017026, + "epoch": 0.8060868825979767, "grad_norm": 0.0, - "learning_rate": 1.886986191176864e-06, - "loss": 0.7693, + "learning_rate": 1.908211973946471e-06, + "loss": 0.8115, "step": 28446 }, { - "epoch": 0.8072360953461976, + "epoch": 0.8061152200402392, "grad_norm": 0.0, - "learning_rate": 1.8864489065357039e-06, - "loss": 0.8237, + "learning_rate": 1.907672749492665e-06, + "loss": 0.7677, "step": 28447 }, { - "epoch": 0.8072644721906924, + "epoch": 0.8061435574825017, "grad_norm": 0.0, - "learning_rate": 1.8859116904300768e-06, - "loss": 0.7328, + "learning_rate": 1.9071335932035373e-06, + "loss": 0.7986, "step": 28448 }, { - "epoch": 0.8072928490351873, + "epoch": 0.806171894924764, "grad_norm": 0.0, - "learning_rate": 1.8853745428645165e-06, - "loss": 0.7045, + "learning_rate": 1.9065945050836299e-06, + "loss": 0.7994, "step": 28449 }, { - "epoch": 0.8073212258796821, + "epoch": 0.8062002323670265, "grad_norm": 0.0, - "learning_rate": 1.8848374638435618e-06, - "loss": 0.9406, + "learning_rate": 1.9060554851374813e-06, + "loss": 0.837, "step": 28450 }, { - "epoch": 0.8073496027241771, + "epoch": 0.806228569809289, "grad_norm": 0.0, - "learning_rate": 1.8843004533717523e-06, - "loss": 0.8266, + "learning_rate": 1.9055165333696324e-06, + "loss": 0.7935, "step": 28451 }, { - "epoch": 0.807377979568672, + "epoch": 0.8062569072515515, "grad_norm": 0.0, - "learning_rate": 1.8837635114536168e-06, - "loss": 0.822, + "learning_rate": 1.9049776497846251e-06, + "loss": 0.7932, "step": 28452 }, { - "epoch": 0.8074063564131668, + "epoch": 0.8062852446938139, "grad_norm": 0.0, - "learning_rate": 1.883226638093698e-06, - "loss": 0.9123, + "learning_rate": 1.9044388343869958e-06, + "loss": 0.6978, "step": 28453 }, { - "epoch": 0.8074347332576618, + "epoch": 0.8063135821360764, "grad_norm": 0.0, - "learning_rate": 1.8826898332965315e-06, - "loss": 0.7941, + "learning_rate": 1.9039000871812863e-06, + "loss": 0.8028, "step": 28454 }, { - "epoch": 0.8074631101021567, + "epoch": 0.8063419195783389, "grad_norm": 0.0, - "learning_rate": 1.8821530970666468e-06, - "loss": 0.7808, + "learning_rate": 1.9033614081720297e-06, + "loss": 0.7915, "step": 28455 }, { - "epoch": 0.8074914869466515, + "epoch": 0.8063702570206013, "grad_norm": 0.0, - "learning_rate": 1.8816164294085792e-06, - "loss": 0.8346, + "learning_rate": 1.902822797363768e-06, + "loss": 0.8278, "step": 28456 }, { - "epoch": 0.8075198637911464, + "epoch": 0.8063985944628638, "grad_norm": 0.0, - "learning_rate": 1.8810798303268651e-06, - "loss": 0.8574, + "learning_rate": 1.9022842547610354e-06, + "loss": 0.7998, "step": 28457 }, { - "epoch": 0.8075482406356413, + "epoch": 0.8064269319051263, "grad_norm": 0.0, - "learning_rate": 1.880543299826032e-06, - "loss": 0.8424, + "learning_rate": 1.9017457803683704e-06, + "loss": 0.7027, "step": 28458 }, { - "epoch": 0.8075766174801362, + "epoch": 0.8064552693473888, "grad_norm": 0.0, - "learning_rate": 1.8800068379106152e-06, - "loss": 0.7363, + "learning_rate": 1.9012073741903069e-06, + "loss": 0.9221, "step": 28459 }, { - "epoch": 0.8076049943246311, + "epoch": 0.8064836067896511, "grad_norm": 0.0, - "learning_rate": 1.8794704445851474e-06, - "loss": 0.6885, + "learning_rate": 1.900669036231385e-06, + "loss": 0.7655, "step": 28460 }, { - "epoch": 0.807633371169126, + "epoch": 0.8065119442319136, "grad_norm": 0.0, - "learning_rate": 1.8789341198541556e-06, - "loss": 0.8099, + "learning_rate": 1.9001307664961322e-06, + "loss": 0.8473, "step": 28461 }, { - "epoch": 0.8076617480136209, + "epoch": 0.8065402816741761, "grad_norm": 0.0, - "learning_rate": 1.8783978637221755e-06, - "loss": 0.8567, + "learning_rate": 1.899592564989088e-06, + "loss": 0.8581, "step": 28462 }, { - "epoch": 0.8076901248581158, + "epoch": 0.8065686191164385, "grad_norm": 0.0, - "learning_rate": 1.8778616761937308e-06, - "loss": 0.8425, + "learning_rate": 1.8990544317147818e-06, + "loss": 0.7745, "step": 28463 }, { - "epoch": 0.8077185017026107, + "epoch": 0.806596956558701, "grad_norm": 0.0, - "learning_rate": 1.877325557273354e-06, - "loss": 0.8627, + "learning_rate": 1.8985163666777473e-06, + "loss": 0.756, "step": 28464 }, { - "epoch": 0.8077468785471056, + "epoch": 0.8066252940009635, "grad_norm": 0.0, - "learning_rate": 1.876789506965575e-06, - "loss": 0.877, + "learning_rate": 1.8979783698825216e-06, + "loss": 0.8717, "step": 28465 }, { - "epoch": 0.8077752553916004, + "epoch": 0.8066536314432259, "grad_norm": 0.0, - "learning_rate": 1.8762535252749181e-06, - "loss": 0.8118, + "learning_rate": 1.89744044133363e-06, + "loss": 0.8784, "step": 28466 }, { - "epoch": 0.8078036322360953, + "epoch": 0.8066819688854884, "grad_norm": 0.0, - "learning_rate": 1.8757176122059106e-06, - "loss": 0.8911, + "learning_rate": 1.896902581035608e-06, + "loss": 0.8716, "step": 28467 }, { - "epoch": 0.8078320090805903, + "epoch": 0.8067103063277509, "grad_norm": 0.0, - "learning_rate": 1.8751817677630857e-06, - "loss": 0.757, + "learning_rate": 1.8963647889929826e-06, + "loss": 0.8239, "step": 28468 }, { - "epoch": 0.8078603859250851, + "epoch": 0.8067386437700134, "grad_norm": 0.0, - "learning_rate": 1.874645991950964e-06, - "loss": 0.7889, + "learning_rate": 1.8958270652102862e-06, + "loss": 0.7481, "step": 28469 }, { - "epoch": 0.80788876276958, + "epoch": 0.8067669812122757, "grad_norm": 0.0, - "learning_rate": 1.8741102847740734e-06, - "loss": 0.7717, + "learning_rate": 1.8952894096920472e-06, + "loss": 0.8252, "step": 28470 }, { - "epoch": 0.807917139614075, + "epoch": 0.8067953186545382, "grad_norm": 0.0, - "learning_rate": 1.8735746462369398e-06, - "loss": 0.7262, + "learning_rate": 1.8947518224427951e-06, + "loss": 0.7386, "step": 28471 }, { - "epoch": 0.8079455164585698, + "epoch": 0.8068236560968007, "grad_norm": 0.0, - "learning_rate": 1.8730390763440853e-06, - "loss": 0.8152, + "learning_rate": 1.894214303467058e-06, + "loss": 0.7472, "step": 28472 }, { - "epoch": 0.8079738933030647, + "epoch": 0.8068519935390631, "grad_norm": 0.0, - "learning_rate": 1.8725035751000342e-06, - "loss": 0.7587, + "learning_rate": 1.8936768527693673e-06, + "loss": 0.8141, "step": 28473 }, { - "epoch": 0.8080022701475595, + "epoch": 0.8068803309813256, "grad_norm": 0.0, - "learning_rate": 1.8719681425093127e-06, - "loss": 0.7244, + "learning_rate": 1.8931394703542437e-06, + "loss": 0.8249, "step": 28474 }, { - "epoch": 0.8080306469920545, + "epoch": 0.8069086684235881, "grad_norm": 0.0, - "learning_rate": 1.8714327785764397e-06, - "loss": 0.8487, + "learning_rate": 1.8926021562262187e-06, + "loss": 0.7908, "step": 28475 }, { - "epoch": 0.8080590238365494, + "epoch": 0.8069370058658506, "grad_norm": 0.0, - "learning_rate": 1.87089748330594e-06, - "loss": 0.8441, + "learning_rate": 1.8920649103898148e-06, + "loss": 0.8365, "step": 28476 }, { - "epoch": 0.8080874006810442, + "epoch": 0.806965343308113, "grad_norm": 0.0, - "learning_rate": 1.8703622567023373e-06, - "loss": 0.823, + "learning_rate": 1.8915277328495584e-06, + "loss": 0.8578, "step": 28477 }, { - "epoch": 0.8081157775255392, + "epoch": 0.8069936807503755, "grad_norm": 0.0, - "learning_rate": 1.8698270987701462e-06, - "loss": 0.681, + "learning_rate": 1.8909906236099774e-06, + "loss": 0.8756, "step": 28478 }, { - "epoch": 0.8081441543700341, + "epoch": 0.807022018192638, "grad_norm": 0.0, - "learning_rate": 1.8692920095138923e-06, - "loss": 0.8923, + "learning_rate": 1.890453582675591e-06, + "loss": 0.8282, "step": 28479 }, { - "epoch": 0.8081725312145289, + "epoch": 0.8070503556349004, "grad_norm": 0.0, - "learning_rate": 1.8687569889380962e-06, - "loss": 0.7967, + "learning_rate": 1.889916610050926e-06, + "loss": 0.8993, "step": 28480 }, { - "epoch": 0.8082009080590238, + "epoch": 0.8070786930771628, "grad_norm": 0.0, - "learning_rate": 1.8682220370472737e-06, - "loss": 0.8456, + "learning_rate": 1.8893797057405072e-06, + "loss": 0.8537, "step": 28481 }, { - "epoch": 0.8082292849035188, + "epoch": 0.8071070305194253, "grad_norm": 0.0, - "learning_rate": 1.867687153845944e-06, - "loss": 0.7803, + "learning_rate": 1.8888428697488525e-06, + "loss": 0.6697, "step": 28482 }, { - "epoch": 0.8082576617480136, + "epoch": 0.8071353679616878, "grad_norm": 0.0, - "learning_rate": 1.8671523393386272e-06, - "loss": 0.8784, + "learning_rate": 1.888306102080486e-06, + "loss": 0.8183, "step": 28483 }, { - "epoch": 0.8082860385925085, + "epoch": 0.8071637054039502, "grad_norm": 0.0, - "learning_rate": 1.8666175935298393e-06, - "loss": 0.8527, + "learning_rate": 1.8877694027399296e-06, + "loss": 0.7586, "step": 28484 }, { - "epoch": 0.8083144154370034, + "epoch": 0.8071920428462127, "grad_norm": 0.0, - "learning_rate": 1.8660829164241023e-06, - "loss": 0.8917, + "learning_rate": 1.887232771731704e-06, + "loss": 0.7153, "step": 28485 }, { - "epoch": 0.8083427922814983, + "epoch": 0.8072203802884752, "grad_norm": 0.0, - "learning_rate": 1.8655483080259252e-06, - "loss": 0.8616, + "learning_rate": 1.8866962090603314e-06, + "loss": 0.8432, "step": 28486 }, { - "epoch": 0.8083711691259932, + "epoch": 0.8072487177307376, "grad_norm": 0.0, - "learning_rate": 1.8650137683398261e-06, - "loss": 0.7874, + "learning_rate": 1.886159714730328e-06, + "loss": 0.8187, "step": 28487 }, { - "epoch": 0.8083995459704881, + "epoch": 0.8072770551730001, "grad_norm": 0.0, - "learning_rate": 1.8644792973703252e-06, - "loss": 0.7945, + "learning_rate": 1.8856232887462134e-06, + "loss": 0.68, "step": 28488 }, { - "epoch": 0.808427922814983, + "epoch": 0.8073053926152626, "grad_norm": 0.0, - "learning_rate": 1.8639448951219308e-06, - "loss": 0.751, + "learning_rate": 1.8850869311125098e-06, + "loss": 0.7888, "step": 28489 }, { - "epoch": 0.8084562996594779, + "epoch": 0.807333730057525, "grad_norm": 0.0, - "learning_rate": 1.8634105615991592e-06, - "loss": 0.8166, + "learning_rate": 1.88455064183373e-06, + "loss": 0.7726, "step": 28490 }, { - "epoch": 0.8084846765039727, + "epoch": 0.8073620674997875, "grad_norm": 0.0, - "learning_rate": 1.8628762968065272e-06, - "loss": 0.9382, + "learning_rate": 1.8840144209143963e-06, + "loss": 0.7548, "step": 28491 }, { - "epoch": 0.8085130533484677, + "epoch": 0.8073904049420499, "grad_norm": 0.0, - "learning_rate": 1.8623421007485431e-06, - "loss": 0.7435, + "learning_rate": 1.8834782683590202e-06, + "loss": 0.8792, "step": 28492 }, { - "epoch": 0.8085414301929625, + "epoch": 0.8074187423843124, "grad_norm": 0.0, - "learning_rate": 1.86180797342972e-06, - "loss": 0.7484, + "learning_rate": 1.8829421841721206e-06, + "loss": 0.8982, "step": 28493 }, { - "epoch": 0.8085698070374574, + "epoch": 0.8074470798265748, "grad_norm": 0.0, - "learning_rate": 1.8612739148545733e-06, - "loss": 0.8698, + "learning_rate": 1.882406168358215e-06, + "loss": 0.8582, "step": 28494 }, { - "epoch": 0.8085981838819524, + "epoch": 0.8074754172688373, "grad_norm": 0.0, - "learning_rate": 1.8607399250276104e-06, - "loss": 0.8137, + "learning_rate": 1.8818702209218153e-06, + "loss": 0.8009, "step": 28495 }, { - "epoch": 0.8086265607264472, + "epoch": 0.8075037547110998, "grad_norm": 0.0, - "learning_rate": 1.860206003953342e-06, - "loss": 0.9097, + "learning_rate": 1.8813343418674367e-06, + "loss": 0.817, "step": 28496 }, { - "epoch": 0.8086549375709421, + "epoch": 0.8075320921533622, "grad_norm": 0.0, - "learning_rate": 1.8596721516362825e-06, - "loss": 0.9088, + "learning_rate": 1.8807985311995948e-06, + "loss": 0.8222, "step": 28497 }, { - "epoch": 0.808683314415437, + "epoch": 0.8075604295956247, "grad_norm": 0.0, - "learning_rate": 1.8591383680809327e-06, - "loss": 0.684, + "learning_rate": 1.8802627889228008e-06, + "loss": 0.8287, "step": 28498 }, { - "epoch": 0.8087116912599319, + "epoch": 0.8075887670378872, "grad_norm": 0.0, - "learning_rate": 1.8586046532918123e-06, - "loss": 0.7933, + "learning_rate": 1.8797271150415709e-06, + "loss": 0.9437, "step": 28499 }, { - "epoch": 0.8087400681044268, + "epoch": 0.8076171044801497, "grad_norm": 0.0, - "learning_rate": 1.858071007273422e-06, - "loss": 0.6893, + "learning_rate": 1.879191509560413e-06, + "loss": 0.8644, "step": 28500 }, { - "epoch": 0.8087684449489216, + "epoch": 0.807645441922412, "grad_norm": 0.0, - "learning_rate": 1.8575374300302717e-06, - "loss": 0.8233, + "learning_rate": 1.8786559724838405e-06, + "loss": 0.69, "step": 28501 }, { - "epoch": 0.8087968217934166, + "epoch": 0.8076737793646745, "grad_norm": 0.0, - "learning_rate": 1.8570039215668712e-06, - "loss": 0.8203, + "learning_rate": 1.8781205038163663e-06, + "loss": 0.7253, "step": 28502 }, { - "epoch": 0.8088251986379115, + "epoch": 0.807702116806937, "grad_norm": 0.0, - "learning_rate": 1.8564704818877222e-06, - "loss": 0.6585, + "learning_rate": 1.877585103562497e-06, + "loss": 0.7714, "step": 28503 }, { - "epoch": 0.8088535754824063, + "epoch": 0.8077304542491994, "grad_norm": 0.0, - "learning_rate": 1.8559371109973335e-06, - "loss": 0.7342, + "learning_rate": 1.8770497717267477e-06, + "loss": 0.9132, "step": 28504 }, { - "epoch": 0.8088819523269013, + "epoch": 0.8077587916914619, "grad_norm": 0.0, - "learning_rate": 1.8554038089002125e-06, - "loss": 0.8412, + "learning_rate": 1.8765145083136216e-06, + "loss": 0.9194, "step": 28505 }, { - "epoch": 0.8089103291713962, + "epoch": 0.8077871291337244, "grad_norm": 0.0, - "learning_rate": 1.854870575600859e-06, - "loss": 0.9006, + "learning_rate": 1.8759793133276306e-06, + "loss": 0.9137, "step": 28506 }, { - "epoch": 0.808938706015891, + "epoch": 0.8078154665759869, "grad_norm": 0.0, - "learning_rate": 1.8543374111037804e-06, - "loss": 0.8184, + "learning_rate": 1.8754441867732842e-06, + "loss": 0.8407, "step": 28507 }, { - "epoch": 0.8089670828603859, + "epoch": 0.8078438040182493, "grad_norm": 0.0, - "learning_rate": 1.853804315413481e-06, - "loss": 0.7124, + "learning_rate": 1.874909128655087e-06, + "loss": 0.7618, "step": 28508 }, { - "epoch": 0.8089954597048808, + "epoch": 0.8078721414605118, "grad_norm": 0.0, - "learning_rate": 1.8532712885344616e-06, - "loss": 0.8624, + "learning_rate": 1.8743741389775472e-06, + "loss": 0.7835, "step": 28509 }, { - "epoch": 0.8090238365493757, + "epoch": 0.8079004789027743, "grad_norm": 0.0, - "learning_rate": 1.8527383304712254e-06, - "loss": 0.8652, + "learning_rate": 1.8738392177451703e-06, + "loss": 0.7291, "step": 28510 }, { - "epoch": 0.8090522133938706, + "epoch": 0.8079288163450367, "grad_norm": 0.0, - "learning_rate": 1.8522054412282775e-06, - "loss": 0.9023, + "learning_rate": 1.873304364962465e-06, + "loss": 0.6626, "step": 28511 }, { - "epoch": 0.8090805902383655, + "epoch": 0.8079571537872992, "grad_norm": 0.0, - "learning_rate": 1.8516726208101133e-06, - "loss": 0.7691, + "learning_rate": 1.8727695806339363e-06, + "loss": 0.801, "step": 28512 }, { - "epoch": 0.8091089670828604, + "epoch": 0.8079854912295616, "grad_norm": 0.0, - "learning_rate": 1.8511398692212379e-06, - "loss": 0.8327, + "learning_rate": 1.8722348647640842e-06, + "loss": 0.7979, "step": 28513 }, { - "epoch": 0.8091373439273553, + "epoch": 0.808013828671824, "grad_norm": 0.0, - "learning_rate": 1.850607186466149e-06, - "loss": 0.8799, + "learning_rate": 1.8717002173574173e-06, + "loss": 0.7914, "step": 28514 }, { - "epoch": 0.8091657207718501, + "epoch": 0.8080421661140865, "grad_norm": 0.0, - "learning_rate": 1.8500745725493485e-06, - "loss": 0.8098, + "learning_rate": 1.8711656384184396e-06, + "loss": 0.8878, "step": 28515 }, { - "epoch": 0.8091940976163451, + "epoch": 0.808070503556349, "grad_norm": 0.0, - "learning_rate": 1.8495420274753362e-06, - "loss": 0.848, + "learning_rate": 1.8706311279516499e-06, + "loss": 0.88, "step": 28516 }, { - "epoch": 0.80922247446084, + "epoch": 0.8080988409986115, "grad_norm": 0.0, - "learning_rate": 1.8490095512486072e-06, - "loss": 0.8673, + "learning_rate": 1.8700966859615533e-06, + "loss": 0.7991, "step": 28517 }, { - "epoch": 0.8092508513053348, + "epoch": 0.8081271784408739, "grad_norm": 0.0, - "learning_rate": 1.8484771438736604e-06, - "loss": 0.8039, + "learning_rate": 1.8695623124526541e-06, + "loss": 0.9601, "step": 28518 }, { - "epoch": 0.8092792281498298, + "epoch": 0.8081555158831364, "grad_norm": 0.0, - "learning_rate": 1.8479448053549965e-06, - "loss": 0.7653, + "learning_rate": 1.8690280074294475e-06, + "loss": 0.8395, "step": 28519 }, { - "epoch": 0.8093076049943246, + "epoch": 0.8081838533253989, "grad_norm": 0.0, - "learning_rate": 1.8474125356971061e-06, - "loss": 0.8393, + "learning_rate": 1.8684937708964402e-06, + "loss": 0.7484, "step": 28520 }, { - "epoch": 0.8093359818388195, + "epoch": 0.8082121907676613, "grad_norm": 0.0, - "learning_rate": 1.8468803349044894e-06, - "loss": 0.854, + "learning_rate": 1.8679596028581271e-06, + "loss": 0.7794, "step": 28521 }, { - "epoch": 0.8093643586833145, + "epoch": 0.8082405282099238, "grad_norm": 0.0, - "learning_rate": 1.8463482029816427e-06, - "loss": 0.8133, + "learning_rate": 1.867425503319009e-06, + "loss": 0.8037, "step": 28522 }, { - "epoch": 0.8093927355278093, + "epoch": 0.8082688656521863, "grad_norm": 0.0, - "learning_rate": 1.8458161399330565e-06, - "loss": 0.6878, + "learning_rate": 1.8668914722835873e-06, + "loss": 0.8232, "step": 28523 }, { - "epoch": 0.8094211123723042, + "epoch": 0.8082972030944487, "grad_norm": 0.0, - "learning_rate": 1.8452841457632287e-06, - "loss": 0.7716, + "learning_rate": 1.866357509756358e-06, + "loss": 0.7739, "step": 28524 }, { - "epoch": 0.809449489216799, + "epoch": 0.8083255405367111, "grad_norm": 0.0, - "learning_rate": 1.8447522204766545e-06, - "loss": 0.6482, + "learning_rate": 1.865823615741822e-06, + "loss": 0.8165, "step": 28525 }, { - "epoch": 0.809477866061294, + "epoch": 0.8083538779789736, "grad_norm": 0.0, - "learning_rate": 1.8442203640778222e-06, - "loss": 0.8826, + "learning_rate": 1.8652897902444721e-06, + "loss": 0.8069, "step": 28526 }, { - "epoch": 0.8095062429057889, + "epoch": 0.8083822154212361, "grad_norm": 0.0, - "learning_rate": 1.8436885765712277e-06, - "loss": 0.7833, + "learning_rate": 1.8647560332688076e-06, + "loss": 0.7755, "step": 28527 }, { - "epoch": 0.8095346197502837, + "epoch": 0.8084105528634985, "grad_norm": 0.0, - "learning_rate": 1.8431568579613624e-06, - "loss": 0.8098, + "learning_rate": 1.8642223448193253e-06, + "loss": 0.7341, "step": 28528 }, { - "epoch": 0.8095629965947787, + "epoch": 0.808438890305761, "grad_norm": 0.0, - "learning_rate": 1.8426252082527175e-06, - "loss": 0.8568, + "learning_rate": 1.8636887249005176e-06, + "loss": 0.9131, "step": 28529 }, { - "epoch": 0.8095913734392736, + "epoch": 0.8084672277480235, "grad_norm": 0.0, - "learning_rate": 1.842093627449787e-06, - "loss": 0.8909, + "learning_rate": 1.8631551735168806e-06, + "loss": 0.9237, "step": 28530 }, { - "epoch": 0.8096197502837684, + "epoch": 0.8084955651902859, "grad_norm": 0.0, - "learning_rate": 1.841562115557055e-06, - "loss": 0.8388, + "learning_rate": 1.8626216906729123e-06, + "loss": 0.7023, "step": 28531 }, { - "epoch": 0.8096481271282633, + "epoch": 0.8085239026325484, "grad_norm": 0.0, - "learning_rate": 1.8410306725790151e-06, - "loss": 0.8279, + "learning_rate": 1.8620882763731008e-06, + "loss": 0.761, "step": 28532 }, { - "epoch": 0.8096765039727583, + "epoch": 0.8085522400748109, "grad_norm": 0.0, - "learning_rate": 1.8404992985201587e-06, - "loss": 0.7871, + "learning_rate": 1.8615549306219438e-06, + "loss": 0.7398, "step": 28533 }, { - "epoch": 0.8097048808172531, + "epoch": 0.8085805775170734, "grad_norm": 0.0, - "learning_rate": 1.839967993384969e-06, - "loss": 0.7056, + "learning_rate": 1.861021653423929e-06, + "loss": 0.7489, "step": 28534 }, { - "epoch": 0.809733257661748, + "epoch": 0.8086089149593357, "grad_norm": 0.0, - "learning_rate": 1.839436757177936e-06, - "loss": 0.7133, + "learning_rate": 1.8604884447835515e-06, + "loss": 0.7755, "step": 28535 }, { - "epoch": 0.8097616345062429, + "epoch": 0.8086372524015982, "grad_norm": 0.0, - "learning_rate": 1.83890558990355e-06, - "loss": 0.8429, + "learning_rate": 1.8599553047053032e-06, + "loss": 0.7096, "step": 28536 }, { - "epoch": 0.8097900113507378, + "epoch": 0.8086655898438607, "grad_norm": 0.0, - "learning_rate": 1.8383744915662927e-06, - "loss": 0.8392, + "learning_rate": 1.8594222331936728e-06, + "loss": 0.8338, "step": 28537 }, { - "epoch": 0.8098183881952327, + "epoch": 0.8086939272861231, "grad_norm": 0.0, - "learning_rate": 1.8378434621706542e-06, - "loss": 0.8404, + "learning_rate": 1.8588892302531525e-06, + "loss": 0.8666, "step": 28538 }, { - "epoch": 0.8098467650397276, + "epoch": 0.8087222647283856, "grad_norm": 0.0, - "learning_rate": 1.8373125017211201e-06, - "loss": 0.8462, + "learning_rate": 1.8583562958882329e-06, + "loss": 0.8711, "step": 28539 }, { - "epoch": 0.8098751418842225, + "epoch": 0.8087506021706481, "grad_norm": 0.0, - "learning_rate": 1.8367816102221724e-06, - "loss": 0.7194, + "learning_rate": 1.8578234301034004e-06, + "loss": 0.8247, "step": 28540 }, { - "epoch": 0.8099035187287174, + "epoch": 0.8087789396129106, "grad_norm": 0.0, - "learning_rate": 1.8362507876782964e-06, - "loss": 0.87, + "learning_rate": 1.857290632903146e-06, + "loss": 0.8364, "step": 28541 }, { - "epoch": 0.8099318955732122, + "epoch": 0.808807277055173, "grad_norm": 0.0, - "learning_rate": 1.8357200340939807e-06, - "loss": 0.8234, + "learning_rate": 1.8567579042919548e-06, + "loss": 0.8435, "step": 28542 }, { - "epoch": 0.8099602724177072, + "epoch": 0.8088356144974355, "grad_norm": 0.0, - "learning_rate": 1.8351893494737017e-06, - "loss": 0.8348, + "learning_rate": 1.8562252442743156e-06, + "loss": 0.776, "step": 28543 }, { - "epoch": 0.809988649262202, + "epoch": 0.808863951939698, "grad_norm": 0.0, - "learning_rate": 1.8346587338219456e-06, - "loss": 0.8615, + "learning_rate": 1.855692652854717e-06, + "loss": 0.7802, "step": 28544 }, { - "epoch": 0.8100170261066969, + "epoch": 0.8088922893819603, "grad_norm": 0.0, - "learning_rate": 1.8341281871431947e-06, - "loss": 0.8092, + "learning_rate": 1.855160130037641e-06, + "loss": 0.8467, "step": 28545 }, { - "epoch": 0.8100454029511919, + "epoch": 0.8089206268242228, "grad_norm": 0.0, - "learning_rate": 1.8335977094419288e-06, - "loss": 0.7958, + "learning_rate": 1.854627675827576e-06, + "loss": 0.7268, "step": 28546 }, { - "epoch": 0.8100737797956867, + "epoch": 0.8089489642664853, "grad_norm": 0.0, - "learning_rate": 1.8330673007226341e-06, - "loss": 0.7917, + "learning_rate": 1.85409529022901e-06, + "loss": 0.8524, "step": 28547 }, { - "epoch": 0.8101021566401816, + "epoch": 0.8089773017087478, "grad_norm": 0.0, - "learning_rate": 1.8325369609897837e-06, - "loss": 0.8015, + "learning_rate": 1.8535629732464211e-06, + "loss": 0.7778, "step": 28548 }, { - "epoch": 0.8101305334846765, + "epoch": 0.8090056391510102, "grad_norm": 0.0, - "learning_rate": 1.8320066902478606e-06, - "loss": 0.8567, + "learning_rate": 1.853030724884297e-06, + "loss": 0.8227, "step": 28549 }, { - "epoch": 0.8101589103291714, + "epoch": 0.8090339765932727, "grad_norm": 0.0, - "learning_rate": 1.831476488501347e-06, - "loss": 0.8151, + "learning_rate": 1.85249854514712e-06, + "loss": 0.8754, "step": 28550 }, { - "epoch": 0.8101872871736663, + "epoch": 0.8090623140355352, "grad_norm": 0.0, - "learning_rate": 1.830946355754717e-06, - "loss": 0.8007, + "learning_rate": 1.8519664340393729e-06, + "loss": 0.7769, "step": 28551 }, { - "epoch": 0.8102156640181611, + "epoch": 0.8090906514777976, "grad_norm": 0.0, - "learning_rate": 1.8304162920124492e-06, - "loss": 0.8779, + "learning_rate": 1.851434391565541e-06, + "loss": 0.8679, "step": 28552 }, { - "epoch": 0.8102440408626561, + "epoch": 0.8091189889200601, "grad_norm": 0.0, - "learning_rate": 1.8298862972790243e-06, - "loss": 0.8124, + "learning_rate": 1.8509024177301004e-06, + "loss": 0.8381, "step": 28553 }, { - "epoch": 0.810272417707151, + "epoch": 0.8091473263623226, "grad_norm": 0.0, - "learning_rate": 1.8293563715589158e-06, - "loss": 0.6941, + "learning_rate": 1.8503705125375382e-06, + "loss": 0.7746, "step": 28554 }, { - "epoch": 0.8103007945516458, + "epoch": 0.8091756638045849, "grad_norm": 0.0, - "learning_rate": 1.8288265148566008e-06, - "loss": 0.8202, + "learning_rate": 1.8498386759923282e-06, + "loss": 0.7475, "step": 28555 }, { - "epoch": 0.8103291713961408, + "epoch": 0.8092040012468474, "grad_norm": 0.0, - "learning_rate": 1.8282967271765583e-06, - "loss": 0.8063, + "learning_rate": 1.8493069080989534e-06, + "loss": 0.8773, "step": 28556 }, { - "epoch": 0.8103575482406357, + "epoch": 0.8092323386891099, "grad_norm": 0.0, - "learning_rate": 1.8277670085232578e-06, - "loss": 0.8438, + "learning_rate": 1.8487752088618959e-06, + "loss": 0.7012, "step": 28557 }, { - "epoch": 0.8103859250851305, + "epoch": 0.8092606761313724, "grad_norm": 0.0, - "learning_rate": 1.8272373589011772e-06, - "loss": 0.8025, + "learning_rate": 1.8482435782856289e-06, + "loss": 0.7659, "step": 28558 }, { - "epoch": 0.8104143019296254, + "epoch": 0.8092890135736348, "grad_norm": 0.0, - "learning_rate": 1.8267077783147901e-06, - "loss": 0.7991, + "learning_rate": 1.8477120163746343e-06, + "loss": 0.9258, "step": 28559 }, { - "epoch": 0.8104426787741204, + "epoch": 0.8093173510158973, "grad_norm": 0.0, - "learning_rate": 1.826178266768569e-06, - "loss": 0.8333, + "learning_rate": 1.8471805231333906e-06, + "loss": 0.7881, "step": 28560 }, { - "epoch": 0.8104710556186152, + "epoch": 0.8093456884581598, "grad_norm": 0.0, - "learning_rate": 1.8256488242669878e-06, - "loss": 0.8326, + "learning_rate": 1.84664909856637e-06, + "loss": 0.7956, "step": 28561 }, { - "epoch": 0.8104994324631101, + "epoch": 0.8093740259004222, "grad_norm": 0.0, - "learning_rate": 1.825119450814522e-06, - "loss": 0.8199, + "learning_rate": 1.846117742678052e-06, + "loss": 0.9267, "step": 28562 }, { - "epoch": 0.810527809307605, + "epoch": 0.8094023633426847, "grad_norm": 0.0, - "learning_rate": 1.8245901464156358e-06, - "loss": 0.8287, + "learning_rate": 1.8455864554729119e-06, + "loss": 0.8295, "step": 28563 }, { - "epoch": 0.8105561861520999, + "epoch": 0.8094307007849472, "grad_norm": 0.0, - "learning_rate": 1.8240609110748053e-06, - "loss": 0.8107, + "learning_rate": 1.8450552369554254e-06, + "loss": 0.8293, "step": 28564 }, { - "epoch": 0.8105845629965948, + "epoch": 0.8094590382272097, "grad_norm": 0.0, - "learning_rate": 1.8235317447965017e-06, - "loss": 0.8759, + "learning_rate": 1.8445240871300696e-06, + "loss": 0.863, "step": 28565 }, { - "epoch": 0.8106129398410896, + "epoch": 0.809487375669472, "grad_norm": 0.0, - "learning_rate": 1.8230026475851902e-06, - "loss": 0.7203, + "learning_rate": 1.8439930060013134e-06, + "loss": 0.8383, "step": 28566 }, { - "epoch": 0.8106413166855846, + "epoch": 0.8095157131117345, "grad_norm": 0.0, - "learning_rate": 1.8224736194453429e-06, - "loss": 0.7696, + "learning_rate": 1.843461993573632e-06, + "loss": 0.7216, "step": 28567 }, { - "epoch": 0.8106696935300794, + "epoch": 0.809544050553997, "grad_norm": 0.0, - "learning_rate": 1.8219446603814317e-06, - "loss": 0.7902, + "learning_rate": 1.842931049851502e-06, + "loss": 0.7571, "step": 28568 }, { - "epoch": 0.8106980703745743, + "epoch": 0.8095723879962594, "grad_norm": 0.0, - "learning_rate": 1.8214157703979186e-06, - "loss": 0.8483, + "learning_rate": 1.8424001748393905e-06, + "loss": 0.8224, "step": 28569 }, { - "epoch": 0.8107264472190693, + "epoch": 0.8096007254385219, "grad_norm": 0.0, - "learning_rate": 1.8208869494992776e-06, - "loss": 0.8125, + "learning_rate": 1.8418693685417743e-06, + "loss": 0.8709, "step": 28570 }, { - "epoch": 0.8107548240635641, + "epoch": 0.8096290628807844, "grad_norm": 0.0, - "learning_rate": 1.8203581976899687e-06, - "loss": 0.7444, + "learning_rate": 1.8413386309631188e-06, + "loss": 0.8333, "step": 28571 }, { - "epoch": 0.810783200908059, + "epoch": 0.8096574003230469, "grad_norm": 0.0, - "learning_rate": 1.8198295149744617e-06, - "loss": 0.8511, + "learning_rate": 1.8408079621078977e-06, + "loss": 0.8759, "step": 28572 }, { - "epoch": 0.810811577752554, + "epoch": 0.8096857377653093, "grad_norm": 0.0, - "learning_rate": 1.819300901357226e-06, - "loss": 0.7966, + "learning_rate": 1.8402773619805837e-06, + "loss": 0.8056, "step": 28573 }, { - "epoch": 0.8108399545970488, + "epoch": 0.8097140752075718, "grad_norm": 0.0, - "learning_rate": 1.8187723568427173e-06, - "loss": 0.7885, + "learning_rate": 1.8397468305856413e-06, + "loss": 0.8678, "step": 28574 }, { - "epoch": 0.8108683314415437, + "epoch": 0.8097424126498343, "grad_norm": 0.0, - "learning_rate": 1.8182438814354087e-06, - "loss": 0.8389, + "learning_rate": 1.839216367927541e-06, + "loss": 0.7531, "step": 28575 }, { - "epoch": 0.8108967082860385, + "epoch": 0.8097707500920966, "grad_norm": 0.0, - "learning_rate": 1.8177154751397652e-06, - "loss": 0.7888, + "learning_rate": 1.838685974010752e-06, + "loss": 0.8835, "step": 28576 }, { - "epoch": 0.8109250851305335, + "epoch": 0.8097990875343591, "grad_norm": 0.0, - "learning_rate": 1.8171871379602435e-06, - "loss": 0.7885, + "learning_rate": 1.8381556488397411e-06, + "loss": 0.7483, "step": 28577 }, { - "epoch": 0.8109534619750284, + "epoch": 0.8098274249766216, "grad_norm": 0.0, - "learning_rate": 1.816658869901311e-06, - "loss": 0.911, + "learning_rate": 1.8376253924189791e-06, + "loss": 0.8434, "step": 28578 }, { - "epoch": 0.8109818388195232, + "epoch": 0.809855762418884, "grad_norm": 0.0, - "learning_rate": 1.816130670967431e-06, - "loss": 0.913, + "learning_rate": 1.8370952047529267e-06, + "loss": 0.8222, "step": 28579 }, { - "epoch": 0.8110102156640182, + "epoch": 0.8098840998611465, "grad_norm": 0.0, - "learning_rate": 1.8156025411630596e-06, - "loss": 0.8761, + "learning_rate": 1.8365650858460527e-06, + "loss": 0.8187, "step": 28580 }, { - "epoch": 0.8110385925085131, + "epoch": 0.809912437303409, "grad_norm": 0.0, - "learning_rate": 1.8150744804926623e-06, - "loss": 0.8707, + "learning_rate": 1.8360350357028256e-06, + "loss": 0.6897, "step": 28581 }, { - "epoch": 0.8110669693530079, + "epoch": 0.8099407747456715, "grad_norm": 0.0, - "learning_rate": 1.8145464889607012e-06, - "loss": 0.8776, + "learning_rate": 1.835505054327703e-06, + "loss": 0.8492, "step": 28582 }, { - "epoch": 0.8110953461975028, + "epoch": 0.8099691121879339, "grad_norm": 0.0, - "learning_rate": 1.8140185665716315e-06, - "loss": 0.7984, + "learning_rate": 1.8349751417251571e-06, + "loss": 0.737, "step": 28583 }, { - "epoch": 0.8111237230419978, + "epoch": 0.8099974496301964, "grad_norm": 0.0, - "learning_rate": 1.813490713329915e-06, - "loss": 0.8954, + "learning_rate": 1.8344452978996441e-06, + "loss": 0.8331, "step": 28584 }, { - "epoch": 0.8111520998864926, + "epoch": 0.8100257870724589, "grad_norm": 0.0, - "learning_rate": 1.812962929240013e-06, - "loss": 0.8128, + "learning_rate": 1.8339155228556315e-06, + "loss": 0.7176, "step": 28585 }, { - "epoch": 0.8111804767309875, + "epoch": 0.8100541245147213, "grad_norm": 0.0, - "learning_rate": 1.8124352143063784e-06, - "loss": 0.8035, + "learning_rate": 1.8333858165975827e-06, + "loss": 0.8217, "step": 28586 }, { - "epoch": 0.8112088535754824, + "epoch": 0.8100824619569837, "grad_norm": 0.0, - "learning_rate": 1.811907568533472e-06, - "loss": 0.8552, + "learning_rate": 1.8328561791299548e-06, + "loss": 0.8269, "step": 28587 }, { - "epoch": 0.8112372304199773, + "epoch": 0.8101107993992462, "grad_norm": 0.0, - "learning_rate": 1.8113799919257525e-06, - "loss": 0.8253, + "learning_rate": 1.8323266104572135e-06, + "loss": 0.7921, "step": 28588 }, { - "epoch": 0.8112656072644722, + "epoch": 0.8101391368415087, "grad_norm": 0.0, - "learning_rate": 1.810852484487672e-06, - "loss": 0.8749, + "learning_rate": 1.8317971105838173e-06, + "loss": 0.9049, "step": 28589 }, { - "epoch": 0.8112939841089671, + "epoch": 0.8101674742837711, "grad_norm": 0.0, - "learning_rate": 1.8103250462236888e-06, - "loss": 0.7762, + "learning_rate": 1.831267679514227e-06, + "loss": 0.876, "step": 28590 }, { - "epoch": 0.811322360953462, + "epoch": 0.8101958117260336, "grad_norm": 0.0, - "learning_rate": 1.8097976771382575e-06, - "loss": 0.9039, + "learning_rate": 1.8307383172529046e-06, + "loss": 0.8401, "step": 28591 }, { - "epoch": 0.8113507377979569, + "epoch": 0.8102241491682961, "grad_norm": 0.0, - "learning_rate": 1.8092703772358345e-06, - "loss": 0.8114, + "learning_rate": 1.8302090238043057e-06, + "loss": 0.8354, "step": 28592 }, { - "epoch": 0.8113791146424517, + "epoch": 0.8102524866105585, "grad_norm": 0.0, - "learning_rate": 1.8087431465208738e-06, - "loss": 0.9059, + "learning_rate": 1.8296797991728887e-06, + "loss": 0.8234, "step": 28593 }, { - "epoch": 0.8114074914869467, + "epoch": 0.810280824052821, "grad_norm": 0.0, - "learning_rate": 1.8082159849978265e-06, - "loss": 0.7675, + "learning_rate": 1.8291506433631156e-06, + "loss": 0.7856, "step": 28594 }, { - "epoch": 0.8114358683314415, + "epoch": 0.8103091614950835, "grad_norm": 0.0, - "learning_rate": 1.8076888926711466e-06, - "loss": 0.8047, + "learning_rate": 1.8286215563794386e-06, + "loss": 0.7243, "step": 28595 }, { - "epoch": 0.8114642451759364, + "epoch": 0.810337498937346, "grad_norm": 0.0, - "learning_rate": 1.8071618695452896e-06, - "loss": 0.8044, + "learning_rate": 1.828092538226317e-06, + "loss": 0.7821, "step": 28596 }, { - "epoch": 0.8114926220204314, + "epoch": 0.8103658363796084, "grad_norm": 0.0, - "learning_rate": 1.806634915624702e-06, - "loss": 0.7602, + "learning_rate": 1.8275635889082088e-06, + "loss": 0.8163, "step": 28597 }, { - "epoch": 0.8115209988649262, + "epoch": 0.8103941738218708, "grad_norm": 0.0, - "learning_rate": 1.8061080309138379e-06, - "loss": 0.7529, + "learning_rate": 1.8270347084295636e-06, + "loss": 0.6885, "step": 28598 }, { - "epoch": 0.8115493757094211, + "epoch": 0.8104225112641333, "grad_norm": 0.0, - "learning_rate": 1.8055812154171505e-06, - "loss": 0.7212, + "learning_rate": 1.8265058967948434e-06, + "loss": 0.8235, "step": 28599 }, { - "epoch": 0.811577752553916, + "epoch": 0.8104508487063957, "grad_norm": 0.0, - "learning_rate": 1.8050544691390836e-06, - "loss": 0.7892, + "learning_rate": 1.825977154008497e-06, + "loss": 0.8029, "step": 28600 }, { - "epoch": 0.8116061293984109, + "epoch": 0.8104791861486582, "grad_norm": 0.0, - "learning_rate": 1.80452779208409e-06, - "loss": 0.809, + "learning_rate": 1.8254484800749794e-06, + "loss": 0.9368, "step": 28601 }, { - "epoch": 0.8116345062429058, + "epoch": 0.8105075235909207, "grad_norm": 0.0, - "learning_rate": 1.8040011842566218e-06, - "loss": 0.8653, + "learning_rate": 1.8249198749987463e-06, + "loss": 0.9131, "step": 28602 }, { - "epoch": 0.8116628830874006, + "epoch": 0.8105358610331831, "grad_norm": 0.0, - "learning_rate": 1.8034746456611207e-06, - "loss": 0.792, + "learning_rate": 1.824391338784247e-06, + "loss": 0.8035, "step": 28603 }, { - "epoch": 0.8116912599318956, + "epoch": 0.8105641984754456, "grad_norm": 0.0, - "learning_rate": 1.8029481763020384e-06, - "loss": 0.8267, + "learning_rate": 1.8238628714359375e-06, + "loss": 0.8852, "step": 28604 }, { - "epoch": 0.8117196367763905, + "epoch": 0.8105925359177081, "grad_norm": 0.0, - "learning_rate": 1.8024217761838202e-06, - "loss": 0.813, + "learning_rate": 1.823334472958268e-06, + "loss": 0.7789, "step": 28605 }, { - "epoch": 0.8117480136208853, + "epoch": 0.8106208733599706, "grad_norm": 0.0, - "learning_rate": 1.801895445310915e-06, - "loss": 0.6711, + "learning_rate": 1.8228061433556866e-06, + "loss": 0.8364, "step": 28606 }, { - "epoch": 0.8117763904653803, + "epoch": 0.810649210802233, "grad_norm": 0.0, - "learning_rate": 1.80136918368777e-06, - "loss": 0.7664, + "learning_rate": 1.8222778826326482e-06, + "loss": 0.7294, "step": 28607 }, { - "epoch": 0.8118047673098752, + "epoch": 0.8106775482444954, "grad_norm": 0.0, - "learning_rate": 1.800842991318824e-06, - "loss": 0.8569, + "learning_rate": 1.8217496907935971e-06, + "loss": 0.7418, "step": 28608 }, { - "epoch": 0.81183314415437, + "epoch": 0.8107058856867579, "grad_norm": 0.0, - "learning_rate": 1.8003168682085282e-06, - "loss": 0.8642, + "learning_rate": 1.8212215678429856e-06, + "loss": 0.8607, "step": 28609 }, { - "epoch": 0.8118615209988649, + "epoch": 0.8107342231290203, "grad_norm": 0.0, - "learning_rate": 1.7997908143613252e-06, - "loss": 0.7483, + "learning_rate": 1.8206935137852644e-06, + "loss": 0.6863, "step": 28610 }, { - "epoch": 0.8118898978433599, + "epoch": 0.8107625605712828, "grad_norm": 0.0, - "learning_rate": 1.7992648297816563e-06, - "loss": 0.8139, + "learning_rate": 1.8201655286248766e-06, + "loss": 0.7438, "step": 28611 }, { - "epoch": 0.8119182746878547, + "epoch": 0.8107908980135453, "grad_norm": 0.0, - "learning_rate": 1.7987389144739653e-06, - "loss": 0.7744, + "learning_rate": 1.819637612366274e-06, + "loss": 0.8421, "step": 28612 }, { - "epoch": 0.8119466515323496, + "epoch": 0.8108192354558078, "grad_norm": 0.0, - "learning_rate": 1.798213068442698e-06, - "loss": 0.7751, + "learning_rate": 1.819109765013899e-06, + "loss": 0.8374, "step": 28613 }, { - "epoch": 0.8119750283768445, + "epoch": 0.8108475728980702, "grad_norm": 0.0, - "learning_rate": 1.7976872916922905e-06, - "loss": 0.8485, + "learning_rate": 1.818581986572201e-06, + "loss": 0.784, "step": 28614 }, { - "epoch": 0.8120034052213394, + "epoch": 0.8108759103403327, "grad_norm": 0.0, - "learning_rate": 1.7971615842271883e-06, - "loss": 0.9004, + "learning_rate": 1.8180542770456244e-06, + "loss": 0.8058, "step": 28615 }, { - "epoch": 0.8120317820658343, + "epoch": 0.8109042477825952, "grad_norm": 0.0, - "learning_rate": 1.7966359460518323e-06, - "loss": 0.8547, + "learning_rate": 1.8175266364386157e-06, + "loss": 0.8275, "step": 28616 }, { - "epoch": 0.8120601589103291, + "epoch": 0.8109325852248576, "grad_norm": 0.0, - "learning_rate": 1.79611037717066e-06, - "loss": 0.8469, + "learning_rate": 1.8169990647556179e-06, + "loss": 0.7766, "step": 28617 }, { - "epoch": 0.8120885357548241, + "epoch": 0.81096092266712, "grad_norm": 0.0, - "learning_rate": 1.7955848775881114e-06, - "loss": 0.8719, + "learning_rate": 1.8164715620010788e-06, + "loss": 0.826, "step": 28618 }, { - "epoch": 0.812116912599319, + "epoch": 0.8109892601093825, "grad_norm": 0.0, - "learning_rate": 1.795059447308629e-06, - "loss": 0.7386, + "learning_rate": 1.8159441281794355e-06, + "loss": 0.7421, "step": 28619 }, { - "epoch": 0.8121452894438138, + "epoch": 0.811017597551645, "grad_norm": 0.0, - "learning_rate": 1.7945340863366434e-06, - "loss": 0.805, + "learning_rate": 1.815416763295137e-06, + "loss": 0.7963, "step": 28620 }, { - "epoch": 0.8121736662883088, + "epoch": 0.8110459349939074, "grad_norm": 0.0, - "learning_rate": 1.7940087946766017e-06, - "loss": 0.8765, + "learning_rate": 1.8148894673526196e-06, + "loss": 0.7684, "step": 28621 }, { - "epoch": 0.8122020431328036, + "epoch": 0.8110742724361699, "grad_norm": 0.0, - "learning_rate": 1.7934835723329347e-06, - "loss": 0.8405, + "learning_rate": 1.8143622403563277e-06, + "loss": 0.8713, "step": 28622 }, { - "epoch": 0.8122304199772985, + "epoch": 0.8111026098784324, "grad_norm": 0.0, - "learning_rate": 1.7929584193100802e-06, - "loss": 0.8185, + "learning_rate": 1.8138350823107043e-06, + "loss": 0.834, "step": 28623 }, { - "epoch": 0.8122587968217935, + "epoch": 0.8111309473206948, "grad_norm": 0.0, - "learning_rate": 1.792433335612478e-06, - "loss": 0.6992, + "learning_rate": 1.8133079932201857e-06, + "loss": 0.8129, "step": 28624 }, { - "epoch": 0.8122871736662883, + "epoch": 0.8111592847629573, "grad_norm": 0.0, - "learning_rate": 1.7919083212445566e-06, - "loss": 0.7076, + "learning_rate": 1.8127809730892143e-06, + "loss": 0.7866, "step": 28625 }, { - "epoch": 0.8123155505107832, + "epoch": 0.8111876222052198, "grad_norm": 0.0, - "learning_rate": 1.7913833762107557e-06, - "loss": 0.8207, + "learning_rate": 1.8122540219222306e-06, + "loss": 0.8448, "step": 28626 }, { - "epoch": 0.812343927355278, + "epoch": 0.8112159596474822, "grad_norm": 0.0, - "learning_rate": 1.790858500515511e-06, - "loss": 0.7883, + "learning_rate": 1.8117271397236703e-06, + "loss": 0.7285, "step": 28627 }, { - "epoch": 0.812372304199773, + "epoch": 0.8112442970897447, "grad_norm": 0.0, - "learning_rate": 1.790333694163251e-06, - "loss": 0.8306, + "learning_rate": 1.8112003264979728e-06, + "loss": 0.8181, "step": 28628 }, { - "epoch": 0.8124006810442679, + "epoch": 0.8112726345320072, "grad_norm": 0.0, - "learning_rate": 1.789808957158411e-06, - "loss": 0.8243, + "learning_rate": 1.8106735822495746e-06, + "loss": 0.7952, "step": 28629 }, { - "epoch": 0.8124290578887627, + "epoch": 0.8113009719742696, "grad_norm": 0.0, - "learning_rate": 1.7892842895054263e-06, - "loss": 0.7466, + "learning_rate": 1.8101469069829148e-06, + "loss": 0.8836, "step": 28630 }, { - "epoch": 0.8124574347332577, + "epoch": 0.811329309416532, "grad_norm": 0.0, - "learning_rate": 1.7887596912087245e-06, - "loss": 0.8074, + "learning_rate": 1.8096203007024315e-06, + "loss": 0.739, "step": 28631 }, { - "epoch": 0.8124858115777526, + "epoch": 0.8113576468587945, "grad_norm": 0.0, - "learning_rate": 1.7882351622727378e-06, - "loss": 0.7763, + "learning_rate": 1.809093763412555e-06, + "loss": 0.8115, "step": 28632 }, { - "epoch": 0.8125141884222474, + "epoch": 0.811385984301057, "grad_norm": 0.0, - "learning_rate": 1.7877107027019003e-06, - "loss": 0.8282, + "learning_rate": 1.8085672951177236e-06, + "loss": 0.8182, "step": 28633 }, { - "epoch": 0.8125425652667423, + "epoch": 0.8114143217433194, "grad_norm": 0.0, - "learning_rate": 1.7871863125006383e-06, - "loss": 0.8658, + "learning_rate": 1.8080408958223738e-06, + "loss": 0.8697, "step": 28634 }, { - "epoch": 0.8125709421112373, + "epoch": 0.8114426591855819, "grad_norm": 0.0, - "learning_rate": 1.786661991673382e-06, - "loss": 0.689, + "learning_rate": 1.8075145655309356e-06, + "loss": 0.7, "step": 28635 }, { - "epoch": 0.8125993189557321, + "epoch": 0.8114709966278444, "grad_norm": 0.0, - "learning_rate": 1.7861377402245605e-06, - "loss": 0.8186, + "learning_rate": 1.8069883042478464e-06, + "loss": 0.8642, "step": 28636 }, { - "epoch": 0.812627695800227, + "epoch": 0.8114993340701069, "grad_norm": 0.0, - "learning_rate": 1.785613558158603e-06, - "loss": 0.8846, + "learning_rate": 1.806462111977535e-06, + "loss": 0.7817, "step": 28637 }, { - "epoch": 0.812656072644722, + "epoch": 0.8115276715123693, "grad_norm": 0.0, - "learning_rate": 1.7850894454799405e-06, - "loss": 0.7828, + "learning_rate": 1.8059359887244353e-06, + "loss": 0.7726, "step": 28638 }, { - "epoch": 0.8126844494892168, + "epoch": 0.8115560089546318, "grad_norm": 0.0, - "learning_rate": 1.7845654021929936e-06, - "loss": 0.87, + "learning_rate": 1.8054099344929833e-06, + "loss": 0.8387, "step": 28639 }, { - "epoch": 0.8127128263337117, + "epoch": 0.8115843463968943, "grad_norm": 0.0, - "learning_rate": 1.7840414283021923e-06, - "loss": 0.7443, + "learning_rate": 1.8048839492876024e-06, + "loss": 0.7922, "step": 28640 }, { - "epoch": 0.8127412031782065, + "epoch": 0.8116126838391566, "grad_norm": 0.0, - "learning_rate": 1.783517523811964e-06, - "loss": 0.7712, + "learning_rate": 1.8043580331127275e-06, + "loss": 0.8675, "step": 28641 }, { - "epoch": 0.8127695800227015, + "epoch": 0.8116410212814191, "grad_norm": 0.0, - "learning_rate": 1.7829936887267306e-06, - "loss": 0.8959, + "learning_rate": 1.8038321859727891e-06, + "loss": 0.883, "step": 28642 }, { - "epoch": 0.8127979568671964, + "epoch": 0.8116693587236816, "grad_norm": 0.0, - "learning_rate": 1.782469923050919e-06, - "loss": 0.8844, + "learning_rate": 1.803306407872215e-06, + "loss": 0.7382, "step": 28643 }, { - "epoch": 0.8128263337116912, + "epoch": 0.8116976961659441, "grad_norm": 0.0, - "learning_rate": 1.7819462267889564e-06, - "loss": 0.8031, + "learning_rate": 1.8027806988154373e-06, + "loss": 0.8146, "step": 28644 }, { - "epoch": 0.8128547105561862, + "epoch": 0.8117260336082065, "grad_norm": 0.0, - "learning_rate": 1.7814225999452605e-06, - "loss": 0.9039, + "learning_rate": 1.8022550588068799e-06, + "loss": 0.8305, "step": 28645 }, { - "epoch": 0.812883087400681, + "epoch": 0.811754371050469, "grad_norm": 0.0, - "learning_rate": 1.7808990425242567e-06, - "loss": 0.7456, + "learning_rate": 1.8017294878509716e-06, + "loss": 0.7656, "step": 28646 }, { - "epoch": 0.8129114642451759, + "epoch": 0.8117827084927315, "grad_norm": 0.0, - "learning_rate": 1.7803755545303714e-06, - "loss": 0.9313, + "learning_rate": 1.8012039859521425e-06, + "loss": 0.8917, "step": 28647 }, { - "epoch": 0.8129398410896709, + "epoch": 0.8118110459349939, "grad_norm": 0.0, - "learning_rate": 1.77985213596802e-06, - "loss": 0.9164, + "learning_rate": 1.8006785531148153e-06, + "loss": 0.8327, "step": 28648 }, { - "epoch": 0.8129682179341657, + "epoch": 0.8118393833772564, "grad_norm": 0.0, - "learning_rate": 1.7793287868416275e-06, - "loss": 0.9063, + "learning_rate": 1.8001531893434188e-06, + "loss": 0.7618, "step": 28649 }, { - "epoch": 0.8129965947786606, + "epoch": 0.8118677208195189, "grad_norm": 0.0, - "learning_rate": 1.7788055071556175e-06, - "loss": 0.8171, + "learning_rate": 1.799627894642375e-06, + "loss": 0.7718, "step": 28650 }, { - "epoch": 0.8130249716231555, + "epoch": 0.8118960582617812, "grad_norm": 0.0, - "learning_rate": 1.778282296914402e-06, - "loss": 0.7142, + "learning_rate": 1.7991026690161107e-06, + "loss": 0.9865, "step": 28651 }, { - "epoch": 0.8130533484676504, + "epoch": 0.8119243957040437, "grad_norm": 0.0, - "learning_rate": 1.7777591561224094e-06, - "loss": 0.7791, + "learning_rate": 1.7985775124690496e-06, + "loss": 0.8698, "step": 28652 }, { - "epoch": 0.8130817253121453, + "epoch": 0.8119527331463062, "grad_norm": 0.0, - "learning_rate": 1.777236084784053e-06, - "loss": 0.7656, + "learning_rate": 1.7980524250056153e-06, + "loss": 0.8542, "step": 28653 }, { - "epoch": 0.8131101021566401, + "epoch": 0.8119810705885687, "grad_norm": 0.0, - "learning_rate": 1.7767130829037527e-06, - "loss": 0.7611, + "learning_rate": 1.7975274066302317e-06, + "loss": 0.8359, "step": 28654 }, { - "epoch": 0.8131384790011351, + "epoch": 0.8120094080308311, "grad_norm": 0.0, - "learning_rate": 1.7761901504859291e-06, - "loss": 0.9168, + "learning_rate": 1.7970024573473233e-06, + "loss": 0.7393, "step": 28655 }, { - "epoch": 0.81316685584563, + "epoch": 0.8120377454730936, "grad_norm": 0.0, - "learning_rate": 1.7756672875349956e-06, - "loss": 0.7582, + "learning_rate": 1.7964775771613064e-06, + "loss": 0.8516, "step": 28656 }, { - "epoch": 0.8131952326901248, + "epoch": 0.8120660829153561, "grad_norm": 0.0, - "learning_rate": 1.7751444940553686e-06, - "loss": 0.8145, + "learning_rate": 1.7959527660766074e-06, + "loss": 0.8159, "step": 28657 }, { - "epoch": 0.8132236095346197, + "epoch": 0.8120944203576185, "grad_norm": 0.0, - "learning_rate": 1.77462177005147e-06, - "loss": 0.8569, + "learning_rate": 1.795428024097643e-06, + "loss": 0.775, "step": 28658 }, { - "epoch": 0.8132519863791147, + "epoch": 0.812122757799881, "grad_norm": 0.0, - "learning_rate": 1.7740991155277076e-06, - "loss": 0.8255, + "learning_rate": 1.794903351228835e-06, + "loss": 0.8292, "step": 28659 }, { - "epoch": 0.8132803632236095, + "epoch": 0.8121510952421435, "grad_norm": 0.0, - "learning_rate": 1.7735765304884988e-06, - "loss": 0.7549, + "learning_rate": 1.7943787474746044e-06, + "loss": 0.8217, "step": 28660 }, { - "epoch": 0.8133087400681044, + "epoch": 0.812179432684406, "grad_norm": 0.0, - "learning_rate": 1.7730540149382625e-06, - "loss": 0.8252, + "learning_rate": 1.7938542128393677e-06, + "loss": 0.759, "step": 28661 }, { - "epoch": 0.8133371169125994, + "epoch": 0.8122077701266683, "grad_norm": 0.0, - "learning_rate": 1.7725315688814059e-06, - "loss": 0.9313, + "learning_rate": 1.7933297473275435e-06, + "loss": 0.77, "step": 28662 }, { - "epoch": 0.8133654937570942, + "epoch": 0.8122361075689308, "grad_norm": 0.0, - "learning_rate": 1.7720091923223458e-06, - "loss": 0.7741, + "learning_rate": 1.7928053509435527e-06, + "loss": 0.7601, "step": 28663 }, { - "epoch": 0.8133938706015891, + "epoch": 0.8122644450111933, "grad_norm": 0.0, - "learning_rate": 1.7714868852654953e-06, - "loss": 0.8616, + "learning_rate": 1.7922810236918077e-06, + "loss": 0.8857, "step": 28664 }, { - "epoch": 0.813422247446084, + "epoch": 0.8122927824534557, "grad_norm": 0.0, - "learning_rate": 1.770964647715263e-06, - "loss": 0.7913, + "learning_rate": 1.7917567655767277e-06, + "loss": 0.7785, "step": 28665 }, { - "epoch": 0.8134506242905789, + "epoch": 0.8123211198957182, "grad_norm": 0.0, - "learning_rate": 1.77044247967606e-06, - "loss": 0.9026, + "learning_rate": 1.7912325766027282e-06, + "loss": 0.6933, "step": 28666 }, { - "epoch": 0.8134790011350738, + "epoch": 0.8123494573379807, "grad_norm": 0.0, - "learning_rate": 1.7699203811523047e-06, - "loss": 0.826, + "learning_rate": 1.790708456774225e-06, + "loss": 0.8925, "step": 28667 }, { - "epoch": 0.8135073779795686, + "epoch": 0.8123777947802432, "grad_norm": 0.0, - "learning_rate": 1.7693983521483982e-06, - "loss": 0.7056, + "learning_rate": 1.7901844060956353e-06, + "loss": 0.9857, "step": 28668 }, { - "epoch": 0.8135357548240636, + "epoch": 0.8124061322225056, "grad_norm": 0.0, - "learning_rate": 1.7688763926687546e-06, - "loss": 0.8731, + "learning_rate": 1.7896604245713688e-06, + "loss": 0.7842, "step": 28669 }, { - "epoch": 0.8135641316685585, + "epoch": 0.8124344696647681, "grad_norm": 0.0, - "learning_rate": 1.768354502717784e-06, - "loss": 0.7025, + "learning_rate": 1.7891365122058435e-06, + "loss": 0.7381, "step": 28670 }, { - "epoch": 0.8135925085130533, + "epoch": 0.8124628071070306, "grad_norm": 0.0, - "learning_rate": 1.7678326822998914e-06, - "loss": 0.8011, + "learning_rate": 1.7886126690034688e-06, + "loss": 0.7828, "step": 28671 }, { - "epoch": 0.8136208853575483, + "epoch": 0.8124911445492929, "grad_norm": 0.0, - "learning_rate": 1.7673109314194858e-06, - "loss": 0.74, + "learning_rate": 1.788088894968658e-06, + "loss": 0.9276, "step": 28672 }, { - "epoch": 0.8136492622020431, + "epoch": 0.8125194819915554, "grad_norm": 0.0, - "learning_rate": 1.766789250080977e-06, - "loss": 0.795, + "learning_rate": 1.7875651901058266e-06, + "loss": 0.7706, "step": 28673 }, { - "epoch": 0.813677639046538, + "epoch": 0.8125478194338179, "grad_norm": 0.0, - "learning_rate": 1.7662676382887667e-06, - "loss": 0.8328, + "learning_rate": 1.7870415544193808e-06, + "loss": 0.8839, "step": 28674 }, { - "epoch": 0.8137060158910329, + "epoch": 0.8125761568760803, "grad_norm": 0.0, - "learning_rate": 1.765746096047265e-06, - "loss": 0.8511, + "learning_rate": 1.786517987913734e-06, + "loss": 0.7878, "step": 28675 }, { - "epoch": 0.8137343927355278, + "epoch": 0.8126044943183428, "grad_norm": 0.0, - "learning_rate": 1.7652246233608783e-06, - "loss": 0.7955, + "learning_rate": 1.785994490593298e-06, + "loss": 0.8313, "step": 28676 }, { - "epoch": 0.8137627695800227, + "epoch": 0.8126328317606053, "grad_norm": 0.0, - "learning_rate": 1.7647032202340065e-06, - "loss": 0.8694, + "learning_rate": 1.7854710624624782e-06, + "loss": 0.8317, "step": 28677 }, { - "epoch": 0.8137911464245176, + "epoch": 0.8126611692028678, "grad_norm": 0.0, - "learning_rate": 1.7641818866710592e-06, - "loss": 0.7445, + "learning_rate": 1.7849477035256868e-06, + "loss": 0.8214, "step": 28678 }, { - "epoch": 0.8138195232690125, + "epoch": 0.8126895066451302, "grad_norm": 0.0, - "learning_rate": 1.7636606226764353e-06, - "loss": 0.7511, + "learning_rate": 1.7844244137873302e-06, + "loss": 0.7649, "step": 28679 }, { - "epoch": 0.8138479001135074, + "epoch": 0.8127178440873927, "grad_norm": 0.0, - "learning_rate": 1.76313942825454e-06, - "loss": 0.8615, + "learning_rate": 1.783901193251819e-06, + "loss": 0.8218, "step": 28680 }, { - "epoch": 0.8138762769580022, + "epoch": 0.8127461815296552, "grad_norm": 0.0, - "learning_rate": 1.76261830340978e-06, - "loss": 0.7655, + "learning_rate": 1.7833780419235603e-06, + "loss": 0.7683, "step": 28681 }, { - "epoch": 0.8139046538024972, + "epoch": 0.8127745189719175, "grad_norm": 0.0, - "learning_rate": 1.762097248146547e-06, - "loss": 0.7441, + "learning_rate": 1.7828549598069578e-06, + "loss": 0.9675, "step": 28682 }, { - "epoch": 0.8139330306469921, + "epoch": 0.81280285641418, "grad_norm": 0.0, - "learning_rate": 1.7615762624692523e-06, - "loss": 0.8289, + "learning_rate": 1.7823319469064194e-06, + "loss": 0.8831, "step": 28683 }, { - "epoch": 0.8139614074914869, + "epoch": 0.8128311938564425, "grad_norm": 0.0, - "learning_rate": 1.7610553463822954e-06, - "loss": 0.8843, + "learning_rate": 1.7818090032263524e-06, + "loss": 0.7558, "step": 28684 }, { - "epoch": 0.8139897843359818, + "epoch": 0.812859531298705, "grad_norm": 0.0, - "learning_rate": 1.760534499890072e-06, - "loss": 0.8586, + "learning_rate": 1.781286128771158e-06, + "loss": 0.8466, "step": 28685 }, { - "epoch": 0.8140181611804768, + "epoch": 0.8128878687409674, "grad_norm": 0.0, - "learning_rate": 1.7600137229969837e-06, - "loss": 0.8555, + "learning_rate": 1.7807633235452449e-06, + "loss": 0.7594, "step": 28686 }, { - "epoch": 0.8140465380249716, + "epoch": 0.8129162061832299, "grad_norm": 0.0, - "learning_rate": 1.759493015707433e-06, - "loss": 0.8651, + "learning_rate": 1.7802405875530116e-06, + "loss": 0.8461, "step": 28687 }, { - "epoch": 0.8140749148694665, + "epoch": 0.8129445436254924, "grad_norm": 0.0, - "learning_rate": 1.7589723780258127e-06, - "loss": 0.7833, + "learning_rate": 1.7797179207988635e-06, + "loss": 0.7689, "step": 28688 }, { - "epoch": 0.8141032917139615, + "epoch": 0.8129728810677548, "grad_norm": 0.0, - "learning_rate": 1.758451809956523e-06, - "loss": 0.8325, + "learning_rate": 1.7791953232872083e-06, + "loss": 0.8311, "step": 28689 }, { - "epoch": 0.8141316685584563, + "epoch": 0.8130012185100173, "grad_norm": 0.0, - "learning_rate": 1.7579313115039642e-06, - "loss": 0.8263, + "learning_rate": 1.778672795022439e-06, + "loss": 0.8588, "step": 28690 }, { - "epoch": 0.8141600454029512, + "epoch": 0.8130295559522798, "grad_norm": 0.0, - "learning_rate": 1.7574108826725268e-06, - "loss": 0.8271, + "learning_rate": 1.7781503360089635e-06, + "loss": 0.8206, "step": 28691 }, { - "epoch": 0.814188422247446, + "epoch": 0.8130578933945423, "grad_norm": 0.0, - "learning_rate": 1.756890523466611e-06, - "loss": 0.7824, + "learning_rate": 1.7776279462511803e-06, + "loss": 0.903, "step": 28692 }, { - "epoch": 0.814216799091941, + "epoch": 0.8130862308368046, "grad_norm": 0.0, - "learning_rate": 1.756370233890613e-06, - "loss": 0.8002, + "learning_rate": 1.777105625753489e-06, + "loss": 0.8216, "step": 28693 }, { - "epoch": 0.8142451759364359, + "epoch": 0.8131145682790671, "grad_norm": 0.0, - "learning_rate": 1.7558500139489243e-06, - "loss": 0.705, + "learning_rate": 1.7765833745202943e-06, + "loss": 0.7496, "step": 28694 }, { - "epoch": 0.8142735527809307, + "epoch": 0.8131429057213296, "grad_norm": 0.0, - "learning_rate": 1.7553298636459416e-06, - "loss": 0.8763, + "learning_rate": 1.776061192555989e-06, + "loss": 0.7057, "step": 28695 }, { - "epoch": 0.8143019296254257, + "epoch": 0.813171243163592, "grad_norm": 0.0, - "learning_rate": 1.7548097829860599e-06, - "loss": 0.8821, + "learning_rate": 1.7755390798649741e-06, + "loss": 0.6081, "step": 28696 }, { - "epoch": 0.8143303064699206, + "epoch": 0.8131995806058545, "grad_norm": 0.0, - "learning_rate": 1.7542897719736662e-06, - "loss": 0.7668, + "learning_rate": 1.7750170364516495e-06, + "loss": 0.7129, "step": 28697 }, { - "epoch": 0.8143586833144154, + "epoch": 0.813227918048117, "grad_norm": 0.0, - "learning_rate": 1.7537698306131624e-06, - "loss": 0.7936, + "learning_rate": 1.7744950623204082e-06, + "loss": 0.8238, "step": 28698 }, { - "epoch": 0.8143870601589104, + "epoch": 0.8132562554903794, "grad_norm": 0.0, - "learning_rate": 1.7532499589089324e-06, - "loss": 0.7509, + "learning_rate": 1.7739731574756524e-06, + "loss": 0.7628, "step": 28699 }, { - "epoch": 0.8144154370034052, + "epoch": 0.8132845929326419, "grad_norm": 0.0, - "learning_rate": 1.7527301568653709e-06, - "loss": 0.807, + "learning_rate": 1.773451321921773e-06, + "loss": 0.8006, "step": 28700 }, { - "epoch": 0.8144438138479001, + "epoch": 0.8133129303749044, "grad_norm": 0.0, - "learning_rate": 1.752210424486872e-06, - "loss": 0.7594, + "learning_rate": 1.7729295556631677e-06, + "loss": 0.8805, "step": 28701 }, { - "epoch": 0.814472190692395, + "epoch": 0.8133412678171669, "grad_norm": 0.0, - "learning_rate": 1.7516907617778189e-06, - "loss": 0.7812, + "learning_rate": 1.7724078587042347e-06, + "loss": 0.761, "step": 28702 }, { - "epoch": 0.8145005675368899, + "epoch": 0.8133696052594293, "grad_norm": 0.0, - "learning_rate": 1.7511711687426047e-06, - "loss": 0.7796, + "learning_rate": 1.7718862310493624e-06, + "loss": 0.906, "step": 28703 }, { - "epoch": 0.8145289443813848, + "epoch": 0.8133979427016917, "grad_norm": 0.0, - "learning_rate": 1.7506516453856216e-06, - "loss": 0.7681, + "learning_rate": 1.7713646727029476e-06, + "loss": 0.8292, "step": 28704 }, { - "epoch": 0.8145573212258796, + "epoch": 0.8134262801439542, "grad_norm": 0.0, - "learning_rate": 1.7501321917112525e-06, - "loss": 0.8032, + "learning_rate": 1.770843183669385e-06, + "loss": 0.7415, "step": 28705 }, { - "epoch": 0.8145856980703746, + "epoch": 0.8134546175862166, "grad_norm": 0.0, - "learning_rate": 1.7496128077238872e-06, - "loss": 0.7531, + "learning_rate": 1.7703217639530646e-06, + "loss": 0.7794, "step": 28706 }, { - "epoch": 0.8146140749148695, + "epoch": 0.8134829550284791, "grad_norm": 0.0, - "learning_rate": 1.7490934934279159e-06, - "loss": 0.7529, + "learning_rate": 1.7698004135583835e-06, + "loss": 0.8856, "step": 28707 }, { - "epoch": 0.8146424517593643, + "epoch": 0.8135112924707416, "grad_norm": 0.0, - "learning_rate": 1.7485742488277202e-06, - "loss": 0.7811, + "learning_rate": 1.7692791324897274e-06, + "loss": 0.8194, "step": 28708 }, { - "epoch": 0.8146708286038592, + "epoch": 0.8135396299130041, "grad_norm": 0.0, - "learning_rate": 1.748055073927688e-06, - "loss": 0.7959, + "learning_rate": 1.7687579207514893e-06, + "loss": 0.8609, "step": 28709 }, { - "epoch": 0.8146992054483542, + "epoch": 0.8135679673552665, "grad_norm": 0.0, - "learning_rate": 1.7475359687322092e-06, - "loss": 0.9049, + "learning_rate": 1.7682367783480614e-06, + "loss": 0.8473, "step": 28710 }, { - "epoch": 0.814727582292849, + "epoch": 0.813596304797529, "grad_norm": 0.0, - "learning_rate": 1.747016933245662e-06, - "loss": 0.7566, + "learning_rate": 1.76771570528383e-06, + "loss": 0.8191, "step": 28711 }, { - "epoch": 0.8147559591373439, + "epoch": 0.8136246422397915, "grad_norm": 0.0, - "learning_rate": 1.7464979674724335e-06, - "loss": 0.8267, + "learning_rate": 1.7671947015631875e-06, + "loss": 0.7636, "step": 28712 }, { - "epoch": 0.8147843359818389, + "epoch": 0.8136529796820539, "grad_norm": 0.0, - "learning_rate": 1.7459790714169089e-06, - "loss": 0.8307, + "learning_rate": 1.766673767190522e-06, + "loss": 0.8136, "step": 28713 }, { - "epoch": 0.8148127128263337, + "epoch": 0.8136813171243164, "grad_norm": 0.0, - "learning_rate": 1.745460245083469e-06, - "loss": 0.711, + "learning_rate": 1.7661529021702184e-06, + "loss": 0.8783, "step": 28714 }, { - "epoch": 0.8148410896708286, + "epoch": 0.8137096545665788, "grad_norm": 0.0, - "learning_rate": 1.7449414884765005e-06, - "loss": 0.8221, + "learning_rate": 1.7656321065066696e-06, + "loss": 0.7793, "step": 28715 }, { - "epoch": 0.8148694665153235, + "epoch": 0.8137379920088412, "grad_norm": 0.0, - "learning_rate": 1.7444228016003795e-06, - "loss": 0.8383, + "learning_rate": 1.7651113802042563e-06, + "loss": 0.6947, "step": 28716 }, { - "epoch": 0.8148978433598184, + "epoch": 0.8137663294511037, "grad_norm": 0.0, - "learning_rate": 1.7439041844594907e-06, - "loss": 0.8086, + "learning_rate": 1.7645907232673686e-06, + "loss": 0.7625, "step": 28717 }, { - "epoch": 0.8149262202043133, + "epoch": 0.8137946668933662, "grad_norm": 0.0, - "learning_rate": 1.743385637058216e-06, - "loss": 0.8482, + "learning_rate": 1.764070135700391e-06, + "loss": 0.8545, "step": 28718 }, { - "epoch": 0.8149545970488081, + "epoch": 0.8138230043356287, "grad_norm": 0.0, - "learning_rate": 1.7428671594009327e-06, - "loss": 0.8539, + "learning_rate": 1.7635496175077082e-06, + "loss": 0.784, "step": 28719 }, { - "epoch": 0.8149829738933031, + "epoch": 0.8138513417778911, "grad_norm": 0.0, - "learning_rate": 1.74234875149202e-06, - "loss": 0.813, + "learning_rate": 1.7630291686937096e-06, + "loss": 0.8195, "step": 28720 }, { - "epoch": 0.815011350737798, + "epoch": 0.8138796792201536, "grad_norm": 0.0, - "learning_rate": 1.7418304133358633e-06, - "loss": 0.7028, + "learning_rate": 1.7625087892627713e-06, + "loss": 0.8493, "step": 28721 }, { - "epoch": 0.8150397275822928, + "epoch": 0.8139080166624161, "grad_norm": 0.0, - "learning_rate": 1.741312144936832e-06, - "loss": 0.9066, + "learning_rate": 1.761988479219281e-06, + "loss": 0.8564, "step": 28722 }, { - "epoch": 0.8150681044267878, + "epoch": 0.8139363541046785, "grad_norm": 0.0, - "learning_rate": 1.7407939462993094e-06, - "loss": 0.8019, + "learning_rate": 1.761468238567623e-06, + "loss": 0.7417, "step": 28723 }, { - "epoch": 0.8150964812712826, + "epoch": 0.813964691546941, "grad_norm": 0.0, - "learning_rate": 1.7402758174276734e-06, - "loss": 0.9114, + "learning_rate": 1.7609480673121748e-06, + "loss": 0.7885, "step": 28724 }, { - "epoch": 0.8151248581157775, + "epoch": 0.8139930289892034, "grad_norm": 0.0, - "learning_rate": 1.739757758326297e-06, - "loss": 0.87, + "learning_rate": 1.760427965457321e-06, + "loss": 0.8878, "step": 28725 }, { - "epoch": 0.8151532349602724, + "epoch": 0.8140213664314659, "grad_norm": 0.0, - "learning_rate": 1.7392397689995578e-06, - "loss": 0.9003, + "learning_rate": 1.7599079330074443e-06, + "loss": 0.7871, "step": 28726 }, { - "epoch": 0.8151816118047673, + "epoch": 0.8140497038737283, "grad_norm": 0.0, - "learning_rate": 1.7387218494518331e-06, - "loss": 0.8405, + "learning_rate": 1.7593879699669202e-06, + "loss": 0.6866, "step": 28727 }, { - "epoch": 0.8152099886492622, + "epoch": 0.8140780413159908, "grad_norm": 0.0, - "learning_rate": 1.7382039996874934e-06, - "loss": 0.8478, + "learning_rate": 1.7588680763401333e-06, + "loss": 0.7098, "step": 28728 }, { - "epoch": 0.8152383654937571, + "epoch": 0.8141063787582533, "grad_norm": 0.0, - "learning_rate": 1.737686219710919e-06, - "loss": 0.7514, + "learning_rate": 1.7583482521314598e-06, + "loss": 0.7317, "step": 28729 }, { - "epoch": 0.815266742338252, + "epoch": 0.8141347162005157, "grad_norm": 0.0, - "learning_rate": 1.737168509526479e-06, - "loss": 0.7424, + "learning_rate": 1.7578284973452786e-06, + "loss": 0.7132, "step": 28730 }, { - "epoch": 0.8152951191827469, + "epoch": 0.8141630536427782, "grad_norm": 0.0, - "learning_rate": 1.7366508691385475e-06, - "loss": 0.7902, + "learning_rate": 1.7573088119859684e-06, + "loss": 0.7885, "step": 28731 }, { - "epoch": 0.8153234960272417, + "epoch": 0.8141913910850407, "grad_norm": 0.0, - "learning_rate": 1.7361332985515011e-06, - "loss": 0.803, + "learning_rate": 1.7567891960579075e-06, + "loss": 0.8171, "step": 28732 }, { - "epoch": 0.8153518728717367, + "epoch": 0.8142197285273032, "grad_norm": 0.0, - "learning_rate": 1.7356157977697052e-06, - "loss": 0.798, + "learning_rate": 1.756269649565472e-06, + "loss": 0.7173, "step": 28733 }, { - "epoch": 0.8153802497162316, + "epoch": 0.8142480659695656, "grad_norm": 0.0, - "learning_rate": 1.7350983667975342e-06, - "loss": 0.7361, + "learning_rate": 1.755750172513041e-06, + "loss": 0.7185, "step": 28734 }, { - "epoch": 0.8154086265607264, + "epoch": 0.814276403411828, "grad_norm": 0.0, - "learning_rate": 1.734581005639361e-06, - "loss": 0.8181, + "learning_rate": 1.755230764904985e-06, + "loss": 0.7858, "step": 28735 }, { - "epoch": 0.8154370034052213, + "epoch": 0.8143047408540905, "grad_norm": 0.0, - "learning_rate": 1.7340637142995508e-06, - "loss": 0.869, + "learning_rate": 1.7547114267456845e-06, + "loss": 0.7718, "step": 28736 }, { - "epoch": 0.8154653802497163, + "epoch": 0.8143330782963529, "grad_norm": 0.0, - "learning_rate": 1.7335464927824764e-06, - "loss": 0.7466, + "learning_rate": 1.7541921580395094e-06, + "loss": 0.6812, "step": 28737 }, { - "epoch": 0.8154937570942111, + "epoch": 0.8143614157386154, "grad_norm": 0.0, - "learning_rate": 1.7330293410925091e-06, - "loss": 0.8153, + "learning_rate": 1.7536729587908353e-06, + "loss": 0.7862, "step": 28738 }, { - "epoch": 0.815522133938706, + "epoch": 0.8143897531808779, "grad_norm": 0.0, - "learning_rate": 1.7325122592340115e-06, - "loss": 0.8198, + "learning_rate": 1.7531538290040384e-06, + "loss": 0.8655, "step": 28739 }, { - "epoch": 0.815550510783201, + "epoch": 0.8144180906231403, "grad_norm": 0.0, - "learning_rate": 1.7319952472113554e-06, - "loss": 0.8027, + "learning_rate": 1.7526347686834866e-06, + "loss": 0.8244, "step": 28740 }, { - "epoch": 0.8155788876276958, + "epoch": 0.8144464280654028, "grad_norm": 0.0, - "learning_rate": 1.7314783050289086e-06, - "loss": 0.6793, + "learning_rate": 1.752115777833555e-06, + "loss": 0.8151, "step": 28741 }, { - "epoch": 0.8156072644721907, + "epoch": 0.8144747655076653, "grad_norm": 0.0, - "learning_rate": 1.730961432691034e-06, - "loss": 0.7462, + "learning_rate": 1.7515968564586183e-06, + "loss": 0.7788, "step": 28742 }, { - "epoch": 0.8156356413166855, + "epoch": 0.8145031029499278, "grad_norm": 0.0, - "learning_rate": 1.7304446302021005e-06, - "loss": 0.8334, + "learning_rate": 1.7510780045630405e-06, + "loss": 0.7148, "step": 28743 }, { - "epoch": 0.8156640181611805, + "epoch": 0.8145314403921902, "grad_norm": 0.0, - "learning_rate": 1.7299278975664724e-06, - "loss": 0.7228, + "learning_rate": 1.750559222151197e-06, + "loss": 0.7869, "step": 28744 }, { - "epoch": 0.8156923950056754, + "epoch": 0.8145597778344527, "grad_norm": 0.0, - "learning_rate": 1.729411234788516e-06, - "loss": 0.6773, + "learning_rate": 1.7500405092274553e-06, + "loss": 0.7367, "step": 28745 }, { - "epoch": 0.8157207718501702, + "epoch": 0.8145881152767152, "grad_norm": 0.0, - "learning_rate": 1.728894641872596e-06, - "loss": 0.7586, + "learning_rate": 1.7495218657961866e-06, + "loss": 0.7662, "step": 28746 }, { - "epoch": 0.8157491486946652, + "epoch": 0.8146164527189775, "grad_norm": 0.0, - "learning_rate": 1.728378118823073e-06, - "loss": 0.8137, + "learning_rate": 1.7490032918617617e-06, + "loss": 0.9321, "step": 28747 }, { - "epoch": 0.81577752553916, + "epoch": 0.81464479016124, "grad_norm": 0.0, - "learning_rate": 1.7278616656443115e-06, - "loss": 0.7489, + "learning_rate": 1.7484847874285426e-06, + "loss": 0.8622, "step": 28748 }, { - "epoch": 0.8158059023836549, + "epoch": 0.8146731276035025, "grad_norm": 0.0, - "learning_rate": 1.7273452823406768e-06, - "loss": 0.7492, + "learning_rate": 1.747966352500904e-06, + "loss": 0.8482, "step": 28749 }, { - "epoch": 0.8158342792281499, + "epoch": 0.814701465045765, "grad_norm": 0.0, - "learning_rate": 1.7268289689165263e-06, - "loss": 0.8991, + "learning_rate": 1.7474479870832063e-06, + "loss": 0.769, "step": 28750 }, { - "epoch": 0.8158626560726447, + "epoch": 0.8147298024880274, "grad_norm": 0.0, - "learning_rate": 1.7263127253762234e-06, - "loss": 0.8115, + "learning_rate": 1.7469296911798172e-06, + "loss": 0.7684, "step": 28751 }, { - "epoch": 0.8158910329171396, + "epoch": 0.8147581399302899, "grad_norm": 0.0, - "learning_rate": 1.7257965517241316e-06, - "loss": 0.8674, + "learning_rate": 1.7464114647951081e-06, + "loss": 0.9321, "step": 28752 }, { - "epoch": 0.8159194097616345, + "epoch": 0.8147864773725524, "grad_norm": 0.0, - "learning_rate": 1.7252804479646046e-06, - "loss": 0.7159, + "learning_rate": 1.7458933079334374e-06, + "loss": 0.7725, "step": 28753 }, { - "epoch": 0.8159477866061294, + "epoch": 0.8148148148148148, "grad_norm": 0.0, - "learning_rate": 1.724764414102007e-06, - "loss": 0.7831, + "learning_rate": 1.7453752205991726e-06, + "loss": 0.8662, "step": 28754 }, { - "epoch": 0.8159761634506243, + "epoch": 0.8148431522570773, "grad_norm": 0.0, - "learning_rate": 1.7242484501406976e-06, - "loss": 0.8071, + "learning_rate": 1.7448572027966792e-06, + "loss": 0.751, "step": 28755 }, { - "epoch": 0.8160045402951192, + "epoch": 0.8148714896993398, "grad_norm": 0.0, - "learning_rate": 1.7237325560850316e-06, - "loss": 0.7764, + "learning_rate": 1.7443392545303172e-06, + "loss": 0.8787, "step": 28756 }, { - "epoch": 0.8160329171396141, + "epoch": 0.8148998271416023, "grad_norm": 0.0, - "learning_rate": 1.723216731939369e-06, - "loss": 0.8195, + "learning_rate": 1.743821375804451e-06, + "loss": 0.7598, "step": 28757 }, { - "epoch": 0.816061293984109, + "epoch": 0.8149281645838646, "grad_norm": 0.0, - "learning_rate": 1.7227009777080694e-06, - "loss": 0.6754, + "learning_rate": 1.7433035666234443e-06, + "loss": 0.7406, "step": 28758 }, { - "epoch": 0.8160896708286038, + "epoch": 0.8149565020261271, "grad_norm": 0.0, - "learning_rate": 1.7221852933954809e-06, - "loss": 0.7911, + "learning_rate": 1.7427858269916565e-06, + "loss": 0.7939, "step": 28759 }, { - "epoch": 0.8161180476730987, + "epoch": 0.8149848394683896, "grad_norm": 0.0, - "learning_rate": 1.7216696790059718e-06, - "loss": 0.8133, + "learning_rate": 1.7422681569134525e-06, + "loss": 0.7026, "step": 28760 }, { - "epoch": 0.8161464245175937, + "epoch": 0.815013176910652, "grad_norm": 0.0, - "learning_rate": 1.7211541345438864e-06, - "loss": 0.7724, + "learning_rate": 1.741750556393188e-06, + "loss": 0.7879, "step": 28761 }, { - "epoch": 0.8161748013620885, + "epoch": 0.8150415143529145, "grad_norm": 0.0, - "learning_rate": 1.7206386600135861e-06, - "loss": 0.7988, + "learning_rate": 1.741233025435225e-06, + "loss": 0.8437, "step": 28762 }, { - "epoch": 0.8162031782065834, + "epoch": 0.815069851795177, "grad_norm": 0.0, - "learning_rate": 1.7201232554194247e-06, - "loss": 0.8253, + "learning_rate": 1.7407155640439265e-06, + "loss": 0.8056, "step": 28763 }, { - "epoch": 0.8162315550510784, + "epoch": 0.8150981892374394, "grad_norm": 0.0, - "learning_rate": 1.7196079207657523e-06, - "loss": 0.9286, + "learning_rate": 1.7401981722236438e-06, + "loss": 0.7955, "step": 28764 }, { - "epoch": 0.8162599318955732, + "epoch": 0.8151265266797019, "grad_norm": 0.0, - "learning_rate": 1.7190926560569242e-06, - "loss": 0.8626, + "learning_rate": 1.7396808499787431e-06, + "loss": 0.8474, "step": 28765 }, { - "epoch": 0.8162883087400681, + "epoch": 0.8151548641219644, "grad_norm": 0.0, - "learning_rate": 1.718577461297295e-06, - "loss": 0.7686, + "learning_rate": 1.7391635973135746e-06, + "loss": 0.8122, "step": 28766 }, { - "epoch": 0.816316685584563, + "epoch": 0.8151832015642269, "grad_norm": 0.0, - "learning_rate": 1.7180623364912118e-06, - "loss": 0.8082, + "learning_rate": 1.7386464142324989e-06, + "loss": 0.8701, "step": 28767 }, { - "epoch": 0.8163450624290579, + "epoch": 0.8152115390064892, "grad_norm": 0.0, - "learning_rate": 1.7175472816430284e-06, - "loss": 0.8393, + "learning_rate": 1.7381293007398747e-06, + "loss": 0.7614, "step": 28768 }, { - "epoch": 0.8163734392735528, + "epoch": 0.8152398764487517, "grad_norm": 0.0, - "learning_rate": 1.7170322967570974e-06, - "loss": 0.7838, + "learning_rate": 1.7376122568400533e-06, + "loss": 0.6645, "step": 28769 }, { - "epoch": 0.8164018161180476, + "epoch": 0.8152682138910142, "grad_norm": 0.0, - "learning_rate": 1.7165173818377646e-06, - "loss": 0.7784, + "learning_rate": 1.737095282537391e-06, + "loss": 0.7663, "step": 28770 }, { - "epoch": 0.8164301929625426, + "epoch": 0.8152965513332766, "grad_norm": 0.0, - "learning_rate": 1.7160025368893817e-06, - "loss": 0.8133, + "learning_rate": 1.7365783778362443e-06, + "loss": 0.8135, "step": 28771 }, { - "epoch": 0.8164585698070375, + "epoch": 0.8153248887755391, "grad_norm": 0.0, - "learning_rate": 1.7154877619163013e-06, - "loss": 0.7725, + "learning_rate": 1.7360615427409667e-06, + "loss": 0.8066, "step": 28772 }, { - "epoch": 0.8164869466515323, + "epoch": 0.8153532262178016, "grad_norm": 0.0, - "learning_rate": 1.714973056922865e-06, - "loss": 0.8213, + "learning_rate": 1.735544777255913e-06, + "loss": 0.7372, "step": 28773 }, { - "epoch": 0.8165153234960273, + "epoch": 0.8153815636600641, "grad_norm": 0.0, - "learning_rate": 1.7144584219134207e-06, - "loss": 0.8406, + "learning_rate": 1.7350280813854326e-06, + "loss": 0.8259, "step": 28774 }, { - "epoch": 0.8165437003405221, + "epoch": 0.8154099011023265, "grad_norm": 0.0, - "learning_rate": 1.7139438568923239e-06, - "loss": 0.8962, + "learning_rate": 1.7345114551338793e-06, + "loss": 0.8454, "step": 28775 }, { - "epoch": 0.816572077185017, + "epoch": 0.815438238544589, "grad_norm": 0.0, - "learning_rate": 1.7134293618639142e-06, - "loss": 0.8947, + "learning_rate": 1.7339948985056087e-06, + "loss": 0.8066, "step": 28776 }, { - "epoch": 0.8166004540295119, + "epoch": 0.8154665759868515, "grad_norm": 0.0, - "learning_rate": 1.7129149368325382e-06, - "loss": 0.8894, + "learning_rate": 1.7334784115049663e-06, + "loss": 0.9094, "step": 28777 }, { - "epoch": 0.8166288308740068, + "epoch": 0.8154949134291138, "grad_norm": 0.0, - "learning_rate": 1.7124005818025447e-06, - "loss": 0.7708, + "learning_rate": 1.7329619941363062e-06, + "loss": 0.7922, "step": 28778 }, { - "epoch": 0.8166572077185017, + "epoch": 0.8155232508713763, "grad_norm": 0.0, - "learning_rate": 1.7118862967782745e-06, - "loss": 0.7422, + "learning_rate": 1.7324456464039751e-06, + "loss": 0.8397, "step": 28779 }, { - "epoch": 0.8166855845629966, + "epoch": 0.8155515883136388, "grad_norm": 0.0, - "learning_rate": 1.7113720817640723e-06, - "loss": 0.7889, + "learning_rate": 1.731929368312325e-06, + "loss": 0.8689, "step": 28780 }, { - "epoch": 0.8167139614074915, + "epoch": 0.8155799257559013, "grad_norm": 0.0, - "learning_rate": 1.7108579367642853e-06, - "loss": 0.9082, + "learning_rate": 1.7314131598657058e-06, + "loss": 0.8328, "step": 28781 }, { - "epoch": 0.8167423382519864, + "epoch": 0.8156082631981637, "grad_norm": 0.0, - "learning_rate": 1.7103438617832524e-06, - "loss": 0.8559, + "learning_rate": 1.7308970210684618e-06, + "loss": 0.8981, "step": 28782 }, { - "epoch": 0.8167707150964812, + "epoch": 0.8156366006404262, "grad_norm": 0.0, - "learning_rate": 1.709829856825317e-06, - "loss": 0.8068, + "learning_rate": 1.7303809519249426e-06, + "loss": 0.7619, "step": 28783 }, { - "epoch": 0.8167990919409762, + "epoch": 0.8156649380826887, "grad_norm": 0.0, - "learning_rate": 1.709315921894823e-06, - "loss": 0.6693, + "learning_rate": 1.7298649524394962e-06, + "loss": 0.6938, "step": 28784 }, { - "epoch": 0.8168274687854711, + "epoch": 0.8156932755249511, "grad_norm": 0.0, - "learning_rate": 1.7088020569961094e-06, - "loss": 0.8105, + "learning_rate": 1.7293490226164677e-06, + "loss": 0.7921, "step": 28785 }, { - "epoch": 0.8168558456299659, + "epoch": 0.8157216129672136, "grad_norm": 0.0, - "learning_rate": 1.7082882621335184e-06, - "loss": 0.7527, + "learning_rate": 1.7288331624602073e-06, + "loss": 0.697, "step": 28786 }, { - "epoch": 0.8168842224744608, + "epoch": 0.8157499504094761, "grad_norm": 0.0, - "learning_rate": 1.7077745373113874e-06, - "loss": 0.7924, + "learning_rate": 1.7283173719750523e-06, + "loss": 0.7467, "step": 28787 }, { - "epoch": 0.8169125993189558, + "epoch": 0.8157782878517384, "grad_norm": 0.0, - "learning_rate": 1.7072608825340576e-06, - "loss": 0.76, + "learning_rate": 1.7278016511653529e-06, + "loss": 0.8225, "step": 28788 }, { - "epoch": 0.8169409761634506, + "epoch": 0.8158066252940009, "grad_norm": 0.0, - "learning_rate": 1.7067472978058685e-06, - "loss": 0.889, + "learning_rate": 1.727286000035454e-06, + "loss": 0.8978, "step": 28789 }, { - "epoch": 0.8169693530079455, + "epoch": 0.8158349627362634, "grad_norm": 0.0, - "learning_rate": 1.706233783131157e-06, - "loss": 0.6926, + "learning_rate": 1.726770418589695e-06, + "loss": 0.8556, "step": 28790 }, { - "epoch": 0.8169977298524405, + "epoch": 0.8158633001785259, "grad_norm": 0.0, - "learning_rate": 1.7057203385142618e-06, - "loss": 0.8905, + "learning_rate": 1.7262549068324208e-06, + "loss": 0.8089, "step": 28791 }, { - "epoch": 0.8170261066969353, + "epoch": 0.8158916376207883, "grad_norm": 0.0, - "learning_rate": 1.7052069639595225e-06, - "loss": 0.833, + "learning_rate": 1.725739464767976e-06, + "loss": 0.7428, "step": 28792 }, { - "epoch": 0.8170544835414302, + "epoch": 0.8159199750630508, "grad_norm": 0.0, - "learning_rate": 1.7046936594712704e-06, - "loss": 0.8529, + "learning_rate": 1.725224092400698e-06, + "loss": 0.8214, "step": 28793 }, { - "epoch": 0.817082860385925, + "epoch": 0.8159483125053133, "grad_norm": 0.0, - "learning_rate": 1.7041804250538429e-06, - "loss": 0.779, + "learning_rate": 1.7247087897349334e-06, + "loss": 0.7379, "step": 28794 }, { - "epoch": 0.81711123723042, + "epoch": 0.8159766499475757, "grad_norm": 0.0, - "learning_rate": 1.70366726071158e-06, - "loss": 0.7727, + "learning_rate": 1.7241935567750157e-06, + "loss": 0.829, "step": 28795 }, { - "epoch": 0.8171396140749149, + "epoch": 0.8160049873898382, "grad_norm": 0.0, - "learning_rate": 1.7031541664488093e-06, - "loss": 0.8409, + "learning_rate": 1.7236783935252898e-06, + "loss": 0.7653, "step": 28796 }, { - "epoch": 0.8171679909194097, + "epoch": 0.8160333248321007, "grad_norm": 0.0, - "learning_rate": 1.7026411422698685e-06, - "loss": 0.8178, + "learning_rate": 1.7231632999900939e-06, + "loss": 0.7616, "step": 28797 }, { - "epoch": 0.8171963677639047, + "epoch": 0.8160616622743632, "grad_norm": 0.0, - "learning_rate": 1.702128188179094e-06, - "loss": 0.7861, + "learning_rate": 1.7226482761737674e-06, + "loss": 0.8571, "step": 28798 }, { - "epoch": 0.8172247446083996, + "epoch": 0.8160899997166255, "grad_norm": 0.0, - "learning_rate": 1.7016153041808125e-06, - "loss": 0.8607, + "learning_rate": 1.722133322080648e-06, + "loss": 0.8594, "step": 28799 }, { - "epoch": 0.8172531214528944, + "epoch": 0.816118337158888, "grad_norm": 0.0, - "learning_rate": 1.7011024902793604e-06, - "loss": 0.8848, + "learning_rate": 1.7216184377150758e-06, + "loss": 0.7679, "step": 28800 }, { - "epoch": 0.8172814982973893, + "epoch": 0.8161466746011505, "grad_norm": 0.0, - "learning_rate": 1.700589746479071e-06, - "loss": 0.8064, + "learning_rate": 1.7211036230813838e-06, + "loss": 0.8167, "step": 28801 }, { - "epoch": 0.8173098751418842, + "epoch": 0.8161750120434129, "grad_norm": 0.0, - "learning_rate": 1.7000770727842698e-06, - "loss": 0.7606, + "learning_rate": 1.720588878183912e-06, + "loss": 0.7751, "step": 28802 }, { - "epoch": 0.8173382519863791, + "epoch": 0.8162033494856754, "grad_norm": 0.0, - "learning_rate": 1.6995644691992907e-06, - "loss": 0.776, + "learning_rate": 1.7200742030269924e-06, + "loss": 0.8111, "step": 28803 }, { - "epoch": 0.817366628830874, + "epoch": 0.8162316869279379, "grad_norm": 0.0, - "learning_rate": 1.6990519357284674e-06, - "loss": 0.8018, + "learning_rate": 1.7195595976149615e-06, + "loss": 0.8432, "step": 28804 }, { - "epoch": 0.8173950056753689, + "epoch": 0.8162600243702004, "grad_norm": 0.0, - "learning_rate": 1.6985394723761194e-06, - "loss": 0.7912, + "learning_rate": 1.7190450619521593e-06, + "loss": 0.7854, "step": 28805 }, { - "epoch": 0.8174233825198638, + "epoch": 0.8162883618124628, "grad_norm": 0.0, - "learning_rate": 1.6980270791465868e-06, - "loss": 0.7783, + "learning_rate": 1.718530596042911e-06, + "loss": 0.8087, "step": 28806 }, { - "epoch": 0.8174517593643587, + "epoch": 0.8163166992547253, "grad_norm": 0.0, - "learning_rate": 1.6975147560441908e-06, - "loss": 0.666, + "learning_rate": 1.718016199891559e-06, + "loss": 0.9446, "step": 28807 }, { - "epoch": 0.8174801362088536, + "epoch": 0.8163450366969878, "grad_norm": 0.0, - "learning_rate": 1.6970025030732606e-06, - "loss": 0.8334, + "learning_rate": 1.7175018735024284e-06, + "loss": 0.7875, "step": 28808 }, { - "epoch": 0.8175085130533485, + "epoch": 0.8163733741392502, "grad_norm": 0.0, - "learning_rate": 1.6964903202381255e-06, - "loss": 0.7619, + "learning_rate": 1.7169876168798561e-06, + "loss": 0.7549, "step": 28809 }, { - "epoch": 0.8175368898978433, + "epoch": 0.8164017115815126, "grad_norm": 0.0, - "learning_rate": 1.6959782075431076e-06, - "loss": 0.8234, + "learning_rate": 1.7164734300281727e-06, + "loss": 0.7949, "step": 28810 }, { - "epoch": 0.8175652667423382, + "epoch": 0.8164300490237751, "grad_norm": 0.0, - "learning_rate": 1.6954661649925352e-06, - "loss": 0.7732, + "learning_rate": 1.7159593129517095e-06, + "loss": 0.8143, "step": 28811 }, { - "epoch": 0.8175936435868332, + "epoch": 0.8164583864660375, "grad_norm": 0.0, - "learning_rate": 1.6949541925907364e-06, - "loss": 0.8592, + "learning_rate": 1.715445265654796e-06, + "loss": 0.8415, "step": 28812 }, { - "epoch": 0.817622020431328, + "epoch": 0.8164867239083, "grad_norm": 0.0, - "learning_rate": 1.69444229034203e-06, - "loss": 0.836, + "learning_rate": 1.714931288141768e-06, + "loss": 0.7895, "step": 28813 }, { - "epoch": 0.8176503972758229, + "epoch": 0.8165150613505625, "grad_norm": 0.0, - "learning_rate": 1.693930458250742e-06, - "loss": 0.8157, + "learning_rate": 1.7144173804169462e-06, + "loss": 0.7766, "step": 28814 }, { - "epoch": 0.8176787741203179, + "epoch": 0.816543398792825, "grad_norm": 0.0, - "learning_rate": 1.6934186963212006e-06, - "loss": 0.8531, + "learning_rate": 1.7139035424846673e-06, + "loss": 0.9133, "step": 28815 }, { - "epoch": 0.8177071509648127, + "epoch": 0.8165717362350874, "grad_norm": 0.0, - "learning_rate": 1.6929070045577222e-06, - "loss": 0.8406, + "learning_rate": 1.7133897743492522e-06, + "loss": 0.8463, "step": 28816 }, { - "epoch": 0.8177355278093076, + "epoch": 0.8166000736773499, "grad_norm": 0.0, - "learning_rate": 1.692395382964631e-06, - "loss": 0.8104, + "learning_rate": 1.7128760760150331e-06, + "loss": 0.7711, "step": 28817 }, { - "epoch": 0.8177639046538024, + "epoch": 0.8166284111196124, "grad_norm": 0.0, - "learning_rate": 1.6918838315462528e-06, - "loss": 0.8188, + "learning_rate": 1.7123624474863377e-06, + "loss": 0.7261, "step": 28818 }, { - "epoch": 0.8177922814982974, + "epoch": 0.8166567485618748, "grad_norm": 0.0, - "learning_rate": 1.6913723503069024e-06, - "loss": 0.8039, + "learning_rate": 1.7118488887674889e-06, + "loss": 0.7805, "step": 28819 }, { - "epoch": 0.8178206583427923, + "epoch": 0.8166850860041373, "grad_norm": 0.0, - "learning_rate": 1.6908609392509035e-06, - "loss": 0.9014, + "learning_rate": 1.7113353998628147e-06, + "loss": 0.7932, "step": 28820 }, { - "epoch": 0.8178490351872871, + "epoch": 0.8167134234463997, "grad_norm": 0.0, - "learning_rate": 1.6903495983825756e-06, - "loss": 0.7941, + "learning_rate": 1.7108219807766436e-06, + "loss": 0.7666, "step": 28821 }, { - "epoch": 0.8178774120317821, + "epoch": 0.8167417608886622, "grad_norm": 0.0, - "learning_rate": 1.689838327706238e-06, - "loss": 0.8544, + "learning_rate": 1.7103086315132933e-06, + "loss": 0.853, "step": 28822 }, { - "epoch": 0.817905788876277, + "epoch": 0.8167700983309246, "grad_norm": 0.0, - "learning_rate": 1.6893271272262123e-06, - "loss": 0.8802, + "learning_rate": 1.7097953520770927e-06, + "loss": 0.7123, "step": 28823 }, { - "epoch": 0.8179341657207718, + "epoch": 0.8167984357731871, "grad_norm": 0.0, - "learning_rate": 1.688815996946812e-06, - "loss": 0.8101, + "learning_rate": 1.7092821424723637e-06, + "loss": 0.7651, "step": 28824 }, { - "epoch": 0.8179625425652668, + "epoch": 0.8168267732154496, "grad_norm": 0.0, - "learning_rate": 1.688304936872356e-06, - "loss": 0.8124, + "learning_rate": 1.7087690027034298e-06, + "loss": 0.7666, "step": 28825 }, { - "epoch": 0.8179909194097617, + "epoch": 0.816855110657712, "grad_norm": 0.0, - "learning_rate": 1.6877939470071648e-06, - "loss": 0.8255, + "learning_rate": 1.7082559327746161e-06, + "loss": 0.8024, "step": 28826 }, { - "epoch": 0.8180192962542565, + "epoch": 0.8168834480999745, "grad_norm": 0.0, - "learning_rate": 1.6872830273555485e-06, - "loss": 0.7982, + "learning_rate": 1.7077429326902396e-06, + "loss": 0.8077, "step": 28827 }, { - "epoch": 0.8180476730987514, + "epoch": 0.816911785542237, "grad_norm": 0.0, - "learning_rate": 1.686772177921826e-06, - "loss": 0.7743, + "learning_rate": 1.707230002454624e-06, + "loss": 0.8355, "step": 28828 }, { - "epoch": 0.8180760499432463, + "epoch": 0.8169401229844995, "grad_norm": 0.0, - "learning_rate": 1.6862613987103161e-06, - "loss": 0.8476, + "learning_rate": 1.7067171420720908e-06, + "loss": 0.8167, "step": 28829 }, { - "epoch": 0.8181044267877412, + "epoch": 0.8169684604267619, "grad_norm": 0.0, - "learning_rate": 1.6857506897253272e-06, - "loss": 0.7983, + "learning_rate": 1.7062043515469572e-06, + "loss": 0.8411, "step": 28830 }, { - "epoch": 0.8181328036322361, + "epoch": 0.8169967978690243, "grad_norm": 0.0, - "learning_rate": 1.6852400509711754e-06, - "loss": 0.7176, + "learning_rate": 1.7056916308835457e-06, + "loss": 0.8098, "step": 28831 }, { - "epoch": 0.818161180476731, + "epoch": 0.8170251353112868, "grad_norm": 0.0, - "learning_rate": 1.6847294824521777e-06, - "loss": 0.8805, + "learning_rate": 1.7051789800861729e-06, + "loss": 0.7156, "step": 28832 }, { - "epoch": 0.8181895573212259, + "epoch": 0.8170534727535492, "grad_norm": 0.0, - "learning_rate": 1.684218984172641e-06, - "loss": 0.8474, + "learning_rate": 1.7046663991591562e-06, + "loss": 0.7324, "step": 28833 }, { - "epoch": 0.8182179341657208, + "epoch": 0.8170818101958117, "grad_norm": 0.0, - "learning_rate": 1.6837085561368805e-06, - "loss": 0.7689, + "learning_rate": 1.7041538881068176e-06, + "loss": 0.86, "step": 28834 }, { - "epoch": 0.8182463110102156, + "epoch": 0.8171101476380742, "grad_norm": 0.0, - "learning_rate": 1.6831981983492074e-06, - "loss": 0.8593, + "learning_rate": 1.7036414469334694e-06, + "loss": 0.8351, "step": 28835 }, { - "epoch": 0.8182746878547106, + "epoch": 0.8171384850803366, "grad_norm": 0.0, - "learning_rate": 1.6826879108139338e-06, - "loss": 0.7733, + "learning_rate": 1.7031290756434293e-06, + "loss": 0.7685, "step": 28836 }, { - "epoch": 0.8183030646992054, + "epoch": 0.8171668225225991, "grad_norm": 0.0, - "learning_rate": 1.6821776935353706e-06, - "loss": 0.813, + "learning_rate": 1.7026167742410148e-06, + "loss": 0.8591, "step": 28837 }, { - "epoch": 0.8183314415437003, + "epoch": 0.8171951599648616, "grad_norm": 0.0, - "learning_rate": 1.6816675465178257e-06, - "loss": 0.9406, + "learning_rate": 1.7021045427305394e-06, + "loss": 0.8444, "step": 28838 }, { - "epoch": 0.8183598183881953, + "epoch": 0.8172234974071241, "grad_norm": 0.0, - "learning_rate": 1.6811574697656075e-06, - "loss": 0.8654, + "learning_rate": 1.7015923811163225e-06, + "loss": 0.8677, "step": 28839 }, { - "epoch": 0.8183881952326901, + "epoch": 0.8172518348493865, "grad_norm": 0.0, - "learning_rate": 1.6806474632830284e-06, - "loss": 0.7558, + "learning_rate": 1.7010802894026713e-06, + "loss": 0.8339, "step": 28840 }, { - "epoch": 0.818416572077185, + "epoch": 0.817280172291649, "grad_norm": 0.0, - "learning_rate": 1.6801375270743925e-06, - "loss": 0.7871, + "learning_rate": 1.7005682675939027e-06, + "loss": 0.8399, "step": 28841 }, { - "epoch": 0.81844494892168, + "epoch": 0.8173085097339114, "grad_norm": 0.0, - "learning_rate": 1.6796276611440088e-06, - "loss": 0.738, + "learning_rate": 1.7000563156943316e-06, + "loss": 0.7779, "step": 28842 }, { - "epoch": 0.8184733257661748, + "epoch": 0.8173368471761738, "grad_norm": 0.0, - "learning_rate": 1.679117865496186e-06, - "loss": 0.875, + "learning_rate": 1.6995444337082656e-06, + "loss": 0.9197, "step": 28843 }, { - "epoch": 0.8185017026106697, + "epoch": 0.8173651846184363, "grad_norm": 0.0, - "learning_rate": 1.6786081401352271e-06, - "loss": 0.6652, + "learning_rate": 1.6990326216400221e-06, + "loss": 0.7775, "step": 28844 }, { - "epoch": 0.8185300794551645, + "epoch": 0.8173935220606988, "grad_norm": 0.0, - "learning_rate": 1.6780984850654392e-06, - "loss": 0.8408, + "learning_rate": 1.6985208794939068e-06, + "loss": 0.7564, "step": 28845 }, { - "epoch": 0.8185584562996595, + "epoch": 0.8174218595029613, "grad_norm": 0.0, - "learning_rate": 1.6775889002911295e-06, - "loss": 0.8068, + "learning_rate": 1.6980092072742338e-06, + "loss": 0.7382, "step": 28846 }, { - "epoch": 0.8185868331441544, + "epoch": 0.8174501969452237, "grad_norm": 0.0, - "learning_rate": 1.6770793858165979e-06, - "loss": 0.9044, + "learning_rate": 1.697497604985313e-06, + "loss": 0.7688, "step": 28847 }, { - "epoch": 0.8186152099886492, + "epoch": 0.8174785343874862, "grad_norm": 0.0, - "learning_rate": 1.6765699416461511e-06, - "loss": 0.8088, + "learning_rate": 1.6969860726314513e-06, + "loss": 0.8311, "step": 28848 }, { - "epoch": 0.8186435868331442, + "epoch": 0.8175068718297487, "grad_norm": 0.0, - "learning_rate": 1.6760605677840947e-06, - "loss": 0.7766, + "learning_rate": 1.6964746102169582e-06, + "loss": 0.7989, "step": 28849 }, { - "epoch": 0.8186719636776391, + "epoch": 0.8175352092720111, "grad_norm": 0.0, - "learning_rate": 1.675551264234726e-06, - "loss": 0.7191, + "learning_rate": 1.6959632177461437e-06, + "loss": 0.7817, "step": 28850 }, { - "epoch": 0.8187003405221339, + "epoch": 0.8175635467142736, "grad_norm": 0.0, - "learning_rate": 1.6750420310023507e-06, - "loss": 0.8225, + "learning_rate": 1.6954518952233145e-06, + "loss": 0.852, "step": 28851 }, { - "epoch": 0.8187287173666288, + "epoch": 0.817591884156536, "grad_norm": 0.0, - "learning_rate": 1.6745328680912697e-06, - "loss": 0.8265, + "learning_rate": 1.6949406426527815e-06, + "loss": 0.7379, "step": 28852 }, { - "epoch": 0.8187570942111237, + "epoch": 0.8176202215987985, "grad_norm": 0.0, - "learning_rate": 1.674023775505783e-06, - "loss": 0.9151, + "learning_rate": 1.6944294600388434e-06, + "loss": 0.8092, "step": 28853 }, { - "epoch": 0.8187854710556186, + "epoch": 0.8176485590410609, "grad_norm": 0.0, - "learning_rate": 1.6735147532501949e-06, - "loss": 0.8334, + "learning_rate": 1.6939183473858101e-06, + "loss": 0.8419, "step": 28854 }, { - "epoch": 0.8188138479001135, + "epoch": 0.8176768964833234, "grad_norm": 0.0, - "learning_rate": 1.6730058013287986e-06, - "loss": 0.83, + "learning_rate": 1.6934073046979905e-06, + "loss": 0.6861, "step": 28855 }, { - "epoch": 0.8188422247446084, + "epoch": 0.8177052339255859, "grad_norm": 0.0, - "learning_rate": 1.6724969197458972e-06, - "loss": 0.7891, + "learning_rate": 1.692896331979682e-06, + "loss": 0.8245, "step": 28856 }, { - "epoch": 0.8188706015891033, + "epoch": 0.8177335713678483, "grad_norm": 0.0, - "learning_rate": 1.671988108505792e-06, - "loss": 0.9021, + "learning_rate": 1.6923854292351938e-06, + "loss": 0.6965, "step": 28857 }, { - "epoch": 0.8188989784335982, + "epoch": 0.8177619088101108, "grad_norm": 0.0, - "learning_rate": 1.671479367612774e-06, - "loss": 0.8219, + "learning_rate": 1.69187459646883e-06, + "loss": 0.8604, "step": 28858 }, { - "epoch": 0.8189273552780931, + "epoch": 0.8177902462523733, "grad_norm": 0.0, - "learning_rate": 1.6709706970711447e-06, - "loss": 0.7821, + "learning_rate": 1.6913638336848892e-06, + "loss": 0.7567, "step": 28859 }, { - "epoch": 0.818955732122588, + "epoch": 0.8178185836946357, "grad_norm": 0.0, - "learning_rate": 1.6704620968852027e-06, - "loss": 0.6814, + "learning_rate": 1.6908531408876783e-06, + "loss": 0.9297, "step": 28860 }, { - "epoch": 0.8189841089670828, + "epoch": 0.8178469211368982, "grad_norm": 0.0, - "learning_rate": 1.6699535670592393e-06, - "loss": 0.7544, + "learning_rate": 1.6903425180814947e-06, + "loss": 0.8952, "step": 28861 }, { - "epoch": 0.8190124858115777, + "epoch": 0.8178752585791607, "grad_norm": 0.0, - "learning_rate": 1.6694451075975526e-06, - "loss": 0.7027, + "learning_rate": 1.6898319652706418e-06, + "loss": 0.8722, "step": 28862 }, { - "epoch": 0.8190408626560727, + "epoch": 0.8179035960214232, "grad_norm": 0.0, - "learning_rate": 1.6689367185044393e-06, - "loss": 0.8777, + "learning_rate": 1.6893214824594195e-06, + "loss": 0.6927, "step": 28863 }, { - "epoch": 0.8190692395005675, + "epoch": 0.8179319334636855, "grad_norm": 0.0, - "learning_rate": 1.6684283997841899e-06, - "loss": 0.726, + "learning_rate": 1.6888110696521299e-06, + "loss": 0.8244, "step": 28864 }, { - "epoch": 0.8190976163450624, + "epoch": 0.817960270905948, "grad_norm": 0.0, - "learning_rate": 1.6679201514411014e-06, - "loss": 0.6855, + "learning_rate": 1.6883007268530726e-06, + "loss": 0.7656, "step": 28865 }, { - "epoch": 0.8191259931895574, + "epoch": 0.8179886083482105, "grad_norm": 0.0, - "learning_rate": 1.6674119734794647e-06, - "loss": 0.8708, + "learning_rate": 1.6877904540665423e-06, + "loss": 0.8033, "step": 28866 }, { - "epoch": 0.8191543700340522, + "epoch": 0.8180169457904729, "grad_norm": 0.0, - "learning_rate": 1.6669038659035741e-06, - "loss": 0.7579, + "learning_rate": 1.6872802512968388e-06, + "loss": 0.7538, "step": 28867 }, { - "epoch": 0.8191827468785471, + "epoch": 0.8180452832327354, "grad_norm": 0.0, - "learning_rate": 1.666395828717724e-06, - "loss": 0.8847, + "learning_rate": 1.6867701185482644e-06, + "loss": 0.8095, "step": 28868 }, { - "epoch": 0.819211123723042, + "epoch": 0.8180736206749979, "grad_norm": 0.0, - "learning_rate": 1.6658878619261997e-06, - "loss": 0.8089, + "learning_rate": 1.6862600558251097e-06, + "loss": 0.7707, "step": 28869 }, { - "epoch": 0.8192395005675369, + "epoch": 0.8181019581172604, "grad_norm": 0.0, - "learning_rate": 1.6653799655332958e-06, - "loss": 0.8087, + "learning_rate": 1.6857500631316726e-06, + "loss": 0.7555, "step": 28870 }, { - "epoch": 0.8192678774120318, + "epoch": 0.8181302955595228, "grad_norm": 0.0, - "learning_rate": 1.6648721395433043e-06, - "loss": 0.8572, + "learning_rate": 1.685240140472254e-06, + "loss": 0.7682, "step": 28871 }, { - "epoch": 0.8192962542565266, + "epoch": 0.8181586330017853, "grad_norm": 0.0, - "learning_rate": 1.6643643839605106e-06, - "loss": 0.7725, + "learning_rate": 1.6847302878511417e-06, + "loss": 0.8995, "step": 28872 }, { - "epoch": 0.8193246311010216, + "epoch": 0.8181869704440478, "grad_norm": 0.0, - "learning_rate": 1.6638566987892057e-06, - "loss": 0.7593, + "learning_rate": 1.6842205052726346e-06, + "loss": 0.7812, "step": 28873 }, { - "epoch": 0.8193530079455165, + "epoch": 0.8182153078863101, "grad_norm": 0.0, - "learning_rate": 1.6633490840336797e-06, - "loss": 0.7855, + "learning_rate": 1.6837107927410268e-06, + "loss": 0.9042, "step": 28874 }, { - "epoch": 0.8193813847900113, + "epoch": 0.8182436453285726, "grad_norm": 0.0, - "learning_rate": 1.6628415396982167e-06, - "loss": 0.8302, + "learning_rate": 1.6832011502606127e-06, + "loss": 0.6829, "step": 28875 }, { - "epoch": 0.8194097616345063, + "epoch": 0.8182719827708351, "grad_norm": 0.0, - "learning_rate": 1.6623340657871057e-06, - "loss": 0.8619, + "learning_rate": 1.682691577835681e-06, + "loss": 0.7726, "step": 28876 }, { - "epoch": 0.8194381384790012, + "epoch": 0.8183003202130976, "grad_norm": 0.0, - "learning_rate": 1.6618266623046363e-06, - "loss": 0.7785, + "learning_rate": 1.6821820754705275e-06, + "loss": 0.8498, "step": 28877 }, { - "epoch": 0.819466515323496, + "epoch": 0.81832865765536, "grad_norm": 0.0, - "learning_rate": 1.6613193292550888e-06, - "loss": 0.7882, + "learning_rate": 1.681672643169443e-06, + "loss": 0.9044, "step": 28878 }, { - "epoch": 0.8194948921679909, + "epoch": 0.8183569950976225, "grad_norm": 0.0, - "learning_rate": 1.660812066642753e-06, - "loss": 0.8967, + "learning_rate": 1.6811632809367207e-06, + "loss": 0.8179, "step": 28879 }, { - "epoch": 0.8195232690124858, + "epoch": 0.818385332539885, "grad_norm": 0.0, - "learning_rate": 1.6603048744719141e-06, - "loss": 0.8016, + "learning_rate": 1.680653988776647e-06, + "loss": 0.8602, "step": 28880 }, { - "epoch": 0.8195516458569807, + "epoch": 0.8184136699821474, "grad_norm": 0.0, - "learning_rate": 1.65979775274685e-06, - "loss": 0.758, + "learning_rate": 1.6801447666935167e-06, + "loss": 0.811, "step": 28881 }, { - "epoch": 0.8195800227014756, + "epoch": 0.8184420074244099, "grad_norm": 0.0, - "learning_rate": 1.6592907014718518e-06, - "loss": 0.7207, + "learning_rate": 1.6796356146916149e-06, + "loss": 0.8054, "step": 28882 }, { - "epoch": 0.8196083995459705, + "epoch": 0.8184703448666724, "grad_norm": 0.0, - "learning_rate": 1.6587837206512025e-06, - "loss": 0.9282, + "learning_rate": 1.6791265327752304e-06, + "loss": 0.9067, "step": 28883 }, { - "epoch": 0.8196367763904654, + "epoch": 0.8184986823089347, "grad_norm": 0.0, - "learning_rate": 1.65827681028918e-06, - "loss": 0.8029, + "learning_rate": 1.6786175209486565e-06, + "loss": 0.8227, "step": 28884 }, { - "epoch": 0.8196651532349603, + "epoch": 0.8185270197511972, "grad_norm": 0.0, - "learning_rate": 1.6577699703900686e-06, - "loss": 0.8986, + "learning_rate": 1.6781085792161744e-06, + "loss": 0.7273, "step": 28885 }, { - "epoch": 0.8196935300794551, + "epoch": 0.8185553571934597, "grad_norm": 0.0, - "learning_rate": 1.6572632009581502e-06, - "loss": 0.8625, + "learning_rate": 1.6775997075820738e-06, + "loss": 0.7832, "step": 28886 }, { - "epoch": 0.8197219069239501, + "epoch": 0.8185836946357222, "grad_norm": 0.0, - "learning_rate": 1.6567565019977028e-06, - "loss": 0.7989, + "learning_rate": 1.6770909060506412e-06, + "loss": 0.8344, "step": 28887 }, { - "epoch": 0.8197502837684449, + "epoch": 0.8186120320779846, "grad_norm": 0.0, - "learning_rate": 1.6562498735130084e-06, - "loss": 0.8698, + "learning_rate": 1.6765821746261635e-06, + "loss": 0.6688, "step": 28888 }, { - "epoch": 0.8197786606129398, + "epoch": 0.8186403695202471, "grad_norm": 0.0, - "learning_rate": 1.6557433155083491e-06, - "loss": 0.8143, + "learning_rate": 1.6760735133129269e-06, + "loss": 0.7344, "step": 28889 }, { - "epoch": 0.8198070374574348, + "epoch": 0.8186687069625096, "grad_norm": 0.0, - "learning_rate": 1.6552368279879982e-06, - "loss": 0.8221, + "learning_rate": 1.6755649221152114e-06, + "loss": 0.8146, "step": 28890 }, { - "epoch": 0.8198354143019296, + "epoch": 0.818697044404772, "grad_norm": 0.0, - "learning_rate": 1.654730410956238e-06, - "loss": 0.8582, + "learning_rate": 1.6750564010373037e-06, + "loss": 0.8795, "step": 28891 }, { - "epoch": 0.8198637911464245, + "epoch": 0.8187253818470345, "grad_norm": 0.0, - "learning_rate": 1.654224064417347e-06, - "loss": 0.7686, + "learning_rate": 1.6745479500834904e-06, + "loss": 0.8722, "step": 28892 }, { - "epoch": 0.8198921679909195, + "epoch": 0.818753719289297, "grad_norm": 0.0, - "learning_rate": 1.6537177883755974e-06, - "loss": 0.735, + "learning_rate": 1.674039569258048e-06, + "loss": 0.8116, "step": 28893 }, { - "epoch": 0.8199205448354143, + "epoch": 0.8187820567315595, "grad_norm": 0.0, - "learning_rate": 1.6532115828352712e-06, - "loss": 0.802, + "learning_rate": 1.6735312585652651e-06, + "loss": 0.7826, "step": 28894 }, { - "epoch": 0.8199489216799092, + "epoch": 0.8188103941738218, "grad_norm": 0.0, - "learning_rate": 1.65270544780064e-06, - "loss": 0.8196, + "learning_rate": 1.6730230180094188e-06, + "loss": 0.6834, "step": 28895 }, { - "epoch": 0.819977298524404, + "epoch": 0.8188387316160843, "grad_norm": 0.0, - "learning_rate": 1.652199383275982e-06, - "loss": 0.8208, + "learning_rate": 1.6725148475947906e-06, + "loss": 0.7899, "step": 28896 }, { - "epoch": 0.820005675368899, + "epoch": 0.8188670690583468, "grad_norm": 0.0, - "learning_rate": 1.6516933892655694e-06, - "loss": 0.8264, + "learning_rate": 1.6720067473256641e-06, + "loss": 0.7606, "step": 28897 }, { - "epoch": 0.8200340522133939, + "epoch": 0.8188954065006092, "grad_norm": 0.0, - "learning_rate": 1.6511874657736792e-06, - "loss": 0.8351, + "learning_rate": 1.6714987172063157e-06, + "loss": 0.8151, "step": 28898 }, { - "epoch": 0.8200624290578887, + "epoch": 0.8189237439428717, "grad_norm": 0.0, - "learning_rate": 1.6506816128045832e-06, - "loss": 0.6967, + "learning_rate": 1.6709907572410266e-06, + "loss": 0.833, "step": 28899 }, { - "epoch": 0.8200908059023837, + "epoch": 0.8189520813851342, "grad_norm": 0.0, - "learning_rate": 1.6501758303625581e-06, - "loss": 0.7883, + "learning_rate": 1.6704828674340745e-06, + "loss": 0.8111, "step": 28900 }, { - "epoch": 0.8201191827468786, + "epoch": 0.8189804188273967, "grad_norm": 0.0, - "learning_rate": 1.6496701184518704e-06, - "loss": 0.8074, + "learning_rate": 1.6699750477897391e-06, + "loss": 0.7948, "step": 28901 }, { - "epoch": 0.8201475595913734, + "epoch": 0.8190087562696591, "grad_norm": 0.0, - "learning_rate": 1.6491644770767946e-06, - "loss": 0.8628, + "learning_rate": 1.6694672983122983e-06, + "loss": 0.7555, "step": 28902 }, { - "epoch": 0.8201759364358683, + "epoch": 0.8190370937119216, "grad_norm": 0.0, - "learning_rate": 1.6486589062416037e-06, - "loss": 0.848, + "learning_rate": 1.6689596190060264e-06, + "loss": 0.7919, "step": 28903 }, { - "epoch": 0.8202043132803633, + "epoch": 0.8190654311541841, "grad_norm": 0.0, - "learning_rate": 1.6481534059505643e-06, - "loss": 0.8073, + "learning_rate": 1.668452009875201e-06, + "loss": 0.7738, "step": 28904 }, { - "epoch": 0.8202326901248581, + "epoch": 0.8190937685964464, "grad_norm": 0.0, - "learning_rate": 1.647647976207949e-06, - "loss": 0.7482, + "learning_rate": 1.667944470924101e-06, + "loss": 0.8129, "step": 28905 }, { - "epoch": 0.820261066969353, + "epoch": 0.8191221060387089, "grad_norm": 0.0, - "learning_rate": 1.6471426170180282e-06, - "loss": 0.8673, + "learning_rate": 1.6674370021569964e-06, + "loss": 0.8638, "step": 28906 }, { - "epoch": 0.8202894438138479, + "epoch": 0.8191504434809714, "grad_norm": 0.0, - "learning_rate": 1.6466373283850679e-06, - "loss": 0.8219, + "learning_rate": 1.6669296035781635e-06, + "loss": 0.6804, "step": 28907 }, { - "epoch": 0.8203178206583428, + "epoch": 0.8191787809232338, "grad_norm": 0.0, - "learning_rate": 1.6461321103133366e-06, - "loss": 0.7483, + "learning_rate": 1.6664222751918801e-06, + "loss": 0.8799, "step": 28908 }, { - "epoch": 0.8203461975028377, + "epoch": 0.8192071183654963, "grad_norm": 0.0, - "learning_rate": 1.6456269628071052e-06, - "loss": 0.8094, + "learning_rate": 1.665915017002414e-06, + "loss": 0.8247, "step": 28909 }, { - "epoch": 0.8203745743473326, + "epoch": 0.8192354558077588, "grad_norm": 0.0, - "learning_rate": 1.6451218858706374e-06, - "loss": 0.8767, + "learning_rate": 1.6654078290140418e-06, + "loss": 0.9199, "step": 28910 }, { - "epoch": 0.8204029511918275, + "epoch": 0.8192637932500213, "grad_norm": 0.0, - "learning_rate": 1.6446168795081997e-06, - "loss": 0.7249, + "learning_rate": 1.6649007112310334e-06, + "loss": 0.8698, "step": 28911 }, { - "epoch": 0.8204313280363223, + "epoch": 0.8192921306922837, "grad_norm": 0.0, - "learning_rate": 1.6441119437240582e-06, - "loss": 0.9157, + "learning_rate": 1.6643936636576608e-06, + "loss": 0.7946, "step": 28912 }, { - "epoch": 0.8204597048808172, + "epoch": 0.8193204681345462, "grad_norm": 0.0, - "learning_rate": 1.64360707852248e-06, - "loss": 0.8975, + "learning_rate": 1.663886686298196e-06, + "loss": 0.8002, "step": 28913 }, { - "epoch": 0.8204880817253122, + "epoch": 0.8193488055768087, "grad_norm": 0.0, - "learning_rate": 1.6431022839077293e-06, - "loss": 0.8563, + "learning_rate": 1.6633797791569085e-06, + "loss": 0.7948, "step": 28914 }, { - "epoch": 0.820516458569807, + "epoch": 0.819377143019071, "grad_norm": 0.0, - "learning_rate": 1.6425975598840683e-06, - "loss": 0.8034, + "learning_rate": 1.6628729422380695e-06, + "loss": 0.7813, "step": 28915 }, { - "epoch": 0.8205448354143019, + "epoch": 0.8194054804613335, "grad_norm": 0.0, - "learning_rate": 1.6420929064557611e-06, - "loss": 0.7115, + "learning_rate": 1.6623661755459498e-06, + "loss": 0.7233, "step": 28916 }, { - "epoch": 0.8205732122587969, + "epoch": 0.819433817903596, "grad_norm": 0.0, - "learning_rate": 1.6415883236270724e-06, - "loss": 0.7402, + "learning_rate": 1.6618594790848143e-06, + "loss": 0.8357, "step": 28917 }, { - "epoch": 0.8206015891032917, + "epoch": 0.8194621553458585, "grad_norm": 0.0, - "learning_rate": 1.641083811402262e-06, - "loss": 0.8365, + "learning_rate": 1.661352852858934e-06, + "loss": 0.7788, "step": 28918 }, { - "epoch": 0.8206299659477866, + "epoch": 0.8194904927881209, "grad_norm": 0.0, - "learning_rate": 1.640579369785591e-06, - "loss": 0.9041, + "learning_rate": 1.6608462968725736e-06, + "loss": 0.8457, "step": 28919 }, { - "epoch": 0.8206583427922814, + "epoch": 0.8195188302303834, "grad_norm": 0.0, - "learning_rate": 1.6400749987813247e-06, - "loss": 0.8162, + "learning_rate": 1.6603398111300006e-06, + "loss": 0.8006, "step": 28920 }, { - "epoch": 0.8206867196367764, + "epoch": 0.8195471676726459, "grad_norm": 0.0, - "learning_rate": 1.6395706983937186e-06, - "loss": 0.8105, + "learning_rate": 1.659833395635485e-06, + "loss": 0.8231, "step": 28921 }, { - "epoch": 0.8207150964812713, + "epoch": 0.8195755051149083, "grad_norm": 0.0, - "learning_rate": 1.6390664686270342e-06, - "loss": 0.9146, + "learning_rate": 1.6593270503932867e-06, + "loss": 0.7596, "step": 28922 }, { - "epoch": 0.8207434733257661, + "epoch": 0.8196038425571708, "grad_norm": 0.0, - "learning_rate": 1.6385623094855341e-06, - "loss": 0.9146, + "learning_rate": 1.658820775407678e-06, + "loss": 0.7516, "step": 28923 }, { - "epoch": 0.8207718501702611, + "epoch": 0.8196321799994333, "grad_norm": 0.0, - "learning_rate": 1.6380582209734707e-06, - "loss": 0.7717, + "learning_rate": 1.6583145706829152e-06, + "loss": 0.804, "step": 28924 }, { - "epoch": 0.820800227014756, + "epoch": 0.8196605174416957, "grad_norm": 0.0, - "learning_rate": 1.6375542030951063e-06, - "loss": 0.7461, + "learning_rate": 1.6578084362232672e-06, + "loss": 0.8311, "step": 28925 }, { - "epoch": 0.8208286038592508, + "epoch": 0.8196888548839582, "grad_norm": 0.0, - "learning_rate": 1.6370502558546997e-06, - "loss": 0.8097, + "learning_rate": 1.657302372032996e-06, + "loss": 0.7819, "step": 28926 }, { - "epoch": 0.8208569807037458, + "epoch": 0.8197171923262206, "grad_norm": 0.0, - "learning_rate": 1.6365463792565005e-06, - "loss": 0.8799, + "learning_rate": 1.6567963781163653e-06, + "loss": 0.8555, "step": 28927 }, { - "epoch": 0.8208853575482407, + "epoch": 0.8197455297684831, "grad_norm": 0.0, - "learning_rate": 1.6360425733047758e-06, - "loss": 0.8374, + "learning_rate": 1.656290454477637e-06, + "loss": 0.7642, "step": 28928 }, { - "epoch": 0.8209137343927355, + "epoch": 0.8197738672107455, "grad_norm": 0.0, - "learning_rate": 1.6355388380037717e-06, - "loss": 0.7515, + "learning_rate": 1.6557846011210753e-06, + "loss": 0.7817, "step": 28929 }, { - "epoch": 0.8209421112372304, + "epoch": 0.819802204653008, "grad_norm": 0.0, - "learning_rate": 1.6350351733577474e-06, - "loss": 0.8645, + "learning_rate": 1.655278818050936e-06, + "loss": 0.8105, "step": 28930 }, { - "epoch": 0.8209704880817253, + "epoch": 0.8198305420952705, "grad_norm": 0.0, - "learning_rate": 1.6345315793709592e-06, - "loss": 0.7822, + "learning_rate": 1.6547731052714834e-06, + "loss": 0.7722, "step": 28931 }, { - "epoch": 0.8209988649262202, + "epoch": 0.8198588795375329, "grad_norm": 0.0, - "learning_rate": 1.6340280560476573e-06, - "loss": 0.8202, + "learning_rate": 1.6542674627869738e-06, + "loss": 0.8116, "step": 28932 }, { - "epoch": 0.8210272417707151, + "epoch": 0.8198872169797954, "grad_norm": 0.0, - "learning_rate": 1.6335246033920959e-06, - "loss": 0.8544, + "learning_rate": 1.6537618906016695e-06, + "loss": 0.7141, "step": 28933 }, { - "epoch": 0.82105561861521, + "epoch": 0.8199155544220579, "grad_norm": 0.0, - "learning_rate": 1.6330212214085306e-06, - "loss": 0.8014, + "learning_rate": 1.6532563887198295e-06, + "loss": 0.8387, "step": 28934 }, { - "epoch": 0.8210839954597049, + "epoch": 0.8199438918643204, "grad_norm": 0.0, - "learning_rate": 1.632517910101209e-06, - "loss": 0.8298, + "learning_rate": 1.6527509571457078e-06, + "loss": 0.8105, "step": 28935 }, { - "epoch": 0.8211123723041998, + "epoch": 0.8199722293065828, "grad_norm": 0.0, - "learning_rate": 1.6320146694743844e-06, - "loss": 0.9295, + "learning_rate": 1.6522455958835648e-06, + "loss": 0.8741, "step": 28936 }, { - "epoch": 0.8211407491486946, + "epoch": 0.8200005667488452, "grad_norm": 0.0, - "learning_rate": 1.6315114995323111e-06, - "loss": 0.7741, + "learning_rate": 1.65174030493766e-06, + "loss": 0.8626, "step": 28937 }, { - "epoch": 0.8211691259931896, + "epoch": 0.8200289041911077, "grad_norm": 0.0, - "learning_rate": 1.6310084002792336e-06, - "loss": 0.767, + "learning_rate": 1.6512350843122438e-06, + "loss": 0.7509, "step": 28938 }, { - "epoch": 0.8211975028376844, + "epoch": 0.8200572416333701, "grad_norm": 0.0, - "learning_rate": 1.6305053717194041e-06, - "loss": 0.7537, + "learning_rate": 1.6507299340115746e-06, + "loss": 0.7776, "step": 28939 }, { - "epoch": 0.8212258796821793, + "epoch": 0.8200855790756326, "grad_norm": 0.0, - "learning_rate": 1.6300024138570748e-06, - "loss": 0.794, + "learning_rate": 1.650224854039907e-06, + "loss": 0.8747, "step": 28940 }, { - "epoch": 0.8212542565266743, + "epoch": 0.8201139165178951, "grad_norm": 0.0, - "learning_rate": 1.6294995266964885e-06, - "loss": 0.7827, + "learning_rate": 1.6497198444014973e-06, + "loss": 0.7309, "step": 28941 }, { - "epoch": 0.8212826333711691, + "epoch": 0.8201422539601576, "grad_norm": 0.0, - "learning_rate": 1.6289967102418969e-06, - "loss": 0.847, + "learning_rate": 1.6492149051006002e-06, + "loss": 0.7699, "step": 28942 }, { - "epoch": 0.821311010215664, + "epoch": 0.82017059140242, "grad_norm": 0.0, - "learning_rate": 1.6284939644975451e-06, - "loss": 0.8771, + "learning_rate": 1.6487100361414642e-06, + "loss": 0.8561, "step": 28943 }, { - "epoch": 0.821339387060159, + "epoch": 0.8201989288446825, "grad_norm": 0.0, - "learning_rate": 1.627991289467682e-06, - "loss": 0.8144, + "learning_rate": 1.6482052375283442e-06, + "loss": 0.9211, "step": 28944 }, { - "epoch": 0.8213677639046538, + "epoch": 0.820227266286945, "grad_norm": 0.0, - "learning_rate": 1.6274886851565552e-06, - "loss": 0.872, + "learning_rate": 1.6477005092654963e-06, + "loss": 0.8943, "step": 28945 }, { - "epoch": 0.8213961407491487, + "epoch": 0.8202556037292074, "grad_norm": 0.0, - "learning_rate": 1.626986151568405e-06, - "loss": 0.8341, + "learning_rate": 1.6471958513571662e-06, + "loss": 0.8937, "step": 28946 }, { - "epoch": 0.8214245175936435, + "epoch": 0.8202839411714699, "grad_norm": 0.0, - "learning_rate": 1.6264836887074797e-06, - "loss": 0.838, + "learning_rate": 1.6466912638076082e-06, + "loss": 0.8901, "step": 28947 }, { - "epoch": 0.8214528944381385, + "epoch": 0.8203122786137323, "grad_norm": 0.0, - "learning_rate": 1.6259812965780253e-06, - "loss": 0.9161, + "learning_rate": 1.6461867466210702e-06, + "loss": 0.7475, "step": 28948 }, { - "epoch": 0.8214812712826334, + "epoch": 0.8203406160559947, "grad_norm": 0.0, - "learning_rate": 1.6254789751842803e-06, - "loss": 0.8172, + "learning_rate": 1.645682299801804e-06, + "loss": 0.7041, "step": 28949 }, { - "epoch": 0.8215096481271282, + "epoch": 0.8203689534982572, "grad_norm": 0.0, - "learning_rate": 1.6249767245304914e-06, - "loss": 0.7733, + "learning_rate": 1.64517792335406e-06, + "loss": 0.6994, "step": 28950 }, { - "epoch": 0.8215380249716232, + "epoch": 0.8203972909405197, "grad_norm": 0.0, - "learning_rate": 1.6244745446209032e-06, - "loss": 0.7946, + "learning_rate": 1.644673617282082e-06, + "loss": 0.8054, "step": 28951 }, { - "epoch": 0.8215664018161181, + "epoch": 0.8204256283827822, "grad_norm": 0.0, - "learning_rate": 1.623972435459752e-06, - "loss": 0.8065, + "learning_rate": 1.6441693815901205e-06, + "loss": 0.7959, "step": 28952 }, { - "epoch": 0.8215947786606129, + "epoch": 0.8204539658250446, "grad_norm": 0.0, - "learning_rate": 1.6234703970512822e-06, - "loss": 0.7663, + "learning_rate": 1.6436652162824241e-06, + "loss": 0.7537, "step": 28953 }, { - "epoch": 0.8216231555051078, + "epoch": 0.8204823032673071, "grad_norm": 0.0, - "learning_rate": 1.6229684293997372e-06, - "loss": 0.7116, + "learning_rate": 1.6431611213632381e-06, + "loss": 0.9058, "step": 28954 }, { - "epoch": 0.8216515323496028, + "epoch": 0.8205106407095696, "grad_norm": 0.0, - "learning_rate": 1.6224665325093525e-06, - "loss": 0.8336, + "learning_rate": 1.6426570968368116e-06, + "loss": 0.7818, "step": 28955 }, { - "epoch": 0.8216799091940976, + "epoch": 0.820538978151832, "grad_norm": 0.0, - "learning_rate": 1.6219647063843691e-06, - "loss": 0.8703, + "learning_rate": 1.6421531427073856e-06, + "loss": 0.7891, "step": 28956 }, { - "epoch": 0.8217082860385925, + "epoch": 0.8205673155940945, "grad_norm": 0.0, - "learning_rate": 1.6214629510290291e-06, - "loss": 0.8745, + "learning_rate": 1.6416492589792076e-06, + "loss": 0.8108, "step": 28957 }, { - "epoch": 0.8217366628830874, + "epoch": 0.820595653036357, "grad_norm": 0.0, - "learning_rate": 1.6209612664475639e-06, - "loss": 0.807, + "learning_rate": 1.6411454456565234e-06, + "loss": 0.7131, "step": 28958 }, { - "epoch": 0.8217650397275823, + "epoch": 0.8206239904786194, "grad_norm": 0.0, - "learning_rate": 1.6204596526442195e-06, - "loss": 0.8484, + "learning_rate": 1.6406417027435728e-06, + "loss": 0.8456, "step": 28959 }, { - "epoch": 0.8217934165720772, + "epoch": 0.8206523279208818, "grad_norm": 0.0, - "learning_rate": 1.6199581096232264e-06, - "loss": 0.7628, + "learning_rate": 1.6401380302446046e-06, + "loss": 0.7271, "step": 28960 }, { - "epoch": 0.821821793416572, + "epoch": 0.8206806653631443, "grad_norm": 0.0, - "learning_rate": 1.6194566373888244e-06, - "loss": 0.7875, + "learning_rate": 1.6396344281638544e-06, + "loss": 0.795, "step": 28961 }, { - "epoch": 0.821850170261067, + "epoch": 0.8207090028054068, "grad_norm": 0.0, - "learning_rate": 1.6189552359452509e-06, - "loss": 0.8295, + "learning_rate": 1.6391308965055685e-06, + "loss": 0.745, "step": 28962 }, { - "epoch": 0.8218785471055619, + "epoch": 0.8207373402476692, "grad_norm": 0.0, - "learning_rate": 1.6184539052967375e-06, - "loss": 0.7843, + "learning_rate": 1.6386274352739906e-06, + "loss": 0.8578, "step": 28963 }, { - "epoch": 0.8219069239500567, + "epoch": 0.8207656776899317, "grad_norm": 0.0, - "learning_rate": 1.6179526454475202e-06, - "loss": 0.8379, + "learning_rate": 1.638124044473356e-06, + "loss": 0.7935, "step": 28964 }, { - "epoch": 0.8219353007945517, + "epoch": 0.8207940151321942, "grad_norm": 0.0, - "learning_rate": 1.6174514564018374e-06, - "loss": 0.8215, + "learning_rate": 1.6376207241079078e-06, + "loss": 0.7175, "step": 28965 }, { - "epoch": 0.8219636776390465, + "epoch": 0.8208223525744567, "grad_norm": 0.0, - "learning_rate": 1.616950338163915e-06, - "loss": 0.8071, + "learning_rate": 1.6371174741818851e-06, + "loss": 0.7941, "step": 28966 }, { - "epoch": 0.8219920544835414, + "epoch": 0.8208506900167191, "grad_norm": 0.0, - "learning_rate": 1.6164492907379903e-06, - "loss": 0.9129, + "learning_rate": 1.6366142946995278e-06, + "loss": 0.8066, "step": 28967 }, { - "epoch": 0.8220204313280364, + "epoch": 0.8208790274589816, "grad_norm": 0.0, - "learning_rate": 1.6159483141282984e-06, - "loss": 0.845, + "learning_rate": 1.6361111856650768e-06, + "loss": 0.931, "step": 28968 }, { - "epoch": 0.8220488081725312, + "epoch": 0.820907364901244, "grad_norm": 0.0, - "learning_rate": 1.6154474083390648e-06, - "loss": 0.8012, + "learning_rate": 1.6356081470827635e-06, + "loss": 0.8437, "step": 28969 }, { - "epoch": 0.8220771850170261, + "epoch": 0.8209357023435064, "grad_norm": 0.0, - "learning_rate": 1.614946573374524e-06, - "loss": 0.8487, + "learning_rate": 1.6351051789568296e-06, + "loss": 0.8608, "step": 28970 }, { - "epoch": 0.822105561861521, + "epoch": 0.8209640397857689, "grad_norm": 0.0, - "learning_rate": 1.6144458092389092e-06, - "loss": 0.7899, + "learning_rate": 1.6346022812915141e-06, + "loss": 0.8173, "step": 28971 }, { - "epoch": 0.8221339387060159, + "epoch": 0.8209923772280314, "grad_norm": 0.0, - "learning_rate": 1.6139451159364449e-06, - "loss": 0.7518, + "learning_rate": 1.634099454091046e-06, + "loss": 0.7907, "step": 28972 }, { - "epoch": 0.8221623155505108, + "epoch": 0.8210207146702938, "grad_norm": 0.0, - "learning_rate": 1.6134444934713611e-06, - "loss": 0.7118, + "learning_rate": 1.6335966973596662e-06, + "loss": 0.8506, "step": 28973 }, { - "epoch": 0.8221906923950056, + "epoch": 0.8210490521125563, "grad_norm": 0.0, - "learning_rate": 1.6129439418478932e-06, - "loss": 0.8754, + "learning_rate": 1.6330940111016103e-06, + "loss": 0.7816, "step": 28974 }, { - "epoch": 0.8222190692395006, + "epoch": 0.8210773895548188, "grad_norm": 0.0, - "learning_rate": 1.6124434610702622e-06, - "loss": 0.8995, + "learning_rate": 1.632591395321107e-06, + "loss": 0.9137, "step": 28975 }, { - "epoch": 0.8222474460839955, + "epoch": 0.8211057269970813, "grad_norm": 0.0, - "learning_rate": 1.6119430511427015e-06, - "loss": 0.7523, + "learning_rate": 1.6320888500223975e-06, + "loss": 0.8108, "step": 28976 }, { - "epoch": 0.8222758229284903, + "epoch": 0.8211340644393437, "grad_norm": 0.0, - "learning_rate": 1.611442712069431e-06, - "loss": 0.7529, + "learning_rate": 1.6315863752097071e-06, + "loss": 0.7457, "step": 28977 }, { - "epoch": 0.8223041997729852, + "epoch": 0.8211624018816062, "grad_norm": 0.0, - "learning_rate": 1.6109424438546827e-06, - "loss": 0.7986, + "learning_rate": 1.6310839708872738e-06, + "loss": 0.8892, "step": 28978 }, { - "epoch": 0.8223325766174802, + "epoch": 0.8211907393238687, "grad_norm": 0.0, - "learning_rate": 1.6104422465026825e-06, - "loss": 0.8482, + "learning_rate": 1.6305816370593263e-06, + "loss": 0.8286, "step": 28979 }, { - "epoch": 0.822360953461975, + "epoch": 0.821219076766131, "grad_norm": 0.0, - "learning_rate": 1.6099421200176513e-06, - "loss": 0.8081, + "learning_rate": 1.630079373730099e-06, + "loss": 0.8281, "step": 28980 }, { - "epoch": 0.8223893303064699, + "epoch": 0.8212474142083935, "grad_norm": 0.0, - "learning_rate": 1.6094420644038157e-06, - "loss": 0.7638, + "learning_rate": 1.629577180903823e-06, + "loss": 0.835, "step": 28981 }, { - "epoch": 0.8224177071509648, + "epoch": 0.821275751650656, "grad_norm": 0.0, - "learning_rate": 1.608942079665403e-06, - "loss": 0.7189, + "learning_rate": 1.6290750585847237e-06, + "loss": 0.827, "step": 28982 }, { - "epoch": 0.8224460839954597, + "epoch": 0.8213040890929185, "grad_norm": 0.0, - "learning_rate": 1.608442165806632e-06, - "loss": 0.8063, + "learning_rate": 1.6285730067770344e-06, + "loss": 0.9044, "step": 28983 }, { - "epoch": 0.8224744608399546, + "epoch": 0.8213324265351809, "grad_norm": 0.0, - "learning_rate": 1.6079423228317259e-06, - "loss": 0.7153, + "learning_rate": 1.6280710254849862e-06, + "loss": 0.8045, "step": 28984 }, { - "epoch": 0.8225028376844495, + "epoch": 0.8213607639774434, "grad_norm": 0.0, - "learning_rate": 1.6074425507449122e-06, - "loss": 0.8552, + "learning_rate": 1.6275691147128014e-06, + "loss": 0.7732, "step": 28985 }, { - "epoch": 0.8225312145289444, + "epoch": 0.8213891014197059, "grad_norm": 0.0, - "learning_rate": 1.6069428495504047e-06, - "loss": 0.9139, + "learning_rate": 1.627067274464711e-06, + "loss": 0.7358, "step": 28986 }, { - "epoch": 0.8225595913734393, + "epoch": 0.8214174388619683, "grad_norm": 0.0, - "learning_rate": 1.6064432192524293e-06, - "loss": 0.7852, + "learning_rate": 1.6265655047449448e-06, + "loss": 0.7457, "step": 28987 }, { - "epoch": 0.8225879682179341, + "epoch": 0.8214457763042308, "grad_norm": 0.0, - "learning_rate": 1.6059436598552068e-06, - "loss": 0.8166, + "learning_rate": 1.6260638055577238e-06, + "loss": 0.8849, "step": 28988 }, { - "epoch": 0.8226163450624291, + "epoch": 0.8214741137464933, "grad_norm": 0.0, - "learning_rate": 1.605444171362952e-06, - "loss": 0.8536, + "learning_rate": 1.6255621769072805e-06, + "loss": 0.8881, "step": 28989 }, { - "epoch": 0.822644721906924, + "epoch": 0.8215024511887558, "grad_norm": 0.0, - "learning_rate": 1.604944753779889e-06, - "loss": 0.838, + "learning_rate": 1.6250606187978334e-06, + "loss": 0.7931, "step": 28990 }, { - "epoch": 0.8226730987514188, + "epoch": 0.8215307886310181, "grad_norm": 0.0, - "learning_rate": 1.6044454071102379e-06, - "loss": 0.8081, + "learning_rate": 1.6245591312336106e-06, + "loss": 0.8167, "step": 28991 }, { - "epoch": 0.8227014755959138, + "epoch": 0.8215591260732806, "grad_norm": 0.0, - "learning_rate": 1.6039461313582106e-06, - "loss": 0.8476, + "learning_rate": 1.6240577142188362e-06, + "loss": 0.8539, "step": 28992 }, { - "epoch": 0.8227298524404086, + "epoch": 0.8215874635155431, "grad_norm": 0.0, - "learning_rate": 1.6034469265280295e-06, - "loss": 0.8508, + "learning_rate": 1.623556367757735e-06, + "loss": 0.7707, "step": 28993 }, { - "epoch": 0.8227582292849035, + "epoch": 0.8216158009578055, "grad_norm": 0.0, - "learning_rate": 1.6029477926239102e-06, - "loss": 0.705, + "learning_rate": 1.6230550918545286e-06, + "loss": 0.8289, "step": 28994 }, { - "epoch": 0.8227866061293984, + "epoch": 0.821644138400068, "grad_norm": 0.0, - "learning_rate": 1.6024487296500668e-06, - "loss": 0.7782, + "learning_rate": 1.6225538865134427e-06, + "loss": 0.792, "step": 28995 }, { - "epoch": 0.8228149829738933, + "epoch": 0.8216724758423305, "grad_norm": 0.0, - "learning_rate": 1.6019497376107162e-06, - "loss": 0.8443, + "learning_rate": 1.6220527517386931e-06, + "loss": 0.8506, "step": 28996 }, { - "epoch": 0.8228433598183882, + "epoch": 0.8217008132845929, "grad_norm": 0.0, - "learning_rate": 1.6014508165100772e-06, - "loss": 0.8294, + "learning_rate": 1.6215516875345073e-06, + "loss": 0.8215, "step": 28997 }, { - "epoch": 0.822871736662883, + "epoch": 0.8217291507268554, "grad_norm": 0.0, - "learning_rate": 1.6009519663523577e-06, - "loss": 0.8308, + "learning_rate": 1.6210506939050996e-06, + "loss": 0.7953, "step": 28998 }, { - "epoch": 0.822900113507378, + "epoch": 0.8217574881691179, "grad_norm": 0.0, - "learning_rate": 1.600453187141776e-06, - "loss": 0.849, + "learning_rate": 1.6205497708546936e-06, + "loss": 0.7771, "step": 28999 }, { - "epoch": 0.8229284903518729, + "epoch": 0.8217858256113804, "grad_norm": 0.0, - "learning_rate": 1.5999544788825427e-06, - "loss": 0.82, + "learning_rate": 1.6200489183875112e-06, + "loss": 0.7816, "step": 29000 }, { - "epoch": 0.8229568671963677, + "epoch": 0.8218141630536427, "grad_norm": 0.0, - "learning_rate": 1.599455841578871e-06, - "loss": 0.8669, + "learning_rate": 1.6195481365077658e-06, + "loss": 0.8289, "step": 29001 }, { - "epoch": 0.8229852440408627, + "epoch": 0.8218425004959052, "grad_norm": 0.0, - "learning_rate": 1.5989572752349758e-06, - "loss": 0.8872, + "learning_rate": 1.6190474252196819e-06, + "loss": 0.8992, "step": 29002 }, { - "epoch": 0.8230136208853576, + "epoch": 0.8218708379381677, "grad_norm": 0.0, - "learning_rate": 1.5984587798550633e-06, - "loss": 0.7955, + "learning_rate": 1.6185467845274704e-06, + "loss": 0.8206, "step": 29003 }, { - "epoch": 0.8230419977298524, + "epoch": 0.8218991753804301, "grad_norm": 0.0, - "learning_rate": 1.5979603554433454e-06, - "loss": 0.9014, + "learning_rate": 1.6180462144353526e-06, + "loss": 0.7723, "step": 29004 }, { - "epoch": 0.8230703745743473, + "epoch": 0.8219275128226926, "grad_norm": 0.0, - "learning_rate": 1.5974620020040388e-06, - "loss": 0.8502, + "learning_rate": 1.6175457149475427e-06, + "loss": 0.8106, "step": 29005 }, { - "epoch": 0.8230987514188423, + "epoch": 0.8219558502649551, "grad_norm": 0.0, - "learning_rate": 1.5969637195413457e-06, - "loss": 0.803, + "learning_rate": 1.6170452860682595e-06, + "loss": 0.8176, "step": 29006 }, { - "epoch": 0.8231271282633371, + "epoch": 0.8219841877072176, "grad_norm": 0.0, - "learning_rate": 1.596465508059477e-06, - "loss": 0.888, + "learning_rate": 1.6165449278017153e-06, + "loss": 0.7368, "step": 29007 }, { - "epoch": 0.823155505107832, + "epoch": 0.82201252514948, "grad_norm": 0.0, - "learning_rate": 1.5959673675626452e-06, - "loss": 0.968, + "learning_rate": 1.61604464015213e-06, + "loss": 0.7601, "step": 29008 }, { - "epoch": 0.823183881952327, + "epoch": 0.8220408625917425, "grad_norm": 0.0, - "learning_rate": 1.5954692980550534e-06, - "loss": 0.8132, + "learning_rate": 1.6155444231237106e-06, + "loss": 0.8006, "step": 29009 }, { - "epoch": 0.8232122587968218, + "epoch": 0.822069200034005, "grad_norm": 0.0, - "learning_rate": 1.5949712995409095e-06, - "loss": 0.8026, + "learning_rate": 1.6150442767206775e-06, + "loss": 0.8197, "step": 29010 }, { - "epoch": 0.8232406356413167, + "epoch": 0.8220975374762673, "grad_norm": 0.0, - "learning_rate": 1.5944733720244222e-06, - "loss": 0.7498, + "learning_rate": 1.6145442009472367e-06, + "loss": 0.8316, "step": 29011 }, { - "epoch": 0.8232690124858115, + "epoch": 0.8221258749185298, "grad_norm": 0.0, - "learning_rate": 1.5939755155097947e-06, - "loss": 0.8002, + "learning_rate": 1.6140441958076047e-06, + "loss": 0.825, "step": 29012 }, { - "epoch": 0.8232973893303065, + "epoch": 0.8221542123607923, "grad_norm": 0.0, - "learning_rate": 1.5934777300012339e-06, - "loss": 0.8074, + "learning_rate": 1.6135442613059936e-06, + "loss": 0.8958, "step": 29013 }, { - "epoch": 0.8233257661748014, + "epoch": 0.8221825498030548, "grad_norm": 0.0, - "learning_rate": 1.5929800155029473e-06, - "loss": 0.7702, + "learning_rate": 1.6130443974466126e-06, + "loss": 0.711, "step": 29014 }, { - "epoch": 0.8233541430192962, + "epoch": 0.8222108872453172, "grad_norm": 0.0, - "learning_rate": 1.592482372019134e-06, - "loss": 0.7656, + "learning_rate": 1.6125446042336723e-06, + "loss": 0.8173, "step": 29015 }, { - "epoch": 0.8233825198637912, + "epoch": 0.8222392246875797, "grad_norm": 0.0, - "learning_rate": 1.5919847995539993e-06, - "loss": 0.8131, + "learning_rate": 1.6120448816713863e-06, + "loss": 0.8007, "step": 29016 }, { - "epoch": 0.823410896708286, + "epoch": 0.8222675621298422, "grad_norm": 0.0, - "learning_rate": 1.5914872981117501e-06, - "loss": 0.8239, + "learning_rate": 1.6115452297639579e-06, + "loss": 0.8254, "step": 29017 }, { - "epoch": 0.8234392735527809, + "epoch": 0.8222958995721046, "grad_norm": 0.0, - "learning_rate": 1.5909898676965828e-06, - "loss": 0.8653, + "learning_rate": 1.611045648515599e-06, + "loss": 0.7705, "step": 29018 }, { - "epoch": 0.8234676503972759, + "epoch": 0.8223242370143671, "grad_norm": 0.0, - "learning_rate": 1.5904925083127021e-06, - "loss": 0.9415, + "learning_rate": 1.6105461379305187e-06, + "loss": 0.8101, "step": 29019 }, { - "epoch": 0.8234960272417707, + "epoch": 0.8223525744566296, "grad_norm": 0.0, - "learning_rate": 1.5899952199643099e-06, - "loss": 0.7639, + "learning_rate": 1.6100466980129226e-06, + "loss": 0.7583, "step": 29020 }, { - "epoch": 0.8235244040862656, + "epoch": 0.822380911898892, "grad_norm": 0.0, - "learning_rate": 1.5894980026556062e-06, - "loss": 0.846, + "learning_rate": 1.6095473287670215e-06, + "loss": 0.8704, "step": 29021 }, { - "epoch": 0.8235527809307605, + "epoch": 0.8224092493411544, "grad_norm": 0.0, - "learning_rate": 1.589000856390792e-06, - "loss": 0.6944, + "learning_rate": 1.6090480301970168e-06, + "loss": 0.8754, "step": 29022 }, { - "epoch": 0.8235811577752554, + "epoch": 0.8224375867834169, "grad_norm": 0.0, - "learning_rate": 1.5885037811740644e-06, - "loss": 0.8826, + "learning_rate": 1.608548802307116e-06, + "loss": 0.7704, "step": 29023 }, { - "epoch": 0.8236095346197503, + "epoch": 0.8224659242256794, "grad_norm": 0.0, - "learning_rate": 1.588006777009623e-06, - "loss": 0.7427, + "learning_rate": 1.6080496451015282e-06, + "loss": 0.8331, "step": 29024 }, { - "epoch": 0.8236379114642451, + "epoch": 0.8224942616679418, "grad_norm": 0.0, - "learning_rate": 1.587509843901669e-06, - "loss": 0.8641, + "learning_rate": 1.607550558584451e-06, + "loss": 0.7407, "step": 29025 }, { - "epoch": 0.8236662883087401, + "epoch": 0.8225225991102043, "grad_norm": 0.0, - "learning_rate": 1.5870129818543956e-06, - "loss": 0.798, + "learning_rate": 1.6070515427600941e-06, + "loss": 0.8636, "step": 29026 }, { - "epoch": 0.823694665153235, + "epoch": 0.8225509365524668, "grad_norm": 0.0, - "learning_rate": 1.5865161908720006e-06, - "loss": 0.9046, + "learning_rate": 1.6065525976326568e-06, + "loss": 0.9063, "step": 29027 }, { - "epoch": 0.8237230419977298, + "epoch": 0.8225792739947292, "grad_norm": 0.0, - "learning_rate": 1.586019470958685e-06, - "loss": 0.729, + "learning_rate": 1.6060537232063445e-06, + "loss": 0.7334, "step": 29028 }, { - "epoch": 0.8237514188422247, + "epoch": 0.8226076114369917, "grad_norm": 0.0, - "learning_rate": 1.5855228221186391e-06, - "loss": 0.7949, + "learning_rate": 1.6055549194853604e-06, + "loss": 0.993, "step": 29029 }, { - "epoch": 0.8237797956867197, + "epoch": 0.8226359488792542, "grad_norm": 0.0, - "learning_rate": 1.5850262443560594e-06, - "loss": 0.7293, + "learning_rate": 1.6050561864739012e-06, + "loss": 0.8695, "step": 29030 }, { - "epoch": 0.8238081725312145, + "epoch": 0.8226642863215167, "grad_norm": 0.0, - "learning_rate": 1.5845297376751433e-06, - "loss": 0.9186, + "learning_rate": 1.6045575241761724e-06, + "loss": 0.7433, "step": 29031 }, { - "epoch": 0.8238365493757094, + "epoch": 0.822692623763779, "grad_norm": 0.0, - "learning_rate": 1.5840333020800813e-06, - "loss": 0.8, + "learning_rate": 1.604058932596373e-06, + "loss": 0.7265, "step": 29032 }, { - "epoch": 0.8238649262202044, + "epoch": 0.8227209612060415, "grad_norm": 0.0, - "learning_rate": 1.5835369375750676e-06, - "loss": 0.8284, + "learning_rate": 1.6035604117387028e-06, + "loss": 0.8355, "step": 29033 }, { - "epoch": 0.8238933030646992, + "epoch": 0.822749298648304, "grad_norm": 0.0, - "learning_rate": 1.5830406441642987e-06, - "loss": 0.8075, + "learning_rate": 1.6030619616073628e-06, + "loss": 0.8754, "step": 29034 }, { - "epoch": 0.8239216799091941, + "epoch": 0.8227776360905664, "grad_norm": 0.0, - "learning_rate": 1.5825444218519593e-06, - "loss": 0.7989, + "learning_rate": 1.602563582206549e-06, + "loss": 0.7799, "step": 29035 }, { - "epoch": 0.823950056753689, + "epoch": 0.8228059735328289, "grad_norm": 0.0, - "learning_rate": 1.582048270642249e-06, - "loss": 0.7388, + "learning_rate": 1.6020652735404595e-06, + "loss": 0.9523, "step": 29036 }, { - "epoch": 0.8239784335981839, + "epoch": 0.8228343109750914, "grad_norm": 0.0, - "learning_rate": 1.5815521905393538e-06, - "loss": 0.7978, + "learning_rate": 1.601567035613295e-06, + "loss": 0.7898, "step": 29037 }, { - "epoch": 0.8240068104426788, + "epoch": 0.8228626484173539, "grad_norm": 0.0, - "learning_rate": 1.5810561815474657e-06, - "loss": 0.8815, + "learning_rate": 1.6010688684292485e-06, + "loss": 0.7555, "step": 29038 }, { - "epoch": 0.8240351872871736, + "epoch": 0.8228909858596163, "grad_norm": 0.0, - "learning_rate": 1.580560243670778e-06, - "loss": 0.8065, + "learning_rate": 1.600570771992519e-06, + "loss": 0.8185, "step": 29039 }, { - "epoch": 0.8240635641316686, + "epoch": 0.8229193233018788, "grad_norm": 0.0, - "learning_rate": 1.5800643769134728e-06, - "loss": 0.9437, + "learning_rate": 1.6000727463072995e-06, + "loss": 0.8656, "step": 29040 }, { - "epoch": 0.8240919409761635, + "epoch": 0.8229476607441413, "grad_norm": 0.0, - "learning_rate": 1.5795685812797423e-06, - "loss": 0.8242, + "learning_rate": 1.5995747913777858e-06, + "loss": 0.816, "step": 29041 }, { - "epoch": 0.8241203178206583, + "epoch": 0.8229759981864037, "grad_norm": 0.0, - "learning_rate": 1.5790728567737767e-06, - "loss": 0.8117, + "learning_rate": 1.5990769072081758e-06, + "loss": 0.7194, "step": 29042 }, { - "epoch": 0.8241486946651533, + "epoch": 0.8230043356286662, "grad_norm": 0.0, - "learning_rate": 1.5785772033997593e-06, - "loss": 0.7218, + "learning_rate": 1.5985790938026591e-06, + "loss": 0.769, "step": 29043 }, { - "epoch": 0.8241770715096481, + "epoch": 0.8230326730709286, "grad_norm": 0.0, - "learning_rate": 1.5780816211618787e-06, - "loss": 0.8718, + "learning_rate": 1.5980813511654291e-06, + "loss": 0.7992, "step": 29044 }, { - "epoch": 0.824205448354143, + "epoch": 0.823061010513191, "grad_norm": 0.0, - "learning_rate": 1.577586110064323e-06, - "loss": 0.8319, + "learning_rate": 1.5975836793006805e-06, + "loss": 0.8527, "step": 29045 }, { - "epoch": 0.8242338251986379, + "epoch": 0.8230893479554535, "grad_norm": 0.0, - "learning_rate": 1.5770906701112753e-06, - "loss": 0.8006, + "learning_rate": 1.5970860782126053e-06, + "loss": 0.8666, "step": 29046 }, { - "epoch": 0.8242622020431328, + "epoch": 0.823117685397716, "grad_norm": 0.0, - "learning_rate": 1.5765953013069201e-06, - "loss": 0.8919, + "learning_rate": 1.5965885479053956e-06, + "loss": 0.7603, "step": 29047 }, { - "epoch": 0.8242905788876277, + "epoch": 0.8231460228399785, "grad_norm": 0.0, - "learning_rate": 1.5761000036554453e-06, - "loss": 0.8355, + "learning_rate": 1.5960910883832391e-06, + "loss": 0.8501, "step": 29048 }, { - "epoch": 0.8243189557321225, + "epoch": 0.8231743602822409, "grad_norm": 0.0, - "learning_rate": 1.5756047771610306e-06, - "loss": 0.8095, + "learning_rate": 1.5955936996503285e-06, + "loss": 0.8583, "step": 29049 }, { - "epoch": 0.8243473325766175, + "epoch": 0.8232026977245034, "grad_norm": 0.0, - "learning_rate": 1.5751096218278606e-06, - "loss": 0.8007, + "learning_rate": 1.5950963817108545e-06, + "loss": 0.7612, "step": 29050 }, { - "epoch": 0.8243757094211124, + "epoch": 0.8232310351667659, "grad_norm": 0.0, - "learning_rate": 1.5746145376601184e-06, - "loss": 0.8337, + "learning_rate": 1.5945991345690037e-06, + "loss": 0.7446, "step": 29051 }, { - "epoch": 0.8244040862656072, + "epoch": 0.8232593726090283, "grad_norm": 0.0, - "learning_rate": 1.5741195246619867e-06, - "loss": 0.8225, + "learning_rate": 1.594101958228965e-06, + "loss": 0.7736, "step": 29052 }, { - "epoch": 0.8244324631101022, + "epoch": 0.8232877100512908, "grad_norm": 0.0, - "learning_rate": 1.5736245828376483e-06, - "loss": 0.8087, + "learning_rate": 1.5936048526949288e-06, + "loss": 0.8291, "step": 29053 }, { - "epoch": 0.8244608399545971, + "epoch": 0.8233160474935532, "grad_norm": 0.0, - "learning_rate": 1.5731297121912791e-06, - "loss": 0.832, + "learning_rate": 1.5931078179710791e-06, + "loss": 0.802, "step": 29054 }, { - "epoch": 0.8244892167990919, + "epoch": 0.8233443849358157, "grad_norm": 0.0, - "learning_rate": 1.5726349127270624e-06, - "loss": 0.896, + "learning_rate": 1.592610854061606e-06, + "loss": 0.8403, "step": 29055 }, { - "epoch": 0.8245175936435868, + "epoch": 0.8233727223780781, "grad_norm": 0.0, - "learning_rate": 1.572140184449179e-06, - "loss": 0.7661, + "learning_rate": 1.5921139609706915e-06, + "loss": 0.8961, "step": 29056 }, { - "epoch": 0.8245459704880818, + "epoch": 0.8234010598203406, "grad_norm": 0.0, - "learning_rate": 1.5716455273618048e-06, - "loss": 0.7031, + "learning_rate": 1.5916171387025237e-06, + "loss": 0.7959, "step": 29057 }, { - "epoch": 0.8245743473325766, + "epoch": 0.8234293972626031, "grad_norm": 0.0, - "learning_rate": 1.5711509414691196e-06, - "loss": 0.7681, + "learning_rate": 1.5911203872612858e-06, + "loss": 0.7842, "step": 29058 }, { - "epoch": 0.8246027241770715, + "epoch": 0.8234577347048655, "grad_norm": 0.0, - "learning_rate": 1.5706564267753032e-06, - "loss": 0.8166, + "learning_rate": 1.5906237066511643e-06, + "loss": 0.8989, "step": 29059 }, { - "epoch": 0.8246311010215664, + "epoch": 0.823486072147128, "grad_norm": 0.0, - "learning_rate": 1.5701619832845284e-06, - "loss": 0.8474, + "learning_rate": 1.590127096876345e-06, + "loss": 0.7945, "step": 29060 }, { - "epoch": 0.8246594778660613, + "epoch": 0.8235144095893905, "grad_norm": 0.0, - "learning_rate": 1.5696676110009746e-06, - "loss": 0.8615, + "learning_rate": 1.5896305579410042e-06, + "loss": 0.8693, "step": 29061 }, { - "epoch": 0.8246878547105562, + "epoch": 0.823542747031653, "grad_norm": 0.0, - "learning_rate": 1.5691733099288197e-06, - "loss": 0.8054, + "learning_rate": 1.5891340898493302e-06, + "loss": 0.8185, "step": 29062 }, { - "epoch": 0.824716231555051, + "epoch": 0.8235710844739154, "grad_norm": 0.0, - "learning_rate": 1.5686790800722352e-06, - "loss": 0.8217, + "learning_rate": 1.5886376926055037e-06, + "loss": 0.9116, "step": 29063 }, { - "epoch": 0.824744608399546, + "epoch": 0.8235994219161779, "grad_norm": 0.0, - "learning_rate": 1.5681849214353973e-06, - "loss": 0.8547, + "learning_rate": 1.5881413662137047e-06, + "loss": 0.8405, "step": 29064 }, { - "epoch": 0.8247729852440409, + "epoch": 0.8236277593584403, "grad_norm": 0.0, - "learning_rate": 1.5676908340224794e-06, - "loss": 0.9324, + "learning_rate": 1.5876451106781132e-06, + "loss": 0.7672, "step": 29065 }, { - "epoch": 0.8248013620885357, + "epoch": 0.8236560968007027, "grad_norm": 0.0, - "learning_rate": 1.5671968178376574e-06, - "loss": 0.7606, + "learning_rate": 1.5871489260029138e-06, + "loss": 0.9601, "step": 29066 }, { - "epoch": 0.8248297389330307, + "epoch": 0.8236844342429652, "grad_norm": 0.0, - "learning_rate": 1.5667028728851041e-06, - "loss": 0.7457, + "learning_rate": 1.5866528121922808e-06, + "loss": 0.8777, "step": 29067 }, { - "epoch": 0.8248581157775255, + "epoch": 0.8237127716852277, "grad_norm": 0.0, - "learning_rate": 1.5662089991689889e-06, - "loss": 0.8018, + "learning_rate": 1.5861567692503977e-06, + "loss": 0.9039, "step": 29068 }, { - "epoch": 0.8248864926220204, + "epoch": 0.8237411091274901, "grad_norm": 0.0, - "learning_rate": 1.5657151966934857e-06, - "loss": 0.8416, + "learning_rate": 1.5856607971814375e-06, + "loss": 0.7304, "step": 29069 }, { - "epoch": 0.8249148694665154, + "epoch": 0.8237694465697526, "grad_norm": 0.0, - "learning_rate": 1.5652214654627672e-06, - "loss": 0.7917, + "learning_rate": 1.5851648959895815e-06, + "loss": 0.8108, "step": 29070 }, { - "epoch": 0.8249432463110102, + "epoch": 0.8237977840120151, "grad_norm": 0.0, - "learning_rate": 1.5647278054810012e-06, - "loss": 0.8081, + "learning_rate": 1.5846690656790066e-06, + "loss": 0.8274, "step": 29071 }, { - "epoch": 0.8249716231555051, + "epoch": 0.8238261214542776, "grad_norm": 0.0, - "learning_rate": 1.5642342167523571e-06, - "loss": 0.899, + "learning_rate": 1.5841733062538877e-06, + "loss": 0.8845, "step": 29072 }, { - "epoch": 0.825, + "epoch": 0.82385445889654, "grad_norm": 0.0, - "learning_rate": 1.563740699281009e-06, - "loss": 0.7506, + "learning_rate": 1.5836776177184044e-06, + "loss": 0.8495, "step": 29073 }, { - "epoch": 0.8250283768444949, + "epoch": 0.8238827963388025, "grad_norm": 0.0, - "learning_rate": 1.5632472530711195e-06, - "loss": 0.797, + "learning_rate": 1.5831820000767307e-06, + "loss": 0.7887, "step": 29074 }, { - "epoch": 0.8250567536889898, + "epoch": 0.823911133781065, "grad_norm": 0.0, - "learning_rate": 1.5627538781268591e-06, - "loss": 0.7981, + "learning_rate": 1.5826864533330387e-06, + "loss": 0.7769, "step": 29075 }, { - "epoch": 0.8250851305334846, + "epoch": 0.8239394712233273, "grad_norm": 0.0, - "learning_rate": 1.5622605744524e-06, - "loss": 0.7732, + "learning_rate": 1.5821909774915068e-06, + "loss": 0.8536, "step": 29076 }, { - "epoch": 0.8251135073779796, + "epoch": 0.8239678086655898, "grad_norm": 0.0, - "learning_rate": 1.5617673420519019e-06, - "loss": 0.8301, + "learning_rate": 1.5816955725563031e-06, + "loss": 0.7775, "step": 29077 }, { - "epoch": 0.8251418842224745, + "epoch": 0.8239961461078523, "grad_norm": 0.0, - "learning_rate": 1.5612741809295339e-06, - "loss": 0.8193, + "learning_rate": 1.5812002385316038e-06, + "loss": 0.7687, "step": 29078 }, { - "epoch": 0.8251702610669693, + "epoch": 0.8240244835501148, "grad_norm": 0.0, - "learning_rate": 1.560781091089465e-06, - "loss": 0.8282, + "learning_rate": 1.580704975421584e-06, + "loss": 0.8343, "step": 29079 }, { - "epoch": 0.8251986379114642, + "epoch": 0.8240528209923772, "grad_norm": 0.0, - "learning_rate": 1.560288072535855e-06, - "loss": 0.7621, + "learning_rate": 1.580209783230411e-06, + "loss": 0.8076, "step": 29080 }, { - "epoch": 0.8252270147559592, + "epoch": 0.8240811584346397, "grad_norm": 0.0, - "learning_rate": 1.5597951252728694e-06, - "loss": 0.8084, + "learning_rate": 1.5797146619622561e-06, + "loss": 0.8062, "step": 29081 }, { - "epoch": 0.825255391600454, + "epoch": 0.8241094958769022, "grad_norm": 0.0, - "learning_rate": 1.5593022493046782e-06, - "loss": 0.6946, + "learning_rate": 1.5792196116212944e-06, + "loss": 0.854, "step": 29082 }, { - "epoch": 0.8252837684449489, + "epoch": 0.8241378333191646, "grad_norm": 0.0, - "learning_rate": 1.5588094446354373e-06, - "loss": 0.7908, + "learning_rate": 1.5787246322116911e-06, + "loss": 0.8744, "step": 29083 }, { - "epoch": 0.8253121452894439, + "epoch": 0.8241661707614271, "grad_norm": 0.0, - "learning_rate": 1.5583167112693153e-06, - "loss": 0.8534, + "learning_rate": 1.5782297237376165e-06, + "loss": 0.8188, "step": 29084 }, { - "epoch": 0.8253405221339387, + "epoch": 0.8241945082036896, "grad_norm": 0.0, - "learning_rate": 1.5578240492104701e-06, - "loss": 0.7928, + "learning_rate": 1.5777348862032405e-06, + "loss": 0.7489, "step": 29085 }, { - "epoch": 0.8253688989784336, + "epoch": 0.824222845645952, "grad_norm": 0.0, - "learning_rate": 1.5573314584630639e-06, - "loss": 0.7349, + "learning_rate": 1.5772401196127318e-06, + "loss": 0.6846, "step": 29086 }, { - "epoch": 0.8253972758229285, + "epoch": 0.8242511830882144, "grad_norm": 0.0, - "learning_rate": 1.55683893903126e-06, - "loss": 0.8172, + "learning_rate": 1.5767454239702585e-06, + "loss": 0.8402, "step": 29087 }, { - "epoch": 0.8254256526674234, + "epoch": 0.8242795205304769, "grad_norm": 0.0, - "learning_rate": 1.5563464909192162e-06, - "loss": 0.8467, + "learning_rate": 1.5762507992799846e-06, + "loss": 0.7904, "step": 29088 }, { - "epoch": 0.8254540295119183, + "epoch": 0.8243078579727394, "grad_norm": 0.0, - "learning_rate": 1.5558541141310923e-06, - "loss": 0.8457, + "learning_rate": 1.5757562455460807e-06, + "loss": 0.7488, "step": 29089 }, { - "epoch": 0.8254824063564131, + "epoch": 0.8243361954150018, "grad_norm": 0.0, - "learning_rate": 1.555361808671051e-06, - "loss": 0.7258, + "learning_rate": 1.5752617627727085e-06, + "loss": 0.8914, "step": 29090 }, { - "epoch": 0.8255107832009081, + "epoch": 0.8243645328572643, "grad_norm": 0.0, - "learning_rate": 1.554869574543245e-06, - "loss": 0.8103, + "learning_rate": 1.5747673509640337e-06, + "loss": 0.8985, "step": 29091 }, { - "epoch": 0.825539160045403, + "epoch": 0.8243928702995268, "grad_norm": 0.0, - "learning_rate": 1.5543774117518362e-06, - "loss": 0.7628, + "learning_rate": 1.5742730101242255e-06, + "loss": 0.7765, "step": 29092 }, { - "epoch": 0.8255675368898978, + "epoch": 0.8244212077417892, "grad_norm": 0.0, - "learning_rate": 1.553885320300983e-06, - "loss": 0.7133, + "learning_rate": 1.573778740257441e-06, + "loss": 0.7649, "step": 29093 }, { - "epoch": 0.8255959137343928, + "epoch": 0.8244495451840517, "grad_norm": 0.0, - "learning_rate": 1.5533933001948376e-06, - "loss": 0.8996, + "learning_rate": 1.5732845413678477e-06, + "loss": 0.741, "step": 29094 }, { - "epoch": 0.8256242905788876, + "epoch": 0.8244778826263142, "grad_norm": 0.0, - "learning_rate": 1.5529013514375591e-06, - "loss": 0.734, + "learning_rate": 1.5727904134596084e-06, + "loss": 0.8535, "step": 29095 }, { - "epoch": 0.8256526674233825, + "epoch": 0.8245062200685767, "grad_norm": 0.0, - "learning_rate": 1.5524094740333028e-06, - "loss": 0.7472, + "learning_rate": 1.5722963565368864e-06, + "loss": 0.8923, "step": 29096 }, { - "epoch": 0.8256810442678774, + "epoch": 0.824534557510839, "grad_norm": 0.0, - "learning_rate": 1.5519176679862224e-06, - "loss": 0.9652, + "learning_rate": 1.5718023706038399e-06, + "loss": 0.8053, "step": 29097 }, { - "epoch": 0.8257094211123723, + "epoch": 0.8245628949531015, "grad_norm": 0.0, - "learning_rate": 1.5514259333004744e-06, - "loss": 0.8575, + "learning_rate": 1.5713084556646318e-06, + "loss": 0.874, "step": 29098 }, { - "epoch": 0.8257377979568672, + "epoch": 0.824591232395364, "grad_norm": 0.0, - "learning_rate": 1.5509342699802132e-06, - "loss": 0.8064, + "learning_rate": 1.5708146117234225e-06, + "loss": 0.798, "step": 29099 }, { - "epoch": 0.825766174801362, + "epoch": 0.8246195698376264, "grad_norm": 0.0, - "learning_rate": 1.5504426780295877e-06, - "loss": 0.8168, + "learning_rate": 1.5703208387843737e-06, + "loss": 0.8972, "step": 29100 }, { - "epoch": 0.825794551645857, + "epoch": 0.8246479072798889, "grad_norm": 0.0, - "learning_rate": 1.5499511574527526e-06, - "loss": 0.8555, + "learning_rate": 1.5698271368516416e-06, + "loss": 0.6942, "step": 29101 }, { - "epoch": 0.8258229284903519, + "epoch": 0.8246762447221514, "grad_norm": 0.0, - "learning_rate": 1.549459708253863e-06, - "loss": 0.7931, + "learning_rate": 1.5693335059293845e-06, + "loss": 0.6882, "step": 29102 }, { - "epoch": 0.8258513053348467, + "epoch": 0.8247045821644139, "grad_norm": 0.0, - "learning_rate": 1.548968330437064e-06, - "loss": 0.8012, + "learning_rate": 1.5688399460217651e-06, + "loss": 0.8751, "step": 29103 }, { - "epoch": 0.8258796821793417, + "epoch": 0.8247329196066763, "grad_norm": 0.0, - "learning_rate": 1.5484770240065094e-06, - "loss": 0.8778, + "learning_rate": 1.5683464571329354e-06, + "loss": 0.8353, "step": 29104 }, { - "epoch": 0.8259080590238366, + "epoch": 0.8247612570489388, "grad_norm": 0.0, - "learning_rate": 1.5479857889663519e-06, - "loss": 0.8313, + "learning_rate": 1.5678530392670566e-06, + "loss": 0.8624, "step": 29105 }, { - "epoch": 0.8259364358683314, + "epoch": 0.8247895944912013, "grad_norm": 0.0, - "learning_rate": 1.5474946253207357e-06, - "loss": 0.7848, + "learning_rate": 1.5673596924282807e-06, + "loss": 0.808, "step": 29106 }, { - "epoch": 0.8259648127128263, + "epoch": 0.8248179319334636, "grad_norm": 0.0, - "learning_rate": 1.5470035330738153e-06, - "loss": 0.8329, + "learning_rate": 1.5668664166207647e-06, + "loss": 0.7996, "step": 29107 }, { - "epoch": 0.8259931895573213, + "epoch": 0.8248462693757261, "grad_norm": 0.0, - "learning_rate": 1.5465125122297342e-06, - "loss": 0.6738, + "learning_rate": 1.5663732118486653e-06, + "loss": 0.7492, "step": 29108 }, { - "epoch": 0.8260215664018161, + "epoch": 0.8248746068179886, "grad_norm": 0.0, - "learning_rate": 1.5460215627926411e-06, - "loss": 0.832, + "learning_rate": 1.5658800781161365e-06, + "loss": 0.6582, "step": 29109 }, { - "epoch": 0.826049943246311, + "epoch": 0.824902944260251, "grad_norm": 0.0, - "learning_rate": 1.5455306847666861e-06, - "loss": 0.818, + "learning_rate": 1.565387015427331e-06, + "loss": 0.7079, "step": 29110 }, { - "epoch": 0.826078320090806, + "epoch": 0.8249312817025135, "grad_norm": 0.0, - "learning_rate": 1.5450398781560083e-06, - "loss": 0.825, + "learning_rate": 1.5648940237864051e-06, + "loss": 0.852, "step": 29111 }, { - "epoch": 0.8261066969353008, + "epoch": 0.824959619144776, "grad_norm": 0.0, - "learning_rate": 1.5445491429647618e-06, - "loss": 0.8584, + "learning_rate": 1.564401103197507e-06, + "loss": 0.8451, "step": 29112 }, { - "epoch": 0.8261350737797957, + "epoch": 0.8249879565870385, "grad_norm": 0.0, - "learning_rate": 1.5440584791970914e-06, - "loss": 0.8386, + "learning_rate": 1.5639082536647931e-06, + "loss": 0.8469, "step": 29113 }, { - "epoch": 0.8261634506242905, + "epoch": 0.8250162940293009, "grad_norm": 0.0, - "learning_rate": 1.5435678868571369e-06, - "loss": 0.8187, + "learning_rate": 1.5634154751924102e-06, + "loss": 0.7623, "step": 29114 }, { - "epoch": 0.8261918274687855, + "epoch": 0.8250446314715634, "grad_norm": 0.0, - "learning_rate": 1.543077365949044e-06, - "loss": 0.7763, + "learning_rate": 1.5629227677845115e-06, + "loss": 0.9183, "step": 29115 }, { - "epoch": 0.8262202043132804, + "epoch": 0.8250729689138259, "grad_norm": 0.0, - "learning_rate": 1.5425869164769592e-06, - "loss": 0.7815, + "learning_rate": 1.5624301314452505e-06, + "loss": 0.7941, "step": 29116 }, { - "epoch": 0.8262485811577752, + "epoch": 0.8251013063560882, "grad_norm": 0.0, - "learning_rate": 1.5420965384450205e-06, - "loss": 0.7619, + "learning_rate": 1.5619375661787705e-06, + "loss": 0.7831, "step": 29117 }, { - "epoch": 0.8262769580022702, + "epoch": 0.8251296437983507, "grad_norm": 0.0, - "learning_rate": 1.541606231857372e-06, - "loss": 0.9126, + "learning_rate": 1.561445071989226e-06, + "loss": 0.8541, "step": 29118 }, { - "epoch": 0.826305334846765, + "epoch": 0.8251579812406132, "grad_norm": 0.0, - "learning_rate": 1.54111599671816e-06, - "loss": 0.7117, + "learning_rate": 1.5609526488807613e-06, + "loss": 0.8873, "step": 29119 }, { - "epoch": 0.8263337116912599, + "epoch": 0.8251863186828757, "grad_norm": 0.0, - "learning_rate": 1.5406258330315172e-06, - "loss": 0.7691, + "learning_rate": 1.5604602968575267e-06, + "loss": 0.9124, "step": 29120 }, { - "epoch": 0.8263620885357549, + "epoch": 0.8252146561251381, "grad_norm": 0.0, - "learning_rate": 1.5401357408015893e-06, - "loss": 0.8795, + "learning_rate": 1.5599680159236685e-06, + "loss": 0.7274, "step": 29121 }, { - "epoch": 0.8263904653802497, + "epoch": 0.8252429935674006, "grad_norm": 0.0, - "learning_rate": 1.5396457200325177e-06, - "loss": 0.8372, + "learning_rate": 1.5594758060833336e-06, + "loss": 0.8979, "step": 29122 }, { - "epoch": 0.8264188422247446, + "epoch": 0.8252713310096631, "grad_norm": 0.0, - "learning_rate": 1.5391557707284355e-06, - "loss": 0.8819, + "learning_rate": 1.5589836673406688e-06, + "loss": 0.8229, "step": 29123 }, { - "epoch": 0.8264472190692395, + "epoch": 0.8252996684519255, "grad_norm": 0.0, - "learning_rate": 1.538665892893486e-06, - "loss": 0.895, + "learning_rate": 1.5584915996998217e-06, + "loss": 0.7966, "step": 29124 }, { - "epoch": 0.8264755959137344, + "epoch": 0.825328005894188, "grad_norm": 0.0, - "learning_rate": 1.5381760865318073e-06, - "loss": 0.8488, + "learning_rate": 1.5579996031649314e-06, + "loss": 0.7104, "step": 29125 }, { - "epoch": 0.8265039727582293, + "epoch": 0.8253563433364505, "grad_norm": 0.0, - "learning_rate": 1.5376863516475339e-06, - "loss": 0.7115, + "learning_rate": 1.5575076777401477e-06, + "loss": 0.8544, "step": 29126 }, { - "epoch": 0.8265323496027241, + "epoch": 0.825384680778713, "grad_norm": 0.0, - "learning_rate": 1.537196688244804e-06, - "loss": 0.7165, + "learning_rate": 1.5570158234296096e-06, + "loss": 0.7505, "step": 29127 }, { - "epoch": 0.8265607264472191, + "epoch": 0.8254130182209753, "grad_norm": 0.0, - "learning_rate": 1.5367070963277553e-06, - "loss": 0.7131, + "learning_rate": 1.5565240402374625e-06, + "loss": 0.7462, "step": 29128 }, { - "epoch": 0.826589103291714, + "epoch": 0.8254413556632378, "grad_norm": 0.0, - "learning_rate": 1.5362175759005204e-06, - "loss": 0.8243, + "learning_rate": 1.5560323281678514e-06, + "loss": 0.8001, "step": 29129 }, { - "epoch": 0.8266174801362088, + "epoch": 0.8254696931055003, "grad_norm": 0.0, - "learning_rate": 1.5357281269672398e-06, - "loss": 0.801, + "learning_rate": 1.5555406872249134e-06, + "loss": 0.8008, "step": 29130 }, { - "epoch": 0.8266458569807037, + "epoch": 0.8254980305477627, "grad_norm": 0.0, - "learning_rate": 1.535238749532042e-06, - "loss": 0.836, + "learning_rate": 1.5550491174127913e-06, + "loss": 0.8218, "step": 29131 }, { - "epoch": 0.8266742338251987, + "epoch": 0.8255263679900252, "grad_norm": 0.0, - "learning_rate": 1.5347494435990617e-06, - "loss": 0.7942, + "learning_rate": 1.5545576187356292e-06, + "loss": 0.9484, "step": 29132 }, { - "epoch": 0.8267026106696935, + "epoch": 0.8255547054322877, "grad_norm": 0.0, - "learning_rate": 1.5342602091724367e-06, - "loss": 0.8047, + "learning_rate": 1.554066191197562e-06, + "loss": 0.7511, "step": 29133 }, { - "epoch": 0.8267309875141884, + "epoch": 0.8255830428745501, "grad_norm": 0.0, - "learning_rate": 1.5337710462562938e-06, - "loss": 0.7996, + "learning_rate": 1.5535748348027312e-06, + "loss": 0.81, "step": 29134 }, { - "epoch": 0.8267593643586834, + "epoch": 0.8256113803168126, "grad_norm": 0.0, - "learning_rate": 1.5332819548547672e-06, - "loss": 0.7778, + "learning_rate": 1.5530835495552764e-06, + "loss": 0.847, "step": 29135 }, { - "epoch": 0.8267877412031782, + "epoch": 0.8256397177590751, "grad_norm": 0.0, - "learning_rate": 1.5327929349719906e-06, - "loss": 0.7712, + "learning_rate": 1.5525923354593354e-06, + "loss": 0.7273, "step": 29136 }, { - "epoch": 0.8268161180476731, + "epoch": 0.8256680552013376, "grad_norm": 0.0, - "learning_rate": 1.5323039866120914e-06, - "loss": 0.7829, + "learning_rate": 1.5521011925190487e-06, + "loss": 0.8037, "step": 29137 }, { - "epoch": 0.8268444948921679, + "epoch": 0.8256963926436, "grad_norm": 0.0, - "learning_rate": 1.5318151097792e-06, - "loss": 0.8108, + "learning_rate": 1.5516101207385481e-06, + "loss": 0.7237, "step": 29138 }, { - "epoch": 0.8268728717366629, + "epoch": 0.8257247300858624, "grad_norm": 0.0, - "learning_rate": 1.5313263044774496e-06, - "loss": 0.8273, + "learning_rate": 1.5511191201219733e-06, + "loss": 0.7118, "step": 29139 }, { - "epoch": 0.8269012485811578, + "epoch": 0.8257530675281249, "grad_norm": 0.0, - "learning_rate": 1.5308375707109647e-06, - "loss": 0.8038, + "learning_rate": 1.550628190673461e-06, + "loss": 0.7528, "step": 29140 }, { - "epoch": 0.8269296254256526, + "epoch": 0.8257814049703873, "grad_norm": 0.0, - "learning_rate": 1.5303489084838751e-06, - "loss": 0.8676, + "learning_rate": 1.5501373323971436e-06, + "loss": 0.7278, "step": 29141 }, { - "epoch": 0.8269580022701476, + "epoch": 0.8258097424126498, "grad_norm": 0.0, - "learning_rate": 1.529860317800309e-06, - "loss": 0.8727, + "learning_rate": 1.5496465452971588e-06, + "loss": 0.8076, "step": 29142 }, { - "epoch": 0.8269863791146425, + "epoch": 0.8258380798549123, "grad_norm": 0.0, - "learning_rate": 1.5293717986643941e-06, - "loss": 0.8002, + "learning_rate": 1.5491558293776377e-06, + "loss": 0.8311, "step": 29143 }, { - "epoch": 0.8270147559591373, + "epoch": 0.8258664172971748, "grad_norm": 0.0, - "learning_rate": 1.5288833510802593e-06, - "loss": 0.7576, + "learning_rate": 1.5486651846427136e-06, + "loss": 0.8403, "step": 29144 }, { - "epoch": 0.8270431328036323, + "epoch": 0.8258947547394372, "grad_norm": 0.0, - "learning_rate": 1.5283949750520245e-06, - "loss": 0.8036, + "learning_rate": 1.5481746110965245e-06, + "loss": 0.847, "step": 29145 }, { - "epoch": 0.8270715096481271, + "epoch": 0.8259230921816997, "grad_norm": 0.0, - "learning_rate": 1.5279066705838175e-06, - "loss": 0.7793, + "learning_rate": 1.547684108743197e-06, + "loss": 0.8592, "step": 29146 }, { - "epoch": 0.827099886492622, + "epoch": 0.8259514296239622, "grad_norm": 0.0, - "learning_rate": 1.5274184376797662e-06, - "loss": 0.7839, + "learning_rate": 1.5471936775868645e-06, + "loss": 0.8078, "step": 29147 }, { - "epoch": 0.8271282633371169, + "epoch": 0.8259797670662246, "grad_norm": 0.0, - "learning_rate": 1.5269302763439907e-06, - "loss": 0.8159, + "learning_rate": 1.5467033176316581e-06, + "loss": 0.8993, "step": 29148 }, { - "epoch": 0.8271566401816118, + "epoch": 0.826008104508487, "grad_norm": 0.0, - "learning_rate": 1.5264421865806155e-06, - "loss": 0.833, + "learning_rate": 1.546213028881709e-06, + "loss": 0.78, "step": 29149 }, { - "epoch": 0.8271850170261067, + "epoch": 0.8260364419507495, "grad_norm": 0.0, - "learning_rate": 1.5259541683937673e-06, - "loss": 0.8456, + "learning_rate": 1.5457228113411492e-06, + "loss": 0.8136, "step": 29150 }, { - "epoch": 0.8272133938706016, + "epoch": 0.826064779393012, "grad_norm": 0.0, - "learning_rate": 1.5254662217875615e-06, - "loss": 0.839, + "learning_rate": 1.5452326650141036e-06, + "loss": 0.8547, "step": 29151 }, { - "epoch": 0.8272417707150965, + "epoch": 0.8260931168352744, "grad_norm": 0.0, - "learning_rate": 1.524978346766124e-06, - "loss": 0.8639, + "learning_rate": 1.544742589904703e-06, + "loss": 0.909, "step": 29152 }, { - "epoch": 0.8272701475595914, + "epoch": 0.8261214542775369, "grad_norm": 0.0, - "learning_rate": 1.5244905433335777e-06, - "loss": 0.6985, + "learning_rate": 1.5442525860170776e-06, + "loss": 0.8721, "step": 29153 }, { - "epoch": 0.8272985244040862, + "epoch": 0.8261497917197994, "grad_norm": 0.0, - "learning_rate": 1.5240028114940386e-06, - "loss": 0.7558, + "learning_rate": 1.5437626533553497e-06, + "loss": 0.848, "step": 29154 }, { - "epoch": 0.8273269012485811, + "epoch": 0.8261781291620618, "grad_norm": 0.0, - "learning_rate": 1.5235151512516288e-06, - "loss": 0.8657, + "learning_rate": 1.5432727919236513e-06, + "loss": 0.8619, "step": 29155 }, { - "epoch": 0.8273552780930761, + "epoch": 0.8262064666043243, "grad_norm": 0.0, - "learning_rate": 1.5230275626104707e-06, - "loss": 0.933, + "learning_rate": 1.5427830017261047e-06, + "loss": 0.7931, "step": 29156 }, { - "epoch": 0.8273836549375709, + "epoch": 0.8262348040465868, "grad_norm": 0.0, - "learning_rate": 1.522540045574674e-06, - "loss": 0.756, + "learning_rate": 1.542293282766838e-06, + "loss": 0.8697, "step": 29157 }, { - "epoch": 0.8274120317820658, + "epoch": 0.8262631414888492, "grad_norm": 0.0, - "learning_rate": 1.5220526001483671e-06, - "loss": 0.8681, + "learning_rate": 1.5418036350499766e-06, + "loss": 0.7746, "step": 29158 }, { - "epoch": 0.8274404086265608, + "epoch": 0.8262914789311117, "grad_norm": 0.0, - "learning_rate": 1.5215652263356618e-06, - "loss": 0.7711, + "learning_rate": 1.5413140585796426e-06, + "loss": 0.8353, "step": 29159 }, { - "epoch": 0.8274687854710556, + "epoch": 0.8263198163733741, "grad_norm": 0.0, - "learning_rate": 1.5210779241406747e-06, - "loss": 0.7861, + "learning_rate": 1.5408245533599608e-06, + "loss": 0.8408, "step": 29160 }, { - "epoch": 0.8274971623155505, + "epoch": 0.8263481538156366, "grad_norm": 0.0, - "learning_rate": 1.5205906935675274e-06, - "loss": 0.8875, + "learning_rate": 1.5403351193950554e-06, + "loss": 0.7492, "step": 29161 }, { - "epoch": 0.8275255391600455, + "epoch": 0.826376491257899, "grad_norm": 0.0, - "learning_rate": 1.5201035346203286e-06, - "loss": 0.7959, + "learning_rate": 1.539845756689049e-06, + "loss": 0.8481, "step": 29162 }, { - "epoch": 0.8275539160045403, + "epoch": 0.8264048287001615, "grad_norm": 0.0, - "learning_rate": 1.519616447303196e-06, - "loss": 0.8427, + "learning_rate": 1.5393564652460658e-06, + "loss": 0.9007, "step": 29163 }, { - "epoch": 0.8275822928490352, + "epoch": 0.826433166142424, "grad_norm": 0.0, - "learning_rate": 1.5191294316202476e-06, - "loss": 0.8673, + "learning_rate": 1.5388672450702214e-06, + "loss": 0.6985, "step": 29164 }, { - "epoch": 0.82761066969353, + "epoch": 0.8264615035846864, "grad_norm": 0.0, - "learning_rate": 1.518642487575591e-06, - "loss": 0.7657, + "learning_rate": 1.5383780961656414e-06, + "loss": 0.7687, "step": 29165 }, { - "epoch": 0.827639046538025, + "epoch": 0.8264898410269489, "grad_norm": 0.0, - "learning_rate": 1.5181556151733433e-06, - "loss": 0.7619, + "learning_rate": 1.537889018536447e-06, + "loss": 0.8342, "step": 29166 }, { - "epoch": 0.8276674233825199, + "epoch": 0.8265181784692114, "grad_norm": 0.0, - "learning_rate": 1.5176688144176188e-06, - "loss": 0.7523, + "learning_rate": 1.5374000121867527e-06, + "loss": 0.9037, "step": 29167 }, { - "epoch": 0.8276958002270147, + "epoch": 0.8265465159114739, "grad_norm": 0.0, - "learning_rate": 1.517182085312524e-06, - "loss": 0.7746, + "learning_rate": 1.5369110771206807e-06, + "loss": 0.8946, "step": 29168 }, { - "epoch": 0.8277241770715097, + "epoch": 0.8265748533537363, "grad_norm": 0.0, - "learning_rate": 1.5166954278621748e-06, - "loss": 0.8684, + "learning_rate": 1.5364222133423523e-06, + "loss": 0.7764, "step": 29169 }, { - "epoch": 0.8277525539160046, + "epoch": 0.8266031907959988, "grad_norm": 0.0, - "learning_rate": 1.5162088420706822e-06, - "loss": 0.8205, + "learning_rate": 1.5359334208558797e-06, + "loss": 0.7613, "step": 29170 }, { - "epoch": 0.8277809307604994, + "epoch": 0.8266315282382612, "grad_norm": 0.0, - "learning_rate": 1.5157223279421517e-06, - "loss": 0.7921, + "learning_rate": 1.535444699665386e-06, + "loss": 0.6652, "step": 29171 }, { - "epoch": 0.8278093076049943, + "epoch": 0.8266598656805236, "grad_norm": 0.0, - "learning_rate": 1.515235885480697e-06, - "loss": 0.751, + "learning_rate": 1.5349560497749816e-06, + "loss": 0.8198, "step": 29172 }, { - "epoch": 0.8278376844494892, + "epoch": 0.8266882031227861, "grad_norm": 0.0, - "learning_rate": 1.5147495146904268e-06, - "loss": 0.8762, + "learning_rate": 1.5344674711887864e-06, + "loss": 0.7459, "step": 29173 }, { - "epoch": 0.8278660612939841, + "epoch": 0.8267165405650486, "grad_norm": 0.0, - "learning_rate": 1.5142632155754478e-06, - "loss": 0.8101, + "learning_rate": 1.5339789639109148e-06, + "loss": 0.785, "step": 29174 }, { - "epoch": 0.827894438138479, + "epoch": 0.8267448780073111, "grad_norm": 0.0, - "learning_rate": 1.5137769881398722e-06, - "loss": 0.9117, + "learning_rate": 1.5334905279454826e-06, + "loss": 0.7225, "step": 29175 }, { - "epoch": 0.8279228149829739, + "epoch": 0.8267732154495735, "grad_norm": 0.0, - "learning_rate": 1.5132908323878004e-06, - "loss": 0.9326, + "learning_rate": 1.5330021632966052e-06, + "loss": 0.842, "step": 29176 }, { - "epoch": 0.8279511918274688, + "epoch": 0.826801552891836, "grad_norm": 0.0, - "learning_rate": 1.5128047483233432e-06, - "loss": 0.975, + "learning_rate": 1.5325138699683928e-06, + "loss": 0.7005, "step": 29177 }, { - "epoch": 0.8279795686719637, + "epoch": 0.8268298903340985, "grad_norm": 0.0, - "learning_rate": 1.5123187359506075e-06, - "loss": 0.791, + "learning_rate": 1.532025647964961e-06, + "loss": 0.7359, "step": 29178 }, { - "epoch": 0.8280079455164586, + "epoch": 0.8268582277763609, "grad_norm": 0.0, - "learning_rate": 1.5118327952736955e-06, - "loss": 0.9395, + "learning_rate": 1.531537497290424e-06, + "loss": 0.8873, "step": 29179 }, { - "epoch": 0.8280363223609535, + "epoch": 0.8268865652186234, "grad_norm": 0.0, - "learning_rate": 1.511346926296713e-06, - "loss": 0.8955, + "learning_rate": 1.5310494179488876e-06, + "loss": 0.7912, "step": 29180 }, { - "epoch": 0.8280646992054483, + "epoch": 0.8269149026608859, "grad_norm": 0.0, - "learning_rate": 1.5108611290237675e-06, - "loss": 0.8838, + "learning_rate": 1.5305614099444687e-06, + "loss": 0.8254, "step": 29181 }, { - "epoch": 0.8280930760499432, + "epoch": 0.8269432401031482, "grad_norm": 0.0, - "learning_rate": 1.5103754034589568e-06, - "loss": 0.8473, + "learning_rate": 1.5300734732812772e-06, + "loss": 0.8701, "step": 29182 }, { - "epoch": 0.8281214528944382, + "epoch": 0.8269715775454107, "grad_norm": 0.0, - "learning_rate": 1.509889749606387e-06, - "loss": 0.7682, + "learning_rate": 1.5295856079634196e-06, + "loss": 0.8143, "step": 29183 }, { - "epoch": 0.828149829738933, + "epoch": 0.8269999149876732, "grad_norm": 0.0, - "learning_rate": 1.509404167470162e-06, - "loss": 0.7976, + "learning_rate": 1.5290978139950108e-06, + "loss": 0.7733, "step": 29184 }, { - "epoch": 0.8281782065834279, + "epoch": 0.8270282524299357, "grad_norm": 0.0, - "learning_rate": 1.508918657054379e-06, - "loss": 0.8232, + "learning_rate": 1.5286100913801549e-06, + "loss": 0.7995, "step": 29185 }, { - "epoch": 0.8282065834279229, + "epoch": 0.8270565898721981, "grad_norm": 0.0, - "learning_rate": 1.5084332183631423e-06, - "loss": 0.8797, + "learning_rate": 1.5281224401229611e-06, + "loss": 0.8582, "step": 29186 }, { - "epoch": 0.8282349602724177, + "epoch": 0.8270849273144606, "grad_norm": 0.0, - "learning_rate": 1.507947851400554e-06, - "loss": 0.8142, + "learning_rate": 1.527634860227538e-06, + "loss": 0.8192, "step": 29187 }, { - "epoch": 0.8282633371169126, + "epoch": 0.8271132647567231, "grad_norm": 0.0, - "learning_rate": 1.507462556170708e-06, - "loss": 0.7945, + "learning_rate": 1.527147351697993e-06, + "loss": 0.6878, "step": 29188 }, { - "epoch": 0.8282917139614074, + "epoch": 0.8271416021989855, "grad_norm": 0.0, - "learning_rate": 1.5069773326777116e-06, - "loss": 0.8617, + "learning_rate": 1.526659914538432e-06, + "loss": 0.8349, "step": 29189 }, { - "epoch": 0.8283200908059024, + "epoch": 0.827169939641248, "grad_norm": 0.0, - "learning_rate": 1.5064921809256572e-06, - "loss": 0.8076, + "learning_rate": 1.5261725487529632e-06, + "loss": 0.8077, "step": 29190 }, { - "epoch": 0.8283484676503973, + "epoch": 0.8271982770835105, "grad_norm": 0.0, - "learning_rate": 1.5060071009186462e-06, - "loss": 0.8448, + "learning_rate": 1.5256852543456868e-06, + "loss": 0.7917, "step": 29191 }, { - "epoch": 0.8283768444948921, + "epoch": 0.827226614525773, "grad_norm": 0.0, - "learning_rate": 1.505522092660776e-06, - "loss": 0.844, + "learning_rate": 1.5251980313207138e-06, + "loss": 0.7822, "step": 29192 }, { - "epoch": 0.8284052213393871, + "epoch": 0.8272549519680353, "grad_norm": 0.0, - "learning_rate": 1.5050371561561405e-06, - "loss": 0.6641, + "learning_rate": 1.5247108796821418e-06, + "loss": 0.7814, "step": 29193 }, { - "epoch": 0.828433598183882, + "epoch": 0.8272832894102978, "grad_norm": 0.0, - "learning_rate": 1.5045522914088385e-06, - "loss": 0.7356, + "learning_rate": 1.5242237994340768e-06, + "loss": 0.8028, "step": 29194 }, { - "epoch": 0.8284619750283768, + "epoch": 0.8273116268525603, "grad_norm": 0.0, - "learning_rate": 1.5040674984229676e-06, - "loss": 0.7806, + "learning_rate": 1.5237367905806256e-06, + "loss": 0.8138, "step": 29195 }, { - "epoch": 0.8284903518728718, + "epoch": 0.8273399642948227, "grad_norm": 0.0, - "learning_rate": 1.5035827772026168e-06, - "loss": 0.7786, + "learning_rate": 1.5232498531258843e-06, + "loss": 0.7753, "step": 29196 }, { - "epoch": 0.8285187287173666, + "epoch": 0.8273683017370852, "grad_norm": 0.0, - "learning_rate": 1.5030981277518852e-06, - "loss": 0.8182, + "learning_rate": 1.522762987073957e-06, + "loss": 0.828, "step": 29197 }, { - "epoch": 0.8285471055618615, + "epoch": 0.8273966391793477, "grad_norm": 0.0, - "learning_rate": 1.5026135500748684e-06, - "loss": 0.8146, + "learning_rate": 1.5222761924289475e-06, + "loss": 0.8853, "step": 29198 }, { - "epoch": 0.8285754824063564, + "epoch": 0.8274249766216102, "grad_norm": 0.0, - "learning_rate": 1.5021290441756542e-06, - "loss": 0.8619, + "learning_rate": 1.521789469194952e-06, + "loss": 0.8439, "step": 29199 }, { - "epoch": 0.8286038592508513, + "epoch": 0.8274533140638726, "grad_norm": 0.0, - "learning_rate": 1.5016446100583381e-06, - "loss": 0.8061, + "learning_rate": 1.5213028173760713e-06, + "loss": 0.883, "step": 29200 }, { - "epoch": 0.8286322360953462, + "epoch": 0.8274816515061351, "grad_norm": 0.0, - "learning_rate": 1.5011602477270137e-06, - "loss": 0.7399, + "learning_rate": 1.5208162369764057e-06, + "loss": 0.7949, "step": 29201 }, { - "epoch": 0.8286606129398411, + "epoch": 0.8275099889483976, "grad_norm": 0.0, - "learning_rate": 1.5006759571857687e-06, - "loss": 0.8914, + "learning_rate": 1.520329728000054e-06, + "loss": 0.7894, "step": 29202 }, { - "epoch": 0.828688989784336, + "epoch": 0.8275383263906599, "grad_norm": 0.0, - "learning_rate": 1.5001917384386944e-06, - "loss": 0.7932, + "learning_rate": 1.519843290451115e-06, + "loss": 0.8647, "step": 29203 }, { - "epoch": 0.8287173666288309, + "epoch": 0.8275666638329224, "grad_norm": 0.0, - "learning_rate": 1.4997075914898863e-06, - "loss": 0.8943, + "learning_rate": 1.5193569243336836e-06, + "loss": 0.7841, "step": 29204 }, { - "epoch": 0.8287457434733257, + "epoch": 0.8275950012751849, "grad_norm": 0.0, - "learning_rate": 1.4992235163434288e-06, - "loss": 0.6832, + "learning_rate": 1.5188706296518607e-06, + "loss": 0.8262, "step": 29205 }, { - "epoch": 0.8287741203178206, + "epoch": 0.8276233387174473, "grad_norm": 0.0, - "learning_rate": 1.498739513003411e-06, - "loss": 0.7694, + "learning_rate": 1.5183844064097364e-06, + "loss": 0.7831, "step": 29206 }, { - "epoch": 0.8288024971623156, + "epoch": 0.8276516761597098, "grad_norm": 0.0, - "learning_rate": 1.4982555814739264e-06, - "loss": 0.8041, + "learning_rate": 1.5178982546114107e-06, + "loss": 0.7076, "step": 29207 }, { - "epoch": 0.8288308740068104, + "epoch": 0.8276800136019723, "grad_norm": 0.0, - "learning_rate": 1.497771721759056e-06, - "loss": 0.7494, + "learning_rate": 1.5174121742609804e-06, + "loss": 0.8449, "step": 29208 }, { - "epoch": 0.8288592508513053, + "epoch": 0.8277083510442348, "grad_norm": 0.0, - "learning_rate": 1.4972879338628909e-06, - "loss": 0.8268, + "learning_rate": 1.5169261653625345e-06, + "loss": 0.8502, "step": 29209 }, { - "epoch": 0.8288876276958003, + "epoch": 0.8277366884864972, "grad_norm": 0.0, - "learning_rate": 1.496804217789518e-06, - "loss": 0.8832, + "learning_rate": 1.5164402279201695e-06, + "loss": 0.7338, "step": 29210 }, { - "epoch": 0.8289160045402951, + "epoch": 0.8277650259287597, "grad_norm": 0.0, - "learning_rate": 1.496320573543021e-06, - "loss": 0.805, + "learning_rate": 1.5159543619379803e-06, + "loss": 0.8797, "step": 29211 }, { - "epoch": 0.82894438138479, + "epoch": 0.8277933633710222, "grad_norm": 0.0, - "learning_rate": 1.4958370011274859e-06, - "loss": 0.8732, + "learning_rate": 1.5154685674200565e-06, + "loss": 0.8089, "step": 29212 }, { - "epoch": 0.828972758229285, + "epoch": 0.8278217008132845, "grad_norm": 0.0, - "learning_rate": 1.495353500547e-06, - "loss": 0.7659, + "learning_rate": 1.5149828443704917e-06, + "loss": 0.7857, "step": 29213 }, { - "epoch": 0.8290011350737798, + "epoch": 0.827850038255547, "grad_norm": 0.0, - "learning_rate": 1.494870071805643e-06, - "loss": 0.8674, + "learning_rate": 1.514497192793377e-06, + "loss": 0.8117, "step": 29214 }, { - "epoch": 0.8290295119182747, + "epoch": 0.8278783756978095, "grad_norm": 0.0, - "learning_rate": 1.4943867149075032e-06, - "loss": 0.8346, + "learning_rate": 1.514011612692804e-06, + "loss": 0.8171, "step": 29215 }, { - "epoch": 0.8290578887627695, + "epoch": 0.827906713140072, "grad_norm": 0.0, - "learning_rate": 1.4939034298566591e-06, - "loss": 0.7918, + "learning_rate": 1.5135261040728643e-06, + "loss": 0.807, "step": 29216 }, { - "epoch": 0.8290862656072645, + "epoch": 0.8279350505823344, "grad_norm": 0.0, - "learning_rate": 1.4934202166571953e-06, - "loss": 0.8076, + "learning_rate": 1.513040666937643e-06, + "loss": 0.7568, "step": 29217 }, { - "epoch": 0.8291146424517594, + "epoch": 0.8279633880245969, "grad_norm": 0.0, - "learning_rate": 1.4929370753131956e-06, - "loss": 0.7627, + "learning_rate": 1.512555301291232e-06, + "loss": 0.7379, "step": 29218 }, { - "epoch": 0.8291430192962542, + "epoch": 0.8279917254668594, "grad_norm": 0.0, - "learning_rate": 1.492454005828734e-06, - "loss": 0.7982, + "learning_rate": 1.5120700071377215e-06, + "loss": 0.8669, "step": 29219 }, { - "epoch": 0.8291713961407492, + "epoch": 0.8280200629091218, "grad_norm": 0.0, - "learning_rate": 1.4919710082078976e-06, - "loss": 0.8287, + "learning_rate": 1.511584784481196e-06, + "loss": 0.7588, "step": 29220 }, { - "epoch": 0.829199772985244, + "epoch": 0.8280484003513843, "grad_norm": 0.0, - "learning_rate": 1.4914880824547673e-06, - "loss": 0.8197, + "learning_rate": 1.5110996333257454e-06, + "loss": 0.7675, "step": 29221 }, { - "epoch": 0.8292281498297389, + "epoch": 0.8280767377936468, "grad_norm": 0.0, - "learning_rate": 1.4910052285734177e-06, - "loss": 0.6845, + "learning_rate": 1.5106145536754524e-06, + "loss": 0.798, "step": 29222 }, { - "epoch": 0.8292565266742338, + "epoch": 0.8281050752359093, "grad_norm": 0.0, - "learning_rate": 1.490522446567929e-06, - "loss": 0.7986, + "learning_rate": 1.5101295455344057e-06, + "loss": 0.7751, "step": 29223 }, { - "epoch": 0.8292849035187287, + "epoch": 0.8281334126781716, "grad_norm": 0.0, - "learning_rate": 1.4900397364423825e-06, - "loss": 0.9353, + "learning_rate": 1.509644608906693e-06, + "loss": 0.744, "step": 29224 }, { - "epoch": 0.8293132803632236, + "epoch": 0.8281617501204341, "grad_norm": 0.0, - "learning_rate": 1.4895570982008511e-06, - "loss": 0.8008, + "learning_rate": 1.5091597437963934e-06, + "loss": 0.768, "step": 29225 }, { - "epoch": 0.8293416572077185, + "epoch": 0.8281900875626966, "grad_norm": 0.0, - "learning_rate": 1.489074531847412e-06, - "loss": 0.7327, + "learning_rate": 1.5086749502075949e-06, + "loss": 0.7774, "step": 29226 }, { - "epoch": 0.8293700340522134, + "epoch": 0.828218425004959, "grad_norm": 0.0, - "learning_rate": 1.4885920373861463e-06, - "loss": 0.8394, + "learning_rate": 1.5081902281443805e-06, + "loss": 0.8669, "step": 29227 }, { - "epoch": 0.8293984108967083, + "epoch": 0.8282467624472215, "grad_norm": 0.0, - "learning_rate": 1.4881096148211238e-06, - "loss": 0.7036, + "learning_rate": 1.507705577610833e-06, + "loss": 0.8398, "step": 29228 }, { - "epoch": 0.8294267877412032, + "epoch": 0.828275099889484, "grad_norm": 0.0, - "learning_rate": 1.4876272641564215e-06, - "loss": 0.8692, + "learning_rate": 1.5072209986110376e-06, + "loss": 0.7038, "step": 29229 }, { - "epoch": 0.8294551645856981, + "epoch": 0.8283034373317464, "grad_norm": 0.0, - "learning_rate": 1.4871449853961172e-06, - "loss": 0.7644, + "learning_rate": 1.5067364911490713e-06, + "loss": 0.8438, "step": 29230 }, { - "epoch": 0.829483541430193, + "epoch": 0.8283317747740089, "grad_norm": 0.0, - "learning_rate": 1.4866627785442788e-06, - "loss": 0.8317, + "learning_rate": 1.506252055229016e-06, + "loss": 0.7686, "step": 29231 }, { - "epoch": 0.8295119182746878, + "epoch": 0.8283601122162714, "grad_norm": 0.0, - "learning_rate": 1.486180643604983e-06, - "loss": 0.7725, + "learning_rate": 1.505767690854958e-06, + "loss": 0.8237, "step": 29232 }, { - "epoch": 0.8295402951191827, + "epoch": 0.8283884496585339, "grad_norm": 0.0, - "learning_rate": 1.4856985805823055e-06, - "loss": 0.7886, + "learning_rate": 1.505283398030969e-06, + "loss": 0.7595, "step": 29233 }, { - "epoch": 0.8295686719636777, + "epoch": 0.8284167871007962, "grad_norm": 0.0, - "learning_rate": 1.4852165894803083e-06, - "loss": 0.9008, + "learning_rate": 1.504799176761136e-06, + "loss": 0.8572, "step": 29234 }, { - "epoch": 0.8295970488081725, + "epoch": 0.8284451245430587, "grad_norm": 0.0, - "learning_rate": 1.484734670303075e-06, - "loss": 0.7793, + "learning_rate": 1.504315027049531e-06, + "loss": 0.8026, "step": 29235 }, { - "epoch": 0.8296254256526674, + "epoch": 0.8284734619853212, "grad_norm": 0.0, - "learning_rate": 1.4842528230546681e-06, - "loss": 0.8347, + "learning_rate": 1.5038309489002357e-06, + "loss": 0.7882, "step": 29236 }, { - "epoch": 0.8296538024971624, + "epoch": 0.8285017994275836, "grad_norm": 0.0, - "learning_rate": 1.483771047739161e-06, - "loss": 0.7517, + "learning_rate": 1.5033469423173298e-06, + "loss": 0.7784, "step": 29237 }, { - "epoch": 0.8296821793416572, + "epoch": 0.8285301368698461, "grad_norm": 0.0, - "learning_rate": 1.4832893443606244e-06, - "loss": 0.736, + "learning_rate": 1.5028630073048844e-06, + "loss": 0.8146, "step": 29238 }, { - "epoch": 0.8297105561861521, + "epoch": 0.8285584743121086, "grad_norm": 0.0, - "learning_rate": 1.4828077129231245e-06, - "loss": 0.6394, + "learning_rate": 1.50237914386698e-06, + "loss": 0.8599, "step": 29239 }, { - "epoch": 0.8297389330306469, + "epoch": 0.8285868117543711, "grad_norm": 0.0, - "learning_rate": 1.4823261534307286e-06, - "loss": 0.8342, + "learning_rate": 1.5018953520076917e-06, + "loss": 0.7885, "step": 29240 }, { - "epoch": 0.8297673098751419, + "epoch": 0.8286151491966335, "grad_norm": 0.0, - "learning_rate": 1.4818446658875108e-06, - "loss": 0.9032, + "learning_rate": 1.5014116317310946e-06, + "loss": 0.7833, "step": 29241 }, { - "epoch": 0.8297956867196368, + "epoch": 0.828643486638896, "grad_norm": 0.0, - "learning_rate": 1.4813632502975305e-06, - "loss": 0.9419, + "learning_rate": 1.5009279830412648e-06, + "loss": 0.7657, "step": 29242 }, { - "epoch": 0.8298240635641316, + "epoch": 0.8286718240811585, "grad_norm": 0.0, - "learning_rate": 1.4808819066648583e-06, - "loss": 0.7744, + "learning_rate": 1.500444405942273e-06, + "loss": 0.7644, "step": 29243 }, { - "epoch": 0.8298524404086266, + "epoch": 0.8287001615234209, "grad_norm": 0.0, - "learning_rate": 1.4804006349935618e-06, - "loss": 0.8801, + "learning_rate": 1.4999609004381944e-06, + "loss": 0.7538, "step": 29244 }, { - "epoch": 0.8298808172531215, + "epoch": 0.8287284989656833, "grad_norm": 0.0, - "learning_rate": 1.4799194352877023e-06, - "loss": 0.7926, + "learning_rate": 1.4994774665331035e-06, + "loss": 0.7345, "step": 29245 }, { - "epoch": 0.8299091940976163, + "epoch": 0.8287568364079458, "grad_norm": 0.0, - "learning_rate": 1.4794383075513453e-06, - "loss": 0.7788, + "learning_rate": 1.4989941042310684e-06, + "loss": 0.7792, "step": 29246 }, { - "epoch": 0.8299375709421113, + "epoch": 0.8287851738502083, "grad_norm": 0.0, - "learning_rate": 1.4789572517885586e-06, - "loss": 0.8583, + "learning_rate": 1.4985108135361626e-06, + "loss": 0.7764, "step": 29247 }, { - "epoch": 0.8299659477866062, + "epoch": 0.8288135112924707, "grad_norm": 0.0, - "learning_rate": 1.4784762680034015e-06, - "loss": 0.8179, + "learning_rate": 1.4980275944524592e-06, + "loss": 0.8219, "step": 29248 }, { - "epoch": 0.829994324631101, + "epoch": 0.8288418487347332, "grad_norm": 0.0, - "learning_rate": 1.4779953561999372e-06, - "loss": 0.7907, + "learning_rate": 1.497544446984024e-06, + "loss": 0.8082, "step": 29249 }, { - "epoch": 0.8300227014755959, + "epoch": 0.8288701861769957, "grad_norm": 0.0, - "learning_rate": 1.4775145163822302e-06, - "loss": 0.806, + "learning_rate": 1.4970613711349325e-06, + "loss": 0.8506, "step": 29250 }, { - "epoch": 0.8300510783200908, + "epoch": 0.8288985236192581, "grad_norm": 0.0, - "learning_rate": 1.4770337485543407e-06, - "loss": 0.7764, + "learning_rate": 1.4965783669092472e-06, + "loss": 0.7972, "step": 29251 }, { - "epoch": 0.8300794551645857, + "epoch": 0.8289268610615206, "grad_norm": 0.0, - "learning_rate": 1.476553052720333e-06, - "loss": 0.8272, + "learning_rate": 1.4960954343110412e-06, + "loss": 0.7655, "step": 29252 }, { - "epoch": 0.8301078320090806, + "epoch": 0.8289551985037831, "grad_norm": 0.0, - "learning_rate": 1.476072428884262e-06, - "loss": 0.8484, + "learning_rate": 1.4956125733443804e-06, + "loss": 0.8387, "step": 29253 }, { - "epoch": 0.8301362088535755, + "epoch": 0.8289835359460455, "grad_norm": 0.0, - "learning_rate": 1.475591877050191e-06, - "loss": 0.7674, + "learning_rate": 1.4951297840133326e-06, + "loss": 0.7854, "step": 29254 }, { - "epoch": 0.8301645856980704, + "epoch": 0.829011873388308, "grad_norm": 0.0, - "learning_rate": 1.4751113972221797e-06, - "loss": 0.8598, + "learning_rate": 1.4946470663219647e-06, + "loss": 0.7955, "step": 29255 }, { - "epoch": 0.8301929625425652, + "epoch": 0.8290402108305704, "grad_norm": 0.0, - "learning_rate": 1.4746309894042843e-06, - "loss": 0.7953, + "learning_rate": 1.4941644202743467e-06, + "loss": 0.7975, "step": 29256 }, { - "epoch": 0.8302213393870601, + "epoch": 0.8290685482728329, "grad_norm": 0.0, - "learning_rate": 1.4741506536005646e-06, - "loss": 0.8178, + "learning_rate": 1.4936818458745373e-06, + "loss": 0.8363, "step": 29257 }, { - "epoch": 0.8302497162315551, + "epoch": 0.8290968857150953, "grad_norm": 0.0, - "learning_rate": 1.4736703898150795e-06, - "loss": 0.7628, + "learning_rate": 1.4931993431266056e-06, + "loss": 0.8467, "step": 29258 }, { - "epoch": 0.8302780930760499, + "epoch": 0.8291252231573578, "grad_norm": 0.0, - "learning_rate": 1.4731901980518816e-06, - "loss": 0.782, + "learning_rate": 1.492716912034614e-06, + "loss": 0.7933, "step": 29259 }, { - "epoch": 0.8303064699205448, + "epoch": 0.8291535605996203, "grad_norm": 0.0, - "learning_rate": 1.4727100783150283e-06, - "loss": 0.8397, + "learning_rate": 1.492234552602626e-06, + "loss": 0.7747, "step": 29260 }, { - "epoch": 0.8303348467650398, + "epoch": 0.8291818980418827, "grad_norm": 0.0, - "learning_rate": 1.4722300306085802e-06, - "loss": 0.8536, + "learning_rate": 1.4917522648347083e-06, + "loss": 0.7702, "step": 29261 }, { - "epoch": 0.8303632236095346, + "epoch": 0.8292102354841452, "grad_norm": 0.0, - "learning_rate": 1.4717500549365848e-06, - "loss": 0.8073, + "learning_rate": 1.4912700487349186e-06, + "loss": 0.8773, "step": 29262 }, { - "epoch": 0.8303916004540295, + "epoch": 0.8292385729264077, "grad_norm": 0.0, - "learning_rate": 1.4712701513031015e-06, - "loss": 0.9244, + "learning_rate": 1.4907879043073236e-06, + "loss": 0.8418, "step": 29263 }, { - "epoch": 0.8304199772985245, + "epoch": 0.8292669103686702, "grad_norm": 0.0, - "learning_rate": 1.470790319712183e-06, - "loss": 0.8706, + "learning_rate": 1.4903058315559783e-06, + "loss": 0.8942, "step": 29264 }, { - "epoch": 0.8304483541430193, + "epoch": 0.8292952478109326, "grad_norm": 0.0, - "learning_rate": 1.470310560167879e-06, - "loss": 0.8921, + "learning_rate": 1.4898238304849477e-06, + "loss": 0.7672, "step": 29265 }, { - "epoch": 0.8304767309875142, + "epoch": 0.829323585253195, "grad_norm": 0.0, - "learning_rate": 1.4698308726742493e-06, - "loss": 0.8258, + "learning_rate": 1.4893419010982913e-06, + "loss": 0.7584, "step": 29266 }, { - "epoch": 0.830505107832009, + "epoch": 0.8293519226954575, "grad_norm": 0.0, - "learning_rate": 1.4693512572353396e-06, - "loss": 0.8695, + "learning_rate": 1.4888600434000688e-06, + "loss": 0.8304, "step": 29267 }, { - "epoch": 0.830533484676504, + "epoch": 0.8293802601377199, "grad_norm": 0.0, - "learning_rate": 1.4688717138552032e-06, - "loss": 0.793, + "learning_rate": 1.4883782573943383e-06, + "loss": 0.9331, "step": 29268 }, { - "epoch": 0.8305618615209989, + "epoch": 0.8294085975799824, "grad_norm": 0.0, - "learning_rate": 1.4683922425378926e-06, - "loss": 0.8095, + "learning_rate": 1.4878965430851612e-06, + "loss": 0.7614, "step": 29269 }, { - "epoch": 0.8305902383654937, + "epoch": 0.8294369350222449, "grad_norm": 0.0, - "learning_rate": 1.4679128432874545e-06, - "loss": 0.8499, + "learning_rate": 1.4874149004765892e-06, + "loss": 0.8105, "step": 29270 }, { - "epoch": 0.8306186152099887, + "epoch": 0.8294652724645074, "grad_norm": 0.0, - "learning_rate": 1.4674335161079401e-06, - "loss": 0.7407, + "learning_rate": 1.4869333295726851e-06, + "loss": 0.7542, "step": 29271 }, { - "epoch": 0.8306469920544836, + "epoch": 0.8294936099067698, "grad_norm": 0.0, - "learning_rate": 1.4669542610034016e-06, - "loss": 0.8009, + "learning_rate": 1.486451830377501e-06, + "loss": 0.8113, "step": 29272 }, { - "epoch": 0.8306753688989784, + "epoch": 0.8295219473490323, "grad_norm": 0.0, - "learning_rate": 1.4664750779778802e-06, - "loss": 0.7857, + "learning_rate": 1.4859704028950938e-06, + "loss": 0.7119, "step": 29273 }, { - "epoch": 0.8307037457434733, + "epoch": 0.8295502847912948, "grad_norm": 0.0, - "learning_rate": 1.4659959670354285e-06, - "loss": 0.8186, + "learning_rate": 1.4854890471295225e-06, + "loss": 0.7828, "step": 29274 }, { - "epoch": 0.8307321225879682, + "epoch": 0.8295786222335572, "grad_norm": 0.0, - "learning_rate": 1.465516928180094e-06, - "loss": 0.8132, + "learning_rate": 1.485007763084836e-06, + "loss": 0.7729, "step": 29275 }, { - "epoch": 0.8307604994324631, + "epoch": 0.8296069596758197, "grad_norm": 0.0, - "learning_rate": 1.4650379614159193e-06, - "loss": 0.823, + "learning_rate": 1.4845265507650909e-06, + "loss": 0.8795, "step": 29276 }, { - "epoch": 0.830788876276958, + "epoch": 0.8296352971180821, "grad_norm": 0.0, - "learning_rate": 1.4645590667469533e-06, - "loss": 0.691, + "learning_rate": 1.484045410174344e-06, + "loss": 0.8651, "step": 29277 }, { - "epoch": 0.8308172531214529, + "epoch": 0.8296636345603445, "grad_norm": 0.0, - "learning_rate": 1.464080244177243e-06, - "loss": 0.8671, + "learning_rate": 1.4835643413166423e-06, + "loss": 0.7552, "step": 29278 }, { - "epoch": 0.8308456299659478, + "epoch": 0.829691972002607, "grad_norm": 0.0, - "learning_rate": 1.4636014937108278e-06, - "loss": 0.7085, + "learning_rate": 1.4830833441960402e-06, + "loss": 0.7485, "step": 29279 }, { - "epoch": 0.8308740068104427, + "epoch": 0.8297203094448695, "grad_norm": 0.0, - "learning_rate": 1.463122815351755e-06, - "loss": 0.7118, + "learning_rate": 1.48260241881659e-06, + "loss": 0.783, "step": 29280 }, { - "epoch": 0.8309023836549376, + "epoch": 0.829748646887132, "grad_norm": 0.0, - "learning_rate": 1.462644209104067e-06, - "loss": 0.7978, + "learning_rate": 1.4821215651823418e-06, + "loss": 0.8244, "step": 29281 }, { - "epoch": 0.8309307604994325, + "epoch": 0.8297769843293944, "grad_norm": 0.0, - "learning_rate": 1.4621656749718072e-06, - "loss": 0.8301, + "learning_rate": 1.48164078329735e-06, + "loss": 0.8198, "step": 29282 }, { - "epoch": 0.8309591373439273, + "epoch": 0.8298053217716569, "grad_norm": 0.0, - "learning_rate": 1.46168721295902e-06, - "loss": 0.9693, + "learning_rate": 1.4811600731656583e-06, + "loss": 0.7727, "step": 29283 }, { - "epoch": 0.8309875141884222, + "epoch": 0.8298336592139194, "grad_norm": 0.0, - "learning_rate": 1.461208823069743e-06, - "loss": 0.8328, + "learning_rate": 1.480679434791321e-06, + "loss": 0.7371, "step": 29284 }, { - "epoch": 0.8310158910329172, + "epoch": 0.8298619966561818, "grad_norm": 0.0, - "learning_rate": 1.4607305053080179e-06, - "loss": 0.8111, + "learning_rate": 1.480198868178383e-06, + "loss": 0.8564, "step": 29285 }, { - "epoch": 0.831044267877412, + "epoch": 0.8298903340984443, "grad_norm": 0.0, - "learning_rate": 1.4602522596778889e-06, - "loss": 0.773, + "learning_rate": 1.4797183733308928e-06, + "loss": 0.9831, "step": 29286 }, { - "epoch": 0.8310726447219069, + "epoch": 0.8299186715407068, "grad_norm": 0.0, - "learning_rate": 1.4597740861833909e-06, - "loss": 0.8232, + "learning_rate": 1.479237950252901e-06, + "loss": 0.787, "step": 29287 }, { - "epoch": 0.8311010215664019, + "epoch": 0.8299470089829692, "grad_norm": 0.0, - "learning_rate": 1.4592959848285649e-06, - "loss": 0.8847, + "learning_rate": 1.4787575989484504e-06, + "loss": 0.6696, "step": 29288 }, { - "epoch": 0.8311293984108967, + "epoch": 0.8299753464252316, "grad_norm": 0.0, - "learning_rate": 1.458817955617452e-06, - "loss": 0.9457, + "learning_rate": 1.4782773194215883e-06, + "loss": 0.7077, "step": 29289 }, { - "epoch": 0.8311577752553916, + "epoch": 0.8300036838674941, "grad_norm": 0.0, - "learning_rate": 1.4583399985540859e-06, - "loss": 0.7829, + "learning_rate": 1.4777971116763622e-06, + "loss": 0.8645, "step": 29290 }, { - "epoch": 0.8311861520998864, + "epoch": 0.8300320213097566, "grad_norm": 0.0, - "learning_rate": 1.4578621136425053e-06, - "loss": 0.9142, + "learning_rate": 1.4773169757168148e-06, + "loss": 0.7288, "step": 29291 }, { - "epoch": 0.8312145289443814, + "epoch": 0.830060358752019, "grad_norm": 0.0, - "learning_rate": 1.4573843008867495e-06, - "loss": 0.7511, + "learning_rate": 1.4768369115469905e-06, + "loss": 0.8839, "step": 29292 }, { - "epoch": 0.8312429057888763, + "epoch": 0.8300886961942815, "grad_norm": 0.0, - "learning_rate": 1.4569065602908506e-06, - "loss": 0.8209, + "learning_rate": 1.476356919170935e-06, + "loss": 0.8859, "step": 29293 }, { - "epoch": 0.8312712826333711, + "epoch": 0.830117033636544, "grad_norm": 0.0, - "learning_rate": 1.4564288918588465e-06, - "loss": 0.7423, + "learning_rate": 1.475876998592689e-06, + "loss": 0.7914, "step": 29294 }, { - "epoch": 0.8312996594778661, + "epoch": 0.8301453710788065, "grad_norm": 0.0, - "learning_rate": 1.4559512955947708e-06, - "loss": 0.8434, + "learning_rate": 1.4753971498162988e-06, + "loss": 0.7114, "step": 29295 }, { - "epoch": 0.831328036322361, + "epoch": 0.8301737085210689, "grad_norm": 0.0, - "learning_rate": 1.455473771502659e-06, - "loss": 0.8957, + "learning_rate": 1.4749173728458022e-06, + "loss": 0.7484, "step": 29296 }, { - "epoch": 0.8313564131668558, + "epoch": 0.8302020459633314, "grad_norm": 0.0, - "learning_rate": 1.454996319586547e-06, - "loss": 0.7962, + "learning_rate": 1.4744376676852424e-06, + "loss": 0.8763, "step": 29297 }, { - "epoch": 0.8313847900113507, + "epoch": 0.8302303834055939, "grad_norm": 0.0, - "learning_rate": 1.4545189398504622e-06, - "loss": 0.8168, + "learning_rate": 1.4739580343386628e-06, + "loss": 0.8136, "step": 29298 }, { - "epoch": 0.8314131668558457, + "epoch": 0.8302587208478562, "grad_norm": 0.0, - "learning_rate": 1.4540416322984396e-06, - "loss": 0.8021, + "learning_rate": 1.473478472810097e-06, + "loss": 0.7836, "step": 29299 }, { - "epoch": 0.8314415437003405, + "epoch": 0.8302870582901187, "grad_norm": 0.0, - "learning_rate": 1.4535643969345149e-06, - "loss": 0.9205, + "learning_rate": 1.4729989831035918e-06, + "loss": 0.8198, "step": 29300 }, { - "epoch": 0.8314699205448354, + "epoch": 0.8303153957323812, "grad_norm": 0.0, - "learning_rate": 1.4530872337627132e-06, - "loss": 0.8126, + "learning_rate": 1.4725195652231794e-06, + "loss": 0.7654, "step": 29301 }, { - "epoch": 0.8314982973893303, + "epoch": 0.8303437331746436, "grad_norm": 0.0, - "learning_rate": 1.4526101427870675e-06, - "loss": 0.8159, + "learning_rate": 1.4720402191729022e-06, + "loss": 0.733, "step": 29302 }, { - "epoch": 0.8315266742338252, + "epoch": 0.8303720706169061, "grad_norm": 0.0, - "learning_rate": 1.452133124011611e-06, - "loss": 0.8326, + "learning_rate": 1.471560944956798e-06, + "loss": 0.7621, "step": 29303 }, { - "epoch": 0.8315550510783201, + "epoch": 0.8304004080591686, "grad_norm": 0.0, - "learning_rate": 1.4516561774403682e-06, - "loss": 0.8353, + "learning_rate": 1.4710817425789015e-06, + "loss": 0.7714, "step": 29304 }, { - "epoch": 0.831583427922815, + "epoch": 0.8304287455014311, "grad_norm": 0.0, - "learning_rate": 1.451179303077369e-06, - "loss": 0.8407, + "learning_rate": 1.4706026120432505e-06, + "loss": 0.7513, "step": 29305 }, { - "epoch": 0.8316118047673099, + "epoch": 0.8304570829436935, "grad_norm": 0.0, - "learning_rate": 1.450702500926645e-06, - "loss": 0.7386, + "learning_rate": 1.4701235533538816e-06, + "loss": 0.763, "step": 29306 }, { - "epoch": 0.8316401816118048, + "epoch": 0.830485420385956, "grad_norm": 0.0, - "learning_rate": 1.4502257709922197e-06, - "loss": 0.7662, + "learning_rate": 1.4696445665148285e-06, + "loss": 0.706, "step": 29307 }, { - "epoch": 0.8316685584562996, + "epoch": 0.8305137578282185, "grad_norm": 0.0, - "learning_rate": 1.4497491132781216e-06, - "loss": 0.8603, + "learning_rate": 1.46916565153013e-06, + "loss": 0.7857, "step": 29308 }, { - "epoch": 0.8316969353007946, + "epoch": 0.8305420952704808, "grad_norm": 0.0, - "learning_rate": 1.4492725277883791e-06, - "loss": 0.9175, + "learning_rate": 1.468686808403814e-06, + "loss": 0.8079, "step": 29309 }, { - "epoch": 0.8317253121452894, + "epoch": 0.8305704327127433, "grad_norm": 0.0, - "learning_rate": 1.448796014527013e-06, - "loss": 0.8099, + "learning_rate": 1.4682080371399176e-06, + "loss": 0.9454, "step": 29310 }, { - "epoch": 0.8317536889897843, + "epoch": 0.8305987701550058, "grad_norm": 0.0, - "learning_rate": 1.4483195734980504e-06, - "loss": 0.8335, + "learning_rate": 1.4677293377424752e-06, + "loss": 0.7587, "step": 29311 }, { - "epoch": 0.8317820658342793, + "epoch": 0.8306271075972683, "grad_norm": 0.0, - "learning_rate": 1.4478432047055202e-06, - "loss": 0.8372, + "learning_rate": 1.4672507102155153e-06, + "loss": 0.8113, "step": 29312 }, { - "epoch": 0.8318104426787741, + "epoch": 0.8306554450395307, "grad_norm": 0.0, - "learning_rate": 1.4473669081534414e-06, - "loss": 0.8638, + "learning_rate": 1.466772154563073e-06, + "loss": 0.7467, "step": 29313 }, { - "epoch": 0.831838819523269, + "epoch": 0.8306837824817932, "grad_norm": 0.0, - "learning_rate": 1.4468906838458385e-06, - "loss": 0.8816, + "learning_rate": 1.466293670789175e-06, + "loss": 0.9363, "step": 29314 }, { - "epoch": 0.8318671963677639, + "epoch": 0.8307121199240557, "grad_norm": 0.0, - "learning_rate": 1.4464145317867372e-06, - "loss": 0.8131, + "learning_rate": 1.4658152588978548e-06, + "loss": 0.7047, "step": 29315 }, { - "epoch": 0.8318955732122588, + "epoch": 0.8307404573663181, "grad_norm": 0.0, - "learning_rate": 1.4459384519801533e-06, - "loss": 0.8522, + "learning_rate": 1.465336918893141e-06, + "loss": 0.7322, "step": 29316 }, { - "epoch": 0.8319239500567537, + "epoch": 0.8307687948085806, "grad_norm": 0.0, - "learning_rate": 1.4454624444301135e-06, - "loss": 0.8404, + "learning_rate": 1.4648586507790663e-06, + "loss": 0.7932, "step": 29317 }, { - "epoch": 0.8319523269012485, + "epoch": 0.8307971322508431, "grad_norm": 0.0, - "learning_rate": 1.444986509140638e-06, - "loss": 0.7387, + "learning_rate": 1.4643804545596551e-06, + "loss": 0.7002, "step": 29318 }, { - "epoch": 0.8319807037457435, + "epoch": 0.8308254696931054, "grad_norm": 0.0, - "learning_rate": 1.4445106461157443e-06, - "loss": 0.8555, + "learning_rate": 1.4639023302389366e-06, + "loss": 0.7389, "step": 29319 }, { - "epoch": 0.8320090805902384, + "epoch": 0.8308538071353679, "grad_norm": 0.0, - "learning_rate": 1.444034855359453e-06, - "loss": 0.7638, + "learning_rate": 1.4634242778209373e-06, + "loss": 0.7893, "step": 29320 }, { - "epoch": 0.8320374574347332, + "epoch": 0.8308821445776304, "grad_norm": 0.0, - "learning_rate": 1.4435591368757872e-06, - "loss": 0.8374, + "learning_rate": 1.4629462973096887e-06, + "loss": 0.7977, "step": 29321 }, { - "epoch": 0.8320658342792282, + "epoch": 0.8309104820198929, "grad_norm": 0.0, - "learning_rate": 1.4430834906687597e-06, - "loss": 0.7609, + "learning_rate": 1.4624683887092117e-06, + "loss": 0.6519, "step": 29322 }, { - "epoch": 0.8320942111237231, + "epoch": 0.8309388194621553, "grad_norm": 0.0, - "learning_rate": 1.4426079167423923e-06, - "loss": 0.8342, + "learning_rate": 1.4619905520235333e-06, + "loss": 0.7322, "step": 29323 }, { - "epoch": 0.8321225879682179, + "epoch": 0.8309671569044178, "grad_norm": 0.0, - "learning_rate": 1.4421324151006988e-06, - "loss": 0.834, + "learning_rate": 1.4615127872566815e-06, + "loss": 0.7927, "step": 29324 }, { - "epoch": 0.8321509648127128, + "epoch": 0.8309954943466803, "grad_norm": 0.0, - "learning_rate": 1.441656985747697e-06, - "loss": 0.8311, + "learning_rate": 1.461035094412675e-06, + "loss": 0.7787, "step": 29325 }, { - "epoch": 0.8321793416572077, + "epoch": 0.8310238317889427, "grad_norm": 0.0, - "learning_rate": 1.4411816286874036e-06, - "loss": 0.8676, + "learning_rate": 1.4605574734955418e-06, + "loss": 0.8071, "step": 29326 }, { - "epoch": 0.8322077185017026, + "epoch": 0.8310521692312052, "grad_norm": 0.0, - "learning_rate": 1.4407063439238333e-06, - "loss": 0.7919, + "learning_rate": 1.4600799245093055e-06, + "loss": 0.8381, "step": 29327 }, { - "epoch": 0.8322360953461975, + "epoch": 0.8310805066734677, "grad_norm": 0.0, - "learning_rate": 1.4402311314610019e-06, - "loss": 0.7479, + "learning_rate": 1.459602447457985e-06, + "loss": 0.8608, "step": 29328 }, { - "epoch": 0.8322644721906924, + "epoch": 0.8311088441157302, "grad_norm": 0.0, - "learning_rate": 1.4397559913029247e-06, - "loss": 0.7584, + "learning_rate": 1.4591250423456048e-06, + "loss": 0.8533, "step": 29329 }, { - "epoch": 0.8322928490351873, + "epoch": 0.8311371815579925, "grad_norm": 0.0, - "learning_rate": 1.439280923453612e-06, - "loss": 0.8411, + "learning_rate": 1.4586477091761863e-06, + "loss": 0.7496, "step": 29330 }, { - "epoch": 0.8323212258796822, + "epoch": 0.831165519000255, "grad_norm": 0.0, - "learning_rate": 1.4388059279170774e-06, - "loss": 0.7973, + "learning_rate": 1.4581704479537495e-06, + "loss": 0.7026, "step": 29331 }, { - "epoch": 0.832349602724177, + "epoch": 0.8311938564425175, "grad_norm": 0.0, - "learning_rate": 1.4383310046973365e-06, - "loss": 0.7964, + "learning_rate": 1.4576932586823178e-06, + "loss": 0.767, "step": 29332 }, { - "epoch": 0.832377979568672, + "epoch": 0.8312221938847799, "grad_norm": 0.0, - "learning_rate": 1.437856153798396e-06, - "loss": 0.8374, + "learning_rate": 1.4572161413659047e-06, + "loss": 0.7675, "step": 29333 }, { - "epoch": 0.8324063564131668, + "epoch": 0.8312505313270424, "grad_norm": 0.0, - "learning_rate": 1.4373813752242694e-06, - "loss": 0.9408, + "learning_rate": 1.4567390960085325e-06, + "loss": 0.7971, "step": 29334 }, { - "epoch": 0.8324347332576617, + "epoch": 0.8312788687693049, "grad_norm": 0.0, - "learning_rate": 1.4369066689789702e-06, - "loss": 0.8055, + "learning_rate": 1.4562621226142225e-06, + "loss": 0.8456, "step": 29335 }, { - "epoch": 0.8324631101021567, + "epoch": 0.8313072062115674, "grad_norm": 0.0, - "learning_rate": 1.4364320350665018e-06, - "loss": 0.7576, + "learning_rate": 1.455785221186986e-06, + "loss": 0.8208, "step": 29336 }, { - "epoch": 0.8324914869466515, + "epoch": 0.8313355436538298, "grad_norm": 0.0, - "learning_rate": 1.4359574734908777e-06, - "loss": 0.8828, + "learning_rate": 1.4553083917308464e-06, + "loss": 0.713, "step": 29337 }, { - "epoch": 0.8325198637911464, + "epoch": 0.8313638810960923, "grad_norm": 0.0, - "learning_rate": 1.435482984256108e-06, - "loss": 0.9105, + "learning_rate": 1.4548316342498148e-06, + "loss": 0.8228, "step": 29338 }, { - "epoch": 0.8325482406356414, + "epoch": 0.8313922185383548, "grad_norm": 0.0, - "learning_rate": 1.4350085673661974e-06, - "loss": 0.9063, + "learning_rate": 1.4543549487479092e-06, + "loss": 0.9128, "step": 29339 }, { - "epoch": 0.8325766174801362, + "epoch": 0.8314205559806171, "grad_norm": 0.0, - "learning_rate": 1.4345342228251524e-06, - "loss": 0.7696, + "learning_rate": 1.4538783352291474e-06, + "loss": 0.7908, "step": 29340 }, { - "epoch": 0.8326049943246311, + "epoch": 0.8314488934228796, "grad_norm": 0.0, - "learning_rate": 1.4340599506369835e-06, - "loss": 0.8122, + "learning_rate": 1.4534017936975396e-06, + "loss": 0.8934, "step": 29341 }, { - "epoch": 0.832633371169126, + "epoch": 0.8314772308651421, "grad_norm": 0.0, - "learning_rate": 1.4335857508056949e-06, - "loss": 0.7718, + "learning_rate": 1.4529253241571029e-06, + "loss": 0.8106, "step": 29342 }, { - "epoch": 0.8326617480136209, + "epoch": 0.8315055683074045, "grad_norm": 0.0, - "learning_rate": 1.4331116233352938e-06, - "loss": 0.8122, + "learning_rate": 1.452448926611849e-06, + "loss": 0.8568, "step": 29343 }, { - "epoch": 0.8326901248581158, + "epoch": 0.831533905749667, "grad_norm": 0.0, - "learning_rate": 1.4326375682297833e-06, - "loss": 0.7485, + "learning_rate": 1.4519726010657931e-06, + "loss": 0.7575, "step": 29344 }, { - "epoch": 0.8327185017026106, + "epoch": 0.8315622431919295, "grad_norm": 0.0, - "learning_rate": 1.4321635854931671e-06, - "loss": 0.8456, + "learning_rate": 1.4514963475229482e-06, + "loss": 0.8874, "step": 29345 }, { - "epoch": 0.8327468785471056, + "epoch": 0.831590580634192, "grad_norm": 0.0, - "learning_rate": 1.4316896751294528e-06, - "loss": 0.7649, + "learning_rate": 1.4510201659873212e-06, + "loss": 0.8784, "step": 29346 }, { - "epoch": 0.8327752553916005, + "epoch": 0.8316189180764544, "grad_norm": 0.0, - "learning_rate": 1.4312158371426388e-06, - "loss": 0.9178, + "learning_rate": 1.4505440564629258e-06, + "loss": 0.76, "step": 29347 }, { - "epoch": 0.8328036322360953, + "epoch": 0.8316472555187169, "grad_norm": 0.0, - "learning_rate": 1.4307420715367304e-06, - "loss": 0.7775, + "learning_rate": 1.4500680189537753e-06, + "loss": 0.8553, "step": 29348 }, { - "epoch": 0.8328320090805902, + "epoch": 0.8316755929609794, "grad_norm": 0.0, - "learning_rate": 1.43026837831573e-06, - "loss": 0.826, + "learning_rate": 1.4495920534638741e-06, + "loss": 0.7473, "step": 29349 }, { - "epoch": 0.8328603859250852, + "epoch": 0.8317039304032418, "grad_norm": 0.0, - "learning_rate": 1.4297947574836367e-06, - "loss": 0.717, + "learning_rate": 1.449116159997237e-06, + "loss": 0.8597, "step": 29350 }, { - "epoch": 0.83288876276958, + "epoch": 0.8317322678455042, "grad_norm": 0.0, - "learning_rate": 1.4293212090444518e-06, - "loss": 0.8933, + "learning_rate": 1.4486403385578673e-06, + "loss": 0.8087, "step": 29351 }, { - "epoch": 0.8329171396140749, + "epoch": 0.8317606052877667, "grad_norm": 0.0, - "learning_rate": 1.4288477330021777e-06, - "loss": 0.8338, + "learning_rate": 1.4481645891497753e-06, + "loss": 0.8061, "step": 29352 }, { - "epoch": 0.8329455164585698, + "epoch": 0.8317889427300292, "grad_norm": 0.0, - "learning_rate": 1.428374329360811e-06, - "loss": 0.8693, + "learning_rate": 1.447688911776971e-06, + "loss": 0.8133, "step": 29353 }, { - "epoch": 0.8329738933030647, + "epoch": 0.8318172801722916, "grad_norm": 0.0, - "learning_rate": 1.4279009981243507e-06, - "loss": 0.7809, + "learning_rate": 1.4472133064434568e-06, + "loss": 0.8478, "step": 29354 }, { - "epoch": 0.8330022701475596, + "epoch": 0.8318456176145541, "grad_norm": 0.0, - "learning_rate": 1.4274277392967982e-06, - "loss": 0.8001, + "learning_rate": 1.4467377731532405e-06, + "loss": 0.7962, "step": 29355 }, { - "epoch": 0.8330306469920545, + "epoch": 0.8318739550568166, "grad_norm": 0.0, - "learning_rate": 1.4269545528821483e-06, - "loss": 0.7441, + "learning_rate": 1.4462623119103281e-06, + "loss": 0.8001, "step": 29356 }, { - "epoch": 0.8330590238365494, + "epoch": 0.831902292499079, "grad_norm": 0.0, - "learning_rate": 1.4264814388843973e-06, - "loss": 0.9287, + "learning_rate": 1.4457869227187248e-06, + "loss": 0.7478, "step": 29357 }, { - "epoch": 0.8330874006810443, + "epoch": 0.8319306299413415, "grad_norm": 0.0, - "learning_rate": 1.4260083973075433e-06, - "loss": 0.7875, + "learning_rate": 1.4453116055824368e-06, + "loss": 0.7775, "step": 29358 }, { - "epoch": 0.8331157775255391, + "epoch": 0.831958967383604, "grad_norm": 0.0, - "learning_rate": 1.425535428155581e-06, - "loss": 0.8026, + "learning_rate": 1.4448363605054638e-06, + "loss": 0.8668, "step": 29359 }, { - "epoch": 0.8331441543700341, + "epoch": 0.8319873048258665, "grad_norm": 0.0, - "learning_rate": 1.4250625314325095e-06, - "loss": 0.8318, + "learning_rate": 1.4443611874918106e-06, + "loss": 0.8075, "step": 29360 }, { - "epoch": 0.8331725312145289, + "epoch": 0.8320156422681289, "grad_norm": 0.0, - "learning_rate": 1.4245897071423175e-06, - "loss": 0.8907, + "learning_rate": 1.4438860865454828e-06, + "loss": 0.8354, "step": 29361 }, { - "epoch": 0.8332009080590238, + "epoch": 0.8320439797103913, "grad_norm": 0.0, - "learning_rate": 1.4241169552890022e-06, - "loss": 0.7908, + "learning_rate": 1.4434110576704774e-06, + "loss": 0.7199, "step": 29362 }, { - "epoch": 0.8332292849035188, + "epoch": 0.8320723171526538, "grad_norm": 0.0, - "learning_rate": 1.423644275876559e-06, - "loss": 0.7645, + "learning_rate": 1.4429361008707986e-06, + "loss": 0.8215, "step": 29363 }, { - "epoch": 0.8332576617480136, + "epoch": 0.8321006545949162, "grad_norm": 0.0, - "learning_rate": 1.4231716689089757e-06, - "loss": 0.8275, + "learning_rate": 1.4424612161504482e-06, + "loss": 0.8465, "step": 29364 }, { - "epoch": 0.8332860385925085, + "epoch": 0.8321289920371787, "grad_norm": 0.0, - "learning_rate": 1.4226991343902463e-06, - "loss": 0.7332, + "learning_rate": 1.4419864035134236e-06, + "loss": 0.7578, "step": 29365 }, { - "epoch": 0.8333144154370034, + "epoch": 0.8321573294794412, "grad_norm": 0.0, - "learning_rate": 1.422226672324366e-06, - "loss": 0.7899, + "learning_rate": 1.441511662963726e-06, + "loss": 0.8926, "step": 29366 }, { - "epoch": 0.8333427922814983, + "epoch": 0.8321856669217036, "grad_norm": 0.0, - "learning_rate": 1.4217542827153196e-06, - "loss": 0.8014, + "learning_rate": 1.4410369945053526e-06, + "loss": 0.9387, "step": 29367 }, { - "epoch": 0.8333711691259932, + "epoch": 0.8322140043639661, "grad_norm": 0.0, - "learning_rate": 1.4212819655671006e-06, - "loss": 0.8236, + "learning_rate": 1.4405623981423022e-06, + "loss": 0.9099, "step": 29368 }, { - "epoch": 0.833399545970488, + "epoch": 0.8322423418062286, "grad_norm": 0.0, - "learning_rate": 1.4208097208837013e-06, - "loss": 0.6689, + "learning_rate": 1.440087873878574e-06, + "loss": 0.872, "step": 29369 }, { - "epoch": 0.833427922814983, + "epoch": 0.8322706792484911, "grad_norm": 0.0, - "learning_rate": 1.420337548669105e-06, - "loss": 0.7874, + "learning_rate": 1.4396134217181645e-06, + "loss": 0.8924, "step": 29370 }, { - "epoch": 0.8334562996594779, + "epoch": 0.8322990166907535, "grad_norm": 0.0, - "learning_rate": 1.4198654489273035e-06, - "loss": 0.7998, + "learning_rate": 1.4391390416650708e-06, + "loss": 0.8132, "step": 29371 }, { - "epoch": 0.8334846765039727, + "epoch": 0.832327354133016, "grad_norm": 0.0, - "learning_rate": 1.419393421662284e-06, - "loss": 0.7936, + "learning_rate": 1.4386647337232873e-06, + "loss": 0.8683, "step": 29372 }, { - "epoch": 0.8335130533484677, + "epoch": 0.8323556915752784, "grad_norm": 0.0, - "learning_rate": 1.4189214668780337e-06, - "loss": 0.712, + "learning_rate": 1.4381904978968086e-06, + "loss": 0.7988, "step": 29373 }, { - "epoch": 0.8335414301929626, + "epoch": 0.8323840290175408, "grad_norm": 0.0, - "learning_rate": 1.4184495845785418e-06, - "loss": 0.8137, + "learning_rate": 1.437716334189634e-06, + "loss": 0.8527, "step": 29374 }, { - "epoch": 0.8335698070374574, + "epoch": 0.8324123664598033, "grad_norm": 0.0, - "learning_rate": 1.4179777747677903e-06, - "loss": 0.7686, + "learning_rate": 1.4372422426057509e-06, + "loss": 0.8506, "step": 29375 }, { - "epoch": 0.8335981838819523, + "epoch": 0.8324407039020658, "grad_norm": 0.0, - "learning_rate": 1.4175060374497662e-06, - "loss": 0.8372, + "learning_rate": 1.4367682231491576e-06, + "loss": 0.7222, "step": 29376 }, { - "epoch": 0.8336265607264473, + "epoch": 0.8324690413443283, "grad_norm": 0.0, - "learning_rate": 1.4170343726284564e-06, - "loss": 0.8499, + "learning_rate": 1.4362942758238463e-06, + "loss": 0.7947, "step": 29377 }, { - "epoch": 0.8336549375709421, + "epoch": 0.8324973787865907, "grad_norm": 0.0, - "learning_rate": 1.4165627803078418e-06, - "loss": 0.7038, + "learning_rate": 1.4358204006338061e-06, + "loss": 0.8271, "step": 29378 }, { - "epoch": 0.833683314415437, + "epoch": 0.8325257162288532, "grad_norm": 0.0, - "learning_rate": 1.4160912604919063e-06, - "loss": 0.799, + "learning_rate": 1.435346597583034e-06, + "loss": 0.8944, "step": 29379 }, { - "epoch": 0.8337116912599319, + "epoch": 0.8325540536711157, "grad_norm": 0.0, - "learning_rate": 1.4156198131846367e-06, - "loss": 0.7906, + "learning_rate": 1.4348728666755152e-06, + "loss": 0.8198, "step": 29380 }, { - "epoch": 0.8337400681044268, + "epoch": 0.8325823911133781, "grad_norm": 0.0, - "learning_rate": 1.4151484383900093e-06, - "loss": 0.7353, + "learning_rate": 1.4343992079152436e-06, + "loss": 0.7636, "step": 29381 }, { - "epoch": 0.8337684449489217, + "epoch": 0.8326107285556406, "grad_norm": 0.0, - "learning_rate": 1.4146771361120094e-06, - "loss": 0.9139, + "learning_rate": 1.4339256213062069e-06, + "loss": 0.8129, "step": 29382 }, { - "epoch": 0.8337968217934165, + "epoch": 0.832639065997903, "grad_norm": 0.0, - "learning_rate": 1.4142059063546199e-06, - "loss": 0.7812, + "learning_rate": 1.4334521068523966e-06, + "loss": 0.8786, "step": 29383 }, { - "epoch": 0.8338251986379115, + "epoch": 0.8326674034401655, "grad_norm": 0.0, - "learning_rate": 1.4137347491218167e-06, - "loss": 0.7484, + "learning_rate": 1.4329786645578004e-06, + "loss": 0.8219, "step": 29384 }, { - "epoch": 0.8338535754824064, + "epoch": 0.8326957408824279, "grad_norm": 0.0, - "learning_rate": 1.4132636644175824e-06, - "loss": 0.808, + "learning_rate": 1.4325052944264074e-06, + "loss": 0.822, "step": 29385 }, { - "epoch": 0.8338819523269012, + "epoch": 0.8327240783246904, "grad_norm": 0.0, - "learning_rate": 1.412792652245898e-06, - "loss": 0.7942, + "learning_rate": 1.432031996462202e-06, + "loss": 0.785, "step": 29386 }, { - "epoch": 0.8339103291713962, + "epoch": 0.8327524157669529, "grad_norm": 0.0, - "learning_rate": 1.412321712610737e-06, - "loss": 0.9127, + "learning_rate": 1.4315587706691759e-06, + "loss": 0.7493, "step": 29387 }, { - "epoch": 0.833938706015891, + "epoch": 0.8327807532092153, "grad_norm": 0.0, - "learning_rate": 1.4118508455160818e-06, - "loss": 0.8405, + "learning_rate": 1.4310856170513088e-06, + "loss": 0.8272, "step": 29388 }, { - "epoch": 0.8339670828603859, + "epoch": 0.8328090906514778, "grad_norm": 0.0, - "learning_rate": 1.411380050965908e-06, - "loss": 0.8564, + "learning_rate": 1.4306125356125899e-06, + "loss": 0.9279, "step": 29389 }, { - "epoch": 0.8339954597048809, + "epoch": 0.8328374280937403, "grad_norm": 0.0, - "learning_rate": 1.410909328964193e-06, - "loss": 0.6507, + "learning_rate": 1.430139526357006e-06, + "loss": 0.8542, "step": 29390 }, { - "epoch": 0.8340238365493757, + "epoch": 0.8328657655360027, "grad_norm": 0.0, - "learning_rate": 1.410438679514915e-06, - "loss": 0.7671, + "learning_rate": 1.4296665892885364e-06, + "loss": 0.7574, "step": 29391 }, { - "epoch": 0.8340522133938706, + "epoch": 0.8328941029782652, "grad_norm": 0.0, - "learning_rate": 1.4099681026220457e-06, - "loss": 0.7831, + "learning_rate": 1.4291937244111688e-06, + "loss": 0.8108, "step": 29392 }, { - "epoch": 0.8340805902383654, + "epoch": 0.8329224404205277, "grad_norm": 0.0, - "learning_rate": 1.409497598289562e-06, - "loss": 0.7684, + "learning_rate": 1.4287209317288864e-06, + "loss": 0.8762, "step": 29393 }, { - "epoch": 0.8341089670828604, + "epoch": 0.8329507778627901, "grad_norm": 0.0, - "learning_rate": 1.4090271665214405e-06, - "loss": 0.7215, + "learning_rate": 1.4282482112456686e-06, + "loss": 0.7006, "step": 29394 }, { - "epoch": 0.8341373439273553, + "epoch": 0.8329791153050525, "grad_norm": 0.0, - "learning_rate": 1.4085568073216505e-06, - "loss": 0.8255, + "learning_rate": 1.4277755629654987e-06, + "loss": 0.8313, "step": 29395 }, { - "epoch": 0.8341657207718501, + "epoch": 0.833007452747315, "grad_norm": 0.0, - "learning_rate": 1.4080865206941674e-06, - "loss": 0.6745, + "learning_rate": 1.4273029868923593e-06, + "loss": 0.722, "step": 29396 }, { - "epoch": 0.8341940976163451, + "epoch": 0.8330357901895775, "grad_norm": 0.0, - "learning_rate": 1.407616306642966e-06, - "loss": 0.8313, + "learning_rate": 1.4268304830302293e-06, + "loss": 0.8065, "step": 29397 }, { - "epoch": 0.83422247446084, + "epoch": 0.8330641276318399, "grad_norm": 0.0, - "learning_rate": 1.407146165172013e-06, - "loss": 0.7999, + "learning_rate": 1.426358051383092e-06, + "loss": 0.7261, "step": 29398 }, { - "epoch": 0.8342508513053348, + "epoch": 0.8330924650741024, "grad_norm": 0.0, - "learning_rate": 1.4066760962852832e-06, - "loss": 0.8404, + "learning_rate": 1.4258856919549236e-06, + "loss": 0.758, "step": 29399 }, { - "epoch": 0.8342792281498297, + "epoch": 0.8331208025163649, "grad_norm": 0.0, - "learning_rate": 1.4062060999867489e-06, - "loss": 0.7866, + "learning_rate": 1.4254134047497047e-06, + "loss": 0.7052, "step": 29400 }, { - "epoch": 0.8343076049943247, + "epoch": 0.8331491399586274, "grad_norm": 0.0, - "learning_rate": 1.4057361762803756e-06, - "loss": 0.7825, + "learning_rate": 1.4249411897714117e-06, + "loss": 0.7652, "step": 29401 }, { - "epoch": 0.8343359818388195, + "epoch": 0.8331774774008898, "grad_norm": 0.0, - "learning_rate": 1.405266325170136e-06, - "loss": 0.6745, + "learning_rate": 1.424469047024023e-06, + "loss": 0.8266, "step": 29402 }, { - "epoch": 0.8343643586833144, + "epoch": 0.8332058148431523, "grad_norm": 0.0, - "learning_rate": 1.4047965466599967e-06, - "loss": 0.8428, + "learning_rate": 1.423996976511518e-06, + "loss": 0.8231, "step": 29403 }, { - "epoch": 0.8343927355278093, + "epoch": 0.8332341522854148, "grad_norm": 0.0, - "learning_rate": 1.404326840753929e-06, - "loss": 0.9674, + "learning_rate": 1.423524978237869e-06, + "loss": 0.8726, "step": 29404 }, { - "epoch": 0.8344211123723042, + "epoch": 0.8332624897276771, "grad_norm": 0.0, - "learning_rate": 1.4038572074558997e-06, - "loss": 0.8277, + "learning_rate": 1.423053052207053e-06, + "loss": 0.7826, "step": 29405 }, { - "epoch": 0.8344494892167991, + "epoch": 0.8332908271699396, "grad_norm": 0.0, - "learning_rate": 1.4033876467698738e-06, - "loss": 0.781, + "learning_rate": 1.4225811984230497e-06, + "loss": 0.7652, "step": 29406 }, { - "epoch": 0.834477866061294, + "epoch": 0.8333191646122021, "grad_norm": 0.0, - "learning_rate": 1.4029181586998175e-06, - "loss": 0.7418, + "learning_rate": 1.4221094168898276e-06, + "loss": 0.7757, "step": 29407 }, { - "epoch": 0.8345062429057889, + "epoch": 0.8333475020544646, "grad_norm": 0.0, - "learning_rate": 1.4024487432497013e-06, - "loss": 0.714, + "learning_rate": 1.421637707611363e-06, + "loss": 0.8117, "step": 29408 }, { - "epoch": 0.8345346197502838, + "epoch": 0.833375839496727, "grad_norm": 0.0, - "learning_rate": 1.4019794004234844e-06, - "loss": 0.7961, + "learning_rate": 1.4211660705916286e-06, + "loss": 0.8185, "step": 29409 }, { - "epoch": 0.8345629965947786, + "epoch": 0.8334041769389895, "grad_norm": 0.0, - "learning_rate": 1.4015101302251343e-06, - "loss": 0.8519, + "learning_rate": 1.4206945058345988e-06, + "loss": 0.7627, "step": 29410 }, { - "epoch": 0.8345913734392736, + "epoch": 0.833432514381252, "grad_norm": 0.0, - "learning_rate": 1.401040932658617e-06, - "loss": 0.6943, + "learning_rate": 1.4202230133442474e-06, + "loss": 0.7517, "step": 29411 }, { - "epoch": 0.8346197502837684, + "epoch": 0.8334608518235144, "grad_norm": 0.0, - "learning_rate": 1.4005718077278907e-06, - "loss": 0.9004, + "learning_rate": 1.419751593124542e-06, + "loss": 0.8314, "step": 29412 }, { - "epoch": 0.8346481271282633, + "epoch": 0.8334891892657769, "grad_norm": 0.0, - "learning_rate": 1.4001027554369217e-06, - "loss": 0.8764, + "learning_rate": 1.419280245179454e-06, + "loss": 0.8862, "step": 29413 }, { - "epoch": 0.8346765039727583, + "epoch": 0.8335175267080394, "grad_norm": 0.0, - "learning_rate": 1.3996337757896726e-06, - "loss": 0.7701, + "learning_rate": 1.418808969512957e-06, + "loss": 0.8283, "step": 29414 }, { - "epoch": 0.8347048808172531, + "epoch": 0.8335458641503017, "grad_norm": 0.0, - "learning_rate": 1.3991648687901017e-06, - "loss": 0.8339, + "learning_rate": 1.4183377661290175e-06, + "loss": 0.8415, "step": 29415 }, { - "epoch": 0.834733257661748, + "epoch": 0.8335742015925642, "grad_norm": 0.0, - "learning_rate": 1.3986960344421718e-06, - "loss": 0.7451, + "learning_rate": 1.4178666350316072e-06, + "loss": 0.7769, "step": 29416 }, { - "epoch": 0.8347616345062429, + "epoch": 0.8336025390348267, "grad_norm": 0.0, - "learning_rate": 1.398227272749846e-06, - "loss": 0.8047, + "learning_rate": 1.4173955762246905e-06, + "loss": 0.7752, "step": 29417 }, { - "epoch": 0.8347900113507378, + "epoch": 0.8336308764770892, "grad_norm": 0.0, - "learning_rate": 1.397758583717076e-06, - "loss": 0.8678, + "learning_rate": 1.4169245897122385e-06, + "loss": 0.7384, "step": 29418 }, { - "epoch": 0.8348183881952327, + "epoch": 0.8336592139193516, "grad_norm": 0.0, - "learning_rate": 1.397289967347828e-06, - "loss": 0.818, + "learning_rate": 1.4164536754982206e-06, + "loss": 0.8094, "step": 29419 }, { - "epoch": 0.8348467650397275, + "epoch": 0.8336875513616141, "grad_norm": 0.0, - "learning_rate": 1.396821423646062e-06, - "loss": 0.6978, + "learning_rate": 1.4159828335865978e-06, + "loss": 0.8318, "step": 29420 }, { - "epoch": 0.8348751418842225, + "epoch": 0.8337158888038766, "grad_norm": 0.0, - "learning_rate": 1.3963529526157283e-06, - "loss": 0.7525, + "learning_rate": 1.4155120639813392e-06, + "loss": 0.9395, "step": 29421 }, { - "epoch": 0.8349035187287174, + "epoch": 0.833744226246139, "grad_norm": 0.0, - "learning_rate": 1.395884554260789e-06, - "loss": 0.7666, + "learning_rate": 1.4150413666864104e-06, + "loss": 0.8167, "step": 29422 }, { - "epoch": 0.8349318955732122, + "epoch": 0.8337725636884015, "grad_norm": 0.0, - "learning_rate": 1.3954162285852013e-06, - "loss": 0.834, + "learning_rate": 1.4145707417057763e-06, + "loss": 0.8394, "step": 29423 }, { - "epoch": 0.8349602724177072, + "epoch": 0.833800901130664, "grad_norm": 0.0, - "learning_rate": 1.3949479755929174e-06, - "loss": 0.8059, + "learning_rate": 1.4141001890434035e-06, + "loss": 0.7932, "step": 29424 }, { - "epoch": 0.8349886492622021, + "epoch": 0.8338292385729265, "grad_norm": 0.0, - "learning_rate": 1.394479795287894e-06, - "loss": 0.7963, + "learning_rate": 1.4136297087032503e-06, + "loss": 0.7405, "step": 29425 }, { - "epoch": 0.8350170261066969, + "epoch": 0.8338575760151888, "grad_norm": 0.0, - "learning_rate": 1.3940116876740905e-06, - "loss": 0.8307, + "learning_rate": 1.413159300689283e-06, + "loss": 0.7615, "step": 29426 }, { - "epoch": 0.8350454029511918, + "epoch": 0.8338859134574513, "grad_norm": 0.0, - "learning_rate": 1.3935436527554535e-06, - "loss": 0.8183, + "learning_rate": 1.4126889650054654e-06, + "loss": 0.9176, "step": 29427 }, { - "epoch": 0.8350737797956868, + "epoch": 0.8339142508997138, "grad_norm": 0.0, - "learning_rate": 1.393075690535941e-06, - "loss": 0.7567, + "learning_rate": 1.4122187016557553e-06, + "loss": 0.8896, "step": 29428 }, { - "epoch": 0.8351021566401816, + "epoch": 0.8339425883419762, "grad_norm": 0.0, - "learning_rate": 1.3926078010195066e-06, - "loss": 0.8467, + "learning_rate": 1.4117485106441188e-06, + "loss": 0.7076, "step": 29429 }, { - "epoch": 0.8351305334846765, + "epoch": 0.8339709257842387, "grad_norm": 0.0, - "learning_rate": 1.392139984210099e-06, - "loss": 0.7962, + "learning_rate": 1.411278391974511e-06, + "loss": 0.7792, "step": 29430 }, { - "epoch": 0.8351589103291714, + "epoch": 0.8339992632265012, "grad_norm": 0.0, - "learning_rate": 1.3916722401116756e-06, - "loss": 0.6983, + "learning_rate": 1.4108083456508948e-06, + "loss": 0.9512, "step": 29431 }, { - "epoch": 0.8351872871736663, + "epoch": 0.8340276006687637, "grad_norm": 0.0, - "learning_rate": 1.3912045687281794e-06, - "loss": 0.7507, + "learning_rate": 1.4103383716772313e-06, + "loss": 0.7908, "step": 29432 }, { - "epoch": 0.8352156640181612, + "epoch": 0.8340559381110261, "grad_norm": 0.0, - "learning_rate": 1.3907369700635664e-06, - "loss": 0.7459, + "learning_rate": 1.409868470057475e-06, + "loss": 0.7025, "step": 29433 }, { - "epoch": 0.835244040862656, + "epoch": 0.8340842755532886, "grad_norm": 0.0, - "learning_rate": 1.3902694441217845e-06, - "loss": 0.78, + "learning_rate": 1.4093986407955873e-06, + "loss": 0.829, "step": 29434 }, { - "epoch": 0.835272417707151, + "epoch": 0.8341126129955511, "grad_norm": 0.0, - "learning_rate": 1.389801990906784e-06, - "loss": 0.9526, + "learning_rate": 1.408928883895524e-06, + "loss": 0.804, "step": 29435 }, { - "epoch": 0.8353007945516459, + "epoch": 0.8341409504378134, "grad_norm": 0.0, - "learning_rate": 1.3893346104225135e-06, - "loss": 0.8059, + "learning_rate": 1.408459199361244e-06, + "loss": 0.7374, "step": 29436 }, { - "epoch": 0.8353291713961407, + "epoch": 0.8341692878800759, "grad_norm": 0.0, - "learning_rate": 1.3888673026729227e-06, - "loss": 0.9319, + "learning_rate": 1.4079895871967043e-06, + "loss": 0.7504, "step": 29437 }, { - "epoch": 0.8353575482406357, + "epoch": 0.8341976253223384, "grad_norm": 0.0, - "learning_rate": 1.3884000676619546e-06, - "loss": 0.74, + "learning_rate": 1.407520047405856e-06, + "loss": 0.8945, "step": 29438 }, { - "epoch": 0.8353859250851305, + "epoch": 0.8342259627646008, "grad_norm": 0.0, - "learning_rate": 1.3879329053935575e-06, - "loss": 0.8033, + "learning_rate": 1.407050579992658e-06, + "loss": 0.7568, "step": 29439 }, { - "epoch": 0.8354143019296254, + "epoch": 0.8342543002068633, "grad_norm": 0.0, - "learning_rate": 1.3874658158716814e-06, - "loss": 0.8737, + "learning_rate": 1.4065811849610655e-06, + "loss": 1.0082, "step": 29440 }, { - "epoch": 0.8354426787741204, + "epoch": 0.8342826376491258, "grad_norm": 0.0, - "learning_rate": 1.3869987991002665e-06, - "loss": 0.819, + "learning_rate": 1.4061118623150283e-06, + "loss": 0.9051, "step": 29441 }, { - "epoch": 0.8354710556186152, + "epoch": 0.8343109750913883, "grad_norm": 0.0, - "learning_rate": 1.3865318550832595e-06, - "loss": 0.8174, + "learning_rate": 1.4056426120585032e-06, + "loss": 0.8691, "step": 29442 }, { - "epoch": 0.8354994324631101, + "epoch": 0.8343393125336507, "grad_norm": 0.0, - "learning_rate": 1.3860649838246076e-06, - "loss": 0.8059, + "learning_rate": 1.4051734341954436e-06, + "loss": 0.8463, "step": 29443 }, { - "epoch": 0.835527809307605, + "epoch": 0.8343676499759132, "grad_norm": 0.0, - "learning_rate": 1.3855981853282496e-06, - "loss": 0.7828, + "learning_rate": 1.4047043287297967e-06, + "loss": 0.7945, "step": 29444 }, { - "epoch": 0.8355561861520999, + "epoch": 0.8343959874181757, "grad_norm": 0.0, - "learning_rate": 1.3851314595981314e-06, - "loss": 0.8954, + "learning_rate": 1.4042352956655202e-06, + "loss": 0.7488, "step": 29445 }, { - "epoch": 0.8355845629965948, + "epoch": 0.834424324860438, "grad_norm": 0.0, - "learning_rate": 1.3846648066381962e-06, - "loss": 0.7912, + "learning_rate": 1.40376633500656e-06, + "loss": 0.8653, "step": 29446 }, { - "epoch": 0.8356129398410896, + "epoch": 0.8344526623027005, "grad_norm": 0.0, - "learning_rate": 1.3841982264523835e-06, - "loss": 0.8051, + "learning_rate": 1.4032974467568672e-06, + "loss": 0.7586, "step": 29447 }, { - "epoch": 0.8356413166855846, + "epoch": 0.834480999744963, "grad_norm": 0.0, - "learning_rate": 1.3837317190446354e-06, - "loss": 0.817, + "learning_rate": 1.402828630920392e-06, + "loss": 0.8357, "step": 29448 }, { - "epoch": 0.8356696935300795, + "epoch": 0.8345093371872255, "grad_norm": 0.0, - "learning_rate": 1.3832652844188932e-06, - "loss": 0.7742, + "learning_rate": 1.4023598875010846e-06, + "loss": 0.7682, "step": 29449 }, { - "epoch": 0.8356980703745743, + "epoch": 0.8345376746294879, "grad_norm": 0.0, - "learning_rate": 1.3827989225790961e-06, - "loss": 0.7657, + "learning_rate": 1.401891216502892e-06, + "loss": 0.8228, "step": 29450 }, { - "epoch": 0.8357264472190692, + "epoch": 0.8345660120717504, "grad_norm": 0.0, - "learning_rate": 1.3823326335291853e-06, - "loss": 0.8032, + "learning_rate": 1.4014226179297652e-06, + "loss": 0.7967, "step": 29451 }, { - "epoch": 0.8357548240635642, + "epoch": 0.8345943495140129, "grad_norm": 0.0, - "learning_rate": 1.3818664172730967e-06, - "loss": 0.8842, + "learning_rate": 1.4009540917856457e-06, + "loss": 0.8013, "step": 29452 }, { - "epoch": 0.835783200908059, + "epoch": 0.8346226869562753, "grad_norm": 0.0, - "learning_rate": 1.3814002738147704e-06, - "loss": 0.8408, + "learning_rate": 1.4004856380744857e-06, + "loss": 0.7335, "step": 29453 }, { - "epoch": 0.8358115777525539, + "epoch": 0.8346510243985378, "grad_norm": 0.0, - "learning_rate": 1.3809342031581451e-06, - "loss": 0.8175, + "learning_rate": 1.4000172568002268e-06, + "loss": 0.7343, "step": 29454 }, { - "epoch": 0.8358399545970489, + "epoch": 0.8346793618408003, "grad_norm": 0.0, - "learning_rate": 1.3804682053071538e-06, - "loss": 0.8709, + "learning_rate": 1.3995489479668156e-06, + "loss": 0.8075, "step": 29455 }, { - "epoch": 0.8358683314415437, + "epoch": 0.8347076992830628, "grad_norm": 0.0, - "learning_rate": 1.3800022802657343e-06, - "loss": 0.7276, + "learning_rate": 1.399080711578199e-06, + "loss": 0.9366, "step": 29456 }, { - "epoch": 0.8358967082860386, + "epoch": 0.8347360367253251, "grad_norm": 0.0, - "learning_rate": 1.3795364280378265e-06, - "loss": 0.8688, + "learning_rate": 1.3986125476383172e-06, + "loss": 0.7808, "step": 29457 }, { - "epoch": 0.8359250851305334, + "epoch": 0.8347643741675876, "grad_norm": 0.0, - "learning_rate": 1.3790706486273587e-06, - "loss": 0.6719, + "learning_rate": 1.3981444561511193e-06, + "loss": 0.8888, "step": 29458 }, { - "epoch": 0.8359534619750284, + "epoch": 0.8347927116098501, "grad_norm": 0.0, - "learning_rate": 1.3786049420382697e-06, - "loss": 0.7971, + "learning_rate": 1.397676437120542e-06, + "loss": 0.7519, "step": 29459 }, { - "epoch": 0.8359818388195233, + "epoch": 0.8348210490521125, "grad_norm": 0.0, - "learning_rate": 1.3781393082744942e-06, - "loss": 0.8923, + "learning_rate": 1.3972084905505302e-06, + "loss": 0.8495, "step": 29460 }, { - "epoch": 0.8360102156640181, + "epoch": 0.834849386494375, "grad_norm": 0.0, - "learning_rate": 1.3776737473399615e-06, - "loss": 0.9541, + "learning_rate": 1.396740616445027e-06, + "loss": 0.826, "step": 29461 }, { - "epoch": 0.8360385925085131, + "epoch": 0.8348777239366375, "grad_norm": 0.0, - "learning_rate": 1.377208259238606e-06, - "loss": 0.7718, + "learning_rate": 1.3962728148079707e-06, + "loss": 0.7915, "step": 29462 }, { - "epoch": 0.836066969353008, + "epoch": 0.8349060613788999, "grad_norm": 0.0, - "learning_rate": 1.3767428439743612e-06, - "loss": 0.7, + "learning_rate": 1.3958050856433048e-06, + "loss": 0.779, "step": 29463 }, { - "epoch": 0.8360953461975028, + "epoch": 0.8349343988211624, "grad_norm": 0.0, - "learning_rate": 1.3762775015511543e-06, - "loss": 0.803, + "learning_rate": 1.3953374289549693e-06, + "loss": 0.8709, "step": 29464 }, { - "epoch": 0.8361237230419978, + "epoch": 0.8349627362634249, "grad_norm": 0.0, - "learning_rate": 1.3758122319729228e-06, - "loss": 0.7658, + "learning_rate": 1.3948698447469e-06, + "loss": 0.7873, "step": 29465 }, { - "epoch": 0.8361520998864926, + "epoch": 0.8349910737056874, "grad_norm": 0.0, - "learning_rate": 1.3753470352435904e-06, - "loss": 0.9274, + "learning_rate": 1.3944023330230393e-06, + "loss": 0.7483, "step": 29466 }, { - "epoch": 0.8361804767309875, + "epoch": 0.8350194111479498, "grad_norm": 0.0, - "learning_rate": 1.3748819113670908e-06, - "loss": 0.8495, + "learning_rate": 1.3939348937873209e-06, + "loss": 0.835, "step": 29467 }, { - "epoch": 0.8362088535754824, + "epoch": 0.8350477485902122, "grad_norm": 0.0, - "learning_rate": 1.3744168603473518e-06, - "loss": 0.7028, + "learning_rate": 1.3934675270436848e-06, + "loss": 0.6595, "step": 29468 }, { - "epoch": 0.8362372304199773, + "epoch": 0.8350760860324747, "grad_norm": 0.0, - "learning_rate": 1.3739518821883001e-06, - "loss": 0.8384, + "learning_rate": 1.3930002327960702e-06, + "loss": 0.8798, "step": 29469 }, { - "epoch": 0.8362656072644722, + "epoch": 0.8351044234747371, "grad_norm": 0.0, - "learning_rate": 1.3734869768938652e-06, - "loss": 0.8146, + "learning_rate": 1.3925330110484092e-06, + "loss": 0.7131, "step": 29470 }, { - "epoch": 0.836293984108967, + "epoch": 0.8351327609169996, "grad_norm": 0.0, - "learning_rate": 1.3730221444679748e-06, - "loss": 0.8932, + "learning_rate": 1.3920658618046378e-06, + "loss": 0.8674, "step": 29471 }, { - "epoch": 0.836322360953462, + "epoch": 0.8351610983592621, "grad_norm": 0.0, - "learning_rate": 1.3725573849145525e-06, - "loss": 0.7911, + "learning_rate": 1.391598785068695e-06, + "loss": 0.8708, "step": 29472 }, { - "epoch": 0.8363507377979569, + "epoch": 0.8351894358015246, "grad_norm": 0.0, - "learning_rate": 1.372092698237525e-06, - "loss": 0.8901, + "learning_rate": 1.391131780844509e-06, + "loss": 0.7654, "step": 29473 }, { - "epoch": 0.8363791146424517, + "epoch": 0.835217773243787, "grad_norm": 0.0, - "learning_rate": 1.3716280844408215e-06, - "loss": 0.7168, + "learning_rate": 1.3906648491360186e-06, + "loss": 0.7505, "step": 29474 }, { - "epoch": 0.8364074914869466, + "epoch": 0.8352461106860495, "grad_norm": 0.0, - "learning_rate": 1.371163543528361e-06, - "loss": 0.8831, + "learning_rate": 1.390197989947154e-06, + "loss": 0.8443, "step": 29475 }, { - "epoch": 0.8364358683314416, + "epoch": 0.835274448128312, "grad_norm": 0.0, - "learning_rate": 1.3706990755040695e-06, - "loss": 0.7919, + "learning_rate": 1.389731203281849e-06, + "loss": 0.7123, "step": 29476 }, { - "epoch": 0.8364642451759364, + "epoch": 0.8353027855705744, "grad_norm": 0.0, - "learning_rate": 1.3702346803718736e-06, - "loss": 0.7616, + "learning_rate": 1.3892644891440378e-06, + "loss": 0.7111, "step": 29477 }, { - "epoch": 0.8364926220204313, + "epoch": 0.8353311230128369, "grad_norm": 0.0, - "learning_rate": 1.3697703581356903e-06, - "loss": 0.8207, + "learning_rate": 1.3887978475376472e-06, + "loss": 0.7485, "step": 29478 }, { - "epoch": 0.8365209988649263, + "epoch": 0.8353594604550993, "grad_norm": 0.0, - "learning_rate": 1.3693061087994452e-06, - "loss": 0.8724, + "learning_rate": 1.3883312784666091e-06, + "loss": 0.7427, "step": 29479 }, { - "epoch": 0.8365493757094211, + "epoch": 0.8353877978973618, "grad_norm": 0.0, - "learning_rate": 1.3688419323670598e-06, - "loss": 0.8161, + "learning_rate": 1.387864781934858e-06, + "loss": 0.8595, "step": 29480 }, { - "epoch": 0.836577752553916, + "epoch": 0.8354161353396242, "grad_norm": 0.0, - "learning_rate": 1.3683778288424542e-06, - "loss": 0.8483, + "learning_rate": 1.3873983579463168e-06, + "loss": 0.7805, "step": 29481 }, { - "epoch": 0.836606129398411, + "epoch": 0.8354444727818867, "grad_norm": 0.0, - "learning_rate": 1.367913798229551e-06, - "loss": 0.7208, + "learning_rate": 1.3869320065049209e-06, + "loss": 0.8277, "step": 29482 }, { - "epoch": 0.8366345062429058, + "epoch": 0.8354728102241492, "grad_norm": 0.0, - "learning_rate": 1.3674498405322668e-06, - "loss": 0.8343, + "learning_rate": 1.3864657276145921e-06, + "loss": 0.734, "step": 29483 }, { - "epoch": 0.8366628830874007, + "epoch": 0.8355011476664116, "grad_norm": 0.0, - "learning_rate": 1.3669859557545206e-06, - "loss": 0.824, + "learning_rate": 1.385999521279261e-06, + "loss": 0.843, "step": 29484 }, { - "epoch": 0.8366912599318955, + "epoch": 0.8355294851086741, "grad_norm": 0.0, - "learning_rate": 1.3665221439002352e-06, - "loss": 0.8684, + "learning_rate": 1.3855333875028566e-06, + "loss": 0.9165, "step": 29485 }, { - "epoch": 0.8367196367763905, + "epoch": 0.8355578225509366, "grad_norm": 0.0, - "learning_rate": 1.3660584049733228e-06, - "loss": 0.811, + "learning_rate": 1.385067326289301e-06, + "loss": 0.8077, "step": 29486 }, { - "epoch": 0.8367480136208854, + "epoch": 0.835586159993199, "grad_norm": 0.0, - "learning_rate": 1.3655947389777025e-06, - "loss": 0.8487, + "learning_rate": 1.3846013376425227e-06, + "loss": 0.8349, "step": 29487 }, { - "epoch": 0.8367763904653802, + "epoch": 0.8356144974354615, "grad_norm": 0.0, - "learning_rate": 1.3651311459172944e-06, - "loss": 0.8077, + "learning_rate": 1.384135421566447e-06, + "loss": 0.7806, "step": 29488 }, { - "epoch": 0.8368047673098752, + "epoch": 0.835642834877724, "grad_norm": 0.0, - "learning_rate": 1.36466762579601e-06, - "loss": 0.831, + "learning_rate": 1.3836695780649979e-06, + "loss": 0.7439, "step": 29489 }, { - "epoch": 0.83683314415437, + "epoch": 0.8356711723199864, "grad_norm": 0.0, - "learning_rate": 1.3642041786177662e-06, - "loss": 0.8322, + "learning_rate": 1.3832038071421017e-06, + "loss": 0.8399, "step": 29490 }, { - "epoch": 0.8368615209988649, + "epoch": 0.8356995097622488, "grad_norm": 0.0, - "learning_rate": 1.3637408043864797e-06, - "loss": 0.7492, + "learning_rate": 1.3827381088016767e-06, + "loss": 0.7955, "step": 29491 }, { - "epoch": 0.8368898978433598, + "epoch": 0.8357278472045113, "grad_norm": 0.0, - "learning_rate": 1.3632775031060608e-06, - "loss": 0.8327, + "learning_rate": 1.3822724830476497e-06, + "loss": 0.8855, "step": 29492 }, { - "epoch": 0.8369182746878547, + "epoch": 0.8357561846467738, "grad_norm": 0.0, - "learning_rate": 1.3628142747804252e-06, - "loss": 0.8443, + "learning_rate": 1.3818069298839431e-06, + "loss": 0.8515, "step": 29493 }, { - "epoch": 0.8369466515323496, + "epoch": 0.8357845220890362, "grad_norm": 0.0, - "learning_rate": 1.3623511194134875e-06, - "loss": 0.7584, + "learning_rate": 1.381341449314475e-06, + "loss": 0.8294, "step": 29494 }, { - "epoch": 0.8369750283768445, + "epoch": 0.8358128595312987, "grad_norm": 0.0, - "learning_rate": 1.3618880370091536e-06, - "loss": 0.8446, + "learning_rate": 1.380876041343171e-06, + "loss": 0.7489, "step": 29495 }, { - "epoch": 0.8370034052213394, + "epoch": 0.8358411969735612, "grad_norm": 0.0, - "learning_rate": 1.3614250275713447e-06, - "loss": 0.7909, + "learning_rate": 1.3804107059739457e-06, + "loss": 0.8418, "step": 29496 }, { - "epoch": 0.8370317820658343, + "epoch": 0.8358695344158237, "grad_norm": 0.0, - "learning_rate": 1.3609620911039644e-06, - "loss": 0.8398, + "learning_rate": 1.3799454432107229e-06, + "loss": 0.7202, "step": 29497 }, { - "epoch": 0.8370601589103291, + "epoch": 0.8358978718580861, "grad_norm": 0.0, - "learning_rate": 1.3604992276109263e-06, - "loss": 0.7374, + "learning_rate": 1.3794802530574213e-06, + "loss": 0.9275, "step": 29498 }, { - "epoch": 0.8370885357548241, + "epoch": 0.8359262093003486, "grad_norm": 0.0, - "learning_rate": 1.3600364370961417e-06, - "loss": 0.8499, + "learning_rate": 1.3790151355179581e-06, + "loss": 0.8664, "step": 29499 }, { - "epoch": 0.837116912599319, + "epoch": 0.835954546742611, "grad_norm": 0.0, - "learning_rate": 1.359573719563515e-06, - "loss": 0.7321, + "learning_rate": 1.37855009059625e-06, + "loss": 0.8421, "step": 29500 }, { - "epoch": 0.8371452894438138, + "epoch": 0.8359828841848734, "grad_norm": 0.0, - "learning_rate": 1.359111075016958e-06, - "loss": 0.7939, + "learning_rate": 1.3780851182962174e-06, + "loss": 0.8497, "step": 29501 }, { - "epoch": 0.8371736662883087, + "epoch": 0.8360112216271359, "grad_norm": 0.0, - "learning_rate": 1.3586485034603792e-06, - "loss": 0.8138, + "learning_rate": 1.3776202186217747e-06, + "loss": 0.8304, "step": 29502 }, { - "epoch": 0.8372020431328037, + "epoch": 0.8360395590693984, "grad_norm": 0.0, - "learning_rate": 1.3581860048976836e-06, - "loss": 0.6968, + "learning_rate": 1.3771553915768421e-06, + "loss": 0.8026, "step": 29503 }, { - "epoch": 0.8372304199772985, + "epoch": 0.8360678965116609, "grad_norm": 0.0, - "learning_rate": 1.3577235793327792e-06, - "loss": 0.8364, + "learning_rate": 1.3766906371653289e-06, + "loss": 0.8541, "step": 29504 }, { - "epoch": 0.8372587968217934, + "epoch": 0.8360962339539233, "grad_norm": 0.0, - "learning_rate": 1.357261226769575e-06, - "loss": 0.7972, + "learning_rate": 1.3762259553911516e-06, + "loss": 0.8031, "step": 29505 }, { - "epoch": 0.8372871736662884, + "epoch": 0.8361245713961858, "grad_norm": 0.0, - "learning_rate": 1.3567989472119714e-06, - "loss": 0.8555, + "learning_rate": 1.3757613462582286e-06, + "loss": 0.8717, "step": 29506 }, { - "epoch": 0.8373155505107832, + "epoch": 0.8361529088384483, "grad_norm": 0.0, - "learning_rate": 1.356336740663875e-06, - "loss": 0.9111, + "learning_rate": 1.3752968097704677e-06, + "loss": 0.7498, "step": 29507 }, { - "epoch": 0.8373439273552781, + "epoch": 0.8361812462807107, "grad_norm": 0.0, - "learning_rate": 1.3558746071291928e-06, - "loss": 0.8558, + "learning_rate": 1.3748323459317848e-06, + "loss": 0.8939, "step": 29508 }, { - "epoch": 0.8373723041997729, + "epoch": 0.8362095837229732, "grad_norm": 0.0, - "learning_rate": 1.355412546611825e-06, - "loss": 0.8434, + "learning_rate": 1.3743679547460943e-06, + "loss": 0.8755, "step": 29509 }, { - "epoch": 0.8374006810442679, + "epoch": 0.8362379211652357, "grad_norm": 0.0, - "learning_rate": 1.354950559115673e-06, - "loss": 0.7953, + "learning_rate": 1.373903636217303e-06, + "loss": 0.6976, "step": 29510 }, { - "epoch": 0.8374290578887628, + "epoch": 0.836266258607498, "grad_norm": 0.0, - "learning_rate": 1.354488644644647e-06, - "loss": 0.9151, + "learning_rate": 1.373439390349327e-06, + "loss": 0.7961, "step": 29511 }, { - "epoch": 0.8374574347332576, + "epoch": 0.8362945960497605, "grad_norm": 0.0, - "learning_rate": 1.3540268032026427e-06, - "loss": 0.7523, + "learning_rate": 1.372975217146072e-06, + "loss": 0.8956, "step": 29512 }, { - "epoch": 0.8374858115777526, + "epoch": 0.836322933492023, "grad_norm": 0.0, - "learning_rate": 1.3535650347935636e-06, - "loss": 0.9322, + "learning_rate": 1.3725111166114514e-06, + "loss": 0.7522, "step": 29513 }, { - "epoch": 0.8375141884222475, + "epoch": 0.8363512709342855, "grad_norm": 0.0, - "learning_rate": 1.3531033394213078e-06, - "loss": 0.7072, + "learning_rate": 1.372047088749372e-06, + "loss": 0.7977, "step": 29514 }, { - "epoch": 0.8375425652667423, + "epoch": 0.8363796083765479, "grad_norm": 0.0, - "learning_rate": 1.3526417170897766e-06, - "loss": 0.7918, + "learning_rate": 1.371583133563744e-06, + "loss": 0.815, "step": 29515 }, { - "epoch": 0.8375709421112373, + "epoch": 0.8364079458188104, "grad_norm": 0.0, - "learning_rate": 1.3521801678028713e-06, - "loss": 0.7924, + "learning_rate": 1.371119251058478e-06, + "loss": 0.7686, "step": 29516 }, { - "epoch": 0.8375993189557321, + "epoch": 0.8364362832610729, "grad_norm": 0.0, - "learning_rate": 1.3517186915644876e-06, - "loss": 0.792, + "learning_rate": 1.3706554412374762e-06, + "loss": 0.8358, "step": 29517 }, { - "epoch": 0.837627695800227, + "epoch": 0.8364646207033353, "grad_norm": 0.0, - "learning_rate": 1.3512572883785235e-06, - "loss": 0.819, + "learning_rate": 1.3701917041046486e-06, + "loss": 0.7649, "step": 29518 }, { - "epoch": 0.8376560726447219, + "epoch": 0.8364929581455978, "grad_norm": 0.0, - "learning_rate": 1.3507959582488816e-06, - "loss": 0.8338, + "learning_rate": 1.3697280396639035e-06, + "loss": 0.8711, "step": 29519 }, { - "epoch": 0.8376844494892168, + "epoch": 0.8365212955878603, "grad_norm": 0.0, - "learning_rate": 1.3503347011794522e-06, - "loss": 0.7605, + "learning_rate": 1.369264447919141e-06, + "loss": 0.9089, "step": 29520 }, { - "epoch": 0.8377128263337117, + "epoch": 0.8365496330301228, "grad_norm": 0.0, - "learning_rate": 1.349873517174134e-06, - "loss": 0.874, + "learning_rate": 1.3688009288742688e-06, + "loss": 0.7816, "step": 29521 }, { - "epoch": 0.8377412031782066, + "epoch": 0.8365779704723851, "grad_norm": 0.0, - "learning_rate": 1.3494124062368264e-06, - "loss": 0.8828, + "learning_rate": 1.368337482533194e-06, + "loss": 0.8154, "step": 29522 }, { - "epoch": 0.8377695800227015, + "epoch": 0.8366063079146476, "grad_norm": 0.0, - "learning_rate": 1.3489513683714183e-06, - "loss": 0.7993, + "learning_rate": 1.367874108899815e-06, + "loss": 0.8033, "step": 29523 }, { - "epoch": 0.8377979568671964, + "epoch": 0.8366346453569101, "grad_norm": 0.0, - "learning_rate": 1.3484904035818059e-06, - "loss": 0.7822, + "learning_rate": 1.3674108079780414e-06, + "loss": 0.7932, "step": 29524 }, { - "epoch": 0.8378263337116912, + "epoch": 0.8366629827991725, "grad_norm": 0.0, - "learning_rate": 1.3480295118718877e-06, - "loss": 0.8159, + "learning_rate": 1.366947579771769e-06, + "loss": 0.8269, "step": 29525 }, { - "epoch": 0.8378547105561861, + "epoch": 0.836691320241435, "grad_norm": 0.0, - "learning_rate": 1.347568693245548e-06, - "loss": 0.8138, + "learning_rate": 1.366484424284903e-06, + "loss": 0.8682, "step": 29526 }, { - "epoch": 0.8378830874006811, + "epoch": 0.8367196576836975, "grad_norm": 0.0, - "learning_rate": 1.3471079477066863e-06, - "loss": 0.8224, + "learning_rate": 1.366021341521344e-06, + "loss": 0.7366, "step": 29527 }, { - "epoch": 0.8379114642451759, + "epoch": 0.8367479951259599, "grad_norm": 0.0, - "learning_rate": 1.3466472752591953e-06, - "loss": 0.876, + "learning_rate": 1.3655583314849952e-06, + "loss": 0.7274, "step": 29528 }, { - "epoch": 0.8379398410896708, + "epoch": 0.8367763325682224, "grad_norm": 0.0, - "learning_rate": 1.346186675906962e-06, - "loss": 0.89, + "learning_rate": 1.365095394179754e-06, + "loss": 0.7977, "step": 29529 }, { - "epoch": 0.8379682179341658, + "epoch": 0.8368046700104849, "grad_norm": 0.0, - "learning_rate": 1.3457261496538787e-06, - "loss": 0.7398, + "learning_rate": 1.364632529609522e-06, + "loss": 0.8098, "step": 29530 }, { - "epoch": 0.8379965947786606, + "epoch": 0.8368330074527474, "grad_norm": 0.0, - "learning_rate": 1.345265696503838e-06, - "loss": 0.804, + "learning_rate": 1.3641697377781959e-06, + "loss": 0.774, "step": 29531 }, { - "epoch": 0.8380249716231555, + "epoch": 0.8368613448950097, "grad_norm": 0.0, - "learning_rate": 1.3448053164607243e-06, - "loss": 0.7138, + "learning_rate": 1.3637070186896773e-06, + "loss": 0.805, "step": 29532 }, { - "epoch": 0.8380533484676504, + "epoch": 0.8368896823372722, "grad_norm": 0.0, - "learning_rate": 1.3443450095284294e-06, - "loss": 0.8469, + "learning_rate": 1.3632443723478584e-06, + "loss": 0.8457, "step": 29533 }, { - "epoch": 0.8380817253121453, + "epoch": 0.8369180197795347, "grad_norm": 0.0, - "learning_rate": 1.343884775710843e-06, - "loss": 0.7706, + "learning_rate": 1.3627817987566394e-06, + "loss": 0.8724, "step": 29534 }, { - "epoch": 0.8381101021566402, + "epoch": 0.8369463572217971, "grad_norm": 0.0, - "learning_rate": 1.343424615011849e-06, - "loss": 0.8917, + "learning_rate": 1.3623192979199196e-06, + "loss": 0.7828, "step": 29535 }, { - "epoch": 0.838138479001135, + "epoch": 0.8369746946640596, "grad_norm": 0.0, - "learning_rate": 1.3429645274353375e-06, - "loss": 0.8184, + "learning_rate": 1.361856869841589e-06, + "loss": 0.7601, "step": 29536 }, { - "epoch": 0.83816685584563, + "epoch": 0.8370030321063221, "grad_norm": 0.0, - "learning_rate": 1.3425045129851954e-06, - "loss": 0.8616, + "learning_rate": 1.3613945145255458e-06, + "loss": 0.8475, "step": 29537 }, { - "epoch": 0.8381952326901249, + "epoch": 0.8370313695485846, "grad_norm": 0.0, - "learning_rate": 1.3420445716653042e-06, - "loss": 0.911, + "learning_rate": 1.3609322319756868e-06, + "loss": 0.7317, "step": 29538 }, { - "epoch": 0.8382236095346197, + "epoch": 0.837059706990847, "grad_norm": 0.0, - "learning_rate": 1.3415847034795537e-06, - "loss": 0.7003, + "learning_rate": 1.3604700221959022e-06, + "loss": 0.8177, "step": 29539 }, { - "epoch": 0.8382519863791147, + "epoch": 0.8370880444331095, "grad_norm": 0.0, - "learning_rate": 1.3411249084318245e-06, - "loss": 0.9179, + "learning_rate": 1.3600078851900854e-06, + "loss": 0.9022, "step": 29540 }, { - "epoch": 0.8382803632236095, + "epoch": 0.837116381875372, "grad_norm": 0.0, - "learning_rate": 1.3406651865260012e-06, - "loss": 0.8384, + "learning_rate": 1.3595458209621314e-06, + "loss": 0.7782, "step": 29541 }, { - "epoch": 0.8383087400681044, + "epoch": 0.8371447193176343, "grad_norm": 0.0, - "learning_rate": 1.3402055377659716e-06, - "loss": 0.8087, + "learning_rate": 1.3590838295159315e-06, + "loss": 0.7754, "step": 29542 }, { - "epoch": 0.8383371169125993, + "epoch": 0.8371730567598968, "grad_norm": 0.0, - "learning_rate": 1.339745962155613e-06, - "loss": 0.7833, + "learning_rate": 1.3586219108553799e-06, + "loss": 0.7941, "step": 29543 }, { - "epoch": 0.8383654937570942, + "epoch": 0.8372013942021593, "grad_norm": 0.0, - "learning_rate": 1.3392864596988097e-06, - "loss": 0.7984, + "learning_rate": 1.3581600649843617e-06, + "loss": 0.8321, "step": 29544 }, { - "epoch": 0.8383938706015891, + "epoch": 0.8372297316444218, "grad_norm": 0.0, - "learning_rate": 1.3388270303994455e-06, - "loss": 0.7957, + "learning_rate": 1.3576982919067727e-06, + "loss": 0.7216, "step": 29545 }, { - "epoch": 0.838422247446084, + "epoch": 0.8372580690866842, "grad_norm": 0.0, - "learning_rate": 1.3383676742613971e-06, - "loss": 0.8737, + "learning_rate": 1.3572365916264984e-06, + "loss": 0.7889, "step": 29546 }, { - "epoch": 0.8384506242905789, + "epoch": 0.8372864065289467, "grad_norm": 0.0, - "learning_rate": 1.337908391288545e-06, - "loss": 0.8606, + "learning_rate": 1.3567749641474294e-06, + "loss": 0.7599, "step": 29547 }, { - "epoch": 0.8384790011350738, + "epoch": 0.8373147439712092, "grad_norm": 0.0, - "learning_rate": 1.3374491814847734e-06, - "loss": 0.7307, + "learning_rate": 1.3563134094734566e-06, + "loss": 0.8001, "step": 29548 }, { - "epoch": 0.8385073779795686, + "epoch": 0.8373430814134716, "grad_norm": 0.0, - "learning_rate": 1.3369900448539553e-06, - "loss": 0.8079, + "learning_rate": 1.3558519276084636e-06, + "loss": 0.7742, "step": 29549 }, { - "epoch": 0.8385357548240636, + "epoch": 0.8373714188557341, "grad_norm": 0.0, - "learning_rate": 1.3365309813999716e-06, - "loss": 0.7138, + "learning_rate": 1.3553905185563398e-06, + "loss": 0.8362, "step": 29550 }, { - "epoch": 0.8385641316685585, + "epoch": 0.8373997562979966, "grad_norm": 0.0, - "learning_rate": 1.3360719911267027e-06, - "loss": 0.8384, + "learning_rate": 1.354929182320972e-06, + "loss": 0.8127, "step": 29551 }, { - "epoch": 0.8385925085130533, + "epoch": 0.837428093740259, "grad_norm": 0.0, - "learning_rate": 1.3356130740380203e-06, - "loss": 0.83, + "learning_rate": 1.354467918906246e-06, + "loss": 0.8409, "step": 29552 }, { - "epoch": 0.8386208853575482, + "epoch": 0.8374564311825214, "grad_norm": 0.0, - "learning_rate": 1.335154230137804e-06, - "loss": 0.7746, + "learning_rate": 1.3540067283160485e-06, + "loss": 0.6887, "step": 29553 }, { - "epoch": 0.8386492622020432, + "epoch": 0.8374847686247839, "grad_norm": 0.0, - "learning_rate": 1.3346954594299322e-06, - "loss": 0.7807, + "learning_rate": 1.353545610554261e-06, + "loss": 0.9284, "step": 29554 }, { - "epoch": 0.838677639046538, + "epoch": 0.8375131060670464, "grad_norm": 0.0, - "learning_rate": 1.3342367619182751e-06, - "loss": 0.74, + "learning_rate": 1.3530845656247705e-06, + "loss": 0.8128, "step": 29555 }, { - "epoch": 0.8387060158910329, + "epoch": 0.8375414435093088, "grad_norm": 0.0, - "learning_rate": 1.3337781376067094e-06, - "loss": 0.7594, + "learning_rate": 1.3526235935314614e-06, + "loss": 0.8599, "step": 29556 }, { - "epoch": 0.8387343927355279, + "epoch": 0.8375697809515713, "grad_norm": 0.0, - "learning_rate": 1.3333195864991088e-06, - "loss": 0.8842, + "learning_rate": 1.352162694278213e-06, + "loss": 0.777, "step": 29557 }, { - "epoch": 0.8387627695800227, + "epoch": 0.8375981183938338, "grad_norm": 0.0, - "learning_rate": 1.3328611085993482e-06, - "loss": 0.9163, + "learning_rate": 1.3517018678689086e-06, + "loss": 0.898, "step": 29558 }, { - "epoch": 0.8387911464245176, + "epoch": 0.8376264558360962, "grad_norm": 0.0, - "learning_rate": 1.3324027039113008e-06, - "loss": 0.7342, + "learning_rate": 1.3512411143074333e-06, + "loss": 0.7872, "step": 29559 }, { - "epoch": 0.8388195232690124, + "epoch": 0.8376547932783587, "grad_norm": 0.0, - "learning_rate": 1.3319443724388358e-06, - "loss": 0.7413, + "learning_rate": 1.3507804335976638e-06, + "loss": 0.8946, "step": 29560 }, { - "epoch": 0.8388479001135074, + "epoch": 0.8376831307206212, "grad_norm": 0.0, - "learning_rate": 1.331486114185826e-06, - "loss": 0.8178, + "learning_rate": 1.3503198257434847e-06, + "loss": 0.7947, "step": 29561 }, { - "epoch": 0.8388762769580023, + "epoch": 0.8377114681628837, "grad_norm": 0.0, - "learning_rate": 1.3310279291561446e-06, - "loss": 0.8462, + "learning_rate": 1.3498592907487717e-06, + "loss": 0.751, "step": 29562 }, { - "epoch": 0.8389046538024971, + "epoch": 0.837739805605146, "grad_norm": 0.0, - "learning_rate": 1.3305698173536586e-06, - "loss": 0.8107, + "learning_rate": 1.3493988286174054e-06, + "loss": 0.7694, "step": 29563 }, { - "epoch": 0.8389330306469921, + "epoch": 0.8377681430474085, "grad_norm": 0.0, - "learning_rate": 1.3301117787822382e-06, - "loss": 0.7722, + "learning_rate": 1.3489384393532656e-06, + "loss": 0.7465, "step": 29564 }, { - "epoch": 0.838961407491487, + "epoch": 0.837796480489671, "grad_norm": 0.0, - "learning_rate": 1.329653813445756e-06, - "loss": 0.7845, + "learning_rate": 1.3484781229602295e-06, + "loss": 0.8208, "step": 29565 }, { - "epoch": 0.8389897843359818, + "epoch": 0.8378248179319334, "grad_norm": 0.0, - "learning_rate": 1.3291959213480753e-06, - "loss": 0.8503, + "learning_rate": 1.3480178794421773e-06, + "loss": 0.9112, "step": 29566 }, { - "epoch": 0.8390181611804768, + "epoch": 0.8378531553741959, "grad_norm": 0.0, - "learning_rate": 1.3287381024930668e-06, - "loss": 0.8511, + "learning_rate": 1.3475577088029812e-06, + "loss": 0.8387, "step": 29567 }, { - "epoch": 0.8390465380249716, + "epoch": 0.8378814928164584, "grad_norm": 0.0, - "learning_rate": 1.3282803568845992e-06, - "loss": 0.6626, + "learning_rate": 1.3470976110465196e-06, + "loss": 0.6472, "step": 29568 }, { - "epoch": 0.8390749148694665, + "epoch": 0.8379098302587209, "grad_norm": 0.0, - "learning_rate": 1.327822684526535e-06, - "loss": 0.7807, + "learning_rate": 1.34663758617667e-06, + "loss": 0.7478, "step": 29569 }, { - "epoch": 0.8391032917139614, + "epoch": 0.8379381677009833, "grad_norm": 0.0, - "learning_rate": 1.3273650854227438e-06, - "loss": 0.8049, + "learning_rate": 1.346177634197303e-06, + "loss": 0.7971, "step": 29570 }, { - "epoch": 0.8391316685584563, + "epoch": 0.8379665051432458, "grad_norm": 0.0, - "learning_rate": 1.3269075595770896e-06, - "loss": 0.7373, + "learning_rate": 1.3457177551122958e-06, + "loss": 0.8736, "step": 29571 }, { - "epoch": 0.8391600454029512, + "epoch": 0.8379948425855083, "grad_norm": 0.0, - "learning_rate": 1.3264501069934343e-06, - "loss": 0.8166, + "learning_rate": 1.3452579489255235e-06, + "loss": 0.8584, "step": 29572 }, { - "epoch": 0.839188422247446, + "epoch": 0.8380231800277707, "grad_norm": 0.0, - "learning_rate": 1.325992727675649e-06, - "loss": 0.7581, + "learning_rate": 1.3447982156408556e-06, + "loss": 0.817, "step": 29573 }, { - "epoch": 0.839216799091941, + "epoch": 0.8380515174700331, "grad_norm": 0.0, - "learning_rate": 1.3255354216275905e-06, - "loss": 0.7517, + "learning_rate": 1.344338555262168e-06, + "loss": 0.7616, "step": 29574 }, { - "epoch": 0.8392451759364359, + "epoch": 0.8380798549122956, "grad_norm": 0.0, - "learning_rate": 1.3250781888531239e-06, - "loss": 0.7478, + "learning_rate": 1.3438789677933283e-06, + "loss": 0.8565, "step": 29575 }, { - "epoch": 0.8392735527809307, + "epoch": 0.838108192354558, "grad_norm": 0.0, - "learning_rate": 1.3246210293561146e-06, - "loss": 0.8073, + "learning_rate": 1.3434194532382116e-06, + "loss": 0.8116, "step": 29576 }, { - "epoch": 0.8393019296254256, + "epoch": 0.8381365297968205, "grad_norm": 0.0, - "learning_rate": 1.3241639431404196e-06, - "loss": 0.776, + "learning_rate": 1.342960011600687e-06, + "loss": 0.8107, "step": 29577 }, { - "epoch": 0.8393303064699206, + "epoch": 0.838164867239083, "grad_norm": 0.0, - "learning_rate": 1.3237069302099014e-06, - "loss": 0.8972, + "learning_rate": 1.3425006428846243e-06, + "loss": 0.7544, "step": 29578 }, { - "epoch": 0.8393586833144154, + "epoch": 0.8381932046813455, "grad_norm": 0.0, - "learning_rate": 1.3232499905684226e-06, - "loss": 0.7756, + "learning_rate": 1.3420413470938942e-06, + "loss": 0.8513, "step": 29579 }, { - "epoch": 0.8393870601589103, + "epoch": 0.8382215421236079, "grad_norm": 0.0, - "learning_rate": 1.3227931242198388e-06, - "loss": 0.8732, + "learning_rate": 1.3415821242323667e-06, + "loss": 0.8511, "step": 29580 }, { - "epoch": 0.8394154370034053, + "epoch": 0.8382498795658704, "grad_norm": 0.0, - "learning_rate": 1.3223363311680126e-06, - "loss": 0.8323, + "learning_rate": 1.3411229743039055e-06, + "loss": 0.7938, "step": 29581 }, { - "epoch": 0.8394438138479001, + "epoch": 0.8382782170081329, "grad_norm": 0.0, - "learning_rate": 1.3218796114168031e-06, - "loss": 0.7079, + "learning_rate": 1.3406638973123842e-06, + "loss": 0.844, "step": 29582 }, { - "epoch": 0.839472190692395, + "epoch": 0.8383065544503953, "grad_norm": 0.0, - "learning_rate": 1.3214229649700639e-06, - "loss": 0.7099, + "learning_rate": 1.3402048932616641e-06, + "loss": 0.8172, "step": 29583 }, { - "epoch": 0.83950056753689, + "epoch": 0.8383348918926578, "grad_norm": 0.0, - "learning_rate": 1.3209663918316562e-06, - "loss": 0.7437, + "learning_rate": 1.339745962155613e-06, + "loss": 0.8924, "step": 29584 }, { - "epoch": 0.8395289443813848, + "epoch": 0.8383632293349202, "grad_norm": 0.0, - "learning_rate": 1.320509892005437e-06, - "loss": 0.7855, + "learning_rate": 1.3392871039981004e-06, + "loss": 0.8009, "step": 29585 }, { - "epoch": 0.8395573212258797, + "epoch": 0.8383915667771827, "grad_norm": 0.0, - "learning_rate": 1.320053465495259e-06, - "loss": 0.7776, + "learning_rate": 1.3388283187929874e-06, + "loss": 0.8813, "step": 29586 }, { - "epoch": 0.8395856980703745, + "epoch": 0.8384199042194451, "grad_norm": 0.0, - "learning_rate": 1.3195971123049788e-06, - "loss": 0.8581, + "learning_rate": 1.3383696065441376e-06, + "loss": 0.8349, "step": 29587 }, { - "epoch": 0.8396140749148695, + "epoch": 0.8384482416617076, "grad_norm": 0.0, - "learning_rate": 1.3191408324384525e-06, - "loss": 0.8259, + "learning_rate": 1.3379109672554213e-06, + "loss": 0.7775, "step": 29588 }, { - "epoch": 0.8396424517593644, + "epoch": 0.8384765791039701, "grad_norm": 0.0, - "learning_rate": 1.3186846258995346e-06, - "loss": 0.8102, + "learning_rate": 1.3374524009306944e-06, + "loss": 0.8109, "step": 29589 }, { - "epoch": 0.8396708286038592, + "epoch": 0.8385049165462325, "grad_norm": 0.0, - "learning_rate": 1.3182284926920797e-06, - "loss": 0.7161, + "learning_rate": 1.3369939075738226e-06, + "loss": 0.8611, "step": 29590 }, { - "epoch": 0.8396992054483542, + "epoch": 0.838533253988495, "grad_norm": 0.0, - "learning_rate": 1.317772432819936e-06, - "loss": 0.9015, + "learning_rate": 1.3365354871886672e-06, + "loss": 0.8345, "step": 29591 }, { - "epoch": 0.839727582292849, + "epoch": 0.8385615914307575, "grad_norm": 0.0, - "learning_rate": 1.3173164462869591e-06, - "loss": 0.7343, + "learning_rate": 1.3360771397790918e-06, + "loss": 0.7632, "step": 29592 }, { - "epoch": 0.8397559591373439, + "epoch": 0.83858992887302, "grad_norm": 0.0, - "learning_rate": 1.316860533097003e-06, - "loss": 0.8127, + "learning_rate": 1.3356188653489578e-06, + "loss": 0.8641, "step": 29593 }, { - "epoch": 0.8397843359818388, + "epoch": 0.8386182663152824, "grad_norm": 0.0, - "learning_rate": 1.3164046932539142e-06, - "loss": 0.7197, + "learning_rate": 1.3351606639021209e-06, + "loss": 0.8512, "step": 29594 }, { - "epoch": 0.8398127128263337, + "epoch": 0.8386466037575449, "grad_norm": 0.0, - "learning_rate": 1.3159489267615455e-06, - "loss": 0.89, + "learning_rate": 1.3347025354424459e-06, + "loss": 0.7646, "step": 29595 }, { - "epoch": 0.8398410896708286, + "epoch": 0.8386749411998073, "grad_norm": 0.0, - "learning_rate": 1.3154932336237481e-06, - "loss": 0.8116, + "learning_rate": 1.3342444799737876e-06, + "loss": 0.7321, "step": 29596 }, { - "epoch": 0.8398694665153235, + "epoch": 0.8387032786420697, "grad_norm": 0.0, - "learning_rate": 1.3150376138443676e-06, - "loss": 0.737, + "learning_rate": 1.3337864975000047e-06, + "loss": 0.788, "step": 29597 }, { - "epoch": 0.8398978433598184, + "epoch": 0.8387316160843322, "grad_norm": 0.0, - "learning_rate": 1.3145820674272558e-06, - "loss": 0.823, + "learning_rate": 1.333328588024959e-06, + "loss": 0.8132, "step": 29598 }, { - "epoch": 0.8399262202043133, + "epoch": 0.8387599535265947, "grad_norm": 0.0, - "learning_rate": 1.3141265943762615e-06, - "loss": 0.8521, + "learning_rate": 1.332870751552503e-06, + "loss": 0.8197, "step": 29599 }, { - "epoch": 0.8399545970488081, + "epoch": 0.8387882909688571, "grad_norm": 0.0, - "learning_rate": 1.3136711946952274e-06, - "loss": 0.8555, + "learning_rate": 1.3324129880864954e-06, + "loss": 0.7013, "step": 29600 }, { - "epoch": 0.8399829738933031, + "epoch": 0.8388166284111196, "grad_norm": 0.0, - "learning_rate": 1.3132158683880037e-06, - "loss": 0.8014, + "learning_rate": 1.3319552976307938e-06, + "loss": 0.7254, "step": 29601 }, { - "epoch": 0.840011350737798, + "epoch": 0.8388449658533821, "grad_norm": 0.0, - "learning_rate": 1.312760615458436e-06, - "loss": 0.8889, + "learning_rate": 1.3314976801892487e-06, + "loss": 0.8366, "step": 29602 }, { - "epoch": 0.8400397275822928, + "epoch": 0.8388733032956446, "grad_norm": 0.0, - "learning_rate": 1.3123054359103705e-06, - "loss": 0.8307, + "learning_rate": 1.3310401357657176e-06, + "loss": 0.8129, "step": 29603 }, { - "epoch": 0.8400681044267877, + "epoch": 0.838901640737907, "grad_norm": 0.0, - "learning_rate": 1.3118503297476525e-06, - "loss": 0.7955, + "learning_rate": 1.3305826643640552e-06, + "loss": 0.7324, "step": 29604 }, { - "epoch": 0.8400964812712827, + "epoch": 0.8389299781801695, "grad_norm": 0.0, - "learning_rate": 1.311395296974124e-06, - "loss": 0.8045, + "learning_rate": 1.3301252659881148e-06, + "loss": 0.8308, "step": 29605 }, { - "epoch": 0.8401248581157775, + "epoch": 0.838958315622432, "grad_norm": 0.0, - "learning_rate": 1.3109403375936302e-06, - "loss": 0.8697, + "learning_rate": 1.3296679406417502e-06, + "loss": 0.7472, "step": 29606 }, { - "epoch": 0.8401532349602724, + "epoch": 0.8389866530646943, "grad_norm": 0.0, - "learning_rate": 1.3104854516100152e-06, - "loss": 0.8871, + "learning_rate": 1.3292106883288092e-06, + "loss": 0.7225, "step": 29607 }, { - "epoch": 0.8401816118047674, + "epoch": 0.8390149905069568, "grad_norm": 0.0, - "learning_rate": 1.3100306390271188e-06, - "loss": 0.9156, + "learning_rate": 1.3287535090531478e-06, + "loss": 0.8215, "step": 29608 }, { - "epoch": 0.8402099886492622, + "epoch": 0.8390433279492193, "grad_norm": 0.0, - "learning_rate": 1.309575899848784e-06, - "loss": 0.8091, + "learning_rate": 1.3282964028186175e-06, + "loss": 0.8414, "step": 29609 }, { - "epoch": 0.8402383654937571, + "epoch": 0.8390716653914818, "grad_norm": 0.0, - "learning_rate": 1.309121234078854e-06, - "loss": 0.72, + "learning_rate": 1.3278393696290636e-06, + "loss": 0.7817, "step": 29610 }, { - "epoch": 0.8402667423382519, + "epoch": 0.8391000028337442, "grad_norm": 0.0, - "learning_rate": 1.3086666417211658e-06, - "loss": 0.8155, + "learning_rate": 1.3273824094883425e-06, + "loss": 0.7706, "step": 29611 }, { - "epoch": 0.8402951191827469, + "epoch": 0.8391283402760067, "grad_norm": 0.0, - "learning_rate": 1.3082121227795619e-06, - "loss": 0.8987, + "learning_rate": 1.3269255224002963e-06, + "loss": 0.6686, "step": 29612 }, { - "epoch": 0.8403234960272418, + "epoch": 0.8391566777182692, "grad_norm": 0.0, - "learning_rate": 1.3077576772578827e-06, - "loss": 0.7992, + "learning_rate": 1.3264687083687788e-06, + "loss": 0.7631, "step": 29613 }, { - "epoch": 0.8403518728717366, + "epoch": 0.8391850151605316, "grad_norm": 0.0, - "learning_rate": 1.3073033051599626e-06, - "loss": 0.6708, + "learning_rate": 1.3260119673976369e-06, + "loss": 0.8482, "step": 29614 }, { - "epoch": 0.8403802497162316, + "epoch": 0.8392133526027941, "grad_norm": 0.0, - "learning_rate": 1.3068490064896422e-06, - "loss": 0.825, + "learning_rate": 1.325555299490716e-06, + "loss": 0.6667, "step": 29615 }, { - "epoch": 0.8404086265607265, + "epoch": 0.8392416900450566, "grad_norm": 0.0, - "learning_rate": 1.3063947812507615e-06, - "loss": 0.7653, + "learning_rate": 1.3250987046518638e-06, + "loss": 0.8007, "step": 29616 }, { - "epoch": 0.8404370034052213, + "epoch": 0.839270027487319, "grad_norm": 0.0, - "learning_rate": 1.3059406294471523e-06, - "loss": 0.8858, + "learning_rate": 1.3246421828849255e-06, + "loss": 0.7828, "step": 29617 }, { - "epoch": 0.8404653802497162, + "epoch": 0.8392983649295814, "grad_norm": 0.0, - "learning_rate": 1.3054865510826508e-06, - "loss": 0.7793, + "learning_rate": 1.3241857341937491e-06, + "loss": 0.8111, "step": 29618 }, { - "epoch": 0.8404937570942111, + "epoch": 0.8393267023718439, "grad_norm": 0.0, - "learning_rate": 1.3050325461611013e-06, - "loss": 0.8317, + "learning_rate": 1.3237293585821786e-06, + "loss": 0.7924, "step": 29619 }, { - "epoch": 0.840522133938706, + "epoch": 0.8393550398141064, "grad_norm": 0.0, - "learning_rate": 1.3045786146863293e-06, - "loss": 0.8454, + "learning_rate": 1.3232730560540564e-06, + "loss": 0.8059, "step": 29620 }, { - "epoch": 0.8405505107832009, + "epoch": 0.8393833772563688, "grad_norm": 0.0, - "learning_rate": 1.3041247566621752e-06, - "loss": 0.7506, + "learning_rate": 1.322816826613228e-06, + "loss": 0.8033, "step": 29621 }, { - "epoch": 0.8405788876276958, + "epoch": 0.8394117146986313, "grad_norm": 0.0, - "learning_rate": 1.3036709720924679e-06, - "loss": 0.8296, + "learning_rate": 1.3223606702635362e-06, + "loss": 0.878, "step": 29622 }, { - "epoch": 0.8406072644721907, + "epoch": 0.8394400521408938, "grad_norm": 0.0, - "learning_rate": 1.3032172609810422e-06, - "loss": 0.7038, + "learning_rate": 1.321904587008822e-06, + "loss": 0.7423, "step": 29623 }, { - "epoch": 0.8406356413166856, + "epoch": 0.8394683895831562, "grad_norm": 0.0, - "learning_rate": 1.3027636233317342e-06, - "loss": 0.8354, + "learning_rate": 1.3214485768529296e-06, + "loss": 0.8175, "step": 29624 }, { - "epoch": 0.8406640181611805, + "epoch": 0.8394967270254187, "grad_norm": 0.0, - "learning_rate": 1.3023100591483707e-06, - "loss": 0.7029, + "learning_rate": 1.3209926397996963e-06, + "loss": 0.7769, "step": 29625 }, { - "epoch": 0.8406923950056754, + "epoch": 0.8395250644676812, "grad_norm": 0.0, - "learning_rate": 1.301856568434785e-06, - "loss": 0.7558, + "learning_rate": 1.320536775852964e-06, + "loss": 0.9094, "step": 29626 }, { - "epoch": 0.8407207718501702, + "epoch": 0.8395534019099437, "grad_norm": 0.0, - "learning_rate": 1.30140315119481e-06, - "loss": 0.8165, + "learning_rate": 1.3200809850165775e-06, + "loss": 0.7786, "step": 29627 }, { - "epoch": 0.8407491486946651, + "epoch": 0.839581739352206, "grad_norm": 0.0, - "learning_rate": 1.3009498074322712e-06, - "loss": 0.8291, + "learning_rate": 1.3196252672943688e-06, + "loss": 0.7927, "step": 29628 }, { - "epoch": 0.8407775255391601, + "epoch": 0.8396100767944685, "grad_norm": 0.0, - "learning_rate": 1.3004965371510003e-06, - "loss": 0.838, + "learning_rate": 1.3191696226901795e-06, + "loss": 0.7952, "step": 29629 }, { - "epoch": 0.8408059023836549, + "epoch": 0.839638414236731, "grad_norm": 0.0, - "learning_rate": 1.3000433403548297e-06, - "loss": 0.7999, + "learning_rate": 1.318714051207849e-06, + "loss": 0.862, "step": 29630 }, { - "epoch": 0.8408342792281498, + "epoch": 0.8396667516789934, "grad_norm": 0.0, - "learning_rate": 1.2995902170475804e-06, - "loss": 0.855, + "learning_rate": 1.3182585528512126e-06, + "loss": 0.7749, "step": 29631 }, { - "epoch": 0.8408626560726448, + "epoch": 0.8396950891212559, "grad_norm": 0.0, - "learning_rate": 1.2991371672330844e-06, - "loss": 0.8292, + "learning_rate": 1.3178031276241122e-06, + "loss": 0.8727, "step": 29632 }, { - "epoch": 0.8408910329171396, + "epoch": 0.8397234265635184, "grad_norm": 0.0, - "learning_rate": 1.2986841909151683e-06, - "loss": 0.8334, + "learning_rate": 1.3173477755303765e-06, + "loss": 0.8189, "step": 29633 }, { - "epoch": 0.8409194097616345, + "epoch": 0.8397517640057809, "grad_norm": 0.0, - "learning_rate": 1.2982312880976567e-06, - "loss": 0.8965, + "learning_rate": 1.316892496573845e-06, + "loss": 0.7247, "step": 29634 }, { - "epoch": 0.8409477866061293, + "epoch": 0.8397801014480433, "grad_norm": 0.0, - "learning_rate": 1.2977784587843778e-06, - "loss": 0.8661, + "learning_rate": 1.3164372907583545e-06, + "loss": 0.7868, "step": 29635 }, { - "epoch": 0.8409761634506243, + "epoch": 0.8398084388903058, "grad_norm": 0.0, - "learning_rate": 1.2973257029791564e-06, - "loss": 0.7602, + "learning_rate": 1.3159821580877353e-06, + "loss": 0.8867, "step": 29636 }, { - "epoch": 0.8410045402951192, + "epoch": 0.8398367763325683, "grad_norm": 0.0, - "learning_rate": 1.2968730206858148e-06, - "loss": 0.7975, + "learning_rate": 1.315527098565823e-06, + "loss": 0.8831, "step": 29637 }, { - "epoch": 0.841032917139614, + "epoch": 0.8398651137748306, "grad_norm": 0.0, - "learning_rate": 1.296420411908178e-06, - "loss": 0.8016, + "learning_rate": 1.3150721121964538e-06, + "loss": 0.8476, "step": 29638 }, { - "epoch": 0.841061293984109, + "epoch": 0.8398934512170931, "grad_norm": 0.0, - "learning_rate": 1.2959678766500706e-06, - "loss": 0.7955, + "learning_rate": 1.3146171989834544e-06, + "loss": 0.7886, "step": 29639 }, { - "epoch": 0.8410896708286039, + "epoch": 0.8399217886593556, "grad_norm": 0.0, - "learning_rate": 1.2955154149153126e-06, - "loss": 0.8784, + "learning_rate": 1.3141623589306619e-06, + "loss": 0.7295, "step": 29640 }, { - "epoch": 0.8411180476730987, + "epoch": 0.8399501261016181, "grad_norm": 0.0, - "learning_rate": 1.295063026707727e-06, - "loss": 0.7681, + "learning_rate": 1.3137075920419017e-06, + "loss": 0.9214, "step": 29641 }, { - "epoch": 0.8411464245175937, + "epoch": 0.8399784635438805, "grad_norm": 0.0, - "learning_rate": 1.2946107120311369e-06, - "loss": 0.7555, + "learning_rate": 1.3132528983210103e-06, + "loss": 0.95, "step": 29642 }, { - "epoch": 0.8411748013620886, + "epoch": 0.840006800986143, "grad_norm": 0.0, - "learning_rate": 1.2941584708893596e-06, - "loss": 0.7713, + "learning_rate": 1.312798277771814e-06, + "loss": 0.8737, "step": 29643 }, { - "epoch": 0.8412031782065834, + "epoch": 0.8400351384284055, "grad_norm": 0.0, - "learning_rate": 1.2937063032862174e-06, - "loss": 0.8138, + "learning_rate": 1.3123437303981446e-06, + "loss": 0.7684, "step": 29644 }, { - "epoch": 0.8412315550510783, + "epoch": 0.8400634758706679, "grad_norm": 0.0, - "learning_rate": 1.2932542092255308e-06, - "loss": 0.8692, + "learning_rate": 1.3118892562038288e-06, + "loss": 0.7925, "step": 29645 }, { - "epoch": 0.8412599318955732, + "epoch": 0.8400918133129304, "grad_norm": 0.0, - "learning_rate": 1.2928021887111164e-06, - "loss": 0.7622, + "learning_rate": 1.3114348551926991e-06, + "loss": 0.7913, "step": 29646 }, { - "epoch": 0.8412883087400681, + "epoch": 0.8401201507551929, "grad_norm": 0.0, - "learning_rate": 1.2923502417467948e-06, - "loss": 0.8107, + "learning_rate": 1.3109805273685783e-06, + "loss": 0.8591, "step": 29647 }, { - "epoch": 0.841316685584563, + "epoch": 0.8401484881974552, "grad_norm": 0.0, - "learning_rate": 1.2918983683363772e-06, - "loss": 0.8102, + "learning_rate": 1.3105262727352964e-06, + "loss": 0.8033, "step": 29648 }, { - "epoch": 0.8413450624290579, + "epoch": 0.8401768256397177, "grad_norm": 0.0, - "learning_rate": 1.2914465684836886e-06, - "loss": 0.7069, + "learning_rate": 1.310072091296677e-06, + "loss": 0.7766, "step": 29649 }, { - "epoch": 0.8413734392735528, + "epoch": 0.8402051630819802, "grad_norm": 0.0, - "learning_rate": 1.2909948421925445e-06, - "loss": 0.8383, + "learning_rate": 1.309617983056547e-06, + "loss": 0.8144, "step": 29650 }, { - "epoch": 0.8414018161180477, + "epoch": 0.8402335005242427, "grad_norm": 0.0, - "learning_rate": 1.2905431894667552e-06, - "loss": 0.8071, + "learning_rate": 1.3091639480187334e-06, + "loss": 0.9367, "step": 29651 }, { - "epoch": 0.8414301929625425, + "epoch": 0.8402618379665051, "grad_norm": 0.0, - "learning_rate": 1.2900916103101403e-06, - "loss": 0.8055, + "learning_rate": 1.3087099861870578e-06, + "loss": 0.7022, "step": 29652 }, { - "epoch": 0.8414585698070375, + "epoch": 0.8402901754087676, "grad_norm": 0.0, - "learning_rate": 1.289640104726515e-06, - "loss": 0.8209, + "learning_rate": 1.308256097565347e-06, + "loss": 0.8697, "step": 29653 }, { - "epoch": 0.8414869466515323, + "epoch": 0.8403185128510301, "grad_norm": 0.0, - "learning_rate": 1.289188672719689e-06, - "loss": 0.7025, + "learning_rate": 1.307802282157421e-06, + "loss": 0.8225, "step": 29654 }, { - "epoch": 0.8415153234960272, + "epoch": 0.8403468502932925, "grad_norm": 0.0, - "learning_rate": 1.2887373142934767e-06, - "loss": 0.7956, + "learning_rate": 1.3073485399671038e-06, + "loss": 0.9019, "step": 29655 }, { - "epoch": 0.8415437003405222, + "epoch": 0.840375187735555, "grad_norm": 0.0, - "learning_rate": 1.2882860294516952e-06, - "loss": 0.7915, + "learning_rate": 1.3068948709982177e-06, + "loss": 0.7809, "step": 29656 }, { - "epoch": 0.841572077185017, + "epoch": 0.8404035251778175, "grad_norm": 0.0, - "learning_rate": 1.2878348181981516e-06, - "loss": 0.865, + "learning_rate": 1.3064412752545853e-06, + "loss": 0.8148, "step": 29657 }, { - "epoch": 0.8416004540295119, + "epoch": 0.84043186262008, "grad_norm": 0.0, - "learning_rate": 1.2873836805366579e-06, - "loss": 0.7742, + "learning_rate": 1.3059877527400244e-06, + "loss": 0.7771, "step": 29658 }, { - "epoch": 0.8416288308740069, + "epoch": 0.8404602000623423, "grad_norm": 0.0, - "learning_rate": 1.2869326164710294e-06, - "loss": 0.83, + "learning_rate": 1.3055343034583611e-06, + "loss": 0.7962, "step": 29659 }, { - "epoch": 0.8416572077185017, + "epoch": 0.8404885375046048, "grad_norm": 0.0, - "learning_rate": 1.2864816260050694e-06, - "loss": 0.8788, + "learning_rate": 1.305080927413408e-06, + "loss": 0.7949, "step": 29660 }, { - "epoch": 0.8416855845629966, + "epoch": 0.8405168749468673, "grad_norm": 0.0, - "learning_rate": 1.2860307091425905e-06, - "loss": 0.7978, + "learning_rate": 1.3046276246089896e-06, + "loss": 0.9243, "step": 29661 }, { - "epoch": 0.8417139614074914, + "epoch": 0.8405452123891297, "grad_norm": 0.0, - "learning_rate": 1.2855798658874052e-06, - "loss": 0.7449, + "learning_rate": 1.3041743950489184e-06, + "loss": 0.7758, "step": 29662 }, { - "epoch": 0.8417423382519864, + "epoch": 0.8405735498313922, "grad_norm": 0.0, - "learning_rate": 1.285129096243316e-06, - "loss": 0.8071, + "learning_rate": 1.3037212387370157e-06, + "loss": 0.7922, "step": 29663 }, { - "epoch": 0.8417707150964813, + "epoch": 0.8406018872736547, "grad_norm": 0.0, - "learning_rate": 1.2846784002141333e-06, - "loss": 0.8091, + "learning_rate": 1.303268155677101e-06, + "loss": 0.8699, "step": 29664 }, { - "epoch": 0.8417990919409761, + "epoch": 0.8406302247159172, "grad_norm": 0.0, - "learning_rate": 1.2842277778036627e-06, - "loss": 0.8426, + "learning_rate": 1.3028151458729865e-06, + "loss": 0.7324, "step": 29665 }, { - "epoch": 0.8418274687854711, + "epoch": 0.8406585621581796, "grad_norm": 0.0, - "learning_rate": 1.2837772290157135e-06, - "loss": 0.8438, + "learning_rate": 1.3023622093284883e-06, + "loss": 0.8776, "step": 29666 }, { - "epoch": 0.841855845629966, + "epoch": 0.8406868996004421, "grad_norm": 0.0, - "learning_rate": 1.2833267538540907e-06, - "loss": 0.8638, + "learning_rate": 1.3019093460474264e-06, + "loss": 0.7644, "step": 29667 }, { - "epoch": 0.8418842224744608, + "epoch": 0.8407152370427046, "grad_norm": 0.0, - "learning_rate": 1.2828763523225974e-06, - "loss": 0.8244, + "learning_rate": 1.301456556033609e-06, + "loss": 0.7827, "step": 29668 }, { - "epoch": 0.8419125993189557, + "epoch": 0.840743574484967, "grad_norm": 0.0, - "learning_rate": 1.2824260244250385e-06, - "loss": 0.883, + "learning_rate": 1.301003839290853e-06, + "loss": 0.8344, "step": 29669 }, { - "epoch": 0.8419409761634506, + "epoch": 0.8407719119272294, "grad_norm": 0.0, - "learning_rate": 1.2819757701652224e-06, - "loss": 0.7674, + "learning_rate": 1.300551195822972e-06, + "loss": 0.8494, "step": 29670 }, { - "epoch": 0.8419693530079455, + "epoch": 0.8408002493694919, "grad_norm": 0.0, - "learning_rate": 1.2815255895469458e-06, - "loss": 0.8444, + "learning_rate": 1.30009862563378e-06, + "loss": 0.8693, "step": 29671 }, { - "epoch": 0.8419977298524404, + "epoch": 0.8408285868117543, "grad_norm": 0.0, - "learning_rate": 1.2810754825740146e-06, - "loss": 0.8069, + "learning_rate": 1.299646128727089e-06, + "loss": 0.8352, "step": 29672 }, { - "epoch": 0.8420261066969353, + "epoch": 0.8408569242540168, "grad_norm": 0.0, - "learning_rate": 1.2806254492502323e-06, - "loss": 0.8293, + "learning_rate": 1.2991937051067072e-06, + "loss": 0.8333, "step": 29673 }, { - "epoch": 0.8420544835414302, + "epoch": 0.8408852616962793, "grad_norm": 0.0, - "learning_rate": 1.2801754895793972e-06, - "loss": 0.8294, + "learning_rate": 1.2987413547764482e-06, + "loss": 0.8694, "step": 29674 }, { - "epoch": 0.8420828603859251, + "epoch": 0.8409135991385418, "grad_norm": 0.0, - "learning_rate": 1.2797256035653128e-06, - "loss": 0.8141, + "learning_rate": 1.2982890777401236e-06, + "loss": 0.9512, "step": 29675 }, { - "epoch": 0.84211123723042, + "epoch": 0.8409419365808042, "grad_norm": 0.0, - "learning_rate": 1.2792757912117793e-06, - "loss": 0.7814, + "learning_rate": 1.2978368740015401e-06, + "loss": 0.7463, "step": 29676 }, { - "epoch": 0.8421396140749149, + "epoch": 0.8409702740230667, "grad_norm": 0.0, - "learning_rate": 1.2788260525225926e-06, - "loss": 0.8212, + "learning_rate": 1.2973847435645092e-06, + "loss": 0.7831, "step": 29677 }, { - "epoch": 0.8421679909194097, + "epoch": 0.8409986114653292, "grad_norm": 0.0, - "learning_rate": 1.2783763875015542e-06, - "loss": 0.8692, + "learning_rate": 1.2969326864328368e-06, + "loss": 0.7881, "step": 29678 }, { - "epoch": 0.8421963677639046, + "epoch": 0.8410269489075916, "grad_norm": 0.0, - "learning_rate": 1.2779267961524633e-06, - "loss": 0.8667, + "learning_rate": 1.296480702610332e-06, + "loss": 0.7714, "step": 29679 }, { - "epoch": 0.8422247446083996, + "epoch": 0.841055286349854, "grad_norm": 0.0, - "learning_rate": 1.2774772784791168e-06, - "loss": 0.7224, + "learning_rate": 1.2960287921008041e-06, + "loss": 0.7257, "step": 29680 }, { - "epoch": 0.8422531214528944, + "epoch": 0.8410836237921165, "grad_norm": 0.0, - "learning_rate": 1.2770278344853139e-06, - "loss": 0.865, + "learning_rate": 1.2955769549080566e-06, + "loss": 0.8504, "step": 29681 }, { - "epoch": 0.8422814982973893, + "epoch": 0.841111961234379, "grad_norm": 0.0, - "learning_rate": 1.2765784641748469e-06, - "loss": 0.8582, + "learning_rate": 1.2951251910358952e-06, + "loss": 0.8273, "step": 29682 }, { - "epoch": 0.8423098751418843, + "epoch": 0.8411402986766414, "grad_norm": 0.0, - "learning_rate": 1.276129167551513e-06, - "loss": 0.817, + "learning_rate": 1.2946735004881284e-06, + "loss": 0.8642, "step": 29683 }, { - "epoch": 0.8423382519863791, + "epoch": 0.8411686361189039, "grad_norm": 0.0, - "learning_rate": 1.2756799446191114e-06, - "loss": 0.9648, + "learning_rate": 1.2942218832685583e-06, + "loss": 0.8523, "step": 29684 }, { - "epoch": 0.842366628830874, + "epoch": 0.8411969735611664, "grad_norm": 0.0, - "learning_rate": 1.2752307953814313e-06, - "loss": 0.7846, + "learning_rate": 1.293770339380992e-06, + "loss": 0.6905, "step": 29685 }, { - "epoch": 0.8423950056753688, + "epoch": 0.8412253110034288, "grad_norm": 0.0, - "learning_rate": 1.2747817198422685e-06, - "loss": 0.9028, + "learning_rate": 1.2933188688292298e-06, + "loss": 0.8825, "step": 29686 }, { - "epoch": 0.8424233825198638, + "epoch": 0.8412536484456913, "grad_norm": 0.0, - "learning_rate": 1.2743327180054188e-06, - "loss": 0.8257, + "learning_rate": 1.2928674716170754e-06, + "loss": 0.7824, "step": 29687 }, { - "epoch": 0.8424517593643587, + "epoch": 0.8412819858879538, "grad_norm": 0.0, - "learning_rate": 1.2738837898746703e-06, - "loss": 0.7889, + "learning_rate": 1.2924161477483343e-06, + "loss": 0.7234, "step": 29688 }, { - "epoch": 0.8424801362088535, + "epoch": 0.8413103233302163, "grad_norm": 0.0, - "learning_rate": 1.2734349354538188e-06, - "loss": 0.7358, + "learning_rate": 1.291964897226803e-06, + "loss": 0.7368, "step": 29689 }, { - "epoch": 0.8425085130533485, + "epoch": 0.8413386607724787, "grad_norm": 0.0, - "learning_rate": 1.272986154746656e-06, - "loss": 0.8267, + "learning_rate": 1.2915137200562867e-06, + "loss": 0.8202, "step": 29690 }, { - "epoch": 0.8425368898978434, + "epoch": 0.8413669982147411, "grad_norm": 0.0, - "learning_rate": 1.2725374477569697e-06, - "loss": 0.7207, + "learning_rate": 1.2910626162405827e-06, + "loss": 0.7564, "step": 29691 }, { - "epoch": 0.8425652667423382, + "epoch": 0.8413953356570036, "grad_norm": 0.0, - "learning_rate": 1.2720888144885512e-06, - "loss": 0.7135, + "learning_rate": 1.290611585783491e-06, + "loss": 0.7218, "step": 29692 }, { - "epoch": 0.8425936435868332, + "epoch": 0.841423673099266, "grad_norm": 0.0, - "learning_rate": 1.2716402549451945e-06, - "loss": 0.7631, + "learning_rate": 1.2901606286888157e-06, + "loss": 0.8102, "step": 29693 }, { - "epoch": 0.8426220204313281, + "epoch": 0.8414520105415285, "grad_norm": 0.0, - "learning_rate": 1.2711917691306797e-06, - "loss": 0.7788, + "learning_rate": 1.2897097449603491e-06, + "loss": 0.8495, "step": 29694 }, { - "epoch": 0.8426503972758229, + "epoch": 0.841480347983791, "grad_norm": 0.0, - "learning_rate": 1.2707433570488037e-06, - "loss": 0.8487, + "learning_rate": 1.2892589346018914e-06, + "loss": 0.8096, "step": 29695 }, { - "epoch": 0.8426787741203178, + "epoch": 0.8415086854260534, "grad_norm": 0.0, - "learning_rate": 1.2702950187033502e-06, - "loss": 0.8292, + "learning_rate": 1.2888081976172418e-06, + "loss": 0.908, "step": 29696 }, { - "epoch": 0.8427071509648127, + "epoch": 0.8415370228683159, "grad_norm": 0.0, - "learning_rate": 1.2698467540981064e-06, - "loss": 0.7413, + "learning_rate": 1.288357534010194e-06, + "loss": 0.8659, "step": 29697 }, { - "epoch": 0.8427355278093076, + "epoch": 0.8415653603105784, "grad_norm": 0.0, - "learning_rate": 1.2693985632368622e-06, - "loss": 0.7832, + "learning_rate": 1.2879069437845483e-06, + "loss": 0.877, "step": 29698 }, { - "epoch": 0.8427639046538025, + "epoch": 0.8415936977528409, "grad_norm": 0.0, - "learning_rate": 1.2689504461233982e-06, - "loss": 0.647, + "learning_rate": 1.2874564269440958e-06, + "loss": 0.9081, "step": 29699 }, { - "epoch": 0.8427922814982974, + "epoch": 0.8416220351951033, "grad_norm": 0.0, - "learning_rate": 1.2685024027615034e-06, - "loss": 0.8098, + "learning_rate": 1.287005983492633e-06, + "loss": 0.8411, "step": 29700 }, { - "epoch": 0.8428206583427923, + "epoch": 0.8416503726373658, "grad_norm": 0.0, - "learning_rate": 1.2680544331549617e-06, - "loss": 0.8829, + "learning_rate": 1.2865556134339552e-06, + "loss": 0.7938, "step": 29701 }, { - "epoch": 0.8428490351872872, + "epoch": 0.8416787100796282, "grad_norm": 0.0, - "learning_rate": 1.2676065373075552e-06, - "loss": 0.7465, + "learning_rate": 1.286105316771854e-06, + "loss": 0.8667, "step": 29702 }, { - "epoch": 0.842877412031782, + "epoch": 0.8417070475218906, "grad_norm": 0.0, - "learning_rate": 1.267158715223069e-06, - "loss": 0.7909, + "learning_rate": 1.2856550935101242e-06, + "loss": 0.7466, "step": 29703 }, { - "epoch": 0.842905788876277, + "epoch": 0.8417353849641531, "grad_norm": 0.0, - "learning_rate": 1.266710966905289e-06, - "loss": 0.873, + "learning_rate": 1.2852049436525593e-06, + "loss": 0.8264, "step": 29704 }, { - "epoch": 0.8429341657207718, + "epoch": 0.8417637224064156, "grad_norm": 0.0, - "learning_rate": 1.2662632923579899e-06, - "loss": 0.8599, + "learning_rate": 1.2847548672029464e-06, + "loss": 0.869, "step": 29705 }, { - "epoch": 0.8429625425652667, + "epoch": 0.8417920598486781, "grad_norm": 0.0, - "learning_rate": 1.2658156915849585e-06, - "loss": 0.7571, + "learning_rate": 1.2843048641650825e-06, + "loss": 0.913, "step": 29706 }, { - "epoch": 0.8429909194097617, + "epoch": 0.8418203972909405, "grad_norm": 0.0, - "learning_rate": 1.2653681645899774e-06, - "loss": 0.7411, + "learning_rate": 1.2838549345427532e-06, + "loss": 0.7893, "step": 29707 }, { - "epoch": 0.8430192962542565, + "epoch": 0.841848734733203, "grad_norm": 0.0, - "learning_rate": 1.2649207113768202e-06, - "loss": 0.8552, + "learning_rate": 1.28340507833975e-06, + "loss": 0.6951, "step": 29708 }, { - "epoch": 0.8430476730987514, + "epoch": 0.8418770721754655, "grad_norm": 0.0, - "learning_rate": 1.264473331949272e-06, - "loss": 0.8071, + "learning_rate": 1.2829552955598623e-06, + "loss": 0.788, "step": 29709 }, { - "epoch": 0.8430760499432464, + "epoch": 0.8419054096177279, "grad_norm": 0.0, - "learning_rate": 1.2640260263111093e-06, - "loss": 0.7426, + "learning_rate": 1.282505586206879e-06, + "loss": 0.903, "step": 29710 }, { - "epoch": 0.8431044267877412, + "epoch": 0.8419337470599904, "grad_norm": 0.0, - "learning_rate": 1.2635787944661114e-06, - "loss": 0.8077, + "learning_rate": 1.282055950284592e-06, + "loss": 0.8021, "step": 29711 }, { - "epoch": 0.8431328036322361, + "epoch": 0.8419620845022528, "grad_norm": 0.0, - "learning_rate": 1.263131636418059e-06, - "loss": 0.6895, + "learning_rate": 1.281606387796781e-06, + "loss": 0.768, "step": 29712 }, { - "epoch": 0.8431611804767309, + "epoch": 0.8419904219445152, "grad_norm": 0.0, - "learning_rate": 1.2626845521707232e-06, - "loss": 0.7522, + "learning_rate": 1.281156898747238e-06, + "loss": 0.8383, "step": 29713 }, { - "epoch": 0.8431895573212259, + "epoch": 0.8420187593867777, "grad_norm": 0.0, - "learning_rate": 1.2622375417278843e-06, - "loss": 0.8532, + "learning_rate": 1.2807074831397503e-06, + "loss": 0.8257, "step": 29714 }, { - "epoch": 0.8432179341657208, + "epoch": 0.8420470968290402, "grad_norm": 0.0, - "learning_rate": 1.2617906050933204e-06, - "loss": 0.771, + "learning_rate": 1.280258140978099e-06, + "loss": 0.8948, "step": 29715 }, { - "epoch": 0.8432463110102156, + "epoch": 0.8420754342713027, "grad_norm": 0.0, - "learning_rate": 1.261343742270802e-06, - "loss": 0.962, + "learning_rate": 1.2798088722660717e-06, + "loss": 0.6973, "step": 29716 }, { - "epoch": 0.8432746878547106, + "epoch": 0.8421037717135651, "grad_norm": 0.0, - "learning_rate": 1.2608969532641046e-06, - "loss": 0.9327, + "learning_rate": 1.2793596770074534e-06, + "loss": 0.7997, "step": 29717 }, { - "epoch": 0.8433030646992055, + "epoch": 0.8421321091558276, "grad_norm": 0.0, - "learning_rate": 1.2604502380770067e-06, - "loss": 0.8697, + "learning_rate": 1.2789105552060255e-06, + "loss": 0.7563, "step": 29718 }, { - "epoch": 0.8433314415437003, + "epoch": 0.8421604465980901, "grad_norm": 0.0, - "learning_rate": 1.2600035967132762e-06, - "loss": 0.8095, + "learning_rate": 1.2784615068655747e-06, + "loss": 0.7537, "step": 29719 }, { - "epoch": 0.8433598183881952, + "epoch": 0.8421887840403525, "grad_norm": 0.0, - "learning_rate": 1.259557029176688e-06, - "loss": 0.7022, + "learning_rate": 1.2780125319898795e-06, + "loss": 0.7618, "step": 29720 }, { - "epoch": 0.8433881952326902, + "epoch": 0.842217121482615, "grad_norm": 0.0, - "learning_rate": 1.2591105354710165e-06, - "loss": 0.8298, + "learning_rate": 1.2775636305827234e-06, + "loss": 0.8307, "step": 29721 }, { - "epoch": 0.843416572077185, + "epoch": 0.8422454589248775, "grad_norm": 0.0, - "learning_rate": 1.258664115600029e-06, - "loss": 0.7427, + "learning_rate": 1.2771148026478874e-06, + "loss": 0.7905, "step": 29722 }, { - "epoch": 0.8434449489216799, + "epoch": 0.84227379636714, "grad_norm": 0.0, - "learning_rate": 1.258217769567499e-06, - "loss": 0.7323, + "learning_rate": 1.2766660481891525e-06, + "loss": 0.7005, "step": 29723 }, { - "epoch": 0.8434733257661748, + "epoch": 0.8423021338094023, "grad_norm": 0.0, - "learning_rate": 1.2577714973771983e-06, - "loss": 0.775, + "learning_rate": 1.2762173672102996e-06, + "loss": 0.8754, "step": 29724 }, { - "epoch": 0.8435017026106697, + "epoch": 0.8423304712516648, "grad_norm": 0.0, - "learning_rate": 1.2573252990328911e-06, - "loss": 0.8309, + "learning_rate": 1.275768759715108e-06, + "loss": 0.7585, "step": 29725 }, { - "epoch": 0.8435300794551646, + "epoch": 0.8423588086939273, "grad_norm": 0.0, - "learning_rate": 1.2568791745383513e-06, - "loss": 0.7858, + "learning_rate": 1.275320225707355e-06, + "loss": 0.6942, "step": 29726 }, { - "epoch": 0.8435584562996595, + "epoch": 0.8423871461361897, "grad_norm": 0.0, - "learning_rate": 1.2564331238973494e-06, - "loss": 0.886, + "learning_rate": 1.2748717651908215e-06, + "loss": 0.9536, "step": 29727 }, { - "epoch": 0.8435868331441544, + "epoch": 0.8424154835784522, "grad_norm": 0.0, - "learning_rate": 1.2559871471136464e-06, - "loss": 0.8496, + "learning_rate": 1.2744233781692805e-06, + "loss": 0.7257, "step": 29728 }, { - "epoch": 0.8436152099886493, + "epoch": 0.8424438210207147, "grad_norm": 0.0, - "learning_rate": 1.2555412441910165e-06, - "loss": 0.8954, + "learning_rate": 1.273975064646512e-06, + "loss": 0.9219, "step": 29729 }, { - "epoch": 0.8436435868331441, + "epoch": 0.8424721584629772, "grad_norm": 0.0, - "learning_rate": 1.2550954151332205e-06, - "loss": 0.8201, + "learning_rate": 1.2735268246262943e-06, + "loss": 0.8456, "step": 29730 }, { - "epoch": 0.8436719636776391, + "epoch": 0.8425004959052396, "grad_norm": 0.0, - "learning_rate": 1.2546496599440261e-06, - "loss": 0.7777, + "learning_rate": 1.2730786581123977e-06, + "loss": 0.7633, "step": 29731 }, { - "epoch": 0.8437003405221339, + "epoch": 0.8425288333475021, "grad_norm": 0.0, - "learning_rate": 1.2542039786272008e-06, - "loss": 0.9089, + "learning_rate": 1.2726305651086001e-06, + "loss": 0.8238, "step": 29732 }, { - "epoch": 0.8437287173666288, + "epoch": 0.8425571707897646, "grad_norm": 0.0, - "learning_rate": 1.2537583711865065e-06, - "loss": 0.8239, + "learning_rate": 1.2721825456186788e-06, + "loss": 0.8515, "step": 29733 }, { - "epoch": 0.8437570942111238, + "epoch": 0.8425855082320269, "grad_norm": 0.0, - "learning_rate": 1.2533128376257088e-06, - "loss": 0.8735, + "learning_rate": 1.2717345996464025e-06, + "loss": 0.8032, "step": 29734 }, { - "epoch": 0.8437854710556186, + "epoch": 0.8426138456742894, "grad_norm": 0.0, - "learning_rate": 1.2528673779485723e-06, - "loss": 0.8236, + "learning_rate": 1.2712867271955475e-06, + "loss": 0.8241, "step": 29735 }, { - "epoch": 0.8438138479001135, + "epoch": 0.8426421831165519, "grad_norm": 0.0, - "learning_rate": 1.2524219921588553e-06, - "loss": 0.8293, + "learning_rate": 1.2708389282698852e-06, + "loss": 0.7683, "step": 29736 }, { - "epoch": 0.8438422247446083, + "epoch": 0.8426705205588143, "grad_norm": 0.0, - "learning_rate": 1.2519766802603239e-06, - "loss": 0.854, + "learning_rate": 1.270391202873188e-06, + "loss": 0.787, "step": 29737 }, { - "epoch": 0.8438706015891033, + "epoch": 0.8426988580010768, "grad_norm": 0.0, - "learning_rate": 1.2515314422567405e-06, - "loss": 0.9057, + "learning_rate": 1.2699435510092295e-06, + "loss": 0.788, "step": 29738 }, { - "epoch": 0.8438989784335982, + "epoch": 0.8427271954433393, "grad_norm": 0.0, - "learning_rate": 1.2510862781518628e-06, - "loss": 0.6796, + "learning_rate": 1.269495972681777e-06, + "loss": 0.6998, "step": 29739 }, { - "epoch": 0.843927355278093, + "epoch": 0.8427555328856018, "grad_norm": 0.0, - "learning_rate": 1.2506411879494496e-06, - "loss": 0.7247, + "learning_rate": 1.2690484678946025e-06, + "loss": 0.8788, "step": 29740 }, { - "epoch": 0.843955732122588, + "epoch": 0.8427838703278642, "grad_norm": 0.0, - "learning_rate": 1.2501961716532685e-06, - "loss": 0.7943, + "learning_rate": 1.2686010366514744e-06, + "loss": 0.6861, "step": 29741 }, { - "epoch": 0.8439841089670829, + "epoch": 0.8428122077701267, "grad_norm": 0.0, - "learning_rate": 1.249751229267071e-06, - "loss": 0.8858, + "learning_rate": 1.268153678956161e-06, + "loss": 0.6615, "step": 29742 }, { - "epoch": 0.8440124858115777, + "epoch": 0.8428405452123892, "grad_norm": 0.0, - "learning_rate": 1.2493063607946188e-06, - "loss": 0.7716, + "learning_rate": 1.2677063948124336e-06, + "loss": 0.857, "step": 29743 }, { - "epoch": 0.8440408626560727, + "epoch": 0.8428688826546515, "grad_norm": 0.0, - "learning_rate": 1.2488615662396709e-06, - "loss": 0.7812, + "learning_rate": 1.2672591842240566e-06, + "loss": 0.694, "step": 29744 }, { - "epoch": 0.8440692395005676, + "epoch": 0.842897220096914, "grad_norm": 0.0, - "learning_rate": 1.248416845605981e-06, - "loss": 0.864, + "learning_rate": 1.2668120471947975e-06, + "loss": 0.6923, "step": 29745 }, { - "epoch": 0.8440976163450624, + "epoch": 0.8429255575391765, "grad_norm": 0.0, - "learning_rate": 1.247972198897307e-06, - "loss": 0.8689, + "learning_rate": 1.2663649837284265e-06, + "loss": 0.9003, "step": 29746 }, { - "epoch": 0.8441259931895573, + "epoch": 0.842953894981439, "grad_norm": 0.0, - "learning_rate": 1.2475276261174074e-06, - "loss": 0.8249, + "learning_rate": 1.2659179938287035e-06, + "loss": 0.7547, "step": 29747 }, { - "epoch": 0.8441543700340522, + "epoch": 0.8429822324237014, "grad_norm": 0.0, - "learning_rate": 1.2470831272700334e-06, - "loss": 0.852, + "learning_rate": 1.2654710774993973e-06, + "loss": 0.7837, "step": 29748 }, { - "epoch": 0.8441827468785471, + "epoch": 0.8430105698659639, "grad_norm": 0.0, - "learning_rate": 1.246638702358941e-06, - "loss": 0.7739, + "learning_rate": 1.265024234744271e-06, + "loss": 0.8609, "step": 29749 }, { - "epoch": 0.844211123723042, + "epoch": 0.8430389073082264, "grad_norm": 0.0, - "learning_rate": 1.2461943513878883e-06, - "loss": 0.759, + "learning_rate": 1.2645774655670894e-06, + "loss": 0.7796, "step": 29750 }, { - "epoch": 0.8442395005675369, + "epoch": 0.8430672447504888, "grad_norm": 0.0, - "learning_rate": 1.2457500743606232e-06, - "loss": 0.7708, + "learning_rate": 1.2641307699716188e-06, + "loss": 0.8123, "step": 29751 }, { - "epoch": 0.8442678774120318, + "epoch": 0.8430955821927513, "grad_norm": 0.0, - "learning_rate": 1.2453058712808996e-06, - "loss": 0.9415, + "learning_rate": 1.2636841479616158e-06, + "loss": 0.8691, "step": 29752 }, { - "epoch": 0.8442962542565267, + "epoch": 0.8431239196350138, "grad_norm": 0.0, - "learning_rate": 1.2448617421524745e-06, - "loss": 0.7883, + "learning_rate": 1.2632375995408453e-06, + "loss": 0.9242, "step": 29753 }, { - "epoch": 0.8443246311010215, + "epoch": 0.8431522570772763, "grad_norm": 0.0, - "learning_rate": 1.2444176869790925e-06, - "loss": 0.7737, + "learning_rate": 1.2627911247130709e-06, + "loss": 0.8158, "step": 29754 }, { - "epoch": 0.8443530079455165, + "epoch": 0.8431805945195386, "grad_norm": 0.0, - "learning_rate": 1.2439737057645106e-06, - "loss": 0.8257, + "learning_rate": 1.2623447234820485e-06, + "loss": 0.6794, "step": 29755 }, { - "epoch": 0.8443813847900113, + "epoch": 0.8432089319618011, "grad_norm": 0.0, - "learning_rate": 1.2435297985124716e-06, - "loss": 0.7763, + "learning_rate": 1.261898395851544e-06, + "loss": 0.8102, "step": 29756 }, { - "epoch": 0.8444097616345062, + "epoch": 0.8432372694040636, "grad_norm": 0.0, - "learning_rate": 1.2430859652267323e-06, - "loss": 0.8289, + "learning_rate": 1.2614521418253123e-06, + "loss": 0.7619, "step": 29757 }, { - "epoch": 0.8444381384790012, + "epoch": 0.843265606846326, "grad_norm": 0.0, - "learning_rate": 1.242642205911042e-06, - "loss": 0.8719, + "learning_rate": 1.2610059614071135e-06, + "loss": 0.7307, "step": 29758 }, { - "epoch": 0.844466515323496, + "epoch": 0.8432939442885885, "grad_norm": 0.0, - "learning_rate": 1.2421985205691444e-06, - "loss": 0.7926, + "learning_rate": 1.260559854600709e-06, + "loss": 0.8516, "step": 29759 }, { - "epoch": 0.8444948921679909, + "epoch": 0.843322281730851, "grad_norm": 0.0, - "learning_rate": 1.2417549092047898e-06, - "loss": 0.8014, + "learning_rate": 1.2601138214098528e-06, + "loss": 0.8608, "step": 29760 }, { - "epoch": 0.8445232690124859, + "epoch": 0.8433506191731134, "grad_norm": 0.0, - "learning_rate": 1.2413113718217273e-06, - "loss": 0.844, + "learning_rate": 1.259667861838303e-06, + "loss": 0.913, "step": 29761 }, { - "epoch": 0.8445516458569807, + "epoch": 0.8433789566153759, "grad_norm": 0.0, - "learning_rate": 1.2408679084236985e-06, - "loss": 0.7687, + "learning_rate": 1.259221975889816e-06, + "loss": 0.758, "step": 29762 }, { - "epoch": 0.8445800227014756, + "epoch": 0.8434072940576384, "grad_norm": 0.0, - "learning_rate": 1.2404245190144537e-06, - "loss": 0.7192, + "learning_rate": 1.2587761635681484e-06, + "loss": 0.8022, "step": 29763 }, { - "epoch": 0.8446083995459704, + "epoch": 0.8434356314999009, "grad_norm": 0.0, - "learning_rate": 1.2399812035977377e-06, - "loss": 0.8085, + "learning_rate": 1.2583304248770568e-06, + "loss": 0.8128, "step": 29764 }, { - "epoch": 0.8446367763904654, + "epoch": 0.8434639689421632, "grad_norm": 0.0, - "learning_rate": 1.2395379621772918e-06, - "loss": 0.7922, + "learning_rate": 1.2578847598202925e-06, + "loss": 0.764, "step": 29765 }, { - "epoch": 0.8446651532349603, + "epoch": 0.8434923063844257, "grad_norm": 0.0, - "learning_rate": 1.2390947947568632e-06, - "loss": 0.8136, + "learning_rate": 1.2574391684016107e-06, + "loss": 0.8097, "step": 29766 }, { - "epoch": 0.8446935300794551, + "epoch": 0.8435206438266882, "grad_norm": 0.0, - "learning_rate": 1.2386517013401967e-06, - "loss": 0.9113, + "learning_rate": 1.2569936506247682e-06, + "loss": 0.8784, "step": 29767 }, { - "epoch": 0.8447219069239501, + "epoch": 0.8435489812689506, "grad_norm": 0.0, - "learning_rate": 1.2382086819310312e-06, - "loss": 0.7621, + "learning_rate": 1.256548206493512e-06, + "loss": 0.8247, "step": 29768 }, { - "epoch": 0.844750283768445, + "epoch": 0.8435773187112131, "grad_norm": 0.0, - "learning_rate": 1.237765736533111e-06, - "loss": 0.7482, + "learning_rate": 1.2561028360116002e-06, + "loss": 0.639, "step": 29769 }, { - "epoch": 0.8447786606129398, + "epoch": 0.8436056561534756, "grad_norm": 0.0, - "learning_rate": 1.237322865150179e-06, - "loss": 0.8853, + "learning_rate": 1.2556575391827774e-06, + "loss": 0.8951, "step": 29770 }, { - "epoch": 0.8448070374574347, + "epoch": 0.8436339935957381, "grad_norm": 0.0, - "learning_rate": 1.2368800677859716e-06, - "loss": 0.8199, + "learning_rate": 1.2552123160108e-06, + "loss": 0.8059, "step": 29771 }, { - "epoch": 0.8448354143019297, + "epoch": 0.8436623310380005, "grad_norm": 0.0, - "learning_rate": 1.2364373444442356e-06, - "loss": 0.7741, + "learning_rate": 1.2547671664994154e-06, + "loss": 0.7021, "step": 29772 }, { - "epoch": 0.8448637911464245, + "epoch": 0.843690668480263, "grad_norm": 0.0, - "learning_rate": 1.235994695128705e-06, - "loss": 0.7553, + "learning_rate": 1.2543220906523745e-06, + "loss": 0.6827, "step": 29773 }, { - "epoch": 0.8448921679909194, + "epoch": 0.8437190059225255, "grad_norm": 0.0, - "learning_rate": 1.2355521198431209e-06, - "loss": 0.828, + "learning_rate": 1.2538770884734286e-06, + "loss": 0.7177, "step": 29774 }, { - "epoch": 0.8449205448354143, + "epoch": 0.8437473433647878, "grad_norm": 0.0, - "learning_rate": 1.2351096185912248e-06, - "loss": 0.9626, + "learning_rate": 1.2534321599663213e-06, + "loss": 0.7451, "step": 29775 }, { - "epoch": 0.8449489216799092, + "epoch": 0.8437756808070503, "grad_norm": 0.0, - "learning_rate": 1.2346671913767494e-06, - "loss": 0.8751, + "learning_rate": 1.2529873051348041e-06, + "loss": 0.8187, "step": 29776 }, { - "epoch": 0.8449772985244041, + "epoch": 0.8438040182493128, "grad_norm": 0.0, - "learning_rate": 1.234224838203435e-06, - "loss": 0.7491, + "learning_rate": 1.252542523982624e-06, + "loss": 0.8032, "step": 29777 }, { - "epoch": 0.8450056753688989, + "epoch": 0.8438323556915753, "grad_norm": 0.0, - "learning_rate": 1.2337825590750186e-06, - "loss": 0.827, + "learning_rate": 1.2520978165135245e-06, + "loss": 0.806, "step": 29778 }, { - "epoch": 0.8450340522133939, + "epoch": 0.8438606931338377, "grad_norm": 0.0, - "learning_rate": 1.2333403539952327e-06, - "loss": 0.8025, + "learning_rate": 1.251653182731254e-06, + "loss": 0.7664, "step": 29779 }, { - "epoch": 0.8450624290578888, + "epoch": 0.8438890305761002, "grad_norm": 0.0, - "learning_rate": 1.2328982229678154e-06, - "loss": 0.8485, + "learning_rate": 1.2512086226395591e-06, + "loss": 0.7789, "step": 29780 }, { - "epoch": 0.8450908059023836, + "epoch": 0.8439173680183627, "grad_norm": 0.0, - "learning_rate": 1.2324561659965029e-06, - "loss": 0.8259, + "learning_rate": 1.2507641362421808e-06, + "loss": 0.8614, "step": 29781 }, { - "epoch": 0.8451191827468786, + "epoch": 0.8439457054606251, "grad_norm": 0.0, - "learning_rate": 1.2320141830850253e-06, - "loss": 0.763, + "learning_rate": 1.2503197235428643e-06, + "loss": 0.8063, "step": 29782 }, { - "epoch": 0.8451475595913734, + "epoch": 0.8439740429028876, "grad_norm": 0.0, - "learning_rate": 1.2315722742371184e-06, - "loss": 0.8567, + "learning_rate": 1.249875384545357e-06, + "loss": 0.7572, "step": 29783 }, { - "epoch": 0.8451759364358683, + "epoch": 0.8440023803451501, "grad_norm": 0.0, - "learning_rate": 1.231130439456516e-06, - "loss": 0.7539, + "learning_rate": 1.2494311192533958e-06, + "loss": 0.806, "step": 29784 }, { - "epoch": 0.8452043132803633, + "epoch": 0.8440307177874125, "grad_norm": 0.0, - "learning_rate": 1.2306886787469475e-06, - "loss": 0.8157, + "learning_rate": 1.2489869276707268e-06, + "loss": 0.8256, "step": 29785 }, { - "epoch": 0.8452326901248581, + "epoch": 0.844059055229675, "grad_norm": 0.0, - "learning_rate": 1.2302469921121462e-06, - "loss": 0.8575, + "learning_rate": 1.2485428098010898e-06, + "loss": 0.7853, "step": 29786 }, { - "epoch": 0.845261066969353, + "epoch": 0.8440873926719374, "grad_norm": 0.0, - "learning_rate": 1.2298053795558419e-06, - "loss": 0.8382, + "learning_rate": 1.2480987656482257e-06, + "loss": 0.6586, "step": 29787 }, { - "epoch": 0.8452894438138479, + "epoch": 0.8441157301141999, "grad_norm": 0.0, - "learning_rate": 1.2293638410817665e-06, - "loss": 0.8059, + "learning_rate": 1.247654795215879e-06, + "loss": 0.8545, "step": 29788 }, { - "epoch": 0.8453178206583428, + "epoch": 0.8441440675564623, "grad_norm": 0.0, - "learning_rate": 1.2289223766936508e-06, - "loss": 0.7677, + "learning_rate": 1.2472108985077836e-06, + "loss": 0.8739, "step": 29789 }, { - "epoch": 0.8453461975028377, + "epoch": 0.8441724049987248, "grad_norm": 0.0, - "learning_rate": 1.2284809863952197e-06, - "loss": 0.7887, + "learning_rate": 1.2467670755276805e-06, + "loss": 0.766, "step": 29790 }, { - "epoch": 0.8453745743473325, + "epoch": 0.8442007424409873, "grad_norm": 0.0, - "learning_rate": 1.2280396701902043e-06, - "loss": 0.8357, + "learning_rate": 1.2463233262793118e-06, + "loss": 0.7926, "step": 29791 }, { - "epoch": 0.8454029511918275, + "epoch": 0.8442290798832497, "grad_norm": 0.0, - "learning_rate": 1.227598428082335e-06, - "loss": 0.72, + "learning_rate": 1.2458796507664095e-06, + "loss": 0.7927, "step": 29792 }, { - "epoch": 0.8454313280363224, + "epoch": 0.8442574173255122, "grad_norm": 0.0, - "learning_rate": 1.2271572600753324e-06, - "loss": 0.801, + "learning_rate": 1.2454360489927163e-06, + "loss": 0.7361, "step": 29793 }, { - "epoch": 0.8454597048808172, + "epoch": 0.8442857547677747, "grad_norm": 0.0, - "learning_rate": 1.2267161661729277e-06, - "loss": 0.7374, + "learning_rate": 1.244992520961964e-06, + "loss": 0.8135, "step": 29794 }, { - "epoch": 0.8454880817253121, + "epoch": 0.8443140922100372, "grad_norm": 0.0, - "learning_rate": 1.226275146378848e-06, - "loss": 0.7241, + "learning_rate": 1.2445490666778904e-06, + "loss": 0.8112, "step": 29795 }, { - "epoch": 0.8455164585698071, + "epoch": 0.8443424296522996, "grad_norm": 0.0, - "learning_rate": 1.225834200696815e-06, - "loss": 0.8384, + "learning_rate": 1.2441056861442336e-06, + "loss": 0.7326, "step": 29796 }, { - "epoch": 0.8455448354143019, + "epoch": 0.844370767094562, "grad_norm": 0.0, - "learning_rate": 1.2253933291305542e-06, - "loss": 0.7768, + "learning_rate": 1.243662379364724e-06, + "loss": 0.8136, "step": 29797 }, { - "epoch": 0.8455732122587968, + "epoch": 0.8443991045368245, "grad_norm": 0.0, - "learning_rate": 1.224952531683793e-06, - "loss": 0.9258, + "learning_rate": 1.2432191463430977e-06, + "loss": 0.8123, "step": 29798 }, { - "epoch": 0.8456015891032918, + "epoch": 0.8444274419790869, "grad_norm": 0.0, - "learning_rate": 1.2245118083602492e-06, - "loss": 0.7333, + "learning_rate": 1.242775987083088e-06, + "loss": 0.8264, "step": 29799 }, { - "epoch": 0.8456299659477866, + "epoch": 0.8444557794213494, "grad_norm": 0.0, - "learning_rate": 1.224071159163649e-06, - "loss": 0.8426, + "learning_rate": 1.2423329015884289e-06, + "loss": 0.8956, "step": 29800 }, { - "epoch": 0.8456583427922815, + "epoch": 0.8444841168636119, "grad_norm": 0.0, - "learning_rate": 1.223630584097717e-06, - "loss": 0.8274, + "learning_rate": 1.241889889862853e-06, + "loss": 0.8645, "step": 29801 }, { - "epoch": 0.8456867196367764, + "epoch": 0.8445124543058744, "grad_norm": 0.0, - "learning_rate": 1.223190083166167e-06, - "loss": 0.7593, + "learning_rate": 1.241446951910089e-06, + "loss": 0.6949, "step": 29802 }, { - "epoch": 0.8457150964812713, + "epoch": 0.8445407917481368, "grad_norm": 0.0, - "learning_rate": 1.2227496563727293e-06, - "loss": 0.7925, + "learning_rate": 1.2410040877338704e-06, + "loss": 0.816, "step": 29803 }, { - "epoch": 0.8457434733257662, + "epoch": 0.8445691291903993, "grad_norm": 0.0, - "learning_rate": 1.2223093037211186e-06, - "loss": 0.7491, + "learning_rate": 1.2405612973379277e-06, + "loss": 0.8441, "step": 29804 }, { - "epoch": 0.845771850170261, + "epoch": 0.8445974666326618, "grad_norm": 0.0, - "learning_rate": 1.2218690252150546e-06, - "loss": 0.8291, + "learning_rate": 1.2401185807259886e-06, + "loss": 0.9709, "step": 29805 }, { - "epoch": 0.845800227014756, + "epoch": 0.8446258040749242, "grad_norm": 0.0, - "learning_rate": 1.2214288208582604e-06, - "loss": 0.8542, + "learning_rate": 1.2396759379017865e-06, + "loss": 0.8243, "step": 29806 }, { - "epoch": 0.8458286038592508, + "epoch": 0.8446541415171867, "grad_norm": 0.0, - "learning_rate": 1.2209886906544489e-06, - "loss": 0.865, + "learning_rate": 1.2392333688690438e-06, + "loss": 0.7738, "step": 29807 }, { - "epoch": 0.8458569807037457, + "epoch": 0.8446824789594491, "grad_norm": 0.0, - "learning_rate": 1.2205486346073415e-06, - "loss": 0.806, + "learning_rate": 1.2387908736314924e-06, + "loss": 0.7001, "step": 29808 }, { - "epoch": 0.8458853575482407, + "epoch": 0.8447108164017115, "grad_norm": 0.0, - "learning_rate": 1.2201086527206552e-06, - "loss": 0.7423, + "learning_rate": 1.2383484521928602e-06, + "loss": 0.8396, "step": 29809 }, { - "epoch": 0.8459137343927355, + "epoch": 0.844739153843974, "grad_norm": 0.0, - "learning_rate": 1.219668744998105e-06, - "loss": 0.8463, + "learning_rate": 1.2379061045568707e-06, + "loss": 0.8129, "step": 29810 }, { - "epoch": 0.8459421112372304, + "epoch": 0.8447674912862365, "grad_norm": 0.0, - "learning_rate": 1.2192289114434064e-06, - "loss": 0.825, + "learning_rate": 1.2374638307272524e-06, + "loss": 0.8895, "step": 29811 }, { - "epoch": 0.8459704880817253, + "epoch": 0.844795828728499, "grad_norm": 0.0, - "learning_rate": 1.218789152060279e-06, - "loss": 0.7739, + "learning_rate": 1.2370216307077287e-06, + "loss": 0.7012, "step": 29812 }, { - "epoch": 0.8459988649262202, + "epoch": 0.8448241661707614, "grad_norm": 0.0, - "learning_rate": 1.218349466852432e-06, - "loss": 0.7783, + "learning_rate": 1.2365795045020268e-06, + "loss": 0.7628, "step": 29813 }, { - "epoch": 0.8460272417707151, + "epoch": 0.8448525036130239, "grad_norm": 0.0, - "learning_rate": 1.2179098558235814e-06, - "loss": 0.753, + "learning_rate": 1.2361374521138724e-06, + "loss": 0.7955, "step": 29814 }, { - "epoch": 0.84605561861521, + "epoch": 0.8448808410552864, "grad_norm": 0.0, - "learning_rate": 1.217470318977444e-06, - "loss": 0.7771, + "learning_rate": 1.2356954735469839e-06, + "loss": 0.7773, "step": 29815 }, { - "epoch": 0.8460839954597049, + "epoch": 0.8449091784975488, "grad_norm": 0.0, - "learning_rate": 1.2170308563177268e-06, - "loss": 0.8551, + "learning_rate": 1.235253568805087e-06, + "loss": 0.958, "step": 29816 }, { - "epoch": 0.8461123723041998, + "epoch": 0.8449375159398113, "grad_norm": 0.0, - "learning_rate": 1.2165914678481449e-06, - "loss": 0.8079, + "learning_rate": 1.2348117378919068e-06, + "loss": 0.8281, "step": 29817 }, { - "epoch": 0.8461407491486946, + "epoch": 0.8449658533820738, "grad_norm": 0.0, - "learning_rate": 1.2161521535724097e-06, - "loss": 0.7865, + "learning_rate": 1.2343699808111598e-06, + "loss": 0.797, "step": 29818 }, { - "epoch": 0.8461691259931896, + "epoch": 0.8449941908243362, "grad_norm": 0.0, - "learning_rate": 1.2157129134942325e-06, - "loss": 0.8679, + "learning_rate": 1.2339282975665712e-06, + "loss": 0.9106, "step": 29819 }, { - "epoch": 0.8461975028376845, + "epoch": 0.8450225282665986, "grad_norm": 0.0, - "learning_rate": 1.2152737476173248e-06, - "loss": 0.7487, + "learning_rate": 1.233486688161858e-06, + "loss": 0.8064, "step": 29820 }, { - "epoch": 0.8462258796821793, + "epoch": 0.8450508657088611, "grad_norm": 0.0, - "learning_rate": 1.2148346559453928e-06, - "loss": 0.7284, + "learning_rate": 1.2330451526007414e-06, + "loss": 0.7648, "step": 29821 }, { - "epoch": 0.8462542565266742, + "epoch": 0.8450792031511236, "grad_norm": 0.0, - "learning_rate": 1.2143956384821475e-06, - "loss": 0.8359, + "learning_rate": 1.2326036908869444e-06, + "loss": 0.774, "step": 29822 }, { - "epoch": 0.8462826333711692, + "epoch": 0.845107540593386, "grad_norm": 0.0, - "learning_rate": 1.2139566952312987e-06, - "loss": 0.7722, + "learning_rate": 1.2321623030241781e-06, + "loss": 0.8705, "step": 29823 }, { - "epoch": 0.846311010215664, + "epoch": 0.8451358780356485, "grad_norm": 0.0, - "learning_rate": 1.2135178261965508e-06, - "loss": 0.7095, + "learning_rate": 1.2317209890161663e-06, + "loss": 0.8035, "step": 29824 }, { - "epoch": 0.8463393870601589, + "epoch": 0.845164215477911, "grad_norm": 0.0, - "learning_rate": 1.2130790313816133e-06, - "loss": 0.862, + "learning_rate": 1.231279748866624e-06, + "loss": 0.7744, "step": 29825 }, { - "epoch": 0.8463677639046538, + "epoch": 0.8451925529201735, "grad_norm": 0.0, - "learning_rate": 1.212640310790194e-06, - "loss": 0.8451, + "learning_rate": 1.230838582579268e-06, + "loss": 0.8617, "step": 29826 }, { - "epoch": 0.8463961407491487, + "epoch": 0.8452208903624359, "grad_norm": 0.0, - "learning_rate": 1.2122016644259948e-06, - "loss": 0.8445, + "learning_rate": 1.2303974901578176e-06, + "loss": 0.8379, "step": 29827 }, { - "epoch": 0.8464245175936436, + "epoch": 0.8452492278046984, "grad_norm": 0.0, - "learning_rate": 1.2117630922927237e-06, - "loss": 0.8449, + "learning_rate": 1.229956471605983e-06, + "loss": 0.8931, "step": 29828 }, { - "epoch": 0.8464528944381384, + "epoch": 0.8452775652469608, "grad_norm": 0.0, - "learning_rate": 1.2113245943940865e-06, - "loss": 0.7462, + "learning_rate": 1.2295155269274827e-06, + "loss": 0.8456, "step": 29829 }, { - "epoch": 0.8464812712826334, + "epoch": 0.8453059026892232, "grad_norm": 0.0, - "learning_rate": 1.2108861707337827e-06, - "loss": 0.8224, + "learning_rate": 1.2290746561260313e-06, + "loss": 0.7755, "step": 29830 }, { - "epoch": 0.8465096481271283, + "epoch": 0.8453342401314857, "grad_norm": 0.0, - "learning_rate": 1.2104478213155202e-06, - "loss": 0.9013, + "learning_rate": 1.2286338592053393e-06, + "loss": 0.7882, "step": 29831 }, { - "epoch": 0.8465380249716231, + "epoch": 0.8453625775737482, "grad_norm": 0.0, - "learning_rate": 1.2100095461429984e-06, - "loss": 0.7207, + "learning_rate": 1.2281931361691214e-06, + "loss": 0.8708, "step": 29832 }, { - "epoch": 0.8465664018161181, + "epoch": 0.8453909150160106, "grad_norm": 0.0, - "learning_rate": 1.209571345219922e-06, - "loss": 0.7697, + "learning_rate": 1.2277524870210922e-06, + "loss": 0.7992, "step": 29833 }, { - "epoch": 0.846594778660613, + "epoch": 0.8454192524582731, "grad_norm": 0.0, - "learning_rate": 1.2091332185499916e-06, - "loss": 0.8249, + "learning_rate": 1.227311911764959e-06, + "loss": 0.7597, "step": 29834 }, { - "epoch": 0.8466231555051078, + "epoch": 0.8454475899005356, "grad_norm": 0.0, - "learning_rate": 1.2086951661369095e-06, - "loss": 0.7698, + "learning_rate": 1.2268714104044376e-06, + "loss": 0.9009, "step": 29835 }, { - "epoch": 0.8466515323496028, + "epoch": 0.8454759273427981, "grad_norm": 0.0, - "learning_rate": 1.208257187984373e-06, - "loss": 0.8424, + "learning_rate": 1.226430982943234e-06, + "loss": 0.8851, "step": 29836 }, { - "epoch": 0.8466799091940976, + "epoch": 0.8455042647850605, "grad_norm": 0.0, - "learning_rate": 1.2078192840960857e-06, - "loss": 0.7512, + "learning_rate": 1.22599062938506e-06, + "loss": 0.8484, "step": 29837 }, { - "epoch": 0.8467082860385925, + "epoch": 0.845532602227323, "grad_norm": 0.0, - "learning_rate": 1.2073814544757413e-06, - "loss": 0.7643, + "learning_rate": 1.2255503497336251e-06, + "loss": 0.8604, "step": 29838 }, { - "epoch": 0.8467366628830874, + "epoch": 0.8455609396695855, "grad_norm": 0.0, - "learning_rate": 1.206943699127041e-06, - "loss": 0.7768, + "learning_rate": 1.2251101439926383e-06, + "loss": 0.8938, "step": 29839 }, { - "epoch": 0.8467650397275823, + "epoch": 0.8455892771118478, "grad_norm": 0.0, - "learning_rate": 1.2065060180536858e-06, - "loss": 0.6969, + "learning_rate": 1.2246700121658073e-06, + "loss": 0.7876, "step": 29840 }, { - "epoch": 0.8467934165720772, + "epoch": 0.8456176145541103, "grad_norm": 0.0, - "learning_rate": 1.2060684112593668e-06, - "loss": 0.8108, + "learning_rate": 1.2242299542568404e-06, + "loss": 0.7923, "step": 29841 }, { - "epoch": 0.846821793416572, + "epoch": 0.8456459519963728, "grad_norm": 0.0, - "learning_rate": 1.2056308787477822e-06, - "loss": 0.8698, + "learning_rate": 1.2237899702694423e-06, + "loss": 0.8521, "step": 29842 }, { - "epoch": 0.846850170261067, + "epoch": 0.8456742894386353, "grad_norm": 0.0, - "learning_rate": 1.2051934205226324e-06, - "loss": 0.8153, + "learning_rate": 1.2233500602073222e-06, + "loss": 0.7414, "step": 29843 }, { - "epoch": 0.8468785471055619, + "epoch": 0.8457026268808977, "grad_norm": 0.0, - "learning_rate": 1.2047560365876065e-06, - "loss": 0.9056, + "learning_rate": 1.2229102240741819e-06, + "loss": 0.7923, "step": 29844 }, { - "epoch": 0.8469069239500567, + "epoch": 0.8457309643231602, "grad_norm": 0.0, - "learning_rate": 1.2043187269464018e-06, - "loss": 0.8747, + "learning_rate": 1.222470461873727e-06, + "loss": 0.8356, "step": 29845 }, { - "epoch": 0.8469353007945516, + "epoch": 0.8457593017654227, "grad_norm": 0.0, - "learning_rate": 1.2038814916027142e-06, - "loss": 0.829, + "learning_rate": 1.222030773609666e-06, + "loss": 0.9092, "step": 29846 }, { - "epoch": 0.8469636776390466, + "epoch": 0.8457876392076851, "grad_norm": 0.0, - "learning_rate": 1.2034443305602327e-06, - "loss": 0.8501, + "learning_rate": 1.2215911592856967e-06, + "loss": 0.8378, "step": 29847 }, { - "epoch": 0.8469920544835414, + "epoch": 0.8458159766499476, "grad_norm": 0.0, - "learning_rate": 1.2030072438226514e-06, - "loss": 0.8205, + "learning_rate": 1.2211516189055272e-06, + "loss": 0.8911, "step": 29848 }, { - "epoch": 0.8470204313280363, + "epoch": 0.8458443140922101, "grad_norm": 0.0, - "learning_rate": 1.202570231393666e-06, - "loss": 0.8591, + "learning_rate": 1.220712152472856e-06, + "loss": 0.8093, "step": 29849 }, { - "epoch": 0.8470488081725313, + "epoch": 0.8458726515344726, "grad_norm": 0.0, - "learning_rate": 1.2021332932769648e-06, - "loss": 0.7012, + "learning_rate": 1.2202727599913855e-06, + "loss": 0.8242, "step": 29850 }, { - "epoch": 0.8470771850170261, + "epoch": 0.8459009889767349, "grad_norm": 0.0, - "learning_rate": 1.2016964294762379e-06, - "loss": 0.783, + "learning_rate": 1.2198334414648183e-06, + "loss": 0.8649, "step": 29851 }, { - "epoch": 0.847105561861521, + "epoch": 0.8459293264189974, "grad_norm": 0.0, - "learning_rate": 1.2012596399951793e-06, - "loss": 0.8608, + "learning_rate": 1.2193941968968538e-06, + "loss": 0.7779, "step": 29852 }, { - "epoch": 0.8471339387060159, + "epoch": 0.8459576638612599, "grad_norm": 0.0, - "learning_rate": 1.2008229248374737e-06, - "loss": 0.7377, + "learning_rate": 1.2189550262911932e-06, + "loss": 0.8088, "step": 29853 }, { - "epoch": 0.8471623155505108, + "epoch": 0.8459860013035223, "grad_norm": 0.0, - "learning_rate": 1.2003862840068125e-06, - "loss": 0.7521, + "learning_rate": 1.218515929651537e-06, + "loss": 0.9201, "step": 29854 }, { - "epoch": 0.8471906923950057, + "epoch": 0.8460143387457848, "grad_norm": 0.0, - "learning_rate": 1.1999497175068863e-06, - "loss": 0.7593, + "learning_rate": 1.21807690698158e-06, + "loss": 0.8315, "step": 29855 }, { - "epoch": 0.8472190692395005, + "epoch": 0.8460426761880473, "grad_norm": 0.0, - "learning_rate": 1.1995132253413788e-06, - "loss": 0.7533, + "learning_rate": 1.2176379582850252e-06, + "loss": 0.875, "step": 29856 }, { - "epoch": 0.8472474460839955, + "epoch": 0.8460710136303097, "grad_norm": 0.0, - "learning_rate": 1.199076807513978e-06, - "loss": 0.806, + "learning_rate": 1.2171990835655645e-06, + "loss": 0.7885, "step": 29857 }, { - "epoch": 0.8472758229284904, + "epoch": 0.8460993510725722, "grad_norm": 0.0, - "learning_rate": 1.1986404640283734e-06, - "loss": 0.9158, + "learning_rate": 1.2167602828268965e-06, + "loss": 0.739, "step": 29858 }, { - "epoch": 0.8473041997729852, + "epoch": 0.8461276885148347, "grad_norm": 0.0, - "learning_rate": 1.1982041948882462e-06, - "loss": 0.7171, + "learning_rate": 1.2163215560727215e-06, + "loss": 0.8007, "step": 29859 }, { - "epoch": 0.8473325766174802, + "epoch": 0.8461560259570972, "grad_norm": 0.0, - "learning_rate": 1.1977680000972836e-06, - "loss": 0.8712, + "learning_rate": 1.2158829033067288e-06, + "loss": 0.781, "step": 29860 }, { - "epoch": 0.847360953461975, + "epoch": 0.8461843633993595, "grad_norm": 0.0, - "learning_rate": 1.1973318796591726e-06, - "loss": 0.7014, + "learning_rate": 1.2154443245326176e-06, + "loss": 0.8798, "step": 29861 }, { - "epoch": 0.8473893303064699, + "epoch": 0.846212700841622, "grad_norm": 0.0, - "learning_rate": 1.1968958335775938e-06, - "loss": 0.7515, + "learning_rate": 1.2150058197540815e-06, + "loss": 0.8077, "step": 29862 }, { - "epoch": 0.8474177071509648, + "epoch": 0.8462410382838845, "grad_norm": 0.0, - "learning_rate": 1.1964598618562305e-06, - "loss": 0.7531, + "learning_rate": 1.2145673889748122e-06, + "loss": 0.8881, "step": 29863 }, { - "epoch": 0.8474460839954597, + "epoch": 0.8462693757261469, "grad_norm": 0.0, - "learning_rate": 1.196023964498767e-06, - "loss": 0.8527, + "learning_rate": 1.2141290321985045e-06, + "loss": 0.8286, "step": 29864 }, { - "epoch": 0.8474744608399546, + "epoch": 0.8462977131684094, "grad_norm": 0.0, - "learning_rate": 1.1955881415088844e-06, - "loss": 0.8731, + "learning_rate": 1.2136907494288497e-06, + "loss": 0.6791, "step": 29865 }, { - "epoch": 0.8475028376844495, + "epoch": 0.8463260506106719, "grad_norm": 0.0, - "learning_rate": 1.1951523928902665e-06, - "loss": 0.7656, + "learning_rate": 1.2132525406695405e-06, + "loss": 0.9569, "step": 29866 }, { - "epoch": 0.8475312145289444, + "epoch": 0.8463543880529344, "grad_norm": 0.0, - "learning_rate": 1.1947167186465903e-06, - "loss": 0.8619, + "learning_rate": 1.2128144059242708e-06, + "loss": 0.8012, "step": 29867 }, { - "epoch": 0.8475595913734393, + "epoch": 0.8463827254951968, "grad_norm": 0.0, - "learning_rate": 1.1942811187815385e-06, - "loss": 0.8522, + "learning_rate": 1.2123763451967262e-06, + "loss": 0.8146, "step": 29868 }, { - "epoch": 0.8475879682179341, + "epoch": 0.8464110629374593, "grad_norm": 0.0, - "learning_rate": 1.1938455932987913e-06, - "loss": 0.8418, + "learning_rate": 1.2119383584905985e-06, + "loss": 0.731, "step": 29869 }, { - "epoch": 0.8476163450624291, + "epoch": 0.8464394003797218, "grad_norm": 0.0, - "learning_rate": 1.193410142202024e-06, - "loss": 0.7763, + "learning_rate": 1.2115004458095793e-06, + "loss": 0.8063, "step": 29870 }, { - "epoch": 0.847644721906924, + "epoch": 0.8464677378219841, "grad_norm": 0.0, - "learning_rate": 1.1929747654949174e-06, - "loss": 0.8855, + "learning_rate": 1.2110626071573539e-06, + "loss": 0.8032, "step": 29871 }, { - "epoch": 0.8476730987514188, + "epoch": 0.8464960752642466, "grad_norm": 0.0, - "learning_rate": 1.1925394631811515e-06, - "loss": 0.7714, + "learning_rate": 1.2106248425376133e-06, + "loss": 0.7899, "step": 29872 }, { - "epoch": 0.8477014755959137, + "epoch": 0.8465244127065091, "grad_norm": 0.0, - "learning_rate": 1.1921042352643975e-06, - "loss": 0.8163, + "learning_rate": 1.210187151954042e-06, + "loss": 0.6821, "step": 29873 }, { - "epoch": 0.8477298524404087, + "epoch": 0.8465527501487716, "grad_norm": 0.0, - "learning_rate": 1.1916690817483355e-06, - "loss": 0.7568, + "learning_rate": 1.2097495354103284e-06, + "loss": 0.7102, "step": 29874 }, { - "epoch": 0.8477582292849035, + "epoch": 0.846581087591034, "grad_norm": 0.0, - "learning_rate": 1.1912340026366431e-06, - "loss": 0.8568, + "learning_rate": 1.2093119929101605e-06, + "loss": 0.7543, "step": 29875 }, { - "epoch": 0.8477866061293984, + "epoch": 0.8466094250332965, "grad_norm": 0.0, - "learning_rate": 1.1907989979329904e-06, - "loss": 0.7955, + "learning_rate": 1.2088745244572198e-06, + "loss": 0.8157, "step": 29876 }, { - "epoch": 0.8478149829738933, + "epoch": 0.846637762475559, "grad_norm": 0.0, - "learning_rate": 1.1903640676410556e-06, - "loss": 0.7472, + "learning_rate": 1.2084371300551935e-06, + "loss": 0.9299, "step": 29877 }, { - "epoch": 0.8478433598183882, + "epoch": 0.8466660999178214, "grad_norm": 0.0, - "learning_rate": 1.1899292117645101e-06, - "loss": 0.8984, + "learning_rate": 1.207999809707766e-06, + "loss": 0.8919, "step": 29878 }, { - "epoch": 0.8478717366628831, + "epoch": 0.8466944373600839, "grad_norm": 0.0, - "learning_rate": 1.1894944303070299e-06, - "loss": 0.8335, + "learning_rate": 1.2075625634186205e-06, + "loss": 0.7903, "step": 29879 }, { - "epoch": 0.8479001135073779, + "epoch": 0.8467227748023464, "grad_norm": 0.0, - "learning_rate": 1.1890597232722877e-06, - "loss": 0.788, + "learning_rate": 1.2071253911914427e-06, + "loss": 0.8284, "step": 29880 }, { - "epoch": 0.8479284903518729, + "epoch": 0.8467511122446088, "grad_norm": 0.0, - "learning_rate": 1.1886250906639518e-06, - "loss": 0.7534, + "learning_rate": 1.2066882930299117e-06, + "loss": 0.8572, "step": 29881 }, { - "epoch": 0.8479568671963678, + "epoch": 0.8467794496868712, "grad_norm": 0.0, - "learning_rate": 1.1881905324856968e-06, - "loss": 0.8711, + "learning_rate": 1.2062512689377093e-06, + "loss": 0.8058, "step": 29882 }, { - "epoch": 0.8479852440408626, + "epoch": 0.8468077871291337, "grad_norm": 0.0, - "learning_rate": 1.1877560487411931e-06, - "loss": 0.7359, + "learning_rate": 1.2058143189185213e-06, + "loss": 0.7917, "step": 29883 }, { - "epoch": 0.8480136208853576, + "epoch": 0.8468361245713962, "grad_norm": 0.0, - "learning_rate": 1.1873216394341091e-06, - "loss": 0.7613, + "learning_rate": 1.2053774429760213e-06, + "loss": 0.9333, "step": 29884 }, { - "epoch": 0.8480419977298524, + "epoch": 0.8468644620136586, "grad_norm": 0.0, - "learning_rate": 1.186887304568115e-06, - "loss": 0.8443, + "learning_rate": 1.2049406411138963e-06, + "loss": 0.8276, "step": 29885 }, { - "epoch": 0.8480703745743473, + "epoch": 0.8468927994559211, "grad_norm": 0.0, - "learning_rate": 1.1864530441468824e-06, - "loss": 0.765, + "learning_rate": 1.2045039133358193e-06, + "loss": 0.7917, "step": 29886 }, { - "epoch": 0.8480987514188423, + "epoch": 0.8469211368981836, "grad_norm": 0.0, - "learning_rate": 1.186018858174075e-06, - "loss": 0.767, + "learning_rate": 1.2040672596454727e-06, + "loss": 0.8958, "step": 29887 }, { - "epoch": 0.8481271282633371, + "epoch": 0.846949474340446, "grad_norm": 0.0, - "learning_rate": 1.1855847466533632e-06, - "loss": 0.7779, + "learning_rate": 1.203630680046536e-06, + "loss": 0.8814, "step": 29888 }, { - "epoch": 0.848155505107832, + "epoch": 0.8469778117827085, "grad_norm": 0.0, - "learning_rate": 1.185150709588414e-06, - "loss": 0.8204, + "learning_rate": 1.2031941745426824e-06, + "loss": 0.8143, "step": 29889 }, { - "epoch": 0.8481838819523269, + "epoch": 0.847006149224971, "grad_norm": 0.0, - "learning_rate": 1.1847167469828924e-06, - "loss": 0.7602, + "learning_rate": 1.2027577431375902e-06, + "loss": 0.8185, "step": 29890 }, { - "epoch": 0.8482122587968218, + "epoch": 0.8470344866672335, "grad_norm": 0.0, - "learning_rate": 1.184282858840463e-06, - "loss": 0.7503, + "learning_rate": 1.202321385834937e-06, + "loss": 0.7249, "step": 29891 }, { - "epoch": 0.8482406356413167, + "epoch": 0.8470628241094958, "grad_norm": 0.0, - "learning_rate": 1.1838490451647965e-06, - "loss": 0.8781, + "learning_rate": 1.2018851026383971e-06, + "loss": 0.814, "step": 29892 }, { - "epoch": 0.8482690124858115, + "epoch": 0.8470911615517583, "grad_norm": 0.0, - "learning_rate": 1.18341530595955e-06, - "loss": 0.8596, + "learning_rate": 1.2014488935516476e-06, + "loss": 0.8161, "step": 29893 }, { - "epoch": 0.8482973893303065, + "epoch": 0.8471194989940208, "grad_norm": 0.0, - "learning_rate": 1.1829816412283912e-06, - "loss": 0.7072, + "learning_rate": 1.2010127585783604e-06, + "loss": 0.8692, "step": 29894 }, { - "epoch": 0.8483257661748014, + "epoch": 0.8471478364362832, "grad_norm": 0.0, - "learning_rate": 1.1825480509749831e-06, - "loss": 0.8434, + "learning_rate": 1.20057669772221e-06, + "loss": 0.8122, "step": 29895 }, { - "epoch": 0.8483541430192962, + "epoch": 0.8471761738785457, "grad_norm": 0.0, - "learning_rate": 1.1821145352029872e-06, - "loss": 0.7729, + "learning_rate": 1.2001407109868713e-06, + "loss": 0.8171, "step": 29896 }, { - "epoch": 0.8483825198637911, + "epoch": 0.8472045113208082, "grad_norm": 0.0, - "learning_rate": 1.1816810939160684e-06, - "loss": 0.8417, + "learning_rate": 1.1997047983760136e-06, + "loss": 0.7604, "step": 29897 }, { - "epoch": 0.8484108967082861, + "epoch": 0.8472328487630707, "grad_norm": 0.0, - "learning_rate": 1.1812477271178846e-06, - "loss": 0.8233, + "learning_rate": 1.1992689598933104e-06, + "loss": 0.7986, "step": 29898 }, { - "epoch": 0.8484392735527809, + "epoch": 0.8472611862053331, "grad_norm": 0.0, - "learning_rate": 1.1808144348120975e-06, - "loss": 0.8109, + "learning_rate": 1.198833195542435e-06, + "loss": 0.799, "step": 29899 }, { - "epoch": 0.8484676503972758, + "epoch": 0.8472895236475956, "grad_norm": 0.0, - "learning_rate": 1.1803812170023688e-06, - "loss": 0.794, + "learning_rate": 1.1983975053270525e-06, + "loss": 0.853, "step": 29900 }, { - "epoch": 0.8484960272417708, + "epoch": 0.8473178610898581, "grad_norm": 0.0, - "learning_rate": 1.179948073692354e-06, - "loss": 0.9086, + "learning_rate": 1.1979618892508394e-06, + "loss": 0.8414, "step": 29901 }, { - "epoch": 0.8485244040862656, + "epoch": 0.8473461985321205, "grad_norm": 0.0, - "learning_rate": 1.1795150048857151e-06, - "loss": 0.7884, + "learning_rate": 1.1975263473174592e-06, + "loss": 0.8321, "step": 29902 }, { - "epoch": 0.8485527809307605, + "epoch": 0.847374535974383, "grad_norm": 0.0, - "learning_rate": 1.1790820105861123e-06, - "loss": 0.8535, + "learning_rate": 1.1970908795305835e-06, + "loss": 0.844, "step": 29903 }, { - "epoch": 0.8485811577752554, + "epoch": 0.8474028734166454, "grad_norm": 0.0, - "learning_rate": 1.178649090797197e-06, - "loss": 0.8665, + "learning_rate": 1.1966554858938805e-06, + "loss": 0.8309, "step": 29904 }, { - "epoch": 0.8486095346197503, + "epoch": 0.8474312108589078, "grad_norm": 0.0, - "learning_rate": 1.1782162455226299e-06, - "loss": 0.7978, + "learning_rate": 1.1962201664110174e-06, + "loss": 0.8238, "step": 29905 }, { - "epoch": 0.8486379114642452, + "epoch": 0.8474595483011703, "grad_norm": 0.0, - "learning_rate": 1.1777834747660678e-06, - "loss": 0.8111, + "learning_rate": 1.195784921085662e-06, + "loss": 0.8124, "step": 29906 }, { - "epoch": 0.84866628830874, + "epoch": 0.8474878857434328, "grad_norm": 0.0, - "learning_rate": 1.1773507785311645e-06, - "loss": 0.9698, + "learning_rate": 1.1953497499214784e-06, + "loss": 0.7284, "step": 29907 }, { - "epoch": 0.848694665153235, + "epoch": 0.8475162231856953, "grad_norm": 0.0, - "learning_rate": 1.1769181568215749e-06, - "loss": 0.7578, + "learning_rate": 1.1949146529221334e-06, + "loss": 0.7906, "step": 29908 }, { - "epoch": 0.8487230419977299, + "epoch": 0.8475445606279577, "grad_norm": 0.0, - "learning_rate": 1.176485609640955e-06, - "loss": 0.7918, + "learning_rate": 1.1944796300912942e-06, + "loss": 0.9176, "step": 29909 }, { - "epoch": 0.8487514188422247, + "epoch": 0.8475728980702202, "grad_norm": 0.0, - "learning_rate": 1.1760531369929573e-06, - "loss": 0.762, + "learning_rate": 1.19404468143262e-06, + "loss": 0.7837, "step": 29910 }, { - "epoch": 0.8487797956867197, + "epoch": 0.8476012355124827, "grad_norm": 0.0, - "learning_rate": 1.175620738881238e-06, - "loss": 0.7686, + "learning_rate": 1.1936098069497792e-06, + "loss": 0.7294, "step": 29911 }, { - "epoch": 0.8488081725312145, + "epoch": 0.8476295729547451, "grad_norm": 0.0, - "learning_rate": 1.175188415309444e-06, - "loss": 0.8655, + "learning_rate": 1.1931750066464331e-06, + "loss": 0.8404, "step": 29912 }, { - "epoch": 0.8488365493757094, + "epoch": 0.8476579103970076, "grad_norm": 0.0, - "learning_rate": 1.1747561662812312e-06, - "loss": 0.8634, + "learning_rate": 1.1927402805262434e-06, + "loss": 0.8261, "step": 29913 }, { - "epoch": 0.8488649262202043, + "epoch": 0.84768624783927, "grad_norm": 0.0, - "learning_rate": 1.1743239918002513e-06, - "loss": 0.9096, + "learning_rate": 1.1923056285928747e-06, + "loss": 0.7341, "step": 29914 }, { - "epoch": 0.8488933030646992, + "epoch": 0.8477145852815325, "grad_norm": 0.0, - "learning_rate": 1.1738918918701515e-06, - "loss": 0.9243, + "learning_rate": 1.1918710508499842e-06, + "loss": 0.7682, "step": 29915 }, { - "epoch": 0.8489216799091941, + "epoch": 0.8477429227237949, "grad_norm": 0.0, - "learning_rate": 1.173459866494583e-06, - "loss": 0.8118, + "learning_rate": 1.1914365473012346e-06, + "loss": 0.8444, "step": 29916 }, { - "epoch": 0.848950056753689, + "epoch": 0.8477712601660574, "grad_norm": 0.0, - "learning_rate": 1.1730279156771984e-06, - "loss": 0.7956, + "learning_rate": 1.1910021179502861e-06, + "loss": 0.8019, "step": 29917 }, { - "epoch": 0.8489784335981839, + "epoch": 0.8477995976083199, "grad_norm": 0.0, - "learning_rate": 1.1725960394216417e-06, - "loss": 0.6795, + "learning_rate": 1.1905677628007983e-06, + "loss": 0.8766, "step": 29918 }, { - "epoch": 0.8490068104426788, + "epoch": 0.8478279350505823, "grad_norm": 0.0, - "learning_rate": 1.1721642377315635e-06, - "loss": 0.7954, + "learning_rate": 1.1901334818564291e-06, + "loss": 0.874, "step": 29919 }, { - "epoch": 0.8490351872871736, + "epoch": 0.8478562724928448, "grad_norm": 0.0, - "learning_rate": 1.1717325106106126e-06, - "loss": 0.7671, + "learning_rate": 1.1896992751208392e-06, + "loss": 0.7787, "step": 29920 }, { - "epoch": 0.8490635641316686, + "epoch": 0.8478846099351073, "grad_norm": 0.0, - "learning_rate": 1.1713008580624319e-06, - "loss": 0.8286, + "learning_rate": 1.189265142597682e-06, + "loss": 0.7599, "step": 29921 }, { - "epoch": 0.8490919409761635, + "epoch": 0.8479129473773697, "grad_norm": 0.0, - "learning_rate": 1.1708692800906696e-06, - "loss": 0.8535, + "learning_rate": 1.1888310842906181e-06, + "loss": 0.7863, "step": 29922 }, { - "epoch": 0.8491203178206583, + "epoch": 0.8479412848196322, "grad_norm": 0.0, - "learning_rate": 1.1704377766989727e-06, - "loss": 0.7405, + "learning_rate": 1.1883971002033002e-06, + "loss": 0.7196, "step": 29923 }, { - "epoch": 0.8491486946651532, + "epoch": 0.8479696222618947, "grad_norm": 0.0, - "learning_rate": 1.1700063478909817e-06, - "loss": 0.7661, + "learning_rate": 1.1879631903393851e-06, + "loss": 0.8417, "step": 29924 }, { - "epoch": 0.8491770715096482, + "epoch": 0.8479979597041571, "grad_norm": 0.0, - "learning_rate": 1.1695749936703482e-06, - "loss": 0.8593, + "learning_rate": 1.1875293547025302e-06, + "loss": 0.7911, "step": 29925 }, { - "epoch": 0.849205448354143, + "epoch": 0.8480262971464195, "grad_norm": 0.0, - "learning_rate": 1.1691437140407092e-06, - "loss": 0.8796, + "learning_rate": 1.1870955932963868e-06, + "loss": 0.8043, "step": 29926 }, { - "epoch": 0.8492338251986379, + "epoch": 0.848054634588682, "grad_norm": 0.0, - "learning_rate": 1.1687125090057106e-06, - "loss": 0.9077, + "learning_rate": 1.1866619061246088e-06, + "loss": 0.9359, "step": 29927 }, { - "epoch": 0.8492622020431329, + "epoch": 0.8480829720309445, "grad_norm": 0.0, - "learning_rate": 1.1682813785689962e-06, - "loss": 0.8191, + "learning_rate": 1.186228293190853e-06, + "loss": 0.8721, "step": 29928 }, { - "epoch": 0.8492905788876277, + "epoch": 0.8481113094732069, "grad_norm": 0.0, - "learning_rate": 1.1678503227342042e-06, - "loss": 0.8332, + "learning_rate": 1.1857947544987668e-06, + "loss": 0.8656, "step": 29929 }, { - "epoch": 0.8493189557321226, + "epoch": 0.8481396469154694, "grad_norm": 0.0, - "learning_rate": 1.1674193415049773e-06, - "loss": 0.8463, + "learning_rate": 1.1853612900520052e-06, + "loss": 0.8021, "step": 29930 }, { - "epoch": 0.8493473325766174, + "epoch": 0.8481679843577319, "grad_norm": 0.0, - "learning_rate": 1.1669884348849593e-06, - "loss": 0.6398, + "learning_rate": 1.1849278998542179e-06, + "loss": 0.8587, "step": 29931 }, { - "epoch": 0.8493757094211124, + "epoch": 0.8481963217999944, "grad_norm": 0.0, - "learning_rate": 1.1665576028777835e-06, - "loss": 0.8799, + "learning_rate": 1.1844945839090571e-06, + "loss": 0.8067, "step": 29932 }, { - "epoch": 0.8494040862656073, + "epoch": 0.8482246592422568, "grad_norm": 0.0, - "learning_rate": 1.1661268454870933e-06, - "loss": 0.8831, + "learning_rate": 1.184061342220173e-06, + "loss": 0.7614, "step": 29933 }, { - "epoch": 0.8494324631101021, + "epoch": 0.8482529966845193, "grad_norm": 0.0, - "learning_rate": 1.1656961627165286e-06, - "loss": 0.8312, + "learning_rate": 1.1836281747912125e-06, + "loss": 0.7037, "step": 29934 }, { - "epoch": 0.8494608399545971, + "epoch": 0.8482813341267817, "grad_norm": 0.0, - "learning_rate": 1.1652655545697244e-06, - "loss": 0.8716, + "learning_rate": 1.1831950816258275e-06, + "loss": 0.7994, "step": 29935 }, { - "epoch": 0.849489216799092, + "epoch": 0.8483096715690441, "grad_norm": 0.0, - "learning_rate": 1.164835021050318e-06, - "loss": 0.8115, + "learning_rate": 1.1827620627276627e-06, + "loss": 0.7742, "step": 29936 }, { - "epoch": 0.8495175936435868, + "epoch": 0.8483380090113066, "grad_norm": 0.0, - "learning_rate": 1.1644045621619504e-06, - "loss": 0.798, + "learning_rate": 1.1823291181003671e-06, + "loss": 0.7963, "step": 29937 }, { - "epoch": 0.8495459704880817, + "epoch": 0.8483663464535691, "grad_norm": 0.0, - "learning_rate": 1.1639741779082525e-06, - "loss": 0.8473, + "learning_rate": 1.1818962477475903e-06, + "loss": 0.7562, "step": 29938 }, { - "epoch": 0.8495743473325766, + "epoch": 0.8483946838958316, "grad_norm": 0.0, - "learning_rate": 1.1635438682928613e-06, - "loss": 0.8325, + "learning_rate": 1.1814634516729729e-06, + "loss": 0.9471, "step": 29939 }, { - "epoch": 0.8496027241770715, + "epoch": 0.848423021338094, "grad_norm": 0.0, - "learning_rate": 1.1631136333194127e-06, - "loss": 0.8604, + "learning_rate": 1.1810307298801638e-06, + "loss": 0.8055, "step": 29940 }, { - "epoch": 0.8496311010215664, + "epoch": 0.8484513587803565, "grad_norm": 0.0, - "learning_rate": 1.1626834729915404e-06, - "loss": 0.7995, + "learning_rate": 1.1805980823728103e-06, + "loss": 0.762, "step": 29941 }, { - "epoch": 0.8496594778660613, + "epoch": 0.848479696222619, "grad_norm": 0.0, - "learning_rate": 1.1622533873128771e-06, - "loss": 0.7253, + "learning_rate": 1.1801655091545516e-06, + "loss": 0.7526, "step": 29942 }, { - "epoch": 0.8496878547105562, + "epoch": 0.8485080336648814, "grad_norm": 0.0, - "learning_rate": 1.16182337628706e-06, - "loss": 0.7571, + "learning_rate": 1.179733010229034e-06, + "loss": 0.7954, "step": 29943 }, { - "epoch": 0.849716231555051, + "epoch": 0.8485363711071439, "grad_norm": 0.0, - "learning_rate": 1.1613934399177162e-06, - "loss": 0.7968, + "learning_rate": 1.1793005855999007e-06, + "loss": 0.8671, "step": 29944 }, { - "epoch": 0.849744608399546, + "epoch": 0.8485647085494064, "grad_norm": 0.0, - "learning_rate": 1.1609635782084804e-06, - "loss": 0.8135, + "learning_rate": 1.1788682352707936e-06, + "loss": 0.7565, "step": 29945 }, { - "epoch": 0.8497729852440409, + "epoch": 0.8485930459916687, "grad_norm": 0.0, - "learning_rate": 1.160533791162981e-06, - "loss": 0.8683, + "learning_rate": 1.1784359592453576e-06, + "loss": 0.9071, "step": 29946 }, { - "epoch": 0.8498013620885357, + "epoch": 0.8486213834339312, "grad_norm": 0.0, - "learning_rate": 1.1601040787848504e-06, - "loss": 0.8228, + "learning_rate": 1.1780037575272297e-06, + "loss": 0.8821, "step": 29947 }, { - "epoch": 0.8498297389330306, + "epoch": 0.8486497208761937, "grad_norm": 0.0, - "learning_rate": 1.1596744410777206e-06, - "loss": 0.6974, + "learning_rate": 1.1775716301200512e-06, + "loss": 0.7518, "step": 29948 }, { - "epoch": 0.8498581157775256, + "epoch": 0.8486780583184562, "grad_norm": 0.0, - "learning_rate": 1.1592448780452148e-06, - "loss": 0.8779, + "learning_rate": 1.1771395770274653e-06, + "loss": 0.9046, "step": 29949 }, { - "epoch": 0.8498864926220204, + "epoch": 0.8487063957607186, "grad_norm": 0.0, - "learning_rate": 1.158815389690966e-06, - "loss": 0.7973, + "learning_rate": 1.1767075982531073e-06, + "loss": 0.8208, "step": 29950 }, { - "epoch": 0.8499148694665153, + "epoch": 0.8487347332029811, "grad_norm": 0.0, - "learning_rate": 1.1583859760186023e-06, - "loss": 0.7579, + "learning_rate": 1.1762756938006192e-06, + "loss": 0.7977, "step": 29951 }, { - "epoch": 0.8499432463110103, + "epoch": 0.8487630706452436, "grad_norm": 0.0, - "learning_rate": 1.1579566370317473e-06, - "loss": 0.9463, + "learning_rate": 1.1758438636736358e-06, + "loss": 0.8668, "step": 29952 }, { - "epoch": 0.8499716231555051, + "epoch": 0.848791408087506, "grad_norm": 0.0, - "learning_rate": 1.1575273727340308e-06, - "loss": 0.7221, + "learning_rate": 1.1754121078757963e-06, + "loss": 0.8088, "step": 29953 }, { - "epoch": 0.85, + "epoch": 0.8488197455297685, "grad_norm": 0.0, - "learning_rate": 1.1570981831290806e-06, - "loss": 0.8031, + "learning_rate": 1.1749804264107402e-06, + "loss": 0.7791, "step": 29954 }, { - "epoch": 0.8500283768444948, + "epoch": 0.848848082972031, "grad_norm": 0.0, - "learning_rate": 1.156669068220515e-06, - "loss": 0.8926, + "learning_rate": 1.1745488192820974e-06, + "loss": 0.7444, "step": 29955 }, { - "epoch": 0.8500567536889898, + "epoch": 0.8488764204142935, "grad_norm": 0.0, - "learning_rate": 1.1562400280119656e-06, - "loss": 0.6935, + "learning_rate": 1.174117286493508e-06, + "loss": 0.7007, "step": 29956 }, { - "epoch": 0.8500851305334847, + "epoch": 0.8489047578565558, "grad_norm": 0.0, - "learning_rate": 1.155811062507055e-06, - "loss": 0.7206, + "learning_rate": 1.1736858280486063e-06, + "loss": 0.8027, "step": 29957 }, { - "epoch": 0.8501135073779795, + "epoch": 0.8489330952988183, "grad_norm": 0.0, - "learning_rate": 1.155382171709405e-06, - "loss": 0.7319, + "learning_rate": 1.1732544439510262e-06, + "loss": 0.8864, "step": 29958 }, { - "epoch": 0.8501418842224745, + "epoch": 0.8489614327410808, "grad_norm": 0.0, - "learning_rate": 1.15495335562264e-06, - "loss": 0.7572, + "learning_rate": 1.172823134204405e-06, + "loss": 0.7837, "step": 29959 }, { - "epoch": 0.8501702610669694, + "epoch": 0.8489897701833432, "grad_norm": 0.0, - "learning_rate": 1.154524614250383e-06, - "loss": 0.7631, + "learning_rate": 1.1723918988123684e-06, + "loss": 0.7595, "step": 29960 }, { - "epoch": 0.8501986379114642, + "epoch": 0.8490181076256057, "grad_norm": 0.0, - "learning_rate": 1.154095947596252e-06, - "loss": 0.8611, + "learning_rate": 1.1719607377785537e-06, + "loss": 0.8862, "step": 29961 }, { - "epoch": 0.8502270147559592, + "epoch": 0.8490464450678682, "grad_norm": 0.0, - "learning_rate": 1.15366735566387e-06, - "loss": 0.7203, + "learning_rate": 1.1715296511065942e-06, + "loss": 0.7113, "step": 29962 }, { - "epoch": 0.850255391600454, + "epoch": 0.8490747825101307, "grad_norm": 0.0, - "learning_rate": 1.1532388384568594e-06, - "loss": 0.7823, + "learning_rate": 1.1710986388001167e-06, + "loss": 0.7292, "step": 29963 }, { - "epoch": 0.8502837684449489, + "epoch": 0.8491031199523931, "grad_norm": 0.0, - "learning_rate": 1.1528103959788373e-06, - "loss": 0.8461, + "learning_rate": 1.1706677008627564e-06, + "loss": 0.7895, "step": 29964 }, { - "epoch": 0.8503121452894438, + "epoch": 0.8491314573946556, "grad_norm": 0.0, - "learning_rate": 1.152382028233422e-06, - "loss": 0.7433, + "learning_rate": 1.1702368372981375e-06, + "loss": 0.7895, "step": 29965 }, { - "epoch": 0.8503405221339387, + "epoch": 0.8491597948369181, "grad_norm": 0.0, - "learning_rate": 1.151953735224236e-06, - "loss": 0.7355, + "learning_rate": 1.1698060481098938e-06, + "loss": 0.8641, "step": 29966 }, { - "epoch": 0.8503688989784336, + "epoch": 0.8491881322791804, "grad_norm": 0.0, - "learning_rate": 1.1515255169548934e-06, - "loss": 0.8443, + "learning_rate": 1.1693753333016534e-06, + "loss": 0.889, "step": 29967 }, { - "epoch": 0.8503972758229285, + "epoch": 0.8492164697214429, "grad_norm": 0.0, - "learning_rate": 1.151097373429011e-06, - "loss": 0.7827, + "learning_rate": 1.1689446928770432e-06, + "loss": 0.9406, "step": 29968 }, { - "epoch": 0.8504256526674234, + "epoch": 0.8492448071637054, "grad_norm": 0.0, - "learning_rate": 1.1506693046502093e-06, - "loss": 0.8774, + "learning_rate": 1.1685141268396906e-06, + "loss": 0.7718, "step": 29969 }, { - "epoch": 0.8504540295119183, + "epoch": 0.8492731446059678, "grad_norm": 0.0, - "learning_rate": 1.1502413106221012e-06, - "loss": 0.8426, + "learning_rate": 1.1680836351932223e-06, + "loss": 0.7969, "step": 29970 }, { - "epoch": 0.8504824063564131, + "epoch": 0.8493014820482303, "grad_norm": 0.0, - "learning_rate": 1.1498133913483012e-06, - "loss": 0.7563, + "learning_rate": 1.1676532179412659e-06, + "loss": 0.7618, "step": 29971 }, { - "epoch": 0.850510783200908, + "epoch": 0.8493298194904928, "grad_norm": 0.0, - "learning_rate": 1.1493855468324255e-06, - "loss": 0.867, + "learning_rate": 1.1672228750874482e-06, + "loss": 0.7022, "step": 29972 }, { - "epoch": 0.850539160045403, + "epoch": 0.8493581569327553, "grad_norm": 0.0, - "learning_rate": 1.148957777078088e-06, - "loss": 0.8642, + "learning_rate": 1.1667926066353895e-06, + "loss": 0.8724, "step": 29973 }, { - "epoch": 0.8505675368898978, + "epoch": 0.8493864943750177, "grad_norm": 0.0, - "learning_rate": 1.1485300820889035e-06, - "loss": 0.8541, + "learning_rate": 1.1663624125887186e-06, + "loss": 0.8222, "step": 29974 }, { - "epoch": 0.8505959137343927, + "epoch": 0.8494148318172802, "grad_norm": 0.0, - "learning_rate": 1.1481024618684821e-06, - "loss": 0.8094, + "learning_rate": 1.1659322929510574e-06, + "loss": 0.8931, "step": 29975 }, { - "epoch": 0.8506242905788877, + "epoch": 0.8494431692595427, "grad_norm": 0.0, - "learning_rate": 1.147674916420437e-06, - "loss": 0.8384, + "learning_rate": 1.165502247726028e-06, + "loss": 0.8468, "step": 29976 }, { - "epoch": 0.8506526674233825, + "epoch": 0.849471506701805, "grad_norm": 0.0, - "learning_rate": 1.1472474457483818e-06, - "loss": 0.8222, + "learning_rate": 1.165072276917254e-06, + "loss": 0.8625, "step": 29977 }, { - "epoch": 0.8506810442678774, + "epoch": 0.8494998441440675, "grad_norm": 0.0, - "learning_rate": 1.1468200498559234e-06, - "loss": 0.8436, + "learning_rate": 1.164642380528358e-06, + "loss": 0.784, "step": 29978 }, { - "epoch": 0.8507094211123724, + "epoch": 0.84952818158633, "grad_norm": 0.0, - "learning_rate": 1.1463927287466736e-06, - "loss": 0.87, + "learning_rate": 1.1642125585629593e-06, + "loss": 0.8494, "step": 29979 }, { - "epoch": 0.8507377979568672, + "epoch": 0.8495565190285925, "grad_norm": 0.0, - "learning_rate": 1.145965482424245e-06, - "loss": 0.8453, + "learning_rate": 1.1637828110246806e-06, + "loss": 0.8281, "step": 29980 }, { - "epoch": 0.8507661748013621, + "epoch": 0.8495848564708549, "grad_norm": 0.0, - "learning_rate": 1.1455383108922414e-06, - "loss": 0.9001, + "learning_rate": 1.1633531379171382e-06, + "loss": 0.8645, "step": 29981 }, { - "epoch": 0.8507945516458569, + "epoch": 0.8496131939131174, "grad_norm": 0.0, - "learning_rate": 1.1451112141542742e-06, - "loss": 0.7484, + "learning_rate": 1.1629235392439542e-06, + "loss": 0.7483, "step": 29982 }, { - "epoch": 0.8508229284903519, + "epoch": 0.8496415313553799, "grad_norm": 0.0, - "learning_rate": 1.144684192213953e-06, - "loss": 0.8969, + "learning_rate": 1.1624940150087472e-06, + "loss": 0.7309, "step": 29983 }, { - "epoch": 0.8508513053348468, + "epoch": 0.8496698687976423, "grad_norm": 0.0, - "learning_rate": 1.14425724507488e-06, - "loss": 0.8552, + "learning_rate": 1.162064565215134e-06, + "loss": 0.7781, "step": 29984 }, { - "epoch": 0.8508796821793416, + "epoch": 0.8496982062399048, "grad_norm": 0.0, - "learning_rate": 1.143830372740664e-06, - "loss": 0.8023, + "learning_rate": 1.1616351898667333e-06, + "loss": 0.7639, "step": 29985 }, { - "epoch": 0.8509080590238366, + "epoch": 0.8497265436821673, "grad_norm": 0.0, - "learning_rate": 1.1434035752149108e-06, - "loss": 0.8357, + "learning_rate": 1.161205888967164e-06, + "loss": 0.7879, "step": 29986 }, { - "epoch": 0.8509364358683315, + "epoch": 0.8497548811244298, "grad_norm": 0.0, - "learning_rate": 1.1429768525012264e-06, - "loss": 0.9157, + "learning_rate": 1.1607766625200367e-06, + "loss": 0.7846, "step": 29987 }, { - "epoch": 0.8509648127128263, + "epoch": 0.8497832185666921, "grad_norm": 0.0, - "learning_rate": 1.1425502046032154e-06, - "loss": 0.758, + "learning_rate": 1.160347510528972e-06, + "loss": 0.8569, "step": 29988 }, { - "epoch": 0.8509931895573212, + "epoch": 0.8498115560089546, "grad_norm": 0.0, - "learning_rate": 1.14212363152448e-06, - "loss": 0.898, + "learning_rate": 1.159918432997581e-06, + "loss": 0.7323, "step": 29989 }, { - "epoch": 0.8510215664018161, + "epoch": 0.8498398934512171, "grad_norm": 0.0, - "learning_rate": 1.1416971332686243e-06, - "loss": 0.8428, + "learning_rate": 1.1594894299294801e-06, + "loss": 0.9263, "step": 29990 }, { - "epoch": 0.851049943246311, + "epoch": 0.8498682308934795, "grad_norm": 0.0, - "learning_rate": 1.141270709839254e-06, - "loss": 0.8324, + "learning_rate": 1.1590605013282842e-06, + "loss": 0.8359, "step": 29991 }, { - "epoch": 0.8510783200908059, + "epoch": 0.849896568335742, "grad_norm": 0.0, - "learning_rate": 1.1408443612399655e-06, - "loss": 0.6615, + "learning_rate": 1.1586316471976034e-06, + "loss": 0.7249, "step": 29992 }, { - "epoch": 0.8511066969353008, + "epoch": 0.8499249057780045, "grad_norm": 0.0, - "learning_rate": 1.140418087474362e-06, - "loss": 0.7568, + "learning_rate": 1.1582028675410507e-06, + "loss": 0.7437, "step": 29993 }, { - "epoch": 0.8511350737797957, + "epoch": 0.8499532432202669, "grad_norm": 0.0, - "learning_rate": 1.1399918885460482e-06, - "loss": 0.8383, + "learning_rate": 1.1577741623622407e-06, + "loss": 0.8148, "step": 29994 }, { - "epoch": 0.8511634506242906, + "epoch": 0.8499815806625294, "grad_norm": 0.0, - "learning_rate": 1.139565764458619e-06, - "loss": 0.8168, + "learning_rate": 1.1573455316647796e-06, + "loss": 0.8909, "step": 29995 }, { - "epoch": 0.8511918274687855, + "epoch": 0.8500099181047919, "grad_norm": 0.0, - "learning_rate": 1.1391397152156768e-06, - "loss": 0.9777, + "learning_rate": 1.1569169754522812e-06, + "loss": 0.8099, "step": 29996 }, { - "epoch": 0.8512202043132804, + "epoch": 0.8500382555470544, "grad_norm": 0.0, - "learning_rate": 1.1387137408208203e-06, - "loss": 0.8475, + "learning_rate": 1.156488493728356e-06, + "loss": 0.7614, "step": 29997 }, { - "epoch": 0.8512485811577752, + "epoch": 0.8500665929893167, "grad_norm": 0.0, - "learning_rate": 1.1382878412776466e-06, - "loss": 0.811, + "learning_rate": 1.1560600864966108e-06, + "loss": 0.8447, "step": 29998 }, { - "epoch": 0.8512769580022701, + "epoch": 0.8500949304315792, "grad_norm": 0.0, - "learning_rate": 1.1378620165897537e-06, - "loss": 0.8005, + "learning_rate": 1.1556317537606588e-06, + "loss": 0.7722, "step": 29999 }, { - "epoch": 0.8513053348467651, + "epoch": 0.8501232678738417, "grad_norm": 0.0, - "learning_rate": 1.1374362667607397e-06, - "loss": 0.8486, + "learning_rate": 1.1552034955241021e-06, + "loss": 0.5796, "step": 30000 }, { - "epoch": 0.8513337116912599, + "epoch": 0.8501516053161041, "grad_norm": 0.0, - "learning_rate": 1.1370105917941966e-06, - "loss": 0.7732, + "learning_rate": 1.1547753117905524e-06, + "loss": 0.8333, "step": 30001 }, { - "epoch": 0.8513620885357548, + "epoch": 0.8501799427583666, "grad_norm": 0.0, - "learning_rate": 1.1365849916937277e-06, - "loss": 0.8043, + "learning_rate": 1.1543472025636138e-06, + "loss": 0.8045, "step": 30002 }, { - "epoch": 0.8513904653802498, + "epoch": 0.8502082802006291, "grad_norm": 0.0, - "learning_rate": 1.1361594664629206e-06, - "loss": 0.8915, + "learning_rate": 1.153919167846893e-06, + "loss": 0.8165, "step": 30003 }, { - "epoch": 0.8514188422247446, + "epoch": 0.8502366176428916, "grad_norm": 0.0, - "learning_rate": 1.1357340161053742e-06, - "loss": 0.8869, + "learning_rate": 1.1534912076439975e-06, + "loss": 0.7543, "step": 30004 }, { - "epoch": 0.8514472190692395, + "epoch": 0.850264955085154, "grad_norm": 0.0, - "learning_rate": 1.1353086406246827e-06, - "loss": 0.8106, + "learning_rate": 1.1530633219585296e-06, + "loss": 0.8651, "step": 30005 }, { - "epoch": 0.8514755959137343, + "epoch": 0.8502932925274165, "grad_norm": 0.0, - "learning_rate": 1.1348833400244353e-06, - "loss": 0.7887, + "learning_rate": 1.1526355107940934e-06, + "loss": 0.8371, "step": 30006 }, { - "epoch": 0.8515039727582293, + "epoch": 0.850321629969679, "grad_norm": 0.0, - "learning_rate": 1.1344581143082279e-06, - "loss": 0.7877, + "learning_rate": 1.1522077741542947e-06, + "loss": 0.7567, "step": 30007 }, { - "epoch": 0.8515323496027242, + "epoch": 0.8503499674119414, "grad_norm": 0.0, - "learning_rate": 1.134032963479652e-06, - "loss": 0.7962, + "learning_rate": 1.1517801120427351e-06, + "loss": 0.8094, "step": 30008 }, { - "epoch": 0.851560726447219, + "epoch": 0.8503783048542038, "grad_norm": 0.0, - "learning_rate": 1.1336078875422973e-06, - "loss": 0.7975, + "learning_rate": 1.1513525244630198e-06, + "loss": 0.7182, "step": 30009 }, { - "epoch": 0.851589103291714, + "epoch": 0.8504066422964663, "grad_norm": 0.0, - "learning_rate": 1.133182886499754e-06, - "loss": 0.8225, + "learning_rate": 1.1509250114187454e-06, + "loss": 0.7382, "step": 30010 }, { - "epoch": 0.8516174801362089, + "epoch": 0.8504349797387288, "grad_norm": 0.0, - "learning_rate": 1.1327579603556172e-06, - "loss": 0.8351, + "learning_rate": 1.1504975729135158e-06, + "loss": 0.8505, "step": 30011 }, { - "epoch": 0.8516458569807037, + "epoch": 0.8504633171809912, "grad_norm": 0.0, - "learning_rate": 1.1323331091134693e-06, - "loss": 0.7404, + "learning_rate": 1.1500702089509331e-06, + "loss": 0.7049, "step": 30012 }, { - "epoch": 0.8516742338251987, + "epoch": 0.8504916546232537, "grad_norm": 0.0, - "learning_rate": 1.131908332776902e-06, - "loss": 0.7704, + "learning_rate": 1.1496429195345937e-06, + "loss": 0.8181, "step": 30013 }, { - "epoch": 0.8517026106696935, + "epoch": 0.8505199920655162, "grad_norm": 0.0, - "learning_rate": 1.131483631349507e-06, - "loss": 0.8096, + "learning_rate": 1.1492157046680985e-06, + "loss": 0.7416, "step": 30014 }, { - "epoch": 0.8517309875141884, + "epoch": 0.8505483295077786, "grad_norm": 0.0, - "learning_rate": 1.1310590048348647e-06, - "loss": 0.797, + "learning_rate": 1.1487885643550478e-06, + "loss": 0.8532, "step": 30015 }, { - "epoch": 0.8517593643586833, + "epoch": 0.8505766669500411, "grad_norm": 0.0, - "learning_rate": 1.1306344532365665e-06, - "loss": 0.7616, + "learning_rate": 1.1483614985990365e-06, + "loss": 0.7982, "step": 30016 }, { - "epoch": 0.8517877412031782, + "epoch": 0.8506050043923036, "grad_norm": 0.0, - "learning_rate": 1.1302099765581987e-06, - "loss": 0.7809, + "learning_rate": 1.1479345074036641e-06, + "loss": 0.7857, "step": 30017 }, { - "epoch": 0.8518161180476731, + "epoch": 0.850633341834566, "grad_norm": 0.0, - "learning_rate": 1.1297855748033448e-06, - "loss": 0.7907, + "learning_rate": 1.1475075907725252e-06, + "loss": 0.7975, "step": 30018 }, { - "epoch": 0.851844494892168, + "epoch": 0.8506616792768285, "grad_norm": 0.0, - "learning_rate": 1.1293612479755934e-06, - "loss": 0.7799, + "learning_rate": 1.1470807487092173e-06, + "loss": 0.8083, "step": 30019 }, { - "epoch": 0.8518728717366629, + "epoch": 0.850690016719091, "grad_norm": 0.0, - "learning_rate": 1.1289369960785234e-06, - "loss": 0.901, + "learning_rate": 1.1466539812173362e-06, + "loss": 0.8154, "step": 30020 }, { - "epoch": 0.8519012485811578, + "epoch": 0.8507183541613534, "grad_norm": 0.0, - "learning_rate": 1.1285128191157224e-06, - "loss": 0.7927, + "learning_rate": 1.1462272883004754e-06, + "loss": 0.7533, "step": 30021 }, { - "epoch": 0.8519296254256526, + "epoch": 0.8507466916036158, "grad_norm": 0.0, - "learning_rate": 1.1280887170907739e-06, - "loss": 0.8136, + "learning_rate": 1.1458006699622314e-06, + "loss": 0.8323, "step": 30022 }, { - "epoch": 0.8519580022701475, + "epoch": 0.8507750290458783, "grad_norm": 0.0, - "learning_rate": 1.1276646900072574e-06, - "loss": 0.8043, + "learning_rate": 1.1453741262061958e-06, + "loss": 0.759, "step": 30023 }, { - "epoch": 0.8519863791146425, + "epoch": 0.8508033664881408, "grad_norm": 0.0, - "learning_rate": 1.1272407378687556e-06, - "loss": 0.8326, + "learning_rate": 1.1449476570359608e-06, + "loss": 0.8875, "step": 30024 }, { - "epoch": 0.8520147559591373, + "epoch": 0.8508317039304032, "grad_norm": 0.0, - "learning_rate": 1.1268168606788532e-06, - "loss": 0.8038, + "learning_rate": 1.1445212624551217e-06, + "loss": 0.8584, "step": 30025 }, { - "epoch": 0.8520431328036322, + "epoch": 0.8508600413726657, "grad_norm": 0.0, - "learning_rate": 1.1263930584411243e-06, - "loss": 0.7544, + "learning_rate": 1.1440949424672677e-06, + "loss": 0.6919, "step": 30026 }, { - "epoch": 0.8520715096481272, + "epoch": 0.8508883788149282, "grad_norm": 0.0, - "learning_rate": 1.1259693311591523e-06, - "loss": 0.8928, + "learning_rate": 1.1436686970759892e-06, + "loss": 0.8748, "step": 30027 }, { - "epoch": 0.852099886492622, + "epoch": 0.8509167162571907, "grad_norm": 0.0, - "learning_rate": 1.1255456788365193e-06, - "loss": 0.8204, + "learning_rate": 1.1432425262848811e-06, + "loss": 0.8663, "step": 30028 }, { - "epoch": 0.8521282633371169, + "epoch": 0.8509450536994531, "grad_norm": 0.0, - "learning_rate": 1.1251221014767977e-06, - "loss": 0.8518, + "learning_rate": 1.1428164300975276e-06, + "loss": 0.806, "step": 30029 }, { - "epoch": 0.8521566401816119, + "epoch": 0.8509733911417156, "grad_norm": 0.0, - "learning_rate": 1.124698599083569e-06, - "loss": 0.7625, + "learning_rate": 1.1423904085175219e-06, + "loss": 0.9011, "step": 30030 }, { - "epoch": 0.8521850170261067, + "epoch": 0.851001728583978, "grad_norm": 0.0, - "learning_rate": 1.1242751716604128e-06, - "loss": 0.8601, + "learning_rate": 1.1419644615484504e-06, + "loss": 0.7139, "step": 30031 }, { - "epoch": 0.8522133938706016, + "epoch": 0.8510300660262404, "grad_norm": 0.0, - "learning_rate": 1.1238518192108983e-06, - "loss": 0.7817, + "learning_rate": 1.1415385891939001e-06, + "loss": 0.7327, "step": 30032 }, { - "epoch": 0.8522417707150964, + "epoch": 0.8510584034685029, "grad_norm": 0.0, - "learning_rate": 1.1234285417386115e-06, - "loss": 0.7102, + "learning_rate": 1.1411127914574604e-06, + "loss": 0.7271, "step": 30033 }, { - "epoch": 0.8522701475595914, + "epoch": 0.8510867409107654, "grad_norm": 0.0, - "learning_rate": 1.1230053392471196e-06, - "loss": 0.7669, + "learning_rate": 1.1406870683427174e-06, + "loss": 0.7739, "step": 30034 }, { - "epoch": 0.8522985244040863, + "epoch": 0.8511150783530279, "grad_norm": 0.0, - "learning_rate": 1.122582211740002e-06, - "loss": 0.8001, + "learning_rate": 1.140261419853257e-06, + "loss": 0.8432, "step": 30035 }, { - "epoch": 0.8523269012485811, + "epoch": 0.8511434157952903, "grad_norm": 0.0, - "learning_rate": 1.1221591592208325e-06, - "loss": 0.7275, + "learning_rate": 1.1398358459926663e-06, + "loss": 0.8467, "step": 30036 }, { - "epoch": 0.8523552780930761, + "epoch": 0.8511717532375528, "grad_norm": 0.0, - "learning_rate": 1.1217361816931815e-06, - "loss": 0.7807, + "learning_rate": 1.1394103467645267e-06, + "loss": 0.7906, "step": 30037 }, { - "epoch": 0.852383654937571, + "epoch": 0.8512000906798153, "grad_norm": 0.0, - "learning_rate": 1.1213132791606251e-06, - "loss": 0.7243, + "learning_rate": 1.1389849221724258e-06, + "loss": 0.7801, "step": 30038 }, { - "epoch": 0.8524120317820658, + "epoch": 0.8512284281220777, "grad_norm": 0.0, - "learning_rate": 1.1208904516267361e-06, - "loss": 0.8185, + "learning_rate": 1.1385595722199438e-06, + "loss": 0.7416, "step": 30039 }, { - "epoch": 0.8524404086265607, + "epoch": 0.8512567655643402, "grad_norm": 0.0, - "learning_rate": 1.1204676990950826e-06, - "loss": 0.7972, + "learning_rate": 1.1381342969106647e-06, + "loss": 0.7755, "step": 30040 }, { - "epoch": 0.8524687854710556, + "epoch": 0.8512851030066027, "grad_norm": 0.0, - "learning_rate": 1.120045021569237e-06, - "loss": 0.7875, + "learning_rate": 1.137709096248173e-06, + "loss": 0.7466, "step": 30041 }, { - "epoch": 0.8524971623155505, + "epoch": 0.851313440448865, "grad_norm": 0.0, - "learning_rate": 1.1196224190527737e-06, - "loss": 0.726, + "learning_rate": 1.137283970236047e-06, + "loss": 0.8568, "step": 30042 }, { - "epoch": 0.8525255391600454, + "epoch": 0.8513417778911275, "grad_norm": 0.0, - "learning_rate": 1.1191998915492553e-06, - "loss": 0.9319, + "learning_rate": 1.1368589188778689e-06, + "loss": 0.8058, "step": 30043 }, { - "epoch": 0.8525539160045403, + "epoch": 0.85137011533339, "grad_norm": 0.0, - "learning_rate": 1.1187774390622563e-06, - "loss": 0.8593, + "learning_rate": 1.1364339421772219e-06, + "loss": 0.8587, "step": 30044 }, { - "epoch": 0.8525822928490352, + "epoch": 0.8513984527756525, "grad_norm": 0.0, - "learning_rate": 1.1183550615953442e-06, - "loss": 0.6944, + "learning_rate": 1.1360090401376812e-06, + "loss": 0.8493, "step": 30045 }, { - "epoch": 0.85261066969353, + "epoch": 0.8514267902179149, "grad_norm": 0.0, - "learning_rate": 1.117932759152085e-06, - "loss": 0.6451, + "learning_rate": 1.135584212762827e-06, + "loss": 0.7938, "step": 30046 }, { - "epoch": 0.852639046538025, + "epoch": 0.8514551276601774, "grad_norm": 0.0, - "learning_rate": 1.1175105317360446e-06, - "loss": 0.8126, + "learning_rate": 1.1351594600562399e-06, + "loss": 0.8162, "step": 30047 }, { - "epoch": 0.8526674233825199, + "epoch": 0.8514834651024399, "grad_norm": 0.0, - "learning_rate": 1.1170883793507959e-06, - "loss": 0.862, + "learning_rate": 1.134734782021496e-06, + "loss": 0.7567, "step": 30048 }, { - "epoch": 0.8526958002270147, + "epoch": 0.8515118025447023, "grad_norm": 0.0, - "learning_rate": 1.1166663019998992e-06, - "loss": 0.8186, + "learning_rate": 1.1343101786621747e-06, + "loss": 0.7966, "step": 30049 }, { - "epoch": 0.8527241770715096, + "epoch": 0.8515401399869648, "grad_norm": 0.0, - "learning_rate": 1.1162442996869216e-06, - "loss": 0.7159, + "learning_rate": 1.1338856499818497e-06, + "loss": 0.8665, "step": 30050 }, { - "epoch": 0.8527525539160046, + "epoch": 0.8515684774292273, "grad_norm": 0.0, - "learning_rate": 1.1158223724154305e-06, - "loss": 0.7714, + "learning_rate": 1.1334611959840992e-06, + "loss": 0.7662, "step": 30051 }, { - "epoch": 0.8527809307604994, + "epoch": 0.8515968148714897, "grad_norm": 0.0, - "learning_rate": 1.115400520188984e-06, - "loss": 0.7541, + "learning_rate": 1.1330368166724958e-06, + "loss": 0.8507, "step": 30052 }, { - "epoch": 0.8528093076049943, + "epoch": 0.8516251523137521, "grad_norm": 0.0, - "learning_rate": 1.1149787430111514e-06, - "loss": 0.8529, + "learning_rate": 1.132612512050617e-06, + "loss": 0.8287, "step": 30053 }, { - "epoch": 0.8528376844494893, + "epoch": 0.8516534897560146, "grad_norm": 0.0, - "learning_rate": 1.11455704088549e-06, - "loss": 0.7297, + "learning_rate": 1.1321882821220375e-06, + "loss": 0.8044, "step": 30054 }, { - "epoch": 0.8528660612939841, + "epoch": 0.8516818271982771, "grad_norm": 0.0, - "learning_rate": 1.1141354138155658e-06, - "loss": 0.8934, + "learning_rate": 1.1317641268903267e-06, + "loss": 0.8184, "step": 30055 }, { - "epoch": 0.852894438138479, + "epoch": 0.8517101646405395, "grad_norm": 0.0, - "learning_rate": 1.1137138618049403e-06, - "loss": 0.7665, + "learning_rate": 1.1313400463590597e-06, + "loss": 0.8664, "step": 30056 }, { - "epoch": 0.8529228149829738, + "epoch": 0.851738502082802, "grad_norm": 0.0, - "learning_rate": 1.113292384857172e-06, - "loss": 0.7938, + "learning_rate": 1.130916040531811e-06, + "loss": 0.8127, "step": 30057 }, { - "epoch": 0.8529511918274688, + "epoch": 0.8517668395250645, "grad_norm": 0.0, - "learning_rate": 1.1128709829758221e-06, - "loss": 0.8748, + "learning_rate": 1.130492109412148e-06, + "loss": 0.6593, "step": 30058 }, { - "epoch": 0.8529795686719637, + "epoch": 0.851795176967327, "grad_norm": 0.0, - "learning_rate": 1.1124496561644526e-06, - "loss": 0.7451, + "learning_rate": 1.1300682530036432e-06, + "loss": 0.8302, "step": 30059 }, { - "epoch": 0.8530079455164585, + "epoch": 0.8518235144095894, "grad_norm": 0.0, - "learning_rate": 1.1120284044266183e-06, - "loss": 0.8809, + "learning_rate": 1.1296444713098675e-06, + "loss": 0.6813, "step": 30060 }, { - "epoch": 0.8530363223609535, + "epoch": 0.8518518518518519, "grad_norm": 0.0, - "learning_rate": 1.1116072277658795e-06, - "loss": 0.717, + "learning_rate": 1.12922076433439e-06, + "loss": 0.7687, "step": 30061 }, { - "epoch": 0.8530646992054484, + "epoch": 0.8518801892941144, "grad_norm": 0.0, - "learning_rate": 1.111186126185796e-06, - "loss": 0.7908, + "learning_rate": 1.1287971320807822e-06, + "loss": 0.811, "step": 30062 }, { - "epoch": 0.8530930760499432, + "epoch": 0.8519085267363767, "grad_norm": 0.0, - "learning_rate": 1.1107650996899189e-06, - "loss": 0.8497, + "learning_rate": 1.1283735745526093e-06, + "loss": 0.8304, "step": 30063 }, { - "epoch": 0.8531214528944382, + "epoch": 0.8519368641786392, "grad_norm": 0.0, - "learning_rate": 1.11034414828181e-06, - "loss": 0.8261, + "learning_rate": 1.1279500917534391e-06, + "loss": 0.8358, "step": 30064 }, { - "epoch": 0.853149829738933, + "epoch": 0.8519652016209017, "grad_norm": 0.0, - "learning_rate": 1.1099232719650265e-06, - "loss": 0.793, + "learning_rate": 1.1275266836868426e-06, + "loss": 0.8722, "step": 30065 }, { - "epoch": 0.8531782065834279, + "epoch": 0.8519935390631641, "grad_norm": 0.0, - "learning_rate": 1.109502470743119e-06, - "loss": 0.7743, + "learning_rate": 1.1271033503563811e-06, + "loss": 0.7941, "step": 30066 }, { - "epoch": 0.8532065834279228, + "epoch": 0.8520218765054266, "grad_norm": 0.0, - "learning_rate": 1.1090817446196433e-06, - "loss": 0.8425, + "learning_rate": 1.126680091765625e-06, + "loss": 0.8144, "step": 30067 }, { - "epoch": 0.8532349602724177, + "epoch": 0.8520502139476891, "grad_norm": 0.0, - "learning_rate": 1.1086610935981557e-06, - "loss": 0.7569, + "learning_rate": 1.1262569079181352e-06, + "loss": 0.8106, "step": 30068 }, { - "epoch": 0.8532633371169126, + "epoch": 0.8520785513899516, "grad_norm": 0.0, - "learning_rate": 1.1082405176822054e-06, - "loss": 0.7518, + "learning_rate": 1.1258337988174794e-06, + "loss": 0.8154, "step": 30069 }, { - "epoch": 0.8532917139614075, + "epoch": 0.852106888832214, "grad_norm": 0.0, - "learning_rate": 1.1078200168753473e-06, - "loss": 0.9122, + "learning_rate": 1.1254107644672218e-06, + "loss": 0.924, "step": 30070 }, { - "epoch": 0.8533200908059024, + "epoch": 0.8521352262744765, "grad_norm": 0.0, - "learning_rate": 1.1073995911811364e-06, - "loss": 0.8186, + "learning_rate": 1.124987804870924e-06, + "loss": 0.826, "step": 30071 }, { - "epoch": 0.8533484676503973, + "epoch": 0.852163563716739, "grad_norm": 0.0, - "learning_rate": 1.1069792406031178e-06, - "loss": 0.8055, + "learning_rate": 1.1245649200321485e-06, + "loss": 0.8202, "step": 30072 }, { - "epoch": 0.8533768444948922, + "epoch": 0.8521919011590013, "grad_norm": 0.0, - "learning_rate": 1.106558965144845e-06, - "loss": 0.8174, + "learning_rate": 1.124142109954459e-06, + "loss": 0.8382, "step": 30073 }, { - "epoch": 0.853405221339387, + "epoch": 0.8522202386012638, "grad_norm": 0.0, - "learning_rate": 1.106138764809871e-06, - "loss": 0.8075, + "learning_rate": 1.1237193746414167e-06, + "loss": 0.7394, "step": 30074 }, { - "epoch": 0.853433598183882, + "epoch": 0.8522485760435263, "grad_norm": 0.0, - "learning_rate": 1.1057186396017405e-06, - "loss": 0.7993, + "learning_rate": 1.1232967140965845e-06, + "loss": 0.8092, "step": 30075 }, { - "epoch": 0.8534619750283768, + "epoch": 0.8522769134857888, "grad_norm": 0.0, - "learning_rate": 1.1052985895240043e-06, - "loss": 0.8266, + "learning_rate": 1.122874128323518e-06, + "loss": 0.7403, "step": 30076 }, { - "epoch": 0.8534903518728717, + "epoch": 0.8523052509280512, "grad_norm": 0.0, - "learning_rate": 1.1048786145802126e-06, - "loss": 0.6706, + "learning_rate": 1.1224516173257782e-06, + "loss": 0.7761, "step": 30077 }, { - "epoch": 0.8535187287173667, + "epoch": 0.8523335883703137, "grad_norm": 0.0, - "learning_rate": 1.1044587147739072e-06, - "loss": 0.7284, + "learning_rate": 1.1220291811069285e-06, + "loss": 0.8264, "step": 30078 }, { - "epoch": 0.8535471055618615, + "epoch": 0.8523619258125762, "grad_norm": 0.0, - "learning_rate": 1.104038890108644e-06, - "loss": 0.7702, + "learning_rate": 1.121606819670521e-06, + "loss": 0.8417, "step": 30079 }, { - "epoch": 0.8535754824063564, + "epoch": 0.8523902632548386, "grad_norm": 0.0, - "learning_rate": 1.1036191405879614e-06, - "loss": 0.8391, + "learning_rate": 1.1211845330201188e-06, + "loss": 0.7738, "step": 30080 }, { - "epoch": 0.8536038592508514, + "epoch": 0.8524186006971011, "grad_norm": 0.0, - "learning_rate": 1.1031994662154077e-06, - "loss": 0.7757, + "learning_rate": 1.1207623211592733e-06, + "loss": 0.8556, "step": 30081 }, { - "epoch": 0.8536322360953462, + "epoch": 0.8524469381393636, "grad_norm": 0.0, - "learning_rate": 1.10277986699453e-06, - "loss": 0.7732, + "learning_rate": 1.120340184091544e-06, + "loss": 0.8328, "step": 30082 }, { - "epoch": 0.8536606129398411, + "epoch": 0.8524752755816261, "grad_norm": 0.0, - "learning_rate": 1.1023603429288688e-06, - "loss": 0.817, + "learning_rate": 1.1199181218204902e-06, + "loss": 0.8345, "step": 30083 }, { - "epoch": 0.8536889897843359, + "epoch": 0.8525036130238884, "grad_norm": 0.0, - "learning_rate": 1.10194089402197e-06, - "loss": 0.7796, + "learning_rate": 1.1194961343496603e-06, + "loss": 0.8024, "step": 30084 }, { - "epoch": 0.8537173666288309, + "epoch": 0.8525319504661509, "grad_norm": 0.0, - "learning_rate": 1.1015215202773778e-06, - "loss": 0.8363, + "learning_rate": 1.1190742216826122e-06, + "loss": 0.7432, "step": 30085 }, { - "epoch": 0.8537457434733258, + "epoch": 0.8525602879084134, "grad_norm": 0.0, - "learning_rate": 1.1011022216986322e-06, - "loss": 0.8947, + "learning_rate": 1.1186523838229003e-06, + "loss": 0.7464, "step": 30086 }, { - "epoch": 0.8537741203178206, + "epoch": 0.8525886253506758, "grad_norm": 0.0, - "learning_rate": 1.1006829982892754e-06, - "loss": 0.7813, + "learning_rate": 1.1182306207740768e-06, + "loss": 0.801, "step": 30087 }, { - "epoch": 0.8538024971623156, + "epoch": 0.8526169627929383, "grad_norm": 0.0, - "learning_rate": 1.1002638500528517e-06, - "loss": 0.8413, + "learning_rate": 1.117808932539698e-06, + "loss": 0.8055, "step": 30088 }, { - "epoch": 0.8538308740068105, + "epoch": 0.8526453002352008, "grad_norm": 0.0, - "learning_rate": 1.0998447769928978e-06, - "loss": 0.8517, + "learning_rate": 1.1173873191233097e-06, + "loss": 0.7646, "step": 30089 }, { - "epoch": 0.8538592508513053, + "epoch": 0.8526736376774632, "grad_norm": 0.0, - "learning_rate": 1.0994257791129548e-06, - "loss": 0.8289, + "learning_rate": 1.1169657805284673e-06, + "loss": 0.8144, "step": 30090 }, { - "epoch": 0.8538876276958002, + "epoch": 0.8527019751197257, "grad_norm": 0.0, - "learning_rate": 1.0990068564165645e-06, - "loss": 0.7805, + "learning_rate": 1.116544316758722e-06, + "loss": 0.8656, "step": 30091 }, { - "epoch": 0.8539160045402951, + "epoch": 0.8527303125619882, "grad_norm": 0.0, - "learning_rate": 1.0985880089072609e-06, - "loss": 0.8508, + "learning_rate": 1.116122927817621e-06, + "loss": 0.7112, "step": 30092 }, { - "epoch": 0.85394438138479, + "epoch": 0.8527586500042507, "grad_norm": 0.0, - "learning_rate": 1.0981692365885843e-06, - "loss": 0.8465, + "learning_rate": 1.1157016137087163e-06, + "loss": 0.8537, "step": 30093 }, { - "epoch": 0.8539727582292849, + "epoch": 0.852786987446513, "grad_norm": 0.0, - "learning_rate": 1.097750539464073e-06, - "loss": 0.822, + "learning_rate": 1.1152803744355578e-06, + "loss": 0.6937, "step": 30094 }, { - "epoch": 0.8540011350737798, + "epoch": 0.8528153248887755, "grad_norm": 0.0, - "learning_rate": 1.0973319175372632e-06, - "loss": 0.851, + "learning_rate": 1.1148592100016885e-06, + "loss": 0.8046, "step": 30095 }, { - "epoch": 0.8540295119182747, + "epoch": 0.852843662331038, "grad_norm": 0.0, - "learning_rate": 1.096913370811693e-06, - "loss": 0.8093, + "learning_rate": 1.1144381204106635e-06, + "loss": 0.7662, "step": 30096 }, { - "epoch": 0.8540578887627696, + "epoch": 0.8528719997733004, "grad_norm": 0.0, - "learning_rate": 1.096494899290893e-06, - "loss": 0.8768, + "learning_rate": 1.114017105666022e-06, + "loss": 0.8412, "step": 30097 }, { - "epoch": 0.8540862656072645, + "epoch": 0.8529003372155629, "grad_norm": 0.0, - "learning_rate": 1.0960765029784015e-06, - "loss": 0.8674, + "learning_rate": 1.1135961657713156e-06, + "loss": 0.7594, "step": 30098 }, { - "epoch": 0.8541146424517594, + "epoch": 0.8529286746578254, "grad_norm": 0.0, - "learning_rate": 1.0956581818777533e-06, - "loss": 0.819, + "learning_rate": 1.1131753007300884e-06, + "loss": 0.8636, "step": 30099 }, { - "epoch": 0.8541430192962542, + "epoch": 0.8529570121000879, "grad_norm": 0.0, - "learning_rate": 1.0952399359924793e-06, - "loss": 0.7323, + "learning_rate": 1.1127545105458847e-06, + "loss": 0.759, "step": 30100 }, { - "epoch": 0.8541713961407491, + "epoch": 0.8529853495423503, "grad_norm": 0.0, - "learning_rate": 1.094821765326114e-06, - "loss": 0.8467, + "learning_rate": 1.1123337952222524e-06, + "loss": 0.8674, "step": 30101 }, { - "epoch": 0.8541997729852441, + "epoch": 0.8530136869846128, "grad_norm": 0.0, - "learning_rate": 1.0944036698821913e-06, - "loss": 0.8072, + "learning_rate": 1.1119131547627315e-06, + "loss": 0.7527, "step": 30102 }, { - "epoch": 0.8542281498297389, + "epoch": 0.8530420244268753, "grad_norm": 0.0, - "learning_rate": 1.0939856496642398e-06, - "loss": 0.8086, + "learning_rate": 1.111492589170866e-06, + "loss": 0.8159, "step": 30103 }, { - "epoch": 0.8542565266742338, + "epoch": 0.8530703618691376, "grad_norm": 0.0, - "learning_rate": 1.0935677046757908e-06, - "loss": 0.8026, + "learning_rate": 1.111072098450201e-06, + "loss": 0.8443, "step": 30104 }, { - "epoch": 0.8542849035187288, + "epoch": 0.8530986993114001, "grad_norm": 0.0, - "learning_rate": 1.0931498349203785e-06, - "loss": 0.791, + "learning_rate": 1.1106516826042747e-06, + "loss": 0.8094, "step": 30105 }, { - "epoch": 0.8543132803632236, + "epoch": 0.8531270367536626, "grad_norm": 0.0, - "learning_rate": 1.0927320404015274e-06, - "loss": 0.7582, + "learning_rate": 1.1102313416366307e-06, + "loss": 0.9428, "step": 30106 }, { - "epoch": 0.8543416572077185, + "epoch": 0.853155374195925, "grad_norm": 0.0, - "learning_rate": 1.0923143211227695e-06, - "loss": 0.8442, + "learning_rate": 1.1098110755508107e-06, + "loss": 0.8042, "step": 30107 }, { - "epoch": 0.8543700340522133, + "epoch": 0.8531837116381875, "grad_norm": 0.0, - "learning_rate": 1.0918966770876349e-06, - "loss": 0.8262, + "learning_rate": 1.109390884350351e-06, + "loss": 0.8254, "step": 30108 }, { - "epoch": 0.8543984108967083, + "epoch": 0.85321204908045, "grad_norm": 0.0, - "learning_rate": 1.0914791082996456e-06, - "loss": 0.7442, + "learning_rate": 1.1089707680387962e-06, + "loss": 0.7864, "step": 30109 }, { - "epoch": 0.8544267877412032, + "epoch": 0.8532403865227125, "grad_norm": 0.0, - "learning_rate": 1.0910616147623365e-06, - "loss": 0.8866, + "learning_rate": 1.1085507266196794e-06, + "loss": 0.7938, "step": 30110 }, { - "epoch": 0.854455164585698, + "epoch": 0.8532687239649749, "grad_norm": 0.0, - "learning_rate": 1.090644196479228e-06, - "loss": 0.7355, + "learning_rate": 1.1081307600965419e-06, + "loss": 0.7949, "step": 30111 }, { - "epoch": 0.854483541430193, + "epoch": 0.8532970614072374, "grad_norm": 0.0, - "learning_rate": 1.0902268534538496e-06, - "loss": 0.9549, + "learning_rate": 1.107710868472921e-06, + "loss": 0.8596, "step": 30112 }, { - "epoch": 0.8545119182746879, + "epoch": 0.8533253988494999, "grad_norm": 0.0, - "learning_rate": 1.089809585689726e-06, - "loss": 0.761, + "learning_rate": 1.107291051752354e-06, + "loss": 0.7646, "step": 30113 }, { - "epoch": 0.8545402951191827, + "epoch": 0.8533537362917623, "grad_norm": 0.0, - "learning_rate": 1.0893923931903805e-06, - "loss": 0.9006, + "learning_rate": 1.1068713099383754e-06, + "loss": 0.7164, "step": 30114 }, { - "epoch": 0.8545686719636776, + "epoch": 0.8533820737340247, "grad_norm": 0.0, - "learning_rate": 1.0889752759593386e-06, - "loss": 0.6226, + "learning_rate": 1.1064516430345252e-06, + "loss": 0.7798, "step": 30115 }, { - "epoch": 0.8545970488081726, + "epoch": 0.8534104111762872, "grad_norm": 0.0, - "learning_rate": 1.0885582340001245e-06, - "loss": 0.7026, + "learning_rate": 1.1060320510443335e-06, + "loss": 0.7664, "step": 30116 }, { - "epoch": 0.8546254256526674, + "epoch": 0.8534387486185497, "grad_norm": 0.0, - "learning_rate": 1.0881412673162572e-06, - "loss": 0.8443, + "learning_rate": 1.1056125339713385e-06, + "loss": 0.8559, "step": 30117 }, { - "epoch": 0.8546538024971623, + "epoch": 0.8534670860608121, "grad_norm": 0.0, - "learning_rate": 1.087724375911261e-06, - "loss": 0.8173, + "learning_rate": 1.105193091819069e-06, + "loss": 0.9047, "step": 30118 }, { - "epoch": 0.8546821793416572, + "epoch": 0.8534954235030746, "grad_norm": 0.0, - "learning_rate": 1.0873075597886607e-06, - "loss": 0.8459, + "learning_rate": 1.1047737245910617e-06, + "loss": 0.81, "step": 30119 }, { - "epoch": 0.8547105561861521, + "epoch": 0.8535237609453371, "grad_norm": 0.0, - "learning_rate": 1.086890818951971e-06, - "loss": 0.819, + "learning_rate": 1.1043544322908506e-06, + "loss": 0.8039, "step": 30120 }, { - "epoch": 0.854738933030647, + "epoch": 0.8535520983875995, "grad_norm": 0.0, - "learning_rate": 1.0864741534047151e-06, - "loss": 0.8398, + "learning_rate": 1.103935214921963e-06, + "loss": 0.7573, "step": 30121 }, { - "epoch": 0.8547673098751419, + "epoch": 0.853580435829862, "grad_norm": 0.0, - "learning_rate": 1.0860575631504154e-06, - "loss": 0.8147, + "learning_rate": 1.1035160724879335e-06, + "loss": 0.7999, "step": 30122 }, { - "epoch": 0.8547956867196368, + "epoch": 0.8536087732721245, "grad_norm": 0.0, - "learning_rate": 1.0856410481925861e-06, - "loss": 0.8052, + "learning_rate": 1.103097004992293e-06, + "loss": 0.8541, "step": 30123 }, { - "epoch": 0.8548240635641317, + "epoch": 0.853637110714387, "grad_norm": 0.0, - "learning_rate": 1.0852246085347483e-06, - "loss": 0.8729, + "learning_rate": 1.1026780124385684e-06, + "loss": 0.8405, "step": 30124 }, { - "epoch": 0.8548524404086265, + "epoch": 0.8536654481566494, "grad_norm": 0.0, - "learning_rate": 1.0848082441804176e-06, - "loss": 0.8728, + "learning_rate": 1.1022590948302904e-06, + "loss": 0.8717, "step": 30125 }, { - "epoch": 0.8548808172531215, + "epoch": 0.8536937855989118, "grad_norm": 0.0, - "learning_rate": 1.0843919551331139e-06, - "loss": 0.7308, + "learning_rate": 1.1018402521709892e-06, + "loss": 0.8048, "step": 30126 }, { - "epoch": 0.8549091940976163, + "epoch": 0.8537221230411743, "grad_norm": 0.0, - "learning_rate": 1.0839757413963524e-06, - "loss": 0.7211, + "learning_rate": 1.10142148446419e-06, + "loss": 0.7565, "step": 30127 }, { - "epoch": 0.8549375709421112, + "epoch": 0.8537504604834367, "grad_norm": 0.0, - "learning_rate": 1.0835596029736484e-06, - "loss": 0.8331, + "learning_rate": 1.1010027917134258e-06, + "loss": 0.8266, "step": 30128 }, { - "epoch": 0.8549659477866062, + "epoch": 0.8537787979256992, "grad_norm": 0.0, - "learning_rate": 1.0831435398685164e-06, - "loss": 0.9331, + "learning_rate": 1.1005841739222166e-06, + "loss": 0.8594, "step": 30129 }, { - "epoch": 0.854994324631101, + "epoch": 0.8538071353679617, "grad_norm": 0.0, - "learning_rate": 1.0827275520844738e-06, - "loss": 0.7891, + "learning_rate": 1.1001656310940944e-06, + "loss": 0.7385, "step": 30130 }, { - "epoch": 0.8550227014755959, + "epoch": 0.8538354728102241, "grad_norm": 0.0, - "learning_rate": 1.08231163962503e-06, - "loss": 0.7822, + "learning_rate": 1.0997471632325795e-06, + "loss": 0.8104, "step": 30131 }, { - "epoch": 0.8550510783200908, + "epoch": 0.8538638102524866, "grad_norm": 0.0, - "learning_rate": 1.081895802493701e-06, - "loss": 0.8855, + "learning_rate": 1.0993287703411992e-06, + "loss": 0.8008, "step": 30132 }, { - "epoch": 0.8550794551645857, + "epoch": 0.8538921476947491, "grad_norm": 0.0, - "learning_rate": 1.0814800406940007e-06, - "loss": 0.8247, + "learning_rate": 1.0989104524234807e-06, + "loss": 0.8357, "step": 30133 }, { - "epoch": 0.8551078320090806, + "epoch": 0.8539204851370116, "grad_norm": 0.0, - "learning_rate": 1.0810643542294386e-06, - "loss": 0.8238, + "learning_rate": 1.098492209482942e-06, + "loss": 0.8456, "step": 30134 }, { - "epoch": 0.8551362088535754, + "epoch": 0.853948822579274, "grad_norm": 0.0, - "learning_rate": 1.0806487431035262e-06, - "loss": 0.7561, + "learning_rate": 1.0980740415231084e-06, + "loss": 0.8807, "step": 30135 }, { - "epoch": 0.8551645856980704, + "epoch": 0.8539771600215365, "grad_norm": 0.0, - "learning_rate": 1.0802332073197762e-06, - "loss": 0.8997, + "learning_rate": 1.0976559485475058e-06, + "loss": 0.8292, "step": 30136 }, { - "epoch": 0.8551929625425653, + "epoch": 0.854005497463799, "grad_norm": 0.0, - "learning_rate": 1.079817746881696e-06, - "loss": 0.7685, + "learning_rate": 1.0972379305596503e-06, + "loss": 0.8196, "step": 30137 }, { - "epoch": 0.8552213393870601, + "epoch": 0.8540338349060613, "grad_norm": 0.0, - "learning_rate": 1.0794023617927961e-06, - "loss": 0.8734, + "learning_rate": 1.0968199875630658e-06, + "loss": 0.8035, "step": 30138 }, { - "epoch": 0.8552497162315551, + "epoch": 0.8540621723483238, "grad_norm": 0.0, - "learning_rate": 1.0789870520565848e-06, - "loss": 0.8049, + "learning_rate": 1.0964021195612728e-06, + "loss": 0.848, "step": 30139 }, { - "epoch": 0.85527809307605, + "epoch": 0.8540905097905863, "grad_norm": 0.0, - "learning_rate": 1.0785718176765715e-06, - "loss": 0.8208, + "learning_rate": 1.0959843265577918e-06, + "loss": 0.8752, "step": 30140 }, { - "epoch": 0.8553064699205448, + "epoch": 0.8541188472328488, "grad_norm": 0.0, - "learning_rate": 1.0781566586562653e-06, - "loss": 0.756, + "learning_rate": 1.0955666085561423e-06, + "loss": 0.836, "step": 30141 }, { - "epoch": 0.8553348467650397, + "epoch": 0.8541471846751112, "grad_norm": 0.0, - "learning_rate": 1.0777415749991682e-06, - "loss": 0.807, + "learning_rate": 1.0951489655598402e-06, + "loss": 0.8615, "step": 30142 }, { - "epoch": 0.8553632236095347, + "epoch": 0.8541755221173737, "grad_norm": 0.0, - "learning_rate": 1.0773265667087896e-06, - "loss": 0.7837, + "learning_rate": 1.094731397572405e-06, + "loss": 0.8237, "step": 30143 }, { - "epoch": 0.8553916004540295, + "epoch": 0.8542038595596362, "grad_norm": 0.0, - "learning_rate": 1.0769116337886366e-06, - "loss": 0.8071, + "learning_rate": 1.094313904597355e-06, + "loss": 0.808, "step": 30144 }, { - "epoch": 0.8554199772985244, + "epoch": 0.8542321970018986, "grad_norm": 0.0, - "learning_rate": 1.0764967762422107e-06, - "loss": 0.7993, + "learning_rate": 1.0938964866382052e-06, + "loss": 0.8637, "step": 30145 }, { - "epoch": 0.8554483541430193, + "epoch": 0.8542605344441611, "grad_norm": 0.0, - "learning_rate": 1.076081994073017e-06, - "loss": 0.7256, + "learning_rate": 1.0934791436984748e-06, + "loss": 0.7366, "step": 30146 }, { - "epoch": 0.8554767309875142, + "epoch": 0.8542888718864236, "grad_norm": 0.0, - "learning_rate": 1.075667287284562e-06, - "loss": 0.7882, + "learning_rate": 1.0930618757816747e-06, + "loss": 0.7381, "step": 30147 }, { - "epoch": 0.8555051078320091, + "epoch": 0.854317209328686, "grad_norm": 0.0, - "learning_rate": 1.0752526558803444e-06, - "loss": 0.785, + "learning_rate": 1.0926446828913218e-06, + "loss": 0.8462, "step": 30148 }, { - "epoch": 0.8555334846765039, + "epoch": 0.8543455467709484, "grad_norm": 0.0, - "learning_rate": 1.0748380998638685e-06, - "loss": 0.7169, + "learning_rate": 1.0922275650309322e-06, + "loss": 0.8709, "step": 30149 }, { - "epoch": 0.8555618615209989, + "epoch": 0.8543738842132109, "grad_norm": 0.0, - "learning_rate": 1.0744236192386393e-06, - "loss": 0.8578, + "learning_rate": 1.0918105222040154e-06, + "loss": 0.7929, "step": 30150 }, { - "epoch": 0.8555902383654937, + "epoch": 0.8544022216554734, "grad_norm": 0.0, - "learning_rate": 1.0740092140081527e-06, - "loss": 0.8264, + "learning_rate": 1.0913935544140864e-06, + "loss": 0.8415, "step": 30151 }, { - "epoch": 0.8556186152099886, + "epoch": 0.8544305590977358, "grad_norm": 0.0, - "learning_rate": 1.0735948841759114e-06, - "loss": 0.7025, + "learning_rate": 1.0909766616646589e-06, + "loss": 0.7744, "step": 30152 }, { - "epoch": 0.8556469920544836, + "epoch": 0.8544588965399983, "grad_norm": 0.0, - "learning_rate": 1.0731806297454172e-06, - "loss": 0.8415, + "learning_rate": 1.0905598439592425e-06, + "loss": 0.7891, "step": 30153 }, { - "epoch": 0.8556753688989784, + "epoch": 0.8544872339822608, "grad_norm": 0.0, - "learning_rate": 1.0727664507201652e-06, - "loss": 0.7992, + "learning_rate": 1.0901431013013507e-06, + "loss": 0.775, "step": 30154 }, { - "epoch": 0.8557037457434733, + "epoch": 0.8545155714245232, "grad_norm": 0.0, - "learning_rate": 1.0723523471036545e-06, - "loss": 0.7943, + "learning_rate": 1.0897264336944901e-06, + "loss": 0.8033, "step": 30155 }, { - "epoch": 0.8557321225879683, + "epoch": 0.8545439088667857, "grad_norm": 0.0, - "learning_rate": 1.0719383188993893e-06, - "loss": 0.8413, + "learning_rate": 1.0893098411421731e-06, + "loss": 0.9655, "step": 30156 }, { - "epoch": 0.8557604994324631, + "epoch": 0.8545722463090482, "grad_norm": 0.0, - "learning_rate": 1.071524366110861e-06, - "loss": 0.8307, + "learning_rate": 1.0888933236479094e-06, + "loss": 0.7708, "step": 30157 }, { - "epoch": 0.855788876276958, + "epoch": 0.8546005837513106, "grad_norm": 0.0, - "learning_rate": 1.071110488741567e-06, - "loss": 0.843, + "learning_rate": 1.0884768812152047e-06, + "loss": 0.6867, "step": 30158 }, { - "epoch": 0.8558172531214528, + "epoch": 0.854628921193573, "grad_norm": 0.0, - "learning_rate": 1.0706966867950052e-06, - "loss": 0.742, + "learning_rate": 1.0880605138475708e-06, + "loss": 0.8427, "step": 30159 }, { - "epoch": 0.8558456299659478, + "epoch": 0.8546572586358355, "grad_norm": 0.0, - "learning_rate": 1.070282960274669e-06, - "loss": 0.7392, + "learning_rate": 1.0876442215485106e-06, + "loss": 0.9402, "step": 30160 }, { - "epoch": 0.8558740068104427, + "epoch": 0.854685596078098, "grad_norm": 0.0, - "learning_rate": 1.069869309184056e-06, - "loss": 0.9044, + "learning_rate": 1.0872280043215321e-06, + "loss": 0.7349, "step": 30161 }, { - "epoch": 0.8559023836549375, + "epoch": 0.8547139335203604, "grad_norm": 0.0, - "learning_rate": 1.0694557335266564e-06, - "loss": 0.8074, + "learning_rate": 1.086811862170144e-06, + "loss": 0.7294, "step": 30162 }, { - "epoch": 0.8559307604994325, + "epoch": 0.8547422709626229, "grad_norm": 0.0, - "learning_rate": 1.0690422333059657e-06, - "loss": 0.779, + "learning_rate": 1.0863957950978477e-06, + "loss": 0.6671, "step": 30163 }, { - "epoch": 0.8559591373439274, + "epoch": 0.8547706084048854, "grad_norm": 0.0, - "learning_rate": 1.0686288085254782e-06, - "loss": 0.815, + "learning_rate": 1.0859798031081491e-06, + "loss": 0.765, "step": 30164 }, { - "epoch": 0.8559875141884222, + "epoch": 0.8547989458471479, "grad_norm": 0.0, - "learning_rate": 1.0682154591886828e-06, - "loss": 0.7169, + "learning_rate": 1.0855638862045537e-06, + "loss": 0.816, "step": 30165 }, { - "epoch": 0.8560158910329171, + "epoch": 0.8548272832894103, "grad_norm": 0.0, - "learning_rate": 1.0678021852990727e-06, - "loss": 0.7116, + "learning_rate": 1.0851480443905627e-06, + "loss": 0.8549, "step": 30166 }, { - "epoch": 0.8560442678774121, + "epoch": 0.8548556207316728, "grad_norm": 0.0, - "learning_rate": 1.0673889868601416e-06, - "loss": 0.8556, + "learning_rate": 1.0847322776696834e-06, + "loss": 0.7703, "step": 30167 }, { - "epoch": 0.8560726447219069, + "epoch": 0.8548839581739353, "grad_norm": 0.0, - "learning_rate": 1.0669758638753759e-06, - "loss": 0.8476, + "learning_rate": 1.084316586045412e-06, + "loss": 0.8399, "step": 30168 }, { - "epoch": 0.8561010215664018, + "epoch": 0.8549122956161976, "grad_norm": 0.0, - "learning_rate": 1.0665628163482655e-06, - "loss": 0.8782, + "learning_rate": 1.0839009695212521e-06, + "loss": 0.872, "step": 30169 }, { - "epoch": 0.8561293984108967, + "epoch": 0.8549406330584601, "grad_norm": 0.0, - "learning_rate": 1.0661498442823015e-06, - "loss": 0.86, + "learning_rate": 1.083485428100708e-06, + "loss": 0.8024, "step": 30170 }, { - "epoch": 0.8561577752553916, + "epoch": 0.8549689705007226, "grad_norm": 0.0, - "learning_rate": 1.065736947680971e-06, - "loss": 0.7662, + "learning_rate": 1.0830699617872752e-06, + "loss": 0.9213, "step": 30171 }, { - "epoch": 0.8561861520998865, + "epoch": 0.8549973079429851, "grad_norm": 0.0, - "learning_rate": 1.065324126547761e-06, - "loss": 0.8226, + "learning_rate": 1.082654570584456e-06, + "loss": 0.8043, "step": 30172 }, { - "epoch": 0.8562145289443814, + "epoch": 0.8550256453852475, "grad_norm": 0.0, - "learning_rate": 1.0649113808861633e-06, - "loss": 0.8503, + "learning_rate": 1.0822392544957494e-06, + "loss": 0.819, "step": 30173 }, { - "epoch": 0.8562429057888763, + "epoch": 0.85505398282751, "grad_norm": 0.0, - "learning_rate": 1.0644987106996584e-06, - "loss": 0.9405, + "learning_rate": 1.0818240135246528e-06, + "loss": 0.8283, "step": 30174 }, { - "epoch": 0.8562712826333712, + "epoch": 0.8550823202697725, "grad_norm": 0.0, - "learning_rate": 1.0640861159917337e-06, - "loss": 0.8539, + "learning_rate": 1.0814088476746642e-06, + "loss": 0.7864, "step": 30175 }, { - "epoch": 0.856299659477866, + "epoch": 0.8551106577120349, "grad_norm": 0.0, - "learning_rate": 1.0636735967658785e-06, - "loss": 0.7798, + "learning_rate": 1.0809937569492801e-06, + "loss": 0.7297, "step": 30176 }, { - "epoch": 0.856328036322361, + "epoch": 0.8551389951542974, "grad_norm": 0.0, - "learning_rate": 1.063261153025571e-06, - "loss": 0.81, + "learning_rate": 1.0805787413519974e-06, + "loss": 0.9168, "step": 30177 }, { - "epoch": 0.8563564131668558, + "epoch": 0.8551673325965599, "grad_norm": 0.0, - "learning_rate": 1.0628487847742997e-06, - "loss": 0.8757, + "learning_rate": 1.0801638008863114e-06, + "loss": 0.8869, "step": 30178 }, { - "epoch": 0.8563847900113507, + "epoch": 0.8551956700388222, "grad_norm": 0.0, - "learning_rate": 1.0624364920155473e-06, - "loss": 0.8144, + "learning_rate": 1.079748935555719e-06, + "loss": 0.933, "step": 30179 }, { - "epoch": 0.8564131668558457, + "epoch": 0.8552240074810847, "grad_norm": 0.0, - "learning_rate": 1.0620242747527943e-06, - "loss": 0.7818, + "learning_rate": 1.079334145363713e-06, + "loss": 0.8191, "step": 30180 }, { - "epoch": 0.8564415437003405, + "epoch": 0.8552523449233472, "grad_norm": 0.0, - "learning_rate": 1.0616121329895235e-06, - "loss": 0.8275, + "learning_rate": 1.0789194303137907e-06, + "loss": 0.8214, "step": 30181 }, { - "epoch": 0.8564699205448354, + "epoch": 0.8552806823656097, "grad_norm": 0.0, - "learning_rate": 1.0612000667292188e-06, - "loss": 0.8327, + "learning_rate": 1.0785047904094404e-06, + "loss": 0.9814, "step": 30182 }, { - "epoch": 0.8564982973893303, + "epoch": 0.8553090198078721, "grad_norm": 0.0, - "learning_rate": 1.0607880759753575e-06, - "loss": 0.8549, + "learning_rate": 1.0780902256541592e-06, + "loss": 0.7666, "step": 30183 }, { - "epoch": 0.8565266742338252, + "epoch": 0.8553373572501346, "grad_norm": 0.0, - "learning_rate": 1.0603761607314212e-06, - "loss": 0.7878, + "learning_rate": 1.0776757360514345e-06, + "loss": 0.7788, "step": 30184 }, { - "epoch": 0.8565550510783201, + "epoch": 0.8553656946923971, "grad_norm": 0.0, - "learning_rate": 1.0599643210008893e-06, - "loss": 0.7553, + "learning_rate": 1.0772613216047612e-06, + "loss": 0.8405, "step": 30185 }, { - "epoch": 0.8565834279228149, + "epoch": 0.8553940321346595, "grad_norm": 0.0, - "learning_rate": 1.05955255678724e-06, - "loss": 0.7643, + "learning_rate": 1.0768469823176308e-06, + "loss": 0.8174, "step": 30186 }, { - "epoch": 0.8566118047673099, + "epoch": 0.855422369576922, "grad_norm": 0.0, - "learning_rate": 1.0591408680939541e-06, - "loss": 0.8181, + "learning_rate": 1.0764327181935297e-06, + "loss": 0.7665, "step": 30187 }, { - "epoch": 0.8566401816118048, + "epoch": 0.8554507070191845, "grad_norm": 0.0, - "learning_rate": 1.0587292549245065e-06, - "loss": 0.7962, + "learning_rate": 1.0760185292359515e-06, + "loss": 0.7435, "step": 30188 }, { - "epoch": 0.8566685584562996, + "epoch": 0.855479044461447, "grad_norm": 0.0, - "learning_rate": 1.0583177172823734e-06, - "loss": 0.7705, + "learning_rate": 1.0756044154483813e-06, + "loss": 0.7957, "step": 30189 }, { - "epoch": 0.8566969353007946, + "epoch": 0.8555073819037093, "grad_norm": 0.0, - "learning_rate": 1.057906255171035e-06, - "loss": 0.8612, + "learning_rate": 1.0751903768343098e-06, + "loss": 0.8244, "step": 30190 }, { - "epoch": 0.8567253121452895, + "epoch": 0.8555357193459718, "grad_norm": 0.0, - "learning_rate": 1.0574948685939624e-06, - "loss": 0.8209, + "learning_rate": 1.0747764133972226e-06, + "loss": 0.6693, "step": 30191 }, { - "epoch": 0.8567536889897843, + "epoch": 0.8555640567882343, "grad_norm": 0.0, - "learning_rate": 1.0570835575546333e-06, - "loss": 0.766, + "learning_rate": 1.0743625251406087e-06, + "loss": 0.8065, "step": 30192 }, { - "epoch": 0.8567820658342792, + "epoch": 0.8555923942304967, "grad_norm": 0.0, - "learning_rate": 1.0566723220565222e-06, - "loss": 0.8464, + "learning_rate": 1.0739487120679537e-06, + "loss": 0.7787, "step": 30193 }, { - "epoch": 0.8568104426787742, + "epoch": 0.8556207316727592, "grad_norm": 0.0, - "learning_rate": 1.0562611621031016e-06, - "loss": 0.809, + "learning_rate": 1.073534974182745e-06, + "loss": 0.7537, "step": 30194 }, { - "epoch": 0.856838819523269, + "epoch": 0.8556490691150217, "grad_norm": 0.0, - "learning_rate": 1.0558500776978431e-06, - "loss": 0.8466, + "learning_rate": 1.0731213114884643e-06, + "loss": 0.8501, "step": 30195 }, { - "epoch": 0.8568671963677639, + "epoch": 0.8556774065572842, "grad_norm": 0.0, - "learning_rate": 1.055439068844224e-06, - "loss": 0.8068, + "learning_rate": 1.0727077239885985e-06, + "loss": 0.8509, "step": 30196 }, { - "epoch": 0.8568955732122588, + "epoch": 0.8557057439995466, "grad_norm": 0.0, - "learning_rate": 1.0550281355457115e-06, - "loss": 0.8524, + "learning_rate": 1.0722942116866296e-06, + "loss": 0.7719, "step": 30197 }, { - "epoch": 0.8569239500567537, + "epoch": 0.8557340814418091, "grad_norm": 0.0, - "learning_rate": 1.0546172778057783e-06, - "loss": 0.8196, + "learning_rate": 1.0718807745860426e-06, + "loss": 0.7438, "step": 30198 }, { - "epoch": 0.8569523269012486, + "epoch": 0.8557624188840716, "grad_norm": 0.0, - "learning_rate": 1.0542064956278974e-06, - "loss": 0.8502, + "learning_rate": 1.0714674126903202e-06, + "loss": 0.7624, "step": 30199 }, { - "epoch": 0.8569807037457434, + "epoch": 0.855790756326334, "grad_norm": 0.0, - "learning_rate": 1.0537957890155338e-06, - "loss": 0.8101, + "learning_rate": 1.0710541260029416e-06, + "loss": 0.8267, "step": 30200 }, { - "epoch": 0.8570090805902384, + "epoch": 0.8558190937685964, "grad_norm": 0.0, - "learning_rate": 1.0533851579721588e-06, - "loss": 0.8391, + "learning_rate": 1.07064091452739e-06, + "loss": 0.7842, "step": 30201 }, { - "epoch": 0.8570374574347333, + "epoch": 0.8558474312108589, "grad_norm": 0.0, - "learning_rate": 1.0529746025012422e-06, - "loss": 0.8677, + "learning_rate": 1.0702277782671467e-06, + "loss": 0.8126, "step": 30202 }, { - "epoch": 0.8570658342792281, + "epoch": 0.8558757686531213, "grad_norm": 0.0, - "learning_rate": 1.0525641226062522e-06, - "loss": 0.7254, + "learning_rate": 1.06981471722569e-06, + "loss": 0.766, "step": 30203 }, { - "epoch": 0.8570942111237231, + "epoch": 0.8559041060953838, "grad_norm": 0.0, - "learning_rate": 1.052153718290656e-06, - "loss": 0.8189, + "learning_rate": 1.0694017314064997e-06, + "loss": 0.7243, "step": 30204 }, { - "epoch": 0.8571225879682179, + "epoch": 0.8559324435376463, "grad_norm": 0.0, - "learning_rate": 1.0517433895579177e-06, - "loss": 0.7535, + "learning_rate": 1.068988820813055e-06, + "loss": 0.8456, "step": 30205 }, { - "epoch": 0.8571509648127128, + "epoch": 0.8559607809799088, "grad_norm": 0.0, - "learning_rate": 1.0513331364115055e-06, - "loss": 0.7825, + "learning_rate": 1.0685759854488341e-06, + "loss": 0.7393, "step": 30206 }, { - "epoch": 0.8571793416572078, + "epoch": 0.8559891184221712, "grad_norm": 0.0, - "learning_rate": 1.0509229588548865e-06, - "loss": 0.8469, + "learning_rate": 1.0681632253173158e-06, + "loss": 0.873, "step": 30207 }, { - "epoch": 0.8572077185017026, + "epoch": 0.8560174558644337, "grad_norm": 0.0, - "learning_rate": 1.0505128568915223e-06, - "loss": 0.7453, + "learning_rate": 1.0677505404219735e-06, + "loss": 0.7491, "step": 30208 }, { - "epoch": 0.8572360953461975, + "epoch": 0.8560457933066962, "grad_norm": 0.0, - "learning_rate": 1.0501028305248773e-06, - "loss": 0.859, + "learning_rate": 1.0673379307662856e-06, + "loss": 0.8982, "step": 30209 }, { - "epoch": 0.8572644721906924, + "epoch": 0.8560741307489586, "grad_norm": 0.0, - "learning_rate": 1.0496928797584183e-06, - "loss": 0.8914, + "learning_rate": 1.0669253963537285e-06, + "loss": 0.6862, "step": 30210 }, { - "epoch": 0.8572928490351873, + "epoch": 0.856102468191221, "grad_norm": 0.0, - "learning_rate": 1.0492830045956037e-06, - "loss": 0.8146, + "learning_rate": 1.0665129371877748e-06, + "loss": 0.8092, "step": 30211 }, { - "epoch": 0.8573212258796822, + "epoch": 0.8561308056334835, "grad_norm": 0.0, - "learning_rate": 1.0488732050398986e-06, - "loss": 0.8124, + "learning_rate": 1.0661005532719027e-06, + "loss": 0.7912, "step": 30212 }, { - "epoch": 0.857349602724177, + "epoch": 0.856159143075746, "grad_norm": 0.0, - "learning_rate": 1.0484634810947658e-06, - "loss": 0.8784, + "learning_rate": 1.0656882446095795e-06, + "loss": 0.8452, "step": 30213 }, { - "epoch": 0.857377979568672, + "epoch": 0.8561874805180084, "grad_norm": 0.0, - "learning_rate": 1.0480538327636614e-06, - "loss": 0.9319, + "learning_rate": 1.0652760112042837e-06, + "loss": 0.8139, "step": 30214 }, { - "epoch": 0.8574063564131669, + "epoch": 0.8562158179602709, "grad_norm": 0.0, - "learning_rate": 1.0476442600500503e-06, - "loss": 0.9042, + "learning_rate": 1.0648638530594867e-06, + "loss": 0.8686, "step": 30215 }, { - "epoch": 0.8574347332576617, + "epoch": 0.8562441554025334, "grad_norm": 0.0, - "learning_rate": 1.0472347629573886e-06, - "loss": 0.8392, + "learning_rate": 1.0644517701786571e-06, + "loss": 0.6955, "step": 30216 }, { - "epoch": 0.8574631101021566, + "epoch": 0.8562724928447958, "grad_norm": 0.0, - "learning_rate": 1.0468253414891393e-06, - "loss": 0.8451, + "learning_rate": 1.0640397625652688e-06, + "loss": 0.7899, "step": 30217 }, { - "epoch": 0.8574914869466516, + "epoch": 0.8563008302870583, "grad_norm": 0.0, - "learning_rate": 1.0464159956487596e-06, - "loss": 0.8501, + "learning_rate": 1.0636278302227921e-06, + "loss": 0.7462, "step": 30218 }, { - "epoch": 0.8575198637911464, + "epoch": 0.8563291677293208, "grad_norm": 0.0, - "learning_rate": 1.0460067254397043e-06, - "loss": 0.7685, + "learning_rate": 1.0632159731546965e-06, + "loss": 0.8284, "step": 30219 }, { - "epoch": 0.8575482406356413, + "epoch": 0.8563575051715833, "grad_norm": 0.0, - "learning_rate": 1.0455975308654332e-06, - "loss": 0.8012, + "learning_rate": 1.0628041913644538e-06, + "loss": 0.8328, "step": 30220 }, { - "epoch": 0.8575766174801362, + "epoch": 0.8563858426138456, "grad_norm": 0.0, - "learning_rate": 1.0451884119294043e-06, - "loss": 0.7668, + "learning_rate": 1.0623924848555279e-06, + "loss": 0.8245, "step": 30221 }, { - "epoch": 0.8576049943246311, + "epoch": 0.8564141800561081, "grad_norm": 0.0, - "learning_rate": 1.0447793686350694e-06, - "loss": 0.7883, + "learning_rate": 1.0619808536313892e-06, + "loss": 0.689, "step": 30222 }, { - "epoch": 0.857633371169126, + "epoch": 0.8564425174983706, "grad_norm": 0.0, - "learning_rate": 1.0443704009858857e-06, - "loss": 0.8036, + "learning_rate": 1.061569297695506e-06, + "loss": 0.8578, "step": 30223 }, { - "epoch": 0.8576617480136209, + "epoch": 0.856470854940633, "grad_norm": 0.0, - "learning_rate": 1.0439615089853094e-06, - "loss": 0.9032, + "learning_rate": 1.0611578170513426e-06, + "loss": 0.8025, "step": 30224 }, { - "epoch": 0.8576901248581158, + "epoch": 0.8564991923828955, "grad_norm": 0.0, - "learning_rate": 1.0435526926367924e-06, - "loss": 0.8613, + "learning_rate": 1.060746411702368e-06, + "loss": 0.75, "step": 30225 }, { - "epoch": 0.8577185017026107, + "epoch": 0.856527529825158, "grad_norm": 0.0, - "learning_rate": 1.0431439519437869e-06, - "loss": 0.7081, + "learning_rate": 1.060335081652043e-06, + "loss": 0.6893, "step": 30226 }, { - "epoch": 0.8577468785471055, + "epoch": 0.8565558672674204, "grad_norm": 0.0, - "learning_rate": 1.0427352869097495e-06, - "loss": 0.7344, + "learning_rate": 1.0599238269038359e-06, + "loss": 0.8976, "step": 30227 }, { - "epoch": 0.8577752553916005, + "epoch": 0.8565842047096829, "grad_norm": 0.0, - "learning_rate": 1.0423266975381274e-06, - "loss": 0.7542, + "learning_rate": 1.0595126474612105e-06, + "loss": 0.7759, "step": 30228 }, { - "epoch": 0.8578036322360953, + "epoch": 0.8566125421519454, "grad_norm": 0.0, - "learning_rate": 1.0419181838323743e-06, - "loss": 0.8453, + "learning_rate": 1.0591015433276308e-06, + "loss": 0.8157, "step": 30229 }, { - "epoch": 0.8578320090805902, + "epoch": 0.8566408795942079, "grad_norm": 0.0, - "learning_rate": 1.0415097457959433e-06, - "loss": 0.8249, + "learning_rate": 1.0586905145065573e-06, + "loss": 0.7309, "step": 30230 }, { - "epoch": 0.8578603859250852, + "epoch": 0.8566692170364703, "grad_norm": 0.0, - "learning_rate": 1.0411013834322791e-06, - "loss": 0.8021, + "learning_rate": 1.0582795610014573e-06, + "loss": 0.7626, "step": 30231 }, { - "epoch": 0.85788876276958, + "epoch": 0.8566975544787327, "grad_norm": 0.0, - "learning_rate": 1.040693096744837e-06, - "loss": 0.9249, + "learning_rate": 1.0578686828157859e-06, + "loss": 0.8447, "step": 30232 }, { - "epoch": 0.8579171396140749, + "epoch": 0.8567258919209952, "grad_norm": 0.0, - "learning_rate": 1.0402848857370617e-06, - "loss": 0.8316, + "learning_rate": 1.05745787995301e-06, + "loss": 0.8387, "step": 30233 }, { - "epoch": 0.8579455164585698, + "epoch": 0.8567542293632576, "grad_norm": 0.0, - "learning_rate": 1.039876750412403e-06, - "loss": 0.7838, + "learning_rate": 1.057047152416585e-06, + "loss": 0.8039, "step": 30234 }, { - "epoch": 0.8579738933030647, + "epoch": 0.8567825668055201, "grad_norm": 0.0, - "learning_rate": 1.039468690774309e-06, - "loss": 0.8858, + "learning_rate": 1.056636500209972e-06, + "loss": 0.7832, "step": 30235 }, { - "epoch": 0.8580022701475596, + "epoch": 0.8568109042477826, "grad_norm": 0.0, - "learning_rate": 1.0390607068262248e-06, - "loss": 0.7366, + "learning_rate": 1.0562259233366334e-06, + "loss": 0.817, "step": 30236 }, { - "epoch": 0.8580306469920544, + "epoch": 0.8568392416900451, "grad_norm": 0.0, - "learning_rate": 1.0386527985715977e-06, - "loss": 0.826, + "learning_rate": 1.0558154218000227e-06, + "loss": 0.7183, "step": 30237 }, { - "epoch": 0.8580590238365494, + "epoch": 0.8568675791323075, "grad_norm": 0.0, - "learning_rate": 1.038244966013875e-06, - "loss": 0.8214, + "learning_rate": 1.0554049956035994e-06, + "loss": 0.8786, "step": 30238 }, { - "epoch": 0.8580874006810443, + "epoch": 0.85689591657457, "grad_norm": 0.0, - "learning_rate": 1.037837209156497e-06, - "loss": 0.8304, + "learning_rate": 1.054994644750824e-06, + "loss": 0.7983, "step": 30239 }, { - "epoch": 0.8581157775255391, + "epoch": 0.8569242540168325, "grad_norm": 0.0, - "learning_rate": 1.0374295280029123e-06, - "loss": 0.8813, + "learning_rate": 1.0545843692451495e-06, + "loss": 0.7937, "step": 30240 }, { - "epoch": 0.8581441543700341, + "epoch": 0.8569525914590949, "grad_norm": 0.0, - "learning_rate": 1.0370219225565647e-06, - "loss": 0.8266, + "learning_rate": 1.054174169090031e-06, + "loss": 0.7968, "step": 30241 }, { - "epoch": 0.858172531214529, + "epoch": 0.8569809289013574, "grad_norm": 0.0, - "learning_rate": 1.0366143928208938e-06, - "loss": 0.8003, + "learning_rate": 1.0537640442889263e-06, + "loss": 0.8503, "step": 30242 }, { - "epoch": 0.8582009080590238, + "epoch": 0.8570092663436198, "grad_norm": 0.0, - "learning_rate": 1.0362069387993434e-06, - "loss": 0.8006, + "learning_rate": 1.0533539948452886e-06, + "loss": 0.834, "step": 30243 }, { - "epoch": 0.8582292849035187, + "epoch": 0.8570376037858823, "grad_norm": 0.0, - "learning_rate": 1.0357995604953597e-06, - "loss": 0.8366, + "learning_rate": 1.0529440207625752e-06, + "loss": 0.8193, "step": 30244 }, { - "epoch": 0.8582576617480137, + "epoch": 0.8570659412281447, "grad_norm": 0.0, - "learning_rate": 1.0353922579123765e-06, - "loss": 0.7174, + "learning_rate": 1.0525341220442342e-06, + "loss": 0.9002, "step": 30245 }, { - "epoch": 0.8582860385925085, + "epoch": 0.8570942786704072, "grad_norm": 0.0, - "learning_rate": 1.034985031053839e-06, - "loss": 0.8398, + "learning_rate": 1.052124298693723e-06, + "loss": 0.7768, "step": 30246 }, { - "epoch": 0.8583144154370034, + "epoch": 0.8571226161126697, "grad_norm": 0.0, - "learning_rate": 1.0345778799231854e-06, - "loss": 0.8239, + "learning_rate": 1.0517145507144889e-06, + "loss": 0.7262, "step": 30247 }, { - "epoch": 0.8583427922814983, + "epoch": 0.8571509535549321, "grad_norm": 0.0, - "learning_rate": 1.0341708045238553e-06, - "loss": 0.7521, + "learning_rate": 1.0513048781099867e-06, + "loss": 0.8562, "step": 30248 }, { - "epoch": 0.8583711691259932, + "epoch": 0.8571792909971946, "grad_norm": 0.0, - "learning_rate": 1.033763804859289e-06, - "loss": 0.711, + "learning_rate": 1.0508952808836682e-06, + "loss": 0.7673, "step": 30249 }, { - "epoch": 0.8583995459704881, + "epoch": 0.8572076284394571, "grad_norm": 0.0, - "learning_rate": 1.0333568809329219e-06, - "loss": 0.7376, + "learning_rate": 1.0504857590389805e-06, + "loss": 0.876, "step": 30250 }, { - "epoch": 0.8584279228149829, + "epoch": 0.8572359658817195, "grad_norm": 0.0, - "learning_rate": 1.0329500327481924e-06, - "loss": 0.8894, + "learning_rate": 1.0500763125793745e-06, + "loss": 0.8463, "step": 30251 }, { - "epoch": 0.8584562996594779, + "epoch": 0.857264303323982, "grad_norm": 0.0, - "learning_rate": 1.0325432603085384e-06, - "loss": 0.7317, + "learning_rate": 1.0496669415083006e-06, + "loss": 0.8195, "step": 30252 }, { - "epoch": 0.8584846765039728, + "epoch": 0.8572926407662445, "grad_norm": 0.0, - "learning_rate": 1.032136563617392e-06, - "loss": 0.7145, + "learning_rate": 1.0492576458292036e-06, + "loss": 0.7537, "step": 30253 }, { - "epoch": 0.8585130533484676, + "epoch": 0.857320978208507, "grad_norm": 0.0, - "learning_rate": 1.0317299426781924e-06, - "loss": 0.766, + "learning_rate": 1.0488484255455344e-06, + "loss": 0.7795, "step": 30254 }, { - "epoch": 0.8585414301929626, + "epoch": 0.8573493156507693, "grad_norm": 0.0, - "learning_rate": 1.0313233974943748e-06, - "loss": 0.7903, + "learning_rate": 1.048439280660738e-06, + "loss": 0.8452, "step": 30255 }, { - "epoch": 0.8585698070374574, + "epoch": 0.8573776530930318, "grad_norm": 0.0, - "learning_rate": 1.0309169280693698e-06, - "loss": 0.8231, + "learning_rate": 1.0480302111782614e-06, + "loss": 0.7622, "step": 30256 }, { - "epoch": 0.8585981838819523, + "epoch": 0.8574059905352943, "grad_norm": 0.0, - "learning_rate": 1.0305105344066125e-06, - "loss": 0.7363, + "learning_rate": 1.0476212171015532e-06, + "loss": 0.7665, "step": 30257 }, { - "epoch": 0.8586265607264473, + "epoch": 0.8574343279775567, "grad_norm": 0.0, - "learning_rate": 1.0301042165095377e-06, - "loss": 0.8205, + "learning_rate": 1.0472122984340528e-06, + "loss": 0.8332, "step": 30258 }, { - "epoch": 0.8586549375709421, + "epoch": 0.8574626654198192, "grad_norm": 0.0, - "learning_rate": 1.0296979743815739e-06, - "loss": 0.8538, + "learning_rate": 1.0468034551792083e-06, + "loss": 0.7463, "step": 30259 }, { - "epoch": 0.858683314415437, + "epoch": 0.8574910028620817, "grad_norm": 0.0, - "learning_rate": 1.0292918080261538e-06, - "loss": 0.7704, + "learning_rate": 1.046394687340465e-06, + "loss": 0.8427, "step": 30260 }, { - "epoch": 0.8587116912599319, + "epoch": 0.8575193403043442, "grad_norm": 0.0, - "learning_rate": 1.0288857174467127e-06, - "loss": 0.6984, + "learning_rate": 1.0459859949212625e-06, + "loss": 0.8212, "step": 30261 }, { - "epoch": 0.8587400681044268, + "epoch": 0.8575476777466066, "grad_norm": 0.0, - "learning_rate": 1.028479702646672e-06, - "loss": 0.6125, + "learning_rate": 1.0455773779250466e-06, + "loss": 0.7726, "step": 30262 }, { - "epoch": 0.8587684449489217, + "epoch": 0.8575760151888691, "grad_norm": 0.0, - "learning_rate": 1.028073763629469e-06, - "loss": 0.816, + "learning_rate": 1.045168836355256e-06, + "loss": 0.7805, "step": 30263 }, { - "epoch": 0.8587968217934165, + "epoch": 0.8576043526311316, "grad_norm": 0.0, - "learning_rate": 1.0276679003985323e-06, - "loss": 0.78, + "learning_rate": 1.044760370215333e-06, + "loss": 0.8391, "step": 30264 }, { - "epoch": 0.8588251986379115, + "epoch": 0.8576326900733939, "grad_norm": 0.0, - "learning_rate": 1.0272621129572858e-06, - "loss": 0.8352, + "learning_rate": 1.0443519795087209e-06, + "loss": 0.8348, "step": 30265 }, { - "epoch": 0.8588535754824064, + "epoch": 0.8576610275156564, "grad_norm": 0.0, - "learning_rate": 1.0268564013091598e-06, - "loss": 0.8116, + "learning_rate": 1.0439436642388555e-06, + "loss": 0.782, "step": 30266 }, { - "epoch": 0.8588819523269012, + "epoch": 0.8576893649579189, "grad_norm": 0.0, - "learning_rate": 1.026450765457584e-06, - "loss": 0.8311, + "learning_rate": 1.043535424409179e-06, + "loss": 0.7651, "step": 30267 }, { - "epoch": 0.8589103291713961, + "epoch": 0.8577177024001814, "grad_norm": 0.0, - "learning_rate": 1.0260452054059788e-06, - "loss": 0.7205, + "learning_rate": 1.043127260023129e-06, + "loss": 0.864, "step": 30268 }, { - "epoch": 0.8589387060158911, + "epoch": 0.8577460398424438, "grad_norm": 0.0, - "learning_rate": 1.025639721157775e-06, - "loss": 0.7022, + "learning_rate": 1.0427191710841444e-06, + "loss": 0.8462, "step": 30269 }, { - "epoch": 0.8589670828603859, + "epoch": 0.8577743772847063, "grad_norm": 0.0, - "learning_rate": 1.0252343127163943e-06, - "loss": 0.6886, + "learning_rate": 1.0423111575956646e-06, + "loss": 0.771, "step": 30270 }, { - "epoch": 0.8589954597048808, + "epoch": 0.8578027147269688, "grad_norm": 0.0, - "learning_rate": 1.024828980085264e-06, + "learning_rate": 1.0419032195611223e-06, "loss": 0.7745, "step": 30271 }, { - "epoch": 0.8590238365493758, + "epoch": 0.8578310521692312, "grad_norm": 0.0, - "learning_rate": 1.0244237232678067e-06, - "loss": 0.8367, + "learning_rate": 1.0414953569839558e-06, + "loss": 0.7396, "step": 30272 }, { - "epoch": 0.8590522133938706, + "epoch": 0.8578593896114937, "grad_norm": 0.0, - "learning_rate": 1.0240185422674453e-06, - "loss": 0.7615, + "learning_rate": 1.0410875698676014e-06, + "loss": 0.7647, "step": 30273 }, { - "epoch": 0.8590805902383655, + "epoch": 0.8578877270537562, "grad_norm": 0.0, - "learning_rate": 1.0236134370876016e-06, - "loss": 0.7852, + "learning_rate": 1.040679858215493e-06, + "loss": 0.8276, "step": 30274 }, { - "epoch": 0.8591089670828603, + "epoch": 0.8579160644960185, "grad_norm": 0.0, - "learning_rate": 1.0232084077317017e-06, - "loss": 0.8626, + "learning_rate": 1.0402722220310656e-06, + "loss": 0.9141, "step": 30275 }, { - "epoch": 0.8591373439273553, + "epoch": 0.857944401938281, "grad_norm": 0.0, - "learning_rate": 1.0228034542031617e-06, - "loss": 0.8388, + "learning_rate": 1.039864661317751e-06, + "loss": 0.7338, "step": 30276 }, { - "epoch": 0.8591657207718502, + "epoch": 0.8579727393805435, "grad_norm": 0.0, - "learning_rate": 1.0223985765054024e-06, - "loss": 0.8754, + "learning_rate": 1.039457176078984e-06, + "loss": 0.7123, "step": 30277 }, { - "epoch": 0.859194097616345, + "epoch": 0.858001076822806, "grad_norm": 0.0, - "learning_rate": 1.0219937746418496e-06, - "loss": 0.8258, + "learning_rate": 1.0390497663181975e-06, + "loss": 0.788, "step": 30278 }, { - "epoch": 0.85922247446084, + "epoch": 0.8580294142650684, "grad_norm": 0.0, - "learning_rate": 1.0215890486159174e-06, - "loss": 0.8535, + "learning_rate": 1.038642432038821e-06, + "loss": 0.7615, "step": 30279 }, { - "epoch": 0.8592508513053349, + "epoch": 0.8580577517073309, "grad_norm": 0.0, - "learning_rate": 1.0211843984310254e-06, - "loss": 0.8165, + "learning_rate": 1.0382351732442876e-06, + "loss": 0.8964, "step": 30280 }, { - "epoch": 0.8592792281498297, + "epoch": 0.8580860891495934, "grad_norm": 0.0, - "learning_rate": 1.0207798240905954e-06, - "loss": 0.8407, + "learning_rate": 1.0378279899380261e-06, + "loss": 0.7957, "step": 30281 }, { - "epoch": 0.8593076049943247, + "epoch": 0.8581144265918558, "grad_norm": 0.0, - "learning_rate": 1.0203753255980387e-06, - "loss": 0.8232, + "learning_rate": 1.0374208821234688e-06, + "loss": 0.8293, "step": 30282 }, { - "epoch": 0.8593359818388195, + "epoch": 0.8581427640341183, "grad_norm": 0.0, - "learning_rate": 1.0199709029567761e-06, - "loss": 0.7412, + "learning_rate": 1.0370138498040449e-06, + "loss": 0.7423, "step": 30283 }, { - "epoch": 0.8593643586833144, + "epoch": 0.8581711014763808, "grad_norm": 0.0, - "learning_rate": 1.019566556170225e-06, - "loss": 0.7872, + "learning_rate": 1.0366068929831797e-06, + "loss": 0.7652, "step": 30284 }, { - "epoch": 0.8593927355278093, + "epoch": 0.8581994389186433, "grad_norm": 0.0, - "learning_rate": 1.019162285241796e-06, - "loss": 0.7387, + "learning_rate": 1.0362000116643024e-06, + "loss": 0.8204, "step": 30285 }, { - "epoch": 0.8594211123723042, + "epoch": 0.8582277763609056, "grad_norm": 0.0, - "learning_rate": 1.0187580901749084e-06, - "loss": 0.7625, + "learning_rate": 1.0357932058508434e-06, + "loss": 0.8543, "step": 30286 }, { - "epoch": 0.8594494892167991, + "epoch": 0.8582561138031681, "grad_norm": 0.0, - "learning_rate": 1.0183539709729761e-06, - "loss": 0.9139, + "learning_rate": 1.0353864755462262e-06, + "loss": 0.9058, "step": 30287 }, { - "epoch": 0.859477866061294, + "epoch": 0.8582844512454306, "grad_norm": 0.0, - "learning_rate": 1.0179499276394088e-06, - "loss": 0.7673, + "learning_rate": 1.0349798207538764e-06, + "loss": 0.85, "step": 30288 }, { - "epoch": 0.8595062429057889, + "epoch": 0.858312788687693, "grad_norm": 0.0, - "learning_rate": 1.017545960177624e-06, - "loss": 0.7388, + "learning_rate": 1.0345732414772224e-06, + "loss": 0.7908, "step": 30289 }, { - "epoch": 0.8595346197502838, + "epoch": 0.8583411261299555, "grad_norm": 0.0, - "learning_rate": 1.0171420685910328e-06, - "loss": 0.7676, + "learning_rate": 1.0341667377196863e-06, + "loss": 0.8517, "step": 30290 }, { - "epoch": 0.8595629965947786, + "epoch": 0.858369463572218, "grad_norm": 0.0, - "learning_rate": 1.016738252883045e-06, - "loss": 0.819, + "learning_rate": 1.0337603094846948e-06, + "loss": 0.7592, "step": 30291 }, { - "epoch": 0.8595913734392735, + "epoch": 0.8583978010144805, "grad_norm": 0.0, - "learning_rate": 1.0163345130570723e-06, - "loss": 0.7876, + "learning_rate": 1.0333539567756668e-06, + "loss": 0.826, "step": 30292 }, { - "epoch": 0.8596197502837685, + "epoch": 0.8584261384567429, "grad_norm": 0.0, - "learning_rate": 1.0159308491165264e-06, - "loss": 0.8331, + "learning_rate": 1.032947679596029e-06, + "loss": 0.8438, "step": 30293 }, { - "epoch": 0.8596481271282633, + "epoch": 0.8584544758990054, "grad_norm": 0.0, - "learning_rate": 1.015527261064817e-06, - "loss": 0.798, + "learning_rate": 1.0325414779492028e-06, + "loss": 0.8263, "step": 30294 }, { - "epoch": 0.8596765039727582, + "epoch": 0.8584828133412679, "grad_norm": 0.0, - "learning_rate": 1.0151237489053545e-06, - "loss": 0.7333, + "learning_rate": 1.03213535183861e-06, + "loss": 0.7601, "step": 30295 }, { - "epoch": 0.8597048808172532, + "epoch": 0.8585111507835302, "grad_norm": 0.0, - "learning_rate": 1.0147203126415427e-06, - "loss": 0.745, + "learning_rate": 1.03172930126767e-06, + "loss": 0.774, "step": 30296 }, { - "epoch": 0.859733257661748, + "epoch": 0.8585394882257927, "grad_norm": 0.0, - "learning_rate": 1.0143169522767926e-06, - "loss": 0.8654, + "learning_rate": 1.031323326239807e-06, + "loss": 0.7679, "step": 30297 }, { - "epoch": 0.8597616345062429, + "epoch": 0.8585678256680552, "grad_norm": 0.0, - "learning_rate": 1.0139136678145133e-06, - "loss": 0.7656, + "learning_rate": 1.0309174267584365e-06, + "loss": 0.7691, "step": 30298 }, { - "epoch": 0.8597900113507378, + "epoch": 0.8585961631103176, "grad_norm": 0.0, - "learning_rate": 1.013510459258108e-06, - "loss": 0.8795, + "learning_rate": 1.0305116028269812e-06, + "loss": 0.736, "step": 30299 }, { - "epoch": 0.8598183881952327, + "epoch": 0.8586245005525801, "grad_norm": 0.0, - "learning_rate": 1.0131073266109826e-06, - "loss": 0.8599, + "learning_rate": 1.0301058544488552e-06, + "loss": 0.909, "step": 30300 }, { - "epoch": 0.8598467650397276, + "epoch": 0.8586528379948426, "grad_norm": 0.0, - "learning_rate": 1.0127042698765465e-06, - "loss": 0.8456, + "learning_rate": 1.0297001816274775e-06, + "loss": 0.7208, "step": 30301 }, { - "epoch": 0.8598751418842224, + "epoch": 0.8586811754371051, "grad_norm": 0.0, - "learning_rate": 1.0123012890581985e-06, - "loss": 0.9191, + "learning_rate": 1.0292945843662694e-06, + "loss": 0.9248, "step": 30302 }, { - "epoch": 0.8599035187287174, + "epoch": 0.8587095128793675, "grad_norm": 0.0, - "learning_rate": 1.0118983841593467e-06, - "loss": 0.7082, + "learning_rate": 1.028889062668642e-06, + "loss": 0.6766, "step": 30303 }, { - "epoch": 0.8599318955732123, + "epoch": 0.85873785032163, "grad_norm": 0.0, - "learning_rate": 1.011495555183395e-06, - "loss": 0.7877, + "learning_rate": 1.0284836165380153e-06, + "loss": 0.8526, "step": 30304 }, { - "epoch": 0.8599602724177071, + "epoch": 0.8587661877638925, "grad_norm": 0.0, - "learning_rate": 1.011092802133742e-06, - "loss": 0.8803, + "learning_rate": 1.0280782459778006e-06, + "loss": 0.7605, "step": 30305 }, { - "epoch": 0.8599886492622021, + "epoch": 0.8587945252061548, "grad_norm": 0.0, - "learning_rate": 1.0106901250137924e-06, - "loss": 0.8368, + "learning_rate": 1.027672950991414e-06, + "loss": 0.7348, "step": 30306 }, { - "epoch": 0.860017026106697, + "epoch": 0.8588228626484173, "grad_norm": 0.0, - "learning_rate": 1.0102875238269494e-06, - "loss": 0.7628, + "learning_rate": 1.02726773158227e-06, + "loss": 0.839, "step": 30307 }, { - "epoch": 0.8600454029511918, + "epoch": 0.8588512000906798, "grad_norm": 0.0, - "learning_rate": 1.0098849985766067e-06, - "loss": 0.7524, + "learning_rate": 1.0268625877537818e-06, + "loss": 0.7724, "step": 30308 }, { - "epoch": 0.8600737797956867, + "epoch": 0.8588795375329423, "grad_norm": 0.0, - "learning_rate": 1.0094825492661754e-06, - "loss": 0.7019, + "learning_rate": 1.0264575195093628e-06, + "loss": 0.8086, "step": 30309 }, { - "epoch": 0.8601021566401816, + "epoch": 0.8589078749752047, "grad_norm": 0.0, - "learning_rate": 1.0090801758990465e-06, - "loss": 0.8515, + "learning_rate": 1.0260525268524258e-06, + "loss": 0.808, "step": 30310 }, { - "epoch": 0.8601305334846765, + "epoch": 0.8589362124174672, "grad_norm": 0.0, - "learning_rate": 1.008677878478621e-06, - "loss": 0.8965, + "learning_rate": 1.0256476097863788e-06, + "loss": 0.8638, "step": 30311 }, { - "epoch": 0.8601589103291714, + "epoch": 0.8589645498597297, "grad_norm": 0.0, - "learning_rate": 1.0082756570082997e-06, - "loss": 0.8235, + "learning_rate": 1.025242768314637e-06, + "loss": 0.7162, "step": 30312 }, { - "epoch": 0.8601872871736663, + "epoch": 0.8589928873019921, "grad_norm": 0.0, - "learning_rate": 1.0078735114914761e-06, - "loss": 0.7856, + "learning_rate": 1.0248380024406057e-06, + "loss": 0.7854, "step": 30313 }, { - "epoch": 0.8602156640181612, + "epoch": 0.8590212247442546, "grad_norm": 0.0, - "learning_rate": 1.00747144193155e-06, - "loss": 0.8107, + "learning_rate": 1.0244333121676964e-06, + "loss": 0.7272, "step": 30314 }, { - "epoch": 0.860244040862656, + "epoch": 0.8590495621865171, "grad_norm": 0.0, - "learning_rate": 1.0070694483319187e-06, - "loss": 0.8241, + "learning_rate": 1.0240286974993207e-06, + "loss": 0.8109, "step": 30315 }, { - "epoch": 0.860272417707151, + "epoch": 0.8590778996287795, "grad_norm": 0.0, - "learning_rate": 1.006667530695974e-06, - "loss": 0.8154, + "learning_rate": 1.023624158438883e-06, + "loss": 0.8263, "step": 30316 }, { - "epoch": 0.8603007945516459, + "epoch": 0.859106237071042, "grad_norm": 0.0, - "learning_rate": 1.0062656890271116e-06, - "loss": 0.7921, + "learning_rate": 1.0232196949897922e-06, + "loss": 0.7771, "step": 30317 }, { - "epoch": 0.8603291713961407, + "epoch": 0.8591345745133044, "grad_norm": 0.0, - "learning_rate": 1.0058639233287304e-06, - "loss": 0.8684, + "learning_rate": 1.0228153071554559e-06, + "loss": 0.7501, "step": 30318 }, { - "epoch": 0.8603575482406356, + "epoch": 0.8591629119555669, "grad_norm": 0.0, - "learning_rate": 1.0054622336042198e-06, - "loss": 0.7991, + "learning_rate": 1.022410994939279e-06, + "loss": 0.817, "step": 30319 }, { - "epoch": 0.8603859250851306, + "epoch": 0.8591912493978293, "grad_norm": 0.0, - "learning_rate": 1.0050606198569723e-06, - "loss": 0.7494, + "learning_rate": 1.022006758344668e-06, + "loss": 0.8397, "step": 30320 }, { - "epoch": 0.8604143019296254, + "epoch": 0.8592195868400918, "grad_norm": 0.0, - "learning_rate": 1.0046590820903845e-06, - "loss": 0.7775, + "learning_rate": 1.0216025973750277e-06, + "loss": 0.8756, "step": 30321 }, { - "epoch": 0.8604426787741203, + "epoch": 0.8592479242823543, "grad_norm": 0.0, - "learning_rate": 1.0042576203078447e-06, - "loss": 0.7809, + "learning_rate": 1.0211985120337631e-06, + "loss": 0.8075, "step": 30322 }, { - "epoch": 0.8604710556186153, + "epoch": 0.8592762617246167, "grad_norm": 0.0, - "learning_rate": 1.0038562345127445e-06, - "loss": 0.7913, + "learning_rate": 1.0207945023242794e-06, + "loss": 0.8062, "step": 30323 }, { - "epoch": 0.8604994324631101, + "epoch": 0.8593045991668792, "grad_norm": 0.0, - "learning_rate": 1.0034549247084757e-06, - "loss": 0.8492, + "learning_rate": 1.020390568249976e-06, + "loss": 0.7743, "step": 30324 }, { - "epoch": 0.860527809307605, + "epoch": 0.8593329366091417, "grad_norm": 0.0, - "learning_rate": 1.0030536908984268e-06, - "loss": 0.8247, + "learning_rate": 1.019986709814257e-06, + "loss": 0.8623, "step": 30325 }, { - "epoch": 0.8605561861520998, + "epoch": 0.8593612740514042, "grad_norm": 0.0, - "learning_rate": 1.0026525330859904e-06, - "loss": 0.7455, + "learning_rate": 1.0195829270205272e-06, + "loss": 0.8801, "step": 30326 }, { - "epoch": 0.8605845629965948, + "epoch": 0.8593896114936665, "grad_norm": 0.0, - "learning_rate": 1.0022514512745495e-06, - "loss": 0.8012, + "learning_rate": 1.0191792198721829e-06, + "loss": 0.8327, "step": 30327 }, { - "epoch": 0.8606129398410897, + "epoch": 0.859417948935929, "grad_norm": 0.0, - "learning_rate": 1.001850445467496e-06, - "loss": 0.8147, + "learning_rate": 1.0187755883726291e-06, + "loss": 0.8674, "step": 30328 }, { - "epoch": 0.8606413166855845, + "epoch": 0.8594462863781915, "grad_norm": 0.0, - "learning_rate": 1.001449515668218e-06, - "loss": 0.8292, + "learning_rate": 1.018372032525261e-06, + "loss": 0.8328, "step": 30329 }, { - "epoch": 0.8606696935300795, + "epoch": 0.8594746238204539, "grad_norm": 0.0, - "learning_rate": 1.0010486618800986e-06, - "loss": 0.7278, + "learning_rate": 1.0179685523334814e-06, + "loss": 0.7601, "step": 30330 }, { - "epoch": 0.8606980703745744, + "epoch": 0.8595029612627164, "grad_norm": 0.0, - "learning_rate": 1.0006478841065259e-06, - "loss": 0.8385, + "learning_rate": 1.0175651478006898e-06, + "loss": 0.882, "step": 30331 }, { - "epoch": 0.8607264472190692, + "epoch": 0.8595312987049789, "grad_norm": 0.0, - "learning_rate": 1.0002471823508864e-06, - "loss": 0.8043, + "learning_rate": 1.0171618189302802e-06, + "loss": 0.7391, "step": 30332 }, { - "epoch": 0.8607548240635642, + "epoch": 0.8595596361472414, "grad_norm": 0.0, - "learning_rate": 9.998465566165627e-07, - "loss": 0.7102, + "learning_rate": 1.0167585657256528e-06, + "loss": 0.6824, "step": 30333 }, { - "epoch": 0.860783200908059, + "epoch": 0.8595879735895038, "grad_norm": 0.0, - "learning_rate": 9.994460069069389e-07, - "loss": 0.8361, + "learning_rate": 1.0163553881902032e-06, + "loss": 0.9123, "step": 30334 }, { - "epoch": 0.8608115777525539, + "epoch": 0.8596163110317663, "grad_norm": 0.0, - "learning_rate": 9.990455332254012e-07, - "loss": 0.6816, + "learning_rate": 1.0159522863273285e-06, + "loss": 0.8143, "step": 30335 }, { - "epoch": 0.8608399545970488, + "epoch": 0.8596446484740288, "grad_norm": 0.0, - "learning_rate": 9.986451355753279e-07, - "loss": 0.8423, + "learning_rate": 1.015549260140426e-06, + "loss": 0.8538, "step": 30336 }, { - "epoch": 0.8608683314415437, + "epoch": 0.8596729859162912, "grad_norm": 0.0, - "learning_rate": 9.982448139601053e-07, - "loss": 0.8395, + "learning_rate": 1.0151463096328863e-06, + "loss": 0.788, "step": 30337 }, { - "epoch": 0.8608967082860386, + "epoch": 0.8597013233585536, "grad_norm": 0.0, - "learning_rate": 9.97844568383114e-07, - "loss": 0.8001, + "learning_rate": 1.0147434348081052e-06, + "loss": 0.8385, "step": 30338 }, { - "epoch": 0.8609250851305335, + "epoch": 0.8597296608008161, "grad_norm": 0.0, - "learning_rate": 9.974443988477312e-07, - "loss": 0.7749, + "learning_rate": 1.0143406356694797e-06, + "loss": 0.734, "step": 30339 }, { - "epoch": 0.8609534619750284, + "epoch": 0.8597579982430785, "grad_norm": 0.0, - "learning_rate": 9.970443053573442e-07, - "loss": 0.8149, + "learning_rate": 1.0139379122203974e-06, + "loss": 0.7234, "step": 30340 }, { - "epoch": 0.8609818388195233, + "epoch": 0.859786335685341, "grad_norm": 0.0, - "learning_rate": 9.96644287915326e-07, - "loss": 0.8247, + "learning_rate": 1.0135352644642538e-06, + "loss": 0.8382, "step": 30341 }, { - "epoch": 0.8610102156640181, + "epoch": 0.8598146731276035, "grad_norm": 0.0, - "learning_rate": 9.962443465250592e-07, - "loss": 0.8578, + "learning_rate": 1.0131326924044393e-06, + "loss": 0.8536, "step": 30342 }, { - "epoch": 0.861038592508513, + "epoch": 0.859843010569866, "grad_norm": 0.0, - "learning_rate": 9.958444811899225e-07, - "loss": 0.8276, + "learning_rate": 1.0127301960443448e-06, + "loss": 0.7842, "step": 30343 }, { - "epoch": 0.861066969353008, + "epoch": 0.8598713480121284, "grad_norm": 0.0, - "learning_rate": 9.954446919132898e-07, - "loss": 0.8404, + "learning_rate": 1.0123277753873629e-06, + "loss": 0.7164, "step": 30344 }, { - "epoch": 0.8610953461975028, + "epoch": 0.8598996854543909, "grad_norm": 0.0, - "learning_rate": 9.950449786985416e-07, - "loss": 0.7904, + "learning_rate": 1.0119254304368798e-06, + "loss": 0.72, "step": 30345 }, { - "epoch": 0.8611237230419977, + "epoch": 0.8599280228966534, "grad_norm": 0.0, - "learning_rate": 9.946453415490542e-07, - "loss": 0.8413, + "learning_rate": 1.0115231611962861e-06, + "loss": 0.7695, "step": 30346 }, { - "epoch": 0.8611520998864927, + "epoch": 0.8599563603389158, "grad_norm": 0.0, - "learning_rate": 9.942457804682015e-07, - "loss": 0.7923, + "learning_rate": 1.0111209676689715e-06, + "loss": 0.8455, "step": 30347 }, { - "epoch": 0.8611804767309875, + "epoch": 0.8599846977811783, "grad_norm": 0.0, - "learning_rate": 9.938462954593587e-07, - "loss": 0.8295, + "learning_rate": 1.010718849858322e-06, + "loss": 0.9431, "step": 30348 }, { - "epoch": 0.8612088535754824, + "epoch": 0.8600130352234407, "grad_norm": 0.0, - "learning_rate": 9.934468865259038e-07, - "loss": 0.9045, + "learning_rate": 1.0103168077677284e-06, + "loss": 0.8363, "step": 30349 }, { - "epoch": 0.8612372304199774, + "epoch": 0.8600413726657032, "grad_norm": 0.0, - "learning_rate": 9.93047553671206e-07, - "loss": 0.9049, + "learning_rate": 1.0099148414005723e-06, + "loss": 0.8103, "step": 30350 }, { - "epoch": 0.8612656072644722, + "epoch": 0.8600697101079656, "grad_norm": 0.0, - "learning_rate": 9.926482968986396e-07, - "loss": 0.7451, + "learning_rate": 1.009512950760242e-06, + "loss": 0.8199, "step": 30351 }, { - "epoch": 0.8612939841089671, + "epoch": 0.8600980475502281, "grad_norm": 0.0, - "learning_rate": 9.9224911621158e-07, - "loss": 0.8098, + "learning_rate": 1.0091111358501238e-06, + "loss": 0.8273, "step": 30352 }, { - "epoch": 0.8613223609534619, + "epoch": 0.8601263849924906, "grad_norm": 0.0, - "learning_rate": 9.918500116133957e-07, - "loss": 0.7762, + "learning_rate": 1.0087093966736006e-06, + "loss": 0.8581, "step": 30353 }, { - "epoch": 0.8613507377979569, + "epoch": 0.860154722434753, "grad_norm": 0.0, - "learning_rate": 9.914509831074592e-07, - "loss": 0.7845, + "learning_rate": 1.0083077332340563e-06, + "loss": 0.7918, "step": 30354 }, { - "epoch": 0.8613791146424518, + "epoch": 0.8601830598770155, "grad_norm": 0.0, - "learning_rate": 9.910520306971405e-07, - "loss": 0.9228, + "learning_rate": 1.007906145534877e-06, + "loss": 0.7294, "step": 30355 }, { - "epoch": 0.8614074914869466, + "epoch": 0.860211397319278, "grad_norm": 0.0, - "learning_rate": 9.90653154385811e-07, - "loss": 0.7604, + "learning_rate": 1.0075046335794413e-06, + "loss": 0.7835, "step": 30356 }, { - "epoch": 0.8614358683314416, + "epoch": 0.8602397347615405, "grad_norm": 0.0, - "learning_rate": 9.902543541768407e-07, - "loss": 0.8426, + "learning_rate": 1.0071031973711354e-06, + "loss": 0.7706, "step": 30357 }, { - "epoch": 0.8614642451759364, + "epoch": 0.8602680722038029, "grad_norm": 0.0, - "learning_rate": 9.898556300735951e-07, - "loss": 0.6944, + "learning_rate": 1.0067018369133363e-06, + "loss": 0.7615, "step": 30358 }, { - "epoch": 0.8614926220204313, + "epoch": 0.8602964096460654, "grad_norm": 0.0, - "learning_rate": 9.89456982079444e-07, - "loss": 0.7755, + "learning_rate": 1.006300552209427e-06, + "loss": 0.8248, "step": 30359 }, { - "epoch": 0.8615209988649262, + "epoch": 0.8603247470883278, "grad_norm": 0.0, - "learning_rate": 9.890584101977573e-07, - "loss": 0.8515, + "learning_rate": 1.0058993432627884e-06, + "loss": 0.7589, "step": 30360 }, { - "epoch": 0.8615493757094211, + "epoch": 0.8603530845305902, "grad_norm": 0.0, - "learning_rate": 9.886599144318977e-07, - "loss": 0.7985, + "learning_rate": 1.0054982100767996e-06, + "loss": 0.896, "step": 30361 }, { - "epoch": 0.861577752553916, + "epoch": 0.8603814219728527, "grad_norm": 0.0, - "learning_rate": 9.88261494785232e-07, - "loss": 0.8807, + "learning_rate": 1.0050971526548413e-06, + "loss": 0.8611, "step": 30362 }, { - "epoch": 0.8616061293984109, + "epoch": 0.8604097594151152, "grad_norm": 0.0, - "learning_rate": 9.878631512611281e-07, - "loss": 0.8639, + "learning_rate": 1.0046961710002879e-06, + "loss": 0.7198, "step": 30363 }, { - "epoch": 0.8616345062429058, + "epoch": 0.8604380968573776, "grad_norm": 0.0, - "learning_rate": 9.874648838629485e-07, - "loss": 0.842, + "learning_rate": 1.0042952651165195e-06, + "loss": 0.815, "step": 30364 }, { - "epoch": 0.8616628830874007, + "epoch": 0.8604664342996401, "grad_norm": 0.0, - "learning_rate": 9.870666925940576e-07, - "loss": 0.8526, + "learning_rate": 1.0038944350069136e-06, + "loss": 0.7106, "step": 30365 }, { - "epoch": 0.8616912599318955, + "epoch": 0.8604947717419026, "grad_norm": 0.0, - "learning_rate": 9.866685774578212e-07, - "loss": 0.8813, + "learning_rate": 1.003493680674844e-06, + "loss": 0.6529, "step": 30366 }, { - "epoch": 0.8617196367763905, + "epoch": 0.8605231091841651, "grad_norm": 0.0, - "learning_rate": 9.862705384575988e-07, - "loss": 0.7932, + "learning_rate": 1.0030930021236884e-06, + "loss": 0.8077, "step": 30367 }, { - "epoch": 0.8617480136208854, + "epoch": 0.8605514466264275, "grad_norm": 0.0, - "learning_rate": 9.858725755967547e-07, - "loss": 0.784, + "learning_rate": 1.0026923993568228e-06, + "loss": 0.7614, "step": 30368 }, { - "epoch": 0.8617763904653802, + "epoch": 0.86057978406869, "grad_norm": 0.0, - "learning_rate": 9.854746888786493e-07, - "loss": 0.8767, + "learning_rate": 1.0022918723776175e-06, + "loss": 0.8098, "step": 30369 }, { - "epoch": 0.8618047673098751, + "epoch": 0.8606081215109525, "grad_norm": 0.0, - "learning_rate": 9.850768783066455e-07, - "loss": 0.7771, + "learning_rate": 1.0018914211894514e-06, + "loss": 0.8223, "step": 30370 }, { - "epoch": 0.8618331441543701, + "epoch": 0.8606364589532148, "grad_norm": 0.0, - "learning_rate": 9.84679143884102e-07, - "loss": 0.8429, + "learning_rate": 1.001491045795694e-06, + "loss": 0.782, "step": 30371 }, { - "epoch": 0.8618615209988649, + "epoch": 0.8606647963954773, "grad_norm": 0.0, - "learning_rate": 9.8428148561438e-07, - "loss": 0.6946, + "learning_rate": 1.0010907461997189e-06, + "loss": 0.7423, "step": 30372 }, { - "epoch": 0.8618898978433598, + "epoch": 0.8606931338377398, "grad_norm": 0.0, - "learning_rate": 9.838839035008374e-07, - "loss": 0.8295, + "learning_rate": 1.000690522404898e-06, + "loss": 0.8014, "step": 30373 }, { - "epoch": 0.8619182746878548, + "epoch": 0.8607214712800023, "grad_norm": 0.0, - "learning_rate": 9.834863975468323e-07, - "loss": 0.8185, + "learning_rate": 1.0002903744146019e-06, + "loss": 0.6997, "step": 30374 }, { - "epoch": 0.8619466515323496, + "epoch": 0.8607498087222647, "grad_norm": 0.0, - "learning_rate": 9.830889677557243e-07, - "loss": 0.8681, + "learning_rate": 9.998903022322026e-07, + "loss": 0.7261, "step": 30375 }, { - "epoch": 0.8619750283768445, + "epoch": 0.8607781461645272, "grad_norm": 0.0, - "learning_rate": 9.826916141308674e-07, - "loss": 0.8571, + "learning_rate": 9.994903058610706e-07, + "loss": 0.7921, "step": 30376 }, { - "epoch": 0.8620034052213393, + "epoch": 0.8608064836067897, "grad_norm": 0.0, - "learning_rate": 9.822943366756222e-07, - "loss": 0.795, + "learning_rate": 9.99090385304573e-07, + "loss": 0.711, "step": 30377 }, { - "epoch": 0.8620317820658343, + "epoch": 0.8608348210490521, "grad_norm": 0.0, - "learning_rate": 9.818971353933394e-07, - "loss": 0.756, + "learning_rate": 9.986905405660806e-07, + "loss": 0.9577, "step": 30378 }, { - "epoch": 0.8620601589103292, + "epoch": 0.8608631584913146, "grad_norm": 0.0, - "learning_rate": 9.815000102873772e-07, - "loss": 0.8257, + "learning_rate": 9.982907716489587e-07, + "loss": 0.7569, "step": 30379 }, { - "epoch": 0.862088535754824, + "epoch": 0.8608914959335771, "grad_norm": 0.0, - "learning_rate": 9.811029613610913e-07, - "loss": 0.7401, + "learning_rate": 9.978910785565765e-07, + "loss": 0.9012, "step": 30380 }, { - "epoch": 0.862116912599319, + "epoch": 0.8609198333758395, "grad_norm": 0.0, - "learning_rate": 9.807059886178327e-07, - "loss": 0.8455, + "learning_rate": 9.974914612923026e-07, + "loss": 0.7772, "step": 30381 }, { - "epoch": 0.8621452894438139, + "epoch": 0.8609481708181019, "grad_norm": 0.0, - "learning_rate": 9.80309092060956e-07, - "loss": 0.7878, + "learning_rate": 9.970919198594998e-07, + "loss": 0.7725, "step": 30382 }, { - "epoch": 0.8621736662883087, + "epoch": 0.8609765082603644, "grad_norm": 0.0, - "learning_rate": 9.799122716938158e-07, - "loss": 0.7779, + "learning_rate": 9.966924542615353e-07, + "loss": 0.8095, "step": 30383 }, { - "epoch": 0.8622020431328037, + "epoch": 0.8610048457026269, "grad_norm": 0.0, - "learning_rate": 9.79515527519761e-07, - "loss": 0.6936, + "learning_rate": 9.962930645017731e-07, + "loss": 0.7639, "step": 30384 }, { - "epoch": 0.8622304199772985, + "epoch": 0.8610331831448893, "grad_norm": 0.0, - "learning_rate": 9.791188595421409e-07, - "loss": 0.8165, + "learning_rate": 9.958937505835776e-07, + "loss": 0.8279, "step": 30385 }, { - "epoch": 0.8622587968217934, + "epoch": 0.8610615205871518, "grad_norm": 0.0, - "learning_rate": 9.787222677643139e-07, - "loss": 0.7582, + "learning_rate": 9.954945125103122e-07, + "loss": 0.8632, "step": 30386 }, { - "epoch": 0.8622871736662883, + "epoch": 0.8610898580294143, "grad_norm": 0.0, - "learning_rate": 9.783257521896228e-07, - "loss": 0.8646, + "learning_rate": 9.9509535028534e-07, + "loss": 0.8481, "step": 30387 }, { - "epoch": 0.8623155505107832, + "epoch": 0.8611181954716767, "grad_norm": 0.0, - "learning_rate": 9.779293128214206e-07, - "loss": 0.8655, + "learning_rate": 9.94696263912024e-07, + "loss": 0.8136, "step": 30388 }, { - "epoch": 0.8623439273552781, + "epoch": 0.8611465329139392, "grad_norm": 0.0, - "learning_rate": 9.775329496630559e-07, - "loss": 0.8589, + "learning_rate": 9.942972533937268e-07, + "loss": 0.7862, "step": 30389 }, { - "epoch": 0.862372304199773, + "epoch": 0.8611748703562017, "grad_norm": 0.0, - "learning_rate": 9.771366627178748e-07, - "loss": 0.8507, + "learning_rate": 9.938983187338068e-07, + "loss": 0.7909, "step": 30390 }, { - "epoch": 0.8624006810442679, + "epoch": 0.8612032077984642, "grad_norm": 0.0, - "learning_rate": 9.767404519892254e-07, - "loss": 0.7369, + "learning_rate": 9.934994599356275e-07, + "loss": 0.8235, "step": 30391 }, { - "epoch": 0.8624290578887628, + "epoch": 0.8612315452407265, "grad_norm": 0.0, - "learning_rate": 9.763443174804576e-07, - "loss": 0.7855, + "learning_rate": 9.931006770025442e-07, + "loss": 0.7921, "step": 30392 }, { - "epoch": 0.8624574347332576, + "epoch": 0.861259882682989, "grad_norm": 0.0, - "learning_rate": 9.759482591949132e-07, - "loss": 0.8177, + "learning_rate": 9.927019699379182e-07, + "loss": 0.7888, "step": 30393 }, { - "epoch": 0.8624858115777525, + "epoch": 0.8612882201252515, "grad_norm": 0.0, - "learning_rate": 9.755522771359382e-07, - "loss": 0.7366, + "learning_rate": 9.923033387451108e-07, + "loss": 0.7909, "step": 30394 }, { - "epoch": 0.8625141884222475, + "epoch": 0.8613165575675139, "grad_norm": 0.0, - "learning_rate": 9.751563713068824e-07, - "loss": 0.8682, + "learning_rate": 9.919047834274754e-07, + "loss": 0.6747, "step": 30395 }, { - "epoch": 0.8625425652667423, + "epoch": 0.8613448950097764, "grad_norm": 0.0, - "learning_rate": 9.747605417110828e-07, - "loss": 0.8215, + "learning_rate": 9.91506303988371e-07, + "loss": 0.7632, "step": 30396 }, { - "epoch": 0.8625709421112372, + "epoch": 0.8613732324520389, "grad_norm": 0.0, - "learning_rate": 9.74364788351888e-07, - "loss": 0.8355, + "learning_rate": 9.911079004311563e-07, + "loss": 0.7812, "step": 30397 }, { - "epoch": 0.8625993189557322, + "epoch": 0.8614015698943014, "grad_norm": 0.0, - "learning_rate": 9.73969111232641e-07, - "loss": 0.7755, + "learning_rate": 9.90709572759183e-07, + "loss": 0.771, "step": 30398 }, { - "epoch": 0.862627695800227, + "epoch": 0.8614299073365638, "grad_norm": 0.0, - "learning_rate": 9.735735103566802e-07, - "loss": 0.8145, + "learning_rate": 9.903113209758098e-07, + "loss": 0.807, "step": 30399 }, { - "epoch": 0.8626560726447219, + "epoch": 0.8614582447788263, "grad_norm": 0.0, - "learning_rate": 9.731779857273505e-07, - "loss": 0.8449, + "learning_rate": 9.89913145084388e-07, + "loss": 0.7735, "step": 30400 }, { - "epoch": 0.8626844494892169, + "epoch": 0.8614865822210888, "grad_norm": 0.0, - "learning_rate": 9.727825373479915e-07, - "loss": 0.7569, + "learning_rate": 9.89515045088275e-07, + "loss": 0.7772, "step": 30401 }, { - "epoch": 0.8627128263337117, + "epoch": 0.8615149196633511, "grad_norm": 0.0, - "learning_rate": 9.723871652219452e-07, - "loss": 0.6881, + "learning_rate": 9.89117020990824e-07, + "loss": 0.9465, "step": 30402 }, { - "epoch": 0.8627412031782066, + "epoch": 0.8615432571056136, "grad_norm": 0.0, - "learning_rate": 9.71991869352551e-07, - "loss": 0.8122, + "learning_rate": 9.887190727953844e-07, + "loss": 0.8226, "step": 30403 }, { - "epoch": 0.8627695800227014, + "epoch": 0.8615715945478761, "grad_norm": 0.0, - "learning_rate": 9.715966497431462e-07, - "loss": 0.8673, + "learning_rate": 9.88321200505311e-07, + "loss": 0.8157, "step": 30404 }, { - "epoch": 0.8627979568671964, + "epoch": 0.8615999319901386, "grad_norm": 0.0, - "learning_rate": 9.712015063970703e-07, - "loss": 0.8234, + "learning_rate": 9.87923404123956e-07, + "loss": 0.7487, "step": 30405 }, { - "epoch": 0.8628263337116913, + "epoch": 0.861628269432401, "grad_norm": 0.0, - "learning_rate": 9.70806439317663e-07, - "loss": 0.8661, + "learning_rate": 9.875256836546664e-07, + "loss": 0.7594, "step": 30406 }, { - "epoch": 0.8628547105561861, + "epoch": 0.8616566068746635, "grad_norm": 0.0, - "learning_rate": 9.704114485082571e-07, - "loss": 0.6619, + "learning_rate": 9.871280391007965e-07, + "loss": 0.8849, "step": 30407 }, { - "epoch": 0.8628830874006811, + "epoch": 0.861684944316926, "grad_norm": 0.0, - "learning_rate": 9.700165339721924e-07, - "loss": 0.7732, + "learning_rate": 9.867304704656932e-07, + "loss": 0.7116, "step": 30408 }, { - "epoch": 0.862911464245176, + "epoch": 0.8617132817591884, "grad_norm": 0.0, - "learning_rate": 9.696216957128058e-07, - "loss": 0.7662, + "learning_rate": 9.863329777527053e-07, + "loss": 0.9157, "step": 30409 }, { - "epoch": 0.8629398410896708, + "epoch": 0.8617416192014509, "grad_norm": 0.0, - "learning_rate": 9.692269337334282e-07, - "loss": 0.8881, + "learning_rate": 9.859355609651843e-07, + "loss": 0.8549, "step": 30410 }, { - "epoch": 0.8629682179341657, + "epoch": 0.8617699566437134, "grad_norm": 0.0, - "learning_rate": 9.688322480373967e-07, - "loss": 0.9023, + "learning_rate": 9.855382201064723e-07, + "loss": 0.8464, "step": 30411 }, { - "epoch": 0.8629965947786606, + "epoch": 0.8617982940859757, "grad_norm": 0.0, - "learning_rate": 9.684376386280481e-07, - "loss": 0.8221, + "learning_rate": 9.851409551799207e-07, + "loss": 0.7854, "step": 30412 }, { - "epoch": 0.8630249716231555, + "epoch": 0.8618266315282382, "grad_norm": 0.0, - "learning_rate": 9.680431055087103e-07, - "loss": 0.892, + "learning_rate": 9.847437661888738e-07, + "loss": 0.8498, "step": 30413 }, { - "epoch": 0.8630533484676504, + "epoch": 0.8618549689705007, "grad_norm": 0.0, - "learning_rate": 9.676486486827186e-07, - "loss": 0.8571, + "learning_rate": 9.843466531366774e-07, + "loss": 0.784, "step": 30414 }, { - "epoch": 0.8630817253121453, + "epoch": 0.8618833064127632, "grad_norm": 0.0, - "learning_rate": 9.672542681534035e-07, - "loss": 0.8638, + "learning_rate": 9.839496160266805e-07, + "loss": 0.7884, "step": 30415 }, { - "epoch": 0.8631101021566402, + "epoch": 0.8619116438550256, "grad_norm": 0.0, - "learning_rate": 9.668599639240993e-07, - "loss": 0.8292, + "learning_rate": 9.835526548622209e-07, + "loss": 0.9023, "step": 30416 }, { - "epoch": 0.863138479001135, + "epoch": 0.8619399812972881, "grad_norm": 0.0, - "learning_rate": 9.664657359981355e-07, - "loss": 0.8043, + "learning_rate": 9.831557696466454e-07, + "loss": 0.7565, "step": 30417 }, { - "epoch": 0.86316685584563, + "epoch": 0.8619683187395506, "grad_norm": 0.0, - "learning_rate": 9.660715843788416e-07, - "loss": 0.7466, + "learning_rate": 9.827589603832989e-07, + "loss": 0.7215, "step": 30418 }, { - "epoch": 0.8631952326901249, + "epoch": 0.861996656181813, "grad_norm": 0.0, - "learning_rate": 9.656775090695458e-07, - "loss": 0.6932, + "learning_rate": 9.823622270755206e-07, + "loss": 0.8525, "step": 30419 }, { - "epoch": 0.8632236095346197, + "epoch": 0.8620249936240755, "grad_norm": 0.0, - "learning_rate": 9.652835100735803e-07, - "loss": 0.7987, + "learning_rate": 9.81965569726656e-07, + "loss": 0.816, "step": 30420 }, { - "epoch": 0.8632519863791146, + "epoch": 0.862053331066338, "grad_norm": 0.0, - "learning_rate": 9.648895873942698e-07, - "loss": 0.868, + "learning_rate": 9.815689883400426e-07, + "loss": 0.8604, "step": 30421 }, { - "epoch": 0.8632803632236096, + "epoch": 0.8620816685086005, "grad_norm": 0.0, - "learning_rate": 9.64495741034942e-07, - "loss": 0.9142, + "learning_rate": 9.811724829190216e-07, + "loss": 0.7939, "step": 30422 }, { - "epoch": 0.8633087400681044, + "epoch": 0.8621100059508628, "grad_norm": 0.0, - "learning_rate": 9.641019709989275e-07, - "loss": 0.8613, + "learning_rate": 9.807760534669363e-07, + "loss": 0.8024, "step": 30423 }, { - "epoch": 0.8633371169125993, + "epoch": 0.8621383433931253, "grad_norm": 0.0, - "learning_rate": 9.637082772895479e-07, - "loss": 0.7293, + "learning_rate": 9.803796999871217e-07, + "loss": 0.8118, "step": 30424 }, { - "epoch": 0.8633654937570943, + "epoch": 0.8621666808353878, "grad_norm": 0.0, - "learning_rate": 9.633146599101306e-07, - "loss": 0.7207, + "learning_rate": 9.799834224829173e-07, + "loss": 0.8487, "step": 30425 }, { - "epoch": 0.8633938706015891, + "epoch": 0.8621950182776502, "grad_norm": 0.0, - "learning_rate": 9.629211188640031e-07, - "loss": 0.8029, + "learning_rate": 9.79587220957663e-07, + "loss": 0.7967, "step": 30426 }, { - "epoch": 0.863422247446084, + "epoch": 0.8622233557199127, "grad_norm": 0.0, - "learning_rate": 9.625276541544849e-07, - "loss": 0.8438, + "learning_rate": 9.791910954146943e-07, + "loss": 0.7587, "step": 30427 }, { - "epoch": 0.8634506242905788, + "epoch": 0.8622516931621752, "grad_norm": 0.0, - "learning_rate": 9.62134265784901e-07, - "loss": 0.8318, + "learning_rate": 9.787950458573515e-07, + "loss": 0.7705, "step": 30428 }, { - "epoch": 0.8634790011350738, + "epoch": 0.8622800306044377, "grad_norm": 0.0, - "learning_rate": 9.617409537585776e-07, - "loss": 0.7213, + "learning_rate": 9.783990722889658e-07, + "loss": 0.8698, "step": 30429 }, { - "epoch": 0.8635073779795687, + "epoch": 0.8623083680467001, "grad_norm": 0.0, - "learning_rate": 9.613477180788332e-07, - "loss": 0.826, + "learning_rate": 9.78003174712876e-07, + "loss": 0.7935, "step": 30430 }, { - "epoch": 0.8635357548240635, + "epoch": 0.8623367054889626, "grad_norm": 0.0, - "learning_rate": 9.609545587489911e-07, - "loss": 0.8558, + "learning_rate": 9.776073531324159e-07, + "loss": 0.7977, "step": 30431 }, { - "epoch": 0.8635641316685585, + "epoch": 0.8623650429312251, "grad_norm": 0.0, - "learning_rate": 9.605614757723713e-07, - "loss": 0.8011, + "learning_rate": 9.772116075509185e-07, + "loss": 0.8076, "step": 30432 }, { - "epoch": 0.8635925085130534, + "epoch": 0.8623933803734875, "grad_norm": 0.0, - "learning_rate": 9.601684691522961e-07, - "loss": 0.8936, + "learning_rate": 9.76815937971718e-07, + "loss": 0.8167, "step": 30433 }, { - "epoch": 0.8636208853575482, + "epoch": 0.8624217178157499, "grad_norm": 0.0, - "learning_rate": 9.597755388920848e-07, - "loss": 0.9445, + "learning_rate": 9.76420344398149e-07, + "loss": 0.7665, "step": 30434 }, { - "epoch": 0.8636492622020431, + "epoch": 0.8624500552580124, "grad_norm": 0.0, - "learning_rate": 9.593826849950549e-07, - "loss": 0.7967, + "learning_rate": 9.760248268335405e-07, + "loss": 0.7822, "step": 30435 }, { - "epoch": 0.863677639046538, + "epoch": 0.8624783927002748, "grad_norm": 0.0, - "learning_rate": 9.58989907464526e-07, - "loss": 0.8541, + "learning_rate": 9.756293852812283e-07, + "loss": 0.7388, "step": 30436 }, { - "epoch": 0.8637060158910329, + "epoch": 0.8625067301425373, "grad_norm": 0.0, - "learning_rate": 9.585972063038174e-07, - "loss": 0.854, + "learning_rate": 9.752340197445386e-07, + "loss": 0.7741, "step": 30437 }, { - "epoch": 0.8637343927355278, + "epoch": 0.8625350675847998, "grad_norm": 0.0, - "learning_rate": 9.582045815162422e-07, - "loss": 0.8285, + "learning_rate": 9.748387302268037e-07, + "loss": 0.7977, "step": 30438 }, { - "epoch": 0.8637627695800227, + "epoch": 0.8625634050270623, "grad_norm": 0.0, - "learning_rate": 9.578120331051198e-07, - "loss": 0.6961, + "learning_rate": 9.744435167313537e-07, + "loss": 0.9248, "step": 30439 }, { - "epoch": 0.8637911464245176, + "epoch": 0.8625917424693247, "grad_norm": 0.0, - "learning_rate": 9.57419561073768e-07, - "loss": 0.7484, + "learning_rate": 9.740483792615184e-07, + "loss": 0.7869, "step": 30440 }, { - "epoch": 0.8638195232690125, + "epoch": 0.8626200799115872, "grad_norm": 0.0, - "learning_rate": 9.570271654254982e-07, - "loss": 0.7416, + "learning_rate": 9.736533178206265e-07, + "loss": 0.8096, "step": 30441 }, { - "epoch": 0.8638479001135074, + "epoch": 0.8626484173538497, "grad_norm": 0.0, - "learning_rate": 9.566348461636254e-07, - "loss": 0.8277, + "learning_rate": 9.732583324120027e-07, + "loss": 0.8471, "step": 30442 }, { - "epoch": 0.8638762769580023, + "epoch": 0.862676754796112, "grad_norm": 0.0, - "learning_rate": 9.562426032914663e-07, - "loss": 0.83, + "learning_rate": 9.728634230389756e-07, + "loss": 0.8017, "step": 30443 }, { - "epoch": 0.8639046538024971, + "epoch": 0.8627050922383745, "grad_norm": 0.0, - "learning_rate": 9.55850436812331e-07, - "loss": 0.9396, + "learning_rate": 9.724685897048747e-07, + "loss": 0.7805, "step": 30444 }, { - "epoch": 0.863933030646992, + "epoch": 0.862733429680637, "grad_norm": 0.0, - "learning_rate": 9.554583467295341e-07, - "loss": 0.8705, + "learning_rate": 9.720738324130208e-07, + "loss": 0.7667, "step": 30445 }, { - "epoch": 0.863961407491487, + "epoch": 0.8627617671228995, "grad_norm": 0.0, - "learning_rate": 9.55066333046386e-07, - "loss": 0.7712, + "learning_rate": 9.716791511667412e-07, + "loss": 0.8061, "step": 30446 }, { - "epoch": 0.8639897843359818, + "epoch": 0.8627901045651619, "grad_norm": 0.0, - "learning_rate": 9.546743957661997e-07, - "loss": 0.7772, + "learning_rate": 9.712845459693632e-07, + "loss": 0.7146, "step": 30447 }, { - "epoch": 0.8640181611804767, + "epoch": 0.8628184420074244, "grad_norm": 0.0, - "learning_rate": 9.542825348922879e-07, - "loss": 0.791, + "learning_rate": 9.708900168242063e-07, + "loss": 0.69, "step": 30448 }, { - "epoch": 0.8640465380249717, + "epoch": 0.8628467794496869, "grad_norm": 0.0, - "learning_rate": 9.53890750427956e-07, - "loss": 0.714, + "learning_rate": 9.704955637345948e-07, + "loss": 0.8178, "step": 30449 }, { - "epoch": 0.8640749148694665, + "epoch": 0.8628751168919493, "grad_norm": 0.0, - "learning_rate": 9.534990423765155e-07, - "loss": 0.7899, + "learning_rate": 9.701011867038534e-07, + "loss": 0.8627, "step": 30450 }, { - "epoch": 0.8641032917139614, + "epoch": 0.8629034543342118, "grad_norm": 0.0, - "learning_rate": 9.531074107412763e-07, - "loss": 0.8516, + "learning_rate": 9.697068857353043e-07, + "loss": 0.7915, "step": 30451 }, { - "epoch": 0.8641316685584562, + "epoch": 0.8629317917764743, "grad_norm": 0.0, - "learning_rate": 9.527158555255445e-07, - "loss": 0.8369, + "learning_rate": 9.693126608322645e-07, + "loss": 0.87, "step": 30452 }, { - "epoch": 0.8641600454029512, + "epoch": 0.8629601292187368, "grad_norm": 0.0, - "learning_rate": 9.523243767326284e-07, - "loss": 0.7632, + "learning_rate": 9.689185119980592e-07, + "loss": 0.7387, "step": 30453 }, { - "epoch": 0.8641884222474461, + "epoch": 0.8629884666609992, "grad_norm": 0.0, - "learning_rate": 9.519329743658367e-07, - "loss": 0.6739, + "learning_rate": 9.68524439236006e-07, + "loss": 0.8791, "step": 30454 }, { - "epoch": 0.8642167990919409, + "epoch": 0.8630168041032616, "grad_norm": 0.0, - "learning_rate": 9.515416484284723e-07, - "loss": 0.766, + "learning_rate": 9.681304425494275e-07, + "loss": 0.836, "step": 30455 }, { - "epoch": 0.8642451759364359, + "epoch": 0.8630451415455241, "grad_norm": 0.0, - "learning_rate": 9.511503989238424e-07, - "loss": 0.877, + "learning_rate": 9.67736521941638e-07, + "loss": 0.7728, "step": 30456 }, { - "epoch": 0.8642735527809308, + "epoch": 0.8630734789877865, "grad_norm": 0.0, - "learning_rate": 9.507592258552534e-07, - "loss": 0.8316, + "learning_rate": 9.6734267741596e-07, + "loss": 0.7637, "step": 30457 }, { - "epoch": 0.8643019296254256, + "epoch": 0.863101816430049, "grad_norm": 0.0, - "learning_rate": 9.50368129226007e-07, - "loss": 0.7328, + "learning_rate": 9.669489089757068e-07, + "loss": 0.9259, "step": 30458 }, { - "epoch": 0.8643303064699206, + "epoch": 0.8631301538723115, "grad_norm": 0.0, - "learning_rate": 9.499771090394072e-07, - "loss": 0.8788, + "learning_rate": 9.665552166241965e-07, + "loss": 0.8235, "step": 30459 }, { - "epoch": 0.8643586833144155, + "epoch": 0.8631584913145739, "grad_norm": 0.0, - "learning_rate": 9.495861652987592e-07, - "loss": 0.8106, + "learning_rate": 9.66161600364749e-07, + "loss": 0.8092, "step": 30460 }, { - "epoch": 0.8643870601589103, + "epoch": 0.8631868287568364, "grad_norm": 0.0, - "learning_rate": 9.491952980073604e-07, - "loss": 0.8966, + "learning_rate": 9.657680602006747e-07, + "loss": 0.7545, "step": 30461 }, { - "epoch": 0.8644154370034052, + "epoch": 0.8632151661990989, "grad_norm": 0.0, - "learning_rate": 9.488045071685193e-07, - "loss": 0.8511, + "learning_rate": 9.653745961352911e-07, + "loss": 0.7632, "step": 30462 }, { - "epoch": 0.8644438138479001, + "epoch": 0.8632435036413614, "grad_norm": 0.0, - "learning_rate": 9.484137927855308e-07, - "loss": 0.8843, + "learning_rate": 9.649812081719124e-07, + "loss": 0.8079, "step": 30463 }, { - "epoch": 0.864472190692395, + "epoch": 0.8632718410836238, "grad_norm": 0.0, - "learning_rate": 9.480231548616991e-07, - "loss": 0.8813, + "learning_rate": 9.64587896313851e-07, + "loss": 0.8585, "step": 30464 }, { - "epoch": 0.8645005675368899, + "epoch": 0.8633001785258863, "grad_norm": 0.0, - "learning_rate": 9.476325934003239e-07, - "loss": 0.8315, + "learning_rate": 9.641946605644237e-07, + "loss": 0.7452, "step": 30465 }, { - "epoch": 0.8645289443813848, + "epoch": 0.8633285159681487, "grad_norm": 0.0, - "learning_rate": 9.472421084047012e-07, - "loss": 0.6886, + "learning_rate": 9.638015009269375e-07, + "loss": 0.7324, "step": 30466 }, { - "epoch": 0.8645573212258797, + "epoch": 0.8633568534104111, "grad_norm": 0.0, - "learning_rate": 9.468516998781319e-07, - "loss": 0.7729, + "learning_rate": 9.634084174047076e-07, + "loss": 0.7964, "step": 30467 }, { - "epoch": 0.8645856980703746, + "epoch": 0.8633851908526736, "grad_norm": 0.0, - "learning_rate": 9.464613678239154e-07, - "loss": 0.7477, + "learning_rate": 9.630154100010458e-07, + "loss": 0.7516, "step": 30468 }, { - "epoch": 0.8646140749148694, + "epoch": 0.8634135282949361, "grad_norm": 0.0, - "learning_rate": 9.460711122453437e-07, - "loss": 0.7499, + "learning_rate": 9.626224787192594e-07, + "loss": 0.8114, "step": 30469 }, { - "epoch": 0.8646424517593644, + "epoch": 0.8634418657371986, "grad_norm": 0.0, - "learning_rate": 9.456809331457173e-07, - "loss": 0.8135, + "learning_rate": 9.622296235626616e-07, + "loss": 0.8508, "step": 30470 }, { - "epoch": 0.8646708286038592, + "epoch": 0.863470203179461, "grad_norm": 0.0, - "learning_rate": 9.452908305283326e-07, - "loss": 0.8591, + "learning_rate": 9.61836844534557e-07, + "loss": 0.7847, "step": 30471 }, { - "epoch": 0.8646992054483541, + "epoch": 0.8634985406217235, "grad_norm": 0.0, - "learning_rate": 9.449008043964813e-07, - "loss": 0.9089, + "learning_rate": 9.614441416382581e-07, + "loss": 0.8264, "step": 30472 }, { - "epoch": 0.8647275822928491, + "epoch": 0.863526878063986, "grad_norm": 0.0, - "learning_rate": 9.445108547534598e-07, - "loss": 0.7931, + "learning_rate": 9.610515148770726e-07, + "loss": 0.8028, "step": 30473 }, { - "epoch": 0.8647559591373439, + "epoch": 0.8635552155062484, "grad_norm": 0.0, - "learning_rate": 9.441209816025643e-07, - "loss": 0.8459, + "learning_rate": 9.606589642543064e-07, + "loss": 0.7535, "step": 30474 }, { - "epoch": 0.8647843359818388, + "epoch": 0.8635835529485109, "grad_norm": 0.0, - "learning_rate": 9.437311849470843e-07, - "loss": 0.8949, + "learning_rate": 9.602664897732649e-07, + "loss": 0.7674, "step": 30475 }, { - "epoch": 0.8648127128263338, + "epoch": 0.8636118903907734, "grad_norm": 0.0, - "learning_rate": 9.433414647903138e-07, - "loss": 0.7822, + "learning_rate": 9.598740914372562e-07, + "loss": 0.7019, "step": 30476 }, { - "epoch": 0.8648410896708286, + "epoch": 0.8636402278330358, "grad_norm": 0.0, - "learning_rate": 9.429518211355448e-07, - "loss": 0.7358, + "learning_rate": 9.594817692495839e-07, + "loss": 0.7114, "step": 30477 }, { - "epoch": 0.8648694665153235, + "epoch": 0.8636685652752982, "grad_norm": 0.0, - "learning_rate": 9.425622539860702e-07, - "loss": 0.7809, + "learning_rate": 9.590895232135566e-07, + "loss": 0.8663, "step": 30478 }, { - "epoch": 0.8648978433598183, + "epoch": 0.8636969027175607, "grad_norm": 0.0, - "learning_rate": 9.421727633451783e-07, - "loss": 0.8381, + "learning_rate": 9.586973533324738e-07, + "loss": 0.8663, "step": 30479 }, { - "epoch": 0.8649262202043133, + "epoch": 0.8637252401598232, "grad_norm": 0.0, - "learning_rate": 9.417833492161622e-07, - "loss": 0.8705, + "learning_rate": 9.583052596096409e-07, + "loss": 0.7928, "step": 30480 }, { - "epoch": 0.8649545970488082, + "epoch": 0.8637535776020856, "grad_norm": 0.0, - "learning_rate": 9.413940116023068e-07, - "loss": 0.9361, + "learning_rate": 9.579132420483617e-07, + "loss": 0.7616, "step": 30481 }, { - "epoch": 0.864982973893303, + "epoch": 0.8637819150443481, "grad_norm": 0.0, - "learning_rate": 9.410047505069042e-07, - "loss": 0.7795, + "learning_rate": 9.575213006519347e-07, + "loss": 0.719, "step": 30482 }, { - "epoch": 0.865011350737798, + "epoch": 0.8638102524866106, "grad_norm": 0.0, - "learning_rate": 9.406155659332439e-07, - "loss": 0.7986, + "learning_rate": 9.571294354236637e-07, + "loss": 0.8385, "step": 30483 }, { - "epoch": 0.8650397275822929, + "epoch": 0.863838589928873, "grad_norm": 0.0, - "learning_rate": 9.402264578846088e-07, - "loss": 0.8054, + "learning_rate": 9.567376463668522e-07, + "loss": 0.7773, "step": 30484 }, { - "epoch": 0.8650681044267877, + "epoch": 0.8638669273711355, "grad_norm": 0.0, - "learning_rate": 9.398374263642907e-07, - "loss": 0.8289, + "learning_rate": 9.563459334847946e-07, + "loss": 0.7826, "step": 30485 }, { - "epoch": 0.8650964812712826, + "epoch": 0.863895264813398, "grad_norm": 0.0, - "learning_rate": 9.394484713755703e-07, - "loss": 0.855, + "learning_rate": 9.559542967807954e-07, + "loss": 0.8519, "step": 30486 }, { - "epoch": 0.8651248581157776, + "epoch": 0.8639236022556604, "grad_norm": 0.0, - "learning_rate": 9.390595929217361e-07, - "loss": 0.841, + "learning_rate": 9.55562736258151e-07, + "loss": 0.7896, "step": 30487 }, { - "epoch": 0.8651532349602724, + "epoch": 0.8639519396979228, "grad_norm": 0.0, - "learning_rate": 9.386707910060755e-07, - "loss": 0.8673, + "learning_rate": 9.551712519201594e-07, + "loss": 0.8864, "step": 30488 }, { - "epoch": 0.8651816118047673, + "epoch": 0.8639802771401853, "grad_norm": 0.0, - "learning_rate": 9.382820656318669e-07, - "loss": 0.7236, + "learning_rate": 9.547798437701194e-07, + "loss": 0.7431, "step": 30489 }, { - "epoch": 0.8652099886492622, + "epoch": 0.8640086145824478, "grad_norm": 0.0, - "learning_rate": 9.378934168023979e-07, - "loss": 0.8158, + "learning_rate": 9.543885118113272e-07, + "loss": 0.7349, "step": 30490 }, { - "epoch": 0.8652383654937571, + "epoch": 0.8640369520247102, "grad_norm": 0.0, - "learning_rate": 9.375048445209523e-07, - "loss": 0.805, + "learning_rate": 9.5399725604708e-07, + "loss": 0.7958, "step": 30491 }, { - "epoch": 0.865266742338252, + "epoch": 0.8640652894669727, "grad_norm": 0.0, - "learning_rate": 9.371163487908074e-07, - "loss": 0.7377, + "learning_rate": 9.536060764806742e-07, + "loss": 0.7771, "step": 30492 }, { - "epoch": 0.8652951191827469, + "epoch": 0.8640936269092352, "grad_norm": 0.0, - "learning_rate": 9.367279296152487e-07, - "loss": 0.7545, + "learning_rate": 9.532149731154028e-07, + "loss": 0.7665, "step": 30493 }, { - "epoch": 0.8653234960272418, + "epoch": 0.8641219643514977, "grad_norm": 0.0, - "learning_rate": 9.3633958699756e-07, - "loss": 0.7042, + "learning_rate": 9.528239459545618e-07, + "loss": 0.8644, "step": 30494 }, { - "epoch": 0.8653518728717366, + "epoch": 0.8641503017937601, "grad_norm": 0.0, - "learning_rate": 9.359513209410154e-07, - "loss": 0.8803, + "learning_rate": 9.524329950014433e-07, + "loss": 0.8532, "step": 30495 }, { - "epoch": 0.8653802497162315, + "epoch": 0.8641786392360226, "grad_norm": 0.0, - "learning_rate": 9.355631314488978e-07, - "loss": 0.7707, + "learning_rate": 9.520421202593411e-07, + "loss": 0.7433, "step": 30496 }, { - "epoch": 0.8654086265607265, + "epoch": 0.864206976678285, "grad_norm": 0.0, - "learning_rate": 9.351750185244879e-07, - "loss": 0.8282, + "learning_rate": 9.516513217315504e-07, + "loss": 0.9474, "step": 30497 }, { - "epoch": 0.8654370034052213, + "epoch": 0.8642353141205474, "grad_norm": 0.0, - "learning_rate": 9.347869821710598e-07, - "loss": 0.9199, + "learning_rate": 9.512605994213587e-07, + "loss": 0.7378, "step": 30498 }, { - "epoch": 0.8654653802497162, + "epoch": 0.8642636515628099, "grad_norm": 0.0, - "learning_rate": 9.343990223918931e-07, - "loss": 0.6731, + "learning_rate": 9.508699533320598e-07, + "loss": 0.8163, "step": 30499 }, { - "epoch": 0.8654937570942112, + "epoch": 0.8642919890050724, "grad_norm": 0.0, - "learning_rate": 9.340111391902684e-07, - "loss": 0.7892, + "learning_rate": 9.504793834669424e-07, + "loss": 0.8921, "step": 30500 }, { - "epoch": 0.865522133938706, + "epoch": 0.8643203264473349, "grad_norm": 0.0, - "learning_rate": 9.336233325694566e-07, - "loss": 0.7686, + "learning_rate": 9.500888898292981e-07, + "loss": 0.9147, "step": 30501 }, { - "epoch": 0.8655505107832009, + "epoch": 0.8643486638895973, "grad_norm": 0.0, - "learning_rate": 9.332356025327361e-07, - "loss": 0.7974, + "learning_rate": 9.496984724224156e-07, + "loss": 0.8594, "step": 30502 }, { - "epoch": 0.8655788876276957, + "epoch": 0.8643770013318598, "grad_norm": 0.0, - "learning_rate": 9.328479490833842e-07, - "loss": 0.8605, + "learning_rate": 9.493081312495834e-07, + "loss": 0.843, "step": 30503 }, { - "epoch": 0.8656072644721907, + "epoch": 0.8644053387741223, "grad_norm": 0.0, - "learning_rate": 9.324603722246717e-07, - "loss": 0.8283, + "learning_rate": 9.489178663140897e-07, + "loss": 0.8056, "step": 30504 }, { - "epoch": 0.8656356413166856, + "epoch": 0.8644336762163847, "grad_norm": 0.0, - "learning_rate": 9.320728719598737e-07, - "loss": 0.8754, + "learning_rate": 9.485276776192243e-07, + "loss": 0.7301, "step": 30505 }, { - "epoch": 0.8656640181611804, + "epoch": 0.8644620136586472, "grad_norm": 0.0, - "learning_rate": 9.316854482922655e-07, - "loss": 0.7385, + "learning_rate": 9.481375651682689e-07, + "loss": 0.736, "step": 30506 }, { - "epoch": 0.8656923950056754, + "epoch": 0.8644903511009097, "grad_norm": 0.0, - "learning_rate": 9.312981012251143e-07, - "loss": 0.8063, + "learning_rate": 9.477475289645133e-07, + "loss": 0.7825, "step": 30507 }, { - "epoch": 0.8657207718501703, + "epoch": 0.864518688543172, "grad_norm": 0.0, - "learning_rate": 9.309108307616998e-07, - "loss": 0.8012, + "learning_rate": 9.473575690112413e-07, + "loss": 0.7729, "step": 30508 }, { - "epoch": 0.8657491486946651, + "epoch": 0.8645470259854345, "grad_norm": 0.0, - "learning_rate": 9.305236369052872e-07, - "loss": 0.8272, + "learning_rate": 9.469676853117371e-07, + "loss": 0.7802, "step": 30509 }, { - "epoch": 0.8657775255391601, + "epoch": 0.864575363427697, "grad_norm": 0.0, - "learning_rate": 9.301365196591483e-07, - "loss": 0.7739, + "learning_rate": 9.46577877869288e-07, + "loss": 0.8346, "step": 30510 }, { - "epoch": 0.865805902383655, + "epoch": 0.8646037008699595, "grad_norm": 0.0, - "learning_rate": 9.297494790265571e-07, - "loss": 0.8419, + "learning_rate": 9.461881466871736e-07, + "loss": 0.8704, "step": 30511 }, { - "epoch": 0.8658342792281498, + "epoch": 0.8646320383122219, "grad_norm": 0.0, - "learning_rate": 9.293625150107766e-07, - "loss": 0.7895, + "learning_rate": 9.457984917686791e-07, + "loss": 0.7534, "step": 30512 }, { - "epoch": 0.8658626560726447, + "epoch": 0.8646603757544844, "grad_norm": 0.0, - "learning_rate": 9.289756276150797e-07, - "loss": 0.8951, + "learning_rate": 9.454089131170874e-07, + "loss": 0.806, "step": 30513 }, { - "epoch": 0.8658910329171396, + "epoch": 0.8646887131967469, "grad_norm": 0.0, - "learning_rate": 9.285888168427349e-07, - "loss": 0.8337, + "learning_rate": 9.450194107356758e-07, + "loss": 0.8412, "step": 30514 }, { - "epoch": 0.8659194097616345, + "epoch": 0.8647170506390093, "grad_norm": 0.0, - "learning_rate": 9.282020826970061e-07, - "loss": 0.7333, + "learning_rate": 9.446299846277296e-07, + "loss": 0.7653, "step": 30515 }, { - "epoch": 0.8659477866061294, + "epoch": 0.8647453880812718, "grad_norm": 0.0, - "learning_rate": 9.278154251811633e-07, - "loss": 0.8755, + "learning_rate": 9.442406347965271e-07, + "loss": 0.8008, "step": 30516 }, { - "epoch": 0.8659761634506243, + "epoch": 0.8647737255235343, "grad_norm": 0.0, - "learning_rate": 9.274288442984714e-07, - "loss": 0.6891, + "learning_rate": 9.438513612453493e-07, + "loss": 0.8427, "step": 30517 }, { - "epoch": 0.8660045402951192, + "epoch": 0.8648020629657968, "grad_norm": 0.0, - "learning_rate": 9.270423400521955e-07, - "loss": 0.7961, + "learning_rate": 9.434621639774755e-07, + "loss": 0.8038, "step": 30518 }, { - "epoch": 0.8660329171396141, + "epoch": 0.8648304004080591, "grad_norm": 0.0, - "learning_rate": 9.266559124455998e-07, - "loss": 0.7739, + "learning_rate": 9.43073042996181e-07, + "loss": 0.8797, "step": 30519 }, { - "epoch": 0.8660612939841089, + "epoch": 0.8648587378503216, "grad_norm": 0.0, - "learning_rate": 9.262695614819528e-07, - "loss": 0.8491, + "learning_rate": 9.426839983047454e-07, + "loss": 0.7849, "step": 30520 }, { - "epoch": 0.8660896708286039, + "epoch": 0.8648870752925841, "grad_norm": 0.0, - "learning_rate": 9.258832871645118e-07, - "loss": 0.794, + "learning_rate": 9.422950299064482e-07, + "loss": 0.778, "step": 30521 }, { - "epoch": 0.8661180476730987, + "epoch": 0.8649154127348465, "grad_norm": 0.0, - "learning_rate": 9.25497089496542e-07, - "loss": 0.761, + "learning_rate": 9.419061378045613e-07, + "loss": 0.7887, "step": 30522 }, { - "epoch": 0.8661464245175936, + "epoch": 0.864943750177109, "grad_norm": 0.0, - "learning_rate": 9.251109684813075e-07, - "loss": 0.8359, + "learning_rate": 9.415173220023643e-07, + "loss": 0.8551, "step": 30523 }, { - "epoch": 0.8661748013620886, + "epoch": 0.8649720876193715, "grad_norm": 0.0, - "learning_rate": 9.247249241220679e-07, - "loss": 0.7663, + "learning_rate": 9.411285825031291e-07, + "loss": 0.7521, "step": 30524 }, { - "epoch": 0.8662031782065834, + "epoch": 0.8650004250616339, "grad_norm": 0.0, - "learning_rate": 9.243389564220861e-07, - "loss": 0.8853, + "learning_rate": 9.407399193101319e-07, + "loss": 0.8413, "step": 30525 }, { - "epoch": 0.8662315550510783, + "epoch": 0.8650287625038964, "grad_norm": 0.0, - "learning_rate": 9.239530653846207e-07, - "loss": 0.7635, + "learning_rate": 9.403513324266489e-07, + "loss": 0.75, "step": 30526 }, { - "epoch": 0.8662599318955733, + "epoch": 0.8650570999461589, "grad_norm": 0.0, - "learning_rate": 9.235672510129312e-07, - "loss": 0.9229, + "learning_rate": 9.399628218559476e-07, + "loss": 0.9069, "step": 30527 }, { - "epoch": 0.8662883087400681, + "epoch": 0.8650854373884214, "grad_norm": 0.0, - "learning_rate": 9.231815133102784e-07, - "loss": 0.7747, + "learning_rate": 9.395743876013052e-07, + "loss": 0.7727, "step": 30528 }, { - "epoch": 0.866316685584563, + "epoch": 0.8651137748306837, "grad_norm": 0.0, - "learning_rate": 9.227958522799185e-07, - "loss": 0.7634, + "learning_rate": 9.391860296659916e-07, + "loss": 0.7445, "step": 30529 }, { - "epoch": 0.8663450624290578, + "epoch": 0.8651421122729462, "grad_norm": 0.0, - "learning_rate": 9.22410267925109e-07, - "loss": 0.8077, + "learning_rate": 9.387977480532784e-07, + "loss": 0.8797, "step": 30530 }, { - "epoch": 0.8663734392735528, + "epoch": 0.8651704497152087, "grad_norm": 0.0, - "learning_rate": 9.220247602491105e-07, - "loss": 0.8136, + "learning_rate": 9.384095427664386e-07, + "loss": 0.8147, "step": 30531 }, { - "epoch": 0.8664018161180477, + "epoch": 0.8651987871574711, "grad_norm": 0.0, - "learning_rate": 9.216393292551751e-07, - "loss": 0.7956, + "learning_rate": 9.380214138087385e-07, + "loss": 0.8486, "step": 30532 }, { - "epoch": 0.8664301929625425, + "epoch": 0.8652271245997336, "grad_norm": 0.0, - "learning_rate": 9.212539749465598e-07, - "loss": 0.8853, + "learning_rate": 9.376333611834487e-07, + "loss": 0.8412, "step": 30533 }, { - "epoch": 0.8664585698070375, + "epoch": 0.8652554620419961, "grad_norm": 0.0, - "learning_rate": 9.208686973265224e-07, - "loss": 0.7864, + "learning_rate": 9.372453848938401e-07, + "loss": 0.6922, "step": 30534 }, { - "epoch": 0.8664869466515324, + "epoch": 0.8652837994842586, "grad_norm": 0.0, - "learning_rate": 9.204834963983145e-07, - "loss": 0.7158, + "learning_rate": 9.368574849431778e-07, + "loss": 0.8937, "step": 30535 }, { - "epoch": 0.8665153234960272, + "epoch": 0.865312136926521, "grad_norm": 0.0, - "learning_rate": 9.20098372165189e-07, - "loss": 0.8724, + "learning_rate": 9.364696613347324e-07, + "loss": 0.9178, "step": 30536 }, { - "epoch": 0.8665437003405221, + "epoch": 0.8653404743687835, "grad_norm": 0.0, - "learning_rate": 9.197133246304035e-07, - "loss": 0.8212, + "learning_rate": 9.360819140717659e-07, + "loss": 0.7726, "step": 30537 }, { - "epoch": 0.866572077185017, + "epoch": 0.865368811811046, "grad_norm": 0.0, - "learning_rate": 9.193283537972042e-07, - "loss": 0.7806, + "learning_rate": 9.356942431575478e-07, + "loss": 0.8648, "step": 30538 }, { - "epoch": 0.8666004540295119, + "epoch": 0.8653971492533084, "grad_norm": 0.0, - "learning_rate": 9.189434596688496e-07, - "loss": 0.8094, + "learning_rate": 9.353066485953455e-07, + "loss": 0.8196, "step": 30539 }, { - "epoch": 0.8666288308740068, + "epoch": 0.8654254866955708, "grad_norm": 0.0, - "learning_rate": 9.185586422485859e-07, - "loss": 0.8149, + "learning_rate": 9.349191303884187e-07, + "loss": 0.7846, "step": 30540 }, { - "epoch": 0.8666572077185017, + "epoch": 0.8654538241378333, "grad_norm": 0.0, - "learning_rate": 9.181739015396662e-07, - "loss": 0.8323, + "learning_rate": 9.345316885400346e-07, + "loss": 0.765, "step": 30541 }, { - "epoch": 0.8666855845629966, + "epoch": 0.8654821615800958, "grad_norm": 0.0, - "learning_rate": 9.177892375453413e-07, - "loss": 0.7412, + "learning_rate": 9.341443230534564e-07, + "loss": 0.9407, "step": 30542 }, { - "epoch": 0.8667139614074915, + "epoch": 0.8655104990223582, "grad_norm": 0.0, - "learning_rate": 9.174046502688572e-07, - "loss": 0.7243, + "learning_rate": 9.337570339319468e-07, + "loss": 0.8322, "step": 30543 }, { - "epoch": 0.8667423382519864, + "epoch": 0.8655388364646207, "grad_norm": 0.0, - "learning_rate": 9.170201397134648e-07, - "loss": 0.7469, + "learning_rate": 9.3336982117877e-07, + "loss": 0.8063, "step": 30544 }, { - "epoch": 0.8667707150964813, + "epoch": 0.8655671739068832, "grad_norm": 0.0, - "learning_rate": 9.166357058824127e-07, - "loss": 0.7717, + "learning_rate": 9.329826847971857e-07, + "loss": 0.7929, "step": 30545 }, { - "epoch": 0.8667990919409762, + "epoch": 0.8655955113491456, "grad_norm": 0.0, - "learning_rate": 9.162513487789459e-07, - "loss": 0.8531, + "learning_rate": 9.325956247904532e-07, + "loss": 0.7756, "step": 30546 }, { - "epoch": 0.866827468785471, + "epoch": 0.8656238487914081, "grad_norm": 0.0, - "learning_rate": 9.15867068406312e-07, - "loss": 0.8283, + "learning_rate": 9.322086411618381e-07, + "loss": 0.7553, "step": 30547 }, { - "epoch": 0.866855845629966, + "epoch": 0.8656521862336706, "grad_norm": 0.0, - "learning_rate": 9.154828647677594e-07, - "loss": 0.6936, + "learning_rate": 9.318217339145941e-07, + "loss": 0.8593, "step": 30548 }, { - "epoch": 0.8668842224744608, + "epoch": 0.865680523675933, "grad_norm": 0.0, - "learning_rate": 9.150987378665288e-07, - "loss": 0.8039, + "learning_rate": 9.314349030519843e-07, + "loss": 0.7759, "step": 30549 }, { - "epoch": 0.8669125993189557, + "epoch": 0.8657088611181954, "grad_norm": 0.0, - "learning_rate": 9.147146877058688e-07, - "loss": 0.8462, + "learning_rate": 9.31048148577266e-07, + "loss": 0.8863, "step": 30550 }, { - "epoch": 0.8669409761634507, + "epoch": 0.8657371985604579, "grad_norm": 0.0, - "learning_rate": 9.143307142890224e-07, - "loss": 0.8779, + "learning_rate": 9.306614704936967e-07, + "loss": 0.7493, "step": 30551 }, { - "epoch": 0.8669693530079455, + "epoch": 0.8657655360027204, "grad_norm": 0.0, - "learning_rate": 9.139468176192323e-07, - "loss": 0.7803, + "learning_rate": 9.302748688045338e-07, + "loss": 0.7789, "step": 30552 }, { - "epoch": 0.8669977298524404, + "epoch": 0.8657938734449828, "grad_norm": 0.0, - "learning_rate": 9.135629976997418e-07, - "loss": 0.831, + "learning_rate": 9.298883435130335e-07, + "loss": 0.8285, "step": 30553 }, { - "epoch": 0.8670261066969353, + "epoch": 0.8658222108872453, "grad_norm": 0.0, - "learning_rate": 9.131792545337926e-07, - "loss": 0.7445, + "learning_rate": 9.295018946224499e-07, + "loss": 0.8696, "step": 30554 }, { - "epoch": 0.8670544835414302, + "epoch": 0.8658505483295078, "grad_norm": 0.0, - "learning_rate": 9.127955881246275e-07, - "loss": 0.8562, + "learning_rate": 9.291155221360415e-07, + "loss": 0.8132, "step": 30555 }, { - "epoch": 0.8670828603859251, + "epoch": 0.8658788857717702, "grad_norm": 0.0, - "learning_rate": 9.124119984754876e-07, - "loss": 0.7904, + "learning_rate": 9.287292260570613e-07, + "loss": 0.7889, "step": 30556 }, { - "epoch": 0.8671112372304199, + "epoch": 0.8659072232140327, "grad_norm": 0.0, - "learning_rate": 9.120284855896111e-07, - "loss": 0.7955, + "learning_rate": 9.283430063887644e-07, + "loss": 0.8345, "step": 30557 }, { - "epoch": 0.8671396140749149, + "epoch": 0.8659355606562952, "grad_norm": 0.0, - "learning_rate": 9.116450494702378e-07, + "learning_rate": 9.279568631344016e-07, "loss": 0.8182, "step": 30558 }, { - "epoch": 0.8671679909194098, + "epoch": 0.8659638980985577, "grad_norm": 0.0, - "learning_rate": 9.112616901206095e-07, - "loss": 0.7264, + "learning_rate": 9.275707962972281e-07, + "loss": 0.7798, "step": 30559 }, { - "epoch": 0.8671963677639046, + "epoch": 0.86599223554082, "grad_norm": 0.0, - "learning_rate": 9.108784075439603e-07, - "loss": 0.8136, + "learning_rate": 9.271848058804955e-07, + "loss": 0.8902, "step": 30560 }, { - "epoch": 0.8672247446083996, + "epoch": 0.8660205729830825, "grad_norm": 0.0, - "learning_rate": 9.104952017435287e-07, - "loss": 0.7147, + "learning_rate": 9.267988918874527e-07, + "loss": 0.7761, "step": 30561 }, { - "epoch": 0.8672531214528945, + "epoch": 0.866048910425345, "grad_norm": 0.0, - "learning_rate": 9.101120727225565e-07, - "loss": 0.7295, + "learning_rate": 9.264130543213512e-07, + "loss": 0.736, "step": 30562 }, { - "epoch": 0.8672814982973893, + "epoch": 0.8660772478676074, "grad_norm": 0.0, - "learning_rate": 9.097290204842735e-07, - "loss": 0.7986, + "learning_rate": 9.260272931854453e-07, + "loss": 0.7769, "step": 30563 }, { - "epoch": 0.8673098751418842, + "epoch": 0.8661055853098699, "grad_norm": 0.0, - "learning_rate": 9.093460450319181e-07, - "loss": 0.7866, + "learning_rate": 9.256416084829778e-07, + "loss": 0.7887, "step": 30564 }, { - "epoch": 0.8673382519863791, + "epoch": 0.8661339227521324, "grad_norm": 0.0, - "learning_rate": 9.089631463687265e-07, - "loss": 0.7992, + "learning_rate": 9.252560002172039e-07, + "loss": 0.6973, "step": 30565 }, { - "epoch": 0.867366628830874, + "epoch": 0.8661622601943949, "grad_norm": 0.0, - "learning_rate": 9.085803244979308e-07, - "loss": 0.8598, + "learning_rate": 9.248704683913656e-07, + "loss": 0.8291, "step": 30566 }, { - "epoch": 0.8673950056753689, + "epoch": 0.8661905976366573, "grad_norm": 0.0, - "learning_rate": 9.08197579422766e-07, - "loss": 0.8283, + "learning_rate": 9.244850130087135e-07, + "loss": 0.7484, "step": 30567 }, { - "epoch": 0.8674233825198638, + "epoch": 0.8662189350789198, "grad_norm": 0.0, - "learning_rate": 9.078149111464663e-07, - "loss": 0.8331, + "learning_rate": 9.24099634072495e-07, + "loss": 0.7242, "step": 30568 }, { - "epoch": 0.8674517593643587, + "epoch": 0.8662472725211823, "grad_norm": 0.0, - "learning_rate": 9.074323196722589e-07, - "loss": 0.8723, + "learning_rate": 9.237143315859553e-07, + "loss": 0.7497, "step": 30569 }, { - "epoch": 0.8674801362088536, + "epoch": 0.8662756099634447, "grad_norm": 0.0, - "learning_rate": 9.070498050033838e-07, - "loss": 0.8949, + "learning_rate": 9.233291055523396e-07, + "loss": 0.7747, "step": 30570 }, { - "epoch": 0.8675085130533484, + "epoch": 0.8663039474057072, "grad_norm": 0.0, - "learning_rate": 9.066673671430659e-07, - "loss": 0.8026, + "learning_rate": 9.229439559748954e-07, + "loss": 0.7346, "step": 30571 }, { - "epoch": 0.8675368898978434, + "epoch": 0.8663322848479696, "grad_norm": 0.0, - "learning_rate": 9.062850060945372e-07, - "loss": 0.8033, + "learning_rate": 9.225588828568633e-07, + "loss": 0.8973, "step": 30572 }, { - "epoch": 0.8675652667423382, + "epoch": 0.866360622290232, "grad_norm": 0.0, - "learning_rate": 9.059027218610294e-07, - "loss": 0.7951, + "learning_rate": 9.221738862014906e-07, + "loss": 0.7803, "step": 30573 }, { - "epoch": 0.8675936435868331, + "epoch": 0.8663889597324945, "grad_norm": 0.0, - "learning_rate": 9.05520514445769e-07, - "loss": 0.7899, + "learning_rate": 9.21788966012016e-07, + "loss": 0.782, "step": 30574 }, { - "epoch": 0.8676220204313281, + "epoch": 0.866417297174757, "grad_norm": 0.0, - "learning_rate": 9.051383838519845e-07, - "loss": 0.784, + "learning_rate": 9.214041222916836e-07, + "loss": 0.8479, "step": 30575 }, { - "epoch": 0.8676503972758229, + "epoch": 0.8664456346170195, "grad_norm": 0.0, - "learning_rate": 9.047563300829077e-07, + "learning_rate": 9.210193550437385e-07, "loss": 0.701, "step": 30576 }, { - "epoch": 0.8676787741203178, + "epoch": 0.8664739720592819, "grad_norm": 0.0, - "learning_rate": 9.043743531417615e-07, - "loss": 0.7761, + "learning_rate": 9.206346642714159e-07, + "loss": 0.7927, "step": 30577 }, { - "epoch": 0.8677071509648128, + "epoch": 0.8665023095015444, "grad_norm": 0.0, - "learning_rate": 9.039924530317734e-07, - "loss": 0.9463, + "learning_rate": 9.202500499779599e-07, + "loss": 0.7955, "step": 30578 }, { - "epoch": 0.8677355278093076, + "epoch": 0.8665306469438069, "grad_norm": 0.0, - "learning_rate": 9.03610629756172e-07, - "loss": 0.8404, + "learning_rate": 9.198655121666111e-07, + "loss": 0.7617, "step": 30579 }, { - "epoch": 0.8677639046538025, + "epoch": 0.8665589843860693, "grad_norm": 0.0, - "learning_rate": 9.032288833181779e-07, - "loss": 0.8568, + "learning_rate": 9.194810508406049e-07, + "loss": 0.7692, "step": 30580 }, { - "epoch": 0.8677922814982973, + "epoch": 0.8665873218283318, "grad_norm": 0.0, - "learning_rate": 9.028472137210187e-07, - "loss": 0.9182, + "learning_rate": 9.190966660031819e-07, + "loss": 0.8673, "step": 30581 }, { - "epoch": 0.8678206583427923, + "epoch": 0.8666156592705943, "grad_norm": 0.0, - "learning_rate": 9.024656209679206e-07, - "loss": 0.8349, + "learning_rate": 9.187123576575795e-07, + "loss": 0.8497, "step": 30582 }, { - "epoch": 0.8678490351872872, + "epoch": 0.8666439967128567, "grad_norm": 0.0, - "learning_rate": 9.020841050621021e-07, - "loss": 0.8071, + "learning_rate": 9.183281258070353e-07, + "loss": 0.8278, "step": 30583 }, { - "epoch": 0.867877412031782, + "epoch": 0.8666723341551191, "grad_norm": 0.0, - "learning_rate": 9.017026660067862e-07, - "loss": 0.7852, + "learning_rate": 9.179439704547876e-07, + "loss": 0.8581, "step": 30584 }, { - "epoch": 0.867905788876277, + "epoch": 0.8667006715973816, "grad_norm": 0.0, - "learning_rate": 9.013213038052005e-07, - "loss": 0.7441, + "learning_rate": 9.175598916040684e-07, + "loss": 0.7462, "step": 30585 }, { - "epoch": 0.8679341657207719, + "epoch": 0.8667290090396441, "grad_norm": 0.0, - "learning_rate": 9.00940018460561e-07, - "loss": 0.8705, + "learning_rate": 9.171758892581162e-07, + "loss": 0.8963, "step": 30586 }, { - "epoch": 0.8679625425652667, + "epoch": 0.8667573464819065, "grad_norm": 0.0, - "learning_rate": 9.005588099760909e-07, - "loss": 0.8408, + "learning_rate": 9.167919634201638e-07, + "loss": 0.8256, "step": 30587 }, { - "epoch": 0.8679909194097616, + "epoch": 0.866785683924169, "grad_norm": 0.0, - "learning_rate": 9.001776783550109e-07, - "loss": 0.7695, + "learning_rate": 9.164081140934444e-07, + "loss": 0.7796, "step": 30588 }, { - "epoch": 0.8680192962542566, + "epoch": 0.8668140213664315, "grad_norm": 0.0, - "learning_rate": 8.997966236005373e-07, - "loss": 0.7738, + "learning_rate": 9.160243412811953e-07, + "loss": 0.9037, "step": 30589 }, { - "epoch": 0.8680476730987514, + "epoch": 0.866842358808694, "grad_norm": 0.0, - "learning_rate": 8.994156457158898e-07, - "loss": 0.7074, + "learning_rate": 9.15640644986644e-07, + "loss": 0.8323, "step": 30590 }, { - "epoch": 0.8680760499432463, + "epoch": 0.8668706962509564, "grad_norm": 0.0, - "learning_rate": 8.990347447042902e-07, - "loss": 0.8135, + "learning_rate": 9.152570252130255e-07, + "loss": 0.7654, "step": 30591 }, { - "epoch": 0.8681044267877412, + "epoch": 0.8668990336932189, "grad_norm": 0.0, - "learning_rate": 8.986539205689515e-07, - "loss": 0.7968, + "learning_rate": 9.148734819635718e-07, + "loss": 0.7948, "step": 30592 }, { - "epoch": 0.8681328036322361, + "epoch": 0.8669273711354814, "grad_norm": 0.0, - "learning_rate": 8.982731733130934e-07, - "loss": 0.8172, + "learning_rate": 9.144900152415104e-07, + "loss": 0.7278, "step": 30593 }, { - "epoch": 0.868161180476731, + "epoch": 0.8669557085777437, "grad_norm": 0.0, - "learning_rate": 8.978925029399299e-07, - "loss": 0.9187, + "learning_rate": 9.141066250500741e-07, + "loss": 0.7251, "step": 30594 }, { - "epoch": 0.8681895573212258, + "epoch": 0.8669840460200062, "grad_norm": 0.0, - "learning_rate": 8.975119094526774e-07, - "loss": 0.7954, + "learning_rate": 9.137233113924915e-07, + "loss": 0.7877, "step": 30595 }, { - "epoch": 0.8682179341657208, + "epoch": 0.8670123834622687, "grad_norm": 0.0, - "learning_rate": 8.971313928545522e-07, - "loss": 0.7296, + "learning_rate": 9.133400742719922e-07, + "loss": 0.9439, "step": 30596 }, { - "epoch": 0.8682463110102157, + "epoch": 0.8670407209045311, "grad_norm": 0.0, - "learning_rate": 8.967509531487662e-07, - "loss": 0.727, + "learning_rate": 9.129569136918048e-07, + "loss": 0.6764, "step": 30597 }, { - "epoch": 0.8682746878547105, + "epoch": 0.8670690583467936, "grad_norm": 0.0, - "learning_rate": 8.963705903385344e-07, - "loss": 0.7906, + "learning_rate": 9.125738296551534e-07, + "loss": 0.7347, "step": 30598 }, { - "epoch": 0.8683030646992055, + "epoch": 0.8670973957890561, "grad_norm": 0.0, - "learning_rate": 8.959903044270702e-07, - "loss": 0.7828, + "learning_rate": 9.121908221652675e-07, + "loss": 0.8449, "step": 30599 }, { - "epoch": 0.8683314415437003, + "epoch": 0.8671257332313186, "grad_norm": 0.0, - "learning_rate": 8.956100954175828e-07, - "loss": 0.8137, + "learning_rate": 9.118078912253758e-07, + "loss": 0.8514, "step": 30600 }, { - "epoch": 0.8683598183881952, + "epoch": 0.867154070673581, "grad_norm": 0.0, - "learning_rate": 8.952299633132867e-07, - "loss": 0.7508, + "learning_rate": 9.114250368386979e-07, + "loss": 0.7701, "step": 30601 }, { - "epoch": 0.8683881952326902, + "epoch": 0.8671824081158435, "grad_norm": 0.0, - "learning_rate": 8.948499081173956e-07, - "loss": 0.8646, + "learning_rate": 9.110422590084644e-07, + "loss": 0.8511, "step": 30602 }, { - "epoch": 0.868416572077185, + "epoch": 0.867210745558106, "grad_norm": 0.0, - "learning_rate": 8.944699298331139e-07, - "loss": 0.8333, + "learning_rate": 9.106595577378951e-07, + "loss": 0.901, "step": 30603 }, { - "epoch": 0.8684449489216799, + "epoch": 0.8672390830003683, "grad_norm": 0.0, - "learning_rate": 8.940900284636533e-07, - "loss": 0.8633, + "learning_rate": 9.102769330302164e-07, + "loss": 0.8927, "step": 30604 }, { - "epoch": 0.8684733257661748, + "epoch": 0.8672674204426308, "grad_norm": 0.0, - "learning_rate": 8.93710204012227e-07, - "loss": 0.8521, + "learning_rate": 9.09894384888651e-07, + "loss": 0.7527, "step": 30605 }, { - "epoch": 0.8685017026106697, + "epoch": 0.8672957578848933, "grad_norm": 0.0, - "learning_rate": 8.933304564820366e-07, - "loss": 0.8933, + "learning_rate": 9.095119133164199e-07, + "loss": 0.7804, "step": 30606 }, { - "epoch": 0.8685300794551646, + "epoch": 0.8673240953271558, "grad_norm": 0.0, - "learning_rate": 8.929507858762943e-07, - "loss": 0.8212, + "learning_rate": 9.091295183167448e-07, + "loss": 0.8661, "step": 30607 }, { - "epoch": 0.8685584562996594, + "epoch": 0.8673524327694182, "grad_norm": 0.0, - "learning_rate": 8.925711921982083e-07, - "loss": 0.7762, + "learning_rate": 9.087471998928477e-07, + "loss": 0.8458, "step": 30608 }, { - "epoch": 0.8685868331441544, + "epoch": 0.8673807702116807, "grad_norm": 0.0, - "learning_rate": 8.921916754509796e-07, - "loss": 0.8083, + "learning_rate": 9.083649580479493e-07, + "loss": 0.8359, "step": 30609 }, { - "epoch": 0.8686152099886493, + "epoch": 0.8674091076539432, "grad_norm": 0.0, - "learning_rate": 8.918122356378178e-07, - "loss": 0.7457, + "learning_rate": 9.079827927852702e-07, + "loss": 0.7427, "step": 30610 }, { - "epoch": 0.8686435868331441, + "epoch": 0.8674374450962056, "grad_norm": 0.0, - "learning_rate": 8.914328727619304e-07, - "loss": 0.8048, + "learning_rate": 9.07600704108027e-07, + "loss": 0.7018, "step": 30611 }, { - "epoch": 0.868671963677639, + "epoch": 0.8674657825384681, "grad_norm": 0.0, - "learning_rate": 8.910535868265158e-07, - "loss": 0.7683, + "learning_rate": 9.072186920194392e-07, + "loss": 0.8557, "step": 30612 }, { - "epoch": 0.868700340522134, + "epoch": 0.8674941199807306, "grad_norm": 0.0, - "learning_rate": 8.906743778347815e-07, - "loss": 0.9604, + "learning_rate": 9.068367565227266e-07, + "loss": 0.8245, "step": 30613 }, { - "epoch": 0.8687287173666288, + "epoch": 0.867522457422993, "grad_norm": 0.0, - "learning_rate": 8.902952457899317e-07, - "loss": 0.8055, + "learning_rate": 9.06454897621103e-07, + "loss": 0.7882, "step": 30614 }, { - "epoch": 0.8687570942111237, + "epoch": 0.8675507948652554, "grad_norm": 0.0, - "learning_rate": 8.899161906951648e-07, - "loss": 0.7776, + "learning_rate": 9.060731153177882e-07, + "loss": 0.7969, "step": 30615 }, { - "epoch": 0.8687854710556187, + "epoch": 0.8675791323075179, "grad_norm": 0.0, - "learning_rate": 8.895372125536883e-07, - "loss": 0.795, + "learning_rate": 9.056914096159952e-07, + "loss": 0.7857, "step": 30616 }, { - "epoch": 0.8688138479001135, + "epoch": 0.8676074697497804, "grad_norm": 0.0, - "learning_rate": 8.891583113686986e-07, - "loss": 0.8225, + "learning_rate": 9.053097805189404e-07, + "loss": 0.8252, "step": 30617 }, { - "epoch": 0.8688422247446084, + "epoch": 0.8676358071920428, "grad_norm": 0.0, - "learning_rate": 8.887794871433985e-07, - "loss": 0.8109, + "learning_rate": 9.049282280298399e-07, + "loss": 0.834, "step": 30618 }, { - "epoch": 0.8688706015891033, + "epoch": 0.8676641446343053, "grad_norm": 0.0, - "learning_rate": 8.884007398809891e-07, - "loss": 0.8359, + "learning_rate": 9.045467521519047e-07, + "loss": 0.746, "step": 30619 }, { - "epoch": 0.8688989784335982, + "epoch": 0.8676924820765678, "grad_norm": 0.0, - "learning_rate": 8.880220695846664e-07, - "loss": 0.7957, + "learning_rate": 9.041653528883498e-07, + "loss": 0.738, "step": 30620 }, { - "epoch": 0.8689273552780931, + "epoch": 0.8677208195188302, "grad_norm": 0.0, - "learning_rate": 8.876434762576314e-07, - "loss": 0.7484, + "learning_rate": 9.037840302423883e-07, + "loss": 0.7405, "step": 30621 }, { - "epoch": 0.8689557321225879, + "epoch": 0.8677491569610927, "grad_norm": 0.0, - "learning_rate": 8.872649599030836e-07, - "loss": 0.8066, + "learning_rate": 9.034027842172311e-07, + "loss": 0.6961, "step": 30622 }, { - "epoch": 0.8689841089670829, + "epoch": 0.8677774944033552, "grad_norm": 0.0, - "learning_rate": 8.86886520524216e-07, - "loss": 0.799, + "learning_rate": 9.030216148160919e-07, + "loss": 0.7856, "step": 30623 }, { - "epoch": 0.8690124858115778, + "epoch": 0.8678058318456177, "grad_norm": 0.0, - "learning_rate": 8.865081581242274e-07, - "loss": 0.7325, + "learning_rate": 9.026405220421785e-07, + "loss": 0.7862, "step": 30624 }, { - "epoch": 0.8690408626560726, + "epoch": 0.86783416928788, "grad_norm": 0.0, - "learning_rate": 8.861298727063161e-07, - "loss": 0.8073, + "learning_rate": 9.022595058987016e-07, + "loss": 0.7867, "step": 30625 }, { - "epoch": 0.8690692395005676, + "epoch": 0.8678625067301425, "grad_norm": 0.0, - "learning_rate": 8.857516642736741e-07, - "loss": 0.7208, + "learning_rate": 9.01878566388873e-07, + "loss": 0.7673, "step": 30626 }, { - "epoch": 0.8690976163450624, + "epoch": 0.867890844172405, "grad_norm": 0.0, - "learning_rate": 8.853735328294966e-07, - "loss": 0.7649, + "learning_rate": 9.01497703515899e-07, + "loss": 0.7882, "step": 30627 }, { - "epoch": 0.8691259931895573, + "epoch": 0.8679191816146674, "grad_norm": 0.0, - "learning_rate": 8.84995478376981e-07, - "loss": 0.7407, + "learning_rate": 9.01116917282987e-07, + "loss": 0.7867, "step": 30628 }, { - "epoch": 0.8691543700340522, + "epoch": 0.8679475190569299, "grad_norm": 0.0, - "learning_rate": 8.84617500919317e-07, - "loss": 0.8795, + "learning_rate": 9.00736207693349e-07, + "loss": 0.8213, "step": 30629 }, { - "epoch": 0.8691827468785471, + "epoch": 0.8679758564991924, "grad_norm": 0.0, - "learning_rate": 8.842396004596976e-07, - "loss": 0.7502, + "learning_rate": 9.003555747501869e-07, + "loss": 0.8344, "step": 30630 }, { - "epoch": 0.869211123723042, + "epoch": 0.8680041939414549, "grad_norm": 0.0, - "learning_rate": 8.838617770013169e-07, - "loss": 0.8537, + "learning_rate": 8.999750184567102e-07, + "loss": 0.791, "step": 30631 }, { - "epoch": 0.8692395005675368, + "epoch": 0.8680325313837173, "grad_norm": 0.0, - "learning_rate": 8.834840305473658e-07, - "loss": 0.7181, + "learning_rate": 8.995945388161209e-07, + "loss": 0.7742, "step": 30632 }, { - "epoch": 0.8692678774120318, + "epoch": 0.8680608688259798, "grad_norm": 0.0, - "learning_rate": 8.83106361101036e-07, - "loss": 0.913, + "learning_rate": 8.992141358316264e-07, + "loss": 0.7696, "step": 30633 }, { - "epoch": 0.8692962542565267, + "epoch": 0.8680892062682423, "grad_norm": 0.0, - "learning_rate": 8.82728768665515e-07, - "loss": 0.7621, + "learning_rate": 8.988338095064308e-07, + "loss": 0.874, "step": 30634 }, { - "epoch": 0.8693246311010215, + "epoch": 0.8681175437105046, "grad_norm": 0.0, - "learning_rate": 8.823512532439938e-07, - "loss": 0.7415, + "learning_rate": 8.984535598437382e-07, + "loss": 0.7387, "step": 30635 }, { - "epoch": 0.8693530079455165, + "epoch": 0.8681458811527671, "grad_norm": 0.0, - "learning_rate": 8.819738148396639e-07, - "loss": 0.8593, + "learning_rate": 8.980733868467506e-07, + "loss": 0.9206, "step": 30636 }, { - "epoch": 0.8693813847900114, + "epoch": 0.8681742185950296, "grad_norm": 0.0, - "learning_rate": 8.815964534557087e-07, - "loss": 0.8283, + "learning_rate": 8.97693290518673e-07, + "loss": 0.85, "step": 30637 }, { - "epoch": 0.8694097616345062, + "epoch": 0.8682025560372921, "grad_norm": 0.0, - "learning_rate": 8.812191690953186e-07, - "loss": 0.8638, + "learning_rate": 8.97313270862703e-07, + "loss": 0.7953, "step": 30638 }, { - "epoch": 0.8694381384790011, + "epoch": 0.8682308934795545, "grad_norm": 0.0, - "learning_rate": 8.808419617616814e-07, - "loss": 0.8423, + "learning_rate": 8.969333278820447e-07, + "loss": 0.7759, "step": 30639 }, { - "epoch": 0.8694665153234961, + "epoch": 0.868259230921817, "grad_norm": 0.0, - "learning_rate": 8.8046483145798e-07, - "loss": 0.7374, + "learning_rate": 8.965534615798965e-07, + "loss": 0.8961, "step": 30640 }, { - "epoch": 0.8694948921679909, + "epoch": 0.8682875683640795, "grad_norm": 0.0, - "learning_rate": 8.800877781874028e-07, - "loss": 0.9056, + "learning_rate": 8.961736719594582e-07, + "loss": 0.7877, "step": 30641 }, { - "epoch": 0.8695232690124858, + "epoch": 0.8683159058063419, "grad_norm": 0.0, - "learning_rate": 8.797108019531353e-07, - "loss": 0.8541, + "learning_rate": 8.957939590239317e-07, + "loss": 0.7018, "step": 30642 }, { - "epoch": 0.8695516458569807, + "epoch": 0.8683442432486044, "grad_norm": 0.0, - "learning_rate": 8.793339027583591e-07, - "loss": 0.8344, + "learning_rate": 8.954143227765111e-07, + "loss": 0.8645, "step": 30643 }, { - "epoch": 0.8695800227014756, + "epoch": 0.8683725806908669, "grad_norm": 0.0, - "learning_rate": 8.789570806062597e-07, - "loss": 0.7782, + "learning_rate": 8.950347632203993e-07, + "loss": 0.7014, "step": 30644 }, { - "epoch": 0.8696083995459705, + "epoch": 0.8684009181331293, "grad_norm": 0.0, - "learning_rate": 8.785803355000222e-07, - "loss": 0.8486, + "learning_rate": 8.946552803587882e-07, + "loss": 0.6656, "step": 30645 }, { - "epoch": 0.8696367763904653, + "epoch": 0.8684292555753917, "grad_norm": 0.0, - "learning_rate": 8.782036674428218e-07, - "loss": 0.9416, + "learning_rate": 8.942758741948776e-07, + "loss": 0.8123, "step": 30646 }, { - "epoch": 0.8696651532349603, + "epoch": 0.8684575930176542, "grad_norm": 0.0, - "learning_rate": 8.778270764378494e-07, - "loss": 0.7594, + "learning_rate": 8.938965447318626e-07, + "loss": 0.8929, "step": 30647 }, { - "epoch": 0.8696935300794552, + "epoch": 0.8684859304599167, "grad_norm": 0.0, - "learning_rate": 8.774505624882801e-07, - "loss": 0.8453, + "learning_rate": 8.935172919729373e-07, + "loss": 0.7632, "step": 30648 }, { - "epoch": 0.86972190692395, + "epoch": 0.8685142679021791, "grad_norm": 0.0, - "learning_rate": 8.770741255972959e-07, - "loss": 0.8009, + "learning_rate": 8.931381159212982e-07, + "loss": 0.7562, "step": 30649 }, { - "epoch": 0.869750283768445, + "epoch": 0.8685426053444416, "grad_norm": 0.0, - "learning_rate": 8.766977657680776e-07, - "loss": 0.7973, + "learning_rate": 8.927590165801403e-07, + "loss": 0.8246, "step": 30650 }, { - "epoch": 0.8697786606129398, + "epoch": 0.8685709427867041, "grad_norm": 0.0, - "learning_rate": 8.763214830038025e-07, - "loss": 0.8714, + "learning_rate": 8.923799939526534e-07, + "loss": 0.8123, "step": 30651 }, { - "epoch": 0.8698070374574347, + "epoch": 0.8685992802289665, "grad_norm": 0.0, - "learning_rate": 8.759452773076493e-07, - "loss": 0.7444, + "learning_rate": 8.920010480420338e-07, + "loss": 0.7659, "step": 30652 }, { - "epoch": 0.8698354143019297, + "epoch": 0.868627617671229, "grad_norm": 0.0, - "learning_rate": 8.755691486827989e-07, - "loss": 0.7481, + "learning_rate": 8.916221788514701e-07, + "loss": 0.8107, "step": 30653 }, { - "epoch": 0.8698637911464245, + "epoch": 0.8686559551134915, "grad_norm": 0.0, - "learning_rate": 8.75193097132424e-07, - "loss": 0.7889, + "learning_rate": 8.912433863841541e-07, + "loss": 0.8521, "step": 30654 }, { - "epoch": 0.8698921679909194, + "epoch": 0.868684292555754, "grad_norm": 0.0, - "learning_rate": 8.748171226597035e-07, - "loss": 0.8398, + "learning_rate": 8.9086467064328e-07, + "loss": 0.8611, "step": 30655 }, { - "epoch": 0.8699205448354143, + "epoch": 0.8687126299980164, "grad_norm": 0.0, - "learning_rate": 8.744412252678148e-07, - "loss": 0.7911, + "learning_rate": 8.904860316320329e-07, + "loss": 0.8142, "step": 30656 }, { - "epoch": 0.8699489216799092, + "epoch": 0.8687409674402788, "grad_norm": 0.0, - "learning_rate": 8.740654049599295e-07, - "loss": 0.8118, + "learning_rate": 8.90107469353605e-07, + "loss": 0.772, "step": 30657 }, { - "epoch": 0.8699772985244041, + "epoch": 0.8687693048825413, "grad_norm": 0.0, - "learning_rate": 8.736896617392232e-07, - "loss": 0.6958, + "learning_rate": 8.897289838111866e-07, + "loss": 0.7271, "step": 30658 }, { - "epoch": 0.870005675368899, + "epoch": 0.8687976423248037, "grad_norm": 0.0, - "learning_rate": 8.733139956088732e-07, - "loss": 0.7618, + "learning_rate": 8.893505750079623e-07, + "loss": 0.7755, "step": 30659 }, { - "epoch": 0.8700340522133939, + "epoch": 0.8688259797670662, "grad_norm": 0.0, - "learning_rate": 8.729384065720481e-07, - "loss": 0.731, + "learning_rate": 8.889722429471215e-07, + "loss": 0.8128, "step": 30660 }, { - "epoch": 0.8700624290578888, + "epoch": 0.8688543172093287, "grad_norm": 0.0, - "learning_rate": 8.72562894631922e-07, - "loss": 0.7233, + "learning_rate": 8.885939876318505e-07, + "loss": 0.7312, "step": 30661 }, { - "epoch": 0.8700908059023836, + "epoch": 0.8688826546515912, "grad_norm": 0.0, - "learning_rate": 8.721874597916679e-07, - "loss": 0.7333, + "learning_rate": 8.882158090653359e-07, + "loss": 0.8467, "step": 30662 }, { - "epoch": 0.8701191827468785, + "epoch": 0.8689109920938536, "grad_norm": 0.0, - "learning_rate": 8.718121020544567e-07, - "loss": 0.8084, + "learning_rate": 8.878377072507649e-07, + "loss": 0.8122, "step": 30663 }, { - "epoch": 0.8701475595913735, + "epoch": 0.8689393295361161, "grad_norm": 0.0, - "learning_rate": 8.714368214234614e-07, - "loss": 0.9047, + "learning_rate": 8.874596821913184e-07, + "loss": 0.8054, "step": 30664 }, { - "epoch": 0.8701759364358683, + "epoch": 0.8689676669783786, "grad_norm": 0.0, - "learning_rate": 8.710616179018472e-07, - "loss": 0.8414, + "learning_rate": 8.870817338901849e-07, + "loss": 0.7732, "step": 30665 }, { - "epoch": 0.8702043132803632, + "epoch": 0.868996004420641, "grad_norm": 0.0, - "learning_rate": 8.70686491492786e-07, - "loss": 0.8196, + "learning_rate": 8.867038623505442e-07, + "loss": 0.7925, "step": 30666 }, { - "epoch": 0.8702326901248582, + "epoch": 0.8690243418629034, "grad_norm": 0.0, - "learning_rate": 8.703114421994474e-07, - "loss": 0.7209, + "learning_rate": 8.863260675755813e-07, + "loss": 0.8237, "step": 30667 }, { - "epoch": 0.870261066969353, + "epoch": 0.8690526793051659, "grad_norm": 0.0, - "learning_rate": 8.699364700249979e-07, - "loss": 0.7711, + "learning_rate": 8.859483495684795e-07, + "loss": 0.7913, "step": 30668 }, { - "epoch": 0.8702894438138479, + "epoch": 0.8690810167474283, "grad_norm": 0.0, - "learning_rate": 8.69561574972605e-07, - "loss": 0.7142, + "learning_rate": 8.855707083324183e-07, + "loss": 0.8767, "step": 30669 }, { - "epoch": 0.8703178206583428, + "epoch": 0.8691093541896908, "grad_norm": 0.0, - "learning_rate": 8.691867570454371e-07, - "loss": 0.8544, + "learning_rate": 8.851931438705786e-07, + "loss": 0.7781, "step": 30670 }, { - "epoch": 0.8703461975028377, + "epoch": 0.8691376916319533, "grad_norm": 0.0, - "learning_rate": 8.688120162466584e-07, - "loss": 0.8936, + "learning_rate": 8.848156561861421e-07, + "loss": 0.7649, "step": 30671 }, { - "epoch": 0.8703745743473326, + "epoch": 0.8691660290742158, "grad_norm": 0.0, - "learning_rate": 8.684373525794343e-07, - "loss": 0.9252, + "learning_rate": 8.844382452822897e-07, + "loss": 0.817, "step": 30672 }, { - "epoch": 0.8704029511918274, + "epoch": 0.8691943665164782, "grad_norm": 0.0, - "learning_rate": 8.680627660469321e-07, - "loss": 0.8322, + "learning_rate": 8.840609111621978e-07, + "loss": 0.7121, "step": 30673 }, { - "epoch": 0.8704313280363224, + "epoch": 0.8692227039587407, "grad_norm": 0.0, - "learning_rate": 8.676882566523137e-07, - "loss": 0.7887, + "learning_rate": 8.836836538290449e-07, + "loss": 0.8612, "step": 30674 }, { - "epoch": 0.8704597048808173, + "epoch": 0.8692510414010032, "grad_norm": 0.0, - "learning_rate": 8.673138243987411e-07, - "loss": 0.7966, + "learning_rate": 8.833064732860108e-07, + "loss": 0.8025, "step": 30675 }, { - "epoch": 0.8704880817253121, + "epoch": 0.8692793788432656, "grad_norm": 0.0, - "learning_rate": 8.669394692893807e-07, - "loss": 0.793, + "learning_rate": 8.829293695362728e-07, + "loss": 0.8345, "step": 30676 }, { - "epoch": 0.8705164585698071, + "epoch": 0.869307716285528, "grad_norm": 0.0, - "learning_rate": 8.665651913273931e-07, - "loss": 0.8202, + "learning_rate": 8.825523425830051e-07, + "loss": 0.7892, "step": 30677 }, { - "epoch": 0.8705448354143019, + "epoch": 0.8693360537277905, "grad_norm": 0.0, - "learning_rate": 8.661909905159415e-07, - "loss": 0.7995, + "learning_rate": 8.821753924293841e-07, + "loss": 0.8787, "step": 30678 }, { - "epoch": 0.8705732122587968, + "epoch": 0.869364391170053, "grad_norm": 0.0, - "learning_rate": 8.658168668581824e-07, - "loss": 0.779, + "learning_rate": 8.817985190785882e-07, + "loss": 0.8113, "step": 30679 }, { - "epoch": 0.8706015891032917, + "epoch": 0.8693927286123154, "grad_norm": 0.0, - "learning_rate": 8.654428203572796e-07, - "loss": 0.8124, + "learning_rate": 8.814217225337873e-07, + "loss": 0.781, "step": 30680 }, { - "epoch": 0.8706299659477866, + "epoch": 0.8694210660545779, "grad_norm": 0.0, - "learning_rate": 8.650688510163941e-07, - "loss": 0.8462, + "learning_rate": 8.810450027981587e-07, + "loss": 0.7651, "step": 30681 }, { - "epoch": 0.8706583427922815, + "epoch": 0.8694494034968404, "grad_norm": 0.0, - "learning_rate": 8.646949588386811e-07, - "loss": 0.8525, + "learning_rate": 8.806683598748722e-07, + "loss": 0.8056, "step": 30682 }, { - "epoch": 0.8706867196367764, + "epoch": 0.8694777409391028, "grad_norm": 0.0, - "learning_rate": 8.643211438272992e-07, - "loss": 0.7363, + "learning_rate": 8.802917937671029e-07, + "loss": 0.7812, "step": 30683 }, { - "epoch": 0.8707150964812713, + "epoch": 0.8695060783813653, "grad_norm": 0.0, - "learning_rate": 8.639474059854103e-07, - "loss": 0.8721, + "learning_rate": 8.799153044780229e-07, + "loss": 0.7797, "step": 30684 }, { - "epoch": 0.8707434733257662, + "epoch": 0.8695344158236278, "grad_norm": 0.0, - "learning_rate": 8.635737453161652e-07, - "loss": 0.7445, + "learning_rate": 8.795388920108016e-07, + "loss": 0.7682, "step": 30685 }, { - "epoch": 0.870771850170261, + "epoch": 0.8695627532658903, "grad_norm": 0.0, - "learning_rate": 8.632001618227248e-07, - "loss": 0.8094, + "learning_rate": 8.791625563686123e-07, + "loss": 0.7607, "step": 30686 }, { - "epoch": 0.870800227014756, + "epoch": 0.8695910907081527, "grad_norm": 0.0, - "learning_rate": 8.628266555082443e-07, - "loss": 0.865, + "learning_rate": 8.787862975546246e-07, + "loss": 0.8081, "step": 30687 }, { - "epoch": 0.8708286038592509, + "epoch": 0.8696194281504152, "grad_norm": 0.0, - "learning_rate": 8.624532263758767e-07, - "loss": 0.931, + "learning_rate": 8.78410115572006e-07, + "loss": 0.8722, "step": 30688 }, { - "epoch": 0.8708569807037457, + "epoch": 0.8696477655926776, "grad_norm": 0.0, - "learning_rate": 8.620798744287761e-07, - "loss": 0.873, + "learning_rate": 8.780340104239283e-07, + "loss": 0.8026, "step": 30689 }, { - "epoch": 0.8708853575482406, + "epoch": 0.86967610303494, "grad_norm": 0.0, - "learning_rate": 8.617065996701013e-07, - "loss": 0.8497, + "learning_rate": 8.776579821135544e-07, + "loss": 0.8055, "step": 30690 }, { - "epoch": 0.8709137343927356, + "epoch": 0.8697044404772025, "grad_norm": 0.0, - "learning_rate": 8.613334021029984e-07, - "loss": 0.7761, + "learning_rate": 8.772820306440555e-07, + "loss": 0.7451, "step": 30691 }, { - "epoch": 0.8709421112372304, + "epoch": 0.869732777919465, "grad_norm": 0.0, - "learning_rate": 8.609602817306217e-07, - "loss": 0.7439, + "learning_rate": 8.769061560185999e-07, + "loss": 0.83, "step": 30692 }, { - "epoch": 0.8709704880817253, + "epoch": 0.8697611153617274, "grad_norm": 0.0, - "learning_rate": 8.605872385561276e-07, - "loss": 0.7203, + "learning_rate": 8.765303582403495e-07, + "loss": 0.8168, "step": 30693 }, { - "epoch": 0.8709988649262203, + "epoch": 0.8697894528039899, "grad_norm": 0.0, - "learning_rate": 8.602142725826624e-07, - "loss": 0.8335, + "learning_rate": 8.76154637312473e-07, + "loss": 0.8264, "step": 30694 }, { - "epoch": 0.8710272417707151, + "epoch": 0.8698177902462524, "grad_norm": 0.0, - "learning_rate": 8.59841383813379e-07, - "loss": 0.7589, + "learning_rate": 8.757789932381322e-07, + "loss": 0.7092, "step": 30695 }, { - "epoch": 0.87105561861521, + "epoch": 0.8698461276885149, "grad_norm": 0.0, - "learning_rate": 8.594685722514273e-07, - "loss": 0.8018, + "learning_rate": 8.754034260204936e-07, + "loss": 0.7811, "step": 30696 }, { - "epoch": 0.8710839954597048, + "epoch": 0.8698744651307773, "grad_norm": 0.0, - "learning_rate": 8.590958378999537e-07, - "loss": 0.9158, + "learning_rate": 8.750279356627211e-07, + "loss": 0.7353, "step": 30697 }, { - "epoch": 0.8711123723041998, + "epoch": 0.8699028025730398, "grad_norm": 0.0, - "learning_rate": 8.587231807621099e-07, - "loss": 0.8862, + "learning_rate": 8.746525221679758e-07, + "loss": 0.8234, "step": 30698 }, { - "epoch": 0.8711407491486947, + "epoch": 0.8699311400153023, "grad_norm": 0.0, - "learning_rate": 8.583506008410403e-07, - "loss": 0.8286, + "learning_rate": 8.742771855394205e-07, + "loss": 0.7159, "step": 30699 }, { - "epoch": 0.8711691259931895, + "epoch": 0.8699594774575646, "grad_norm": 0.0, - "learning_rate": 8.579780981398955e-07, - "loss": 0.8871, + "learning_rate": 8.739019257802195e-07, + "loss": 0.8135, "step": 30700 }, { - "epoch": 0.8711975028376845, + "epoch": 0.8699878148998271, "grad_norm": 0.0, - "learning_rate": 8.576056726618209e-07, - "loss": 0.8668, + "learning_rate": 8.735267428935301e-07, + "loss": 0.7439, "step": 30701 }, { - "epoch": 0.8712258796821793, + "epoch": 0.8700161523420896, "grad_norm": 0.0, - "learning_rate": 8.572333244099617e-07, - "loss": 0.9214, + "learning_rate": 8.731516368825154e-07, + "loss": 0.7341, "step": 30702 }, { - "epoch": 0.8712542565266742, + "epoch": 0.8700444897843521, "grad_norm": 0.0, - "learning_rate": 8.568610533874622e-07, - "loss": 0.7167, + "learning_rate": 8.727766077503319e-07, + "loss": 0.8425, "step": 30703 }, { - "epoch": 0.8712826333711692, + "epoch": 0.8700728272266145, "grad_norm": 0.0, - "learning_rate": 8.564888595974718e-07, - "loss": 0.9073, + "learning_rate": 8.724016555001402e-07, + "loss": 0.8138, "step": 30704 }, { - "epoch": 0.871311010215664, + "epoch": 0.870101164668877, "grad_norm": 0.0, - "learning_rate": 8.561167430431283e-07, - "loss": 0.7731, + "learning_rate": 8.720267801351013e-07, + "loss": 0.8347, "step": 30705 }, { - "epoch": 0.8713393870601589, + "epoch": 0.8701295021111395, "grad_norm": 0.0, - "learning_rate": 8.557447037275779e-07, - "loss": 0.7499, + "learning_rate": 8.716519816583679e-07, + "loss": 0.7972, "step": 30706 }, { - "epoch": 0.8713677639046538, + "epoch": 0.8701578395534019, "grad_norm": 0.0, - "learning_rate": 8.553727416539626e-07, - "loss": 0.7696, + "learning_rate": 8.71277260073099e-07, + "loss": 0.9212, "step": 30707 }, { - "epoch": 0.8713961407491487, + "epoch": 0.8701861769956644, "grad_norm": 0.0, - "learning_rate": 8.550008568254253e-07, - "loss": 0.7885, + "learning_rate": 8.709026153824541e-07, + "loss": 0.7516, "step": 30708 }, { - "epoch": 0.8714245175936436, + "epoch": 0.8702145144379269, "grad_norm": 0.0, - "learning_rate": 8.54629049245107e-07, - "loss": 0.7779, + "learning_rate": 8.70528047589585e-07, + "loss": 0.7431, "step": 30709 }, { - "epoch": 0.8714528944381384, + "epoch": 0.8702428518801892, "grad_norm": 0.0, - "learning_rate": 8.542573189161496e-07, - "loss": 0.7882, + "learning_rate": 8.701535566976482e-07, + "loss": 0.7947, "step": 30710 }, { - "epoch": 0.8714812712826334, + "epoch": 0.8702711893224517, "grad_norm": 0.0, - "learning_rate": 8.538856658416905e-07, - "loss": 0.8536, + "learning_rate": 8.697791427097979e-07, + "loss": 0.9075, "step": 30711 }, { - "epoch": 0.8715096481271283, + "epoch": 0.8702995267647142, "grad_norm": 0.0, - "learning_rate": 8.535140900248696e-07, - "loss": 0.8237, + "learning_rate": 8.694048056291882e-07, + "loss": 0.8517, "step": 30712 }, { - "epoch": 0.8715380249716231, + "epoch": 0.8703278642069767, "grad_norm": 0.0, - "learning_rate": 8.531425914688285e-07, - "loss": 0.7429, + "learning_rate": 8.690305454589754e-07, + "loss": 0.7129, "step": 30713 }, { - "epoch": 0.871566401816118, + "epoch": 0.8703562016492391, "grad_norm": 0.0, - "learning_rate": 8.527711701767016e-07, - "loss": 0.8773, + "learning_rate": 8.686563622023059e-07, + "loss": 0.7027, "step": 30714 }, { - "epoch": 0.871594778660613, + "epoch": 0.8703845390915016, "grad_norm": 0.0, - "learning_rate": 8.523998261516276e-07, - "loss": 0.7702, + "learning_rate": 8.682822558623349e-07, + "loss": 0.9021, "step": 30715 }, { - "epoch": 0.8716231555051078, + "epoch": 0.8704128765337641, "grad_norm": 0.0, - "learning_rate": 8.520285593967448e-07, - "loss": 0.8515, + "learning_rate": 8.679082264422156e-07, + "loss": 0.7154, "step": 30716 }, { - "epoch": 0.8716515323496027, + "epoch": 0.8704412139760265, "grad_norm": 0.0, - "learning_rate": 8.516573699151875e-07, - "loss": 0.7328, + "learning_rate": 8.675342739450942e-07, + "loss": 0.9176, "step": 30717 }, { - "epoch": 0.8716799091940977, + "epoch": 0.870469551418289, "grad_norm": 0.0, - "learning_rate": 8.5128625771009e-07, - "loss": 0.7541, + "learning_rate": 8.67160398374125e-07, + "loss": 0.7948, "step": 30718 }, { - "epoch": 0.8717082860385925, + "epoch": 0.8704978888605515, "grad_norm": 0.0, - "learning_rate": 8.509152227845918e-07, - "loss": 0.7864, + "learning_rate": 8.667865997324532e-07, + "loss": 0.8869, "step": 30719 }, { - "epoch": 0.8717366628830874, + "epoch": 0.870526226302814, "grad_norm": 0.0, - "learning_rate": 8.505442651418217e-07, - "loss": 0.8027, + "learning_rate": 8.664128780232295e-07, + "loss": 0.8552, "step": 30720 }, { - "epoch": 0.8717650397275823, + "epoch": 0.8705545637450763, "grad_norm": 0.0, - "learning_rate": 8.501733847849158e-07, - "loss": 0.8846, + "learning_rate": 8.660392332496037e-07, + "loss": 0.8096, "step": 30721 }, { - "epoch": 0.8717934165720772, + "epoch": 0.8705829011873388, "grad_norm": 0.0, - "learning_rate": 8.498025817170063e-07, - "loss": 0.8473, + "learning_rate": 8.656656654147199e-07, + "loss": 0.8795, "step": 30722 }, { - "epoch": 0.8718217934165721, + "epoch": 0.8706112386296013, "grad_norm": 0.0, - "learning_rate": 8.494318559412251e-07, - "loss": 0.7593, + "learning_rate": 8.652921745217258e-07, + "loss": 0.6876, "step": 30723 }, { - "epoch": 0.8718501702610669, + "epoch": 0.8706395760718637, "grad_norm": 0.0, - "learning_rate": 8.490612074607074e-07, - "loss": 0.8594, + "learning_rate": 8.649187605737675e-07, + "loss": 0.8043, "step": 30724 }, { - "epoch": 0.8718785471055619, + "epoch": 0.8706679135141262, "grad_norm": 0.0, - "learning_rate": 8.486906362785785e-07, - "loss": 0.8705, + "learning_rate": 8.645454235739903e-07, + "loss": 0.8463, "step": 30725 }, { - "epoch": 0.8719069239500568, + "epoch": 0.8706962509563887, "grad_norm": 0.0, - "learning_rate": 8.483201423979714e-07, - "loss": 0.78, + "learning_rate": 8.641721635255418e-07, + "loss": 0.755, "step": 30726 }, { - "epoch": 0.8719353007945516, + "epoch": 0.8707245883986512, "grad_norm": 0.0, - "learning_rate": 8.47949725822017e-07, - "loss": 0.8234, + "learning_rate": 8.637989804315616e-07, + "loss": 0.8652, "step": 30727 }, { - "epoch": 0.8719636776390466, + "epoch": 0.8707529258409136, "grad_norm": 0.0, - "learning_rate": 8.475793865538417e-07, - "loss": 0.9073, + "learning_rate": 8.634258742951951e-07, + "loss": 0.7472, "step": 30728 }, { - "epoch": 0.8719920544835414, + "epoch": 0.8707812632831761, "grad_norm": 0.0, - "learning_rate": 8.472091245965741e-07, - "loss": 0.7867, + "learning_rate": 8.630528451195874e-07, + "loss": 0.8281, "step": 30729 }, { - "epoch": 0.8720204313280363, + "epoch": 0.8708096007254386, "grad_norm": 0.0, - "learning_rate": 8.468389399533438e-07, - "loss": 0.8636, + "learning_rate": 8.626798929078773e-07, + "loss": 0.7662, "step": 30730 }, { - "epoch": 0.8720488081725312, + "epoch": 0.8708379381677009, "grad_norm": 0.0, - "learning_rate": 8.464688326272752e-07, - "loss": 0.791, + "learning_rate": 8.623070176632087e-07, + "loss": 0.7473, "step": 30731 }, { - "epoch": 0.8720771850170261, + "epoch": 0.8708662756099634, "grad_norm": 0.0, - "learning_rate": 8.460988026214955e-07, - "loss": 0.7315, + "learning_rate": 8.619342193887192e-07, + "loss": 0.7343, "step": 30732 }, { - "epoch": 0.872105561861521, + "epoch": 0.8708946130522259, "grad_norm": 0.0, - "learning_rate": 8.457288499391336e-07, - "loss": 0.8038, + "learning_rate": 8.615614980875508e-07, + "loss": 0.6898, "step": 30733 }, { - "epoch": 0.8721339387060159, + "epoch": 0.8709229504944883, "grad_norm": 0.0, - "learning_rate": 8.453589745833091e-07, - "loss": 0.8605, + "learning_rate": 8.611888537628466e-07, + "loss": 0.7976, "step": 30734 }, { - "epoch": 0.8721623155505108, + "epoch": 0.8709512879367508, "grad_norm": 0.0, - "learning_rate": 8.449891765571483e-07, - "loss": 0.7727, + "learning_rate": 8.608162864177394e-07, + "loss": 0.8695, "step": 30735 }, { - "epoch": 0.8721906923950057, + "epoch": 0.8709796253790133, "grad_norm": 0.0, - "learning_rate": 8.446194558637777e-07, - "loss": 0.749, + "learning_rate": 8.604437960553702e-07, + "loss": 0.8479, "step": 30736 }, { - "epoch": 0.8722190692395005, + "epoch": 0.8710079628212758, "grad_norm": 0.0, - "learning_rate": 8.44249812506317e-07, - "loss": 0.6828, + "learning_rate": 8.600713826788776e-07, + "loss": 0.7282, "step": 30737 }, { - "epoch": 0.8722474460839955, + "epoch": 0.8710363002635382, "grad_norm": 0.0, - "learning_rate": 8.438802464878881e-07, - "loss": 0.7164, + "learning_rate": 8.596990462913967e-07, + "loss": 0.8357, "step": 30738 }, { - "epoch": 0.8722758229284904, + "epoch": 0.8710646377058007, "grad_norm": 0.0, - "learning_rate": 8.435107578116164e-07, - "loss": 0.8026, + "learning_rate": 8.593267868960675e-07, + "loss": 0.8526, "step": 30739 }, { - "epoch": 0.8723041997729852, + "epoch": 0.8710929751480632, "grad_norm": 0.0, - "learning_rate": 8.431413464806193e-07, - "loss": 0.8325, + "learning_rate": 8.589546044960218e-07, + "loss": 0.7132, "step": 30740 }, { - "epoch": 0.8723325766174801, + "epoch": 0.8711213125903255, "grad_norm": 0.0, - "learning_rate": 8.42772012498021e-07, - "loss": 0.8371, + "learning_rate": 8.585824990943947e-07, + "loss": 0.8274, "step": 30741 }, { - "epoch": 0.8723609534619751, + "epoch": 0.871149650032588, "grad_norm": 0.0, - "learning_rate": 8.424027558669379e-07, - "loss": 0.7168, + "learning_rate": 8.58210470694324e-07, + "loss": 0.8627, "step": 30742 }, { - "epoch": 0.8723893303064699, + "epoch": 0.8711779874748505, "grad_norm": 0.0, - "learning_rate": 8.420335765904908e-07, - "loss": 0.8206, + "learning_rate": 8.578385192989402e-07, + "loss": 0.9072, "step": 30743 }, { - "epoch": 0.8724177071509648, + "epoch": 0.871206324917113, "grad_norm": 0.0, - "learning_rate": 8.416644746717984e-07, - "loss": 0.838, + "learning_rate": 8.574666449113766e-07, + "loss": 0.8055, "step": 30744 }, { - "epoch": 0.8724460839954598, + "epoch": 0.8712346623593754, "grad_norm": 0.0, - "learning_rate": 8.41295450113977e-07, - "loss": 0.8563, + "learning_rate": 8.570948475347685e-07, + "loss": 0.7383, "step": 30745 }, { - "epoch": 0.8724744608399546, + "epoch": 0.8712629998016379, "grad_norm": 0.0, - "learning_rate": 8.409265029201441e-07, - "loss": 0.7724, + "learning_rate": 8.567231271722443e-07, + "loss": 0.8288, "step": 30746 }, { - "epoch": 0.8725028376844495, + "epoch": 0.8712913372439004, "grad_norm": 0.0, - "learning_rate": 8.405576330934184e-07, - "loss": 0.7355, + "learning_rate": 8.563514838269371e-07, + "loss": 0.7384, "step": 30747 }, { - "epoch": 0.8725312145289443, + "epoch": 0.8713196746861628, "grad_norm": 0.0, - "learning_rate": 8.40188840636913e-07, - "loss": 0.809, + "learning_rate": 8.559799175019756e-07, + "loss": 0.8648, "step": 30748 }, { - "epoch": 0.8725595913734393, + "epoch": 0.8713480121284253, "grad_norm": 0.0, - "learning_rate": 8.398201255537431e-07, - "loss": 0.802, + "learning_rate": 8.556084282004906e-07, + "loss": 0.8267, "step": 30749 }, { - "epoch": 0.8725879682179342, + "epoch": 0.8713763495706878, "grad_norm": 0.0, - "learning_rate": 8.394514878470272e-07, - "loss": 0.8582, + "learning_rate": 8.552370159256118e-07, + "loss": 0.823, "step": 30750 }, { - "epoch": 0.872616345062429, + "epoch": 0.8714046870129503, "grad_norm": 0.0, - "learning_rate": 8.390829275198741e-07, - "loss": 0.7768, + "learning_rate": 8.548656806804678e-07, + "loss": 0.7899, "step": 30751 }, { - "epoch": 0.872644721906924, + "epoch": 0.8714330244552126, "grad_norm": 0.0, - "learning_rate": 8.387144445753992e-07, - "loss": 0.6954, + "learning_rate": 8.544944224681872e-07, + "loss": 0.7912, "step": 30752 }, { - "epoch": 0.8726730987514189, + "epoch": 0.8714613618974751, "grad_norm": 0.0, - "learning_rate": 8.383460390167164e-07, - "loss": 0.8907, + "learning_rate": 8.541232412918943e-07, + "loss": 0.854, "step": 30753 }, { - "epoch": 0.8727014755959137, + "epoch": 0.8714896993397376, "grad_norm": 0.0, - "learning_rate": 8.379777108469355e-07, - "loss": 0.7773, + "learning_rate": 8.537521371547186e-07, + "loss": 0.8179, "step": 30754 }, { - "epoch": 0.8727298524404086, + "epoch": 0.871518036782, "grad_norm": 0.0, - "learning_rate": 8.376094600691698e-07, - "loss": 0.8436, + "learning_rate": 8.533811100597855e-07, + "loss": 0.8052, "step": 30755 }, { - "epoch": 0.8727582292849035, + "epoch": 0.8715463742242625, "grad_norm": 0.0, - "learning_rate": 8.372412866865287e-07, - "loss": 0.8209, + "learning_rate": 8.530101600102192e-07, + "loss": 0.8941, "step": 30756 }, { - "epoch": 0.8727866061293984, + "epoch": 0.871574711666525, "grad_norm": 0.0, - "learning_rate": 8.36873190702121e-07, - "loss": 0.8971, + "learning_rate": 8.526392870091449e-07, + "loss": 0.8204, "step": 30757 }, { - "epoch": 0.8728149829738933, + "epoch": 0.8716030491087874, "grad_norm": 0.0, - "learning_rate": 8.365051721190598e-07, - "loss": 0.8007, + "learning_rate": 8.522684910596879e-07, + "loss": 0.8077, "step": 30758 }, { - "epoch": 0.8728433598183882, + "epoch": 0.8716313865510499, "grad_norm": 0.0, - "learning_rate": 8.361372309404492e-07, - "loss": 0.7925, + "learning_rate": 8.518977721649679e-07, + "loss": 0.6565, "step": 30759 }, { - "epoch": 0.8728717366628831, + "epoch": 0.8716597239933124, "grad_norm": 0.0, - "learning_rate": 8.357693671694001e-07, - "loss": 0.7896, + "learning_rate": 8.515271303281125e-07, + "loss": 0.7511, "step": 30760 }, { - "epoch": 0.872900113507378, + "epoch": 0.8716880614355749, "grad_norm": 0.0, - "learning_rate": 8.354015808090199e-07, - "loss": 0.8234, + "learning_rate": 8.511565655522403e-07, + "loss": 0.8089, "step": 30761 }, { - "epoch": 0.8729284903518729, + "epoch": 0.8717163988778373, "grad_norm": 0.0, - "learning_rate": 8.350338718624129e-07, - "loss": 0.8623, + "learning_rate": 8.507860778404731e-07, + "loss": 0.7752, "step": 30762 }, { - "epoch": 0.8729568671963678, + "epoch": 0.8717447363200997, "grad_norm": 0.0, - "learning_rate": 8.346662403326877e-07, - "loss": 0.799, + "learning_rate": 8.504156671959319e-07, + "loss": 0.7115, "step": 30763 }, { - "epoch": 0.8729852440408626, + "epoch": 0.8717730737623622, "grad_norm": 0.0, - "learning_rate": 8.342986862229497e-07, - "loss": 0.7692, + "learning_rate": 8.500453336217374e-07, + "loss": 0.7609, "step": 30764 }, { - "epoch": 0.8730136208853575, + "epoch": 0.8718014112046246, "grad_norm": 0.0, - "learning_rate": 8.339312095363017e-07, - "loss": 0.8273, + "learning_rate": 8.496750771210083e-07, + "loss": 0.857, "step": 30765 }, { - "epoch": 0.8730419977298525, + "epoch": 0.8718297486468871, "grad_norm": 0.0, - "learning_rate": 8.335638102758481e-07, - "loss": 0.7997, + "learning_rate": 8.493048976968665e-07, + "loss": 0.7812, "step": 30766 }, { - "epoch": 0.8730703745743473, + "epoch": 0.8718580860891496, "grad_norm": 0.0, - "learning_rate": 8.331964884446953e-07, - "loss": 0.867, + "learning_rate": 8.489347953524252e-07, + "loss": 0.7824, "step": 30767 }, { - "epoch": 0.8730987514188422, + "epoch": 0.8718864235314121, "grad_norm": 0.0, - "learning_rate": 8.328292440459396e-07, - "loss": 0.819, + "learning_rate": 8.485647700908062e-07, + "loss": 0.7018, "step": 30768 }, { - "epoch": 0.8731271282633372, + "epoch": 0.8719147609736745, "grad_norm": 0.0, - "learning_rate": 8.324620770826919e-07, - "loss": 0.8201, + "learning_rate": 8.481948219151226e-07, + "loss": 0.7322, "step": 30769 }, { - "epoch": 0.873155505107832, + "epoch": 0.871943098415937, "grad_norm": 0.0, - "learning_rate": 8.320949875580464e-07, - "loss": 0.8178, + "learning_rate": 8.47824950828493e-07, + "loss": 0.7717, "step": 30770 }, { - "epoch": 0.8731838819523269, + "epoch": 0.8719714358581995, "grad_norm": 0.0, - "learning_rate": 8.317279754751073e-07, - "loss": 0.8373, + "learning_rate": 8.474551568340338e-07, + "loss": 0.7534, "step": 30771 }, { - "epoch": 0.8732122587968217, + "epoch": 0.8719997733004619, "grad_norm": 0.0, - "learning_rate": 8.313610408369754e-07, - "loss": 0.7811, + "learning_rate": 8.470854399348572e-07, + "loss": 0.754, "step": 30772 }, { - "epoch": 0.8732406356413167, + "epoch": 0.8720281107427243, "grad_norm": 0.0, - "learning_rate": 8.309941836467472e-07, - "loss": 0.7711, + "learning_rate": 8.46715800134078e-07, + "loss": 0.747, "step": 30773 }, { - "epoch": 0.8732690124858116, + "epoch": 0.8720564481849868, "grad_norm": 0.0, - "learning_rate": 8.306274039075246e-07, - "loss": 0.8773, + "learning_rate": 8.46346237434813e-07, + "loss": 0.7962, "step": 30774 }, { - "epoch": 0.8732973893303064, + "epoch": 0.8720847856272493, "grad_norm": 0.0, - "learning_rate": 8.302607016224052e-07, - "loss": 0.7532, + "learning_rate": 8.459767518401707e-07, + "loss": 0.7905, "step": 30775 }, { - "epoch": 0.8733257661748014, + "epoch": 0.8721131230695117, "grad_norm": 0.0, - "learning_rate": 8.298940767944841e-07, - "loss": 0.9122, + "learning_rate": 8.456073433532652e-07, + "loss": 0.7587, "step": 30776 }, { - "epoch": 0.8733541430192963, + "epoch": 0.8721414605117742, "grad_norm": 0.0, - "learning_rate": 8.295275294268601e-07, - "loss": 0.7998, + "learning_rate": 8.452380119772086e-07, + "loss": 0.8089, "step": 30777 }, { - "epoch": 0.8733825198637911, + "epoch": 0.8721697979540367, "grad_norm": 0.0, - "learning_rate": 8.291610595226307e-07, - "loss": 0.8118, + "learning_rate": 8.448687577151127e-07, + "loss": 0.8303, "step": 30778 }, { - "epoch": 0.8734108967082861, + "epoch": 0.8721981353962991, "grad_norm": 0.0, - "learning_rate": 8.287946670848889e-07, - "loss": 0.7311, + "learning_rate": 8.444995805700873e-07, + "loss": 0.7797, "step": 30779 }, { - "epoch": 0.873439273552781, + "epoch": 0.8722264728385616, "grad_norm": 0.0, - "learning_rate": 8.284283521167303e-07, - "loss": 0.8638, + "learning_rate": 8.441304805452411e-07, + "loss": 0.7225, "step": 30780 }, { - "epoch": 0.8734676503972758, + "epoch": 0.8722548102808241, "grad_norm": 0.0, - "learning_rate": 8.280621146212519e-07, - "loss": 0.643, + "learning_rate": 8.437614576436848e-07, + "loss": 0.8363, "step": 30781 }, { - "epoch": 0.8734960272417707, + "epoch": 0.8722831477230865, "grad_norm": 0.0, - "learning_rate": 8.276959546015429e-07, - "loss": 0.7706, + "learning_rate": 8.433925118685249e-07, + "loss": 0.801, "step": 30782 }, { - "epoch": 0.8735244040862656, + "epoch": 0.872311485165349, "grad_norm": 0.0, - "learning_rate": 8.273298720606993e-07, - "loss": 0.7343, + "learning_rate": 8.430236432228689e-07, + "loss": 0.8022, "step": 30783 }, { - "epoch": 0.8735527809307605, + "epoch": 0.8723398226076114, "grad_norm": 0.0, - "learning_rate": 8.269638670018121e-07, - "loss": 0.7631, + "learning_rate": 8.426548517098276e-07, + "loss": 0.7489, "step": 30784 }, { - "epoch": 0.8735811577752554, + "epoch": 0.8723681600498739, "grad_norm": 0.0, - "learning_rate": 8.265979394279732e-07, - "loss": 0.9211, + "learning_rate": 8.422861373325031e-07, + "loss": 0.8455, "step": 30785 }, { - "epoch": 0.8736095346197503, + "epoch": 0.8723964974921363, "grad_norm": 0.0, - "learning_rate": 8.262320893422759e-07, - "loss": 0.7629, + "learning_rate": 8.419175000940028e-07, + "loss": 0.7624, "step": 30786 }, { - "epoch": 0.8736379114642452, + "epoch": 0.8724248349343988, "grad_norm": 0.0, - "learning_rate": 8.258663167478065e-07, - "loss": 0.7369, + "learning_rate": 8.415489399974341e-07, + "loss": 0.8147, "step": 30787 }, { - "epoch": 0.87366628830874, + "epoch": 0.8724531723766613, "grad_norm": 0.0, - "learning_rate": 8.255006216476569e-07, - "loss": 0.8349, + "learning_rate": 8.411804570458981e-07, + "loss": 0.84, "step": 30788 }, { - "epoch": 0.8736946651532349, + "epoch": 0.8724815098189237, "grad_norm": 0.0, - "learning_rate": 8.25135004044918e-07, - "loss": 0.835, + "learning_rate": 8.408120512425e-07, + "loss": 0.9423, "step": 30789 }, { - "epoch": 0.8737230419977299, + "epoch": 0.8725098472611862, "grad_norm": 0.0, - "learning_rate": 8.24769463942674e-07, - "loss": 0.8059, + "learning_rate": 8.404437225903439e-07, + "loss": 0.8903, "step": 30790 }, { - "epoch": 0.8737514188422247, + "epoch": 0.8725381847034487, "grad_norm": 0.0, - "learning_rate": 8.244040013440147e-07, - "loss": 0.8506, + "learning_rate": 8.400754710925307e-07, + "loss": 0.6893, "step": 30791 }, { - "epoch": 0.8737797956867196, + "epoch": 0.8725665221457112, "grad_norm": 0.0, - "learning_rate": 8.240386162520298e-07, - "loss": 0.824, + "learning_rate": 8.397072967521658e-07, + "loss": 0.7889, "step": 30792 }, { - "epoch": 0.8738081725312146, + "epoch": 0.8725948595879736, "grad_norm": 0.0, - "learning_rate": 8.236733086698013e-07, - "loss": 0.8278, + "learning_rate": 8.393391995723454e-07, + "loss": 0.8421, "step": 30793 }, { - "epoch": 0.8738365493757094, + "epoch": 0.872623197030236, "grad_norm": 0.0, - "learning_rate": 8.233080786004167e-07, - "loss": 0.8368, + "learning_rate": 8.389711795561728e-07, + "loss": 0.7525, "step": 30794 }, { - "epoch": 0.8738649262202043, + "epoch": 0.8726515344724985, "grad_norm": 0.0, - "learning_rate": 8.229429260469623e-07, - "loss": 0.85, + "learning_rate": 8.386032367067498e-07, + "loss": 0.7009, "step": 30795 }, { - "epoch": 0.8738933030646993, + "epoch": 0.8726798719147609, "grad_norm": 0.0, - "learning_rate": 8.225778510125204e-07, - "loss": 0.8107, + "learning_rate": 8.382353710271718e-07, + "loss": 0.8015, "step": 30796 }, { - "epoch": 0.8739216799091941, + "epoch": 0.8727082093570234, "grad_norm": 0.0, - "learning_rate": 8.222128535001761e-07, - "loss": 0.8246, + "learning_rate": 8.378675825205407e-07, + "loss": 0.7569, "step": 30797 }, { - "epoch": 0.873950056753689, + "epoch": 0.8727365467992859, "grad_norm": 0.0, - "learning_rate": 8.218479335130136e-07, - "loss": 0.7978, + "learning_rate": 8.374998711899529e-07, + "loss": 0.9151, "step": 30798 }, { - "epoch": 0.8739784335981838, + "epoch": 0.8727648842415484, "grad_norm": 0.0, - "learning_rate": 8.214830910541116e-07, - "loss": 0.9156, + "learning_rate": 8.371322370385049e-07, + "loss": 0.7608, "step": 30799 }, { - "epoch": 0.8740068104426788, + "epoch": 0.8727932216838108, "grad_norm": 0.0, - "learning_rate": 8.211183261265554e-07, - "loss": 0.7498, + "learning_rate": 8.367646800692964e-07, + "loss": 0.7359, "step": 30800 }, { - "epoch": 0.8740351872871737, + "epoch": 0.8728215591260733, "grad_norm": 0.0, - "learning_rate": 8.20753638733428e-07, - "loss": 0.8253, + "learning_rate": 8.363972002854204e-07, + "loss": 0.7426, "step": 30801 }, { - "epoch": 0.8740635641316685, + "epoch": 0.8728498965683358, "grad_norm": 0.0, - "learning_rate": 8.203890288778049e-07, - "loss": 0.8255, + "learning_rate": 8.360297976899734e-07, + "loss": 0.822, "step": 30802 }, { - "epoch": 0.8740919409761635, + "epoch": 0.8728782340105982, "grad_norm": 0.0, - "learning_rate": 8.20024496562768e-07, - "loss": 0.8326, + "learning_rate": 8.356624722860507e-07, + "loss": 0.8512, "step": 30803 }, { - "epoch": 0.8741203178206584, + "epoch": 0.8729065714528607, "grad_norm": 0.0, - "learning_rate": 8.196600417913991e-07, - "loss": 0.7773, + "learning_rate": 8.352952240767453e-07, + "loss": 0.8187, "step": 30804 }, { - "epoch": 0.8741486946651532, + "epoch": 0.8729349088951232, "grad_norm": 0.0, - "learning_rate": 8.192956645667727e-07, - "loss": 0.8274, + "learning_rate": 8.349280530651538e-07, + "loss": 0.7996, "step": 30805 }, { - "epoch": 0.8741770715096481, + "epoch": 0.8729632463373855, "grad_norm": 0.0, - "learning_rate": 8.189313648919695e-07, - "loss": 0.7314, + "learning_rate": 8.345609592543647e-07, + "loss": 0.7562, "step": 30806 }, { - "epoch": 0.874205448354143, + "epoch": 0.872991583779648, "grad_norm": 0.0, - "learning_rate": 8.185671427700648e-07, - "loss": 0.7775, + "learning_rate": 8.341939426474721e-07, + "loss": 0.8064, "step": 30807 }, { - "epoch": 0.8742338251986379, + "epoch": 0.8730199212219105, "grad_norm": 0.0, - "learning_rate": 8.182029982041361e-07, - "loss": 0.8294, + "learning_rate": 8.338270032475693e-07, + "loss": 0.7777, "step": 30808 }, { - "epoch": 0.8742622020431328, + "epoch": 0.873048258664173, "grad_norm": 0.0, - "learning_rate": 8.178389311972612e-07, - "loss": 0.9185, + "learning_rate": 8.334601410577436e-07, + "loss": 0.7925, "step": 30809 }, { - "epoch": 0.8742905788876277, + "epoch": 0.8730765961064354, "grad_norm": 0.0, - "learning_rate": 8.174749417525119e-07, - "loss": 0.8025, + "learning_rate": 8.330933560810895e-07, + "loss": 0.7826, "step": 30810 }, { - "epoch": 0.8743189557321226, + "epoch": 0.8731049335486979, "grad_norm": 0.0, - "learning_rate": 8.171110298729645e-07, - "loss": 0.84, + "learning_rate": 8.32726648320692e-07, + "loss": 0.727, "step": 30811 }, { - "epoch": 0.8743473325766175, + "epoch": 0.8731332709909604, "grad_norm": 0.0, - "learning_rate": 8.167471955616946e-07, - "loss": 0.83, + "learning_rate": 8.32360017779642e-07, + "loss": 0.7533, "step": 30812 }, { - "epoch": 0.8743757094211124, + "epoch": 0.8731616084332228, "grad_norm": 0.0, - "learning_rate": 8.163834388217728e-07, - "loss": 0.789, + "learning_rate": 8.319934644610295e-07, + "loss": 0.8366, "step": 30813 }, { - "epoch": 0.8744040862656073, + "epoch": 0.8731899458754853, "grad_norm": 0.0, - "learning_rate": 8.160197596562702e-07, - "loss": 0.8269, + "learning_rate": 8.316269883679384e-07, + "loss": 0.7604, "step": 30814 }, { - "epoch": 0.8744324631101021, + "epoch": 0.8732182833177478, "grad_norm": 0.0, - "learning_rate": 8.156561580682665e-07, - "loss": 0.7746, + "learning_rate": 8.312605895034587e-07, + "loss": 0.7466, "step": 30815 }, { - "epoch": 0.874460839954597, + "epoch": 0.8732466207600103, "grad_norm": 0.0, - "learning_rate": 8.152926340608247e-07, - "loss": 0.794, + "learning_rate": 8.308942678706756e-07, + "loss": 0.7565, "step": 30816 }, { - "epoch": 0.874489216799092, + "epoch": 0.8732749582022726, "grad_norm": 0.0, - "learning_rate": 8.149291876370203e-07, - "loss": 0.8661, + "learning_rate": 8.305280234726743e-07, + "loss": 0.7723, "step": 30817 }, { - "epoch": 0.8745175936435868, + "epoch": 0.8733032956445351, "grad_norm": 0.0, - "learning_rate": 8.145658187999228e-07, - "loss": 0.7688, + "learning_rate": 8.301618563125424e-07, + "loss": 0.7731, "step": 30818 }, { - "epoch": 0.8745459704880817, + "epoch": 0.8733316330867976, "grad_norm": 0.0, - "learning_rate": 8.142025275525989e-07, - "loss": 0.8826, + "learning_rate": 8.297957663933609e-07, + "loss": 0.8455, "step": 30819 }, { - "epoch": 0.8745743473325767, + "epoch": 0.87335997052906, "grad_norm": 0.0, - "learning_rate": 8.138393138981193e-07, - "loss": 0.7918, + "learning_rate": 8.29429753718215e-07, + "loss": 0.8031, "step": 30820 }, { - "epoch": 0.8746027241770715, + "epoch": 0.8733883079713225, "grad_norm": 0.0, - "learning_rate": 8.134761778395539e-07, - "loss": 0.8359, + "learning_rate": 8.290638182901889e-07, + "loss": 0.8692, "step": 30821 }, { - "epoch": 0.8746311010215664, + "epoch": 0.873416645413585, "grad_norm": 0.0, - "learning_rate": 8.131131193799668e-07, - "loss": 0.8529, + "learning_rate": 8.286979601123623e-07, + "loss": 0.7728, "step": 30822 }, { - "epoch": 0.8746594778660612, + "epoch": 0.8734449828558475, "grad_norm": 0.0, - "learning_rate": 8.127501385224257e-07, - "loss": 0.7403, + "learning_rate": 8.283321791878185e-07, + "loss": 0.8217, "step": 30823 }, { - "epoch": 0.8746878547105562, + "epoch": 0.8734733202981099, "grad_norm": 0.0, - "learning_rate": 8.123872352699991e-07, - "loss": 0.7884, + "learning_rate": 8.279664755196404e-07, + "loss": 0.7692, "step": 30824 }, { - "epoch": 0.8747162315550511, + "epoch": 0.8735016577403724, "grad_norm": 0.0, - "learning_rate": 8.12024409625749e-07, - "loss": 0.9632, + "learning_rate": 8.276008491109056e-07, + "loss": 0.6743, "step": 30825 }, { - "epoch": 0.8747446083995459, + "epoch": 0.8735299951826349, "grad_norm": 0.0, - "learning_rate": 8.116616615927409e-07, - "loss": 0.6648, + "learning_rate": 8.27235299964696e-07, + "loss": 0.7969, "step": 30826 }, { - "epoch": 0.8747729852440409, + "epoch": 0.8735583326248972, "grad_norm": 0.0, - "learning_rate": 8.112989911740421e-07, - "loss": 0.7803, + "learning_rate": 8.26869828084087e-07, + "loss": 0.7775, "step": 30827 }, { - "epoch": 0.8748013620885358, + "epoch": 0.8735866700671597, "grad_norm": 0.0, - "learning_rate": 8.109363983727125e-07, - "loss": 0.766, + "learning_rate": 8.265044334721606e-07, + "loss": 0.7374, "step": 30828 }, { - "epoch": 0.8748297389330306, + "epoch": 0.8736150075094222, "grad_norm": 0.0, - "learning_rate": 8.105738831918153e-07, - "loss": 0.9154, + "learning_rate": 8.261391161319942e-07, + "loss": 0.8165, "step": 30829 }, { - "epoch": 0.8748581157775256, + "epoch": 0.8736433449516846, "grad_norm": 0.0, - "learning_rate": 8.102114456344145e-07, - "loss": 0.7935, + "learning_rate": 8.257738760666645e-07, + "loss": 0.8211, "step": 30830 }, { - "epoch": 0.8748864926220205, + "epoch": 0.8736716823939471, "grad_norm": 0.0, - "learning_rate": 8.098490857035702e-07, - "loss": 0.8595, + "learning_rate": 8.254087132792476e-07, + "loss": 0.7741, "step": 30831 }, { - "epoch": 0.8749148694665153, + "epoch": 0.8737000198362096, "grad_norm": 0.0, - "learning_rate": 8.094868034023462e-07, - "loss": 0.835, + "learning_rate": 8.250436277728224e-07, + "loss": 0.8948, "step": 30832 }, { - "epoch": 0.8749432463110102, + "epoch": 0.8737283572784721, "grad_norm": 0.0, - "learning_rate": 8.09124598733797e-07, - "loss": 0.8004, + "learning_rate": 8.246786195504597e-07, + "loss": 0.8164, "step": 30833 }, { - "epoch": 0.8749716231555051, + "epoch": 0.8737566947207345, "grad_norm": 0.0, - "learning_rate": 8.087624717009868e-07, - "loss": 0.7961, + "learning_rate": 8.243136886152381e-07, + "loss": 0.7258, "step": 30834 }, { - "epoch": 0.875, + "epoch": 0.873785032162997, "grad_norm": 0.0, - "learning_rate": 8.084004223069752e-07, - "loss": 0.8994, + "learning_rate": 8.239488349702285e-07, + "loss": 0.7448, "step": 30835 }, { - "epoch": 0.8750283768444949, + "epoch": 0.8738133696052595, "grad_norm": 0.0, - "learning_rate": 8.080384505548155e-07, - "loss": 0.879, + "learning_rate": 8.23584058618504e-07, + "loss": 0.7541, "step": 30836 }, { - "epoch": 0.8750567536889898, + "epoch": 0.8738417070475218, "grad_norm": 0.0, - "learning_rate": 8.076765564475686e-07, - "loss": 0.8522, + "learning_rate": 8.23219359563141e-07, + "loss": 0.7979, "step": 30837 }, { - "epoch": 0.8750851305334847, + "epoch": 0.8738700444897843, "grad_norm": 0.0, - "learning_rate": 8.07314739988293e-07, - "loss": 0.8292, + "learning_rate": 8.228547378072072e-07, + "loss": 0.8319, "step": 30838 }, { - "epoch": 0.8751135073779795, + "epoch": 0.8738983819320468, "grad_norm": 0.0, - "learning_rate": 8.06953001180042e-07, - "loss": 0.7334, + "learning_rate": 8.224901933537777e-07, + "loss": 0.7563, "step": 30839 }, { - "epoch": 0.8751418842224744, + "epoch": 0.8739267193743093, "grad_norm": 0.0, - "learning_rate": 8.065913400258718e-07, - "loss": 0.7527, + "learning_rate": 8.22125726205919e-07, + "loss": 0.8925, "step": 30840 }, { - "epoch": 0.8751702610669694, + "epoch": 0.8739550568165717, "grad_norm": 0.0, - "learning_rate": 8.06229756528839e-07, - "loss": 0.7972, + "learning_rate": 8.217613363667043e-07, + "loss": 0.7222, "step": 30841 }, { - "epoch": 0.8751986379114642, + "epoch": 0.8739833942588342, "grad_norm": 0.0, - "learning_rate": 8.058682506919946e-07, - "loss": 0.8046, + "learning_rate": 8.213970238392022e-07, + "loss": 0.8313, "step": 30842 }, { - "epoch": 0.8752270147559591, + "epoch": 0.8740117317010967, "grad_norm": 0.0, - "learning_rate": 8.055068225183959e-07, - "loss": 0.8608, + "learning_rate": 8.210327886264802e-07, + "loss": 0.8585, "step": 30843 }, { - "epoch": 0.8752553916004541, + "epoch": 0.8740400691433591, "grad_norm": 0.0, - "learning_rate": 8.051454720110952e-07, - "loss": 0.8573, + "learning_rate": 8.20668630731608e-07, + "loss": 0.8493, "step": 30844 }, { - "epoch": 0.8752837684449489, + "epoch": 0.8740684065856216, "grad_norm": 0.0, - "learning_rate": 8.047841991731409e-07, - "loss": 0.6901, + "learning_rate": 8.203045501576545e-07, + "loss": 0.8626, "step": 30845 }, { - "epoch": 0.8753121452894438, + "epoch": 0.8740967440278841, "grad_norm": 0.0, - "learning_rate": 8.044230040075907e-07, - "loss": 0.7782, + "learning_rate": 8.199405469076826e-07, + "loss": 0.8807, "step": 30846 }, { - "epoch": 0.8753405221339388, + "epoch": 0.8741250814701466, "grad_norm": 0.0, - "learning_rate": 8.040618865174909e-07, - "loss": 0.9429, + "learning_rate": 8.195766209847623e-07, + "loss": 0.9004, "step": 30847 }, { - "epoch": 0.8753688989784336, + "epoch": 0.8741534189124089, "grad_norm": 0.0, - "learning_rate": 8.037008467058948e-07, - "loss": 0.8097, + "learning_rate": 8.192127723919563e-07, + "loss": 0.6978, "step": 30848 }, { - "epoch": 0.8753972758229285, + "epoch": 0.8741817563546714, "grad_norm": 0.0, - "learning_rate": 8.033398845758522e-07, - "loss": 0.7748, + "learning_rate": 8.188490011323291e-07, + "loss": 0.8436, "step": 30849 }, { - "epoch": 0.8754256526674233, + "epoch": 0.8742100937969339, "grad_norm": 0.0, - "learning_rate": 8.029790001304094e-07, - "loss": 0.862, + "learning_rate": 8.184853072089482e-07, + "loss": 0.854, "step": 30850 }, { - "epoch": 0.8754540295119183, + "epoch": 0.8742384312391963, "grad_norm": 0.0, - "learning_rate": 8.026181933726162e-07, - "loss": 0.7122, + "learning_rate": 8.181216906248734e-07, + "loss": 0.731, "step": 30851 }, { - "epoch": 0.8754824063564132, + "epoch": 0.8742667686814588, "grad_norm": 0.0, - "learning_rate": 8.022574643055226e-07, - "loss": 0.7995, + "learning_rate": 8.17758151383169e-07, + "loss": 0.8032, "step": 30852 }, { - "epoch": 0.875510783200908, + "epoch": 0.8742951061237213, "grad_norm": 0.0, - "learning_rate": 8.018968129321714e-07, - "loss": 0.7852, + "learning_rate": 8.173946894869001e-07, + "loss": 0.8477, "step": 30853 }, { - "epoch": 0.875539160045403, + "epoch": 0.8743234435659837, "grad_norm": 0.0, - "learning_rate": 8.015362392556114e-07, - "loss": 0.838, + "learning_rate": 8.170313049391221e-07, + "loss": 0.6806, "step": 30854 }, { - "epoch": 0.8755675368898979, + "epoch": 0.8743517810082462, "grad_norm": 0.0, - "learning_rate": 8.011757432788902e-07, - "loss": 0.8386, + "learning_rate": 8.166679977429004e-07, + "loss": 0.8575, "step": 30855 }, { - "epoch": 0.8755959137343927, + "epoch": 0.8743801184505087, "grad_norm": 0.0, - "learning_rate": 8.008153250050499e-07, - "loss": 0.9053, + "learning_rate": 8.163047679012948e-07, + "loss": 0.7542, "step": 30856 }, { - "epoch": 0.8756242905788876, + "epoch": 0.8744084558927712, "grad_norm": 0.0, - "learning_rate": 8.004549844371357e-07, - "loss": 0.7743, + "learning_rate": 8.159416154173638e-07, + "loss": 0.7783, "step": 30857 }, { - "epoch": 0.8756526674233825, + "epoch": 0.8744367933350335, "grad_norm": 0.0, - "learning_rate": 8.000947215781929e-07, - "loss": 0.8594, + "learning_rate": 8.155785402941685e-07, + "loss": 0.8151, "step": 30858 }, { - "epoch": 0.8756810442678774, + "epoch": 0.874465130777296, "grad_norm": 0.0, - "learning_rate": 7.997345364312626e-07, - "loss": 0.7351, + "learning_rate": 8.152155425347652e-07, + "loss": 0.7185, "step": 30859 }, { - "epoch": 0.8757094211123723, + "epoch": 0.8744934682195585, "grad_norm": 0.0, - "learning_rate": 7.993744289993876e-07, - "loss": 0.867, + "learning_rate": 8.148526221422104e-07, + "loss": 0.742, "step": 30860 }, { - "epoch": 0.8757377979568672, + "epoch": 0.8745218056618209, "grad_norm": 0.0, - "learning_rate": 7.990143992856114e-07, - "loss": 0.7677, + "learning_rate": 8.144897791195661e-07, + "loss": 0.8684, "step": 30861 }, { - "epoch": 0.8757661748013621, + "epoch": 0.8745501431040834, "grad_norm": 0.0, - "learning_rate": 7.986544472929736e-07, - "loss": 0.8368, + "learning_rate": 8.14127013469882e-07, + "loss": 0.8797, "step": 30862 }, { - "epoch": 0.875794551645857, + "epoch": 0.8745784805463459, "grad_norm": 0.0, - "learning_rate": 7.982945730245162e-07, - "loss": 0.6977, + "learning_rate": 8.137643251962202e-07, + "loss": 0.8357, "step": 30863 }, { - "epoch": 0.8758229284903519, + "epoch": 0.8746068179886084, "grad_norm": 0.0, - "learning_rate": 7.979347764832779e-07, - "loss": 0.8156, + "learning_rate": 8.134017143016304e-07, + "loss": 0.907, "step": 30864 }, { - "epoch": 0.8758513053348468, + "epoch": 0.8746351554308708, "grad_norm": 0.0, - "learning_rate": 7.975750576722974e-07, - "loss": 0.9587, + "learning_rate": 8.130391807891691e-07, + "loss": 0.8304, "step": 30865 }, { - "epoch": 0.8758796821793416, + "epoch": 0.8746634928731333, "grad_norm": 0.0, - "learning_rate": 7.972154165946155e-07, - "loss": 0.7287, + "learning_rate": 8.126767246618927e-07, + "loss": 0.9608, "step": 30866 }, { - "epoch": 0.8759080590238365, + "epoch": 0.8746918303153958, "grad_norm": 0.0, - "learning_rate": 7.968558532532677e-07, - "loss": 0.7894, + "learning_rate": 8.1231434592285e-07, + "loss": 0.8288, "step": 30867 }, { - "epoch": 0.8759364358683315, + "epoch": 0.8747201677576582, "grad_norm": 0.0, - "learning_rate": 7.964963676512915e-07, - "loss": 0.8428, + "learning_rate": 8.11952044575095e-07, + "loss": 0.7871, "step": 30868 }, { - "epoch": 0.8759648127128263, + "epoch": 0.8747485051999206, "grad_norm": 0.0, - "learning_rate": 7.961369597917268e-07, - "loss": 0.775, + "learning_rate": 8.1158982062168e-07, + "loss": 0.91, "step": 30869 }, { - "epoch": 0.8759931895573212, + "epoch": 0.8747768426421831, "grad_norm": 0.0, - "learning_rate": 7.957776296776043e-07, - "loss": 0.8506, + "learning_rate": 8.112276740656555e-07, + "loss": 0.7605, "step": 30870 }, { - "epoch": 0.8760215664018162, + "epoch": 0.8748051800844456, "grad_norm": 0.0, - "learning_rate": 7.954183773119628e-07, - "loss": 0.8162, + "learning_rate": 8.10865604910075e-07, + "loss": 0.8262, "step": 30871 }, { - "epoch": 0.876049943246311, + "epoch": 0.874833517526708, "grad_norm": 0.0, - "learning_rate": 7.950592026978377e-07, - "loss": 0.8545, + "learning_rate": 8.105036131579835e-07, + "loss": 0.8399, "step": 30872 }, { - "epoch": 0.8760783200908059, + "epoch": 0.8748618549689705, "grad_norm": 0.0, - "learning_rate": 7.947001058382586e-07, - "loss": 0.7322, + "learning_rate": 8.101416988124333e-07, + "loss": 0.7096, "step": 30873 }, { - "epoch": 0.8761066969353007, + "epoch": 0.874890192411233, "grad_norm": 0.0, - "learning_rate": 7.943410867362622e-07, - "loss": 0.6706, + "learning_rate": 8.097798618764729e-07, + "loss": 0.8527, "step": 30874 }, { - "epoch": 0.8761350737797957, + "epoch": 0.8749185298534954, "grad_norm": 0.0, - "learning_rate": 7.939821453948826e-07, - "loss": 0.8378, + "learning_rate": 8.094181023531478e-07, + "loss": 0.6894, "step": 30875 }, { - "epoch": 0.8761634506242906, + "epoch": 0.8749468672957579, "grad_norm": 0.0, - "learning_rate": 7.936232818171453e-07, - "loss": 0.8065, + "learning_rate": 8.090564202455087e-07, + "loss": 0.8452, "step": 30876 }, { - "epoch": 0.8761918274687854, + "epoch": 0.8749752047380204, "grad_norm": 0.0, - "learning_rate": 7.932644960060898e-07, - "loss": 0.7721, + "learning_rate": 8.086948155565988e-07, + "loss": 0.9098, "step": 30877 }, { - "epoch": 0.8762202043132804, + "epoch": 0.8750035421802828, "grad_norm": 0.0, - "learning_rate": 7.929057879647417e-07, - "loss": 0.7579, + "learning_rate": 8.083332882894657e-07, + "loss": 0.8037, "step": 30878 }, { - "epoch": 0.8762485811577753, + "epoch": 0.8750318796225453, "grad_norm": 0.0, - "learning_rate": 7.925471576961319e-07, - "loss": 0.6696, + "learning_rate": 8.079718384471557e-07, + "loss": 0.8526, "step": 30879 }, { - "epoch": 0.8762769580022701, + "epoch": 0.8750602170648077, "grad_norm": 0.0, - "learning_rate": 7.921886052032913e-07, - "loss": 0.8781, + "learning_rate": 8.076104660327111e-07, + "loss": 0.7828, "step": 30880 }, { - "epoch": 0.8763053348467651, + "epoch": 0.8750885545070702, "grad_norm": 0.0, - "learning_rate": 7.918301304892461e-07, - "loss": 0.8037, + "learning_rate": 8.07249171049177e-07, + "loss": 0.8048, "step": 30881 }, { - "epoch": 0.87633371169126, + "epoch": 0.8751168919493326, "grad_norm": 0.0, - "learning_rate": 7.914717335570266e-07, - "loss": 0.7478, + "learning_rate": 8.068879534995966e-07, + "loss": 0.8508, "step": 30882 }, { - "epoch": 0.8763620885357548, + "epoch": 0.8751452293915951, "grad_norm": 0.0, - "learning_rate": 7.9111341440966e-07, - "loss": 0.7444, + "learning_rate": 8.06526813387013e-07, + "loss": 0.721, "step": 30883 }, { - "epoch": 0.8763904653802497, + "epoch": 0.8751735668338576, "grad_norm": 0.0, - "learning_rate": 7.907551730501717e-07, - "loss": 0.7817, + "learning_rate": 8.061657507144705e-07, + "loss": 0.8739, "step": 30884 }, { - "epoch": 0.8764188422247446, + "epoch": 0.87520190427612, "grad_norm": 0.0, - "learning_rate": 7.903970094815872e-07, - "loss": 0.8155, + "learning_rate": 8.058047654850043e-07, + "loss": 0.8188, "step": 30885 }, { - "epoch": 0.8764472190692395, + "epoch": 0.8752302417183825, "grad_norm": 0.0, - "learning_rate": 7.900389237069361e-07, - "loss": 0.7862, + "learning_rate": 8.054438577016599e-07, + "loss": 0.784, "step": 30886 }, { - "epoch": 0.8764755959137344, + "epoch": 0.875258579160645, "grad_norm": 0.0, - "learning_rate": 7.896809157292373e-07, - "loss": 0.9027, + "learning_rate": 8.050830273674781e-07, + "loss": 0.7527, "step": 30887 }, { - "epoch": 0.8765039727582293, + "epoch": 0.8752869166029075, "grad_norm": 0.0, - "learning_rate": 7.893229855515184e-07, - "loss": 0.8271, + "learning_rate": 8.047222744854943e-07, + "loss": 0.8623, "step": 30888 }, { - "epoch": 0.8765323496027242, + "epoch": 0.8753152540451699, "grad_norm": 0.0, - "learning_rate": 7.889651331768045e-07, - "loss": 0.9244, + "learning_rate": 8.043615990587495e-07, + "loss": 0.7459, "step": 30889 }, { - "epoch": 0.876560726447219, + "epoch": 0.8753435914874323, "grad_norm": 0.0, - "learning_rate": 7.886073586081133e-07, - "loss": 0.6976, + "learning_rate": 8.040010010902832e-07, + "loss": 0.7546, "step": 30890 }, { - "epoch": 0.8765891032917139, + "epoch": 0.8753719289296948, "grad_norm": 0.0, - "learning_rate": 7.882496618484714e-07, - "loss": 0.8876, + "learning_rate": 8.036404805831299e-07, + "loss": 0.7652, "step": 30891 }, { - "epoch": 0.8766174801362089, + "epoch": 0.8754002663719572, "grad_norm": 0.0, - "learning_rate": 7.878920429008974e-07, - "loss": 0.914, + "learning_rate": 8.032800375403271e-07, + "loss": 0.7868, "step": 30892 }, { - "epoch": 0.8766458569807037, + "epoch": 0.8754286038142197, "grad_norm": 0.0, - "learning_rate": 7.875345017684144e-07, - "loss": 0.7234, + "learning_rate": 8.029196719649135e-07, + "loss": 0.923, "step": 30893 }, { - "epoch": 0.8766742338251986, + "epoch": 0.8754569412564822, "grad_norm": 0.0, - "learning_rate": 7.871770384540423e-07, - "loss": 0.8349, + "learning_rate": 8.025593838599221e-07, + "loss": 0.847, "step": 30894 }, { - "epoch": 0.8767026106696936, + "epoch": 0.8754852786987447, "grad_norm": 0.0, - "learning_rate": 7.868196529607985e-07, - "loss": 0.7433, + "learning_rate": 8.021991732283874e-07, + "loss": 0.9013, "step": 30895 }, { - "epoch": 0.8767309875141884, + "epoch": 0.8755136161410071, "grad_norm": 0.0, - "learning_rate": 7.864623452917031e-07, - "loss": 0.784, + "learning_rate": 8.018390400733444e-07, + "loss": 0.6446, "step": 30896 }, { - "epoch": 0.8767593643586833, + "epoch": 0.8755419535832696, "grad_norm": 0.0, - "learning_rate": 7.861051154497767e-07, - "loss": 0.8125, + "learning_rate": 8.014789843978288e-07, + "loss": 0.8192, "step": 30897 }, { - "epoch": 0.8767877412031783, + "epoch": 0.8755702910255321, "grad_norm": 0.0, - "learning_rate": 7.857479634380327e-07, - "loss": 0.8155, + "learning_rate": 8.011190062048701e-07, + "loss": 0.8712, "step": 30898 }, { - "epoch": 0.8768161180476731, + "epoch": 0.8755986284677945, "grad_norm": 0.0, - "learning_rate": 7.853908892594897e-07, - "loss": 0.7732, + "learning_rate": 8.007591054975016e-07, + "loss": 0.806, "step": 30899 }, { - "epoch": 0.876844494892168, + "epoch": 0.875626965910057, "grad_norm": 0.0, - "learning_rate": 7.850338929171663e-07, - "loss": 0.7864, + "learning_rate": 8.003992822787576e-07, + "loss": 0.8154, "step": 30900 }, { - "epoch": 0.8768728717366628, + "epoch": 0.8756553033523194, "grad_norm": 0.0, - "learning_rate": 7.846769744140736e-07, - "loss": 0.7408, + "learning_rate": 8.000395365516644e-07, + "loss": 0.8441, "step": 30901 }, { - "epoch": 0.8769012485811578, + "epoch": 0.8756836407945818, "grad_norm": 0.0, - "learning_rate": 7.843201337532292e-07, - "loss": 0.7157, + "learning_rate": 7.996798683192553e-07, + "loss": 0.9037, "step": 30902 }, { - "epoch": 0.8769296254256527, + "epoch": 0.8757119782368443, "grad_norm": 0.0, - "learning_rate": 7.839633709376481e-07, - "loss": 0.789, + "learning_rate": 7.993202775845599e-07, + "loss": 0.7753, "step": 30903 }, { - "epoch": 0.8769580022701475, + "epoch": 0.8757403156791068, "grad_norm": 0.0, - "learning_rate": 7.836066859703406e-07, - "loss": 0.8169, + "learning_rate": 7.989607643506048e-07, + "loss": 0.8172, "step": 30904 }, { - "epoch": 0.8769863791146425, + "epoch": 0.8757686531213693, "grad_norm": 0.0, - "learning_rate": 7.832500788543229e-07, - "loss": 0.8188, + "learning_rate": 7.986013286204197e-07, + "loss": 0.9255, "step": 30905 }, { - "epoch": 0.8770147559591374, + "epoch": 0.8757969905636317, "grad_norm": 0.0, - "learning_rate": 7.828935495926048e-07, - "loss": 0.8241, + "learning_rate": 7.982419703970323e-07, + "loss": 0.8601, "step": 30906 }, { - "epoch": 0.8770431328036322, + "epoch": 0.8758253280058942, "grad_norm": 0.0, - "learning_rate": 7.825370981882008e-07, - "loss": 0.8167, + "learning_rate": 7.978826896834702e-07, + "loss": 0.8894, "step": 30907 }, { - "epoch": 0.8770715096481271, + "epoch": 0.8758536654481567, "grad_norm": 0.0, - "learning_rate": 7.821807246441193e-07, - "loss": 0.8018, + "learning_rate": 7.975234864827619e-07, + "loss": 0.7941, "step": 30908 }, { - "epoch": 0.877099886492622, + "epoch": 0.8758820028904191, "grad_norm": 0.0, - "learning_rate": 7.818244289633736e-07, - "loss": 0.8621, + "learning_rate": 7.971643607979273e-07, + "loss": 0.7818, "step": 30909 }, { - "epoch": 0.8771282633371169, + "epoch": 0.8759103403326816, "grad_norm": 0.0, - "learning_rate": 7.814682111489691e-07, - "loss": 0.8508, + "learning_rate": 7.968053126319953e-07, + "loss": 0.8166, "step": 30910 }, { - "epoch": 0.8771566401816118, + "epoch": 0.875938677774944, "grad_norm": 0.0, - "learning_rate": 7.811120712039178e-07, - "loss": 0.8101, + "learning_rate": 7.96446341987992e-07, + "loss": 0.8409, "step": 30911 }, { - "epoch": 0.8771850170261067, + "epoch": 0.8759670152172065, "grad_norm": 0.0, - "learning_rate": 7.807560091312283e-07, - "loss": 0.8514, + "learning_rate": 7.960874488689363e-07, + "loss": 0.8081, "step": 30912 }, { - "epoch": 0.8772133938706016, + "epoch": 0.8759953526594689, "grad_norm": 0.0, - "learning_rate": 7.804000249339061e-07, - "loss": 0.8801, + "learning_rate": 7.957286332778558e-07, + "loss": 0.744, "step": 30913 }, { - "epoch": 0.8772417707150965, + "epoch": 0.8760236901017314, "grad_norm": 0.0, - "learning_rate": 7.800441186149599e-07, - "loss": 0.7429, + "learning_rate": 7.953698952177691e-07, + "loss": 0.8441, "step": 30914 }, { - "epoch": 0.8772701475595914, + "epoch": 0.8760520275439939, "grad_norm": 0.0, - "learning_rate": 7.79688290177395e-07, - "loss": 0.7716, + "learning_rate": 7.950112346917004e-07, + "loss": 0.93, "step": 30915 }, { - "epoch": 0.8772985244040863, + "epoch": 0.8760803649862563, "grad_norm": 0.0, - "learning_rate": 7.793325396242169e-07, - "loss": 0.772, + "learning_rate": 7.94652651702671e-07, + "loss": 0.7751, "step": 30916 }, { - "epoch": 0.8773269012485811, + "epoch": 0.8761087024285188, "grad_norm": 0.0, - "learning_rate": 7.78976866958433e-07, - "loss": 0.8399, + "learning_rate": 7.942941462536991e-07, + "loss": 0.8565, "step": 30917 }, { - "epoch": 0.877355278093076, + "epoch": 0.8761370398707813, "grad_norm": 0.0, - "learning_rate": 7.786212721830455e-07, - "loss": 0.7786, + "learning_rate": 7.939357183478069e-07, + "loss": 0.8511, "step": 30918 }, { - "epoch": 0.877383654937571, + "epoch": 0.8761653773130437, "grad_norm": 0.0, - "learning_rate": 7.782657553010575e-07, - "loss": 0.7877, + "learning_rate": 7.935773679880121e-07, + "loss": 0.88, "step": 30919 }, { - "epoch": 0.8774120317820658, + "epoch": 0.8761937147553062, "grad_norm": 0.0, - "learning_rate": 7.779103163154756e-07, - "loss": 0.7516, + "learning_rate": 7.932190951773344e-07, + "loss": 0.8878, "step": 30920 }, { - "epoch": 0.8774404086265607, + "epoch": 0.8762220521975687, "grad_norm": 0.0, - "learning_rate": 7.775549552292972e-07, - "loss": 1.0217, + "learning_rate": 7.928608999187925e-07, + "loss": 0.7878, "step": 30921 }, { - "epoch": 0.8774687854710557, + "epoch": 0.8762503896398312, "grad_norm": 0.0, - "learning_rate": 7.771996720455255e-07, - "loss": 0.8386, + "learning_rate": 7.925027822153997e-07, + "loss": 0.7765, "step": 30922 }, { - "epoch": 0.8774971623155505, + "epoch": 0.8762787270820935, "grad_norm": 0.0, - "learning_rate": 7.76844466767166e-07, - "loss": 0.8236, + "learning_rate": 7.921447420701755e-07, + "loss": 0.8335, "step": 30923 }, { - "epoch": 0.8775255391600454, + "epoch": 0.876307064524356, "grad_norm": 0.0, - "learning_rate": 7.76489339397215e-07, - "loss": 0.8245, + "learning_rate": 7.917867794861378e-07, + "loss": 0.7573, "step": 30924 }, { - "epoch": 0.8775539160045402, + "epoch": 0.8763354019666185, "grad_norm": 0.0, - "learning_rate": 7.761342899386725e-07, - "loss": 0.8139, + "learning_rate": 7.914288944662973e-07, + "loss": 0.741, "step": 30925 }, { - "epoch": 0.8775822928490352, + "epoch": 0.8763637394088809, "grad_norm": 0.0, - "learning_rate": 7.757793183945395e-07, - "loss": 0.7612, + "learning_rate": 7.910710870136729e-07, + "loss": 0.7058, "step": 30926 }, { - "epoch": 0.8776106696935301, + "epoch": 0.8763920768511434, "grad_norm": 0.0, - "learning_rate": 7.754244247678122e-07, - "loss": 0.8871, + "learning_rate": 7.907133571312742e-07, + "loss": 0.8421, "step": 30927 }, { - "epoch": 0.8776390465380249, + "epoch": 0.8764204142934059, "grad_norm": 0.0, - "learning_rate": 7.750696090614895e-07, - "loss": 0.7878, + "learning_rate": 7.903557048221167e-07, + "loss": 0.8385, "step": 30928 }, { - "epoch": 0.8776674233825199, + "epoch": 0.8764487517356684, "grad_norm": 0.0, - "learning_rate": 7.747148712785701e-07, - "loss": 0.7469, + "learning_rate": 7.899981300892145e-07, + "loss": 0.737, "step": 30929 }, { - "epoch": 0.8776958002270148, + "epoch": 0.8764770891779308, "grad_norm": 0.0, - "learning_rate": 7.743602114220472e-07, - "loss": 0.7568, + "learning_rate": 7.896406329355766e-07, + "loss": 0.7677, "step": 30930 }, { - "epoch": 0.8777241770715096, + "epoch": 0.8765054266201933, "grad_norm": 0.0, - "learning_rate": 7.740056294949183e-07, - "loss": 0.8555, + "learning_rate": 7.892832133642159e-07, + "loss": 0.7902, "step": 30931 }, { - "epoch": 0.8777525539160045, + "epoch": 0.8765337640624558, "grad_norm": 0.0, - "learning_rate": 7.7365112550018e-07, - "loss": 0.9123, + "learning_rate": 7.889258713781434e-07, + "loss": 0.8621, "step": 30932 }, { - "epoch": 0.8777809307604995, + "epoch": 0.8765621015047181, "grad_norm": 0.0, - "learning_rate": 7.732966994408242e-07, - "loss": 0.788, + "learning_rate": 7.885686069803678e-07, + "loss": 0.7804, "step": 30933 }, { - "epoch": 0.8778093076049943, + "epoch": 0.8765904389469806, "grad_norm": 0.0, - "learning_rate": 7.729423513198453e-07, - "loss": 0.7563, + "learning_rate": 7.882114201739022e-07, + "loss": 0.833, "step": 30934 }, { - "epoch": 0.8778376844494892, + "epoch": 0.8766187763892431, "grad_norm": 0.0, - "learning_rate": 7.725880811402397e-07, - "loss": 0.8025, + "learning_rate": 7.878543109617498e-07, + "loss": 0.8356, "step": 30935 }, { - "epoch": 0.8778660612939841, + "epoch": 0.8766471138315056, "grad_norm": 0.0, - "learning_rate": 7.722338889049952e-07, - "loss": 0.7649, + "learning_rate": 7.874972793469216e-07, + "loss": 0.6792, "step": 30936 }, { - "epoch": 0.877894438138479, + "epoch": 0.876675451273768, "grad_norm": 0.0, - "learning_rate": 7.718797746171058e-07, - "loss": 0.8349, + "learning_rate": 7.871403253324272e-07, + "loss": 0.7862, "step": 30937 }, { - "epoch": 0.8779228149829739, + "epoch": 0.8767037887160305, "grad_norm": 0.0, - "learning_rate": 7.715257382795627e-07, - "loss": 0.7017, + "learning_rate": 7.86783448921269e-07, + "loss": 0.7868, "step": 30938 }, { - "epoch": 0.8779511918274688, + "epoch": 0.876732126158293, "grad_norm": 0.0, - "learning_rate": 7.711717798953566e-07, - "loss": 0.7736, + "learning_rate": 7.864266501164541e-07, + "loss": 0.794, "step": 30939 }, { - "epoch": 0.8779795686719637, + "epoch": 0.8767604636005554, "grad_norm": 0.0, - "learning_rate": 7.708178994674787e-07, - "loss": 0.8083, + "learning_rate": 7.860699289209917e-07, + "loss": 0.8351, "step": 30940 }, { - "epoch": 0.8780079455164586, + "epoch": 0.8767888010428179, "grad_norm": 0.0, - "learning_rate": 7.704640969989163e-07, - "loss": 0.7417, + "learning_rate": 7.857132853378813e-07, + "loss": 0.8063, "step": 30941 }, { - "epoch": 0.8780363223609534, + "epoch": 0.8768171384850804, "grad_norm": 0.0, - "learning_rate": 7.701103724926573e-07, - "loss": 0.6993, + "learning_rate": 7.853567193701317e-07, + "loss": 0.8833, "step": 30942 }, { - "epoch": 0.8780646992054484, + "epoch": 0.8768454759273427, "grad_norm": 0.0, - "learning_rate": 7.697567259516936e-07, - "loss": 0.6878, + "learning_rate": 7.850002310207428e-07, + "loss": 0.7169, "step": 30943 }, { - "epoch": 0.8780930760499432, + "epoch": 0.8768738133696052, "grad_norm": 0.0, - "learning_rate": 7.694031573790072e-07, - "loss": 0.7807, + "learning_rate": 7.846438202927187e-07, + "loss": 0.7746, "step": 30944 }, { - "epoch": 0.8781214528944381, + "epoch": 0.8769021508118677, "grad_norm": 0.0, - "learning_rate": 7.690496667775882e-07, - "loss": 0.7584, + "learning_rate": 7.842874871890615e-07, + "loss": 0.8215, "step": 30945 }, { - "epoch": 0.8781498297389331, + "epoch": 0.8769304882541302, "grad_norm": 0.0, - "learning_rate": 7.686962541504228e-07, - "loss": 0.7783, + "learning_rate": 7.839312317127734e-07, + "loss": 0.875, "step": 30946 }, { - "epoch": 0.8781782065834279, + "epoch": 0.8769588256963926, "grad_norm": 0.0, - "learning_rate": 7.683429195004932e-07, - "loss": 0.7449, + "learning_rate": 7.835750538668563e-07, + "loss": 0.7836, "step": 30947 }, { - "epoch": 0.8782065834279228, + "epoch": 0.8769871631386551, "grad_norm": 0.0, - "learning_rate": 7.679896628307859e-07, - "loss": 0.8104, + "learning_rate": 7.832189536543067e-07, + "loss": 0.7994, "step": 30948 }, { - "epoch": 0.8782349602724177, + "epoch": 0.8770155005809176, "grad_norm": 0.0, - "learning_rate": 7.676364841442874e-07, - "loss": 0.7661, + "learning_rate": 7.828629310781266e-07, + "loss": 0.8072, "step": 30949 }, { - "epoch": 0.8782633371169126, + "epoch": 0.87704383802318, "grad_norm": 0.0, - "learning_rate": 7.672833834439764e-07, - "loss": 0.7256, + "learning_rate": 7.82506986141317e-07, + "loss": 0.8545, "step": 30950 }, { - "epoch": 0.8782917139614075, + "epoch": 0.8770721754654425, "grad_norm": 0.0, - "learning_rate": 7.669303607328371e-07, - "loss": 0.7092, + "learning_rate": 7.821511188468723e-07, + "loss": 0.7961, "step": 30951 }, { - "epoch": 0.8783200908059023, + "epoch": 0.877100512907705, "grad_norm": 0.0, - "learning_rate": 7.665774160138528e-07, - "loss": 0.87, + "learning_rate": 7.817953291977908e-07, + "loss": 0.8611, "step": 30952 }, { - "epoch": 0.8783484676503973, + "epoch": 0.8771288503499675, "grad_norm": 0.0, - "learning_rate": 7.662245492900045e-07, - "loss": 0.7244, + "learning_rate": 7.814396171970729e-07, + "loss": 0.6945, "step": 30953 }, { - "epoch": 0.8783768444948922, + "epoch": 0.8771571877922298, "grad_norm": 0.0, - "learning_rate": 7.658717605642729e-07, - "loss": 0.8015, + "learning_rate": 7.810839828477101e-07, + "loss": 0.7793, "step": 30954 }, { - "epoch": 0.878405221339387, + "epoch": 0.8771855252344923, "grad_norm": 0.0, - "learning_rate": 7.65519049839637e-07, - "loss": 0.7215, + "learning_rate": 7.807284261527015e-07, + "loss": 0.8228, "step": 30955 }, { - "epoch": 0.878433598183882, + "epoch": 0.8772138626767548, "grad_norm": 0.0, - "learning_rate": 7.651664171190765e-07, - "loss": 0.8345, + "learning_rate": 7.803729471150401e-07, + "loss": 0.7364, "step": 30956 }, { - "epoch": 0.8784619750283769, + "epoch": 0.8772422001190172, "grad_norm": 0.0, - "learning_rate": 7.648138624055724e-07, - "loss": 0.8436, + "learning_rate": 7.800175457377213e-07, + "loss": 0.8452, "step": 30957 }, { - "epoch": 0.8784903518728717, + "epoch": 0.8772705375612797, "grad_norm": 0.0, - "learning_rate": 7.644613857020988e-07, - "loss": 0.7752, + "learning_rate": 7.796622220237371e-07, + "loss": 0.8568, "step": 30958 }, { - "epoch": 0.8785187287173666, + "epoch": 0.8772988750035422, "grad_norm": 0.0, - "learning_rate": 7.641089870116347e-07, - "loss": 0.832, + "learning_rate": 7.79306975976083e-07, + "loss": 0.8328, "step": 30959 }, { - "epoch": 0.8785471055618616, + "epoch": 0.8773272124458047, "grad_norm": 0.0, - "learning_rate": 7.637566663371598e-07, - "loss": 0.7076, + "learning_rate": 7.789518075977498e-07, + "loss": 0.8232, "step": 30960 }, { - "epoch": 0.8785754824063564, + "epoch": 0.8773555498880671, "grad_norm": 0.0, - "learning_rate": 7.634044236816451e-07, - "loss": 0.9467, + "learning_rate": 7.785967168917319e-07, + "loss": 0.8373, "step": 30961 }, { - "epoch": 0.8786038592508513, + "epoch": 0.8773838873303296, "grad_norm": 0.0, - "learning_rate": 7.630522590480693e-07, - "loss": 0.8483, + "learning_rate": 7.782417038610158e-07, + "loss": 0.7657, "step": 30962 }, { - "epoch": 0.8786322360953462, + "epoch": 0.8774122247725921, "grad_norm": 0.0, - "learning_rate": 7.627001724394067e-07, - "loss": 0.8597, + "learning_rate": 7.778867685085956e-07, + "loss": 0.9094, "step": 30963 }, { - "epoch": 0.8786606129398411, + "epoch": 0.8774405622148544, "grad_norm": 0.0, - "learning_rate": 7.623481638586305e-07, - "loss": 0.8462, + "learning_rate": 7.77531910837459e-07, + "loss": 0.7997, "step": 30964 }, { - "epoch": 0.878688989784336, + "epoch": 0.8774688996571169, "grad_norm": 0.0, - "learning_rate": 7.619962333087138e-07, - "loss": 0.8569, + "learning_rate": 7.771771308505949e-07, + "loss": 0.8588, "step": 30965 }, { - "epoch": 0.8787173666288308, + "epoch": 0.8774972370993794, "grad_norm": 0.0, - "learning_rate": 7.616443807926321e-07, - "loss": 0.8732, + "learning_rate": 7.768224285509951e-07, + "loss": 0.7821, "step": 30966 }, { - "epoch": 0.8787457434733258, + "epoch": 0.8775255745416418, "grad_norm": 0.0, - "learning_rate": 7.612926063133541e-07, - "loss": 0.683, + "learning_rate": 7.764678039416429e-07, + "loss": 0.8618, "step": 30967 }, { - "epoch": 0.8787741203178207, + "epoch": 0.8775539119839043, "grad_norm": 0.0, - "learning_rate": 7.609409098738519e-07, - "loss": 0.8092, + "learning_rate": 7.761132570255281e-07, + "loss": 0.742, "step": 30968 }, { - "epoch": 0.8788024971623155, + "epoch": 0.8775822494261668, "grad_norm": 0.0, - "learning_rate": 7.605892914770985e-07, - "loss": 0.7726, + "learning_rate": 7.757587878056372e-07, + "loss": 0.8156, "step": 30969 }, { - "epoch": 0.8788308740068105, + "epoch": 0.8776105868684293, "grad_norm": 0.0, - "learning_rate": 7.602377511260617e-07, - "loss": 0.7257, + "learning_rate": 7.754043962849545e-07, + "loss": 0.8437, "step": 30970 }, { - "epoch": 0.8788592508513053, + "epoch": 0.8776389243106917, "grad_norm": 0.0, - "learning_rate": 7.598862888237146e-07, - "loss": 0.8171, + "learning_rate": 7.750500824664653e-07, + "loss": 0.7639, "step": 30971 }, { - "epoch": 0.8788876276958002, + "epoch": 0.8776672617529542, "grad_norm": 0.0, - "learning_rate": 7.595349045730205e-07, - "loss": 0.7205, + "learning_rate": 7.746958463531551e-07, + "loss": 0.8253, "step": 30972 }, { - "epoch": 0.8789160045402952, + "epoch": 0.8776955991952167, "grad_norm": 0.0, - "learning_rate": 7.591835983769513e-07, - "loss": 0.7946, + "learning_rate": 7.74341687948007e-07, + "loss": 0.8647, "step": 30973 }, { - "epoch": 0.87894438138479, + "epoch": 0.877723936637479, "grad_norm": 0.0, - "learning_rate": 7.588323702384748e-07, - "loss": 0.8581, + "learning_rate": 7.739876072540076e-07, + "loss": 0.8323, "step": 30974 }, { - "epoch": 0.8789727582292849, + "epoch": 0.8777522740797415, "grad_norm": 0.0, - "learning_rate": 7.584812201605562e-07, - "loss": 0.7941, + "learning_rate": 7.736336042741344e-07, + "loss": 0.8237, "step": 30975 }, { - "epoch": 0.8790011350737797, + "epoch": 0.877780611522004, "grad_norm": 0.0, - "learning_rate": 7.581301481461611e-07, - "loss": 0.8621, + "learning_rate": 7.732796790113728e-07, + "loss": 0.8755, "step": 30976 }, { - "epoch": 0.8790295119182747, + "epoch": 0.8778089489642665, "grad_norm": 0.0, - "learning_rate": 7.577791541982582e-07, - "loss": 0.8433, + "learning_rate": 7.729258314687016e-07, + "loss": 0.7042, "step": 30977 }, { - "epoch": 0.8790578887627696, + "epoch": 0.8778372864065289, "grad_norm": 0.0, - "learning_rate": 7.574282383198072e-07, - "loss": 0.8274, + "learning_rate": 7.725720616491017e-07, + "loss": 0.7729, "step": 30978 }, { - "epoch": 0.8790862656072644, + "epoch": 0.8778656238487914, "grad_norm": 0.0, - "learning_rate": 7.570774005137771e-07, - "loss": 0.8351, + "learning_rate": 7.722183695555563e-07, + "loss": 0.7447, "step": 30979 }, { - "epoch": 0.8791146424517594, + "epoch": 0.8778939612910539, "grad_norm": 0.0, - "learning_rate": 7.567266407831308e-07, - "loss": 0.7695, + "learning_rate": 7.718647551910408e-07, + "loss": 0.7202, "step": 30980 }, { - "epoch": 0.8791430192962543, + "epoch": 0.8779222987333163, "grad_norm": 0.0, - "learning_rate": 7.563759591308284e-07, - "loss": 0.8057, + "learning_rate": 7.71511218558535e-07, + "loss": 0.7582, "step": 30981 }, { - "epoch": 0.8791713961407491, + "epoch": 0.8779506361755788, "grad_norm": 0.0, - "learning_rate": 7.56025355559834e-07, - "loss": 0.8039, + "learning_rate": 7.711577596610176e-07, + "loss": 0.793, "step": 30982 }, { - "epoch": 0.879199772985244, + "epoch": 0.8779789736178413, "grad_norm": 0.0, - "learning_rate": 7.556748300731087e-07, - "loss": 0.8287, + "learning_rate": 7.708043785014652e-07, + "loss": 0.7762, "step": 30983 }, { - "epoch": 0.879228149829739, + "epoch": 0.8780073110601038, "grad_norm": 0.0, - "learning_rate": 7.553243826736134e-07, - "loss": 0.8195, + "learning_rate": 7.704510750828542e-07, + "loss": 0.8306, "step": 30984 }, { - "epoch": 0.8792565266742338, + "epoch": 0.8780356485023662, "grad_norm": 0.0, - "learning_rate": 7.549740133643102e-07, - "loss": 0.8193, + "learning_rate": 7.700978494081612e-07, + "loss": 0.7961, "step": 30985 }, { - "epoch": 0.8792849035187287, + "epoch": 0.8780639859446286, "grad_norm": 0.0, - "learning_rate": 7.546237221481567e-07, - "loss": 0.7827, + "learning_rate": 7.697447014803617e-07, + "loss": 0.8608, "step": 30986 }, { - "epoch": 0.8793132803632236, + "epoch": 0.8780923233868911, "grad_norm": 0.0, - "learning_rate": 7.542735090281105e-07, - "loss": 0.8288, + "learning_rate": 7.693916313024308e-07, + "loss": 0.8079, "step": 30987 }, { - "epoch": 0.8793416572077185, + "epoch": 0.8781206608291535, "grad_norm": 0.0, - "learning_rate": 7.53923374007135e-07, - "loss": 0.82, + "learning_rate": 7.690386388773408e-07, + "loss": 0.7378, "step": 30988 }, { - "epoch": 0.8793700340522134, + "epoch": 0.878148998271416, "grad_norm": 0.0, - "learning_rate": 7.53573317088181e-07, - "loss": 0.6867, + "learning_rate": 7.68685724208067e-07, + "loss": 0.8843, "step": 30989 }, { - "epoch": 0.8793984108967083, + "epoch": 0.8781773357136785, "grad_norm": 0.0, - "learning_rate": 7.532233382742105e-07, - "loss": 0.7805, + "learning_rate": 7.683328872975815e-07, + "loss": 0.8146, "step": 30990 }, { - "epoch": 0.8794267877412032, + "epoch": 0.8782056731559409, "grad_norm": 0.0, - "learning_rate": 7.52873437568179e-07, - "loss": 0.7463, + "learning_rate": 7.679801281488553e-07, + "loss": 0.7772, "step": 30991 }, { - "epoch": 0.8794551645856981, + "epoch": 0.8782340105982034, "grad_norm": 0.0, - "learning_rate": 7.525236149730397e-07, - "loss": 0.7873, + "learning_rate": 7.676274467648626e-07, + "loss": 0.8304, "step": 30992 }, { - "epoch": 0.8794835414301929, + "epoch": 0.8782623480404659, "grad_norm": 0.0, - "learning_rate": 7.521738704917481e-07, - "loss": 0.8142, + "learning_rate": 7.6727484314857e-07, + "loss": 0.802, "step": 30993 }, { - "epoch": 0.8795119182746879, + "epoch": 0.8782906854827284, "grad_norm": 0.0, - "learning_rate": 7.518242041272628e-07, - "loss": 0.7077, + "learning_rate": 7.669223173029505e-07, + "loss": 0.9297, "step": 30994 }, { - "epoch": 0.8795402951191827, + "epoch": 0.8783190229249908, "grad_norm": 0.0, - "learning_rate": 7.514746158825314e-07, - "loss": 0.8478, + "learning_rate": 7.665698692309742e-07, + "loss": 0.7788, "step": 30995 }, { - "epoch": 0.8795686719636776, + "epoch": 0.8783473603672532, "grad_norm": 0.0, - "learning_rate": 7.511251057605107e-07, - "loss": 0.8141, + "learning_rate": 7.662174989356075e-07, + "loss": 0.8208, "step": 30996 }, { - "epoch": 0.8795970488081726, + "epoch": 0.8783756978095157, "grad_norm": 0.0, - "learning_rate": 7.507756737641525e-07, - "loss": 0.7767, + "learning_rate": 7.658652064198191e-07, + "loss": 0.7616, "step": 30997 }, { - "epoch": 0.8796254256526674, + "epoch": 0.8784040352517781, "grad_norm": 0.0, - "learning_rate": 7.504263198964057e-07, - "loss": 0.7251, + "learning_rate": 7.655129916865778e-07, + "loss": 0.826, "step": 30998 }, { - "epoch": 0.8796538024971623, + "epoch": 0.8784323726940406, "grad_norm": 0.0, - "learning_rate": 7.500770441602257e-07, - "loss": 0.7027, + "learning_rate": 7.651608547388489e-07, + "loss": 0.9096, "step": 30999 }, { - "epoch": 0.8796821793416572, + "epoch": 0.8784607101363031, "grad_norm": 0.0, - "learning_rate": 7.4972784655856e-07, - "loss": 0.969, + "learning_rate": 7.648087955796014e-07, + "loss": 0.8034, "step": 31000 }, { - "epoch": 0.8797105561861521, + "epoch": 0.8784890475785656, "grad_norm": 0.0, - "learning_rate": 7.493787270943586e-07, - "loss": 0.7793, + "learning_rate": 7.64456814211797e-07, + "loss": 0.8561, "step": 31001 }, { - "epoch": 0.879738933030647, + "epoch": 0.878517385020828, "grad_norm": 0.0, - "learning_rate": 7.490296857705737e-07, - "loss": 0.698, + "learning_rate": 7.641049106384024e-07, + "loss": 0.7369, "step": 31002 }, { - "epoch": 0.8797673098751418, + "epoch": 0.8785457224630905, "grad_norm": 0.0, - "learning_rate": 7.486807225901483e-07, - "loss": 0.8721, + "learning_rate": 7.637530848623842e-07, + "loss": 0.7457, "step": 31003 }, { - "epoch": 0.8797956867196368, + "epoch": 0.878574059905353, "grad_norm": 0.0, - "learning_rate": 7.483318375560322e-07, - "loss": 0.7081, + "learning_rate": 7.63401336886701e-07, + "loss": 0.7704, "step": 31004 }, { - "epoch": 0.8798240635641317, + "epoch": 0.8786023973476154, "grad_norm": 0.0, - "learning_rate": 7.479830306711755e-07, - "loss": 0.7912, + "learning_rate": 7.630496667143195e-07, + "loss": 0.8679, "step": 31005 }, { - "epoch": 0.8798524404086265, + "epoch": 0.8786307347898779, "grad_norm": 0.0, - "learning_rate": 7.476343019385202e-07, - "loss": 0.6914, + "learning_rate": 7.626980743482004e-07, + "loss": 0.7412, "step": 31006 }, { - "epoch": 0.8798808172531215, + "epoch": 0.8786590722321403, "grad_norm": 0.0, - "learning_rate": 7.472856513610139e-07, - "loss": 0.8059, + "learning_rate": 7.623465597913049e-07, + "loss": 0.8965, "step": 31007 }, { - "epoch": 0.8799091940976164, + "epoch": 0.8786874096744028, "grad_norm": 0.0, - "learning_rate": 7.46937078941603e-07, - "loss": 0.847, + "learning_rate": 7.619951230465961e-07, + "loss": 0.8325, "step": 31008 }, { - "epoch": 0.8799375709421112, + "epoch": 0.8787157471166652, "grad_norm": 0.0, - "learning_rate": 7.465885846832288e-07, - "loss": 0.7284, + "learning_rate": 7.616437641170316e-07, + "loss": 0.7794, "step": 31009 }, { - "epoch": 0.8799659477866061, + "epoch": 0.8787440845589277, "grad_norm": 0.0, - "learning_rate": 7.462401685888365e-07, - "loss": 0.7863, + "learning_rate": 7.612924830055724e-07, + "loss": 0.7895, "step": 31010 }, { - "epoch": 0.879994324631101, + "epoch": 0.8787724220011902, "grad_norm": 0.0, - "learning_rate": 7.458918306613716e-07, - "loss": 0.8565, + "learning_rate": 7.609412797151771e-07, + "loss": 0.7914, "step": 31011 }, { - "epoch": 0.8800227014755959, + "epoch": 0.8788007594434526, "grad_norm": 0.0, - "learning_rate": 7.455435709037728e-07, - "loss": 0.7269, + "learning_rate": 7.605901542488037e-07, + "loss": 0.7873, "step": 31012 }, { - "epoch": 0.8800510783200908, + "epoch": 0.8788290968857151, "grad_norm": 0.0, - "learning_rate": 7.451953893189834e-07, - "loss": 0.8727, + "learning_rate": 7.602391066094128e-07, + "loss": 0.793, "step": 31013 }, { - "epoch": 0.8800794551645857, + "epoch": 0.8788574343279776, "grad_norm": 0.0, - "learning_rate": 7.448472859099454e-07, - "loss": 0.7757, + "learning_rate": 7.598881367999566e-07, + "loss": 0.8063, "step": 31014 }, { - "epoch": 0.8801078320090806, + "epoch": 0.87888577177024, "grad_norm": 0.0, - "learning_rate": 7.444992606795975e-07, - "loss": 0.7706, + "learning_rate": 7.595372448233951e-07, + "loss": 0.7688, "step": 31015 }, { - "epoch": 0.8801362088535755, + "epoch": 0.8789141092125025, "grad_norm": 0.0, - "learning_rate": 7.441513136308809e-07, - "loss": 0.7676, + "learning_rate": 7.591864306826835e-07, + "loss": 0.7817, "step": 31016 }, { - "epoch": 0.8801645856980703, + "epoch": 0.878942446654765, "grad_norm": 0.0, - "learning_rate": 7.438034447667353e-07, - "loss": 0.7795, + "learning_rate": 7.58835694380774e-07, + "loss": 0.7736, "step": 31017 }, { - "epoch": 0.8801929625425653, + "epoch": 0.8789707840970274, "grad_norm": 0.0, - "learning_rate": 7.434556540900962e-07, - "loss": 0.7326, + "learning_rate": 7.584850359206242e-07, + "loss": 0.7618, "step": 31018 }, { - "epoch": 0.8802213393870602, + "epoch": 0.8789991215392898, "grad_norm": 0.0, - "learning_rate": 7.431079416039045e-07, - "loss": 0.8133, + "learning_rate": 7.581344553051873e-07, + "loss": 0.7847, "step": 31019 }, { - "epoch": 0.880249716231555, + "epoch": 0.8790274589815523, "grad_norm": 0.0, - "learning_rate": 7.427603073110967e-07, - "loss": 0.9627, + "learning_rate": 7.577839525374142e-07, + "loss": 0.6697, "step": 31020 }, { - "epoch": 0.88027809307605, + "epoch": 0.8790557964238148, "grad_norm": 0.0, - "learning_rate": 7.424127512146073e-07, - "loss": 0.8157, + "learning_rate": 7.574335276202616e-07, + "loss": 0.7665, "step": 31021 }, { - "epoch": 0.8803064699205448, + "epoch": 0.8790841338660772, "grad_norm": 0.0, - "learning_rate": 7.420652733173761e-07, - "loss": 0.7302, + "learning_rate": 7.570831805566759e-07, + "loss": 0.8285, "step": 31022 }, { - "epoch": 0.8803348467650397, + "epoch": 0.8791124713083397, "grad_norm": 0.0, - "learning_rate": 7.41717873622333e-07, - "loss": 0.8015, + "learning_rate": 7.567329113496113e-07, + "loss": 0.8422, "step": 31023 }, { - "epoch": 0.8803632236095347, + "epoch": 0.8791408087506022, "grad_norm": 0.0, - "learning_rate": 7.413705521324155e-07, - "loss": 0.668, + "learning_rate": 7.563827200020179e-07, + "loss": 0.8476, "step": 31024 }, { - "epoch": 0.8803916004540295, + "epoch": 0.8791691461928647, "grad_norm": 0.0, - "learning_rate": 7.410233088505592e-07, - "loss": 0.8189, + "learning_rate": 7.560326065168444e-07, + "loss": 0.9246, "step": 31025 }, { - "epoch": 0.8804199772985244, + "epoch": 0.8791974836351271, "grad_norm": 0.0, - "learning_rate": 7.406761437796928e-07, - "loss": 0.7223, + "learning_rate": 7.556825708970417e-07, + "loss": 0.875, "step": 31026 }, { - "epoch": 0.8804483541430193, + "epoch": 0.8792258210773896, "grad_norm": 0.0, - "learning_rate": 7.403290569227517e-07, - "loss": 0.7962, + "learning_rate": 7.553326131455596e-07, + "loss": 0.8228, "step": 31027 }, { - "epoch": 0.8804767309875142, + "epoch": 0.879254158519652, "grad_norm": 0.0, - "learning_rate": 7.399820482826692e-07, - "loss": 0.7355, + "learning_rate": 7.549827332653415e-07, + "loss": 0.7733, "step": 31028 }, { - "epoch": 0.8805051078320091, + "epoch": 0.8792824959619144, "grad_norm": 0.0, - "learning_rate": 7.396351178623706e-07, - "loss": 0.8265, + "learning_rate": 7.546329312593382e-07, + "loss": 0.7799, "step": 31029 }, { - "epoch": 0.8805334846765039, + "epoch": 0.8793108334041769, "grad_norm": 0.0, - "learning_rate": 7.392882656647926e-07, - "loss": 0.8203, + "learning_rate": 7.54283207130494e-07, + "loss": 0.9121, "step": 31030 }, { - "epoch": 0.8805618615209989, + "epoch": 0.8793391708464394, "grad_norm": 0.0, - "learning_rate": 7.38941491692865e-07, - "loss": 0.7924, + "learning_rate": 7.539335608817556e-07, + "loss": 0.7378, "step": 31031 }, { - "epoch": 0.8805902383654938, + "epoch": 0.8793675082887019, "grad_norm": 0.0, - "learning_rate": 7.385947959495122e-07, - "loss": 0.792, + "learning_rate": 7.535839925160693e-07, + "loss": 0.8227, "step": 31032 }, { - "epoch": 0.8806186152099886, + "epoch": 0.8793958457309643, "grad_norm": 0.0, - "learning_rate": 7.382481784376661e-07, - "loss": 0.7199, + "learning_rate": 7.532345020363774e-07, + "loss": 0.8085, "step": 31033 }, { - "epoch": 0.8806469920544835, + "epoch": 0.8794241831732268, "grad_norm": 0.0, - "learning_rate": 7.379016391602556e-07, - "loss": 0.7516, + "learning_rate": 7.528850894456275e-07, + "loss": 0.9068, "step": 31034 }, { - "epoch": 0.8806753688989785, + "epoch": 0.8794525206154893, "grad_norm": 0.0, - "learning_rate": 7.375551781202051e-07, - "loss": 0.8252, + "learning_rate": 7.525357547467572e-07, + "loss": 0.7574, "step": 31035 }, { - "epoch": 0.8807037457434733, + "epoch": 0.8794808580577517, "grad_norm": 0.0, - "learning_rate": 7.372087953204421e-07, - "loss": 0.8059, + "learning_rate": 7.52186497942713e-07, + "loss": 0.8854, "step": 31036 }, { - "epoch": 0.8807321225879682, + "epoch": 0.8795091955000142, "grad_norm": 0.0, - "learning_rate": 7.368624907638944e-07, - "loss": 0.7315, + "learning_rate": 7.518373190364359e-07, + "loss": 0.7841, "step": 31037 }, { - "epoch": 0.8807604994324632, + "epoch": 0.8795375329422767, "grad_norm": 0.0, - "learning_rate": 7.365162644534841e-07, - "loss": 0.8443, + "learning_rate": 7.514882180308669e-07, + "loss": 0.8412, "step": 31038 }, { - "epoch": 0.880788876276958, + "epoch": 0.879565870384539, "grad_norm": 0.0, - "learning_rate": 7.361701163921364e-07, - "loss": 0.7333, + "learning_rate": 7.51139194928947e-07, + "loss": 0.8128, "step": 31039 }, { - "epoch": 0.8808172531214529, + "epoch": 0.8795942078268015, "grad_norm": 0.0, - "learning_rate": 7.358240465827793e-07, - "loss": 0.8475, + "learning_rate": 7.507902497336184e-07, + "loss": 0.7966, "step": 31040 }, { - "epoch": 0.8808456299659478, + "epoch": 0.879622545269064, "grad_norm": 0.0, - "learning_rate": 7.354780550283303e-07, - "loss": 0.8578, + "learning_rate": 7.504413824478163e-07, + "loss": 0.7214, "step": 31041 }, { - "epoch": 0.8808740068104427, + "epoch": 0.8796508827113265, "grad_norm": 0.0, - "learning_rate": 7.351321417317148e-07, - "loss": 0.6969, + "learning_rate": 7.500925930744829e-07, + "loss": 0.8701, "step": 31042 }, { - "epoch": 0.8809023836549376, + "epoch": 0.8796792201535889, "grad_norm": 0.0, - "learning_rate": 7.34786306695856e-07, - "loss": 0.8696, + "learning_rate": 7.497438816165526e-07, + "loss": 0.7859, "step": 31043 }, { - "epoch": 0.8809307604994324, + "epoch": 0.8797075575958514, "grad_norm": 0.0, - "learning_rate": 7.344405499236706e-07, - "loss": 0.8208, + "learning_rate": 7.49395248076964e-07, + "loss": 0.7924, "step": 31044 }, { - "epoch": 0.8809591373439274, + "epoch": 0.8797358950381139, "grad_norm": 0.0, - "learning_rate": 7.340948714180863e-07, - "loss": 0.7923, + "learning_rate": 7.49046692458657e-07, + "loss": 0.901, "step": 31045 }, { - "epoch": 0.8809875141884222, + "epoch": 0.8797642324803763, "grad_norm": 0.0, - "learning_rate": 7.337492711820171e-07, - "loss": 0.7187, + "learning_rate": 7.486982147645628e-07, + "loss": 0.858, "step": 31046 }, { - "epoch": 0.8810158910329171, + "epoch": 0.8797925699226388, "grad_norm": 0.0, - "learning_rate": 7.334037492183843e-07, - "loss": 0.7175, + "learning_rate": 7.483498149976187e-07, + "loss": 0.8436, "step": 31047 }, { - "epoch": 0.8810442678774121, + "epoch": 0.8798209073649013, "grad_norm": 0.0, - "learning_rate": 7.330583055301089e-07, - "loss": 0.7462, + "learning_rate": 7.480014931607616e-07, + "loss": 0.774, "step": 31048 }, { - "epoch": 0.8810726447219069, + "epoch": 0.8798492448071638, "grad_norm": 0.0, - "learning_rate": 7.327129401201039e-07, - "loss": 0.7952, + "learning_rate": 7.476532492569222e-07, + "loss": 0.8154, "step": 31049 }, { - "epoch": 0.8811010215664018, + "epoch": 0.8798775822494261, "grad_norm": 0.0, - "learning_rate": 7.323676529912893e-07, - "loss": 0.7532, + "learning_rate": 7.47305083289035e-07, + "loss": 0.7838, "step": 31050 }, { - "epoch": 0.8811293984108967, + "epoch": 0.8799059196916886, "grad_norm": 0.0, - "learning_rate": 7.32022444146584e-07, - "loss": 0.8689, + "learning_rate": 7.46956995260033e-07, + "loss": 0.8373, "step": 31051 }, { - "epoch": 0.8811577752553916, + "epoch": 0.8799342571339511, "grad_norm": 0.0, - "learning_rate": 7.316773135889e-07, - "loss": 0.7646, + "learning_rate": 7.466089851728475e-07, + "loss": 0.8372, "step": 31052 }, { - "epoch": 0.8811861520998865, + "epoch": 0.8799625945762135, "grad_norm": 0.0, - "learning_rate": 7.313322613211537e-07, - "loss": 0.8077, + "learning_rate": 7.462610530304137e-07, + "loss": 0.7567, "step": 31053 }, { - "epoch": 0.8812145289443813, + "epoch": 0.879990932018476, "grad_norm": 0.0, - "learning_rate": 7.309872873462632e-07, - "loss": 0.8213, + "learning_rate": 7.459131988356571e-07, + "loss": 0.7736, "step": 31054 }, { - "epoch": 0.8812429057888763, + "epoch": 0.8800192694607385, "grad_norm": 0.0, - "learning_rate": 7.306423916671379e-07, - "loss": 0.8443, + "learning_rate": 7.455654225915098e-07, + "loss": 0.7355, "step": 31055 }, { - "epoch": 0.8812712826333712, + "epoch": 0.880047606903001, "grad_norm": 0.0, - "learning_rate": 7.302975742866925e-07, - "loss": 0.8707, + "learning_rate": 7.452177243009028e-07, + "loss": 0.7963, "step": 31056 }, { - "epoch": 0.881299659477866, + "epoch": 0.8800759443452634, "grad_norm": 0.0, - "learning_rate": 7.299528352078422e-07, - "loss": 0.7238, + "learning_rate": 7.448701039667628e-07, + "loss": 0.7775, "step": 31057 }, { - "epoch": 0.881328036322361, + "epoch": 0.8801042817875259, "grad_norm": 0.0, - "learning_rate": 7.296081744334948e-07, - "loss": 0.8653, + "learning_rate": 7.445225615920204e-07, + "loss": 0.7447, "step": 31058 }, { - "epoch": 0.8813564131668559, + "epoch": 0.8801326192297884, "grad_norm": 0.0, - "learning_rate": 7.292635919665647e-07, - "loss": 0.7617, + "learning_rate": 7.441750971795991e-07, + "loss": 0.7506, "step": 31059 }, { - "epoch": 0.8813847900113507, + "epoch": 0.8801609566720507, "grad_norm": 0.0, - "learning_rate": 7.289190878099606e-07, - "loss": 0.7904, + "learning_rate": 7.438277107324288e-07, + "loss": 0.7639, "step": 31060 }, { - "epoch": 0.8814131668558456, + "epoch": 0.8801892941143132, "grad_norm": 0.0, - "learning_rate": 7.285746619665945e-07, - "loss": 0.8824, + "learning_rate": 7.434804022534359e-07, + "loss": 0.7352, "step": 31061 }, { - "epoch": 0.8814415437003406, + "epoch": 0.8802176315565757, "grad_norm": 0.0, - "learning_rate": 7.282303144393766e-07, - "loss": 0.7191, + "learning_rate": 7.431331717455426e-07, + "loss": 0.8665, "step": 31062 }, { - "epoch": 0.8814699205448354, + "epoch": 0.8802459689988381, "grad_norm": 0.0, - "learning_rate": 7.278860452312131e-07, - "loss": 0.9144, + "learning_rate": 7.427860192116776e-07, + "loss": 0.8934, "step": 31063 }, { - "epoch": 0.8814982973893303, + "epoch": 0.8802743064411006, "grad_norm": 0.0, - "learning_rate": 7.275418543450119e-07, - "loss": 0.8724, + "learning_rate": 7.42438944654762e-07, + "loss": 0.9115, "step": 31064 }, { - "epoch": 0.8815266742338252, + "epoch": 0.8803026438833631, "grad_norm": 0.0, - "learning_rate": 7.271977417836839e-07, - "loss": 0.7269, + "learning_rate": 7.420919480777222e-07, + "loss": 0.8441, "step": 31065 }, { - "epoch": 0.8815550510783201, + "epoch": 0.8803309813256256, "grad_norm": 0.0, - "learning_rate": 7.268537075501314e-07, - "loss": 0.7668, + "learning_rate": 7.417450294834805e-07, + "loss": 0.7938, "step": 31066 }, { - "epoch": 0.881583427922815, + "epoch": 0.880359318767888, "grad_norm": 0.0, - "learning_rate": 7.265097516472619e-07, - "loss": 0.8047, + "learning_rate": 7.413981888749577e-07, + "loss": 0.7366, "step": 31067 }, { - "epoch": 0.8816118047673098, + "epoch": 0.8803876562101505, "grad_norm": 0.0, - "learning_rate": 7.261658740779842e-07, - "loss": 0.7778, + "learning_rate": 7.410514262550749e-07, + "loss": 0.7457, "step": 31068 }, { - "epoch": 0.8816401816118048, + "epoch": 0.880415993652413, "grad_norm": 0.0, - "learning_rate": 7.258220748451971e-07, - "loss": 0.8724, + "learning_rate": 7.407047416267565e-07, + "loss": 0.8226, "step": 31069 }, { - "epoch": 0.8816685584562997, + "epoch": 0.8804443310946753, "grad_norm": 0.0, - "learning_rate": 7.254783539518096e-07, - "loss": 0.7677, + "learning_rate": 7.40358134992919e-07, + "loss": 0.848, "step": 31070 }, { - "epoch": 0.8816969353007945, + "epoch": 0.8804726685369378, "grad_norm": 0.0, - "learning_rate": 7.251347114007234e-07, - "loss": 0.7191, + "learning_rate": 7.400116063564844e-07, + "loss": 0.8233, "step": 31071 }, { - "epoch": 0.8817253121452895, + "epoch": 0.8805010059792003, "grad_norm": 0.0, - "learning_rate": 7.24791147194841e-07, - "loss": 0.6689, + "learning_rate": 7.396651557203693e-07, + "loss": 0.778, "step": 31072 }, { - "epoch": 0.8817536889897843, + "epoch": 0.8805293434214628, "grad_norm": 0.0, - "learning_rate": 7.244476613370644e-07, - "loss": 0.8194, + "learning_rate": 7.393187830874938e-07, + "loss": 0.8549, "step": 31073 }, { - "epoch": 0.8817820658342792, + "epoch": 0.8805576808637252, "grad_norm": 0.0, - "learning_rate": 7.241042538302967e-07, - "loss": 0.7698, + "learning_rate": 7.389724884607763e-07, + "loss": 0.7875, "step": 31074 }, { - "epoch": 0.8818104426787742, + "epoch": 0.8805860183059877, "grad_norm": 0.0, - "learning_rate": 7.237609246774335e-07, - "loss": 0.709, + "learning_rate": 7.386262718431303e-07, + "loss": 0.7172, "step": 31075 }, { - "epoch": 0.881838819523269, + "epoch": 0.8806143557482502, "grad_norm": 0.0, - "learning_rate": 7.234176738813825e-07, - "loss": 0.7845, + "learning_rate": 7.382801332374745e-07, + "loss": 0.7436, "step": 31076 }, { - "epoch": 0.8818671963677639, + "epoch": 0.8806426931905126, "grad_norm": 0.0, - "learning_rate": 7.23074501445038e-07, - "loss": 0.7818, + "learning_rate": 7.379340726467254e-07, + "loss": 0.7433, "step": 31077 }, { - "epoch": 0.8818955732122588, + "epoch": 0.8806710306327751, "grad_norm": 0.0, - "learning_rate": 7.227314073712998e-07, - "loss": 0.7624, + "learning_rate": 7.375880900737964e-07, + "loss": 0.8369, "step": 31078 }, { - "epoch": 0.8819239500567537, + "epoch": 0.8806993680750376, "grad_norm": 0.0, - "learning_rate": 7.223883916630691e-07, - "loss": 0.7339, + "learning_rate": 7.372421855216039e-07, + "loss": 0.7453, "step": 31079 }, { - "epoch": 0.8819523269012486, + "epoch": 0.8807277055173001, "grad_norm": 0.0, - "learning_rate": 7.22045454323238e-07, - "loss": 1.0245, + "learning_rate": 7.368963589930589e-07, + "loss": 0.7084, "step": 31080 }, { - "epoch": 0.8819807037457434, + "epoch": 0.8807560429595624, "grad_norm": 0.0, - "learning_rate": 7.217025953547064e-07, - "loss": 0.8804, + "learning_rate": 7.365506104910747e-07, + "loss": 0.8287, "step": 31081 }, { - "epoch": 0.8820090805902384, + "epoch": 0.8807843804018249, "grad_norm": 0.0, - "learning_rate": 7.213598147603717e-07, - "loss": 0.8136, + "learning_rate": 7.362049400185667e-07, + "loss": 0.7672, "step": 31082 }, { - "epoch": 0.8820374574347333, + "epoch": 0.8808127178440874, "grad_norm": 0.0, - "learning_rate": 7.210171125431264e-07, - "loss": 0.7509, + "learning_rate": 7.358593475784437e-07, + "loss": 0.8154, "step": 31083 }, { - "epoch": 0.8820658342792281, + "epoch": 0.8808410552863498, "grad_norm": 0.0, - "learning_rate": 7.206744887058658e-07, - "loss": 0.805, + "learning_rate": 7.355138331736166e-07, + "loss": 0.8024, "step": 31084 }, { - "epoch": 0.882094211123723, + "epoch": 0.8808693927286123, "grad_norm": 0.0, - "learning_rate": 7.203319432514877e-07, - "loss": 0.7339, + "learning_rate": 7.351683968069989e-07, + "loss": 0.8004, "step": 31085 }, { - "epoch": 0.882122587968218, + "epoch": 0.8808977301708748, "grad_norm": 0.0, - "learning_rate": 7.199894761828807e-07, - "loss": 0.8708, + "learning_rate": 7.348230384814958e-07, + "loss": 0.9252, "step": 31086 }, { - "epoch": 0.8821509648127128, + "epoch": 0.8809260676131372, "grad_norm": 0.0, - "learning_rate": 7.196470875029393e-07, - "loss": 0.8929, + "learning_rate": 7.344777582000218e-07, + "loss": 0.7631, "step": 31087 }, { - "epoch": 0.8821793416572077, + "epoch": 0.8809544050553997, "grad_norm": 0.0, - "learning_rate": 7.19304777214559e-07, - "loss": 0.7891, + "learning_rate": 7.341325559654799e-07, + "loss": 0.8244, "step": 31088 }, { - "epoch": 0.8822077185017027, + "epoch": 0.8809827424976622, "grad_norm": 0.0, - "learning_rate": 7.18962545320625e-07, - "loss": 0.7981, + "learning_rate": 7.337874317807803e-07, + "loss": 0.7612, "step": 31089 }, { - "epoch": 0.8822360953461975, + "epoch": 0.8810110799399247, "grad_norm": 0.0, - "learning_rate": 7.186203918240331e-07, - "loss": 0.858, + "learning_rate": 7.334423856488293e-07, + "loss": 0.8492, "step": 31090 }, { - "epoch": 0.8822644721906924, + "epoch": 0.881039417382187, "grad_norm": 0.0, - "learning_rate": 7.182783167276719e-07, - "loss": 0.8176, + "learning_rate": 7.330974175725347e-07, + "loss": 0.7167, "step": 31091 }, { - "epoch": 0.8822928490351872, + "epoch": 0.8810677548244495, "grad_norm": 0.0, - "learning_rate": 7.179363200344303e-07, - "loss": 0.7969, + "learning_rate": 7.327525275548042e-07, + "loss": 0.8153, "step": 31092 }, { - "epoch": 0.8823212258796822, + "epoch": 0.881096092266712, "grad_norm": 0.0, - "learning_rate": 7.175944017471991e-07, - "loss": 0.7967, + "learning_rate": 7.324077155985376e-07, + "loss": 0.8581, "step": 31093 }, { - "epoch": 0.8823496027241771, + "epoch": 0.8811244297089744, "grad_norm": 0.0, - "learning_rate": 7.172525618688641e-07, - "loss": 0.8094, + "learning_rate": 7.320629817066427e-07, + "loss": 0.783, "step": 31094 }, { - "epoch": 0.8823779795686719, + "epoch": 0.8811527671512369, "grad_norm": 0.0, - "learning_rate": 7.169108004023151e-07, - "loss": 0.8582, + "learning_rate": 7.317183258820248e-07, + "loss": 0.741, "step": 31095 }, { - "epoch": 0.8824063564131669, + "epoch": 0.8811811045934994, "grad_norm": 0.0, - "learning_rate": 7.165691173504385e-07, - "loss": 0.8536, + "learning_rate": 7.31373748127584e-07, + "loss": 0.7597, "step": 31096 }, { - "epoch": 0.8824347332576618, + "epoch": 0.8812094420357619, "grad_norm": 0.0, - "learning_rate": 7.162275127161178e-07, - "loss": 0.744, + "learning_rate": 7.310292484462234e-07, + "loss": 0.8424, "step": 31097 }, { - "epoch": 0.8824631101021566, + "epoch": 0.8812377794780243, "grad_norm": 0.0, - "learning_rate": 7.158859865022416e-07, - "loss": 0.9444, + "learning_rate": 7.306848268408462e-07, + "loss": 0.778, "step": 31098 }, { - "epoch": 0.8824914869466516, + "epoch": 0.8812661169202868, "grad_norm": 0.0, - "learning_rate": 7.155445387116944e-07, - "loss": 0.8637, + "learning_rate": 7.303404833143524e-07, + "loss": 0.6703, "step": 31099 }, { - "epoch": 0.8825198637911464, + "epoch": 0.8812944543625493, "grad_norm": 0.0, - "learning_rate": 7.152031693473593e-07, - "loss": 0.8291, + "learning_rate": 7.29996217869644e-07, + "loss": 0.8397, "step": 31100 }, { - "epoch": 0.8825482406356413, + "epoch": 0.8813227918048117, "grad_norm": 0.0, - "learning_rate": 7.148618784121198e-07, - "loss": 0.8023, + "learning_rate": 7.296520305096177e-07, + "loss": 0.9629, "step": 31101 }, { - "epoch": 0.8825766174801362, + "epoch": 0.8813511292470741, "grad_norm": 0.0, - "learning_rate": 7.145206659088611e-07, - "loss": 0.8394, + "learning_rate": 7.293079212371757e-07, + "loss": 0.8726, "step": 31102 }, { - "epoch": 0.8826049943246311, + "epoch": 0.8813794666893366, "grad_norm": 0.0, - "learning_rate": 7.14179531840462e-07, - "loss": 0.7963, + "learning_rate": 7.289638900552154e-07, + "loss": 0.792, "step": 31103 }, { - "epoch": 0.882633371169126, + "epoch": 0.881407804131599, "grad_norm": 0.0, - "learning_rate": 7.138384762098061e-07, - "loss": 0.8651, + "learning_rate": 7.286199369666346e-07, + "loss": 0.8351, "step": 31104 }, { - "epoch": 0.8826617480136209, + "epoch": 0.8814361415738615, "grad_norm": 0.0, - "learning_rate": 7.134974990197763e-07, - "loss": 0.6753, + "learning_rate": 7.282760619743312e-07, + "loss": 0.7571, "step": 31105 }, { - "epoch": 0.8826901248581158, + "epoch": 0.881464479016124, "grad_norm": 0.0, - "learning_rate": 7.13156600273246e-07, - "loss": 0.783, + "learning_rate": 7.279322650812026e-07, + "loss": 0.8173, "step": 31106 }, { - "epoch": 0.8827185017026107, + "epoch": 0.8814928164583865, "grad_norm": 0.0, - "learning_rate": 7.128157799731039e-07, - "loss": 0.8205, + "learning_rate": 7.275885462901422e-07, + "loss": 0.7646, "step": 31107 }, { - "epoch": 0.8827468785471055, + "epoch": 0.8815211539006489, "grad_norm": 0.0, - "learning_rate": 7.124750381222223e-07, - "loss": 0.7793, + "learning_rate": 7.272449056040487e-07, + "loss": 0.8323, "step": 31108 }, { - "epoch": 0.8827752553916004, + "epoch": 0.8815494913429114, "grad_norm": 0.0, - "learning_rate": 7.12134374723481e-07, - "loss": 0.8544, + "learning_rate": 7.269013430258132e-07, + "loss": 0.8248, "step": 31109 }, { - "epoch": 0.8828036322360954, + "epoch": 0.8815778287851739, "grad_norm": 0.0, - "learning_rate": 7.1179378977976e-07, - "loss": 0.9313, + "learning_rate": 7.265578585583311e-07, + "loss": 0.7475, "step": 31110 }, { - "epoch": 0.8828320090805902, + "epoch": 0.8816061662274363, "grad_norm": 0.0, - "learning_rate": 7.114532832939325e-07, - "loss": 0.8069, + "learning_rate": 7.262144522044967e-07, + "loss": 0.8196, "step": 31111 }, { - "epoch": 0.8828603859250851, + "epoch": 0.8816345036696988, "grad_norm": 0.0, - "learning_rate": 7.111128552688773e-07, - "loss": 0.8141, + "learning_rate": 7.258711239672012e-07, + "loss": 0.9018, "step": 31112 }, { - "epoch": 0.8828887627695801, + "epoch": 0.8816628411119612, "grad_norm": 0.0, - "learning_rate": 7.107725057074711e-07, - "loss": 0.9088, + "learning_rate": 7.255278738493355e-07, + "loss": 0.8672, "step": 31113 }, { - "epoch": 0.8829171396140749, + "epoch": 0.8816911785542237, "grad_norm": 0.0, - "learning_rate": 7.104322346125848e-07, - "loss": 0.8332, + "learning_rate": 7.251847018537961e-07, + "loss": 0.8556, "step": 31114 }, { - "epoch": 0.8829455164585698, + "epoch": 0.8817195159964861, "grad_norm": 0.0, - "learning_rate": 7.10092041987096e-07, - "loss": 0.7971, + "learning_rate": 7.248416079834663e-07, + "loss": 0.7871, "step": 31115 }, { - "epoch": 0.8829738933030647, + "epoch": 0.8817478534387486, "grad_norm": 0.0, - "learning_rate": 7.097519278338783e-07, - "loss": 0.7936, + "learning_rate": 7.244985922412417e-07, + "loss": 0.8502, "step": 31116 }, { - "epoch": 0.8830022701475596, + "epoch": 0.8817761908810111, "grad_norm": 0.0, - "learning_rate": 7.094118921558024e-07, - "loss": 0.798, + "learning_rate": 7.241556546300088e-07, + "loss": 0.7179, "step": 31117 }, { - "epoch": 0.8830306469920545, + "epoch": 0.8818045283232735, "grad_norm": 0.0, - "learning_rate": 7.090719349557407e-07, - "loss": 0.7567, + "learning_rate": 7.238127951526575e-07, + "loss": 0.7252, "step": 31118 }, { - "epoch": 0.8830590238365493, + "epoch": 0.881832865765536, "grad_norm": 0.0, - "learning_rate": 7.087320562365685e-07, - "loss": 0.8133, + "learning_rate": 7.234700138120776e-07, + "loss": 0.7939, "step": 31119 }, { - "epoch": 0.8830874006810443, + "epoch": 0.8818612032077985, "grad_norm": 0.0, - "learning_rate": 7.083922560011536e-07, - "loss": 0.678, + "learning_rate": 7.231273106111536e-07, + "loss": 0.8053, "step": 31120 }, { - "epoch": 0.8831157775255392, + "epoch": 0.881889540650061, "grad_norm": 0.0, - "learning_rate": 7.080525342523636e-07, - "loss": 0.7449, + "learning_rate": 7.227846855527732e-07, + "loss": 0.8411, "step": 31121 }, { - "epoch": 0.883144154370034, + "epoch": 0.8819178780923234, "grad_norm": 0.0, - "learning_rate": 7.077128909930763e-07, - "loss": 0.7881, + "learning_rate": 7.224421386398217e-07, + "loss": 0.7272, "step": 31122 }, { - "epoch": 0.883172531214529, + "epoch": 0.8819462155345859, "grad_norm": 0.0, - "learning_rate": 7.07373326226154e-07, - "loss": 0.8005, + "learning_rate": 7.220996698751847e-07, + "loss": 0.8148, "step": 31123 }, { - "epoch": 0.8832009080590238, + "epoch": 0.8819745529768483, "grad_norm": 0.0, - "learning_rate": 7.070338399544663e-07, - "loss": 0.8605, + "learning_rate": 7.2175727926175e-07, + "loss": 0.8607, "step": 31124 }, { - "epoch": 0.8832292849035187, + "epoch": 0.8820028904191107, "grad_norm": 0.0, - "learning_rate": 7.066944321808833e-07, - "loss": 0.7772, + "learning_rate": 7.214149668023962e-07, + "loss": 0.798, "step": 31125 }, { - "epoch": 0.8832576617480136, + "epoch": 0.8820312278613732, "grad_norm": 0.0, - "learning_rate": 7.063551029082694e-07, - "loss": 0.7504, + "learning_rate": 7.210727325000111e-07, + "loss": 0.6664, "step": 31126 }, { - "epoch": 0.8832860385925085, + "epoch": 0.8820595653036357, "grad_norm": 0.0, - "learning_rate": 7.0601585213949e-07, - "loss": 0.8177, + "learning_rate": 7.207305763574746e-07, + "loss": 0.7999, "step": 31127 }, { - "epoch": 0.8833144154370034, + "epoch": 0.8820879027458981, "grad_norm": 0.0, - "learning_rate": 7.056766798774162e-07, - "loss": 0.8397, + "learning_rate": 7.20388498377671e-07, + "loss": 0.8188, "step": 31128 }, { - "epoch": 0.8833427922814983, + "epoch": 0.8821162401881606, "grad_norm": 0.0, - "learning_rate": 7.053375861249056e-07, - "loss": 0.8645, + "learning_rate": 7.200464985634825e-07, + "loss": 0.7401, "step": 31129 }, { - "epoch": 0.8833711691259932, + "epoch": 0.8821445776304231, "grad_norm": 0.0, - "learning_rate": 7.049985708848295e-07, - "loss": 0.8645, + "learning_rate": 7.197045769177868e-07, + "loss": 0.7831, "step": 31130 }, { - "epoch": 0.8833995459704881, + "epoch": 0.8821729150726856, "grad_norm": 0.0, - "learning_rate": 7.046596341600465e-07, - "loss": 0.7985, + "learning_rate": 7.193627334434661e-07, + "loss": 0.7219, "step": 31131 }, { - "epoch": 0.883427922814983, + "epoch": 0.882201252514948, "grad_norm": 0.0, - "learning_rate": 7.04320775953421e-07, - "loss": 0.7297, + "learning_rate": 7.190209681434002e-07, + "loss": 0.8662, "step": 31132 }, { - "epoch": 0.8834562996594779, + "epoch": 0.8822295899572105, "grad_norm": 0.0, - "learning_rate": 7.039819962678174e-07, - "loss": 0.8592, + "learning_rate": 7.186792810204657e-07, + "loss": 0.7695, "step": 31133 }, { - "epoch": 0.8834846765039728, + "epoch": 0.882257927399473, "grad_norm": 0.0, - "learning_rate": 7.036432951060934e-07, - "loss": 0.7508, + "learning_rate": 7.183376720775415e-07, + "loss": 0.8616, "step": 31134 }, { - "epoch": 0.8835130533484676, + "epoch": 0.8822862648417353, "grad_norm": 0.0, - "learning_rate": 7.033046724711135e-07, - "loss": 0.7764, + "learning_rate": 7.179961413175085e-07, + "loss": 0.6879, "step": 31135 }, { - "epoch": 0.8835414301929625, + "epoch": 0.8823146022839978, "grad_norm": 0.0, - "learning_rate": 7.029661283657385e-07, - "loss": 0.7729, + "learning_rate": 7.17654688743239e-07, + "loss": 0.7307, "step": 31136 }, { - "epoch": 0.8835698070374575, + "epoch": 0.8823429397262603, "grad_norm": 0.0, - "learning_rate": 7.02627662792823e-07, - "loss": 0.7891, + "learning_rate": 7.173133143576116e-07, + "loss": 0.7684, "step": 31137 }, { - "epoch": 0.8835981838819523, + "epoch": 0.8823712771685228, "grad_norm": 0.0, - "learning_rate": 7.022892757552313e-07, - "loss": 0.7621, + "learning_rate": 7.16972018163501e-07, + "loss": 0.7639, "step": 31138 }, { - "epoch": 0.8836265607264472, + "epoch": 0.8823996146107852, "grad_norm": 0.0, - "learning_rate": 7.019509672558211e-07, - "loss": 0.8375, + "learning_rate": 7.166308001637812e-07, + "loss": 0.8204, "step": 31139 }, { - "epoch": 0.8836549375709422, + "epoch": 0.8824279520530477, "grad_norm": 0.0, - "learning_rate": 7.016127372974479e-07, - "loss": 0.8572, + "learning_rate": 7.162896603613278e-07, + "loss": 0.7898, "step": 31140 }, { - "epoch": 0.883683314415437, + "epoch": 0.8824562894953102, "grad_norm": 0.0, - "learning_rate": 7.012745858829706e-07, - "loss": 0.7789, + "learning_rate": 7.159485987590143e-07, + "loss": 0.7417, "step": 31141 }, { - "epoch": 0.8837116912599319, + "epoch": 0.8824846269375726, "grad_norm": 0.0, - "learning_rate": 7.009365130152457e-07, - "loss": 0.8293, + "learning_rate": 7.156076153597124e-07, + "loss": 0.7285, "step": 31142 }, { - "epoch": 0.8837400681044267, + "epoch": 0.8825129643798351, "grad_norm": 0.0, - "learning_rate": 7.005985186971265e-07, - "loss": 0.8628, + "learning_rate": 7.15266710166298e-07, + "loss": 0.8655, "step": 31143 }, { - "epoch": 0.8837684449489217, + "epoch": 0.8825413018220976, "grad_norm": 0.0, - "learning_rate": 7.002606029314695e-07, - "loss": 0.7702, + "learning_rate": 7.149258831816374e-07, + "loss": 0.8743, "step": 31144 }, { - "epoch": 0.8837968217934166, + "epoch": 0.88256963926436, "grad_norm": 0.0, - "learning_rate": 6.999227657211316e-07, - "loss": 0.8194, + "learning_rate": 7.145851344086052e-07, + "loss": 0.8323, "step": 31145 }, { - "epoch": 0.8838251986379114, + "epoch": 0.8825979767066224, "grad_norm": 0.0, - "learning_rate": 6.995850070689614e-07, - "loss": 0.7699, + "learning_rate": 7.142444638500701e-07, + "loss": 0.7967, "step": 31146 }, { - "epoch": 0.8838535754824064, + "epoch": 0.8826263141488849, "grad_norm": 0.0, - "learning_rate": 6.992473269778155e-07, - "loss": 0.7476, + "learning_rate": 7.13903871508902e-07, + "loss": 0.8541, "step": 31147 }, { - "epoch": 0.8838819523269013, + "epoch": 0.8826546515911474, "grad_norm": 0.0, - "learning_rate": 6.989097254505472e-07, - "loss": 0.8148, + "learning_rate": 7.135633573879707e-07, + "loss": 0.8566, "step": 31148 }, { - "epoch": 0.8839103291713961, + "epoch": 0.8826829890334098, "grad_norm": 0.0, - "learning_rate": 6.985722024900044e-07, - "loss": 0.86, + "learning_rate": 7.13222921490142e-07, + "loss": 0.8851, "step": 31149 }, { - "epoch": 0.8839387060158911, + "epoch": 0.8827113264756723, "grad_norm": 0.0, - "learning_rate": 6.982347580990412e-07, - "loss": 0.7768, + "learning_rate": 7.128825638182879e-07, + "loss": 0.8934, "step": 31150 }, { - "epoch": 0.8839670828603859, + "epoch": 0.8827396639179348, "grad_norm": 0.0, - "learning_rate": 6.978973922805077e-07, - "loss": 0.8432, + "learning_rate": 7.125422843752706e-07, + "loss": 0.757, "step": 31151 }, { - "epoch": 0.8839954597048808, + "epoch": 0.8827680013601972, "grad_norm": 0.0, - "learning_rate": 6.975601050372505e-07, - "loss": 0.7487, + "learning_rate": 7.122020831639576e-07, + "loss": 0.7648, "step": 31152 }, { - "epoch": 0.8840238365493757, + "epoch": 0.8827963388024597, "grad_norm": 0.0, - "learning_rate": 6.97222896372125e-07, - "loss": 0.9062, + "learning_rate": 7.118619601872157e-07, + "loss": 0.8382, "step": 31153 }, { - "epoch": 0.8840522133938706, + "epoch": 0.8828246762447222, "grad_norm": 0.0, - "learning_rate": 6.968857662879735e-07, - "loss": 0.8359, + "learning_rate": 7.115219154479102e-07, + "loss": 0.7866, "step": 31154 }, { - "epoch": 0.8840805902383655, + "epoch": 0.8828530136869847, "grad_norm": 0.0, - "learning_rate": 6.96548714787646e-07, - "loss": 0.7855, + "learning_rate": 7.111819489489047e-07, + "loss": 0.7342, "step": 31155 }, { - "epoch": 0.8841089670828604, + "epoch": 0.882881351129247, "grad_norm": 0.0, - "learning_rate": 6.962117418739922e-07, - "loss": 0.8211, + "learning_rate": 7.108420606930644e-07, + "loss": 0.9412, "step": 31156 }, { - "epoch": 0.8841373439273553, + "epoch": 0.8829096885715095, "grad_norm": 0.0, - "learning_rate": 6.958748475498533e-07, - "loss": 0.7647, + "learning_rate": 7.105022506832493e-07, + "loss": 0.7797, "step": 31157 }, { - "epoch": 0.8841657207718502, + "epoch": 0.882938026013772, "grad_norm": 0.0, - "learning_rate": 6.955380318180793e-07, - "loss": 0.8026, + "learning_rate": 7.101625189223249e-07, + "loss": 0.7433, "step": 31158 }, { - "epoch": 0.884194097616345, + "epoch": 0.8829663634560344, "grad_norm": 0.0, - "learning_rate": 6.952012946815145e-07, - "loss": 0.8451, + "learning_rate": 7.098228654131489e-07, + "loss": 0.901, "step": 31159 }, { - "epoch": 0.8842224744608399, + "epoch": 0.8829947008982969, "grad_norm": 0.0, - "learning_rate": 6.948646361430011e-07, - "loss": 0.8071, + "learning_rate": 7.094832901585857e-07, + "loss": 0.8453, "step": 31160 }, { - "epoch": 0.8842508513053349, + "epoch": 0.8830230383405594, "grad_norm": 0.0, - "learning_rate": 6.945280562053836e-07, - "loss": 0.8539, + "learning_rate": 7.091437931614964e-07, + "loss": 0.8462, "step": 31161 }, { - "epoch": 0.8842792281498297, + "epoch": 0.8830513757828219, "grad_norm": 0.0, - "learning_rate": 6.941915548715083e-07, - "loss": 0.8663, + "learning_rate": 7.088043744247375e-07, + "loss": 0.8684, "step": 31162 }, { - "epoch": 0.8843076049943246, + "epoch": 0.8830797132250843, "grad_norm": 0.0, - "learning_rate": 6.938551321442144e-07, - "loss": 0.7112, + "learning_rate": 7.08465033951169e-07, + "loss": 0.7629, "step": 31163 }, { - "epoch": 0.8843359818388196, + "epoch": 0.8831080506673468, "grad_norm": 0.0, - "learning_rate": 6.935187880263439e-07, - "loss": 0.6713, + "learning_rate": 7.081257717436507e-07, + "loss": 0.79, "step": 31164 }, { - "epoch": 0.8843643586833144, + "epoch": 0.8831363881096093, "grad_norm": 0.0, - "learning_rate": 6.9318252252074e-07, - "loss": 0.8129, + "learning_rate": 7.077865878050394e-07, + "loss": 0.7833, "step": 31165 }, { - "epoch": 0.8843927355278093, + "epoch": 0.8831647255518716, "grad_norm": 0.0, - "learning_rate": 6.928463356302395e-07, - "loss": 0.8299, + "learning_rate": 7.074474821381916e-07, + "loss": 0.7978, "step": 31166 }, { - "epoch": 0.8844211123723043, + "epoch": 0.8831930629941341, "grad_norm": 0.0, - "learning_rate": 6.925102273576856e-07, - "loss": 0.8173, + "learning_rate": 7.071084547459639e-07, + "loss": 0.7291, "step": 31167 }, { - "epoch": 0.8844494892167991, + "epoch": 0.8832214004363966, "grad_norm": 0.0, - "learning_rate": 6.92174197705916e-07, - "loss": 0.7769, + "learning_rate": 7.06769505631213e-07, + "loss": 0.7599, "step": 31168 }, { - "epoch": 0.884477866061294, + "epoch": 0.8832497378786591, "grad_norm": 0.0, - "learning_rate": 6.918382466777685e-07, - "loss": 0.7807, + "learning_rate": 7.064306347967953e-07, + "loss": 0.8256, "step": 31169 }, { - "epoch": 0.8845062429057888, + "epoch": 0.8832780753209215, "grad_norm": 0.0, - "learning_rate": 6.91502374276084e-07, - "loss": 0.747, + "learning_rate": 7.06091842245562e-07, + "loss": 0.7128, "step": 31170 }, { - "epoch": 0.8845346197502838, + "epoch": 0.883306412763184, "grad_norm": 0.0, - "learning_rate": 6.91166580503696e-07, - "loss": 0.8164, + "learning_rate": 7.057531279803676e-07, + "loss": 0.8423, "step": 31171 }, { - "epoch": 0.8845629965947787, + "epoch": 0.8833347502054465, "grad_norm": 0.0, - "learning_rate": 6.908308653634421e-07, - "loss": 0.7521, + "learning_rate": 7.054144920040684e-07, + "loss": 0.7952, "step": 31172 }, { - "epoch": 0.8845913734392735, + "epoch": 0.8833630876477089, "grad_norm": 0.0, - "learning_rate": 6.90495228858159e-07, - "loss": 0.7772, + "learning_rate": 7.050759343195113e-07, + "loss": 0.8718, "step": 31173 }, { - "epoch": 0.8846197502837685, + "epoch": 0.8833914250899714, "grad_norm": 0.0, - "learning_rate": 6.901596709906811e-07, - "loss": 0.6995, + "learning_rate": 7.047374549295538e-07, + "loss": 0.8162, "step": 31174 }, { - "epoch": 0.8846481271282634, + "epoch": 0.8834197625322339, "grad_norm": 0.0, - "learning_rate": 6.898241917638426e-07, - "loss": 0.8044, + "learning_rate": 7.043990538370437e-07, + "loss": 0.8221, "step": 31175 }, { - "epoch": 0.8846765039727582, + "epoch": 0.8834480999744962, "grad_norm": 0.0, - "learning_rate": 6.894887911804792e-07, - "loss": 0.8265, + "learning_rate": 7.04060731044831e-07, + "loss": 0.8254, "step": 31176 }, { - "epoch": 0.8847048808172531, + "epoch": 0.8834764374167587, "grad_norm": 0.0, - "learning_rate": 6.891534692434199e-07, - "loss": 0.9088, + "learning_rate": 7.03722486555769e-07, + "loss": 0.8622, "step": 31177 }, { - "epoch": 0.884733257661748, + "epoch": 0.8835047748590212, "grad_norm": 0.0, - "learning_rate": 6.888182259555009e-07, - "loss": 0.8211, + "learning_rate": 7.03384320372702e-07, + "loss": 0.8167, "step": 31178 }, { - "epoch": 0.8847616345062429, + "epoch": 0.8835331123012837, "grad_norm": 0.0, - "learning_rate": 6.884830613195547e-07, - "loss": 0.7517, + "learning_rate": 7.030462324984821e-07, + "loss": 0.8173, "step": 31179 }, { - "epoch": 0.8847900113507378, + "epoch": 0.8835614497435461, "grad_norm": 0.0, - "learning_rate": 6.88147975338408e-07, - "loss": 0.778, + "learning_rate": 7.02708222935955e-07, + "loss": 0.7873, "step": 31180 }, { - "epoch": 0.8848183881952327, + "epoch": 0.8835897871858086, "grad_norm": 0.0, - "learning_rate": 6.878129680148948e-07, - "loss": 0.7894, + "learning_rate": 7.023702916879705e-07, + "loss": 0.811, "step": 31181 }, { - "epoch": 0.8848467650397276, + "epoch": 0.8836181246280711, "grad_norm": 0.0, - "learning_rate": 6.874780393518455e-07, - "loss": 0.7998, + "learning_rate": 7.02032438757374e-07, + "loss": 0.8099, "step": 31182 }, { - "epoch": 0.8848751418842224, + "epoch": 0.8836464620703335, "grad_norm": 0.0, - "learning_rate": 6.871431893520853e-07, - "loss": 0.7863, + "learning_rate": 7.016946641470102e-07, + "loss": 0.8302, "step": 31183 }, { - "epoch": 0.8849035187287174, + "epoch": 0.883674799512596, "grad_norm": 0.0, - "learning_rate": 6.868084180184475e-07, - "loss": 0.8216, + "learning_rate": 7.013569678597243e-07, + "loss": 0.764, "step": 31184 }, { - "epoch": 0.8849318955732123, + "epoch": 0.8837031369548585, "grad_norm": 0.0, - "learning_rate": 6.864737253537557e-07, - "loss": 0.7623, + "learning_rate": 7.010193498983641e-07, + "loss": 0.8571, "step": 31185 }, { - "epoch": 0.8849602724177071, + "epoch": 0.883731474397121, "grad_norm": 0.0, - "learning_rate": 6.861391113608395e-07, - "loss": 0.7567, + "learning_rate": 7.006818102657687e-07, + "loss": 0.7209, "step": 31186 }, { - "epoch": 0.884988649262202, + "epoch": 0.8837598118393833, "grad_norm": 0.0, - "learning_rate": 6.858045760425269e-07, - "loss": 0.7567, + "learning_rate": 7.003443489647854e-07, + "loss": 0.7199, "step": 31187 }, { - "epoch": 0.885017026106697, + "epoch": 0.8837881492816458, "grad_norm": 0.0, - "learning_rate": 6.8547011940164e-07, - "loss": 0.7668, + "learning_rate": 7.000069659982534e-07, + "loss": 0.8073, "step": 31188 }, { - "epoch": 0.8850454029511918, + "epoch": 0.8838164867239083, "grad_norm": 0.0, - "learning_rate": 6.851357414410053e-07, - "loss": 0.8084, + "learning_rate": 6.996696613690157e-07, + "loss": 0.7671, "step": 31189 }, { - "epoch": 0.8850737797956867, + "epoch": 0.8838448241661707, "grad_norm": 0.0, - "learning_rate": 6.848014421634497e-07, - "loss": 0.8303, + "learning_rate": 6.993324350799169e-07, + "loss": 0.7398, "step": 31190 }, { - "epoch": 0.8851021566401817, + "epoch": 0.8838731616084332, "grad_norm": 0.0, - "learning_rate": 6.844672215717929e-07, - "loss": 0.8438, + "learning_rate": 6.989952871337924e-07, + "loss": 0.8388, "step": 31191 }, { - "epoch": 0.8851305334846765, + "epoch": 0.8839014990506957, "grad_norm": 0.0, - "learning_rate": 6.841330796688606e-07, - "loss": 0.7841, + "learning_rate": 6.986582175334844e-07, + "loss": 0.8561, "step": 31192 }, { - "epoch": 0.8851589103291714, + "epoch": 0.8839298364929582, "grad_norm": 0.0, - "learning_rate": 6.83799016457477e-07, - "loss": 0.7356, + "learning_rate": 6.983212262818318e-07, + "loss": 0.8139, "step": 31193 }, { - "epoch": 0.8851872871736662, + "epoch": 0.8839581739352206, "grad_norm": 0.0, - "learning_rate": 6.834650319404601e-07, - "loss": 0.8238, + "learning_rate": 6.979843133816744e-07, + "loss": 0.8556, "step": 31194 }, { - "epoch": 0.8852156640181612, + "epoch": 0.8839865113774831, "grad_norm": 0.0, - "learning_rate": 6.831311261206331e-07, - "loss": 0.7145, + "learning_rate": 6.976474788358501e-07, + "loss": 0.8547, "step": 31195 }, { - "epoch": 0.8852440408626561, + "epoch": 0.8840148488197456, "grad_norm": 0.0, - "learning_rate": 6.82797299000817e-07, - "loss": 0.7513, + "learning_rate": 6.973107226471953e-07, + "loss": 0.7636, "step": 31196 }, { - "epoch": 0.8852724177071509, + "epoch": 0.884043186262008, "grad_norm": 0.0, - "learning_rate": 6.824635505838295e-07, - "loss": 0.7475, + "learning_rate": 6.969740448185458e-07, + "loss": 0.7775, "step": 31197 }, { - "epoch": 0.8853007945516459, + "epoch": 0.8840715237042704, "grad_norm": 0.0, - "learning_rate": 6.821298808724919e-07, - "loss": 0.8548, + "learning_rate": 6.966374453527392e-07, + "loss": 0.7942, "step": 31198 }, { - "epoch": 0.8853291713961408, + "epoch": 0.8840998611465329, "grad_norm": 0.0, - "learning_rate": 6.817962898696218e-07, - "loss": 0.8, + "learning_rate": 6.963009242526098e-07, + "loss": 0.8284, "step": 31199 }, { - "epoch": 0.8853575482406356, + "epoch": 0.8841281985887953, "grad_norm": 0.0, - "learning_rate": 6.814627775780381e-07, - "loss": 0.8271, + "learning_rate": 6.959644815209921e-07, + "loss": 0.749, "step": 31200 }, { - "epoch": 0.8853859250851306, + "epoch": 0.8841565360310578, "grad_norm": 0.0, - "learning_rate": 6.811293440005573e-07, - "loss": 0.8618, + "learning_rate": 6.956281171607227e-07, + "loss": 0.8471, "step": 31201 }, { - "epoch": 0.8854143019296254, + "epoch": 0.8841848734733203, "grad_norm": 0.0, - "learning_rate": 6.807959891399951e-07, - "loss": 0.7517, + "learning_rate": 6.952918311746304e-07, + "loss": 0.7522, "step": 31202 }, { - "epoch": 0.8854426787741203, + "epoch": 0.8842132109155828, "grad_norm": 0.0, - "learning_rate": 6.804627129991681e-07, - "loss": 0.7823, + "learning_rate": 6.949556235655519e-07, + "loss": 0.6954, "step": 31203 }, { - "epoch": 0.8854710556186152, + "epoch": 0.8842415483578452, "grad_norm": 0.0, - "learning_rate": 6.80129515580893e-07, - "loss": 0.7958, + "learning_rate": 6.94619494336316e-07, + "loss": 0.758, "step": 31204 }, { - "epoch": 0.8854994324631101, + "epoch": 0.8842698858001077, "grad_norm": 0.0, - "learning_rate": 6.797963968879806e-07, - "loss": 0.7866, + "learning_rate": 6.942834434897561e-07, + "loss": 0.7654, "step": 31205 }, { - "epoch": 0.885527809307605, + "epoch": 0.8842982232423702, "grad_norm": 0.0, - "learning_rate": 6.794633569232479e-07, - "loss": 0.817, + "learning_rate": 6.93947471028702e-07, + "loss": 0.7483, "step": 31206 }, { - "epoch": 0.8855561861520999, + "epoch": 0.8843265606846326, "grad_norm": 0.0, - "learning_rate": 6.79130395689509e-07, - "loss": 0.8737, + "learning_rate": 6.936115769559837e-07, + "loss": 0.7323, "step": 31207 }, { - "epoch": 0.8855845629965948, + "epoch": 0.884354898126895, "grad_norm": 0.0, - "learning_rate": 6.787975131895718e-07, - "loss": 0.8572, + "learning_rate": 6.932757612744334e-07, + "loss": 0.8524, "step": 31208 }, { - "epoch": 0.8856129398410897, + "epoch": 0.8843832355691575, "grad_norm": 0.0, - "learning_rate": 6.78464709426252e-07, - "loss": 0.7802, + "learning_rate": 6.929400239868745e-07, + "loss": 0.7645, "step": 31209 }, { - "epoch": 0.8856413166855845, + "epoch": 0.88441157301142, "grad_norm": 0.0, - "learning_rate": 6.781319844023615e-07, - "loss": 0.8793, + "learning_rate": 6.92604365096139e-07, + "loss": 0.8622, "step": 31210 }, { - "epoch": 0.8856696935300794, + "epoch": 0.8844399104536824, "grad_norm": 0.0, - "learning_rate": 6.777993381207071e-07, - "loss": 0.9071, + "learning_rate": 6.922687846050535e-07, + "loss": 0.7018, "step": 31211 }, { - "epoch": 0.8856980703745744, + "epoch": 0.8844682478959449, "grad_norm": 0.0, - "learning_rate": 6.774667705841009e-07, - "loss": 0.7639, + "learning_rate": 6.919332825164437e-07, + "loss": 0.8028, "step": 31212 }, { - "epoch": 0.8857264472190692, + "epoch": 0.8844965853382074, "grad_norm": 0.0, - "learning_rate": 6.77134281795353e-07, - "loss": 0.9312, + "learning_rate": 6.915978588331362e-07, + "loss": 0.7315, "step": 31213 }, { - "epoch": 0.8857548240635641, + "epoch": 0.8845249227804698, "grad_norm": 0.0, - "learning_rate": 6.768018717572699e-07, - "loss": 0.7794, + "learning_rate": 6.912625135579587e-07, + "loss": 0.8201, "step": 31214 }, { - "epoch": 0.8857832009080591, + "epoch": 0.8845532602227323, "grad_norm": 0.0, - "learning_rate": 6.764695404726618e-07, - "loss": 0.8252, + "learning_rate": 6.909272466937312e-07, + "loss": 0.7898, "step": 31215 }, { - "epoch": 0.8858115777525539, + "epoch": 0.8845815976649948, "grad_norm": 0.0, - "learning_rate": 6.761372879443329e-07, - "loss": 0.8509, + "learning_rate": 6.905920582432824e-07, + "loss": 0.9851, "step": 31216 }, { - "epoch": 0.8858399545970488, + "epoch": 0.8846099351072573, "grad_norm": 0.0, - "learning_rate": 6.758051141750921e-07, - "loss": 0.7545, + "learning_rate": 6.902569482094324e-07, + "loss": 0.7848, "step": 31217 }, { - "epoch": 0.8858683314415438, + "epoch": 0.8846382725495197, "grad_norm": 0.0, - "learning_rate": 6.754730191677461e-07, - "loss": 0.7845, + "learning_rate": 6.899219165950044e-07, + "loss": 0.8764, "step": 31218 }, { - "epoch": 0.8858967082860386, + "epoch": 0.8846666099917821, "grad_norm": 0.0, - "learning_rate": 6.751410029250971e-07, - "loss": 0.8841, + "learning_rate": 6.895869634028218e-07, + "loss": 0.755, "step": 31219 }, { - "epoch": 0.8859250851305335, + "epoch": 0.8846949474340446, "grad_norm": 0.0, - "learning_rate": 6.748090654499518e-07, - "loss": 0.6691, + "learning_rate": 6.892520886357057e-07, + "loss": 0.8299, "step": 31220 }, { - "epoch": 0.8859534619750283, + "epoch": 0.884723284876307, "grad_norm": 0.0, - "learning_rate": 6.744772067451144e-07, - "loss": 0.8465, + "learning_rate": 6.88917292296476e-07, + "loss": 0.7885, "step": 31221 }, { - "epoch": 0.8859818388195233, + "epoch": 0.8847516223185695, "grad_norm": 0.0, - "learning_rate": 6.741454268133852e-07, - "loss": 0.7901, + "learning_rate": 6.88582574387956e-07, + "loss": 0.7861, "step": 31222 }, { - "epoch": 0.8860102156640182, + "epoch": 0.884779959760832, "grad_norm": 0.0, - "learning_rate": 6.738137256575694e-07, - "loss": 0.8088, + "learning_rate": 6.8824793491296e-07, + "loss": 0.8526, "step": 31223 }, { - "epoch": 0.886038592508513, + "epoch": 0.8848082972030944, "grad_norm": 0.0, - "learning_rate": 6.734821032804706e-07, - "loss": 0.7803, + "learning_rate": 6.879133738743116e-07, + "loss": 0.6415, "step": 31224 }, { - "epoch": 0.886066969353008, + "epoch": 0.8848366346453569, "grad_norm": 0.0, - "learning_rate": 6.731505596848853e-07, - "loss": 0.8196, + "learning_rate": 6.875788912748261e-07, + "loss": 0.8395, "step": 31225 }, { - "epoch": 0.8860953461975029, + "epoch": 0.8848649720876194, "grad_norm": 0.0, - "learning_rate": 6.728190948736158e-07, + "learning_rate": 6.872444871173211e-07, "loss": 0.8001, "step": 31226 }, { - "epoch": 0.8861237230419977, + "epoch": 0.8848933095298819, "grad_norm": 0.0, - "learning_rate": 6.724877088494652e-07, - "loss": 0.7843, + "learning_rate": 6.869101614046148e-07, + "loss": 0.7997, "step": 31227 }, { - "epoch": 0.8861520998864926, + "epoch": 0.8849216469721443, "grad_norm": 0.0, - "learning_rate": 6.721564016152293e-07, - "loss": 0.8553, + "learning_rate": 6.865759141395223e-07, + "loss": 0.8209, "step": 31228 }, { - "epoch": 0.8861804767309875, + "epoch": 0.8849499844144068, "grad_norm": 0.0, - "learning_rate": 6.718251731737047e-07, - "loss": 0.8898, + "learning_rate": 6.862417453248593e-07, + "loss": 0.68, "step": 31229 }, { - "epoch": 0.8862088535754824, + "epoch": 0.8849783218566692, "grad_norm": 0.0, - "learning_rate": 6.714940235276957e-07, - "loss": 0.7572, + "learning_rate": 6.859076549634403e-07, + "loss": 0.7719, "step": 31230 }, { - "epoch": 0.8862372304199773, + "epoch": 0.8850066592989316, "grad_norm": 0.0, - "learning_rate": 6.711629526799946e-07, - "loss": 0.9116, + "learning_rate": 6.855736430580795e-07, + "loss": 0.7846, "step": 31231 }, { - "epoch": 0.8862656072644722, + "epoch": 0.8850349967411941, "grad_norm": 0.0, - "learning_rate": 6.708319606334002e-07, - "loss": 0.797, + "learning_rate": 6.852397096115904e-07, + "loss": 0.8098, "step": 31232 }, { - "epoch": 0.8862939841089671, + "epoch": 0.8850633341834566, "grad_norm": 0.0, - "learning_rate": 6.705010473907081e-07, - "loss": 0.7328, + "learning_rate": 6.849058546267873e-07, + "loss": 0.7617, "step": 31233 }, { - "epoch": 0.886322360953462, + "epoch": 0.8850916716257191, "grad_norm": 0.0, - "learning_rate": 6.701702129547116e-07, - "loss": 0.7857, + "learning_rate": 6.845720781064802e-07, + "loss": 0.7565, "step": 31234 }, { - "epoch": 0.8863507377979569, + "epoch": 0.8851200090679815, "grad_norm": 0.0, - "learning_rate": 6.698394573282063e-07, - "loss": 0.8111, + "learning_rate": 6.842383800534835e-07, + "loss": 0.715, "step": 31235 }, { - "epoch": 0.8863791146424518, + "epoch": 0.885148346510244, "grad_norm": 0.0, - "learning_rate": 6.695087805139888e-07, - "loss": 0.826, + "learning_rate": 6.839047604706051e-07, + "loss": 0.8041, "step": 31236 }, { - "epoch": 0.8864074914869466, + "epoch": 0.8851766839525065, "grad_norm": 0.0, - "learning_rate": 6.691781825148491e-07, - "loss": 0.78, + "learning_rate": 6.83571219360657e-07, + "loss": 0.7616, "step": 31237 }, { - "epoch": 0.8864358683314415, + "epoch": 0.8852050213947689, "grad_norm": 0.0, - "learning_rate": 6.688476633335816e-07, - "loss": 0.826, + "learning_rate": 6.832377567264469e-07, + "loss": 0.8206, "step": 31238 }, { - "epoch": 0.8864642451759365, + "epoch": 0.8852333588370314, "grad_norm": 0.0, - "learning_rate": 6.685172229729752e-07, - "loss": 0.8328, + "learning_rate": 6.829043725707852e-07, + "loss": 0.8125, "step": 31239 }, { - "epoch": 0.8864926220204313, + "epoch": 0.8852616962792939, "grad_norm": 0.0, - "learning_rate": 6.681868614358245e-07, - "loss": 0.8297, + "learning_rate": 6.825710668964814e-07, + "loss": 0.7883, "step": 31240 }, { - "epoch": 0.8865209988649262, + "epoch": 0.8852900337215563, "grad_norm": 0.0, - "learning_rate": 6.678565787249192e-07, - "loss": 0.711, + "learning_rate": 6.822378397063389e-07, + "loss": 0.84, "step": 31241 }, { - "epoch": 0.8865493757094212, + "epoch": 0.8853183711638187, "grad_norm": 0.0, - "learning_rate": 6.675263748430483e-07, - "loss": 0.8552, + "learning_rate": 6.81904691003168e-07, + "loss": 0.8437, "step": 31242 }, { - "epoch": 0.886577752553916, + "epoch": 0.8853467086060812, "grad_norm": 0.0, - "learning_rate": 6.671962497930018e-07, - "loss": 0.7785, + "learning_rate": 6.81571620789776e-07, + "loss": 0.798, "step": 31243 }, { - "epoch": 0.8866061293984109, + "epoch": 0.8853750460483437, "grad_norm": 0.0, - "learning_rate": 6.668662035775674e-07, - "loss": 0.8518, + "learning_rate": 6.812386290689643e-07, + "loss": 0.7617, "step": 31244 }, { - "epoch": 0.8866345062429057, + "epoch": 0.8854033834906061, "grad_norm": 0.0, - "learning_rate": 6.665362361995332e-07, - "loss": 0.7541, + "learning_rate": 6.809057158435406e-07, + "loss": 0.8605, "step": 31245 }, { - "epoch": 0.8866628830874007, + "epoch": 0.8854317209328686, "grad_norm": 0.0, - "learning_rate": 6.662063476616887e-07, - "loss": 0.8577, + "learning_rate": 6.805728811163082e-07, + "loss": 0.7546, "step": 31246 }, { - "epoch": 0.8866912599318956, + "epoch": 0.8854600583751311, "grad_norm": 0.0, - "learning_rate": 6.658765379668186e-07, - "loss": 0.7217, + "learning_rate": 6.802401248900714e-07, + "loss": 0.7877, "step": 31247 }, { - "epoch": 0.8867196367763904, + "epoch": 0.8854883958173935, "grad_norm": 0.0, - "learning_rate": 6.655468071177084e-07, - "loss": 0.8113, + "learning_rate": 6.799074471676337e-07, + "loss": 0.8016, "step": 31248 }, { - "epoch": 0.8867480136208854, + "epoch": 0.885516733259656, "grad_norm": 0.0, - "learning_rate": 6.652171551171438e-07, - "loss": 0.7714, + "learning_rate": 6.79574847951796e-07, + "loss": 0.6401, "step": 31249 }, { - "epoch": 0.8867763904653803, + "epoch": 0.8855450707019185, "grad_norm": 0.0, - "learning_rate": 6.648875819679113e-07, - "loss": 0.7528, + "learning_rate": 6.792423272453596e-07, + "loss": 0.8026, "step": 31250 }, { - "epoch": 0.8868047673098751, + "epoch": 0.885573408144181, "grad_norm": 0.0, - "learning_rate": 6.645580876727908e-07, - "loss": 0.6508, + "learning_rate": 6.789098850511278e-07, + "loss": 0.8001, "step": 31251 }, { - "epoch": 0.88683314415437, + "epoch": 0.8856017455864433, "grad_norm": 0.0, - "learning_rate": 6.642286722345682e-07, - "loss": 0.8147, + "learning_rate": 6.785775213718981e-07, + "loss": 0.835, "step": 31252 }, { - "epoch": 0.886861520998865, + "epoch": 0.8856300830287058, "grad_norm": 0.0, - "learning_rate": 6.638993356560275e-07, - "loss": 0.7656, + "learning_rate": 6.78245236210473e-07, + "loss": 0.7094, "step": 31253 }, { - "epoch": 0.8868898978433598, + "epoch": 0.8856584204709683, "grad_norm": 0.0, - "learning_rate": 6.635700779399468e-07, - "loss": 0.7868, + "learning_rate": 6.779130295696479e-07, + "loss": 0.793, "step": 31254 }, { - "epoch": 0.8869182746878547, + "epoch": 0.8856867579132307, "grad_norm": 0.0, - "learning_rate": 6.632408990891092e-07, - "loss": 0.6762, + "learning_rate": 6.775809014522238e-07, + "loss": 0.7456, "step": 31255 }, { - "epoch": 0.8869466515323496, + "epoch": 0.8857150953554932, "grad_norm": 0.0, - "learning_rate": 6.629117991062972e-07, - "loss": 0.8667, + "learning_rate": 6.772488518609987e-07, + "loss": 0.7778, "step": 31256 }, { - "epoch": 0.8869750283768445, + "epoch": 0.8857434327977557, "grad_norm": 0.0, - "learning_rate": 6.625827779942873e-07, - "loss": 0.8213, + "learning_rate": 6.769168807987658e-07, + "loss": 0.7242, "step": 31257 }, { - "epoch": 0.8870034052213394, + "epoch": 0.8857717702400182, "grad_norm": 0.0, - "learning_rate": 6.622538357558606e-07, - "loss": 0.8311, + "learning_rate": 6.765849882683251e-07, + "loss": 0.8327, "step": 31258 }, { - "epoch": 0.8870317820658343, + "epoch": 0.8858001076822806, "grad_norm": 0.0, - "learning_rate": 6.619249723937959e-07, - "loss": 0.843, + "learning_rate": 6.76253174272472e-07, + "loss": 0.7645, "step": 31259 }, { - "epoch": 0.8870601589103292, + "epoch": 0.8858284451245431, "grad_norm": 0.0, - "learning_rate": 6.615961879108701e-07, - "loss": 0.7427, + "learning_rate": 6.75921438813999e-07, + "loss": 0.8408, "step": 31260 }, { - "epoch": 0.887088535754824, + "epoch": 0.8858567825668056, "grad_norm": 0.0, - "learning_rate": 6.612674823098631e-07, - "loss": 0.8177, + "learning_rate": 6.755897818957047e-07, + "loss": 0.6857, "step": 31261 }, { - "epoch": 0.8871169125993189, + "epoch": 0.8858851200090679, "grad_norm": 0.0, - "learning_rate": 6.609388555935481e-07, - "loss": 0.8666, + "learning_rate": 6.752582035203792e-07, + "loss": 0.7151, "step": 31262 }, { - "epoch": 0.8871452894438139, + "epoch": 0.8859134574513304, "grad_norm": 0.0, - "learning_rate": 6.60610307764702e-07, - "loss": 0.7995, + "learning_rate": 6.749267036908147e-07, + "loss": 0.7471, "step": 31263 }, { - "epoch": 0.8871736662883087, + "epoch": 0.8859417948935929, "grad_norm": 0.0, - "learning_rate": 6.602818388261012e-07, - "loss": 0.7515, + "learning_rate": 6.745952824098089e-07, + "loss": 0.931, "step": 31264 }, { - "epoch": 0.8872020431328036, + "epoch": 0.8859701323358554, "grad_norm": 0.0, - "learning_rate": 6.599534487805193e-07, - "loss": 0.7676, + "learning_rate": 6.742639396801476e-07, + "loss": 0.8108, "step": 31265 }, { - "epoch": 0.8872304199772986, + "epoch": 0.8859984697781178, "grad_norm": 0.0, - "learning_rate": 6.596251376307305e-07, - "loss": 0.8393, + "learning_rate": 6.739326755046249e-07, + "loss": 0.8421, "step": 31266 }, { - "epoch": 0.8872587968217934, + "epoch": 0.8860268072203803, "grad_norm": 0.0, - "learning_rate": 6.592969053795084e-07, - "loss": 0.8403, + "learning_rate": 6.736014898860299e-07, + "loss": 0.7425, "step": 31267 }, { - "epoch": 0.8872871736662883, + "epoch": 0.8860551446626428, "grad_norm": 0.0, - "learning_rate": 6.589687520296251e-07, - "loss": 0.8741, + "learning_rate": 6.732703828271526e-07, + "loss": 0.889, "step": 31268 }, { - "epoch": 0.8873155505107831, + "epoch": 0.8860834821049052, "grad_norm": 0.0, - "learning_rate": 6.586406775838517e-07, - "loss": 0.8591, + "learning_rate": 6.729393543307838e-07, + "loss": 0.7606, "step": 31269 }, { - "epoch": 0.8873439273552781, + "epoch": 0.8861118195471677, "grad_norm": 0.0, - "learning_rate": 6.583126820449626e-07, - "loss": 0.8795, + "learning_rate": 6.726084043997083e-07, + "loss": 0.7573, "step": 31270 }, { - "epoch": 0.887372304199773, + "epoch": 0.8861401569894302, "grad_norm": 0.0, - "learning_rate": 6.579847654157234e-07, - "loss": 0.813, + "learning_rate": 6.722775330367159e-07, + "loss": 0.7777, "step": 31271 }, { - "epoch": 0.8874006810442678, + "epoch": 0.8861684944316925, "grad_norm": 0.0, - "learning_rate": 6.576569276989087e-07, - "loss": 0.798, + "learning_rate": 6.719467402445945e-07, + "loss": 0.8498, "step": 31272 }, { - "epoch": 0.8874290578887628, + "epoch": 0.886196831873955, "grad_norm": 0.0, - "learning_rate": 6.57329168897286e-07, - "loss": 0.802, + "learning_rate": 6.716160260261284e-07, + "loss": 0.8094, "step": 31273 }, { - "epoch": 0.8874574347332577, + "epoch": 0.8862251693162175, "grad_norm": 0.0, - "learning_rate": 6.570014890136222e-07, - "loss": 0.8287, + "learning_rate": 6.712853903841077e-07, + "loss": 0.8078, "step": 31274 }, { - "epoch": 0.8874858115777525, + "epoch": 0.88625350675848, "grad_norm": 0.0, - "learning_rate": 6.566738880506884e-07, - "loss": 0.8287, + "learning_rate": 6.709548333213112e-07, + "loss": 0.7831, "step": 31275 }, { - "epoch": 0.8875141884222475, + "epoch": 0.8862818442007424, "grad_norm": 0.0, - "learning_rate": 6.56346366011249e-07, - "loss": 0.7573, + "learning_rate": 6.706243548405267e-07, + "loss": 0.8651, "step": 31276 }, { - "epoch": 0.8875425652667424, + "epoch": 0.8863101816430049, "grad_norm": 0.0, - "learning_rate": 6.560189228980717e-07, - "loss": 0.8101, + "learning_rate": 6.702939549445397e-07, + "loss": 0.8131, "step": 31277 }, { - "epoch": 0.8875709421112372, + "epoch": 0.8863385190852674, "grad_norm": 0.0, - "learning_rate": 6.556915587139246e-07, - "loss": 0.8124, + "learning_rate": 6.699636336361293e-07, + "loss": 0.7946, "step": 31278 }, { - "epoch": 0.8875993189557321, + "epoch": 0.8863668565275298, "grad_norm": 0.0, - "learning_rate": 6.553642734615695e-07, - "loss": 0.752, + "learning_rate": 6.696333909180796e-07, + "loss": 0.8995, "step": 31279 }, { - "epoch": 0.887627695800227, + "epoch": 0.8863951939697923, "grad_norm": 0.0, - "learning_rate": 6.550370671437723e-07, - "loss": 0.7862, + "learning_rate": 6.693032267931754e-07, + "loss": 0.7681, "step": 31280 }, { - "epoch": 0.8876560726447219, + "epoch": 0.8864235314120548, "grad_norm": 0.0, - "learning_rate": 6.547099397632983e-07, - "loss": 0.8156, + "learning_rate": 6.68973141264192e-07, + "loss": 0.8011, "step": 31281 }, { - "epoch": 0.8876844494892168, + "epoch": 0.8864518688543173, "grad_norm": 0.0, - "learning_rate": 6.543828913229089e-07, - "loss": 0.8652, + "learning_rate": 6.68643134333915e-07, + "loss": 0.698, "step": 31282 }, { - "epoch": 0.8877128263337117, + "epoch": 0.8864802062965796, "grad_norm": 0.0, - "learning_rate": 6.540559218253662e-07, - "loss": 0.7807, + "learning_rate": 6.683132060051201e-07, + "loss": 0.8309, "step": 31283 }, { - "epoch": 0.8877412031782066, + "epoch": 0.8865085437388421, "grad_norm": 0.0, - "learning_rate": 6.537290312734356e-07, - "loss": 0.7694, + "learning_rate": 6.679833562805882e-07, + "loss": 0.7436, "step": 31284 }, { - "epoch": 0.8877695800227015, + "epoch": 0.8865368811811046, "grad_norm": 0.0, - "learning_rate": 6.534022196698742e-07, - "loss": 0.7967, + "learning_rate": 6.676535851630983e-07, + "loss": 0.8654, "step": 31285 }, { - "epoch": 0.8877979568671963, + "epoch": 0.886565218623367, "grad_norm": 0.0, - "learning_rate": 6.530754870174449e-07, - "loss": 0.8057, + "learning_rate": 6.673238926554282e-07, + "loss": 0.7994, "step": 31286 }, { - "epoch": 0.8878263337116913, + "epoch": 0.8865935560656295, "grad_norm": 0.0, - "learning_rate": 6.52748833318908e-07, - "loss": 0.7871, + "learning_rate": 6.669942787603556e-07, + "loss": 0.7879, "step": 31287 }, { - "epoch": 0.8878547105561861, + "epoch": 0.886621893507892, "grad_norm": 0.0, - "learning_rate": 6.524222585770212e-07, - "loss": 0.8486, + "learning_rate": 6.666647434806539e-07, + "loss": 0.9088, "step": 31288 }, { - "epoch": 0.887883087400681, + "epoch": 0.8866502309501545, "grad_norm": 0.0, - "learning_rate": 6.520957627945424e-07, - "loss": 0.8758, + "learning_rate": 6.663352868191008e-07, + "loss": 0.7967, "step": 31289 }, { - "epoch": 0.887911464245176, + "epoch": 0.8866785683924169, "grad_norm": 0.0, - "learning_rate": 6.517693459742324e-07, - "loss": 0.7725, + "learning_rate": 6.660059087784743e-07, + "loss": 0.7656, "step": 31290 }, { - "epoch": 0.8879398410896708, + "epoch": 0.8867069058346794, "grad_norm": 0.0, - "learning_rate": 6.514430081188461e-07, - "loss": 0.7311, + "learning_rate": 6.656766093615442e-07, + "loss": 0.9299, "step": 31291 }, { - "epoch": 0.8879682179341657, + "epoch": 0.8867352432769419, "grad_norm": 0.0, - "learning_rate": 6.511167492311421e-07, - "loss": 0.7205, + "learning_rate": 6.65347388571086e-07, + "loss": 0.8552, "step": 31292 }, { - "epoch": 0.8879965947786607, + "epoch": 0.8867635807192042, "grad_norm": 0.0, - "learning_rate": 6.50790569313875e-07, - "loss": 0.8383, + "learning_rate": 6.650182464098743e-07, + "loss": 0.793, "step": 31293 }, { - "epoch": 0.8880249716231555, + "epoch": 0.8867919181614667, "grad_norm": 0.0, - "learning_rate": 6.504644683697992e-07, - "loss": 0.7621, + "learning_rate": 6.64689182880679e-07, + "loss": 0.845, "step": 31294 }, { - "epoch": 0.8880533484676504, + "epoch": 0.8868202556037292, "grad_norm": 0.0, - "learning_rate": 6.501384464016725e-07, - "loss": 0.7116, + "learning_rate": 6.643601979862746e-07, + "loss": 0.8289, "step": 31295 }, { - "epoch": 0.8880817253121452, + "epoch": 0.8868485930459916, "grad_norm": 0.0, - "learning_rate": 6.498125034122438e-07, - "loss": 0.8071, + "learning_rate": 6.640312917294301e-07, + "loss": 0.7797, "step": 31296 }, { - "epoch": 0.8881101021566402, + "epoch": 0.8868769304882541, "grad_norm": 0.0, - "learning_rate": 6.494866394042709e-07, - "loss": 0.7332, + "learning_rate": 6.637024641129164e-07, + "loss": 0.7506, "step": 31297 }, { - "epoch": 0.8881384790011351, + "epoch": 0.8869052679305166, "grad_norm": 0.0, - "learning_rate": 6.49160854380505e-07, - "loss": 0.8688, + "learning_rate": 6.633737151395037e-07, + "loss": 0.7501, "step": 31298 }, { - "epoch": 0.8881668558456299, + "epoch": 0.8869336053727791, "grad_norm": 0.0, - "learning_rate": 6.488351483436961e-07, - "loss": 0.8746, + "learning_rate": 6.630450448119618e-07, + "loss": 0.7375, "step": 31299 }, { - "epoch": 0.8881952326901249, + "epoch": 0.8869619428150415, "grad_norm": 0.0, - "learning_rate": 6.485095212965986e-07, - "loss": 0.8482, + "learning_rate": 6.627164531330576e-07, + "loss": 0.7408, "step": 31300 }, { - "epoch": 0.8882236095346198, + "epoch": 0.886990280257304, "grad_norm": 0.0, - "learning_rate": 6.481839732419615e-07, - "loss": 0.8526, + "learning_rate": 6.623879401055622e-07, + "loss": 0.7662, "step": 31301 }, { - "epoch": 0.8882519863791146, + "epoch": 0.8870186176995665, "grad_norm": 0.0, - "learning_rate": 6.478585041825336e-07, - "loss": 0.8963, + "learning_rate": 6.620595057322399e-07, + "loss": 0.7269, "step": 31302 }, { - "epoch": 0.8882803632236095, + "epoch": 0.8870469551418289, "grad_norm": 0.0, - "learning_rate": 6.475331141210661e-07, - "loss": 0.8274, + "learning_rate": 6.617311500158585e-07, + "loss": 0.8232, "step": 31303 }, { - "epoch": 0.8883087400681045, + "epoch": 0.8870752925840913, "grad_norm": 0.0, - "learning_rate": 6.47207803060308e-07, - "loss": 0.7738, + "learning_rate": 6.614028729591815e-07, + "loss": 0.7791, "step": 31304 }, { - "epoch": 0.8883371169125993, + "epoch": 0.8871036300263538, "grad_norm": 0.0, - "learning_rate": 6.468825710030024e-07, - "loss": 0.9615, + "learning_rate": 6.610746745649765e-07, + "loss": 0.7429, "step": 31305 }, { - "epoch": 0.8883654937570942, + "epoch": 0.8871319674686163, "grad_norm": 0.0, - "learning_rate": 6.465574179519029e-07, - "loss": 0.7508, + "learning_rate": 6.607465548360092e-07, + "loss": 0.8251, "step": 31306 }, { - "epoch": 0.8883938706015891, + "epoch": 0.8871603049108787, "grad_norm": 0.0, - "learning_rate": 6.462323439097528e-07, - "loss": 0.791, + "learning_rate": 6.604185137750396e-07, + "loss": 0.8153, "step": 31307 }, { - "epoch": 0.888422247446084, + "epoch": 0.8871886423531412, "grad_norm": 0.0, - "learning_rate": 6.459073488792989e-07, - "loss": 0.7152, + "learning_rate": 6.600905513848333e-07, + "loss": 0.649, "step": 31308 }, { - "epoch": 0.8884506242905789, + "epoch": 0.8872169797954037, "grad_norm": 0.0, - "learning_rate": 6.455824328632865e-07, - "loss": 0.8655, + "learning_rate": 6.597626676681545e-07, + "loss": 0.7864, "step": 31309 }, { - "epoch": 0.8884790011350738, + "epoch": 0.8872453172376661, "grad_norm": 0.0, - "learning_rate": 6.452575958644592e-07, - "loss": 0.9061, + "learning_rate": 6.594348626277613e-07, + "loss": 0.8176, "step": 31310 }, { - "epoch": 0.8885073779795687, + "epoch": 0.8872736546799286, "grad_norm": 0.0, - "learning_rate": 6.449328378855613e-07, - "loss": 0.8069, + "learning_rate": 6.59107136266417e-07, + "loss": 0.9017, "step": 31311 }, { - "epoch": 0.8885357548240636, + "epoch": 0.8873019921221911, "grad_norm": 0.0, - "learning_rate": 6.446081589293373e-07, - "loss": 0.721, + "learning_rate": 6.587794885868815e-07, + "loss": 0.8169, "step": 31312 }, { - "epoch": 0.8885641316685584, + "epoch": 0.8873303295644535, "grad_norm": 0.0, - "learning_rate": 6.442835589985274e-07, - "loss": 0.8966, + "learning_rate": 6.584519195919148e-07, + "loss": 0.7525, "step": 31313 }, { - "epoch": 0.8885925085130534, + "epoch": 0.887358667006716, "grad_norm": 0.0, - "learning_rate": 6.439590380958749e-07, - "loss": 0.8709, + "learning_rate": 6.581244292842792e-07, + "loss": 0.7887, "step": 31314 }, { - "epoch": 0.8886208853575482, + "epoch": 0.8873870044489784, "grad_norm": 0.0, - "learning_rate": 6.43634596224123e-07, - "loss": 0.7715, + "learning_rate": 6.577970176667281e-07, + "loss": 0.867, "step": 31315 }, { - "epoch": 0.8886492622020431, + "epoch": 0.8874153418912409, "grad_norm": 0.0, - "learning_rate": 6.433102333860075e-07, - "loss": 0.7975, + "learning_rate": 6.574696847420236e-07, + "loss": 0.7345, "step": 31316 }, { - "epoch": 0.8886776390465381, + "epoch": 0.8874436793335033, "grad_norm": 0.0, - "learning_rate": 6.429859495842727e-07, - "loss": 0.6893, + "learning_rate": 6.571424305129193e-07, + "loss": 0.7609, "step": 31317 }, { - "epoch": 0.8887060158910329, + "epoch": 0.8874720167757658, "grad_norm": 0.0, - "learning_rate": 6.426617448216565e-07, - "loss": 0.8039, + "learning_rate": 6.568152549821749e-07, + "loss": 0.7222, "step": 31318 }, { - "epoch": 0.8887343927355278, + "epoch": 0.8875003542180283, "grad_norm": 0.0, - "learning_rate": 6.423376191008967e-07, - "loss": 0.7086, + "learning_rate": 6.56488158152545e-07, + "loss": 0.8619, "step": 31319 }, { - "epoch": 0.8887627695800226, + "epoch": 0.8875286916602907, "grad_norm": 0.0, - "learning_rate": 6.4201357242473e-07, - "loss": 0.7211, + "learning_rate": 6.561611400267853e-07, + "loss": 0.8691, "step": 31320 }, { - "epoch": 0.8887911464245176, + "epoch": 0.8875570291025532, "grad_norm": 0.0, - "learning_rate": 6.416896047958965e-07, - "loss": 0.7332, + "learning_rate": 6.558342006076491e-07, + "loss": 0.8065, "step": 31321 }, { - "epoch": 0.8888195232690125, + "epoch": 0.8875853665448157, "grad_norm": 0.0, - "learning_rate": 6.413657162171317e-07, - "loss": 0.807, + "learning_rate": 6.555073398978929e-07, + "loss": 0.7745, "step": 31322 }, { - "epoch": 0.8888479001135073, + "epoch": 0.8876137039870782, "grad_norm": 0.0, - "learning_rate": 6.410419066911732e-07, - "loss": 0.7151, + "learning_rate": 6.551805579002657e-07, + "loss": 0.8516, "step": 31323 }, { - "epoch": 0.8888762769580023, + "epoch": 0.8876420414293406, "grad_norm": 0.0, - "learning_rate": 6.407181762207526e-07, - "loss": 0.8723, + "learning_rate": 6.548538546175243e-07, + "loss": 0.7829, "step": 31324 }, { - "epoch": 0.8889046538024972, + "epoch": 0.887670378871603, "grad_norm": 0.0, - "learning_rate": 6.403945248086052e-07, - "loss": 0.7811, + "learning_rate": 6.545272300524186e-07, + "loss": 0.8062, "step": 31325 }, { - "epoch": 0.888933030646992, + "epoch": 0.8876987163138655, "grad_norm": 0.0, - "learning_rate": 6.400709524574688e-07, - "loss": 0.8169, + "learning_rate": 6.542006842077009e-07, + "loss": 0.8465, "step": 31326 }, { - "epoch": 0.888961407491487, + "epoch": 0.8877270537561279, "grad_norm": 0.0, - "learning_rate": 6.397474591700726e-07, - "loss": 0.777, + "learning_rate": 6.538742170861224e-07, + "loss": 0.7611, "step": 31327 }, { - "epoch": 0.8889897843359819, + "epoch": 0.8877553911983904, "grad_norm": 0.0, - "learning_rate": 6.394240449491496e-07, - "loss": 0.8112, + "learning_rate": 6.535478286904295e-07, + "loss": 0.8145, "step": 31328 }, { - "epoch": 0.8890181611804767, + "epoch": 0.8877837286406529, "grad_norm": 0.0, - "learning_rate": 6.391007097974333e-07, - "loss": 0.812, + "learning_rate": 6.532215190233748e-07, + "loss": 0.7893, "step": 31329 }, { - "epoch": 0.8890465380249716, + "epoch": 0.8878120660829154, "grad_norm": 0.0, - "learning_rate": 6.387774537176538e-07, - "loss": 0.8192, + "learning_rate": 6.528952880877082e-07, + "loss": 0.9083, "step": 31330 }, { - "epoch": 0.8890749148694665, + "epoch": 0.8878404035251778, "grad_norm": 0.0, - "learning_rate": 6.384542767125412e-07, - "loss": 0.7879, + "learning_rate": 6.52569135886173e-07, + "loss": 0.7992, "step": 31331 }, { - "epoch": 0.8891032917139614, + "epoch": 0.8878687409674403, "grad_norm": 0.0, - "learning_rate": 6.381311787848287e-07, - "loss": 0.8301, + "learning_rate": 6.522430624215215e-07, + "loss": 0.7482, "step": 31332 }, { - "epoch": 0.8891316685584563, + "epoch": 0.8878970784097028, "grad_norm": 0.0, - "learning_rate": 6.37808159937241e-07, - "loss": 0.699, + "learning_rate": 6.519170676964958e-07, + "loss": 0.7943, "step": 31333 }, { - "epoch": 0.8891600454029512, + "epoch": 0.8879254158519652, "grad_norm": 0.0, - "learning_rate": 6.374852201725079e-07, - "loss": 0.7682, + "learning_rate": 6.51591151713844e-07, + "loss": 0.8771, "step": 31334 }, { - "epoch": 0.8891884222474461, + "epoch": 0.8879537532942277, "grad_norm": 0.0, - "learning_rate": 6.371623594933596e-07, - "loss": 0.8529, + "learning_rate": 6.512653144763137e-07, + "loss": 0.759, "step": 31335 }, { - "epoch": 0.889216799091941, + "epoch": 0.8879820907364901, "grad_norm": 0.0, - "learning_rate": 6.368395779025194e-07, - "loss": 0.8387, + "learning_rate": 6.509395559866449e-07, + "loss": 0.7431, "step": 31336 }, { - "epoch": 0.8892451759364358, + "epoch": 0.8880104281787525, "grad_norm": 0.0, - "learning_rate": 6.365168754027173e-07, - "loss": 0.8633, + "learning_rate": 6.506138762475833e-07, + "loss": 0.7902, "step": 31337 }, { - "epoch": 0.8892735527809308, + "epoch": 0.888038765621015, "grad_norm": 0.0, - "learning_rate": 6.361942519966802e-07, - "loss": 0.8328, + "learning_rate": 6.502882752618744e-07, + "loss": 0.9192, "step": 31338 }, { - "epoch": 0.8893019296254256, + "epoch": 0.8880671030632775, "grad_norm": 0.0, - "learning_rate": 6.35871707687129e-07, - "loss": 0.9023, + "learning_rate": 6.499627530322583e-07, + "loss": 0.771, "step": 31339 }, { - "epoch": 0.8893303064699205, + "epoch": 0.88809544050554, "grad_norm": 0.0, - "learning_rate": 6.355492424767906e-07, - "loss": 0.8787, + "learning_rate": 6.496373095614794e-07, + "loss": 0.7976, "step": 31340 }, { - "epoch": 0.8893586833144155, + "epoch": 0.8881237779478024, "grad_norm": 0.0, - "learning_rate": 6.352268563683905e-07, - "loss": 0.6715, + "learning_rate": 6.493119448522767e-07, + "loss": 0.8031, "step": 31341 }, { - "epoch": 0.8893870601589103, + "epoch": 0.8881521153900649, "grad_norm": 0.0, - "learning_rate": 6.349045493646489e-07, - "loss": 0.8462, + "learning_rate": 6.489866589073912e-07, + "loss": 0.8079, "step": 31342 }, { - "epoch": 0.8894154370034052, + "epoch": 0.8881804528323274, "grad_norm": 0.0, - "learning_rate": 6.345823214682889e-07, - "loss": 0.8313, + "learning_rate": 6.486614517295653e-07, + "loss": 0.9909, "step": 31343 }, { - "epoch": 0.8894438138479002, + "epoch": 0.8882087902745898, "grad_norm": 0.0, - "learning_rate": 6.342601726820341e-07, - "loss": 0.8654, + "learning_rate": 6.483363233215345e-07, + "loss": 0.9246, "step": 31344 }, { - "epoch": 0.889472190692395, + "epoch": 0.8882371277168523, "grad_norm": 0.0, - "learning_rate": 6.339381030086045e-07, - "loss": 0.7577, + "learning_rate": 6.480112736860411e-07, + "loss": 0.8436, "step": 31345 }, { - "epoch": 0.8895005675368899, + "epoch": 0.8882654651591148, "grad_norm": 0.0, - "learning_rate": 6.336161124507212e-07, - "loss": 0.822, + "learning_rate": 6.476863028258207e-07, + "loss": 0.8293, "step": 31346 }, { - "epoch": 0.8895289443813847, + "epoch": 0.8882938026013772, "grad_norm": 0.0, - "learning_rate": 6.33294201011102e-07, - "loss": 0.6691, + "learning_rate": 6.47361410743611e-07, + "loss": 0.7169, "step": 31347 }, { - "epoch": 0.8895573212258797, + "epoch": 0.8883221400436396, "grad_norm": 0.0, - "learning_rate": 6.329723686924682e-07, - "loss": 0.7229, + "learning_rate": 6.470365974421499e-07, + "loss": 0.7682, "step": 31348 }, { - "epoch": 0.8895856980703746, + "epoch": 0.8883504774859021, "grad_norm": 0.0, - "learning_rate": 6.326506154975398e-07, - "loss": 0.8359, + "learning_rate": 6.46711862924172e-07, + "loss": 0.8178, "step": 31349 }, { - "epoch": 0.8896140749148694, + "epoch": 0.8883788149281646, "grad_norm": 0.0, - "learning_rate": 6.323289414290312e-07, - "loss": 0.8487, + "learning_rate": 6.463872071924149e-07, + "loss": 0.9244, "step": 31350 }, { - "epoch": 0.8896424517593644, + "epoch": 0.888407152370427, "grad_norm": 0.0, - "learning_rate": 6.320073464896592e-07, - "loss": 0.7523, + "learning_rate": 6.460626302496098e-07, + "loss": 0.8346, "step": 31351 }, { - "epoch": 0.8896708286038593, + "epoch": 0.8884354898126895, "grad_norm": 0.0, - "learning_rate": 6.316858306821449e-07, - "loss": 0.7606, + "learning_rate": 6.457381320984935e-07, + "loss": 0.7598, "step": 31352 }, { - "epoch": 0.8896992054483541, + "epoch": 0.888463827254952, "grad_norm": 0.0, - "learning_rate": 6.313643940092007e-07, - "loss": 0.8874, + "learning_rate": 6.454137127417992e-07, + "loss": 0.7743, "step": 31353 }, { - "epoch": 0.889727582292849, + "epoch": 0.8884921646972145, "grad_norm": 0.0, - "learning_rate": 6.31043036473542e-07, - "loss": 0.7884, + "learning_rate": 6.450893721822582e-07, + "loss": 0.8784, "step": 31354 }, { - "epoch": 0.889755959137344, + "epoch": 0.8885205021394769, "grad_norm": 0.0, - "learning_rate": 6.307217580778857e-07, - "loss": 0.8901, + "learning_rate": 6.447651104226026e-07, + "loss": 0.7916, "step": 31355 }, { - "epoch": 0.8897843359818388, + "epoch": 0.8885488395817394, "grad_norm": 0.0, - "learning_rate": 6.304005588249429e-07, - "loss": 0.8153, + "learning_rate": 6.444409274655661e-07, + "loss": 0.7241, "step": 31356 }, { - "epoch": 0.8898127128263337, + "epoch": 0.8885771770240019, "grad_norm": 0.0, - "learning_rate": 6.300794387174269e-07, - "loss": 0.7002, + "learning_rate": 6.441168233138761e-07, + "loss": 0.7709, "step": 31357 }, { - "epoch": 0.8898410896708286, + "epoch": 0.8886055144662642, "grad_norm": 0.0, - "learning_rate": 6.297583977580535e-07, - "loss": 0.7985, + "learning_rate": 6.437927979702651e-07, + "loss": 0.722, "step": 31358 }, { - "epoch": 0.8898694665153235, + "epoch": 0.8886338519085267, "grad_norm": 0.0, - "learning_rate": 6.294374359495303e-07, - "loss": 0.8466, + "learning_rate": 6.434688514374632e-07, + "loss": 0.8753, "step": 31359 }, { - "epoch": 0.8898978433598184, + "epoch": 0.8886621893507892, "grad_norm": 0.0, - "learning_rate": 6.291165532945697e-07, - "loss": 0.8726, + "learning_rate": 6.431449837181958e-07, + "loss": 0.8638, "step": 31360 }, { - "epoch": 0.8899262202043133, + "epoch": 0.8886905267930516, "grad_norm": 0.0, - "learning_rate": 6.287957497958852e-07, - "loss": 0.7844, + "learning_rate": 6.428211948151919e-07, + "loss": 0.8681, "step": 31361 }, { - "epoch": 0.8899545970488082, + "epoch": 0.8887188642353141, "grad_norm": 0.0, - "learning_rate": 6.284750254561822e-07, - "loss": 0.8509, + "learning_rate": 6.424974847311804e-07, + "loss": 0.7528, "step": 31362 }, { - "epoch": 0.889982973893303, + "epoch": 0.8887472016775766, "grad_norm": 0.0, - "learning_rate": 6.281543802781731e-07, - "loss": 0.8179, + "learning_rate": 6.421738534688882e-07, + "loss": 0.8258, "step": 31363 }, { - "epoch": 0.8900113507377979, + "epoch": 0.8887755391198391, "grad_norm": 0.0, - "learning_rate": 6.278338142645657e-07, - "loss": 0.7327, + "learning_rate": 6.418503010310417e-07, + "loss": 0.7338, "step": 31364 }, { - "epoch": 0.8900397275822929, + "epoch": 0.8888038765621015, "grad_norm": 0.0, - "learning_rate": 6.275133274180656e-07, - "loss": 0.8294, + "learning_rate": 6.415268274203634e-07, + "loss": 0.7371, "step": 31365 }, { - "epoch": 0.8900681044267877, + "epoch": 0.888832214004364, "grad_norm": 0.0, - "learning_rate": 6.271929197413817e-07, - "loss": 0.8317, + "learning_rate": 6.412034326395799e-07, + "loss": 0.6881, "step": 31366 }, { - "epoch": 0.8900964812712826, + "epoch": 0.8888605514466265, "grad_norm": 0.0, - "learning_rate": 6.268725912372209e-07, - "loss": 0.782, + "learning_rate": 6.40880116691417e-07, + "loss": 0.9239, "step": 31367 }, { - "epoch": 0.8901248581157776, + "epoch": 0.8888888888888888, "grad_norm": 0.0, - "learning_rate": 6.265523419082897e-07, - "loss": 0.7889, + "learning_rate": 6.405568795785944e-07, + "loss": 0.7835, "step": 31368 }, { - "epoch": 0.8901532349602724, + "epoch": 0.8889172263311513, "grad_norm": 0.0, - "learning_rate": 6.262321717572928e-07, - "loss": 0.7714, + "learning_rate": 6.402337213038379e-07, + "loss": 0.7253, "step": 31369 }, { - "epoch": 0.8901816118047673, + "epoch": 0.8889455637734138, "grad_norm": 0.0, - "learning_rate": 6.259120807869324e-07, - "loss": 0.8201, + "learning_rate": 6.399106418698675e-07, + "loss": 0.85, "step": 31370 }, { - "epoch": 0.8902099886492622, + "epoch": 0.8889739012156763, "grad_norm": 0.0, - "learning_rate": 6.255920689999139e-07, - "loss": 0.7077, + "learning_rate": 6.395876412794055e-07, + "loss": 0.8659, "step": 31371 }, { - "epoch": 0.8902383654937571, + "epoch": 0.8890022386579387, "grad_norm": 0.0, - "learning_rate": 6.252721363989423e-07, - "loss": 0.7664, + "learning_rate": 6.392647195351731e-07, + "loss": 0.8586, "step": 31372 }, { - "epoch": 0.890266742338252, + "epoch": 0.8890305761002012, "grad_norm": 0.0, - "learning_rate": 6.249522829867172e-07, - "loss": 0.7894, + "learning_rate": 6.389418766398903e-07, + "loss": 0.9138, "step": 31373 }, { - "epoch": 0.8902951191827468, + "epoch": 0.8890589135424637, "grad_norm": 0.0, - "learning_rate": 6.246325087659411e-07, - "loss": 0.8111, + "learning_rate": 6.386191125962749e-07, + "loss": 0.7917, "step": 31374 }, { - "epoch": 0.8903234960272418, + "epoch": 0.8890872509847261, "grad_norm": 0.0, - "learning_rate": 6.243128137393184e-07, - "loss": 0.7102, + "learning_rate": 6.38296427407048e-07, + "loss": 0.781, "step": 31375 }, { - "epoch": 0.8903518728717367, + "epoch": 0.8891155884269886, "grad_norm": 0.0, - "learning_rate": 6.239931979095437e-07, - "loss": 0.9014, + "learning_rate": 6.379738210749253e-07, + "loss": 0.698, "step": 31376 }, { - "epoch": 0.8903802497162315, + "epoch": 0.8891439258692511, "grad_norm": 0.0, - "learning_rate": 6.236736612793204e-07, - "loss": 0.7601, + "learning_rate": 6.37651293602628e-07, + "loss": 0.7875, "step": 31377 }, { - "epoch": 0.8904086265607265, + "epoch": 0.8891722633115136, "grad_norm": 0.0, - "learning_rate": 6.233542038513496e-07, - "loss": 0.8115, + "learning_rate": 6.373288449928694e-07, + "loss": 0.8704, "step": 31378 }, { - "epoch": 0.8904370034052214, + "epoch": 0.8892006007537759, "grad_norm": 0.0, - "learning_rate": 6.230348256283247e-07, - "loss": 0.8396, + "learning_rate": 6.370064752483662e-07, + "loss": 0.8625, "step": 31379 }, { - "epoch": 0.8904653802497162, + "epoch": 0.8892289381960384, "grad_norm": 0.0, - "learning_rate": 6.22715526612947e-07, - "loss": 0.8745, + "learning_rate": 6.366841843718352e-07, + "loss": 0.7608, "step": 31380 }, { - "epoch": 0.8904937570942111, + "epoch": 0.8892572756383009, "grad_norm": 0.0, - "learning_rate": 6.223963068079142e-07, - "loss": 0.8441, + "learning_rate": 6.363619723659898e-07, + "loss": 0.7606, "step": 31381 }, { - "epoch": 0.890522133938706, + "epoch": 0.8892856130805633, "grad_norm": 0.0, - "learning_rate": 6.220771662159175e-07, - "loss": 0.8921, + "learning_rate": 6.360398392335454e-07, + "loss": 0.8272, "step": 31382 }, { - "epoch": 0.8905505107832009, + "epoch": 0.8893139505228258, "grad_norm": 0.0, - "learning_rate": 6.217581048396604e-07, - "loss": 0.7331, + "learning_rate": 6.357177849772134e-07, + "loss": 0.9471, "step": 31383 }, { - "epoch": 0.8905788876276958, + "epoch": 0.8893422879650883, "grad_norm": 0.0, - "learning_rate": 6.214391226818329e-07, - "loss": 0.7514, + "learning_rate": 6.353958095997081e-07, + "loss": 0.8463, "step": 31384 }, { - "epoch": 0.8906072644721907, + "epoch": 0.8893706254073507, "grad_norm": 0.0, - "learning_rate": 6.211202197451294e-07, - "loss": 0.7427, + "learning_rate": 6.350739131037431e-07, + "loss": 0.738, "step": 31385 }, { - "epoch": 0.8906356413166856, + "epoch": 0.8893989628496132, "grad_norm": 0.0, - "learning_rate": 6.208013960322478e-07, - "loss": 0.7678, + "learning_rate": 6.347520954920261e-07, + "loss": 0.8257, "step": 31386 }, { - "epoch": 0.8906640181611805, + "epoch": 0.8894273002918757, "grad_norm": 0.0, - "learning_rate": 6.204826515458762e-07, - "loss": 0.7386, + "learning_rate": 6.344303567672694e-07, + "loss": 0.7702, "step": 31387 }, { - "epoch": 0.8906923950056753, + "epoch": 0.8894556377341382, "grad_norm": 0.0, - "learning_rate": 6.201639862887099e-07, - "loss": 0.8536, + "learning_rate": 6.341086969321853e-07, + "loss": 0.7513, "step": 31388 }, { - "epoch": 0.8907207718501703, + "epoch": 0.8894839751764005, "grad_norm": 0.0, - "learning_rate": 6.198454002634414e-07, - "loss": 0.8085, + "learning_rate": 6.337871159894804e-07, + "loss": 0.6694, "step": 31389 }, { - "epoch": 0.8907491486946651, + "epoch": 0.889512312618663, "grad_norm": 0.0, - "learning_rate": 6.195268934727583e-07, - "loss": 0.7527, + "learning_rate": 6.334656139418661e-07, + "loss": 0.7507, "step": 31390 }, { - "epoch": 0.89077752553916, + "epoch": 0.8895406500609255, "grad_norm": 0.0, - "learning_rate": 6.192084659193542e-07, - "loss": 0.7662, + "learning_rate": 6.331441907920477e-07, + "loss": 0.813, "step": 31391 }, { - "epoch": 0.890805902383655, + "epoch": 0.8895689875031879, "grad_norm": 0.0, - "learning_rate": 6.188901176059181e-07, - "loss": 0.8061, + "learning_rate": 6.328228465427344e-07, + "loss": 0.8963, "step": 31392 }, { - "epoch": 0.8908342792281498, + "epoch": 0.8895973249454504, "grad_norm": 0.0, - "learning_rate": 6.185718485351388e-07, - "loss": 0.7588, + "learning_rate": 6.325015811966339e-07, + "loss": 0.758, "step": 31393 }, { - "epoch": 0.8908626560726447, + "epoch": 0.8896256623877129, "grad_norm": 0.0, - "learning_rate": 6.182536587097043e-07, - "loss": 0.7892, + "learning_rate": 6.321803947564487e-07, + "loss": 0.9589, "step": 31394 }, { - "epoch": 0.8908910329171397, + "epoch": 0.8896539998299754, "grad_norm": 0.0, - "learning_rate": 6.179355481323035e-07, - "loss": 0.9762, + "learning_rate": 6.318592872248886e-07, + "loss": 0.7841, "step": 31395 }, { - "epoch": 0.8909194097616345, + "epoch": 0.8896823372722378, "grad_norm": 0.0, - "learning_rate": 6.176175168056231e-07, - "loss": 0.8548, + "learning_rate": 6.31538258604657e-07, + "loss": 0.808, "step": 31396 }, { - "epoch": 0.8909477866061294, + "epoch": 0.8897106747145003, "grad_norm": 0.0, - "learning_rate": 6.172995647323477e-07, - "loss": 0.8857, + "learning_rate": 6.312173088984552e-07, + "loss": 0.7796, "step": 31397 }, { - "epoch": 0.8909761634506242, + "epoch": 0.8897390121567628, "grad_norm": 0.0, - "learning_rate": 6.169816919151649e-07, - "loss": 0.8423, + "learning_rate": 6.308964381089921e-07, + "loss": 0.8283, "step": 31398 }, { - "epoch": 0.8910045402951192, + "epoch": 0.8897673495990251, "grad_norm": 0.0, - "learning_rate": 6.166638983567607e-07, - "loss": 0.8354, + "learning_rate": 6.305756462389645e-07, + "loss": 0.775, "step": 31399 }, { - "epoch": 0.8910329171396141, + "epoch": 0.8897956870412876, "grad_norm": 0.0, - "learning_rate": 6.163461840598184e-07, - "loss": 0.7692, + "learning_rate": 6.30254933291079e-07, + "loss": 0.8029, "step": 31400 }, { - "epoch": 0.8910612939841089, + "epoch": 0.8898240244835501, "grad_norm": 0.0, - "learning_rate": 6.160285490270212e-07, - "loss": 0.7909, + "learning_rate": 6.299342992680346e-07, + "loss": 0.7182, "step": 31401 }, { - "epoch": 0.8910896708286039, + "epoch": 0.8898523619258126, "grad_norm": 0.0, - "learning_rate": 6.157109932610527e-07, - "loss": 0.8035, + "learning_rate": 6.296137441725336e-07, + "loss": 0.827, "step": 31402 }, { - "epoch": 0.8911180476730988, + "epoch": 0.889880699368075, "grad_norm": 0.0, - "learning_rate": 6.153935167645964e-07, - "loss": 0.8195, + "learning_rate": 6.292932680072761e-07, + "loss": 0.8002, "step": 31403 }, { - "epoch": 0.8911464245175936, + "epoch": 0.8899090368103375, "grad_norm": 0.0, - "learning_rate": 6.150761195403321e-07, - "loss": 0.8833, + "learning_rate": 6.289728707749609e-07, + "loss": 0.8932, "step": 31404 }, { - "epoch": 0.8911748013620885, + "epoch": 0.8899373742526, "grad_norm": 0.0, - "learning_rate": 6.147588015909412e-07, - "loss": 0.8149, + "learning_rate": 6.286525524782861e-07, + "loss": 0.8458, "step": 31405 }, { - "epoch": 0.8912031782065835, + "epoch": 0.8899657116948624, "grad_norm": 0.0, - "learning_rate": 6.144415629191058e-07, - "loss": 0.8074, + "learning_rate": 6.283323131199526e-07, + "loss": 0.7309, "step": 31406 }, { - "epoch": 0.8912315550510783, + "epoch": 0.8899940491371249, "grad_norm": 0.0, - "learning_rate": 6.141244035275029e-07, - "loss": 0.8543, + "learning_rate": 6.28012152702655e-07, + "loss": 0.8422, "step": 31407 }, { - "epoch": 0.8912599318955732, + "epoch": 0.8900223865793874, "grad_norm": 0.0, - "learning_rate": 6.138073234188136e-07, - "loss": 0.7698, + "learning_rate": 6.276920712290913e-07, + "loss": 0.8447, "step": 31408 }, { - "epoch": 0.8912883087400681, + "epoch": 0.8900507240216498, "grad_norm": 0.0, - "learning_rate": 6.134903225957168e-07, - "loss": 0.7828, + "learning_rate": 6.27372068701958e-07, + "loss": 0.6966, "step": 31409 }, { - "epoch": 0.891316685584563, + "epoch": 0.8900790614639122, "grad_norm": 0.0, - "learning_rate": 6.131734010608881e-07, - "loss": 0.8346, + "learning_rate": 6.270521451239498e-07, + "loss": 0.8311, "step": 31410 }, { - "epoch": 0.8913450624290579, + "epoch": 0.8901073989061747, "grad_norm": 0.0, - "learning_rate": 6.128565588170055e-07, - "loss": 0.7649, + "learning_rate": 6.267323004977633e-07, + "loss": 0.8316, "step": 31411 }, { - "epoch": 0.8913734392735527, + "epoch": 0.8901357363484372, "grad_norm": 0.0, - "learning_rate": 6.125397958667467e-07, - "loss": 0.7336, + "learning_rate": 6.264125348260896e-07, + "loss": 0.853, "step": 31412 }, { - "epoch": 0.8914018161180477, + "epoch": 0.8901640737906996, "grad_norm": 0.0, - "learning_rate": 6.122231122127831e-07, - "loss": 0.718, + "learning_rate": 6.260928481116235e-07, + "loss": 0.8489, "step": 31413 }, { - "epoch": 0.8914301929625426, + "epoch": 0.8901924112329621, "grad_norm": 0.0, - "learning_rate": 6.119065078577968e-07, - "loss": 0.8368, + "learning_rate": 6.257732403570594e-07, + "loss": 0.788, "step": 31414 }, { - "epoch": 0.8914585698070374, + "epoch": 0.8902207486752246, "grad_norm": 0.0, - "learning_rate": 6.115899828044569e-07, - "loss": 0.7972, + "learning_rate": 6.254537115650871e-07, + "loss": 0.7577, "step": 31415 }, { - "epoch": 0.8914869466515324, + "epoch": 0.890249086117487, "grad_norm": 0.0, - "learning_rate": 6.112735370554368e-07, - "loss": 0.8328, + "learning_rate": 6.251342617383993e-07, + "loss": 0.8214, "step": 31416 }, { - "epoch": 0.8915153234960272, + "epoch": 0.8902774235597495, "grad_norm": 0.0, - "learning_rate": 6.109571706134143e-07, - "loss": 0.8131, + "learning_rate": 6.248148908796892e-07, + "loss": 0.9744, "step": 31417 }, { - "epoch": 0.8915437003405221, + "epoch": 0.890305761002012, "grad_norm": 0.0, - "learning_rate": 6.106408834810563e-07, - "loss": 0.8231, + "learning_rate": 6.244955989916434e-07, + "loss": 0.7392, "step": 31418 }, { - "epoch": 0.8915720771850171, + "epoch": 0.8903340984442745, "grad_norm": 0.0, - "learning_rate": 6.10324675661037e-07, - "loss": 0.7164, + "learning_rate": 6.241763860769535e-07, + "loss": 0.776, "step": 31419 }, { - "epoch": 0.8916004540295119, + "epoch": 0.8903624358865369, "grad_norm": 0.0, - "learning_rate": 6.10008547156029e-07, - "loss": 0.7806, + "learning_rate": 6.238572521383058e-07, + "loss": 0.885, "step": 31420 }, { - "epoch": 0.8916288308740068, + "epoch": 0.8903907733287993, "grad_norm": 0.0, - "learning_rate": 6.096924979686991e-07, - "loss": 0.8346, + "learning_rate": 6.235381971783904e-07, + "loss": 0.809, "step": 31421 }, { - "epoch": 0.8916572077185017, + "epoch": 0.8904191107710618, "grad_norm": 0.0, - "learning_rate": 6.093765281017195e-07, - "loss": 0.8419, + "learning_rate": 6.232192211998967e-07, + "loss": 0.7081, "step": 31422 }, { - "epoch": 0.8916855845629966, + "epoch": 0.8904474482133242, "grad_norm": 0.0, - "learning_rate": 6.09060637557759e-07, - "loss": 0.6774, + "learning_rate": 6.229003242055076e-07, + "loss": 0.7204, "step": 31423 }, { - "epoch": 0.8917139614074915, + "epoch": 0.8904757856555867, "grad_norm": 0.0, - "learning_rate": 6.087448263394846e-07, - "loss": 0.8633, + "learning_rate": 6.225815061979113e-07, + "loss": 0.7911, "step": 31424 }, { - "epoch": 0.8917423382519863, + "epoch": 0.8905041230978492, "grad_norm": 0.0, - "learning_rate": 6.084290944495652e-07, - "loss": 0.7245, + "learning_rate": 6.222627671797943e-07, + "loss": 0.7876, "step": 31425 }, { - "epoch": 0.8917707150964813, + "epoch": 0.8905324605401117, "grad_norm": 0.0, - "learning_rate": 6.081134418906675e-07, - "loss": 0.8115, + "learning_rate": 6.21944107153839e-07, + "loss": 0.7494, "step": 31426 }, { - "epoch": 0.8917990919409762, + "epoch": 0.8905607979823741, "grad_norm": 0.0, - "learning_rate": 6.077978686654573e-07, - "loss": 0.84, + "learning_rate": 6.216255261227311e-07, + "loss": 0.7662, "step": 31427 }, { - "epoch": 0.891827468785471, + "epoch": 0.8905891354246366, "grad_norm": 0.0, - "learning_rate": 6.074823747766012e-07, - "loss": 0.8461, + "learning_rate": 6.21307024089155e-07, + "loss": 0.78, "step": 31428 }, { - "epoch": 0.8918558456299659, + "epoch": 0.8906174728668991, "grad_norm": 0.0, - "learning_rate": 6.071669602267627e-07, - "loss": 0.9329, + "learning_rate": 6.209886010557908e-07, + "loss": 0.7891, "step": 31429 }, { - "epoch": 0.8918842224744609, + "epoch": 0.8906458103091615, "grad_norm": 0.0, - "learning_rate": 6.068516250186074e-07, - "loss": 0.8586, + "learning_rate": 6.206702570253242e-07, + "loss": 0.8456, "step": 31430 }, { - "epoch": 0.8919125993189557, + "epoch": 0.890674147751424, "grad_norm": 0.0, - "learning_rate": 6.065363691547999e-07, - "loss": 0.9041, + "learning_rate": 6.203519920004341e-07, + "loss": 0.8008, "step": 31431 }, { - "epoch": 0.8919409761634506, + "epoch": 0.8907024851936864, "grad_norm": 0.0, - "learning_rate": 6.062211926380012e-07, - "loss": 0.7368, + "learning_rate": 6.20033805983804e-07, + "loss": 0.8408, "step": 31432 }, { - "epoch": 0.8919693530079456, + "epoch": 0.8907308226359488, "grad_norm": 0.0, - "learning_rate": 6.05906095470874e-07, - "loss": 0.8035, + "learning_rate": 6.197156989781106e-07, + "loss": 0.7798, "step": 31433 }, { - "epoch": 0.8919977298524404, + "epoch": 0.8907591600782113, "grad_norm": 0.0, - "learning_rate": 6.055910776560813e-07, - "loss": 0.7894, + "learning_rate": 6.193976709860339e-07, + "loss": 0.786, "step": 31434 }, { - "epoch": 0.8920261066969353, + "epoch": 0.8907874975204738, "grad_norm": 0.0, - "learning_rate": 6.052761391962825e-07, - "loss": 0.7742, + "learning_rate": 6.190797220102573e-07, + "loss": 0.8263, "step": 31435 }, { - "epoch": 0.8920544835414302, + "epoch": 0.8908158349627363, "grad_norm": 0.0, - "learning_rate": 6.049612800941385e-07, - "loss": 0.8311, + "learning_rate": 6.187618520534533e-07, + "loss": 0.8292, "step": 31436 }, { - "epoch": 0.8920828603859251, + "epoch": 0.8908441724049987, "grad_norm": 0.0, - "learning_rate": 6.046465003523094e-07, - "loss": 0.7296, + "learning_rate": 6.184440611183018e-07, + "loss": 0.8345, "step": 31437 }, { - "epoch": 0.89211123723042, + "epoch": 0.8908725098472612, "grad_norm": 0.0, - "learning_rate": 6.043317999734533e-07, - "loss": 0.8382, + "learning_rate": 6.181263492074808e-07, + "loss": 0.8422, "step": 31438 }, { - "epoch": 0.8921396140749148, + "epoch": 0.8909008472895237, "grad_norm": 0.0, - "learning_rate": 6.040171789602278e-07, - "loss": 0.8133, + "learning_rate": 6.178087163236645e-07, + "loss": 0.7624, "step": 31439 }, { - "epoch": 0.8921679909194098, + "epoch": 0.8909291847317861, "grad_norm": 0.0, - "learning_rate": 6.037026373152933e-07, - "loss": 0.8379, + "learning_rate": 6.174911624695301e-07, + "loss": 0.7605, "step": 31440 }, { - "epoch": 0.8921963677639047, + "epoch": 0.8909575221740486, "grad_norm": 0.0, - "learning_rate": 6.03388175041304e-07, - "loss": 0.8354, + "learning_rate": 6.171736876477508e-07, + "loss": 0.6794, "step": 31441 }, { - "epoch": 0.8922247446083995, + "epoch": 0.890985859616311, "grad_norm": 0.0, - "learning_rate": 6.030737921409169e-07, - "loss": 0.7798, + "learning_rate": 6.168562918610021e-07, + "loss": 0.7526, "step": 31442 }, { - "epoch": 0.8922531214528945, + "epoch": 0.8910141970585735, "grad_norm": 0.0, - "learning_rate": 6.027594886167876e-07, - "loss": 0.8146, + "learning_rate": 6.165389751119577e-07, + "loss": 0.771, "step": 31443 }, { - "epoch": 0.8922814982973893, + "epoch": 0.8910425345008359, "grad_norm": 0.0, - "learning_rate": 6.024452644715717e-07, - "loss": 0.7202, + "learning_rate": 6.162217374032897e-07, + "loss": 0.7807, "step": 31444 }, { - "epoch": 0.8923098751418842, + "epoch": 0.8910708719430984, "grad_norm": 0.0, - "learning_rate": 6.021311197079228e-07, - "loss": 0.6989, + "learning_rate": 6.159045787376705e-07, + "loss": 0.8684, "step": 31445 }, { - "epoch": 0.8923382519863791, + "epoch": 0.8910992093853609, "grad_norm": 0.0, - "learning_rate": 6.018170543284973e-07, - "loss": 0.8075, + "learning_rate": 6.155874991177724e-07, + "loss": 0.8774, "step": 31446 }, { - "epoch": 0.892366628830874, + "epoch": 0.8911275468276233, "grad_norm": 0.0, - "learning_rate": 6.015030683359435e-07, - "loss": 0.7743, + "learning_rate": 6.152704985462654e-07, + "loss": 0.7606, "step": 31447 }, { - "epoch": 0.8923950056753689, + "epoch": 0.8911558842698858, "grad_norm": 0.0, - "learning_rate": 6.011891617329147e-07, - "loss": 0.8486, + "learning_rate": 6.149535770258208e-07, + "loss": 0.8475, "step": 31448 }, { - "epoch": 0.8924233825198638, + "epoch": 0.8911842217121483, "grad_norm": 0.0, - "learning_rate": 6.008753345220664e-07, - "loss": 0.8311, + "learning_rate": 6.146367345591053e-07, + "loss": 0.7676, "step": 31449 }, { - "epoch": 0.8924517593643587, + "epoch": 0.8912125591544108, "grad_norm": 0.0, - "learning_rate": 6.005615867060443e-07, - "loss": 0.7874, + "learning_rate": 6.143199711487901e-07, + "loss": 0.7599, "step": 31450 }, { - "epoch": 0.8924801362088536, + "epoch": 0.8912408965966732, "grad_norm": 0.0, - "learning_rate": 6.002479182875021e-07, - "loss": 0.8013, + "learning_rate": 6.140032867975443e-07, + "loss": 0.9092, "step": 31451 }, { - "epoch": 0.8925085130533484, + "epoch": 0.8912692340389357, "grad_norm": 0.0, - "learning_rate": 5.999343292690895e-07, - "loss": 0.6534, + "learning_rate": 6.136866815080333e-07, + "loss": 0.7814, "step": 31452 }, { - "epoch": 0.8925368898978434, + "epoch": 0.8912975714811981, "grad_norm": 0.0, - "learning_rate": 5.996208196534514e-07, - "loss": 0.8272, + "learning_rate": 6.133701552829252e-07, + "loss": 0.7861, "step": 31453 }, { - "epoch": 0.8925652667423383, + "epoch": 0.8913259089234605, "grad_norm": 0.0, - "learning_rate": 5.993073894432422e-07, - "loss": 0.7992, + "learning_rate": 6.130537081248844e-07, + "loss": 0.77, "step": 31454 }, { - "epoch": 0.8925936435868331, + "epoch": 0.891354246365723, "grad_norm": 0.0, - "learning_rate": 5.98994038641103e-07, - "loss": 0.8354, + "learning_rate": 6.127373400365788e-07, + "loss": 0.8139, "step": 31455 }, { - "epoch": 0.892622020431328, + "epoch": 0.8913825838079855, "grad_norm": 0.0, - "learning_rate": 5.986807672496841e-07, - "loss": 0.8789, + "learning_rate": 6.12421051020674e-07, + "loss": 0.8415, "step": 31456 }, { - "epoch": 0.892650397275823, + "epoch": 0.8914109212502479, "grad_norm": 0.0, - "learning_rate": 5.983675752716334e-07, - "loss": 0.8335, + "learning_rate": 6.121048410798314e-07, + "loss": 0.8165, "step": 31457 }, { - "epoch": 0.8926787741203178, + "epoch": 0.8914392586925104, "grad_norm": 0.0, - "learning_rate": 5.98054462709593e-07, - "loss": 0.8789, + "learning_rate": 6.117887102167164e-07, + "loss": 0.8887, "step": 31458 }, { - "epoch": 0.8927071509648127, + "epoch": 0.8914675961347729, "grad_norm": 0.0, - "learning_rate": 5.977414295662076e-07, - "loss": 0.859, + "learning_rate": 6.114726584339914e-07, + "loss": 0.8605, "step": 31459 }, { - "epoch": 0.8927355278093076, + "epoch": 0.8914959335770354, "grad_norm": 0.0, - "learning_rate": 5.974284758441251e-07, - "loss": 0.8472, + "learning_rate": 6.111566857343176e-07, + "loss": 0.8611, "step": 31460 }, { - "epoch": 0.8927639046538025, + "epoch": 0.8915242710192978, "grad_norm": 0.0, - "learning_rate": 5.971156015459867e-07, - "loss": 0.8396, + "learning_rate": 6.108407921203597e-07, + "loss": 0.8068, "step": 31461 }, { - "epoch": 0.8927922814982974, + "epoch": 0.8915526084615603, "grad_norm": 0.0, - "learning_rate": 5.968028066744347e-07, - "loss": 0.7252, + "learning_rate": 6.105249775947741e-07, + "loss": 0.9131, "step": 31462 }, { - "epoch": 0.8928206583427922, + "epoch": 0.8915809459038228, "grad_norm": 0.0, - "learning_rate": 5.964900912321137e-07, - "loss": 0.8841, + "learning_rate": 6.102092421602234e-07, + "loss": 0.8052, "step": 31463 }, { - "epoch": 0.8928490351872872, + "epoch": 0.8916092833460851, "grad_norm": 0.0, - "learning_rate": 5.961774552216626e-07, - "loss": 0.826, + "learning_rate": 6.098935858193688e-07, + "loss": 0.897, "step": 31464 }, { - "epoch": 0.8928774120317821, + "epoch": 0.8916376207883476, "grad_norm": 0.0, - "learning_rate": 5.958648986457227e-07, - "loss": 0.8789, + "learning_rate": 6.095780085748659e-07, + "loss": 0.8373, "step": 31465 }, { - "epoch": 0.8929057888762769, + "epoch": 0.8916659582306101, "grad_norm": 0.0, - "learning_rate": 5.955524215069364e-07, - "loss": 0.7832, + "learning_rate": 6.092625104293748e-07, + "loss": 0.7797, "step": 31466 }, { - "epoch": 0.8929341657207719, + "epoch": 0.8916942956728726, "grad_norm": 0.0, - "learning_rate": 5.952400238079403e-07, - "loss": 0.7401, + "learning_rate": 6.089470913855522e-07, + "loss": 0.7441, "step": 31467 }, { - "epoch": 0.8929625425652667, + "epoch": 0.891722633115135, "grad_norm": 0.0, - "learning_rate": 5.949277055513746e-07, - "loss": 0.8235, + "learning_rate": 6.08631751446056e-07, + "loss": 0.8708, "step": 31468 }, { - "epoch": 0.8929909194097616, + "epoch": 0.8917509705573975, "grad_norm": 0.0, - "learning_rate": 5.946154667398785e-07, - "loss": 0.7889, + "learning_rate": 6.083164906135431e-07, + "loss": 0.8432, "step": 31469 }, { - "epoch": 0.8930192962542566, + "epoch": 0.89177930799966, "grad_norm": 0.0, - "learning_rate": 5.943033073760873e-07, - "loss": 0.7599, + "learning_rate": 6.080013088906667e-07, + "loss": 0.8091, "step": 31470 }, { - "epoch": 0.8930476730987514, + "epoch": 0.8918076454419224, "grad_norm": 0.0, - "learning_rate": 5.939912274626381e-07, - "loss": 0.778, + "learning_rate": 6.076862062800825e-07, + "loss": 0.8127, "step": 31471 }, { - "epoch": 0.8930760499432463, + "epoch": 0.8918359828841849, "grad_norm": 0.0, - "learning_rate": 5.936792270021696e-07, - "loss": 0.7632, + "learning_rate": 6.073711827844464e-07, + "loss": 0.7853, "step": 31472 }, { - "epoch": 0.8931044267877412, + "epoch": 0.8918643203264474, "grad_norm": 0.0, - "learning_rate": 5.933673059973144e-07, - "loss": 0.7655, + "learning_rate": 6.070562384064094e-07, + "loss": 0.7854, "step": 31473 }, { - "epoch": 0.8931328036322361, + "epoch": 0.8918926577687099, "grad_norm": 0.0, - "learning_rate": 5.93055464450708e-07, - "loss": 0.9046, + "learning_rate": 6.06741373148626e-07, + "loss": 0.8996, "step": 31474 }, { - "epoch": 0.893161180476731, + "epoch": 0.8919209952109722, "grad_norm": 0.0, - "learning_rate": 5.927437023649851e-07, - "loss": 0.732, + "learning_rate": 6.064265870137498e-07, + "loss": 0.7608, "step": 31475 }, { - "epoch": 0.8931895573212258, + "epoch": 0.8919493326532347, "grad_norm": 0.0, - "learning_rate": 5.924320197427791e-07, - "loss": 0.7824, + "learning_rate": 6.061118800044285e-07, + "loss": 0.877, "step": 31476 }, { - "epoch": 0.8932179341657208, + "epoch": 0.8919776700954972, "grad_norm": 0.0, - "learning_rate": 5.921204165867244e-07, - "loss": 0.955, + "learning_rate": 6.05797252123318e-07, + "loss": 0.7809, "step": 31477 }, { - "epoch": 0.8932463110102157, + "epoch": 0.8920060075377596, "grad_norm": 0.0, - "learning_rate": 5.918088928994492e-07, - "loss": 0.7804, + "learning_rate": 6.054827033730625e-07, + "loss": 0.8469, "step": 31478 }, { - "epoch": 0.8932746878547105, + "epoch": 0.8920343449800221, "grad_norm": 0.0, - "learning_rate": 5.914974486835878e-07, - "loss": 0.9193, + "learning_rate": 6.051682337563158e-07, + "loss": 0.869, "step": 31479 }, { - "epoch": 0.8933030646992054, + "epoch": 0.8920626824222846, "grad_norm": 0.0, - "learning_rate": 5.911860839417727e-07, - "loss": 0.7713, + "learning_rate": 6.048538432757256e-07, + "loss": 0.7518, "step": 31480 }, { - "epoch": 0.8933314415437004, + "epoch": 0.892091019864547, "grad_norm": 0.0, - "learning_rate": 5.908747986766295e-07, - "loss": 0.754, + "learning_rate": 6.045395319339397e-07, + "loss": 0.7707, "step": 31481 }, { - "epoch": 0.8933598183881952, + "epoch": 0.8921193573068095, "grad_norm": 0.0, - "learning_rate": 5.905635928907894e-07, - "loss": 0.8356, + "learning_rate": 6.042252997336073e-07, + "loss": 0.7256, "step": 31482 }, { - "epoch": 0.8933881952326901, + "epoch": 0.892147694749072, "grad_norm": 0.0, - "learning_rate": 5.902524665868836e-07, - "loss": 0.776, + "learning_rate": 6.03911146677375e-07, + "loss": 0.9565, "step": 31483 }, { - "epoch": 0.8934165720771851, + "epoch": 0.8921760321913345, "grad_norm": 0.0, - "learning_rate": 5.899414197675357e-07, - "loss": 0.7542, + "learning_rate": 6.035970727678864e-07, + "loss": 0.8289, "step": 31484 }, { - "epoch": 0.8934449489216799, + "epoch": 0.8922043696335968, "grad_norm": 0.0, - "learning_rate": 5.896304524353769e-07, - "loss": 0.8282, + "learning_rate": 6.032830780077914e-07, + "loss": 0.8371, "step": 31485 }, { - "epoch": 0.8934733257661748, + "epoch": 0.8922327070758593, "grad_norm": 0.0, - "learning_rate": 5.893195645930339e-07, - "loss": 0.8144, + "learning_rate": 6.029691623997302e-07, + "loss": 0.7243, "step": 31486 }, { - "epoch": 0.8935017026106697, + "epoch": 0.8922610445181218, "grad_norm": 0.0, - "learning_rate": 5.890087562431301e-07, - "loss": 0.7604, + "learning_rate": 6.026553259463497e-07, + "loss": 0.8271, "step": 31487 }, { - "epoch": 0.8935300794551646, + "epoch": 0.8922893819603842, "grad_norm": 0.0, - "learning_rate": 5.886980273882925e-07, - "loss": 0.8391, + "learning_rate": 6.023415686502942e-07, + "loss": 0.7517, "step": 31488 }, { - "epoch": 0.8935584562996595, + "epoch": 0.8923177194026467, "grad_norm": 0.0, - "learning_rate": 5.883873780311455e-07, - "loss": 0.7377, + "learning_rate": 6.02027890514204e-07, + "loss": 0.8134, "step": 31489 }, { - "epoch": 0.8935868331441543, + "epoch": 0.8923460568449092, "grad_norm": 0.0, - "learning_rate": 5.880768081743127e-07, - "loss": 0.7544, + "learning_rate": 6.017142915407237e-07, + "loss": 0.8398, "step": 31490 }, { - "epoch": 0.8936152099886493, + "epoch": 0.8923743942871717, "grad_norm": 0.0, - "learning_rate": 5.877663178204207e-07, - "loss": 0.7455, + "learning_rate": 6.014007717324933e-07, + "loss": 0.7667, "step": 31491 }, { - "epoch": 0.8936435868331442, + "epoch": 0.8924027317294341, "grad_norm": 0.0, - "learning_rate": 5.874559069720875e-07, - "loss": 0.8268, + "learning_rate": 6.010873310921538e-07, + "loss": 0.6759, "step": 31492 }, { - "epoch": 0.893671963677639, + "epoch": 0.8924310691716966, "grad_norm": 0.0, - "learning_rate": 5.871455756319377e-07, - "loss": 0.8728, + "learning_rate": 6.007739696223458e-07, + "loss": 0.7094, "step": 31493 }, { - "epoch": 0.893700340522134, + "epoch": 0.8924594066139591, "grad_norm": 0.0, - "learning_rate": 5.868353238025948e-07, - "loss": 0.6942, + "learning_rate": 6.004606873257101e-07, + "loss": 0.9069, "step": 31494 }, { - "epoch": 0.8937287173666288, + "epoch": 0.8924877440562214, "grad_norm": 0.0, - "learning_rate": 5.865251514866743e-07, - "loss": 0.8319, + "learning_rate": 6.001474842048826e-07, + "loss": 0.8758, "step": 31495 }, { - "epoch": 0.8937570942111237, + "epoch": 0.8925160814984839, "grad_norm": 0.0, - "learning_rate": 5.862150586867998e-07, - "loss": 0.7099, + "learning_rate": 5.998343602625067e-07, + "loss": 0.9381, "step": 31496 }, { - "epoch": 0.8937854710556186, + "epoch": 0.8925444189407464, "grad_norm": 0.0, - "learning_rate": 5.859050454055904e-07, - "loss": 0.9278, + "learning_rate": 5.995213155012136e-07, + "loss": 0.811, "step": 31497 }, { - "epoch": 0.8938138479001135, + "epoch": 0.8925727563830089, "grad_norm": 0.0, - "learning_rate": 5.855951116456638e-07, - "loss": 0.8438, + "learning_rate": 5.992083499236456e-07, + "loss": 0.9016, "step": 31498 }, { - "epoch": 0.8938422247446084, + "epoch": 0.8926010938252713, "grad_norm": 0.0, - "learning_rate": 5.85285257409638e-07, - "loss": 0.7474, + "learning_rate": 5.988954635324351e-07, + "loss": 0.7618, "step": 31499 }, { - "epoch": 0.8938706015891033, + "epoch": 0.8926294312675338, "grad_norm": 0.0, - "learning_rate": 5.849754827001319e-07, - "loss": 0.8231, + "learning_rate": 5.985826563302188e-07, + "loss": 0.6848, "step": 31500 }, { - "epoch": 0.8938989784335982, + "epoch": 0.8926577687097963, "grad_norm": 0.0, - "learning_rate": 5.846657875197604e-07, - "loss": 0.8966, + "learning_rate": 5.982699283196336e-07, + "loss": 0.8147, "step": 31501 }, { - "epoch": 0.8939273552780931, + "epoch": 0.8926861061520587, "grad_norm": 0.0, - "learning_rate": 5.843561718711399e-07, - "loss": 0.8755, + "learning_rate": 5.979572795033106e-07, + "loss": 0.9476, "step": 31502 }, { - "epoch": 0.8939557321225879, + "epoch": 0.8927144435943212, "grad_norm": 0.0, - "learning_rate": 5.840466357568875e-07, - "loss": 0.8266, + "learning_rate": 5.976447098838845e-07, + "loss": 0.8338, "step": 31503 }, { - "epoch": 0.8939841089670829, + "epoch": 0.8927427810365837, "grad_norm": 0.0, - "learning_rate": 5.837371791796153e-07, - "loss": 0.6825, + "learning_rate": 5.973322194639897e-07, + "loss": 0.7913, "step": 31504 }, { - "epoch": 0.8940124858115778, + "epoch": 0.892771118478846, "grad_norm": 0.0, - "learning_rate": 5.834278021419382e-07, - "loss": 0.7987, + "learning_rate": 5.970198082462564e-07, + "loss": 0.8021, "step": 31505 }, { - "epoch": 0.8940408626560726, + "epoch": 0.8927994559211085, "grad_norm": 0.0, - "learning_rate": 5.831185046464704e-07, - "loss": 0.746, + "learning_rate": 5.96707476233317e-07, + "loss": 0.7716, "step": 31506 }, { - "epoch": 0.8940692395005675, + "epoch": 0.892827793363371, "grad_norm": 0.0, - "learning_rate": 5.828092866958235e-07, - "loss": 0.7728, + "learning_rate": 5.963952234278025e-07, + "loss": 0.8014, "step": 31507 }, { - "epoch": 0.8940976163450625, + "epoch": 0.8928561308056335, "grad_norm": 0.0, - "learning_rate": 5.825001482926107e-07, - "loss": 0.8542, + "learning_rate": 5.960830498323422e-07, + "loss": 0.8517, "step": 31508 }, { - "epoch": 0.8941259931895573, + "epoch": 0.8928844682478959, "grad_norm": 0.0, - "learning_rate": 5.821910894394411e-07, - "loss": 0.754, + "learning_rate": 5.957709554495683e-07, + "loss": 0.7911, "step": 31509 }, { - "epoch": 0.8941543700340522, + "epoch": 0.8929128056901584, "grad_norm": 0.0, - "learning_rate": 5.818821101389272e-07, - "loss": 0.7634, + "learning_rate": 5.954589402821065e-07, + "loss": 0.7899, "step": 31510 }, { - "epoch": 0.8941827468785472, + "epoch": 0.8929411431324209, "grad_norm": 0.0, - "learning_rate": 5.815732103936799e-07, - "loss": 0.8348, + "learning_rate": 5.951470043325869e-07, + "loss": 0.7325, "step": 31511 }, { - "epoch": 0.894211123723042, + "epoch": 0.8929694805746833, "grad_norm": 0.0, - "learning_rate": 5.812643902063065e-07, - "loss": 0.8706, + "learning_rate": 5.948351476036363e-07, + "loss": 0.7422, "step": 31512 }, { - "epoch": 0.8942395005675369, + "epoch": 0.8929978180169458, "grad_norm": 0.0, - "learning_rate": 5.809556495794145e-07, - "loss": 0.8256, + "learning_rate": 5.945233700978814e-07, + "loss": 0.7636, "step": 31513 }, { - "epoch": 0.8942678774120317, + "epoch": 0.8930261554592083, "grad_norm": 0.0, - "learning_rate": 5.806469885156163e-07, - "loss": 0.795, + "learning_rate": 5.942116718179502e-07, + "loss": 0.7445, "step": 31514 }, { - "epoch": 0.8942962542565267, + "epoch": 0.8930544929014708, "grad_norm": 0.0, - "learning_rate": 5.803384070175144e-07, - "loss": 0.7662, + "learning_rate": 5.939000527664651e-07, + "loss": 0.8888, "step": 31515 }, { - "epoch": 0.8943246311010216, + "epoch": 0.8930828303437331, "grad_norm": 0.0, - "learning_rate": 5.800299050877178e-07, - "loss": 0.8062, + "learning_rate": 5.935885129460528e-07, + "loss": 0.8093, "step": 31516 }, { - "epoch": 0.8943530079455164, + "epoch": 0.8931111677859956, "grad_norm": 0.0, - "learning_rate": 5.797214827288333e-07, - "loss": 0.7994, + "learning_rate": 5.93277052359339e-07, + "loss": 0.7696, "step": 31517 }, { - "epoch": 0.8943813847900114, + "epoch": 0.8931395052282581, "grad_norm": 0.0, - "learning_rate": 5.794131399434632e-07, - "loss": 0.7703, + "learning_rate": 5.929656710089438e-07, + "loss": 0.9081, "step": 31518 }, { - "epoch": 0.8944097616345063, + "epoch": 0.8931678426705205, "grad_norm": 0.0, - "learning_rate": 5.791048767342133e-07, - "loss": 0.6856, + "learning_rate": 5.926543688974928e-07, + "loss": 0.811, "step": 31519 }, { - "epoch": 0.8944381384790011, + "epoch": 0.893196180112783, "grad_norm": 0.0, - "learning_rate": 5.787966931036893e-07, - "loss": 0.8187, + "learning_rate": 5.923431460276063e-07, + "loss": 0.756, "step": 31520 }, { - "epoch": 0.8944665153234961, + "epoch": 0.8932245175550455, "grad_norm": 0.0, - "learning_rate": 5.784885890544922e-07, - "loss": 0.8024, + "learning_rate": 5.920320024019078e-07, + "loss": 0.8583, "step": 31521 }, { - "epoch": 0.8944948921679909, + "epoch": 0.8932528549973079, "grad_norm": 0.0, - "learning_rate": 5.78180564589228e-07, - "loss": 0.6985, + "learning_rate": 5.917209380230182e-07, + "loss": 0.8372, "step": 31522 }, { - "epoch": 0.8945232690124858, + "epoch": 0.8932811924395704, "grad_norm": 0.0, - "learning_rate": 5.778726197104934e-07, - "loss": 0.8939, + "learning_rate": 5.914099528935558e-07, + "loss": 0.9012, "step": 31523 }, { - "epoch": 0.8945516458569807, + "epoch": 0.8933095298818329, "grad_norm": 0.0, - "learning_rate": 5.775647544208929e-07, - "loss": 0.7863, + "learning_rate": 5.910990470161416e-07, + "loss": 0.8038, "step": 31524 }, { - "epoch": 0.8945800227014756, + "epoch": 0.8933378673240954, "grad_norm": 0.0, - "learning_rate": 5.772569687230278e-07, - "loss": 0.7663, + "learning_rate": 5.907882203933946e-07, + "loss": 0.8284, "step": 31525 }, { - "epoch": 0.8946083995459705, + "epoch": 0.8933662047663578, "grad_norm": 0.0, - "learning_rate": 5.76949262619495e-07, - "loss": 0.8243, + "learning_rate": 5.904774730279317e-07, + "loss": 0.8026, "step": 31526 }, { - "epoch": 0.8946367763904653, + "epoch": 0.8933945422086202, "grad_norm": 0.0, - "learning_rate": 5.766416361128957e-07, - "loss": 0.7805, + "learning_rate": 5.901668049223719e-07, + "loss": 0.8059, "step": 31527 }, { - "epoch": 0.8946651532349603, + "epoch": 0.8934228796508827, "grad_norm": 0.0, - "learning_rate": 5.7633408920583e-07, - "loss": 0.7956, + "learning_rate": 5.898562160793308e-07, + "loss": 0.8354, "step": 31528 }, { - "epoch": 0.8946935300794552, + "epoch": 0.8934512170931451, "grad_norm": 0.0, - "learning_rate": 5.760266219008914e-07, - "loss": 0.812, + "learning_rate": 5.895457065014243e-07, + "loss": 0.8367, "step": 31529 }, { - "epoch": 0.89472190692395, + "epoch": 0.8934795545354076, "grad_norm": 0.0, - "learning_rate": 5.757192342006812e-07, - "loss": 0.7884, + "learning_rate": 5.892352761912712e-07, + "loss": 0.8109, "step": 31530 }, { - "epoch": 0.8947502837684449, + "epoch": 0.8935078919776701, "grad_norm": 0.0, - "learning_rate": 5.754119261077939e-07, - "loss": 0.9029, + "learning_rate": 5.889249251514817e-07, + "loss": 0.8918, "step": 31531 }, { - "epoch": 0.8947786606129399, + "epoch": 0.8935362294199326, "grad_norm": 0.0, - "learning_rate": 5.751046976248254e-07, - "loss": 0.775, + "learning_rate": 5.886146533846726e-07, + "loss": 0.838, "step": 31532 }, { - "epoch": 0.8948070374574347, + "epoch": 0.893564566862195, "grad_norm": 0.0, - "learning_rate": 5.747975487543711e-07, - "loss": 0.8818, + "learning_rate": 5.883044608934563e-07, + "loss": 0.845, "step": 31533 }, { - "epoch": 0.8948354143019296, + "epoch": 0.8935929043044575, "grad_norm": 0.0, - "learning_rate": 5.74490479499027e-07, - "loss": 0.7809, + "learning_rate": 5.879943476804472e-07, + "loss": 0.7822, "step": 31534 }, { - "epoch": 0.8948637911464246, + "epoch": 0.89362124174672, "grad_norm": 0.0, - "learning_rate": 5.741834898613819e-07, - "loss": 0.8957, + "learning_rate": 5.876843137482591e-07, + "loss": 0.8156, "step": 31535 }, { - "epoch": 0.8948921679909194, + "epoch": 0.8936495791889824, "grad_norm": 0.0, - "learning_rate": 5.73876579844036e-07, - "loss": 0.7834, + "learning_rate": 5.873743590994985e-07, + "loss": 0.8125, "step": 31536 }, { - "epoch": 0.8949205448354143, + "epoch": 0.8936779166312449, "grad_norm": 0.0, - "learning_rate": 5.735697494495773e-07, - "loss": 0.7903, + "learning_rate": 5.87064483736779e-07, + "loss": 0.7557, "step": 31537 }, { - "epoch": 0.8949489216799092, + "epoch": 0.8937062540735073, "grad_norm": 0.0, - "learning_rate": 5.732629986805982e-07, - "loss": 0.8334, + "learning_rate": 5.867546876627129e-07, + "loss": 0.7889, "step": 31538 }, { - "epoch": 0.8949772985244041, + "epoch": 0.8937345915157698, "grad_norm": 0.0, - "learning_rate": 5.72956327539692e-07, - "loss": 0.8523, + "learning_rate": 5.864449708799059e-07, + "loss": 0.846, "step": 31539 }, { - "epoch": 0.895005675368899, + "epoch": 0.8937629289580322, "grad_norm": 0.0, - "learning_rate": 5.726497360294459e-07, - "loss": 0.8324, + "learning_rate": 5.861353333909692e-07, + "loss": 0.8727, "step": 31540 }, { - "epoch": 0.8950340522133938, + "epoch": 0.8937912664002947, "grad_norm": 0.0, - "learning_rate": 5.723432241524507e-07, - "loss": 0.8095, + "learning_rate": 5.858257751985097e-07, + "loss": 0.7493, "step": 31541 }, { - "epoch": 0.8950624290578888, + "epoch": 0.8938196038425572, "grad_norm": 0.0, - "learning_rate": 5.720367919112979e-07, - "loss": 0.7378, + "learning_rate": 5.855162963051353e-07, + "loss": 0.806, "step": 31542 }, { - "epoch": 0.8950908059023837, + "epoch": 0.8938479412848196, "grad_norm": 0.0, - "learning_rate": 5.717304393085732e-07, - "loss": 0.7745, + "learning_rate": 5.85206896713455e-07, + "loss": 0.7835, "step": 31543 }, { - "epoch": 0.8951191827468785, + "epoch": 0.8938762787270821, "grad_norm": 0.0, - "learning_rate": 5.714241663468645e-07, - "loss": 0.6891, + "learning_rate": 5.848975764260711e-07, + "loss": 0.8323, "step": 31544 }, { - "epoch": 0.8951475595913735, + "epoch": 0.8939046161693446, "grad_norm": 0.0, - "learning_rate": 5.71117973028762e-07, - "loss": 0.8222, + "learning_rate": 5.845883354455917e-07, + "loss": 0.8809, "step": 31545 }, { - "epoch": 0.8951759364358683, + "epoch": 0.893932953611607, "grad_norm": 0.0, - "learning_rate": 5.708118593568479e-07, - "loss": 0.7387, + "learning_rate": 5.842791737746212e-07, + "loss": 0.8323, "step": 31546 }, { - "epoch": 0.8952043132803632, + "epoch": 0.8939612910538695, "grad_norm": 0.0, - "learning_rate": 5.705058253337104e-07, - "loss": 0.8065, + "learning_rate": 5.839700914157631e-07, + "loss": 0.8487, "step": 31547 }, { - "epoch": 0.8952326901248581, + "epoch": 0.893989628496132, "grad_norm": 0.0, - "learning_rate": 5.70199870961935e-07, - "loss": 0.7835, + "learning_rate": 5.836610883716232e-07, + "loss": 0.8299, "step": 31548 }, { - "epoch": 0.895261066969353, + "epoch": 0.8940179659383944, "grad_norm": 0.0, - "learning_rate": 5.698939962441042e-07, - "loss": 0.7852, + "learning_rate": 5.833521646448003e-07, + "loss": 0.9036, "step": 31549 }, { - "epoch": 0.8952894438138479, + "epoch": 0.8940463033806568, "grad_norm": 0.0, - "learning_rate": 5.695882011828024e-07, - "loss": 0.6659, + "learning_rate": 5.830433202379004e-07, + "loss": 0.7799, "step": 31550 }, { - "epoch": 0.8953178206583428, + "epoch": 0.8940746408229193, "grad_norm": 0.0, - "learning_rate": 5.692824857806134e-07, - "loss": 0.8405, + "learning_rate": 5.827345551535235e-07, + "loss": 0.8072, "step": 31551 }, { - "epoch": 0.8953461975028377, + "epoch": 0.8941029782651818, "grad_norm": 0.0, - "learning_rate": 5.689768500401183e-07, - "loss": 0.8312, + "learning_rate": 5.824258693942698e-07, + "loss": 0.7978, "step": 31552 }, { - "epoch": 0.8953745743473326, + "epoch": 0.8941313157074442, "grad_norm": 0.0, - "learning_rate": 5.686712939639005e-07, - "loss": 0.838, + "learning_rate": 5.821172629627403e-07, + "loss": 0.8147, "step": 31553 }, { - "epoch": 0.8954029511918274, + "epoch": 0.8941596531497067, "grad_norm": 0.0, - "learning_rate": 5.683658175545415e-07, - "loss": 0.6995, + "learning_rate": 5.818087358615354e-07, + "loss": 0.6852, "step": 31554 }, { - "epoch": 0.8954313280363224, + "epoch": 0.8941879905919692, "grad_norm": 0.0, - "learning_rate": 5.68060420814619e-07, - "loss": 0.7883, + "learning_rate": 5.815002880932519e-07, + "loss": 0.7568, "step": 31555 }, { - "epoch": 0.8954597048808173, + "epoch": 0.8942163280342317, "grad_norm": 0.0, - "learning_rate": 5.677551037467133e-07, - "loss": 0.7626, + "learning_rate": 5.811919196604898e-07, + "loss": 0.7265, "step": 31556 }, { - "epoch": 0.8954880817253121, + "epoch": 0.8942446654764941, "grad_norm": 0.0, - "learning_rate": 5.674498663534067e-07, - "loss": 0.7356, + "learning_rate": 5.808836305658449e-07, + "loss": 0.7524, "step": 31557 }, { - "epoch": 0.895516458569807, + "epoch": 0.8942730029187566, "grad_norm": 0.0, - "learning_rate": 5.671447086372728e-07, - "loss": 0.7855, + "learning_rate": 5.805754208119141e-07, + "loss": 0.8004, "step": 31558 }, { - "epoch": 0.895544835414302, + "epoch": 0.894301340361019, "grad_norm": 0.0, - "learning_rate": 5.668396306008928e-07, - "loss": 0.9352, + "learning_rate": 5.802672904012951e-07, + "loss": 0.8272, "step": 31559 }, { - "epoch": 0.8955732122587968, + "epoch": 0.8943296778032814, "grad_norm": 0.0, - "learning_rate": 5.665346322468435e-07, - "loss": 0.8099, + "learning_rate": 5.799592393365816e-07, + "loss": 0.7426, "step": 31560 }, { - "epoch": 0.8956015891032917, + "epoch": 0.8943580152455439, "grad_norm": 0.0, - "learning_rate": 5.662297135776984e-07, - "loss": 0.7784, + "learning_rate": 5.796512676203703e-07, + "loss": 0.7448, "step": 31561 }, { - "epoch": 0.8956299659477867, + "epoch": 0.8943863526878064, "grad_norm": 0.0, - "learning_rate": 5.659248745960366e-07, - "loss": 0.7205, + "learning_rate": 5.793433752552557e-07, + "loss": 0.7081, "step": 31562 }, { - "epoch": 0.8956583427922815, + "epoch": 0.8944146901300689, "grad_norm": 0.0, - "learning_rate": 5.656201153044294e-07, - "loss": 0.8396, + "learning_rate": 5.790355622438293e-07, + "loss": 0.8076, "step": 31563 }, { - "epoch": 0.8956867196367764, + "epoch": 0.8944430275723313, "grad_norm": 0.0, - "learning_rate": 5.653154357054547e-07, - "loss": 0.8832, + "learning_rate": 5.787278285886855e-07, + "loss": 0.8452, "step": 31564 }, { - "epoch": 0.8957150964812712, + "epoch": 0.8944713650145938, "grad_norm": 0.0, - "learning_rate": 5.650108358016848e-07, - "loss": 0.7209, + "learning_rate": 5.784201742924145e-07, + "loss": 0.8459, "step": 31565 }, { - "epoch": 0.8957434733257662, + "epoch": 0.8944997024568563, "grad_norm": 0.0, - "learning_rate": 5.6470631559569e-07, - "loss": 0.7281, + "learning_rate": 5.781125993576086e-07, + "loss": 0.948, "step": 31566 }, { - "epoch": 0.8957718501702611, + "epoch": 0.8945280398991187, "grad_norm": 0.0, - "learning_rate": 5.644018750900459e-07, - "loss": 0.734, + "learning_rate": 5.778051037868615e-07, + "loss": 0.8139, "step": 31567 }, { - "epoch": 0.8958002270147559, + "epoch": 0.8945563773413812, "grad_norm": 0.0, - "learning_rate": 5.640975142873262e-07, - "loss": 0.8519, + "learning_rate": 5.774976875827587e-07, + "loss": 0.8278, "step": 31568 }, { - "epoch": 0.8958286038592509, + "epoch": 0.8945847147836437, "grad_norm": 0.0, - "learning_rate": 5.637932331900963e-07, - "loss": 0.845, + "learning_rate": 5.771903507478915e-07, + "loss": 0.8113, "step": 31569 }, { - "epoch": 0.8958569807037458, + "epoch": 0.894613052225906, "grad_norm": 0.0, - "learning_rate": 5.634890318009301e-07, - "loss": 0.7561, + "learning_rate": 5.768830932848513e-07, + "loss": 0.747, "step": 31570 }, { - "epoch": 0.8958853575482406, + "epoch": 0.8946413896681685, "grad_norm": 0.0, - "learning_rate": 5.631849101223963e-07, - "loss": 0.926, + "learning_rate": 5.765759151962225e-07, + "loss": 0.832, "step": 31571 }, { - "epoch": 0.8959137343927355, + "epoch": 0.894669727110431, "grad_norm": 0.0, - "learning_rate": 5.628808681570641e-07, - "loss": 0.648, + "learning_rate": 5.762688164845931e-07, + "loss": 0.8179, "step": 31572 }, { - "epoch": 0.8959421112372304, + "epoch": 0.8946980645526935, "grad_norm": 0.0, - "learning_rate": 5.625769059075004e-07, - "loss": 0.7742, + "learning_rate": 5.75961797152551e-07, + "loss": 0.7387, "step": 31573 }, { - "epoch": 0.8959704880817253, + "epoch": 0.8947264019949559, "grad_norm": 0.0, - "learning_rate": 5.622730233762752e-07, - "loss": 0.697, + "learning_rate": 5.756548572026832e-07, + "loss": 0.8141, "step": 31574 }, { - "epoch": 0.8959988649262202, + "epoch": 0.8947547394372184, "grad_norm": 0.0, - "learning_rate": 5.619692205659532e-07, - "loss": 0.8878, + "learning_rate": 5.753479966375752e-07, + "loss": 0.8778, "step": 31575 }, { - "epoch": 0.8960272417707151, + "epoch": 0.8947830768794809, "grad_norm": 0.0, - "learning_rate": 5.616654974791003e-07, - "loss": 0.7861, + "learning_rate": 5.750412154598095e-07, + "loss": 0.7376, "step": 31576 }, { - "epoch": 0.89605561861521, + "epoch": 0.8948114143217433, "grad_norm": 0.0, - "learning_rate": 5.613618541182852e-07, - "loss": 0.6677, + "learning_rate": 5.747345136719729e-07, + "loss": 0.7967, "step": 31577 }, { - "epoch": 0.8960839954597049, + "epoch": 0.8948397517640058, "grad_norm": 0.0, - "learning_rate": 5.610582904860706e-07, - "loss": 0.8866, + "learning_rate": 5.744278912766454e-07, + "loss": 0.7607, "step": 31578 }, { - "epoch": 0.8961123723041998, + "epoch": 0.8948680892062683, "grad_norm": 0.0, - "learning_rate": 5.607548065850199e-07, - "loss": 0.7847, + "learning_rate": 5.741213482764118e-07, + "loss": 0.7933, "step": 31579 }, { - "epoch": 0.8961407491486947, + "epoch": 0.8948964266485308, "grad_norm": 0.0, - "learning_rate": 5.604514024176988e-07, - "loss": 0.8867, + "learning_rate": 5.738148846738568e-07, + "loss": 0.8162, "step": 31580 }, { - "epoch": 0.8961691259931895, + "epoch": 0.8949247640907931, "grad_norm": 0.0, - "learning_rate": 5.601480779866664e-07, - "loss": 0.7378, + "learning_rate": 5.73508500471558e-07, + "loss": 0.7468, "step": 31581 }, { - "epoch": 0.8961975028376844, + "epoch": 0.8949531015330556, "grad_norm": 0.0, - "learning_rate": 5.598448332944906e-07, - "loss": 0.7594, + "learning_rate": 5.732021956720968e-07, + "loss": 0.8817, "step": 31582 }, { - "epoch": 0.8962258796821794, + "epoch": 0.8949814389753181, "grad_norm": 0.0, - "learning_rate": 5.595416683437283e-07, - "loss": 0.7738, + "learning_rate": 5.728959702780534e-07, + "loss": 0.888, "step": 31583 }, { - "epoch": 0.8962542565266742, + "epoch": 0.8950097764175805, "grad_norm": 0.0, - "learning_rate": 5.592385831369418e-07, - "loss": 0.8179, + "learning_rate": 5.725898242920092e-07, + "loss": 0.7507, "step": 31584 }, { - "epoch": 0.8962826333711691, + "epoch": 0.895038113859843, "grad_norm": 0.0, - "learning_rate": 5.589355776766924e-07, - "loss": 0.8816, + "learning_rate": 5.722837577165419e-07, + "loss": 0.8343, "step": 31585 }, { - "epoch": 0.8963110102156641, + "epoch": 0.8950664513021055, "grad_norm": 0.0, - "learning_rate": 5.586326519655383e-07, - "loss": 0.8268, + "learning_rate": 5.719777705542296e-07, + "loss": 0.8501, "step": 31586 }, { - "epoch": 0.8963393870601589, + "epoch": 0.895094788744368, "grad_norm": 0.0, - "learning_rate": 5.583298060060382e-07, - "loss": 0.7962, + "learning_rate": 5.716718628076479e-07, + "loss": 0.7241, "step": 31587 }, { - "epoch": 0.8963677639046538, + "epoch": 0.8951231261866304, "grad_norm": 0.0, - "learning_rate": 5.580270398007514e-07, - "loss": 0.862, + "learning_rate": 5.713660344793781e-07, + "loss": 0.733, "step": 31588 }, { - "epoch": 0.8963961407491486, + "epoch": 0.8951514636288929, "grad_norm": 0.0, - "learning_rate": 5.577243533522336e-07, - "loss": 0.6994, + "learning_rate": 5.710602855719904e-07, + "loss": 0.7966, "step": 31589 }, { - "epoch": 0.8964245175936436, + "epoch": 0.8951798010711554, "grad_norm": 0.0, - "learning_rate": 5.574217466630427e-07, - "loss": 0.7801, + "learning_rate": 5.707546160880651e-07, + "loss": 0.8014, "step": 31590 }, { - "epoch": 0.8964528944381385, + "epoch": 0.8952081385134177, "grad_norm": 0.0, - "learning_rate": 5.571192197357367e-07, - "loss": 0.8305, + "learning_rate": 5.704490260301754e-07, + "loss": 0.7613, "step": 31591 }, { - "epoch": 0.8964812712826333, + "epoch": 0.8952364759556802, "grad_norm": 0.0, - "learning_rate": 5.568167725728679e-07, - "loss": 0.7643, + "learning_rate": 5.701435154008939e-07, + "loss": 0.788, "step": 31592 }, { - "epoch": 0.8965096481271283, + "epoch": 0.8952648133979427, "grad_norm": 0.0, - "learning_rate": 5.565144051769911e-07, - "loss": 0.7723, + "learning_rate": 5.698380842027962e-07, + "loss": 0.9317, "step": 31593 }, { - "epoch": 0.8965380249716232, + "epoch": 0.8952931508402051, "grad_norm": 0.0, - "learning_rate": 5.56212117550664e-07, - "loss": 0.7605, + "learning_rate": 5.695327324384536e-07, + "loss": 0.7643, "step": 31594 }, { - "epoch": 0.896566401816118, + "epoch": 0.8953214882824676, "grad_norm": 0.0, - "learning_rate": 5.55909909696436e-07, - "loss": 0.8982, + "learning_rate": 5.692274601104387e-07, + "loss": 0.8464, "step": 31595 }, { - "epoch": 0.896594778660613, + "epoch": 0.8953498257247301, "grad_norm": 0.0, - "learning_rate": 5.556077816168615e-07, - "loss": 0.7858, + "learning_rate": 5.689222672213224e-07, + "loss": 0.8206, "step": 31596 }, { - "epoch": 0.8966231555051078, + "epoch": 0.8953781631669926, "grad_norm": 0.0, - "learning_rate": 5.553057333144917e-07, - "loss": 0.8505, + "learning_rate": 5.686171537736762e-07, + "loss": 0.7732, "step": 31597 }, { - "epoch": 0.8966515323496027, + "epoch": 0.895406500609255, "grad_norm": 0.0, - "learning_rate": 5.550037647918805e-07, - "loss": 0.83, + "learning_rate": 5.683121197700714e-07, + "loss": 0.779, "step": 31598 }, { - "epoch": 0.8966799091940976, + "epoch": 0.8954348380515175, "grad_norm": 0.0, - "learning_rate": 5.547018760515776e-07, - "loss": 0.7691, + "learning_rate": 5.680071652130736e-07, + "loss": 0.8395, "step": 31599 }, { - "epoch": 0.8967082860385925, + "epoch": 0.89546317549378, "grad_norm": 0.0, - "learning_rate": 5.544000670961313e-07, - "loss": 0.7297, + "learning_rate": 5.677022901052553e-07, + "loss": 0.7096, "step": 31600 }, { - "epoch": 0.8967366628830874, + "epoch": 0.8954915129360423, "grad_norm": 0.0, - "learning_rate": 5.540983379280929e-07, - "loss": 0.8657, + "learning_rate": 5.673974944491845e-07, + "loss": 0.7855, "step": 31601 }, { - "epoch": 0.8967650397275823, + "epoch": 0.8955198503783048, "grad_norm": 0.0, - "learning_rate": 5.537966885500113e-07, - "loss": 0.8685, + "learning_rate": 5.670927782474256e-07, + "loss": 0.7406, "step": 31602 }, { - "epoch": 0.8967934165720772, + "epoch": 0.8955481878205673, "grad_norm": 0.0, - "learning_rate": 5.534951189644322e-07, - "loss": 0.7205, + "learning_rate": 5.667881415025466e-07, + "loss": 0.7692, "step": 31603 }, { - "epoch": 0.8968217934165721, + "epoch": 0.8955765252628298, "grad_norm": 0.0, - "learning_rate": 5.531936291739037e-07, - "loss": 0.7689, + "learning_rate": 5.664835842171157e-07, + "loss": 0.8419, "step": 31604 }, { - "epoch": 0.896850170261067, + "epoch": 0.8956048627050922, "grad_norm": 0.0, - "learning_rate": 5.52892219180976e-07, - "loss": 0.8004, + "learning_rate": 5.66179106393695e-07, + "loss": 0.7679, "step": 31605 }, { - "epoch": 0.8968785471055618, + "epoch": 0.8956332001473547, "grad_norm": 0.0, - "learning_rate": 5.525908889881903e-07, - "loss": 0.9629, + "learning_rate": 5.658747080348525e-07, + "loss": 0.8662, "step": 31606 }, { - "epoch": 0.8969069239500568, + "epoch": 0.8956615375896172, "grad_norm": 0.0, - "learning_rate": 5.522896385980947e-07, - "loss": 0.8109, + "learning_rate": 5.655703891431496e-07, + "loss": 0.7844, "step": 31607 }, { - "epoch": 0.8969353007945516, + "epoch": 0.8956898750318796, "grad_norm": 0.0, - "learning_rate": 5.519884680132337e-07, - "loss": 0.7878, + "learning_rate": 5.652661497211509e-07, + "loss": 0.7797, "step": 31608 }, { - "epoch": 0.8969636776390465, + "epoch": 0.8957182124741421, "grad_norm": 0.0, - "learning_rate": 5.516873772361497e-07, - "loss": 0.733, + "learning_rate": 5.649619897714187e-07, + "loss": 0.8909, "step": 31609 }, { - "epoch": 0.8969920544835415, + "epoch": 0.8957465499164046, "grad_norm": 0.0, - "learning_rate": 5.513863662693875e-07, - "loss": 0.7658, + "learning_rate": 5.646579092965143e-07, + "loss": 0.8684, "step": 31610 }, { - "epoch": 0.8970204313280363, + "epoch": 0.8957748873586671, "grad_norm": 0.0, - "learning_rate": 5.510854351154915e-07, - "loss": 0.81, + "learning_rate": 5.643539082990013e-07, + "loss": 0.8203, "step": 31611 }, { - "epoch": 0.8970488081725312, + "epoch": 0.8958032248009294, "grad_norm": 0.0, - "learning_rate": 5.507845837769976e-07, - "loss": 0.7788, + "learning_rate": 5.640499867814397e-07, + "loss": 0.7673, "step": 31612 }, { - "epoch": 0.8970771850170262, + "epoch": 0.8958315622431919, "grad_norm": 0.0, - "learning_rate": 5.504838122564537e-07, - "loss": 0.7768, + "learning_rate": 5.637461447463876e-07, + "loss": 0.8085, "step": 31613 }, { - "epoch": 0.897105561861521, + "epoch": 0.8958598996854544, "grad_norm": 0.0, - "learning_rate": 5.501831205563978e-07, - "loss": 0.811, + "learning_rate": 5.634423821964074e-07, + "loss": 0.642, "step": 31614 }, { - "epoch": 0.8971339387060159, + "epoch": 0.8958882371277168, "grad_norm": 0.0, - "learning_rate": 5.498825086793691e-07, - "loss": 0.8572, + "learning_rate": 5.631386991340559e-07, + "loss": 0.8372, "step": 31615 }, { - "epoch": 0.8971623155505107, + "epoch": 0.8959165745699793, "grad_norm": 0.0, - "learning_rate": 5.495819766279086e-07, - "loss": 0.8206, + "learning_rate": 5.6283509556189e-07, + "loss": 0.7887, "step": 31616 }, { - "epoch": 0.8971906923950057, + "epoch": 0.8959449120122418, "grad_norm": 0.0, - "learning_rate": 5.492815244045535e-07, - "loss": 0.8046, + "learning_rate": 5.625315714824708e-07, + "loss": 0.8642, "step": 31617 }, { - "epoch": 0.8972190692395006, + "epoch": 0.8959732494545042, "grad_norm": 0.0, - "learning_rate": 5.489811520118415e-07, - "loss": 0.7567, + "learning_rate": 5.622281268983509e-07, + "loss": 0.924, "step": 31618 }, { - "epoch": 0.8972474460839954, + "epoch": 0.8960015868967667, "grad_norm": 0.0, - "learning_rate": 5.486808594523118e-07, - "loss": 0.7883, + "learning_rate": 5.619247618120871e-07, + "loss": 0.7805, "step": 31619 }, { - "epoch": 0.8972758229284904, + "epoch": 0.8960299243390292, "grad_norm": 0.0, - "learning_rate": 5.483806467284992e-07, - "loss": 0.7143, + "learning_rate": 5.616214762262384e-07, + "loss": 0.7609, "step": 31620 }, { - "epoch": 0.8973041997729853, + "epoch": 0.8960582617812917, "grad_norm": 0.0, - "learning_rate": 5.480805138429391e-07, - "loss": 0.7888, + "learning_rate": 5.613182701433551e-07, + "loss": 0.742, "step": 31621 }, { - "epoch": 0.8973325766174801, + "epoch": 0.896086599223554, "grad_norm": 0.0, - "learning_rate": 5.477804607981707e-07, - "loss": 0.7882, + "learning_rate": 5.610151435659939e-07, + "loss": 0.7765, "step": 31622 }, { - "epoch": 0.897360953461975, + "epoch": 0.8961149366658165, "grad_norm": 0.0, - "learning_rate": 5.474804875967233e-07, - "loss": 0.8201, + "learning_rate": 5.607120964967061e-07, + "loss": 0.8269, "step": 31623 }, { - "epoch": 0.89738933030647, + "epoch": 0.896143274108079, "grad_norm": 0.0, - "learning_rate": 5.471805942411323e-07, - "loss": 0.7135, + "learning_rate": 5.604091289380453e-07, + "loss": 0.7374, "step": 31624 }, { - "epoch": 0.8974177071509648, + "epoch": 0.8961716115503414, "grad_norm": 0.0, - "learning_rate": 5.468807807339349e-07, - "loss": 0.7929, + "learning_rate": 5.601062408925662e-07, + "loss": 0.8635, "step": 31625 }, { - "epoch": 0.8974460839954597, + "epoch": 0.8961999489926039, "grad_norm": 0.0, - "learning_rate": 5.465810470776578e-07, - "loss": 0.8775, + "learning_rate": 5.598034323628154e-07, + "loss": 0.8199, "step": 31626 }, { - "epoch": 0.8974744608399546, + "epoch": 0.8962282864348664, "grad_norm": 0.0, - "learning_rate": 5.462813932748356e-07, - "loss": 0.8806, + "learning_rate": 5.595007033513478e-07, + "loss": 0.7488, "step": 31627 }, { - "epoch": 0.8975028376844495, + "epoch": 0.8962566238771289, "grad_norm": 0.0, - "learning_rate": 5.459818193279998e-07, - "loss": 0.8019, + "learning_rate": 5.5919805386071e-07, + "loss": 0.7946, "step": 31628 }, { - "epoch": 0.8975312145289444, + "epoch": 0.8962849613193913, "grad_norm": 0.0, - "learning_rate": 5.456823252396815e-07, - "loss": 0.8277, + "learning_rate": 5.588954838934523e-07, + "loss": 0.9534, "step": 31629 }, { - "epoch": 0.8975595913734393, + "epoch": 0.8963132987616538, "grad_norm": 0.0, - "learning_rate": 5.453829110124098e-07, - "loss": 0.8415, + "learning_rate": 5.58592993452125e-07, + "loss": 0.8905, "step": 31630 }, { - "epoch": 0.8975879682179342, + "epoch": 0.8963416362039163, "grad_norm": 0.0, - "learning_rate": 5.450835766487128e-07, - "loss": 0.8308, + "learning_rate": 5.582905825392737e-07, + "loss": 0.8035, "step": 31631 }, { - "epoch": 0.897616345062429, + "epoch": 0.8963699736461787, "grad_norm": 0.0, - "learning_rate": 5.447843221511195e-07, - "loss": 0.8545, + "learning_rate": 5.579882511574475e-07, + "loss": 0.7713, "step": 31632 }, { - "epoch": 0.8976447219069239, + "epoch": 0.8963983110884411, "grad_norm": 0.0, - "learning_rate": 5.444851475221591e-07, - "loss": 0.8553, + "learning_rate": 5.576859993091932e-07, + "loss": 0.8414, "step": 31633 }, { - "epoch": 0.8976730987514189, + "epoch": 0.8964266485307036, "grad_norm": 0.0, - "learning_rate": 5.441860527643572e-07, - "loss": 0.8068, + "learning_rate": 5.573838269970555e-07, + "loss": 0.7636, "step": 31634 }, { - "epoch": 0.8977014755959137, + "epoch": 0.8964549859729661, "grad_norm": 0.0, - "learning_rate": 5.438870378802397e-07, - "loss": 0.6872, + "learning_rate": 5.570817342235791e-07, + "loss": 0.816, "step": 31635 }, { - "epoch": 0.8977298524404086, + "epoch": 0.8964833234152285, "grad_norm": 0.0, - "learning_rate": 5.435881028723355e-07, - "loss": 0.87, + "learning_rate": 5.567797209913106e-07, + "loss": 0.8476, "step": 31636 }, { - "epoch": 0.8977582292849036, + "epoch": 0.896511660857491, "grad_norm": 0.0, - "learning_rate": 5.432892477431672e-07, - "loss": 0.8286, + "learning_rate": 5.564777873027927e-07, + "loss": 0.8768, "step": 31637 }, { - "epoch": 0.8977866061293984, + "epoch": 0.8965399982997535, "grad_norm": 0.0, - "learning_rate": 5.429904724952583e-07, - "loss": 0.7277, + "learning_rate": 5.56175933160571e-07, + "loss": 0.803, "step": 31638 }, { - "epoch": 0.8978149829738933, + "epoch": 0.8965683357420159, "grad_norm": 0.0, - "learning_rate": 5.426917771311368e-07, - "loss": 0.7288, + "learning_rate": 5.558741585671845e-07, + "loss": 0.7687, "step": 31639 }, { - "epoch": 0.8978433598183881, + "epoch": 0.8965966731842784, "grad_norm": 0.0, - "learning_rate": 5.423931616533207e-07, - "loss": 0.78, + "learning_rate": 5.555724635251769e-07, + "loss": 0.8719, "step": 31640 }, { - "epoch": 0.8978717366628831, + "epoch": 0.8966250106265409, "grad_norm": 0.0, - "learning_rate": 5.420946260643345e-07, - "loss": 0.8838, + "learning_rate": 5.552708480370916e-07, + "loss": 0.6805, "step": 31641 }, { - "epoch": 0.897900113507378, + "epoch": 0.8966533480688033, "grad_norm": 0.0, - "learning_rate": 5.417961703667019e-07, - "loss": 0.8189, + "learning_rate": 5.549693121054656e-07, + "loss": 0.8032, "step": 31642 }, { - "epoch": 0.8979284903518728, + "epoch": 0.8966816855110658, "grad_norm": 0.0, - "learning_rate": 5.414977945629396e-07, - "loss": 0.7925, + "learning_rate": 5.546678557328411e-07, + "loss": 0.7405, "step": 31643 }, { - "epoch": 0.8979568671963678, + "epoch": 0.8967100229533282, "grad_norm": 0.0, - "learning_rate": 5.411994986555725e-07, - "loss": 0.8051, + "learning_rate": 5.543664789217562e-07, + "loss": 0.8011, "step": 31644 }, { - "epoch": 0.8979852440408627, + "epoch": 0.8967383603955907, "grad_norm": 0.0, - "learning_rate": 5.409012826471183e-07, - "loss": 0.7658, + "learning_rate": 5.540651816747489e-07, + "loss": 0.7953, "step": 31645 }, { - "epoch": 0.8980136208853575, + "epoch": 0.8967666978378531, "grad_norm": 0.0, - "learning_rate": 5.406031465400951e-07, - "loss": 0.8632, + "learning_rate": 5.537639639943604e-07, + "loss": 0.7415, "step": 31646 }, { - "epoch": 0.8980419977298525, + "epoch": 0.8967950352801156, "grad_norm": 0.0, - "learning_rate": 5.403050903370244e-07, - "loss": 0.8527, + "learning_rate": 5.534628258831243e-07, + "loss": 0.6919, "step": 31647 }, { - "epoch": 0.8980703745743474, + "epoch": 0.8968233727223781, "grad_norm": 0.0, - "learning_rate": 5.400071140404206e-07, - "loss": 0.756, + "learning_rate": 5.531617673435785e-07, + "loss": 0.7636, "step": 31648 }, { - "epoch": 0.8980987514188422, + "epoch": 0.8968517101646405, "grad_norm": 0.0, - "learning_rate": 5.397092176528018e-07, - "loss": 0.7878, + "learning_rate": 5.528607883782599e-07, + "loss": 0.8044, "step": 31649 }, { - "epoch": 0.8981271282633371, + "epoch": 0.896880047606903, "grad_norm": 0.0, - "learning_rate": 5.394114011766849e-07, - "loss": 0.7769, + "learning_rate": 5.525598889897022e-07, + "loss": 0.7574, "step": 31650 }, { - "epoch": 0.898155505107832, + "epoch": 0.8969083850491655, "grad_norm": 0.0, - "learning_rate": 5.391136646145856e-07, - "loss": 0.7559, + "learning_rate": 5.522590691804419e-07, + "loss": 0.7488, "step": 31651 }, { - "epoch": 0.8981838819523269, + "epoch": 0.896936722491428, "grad_norm": 0.0, - "learning_rate": 5.388160079690174e-07, - "loss": 0.8505, + "learning_rate": 5.519583289530106e-07, + "loss": 0.9018, "step": 31652 }, { - "epoch": 0.8982122587968218, + "epoch": 0.8969650599336904, "grad_norm": 0.0, - "learning_rate": 5.385184312424973e-07, - "loss": 0.8669, + "learning_rate": 5.51657668309944e-07, + "loss": 0.7356, "step": 31653 }, { - "epoch": 0.8982406356413167, + "epoch": 0.8969933973759529, "grad_norm": 0.0, - "learning_rate": 5.382209344375355e-07, - "loss": 0.8947, + "learning_rate": 5.513570872537732e-07, + "loss": 0.7782, "step": 31654 }, { - "epoch": 0.8982690124858116, + "epoch": 0.8970217348182153, "grad_norm": 0.0, - "learning_rate": 5.379235175566466e-07, - "loss": 0.8483, + "learning_rate": 5.510565857870298e-07, + "loss": 0.8828, "step": 31655 }, { - "epoch": 0.8982973893303065, + "epoch": 0.8970500722604777, "grad_norm": 0.0, - "learning_rate": 5.376261806023453e-07, - "loss": 0.7451, + "learning_rate": 5.507561639122461e-07, + "loss": 0.7116, "step": 31656 }, { - "epoch": 0.8983257661748013, + "epoch": 0.8970784097027402, "grad_norm": 0.0, - "learning_rate": 5.373289235771395e-07, - "loss": 0.7599, + "learning_rate": 5.504558216319522e-07, + "loss": 0.9333, "step": 31657 }, { - "epoch": 0.8983541430192963, + "epoch": 0.8971067471450027, "grad_norm": 0.0, - "learning_rate": 5.370317464835406e-07, - "loss": 0.8244, + "learning_rate": 5.501555589486762e-07, + "loss": 0.9272, "step": 31658 }, { - "epoch": 0.8983825198637911, + "epoch": 0.8971350845872652, "grad_norm": 0.0, - "learning_rate": 5.367346493240622e-07, - "loss": 0.8499, + "learning_rate": 5.498553758649516e-07, + "loss": 0.8759, "step": 31659 }, { - "epoch": 0.898410896708286, + "epoch": 0.8971634220295276, "grad_norm": 0.0, - "learning_rate": 5.364376321012099e-07, - "loss": 0.7989, + "learning_rate": 5.495552723833031e-07, + "loss": 0.656, "step": 31660 }, { - "epoch": 0.898439273552781, + "epoch": 0.8971917594717901, "grad_norm": 0.0, - "learning_rate": 5.361406948174952e-07, - "loss": 0.8628, + "learning_rate": 5.492552485062585e-07, + "loss": 0.7589, "step": 31661 }, { - "epoch": 0.8984676503972758, + "epoch": 0.8972200969140526, "grad_norm": 0.0, - "learning_rate": 5.35843837475426e-07, - "loss": 0.7917, + "learning_rate": 5.489553042363483e-07, + "loss": 0.7892, "step": 31662 }, { - "epoch": 0.8984960272417707, + "epoch": 0.897248434356315, "grad_norm": 0.0, - "learning_rate": 5.355470600775081e-07, - "loss": 0.721, + "learning_rate": 5.486554395760957e-07, + "loss": 0.7789, "step": 31663 }, { - "epoch": 0.8985244040862657, + "epoch": 0.8972767717985775, "grad_norm": 0.0, - "learning_rate": 5.352503626262507e-07, - "loss": 0.7863, + "learning_rate": 5.483556545280299e-07, + "loss": 0.7397, "step": 31664 }, { - "epoch": 0.8985527809307605, + "epoch": 0.89730510924084, "grad_norm": 0.0, - "learning_rate": 5.349537451241593e-07, - "loss": 0.8284, + "learning_rate": 5.480559490946724e-07, + "loss": 0.7799, "step": 31665 }, { - "epoch": 0.8985811577752554, + "epoch": 0.8973334466831023, "grad_norm": 0.0, - "learning_rate": 5.346572075737377e-07, - "loss": 0.8521, + "learning_rate": 5.477563232785499e-07, + "loss": 0.7158, "step": 31666 }, { - "epoch": 0.8986095346197502, + "epoch": 0.8973617841253648, "grad_norm": 0.0, - "learning_rate": 5.343607499774928e-07, - "loss": 0.7651, + "learning_rate": 5.47456777082187e-07, + "loss": 0.8356, "step": 31667 }, { - "epoch": 0.8986379114642452, + "epoch": 0.8973901215676273, "grad_norm": 0.0, - "learning_rate": 5.34064372337929e-07, - "loss": 0.795, + "learning_rate": 5.471573105081052e-07, + "loss": 0.8555, "step": 31668 }, { - "epoch": 0.8986662883087401, + "epoch": 0.8974184590098898, "grad_norm": 0.0, - "learning_rate": 5.33768074657548e-07, - "loss": 0.7277, + "learning_rate": 5.468579235588268e-07, + "loss": 0.6931, "step": 31669 }, { - "epoch": 0.8986946651532349, + "epoch": 0.8974467964521522, "grad_norm": 0.0, - "learning_rate": 5.334718569388542e-07, - "loss": 0.8946, + "learning_rate": 5.465586162368764e-07, + "loss": 0.7878, "step": 31670 }, { - "epoch": 0.8987230419977299, + "epoch": 0.8974751338944147, "grad_norm": 0.0, - "learning_rate": 5.331757191843489e-07, - "loss": 0.8176, + "learning_rate": 5.46259388544772e-07, + "loss": 0.852, "step": 31671 }, { - "epoch": 0.8987514188422248, + "epoch": 0.8975034713366772, "grad_norm": 0.0, - "learning_rate": 5.328796613965326e-07, - "loss": 0.8546, + "learning_rate": 5.459602404850362e-07, + "loss": 0.825, "step": 31672 }, { - "epoch": 0.8987797956867196, + "epoch": 0.8975318087789396, "grad_norm": 0.0, - "learning_rate": 5.325836835779085e-07, - "loss": 0.8499, + "learning_rate": 5.456611720601868e-07, + "loss": 0.9182, "step": 31673 }, { - "epoch": 0.8988081725312145, + "epoch": 0.8975601462212021, "grad_norm": 0.0, - "learning_rate": 5.32287785730975e-07, - "loss": 0.7959, + "learning_rate": 5.45362183272743e-07, + "loss": 0.7797, "step": 31674 }, { - "epoch": 0.8988365493757094, + "epoch": 0.8975884836634646, "grad_norm": 0.0, - "learning_rate": 5.319919678582319e-07, - "loss": 0.8127, + "learning_rate": 5.450632741252259e-07, + "loss": 0.7679, "step": 31675 }, { - "epoch": 0.8988649262202043, + "epoch": 0.897616821105727, "grad_norm": 0.0, - "learning_rate": 5.316962299621808e-07, - "loss": 0.8184, + "learning_rate": 5.447644446201516e-07, + "loss": 0.7933, "step": 31676 }, { - "epoch": 0.8988933030646992, + "epoch": 0.8976451585479894, "grad_norm": 0.0, - "learning_rate": 5.314005720453153e-07, - "loss": 0.8424, + "learning_rate": 5.444656947600368e-07, + "loss": 0.7319, "step": 31677 }, { - "epoch": 0.8989216799091941, + "epoch": 0.8976734959902519, "grad_norm": 0.0, - "learning_rate": 5.311049941101343e-07, - "loss": 0.8182, + "learning_rate": 5.441670245474007e-07, + "loss": 0.7894, "step": 31678 }, { - "epoch": 0.898950056753689, + "epoch": 0.8977018334325144, "grad_norm": 0.0, - "learning_rate": 5.308094961591371e-07, - "loss": 0.85, + "learning_rate": 5.438684339847556e-07, + "loss": 0.8432, "step": 31679 }, { - "epoch": 0.8989784335981839, + "epoch": 0.8977301708747768, "grad_norm": 0.0, - "learning_rate": 5.30514078194816e-07, - "loss": 0.7293, + "learning_rate": 5.435699230746194e-07, + "loss": 0.9022, "step": 31680 }, { - "epoch": 0.8990068104426788, + "epoch": 0.8977585083170393, "grad_norm": 0.0, - "learning_rate": 5.302187402196679e-07, - "loss": 0.8455, + "learning_rate": 5.432714918195037e-07, + "loss": 0.8651, "step": 31681 }, { - "epoch": 0.8990351872871737, + "epoch": 0.8977868457593018, "grad_norm": 0.0, - "learning_rate": 5.299234822361898e-07, - "loss": 0.7245, + "learning_rate": 5.42973140221924e-07, + "loss": 0.8564, "step": 31682 }, { - "epoch": 0.8990635641316685, + "epoch": 0.8978151832015643, "grad_norm": 0.0, - "learning_rate": 5.296283042468719e-07, - "loss": 0.8146, + "learning_rate": 5.426748682843952e-07, + "loss": 0.8642, "step": 31683 }, { - "epoch": 0.8990919409761634, + "epoch": 0.8978435206438267, "grad_norm": 0.0, - "learning_rate": 5.293332062542101e-07, - "loss": 0.8729, + "learning_rate": 5.423766760094262e-07, + "loss": 0.7845, "step": 31684 }, { - "epoch": 0.8991203178206584, + "epoch": 0.8978718580860892, "grad_norm": 0.0, - "learning_rate": 5.290381882606977e-07, - "loss": 0.8775, + "learning_rate": 5.420785633995318e-07, + "loss": 0.7891, "step": 31685 }, { - "epoch": 0.8991486946651532, + "epoch": 0.8979001955283517, "grad_norm": 0.0, - "learning_rate": 5.287432502688239e-07, - "loss": 0.7677, + "learning_rate": 5.417805304572199e-07, + "loss": 0.8749, "step": 31686 }, { - "epoch": 0.8991770715096481, + "epoch": 0.897928532970614, "grad_norm": 0.0, - "learning_rate": 5.284483922810812e-07, - "loss": 0.7298, + "learning_rate": 5.414825771850041e-07, + "loss": 0.8202, "step": 31687 }, { - "epoch": 0.8992054483541431, + "epoch": 0.8979568704128765, "grad_norm": 0.0, - "learning_rate": 5.281536142999622e-07, - "loss": 0.6595, + "learning_rate": 5.411847035853912e-07, + "loss": 0.8238, "step": 31688 }, { - "epoch": 0.8992338251986379, + "epoch": 0.897985207855139, "grad_norm": 0.0, - "learning_rate": 5.278589163279535e-07, - "loss": 0.8006, + "learning_rate": 5.408869096608926e-07, + "loss": 0.8523, "step": 31689 }, { - "epoch": 0.8992622020431328, + "epoch": 0.8980135452974014, "grad_norm": 0.0, - "learning_rate": 5.275642983675477e-07, - "loss": 0.8403, + "learning_rate": 5.405891954140175e-07, + "loss": 0.8349, "step": 31690 }, { - "epoch": 0.8992905788876276, + "epoch": 0.8980418827396639, "grad_norm": 0.0, - "learning_rate": 5.272697604212318e-07, - "loss": 0.7625, + "learning_rate": 5.402915608472726e-07, + "loss": 0.9136, "step": 31691 }, { - "epoch": 0.8993189557321226, + "epoch": 0.8980702201819264, "grad_norm": 0.0, - "learning_rate": 5.269753024914936e-07, - "loss": 0.8119, + "learning_rate": 5.399940059631625e-07, + "loss": 0.828, "step": 31692 }, { - "epoch": 0.8993473325766175, + "epoch": 0.8980985576241889, "grad_norm": 0.0, - "learning_rate": 5.266809245808225e-07, - "loss": 0.7323, + "learning_rate": 5.396965307641977e-07, + "loss": 0.7942, "step": 31693 }, { - "epoch": 0.8993757094211123, + "epoch": 0.8981268950664513, "grad_norm": 0.0, - "learning_rate": 5.263866266917028e-07, - "loss": 0.7921, + "learning_rate": 5.393991352528816e-07, + "loss": 0.7524, "step": 31694 }, { - "epoch": 0.8994040862656073, + "epoch": 0.8981552325087138, "grad_norm": 0.0, - "learning_rate": 5.260924088266195e-07, - "loss": 0.731, + "learning_rate": 5.391018194317188e-07, + "loss": 0.798, "step": 31695 }, { - "epoch": 0.8994324631101022, + "epoch": 0.8981835699509763, "grad_norm": 0.0, - "learning_rate": 5.257982709880616e-07, - "loss": 0.7625, + "learning_rate": 5.388045833032152e-07, + "loss": 0.7985, "step": 31696 }, { - "epoch": 0.899460839954597, + "epoch": 0.8982119073932386, "grad_norm": 0.0, - "learning_rate": 5.255042131785104e-07, - "loss": 0.7995, + "learning_rate": 5.385074268698742e-07, + "loss": 0.8032, "step": 31697 }, { - "epoch": 0.899489216799092, + "epoch": 0.8982402448355011, "grad_norm": 0.0, - "learning_rate": 5.252102354004518e-07, - "loss": 0.8544, + "learning_rate": 5.382103501341973e-07, + "loss": 0.6627, "step": 31698 }, { - "epoch": 0.8995175936435869, + "epoch": 0.8982685822777636, "grad_norm": 0.0, - "learning_rate": 5.249163376563682e-07, - "loss": 0.7557, + "learning_rate": 5.379133530986902e-07, + "loss": 0.827, "step": 31699 }, { - "epoch": 0.8995459704880817, + "epoch": 0.8982969197200261, "grad_norm": 0.0, - "learning_rate": 5.24622519948742e-07, - "loss": 0.7192, + "learning_rate": 5.376164357658508e-07, + "loss": 0.8096, "step": 31700 }, { - "epoch": 0.8995743473325766, + "epoch": 0.8983252571622885, "grad_norm": 0.0, - "learning_rate": 5.243287822800547e-07, - "loss": 0.8204, + "learning_rate": 5.373195981381817e-07, + "loss": 0.8359, "step": 31701 }, { - "epoch": 0.8996027241770715, + "epoch": 0.898353594604551, "grad_norm": 0.0, - "learning_rate": 5.240351246527897e-07, - "loss": 0.737, + "learning_rate": 5.370228402181843e-07, + "loss": 0.7946, "step": 31702 }, { - "epoch": 0.8996311010215664, + "epoch": 0.8983819320468135, "grad_norm": 0.0, - "learning_rate": 5.23741547069424e-07, - "loss": 0.784, + "learning_rate": 5.367261620083575e-07, + "loss": 0.8535, "step": 31703 }, { - "epoch": 0.8996594778660613, + "epoch": 0.8984102694890759, "grad_norm": 0.0, - "learning_rate": 5.234480495324401e-07, - "loss": 0.9426, + "learning_rate": 5.364295635112016e-07, + "loss": 0.747, "step": 31704 }, { - "epoch": 0.8996878547105562, + "epoch": 0.8984386069313384, "grad_norm": 0.0, - "learning_rate": 5.231546320443159e-07, - "loss": 0.9029, + "learning_rate": 5.361330447292123e-07, + "loss": 0.8538, "step": 31705 }, { - "epoch": 0.8997162315550511, + "epoch": 0.8984669443736009, "grad_norm": 0.0, - "learning_rate": 5.228612946075317e-07, - "loss": 0.8367, + "learning_rate": 5.358366056648879e-07, + "loss": 0.9291, "step": 31706 }, { - "epoch": 0.899744608399546, + "epoch": 0.8984952818158632, "grad_norm": 0.0, - "learning_rate": 5.225680372245645e-07, - "loss": 0.7422, + "learning_rate": 5.355402463207282e-07, + "loss": 0.818, "step": 31707 }, { - "epoch": 0.8997729852440408, + "epoch": 0.8985236192581257, "grad_norm": 0.0, - "learning_rate": 5.2227485989789e-07, - "loss": 0.7195, + "learning_rate": 5.352439666992259e-07, + "loss": 0.7641, "step": 31708 }, { - "epoch": 0.8998013620885358, + "epoch": 0.8985519567003882, "grad_norm": 0.0, - "learning_rate": 5.219817626299861e-07, - "loss": 0.8507, + "learning_rate": 5.349477668028802e-07, + "loss": 0.7941, "step": 31709 }, { - "epoch": 0.8998297389330306, + "epoch": 0.8985802941426507, "grad_norm": 0.0, - "learning_rate": 5.2168874542333e-07, - "loss": 0.9164, + "learning_rate": 5.346516466341834e-07, + "loss": 0.7796, "step": 31710 }, { - "epoch": 0.8998581157775255, + "epoch": 0.8986086315849131, "grad_norm": 0.0, - "learning_rate": 5.213958082803927e-07, - "loss": 0.8523, + "learning_rate": 5.34355606195629e-07, + "loss": 0.7511, "step": 31711 }, { - "epoch": 0.8998864926220205, + "epoch": 0.8986369690271756, "grad_norm": 0.0, - "learning_rate": 5.211029512036514e-07, - "loss": 0.8299, + "learning_rate": 5.340596454897151e-07, + "loss": 0.7751, "step": 31712 }, { - "epoch": 0.8999148694665153, + "epoch": 0.8986653064694381, "grad_norm": 0.0, - "learning_rate": 5.208101741955818e-07, - "loss": 0.757, + "learning_rate": 5.337637645189298e-07, + "loss": 0.8626, "step": 31713 }, { - "epoch": 0.8999432463110102, + "epoch": 0.8986936439117005, "grad_norm": 0.0, - "learning_rate": 5.20517477258653e-07, - "loss": 0.7586, + "learning_rate": 5.334679632857675e-07, + "loss": 0.9059, "step": 31714 }, { - "epoch": 0.8999716231555052, + "epoch": 0.898721981353963, "grad_norm": 0.0, - "learning_rate": 5.202248603953386e-07, - "loss": 0.7575, + "learning_rate": 5.331722417927199e-07, + "loss": 0.7479, "step": 31715 }, { - "epoch": 0.9, + "epoch": 0.8987503187962255, "grad_norm": 0.0, - "learning_rate": 5.199323236081133e-07, - "loss": 0.6871, + "learning_rate": 5.32876600042278e-07, + "loss": 0.8725, "step": 31716 }, { - "epoch": 0.9000283768444949, + "epoch": 0.898778656238488, "grad_norm": 0.0, - "learning_rate": 5.196398668994428e-07, - "loss": 0.8199, + "learning_rate": 5.325810380369334e-07, + "loss": 0.8284, "step": 31717 }, { - "epoch": 0.9000567536889897, + "epoch": 0.8988069936807503, "grad_norm": 0.0, - "learning_rate": 5.193474902718021e-07, - "loss": 0.9147, + "learning_rate": 5.322855557791729e-07, + "loss": 0.8645, "step": 31718 }, { - "epoch": 0.9000851305334847, + "epoch": 0.8988353311230128, "grad_norm": 0.0, - "learning_rate": 5.1905519372766e-07, - "loss": 0.7761, + "learning_rate": 5.319901532714877e-07, + "loss": 0.8257, "step": 31719 }, { - "epoch": 0.9001135073779796, + "epoch": 0.8988636685652753, "grad_norm": 0.0, - "learning_rate": 5.187629772694824e-07, - "loss": 0.7882, + "learning_rate": 5.31694830516366e-07, + "loss": 0.7077, "step": 31720 }, { - "epoch": 0.9001418842224744, + "epoch": 0.8988920060075377, "grad_norm": 0.0, - "learning_rate": 5.184708408997429e-07, - "loss": 0.7933, + "learning_rate": 5.313995875162925e-07, + "loss": 0.7507, "step": 31721 }, { - "epoch": 0.9001702610669694, + "epoch": 0.8989203434498002, "grad_norm": 0.0, - "learning_rate": 5.181787846209052e-07, - "loss": 0.7854, + "learning_rate": 5.311044242737595e-07, + "loss": 0.8809, "step": 31722 }, { - "epoch": 0.9001986379114643, + "epoch": 0.8989486808920627, "grad_norm": 0.0, - "learning_rate": 5.178868084354371e-07, - "loss": 0.7454, + "learning_rate": 5.308093407912473e-07, + "loss": 0.772, "step": 31723 }, { - "epoch": 0.9002270147559591, + "epoch": 0.8989770183343252, "grad_norm": 0.0, - "learning_rate": 5.17594912345808e-07, - "loss": 0.7481, + "learning_rate": 5.305143370712451e-07, + "loss": 0.8176, "step": 31724 }, { - "epoch": 0.900255391600454, + "epoch": 0.8990053557765876, "grad_norm": 0.0, - "learning_rate": 5.173030963544778e-07, - "loss": 0.8378, + "learning_rate": 5.302194131162386e-07, + "loss": 0.8238, "step": 31725 }, { - "epoch": 0.900283768444949, + "epoch": 0.8990336932188501, "grad_norm": 0.0, - "learning_rate": 5.170113604639159e-07, - "loss": 0.7417, + "learning_rate": 5.299245689287081e-07, + "loss": 0.8101, "step": 31726 }, { - "epoch": 0.9003121452894438, + "epoch": 0.8990620306611126, "grad_norm": 0.0, - "learning_rate": 5.16719704676586e-07, - "loss": 0.7986, + "learning_rate": 5.296298045111403e-07, + "loss": 0.7568, "step": 31727 }, { - "epoch": 0.9003405221339387, + "epoch": 0.899090368103375, "grad_norm": 0.0, - "learning_rate": 5.164281289949502e-07, - "loss": 0.7916, + "learning_rate": 5.293351198660168e-07, + "loss": 0.8203, "step": 31728 }, { - "epoch": 0.9003688989784336, + "epoch": 0.8991187055456374, "grad_norm": 0.0, - "learning_rate": 5.161366334214712e-07, - "loss": 0.8555, + "learning_rate": 5.290405149958211e-07, + "loss": 0.8191, "step": 31729 }, { - "epoch": 0.9003972758229285, + "epoch": 0.8991470429878999, "grad_norm": 0.0, - "learning_rate": 5.158452179586148e-07, - "loss": 0.8692, + "learning_rate": 5.287459899030356e-07, + "loss": 0.7673, "step": 31730 }, { - "epoch": 0.9004256526674234, + "epoch": 0.8991753804301623, "grad_norm": 0.0, - "learning_rate": 5.155538826088391e-07, - "loss": 0.8578, + "learning_rate": 5.284515445901383e-07, + "loss": 0.85, "step": 31731 }, { - "epoch": 0.9004540295119183, + "epoch": 0.8992037178724248, "grad_norm": 0.0, - "learning_rate": 5.152626273746053e-07, - "loss": 0.8899, + "learning_rate": 5.281571790596096e-07, + "loss": 0.8359, "step": 31732 }, { - "epoch": 0.9004824063564132, + "epoch": 0.8992320553146873, "grad_norm": 0.0, - "learning_rate": 5.14971452258376e-07, - "loss": 0.7397, + "learning_rate": 5.278628933139329e-07, + "loss": 0.7125, "step": 31733 }, { - "epoch": 0.900510783200908, + "epoch": 0.8992603927569498, "grad_norm": 0.0, - "learning_rate": 5.14680357262608e-07, - "loss": 0.8579, + "learning_rate": 5.27568687355583e-07, + "loss": 0.7692, "step": 31734 }, { - "epoch": 0.9005391600454029, + "epoch": 0.8992887301992122, "grad_norm": 0.0, - "learning_rate": 5.143893423897617e-07, - "loss": 0.7848, + "learning_rate": 5.272745611870389e-07, + "loss": 0.9003, "step": 31735 }, { - "epoch": 0.9005675368898979, + "epoch": 0.8993170676414747, "grad_norm": 0.0, - "learning_rate": 5.140984076422939e-07, - "loss": 0.8654, + "learning_rate": 5.269805148107809e-07, + "loss": 0.8728, "step": 31736 }, { - "epoch": 0.9005959137343927, + "epoch": 0.8993454050837372, "grad_norm": 0.0, - "learning_rate": 5.138075530226649e-07, - "loss": 0.7741, + "learning_rate": 5.266865482292815e-07, + "loss": 0.7597, "step": 31737 }, { - "epoch": 0.9006242905788876, + "epoch": 0.8993737425259996, "grad_norm": 0.0, - "learning_rate": 5.135167785333306e-07, - "loss": 0.8414, + "learning_rate": 5.26392661445021e-07, + "loss": 0.773, "step": 31738 }, { - "epoch": 0.9006526674233826, + "epoch": 0.899402079968262, "grad_norm": 0.0, - "learning_rate": 5.132260841767444e-07, - "loss": 0.8123, + "learning_rate": 5.260988544604717e-07, + "loss": 0.8646, "step": 31739 }, { - "epoch": 0.9006810442678774, + "epoch": 0.8994304174105245, "grad_norm": 0.0, - "learning_rate": 5.129354699553646e-07, - "loss": 0.7312, + "learning_rate": 5.258051272781095e-07, + "loss": 0.7986, "step": 31740 }, { - "epoch": 0.9007094211123723, + "epoch": 0.899458754852787, "grad_norm": 0.0, - "learning_rate": 5.126449358716468e-07, - "loss": 0.8857, + "learning_rate": 5.255114799004091e-07, + "loss": 0.6973, "step": 31741 }, { - "epoch": 0.9007377979568671, + "epoch": 0.8994870922950494, "grad_norm": 0.0, - "learning_rate": 5.123544819280423e-07, - "loss": 0.8016, + "learning_rate": 5.25217912329844e-07, + "loss": 0.7441, "step": 31742 }, { - "epoch": 0.9007661748013621, + "epoch": 0.8995154297373119, "grad_norm": 0.0, - "learning_rate": 5.120641081270061e-07, - "loss": 0.8307, + "learning_rate": 5.249244245688878e-07, + "loss": 0.9146, "step": 31743 }, { - "epoch": 0.900794551645857, + "epoch": 0.8995437671795744, "grad_norm": 0.0, - "learning_rate": 5.117738144709916e-07, - "loss": 0.7483, + "learning_rate": 5.246310166200108e-07, + "loss": 0.8036, "step": 31744 }, { - "epoch": 0.9008229284903518, + "epoch": 0.8995721046218368, "grad_norm": 0.0, - "learning_rate": 5.114836009624502e-07, - "loss": 0.7667, + "learning_rate": 5.243376884856854e-07, + "loss": 0.8766, "step": 31745 }, { - "epoch": 0.9008513053348468, + "epoch": 0.8996004420640993, "grad_norm": 0.0, - "learning_rate": 5.111934676038322e-07, - "loss": 0.7554, + "learning_rate": 5.240444401683831e-07, + "loss": 0.7684, "step": 31746 }, { - "epoch": 0.9008796821793417, + "epoch": 0.8996287795063618, "grad_norm": 0.0, - "learning_rate": 5.109034143975911e-07, - "loss": 0.8053, + "learning_rate": 5.237512716705718e-07, + "loss": 0.7995, "step": 31747 }, { - "epoch": 0.9009080590238365, + "epoch": 0.8996571169486243, "grad_norm": 0.0, - "learning_rate": 5.106134413461739e-07, - "loss": 0.8158, + "learning_rate": 5.234581829947227e-07, + "loss": 0.8084, "step": 31748 }, { - "epoch": 0.9009364358683314, + "epoch": 0.8996854543908867, "grad_norm": 0.0, - "learning_rate": 5.103235484520319e-07, - "loss": 0.8628, + "learning_rate": 5.231651741433063e-07, + "loss": 0.8743, "step": 31749 }, { - "epoch": 0.9009648127128264, + "epoch": 0.8997137918331491, "grad_norm": 0.0, - "learning_rate": 5.100337357176133e-07, - "loss": 0.7099, + "learning_rate": 5.228722451187862e-07, + "loss": 0.7907, "step": 31750 }, { - "epoch": 0.9009931895573212, + "epoch": 0.8997421292754116, "grad_norm": 0.0, - "learning_rate": 5.097440031453671e-07, - "loss": 0.7966, + "learning_rate": 5.225793959236347e-07, + "loss": 0.8579, "step": 31751 }, { - "epoch": 0.9010215664018161, + "epoch": 0.899770466717674, "grad_norm": 0.0, - "learning_rate": 5.094543507377403e-07, - "loss": 0.9094, + "learning_rate": 5.222866265603155e-07, + "loss": 0.8689, "step": 31752 }, { - "epoch": 0.901049943246311, + "epoch": 0.8997988041599365, "grad_norm": 0.0, - "learning_rate": 5.091647784971776e-07, - "loss": 0.8346, + "learning_rate": 5.219939370312943e-07, + "loss": 0.8618, "step": 31753 }, { - "epoch": 0.9010783200908059, + "epoch": 0.899827141602199, "grad_norm": 0.0, - "learning_rate": 5.088752864261271e-07, - "loss": 0.8391, + "learning_rate": 5.217013273390381e-07, + "loss": 0.728, "step": 31754 }, { - "epoch": 0.9011066969353008, + "epoch": 0.8998554790444614, "grad_norm": 0.0, - "learning_rate": 5.085858745270355e-07, - "loss": 0.6826, + "learning_rate": 5.214087974860116e-07, + "loss": 0.883, "step": 31755 }, { - "epoch": 0.9011350737797957, + "epoch": 0.8998838164867239, "grad_norm": 0.0, - "learning_rate": 5.082965428023446e-07, - "loss": 0.8873, + "learning_rate": 5.211163474746783e-07, + "loss": 0.8499, "step": 31756 }, { - "epoch": 0.9011634506242906, + "epoch": 0.8999121539289864, "grad_norm": 0.0, - "learning_rate": 5.080072912544987e-07, - "loss": 0.7529, + "learning_rate": 5.20823977307503e-07, + "loss": 0.7754, "step": 31757 }, { - "epoch": 0.9011918274687855, + "epoch": 0.8999404913712489, "grad_norm": 0.0, - "learning_rate": 5.077181198859437e-07, - "loss": 0.8445, + "learning_rate": 5.205316869869459e-07, + "loss": 0.8001, "step": 31758 }, { - "epoch": 0.9012202043132803, + "epoch": 0.8999688288135113, "grad_norm": 0.0, - "learning_rate": 5.07429028699119e-07, - "loss": 0.7633, + "learning_rate": 5.202394765154728e-07, + "loss": 0.91, "step": 31759 }, { - "epoch": 0.9012485811577753, + "epoch": 0.8999971662557738, "grad_norm": 0.0, - "learning_rate": 5.071400176964669e-07, - "loss": 0.7704, + "learning_rate": 5.199473458955406e-07, + "loss": 0.7285, "step": 31760 }, { - "epoch": 0.9012769580022701, + "epoch": 0.9000255036980362, "grad_norm": 0.0, - "learning_rate": 5.068510868804322e-07, - "loss": 0.8024, + "learning_rate": 5.19655295129613e-07, + "loss": 0.7839, "step": 31761 }, { - "epoch": 0.901305334846765, + "epoch": 0.9000538411402986, "grad_norm": 0.0, - "learning_rate": 5.065622362534517e-07, - "loss": 0.7565, + "learning_rate": 5.193633242201501e-07, + "loss": 0.7748, "step": 31762 }, { - "epoch": 0.90133371169126, + "epoch": 0.9000821785825611, "grad_norm": 0.0, - "learning_rate": 5.062734658179669e-07, - "loss": 0.7044, + "learning_rate": 5.19071433169609e-07, + "loss": 0.7142, "step": 31763 }, { - "epoch": 0.9013620885357548, + "epoch": 0.9001105160248236, "grad_norm": 0.0, - "learning_rate": 5.05984775576418e-07, - "loss": 0.81, + "learning_rate": 5.187796219804508e-07, + "loss": 0.9089, "step": 31764 }, { - "epoch": 0.9013904653802497, + "epoch": 0.9001388534670861, "grad_norm": 0.0, - "learning_rate": 5.05696165531242e-07, - "loss": 0.8462, + "learning_rate": 5.184878906551328e-07, + "loss": 0.9005, "step": 31765 }, { - "epoch": 0.9014188422247446, + "epoch": 0.9001671909093485, "grad_norm": 0.0, - "learning_rate": 5.054076356848748e-07, - "loss": 0.7692, + "learning_rate": 5.181962391961115e-07, + "loss": 0.7361, "step": 31766 }, { - "epoch": 0.9014472190692395, + "epoch": 0.900195528351611, "grad_norm": 0.0, - "learning_rate": 5.051191860397597e-07, - "loss": 0.8073, + "learning_rate": 5.179046676058442e-07, + "loss": 0.7975, "step": 31767 }, { - "epoch": 0.9014755959137344, + "epoch": 0.9002238657938735, "grad_norm": 0.0, - "learning_rate": 5.048308165983273e-07, - "loss": 0.7691, + "learning_rate": 5.176131758867864e-07, + "loss": 0.8371, "step": 31768 }, { - "epoch": 0.9015039727582292, + "epoch": 0.9002522032361359, "grad_norm": 0.0, - "learning_rate": 5.045425273630167e-07, - "loss": 0.6679, + "learning_rate": 5.173217640413942e-07, + "loss": 0.8061, "step": 31769 }, { - "epoch": 0.9015323496027242, + "epoch": 0.9002805406783984, "grad_norm": 0.0, - "learning_rate": 5.042543183362625e-07, - "loss": 0.815, + "learning_rate": 5.170304320721243e-07, + "loss": 0.8337, "step": 31770 }, { - "epoch": 0.9015607264472191, + "epoch": 0.9003088781206608, "grad_norm": 0.0, - "learning_rate": 5.039661895204984e-07, - "loss": 0.8193, + "learning_rate": 5.167391799814258e-07, + "loss": 0.7373, "step": 31771 }, { - "epoch": 0.9015891032917139, + "epoch": 0.9003372155629233, "grad_norm": 0.0, - "learning_rate": 5.03678140918159e-07, - "loss": 0.7813, + "learning_rate": 5.16448007771757e-07, + "loss": 0.7629, "step": 31772 }, { - "epoch": 0.9016174801362089, + "epoch": 0.9003655530051857, "grad_norm": 0.0, - "learning_rate": 5.033901725316781e-07, - "loss": 0.7185, + "learning_rate": 5.16156915445567e-07, + "loss": 0.753, "step": 31773 }, { - "epoch": 0.9016458569807038, + "epoch": 0.9003938904474482, "grad_norm": 0.0, - "learning_rate": 5.031022843634859e-07, - "loss": 0.8561, + "learning_rate": 5.158659030053081e-07, + "loss": 0.8122, "step": 31774 }, { - "epoch": 0.9016742338251986, + "epoch": 0.9004222278897107, "grad_norm": 0.0, - "learning_rate": 5.028144764160159e-07, - "loss": 0.7862, + "learning_rate": 5.155749704534352e-07, + "loss": 0.752, "step": 31775 }, { - "epoch": 0.9017026106696935, + "epoch": 0.9004505653319731, "grad_norm": 0.0, - "learning_rate": 5.025267486917007e-07, - "loss": 0.7237, + "learning_rate": 5.15284117792394e-07, + "loss": 0.7221, "step": 31776 }, { - "epoch": 0.9017309875141885, + "epoch": 0.9004789027742356, "grad_norm": 0.0, - "learning_rate": 5.022391011929672e-07, - "loss": 0.8225, + "learning_rate": 5.14993345024637e-07, + "loss": 0.7958, "step": 31777 }, { - "epoch": 0.9017593643586833, + "epoch": 0.9005072402164981, "grad_norm": 0.0, - "learning_rate": 5.019515339222481e-07, - "loss": 0.7555, + "learning_rate": 5.147026521526144e-07, + "loss": 0.8219, "step": 31778 }, { - "epoch": 0.9017877412031782, + "epoch": 0.9005355776587605, "grad_norm": 0.0, - "learning_rate": 5.0166404688197e-07, - "loss": 0.771, + "learning_rate": 5.144120391787732e-07, + "loss": 0.7698, "step": 31779 }, { - "epoch": 0.9018161180476731, + "epoch": 0.900563915101023, "grad_norm": 0.0, - "learning_rate": 5.013766400745634e-07, - "loss": 0.8731, + "learning_rate": 5.141215061055605e-07, + "loss": 0.8995, "step": 31780 }, { - "epoch": 0.901844494892168, + "epoch": 0.9005922525432855, "grad_norm": 0.0, - "learning_rate": 5.010893135024542e-07, - "loss": 0.7921, + "learning_rate": 5.138310529354251e-07, + "loss": 0.7993, "step": 31781 }, { - "epoch": 0.9018728717366629, + "epoch": 0.900620589985548, "grad_norm": 0.0, - "learning_rate": 5.008020671680714e-07, - "loss": 0.7166, + "learning_rate": 5.135406796708142e-07, + "loss": 0.8121, "step": 31782 }, { - "epoch": 0.9019012485811577, + "epoch": 0.9006489274278103, "grad_norm": 0.0, - "learning_rate": 5.005149010738408e-07, - "loss": 0.854, + "learning_rate": 5.132503863141736e-07, + "loss": 0.7561, "step": 31783 }, { - "epoch": 0.9019296254256527, + "epoch": 0.9006772648700728, "grad_norm": 0.0, - "learning_rate": 5.002278152221885e-07, - "loss": 0.8138, + "learning_rate": 5.129601728679468e-07, + "loss": 0.8269, "step": 31784 }, { - "epoch": 0.9019580022701476, + "epoch": 0.9007056023123353, "grad_norm": 0.0, - "learning_rate": 4.999408096155378e-07, - "loss": 0.803, + "learning_rate": 5.126700393345785e-07, + "loss": 0.8791, "step": 31785 }, { - "epoch": 0.9019863791146424, + "epoch": 0.9007339397545977, "grad_norm": 0.0, - "learning_rate": 4.996538842563137e-07, - "loss": 0.827, + "learning_rate": 5.123799857165157e-07, + "loss": 0.811, "step": 31786 }, { - "epoch": 0.9020147559591374, + "epoch": 0.9007622771968602, "grad_norm": 0.0, - "learning_rate": 4.99367039146943e-07, - "loss": 0.6938, + "learning_rate": 5.120900120161976e-07, + "loss": 0.8053, "step": 31787 }, { - "epoch": 0.9020431328036322, + "epoch": 0.9007906146391227, "grad_norm": 0.0, - "learning_rate": 4.990802742898437e-07, - "loss": 0.8183, + "learning_rate": 5.118001182360699e-07, + "loss": 0.7497, "step": 31788 }, { - "epoch": 0.9020715096481271, + "epoch": 0.9008189520813852, "grad_norm": 0.0, - "learning_rate": 4.987935896874396e-07, - "loss": 0.8167, + "learning_rate": 5.115103043785718e-07, + "loss": 0.8065, "step": 31789 }, { - "epoch": 0.9020998864926221, + "epoch": 0.9008472895236476, "grad_norm": 0.0, - "learning_rate": 4.985069853421554e-07, - "loss": 0.8164, + "learning_rate": 5.11220570446147e-07, + "loss": 0.8781, "step": 31790 }, { - "epoch": 0.9021282633371169, + "epoch": 0.9008756269659101, "grad_norm": 0.0, - "learning_rate": 4.98220461256409e-07, - "loss": 0.8075, + "learning_rate": 5.109309164412346e-07, + "loss": 0.8149, "step": 31791 }, { - "epoch": 0.9021566401816118, + "epoch": 0.9009039644081726, "grad_norm": 0.0, - "learning_rate": 4.979340174326208e-07, - "loss": 0.7554, + "learning_rate": 5.106413423662748e-07, + "loss": 0.6713, "step": 31792 }, { - "epoch": 0.9021850170261067, + "epoch": 0.9009323018504349, "grad_norm": 0.0, - "learning_rate": 4.976476538732123e-07, - "loss": 0.7876, + "learning_rate": 5.103518482237058e-07, + "loss": 0.7746, "step": 31793 }, { - "epoch": 0.9022133938706016, + "epoch": 0.9009606392926974, "grad_norm": 0.0, - "learning_rate": 4.973613705806002e-07, - "loss": 0.8251, + "learning_rate": 5.100624340159676e-07, + "loss": 0.7763, "step": 31794 }, { - "epoch": 0.9022417707150965, + "epoch": 0.9009889767349599, "grad_norm": 0.0, - "learning_rate": 4.970751675572039e-07, - "loss": 0.8793, + "learning_rate": 5.097730997454975e-07, + "loss": 0.75, "step": 31795 }, { - "epoch": 0.9022701475595913, + "epoch": 0.9010173141772224, "grad_norm": 0.0, - "learning_rate": 4.967890448054413e-07, - "loss": 0.7906, + "learning_rate": 5.094838454147333e-07, + "loss": 0.8022, "step": 31796 }, { - "epoch": 0.9022985244040863, + "epoch": 0.9010456516194848, "grad_norm": 0.0, - "learning_rate": 4.965030023277273e-07, - "loss": 0.7678, + "learning_rate": 5.091946710261108e-07, + "loss": 0.8108, "step": 31797 }, { - "epoch": 0.9023269012485812, + "epoch": 0.9010739890617473, "grad_norm": 0.0, - "learning_rate": 4.962170401264821e-07, - "loss": 0.7544, + "learning_rate": 5.08905576582065e-07, + "loss": 0.7548, "step": 31798 }, { - "epoch": 0.902355278093076, + "epoch": 0.9011023265040098, "grad_norm": 0.0, - "learning_rate": 4.959311582041171e-07, - "loss": 0.7113, + "learning_rate": 5.086165620850337e-07, + "loss": 0.829, "step": 31799 }, { - "epoch": 0.9023836549375709, + "epoch": 0.9011306639462722, "grad_norm": 0.0, - "learning_rate": 4.956453565630492e-07, - "loss": 0.773, + "learning_rate": 5.083276275374482e-07, + "loss": 0.7727, "step": 31800 }, { - "epoch": 0.9024120317820659, + "epoch": 0.9011590013885347, "grad_norm": 0.0, - "learning_rate": 4.953596352056933e-07, - "loss": 0.8979, + "learning_rate": 5.080387729417447e-07, + "loss": 0.847, "step": 31801 }, { - "epoch": 0.9024404086265607, + "epoch": 0.9011873388307972, "grad_norm": 0.0, - "learning_rate": 4.950739941344607e-07, - "loss": 0.8171, + "learning_rate": 5.077499983003542e-07, + "loss": 0.887, "step": 31802 }, { - "epoch": 0.9024687854710556, + "epoch": 0.9012156762730595, "grad_norm": 0.0, - "learning_rate": 4.947884333517649e-07, - "loss": 0.6732, + "learning_rate": 5.074613036157105e-07, + "loss": 0.7928, "step": 31803 }, { - "epoch": 0.9024971623155505, + "epoch": 0.901244013715322, "grad_norm": 0.0, - "learning_rate": 4.945029528600209e-07, - "loss": 0.8207, + "learning_rate": 5.071726888902451e-07, + "loss": 0.8119, "step": 31804 }, { - "epoch": 0.9025255391600454, + "epoch": 0.9012723511575845, "grad_norm": 0.0, - "learning_rate": 4.942175526616355e-07, - "loss": 0.7117, + "learning_rate": 5.068841541263892e-07, + "loss": 0.8601, "step": 31805 }, { - "epoch": 0.9025539160045403, + "epoch": 0.901300688599847, "grad_norm": 0.0, - "learning_rate": 4.939322327590224e-07, - "loss": 0.863, + "learning_rate": 5.065956993265742e-07, + "loss": 0.8662, "step": 31806 }, { - "epoch": 0.9025822928490352, + "epoch": 0.9013290260421094, "grad_norm": 0.0, - "learning_rate": 4.936469931545918e-07, - "loss": 0.7803, + "learning_rate": 5.063073244932293e-07, + "loss": 0.795, "step": 31807 }, { - "epoch": 0.9026106696935301, + "epoch": 0.9013573634843719, "grad_norm": 0.0, - "learning_rate": 4.933618338507506e-07, - "loss": 0.7434, + "learning_rate": 5.060190296287815e-07, + "loss": 0.7637, "step": 31808 }, { - "epoch": 0.902639046538025, + "epoch": 0.9013857009266344, "grad_norm": 0.0, - "learning_rate": 4.930767548499105e-07, - "loss": 0.8739, + "learning_rate": 5.057308147356632e-07, + "loss": 0.8197, "step": 31809 }, { - "epoch": 0.9026674233825198, + "epoch": 0.9014140383688968, "grad_norm": 0.0, - "learning_rate": 4.927917561544792e-07, - "loss": 0.8329, + "learning_rate": 5.054426798162971e-07, + "loss": 0.8104, "step": 31810 }, { - "epoch": 0.9026958002270148, + "epoch": 0.9014423758111593, "grad_norm": 0.0, - "learning_rate": 4.925068377668618e-07, - "loss": 0.763, + "learning_rate": 5.051546248731142e-07, + "loss": 0.8559, "step": 31811 }, { - "epoch": 0.9027241770715096, + "epoch": 0.9014707132534218, "grad_norm": 0.0, - "learning_rate": 4.922219996894662e-07, - "loss": 0.9436, + "learning_rate": 5.048666499085408e-07, + "loss": 0.8274, "step": 31812 }, { - "epoch": 0.9027525539160045, + "epoch": 0.9014990506956843, "grad_norm": 0.0, - "learning_rate": 4.919372419246993e-07, - "loss": 0.8946, + "learning_rate": 5.045787549250003e-07, + "loss": 0.8191, "step": 31813 }, { - "epoch": 0.9027809307604995, + "epoch": 0.9015273881379466, "grad_norm": 0.0, - "learning_rate": 4.916525644749659e-07, - "loss": 0.8631, + "learning_rate": 5.042909399249185e-07, + "loss": 0.7788, "step": 31814 }, { - "epoch": 0.9028093076049943, + "epoch": 0.9015557255802091, "grad_norm": 0.0, - "learning_rate": 4.913679673426719e-07, - "loss": 0.8148, + "learning_rate": 5.040032049107225e-07, + "loss": 0.8405, "step": 31815 }, { - "epoch": 0.9028376844494892, + "epoch": 0.9015840630224716, "grad_norm": 0.0, - "learning_rate": 4.910834505302198e-07, - "loss": 0.7549, + "learning_rate": 5.037155498848323e-07, + "loss": 0.785, "step": 31816 }, { - "epoch": 0.9028660612939841, + "epoch": 0.901612400464734, "grad_norm": 0.0, - "learning_rate": 4.907990140400121e-07, - "loss": 0.8407, + "learning_rate": 5.034279748496717e-07, + "loss": 0.8163, "step": 31817 }, { - "epoch": 0.902894438138479, + "epoch": 0.9016407379069965, "grad_norm": 0.0, - "learning_rate": 4.905146578744535e-07, - "loss": 0.7035, + "learning_rate": 5.031404798076644e-07, + "loss": 0.8025, "step": 31818 }, { - "epoch": 0.9029228149829739, + "epoch": 0.901669075349259, "grad_norm": 0.0, - "learning_rate": 4.902303820359445e-07, - "loss": 0.7733, + "learning_rate": 5.028530647612306e-07, + "loss": 0.7512, "step": 31819 }, { - "epoch": 0.9029511918274687, + "epoch": 0.9016974127915215, "grad_norm": 0.0, - "learning_rate": 4.899461865268873e-07, - "loss": 0.7628, + "learning_rate": 5.025657297127939e-07, + "loss": 0.8669, "step": 31820 }, { - "epoch": 0.9029795686719637, + "epoch": 0.9017257502337839, "grad_norm": 0.0, - "learning_rate": 4.896620713496836e-07, - "loss": 0.8552, + "learning_rate": 5.022784746647714e-07, + "loss": 0.8478, "step": 31821 }, { - "epoch": 0.9030079455164586, + "epoch": 0.9017540876760464, "grad_norm": 0.0, - "learning_rate": 4.893780365067291e-07, - "loss": 0.8058, + "learning_rate": 5.019912996195852e-07, + "loss": 0.8234, "step": 31822 }, { - "epoch": 0.9030363223609534, + "epoch": 0.9017824251183089, "grad_norm": 0.0, - "learning_rate": 4.890940820004275e-07, - "loss": 0.6916, + "learning_rate": 5.017042045796527e-07, + "loss": 0.6664, "step": 31823 }, { - "epoch": 0.9030646992054484, + "epoch": 0.9018107625605712, "grad_norm": 0.0, - "learning_rate": 4.888102078331758e-07, - "loss": 0.7926, + "learning_rate": 5.014171895473929e-07, + "loss": 0.8139, "step": 31824 }, { - "epoch": 0.9030930760499433, + "epoch": 0.9018391000028337, "grad_norm": 0.0, - "learning_rate": 4.88526414007372e-07, - "loss": 0.7492, + "learning_rate": 5.011302545252239e-07, + "loss": 0.7974, "step": 31825 }, { - "epoch": 0.9031214528944381, + "epoch": 0.9018674374450962, "grad_norm": 0.0, - "learning_rate": 4.88242700525412e-07, - "loss": 0.8136, + "learning_rate": 5.008433995155615e-07, + "loss": 0.8355, "step": 31826 }, { - "epoch": 0.903149829738933, + "epoch": 0.9018957748873586, "grad_norm": 0.0, - "learning_rate": 4.879590673896939e-07, - "loss": 0.784, + "learning_rate": 5.005566245208215e-07, + "loss": 0.6943, "step": 31827 }, { - "epoch": 0.903178206583428, + "epoch": 0.9019241123296211, "grad_norm": 0.0, - "learning_rate": 4.876755146026124e-07, - "loss": 0.8301, + "learning_rate": 5.002699295434233e-07, + "loss": 0.7581, "step": 31828 }, { - "epoch": 0.9032065834279228, + "epoch": 0.9019524497718836, "grad_norm": 0.0, - "learning_rate": 4.873920421665657e-07, - "loss": 0.8611, + "learning_rate": 4.999833145857769e-07, + "loss": 0.8743, "step": 31829 }, { - "epoch": 0.9032349602724177, + "epoch": 0.9019807872141461, "grad_norm": 0.0, - "learning_rate": 4.871086500839439e-07, - "loss": 0.7979, + "learning_rate": 4.996967796502982e-07, + "loss": 0.7561, "step": 31830 }, { - "epoch": 0.9032633371169126, + "epoch": 0.9020091246564085, "grad_norm": 0.0, - "learning_rate": 4.868253383571442e-07, - "loss": 0.8413, + "learning_rate": 4.994103247394022e-07, + "loss": 0.8122, "step": 31831 }, { - "epoch": 0.9032917139614075, + "epoch": 0.902037462098671, "grad_norm": 0.0, - "learning_rate": 4.86542106988559e-07, - "loss": 0.7813, + "learning_rate": 4.991239498555011e-07, + "loss": 0.8265, "step": 31832 }, { - "epoch": 0.9033200908059024, + "epoch": 0.9020657995409335, "grad_norm": 0.0, - "learning_rate": 4.862589559805786e-07, - "loss": 0.7744, + "learning_rate": 4.988376550010088e-07, + "loss": 0.7117, "step": 31833 }, { - "epoch": 0.9033484676503972, + "epoch": 0.9020941369831958, "grad_norm": 0.0, - "learning_rate": 4.859758853355967e-07, - "loss": 0.8659, + "learning_rate": 4.985514401783332e-07, + "loss": 0.7349, "step": 31834 }, { - "epoch": 0.9033768444948922, + "epoch": 0.9021224744254583, "grad_norm": 0.0, - "learning_rate": 4.856928950560058e-07, - "loss": 0.7112, + "learning_rate": 4.982653053898867e-07, + "loss": 0.8302, "step": 31835 }, { - "epoch": 0.903405221339387, + "epoch": 0.9021508118677208, "grad_norm": 0.0, - "learning_rate": 4.85409985144194e-07, - "loss": 0.8075, + "learning_rate": 4.97979250638082e-07, + "loss": 0.7267, "step": 31836 }, { - "epoch": 0.9034335981838819, + "epoch": 0.9021791493099833, "grad_norm": 0.0, - "learning_rate": 4.851271556025505e-07, - "loss": 0.7432, + "learning_rate": 4.976932759253239e-07, + "loss": 0.8126, "step": 31837 }, { - "epoch": 0.9034619750283769, + "epoch": 0.9022074867522457, "grad_norm": 0.0, - "learning_rate": 4.848444064334679e-07, - "loss": 0.8182, + "learning_rate": 4.97407381254027e-07, + "loss": 0.8011, "step": 31838 }, { - "epoch": 0.9034903518728717, + "epoch": 0.9022358241945082, "grad_norm": 0.0, - "learning_rate": 4.845617376393308e-07, - "loss": 0.8977, + "learning_rate": 4.971215666265939e-07, + "loss": 0.8203, "step": 31839 }, { - "epoch": 0.9035187287173666, + "epoch": 0.9022641616367707, "grad_norm": 0.0, - "learning_rate": 4.842791492225285e-07, - "loss": 0.7821, + "learning_rate": 4.968358320454348e-07, + "loss": 0.826, "step": 31840 }, { - "epoch": 0.9035471055618616, + "epoch": 0.9022924990790331, "grad_norm": 0.0, - "learning_rate": 4.839966411854491e-07, - "loss": 0.7835, + "learning_rate": 4.965501775129578e-07, + "loss": 0.8031, "step": 31841 }, { - "epoch": 0.9035754824063564, + "epoch": 0.9023208365212956, "grad_norm": 0.0, - "learning_rate": 4.83714213530475e-07, - "loss": 0.8909, + "learning_rate": 4.962646030315665e-07, + "loss": 0.7842, "step": 31842 }, { - "epoch": 0.9036038592508513, + "epoch": 0.9023491739635581, "grad_norm": 0.0, - "learning_rate": 4.834318662599968e-07, - "loss": 0.7856, + "learning_rate": 4.95979108603668e-07, + "loss": 0.8431, "step": 31843 }, { - "epoch": 0.9036322360953462, + "epoch": 0.9023775114058206, "grad_norm": 0.0, - "learning_rate": 4.831495993763968e-07, - "loss": 0.7129, + "learning_rate": 4.956936942316659e-07, + "loss": 0.8124, "step": 31844 }, { - "epoch": 0.9036606129398411, + "epoch": 0.902405848848083, "grad_norm": 0.0, - "learning_rate": 4.82867412882061e-07, - "loss": 0.7656, + "learning_rate": 4.95408359917966e-07, + "loss": 0.8533, "step": 31845 }, { - "epoch": 0.903688989784336, + "epoch": 0.9024341862903454, "grad_norm": 0.0, - "learning_rate": 4.825853067793717e-07, - "loss": 0.7619, + "learning_rate": 4.951231056649719e-07, + "loss": 0.8383, "step": 31846 }, { - "epoch": 0.9037173666288308, + "epoch": 0.9024625237326079, "grad_norm": 0.0, - "learning_rate": 4.823032810707118e-07, - "loss": 0.9077, + "learning_rate": 4.94837931475084e-07, + "loss": 0.7717, "step": 31847 }, { - "epoch": 0.9037457434733258, + "epoch": 0.9024908611748703, "grad_norm": 0.0, - "learning_rate": 4.820213357584636e-07, - "loss": 0.8321, + "learning_rate": 4.945528373507059e-07, + "loss": 0.8251, "step": 31848 }, { - "epoch": 0.9037741203178207, + "epoch": 0.9025191986171328, "grad_norm": 0.0, - "learning_rate": 4.817394708450107e-07, - "loss": 0.8951, + "learning_rate": 4.942678232942399e-07, + "loss": 0.7634, "step": 31849 }, { - "epoch": 0.9038024971623155, + "epoch": 0.9025475360593953, "grad_norm": 0.0, - "learning_rate": 4.814576863327303e-07, - "loss": 0.7642, + "learning_rate": 4.939828893080844e-07, + "loss": 0.794, "step": 31850 }, { - "epoch": 0.9038308740068104, + "epoch": 0.9025758735016577, "grad_norm": 0.0, - "learning_rate": 4.811759822240047e-07, - "loss": 0.8353, + "learning_rate": 4.93698035394643e-07, + "loss": 0.8164, "step": 31851 }, { - "epoch": 0.9038592508513054, + "epoch": 0.9026042109439202, "grad_norm": 0.0, - "learning_rate": 4.808943585212156e-07, - "loss": 0.8302, + "learning_rate": 4.934132615563125e-07, + "loss": 0.87, "step": 31852 }, { - "epoch": 0.9038876276958002, + "epoch": 0.9026325483861827, "grad_norm": 0.0, - "learning_rate": 4.806128152267387e-07, - "loss": 0.8248, + "learning_rate": 4.931285677954912e-07, + "loss": 0.7803, "step": 31853 }, { - "epoch": 0.9039160045402951, + "epoch": 0.9026608858284452, "grad_norm": 0.0, - "learning_rate": 4.80331352342952e-07, - "loss": 0.8389, + "learning_rate": 4.928439541145802e-07, + "loss": 0.821, "step": 31854 }, { - "epoch": 0.90394438138479, + "epoch": 0.9026892232707076, "grad_norm": 0.0, - "learning_rate": 4.80049969872236e-07, - "loss": 0.8803, + "learning_rate": 4.925594205159734e-07, + "loss": 0.6557, "step": 31855 }, { - "epoch": 0.9039727582292849, + "epoch": 0.90271756071297, "grad_norm": 0.0, - "learning_rate": 4.797686678169655e-07, - "loss": 0.8553, + "learning_rate": 4.922749670020687e-07, + "loss": 0.8055, "step": 31856 }, { - "epoch": 0.9040011350737798, + "epoch": 0.9027458981552325, "grad_norm": 0.0, - "learning_rate": 4.794874461795163e-07, - "loss": 0.789, + "learning_rate": 4.919905935752622e-07, + "loss": 0.8243, "step": 31857 }, { - "epoch": 0.9040295119182747, + "epoch": 0.9027742355974949, "grad_norm": 0.0, - "learning_rate": 4.792063049622642e-07, - "loss": 0.866, + "learning_rate": 4.917063002379507e-07, + "loss": 0.8531, "step": 31858 }, { - "epoch": 0.9040578887627696, + "epoch": 0.9028025730397574, "grad_norm": 0.0, - "learning_rate": 4.789252441675851e-07, - "loss": 0.8573, + "learning_rate": 4.91422086992529e-07, + "loss": 0.9068, "step": 31859 }, { - "epoch": 0.9040862656072645, + "epoch": 0.9028309104820199, "grad_norm": 0.0, - "learning_rate": 4.786442637978539e-07, - "loss": 0.8257, + "learning_rate": 4.911379538413885e-07, + "loss": 0.7777, "step": 31860 }, { - "epoch": 0.9041146424517593, + "epoch": 0.9028592479242824, "grad_norm": 0.0, - "learning_rate": 4.783633638554408e-07, - "loss": 0.7754, + "learning_rate": 4.90853900786924e-07, + "loss": 0.7461, "step": 31861 }, { - "epoch": 0.9041430192962543, + "epoch": 0.9028875853665448, "grad_norm": 0.0, - "learning_rate": 4.780825443427206e-07, - "loss": 0.7092, + "learning_rate": 4.90569927831529e-07, + "loss": 0.821, "step": 31862 }, { - "epoch": 0.9041713961407492, + "epoch": 0.9029159228088073, "grad_norm": 0.0, - "learning_rate": 4.77801805262067e-07, - "loss": 0.7785, + "learning_rate": 4.90286034977594e-07, + "loss": 0.8683, "step": 31863 }, { - "epoch": 0.904199772985244, + "epoch": 0.9029442602510698, "grad_norm": 0.0, - "learning_rate": 4.775211466158469e-07, - "loss": 0.7953, + "learning_rate": 4.900022222275113e-07, + "loss": 0.8549, "step": 31864 }, { - "epoch": 0.904228149829739, + "epoch": 0.9029725976933322, "grad_norm": 0.0, - "learning_rate": 4.772405684064352e-07, - "loss": 0.6603, + "learning_rate": 4.897184895836726e-07, + "loss": 0.7164, "step": 31865 }, { - "epoch": 0.9042565266742338, + "epoch": 0.9030009351355947, "grad_norm": 0.0, - "learning_rate": 4.76960070636201e-07, - "loss": 0.9319, + "learning_rate": 4.894348370484648e-07, + "loss": 0.7466, "step": 31866 }, { - "epoch": 0.9042849035187287, + "epoch": 0.9030292725778571, "grad_norm": 0.0, - "learning_rate": 4.766796533075124e-07, - "loss": 0.7287, + "learning_rate": 4.891512646242802e-07, + "loss": 0.7949, "step": 31867 }, { - "epoch": 0.9043132803632236, + "epoch": 0.9030576100201196, "grad_norm": 0.0, - "learning_rate": 4.7639931642273875e-07, - "loss": 0.9405, + "learning_rate": 4.88867772313506e-07, + "loss": 0.7359, "step": 31868 }, { - "epoch": 0.9043416572077185, + "epoch": 0.903085947462382, "grad_norm": 0.0, - "learning_rate": 4.7611905998424914e-07, - "loss": 0.7869, + "learning_rate": 4.885843601185291e-07, + "loss": 0.7622, "step": 31869 }, { - "epoch": 0.9043700340522134, + "epoch": 0.9031142849046445, "grad_norm": 0.0, - "learning_rate": 4.7583888399440834e-07, - "loss": 0.7292, + "learning_rate": 4.883010280417389e-07, + "loss": 0.7971, "step": 31870 }, { - "epoch": 0.9043984108967082, + "epoch": 0.903142622346907, "grad_norm": 0.0, - "learning_rate": 4.7555878845558567e-07, - "loss": 0.9308, + "learning_rate": 4.880177760855209e-07, + "loss": 0.7892, "step": 31871 }, { - "epoch": 0.9044267877412032, + "epoch": 0.9031709597891694, "grad_norm": 0.0, - "learning_rate": 4.7527877337014694e-07, - "loss": 0.8957, + "learning_rate": 4.877346042522624e-07, + "loss": 0.8986, "step": 31872 }, { - "epoch": 0.9044551645856981, + "epoch": 0.9031992972314319, "grad_norm": 0.0, - "learning_rate": 4.749988387404536e-07, - "loss": 0.7041, + "learning_rate": 4.87451512544348e-07, + "loss": 0.9154, "step": 31873 }, { - "epoch": 0.9044835414301929, + "epoch": 0.9032276346736944, "grad_norm": 0.0, - "learning_rate": 4.7471898456887487e-07, - "loss": 0.807, + "learning_rate": 4.871685009641603e-07, + "loss": 0.7377, "step": 31874 }, { - "epoch": 0.9045119182746879, + "epoch": 0.9032559721159568, "grad_norm": 0.0, - "learning_rate": 4.7443921085777444e-07, - "loss": 0.6505, + "learning_rate": 4.868855695140861e-07, + "loss": 0.7409, "step": 31875 }, { - "epoch": 0.9045402951191828, + "epoch": 0.9032843095582193, "grad_norm": 0.0, - "learning_rate": 4.7415951760951263e-07, - "loss": 0.8774, + "learning_rate": 4.866027181965071e-07, + "loss": 0.8312, "step": 31876 }, { - "epoch": 0.9045686719636776, + "epoch": 0.9033126470004817, "grad_norm": 0.0, - "learning_rate": 4.738799048264531e-07, - "loss": 0.7671, + "learning_rate": 4.863199470138058e-07, + "loss": 0.8923, "step": 31877 }, { - "epoch": 0.9045970488081725, + "epoch": 0.9033409844427442, "grad_norm": 0.0, - "learning_rate": 4.736003725109606e-07, - "loss": 0.8166, + "learning_rate": 4.860372559683646e-07, + "loss": 0.7953, "step": 31878 }, { - "epoch": 0.9046254256526675, + "epoch": 0.9033693218850066, "grad_norm": 0.0, - "learning_rate": 4.7332092066539214e-07, - "loss": 0.7328, + "learning_rate": 4.85754645062565e-07, + "loss": 0.8275, "step": 31879 }, { - "epoch": 0.9046538024971623, + "epoch": 0.9033976593272691, "grad_norm": 0.0, - "learning_rate": 4.7304154929211034e-07, - "loss": 0.9136, + "learning_rate": 4.854721142987873e-07, + "loss": 0.8341, "step": 31880 }, { - "epoch": 0.9046821793416572, + "epoch": 0.9034259967695316, "grad_norm": 0.0, - "learning_rate": 4.7276225839347544e-07, - "loss": 0.82, + "learning_rate": 4.851896636794096e-07, + "loss": 0.7327, "step": 31881 }, { - "epoch": 0.9047105561861521, + "epoch": 0.903454334211794, "grad_norm": 0.0, - "learning_rate": 4.7248304797184563e-07, - "loss": 0.902, + "learning_rate": 4.849072932068133e-07, + "loss": 0.8018, "step": 31882 }, { - "epoch": 0.904738933030647, + "epoch": 0.9034826716540565, "grad_norm": 0.0, - "learning_rate": 4.72203918029579e-07, - "loss": 0.8913, + "learning_rate": 4.846250028833755e-07, + "loss": 0.8229, "step": 31883 }, { - "epoch": 0.9047673098751419, + "epoch": 0.903511009096319, "grad_norm": 0.0, - "learning_rate": 4.71924868569037e-07, - "loss": 0.7748, + "learning_rate": 4.843427927114752e-07, + "loss": 0.825, "step": 31884 }, { - "epoch": 0.9047956867196367, + "epoch": 0.9035393465385815, "grad_norm": 0.0, - "learning_rate": 4.7164589959257103e-07, - "loss": 0.8067, + "learning_rate": 4.840606626934885e-07, + "loss": 0.761, "step": 31885 }, { - "epoch": 0.9048240635641317, + "epoch": 0.9035676839808439, "grad_norm": 0.0, - "learning_rate": 4.713670111025426e-07, - "loss": 0.7897, + "learning_rate": 4.837786128317945e-07, + "loss": 0.7447, "step": 31886 }, { - "epoch": 0.9048524404086266, + "epoch": 0.9035960214231064, "grad_norm": 0.0, - "learning_rate": 4.710882031013042e-07, - "loss": 0.9559, + "learning_rate": 4.834966431287657e-07, + "loss": 1.035, "step": 31887 }, { - "epoch": 0.9048808172531214, + "epoch": 0.9036243588653688, "grad_norm": 0.0, - "learning_rate": 4.708094755912096e-07, - "loss": 0.7861, + "learning_rate": 4.832147535867792e-07, + "loss": 0.7627, "step": 31888 }, { - "epoch": 0.9049091940976164, + "epoch": 0.9036526963076312, "grad_norm": 0.0, - "learning_rate": 4.7053082857461906e-07, - "loss": 0.7892, + "learning_rate": 4.829329442082076e-07, + "loss": 0.8073, "step": 31889 }, { - "epoch": 0.9049375709421112, + "epoch": 0.9036810337498937, "grad_norm": 0.0, - "learning_rate": 4.7025226205388185e-07, - "loss": 0.8123, + "learning_rate": 4.826512149954266e-07, + "loss": 0.7329, "step": 31890 }, { - "epoch": 0.9049659477866061, + "epoch": 0.9037093711921562, "grad_norm": 0.0, - "learning_rate": 4.6997377603135277e-07, - "loss": 0.7381, + "learning_rate": 4.823695659508099e-07, + "loss": 0.8093, "step": 31891 }, { - "epoch": 0.9049943246311011, + "epoch": 0.9037377086344187, "grad_norm": 0.0, - "learning_rate": 4.696953705093843e-07, - "loss": 0.7916, + "learning_rate": 4.820879970767267e-07, + "loss": 0.8333, "step": 31892 }, { - "epoch": 0.9050227014755959, + "epoch": 0.9037660460766811, "grad_norm": 0.0, - "learning_rate": 4.694170454903257e-07, - "loss": 0.7876, + "learning_rate": 4.818065083755508e-07, + "loss": 0.7162, "step": 31893 }, { - "epoch": 0.9050510783200908, + "epoch": 0.9037943835189436, "grad_norm": 0.0, - "learning_rate": 4.6913880097653075e-07, - "loss": 0.7353, + "learning_rate": 4.815250998496546e-07, + "loss": 0.8489, "step": 31894 }, { - "epoch": 0.9050794551645857, + "epoch": 0.9038227209612061, "grad_norm": 0.0, - "learning_rate": 4.6886063697034856e-07, - "loss": 0.893, + "learning_rate": 4.812437715014062e-07, + "loss": 0.7647, "step": 31895 }, { - "epoch": 0.9051078320090806, + "epoch": 0.9038510584034685, "grad_norm": 0.0, - "learning_rate": 4.685825534741295e-07, - "loss": 0.8799, + "learning_rate": 4.809625233331749e-07, + "loss": 0.81, "step": 31896 }, { - "epoch": 0.9051362088535755, + "epoch": 0.903879395845731, "grad_norm": 0.0, - "learning_rate": 4.683045504902206e-07, - "loss": 0.7551, + "learning_rate": 4.806813553473321e-07, + "loss": 0.8438, "step": 31897 }, { - "epoch": 0.9051645856980703, + "epoch": 0.9039077332879935, "grad_norm": 0.0, - "learning_rate": 4.6802662802097443e-07, - "loss": 0.8029, + "learning_rate": 4.804002675462449e-07, + "loss": 0.7528, "step": 31898 }, { - "epoch": 0.9051929625425653, + "epoch": 0.9039360707302558, "grad_norm": 0.0, - "learning_rate": 4.6774878606873353e-07, - "loss": 0.8724, + "learning_rate": 4.801192599322835e-07, + "loss": 0.6756, "step": 31899 }, { - "epoch": 0.9052213393870602, + "epoch": 0.9039644081725183, "grad_norm": 0.0, - "learning_rate": 4.6747102463584714e-07, - "loss": 0.7893, + "learning_rate": 4.798383325078104e-07, + "loss": 0.7947, "step": 31900 }, { - "epoch": 0.905249716231555, + "epoch": 0.9039927456147808, "grad_norm": 0.0, - "learning_rate": 4.671933437246623e-07, - "loss": 0.8305, + "learning_rate": 4.795574852751939e-07, + "loss": 0.7702, "step": 31901 }, { - "epoch": 0.9052780930760499, + "epoch": 0.9040210830570433, "grad_norm": 0.0, - "learning_rate": 4.6691574333752375e-07, - "loss": 0.8293, + "learning_rate": 4.792767182368019e-07, + "loss": 0.8778, "step": 31902 }, { - "epoch": 0.9053064699205449, + "epoch": 0.9040494204993057, "grad_norm": 0.0, - "learning_rate": 4.6663822347677633e-07, - "loss": 0.8109, + "learning_rate": 4.789960313949959e-07, + "loss": 0.802, "step": 31903 }, { - "epoch": 0.9053348467650397, + "epoch": 0.9040777579415682, "grad_norm": 0.0, - "learning_rate": 4.663607841447637e-07, - "loss": 0.8848, + "learning_rate": 4.787154247521442e-07, + "loss": 0.8044, "step": 31904 }, { - "epoch": 0.9053632236095346, + "epoch": 0.9041060953838307, "grad_norm": 0.0, - "learning_rate": 4.6608342534382956e-07, - "loss": 0.8278, + "learning_rate": 4.784348983106057e-07, + "loss": 0.8793, "step": 31905 }, { - "epoch": 0.9053916004540296, + "epoch": 0.9041344328260931, "grad_norm": 0.0, - "learning_rate": 4.6580614707631975e-07, - "loss": 0.6845, + "learning_rate": 4.781544520727466e-07, + "loss": 0.8844, "step": 31906 }, { - "epoch": 0.9054199772985244, + "epoch": 0.9041627702683556, "grad_norm": 0.0, - "learning_rate": 4.655289493445725e-07, - "loss": 0.7735, + "learning_rate": 4.778740860409292e-07, + "loss": 0.8232, "step": 31907 }, { - "epoch": 0.9054483541430193, + "epoch": 0.9041911077106181, "grad_norm": 0.0, - "learning_rate": 4.652518321509303e-07, - "loss": 0.8367, + "learning_rate": 4.775938002175129e-07, + "loss": 0.8043, "step": 31908 }, { - "epoch": 0.9054767309875141, + "epoch": 0.9042194451528806, "grad_norm": 0.0, - "learning_rate": 4.649747954977368e-07, - "loss": 0.8117, + "learning_rate": 4.773135946048601e-07, + "loss": 0.825, "step": 31909 }, { - "epoch": 0.9055051078320091, + "epoch": 0.9042477825951429, "grad_norm": 0.0, - "learning_rate": 4.64697839387328e-07, - "loss": 0.7218, + "learning_rate": 4.770334692053313e-07, + "loss": 0.7399, "step": 31910 }, { - "epoch": 0.905533484676504, + "epoch": 0.9042761200374054, "grad_norm": 0.0, - "learning_rate": 4.644209638220443e-07, - "loss": 0.7193, + "learning_rate": 4.767534240212857e-07, + "loss": 0.8531, "step": 31911 }, { - "epoch": 0.9055618615209988, + "epoch": 0.9043044574796679, "grad_norm": 0.0, - "learning_rate": 4.641441688042281e-07, - "loss": 0.7593, + "learning_rate": 4.764734590550835e-07, + "loss": 0.8734, "step": 31912 }, { - "epoch": 0.9055902383654938, + "epoch": 0.9043327949219303, "grad_norm": 0.0, - "learning_rate": 4.6386745433621317e-07, - "loss": 0.7531, + "learning_rate": 4.7619357430908177e-07, + "loss": 0.7766, "step": 31913 }, { - "epoch": 0.9056186152099887, + "epoch": 0.9043611323641928, "grad_norm": 0.0, - "learning_rate": 4.6359082042033766e-07, - "loss": 0.7779, + "learning_rate": 4.759137697856364e-07, + "loss": 0.8495, "step": 31914 }, { - "epoch": 0.9056469920544835, + "epoch": 0.9043894698064553, "grad_norm": 0.0, - "learning_rate": 4.633142670589408e-07, - "loss": 0.8304, + "learning_rate": 4.756340454871089e-07, + "loss": 0.7744, "step": 31915 }, { - "epoch": 0.9056753688989785, + "epoch": 0.9044178072487177, "grad_norm": 0.0, - "learning_rate": 4.6303779425435625e-07, - "loss": 0.8406, + "learning_rate": 4.7535440141584956e-07, + "loss": 0.8933, "step": 31916 }, { - "epoch": 0.9057037457434733, + "epoch": 0.9044461446909802, "grad_norm": 0.0, - "learning_rate": 4.6276140200891994e-07, - "loss": 0.8048, + "learning_rate": 4.750748375742198e-07, + "loss": 0.7608, "step": 31917 }, { - "epoch": 0.9057321225879682, + "epoch": 0.9044744821332427, "grad_norm": 0.0, - "learning_rate": 4.624850903249678e-07, - "loss": 0.8424, + "learning_rate": 4.7479535396457e-07, + "loss": 0.8578, "step": 31918 }, { - "epoch": 0.9057604994324631, + "epoch": 0.9045028195755052, "grad_norm": 0.0, - "learning_rate": 4.6220885920483014e-07, - "loss": 0.8051, + "learning_rate": 4.7451595058925606e-07, + "loss": 0.8084, "step": 31919 }, { - "epoch": 0.905788876276958, + "epoch": 0.9045311570177675, "grad_norm": 0.0, - "learning_rate": 4.619327086508463e-07, - "loss": 0.7716, + "learning_rate": 4.742366274506327e-07, + "loss": 0.7892, "step": 31920 }, { - "epoch": 0.9058172531214529, + "epoch": 0.90455949446003, "grad_norm": 0.0, - "learning_rate": 4.616566386653443e-07, - "loss": 0.8834, + "learning_rate": 4.739573845510492e-07, + "loss": 0.6701, "step": 31921 }, { - "epoch": 0.9058456299659478, + "epoch": 0.9045878319022925, "grad_norm": 0.0, - "learning_rate": 4.6138064925065673e-07, - "loss": 0.8383, + "learning_rate": 4.736782218928615e-07, + "loss": 0.8632, "step": 31922 }, { - "epoch": 0.9058740068104427, + "epoch": 0.9046161693445549, "grad_norm": 0.0, - "learning_rate": 4.6110474040911735e-07, - "loss": 0.7537, + "learning_rate": 4.7339913947841876e-07, + "loss": 0.7685, "step": 31923 }, { - "epoch": 0.9059023836549376, + "epoch": 0.9046445067868174, "grad_norm": 0.0, - "learning_rate": 4.6082891214305423e-07, - "loss": 0.7521, + "learning_rate": 4.731201373100736e-07, + "loss": 0.8332, "step": 31924 }, { - "epoch": 0.9059307604994324, + "epoch": 0.9046728442290799, "grad_norm": 0.0, - "learning_rate": 4.605531644547967e-07, - "loss": 0.7717, + "learning_rate": 4.7284121539017514e-07, + "loss": 0.7114, "step": 31925 }, { - "epoch": 0.9059591373439273, + "epoch": 0.9047011816713424, "grad_norm": 0.0, - "learning_rate": 4.6027749734667837e-07, - "loss": 0.9222, + "learning_rate": 4.725623737210727e-07, + "loss": 0.7514, "step": 31926 }, { - "epoch": 0.9059875141884223, + "epoch": 0.9047295191136048, "grad_norm": 0.0, - "learning_rate": 4.60001910821023e-07, - "loss": 0.866, + "learning_rate": 4.722836123051155e-07, + "loss": 0.8871, "step": 31927 }, { - "epoch": 0.9060158910329171, + "epoch": 0.9047578565558673, "grad_norm": 0.0, - "learning_rate": 4.5972640488015975e-07, - "loss": 0.8458, + "learning_rate": 4.720049311446517e-07, + "loss": 0.7983, "step": 31928 }, { - "epoch": 0.906044267877412, + "epoch": 0.9047861939981298, "grad_norm": 0.0, - "learning_rate": 4.594509795264179e-07, - "loss": 0.8058, + "learning_rate": 4.717263302420283e-07, + "loss": 0.8147, "step": 31929 }, { - "epoch": 0.906072644721907, + "epoch": 0.9048145314403921, "grad_norm": 0.0, - "learning_rate": 4.5917563476212123e-07, - "loss": 0.8121, + "learning_rate": 4.714478095995922e-07, + "loss": 0.7722, "step": 31930 }, { - "epoch": 0.9061010215664018, + "epoch": 0.9048428688826546, "grad_norm": 0.0, - "learning_rate": 4.589003705895967e-07, - "loss": 0.81, + "learning_rate": 4.711693692196906e-07, + "loss": 0.7926, "step": 31931 }, { - "epoch": 0.9061293984108967, + "epoch": 0.9048712063249171, "grad_norm": 0.0, - "learning_rate": 4.586251870111713e-07, - "loss": 0.8249, + "learning_rate": 4.708910091046659e-07, + "loss": 0.8029, "step": 31932 }, { - "epoch": 0.9061577752553917, + "epoch": 0.9048995437671796, "grad_norm": 0.0, - "learning_rate": 4.583500840291666e-07, - "loss": 0.835, + "learning_rate": 4.706127292568663e-07, + "loss": 0.7272, "step": 31933 }, { - "epoch": 0.9061861520998865, + "epoch": 0.904927881209442, "grad_norm": 0.0, - "learning_rate": 4.5807506164590843e-07, - "loss": 0.8169, + "learning_rate": 4.7033452967863324e-07, + "loss": 0.8544, "step": 31934 }, { - "epoch": 0.9062145289443814, + "epoch": 0.9049562186517045, "grad_norm": 0.0, - "learning_rate": 4.578001198637183e-07, - "loss": 0.7754, + "learning_rate": 4.7005641037231155e-07, + "loss": 0.815, "step": 31935 }, { - "epoch": 0.9062429057888762, + "epoch": 0.904984556093967, "grad_norm": 0.0, - "learning_rate": 4.5752525868491883e-07, - "loss": 0.861, + "learning_rate": 4.6977837134024375e-07, + "loss": 0.6617, "step": 31936 }, { - "epoch": 0.9062712826333712, + "epoch": 0.9050128935362294, "grad_norm": 0.0, - "learning_rate": 4.5725047811183475e-07, - "loss": 0.8506, + "learning_rate": 4.695004125847713e-07, + "loss": 0.8276, "step": 31937 }, { - "epoch": 0.9062996594778661, + "epoch": 0.9050412309784919, "grad_norm": 0.0, - "learning_rate": 4.569757781467832e-07, - "loss": 0.8009, + "learning_rate": 4.6922253410823683e-07, + "loss": 0.9032, "step": 31938 }, { - "epoch": 0.9063280363223609, + "epoch": 0.9050695684207544, "grad_norm": 0.0, - "learning_rate": 4.5670115879208663e-07, - "loss": 0.8113, + "learning_rate": 4.6894473591297953e-07, + "loss": 0.8046, "step": 31939 }, { - "epoch": 0.9063564131668559, + "epoch": 0.9050979058630167, "grad_norm": 0.0, - "learning_rate": 4.5642662005006556e-07, - "loss": 0.8637, + "learning_rate": 4.6866701800133864e-07, + "loss": 0.8987, "step": 31940 }, { - "epoch": 0.9063847900113507, + "epoch": 0.9051262433052792, "grad_norm": 0.0, - "learning_rate": 4.561521619230358e-07, - "loss": 0.7771, + "learning_rate": 4.6838938037565784e-07, + "loss": 0.8199, "step": 31941 }, { - "epoch": 0.9064131668558456, + "epoch": 0.9051545807475417, "grad_norm": 0.0, - "learning_rate": 4.5587778441331887e-07, - "loss": 0.7859, + "learning_rate": 4.681118230382709e-07, + "loss": 0.789, "step": 31942 }, { - "epoch": 0.9064415437003405, + "epoch": 0.9051829181898042, "grad_norm": 0.0, - "learning_rate": 4.556034875232318e-07, - "loss": 0.8177, + "learning_rate": 4.678343459915169e-07, + "loss": 0.8421, "step": 31943 }, { - "epoch": 0.9064699205448354, + "epoch": 0.9052112556320666, "grad_norm": 0.0, - "learning_rate": 4.553292712550894e-07, - "loss": 0.6723, + "learning_rate": 4.675569492377363e-07, + "loss": 0.8729, "step": 31944 }, { - "epoch": 0.9064982973893303, + "epoch": 0.9052395930743291, "grad_norm": 0.0, - "learning_rate": 4.5505513561121096e-07, - "loss": 0.7466, + "learning_rate": 4.672796327792617e-07, + "loss": 0.8106, "step": 31945 }, { - "epoch": 0.9065266742338252, + "epoch": 0.9052679305165916, "grad_norm": 0.0, - "learning_rate": 4.5478108059391123e-07, - "loss": 0.72, + "learning_rate": 4.6700239661843114e-07, + "loss": 0.8002, "step": 31946 }, { - "epoch": 0.9065550510783201, + "epoch": 0.905296267958854, "grad_norm": 0.0, - "learning_rate": 4.5450710620550395e-07, - "loss": 0.7738, + "learning_rate": 4.667252407575784e-07, + "loss": 0.9086, "step": 31947 }, { - "epoch": 0.906583427922815, + "epoch": 0.9053246054011165, "grad_norm": 0.0, - "learning_rate": 4.54233212448304e-07, - "loss": 0.8223, + "learning_rate": 4.6644816519903936e-07, + "loss": 0.7101, "step": 31948 }, { - "epoch": 0.9066118047673098, + "epoch": 0.905352942843379, "grad_norm": 0.0, - "learning_rate": 4.539593993246272e-07, - "loss": 0.8965, + "learning_rate": 4.6617116994514764e-07, + "loss": 0.8166, "step": 31949 }, { - "epoch": 0.9066401816118048, + "epoch": 0.9053812802856415, "grad_norm": 0.0, - "learning_rate": 4.536856668367806e-07, - "loss": 0.8159, + "learning_rate": 4.6589425499823703e-07, + "loss": 0.7672, "step": 31950 }, { - "epoch": 0.9066685584562997, + "epoch": 0.9054096177279038, "grad_norm": 0.0, - "learning_rate": 4.5341201498708464e-07, - "loss": 0.7668, + "learning_rate": 4.6561742036063896e-07, + "loss": 0.7451, "step": 31951 }, { - "epoch": 0.9066969353007945, + "epoch": 0.9054379551701663, "grad_norm": 0.0, - "learning_rate": 4.5313844377784413e-07, - "loss": 0.9172, + "learning_rate": 4.653406660346871e-07, + "loss": 0.7926, "step": 31952 }, { - "epoch": 0.9067253121452894, + "epoch": 0.9054662926124288, "grad_norm": 0.0, - "learning_rate": 4.528649532113727e-07, - "loss": 0.7153, + "learning_rate": 4.650639920227096e-07, + "loss": 0.8558, "step": 31953 }, { - "epoch": 0.9067536889897844, + "epoch": 0.9054946300546912, "grad_norm": 0.0, - "learning_rate": 4.5259154328998077e-07, - "loss": 0.623, + "learning_rate": 4.6478739832704123e-07, + "loss": 0.7577, "step": 31954 }, { - "epoch": 0.9067820658342792, + "epoch": 0.9055229674969537, "grad_norm": 0.0, - "learning_rate": 4.523182140159765e-07, - "loss": 0.8066, + "learning_rate": 4.6451088495000685e-07, + "loss": 0.7798, "step": 31955 }, { - "epoch": 0.9068104426787741, + "epoch": 0.9055513049392162, "grad_norm": 0.0, - "learning_rate": 4.520449653916692e-07, - "loss": 0.7312, + "learning_rate": 4.64234451893939e-07, + "loss": 0.8313, "step": 31956 }, { - "epoch": 0.9068388195232691, + "epoch": 0.9055796423814787, "grad_norm": 0.0, - "learning_rate": 4.5177179741936917e-07, - "loss": 0.8661, + "learning_rate": 4.63958099161167e-07, + "loss": 0.6499, "step": 31957 }, { - "epoch": 0.9068671963677639, + "epoch": 0.9056079798237411, "grad_norm": 0.0, - "learning_rate": 4.514987101013801e-07, - "loss": 0.8707, + "learning_rate": 4.636818267540144e-07, + "loss": 0.8158, "step": 31958 }, { - "epoch": 0.9068955732122588, + "epoch": 0.9056363172660036, "grad_norm": 0.0, - "learning_rate": 4.5122570344001136e-07, - "loss": 0.741, + "learning_rate": 4.634056346748117e-07, + "loss": 0.8566, "step": 31959 }, { - "epoch": 0.9069239500567536, + "epoch": 0.9056646547082661, "grad_norm": 0.0, - "learning_rate": 4.509527774375688e-07, - "loss": 0.8457, + "learning_rate": 4.6312952292588696e-07, + "loss": 0.8207, "step": 31960 }, { - "epoch": 0.9069523269012486, + "epoch": 0.9056929921505285, "grad_norm": 0.0, - "learning_rate": 4.5067993209635617e-07, - "loss": 0.8396, + "learning_rate": 4.6285349150956174e-07, + "loss": 0.7572, "step": 31961 }, { - "epoch": 0.9069807037457435, + "epoch": 0.905721329592791, "grad_norm": 0.0, - "learning_rate": 4.504071674186805e-07, - "loss": 0.8767, + "learning_rate": 4.6257754042816295e-07, + "loss": 0.8283, "step": 31962 }, { - "epoch": 0.9070090805902383, + "epoch": 0.9057496670350534, "grad_norm": 0.0, - "learning_rate": 4.501344834068444e-07, - "loss": 0.7731, + "learning_rate": 4.6230166968401433e-07, + "loss": 0.8682, "step": 31963 }, { - "epoch": 0.9070374574347333, + "epoch": 0.9057780044773158, "grad_norm": 0.0, - "learning_rate": 4.4986188006315157e-07, - "loss": 0.8615, + "learning_rate": 4.620258792794419e-07, + "loss": 0.7704, "step": 31964 }, { - "epoch": 0.9070658342792282, + "epoch": 0.9058063419195783, "grad_norm": 0.0, - "learning_rate": 4.495893573899035e-07, - "loss": 0.6938, + "learning_rate": 4.6175016921676806e-07, + "loss": 0.7909, "step": 31965 }, { - "epoch": 0.907094211123723, + "epoch": 0.9058346793618408, "grad_norm": 0.0, - "learning_rate": 4.49316915389405e-07, - "loss": 0.7275, + "learning_rate": 4.614745394983133e-07, + "loss": 0.7765, "step": 31966 }, { - "epoch": 0.907122587968218, + "epoch": 0.9058630168041033, "grad_norm": 0.0, - "learning_rate": 4.4904455406395544e-07, - "loss": 0.7841, + "learning_rate": 4.6119899012640137e-07, + "loss": 0.8726, "step": 31967 }, { - "epoch": 0.9071509648127128, + "epoch": 0.9058913542463657, "grad_norm": 0.0, - "learning_rate": 4.487722734158573e-07, - "loss": 0.7981, + "learning_rate": 4.6092352110335135e-07, + "loss": 0.8357, "step": 31968 }, { - "epoch": 0.9071793416572077, + "epoch": 0.9059196916886282, "grad_norm": 0.0, - "learning_rate": 4.4850007344740765e-07, - "loss": 0.8396, + "learning_rate": 4.6064813243148487e-07, + "loss": 0.7523, "step": 31969 }, { - "epoch": 0.9072077185017026, + "epoch": 0.9059480291308907, "grad_norm": 0.0, - "learning_rate": 4.482279541609069e-07, - "loss": 0.8311, + "learning_rate": 4.6037282411312333e-07, + "loss": 0.7756, "step": 31970 }, { - "epoch": 0.9072360953461975, + "epoch": 0.9059763665731531, "grad_norm": 0.0, - "learning_rate": 4.479559155586555e-07, - "loss": 0.7697, + "learning_rate": 4.6009759615058156e-07, + "loss": 0.7563, "step": 31971 }, { - "epoch": 0.9072644721906924, + "epoch": 0.9060047040154156, "grad_norm": 0.0, - "learning_rate": 4.4768395764294814e-07, - "loss": 0.7911, + "learning_rate": 4.5982244854618107e-07, + "loss": 0.7118, "step": 31972 }, { - "epoch": 0.9072928490351873, + "epoch": 0.906033041457678, "grad_norm": 0.0, - "learning_rate": 4.474120804160842e-07, - "loss": 0.73, + "learning_rate": 4.5954738130224e-07, + "loss": 0.7927, "step": 31973 }, { - "epoch": 0.9073212258796822, + "epoch": 0.9060613788999405, "grad_norm": 0.0, - "learning_rate": 4.4714028388036066e-07, - "loss": 0.8204, + "learning_rate": 4.5927239442107306e-07, + "loss": 0.9189, "step": 31974 }, { - "epoch": 0.9073496027241771, + "epoch": 0.9060897163422029, "grad_norm": 0.0, - "learning_rate": 4.4686856803807246e-07, - "loss": 0.8979, + "learning_rate": 4.5899748790499743e-07, + "loss": 0.715, "step": 31975 }, { - "epoch": 0.9073779795686719, + "epoch": 0.9061180537844654, "grad_norm": 0.0, - "learning_rate": 4.465969328915143e-07, - "loss": 0.6613, + "learning_rate": 4.5872266175632783e-07, + "loss": 0.7553, "step": 31976 }, { - "epoch": 0.9074063564131668, + "epoch": 0.9061463912267279, "grad_norm": 0.0, - "learning_rate": 4.463253784429822e-07, - "loss": 0.8432, + "learning_rate": 4.5844791597738135e-07, + "loss": 0.9398, "step": 31977 }, { - "epoch": 0.9074347332576618, + "epoch": 0.9061747286689903, "grad_norm": 0.0, - "learning_rate": 4.4605390469476763e-07, - "loss": 0.708, + "learning_rate": 4.581732505704728e-07, + "loss": 0.6522, "step": 31978 }, { - "epoch": 0.9074631101021566, + "epoch": 0.9062030661112528, "grad_norm": 0.0, - "learning_rate": 4.4578251164916654e-07, - "loss": 0.791, + "learning_rate": 4.5789866553791253e-07, + "loss": 0.8813, "step": 31979 }, { - "epoch": 0.9074914869466515, + "epoch": 0.9062314035535153, "grad_norm": 0.0, - "learning_rate": 4.455111993084693e-07, - "loss": 0.793, + "learning_rate": 4.5762416088201535e-07, + "loss": 0.8475, "step": 31980 }, { - "epoch": 0.9075198637911465, + "epoch": 0.9062597409957778, "grad_norm": 0.0, - "learning_rate": 4.452399676749675e-07, - "loss": 0.888, + "learning_rate": 4.5734973660509387e-07, + "loss": 0.93, "step": 31981 }, { - "epoch": 0.9075482406356413, + "epoch": 0.9062880784380402, "grad_norm": 0.0, - "learning_rate": 4.4496881675095473e-07, - "loss": 0.8062, + "learning_rate": 4.5707539270945847e-07, + "loss": 0.7831, "step": 31982 }, { - "epoch": 0.9075766174801362, + "epoch": 0.9063164158803027, "grad_norm": 0.0, - "learning_rate": 4.4469774653872035e-07, - "loss": 0.889, + "learning_rate": 4.568011291974228e-07, + "loss": 0.8505, "step": 31983 }, { - "epoch": 0.9076049943246312, + "epoch": 0.9063447533225651, "grad_norm": 0.0, - "learning_rate": 4.444267570405525e-07, - "loss": 0.8501, + "learning_rate": 4.5652694607129287e-07, + "loss": 0.8261, "step": 31984 }, { - "epoch": 0.907633371169126, + "epoch": 0.9063730907648275, "grad_norm": 0.0, - "learning_rate": 4.4415584825874157e-07, - "loss": 0.8059, + "learning_rate": 4.5625284333338017e-07, + "loss": 0.8101, "step": 31985 }, { - "epoch": 0.9076617480136209, + "epoch": 0.90640142820709, "grad_norm": 0.0, - "learning_rate": 4.4388502019557686e-07, - "loss": 0.7628, + "learning_rate": 4.55978820985995e-07, + "loss": 0.8187, "step": 31986 }, { - "epoch": 0.9076901248581157, + "epoch": 0.9064297656493525, "grad_norm": 0.0, - "learning_rate": 4.4361427285334326e-07, - "loss": 0.8773, + "learning_rate": 4.5570487903144335e-07, + "loss": 0.8209, "step": 31987 }, { - "epoch": 0.9077185017026107, + "epoch": 0.9064581030916149, "grad_norm": 0.0, - "learning_rate": 4.433436062343299e-07, - "loss": 0.7652, + "learning_rate": 4.5543101747203334e-07, + "loss": 0.8128, "step": 31988 }, { - "epoch": 0.9077468785471056, + "epoch": 0.9064864405338774, "grad_norm": 0.0, - "learning_rate": 4.43073020340824e-07, - "loss": 0.8287, + "learning_rate": 4.551572363100731e-07, + "loss": 0.8141, "step": 31989 }, { - "epoch": 0.9077752553916004, + "epoch": 0.9065147779761399, "grad_norm": 0.0, - "learning_rate": 4.4280251517510917e-07, - "loss": 0.8086, + "learning_rate": 4.5488353554786644e-07, + "loss": 0.768, "step": 31990 }, { - "epoch": 0.9078036322360954, + "epoch": 0.9065431154184024, "grad_norm": 0.0, - "learning_rate": 4.4253209073947034e-07, - "loss": 0.7722, + "learning_rate": 4.546099151877226e-07, + "loss": 0.834, "step": 31991 }, { - "epoch": 0.9078320090805903, + "epoch": 0.9065714528606648, "grad_norm": 0.0, - "learning_rate": 4.422617470361945e-07, - "loss": 0.7628, + "learning_rate": 4.543363752319419e-07, + "loss": 0.9072, "step": 31992 }, { - "epoch": 0.9078603859250851, + "epoch": 0.9065997903029273, "grad_norm": 0.0, - "learning_rate": 4.41991484067561e-07, - "loss": 0.884, + "learning_rate": 4.5406291568283134e-07, + "loss": 0.8304, "step": 31993 }, { - "epoch": 0.90788876276958, + "epoch": 0.9066281277451897, "grad_norm": 0.0, - "learning_rate": 4.417213018358579e-07, - "loss": 0.8037, + "learning_rate": 4.5378953654269475e-07, + "loss": 0.7298, "step": 31994 }, { - "epoch": 0.9079171396140749, + "epoch": 0.9066564651874521, "grad_norm": 0.0, - "learning_rate": 4.414512003433624e-07, - "loss": 0.8706, + "learning_rate": 4.535162378138325e-07, + "loss": 0.8835, "step": 31995 }, { - "epoch": 0.9079455164585698, + "epoch": 0.9066848026297146, "grad_norm": 0.0, - "learning_rate": 4.4118117959235706e-07, - "loss": 0.7527, + "learning_rate": 4.5324301949854935e-07, + "loss": 0.704, "step": 31996 }, { - "epoch": 0.9079738933030647, + "epoch": 0.9067131400719771, "grad_norm": 0.0, - "learning_rate": 4.409112395851267e-07, - "loss": 0.8399, + "learning_rate": 4.529698815991446e-07, + "loss": 0.7117, "step": 31997 }, { - "epoch": 0.9080022701475596, + "epoch": 0.9067414775142396, "grad_norm": 0.0, - "learning_rate": 4.406413803239473e-07, - "loss": 0.789, + "learning_rate": 4.526968241179186e-07, + "loss": 0.7693, "step": 31998 }, { - "epoch": 0.9080306469920545, + "epoch": 0.906769814956502, "grad_norm": 0.0, - "learning_rate": 4.403716018111004e-07, - "loss": 0.8851, + "learning_rate": 4.5242384705717404e-07, + "loss": 0.8011, "step": 31999 }, { - "epoch": 0.9080590238365494, + "epoch": 0.9067981523987645, "grad_norm": 0.0, - "learning_rate": 4.401019040488652e-07, - "loss": 0.7789, + "learning_rate": 4.5215095041920787e-07, + "loss": 0.8143, "step": 32000 }, { - "epoch": 0.9080874006810443, + "epoch": 0.906826489841027, "grad_norm": 0.0, - "learning_rate": 4.3983228703951777e-07, - "loss": 0.7342, + "learning_rate": 4.5187813420631944e-07, + "loss": 0.82, "step": 32001 }, { - "epoch": 0.9081157775255392, + "epoch": 0.9068548272832894, "grad_norm": 0.0, - "learning_rate": 4.3956275078533615e-07, - "loss": 0.8101, + "learning_rate": 4.5160539842080797e-07, + "loss": 0.9413, "step": 32002 }, { - "epoch": 0.908144154370034, + "epoch": 0.9068831647255519, "grad_norm": 0.0, - "learning_rate": 4.3929329528859974e-07, - "loss": 0.8604, + "learning_rate": 4.5133274306496944e-07, + "loss": 0.7793, "step": 32003 }, { - "epoch": 0.9081725312145289, + "epoch": 0.9069115021678144, "grad_norm": 0.0, - "learning_rate": 4.390239205515812e-07, - "loss": 0.8509, + "learning_rate": 4.5106016814110197e-07, + "loss": 0.7582, "step": 32004 }, { - "epoch": 0.9082009080590239, + "epoch": 0.9069398396100768, "grad_norm": 0.0, - "learning_rate": 4.387546265765574e-07, - "loss": 0.8934, + "learning_rate": 4.507876736514993e-07, + "loss": 0.7489, "step": 32005 }, { - "epoch": 0.9082292849035187, + "epoch": 0.9069681770523392, "grad_norm": 0.0, - "learning_rate": 4.384854133658045e-07, - "loss": 0.8448, + "learning_rate": 4.505152595984585e-07, + "loss": 0.7941, "step": 32006 }, { - "epoch": 0.9082576617480136, + "epoch": 0.9069965144946017, "grad_norm": 0.0, - "learning_rate": 4.3821628092159394e-07, - "loss": 0.8215, + "learning_rate": 4.502429259842744e-07, + "loss": 0.8187, "step": 32007 }, { - "epoch": 0.9082860385925086, + "epoch": 0.9070248519368642, "grad_norm": 0.0, - "learning_rate": 4.379472292461995e-07, - "loss": 0.8047, + "learning_rate": 4.499706728112396e-07, + "loss": 0.8319, "step": 32008 }, { - "epoch": 0.9083144154370034, + "epoch": 0.9070531893791266, "grad_norm": 0.0, - "learning_rate": 4.3767825834189703e-07, - "loss": 0.6979, + "learning_rate": 4.4969850008164897e-07, + "loss": 0.8311, "step": 32009 }, { - "epoch": 0.9083427922814983, + "epoch": 0.9070815268213891, "grad_norm": 0.0, - "learning_rate": 4.374093682109537e-07, - "loss": 0.861, + "learning_rate": 4.494264077977939e-07, + "loss": 0.8461, "step": 32010 }, { - "epoch": 0.9083711691259931, + "epoch": 0.9071098642636516, "grad_norm": 0.0, - "learning_rate": 4.3714055885564433e-07, - "loss": 0.787, + "learning_rate": 4.491543959619671e-07, + "loss": 0.8142, "step": 32011 }, { - "epoch": 0.9083995459704881, + "epoch": 0.907138201705914, "grad_norm": 0.0, - "learning_rate": 4.368718302782382e-07, - "loss": 0.7497, + "learning_rate": 4.4888246457646e-07, + "loss": 0.7962, "step": 32012 }, { - "epoch": 0.908427922814983, + "epoch": 0.9071665391481765, "grad_norm": 0.0, - "learning_rate": 4.3660318248100575e-07, - "loss": 0.8012, + "learning_rate": 4.4861061364356086e-07, + "loss": 0.6748, "step": 32013 }, { - "epoch": 0.9084562996594778, + "epoch": 0.907194876590439, "grad_norm": 0.0, - "learning_rate": 4.3633461546621737e-07, - "loss": 0.7728, + "learning_rate": 4.483388431655611e-07, + "loss": 0.7438, "step": 32014 }, { - "epoch": 0.9084846765039728, + "epoch": 0.9072232140327015, "grad_norm": 0.0, - "learning_rate": 4.36066129236139e-07, - "loss": 0.8416, + "learning_rate": 4.480671531447511e-07, + "loss": 0.8347, "step": 32015 }, { - "epoch": 0.9085130533484677, + "epoch": 0.9072515514749638, "grad_norm": 0.0, - "learning_rate": 4.3579772379303997e-07, - "loss": 0.7862, + "learning_rate": 4.4779554358341803e-07, + "loss": 0.7319, "step": 32016 }, { - "epoch": 0.9085414301929625, + "epoch": 0.9072798889172263, "grad_norm": 0.0, - "learning_rate": 4.3552939913918845e-07, - "loss": 0.8378, + "learning_rate": 4.4752401448384997e-07, + "loss": 0.7805, "step": 32017 }, { - "epoch": 0.9085698070374575, + "epoch": 0.9073082263594888, "grad_norm": 0.0, - "learning_rate": 4.3526115527684933e-07, - "loss": 0.6988, + "learning_rate": 4.472525658483362e-07, + "loss": 0.8318, "step": 32018 }, { - "epoch": 0.9085981838819523, + "epoch": 0.9073365638017512, "grad_norm": 0.0, - "learning_rate": 4.349929922082896e-07, - "loss": 0.8493, + "learning_rate": 4.469811976791605e-07, + "loss": 0.8062, "step": 32019 }, { - "epoch": 0.9086265607264472, + "epoch": 0.9073649012440137, "grad_norm": 0.0, - "learning_rate": 4.347249099357742e-07, - "loss": 0.7967, + "learning_rate": 4.467099099786099e-07, + "loss": 0.933, "step": 32020 }, { - "epoch": 0.9086549375709421, + "epoch": 0.9073932386862762, "grad_norm": 0.0, - "learning_rate": 4.3445690846156683e-07, - "loss": 0.8094, + "learning_rate": 4.4643870274896805e-07, + "loss": 0.8487, "step": 32021 }, { - "epoch": 0.908683314415437, + "epoch": 0.9074215761285387, "grad_norm": 0.0, - "learning_rate": 4.341889877879313e-07, - "loss": 0.8475, + "learning_rate": 4.4616757599252104e-07, + "loss": 0.8734, "step": 32022 }, { - "epoch": 0.9087116912599319, + "epoch": 0.9074499135708011, "grad_norm": 0.0, - "learning_rate": 4.339211479171335e-07, - "loss": 0.8484, + "learning_rate": 4.458965297115536e-07, + "loss": 0.6697, "step": 32023 }, { - "epoch": 0.9087400681044268, + "epoch": 0.9074782510130636, "grad_norm": 0.0, - "learning_rate": 4.3365338885143273e-07, - "loss": 0.841, + "learning_rate": 4.456255639083462e-07, + "loss": 0.8787, "step": 32024 }, { - "epoch": 0.9087684449489217, + "epoch": 0.9075065884553261, "grad_norm": 0.0, - "learning_rate": 4.3338571059309056e-07, - "loss": 0.7736, + "learning_rate": 4.4535467858518254e-07, + "loss": 0.8048, "step": 32025 }, { - "epoch": 0.9087968217934166, + "epoch": 0.9075349258975884, "grad_norm": 0.0, - "learning_rate": 4.3311811314437067e-07, - "loss": 0.7869, + "learning_rate": 4.4508387374434416e-07, + "loss": 0.8081, "step": 32026 }, { - "epoch": 0.9088251986379114, + "epoch": 0.9075632633398509, "grad_norm": 0.0, - "learning_rate": 4.3285059650753135e-07, - "loss": 0.8852, + "learning_rate": 4.448131493881147e-07, + "loss": 0.8078, "step": 32027 }, { - "epoch": 0.9088535754824063, + "epoch": 0.9075916007821134, "grad_norm": 0.0, - "learning_rate": 4.325831606848352e-07, - "loss": 0.7766, + "learning_rate": 4.445425055187702e-07, + "loss": 0.7677, "step": 32028 }, { - "epoch": 0.9088819523269013, + "epoch": 0.9076199382243759, "grad_norm": 0.0, - "learning_rate": 4.3231580567853705e-07, - "loss": 0.7466, + "learning_rate": 4.4427194213859216e-07, + "loss": 0.8049, "step": 32029 }, { - "epoch": 0.9089103291713961, + "epoch": 0.9076482756666383, "grad_norm": 0.0, - "learning_rate": 4.3204853149089733e-07, - "loss": 0.8268, + "learning_rate": 4.4400145924986096e-07, + "loss": 0.807, "step": 32030 }, { - "epoch": 0.908938706015891, + "epoch": 0.9076766131089008, "grad_norm": 0.0, - "learning_rate": 4.3178133812417535e-07, - "loss": 0.8028, + "learning_rate": 4.4373105685485475e-07, + "loss": 0.7139, "step": 32031 }, { - "epoch": 0.908967082860386, + "epoch": 0.9077049505511633, "grad_norm": 0.0, - "learning_rate": 4.3151422558062595e-07, - "loss": 0.8184, + "learning_rate": 4.434607349558495e-07, + "loss": 0.8195, "step": 32032 }, { - "epoch": 0.9089954597048808, + "epoch": 0.9077332879934257, "grad_norm": 0.0, - "learning_rate": 4.3124719386250515e-07, - "loss": 0.7209, + "learning_rate": 4.4319049355512345e-07, + "loss": 0.8352, "step": 32033 }, { - "epoch": 0.9090238365493757, + "epoch": 0.9077616254356882, "grad_norm": 0.0, - "learning_rate": 4.309802429720711e-07, - "loss": 0.8292, + "learning_rate": 4.429203326549525e-07, + "loss": 0.7426, "step": 32034 }, { - "epoch": 0.9090522133938707, + "epoch": 0.9077899628779507, "grad_norm": 0.0, - "learning_rate": 4.3071337291157535e-07, - "loss": 0.8379, + "learning_rate": 4.426502522576126e-07, + "loss": 0.8008, "step": 32035 }, { - "epoch": 0.9090805902383655, + "epoch": 0.907818300320213, "grad_norm": 0.0, - "learning_rate": 4.3044658368327383e-07, - "loss": 0.8322, + "learning_rate": 4.423802523653797e-07, + "loss": 0.7, "step": 32036 }, { - "epoch": 0.9091089670828604, + "epoch": 0.9078466377624755, "grad_norm": 0.0, - "learning_rate": 4.301798752894215e-07, - "loss": 0.8069, + "learning_rate": 4.421103329805254e-07, + "loss": 0.8284, "step": 32037 }, { - "epoch": 0.9091373439273552, + "epoch": 0.907874975204738, "grad_norm": 0.0, - "learning_rate": 4.299132477322698e-07, - "loss": 0.7182, + "learning_rate": 4.4184049410532557e-07, + "loss": 0.9196, "step": 32038 }, { - "epoch": 0.9091657207718502, + "epoch": 0.9079033126470005, "grad_norm": 0.0, - "learning_rate": 4.2964670101407037e-07, - "loss": 0.8797, + "learning_rate": 4.4157073574205176e-07, + "loss": 0.7228, "step": 32039 }, { - "epoch": 0.9091940976163451, + "epoch": 0.9079316500892629, "grad_norm": 0.0, - "learning_rate": 4.2938023513707683e-07, - "loss": 0.7064, + "learning_rate": 4.4130105789297775e-07, + "loss": 0.8678, "step": 32040 }, { - "epoch": 0.9092224744608399, + "epoch": 0.9079599875315254, "grad_norm": 0.0, - "learning_rate": 4.2911385010353744e-07, - "loss": 0.7981, + "learning_rate": 4.4103146056037606e-07, + "loss": 0.7676, "step": 32041 }, { - "epoch": 0.9092508513053349, + "epoch": 0.9079883249737879, "grad_norm": 0.0, - "learning_rate": 4.2884754591570267e-07, - "loss": 0.8558, + "learning_rate": 4.4076194374651384e-07, + "loss": 0.7428, "step": 32042 }, { - "epoch": 0.9092792281498298, + "epoch": 0.9080166624160503, "grad_norm": 0.0, - "learning_rate": 4.28581322575824e-07, - "loss": 0.8775, + "learning_rate": 4.404925074536637e-07, + "loss": 0.7764, "step": 32043 }, { - "epoch": 0.9093076049943246, + "epoch": 0.9080449998583128, "grad_norm": 0.0, - "learning_rate": 4.283151800861485e-07, - "loss": 0.8826, + "learning_rate": 4.402231516840971e-07, + "loss": 0.8489, "step": 32044 }, { - "epoch": 0.9093359818388195, + "epoch": 0.9080733373005753, "grad_norm": 0.0, - "learning_rate": 4.280491184489266e-07, - "loss": 0.889, + "learning_rate": 4.3995387644007904e-07, + "loss": 0.6994, "step": 32045 }, { - "epoch": 0.9093643586833144, + "epoch": 0.9081016747428378, "grad_norm": 0.0, - "learning_rate": 4.277831376664032e-07, - "loss": 0.7959, + "learning_rate": 4.3968468172387866e-07, + "loss": 0.8172, "step": 32046 }, { - "epoch": 0.9093927355278093, + "epoch": 0.9081300121851001, "grad_norm": 0.0, - "learning_rate": 4.2751723774082543e-07, - "loss": 0.734, + "learning_rate": 4.3941556753776646e-07, + "loss": 0.7691, "step": 32047 }, { - "epoch": 0.9094211123723042, + "epoch": 0.9081583496273626, "grad_norm": 0.0, - "learning_rate": 4.2725141867444143e-07, - "loss": 0.7906, + "learning_rate": 4.391465338840062e-07, + "loss": 0.7777, "step": 32048 }, { - "epoch": 0.9094494892167991, + "epoch": 0.9081866870696251, "grad_norm": 0.0, - "learning_rate": 4.2698568046949383e-07, - "loss": 0.8543, + "learning_rate": 4.3887758076486597e-07, + "loss": 0.7858, "step": 32049 }, { - "epoch": 0.909477866061294, + "epoch": 0.9082150245118875, "grad_norm": 0.0, - "learning_rate": 4.267200231282276e-07, - "loss": 0.7293, + "learning_rate": 4.386087081826085e-07, + "loss": 0.8106, "step": 32050 }, { - "epoch": 0.9095062429057889, + "epoch": 0.90824336195415, "grad_norm": 0.0, - "learning_rate": 4.264544466528897e-07, - "loss": 0.7686, + "learning_rate": 4.383399161395008e-07, + "loss": 0.6837, "step": 32051 }, { - "epoch": 0.9095346197502838, + "epoch": 0.9082716993964125, "grad_norm": 0.0, - "learning_rate": 4.2618895104572065e-07, - "loss": 0.6962, + "learning_rate": 4.3807120463780774e-07, + "loss": 0.8506, "step": 32052 }, { - "epoch": 0.9095629965947787, + "epoch": 0.908300036838675, "grad_norm": 0.0, - "learning_rate": 4.259235363089631e-07, - "loss": 0.7646, + "learning_rate": 4.378025736797897e-07, + "loss": 0.7967, "step": 32053 }, { - "epoch": 0.9095913734392735, + "epoch": 0.9083283742809374, "grad_norm": 0.0, - "learning_rate": 4.2565820244486075e-07, - "loss": 0.7478, + "learning_rate": 4.375340232677139e-07, + "loss": 0.8411, "step": 32054 }, { - "epoch": 0.9096197502837684, + "epoch": 0.9083567117231999, "grad_norm": 0.0, - "learning_rate": 4.2539294945565415e-07, - "loss": 0.809, + "learning_rate": 4.372655534038384e-07, + "loss": 0.7683, "step": 32055 }, { - "epoch": 0.9096481271282634, + "epoch": 0.9083850491654624, "grad_norm": 0.0, - "learning_rate": 4.251277773435825e-07, - "loss": 0.7354, + "learning_rate": 4.369971640904269e-07, + "loss": 0.7267, "step": 32056 }, { - "epoch": 0.9096765039727582, + "epoch": 0.9084133866077247, "grad_norm": 0.0, - "learning_rate": 4.248626861108873e-07, - "loss": 0.7802, + "learning_rate": 4.3672885532973997e-07, + "loss": 0.8124, "step": 32057 }, { - "epoch": 0.9097048808172531, + "epoch": 0.9084417240499872, "grad_norm": 0.0, - "learning_rate": 4.2459767575980694e-07, - "loss": 0.7786, + "learning_rate": 4.3646062712403567e-07, + "loss": 0.758, "step": 32058 }, { - "epoch": 0.9097332576617481, + "epoch": 0.9084700614922497, "grad_norm": 0.0, - "learning_rate": 4.243327462925828e-07, - "loss": 0.9494, + "learning_rate": 4.361924794755745e-07, + "loss": 0.8134, "step": 32059 }, { - "epoch": 0.9097616345062429, + "epoch": 0.9084983989345121, "grad_norm": 0.0, - "learning_rate": 4.2406789771144876e-07, - "loss": 0.772, + "learning_rate": 4.35924412386618e-07, + "loss": 0.8308, "step": 32060 }, { - "epoch": 0.9097900113507378, + "epoch": 0.9085267363767746, "grad_norm": 0.0, - "learning_rate": 4.238031300186429e-07, - "loss": 0.777, + "learning_rate": 4.356564258594198e-07, + "loss": 0.7621, "step": 32061 }, { - "epoch": 0.9098183881952326, + "epoch": 0.9085550738190371, "grad_norm": 0.0, - "learning_rate": 4.235384432164047e-07, - "loss": 0.7256, + "learning_rate": 4.3538851989624044e-07, + "loss": 0.8879, "step": 32062 }, { - "epoch": 0.9098467650397276, + "epoch": 0.9085834112612996, "grad_norm": 0.0, - "learning_rate": 4.232738373069656e-07, - "loss": 0.8195, + "learning_rate": 4.3512069449933357e-07, + "loss": 0.8831, "step": 32063 }, { - "epoch": 0.9098751418842225, + "epoch": 0.908611748703562, "grad_norm": 0.0, - "learning_rate": 4.230093122925638e-07, - "loss": 0.773, + "learning_rate": 4.3485294967095747e-07, + "loss": 0.8358, "step": 32064 }, { - "epoch": 0.9099035187287173, + "epoch": 0.9086400861458245, "grad_norm": 0.0, - "learning_rate": 4.2274486817543314e-07, - "loss": 0.8252, + "learning_rate": 4.34585285413367e-07, + "loss": 0.7923, "step": 32065 }, { - "epoch": 0.9099318955732123, + "epoch": 0.908668423588087, "grad_norm": 0.0, - "learning_rate": 4.2248050495780737e-07, - "loss": 0.9028, + "learning_rate": 4.343177017288158e-07, + "loss": 0.7519, "step": 32066 }, { - "epoch": 0.9099602724177072, + "epoch": 0.9086967610303494, "grad_norm": 0.0, - "learning_rate": 4.22216222641918e-07, - "loss": 0.8006, + "learning_rate": 4.3405019861956e-07, + "loss": 0.8231, "step": 32067 }, { - "epoch": 0.909988649262202, + "epoch": 0.9087250984726118, "grad_norm": 0.0, - "learning_rate": 4.219520212299999e-07, - "loss": 0.7729, + "learning_rate": 4.337827760878521e-07, + "loss": 0.8826, "step": 32068 }, { - "epoch": 0.9100170261066969, + "epoch": 0.9087534359148743, "grad_norm": 0.0, - "learning_rate": 4.2168790072428354e-07, - "loss": 0.6617, + "learning_rate": 4.3351543413594263e-07, + "loss": 0.6996, "step": 32069 }, { - "epoch": 0.9100454029511919, + "epoch": 0.9087817733571368, "grad_norm": 0.0, - "learning_rate": 4.214238611269994e-07, - "loss": 0.6993, + "learning_rate": 4.3324817276608755e-07, + "loss": 0.8027, "step": 32070 }, { - "epoch": 0.9100737797956867, + "epoch": 0.9088101107993992, "grad_norm": 0.0, - "learning_rate": 4.2115990244037895e-07, - "loss": 0.7186, + "learning_rate": 4.329809919805328e-07, + "loss": 0.8774, "step": 32071 }, { - "epoch": 0.9101021566401816, + "epoch": 0.9088384482416617, "grad_norm": 0.0, - "learning_rate": 4.2089602466665045e-07, - "loss": 0.7501, + "learning_rate": 4.327138917815332e-07, + "loss": 0.8176, "step": 32072 }, { - "epoch": 0.9101305334846765, + "epoch": 0.9088667856839242, "grad_norm": 0.0, - "learning_rate": 4.2063222780804435e-07, - "loss": 0.803, + "learning_rate": 4.3244687217133816e-07, + "loss": 0.8358, "step": 32073 }, { - "epoch": 0.9101589103291714, + "epoch": 0.9088951231261866, "grad_norm": 0.0, - "learning_rate": 4.203685118667888e-07, - "loss": 0.8168, + "learning_rate": 4.3217993315219363e-07, + "loss": 0.7641, "step": 32074 }, { - "epoch": 0.9101872871736663, + "epoch": 0.9089234605684491, "grad_norm": 0.0, - "learning_rate": 4.2010487684511105e-07, - "loss": 0.6974, + "learning_rate": 4.3191307472635335e-07, + "loss": 0.8002, "step": 32075 }, { - "epoch": 0.9102156640181612, + "epoch": 0.9089517980107116, "grad_norm": 0.0, - "learning_rate": 4.1984132274523913e-07, - "loss": 0.895, + "learning_rate": 4.3164629689605994e-07, + "loss": 0.7376, "step": 32076 }, { - "epoch": 0.9102440408626561, + "epoch": 0.9089801354529741, "grad_norm": 0.0, - "learning_rate": 4.1957784956939697e-07, - "loss": 0.8373, + "learning_rate": 4.313795996635628e-07, + "loss": 0.6882, "step": 32077 }, { - "epoch": 0.910272417707151, + "epoch": 0.9090084728952365, "grad_norm": 0.0, - "learning_rate": 4.1931445731981045e-07, - "loss": 0.8619, + "learning_rate": 4.3111298303110895e-07, + "loss": 0.7692, "step": 32078 }, { - "epoch": 0.9103007945516458, + "epoch": 0.909036810337499, "grad_norm": 0.0, - "learning_rate": 4.1905114599870786e-07, - "loss": 0.7893, + "learning_rate": 4.308464470009432e-07, + "loss": 0.7713, "step": 32079 }, { - "epoch": 0.9103291713961408, + "epoch": 0.9090651477797614, "grad_norm": 0.0, - "learning_rate": 4.187879156083086e-07, - "loss": 0.8571, + "learning_rate": 4.305799915753117e-07, + "loss": 0.8583, "step": 32080 }, { - "epoch": 0.9103575482406356, + "epoch": 0.9090934852220238, "grad_norm": 0.0, - "learning_rate": 4.1852476615083957e-07, - "loss": 0.8191, + "learning_rate": 4.3031361675646033e-07, + "loss": 0.8797, "step": 32081 }, { - "epoch": 0.9103859250851305, + "epoch": 0.9091218226642863, "grad_norm": 0.0, - "learning_rate": 4.1826169762852365e-07, - "loss": 0.7827, + "learning_rate": 4.300473225466284e-07, + "loss": 0.7766, "step": 32082 }, { - "epoch": 0.9104143019296255, + "epoch": 0.9091501601065488, "grad_norm": 0.0, - "learning_rate": 4.1799871004358005e-07, - "loss": 0.757, + "learning_rate": 4.2978110894806415e-07, + "loss": 0.7608, "step": 32083 }, { - "epoch": 0.9104426787741203, + "epoch": 0.9091784975488112, "grad_norm": 0.0, - "learning_rate": 4.177358033982326e-07, - "loss": 0.8596, + "learning_rate": 4.295149759630057e-07, + "loss": 0.8521, "step": 32084 }, { - "epoch": 0.9104710556186152, + "epoch": 0.9092068349910737, "grad_norm": 0.0, - "learning_rate": 4.1747297769470285e-07, - "loss": 0.8733, + "learning_rate": 4.2924892359369584e-07, + "loss": 0.8221, "step": 32085 }, { - "epoch": 0.91049943246311, + "epoch": 0.9092351724333362, "grad_norm": 0.0, - "learning_rate": 4.172102329352079e-07, - "loss": 0.7896, + "learning_rate": 4.2898295184237827e-07, + "loss": 0.7771, "step": 32086 }, { - "epoch": 0.910527809307605, + "epoch": 0.9092635098755987, "grad_norm": 0.0, - "learning_rate": 4.169475691219693e-07, - "loss": 0.8368, + "learning_rate": 4.2871706071129006e-07, + "loss": 0.8413, "step": 32087 }, { - "epoch": 0.9105561861520999, + "epoch": 0.9092918473178611, "grad_norm": 0.0, - "learning_rate": 4.1668498625720423e-07, - "loss": 0.8008, + "learning_rate": 4.2845125020267273e-07, + "loss": 0.8154, "step": 32088 }, { - "epoch": 0.9105845629965947, + "epoch": 0.9093201847601236, "grad_norm": 0.0, - "learning_rate": 4.16422484343133e-07, - "loss": 0.8463, + "learning_rate": 4.2818552031876457e-07, + "loss": 0.8534, "step": 32089 }, { - "epoch": 0.9106129398410897, + "epoch": 0.909348522202386, "grad_norm": 0.0, - "learning_rate": 4.161600633819718e-07, - "loss": 0.7503, + "learning_rate": 4.2791987106180486e-07, + "loss": 0.7219, "step": 32090 }, { - "epoch": 0.9106413166855846, + "epoch": 0.9093768596446484, "grad_norm": 0.0, - "learning_rate": 4.158977233759387e-07, - "loss": 0.6851, + "learning_rate": 4.276543024340296e-07, + "loss": 0.8632, "step": 32091 }, { - "epoch": 0.9106696935300794, + "epoch": 0.9094051970869109, "grad_norm": 0.0, - "learning_rate": 4.1563546432724646e-07, - "loss": 0.8504, + "learning_rate": 4.273888144376759e-07, + "loss": 0.7091, "step": 32092 }, { - "epoch": 0.9106980703745744, + "epoch": 0.9094335345291734, "grad_norm": 0.0, - "learning_rate": 4.1537328623811325e-07, - "loss": 0.8412, + "learning_rate": 4.271234070749819e-07, + "loss": 0.7799, "step": 32093 }, { - "epoch": 0.9107264472190693, + "epoch": 0.9094618719714359, "grad_norm": 0.0, - "learning_rate": 4.15111189110754e-07, - "loss": 0.8667, + "learning_rate": 4.2685808034818366e-07, + "loss": 0.8703, "step": 32094 }, { - "epoch": 0.9107548240635641, + "epoch": 0.9094902094136983, "grad_norm": 0.0, - "learning_rate": 4.1484917294737914e-07, - "loss": 0.8354, + "learning_rate": 4.265928342595127e-07, + "loss": 0.7442, "step": 32095 }, { - "epoch": 0.910783200908059, + "epoch": 0.9095185468559608, "grad_norm": 0.0, - "learning_rate": 4.145872377502047e-07, - "loss": 0.8192, + "learning_rate": 4.2632766881120614e-07, + "loss": 0.7079, "step": 32096 }, { - "epoch": 0.910811577752554, + "epoch": 0.9095468842982233, "grad_norm": 0.0, - "learning_rate": 4.1432538352144445e-07, - "loss": 0.8064, + "learning_rate": 4.260625840054977e-07, + "loss": 0.8269, "step": 32097 }, { - "epoch": 0.9108399545970488, + "epoch": 0.9095752217404857, "grad_norm": 0.0, - "learning_rate": 4.1406361026330666e-07, - "loss": 0.8593, + "learning_rate": 4.257975798446179e-07, + "loss": 0.841, "step": 32098 }, { - "epoch": 0.9108683314415437, + "epoch": 0.9096035591827482, "grad_norm": 0.0, - "learning_rate": 4.138019179780062e-07, - "loss": 0.7154, + "learning_rate": 4.2553265633080154e-07, + "loss": 0.8055, "step": 32099 }, { - "epoch": 0.9108967082860386, + "epoch": 0.9096318966250106, "grad_norm": 0.0, - "learning_rate": 4.1354030666775015e-07, - "loss": 0.8508, + "learning_rate": 4.25267813466278e-07, + "loss": 0.7201, "step": 32100 }, { - "epoch": 0.9109250851305335, + "epoch": 0.909660234067273, "grad_norm": 0.0, - "learning_rate": 4.1327877633475013e-07, - "loss": 0.8066, + "learning_rate": 4.250030512532788e-07, + "loss": 0.8106, "step": 32101 }, { - "epoch": 0.9109534619750284, + "epoch": 0.9096885715095355, "grad_norm": 0.0, - "learning_rate": 4.1301732698121653e-07, - "loss": 0.7845, + "learning_rate": 4.247383696940366e-07, + "loss": 0.8034, "step": 32102 }, { - "epoch": 0.9109818388195232, + "epoch": 0.909716908951798, "grad_norm": 0.0, - "learning_rate": 4.1275595860935434e-07, - "loss": 0.8241, + "learning_rate": 4.244737687907763e-07, + "loss": 0.7889, "step": 32103 }, { - "epoch": 0.9110102156640182, + "epoch": 0.9097452463940605, "grad_norm": 0.0, - "learning_rate": 4.124946712213751e-07, - "loss": 0.8072, + "learning_rate": 4.2420924854573055e-07, + "loss": 0.8682, "step": 32104 }, { - "epoch": 0.911038592508513, + "epoch": 0.9097735838363229, "grad_norm": 0.0, - "learning_rate": 4.122334648194848e-07, - "loss": 0.8237, + "learning_rate": 4.239448089611253e-07, + "loss": 0.9388, "step": 32105 }, { - "epoch": 0.9110669693530079, + "epoch": 0.9098019212785854, "grad_norm": 0.0, - "learning_rate": 4.119723394058883e-07, - "loss": 0.7299, + "learning_rate": 4.236804500391889e-07, + "loss": 0.7594, "step": 32106 }, { - "epoch": 0.9110953461975029, + "epoch": 0.9098302587208479, "grad_norm": 0.0, - "learning_rate": 4.1171129498279285e-07, - "loss": 0.7305, + "learning_rate": 4.234161717821494e-07, + "loss": 0.7024, "step": 32107 }, { - "epoch": 0.9111237230419977, + "epoch": 0.9098585961631103, "grad_norm": 0.0, - "learning_rate": 4.114503315524043e-07, - "loss": 0.7291, + "learning_rate": 4.2315197419223073e-07, + "loss": 0.8323, "step": 32108 }, { - "epoch": 0.9111520998864926, + "epoch": 0.9098869336053728, "grad_norm": 0.0, - "learning_rate": 4.1118944911692435e-07, - "loss": 0.6702, + "learning_rate": 4.228878572716588e-07, + "loss": 0.7192, "step": 32109 }, { - "epoch": 0.9111804767309876, + "epoch": 0.9099152710476353, "grad_norm": 0.0, - "learning_rate": 4.10928647678559e-07, - "loss": 0.7991, + "learning_rate": 4.226238210226608e-07, + "loss": 0.8482, "step": 32110 }, { - "epoch": 0.9112088535754824, + "epoch": 0.9099436084898977, "grad_norm": 0.0, - "learning_rate": 4.1066792723951086e-07, - "loss": 0.8531, + "learning_rate": 4.22359865447457e-07, + "loss": 0.9164, "step": 32111 }, { - "epoch": 0.9112372304199773, + "epoch": 0.9099719459321601, "grad_norm": 0.0, - "learning_rate": 4.1040728780198047e-07, - "loss": 0.6849, + "learning_rate": 4.2209599054827465e-07, + "loss": 0.8669, "step": 32112 }, { - "epoch": 0.9112656072644721, + "epoch": 0.9100002833744226, "grad_norm": 0.0, - "learning_rate": 4.1014672936817155e-07, - "loss": 0.7463, + "learning_rate": 4.2183219632733195e-07, + "loss": 0.8775, "step": 32113 }, { - "epoch": 0.9112939841089671, + "epoch": 0.9100286208166851, "grad_norm": 0.0, - "learning_rate": 4.0988625194028464e-07, - "loss": 0.8348, + "learning_rate": 4.215684827868538e-07, + "loss": 0.7483, "step": 32114 }, { - "epoch": 0.911322360953462, + "epoch": 0.9100569582589475, "grad_norm": 0.0, - "learning_rate": 4.0962585552051905e-07, - "loss": 0.8129, + "learning_rate": 4.2130484992906285e-07, + "loss": 0.7244, "step": 32115 }, { - "epoch": 0.9113507377979568, + "epoch": 0.91008529570121, "grad_norm": 0.0, - "learning_rate": 4.093655401110741e-07, - "loss": 0.8363, + "learning_rate": 4.210412977561773e-07, + "loss": 0.7099, "step": 32116 }, { - "epoch": 0.9113791146424518, + "epoch": 0.9101136331434725, "grad_norm": 0.0, - "learning_rate": 4.0910530571415143e-07, - "loss": 0.843, + "learning_rate": 4.2077782627041764e-07, + "loss": 0.8323, "step": 32117 }, { - "epoch": 0.9114074914869467, + "epoch": 0.910141970585735, "grad_norm": 0.0, - "learning_rate": 4.0884515233194586e-07, - "loss": 0.679, + "learning_rate": 4.205144354740032e-07, + "loss": 0.7764, "step": 32118 }, { - "epoch": 0.9114358683314415, + "epoch": 0.9101703080279974, "grad_norm": 0.0, - "learning_rate": 4.0858507996665795e-07, - "loss": 0.7208, + "learning_rate": 4.202511253691521e-07, + "loss": 0.7636, "step": 32119 }, { - "epoch": 0.9114642451759364, + "epoch": 0.9101986454702599, "grad_norm": 0.0, - "learning_rate": 4.0832508862048147e-07, - "loss": 0.8206, + "learning_rate": 4.199878959580861e-07, + "loss": 0.8869, "step": 32120 }, { - "epoch": 0.9114926220204314, + "epoch": 0.9102269829125224, "grad_norm": 0.0, - "learning_rate": 4.0806517829561466e-07, - "loss": 0.8627, + "learning_rate": 4.197247472430166e-07, + "loss": 0.7502, "step": 32121 }, { - "epoch": 0.9115209988649262, + "epoch": 0.9102553203547847, "grad_norm": 0.0, - "learning_rate": 4.078053489942535e-07, - "loss": 0.717, + "learning_rate": 4.194616792261641e-07, + "loss": 0.8641, "step": 32122 }, { - "epoch": 0.9115493757094211, + "epoch": 0.9102836577970472, "grad_norm": 0.0, - "learning_rate": 4.075456007185907e-07, - "loss": 0.7943, + "learning_rate": 4.191986919097446e-07, + "loss": 0.7808, "step": 32123 }, { - "epoch": 0.911577752553916, + "epoch": 0.9103119952393097, "grad_norm": 0.0, - "learning_rate": 4.0728593347082126e-07, - "loss": 0.7621, + "learning_rate": 4.189357852959708e-07, + "loss": 0.7236, "step": 32124 }, { - "epoch": 0.9116061293984109, + "epoch": 0.9103403326815721, "grad_norm": 0.0, - "learning_rate": 4.070263472531399e-07, - "loss": 0.7871, + "learning_rate": 4.1867295938705866e-07, + "loss": 0.7813, "step": 32125 }, { - "epoch": 0.9116345062429058, + "epoch": 0.9103686701238346, "grad_norm": 0.0, - "learning_rate": 4.067668420677373e-07, - "loss": 0.8087, + "learning_rate": 4.184102141852242e-07, + "loss": 0.7328, "step": 32126 }, { - "epoch": 0.9116628830874007, + "epoch": 0.9103970075660971, "grad_norm": 0.0, - "learning_rate": 4.06507417916806e-07, - "loss": 0.7179, + "learning_rate": 4.181475496926768e-07, + "loss": 0.8053, "step": 32127 }, { - "epoch": 0.9116912599318956, + "epoch": 0.9104253450083596, "grad_norm": 0.0, - "learning_rate": 4.062480748025388e-07, - "loss": 0.8473, + "learning_rate": 4.178849659116313e-07, + "loss": 0.9826, "step": 32128 }, { - "epoch": 0.9117196367763905, + "epoch": 0.910453682450622, "grad_norm": 0.0, - "learning_rate": 4.05988812727125e-07, - "loss": 0.8888, + "learning_rate": 4.176224628442982e-07, + "loss": 0.8944, "step": 32129 }, { - "epoch": 0.9117480136208853, + "epoch": 0.9104820198928845, "grad_norm": 0.0, - "learning_rate": 4.05729631692755e-07, - "loss": 0.8212, + "learning_rate": 4.173600404928901e-07, + "loss": 0.7672, "step": 32130 }, { - "epoch": 0.9117763904653803, + "epoch": 0.910510357335147, "grad_norm": 0.0, - "learning_rate": 4.054705317016183e-07, - "loss": 0.8641, + "learning_rate": 4.170976988596165e-07, + "loss": 0.5988, "step": 32131 }, { - "epoch": 0.9118047673098751, + "epoch": 0.9105386947774093, "grad_norm": 0.0, - "learning_rate": 4.052115127559031e-07, - "loss": 0.8438, + "learning_rate": 4.1683543794668657e-07, + "loss": 0.7878, "step": 32132 }, { - "epoch": 0.91183314415437, + "epoch": 0.9105670322196718, "grad_norm": 0.0, - "learning_rate": 4.0495257485779646e-07, - "loss": 0.6183, + "learning_rate": 4.1657325775631195e-07, + "loss": 0.9031, "step": 32133 }, { - "epoch": 0.911861520998865, + "epoch": 0.9105953696619343, "grad_norm": 0.0, - "learning_rate": 4.046937180094879e-07, - "loss": 0.8297, + "learning_rate": 4.1631115829069866e-07, + "loss": 0.7905, "step": 32134 }, { - "epoch": 0.9118898978433598, + "epoch": 0.9106237071041968, "grad_norm": 0.0, - "learning_rate": 4.0443494221316215e-07, - "loss": 0.7378, + "learning_rate": 4.160491395520561e-07, + "loss": 0.7171, "step": 32135 }, { - "epoch": 0.9119182746878547, + "epoch": 0.9106520445464592, "grad_norm": 0.0, - "learning_rate": 4.041762474710076e-07, - "loss": 0.6882, + "learning_rate": 4.157872015425901e-07, + "loss": 0.7048, "step": 32136 }, { - "epoch": 0.9119466515323496, + "epoch": 0.9106803819887217, "grad_norm": 0.0, - "learning_rate": 4.0391763378520576e-07, - "loss": 0.8301, + "learning_rate": 4.1552534426450686e-07, + "loss": 0.7892, "step": 32137 }, { - "epoch": 0.9119750283768445, + "epoch": 0.9107087194309842, "grad_norm": 0.0, - "learning_rate": 4.036591011579438e-07, - "loss": 0.8329, + "learning_rate": 4.152635677200134e-07, + "loss": 0.7542, "step": 32138 }, { - "epoch": 0.9120034052213394, + "epoch": 0.9107370568732466, "grad_norm": 0.0, - "learning_rate": 4.0340064959140555e-07, - "loss": 0.8282, + "learning_rate": 4.150018719113147e-07, + "loss": 0.8212, "step": 32139 }, { - "epoch": 0.9120317820658342, + "epoch": 0.9107653943155091, "grad_norm": 0.0, - "learning_rate": 4.031422790877726e-07, - "loss": 0.6851, + "learning_rate": 4.147402568406134e-07, + "loss": 0.8409, "step": 32140 }, { - "epoch": 0.9120601589103292, + "epoch": 0.9107937317577716, "grad_norm": 0.0, - "learning_rate": 4.028839896492276e-07, - "loss": 0.886, + "learning_rate": 4.1447872251011655e-07, + "loss": 0.793, "step": 32141 }, { - "epoch": 0.9120885357548241, + "epoch": 0.9108220692000341, "grad_norm": 0.0, - "learning_rate": 4.026257812779544e-07, - "loss": 0.7219, + "learning_rate": 4.142172689220225e-07, + "loss": 0.7295, "step": 32142 }, { - "epoch": 0.9121169125993189, + "epoch": 0.9108504066422964, "grad_norm": 0.0, - "learning_rate": 4.023676539761312e-07, - "loss": 0.8671, + "learning_rate": 4.139558960785361e-07, + "loss": 0.7601, "step": 32143 }, { - "epoch": 0.9121452894438139, + "epoch": 0.9108787440845589, "grad_norm": 0.0, - "learning_rate": 4.021096077459408e-07, - "loss": 0.8356, + "learning_rate": 4.13694603981859e-07, + "loss": 0.8557, "step": 32144 }, { - "epoch": 0.9121736662883088, + "epoch": 0.9109070815268214, "grad_norm": 0.0, - "learning_rate": 4.0185164258956245e-07, - "loss": 0.8148, + "learning_rate": 4.1343339263419155e-07, + "loss": 0.8048, "step": 32145 }, { - "epoch": 0.9122020431328036, + "epoch": 0.9109354189690838, "grad_norm": 0.0, - "learning_rate": 4.015937585091734e-07, - "loss": 0.7918, + "learning_rate": 4.131722620377354e-07, + "loss": 0.9366, "step": 32146 }, { - "epoch": 0.9122304199772985, + "epoch": 0.9109637564113463, "grad_norm": 0.0, - "learning_rate": 4.0133595550695405e-07, - "loss": 0.7865, + "learning_rate": 4.129112121946899e-07, + "loss": 0.7485, "step": 32147 }, { - "epoch": 0.9122587968217934, + "epoch": 0.9109920938536088, "grad_norm": 0.0, - "learning_rate": 4.010782335850816e-07, - "loss": 0.8547, + "learning_rate": 4.1265024310725323e-07, + "loss": 0.8019, "step": 32148 }, { - "epoch": 0.9122871736662883, + "epoch": 0.9110204312958712, "grad_norm": 0.0, - "learning_rate": 4.008205927457309e-07, - "loss": 0.8197, + "learning_rate": 4.1238935477762367e-07, + "loss": 0.8354, "step": 32149 }, { - "epoch": 0.9123155505107832, + "epoch": 0.9110487687381337, "grad_norm": 0.0, - "learning_rate": 4.0056303299108257e-07, - "loss": 0.7999, + "learning_rate": 4.121285472079983e-07, + "loss": 0.7948, "step": 32150 }, { - "epoch": 0.9123439273552781, + "epoch": 0.9110771061803962, "grad_norm": 0.0, - "learning_rate": 4.003055543233092e-07, - "loss": 0.8448, + "learning_rate": 4.118678204005744e-07, + "loss": 0.777, "step": 32151 }, { - "epoch": 0.912372304199773, + "epoch": 0.9111054436226587, "grad_norm": 0.0, - "learning_rate": 4.0004815674458576e-07, - "loss": 0.8673, + "learning_rate": 4.1160717435754895e-07, + "loss": 0.6964, "step": 32152 }, { - "epoch": 0.9124006810442679, + "epoch": 0.911133781064921, "grad_norm": 0.0, - "learning_rate": 3.9979084025708825e-07, - "loss": 0.8108, + "learning_rate": 4.113466090811158e-07, + "loss": 0.869, "step": 32153 }, { - "epoch": 0.9124290578887627, + "epoch": 0.9111621185071835, "grad_norm": 0.0, - "learning_rate": 3.995336048629883e-07, - "loss": 0.7994, + "learning_rate": 4.110861245734721e-07, + "loss": 0.8041, "step": 32154 }, { - "epoch": 0.9124574347332577, + "epoch": 0.911190455949446, "grad_norm": 0.0, - "learning_rate": 3.992764505644586e-07, - "loss": 0.8239, + "learning_rate": 4.108257208368105e-07, + "loss": 0.7758, "step": 32155 }, { - "epoch": 0.9124858115777525, + "epoch": 0.9112187933917084, "grad_norm": 0.0, - "learning_rate": 3.9901937736367524e-07, - "loss": 0.802, + "learning_rate": 4.105653978733237e-07, + "loss": 0.7815, "step": 32156 }, { - "epoch": 0.9125141884222474, + "epoch": 0.9112471308339709, "grad_norm": 0.0, - "learning_rate": 3.987623852628042e-07, - "loss": 0.8778, + "learning_rate": 4.103051556852056e-07, + "loss": 0.8902, "step": 32157 }, { - "epoch": 0.9125425652667424, + "epoch": 0.9112754682762334, "grad_norm": 0.0, - "learning_rate": 3.985054742640193e-07, - "loss": 0.8339, + "learning_rate": 4.1004499427464873e-07, + "loss": 0.8013, "step": 32158 }, { - "epoch": 0.9125709421112372, + "epoch": 0.9113038057184959, "grad_norm": 0.0, - "learning_rate": 3.982486443694911e-07, - "loss": 0.8814, + "learning_rate": 4.0978491364384365e-07, + "loss": 0.8298, "step": 32159 }, { - "epoch": 0.9125993189557321, + "epoch": 0.9113321431607583, "grad_norm": 0.0, - "learning_rate": 3.979918955813877e-07, - "loss": 0.8088, + "learning_rate": 4.0952491379498305e-07, + "loss": 0.8384, "step": 32160 }, { - "epoch": 0.9126276958002271, + "epoch": 0.9113604806030208, "grad_norm": 0.0, - "learning_rate": 3.977352279018776e-07, - "loss": 0.7892, + "learning_rate": 4.0926499473025295e-07, + "loss": 0.762, "step": 32161 }, { - "epoch": 0.9126560726447219, + "epoch": 0.9113888180452833, "grad_norm": 0.0, - "learning_rate": 3.974786413331311e-07, - "loss": 0.7084, + "learning_rate": 4.0900515645184823e-07, + "loss": 0.7875, "step": 32162 }, { - "epoch": 0.9126844494892168, + "epoch": 0.9114171554875456, "grad_norm": 0.0, - "learning_rate": 3.972221358773132e-07, - "loss": 0.8683, + "learning_rate": 4.0874539896195275e-07, + "loss": 0.7858, "step": 32163 }, { - "epoch": 0.9127128263337116, + "epoch": 0.9114454929298081, "grad_norm": 0.0, - "learning_rate": 3.9696571153659214e-07, - "loss": 0.8091, + "learning_rate": 4.0848572226275583e-07, + "loss": 0.8096, "step": 32164 }, { - "epoch": 0.9127412031782066, + "epoch": 0.9114738303720706, "grad_norm": 0.0, - "learning_rate": 3.967093683131329e-07, - "loss": 0.8576, + "learning_rate": 4.082261263564469e-07, + "loss": 0.8041, "step": 32165 }, { - "epoch": 0.9127695800227015, + "epoch": 0.9115021678143331, "grad_norm": 0.0, - "learning_rate": 3.964531062091004e-07, - "loss": 0.8189, + "learning_rate": 4.0796661124520964e-07, + "loss": 0.8525, "step": 32166 }, { - "epoch": 0.9127979568671963, + "epoch": 0.9115305052565955, "grad_norm": 0.0, - "learning_rate": 3.9619692522666287e-07, - "loss": 0.81, + "learning_rate": 4.0770717693123243e-07, + "loss": 0.8411, "step": 32167 }, { - "epoch": 0.9128263337116913, + "epoch": 0.911558842698858, "grad_norm": 0.0, - "learning_rate": 3.959408253679797e-07, - "loss": 0.7578, + "learning_rate": 4.074478234167001e-07, + "loss": 0.7772, "step": 32168 }, { - "epoch": 0.9128547105561862, + "epoch": 0.9115871801411205, "grad_norm": 0.0, - "learning_rate": 3.956848066352159e-07, - "loss": 0.7813, + "learning_rate": 4.0718855070379535e-07, + "loss": 0.6951, "step": 32169 }, { - "epoch": 0.912883087400681, + "epoch": 0.9116155175833829, "grad_norm": 0.0, - "learning_rate": 3.9542886903053635e-07, - "loss": 0.7115, + "learning_rate": 4.069293587947043e-07, + "loss": 0.7969, "step": 32170 }, { - "epoch": 0.9129114642451759, + "epoch": 0.9116438550256454, "grad_norm": 0.0, - "learning_rate": 3.951730125560993e-07, - "loss": 0.8086, + "learning_rate": 4.0667024769160957e-07, + "loss": 0.91, "step": 32171 }, { - "epoch": 0.9129398410896709, + "epoch": 0.9116721924679079, "grad_norm": 0.0, - "learning_rate": 3.949172372140675e-07, - "loss": 0.8511, + "learning_rate": 4.0641121739669387e-07, + "loss": 0.8301, "step": 32172 }, { - "epoch": 0.9129682179341657, + "epoch": 0.9117005299101703, "grad_norm": 0.0, - "learning_rate": 3.946615430066025e-07, - "loss": 0.8627, + "learning_rate": 4.061522679121399e-07, + "loss": 0.905, "step": 32173 }, { - "epoch": 0.9129965947786606, + "epoch": 0.9117288673524327, "grad_norm": 0.0, - "learning_rate": 3.9440592993586267e-07, - "loss": 0.727, + "learning_rate": 4.0589339924012705e-07, + "loss": 0.7479, "step": 32174 }, { - "epoch": 0.9130249716231555, + "epoch": 0.9117572047946952, "grad_norm": 0.0, - "learning_rate": 3.9415039800400735e-07, - "loss": 0.8477, + "learning_rate": 4.0563461138283577e-07, + "loss": 0.77, "step": 32175 }, { - "epoch": 0.9130533484676504, + "epoch": 0.9117855422369577, "grad_norm": 0.0, - "learning_rate": 3.9389494721319585e-07, - "loss": 0.8595, + "learning_rate": 4.0537590434244876e-07, + "loss": 0.706, "step": 32176 }, { - "epoch": 0.9130817253121453, + "epoch": 0.9118138796792201, "grad_norm": 0.0, - "learning_rate": 3.9363957756558547e-07, - "loss": 0.8089, + "learning_rate": 4.051172781211421e-07, + "loss": 0.8289, "step": 32177 }, { - "epoch": 0.9131101021566402, + "epoch": 0.9118422171214826, "grad_norm": 0.0, - "learning_rate": 3.933842890633333e-07, - "loss": 0.7169, + "learning_rate": 4.048587327210973e-07, + "loss": 0.8608, "step": 32178 }, { - "epoch": 0.9131384790011351, + "epoch": 0.9118705545637451, "grad_norm": 0.0, - "learning_rate": 3.9312908170859645e-07, - "loss": 0.8236, + "learning_rate": 4.046002681444894e-07, + "loss": 0.7599, "step": 32179 }, { - "epoch": 0.91316685584563, + "epoch": 0.9118988920060075, "grad_norm": 0.0, - "learning_rate": 3.928739555035288e-07, - "loss": 0.7482, + "learning_rate": 4.0434188439349544e-07, + "loss": 0.7389, "step": 32180 }, { - "epoch": 0.9131952326901248, + "epoch": 0.91192722944827, "grad_norm": 0.0, - "learning_rate": 3.926189104502864e-07, - "loss": 0.7686, + "learning_rate": 4.040835814702959e-07, + "loss": 0.8545, "step": 32181 }, { - "epoch": 0.9132236095346198, + "epoch": 0.9119555668905325, "grad_norm": 0.0, - "learning_rate": 3.923639465510265e-07, - "loss": 0.9016, + "learning_rate": 4.0382535937706136e-07, + "loss": 0.8261, "step": 32182 }, { - "epoch": 0.9132519863791146, + "epoch": 0.911983904332795, "grad_norm": 0.0, - "learning_rate": 3.9210906380789836e-07, - "loss": 0.6859, + "learning_rate": 4.035672181159689e-07, + "loss": 0.7963, "step": 32183 }, { - "epoch": 0.9132803632236095, + "epoch": 0.9120122417750574, "grad_norm": 0.0, - "learning_rate": 3.918542622230581e-07, - "loss": 0.8588, + "learning_rate": 4.0330915768919454e-07, + "loss": 0.7833, "step": 32184 }, { - "epoch": 0.9133087400681045, + "epoch": 0.9120405792173198, "grad_norm": 0.0, - "learning_rate": 3.915995417986562e-07, - "loss": 0.7755, + "learning_rate": 4.0305117809890993e-07, + "loss": 0.7779, "step": 32185 }, { - "epoch": 0.9133371169125993, + "epoch": 0.9120689166595823, "grad_norm": 0.0, - "learning_rate": 3.913449025368443e-07, - "loss": 0.7692, + "learning_rate": 4.0279327934728997e-07, + "loss": 0.7498, "step": 32186 }, { - "epoch": 0.9133654937570942, + "epoch": 0.9120972541018447, "grad_norm": 0.0, - "learning_rate": 3.9109034443977513e-07, - "loss": 0.6957, + "learning_rate": 4.0253546143650514e-07, + "loss": 0.9216, "step": 32187 }, { - "epoch": 0.913393870601589, + "epoch": 0.9121255915441072, "grad_norm": 0.0, - "learning_rate": 3.908358675095969e-07, - "loss": 0.8429, + "learning_rate": 4.0227772436872813e-07, + "loss": 0.8197, "step": 32188 }, { - "epoch": 0.913422247446084, + "epoch": 0.9121539289863697, "grad_norm": 0.0, - "learning_rate": 3.9058147174846016e-07, - "loss": 0.7297, + "learning_rate": 4.020200681461317e-07, + "loss": 0.8378, "step": 32189 }, { - "epoch": 0.9134506242905789, + "epoch": 0.9121822664286322, "grad_norm": 0.0, - "learning_rate": 3.903271571585143e-07, - "loss": 0.845, + "learning_rate": 4.017624927708819e-07, + "loss": 0.7802, "step": 32190 }, { - "epoch": 0.9134790011350737, + "epoch": 0.9122106038708946, "grad_norm": 0.0, - "learning_rate": 3.9007292374190654e-07, - "loss": 0.7853, + "learning_rate": 4.0150499824515353e-07, + "loss": 0.7528, "step": 32191 }, { - "epoch": 0.9135073779795687, + "epoch": 0.9122389413131571, "grad_norm": 0.0, - "learning_rate": 3.8981877150078395e-07, - "loss": 0.8622, + "learning_rate": 4.012475845711106e-07, + "loss": 0.8596, "step": 32192 }, { - "epoch": 0.9135357548240636, + "epoch": 0.9122672787554196, "grad_norm": 0.0, - "learning_rate": 3.89564700437296e-07, - "loss": 0.8195, + "learning_rate": 4.0099025175092346e-07, + "loss": 0.7823, "step": 32193 }, { - "epoch": 0.9135641316685584, + "epoch": 0.912295616197682, "grad_norm": 0.0, - "learning_rate": 3.893107105535854e-07, - "loss": 0.9178, + "learning_rate": 4.007329997867615e-07, + "loss": 0.7973, "step": 32194 }, { - "epoch": 0.9135925085130534, + "epoch": 0.9123239536399445, "grad_norm": 0.0, - "learning_rate": 3.8905680185179815e-07, - "loss": 0.9042, + "learning_rate": 4.004758286807897e-07, + "loss": 0.8781, "step": 32195 }, { - "epoch": 0.9136208853575483, + "epoch": 0.912352291082207, "grad_norm": 0.0, - "learning_rate": 3.8880297433408263e-07, - "loss": 0.7309, + "learning_rate": 4.0021873843517413e-07, + "loss": 0.7954, "step": 32196 }, { - "epoch": 0.9136492622020431, + "epoch": 0.9123806285244693, "grad_norm": 0.0, - "learning_rate": 3.8854922800257935e-07, - "loss": 0.8364, + "learning_rate": 3.9996172905208074e-07, + "loss": 0.7719, "step": 32197 }, { - "epoch": 0.913677639046538, + "epoch": 0.9124089659667318, "grad_norm": 0.0, - "learning_rate": 3.8829556285943314e-07, - "loss": 0.9028, + "learning_rate": 3.9970480053367454e-07, + "loss": 0.7619, "step": 32198 }, { - "epoch": 0.913706015891033, + "epoch": 0.9124373034089943, "grad_norm": 0.0, - "learning_rate": 3.8804197890678683e-07, - "loss": 0.8649, + "learning_rate": 3.9944795288212047e-07, + "loss": 0.654, "step": 32199 }, { - "epoch": 0.9137343927355278, + "epoch": 0.9124656408512568, "grad_norm": 0.0, - "learning_rate": 3.8778847614678095e-07, - "loss": 0.768, + "learning_rate": 3.991911860995812e-07, + "loss": 0.8306, "step": 32200 }, { - "epoch": 0.9137627695800227, + "epoch": 0.9124939782935192, "grad_norm": 0.0, - "learning_rate": 3.8753505458155815e-07, - "loss": 0.7476, + "learning_rate": 3.9893450018821946e-07, + "loss": 0.8209, "step": 32201 }, { - "epoch": 0.9137911464245176, + "epoch": 0.9125223157357817, "grad_norm": 0.0, - "learning_rate": 3.872817142132601e-07, - "loss": 0.766, + "learning_rate": 3.9867789515019795e-07, + "loss": 0.9213, "step": 32202 }, { - "epoch": 0.9138195232690125, + "epoch": 0.9125506531780442, "grad_norm": 0.0, - "learning_rate": 3.8702845504402395e-07, - "loss": 0.8715, + "learning_rate": 3.984213709876783e-07, + "loss": 0.8536, "step": 32203 }, { - "epoch": 0.9138479001135074, + "epoch": 0.9125789906203066, "grad_norm": 0.0, - "learning_rate": 3.867752770759914e-07, - "loss": 0.8315, + "learning_rate": 3.981649277028199e-07, + "loss": 0.7901, "step": 32204 }, { - "epoch": 0.9138762769580022, + "epoch": 0.9126073280625691, "grad_norm": 0.0, - "learning_rate": 3.865221803113006e-07, - "loss": 0.8035, + "learning_rate": 3.979085652977854e-07, + "loss": 0.8101, "step": 32205 }, { - "epoch": 0.9139046538024972, + "epoch": 0.9126356655048316, "grad_norm": 0.0, - "learning_rate": 3.862691647520889e-07, - "loss": 0.7948, + "learning_rate": 3.97652283774731e-07, + "loss": 0.9178, "step": 32206 }, { - "epoch": 0.913933030646992, + "epoch": 0.912664002947094, "grad_norm": 0.0, - "learning_rate": 3.860162304004933e-07, - "loss": 0.9176, + "learning_rate": 3.973960831358181e-07, + "loss": 0.8873, "step": 32207 }, { - "epoch": 0.9139614074914869, + "epoch": 0.9126923403893564, "grad_norm": 0.0, - "learning_rate": 3.857633772586511e-07, - "loss": 0.7779, + "learning_rate": 3.9713996338320405e-07, + "loss": 0.7344, "step": 32208 }, { - "epoch": 0.9139897843359819, + "epoch": 0.9127206778316189, "grad_norm": 0.0, - "learning_rate": 3.855106053286972e-07, - "loss": 0.7957, + "learning_rate": 3.968839245190448e-07, + "loss": 0.8656, "step": 32209 }, { - "epoch": 0.9140181611804767, + "epoch": 0.9127490152738814, "grad_norm": 0.0, - "learning_rate": 3.852579146127677e-07, - "loss": 0.8513, + "learning_rate": 3.9662796654549865e-07, + "loss": 0.8521, "step": 32210 }, { - "epoch": 0.9140465380249716, + "epoch": 0.9127773527161438, "grad_norm": 0.0, - "learning_rate": 3.850053051129965e-07, - "loss": 0.8538, + "learning_rate": 3.963720894647216e-07, + "loss": 0.8286, "step": 32211 }, { - "epoch": 0.9140749148694666, + "epoch": 0.9128056901584063, "grad_norm": 0.0, - "learning_rate": 3.8475277683151733e-07, - "loss": 0.7009, + "learning_rate": 3.961162932788687e-07, + "loss": 0.7733, "step": 32212 }, { - "epoch": 0.9141032917139614, + "epoch": 0.9128340276006688, "grad_norm": 0.0, - "learning_rate": 3.8450032977046526e-07, - "loss": 0.8237, + "learning_rate": 3.9586057799009483e-07, + "loss": 0.6883, "step": 32213 }, { - "epoch": 0.9141316685584563, + "epoch": 0.9128623650429313, "grad_norm": 0.0, - "learning_rate": 3.8424796393197075e-07, - "loss": 0.8809, + "learning_rate": 3.956049436005538e-07, + "loss": 0.8186, "step": 32214 }, { - "epoch": 0.9141600454029511, + "epoch": 0.9128907024851937, "grad_norm": 0.0, - "learning_rate": 3.8399567931816537e-07, - "loss": 0.8658, + "learning_rate": 3.953493901123984e-07, + "loss": 0.831, "step": 32215 }, { - "epoch": 0.9141884222474461, + "epoch": 0.9129190399274562, "grad_norm": 0.0, - "learning_rate": 3.837434759311809e-07, - "loss": 0.7244, + "learning_rate": 3.9509391752778236e-07, + "loss": 0.7517, "step": 32216 }, { - "epoch": 0.914216799091941, + "epoch": 0.9129473773697186, "grad_norm": 0.0, - "learning_rate": 3.8349135377314773e-07, - "loss": 0.8219, + "learning_rate": 3.948385258488552e-07, + "loss": 0.7745, "step": 32217 }, { - "epoch": 0.9142451759364358, + "epoch": 0.912975714811981, "grad_norm": 0.0, - "learning_rate": 3.832393128461953e-07, - "loss": 0.7232, + "learning_rate": 3.9458321507777286e-07, + "loss": 0.808, "step": 32218 }, { - "epoch": 0.9142735527809308, + "epoch": 0.9130040522542435, "grad_norm": 0.0, - "learning_rate": 3.8298735315245306e-07, - "loss": 0.8276, + "learning_rate": 3.9432798521668035e-07, + "loss": 0.9205, "step": 32219 }, { - "epoch": 0.9143019296254257, + "epoch": 0.913032389696506, "grad_norm": 0.0, - "learning_rate": 3.8273547469404815e-07, - "loss": 0.8415, + "learning_rate": 3.940728362677315e-07, + "loss": 0.764, "step": 32220 }, { - "epoch": 0.9143303064699205, + "epoch": 0.9130607271387684, "grad_norm": 0.0, - "learning_rate": 3.8248367747311e-07, - "loss": 0.8098, + "learning_rate": 3.9381776823307347e-07, + "loss": 0.807, "step": 32221 }, { - "epoch": 0.9143586833144154, + "epoch": 0.9130890645810309, "grad_norm": 0.0, - "learning_rate": 3.822319614917647e-07, - "loss": 0.8759, + "learning_rate": 3.9356278111485567e-07, + "loss": 0.7223, "step": 32222 }, { - "epoch": 0.9143870601589104, + "epoch": 0.9131174020232934, "grad_norm": 0.0, - "learning_rate": 3.8198032675213715e-07, - "loss": 0.8431, + "learning_rate": 3.933078749152264e-07, + "loss": 0.826, "step": 32223 }, { - "epoch": 0.9144154370034052, + "epoch": 0.9131457394655559, "grad_norm": 0.0, - "learning_rate": 3.817287732563546e-07, - "loss": 0.827, + "learning_rate": 3.9305304963633163e-07, + "loss": 0.7383, "step": 32224 }, { - "epoch": 0.9144438138479001, + "epoch": 0.9131740769078183, "grad_norm": 0.0, - "learning_rate": 3.81477301006542e-07, - "loss": 0.8602, + "learning_rate": 3.9279830528031747e-07, + "loss": 0.838, "step": 32225 }, { - "epoch": 0.914472190692395, + "epoch": 0.9132024143500808, "grad_norm": 0.0, - "learning_rate": 3.81225910004821e-07, - "loss": 0.7193, + "learning_rate": 3.9254364184933335e-07, + "loss": 0.8197, "step": 32226 }, { - "epoch": 0.9145005675368899, + "epoch": 0.9132307517923433, "grad_norm": 0.0, - "learning_rate": 3.809746002533199e-07, - "loss": 0.7905, + "learning_rate": 3.922890593455198e-07, + "loss": 0.7839, "step": 32227 }, { - "epoch": 0.9145289443813848, + "epoch": 0.9132590892346056, "grad_norm": 0.0, - "learning_rate": 3.80723371754157e-07, - "loss": 0.8401, + "learning_rate": 3.9203455777102493e-07, + "loss": 0.9164, "step": 32228 }, { - "epoch": 0.9145573212258796, + "epoch": 0.9132874266768681, "grad_norm": 0.0, - "learning_rate": 3.804722245094561e-07, - "loss": 0.808, + "learning_rate": 3.917801371279895e-07, + "loss": 0.7929, "step": 32229 }, { - "epoch": 0.9145856980703746, + "epoch": 0.9133157641191306, "grad_norm": 0.0, - "learning_rate": 3.802211585213411e-07, - "loss": 0.7949, + "learning_rate": 3.915257974185582e-07, + "loss": 0.7388, "step": 32230 }, { - "epoch": 0.9146140749148695, + "epoch": 0.9133441015613931, "grad_norm": 0.0, - "learning_rate": 3.7997017379192813e-07, - "loss": 0.8057, + "learning_rate": 3.91271538644874e-07, + "loss": 0.8461, "step": 32231 }, { - "epoch": 0.9146424517593643, + "epoch": 0.9133724390036555, "grad_norm": 0.0, - "learning_rate": 3.7971927032333986e-07, - "loss": 0.7975, + "learning_rate": 3.9101736080907615e-07, + "loss": 0.7818, "step": 32232 }, { - "epoch": 0.9146708286038593, + "epoch": 0.913400776445918, "grad_norm": 0.0, - "learning_rate": 3.794684481176969e-07, - "loss": 0.8134, + "learning_rate": 3.907632639133074e-07, + "loss": 0.8478, "step": 32233 }, { - "epoch": 0.9146992054483541, + "epoch": 0.9134291138881805, "grad_norm": 0.0, - "learning_rate": 3.7921770717711414e-07, - "loss": 0.7745, + "learning_rate": 3.9050924795970944e-07, + "loss": 0.7175, "step": 32234 }, { - "epoch": 0.914727582292849, + "epoch": 0.9134574513304429, "grad_norm": 0.0, - "learning_rate": 3.789670475037133e-07, - "loss": 0.8277, + "learning_rate": 3.902553129504194e-07, + "loss": 0.8686, "step": 32235 }, { - "epoch": 0.914755959137344, + "epoch": 0.9134857887727054, "grad_norm": 0.0, - "learning_rate": 3.7871646909961033e-07, - "loss": 0.7618, + "learning_rate": 3.900014588875767e-07, + "loss": 0.8848, "step": 32236 }, { - "epoch": 0.9147843359818388, + "epoch": 0.9135141262149679, "grad_norm": 0.0, - "learning_rate": 3.784659719669215e-07, - "loss": 0.8042, + "learning_rate": 3.8974768577332067e-07, + "loss": 0.8265, "step": 32237 }, { - "epoch": 0.9148127128263337, + "epoch": 0.9135424636572304, "grad_norm": 0.0, - "learning_rate": 3.7821555610776274e-07, - "loss": 0.7529, + "learning_rate": 3.8949399360978967e-07, + "loss": 0.7109, "step": 32238 }, { - "epoch": 0.9148410896708286, + "epoch": 0.9135708010994927, "grad_norm": 0.0, - "learning_rate": 3.779652215242513e-07, - "loss": 0.8137, + "learning_rate": 3.8924038239911977e-07, + "loss": 0.8042, "step": 32239 }, { - "epoch": 0.9148694665153235, + "epoch": 0.9135991385417552, "grad_norm": 0.0, - "learning_rate": 3.7771496821849774e-07, - "loss": 0.7498, + "learning_rate": 3.8898685214344477e-07, + "loss": 0.7862, "step": 32240 }, { - "epoch": 0.9148978433598184, + "epoch": 0.9136274759840177, "grad_norm": 0.0, - "learning_rate": 3.7746479619261924e-07, - "loss": 0.873, + "learning_rate": 3.887334028449041e-07, + "loss": 0.7338, "step": 32241 }, { - "epoch": 0.9149262202043132, + "epoch": 0.9136558134262801, "grad_norm": 0.0, - "learning_rate": 3.772147054487285e-07, - "loss": 0.7618, + "learning_rate": 3.8848003450563167e-07, + "loss": 0.7314, "step": 32242 }, { - "epoch": 0.9149545970488082, + "epoch": 0.9136841508685426, "grad_norm": 0.0, - "learning_rate": 3.7696469598893727e-07, - "loss": 0.8298, + "learning_rate": 3.882267471277601e-07, + "loss": 0.7549, "step": 32243 }, { - "epoch": 0.9149829738933031, + "epoch": 0.9137124883108051, "grad_norm": 0.0, - "learning_rate": 3.7671476781535934e-07, - "loss": 0.7644, + "learning_rate": 3.8797354071342443e-07, + "loss": 0.8427, "step": 32244 }, { - "epoch": 0.9150113507377979, + "epoch": 0.9137408257530675, "grad_norm": 0.0, - "learning_rate": 3.76464920930103e-07, - "loss": 0.7561, + "learning_rate": 3.8772041526475624e-07, + "loss": 0.779, "step": 32245 }, { - "epoch": 0.9150397275822928, + "epoch": 0.91376916319533, "grad_norm": 0.0, - "learning_rate": 3.7621515533527996e-07, - "loss": 0.7657, + "learning_rate": 3.874673707838883e-07, + "loss": 0.8701, "step": 32246 }, { - "epoch": 0.9150681044267878, + "epoch": 0.9137975006375925, "grad_norm": 0.0, - "learning_rate": 3.7596547103300187e-07, - "loss": 0.7782, + "learning_rate": 3.872144072729522e-07, + "loss": 0.8734, "step": 32247 }, { - "epoch": 0.9150964812712826, + "epoch": 0.913825838079855, "grad_norm": 0.0, - "learning_rate": 3.7571586802537475e-07, - "loss": 0.747, + "learning_rate": 3.8696152473407966e-07, + "loss": 0.7263, "step": 32248 }, { - "epoch": 0.9151248581157775, + "epoch": 0.9138541755221173, "grad_norm": 0.0, - "learning_rate": 3.754663463145081e-07, - "loss": 0.8201, + "learning_rate": 3.867087231693989e-07, + "loss": 0.8558, "step": 32249 }, { - "epoch": 0.9151532349602725, + "epoch": 0.9138825129643798, "grad_norm": 0.0, - "learning_rate": 3.7521690590251015e-07, - "loss": 0.7802, + "learning_rate": 3.8645600258104045e-07, + "loss": 0.7775, "step": 32250 }, { - "epoch": 0.9151816118047673, + "epoch": 0.9139108504066423, "grad_norm": 0.0, - "learning_rate": 3.7496754679148815e-07, - "loss": 0.7926, + "learning_rate": 3.862033629711337e-07, + "loss": 0.8732, "step": 32251 }, { - "epoch": 0.9152099886492622, + "epoch": 0.9139391878489047, "grad_norm": 0.0, - "learning_rate": 3.747182689835471e-07, - "loss": 0.8166, + "learning_rate": 3.859508043418059e-07, + "loss": 0.821, "step": 32252 }, { - "epoch": 0.9152383654937571, + "epoch": 0.9139675252911672, "grad_norm": 0.0, - "learning_rate": 3.7446907248079423e-07, - "loss": 0.7221, + "learning_rate": 3.8569832669518417e-07, + "loss": 0.6778, "step": 32253 }, { - "epoch": 0.915266742338252, + "epoch": 0.9139958627334297, "grad_norm": 0.0, - "learning_rate": 3.7421995728533335e-07, - "loss": 0.9021, + "learning_rate": 3.854459300333957e-07, + "loss": 0.918, "step": 32254 }, { - "epoch": 0.9152951191827469, + "epoch": 0.9140242001756922, "grad_norm": 0.0, - "learning_rate": 3.7397092339926943e-07, - "loss": 0.7296, + "learning_rate": 3.8519361435856774e-07, + "loss": 0.7461, "step": 32255 }, { - "epoch": 0.9153234960272417, + "epoch": 0.9140525376179546, "grad_norm": 0.0, - "learning_rate": 3.737219708247053e-07, - "loss": 0.8621, + "learning_rate": 3.8494137967282186e-07, + "loss": 0.7355, "step": 32256 }, { - "epoch": 0.9153518728717367, + "epoch": 0.9140808750602171, "grad_norm": 0.0, - "learning_rate": 3.7347309956374367e-07, - "loss": 0.7386, + "learning_rate": 3.846892259782875e-07, + "loss": 0.7569, "step": 32257 }, { - "epoch": 0.9153802497162316, + "epoch": 0.9141092125024796, "grad_norm": 0.0, - "learning_rate": 3.7322430961848955e-07, - "loss": 0.8226, + "learning_rate": 3.844371532770852e-07, + "loss": 0.7789, "step": 32258 }, { - "epoch": 0.9154086265607264, + "epoch": 0.914137549944742, "grad_norm": 0.0, - "learning_rate": 3.729756009910412e-07, - "loss": 0.8673, + "learning_rate": 3.841851615713399e-07, + "loss": 0.9501, "step": 32259 }, { - "epoch": 0.9154370034052214, + "epoch": 0.9141658873870044, "grad_norm": 0.0, - "learning_rate": 3.7272697368350154e-07, - "loss": 0.7854, + "learning_rate": 3.839332508631721e-07, + "loss": 0.8665, "step": 32260 }, { - "epoch": 0.9154653802497162, + "epoch": 0.9141942248292669, "grad_norm": 0.0, - "learning_rate": 3.7247842769796983e-07, - "loss": 0.7584, + "learning_rate": 3.8368142115470683e-07, + "loss": 0.7752, "step": 32261 }, { - "epoch": 0.9154937570942111, + "epoch": 0.9142225622715294, "grad_norm": 0.0, - "learning_rate": 3.7222996303654557e-07, - "loss": 0.9095, + "learning_rate": 3.834296724480635e-07, + "loss": 0.899, "step": 32262 }, { - "epoch": 0.915522133938706, + "epoch": 0.9142508997137918, "grad_norm": 0.0, - "learning_rate": 3.7198157970132706e-07, - "loss": 0.7702, + "learning_rate": 3.831780047453637e-07, + "loss": 0.818, "step": 32263 }, { - "epoch": 0.9155505107832009, + "epoch": 0.9142792371560543, "grad_norm": 0.0, - "learning_rate": 3.717332776944138e-07, - "loss": 0.7386, + "learning_rate": 3.8292641804872576e-07, + "loss": 0.7593, "step": 32264 }, { - "epoch": 0.9155788876276958, + "epoch": 0.9143075745983168, "grad_norm": 0.0, - "learning_rate": 3.7148505701790073e-07, - "loss": 0.8032, + "learning_rate": 3.8267491236027133e-07, + "loss": 0.7914, "step": 32265 }, { - "epoch": 0.9156072644721907, + "epoch": 0.9143359120405792, "grad_norm": 0.0, - "learning_rate": 3.7123691767388726e-07, - "loss": 0.8412, + "learning_rate": 3.824234876821165e-07, + "loss": 0.8185, "step": 32266 }, { - "epoch": 0.9156356413166856, + "epoch": 0.9143642494828417, "grad_norm": 0.0, - "learning_rate": 3.709888596644695e-07, - "loss": 0.7546, + "learning_rate": 3.821721440163795e-07, + "loss": 0.805, "step": 32267 }, { - "epoch": 0.9156640181611805, + "epoch": 0.9143925869251042, "grad_norm": 0.0, - "learning_rate": 3.707408829917403e-07, - "loss": 0.8334, + "learning_rate": 3.819208813651787e-07, + "loss": 0.7702, "step": 32268 }, { - "epoch": 0.9156923950056753, + "epoch": 0.9144209243673665, "grad_norm": 0.0, - "learning_rate": 3.7049298765779564e-07, - "loss": 0.7114, + "learning_rate": 3.8166969973063015e-07, + "loss": 0.7633, "step": 32269 }, { - "epoch": 0.9157207718501703, + "epoch": 0.914449261809629, "grad_norm": 0.0, - "learning_rate": 3.702451736647306e-07, - "loss": 0.7534, + "learning_rate": 3.814185991148478e-07, + "loss": 0.7297, "step": 32270 }, { - "epoch": 0.9157491486946652, + "epoch": 0.9144775992518915, "grad_norm": 0.0, - "learning_rate": 3.6999744101463677e-07, - "loss": 0.7596, + "learning_rate": 3.81167579519951e-07, + "loss": 0.8786, "step": 32271 }, { - "epoch": 0.91577752553916, + "epoch": 0.914505936694154, "grad_norm": 0.0, - "learning_rate": 3.6974978970960695e-07, - "loss": 0.751, + "learning_rate": 3.8091664094804913e-07, + "loss": 0.8219, "step": 32272 }, { - "epoch": 0.9158059023836549, + "epoch": 0.9145342741364164, "grad_norm": 0.0, - "learning_rate": 3.6950221975173393e-07, - "loss": 0.7794, + "learning_rate": 3.806657834012595e-07, + "loss": 0.8948, "step": 32273 }, { - "epoch": 0.9158342792281499, + "epoch": 0.9145626115786789, "grad_norm": 0.0, - "learning_rate": 3.692547311431083e-07, - "loss": 0.8248, + "learning_rate": 3.8041500688169253e-07, + "loss": 0.7774, "step": 32274 }, { - "epoch": 0.9158626560726447, + "epoch": 0.9145909490209414, "grad_norm": 0.0, - "learning_rate": 3.6900732388582274e-07, - "loss": 0.7361, + "learning_rate": 3.8016431139146327e-07, + "loss": 0.7835, "step": 32275 }, { - "epoch": 0.9158910329171396, + "epoch": 0.9146192864632038, "grad_norm": 0.0, - "learning_rate": 3.6875999798196335e-07, - "loss": 0.8297, + "learning_rate": 3.799136969326833e-07, + "loss": 0.8905, "step": 32276 }, { - "epoch": 0.9159194097616346, + "epoch": 0.9146476239054663, "grad_norm": 0.0, - "learning_rate": 3.685127534336208e-07, - "loss": 0.7682, + "learning_rate": 3.79663163507461e-07, + "loss": 0.8646, "step": 32277 }, { - "epoch": 0.9159477866061294, + "epoch": 0.9146759613477288, "grad_norm": 0.0, - "learning_rate": 3.6826559024288664e-07, - "loss": 0.7197, + "learning_rate": 3.7941271111791024e-07, + "loss": 0.8673, "step": 32278 }, { - "epoch": 0.9159761634506243, + "epoch": 0.9147042987899913, "grad_norm": 0.0, - "learning_rate": 3.6801850841184375e-07, - "loss": 0.6943, + "learning_rate": 3.7916233976613704e-07, + "loss": 0.7366, "step": 32279 }, { - "epoch": 0.9160045402951191, + "epoch": 0.9147326362322536, "grad_norm": 0.0, - "learning_rate": 3.677715079425814e-07, - "loss": 0.7341, + "learning_rate": 3.78912049454252e-07, + "loss": 0.8067, "step": 32280 }, { - "epoch": 0.9160329171396141, + "epoch": 0.9147609736745161, "grad_norm": 0.0, - "learning_rate": 3.675245888371881e-07, - "loss": 0.8166, + "learning_rate": 3.786618401843645e-07, + "loss": 0.7544, "step": 32281 }, { - "epoch": 0.916061293984109, + "epoch": 0.9147893111167786, "grad_norm": 0.0, - "learning_rate": 3.672777510977454e-07, - "loss": 0.8529, + "learning_rate": 3.784117119585806e-07, + "loss": 0.8566, "step": 32282 }, { - "epoch": 0.9160896708286038, + "epoch": 0.914817648559041, "grad_norm": 0.0, - "learning_rate": 3.6703099472634174e-07, - "loss": 0.903, + "learning_rate": 3.781616647790065e-07, + "loss": 0.8228, "step": 32283 }, { - "epoch": 0.9161180476730988, + "epoch": 0.9148459860013035, "grad_norm": 0.0, - "learning_rate": 3.667843197250609e-07, - "loss": 0.9817, + "learning_rate": 3.7791169864775266e-07, + "loss": 0.9584, "step": 32284 }, { - "epoch": 0.9161464245175936, + "epoch": 0.914874323443566, "grad_norm": 0.0, - "learning_rate": 3.665377260959846e-07, - "loss": 0.7169, + "learning_rate": 3.7766181356691854e-07, + "loss": 0.8647, "step": 32285 }, { - "epoch": 0.9161748013620885, + "epoch": 0.9149026608858285, "grad_norm": 0.0, - "learning_rate": 3.662912138411967e-07, - "loss": 0.8826, + "learning_rate": 3.7741200953861356e-07, + "loss": 0.7173, "step": 32286 }, { - "epoch": 0.9162031782065835, + "epoch": 0.9149309983280909, "grad_norm": 0.0, - "learning_rate": 3.660447829627811e-07, - "loss": 0.7569, + "learning_rate": 3.771622865649405e-07, + "loss": 0.8088, "step": 32287 }, { - "epoch": 0.9162315550510783, + "epoch": 0.9149593357703534, "grad_norm": 0.0, - "learning_rate": 3.657984334628173e-07, - "loss": 0.8162, + "learning_rate": 3.7691264464800203e-07, + "loss": 0.8021, "step": 32288 }, { - "epoch": 0.9162599318955732, + "epoch": 0.9149876732126159, "grad_norm": 0.0, - "learning_rate": 3.655521653433891e-07, - "loss": 0.8379, + "learning_rate": 3.766630837899032e-07, + "loss": 0.7786, "step": 32289 }, { - "epoch": 0.9162883087400681, + "epoch": 0.9150160106548783, "grad_norm": 0.0, - "learning_rate": 3.653059786065738e-07, - "loss": 0.74, + "learning_rate": 3.7641360399274354e-07, + "loss": 0.8477, "step": 32290 }, { - "epoch": 0.916316685584563, + "epoch": 0.9150443480971407, "grad_norm": 0.0, - "learning_rate": 3.650598732544519e-07, - "loss": 0.8822, + "learning_rate": 3.7616420525862564e-07, + "loss": 0.7772, "step": 32291 }, { - "epoch": 0.9163450624290579, + "epoch": 0.9150726855394032, "grad_norm": 0.0, - "learning_rate": 3.648138492891029e-07, - "loss": 0.795, + "learning_rate": 3.7591488758965233e-07, + "loss": 0.6798, "step": 32292 }, { - "epoch": 0.9163734392735527, + "epoch": 0.9151010229816656, "grad_norm": 0.0, - "learning_rate": 3.64567906712604e-07, - "loss": 0.7635, + "learning_rate": 3.7566565098791975e-07, + "loss": 0.8518, "step": 32293 }, { - "epoch": 0.9164018161180477, + "epoch": 0.9151293604239281, "grad_norm": 0.0, - "learning_rate": 3.6432204552703245e-07, - "loss": 0.7166, + "learning_rate": 3.754164954555306e-07, + "loss": 0.8423, "step": 32294 }, { - "epoch": 0.9164301929625426, + "epoch": 0.9151576978661906, "grad_norm": 0.0, - "learning_rate": 3.6407626573446763e-07, - "loss": 0.7946, + "learning_rate": 3.7516742099458106e-07, + "loss": 0.8206, "step": 32295 }, { - "epoch": 0.9164585698070374, + "epoch": 0.9151860353084531, "grad_norm": 0.0, - "learning_rate": 3.638305673369824e-07, - "loss": 0.8439, + "learning_rate": 3.7491842760717046e-07, + "loss": 0.7498, "step": 32296 }, { - "epoch": 0.9164869466515323, + "epoch": 0.9152143727507155, "grad_norm": 0.0, - "learning_rate": 3.6358495033665286e-07, - "loss": 0.7282, + "learning_rate": 3.746695152953983e-07, + "loss": 0.8784, "step": 32297 }, { - "epoch": 0.9165153234960273, + "epoch": 0.915242710192978, "grad_norm": 0.0, - "learning_rate": 3.633394147355562e-07, - "loss": 0.7852, + "learning_rate": 3.7442068406135624e-07, + "loss": 0.8014, "step": 32298 }, { - "epoch": 0.9165437003405221, + "epoch": 0.9152710476352405, "grad_norm": 0.0, - "learning_rate": 3.630939605357642e-07, - "loss": 0.8135, + "learning_rate": 3.741719339071448e-07, + "loss": 0.8016, "step": 32299 }, { - "epoch": 0.916572077185017, + "epoch": 0.9152993850775029, "grad_norm": 0.0, - "learning_rate": 3.628485877393495e-07, - "loss": 0.7996, + "learning_rate": 3.7392326483485673e-07, + "loss": 0.7922, "step": 32300 }, { - "epoch": 0.916600454029512, + "epoch": 0.9153277225197654, "grad_norm": 0.0, - "learning_rate": 3.6260329634838833e-07, - "loss": 0.7895, + "learning_rate": 3.736746768465871e-07, + "loss": 0.8773, "step": 32301 }, { - "epoch": 0.9166288308740068, + "epoch": 0.9153560599620278, "grad_norm": 0.0, - "learning_rate": 3.6235808636494787e-07, - "loss": 0.8076, + "learning_rate": 3.7342616994443194e-07, + "loss": 0.8072, "step": 32302 }, { - "epoch": 0.9166572077185017, + "epoch": 0.9153843974042903, "grad_norm": 0.0, - "learning_rate": 3.62112957791102e-07, - "loss": 0.7827, + "learning_rate": 3.731777441304818e-07, + "loss": 0.6919, "step": 32303 }, { - "epoch": 0.9166855845629966, + "epoch": 0.9154127348465527, "grad_norm": 0.0, - "learning_rate": 3.618679106289225e-07, - "loss": 0.7502, + "learning_rate": 3.729293994068306e-07, + "loss": 0.8218, "step": 32304 }, { - "epoch": 0.9167139614074915, + "epoch": 0.9154410722888152, "grad_norm": 0.0, - "learning_rate": 3.6162294488047753e-07, - "loss": 0.8, + "learning_rate": 3.7268113577557107e-07, + "loss": 0.796, "step": 32305 }, { - "epoch": 0.9167423382519864, + "epoch": 0.9154694097310777, "grad_norm": 0.0, - "learning_rate": 3.6137806054783674e-07, - "loss": 0.8187, + "learning_rate": 3.724329532387916e-07, + "loss": 0.7786, "step": 32306 }, { - "epoch": 0.9167707150964812, + "epoch": 0.9154977471733401, "grad_norm": 0.0, - "learning_rate": 3.611332576330695e-07, - "loss": 0.8184, + "learning_rate": 3.72184851798586e-07, + "loss": 0.7088, "step": 32307 }, { - "epoch": 0.9167990919409762, + "epoch": 0.9155260846156026, "grad_norm": 0.0, - "learning_rate": 3.6088853613824195e-07, - "loss": 0.7574, + "learning_rate": 3.7193683145704153e-07, + "loss": 0.8048, "step": 32308 }, { - "epoch": 0.9168274687854711, + "epoch": 0.9155544220578651, "grad_norm": 0.0, - "learning_rate": 3.6064389606542237e-07, - "loss": 0.7076, + "learning_rate": 3.716888922162487e-07, + "loss": 0.8409, "step": 32309 }, { - "epoch": 0.9168558456299659, + "epoch": 0.9155827595001275, "grad_norm": 0.0, - "learning_rate": 3.603993374166781e-07, - "loss": 0.7626, + "learning_rate": 3.71441034078297e-07, + "loss": 0.8492, "step": 32310 }, { - "epoch": 0.9168842224744609, + "epoch": 0.91561109694239, "grad_norm": 0.0, - "learning_rate": 3.60154860194073e-07, - "loss": 0.8172, + "learning_rate": 3.711932570452703e-07, + "loss": 0.7658, "step": 32311 }, { - "epoch": 0.9169125993189557, + "epoch": 0.9156394343846525, "grad_norm": 0.0, - "learning_rate": 3.599104643996731e-07, - "loss": 0.9004, + "learning_rate": 3.709455611192603e-07, + "loss": 0.7861, "step": 32312 }, { - "epoch": 0.9169409761634506, + "epoch": 0.915667771826915, "grad_norm": 0.0, - "learning_rate": 3.596661500355436e-07, - "loss": 0.897, + "learning_rate": 3.706979463023497e-07, + "loss": 0.7051, "step": 32313 }, { - "epoch": 0.9169693530079455, + "epoch": 0.9156961092691773, "grad_norm": 0.0, - "learning_rate": 3.594219171037472e-07, - "loss": 0.7869, + "learning_rate": 3.7045041259662793e-07, + "loss": 0.7412, "step": 32314 }, { - "epoch": 0.9169977298524404, + "epoch": 0.9157244467114398, "grad_norm": 0.0, - "learning_rate": 3.5917776560634775e-07, - "loss": 0.685, + "learning_rate": 3.702029600041779e-07, + "loss": 0.8285, "step": 32315 }, { - "epoch": 0.9170261066969353, + "epoch": 0.9157527841537023, "grad_norm": 0.0, - "learning_rate": 3.589336955454059e-07, - "loss": 0.7937, + "learning_rate": 3.6995558852708335e-07, + "loss": 0.8269, "step": 32316 }, { - "epoch": 0.9170544835414302, + "epoch": 0.9157811215959647, "grad_norm": 0.0, - "learning_rate": 3.5868970692298553e-07, - "loss": 0.9001, + "learning_rate": 3.6970829816742936e-07, + "loss": 0.8237, "step": 32317 }, { - "epoch": 0.9170828603859251, + "epoch": 0.9158094590382272, "grad_norm": 0.0, - "learning_rate": 3.5844579974114503e-07, - "loss": 0.7371, + "learning_rate": 3.694610889272998e-07, + "loss": 0.7979, "step": 32318 }, { - "epoch": 0.91711123723042, + "epoch": 0.9158377964804897, "grad_norm": 0.0, - "learning_rate": 3.582019740019471e-07, - "loss": 0.7663, + "learning_rate": 3.6921396080877414e-07, + "loss": 0.7781, "step": 32319 }, { - "epoch": 0.9171396140749148, + "epoch": 0.9158661339227522, "grad_norm": 0.0, - "learning_rate": 3.5795822970745133e-07, - "loss": 0.7764, + "learning_rate": 3.689669138139351e-07, + "loss": 0.8215, "step": 32320 }, { - "epoch": 0.9171679909194098, + "epoch": 0.9158944713650146, "grad_norm": 0.0, - "learning_rate": 3.5771456685971593e-07, - "loss": 0.8457, + "learning_rate": 3.687199479448655e-07, + "loss": 0.7742, "step": 32321 }, { - "epoch": 0.9171963677639047, + "epoch": 0.9159228088072771, "grad_norm": 0.0, - "learning_rate": 3.5747098546079827e-07, - "loss": 0.8194, + "learning_rate": 3.684730632036437e-07, + "loss": 0.8168, "step": 32322 }, { - "epoch": 0.9172247446083995, + "epoch": 0.9159511462495395, "grad_norm": 0.0, - "learning_rate": 3.5722748551275665e-07, - "loss": 0.8001, + "learning_rate": 3.6822625959235136e-07, + "loss": 0.8814, "step": 32323 }, { - "epoch": 0.9172531214528944, + "epoch": 0.9159794836918019, "grad_norm": 0.0, - "learning_rate": 3.569840670176483e-07, - "loss": 0.7399, + "learning_rate": 3.6797953711306344e-07, + "loss": 0.8375, "step": 32324 }, { - "epoch": 0.9172814982973894, + "epoch": 0.9160078211340644, "grad_norm": 0.0, - "learning_rate": 3.5674072997752827e-07, - "loss": 0.8365, + "learning_rate": 3.677328957678616e-07, + "loss": 0.8039, "step": 32325 }, { - "epoch": 0.9173098751418842, + "epoch": 0.9160361585763269, "grad_norm": 0.0, - "learning_rate": 3.564974743944538e-07, - "loss": 0.8332, + "learning_rate": 3.6748633555882206e-07, + "loss": 0.8227, "step": 32326 }, { - "epoch": 0.9173382519863791, + "epoch": 0.9160644960185894, "grad_norm": 0.0, - "learning_rate": 3.562543002704788e-07, - "loss": 0.7747, + "learning_rate": 3.6723985648802196e-07, + "loss": 0.7333, "step": 32327 }, { - "epoch": 0.917366628830874, + "epoch": 0.9160928334608518, "grad_norm": 0.0, - "learning_rate": 3.5601120760765604e-07, - "loss": 0.8238, + "learning_rate": 3.6699345855753856e-07, + "loss": 0.8767, "step": 32328 }, { - "epoch": 0.9173950056753689, + "epoch": 0.9161211709031143, "grad_norm": 0.0, - "learning_rate": 3.5576819640804064e-07, - "loss": 0.7441, + "learning_rate": 3.667471417694468e-07, + "loss": 0.8205, "step": 32329 }, { - "epoch": 0.9174233825198638, + "epoch": 0.9161495083453768, "grad_norm": 0.0, - "learning_rate": 3.5552526667368637e-07, - "loss": 0.8205, + "learning_rate": 3.6650090612581955e-07, + "loss": 0.8213, "step": 32330 }, { - "epoch": 0.9174517593643586, + "epoch": 0.9161778457876392, "grad_norm": 0.0, - "learning_rate": 3.552824184066417e-07, - "loss": 0.6797, + "learning_rate": 3.6625475162873404e-07, + "loss": 0.7462, "step": 32331 }, { - "epoch": 0.9174801362088536, + "epoch": 0.9162061832299017, "grad_norm": 0.0, - "learning_rate": 3.5503965160896045e-07, - "loss": 0.8179, + "learning_rate": 3.6600867828026076e-07, + "loss": 0.8176, "step": 32332 }, { - "epoch": 0.9175085130533485, + "epoch": 0.9162345206721642, "grad_norm": 0.0, - "learning_rate": 3.5479696628269225e-07, - "loss": 0.8805, + "learning_rate": 3.6576268608247477e-07, + "loss": 0.7749, "step": 32333 }, { - "epoch": 0.9175368898978433, + "epoch": 0.9162628581144265, "grad_norm": 0.0, - "learning_rate": 3.545543624298886e-07, - "loss": 0.8751, + "learning_rate": 3.6551677503744776e-07, + "loss": 0.8304, "step": 32334 }, { - "epoch": 0.9175652667423383, + "epoch": 0.916291195556689, "grad_norm": 0.0, - "learning_rate": 3.54311840052598e-07, - "loss": 0.8672, + "learning_rate": 3.6527094514724914e-07, + "loss": 0.7773, "step": 32335 }, { - "epoch": 0.9175936435868332, + "epoch": 0.9163195329989515, "grad_norm": 0.0, - "learning_rate": 3.540693991528676e-07, - "loss": 0.7496, + "learning_rate": 3.6502519641395286e-07, + "loss": 0.7817, "step": 32336 }, { - "epoch": 0.917622020431328, + "epoch": 0.916347870441214, "grad_norm": 0.0, - "learning_rate": 3.5382703973274813e-07, - "loss": 0.8125, + "learning_rate": 3.6477952883962606e-07, + "loss": 0.8004, "step": 32337 }, { - "epoch": 0.917650397275823, + "epoch": 0.9163762078834764, "grad_norm": 0.0, - "learning_rate": 3.535847617942856e-07, - "loss": 0.7366, + "learning_rate": 3.645339424263383e-07, + "loss": 0.797, "step": 32338 }, { - "epoch": 0.9176787741203178, + "epoch": 0.9164045453257389, "grad_norm": 0.0, - "learning_rate": 3.5334256533952506e-07, - "loss": 0.7901, + "learning_rate": 3.642884371761601e-07, + "loss": 0.8947, "step": 32339 }, { - "epoch": 0.9177071509648127, + "epoch": 0.9164328827680014, "grad_norm": 0.0, - "learning_rate": 3.5310045037051264e-07, - "loss": 0.7876, + "learning_rate": 3.6404301309115763e-07, + "loss": 0.8291, "step": 32340 }, { - "epoch": 0.9177355278093076, + "epoch": 0.9164612202102638, "grad_norm": 0.0, - "learning_rate": 3.528584168892968e-07, - "loss": 0.819, + "learning_rate": 3.637976701734003e-07, + "loss": 0.8566, "step": 32341 }, { - "epoch": 0.9177639046538025, + "epoch": 0.9164895576525263, "grad_norm": 0.0, - "learning_rate": 3.52616464897918e-07, - "loss": 0.8011, + "learning_rate": 3.635524084249542e-07, + "loss": 0.854, "step": 32342 }, { - "epoch": 0.9177922814982974, + "epoch": 0.9165178950947888, "grad_norm": 0.0, - "learning_rate": 3.523745943984214e-07, - "loss": 0.8428, + "learning_rate": 3.633072278478833e-07, + "loss": 0.8015, "step": 32343 }, { - "epoch": 0.9178206583427923, + "epoch": 0.9165462325370513, "grad_norm": 0.0, - "learning_rate": 3.521328053928519e-07, - "loss": 0.7576, + "learning_rate": 3.630621284442548e-07, + "loss": 0.6635, "step": 32344 }, { - "epoch": 0.9178490351872872, + "epoch": 0.9165745699793136, "grad_norm": 0.0, - "learning_rate": 3.51891097883249e-07, - "loss": 0.7401, + "learning_rate": 3.6281711021613265e-07, + "loss": 0.8427, "step": 32345 }, { - "epoch": 0.9178774120317821, + "epoch": 0.9166029074215761, "grad_norm": 0.0, - "learning_rate": 3.5164947187165454e-07, - "loss": 0.7819, + "learning_rate": 3.625721731655796e-07, + "loss": 0.8074, "step": 32346 }, { - "epoch": 0.9179057888762769, + "epoch": 0.9166312448638386, "grad_norm": 0.0, - "learning_rate": 3.514079273601134e-07, - "loss": 0.8074, + "learning_rate": 3.6232731729466174e-07, + "loss": 0.7374, "step": 32347 }, { - "epoch": 0.9179341657207718, + "epoch": 0.916659582306101, "grad_norm": 0.0, - "learning_rate": 3.511664643506618e-07, - "loss": 0.8914, + "learning_rate": 3.6208254260543753e-07, + "loss": 0.7948, "step": 32348 }, { - "epoch": 0.9179625425652668, + "epoch": 0.9166879197483635, "grad_norm": 0.0, - "learning_rate": 3.5092508284534034e-07, - "loss": 0.7928, + "learning_rate": 3.618378490999719e-07, + "loss": 0.7877, "step": 32349 }, { - "epoch": 0.9179909194097616, + "epoch": 0.916716257190626, "grad_norm": 0.0, - "learning_rate": 3.506837828461884e-07, - "loss": 0.8504, + "learning_rate": 3.6159323678032654e-07, + "loss": 0.8164, "step": 32350 }, { - "epoch": 0.9180192962542565, + "epoch": 0.9167445946328885, "grad_norm": 0.0, - "learning_rate": 3.504425643552445e-07, - "loss": 0.7686, + "learning_rate": 3.6134870564855873e-07, + "loss": 0.7718, "step": 32351 }, { - "epoch": 0.9180476730987515, + "epoch": 0.9167729320751509, "grad_norm": 0.0, - "learning_rate": 3.50201427374548e-07, - "loss": 0.6963, + "learning_rate": 3.6110425570673015e-07, + "loss": 0.7565, "step": 32352 }, { - "epoch": 0.9180760499432463, + "epoch": 0.9168012695174134, "grad_norm": 0.0, - "learning_rate": 3.499603719061317e-07, - "loss": 0.6557, + "learning_rate": 3.6085988695689913e-07, + "loss": 0.775, "step": 32353 }, { - "epoch": 0.9181044267877412, + "epoch": 0.9168296069596759, "grad_norm": 0.0, - "learning_rate": 3.4971939795203414e-07, - "loss": 0.8079, + "learning_rate": 3.6061559940112625e-07, + "loss": 0.6568, "step": 32354 }, { - "epoch": 0.9181328036322361, + "epoch": 0.9168579444019382, "grad_norm": 0.0, - "learning_rate": 3.494785055142924e-07, - "loss": 0.8383, + "learning_rate": 3.603713930414676e-07, + "loss": 0.8429, "step": 32355 }, { - "epoch": 0.918161180476731, + "epoch": 0.9168862818442007, "grad_norm": 0.0, - "learning_rate": 3.4923769459493717e-07, - "loss": 0.7889, + "learning_rate": 3.6012726787997943e-07, + "loss": 0.7151, "step": 32356 }, { - "epoch": 0.9181895573212259, + "epoch": 0.9169146192864632, "grad_norm": 0.0, - "learning_rate": 3.4899696519600566e-07, - "loss": 0.7625, + "learning_rate": 3.5988322391872e-07, + "loss": 0.7126, "step": 32357 }, { - "epoch": 0.9182179341657207, + "epoch": 0.9169429567287256, "grad_norm": 0.0, - "learning_rate": 3.4875631731953184e-07, - "loss": 0.8661, + "learning_rate": 3.5963926115974324e-07, + "loss": 0.8069, "step": 32358 }, { - "epoch": 0.9182463110102157, + "epoch": 0.9169712941709881, "grad_norm": 0.0, - "learning_rate": 3.485157509675463e-07, - "loss": 0.7228, + "learning_rate": 3.5939537960510416e-07, + "loss": 0.851, "step": 32359 }, { - "epoch": 0.9182746878547106, + "epoch": 0.9169996316132506, "grad_norm": 0.0, - "learning_rate": 3.4827526614208186e-07, - "loss": 0.7974, + "learning_rate": 3.59151579256859e-07, + "loss": 0.8167, "step": 32360 }, { - "epoch": 0.9183030646992054, + "epoch": 0.9170279690555131, "grad_norm": 0.0, - "learning_rate": 3.480348628451713e-07, - "loss": 0.8094, + "learning_rate": 3.5890786011705926e-07, + "loss": 0.834, "step": 32361 }, { - "epoch": 0.9183314415437004, + "epoch": 0.9170563064977755, "grad_norm": 0.0, - "learning_rate": 3.47794541078843e-07, - "loss": 0.8249, + "learning_rate": 3.5866422218775896e-07, + "loss": 0.7504, "step": 32362 }, { - "epoch": 0.9183598183881952, + "epoch": 0.917084643940038, "grad_norm": 0.0, - "learning_rate": 3.4755430084512765e-07, - "loss": 0.8825, + "learning_rate": 3.5842066547101097e-07, + "loss": 0.829, "step": 32363 }, { - "epoch": 0.9183881952326901, + "epoch": 0.9171129813823005, "grad_norm": 0.0, - "learning_rate": 3.4731414214605574e-07, - "loss": 0.7967, + "learning_rate": 3.581771899688646e-07, + "loss": 0.7586, "step": 32364 }, { - "epoch": 0.918416572077185, + "epoch": 0.9171413188245628, "grad_norm": 0.0, - "learning_rate": 3.4707406498365457e-07, - "loss": 0.8833, + "learning_rate": 3.579337956833728e-07, + "loss": 0.873, "step": 32365 }, { - "epoch": 0.9184449489216799, + "epoch": 0.9171696562668253, "grad_norm": 0.0, - "learning_rate": 3.468340693599548e-07, - "loss": 0.8723, + "learning_rate": 3.57690482616585e-07, + "loss": 0.8303, "step": 32366 }, { - "epoch": 0.9184733257661748, + "epoch": 0.9171979937090878, "grad_norm": 0.0, - "learning_rate": 3.4659415527697916e-07, - "loss": 0.749, + "learning_rate": 3.5744725077055063e-07, + "loss": 0.8908, "step": 32367 }, { - "epoch": 0.9185017026106697, + "epoch": 0.9172263311513503, "grad_norm": 0.0, - "learning_rate": 3.463543227367572e-07, - "loss": 0.8536, + "learning_rate": 3.572041001473192e-07, + "loss": 0.7027, "step": 32368 }, { - "epoch": 0.9185300794551646, + "epoch": 0.9172546685936127, "grad_norm": 0.0, - "learning_rate": 3.461145717413161e-07, - "loss": 0.8859, + "learning_rate": 3.56961030748938e-07, + "loss": 0.7344, "step": 32369 }, { - "epoch": 0.9185584562996595, + "epoch": 0.9172830060358752, "grad_norm": 0.0, - "learning_rate": 3.4587490229267774e-07, - "loss": 0.7895, + "learning_rate": 3.5671804257745526e-07, + "loss": 0.8056, "step": 32370 }, { - "epoch": 0.9185868331441543, + "epoch": 0.9173113434781377, "grad_norm": 0.0, - "learning_rate": 3.45635314392867e-07, - "loss": 0.8172, + "learning_rate": 3.5647513563491833e-07, + "loss": 0.7633, "step": 32371 }, { - "epoch": 0.9186152099886493, + "epoch": 0.9173396809204001, "grad_norm": 0.0, - "learning_rate": 3.4539580804391124e-07, - "loss": 0.8175, + "learning_rate": 3.562323099233711e-07, + "loss": 0.8862, "step": 32372 }, { - "epoch": 0.9186435868331442, + "epoch": 0.9173680183626626, "grad_norm": 0.0, - "learning_rate": 3.4515638324782883e-07, - "loss": 0.8656, + "learning_rate": 3.5598956544486087e-07, + "loss": 0.8537, "step": 32373 }, { - "epoch": 0.918671963677639, + "epoch": 0.9173963558049251, "grad_norm": 0.0, - "learning_rate": 3.449170400066448e-07, - "loss": 0.8359, + "learning_rate": 3.557469022014315e-07, + "loss": 0.6714, "step": 32374 }, { - "epoch": 0.9187003405221339, + "epoch": 0.9174246932471876, "grad_norm": 0.0, - "learning_rate": 3.4467777832238204e-07, - "loss": 0.797, + "learning_rate": 3.5550432019512693e-07, + "loss": 0.858, "step": 32375 }, { - "epoch": 0.9187287173666289, + "epoch": 0.9174530306894499, "grad_norm": 0.0, - "learning_rate": 3.444385981970588e-07, - "loss": 0.8996, + "learning_rate": 3.552618194279922e-07, + "loss": 0.7644, "step": 32376 }, { - "epoch": 0.9187570942111237, + "epoch": 0.9174813681317124, "grad_norm": 0.0, - "learning_rate": 3.4419949963269693e-07, - "loss": 0.8767, + "learning_rate": 3.550193999020668e-07, + "loss": 0.7763, "step": 32377 }, { - "epoch": 0.9187854710556186, + "epoch": 0.9175097055739749, "grad_norm": 0.0, - "learning_rate": 3.4396048263131697e-07, - "loss": 0.7383, + "learning_rate": 3.5477706161939463e-07, + "loss": 0.7976, "step": 32378 }, { - "epoch": 0.9188138479001136, + "epoch": 0.9175380430162373, "grad_norm": 0.0, - "learning_rate": 3.437215471949351e-07, - "loss": 0.7104, + "learning_rate": 3.545348045820174e-07, + "loss": 0.7939, "step": 32379 }, { - "epoch": 0.9188422247446084, + "epoch": 0.9175663804584998, "grad_norm": 0.0, - "learning_rate": 3.43482693325573e-07, - "loss": 0.7873, + "learning_rate": 3.542926287919757e-07, + "loss": 0.8185, "step": 32380 }, { - "epoch": 0.9188706015891033, + "epoch": 0.9175947179007623, "grad_norm": 0.0, - "learning_rate": 3.4324392102524585e-07, - "loss": 0.7052, + "learning_rate": 3.5405053425130896e-07, + "loss": 0.7801, "step": 32381 }, { - "epoch": 0.9188989784335981, + "epoch": 0.9176230553430247, "grad_norm": 0.0, - "learning_rate": 3.430052302959708e-07, - "loss": 0.7653, + "learning_rate": 3.5380852096205673e-07, + "loss": 0.7009, "step": 32382 }, { - "epoch": 0.9189273552780931, + "epoch": 0.9176513927852872, "grad_norm": 0.0, - "learning_rate": 3.427666211397662e-07, - "loss": 0.7974, + "learning_rate": 3.535665889262563e-07, + "loss": 0.9158, "step": 32383 }, { - "epoch": 0.918955732122588, + "epoch": 0.9176797302275497, "grad_norm": 0.0, - "learning_rate": 3.42528093558645e-07, - "loss": 0.8755, + "learning_rate": 3.5332473814594814e-07, + "loss": 0.8031, "step": 32384 }, { - "epoch": 0.9189841089670828, + "epoch": 0.9177080676698122, "grad_norm": 0.0, - "learning_rate": 3.422896475546234e-07, - "loss": 0.8359, + "learning_rate": 3.530829686231674e-07, + "loss": 0.8744, "step": 32385 }, { - "epoch": 0.9190124858115778, + "epoch": 0.9177364051120745, "grad_norm": 0.0, - "learning_rate": 3.4205128312971623e-07, - "loss": 0.8285, + "learning_rate": 3.5284128035995127e-07, + "loss": 0.7121, "step": 32386 }, { - "epoch": 0.9190408626560727, + "epoch": 0.917764742554337, "grad_norm": 0.0, - "learning_rate": 3.4181300028593433e-07, - "loss": 0.8477, + "learning_rate": 3.525996733583348e-07, + "loss": 0.8163, "step": 32387 }, { - "epoch": 0.9190692395005675, + "epoch": 0.9177930799965995, "grad_norm": 0.0, - "learning_rate": 3.4157479902529267e-07, - "loss": 0.8459, + "learning_rate": 3.52358147620353e-07, + "loss": 0.8088, "step": 32388 }, { - "epoch": 0.9190976163450624, + "epoch": 0.9178214174388619, "grad_norm": 0.0, - "learning_rate": 3.413366793498041e-07, - "loss": 0.7475, + "learning_rate": 3.5211670314804326e-07, + "loss": 0.8685, "step": 32389 }, { - "epoch": 0.9191259931895573, + "epoch": 0.9178497548811244, "grad_norm": 0.0, - "learning_rate": 3.4109864126147806e-07, - "loss": 0.7701, + "learning_rate": 3.518753399434349e-07, + "loss": 0.8073, "step": 32390 }, { - "epoch": 0.9191543700340522, + "epoch": 0.9178780923233869, "grad_norm": 0.0, - "learning_rate": 3.4086068476232525e-07, - "loss": 0.7158, + "learning_rate": 3.5163405800856423e-07, + "loss": 0.8491, "step": 32391 }, { - "epoch": 0.9191827468785471, + "epoch": 0.9179064297656494, "grad_norm": 0.0, - "learning_rate": 3.4062280985435736e-07, - "loss": 0.8351, + "learning_rate": 3.513928573454628e-07, + "loss": 0.7816, "step": 32392 }, { - "epoch": 0.919211123723042, + "epoch": 0.9179347672079118, "grad_norm": 0.0, - "learning_rate": 3.4038501653958277e-07, - "loss": 0.8288, + "learning_rate": 3.511517379561613e-07, + "loss": 0.8774, "step": 32393 }, { - "epoch": 0.9192395005675369, + "epoch": 0.9179631046501743, "grad_norm": 0.0, - "learning_rate": 3.401473048200088e-07, - "loss": 0.6812, + "learning_rate": 3.5091069984269366e-07, + "loss": 0.7983, "step": 32394 }, { - "epoch": 0.9192678774120318, + "epoch": 0.9179914420924368, "grad_norm": 0.0, - "learning_rate": 3.39909674697646e-07, - "loss": 0.7693, + "learning_rate": 3.506697430070871e-07, + "loss": 0.7449, "step": 32395 }, { - "epoch": 0.9192962542565267, + "epoch": 0.9180197795346992, "grad_norm": 0.0, - "learning_rate": 3.3967212617449954e-07, - "loss": 0.7539, + "learning_rate": 3.504288674513723e-07, + "loss": 0.767, "step": 32396 }, { - "epoch": 0.9193246311010216, + "epoch": 0.9180481169769616, "grad_norm": 0.0, - "learning_rate": 3.394346592525788e-07, - "loss": 0.7089, + "learning_rate": 3.5018807317757975e-07, + "loss": 0.8017, "step": 32397 }, { - "epoch": 0.9193530079455164, + "epoch": 0.9180764544192241, "grad_norm": 0.0, - "learning_rate": 3.391972739338867e-07, - "loss": 0.8232, + "learning_rate": 3.499473601877357e-07, + "loss": 0.819, "step": 32398 }, { - "epoch": 0.9193813847900113, + "epoch": 0.9181047918614866, "grad_norm": 0.0, - "learning_rate": 3.3895997022042826e-07, - "loss": 0.8238, + "learning_rate": 3.497067284838673e-07, + "loss": 0.8597, "step": 32399 }, { - "epoch": 0.9194097616345063, + "epoch": 0.918133129303749, "grad_norm": 0.0, - "learning_rate": 3.3872274811421193e-07, - "loss": 0.6935, + "learning_rate": 3.4946617806800534e-07, + "loss": 0.8351, "step": 32400 }, { - "epoch": 0.9194381384790011, + "epoch": 0.9181614667460115, "grad_norm": 0.0, - "learning_rate": 3.3848560761723715e-07, - "loss": 0.8288, + "learning_rate": 3.492257089421713e-07, + "loss": 0.7587, "step": 32401 }, { - "epoch": 0.919466515323496, + "epoch": 0.918189804188274, "grad_norm": 0.0, - "learning_rate": 3.382485487315079e-07, - "loss": 0.7703, + "learning_rate": 3.4898532110839377e-07, + "loss": 0.8075, "step": 32402 }, { - "epoch": 0.919494892167991, + "epoch": 0.9182181416305364, "grad_norm": 0.0, - "learning_rate": 3.380115714590293e-07, - "loss": 0.8238, + "learning_rate": 3.4874501456869545e-07, + "loss": 0.8178, "step": 32403 }, { - "epoch": 0.9195232690124858, + "epoch": 0.9182464790727989, "grad_norm": 0.0, - "learning_rate": 3.377746758018008e-07, - "loss": 0.8388, + "learning_rate": 3.4850478932510256e-07, + "loss": 0.7815, "step": 32404 }, { - "epoch": 0.9195516458569807, + "epoch": 0.9182748165150614, "grad_norm": 0.0, - "learning_rate": 3.3753786176182303e-07, - "loss": 0.8552, + "learning_rate": 3.482646453796379e-07, + "loss": 0.7976, "step": 32405 }, { - "epoch": 0.9195800227014755, + "epoch": 0.9183031539573238, "grad_norm": 0.0, - "learning_rate": 3.3730112934109885e-07, - "loss": 0.7915, + "learning_rate": 3.4802458273432316e-07, + "loss": 0.756, "step": 32406 }, { - "epoch": 0.9196083995459705, + "epoch": 0.9183314913995863, "grad_norm": 0.0, - "learning_rate": 3.370644785416255e-07, - "loss": 0.7644, + "learning_rate": 3.477846013911823e-07, + "loss": 0.9391, "step": 32407 }, { - "epoch": 0.9196367763904654, + "epoch": 0.9183598288418487, "grad_norm": 0.0, - "learning_rate": 3.3682790936540257e-07, - "loss": 0.8167, + "learning_rate": 3.4754470135223707e-07, + "loss": 0.7474, "step": 32408 }, { - "epoch": 0.9196651532349602, + "epoch": 0.9183881662841112, "grad_norm": 0.0, - "learning_rate": 3.365914218144295e-07, - "loss": 0.7574, + "learning_rate": 3.473048826195058e-07, + "loss": 0.8458, "step": 32409 }, { - "epoch": 0.9196935300794552, + "epoch": 0.9184165037263736, "grad_norm": 0.0, - "learning_rate": 3.363550158907014e-07, - "loss": 0.8161, + "learning_rate": 3.4706514519501135e-07, + "loss": 0.7659, "step": 32410 }, { - "epoch": 0.9197219069239501, + "epoch": 0.9184448411686361, "grad_norm": 0.0, - "learning_rate": 3.361186915962178e-07, - "loss": 0.8637, + "learning_rate": 3.46825489080771e-07, + "loss": 0.8811, "step": 32411 }, { - "epoch": 0.9197502837684449, + "epoch": 0.9184731786108986, "grad_norm": 0.0, - "learning_rate": 3.358824489329748e-07, - "loss": 0.7707, + "learning_rate": 3.4658591427880305e-07, + "loss": 0.7223, "step": 32412 }, { - "epoch": 0.9197786606129399, + "epoch": 0.918501516053161, "grad_norm": 0.0, - "learning_rate": 3.356462879029665e-07, - "loss": 0.8057, + "learning_rate": 3.4634642079112825e-07, + "loss": 0.8209, "step": 32413 }, { - "epoch": 0.9198070374574348, + "epoch": 0.9185298534954235, "grad_norm": 0.0, - "learning_rate": 3.354102085081878e-07, - "loss": 0.8386, + "learning_rate": 3.4610700861976153e-07, + "loss": 0.93, "step": 32414 }, { - "epoch": 0.9198354143019296, + "epoch": 0.918558190937686, "grad_norm": 0.0, - "learning_rate": 3.3517421075063507e-07, - "loss": 0.9007, + "learning_rate": 3.4586767776672136e-07, + "loss": 0.6695, "step": 32415 }, { - "epoch": 0.9198637911464245, + "epoch": 0.9185865283799485, "grad_norm": 0.0, - "learning_rate": 3.349382946322999e-07, - "loss": 0.8553, + "learning_rate": 3.4562842823402167e-07, + "loss": 0.7744, "step": 32416 }, { - "epoch": 0.9198921679909194, + "epoch": 0.9186148658222109, "grad_norm": 0.0, - "learning_rate": 3.3470246015517403e-07, - "loss": 0.8149, + "learning_rate": 3.4538926002367855e-07, + "loss": 0.8118, "step": 32417 }, { - "epoch": 0.9199205448354143, + "epoch": 0.9186432032644734, "grad_norm": 0.0, - "learning_rate": 3.3446670732125373e-07, - "loss": 0.8016, + "learning_rate": 3.4515017313770716e-07, + "loss": 0.7895, "step": 32418 }, { - "epoch": 0.9199489216799092, + "epoch": 0.9186715407067358, "grad_norm": 0.0, - "learning_rate": 3.342310361325252e-07, - "loss": 0.8277, + "learning_rate": 3.4491116757812024e-07, + "loss": 0.818, "step": 32419 }, { - "epoch": 0.9199772985244041, + "epoch": 0.9186998781489982, "grad_norm": 0.0, - "learning_rate": 3.3399544659098224e-07, - "loss": 0.7958, + "learning_rate": 3.4467224334693295e-07, + "loss": 0.8566, "step": 32420 }, { - "epoch": 0.920005675368899, + "epoch": 0.9187282155912607, "grad_norm": 0.0, - "learning_rate": 3.3375993869861454e-07, - "loss": 0.8253, + "learning_rate": 3.4443340044615805e-07, + "loss": 0.8356, "step": 32421 }, { - "epoch": 0.9200340522133938, + "epoch": 0.9187565530335232, "grad_norm": 0.0, - "learning_rate": 3.3352451245741155e-07, - "loss": 0.7498, + "learning_rate": 3.441946388778039e-07, + "loss": 0.684, "step": 32422 }, { - "epoch": 0.9200624290578887, + "epoch": 0.9187848904757857, "grad_norm": 0.0, - "learning_rate": 3.332891678693617e-07, - "loss": 0.8512, + "learning_rate": 3.4395595864388567e-07, + "loss": 0.8558, "step": 32423 }, { - "epoch": 0.9200908059023837, + "epoch": 0.9188132279180481, "grad_norm": 0.0, - "learning_rate": 3.3305390493645115e-07, - "loss": 0.8646, + "learning_rate": 3.4371735974641053e-07, + "loss": 0.7707, "step": 32424 }, { - "epoch": 0.9201191827468785, + "epoch": 0.9188415653603106, "grad_norm": 0.0, - "learning_rate": 3.3281872366066835e-07, - "loss": 0.8732, + "learning_rate": 3.4347884218739025e-07, + "loss": 0.7997, "step": 32425 }, { - "epoch": 0.9201475595913734, + "epoch": 0.9188699028025731, "grad_norm": 0.0, - "learning_rate": 3.325836240440028e-07, - "loss": 0.8159, + "learning_rate": 3.4324040596883436e-07, + "loss": 0.8327, "step": 32426 }, { - "epoch": 0.9201759364358684, + "epoch": 0.9188982402448355, "grad_norm": 0.0, - "learning_rate": 3.3234860608843623e-07, - "loss": 0.843, + "learning_rate": 3.4300205109275007e-07, + "loss": 0.8458, "step": 32427 }, { - "epoch": 0.9202043132803632, + "epoch": 0.918926577687098, "grad_norm": 0.0, - "learning_rate": 3.3211366979595596e-07, - "loss": 0.7289, + "learning_rate": 3.4276377756114474e-07, + "loss": 0.7813, "step": 32428 }, { - "epoch": 0.9202326901248581, + "epoch": 0.9189549151293605, "grad_norm": 0.0, - "learning_rate": 3.3187881516854703e-07, - "loss": 0.7289, + "learning_rate": 3.4252558537602785e-07, + "loss": 0.8419, "step": 32429 }, { - "epoch": 0.9202610669693531, + "epoch": 0.9189832525716228, "grad_norm": 0.0, - "learning_rate": 3.3164404220819236e-07, - "loss": 0.8453, + "learning_rate": 3.4228747453940225e-07, + "loss": 0.7642, "step": 32430 }, { - "epoch": 0.9202894438138479, + "epoch": 0.9190115900138853, "grad_norm": 0.0, - "learning_rate": 3.3140935091687367e-07, - "loss": 0.8315, + "learning_rate": 3.4204944505327633e-07, + "loss": 0.6923, "step": 32431 }, { - "epoch": 0.9203178206583428, + "epoch": 0.9190399274561478, "grad_norm": 0.0, - "learning_rate": 3.31174741296576e-07, - "loss": 0.6964, + "learning_rate": 3.41811496919654e-07, + "loss": 0.848, "step": 32432 }, { - "epoch": 0.9203461975028376, + "epoch": 0.9190682648984103, "grad_norm": 0.0, - "learning_rate": 3.3094021334928007e-07, - "loss": 0.7261, + "learning_rate": 3.4157363014054034e-07, + "loss": 0.8252, "step": 32433 }, { - "epoch": 0.9203745743473326, + "epoch": 0.9190966023406727, "grad_norm": 0.0, - "learning_rate": 3.3070576707696536e-07, - "loss": 0.76, + "learning_rate": 3.413358447179393e-07, + "loss": 0.7634, "step": 32434 }, { - "epoch": 0.9204029511918275, + "epoch": 0.9191249397829352, "grad_norm": 0.0, - "learning_rate": 3.304714024816158e-07, - "loss": 0.9017, + "learning_rate": 3.410981406538527e-07, + "loss": 0.7803, "step": 32435 }, { - "epoch": 0.9204313280363223, + "epoch": 0.9191532772251977, "grad_norm": 0.0, - "learning_rate": 3.3023711956520767e-07, - "loss": 0.7398, + "learning_rate": 3.408605179502822e-07, + "loss": 0.8064, "step": 32436 }, { - "epoch": 0.9204597048808173, + "epoch": 0.9191816146674601, "grad_norm": 0.0, - "learning_rate": 3.300029183297204e-07, - "loss": 0.9238, + "learning_rate": 3.406229766092317e-07, + "loss": 0.7845, "step": 32437 }, { - "epoch": 0.9204880817253122, + "epoch": 0.9192099521097226, "grad_norm": 0.0, - "learning_rate": 3.2976879877713584e-07, - "loss": 0.7642, + "learning_rate": 3.403855166326997e-07, + "loss": 0.8756, "step": 32438 }, { - "epoch": 0.920516458569807, + "epoch": 0.919238289551985, "grad_norm": 0.0, - "learning_rate": 3.2953476090942685e-07, - "loss": 0.9384, + "learning_rate": 3.40148138022689e-07, + "loss": 0.8699, "step": 32439 }, { - "epoch": 0.9205448354143019, + "epoch": 0.9192666269942475, "grad_norm": 0.0, - "learning_rate": 3.293008047285728e-07, - "loss": 0.8449, + "learning_rate": 3.3991084078119684e-07, + "loss": 0.8704, "step": 32440 }, { - "epoch": 0.9205732122587968, + "epoch": 0.9192949644365099, "grad_norm": 0.0, - "learning_rate": 3.29066930236549e-07, - "loss": 0.8544, + "learning_rate": 3.396736249102217e-07, + "loss": 0.7739, "step": 32441 }, { - "epoch": 0.9206015891032917, + "epoch": 0.9193233018787724, "grad_norm": 0.0, - "learning_rate": 3.2883313743533265e-07, - "loss": 0.8528, + "learning_rate": 3.394364904117653e-07, + "loss": 0.8217, "step": 32442 }, { - "epoch": 0.9206299659477866, + "epoch": 0.9193516393210349, "grad_norm": 0.0, - "learning_rate": 3.2859942632689765e-07, - "loss": 0.6908, + "learning_rate": 3.3919943728782046e-07, + "loss": 0.919, "step": 32443 }, { - "epoch": 0.9206583427922815, + "epoch": 0.9193799767632973, "grad_norm": 0.0, - "learning_rate": 3.28365796913217e-07, - "loss": 0.8194, + "learning_rate": 3.389624655403867e-07, + "loss": 0.8162, "step": 32444 }, { - "epoch": 0.9206867196367764, + "epoch": 0.9194083142055598, "grad_norm": 0.0, - "learning_rate": 3.2813224919626576e-07, - "loss": 0.7982, + "learning_rate": 3.387255751714602e-07, + "loss": 0.7386, "step": 32445 }, { - "epoch": 0.9207150964812713, + "epoch": 0.9194366516478223, "grad_norm": 0.0, - "learning_rate": 3.2789878317801673e-07, - "loss": 0.8896, + "learning_rate": 3.3848876618303496e-07, + "loss": 0.8021, "step": 32446 }, { - "epoch": 0.9207434733257662, + "epoch": 0.9194649890900848, "grad_norm": 0.0, - "learning_rate": 3.276653988604406e-07, - "loss": 0.7961, + "learning_rate": 3.382520385771082e-07, + "loss": 0.8036, "step": 32447 }, { - "epoch": 0.9207718501702611, + "epoch": 0.9194933265323472, "grad_norm": 0.0, - "learning_rate": 3.274320962455102e-07, - "loss": 0.7922, + "learning_rate": 3.380153923556706e-07, + "loss": 0.882, "step": 32448 }, { - "epoch": 0.920800227014756, + "epoch": 0.9195216639746097, "grad_norm": 0.0, - "learning_rate": 3.2719887533519626e-07, - "loss": 0.8666, + "learning_rate": 3.3777882752071724e-07, + "loss": 0.7798, "step": 32449 }, { - "epoch": 0.9208286038592508, + "epoch": 0.9195500014168722, "grad_norm": 0.0, - "learning_rate": 3.269657361314671e-07, - "loss": 0.7677, + "learning_rate": 3.3754234407424204e-07, + "loss": 0.8405, "step": 32450 }, { - "epoch": 0.9208569807037458, + "epoch": 0.9195783388591345, "grad_norm": 0.0, - "learning_rate": 3.267326786362934e-07, - "loss": 0.8625, + "learning_rate": 3.3730594201823566e-07, + "loss": 0.7753, "step": 32451 }, { - "epoch": 0.9208853575482406, + "epoch": 0.919606676301397, "grad_norm": 0.0, - "learning_rate": 3.2649970285164477e-07, - "loss": 0.8465, + "learning_rate": 3.370696213546898e-07, + "loss": 0.8209, "step": 32452 }, { - "epoch": 0.9209137343927355, + "epoch": 0.9196350137436595, "grad_norm": 0.0, - "learning_rate": 3.2626680877948624e-07, - "loss": 0.8053, + "learning_rate": 3.368333820855929e-07, + "loss": 0.8144, "step": 32453 }, { - "epoch": 0.9209421112372305, + "epoch": 0.9196633511859219, "grad_norm": 0.0, - "learning_rate": 3.2603399642178737e-07, - "loss": 0.7944, + "learning_rate": 3.365972242129378e-07, + "loss": 0.8007, "step": 32454 }, { - "epoch": 0.9209704880817253, + "epoch": 0.9196916886281844, "grad_norm": 0.0, - "learning_rate": 3.258012657805154e-07, - "loss": 0.8887, + "learning_rate": 3.36361147738713e-07, + "loss": 0.8822, "step": 32455 }, { - "epoch": 0.9209988649262202, + "epoch": 0.9197200260704469, "grad_norm": 0.0, - "learning_rate": 3.255686168576333e-07, - "loss": 0.8558, + "learning_rate": 3.3612515266490675e-07, + "loss": 0.8954, "step": 32456 }, { - "epoch": 0.921027241770715, + "epoch": 0.9197483635127094, "grad_norm": 0.0, - "learning_rate": 3.2533604965510946e-07, - "loss": 0.879, + "learning_rate": 3.3588923899350645e-07, + "loss": 0.7897, "step": 32457 }, { - "epoch": 0.92105561861521, + "epoch": 0.9197767009549718, "grad_norm": 0.0, - "learning_rate": 3.251035641749056e-07, - "loss": 0.9252, + "learning_rate": 3.3565340672649935e-07, + "loss": 0.8126, "step": 32458 }, { - "epoch": 0.9210839954597049, + "epoch": 0.9198050383972343, "grad_norm": 0.0, - "learning_rate": 3.248711604189869e-07, - "loss": 0.9197, + "learning_rate": 3.3541765586587285e-07, + "loss": 0.8563, "step": 32459 }, { - "epoch": 0.9211123723041997, + "epoch": 0.9198333758394968, "grad_norm": 0.0, - "learning_rate": 3.246388383893184e-07, - "loss": 0.8581, + "learning_rate": 3.35181986413613e-07, + "loss": 0.7434, "step": 32460 }, { - "epoch": 0.9211407491486947, + "epoch": 0.9198617132817591, "grad_norm": 0.0, - "learning_rate": 3.2440659808785857e-07, - "loss": 0.8282, + "learning_rate": 3.3494639837170276e-07, + "loss": 0.8099, "step": 32461 }, { - "epoch": 0.9211691259931896, + "epoch": 0.9198900507240216, "grad_norm": 0.0, - "learning_rate": 3.2417443951657133e-07, - "loss": 0.7209, + "learning_rate": 3.3471089174212936e-07, + "loss": 0.7506, "step": 32462 }, { - "epoch": 0.9211975028376844, + "epoch": 0.9199183881662841, "grad_norm": 0.0, - "learning_rate": 3.2394236267741965e-07, - "loss": 0.8337, + "learning_rate": 3.344754665268746e-07, + "loss": 0.7673, "step": 32463 }, { - "epoch": 0.9212258796821794, + "epoch": 0.9199467256085466, "grad_norm": 0.0, - "learning_rate": 3.2371036757235963e-07, - "loss": 0.8671, + "learning_rate": 3.342401227279224e-07, + "loss": 0.7033, "step": 32464 }, { - "epoch": 0.9212542565266743, + "epoch": 0.919975063050809, "grad_norm": 0.0, - "learning_rate": 3.234784542033531e-07, - "loss": 0.7946, + "learning_rate": 3.3400486034725455e-07, + "loss": 0.7872, "step": 32465 }, { - "epoch": 0.9212826333711691, + "epoch": 0.9200034004930715, "grad_norm": 0.0, - "learning_rate": 3.232466225723607e-07, - "loss": 0.7866, + "learning_rate": 3.337696793868539e-07, + "loss": 0.8384, "step": 32466 }, { - "epoch": 0.921311010215664, + "epoch": 0.920031737935334, "grad_norm": 0.0, - "learning_rate": 3.230148726813376e-07, - "loss": 0.8367, + "learning_rate": 3.3353457984869994e-07, + "loss": 0.7847, "step": 32467 }, { - "epoch": 0.9213393870601589, + "epoch": 0.9200600753775964, "grad_norm": 0.0, - "learning_rate": 3.227832045322432e-07, - "loss": 0.8259, + "learning_rate": 3.332995617347745e-07, + "loss": 0.8618, "step": 32468 }, { - "epoch": 0.9213677639046538, + "epoch": 0.9200884128198589, "grad_norm": 0.0, - "learning_rate": 3.2255161812703496e-07, - "loss": 0.7325, + "learning_rate": 3.3306462504705706e-07, + "loss": 0.8199, "step": 32469 }, { - "epoch": 0.9213961407491487, + "epoch": 0.9201167502621214, "grad_norm": 0.0, - "learning_rate": 3.2232011346766677e-07, - "loss": 0.7551, + "learning_rate": 3.328297697875249e-07, + "loss": 0.8242, "step": 32470 }, { - "epoch": 0.9214245175936436, + "epoch": 0.9201450877043839, "grad_norm": 0.0, - "learning_rate": 3.2208869055609604e-07, - "loss": 0.6814, + "learning_rate": 3.3259499595815647e-07, + "loss": 0.6915, "step": 32471 }, { - "epoch": 0.9214528944381385, + "epoch": 0.9201734251466462, "grad_norm": 0.0, - "learning_rate": 3.218573493942767e-07, - "loss": 0.734, + "learning_rate": 3.323603035609313e-07, + "loss": 0.8733, "step": 32472 }, { - "epoch": 0.9214812712826334, + "epoch": 0.9202017625889087, "grad_norm": 0.0, - "learning_rate": 3.216260899841639e-07, - "loss": 0.881, + "learning_rate": 3.321256925978267e-07, + "loss": 0.7831, "step": 32473 }, { - "epoch": 0.9215096481271282, + "epoch": 0.9202301000311712, "grad_norm": 0.0, - "learning_rate": 3.213949123277127e-07, - "loss": 0.7743, + "learning_rate": 3.318911630708155e-07, + "loss": 0.8351, "step": 32474 }, { - "epoch": 0.9215380249716232, + "epoch": 0.9202584374734336, "grad_norm": 0.0, - "learning_rate": 3.2116381642687154e-07, - "loss": 0.8839, + "learning_rate": 3.31656714981875e-07, + "loss": 0.8177, "step": 32475 }, { - "epoch": 0.921566401816118, + "epoch": 0.9202867749156961, "grad_norm": 0.0, - "learning_rate": 3.2093280228359447e-07, - "loss": 0.8234, + "learning_rate": 3.314223483329826e-07, + "loss": 0.8434, "step": 32476 }, { - "epoch": 0.9215947786606129, + "epoch": 0.9203151123579586, "grad_norm": 0.0, - "learning_rate": 3.207018698998343e-07, - "loss": 0.7424, + "learning_rate": 3.3118806312610775e-07, + "loss": 0.8306, "step": 32477 }, { - "epoch": 0.9216231555051079, + "epoch": 0.920343449800221, "grad_norm": 0.0, - "learning_rate": 3.2047101927753956e-07, - "loss": 0.7581, + "learning_rate": 3.309538593632267e-07, + "loss": 0.8088, "step": 32478 }, { - "epoch": 0.9216515323496027, + "epoch": 0.9203717872424835, "grad_norm": 0.0, - "learning_rate": 3.2024025041866194e-07, - "loss": 0.8369, + "learning_rate": 3.307197370463133e-07, + "loss": 0.7404, "step": 32479 }, { - "epoch": 0.9216799091940976, + "epoch": 0.920400124684746, "grad_norm": 0.0, - "learning_rate": 3.2000956332514987e-07, - "loss": 0.8006, + "learning_rate": 3.304856961773362e-07, + "loss": 0.7019, "step": 32480 }, { - "epoch": 0.9217082860385926, + "epoch": 0.9204284621270085, "grad_norm": 0.0, - "learning_rate": 3.1977895799895185e-07, - "loss": 0.7651, + "learning_rate": 3.3025173675826917e-07, + "loss": 0.7313, "step": 32481 }, { - "epoch": 0.9217366628830874, + "epoch": 0.9204567995692708, "grad_norm": 0.0, - "learning_rate": 3.195484344420163e-07, - "loss": 0.8428, + "learning_rate": 3.300178587910829e-07, + "loss": 0.7873, "step": 32482 }, { - "epoch": 0.9217650397275823, + "epoch": 0.9204851370115333, "grad_norm": 0.0, - "learning_rate": 3.193179926562917e-07, - "loss": 0.8262, + "learning_rate": 3.297840622777471e-07, + "loss": 0.7885, "step": 32483 }, { - "epoch": 0.9217934165720771, + "epoch": 0.9205134744537958, "grad_norm": 0.0, - "learning_rate": 3.1908763264372087e-07, - "loss": 0.8425, + "learning_rate": 3.2955034722023214e-07, + "loss": 0.7511, "step": 32484 }, { - "epoch": 0.9218217934165721, + "epoch": 0.9205418118960582, "grad_norm": 0.0, - "learning_rate": 3.1885735440625343e-07, - "loss": 0.7648, + "learning_rate": 3.2931671362050444e-07, + "loss": 0.7414, "step": 32485 }, { - "epoch": 0.921850170261067, + "epoch": 0.9205701493383207, "grad_norm": 0.0, - "learning_rate": 3.186271579458333e-07, - "loss": 0.8534, + "learning_rate": 3.2908316148053234e-07, + "loss": 0.8526, "step": 32486 }, { - "epoch": 0.9218785471055618, + "epoch": 0.9205984867805832, "grad_norm": 0.0, - "learning_rate": 3.1839704326440235e-07, - "loss": 0.8502, + "learning_rate": 3.2884969080228644e-07, + "loss": 0.8512, "step": 32487 }, { - "epoch": 0.9219069239500568, + "epoch": 0.9206268242228457, "grad_norm": 0.0, - "learning_rate": 3.1816701036390897e-07, - "loss": 0.8172, + "learning_rate": 3.286163015877286e-07, + "loss": 0.8786, "step": 32488 }, { - "epoch": 0.9219353007945517, + "epoch": 0.9206551616651081, "grad_norm": 0.0, - "learning_rate": 3.179370592462938e-07, - "loss": 0.8114, + "learning_rate": 3.283829938388294e-07, + "loss": 0.6368, "step": 32489 }, { - "epoch": 0.9219636776390465, + "epoch": 0.9206834991073706, "grad_norm": 0.0, - "learning_rate": 3.177071899134987e-07, - "loss": 0.8157, + "learning_rate": 3.281497675575496e-07, + "loss": 0.7971, "step": 32490 }, { - "epoch": 0.9219920544835414, + "epoch": 0.9207118365496331, "grad_norm": 0.0, - "learning_rate": 3.1747740236746873e-07, - "loss": 0.7885, + "learning_rate": 3.2791662274585636e-07, + "loss": 0.7344, "step": 32491 }, { - "epoch": 0.9220204313280363, + "epoch": 0.9207401739918954, "grad_norm": 0.0, - "learning_rate": 3.172476966101401e-07, - "loss": 0.8145, + "learning_rate": 3.276835594057137e-07, + "loss": 0.8235, "step": 32492 }, { - "epoch": 0.9220488081725312, + "epoch": 0.9207685114341579, "grad_norm": 0.0, - "learning_rate": 3.1701807264345574e-07, - "loss": 0.786, + "learning_rate": 3.274505775390835e-07, + "loss": 0.8223, "step": 32493 }, { - "epoch": 0.9220771850170261, + "epoch": 0.9207968488764204, "grad_norm": 0.0, - "learning_rate": 3.167885304693552e-07, - "loss": 0.737, + "learning_rate": 3.272176771479285e-07, + "loss": 0.753, "step": 32494 }, { - "epoch": 0.922105561861521, + "epoch": 0.9208251863186829, "grad_norm": 0.0, - "learning_rate": 3.1655907008977695e-07, - "loss": 0.8049, + "learning_rate": 3.2698485823421057e-07, + "loss": 0.8084, "step": 32495 }, { - "epoch": 0.9221339387060159, + "epoch": 0.9208535237609453, "grad_norm": 0.0, - "learning_rate": 3.163296915066605e-07, - "loss": 0.8347, + "learning_rate": 3.267521207998925e-07, + "loss": 0.7503, "step": 32496 }, { - "epoch": 0.9221623155505108, + "epoch": 0.9208818612032078, "grad_norm": 0.0, - "learning_rate": 3.161003947219421e-07, - "loss": 0.8065, + "learning_rate": 3.2651946484693274e-07, + "loss": 0.7682, "step": 32497 }, { - "epoch": 0.9221906923950057, + "epoch": 0.9209101986454703, "grad_norm": 0.0, - "learning_rate": 3.15871179737558e-07, - "loss": 0.7034, + "learning_rate": 3.26286890377292e-07, + "loss": 0.7825, "step": 32498 }, { - "epoch": 0.9222190692395006, + "epoch": 0.9209385360877327, "grad_norm": 0.0, - "learning_rate": 3.156420465554466e-07, - "loss": 0.7498, + "learning_rate": 3.2605439739292863e-07, + "loss": 0.8048, "step": 32499 }, { - "epoch": 0.9222474460839954, + "epoch": 0.9209668735299952, "grad_norm": 0.0, - "learning_rate": 3.1541299517754197e-07, - "loss": 0.732, + "learning_rate": 3.258219858958023e-07, + "loss": 0.7058, "step": 32500 }, { - "epoch": 0.9222758229284903, + "epoch": 0.9209952109722577, "grad_norm": 0.0, - "learning_rate": 3.151840256057781e-07, - "loss": 0.8347, + "learning_rate": 3.2558965588786905e-07, + "loss": 0.767, "step": 32501 }, { - "epoch": 0.9223041997729853, + "epoch": 0.92102354841452, "grad_norm": 0.0, - "learning_rate": 3.149551378420901e-07, - "loss": 0.8581, + "learning_rate": 3.253574073710875e-07, + "loss": 0.8904, "step": 32502 }, { - "epoch": 0.9223325766174801, + "epoch": 0.9210518858567825, "grad_norm": 0.0, - "learning_rate": 3.1472633188841304e-07, - "loss": 0.8392, + "learning_rate": 3.2512524034741146e-07, + "loss": 0.7674, "step": 32503 }, { - "epoch": 0.922360953461975, + "epoch": 0.921080223299045, "grad_norm": 0.0, - "learning_rate": 3.144976077466766e-07, - "loss": 0.7881, + "learning_rate": 3.248931548187995e-07, + "loss": 0.7751, "step": 32504 }, { - "epoch": 0.92238933030647, + "epoch": 0.9211085607413075, "grad_norm": 0.0, - "learning_rate": 3.142689654188158e-07, - "loss": 0.9224, + "learning_rate": 3.246611507872055e-07, + "loss": 0.8569, "step": 32505 }, { - "epoch": 0.9224177071509648, + "epoch": 0.9211368981835699, "grad_norm": 0.0, - "learning_rate": 3.1404040490675915e-07, - "loss": 0.7969, + "learning_rate": 3.2442922825458243e-07, + "loss": 0.778, "step": 32506 }, { - "epoch": 0.9224460839954597, + "epoch": 0.9211652356258324, "grad_norm": 0.0, - "learning_rate": 3.138119262124384e-07, - "loss": 0.8223, + "learning_rate": 3.241973872228843e-07, + "loss": 0.7897, "step": 32507 }, { - "epoch": 0.9224744608399545, + "epoch": 0.9211935730680949, "grad_norm": 0.0, - "learning_rate": 3.1358352933778533e-07, - "loss": 0.8589, + "learning_rate": 3.23965627694065e-07, + "loss": 0.8647, "step": 32508 }, { - "epoch": 0.9225028376844495, + "epoch": 0.9212219105103573, "grad_norm": 0.0, - "learning_rate": 3.1335521428472626e-07, - "loss": 0.9095, + "learning_rate": 3.2373394967007753e-07, + "loss": 0.8401, "step": 32509 }, { - "epoch": 0.9225312145289444, + "epoch": 0.9212502479526198, "grad_norm": 0.0, - "learning_rate": 3.1312698105519066e-07, - "loss": 0.7236, + "learning_rate": 3.2350235315287136e-07, + "loss": 0.7741, "step": 32510 }, { - "epoch": 0.9225595913734392, + "epoch": 0.9212785853948823, "grad_norm": 0.0, - "learning_rate": 3.128988296511093e-07, - "loss": 0.7422, + "learning_rate": 3.232708381443983e-07, + "loss": 0.9023, "step": 32511 }, { - "epoch": 0.9225879682179342, + "epoch": 0.9213069228371448, "grad_norm": 0.0, - "learning_rate": 3.12670760074405e-07, - "loss": 0.689, + "learning_rate": 3.230394046466079e-07, + "loss": 0.7873, "step": 32512 }, { - "epoch": 0.9226163450624291, + "epoch": 0.9213352602794072, "grad_norm": 0.0, - "learning_rate": 3.1244277232700624e-07, - "loss": 0.6897, + "learning_rate": 3.228080526614519e-07, + "loss": 0.7031, "step": 32513 }, { - "epoch": 0.9226447219069239, + "epoch": 0.9213635977216696, "grad_norm": 0.0, - "learning_rate": 3.122148664108393e-07, - "loss": 0.8716, + "learning_rate": 3.2257678219087543e-07, + "loss": 0.7792, "step": 32514 }, { - "epoch": 0.9226730987514189, + "epoch": 0.9213919351639321, "grad_norm": 0.0, - "learning_rate": 3.1198704232782927e-07, - "loss": 0.876, + "learning_rate": 3.22345593236828e-07, + "loss": 0.7449, "step": 32515 }, { - "epoch": 0.9227014755959138, + "epoch": 0.9214202726061945, "grad_norm": 0.0, - "learning_rate": 3.1175930007989907e-07, - "loss": 0.8367, + "learning_rate": 3.2211448580125927e-07, + "loss": 0.7073, "step": 32516 }, { - "epoch": 0.9227298524404086, + "epoch": 0.921448610048457, "grad_norm": 0.0, - "learning_rate": 3.115316396689738e-07, - "loss": 0.7967, + "learning_rate": 3.2188345988611205e-07, + "loss": 0.835, "step": 32517 }, { - "epoch": 0.9227582292849035, + "epoch": 0.9214769474907195, "grad_norm": 0.0, - "learning_rate": 3.1130406109697643e-07, - "loss": 0.7735, + "learning_rate": 3.2165251549333585e-07, + "loss": 0.7211, "step": 32518 }, { - "epoch": 0.9227866061293984, + "epoch": 0.9215052849329819, "grad_norm": 0.0, - "learning_rate": 3.110765643658298e-07, - "loss": 0.7872, + "learning_rate": 3.2142165262487366e-07, + "loss": 0.9277, "step": 32519 }, { - "epoch": 0.9228149829738933, + "epoch": 0.9215336223752444, "grad_norm": 0.0, - "learning_rate": 3.108491494774557e-07, - "loss": 0.7725, + "learning_rate": 3.211908712826706e-07, + "loss": 0.777, "step": 32520 }, { - "epoch": 0.9228433598183882, + "epoch": 0.9215619598175069, "grad_norm": 0.0, - "learning_rate": 3.1062181643377265e-07, - "loss": 0.7589, + "learning_rate": 3.2096017146867166e-07, + "loss": 0.7462, "step": 32521 }, { - "epoch": 0.9228717366628831, + "epoch": 0.9215902972597694, "grad_norm": 0.0, - "learning_rate": 3.103945652367035e-07, - "loss": 0.8203, + "learning_rate": 3.207295531848187e-07, + "loss": 0.8262, "step": 32522 }, { - "epoch": 0.922900113507378, + "epoch": 0.9216186347020318, "grad_norm": 0.0, - "learning_rate": 3.101673958881679e-07, - "loss": 0.8128, + "learning_rate": 3.2049901643305456e-07, + "loss": 0.7791, "step": 32523 }, { - "epoch": 0.9229284903518729, + "epoch": 0.9216469721442943, "grad_norm": 0.0, - "learning_rate": 3.0994030839008206e-07, - "loss": 0.8262, + "learning_rate": 3.2026856121532336e-07, + "loss": 0.9127, "step": 32524 }, { - "epoch": 0.9229568671963677, + "epoch": 0.9216753095865567, "grad_norm": 0.0, - "learning_rate": 3.097133027443666e-07, - "loss": 0.9047, + "learning_rate": 3.200381875335634e-07, + "loss": 0.8293, "step": 32525 }, { - "epoch": 0.9229852440408627, + "epoch": 0.9217036470288191, "grad_norm": 0.0, - "learning_rate": 3.094863789529401e-07, - "loss": 0.8331, + "learning_rate": 3.198078953897177e-07, + "loss": 0.8271, "step": 32526 }, { - "epoch": 0.9230136208853575, + "epoch": 0.9217319844710816, "grad_norm": 0.0, - "learning_rate": 3.092595370177165e-07, - "loss": 0.7158, + "learning_rate": 3.195776847857235e-07, + "loss": 0.8346, "step": 32527 }, { - "epoch": 0.9230419977298524, + "epoch": 0.9217603219133441, "grad_norm": 0.0, - "learning_rate": 3.0903277694061206e-07, - "loss": 0.8808, + "learning_rate": 3.193475557235215e-07, + "loss": 0.791, "step": 32528 }, { - "epoch": 0.9230703745743474, + "epoch": 0.9217886593556066, "grad_norm": 0.0, - "learning_rate": 3.088060987235464e-07, - "loss": 0.7447, + "learning_rate": 3.191175082050502e-07, + "loss": 0.847, "step": 32529 }, { - "epoch": 0.9230987514188422, + "epoch": 0.921816996797869, "grad_norm": 0.0, - "learning_rate": 3.08579502368429e-07, - "loss": 0.6674, + "learning_rate": 3.1888754223224574e-07, + "loss": 0.9144, "step": 32530 }, { - "epoch": 0.9231271282633371, + "epoch": 0.9218453342401315, "grad_norm": 0.0, - "learning_rate": 3.083529878771774e-07, - "loss": 0.7348, + "learning_rate": 3.186576578070488e-07, + "loss": 0.8291, "step": 32531 }, { - "epoch": 0.9231555051078321, + "epoch": 0.921873671682394, "grad_norm": 0.0, - "learning_rate": 3.081265552517021e-07, - "loss": 0.8008, + "learning_rate": 3.1842785493139126e-07, + "loss": 0.7172, "step": 32532 }, { - "epoch": 0.9231838819523269, + "epoch": 0.9219020091246564, "grad_norm": 0.0, - "learning_rate": 3.0790020449391724e-07, - "loss": 0.7175, + "learning_rate": 3.1819813360721154e-07, + "loss": 0.7975, "step": 32533 }, { - "epoch": 0.9232122587968218, + "epoch": 0.9219303465669189, "grad_norm": 0.0, - "learning_rate": 3.0767393560573675e-07, - "loss": 0.8488, + "learning_rate": 3.179684938364447e-07, + "loss": 0.797, "step": 32534 }, { - "epoch": 0.9232406356413166, + "epoch": 0.9219586840091814, "grad_norm": 0.0, - "learning_rate": 3.0744774858907033e-07, - "loss": 0.9014, + "learning_rate": 3.1773893562102363e-07, + "loss": 0.7173, "step": 32535 }, { - "epoch": 0.9232690124858116, + "epoch": 0.9219870214514438, "grad_norm": 0.0, - "learning_rate": 3.072216434458275e-07, - "loss": 0.8121, + "learning_rate": 3.1750945896288353e-07, + "loss": 0.8546, "step": 32536 }, { - "epoch": 0.9232973893303065, + "epoch": 0.9220153588937062, "grad_norm": 0.0, - "learning_rate": 3.0699562017792004e-07, - "loss": 0.7281, + "learning_rate": 3.1728006386395726e-07, + "loss": 0.7169, "step": 32537 }, { - "epoch": 0.9233257661748013, + "epoch": 0.9220436963359687, "grad_norm": 0.0, - "learning_rate": 3.067696787872554e-07, - "loss": 0.8247, + "learning_rate": 3.1705075032617663e-07, + "loss": 0.8332, "step": 32538 }, { - "epoch": 0.9233541430192963, + "epoch": 0.9220720337782312, "grad_norm": 0.0, - "learning_rate": 3.065438192757431e-07, - "loss": 0.8128, + "learning_rate": 3.1682151835147336e-07, + "loss": 0.7727, "step": 32539 }, { - "epoch": 0.9233825198637912, + "epoch": 0.9221003712204936, "grad_norm": 0.0, - "learning_rate": 3.0631804164529156e-07, - "loss": 0.6649, + "learning_rate": 3.1659236794177704e-07, + "loss": 0.8038, "step": 32540 }, { - "epoch": 0.923410896708286, + "epoch": 0.9221287086627561, "grad_norm": 0.0, - "learning_rate": 3.060923458978071e-07, - "loss": 0.767, + "learning_rate": 3.1636329909901954e-07, + "loss": 0.8465, "step": 32541 }, { - "epoch": 0.9234392735527809, + "epoch": 0.9221570461050186, "grad_norm": 0.0, - "learning_rate": 3.05866732035196e-07, - "loss": 0.879, + "learning_rate": 3.1613431182513035e-07, + "loss": 0.8502, "step": 32542 }, { - "epoch": 0.9234676503972759, + "epoch": 0.922185383547281, "grad_norm": 0.0, - "learning_rate": 3.056412000593656e-07, - "loss": 0.8319, + "learning_rate": 3.159054061220379e-07, + "loss": 0.7249, "step": 32543 }, { - "epoch": 0.9234960272417707, + "epoch": 0.9222137209895435, "grad_norm": 0.0, - "learning_rate": 3.0541574997221767e-07, - "loss": 0.918, + "learning_rate": 3.156765819916696e-07, + "loss": 0.7213, "step": 32544 }, { - "epoch": 0.9235244040862656, + "epoch": 0.922242058431806, "grad_norm": 0.0, - "learning_rate": 3.051903817756596e-07, - "loss": 0.8116, + "learning_rate": 3.15447839435955e-07, + "loss": 0.8403, "step": 32545 }, { - "epoch": 0.9235527809307605, + "epoch": 0.9222703958740684, "grad_norm": 0.0, - "learning_rate": 3.049650954715955e-07, - "loss": 0.8216, + "learning_rate": 3.152191784568182e-07, + "loss": 0.822, "step": 32546 }, { - "epoch": 0.9235811577752554, + "epoch": 0.9222987333163308, "grad_norm": 0.0, - "learning_rate": 3.0473989106192593e-07, - "loss": 0.888, + "learning_rate": 3.1499059905618634e-07, + "loss": 0.8828, "step": 32547 }, { - "epoch": 0.9236095346197503, + "epoch": 0.9223270707585933, "grad_norm": 0.0, - "learning_rate": 3.045147685485539e-07, - "loss": 0.8029, + "learning_rate": 3.147621012359847e-07, + "loss": 0.8292, "step": 32548 }, { - "epoch": 0.9236379114642451, + "epoch": 0.9223554082008558, "grad_norm": 0.0, - "learning_rate": 3.042897279333812e-07, - "loss": 0.7372, + "learning_rate": 3.145336849981395e-07, + "loss": 0.7277, "step": 32549 }, { - "epoch": 0.9236662883087401, + "epoch": 0.9223837456431182, "grad_norm": 0.0, - "learning_rate": 3.0406476921830964e-07, - "loss": 0.7448, + "learning_rate": 3.143053503445748e-07, + "loss": 0.7046, "step": 32550 }, { - "epoch": 0.923694665153235, + "epoch": 0.9224120830853807, "grad_norm": 0.0, - "learning_rate": 3.038398924052399e-07, - "loss": 0.8524, + "learning_rate": 3.1407709727721114e-07, + "loss": 0.8022, "step": 32551 }, { - "epoch": 0.9237230419977298, + "epoch": 0.9224404205276432, "grad_norm": 0.0, - "learning_rate": 3.036150974960694e-07, - "loss": 0.7723, + "learning_rate": 3.138489257979727e-07, + "loss": 0.731, "step": 32552 }, { - "epoch": 0.9237514188422248, + "epoch": 0.9224687579699057, "grad_norm": 0.0, - "learning_rate": 3.0339038449269777e-07, - "loss": 0.8472, + "learning_rate": 3.1362083590878333e-07, + "loss": 0.8185, "step": 32553 }, { - "epoch": 0.9237797956867196, + "epoch": 0.9224970954121681, "grad_norm": 0.0, - "learning_rate": 3.031657533970245e-07, - "loss": 0.7379, + "learning_rate": 3.1339282761156055e-07, + "loss": 0.7313, "step": 32554 }, { - "epoch": 0.9238081725312145, + "epoch": 0.9225254328544306, "grad_norm": 0.0, - "learning_rate": 3.029412042109447e-07, - "loss": 0.827, + "learning_rate": 3.1316490090822935e-07, + "loss": 0.8431, "step": 32555 }, { - "epoch": 0.9238365493757095, + "epoch": 0.922553770296693, "grad_norm": 0.0, - "learning_rate": 3.0271673693635704e-07, - "loss": 0.8123, + "learning_rate": 3.12937055800705e-07, + "loss": 0.7075, "step": 32556 }, { - "epoch": 0.9238649262202043, + "epoch": 0.9225821077389554, "grad_norm": 0.0, - "learning_rate": 3.0249235157515767e-07, - "loss": 0.8072, + "learning_rate": 3.127092922909103e-07, + "loss": 0.7832, "step": 32557 }, { - "epoch": 0.9238933030646992, + "epoch": 0.9226104451812179, "grad_norm": 0.0, - "learning_rate": 3.022680481292406e-07, - "loss": 0.8111, + "learning_rate": 3.124816103807626e-07, + "loss": 0.8046, "step": 32558 }, { - "epoch": 0.923921679909194, + "epoch": 0.9226387826234804, "grad_norm": 0.0, - "learning_rate": 3.02043826600501e-07, - "loss": 0.8141, + "learning_rate": 3.122540100721794e-07, + "loss": 0.8287, "step": 32559 }, { - "epoch": 0.923950056753689, + "epoch": 0.9226671200657429, "grad_norm": 0.0, - "learning_rate": 3.0181968699083517e-07, - "loss": 0.8371, + "learning_rate": 3.1202649136707787e-07, + "loss": 0.7947, "step": 32560 }, { - "epoch": 0.9239784335981839, + "epoch": 0.9226954575080053, "grad_norm": 0.0, - "learning_rate": 3.015956293021327e-07, - "loss": 0.7943, + "learning_rate": 3.1179905426737435e-07, + "loss": 0.7776, "step": 32561 }, { - "epoch": 0.9240068104426787, + "epoch": 0.9227237949502678, "grad_norm": 0.0, - "learning_rate": 3.013716535362876e-07, - "loss": 0.8351, + "learning_rate": 3.1157169877498506e-07, + "loss": 0.8599, "step": 32562 }, { - "epoch": 0.9240351872871737, + "epoch": 0.9227521323925303, "grad_norm": 0.0, - "learning_rate": 3.0114775969519175e-07, - "loss": 0.798, + "learning_rate": 3.1134442489182626e-07, + "loss": 0.805, "step": 32563 }, { - "epoch": 0.9240635641316686, + "epoch": 0.9227804698347927, "grad_norm": 0.0, - "learning_rate": 3.0092394778073795e-07, - "loss": 0.8185, + "learning_rate": 3.1111723261980976e-07, + "loss": 0.6976, "step": 32564 }, { - "epoch": 0.9240919409761634, + "epoch": 0.9228088072770552, "grad_norm": 0.0, - "learning_rate": 3.0070021779481594e-07, - "loss": 0.767, + "learning_rate": 3.108901219608518e-07, + "loss": 0.7107, "step": 32565 }, { - "epoch": 0.9241203178206583, + "epoch": 0.9228371447193177, "grad_norm": 0.0, - "learning_rate": 3.004765697393142e-07, - "loss": 0.7628, + "learning_rate": 3.1066309291686414e-07, + "loss": 0.8045, "step": 32566 }, { - "epoch": 0.9241486946651533, + "epoch": 0.92286548216158, "grad_norm": 0.0, - "learning_rate": 3.002530036161222e-07, - "loss": 0.7841, + "learning_rate": 3.1043614548975864e-07, + "loss": 0.7477, "step": 32567 }, { - "epoch": 0.9241770715096481, + "epoch": 0.9228938196038425, "grad_norm": 0.0, - "learning_rate": 3.000295194271308e-07, - "loss": 0.8154, + "learning_rate": 3.1020927968144934e-07, + "loss": 0.8407, "step": 32568 }, { - "epoch": 0.924205448354143, + "epoch": 0.922922157046105, "grad_norm": 0.0, - "learning_rate": 2.99806117174225e-07, - "loss": 0.8106, + "learning_rate": 3.099824954938435e-07, + "loss": 0.7429, "step": 32569 }, { - "epoch": 0.924233825198638, + "epoch": 0.9229504944883675, "grad_norm": 0.0, - "learning_rate": 2.995827968592935e-07, - "loss": 0.7979, + "learning_rate": 3.097557929288542e-07, + "loss": 0.7303, "step": 32570 }, { - "epoch": 0.9242622020431328, + "epoch": 0.9229788319306299, "grad_norm": 0.0, - "learning_rate": 2.9935955848422347e-07, - "loss": 0.7421, + "learning_rate": 3.0952917198839084e-07, + "loss": 0.7745, "step": 32571 }, { - "epoch": 0.9242905788876277, + "epoch": 0.9230071693728924, "grad_norm": 0.0, - "learning_rate": 2.991364020508969e-07, - "loss": 0.7301, + "learning_rate": 3.093026326743609e-07, + "loss": 0.8088, "step": 32572 }, { - "epoch": 0.9243189557321226, + "epoch": 0.9230355068151549, "grad_norm": 0.0, - "learning_rate": 2.989133275612022e-07, - "loss": 0.7732, + "learning_rate": 3.090761749886728e-07, + "loss": 0.8621, "step": 32573 }, { - "epoch": 0.9243473325766175, + "epoch": 0.9230638442574173, "grad_norm": 0.0, - "learning_rate": 2.986903350170234e-07, - "loss": 0.7442, + "learning_rate": 3.088497989332351e-07, + "loss": 0.894, "step": 32574 }, { - "epoch": 0.9243757094211124, + "epoch": 0.9230921816996798, "grad_norm": 0.0, - "learning_rate": 2.9846742442024235e-07, - "loss": 0.8099, + "learning_rate": 3.0862350450995393e-07, + "loss": 0.7971, "step": 32575 }, { - "epoch": 0.9244040862656072, + "epoch": 0.9231205191419423, "grad_norm": 0.0, - "learning_rate": 2.982445957727431e-07, - "loss": 0.9075, + "learning_rate": 3.0839729172073785e-07, + "loss": 0.8046, "step": 32576 }, { - "epoch": 0.9244324631101022, + "epoch": 0.9231488565842048, "grad_norm": 0.0, - "learning_rate": 2.9802184907640973e-07, - "loss": 0.7023, + "learning_rate": 3.0817116056748863e-07, + "loss": 0.9058, "step": 32577 }, { - "epoch": 0.924460839954597, + "epoch": 0.9231771940264671, "grad_norm": 0.0, - "learning_rate": 2.977991843331196e-07, - "loss": 0.7296, + "learning_rate": 3.0794511105211254e-07, + "loss": 0.7952, "step": 32578 }, { - "epoch": 0.9244892167990919, + "epoch": 0.9232055314687296, "grad_norm": 0.0, - "learning_rate": 2.9757660154475675e-07, - "loss": 0.8413, + "learning_rate": 3.0771914317651475e-07, + "loss": 0.8609, "step": 32579 }, { - "epoch": 0.9245175936435869, + "epoch": 0.9232338689109921, "grad_norm": 0.0, - "learning_rate": 2.973541007132008e-07, - "loss": 0.7931, + "learning_rate": 3.07493256942597e-07, + "loss": 0.8768, "step": 32580 }, { - "epoch": 0.9245459704880817, + "epoch": 0.9232622063532545, "grad_norm": 0.0, - "learning_rate": 2.9713168184033024e-07, - "loss": 0.7778, + "learning_rate": 3.072674523522623e-07, + "loss": 0.6907, "step": 32581 }, { - "epoch": 0.9245743473325766, + "epoch": 0.923290543795517, "grad_norm": 0.0, - "learning_rate": 2.969093449280258e-07, - "loss": 0.9114, + "learning_rate": 3.070417294074146e-07, + "loss": 0.8765, "step": 32582 }, { - "epoch": 0.9246027241770715, + "epoch": 0.9233188812377795, "grad_norm": 0.0, - "learning_rate": 2.9668708997816267e-07, - "loss": 0.7103, + "learning_rate": 3.0681608810995355e-07, + "loss": 0.6901, "step": 32583 }, { - "epoch": 0.9246311010215664, + "epoch": 0.923347218680042, "grad_norm": 0.0, - "learning_rate": 2.964649169926204e-07, - "loss": 0.8618, + "learning_rate": 3.065905284617798e-07, + "loss": 0.7662, "step": 32584 }, { - "epoch": 0.9246594778660613, + "epoch": 0.9233755561223044, "grad_norm": 0.0, - "learning_rate": 2.9624282597327637e-07, - "loss": 0.8651, + "learning_rate": 3.06365050464793e-07, + "loss": 0.8218, "step": 32585 }, { - "epoch": 0.9246878547105561, + "epoch": 0.9234038935645669, "grad_norm": 0.0, - "learning_rate": 2.960208169220047e-07, - "loss": 0.7784, + "learning_rate": 3.0613965412089387e-07, + "loss": 0.8234, "step": 32586 }, { - "epoch": 0.9247162315550511, + "epoch": 0.9234322310068294, "grad_norm": 0.0, - "learning_rate": 2.957988898406805e-07, - "loss": 0.7253, + "learning_rate": 3.059143394319786e-07, + "loss": 0.756, "step": 32587 }, { - "epoch": 0.924744608399546, + "epoch": 0.9234605684490917, "grad_norm": 0.0, - "learning_rate": 2.955770447311812e-07, - "loss": 0.7481, + "learning_rate": 3.0568910639994785e-07, + "loss": 0.7808, "step": 32588 }, { - "epoch": 0.9247729852440408, + "epoch": 0.9234889058913542, "grad_norm": 0.0, - "learning_rate": 2.953552815953775e-07, - "loss": 0.7747, + "learning_rate": 3.05463955026698e-07, + "loss": 0.8008, "step": 32589 }, { - "epoch": 0.9248013620885358, + "epoch": 0.9235172433336167, "grad_norm": 0.0, - "learning_rate": 2.951336004351435e-07, - "loss": 0.7783, + "learning_rate": 3.0523888531412527e-07, + "loss": 0.817, "step": 32590 }, { - "epoch": 0.9248297389330307, + "epoch": 0.9235455807758791, "grad_norm": 0.0, - "learning_rate": 2.949120012523543e-07, - "loss": 0.7723, + "learning_rate": 3.0501389726412367e-07, + "loss": 0.7382, "step": 32591 }, { - "epoch": 0.9248581157775255, + "epoch": 0.9235739182181416, "grad_norm": 0.0, - "learning_rate": 2.9469048404887736e-07, - "loss": 0.7845, + "learning_rate": 3.047889908785928e-07, + "loss": 0.9228, "step": 32592 }, { - "epoch": 0.9248864926220204, + "epoch": 0.9236022556604041, "grad_norm": 0.0, - "learning_rate": 2.944690488265878e-07, - "loss": 0.7968, + "learning_rate": 3.045641661594223e-07, + "loss": 0.7652, "step": 32593 }, { - "epoch": 0.9249148694665154, + "epoch": 0.9236305931026666, "grad_norm": 0.0, - "learning_rate": 2.94247695587353e-07, - "loss": 0.7343, + "learning_rate": 3.043394231085095e-07, + "loss": 0.8751, "step": 32594 }, { - "epoch": 0.9249432463110102, + "epoch": 0.923658930544929, "grad_norm": 0.0, - "learning_rate": 2.940264243330448e-07, - "loss": 0.7959, + "learning_rate": 3.0411476172774625e-07, + "loss": 0.7833, "step": 32595 }, { - "epoch": 0.9249716231555051, + "epoch": 0.9236872679871915, "grad_norm": 0.0, - "learning_rate": 2.938052350655329e-07, - "loss": 0.8812, + "learning_rate": 3.0389018201902434e-07, + "loss": 0.7904, "step": 32596 }, { - "epoch": 0.925, + "epoch": 0.923715605429454, "grad_norm": 0.0, - "learning_rate": 2.9358412778668354e-07, - "loss": 0.8395, + "learning_rate": 3.036656839842356e-07, + "loss": 0.8422, "step": 32597 }, { - "epoch": 0.9250283768444949, + "epoch": 0.9237439428717164, "grad_norm": 0.0, - "learning_rate": 2.933631024983652e-07, - "loss": 0.8517, + "learning_rate": 3.034412676252718e-07, + "loss": 0.9535, "step": 32598 }, { - "epoch": 0.9250567536889898, + "epoch": 0.9237722803139788, "grad_norm": 0.0, - "learning_rate": 2.9314215920244525e-07, - "loss": 0.9117, + "learning_rate": 3.0321693294402264e-07, + "loss": 0.7849, "step": 32599 }, { - "epoch": 0.9250851305334846, + "epoch": 0.9238006177562413, "grad_norm": 0.0, - "learning_rate": 2.9292129790079005e-07, - "loss": 0.7578, + "learning_rate": 3.029926799423777e-07, + "loss": 0.9123, "step": 32600 }, { - "epoch": 0.9251135073779796, + "epoch": 0.9238289551985038, "grad_norm": 0.0, - "learning_rate": 2.9270051859526473e-07, - "loss": 0.752, + "learning_rate": 3.0276850862222653e-07, + "loss": 0.7886, "step": 32601 }, { - "epoch": 0.9251418842224745, + "epoch": 0.9238572926407662, "grad_norm": 0.0, - "learning_rate": 2.924798212877356e-07, - "loss": 0.7635, + "learning_rate": 3.025444189854576e-07, + "loss": 0.8102, "step": 32602 }, { - "epoch": 0.9251702610669693, + "epoch": 0.9238856300830287, "grad_norm": 0.0, - "learning_rate": 2.922592059800644e-07, - "loss": 0.8456, + "learning_rate": 3.023204110339584e-07, + "loss": 0.8951, "step": 32603 }, { - "epoch": 0.9251986379114643, + "epoch": 0.9239139675252912, "grad_norm": 0.0, - "learning_rate": 2.9203867267411537e-07, - "loss": 0.869, + "learning_rate": 3.020964847696151e-07, + "loss": 0.7474, "step": 32604 }, { - "epoch": 0.9252270147559591, + "epoch": 0.9239423049675536, "grad_norm": 0.0, - "learning_rate": 2.9181822137175464e-07, - "loss": 0.9092, + "learning_rate": 3.0187264019431506e-07, + "loss": 0.7618, "step": 32605 }, { - "epoch": 0.925255391600454, + "epoch": 0.9239706424098161, "grad_norm": 0.0, - "learning_rate": 2.915978520748397e-07, - "loss": 0.7562, + "learning_rate": 3.016488773099424e-07, + "loss": 0.7918, "step": 32606 }, { - "epoch": 0.925283768444949, + "epoch": 0.9239989798520786, "grad_norm": 0.0, - "learning_rate": 2.913775647852346e-07, - "loss": 0.7698, + "learning_rate": 3.014251961183823e-07, + "loss": 0.7893, "step": 32607 }, { - "epoch": 0.9253121452894438, + "epoch": 0.9240273172943411, "grad_norm": 0.0, - "learning_rate": 2.9115735950480117e-07, - "loss": 0.8782, + "learning_rate": 3.0120159662152095e-07, + "loss": 0.7444, "step": 32608 }, { - "epoch": 0.9253405221339387, + "epoch": 0.9240556547366034, "grad_norm": 0.0, - "learning_rate": 2.909372362353946e-07, - "loss": 0.7333, + "learning_rate": 3.00978078821238e-07, + "loss": 0.6873, "step": 32609 }, { - "epoch": 0.9253688989784336, + "epoch": 0.9240839921788659, "grad_norm": 0.0, - "learning_rate": 2.9071719497888007e-07, - "loss": 0.8317, + "learning_rate": 3.007546427194186e-07, + "loss": 0.8311, "step": 32610 }, { - "epoch": 0.9253972758229285, + "epoch": 0.9241123296211284, "grad_norm": 0.0, - "learning_rate": 2.9049723573711384e-07, - "loss": 0.8407, + "learning_rate": 3.0053128831794564e-07, + "loss": 0.7163, "step": 32611 }, { - "epoch": 0.9254256526674234, + "epoch": 0.9241406670633908, "grad_norm": 0.0, - "learning_rate": 2.9027735851195336e-07, - "loss": 0.7103, + "learning_rate": 3.003080156186988e-07, + "loss": 0.6873, "step": 32612 }, { - "epoch": 0.9254540295119182, + "epoch": 0.9241690045056533, "grad_norm": 0.0, - "learning_rate": 2.900575633052571e-07, - "loss": 0.7569, + "learning_rate": 3.0008482462355993e-07, + "loss": 0.7295, "step": 32613 }, { - "epoch": 0.9254824063564132, + "epoch": 0.9241973419479158, "grad_norm": 0.0, - "learning_rate": 2.898378501188803e-07, - "loss": 0.7405, + "learning_rate": 2.9986171533440746e-07, + "loss": 0.838, "step": 32614 }, { - "epoch": 0.9255107832009081, + "epoch": 0.9242256793901782, "grad_norm": 0.0, - "learning_rate": 2.896182189546803e-07, - "loss": 0.8339, + "learning_rate": 2.996386877531221e-07, + "loss": 0.7386, "step": 32615 }, { - "epoch": 0.9255391600454029, + "epoch": 0.9242540168324407, "grad_norm": 0.0, - "learning_rate": 2.8939866981451236e-07, - "loss": 0.8321, + "learning_rate": 2.994157418815835e-07, + "loss": 0.7917, "step": 32616 }, { - "epoch": 0.9255675368898978, + "epoch": 0.9242823542747032, "grad_norm": 0.0, - "learning_rate": 2.8917920270022934e-07, - "loss": 0.6844, + "learning_rate": 2.991928777216668e-07, + "loss": 0.8017, "step": 32617 }, { - "epoch": 0.9255959137343928, + "epoch": 0.9243106917169657, "grad_norm": 0.0, - "learning_rate": 2.8895981761368653e-07, - "loss": 0.8602, + "learning_rate": 2.989700952752528e-07, + "loss": 0.7371, "step": 32618 }, { - "epoch": 0.9256242905788876, + "epoch": 0.924339029159228, "grad_norm": 0.0, - "learning_rate": 2.887405145567368e-07, - "loss": 0.8516, + "learning_rate": 2.987473945442143e-07, + "loss": 0.9048, "step": 32619 }, { - "epoch": 0.9256526674233825, + "epoch": 0.9243673666014905, "grad_norm": 0.0, - "learning_rate": 2.8852129353123204e-07, - "loss": 0.8104, + "learning_rate": 2.985247755304288e-07, + "loss": 0.8271, "step": 32620 }, { - "epoch": 0.9256810442678775, + "epoch": 0.924395704043753, "grad_norm": 0.0, - "learning_rate": 2.883021545390241e-07, - "loss": 0.8174, + "learning_rate": 2.9830223823577367e-07, + "loss": 0.8829, "step": 32621 }, { - "epoch": 0.9257094211123723, + "epoch": 0.9244240414860154, "grad_norm": 0.0, - "learning_rate": 2.8808309758196593e-07, - "loss": 0.8378, + "learning_rate": 2.9807978266211955e-07, + "loss": 0.8183, "step": 32622 }, { - "epoch": 0.9257377979568672, + "epoch": 0.9244523789282779, "grad_norm": 0.0, - "learning_rate": 2.87864122661905e-07, - "loss": 0.7373, + "learning_rate": 2.9785740881134175e-07, + "loss": 0.7866, "step": 32623 }, { - "epoch": 0.9257661748013621, + "epoch": 0.9244807163705404, "grad_norm": 0.0, - "learning_rate": 2.8764522978069197e-07, - "loss": 0.8183, + "learning_rate": 2.9763511668531644e-07, + "loss": 0.8816, "step": 32624 }, { - "epoch": 0.925794551645857, + "epoch": 0.9245090538128029, "grad_norm": 0.0, - "learning_rate": 2.874264189401776e-07, - "loss": 0.8076, + "learning_rate": 2.97412906285911e-07, + "loss": 0.8442, "step": 32625 }, { - "epoch": 0.9258229284903519, + "epoch": 0.9245373912550653, "grad_norm": 0.0, - "learning_rate": 2.872076901422083e-07, - "loss": 0.7662, + "learning_rate": 2.971907776149996e-07, + "loss": 0.8028, "step": 32626 }, { - "epoch": 0.9258513053348467, + "epoch": 0.9245657286973278, "grad_norm": 0.0, - "learning_rate": 2.8698904338863134e-07, - "loss": 0.7355, + "learning_rate": 2.96968730674454e-07, + "loss": 0.8173, "step": 32627 }, { - "epoch": 0.9258796821793417, + "epoch": 0.9245940661395903, "grad_norm": 0.0, - "learning_rate": 2.8677047868129635e-07, - "loss": 0.6918, + "learning_rate": 2.967467654661438e-07, + "loss": 0.7439, "step": 32628 }, { - "epoch": 0.9259080590238365, + "epoch": 0.9246224035818527, "grad_norm": 0.0, - "learning_rate": 2.8655199602204755e-07, - "loss": 0.8654, + "learning_rate": 2.965248819919397e-07, + "loss": 0.8861, "step": 32629 }, { - "epoch": 0.9259364358683314, + "epoch": 0.9246507410241152, "grad_norm": 0.0, - "learning_rate": 2.8633359541273e-07, - "loss": 0.7259, + "learning_rate": 2.9630308025370815e-07, + "loss": 0.7851, "step": 32630 }, { - "epoch": 0.9259648127128264, + "epoch": 0.9246790784663776, "grad_norm": 0.0, - "learning_rate": 2.8611527685519e-07, - "loss": 0.7733, + "learning_rate": 2.960813602533197e-07, + "loss": 0.7562, "step": 32631 }, { - "epoch": 0.9259931895573212, + "epoch": 0.9247074159086401, "grad_norm": 0.0, - "learning_rate": 2.8589704035126953e-07, - "loss": 0.8652, + "learning_rate": 2.958597219926429e-07, + "loss": 0.8367, "step": 32632 }, { - "epoch": 0.9260215664018161, + "epoch": 0.9247357533509025, "grad_norm": 0.0, - "learning_rate": 2.8567888590281476e-07, - "loss": 0.8633, + "learning_rate": 2.9563816547354185e-07, + "loss": 0.783, "step": 32633 }, { - "epoch": 0.926049943246311, + "epoch": 0.924764090793165, "grad_norm": 0.0, - "learning_rate": 2.8546081351166656e-07, - "loss": 0.6837, + "learning_rate": 2.9541669069788505e-07, + "loss": 0.8158, "step": 32634 }, { - "epoch": 0.9260783200908059, + "epoch": 0.9247924282354275, "grad_norm": 0.0, - "learning_rate": 2.8524282317966776e-07, - "loss": 0.8886, + "learning_rate": 2.9519529766753654e-07, + "loss": 0.7806, "step": 32635 }, { - "epoch": 0.9261066969353008, + "epoch": 0.9248207656776899, "grad_norm": 0.0, - "learning_rate": 2.8502491490865923e-07, - "loss": 0.8162, + "learning_rate": 2.949739863843615e-07, + "loss": 0.7865, "step": 32636 }, { - "epoch": 0.9261350737797956, + "epoch": 0.9248491031199524, "grad_norm": 0.0, - "learning_rate": 2.848070887004828e-07, - "loss": 0.912, + "learning_rate": 2.947527568502251e-07, + "loss": 0.7646, "step": 32637 }, { - "epoch": 0.9261634506242906, + "epoch": 0.9248774405622149, "grad_norm": 0.0, - "learning_rate": 2.8458934455697693e-07, - "loss": 0.7961, + "learning_rate": 2.945316090669892e-07, + "loss": 0.7826, "step": 32638 }, { - "epoch": 0.9261918274687855, + "epoch": 0.9249057780044773, "grad_norm": 0.0, - "learning_rate": 2.8437168247998245e-07, - "loss": 0.78, + "learning_rate": 2.9431054303651787e-07, + "loss": 0.7982, "step": 32639 }, { - "epoch": 0.9262202043132803, + "epoch": 0.9249341154467398, "grad_norm": 0.0, - "learning_rate": 2.8415410247133457e-07, - "loss": 0.8292, + "learning_rate": 2.940895587606729e-07, + "loss": 0.862, "step": 32640 }, { - "epoch": 0.9262485811577753, + "epoch": 0.9249624528890023, "grad_norm": 0.0, - "learning_rate": 2.839366045328751e-07, - "loss": 0.831, + "learning_rate": 2.938686562413162e-07, + "loss": 0.7624, "step": 32641 }, { - "epoch": 0.9262769580022702, + "epoch": 0.9249907903312647, "grad_norm": 0.0, - "learning_rate": 2.837191886664414e-07, - "loss": 0.7173, + "learning_rate": 2.936478354803085e-07, + "loss": 0.8958, "step": 32642 }, { - "epoch": 0.926305334846765, + "epoch": 0.9250191277735271, "grad_norm": 0.0, - "learning_rate": 2.835018548738666e-07, - "loss": 0.8395, + "learning_rate": 2.934270964795094e-07, + "loss": 0.7908, "step": 32643 }, { - "epoch": 0.9263337116912599, + "epoch": 0.9250474652157896, "grad_norm": 0.0, - "learning_rate": 2.8328460315698914e-07, - "loss": 0.8272, + "learning_rate": 2.932064392407774e-07, + "loss": 0.858, "step": 32644 }, { - "epoch": 0.9263620885357549, + "epoch": 0.9250758026580521, "grad_norm": 0.0, - "learning_rate": 2.830674335176442e-07, - "loss": 0.8817, + "learning_rate": 2.929858637659733e-07, + "loss": 0.8301, "step": 32645 }, { - "epoch": 0.9263904653802497, + "epoch": 0.9251041401003145, "grad_norm": 0.0, - "learning_rate": 2.828503459576648e-07, - "loss": 0.7839, + "learning_rate": 2.9276537005695215e-07, + "loss": 0.6989, "step": 32646 }, { - "epoch": 0.9264188422247446, + "epoch": 0.925132477542577, "grad_norm": 0.0, - "learning_rate": 2.8263334047888504e-07, - "loss": 0.8367, + "learning_rate": 2.925449581155748e-07, + "loss": 0.6797, "step": 32647 }, { - "epoch": 0.9264472190692395, + "epoch": 0.9251608149848395, "grad_norm": 0.0, - "learning_rate": 2.824164170831389e-07, - "loss": 0.7653, + "learning_rate": 2.923246279436953e-07, + "loss": 0.8265, "step": 32648 }, { - "epoch": 0.9264755959137344, + "epoch": 0.925189152427102, "grad_norm": 0.0, - "learning_rate": 2.821995757722573e-07, - "loss": 0.8691, + "learning_rate": 2.9210437954316997e-07, + "loss": 0.7828, "step": 32649 }, { - "epoch": 0.9265039727582293, + "epoch": 0.9252174898693644, "grad_norm": 0.0, - "learning_rate": 2.8198281654807313e-07, - "loss": 0.8836, + "learning_rate": 2.91884212915855e-07, + "loss": 0.8454, "step": 32650 }, { - "epoch": 0.9265323496027241, + "epoch": 0.9252458273116269, "grad_norm": 0.0, - "learning_rate": 2.8176613941241826e-07, - "loss": 0.8416, + "learning_rate": 2.9166412806360455e-07, + "loss": 0.8017, "step": 32651 }, { - "epoch": 0.9265607264472191, + "epoch": 0.9252741647538893, "grad_norm": 0.0, - "learning_rate": 2.8154954436712014e-07, - "loss": 0.8264, + "learning_rate": 2.9144412498827156e-07, + "loss": 0.7603, "step": 32652 }, { - "epoch": 0.926589103291714, + "epoch": 0.9253025021961517, "grad_norm": 0.0, - "learning_rate": 2.8133303141400946e-07, - "loss": 0.7162, + "learning_rate": 2.91224203691709e-07, + "loss": 0.8512, "step": 32653 }, { - "epoch": 0.9266174801362088, + "epoch": 0.9253308396384142, "grad_norm": 0.0, - "learning_rate": 2.811166005549171e-07, - "loss": 0.7685, + "learning_rate": 2.9100436417577093e-07, + "loss": 0.9108, "step": 32654 }, { - "epoch": 0.9266458569807038, + "epoch": 0.9253591770806767, "grad_norm": 0.0, - "learning_rate": 2.809002517916681e-07, - "loss": 0.7775, + "learning_rate": 2.9078460644231033e-07, + "loss": 0.881, "step": 32655 }, { - "epoch": 0.9266742338251986, + "epoch": 0.9253875145229392, "grad_norm": 0.0, - "learning_rate": 2.806839851260923e-07, - "loss": 0.8608, + "learning_rate": 2.905649304931746e-07, + "loss": 0.8502, "step": 32656 }, { - "epoch": 0.9267026106696935, + "epoch": 0.9254158519652016, "grad_norm": 0.0, - "learning_rate": 2.8046780056001587e-07, - "loss": 0.7071, + "learning_rate": 2.9034533633021555e-07, + "loss": 0.8126, "step": 32657 }, { - "epoch": 0.9267309875141885, + "epoch": 0.9254441894074641, "grad_norm": 0.0, - "learning_rate": 2.802516980952652e-07, - "loss": 0.8698, + "learning_rate": 2.901258239552851e-07, + "loss": 0.7618, "step": 32658 }, { - "epoch": 0.9267593643586833, + "epoch": 0.9254725268497266, "grad_norm": 0.0, - "learning_rate": 2.800356777336655e-07, - "loss": 0.7666, + "learning_rate": 2.8990639337022844e-07, + "loss": 0.7634, "step": 32659 }, { - "epoch": 0.9267877412031782, + "epoch": 0.925500864291989, "grad_norm": 0.0, - "learning_rate": 2.798197394770408e-07, - "loss": 0.7099, + "learning_rate": 2.896870445768973e-07, + "loss": 0.7701, "step": 32660 }, { - "epoch": 0.926816118047673, + "epoch": 0.9255292017342515, "grad_norm": 0.0, - "learning_rate": 2.796038833272152e-07, - "loss": 0.7816, + "learning_rate": 2.894677775771382e-07, + "loss": 0.725, "step": 32661 }, { - "epoch": 0.926844494892168, + "epoch": 0.925557539176514, "grad_norm": 0.0, - "learning_rate": 2.79388109286014e-07, - "loss": 0.8253, + "learning_rate": 2.8924859237279725e-07, + "loss": 0.8003, "step": 32662 }, { - "epoch": 0.9268728717366629, + "epoch": 0.9255858766187763, "grad_norm": 0.0, - "learning_rate": 2.791724173552568e-07, - "loss": 0.8337, + "learning_rate": 2.8902948896572194e-07, + "loss": 0.8316, "step": 32663 }, { - "epoch": 0.9269012485811577, + "epoch": 0.9256142140610388, "grad_norm": 0.0, - "learning_rate": 2.789568075367677e-07, - "loss": 0.7489, + "learning_rate": 2.888104673577574e-07, + "loss": 0.8494, "step": 32664 }, { - "epoch": 0.9269296254256527, + "epoch": 0.9256425515033013, "grad_norm": 0.0, - "learning_rate": 2.7874127983236853e-07, - "loss": 0.8474, + "learning_rate": 2.885915275507467e-07, + "loss": 0.7994, "step": 32665 }, { - "epoch": 0.9269580022701476, + "epoch": 0.9256708889455638, "grad_norm": 0.0, - "learning_rate": 2.785258342438779e-07, - "loss": 0.7237, + "learning_rate": 2.883726695465372e-07, + "loss": 0.802, "step": 32666 }, { - "epoch": 0.9269863791146424, + "epoch": 0.9256992263878262, "grad_norm": 0.0, - "learning_rate": 2.783104707731166e-07, - "loss": 0.7724, + "learning_rate": 2.881538933469707e-07, + "loss": 0.7909, "step": 32667 }, { - "epoch": 0.9270147559591373, + "epoch": 0.9257275638300887, "grad_norm": 0.0, - "learning_rate": 2.7809518942190415e-07, - "loss": 0.8243, + "learning_rate": 2.879351989538914e-07, + "loss": 0.8327, "step": 32668 }, { - "epoch": 0.9270431328036323, + "epoch": 0.9257559012723512, "grad_norm": 0.0, - "learning_rate": 2.778799901920581e-07, - "loss": 0.7503, + "learning_rate": 2.877165863691389e-07, + "loss": 0.7376, "step": 32669 }, { - "epoch": 0.9270715096481271, + "epoch": 0.9257842387146136, "grad_norm": 0.0, - "learning_rate": 2.77664873085397e-07, - "loss": 0.7932, + "learning_rate": 2.8749805559455724e-07, + "loss": 0.7721, "step": 32670 }, { - "epoch": 0.927099886492622, + "epoch": 0.9258125761568761, "grad_norm": 0.0, - "learning_rate": 2.7744983810373716e-07, - "loss": 0.7433, + "learning_rate": 2.8727960663198607e-07, + "loss": 0.7846, "step": 32671 }, { - "epoch": 0.927128263337117, + "epoch": 0.9258409135991386, "grad_norm": 0.0, - "learning_rate": 2.7723488524889596e-07, - "loss": 0.7442, + "learning_rate": 2.8706123948326504e-07, + "loss": 0.779, "step": 32672 }, { - "epoch": 0.9271566401816118, + "epoch": 0.925869251041401, "grad_norm": 0.0, - "learning_rate": 2.7702001452268976e-07, - "loss": 0.7287, + "learning_rate": 2.868429541502338e-07, + "loss": 0.8481, "step": 32673 }, { - "epoch": 0.9271850170261067, + "epoch": 0.9258975884836634, "grad_norm": 0.0, - "learning_rate": 2.768052259269316e-07, - "loss": 0.8224, + "learning_rate": 2.8662475063473195e-07, + "loss": 0.8347, "step": 32674 }, { - "epoch": 0.9272133938706016, + "epoch": 0.9259259259259259, "grad_norm": 0.0, - "learning_rate": 2.765905194634366e-07, - "loss": 0.9004, + "learning_rate": 2.864066289385969e-07, + "loss": 0.735, "step": 32675 }, { - "epoch": 0.9272417707150965, + "epoch": 0.9259542633681884, "grad_norm": 0.0, - "learning_rate": 2.7637589513401896e-07, - "loss": 0.7845, + "learning_rate": 2.861885890636662e-07, + "loss": 0.7995, "step": 32676 }, { - "epoch": 0.9272701475595914, + "epoch": 0.9259826008104508, "grad_norm": 0.0, - "learning_rate": 2.7616135294049164e-07, - "loss": 0.7213, + "learning_rate": 2.859706310117749e-07, + "loss": 0.852, "step": 32677 }, { - "epoch": 0.9272985244040862, + "epoch": 0.9260109382527133, "grad_norm": 0.0, - "learning_rate": 2.759468928846654e-07, - "loss": 0.7766, + "learning_rate": 2.8575275478476047e-07, + "loss": 0.8901, "step": 32678 }, { - "epoch": 0.9273269012485812, + "epoch": 0.9260392756949758, "grad_norm": 0.0, - "learning_rate": 2.757325149683543e-07, - "loss": 0.7572, + "learning_rate": 2.8553496038445707e-07, + "loss": 0.8027, "step": 32679 }, { - "epoch": 0.927355278093076, + "epoch": 0.9260676131372383, "grad_norm": 0.0, - "learning_rate": 2.7551821919336695e-07, - "loss": 0.8024, + "learning_rate": 2.853172478127009e-07, + "loss": 0.767, "step": 32680 }, { - "epoch": 0.9273836549375709, + "epoch": 0.9260959505795007, "grad_norm": 0.0, - "learning_rate": 2.7530400556151413e-07, - "loss": 0.8255, + "learning_rate": 2.8509961707132496e-07, + "loss": 0.8307, "step": 32681 }, { - "epoch": 0.9274120317820659, + "epoch": 0.9261242880217632, "grad_norm": 0.0, - "learning_rate": 2.7508987407460664e-07, - "loss": 0.816, + "learning_rate": 2.848820681621633e-07, + "loss": 0.7855, "step": 32682 }, { - "epoch": 0.9274404086265607, + "epoch": 0.9261526254640257, "grad_norm": 0.0, - "learning_rate": 2.7487582473445076e-07, - "loss": 0.8224, + "learning_rate": 2.8466460108704685e-07, + "loss": 0.7805, "step": 32683 }, { - "epoch": 0.9274687854710556, + "epoch": 0.926180962906288, "grad_norm": 0.0, - "learning_rate": 2.7466185754285723e-07, - "loss": 0.8549, + "learning_rate": 2.8444721584780956e-07, + "loss": 0.8329, "step": 32684 }, { - "epoch": 0.9274971623155505, + "epoch": 0.9262093003485505, "grad_norm": 0.0, - "learning_rate": 2.7444797250163136e-07, - "loss": 0.8221, + "learning_rate": 2.8422991244627995e-07, + "loss": 0.8333, "step": 32685 }, { - "epoch": 0.9275255391600454, + "epoch": 0.926237637790813, "grad_norm": 0.0, - "learning_rate": 2.7423416961257944e-07, - "loss": 0.8583, + "learning_rate": 2.840126908842888e-07, + "loss": 0.7608, "step": 32686 }, { - "epoch": 0.9275539160045403, + "epoch": 0.9262659752330754, "grad_norm": 0.0, - "learning_rate": 2.740204488775111e-07, - "loss": 0.8354, + "learning_rate": 2.837955511636681e-07, + "loss": 0.9116, "step": 32687 }, { - "epoch": 0.9275822928490352, + "epoch": 0.9262943126753379, "grad_norm": 0.0, - "learning_rate": 2.738068102982283e-07, - "loss": 0.7885, + "learning_rate": 2.835784932862451e-07, + "loss": 0.8137, "step": 32688 }, { - "epoch": 0.9276106696935301, + "epoch": 0.9263226501176004, "grad_norm": 0.0, - "learning_rate": 2.735932538765362e-07, - "loss": 0.8675, + "learning_rate": 2.833615172538473e-07, + "loss": 0.8542, "step": 32689 }, { - "epoch": 0.927639046538025, + "epoch": 0.9263509875598629, "grad_norm": 0.0, - "learning_rate": 2.733797796142401e-07, - "loss": 0.8766, + "learning_rate": 2.831446230683055e-07, + "loss": 0.8433, "step": 32690 }, { - "epoch": 0.9276674233825198, + "epoch": 0.9263793250021253, "grad_norm": 0.0, - "learning_rate": 2.7316638751314074e-07, - "loss": 1.0179, + "learning_rate": 2.829278107314437e-07, + "loss": 0.7995, "step": 32691 }, { - "epoch": 0.9276958002270148, + "epoch": 0.9264076624443878, "grad_norm": 0.0, - "learning_rate": 2.729530775750433e-07, - "loss": 0.7942, + "learning_rate": 2.8271108024508943e-07, + "loss": 0.7984, "step": 32692 }, { - "epoch": 0.9277241770715097, + "epoch": 0.9264359998866503, "grad_norm": 0.0, - "learning_rate": 2.7273984980174863e-07, - "loss": 0.807, + "learning_rate": 2.824944316110678e-07, + "loss": 0.8424, "step": 32693 }, { - "epoch": 0.9277525539160045, + "epoch": 0.9264643373289126, "grad_norm": 0.0, - "learning_rate": 2.725267041950574e-07, - "loss": 0.7942, + "learning_rate": 2.8227786483120523e-07, + "loss": 0.7901, "step": 32694 }, { - "epoch": 0.9277809307604994, + "epoch": 0.9264926747711751, "grad_norm": 0.0, - "learning_rate": 2.7231364075676947e-07, - "loss": 0.8559, + "learning_rate": 2.8206137990732465e-07, + "loss": 0.7536, "step": 32695 }, { - "epoch": 0.9278093076049944, + "epoch": 0.9265210122134376, "grad_norm": 0.0, - "learning_rate": 2.721006594886877e-07, - "loss": 0.881, + "learning_rate": 2.81844976841249e-07, + "loss": 0.7865, "step": 32696 }, { - "epoch": 0.9278376844494892, + "epoch": 0.9265493496557001, "grad_norm": 0.0, - "learning_rate": 2.7188776039260735e-07, - "loss": 0.829, + "learning_rate": 2.8162865563480244e-07, + "loss": 0.8919, "step": 32697 }, { - "epoch": 0.9278660612939841, + "epoch": 0.9265776870979625, "grad_norm": 0.0, - "learning_rate": 2.716749434703281e-07, - "loss": 0.8845, + "learning_rate": 2.8141241628980576e-07, + "loss": 0.7468, "step": 32698 }, { - "epoch": 0.927894438138479, + "epoch": 0.926606024540225, "grad_norm": 0.0, - "learning_rate": 2.714622087236485e-07, - "loss": 0.8506, + "learning_rate": 2.8119625880808185e-07, + "loss": 0.6884, "step": 32699 }, { - "epoch": 0.9279228149829739, + "epoch": 0.9266343619824875, "grad_norm": 0.0, - "learning_rate": 2.712495561543649e-07, - "loss": 0.7802, + "learning_rate": 2.8098018319145157e-07, + "loss": 0.8085, "step": 32700 }, { - "epoch": 0.9279511918274688, + "epoch": 0.9266626994247499, "grad_norm": 0.0, - "learning_rate": 2.7103698576427364e-07, - "loss": 0.7894, + "learning_rate": 2.807641894417323e-07, + "loss": 0.8535, "step": 32701 }, { - "epoch": 0.9279795686719636, + "epoch": 0.9266910368670124, "grad_norm": 0.0, - "learning_rate": 2.7082449755516995e-07, - "loss": 0.8023, + "learning_rate": 2.805482775607471e-07, + "loss": 0.7146, "step": 32702 }, { - "epoch": 0.9280079455164586, + "epoch": 0.9267193743092749, "grad_norm": 0.0, - "learning_rate": 2.70612091528849e-07, - "loss": 0.7454, + "learning_rate": 2.8033244755031106e-07, + "loss": 0.8652, "step": 32703 }, { - "epoch": 0.9280363223609535, + "epoch": 0.9267477117515373, "grad_norm": 0.0, - "learning_rate": 2.703997676871062e-07, - "loss": 0.8301, + "learning_rate": 2.8011669941224616e-07, + "loss": 0.9113, "step": 32704 }, { - "epoch": 0.9280646992054483, + "epoch": 0.9267760491937997, "grad_norm": 0.0, - "learning_rate": 2.701875260317333e-07, - "loss": 0.8313, + "learning_rate": 2.7990103314836756e-07, + "loss": 0.869, "step": 32705 }, { - "epoch": 0.9280930760499433, + "epoch": 0.9268043866360622, "grad_norm": 0.0, - "learning_rate": 2.699753665645233e-07, - "loss": 0.7492, + "learning_rate": 2.796854487604905e-07, + "loss": 0.8426, "step": 32706 }, { - "epoch": 0.9281214528944381, + "epoch": 0.9268327240783247, "grad_norm": 0.0, - "learning_rate": 2.6976328928726923e-07, - "loss": 0.851, + "learning_rate": 2.7946994625043357e-07, + "loss": 0.7653, "step": 32707 }, { - "epoch": 0.928149829738933, + "epoch": 0.9268610615205871, "grad_norm": 0.0, - "learning_rate": 2.6955129420176193e-07, - "loss": 0.7371, + "learning_rate": 2.7925452562001077e-07, + "loss": 0.7117, "step": 32708 }, { - "epoch": 0.928178206583428, + "epoch": 0.9268893989628496, "grad_norm": 0.0, - "learning_rate": 2.693393813097922e-07, - "loss": 0.8242, + "learning_rate": 2.790391868710374e-07, + "loss": 0.8414, "step": 32709 }, { - "epoch": 0.9282065834279228, + "epoch": 0.9269177364051121, "grad_norm": 0.0, - "learning_rate": 2.691275506131508e-07, - "loss": 0.7856, + "learning_rate": 2.7882393000532526e-07, + "loss": 0.7463, "step": 32710 }, { - "epoch": 0.9282349602724177, + "epoch": 0.9269460738473745, "grad_norm": 0.0, - "learning_rate": 2.689158021136251e-07, - "loss": 0.745, + "learning_rate": 2.7860875502469076e-07, + "loss": 0.9142, "step": 32711 }, { - "epoch": 0.9282633371169126, + "epoch": 0.926974411289637, "grad_norm": 0.0, - "learning_rate": 2.6870413581300603e-07, - "loss": 0.8713, + "learning_rate": 2.783936619309435e-07, + "loss": 0.8074, "step": 32712 }, { - "epoch": 0.9282917139614075, + "epoch": 0.9270027487318995, "grad_norm": 0.0, - "learning_rate": 2.6849255171308097e-07, - "loss": 0.7955, + "learning_rate": 2.7817865072589765e-07, + "loss": 0.8158, "step": 32713 }, { - "epoch": 0.9283200908059024, + "epoch": 0.927031086174162, "grad_norm": 0.0, - "learning_rate": 2.682810498156363e-07, - "loss": 0.8867, + "learning_rate": 2.7796372141136174e-07, + "loss": 0.812, "step": 32714 }, { - "epoch": 0.9283484676503972, + "epoch": 0.9270594236164243, "grad_norm": 0.0, - "learning_rate": 2.6806963012245833e-07, - "loss": 0.7998, + "learning_rate": 2.777488739891476e-07, + "loss": 0.816, "step": 32715 }, { - "epoch": 0.9283768444948922, + "epoch": 0.9270877610586868, "grad_norm": 0.0, - "learning_rate": 2.678582926353357e-07, - "loss": 0.8315, + "learning_rate": 2.7753410846106496e-07, + "loss": 0.8241, "step": 32716 }, { - "epoch": 0.9284052213393871, + "epoch": 0.9271160985009493, "grad_norm": 0.0, - "learning_rate": 2.67647037356048e-07, - "loss": 0.8448, + "learning_rate": 2.773194248289235e-07, + "loss": 0.739, "step": 32717 }, { - "epoch": 0.9284335981838819, + "epoch": 0.9271444359432117, "grad_norm": 0.0, - "learning_rate": 2.67435864286385e-07, - "loss": 0.767, + "learning_rate": 2.7710482309453056e-07, + "loss": 0.7957, "step": 32718 }, { - "epoch": 0.9284619750283768, + "epoch": 0.9271727733854742, "grad_norm": 0.0, - "learning_rate": 2.6722477342813083e-07, - "loss": 0.8321, + "learning_rate": 2.7689030325969477e-07, + "loss": 0.7487, "step": 32719 }, { - "epoch": 0.9284903518728718, + "epoch": 0.9272011108277367, "grad_norm": 0.0, - "learning_rate": 2.6701376478306396e-07, - "loss": 0.7788, + "learning_rate": 2.766758653262225e-07, + "loss": 0.7935, "step": 32720 }, { - "epoch": 0.9285187287173666, + "epoch": 0.9272294482699992, "grad_norm": 0.0, - "learning_rate": 2.668028383529719e-07, - "loss": 0.7422, + "learning_rate": 2.7646150929591996e-07, + "loss": 0.7907, "step": 32721 }, { - "epoch": 0.9285471055618615, + "epoch": 0.9272577857122616, "grad_norm": 0.0, - "learning_rate": 2.665919941396311e-07, - "loss": 0.8143, + "learning_rate": 2.7624723517059247e-07, + "loss": 0.7238, "step": 32722 }, { - "epoch": 0.9285754824063565, + "epoch": 0.9272861231545241, "grad_norm": 0.0, - "learning_rate": 2.663812321448267e-07, - "loss": 0.7961, + "learning_rate": 2.760330429520453e-07, + "loss": 0.8697, "step": 32723 }, { - "epoch": 0.9286038592508513, + "epoch": 0.9273144605967866, "grad_norm": 0.0, - "learning_rate": 2.661705523703373e-07, - "loss": 0.7525, + "learning_rate": 2.7581893264208346e-07, + "loss": 0.7913, "step": 32724 }, { - "epoch": 0.9286322360953462, + "epoch": 0.927342798039049, "grad_norm": 0.0, - "learning_rate": 2.659599548179426e-07, - "loss": 0.7842, + "learning_rate": 2.756049042425091e-07, + "loss": 0.7621, "step": 32725 }, { - "epoch": 0.928660612939841, + "epoch": 0.9273711354813114, "grad_norm": 0.0, - "learning_rate": 2.6574943948942224e-07, - "loss": 0.7983, + "learning_rate": 2.7539095775512724e-07, + "loss": 0.871, "step": 32726 }, { - "epoch": 0.928688989784336, + "epoch": 0.9273994729235739, "grad_norm": 0.0, - "learning_rate": 2.6553900638655373e-07, - "loss": 0.7085, + "learning_rate": 2.751770931817366e-07, + "loss": 0.8645, "step": 32727 }, { - "epoch": 0.9287173666288309, + "epoch": 0.9274278103658363, "grad_norm": 0.0, - "learning_rate": 2.6532865551111456e-07, - "loss": 0.8469, + "learning_rate": 2.7496331052414114e-07, + "loss": 0.8459, "step": 32728 }, { - "epoch": 0.9287457434733257, + "epoch": 0.9274561478080988, "grad_norm": 0.0, - "learning_rate": 2.651183868648821e-07, - "loss": 0.8223, + "learning_rate": 2.7474960978414065e-07, + "loss": 0.7445, "step": 32729 }, { - "epoch": 0.9287741203178207, + "epoch": 0.9274844852503613, "grad_norm": 0.0, - "learning_rate": 2.649082004496328e-07, - "loss": 0.8371, + "learning_rate": 2.745359909635348e-07, + "loss": 0.8252, "step": 32730 }, { - "epoch": 0.9288024971623156, + "epoch": 0.9275128226926238, "grad_norm": 0.0, - "learning_rate": 2.646980962671408e-07, - "loss": 0.7583, + "learning_rate": 2.7432245406412425e-07, + "loss": 0.886, "step": 32731 }, { - "epoch": 0.9288308740068104, + "epoch": 0.9275411601348862, "grad_norm": 0.0, - "learning_rate": 2.6448807431918024e-07, - "loss": 0.8029, + "learning_rate": 2.741089990877088e-07, + "loss": 0.7668, "step": 32732 }, { - "epoch": 0.9288592508513054, + "epoch": 0.9275694975771487, "grad_norm": 0.0, - "learning_rate": 2.642781346075285e-07, - "loss": 0.8274, + "learning_rate": 2.7389562603608366e-07, + "loss": 0.8296, "step": 32733 }, { - "epoch": 0.9288876276958002, + "epoch": 0.9275978350194112, "grad_norm": 0.0, - "learning_rate": 2.6406827713395647e-07, - "loss": 0.8724, + "learning_rate": 2.7368233491104846e-07, + "loss": 0.7046, "step": 32734 }, { - "epoch": 0.9289160045402951, + "epoch": 0.9276261724616736, "grad_norm": 0.0, - "learning_rate": 2.638585019002371e-07, - "loss": 0.7801, + "learning_rate": 2.734691257143973e-07, + "loss": 0.759, "step": 32735 }, { - "epoch": 0.92894438138479, + "epoch": 0.927654509903936, "grad_norm": 0.0, - "learning_rate": 2.636488089081435e-07, - "loss": 0.7196, + "learning_rate": 2.7325599844792774e-07, + "loss": 0.7971, "step": 32736 }, { - "epoch": 0.9289727582292849, + "epoch": 0.9276828473461985, "grad_norm": 0.0, - "learning_rate": 2.634391981594453e-07, - "loss": 0.7579, + "learning_rate": 2.7304295311343596e-07, + "loss": 0.8641, "step": 32737 }, { - "epoch": 0.9290011350737798, + "epoch": 0.927711184788461, "grad_norm": 0.0, - "learning_rate": 2.6322966965591444e-07, - "loss": 0.8387, + "learning_rate": 2.728299897127151e-07, + "loss": 0.7482, "step": 32738 }, { - "epoch": 0.9290295119182747, + "epoch": 0.9277395222307234, "grad_norm": 0.0, - "learning_rate": 2.6302022339932066e-07, - "loss": 0.8193, + "learning_rate": 2.7261710824755814e-07, + "loss": 0.8465, "step": 32739 }, { - "epoch": 0.9290578887627696, + "epoch": 0.9277678596729859, "grad_norm": 0.0, - "learning_rate": 2.6281085939143134e-07, - "loss": 0.7658, + "learning_rate": 2.724043087197603e-07, + "loss": 0.8417, "step": 32740 }, { - "epoch": 0.9290862656072645, + "epoch": 0.9277961971152484, "grad_norm": 0.0, - "learning_rate": 2.6260157763401627e-07, - "loss": 0.8528, + "learning_rate": 2.721915911311135e-07, + "loss": 0.8149, "step": 32741 }, { - "epoch": 0.9291146424517593, + "epoch": 0.9278245345575108, "grad_norm": 0.0, - "learning_rate": 2.623923781288451e-07, - "loss": 0.8123, + "learning_rate": 2.719789554834085e-07, + "loss": 0.7841, "step": 32742 }, { - "epoch": 0.9291430192962542, + "epoch": 0.9278528719997733, "grad_norm": 0.0, - "learning_rate": 2.62183260877682e-07, - "loss": 0.7532, + "learning_rate": 2.717664017784372e-07, + "loss": 0.8566, "step": 32743 }, { - "epoch": 0.9291713961407492, + "epoch": 0.9278812094420358, "grad_norm": 0.0, - "learning_rate": 2.619742258822955e-07, - "loss": 0.8416, + "learning_rate": 2.715539300179903e-07, + "loss": 0.8148, "step": 32744 }, { - "epoch": 0.929199772985244, + "epoch": 0.9279095468842983, "grad_norm": 0.0, - "learning_rate": 2.617652731444509e-07, - "loss": 0.8211, + "learning_rate": 2.713415402038577e-07, + "loss": 0.8143, "step": 32745 }, { - "epoch": 0.9292281498297389, + "epoch": 0.9279378843265607, "grad_norm": 0.0, - "learning_rate": 2.615564026659112e-07, - "loss": 0.7533, + "learning_rate": 2.7112923233782674e-07, + "loss": 0.7953, "step": 32746 }, { - "epoch": 0.9292565266742339, + "epoch": 0.9279662217688232, "grad_norm": 0.0, - "learning_rate": 2.613476144484428e-07, - "loss": 0.7102, + "learning_rate": 2.709170064216882e-07, + "loss": 0.8217, "step": 32747 }, { - "epoch": 0.9292849035187287, + "epoch": 0.9279945592110856, "grad_norm": 0.0, - "learning_rate": 2.6113890849380875e-07, - "loss": 0.8981, + "learning_rate": 2.707048624572284e-07, + "loss": 0.7184, "step": 32748 }, { - "epoch": 0.9293132803632236, + "epoch": 0.928022896653348, "grad_norm": 0.0, - "learning_rate": 2.6093028480377203e-07, - "loss": 0.7546, + "learning_rate": 2.704928004462337e-07, + "loss": 0.716, "step": 32749 }, { - "epoch": 0.9293416572077186, + "epoch": 0.9280512340956105, "grad_norm": 0.0, - "learning_rate": 2.607217433800968e-07, - "loss": 0.7648, + "learning_rate": 2.7028082039049274e-07, + "loss": 0.8709, "step": 32750 }, { - "epoch": 0.9293700340522134, + "epoch": 0.928079571537873, "grad_norm": 0.0, - "learning_rate": 2.605132842245406e-07, - "loss": 0.828, + "learning_rate": 2.700689222917874e-07, + "loss": 0.7889, "step": 32751 }, { - "epoch": 0.9293984108967083, + "epoch": 0.9281079089801354, "grad_norm": 0.0, - "learning_rate": 2.603049073388675e-07, - "loss": 0.8145, + "learning_rate": 2.698571061519051e-07, + "loss": 0.809, "step": 32752 }, { - "epoch": 0.9294267877412031, + "epoch": 0.9281362464223979, "grad_norm": 0.0, - "learning_rate": 2.600966127248372e-07, - "loss": 0.7846, + "learning_rate": 2.6964537197263107e-07, + "loss": 0.7829, "step": 32753 }, { - "epoch": 0.9294551645856981, + "epoch": 0.9281645838646604, "grad_norm": 0.0, - "learning_rate": 2.598884003842084e-07, - "loss": 0.8365, + "learning_rate": 2.694337197557462e-07, + "loss": 0.8219, "step": 32754 }, { - "epoch": 0.929483541430193, + "epoch": 0.9281929213069229, "grad_norm": 0.0, - "learning_rate": 2.5968027031873954e-07, - "loss": 0.7545, + "learning_rate": 2.6922214950303337e-07, + "loss": 0.834, "step": 32755 }, { - "epoch": 0.9295119182746878, + "epoch": 0.9282212587491853, "grad_norm": 0.0, - "learning_rate": 2.594722225301893e-07, - "loss": 0.8113, + "learning_rate": 2.6901066121627685e-07, + "loss": 0.7932, "step": 32756 }, { - "epoch": 0.9295402951191828, + "epoch": 0.9282495961914478, "grad_norm": 0.0, - "learning_rate": 2.5926425702031523e-07, - "loss": 0.8329, + "learning_rate": 2.687992548972573e-07, + "loss": 0.8362, "step": 32757 }, { - "epoch": 0.9295686719636777, + "epoch": 0.9282779336337103, "grad_norm": 0.0, - "learning_rate": 2.5905637379087357e-07, - "loss": 0.7587, + "learning_rate": 2.6858793054775567e-07, + "loss": 0.7769, "step": 32758 }, { - "epoch": 0.9295970488081725, + "epoch": 0.9283062710759726, "grad_norm": 0.0, - "learning_rate": 2.5884857284362187e-07, - "loss": 0.845, + "learning_rate": 2.683766881695504e-07, + "loss": 0.7598, "step": 32759 }, { - "epoch": 0.9296254256526674, + "epoch": 0.9283346085182351, "grad_norm": 0.0, - "learning_rate": 2.5864085418031316e-07, - "loss": 0.8687, + "learning_rate": 2.681655277644224e-07, + "loss": 0.7938, "step": 32760 }, { - "epoch": 0.9296538024971623, + "epoch": 0.9283629459604976, "grad_norm": 0.0, - "learning_rate": 2.5843321780270267e-07, - "loss": 0.7881, + "learning_rate": 2.679544493341513e-07, + "loss": 0.8975, "step": 32761 }, { - "epoch": 0.9296821793416572, + "epoch": 0.9283912834027601, "grad_norm": 0.0, - "learning_rate": 2.5822566371254576e-07, - "loss": 0.8138, + "learning_rate": 2.677434528805123e-07, + "loss": 0.8658, "step": 32762 }, { - "epoch": 0.9297105561861521, + "epoch": 0.9284196208450225, "grad_norm": 0.0, - "learning_rate": 2.5801819191159314e-07, - "loss": 0.7734, + "learning_rate": 2.6753253840528516e-07, + "loss": 0.7999, "step": 32763 }, { - "epoch": 0.929738933030647, + "epoch": 0.928447958287285, "grad_norm": 0.0, - "learning_rate": 2.578108024016002e-07, - "loss": 0.7853, + "learning_rate": 2.67321705910244e-07, + "loss": 0.8607, "step": 32764 }, { - "epoch": 0.9297673098751419, + "epoch": 0.9284762957295475, "grad_norm": 0.0, - "learning_rate": 2.576034951843165e-07, - "loss": 0.7613, + "learning_rate": 2.671109553971674e-07, + "loss": 0.7438, "step": 32765 }, { - "epoch": 0.9297956867196367, + "epoch": 0.9285046331718099, "grad_norm": 0.0, - "learning_rate": 2.5739627026149404e-07, - "loss": 0.7095, + "learning_rate": 2.669002868678294e-07, + "loss": 0.7637, "step": 32766 }, { - "epoch": 0.9298240635641317, + "epoch": 0.9285329706140724, "grad_norm": 0.0, - "learning_rate": 2.571891276348848e-07, - "loss": 0.7887, + "learning_rate": 2.6668970032400433e-07, + "loss": 0.7662, "step": 32767 }, { - "epoch": 0.9298524404086266, + "epoch": 0.9285613080563349, "grad_norm": 0.0, - "learning_rate": 2.5698206730623507e-07, - "loss": 0.799, + "learning_rate": 2.6647919576746615e-07, + "loss": 0.7723, "step": 32768 }, { - "epoch": 0.9298808172531214, + "epoch": 0.9285896454985973, "grad_norm": 0.0, - "learning_rate": 2.567750892772958e-07, - "loss": 0.8683, + "learning_rate": 2.66268773199988e-07, + "loss": 0.778, "step": 32769 }, { - "epoch": 0.9299091940976163, + "epoch": 0.9286179829408597, "grad_norm": 0.0, - "learning_rate": 2.5656819354981765e-07, - "loss": 0.7726, + "learning_rate": 2.6605843262334284e-07, + "loss": 0.79, "step": 32770 }, { - "epoch": 0.9299375709421113, + "epoch": 0.9286463203831222, "grad_norm": 0.0, - "learning_rate": 2.563613801255438e-07, - "loss": 0.784, + "learning_rate": 2.6584817403930265e-07, + "loss": 0.7953, "step": 32771 }, { - "epoch": 0.9299659477866061, + "epoch": 0.9286746578253847, "grad_norm": 0.0, - "learning_rate": 2.561546490062239e-07, - "loss": 0.7489, + "learning_rate": 2.6563799744963704e-07, + "loss": 0.8076, "step": 32772 }, { - "epoch": 0.929994324631101, + "epoch": 0.9287029952676471, "grad_norm": 0.0, - "learning_rate": 2.559480001936043e-07, - "loss": 0.8132, + "learning_rate": 2.65427902856118e-07, + "loss": 0.8226, "step": 32773 }, { - "epoch": 0.930022701475596, + "epoch": 0.9287313327099096, "grad_norm": 0.0, - "learning_rate": 2.5574143368942817e-07, - "loss": 0.6251, + "learning_rate": 2.6521789026051516e-07, + "loss": 0.7759, "step": 32774 }, { - "epoch": 0.9300510783200908, + "epoch": 0.9287596701521721, "grad_norm": 0.0, - "learning_rate": 2.55534949495444e-07, - "loss": 0.8465, + "learning_rate": 2.6500795966459494e-07, + "loss": 0.8663, "step": 32775 }, { - "epoch": 0.9300794551645857, + "epoch": 0.9287880075944345, "grad_norm": 0.0, - "learning_rate": 2.5532854761339375e-07, - "loss": 0.7425, + "learning_rate": 2.647981110701292e-07, + "loss": 0.7789, "step": 32776 }, { - "epoch": 0.9301078320090805, + "epoch": 0.928816345036697, "grad_norm": 0.0, - "learning_rate": 2.551222280450205e-07, - "loss": 0.7996, + "learning_rate": 2.6458834447888436e-07, + "loss": 0.7709, "step": 32777 }, { - "epoch": 0.9301362088535755, + "epoch": 0.9288446824789595, "grad_norm": 0.0, - "learning_rate": 2.549159907920684e-07, - "loss": 0.7643, + "learning_rate": 2.6437865989262566e-07, + "loss": 0.7612, "step": 32778 }, { - "epoch": 0.9301645856980704, + "epoch": 0.928873019921222, "grad_norm": 0.0, - "learning_rate": 2.547098358562794e-07, - "loss": 0.7924, + "learning_rate": 2.641690573131228e-07, + "loss": 0.8471, "step": 32779 }, { - "epoch": 0.9301929625425652, + "epoch": 0.9289013573634843, "grad_norm": 0.0, - "learning_rate": 2.5450376323939316e-07, - "loss": 0.765, + "learning_rate": 2.639595367421377e-07, + "loss": 0.9312, "step": 32780 }, { - "epoch": 0.9302213393870602, + "epoch": 0.9289296948057468, "grad_norm": 0.0, - "learning_rate": 2.5429777294315394e-07, - "loss": 0.8625, + "learning_rate": 2.6375009818143673e-07, + "loss": 0.8346, "step": 32781 }, { - "epoch": 0.9302497162315551, + "epoch": 0.9289580322480093, "grad_norm": 0.0, - "learning_rate": 2.5409186496929803e-07, - "loss": 0.8162, + "learning_rate": 2.63540741632784e-07, + "loss": 0.7728, "step": 32782 }, { - "epoch": 0.9302780930760499, + "epoch": 0.9289863696902717, "grad_norm": 0.0, - "learning_rate": 2.5388603931956635e-07, - "loss": 0.7542, + "learning_rate": 2.633314670979437e-07, + "loss": 0.7974, "step": 32783 }, { - "epoch": 0.9303064699205449, + "epoch": 0.9290147071325342, "grad_norm": 0.0, - "learning_rate": 2.5368029599569744e-07, - "loss": 0.7861, + "learning_rate": 2.631222745786788e-07, + "loss": 0.856, "step": 32784 }, { - "epoch": 0.9303348467650397, + "epoch": 0.9290430445747967, "grad_norm": 0.0, - "learning_rate": 2.5347463499942993e-07, - "loss": 0.7923, + "learning_rate": 2.62913164076749e-07, + "loss": 0.7868, "step": 32785 }, { - "epoch": 0.9303632236095346, + "epoch": 0.9290713820170592, "grad_norm": 0.0, - "learning_rate": 2.532690563324991e-07, - "loss": 0.7498, + "learning_rate": 2.627041355939186e-07, + "loss": 0.9766, "step": 32786 }, { - "epoch": 0.9303916004540295, + "epoch": 0.9290997194593216, "grad_norm": 0.0, - "learning_rate": 2.5306355999664354e-07, - "loss": 0.7903, + "learning_rate": 2.6249518913194713e-07, + "loss": 0.8009, "step": 32787 }, { - "epoch": 0.9304199772985244, + "epoch": 0.9291280569015841, "grad_norm": 0.0, - "learning_rate": 2.528581459935986e-07, - "loss": 0.7908, + "learning_rate": 2.622863246925944e-07, + "loss": 0.7612, "step": 32788 }, { - "epoch": 0.9304483541430193, + "epoch": 0.9291563943438466, "grad_norm": 0.0, - "learning_rate": 2.5265281432509836e-07, - "loss": 0.8081, + "learning_rate": 2.6207754227761897e-07, + "loss": 0.831, "step": 32789 }, { - "epoch": 0.9304767309875142, + "epoch": 0.9291847317861089, "grad_norm": 0.0, - "learning_rate": 2.5244756499287817e-07, - "loss": 0.7969, + "learning_rate": 2.618688418887827e-07, + "loss": 0.8546, "step": 32790 }, { - "epoch": 0.9305051078320091, + "epoch": 0.9292130692283714, "grad_norm": 0.0, - "learning_rate": 2.52242397998671e-07, - "loss": 0.7662, + "learning_rate": 2.616602235278398e-07, + "loss": 0.8199, "step": 32791 }, { - "epoch": 0.930533484676504, + "epoch": 0.9292414066706339, "grad_norm": 0.0, - "learning_rate": 2.5203731334421113e-07, - "loss": 0.7666, + "learning_rate": 2.6145168719655e-07, + "loss": 0.8309, "step": 32792 }, { - "epoch": 0.9305618615209988, + "epoch": 0.9292697441128964, "grad_norm": 0.0, - "learning_rate": 2.518323110312293e-07, - "loss": 0.8359, + "learning_rate": 2.6124323289666744e-07, + "loss": 0.8466, "step": 32793 }, { - "epoch": 0.9305902383654937, + "epoch": 0.9292980815551588, "grad_norm": 0.0, - "learning_rate": 2.516273910614597e-07, - "loss": 0.8249, + "learning_rate": 2.6103486062995063e-07, + "loss": 0.8221, "step": 32794 }, { - "epoch": 0.9306186152099887, + "epoch": 0.9293264189974213, "grad_norm": 0.0, - "learning_rate": 2.5142255343663104e-07, - "loss": 0.7976, + "learning_rate": 2.6082657039815275e-07, + "loss": 0.8245, "step": 32795 }, { - "epoch": 0.9306469920544835, + "epoch": 0.9293547564396838, "grad_norm": 0.0, - "learning_rate": 2.5121779815847404e-07, - "loss": 0.8257, + "learning_rate": 2.6061836220303004e-07, + "loss": 0.728, "step": 32796 }, { - "epoch": 0.9306753688989784, + "epoch": 0.9293830938819462, "grad_norm": 0.0, - "learning_rate": 2.5101312522871846e-07, - "loss": 0.845, + "learning_rate": 2.6041023604633455e-07, + "loss": 0.8397, "step": 32797 }, { - "epoch": 0.9307037457434734, + "epoch": 0.9294114313242087, "grad_norm": 0.0, - "learning_rate": 2.5080853464909515e-07, - "loss": 0.7966, + "learning_rate": 2.6020219192982144e-07, + "loss": 0.7803, "step": 32798 }, { - "epoch": 0.9307321225879682, + "epoch": 0.9294397687664712, "grad_norm": 0.0, - "learning_rate": 2.506040264213283e-07, - "loss": 0.8103, + "learning_rate": 2.5999422985524157e-07, + "loss": 0.8384, "step": 32799 }, { - "epoch": 0.9307604994324631, + "epoch": 0.9294681062087335, "grad_norm": 0.0, - "learning_rate": 2.503996005471476e-07, - "loss": 0.8318, + "learning_rate": 2.5978634982434804e-07, + "loss": 0.8317, "step": 32800 }, { - "epoch": 0.930788876276958, + "epoch": 0.929496443650996, "grad_norm": 0.0, - "learning_rate": 2.5019525702828063e-07, - "loss": 0.8274, + "learning_rate": 2.5957855183889046e-07, + "loss": 0.8204, "step": 32801 }, { - "epoch": 0.9308172531214529, + "epoch": 0.9295247810932585, "grad_norm": 0.0, - "learning_rate": 2.499909958664526e-07, - "loss": 0.8039, + "learning_rate": 2.5937083590061973e-07, + "loss": 0.9321, "step": 32802 }, { - "epoch": 0.9308456299659478, + "epoch": 0.929553118535521, "grad_norm": 0.0, - "learning_rate": 2.497868170633877e-07, - "loss": 0.83, + "learning_rate": 2.591632020112855e-07, + "loss": 0.8477, "step": 32803 }, { - "epoch": 0.9308740068104426, + "epoch": 0.9295814559777834, "grad_norm": 0.0, - "learning_rate": 2.4958272062081343e-07, - "loss": 0.8535, + "learning_rate": 2.5895565017263647e-07, + "loss": 0.7968, "step": 32804 }, { - "epoch": 0.9309023836549376, + "epoch": 0.9296097934200459, "grad_norm": 0.0, - "learning_rate": 2.493787065404518e-07, - "loss": 0.8544, + "learning_rate": 2.587481803864211e-07, + "loss": 0.7267, "step": 32805 }, { - "epoch": 0.9309307604994325, + "epoch": 0.9296381308623084, "grad_norm": 0.0, - "learning_rate": 2.4917477482402585e-07, - "loss": 0.6626, + "learning_rate": 2.585407926543881e-07, + "loss": 0.721, "step": 32806 }, { - "epoch": 0.9309591373439273, + "epoch": 0.9296664683045708, "grad_norm": 0.0, - "learning_rate": 2.4897092547325976e-07, - "loss": 0.7699, + "learning_rate": 2.5833348697828277e-07, + "loss": 0.8839, "step": 32807 }, { - "epoch": 0.9309875141884223, + "epoch": 0.9296948057468333, "grad_norm": 0.0, - "learning_rate": 2.4876715848987323e-07, - "loss": 0.7717, + "learning_rate": 2.5812626335985135e-07, + "loss": 0.7078, "step": 32808 }, { - "epoch": 0.9310158910329172, + "epoch": 0.9297231431890958, "grad_norm": 0.0, - "learning_rate": 2.4856347387559045e-07, - "loss": 0.7983, + "learning_rate": 2.5791912180084033e-07, + "loss": 0.7568, "step": 32809 }, { - "epoch": 0.931044267877412, + "epoch": 0.9297514806313583, "grad_norm": 0.0, - "learning_rate": 2.483598716321289e-07, - "loss": 0.8089, + "learning_rate": 2.5771206230299497e-07, + "loss": 0.8388, "step": 32810 }, { - "epoch": 0.9310726447219069, + "epoch": 0.9297798180736206, "grad_norm": 0.0, - "learning_rate": 2.481563517612107e-07, - "loss": 0.88, + "learning_rate": 2.5750508486805825e-07, + "loss": 0.7776, "step": 32811 }, { - "epoch": 0.9311010215664018, + "epoch": 0.9298081555158831, "grad_norm": 0.0, - "learning_rate": 2.4795291426455425e-07, - "loss": 0.8278, + "learning_rate": 2.572981894977744e-07, + "loss": 0.812, "step": 32812 }, { - "epoch": 0.9311293984108967, + "epoch": 0.9298364929581456, "grad_norm": 0.0, - "learning_rate": 2.477495591438783e-07, - "loss": 0.8344, + "learning_rate": 2.5709137619388536e-07, + "loss": 0.7831, "step": 32813 }, { - "epoch": 0.9311577752553916, + "epoch": 0.929864830400408, "grad_norm": 0.0, - "learning_rate": 2.475462864008993e-07, - "loss": 0.7968, + "learning_rate": 2.5688464495813304e-07, + "loss": 0.8008, "step": 32814 }, { - "epoch": 0.9311861520998865, + "epoch": 0.9298931678426705, "grad_norm": 0.0, - "learning_rate": 2.4734309603733573e-07, - "loss": 0.8125, + "learning_rate": 2.566779957922594e-07, + "loss": 0.8407, "step": 32815 }, { - "epoch": 0.9312145289443814, + "epoch": 0.929921505284933, "grad_norm": 0.0, - "learning_rate": 2.47139988054903e-07, - "loss": 0.7776, + "learning_rate": 2.5647142869800635e-07, + "loss": 0.9191, "step": 32816 }, { - "epoch": 0.9312429057888763, + "epoch": 0.9299498427271955, "grad_norm": 0.0, - "learning_rate": 2.469369624553175e-07, - "loss": 0.797, + "learning_rate": 2.562649436771114e-07, + "loss": 0.9326, "step": 32817 }, { - "epoch": 0.9312712826333712, + "epoch": 0.9299781801694579, "grad_norm": 0.0, - "learning_rate": 2.467340192402945e-07, - "loss": 0.8005, + "learning_rate": 2.560585407313154e-07, + "loss": 0.8326, "step": 32818 }, { - "epoch": 0.9312996594778661, + "epoch": 0.9300065176117204, "grad_norm": 0.0, - "learning_rate": 2.4653115841154704e-07, - "loss": 0.7609, + "learning_rate": 2.55852219862357e-07, + "loss": 0.8763, "step": 32819 }, { - "epoch": 0.9313280363223609, + "epoch": 0.9300348550539829, "grad_norm": 0.0, - "learning_rate": 2.463283799707894e-07, - "loss": 0.7265, + "learning_rate": 2.556459810719736e-07, + "loss": 0.7368, "step": 32820 }, { - "epoch": 0.9313564131668558, + "epoch": 0.9300631924962453, "grad_norm": 0.0, - "learning_rate": 2.461256839197357e-07, - "loss": 0.7724, + "learning_rate": 2.5543982436190273e-07, + "loss": 0.7691, "step": 32821 }, { - "epoch": 0.9313847900113508, + "epoch": 0.9300915299385077, "grad_norm": 0.0, - "learning_rate": 2.4592307026009453e-07, - "loss": 0.7995, + "learning_rate": 2.552337497338797e-07, + "loss": 0.7349, "step": 32822 }, { - "epoch": 0.9314131668558456, + "epoch": 0.9301198673807702, "grad_norm": 0.0, - "learning_rate": 2.457205389935802e-07, - "loss": 0.7766, + "learning_rate": 2.55027757189642e-07, + "loss": 0.8969, "step": 32823 }, { - "epoch": 0.9314415437003405, + "epoch": 0.9301482048230326, "grad_norm": 0.0, - "learning_rate": 2.4551809012190344e-07, - "loss": 0.8686, + "learning_rate": 2.5482184673092493e-07, + "loss": 0.7555, "step": 32824 }, { - "epoch": 0.9314699205448355, + "epoch": 0.9301765422652951, "grad_norm": 0.0, - "learning_rate": 2.4531572364677406e-07, - "loss": 0.9134, + "learning_rate": 2.546160183594615e-07, + "loss": 0.8374, "step": 32825 }, { - "epoch": 0.9314982973893303, + "epoch": 0.9302048797075576, "grad_norm": 0.0, - "learning_rate": 2.4511343956990064e-07, - "loss": 0.8089, + "learning_rate": 2.5441027207698587e-07, + "loss": 0.6585, "step": 32826 }, { - "epoch": 0.9315266742338252, + "epoch": 0.9302332171498201, "grad_norm": 0.0, - "learning_rate": 2.449112378929941e-07, - "loss": 0.8116, + "learning_rate": 2.5420460788523336e-07, + "loss": 0.7269, "step": 32827 }, { - "epoch": 0.93155505107832, + "epoch": 0.9302615545920825, "grad_norm": 0.0, - "learning_rate": 2.447091186177586e-07, - "loss": 0.7918, + "learning_rate": 2.5399902578593263e-07, + "loss": 0.7131, "step": 32828 }, { - "epoch": 0.931583427922815, + "epoch": 0.930289892034345, "grad_norm": 0.0, - "learning_rate": 2.445070817459061e-07, - "loss": 0.7867, + "learning_rate": 2.537935257808177e-07, + "loss": 0.8235, "step": 32829 }, { - "epoch": 0.9316118047673099, + "epoch": 0.9303182294766075, "grad_norm": 0.0, - "learning_rate": 2.443051272791386e-07, - "loss": 0.8113, + "learning_rate": 2.5358810787161956e-07, + "loss": 0.8718, "step": 32830 }, { - "epoch": 0.9316401816118047, + "epoch": 0.9303465669188699, "grad_norm": 0.0, - "learning_rate": 2.441032552191658e-07, - "loss": 0.8231, + "learning_rate": 2.5338277206006677e-07, + "loss": 0.7032, "step": 32831 }, { - "epoch": 0.9316685584562997, + "epoch": 0.9303749043611323, "grad_norm": 0.0, - "learning_rate": 2.4390146556769077e-07, - "loss": 0.8988, + "learning_rate": 2.531775183478913e-07, + "loss": 0.7673, "step": 32832 }, { - "epoch": 0.9316969353007946, + "epoch": 0.9304032418033948, "grad_norm": 0.0, - "learning_rate": 2.436997583264189e-07, - "loss": 0.9318, + "learning_rate": 2.529723467368206e-07, + "loss": 0.8009, "step": 32833 }, { - "epoch": 0.9317253121452894, + "epoch": 0.9304315792456573, "grad_norm": 0.0, - "learning_rate": 2.434981334970532e-07, - "loss": 0.9136, + "learning_rate": 2.527672572285833e-07, + "loss": 0.8526, "step": 32834 }, { - "epoch": 0.9317536889897844, + "epoch": 0.9304599166879197, "grad_norm": 0.0, - "learning_rate": 2.432965910812979e-07, - "loss": 0.8094, + "learning_rate": 2.5256224982490584e-07, + "loss": 0.8743, "step": 32835 }, { - "epoch": 0.9317820658342792, + "epoch": 0.9304882541301822, "grad_norm": 0.0, - "learning_rate": 2.430951310808538e-07, - "loss": 0.9137, + "learning_rate": 2.5235732452751793e-07, + "loss": 0.7195, "step": 32836 }, { - "epoch": 0.9318104426787741, + "epoch": 0.9305165915724447, "grad_norm": 0.0, - "learning_rate": 2.4289375349742516e-07, - "loss": 0.662, + "learning_rate": 2.5215248133814375e-07, + "loss": 0.7939, "step": 32837 }, { - "epoch": 0.931838819523269, + "epoch": 0.9305449290147071, "grad_norm": 0.0, - "learning_rate": 2.426924583327117e-07, - "loss": 0.7786, + "learning_rate": 2.5194772025850854e-07, + "loss": 0.8385, "step": 32838 }, { - "epoch": 0.9318671963677639, + "epoch": 0.9305732664569696, "grad_norm": 0.0, - "learning_rate": 2.42491245588411e-07, - "loss": 0.8613, + "learning_rate": 2.5174304129033655e-07, + "loss": 0.7768, "step": 32839 }, { - "epoch": 0.9318955732122588, + "epoch": 0.9306016038992321, "grad_norm": 0.0, - "learning_rate": 2.4229011526622714e-07, - "loss": 0.8016, + "learning_rate": 2.5153844443535525e-07, + "loss": 0.8597, "step": 32840 }, { - "epoch": 0.9319239500567537, + "epoch": 0.9306299413414946, "grad_norm": 0.0, - "learning_rate": 2.4208906736785886e-07, - "loss": 0.9113, + "learning_rate": 2.5133392969528326e-07, + "loss": 0.8691, "step": 32841 }, { - "epoch": 0.9319523269012486, + "epoch": 0.930658278783757, "grad_norm": 0.0, - "learning_rate": 2.418881018950003e-07, - "loss": 0.7595, + "learning_rate": 2.5112949707184695e-07, + "loss": 0.8695, "step": 32842 }, { - "epoch": 0.9319807037457435, + "epoch": 0.9306866162260194, "grad_norm": 0.0, - "learning_rate": 2.416872188493535e-07, - "loss": 0.7697, + "learning_rate": 2.5092514656676727e-07, + "loss": 0.8601, "step": 32843 }, { - "epoch": 0.9320090805902383, + "epoch": 0.9307149536682819, "grad_norm": 0.0, - "learning_rate": 2.4148641823261267e-07, - "loss": 0.8975, + "learning_rate": 2.507208781817638e-07, + "loss": 0.8802, "step": 32844 }, { - "epoch": 0.9320374574347332, + "epoch": 0.9307432911105443, "grad_norm": 0.0, - "learning_rate": 2.4128570004647525e-07, - "loss": 0.7712, + "learning_rate": 2.5051669191856087e-07, + "loss": 0.7202, "step": 32845 }, { - "epoch": 0.9320658342792282, + "epoch": 0.9307716285528068, "grad_norm": 0.0, - "learning_rate": 2.4108506429263547e-07, - "loss": 0.7998, + "learning_rate": 2.503125877788748e-07, + "loss": 0.6507, "step": 32846 }, { - "epoch": 0.932094211123723, + "epoch": 0.9307999659950693, "grad_norm": 0.0, - "learning_rate": 2.4088451097278973e-07, - "loss": 0.8127, + "learning_rate": 2.501085657644264e-07, + "loss": 0.7129, "step": 32847 }, { - "epoch": 0.9321225879682179, + "epoch": 0.9308283034373317, "grad_norm": 0.0, - "learning_rate": 2.406840400886301e-07, - "loss": 0.7947, + "learning_rate": 2.499046258769333e-07, + "loss": 0.8707, "step": 32848 }, { - "epoch": 0.9321509648127129, + "epoch": 0.9308566408795942, "grad_norm": 0.0, - "learning_rate": 2.404836516418518e-07, - "loss": 0.8082, + "learning_rate": 2.4970076811811514e-07, + "loss": 0.8152, "step": 32849 }, { - "epoch": 0.9321793416572077, + "epoch": 0.9308849783218567, "grad_norm": 0.0, - "learning_rate": 2.4028334563414693e-07, - "loss": 0.8332, + "learning_rate": 2.494969924896884e-07, + "loss": 0.8394, "step": 32850 }, { - "epoch": 0.9322077185017026, + "epoch": 0.9309133157641192, "grad_norm": 0.0, - "learning_rate": 2.400831220672062e-07, - "loss": 0.7776, + "learning_rate": 2.492932989933683e-07, + "loss": 0.8277, "step": 32851 }, { - "epoch": 0.9322360953461976, + "epoch": 0.9309416532063816, "grad_norm": 0.0, - "learning_rate": 2.398829809427228e-07, - "loss": 0.7759, + "learning_rate": 2.4908968763087235e-07, + "loss": 0.7749, "step": 32852 }, { - "epoch": 0.9322644721906924, + "epoch": 0.930969990648644, "grad_norm": 0.0, - "learning_rate": 2.3968292226238756e-07, - "loss": 0.8201, + "learning_rate": 2.4888615840391485e-07, + "loss": 0.7453, "step": 32853 }, { - "epoch": 0.9322928490351873, + "epoch": 0.9309983280909065, "grad_norm": 0.0, - "learning_rate": 2.394829460278891e-07, - "loss": 0.8616, + "learning_rate": 2.4868271131420985e-07, + "loss": 0.7999, "step": 32854 }, { - "epoch": 0.9323212258796821, + "epoch": 0.9310266655331689, "grad_norm": 0.0, - "learning_rate": 2.392830522409162e-07, - "loss": 0.7324, + "learning_rate": 2.484793463634716e-07, + "loss": 0.7687, "step": 32855 }, { - "epoch": 0.9323496027241771, + "epoch": 0.9310550029754314, "grad_norm": 0.0, - "learning_rate": 2.390832409031574e-07, - "loss": 0.7543, + "learning_rate": 2.4827606355341317e-07, + "loss": 0.8004, "step": 32856 }, { - "epoch": 0.932377979568672, + "epoch": 0.9310833404176939, "grad_norm": 0.0, - "learning_rate": 2.3888351201630243e-07, - "loss": 0.8575, + "learning_rate": 2.480728628857465e-07, + "loss": 0.7175, "step": 32857 }, { - "epoch": 0.9324063564131668, + "epoch": 0.9311116778599564, "grad_norm": 0.0, - "learning_rate": 2.386838655820378e-07, - "loss": 0.8153, + "learning_rate": 2.478697443621836e-07, + "loss": 0.8549, "step": 32858 }, { - "epoch": 0.9324347332576618, + "epoch": 0.9311400153022188, "grad_norm": 0.0, - "learning_rate": 2.384843016020488e-07, - "loss": 0.7533, + "learning_rate": 2.4766670798443414e-07, + "loss": 0.7595, "step": 32859 }, { - "epoch": 0.9324631101021567, + "epoch": 0.9311683527444813, "grad_norm": 0.0, - "learning_rate": 2.382848200780208e-07, - "loss": 0.8083, + "learning_rate": 2.474637537542102e-07, + "loss": 0.7718, "step": 32860 }, { - "epoch": 0.9324914869466515, + "epoch": 0.9311966901867438, "grad_norm": 0.0, - "learning_rate": 2.3808542101164122e-07, - "loss": 0.8404, + "learning_rate": 2.472608816732203e-07, + "loss": 0.7314, "step": 32861 }, { - "epoch": 0.9325198637911464, + "epoch": 0.9312250276290062, "grad_norm": 0.0, - "learning_rate": 2.3788610440459214e-07, - "loss": 0.8128, + "learning_rate": 2.470580917431742e-07, + "loss": 0.8219, "step": 32862 }, { - "epoch": 0.9325482406356413, + "epoch": 0.9312533650712687, "grad_norm": 0.0, - "learning_rate": 2.3768687025855774e-07, - "loss": 0.7236, + "learning_rate": 2.4685538396577835e-07, + "loss": 0.8554, "step": 32863 }, { - "epoch": 0.9325766174801362, + "epoch": 0.9312817025135312, "grad_norm": 0.0, - "learning_rate": 2.3748771857522224e-07, - "loss": 0.6872, + "learning_rate": 2.466527583427425e-07, + "loss": 0.722, "step": 32864 }, { - "epoch": 0.9326049943246311, + "epoch": 0.9313100399557936, "grad_norm": 0.0, - "learning_rate": 2.372886493562654e-07, - "loss": 0.7561, + "learning_rate": 2.464502148757719e-07, + "loss": 0.8451, "step": 32865 }, { - "epoch": 0.932633371169126, + "epoch": 0.931338377398056, "grad_norm": 0.0, - "learning_rate": 2.370896626033714e-07, - "loss": 0.9048, + "learning_rate": 2.4624775356657417e-07, + "loss": 0.7882, "step": 32866 }, { - "epoch": 0.9326617480136209, + "epoch": 0.9313667148403185, "grad_norm": 0.0, - "learning_rate": 2.3689075831822006e-07, - "loss": 0.8029, + "learning_rate": 2.460453744168523e-07, + "loss": 0.7978, "step": 32867 }, { - "epoch": 0.9326901248581158, + "epoch": 0.931395052282581, "grad_norm": 0.0, - "learning_rate": 2.3669193650249e-07, - "loss": 0.8129, + "learning_rate": 2.458430774283116e-07, + "loss": 0.875, "step": 32868 }, { - "epoch": 0.9327185017026107, + "epoch": 0.9314233897248434, "grad_norm": 0.0, - "learning_rate": 2.364931971578621e-07, - "loss": 0.7742, + "learning_rate": 2.456408626026585e-07, + "loss": 0.8122, "step": 32869 }, { - "epoch": 0.9327468785471056, + "epoch": 0.9314517271671059, "grad_norm": 0.0, - "learning_rate": 2.3629454028601617e-07, - "loss": 0.8302, + "learning_rate": 2.454387299415928e-07, + "loss": 0.7335, "step": 32870 }, { - "epoch": 0.9327752553916004, + "epoch": 0.9314800646093684, "grad_norm": 0.0, - "learning_rate": 2.3609596588862748e-07, - "loss": 0.8653, + "learning_rate": 2.4523667944682085e-07, + "loss": 0.8599, "step": 32871 }, { - "epoch": 0.9328036322360953, + "epoch": 0.9315084020516308, "grad_norm": 0.0, - "learning_rate": 2.3589747396737804e-07, - "loss": 0.8038, + "learning_rate": 2.450347111200413e-07, + "loss": 0.6976, "step": 32872 }, { - "epoch": 0.9328320090805903, + "epoch": 0.9315367394938933, "grad_norm": 0.0, - "learning_rate": 2.3569906452393876e-07, - "loss": 0.779, + "learning_rate": 2.448328249629572e-07, + "loss": 0.6672, "step": 32873 }, { - "epoch": 0.9328603859250851, + "epoch": 0.9315650769361558, "grad_norm": 0.0, - "learning_rate": 2.3550073755998937e-07, - "loss": 0.7747, + "learning_rate": 2.4463102097726843e-07, + "loss": 0.9163, "step": 32874 }, { - "epoch": 0.93288876276958, + "epoch": 0.9315934143784182, "grad_norm": 0.0, - "learning_rate": 2.3530249307720521e-07, - "loss": 0.8001, + "learning_rate": 2.444292991646746e-07, + "loss": 0.8144, "step": 32875 }, { - "epoch": 0.932917139614075, + "epoch": 0.9316217518206806, "grad_norm": 0.0, - "learning_rate": 2.3510433107725827e-07, - "loss": 0.8715, + "learning_rate": 2.4422765952687666e-07, + "loss": 0.802, "step": 32876 }, { - "epoch": 0.9329455164585698, + "epoch": 0.9316500892629431, "grad_norm": 0.0, - "learning_rate": 2.34906251561825e-07, - "loss": 0.6895, + "learning_rate": 2.440261020655721e-07, + "loss": 0.8751, "step": 32877 }, { - "epoch": 0.9329738933030647, + "epoch": 0.9316784267052056, "grad_norm": 0.0, - "learning_rate": 2.347082545325774e-07, - "loss": 0.7736, + "learning_rate": 2.4382462678245735e-07, + "loss": 0.8382, "step": 32878 }, { - "epoch": 0.9330022701475595, + "epoch": 0.931706764147468, "grad_norm": 0.0, - "learning_rate": 2.3451033999118854e-07, - "loss": 0.83, + "learning_rate": 2.4362323367923216e-07, + "loss": 0.8336, "step": 32879 }, { - "epoch": 0.9330306469920545, + "epoch": 0.9317351015897305, "grad_norm": 0.0, - "learning_rate": 2.3431250793932825e-07, - "loss": 0.8935, + "learning_rate": 2.434219227575896e-07, + "loss": 0.7511, "step": 32880 }, { - "epoch": 0.9330590238365494, + "epoch": 0.931763439031993, "grad_norm": 0.0, - "learning_rate": 2.3411475837867182e-07, - "loss": 0.8535, + "learning_rate": 2.4322069401922723e-07, + "loss": 0.7217, "step": 32881 }, { - "epoch": 0.9330874006810442, + "epoch": 0.9317917764742555, "grad_norm": 0.0, - "learning_rate": 2.3391709131088457e-07, - "loss": 0.7633, + "learning_rate": 2.4301954746584145e-07, + "loss": 0.7755, "step": 32882 }, { - "epoch": 0.9331157775255392, + "epoch": 0.9318201139165179, "grad_norm": 0.0, - "learning_rate": 2.3371950673763966e-07, - "loss": 0.7347, + "learning_rate": 2.4281848309912425e-07, + "loss": 0.8456, "step": 32883 }, { - "epoch": 0.9331441543700341, + "epoch": 0.9318484513587804, "grad_norm": 0.0, - "learning_rate": 2.3352200466060571e-07, - "loss": 0.7022, + "learning_rate": 2.4261750092077095e-07, + "loss": 0.8094, "step": 32884 }, { - "epoch": 0.9331725312145289, + "epoch": 0.9318767888010429, "grad_norm": 0.0, - "learning_rate": 2.3332458508144916e-07, - "loss": 0.7872, + "learning_rate": 2.4241660093247356e-07, + "loss": 0.7241, "step": 32885 }, { - "epoch": 0.9332009080590238, + "epoch": 0.9319051262433052, "grad_norm": 0.0, - "learning_rate": 2.3312724800183983e-07, - "loss": 0.8155, + "learning_rate": 2.4221578313592397e-07, + "loss": 0.8401, "step": 32886 }, { - "epoch": 0.9332292849035188, + "epoch": 0.9319334636855677, "grad_norm": 0.0, - "learning_rate": 2.3292999342344303e-07, - "loss": 0.81, + "learning_rate": 2.4201504753281424e-07, + "loss": 0.8499, "step": 32887 }, { - "epoch": 0.9332576617480136, + "epoch": 0.9319618011278302, "grad_norm": 0.0, - "learning_rate": 2.327328213479252e-07, - "loss": 0.7779, + "learning_rate": 2.418143941248352e-07, + "loss": 0.8513, "step": 32888 }, { - "epoch": 0.9332860385925085, + "epoch": 0.9319901385700927, "grad_norm": 0.0, - "learning_rate": 2.3253573177695388e-07, - "loss": 0.717, + "learning_rate": 2.4161382291367776e-07, + "loss": 0.8185, "step": 32889 }, { - "epoch": 0.9333144154370034, + "epoch": 0.9320184760123551, "grad_norm": 0.0, - "learning_rate": 2.323387247121911e-07, - "loss": 0.7579, + "learning_rate": 2.414133339010305e-07, + "loss": 0.8604, "step": 32890 }, { - "epoch": 0.9333427922814983, + "epoch": 0.9320468134546176, "grad_norm": 0.0, - "learning_rate": 2.321418001553022e-07, - "loss": 0.8109, + "learning_rate": 2.4121292708858324e-07, + "loss": 0.7944, "step": 32891 }, { - "epoch": 0.9333711691259932, + "epoch": 0.9320751508968801, "grad_norm": 0.0, - "learning_rate": 2.3194495810795137e-07, - "loss": 0.8576, + "learning_rate": 2.410126024780224e-07, + "loss": 0.712, "step": 32892 }, { - "epoch": 0.9333995459704881, + "epoch": 0.9321034883391425, "grad_norm": 0.0, - "learning_rate": 2.3174819857180065e-07, - "loss": 0.8279, + "learning_rate": 2.408123600710366e-07, + "loss": 0.6886, "step": 32893 }, { - "epoch": 0.933427922814983, + "epoch": 0.932131825781405, "grad_norm": 0.0, - "learning_rate": 2.315515215485109e-07, - "loss": 0.9006, + "learning_rate": 2.4061219986931226e-07, + "loss": 0.7926, "step": 32894 }, { - "epoch": 0.9334562996594779, + "epoch": 0.9321601632236675, "grad_norm": 0.0, - "learning_rate": 2.3135492703974638e-07, - "loss": 0.8073, + "learning_rate": 2.404121218745359e-07, + "loss": 0.7021, "step": 32895 }, { - "epoch": 0.9334846765039727, + "epoch": 0.9321885006659298, "grad_norm": 0.0, - "learning_rate": 2.3115841504716463e-07, - "loss": 0.7323, + "learning_rate": 2.4021212608839163e-07, + "loss": 0.7697, "step": 32896 }, { - "epoch": 0.9335130533484677, + "epoch": 0.9322168381081923, "grad_norm": 0.0, - "learning_rate": 2.3096198557242656e-07, - "loss": 0.9456, + "learning_rate": 2.400122125125648e-07, + "loss": 0.8724, "step": 32897 }, { - "epoch": 0.9335414301929625, + "epoch": 0.9322451755504548, "grad_norm": 0.0, - "learning_rate": 2.3076563861719305e-07, - "loss": 0.7631, + "learning_rate": 2.398123811487407e-07, + "loss": 0.8373, "step": 32898 }, { - "epoch": 0.9335698070374574, + "epoch": 0.9322735129927173, "grad_norm": 0.0, - "learning_rate": 2.3056937418311943e-07, - "loss": 0.8277, + "learning_rate": 2.396126319985992e-07, + "loss": 0.7727, "step": 32899 }, { - "epoch": 0.9335981838819524, + "epoch": 0.9323018504349797, "grad_norm": 0.0, - "learning_rate": 2.303731922718666e-07, - "loss": 0.7634, + "learning_rate": 2.394129650638266e-07, + "loss": 0.7722, "step": 32900 }, { - "epoch": 0.9336265607264472, + "epoch": 0.9323301878772422, "grad_norm": 0.0, - "learning_rate": 2.3017709288508993e-07, - "loss": 0.6845, + "learning_rate": 2.3921338034610165e-07, + "loss": 0.7549, "step": 32901 }, { - "epoch": 0.9336549375709421, + "epoch": 0.9323585253195047, "grad_norm": 0.0, - "learning_rate": 2.2998107602444697e-07, - "loss": 0.7477, + "learning_rate": 2.390138778471074e-07, + "loss": 0.8655, "step": 32902 }, { - "epoch": 0.933683314415437, + "epoch": 0.9323868627617671, "grad_norm": 0.0, - "learning_rate": 2.2978514169159417e-07, - "loss": 0.832, + "learning_rate": 2.3881445756852473e-07, + "loss": 0.8001, "step": 32903 }, { - "epoch": 0.9337116912599319, + "epoch": 0.9324152002040296, "grad_norm": 0.0, - "learning_rate": 2.2958928988818463e-07, - "loss": 0.7504, + "learning_rate": 2.386151195120323e-07, + "loss": 0.8221, "step": 32904 }, { - "epoch": 0.9337400681044268, + "epoch": 0.9324435376462921, "grad_norm": 0.0, - "learning_rate": 2.293935206158726e-07, - "loss": 0.7767, + "learning_rate": 2.384158636793088e-07, + "loss": 0.7349, "step": 32905 }, { - "epoch": 0.9337684449489216, + "epoch": 0.9324718750885546, "grad_norm": 0.0, - "learning_rate": 2.2919783387631455e-07, - "loss": 0.824, + "learning_rate": 2.3821669007203508e-07, + "loss": 0.7885, "step": 32906 }, { - "epoch": 0.9337968217934166, + "epoch": 0.9325002125308169, "grad_norm": 0.0, - "learning_rate": 2.2900222967116025e-07, - "loss": 0.7785, + "learning_rate": 2.3801759869188534e-07, + "loss": 0.8607, "step": 32907 }, { - "epoch": 0.9338251986379115, + "epoch": 0.9325285499730794, "grad_norm": 0.0, - "learning_rate": 2.2880670800206395e-07, - "loss": 0.8334, + "learning_rate": 2.378185895405405e-07, + "loss": 0.7627, "step": 32908 }, { - "epoch": 0.9338535754824063, + "epoch": 0.9325568874153419, "grad_norm": 0.0, - "learning_rate": 2.2861126887067652e-07, - "loss": 0.8309, + "learning_rate": 2.3761966261967252e-07, + "loss": 0.8801, "step": 32909 }, { - "epoch": 0.9338819523269013, + "epoch": 0.9325852248576043, "grad_norm": 0.0, - "learning_rate": 2.284159122786489e-07, - "loss": 0.8611, + "learning_rate": 2.3742081793096006e-07, + "loss": 0.8448, "step": 32910 }, { - "epoch": 0.9339103291713962, + "epoch": 0.9326135622998668, "grad_norm": 0.0, - "learning_rate": 2.2822063822763084e-07, - "loss": 0.7672, + "learning_rate": 2.3722205547607846e-07, + "loss": 0.8843, "step": 32911 }, { - "epoch": 0.933938706015891, + "epoch": 0.9326418997421293, "grad_norm": 0.0, - "learning_rate": 2.280254467192744e-07, - "loss": 0.7898, + "learning_rate": 2.370233752566986e-07, + "loss": 0.7244, "step": 32912 }, { - "epoch": 0.9339670828603859, + "epoch": 0.9326702371843917, "grad_norm": 0.0, - "learning_rate": 2.278303377552238e-07, - "loss": 0.8103, + "learning_rate": 2.3682477727449692e-07, + "loss": 0.8589, "step": 32913 }, { - "epoch": 0.9339954597048808, + "epoch": 0.9326985746266542, "grad_norm": 0.0, - "learning_rate": 2.2763531133712992e-07, - "loss": 0.8458, + "learning_rate": 2.3662626153114655e-07, + "loss": 0.733, "step": 32914 }, { - "epoch": 0.9340238365493757, + "epoch": 0.9327269120689167, "grad_norm": 0.0, - "learning_rate": 2.2744036746664145e-07, - "loss": 0.9305, + "learning_rate": 2.364278280283172e-07, + "loss": 0.8522, "step": 32915 }, { - "epoch": 0.9340522133938706, + "epoch": 0.9327552495111792, "grad_norm": 0.0, - "learning_rate": 2.2724550614540152e-07, - "loss": 0.9314, + "learning_rate": 2.3622947676768427e-07, + "loss": 0.8009, "step": 32916 }, { - "epoch": 0.9340805902383655, + "epoch": 0.9327835869534415, "grad_norm": 0.0, - "learning_rate": 2.2705072737505994e-07, - "loss": 0.7972, + "learning_rate": 2.3603120775091415e-07, + "loss": 0.8363, "step": 32917 }, { - "epoch": 0.9341089670828604, + "epoch": 0.932811924395704, "grad_norm": 0.0, - "learning_rate": 2.2685603115725874e-07, - "loss": 0.8188, + "learning_rate": 2.3583302097967887e-07, + "loss": 0.749, "step": 32918 }, { - "epoch": 0.9341373439273553, + "epoch": 0.9328402618379665, "grad_norm": 0.0, - "learning_rate": 2.2666141749364434e-07, - "loss": 0.8601, + "learning_rate": 2.356349164556493e-07, + "loss": 0.8142, "step": 32919 }, { - "epoch": 0.9341657207718501, + "epoch": 0.9328685992802289, "grad_norm": 0.0, - "learning_rate": 2.26466886385861e-07, - "loss": 0.886, + "learning_rate": 2.3543689418049187e-07, + "loss": 0.8169, "step": 32920 }, { - "epoch": 0.9341940976163451, + "epoch": 0.9328969367224914, "grad_norm": 0.0, - "learning_rate": 2.2627243783555075e-07, - "loss": 0.7706, + "learning_rate": 2.3523895415587637e-07, + "loss": 0.6965, "step": 32921 }, { - "epoch": 0.93422247446084, + "epoch": 0.9329252741647539, "grad_norm": 0.0, - "learning_rate": 2.260780718443567e-07, - "loss": 0.9071, + "learning_rate": 2.3504109638346817e-07, + "loss": 0.7983, "step": 32922 }, { - "epoch": 0.9342508513053348, + "epoch": 0.9329536116070164, "grad_norm": 0.0, - "learning_rate": 2.258837884139209e-07, - "loss": 0.7914, + "learning_rate": 2.3484332086493478e-07, + "loss": 0.8933, "step": 32923 }, { - "epoch": 0.9342792281498298, + "epoch": 0.9329819490492788, "grad_norm": 0.0, - "learning_rate": 2.256895875458831e-07, - "loss": 0.908, + "learning_rate": 2.3464562760194266e-07, + "loss": 0.8387, "step": 32924 }, { - "epoch": 0.9343076049943246, + "epoch": 0.9330102864915413, "grad_norm": 0.0, - "learning_rate": 2.2549546924188537e-07, - "loss": 0.74, + "learning_rate": 2.3444801659615602e-07, + "loss": 0.7654, "step": 32925 }, { - "epoch": 0.9343359818388195, + "epoch": 0.9330386239338038, "grad_norm": 0.0, - "learning_rate": 2.253014335035675e-07, - "loss": 0.86, + "learning_rate": 2.342504878492413e-07, + "loss": 0.6836, "step": 32926 }, { - "epoch": 0.9343643586833145, + "epoch": 0.9330669613760662, "grad_norm": 0.0, - "learning_rate": 2.2510748033256703e-07, - "loss": 0.8615, + "learning_rate": 2.3405304136286055e-07, + "loss": 0.6686, "step": 32927 }, { - "epoch": 0.9343927355278093, + "epoch": 0.9330952988183286, "grad_norm": 0.0, - "learning_rate": 2.2491360973052267e-07, - "loss": 0.8676, + "learning_rate": 2.3385567713867797e-07, + "loss": 0.8935, "step": 32928 }, { - "epoch": 0.9344211123723042, + "epoch": 0.9331236362605911, "grad_norm": 0.0, - "learning_rate": 2.2471982169907424e-07, - "loss": 0.8155, + "learning_rate": 2.3365839517835554e-07, + "loss": 0.7024, "step": 32929 }, { - "epoch": 0.934449489216799, + "epoch": 0.9331519737028536, "grad_norm": 0.0, - "learning_rate": 2.2452611623985488e-07, - "loss": 0.8083, + "learning_rate": 2.3346119548355416e-07, + "loss": 0.7678, "step": 32930 }, { - "epoch": 0.934477866061294, + "epoch": 0.933180311145116, "grad_norm": 0.0, - "learning_rate": 2.2433249335450324e-07, - "loss": 0.688, + "learning_rate": 2.3326407805593698e-07, + "loss": 0.9169, "step": 32931 }, { - "epoch": 0.9345062429057889, + "epoch": 0.9332086485873785, "grad_norm": 0.0, - "learning_rate": 2.241389530446547e-07, - "loss": 0.6982, + "learning_rate": 2.3306704289716263e-07, + "loss": 0.8935, "step": 32932 }, { - "epoch": 0.9345346197502837, + "epoch": 0.933236986029641, "grad_norm": 0.0, - "learning_rate": 2.2394549531194353e-07, - "loss": 0.6939, + "learning_rate": 2.3287009000889205e-07, + "loss": 0.7702, "step": 32933 }, { - "epoch": 0.9345629965947787, + "epoch": 0.9332653234719034, "grad_norm": 0.0, - "learning_rate": 2.2375212015800507e-07, - "loss": 0.7689, + "learning_rate": 2.3267321939278277e-07, + "loss": 0.8944, "step": 32934 }, { - "epoch": 0.9345913734392736, + "epoch": 0.9332936609141659, "grad_norm": 0.0, - "learning_rate": 2.2355882758447135e-07, - "loss": 0.83, + "learning_rate": 2.3247643105049454e-07, + "loss": 0.7904, "step": 32935 }, { - "epoch": 0.9346197502837684, + "epoch": 0.9333219983564284, "grad_norm": 0.0, - "learning_rate": 2.233656175929766e-07, - "loss": 0.7853, + "learning_rate": 2.322797249836839e-07, + "loss": 0.7982, "step": 32936 }, { - "epoch": 0.9346481271282633, + "epoch": 0.9333503357986908, "grad_norm": 0.0, - "learning_rate": 2.2317249018515175e-07, - "loss": 0.888, + "learning_rate": 2.3208310119400834e-07, + "loss": 0.7349, "step": 32937 }, { - "epoch": 0.9346765039727583, + "epoch": 0.9333786732409532, "grad_norm": 0.0, - "learning_rate": 2.2297944536262772e-07, - "loss": 0.7869, + "learning_rate": 2.3188655968312435e-07, + "loss": 0.9124, "step": 32938 }, { - "epoch": 0.9347048808172531, + "epoch": 0.9334070106832157, "grad_norm": 0.0, - "learning_rate": 2.2278648312703543e-07, - "loss": 0.7652, + "learning_rate": 2.3169010045268725e-07, + "loss": 0.782, "step": 32939 }, { - "epoch": 0.934733257661748, + "epoch": 0.9334353481254782, "grad_norm": 0.0, - "learning_rate": 2.225936034800069e-07, - "loss": 0.7867, + "learning_rate": 2.314937235043524e-07, + "loss": 0.7449, "step": 32940 }, { - "epoch": 0.9347616345062429, + "epoch": 0.9334636855677406, "grad_norm": 0.0, - "learning_rate": 2.2240080642316751e-07, - "loss": 0.7074, + "learning_rate": 2.312974288397718e-07, + "loss": 0.8444, "step": 32941 }, { - "epoch": 0.9347900113507378, + "epoch": 0.9334920230100031, "grad_norm": 0.0, - "learning_rate": 2.222080919581493e-07, - "loss": 0.7815, + "learning_rate": 2.311012164606008e-07, + "loss": 0.7039, "step": 32942 }, { - "epoch": 0.9348183881952327, + "epoch": 0.9335203604522656, "grad_norm": 0.0, - "learning_rate": 2.220154600865798e-07, - "loss": 0.753, + "learning_rate": 2.3090508636849362e-07, + "loss": 0.8975, "step": 32943 }, { - "epoch": 0.9348467650397276, + "epoch": 0.933548697894528, "grad_norm": 0.0, - "learning_rate": 2.2182291081008334e-07, - "loss": 0.8795, + "learning_rate": 2.3070903856509897e-07, + "loss": 0.7983, "step": 32944 }, { - "epoch": 0.9348751418842225, + "epoch": 0.9335770353367905, "grad_norm": 0.0, - "learning_rate": 2.2163044413028855e-07, - "loss": 0.906, + "learning_rate": 2.3051307305207105e-07, + "loss": 0.6039, "step": 32945 }, { - "epoch": 0.9349035187287174, + "epoch": 0.933605372779053, "grad_norm": 0.0, - "learning_rate": 2.2143806004882307e-07, - "loss": 0.7189, + "learning_rate": 2.3031718983105744e-07, + "loss": 0.9116, "step": 32946 }, { - "epoch": 0.9349318955732122, + "epoch": 0.9336337102213155, "grad_norm": 0.0, - "learning_rate": 2.2124575856730669e-07, - "loss": 0.8028, + "learning_rate": 2.3012138890371126e-07, + "loss": 0.666, "step": 32947 }, { - "epoch": 0.9349602724177072, + "epoch": 0.9336620476635779, "grad_norm": 0.0, - "learning_rate": 2.210535396873681e-07, - "loss": 0.8303, + "learning_rate": 2.2992567027168122e-07, + "loss": 0.7862, "step": 32948 }, { - "epoch": 0.934988649262202, + "epoch": 0.9336903851058403, "grad_norm": 0.0, - "learning_rate": 2.2086140341063155e-07, - "loss": 0.7769, + "learning_rate": 2.2973003393661374e-07, + "loss": 0.7515, "step": 32949 }, { - "epoch": 0.9350170261066969, + "epoch": 0.9337187225481028, "grad_norm": 0.0, - "learning_rate": 2.2066934973871689e-07, - "loss": 0.8917, + "learning_rate": 2.2953447990015865e-07, + "loss": 0.8479, "step": 32950 }, { - "epoch": 0.9350454029511919, + "epoch": 0.9337470599903652, "grad_norm": 0.0, - "learning_rate": 2.2047737867324835e-07, - "loss": 0.7973, + "learning_rate": 2.2933900816396238e-07, + "loss": 0.8685, "step": 32951 }, { - "epoch": 0.9350737797956867, + "epoch": 0.9337753974326277, "grad_norm": 0.0, - "learning_rate": 2.2028549021584689e-07, - "loss": 0.7758, + "learning_rate": 2.291436187296725e-07, + "loss": 0.7534, "step": 32952 }, { - "epoch": 0.9351021566401816, + "epoch": 0.9338037348748902, "grad_norm": 0.0, - "learning_rate": 2.200936843681334e-07, - "loss": 0.8167, + "learning_rate": 2.289483115989355e-07, + "loss": 0.7293, "step": 32953 }, { - "epoch": 0.9351305334846765, + "epoch": 0.9338320723171527, "grad_norm": 0.0, - "learning_rate": 2.1990196113172768e-07, - "loss": 0.8814, + "learning_rate": 2.2875308677339336e-07, + "loss": 0.8457, "step": 32954 }, { - "epoch": 0.9351589103291714, + "epoch": 0.9338604097594151, "grad_norm": 0.0, - "learning_rate": 2.1971032050825182e-07, - "loss": 0.8581, + "learning_rate": 2.2855794425469368e-07, + "loss": 0.8363, "step": 32955 }, { - "epoch": 0.9351872871736663, + "epoch": 0.9338887472016776, "grad_norm": 0.0, - "learning_rate": 2.1951876249932003e-07, - "loss": 0.7684, + "learning_rate": 2.283628840444785e-07, + "loss": 0.911, "step": 32956 }, { - "epoch": 0.9352156640181611, + "epoch": 0.9339170846439401, "grad_norm": 0.0, - "learning_rate": 2.193272871065544e-07, - "loss": 0.8145, + "learning_rate": 2.2816790614439089e-07, + "loss": 0.8907, "step": 32957 }, { - "epoch": 0.9352440408626561, + "epoch": 0.9339454220862025, "grad_norm": 0.0, - "learning_rate": 2.1913589433157022e-07, - "loss": 0.819, + "learning_rate": 2.2797301055607513e-07, + "loss": 0.8984, "step": 32958 }, { - "epoch": 0.935272417707151, + "epoch": 0.933973759528465, "grad_norm": 0.0, - "learning_rate": 2.1894458417598518e-07, - "loss": 0.7711, + "learning_rate": 2.277781972811699e-07, + "loss": 0.8229, "step": 32959 }, { - "epoch": 0.9353007945516458, + "epoch": 0.9340020969707274, "grad_norm": 0.0, - "learning_rate": 2.187533566414146e-07, - "loss": 0.8523, + "learning_rate": 2.2758346632131833e-07, + "loss": 0.7534, "step": 32960 }, { - "epoch": 0.9353291713961408, + "epoch": 0.9340304344129898, "grad_norm": 0.0, - "learning_rate": 2.18562211729475e-07, - "loss": 0.7935, + "learning_rate": 2.2738881767816134e-07, + "loss": 0.8428, "step": 32961 }, { - "epoch": 0.9353575482406357, + "epoch": 0.9340587718552523, "grad_norm": 0.0, - "learning_rate": 2.183711494417784e-07, - "loss": 0.7912, + "learning_rate": 2.271942513533354e-07, + "loss": 0.8208, "step": 32962 }, { - "epoch": 0.9353859250851305, + "epoch": 0.9340871092975148, "grad_norm": 0.0, - "learning_rate": 2.1818016977994128e-07, - "loss": 0.6633, + "learning_rate": 2.2699976734848138e-07, + "loss": 0.7882, "step": 32963 }, { - "epoch": 0.9354143019296254, + "epoch": 0.9341154467397773, "grad_norm": 0.0, - "learning_rate": 2.1798927274557567e-07, - "loss": 0.7448, + "learning_rate": 2.2680536566523802e-07, + "loss": 0.7546, "step": 32964 }, { - "epoch": 0.9354426787741204, + "epoch": 0.9341437841820397, "grad_norm": 0.0, - "learning_rate": 2.177984583402948e-07, - "loss": 1.0289, + "learning_rate": 2.2661104630524177e-07, + "loss": 0.8787, "step": 32965 }, { - "epoch": 0.9354710556186152, + "epoch": 0.9341721216243022, "grad_norm": 0.0, - "learning_rate": 2.176077265657106e-07, - "loss": 0.8952, + "learning_rate": 2.2641680927013133e-07, + "loss": 0.7113, "step": 32966 }, { - "epoch": 0.9354994324631101, + "epoch": 0.9342004590665647, "grad_norm": 0.0, - "learning_rate": 2.17417077423433e-07, - "loss": 0.7203, + "learning_rate": 2.262226545615398e-07, + "loss": 0.8062, "step": 32967 }, { - "epoch": 0.935527809307605, + "epoch": 0.9342287965088271, "grad_norm": 0.0, - "learning_rate": 2.1722651091507284e-07, - "loss": 0.8557, + "learning_rate": 2.260285821811048e-07, + "loss": 0.8605, "step": 32968 }, { - "epoch": 0.9355561861520999, + "epoch": 0.9342571339510896, "grad_norm": 0.0, - "learning_rate": 2.1703602704224114e-07, - "loss": 0.8502, + "learning_rate": 2.2583459213046167e-07, + "loss": 0.9068, "step": 32969 }, { - "epoch": 0.9355845629965948, + "epoch": 0.934285471393352, "grad_norm": 0.0, - "learning_rate": 2.1684562580654546e-07, - "loss": 0.7321, + "learning_rate": 2.2564068441124243e-07, + "loss": 0.8721, "step": 32970 }, { - "epoch": 0.9356129398410896, + "epoch": 0.9343138088356145, "grad_norm": 0.0, - "learning_rate": 2.166553072095945e-07, - "loss": 0.7748, + "learning_rate": 2.2544685902508135e-07, + "loss": 0.7456, "step": 32971 }, { - "epoch": 0.9356413166855846, + "epoch": 0.9343421462778769, "grad_norm": 0.0, - "learning_rate": 2.1646507125299587e-07, - "loss": 0.9369, + "learning_rate": 2.2525311597361154e-07, + "loss": 0.7994, "step": 32972 }, { - "epoch": 0.9356696935300794, + "epoch": 0.9343704837201394, "grad_norm": 0.0, - "learning_rate": 2.162749179383572e-07, - "loss": 0.7663, + "learning_rate": 2.2505945525846285e-07, + "loss": 0.7502, "step": 32973 }, { - "epoch": 0.9356980703745743, + "epoch": 0.9343988211624019, "grad_norm": 0.0, - "learning_rate": 2.1608484726728275e-07, - "loss": 0.8414, + "learning_rate": 2.248658768812706e-07, + "loss": 0.7388, "step": 32974 }, { - "epoch": 0.9357264472190693, + "epoch": 0.9344271586046643, "grad_norm": 0.0, - "learning_rate": 2.158948592413812e-07, - "loss": 0.743, + "learning_rate": 2.2467238084366127e-07, + "loss": 0.8099, "step": 32975 }, { - "epoch": 0.9357548240635641, + "epoch": 0.9344554960469268, "grad_norm": 0.0, - "learning_rate": 2.1570495386225355e-07, - "loss": 0.8098, + "learning_rate": 2.2447896714726692e-07, + "loss": 0.7854, "step": 32976 }, { - "epoch": 0.935783200908059, + "epoch": 0.9344838334891893, "grad_norm": 0.0, - "learning_rate": 2.1551513113150735e-07, - "loss": 0.7488, + "learning_rate": 2.2428563579371508e-07, + "loss": 0.7308, "step": 32977 }, { - "epoch": 0.935811577752554, + "epoch": 0.9345121709314518, "grad_norm": 0.0, - "learning_rate": 2.1532539105074357e-07, - "loss": 0.6977, + "learning_rate": 2.2409238678463673e-07, + "loss": 0.8525, "step": 32978 }, { - "epoch": 0.9358399545970488, + "epoch": 0.9345405083737142, "grad_norm": 0.0, - "learning_rate": 2.1513573362156647e-07, - "loss": 0.8712, + "learning_rate": 2.2389922012165944e-07, + "loss": 0.8264, "step": 32979 }, { - "epoch": 0.9358683314415437, + "epoch": 0.9345688458159767, "grad_norm": 0.0, - "learning_rate": 2.1494615884557924e-07, - "loss": 0.754, + "learning_rate": 2.2370613580640744e-07, + "loss": 0.8267, "step": 32980 }, { - "epoch": 0.9358967082860385, + "epoch": 0.9345971832582392, "grad_norm": 0.0, - "learning_rate": 2.1475666672437945e-07, - "loss": 0.7248, + "learning_rate": 2.2351313384050942e-07, + "loss": 0.7663, "step": 32981 }, { - "epoch": 0.9359250851305335, + "epoch": 0.9346255207005015, "grad_norm": 0.0, - "learning_rate": 2.145672572595714e-07, - "loss": 0.7845, + "learning_rate": 2.2332021422559193e-07, + "loss": 0.8014, "step": 32982 }, { - "epoch": 0.9359534619750284, + "epoch": 0.934653858142764, "grad_norm": 0.0, - "learning_rate": 2.143779304527538e-07, - "loss": 0.8407, + "learning_rate": 2.2312737696327691e-07, + "loss": 0.7495, "step": 32983 }, { - "epoch": 0.9359818388195232, + "epoch": 0.9346821955850265, "grad_norm": 0.0, - "learning_rate": 2.1418868630552424e-07, - "loss": 0.7633, + "learning_rate": 2.229346220551909e-07, + "loss": 0.8413, "step": 32984 }, { - "epoch": 0.9360102156640182, + "epoch": 0.9347105330272889, "grad_norm": 0.0, - "learning_rate": 2.139995248194837e-07, - "loss": 0.7669, + "learning_rate": 2.2274194950295813e-07, + "loss": 0.6988, "step": 32985 }, { - "epoch": 0.9360385925085131, + "epoch": 0.9347388704695514, "grad_norm": 0.0, - "learning_rate": 2.138104459962298e-07, - "loss": 0.8552, + "learning_rate": 2.2254935930820066e-07, + "loss": 0.923, "step": 32986 }, { - "epoch": 0.9360669693530079, + "epoch": 0.9347672079118139, "grad_norm": 0.0, - "learning_rate": 2.136214498373579e-07, - "loss": 0.9534, + "learning_rate": 2.223568514725405e-07, + "loss": 0.8071, "step": 32987 }, { - "epoch": 0.9360953461975028, + "epoch": 0.9347955453540764, "grad_norm": 0.0, - "learning_rate": 2.134325363444656e-07, - "loss": 0.8112, + "learning_rate": 2.2216442599759857e-07, + "loss": 0.8482, "step": 32988 }, { - "epoch": 0.9361237230419978, + "epoch": 0.9348238827963388, "grad_norm": 0.0, - "learning_rate": 2.1324370551914942e-07, - "loss": 0.8412, + "learning_rate": 2.2197208288499694e-07, + "loss": 0.7735, "step": 32989 }, { - "epoch": 0.9361520998864926, + "epoch": 0.9348522202386013, "grad_norm": 0.0, - "learning_rate": 2.1305495736300253e-07, - "loss": 0.8062, + "learning_rate": 2.217798221363554e-07, + "loss": 0.722, "step": 32990 }, { - "epoch": 0.9361804767309875, + "epoch": 0.9348805576808638, "grad_norm": 0.0, - "learning_rate": 2.1286629187762142e-07, - "loss": 0.9071, + "learning_rate": 2.2158764375329378e-07, + "loss": 0.8112, "step": 32991 }, { - "epoch": 0.9362088535754824, + "epoch": 0.9349088951231261, "grad_norm": 0.0, - "learning_rate": 2.1267770906459817e-07, - "loss": 0.7938, + "learning_rate": 2.2139554773742967e-07, + "loss": 0.7552, "step": 32992 }, { - "epoch": 0.9362372304199773, + "epoch": 0.9349372325653886, "grad_norm": 0.0, - "learning_rate": 2.1248920892552593e-07, - "loss": 0.8517, + "learning_rate": 2.2120353409038398e-07, + "loss": 0.8011, "step": 32993 }, { - "epoch": 0.9362656072644722, + "epoch": 0.9349655700076511, "grad_norm": 0.0, - "learning_rate": 2.123007914619979e-07, - "loss": 0.7159, + "learning_rate": 2.2101160281377098e-07, + "loss": 0.7936, "step": 32994 }, { - "epoch": 0.9362939841089671, + "epoch": 0.9349939074499136, "grad_norm": 0.0, - "learning_rate": 2.12112456675605e-07, - "loss": 0.8179, + "learning_rate": 2.208197539092094e-07, + "loss": 0.8165, "step": 32995 }, { - "epoch": 0.936322360953462, + "epoch": 0.935022244892176, "grad_norm": 0.0, - "learning_rate": 2.1192420456793706e-07, - "loss": 0.7196, + "learning_rate": 2.206279873783135e-07, + "loss": 0.7591, "step": 32996 }, { - "epoch": 0.9363507377979569, + "epoch": 0.9350505823344385, "grad_norm": 0.0, - "learning_rate": 2.1173603514058728e-07, - "loss": 0.8204, + "learning_rate": 2.2043630322269972e-07, + "loss": 0.8335, "step": 32997 }, { - "epoch": 0.9363791146424517, + "epoch": 0.935078919776701, "grad_norm": 0.0, - "learning_rate": 2.1154794839514215e-07, - "loss": 0.8634, + "learning_rate": 2.2024470144398235e-07, + "loss": 0.6725, "step": 32998 }, { - "epoch": 0.9364074914869467, + "epoch": 0.9351072572189634, "grad_norm": 0.0, - "learning_rate": 2.1135994433319152e-07, - "loss": 0.9074, + "learning_rate": 2.2005318204377569e-07, + "loss": 0.8338, "step": 32999 }, { - "epoch": 0.9364358683314415, + "epoch": 0.9351355946612259, "grad_norm": 0.0, - "learning_rate": 2.111720229563241e-07, - "loss": 0.7512, + "learning_rate": 2.1986174502369285e-07, + "loss": 0.7859, "step": 33000 }, { - "epoch": 0.9364642451759364, + "epoch": 0.9351639321034884, "grad_norm": 0.0, - "learning_rate": 2.1098418426612532e-07, - "loss": 0.74, + "learning_rate": 2.19670390385347e-07, + "loss": 0.7933, "step": 33001 }, { - "epoch": 0.9364926220204314, + "epoch": 0.9351922695457509, "grad_norm": 0.0, - "learning_rate": 2.107964282641839e-07, - "loss": 0.76, + "learning_rate": 2.1947911813034795e-07, + "loss": 0.7726, "step": 33002 }, { - "epoch": 0.9365209988649262, + "epoch": 0.9352206069880132, "grad_norm": 0.0, - "learning_rate": 2.1060875495208633e-07, - "loss": 0.8963, + "learning_rate": 2.1928792826030887e-07, + "loss": 0.8104, "step": 33003 }, { - "epoch": 0.9365493757094211, + "epoch": 0.9352489444302757, "grad_norm": 0.0, - "learning_rate": 2.1042116433141468e-07, - "loss": 0.7406, + "learning_rate": 2.1909682077683848e-07, + "loss": 0.7295, "step": 33004 }, { - "epoch": 0.936577752553916, + "epoch": 0.9352772818725382, "grad_norm": 0.0, - "learning_rate": 2.102336564037566e-07, - "loss": 0.7314, + "learning_rate": 2.189057956815488e-07, + "loss": 0.8853, "step": 33005 }, { - "epoch": 0.9366061293984109, + "epoch": 0.9353056193148006, "grad_norm": 0.0, - "learning_rate": 2.1004623117069524e-07, - "loss": 0.8185, + "learning_rate": 2.1871485297604856e-07, + "loss": 0.779, "step": 33006 }, { - "epoch": 0.9366345062429058, + "epoch": 0.9353339567570631, "grad_norm": 0.0, - "learning_rate": 2.0985888863381264e-07, - "loss": 0.6895, + "learning_rate": 2.1852399266194312e-07, + "loss": 0.7531, "step": 33007 }, { - "epoch": 0.9366628830874006, + "epoch": 0.9353622941993256, "grad_norm": 0.0, - "learning_rate": 2.0967162879469206e-07, - "loss": 0.8352, + "learning_rate": 2.1833321474084456e-07, + "loss": 0.865, "step": 33008 }, { - "epoch": 0.9366912599318956, + "epoch": 0.935390631641588, "grad_norm": 0.0, - "learning_rate": 2.094844516549155e-07, - "loss": 0.7409, + "learning_rate": 2.1814251921435603e-07, + "loss": 0.7489, "step": 33009 }, { - "epoch": 0.9367196367763905, + "epoch": 0.9354189690838505, "grad_norm": 0.0, - "learning_rate": 2.092973572160639e-07, - "loss": 0.9134, + "learning_rate": 2.1795190608408623e-07, + "loss": 0.7337, "step": 33010 }, { - "epoch": 0.9367480136208853, + "epoch": 0.935447306526113, "grad_norm": 0.0, - "learning_rate": 2.091103454797183e-07, - "loss": 0.7525, + "learning_rate": 2.1776137535164054e-07, + "loss": 0.9417, "step": 33011 }, { - "epoch": 0.9367763904653803, + "epoch": 0.9354756439683755, "grad_norm": 0.0, - "learning_rate": 2.089234164474574e-07, - "loss": 0.8247, + "learning_rate": 2.1757092701862213e-07, + "loss": 0.7723, "step": 33012 }, { - "epoch": 0.9368047673098752, + "epoch": 0.9355039814106378, "grad_norm": 0.0, - "learning_rate": 2.087365701208599e-07, - "loss": 0.872, + "learning_rate": 2.173805610866364e-07, + "loss": 0.836, "step": 33013 }, { - "epoch": 0.93683314415437, + "epoch": 0.9355323188529003, "grad_norm": 0.0, - "learning_rate": 2.085498065015057e-07, - "loss": 0.8381, + "learning_rate": 2.1719027755728762e-07, + "loss": 0.7029, "step": 33014 }, { - "epoch": 0.9368615209988649, + "epoch": 0.9355606562951628, "grad_norm": 0.0, - "learning_rate": 2.0836312559097015e-07, - "loss": 0.8513, + "learning_rate": 2.170000764321778e-07, + "loss": 0.7631, "step": 33015 }, { - "epoch": 0.9368898978433599, + "epoch": 0.9355889937374252, "grad_norm": 0.0, - "learning_rate": 2.0817652739083204e-07, - "loss": 0.7871, + "learning_rate": 2.1680995771290904e-07, + "loss": 0.7991, "step": 33016 }, { - "epoch": 0.9369182746878547, + "epoch": 0.9356173311796877, "grad_norm": 0.0, - "learning_rate": 2.0799001190266788e-07, - "loss": 0.9548, + "learning_rate": 2.1661992140108224e-07, + "loss": 0.765, "step": 33017 }, { - "epoch": 0.9369466515323496, + "epoch": 0.9356456686219502, "grad_norm": 0.0, - "learning_rate": 2.0780357912805193e-07, - "loss": 0.821, + "learning_rate": 2.1642996749829948e-07, + "loss": 0.8336, "step": 33018 }, { - "epoch": 0.9369750283768445, + "epoch": 0.9356740060642127, "grad_norm": 0.0, - "learning_rate": 2.076172290685585e-07, - "loss": 0.8216, + "learning_rate": 2.162400960061606e-07, + "loss": 0.823, "step": 33019 }, { - "epoch": 0.9370034052213394, + "epoch": 0.9357023435064751, "grad_norm": 0.0, - "learning_rate": 2.0743096172576416e-07, - "loss": 0.8187, + "learning_rate": 2.1605030692626317e-07, + "loss": 0.7002, "step": 33020 }, { - "epoch": 0.9370317820658343, + "epoch": 0.9357306809487376, "grad_norm": 0.0, - "learning_rate": 2.072447771012387e-07, - "loss": 0.8598, + "learning_rate": 2.1586060026020816e-07, + "loss": 0.7974, "step": 33021 }, { - "epoch": 0.9370601589103291, + "epoch": 0.9357590183910001, "grad_norm": 0.0, - "learning_rate": 2.0705867519655753e-07, - "loss": 0.7502, + "learning_rate": 2.156709760095932e-07, + "loss": 0.7996, "step": 33022 }, { - "epoch": 0.9370885357548241, + "epoch": 0.9357873558332624, "grad_norm": 0.0, - "learning_rate": 2.0687265601329275e-07, - "loss": 0.8402, + "learning_rate": 2.1548143417601364e-07, + "loss": 0.8064, "step": 33023 }, { - "epoch": 0.937116912599319, + "epoch": 0.9358156932755249, "grad_norm": 0.0, - "learning_rate": 2.066867195530131e-07, - "loss": 0.7495, + "learning_rate": 2.1529197476106821e-07, + "loss": 0.746, "step": 33024 }, { - "epoch": 0.9371452894438138, + "epoch": 0.9358440307177874, "grad_norm": 0.0, - "learning_rate": 2.065008658172929e-07, - "loss": 0.8154, + "learning_rate": 2.1510259776635122e-07, + "loss": 0.7213, "step": 33025 }, { - "epoch": 0.9371736662883088, + "epoch": 0.9358723681600499, "grad_norm": 0.0, - "learning_rate": 2.0631509480769862e-07, - "loss": 0.8774, + "learning_rate": 2.1491330319345915e-07, + "loss": 0.8307, "step": 33026 }, { - "epoch": 0.9372020431328036, + "epoch": 0.9359007056023123, "grad_norm": 0.0, - "learning_rate": 2.0612940652580126e-07, - "loss": 0.7939, + "learning_rate": 2.1472409104398629e-07, + "loss": 0.7279, "step": 33027 }, { - "epoch": 0.9372304199772985, + "epoch": 0.9359290430445748, "grad_norm": 0.0, - "learning_rate": 2.0594380097317068e-07, - "loss": 0.7669, + "learning_rate": 2.145349613195258e-07, + "loss": 0.7924, "step": 33028 }, { - "epoch": 0.9372587968217935, + "epoch": 0.9359573804868373, "grad_norm": 0.0, - "learning_rate": 2.0575827815137118e-07, - "loss": 0.8093, + "learning_rate": 2.143459140216697e-07, + "loss": 0.8427, "step": 33029 }, { - "epoch": 0.9372871736662883, + "epoch": 0.9359857179290997, "grad_norm": 0.0, - "learning_rate": 2.055728380619726e-07, - "loss": 0.7556, + "learning_rate": 2.1415694915201346e-07, + "loss": 0.7994, "step": 33030 }, { - "epoch": 0.9373155505107832, + "epoch": 0.9360140553713622, "grad_norm": 0.0, - "learning_rate": 2.0538748070654147e-07, - "loss": 0.7874, + "learning_rate": 2.1396806671214576e-07, + "loss": 0.836, "step": 33031 }, { - "epoch": 0.937343927355278, + "epoch": 0.9360423928136247, "grad_norm": 0.0, - "learning_rate": 2.0520220608664098e-07, - "loss": 0.7442, + "learning_rate": 2.137792667036609e-07, + "loss": 0.7585, "step": 33032 }, { - "epoch": 0.937372304199773, + "epoch": 0.936070730255887, "grad_norm": 0.0, - "learning_rate": 2.0501701420383878e-07, - "loss": 0.8784, + "learning_rate": 2.1359054912814537e-07, + "loss": 0.7535, "step": 33033 }, { - "epoch": 0.9374006810442679, + "epoch": 0.9360990676981495, "grad_norm": 0.0, - "learning_rate": 2.0483190505969808e-07, - "loss": 0.8355, + "learning_rate": 2.1340191398719125e-07, + "loss": 0.8518, "step": 33034 }, { - "epoch": 0.9374290578887627, + "epoch": 0.936127405140412, "grad_norm": 0.0, - "learning_rate": 2.0464687865578204e-07, - "loss": 0.7909, + "learning_rate": 2.1321336128238835e-07, + "loss": 0.7859, "step": 33035 }, { - "epoch": 0.9374574347332577, + "epoch": 0.9361557425826745, "grad_norm": 0.0, - "learning_rate": 2.0446193499365387e-07, - "loss": 0.8761, + "learning_rate": 2.1302489101532208e-07, + "loss": 0.7174, "step": 33036 }, { - "epoch": 0.9374858115777526, + "epoch": 0.9361840800249369, "grad_norm": 0.0, - "learning_rate": 2.0427707407487785e-07, - "loss": 0.7883, + "learning_rate": 2.1283650318758231e-07, + "loss": 0.7539, "step": 33037 }, { - "epoch": 0.9375141884222474, + "epoch": 0.9362124174671994, "grad_norm": 0.0, - "learning_rate": 2.0409229590101164e-07, - "loss": 0.9673, + "learning_rate": 2.1264819780075441e-07, + "loss": 0.9672, "step": 33038 }, { - "epoch": 0.9375425652667423, + "epoch": 0.9362407549094619, "grad_norm": 0.0, - "learning_rate": 2.0390760047361845e-07, - "loss": 0.8687, + "learning_rate": 2.124599748564249e-07, + "loss": 0.7478, "step": 33039 }, { - "epoch": 0.9375709421112373, + "epoch": 0.9362690923517243, "grad_norm": 0.0, - "learning_rate": 2.0372298779425926e-07, - "loss": 0.7986, + "learning_rate": 2.1227183435618026e-07, + "loss": 0.8509, "step": 33040 }, { - "epoch": 0.9375993189557321, + "epoch": 0.9362974297939868, "grad_norm": 0.0, - "learning_rate": 2.0353845786449167e-07, - "loss": 0.8313, + "learning_rate": 2.120837763016048e-07, + "loss": 0.749, "step": 33041 }, { - "epoch": 0.937627695800227, + "epoch": 0.9363257672362493, "grad_norm": 0.0, - "learning_rate": 2.0335401068587447e-07, - "loss": 0.8123, + "learning_rate": 2.118958006942806e-07, + "loss": 0.8846, "step": 33042 }, { - "epoch": 0.937656072644722, + "epoch": 0.9363541046785118, "grad_norm": 0.0, - "learning_rate": 2.0316964625996749e-07, - "loss": 0.8285, + "learning_rate": 2.1170790753579417e-07, + "loss": 0.8392, "step": 33043 }, { - "epoch": 0.9376844494892168, + "epoch": 0.9363824421207741, "grad_norm": 0.0, - "learning_rate": 2.0298536458832617e-07, - "loss": 0.7773, + "learning_rate": 2.1152009682772645e-07, + "loss": 0.9044, "step": 33044 }, { - "epoch": 0.9377128263337117, + "epoch": 0.9364107795630366, "grad_norm": 0.0, - "learning_rate": 2.0280116567250928e-07, - "loss": 0.8264, + "learning_rate": 2.1133236857166062e-07, + "loss": 0.7732, "step": 33045 }, { - "epoch": 0.9377412031782065, + "epoch": 0.9364391170052991, "grad_norm": 0.0, - "learning_rate": 2.0261704951407003e-07, - "loss": 0.7963, + "learning_rate": 2.1114472276917654e-07, + "loss": 0.7915, "step": 33046 }, { - "epoch": 0.9377695800227015, + "epoch": 0.9364674544475615, "grad_norm": 0.0, - "learning_rate": 2.024330161145649e-07, - "loss": 0.8385, + "learning_rate": 2.1095715942185512e-07, + "loss": 0.9001, "step": 33047 }, { - "epoch": 0.9377979568671964, + "epoch": 0.936495791889824, "grad_norm": 0.0, - "learning_rate": 2.0224906547554933e-07, - "loss": 0.8521, + "learning_rate": 2.1076967853127738e-07, + "loss": 0.8908, "step": 33048 }, { - "epoch": 0.9378263337116912, + "epoch": 0.9365241293320865, "grad_norm": 0.0, - "learning_rate": 2.0206519759857545e-07, - "loss": 0.7537, + "learning_rate": 2.1058228009902094e-07, + "loss": 0.7736, "step": 33049 }, { - "epoch": 0.9378547105561862, + "epoch": 0.936552466774349, "grad_norm": 0.0, - "learning_rate": 2.0188141248519754e-07, - "loss": 0.8925, + "learning_rate": 2.103949641266656e-07, + "loss": 0.7822, "step": 33050 }, { - "epoch": 0.937883087400681, + "epoch": 0.9365808042166114, "grad_norm": 0.0, - "learning_rate": 2.0169771013696882e-07, - "loss": 0.8254, + "learning_rate": 2.1020773061578903e-07, + "loss": 0.7573, "step": 33051 }, { - "epoch": 0.9379114642451759, + "epoch": 0.9366091416588739, "grad_norm": 0.0, - "learning_rate": 2.0151409055543912e-07, - "loss": 0.7997, + "learning_rate": 2.1002057956796773e-07, + "loss": 0.6948, "step": 33052 }, { - "epoch": 0.9379398410896709, + "epoch": 0.9366374791011364, "grad_norm": 0.0, - "learning_rate": 2.013305537421606e-07, - "loss": 0.7889, + "learning_rate": 2.0983351098477932e-07, + "loss": 0.7569, "step": 33053 }, { - "epoch": 0.9379682179341657, + "epoch": 0.9366658165433988, "grad_norm": 0.0, - "learning_rate": 2.0114709969868308e-07, - "loss": 0.8618, + "learning_rate": 2.0964652486779814e-07, + "loss": 0.7186, "step": 33054 }, { - "epoch": 0.9379965947786606, + "epoch": 0.9366941539856612, "grad_norm": 0.0, - "learning_rate": 2.0096372842655641e-07, - "loss": 0.7532, + "learning_rate": 2.0945962121859954e-07, + "loss": 0.8061, "step": 33055 }, { - "epoch": 0.9380249716231555, + "epoch": 0.9367224914279237, "grad_norm": 0.0, - "learning_rate": 2.0078043992732943e-07, - "loss": 0.8035, + "learning_rate": 2.0927280003875783e-07, + "loss": 0.8209, "step": 33056 }, { - "epoch": 0.9380533484676504, + "epoch": 0.9367508288701861, "grad_norm": 0.0, - "learning_rate": 2.0059723420255084e-07, - "loss": 0.8732, + "learning_rate": 2.0908606132984732e-07, + "loss": 0.8365, "step": 33057 }, { - "epoch": 0.9380817253121453, + "epoch": 0.9367791663124486, "grad_norm": 0.0, - "learning_rate": 2.004141112537683e-07, - "loss": 0.8365, + "learning_rate": 2.088994050934412e-07, + "loss": 0.8322, "step": 33058 }, { - "epoch": 0.9381101021566401, + "epoch": 0.9368075037547111, "grad_norm": 0.0, - "learning_rate": 2.0023107108252836e-07, - "loss": 0.9015, + "learning_rate": 2.0871283133111153e-07, + "loss": 0.7107, "step": 33059 }, { - "epoch": 0.9381384790011351, + "epoch": 0.9368358411969736, "grad_norm": 0.0, - "learning_rate": 2.0004811369037646e-07, - "loss": 0.8276, + "learning_rate": 2.085263400444282e-07, + "loss": 0.7782, "step": 33060 }, { - "epoch": 0.93816685584563, + "epoch": 0.936864178639236, "grad_norm": 0.0, - "learning_rate": 1.9986523907885913e-07, - "loss": 0.8515, + "learning_rate": 2.0833993123496544e-07, + "loss": 0.8379, "step": 33061 }, { - "epoch": 0.9381952326901248, + "epoch": 0.9368925160814985, "grad_norm": 0.0, - "learning_rate": 1.996824472495207e-07, - "loss": 0.7366, + "learning_rate": 2.0815360490428983e-07, + "loss": 0.72, "step": 33062 }, { - "epoch": 0.9382236095346197, + "epoch": 0.936920853523761, "grad_norm": 0.0, - "learning_rate": 1.9949973820390546e-07, - "loss": 0.8481, + "learning_rate": 2.0796736105397232e-07, + "loss": 0.711, "step": 33063 }, { - "epoch": 0.9382519863791147, + "epoch": 0.9369491909660234, "grad_norm": 0.0, - "learning_rate": 1.9931711194355552e-07, - "loss": 0.7078, + "learning_rate": 2.0778119968558387e-07, + "loss": 0.8654, "step": 33064 }, { - "epoch": 0.9382803632236095, + "epoch": 0.9369775284082859, "grad_norm": 0.0, - "learning_rate": 1.9913456847001523e-07, - "loss": 0.7839, + "learning_rate": 2.0759512080068877e-07, + "loss": 0.7656, "step": 33065 }, { - "epoch": 0.9383087400681044, + "epoch": 0.9370058658505483, "grad_norm": 0.0, - "learning_rate": 1.9895210778482665e-07, - "loss": 0.7627, + "learning_rate": 2.0740912440085803e-07, + "loss": 0.8293, "step": 33066 }, { - "epoch": 0.9383371169125994, + "epoch": 0.9370342032928108, "grad_norm": 0.0, - "learning_rate": 1.9876972988952859e-07, - "loss": 0.8067, + "learning_rate": 2.072232104876548e-07, + "loss": 0.8727, "step": 33067 }, { - "epoch": 0.9383654937570942, + "epoch": 0.9370625407350732, "grad_norm": 0.0, - "learning_rate": 1.9858743478566312e-07, - "loss": 0.8969, + "learning_rate": 2.0703737906264788e-07, + "loss": 0.7872, "step": 33068 }, { - "epoch": 0.9383938706015891, + "epoch": 0.9370908781773357, "grad_norm": 0.0, - "learning_rate": 1.9840522247477234e-07, - "loss": 0.6472, + "learning_rate": 2.0685163012740039e-07, + "loss": 0.8268, "step": 33069 }, { - "epoch": 0.938422247446084, + "epoch": 0.9371192156195982, "grad_norm": 0.0, - "learning_rate": 1.982230929583906e-07, - "loss": 0.7929, + "learning_rate": 2.066659636834789e-07, + "loss": 0.7936, "step": 33070 }, { - "epoch": 0.9384506242905789, + "epoch": 0.9371475530618606, "grad_norm": 0.0, - "learning_rate": 1.9804104623806107e-07, - "loss": 0.7639, + "learning_rate": 2.064803797324466e-07, + "loss": 0.774, "step": 33071 }, { - "epoch": 0.9384790011350738, + "epoch": 0.9371758905041231, "grad_norm": 0.0, - "learning_rate": 1.9785908231531813e-07, - "loss": 0.7854, + "learning_rate": 2.062948782758678e-07, + "loss": 0.8156, "step": 33072 }, { - "epoch": 0.9385073779795686, + "epoch": 0.9372042279463856, "grad_norm": 0.0, - "learning_rate": 1.9767720119170054e-07, - "loss": 0.8153, + "learning_rate": 2.0610945931530347e-07, + "loss": 0.7682, "step": 33073 }, { - "epoch": 0.9385357548240636, + "epoch": 0.9372325653886481, "grad_norm": 0.0, - "learning_rate": 1.974954028687448e-07, - "loss": 0.856, + "learning_rate": 2.0592412285231677e-07, + "loss": 0.7505, "step": 33074 }, { - "epoch": 0.9385641316685585, + "epoch": 0.9372609028309105, "grad_norm": 0.0, - "learning_rate": 1.973136873479853e-07, - "loss": 0.8682, + "learning_rate": 2.057388688884665e-07, + "loss": 0.9424, "step": 33075 }, { - "epoch": 0.9385925085130533, + "epoch": 0.937289240273173, "grad_norm": 0.0, - "learning_rate": 1.9713205463095742e-07, - "loss": 0.7968, + "learning_rate": 2.0555369742531584e-07, + "loss": 0.762, "step": 33076 }, { - "epoch": 0.9386208853575483, + "epoch": 0.9373175777154354, "grad_norm": 0.0, - "learning_rate": 1.9695050471919773e-07, - "loss": 0.7286, + "learning_rate": 2.0536860846442353e-07, + "loss": 0.8695, "step": 33077 }, { - "epoch": 0.9386492622020431, + "epoch": 0.9373459151576978, "grad_norm": 0.0, - "learning_rate": 1.9676903761423617e-07, - "loss": 0.8115, + "learning_rate": 2.0518360200734833e-07, + "loss": 0.8603, "step": 33078 }, { - "epoch": 0.938677639046538, + "epoch": 0.9373742525999603, "grad_norm": 0.0, - "learning_rate": 1.96587653317607e-07, - "loss": 0.731, + "learning_rate": 2.0499867805564789e-07, + "loss": 0.8669, "step": 33079 }, { - "epoch": 0.9387060158910329, + "epoch": 0.9374025900422228, "grad_norm": 0.0, - "learning_rate": 1.9640635183084344e-07, - "loss": 0.8268, + "learning_rate": 2.0481383661088316e-07, + "loss": 0.7399, "step": 33080 }, { - "epoch": 0.9387343927355278, + "epoch": 0.9374309274844852, "grad_norm": 0.0, - "learning_rate": 1.9622513315547653e-07, - "loss": 0.7808, + "learning_rate": 2.0462907767460628e-07, + "loss": 0.8475, "step": 33081 }, { - "epoch": 0.9387627695800227, + "epoch": 0.9374592649267477, "grad_norm": 0.0, - "learning_rate": 1.9604399729303504e-07, - "loss": 0.7634, + "learning_rate": 2.0444440124837706e-07, + "loss": 0.8812, "step": 33082 }, { - "epoch": 0.9387911464245176, + "epoch": 0.9374876023690102, "grad_norm": 0.0, - "learning_rate": 1.9586294424505216e-07, - "loss": 0.8464, + "learning_rate": 2.0425980733375096e-07, + "loss": 0.7998, "step": 33083 }, { - "epoch": 0.9388195232690125, + "epoch": 0.9375159398112727, "grad_norm": 0.0, - "learning_rate": 1.9568197401305556e-07, - "loss": 0.7076, + "learning_rate": 2.0407529593228114e-07, + "loss": 0.8218, "step": 33084 }, { - "epoch": 0.9388479001135074, + "epoch": 0.9375442772535351, "grad_norm": 0.0, - "learning_rate": 1.9550108659857292e-07, - "loss": 0.8863, + "learning_rate": 2.0389086704552307e-07, + "loss": 0.8293, "step": 33085 }, { - "epoch": 0.9388762769580022, + "epoch": 0.9375726146957976, "grad_norm": 0.0, - "learning_rate": 1.9532028200313412e-07, - "loss": 0.8336, + "learning_rate": 2.0370652067502993e-07, + "loss": 0.8553, "step": 33086 }, { - "epoch": 0.9389046538024972, + "epoch": 0.93760095213806, "grad_norm": 0.0, - "learning_rate": 1.9513956022826464e-07, - "loss": 0.826, + "learning_rate": 2.0352225682235384e-07, + "loss": 0.7986, "step": 33087 }, { - "epoch": 0.9389330306469921, + "epoch": 0.9376292895803224, "grad_norm": 0.0, - "learning_rate": 1.949589212754943e-07, - "loss": 0.8357, + "learning_rate": 2.0333807548904906e-07, + "loss": 0.7124, "step": 33088 }, { - "epoch": 0.9389614074914869, + "epoch": 0.9376576270225849, "grad_norm": 0.0, - "learning_rate": 1.947783651463453e-07, - "loss": 0.7709, + "learning_rate": 2.0315397667666438e-07, + "loss": 0.8076, "step": 33089 }, { - "epoch": 0.9389897843359818, + "epoch": 0.9376859644648474, "grad_norm": 0.0, - "learning_rate": 1.9459789184234413e-07, - "loss": 0.7791, + "learning_rate": 2.0296996038675298e-07, + "loss": 0.8688, "step": 33090 }, { - "epoch": 0.9390181611804768, + "epoch": 0.9377143019071099, "grad_norm": 0.0, - "learning_rate": 1.9441750136501514e-07, - "loss": 0.7418, + "learning_rate": 2.0278602662086257e-07, + "loss": 0.7852, "step": 33091 }, { - "epoch": 0.9390465380249716, + "epoch": 0.9377426393493723, "grad_norm": 0.0, - "learning_rate": 1.9423719371588269e-07, - "loss": 0.7642, + "learning_rate": 2.0260217538054295e-07, + "loss": 0.8213, "step": 33092 }, { - "epoch": 0.9390749148694665, + "epoch": 0.9377709767916348, "grad_norm": 0.0, - "learning_rate": 1.9405696889646996e-07, - "loss": 0.8052, + "learning_rate": 2.0241840666734515e-07, + "loss": 0.7356, "step": 33093 }, { - "epoch": 0.9391032917139615, + "epoch": 0.9377993142338973, "grad_norm": 0.0, - "learning_rate": 1.9387682690829912e-07, - "loss": 0.8882, + "learning_rate": 2.0223472048281455e-07, + "loss": 0.8702, "step": 33094 }, { - "epoch": 0.9391316685584563, + "epoch": 0.9378276516761597, "grad_norm": 0.0, - "learning_rate": 1.9369676775289005e-07, - "loss": 0.7984, + "learning_rate": 2.0205111682849887e-07, + "loss": 0.7592, "step": 33095 }, { - "epoch": 0.9391600454029512, + "epoch": 0.9378559891184222, "grad_norm": 0.0, - "learning_rate": 1.9351679143176595e-07, - "loss": 0.785, + "learning_rate": 2.018675957059446e-07, + "loss": 0.7684, "step": 33096 }, { - "epoch": 0.939188422247446, + "epoch": 0.9378843265606847, "grad_norm": 0.0, - "learning_rate": 1.9333689794644672e-07, - "loss": 0.8288, + "learning_rate": 2.016841571166983e-07, + "loss": 0.753, "step": 33097 }, { - "epoch": 0.939216799091941, + "epoch": 0.937912664002947, "grad_norm": 0.0, - "learning_rate": 1.9315708729845117e-07, - "loss": 0.7253, + "learning_rate": 2.015008010623054e-07, + "loss": 0.7821, "step": 33098 }, { - "epoch": 0.9392451759364359, + "epoch": 0.9379410014452095, "grad_norm": 0.0, - "learning_rate": 1.9297735948929808e-07, - "loss": 0.6903, + "learning_rate": 2.013175275443102e-07, + "loss": 0.8273, "step": 33099 }, { - "epoch": 0.9392735527809307, + "epoch": 0.937969338887472, "grad_norm": 0.0, - "learning_rate": 1.9279771452050622e-07, - "loss": 0.7615, + "learning_rate": 2.0113433656425484e-07, + "loss": 0.8081, "step": 33100 }, { - "epoch": 0.9393019296254257, + "epoch": 0.9379976763297345, "grad_norm": 0.0, - "learning_rate": 1.9261815239359327e-07, - "loss": 0.8251, + "learning_rate": 2.0095122812368472e-07, + "loss": 0.8486, "step": 33101 }, { - "epoch": 0.9393303064699206, + "epoch": 0.9380260137719969, "grad_norm": 0.0, - "learning_rate": 1.924386731100758e-07, - "loss": 0.7294, + "learning_rate": 2.0076820222414083e-07, + "loss": 0.7673, "step": 33102 }, { - "epoch": 0.9393586833144154, + "epoch": 0.9380543512142594, "grad_norm": 0.0, - "learning_rate": 1.9225927667146927e-07, - "loss": 0.7933, + "learning_rate": 2.005852588671664e-07, + "loss": 0.8688, "step": 33103 }, { - "epoch": 0.9393870601589104, + "epoch": 0.9380826886565219, "grad_norm": 0.0, - "learning_rate": 1.9207996307929023e-07, - "loss": 0.8586, + "learning_rate": 2.0040239805429906e-07, + "loss": 0.841, "step": 33104 }, { - "epoch": 0.9394154370034052, + "epoch": 0.9381110260987843, "grad_norm": 0.0, - "learning_rate": 1.9190073233505301e-07, - "loss": 0.7478, + "learning_rate": 2.0021961978708204e-07, + "loss": 0.7985, "step": 33105 }, { - "epoch": 0.9394438138479001, + "epoch": 0.9381393635410468, "grad_norm": 0.0, - "learning_rate": 1.9172158444026978e-07, - "loss": 0.7522, + "learning_rate": 2.0003692406705523e-07, + "loss": 0.8701, "step": 33106 }, { - "epoch": 0.939472190692395, + "epoch": 0.9381677009833093, "grad_norm": 0.0, - "learning_rate": 1.9154251939645595e-07, - "loss": 0.8081, + "learning_rate": 1.9985431089575624e-07, + "loss": 0.7082, "step": 33107 }, { - "epoch": 0.9395005675368899, + "epoch": 0.9381960384255718, "grad_norm": 0.0, - "learning_rate": 1.9136353720512367e-07, - "loss": 0.8362, + "learning_rate": 1.996717802747228e-07, + "loss": 0.7686, "step": 33108 }, { - "epoch": 0.9395289443813848, + "epoch": 0.9382243758678341, "grad_norm": 0.0, - "learning_rate": 1.9118463786778397e-07, - "loss": 0.8692, + "learning_rate": 1.994893322054925e-07, + "loss": 0.7668, "step": 33109 }, { - "epoch": 0.9395573212258796, + "epoch": 0.9382527133100966, "grad_norm": 0.0, - "learning_rate": 1.9100582138594892e-07, - "loss": 0.8029, + "learning_rate": 1.9930696668960415e-07, + "loss": 0.7774, "step": 33110 }, { - "epoch": 0.9395856980703746, + "epoch": 0.9382810507523591, "grad_norm": 0.0, - "learning_rate": 1.9082708776112847e-07, - "loss": 0.8332, + "learning_rate": 1.9912468372859317e-07, + "loss": 0.9053, "step": 33111 }, { - "epoch": 0.9396140749148695, + "epoch": 0.9383093881946215, "grad_norm": 0.0, - "learning_rate": 1.906484369948325e-07, - "loss": 0.7782, + "learning_rate": 1.989424833239939e-07, + "loss": 0.8387, "step": 33112 }, { - "epoch": 0.9396424517593643, + "epoch": 0.938337725636884, "grad_norm": 0.0, - "learning_rate": 1.904698690885698e-07, - "loss": 0.8718, + "learning_rate": 1.9876036547734067e-07, + "loss": 0.819, "step": 33113 }, { - "epoch": 0.9396708286038592, + "epoch": 0.9383660630791465, "grad_norm": 0.0, - "learning_rate": 1.9029138404385027e-07, - "loss": 0.8143, + "learning_rate": 1.9857833019017004e-07, + "loss": 0.89, "step": 33114 }, { - "epoch": 0.9396992054483542, + "epoch": 0.938394400521409, "grad_norm": 0.0, - "learning_rate": 1.9011298186217942e-07, - "loss": 0.748, + "learning_rate": 1.9839637746401298e-07, + "loss": 0.8478, "step": 33115 }, { - "epoch": 0.939727582292849, + "epoch": 0.9384227379636714, "grad_norm": 0.0, - "learning_rate": 1.8993466254506489e-07, - "loss": 0.8293, + "learning_rate": 1.982145073004027e-07, + "loss": 0.8062, "step": 33116 }, { - "epoch": 0.9397559591373439, + "epoch": 0.9384510754059339, "grad_norm": 0.0, - "learning_rate": 1.8975642609401434e-07, - "loss": 0.8478, + "learning_rate": 1.9803271970087246e-07, + "loss": 0.8266, "step": 33117 }, { - "epoch": 0.9397843359818389, + "epoch": 0.9384794128481964, "grad_norm": 0.0, - "learning_rate": 1.8957827251053107e-07, - "loss": 0.7706, + "learning_rate": 1.97851014666951e-07, + "loss": 0.7939, "step": 33118 }, { - "epoch": 0.9398127128263337, + "epoch": 0.9385077502904587, "grad_norm": 0.0, - "learning_rate": 1.8940020179612272e-07, - "loss": 0.7596, + "learning_rate": 1.9766939220017155e-07, + "loss": 0.7729, "step": 33119 }, { - "epoch": 0.9398410896708286, + "epoch": 0.9385360877327212, "grad_norm": 0.0, - "learning_rate": 1.8922221395229035e-07, - "loss": 0.7641, + "learning_rate": 1.974878523020629e-07, + "loss": 0.8986, "step": 33120 }, { - "epoch": 0.9398694665153235, + "epoch": 0.9385644251749837, "grad_norm": 0.0, - "learning_rate": 1.8904430898053938e-07, - "loss": 0.8048, + "learning_rate": 1.9730639497415272e-07, + "loss": 0.6718, "step": 33121 }, { - "epoch": 0.9398978433598184, + "epoch": 0.9385927626172461, "grad_norm": 0.0, - "learning_rate": 1.8886648688237308e-07, - "loss": 0.787, + "learning_rate": 1.9712502021797197e-07, + "loss": 0.7924, "step": 33122 }, { - "epoch": 0.9399262202043133, + "epoch": 0.9386211000595086, "grad_norm": 0.0, - "learning_rate": 1.8868874765929136e-07, - "loss": 0.9207, + "learning_rate": 1.9694372803504724e-07, + "loss": 0.8696, "step": 33123 }, { - "epoch": 0.9399545970488081, + "epoch": 0.9386494375017711, "grad_norm": 0.0, - "learning_rate": 1.8851109131279743e-07, - "loss": 0.8439, + "learning_rate": 1.967625184269062e-07, + "loss": 0.8373, "step": 33124 }, { - "epoch": 0.9399829738933031, + "epoch": 0.9386777749440336, "grad_norm": 0.0, - "learning_rate": 1.8833351784439237e-07, - "loss": 0.8546, + "learning_rate": 1.9658139139507427e-07, + "loss": 0.7478, "step": 33125 }, { - "epoch": 0.940011350737798, + "epoch": 0.938706112386296, "grad_norm": 0.0, - "learning_rate": 1.881560272555738e-07, - "loss": 0.821, + "learning_rate": 1.96400346941078e-07, + "loss": 0.7644, "step": 33126 }, { - "epoch": 0.9400397275822928, + "epoch": 0.9387344498285585, "grad_norm": 0.0, - "learning_rate": 1.8797861954784168e-07, - "loss": 0.7833, + "learning_rate": 1.9621938506644178e-07, + "loss": 0.7525, "step": 33127 }, { - "epoch": 0.9400681044267878, + "epoch": 0.938762787270821, "grad_norm": 0.0, - "learning_rate": 1.8780129472269704e-07, - "loss": 0.7777, + "learning_rate": 1.960385057726899e-07, + "loss": 0.6307, "step": 33128 }, { - "epoch": 0.9400964812712826, + "epoch": 0.9387911247130833, "grad_norm": 0.0, - "learning_rate": 1.8762405278163532e-07, - "loss": 0.916, + "learning_rate": 1.9585770906134671e-07, + "loss": 0.8032, "step": 33129 }, { - "epoch": 0.9401248581157775, + "epoch": 0.9388194621553458, "grad_norm": 0.0, - "learning_rate": 1.874468937261531e-07, - "loss": 0.8195, + "learning_rate": 1.9567699493393545e-07, + "loss": 0.8039, "step": 33130 }, { - "epoch": 0.9401532349602724, + "epoch": 0.9388477995976083, "grad_norm": 0.0, - "learning_rate": 1.872698175577492e-07, - "loss": 0.8815, + "learning_rate": 1.9549636339197708e-07, + "loss": 0.8006, "step": 33131 }, { - "epoch": 0.9401816118047673, + "epoch": 0.9388761370398708, "grad_norm": 0.0, - "learning_rate": 1.8709282427791799e-07, - "loss": 0.8042, + "learning_rate": 1.9531581443699376e-07, + "loss": 0.7641, "step": 33132 }, { - "epoch": 0.9402099886492622, + "epoch": 0.9389044744821332, "grad_norm": 0.0, - "learning_rate": 1.86915913888156e-07, - "loss": 0.8276, + "learning_rate": 1.9513534807050538e-07, + "loss": 0.7223, "step": 33133 }, { - "epoch": 0.9402383654937571, + "epoch": 0.9389328119243957, "grad_norm": 0.0, - "learning_rate": 1.867390863899543e-07, - "loss": 0.7281, + "learning_rate": 1.9495496429403403e-07, + "loss": 0.8444, "step": 33134 }, { - "epoch": 0.940266742338252, + "epoch": 0.9389611493666582, "grad_norm": 0.0, - "learning_rate": 1.8656234178480948e-07, - "loss": 0.8436, + "learning_rate": 1.9477466310909633e-07, + "loss": 0.8535, "step": 33135 }, { - "epoch": 0.9402951191827469, + "epoch": 0.9389894868089206, "grad_norm": 0.0, - "learning_rate": 1.8638568007421477e-07, - "loss": 0.7695, + "learning_rate": 1.9459444451721433e-07, + "loss": 0.8429, "step": 33136 }, { - "epoch": 0.9403234960272417, + "epoch": 0.9390178242511831, "grad_norm": 0.0, - "learning_rate": 1.8620910125966009e-07, - "loss": 0.8642, + "learning_rate": 1.9441430851990352e-07, + "loss": 0.7498, "step": 33137 }, { - "epoch": 0.9403518728717367, + "epoch": 0.9390461616934456, "grad_norm": 0.0, - "learning_rate": 1.8603260534263868e-07, - "loss": 0.8331, + "learning_rate": 1.9423425511868376e-07, + "loss": 0.7796, "step": 33138 }, { - "epoch": 0.9403802497162316, + "epoch": 0.9390744991357081, "grad_norm": 0.0, - "learning_rate": 1.858561923246427e-07, - "loss": 0.8297, + "learning_rate": 1.9405428431506835e-07, + "loss": 0.8722, "step": 33139 }, { - "epoch": 0.9404086265607264, + "epoch": 0.9391028365779704, "grad_norm": 0.0, - "learning_rate": 1.8567986220715872e-07, - "loss": 0.7652, + "learning_rate": 1.93874396110576e-07, + "loss": 0.7592, "step": 33140 }, { - "epoch": 0.9404370034052213, + "epoch": 0.9391311740202329, "grad_norm": 0.0, - "learning_rate": 1.855036149916778e-07, - "loss": 0.8636, + "learning_rate": 1.936945905067211e-07, + "loss": 0.7679, "step": 33141 }, { - "epoch": 0.9404653802497163, + "epoch": 0.9391595114624954, "grad_norm": 0.0, - "learning_rate": 1.8532745067968983e-07, - "loss": 0.7744, + "learning_rate": 1.9351486750501803e-07, + "loss": 0.7328, "step": 33142 }, { - "epoch": 0.9404937570942111, + "epoch": 0.9391878489047578, "grad_norm": 0.0, - "learning_rate": 1.8515136927268251e-07, - "loss": 0.7218, + "learning_rate": 1.9333522710698106e-07, + "loss": 0.8043, "step": 33143 }, { - "epoch": 0.940522133938706, + "epoch": 0.9392161863470203, "grad_norm": 0.0, - "learning_rate": 1.8497537077214133e-07, - "loss": 0.8696, + "learning_rate": 1.9315566931412233e-07, + "loss": 0.9236, "step": 33144 }, { - "epoch": 0.940550510783201, + "epoch": 0.9392445237892828, "grad_norm": 0.0, - "learning_rate": 1.847994551795562e-07, - "loss": 0.8137, + "learning_rate": 1.929761941279551e-07, + "loss": 0.8685, "step": 33145 }, { - "epoch": 0.9405788876276958, + "epoch": 0.9392728612315452, "grad_norm": 0.0, - "learning_rate": 1.846236224964093e-07, - "loss": 0.8038, + "learning_rate": 1.9279680154999148e-07, + "loss": 0.7902, "step": 33146 }, { - "epoch": 0.9406072644721907, + "epoch": 0.9393011986738077, "grad_norm": 0.0, - "learning_rate": 1.844478727241894e-07, - "loss": 0.7418, + "learning_rate": 1.9261749158174248e-07, + "loss": 0.7671, "step": 33147 }, { - "epoch": 0.9406356413166855, + "epoch": 0.9393295361160702, "grad_norm": 0.0, - "learning_rate": 1.8427220586437978e-07, - "loss": 0.802, + "learning_rate": 1.9243826422471689e-07, + "loss": 0.8038, "step": 33148 }, { - "epoch": 0.9406640181611805, + "epoch": 0.9393578735583327, "grad_norm": 0.0, - "learning_rate": 1.8409662191846256e-07, - "loss": 0.8068, + "learning_rate": 1.9225911948042685e-07, + "loss": 0.7716, "step": 33149 }, { - "epoch": 0.9406923950056754, + "epoch": 0.939386211000595, "grad_norm": 0.0, - "learning_rate": 1.8392112088792323e-07, - "loss": 0.8848, + "learning_rate": 1.9208005735038005e-07, + "loss": 0.8513, "step": 33150 }, { - "epoch": 0.9407207718501702, + "epoch": 0.9394145484428575, "grad_norm": 0.0, - "learning_rate": 1.8374570277424398e-07, - "loss": 0.8259, + "learning_rate": 1.9190107783608635e-07, + "loss": 0.8355, "step": 33151 }, { - "epoch": 0.9407491486946652, + "epoch": 0.93944288588512, "grad_norm": 0.0, - "learning_rate": 1.8357036757890578e-07, - "loss": 0.7324, + "learning_rate": 1.9172218093905015e-07, + "loss": 0.7468, "step": 33152 }, { - "epoch": 0.94077752553916, + "epoch": 0.9394712233273824, "grad_norm": 0.0, - "learning_rate": 1.8339511530339082e-07, - "loss": 0.8357, + "learning_rate": 1.9154336666078132e-07, + "loss": 0.8204, "step": 33153 }, { - "epoch": 0.9408059023836549, + "epoch": 0.9394995607696449, "grad_norm": 0.0, - "learning_rate": 1.832199459491768e-07, - "loss": 0.8615, + "learning_rate": 1.9136463500278424e-07, + "loss": 0.7779, "step": 33154 }, { - "epoch": 0.9408342792281499, + "epoch": 0.9395278982119074, "grad_norm": 0.0, - "learning_rate": 1.8304485951774587e-07, - "loss": 0.7132, + "learning_rate": 1.9118598596656547e-07, + "loss": 0.7846, "step": 33155 }, { - "epoch": 0.9408626560726447, + "epoch": 0.9395562356541699, "grad_norm": 0.0, - "learning_rate": 1.8286985601057682e-07, - "loss": 0.7878, + "learning_rate": 1.9100741955363044e-07, + "loss": 0.8141, "step": 33156 }, { - "epoch": 0.9408910329171396, + "epoch": 0.9395845730964323, "grad_norm": 0.0, - "learning_rate": 1.826949354291474e-07, - "loss": 0.6996, + "learning_rate": 1.9082893576548133e-07, + "loss": 0.762, "step": 33157 }, { - "epoch": 0.9409194097616345, + "epoch": 0.9396129105386948, "grad_norm": 0.0, - "learning_rate": 1.8252009777493417e-07, - "loss": 0.7994, + "learning_rate": 1.9065053460362247e-07, + "loss": 0.7991, "step": 33158 }, { - "epoch": 0.9409477866061294, + "epoch": 0.9396412479809573, "grad_norm": 0.0, - "learning_rate": 1.82345343049416e-07, - "loss": 0.7746, + "learning_rate": 1.9047221606955713e-07, + "loss": 0.8659, "step": 33159 }, { - "epoch": 0.9409761634506243, + "epoch": 0.9396695854232197, "grad_norm": 0.0, - "learning_rate": 1.821706712540683e-07, - "loss": 0.8613, + "learning_rate": 1.902939801647863e-07, + "loss": 0.8626, "step": 33160 }, { - "epoch": 0.9410045402951192, + "epoch": 0.9396979228654821, "grad_norm": 0.0, - "learning_rate": 1.8199608239036547e-07, - "loss": 0.772, + "learning_rate": 1.901158268908132e-07, + "loss": 0.8007, "step": 33161 }, { - "epoch": 0.9410329171396141, + "epoch": 0.9397262603077446, "grad_norm": 0.0, - "learning_rate": 1.8182157645978303e-07, - "loss": 0.7902, + "learning_rate": 1.899377562491367e-07, + "loss": 0.8307, "step": 33162 }, { - "epoch": 0.941061293984109, + "epoch": 0.9397545977500071, "grad_norm": 0.0, - "learning_rate": 1.8164715346379536e-07, - "loss": 0.861, + "learning_rate": 1.8975976824125775e-07, + "loss": 0.7458, "step": 33163 }, { - "epoch": 0.9410896708286038, + "epoch": 0.9397829351922695, "grad_norm": 0.0, - "learning_rate": 1.8147281340387458e-07, - "loss": 0.8066, + "learning_rate": 1.8958186286867518e-07, + "loss": 0.7902, "step": 33164 }, { - "epoch": 0.9411180476730987, + "epoch": 0.939811272634532, "grad_norm": 0.0, - "learning_rate": 1.8129855628149617e-07, - "loss": 0.7899, + "learning_rate": 1.8940404013288672e-07, + "loss": 0.8037, "step": 33165 }, { - "epoch": 0.9411464245175937, + "epoch": 0.9398396100767945, "grad_norm": 0.0, - "learning_rate": 1.81124382098129e-07, - "loss": 0.8339, + "learning_rate": 1.8922630003539222e-07, + "loss": 0.8395, "step": 33166 }, { - "epoch": 0.9411748013620885, + "epoch": 0.9398679475190569, "grad_norm": 0.0, - "learning_rate": 1.809502908552463e-07, - "loss": 0.8311, + "learning_rate": 1.8904864257768718e-07, + "loss": 0.8867, "step": 33167 }, { - "epoch": 0.9412031782065834, + "epoch": 0.9398962849613194, "grad_norm": 0.0, - "learning_rate": 1.80776282554318e-07, - "loss": 0.8627, + "learning_rate": 1.888710677612693e-07, + "loss": 0.8629, "step": 33168 }, { - "epoch": 0.9412315550510784, + "epoch": 0.9399246224035819, "grad_norm": 0.0, - "learning_rate": 1.8060235719681297e-07, - "loss": 0.8453, + "learning_rate": 1.886935755876329e-07, + "loss": 0.8042, "step": 33169 }, { - "epoch": 0.9412599318955732, + "epoch": 0.9399529598458443, "grad_norm": 0.0, - "learning_rate": 1.804285147842011e-07, - "loss": 0.8632, + "learning_rate": 1.885161660582746e-07, + "loss": 0.8288, "step": 33170 }, { - "epoch": 0.9412883087400681, + "epoch": 0.9399812972881068, "grad_norm": 0.0, - "learning_rate": 1.8025475531795233e-07, - "loss": 0.8571, + "learning_rate": 1.8833883917468654e-07, + "loss": 0.8565, "step": 33171 }, { - "epoch": 0.941316685584563, + "epoch": 0.9400096347303692, "grad_norm": 0.0, - "learning_rate": 1.800810787995322e-07, - "loss": 0.7697, + "learning_rate": 1.8816159493836528e-07, + "loss": 0.8027, "step": 33172 }, { - "epoch": 0.9413450624290579, + "epoch": 0.9400379721726317, "grad_norm": 0.0, - "learning_rate": 1.7990748523040835e-07, - "loss": 0.7479, + "learning_rate": 1.8798443335080185e-07, + "loss": 0.8875, "step": 33173 }, { - "epoch": 0.9413734392735528, + "epoch": 0.9400663096148941, "grad_norm": 0.0, - "learning_rate": 1.7973397461204856e-07, - "loss": 0.8195, + "learning_rate": 1.8780735441348842e-07, + "loss": 0.7801, "step": 33174 }, { - "epoch": 0.9414018161180476, + "epoch": 0.9400946470571566, "grad_norm": 0.0, - "learning_rate": 1.7956054694591607e-07, - "loss": 0.7347, + "learning_rate": 1.876303581279193e-07, + "loss": 0.7581, "step": 33175 }, { - "epoch": 0.9414301929625426, + "epoch": 0.9401229844994191, "grad_norm": 0.0, - "learning_rate": 1.793872022334775e-07, - "loss": 0.8239, + "learning_rate": 1.8745344449558222e-07, + "loss": 0.8384, "step": 33176 }, { - "epoch": 0.9414585698070375, + "epoch": 0.9401513219416815, "grad_norm": 0.0, - "learning_rate": 1.7921394047619613e-07, - "loss": 0.8098, + "learning_rate": 1.8727661351796932e-07, + "loss": 0.8081, "step": 33177 }, { - "epoch": 0.9414869466515323, + "epoch": 0.940179659383944, "grad_norm": 0.0, - "learning_rate": 1.790407616755363e-07, - "loss": 0.8474, + "learning_rate": 1.870998651965683e-07, + "loss": 0.7348, "step": 33178 }, { - "epoch": 0.9415153234960273, + "epoch": 0.9402079968262065, "grad_norm": 0.0, - "learning_rate": 1.7886766583296245e-07, - "loss": 0.8081, + "learning_rate": 1.8692319953286908e-07, + "loss": 0.8175, "step": 33179 }, { - "epoch": 0.9415437003405221, + "epoch": 0.940236334268469, "grad_norm": 0.0, - "learning_rate": 1.7869465294993338e-07, - "loss": 0.812, + "learning_rate": 1.8674661652836045e-07, + "loss": 0.8174, "step": 33180 }, { - "epoch": 0.941572077185017, + "epoch": 0.9402646717107314, "grad_norm": 0.0, - "learning_rate": 1.7852172302791126e-07, - "loss": 0.797, + "learning_rate": 1.8657011618452902e-07, + "loss": 0.8406, "step": 33181 }, { - "epoch": 0.9416004540295119, + "epoch": 0.9402930091529939, "grad_norm": 0.0, - "learning_rate": 1.7834887606835938e-07, - "loss": 0.7562, + "learning_rate": 1.8639369850286137e-07, + "loss": 0.7629, "step": 33182 }, { - "epoch": 0.9416288308740068, + "epoch": 0.9403213465952563, "grad_norm": 0.0, - "learning_rate": 1.7817611207273544e-07, - "loss": 0.7178, + "learning_rate": 1.862173634848441e-07, + "loss": 0.7659, "step": 33183 }, { - "epoch": 0.9416572077185017, + "epoch": 0.9403496840375187, "grad_norm": 0.0, - "learning_rate": 1.7800343104249827e-07, - "loss": 0.7081, + "learning_rate": 1.8604111113196154e-07, + "loss": 0.8596, "step": 33184 }, { - "epoch": 0.9416855845629966, + "epoch": 0.9403780214797812, "grad_norm": 0.0, - "learning_rate": 1.7783083297911008e-07, - "loss": 0.8269, + "learning_rate": 1.858649414456992e-07, + "loss": 0.7437, "step": 33185 }, { - "epoch": 0.9417139614074915, + "epoch": 0.9404063589220437, "grad_norm": 0.0, - "learning_rate": 1.776583178840241e-07, - "loss": 0.8, + "learning_rate": 1.8568885442754148e-07, + "loss": 0.7636, "step": 33186 }, { - "epoch": 0.9417423382519864, + "epoch": 0.9404346963643062, "grad_norm": 0.0, - "learning_rate": 1.7748588575870029e-07, - "loss": 0.7927, + "learning_rate": 1.8551285007897046e-07, + "loss": 0.8628, "step": 33187 }, { - "epoch": 0.9417707150964812, + "epoch": 0.9404630338065686, "grad_norm": 0.0, - "learning_rate": 1.7731353660459637e-07, - "loss": 0.7553, + "learning_rate": 1.8533692840146944e-07, + "loss": 0.8188, "step": 33188 }, { - "epoch": 0.9417990919409762, + "epoch": 0.9404913712488311, "grad_norm": 0.0, - "learning_rate": 1.7714127042316454e-07, - "loss": 0.8952, + "learning_rate": 1.8516108939651945e-07, + "loss": 0.8202, "step": 33189 }, { - "epoch": 0.9418274687854711, + "epoch": 0.9405197086910936, "grad_norm": 0.0, - "learning_rate": 1.7696908721586248e-07, - "loss": 0.7164, + "learning_rate": 1.8498533306560374e-07, + "loss": 0.8276, "step": 33190 }, { - "epoch": 0.9418558456299659, + "epoch": 0.940548046133356, "grad_norm": 0.0, - "learning_rate": 1.767969869841457e-07, - "loss": 0.7807, + "learning_rate": 1.848096594102e-07, + "loss": 0.6824, "step": 33191 }, { - "epoch": 0.9418842224744608, + "epoch": 0.9405763835756185, "grad_norm": 0.0, - "learning_rate": 1.766249697294642e-07, - "loss": 0.6435, + "learning_rate": 1.8463406843178933e-07, + "loss": 0.6722, "step": 33192 }, { - "epoch": 0.9419125993189558, + "epoch": 0.940604721017881, "grad_norm": 0.0, - "learning_rate": 1.7645303545327453e-07, - "loss": 0.8333, + "learning_rate": 1.844585601318516e-07, + "loss": 0.8409, "step": 33193 }, { - "epoch": 0.9419409761634506, + "epoch": 0.9406330584601433, "grad_norm": 0.0, - "learning_rate": 1.7628118415702667e-07, - "loss": 0.7992, + "learning_rate": 1.8428313451186452e-07, + "loss": 0.8322, "step": 33194 }, { - "epoch": 0.9419693530079455, + "epoch": 0.9406613959024058, "grad_norm": 0.0, - "learning_rate": 1.761094158421739e-07, - "loss": 0.9277, + "learning_rate": 1.8410779157330362e-07, + "loss": 0.8025, "step": 33195 }, { - "epoch": 0.9419977298524405, + "epoch": 0.9406897333446683, "grad_norm": 0.0, - "learning_rate": 1.7593773051016617e-07, - "loss": 0.8292, + "learning_rate": 1.8393253131764988e-07, + "loss": 0.7038, "step": 33196 }, { - "epoch": 0.9420261066969353, + "epoch": 0.9407180707869308, "grad_norm": 0.0, - "learning_rate": 1.7576612816245454e-07, - "loss": 0.7278, + "learning_rate": 1.8375735374637659e-07, + "loss": 0.8573, "step": 33197 }, { - "epoch": 0.9420544835414302, + "epoch": 0.9407464082291932, "grad_norm": 0.0, - "learning_rate": 1.7559460880048784e-07, - "loss": 0.8297, + "learning_rate": 1.8358225886096038e-07, + "loss": 0.7607, "step": 33198 }, { - "epoch": 0.942082860385925, + "epoch": 0.9407747456714557, "grad_norm": 0.0, - "learning_rate": 1.7542317242571493e-07, - "loss": 0.7313, + "learning_rate": 1.8340724666287556e-07, + "loss": 0.8181, "step": 33199 }, { - "epoch": 0.94211123723042, + "epoch": 0.9408030831137182, "grad_norm": 0.0, - "learning_rate": 1.7525181903958465e-07, - "loss": 0.7962, + "learning_rate": 1.832323171535977e-07, + "loss": 0.7065, "step": 33200 }, { - "epoch": 0.9421396140749149, + "epoch": 0.9408314205559806, "grad_norm": 0.0, - "learning_rate": 1.750805486435436e-07, - "loss": 0.7717, + "learning_rate": 1.8305747033459885e-07, + "loss": 0.8533, "step": 33201 }, { - "epoch": 0.9421679909194097, + "epoch": 0.9408597579982431, "grad_norm": 0.0, - "learning_rate": 1.7490936123903845e-07, - "loss": 0.7378, + "learning_rate": 1.8288270620735237e-07, + "loss": 0.8652, "step": 33202 }, { - "epoch": 0.9421963677639047, + "epoch": 0.9408880954405056, "grad_norm": 0.0, - "learning_rate": 1.7473825682751576e-07, - "loss": 0.7268, + "learning_rate": 1.827080247733315e-07, + "loss": 0.7716, "step": 33203 }, { - "epoch": 0.9422247446083996, + "epoch": 0.940916432882768, "grad_norm": 0.0, - "learning_rate": 1.745672354104211e-07, - "loss": 0.797, + "learning_rate": 1.8253342603400503e-07, + "loss": 0.8692, "step": 33204 }, { - "epoch": 0.9422531214528944, + "epoch": 0.9409447703250304, "grad_norm": 0.0, - "learning_rate": 1.743962969891988e-07, - "loss": 0.7148, + "learning_rate": 1.8235890999084515e-07, + "loss": 0.9185, "step": 33205 }, { - "epoch": 0.9422814982973893, + "epoch": 0.9409731077672929, "grad_norm": 0.0, - "learning_rate": 1.7422544156529218e-07, - "loss": 0.7449, + "learning_rate": 1.821844766453229e-07, + "loss": 0.8293, "step": 33206 }, { - "epoch": 0.9423098751418842, + "epoch": 0.9410014452095554, "grad_norm": 0.0, - "learning_rate": 1.7405466914014456e-07, - "loss": 0.7947, + "learning_rate": 1.8201012599890598e-07, + "loss": 0.7478, "step": 33207 }, { - "epoch": 0.9423382519863791, + "epoch": 0.9410297826518178, "grad_norm": 0.0, - "learning_rate": 1.738839797151992e-07, - "loss": 0.751, + "learning_rate": 1.8183585805306325e-07, + "loss": 0.7405, "step": 33208 }, { - "epoch": 0.942366628830874, + "epoch": 0.9410581200940803, "grad_norm": 0.0, - "learning_rate": 1.7371337329189718e-07, - "loss": 0.7239, + "learning_rate": 1.816616728092646e-07, + "loss": 0.7675, "step": 33209 }, { - "epoch": 0.9423950056753689, + "epoch": 0.9410864575363428, "grad_norm": 0.0, - "learning_rate": 1.735428498716818e-07, - "loss": 0.8295, + "learning_rate": 1.8148757026897335e-07, + "loss": 0.8942, "step": 33210 }, { - "epoch": 0.9424233825198638, + "epoch": 0.9411147949786053, "grad_norm": 0.0, - "learning_rate": 1.7337240945598965e-07, - "loss": 0.9081, + "learning_rate": 1.813135504336594e-07, + "loss": 0.8561, "step": 33211 }, { - "epoch": 0.9424517593643587, + "epoch": 0.9411431324208677, "grad_norm": 0.0, - "learning_rate": 1.7320205204626295e-07, - "loss": 0.8128, + "learning_rate": 1.8113961330478714e-07, + "loss": 0.8035, "step": 33212 }, { - "epoch": 0.9424801362088536, + "epoch": 0.9411714698631302, "grad_norm": 0.0, - "learning_rate": 1.7303177764394163e-07, - "loss": 0.7236, + "learning_rate": 1.8096575888382205e-07, + "loss": 0.8055, "step": 33213 }, { - "epoch": 0.9425085130533485, + "epoch": 0.9411998073053927, "grad_norm": 0.0, - "learning_rate": 1.7286158625046122e-07, - "loss": 0.7711, + "learning_rate": 1.8079198717222967e-07, + "loss": 0.7626, "step": 33214 }, { - "epoch": 0.9425368898978433, + "epoch": 0.941228144747655, "grad_norm": 0.0, - "learning_rate": 1.726914778672606e-07, - "loss": 0.8362, + "learning_rate": 1.8061829817147103e-07, + "loss": 0.8465, "step": 33215 }, { - "epoch": 0.9425652667423382, + "epoch": 0.9412564821899175, "grad_norm": 0.0, - "learning_rate": 1.7252145249577856e-07, - "loss": 0.8606, + "learning_rate": 1.8044469188301161e-07, + "loss": 0.793, "step": 33216 }, { - "epoch": 0.9425936435868332, + "epoch": 0.94128481963218, "grad_norm": 0.0, - "learning_rate": 1.7235151013744844e-07, - "loss": 0.8307, + "learning_rate": 1.802711683083136e-07, + "loss": 0.8669, "step": 33217 }, { - "epoch": 0.942622020431328, + "epoch": 0.9413131570744424, "grad_norm": 0.0, - "learning_rate": 1.7218165079370574e-07, - "loss": 0.7972, + "learning_rate": 1.800977274488369e-07, + "loss": 0.8085, "step": 33218 }, { - "epoch": 0.9426503972758229, + "epoch": 0.9413414945167049, "grad_norm": 0.0, - "learning_rate": 1.7201187446598823e-07, - "loss": 0.771, + "learning_rate": 1.7992436930604484e-07, + "loss": 0.7226, "step": 33219 }, { - "epoch": 0.9426787741203179, + "epoch": 0.9413698319589674, "grad_norm": 0.0, - "learning_rate": 1.7184218115572805e-07, - "loss": 0.8876, + "learning_rate": 1.7975109388139511e-07, + "loss": 0.794, "step": 33220 }, { - "epoch": 0.9427071509648127, + "epoch": 0.9413981694012299, "grad_norm": 0.0, - "learning_rate": 1.716725708643574e-07, - "loss": 0.7681, + "learning_rate": 1.7957790117634877e-07, + "loss": 0.9688, "step": 33221 }, { - "epoch": 0.9427355278093076, + "epoch": 0.9414265068434923, "grad_norm": 0.0, - "learning_rate": 1.7150304359331182e-07, - "loss": 0.7009, + "learning_rate": 1.7940479119236576e-07, + "loss": 0.8314, "step": 33222 }, { - "epoch": 0.9427639046538024, + "epoch": 0.9414548442857548, "grad_norm": 0.0, - "learning_rate": 1.7133359934402017e-07, - "loss": 0.7961, + "learning_rate": 1.7923176393090158e-07, + "loss": 0.8257, "step": 33223 }, { - "epoch": 0.9427922814982974, + "epoch": 0.9414831817280173, "grad_norm": 0.0, - "learning_rate": 1.7116423811791794e-07, - "loss": 0.7572, + "learning_rate": 1.7905881939341617e-07, + "loss": 0.7338, "step": 33224 }, { - "epoch": 0.9428206583427923, + "epoch": 0.9415115191702796, "grad_norm": 0.0, - "learning_rate": 1.7099495991643066e-07, - "loss": 0.8554, + "learning_rate": 1.7888595758136396e-07, + "loss": 0.8153, "step": 33225 }, { - "epoch": 0.9428490351872871, + "epoch": 0.9415398566125421, "grad_norm": 0.0, - "learning_rate": 1.7082576474099277e-07, - "loss": 0.8201, + "learning_rate": 1.7871317849620262e-07, + "loss": 0.7195, "step": 33226 }, { - "epoch": 0.9428774120317821, + "epoch": 0.9415681940548046, "grad_norm": 0.0, - "learning_rate": 1.7065665259303087e-07, - "loss": 0.9063, + "learning_rate": 1.7854048213938767e-07, + "loss": 0.8579, "step": 33227 }, { - "epoch": 0.942905788876277, + "epoch": 0.9415965314970671, "grad_norm": 0.0, - "learning_rate": 1.7048762347397385e-07, - "loss": 0.7519, + "learning_rate": 1.7836786851237354e-07, + "loss": 0.722, "step": 33228 }, { - "epoch": 0.9429341657207718, + "epoch": 0.9416248689393295, "grad_norm": 0.0, - "learning_rate": 1.7031867738524943e-07, - "loss": 0.8276, + "learning_rate": 1.7819533761661346e-07, + "loss": 0.7161, "step": 33229 }, { - "epoch": 0.9429625425652668, + "epoch": 0.941653206381592, "grad_norm": 0.0, - "learning_rate": 1.7014981432828537e-07, - "loss": 0.7456, + "learning_rate": 1.7802288945356184e-07, + "loss": 0.6929, "step": 33230 }, { - "epoch": 0.9429909194097617, + "epoch": 0.9416815438238545, "grad_norm": 0.0, - "learning_rate": 1.699810343045083e-07, - "loss": 0.8253, + "learning_rate": 1.7785052402467086e-07, + "loss": 0.848, "step": 33231 }, { - "epoch": 0.9430192962542565, + "epoch": 0.9417098812661169, "grad_norm": 0.0, - "learning_rate": 1.6981233731534263e-07, - "loss": 0.7604, + "learning_rate": 1.7767824133139265e-07, + "loss": 0.8411, "step": 33232 }, { - "epoch": 0.9430476730987514, + "epoch": 0.9417382187083794, "grad_norm": 0.0, - "learning_rate": 1.696437233622139e-07, - "loss": 0.8325, + "learning_rate": 1.7750604137517614e-07, + "loss": 0.758, "step": 33233 }, { - "epoch": 0.9430760499432463, + "epoch": 0.9417665561506419, "grad_norm": 0.0, - "learning_rate": 1.6947519244654654e-07, - "loss": 0.7419, + "learning_rate": 1.7733392415747452e-07, + "loss": 0.7953, "step": 33234 }, { - "epoch": 0.9431044267877412, + "epoch": 0.9417948935929044, "grad_norm": 0.0, - "learning_rate": 1.6930674456976494e-07, - "loss": 0.7982, + "learning_rate": 1.771618896797378e-07, + "loss": 0.805, "step": 33235 }, { - "epoch": 0.9431328036322361, + "epoch": 0.9418232310351667, "grad_norm": 0.0, - "learning_rate": 1.6913837973329129e-07, - "loss": 0.8092, + "learning_rate": 1.7698993794341368e-07, + "loss": 0.9186, "step": 33236 }, { - "epoch": 0.943161180476731, + "epoch": 0.9418515684774292, "grad_norm": 0.0, - "learning_rate": 1.689700979385467e-07, - "loss": 0.8633, + "learning_rate": 1.7681806894995102e-07, + "loss": 0.7537, "step": 33237 }, { - "epoch": 0.9431895573212259, + "epoch": 0.9418799059196917, "grad_norm": 0.0, - "learning_rate": 1.6880189918695334e-07, - "loss": 0.6842, + "learning_rate": 1.766462827007964e-07, + "loss": 0.7567, "step": 33238 }, { - "epoch": 0.9432179341657208, + "epoch": 0.9419082433619541, "grad_norm": 0.0, - "learning_rate": 1.6863378347993343e-07, - "loss": 0.6992, + "learning_rate": 1.7647457919739873e-07, + "loss": 0.8597, "step": 33239 }, { - "epoch": 0.9432463110102156, + "epoch": 0.9419365808042166, "grad_norm": 0.0, - "learning_rate": 1.6846575081890472e-07, - "loss": 0.7282, + "learning_rate": 1.763029584412046e-07, + "loss": 0.9316, "step": 33240 }, { - "epoch": 0.9432746878547106, + "epoch": 0.9419649182464791, "grad_norm": 0.0, - "learning_rate": 1.6829780120528938e-07, - "loss": 0.8217, + "learning_rate": 1.7613142043365728e-07, + "loss": 0.839, "step": 33241 }, { - "epoch": 0.9433030646992054, + "epoch": 0.9419932556887415, "grad_norm": 0.0, - "learning_rate": 1.6812993464050298e-07, - "loss": 0.7066, + "learning_rate": 1.759599651762034e-07, + "loss": 0.8476, "step": 33242 }, { - "epoch": 0.9433314415437003, + "epoch": 0.942021593131004, "grad_norm": 0.0, - "learning_rate": 1.6796215112596547e-07, - "loss": 0.9032, + "learning_rate": 1.7578859267028736e-07, + "loss": 0.8118, "step": 33243 }, { - "epoch": 0.9433598183881953, + "epoch": 0.9420499305732665, "grad_norm": 0.0, - "learning_rate": 1.6779445066309351e-07, - "loss": 0.8796, + "learning_rate": 1.7561730291735025e-07, + "loss": 0.8092, "step": 33244 }, { - "epoch": 0.9433881952326901, + "epoch": 0.942078268015529, "grad_norm": 0.0, - "learning_rate": 1.6762683325330486e-07, - "loss": 0.8949, + "learning_rate": 1.7544609591883865e-07, + "loss": 0.8228, "step": 33245 }, { - "epoch": 0.943416572077185, + "epoch": 0.9421066054577913, "grad_norm": 0.0, - "learning_rate": 1.6745929889801283e-07, - "loss": 0.7664, + "learning_rate": 1.7527497167619256e-07, + "loss": 0.7724, "step": 33246 }, { - "epoch": 0.94344494892168, + "epoch": 0.9421349429000538, "grad_norm": 0.0, - "learning_rate": 1.6729184759863515e-07, - "loss": 0.8072, + "learning_rate": 1.7510393019085303e-07, + "loss": 0.7664, "step": 33247 }, { - "epoch": 0.9434733257661748, + "epoch": 0.9421632803423163, "grad_norm": 0.0, - "learning_rate": 1.6712447935658515e-07, - "loss": 0.8028, + "learning_rate": 1.7493297146426225e-07, + "loss": 0.7784, "step": 33248 }, { - "epoch": 0.9435017026106697, + "epoch": 0.9421916177845787, "grad_norm": 0.0, - "learning_rate": 1.6695719417327617e-07, - "loss": 0.8142, + "learning_rate": 1.7476209549785906e-07, + "loss": 0.7443, "step": 33249 }, { - "epoch": 0.9435300794551645, + "epoch": 0.9422199552268412, "grad_norm": 0.0, - "learning_rate": 1.667899920501226e-07, - "loss": 0.7569, + "learning_rate": 1.7459130229308342e-07, + "loss": 0.8297, "step": 33250 }, { - "epoch": 0.9435584562996595, + "epoch": 0.9422482926691037, "grad_norm": 0.0, - "learning_rate": 1.6662287298853553e-07, - "loss": 0.7747, + "learning_rate": 1.7442059185137306e-07, + "loss": 0.8416, "step": 33251 }, { - "epoch": 0.9435868331441544, + "epoch": 0.9422766301113662, "grad_norm": 0.0, - "learning_rate": 1.6645583698992606e-07, - "loss": 0.7736, + "learning_rate": 1.7424996417416796e-07, + "loss": 0.7846, "step": 33252 }, { - "epoch": 0.9436152099886492, + "epoch": 0.9423049675536286, "grad_norm": 0.0, - "learning_rate": 1.6628888405570752e-07, - "loss": 0.8049, + "learning_rate": 1.740794192629025e-07, + "loss": 0.8355, "step": 33253 }, { - "epoch": 0.9436435868331442, + "epoch": 0.9423333049958911, "grad_norm": 0.0, - "learning_rate": 1.6612201418728767e-07, - "loss": 0.7802, + "learning_rate": 1.7390895711901668e-07, + "loss": 0.7784, "step": 33254 }, { - "epoch": 0.9436719636776391, + "epoch": 0.9423616424381536, "grad_norm": 0.0, - "learning_rate": 1.659552273860776e-07, - "loss": 0.8308, + "learning_rate": 1.7373857774394376e-07, + "loss": 0.7986, "step": 33255 }, { - "epoch": 0.9437003405221339, + "epoch": 0.942389979880416, "grad_norm": 0.0, - "learning_rate": 1.657885236534873e-07, - "loss": 0.8117, + "learning_rate": 1.735682811391204e-07, + "loss": 0.8552, "step": 33256 }, { - "epoch": 0.9437287173666288, + "epoch": 0.9424183173226784, "grad_norm": 0.0, - "learning_rate": 1.6562190299092229e-07, - "loss": 0.7802, + "learning_rate": 1.7339806730597875e-07, + "loss": 0.7219, "step": 33257 }, { - "epoch": 0.9437570942111237, + "epoch": 0.9424466547649409, "grad_norm": 0.0, - "learning_rate": 1.6545536539979034e-07, - "loss": 0.8102, + "learning_rate": 1.7322793624595547e-07, + "loss": 0.8001, "step": 33258 }, { - "epoch": 0.9437854710556186, + "epoch": 0.9424749922072034, "grad_norm": 0.0, - "learning_rate": 1.6528891088150145e-07, - "loss": 0.7101, + "learning_rate": 1.7305788796048274e-07, + "loss": 0.8212, "step": 33259 }, { - "epoch": 0.9438138479001135, + "epoch": 0.9425033296494658, "grad_norm": 0.0, - "learning_rate": 1.6512253943745671e-07, - "loss": 0.7512, + "learning_rate": 1.728879224509905e-07, + "loss": 0.694, "step": 33260 }, { - "epoch": 0.9438422247446084, + "epoch": 0.9425316670917283, "grad_norm": 0.0, - "learning_rate": 1.649562510690661e-07, + "learning_rate": 1.7271803971891432e-07, "loss": 0.8089, "step": 33261 }, { - "epoch": 0.9438706015891033, + "epoch": 0.9425600045339908, "grad_norm": 0.0, - "learning_rate": 1.647900457777307e-07, - "loss": 0.672, + "learning_rate": 1.7254823976568301e-07, + "loss": 0.6896, "step": 33262 }, { - "epoch": 0.9438989784335982, + "epoch": 0.9425883419762532, "grad_norm": 0.0, - "learning_rate": 1.6462392356485611e-07, - "loss": 0.7434, + "learning_rate": 1.7237852259272658e-07, + "loss": 0.8221, "step": 33263 }, { - "epoch": 0.9439273552780931, + "epoch": 0.9426166794185157, "grad_norm": 0.0, - "learning_rate": 1.6445788443184562e-07, - "loss": 0.8038, + "learning_rate": 1.7220888820147607e-07, + "loss": 0.7783, "step": 33264 }, { - "epoch": 0.943955732122588, + "epoch": 0.9426450168607782, "grad_norm": 0.0, - "learning_rate": 1.6429192838010143e-07, - "loss": 0.8009, + "learning_rate": 1.7203933659335926e-07, + "loss": 0.7024, "step": 33265 }, { - "epoch": 0.9439841089670828, + "epoch": 0.9426733543030406, "grad_norm": 0.0, - "learning_rate": 1.6412605541102465e-07, - "loss": 0.819, + "learning_rate": 1.7186986776980386e-07, + "loss": 0.8548, "step": 33266 }, { - "epoch": 0.9440124858115777, + "epoch": 0.942701691745303, "grad_norm": 0.0, - "learning_rate": 1.6396026552601863e-07, - "loss": 0.6895, + "learning_rate": 1.7170048173223985e-07, + "loss": 0.7308, "step": 33267 }, { - "epoch": 0.9440408626560727, + "epoch": 0.9427300291875655, "grad_norm": 0.0, - "learning_rate": 1.637945587264822e-07, - "loss": 0.8173, + "learning_rate": 1.7153117848209056e-07, + "loss": 0.7453, "step": 33268 }, { - "epoch": 0.9440692395005675, + "epoch": 0.942758366629828, "grad_norm": 0.0, - "learning_rate": 1.6362893501381317e-07, - "loss": 0.8632, + "learning_rate": 1.7136195802078481e-07, + "loss": 0.8387, "step": 33269 }, { - "epoch": 0.9440976163450624, + "epoch": 0.9427867040720904, "grad_norm": 0.0, - "learning_rate": 1.6346339438941483e-07, - "loss": 0.7896, + "learning_rate": 1.7119282034974705e-07, + "loss": 0.7654, "step": 33270 }, { - "epoch": 0.9441259931895574, + "epoch": 0.9428150415143529, "grad_norm": 0.0, - "learning_rate": 1.6329793685468276e-07, - "loss": 0.8698, + "learning_rate": 1.7102376547040166e-07, + "loss": 0.8109, "step": 33271 }, { - "epoch": 0.9441543700340522, + "epoch": 0.9428433789566154, "grad_norm": 0.0, - "learning_rate": 1.6313256241101584e-07, - "loss": 0.8584, + "learning_rate": 1.7085479338417422e-07, + "loss": 0.7743, "step": 33272 }, { - "epoch": 0.9441827468785471, + "epoch": 0.9428717163988778, "grad_norm": 0.0, - "learning_rate": 1.629672710598096e-07, - "loss": 0.71, + "learning_rate": 1.7068590409248464e-07, + "loss": 0.7756, "step": 33273 }, { - "epoch": 0.944211123723042, + "epoch": 0.9429000538411403, "grad_norm": 0.0, - "learning_rate": 1.628020628024618e-07, - "loss": 0.82, + "learning_rate": 1.7051709759675962e-07, + "loss": 0.7699, "step": 33274 }, { - "epoch": 0.9442395005675369, + "epoch": 0.9429283912834028, "grad_norm": 0.0, - "learning_rate": 1.6263693764036581e-07, - "loss": 0.8691, + "learning_rate": 1.7034837389841906e-07, + "loss": 0.8285, "step": 33275 }, { - "epoch": 0.9442678774120318, + "epoch": 0.9429567287256653, "grad_norm": 0.0, - "learning_rate": 1.6247189557491937e-07, - "loss": 0.7663, + "learning_rate": 1.7017973299888302e-07, + "loss": 0.8237, "step": 33276 }, { - "epoch": 0.9442962542565266, + "epoch": 0.9429850661679277, "grad_norm": 0.0, - "learning_rate": 1.6230693660751472e-07, - "loss": 0.7749, + "learning_rate": 1.7001117489957363e-07, + "loss": 0.8831, "step": 33277 }, { - "epoch": 0.9443246311010216, + "epoch": 0.9430134036101901, "grad_norm": 0.0, - "learning_rate": 1.6214206073954518e-07, - "loss": 0.8147, + "learning_rate": 1.6984269960191092e-07, + "loss": 0.9291, "step": 33278 }, { - "epoch": 0.9443530079455165, + "epoch": 0.9430417410524526, "grad_norm": 0.0, - "learning_rate": 1.619772679724041e-07, - "loss": 0.784, + "learning_rate": 1.6967430710731258e-07, + "loss": 0.7842, "step": 33279 }, { - "epoch": 0.9443813847900113, + "epoch": 0.943070078494715, "grad_norm": 0.0, - "learning_rate": 1.6181255830748366e-07, - "loss": 0.7651, + "learning_rate": 1.6950599741719864e-07, + "loss": 0.8481, "step": 33280 }, { - "epoch": 0.9444097616345063, + "epoch": 0.9430984159369775, "grad_norm": 0.0, - "learning_rate": 1.6164793174617388e-07, - "loss": 0.778, + "learning_rate": 1.6933777053298684e-07, + "loss": 0.8126, "step": 33281 }, { - "epoch": 0.9444381384790012, + "epoch": 0.94312675337924, "grad_norm": 0.0, - "learning_rate": 1.6148338828986698e-07, - "loss": 0.8013, + "learning_rate": 1.691696264560927e-07, + "loss": 0.7927, "step": 33282 }, { - "epoch": 0.944466515323496, + "epoch": 0.9431550908215025, "grad_norm": 0.0, - "learning_rate": 1.6131892793995184e-07, - "loss": 0.7841, + "learning_rate": 1.6900156518793398e-07, + "loss": 0.6961, "step": 33283 }, { - "epoch": 0.9444948921679909, + "epoch": 0.9431834282637649, "grad_norm": 0.0, - "learning_rate": 1.6115455069781848e-07, - "loss": 0.7729, + "learning_rate": 1.688335867299251e-07, + "loss": 0.6726, "step": 33284 }, { - "epoch": 0.9445232690124858, + "epoch": 0.9432117657060274, "grad_norm": 0.0, - "learning_rate": 1.6099025656485357e-07, - "loss": 0.9056, + "learning_rate": 1.6866569108348164e-07, + "loss": 0.7564, "step": 33285 }, { - "epoch": 0.9445516458569807, + "epoch": 0.9432401031482899, "grad_norm": 0.0, - "learning_rate": 1.6082604554244706e-07, - "loss": 0.9222, + "learning_rate": 1.6849787825001796e-07, + "loss": 0.7841, "step": 33286 }, { - "epoch": 0.9445800227014756, + "epoch": 0.9432684405905523, "grad_norm": 0.0, - "learning_rate": 1.606619176319868e-07, - "loss": 0.7879, + "learning_rate": 1.683301482309474e-07, + "loss": 0.7573, "step": 33287 }, { - "epoch": 0.9446083995459705, + "epoch": 0.9432967780328148, "grad_norm": 0.0, - "learning_rate": 1.6049787283485608e-07, - "loss": 0.7815, + "learning_rate": 1.6816250102768327e-07, + "loss": 0.8344, "step": 33288 }, { - "epoch": 0.9446367763904654, + "epoch": 0.9433251154750772, "grad_norm": 0.0, - "learning_rate": 1.6033391115244156e-07, - "loss": 0.7923, + "learning_rate": 1.679949366416367e-07, + "loss": 0.8525, "step": 33289 }, { - "epoch": 0.9446651532349603, + "epoch": 0.9433534529173396, "grad_norm": 0.0, - "learning_rate": 1.6017003258612996e-07, - "loss": 0.7753, + "learning_rate": 1.6782745507422094e-07, + "loss": 0.7985, "step": 33290 }, { - "epoch": 0.9446935300794551, + "epoch": 0.9433817903596021, "grad_norm": 0.0, - "learning_rate": 1.6000623713730457e-07, - "loss": 0.7171, + "learning_rate": 1.6766005632684378e-07, + "loss": 0.8052, "step": 33291 }, { - "epoch": 0.9447219069239501, + "epoch": 0.9434101278018646, "grad_norm": 0.0, - "learning_rate": 1.5984252480734763e-07, - "loss": 0.7577, + "learning_rate": 1.6749274040091857e-07, + "loss": 0.5718, "step": 33292 }, { - "epoch": 0.9447502837684449, + "epoch": 0.9434384652441271, "grad_norm": 0.0, - "learning_rate": 1.5967889559764472e-07, - "loss": 0.8194, + "learning_rate": 1.67325507297853e-07, + "loss": 0.871, "step": 33293 }, { - "epoch": 0.9447786606129398, + "epoch": 0.9434668026863895, "grad_norm": 0.0, - "learning_rate": 1.5951534950957582e-07, - "loss": 0.6961, + "learning_rate": 1.6715835701905604e-07, + "loss": 0.6904, "step": 33294 }, { - "epoch": 0.9448070374574348, + "epoch": 0.943495140128652, "grad_norm": 0.0, - "learning_rate": 1.5935188654452206e-07, - "loss": 0.7103, + "learning_rate": 1.6699128956593535e-07, + "loss": 0.7762, "step": 33295 }, { - "epoch": 0.9448354143019296, + "epoch": 0.9435234775709145, "grad_norm": 0.0, - "learning_rate": 1.5918850670386677e-07, - "loss": 0.8477, + "learning_rate": 1.6682430493989876e-07, + "loss": 0.8211, "step": 33296 }, { - "epoch": 0.9448637911464245, + "epoch": 0.9435518150131769, "grad_norm": 0.0, - "learning_rate": 1.5902520998898773e-07, - "loss": 0.8229, + "learning_rate": 1.666574031423518e-07, + "loss": 0.7954, "step": 33297 }, { - "epoch": 0.9448921679909195, + "epoch": 0.9435801524554394, "grad_norm": 0.0, - "learning_rate": 1.588619964012661e-07, - "loss": 0.7606, + "learning_rate": 1.6649058417470222e-07, + "loss": 0.8693, "step": 33298 }, { - "epoch": 0.9449205448354143, + "epoch": 0.9436084898977019, "grad_norm": 0.0, - "learning_rate": 1.586988659420785e-07, - "loss": 0.797, + "learning_rate": 1.6632384803835334e-07, + "loss": 0.8546, "step": 33299 }, { - "epoch": 0.9449489216799092, + "epoch": 0.9436368273399643, "grad_norm": 0.0, - "learning_rate": 1.5853581861280387e-07, - "loss": 0.7752, + "learning_rate": 1.661571947347096e-07, + "loss": 0.7813, "step": 33300 }, { - "epoch": 0.944977298524404, + "epoch": 0.9436651647822267, "grad_norm": 0.0, - "learning_rate": 1.5837285441482108e-07, - "loss": 0.7966, + "learning_rate": 1.6599062426517653e-07, + "loss": 0.7854, "step": 33301 }, { - "epoch": 0.945005675368899, + "epoch": 0.9436935022244892, "grad_norm": 0.0, - "learning_rate": 1.582099733495035e-07, - "loss": 0.8117, + "learning_rate": 1.6582413663115636e-07, + "loss": 0.8404, "step": 33302 }, { - "epoch": 0.9450340522133939, + "epoch": 0.9437218396667517, "grad_norm": 0.0, - "learning_rate": 1.5804717541822888e-07, - "loss": 0.8405, + "learning_rate": 1.656577318340502e-07, + "loss": 0.8627, "step": 33303 }, { - "epoch": 0.9450624290578887, + "epoch": 0.9437501771090141, "grad_norm": 0.0, - "learning_rate": 1.5788446062237394e-07, - "loss": 0.7249, + "learning_rate": 1.6549140987526136e-07, + "loss": 0.8806, "step": 33304 }, { - "epoch": 0.9450908059023837, + "epoch": 0.9437785145512766, "grad_norm": 0.0, - "learning_rate": 1.577218289633098e-07, - "loss": 0.8497, + "learning_rate": 1.6532517075618982e-07, + "loss": 0.8126, "step": 33305 }, { - "epoch": 0.9451191827468786, + "epoch": 0.9438068519935391, "grad_norm": 0.0, - "learning_rate": 1.5755928044241198e-07, - "loss": 0.8894, + "learning_rate": 1.651590144782378e-07, + "loss": 0.6935, "step": 33306 }, { - "epoch": 0.9451475595913734, + "epoch": 0.9438351894358015, "grad_norm": 0.0, - "learning_rate": 1.5739681506105388e-07, - "loss": 0.8046, + "learning_rate": 1.6499294104280195e-07, + "loss": 0.7948, "step": 33307 }, { - "epoch": 0.9451759364358683, + "epoch": 0.943863526878064, "grad_norm": 0.0, - "learning_rate": 1.5723443282060657e-07, - "loss": 0.7761, + "learning_rate": 1.6482695045128338e-07, + "loss": 0.7845, "step": 33308 }, { - "epoch": 0.9452043132803633, + "epoch": 0.9438918643203265, "grad_norm": 0.0, - "learning_rate": 1.5707213372244345e-07, - "loss": 0.8293, + "learning_rate": 1.6466104270508099e-07, + "loss": 0.8118, "step": 33309 }, { - "epoch": 0.9452326901248581, + "epoch": 0.943920201762589, "grad_norm": 0.0, - "learning_rate": 1.569099177679345e-07, - "loss": 0.7976, + "learning_rate": 1.644952178055892e-07, + "loss": 0.7911, "step": 33310 }, { - "epoch": 0.945261066969353, + "epoch": 0.9439485392048513, "grad_norm": 0.0, - "learning_rate": 1.5674778495844868e-07, - "loss": 0.7976, + "learning_rate": 1.6432947575420578e-07, + "loss": 0.8001, "step": 33311 }, { - "epoch": 0.9452894438138479, + "epoch": 0.9439768766471138, "grad_norm": 0.0, - "learning_rate": 1.5658573529535815e-07, - "loss": 0.8317, + "learning_rate": 1.641638165523296e-07, + "loss": 0.7869, "step": 33312 }, { - "epoch": 0.9453178206583428, + "epoch": 0.9440052140893763, "grad_norm": 0.0, - "learning_rate": 1.5642376878002963e-07, - "loss": 0.8215, + "learning_rate": 1.6399824020135292e-07, + "loss": 0.8314, "step": 33313 }, { - "epoch": 0.9453461975028377, + "epoch": 0.9440335515316387, "grad_norm": 0.0, - "learning_rate": 1.5626188541383203e-07, - "loss": 0.8297, + "learning_rate": 1.6383274670267234e-07, + "loss": 0.7428, "step": 33314 }, { - "epoch": 0.9453745743473326, + "epoch": 0.9440618889739012, "grad_norm": 0.0, - "learning_rate": 1.5610008519813312e-07, - "loss": 0.8426, + "learning_rate": 1.6366733605768014e-07, + "loss": 0.8313, "step": 33315 }, { - "epoch": 0.9454029511918275, + "epoch": 0.9440902264161637, "grad_norm": 0.0, - "learning_rate": 1.5593836813429852e-07, - "loss": 0.7625, + "learning_rate": 1.6350200826777073e-07, + "loss": 0.7929, "step": 33316 }, { - "epoch": 0.9454313280363223, + "epoch": 0.9441185638584262, "grad_norm": 0.0, - "learning_rate": 1.55776734223696e-07, - "loss": 0.8032, + "learning_rate": 1.6333676333433745e-07, + "loss": 0.8182, "step": 33317 }, { - "epoch": 0.9454597048808172, + "epoch": 0.9441469013006886, "grad_norm": 0.0, - "learning_rate": 1.5561518346769e-07, - "loss": 0.7268, + "learning_rate": 1.631716012587703e-07, + "loss": 0.7866, "step": 33318 }, { - "epoch": 0.9454880817253122, + "epoch": 0.9441752387429511, "grad_norm": 0.0, - "learning_rate": 1.5545371586764391e-07, - "loss": 0.8071, + "learning_rate": 1.630065220424626e-07, + "loss": 0.7991, "step": 33319 }, { - "epoch": 0.945516458569807, + "epoch": 0.9442035761852136, "grad_norm": 0.0, - "learning_rate": 1.552923314249244e-07, - "loss": 0.7743, + "learning_rate": 1.628415256868032e-07, + "loss": 0.8351, "step": 33320 }, { - "epoch": 0.9455448354143019, + "epoch": 0.9442319136274759, "grad_norm": 0.0, - "learning_rate": 1.551310301408926e-07, - "loss": 0.7966, + "learning_rate": 1.626766121931822e-07, + "loss": 0.7774, "step": 33321 }, { - "epoch": 0.9455732122587969, + "epoch": 0.9442602510697384, "grad_norm": 0.0, - "learning_rate": 1.5496981201691075e-07, - "loss": 0.8, + "learning_rate": 1.6251178156298952e-07, + "loss": 0.7574, "step": 33322 }, { - "epoch": 0.9456015891032917, + "epoch": 0.9442885885120009, "grad_norm": 0.0, - "learning_rate": 1.548086770543422e-07, - "loss": 0.7865, + "learning_rate": 1.6234703379761297e-07, + "loss": 0.8089, "step": 33323 }, { - "epoch": 0.9456299659477866, + "epoch": 0.9443169259542634, "grad_norm": 0.0, - "learning_rate": 1.5464762525454702e-07, - "loss": 0.8294, + "learning_rate": 1.6218236889844142e-07, + "loss": 0.8221, "step": 33324 }, { - "epoch": 0.9456583427922814, + "epoch": 0.9443452633965258, "grad_norm": 0.0, - "learning_rate": 1.5448665661888629e-07, - "loss": 0.7318, + "learning_rate": 1.6201778686686043e-07, + "loss": 0.7939, "step": 33325 }, { - "epoch": 0.9456867196367764, + "epoch": 0.9443736008387883, "grad_norm": 0.0, - "learning_rate": 1.5432577114871894e-07, - "loss": 0.7944, + "learning_rate": 1.6185328770425667e-07, + "loss": 0.9152, "step": 33326 }, { - "epoch": 0.9457150964812713, + "epoch": 0.9444019382810508, "grad_norm": 0.0, - "learning_rate": 1.54164968845405e-07, - "loss": 0.7385, + "learning_rate": 1.6168887141201572e-07, + "loss": 0.9014, "step": 33327 }, { - "epoch": 0.9457434733257661, + "epoch": 0.9444302757233132, "grad_norm": 0.0, - "learning_rate": 1.5400424971030114e-07, - "loss": 0.7801, + "learning_rate": 1.615245379915231e-07, + "loss": 0.7674, "step": 33328 }, { - "epoch": 0.9457718501702611, + "epoch": 0.9444586131655757, "grad_norm": 0.0, - "learning_rate": 1.538436137447663e-07, - "loss": 0.8109, + "learning_rate": 1.613602874441622e-07, + "loss": 0.8055, "step": 33329 }, { - "epoch": 0.945800227014756, + "epoch": 0.9444869506078382, "grad_norm": 0.0, - "learning_rate": 1.5368306095015716e-07, - "loss": 0.6722, + "learning_rate": 1.6119611977131743e-07, + "loss": 0.8983, "step": 33330 }, { - "epoch": 0.9458286038592508, + "epoch": 0.9445152880501005, "grad_norm": 0.0, - "learning_rate": 1.5352259132783043e-07, - "loss": 0.7965, + "learning_rate": 1.6103203497437104e-07, + "loss": 0.8058, "step": 33331 }, { - "epoch": 0.9458569807037458, + "epoch": 0.944543625492363, "grad_norm": 0.0, - "learning_rate": 1.5336220487914054e-07, - "loss": 0.8705, + "learning_rate": 1.6086803305470633e-07, + "loss": 0.8207, "step": 33332 }, { - "epoch": 0.9458853575482407, + "epoch": 0.9445719629346255, "grad_norm": 0.0, - "learning_rate": 1.5320190160544312e-07, - "loss": 0.778, + "learning_rate": 1.6070411401370335e-07, + "loss": 0.8525, "step": 33333 }, { - "epoch": 0.9459137343927355, + "epoch": 0.944600300376888, "grad_norm": 0.0, - "learning_rate": 1.530416815080915e-07, - "loss": 0.7754, + "learning_rate": 1.6054027785274317e-07, + "loss": 0.8421, "step": 33334 }, { - "epoch": 0.9459421112372304, + "epoch": 0.9446286378191504, "grad_norm": 0.0, - "learning_rate": 1.5288154458844019e-07, - "loss": 0.8404, + "learning_rate": 1.6037652457320697e-07, + "loss": 0.718, "step": 33335 }, { - "epoch": 0.9459704880817253, + "epoch": 0.9446569752614129, "grad_norm": 0.0, - "learning_rate": 1.5272149084784026e-07, - "loss": 0.7981, + "learning_rate": 1.6021285417647247e-07, + "loss": 0.8907, "step": 33336 }, { - "epoch": 0.9459988649262202, + "epoch": 0.9446853127036754, "grad_norm": 0.0, - "learning_rate": 1.5256152028764404e-07, - "loss": 0.821, + "learning_rate": 1.6004926666391863e-07, + "loss": 0.7211, "step": 33337 }, { - "epoch": 0.9460272417707151, + "epoch": 0.9447136501459378, "grad_norm": 0.0, - "learning_rate": 1.5240163290920485e-07, - "loss": 0.8251, + "learning_rate": 1.598857620369243e-07, + "loss": 0.7962, "step": 33338 }, { - "epoch": 0.94605561861521, + "epoch": 0.9447419875882003, "grad_norm": 0.0, - "learning_rate": 1.522418287138705e-07, - "loss": 0.7835, + "learning_rate": 1.5972234029686617e-07, + "loss": 0.7631, "step": 33339 }, { - "epoch": 0.9460839954597049, + "epoch": 0.9447703250304628, "grad_norm": 0.0, - "learning_rate": 1.5208210770299215e-07, - "loss": 0.9389, + "learning_rate": 1.5955900144511982e-07, + "loss": 0.858, "step": 33340 }, { - "epoch": 0.9461123723041998, + "epoch": 0.9447986624727253, "grad_norm": 0.0, - "learning_rate": 1.519224698779198e-07, - "loss": 0.8435, + "learning_rate": 1.5939574548306414e-07, + "loss": 0.8165, "step": 33341 }, { - "epoch": 0.9461407491486946, + "epoch": 0.9448269999149876, "grad_norm": 0.0, - "learning_rate": 1.5176291524000018e-07, - "loss": 0.9167, + "learning_rate": 1.5923257241207024e-07, + "loss": 0.8632, "step": 33342 }, { - "epoch": 0.9461691259931896, + "epoch": 0.9448553373572501, "grad_norm": 0.0, - "learning_rate": 1.5160344379058223e-07, - "loss": 0.8091, + "learning_rate": 1.5906948223351593e-07, + "loss": 0.8725, "step": 33343 }, { - "epoch": 0.9461975028376844, + "epoch": 0.9448836747995126, "grad_norm": 0.0, - "learning_rate": 1.5144405553101372e-07, - "loss": 0.7651, + "learning_rate": 1.5890647494877342e-07, + "loss": 0.858, "step": 33344 }, { - "epoch": 0.9462258796821793, + "epoch": 0.944912012241775, "grad_norm": 0.0, - "learning_rate": 1.5128475046263914e-07, - "loss": 0.8731, + "learning_rate": 1.587435505592161e-07, + "loss": 0.7441, "step": 33345 }, { - "epoch": 0.9462542565266743, + "epoch": 0.9449403496840375, "grad_norm": 0.0, - "learning_rate": 1.5112552858680408e-07, - "loss": 0.8193, + "learning_rate": 1.5858070906621615e-07, + "loss": 0.8117, "step": 33346 }, { - "epoch": 0.9462826333711691, + "epoch": 0.9449686871263, "grad_norm": 0.0, - "learning_rate": 1.5096638990485524e-07, - "loss": 0.8716, + "learning_rate": 1.5841795047114584e-07, + "loss": 0.8649, "step": 33347 }, { - "epoch": 0.946311010215664, + "epoch": 0.9449970245685625, "grad_norm": 0.0, - "learning_rate": 1.50807334418136e-07, - "loss": 0.7693, + "learning_rate": 1.5825527477537518e-07, + "loss": 0.809, "step": 33348 }, { - "epoch": 0.946339387060159, + "epoch": 0.9450253620108249, "grad_norm": 0.0, - "learning_rate": 1.5064836212799084e-07, - "loss": 0.7285, + "learning_rate": 1.5809268198027527e-07, + "loss": 0.8686, "step": 33349 }, { - "epoch": 0.9463677639046538, + "epoch": 0.9450536994530874, "grad_norm": 0.0, - "learning_rate": 1.504894730357609e-07, - "loss": 0.8424, + "learning_rate": 1.5793017208721507e-07, + "loss": 0.892, "step": 33350 }, { - "epoch": 0.9463961407491487, + "epoch": 0.9450820368953499, "grad_norm": 0.0, - "learning_rate": 1.5033066714278843e-07, - "loss": 0.9508, + "learning_rate": 1.5776774509756455e-07, + "loss": 0.755, "step": 33351 }, { - "epoch": 0.9464245175936435, + "epoch": 0.9451103743376122, "grad_norm": 0.0, - "learning_rate": 1.5017194445041682e-07, - "loss": 0.8673, + "learning_rate": 1.576054010126904e-07, + "loss": 0.846, "step": 33352 }, { - "epoch": 0.9464528944381385, + "epoch": 0.9451387117798747, "grad_norm": 0.0, - "learning_rate": 1.50013304959985e-07, - "loss": 0.7407, + "learning_rate": 1.5744313983396153e-07, + "loss": 0.7871, "step": 33353 }, { - "epoch": 0.9464812712826334, + "epoch": 0.9451670492221372, "grad_norm": 0.0, - "learning_rate": 1.498547486728341e-07, - "loss": 0.847, + "learning_rate": 1.5728096156274353e-07, + "loss": 0.84, "step": 33354 }, { - "epoch": 0.9465096481271282, + "epoch": 0.9451953866643996, "grad_norm": 0.0, - "learning_rate": 1.496962755903031e-07, - "loss": 0.8353, + "learning_rate": 1.5711886620040305e-07, + "loss": 0.7543, "step": 33355 }, { - "epoch": 0.9465380249716232, + "epoch": 0.9452237241066621, "grad_norm": 0.0, - "learning_rate": 1.4953788571372863e-07, - "loss": 0.8629, + "learning_rate": 1.5695685374830572e-07, + "loss": 0.8179, "step": 33356 }, { - "epoch": 0.9465664018161181, + "epoch": 0.9452520615489246, "grad_norm": 0.0, - "learning_rate": 1.4937957904445188e-07, - "loss": 0.883, + "learning_rate": 1.5679492420781483e-07, + "loss": 0.8184, "step": 33357 }, { - "epoch": 0.9465947786606129, + "epoch": 0.9452803989911871, "grad_norm": 0.0, - "learning_rate": 1.4922135558380957e-07, - "loss": 0.9202, + "learning_rate": 1.56633077580296e-07, + "loss": 0.7906, "step": 33358 }, { - "epoch": 0.9466231555051078, + "epoch": 0.9453087364334495, "grad_norm": 0.0, - "learning_rate": 1.4906321533313507e-07, - "loss": 0.8246, + "learning_rate": 1.5647131386711366e-07, + "loss": 0.9186, "step": 33359 }, { - "epoch": 0.9466515323496028, + "epoch": 0.945337073875712, "grad_norm": 0.0, - "learning_rate": 1.489051582937673e-07, - "loss": 0.8615, + "learning_rate": 1.5630963306962676e-07, + "loss": 0.7927, "step": 33360 }, { - "epoch": 0.9466799091940976, + "epoch": 0.9453654113179745, "grad_norm": 0.0, - "learning_rate": 1.4874718446704074e-07, - "loss": 0.8278, + "learning_rate": 1.5614803518919974e-07, + "loss": 0.7838, "step": 33361 }, { - "epoch": 0.9467082860385925, + "epoch": 0.9453937487602369, "grad_norm": 0.0, - "learning_rate": 1.485892938542899e-07, - "loss": 0.8342, + "learning_rate": 1.5598652022719373e-07, + "loss": 0.8907, "step": 33362 }, { - "epoch": 0.9467366628830874, + "epoch": 0.9454220862024993, "grad_norm": 0.0, - "learning_rate": 1.48431486456847e-07, - "loss": 0.8432, + "learning_rate": 1.5582508818496765e-07, + "loss": 0.7937, "step": 33363 }, { - "epoch": 0.9467650397275823, + "epoch": 0.9454504236447618, "grad_norm": 0.0, - "learning_rate": 1.4827376227604772e-07, - "loss": 0.8044, + "learning_rate": 1.5566373906388377e-07, + "loss": 0.7849, "step": 33364 }, { - "epoch": 0.9467934165720772, + "epoch": 0.9454787610870243, "grad_norm": 0.0, - "learning_rate": 1.4811612131322205e-07, - "loss": 0.8097, + "learning_rate": 1.5550247286529874e-07, + "loss": 0.7445, "step": 33365 }, { - "epoch": 0.946821793416572, + "epoch": 0.9455070985292867, "grad_norm": 0.0, - "learning_rate": 1.4795856356970227e-07, - "loss": 0.8142, + "learning_rate": 1.553412895905726e-07, + "loss": 0.7451, "step": 33366 }, { - "epoch": 0.946850170261067, + "epoch": 0.9455354359715492, "grad_norm": 0.0, - "learning_rate": 1.4780108904681955e-07, - "loss": 0.8187, + "learning_rate": 1.5518018924106316e-07, + "loss": 0.8317, "step": 33367 }, { - "epoch": 0.9468785471055619, + "epoch": 0.9455637734138117, "grad_norm": 0.0, - "learning_rate": 1.4764369774590392e-07, - "loss": 0.8072, + "learning_rate": 1.5501917181812597e-07, + "loss": 0.8538, "step": 33368 }, { - "epoch": 0.9469069239500567, + "epoch": 0.9455921108560741, "grad_norm": 0.0, - "learning_rate": 1.4748638966828545e-07, - "loss": 0.8156, + "learning_rate": 1.5485823732311777e-07, + "loss": 0.7126, "step": 33369 }, { - "epoch": 0.9469353007945517, + "epoch": 0.9456204482983366, "grad_norm": 0.0, - "learning_rate": 1.4732916481529192e-07, - "loss": 0.8364, + "learning_rate": 1.5469738575739412e-07, + "loss": 0.7461, "step": 33370 }, { - "epoch": 0.9469636776390465, + "epoch": 0.9456487857405991, "grad_norm": 0.0, - "learning_rate": 1.4717202318825119e-07, - "loss": 0.8777, + "learning_rate": 1.545366171223117e-07, + "loss": 0.8208, "step": 33371 }, { - "epoch": 0.9469920544835414, + "epoch": 0.9456771231828616, "grad_norm": 0.0, - "learning_rate": 1.470149647884922e-07, - "loss": 0.8137, + "learning_rate": 1.543759314192228e-07, + "loss": 0.8288, "step": 33372 }, { - "epoch": 0.9470204313280364, + "epoch": 0.945705460625124, "grad_norm": 0.0, - "learning_rate": 1.4685798961734055e-07, - "loss": 0.7284, + "learning_rate": 1.5421532864948184e-07, + "loss": 0.8109, "step": 33373 }, { - "epoch": 0.9470488081725312, + "epoch": 0.9457337980673864, "grad_norm": 0.0, - "learning_rate": 1.4670109767612183e-07, - "loss": 0.8663, + "learning_rate": 1.5405480881444002e-07, + "loss": 0.7955, "step": 33374 }, { - "epoch": 0.9470771850170261, + "epoch": 0.9457621355096489, "grad_norm": 0.0, - "learning_rate": 1.465442889661628e-07, - "loss": 0.7445, + "learning_rate": 1.5389437191545286e-07, + "loss": 0.7843, "step": 33375 }, { - "epoch": 0.947105561861521, + "epoch": 0.9457904729519113, "grad_norm": 0.0, - "learning_rate": 1.4638756348878569e-07, - "loss": 0.9106, + "learning_rate": 1.537340179538682e-07, + "loss": 0.7511, "step": 33376 }, { - "epoch": 0.9471339387060159, + "epoch": 0.9458188103941738, "grad_norm": 0.0, - "learning_rate": 1.4623092124531613e-07, - "loss": 0.7888, + "learning_rate": 1.5357374693103943e-07, + "loss": 0.6902, "step": 33377 }, { - "epoch": 0.9471623155505108, + "epoch": 0.9458471478364363, "grad_norm": 0.0, - "learning_rate": 1.4607436223707749e-07, - "loss": 0.7298, + "learning_rate": 1.5341355884831433e-07, + "loss": 0.8535, "step": 33378 }, { - "epoch": 0.9471906923950056, + "epoch": 0.9458754852786987, "grad_norm": 0.0, - "learning_rate": 1.4591788646539207e-07, - "loss": 0.794, + "learning_rate": 1.5325345370704292e-07, + "loss": 0.8467, "step": 33379 }, { - "epoch": 0.9472190692395006, + "epoch": 0.9459038227209612, "grad_norm": 0.0, - "learning_rate": 1.457614939315799e-07, - "loss": 0.7057, + "learning_rate": 1.5309343150857415e-07, + "loss": 0.8511, "step": 33380 }, { - "epoch": 0.9472474460839955, + "epoch": 0.9459321601632237, "grad_norm": 0.0, - "learning_rate": 1.4560518463696549e-07, - "loss": 0.7966, + "learning_rate": 1.529334922542558e-07, + "loss": 0.8293, "step": 33381 }, { - "epoch": 0.9472758229284903, + "epoch": 0.9459604976054862, "grad_norm": 0.0, - "learning_rate": 1.4544895858286555e-07, - "loss": 0.8317, + "learning_rate": 1.5277363594543572e-07, + "loss": 0.7389, "step": 33382 }, { - "epoch": 0.9473041997729852, + "epoch": 0.9459888350477486, "grad_norm": 0.0, - "learning_rate": 1.452928157706013e-07, - "loss": 0.8506, + "learning_rate": 1.5261386258346167e-07, + "loss": 0.8034, "step": 33383 }, { - "epoch": 0.9473325766174802, + "epoch": 0.946017172490011, "grad_norm": 0.0, - "learning_rate": 1.451367562014927e-07, - "loss": 0.7067, + "learning_rate": 1.5245417216967596e-07, + "loss": 0.868, "step": 33384 }, { - "epoch": 0.947360953461975, + "epoch": 0.9460455099322735, "grad_norm": 0.0, - "learning_rate": 1.4498077987685543e-07, - "loss": 0.8749, + "learning_rate": 1.5229456470542636e-07, + "loss": 0.8245, "step": 33385 }, { - "epoch": 0.9473893303064699, + "epoch": 0.9460738473745359, "grad_norm": 0.0, - "learning_rate": 1.4482488679800844e-07, - "loss": 0.9474, + "learning_rate": 1.5213504019205627e-07, + "loss": 0.714, "step": 33386 }, { - "epoch": 0.9474177071509648, + "epoch": 0.9461021848167984, "grad_norm": 0.0, - "learning_rate": 1.4466907696627064e-07, - "loss": 0.803, + "learning_rate": 1.5197559863090906e-07, + "loss": 0.7948, "step": 33387 }, { - "epoch": 0.9474460839954597, + "epoch": 0.9461305222590609, "grad_norm": 0.0, - "learning_rate": 1.4451335038295433e-07, - "loss": 0.8324, + "learning_rate": 1.5181624002332918e-07, + "loss": 0.8327, "step": 33388 }, { - "epoch": 0.9474744608399546, + "epoch": 0.9461588597013234, "grad_norm": 0.0, - "learning_rate": 1.4435770704937736e-07, - "loss": 0.885, + "learning_rate": 1.516569643706578e-07, + "loss": 0.8148, "step": 33389 }, { - "epoch": 0.9475028376844495, + "epoch": 0.9461871971435858, "grad_norm": 0.0, - "learning_rate": 1.4420214696685418e-07, - "loss": 0.7926, + "learning_rate": 1.5149777167423607e-07, + "loss": 0.7668, "step": 33390 }, { - "epoch": 0.9475312145289444, + "epoch": 0.9462155345858483, "grad_norm": 0.0, - "learning_rate": 1.4404667013669827e-07, - "loss": 0.8416, + "learning_rate": 1.5133866193540735e-07, + "loss": 0.8579, "step": 33391 }, { - "epoch": 0.9475595913734393, + "epoch": 0.9462438720281108, "grad_norm": 0.0, - "learning_rate": 1.4389127656022296e-07, - "loss": 0.8548, + "learning_rate": 1.5117963515550837e-07, + "loss": 0.8195, "step": 33392 }, { - "epoch": 0.9475879682179341, + "epoch": 0.9462722094703732, "grad_norm": 0.0, - "learning_rate": 1.4373596623874054e-07, - "loss": 0.7885, + "learning_rate": 1.5102069133588247e-07, + "loss": 0.729, "step": 33393 }, { - "epoch": 0.9476163450624291, + "epoch": 0.9463005469126357, "grad_norm": 0.0, - "learning_rate": 1.435807391735644e-07, - "loss": 0.7245, + "learning_rate": 1.508618304778653e-07, + "loss": 0.7572, "step": 33394 }, { - "epoch": 0.947644721906924, + "epoch": 0.9463288843548981, "grad_norm": 0.0, - "learning_rate": 1.4342559536600576e-07, - "loss": 0.7348, + "learning_rate": 1.5070305258279684e-07, + "loss": 0.6711, "step": 33395 }, { - "epoch": 0.9476730987514188, + "epoch": 0.9463572217971606, "grad_norm": 0.0, - "learning_rate": 1.432705348173724e-07, - "loss": 0.8127, + "learning_rate": 1.505443576520138e-07, + "loss": 0.7697, "step": 33396 }, { - "epoch": 0.9477014755959138, + "epoch": 0.946385559239423, "grad_norm": 0.0, - "learning_rate": 1.4311555752897666e-07, - "loss": 0.8532, + "learning_rate": 1.5038574568685294e-07, + "loss": 0.7706, "step": 33397 }, { - "epoch": 0.9477298524404086, + "epoch": 0.9464138966816855, "grad_norm": 0.0, - "learning_rate": 1.4296066350212746e-07, - "loss": 0.7146, + "learning_rate": 1.5022721668865092e-07, + "loss": 0.8028, "step": 33398 }, { - "epoch": 0.9477582292849035, + "epoch": 0.946442234123948, "grad_norm": 0.0, - "learning_rate": 1.4280585273813152e-07, - "loss": 0.7186, + "learning_rate": 1.5006877065874338e-07, + "loss": 0.719, "step": 33399 }, { - "epoch": 0.9477866061293984, + "epoch": 0.9464705715662104, "grad_norm": 0.0, - "learning_rate": 1.4265112523829782e-07, - "loss": 0.7722, + "learning_rate": 1.4991040759846366e-07, + "loss": 0.8688, "step": 33400 }, { - "epoch": 0.9478149829738933, + "epoch": 0.9464989090084729, "grad_norm": 0.0, - "learning_rate": 1.424964810039342e-07, - "loss": 0.7858, + "learning_rate": 1.4975212750914625e-07, + "loss": 0.7401, "step": 33401 }, { - "epoch": 0.9478433598183882, + "epoch": 0.9465272464507354, "grad_norm": 0.0, - "learning_rate": 1.4234192003634517e-07, - "loss": 0.8673, + "learning_rate": 1.4959393039212455e-07, + "loss": 0.7539, "step": 33402 }, { - "epoch": 0.947871736662883, + "epoch": 0.9465555838929978, "grad_norm": 0.0, - "learning_rate": 1.4218744233683746e-07, - "loss": 0.7733, + "learning_rate": 1.4943581624873084e-07, + "loss": 0.7605, "step": 33403 }, { - "epoch": 0.947900113507378, + "epoch": 0.9465839213352603, "grad_norm": 0.0, - "learning_rate": 1.4203304790671557e-07, - "loss": 0.911, + "learning_rate": 1.4927778508029733e-07, + "loss": 0.8068, "step": 33404 }, { - "epoch": 0.9479284903518729, + "epoch": 0.9466122587775228, "grad_norm": 0.0, - "learning_rate": 1.4187873674728292e-07, - "loss": 0.7657, + "learning_rate": 1.4911983688815522e-07, + "loss": 0.7947, "step": 33405 }, { - "epoch": 0.9479568671963677, + "epoch": 0.9466405962197852, "grad_norm": 0.0, - "learning_rate": 1.4172450885984402e-07, - "loss": 0.9008, + "learning_rate": 1.4896197167363345e-07, + "loss": 0.8014, "step": 33406 }, { - "epoch": 0.9479852440408627, + "epoch": 0.9466689336620476, "grad_norm": 0.0, - "learning_rate": 1.4157036424570114e-07, - "loss": 0.8139, + "learning_rate": 1.488041894380643e-07, + "loss": 0.8319, "step": 33407 }, { - "epoch": 0.9480136208853576, + "epoch": 0.9466972711043101, "grad_norm": 0.0, - "learning_rate": 1.414163029061577e-07, - "loss": 0.7898, + "learning_rate": 1.4864649018277555e-07, + "loss": 0.8768, "step": 33408 }, { - "epoch": 0.9480419977298524, + "epoch": 0.9467256085465726, "grad_norm": 0.0, - "learning_rate": 1.4126232484251267e-07, - "loss": 0.774, + "learning_rate": 1.4848887390909615e-07, + "loss": 0.8291, "step": 33409 }, { - "epoch": 0.9480703745743473, + "epoch": 0.946753945988835, "grad_norm": 0.0, - "learning_rate": 1.4110843005606833e-07, - "loss": 0.8489, + "learning_rate": 1.4833134061835176e-07, + "loss": 0.8236, "step": 33410 }, { - "epoch": 0.9480987514188423, + "epoch": 0.9467822834310975, "grad_norm": 0.0, - "learning_rate": 1.4095461854812476e-07, - "loss": 0.8772, + "learning_rate": 1.4817389031187124e-07, + "loss": 0.849, "step": 33411 }, { - "epoch": 0.9481271282633371, + "epoch": 0.94681062087336, "grad_norm": 0.0, - "learning_rate": 1.408008903199809e-07, - "loss": 0.7513, + "learning_rate": 1.4801652299098136e-07, + "loss": 0.7146, "step": 33412 }, { - "epoch": 0.948155505107832, + "epoch": 0.9468389583156225, "grad_norm": 0.0, - "learning_rate": 1.4064724537293462e-07, - "loss": 0.801, + "learning_rate": 1.4785923865700658e-07, + "loss": 0.8385, "step": 33413 }, { - "epoch": 0.948183881952327, + "epoch": 0.9468672957578849, "grad_norm": 0.0, - "learning_rate": 1.4049368370828376e-07, - "loss": 0.828, + "learning_rate": 1.477020373112714e-07, + "loss": 0.811, "step": 33414 }, { - "epoch": 0.9482122587968218, + "epoch": 0.9468956332001474, "grad_norm": 0.0, - "learning_rate": 1.4034020532732728e-07, - "loss": 0.8205, + "learning_rate": 1.4754491895510147e-07, + "loss": 0.8089, "step": 33415 }, { - "epoch": 0.9482406356413167, + "epoch": 0.9469239706424099, "grad_norm": 0.0, - "learning_rate": 1.4018681023135861e-07, - "loss": 0.7829, + "learning_rate": 1.4738788358981791e-07, + "loss": 0.8083, "step": 33416 }, { - "epoch": 0.9482690124858115, + "epoch": 0.9469523080846722, "grad_norm": 0.0, - "learning_rate": 1.400334984216767e-07, - "loss": 0.897, + "learning_rate": 1.4723093121674635e-07, + "loss": 0.8088, "step": 33417 }, { - "epoch": 0.9482973893303065, + "epoch": 0.9469806455269347, "grad_norm": 0.0, - "learning_rate": 1.3988026989957493e-07, - "loss": 0.8071, + "learning_rate": 1.4707406183720574e-07, + "loss": 0.7301, "step": 33418 }, { - "epoch": 0.9483257661748014, + "epoch": 0.9470089829691972, "grad_norm": 0.0, - "learning_rate": 1.3972712466634676e-07, - "loss": 0.7445, + "learning_rate": 1.4691727545251945e-07, + "loss": 0.8964, "step": 33419 }, { - "epoch": 0.9483541430192962, + "epoch": 0.9470373204114597, "grad_norm": 0.0, - "learning_rate": 1.3957406272328666e-07, - "loss": 0.749, + "learning_rate": 1.4676057206400862e-07, + "loss": 0.7263, "step": 33420 }, { - "epoch": 0.9483825198637912, + "epoch": 0.9470656578537221, "grad_norm": 0.0, - "learning_rate": 1.394210840716892e-07, - "loss": 0.8284, + "learning_rate": 1.4660395167299112e-07, + "loss": 0.8224, "step": 33421 }, { - "epoch": 0.948410896708286, + "epoch": 0.9470939952959846, "grad_norm": 0.0, - "learning_rate": 1.392681887128433e-07, - "loss": 0.8092, + "learning_rate": 1.4644741428078923e-07, + "loss": 0.8926, "step": 33422 }, { - "epoch": 0.9484392735527809, + "epoch": 0.9471223327382471, "grad_norm": 0.0, - "learning_rate": 1.3911537664804352e-07, - "loss": 0.7454, + "learning_rate": 1.4629095988871854e-07, + "loss": 0.8588, "step": 33423 }, { - "epoch": 0.9484676503972759, + "epoch": 0.9471506701805095, "grad_norm": 0.0, - "learning_rate": 1.3896264787857994e-07, - "loss": 0.7685, + "learning_rate": 1.4613458849809915e-07, + "loss": 0.7777, "step": 33424 }, { - "epoch": 0.9484960272417707, + "epoch": 0.947179007622772, "grad_norm": 0.0, - "learning_rate": 1.388100024057404e-07, - "loss": 0.8183, + "learning_rate": 1.459783001102466e-07, + "loss": 0.7067, "step": 33425 }, { - "epoch": 0.9485244040862656, + "epoch": 0.9472073450650345, "grad_norm": 0.0, - "learning_rate": 1.3865744023081828e-07, - "loss": 0.9327, + "learning_rate": 1.458220947264788e-07, + "loss": 0.8328, "step": 33426 }, { - "epoch": 0.9485527809307605, + "epoch": 0.9472356825072968, "grad_norm": 0.0, - "learning_rate": 1.3850496135509815e-07, - "loss": 0.8783, + "learning_rate": 1.4566597234810908e-07, + "loss": 0.7098, "step": 33427 }, { - "epoch": 0.9485811577752554, + "epoch": 0.9472640199495593, "grad_norm": 0.0, - "learning_rate": 1.383525657798701e-07, - "loss": 0.8256, + "learning_rate": 1.4550993297645643e-07, + "loss": 0.8134, "step": 33428 }, { - "epoch": 0.9486095346197503, + "epoch": 0.9472923573918218, "grad_norm": 0.0, - "learning_rate": 1.3820025350642308e-07, - "loss": 0.8383, + "learning_rate": 1.4535397661283092e-07, + "loss": 0.7784, "step": 33429 }, { - "epoch": 0.9486379114642451, + "epoch": 0.9473206948340843, "grad_norm": 0.0, - "learning_rate": 1.380480245360405e-07, - "loss": 0.6982, + "learning_rate": 1.4519810325855033e-07, + "loss": 0.7904, "step": 33430 }, { - "epoch": 0.9486662883087401, + "epoch": 0.9473490322763467, "grad_norm": 0.0, - "learning_rate": 1.378958788700091e-07, - "loss": 0.8205, + "learning_rate": 1.4504231291492365e-07, + "loss": 0.8207, "step": 33431 }, { - "epoch": 0.948694665153235, + "epoch": 0.9473773697186092, "grad_norm": 0.0, - "learning_rate": 1.3774381650961567e-07, - "loss": 0.7743, + "learning_rate": 1.448866055832654e-07, + "loss": 0.7683, "step": 33432 }, { - "epoch": 0.9487230419977298, + "epoch": 0.9474057071608717, "grad_norm": 0.0, - "learning_rate": 1.3759183745614246e-07, - "loss": 0.917, + "learning_rate": 1.4473098126488783e-07, + "loss": 0.7452, "step": 33433 }, { - "epoch": 0.9487514188422247, + "epoch": 0.9474340446031341, "grad_norm": 0.0, - "learning_rate": 1.3743994171087406e-07, - "loss": 0.8104, + "learning_rate": 1.445754399610999e-07, + "loss": 0.8375, "step": 33434 }, { - "epoch": 0.9487797956867197, + "epoch": 0.9474623820453966, "grad_norm": 0.0, - "learning_rate": 1.3728812927509605e-07, - "loss": 0.8583, + "learning_rate": 1.4441998167321393e-07, + "loss": 0.7801, "step": 33435 }, { - "epoch": 0.9488081725312145, + "epoch": 0.9474907194876591, "grad_norm": 0.0, - "learning_rate": 1.3713640015008634e-07, - "loss": 0.7867, + "learning_rate": 1.442646064025377e-07, + "loss": 0.7056, "step": 33436 }, { - "epoch": 0.9488365493757094, + "epoch": 0.9475190569299216, "grad_norm": 0.0, - "learning_rate": 1.3698475433712942e-07, - "loss": 0.919, + "learning_rate": 1.441093141503802e-07, + "loss": 0.8442, "step": 33437 }, { - "epoch": 0.9488649262202044, + "epoch": 0.9475473943721839, "grad_norm": 0.0, - "learning_rate": 1.3683319183750544e-07, - "loss": 0.7642, + "learning_rate": 1.4395410491805039e-07, + "loss": 0.7954, "step": 33438 }, { - "epoch": 0.9488933030646992, + "epoch": 0.9475757318144464, "grad_norm": 0.0, - "learning_rate": 1.3668171265249552e-07, - "loss": 0.8335, + "learning_rate": 1.43798978706855e-07, + "loss": 0.8506, "step": 33439 }, { - "epoch": 0.9489216799091941, + "epoch": 0.9476040692567089, "grad_norm": 0.0, - "learning_rate": 1.3653031678337869e-07, - "loss": 0.9676, + "learning_rate": 1.4364393551809963e-07, + "loss": 0.7908, "step": 33440 }, { - "epoch": 0.948950056753689, + "epoch": 0.9476324066989713, "grad_norm": 0.0, - "learning_rate": 1.363790042314328e-07, - "loss": 0.7456, + "learning_rate": 1.4348897535309324e-07, + "loss": 0.8359, "step": 33441 }, { - "epoch": 0.9489784335981839, + "epoch": 0.9476607441412338, "grad_norm": 0.0, - "learning_rate": 1.362277749979368e-07, - "loss": 0.691, + "learning_rate": 1.4333409821313815e-07, + "loss": 0.7438, "step": 33442 }, { - "epoch": 0.9490068104426788, + "epoch": 0.9476890815834963, "grad_norm": 0.0, - "learning_rate": 1.3607662908416973e-07, - "loss": 0.7186, + "learning_rate": 1.4317930409954107e-07, + "loss": 0.8566, "step": 33443 }, { - "epoch": 0.9490351872871736, + "epoch": 0.9477174190257588, "grad_norm": 0.0, - "learning_rate": 1.359255664914061e-07, - "loss": 0.7938, + "learning_rate": 1.4302459301360428e-07, + "loss": 0.7958, "step": 33444 }, { - "epoch": 0.9490635641316686, + "epoch": 0.9477457564680212, "grad_norm": 0.0, - "learning_rate": 1.357745872209215e-07, - "loss": 0.6964, + "learning_rate": 1.4286996495663119e-07, + "loss": 0.8079, "step": 33445 }, { - "epoch": 0.9490919409761635, + "epoch": 0.9477740939102837, "grad_norm": 0.0, - "learning_rate": 1.356236912739939e-07, - "loss": 0.7322, + "learning_rate": 1.427154199299252e-07, + "loss": 0.8057, "step": 33446 }, { - "epoch": 0.9491203178206583, + "epoch": 0.9478024313525462, "grad_norm": 0.0, - "learning_rate": 1.354728786518955e-07, - "loss": 0.8132, + "learning_rate": 1.4256095793478752e-07, + "loss": 0.7866, "step": 33447 }, { - "epoch": 0.9491486946651533, + "epoch": 0.9478307687948085, "grad_norm": 0.0, - "learning_rate": 1.3532214935590094e-07, - "loss": 0.87, + "learning_rate": 1.424065789725193e-07, + "loss": 0.7564, "step": 33448 }, { - "epoch": 0.9491770715096481, + "epoch": 0.947859106237071, "grad_norm": 0.0, - "learning_rate": 1.3517150338728468e-07, - "loss": 0.7538, + "learning_rate": 1.4225228304442173e-07, + "loss": 0.7857, "step": 33449 }, { - "epoch": 0.949205448354143, + "epoch": 0.9478874436793335, "grad_norm": 0.0, - "learning_rate": 1.3502094074731797e-07, - "loss": 0.7953, + "learning_rate": 1.4209807015179378e-07, + "loss": 0.7299, "step": 33450 }, { - "epoch": 0.9492338251986379, + "epoch": 0.9479157811215959, "grad_norm": 0.0, - "learning_rate": 1.34870461437272e-07, - "loss": 0.8206, + "learning_rate": 1.419439402959344e-07, + "loss": 0.7562, "step": 33451 }, { - "epoch": 0.9492622020431328, + "epoch": 0.9479441185638584, "grad_norm": 0.0, - "learning_rate": 1.347200654584191e-07, - "loss": 0.839, + "learning_rate": 1.4178989347814143e-07, + "loss": 0.8105, "step": 33452 }, { - "epoch": 0.9492905788876277, + "epoch": 0.9479724560061209, "grad_norm": 0.0, - "learning_rate": 1.3456975281202822e-07, - "loss": 0.7893, + "learning_rate": 1.4163592969971273e-07, + "loss": 0.7761, "step": 33453 }, { - "epoch": 0.9493189557321225, + "epoch": 0.9480007934483834, "grad_norm": 0.0, - "learning_rate": 1.3441952349937171e-07, - "loss": 0.8203, + "learning_rate": 1.4148204896194616e-07, + "loss": 0.7679, "step": 33454 }, { - "epoch": 0.9493473325766175, + "epoch": 0.9480291308906458, "grad_norm": 0.0, - "learning_rate": 1.3426937752171633e-07, - "loss": 0.8756, + "learning_rate": 1.4132825126613626e-07, + "loss": 0.8155, "step": 33455 }, { - "epoch": 0.9493757094211124, + "epoch": 0.9480574683329083, "grad_norm": 0.0, - "learning_rate": 1.3411931488033104e-07, - "loss": 0.9141, + "learning_rate": 1.411745366135797e-07, + "loss": 0.8702, "step": 33456 }, { - "epoch": 0.9494040862656072, + "epoch": 0.9480858057751708, "grad_norm": 0.0, - "learning_rate": 1.3396933557648485e-07, - "loss": 0.7214, + "learning_rate": 1.410209050055711e-07, + "loss": 0.8058, "step": 33457 }, { - "epoch": 0.9494324631101022, + "epoch": 0.9481141432174331, "grad_norm": 0.0, - "learning_rate": 1.3381943961144118e-07, - "loss": 0.7786, + "learning_rate": 1.4086735644340487e-07, + "loss": 0.8571, "step": 33458 }, { - "epoch": 0.9494608399545971, + "epoch": 0.9481424806596956, "grad_norm": 0.0, - "learning_rate": 1.3366962698646902e-07, - "loss": 0.8003, + "learning_rate": 1.4071389092837339e-07, + "loss": 0.8465, "step": 33459 }, { - "epoch": 0.9494892167990919, + "epoch": 0.9481708181019581, "grad_norm": 0.0, - "learning_rate": 1.335198977028329e-07, - "loss": 0.8034, + "learning_rate": 1.4056050846177004e-07, + "loss": 0.7473, "step": 33460 }, { - "epoch": 0.9495175936435868, + "epoch": 0.9481991555442206, "grad_norm": 0.0, - "learning_rate": 1.3337025176179742e-07, - "loss": 0.6811, + "learning_rate": 1.4040720904488603e-07, + "loss": 0.8148, "step": 33461 }, { - "epoch": 0.9495459704880818, + "epoch": 0.948227492986483, "grad_norm": 0.0, - "learning_rate": 1.3322068916462706e-07, - "loss": 0.8831, + "learning_rate": 1.4025399267901473e-07, + "loss": 0.7973, "step": 33462 }, { - "epoch": 0.9495743473325766, + "epoch": 0.9482558304287455, "grad_norm": 0.0, - "learning_rate": 1.3307120991258637e-07, - "loss": 0.7987, + "learning_rate": 1.4010085936544515e-07, + "loss": 0.7693, "step": 33463 }, { - "epoch": 0.9496027241770715, + "epoch": 0.948284167871008, "grad_norm": 0.0, - "learning_rate": 1.329218140069355e-07, - "loss": 0.7973, + "learning_rate": 1.3994780910546735e-07, + "loss": 0.7811, "step": 33464 }, { - "epoch": 0.9496311010215664, + "epoch": 0.9483125053132704, "grad_norm": 0.0, - "learning_rate": 1.3277250144893782e-07, - "loss": 0.9111, + "learning_rate": 1.397948419003703e-07, + "loss": 0.8053, "step": 33465 }, { - "epoch": 0.9496594778660613, + "epoch": 0.9483408427555329, "grad_norm": 0.0, - "learning_rate": 1.326232722398546e-07, - "loss": 0.78, + "learning_rate": 1.3964195775144295e-07, + "loss": 0.8234, "step": 33466 }, { - "epoch": 0.9496878547105562, + "epoch": 0.9483691801977954, "grad_norm": 0.0, - "learning_rate": 1.3247412638094593e-07, - "loss": 0.8833, + "learning_rate": 1.394891566599732e-07, + "loss": 0.8955, "step": 33467 }, { - "epoch": 0.949716231555051, + "epoch": 0.9483975176400579, "grad_norm": 0.0, - "learning_rate": 1.3232506387347078e-07, - "loss": 0.7364, + "learning_rate": 1.3933643862724777e-07, + "loss": 0.7618, "step": 33468 }, { - "epoch": 0.949744608399546, + "epoch": 0.9484258550823202, "grad_norm": 0.0, - "learning_rate": 1.3217608471869149e-07, - "loss": 0.7893, + "learning_rate": 1.3918380365455232e-07, + "loss": 0.7576, "step": 33469 }, { - "epoch": 0.9497729852440409, + "epoch": 0.9484541925245827, "grad_norm": 0.0, - "learning_rate": 1.320271889178626e-07, - "loss": 0.8087, + "learning_rate": 1.3903125174317467e-07, + "loss": 0.8648, "step": 33470 }, { - "epoch": 0.9498013620885357, + "epoch": 0.9484825299668452, "grad_norm": 0.0, - "learning_rate": 1.3187837647224421e-07, - "loss": 0.7966, + "learning_rate": 1.3887878289439827e-07, + "loss": 0.8185, "step": 33471 }, { - "epoch": 0.9498297389330307, + "epoch": 0.9485108674091076, "grad_norm": 0.0, - "learning_rate": 1.3172964738309423e-07, - "loss": 0.733, + "learning_rate": 1.3872639710950652e-07, + "loss": 0.8748, "step": 33472 }, { - "epoch": 0.9498581157775255, + "epoch": 0.9485392048513701, "grad_norm": 0.0, - "learning_rate": 1.315810016516661e-07, - "loss": 0.8, + "learning_rate": 1.3857409438978508e-07, + "loss": 0.7886, "step": 33473 }, { - "epoch": 0.9498864926220204, + "epoch": 0.9485675422936326, "grad_norm": 0.0, - "learning_rate": 1.3143243927921656e-07, - "loss": 0.8152, + "learning_rate": 1.3842187473651626e-07, + "loss": 0.8049, "step": 33474 }, { - "epoch": 0.9499148694665154, + "epoch": 0.948595879735895, "grad_norm": 0.0, - "learning_rate": 1.312839602670024e-07, - "loss": 0.752, + "learning_rate": 1.3826973815098233e-07, + "loss": 0.7791, "step": 33475 }, { - "epoch": 0.9499432463110102, + "epoch": 0.9486242171781575, "grad_norm": 0.0, - "learning_rate": 1.3113556461627486e-07, - "loss": 0.776, + "learning_rate": 1.381176846344634e-07, + "loss": 0.7599, "step": 33476 }, { - "epoch": 0.9499716231555051, + "epoch": 0.94865255462042, "grad_norm": 0.0, - "learning_rate": 1.3098725232828958e-07, - "loss": 0.8419, + "learning_rate": 1.3796571418824177e-07, + "loss": 0.7886, "step": 33477 }, { - "epoch": 0.95, + "epoch": 0.9486808920626825, "grad_norm": 0.0, - "learning_rate": 1.3083902340429777e-07, - "loss": 0.6902, + "learning_rate": 1.3781382681359756e-07, + "loss": 0.821, "step": 33478 }, { - "epoch": 0.9500283768444949, + "epoch": 0.9487092295049449, "grad_norm": 0.0, - "learning_rate": 1.306908778455529e-07, - "loss": 0.6773, + "learning_rate": 1.376620225118086e-07, + "loss": 0.8355, "step": 33479 }, { - "epoch": 0.9500567536889898, + "epoch": 0.9487375669472073, "grad_norm": 0.0, - "learning_rate": 1.3054281565330619e-07, - "loss": 0.763, + "learning_rate": 1.375103012841561e-07, + "loss": 0.8772, "step": 33480 }, { - "epoch": 0.9500851305334846, + "epoch": 0.9487659043894698, "grad_norm": 0.0, - "learning_rate": 1.3039483682880772e-07, - "loss": 0.7847, + "learning_rate": 1.373586631319157e-07, + "loss": 0.759, "step": 33481 }, { - "epoch": 0.9501135073779796, + "epoch": 0.9487942418317322, "grad_norm": 0.0, - "learning_rate": 1.3024694137330652e-07, - "loss": 0.7272, + "learning_rate": 1.3720710805636638e-07, + "loss": 0.787, "step": 33482 }, { - "epoch": 0.9501418842224745, + "epoch": 0.9488225792739947, "grad_norm": 0.0, - "learning_rate": 1.300991292880549e-07, - "loss": 0.7971, + "learning_rate": 1.370556360587838e-07, + "loss": 0.8082, "step": 33483 }, { - "epoch": 0.9501702610669693, + "epoch": 0.9488509167162572, "grad_norm": 0.0, - "learning_rate": 1.2995140057429855e-07, - "loss": 0.7062, + "learning_rate": 1.3690424714044358e-07, + "loss": 0.7694, "step": 33484 }, { - "epoch": 0.9501986379114642, + "epoch": 0.9488792541585197, "grad_norm": 0.0, - "learning_rate": 1.2980375523328648e-07, - "loss": 0.8293, + "learning_rate": 1.367529413026225e-07, + "loss": 0.7693, "step": 33485 }, { - "epoch": 0.9502270147559592, + "epoch": 0.9489075916007821, "grad_norm": 0.0, - "learning_rate": 1.2965619326626655e-07, - "loss": 0.8508, + "learning_rate": 1.3660171854659288e-07, + "loss": 0.8324, "step": 33486 }, { - "epoch": 0.950255391600454, + "epoch": 0.9489359290430446, "grad_norm": 0.0, - "learning_rate": 1.2950871467448333e-07, - "loss": 0.7851, + "learning_rate": 1.364505788736292e-07, + "loss": 0.8222, "step": 33487 }, { - "epoch": 0.9502837684449489, + "epoch": 0.9489642664853071, "grad_norm": 0.0, - "learning_rate": 1.2936131945918472e-07, - "loss": 0.6464, + "learning_rate": 1.362995222850072e-07, + "loss": 0.8704, "step": 33488 }, { - "epoch": 0.9503121452894439, + "epoch": 0.9489926039275695, "grad_norm": 0.0, - "learning_rate": 1.2921400762161417e-07, - "loss": 0.9062, + "learning_rate": 1.3614854878199578e-07, + "loss": 0.7492, "step": 33489 }, { - "epoch": 0.9503405221339387, + "epoch": 0.949020941369832, "grad_norm": 0.0, - "learning_rate": 1.2906677916301736e-07, - "loss": 0.7929, + "learning_rate": 1.359976583658673e-07, + "loss": 0.8388, "step": 33490 }, { - "epoch": 0.9503688989784336, + "epoch": 0.9490492788120944, "grad_norm": 0.0, - "learning_rate": 1.2891963408463658e-07, - "loss": 0.7471, + "learning_rate": 1.358468510378952e-07, + "loss": 0.7449, "step": 33491 }, { - "epoch": 0.9503972758229285, + "epoch": 0.9490776162543569, "grad_norm": 0.0, - "learning_rate": 1.2877257238771535e-07, - "loss": 0.7541, + "learning_rate": 1.356961267993473e-07, + "loss": 0.8048, "step": 33492 }, { - "epoch": 0.9504256526674234, + "epoch": 0.9491059536966193, "grad_norm": 0.0, - "learning_rate": 1.2862559407349595e-07, - "loss": 0.7689, + "learning_rate": 1.3554548565149372e-07, + "loss": 0.816, "step": 33493 }, { - "epoch": 0.9504540295119183, + "epoch": 0.9491342911388818, "grad_norm": 0.0, - "learning_rate": 1.2847869914321965e-07, - "loss": 0.96, + "learning_rate": 1.3539492759560347e-07, + "loss": 0.7609, "step": 33494 }, { - "epoch": 0.9504824063564131, + "epoch": 0.9491626285811443, "grad_norm": 0.0, - "learning_rate": 1.2833188759812877e-07, - "loss": 0.8039, + "learning_rate": 1.3524445263294438e-07, + "loss": 0.7274, "step": 33495 }, { - "epoch": 0.9505107832009081, + "epoch": 0.9491909660234067, "grad_norm": 0.0, - "learning_rate": 1.2818515943946118e-07, - "loss": 0.7985, + "learning_rate": 1.350940607647866e-07, + "loss": 0.6915, "step": 33496 }, { - "epoch": 0.950539160045403, + "epoch": 0.9492193034656692, "grad_norm": 0.0, - "learning_rate": 1.2803851466845708e-07, - "loss": 0.8186, + "learning_rate": 1.349437519923924e-07, + "loss": 0.8061, "step": 33497 }, { - "epoch": 0.9505675368898978, + "epoch": 0.9492476409079317, "grad_norm": 0.0, - "learning_rate": 1.2789195328635651e-07, - "loss": 0.6094, + "learning_rate": 1.347935263170308e-07, + "loss": 0.8777, "step": 33498 }, { - "epoch": 0.9505959137343928, + "epoch": 0.9492759783501941, "grad_norm": 0.0, - "learning_rate": 1.2774547529439518e-07, - "loss": 0.7701, + "learning_rate": 1.3464338373996744e-07, + "loss": 0.8594, "step": 33499 }, { - "epoch": 0.9506242905788876, + "epoch": 0.9493043157924566, "grad_norm": 0.0, - "learning_rate": 1.275990806938121e-07, - "loss": 0.862, + "learning_rate": 1.3449332426246575e-07, + "loss": 0.8125, "step": 33500 }, { - "epoch": 0.9506526674233825, + "epoch": 0.949332653234719, "grad_norm": 0.0, - "learning_rate": 1.2745276948584296e-07, - "loss": 0.7304, + "learning_rate": 1.3434334788579028e-07, + "loss": 0.6687, "step": 33501 }, { - "epoch": 0.9506810442678774, + "epoch": 0.9493609906769815, "grad_norm": 0.0, - "learning_rate": 1.2730654167172452e-07, - "loss": 0.7755, + "learning_rate": 1.3419345461120446e-07, + "loss": 0.8055, "step": 33502 }, { - "epoch": 0.9507094211123723, + "epoch": 0.9493893281192439, "grad_norm": 0.0, - "learning_rate": 1.2716039725269247e-07, - "loss": 0.7771, + "learning_rate": 1.3404364443997066e-07, + "loss": 0.8588, "step": 33503 }, { - "epoch": 0.9507377979568672, + "epoch": 0.9494176655615064, "grad_norm": 0.0, - "learning_rate": 1.2701433622997916e-07, - "loss": 0.8677, + "learning_rate": 1.3389391737335112e-07, + "loss": 0.8151, "step": 33504 }, { - "epoch": 0.950766174801362, + "epoch": 0.9494460030037689, "grad_norm": 0.0, - "learning_rate": 1.2686835860481916e-07, - "loss": 0.8019, + "learning_rate": 1.337442734126071e-07, + "loss": 0.7426, "step": 33505 }, { - "epoch": 0.950794551645857, + "epoch": 0.9494743404460313, "grad_norm": 0.0, - "learning_rate": 1.2672246437844705e-07, - "loss": 0.7653, + "learning_rate": 1.3359471255899758e-07, + "loss": 0.7895, "step": 33506 }, { - "epoch": 0.9508229284903519, + "epoch": 0.9495026778882938, "grad_norm": 0.0, - "learning_rate": 1.2657665355209403e-07, - "loss": 0.7518, + "learning_rate": 1.334452348137849e-07, + "loss": 0.7562, "step": 33507 }, { - "epoch": 0.9508513053348467, + "epoch": 0.9495310153305563, "grad_norm": 0.0, - "learning_rate": 1.2643092612699136e-07, - "loss": 0.8124, + "learning_rate": 1.3329584017822582e-07, + "loss": 0.721, "step": 33508 }, { - "epoch": 0.9508796821793417, + "epoch": 0.9495593527728188, "grad_norm": 0.0, - "learning_rate": 1.262852821043714e-07, - "loss": 0.8844, + "learning_rate": 1.3314652865358158e-07, + "loss": 0.7953, "step": 33509 }, { - "epoch": 0.9509080590238366, + "epoch": 0.9495876902150812, "grad_norm": 0.0, - "learning_rate": 1.2613972148546204e-07, - "loss": 0.8209, + "learning_rate": 1.3299730024110559e-07, + "loss": 0.7369, "step": 33510 }, { - "epoch": 0.9509364358683314, + "epoch": 0.9496160276573437, "grad_norm": 0.0, - "learning_rate": 1.2599424427149565e-07, - "loss": 0.8153, + "learning_rate": 1.3284815494205906e-07, + "loss": 0.7799, "step": 33511 }, { - "epoch": 0.9509648127128263, + "epoch": 0.9496443650996061, "grad_norm": 0.0, - "learning_rate": 1.25848850463699e-07, - "loss": 0.8977, + "learning_rate": 1.3269909275769543e-07, + "loss": 0.9166, "step": 33512 }, { - "epoch": 0.9509931895573213, + "epoch": 0.9496727025418685, "grad_norm": 0.0, - "learning_rate": 1.257035400633011e-07, - "loss": 0.8224, + "learning_rate": 1.3255011368927263e-07, + "loss": 0.8786, "step": 33513 }, { - "epoch": 0.9510215664018161, + "epoch": 0.949701039984131, "grad_norm": 0.0, - "learning_rate": 1.2555831307152877e-07, - "loss": 0.6676, + "learning_rate": 1.3240121773804404e-07, + "loss": 0.7479, "step": 33514 }, { - "epoch": 0.951049943246311, + "epoch": 0.9497293774263935, "grad_norm": 0.0, - "learning_rate": 1.2541316948960992e-07, - "loss": 0.731, + "learning_rate": 1.3225240490526426e-07, + "loss": 0.8874, "step": 33515 }, { - "epoch": 0.951078320090806, + "epoch": 0.9497577148686559, "grad_norm": 0.0, - "learning_rate": 1.2526810931877021e-07, - "loss": 0.8928, + "learning_rate": 1.321036751921856e-07, + "loss": 0.8042, "step": 33516 }, { - "epoch": 0.9511066969353008, + "epoch": 0.9497860523109184, "grad_norm": 0.0, - "learning_rate": 1.2512313256023424e-07, - "loss": 0.7505, + "learning_rate": 1.319550286000637e-07, + "loss": 0.8054, "step": 33517 }, { - "epoch": 0.9511350737797957, + "epoch": 0.9498143897531809, "grad_norm": 0.0, - "learning_rate": 1.2497823921522767e-07, - "loss": 0.8231, + "learning_rate": 1.3180646513014873e-07, + "loss": 0.7542, "step": 33518 }, { - "epoch": 0.9511634506242905, + "epoch": 0.9498427271954434, "grad_norm": 0.0, - "learning_rate": 1.248334292849729e-07, - "loss": 0.7158, + "learning_rate": 1.3165798478369184e-07, + "loss": 0.7921, "step": 33519 }, { - "epoch": 0.9511918274687855, + "epoch": 0.9498710646377058, "grad_norm": 0.0, - "learning_rate": 1.2468870277069444e-07, - "loss": 0.7794, + "learning_rate": 1.3150958756194432e-07, + "loss": 0.8479, "step": 33520 }, { - "epoch": 0.9512202043132804, + "epoch": 0.9498994020799683, "grad_norm": 0.0, - "learning_rate": 1.245440596736147e-07, - "loss": 0.8323, + "learning_rate": 1.3136127346615624e-07, + "loss": 0.74, "step": 33521 }, { - "epoch": 0.9512485811577752, + "epoch": 0.9499277395222308, "grad_norm": 0.0, - "learning_rate": 1.2439949999495493e-07, - "loss": 0.8009, + "learning_rate": 1.3121304249757772e-07, + "loss": 0.8339, "step": 33522 }, { - "epoch": 0.9512769580022702, + "epoch": 0.9499560769644931, "grad_norm": 0.0, - "learning_rate": 1.2425502373593634e-07, - "loss": 0.7782, + "learning_rate": 1.3106489465745443e-07, + "loss": 0.8328, "step": 33523 }, { - "epoch": 0.951305334846765, + "epoch": 0.9499844144067556, "grad_norm": 0.0, - "learning_rate": 1.241106308977802e-07, - "loss": 0.777, + "learning_rate": 1.3091682994703757e-07, + "loss": 0.8039, "step": 33524 }, { - "epoch": 0.9513337116912599, + "epoch": 0.9500127518490181, "grad_norm": 0.0, - "learning_rate": 1.239663214817044e-07, - "loss": 0.8151, + "learning_rate": 1.3076884836757286e-07, + "loss": 0.7737, "step": 33525 }, { - "epoch": 0.9513620885357549, + "epoch": 0.9500410892912806, "grad_norm": 0.0, - "learning_rate": 1.2382209548893022e-07, - "loss": 0.8468, + "learning_rate": 1.3062094992030595e-07, + "loss": 0.8171, "step": 33526 }, { - "epoch": 0.9513904653802497, + "epoch": 0.950069426733543, "grad_norm": 0.0, - "learning_rate": 1.236779529206744e-07, - "loss": 0.8762, + "learning_rate": 1.3047313460648469e-07, + "loss": 0.9163, "step": 33527 }, { - "epoch": 0.9514188422247446, + "epoch": 0.9500977641758055, "grad_norm": 0.0, - "learning_rate": 1.2353389377815494e-07, - "loss": 0.8099, + "learning_rate": 1.303254024273537e-07, + "loss": 0.7989, "step": 33528 }, { - "epoch": 0.9514472190692395, + "epoch": 0.950126101618068, "grad_norm": 0.0, - "learning_rate": 1.233899180625886e-07, - "loss": 0.8405, + "learning_rate": 1.3017775338415638e-07, + "loss": 0.7001, "step": 33529 }, { - "epoch": 0.9514755959137344, + "epoch": 0.9501544390603304, "grad_norm": 0.0, - "learning_rate": 1.2324602577518997e-07, - "loss": 0.7856, + "learning_rate": 1.3003018747813734e-07, + "loss": 0.7096, "step": 33530 }, { - "epoch": 0.9515039727582293, + "epoch": 0.9501827765025929, "grad_norm": 0.0, - "learning_rate": 1.2310221691717916e-07, - "loss": 0.847, + "learning_rate": 1.2988270471053775e-07, + "loss": 0.7581, "step": 33531 }, { - "epoch": 0.9515323496027241, + "epoch": 0.9502111139448554, "grad_norm": 0.0, - "learning_rate": 1.2295849148976524e-07, - "loss": 0.7231, + "learning_rate": 1.2973530508260224e-07, + "loss": 0.8217, "step": 33532 }, { - "epoch": 0.9515607264472191, + "epoch": 0.9502394513871179, "grad_norm": 0.0, - "learning_rate": 1.228148494941661e-07, - "loss": 0.8941, + "learning_rate": 1.29587988595572e-07, + "loss": 0.7587, "step": 33533 }, { - "epoch": 0.951589103291714, + "epoch": 0.9502677888293802, "grad_norm": 0.0, - "learning_rate": 1.2267129093159523e-07, - "loss": 0.814, + "learning_rate": 1.2944075525068712e-07, + "loss": 0.891, "step": 33534 }, { - "epoch": 0.9516174801362088, + "epoch": 0.9502961262716427, "grad_norm": 0.0, - "learning_rate": 1.225278158032617e-07, - "loss": 0.8437, + "learning_rate": 1.2929360504918775e-07, + "loss": 0.7962, "step": 33535 }, { - "epoch": 0.9516458569807037, + "epoch": 0.9503244637139052, "grad_norm": 0.0, - "learning_rate": 1.223844241103811e-07, - "loss": 0.854, + "learning_rate": 1.2914653799231403e-07, + "loss": 0.6598, "step": 33536 }, { - "epoch": 0.9516742338251987, + "epoch": 0.9503528011561676, "grad_norm": 0.0, - "learning_rate": 1.2224111585416365e-07, - "loss": 0.8035, + "learning_rate": 1.2899955408130383e-07, + "loss": 0.7578, "step": 33537 }, { - "epoch": 0.9517026106696935, + "epoch": 0.9503811385984301, "grad_norm": 0.0, - "learning_rate": 1.2209789103581836e-07, - "loss": 0.7627, + "learning_rate": 1.2885265331739617e-07, + "loss": 0.8106, "step": 33538 }, { - "epoch": 0.9517309875141884, + "epoch": 0.9504094760406926, "grad_norm": 0.0, - "learning_rate": 1.2195474965655652e-07, - "loss": 0.7916, + "learning_rate": 1.287058357018278e-07, + "loss": 0.7477, "step": 33539 }, { - "epoch": 0.9517593643586834, + "epoch": 0.950437813482955, "grad_norm": 0.0, - "learning_rate": 1.2181169171758712e-07, - "loss": 0.8696, + "learning_rate": 1.285591012358367e-07, + "loss": 0.7882, "step": 33540 }, { - "epoch": 0.9517877412031782, + "epoch": 0.9504661509252175, "grad_norm": 0.0, - "learning_rate": 1.216687172201181e-07, - "loss": 0.8287, + "learning_rate": 1.2841244992065738e-07, + "loss": 0.8159, "step": 33541 }, { - "epoch": 0.9518161180476731, + "epoch": 0.95049448836748, "grad_norm": 0.0, - "learning_rate": 1.2152582616535846e-07, - "loss": 0.7999, + "learning_rate": 1.2826588175752664e-07, + "loss": 0.751, "step": 33542 }, { - "epoch": 0.9518444948921679, + "epoch": 0.9505228258097425, "grad_norm": 0.0, - "learning_rate": 1.2138301855451397e-07, - "loss": 0.7476, + "learning_rate": 1.2811939674767793e-07, + "loss": 0.6882, "step": 33543 }, { - "epoch": 0.9518728717366629, + "epoch": 0.9505511632520048, "grad_norm": 0.0, - "learning_rate": 1.2124029438879027e-07, - "loss": 0.7315, + "learning_rate": 1.2797299489234472e-07, + "loss": 0.778, "step": 33544 }, { - "epoch": 0.9519012485811578, + "epoch": 0.9505795006942673, "grad_norm": 0.0, - "learning_rate": 1.2109765366939418e-07, - "loss": 0.7604, + "learning_rate": 1.2782667619276047e-07, + "loss": 0.7994, "step": 33545 }, { - "epoch": 0.9519296254256526, + "epoch": 0.9506078381365298, "grad_norm": 0.0, - "learning_rate": 1.2095509639753034e-07, - "loss": 0.7729, + "learning_rate": 1.276804406501586e-07, + "loss": 0.7438, "step": 33546 }, { - "epoch": 0.9519580022701476, + "epoch": 0.9506361755787922, "grad_norm": 0.0, - "learning_rate": 1.2081262257440329e-07, - "loss": 0.8755, + "learning_rate": 1.275342882657704e-07, + "loss": 0.7191, "step": 33547 }, { - "epoch": 0.9519863791146425, + "epoch": 0.9506645130210547, "grad_norm": 0.0, - "learning_rate": 1.2067023220121654e-07, - "loss": 0.884, + "learning_rate": 1.273882190408271e-07, + "loss": 0.8924, "step": 33548 }, { - "epoch": 0.9520147559591373, + "epoch": 0.9506928504633172, "grad_norm": 0.0, - "learning_rate": 1.2052792527917134e-07, - "loss": 0.7818, + "learning_rate": 1.272422329765588e-07, + "loss": 0.7398, "step": 33549 }, { - "epoch": 0.9520431328036323, + "epoch": 0.9507211879055797, "grad_norm": 0.0, - "learning_rate": 1.2038570180947117e-07, - "loss": 0.8381, + "learning_rate": 1.2709633007419563e-07, + "loss": 0.741, "step": 33550 }, { - "epoch": 0.9520715096481271, + "epoch": 0.9507495253478421, "grad_norm": 0.0, - "learning_rate": 1.2024356179331732e-07, - "loss": 0.867, + "learning_rate": 1.2695051033496554e-07, + "loss": 0.7559, "step": 33551 }, { - "epoch": 0.952099886492622, + "epoch": 0.9507778627901046, "grad_norm": 0.0, - "learning_rate": 1.201015052319099e-07, - "loss": 0.7663, + "learning_rate": 1.2680477376009748e-07, + "loss": 0.779, "step": 33552 }, { - "epoch": 0.9521282633371169, + "epoch": 0.9508062002323671, "grad_norm": 0.0, - "learning_rate": 1.1995953212645018e-07, - "loss": 0.786, + "learning_rate": 1.266591203508194e-07, + "loss": 0.7943, "step": 33553 }, { - "epoch": 0.9521566401816118, + "epoch": 0.9508345376746294, "grad_norm": 0.0, - "learning_rate": 1.198176424781361e-07, - "loss": 0.8171, + "learning_rate": 1.265135501083592e-07, + "loss": 0.785, "step": 33554 }, { - "epoch": 0.9521850170261067, + "epoch": 0.9508628751168919, "grad_norm": 0.0, - "learning_rate": 1.196758362881656e-07, - "loss": 0.7856, + "learning_rate": 1.2636806303394035e-07, + "loss": 0.7482, "step": 33555 }, { - "epoch": 0.9522133938706016, + "epoch": 0.9508912125591544, "grad_norm": 0.0, - "learning_rate": 1.195341135577377e-07, - "loss": 0.886, + "learning_rate": 1.2622265912878962e-07, + "loss": 0.7846, "step": 33556 }, { - "epoch": 0.9522417707150965, + "epoch": 0.9509195500014169, "grad_norm": 0.0, - "learning_rate": 1.1939247428805035e-07, - "loss": 0.7594, + "learning_rate": 1.2607733839413383e-07, + "loss": 0.8264, "step": 33557 }, { - "epoch": 0.9522701475595914, + "epoch": 0.9509478874436793, "grad_norm": 0.0, - "learning_rate": 1.1925091848029813e-07, - "loss": 0.8622, + "learning_rate": 1.2593210083119312e-07, + "loss": 0.7549, "step": 33558 }, { - "epoch": 0.9522985244040862, + "epoch": 0.9509762248859418, "grad_norm": 0.0, - "learning_rate": 1.1910944613567787e-07, - "loss": 0.7474, + "learning_rate": 1.2578694644119427e-07, + "loss": 0.756, "step": 33559 }, { - "epoch": 0.9523269012485811, + "epoch": 0.9510045623282043, "grad_norm": 0.0, - "learning_rate": 1.1896805725538418e-07, - "loss": 0.7822, + "learning_rate": 1.2564187522535855e-07, + "loss": 0.861, "step": 33560 }, { - "epoch": 0.9523552780930761, + "epoch": 0.9510328997704667, "grad_norm": 0.0, - "learning_rate": 1.1882675184061054e-07, - "loss": 0.8536, + "learning_rate": 1.2549688718490715e-07, + "loss": 0.8161, "step": 33561 }, { - "epoch": 0.9523836549375709, + "epoch": 0.9510612372127292, "grad_norm": 0.0, - "learning_rate": 1.1868552989255266e-07, - "loss": 0.8823, + "learning_rate": 1.2535198232106361e-07, + "loss": 0.7017, "step": 33562 }, { - "epoch": 0.9524120317820658, + "epoch": 0.9510895746549917, "grad_norm": 0.0, - "learning_rate": 1.1854439141240182e-07, - "loss": 0.7935, + "learning_rate": 1.252071606350458e-07, + "loss": 0.8296, "step": 33563 }, { - "epoch": 0.9524404086265608, + "epoch": 0.951117912097254, "grad_norm": 0.0, - "learning_rate": 1.184033364013515e-07, - "loss": 0.8458, + "learning_rate": 1.2506242212807607e-07, + "loss": 0.6635, "step": 33564 }, { - "epoch": 0.9524687854710556, + "epoch": 0.9511462495395165, "grad_norm": 0.0, - "learning_rate": 1.1826236486059184e-07, - "loss": 0.852, + "learning_rate": 1.2491776680137123e-07, + "loss": 0.7457, "step": 33565 }, { - "epoch": 0.9524971623155505, + "epoch": 0.951174586981779, "grad_norm": 0.0, - "learning_rate": 1.1812147679131414e-07, - "loss": 0.7697, + "learning_rate": 1.2477319465615144e-07, + "loss": 0.9013, "step": 33566 }, { - "epoch": 0.9525255391600455, + "epoch": 0.9512029244240415, "grad_norm": 0.0, - "learning_rate": 1.1798067219470854e-07, - "loss": 0.7763, + "learning_rate": 1.2462870569363572e-07, + "loss": 0.7305, "step": 33567 }, { - "epoch": 0.9525539160045403, + "epoch": 0.9512312618663039, "grad_norm": 0.0, - "learning_rate": 1.1783995107196523e-07, - "loss": 0.6888, + "learning_rate": 1.244842999150375e-07, + "loss": 0.7877, "step": 33568 }, { - "epoch": 0.9525822928490352, + "epoch": 0.9512595993085664, "grad_norm": 0.0, - "learning_rate": 1.1769931342427099e-07, - "loss": 0.848, + "learning_rate": 1.2433997732157588e-07, + "loss": 0.7785, "step": 33569 }, { - "epoch": 0.95261066969353, + "epoch": 0.9512879367508289, "grad_norm": 0.0, - "learning_rate": 1.175587592528149e-07, - "loss": 0.7391, + "learning_rate": 1.241957379144665e-07, + "loss": 0.8136, "step": 33570 }, { - "epoch": 0.952639046538025, + "epoch": 0.9513162741930913, "grad_norm": 0.0, - "learning_rate": 1.174182885587849e-07, - "loss": 0.7888, + "learning_rate": 1.2405158169492393e-07, + "loss": 0.8122, "step": 33571 }, { - "epoch": 0.9526674233825199, + "epoch": 0.9513446116353538, "grad_norm": 0.0, - "learning_rate": 1.172779013433667e-07, - "loss": 0.8014, + "learning_rate": 1.2390750866416167e-07, + "loss": 0.7573, "step": 33572 }, { - "epoch": 0.9526958002270147, + "epoch": 0.9513729490776163, "grad_norm": 0.0, - "learning_rate": 1.1713759760774601e-07, - "loss": 0.8622, + "learning_rate": 1.237635188233932e-07, + "loss": 0.8092, "step": 33573 }, { - "epoch": 0.9527241770715097, + "epoch": 0.9514012865198788, "grad_norm": 0.0, - "learning_rate": 1.1699737735310857e-07, - "loss": 0.7328, + "learning_rate": 1.2361961217383312e-07, + "loss": 0.826, "step": 33574 }, { - "epoch": 0.9527525539160046, + "epoch": 0.9514296239621411, "grad_norm": 0.0, - "learning_rate": 1.1685724058063896e-07, - "loss": 0.8554, + "learning_rate": 1.2347578871669264e-07, + "loss": 0.7209, "step": 33575 }, { - "epoch": 0.9527809307604994, + "epoch": 0.9514579614044036, "grad_norm": 0.0, - "learning_rate": 1.1671718729151849e-07, - "loss": 0.7712, + "learning_rate": 1.2333204845318192e-07, + "loss": 0.8446, "step": 33576 }, { - "epoch": 0.9528093076049943, + "epoch": 0.9514862988466661, "grad_norm": 0.0, - "learning_rate": 1.1657721748693395e-07, - "loss": 0.7494, + "learning_rate": 1.2318839138451333e-07, + "loss": 0.9539, "step": 33577 }, { - "epoch": 0.9528376844494892, + "epoch": 0.9515146362889285, "grad_norm": 0.0, - "learning_rate": 1.1643733116806555e-07, - "loss": 0.7488, + "learning_rate": 1.23044817511897e-07, + "loss": 0.7879, "step": 33578 }, { - "epoch": 0.9528660612939841, + "epoch": 0.951542973731191, "grad_norm": 0.0, - "learning_rate": 1.1629752833609564e-07, - "loss": 0.816, + "learning_rate": 1.2290132683654087e-07, + "loss": 0.7682, "step": 33579 }, { - "epoch": 0.952894438138479, + "epoch": 0.9515713111734535, "grad_norm": 0.0, - "learning_rate": 1.1615780899220552e-07, - "loss": 0.7305, + "learning_rate": 1.227579193596562e-07, + "loss": 0.7294, "step": 33580 }, { - "epoch": 0.9529228149829739, + "epoch": 0.951599648615716, "grad_norm": 0.0, - "learning_rate": 1.1601817313757313e-07, - "loss": 0.8706, + "learning_rate": 1.2261459508244865e-07, + "loss": 0.8842, "step": 33581 }, { - "epoch": 0.9529511918274688, + "epoch": 0.9516279860579784, "grad_norm": 0.0, - "learning_rate": 1.1587862077337975e-07, - "loss": 0.8392, + "learning_rate": 1.224713540061262e-07, + "loss": 0.7233, "step": 33582 }, { - "epoch": 0.9529795686719637, + "epoch": 0.9516563235002409, "grad_norm": 0.0, - "learning_rate": 1.1573915190080553e-07, - "loss": 0.7861, + "learning_rate": 1.223281961318956e-07, + "loss": 0.7962, "step": 33583 }, { - "epoch": 0.9530079455164586, + "epoch": 0.9516846609425034, "grad_norm": 0.0, - "learning_rate": 1.1559976652102622e-07, - "loss": 0.7593, + "learning_rate": 1.2218512146096263e-07, + "loss": 0.9001, "step": 33584 }, { - "epoch": 0.9530363223609535, + "epoch": 0.9517129983847658, "grad_norm": 0.0, - "learning_rate": 1.1546046463521976e-07, - "loss": 0.7238, + "learning_rate": 1.220421299945318e-07, + "loss": 0.8102, "step": 33585 }, { - "epoch": 0.9530646992054483, + "epoch": 0.9517413358270282, "grad_norm": 0.0, - "learning_rate": 1.1532124624456297e-07, - "loss": 0.804, + "learning_rate": 1.2189922173380998e-07, + "loss": 0.8825, "step": 33586 }, { - "epoch": 0.9530930760499432, + "epoch": 0.9517696732692907, "grad_norm": 0.0, - "learning_rate": 1.151821113502316e-07, - "loss": 0.7204, + "learning_rate": 1.2175639667999728e-07, + "loss": 0.7971, "step": 33587 }, { - "epoch": 0.9531214528944382, + "epoch": 0.9517980107115531, "grad_norm": 0.0, - "learning_rate": 1.1504305995340248e-07, - "loss": 0.8392, + "learning_rate": 1.2161365483429943e-07, + "loss": 0.87, "step": 33588 }, { - "epoch": 0.953149829738933, + "epoch": 0.9518263481538156, "grad_norm": 0.0, - "learning_rate": 1.14904092055248e-07, - "loss": 0.8229, + "learning_rate": 1.214709961979177e-07, + "loss": 0.7646, "step": 33589 }, { - "epoch": 0.9531782065834279, + "epoch": 0.9518546855960781, "grad_norm": 0.0, - "learning_rate": 1.1476520765694388e-07, - "loss": 0.8624, + "learning_rate": 1.213284207720544e-07, + "loss": 0.7049, "step": 33590 }, { - "epoch": 0.9532065834279229, + "epoch": 0.9518830230383406, "grad_norm": 0.0, - "learning_rate": 1.1462640675966141e-07, - "loss": 0.793, + "learning_rate": 1.2118592855790978e-07, + "loss": 0.7435, "step": 33591 }, { - "epoch": 0.9532349602724177, + "epoch": 0.951911360480603, "grad_norm": 0.0, - "learning_rate": 1.1448768936457411e-07, - "loss": 0.7519, + "learning_rate": 1.2104351955668502e-07, + "loss": 0.7307, "step": 33592 }, { - "epoch": 0.9532633371169126, + "epoch": 0.9519396979228655, "grad_norm": 0.0, - "learning_rate": 1.1434905547285436e-07, - "loss": 0.6982, + "learning_rate": 1.209011937695781e-07, + "loss": 0.833, "step": 33593 }, { - "epoch": 0.9532917139614074, + "epoch": 0.951968035365128, "grad_norm": 0.0, - "learning_rate": 1.1421050508567233e-07, - "loss": 0.8583, + "learning_rate": 1.2075895119779025e-07, + "loss": 0.8612, "step": 33594 }, { - "epoch": 0.9533200908059024, + "epoch": 0.9519963728073904, "grad_norm": 0.0, - "learning_rate": 1.1407203820419821e-07, - "loss": 0.9156, + "learning_rate": 1.2061679184251719e-07, + "loss": 0.8851, "step": 33595 }, { - "epoch": 0.9533484676503973, + "epoch": 0.9520247102496529, "grad_norm": 0.0, - "learning_rate": 1.1393365482960217e-07, - "loss": 0.8322, + "learning_rate": 1.2047471570495905e-07, + "loss": 0.6314, "step": 33596 }, { - "epoch": 0.9533768444948921, + "epoch": 0.9520530476919153, "grad_norm": 0.0, - "learning_rate": 1.1379535496305327e-07, - "loss": 0.7929, + "learning_rate": 1.2033272278630936e-07, + "loss": 0.8823, "step": 33597 }, { - "epoch": 0.9534052213393871, + "epoch": 0.9520813851341778, "grad_norm": 0.0, - "learning_rate": 1.1365713860571948e-07, - "loss": 0.6997, + "learning_rate": 1.2019081308776715e-07, + "loss": 0.7071, "step": 33598 }, { - "epoch": 0.953433598183882, + "epoch": 0.9521097225764402, "grad_norm": 0.0, - "learning_rate": 1.1351900575876762e-07, - "loss": 0.7605, + "learning_rate": 1.2004898661052588e-07, + "loss": 0.7371, "step": 33599 }, { - "epoch": 0.9534619750283768, + "epoch": 0.9521380600187027, "grad_norm": 0.0, - "learning_rate": 1.1338095642336566e-07, - "loss": 0.7749, + "learning_rate": 1.199072433557813e-07, + "loss": 0.8055, "step": 33600 }, { - "epoch": 0.9534903518728718, + "epoch": 0.9521663974609652, "grad_norm": 0.0, - "learning_rate": 1.1324299060067934e-07, - "loss": 0.7995, + "learning_rate": 1.1976558332472576e-07, + "loss": 0.7045, "step": 33601 }, { - "epoch": 0.9535187287173666, + "epoch": 0.9521947349032276, "grad_norm": 0.0, - "learning_rate": 1.1310510829187326e-07, - "loss": 0.8915, + "learning_rate": 1.1962400651855387e-07, + "loss": 0.8152, "step": 33602 }, { - "epoch": 0.9535471055618615, + "epoch": 0.9522230723454901, "grad_norm": 0.0, - "learning_rate": 1.129673094981143e-07, - "loss": 0.7258, + "learning_rate": 1.1948251293845913e-07, + "loss": 0.8184, "step": 33603 }, { - "epoch": 0.9535754824063564, + "epoch": 0.9522514097877526, "grad_norm": 0.0, - "learning_rate": 1.1282959422056372e-07, - "loss": 0.8008, + "learning_rate": 1.193411025856317e-07, + "loss": 0.7764, "step": 33604 }, { - "epoch": 0.9536038592508513, + "epoch": 0.9522797472300151, "grad_norm": 0.0, - "learning_rate": 1.1269196246038505e-07, - "loss": 0.7423, + "learning_rate": 1.1919977546126283e-07, + "loss": 0.8535, "step": 33605 }, { - "epoch": 0.9536322360953462, + "epoch": 0.9523080846722775, "grad_norm": 0.0, - "learning_rate": 1.1255441421874402e-07, - "loss": 0.799, + "learning_rate": 1.1905853156654378e-07, + "loss": 0.7661, "step": 33606 }, { - "epoch": 0.9536606129398411, + "epoch": 0.95233642211454, "grad_norm": 0.0, - "learning_rate": 1.1241694949679748e-07, - "loss": 0.7078, + "learning_rate": 1.1891737090266365e-07, + "loss": 0.9195, "step": 33607 }, { - "epoch": 0.953688989784336, + "epoch": 0.9523647595568024, "grad_norm": 0.0, - "learning_rate": 1.122795682957123e-07, - "loss": 0.8773, + "learning_rate": 1.1877629347081254e-07, + "loss": 0.7453, "step": 33608 }, { - "epoch": 0.9537173666288309, + "epoch": 0.9523930969990648, "grad_norm": 0.0, - "learning_rate": 1.1214227061664418e-07, - "loss": 0.8105, + "learning_rate": 1.1863529927217731e-07, + "loss": 0.7238, "step": 33609 }, { - "epoch": 0.9537457434733257, + "epoch": 0.9524214344413273, "grad_norm": 0.0, - "learning_rate": 1.1200505646075554e-07, - "loss": 0.8781, + "learning_rate": 1.184943883079459e-07, + "loss": 0.7685, "step": 33610 }, { - "epoch": 0.9537741203178206, + "epoch": 0.9524497718835898, "grad_norm": 0.0, - "learning_rate": 1.1186792582920547e-07, - "loss": 0.8258, + "learning_rate": 1.1835356057930625e-07, + "loss": 0.8752, "step": 33611 }, { - "epoch": 0.9538024971623156, + "epoch": 0.9524781093258522, "grad_norm": 0.0, - "learning_rate": 1.1173087872315081e-07, - "loss": 0.9068, + "learning_rate": 1.1821281608744406e-07, + "loss": 0.7421, "step": 33612 }, { - "epoch": 0.9538308740068104, + "epoch": 0.9525064467681147, "grad_norm": 0.0, - "learning_rate": 1.1159391514374951e-07, - "loss": 0.8085, + "learning_rate": 1.1807215483354506e-07, + "loss": 0.7768, "step": 33613 }, { - "epoch": 0.9538592508513053, + "epoch": 0.9525347842103772, "grad_norm": 0.0, - "learning_rate": 1.1145703509215955e-07, - "loss": 0.8474, + "learning_rate": 1.1793157681879275e-07, + "loss": 0.8147, "step": 33614 }, { - "epoch": 0.9538876276958003, + "epoch": 0.9525631216526397, "grad_norm": 0.0, - "learning_rate": 1.1132023856953667e-07, - "loss": 0.7033, + "learning_rate": 1.1779108204437285e-07, + "loss": 0.7811, "step": 33615 }, { - "epoch": 0.9539160045402951, + "epoch": 0.9525914590949021, "grad_norm": 0.0, - "learning_rate": 1.1118352557703549e-07, - "loss": 0.7724, + "learning_rate": 1.1765067051146883e-07, + "loss": 0.8709, "step": 33616 }, { - "epoch": 0.95394438138479, + "epoch": 0.9526197965371646, "grad_norm": 0.0, - "learning_rate": 1.1104689611581177e-07, - "loss": 0.7765, + "learning_rate": 1.1751034222126312e-07, + "loss": 0.798, "step": 33617 }, { - "epoch": 0.953972758229285, + "epoch": 0.952648133979427, "grad_norm": 0.0, - "learning_rate": 1.1091035018702013e-07, - "loss": 0.8216, + "learning_rate": 1.1737009717493697e-07, + "loss": 0.7753, "step": 33618 }, { - "epoch": 0.9540011350737798, + "epoch": 0.9526764714216894, "grad_norm": 0.0, - "learning_rate": 1.1077388779181297e-07, - "loss": 0.818, + "learning_rate": 1.1722993537367278e-07, + "loss": 0.7692, "step": 33619 }, { - "epoch": 0.9540295119182747, + "epoch": 0.9527048088639519, "grad_norm": 0.0, - "learning_rate": 1.1063750893134273e-07, - "loss": 0.6754, + "learning_rate": 1.170898568186507e-07, + "loss": 0.8381, "step": 33620 }, { - "epoch": 0.9540578887627695, + "epoch": 0.9527331463062144, "grad_norm": 0.0, - "learning_rate": 1.1050121360676292e-07, - "loss": 0.6885, + "learning_rate": 1.1694986151104981e-07, + "loss": 0.7284, "step": 33621 }, { - "epoch": 0.9540862656072645, + "epoch": 0.9527614837484769, "grad_norm": 0.0, - "learning_rate": 1.1036500181922372e-07, - "loss": 0.7679, + "learning_rate": 1.1680994945205137e-07, + "loss": 0.8187, "step": 33622 }, { - "epoch": 0.9541146424517594, + "epoch": 0.9527898211907393, "grad_norm": 0.0, - "learning_rate": 1.1022887356987533e-07, - "loss": 0.7974, + "learning_rate": 1.1667012064283223e-07, + "loss": 0.7469, "step": 33623 }, { - "epoch": 0.9541430192962542, + "epoch": 0.9528181586330018, "grad_norm": 0.0, - "learning_rate": 1.1009282885986793e-07, - "loss": 0.7397, + "learning_rate": 1.1653037508457032e-07, + "loss": 0.7676, "step": 33624 }, { - "epoch": 0.9541713961407492, + "epoch": 0.9528464960752643, "grad_norm": 0.0, - "learning_rate": 1.0995686769035286e-07, - "loss": 0.7272, + "learning_rate": 1.1639071277844472e-07, + "loss": 0.9022, "step": 33625 }, { - "epoch": 0.954199772985244, + "epoch": 0.9528748335175267, "grad_norm": 0.0, - "learning_rate": 1.098209900624747e-07, - "loss": 0.7749, + "learning_rate": 1.1625113372562891e-07, + "loss": 0.7297, "step": 33626 }, { - "epoch": 0.9542281498297389, + "epoch": 0.9529031709597892, "grad_norm": 0.0, - "learning_rate": 1.0968519597738369e-07, - "loss": 0.7898, + "learning_rate": 1.1611163792729974e-07, + "loss": 0.7204, "step": 33627 }, { - "epoch": 0.9542565266742338, + "epoch": 0.9529315084020517, "grad_norm": 0.0, - "learning_rate": 1.0954948543622779e-07, - "loss": 0.8548, + "learning_rate": 1.1597222538463293e-07, + "loss": 0.719, "step": 33628 }, { - "epoch": 0.9542849035187287, + "epoch": 0.9529598458443141, "grad_norm": 0.0, - "learning_rate": 1.0941385844015051e-07, - "loss": 0.8064, + "learning_rate": 1.1583289609880311e-07, + "loss": 0.782, "step": 33629 }, { - "epoch": 0.9543132803632236, + "epoch": 0.9529881832865765, "grad_norm": 0.0, - "learning_rate": 1.0927831499029873e-07, - "loss": 0.7968, + "learning_rate": 1.1569365007098265e-07, + "loss": 0.7742, "step": 33630 }, { - "epoch": 0.9543416572077185, + "epoch": 0.953016520728839, "grad_norm": 0.0, - "learning_rate": 1.091428550878193e-07, - "loss": 0.8369, + "learning_rate": 1.1555448730234509e-07, + "loss": 0.8166, "step": 33631 }, { - "epoch": 0.9543700340522134, + "epoch": 0.9530448581711015, "grad_norm": 0.0, - "learning_rate": 1.0900747873385353e-07, - "loss": 0.8234, + "learning_rate": 1.154154077940628e-07, + "loss": 0.7527, "step": 33632 }, { - "epoch": 0.9543984108967083, + "epoch": 0.9530731956133639, "grad_norm": 0.0, - "learning_rate": 1.0887218592954607e-07, - "loss": 0.8177, + "learning_rate": 1.1527641154730707e-07, + "loss": 0.7554, "step": 33633 }, { - "epoch": 0.9544267877412032, + "epoch": 0.9531015330556264, "grad_norm": 0.0, - "learning_rate": 1.0873697667604155e-07, - "loss": 0.7665, + "learning_rate": 1.1513749856324807e-07, + "loss": 0.7451, "step": 33634 }, { - "epoch": 0.9544551645856981, + "epoch": 0.9531298704978889, "grad_norm": 0.0, - "learning_rate": 1.0860185097447907e-07, - "loss": 0.7253, + "learning_rate": 1.149986688430571e-07, + "loss": 0.8812, "step": 33635 }, { - "epoch": 0.954483541430193, + "epoch": 0.9531582079401513, "grad_norm": 0.0, - "learning_rate": 1.0846680882600102e-07, - "loss": 0.8799, + "learning_rate": 1.1485992238790322e-07, + "loss": 0.7935, "step": 33636 }, { - "epoch": 0.9545119182746878, + "epoch": 0.9531865453824138, "grad_norm": 0.0, - "learning_rate": 1.0833185023174875e-07, - "loss": 0.7831, + "learning_rate": 1.1472125919895438e-07, + "loss": 0.779, "step": 33637 }, { - "epoch": 0.9545402951191827, + "epoch": 0.9532148828246763, "grad_norm": 0.0, - "learning_rate": 1.0819697519286243e-07, - "loss": 0.7791, + "learning_rate": 1.1458267927738077e-07, + "loss": 0.7516, "step": 33638 }, { - "epoch": 0.9545686719636777, + "epoch": 0.9532432202669388, "grad_norm": 0.0, - "learning_rate": 1.0806218371048116e-07, - "loss": 0.765, + "learning_rate": 1.1444418262434587e-07, + "loss": 0.8848, "step": 33639 }, { - "epoch": 0.9545970488081725, + "epoch": 0.9532715577092011, "grad_norm": 0.0, - "learning_rate": 1.0792747578574291e-07, - "loss": 0.8464, + "learning_rate": 1.1430576924101988e-07, + "loss": 0.8255, "step": 33640 }, { - "epoch": 0.9546254256526674, + "epoch": 0.9532998951514636, "grad_norm": 0.0, - "learning_rate": 1.0779285141978568e-07, - "loss": 0.8666, + "learning_rate": 1.1416743912856631e-07, + "loss": 0.835, "step": 33641 }, { - "epoch": 0.9546538024971624, + "epoch": 0.9533282325937261, "grad_norm": 0.0, - "learning_rate": 1.0765831061374743e-07, - "loss": 0.7937, + "learning_rate": 1.1402919228815201e-07, + "loss": 0.7805, "step": 33642 }, { - "epoch": 0.9546821793416572, + "epoch": 0.9533565700359885, "grad_norm": 0.0, - "learning_rate": 1.0752385336876392e-07, - "loss": 0.8244, + "learning_rate": 1.138910287209416e-07, + "loss": 0.764, "step": 33643 }, { - "epoch": 0.9547105561861521, + "epoch": 0.953384907478251, "grad_norm": 0.0, - "learning_rate": 1.0738947968597092e-07, - "loss": 0.6858, + "learning_rate": 1.1375294842809748e-07, + "loss": 0.7609, "step": 33644 }, { - "epoch": 0.9547389330306469, + "epoch": 0.9534132449205135, "grad_norm": 0.0, - "learning_rate": 1.0725518956650527e-07, - "loss": 0.7774, + "learning_rate": 1.1361495141078316e-07, + "loss": 0.8125, "step": 33645 }, { - "epoch": 0.9547673098751419, + "epoch": 0.953441582362776, "grad_norm": 0.0, - "learning_rate": 1.071209830114972e-07, - "loss": 0.8065, + "learning_rate": 1.1347703767016216e-07, + "loss": 0.8113, "step": 33646 }, { - "epoch": 0.9547956867196368, + "epoch": 0.9534699198050384, "grad_norm": 0.0, - "learning_rate": 1.0698686002208469e-07, - "loss": 0.7672, + "learning_rate": 1.1333920720739466e-07, + "loss": 0.7354, "step": 33647 }, { - "epoch": 0.9548240635641316, + "epoch": 0.9534982572473009, "grad_norm": 0.0, - "learning_rate": 1.0685282059939795e-07, - "loss": 0.8134, + "learning_rate": 1.1320146002364307e-07, + "loss": 0.8283, "step": 33648 }, { - "epoch": 0.9548524404086266, + "epoch": 0.9535265946895634, "grad_norm": 0.0, - "learning_rate": 1.067188647445705e-07, - "loss": 0.7494, + "learning_rate": 1.1306379612006646e-07, + "loss": 0.8473, "step": 33649 }, { - "epoch": 0.9548808172531215, + "epoch": 0.9535549321318257, "grad_norm": 0.0, - "learning_rate": 1.0658499245873366e-07, - "loss": 0.7559, + "learning_rate": 1.1292621549782501e-07, + "loss": 0.8989, "step": 33650 }, { - "epoch": 0.9549091940976163, + "epoch": 0.9535832695740882, "grad_norm": 0.0, - "learning_rate": 1.0645120374301876e-07, - "loss": 0.7657, + "learning_rate": 1.1278871815807668e-07, + "loss": 0.86, "step": 33651 }, { - "epoch": 0.9549375709421113, + "epoch": 0.9536116070163507, "grad_norm": 0.0, - "learning_rate": 1.0631749859855379e-07, - "loss": 0.8922, + "learning_rate": 1.1265130410198167e-07, + "loss": 0.7769, "step": 33652 }, { - "epoch": 0.9549659477866062, + "epoch": 0.9536399444586132, "grad_norm": 0.0, - "learning_rate": 1.0618387702647004e-07, - "loss": 0.7406, + "learning_rate": 1.125139733306957e-07, + "loss": 0.774, "step": 33653 }, { - "epoch": 0.954994324631101, + "epoch": 0.9536682819008756, "grad_norm": 0.0, - "learning_rate": 1.0605033902789663e-07, - "loss": 0.8041, + "learning_rate": 1.1237672584537673e-07, + "loss": 0.7509, "step": 33654 }, { - "epoch": 0.9550227014755959, + "epoch": 0.9536966193431381, "grad_norm": 0.0, - "learning_rate": 1.0591688460396043e-07, - "loss": 0.7959, + "learning_rate": 1.1223956164717941e-07, + "loss": 0.7271, "step": 33655 }, { - "epoch": 0.9550510783200908, + "epoch": 0.9537249567854006, "grad_norm": 0.0, - "learning_rate": 1.0578351375578943e-07, - "loss": 0.9026, + "learning_rate": 1.121024807372606e-07, + "loss": 0.7688, "step": 33656 }, { - "epoch": 0.9550794551645857, + "epoch": 0.953753294227663, "grad_norm": 0.0, - "learning_rate": 1.056502264845083e-07, - "loss": 0.8536, + "learning_rate": 1.119654831167749e-07, + "loss": 0.7001, "step": 33657 }, { - "epoch": 0.9551078320090806, + "epoch": 0.9537816316699255, "grad_norm": 0.0, - "learning_rate": 1.0551702279124609e-07, - "loss": 0.8538, + "learning_rate": 1.1182856878687476e-07, + "loss": 0.7683, "step": 33658 }, { - "epoch": 0.9551362088535755, + "epoch": 0.953809969112188, "grad_norm": 0.0, - "learning_rate": 1.053839026771264e-07, - "loss": 0.759, + "learning_rate": 1.1169173774871478e-07, + "loss": 0.7309, "step": 33659 }, { - "epoch": 0.9551645856980704, + "epoch": 0.9538383065544503, "grad_norm": 0.0, - "learning_rate": 1.0525086614327274e-07, - "loss": 0.8501, + "learning_rate": 1.1155499000344738e-07, + "loss": 0.7812, "step": 33660 }, { - "epoch": 0.9551929625425652, + "epoch": 0.9538666439967128, "grad_norm": 0.0, - "learning_rate": 1.0511791319080978e-07, - "loss": 0.8064, + "learning_rate": 1.1141832555222387e-07, + "loss": 0.8294, "step": 33661 }, { - "epoch": 0.9552213393870601, + "epoch": 0.9538949814389753, "grad_norm": 0.0, - "learning_rate": 1.0498504382086216e-07, - "loss": 0.7415, + "learning_rate": 1.1128174439619666e-07, + "loss": 0.7067, "step": 33662 }, { - "epoch": 0.9552497162315551, + "epoch": 0.9539233188812378, "grad_norm": 0.0, - "learning_rate": 1.0485225803454902e-07, - "loss": 0.8533, + "learning_rate": 1.1114524653651481e-07, + "loss": 0.7076, "step": 33663 }, { - "epoch": 0.9552780930760499, + "epoch": 0.9539516563235002, "grad_norm": 0.0, - "learning_rate": 1.0471955583299387e-07, - "loss": 0.725, + "learning_rate": 1.1100883197432855e-07, + "loss": 0.686, "step": 33664 }, { - "epoch": 0.9553064699205448, + "epoch": 0.9539799937657627, "grad_norm": 0.0, - "learning_rate": 1.0458693721731916e-07, - "loss": 0.7513, + "learning_rate": 1.1087250071078803e-07, + "loss": 0.7628, "step": 33665 }, { - "epoch": 0.9553348467650398, + "epoch": 0.9540083312080252, "grad_norm": 0.0, - "learning_rate": 1.0445440218864177e-07, - "loss": 0.8997, + "learning_rate": 1.1073625274703903e-07, + "loss": 0.8353, "step": 33666 }, { - "epoch": 0.9553632236095346, + "epoch": 0.9540366686502876, "grad_norm": 0.0, - "learning_rate": 1.0432195074808305e-07, - "loss": 0.7941, + "learning_rate": 1.1060008808423172e-07, + "loss": 0.7039, "step": 33667 }, { - "epoch": 0.9553916004540295, + "epoch": 0.9540650060925501, "grad_norm": 0.0, - "learning_rate": 1.0418958289676096e-07, - "loss": 0.7479, + "learning_rate": 1.1046400672351188e-07, + "loss": 0.807, "step": 33668 }, { - "epoch": 0.9554199772985245, + "epoch": 0.9540933435348126, "grad_norm": 0.0, - "learning_rate": 1.0405729863579573e-07, - "loss": 0.7872, + "learning_rate": 1.1032800866602633e-07, + "loss": 0.7188, "step": 33669 }, { - "epoch": 0.9554483541430193, + "epoch": 0.9541216809770751, "grad_norm": 0.0, - "learning_rate": 1.0392509796630312e-07, - "loss": 0.7983, + "learning_rate": 1.1019209391292085e-07, + "loss": 0.7743, "step": 33670 }, { - "epoch": 0.9554767309875142, + "epoch": 0.9541500184193374, "grad_norm": 0.0, - "learning_rate": 1.0379298088939893e-07, - "loss": 0.6717, + "learning_rate": 1.1005626246534006e-07, + "loss": 0.8174, "step": 33671 }, { - "epoch": 0.955505107832009, + "epoch": 0.9541783558615999, "grad_norm": 0.0, - "learning_rate": 1.0366094740620115e-07, - "loss": 0.8265, + "learning_rate": 1.0992051432442641e-07, + "loss": 0.8035, "step": 33672 }, { - "epoch": 0.955533484676504, + "epoch": 0.9542066933038624, "grad_norm": 0.0, - "learning_rate": 1.0352899751782441e-07, - "loss": 0.682, + "learning_rate": 1.0978484949132672e-07, + "loss": 0.7798, "step": 33673 }, { - "epoch": 0.9555618615209989, + "epoch": 0.9542350307461248, "grad_norm": 0.0, - "learning_rate": 1.0339713122538342e-07, - "loss": 0.7966, + "learning_rate": 1.0964926796718123e-07, + "loss": 0.8283, "step": 33674 }, { - "epoch": 0.9555902383654937, + "epoch": 0.9542633681883873, "grad_norm": 0.0, - "learning_rate": 1.0326534852999059e-07, - "loss": 0.8078, + "learning_rate": 1.0951376975313232e-07, + "loss": 0.8159, "step": 33675 }, { - "epoch": 0.9556186152099887, + "epoch": 0.9542917056306498, "grad_norm": 0.0, - "learning_rate": 1.0313364943276172e-07, - "loss": 0.8755, + "learning_rate": 1.0937835485032133e-07, + "loss": 0.7891, "step": 33676 }, { - "epoch": 0.9556469920544836, + "epoch": 0.9543200430729123, "grad_norm": 0.0, - "learning_rate": 1.0300203393480701e-07, - "loss": 0.85, + "learning_rate": 1.0924302325988956e-07, + "loss": 0.7955, "step": 33677 }, { - "epoch": 0.9556753688989784, + "epoch": 0.9543483805151747, "grad_norm": 0.0, - "learning_rate": 1.0287050203723892e-07, - "loss": 0.8218, + "learning_rate": 1.091077749829772e-07, + "loss": 0.9022, "step": 33678 }, { - "epoch": 0.9557037457434733, + "epoch": 0.9543767179574372, "grad_norm": 0.0, - "learning_rate": 1.0273905374116877e-07, - "loss": 0.8141, + "learning_rate": 1.0897261002072223e-07, + "loss": 0.8713, "step": 33679 }, { - "epoch": 0.9557321225879682, + "epoch": 0.9544050553996997, "grad_norm": 0.0, - "learning_rate": 1.026076890477068e-07, - "loss": 0.8211, + "learning_rate": 1.0883752837426486e-07, + "loss": 0.8207, "step": 33680 }, { - "epoch": 0.9557604994324631, + "epoch": 0.954433392841962, "grad_norm": 0.0, - "learning_rate": 1.024764079579621e-07, - "loss": 0.7161, + "learning_rate": 1.0870253004474086e-07, + "loss": 0.8275, "step": 33681 }, { - "epoch": 0.955788876276958, + "epoch": 0.9544617302842245, "grad_norm": 0.0, - "learning_rate": 1.0234521047304491e-07, - "loss": 0.7842, + "learning_rate": 1.0856761503328816e-07, + "loss": 0.7086, "step": 33682 }, { - "epoch": 0.9558172531214529, + "epoch": 0.954490067726487, "grad_norm": 0.0, - "learning_rate": 1.0221409659406101e-07, - "loss": 0.8197, + "learning_rate": 1.0843278334104479e-07, + "loss": 0.839, "step": 33683 }, { - "epoch": 0.9558456299659478, + "epoch": 0.9545184051687494, "grad_norm": 0.0, - "learning_rate": 1.0208306632212061e-07, - "loss": 0.7664, + "learning_rate": 1.0829803496914537e-07, + "loss": 0.7709, "step": 33684 }, { - "epoch": 0.9558740068104427, + "epoch": 0.9545467426110119, "grad_norm": 0.0, - "learning_rate": 1.0195211965832951e-07, - "loss": 0.8445, + "learning_rate": 1.0816336991872345e-07, + "loss": 0.8613, "step": 33685 }, { - "epoch": 0.9559023836549376, + "epoch": 0.9545750800532744, "grad_norm": 0.0, - "learning_rate": 1.0182125660379461e-07, - "loss": 0.8136, + "learning_rate": 1.0802878819091589e-07, + "loss": 0.7253, "step": 33686 }, { - "epoch": 0.9559307604994325, + "epoch": 0.9546034174955369, "grad_norm": 0.0, - "learning_rate": 1.0169047715961944e-07, - "loss": 0.8655, + "learning_rate": 1.0789428978685512e-07, + "loss": 0.9132, "step": 33687 }, { - "epoch": 0.9559591373439273, + "epoch": 0.9546317549377993, "grad_norm": 0.0, - "learning_rate": 1.0155978132691091e-07, - "loss": 0.823, + "learning_rate": 1.0775987470767357e-07, + "loss": 0.7631, "step": 33688 }, { - "epoch": 0.9559875141884222, + "epoch": 0.9546600923800618, "grad_norm": 0.0, - "learning_rate": 1.0142916910677036e-07, - "loss": 0.8488, + "learning_rate": 1.0762554295450367e-07, + "loss": 0.8334, "step": 33689 }, { - "epoch": 0.9560158910329172, + "epoch": 0.9546884298223243, "grad_norm": 0.0, - "learning_rate": 1.0129864050030357e-07, - "loss": 0.7926, + "learning_rate": 1.0749129452847785e-07, + "loss": 0.835, "step": 33690 }, { - "epoch": 0.956044267877412, + "epoch": 0.9547167672645867, "grad_norm": 0.0, - "learning_rate": 1.011681955086119e-07, - "loss": 0.7953, + "learning_rate": 1.0735712943072629e-07, + "loss": 0.7221, "step": 33691 }, { - "epoch": 0.9560726447219069, + "epoch": 0.9547451047068491, "grad_norm": 0.0, - "learning_rate": 1.0103783413279777e-07, - "loss": 0.7788, + "learning_rate": 1.0722304766237923e-07, + "loss": 0.8436, "step": 33692 }, { - "epoch": 0.9561010215664019, + "epoch": 0.9547734421491116, "grad_norm": 0.0, - "learning_rate": 1.0090755637396254e-07, - "loss": 0.8858, + "learning_rate": 1.0708904922456575e-07, + "loss": 0.8509, "step": 33693 }, { - "epoch": 0.9561293984108967, + "epoch": 0.9548017795913741, "grad_norm": 0.0, - "learning_rate": 1.0077736223320422e-07, - "loss": 0.7874, + "learning_rate": 1.0695513411841496e-07, + "loss": 0.8183, "step": 33694 }, { - "epoch": 0.9561577752553916, + "epoch": 0.9548301170336365, "grad_norm": 0.0, - "learning_rate": 1.0064725171162636e-07, - "loss": 0.7003, + "learning_rate": 1.0682130234505372e-07, + "loss": 0.7014, "step": 33695 }, { - "epoch": 0.9561861520998864, + "epoch": 0.954858454475899, "grad_norm": 0.0, - "learning_rate": 1.0051722481032589e-07, - "loss": 0.8563, + "learning_rate": 1.0668755390561225e-07, + "loss": 0.7142, "step": 33696 }, { - "epoch": 0.9562145289443814, + "epoch": 0.9548867919181615, "grad_norm": 0.0, - "learning_rate": 1.0038728153040078e-07, - "loss": 0.7898, + "learning_rate": 1.0655388880121408e-07, + "loss": 0.8057, "step": 33697 }, { - "epoch": 0.9562429057888763, + "epoch": 0.9549151293604239, "grad_norm": 0.0, - "learning_rate": 1.0025742187294907e-07, - "loss": 0.7421, + "learning_rate": 1.0642030703298722e-07, + "loss": 0.8464, "step": 33698 }, { - "epoch": 0.9562712826333711, + "epoch": 0.9549434668026864, "grad_norm": 0.0, - "learning_rate": 1.0012764583906875e-07, - "loss": 0.8708, + "learning_rate": 1.062868086020552e-07, + "loss": 0.742, "step": 33699 }, { - "epoch": 0.9562996594778661, + "epoch": 0.9549718042449489, "grad_norm": 0.0, - "learning_rate": 9.999795342985452e-08, - "loss": 0.8411, + "learning_rate": 1.061533935095438e-07, + "loss": 0.7853, "step": 33700 }, { - "epoch": 0.956328036322361, + "epoch": 0.9550001416872113, "grad_norm": 0.0, - "learning_rate": 9.986834464640328e-08, - "loss": 0.8276, + "learning_rate": 1.0602006175657653e-07, + "loss": 0.7879, "step": 33701 }, { - "epoch": 0.9563564131668558, + "epoch": 0.9550284791294738, "grad_norm": 0.0, - "learning_rate": 9.973881948980856e-08, - "loss": 0.843, + "learning_rate": 1.0588681334427586e-07, + "loss": 0.7887, "step": 33702 }, { - "epoch": 0.9563847900113507, + "epoch": 0.9550568165717362, "grad_norm": 0.0, - "learning_rate": 9.960937796116621e-08, - "loss": 0.784, + "learning_rate": 1.0575364827376533e-07, + "loss": 0.7198, "step": 33703 }, { - "epoch": 0.9564131668558457, + "epoch": 0.9550851540139987, "grad_norm": 0.0, - "learning_rate": 9.948002006156754e-08, - "loss": 0.7569, + "learning_rate": 1.0562056654616515e-07, + "loss": 0.7976, "step": 33704 }, { - "epoch": 0.9564415437003405, + "epoch": 0.9551134914562611, "grad_norm": 0.0, - "learning_rate": 9.935074579210613e-08, - "loss": 0.7578, + "learning_rate": 1.0548756816259775e-07, + "loss": 0.7467, "step": 33705 }, { - "epoch": 0.9564699205448354, + "epoch": 0.9551418288985236, "grad_norm": 0.0, - "learning_rate": 9.922155515387444e-08, - "loss": 0.7729, + "learning_rate": 1.0535465312418225e-07, + "loss": 0.7184, "step": 33706 }, { - "epoch": 0.9564982973893303, + "epoch": 0.9551701663407861, "grad_norm": 0.0, - "learning_rate": 9.90924481479627e-08, - "loss": 0.8276, + "learning_rate": 1.0522182143203886e-07, + "loss": 0.791, "step": 33707 }, { - "epoch": 0.9565266742338252, + "epoch": 0.9551985037830485, "grad_norm": 0.0, - "learning_rate": 9.896342477546339e-08, - "loss": 0.8434, + "learning_rate": 1.0508907308728666e-07, + "loss": 0.7671, "step": 33708 }, { - "epoch": 0.9565550510783201, + "epoch": 0.955226841225311, "grad_norm": 0.0, - "learning_rate": 9.883448503746451e-08, - "loss": 0.789, + "learning_rate": 1.0495640809104257e-07, + "loss": 0.8255, "step": 33709 }, { - "epoch": 0.956583427922815, + "epoch": 0.9552551786675735, "grad_norm": 0.0, - "learning_rate": 9.870562893505519e-08, - "loss": 0.7062, + "learning_rate": 1.048238264444268e-07, + "loss": 0.8889, "step": 33710 }, { - "epoch": 0.9566118047673099, + "epoch": 0.955283516109836, "grad_norm": 0.0, - "learning_rate": 9.857685646932569e-08, - "loss": 0.8322, + "learning_rate": 1.0469132814855287e-07, + "loss": 0.7067, "step": 33711 }, { - "epoch": 0.9566401816118048, + "epoch": 0.9553118535520984, "grad_norm": 0.0, - "learning_rate": 9.844816764136179e-08, - "loss": 0.9138, + "learning_rate": 1.0455891320453992e-07, + "loss": 0.8001, "step": 33712 }, { - "epoch": 0.9566685584562996, + "epoch": 0.9553401909943608, "grad_norm": 0.0, - "learning_rate": 9.831956245225149e-08, - "loss": 0.896, + "learning_rate": 1.044265816135015e-07, + "loss": 0.8063, "step": 33713 }, { - "epoch": 0.9566969353007946, + "epoch": 0.9553685284366233, "grad_norm": 0.0, - "learning_rate": 9.819104090308062e-08, - "loss": 0.7607, + "learning_rate": 1.0429433337655115e-07, + "loss": 0.892, "step": 33714 }, { - "epoch": 0.9567253121452894, + "epoch": 0.9553968658788857, "grad_norm": 0.0, - "learning_rate": 9.806260299493498e-08, - "loss": 0.8938, + "learning_rate": 1.0416216849480576e-07, + "loss": 0.8249, "step": 33715 }, { - "epoch": 0.9567536889897843, + "epoch": 0.9554252033211482, "grad_norm": 0.0, - "learning_rate": 9.793424872890034e-08, - "loss": 0.8135, + "learning_rate": 1.0403008696937666e-07, + "loss": 0.8597, "step": 33716 }, { - "epoch": 0.9567820658342793, + "epoch": 0.9554535407634107, "grad_norm": 0.0, - "learning_rate": 9.78059781060603e-08, - "loss": 0.8353, + "learning_rate": 1.0389808880137742e-07, + "loss": 0.846, "step": 33717 }, { - "epoch": 0.9568104426787741, + "epoch": 0.9554818782056732, "grad_norm": 0.0, - "learning_rate": 9.767779112749731e-08, - "loss": 0.8519, + "learning_rate": 1.0376617399191935e-07, + "loss": 0.8318, "step": 33718 }, { - "epoch": 0.956838819523269, + "epoch": 0.9555102156479356, "grad_norm": 0.0, - "learning_rate": 9.754968779429719e-08, - "loss": 0.7505, + "learning_rate": 1.0363434254211269e-07, + "loss": 0.8736, "step": 33719 }, { - "epoch": 0.9568671963677639, + "epoch": 0.9555385530901981, "grad_norm": 0.0, - "learning_rate": 9.742166810753795e-08, - "loss": 0.6792, + "learning_rate": 1.0350259445306987e-07, + "loss": 0.7622, "step": 33720 }, { - "epoch": 0.9568955732122588, + "epoch": 0.9555668905324606, "grad_norm": 0.0, - "learning_rate": 9.729373206830428e-08, - "loss": 0.7687, + "learning_rate": 1.0337092972589891e-07, + "loss": 0.8968, "step": 33721 }, { - "epoch": 0.9569239500567537, + "epoch": 0.955595227974723, "grad_norm": 0.0, - "learning_rate": 9.716587967767532e-08, - "loss": 0.801, + "learning_rate": 1.0323934836171001e-07, + "loss": 0.8119, "step": 33722 }, { - "epoch": 0.9569523269012485, + "epoch": 0.9556235654169855, "grad_norm": 0.0, - "learning_rate": 9.703811093673022e-08, - "loss": 0.9086, + "learning_rate": 1.031078503616112e-07, + "loss": 0.8463, "step": 33723 }, { - "epoch": 0.9569807037457435, + "epoch": 0.955651902859248, "grad_norm": 0.0, - "learning_rate": 9.69104258465503e-08, - "loss": 0.7722, + "learning_rate": 1.0297643572670935e-07, + "loss": 0.6695, "step": 33724 }, { - "epoch": 0.9570090805902384, + "epoch": 0.9556802403015103, "grad_norm": 0.0, - "learning_rate": 9.678282440821363e-08, - "loss": 0.7766, + "learning_rate": 1.0284510445811357e-07, + "loss": 0.7099, "step": 33725 }, { - "epoch": 0.9570374574347332, + "epoch": 0.9557085777437728, "grad_norm": 0.0, - "learning_rate": 9.66553066227971e-08, - "loss": 0.7003, + "learning_rate": 1.0271385655692745e-07, + "loss": 0.8047, "step": 33726 }, { - "epoch": 0.9570658342792282, + "epoch": 0.9557369151860353, "grad_norm": 0.0, - "learning_rate": 9.652787249137874e-08, - "loss": 0.8416, + "learning_rate": 1.0258269202425896e-07, + "loss": 0.7967, "step": 33727 }, { - "epoch": 0.9570942111237231, + "epoch": 0.9557652526282978, "grad_norm": 0.0, - "learning_rate": 9.640052201503436e-08, - "loss": 0.83, + "learning_rate": 1.024516108612117e-07, + "loss": 0.7832, "step": 33728 }, { - "epoch": 0.9571225879682179, + "epoch": 0.9557935900705602, "grad_norm": 0.0, - "learning_rate": 9.627325519483865e-08, - "loss": 0.801, + "learning_rate": 1.0232061306888918e-07, + "loss": 0.6953, "step": 33729 }, { - "epoch": 0.9571509648127128, + "epoch": 0.9558219275128227, "grad_norm": 0.0, - "learning_rate": 9.614607203186966e-08, - "loss": 0.7729, + "learning_rate": 1.02189698648395e-07, + "loss": 0.901, "step": 33730 }, { - "epoch": 0.9571793416572077, + "epoch": 0.9558502649550852, "grad_norm": 0.0, - "learning_rate": 9.601897252719983e-08, - "loss": 0.829, + "learning_rate": 1.0205886760083383e-07, + "loss": 0.6466, "step": 33731 }, { - "epoch": 0.9572077185017026, + "epoch": 0.9558786023973476, "grad_norm": 0.0, - "learning_rate": 9.589195668190277e-08, - "loss": 0.8205, + "learning_rate": 1.0192811992730478e-07, + "loss": 0.7294, "step": 33732 }, { - "epoch": 0.9572360953461975, + "epoch": 0.9559069398396101, "grad_norm": 0.0, - "learning_rate": 9.576502449705205e-08, - "loss": 0.9095, + "learning_rate": 1.017974556289114e-07, + "loss": 0.7663, "step": 33733 }, { - "epoch": 0.9572644721906924, + "epoch": 0.9559352772818726, "grad_norm": 0.0, - "learning_rate": 9.563817597371793e-08, - "loss": 0.7699, + "learning_rate": 1.0166687470675395e-07, + "loss": 0.7323, "step": 33734 }, { - "epoch": 0.9572928490351873, + "epoch": 0.955963614724135, "grad_norm": 0.0, - "learning_rate": 9.551141111297402e-08, - "loss": 0.7014, + "learning_rate": 1.0153637716193154e-07, + "loss": 0.8777, "step": 33735 }, { - "epoch": 0.9573212258796822, + "epoch": 0.9559919521663974, "grad_norm": 0.0, - "learning_rate": 9.538472991589164e-08, - "loss": 0.8281, + "learning_rate": 1.014059629955455e-07, + "loss": 0.821, "step": 33736 }, { - "epoch": 0.957349602724177, + "epoch": 0.9560202896086599, "grad_norm": 0.0, - "learning_rate": 9.525813238353887e-08, - "loss": 0.8419, + "learning_rate": 1.0127563220869052e-07, + "loss": 0.8253, "step": 33737 }, { - "epoch": 0.957377979568672, + "epoch": 0.9560486270509224, "grad_norm": 0.0, - "learning_rate": 9.513161851698594e-08, - "loss": 0.8703, + "learning_rate": 1.0114538480246794e-07, + "loss": 0.761, "step": 33738 }, { - "epoch": 0.9574063564131668, + "epoch": 0.9560769644931848, "grad_norm": 0.0, - "learning_rate": 9.500518831730087e-08, - "loss": 0.8571, + "learning_rate": 1.0101522077797354e-07, + "loss": 0.8113, "step": 33739 }, { - "epoch": 0.9574347332576617, + "epoch": 0.9561053019354473, "grad_norm": 0.0, - "learning_rate": 9.487884178555284e-08, - "loss": 0.7215, + "learning_rate": 1.0088514013630424e-07, + "loss": 0.8237, "step": 33740 }, { - "epoch": 0.9574631101021567, + "epoch": 0.9561336393777098, "grad_norm": 0.0, - "learning_rate": 9.475257892280876e-08, - "loss": 0.7381, + "learning_rate": 1.007551428785547e-07, + "loss": 0.8439, "step": 33741 }, { - "epoch": 0.9574914869466515, + "epoch": 0.9561619768199723, "grad_norm": 0.0, - "learning_rate": 9.462639973013443e-08, - "loss": 0.7376, + "learning_rate": 1.0062522900582072e-07, + "loss": 0.6616, "step": 33742 }, { - "epoch": 0.9575198637911464, + "epoch": 0.9561903142622347, "grad_norm": 0.0, - "learning_rate": 9.450030420859679e-08, - "loss": 0.7724, + "learning_rate": 1.0049539851919699e-07, + "loss": 0.816, "step": 33743 }, { - "epoch": 0.9575482406356414, + "epoch": 0.9562186517044972, "grad_norm": 0.0, - "learning_rate": 9.437429235925944e-08, - "loss": 0.6968, + "learning_rate": 1.0036565141977594e-07, + "loss": 0.7919, "step": 33744 }, { - "epoch": 0.9575766174801362, + "epoch": 0.9562469891467597, "grad_norm": 0.0, - "learning_rate": 9.424836418318817e-08, - "loss": 0.7792, + "learning_rate": 1.0023598770865117e-07, + "loss": 0.7618, "step": 33745 }, { - "epoch": 0.9576049943246311, + "epoch": 0.956275326589022, "grad_norm": 0.0, - "learning_rate": 9.412251968144547e-08, - "loss": 0.7948, + "learning_rate": 1.0010640738691513e-07, + "loss": 0.8618, "step": 33746 }, { - "epoch": 0.957633371169126, + "epoch": 0.9563036640312845, "grad_norm": 0.0, - "learning_rate": 9.399675885509607e-08, - "loss": 0.7894, + "learning_rate": 9.997691045565916e-08, + "loss": 0.7813, "step": 33747 }, { - "epoch": 0.9576617480136209, + "epoch": 0.956332001473547, "grad_norm": 0.0, - "learning_rate": 9.387108170520131e-08, - "loss": 0.754, + "learning_rate": 9.984749691597351e-08, + "loss": 0.7507, "step": 33748 }, { - "epoch": 0.9576901248581158, + "epoch": 0.9563603389158094, "grad_norm": 0.0, - "learning_rate": 9.374548823282259e-08, - "loss": 0.7943, + "learning_rate": 9.971816676894952e-08, + "loss": 0.8209, "step": 33749 }, { - "epoch": 0.9577185017026106, + "epoch": 0.9563886763580719, "grad_norm": 0.0, - "learning_rate": 9.361997843902238e-08, - "loss": 0.8179, + "learning_rate": 9.958892001567521e-08, + "loss": 0.8712, "step": 33750 }, { - "epoch": 0.9577468785471056, + "epoch": 0.9564170138003344, "grad_norm": 0.0, - "learning_rate": 9.34945523248576e-08, - "loss": 0.8466, + "learning_rate": 9.945975665723972e-08, + "loss": 0.8255, "step": 33751 }, { - "epoch": 0.9577752553916005, + "epoch": 0.9564453512425969, "grad_norm": 0.0, - "learning_rate": 9.336920989139075e-08, - "loss": 0.7855, + "learning_rate": 9.933067669473107e-08, + "loss": 0.7453, "step": 33752 }, { - "epoch": 0.9578036322360953, + "epoch": 0.9564736886848593, "grad_norm": 0.0, - "learning_rate": 9.324395113967877e-08, - "loss": 0.8102, + "learning_rate": 9.920168012923725e-08, + "loss": 0.8209, "step": 33753 }, { - "epoch": 0.9578320090805902, + "epoch": 0.9565020261271218, "grad_norm": 0.0, - "learning_rate": 9.31187760707808e-08, - "loss": 0.7249, + "learning_rate": 9.90727669618441e-08, + "loss": 0.7219, "step": 33754 }, { - "epoch": 0.9578603859250852, + "epoch": 0.9565303635693843, "grad_norm": 0.0, - "learning_rate": 9.299368468575376e-08, - "loss": 0.8186, + "learning_rate": 9.894393719363737e-08, + "loss": 0.8693, "step": 33755 }, { - "epoch": 0.95788876276958, + "epoch": 0.9565587010116466, "grad_norm": 0.0, - "learning_rate": 9.286867698565571e-08, - "loss": 0.7904, + "learning_rate": 9.88151908257029e-08, + "loss": 0.8547, "step": 33756 }, { - "epoch": 0.9579171396140749, + "epoch": 0.9565870384539091, "grad_norm": 0.0, - "learning_rate": 9.274375297154025e-08, - "loss": 0.8332, + "learning_rate": 9.868652785912424e-08, + "loss": 0.8511, "step": 33757 }, { - "epoch": 0.9579455164585698, + "epoch": 0.9566153758961716, "grad_norm": 0.0, - "learning_rate": 9.261891264446321e-08, - "loss": 0.7939, + "learning_rate": 9.85579482949861e-08, + "loss": 0.8428, "step": 33758 }, { - "epoch": 0.9579738933030647, + "epoch": 0.9566437133384341, "grad_norm": 0.0, - "learning_rate": 9.24941560054804e-08, - "loss": 0.7544, + "learning_rate": 9.842945213437094e-08, + "loss": 0.8185, "step": 33759 }, { - "epoch": 0.9580022701475596, + "epoch": 0.9566720507806965, "grad_norm": 0.0, - "learning_rate": 9.23694830556432e-08, - "loss": 0.703, + "learning_rate": 9.830103937836122e-08, + "loss": 0.86, "step": 33760 }, { - "epoch": 0.9580306469920545, + "epoch": 0.956700388222959, "grad_norm": 0.0, - "learning_rate": 9.224489379600854e-08, - "loss": 0.7164, + "learning_rate": 9.81727100280394e-08, + "loss": 0.8765, "step": 33761 }, { - "epoch": 0.9580590238365494, + "epoch": 0.9567287256652215, "grad_norm": 0.0, - "learning_rate": 9.212038822762558e-08, - "loss": 0.7923, + "learning_rate": 9.804446408448576e-08, + "loss": 0.8229, "step": 33762 }, { - "epoch": 0.9580874006810443, + "epoch": 0.9567570631074839, "grad_norm": 0.0, - "learning_rate": 9.199596635154684e-08, - "loss": 0.917, + "learning_rate": 9.791630154878052e-08, + "loss": 0.8101, "step": 33763 }, { - "epoch": 0.9581157775255391, + "epoch": 0.9567854005497464, "grad_norm": 0.0, - "learning_rate": 9.187162816882477e-08, - "loss": 0.8519, + "learning_rate": 9.778822242200281e-08, + "loss": 0.7991, "step": 33764 }, { - "epoch": 0.9581441543700341, + "epoch": 0.9568137379920089, "grad_norm": 0.0, - "learning_rate": 9.174737368050746e-08, - "loss": 0.8869, + "learning_rate": 9.766022670523179e-08, + "loss": 0.7911, "step": 33765 }, { - "epoch": 0.9581725312145289, + "epoch": 0.9568420754342714, "grad_norm": 0.0, - "learning_rate": 9.162320288764515e-08, - "loss": 0.9213, + "learning_rate": 9.753231439954658e-08, + "loss": 0.8131, "step": 33766 }, { - "epoch": 0.9582009080590238, + "epoch": 0.9568704128765337, "grad_norm": 0.0, - "learning_rate": 9.149911579128812e-08, - "loss": 0.8292, + "learning_rate": 9.74044855060241e-08, + "loss": 0.8033, "step": 33767 }, { - "epoch": 0.9582292849035188, + "epoch": 0.9568987503187962, "grad_norm": 0.0, - "learning_rate": 9.13751123924822e-08, - "loss": 0.7248, + "learning_rate": 9.727674002574017e-08, + "loss": 0.8581, "step": 33768 }, { - "epoch": 0.9582576617480136, + "epoch": 0.9569270877610587, "grad_norm": 0.0, - "learning_rate": 9.125119269227656e-08, - "loss": 0.7651, + "learning_rate": 9.714907795977169e-08, + "loss": 0.7588, "step": 33769 }, { - "epoch": 0.9582860385925085, + "epoch": 0.9569554252033211, "grad_norm": 0.0, - "learning_rate": 9.112735669171924e-08, - "loss": 0.7629, + "learning_rate": 9.702149930919446e-08, + "loss": 0.9757, "step": 33770 }, { - "epoch": 0.9583144154370034, + "epoch": 0.9569837626455836, "grad_norm": 0.0, - "learning_rate": 9.100360439185275e-08, - "loss": 0.8536, + "learning_rate": 9.68940040750821e-08, + "loss": 0.8053, "step": 33771 }, { - "epoch": 0.9583427922814983, + "epoch": 0.9570121000878461, "grad_norm": 0.0, - "learning_rate": 9.087993579372401e-08, - "loss": 0.8005, + "learning_rate": 9.676659225850815e-08, + "loss": 0.7213, "step": 33772 }, { - "epoch": 0.9583711691259932, + "epoch": 0.9570404375301085, "grad_norm": 0.0, - "learning_rate": 9.075635089837886e-08, - "loss": 0.7897, + "learning_rate": 9.663926386054734e-08, + "loss": 0.889, "step": 33773 }, { - "epoch": 0.958399545970488, + "epoch": 0.957068774972371, "grad_norm": 0.0, - "learning_rate": 9.063284970685982e-08, - "loss": 0.837, + "learning_rate": 9.651201888227102e-08, + "loss": 0.8972, "step": 33774 }, { - "epoch": 0.958427922814983, + "epoch": 0.9570971124146335, "grad_norm": 0.0, - "learning_rate": 9.050943222021047e-08, - "loss": 0.727, + "learning_rate": 9.638485732475277e-08, + "loss": 0.8253, "step": 33775 }, { - "epoch": 0.9584562996594779, + "epoch": 0.957125449856896, "grad_norm": 0.0, - "learning_rate": 9.038609843947333e-08, - "loss": 0.8403, + "learning_rate": 9.625777918906176e-08, + "loss": 0.7628, "step": 33776 }, { - "epoch": 0.9584846765039727, + "epoch": 0.9571537872991583, "grad_norm": 0.0, - "learning_rate": 9.026284836569088e-08, - "loss": 0.8025, + "learning_rate": 9.613078447626933e-08, + "loss": 0.8382, "step": 33777 }, { - "epoch": 0.9585130533484677, + "epoch": 0.9571821247414208, "grad_norm": 0.0, - "learning_rate": 9.013968199990342e-08, - "loss": 0.704, + "learning_rate": 9.600387318744464e-08, + "loss": 0.7368, "step": 33778 }, { - "epoch": 0.9585414301929626, + "epoch": 0.9572104621836833, "grad_norm": 0.0, - "learning_rate": 9.001659934315121e-08, - "loss": 0.7958, + "learning_rate": 9.587704532365683e-08, + "loss": 0.8051, "step": 33779 }, { - "epoch": 0.9585698070374574, + "epoch": 0.9572387996259457, "grad_norm": 0.0, - "learning_rate": 8.989360039647454e-08, - "loss": 0.8265, + "learning_rate": 9.575030088597503e-08, + "loss": 0.77, "step": 33780 }, { - "epoch": 0.9585981838819523, + "epoch": 0.9572671370682082, "grad_norm": 0.0, - "learning_rate": 8.977068516091258e-08, - "loss": 0.7167, + "learning_rate": 9.56236398754673e-08, + "loss": 0.8452, "step": 33781 }, { - "epoch": 0.9586265607264473, + "epoch": 0.9572954745104707, "grad_norm": 0.0, - "learning_rate": 8.964785363750228e-08, - "loss": 0.8894, + "learning_rate": 9.549706229319832e-08, + "loss": 0.8125, "step": 33782 }, { - "epoch": 0.9586549375709421, + "epoch": 0.9573238119527332, "grad_norm": 0.0, - "learning_rate": 8.95251058272828e-08, - "loss": 0.729, + "learning_rate": 9.537056814023505e-08, + "loss": 0.6729, "step": 33783 }, { - "epoch": 0.958683314415437, + "epoch": 0.9573521493949956, "grad_norm": 0.0, - "learning_rate": 8.940244173128998e-08, - "loss": 0.766, + "learning_rate": 9.524415741764437e-08, + "loss": 0.8428, "step": 33784 }, { - "epoch": 0.9587116912599319, + "epoch": 0.9573804868372581, "grad_norm": 0.0, - "learning_rate": 8.927986135056077e-08, - "loss": 0.7869, + "learning_rate": 9.51178301264899e-08, + "loss": 0.7845, "step": 33785 }, { - "epoch": 0.9587400681044268, + "epoch": 0.9574088242795206, "grad_norm": 0.0, - "learning_rate": 8.915736468612879e-08, - "loss": 0.8708, + "learning_rate": 9.499158626783633e-08, + "loss": 0.9006, "step": 33786 }, { - "epoch": 0.9587684449489217, + "epoch": 0.957437161721783, "grad_norm": 0.0, - "learning_rate": 8.903495173903099e-08, - "loss": 0.8215, + "learning_rate": 9.486542584274616e-08, + "loss": 0.7194, "step": 33787 }, { - "epoch": 0.9587968217934165, + "epoch": 0.9574654991640454, "grad_norm": 0.0, - "learning_rate": 8.891262251029986e-08, - "loss": 0.7918, + "learning_rate": 9.473934885228298e-08, + "loss": 0.7351, "step": 33788 }, { - "epoch": 0.9588251986379115, + "epoch": 0.9574938366063079, "grad_norm": 0.0, - "learning_rate": 8.879037700097015e-08, - "loss": 0.8017, + "learning_rate": 9.461335529750815e-08, + "loss": 0.716, "step": 33789 }, { - "epoch": 0.9588535754824064, + "epoch": 0.9575221740485704, "grad_norm": 0.0, - "learning_rate": 8.866821521207325e-08, - "loss": 0.6861, + "learning_rate": 9.448744517948415e-08, + "loss": 0.7295, "step": 33790 }, { - "epoch": 0.9588819523269012, + "epoch": 0.9575505114908328, "grad_norm": 0.0, - "learning_rate": 8.854613714464166e-08, - "loss": 0.7429, + "learning_rate": 9.436161849927016e-08, + "loss": 0.7607, "step": 33791 }, { - "epoch": 0.9589103291713962, + "epoch": 0.9575788489330953, "grad_norm": 0.0, - "learning_rate": 8.842414279970568e-08, - "loss": 0.7928, + "learning_rate": 9.423587525792644e-08, + "loss": 0.806, "step": 33792 }, { - "epoch": 0.958938706015891, + "epoch": 0.9576071863753578, "grad_norm": 0.0, - "learning_rate": 8.830223217829781e-08, - "loss": 0.8681, + "learning_rate": 9.411021545651322e-08, + "loss": 0.8994, "step": 33793 }, { - "epoch": 0.9589670828603859, + "epoch": 0.9576355238176202, "grad_norm": 0.0, - "learning_rate": 8.8180405281445e-08, - "loss": 0.8107, + "learning_rate": 9.398463909608746e-08, + "loss": 0.8196, "step": 33794 }, { - "epoch": 0.9589954597048809, + "epoch": 0.9576638612598827, "grad_norm": 0.0, - "learning_rate": 8.805866211017866e-08, - "loss": 0.8014, + "learning_rate": 9.385914617770719e-08, + "loss": 0.8805, "step": 33795 }, { - "epoch": 0.9590238365493757, + "epoch": 0.9576921987021452, "grad_norm": 0.0, - "learning_rate": 8.793700266552685e-08, - "loss": 0.7667, + "learning_rate": 9.373373670243158e-08, + "loss": 0.789, "step": 33796 }, { - "epoch": 0.9590522133938706, + "epoch": 0.9577205361444076, "grad_norm": 0.0, - "learning_rate": 8.781542694851653e-08, - "loss": 0.7643, + "learning_rate": 9.36084106713131e-08, + "loss": 0.8477, "step": 33797 }, { - "epoch": 0.9590805902383654, + "epoch": 0.95774887358667, "grad_norm": 0.0, - "learning_rate": 8.769393496017465e-08, - "loss": 0.7806, + "learning_rate": 9.348316808541092e-08, + "loss": 0.7651, "step": 33798 }, { - "epoch": 0.9591089670828604, + "epoch": 0.9577772110289325, "grad_norm": 0.0, - "learning_rate": 8.757252670152816e-08, - "loss": 0.8386, + "learning_rate": 9.335800894577862e-08, + "loss": 0.7736, "step": 33799 }, { - "epoch": 0.9591373439273553, + "epoch": 0.957805548471195, "grad_norm": 0.0, - "learning_rate": 8.74512021736007e-08, - "loss": 0.8017, + "learning_rate": 9.323293325346983e-08, + "loss": 0.7661, "step": 33800 }, { - "epoch": 0.9591657207718501, + "epoch": 0.9578338859134574, "grad_norm": 0.0, - "learning_rate": 8.732996137742034e-08, - "loss": 0.8022, + "learning_rate": 9.310794100953923e-08, + "loss": 0.7645, "step": 33801 }, { - "epoch": 0.9591940976163451, + "epoch": 0.9578622233557199, "grad_norm": 0.0, - "learning_rate": 8.720880431400847e-08, - "loss": 0.8565, + "learning_rate": 9.298303221503935e-08, + "loss": 0.9476, "step": 33802 }, { - "epoch": 0.95922247446084, + "epoch": 0.9578905607979824, "grad_norm": 0.0, - "learning_rate": 8.708773098438872e-08, - "loss": 0.763, + "learning_rate": 9.285820687102264e-08, + "loss": 0.7655, "step": 33803 }, { - "epoch": 0.9592508513053348, + "epoch": 0.9579188982402448, "grad_norm": 0.0, - "learning_rate": 8.696674138958583e-08, - "loss": 0.7926, + "learning_rate": 9.273346497854052e-08, + "loss": 0.7388, "step": 33804 }, { - "epoch": 0.9592792281498297, + "epoch": 0.9579472356825073, "grad_norm": 0.0, - "learning_rate": 8.684583553062009e-08, - "loss": 0.7992, + "learning_rate": 9.260880653864212e-08, + "loss": 0.7987, "step": 33805 }, { - "epoch": 0.9593076049943247, + "epoch": 0.9579755731247698, "grad_norm": 0.0, - "learning_rate": 8.672501340851291e-08, - "loss": 0.8345, + "learning_rate": 9.248423155237884e-08, + "loss": 0.7499, "step": 33806 }, { - "epoch": 0.9593359818388195, + "epoch": 0.9580039105670323, "grad_norm": 0.0, - "learning_rate": 8.66042750242857e-08, - "loss": 0.8158, + "learning_rate": 9.235974002080094e-08, + "loss": 0.7896, "step": 33807 }, { - "epoch": 0.9593643586833144, + "epoch": 0.9580322480092947, "grad_norm": 0.0, - "learning_rate": 8.648362037895764e-08, - "loss": 0.8277, + "learning_rate": 9.223533194495537e-08, + "loss": 0.7645, "step": 33808 }, { - "epoch": 0.9593927355278093, + "epoch": 0.9580605854515571, "grad_norm": 0.0, - "learning_rate": 8.636304947354679e-08, - "loss": 0.7535, + "learning_rate": 9.211100732589129e-08, + "loss": 0.779, "step": 33809 }, { - "epoch": 0.9594211123723042, + "epoch": 0.9580889228938196, "grad_norm": 0.0, - "learning_rate": 8.624256230907458e-08, - "loss": 0.8369, + "learning_rate": 9.198676616465562e-08, + "loss": 0.9678, "step": 33810 }, { - "epoch": 0.9594494892167991, + "epoch": 0.958117260336082, "grad_norm": 0.0, - "learning_rate": 8.612215888655684e-08, - "loss": 0.7511, + "learning_rate": 9.186260846229423e-08, + "loss": 0.7738, "step": 33811 }, { - "epoch": 0.959477866061294, + "epoch": 0.9581455977783445, "grad_norm": 0.0, - "learning_rate": 8.600183920701055e-08, - "loss": 0.8513, + "learning_rate": 9.173853421985291e-08, + "loss": 0.8972, "step": 33812 }, { - "epoch": 0.9595062429057889, + "epoch": 0.958173935220607, "grad_norm": 0.0, - "learning_rate": 8.588160327145268e-08, - "loss": 0.762, + "learning_rate": 9.16145434383775e-08, + "loss": 0.866, "step": 33813 }, { - "epoch": 0.9595346197502838, + "epoch": 0.9582022726628695, "grad_norm": 0.0, - "learning_rate": 8.576145108089795e-08, - "loss": 0.8537, + "learning_rate": 9.149063611891162e-08, + "loss": 0.7182, "step": 33814 }, { - "epoch": 0.9595629965947786, + "epoch": 0.9582306101051319, "grad_norm": 0.0, - "learning_rate": 8.564138263636224e-08, - "loss": 0.8208, + "learning_rate": 9.13668122625011e-08, + "loss": 0.9929, "step": 33815 }, { - "epoch": 0.9595913734392736, + "epoch": 0.9582589475473944, "grad_norm": 0.0, - "learning_rate": 8.552139793886028e-08, - "loss": 0.7588, + "learning_rate": 9.124307187018622e-08, + "loss": 0.8937, "step": 33816 }, { - "epoch": 0.9596197502837684, + "epoch": 0.9582872849896569, "grad_norm": 0.0, - "learning_rate": 8.540149698940347e-08, - "loss": 0.7666, + "learning_rate": 9.111941494301057e-08, + "loss": 0.774, "step": 33817 }, { - "epoch": 0.9596481271282633, + "epoch": 0.9583156224319193, "grad_norm": 0.0, - "learning_rate": 8.52816797890066e-08, - "loss": 0.8232, + "learning_rate": 9.099584148201668e-08, + "loss": 0.7815, "step": 33818 }, { - "epoch": 0.9596765039727583, + "epoch": 0.9583439598741818, "grad_norm": 0.0, - "learning_rate": 8.516194633868213e-08, - "loss": 0.7105, + "learning_rate": 9.08723514882437e-08, + "loss": 0.8315, "step": 33819 }, { - "epoch": 0.9597048808172531, + "epoch": 0.9583722973164442, "grad_norm": 0.0, - "learning_rate": 8.50422966394382e-08, - "loss": 0.7874, + "learning_rate": 9.074894496273301e-08, + "loss": 0.6975, "step": 33820 }, { - "epoch": 0.959733257661748, + "epoch": 0.9584006347587066, "grad_norm": 0.0, - "learning_rate": 8.49227306922884e-08, - "loss": 0.7616, + "learning_rate": 9.062562190652269e-08, + "loss": 0.8087, "step": 33821 }, { - "epoch": 0.9597616345062429, + "epoch": 0.9584289722009691, "grad_norm": 0.0, - "learning_rate": 8.480324849824306e-08, - "loss": 0.7759, + "learning_rate": 9.0502382320653e-08, + "loss": 0.8992, "step": 33822 }, { - "epoch": 0.9597900113507378, + "epoch": 0.9584573096432316, "grad_norm": 0.0, - "learning_rate": 8.468385005830914e-08, - "loss": 0.779, + "learning_rate": 9.037922620616091e-08, + "loss": 0.8283, "step": 33823 }, { - "epoch": 0.9598183881952327, + "epoch": 0.9584856470854941, "grad_norm": 0.0, - "learning_rate": 8.456453537349697e-08, - "loss": 0.8511, + "learning_rate": 9.025615356408557e-08, + "loss": 0.875, "step": 33824 }, { - "epoch": 0.9598467650397275, + "epoch": 0.9585139845277565, "grad_norm": 0.0, - "learning_rate": 8.444530444481458e-08, - "loss": 0.8855, + "learning_rate": 9.013316439546171e-08, + "loss": 0.7475, "step": 33825 }, { - "epoch": 0.9598751418842225, + "epoch": 0.958542321970019, "grad_norm": 0.0, - "learning_rate": 8.432615727326676e-08, - "loss": 0.8901, + "learning_rate": 9.001025870132629e-08, + "loss": 0.723, "step": 33826 }, { - "epoch": 0.9599035187287174, + "epoch": 0.9585706594122815, "grad_norm": 0.0, - "learning_rate": 8.42070938598638e-08, - "loss": 0.8695, + "learning_rate": 8.988743648271514e-08, + "loss": 0.6757, "step": 33827 }, { - "epoch": 0.9599318955732122, + "epoch": 0.9585989968545439, "grad_norm": 0.0, - "learning_rate": 8.408811420560826e-08, - "loss": 0.8083, + "learning_rate": 8.976469774066187e-08, + "loss": 0.7652, "step": 33828 }, { - "epoch": 0.9599602724177072, + "epoch": 0.9586273342968064, "grad_norm": 0.0, - "learning_rate": 8.396921831150484e-08, - "loss": 0.7608, + "learning_rate": 8.964204247620012e-08, + "loss": 0.7584, "step": 33829 }, { - "epoch": 0.9599886492622021, + "epoch": 0.9586556717390688, "grad_norm": 0.0, - "learning_rate": 8.385040617856166e-08, - "loss": 0.8414, + "learning_rate": 8.951947069036349e-08, + "loss": 0.8178, "step": 33830 }, { - "epoch": 0.9600170261066969, + "epoch": 0.9586840091813313, "grad_norm": 0.0, - "learning_rate": 8.373167780777791e-08, - "loss": 0.6832, + "learning_rate": 8.939698238418559e-08, + "loss": 0.8071, "step": 33831 }, { - "epoch": 0.9600454029511918, + "epoch": 0.9587123466235937, "grad_norm": 0.0, - "learning_rate": 8.361303320015945e-08, - "loss": 0.8604, + "learning_rate": 8.927457755869562e-08, + "loss": 0.8116, "step": 33832 }, { - "epoch": 0.9600737797956868, + "epoch": 0.9587406840658562, "grad_norm": 0.0, - "learning_rate": 8.349447235670771e-08, - "loss": 0.6407, + "learning_rate": 8.915225621492718e-08, + "loss": 0.8046, "step": 33833 }, { - "epoch": 0.9601021566401816, + "epoch": 0.9587690215081187, "grad_norm": 0.0, - "learning_rate": 8.3375995278423e-08, - "loss": 0.772, + "learning_rate": 8.903001835390946e-08, + "loss": 0.877, "step": 33834 }, { - "epoch": 0.9601305334846765, + "epoch": 0.9587973589503811, "grad_norm": 0.0, - "learning_rate": 8.325760196630673e-08, - "loss": 0.8157, + "learning_rate": 8.890786397667161e-08, + "loss": 0.8457, "step": 33835 }, { - "epoch": 0.9601589103291714, + "epoch": 0.9588256963926436, "grad_norm": 0.0, - "learning_rate": 8.313929242136031e-08, - "loss": 0.6735, + "learning_rate": 8.878579308424395e-08, + "loss": 0.7407, "step": 33836 }, { - "epoch": 0.9601872871736663, + "epoch": 0.9588540338349061, "grad_norm": 0.0, - "learning_rate": 8.302106664458187e-08, - "loss": 0.7344, + "learning_rate": 8.866380567765342e-08, + "loss": 0.8474, "step": 33837 }, { - "epoch": 0.9602156640181612, + "epoch": 0.9588823712771686, "grad_norm": 0.0, - "learning_rate": 8.290292463697059e-08, - "loss": 0.7244, + "learning_rate": 8.854190175792921e-08, + "loss": 0.724, "step": 33838 }, { - "epoch": 0.960244040862656, + "epoch": 0.958910708719431, "grad_norm": 0.0, - "learning_rate": 8.278486639952343e-08, - "loss": 0.9345, + "learning_rate": 8.842008132609603e-08, + "loss": 0.7336, "step": 33839 }, { - "epoch": 0.960272417707151, + "epoch": 0.9589390461616935, "grad_norm": 0.0, - "learning_rate": 8.266689193323852e-08, - "loss": 0.9033, + "learning_rate": 8.829834438318196e-08, + "loss": 0.8462, "step": 33840 }, { - "epoch": 0.9603007945516459, + "epoch": 0.958967383603956, "grad_norm": 0.0, - "learning_rate": 8.25490012391117e-08, - "loss": 0.8618, + "learning_rate": 8.817669093021064e-08, + "loss": 0.8768, "step": 33841 }, { - "epoch": 0.9603291713961407, + "epoch": 0.9589957210462183, "grad_norm": 0.0, - "learning_rate": 8.243119431813994e-08, - "loss": 0.8251, + "learning_rate": 8.80551209682079e-08, + "loss": 0.8164, "step": 33842 }, { - "epoch": 0.9603575482406357, + "epoch": 0.9590240584884808, "grad_norm": 0.0, - "learning_rate": 8.231347117131804e-08, - "loss": 0.8074, + "learning_rate": 8.793363449819848e-08, + "loss": 0.7506, "step": 33843 }, { - "epoch": 0.9603859250851305, + "epoch": 0.9590523959307433, "grad_norm": 0.0, - "learning_rate": 8.219583179963964e-08, - "loss": 0.8028, + "learning_rate": 8.781223152120377e-08, + "loss": 0.7319, "step": 33844 }, { - "epoch": 0.9604143019296254, + "epoch": 0.9590807333730057, "grad_norm": 0.0, - "learning_rate": 8.207827620409836e-08, - "loss": 0.7717, + "learning_rate": 8.769091203824743e-08, + "loss": 0.8511, "step": 33845 }, { - "epoch": 0.9604426787741204, + "epoch": 0.9591090708152682, "grad_norm": 0.0, - "learning_rate": 8.196080438568788e-08, - "loss": 0.7946, + "learning_rate": 8.756967605035194e-08, + "loss": 0.8438, "step": 33846 }, { - "epoch": 0.9604710556186152, + "epoch": 0.9591374082575307, "grad_norm": 0.0, - "learning_rate": 8.184341634540183e-08, - "loss": 0.8726, + "learning_rate": 8.744852355853761e-08, + "loss": 0.7777, "step": 33847 }, { - "epoch": 0.9604994324631101, + "epoch": 0.9591657456997932, "grad_norm": 0.0, - "learning_rate": 8.172611208422832e-08, - "loss": 0.8444, + "learning_rate": 8.732745456382586e-08, + "loss": 0.8294, "step": 33848 }, { - "epoch": 0.960527809307605, + "epoch": 0.9591940831420556, "grad_norm": 0.0, - "learning_rate": 8.160889160316099e-08, - "loss": 0.7829, + "learning_rate": 8.720646906723585e-08, + "loss": 0.8973, "step": 33849 }, { - "epoch": 0.9605561861520999, + "epoch": 0.9592224205843181, "grad_norm": 0.0, - "learning_rate": 8.149175490319017e-08, - "loss": 0.7542, + "learning_rate": 8.708556706978566e-08, + "loss": 0.8327, "step": 33850 }, { - "epoch": 0.9605845629965948, + "epoch": 0.9592507580265806, "grad_norm": 0.0, - "learning_rate": 8.137470198530284e-08, - "loss": 0.7732, + "learning_rate": 8.696474857249559e-08, + "loss": 0.7734, "step": 33851 }, { - "epoch": 0.9606129398410896, + "epoch": 0.9592790954688429, "grad_norm": 0.0, - "learning_rate": 8.125773285049044e-08, - "loss": 0.782, + "learning_rate": 8.684401357638261e-08, + "loss": 0.8227, "step": 33852 }, { - "epoch": 0.9606413166855846, + "epoch": 0.9593074329111054, "grad_norm": 0.0, - "learning_rate": 8.114084749973882e-08, - "loss": 0.7865, + "learning_rate": 8.672336208246368e-08, + "loss": 0.8045, "step": 33853 }, { - "epoch": 0.9606696935300795, + "epoch": 0.9593357703533679, "grad_norm": 0.0, - "learning_rate": 8.102404593403612e-08, - "loss": 0.7883, + "learning_rate": 8.660279409175576e-08, + "loss": 0.6377, "step": 33854 }, { - "epoch": 0.9606980703745743, + "epoch": 0.9593641077956304, "grad_norm": 0.0, - "learning_rate": 8.090732815437041e-08, - "loss": 0.8518, + "learning_rate": 8.648230960527249e-08, + "loss": 0.806, "step": 33855 }, { - "epoch": 0.9607264472190692, + "epoch": 0.9593924452378928, "grad_norm": 0.0, - "learning_rate": 8.079069416172646e-08, - "loss": 0.8258, + "learning_rate": 8.636190862403082e-08, + "loss": 0.7925, "step": 33856 }, { - "epoch": 0.9607548240635642, + "epoch": 0.9594207826801553, "grad_norm": 0.0, - "learning_rate": 8.067414395708795e-08, - "loss": 0.7208, + "learning_rate": 8.62415911490444e-08, + "loss": 0.8611, "step": 33857 }, { - "epoch": 0.960783200908059, + "epoch": 0.9594491201224178, "grad_norm": 0.0, - "learning_rate": 8.055767754144295e-08, - "loss": 0.8285, + "learning_rate": 8.612135718132575e-08, + "loss": 0.7748, "step": 33858 }, { - "epoch": 0.9608115777525539, + "epoch": 0.9594774575646802, "grad_norm": 0.0, - "learning_rate": 8.04412949157718e-08, - "loss": 0.8023, + "learning_rate": 8.600120672188739e-08, + "loss": 0.8252, "step": 33859 }, { - "epoch": 0.9608399545970489, + "epoch": 0.9595057950069427, "grad_norm": 0.0, - "learning_rate": 8.032499608105815e-08, - "loss": 0.8226, + "learning_rate": 8.588113977174405e-08, + "loss": 0.8185, "step": 33860 }, { - "epoch": 0.9608683314415437, + "epoch": 0.9595341324492052, "grad_norm": 0.0, - "learning_rate": 8.020878103828678e-08, - "loss": 0.8289, + "learning_rate": 8.576115633190496e-08, + "loss": 0.8151, "step": 33861 }, { - "epoch": 0.9608967082860386, + "epoch": 0.9595624698914677, "grad_norm": 0.0, - "learning_rate": 8.009264978843689e-08, - "loss": 0.7576, + "learning_rate": 8.564125640338039e-08, + "loss": 0.824, "step": 33862 }, { - "epoch": 0.9609250851305334, + "epoch": 0.95959080733373, "grad_norm": 0.0, - "learning_rate": 7.997660233249105e-08, - "loss": 0.7516, + "learning_rate": 8.552143998718177e-08, + "loss": 0.8406, "step": 33863 }, { - "epoch": 0.9609534619750284, + "epoch": 0.9596191447759925, "grad_norm": 0.0, - "learning_rate": 7.986063867142846e-08, - "loss": 0.6981, + "learning_rate": 8.540170708431716e-08, + "loss": 0.7169, "step": 33864 }, { - "epoch": 0.9609818388195233, + "epoch": 0.959647482218255, "grad_norm": 0.0, - "learning_rate": 7.974475880622834e-08, - "loss": 0.7468, + "learning_rate": 8.52820576957969e-08, + "loss": 0.7723, "step": 33865 }, { - "epoch": 0.9610102156640181, + "epoch": 0.9596758196605174, "grad_norm": 0.0, - "learning_rate": 7.962896273787102e-08, - "loss": 0.7226, + "learning_rate": 8.516249182262682e-08, + "loss": 0.8635, "step": 33866 }, { - "epoch": 0.9610385925085131, + "epoch": 0.9597041571027799, "grad_norm": 0.0, - "learning_rate": 7.951325046733238e-08, - "loss": 0.7611, + "learning_rate": 8.5043009465815e-08, + "loss": 0.7605, "step": 33867 }, { - "epoch": 0.961066969353008, + "epoch": 0.9597324945450424, "grad_norm": 0.0, - "learning_rate": 7.939762199559275e-08, - "loss": 0.7592, + "learning_rate": 8.492361062636845e-08, + "loss": 0.8124, "step": 33868 }, { - "epoch": 0.9610953461975028, + "epoch": 0.9597608319873048, "grad_norm": 0.0, - "learning_rate": 7.928207732362692e-08, - "loss": 0.8567, + "learning_rate": 8.480429530529077e-08, + "loss": 0.7897, "step": 33869 }, { - "epoch": 0.9611237230419978, + "epoch": 0.9597891694295673, "grad_norm": 0.0, - "learning_rate": 7.916661645241186e-08, - "loss": 0.7509, + "learning_rate": 8.468506350358896e-08, + "loss": 0.8253, "step": 33870 }, { - "epoch": 0.9611520998864926, + "epoch": 0.9598175068718298, "grad_norm": 0.0, - "learning_rate": 7.905123938292125e-08, - "loss": 0.8344, + "learning_rate": 8.456591522226776e-08, + "loss": 0.8012, "step": 33871 }, { - "epoch": 0.9611804767309875, + "epoch": 0.9598458443140923, "grad_norm": 0.0, - "learning_rate": 7.893594611613208e-08, - "loss": 0.8761, + "learning_rate": 8.44468504623286e-08, + "loss": 0.861, "step": 33872 }, { - "epoch": 0.9612088535754824, + "epoch": 0.9598741817563546, "grad_norm": 0.0, - "learning_rate": 7.882073665301581e-08, - "loss": 0.8047, + "learning_rate": 8.432786922477621e-08, + "loss": 0.8299, "step": 33873 }, { - "epoch": 0.9612372304199773, + "epoch": 0.9599025191986171, "grad_norm": 0.0, - "learning_rate": 7.870561099454721e-08, - "loss": 0.9159, + "learning_rate": 8.420897151061202e-08, + "loss": 0.7935, "step": 33874 }, { - "epoch": 0.9612656072644722, + "epoch": 0.9599308566408796, "grad_norm": 0.0, - "learning_rate": 7.859056914169882e-08, - "loss": 0.8171, + "learning_rate": 8.409015732083748e-08, + "loss": 0.7343, "step": 33875 }, { - "epoch": 0.961293984108967, + "epoch": 0.959959194083142, "grad_norm": 0.0, - "learning_rate": 7.8475611095441e-08, - "loss": 0.8065, + "learning_rate": 8.397142665645508e-08, + "loss": 0.8854, "step": 33876 }, { - "epoch": 0.961322360953462, + "epoch": 0.9599875315254045, "grad_norm": 0.0, - "learning_rate": 7.836073685674628e-08, - "loss": 0.7346, + "learning_rate": 8.385277951846182e-08, + "loss": 0.7505, "step": 33877 }, { - "epoch": 0.9613507377979569, + "epoch": 0.960015868967667, "grad_norm": 0.0, - "learning_rate": 7.824594642658501e-08, - "loss": 0.7669, + "learning_rate": 8.373421590786024e-08, + "loss": 0.8011, "step": 33878 }, { - "epoch": 0.9613791146424517, + "epoch": 0.9600442064099295, "grad_norm": 0.0, - "learning_rate": 7.81312398059253e-08, - "loss": 0.7024, + "learning_rate": 8.36157358256473e-08, + "loss": 0.764, "step": 33879 }, { - "epoch": 0.9614074914869466, + "epoch": 0.9600725438521919, "grad_norm": 0.0, - "learning_rate": 7.801661699573637e-08, - "loss": 0.8635, + "learning_rate": 8.349733927282112e-08, + "loss": 0.7881, "step": 33880 }, { - "epoch": 0.9614358683314416, + "epoch": 0.9601008812944544, "grad_norm": 0.0, - "learning_rate": 7.790207799698857e-08, - "loss": 0.7094, + "learning_rate": 8.337902625037975e-08, + "loss": 0.84, "step": 33881 }, { - "epoch": 0.9614642451759364, + "epoch": 0.9601292187367169, "grad_norm": 0.0, - "learning_rate": 7.778762281064777e-08, - "loss": 0.885, + "learning_rate": 8.326079675931908e-08, + "loss": 0.8147, "step": 33882 }, { - "epoch": 0.9614926220204313, + "epoch": 0.9601575561789792, "grad_norm": 0.0, - "learning_rate": 7.767325143767989e-08, - "loss": 0.8145, + "learning_rate": 8.314265080063499e-08, + "loss": 0.7976, "step": 33883 }, { - "epoch": 0.9615209988649263, + "epoch": 0.9601858936212417, "grad_norm": 0.0, - "learning_rate": 7.755896387905304e-08, - "loss": 0.847, + "learning_rate": 8.302458837532335e-08, + "loss": 0.8198, "step": 33884 }, { - "epoch": 0.9615493757094211, + "epoch": 0.9602142310635042, "grad_norm": 0.0, - "learning_rate": 7.744476013573088e-08, - "loss": 0.7385, + "learning_rate": 8.290660948437779e-08, + "loss": 0.839, "step": 33885 }, { - "epoch": 0.961577752553916, + "epoch": 0.9602425685057667, "grad_norm": 0.0, - "learning_rate": 7.733064020868042e-08, - "loss": 0.6665, + "learning_rate": 8.278871412879196e-08, + "loss": 0.8405, "step": 33886 }, { - "epoch": 0.961606129398411, + "epoch": 0.9602709059480291, "grad_norm": 0.0, - "learning_rate": 7.721660409886201e-08, - "loss": 0.8727, + "learning_rate": 8.267090230956065e-08, + "loss": 0.7859, "step": 33887 }, { - "epoch": 0.9616345062429058, + "epoch": 0.9602992433902916, "grad_norm": 0.0, - "learning_rate": 7.710265180724264e-08, - "loss": 0.749, + "learning_rate": 8.255317402767415e-08, + "loss": 0.7732, "step": 33888 }, { - "epoch": 0.9616628830874007, + "epoch": 0.9603275808325541, "grad_norm": 0.0, - "learning_rate": 7.698878333478266e-08, - "loss": 0.8243, + "learning_rate": 8.243552928412501e-08, + "loss": 0.8132, "step": 33889 }, { - "epoch": 0.9616912599318955, + "epoch": 0.9603559182748165, "grad_norm": 0.0, - "learning_rate": 7.687499868244464e-08, - "loss": 0.8337, + "learning_rate": 8.231796807990356e-08, + "loss": 0.7337, "step": 33890 }, { - "epoch": 0.9617196367763905, + "epoch": 0.960384255717079, "grad_norm": 0.0, - "learning_rate": 7.676129785118891e-08, - "loss": 0.8067, + "learning_rate": 8.220049041600231e-08, + "loss": 0.8089, "step": 33891 }, { - "epoch": 0.9617480136208854, + "epoch": 0.9604125931593415, "grad_norm": 0.0, - "learning_rate": 7.664768084197804e-08, - "loss": 0.7644, + "learning_rate": 8.208309629340827e-08, + "loss": 0.7375, "step": 33892 }, { - "epoch": 0.9617763904653802, + "epoch": 0.9604409306016038, "grad_norm": 0.0, - "learning_rate": 7.653414765576905e-08, - "loss": 0.7479, + "learning_rate": 8.196578571311175e-08, + "loss": 0.763, "step": 33893 }, { - "epoch": 0.9618047673098752, + "epoch": 0.9604692680438663, "grad_norm": 0.0, - "learning_rate": 7.642069829352228e-08, - "loss": 0.7952, + "learning_rate": 8.184855867609976e-08, + "loss": 0.6955, "step": 33894 }, { - "epoch": 0.96183314415437, + "epoch": 0.9604976054861288, "grad_norm": 0.0, - "learning_rate": 7.630733275619585e-08, - "loss": 0.8615, + "learning_rate": 8.173141518336147e-08, + "loss": 0.7424, "step": 33895 }, { - "epoch": 0.9618615209988649, + "epoch": 0.9605259429283913, "grad_norm": 0.0, - "learning_rate": 7.619405104474786e-08, - "loss": 0.8297, + "learning_rate": 8.161435523588168e-08, + "loss": 0.6838, "step": 33896 }, { - "epoch": 0.9618898978433598, + "epoch": 0.9605542803706537, "grad_norm": 0.0, - "learning_rate": 7.608085316013536e-08, - "loss": 0.6254, + "learning_rate": 8.149737883464737e-08, + "loss": 0.8333, "step": 33897 }, { - "epoch": 0.9619182746878547, + "epoch": 0.9605826178129162, "grad_norm": 0.0, - "learning_rate": 7.596773910331313e-08, - "loss": 0.8174, + "learning_rate": 8.138048598064329e-08, + "loss": 0.8483, "step": 33898 }, { - "epoch": 0.9619466515323496, + "epoch": 0.9606109552551787, "grad_norm": 0.0, - "learning_rate": 7.585470887523705e-08, - "loss": 0.8182, + "learning_rate": 8.126367667485535e-08, + "loss": 0.7685, "step": 33899 }, { - "epoch": 0.9619750283768445, + "epoch": 0.9606392926974411, "grad_norm": 0.0, - "learning_rate": 7.574176247686305e-08, - "loss": 0.8813, + "learning_rate": 8.114695091826607e-08, + "loss": 0.796, "step": 33900 }, { - "epoch": 0.9620034052213394, + "epoch": 0.9606676301397036, "grad_norm": 0.0, - "learning_rate": 7.562889990914479e-08, - "loss": 0.7323, + "learning_rate": 8.103030871186024e-08, + "loss": 0.753, "step": 33901 }, { - "epoch": 0.9620317820658343, + "epoch": 0.9606959675819661, "grad_norm": 0.0, - "learning_rate": 7.551612117303597e-08, - "loss": 0.8743, + "learning_rate": 8.091375005661817e-08, + "loss": 0.7613, "step": 33902 }, { - "epoch": 0.9620601589103291, + "epoch": 0.9607243050242286, "grad_norm": 0.0, - "learning_rate": 7.540342626948693e-08, - "loss": 0.8319, + "learning_rate": 8.079727495352352e-08, + "loss": 0.7835, "step": 33903 }, { - "epoch": 0.9620885357548241, + "epoch": 0.960752642466491, "grad_norm": 0.0, - "learning_rate": 7.529081519945248e-08, - "loss": 0.8804, + "learning_rate": 8.068088340355662e-08, + "loss": 0.8477, "step": 33904 }, { - "epoch": 0.962116912599319, + "epoch": 0.9607809799087534, "grad_norm": 0.0, - "learning_rate": 7.517828796388294e-08, - "loss": 0.8461, + "learning_rate": 8.056457540769891e-08, + "loss": 0.796, "step": 33905 }, { - "epoch": 0.9621452894438138, + "epoch": 0.9608093173510159, "grad_norm": 0.0, - "learning_rate": 7.506584456372757e-08, - "loss": 0.8045, + "learning_rate": 8.04483509669285e-08, + "loss": 0.8861, "step": 33906 }, { - "epoch": 0.9621736662883087, + "epoch": 0.9608376547932783, "grad_norm": 0.0, - "learning_rate": 7.495348499993782e-08, - "loss": 0.8791, + "learning_rate": 8.03322100822257e-08, + "loss": 0.7841, "step": 33907 }, { - "epoch": 0.9622020431328037, + "epoch": 0.9608659922355408, "grad_norm": 0.0, - "learning_rate": 7.484120927346184e-08, - "loss": 0.8616, + "learning_rate": 8.021615275456862e-08, + "loss": 0.7726, "step": 33908 }, { - "epoch": 0.9622304199772985, + "epoch": 0.9608943296778033, "grad_norm": 0.0, - "learning_rate": 7.472901738524885e-08, - "loss": 0.8972, + "learning_rate": 8.010017898493316e-08, + "loss": 0.8711, "step": 33909 }, { - "epoch": 0.9622587968217934, + "epoch": 0.9609226671200657, "grad_norm": 0.0, - "learning_rate": 7.461690933624476e-08, - "loss": 0.8133, + "learning_rate": 7.998428877429854e-08, + "loss": 0.6717, "step": 33910 }, { - "epoch": 0.9622871736662884, + "epoch": 0.9609510045623282, "grad_norm": 0.0, - "learning_rate": 7.450488512739884e-08, - "loss": 0.9294, + "learning_rate": 7.986848212363952e-08, + "loss": 0.7998, "step": 33911 }, { - "epoch": 0.9623155505107832, + "epoch": 0.9609793420045907, "grad_norm": 0.0, - "learning_rate": 7.439294475965696e-08, - "loss": 0.7785, + "learning_rate": 7.975275903393309e-08, + "loss": 0.8421, "step": 33912 }, { - "epoch": 0.9623439273552781, + "epoch": 0.9610076794468532, "grad_norm": 0.0, - "learning_rate": 7.428108823396174e-08, - "loss": 0.8501, + "learning_rate": 7.963711950615183e-08, + "loss": 0.8365, "step": 33913 }, { - "epoch": 0.9623723041997729, + "epoch": 0.9610360168891156, "grad_norm": 0.0, - "learning_rate": 7.41693155512624e-08, - "loss": 0.738, + "learning_rate": 7.95215635412705e-08, + "loss": 0.7657, "step": 33914 }, { - "epoch": 0.9624006810442679, + "epoch": 0.961064354331378, "grad_norm": 0.0, - "learning_rate": 7.405762671250039e-08, - "loss": 0.7826, + "learning_rate": 7.940609114026388e-08, + "loss": 0.8018, "step": 33915 }, { - "epoch": 0.9624290578887628, + "epoch": 0.9610926917736405, "grad_norm": 0.0, - "learning_rate": 7.394602171861942e-08, - "loss": 0.7716, + "learning_rate": 7.92907023041034e-08, + "loss": 0.8054, "step": 33916 }, { - "epoch": 0.9624574347332576, + "epoch": 0.9611210292159029, "grad_norm": 0.0, - "learning_rate": 7.383450057056318e-08, - "loss": 0.88, + "learning_rate": 7.917539703376054e-08, + "loss": 0.8164, "step": 33917 }, { - "epoch": 0.9624858115777526, + "epoch": 0.9611493666581654, "grad_norm": 0.0, - "learning_rate": 7.372306326927315e-08, - "loss": 0.8802, + "learning_rate": 7.906017533020893e-08, + "loss": 0.8816, "step": 33918 }, { - "epoch": 0.9625141884222475, + "epoch": 0.9611777041004279, "grad_norm": 0.0, - "learning_rate": 7.361170981569077e-08, - "loss": 0.746, + "learning_rate": 7.89450371944167e-08, + "loss": 0.8081, "step": 33919 }, { - "epoch": 0.9625425652667423, + "epoch": 0.9612060415426904, "grad_norm": 0.0, - "learning_rate": 7.350044021075642e-08, - "loss": 0.7804, + "learning_rate": 7.88299826273542e-08, + "loss": 0.8008, "step": 33920 }, { - "epoch": 0.9625709421112373, + "epoch": 0.9612343789849528, "grad_norm": 0.0, - "learning_rate": 7.338925445541046e-08, - "loss": 0.8769, + "learning_rate": 7.871501162999173e-08, + "loss": 0.7716, "step": 33921 }, { - "epoch": 0.9625993189557321, + "epoch": 0.9612627164272153, "grad_norm": 0.0, - "learning_rate": 7.32781525505899e-08, - "loss": 0.7687, + "learning_rate": 7.860012420329633e-08, + "loss": 0.8246, "step": 33922 }, { - "epoch": 0.962627695800227, + "epoch": 0.9612910538694778, "grad_norm": 0.0, - "learning_rate": 7.316713449723623e-08, - "loss": 0.7574, + "learning_rate": 7.848532034823608e-08, + "loss": 0.7676, "step": 33923 }, { - "epoch": 0.9626560726447219, + "epoch": 0.9613193913117402, "grad_norm": 0.0, - "learning_rate": 7.305620029628646e-08, - "loss": 0.7825, + "learning_rate": 7.837060006577801e-08, + "loss": 0.8119, "step": 33924 }, { - "epoch": 0.9626844494892168, + "epoch": 0.9613477287540027, "grad_norm": 0.0, - "learning_rate": 7.294534994867652e-08, - "loss": 0.897, + "learning_rate": 7.825596335688912e-08, + "loss": 0.8937, "step": 33925 }, { - "epoch": 0.9627128263337117, + "epoch": 0.9613760661962651, "grad_norm": 0.0, - "learning_rate": 7.283458345534456e-08, - "loss": 0.9465, + "learning_rate": 7.814141022253529e-08, + "loss": 0.8057, "step": 33926 }, { - "epoch": 0.9627412031782066, + "epoch": 0.9614044036385276, "grad_norm": 0.0, - "learning_rate": 7.272390081722425e-08, - "loss": 0.8598, + "learning_rate": 7.802694066368022e-08, + "loss": 0.9024, "step": 33927 }, { - "epoch": 0.9627695800227015, + "epoch": 0.96143274108079, "grad_norm": 0.0, - "learning_rate": 7.261330203525153e-08, - "loss": 0.8241, + "learning_rate": 7.791255468128755e-08, + "loss": 0.8156, "step": 33928 }, { - "epoch": 0.9627979568671964, + "epoch": 0.9614610785230525, "grad_norm": 0.0, - "learning_rate": 7.250278711036007e-08, - "loss": 0.7854, + "learning_rate": 7.779825227632321e-08, + "loss": 0.8457, "step": 33929 }, { - "epoch": 0.9628263337116912, + "epoch": 0.961489415965315, "grad_norm": 0.0, - "learning_rate": 7.23923560434836e-08, - "loss": 0.756, + "learning_rate": 7.768403344974862e-08, + "loss": 0.8333, "step": 33930 }, { - "epoch": 0.9628547105561861, + "epoch": 0.9615177534075774, "grad_norm": 0.0, - "learning_rate": 7.228200883555581e-08, - "loss": 0.7317, + "learning_rate": 7.756989820252525e-08, + "loss": 0.7463, "step": 33931 }, { - "epoch": 0.9628830874006811, + "epoch": 0.9615460908498399, "grad_norm": 0.0, - "learning_rate": 7.217174548750926e-08, - "loss": 0.8224, + "learning_rate": 7.745584653561566e-08, + "loss": 0.7404, "step": 33932 }, { - "epoch": 0.9629114642451759, + "epoch": 0.9615744282921024, "grad_norm": 0.0, - "learning_rate": 7.206156600027214e-08, - "loss": 0.755, + "learning_rate": 7.73418784499802e-08, + "loss": 0.7204, "step": 33933 }, { - "epoch": 0.9629398410896708, + "epoch": 0.9616027657343648, "grad_norm": 0.0, - "learning_rate": 7.195147037477812e-08, - "loss": 0.7099, + "learning_rate": 7.722799394657921e-08, + "loss": 0.8489, "step": 33934 }, { - "epoch": 0.9629682179341658, + "epoch": 0.9616311031766273, "grad_norm": 0.0, - "learning_rate": 7.184145861195757e-08, - "loss": 0.7454, + "learning_rate": 7.711419302637079e-08, + "loss": 0.7831, "step": 33935 }, { - "epoch": 0.9629965947786606, + "epoch": 0.9616594406188897, "grad_norm": 0.0, - "learning_rate": 7.173153071273753e-08, - "loss": 0.8744, + "learning_rate": 7.700047569031533e-08, + "loss": 0.7292, "step": 33936 }, { - "epoch": 0.9630249716231555, + "epoch": 0.9616877780611522, "grad_norm": 0.0, - "learning_rate": 7.162168667804726e-08, - "loss": 0.8315, + "learning_rate": 7.688684193936868e-08, + "loss": 0.7574, "step": 33937 }, { - "epoch": 0.9630533484676504, + "epoch": 0.9617161155034146, "grad_norm": 0.0, - "learning_rate": 7.151192650881488e-08, - "loss": 0.8031, + "learning_rate": 7.6773291774489e-08, + "loss": 0.7774, "step": 33938 }, { - "epoch": 0.9630817253121453, + "epoch": 0.9617444529456771, "grad_norm": 0.0, - "learning_rate": 7.140225020596858e-08, - "loss": 0.8405, + "learning_rate": 7.665982519663329e-08, + "loss": 0.7814, "step": 33939 }, { - "epoch": 0.9631101021566402, + "epoch": 0.9617727903879396, "grad_norm": 0.0, - "learning_rate": 7.129265777043204e-08, - "loss": 0.7271, + "learning_rate": 7.654644220675744e-08, + "loss": 0.7346, "step": 33940 }, { - "epoch": 0.963138479001135, + "epoch": 0.961801127830202, "grad_norm": 0.0, - "learning_rate": 7.118314920313452e-08, - "loss": 0.9159, + "learning_rate": 7.643314280581404e-08, + "loss": 0.7434, "step": 33941 }, { - "epoch": 0.96316685584563, + "epoch": 0.9618294652724645, "grad_norm": 0.0, - "learning_rate": 7.10737245049986e-08, - "loss": 0.7791, + "learning_rate": 7.631992699476009e-08, + "loss": 0.7592, "step": 33942 }, { - "epoch": 0.9631952326901249, + "epoch": 0.961857802714727, "grad_norm": 0.0, - "learning_rate": 7.096438367694913e-08, - "loss": 0.8168, + "learning_rate": 7.620679477454929e-08, + "loss": 0.8746, "step": 33943 }, { - "epoch": 0.9632236095346197, + "epoch": 0.9618861401569895, "grad_norm": 0.0, - "learning_rate": 7.08551267199098e-08, - "loss": 0.9449, + "learning_rate": 7.609374614613307e-08, + "loss": 0.8032, "step": 33944 }, { - "epoch": 0.9632519863791147, + "epoch": 0.9619144775992519, "grad_norm": 0.0, - "learning_rate": 7.074595363480318e-08, - "loss": 0.8184, + "learning_rate": 7.598078111046514e-08, + "loss": 0.7911, "step": 33945 }, { - "epoch": 0.9632803632236095, + "epoch": 0.9619428150415144, "grad_norm": 0.0, - "learning_rate": 7.063686442255302e-08, - "loss": 0.7368, + "learning_rate": 7.586789966849473e-08, + "loss": 0.7153, "step": 33946 }, { - "epoch": 0.9633087400681044, + "epoch": 0.9619711524837768, "grad_norm": 0.0, - "learning_rate": 7.052785908407855e-08, - "loss": 0.8143, + "learning_rate": 7.575510182117551e-08, + "loss": 0.8218, "step": 33947 }, { - "epoch": 0.9633371169125993, + "epoch": 0.9619994899260392, "grad_norm": 0.0, - "learning_rate": 7.041893762030127e-08, - "loss": 0.8876, + "learning_rate": 7.564238756945563e-08, + "loss": 0.7654, "step": 33948 }, { - "epoch": 0.9633654937570942, + "epoch": 0.9620278273683017, "grad_norm": 0.0, - "learning_rate": 7.031010003214267e-08, - "loss": 0.8899, + "learning_rate": 7.552975691428655e-08, + "loss": 0.8331, "step": 33949 }, { - "epoch": 0.9633938706015891, + "epoch": 0.9620561648105642, "grad_norm": 0.0, - "learning_rate": 7.020134632052089e-08, - "loss": 0.7814, + "learning_rate": 7.541720985661416e-08, + "loss": 0.8679, "step": 33950 }, { - "epoch": 0.963422247446084, + "epoch": 0.9620845022528267, "grad_norm": 0.0, - "learning_rate": 7.00926764863541e-08, - "loss": 0.7469, + "learning_rate": 7.530474639738883e-08, + "loss": 0.7922, "step": 33951 }, { - "epoch": 0.9634506242905789, + "epoch": 0.9621128396950891, "grad_norm": 0.0, - "learning_rate": 6.998409053056154e-08, - "loss": 0.7856, + "learning_rate": 7.519236653755757e-08, + "loss": 0.8246, "step": 33952 }, { - "epoch": 0.9634790011350738, + "epoch": 0.9621411771373516, "grad_norm": 0.0, - "learning_rate": 6.987558845405917e-08, - "loss": 0.7812, + "learning_rate": 7.508007027806519e-08, + "loss": 0.8269, "step": 33953 }, { - "epoch": 0.9635073779795686, + "epoch": 0.9621695145796141, "grad_norm": 0.0, - "learning_rate": 6.976717025776514e-08, - "loss": 0.8531, + "learning_rate": 7.49678576198587e-08, + "loss": 0.8055, "step": 33954 }, { - "epoch": 0.9635357548240636, + "epoch": 0.9621978520218765, "grad_norm": 0.0, - "learning_rate": 6.965883594259315e-08, - "loss": 0.7694, + "learning_rate": 7.485572856388512e-08, + "loss": 0.864, "step": 33955 }, { - "epoch": 0.9635641316685585, + "epoch": 0.962226189464139, "grad_norm": 0.0, - "learning_rate": 6.955058550945915e-08, - "loss": 0.7556, + "learning_rate": 7.474368311108593e-08, + "loss": 0.8069, "step": 33956 }, { - "epoch": 0.9635925085130533, + "epoch": 0.9622545269064015, "grad_norm": 0.0, - "learning_rate": 6.944241895927906e-08, - "loss": 0.7407, + "learning_rate": 7.463172126240703e-08, + "loss": 0.8686, "step": 33957 }, { - "epoch": 0.9636208853575482, + "epoch": 0.9622828643486638, "grad_norm": 0.0, - "learning_rate": 6.933433629296438e-08, - "loss": 0.823, + "learning_rate": 7.451984301879101e-08, + "loss": 0.9261, "step": 33958 }, { - "epoch": 0.9636492622020432, + "epoch": 0.9623112017909263, "grad_norm": 0.0, - "learning_rate": 6.922633751142882e-08, - "loss": 0.8366, + "learning_rate": 7.440804838117932e-08, + "loss": 0.7992, "step": 33959 }, { - "epoch": 0.963677639046538, + "epoch": 0.9623395392331888, "grad_norm": 0.0, - "learning_rate": 6.911842261558499e-08, - "loss": 0.8427, + "learning_rate": 7.429633735051566e-08, + "loss": 0.8956, "step": 33960 }, { - "epoch": 0.9637060158910329, + "epoch": 0.9623678766754513, "grad_norm": 0.0, - "learning_rate": 6.901059160634437e-08, - "loss": 0.7893, + "learning_rate": 7.418470992773818e-08, + "loss": 0.7847, "step": 33961 }, { - "epoch": 0.9637343927355279, + "epoch": 0.9623962141177137, "grad_norm": 0.0, - "learning_rate": 6.890284448461738e-08, - "loss": 0.8287, + "learning_rate": 7.407316611378945e-08, + "loss": 0.7571, "step": 33962 }, { - "epoch": 0.9637627695800227, + "epoch": 0.9624245515599762, "grad_norm": 0.0, - "learning_rate": 6.879518125131545e-08, - "loss": 0.677, + "learning_rate": 7.396170590960982e-08, + "loss": 0.8843, "step": 33963 }, { - "epoch": 0.9637911464245176, + "epoch": 0.9624528890022387, "grad_norm": 0.0, - "learning_rate": 6.868760190734568e-08, - "loss": 0.7993, + "learning_rate": 7.385032931613412e-08, + "loss": 0.682, "step": 33964 }, { - "epoch": 0.9638195232690124, + "epoch": 0.9624812264445011, "grad_norm": 0.0, - "learning_rate": 6.858010645361957e-08, - "loss": 0.8263, + "learning_rate": 7.37390363343049e-08, + "loss": 0.861, "step": 33965 }, { - "epoch": 0.9638479001135074, + "epoch": 0.9625095638867636, "grad_norm": 0.0, - "learning_rate": 6.847269489104303e-08, - "loss": 0.7765, + "learning_rate": 7.362782696505699e-08, + "loss": 0.7391, "step": 33966 }, { - "epoch": 0.9638762769580023, + "epoch": 0.9625379013290261, "grad_norm": 0.0, - "learning_rate": 6.836536722052423e-08, - "loss": 0.8036, + "learning_rate": 7.351670120932852e-08, + "loss": 0.8348, "step": 33967 }, { - "epoch": 0.9639046538024971, + "epoch": 0.9625662387712886, "grad_norm": 0.0, - "learning_rate": 6.825812344296911e-08, - "loss": 0.8348, + "learning_rate": 7.34056590680543e-08, + "loss": 0.7409, "step": 33968 }, { - "epoch": 0.9639330306469921, + "epoch": 0.9625945762135509, "grad_norm": 0.0, - "learning_rate": 6.815096355928475e-08, - "loss": 0.8076, + "learning_rate": 7.329470054217024e-08, + "loss": 0.8047, "step": 33969 }, { - "epoch": 0.963961407491487, + "epoch": 0.9626229136558134, "grad_norm": 0.0, - "learning_rate": 6.804388757037594e-08, - "loss": 0.7331, + "learning_rate": 7.318382563261228e-08, + "loss": 0.9362, "step": 33970 }, { - "epoch": 0.9639897843359818, + "epoch": 0.9626512510980759, "grad_norm": 0.0, - "learning_rate": 6.793689547714644e-08, - "loss": 0.753, + "learning_rate": 7.307303434031187e-08, + "loss": 0.7658, "step": 33971 }, { - "epoch": 0.9640181611804768, + "epoch": 0.9626795885403383, "grad_norm": 0.0, - "learning_rate": 6.782998728049995e-08, - "loss": 0.7763, + "learning_rate": 7.296232666620496e-08, + "loss": 0.8639, "step": 33972 }, { - "epoch": 0.9640465380249716, + "epoch": 0.9627079259826008, "grad_norm": 0.0, - "learning_rate": 6.772316298134129e-08, - "loss": 0.8283, + "learning_rate": 7.285170261122187e-08, + "loss": 0.8254, "step": 33973 }, { - "epoch": 0.9640749148694665, + "epoch": 0.9627362634248633, "grad_norm": 0.0, - "learning_rate": 6.761642258056977e-08, - "loss": 0.8775, + "learning_rate": 7.274116217629524e-08, + "loss": 0.8107, "step": 33974 }, { - "epoch": 0.9641032917139614, + "epoch": 0.9627646008671258, "grad_norm": 0.0, - "learning_rate": 6.75097660790902e-08, - "loss": 0.8241, + "learning_rate": 7.26307053623565e-08, + "loss": 0.8484, "step": 33975 }, { - "epoch": 0.9641316685584563, + "epoch": 0.9627929383093882, "grad_norm": 0.0, - "learning_rate": 6.740319347780189e-08, - "loss": 0.7958, + "learning_rate": 7.252033217033494e-08, + "loss": 0.8327, "step": 33976 }, { - "epoch": 0.9641600454029512, + "epoch": 0.9628212757516507, "grad_norm": 0.0, - "learning_rate": 6.72967047776052e-08, - "loss": 0.8349, + "learning_rate": 7.241004260116202e-08, + "loss": 0.8601, "step": 33977 }, { - "epoch": 0.964188422247446, + "epoch": 0.9628496131939132, "grad_norm": 0.0, - "learning_rate": 6.719029997939942e-08, - "loss": 0.8764, + "learning_rate": 7.22998366557659e-08, + "loss": 0.8094, "step": 33978 }, { - "epoch": 0.964216799091941, + "epoch": 0.9628779506361755, "grad_norm": 0.0, - "learning_rate": 6.708397908408382e-08, - "loss": 0.8153, + "learning_rate": 7.218971433507471e-08, + "loss": 0.8453, "step": 33979 }, { - "epoch": 0.9642451759364359, + "epoch": 0.962906288078438, "grad_norm": 0.0, - "learning_rate": 6.69777420925577e-08, - "loss": 0.8473, + "learning_rate": 7.20796756400155e-08, + "loss": 0.8594, "step": 33980 }, { - "epoch": 0.9642735527809307, + "epoch": 0.9629346255207005, "grad_norm": 0.0, - "learning_rate": 6.687158900571588e-08, - "loss": 0.7697, + "learning_rate": 7.19697205715153e-08, + "loss": 0.7394, "step": 33981 }, { - "epoch": 0.9643019296254256, + "epoch": 0.9629629629629629, "grad_norm": 0.0, - "learning_rate": 6.676551982445545e-08, - "loss": 0.8839, + "learning_rate": 7.185984913050225e-08, + "loss": 0.684, "step": 33982 }, { - "epoch": 0.9643303064699206, + "epoch": 0.9629913004052254, "grad_norm": 0.0, - "learning_rate": 6.665953454967456e-08, - "loss": 0.7884, + "learning_rate": 7.175006131789897e-08, + "loss": 0.7591, "step": 33983 }, { - "epoch": 0.9643586833144154, + "epoch": 0.9630196378474879, "grad_norm": 0.0, - "learning_rate": 6.655363318226693e-08, - "loss": 0.8294, + "learning_rate": 7.164035713463358e-08, + "loss": 0.7263, "step": 33984 }, { - "epoch": 0.9643870601589103, + "epoch": 0.9630479752897504, "grad_norm": 0.0, - "learning_rate": 6.644781572312631e-08, - "loss": 0.7868, + "learning_rate": 7.153073658162646e-08, + "loss": 0.8465, "step": 33985 }, { - "epoch": 0.9644154370034053, + "epoch": 0.9630763127320128, "grad_norm": 0.0, - "learning_rate": 6.634208217314864e-08, - "loss": 0.8892, + "learning_rate": 7.142119965980465e-08, + "loss": 0.7481, "step": 33986 }, { - "epoch": 0.9644438138479001, + "epoch": 0.9631046501742753, "grad_norm": 0.0, - "learning_rate": 6.623643253322543e-08, - "loss": 0.7969, + "learning_rate": 7.131174637008742e-08, + "loss": 0.8961, "step": 33987 }, { - "epoch": 0.964472190692395, + "epoch": 0.9631329876165378, "grad_norm": 0.0, - "learning_rate": 6.61308668042504e-08, - "loss": 0.8914, + "learning_rate": 7.120237671339847e-08, + "loss": 0.8419, "step": 33988 }, { - "epoch": 0.96450056753689, + "epoch": 0.9631613250588001, "grad_norm": 0.0, - "learning_rate": 6.602538498711397e-08, - "loss": 0.7843, + "learning_rate": 7.109309069065928e-08, + "loss": 0.7736, "step": 33989 }, { - "epoch": 0.9645289443813848, + "epoch": 0.9631896625010626, "grad_norm": 0.0, - "learning_rate": 6.591998708270652e-08, - "loss": 0.8184, + "learning_rate": 7.098388830279024e-08, + "loss": 0.8225, "step": 33990 }, { - "epoch": 0.9645573212258797, + "epoch": 0.9632179999433251, "grad_norm": 0.0, - "learning_rate": 6.58146730919218e-08, - "loss": 0.7259, + "learning_rate": 7.08747695507106e-08, + "loss": 0.8638, "step": 33991 }, { - "epoch": 0.9645856980703745, + "epoch": 0.9632463373855876, "grad_norm": 0.0, - "learning_rate": 6.570944301564575e-08, - "loss": 0.8463, + "learning_rate": 7.076573443533963e-08, + "loss": 0.7143, "step": 33992 }, { - "epoch": 0.9646140749148695, + "epoch": 0.96327467482785, "grad_norm": 0.0, - "learning_rate": 6.560429685476765e-08, - "loss": 0.8422, + "learning_rate": 7.065678295759659e-08, + "loss": 0.8029, "step": 33993 }, { - "epoch": 0.9646424517593644, + "epoch": 0.9633030122701125, "grad_norm": 0.0, - "learning_rate": 6.549923461017904e-08, - "loss": 0.906, + "learning_rate": 7.054791511839853e-08, + "loss": 0.8159, "step": 33994 }, { - "epoch": 0.9646708286038592, + "epoch": 0.963331349712375, "grad_norm": 0.0, - "learning_rate": 6.539425628276252e-08, - "loss": 0.8444, + "learning_rate": 7.04391309186614e-08, + "loss": 0.7871, "step": 33995 }, { - "epoch": 0.9646992054483542, + "epoch": 0.9633596871546374, "grad_norm": 0.0, - "learning_rate": 6.52893618734085e-08, - "loss": 0.7883, + "learning_rate": 7.033043035930442e-08, + "loss": 0.7246, "step": 33996 }, { - "epoch": 0.964727582292849, + "epoch": 0.9633880245968999, "grad_norm": 0.0, - "learning_rate": 6.518455138300295e-08, - "loss": 0.8208, + "learning_rate": 7.022181344124024e-08, + "loss": 0.8115, "step": 33997 }, { - "epoch": 0.9647559591373439, + "epoch": 0.9634163620391624, "grad_norm": 0.0, - "learning_rate": 6.507982481242847e-08, - "loss": 0.8117, + "learning_rate": 7.011328016538588e-08, + "loss": 0.8004, "step": 33998 }, { - "epoch": 0.9647843359818388, + "epoch": 0.9634446994814249, "grad_norm": 0.0, - "learning_rate": 6.497518216257104e-08, - "loss": 0.6667, + "learning_rate": 7.000483053265506e-08, + "loss": 0.8565, "step": 33999 }, { - "epoch": 0.9648127128263337, + "epoch": 0.9634730369236872, "grad_norm": 0.0, - "learning_rate": 6.48706234343166e-08, - "loss": 0.7803, + "learning_rate": 6.989646454396037e-08, + "loss": 0.7903, "step": 34000 }, { - "epoch": 0.9648410896708286, + "epoch": 0.9635013743659497, "grad_norm": 0.0, - "learning_rate": 6.476614862854558e-08, - "loss": 0.8948, + "learning_rate": 6.978818220021444e-08, + "loss": 0.8174, "step": 34001 }, { - "epoch": 0.9648694665153235, + "epoch": 0.9635297118082122, "grad_norm": 0.0, - "learning_rate": 6.46617577461417e-08, - "loss": 0.7105, + "learning_rate": 6.967998350233096e-08, + "loss": 0.7314, "step": 34002 }, { - "epoch": 0.9648978433598184, + "epoch": 0.9635580492504746, "grad_norm": 0.0, - "learning_rate": 6.455745078798647e-08, - "loss": 0.8808, + "learning_rate": 6.957186845122032e-08, + "loss": 0.765, "step": 34003 }, { - "epoch": 0.9649262202043133, + "epoch": 0.9635863866927371, "grad_norm": 0.0, - "learning_rate": 6.445322775496032e-08, - "loss": 0.7983, + "learning_rate": 6.946383704779403e-08, + "loss": 0.7782, "step": 34004 }, { - "epoch": 0.9649545970488081, + "epoch": 0.9636147241349996, "grad_norm": 0.0, - "learning_rate": 6.434908864794475e-08, - "loss": 0.7789, + "learning_rate": 6.935588929296134e-08, + "loss": 0.8618, "step": 34005 }, { - "epoch": 0.9649829738933031, + "epoch": 0.963643061577262, "grad_norm": 0.0, - "learning_rate": 6.424503346782018e-08, - "loss": 0.8278, + "learning_rate": 6.924802518763152e-08, + "loss": 0.7805, "step": 34006 }, { - "epoch": 0.965011350737798, + "epoch": 0.9636713990195245, "grad_norm": 0.0, - "learning_rate": 6.414106221546368e-08, - "loss": 0.7964, + "learning_rate": 6.914024473271274e-08, + "loss": 0.757, "step": 34007 }, { - "epoch": 0.9650397275822928, + "epoch": 0.963699736461787, "grad_norm": 0.0, - "learning_rate": 6.403717489175343e-08, - "loss": 0.8026, + "learning_rate": 6.903254792911318e-08, + "loss": 0.7573, "step": 34008 }, { - "epoch": 0.9650681044267877, + "epoch": 0.9637280739040495, "grad_norm": 0.0, - "learning_rate": 6.393337149756984e-08, - "loss": 0.7517, + "learning_rate": 6.892493477774098e-08, + "loss": 0.8697, "step": 34009 }, { - "epoch": 0.9650964812712827, + "epoch": 0.9637564113463118, "grad_norm": 0.0, - "learning_rate": 6.382965203378665e-08, - "loss": 0.7001, + "learning_rate": 6.881740527950209e-08, + "loss": 0.7452, "step": 34010 }, { - "epoch": 0.9651248581157775, + "epoch": 0.9637847487885743, "grad_norm": 0.0, - "learning_rate": 6.372601650127986e-08, - "loss": 0.7436, + "learning_rate": 6.870995943530134e-08, + "loss": 0.8108, "step": 34011 }, { - "epoch": 0.9651532349602724, + "epoch": 0.9638130862308368, "grad_norm": 0.0, - "learning_rate": 6.36224649009276e-08, - "loss": 0.7837, + "learning_rate": 6.860259724604468e-08, + "loss": 0.7869, "step": 34012 }, { - "epoch": 0.9651816118047674, + "epoch": 0.9638414236730992, "grad_norm": 0.0, - "learning_rate": 6.351899723360256e-08, - "loss": 0.6897, + "learning_rate": 6.849531871263692e-08, + "loss": 0.881, "step": 34013 }, { - "epoch": 0.9652099886492622, + "epoch": 0.9638697611153617, "grad_norm": 0.0, - "learning_rate": 6.341561350017955e-08, - "loss": 0.8688, + "learning_rate": 6.838812383597959e-08, + "loss": 0.8519, "step": 34014 }, { - "epoch": 0.9652383654937571, + "epoch": 0.9638980985576242, "grad_norm": 0.0, - "learning_rate": 6.331231370153123e-08, - "loss": 0.7578, + "learning_rate": 6.828101261697862e-08, + "loss": 0.8247, "step": 34015 }, { - "epoch": 0.9652667423382519, + "epoch": 0.9639264359998867, "grad_norm": 0.0, - "learning_rate": 6.320909783853136e-08, - "loss": 0.8584, + "learning_rate": 6.817398505653439e-08, + "loss": 0.8479, "step": 34016 }, { - "epoch": 0.9652951191827469, + "epoch": 0.9639547734421491, "grad_norm": 0.0, - "learning_rate": 6.310596591205031e-08, - "loss": 0.8283, + "learning_rate": 6.80670411555484e-08, + "loss": 0.7916, "step": 34017 }, { - "epoch": 0.9653234960272418, + "epoch": 0.9639831108844116, "grad_norm": 0.0, - "learning_rate": 6.300291792295965e-08, - "loss": 0.9131, + "learning_rate": 6.796018091492219e-08, + "loss": 0.7773, "step": 34018 }, { - "epoch": 0.9653518728717366, + "epoch": 0.9640114483266741, "grad_norm": 0.0, - "learning_rate": 6.289995387212977e-08, - "loss": 0.7585, + "learning_rate": 6.785340433555499e-08, + "loss": 0.8961, "step": 34019 }, { - "epoch": 0.9653802497162316, + "epoch": 0.9640397857689365, "grad_norm": 0.0, - "learning_rate": 6.27970737604311e-08, - "loss": 0.715, + "learning_rate": 6.77467114183472e-08, + "loss": 0.8469, "step": 34020 }, { - "epoch": 0.9654086265607265, + "epoch": 0.964068123211199, "grad_norm": 0.0, - "learning_rate": 6.269427758873182e-08, - "loss": 0.8032, + "learning_rate": 6.764010216419703e-08, + "loss": 0.8163, "step": 34021 }, { - "epoch": 0.9654370034052213, + "epoch": 0.9640964606534614, "grad_norm": 0.0, - "learning_rate": 6.259156535790012e-08, - "loss": 0.7753, + "learning_rate": 6.753357657400261e-08, + "loss": 0.742, "step": 34022 }, { - "epoch": 0.9654653802497162, + "epoch": 0.9641247980957239, "grad_norm": 0.0, - "learning_rate": 6.248893706880643e-08, - "loss": 0.8111, + "learning_rate": 6.742713464866102e-08, + "loss": 0.7387, "step": 34023 }, { - "epoch": 0.9654937570942111, + "epoch": 0.9641531355379863, "grad_norm": 0.0, - "learning_rate": 6.23863927223134e-08, - "loss": 0.789, + "learning_rate": 6.73207763890693e-08, + "loss": 0.8072, "step": 34024 }, { - "epoch": 0.965522133938706, + "epoch": 0.9641814729802488, "grad_norm": 0.0, - "learning_rate": 6.22839323192892e-08, - "loss": 0.8445, + "learning_rate": 6.721450179612232e-08, + "loss": 0.8047, "step": 34025 }, { - "epoch": 0.9655505107832009, + "epoch": 0.9642098104225113, "grad_norm": 0.0, - "learning_rate": 6.218155586060093e-08, - "loss": 0.7977, + "learning_rate": 6.710831087071712e-08, + "loss": 0.8548, "step": 34026 }, { - "epoch": 0.9655788876276958, + "epoch": 0.9642381478647737, "grad_norm": 0.0, - "learning_rate": 6.207926334711123e-08, - "loss": 0.7561, + "learning_rate": 6.700220361374632e-08, + "loss": 0.7459, "step": 34027 }, { - "epoch": 0.9656072644721907, + "epoch": 0.9642664853070362, "grad_norm": 0.0, - "learning_rate": 6.197705477968386e-08, - "loss": 0.86, + "learning_rate": 6.689618002610587e-08, + "loss": 0.893, "step": 34028 }, { - "epoch": 0.9656356413166856, + "epoch": 0.9642948227492987, "grad_norm": 0.0, - "learning_rate": 6.187493015918367e-08, - "loss": 0.7321, + "learning_rate": 6.679024010868617e-08, + "loss": 0.8166, "step": 34029 }, { - "epoch": 0.9656640181611805, + "epoch": 0.9643231601915611, "grad_norm": 0.0, - "learning_rate": 6.177288948647331e-08, - "loss": 0.7317, + "learning_rate": 6.668438386238096e-08, + "loss": 0.6645, "step": 34030 }, { - "epoch": 0.9656923950056754, + "epoch": 0.9643514976338236, "grad_norm": 0.0, - "learning_rate": 6.16709327624132e-08, - "loss": 0.8723, + "learning_rate": 6.657861128808285e-08, + "loss": 0.8194, "step": 34031 }, { - "epoch": 0.9657207718501702, + "epoch": 0.964379835076086, "grad_norm": 0.0, - "learning_rate": 6.1569059987866e-08, - "loss": 0.7495, + "learning_rate": 6.647292238668001e-08, + "loss": 0.894, "step": 34032 }, { - "epoch": 0.9657491486946651, + "epoch": 0.9644081725183485, "grad_norm": 0.0, - "learning_rate": 6.1467271163691e-08, - "loss": 0.8297, + "learning_rate": 6.63673171590662e-08, + "loss": 0.7776, "step": 34033 }, { - "epoch": 0.9657775255391601, + "epoch": 0.9644365099606109, "grad_norm": 0.0, - "learning_rate": 6.136556629074863e-08, - "loss": 0.9274, + "learning_rate": 6.626179560612733e-08, + "loss": 0.8374, "step": 34034 }, { - "epoch": 0.9658059023836549, + "epoch": 0.9644648474028734, "grad_norm": 0.0, - "learning_rate": 6.126394536989933e-08, - "loss": 0.7884, + "learning_rate": 6.615635772875606e-08, + "loss": 0.8201, "step": 34035 }, { - "epoch": 0.9658342792281498, + "epoch": 0.9644931848451359, "grad_norm": 0.0, - "learning_rate": 6.116240840199906e-08, - "loss": 0.8539, + "learning_rate": 6.605100352783833e-08, + "loss": 0.818, "step": 34036 }, { - "epoch": 0.9658626560726448, + "epoch": 0.9645215222873983, "grad_norm": 0.0, - "learning_rate": 6.106095538790713e-08, - "loss": 0.7592, + "learning_rate": 6.594573300426121e-08, + "loss": 0.7487, "step": 34037 }, { - "epoch": 0.9658910329171396, + "epoch": 0.9645498597296608, "grad_norm": 0.0, - "learning_rate": 6.095958632848065e-08, - "loss": 0.7917, + "learning_rate": 6.584054615891178e-08, + "loss": 0.8025, "step": 34038 }, { - "epoch": 0.9659194097616345, + "epoch": 0.9645781971719233, "grad_norm": 0.0, - "learning_rate": 6.085830122457448e-08, - "loss": 0.9514, + "learning_rate": 6.573544299267709e-08, + "loss": 0.8145, "step": 34039 }, { - "epoch": 0.9659477866061293, + "epoch": 0.9646065346141858, "grad_norm": 0.0, - "learning_rate": 6.075710007704572e-08, - "loss": 0.8335, + "learning_rate": 6.5630423506442e-08, + "loss": 0.8329, "step": 34040 }, { - "epoch": 0.9659761634506243, + "epoch": 0.9646348720564482, "grad_norm": 0.0, - "learning_rate": 6.065598288674812e-08, - "loss": 0.7636, + "learning_rate": 6.552548770109024e-08, + "loss": 0.8817, "step": 34041 }, { - "epoch": 0.9660045402951192, + "epoch": 0.9646632094987106, "grad_norm": 0.0, - "learning_rate": 6.055494965453656e-08, - "loss": 0.8593, + "learning_rate": 6.542063557750667e-08, + "loss": 0.6853, "step": 34042 }, { - "epoch": 0.966032917139614, + "epoch": 0.9646915469409731, "grad_norm": 0.0, - "learning_rate": 6.04540003812637e-08, - "loss": 0.7529, + "learning_rate": 6.531586713657389e-08, + "loss": 0.8479, "step": 34043 }, { - "epoch": 0.966061293984109, + "epoch": 0.9647198843832355, "grad_norm": 0.0, - "learning_rate": 6.035313506778328e-08, - "loss": 0.8868, + "learning_rate": 6.521118237917456e-08, + "loss": 0.8068, "step": 34044 }, { - "epoch": 0.9660896708286039, + "epoch": 0.964748221825498, "grad_norm": 0.0, - "learning_rate": 6.025235371494576e-08, - "loss": 0.845, + "learning_rate": 6.510658130619241e-08, + "loss": 0.8908, "step": 34045 }, { - "epoch": 0.9661180476730987, + "epoch": 0.9647765592677605, "grad_norm": 0.0, - "learning_rate": 6.015165632360487e-08, - "loss": 0.7673, + "learning_rate": 6.50020639185045e-08, + "loss": 0.8531, "step": 34046 }, { - "epoch": 0.9661464245175937, + "epoch": 0.964804896710023, "grad_norm": 0.0, - "learning_rate": 6.005104289460884e-08, - "loss": 0.7895, + "learning_rate": 6.489763021699458e-08, + "loss": 0.838, "step": 34047 }, { - "epoch": 0.9661748013620886, + "epoch": 0.9648332341522854, "grad_norm": 0.0, - "learning_rate": 5.995051342880809e-08, - "loss": 0.8859, + "learning_rate": 6.479328020254084e-08, + "loss": 0.7028, "step": 34048 }, { - "epoch": 0.9662031782065834, + "epoch": 0.9648615715945479, "grad_norm": 0.0, - "learning_rate": 5.985006792705305e-08, - "loss": 0.8261, + "learning_rate": 6.468901387602367e-08, + "loss": 0.8272, "step": 34049 }, { - "epoch": 0.9662315550510783, + "epoch": 0.9648899090368104, "grad_norm": 0.0, - "learning_rate": 5.974970639019084e-08, - "loss": 0.8613, + "learning_rate": 6.458483123831905e-08, + "loss": 0.876, "step": 34050 }, { - "epoch": 0.9662599318955732, + "epoch": 0.9649182464790728, "grad_norm": 0.0, - "learning_rate": 5.964942881906966e-08, - "loss": 0.8, + "learning_rate": 6.448073229030626e-08, + "loss": 0.7401, "step": 34051 }, { - "epoch": 0.9662883087400681, + "epoch": 0.9649465839213353, "grad_norm": 0.0, - "learning_rate": 5.95492352145377e-08, - "loss": 0.8666, + "learning_rate": 6.43767170328613e-08, + "loss": 0.8188, "step": 34052 }, { - "epoch": 0.966316685584563, + "epoch": 0.9649749213635977, "grad_norm": 0.0, - "learning_rate": 5.944912557743876e-08, - "loss": 0.8325, + "learning_rate": 6.427278546686122e-08, + "loss": 0.854, "step": 34053 }, { - "epoch": 0.9663450624290579, + "epoch": 0.9650032588058601, "grad_norm": 0.0, - "learning_rate": 5.934909990862103e-08, - "loss": 0.9158, + "learning_rate": 6.416893759318089e-08, + "loss": 0.8368, "step": 34054 }, { - "epoch": 0.9663734392735528, + "epoch": 0.9650315962481226, "grad_norm": 0.0, - "learning_rate": 5.924915820892718e-08, - "loss": 0.8178, + "learning_rate": 6.406517341269624e-08, + "loss": 0.7818, "step": 34055 }, { - "epoch": 0.9664018161180477, + "epoch": 0.9650599336903851, "grad_norm": 0.0, - "learning_rate": 5.91493004792032e-08, - "loss": 0.7421, + "learning_rate": 6.396149292627885e-08, + "loss": 0.8087, "step": 34056 }, { - "epoch": 0.9664301929625425, + "epoch": 0.9650882711326476, "grad_norm": 0.0, - "learning_rate": 5.904952672029174e-08, - "loss": 0.8195, + "learning_rate": 6.385789613480353e-08, + "loss": 0.8624, "step": 34057 }, { - "epoch": 0.9664585698070375, + "epoch": 0.96511660857491, "grad_norm": 0.0, - "learning_rate": 5.894983693303657e-08, - "loss": 0.7008, + "learning_rate": 6.375438303914294e-08, + "loss": 0.9444, "step": 34058 }, { - "epoch": 0.9664869466515323, + "epoch": 0.9651449460171725, "grad_norm": 0.0, - "learning_rate": 5.8850231118278144e-08, - "loss": 0.8114, + "learning_rate": 6.365095364016971e-08, + "loss": 0.8283, "step": 34059 }, { - "epoch": 0.9665153234960272, + "epoch": 0.965173283459435, "grad_norm": 0.0, - "learning_rate": 5.875070927685911e-08, - "loss": 0.8404, + "learning_rate": 6.354760793875314e-08, + "loss": 0.7924, "step": 34060 }, { - "epoch": 0.9665437003405222, + "epoch": 0.9652016209016974, "grad_norm": 0.0, - "learning_rate": 5.86512714096199e-08, - "loss": 0.9474, + "learning_rate": 6.344434593576587e-08, + "loss": 0.8374, "step": 34061 }, { - "epoch": 0.966572077185017, + "epoch": 0.9652299583439599, "grad_norm": 0.0, - "learning_rate": 5.8551917517399856e-08, - "loss": 0.8293, + "learning_rate": 6.33411676320761e-08, + "loss": 0.7756, "step": 34062 }, { - "epoch": 0.9666004540295119, + "epoch": 0.9652582957862224, "grad_norm": 0.0, - "learning_rate": 5.8452647601038304e-08, - "loss": 0.8263, + "learning_rate": 6.323807302855422e-08, + "loss": 0.7516, "step": 34063 }, { - "epoch": 0.9666288308740069, + "epoch": 0.9652866332284848, "grad_norm": 0.0, - "learning_rate": 5.8353461661374566e-08, - "loss": 0.8738, + "learning_rate": 6.313506212606734e-08, + "loss": 0.7996, "step": 34064 }, { - "epoch": 0.9666572077185017, + "epoch": 0.9653149706707472, "grad_norm": 0.0, - "learning_rate": 5.8254359699245754e-08, - "loss": 0.8887, + "learning_rate": 6.303213492548477e-08, + "loss": 0.8447, "step": 34065 }, { - "epoch": 0.9666855845629966, + "epoch": 0.9653433081130097, "grad_norm": 0.0, - "learning_rate": 5.815534171549009e-08, - "loss": 0.8311, + "learning_rate": 6.292929142767135e-08, + "loss": 0.7932, "step": 34066 }, { - "epoch": 0.9667139614074914, + "epoch": 0.9653716455552722, "grad_norm": 0.0, - "learning_rate": 5.8056407710942454e-08, - "loss": 0.7809, + "learning_rate": 6.282653163349528e-08, + "loss": 0.8484, "step": 34067 }, { - "epoch": 0.9667423382519864, + "epoch": 0.9653999829975346, "grad_norm": 0.0, - "learning_rate": 5.795755768643996e-08, - "loss": 0.8483, + "learning_rate": 6.272385554382143e-08, + "loss": 0.8635, "step": 34068 }, { - "epoch": 0.9667707150964813, + "epoch": 0.9654283204397971, "grad_norm": 0.0, - "learning_rate": 5.785879164281638e-08, - "loss": 0.8796, + "learning_rate": 6.262126315951355e-08, + "loss": 0.7771, "step": 34069 }, { - "epoch": 0.9667990919409761, + "epoch": 0.9654566578820596, "grad_norm": 0.0, - "learning_rate": 5.776010958090661e-08, - "loss": 0.7798, + "learning_rate": 6.251875448143763e-08, + "loss": 0.8418, "step": 34070 }, { - "epoch": 0.9668274687854711, + "epoch": 0.9654849953243221, "grad_norm": 0.0, - "learning_rate": 5.766151150154442e-08, - "loss": 0.7875, + "learning_rate": 6.241632951045629e-08, + "loss": 0.7505, "step": 34071 }, { - "epoch": 0.966855845629966, + "epoch": 0.9655133327665845, "grad_norm": 0.0, - "learning_rate": 5.7562997405561373e-08, - "loss": 0.8191, + "learning_rate": 6.231398824743218e-08, + "loss": 0.8371, "step": 34072 }, { - "epoch": 0.9668842224744608, + "epoch": 0.965541670208847, "grad_norm": 0.0, - "learning_rate": 5.746456729379124e-08, - "loss": 0.7103, + "learning_rate": 6.221173069322905e-08, + "loss": 0.7922, "step": 34073 }, { - "epoch": 0.9669125993189557, + "epoch": 0.9655700076511095, "grad_norm": 0.0, - "learning_rate": 5.736622116706558e-08, - "loss": 0.7432, + "learning_rate": 6.210955684870512e-08, + "loss": 0.8297, "step": 34074 }, { - "epoch": 0.9669409761634506, + "epoch": 0.9655983450933718, "grad_norm": 0.0, - "learning_rate": 5.7267959026213735e-08, - "loss": 0.7918, + "learning_rate": 6.200746671472413e-08, + "loss": 0.7281, "step": 34075 }, { - "epoch": 0.9669693530079455, + "epoch": 0.9656266825356343, "grad_norm": 0.0, - "learning_rate": 5.716978087206615e-08, - "loss": 0.8338, + "learning_rate": 6.190546029214428e-08, + "loss": 0.9131, "step": 34076 }, { - "epoch": 0.9669977298524404, + "epoch": 0.9656550199778968, "grad_norm": 0.0, - "learning_rate": 5.7071686705453265e-08, - "loss": 0.9017, + "learning_rate": 6.18035375818249e-08, + "loss": 0.7802, "step": 34077 }, { - "epoch": 0.9670261066969353, + "epoch": 0.9656833574201592, "grad_norm": 0.0, - "learning_rate": 5.697367652720221e-08, - "loss": 0.7705, + "learning_rate": 6.170169858462416e-08, + "loss": 0.876, "step": 34078 }, { - "epoch": 0.9670544835414302, + "epoch": 0.9657116948624217, "grad_norm": 0.0, - "learning_rate": 5.687575033814119e-08, - "loss": 0.8134, + "learning_rate": 6.15999433014014e-08, + "loss": 0.8361, "step": 34079 }, { - "epoch": 0.9670828603859251, + "epoch": 0.9657400323046842, "grad_norm": 0.0, - "learning_rate": 5.6777908139098445e-08, - "loss": 0.8801, + "learning_rate": 6.149827173301259e-08, + "loss": 0.7874, "step": 34080 }, { - "epoch": 0.96711123723042, + "epoch": 0.9657683697469467, "grad_norm": 0.0, - "learning_rate": 5.668014993089888e-08, - "loss": 0.7135, + "learning_rate": 6.139668388031484e-08, + "loss": 0.8683, "step": 34081 }, { - "epoch": 0.9671396140749149, + "epoch": 0.9657967071892091, "grad_norm": 0.0, - "learning_rate": 5.658247571436959e-08, - "loss": 0.9474, + "learning_rate": 6.129517974416299e-08, + "loss": 0.8124, "step": 34082 }, { - "epoch": 0.9671679909194097, + "epoch": 0.9658250446314716, "grad_norm": 0.0, - "learning_rate": 5.6484885490334375e-08, - "loss": 0.8024, + "learning_rate": 6.119375932541194e-08, + "loss": 0.7239, "step": 34083 }, { - "epoch": 0.9671963677639046, + "epoch": 0.9658533820737341, "grad_norm": 0.0, - "learning_rate": 5.638737925961924e-08, - "loss": 0.8458, + "learning_rate": 6.109242262491655e-08, + "loss": 0.827, "step": 34084 }, { - "epoch": 0.9672247446083996, + "epoch": 0.9658817195159964, "grad_norm": 0.0, - "learning_rate": 5.6289957023046846e-08, - "loss": 0.7773, + "learning_rate": 6.099116964353058e-08, + "loss": 0.7387, "step": 34085 }, { - "epoch": 0.9672531214528944, + "epoch": 0.9659100569582589, "grad_norm": 0.0, - "learning_rate": 5.619261878143989e-08, - "loss": 0.8544, + "learning_rate": 6.08900003821078e-08, + "loss": 0.8368, "step": 34086 }, { - "epoch": 0.9672814982973893, + "epoch": 0.9659383944005214, "grad_norm": 0.0, - "learning_rate": 5.6095364535619925e-08, - "loss": 0.8482, + "learning_rate": 6.078891484149863e-08, + "loss": 0.767, "step": 34087 }, { - "epoch": 0.9673098751418843, + "epoch": 0.9659667318427839, "grad_norm": 0.0, - "learning_rate": 5.5998194286410736e-08, - "loss": 0.9068, + "learning_rate": 6.068791302255462e-08, + "loss": 0.8099, "step": 34088 }, { - "epoch": 0.9673382519863791, + "epoch": 0.9659950692850463, "grad_norm": 0.0, - "learning_rate": 5.590110803463167e-08, - "loss": 0.8967, + "learning_rate": 6.058699492612841e-08, + "loss": 0.8061, "step": 34089 }, { - "epoch": 0.967366628830874, + "epoch": 0.9660234067273088, "grad_norm": 0.0, - "learning_rate": 5.5804105781103177e-08, - "loss": 0.8098, + "learning_rate": 6.048616055306822e-08, + "loss": 0.8774, "step": 34090 }, { - "epoch": 0.9673950056753688, + "epoch": 0.9660517441695713, "grad_norm": 0.0, - "learning_rate": 5.57071875266435e-08, - "loss": 0.773, + "learning_rate": 6.038540990422448e-08, + "loss": 0.7394, "step": 34091 }, { - "epoch": 0.9674233825198638, + "epoch": 0.9660800816118337, "grad_norm": 0.0, - "learning_rate": 5.5610353272073094e-08, - "loss": 0.8292, + "learning_rate": 6.02847429804454e-08, + "loss": 0.7355, "step": 34092 }, { - "epoch": 0.9674517593643587, + "epoch": 0.9661084190540962, "grad_norm": 0.0, - "learning_rate": 5.551360301820907e-08, - "loss": 0.8845, + "learning_rate": 6.018415978257808e-08, + "loss": 0.8275, "step": 34093 }, { - "epoch": 0.9674801362088535, + "epoch": 0.9661367564963587, "grad_norm": 0.0, - "learning_rate": 5.541693676586857e-08, - "loss": 0.8043, + "learning_rate": 6.008366031147184e-08, + "loss": 0.788, "step": 34094 }, { - "epoch": 0.9675085130533485, + "epoch": 0.966165093938621, "grad_norm": 0.0, - "learning_rate": 5.532035451586759e-08, - "loss": 0.9492, + "learning_rate": 5.998324456797044e-08, + "loss": 0.8389, "step": 34095 }, { - "epoch": 0.9675368898978434, + "epoch": 0.9661934313808835, "grad_norm": 0.0, - "learning_rate": 5.522385626902327e-08, - "loss": 0.8255, + "learning_rate": 5.988291255292211e-08, + "loss": 0.7691, "step": 34096 }, { - "epoch": 0.9675652667423382, + "epoch": 0.966221768823146, "grad_norm": 0.0, - "learning_rate": 5.512744202614939e-08, - "loss": 0.8207, + "learning_rate": 5.978266426717171e-08, + "loss": 0.7357, "step": 34097 }, { - "epoch": 0.9675936435868332, + "epoch": 0.9662501062654085, "grad_norm": 0.0, - "learning_rate": 5.5031111788061975e-08, - "loss": 0.8454, + "learning_rate": 5.968249971156193e-08, + "loss": 0.7601, "step": 34098 }, { - "epoch": 0.9676220204313281, + "epoch": 0.9662784437076709, "grad_norm": 0.0, - "learning_rate": 5.49348655555737e-08, - "loss": 0.7376, + "learning_rate": 5.958241888693872e-08, + "loss": 0.7629, "step": 34099 }, { - "epoch": 0.9676503972758229, + "epoch": 0.9663067811499334, "grad_norm": 0.0, - "learning_rate": 5.483870332949615e-08, - "loss": 0.8083, + "learning_rate": 5.948242179414365e-08, + "loss": 0.7767, "step": 34100 }, { - "epoch": 0.9676787741203178, + "epoch": 0.9663351185921959, "grad_norm": 0.0, - "learning_rate": 5.474262511064421e-08, - "loss": 0.7814, + "learning_rate": 5.9382508434020495e-08, + "loss": 0.7826, "step": 34101 }, { - "epoch": 0.9677071509648127, + "epoch": 0.9663634560344583, "grad_norm": 0.0, - "learning_rate": 5.4646630899829465e-08, - "loss": 0.7719, + "learning_rate": 5.9282678807408566e-08, + "loss": 0.7945, "step": 34102 }, { - "epoch": 0.9677355278093076, + "epoch": 0.9663917934767208, "grad_norm": 0.0, - "learning_rate": 5.455072069786016e-08, - "loss": 0.7903, + "learning_rate": 5.9182932915150536e-08, + "loss": 0.7129, "step": 34103 }, { - "epoch": 0.9677639046538025, + "epoch": 0.9664201309189833, "grad_norm": 0.0, - "learning_rate": 5.4454894505548974e-08, - "loss": 0.8054, + "learning_rate": 5.9083270758085733e-08, + "loss": 0.7967, "step": 34104 }, { - "epoch": 0.9677922814982974, + "epoch": 0.9664484683612458, "grad_norm": 0.0, - "learning_rate": 5.435915232370414e-08, - "loss": 0.827, + "learning_rate": 5.898369233705459e-08, + "loss": 0.7272, "step": 34105 }, { - "epoch": 0.9678206583427923, + "epoch": 0.9664768058035081, "grad_norm": 0.0, - "learning_rate": 5.426349415313503e-08, - "loss": 0.8256, + "learning_rate": 5.8884197652895325e-08, + "loss": 0.7454, "step": 34106 }, { - "epoch": 0.9678490351872872, + "epoch": 0.9665051432457706, "grad_norm": 0.0, - "learning_rate": 5.416791999464766e-08, - "loss": 0.8451, + "learning_rate": 5.878478670644616e-08, + "loss": 0.8106, "step": 34107 }, { - "epoch": 0.967877412031782, + "epoch": 0.9665334806880331, "grad_norm": 0.0, - "learning_rate": 5.4072429849052476e-08, - "loss": 0.8517, + "learning_rate": 5.8685459498543095e-08, + "loss": 0.7702, "step": 34108 }, { - "epoch": 0.967905788876277, + "epoch": 0.9665618181302955, "grad_norm": 0.0, - "learning_rate": 5.3977023717154406e-08, - "loss": 0.7726, + "learning_rate": 5.858621603002434e-08, + "loss": 0.7729, "step": 34109 }, { - "epoch": 0.9679341657207718, + "epoch": 0.966590155572558, "grad_norm": 0.0, - "learning_rate": 5.388170159975947e-08, - "loss": 0.8837, + "learning_rate": 5.848705630172591e-08, + "loss": 0.8131, "step": 34110 }, { - "epoch": 0.9679625425652667, + "epoch": 0.9666184930148205, "grad_norm": 0.0, - "learning_rate": 5.378646349767258e-08, - "loss": 0.7802, + "learning_rate": 5.8387980314482674e-08, + "loss": 0.775, "step": 34111 }, { - "epoch": 0.9679909194097617, + "epoch": 0.966646830457083, "grad_norm": 0.0, - "learning_rate": 5.369130941169865e-08, - "loss": 0.8074, + "learning_rate": 5.8288988069129525e-08, + "loss": 0.7579, "step": 34112 }, { - "epoch": 0.9680192962542565, + "epoch": 0.9666751678993454, "grad_norm": 0.0, - "learning_rate": 5.359623934264036e-08, - "loss": 0.7796, + "learning_rate": 5.819007956650024e-08, + "loss": 0.776, "step": 34113 }, { - "epoch": 0.9680476730987514, + "epoch": 0.9667035053416079, "grad_norm": 0.0, - "learning_rate": 5.350125329130373e-08, - "loss": 0.8125, + "learning_rate": 5.809125480742639e-08, + "loss": 0.7791, "step": 34114 }, { - "epoch": 0.9680760499432464, + "epoch": 0.9667318427838704, "grad_norm": 0.0, - "learning_rate": 5.340635125848814e-08, - "loss": 0.7898, + "learning_rate": 5.799251379274284e-08, + "loss": 0.7474, "step": 34115 }, { - "epoch": 0.9681044267877412, + "epoch": 0.9667601802261327, "grad_norm": 0.0, - "learning_rate": 5.3311533244996274e-08, - "loss": 0.8757, + "learning_rate": 5.7893856523280056e-08, + "loss": 0.8081, "step": 34116 }, { - "epoch": 0.9681328036322361, + "epoch": 0.9667885176683952, "grad_norm": 0.0, - "learning_rate": 5.321679925162859e-08, - "loss": 0.805, + "learning_rate": 5.7795282999869587e-08, + "loss": 0.8531, "step": 34117 }, { - "epoch": 0.9681611804767309, + "epoch": 0.9668168551106577, "grad_norm": 0.0, - "learning_rate": 5.312214927918669e-08, - "loss": 0.7827, + "learning_rate": 5.7696793223340764e-08, + "loss": 0.8298, "step": 34118 }, { - "epoch": 0.9681895573212259, + "epoch": 0.9668451925529201, "grad_norm": 0.0, - "learning_rate": 5.302758332846769e-08, - "loss": 0.7155, + "learning_rate": 5.759838719452404e-08, + "loss": 0.8799, "step": 34119 }, { - "epoch": 0.9682179341657208, + "epoch": 0.9668735299951826, "grad_norm": 0.0, - "learning_rate": 5.293310140027319e-08, - "loss": 0.8178, + "learning_rate": 5.7500064914247645e-08, + "loss": 0.8882, "step": 34120 }, { - "epoch": 0.9682463110102156, + "epoch": 0.9669018674374451, "grad_norm": 0.0, - "learning_rate": 5.2838703495400324e-08, - "loss": 0.8415, + "learning_rate": 5.740182638334091e-08, + "loss": 0.7037, "step": 34121 }, { - "epoch": 0.9682746878547106, + "epoch": 0.9669302048797076, "grad_norm": 0.0, - "learning_rate": 5.274438961464512e-08, - "loss": 0.7311, + "learning_rate": 5.730367160263095e-08, + "loss": 0.7829, "step": 34122 }, { - "epoch": 0.9683030646992055, + "epoch": 0.96695854232197, "grad_norm": 0.0, - "learning_rate": 5.265015975880472e-08, - "loss": 0.809, + "learning_rate": 5.7205600572943774e-08, + "loss": 0.8006, "step": 34123 }, { - "epoch": 0.9683314415437003, + "epoch": 0.9669868797642325, "grad_norm": 0.0, - "learning_rate": 5.255601392867626e-08, - "loss": 0.7969, + "learning_rate": 5.710761329510539e-08, + "loss": 0.7653, "step": 34124 }, { - "epoch": 0.9683598183881952, + "epoch": 0.967015217206495, "grad_norm": 0.0, - "learning_rate": 5.246195212505467e-08, - "loss": 0.7045, + "learning_rate": 5.700970976994291e-08, + "loss": 0.8198, "step": 34125 }, { - "epoch": 0.9683881952326902, + "epoch": 0.9670435546487574, "grad_norm": 0.0, - "learning_rate": 5.236797434873375e-08, - "loss": 0.7759, + "learning_rate": 5.691188999827901e-08, + "loss": 0.7614, "step": 34126 }, { - "epoch": 0.968416572077185, + "epoch": 0.9670718920910198, "grad_norm": 0.0, - "learning_rate": 5.227408060050842e-08, - "loss": 0.7921, + "learning_rate": 5.6814153980938593e-08, + "loss": 0.7936, "step": 34127 }, { - "epoch": 0.9684449489216799, + "epoch": 0.9671002295332823, "grad_norm": 0.0, - "learning_rate": 5.218027088117028e-08, - "loss": 0.7897, + "learning_rate": 5.6716501718745434e-08, + "loss": 0.6608, "step": 34128 }, { - "epoch": 0.9684733257661748, + "epoch": 0.9671285669755448, "grad_norm": 0.0, - "learning_rate": 5.208654519151202e-08, - "loss": 0.7964, + "learning_rate": 5.661893321252221e-08, + "loss": 0.8125, "step": 34129 }, { - "epoch": 0.9685017026106697, + "epoch": 0.9671569044178072, "grad_norm": 0.0, - "learning_rate": 5.199290353232633e-08, - "loss": 0.8771, + "learning_rate": 5.652144846308827e-08, + "loss": 0.6989, "step": 34130 }, { - "epoch": 0.9685300794551646, + "epoch": 0.9671852418600697, "grad_norm": 0.0, - "learning_rate": 5.189934590440482e-08, - "loss": 0.8663, + "learning_rate": 5.6424047471268507e-08, + "loss": 0.7764, "step": 34131 }, { - "epoch": 0.9685584562996595, + "epoch": 0.9672135793023322, "grad_norm": 0.0, - "learning_rate": 5.1805872308535734e-08, - "loss": 0.7556, + "learning_rate": 5.6326730237880043e-08, + "loss": 0.7264, "step": 34132 }, { - "epoch": 0.9685868331441544, + "epoch": 0.9672419167445946, "grad_norm": 0.0, - "learning_rate": 5.171248274550955e-08, - "loss": 0.7751, + "learning_rate": 5.622949676374445e-08, + "loss": 0.7917, "step": 34133 }, { - "epoch": 0.9686152099886493, + "epoch": 0.9672702541868571, "grad_norm": 0.0, - "learning_rate": 5.1619177216115646e-08, - "loss": 0.8193, + "learning_rate": 5.6132347049679955e-08, + "loss": 0.7803, "step": 34134 }, { - "epoch": 0.9686435868331441, + "epoch": 0.9672985916291196, "grad_norm": 0.0, - "learning_rate": 5.1525955721142275e-08, - "loss": 0.707, + "learning_rate": 5.603528109650591e-08, + "loss": 0.8512, "step": 34135 }, { - "epoch": 0.9686719636776391, + "epoch": 0.9673269290713821, "grad_norm": 0.0, - "learning_rate": 5.143281826137547e-08, - "loss": 0.7773, + "learning_rate": 5.593829890503832e-08, + "loss": 0.7644, "step": 34136 }, { - "epoch": 0.9687003405221339, + "epoch": 0.9673552665136445, "grad_norm": 0.0, - "learning_rate": 5.13397648376035e-08, - "loss": 0.8609, + "learning_rate": 5.584140047609654e-08, + "loss": 0.7991, "step": 34137 }, { - "epoch": 0.9687287173666288, + "epoch": 0.967383603955907, "grad_norm": 0.0, - "learning_rate": 5.1246795450611284e-08, - "loss": 0.8381, + "learning_rate": 5.574458581049436e-08, + "loss": 0.7426, "step": 34138 }, { - "epoch": 0.9687570942111238, + "epoch": 0.9674119413981694, "grad_norm": 0.0, - "learning_rate": 5.115391010118487e-08, - "loss": 0.7869, + "learning_rate": 5.5647854909047786e-08, + "loss": 0.7732, "step": 34139 }, { - "epoch": 0.9687854710556186, + "epoch": 0.9674402788404318, "grad_norm": 0.0, - "learning_rate": 5.106110879010917e-08, - "loss": 0.8179, + "learning_rate": 5.555120777257284e-08, + "loss": 0.7265, "step": 34140 }, { - "epoch": 0.9688138479001135, + "epoch": 0.9674686162826943, "grad_norm": 0.0, - "learning_rate": 5.0968391518166905e-08, - "loss": 0.8285, + "learning_rate": 5.5454644401883307e-08, + "loss": 0.7893, "step": 34141 }, { - "epoch": 0.9688422247446083, + "epoch": 0.9674969537249568, "grad_norm": 0.0, - "learning_rate": 5.087575828614077e-08, - "loss": 0.7724, + "learning_rate": 5.535816479779188e-08, + "loss": 0.8078, "step": 34142 }, { - "epoch": 0.9688706015891033, + "epoch": 0.9675252911672192, "grad_norm": 0.0, - "learning_rate": 5.078320909481571e-08, - "loss": 0.8446, + "learning_rate": 5.526176896111013e-08, + "loss": 0.7804, "step": 34143 }, { - "epoch": 0.9688989784335982, + "epoch": 0.9675536286094817, "grad_norm": 0.0, - "learning_rate": 5.06907439449722e-08, - "loss": 0.7396, + "learning_rate": 5.5165456892652955e-08, + "loss": 0.7966, "step": 34144 }, { - "epoch": 0.968927355278093, + "epoch": 0.9675819660517442, "grad_norm": 0.0, - "learning_rate": 5.0598362837390725e-08, - "loss": 0.8789, + "learning_rate": 5.506922859322972e-08, + "loss": 0.8063, "step": 34145 }, { - "epoch": 0.968955732122588, + "epoch": 0.9676103034940067, "grad_norm": 0.0, - "learning_rate": 5.0506065772851774e-08, - "loss": 0.8743, + "learning_rate": 5.497308406365087e-08, + "loss": 0.7552, "step": 34146 }, { - "epoch": 0.9689841089670829, + "epoch": 0.9676386409362691, "grad_norm": 0.0, - "learning_rate": 5.041385275213584e-08, - "loss": 0.8244, + "learning_rate": 5.4877023304726885e-08, + "loss": 0.6937, "step": 34147 }, { - "epoch": 0.9690124858115777, + "epoch": 0.9676669783785316, "grad_norm": 0.0, - "learning_rate": 5.032172377602229e-08, - "loss": 0.7456, + "learning_rate": 5.4781046317267103e-08, + "loss": 0.8536, "step": 34148 }, { - "epoch": 0.9690408626560727, + "epoch": 0.967695315820794, "grad_norm": 0.0, - "learning_rate": 5.0229678845287176e-08, - "loss": 0.8181, + "learning_rate": 5.468515310207867e-08, + "loss": 0.8423, "step": 34149 }, { - "epoch": 0.9690692395005676, + "epoch": 0.9677236532630564, "grad_norm": 0.0, - "learning_rate": 5.013771796070987e-08, - "loss": 0.7419, + "learning_rate": 5.458934365997093e-08, + "loss": 0.8633, "step": 34150 }, { - "epoch": 0.9690976163450624, + "epoch": 0.9677519907053189, "grad_norm": 0.0, - "learning_rate": 5.004584112306643e-08, - "loss": 0.7402, + "learning_rate": 5.449361799175101e-08, + "loss": 0.8203, "step": 34151 }, { - "epoch": 0.9691259931895573, + "epoch": 0.9677803281475814, "grad_norm": 0.0, - "learning_rate": 4.995404833313289e-08, - "loss": 0.8455, + "learning_rate": 5.4397976098223834e-08, + "loss": 0.7646, "step": 34152 }, { - "epoch": 0.9691543700340522, + "epoch": 0.9678086655898439, "grad_norm": 0.0, - "learning_rate": 4.9862339591684184e-08, - "loss": 0.838, + "learning_rate": 5.430241798019542e-08, + "loss": 0.7386, "step": 34153 }, { - "epoch": 0.9691827468785471, + "epoch": 0.9678370030321063, "grad_norm": 0.0, - "learning_rate": 4.9770714899496366e-08, - "loss": 0.8645, + "learning_rate": 5.420694363847068e-08, + "loss": 0.7814, "step": 34154 }, { - "epoch": 0.969211123723042, + "epoch": 0.9678653404743688, "grad_norm": 0.0, - "learning_rate": 4.967917425734214e-08, - "loss": 0.7957, + "learning_rate": 5.411155307385563e-08, + "loss": 0.8062, "step": 34155 }, { - "epoch": 0.9692395005675369, + "epoch": 0.9678936779166313, "grad_norm": 0.0, - "learning_rate": 4.958771766599535e-08, - "loss": 0.8548, + "learning_rate": 5.4016246287150745e-08, + "loss": 0.6538, "step": 34156 }, { - "epoch": 0.9692678774120318, + "epoch": 0.9679220153588937, "grad_norm": 0.0, - "learning_rate": 4.94963451262287e-08, - "loss": 0.9109, + "learning_rate": 5.392102327916093e-08, + "loss": 0.8068, "step": 34157 }, { - "epoch": 0.9692962542565267, + "epoch": 0.9679503528011562, "grad_norm": 0.0, - "learning_rate": 4.940505663881379e-08, - "loss": 0.8962, + "learning_rate": 5.382588405068889e-08, + "loss": 0.7716, "step": 34158 }, { - "epoch": 0.9693246311010215, + "epoch": 0.9679786902434186, "grad_norm": 0.0, - "learning_rate": 4.9313852204521114e-08, - "loss": 0.8448, + "learning_rate": 5.373082860253287e-08, + "loss": 0.8419, "step": 34159 }, { - "epoch": 0.9693530079455165, + "epoch": 0.9680070276856811, "grad_norm": 0.0, - "learning_rate": 4.9222731824122295e-08, - "loss": 0.9281, + "learning_rate": 5.3635856935497775e-08, + "loss": 0.8443, "step": 34160 }, { - "epoch": 0.9693813847900113, + "epoch": 0.9680353651279435, "grad_norm": 0.0, - "learning_rate": 4.913169549838559e-08, - "loss": 0.7923, + "learning_rate": 5.354096905037964e-08, + "loss": 0.7811, "step": 34161 }, { - "epoch": 0.9694097616345062, + "epoch": 0.968063702570206, "grad_norm": 0.0, - "learning_rate": 4.90407432280815e-08, - "loss": 0.7862, + "learning_rate": 5.344616494797894e-08, + "loss": 0.8582, "step": 34162 }, { - "epoch": 0.9694381384790012, + "epoch": 0.9680920400124685, "grad_norm": 0.0, - "learning_rate": 4.894987501397719e-08, - "loss": 0.7938, + "learning_rate": 5.3351444629096136e-08, + "loss": 0.7948, "step": 34163 }, { - "epoch": 0.969466515323496, + "epoch": 0.9681203774547309, "grad_norm": 0.0, - "learning_rate": 4.885909085684093e-08, - "loss": 0.7919, + "learning_rate": 5.3256808094527266e-08, + "loss": 0.8599, "step": 34164 }, { - "epoch": 0.9694948921679909, + "epoch": 0.9681487148969934, "grad_norm": 0.0, - "learning_rate": 4.8768390757439886e-08, - "loss": 0.8685, + "learning_rate": 5.316225534506947e-08, + "loss": 0.7778, "step": 34165 }, { - "epoch": 0.9695232690124859, + "epoch": 0.9681770523392559, "grad_norm": 0.0, - "learning_rate": 4.8677774716539007e-08, - "loss": 0.8542, + "learning_rate": 5.306778638151988e-08, + "loss": 0.7112, "step": 34166 }, { - "epoch": 0.9695516458569807, + "epoch": 0.9682053897815183, "grad_norm": 0.0, - "learning_rate": 4.858724273490323e-08, - "loss": 0.7398, + "learning_rate": 5.297340120467453e-08, + "loss": 0.8128, "step": 34167 }, { - "epoch": 0.9695800227014756, + "epoch": 0.9682337272237808, "grad_norm": 0.0, - "learning_rate": 4.8496794813299716e-08, - "loss": 0.7746, + "learning_rate": 5.287909981532835e-08, + "loss": 0.8099, "step": 34168 }, { - "epoch": 0.9696083995459704, + "epoch": 0.9682620646660433, "grad_norm": 0.0, - "learning_rate": 4.8406430952491204e-08, - "loss": 0.8001, + "learning_rate": 5.2784882214274025e-08, + "loss": 0.8637, "step": 34169 }, { - "epoch": 0.9696367763904654, + "epoch": 0.9682904021083057, "grad_norm": 0.0, - "learning_rate": 4.831615115323929e-08, - "loss": 0.8247, + "learning_rate": 5.269074840230648e-08, + "loss": 0.8098, "step": 34170 }, { - "epoch": 0.9696651532349603, + "epoch": 0.9683187395505681, "grad_norm": 0.0, - "learning_rate": 4.822595541631003e-08, - "loss": 0.8725, + "learning_rate": 5.2596698380219525e-08, + "loss": 0.793, "step": 34171 }, { - "epoch": 0.9696935300794551, + "epoch": 0.9683470769928306, "grad_norm": 0.0, - "learning_rate": 4.813584374246283e-08, - "loss": 0.6947, + "learning_rate": 5.250273214880475e-08, + "loss": 0.7939, "step": 34172 }, { - "epoch": 0.9697219069239501, + "epoch": 0.9683754144350931, "grad_norm": 0.0, - "learning_rate": 4.804581613245929e-08, - "loss": 0.7735, + "learning_rate": 5.240884970885263e-08, + "loss": 0.8619, "step": 34173 }, { - "epoch": 0.969750283768445, + "epoch": 0.9684037518773555, "grad_norm": 0.0, - "learning_rate": 4.7955872587059916e-08, - "loss": 0.8173, + "learning_rate": 5.2315051061154755e-08, + "loss": 0.7221, "step": 34174 }, { - "epoch": 0.9697786606129398, + "epoch": 0.968432089319618, "grad_norm": 0.0, - "learning_rate": 4.786601310702521e-08, - "loss": 0.8843, + "learning_rate": 5.2221336206500494e-08, + "loss": 0.832, "step": 34175 }, { - "epoch": 0.9698070374574347, + "epoch": 0.9684604267618805, "grad_norm": 0.0, - "learning_rate": 4.777623769311346e-08, - "loss": 0.8409, + "learning_rate": 5.212770514568144e-08, + "loss": 0.8309, "step": 34176 }, { - "epoch": 0.9698354143019297, + "epoch": 0.968488764204143, "grad_norm": 0.0, - "learning_rate": 4.768654634608294e-08, - "loss": 0.7643, + "learning_rate": 5.203415787948363e-08, + "loss": 0.8388, "step": 34177 }, { - "epoch": 0.9698637911464245, + "epoch": 0.9685171016464054, "grad_norm": 0.0, - "learning_rate": 4.7596939066691936e-08, - "loss": 0.9181, + "learning_rate": 5.1940694408696425e-08, + "loss": 0.7867, "step": 34178 }, { - "epoch": 0.9698921679909194, + "epoch": 0.9685454390886679, "grad_norm": 0.0, - "learning_rate": 4.7507415855697624e-08, - "loss": 0.8052, + "learning_rate": 5.184731473410698e-08, + "loss": 0.9363, "step": 34179 }, { - "epoch": 0.9699205448354143, + "epoch": 0.9685737765309304, "grad_norm": 0.0, - "learning_rate": 4.741797671385495e-08, - "loss": 0.8682, + "learning_rate": 5.1754018856501334e-08, + "loss": 0.8118, "step": 34180 }, { - "epoch": 0.9699489216799092, + "epoch": 0.9686021139731927, "grad_norm": 0.0, - "learning_rate": 4.732862164191887e-08, - "loss": 0.8443, + "learning_rate": 5.166080677666663e-08, + "loss": 0.7752, "step": 34181 }, { - "epoch": 0.9699772985244041, + "epoch": 0.9686304514154552, "grad_norm": 0.0, - "learning_rate": 4.723935064064766e-08, - "loss": 0.8548, + "learning_rate": 5.156767849538669e-08, + "loss": 0.8255, "step": 34182 }, { - "epoch": 0.9700056753688989, + "epoch": 0.9686587888577177, "grad_norm": 0.0, - "learning_rate": 4.715016371079073e-08, - "loss": 0.7491, + "learning_rate": 5.1474634013446435e-08, + "loss": 0.7935, "step": 34183 }, { - "epoch": 0.9700340522133939, + "epoch": 0.9686871262999802, "grad_norm": 0.0, - "learning_rate": 4.7061060853105247e-08, - "loss": 0.7965, + "learning_rate": 5.13816733316308e-08, + "loss": 0.7714, "step": 34184 }, { - "epoch": 0.9700624290578888, + "epoch": 0.9687154637422426, "grad_norm": 0.0, - "learning_rate": 4.6972042068341714e-08, - "loss": 0.7613, + "learning_rate": 5.128879645072027e-08, + "loss": 0.861, "step": 34185 }, { - "epoch": 0.9700908059023836, + "epoch": 0.9687438011845051, "grad_norm": 0.0, - "learning_rate": 4.6883107357252876e-08, - "loss": 0.7929, + "learning_rate": 5.119600337149866e-08, + "loss": 0.8221, "step": 34186 }, { - "epoch": 0.9701191827468786, + "epoch": 0.9687721386267676, "grad_norm": 0.0, - "learning_rate": 4.679425672059035e-08, - "loss": 0.7574, + "learning_rate": 5.110329409474757e-08, + "loss": 0.7983, "step": 34187 }, { - "epoch": 0.9701475595913734, + "epoch": 0.96880047606903, "grad_norm": 0.0, - "learning_rate": 4.670549015910464e-08, - "loss": 0.7514, + "learning_rate": 5.101066862124859e-08, + "loss": 0.8587, "step": 34188 }, { - "epoch": 0.9701759364358683, + "epoch": 0.9688288135112925, "grad_norm": 0.0, - "learning_rate": 4.661680767354404e-08, - "loss": 0.7705, + "learning_rate": 5.091812695178e-08, + "loss": 0.7656, "step": 34189 }, { - "epoch": 0.9702043132803633, + "epoch": 0.968857150953555, "grad_norm": 0.0, - "learning_rate": 4.6528209264657954e-08, - "loss": 0.7961, + "learning_rate": 5.0825669087123385e-08, + "loss": 0.7412, "step": 34190 }, { - "epoch": 0.9702326901248581, + "epoch": 0.9688854883958173, "grad_norm": 0.0, - "learning_rate": 4.643969493319689e-08, - "loss": 0.7296, + "learning_rate": 5.073329502805591e-08, + "loss": 0.7268, "step": 34191 }, { - "epoch": 0.970261066969353, + "epoch": 0.9689138258380798, "grad_norm": 0.0, - "learning_rate": 4.6351264679906915e-08, - "loss": 0.7671, + "learning_rate": 5.064100477535805e-08, + "loss": 0.7539, "step": 34192 }, { - "epoch": 0.9702894438138479, + "epoch": 0.9689421632803423, "grad_norm": 0.0, - "learning_rate": 4.626291850553521e-08, - "loss": 0.8024, + "learning_rate": 5.054879832980364e-08, + "loss": 0.9056, "step": 34193 }, { - "epoch": 0.9703178206583428, + "epoch": 0.9689705007226048, "grad_norm": 0.0, - "learning_rate": 4.617465641082786e-08, - "loss": 0.9087, + "learning_rate": 5.045667569217316e-08, + "loss": 0.7827, "step": 34194 }, { - "epoch": 0.9703461975028377, + "epoch": 0.9689988381648672, "grad_norm": 0.0, - "learning_rate": 4.608647839653091e-08, - "loss": 0.9028, + "learning_rate": 5.036463686323934e-08, + "loss": 0.7937, "step": 34195 }, { - "epoch": 0.9703745743473325, + "epoch": 0.9690271756071297, "grad_norm": 0.0, - "learning_rate": 4.5998384463389335e-08, - "loss": 0.7566, + "learning_rate": 5.027268184377931e-08, + "loss": 0.8509, "step": 34196 }, { - "epoch": 0.9704029511918275, + "epoch": 0.9690555130493922, "grad_norm": 0.0, - "learning_rate": 4.591037461214587e-08, - "loss": 0.8708, + "learning_rate": 5.018081063456803e-08, + "loss": 0.7776, "step": 34197 }, { - "epoch": 0.9704313280363224, + "epoch": 0.9690838504916546, "grad_norm": 0.0, - "learning_rate": 4.5822448843545477e-08, - "loss": 0.7508, + "learning_rate": 5.008902323637821e-08, + "loss": 0.7541, "step": 34198 }, { - "epoch": 0.9704597048808172, + "epoch": 0.9691121879339171, "grad_norm": 0.0, - "learning_rate": 4.573460715832978e-08, - "loss": 0.8865, + "learning_rate": 4.999731964998256e-08, + "loss": 0.7135, "step": 34199 }, { - "epoch": 0.9704880817253121, + "epoch": 0.9691405253761796, "grad_norm": 0.0, - "learning_rate": 4.5646849557242634e-08, - "loss": 0.7441, + "learning_rate": 4.99056998761549e-08, + "loss": 0.7452, "step": 34200 }, { - "epoch": 0.9705164585698071, + "epoch": 0.9691688628184421, "grad_norm": 0.0, - "learning_rate": 4.5559176041022336e-08, - "loss": 0.7716, + "learning_rate": 4.9814163915666843e-08, + "loss": 0.7872, "step": 34201 }, { - "epoch": 0.9705448354143019, + "epoch": 0.9691972002607044, "grad_norm": 0.0, - "learning_rate": 4.547158661041273e-08, - "loss": 0.7413, + "learning_rate": 4.972271176928778e-08, + "loss": 0.7982, "step": 34202 }, { - "epoch": 0.9705732122587968, + "epoch": 0.9692255377029669, "grad_norm": 0.0, - "learning_rate": 4.5384081266151014e-08, - "loss": 0.6967, + "learning_rate": 4.963134343779041e-08, + "loss": 0.8777, "step": 34203 }, { - "epoch": 0.9706015891032918, + "epoch": 0.9692538751452294, "grad_norm": 0.0, - "learning_rate": 4.52966600089777e-08, - "loss": 0.8009, + "learning_rate": 4.954005892194191e-08, + "loss": 0.884, "step": 34204 }, { - "epoch": 0.9706299659477866, + "epoch": 0.9692822125874918, "grad_norm": 0.0, - "learning_rate": 4.5209322839631085e-08, - "loss": 0.8088, + "learning_rate": 4.9448858222513884e-08, + "loss": 0.7809, "step": 34205 }, { - "epoch": 0.9706583427922815, + "epoch": 0.9693105500297543, "grad_norm": 0.0, - "learning_rate": 4.5122069758849476e-08, - "loss": 0.8278, + "learning_rate": 4.935774134027238e-08, + "loss": 0.7448, "step": 34206 }, { - "epoch": 0.9706867196367764, + "epoch": 0.9693388874720168, "grad_norm": 0.0, - "learning_rate": 4.5034900767368937e-08, - "loss": 0.7237, + "learning_rate": 4.9266708275985675e-08, + "loss": 0.8746, "step": 34207 }, { - "epoch": 0.9707150964812713, + "epoch": 0.9693672249142793, "grad_norm": 0.0, - "learning_rate": 4.494781586592556e-08, - "loss": 0.8656, + "learning_rate": 4.917575903042093e-08, + "loss": 0.7782, "step": 34208 }, { - "epoch": 0.9707434733257662, + "epoch": 0.9693955623565417, "grad_norm": 0.0, - "learning_rate": 4.486081505525541e-08, - "loss": 0.7885, + "learning_rate": 4.9084893604344205e-08, + "loss": 0.8106, "step": 34209 }, { - "epoch": 0.970771850170261, + "epoch": 0.9694238997988042, "grad_norm": 0.0, - "learning_rate": 4.477389833609347e-08, - "loss": 0.6816, + "learning_rate": 4.899411199852044e-08, + "loss": 0.806, "step": 34210 }, { - "epoch": 0.970800227014756, + "epoch": 0.9694522372410667, "grad_norm": 0.0, - "learning_rate": 4.4687065709174695e-08, - "loss": 0.8631, + "learning_rate": 4.890341421371458e-08, + "loss": 0.7378, "step": 34211 }, { - "epoch": 0.9708286038592508, + "epoch": 0.969480574683329, "grad_norm": 0.0, - "learning_rate": 4.4600317175231834e-08, - "loss": 0.7935, + "learning_rate": 4.881280025069046e-08, + "loss": 0.8186, "step": 34212 }, { - "epoch": 0.9708569807037457, + "epoch": 0.9695089121255915, "grad_norm": 0.0, - "learning_rate": 4.451365273499653e-08, - "loss": 0.7816, + "learning_rate": 4.87222701102108e-08, + "loss": 0.8073, "step": 34213 }, { - "epoch": 0.9708853575482407, + "epoch": 0.969537249567854, "grad_norm": 0.0, - "learning_rate": 4.442707238920263e-08, - "loss": 0.8195, + "learning_rate": 4.8631823793039436e-08, + "loss": 0.738, "step": 34214 }, { - "epoch": 0.9709137343927355, + "epoch": 0.9695655870101164, "grad_norm": 0.0, - "learning_rate": 4.434057613858067e-08, - "loss": 0.7133, + "learning_rate": 4.8541461299936864e-08, + "loss": 0.9238, "step": 34215 }, { - "epoch": 0.9709421112372304, + "epoch": 0.9695939244523789, "grad_norm": 0.0, - "learning_rate": 4.425416398386229e-08, - "loss": 0.7493, + "learning_rate": 4.8451182631665814e-08, + "loss": 0.689, "step": 34216 }, { - "epoch": 0.9709704880817253, + "epoch": 0.9696222618946414, "grad_norm": 0.0, - "learning_rate": 4.416783592577578e-08, - "loss": 0.7227, + "learning_rate": 4.836098778898457e-08, + "loss": 0.7962, "step": 34217 }, { - "epoch": 0.9709988649262202, + "epoch": 0.9696505993369039, "grad_norm": 0.0, - "learning_rate": 4.408159196505057e-08, - "loss": 0.7742, + "learning_rate": 4.827087677265585e-08, + "loss": 0.8631, "step": 34218 }, { - "epoch": 0.9710272417707151, + "epoch": 0.9696789367791663, "grad_norm": 0.0, - "learning_rate": 4.399543210241608e-08, - "loss": 0.764, + "learning_rate": 4.818084958343572e-08, + "loss": 0.7884, "step": 34219 }, { - "epoch": 0.97105561861521, + "epoch": 0.9697072742214288, "grad_norm": 0.0, - "learning_rate": 4.3909356338599496e-08, - "loss": 0.7528, + "learning_rate": 4.8090906222084674e-08, + "loss": 0.8161, "step": 34220 }, { - "epoch": 0.9710839954597049, + "epoch": 0.9697356116636913, "grad_norm": 0.0, - "learning_rate": 4.3823364674328016e-08, - "loss": 0.8484, + "learning_rate": 4.8001046689358785e-08, + "loss": 0.7752, "step": 34221 }, { - "epoch": 0.9711123723041998, + "epoch": 0.9697639491059536, "grad_norm": 0.0, - "learning_rate": 4.373745711032884e-08, - "loss": 0.7308, + "learning_rate": 4.7911270986016335e-08, + "loss": 0.7616, "step": 34222 }, { - "epoch": 0.9711407491486946, + "epoch": 0.9697922865482161, "grad_norm": 0.0, - "learning_rate": 4.365163364732694e-08, - "loss": 0.7972, + "learning_rate": 4.7821579112812266e-08, + "loss": 0.7211, "step": 34223 }, { - "epoch": 0.9711691259931896, + "epoch": 0.9698206239904786, "grad_norm": 0.0, - "learning_rate": 4.3565894286047295e-08, - "loss": 0.8668, + "learning_rate": 4.7731971070503754e-08, + "loss": 0.832, "step": 34224 }, { - "epoch": 0.9711975028376845, + "epoch": 0.9698489614327411, "grad_norm": 0.0, - "learning_rate": 4.348023902721377e-08, - "loss": 0.7906, + "learning_rate": 4.764244685984354e-08, + "loss": 0.8593, "step": 34225 }, { - "epoch": 0.9712258796821793, + "epoch": 0.9698772988750035, "grad_norm": 0.0, - "learning_rate": 4.3394667871550225e-08, - "loss": 0.8284, + "learning_rate": 4.755300648158656e-08, + "loss": 0.7968, "step": 34226 }, { - "epoch": 0.9712542565266742, + "epoch": 0.969905636317266, "grad_norm": 0.0, - "learning_rate": 4.3309180819779416e-08, - "loss": 0.8043, + "learning_rate": 4.7463649936486665e-08, + "loss": 0.7791, "step": 34227 }, { - "epoch": 0.9712826333711692, + "epoch": 0.9699339737595285, "grad_norm": 0.0, - "learning_rate": 4.3223777872624104e-08, - "loss": 0.7293, + "learning_rate": 4.7374377225296585e-08, + "loss": 0.7929, "step": 34228 }, { - "epoch": 0.971311010215664, + "epoch": 0.9699623112017909, "grad_norm": 0.0, - "learning_rate": 4.313845903080371e-08, - "loss": 0.8201, + "learning_rate": 4.728518834876683e-08, + "loss": 0.7919, "step": 34229 }, { - "epoch": 0.9713393870601589, + "epoch": 0.9699906486440534, "grad_norm": 0.0, - "learning_rate": 4.30532242950421e-08, - "loss": 0.8501, + "learning_rate": 4.719608330765124e-08, + "loss": 0.8457, "step": 34230 }, { - "epoch": 0.9713677639046538, + "epoch": 0.9700189860863159, "grad_norm": 0.0, - "learning_rate": 4.296807366605649e-08, - "loss": 0.81, + "learning_rate": 4.710706210269811e-08, + "loss": 0.7922, "step": 34231 }, { - "epoch": 0.9713961407491487, + "epoch": 0.9700473235285784, "grad_norm": 0.0, - "learning_rate": 4.288300714456739e-08, - "loss": 0.7531, + "learning_rate": 4.7018124734657944e-08, + "loss": 0.9264, "step": 34232 }, { - "epoch": 0.9714245175936436, + "epoch": 0.9700756609708407, "grad_norm": 0.0, - "learning_rate": 4.279802473129313e-08, - "loss": 0.8423, + "learning_rate": 4.692927120428015e-08, + "loss": 0.7922, "step": 34233 }, { - "epoch": 0.9714528944381384, + "epoch": 0.9701039984131032, "grad_norm": 0.0, - "learning_rate": 4.271312642695202e-08, - "loss": 0.8292, + "learning_rate": 4.684050151231412e-08, + "loss": 0.7718, "step": 34234 }, { - "epoch": 0.9714812712826334, + "epoch": 0.9701323358553657, "grad_norm": 0.0, - "learning_rate": 4.262831223226016e-08, - "loss": 0.8382, + "learning_rate": 4.675181565950482e-08, + "loss": 0.8553, "step": 34235 }, { - "epoch": 0.9715096481271283, + "epoch": 0.9701606732976281, "grad_norm": 0.0, - "learning_rate": 4.254358214793475e-08, - "loss": 0.7819, + "learning_rate": 4.6663213646602754e-08, + "loss": 0.7535, "step": 34236 }, { - "epoch": 0.9715380249716231, + "epoch": 0.9701890107398906, "grad_norm": 0.0, - "learning_rate": 4.245893617469077e-08, - "loss": 0.8349, + "learning_rate": 4.657469547435178e-08, + "loss": 0.8593, "step": 34237 }, { - "epoch": 0.9715664018161181, + "epoch": 0.9702173481821531, "grad_norm": 0.0, - "learning_rate": 4.237437431324432e-08, - "loss": 0.7056, + "learning_rate": 4.6486261143497967e-08, + "loss": 0.8166, "step": 34238 }, { - "epoch": 0.971594778660613, + "epoch": 0.9702456856244155, "grad_norm": 0.0, - "learning_rate": 4.228989656430926e-08, - "loss": 0.7056, + "learning_rate": 4.639791065478738e-08, + "loss": 0.8183, "step": 34239 }, { - "epoch": 0.9716231555051078, + "epoch": 0.970274023066678, "grad_norm": 0.0, - "learning_rate": 4.220550292859948e-08, - "loss": 0.8302, + "learning_rate": 4.630964400896165e-08, + "loss": 0.7692, "step": 34240 }, { - "epoch": 0.9716515323496028, + "epoch": 0.9703023605089405, "grad_norm": 0.0, - "learning_rate": 4.212119340682663e-08, - "loss": 0.7799, + "learning_rate": 4.622146120676796e-08, + "loss": 0.7816, "step": 34241 }, { - "epoch": 0.9716799091940976, + "epoch": 0.970330697951203, "grad_norm": 0.0, - "learning_rate": 4.203696799970347e-08, - "loss": 0.8773, + "learning_rate": 4.613336224894571e-08, + "loss": 0.7807, "step": 34242 }, { - "epoch": 0.9717082860385925, + "epoch": 0.9703590353934654, "grad_norm": 0.0, - "learning_rate": 4.195282670794276e-08, - "loss": 0.8294, + "learning_rate": 4.604534713623876e-08, + "loss": 0.677, "step": 34243 }, { - "epoch": 0.9717366628830874, + "epoch": 0.9703873728357278, "grad_norm": 0.0, - "learning_rate": 4.186876953225283e-08, - "loss": 0.7473, + "learning_rate": 4.595741586938873e-08, + "loss": 0.7557, "step": 34244 }, { - "epoch": 0.9717650397275823, + "epoch": 0.9704157102779903, "grad_norm": 0.0, - "learning_rate": 4.178479647334644e-08, - "loss": 0.8187, + "learning_rate": 4.586956844913504e-08, + "loss": 0.8582, "step": 34245 }, { - "epoch": 0.9717934165720772, + "epoch": 0.9704440477202527, "grad_norm": 0.0, - "learning_rate": 4.170090753193079e-08, - "loss": 0.7756, + "learning_rate": 4.5781804876219303e-08, + "loss": 0.7719, "step": 34246 }, { - "epoch": 0.971821793416572, + "epoch": 0.9704723851625152, "grad_norm": 0.0, - "learning_rate": 4.1617102708715326e-08, - "loss": 0.7715, + "learning_rate": 4.569412515137872e-08, + "loss": 0.7619, "step": 34247 }, { - "epoch": 0.971850170261067, + "epoch": 0.9705007226047777, "grad_norm": 0.0, - "learning_rate": 4.153338200440837e-08, - "loss": 0.7256, + "learning_rate": 4.5606529275353806e-08, + "loss": 0.8923, "step": 34248 }, { - "epoch": 0.9718785471055619, + "epoch": 0.9705290600470402, "grad_norm": 0.0, - "learning_rate": 4.144974541971603e-08, - "loss": 0.768, + "learning_rate": 4.5519017248880635e-08, + "loss": 0.7976, "step": 34249 }, { - "epoch": 0.9719069239500567, + "epoch": 0.9705573974893026, "grad_norm": 0.0, - "learning_rate": 4.1366192955345495e-08, - "loss": 0.821, + "learning_rate": 4.5431589072698625e-08, + "loss": 0.7302, "step": 34250 }, { - "epoch": 0.9719353007945516, + "epoch": 0.9705857349315651, "grad_norm": 0.0, - "learning_rate": 4.1282724612001777e-08, - "loss": 0.7279, + "learning_rate": 4.534424474754162e-08, + "loss": 0.7486, "step": 34251 }, { - "epoch": 0.9719636776390466, + "epoch": 0.9706140723738276, "grad_norm": 0.0, - "learning_rate": 4.1199340390390974e-08, - "loss": 0.7844, + "learning_rate": 4.525698427414793e-08, + "loss": 0.8322, "step": 34252 }, { - "epoch": 0.9719920544835414, + "epoch": 0.97064240981609, "grad_norm": 0.0, - "learning_rate": 4.1116040291218075e-08, - "loss": 0.8043, + "learning_rate": 4.51698076532503e-08, + "loss": 0.7882, "step": 34253 }, { - "epoch": 0.9720204313280363, + "epoch": 0.9706707472583525, "grad_norm": 0.0, - "learning_rate": 4.1032824315184736e-08, - "loss": 0.8093, + "learning_rate": 4.508271488558369e-08, + "loss": 0.8494, "step": 34254 }, { - "epoch": 0.9720488081725313, + "epoch": 0.970699084700615, "grad_norm": 0.0, - "learning_rate": 4.094969246299485e-08, - "loss": 0.7822, + "learning_rate": 4.499570597188307e-08, + "loss": 0.6901, "step": 34255 }, { - "epoch": 0.9720771850170261, + "epoch": 0.9707274221428774, "grad_norm": 0.0, - "learning_rate": 4.086664473535007e-08, - "loss": 0.7578, + "learning_rate": 4.490878091287898e-08, + "loss": 0.7619, "step": 34256 }, { - "epoch": 0.972105561861521, + "epoch": 0.9707557595851398, "grad_norm": 0.0, - "learning_rate": 4.078368113295317e-08, - "loss": 0.7027, + "learning_rate": 4.482193970930637e-08, + "loss": 0.7961, "step": 34257 }, { - "epoch": 0.9721339387060159, + "epoch": 0.9707840970274023, "grad_norm": 0.0, - "learning_rate": 4.070080165650359e-08, - "loss": 0.7774, + "learning_rate": 4.473518236189467e-08, + "loss": 0.8048, "step": 34258 }, { - "epoch": 0.9721623155505108, + "epoch": 0.9708124344696648, "grad_norm": 0.0, - "learning_rate": 4.061800630670298e-08, - "loss": 0.9104, + "learning_rate": 4.464850887137551e-08, + "loss": 0.8007, "step": 34259 }, { - "epoch": 0.9721906923950057, + "epoch": 0.9708407719119272, "grad_norm": 0.0, - "learning_rate": 4.053529508424969e-08, - "loss": 0.8421, + "learning_rate": 4.4561919238478304e-08, + "loss": 0.7752, "step": 34260 }, { - "epoch": 0.9722190692395005, + "epoch": 0.9708691093541897, "grad_norm": 0.0, - "learning_rate": 4.045266798984204e-08, - "loss": 0.8627, + "learning_rate": 4.447541346393358e-08, + "loss": 0.7957, "step": 34261 }, { - "epoch": 0.9722474460839955, + "epoch": 0.9708974467964522, "grad_norm": 0.0, - "learning_rate": 4.0370125024178364e-08, - "loss": 0.8214, + "learning_rate": 4.438899154846854e-08, + "loss": 0.8154, "step": 34262 }, { - "epoch": 0.9722758229284904, + "epoch": 0.9709257842387146, "grad_norm": 0.0, - "learning_rate": 4.028766618795588e-08, - "loss": 0.7895, + "learning_rate": 4.43026534928126e-08, + "loss": 0.8538, "step": 34263 }, { - "epoch": 0.9723041997729852, + "epoch": 0.9709541216809771, "grad_norm": 0.0, - "learning_rate": 4.020529148187069e-08, - "loss": 0.8158, + "learning_rate": 4.421639929769295e-08, + "loss": 0.7242, "step": 34264 }, { - "epoch": 0.9723325766174802, + "epoch": 0.9709824591232395, "grad_norm": 0.0, - "learning_rate": 4.012300090662002e-08, - "loss": 0.8515, + "learning_rate": 4.413022896383457e-08, + "loss": 0.8319, "step": 34265 }, { - "epoch": 0.972360953461975, + "epoch": 0.971010796565502, "grad_norm": 0.0, - "learning_rate": 4.004079446289777e-08, - "loss": 0.8139, + "learning_rate": 4.404414249196465e-08, + "loss": 0.8373, "step": 34266 }, { - "epoch": 0.9723893303064699, + "epoch": 0.9710391340077644, "grad_norm": 0.0, - "learning_rate": 3.995867215139892e-08, - "loss": 0.7266, + "learning_rate": 4.395813988280817e-08, + "loss": 0.8127, "step": 34267 }, { - "epoch": 0.9724177071509648, + "epoch": 0.9710674714500269, "grad_norm": 0.0, - "learning_rate": 3.987663397281627e-08, - "loss": 0.808, + "learning_rate": 4.3872221137089e-08, + "loss": 0.8365, "step": 34268 }, { - "epoch": 0.9724460839954597, + "epoch": 0.9710958088922894, "grad_norm": 0.0, - "learning_rate": 3.9794679927843696e-08, - "loss": 0.766, + "learning_rate": 4.3786386255531e-08, + "loss": 0.8457, "step": 34269 }, { - "epoch": 0.9724744608399546, + "epoch": 0.9711241463345518, "grad_norm": 0.0, - "learning_rate": 3.9712810017173975e-08, - "loss": 0.7858, + "learning_rate": 4.3700635238856927e-08, + "loss": 0.7833, "step": 34270 }, { - "epoch": 0.9725028376844495, + "epoch": 0.9711524837768143, "grad_norm": 0.0, - "learning_rate": 3.9631024241497676e-08, - "loss": 0.711, + "learning_rate": 4.3614968087790644e-08, + "loss": 0.7677, "step": 34271 }, { - "epoch": 0.9725312145289444, + "epoch": 0.9711808212190768, "grad_norm": 0.0, - "learning_rate": 3.954932260150535e-08, - "loss": 0.821, + "learning_rate": 4.352938480305269e-08, + "loss": 0.849, "step": 34272 }, { - "epoch": 0.9725595913734393, + "epoch": 0.9712091586613393, "grad_norm": 0.0, - "learning_rate": 3.9467705097887556e-08, - "loss": 0.7693, + "learning_rate": 4.3443885385363597e-08, + "loss": 0.8651, "step": 34273 }, { - "epoch": 0.9725879682179341, + "epoch": 0.9712374961036017, "grad_norm": 0.0, - "learning_rate": 3.9386171731334855e-08, - "loss": 0.7266, + "learning_rate": 4.335846983544389e-08, + "loss": 0.7828, "step": 34274 }, { - "epoch": 0.9726163450624291, + "epoch": 0.9712658335458642, "grad_norm": 0.0, - "learning_rate": 3.930472250253336e-08, - "loss": 0.8834, + "learning_rate": 4.3273138154013014e-08, + "loss": 0.7978, "step": 34275 }, { - "epoch": 0.972644721906924, + "epoch": 0.9712941709881266, "grad_norm": 0.0, - "learning_rate": 3.922335741217365e-08, - "loss": 0.7155, + "learning_rate": 4.318789034179038e-08, + "loss": 0.7834, "step": 34276 }, { - "epoch": 0.9726730987514188, + "epoch": 0.971322508430389, "grad_norm": 0.0, - "learning_rate": 3.9142076460941816e-08, - "loss": 0.8442, + "learning_rate": 4.31027263994932e-08, + "loss": 0.7797, "step": 34277 }, { - "epoch": 0.9727014755959137, + "epoch": 0.9713508458726515, "grad_norm": 0.0, - "learning_rate": 3.906087964952399e-08, - "loss": 0.9036, + "learning_rate": 4.301764632783978e-08, + "loss": 0.8148, "step": 34278 }, { - "epoch": 0.9727298524404087, + "epoch": 0.971379183314914, "grad_norm": 0.0, - "learning_rate": 3.8979766978608505e-08, - "loss": 0.7888, + "learning_rate": 4.293265012754511e-08, + "loss": 0.788, "step": 34279 }, { - "epoch": 0.9727582292849035, + "epoch": 0.9714075207571765, "grad_norm": 0.0, - "learning_rate": 3.8898738448877036e-08, - "loss": 0.9247, + "learning_rate": 4.2847737799326384e-08, + "loss": 0.8311, "step": 34280 }, { - "epoch": 0.9727866061293984, + "epoch": 0.9714358581994389, "grad_norm": 0.0, - "learning_rate": 3.881779406101571e-08, - "loss": 0.716, + "learning_rate": 4.27629093438986e-08, + "loss": 0.7301, "step": 34281 }, { - "epoch": 0.9728149829738933, + "epoch": 0.9714641956417014, "grad_norm": 0.0, - "learning_rate": 3.8736933815708425e-08, - "loss": 0.7918, + "learning_rate": 4.2678164761976724e-08, + "loss": 0.8444, "step": 34282 }, { - "epoch": 0.9728433598183882, + "epoch": 0.9714925330839639, "grad_norm": 0.0, - "learning_rate": 3.865615771363906e-08, - "loss": 0.9132, + "learning_rate": 4.259350405427465e-08, + "loss": 0.7507, "step": 34283 }, { - "epoch": 0.9728717366628831, + "epoch": 0.9715208705262263, "grad_norm": 0.0, - "learning_rate": 3.8575465755488206e-08, - "loss": 0.8458, + "learning_rate": 4.250892722150401e-08, + "loss": 0.7811, "step": 34284 }, { - "epoch": 0.9729001135073779, + "epoch": 0.9715492079684888, "grad_norm": 0.0, - "learning_rate": 3.849485794193752e-08, - "loss": 0.8454, + "learning_rate": 4.242443426437981e-08, + "loss": 0.8019, "step": 34285 }, { - "epoch": 0.9729284903518729, + "epoch": 0.9715775454107513, "grad_norm": 0.0, - "learning_rate": 3.8414334273669804e-08, - "loss": 0.9217, + "learning_rate": 4.234002518361036e-08, + "loss": 0.8314, "step": 34286 }, { - "epoch": 0.9729568671963678, + "epoch": 0.9716058828530136, "grad_norm": 0.0, - "learning_rate": 3.83338947513634e-08, - "loss": 0.6878, + "learning_rate": 4.2255699979910637e-08, + "loss": 0.7985, "step": 34287 }, { - "epoch": 0.9729852440408626, + "epoch": 0.9716342202952761, "grad_norm": 0.0, - "learning_rate": 3.8253539375698865e-08, - "loss": 0.908, + "learning_rate": 4.2171458653986755e-08, + "loss": 0.8033, "step": 34288 }, { - "epoch": 0.9730136208853576, + "epoch": 0.9716625577375386, "grad_norm": 0.0, - "learning_rate": 3.817326814735345e-08, - "loss": 0.8893, + "learning_rate": 4.2087301206552576e-08, + "loss": 0.7527, "step": 34289 }, { - "epoch": 0.9730419977298524, + "epoch": 0.9716908951798011, "grad_norm": 0.0, - "learning_rate": 3.809308106700771e-08, - "loss": 0.8402, + "learning_rate": 4.20032276383131e-08, + "loss": 0.8228, "step": 34290 }, { - "epoch": 0.9730703745743473, + "epoch": 0.9717192326220635, "grad_norm": 0.0, - "learning_rate": 3.801297813533555e-08, - "loss": 0.6819, + "learning_rate": 4.1919237949978876e-08, + "loss": 0.8319, "step": 34291 }, { - "epoch": 0.9730987514188423, + "epoch": 0.971747570064326, "grad_norm": 0.0, - "learning_rate": 3.7932959353017554e-08, - "loss": 0.8689, + "learning_rate": 4.183533214225599e-08, + "loss": 0.8304, "step": 34292 }, { - "epoch": 0.9731271282633371, + "epoch": 0.9717759075065885, "grad_norm": 0.0, - "learning_rate": 3.7853024720726494e-08, - "loss": 0.8704, + "learning_rate": 4.175151021585277e-08, + "loss": 0.7194, "step": 34293 }, { - "epoch": 0.973155505107832, + "epoch": 0.9718042449488509, "grad_norm": 0.0, - "learning_rate": 3.777317423913962e-08, - "loss": 0.8583, + "learning_rate": 4.1667772171474216e-08, + "loss": 0.8839, "step": 34294 }, { - "epoch": 0.9731838819523269, + "epoch": 0.9718325823911134, "grad_norm": 0.0, - "learning_rate": 3.769340790892973e-08, - "loss": 0.7805, + "learning_rate": 4.1584118009826424e-08, + "loss": 0.8002, "step": 34295 }, { - "epoch": 0.9732122587968218, + "epoch": 0.9718609198333759, "grad_norm": 0.0, - "learning_rate": 3.761372573077182e-08, - "loss": 0.9217, + "learning_rate": 4.150054773161327e-08, + "loss": 0.7978, "step": 34296 }, { - "epoch": 0.9732406356413167, + "epoch": 0.9718892572756384, "grad_norm": 0.0, - "learning_rate": 3.753412770533871e-08, - "loss": 0.7889, + "learning_rate": 4.1417061337539753e-08, + "loss": 0.9084, "step": 34297 }, { - "epoch": 0.9732690124858115, + "epoch": 0.9719175947179007, "grad_norm": 0.0, - "learning_rate": 3.7454613833302066e-08, - "loss": 0.7636, + "learning_rate": 4.133365882830753e-08, + "loss": 0.8314, "step": 34298 }, { - "epoch": 0.9732973893303065, + "epoch": 0.9719459321601632, "grad_norm": 0.0, - "learning_rate": 3.7375184115334694e-08, - "loss": 0.7683, + "learning_rate": 4.1250340204619375e-08, + "loss": 0.7739, "step": 34299 }, { - "epoch": 0.9733257661748014, + "epoch": 0.9719742696024257, "grad_norm": 0.0, - "learning_rate": 3.7295838552107166e-08, - "loss": 0.7733, + "learning_rate": 4.116710546717917e-08, + "loss": 0.8654, "step": 34300 }, { - "epoch": 0.9733541430192962, + "epoch": 0.9720026070446881, "grad_norm": 0.0, - "learning_rate": 3.721657714428895e-08, - "loss": 0.806, + "learning_rate": 4.108395461668524e-08, + "loss": 0.7424, "step": 34301 }, { - "epoch": 0.9733825198637911, + "epoch": 0.9720309444869506, "grad_norm": 0.0, - "learning_rate": 3.713739989255061e-08, - "loss": 0.8155, + "learning_rate": 4.100088765384036e-08, + "loss": 0.7191, "step": 34302 }, { - "epoch": 0.9734108967082861, + "epoch": 0.9720592819292131, "grad_norm": 0.0, - "learning_rate": 3.7058306797560504e-08, - "loss": 0.7427, + "learning_rate": 4.091790457934286e-08, + "loss": 0.8351, "step": 34303 }, { - "epoch": 0.9734392735527809, + "epoch": 0.9720876193714755, "grad_norm": 0.0, - "learning_rate": 3.6979297859986995e-08, - "loss": 0.8068, + "learning_rate": 4.083500539389107e-08, + "loss": 0.7713, "step": 34304 }, { - "epoch": 0.9734676503972758, + "epoch": 0.972115956813738, "grad_norm": 0.0, - "learning_rate": 3.690037308049732e-08, - "loss": 0.7269, + "learning_rate": 4.075219009818554e-08, + "loss": 0.9171, "step": 34305 }, { - "epoch": 0.9734960272417708, + "epoch": 0.9721442942560005, "grad_norm": 0.0, - "learning_rate": 3.682153245975761e-08, - "loss": 0.8166, + "learning_rate": 4.066945869292238e-08, + "loss": 0.8491, "step": 34306 }, { - "epoch": 0.9735244040862656, + "epoch": 0.972172631698263, "grad_norm": 0.0, - "learning_rate": 3.674277599843401e-08, - "loss": 0.7792, + "learning_rate": 4.0586811178797704e-08, + "loss": 0.8345, "step": 34307 }, { - "epoch": 0.9735527809307605, + "epoch": 0.9722009691405253, "grad_norm": 0.0, - "learning_rate": 3.6664103697192647e-08, - "loss": 0.7718, + "learning_rate": 4.050424755650984e-08, + "loss": 0.7275, "step": 34308 }, { - "epoch": 0.9735811577752554, + "epoch": 0.9722293065827878, "grad_norm": 0.0, - "learning_rate": 3.6585515556697424e-08, - "loss": 0.7497, + "learning_rate": 4.042176782675267e-08, + "loss": 0.7594, "step": 34309 }, { - "epoch": 0.9736095346197503, + "epoch": 0.9722576440250503, "grad_norm": 0.0, - "learning_rate": 3.650701157761227e-08, - "loss": 0.8413, + "learning_rate": 4.03393719902212e-08, + "loss": 0.6586, "step": 34310 }, { - "epoch": 0.9736379114642452, + "epoch": 0.9722859814673127, "grad_norm": 0.0, - "learning_rate": 3.6428591760599985e-08, - "loss": 0.8054, + "learning_rate": 4.025706004760932e-08, + "loss": 0.7297, "step": 34311 }, { - "epoch": 0.97366628830874, + "epoch": 0.9723143189095752, "grad_norm": 0.0, - "learning_rate": 3.635025610632337e-08, - "loss": 0.7774, + "learning_rate": 4.017483199961092e-08, + "loss": 0.8015, "step": 34312 }, { - "epoch": 0.973694665153235, + "epoch": 0.9723426563518377, "grad_norm": 0.0, - "learning_rate": 3.6272004615443e-08, - "loss": 0.7762, + "learning_rate": 4.0092687846919885e-08, + "loss": 0.7686, "step": 34313 }, { - "epoch": 0.9737230419977299, + "epoch": 0.9723709937941002, "grad_norm": 0.0, - "learning_rate": 3.619383728862169e-08, - "loss": 0.7717, + "learning_rate": 4.001062759022456e-08, + "loss": 0.8086, "step": 34314 }, { - "epoch": 0.9737514188422247, + "epoch": 0.9723993312363626, "grad_norm": 0.0, - "learning_rate": 3.611575412652002e-08, - "loss": 0.9269, + "learning_rate": 3.992865123021883e-08, + "loss": 0.842, "step": 34315 }, { - "epoch": 0.9737797956867197, + "epoch": 0.9724276686786251, "grad_norm": 0.0, - "learning_rate": 3.603775512979524e-08, - "loss": 0.792, + "learning_rate": 3.984675876759325e-08, + "loss": 0.7915, "step": 34316 }, { - "epoch": 0.9738081725312145, + "epoch": 0.9724560061208876, "grad_norm": 0.0, - "learning_rate": 3.595984029910793e-08, - "loss": 0.7815, + "learning_rate": 3.976495020303617e-08, + "loss": 0.8775, "step": 34317 }, { - "epoch": 0.9738365493757094, + "epoch": 0.9724843435631499, "grad_norm": 0.0, - "learning_rate": 3.588200963511646e-08, - "loss": 0.7892, + "learning_rate": 3.968322553723813e-08, + "loss": 0.8187, "step": 34318 }, { - "epoch": 0.9738649262202043, + "epoch": 0.9725126810054124, "grad_norm": 0.0, - "learning_rate": 3.5804263138475847e-08, - "loss": 0.7672, + "learning_rate": 3.960158477088749e-08, + "loss": 0.8891, "step": 34319 }, { - "epoch": 0.9738933030646992, + "epoch": 0.9725410184476749, "grad_norm": 0.0, - "learning_rate": 3.572660080984558e-08, - "loss": 0.805, + "learning_rate": 3.9520027904670354e-08, + "loss": 0.899, "step": 34320 }, { - "epoch": 0.9739216799091941, + "epoch": 0.9725693558899374, "grad_norm": 0.0, - "learning_rate": 3.564902264988068e-08, - "loss": 0.7714, + "learning_rate": 3.9438554939275064e-08, + "loss": 0.7366, "step": 34321 }, { - "epoch": 0.973950056753689, + "epoch": 0.9725976933321998, "grad_norm": 0.0, - "learning_rate": 3.5571528659236185e-08, - "loss": 0.7774, + "learning_rate": 3.9357165875387735e-08, + "loss": 0.6672, "step": 34322 }, { - "epoch": 0.9739784335981839, + "epoch": 0.9726260307744623, "grad_norm": 0.0, - "learning_rate": 3.549411883856713e-08, - "loss": 0.8128, + "learning_rate": 3.927586071369338e-08, + "loss": 0.6958, "step": 34323 }, { - "epoch": 0.9740068104426788, + "epoch": 0.9726543682167248, "grad_norm": 0.0, - "learning_rate": 3.541679318852742e-08, - "loss": 0.8017, + "learning_rate": 3.9194639454878115e-08, + "loss": 0.7664, "step": 34324 }, { - "epoch": 0.9740351872871736, + "epoch": 0.9726827056589872, "grad_norm": 0.0, - "learning_rate": 3.533955170976988e-08, - "loss": 0.7116, + "learning_rate": 3.911350209962472e-08, + "loss": 0.8311, "step": 34325 }, { - "epoch": 0.9740635641316686, + "epoch": 0.9727110431012497, "grad_norm": 0.0, - "learning_rate": 3.526239440294621e-08, - "loss": 0.7435, + "learning_rate": 3.9032448648617106e-08, + "loss": 0.7833, "step": 34326 }, { - "epoch": 0.9740919409761635, + "epoch": 0.9727393805435122, "grad_norm": 0.0, - "learning_rate": 3.5185321268710324e-08, - "loss": 0.8322, + "learning_rate": 3.895147910253916e-08, + "loss": 0.8379, "step": 34327 }, { - "epoch": 0.9741203178206583, + "epoch": 0.9727677179857745, "grad_norm": 0.0, - "learning_rate": 3.5108332307710605e-08, - "loss": 0.7969, + "learning_rate": 3.887059346207034e-08, + "loss": 0.6923, "step": 34328 }, { - "epoch": 0.9741486946651532, + "epoch": 0.972796055428037, "grad_norm": 0.0, - "learning_rate": 3.503142752059985e-08, - "loss": 0.8282, + "learning_rate": 3.878979172789454e-08, + "loss": 0.7864, "step": 34329 }, { - "epoch": 0.9741770715096482, + "epoch": 0.9728243928702995, "grad_norm": 0.0, - "learning_rate": 3.495460690802643e-08, - "loss": 0.8279, + "learning_rate": 3.870907390069012e-08, + "loss": 0.8196, "step": 34330 }, { - "epoch": 0.974205448354143, + "epoch": 0.972852730312562, "grad_norm": 0.0, - "learning_rate": 3.487787047063873e-08, - "loss": 0.8055, + "learning_rate": 3.862843998113874e-08, + "loss": 0.7467, "step": 34331 }, { - "epoch": 0.9742338251986379, + "epoch": 0.9728810677548244, "grad_norm": 0.0, - "learning_rate": 3.480121820908622e-08, - "loss": 0.727, + "learning_rate": 3.854788996991987e-08, + "loss": 0.7753, "step": 34332 }, { - "epoch": 0.9742622020431329, + "epoch": 0.9729094051970869, "grad_norm": 0.0, - "learning_rate": 3.4724650124016154e-08, - "loss": 0.8191, + "learning_rate": 3.846742386771074e-08, + "loss": 0.7587, "step": 34333 }, { - "epoch": 0.9742905788876277, + "epoch": 0.9729377426393494, "grad_norm": 0.0, - "learning_rate": 3.46481662160747e-08, - "loss": 0.7554, + "learning_rate": 3.83870416751897e-08, + "loss": 0.6986, "step": 34334 }, { - "epoch": 0.9743189557321226, + "epoch": 0.9729660800816118, "grad_norm": 0.0, - "learning_rate": 3.4571766485907986e-08, - "loss": 0.8233, + "learning_rate": 3.8306743393032864e-08, + "loss": 0.8426, "step": 34335 }, { - "epoch": 0.9743473325766174, + "epoch": 0.9729944175238743, "grad_norm": 0.0, - "learning_rate": 3.449545093416107e-08, - "loss": 0.7627, + "learning_rate": 3.822652902191859e-08, + "loss": 0.7897, "step": 34336 }, { - "epoch": 0.9743757094211124, + "epoch": 0.9730227549661368, "grad_norm": 0.0, - "learning_rate": 3.441921956148009e-08, - "loss": 0.7977, + "learning_rate": 3.814639856252078e-08, + "loss": 0.7995, "step": 34337 }, { - "epoch": 0.9744040862656073, + "epoch": 0.9730510924083993, "grad_norm": 0.0, - "learning_rate": 3.434307236850676e-08, - "loss": 0.8358, + "learning_rate": 3.8066352015515564e-08, + "loss": 0.7112, "step": 34338 }, { - "epoch": 0.9744324631101021, + "epoch": 0.9730794298506616, "grad_norm": 0.0, - "learning_rate": 3.426700935588612e-08, - "loss": 0.7777, + "learning_rate": 3.798638938157684e-08, + "loss": 0.7998, "step": 34339 }, { - "epoch": 0.9744608399545971, + "epoch": 0.9731077672929241, "grad_norm": 0.0, - "learning_rate": 3.4191030524259874e-08, - "loss": 0.8683, + "learning_rate": 3.7906510661377406e-08, + "loss": 0.8952, "step": 34340 }, { - "epoch": 0.974489216799092, + "epoch": 0.9731361047351866, "grad_norm": 0.0, - "learning_rate": 3.4115135874270845e-08, - "loss": 0.8567, + "learning_rate": 3.7826715855591167e-08, + "loss": 0.8029, "step": 34341 }, { - "epoch": 0.9745175936435868, + "epoch": 0.973164442177449, "grad_norm": 0.0, - "learning_rate": 3.403932540655852e-08, - "loss": 0.8204, + "learning_rate": 3.774700496489092e-08, + "loss": 0.7651, "step": 34342 }, { - "epoch": 0.9745459704880817, + "epoch": 0.9731927796197115, "grad_norm": 0.0, - "learning_rate": 3.39635991217635e-08, - "loss": 0.6762, + "learning_rate": 3.766737798994502e-08, + "loss": 0.6938, "step": 34343 }, { - "epoch": 0.9745743473325766, + "epoch": 0.973221117061974, "grad_norm": 0.0, - "learning_rate": 3.388795702052638e-08, - "loss": 0.8548, + "learning_rate": 3.758783493142737e-08, + "loss": 0.8183, "step": 34344 }, { - "epoch": 0.9746027241770715, + "epoch": 0.9732494545042365, "grad_norm": 0.0, - "learning_rate": 3.381239910348555e-08, - "loss": 0.7434, + "learning_rate": 3.7508375790006326e-08, + "loss": 0.8593, "step": 34345 }, { - "epoch": 0.9746311010215664, + "epoch": 0.9732777919464989, "grad_norm": 0.0, - "learning_rate": 3.3736925371279375e-08, - "loss": 0.7881, + "learning_rate": 3.742900056635246e-08, + "loss": 0.7594, "step": 34346 }, { - "epoch": 0.9746594778660613, + "epoch": 0.9733061293887614, "grad_norm": 0.0, - "learning_rate": 3.366153582454623e-08, - "loss": 0.7861, + "learning_rate": 3.7349709261133015e-08, + "loss": 0.8851, "step": 34347 }, { - "epoch": 0.9746878547105562, + "epoch": 0.9733344668310239, "grad_norm": 0.0, - "learning_rate": 3.358623046392118e-08, - "loss": 0.7915, + "learning_rate": 3.727050187501746e-08, + "loss": 0.7161, "step": 34348 }, { - "epoch": 0.974716231555051, + "epoch": 0.9733628042732863, "grad_norm": 0.0, - "learning_rate": 3.3511009290042585e-08, - "loss": 0.7864, + "learning_rate": 3.719137840867082e-08, + "loss": 0.7563, "step": 34349 }, { - "epoch": 0.974744608399546, + "epoch": 0.9733911417155487, "grad_norm": 0.0, - "learning_rate": 3.3435872303543284e-08, - "loss": 0.8337, + "learning_rate": 3.7112338862761446e-08, + "loss": 0.786, "step": 34350 }, { - "epoch": 0.9747729852440409, + "epoch": 0.9734194791578112, "grad_norm": 0.0, - "learning_rate": 3.3360819505059424e-08, - "loss": 0.8161, + "learning_rate": 3.7033383237954356e-08, + "loss": 0.7938, "step": 34351 }, { - "epoch": 0.9748013620885357, + "epoch": 0.9734478166000736, "grad_norm": 0.0, - "learning_rate": 3.3285850895224956e-08, - "loss": 0.8548, + "learning_rate": 3.695451153491458e-08, + "loss": 0.865, "step": 34352 }, { - "epoch": 0.9748297389330306, + "epoch": 0.9734761540423361, "grad_norm": 0.0, - "learning_rate": 3.32109664746727e-08, - "loss": 0.7683, + "learning_rate": 3.6875723754307145e-08, + "loss": 0.8558, "step": 34353 }, { - "epoch": 0.9748581157775256, + "epoch": 0.9735044914845986, "grad_norm": 0.0, - "learning_rate": 3.3136166244035486e-08, - "loss": 0.7524, + "learning_rate": 3.679701989679374e-08, + "loss": 0.771, "step": 34354 }, { - "epoch": 0.9748864926220204, + "epoch": 0.9735328289268611, "grad_norm": 0.0, - "learning_rate": 3.3061450203945025e-08, - "loss": 0.8143, + "learning_rate": 3.67183999630405e-08, + "loss": 0.8317, "step": 34355 }, { - "epoch": 0.9749148694665153, + "epoch": 0.9735611663691235, "grad_norm": 0.0, - "learning_rate": 3.298681835503304e-08, - "loss": 0.8228, + "learning_rate": 3.663986395370689e-08, + "loss": 0.8143, "step": 34356 }, { - "epoch": 0.9749432463110103, + "epoch": 0.973589503811386, "grad_norm": 0.0, - "learning_rate": 3.2912270697929013e-08, - "loss": 0.8817, + "learning_rate": 3.6561411869455723e-08, + "loss": 0.9196, "step": 34357 }, { - "epoch": 0.9749716231555051, + "epoch": 0.9736178412536485, "grad_norm": 0.0, - "learning_rate": 3.283780723326358e-08, - "loss": 0.7329, + "learning_rate": 3.648304371094869e-08, + "loss": 0.8411, "step": 34358 }, { - "epoch": 0.975, + "epoch": 0.9736461786959109, "grad_norm": 0.0, - "learning_rate": 3.276342796166399e-08, - "loss": 0.7585, + "learning_rate": 3.640475947884303e-08, + "loss": 0.8316, "step": 34359 }, { - "epoch": 0.9750283768444948, + "epoch": 0.9736745161381734, "grad_norm": 0.0, - "learning_rate": 3.2689132883760856e-08, - "loss": 0.9182, + "learning_rate": 3.6326559173801565e-08, + "loss": 0.9518, "step": 34360 }, { - "epoch": 0.9750567536889898, + "epoch": 0.9737028535804358, "grad_norm": 0.0, - "learning_rate": 3.261492200018035e-08, - "loss": 0.7124, + "learning_rate": 3.624844279648043e-08, + "loss": 0.7239, "step": 34361 }, { - "epoch": 0.9750851305334847, + "epoch": 0.9737311910226983, "grad_norm": 0.0, - "learning_rate": 3.254079531154975e-08, - "loss": 0.8807, + "learning_rate": 3.61704103475391e-08, + "loss": 0.8007, "step": 34362 }, { - "epoch": 0.9751135073779795, + "epoch": 0.9737595284649607, "grad_norm": 0.0, - "learning_rate": 3.2466752818495205e-08, - "loss": 0.8649, + "learning_rate": 3.6092461827633704e-08, + "loss": 0.7947, "step": 34363 }, { - "epoch": 0.9751418842224745, + "epoch": 0.9737878659072232, "grad_norm": 0.0, - "learning_rate": 3.23927945216429e-08, - "loss": 0.776, + "learning_rate": 3.601459723742262e-08, + "loss": 0.8158, "step": 34364 }, { - "epoch": 0.9751702610669694, + "epoch": 0.9738162033494857, "grad_norm": 0.0, - "learning_rate": 3.231892042161566e-08, - "loss": 0.7925, + "learning_rate": 3.5936816577559765e-08, + "loss": 0.7482, "step": 34365 }, { - "epoch": 0.9751986379114642, + "epoch": 0.9738445407917481, "grad_norm": 0.0, - "learning_rate": 3.2245130519038547e-08, - "loss": 0.8355, + "learning_rate": 3.5859119848701276e-08, + "loss": 0.8337, "step": 34366 }, { - "epoch": 0.9752270147559592, + "epoch": 0.9738728782340106, "grad_norm": 0.0, - "learning_rate": 3.21714248145355e-08, - "loss": 0.7988, + "learning_rate": 3.5781507051502185e-08, + "loss": 0.8426, "step": 34367 }, { - "epoch": 0.975255391600454, + "epoch": 0.9739012156762731, "grad_norm": 0.0, - "learning_rate": 3.209780330872714e-08, - "loss": 0.9551, + "learning_rate": 3.570397818661531e-08, + "loss": 0.88, "step": 34368 }, { - "epoch": 0.9752837684449489, + "epoch": 0.9739295531185356, "grad_norm": 0.0, - "learning_rate": 3.202426600223851e-08, - "loss": 0.7473, + "learning_rate": 3.562653325469345e-08, + "loss": 0.7941, "step": 34369 }, { - "epoch": 0.9753121452894438, + "epoch": 0.973957890560798, "grad_norm": 0.0, - "learning_rate": 3.195081289568802e-08, - "loss": 0.8631, + "learning_rate": 3.554917225638943e-08, + "loss": 0.7552, "step": 34370 }, { - "epoch": 0.9753405221339387, + "epoch": 0.9739862280030605, "grad_norm": 0.0, - "learning_rate": 3.187744398969739e-08, - "loss": 0.8624, + "learning_rate": 3.5471895192354947e-08, + "loss": 0.7843, "step": 34371 }, { - "epoch": 0.9753688989784336, + "epoch": 0.9740145654453229, "grad_norm": 0.0, - "learning_rate": 3.180415928488612e-08, - "loss": 0.8519, + "learning_rate": 3.5394702063241695e-08, + "loss": 0.71, "step": 34372 }, { - "epoch": 0.9753972758229285, + "epoch": 0.9740429028875853, "grad_norm": 0.0, - "learning_rate": 3.17309587818726e-08, - "loss": 0.8909, + "learning_rate": 3.531759286969805e-08, + "loss": 0.8141, "step": 34373 }, { - "epoch": 0.9754256526674234, + "epoch": 0.9740712403298478, "grad_norm": 0.0, - "learning_rate": 3.1657842481276346e-08, - "loss": 0.8277, + "learning_rate": 3.5240567612375706e-08, + "loss": 0.8811, "step": 34374 }, { - "epoch": 0.9754540295119183, + "epoch": 0.9740995777721103, "grad_norm": 0.0, - "learning_rate": 3.1584810383714634e-08, - "loss": 0.7173, + "learning_rate": 3.5163626291921934e-08, + "loss": 0.8598, "step": 34375 }, { - "epoch": 0.9754824063564131, + "epoch": 0.9741279152143727, "grad_norm": 0.0, - "learning_rate": 3.1511862489803644e-08, - "loss": 0.7509, + "learning_rate": 3.50867689089851e-08, + "loss": 0.8634, "step": 34376 }, { - "epoch": 0.975510783200908, + "epoch": 0.9741562526566352, "grad_norm": 0.0, - "learning_rate": 3.143899880015955e-08, - "loss": 0.8255, + "learning_rate": 3.5009995464212465e-08, + "loss": 0.7919, "step": 34377 }, { - "epoch": 0.975539160045403, + "epoch": 0.9741845900988977, "grad_norm": 0.0, - "learning_rate": 3.1366219315399625e-08, - "loss": 0.861, + "learning_rate": 3.4933305958251285e-08, + "loss": 0.773, "step": 34378 }, { - "epoch": 0.9755675368898978, + "epoch": 0.9742129275411602, "grad_norm": 0.0, - "learning_rate": 3.1293524036136724e-08, - "loss": 0.8392, + "learning_rate": 3.485670039174882e-08, + "loss": 0.8571, "step": 34379 }, { - "epoch": 0.9755959137343927, + "epoch": 0.9742412649834226, "grad_norm": 0.0, - "learning_rate": 3.1220912962985905e-08, - "loss": 0.798, + "learning_rate": 3.4780178765346785e-08, + "loss": 0.8449, "step": 34380 }, { - "epoch": 0.9756242905788877, + "epoch": 0.974269602425685, "grad_norm": 0.0, - "learning_rate": 3.1148386096560015e-08, - "loss": 0.6946, + "learning_rate": 3.470374107969243e-08, + "loss": 0.7977, "step": 34381 }, { - "epoch": 0.9756526674233825, + "epoch": 0.9742979398679475, "grad_norm": 0.0, - "learning_rate": 3.107594343747189e-08, - "loss": 0.8101, + "learning_rate": 3.4627387335429696e-08, + "loss": 0.7961, "step": 34382 }, { - "epoch": 0.9756810442678774, + "epoch": 0.9743262773102099, "grad_norm": 0.0, - "learning_rate": 3.100358498633327e-08, - "loss": 0.791, + "learning_rate": 3.455111753320028e-08, + "loss": 0.7821, "step": 34383 }, { - "epoch": 0.9757094211123724, + "epoch": 0.9743546147524724, "grad_norm": 0.0, - "learning_rate": 3.093131074375477e-08, - "loss": 0.8989, + "learning_rate": 3.4474931673647014e-08, + "loss": 0.7451, "step": 34384 }, { - "epoch": 0.9757377979568672, + "epoch": 0.9743829521947349, "grad_norm": 0.0, - "learning_rate": 3.085912071034813e-08, - "loss": 0.8335, + "learning_rate": 3.439882975741271e-08, + "loss": 0.8447, "step": 34385 }, { - "epoch": 0.9757661748013621, + "epoch": 0.9744112896369974, "grad_norm": 0.0, - "learning_rate": 3.078701488672287e-08, - "loss": 0.7396, + "learning_rate": 3.4322811785137965e-08, + "loss": 0.7228, "step": 34386 }, { - "epoch": 0.9757945516458569, + "epoch": 0.9744396270792598, "grad_norm": 0.0, - "learning_rate": 3.071499327348848e-08, - "loss": 0.8053, + "learning_rate": 3.424687775746227e-08, + "loss": 0.7593, "step": 34387 }, { - "epoch": 0.9758229284903519, + "epoch": 0.9744679645215223, "grad_norm": 0.0, - "learning_rate": 3.064305587125227e-08, - "loss": 0.7455, + "learning_rate": 3.4171027675026225e-08, + "loss": 0.7963, "step": 34388 }, { - "epoch": 0.9758513053348468, + "epoch": 0.9744963019637848, "grad_norm": 0.0, - "learning_rate": 3.057120268062153e-08, - "loss": 0.779, + "learning_rate": 3.4095261538468204e-08, + "loss": 0.7653, "step": 34389 }, { - "epoch": 0.9758796821793416, + "epoch": 0.9745246394060472, "grad_norm": 0.0, - "learning_rate": 3.049943370220465e-08, - "loss": 0.7872, + "learning_rate": 3.401957934842659e-08, + "loss": 0.8083, "step": 34390 }, { - "epoch": 0.9759080590238366, + "epoch": 0.9745529768483097, "grad_norm": 0.0, - "learning_rate": 3.042774893660671e-08, - "loss": 0.7243, + "learning_rate": 3.394398110553754e-08, + "loss": 0.8945, "step": 34391 }, { - "epoch": 0.9759364358683315, + "epoch": 0.9745813142905722, "grad_norm": 0.0, - "learning_rate": 3.0356148384435014e-08, - "loss": 0.7816, + "learning_rate": 3.3868466810440534e-08, + "loss": 0.8145, "step": 34392 }, { - "epoch": 0.9759648127128263, + "epoch": 0.9746096517328346, "grad_norm": 0.0, - "learning_rate": 3.028463204629128e-08, - "loss": 0.8369, + "learning_rate": 3.379303646377064e-08, + "loss": 0.8031, "step": 34393 }, { - "epoch": 0.9759931895573212, + "epoch": 0.974637989175097, "grad_norm": 0.0, - "learning_rate": 3.021319992278282e-08, - "loss": 0.7908, + "learning_rate": 3.371769006616177e-08, + "loss": 0.8548, "step": 34394 }, { - "epoch": 0.9760215664018161, + "epoch": 0.9746663266173595, "grad_norm": 0.0, - "learning_rate": 3.014185201451136e-08, - "loss": 0.8074, + "learning_rate": 3.3642427618250094e-08, + "loss": 0.7399, "step": 34395 }, { - "epoch": 0.976049943246311, + "epoch": 0.974694664059622, "grad_norm": 0.0, - "learning_rate": 3.0070588322079765e-08, - "loss": 0.8333, + "learning_rate": 3.356724912066955e-08, + "loss": 0.7927, "step": 34396 }, { - "epoch": 0.9760783200908059, + "epoch": 0.9747230015018844, "grad_norm": 0.0, - "learning_rate": 2.9999408846089763e-08, - "loss": 0.8864, + "learning_rate": 3.349215457405186e-08, + "loss": 0.7842, "step": 34397 }, { - "epoch": 0.9761066969353008, + "epoch": 0.9747513389441469, "grad_norm": 0.0, - "learning_rate": 2.99283135871431e-08, - "loss": 0.8532, + "learning_rate": 3.3417143979032064e-08, + "loss": 0.804, "step": 34398 }, { - "epoch": 0.9761350737797957, + "epoch": 0.9747796763864094, "grad_norm": 0.0, - "learning_rate": 2.9857302545840403e-08, - "loss": 0.7575, + "learning_rate": 3.3342217336239656e-08, + "loss": 0.7916, "step": 34399 }, { - "epoch": 0.9761634506242906, + "epoch": 0.9748080138286718, "grad_norm": 0.0, - "learning_rate": 2.97863757227812e-08, - "loss": 0.6773, + "learning_rate": 3.326737464630747e-08, + "loss": 0.957, "step": 34400 }, { - "epoch": 0.9761918274687855, + "epoch": 0.9748363512709343, "grad_norm": 0.0, - "learning_rate": 2.971553311856501e-08, - "loss": 0.7933, + "learning_rate": 3.3192615909865e-08, + "loss": 0.755, "step": 34401 }, { - "epoch": 0.9762202043132804, + "epoch": 0.9748646887131968, "grad_norm": 0.0, - "learning_rate": 2.9644774733789128e-08, - "loss": 0.775, + "learning_rate": 3.3117941127541745e-08, + "loss": 0.7043, "step": 34402 }, { - "epoch": 0.9762485811577752, + "epoch": 0.9748930261554593, "grad_norm": 0.0, - "learning_rate": 2.9574100569051967e-08, - "loss": 0.7714, + "learning_rate": 3.30433502999683e-08, + "loss": 0.781, "step": 34403 }, { - "epoch": 0.9762769580022701, + "epoch": 0.9749213635977216, "grad_norm": 0.0, - "learning_rate": 2.9503510624950825e-08, - "loss": 0.8212, + "learning_rate": 3.2968843427770844e-08, + "loss": 0.7032, "step": 34404 }, { - "epoch": 0.9763053348467651, + "epoch": 0.9749497010399841, "grad_norm": 0.0, - "learning_rate": 2.9433004902081897e-08, - "loss": 0.7227, + "learning_rate": 3.2894420511578874e-08, + "loss": 0.7976, "step": 34405 }, { - "epoch": 0.9763337116912599, + "epoch": 0.9749780384822466, "grad_norm": 0.0, - "learning_rate": 2.9362583401041367e-08, - "loss": 0.7456, + "learning_rate": 3.2820081552017435e-08, + "loss": 0.7777, "step": 34406 }, { - "epoch": 0.9763620885357548, + "epoch": 0.975006375924509, "grad_norm": 0.0, - "learning_rate": 2.9292246122422096e-08, - "loss": 0.7368, + "learning_rate": 3.274582654971381e-08, + "loss": 0.7013, "step": 34407 }, { - "epoch": 0.9763904653802498, + "epoch": 0.9750347133667715, "grad_norm": 0.0, - "learning_rate": 2.9221993066819166e-08, - "loss": 0.8409, + "learning_rate": 3.2671655505294165e-08, + "loss": 0.7557, "step": 34408 }, { - "epoch": 0.9764188422247446, + "epoch": 0.975063050809034, "grad_norm": 0.0, - "learning_rate": 2.915182423482765e-08, - "loss": 0.8012, + "learning_rate": 3.259756841938244e-08, + "loss": 0.7699, "step": 34409 }, { - "epoch": 0.9764472190692395, + "epoch": 0.9750913882512965, "grad_norm": 0.0, - "learning_rate": 2.9081739627038197e-08, - "loss": 0.7487, + "learning_rate": 3.252356529260148e-08, + "loss": 0.7805, "step": 34410 }, { - "epoch": 0.9764755959137343, + "epoch": 0.9751197256935589, "grad_norm": 0.0, - "learning_rate": 2.9011739244043657e-08, - "loss": 0.7617, + "learning_rate": 3.244964612557633e-08, + "loss": 0.748, "step": 34411 }, { - "epoch": 0.9765039727582293, + "epoch": 0.9751480631358214, "grad_norm": 0.0, - "learning_rate": 2.894182308643467e-08, - "loss": 0.825, + "learning_rate": 3.237581091892983e-08, + "loss": 0.766, "step": 34412 }, { - "epoch": 0.9765323496027242, + "epoch": 0.9751764005780839, "grad_norm": 0.0, - "learning_rate": 2.8871991154802993e-08, - "loss": 0.8644, + "learning_rate": 3.230205967328259e-08, + "loss": 0.8309, "step": 34413 }, { - "epoch": 0.976560726447219, + "epoch": 0.9752047380203462, "grad_norm": 0.0, - "learning_rate": 2.8802243449737034e-08, - "loss": 0.8084, + "learning_rate": 3.222839238925635e-08, + "loss": 0.827, "step": 34414 }, { - "epoch": 0.976589103291714, + "epoch": 0.9752330754626087, "grad_norm": 0.0, - "learning_rate": 2.8732579971827434e-08, - "loss": 0.867, + "learning_rate": 3.215480906747281e-08, + "loss": 0.7973, "step": 34415 }, { - "epoch": 0.9766174801362089, + "epoch": 0.9752614129048712, "grad_norm": 0.0, - "learning_rate": 2.8663000721662615e-08, - "loss": 0.8447, + "learning_rate": 3.208130970855039e-08, + "loss": 0.7363, "step": 34416 }, { - "epoch": 0.9766458569807037, + "epoch": 0.9752897503471337, "grad_norm": 0.0, - "learning_rate": 2.859350569982877e-08, - "loss": 0.8022, + "learning_rate": 3.200789431310858e-08, + "loss": 0.929, "step": 34417 }, { - "epoch": 0.9766742338251987, + "epoch": 0.9753180877893961, "grad_norm": 0.0, - "learning_rate": 2.8524094906914323e-08, - "loss": 0.7365, + "learning_rate": 3.193456288176577e-08, + "loss": 0.8075, "step": 34418 }, { - "epoch": 0.9767026106696935, + "epoch": 0.9753464252316586, "grad_norm": 0.0, - "learning_rate": 2.8454768343504356e-08, - "loss": 0.7827, + "learning_rate": 3.186131541513926e-08, + "loss": 0.7647, "step": 34419 }, { - "epoch": 0.9767309875141884, + "epoch": 0.9753747626739211, "grad_norm": 0.0, - "learning_rate": 2.8385526010185073e-08, - "loss": 0.8255, + "learning_rate": 3.178815191384632e-08, + "loss": 0.7872, "step": 34420 }, { - "epoch": 0.9767593643586833, + "epoch": 0.9754031001161835, "grad_norm": 0.0, - "learning_rate": 2.8316367907542663e-08, - "loss": 0.9013, + "learning_rate": 3.171507237850424e-08, + "loss": 0.7138, "step": 34421 }, { - "epoch": 0.9767877412031782, + "epoch": 0.975431437558446, "grad_norm": 0.0, - "learning_rate": 2.824729403615889e-08, - "loss": 0.8164, + "learning_rate": 3.1642076809726973e-08, + "loss": 0.8861, "step": 34422 }, { - "epoch": 0.9768161180476731, + "epoch": 0.9754597750007085, "grad_norm": 0.0, - "learning_rate": 2.817830439661995e-08, - "loss": 0.7357, + "learning_rate": 3.156916520813069e-08, + "loss": 0.7856, "step": 34423 }, { - "epoch": 0.976844494892168, + "epoch": 0.9754881124429708, "grad_norm": 0.0, - "learning_rate": 2.8109398989505376e-08, - "loss": 0.8814, + "learning_rate": 3.149633757432824e-08, + "loss": 0.849, "step": 34424 }, { - "epoch": 0.9768728717366629, + "epoch": 0.9755164498852333, "grad_norm": 0.0, - "learning_rate": 2.804057781540026e-08, - "loss": 0.8669, + "learning_rate": 3.142359390893468e-08, + "loss": 0.9491, "step": 34425 }, { - "epoch": 0.9769012485811578, + "epoch": 0.9755447873274958, "grad_norm": 0.0, - "learning_rate": 2.7971840874884138e-08, - "loss": 0.731, + "learning_rate": 3.1350934212561746e-08, + "loss": 0.686, "step": 34426 }, { - "epoch": 0.9769296254256526, + "epoch": 0.9755731247697583, "grad_norm": 0.0, - "learning_rate": 2.7903188168538763e-08, - "loss": 0.7604, + "learning_rate": 3.127835848582117e-08, + "loss": 0.8862, "step": 34427 }, { - "epoch": 0.9769580022701475, + "epoch": 0.9756014622120207, "grad_norm": 0.0, - "learning_rate": 2.7834619696942565e-08, - "loss": 0.7338, + "learning_rate": 3.120586672932469e-08, + "loss": 0.8623, "step": 34428 }, { - "epoch": 0.9769863791146425, + "epoch": 0.9756297996542832, "grad_norm": 0.0, - "learning_rate": 2.776613546067619e-08, - "loss": 0.928, + "learning_rate": 3.1133458943684024e-08, + "loss": 0.8602, "step": 34429 }, { - "epoch": 0.9770147559591373, + "epoch": 0.9756581370965457, "grad_norm": 0.0, - "learning_rate": 2.7697735460316954e-08, - "loss": 0.7461, + "learning_rate": 3.106113512950759e-08, + "loss": 0.8011, "step": 34430 }, { - "epoch": 0.9770431328036322, + "epoch": 0.9756864745388081, "grad_norm": 0.0, - "learning_rate": 2.7629419696444392e-08, - "loss": 0.8523, + "learning_rate": 3.09888952874049e-08, + "loss": 0.7968, "step": 34431 }, { - "epoch": 0.9770715096481272, + "epoch": 0.9757148119810706, "grad_norm": 0.0, - "learning_rate": 2.7561188169633602e-08, - "loss": 0.7994, + "learning_rate": 3.091673941798545e-08, + "loss": 0.8025, "step": 34432 }, { - "epoch": 0.977099886492622, + "epoch": 0.9757431494233331, "grad_norm": 0.0, - "learning_rate": 2.7493040880461896e-08, - "loss": 0.8407, + "learning_rate": 3.084466752185544e-08, + "loss": 0.6988, "step": 34433 }, { - "epoch": 0.9771282633371169, + "epoch": 0.9757714868655956, "grad_norm": 0.0, - "learning_rate": 2.7424977829504373e-08, - "loss": 0.8142, + "learning_rate": 3.0772679599623266e-08, + "loss": 0.7149, "step": 34434 }, { - "epoch": 0.9771566401816119, + "epoch": 0.9757998243078579, "grad_norm": 0.0, - "learning_rate": 2.735699901733613e-08, - "loss": 0.8216, + "learning_rate": 3.0700775651894e-08, + "loss": 0.7885, "step": 34435 }, { - "epoch": 0.9771850170261067, + "epoch": 0.9758281617501204, "grad_norm": 0.0, - "learning_rate": 2.7289104444532255e-08, - "loss": 0.776, + "learning_rate": 3.062895567927382e-08, + "loss": 0.8043, "step": 34436 }, { - "epoch": 0.9772133938706016, + "epoch": 0.9758564991923829, "grad_norm": 0.0, - "learning_rate": 2.722129411166563e-08, - "loss": 0.7679, + "learning_rate": 3.055721968236891e-08, + "loss": 0.8584, "step": 34437 }, { - "epoch": 0.9772417707150964, + "epoch": 0.9758848366346453, "grad_norm": 0.0, - "learning_rate": 2.715356801930913e-08, - "loss": 0.7772, + "learning_rate": 3.048556766178212e-08, + "loss": 0.7718, "step": 34438 }, { - "epoch": 0.9772701475595914, + "epoch": 0.9759131740769078, "grad_norm": 0.0, - "learning_rate": 2.7085926168034517e-08, - "loss": 0.7564, + "learning_rate": 3.0413999618117416e-08, + "loss": 0.8098, "step": 34439 }, { - "epoch": 0.9772985244040863, + "epoch": 0.9759415115191703, "grad_norm": 0.0, - "learning_rate": 2.7018368558413553e-08, - "loss": 0.8296, + "learning_rate": 3.034251555197876e-08, + "loss": 0.713, "step": 34440 }, { - "epoch": 0.9773269012485811, + "epoch": 0.9759698489614328, "grad_norm": 0.0, - "learning_rate": 2.6950895191016902e-08, - "loss": 0.7985, + "learning_rate": 3.027111546396677e-08, + "loss": 0.7868, "step": 34441 }, { - "epoch": 0.9773552780930761, + "epoch": 0.9759981864036952, "grad_norm": 0.0, - "learning_rate": 2.68835060664141e-08, - "loss": 0.7898, + "learning_rate": 3.01997993546832e-08, + "loss": 0.7911, "step": 34442 }, { - "epoch": 0.977383654937571, + "epoch": 0.9760265238459577, "grad_norm": 0.0, - "learning_rate": 2.6816201185174695e-08, - "loss": 0.7565, + "learning_rate": 3.0128567224728676e-08, + "loss": 0.7195, "step": 34443 }, { - "epoch": 0.9774120317820658, + "epoch": 0.9760548612882202, "grad_norm": 0.0, - "learning_rate": 2.674898054786712e-08, - "loss": 0.7808, + "learning_rate": 3.005741907470272e-08, + "loss": 0.7971, "step": 34444 }, { - "epoch": 0.9774404086265607, + "epoch": 0.9760831987304825, "grad_norm": 0.0, - "learning_rate": 2.6681844155058702e-08, - "loss": 0.914, + "learning_rate": 2.998635490520707e-08, + "loss": 0.7258, "step": 34445 }, { - "epoch": 0.9774687854710556, + "epoch": 0.976111536172745, "grad_norm": 0.0, - "learning_rate": 2.661479200731787e-08, - "loss": 0.7456, + "learning_rate": 2.991537471683681e-08, + "loss": 0.7904, "step": 34446 }, { - "epoch": 0.9774971623155505, + "epoch": 0.9761398736150075, "grad_norm": 0.0, - "learning_rate": 2.654782410521084e-08, - "loss": 0.859, + "learning_rate": 2.984447851019367e-08, + "loss": 0.7723, "step": 34447 }, { - "epoch": 0.9775255391600454, + "epoch": 0.9761682110572699, "grad_norm": 0.0, - "learning_rate": 2.6480940449301607e-08, - "loss": 0.7823, + "learning_rate": 2.977366628587164e-08, + "loss": 0.7774, "step": 34448 }, { - "epoch": 0.9775539160045403, + "epoch": 0.9761965484995324, "grad_norm": 0.0, - "learning_rate": 2.6414141040156382e-08, - "loss": 0.8461, + "learning_rate": 2.9702938044468e-08, + "loss": 0.8126, "step": 34449 }, { - "epoch": 0.9775822928490352, + "epoch": 0.9762248859417949, "grad_norm": 0.0, - "learning_rate": 2.634742587833916e-08, - "loss": 0.8134, + "learning_rate": 2.9632293786578946e-08, + "loss": 0.8324, "step": 34450 }, { - "epoch": 0.97761066969353, + "epoch": 0.9762532233840574, "grad_norm": 0.0, - "learning_rate": 2.6280794964412827e-08, - "loss": 0.8408, + "learning_rate": 2.9561733512800673e-08, + "loss": 0.8884, "step": 34451 }, { - "epoch": 0.977639046538025, + "epoch": 0.9762815608263198, "grad_norm": 0.0, - "learning_rate": 2.621424829894248e-08, - "loss": 0.7287, + "learning_rate": 2.949125722372492e-08, + "loss": 0.8991, "step": 34452 }, { - "epoch": 0.9776674233825199, + "epoch": 0.9763098982685823, "grad_norm": 0.0, - "learning_rate": 2.6147785882487676e-08, - "loss": 0.8332, + "learning_rate": 2.9420864919947888e-08, + "loss": 0.7005, "step": 34453 }, { - "epoch": 0.9776958002270147, + "epoch": 0.9763382357108448, "grad_norm": 0.0, - "learning_rate": 2.6081407715611297e-08, - "loss": 0.8653, + "learning_rate": 2.935055660206021e-08, + "loss": 0.7507, "step": 34454 }, { - "epoch": 0.9777241770715096, + "epoch": 0.9763665731531072, "grad_norm": 0.0, - "learning_rate": 2.6015113798874004e-08, - "loss": 0.7367, + "learning_rate": 2.9280332270656962e-08, + "loss": 0.8076, "step": 34455 }, { - "epoch": 0.9777525539160046, + "epoch": 0.9763949105953696, "grad_norm": 0.0, - "learning_rate": 2.5948904132835352e-08, - "loss": 0.8765, + "learning_rate": 2.921019192632657e-08, + "loss": 0.6902, "step": 34456 }, { - "epoch": 0.9777809307604994, + "epoch": 0.9764232480376321, "grad_norm": 0.0, - "learning_rate": 2.588277871805378e-08, - "loss": 0.9384, + "learning_rate": 2.9140135569661886e-08, + "loss": 0.8526, "step": 34457 }, { - "epoch": 0.9778093076049943, + "epoch": 0.9764515854798946, "grad_norm": 0.0, - "learning_rate": 2.5816737555089954e-08, - "loss": 0.795, + "learning_rate": 2.9070163201252445e-08, + "loss": 0.7374, "step": 34458 }, { - "epoch": 0.9778376844494893, + "epoch": 0.976479922922157, "grad_norm": 0.0, - "learning_rate": 2.5750780644500096e-08, - "loss": 0.7636, + "learning_rate": 2.900027482168777e-08, + "loss": 0.8354, "step": 34459 }, { - "epoch": 0.9778660612939841, + "epoch": 0.9765082603644195, "grad_norm": 0.0, - "learning_rate": 2.5684907986841534e-08, - "loss": 0.8089, + "learning_rate": 2.8930470431556278e-08, + "loss": 0.8313, "step": 34460 }, { - "epoch": 0.977894438138479, + "epoch": 0.976536597806682, "grad_norm": 0.0, - "learning_rate": 2.5619119582670494e-08, - "loss": 0.8619, + "learning_rate": 2.8860750031446395e-08, + "loss": 0.7939, "step": 34461 }, { - "epoch": 0.9779228149829738, + "epoch": 0.9765649352489444, "grad_norm": 0.0, - "learning_rate": 2.5553415432544303e-08, - "loss": 0.7141, + "learning_rate": 2.879111362194431e-08, + "loss": 0.752, "step": 34462 }, { - "epoch": 0.9779511918274688, + "epoch": 0.9765932726912069, "grad_norm": 0.0, - "learning_rate": 2.5487795537015858e-08, - "loss": 0.8015, + "learning_rate": 2.8721561203637338e-08, + "loss": 0.7466, "step": 34463 }, { - "epoch": 0.9779795686719637, + "epoch": 0.9766216101334694, "grad_norm": 0.0, - "learning_rate": 2.5422259896640266e-08, - "loss": 0.7659, + "learning_rate": 2.865209277711167e-08, + "loss": 0.8294, "step": 34464 }, { - "epoch": 0.9780079455164585, + "epoch": 0.9766499475757319, "grad_norm": 0.0, - "learning_rate": 2.535680851197153e-08, - "loss": 0.8988, + "learning_rate": 2.8582708342952402e-08, + "loss": 0.7314, "step": 34465 }, { - "epoch": 0.9780363223609535, + "epoch": 0.9766782850179943, "grad_norm": 0.0, - "learning_rate": 2.529144138356254e-08, - "loss": 0.8627, + "learning_rate": 2.8513407901744618e-08, + "loss": 0.8699, "step": 34466 }, { - "epoch": 0.9780646992054484, + "epoch": 0.9767066224602567, "grad_norm": 0.0, - "learning_rate": 2.522615851196508e-08, - "loss": 0.7314, + "learning_rate": 2.8444191454070068e-08, + "loss": 0.9146, "step": 34467 }, { - "epoch": 0.9780930760499432, + "epoch": 0.9767349599025192, "grad_norm": 0.0, - "learning_rate": 2.516095989772982e-08, - "loss": 0.7846, + "learning_rate": 2.837505900051274e-08, + "loss": 0.807, "step": 34468 }, { - "epoch": 0.9781214528944382, + "epoch": 0.9767632973447816, "grad_norm": 0.0, - "learning_rate": 2.5095845541409648e-08, - "loss": 0.7404, + "learning_rate": 2.8306010541655493e-08, + "loss": 0.7359, "step": 34469 }, { - "epoch": 0.978149829738933, + "epoch": 0.9767916347870441, "grad_norm": 0.0, - "learning_rate": 2.5030815443551905e-08, - "loss": 0.8213, + "learning_rate": 2.8237046078080087e-08, + "loss": 0.8734, "step": 34470 }, { - "epoch": 0.9781782065834279, + "epoch": 0.9768199722293066, "grad_norm": 0.0, - "learning_rate": 2.496586960470837e-08, - "loss": 0.8307, + "learning_rate": 2.816816561036717e-08, + "loss": 0.7559, "step": 34471 }, { - "epoch": 0.9782065834279228, + "epoch": 0.976848309671569, "grad_norm": 0.0, - "learning_rate": 2.490100802542639e-08, - "loss": 0.8579, + "learning_rate": 2.8099369139096277e-08, + "loss": 0.8329, "step": 34472 }, { - "epoch": 0.9782349602724177, + "epoch": 0.9768766471138315, "grad_norm": 0.0, - "learning_rate": 2.4836230706253294e-08, - "loss": 0.7389, + "learning_rate": 2.8030656664846944e-08, + "loss": 0.7521, "step": 34473 }, { - "epoch": 0.9782633371169126, + "epoch": 0.976904984556094, "grad_norm": 0.0, - "learning_rate": 2.4771537647737543e-08, - "loss": 0.8865, + "learning_rate": 2.796202818819871e-08, + "loss": 0.7337, "step": 34474 }, { - "epoch": 0.9782917139614075, + "epoch": 0.9769333219983565, "grad_norm": 0.0, - "learning_rate": 2.470692885042536e-08, - "loss": 0.7532, + "learning_rate": 2.7893483709728885e-08, + "loss": 0.7912, "step": 34475 }, { - "epoch": 0.9783200908059024, + "epoch": 0.9769616594406189, "grad_norm": 0.0, - "learning_rate": 2.4642404314861866e-08, - "loss": 0.7963, + "learning_rate": 2.7825023230015903e-08, + "loss": 0.7359, "step": 34476 }, { - "epoch": 0.9783484676503973, + "epoch": 0.9769899968828814, "grad_norm": 0.0, - "learning_rate": 2.4577964041592185e-08, - "loss": 0.8846, + "learning_rate": 2.7756646749635962e-08, + "loss": 0.8465, "step": 34477 }, { - "epoch": 0.9783768444948922, + "epoch": 0.9770183343251438, "grad_norm": 0.0, - "learning_rate": 2.4513608031160318e-08, - "loss": 0.7597, + "learning_rate": 2.7688354269164164e-08, + "loss": 0.7693, "step": 34478 }, { - "epoch": 0.978405221339387, + "epoch": 0.9770466717674062, "grad_norm": 0.0, - "learning_rate": 2.444933628411139e-08, - "loss": 0.874, + "learning_rate": 2.7620145789177823e-08, + "loss": 0.8704, "step": 34479 }, { - "epoch": 0.978433598183882, + "epoch": 0.9770750092096687, "grad_norm": 0.0, - "learning_rate": 2.438514880098719e-08, + "learning_rate": 2.75520213102487e-08, "loss": 0.7699, "step": 34480 }, { - "epoch": 0.9784619750283768, + "epoch": 0.9771033466519312, "grad_norm": 0.0, - "learning_rate": 2.4321045582329504e-08, - "loss": 0.7774, + "learning_rate": 2.7483980832953006e-08, + "loss": 0.826, "step": 34481 }, { - "epoch": 0.9784903518728717, + "epoch": 0.9771316840941937, "grad_norm": 0.0, - "learning_rate": 2.4257026628681236e-08, - "loss": 0.7621, + "learning_rate": 2.7416024357862503e-08, + "loss": 0.7904, "step": 34482 }, { - "epoch": 0.9785187287173667, + "epoch": 0.9771600215364561, "grad_norm": 0.0, - "learning_rate": 2.419309194058195e-08, - "loss": 0.7491, + "learning_rate": 2.7348151885550066e-08, + "loss": 0.8495, "step": 34483 }, { - "epoch": 0.9785471055618615, + "epoch": 0.9771883589787186, "grad_norm": 0.0, - "learning_rate": 2.412924151857121e-08, - "loss": 0.765, + "learning_rate": 2.7280363416587463e-08, + "loss": 0.7062, "step": 34484 }, { - "epoch": 0.9785754824063564, + "epoch": 0.9772166964209811, "grad_norm": 0.0, - "learning_rate": 2.4065475363189705e-08, - "loss": 0.762, + "learning_rate": 2.7212658951546457e-08, + "loss": 0.7952, "step": 34485 }, { - "epoch": 0.9786038592508514, + "epoch": 0.9772450338632435, "grad_norm": 0.0, - "learning_rate": 2.4001793474976996e-08, - "loss": 0.7161, + "learning_rate": 2.714503849099548e-08, + "loss": 0.7263, "step": 34486 }, { - "epoch": 0.9786322360953462, + "epoch": 0.977273371305506, "grad_norm": 0.0, - "learning_rate": 2.3938195854468217e-08, - "loss": 0.7881, + "learning_rate": 2.707750203550519e-08, + "loss": 0.8005, "step": 34487 }, { - "epoch": 0.9786606129398411, + "epoch": 0.9773017087477684, "grad_norm": 0.0, - "learning_rate": 2.387468250220182e-08, - "loss": 0.9042, + "learning_rate": 2.7010049585645125e-08, + "loss": 0.7466, "step": 34488 }, { - "epoch": 0.9786889897843359, + "epoch": 0.9773300461900309, "grad_norm": 0.0, - "learning_rate": 2.3811253418715152e-08, - "loss": 0.8894, + "learning_rate": 2.6942681141981508e-08, + "loss": 0.8287, "step": 34489 }, { - "epoch": 0.9787173666288309, + "epoch": 0.9773583836322933, "grad_norm": 0.0, - "learning_rate": 2.3747908604542237e-08, - "loss": 0.7759, + "learning_rate": 2.687539670508388e-08, + "loss": 0.7694, "step": 34490 }, { - "epoch": 0.9787457434733258, + "epoch": 0.9773867210745558, "grad_norm": 0.0, - "learning_rate": 2.3684648060220416e-08, - "loss": 0.8402, + "learning_rate": 2.680819627551845e-08, + "loss": 0.834, "step": 34491 }, { - "epoch": 0.9787741203178206, + "epoch": 0.9774150585168183, "grad_norm": 0.0, - "learning_rate": 2.36214717862826e-08, - "loss": 0.81, + "learning_rate": 2.6741079853851436e-08, + "loss": 0.7983, "step": 34492 }, { - "epoch": 0.9788024971623156, + "epoch": 0.9774433959590807, "grad_norm": 0.0, - "learning_rate": 2.35583797832617e-08, - "loss": 0.7892, + "learning_rate": 2.6674047440646834e-08, + "loss": 0.7964, "step": 34493 }, { - "epoch": 0.9788308740068105, + "epoch": 0.9774717334013432, "grad_norm": 0.0, - "learning_rate": 2.3495372051692832e-08, - "loss": 0.8129, + "learning_rate": 2.6607099036470853e-08, + "loss": 0.7761, "step": 34494 }, { - "epoch": 0.9788592508513053, + "epoch": 0.9775000708436057, "grad_norm": 0.0, - "learning_rate": 2.3432448592106694e-08, - "loss": 0.8466, + "learning_rate": 2.6540234641886375e-08, + "loss": 0.8157, "step": 34495 }, { - "epoch": 0.9788876276958002, + "epoch": 0.9775284082858681, "grad_norm": 0.0, - "learning_rate": 2.3369609405035076e-08, - "loss": 0.753, + "learning_rate": 2.64734542574574e-08, + "loss": 0.9175, "step": 34496 }, { - "epoch": 0.9789160045402951, + "epoch": 0.9775567457281306, "grad_norm": 0.0, - "learning_rate": 2.330685449100867e-08, - "loss": 0.8636, + "learning_rate": 2.6406757883745693e-08, + "loss": 0.7208, "step": 34497 }, { - "epoch": 0.97894438138479, + "epoch": 0.977585083170393, "grad_norm": 0.0, - "learning_rate": 2.3244183850558154e-08, - "loss": 0.818, + "learning_rate": 2.6340145521314143e-08, + "loss": 0.8014, "step": 34498 }, { - "epoch": 0.9789727582292849, + "epoch": 0.9776134206126555, "grad_norm": 0.0, - "learning_rate": 2.3181597484212005e-08, - "loss": 0.7952, + "learning_rate": 2.6273617170722298e-08, + "loss": 0.7296, "step": 34499 }, { - "epoch": 0.9790011350737798, + "epoch": 0.9776417580549179, "grad_norm": 0.0, - "learning_rate": 2.3119095392499792e-08, - "loss": 0.7253, + "learning_rate": 2.6207172832531935e-08, + "loss": 0.6443, "step": 34500 }, { - "epoch": 0.9790295119182747, + "epoch": 0.9776700954971804, "grad_norm": 0.0, - "learning_rate": 2.3056677575948872e-08, - "loss": 0.6704, + "learning_rate": 2.6140812507302606e-08, + "loss": 0.7393, "step": 34501 }, { - "epoch": 0.9790578887627696, + "epoch": 0.9776984329394429, "grad_norm": 0.0, - "learning_rate": 2.29943440350866e-08, - "loss": 0.7218, + "learning_rate": 2.6074536195592747e-08, + "loss": 0.7736, "step": 34502 }, { - "epoch": 0.9790862656072645, + "epoch": 0.9777267703817053, "grad_norm": 0.0, - "learning_rate": 2.293209477043923e-08, - "loss": 0.9178, + "learning_rate": 2.60083438979597e-08, + "loss": 0.7831, "step": 34503 }, { - "epoch": 0.9791146424517594, + "epoch": 0.9777551078239678, "grad_norm": 0.0, - "learning_rate": 2.2869929782534108e-08, - "loss": 0.797, + "learning_rate": 2.5942235614963006e-08, + "loss": 0.6858, "step": 34504 }, { - "epoch": 0.9791430192962542, + "epoch": 0.9777834452662303, "grad_norm": 0.0, - "learning_rate": 2.280784907189415e-08, - "loss": 0.8909, + "learning_rate": 2.5876211347158896e-08, + "loss": 0.8167, "step": 34505 }, { - "epoch": 0.9791713961407491, + "epoch": 0.9778117827084928, "grad_norm": 0.0, - "learning_rate": 2.2745852639045608e-08, - "loss": 0.7682, + "learning_rate": 2.5810271095102478e-08, + "loss": 0.7428, "step": 34506 }, { - "epoch": 0.9791997729852441, + "epoch": 0.9778401201507552, "grad_norm": 0.0, - "learning_rate": 2.268394048451028e-08, - "loss": 0.78, + "learning_rate": 2.5744414859351084e-08, + "loss": 0.7214, "step": 34507 }, { - "epoch": 0.9792281498297389, + "epoch": 0.9778684575930177, "grad_norm": 0.0, - "learning_rate": 2.2622112608813307e-08, - "loss": 0.746, + "learning_rate": 2.567864264045761e-08, + "loss": 0.8067, "step": 34508 }, { - "epoch": 0.9792565266742338, + "epoch": 0.9778967950352802, "grad_norm": 0.0, - "learning_rate": 2.256036901247538e-08, - "loss": 0.8212, + "learning_rate": 2.5612954438977155e-08, + "loss": 0.6474, "step": 34509 }, { - "epoch": 0.9792849035187288, + "epoch": 0.9779251324775425, "grad_norm": 0.0, - "learning_rate": 2.2498709696018307e-08, - "loss": 0.7631, + "learning_rate": 2.554735025546151e-08, + "loss": 0.8595, "step": 34510 }, { - "epoch": 0.9793132803632236, + "epoch": 0.977953469919805, "grad_norm": 0.0, - "learning_rate": 2.2437134659962777e-08, - "loss": 0.8621, + "learning_rate": 2.5481830090465787e-08, + "loss": 0.7577, "step": 34511 }, { - "epoch": 0.9793416572077185, + "epoch": 0.9779818073620675, "grad_norm": 0.0, - "learning_rate": 2.23756439048306e-08, - "loss": 0.7473, + "learning_rate": 2.541639394454065e-08, + "loss": 0.8628, "step": 34512 }, { - "epoch": 0.9793700340522133, + "epoch": 0.9780101448043299, "grad_norm": 0.0, - "learning_rate": 2.231423743113914e-08, - "loss": 0.8026, + "learning_rate": 2.5351041818236778e-08, + "loss": 0.8357, "step": 34513 }, { - "epoch": 0.9793984108967083, + "epoch": 0.9780384822465924, "grad_norm": 0.0, - "learning_rate": 2.225291523940798e-08, - "loss": 0.7961, + "learning_rate": 2.5285773712104833e-08, + "loss": 0.7629, "step": 34514 }, { - "epoch": 0.9794267877412032, + "epoch": 0.9780668196888549, "grad_norm": 0.0, - "learning_rate": 2.219167733015448e-08, - "loss": 0.746, + "learning_rate": 2.5220589626694382e-08, + "loss": 0.8431, "step": 34515 }, { - "epoch": 0.979455164585698, + "epoch": 0.9780951571311174, "grad_norm": 0.0, - "learning_rate": 2.2130523703897123e-08, - "loss": 0.7641, + "learning_rate": 2.515548956255609e-08, + "loss": 0.7551, "step": 34516 }, { - "epoch": 0.979483541430193, + "epoch": 0.9781234945733798, "grad_norm": 0.0, - "learning_rate": 2.2069454361151042e-08, - "loss": 0.6824, + "learning_rate": 2.509047352023619e-08, + "loss": 0.8276, "step": 34517 }, { - "epoch": 0.9795119182746879, + "epoch": 0.9781518320156423, "grad_norm": 0.0, - "learning_rate": 2.2008469302433612e-08, - "loss": 0.8673, + "learning_rate": 2.5025541500284245e-08, + "loss": 0.8085, "step": 34518 }, { - "epoch": 0.9795402951191827, + "epoch": 0.9781801694579048, "grad_norm": 0.0, - "learning_rate": 2.1947568528258854e-08, - "loss": 0.7064, + "learning_rate": 2.496069350324537e-08, + "loss": 0.7763, "step": 34519 }, { - "epoch": 0.9795686719636776, + "epoch": 0.9782085069001671, "grad_norm": 0.0, - "learning_rate": 2.188675203914192e-08, - "loss": 0.7558, + "learning_rate": 2.4895929529666908e-08, + "loss": 0.7708, "step": 34520 }, { - "epoch": 0.9795970488081726, + "epoch": 0.9782368443424296, "grad_norm": 0.0, - "learning_rate": 2.1826019835595735e-08, - "loss": 0.852, + "learning_rate": 2.4831249580095086e-08, + "loss": 0.8426, "step": 34521 }, { - "epoch": 0.9796254256526674, + "epoch": 0.9782651817846921, "grad_norm": 0.0, - "learning_rate": 2.1765371918133216e-08, - "loss": 0.7295, + "learning_rate": 2.4766653655072803e-08, + "loss": 0.731, "step": 34522 }, { - "epoch": 0.9796538024971623, + "epoch": 0.9782935192269546, "grad_norm": 0.0, - "learning_rate": 2.1704808287267286e-08, - "loss": 0.7845, + "learning_rate": 2.4702141755145182e-08, + "loss": 0.7433, "step": 34523 }, { - "epoch": 0.9796821793416572, + "epoch": 0.978321856669217, "grad_norm": 0.0, - "learning_rate": 2.1644328943509762e-08, - "loss": 0.8389, + "learning_rate": 2.463771388085623e-08, + "loss": 0.7754, "step": 34524 }, { - "epoch": 0.9797105561861521, + "epoch": 0.9783501941114795, "grad_norm": 0.0, - "learning_rate": 2.1583933887371345e-08, - "loss": 0.7166, + "learning_rate": 2.4573370032748845e-08, + "loss": 0.723, "step": 34525 }, { - "epoch": 0.979738933030647, + "epoch": 0.978378531553742, "grad_norm": 0.0, - "learning_rate": 2.1523623119361627e-08, - "loss": 0.7682, + "learning_rate": 2.4509110211362596e-08, + "loss": 0.9245, "step": 34526 }, { - "epoch": 0.9797673098751419, + "epoch": 0.9784068689960044, "grad_norm": 0.0, - "learning_rate": 2.1463396639991306e-08, - "loss": 0.7743, + "learning_rate": 2.4444934417241495e-08, + "loss": 0.868, "step": 34527 }, { - "epoch": 0.9797956867196368, + "epoch": 0.9784352064382669, "grad_norm": 0.0, - "learning_rate": 2.140325444976665e-08, - "loss": 0.8423, + "learning_rate": 2.4380842650923996e-08, + "loss": 0.8673, "step": 34528 }, { - "epoch": 0.9798240635641317, + "epoch": 0.9784635438805294, "grad_norm": 0.0, - "learning_rate": 2.1343196549198363e-08, - "loss": 0.7904, + "learning_rate": 2.4316834912951892e-08, + "loss": 0.8116, "step": 34529 }, { - "epoch": 0.9798524404086265, + "epoch": 0.9784918813227919, "grad_norm": 0.0, - "learning_rate": 2.1283222938792702e-08, - "loss": 0.8135, + "learning_rate": 2.425291120386364e-08, + "loss": 0.8797, "step": 34530 }, { - "epoch": 0.9798808172531215, + "epoch": 0.9785202187650542, "grad_norm": 0.0, - "learning_rate": 2.122333361905593e-08, - "loss": 0.7837, + "learning_rate": 2.4189071524196585e-08, + "loss": 0.7786, "step": 34531 }, { - "epoch": 0.9799091940976163, + "epoch": 0.9785485562073167, "grad_norm": 0.0, - "learning_rate": 2.116352859049431e-08, - "loss": 0.8826, + "learning_rate": 2.4125315874490295e-08, + "loss": 0.8051, "step": 34532 }, { - "epoch": 0.9799375709421112, + "epoch": 0.9785768936495792, "grad_norm": 0.0, - "learning_rate": 2.110380785361299e-08, - "loss": 0.8286, + "learning_rate": 2.4061644255281013e-08, + "loss": 0.7118, "step": 34533 }, { - "epoch": 0.9799659477866062, + "epoch": 0.9786052310918416, "grad_norm": 0.0, - "learning_rate": 2.1044171408916016e-08, - "loss": 0.6817, + "learning_rate": 2.3998056667103865e-08, + "loss": 0.8205, "step": 34534 }, { - "epoch": 0.979994324631101, + "epoch": 0.9786335685341041, "grad_norm": 0.0, - "learning_rate": 2.0984619256908534e-08, - "loss": 0.8198, + "learning_rate": 2.3934553110496194e-08, + "loss": 0.7498, "step": 34535 }, { - "epoch": 0.9800227014755959, + "epoch": 0.9786619059763666, "grad_norm": 0.0, - "learning_rate": 2.092515139809126e-08, - "loss": 0.8139, + "learning_rate": 2.3871133585993134e-08, + "loss": 0.7729, "step": 34536 }, { - "epoch": 0.9800510783200908, + "epoch": 0.978690243418629, "grad_norm": 0.0, - "learning_rate": 2.086576783296934e-08, - "loss": 0.7366, + "learning_rate": 2.3807798094127587e-08, + "loss": 0.905, "step": 34537 }, { - "epoch": 0.9800794551645857, + "epoch": 0.9787185808608915, "grad_norm": 0.0, - "learning_rate": 2.0806468562041272e-08, - "loss": 0.8233, + "learning_rate": 2.3744546635432463e-08, + "loss": 0.8416, "step": 34538 }, { - "epoch": 0.9801078320090806, + "epoch": 0.978746918303154, "grad_norm": 0.0, - "learning_rate": 2.074725358580998e-08, - "loss": 0.738, + "learning_rate": 2.368137921044289e-08, + "loss": 0.7904, "step": 34539 }, { - "epoch": 0.9801362088535754, + "epoch": 0.9787752557454165, "grad_norm": 0.0, - "learning_rate": 2.0688122904776175e-08, - "loss": 0.8508, + "learning_rate": 2.3618295819688442e-08, + "loss": 0.6505, "step": 34540 }, { - "epoch": 0.9801645856980704, + "epoch": 0.9788035931876788, "grad_norm": 0.0, - "learning_rate": 2.062907651943724e-08, - "loss": 0.8968, + "learning_rate": 2.3555296463703138e-08, + "loss": 0.8238, "step": 34541 }, { - "epoch": 0.9801929625425653, + "epoch": 0.9788319306299413, "grad_norm": 0.0, - "learning_rate": 2.0570114430293887e-08, - "loss": 0.8309, + "learning_rate": 2.349238114301544e-08, + "loss": 0.8535, "step": 34542 }, { - "epoch": 0.9802213393870601, + "epoch": 0.9788602680722038, "grad_norm": 0.0, - "learning_rate": 2.051123663784238e-08, - "loss": 0.8531, + "learning_rate": 2.3429549858156042e-08, + "loss": 0.7509, "step": 34543 }, { - "epoch": 0.9802497162315551, + "epoch": 0.9788886055144662, "grad_norm": 0.0, - "learning_rate": 2.0452443142582322e-08, - "loss": 0.7784, + "learning_rate": 2.3366802609654515e-08, + "loss": 0.9251, "step": 34544 }, { - "epoch": 0.98027809307605, + "epoch": 0.9789169429567287, "grad_norm": 0.0, - "learning_rate": 2.0393733945007764e-08, - "loss": 0.84, + "learning_rate": 2.3304139398039327e-08, + "loss": 0.781, "step": 34545 }, { - "epoch": 0.9803064699205448, + "epoch": 0.9789452803989912, "grad_norm": 0.0, - "learning_rate": 2.0335109045617196e-08, - "loss": 0.8024, + "learning_rate": 2.3241560223837833e-08, + "loss": 0.7839, "step": 34546 }, { - "epoch": 0.9803348467650397, + "epoch": 0.9789736178412537, "grad_norm": 0.0, - "learning_rate": 2.0276568444904666e-08, - "loss": 0.7389, + "learning_rate": 2.317906508757739e-08, + "loss": 0.7758, "step": 34547 }, { - "epoch": 0.9803632236095347, + "epoch": 0.9790019552835161, "grad_norm": 0.0, - "learning_rate": 2.021811214336311e-08, - "loss": 0.8672, + "learning_rate": 2.3116653989784243e-08, + "loss": 0.8166, "step": 34548 }, { - "epoch": 0.9803916004540295, + "epoch": 0.9790302927257786, "grad_norm": 0.0, - "learning_rate": 2.0159740141488803e-08, - "loss": 0.7525, + "learning_rate": 2.305432693098464e-08, + "loss": 0.8007, "step": 34549 }, { - "epoch": 0.9804199772985244, + "epoch": 0.9790586301680411, "grad_norm": 0.0, - "learning_rate": 2.010145243977357e-08, - "loss": 0.7663, + "learning_rate": 2.299208391170371e-08, + "loss": 0.7868, "step": 34550 }, { - "epoch": 0.9804483541430193, + "epoch": 0.9790869676103034, "grad_norm": 0.0, - "learning_rate": 2.0043249038710355e-08, - "loss": 0.7739, + "learning_rate": 2.2929924932465487e-08, + "loss": 0.791, "step": 34551 }, { - "epoch": 0.9804767309875142, + "epoch": 0.9791153050525659, "grad_norm": 0.0, - "learning_rate": 1.9985129938790982e-08, - "loss": 0.7767, + "learning_rate": 2.286784999379177e-08, + "loss": 0.7121, "step": 34552 }, { - "epoch": 0.9805051078320091, + "epoch": 0.9791436424948284, "grad_norm": 0.0, - "learning_rate": 1.992709514050506e-08, - "loss": 0.8186, + "learning_rate": 2.2805859096208805e-08, + "loss": 0.7204, "step": 34553 }, { - "epoch": 0.9805334846765039, + "epoch": 0.9791719799370909, "grad_norm": 0.0, - "learning_rate": 1.9869144644343307e-08, - "loss": 0.8955, + "learning_rate": 2.274395224023618e-08, + "loss": 0.6802, "step": 34554 }, { - "epoch": 0.9805618615209989, + "epoch": 0.9792003173793533, "grad_norm": 0.0, - "learning_rate": 1.9811278450795336e-08, - "loss": 0.8206, + "learning_rate": 2.2682129426395693e-08, + "loss": 0.7727, "step": 34555 }, { - "epoch": 0.9805902383654937, + "epoch": 0.9792286548216158, "grad_norm": 0.0, - "learning_rate": 1.975349656035075e-08, - "loss": 0.7473, + "learning_rate": 2.2620390655210268e-08, + "loss": 0.7546, "step": 34556 }, { - "epoch": 0.9806186152099886, + "epoch": 0.9792569922638783, "grad_norm": 0.0, - "learning_rate": 1.9695798973496937e-08, - "loss": 0.7578, + "learning_rate": 2.2558735927197262e-08, + "loss": 0.9139, "step": 34557 }, { - "epoch": 0.9806469920544836, + "epoch": 0.9792853297061407, "grad_norm": 0.0, - "learning_rate": 1.9638185690721288e-08, - "loss": 0.7205, + "learning_rate": 2.2497165242877373e-08, + "loss": 0.9091, "step": 34558 }, { - "epoch": 0.9806753688989784, + "epoch": 0.9793136671484032, "grad_norm": 0.0, - "learning_rate": 1.958065671251008e-08, - "loss": 0.7603, + "learning_rate": 2.243567860276796e-08, + "loss": 0.7752, "step": 34559 }, { - "epoch": 0.9807037457434733, + "epoch": 0.9793420045906657, "grad_norm": 0.0, - "learning_rate": 1.9523212039350703e-08, - "loss": 0.8461, + "learning_rate": 2.2374276007388616e-08, + "loss": 0.7293, "step": 34560 }, { - "epoch": 0.9807321225879683, + "epoch": 0.979370342032928, "grad_norm": 0.0, - "learning_rate": 1.946585167172721e-08, - "loss": 0.7455, + "learning_rate": 2.23129574572567e-08, + "loss": 0.6709, "step": 34561 }, { - "epoch": 0.9807604994324631, + "epoch": 0.9793986794751905, "grad_norm": 0.0, - "learning_rate": 1.9408575610123657e-08, - "loss": 0.8473, + "learning_rate": 2.2251722952886245e-08, + "loss": 0.7892, "step": 34562 }, { - "epoch": 0.980788876276958, + "epoch": 0.979427016917453, "grad_norm": 0.0, - "learning_rate": 1.9351383855025218e-08, - "loss": 0.7291, + "learning_rate": 2.2190572494795725e-08, + "loss": 0.8568, "step": 34563 }, { - "epoch": 0.9808172531214528, + "epoch": 0.9794553543597155, "grad_norm": 0.0, - "learning_rate": 1.9294276406913727e-08, - "loss": 0.6735, + "learning_rate": 2.2129506083499176e-08, + "loss": 0.8168, "step": 34564 }, { - "epoch": 0.9808456299659478, + "epoch": 0.9794836918019779, "grad_norm": 0.0, - "learning_rate": 1.9237253266272126e-08, - "loss": 0.7908, + "learning_rate": 2.2068523719510626e-08, + "loss": 0.8567, "step": 34565 }, { - "epoch": 0.9808740068104427, + "epoch": 0.9795120292442404, "grad_norm": 0.0, - "learning_rate": 1.918031443358337e-08, - "loss": 0.8256, + "learning_rate": 2.2007625403344113e-08, + "loss": 0.8245, "step": 34566 }, { - "epoch": 0.9809023836549375, + "epoch": 0.9795403666865029, "grad_norm": 0.0, - "learning_rate": 1.912345990932596e-08, - "loss": 0.6852, + "learning_rate": 2.1946811135512557e-08, + "loss": 0.8277, "step": 34567 }, { - "epoch": 0.9809307604994325, + "epoch": 0.9795687041287653, "grad_norm": 0.0, - "learning_rate": 1.906668969398173e-08, - "loss": 0.7696, + "learning_rate": 2.1886080916528884e-08, + "loss": 0.7643, "step": 34568 }, { - "epoch": 0.9809591373439274, + "epoch": 0.9795970415710278, "grad_norm": 0.0, - "learning_rate": 1.9010003788029195e-08, - "loss": 0.8256, + "learning_rate": 2.1825434746903794e-08, + "loss": 0.834, "step": 34569 }, { - "epoch": 0.9809875141884222, + "epoch": 0.9796253790132903, "grad_norm": 0.0, - "learning_rate": 1.8953402191947966e-08, - "loss": 0.8258, + "learning_rate": 2.1764872627147994e-08, + "loss": 0.8083, "step": 34570 }, { - "epoch": 0.9810158910329171, + "epoch": 0.9796537164555528, "grad_norm": 0.0, - "learning_rate": 1.8896884906216552e-08, - "loss": 0.7259, + "learning_rate": 2.170439455777218e-08, + "loss": 0.9476, "step": 34571 }, { - "epoch": 0.9810442678774121, + "epoch": 0.9796820538978152, "grad_norm": 0.0, - "learning_rate": 1.8840451931312342e-08, - "loss": 0.8531, + "learning_rate": 2.1644000539285948e-08, + "loss": 0.7886, "step": 34572 }, { - "epoch": 0.9810726447219069, + "epoch": 0.9797103913400776, "grad_norm": 0.0, - "learning_rate": 1.8784103267710518e-08, - "loss": 0.7389, + "learning_rate": 2.158369057219667e-08, + "loss": 0.8318, "step": 34573 }, { - "epoch": 0.9811010215664018, + "epoch": 0.9797387287823401, "grad_norm": 0.0, - "learning_rate": 1.8727838915888476e-08, - "loss": 0.7757, + "learning_rate": 2.1523464657013936e-08, + "loss": 0.8086, "step": 34574 }, { - "epoch": 0.9811293984108967, + "epoch": 0.9797670662246025, "grad_norm": 0.0, - "learning_rate": 1.8671658876321385e-08, - "loss": 0.6526, + "learning_rate": 2.146332279424512e-08, + "loss": 0.7268, "step": 34575 }, { - "epoch": 0.9811577752553916, + "epoch": 0.979795403666865, "grad_norm": 0.0, - "learning_rate": 1.8615563149482206e-08, - "loss": 0.7947, + "learning_rate": 2.1403264984395378e-08, + "loss": 0.7024, "step": 34576 }, { - "epoch": 0.9811861520998865, + "epoch": 0.9798237411091275, "grad_norm": 0.0, - "learning_rate": 1.8559551735847225e-08, - "loss": 0.8321, + "learning_rate": 2.1343291227972073e-08, + "loss": 0.8924, "step": 34577 }, { - "epoch": 0.9812145289443814, + "epoch": 0.97985207855139, "grad_norm": 0.0, - "learning_rate": 1.8503624635888284e-08, - "loss": 0.728, + "learning_rate": 2.1283401525478142e-08, + "loss": 0.8157, "step": 34578 }, { - "epoch": 0.9812429057888763, + "epoch": 0.9798804159936524, "grad_norm": 0.0, - "learning_rate": 1.8447781850077227e-08, - "loss": 0.7597, + "learning_rate": 2.1223595877420954e-08, + "loss": 0.7717, "step": 34579 }, { - "epoch": 0.9812712826333712, + "epoch": 0.9799087534359149, "grad_norm": 0.0, - "learning_rate": 1.8392023378888125e-08, - "loss": 0.7071, + "learning_rate": 2.1163874284302334e-08, + "loss": 0.8386, "step": 34580 }, { - "epoch": 0.981299659477866, + "epoch": 0.9799370908781774, "grad_norm": 0.0, - "learning_rate": 1.8336349222788374e-08, - "loss": 0.6917, + "learning_rate": 2.1104236746626318e-08, + "loss": 0.8737, "step": 34581 }, { - "epoch": 0.981328036322361, + "epoch": 0.9799654283204398, "grad_norm": 0.0, - "learning_rate": 1.8280759382250934e-08, - "loss": 0.7034, + "learning_rate": 2.104468326489584e-08, + "loss": 0.7787, "step": 34582 }, { - "epoch": 0.9813564131668558, + "epoch": 0.9799937657627023, "grad_norm": 0.0, - "learning_rate": 1.822525385774543e-08, - "loss": 0.7406, + "learning_rate": 2.0985213839610498e-08, + "loss": 0.8272, "step": 34583 }, { - "epoch": 0.9813847900113507, + "epoch": 0.9800221032049647, "grad_norm": 0.0, - "learning_rate": 1.816983264973926e-08, - "loss": 0.7301, + "learning_rate": 2.0925828471272115e-08, + "loss": 0.8903, "step": 34584 }, { - "epoch": 0.9814131668558457, + "epoch": 0.9800504406472271, "grad_norm": 0.0, - "learning_rate": 1.8114495758700945e-08, - "loss": 0.7875, + "learning_rate": 2.08665271603814e-08, + "loss": 0.7614, "step": 34585 }, { - "epoch": 0.9814415437003405, + "epoch": 0.9800787780894896, "grad_norm": 0.0, - "learning_rate": 1.8059243185097886e-08, - "loss": 0.8367, + "learning_rate": 2.0807309907437955e-08, + "loss": 0.85, "step": 34586 }, { - "epoch": 0.9814699205448354, + "epoch": 0.9801071155317521, "grad_norm": 0.0, - "learning_rate": 1.8004074929397485e-08, - "loss": 0.7708, + "learning_rate": 2.074817671294027e-08, + "loss": 0.76, "step": 34587 }, { - "epoch": 0.9814982973893303, + "epoch": 0.9801354529740146, "grad_norm": 0.0, - "learning_rate": 1.794899099206604e-08, - "loss": 0.8482, + "learning_rate": 2.0689127577385724e-08, + "loss": 0.8761, "step": 34588 }, { - "epoch": 0.9815266742338252, + "epoch": 0.980163790416277, "grad_norm": 0.0, - "learning_rate": 1.789399137356762e-08, - "loss": 0.7174, + "learning_rate": 2.063016250127281e-08, + "loss": 0.7144, "step": 34589 }, { - "epoch": 0.9815550510783201, + "epoch": 0.9801921278585395, "grad_norm": 0.0, - "learning_rate": 1.783907607436741e-08, - "loss": 0.7806, + "learning_rate": 2.0571281485097793e-08, + "loss": 0.8458, "step": 34590 }, { - "epoch": 0.9815834279228149, + "epoch": 0.980220465300802, "grad_norm": 0.0, - "learning_rate": 1.7784245094929488e-08, - "loss": 0.8259, + "learning_rate": 2.0512484529356947e-08, + "loss": 0.714, "step": 34591 }, { - "epoch": 0.9816118047673099, + "epoch": 0.9802488027430644, "grad_norm": 0.0, - "learning_rate": 1.7729498435716808e-08, - "loss": 0.7845, + "learning_rate": 2.045377163454432e-08, + "loss": 0.8224, "step": 34592 }, { - "epoch": 0.9816401816118048, + "epoch": 0.9802771401853269, "grad_norm": 0.0, - "learning_rate": 1.767483609719123e-08, - "loss": 0.7695, + "learning_rate": 2.0395142801156174e-08, + "loss": 0.8414, "step": 34593 }, { - "epoch": 0.9816685584562996, + "epoch": 0.9803054776275894, "grad_norm": 0.0, - "learning_rate": 1.762025807981571e-08, - "loss": 0.9069, + "learning_rate": 2.0336598029684352e-08, + "loss": 0.8119, "step": 34594 }, { - "epoch": 0.9816969353007946, + "epoch": 0.9803338150698518, "grad_norm": 0.0, - "learning_rate": 1.7565764384049887e-08, - "loss": 0.7681, + "learning_rate": 2.0278137320625114e-08, + "loss": 0.7822, "step": 34595 }, { - "epoch": 0.9817253121452895, + "epoch": 0.9803621525121142, "grad_norm": 0.0, - "learning_rate": 1.7511355010355614e-08, - "loss": 0.8414, + "learning_rate": 2.021976067446807e-08, + "loss": 0.7535, "step": 34596 }, { - "epoch": 0.9817536889897843, + "epoch": 0.9803904899543767, "grad_norm": 0.0, - "learning_rate": 1.7457029959191407e-08, - "loss": 0.8834, + "learning_rate": 2.016146809170505e-08, + "loss": 0.7112, "step": 34597 }, { - "epoch": 0.9817820658342792, + "epoch": 0.9804188273966392, "grad_norm": 0.0, - "learning_rate": 1.740278923101579e-08, - "loss": 0.8295, + "learning_rate": 2.010325957282899e-08, + "loss": 0.7472, "step": 34598 }, { - "epoch": 0.9818104426787742, + "epoch": 0.9804471648389016, "grad_norm": 0.0, - "learning_rate": 1.7348632826288403e-08, - "loss": 0.8392, + "learning_rate": 2.0045135118328397e-08, + "loss": 0.7711, "step": 34599 }, { - "epoch": 0.981838819523269, + "epoch": 0.9804755022811641, "grad_norm": 0.0, - "learning_rate": 1.729456074546554e-08, - "loss": 0.8966, + "learning_rate": 1.9987094728695088e-08, + "loss": 0.7882, "step": 34600 }, { - "epoch": 0.9818671963677639, + "epoch": 0.9805038397234266, "grad_norm": 0.0, - "learning_rate": 1.7240572989003502e-08, - "loss": 0.7687, + "learning_rate": 1.9929138404415348e-08, + "loss": 0.8011, "step": 34601 }, { - "epoch": 0.9818955732122588, + "epoch": 0.9805321771656891, "grad_norm": 0.0, - "learning_rate": 1.7186669557360812e-08, - "loss": 0.8363, + "learning_rate": 1.9871266145977673e-08, + "loss": 0.8434, "step": 34602 }, { - "epoch": 0.9819239500567537, + "epoch": 0.9805605146079515, "grad_norm": 0.0, - "learning_rate": 1.7132850450989336e-08, - "loss": 0.7934, + "learning_rate": 1.9813477953871675e-08, + "loss": 0.8256, "step": 34603 }, { - "epoch": 0.9819523269012486, + "epoch": 0.980588852050214, "grad_norm": 0.0, - "learning_rate": 1.707911567034648e-08, - "loss": 0.7268, + "learning_rate": 1.9755773828582514e-08, + "loss": 0.7859, "step": 34604 }, { - "epoch": 0.9819807037457434, + "epoch": 0.9806171894924764, "grad_norm": 0.0, - "learning_rate": 1.7025465215885217e-08, - "loss": 0.8109, + "learning_rate": 1.9698153770596474e-08, + "loss": 0.7698, "step": 34605 }, { - "epoch": 0.9820090805902384, + "epoch": 0.9806455269347388, "grad_norm": 0.0, - "learning_rate": 1.697189908805741e-08, - "loss": 0.805, + "learning_rate": 1.964061778039872e-08, + "loss": 0.8783, "step": 34606 }, { - "epoch": 0.9820374574347333, + "epoch": 0.9806738643770013, "grad_norm": 0.0, - "learning_rate": 1.6918417287318245e-08, - "loss": 0.8225, + "learning_rate": 1.9583165858474417e-08, + "loss": 0.7085, "step": 34607 }, { - "epoch": 0.9820658342792281, + "epoch": 0.9807022018192638, "grad_norm": 0.0, - "learning_rate": 1.6865019814117368e-08, - "loss": 0.7698, + "learning_rate": 1.9525798005307628e-08, + "loss": 0.7691, "step": 34608 }, { - "epoch": 0.9820942111237231, + "epoch": 0.9807305392615262, "grad_norm": 0.0, - "learning_rate": 1.6811706668905526e-08, - "loss": 0.6847, + "learning_rate": 1.9468514221380185e-08, + "loss": 0.807, "step": 34609 }, { - "epoch": 0.9821225879682179, + "epoch": 0.9807588767037887, "grad_norm": 0.0, - "learning_rate": 1.6758477852135692e-08, - "loss": 0.7203, + "learning_rate": 1.941131450717615e-08, + "loss": 0.7768, "step": 34610 }, { - "epoch": 0.9821509648127128, + "epoch": 0.9807872141460512, "grad_norm": 0.0, - "learning_rate": 1.6705333364254174e-08, - "loss": 0.7677, + "learning_rate": 1.9354198863177355e-08, + "loss": 0.8078, "step": 34611 }, { - "epoch": 0.9821793416572078, + "epoch": 0.9808155515883137, "grad_norm": 0.0, - "learning_rate": 1.6652273205712834e-08, - "loss": 0.8156, + "learning_rate": 1.9297167289863417e-08, + "loss": 0.7905, "step": 34612 }, { - "epoch": 0.9822077185017026, + "epoch": 0.9808438890305761, "grad_norm": 0.0, - "learning_rate": 1.6599297376957978e-08, - "loss": 0.8329, + "learning_rate": 1.9240219787716175e-08, + "loss": 0.7923, "step": 34613 }, { - "epoch": 0.9822360953461975, + "epoch": 0.9808722264728386, "grad_norm": 0.0, - "learning_rate": 1.6546405878437033e-08, - "loss": 0.7645, + "learning_rate": 1.9183356357215242e-08, + "loss": 0.8992, "step": 34614 }, { - "epoch": 0.9822644721906924, + "epoch": 0.980900563915101, "grad_norm": 0.0, - "learning_rate": 1.6493598710598524e-08, - "loss": 0.7917, + "learning_rate": 1.912657699883802e-08, + "loss": 0.8857, "step": 34615 }, { - "epoch": 0.9822928490351873, + "epoch": 0.9809289013573634, "grad_norm": 0.0, - "learning_rate": 1.644087587388654e-08, - "loss": 0.7962, + "learning_rate": 1.906988171306523e-08, + "loss": 0.7934, "step": 34616 }, { - "epoch": 0.9823212258796822, + "epoch": 0.9809572387996259, "grad_norm": 0.0, - "learning_rate": 1.6388237368747396e-08, - "loss": 0.8282, + "learning_rate": 1.901327050037205e-08, + "loss": 0.8287, "step": 34617 }, { - "epoch": 0.982349602724177, + "epoch": 0.9809855762418884, "grad_norm": 0.0, - "learning_rate": 1.6335683195626286e-08, - "loss": 0.8111, + "learning_rate": 1.8956743361236983e-08, + "loss": 0.8806, "step": 34618 }, { - "epoch": 0.982377979568672, + "epoch": 0.9810139136841509, "grad_norm": 0.0, - "learning_rate": 1.6283213354965077e-08, - "loss": 0.7232, + "learning_rate": 1.890030029613521e-08, + "loss": 0.868, "step": 34619 }, { - "epoch": 0.9824063564131669, + "epoch": 0.9810422511264133, "grad_norm": 0.0, - "learning_rate": 1.6230827847208974e-08, - "loss": 0.834, + "learning_rate": 1.8843941305543013e-08, + "loss": 0.751, "step": 34620 }, { - "epoch": 0.9824347332576617, + "epoch": 0.9810705885686758, "grad_norm": 0.0, - "learning_rate": 1.6178526672799845e-08, - "loss": 0.7854, + "learning_rate": 1.8787666389935567e-08, + "loss": 0.8264, "step": 34621 }, { - "epoch": 0.9824631101021566, + "epoch": 0.9810989260109383, "grad_norm": 0.0, - "learning_rate": 1.6126309832180665e-08, - "loss": 0.8597, + "learning_rate": 1.8731475549784717e-08, + "loss": 0.757, "step": 34622 }, { - "epoch": 0.9824914869466516, + "epoch": 0.9811272634532007, "grad_norm": 0.0, - "learning_rate": 1.6074177325789974e-08, - "loss": 0.8446, + "learning_rate": 1.867536878556564e-08, + "loss": 0.7783, "step": 34623 }, { - "epoch": 0.9825198637911464, + "epoch": 0.9811556008954632, "grad_norm": 0.0, - "learning_rate": 1.6022129154069643e-08, - "loss": 0.8297, + "learning_rate": 1.861934609775018e-08, + "loss": 0.6906, "step": 34624 }, { - "epoch": 0.9825482406356413, + "epoch": 0.9811839383377257, "grad_norm": 0.0, - "learning_rate": 1.5970165317460428e-08, - "loss": 0.7689, + "learning_rate": 1.856340748681129e-08, + "loss": 0.7012, "step": 34625 }, { - "epoch": 0.9825766174801362, + "epoch": 0.9812122757799882, "grad_norm": 0.0, - "learning_rate": 1.591828581639865e-08, - "loss": 0.8297, + "learning_rate": 1.850755295321749e-08, + "loss": 0.8123, "step": 34626 }, { - "epoch": 0.9826049943246311, + "epoch": 0.9812406132222505, "grad_norm": 0.0, - "learning_rate": 1.5866490651323952e-08, - "loss": 0.8332, + "learning_rate": 1.8451782497442838e-08, + "loss": 0.8487, "step": 34627 }, { - "epoch": 0.982633371169126, + "epoch": 0.981268950664513, "grad_norm": 0.0, - "learning_rate": 1.5814779822674876e-08, - "loss": 0.7677, + "learning_rate": 1.8396096119954744e-08, + "loss": 0.8027, "step": 34628 }, { - "epoch": 0.9826617480136209, + "epoch": 0.9812972881067755, "grad_norm": 0.0, - "learning_rate": 1.5763153330886627e-08, - "loss": 0.8217, + "learning_rate": 1.8340493821222827e-08, + "loss": 0.7486, "step": 34629 }, { - "epoch": 0.9826901248581158, + "epoch": 0.9813256255490379, "grad_norm": 0.0, - "learning_rate": 1.5711611176395525e-08, - "loss": 0.7661, + "learning_rate": 1.8284975601715606e-08, + "loss": 0.8093, "step": 34630 }, { - "epoch": 0.9827185017026107, + "epoch": 0.9813539629913004, "grad_norm": 0.0, - "learning_rate": 1.5660153359637886e-08, - "loss": 0.754, + "learning_rate": 1.822954146190159e-08, + "loss": 0.7866, "step": 34631 }, { - "epoch": 0.9827468785471055, + "epoch": 0.9813823004335629, "grad_norm": 0.0, - "learning_rate": 1.560877988104781e-08, - "loss": 0.7832, + "learning_rate": 1.8174191402244855e-08, + "loss": 0.8998, "step": 34632 }, { - "epoch": 0.9827752553916005, + "epoch": 0.9814106378758253, "grad_norm": 0.0, - "learning_rate": 1.555749074105828e-08, - "loss": 0.9121, + "learning_rate": 1.8118925423215027e-08, + "loss": 0.7508, "step": 34633 }, { - "epoch": 0.9828036322360953, + "epoch": 0.9814389753180878, "grad_norm": 0.0, - "learning_rate": 1.5506285940103394e-08, - "loss": 0.784, + "learning_rate": 1.806374352527618e-08, + "loss": 0.8499, "step": 34634 }, { - "epoch": 0.9828320090805902, + "epoch": 0.9814673127603503, "grad_norm": 0.0, - "learning_rate": 1.545516547861614e-08, - "loss": 0.7716, + "learning_rate": 1.800864570889238e-08, + "loss": 0.765, "step": 34635 }, { - "epoch": 0.9828603859250852, + "epoch": 0.9814956502026128, "grad_norm": 0.0, - "learning_rate": 1.5404129357028396e-08, - "loss": 0.8202, + "learning_rate": 1.7953631974528818e-08, + "loss": 0.7394, "step": 34636 }, { - "epoch": 0.98288876276958, + "epoch": 0.9815239876448751, "grad_norm": 0.0, - "learning_rate": 1.535317757577093e-08, - "loss": 0.7809, + "learning_rate": 1.7898702322648453e-08, + "loss": 0.8421, "step": 34637 }, { - "epoch": 0.9829171396140749, + "epoch": 0.9815523250871376, "grad_norm": 0.0, - "learning_rate": 1.530231013527339e-08, - "loss": 0.8309, + "learning_rate": 1.784385675371425e-08, + "loss": 0.7497, "step": 34638 }, { - "epoch": 0.9829455164585698, + "epoch": 0.9815806625294001, "grad_norm": 0.0, - "learning_rate": 1.5251527035966552e-08, - "loss": 0.8307, + "learning_rate": 1.778909526818806e-08, + "loss": 0.8398, "step": 34639 }, { - "epoch": 0.9829738933030647, + "epoch": 0.9816089999716625, "grad_norm": 0.0, - "learning_rate": 1.5200828278278957e-08, - "loss": 0.876, + "learning_rate": 1.773441786653063e-08, + "loss": 0.7503, "step": 34640 }, { - "epoch": 0.9830022701475596, + "epoch": 0.981637337413925, "grad_norm": 0.0, - "learning_rate": 1.5150213862638042e-08, - "loss": 0.8437, + "learning_rate": 1.7679824549203805e-08, + "loss": 0.6827, "step": 34641 }, { - "epoch": 0.9830306469920544, + "epoch": 0.9816656748561875, "grad_norm": 0.0, - "learning_rate": 1.5099683789473463e-08, - "loss": 0.8141, + "learning_rate": 1.7625315316666115e-08, + "loss": 0.9203, "step": 34642 }, { - "epoch": 0.9830590238365494, + "epoch": 0.98169401229845, "grad_norm": 0.0, - "learning_rate": 1.5049238059209325e-08, - "loss": 0.847, + "learning_rate": 1.7570890169377185e-08, + "loss": 0.7818, "step": 34643 }, { - "epoch": 0.9830874006810443, + "epoch": 0.9817223497407124, "grad_norm": 0.0, - "learning_rate": 1.4998876672274176e-08, - "loss": 0.6931, + "learning_rate": 1.7516549107795543e-08, + "loss": 0.8099, "step": 34644 }, { - "epoch": 0.9831157775255391, + "epoch": 0.9817506871829749, "grad_norm": 0.0, - "learning_rate": 1.494859962909101e-08, - "loss": 0.7797, + "learning_rate": 1.746229213237971e-08, + "loss": 0.7821, "step": 34645 }, { - "epoch": 0.9831441543700341, + "epoch": 0.9817790246252374, "grad_norm": 0.0, - "learning_rate": 1.4898406930087262e-08, - "loss": 0.8365, + "learning_rate": 1.7408119243584875e-08, + "loss": 0.8445, "step": 34646 }, { - "epoch": 0.983172531214529, + "epoch": 0.9818073620674997, "grad_norm": 0.0, - "learning_rate": 1.4848298575684817e-08, - "loss": 0.8669, + "learning_rate": 1.7354030441868452e-08, + "loss": 0.7967, "step": 34647 }, { - "epoch": 0.9832009080590238, + "epoch": 0.9818356995097622, "grad_norm": 0.0, - "learning_rate": 1.479827456630778e-08, - "loss": 0.8658, + "learning_rate": 1.7300025727686744e-08, + "loss": 0.8256, "step": 34648 }, { - "epoch": 0.9832292849035187, + "epoch": 0.9818640369520247, "grad_norm": 0.0, - "learning_rate": 1.4748334902379147e-08, - "loss": 0.8841, + "learning_rate": 1.7246105101493825e-08, + "loss": 0.7481, "step": 34649 }, { - "epoch": 0.9832576617480137, + "epoch": 0.9818923743942872, "grad_norm": 0.0, - "learning_rate": 1.469847958431858e-08, - "loss": 0.8149, + "learning_rate": 1.7192268563743786e-08, + "loss": 0.7997, "step": 34650 }, { - "epoch": 0.9832860385925085, + "epoch": 0.9819207118365496, "grad_norm": 0.0, - "learning_rate": 1.4648708612550189e-08, - "loss": 0.806, + "learning_rate": 1.7138516114890702e-08, + "loss": 0.8603, "step": 34651 }, { - "epoch": 0.9833144154370034, + "epoch": 0.9819490492788121, "grad_norm": 0.0, - "learning_rate": 1.4599021987493634e-08, - "loss": 0.7402, + "learning_rate": 1.7084847755385324e-08, + "loss": 0.8948, "step": 34652 }, { - "epoch": 0.9833427922814983, + "epoch": 0.9819773867210746, "grad_norm": 0.0, - "learning_rate": 1.4549419709566359e-08, - "loss": 0.8671, + "learning_rate": 1.703126348568285e-08, + "loss": 0.9812, "step": 34653 }, { - "epoch": 0.9833711691259932, + "epoch": 0.982005724163337, "grad_norm": 0.0, - "learning_rate": 1.4499901779190251e-08, - "loss": 0.761, + "learning_rate": 1.697776330623291e-08, + "loss": 0.755, "step": 34654 }, { - "epoch": 0.9833995459704881, + "epoch": 0.9820340616055995, "grad_norm": 0.0, - "learning_rate": 1.4450468196781641e-08, - "loss": 0.7523, + "learning_rate": 1.692434721748626e-08, + "loss": 0.7724, "step": 34655 }, { - "epoch": 0.9834279228149829, + "epoch": 0.982062399047862, "grad_norm": 0.0, - "learning_rate": 1.4401118962759086e-08, - "loss": 0.8568, + "learning_rate": 1.687101521989254e-08, + "loss": 0.7296, "step": 34656 }, { - "epoch": 0.9834562996594779, + "epoch": 0.9820907364901243, "grad_norm": 0.0, - "learning_rate": 1.4351854077540028e-08, - "loss": 0.7248, + "learning_rate": 1.68177673139025e-08, + "loss": 0.7228, "step": 34657 }, { - "epoch": 0.9834846765039728, + "epoch": 0.9821190739323868, "grad_norm": 0.0, - "learning_rate": 1.4302673541538581e-08, - "loss": 0.8013, + "learning_rate": 1.6764603499962452e-08, + "loss": 0.816, "step": 34658 }, { - "epoch": 0.9835130533484676, + "epoch": 0.9821474113746493, "grad_norm": 0.0, - "learning_rate": 1.4253577355171078e-08, - "loss": 0.8008, + "learning_rate": 1.671152377852092e-08, + "loss": 0.789, "step": 34659 }, { - "epoch": 0.9835414301929626, + "epoch": 0.9821757488169118, "grad_norm": 0.0, - "learning_rate": 1.4204565518853853e-08, - "loss": 0.8412, + "learning_rate": 1.665852815002644e-08, + "loss": 0.8521, "step": 34660 }, { - "epoch": 0.9835698070374574, + "epoch": 0.9822040862591742, "grad_norm": 0.0, - "learning_rate": 1.4155638032998797e-08, - "loss": 0.8195, + "learning_rate": 1.6605616614924214e-08, + "loss": 0.7686, "step": 34661 }, { - "epoch": 0.9835981838819523, + "epoch": 0.9822324237014367, "grad_norm": 0.0, - "learning_rate": 1.4106794898020027e-08, - "loss": 0.8974, + "learning_rate": 1.6552789173660543e-08, + "loss": 0.7569, "step": 34662 }, { - "epoch": 0.9836265607264473, + "epoch": 0.9822607611436992, "grad_norm": 0.0, - "learning_rate": 1.4058036114329433e-08, - "loss": 0.7608, + "learning_rate": 1.6500045826679523e-08, + "loss": 0.7372, "step": 34663 }, { - "epoch": 0.9836549375709421, + "epoch": 0.9822890985859616, "grad_norm": 0.0, - "learning_rate": 1.4009361682340018e-08, - "loss": 0.7309, + "learning_rate": 1.644738657442524e-08, + "loss": 0.7186, "step": 34664 }, { - "epoch": 0.983683314415437, + "epoch": 0.9823174360282241, "grad_norm": 0.0, - "learning_rate": 1.3960771602462565e-08, - "loss": 0.861, + "learning_rate": 1.6394811417342892e-08, + "loss": 0.7859, "step": 34665 }, { - "epoch": 0.9837116912599319, + "epoch": 0.9823457734704866, "grad_norm": 0.0, - "learning_rate": 1.3912265875106745e-08, - "loss": 0.7731, + "learning_rate": 1.6342320355874353e-08, + "loss": 0.8195, "step": 34666 }, { - "epoch": 0.9837400681044268, + "epoch": 0.9823741109127491, "grad_norm": 0.0, - "learning_rate": 1.3863844500683343e-08, - "loss": 0.7338, + "learning_rate": 1.6289913390461487e-08, + "loss": 0.8303, "step": 34667 }, { - "epoch": 0.9837684449489217, + "epoch": 0.9824024483550114, "grad_norm": 0.0, - "learning_rate": 1.381550747960203e-08, - "loss": 0.7288, + "learning_rate": 1.6237590521546165e-08, + "loss": 0.8557, "step": 34668 }, { - "epoch": 0.9837968217934165, + "epoch": 0.9824307857972739, "grad_norm": 0.0, - "learning_rate": 1.3767254812269148e-08, - "loss": 0.8812, + "learning_rate": 1.6185351749569146e-08, + "loss": 0.7656, "step": 34669 }, { - "epoch": 0.9838251986379115, + "epoch": 0.9824591232395364, "grad_norm": 0.0, - "learning_rate": 1.3719086499092149e-08, - "loss": 0.864, + "learning_rate": 1.6133197074970075e-08, + "loss": 0.7307, "step": 34670 }, { - "epoch": 0.9838535754824064, + "epoch": 0.9824874606817988, "grad_norm": 0.0, - "learning_rate": 1.3671002540480705e-08, - "loss": 0.8755, + "learning_rate": 1.6081126498187494e-08, + "loss": 0.7869, "step": 34671 }, { - "epoch": 0.9838819523269012, + "epoch": 0.9825157981240613, "grad_norm": 0.0, - "learning_rate": 1.3623002936837825e-08, - "loss": 0.7069, + "learning_rate": 1.602914001966216e-08, + "loss": 0.7012, "step": 34672 }, { - "epoch": 0.9839103291713961, + "epoch": 0.9825441355663238, "grad_norm": 0.0, - "learning_rate": 1.3575087688570965e-08, - "loss": 0.9315, + "learning_rate": 1.597723763983039e-08, + "loss": 0.7536, "step": 34673 }, { - "epoch": 0.9839387060158911, + "epoch": 0.9825724730085863, "grad_norm": 0.0, - "learning_rate": 1.3527256796084243e-08, - "loss": 0.8537, + "learning_rate": 1.5925419359130723e-08, + "loss": 0.8272, "step": 34674 }, { - "epoch": 0.9839670828603859, + "epoch": 0.9826008104508487, "grad_norm": 0.0, - "learning_rate": 1.347951025978178e-08, - "loss": 0.8025, + "learning_rate": 1.5873685177998365e-08, + "loss": 0.8248, "step": 34675 }, { - "epoch": 0.9839954597048808, + "epoch": 0.9826291478931112, "grad_norm": 0.0, - "learning_rate": 1.343184808006659e-08, - "loss": 0.7972, + "learning_rate": 1.5822035096868526e-08, + "loss": 0.8528, "step": 34676 }, { - "epoch": 0.9840238365493758, + "epoch": 0.9826574853353737, "grad_norm": 0.0, - "learning_rate": 1.338427025734168e-08, - "loss": 0.8228, + "learning_rate": 1.5770469116178634e-08, + "loss": 0.8175, "step": 34677 }, { - "epoch": 0.9840522133938706, + "epoch": 0.982685822777636, "grad_norm": 0.0, - "learning_rate": 1.3336776792008954e-08, - "loss": 0.7471, + "learning_rate": 1.5718987236360563e-08, + "loss": 0.9109, "step": 34678 }, { - "epoch": 0.9840805902383655, + "epoch": 0.9827141602198985, "grad_norm": 0.0, - "learning_rate": 1.3289367684469201e-08, - "loss": 0.7637, + "learning_rate": 1.5667589457849518e-08, + "loss": 0.7849, "step": 34679 }, { - "epoch": 0.9841089670828603, + "epoch": 0.982742497662161, "grad_norm": 0.0, - "learning_rate": 1.3242042935123211e-08, - "loss": 0.8831, + "learning_rate": 1.5616275781077384e-08, + "loss": 0.8616, "step": 34680 }, { - "epoch": 0.9841373439273553, + "epoch": 0.9827708351044234, "grad_norm": 0.0, - "learning_rate": 1.3194802544370666e-08, - "loss": 0.8366, + "learning_rate": 1.5565046206478252e-08, + "loss": 0.7403, "step": 34681 }, { - "epoch": 0.9841657207718502, + "epoch": 0.9827991725466859, "grad_norm": 0.0, - "learning_rate": 1.3147646512610135e-08, - "loss": 0.685, + "learning_rate": 1.551390073448067e-08, + "loss": 0.8406, "step": 34682 }, { - "epoch": 0.984194097616345, + "epoch": 0.9828275099889484, "grad_norm": 0.0, - "learning_rate": 1.3100574840240187e-08, - "loss": 0.8071, + "learning_rate": 1.5462839365518734e-08, + "loss": 0.7398, "step": 34683 }, { - "epoch": 0.98422247446084, + "epoch": 0.9828558474312109, "grad_norm": 0.0, - "learning_rate": 1.3053587527659394e-08, - "loss": 0.8213, + "learning_rate": 1.5411862100019882e-08, + "loss": 0.7666, "step": 34684 }, { - "epoch": 0.9842508513053349, + "epoch": 0.9828841848734733, "grad_norm": 0.0, - "learning_rate": 1.3006684575264106e-08, - "loss": 0.7943, + "learning_rate": 1.5360968938414876e-08, + "loss": 0.8241, "step": 34685 }, { - "epoch": 0.9842792281498297, + "epoch": 0.9829125223157358, "grad_norm": 0.0, - "learning_rate": 1.295986598344956e-08, - "loss": 0.8591, + "learning_rate": 1.531015988113227e-08, + "loss": 0.8776, "step": 34686 }, { - "epoch": 0.9843076049943247, + "epoch": 0.9829408597579983, "grad_norm": 0.0, - "learning_rate": 1.2913131752612107e-08, - "loss": 0.6992, + "learning_rate": 1.525943492859949e-08, + "loss": 0.8583, "step": 34687 }, { - "epoch": 0.9843359818388195, + "epoch": 0.9829691972002607, "grad_norm": 0.0, - "learning_rate": 1.2866481883146986e-08, - "loss": 0.8479, + "learning_rate": 1.5208794081245092e-08, + "loss": 0.7969, "step": 34688 }, { - "epoch": 0.9843643586833144, + "epoch": 0.9829975346425232, "grad_norm": 0.0, - "learning_rate": 1.2819916375446106e-08, - "loss": 0.8671, + "learning_rate": 1.5158237339494285e-08, + "loss": 0.7977, "step": 34689 }, { - "epoch": 0.9843927355278093, + "epoch": 0.9830258720847856, "grad_norm": 0.0, - "learning_rate": 1.2773435229905818e-08, - "loss": 0.8338, + "learning_rate": 1.5107764703773398e-08, + "loss": 0.8087, "step": 34690 }, { - "epoch": 0.9844211123723042, + "epoch": 0.9830542095270481, "grad_norm": 0.0, - "learning_rate": 1.2727038446916918e-08, - "loss": 0.7684, + "learning_rate": 1.505737617450764e-08, + "loss": 0.8508, "step": 34691 }, { - "epoch": 0.9844494892167991, + "epoch": 0.9830825469693105, "grad_norm": 0.0, - "learning_rate": 1.2680726026871314e-08, - "loss": 0.7876, + "learning_rate": 1.5007071752121128e-08, + "loss": 0.788, "step": 34692 }, { - "epoch": 0.984477866061294, + "epoch": 0.983110884411573, "grad_norm": 0.0, - "learning_rate": 1.263449797016092e-08, - "loss": 0.722, + "learning_rate": 1.4956851437037957e-08, + "loss": 0.8333, "step": 34693 }, { - "epoch": 0.9845062429057889, + "epoch": 0.9831392218538355, "grad_norm": 0.0, - "learning_rate": 1.2588354277176529e-08, - "loss": 0.7051, + "learning_rate": 1.4906715229682232e-08, + "loss": 0.7959, "step": 34694 }, { - "epoch": 0.9845346197502838, + "epoch": 0.9831675592960979, "grad_norm": 0.0, - "learning_rate": 1.2542294948305611e-08, - "loss": 0.8579, + "learning_rate": 1.4856663130473625e-08, + "loss": 0.7941, "step": 34695 }, { - "epoch": 0.9845629965947786, + "epoch": 0.9831958967383604, "grad_norm": 0.0, - "learning_rate": 1.2496319983940074e-08, - "loss": 0.7624, + "learning_rate": 1.4806695139836236e-08, + "loss": 0.8445, "step": 34696 }, { - "epoch": 0.9845913734392735, + "epoch": 0.9832242341806229, "grad_norm": 0.0, - "learning_rate": 1.2450429384467388e-08, - "loss": 0.8185, + "learning_rate": 1.475681125818973e-08, + "loss": 0.7501, "step": 34697 }, { - "epoch": 0.9846197502837685, + "epoch": 0.9832525716228853, "grad_norm": 0.0, - "learning_rate": 1.2404623150273908e-08, - "loss": 0.708, + "learning_rate": 1.4707011485953771e-08, + "loss": 0.8065, "step": 34698 }, { - "epoch": 0.9846481271282633, + "epoch": 0.9832809090651478, "grad_norm": 0.0, - "learning_rate": 1.2358901281748215e-08, - "loss": 0.8056, + "learning_rate": 1.4657295823549134e-08, + "loss": 0.8471, "step": 34699 }, { - "epoch": 0.9846765039727582, + "epoch": 0.9833092465074103, "grad_norm": 0.0, - "learning_rate": 1.2313263779275553e-08, - "loss": 0.7614, + "learning_rate": 1.4607664271394374e-08, + "loss": 0.9005, "step": 34700 }, { - "epoch": 0.9847048808172532, + "epoch": 0.9833375839496727, "grad_norm": 0.0, - "learning_rate": 1.226771064324228e-08, - "loss": 0.7856, + "learning_rate": 1.4558116829906931e-08, + "loss": 0.7686, "step": 34701 }, { - "epoch": 0.984733257661748, + "epoch": 0.9833659213919351, "grad_norm": 0.0, - "learning_rate": 1.2222241874031427e-08, - "loss": 0.7902, + "learning_rate": 1.4508653499504256e-08, + "loss": 0.7815, "step": 34702 }, { - "epoch": 0.9847616345062429, + "epoch": 0.9833942588341976, "grad_norm": 0.0, - "learning_rate": 1.2176857472029347e-08, - "loss": 0.8182, + "learning_rate": 1.4459274280603786e-08, + "loss": 0.7623, "step": 34703 }, { - "epoch": 0.9847900113507378, + "epoch": 0.9834225962764601, "grad_norm": 0.0, - "learning_rate": 1.2131557437617958e-08, - "loss": 0.7868, + "learning_rate": 1.4409979173620747e-08, + "loss": 0.7936, "step": 34704 }, { - "epoch": 0.9848183881952327, + "epoch": 0.9834509337187225, "grad_norm": 0.0, - "learning_rate": 1.2086341771180288e-08, - "loss": 0.836, + "learning_rate": 1.4360768178969253e-08, + "loss": 0.7814, "step": 34705 }, { - "epoch": 0.9848467650397276, + "epoch": 0.983479271160985, "grad_norm": 0.0, - "learning_rate": 1.2041210473098253e-08, - "loss": 0.7805, + "learning_rate": 1.4311641297066747e-08, + "loss": 0.8534, "step": 34706 }, { - "epoch": 0.9848751418842224, + "epoch": 0.9835076086032475, "grad_norm": 0.0, - "learning_rate": 1.199616354375377e-08, - "loss": 0.7172, + "learning_rate": 1.4262598528325122e-08, + "loss": 0.8217, "step": 34707 }, { - "epoch": 0.9849035187287174, + "epoch": 0.98353594604551, "grad_norm": 0.0, - "learning_rate": 1.1951200983526534e-08, - "loss": 0.8845, + "learning_rate": 1.421363987315738e-08, + "loss": 0.8352, "step": 34708 }, { - "epoch": 0.9849318955732123, + "epoch": 0.9835642834877724, "grad_norm": 0.0, - "learning_rate": 1.1906322792795132e-08, - "loss": 0.805, + "learning_rate": 1.4164765331976527e-08, + "loss": 0.7937, "step": 34709 }, { - "epoch": 0.9849602724177071, + "epoch": 0.9835926209300349, "grad_norm": 0.0, - "learning_rate": 1.1861528971941482e-08, - "loss": 0.7938, + "learning_rate": 1.4115974905193342e-08, + "loss": 0.8466, "step": 34710 }, { - "epoch": 0.9849886492622021, + "epoch": 0.9836209583722973, "grad_norm": 0.0, - "learning_rate": 1.181681952134195e-08, - "loss": 0.8474, + "learning_rate": 1.4067268593219719e-08, + "loss": 0.7113, "step": 34711 }, { - "epoch": 0.985017026106697, + "epoch": 0.9836492958145597, "grad_norm": 0.0, - "learning_rate": 1.1772194441374008e-08, - "loss": 0.6924, + "learning_rate": 1.401864639646533e-08, + "loss": 0.8062, "step": 34712 }, { - "epoch": 0.9850454029511918, + "epoch": 0.9836776332568222, "grad_norm": 0.0, - "learning_rate": 1.1727653732416245e-08, - "loss": 0.788, + "learning_rate": 1.3970108315340958e-08, + "loss": 0.8199, "step": 34713 }, { - "epoch": 0.9850737797956867, + "epoch": 0.9837059706990847, "grad_norm": 0.0, - "learning_rate": 1.1683197394843915e-08, - "loss": 0.7589, + "learning_rate": 1.3921654350254054e-08, + "loss": 0.7992, "step": 34714 }, { - "epoch": 0.9851021566401816, + "epoch": 0.9837343081413472, "grad_norm": 0.0, - "learning_rate": 1.1638825429033384e-08, - "loss": 0.8047, + "learning_rate": 1.3873284501614292e-08, + "loss": 0.7883, "step": 34715 }, { - "epoch": 0.9851305334846765, + "epoch": 0.9837626455836096, "grad_norm": 0.0, - "learning_rate": 1.1594537835357689e-08, - "loss": 0.7352, + "learning_rate": 1.3824998769826902e-08, + "loss": 0.8115, "step": 34716 }, { - "epoch": 0.9851589103291714, + "epoch": 0.9837909830258721, "grad_norm": 0.0, - "learning_rate": 1.1550334614192082e-08, - "loss": 0.7872, + "learning_rate": 1.3776797155300448e-08, + "loss": 0.7822, "step": 34717 }, { - "epoch": 0.9851872871736663, + "epoch": 0.9838193204681346, "grad_norm": 0.0, - "learning_rate": 1.15062157659096e-08, - "loss": 0.8119, + "learning_rate": 1.3728679658440159e-08, + "loss": 0.7786, "step": 34718 }, { - "epoch": 0.9852156640181612, + "epoch": 0.983847657910397, "grad_norm": 0.0, - "learning_rate": 1.1462181290883279e-08, - "loss": 0.8228, + "learning_rate": 1.3680646279651266e-08, + "loss": 0.8116, "step": 34719 }, { - "epoch": 0.985244040862656, + "epoch": 0.9838759953526595, "grad_norm": 0.0, - "learning_rate": 1.141823118948504e-08, - "loss": 0.8234, + "learning_rate": 1.3632697019339003e-08, + "loss": 0.6971, "step": 34720 }, { - "epoch": 0.985272417707151, + "epoch": 0.983904332794922, "grad_norm": 0.0, - "learning_rate": 1.137436546208681e-08, - "loss": 0.7854, + "learning_rate": 1.358483187790638e-08, + "loss": 0.7774, "step": 34721 }, { - "epoch": 0.9853007945516459, + "epoch": 0.9839326702371843, "grad_norm": 0.0, - "learning_rate": 1.1330584109057185e-08, - "loss": 0.7711, + "learning_rate": 1.3537050855757516e-08, + "loss": 0.763, "step": 34722 }, { - "epoch": 0.9853291713961407, + "epoch": 0.9839610076794468, "grad_norm": 0.0, - "learning_rate": 1.1286887130766977e-08, - "loss": 0.851, + "learning_rate": 1.3489353953294315e-08, + "loss": 0.707, "step": 34723 }, { - "epoch": 0.9853575482406356, + "epoch": 0.9839893451217093, "grad_norm": 0.0, - "learning_rate": 1.1243274527587001e-08, - "loss": 0.7662, + "learning_rate": 1.3441741170918676e-08, + "loss": 0.825, "step": 34724 }, { - "epoch": 0.9853859250851306, + "epoch": 0.9840176825639718, "grad_norm": 0.0, - "learning_rate": 1.1199746299882518e-08, - "loss": 0.6891, + "learning_rate": 1.339421250903139e-08, + "loss": 0.8488, "step": 34725 }, { - "epoch": 0.9854143019296254, + "epoch": 0.9840460200062342, "grad_norm": 0.0, - "learning_rate": 1.1156302448023237e-08, - "loss": 0.7844, + "learning_rate": 1.3346767968033247e-08, + "loss": 0.8469, "step": 34726 }, { - "epoch": 0.9854426787741203, + "epoch": 0.9840743574484967, "grad_norm": 0.0, - "learning_rate": 1.111294297237664e-08, - "loss": 0.6228, + "learning_rate": 1.3299407548323927e-08, + "loss": 0.8168, "step": 34727 }, { - "epoch": 0.9854710556186153, + "epoch": 0.9841026948907592, "grad_norm": 0.0, - "learning_rate": 1.1069667873306878e-08, - "loss": 0.867, + "learning_rate": 1.3252131250302003e-08, + "loss": 0.8245, "step": 34728 }, { - "epoch": 0.9854994324631101, + "epoch": 0.9841310323330216, "grad_norm": 0.0, - "learning_rate": 1.1026477151180325e-08, - "loss": 0.8443, + "learning_rate": 1.320493907436604e-08, + "loss": 0.7298, "step": 34729 }, { - "epoch": 0.985527809307605, + "epoch": 0.9841593697752841, "grad_norm": 0.0, - "learning_rate": 1.0983370806363359e-08, - "loss": 0.7537, + "learning_rate": 1.3157831020913503e-08, + "loss": 0.7795, "step": 34730 }, { - "epoch": 0.9855561861520998, + "epoch": 0.9841877072175466, "grad_norm": 0.0, - "learning_rate": 1.0940348839219018e-08, - "loss": 0.8326, + "learning_rate": 1.3110807090340738e-08, + "loss": 0.8185, "step": 34731 }, { - "epoch": 0.9855845629965948, + "epoch": 0.984216044659809, "grad_norm": 0.0, - "learning_rate": 1.0897411250109235e-08, - "loss": 0.7856, + "learning_rate": 1.3063867283045206e-08, + "loss": 0.6975, "step": 34732 }, { - "epoch": 0.9856129398410897, + "epoch": 0.9842443821020714, "grad_norm": 0.0, - "learning_rate": 1.0854558039399276e-08, - "loss": 0.8528, + "learning_rate": 1.3017011599419927e-08, + "loss": 0.8385, "step": 34733 }, { - "epoch": 0.9856413166855845, + "epoch": 0.9842727195443339, "grad_norm": 0.0, - "learning_rate": 1.0811789207448853e-08, - "loss": 0.7157, + "learning_rate": 1.2970240039861248e-08, + "loss": 0.7418, "step": 34734 }, { - "epoch": 0.9856696935300795, + "epoch": 0.9843010569865964, "grad_norm": 0.0, - "learning_rate": 1.0769104754621006e-08, - "loss": 0.7547, + "learning_rate": 1.2923552604763301e-08, + "loss": 0.8067, "step": 34735 }, { - "epoch": 0.9856980703745744, + "epoch": 0.9843293944288588, "grad_norm": 0.0, - "learning_rate": 1.0726504681275452e-08, - "loss": 0.8409, + "learning_rate": 1.2876949294517993e-08, + "loss": 0.7825, "step": 34736 }, { - "epoch": 0.9857264472190692, + "epoch": 0.9843577318711213, "grad_norm": 0.0, - "learning_rate": 1.0683988987773008e-08, - "loss": 0.8161, + "learning_rate": 1.2830430109519454e-08, + "loss": 0.7843, "step": 34737 }, { - "epoch": 0.9857548240635642, + "epoch": 0.9843860693133838, "grad_norm": 0.0, - "learning_rate": 1.064155767447006e-08, - "loss": 0.7941, + "learning_rate": 1.2783995050158483e-08, + "loss": 0.7635, "step": 34738 }, { - "epoch": 0.985783200908059, + "epoch": 0.9844144067556463, "grad_norm": 0.0, - "learning_rate": 1.0599210741728538e-08, - "loss": 0.737, + "learning_rate": 1.2737644116826986e-08, + "loss": 0.868, "step": 34739 }, { - "epoch": 0.9858115777525539, + "epoch": 0.9844427441979087, "grad_norm": 0.0, - "learning_rate": 1.0556948189903715e-08, - "loss": 0.7322, + "learning_rate": 1.2691377309915765e-08, + "loss": 0.6603, "step": 34740 }, { - "epoch": 0.9858399545970488, + "epoch": 0.9844710816401712, "grad_norm": 0.0, - "learning_rate": 1.0514770019354193e-08, - "loss": 0.7336, + "learning_rate": 1.2645194629812285e-08, + "loss": 0.7281, "step": 34741 }, { - "epoch": 0.9858683314415437, + "epoch": 0.9844994190824337, "grad_norm": 0.0, - "learning_rate": 1.0472676230435242e-08, - "loss": 0.6986, + "learning_rate": 1.2599096076907347e-08, + "loss": 0.8208, "step": 34742 }, { - "epoch": 0.9858967082860386, + "epoch": 0.984527756524696, "grad_norm": 0.0, - "learning_rate": 1.0430666823502134e-08, - "loss": 0.9004, + "learning_rate": 1.2553081651589527e-08, + "loss": 0.7823, "step": 34743 }, { - "epoch": 0.9859250851305335, + "epoch": 0.9845560939669585, "grad_norm": 0.0, - "learning_rate": 1.038874179891014e-08, - "loss": 0.831, + "learning_rate": 1.2507151354245184e-08, + "loss": 0.8216, "step": 34744 }, { - "epoch": 0.9859534619750284, + "epoch": 0.984584431409221, "grad_norm": 0.0, - "learning_rate": 1.0346901157014532e-08, - "loss": 0.782, + "learning_rate": 1.2461305185262895e-08, + "loss": 1.0058, "step": 34745 }, { - "epoch": 0.9859818388195233, + "epoch": 0.9846127688514834, "grad_norm": 0.0, - "learning_rate": 1.0305144898166142e-08, - "loss": 0.7109, + "learning_rate": 1.2415543145026798e-08, + "loss": 0.7296, "step": 34746 }, { - "epoch": 0.9860102156640181, + "epoch": 0.9846411062937459, "grad_norm": 0.0, - "learning_rate": 1.0263473022720239e-08, - "loss": 0.8573, + "learning_rate": 1.236986523392325e-08, + "loss": 0.8268, "step": 34747 }, { - "epoch": 0.986038592508513, + "epoch": 0.9846694437360084, "grad_norm": 0.0, - "learning_rate": 1.0221885531027653e-08, - "loss": 0.9096, + "learning_rate": 1.232427145233861e-08, + "loss": 0.9178, "step": 34748 }, { - "epoch": 0.986066969353008, + "epoch": 0.9846977811782709, "grad_norm": 0.0, - "learning_rate": 1.0180382423440327e-08, - "loss": 0.7496, + "learning_rate": 1.2278761800653682e-08, + "loss": 0.8598, "step": 34749 }, { - "epoch": 0.9860953461975028, + "epoch": 0.9847261186205333, "grad_norm": 0.0, - "learning_rate": 1.013896370030798e-08, - "loss": 0.8148, + "learning_rate": 1.2233336279254825e-08, + "loss": 0.8429, "step": 34750 }, { - "epoch": 0.9861237230419977, + "epoch": 0.9847544560627958, "grad_norm": 0.0, - "learning_rate": 1.0097629361981442e-08, - "loss": 0.7682, + "learning_rate": 1.2187994888522847e-08, + "loss": 0.8809, "step": 34751 }, { - "epoch": 0.9861520998864927, + "epoch": 0.9847827935050583, "grad_norm": 0.0, - "learning_rate": 1.0056379408808214e-08, - "loss": 0.653, + "learning_rate": 1.2142737628840773e-08, + "loss": 0.7706, "step": 34752 }, { - "epoch": 0.9861804767309875, + "epoch": 0.9848111309473206, "grad_norm": 0.0, - "learning_rate": 1.0015213841139126e-08, - "loss": 0.8098, + "learning_rate": 1.20975645005883e-08, + "loss": 0.7211, "step": 34753 }, { - "epoch": 0.9862088535754824, + "epoch": 0.9848394683895831, "grad_norm": 0.0, - "learning_rate": 9.974132659319457e-09, - "loss": 0.8175, + "learning_rate": 1.2052475504148453e-08, + "loss": 0.8384, "step": 34754 }, { - "epoch": 0.9862372304199774, + "epoch": 0.9848678058318456, "grad_norm": 0.0, - "learning_rate": 9.933135863697818e-09, - "loss": 0.808, + "learning_rate": 1.20074706398976e-08, + "loss": 0.8752, "step": 34755 }, { - "epoch": 0.9862656072644722, + "epoch": 0.9848961432741081, "grad_norm": 0.0, - "learning_rate": 9.892223454620597e-09, - "loss": 0.8142, + "learning_rate": 1.1962549908218769e-08, + "loss": 0.8788, "step": 34756 }, { - "epoch": 0.9862939841089671, + "epoch": 0.9849244807163705, "grad_norm": 0.0, - "learning_rate": 9.851395432431965e-09, - "loss": 0.7806, + "learning_rate": 1.1917713309487212e-08, + "loss": 0.7345, "step": 34757 }, { - "epoch": 0.9863223609534619, + "epoch": 0.984952818158633, "grad_norm": 0.0, - "learning_rate": 9.81065179747831e-09, - "loss": 0.8434, + "learning_rate": 1.1872960844082626e-08, + "loss": 0.7758, "step": 34758 }, { - "epoch": 0.9863507377979569, + "epoch": 0.9849811556008955, "grad_norm": 0.0, - "learning_rate": 9.769992550102692e-09, - "loss": 0.7824, + "learning_rate": 1.1828292512380269e-08, + "loss": 0.7503, "step": 34759 }, { - "epoch": 0.9863791146424518, + "epoch": 0.9850094930431579, "grad_norm": 0.0, - "learning_rate": 9.729417690649279e-09, - "loss": 0.8035, + "learning_rate": 1.1783708314756503e-08, + "loss": 0.8939, "step": 34760 }, { - "epoch": 0.9864074914869466, + "epoch": 0.9850378304854204, "grad_norm": 0.0, - "learning_rate": 9.688927219460021e-09, - "loss": 0.7984, + "learning_rate": 1.1739208251588807e-08, + "loss": 0.8195, "step": 34761 }, { - "epoch": 0.9864358683314416, + "epoch": 0.9850661679276829, "grad_norm": 0.0, - "learning_rate": 9.648521136877975e-09, - "loss": 0.7244, + "learning_rate": 1.1694792323250215e-08, + "loss": 0.7859, "step": 34762 }, { - "epoch": 0.9864642451759364, + "epoch": 0.9850945053699454, "grad_norm": 0.0, - "learning_rate": 9.608199443243981e-09, - "loss": 0.7151, + "learning_rate": 1.1650460530115981e-08, + "loss": 0.7659, "step": 34763 }, { - "epoch": 0.9864926220204313, + "epoch": 0.9851228428122077, "grad_norm": 0.0, - "learning_rate": 9.56796213889888e-09, - "loss": 0.8316, + "learning_rate": 1.1606212872559142e-08, + "loss": 0.8143, "step": 34764 }, { - "epoch": 0.9865209988649262, + "epoch": 0.9851511802544702, "grad_norm": 0.0, - "learning_rate": 9.527809224182393e-09, - "loss": 0.7531, + "learning_rate": 1.1562049350951621e-08, + "loss": 0.8715, "step": 34765 }, { - "epoch": 0.9865493757094211, + "epoch": 0.9851795176967327, "grad_norm": 0.0, - "learning_rate": 9.487740699433145e-09, - "loss": 0.7522, + "learning_rate": 1.1517969965666454e-08, + "loss": 0.8481, "step": 34766 }, { - "epoch": 0.986577752553916, + "epoch": 0.9852078551389951, "grad_norm": 0.0, - "learning_rate": 9.447756564990863e-09, - "loss": 0.8646, + "learning_rate": 1.1473974717074455e-08, + "loss": 0.6844, "step": 34767 }, { - "epoch": 0.9866061293984109, + "epoch": 0.9852361925812576, "grad_norm": 0.0, - "learning_rate": 9.40785682119194e-09, - "loss": 0.7509, + "learning_rate": 1.1430063605546437e-08, + "loss": 0.6714, "step": 34768 }, { - "epoch": 0.9866345062429058, + "epoch": 0.9852645300235201, "grad_norm": 0.0, - "learning_rate": 9.368041468372779e-09, - "loss": 0.8241, + "learning_rate": 1.1386236631452108e-08, + "loss": 0.7757, "step": 34769 }, { - "epoch": 0.9866628830874007, + "epoch": 0.9852928674657825, "grad_norm": 0.0, - "learning_rate": 9.328310506873106e-09, - "loss": 0.761, + "learning_rate": 1.1342493795160059e-08, + "loss": 0.7331, "step": 34770 }, { - "epoch": 0.9866912599318955, + "epoch": 0.985321204908045, "grad_norm": 0.0, - "learning_rate": 9.288663937024877e-09, - "loss": 0.9023, + "learning_rate": 1.1298835097039995e-08, + "loss": 0.7764, "step": 34771 }, { - "epoch": 0.9867196367763905, + "epoch": 0.9853495423503075, "grad_norm": 0.0, - "learning_rate": 9.249101759164492e-09, - "loss": 0.7686, + "learning_rate": 1.1255260537459401e-08, + "loss": 0.7046, "step": 34772 }, { - "epoch": 0.9867480136208854, + "epoch": 0.98537787979257, "grad_norm": 0.0, - "learning_rate": 9.209623973626126e-09, - "loss": 0.774, + "learning_rate": 1.1211770116784649e-08, + "loss": 0.9569, "step": 34773 }, { - "epoch": 0.9867763904653802, + "epoch": 0.9854062172348323, "grad_norm": 0.0, - "learning_rate": 9.170230580742844e-09, - "loss": 0.7359, + "learning_rate": 1.1168363835382113e-08, + "loss": 0.7611, "step": 34774 }, { - "epoch": 0.9868047673098751, + "epoch": 0.9854345546770948, "grad_norm": 0.0, - "learning_rate": 9.130921580848829e-09, - "loss": 0.8737, + "learning_rate": 1.1125041693617056e-08, + "loss": 0.8079, "step": 34775 }, { - "epoch": 0.9868331441543701, + "epoch": 0.9854628921193573, "grad_norm": 0.0, - "learning_rate": 9.091696974273812e-09, - "loss": 0.8223, + "learning_rate": 1.1081803691855853e-08, + "loss": 0.7027, "step": 34776 }, { - "epoch": 0.9868615209988649, + "epoch": 0.9854912295616197, "grad_norm": 0.0, - "learning_rate": 9.052556761351972e-09, - "loss": 0.7975, + "learning_rate": 1.1038649830462656e-08, + "loss": 0.7931, "step": 34777 }, { - "epoch": 0.9868898978433598, + "epoch": 0.9855195670038822, "grad_norm": 0.0, - "learning_rate": 9.013500942410824e-09, - "loss": 0.8358, + "learning_rate": 1.0995580109799397e-08, + "loss": 0.8599, "step": 34778 }, { - "epoch": 0.9869182746878548, + "epoch": 0.9855479044461447, "grad_norm": 0.0, - "learning_rate": 8.974529517782327e-09, - "loss": 0.8293, + "learning_rate": 1.0952594530230232e-08, + "loss": 0.8207, "step": 34779 }, { - "epoch": 0.9869466515323496, + "epoch": 0.9855762418884072, "grad_norm": 0.0, - "learning_rate": 8.935642487795104e-09, - "loss": 0.8159, + "learning_rate": 1.090969309211598e-08, + "loss": 0.7304, "step": 34780 }, { - "epoch": 0.9869750283768445, + "epoch": 0.9856045793306696, "grad_norm": 0.0, - "learning_rate": 8.896839852777783e-09, - "loss": 0.8117, + "learning_rate": 1.0866875795818576e-08, + "loss": 0.7834, "step": 34781 }, { - "epoch": 0.9870034052213393, + "epoch": 0.9856329167729321, "grad_norm": 0.0, - "learning_rate": 8.85812161305788e-09, - "loss": 0.7773, + "learning_rate": 1.0824142641699952e-08, + "loss": 0.9206, "step": 34782 }, { - "epoch": 0.9870317820658343, + "epoch": 0.9856612542151946, "grad_norm": 0.0, - "learning_rate": 8.819487768961799e-09, - "loss": 0.7884, + "learning_rate": 1.078149363011871e-08, + "loss": 0.7398, "step": 34783 }, { - "epoch": 0.9870601589103292, + "epoch": 0.985689591657457, "grad_norm": 0.0, - "learning_rate": 8.780938320817057e-09, - "loss": 0.8395, + "learning_rate": 1.0738928761433453e-08, + "loss": 0.9376, "step": 34784 }, { - "epoch": 0.987088535754824, + "epoch": 0.9857179290997194, "grad_norm": 0.0, - "learning_rate": 8.742473268950059e-09, - "loss": 0.8335, + "learning_rate": 1.0696448036003892e-08, + "loss": 0.848, "step": 34785 }, { - "epoch": 0.987116912599319, + "epoch": 0.9857462665419819, "grad_norm": 0.0, - "learning_rate": 8.704092613682768e-09, - "loss": 0.8647, + "learning_rate": 1.065405145418863e-08, + "loss": 0.8799, "step": 34786 }, { - "epoch": 0.9871452894438139, + "epoch": 0.9857746039842444, "grad_norm": 0.0, - "learning_rate": 8.665796355342703e-09, - "loss": 0.7576, + "learning_rate": 1.0611739016342937e-08, + "loss": 0.8117, "step": 34787 }, { - "epoch": 0.9871736662883087, + "epoch": 0.9858029414265068, "grad_norm": 0.0, - "learning_rate": 8.627584494250719e-09, - "loss": 0.8165, + "learning_rate": 1.0569510722823194e-08, + "loss": 0.7505, "step": 34788 }, { - "epoch": 0.9872020431328037, + "epoch": 0.9858312788687693, "grad_norm": 0.0, - "learning_rate": 8.589457030730997e-09, - "loss": 0.8238, + "learning_rate": 1.0527366573986897e-08, + "loss": 0.9994, "step": 34789 }, { - "epoch": 0.9872304199772985, + "epoch": 0.9858596163110318, "grad_norm": 0.0, - "learning_rate": 8.551413965105505e-09, - "loss": 0.7544, + "learning_rate": 1.0485306570188203e-08, + "loss": 0.8306, "step": 34790 }, { - "epoch": 0.9872587968217934, + "epoch": 0.9858879537532942, "grad_norm": 0.0, - "learning_rate": 8.513455297695095e-09, - "loss": 0.8112, + "learning_rate": 1.0443330711781275e-08, + "loss": 0.7635, "step": 34791 }, { - "epoch": 0.9872871736662883, + "epoch": 0.9859162911955567, "grad_norm": 0.0, - "learning_rate": 8.475581028820623e-09, - "loss": 0.849, + "learning_rate": 1.0401438999119163e-08, + "loss": 0.8909, "step": 34792 }, { - "epoch": 0.9873155505107832, + "epoch": 0.9859446286378192, "grad_norm": 0.0, - "learning_rate": 8.437791158801833e-09, - "loss": 0.7329, + "learning_rate": 1.0359631432556027e-08, + "loss": 0.7302, "step": 34793 }, { - "epoch": 0.9873439273552781, + "epoch": 0.9859729660800816, "grad_norm": 0.0, - "learning_rate": 8.40008568795847e-09, - "loss": 0.8339, + "learning_rate": 1.0317908012442701e-08, + "loss": 0.8688, "step": 34794 }, { - "epoch": 0.987372304199773, + "epoch": 0.986001303522344, "grad_norm": 0.0, - "learning_rate": 8.362464616609168e-09, - "loss": 0.8076, + "learning_rate": 1.0276268739131124e-08, + "loss": 0.9222, "step": 34795 }, { - "epoch": 0.9874006810442679, + "epoch": 0.9860296409646065, "grad_norm": 0.0, - "learning_rate": 8.324927945070337e-09, - "loss": 0.8047, + "learning_rate": 1.0234713612973234e-08, + "loss": 0.7732, "step": 34796 }, { - "epoch": 0.9874290578887628, + "epoch": 0.986057978406869, "grad_norm": 0.0, - "learning_rate": 8.287475673660617e-09, - "loss": 0.7888, + "learning_rate": 1.0193242634316536e-08, + "loss": 0.8763, "step": 34797 }, { - "epoch": 0.9874574347332576, + "epoch": 0.9860863158491314, "grad_norm": 0.0, - "learning_rate": 8.250107802696416e-09, - "loss": 0.9285, + "learning_rate": 1.0151855803512967e-08, + "loss": 0.8503, "step": 34798 }, { - "epoch": 0.9874858115777525, + "epoch": 0.9861146532913939, "grad_norm": 0.0, - "learning_rate": 8.212824332491931e-09, - "loss": 0.8379, + "learning_rate": 1.0110553120908917e-08, + "loss": 0.6525, "step": 34799 }, { - "epoch": 0.9875141884222475, + "epoch": 0.9861429907336564, "grad_norm": 0.0, - "learning_rate": 8.175625263362463e-09, - "loss": 0.8277, + "learning_rate": 1.0069334586854106e-08, + "loss": 0.7831, "step": 34800 }, { - "epoch": 0.9875425652667423, + "epoch": 0.9861713281759188, "grad_norm": 0.0, - "learning_rate": 8.138510595623317e-09, - "loss": 0.9031, + "learning_rate": 1.0028200201693816e-08, + "loss": 0.7458, "step": 34801 }, { - "epoch": 0.9875709421112372, + "epoch": 0.9861996656181813, "grad_norm": 0.0, - "learning_rate": 8.101480329587574e-09, - "loss": 0.8211, + "learning_rate": 9.987149965776655e-09, + "loss": 0.8305, "step": 34802 }, { - "epoch": 0.9875993189557322, + "epoch": 0.9862280030604438, "grad_norm": 0.0, - "learning_rate": 8.06453446556721e-09, - "loss": 0.9397, + "learning_rate": 9.946183879447901e-09, + "loss": 0.8593, "step": 34803 }, { - "epoch": 0.987627695800227, + "epoch": 0.9862563405027063, "grad_norm": 0.0, - "learning_rate": 8.027673003875302e-09, - "loss": 0.7521, + "learning_rate": 9.905301943050615e-09, + "loss": 0.7628, "step": 34804 }, { - "epoch": 0.9876560726447219, + "epoch": 0.9862846779449687, "grad_norm": 0.0, - "learning_rate": 7.990895944821609e-09, - "loss": 0.8217, + "learning_rate": 9.864504156932297e-09, + "loss": 0.7666, "step": 34805 }, { - "epoch": 0.9876844494892169, + "epoch": 0.9863130153872312, "grad_norm": 0.0, - "learning_rate": 7.954203288719209e-09, - "loss": 0.7532, + "learning_rate": 9.823790521433785e-09, + "loss": 0.7973, "step": 34806 }, { - "epoch": 0.9877128263337117, + "epoch": 0.9863413528294936, "grad_norm": 0.0, - "learning_rate": 7.917595035876746e-09, - "loss": 0.7685, + "learning_rate": 9.783161036900357e-09, + "loss": 0.6564, "step": 34807 }, { - "epoch": 0.9877412031782066, + "epoch": 0.986369690271756, "grad_norm": 0.0, - "learning_rate": 7.881071186602863e-09, - "loss": 0.7686, + "learning_rate": 9.742615703671742e-09, + "loss": 0.7955, "step": 34808 }, { - "epoch": 0.9877695800227014, + "epoch": 0.9863980277140185, "grad_norm": 0.0, - "learning_rate": 7.84463174120731e-09, - "loss": 0.7693, + "learning_rate": 9.702154522092111e-09, + "loss": 0.8076, "step": 34809 }, { - "epoch": 0.9877979568671964, + "epoch": 0.986426365156281, "grad_norm": 0.0, - "learning_rate": 7.80827669999762e-09, - "loss": 0.8303, + "learning_rate": 9.66177749250008e-09, + "loss": 0.8379, "step": 34810 }, { - "epoch": 0.9878263337116913, + "epoch": 0.9864547025985435, "grad_norm": 0.0, - "learning_rate": 7.772006063280214e-09, - "loss": 0.8235, + "learning_rate": 9.621484615237598e-09, + "loss": 0.8044, "step": 34811 }, { - "epoch": 0.9878547105561861, + "epoch": 0.9864830400408059, "grad_norm": 0.0, - "learning_rate": 7.735819831361513e-09, - "loss": 0.7691, + "learning_rate": 9.581275890643282e-09, + "loss": 0.7991, "step": 34812 }, { - "epoch": 0.9878830874006811, + "epoch": 0.9865113774830684, "grad_norm": 0.0, - "learning_rate": 7.69971800454794e-09, - "loss": 0.7897, + "learning_rate": 9.541151319054643e-09, + "loss": 0.7741, "step": 34813 }, { - "epoch": 0.987911464245176, + "epoch": 0.9865397149253309, "grad_norm": 0.0, - "learning_rate": 7.663700583144806e-09, - "loss": 0.8728, + "learning_rate": 9.501110900811405e-09, + "loss": 0.8489, "step": 34814 }, { - "epoch": 0.9879398410896708, + "epoch": 0.9865680523675933, "grad_norm": 0.0, - "learning_rate": 7.627767567454092e-09, - "loss": 0.7962, + "learning_rate": 9.46115463624997e-09, + "loss": 0.792, "step": 34815 }, { - "epoch": 0.9879682179341657, + "epoch": 0.9865963898098558, "grad_norm": 0.0, - "learning_rate": 7.591918957782219e-09, - "loss": 0.8728, + "learning_rate": 9.421282525707842e-09, + "loss": 0.794, "step": 34816 }, { - "epoch": 0.9879965947786606, + "epoch": 0.9866247272521182, "grad_norm": 0.0, - "learning_rate": 7.556154754428946e-09, - "loss": 0.8706, + "learning_rate": 9.3814945695192e-09, + "loss": 0.795, "step": 34817 }, { - "epoch": 0.9880249716231555, + "epoch": 0.9866530646943806, "grad_norm": 0.0, - "learning_rate": 7.520474957699586e-09, - "loss": 0.7683, + "learning_rate": 9.341790768020443e-09, + "loss": 0.7616, "step": 34818 }, { - "epoch": 0.9880533484676504, + "epoch": 0.9866814021366431, "grad_norm": 0.0, - "learning_rate": 7.48487956789279e-09, - "loss": 0.8426, + "learning_rate": 9.302171121546855e-09, + "loss": 0.8037, "step": 34819 }, { - "epoch": 0.9880817253121453, + "epoch": 0.9867097395789056, "grad_norm": 0.0, - "learning_rate": 7.449368585311645e-09, - "loss": 0.7284, + "learning_rate": 9.262635630429284e-09, + "loss": 0.8246, "step": 34820 }, { - "epoch": 0.9881101021566402, + "epoch": 0.9867380770211681, "grad_norm": 0.0, - "learning_rate": 7.4139420102536944e-09, - "loss": 0.8666, + "learning_rate": 9.223184295004129e-09, + "loss": 0.8453, "step": 34821 }, { - "epoch": 0.988138479001135, + "epoch": 0.9867664144634305, "grad_norm": 0.0, - "learning_rate": 7.378599843019807e-09, - "loss": 0.7048, + "learning_rate": 9.183817115601124e-09, + "loss": 0.8048, "step": 34822 }, { - "epoch": 0.98816685584563, + "epoch": 0.986794751905693, "grad_norm": 0.0, - "learning_rate": 7.343342083908634e-09, - "loss": 0.8311, + "learning_rate": 9.144534092552227e-09, + "loss": 0.7519, "step": 34823 }, { - "epoch": 0.9881952326901249, + "epoch": 0.9868230893479555, "grad_norm": 0.0, - "learning_rate": 7.308168733216603e-09, - "loss": 0.758, + "learning_rate": 9.105335226190504e-09, + "loss": 0.7218, "step": 34824 }, { - "epoch": 0.9882236095346197, + "epoch": 0.9868514267902179, "grad_norm": 0.0, - "learning_rate": 7.273079791242366e-09, - "loss": 0.7932, + "learning_rate": 9.066220516843471e-09, + "loss": 0.8088, "step": 34825 }, { - "epoch": 0.9882519863791146, + "epoch": 0.9868797642324804, "grad_norm": 0.0, - "learning_rate": 7.238075258281241e-09, - "loss": 0.804, + "learning_rate": 9.027189964841977e-09, + "loss": 0.8489, "step": 34826 }, { - "epoch": 0.9882803632236096, + "epoch": 0.9869081016747429, "grad_norm": 0.0, - "learning_rate": 7.203155134629658e-09, - "loss": 0.8182, + "learning_rate": 8.988243570513533e-09, + "loss": 0.9182, "step": 34827 }, { - "epoch": 0.9883087400681044, + "epoch": 0.9869364391170053, "grad_norm": 0.0, - "learning_rate": 7.168319420581826e-09, - "loss": 0.9338, + "learning_rate": 8.94938133418899e-09, + "loss": 0.7583, "step": 34828 }, { - "epoch": 0.9883371169125993, + "epoch": 0.9869647765592677, "grad_norm": 0.0, - "learning_rate": 7.1335681164319545e-09, - "loss": 0.8286, + "learning_rate": 8.91060325619253e-09, + "loss": 0.7509, "step": 34829 }, { - "epoch": 0.9883654937570943, + "epoch": 0.9869931140015302, "grad_norm": 0.0, - "learning_rate": 7.098901222474253e-09, - "loss": 0.797, + "learning_rate": 8.87190933685278e-09, + "loss": 0.8661, "step": 34830 }, { - "epoch": 0.9883938706015891, + "epoch": 0.9870214514437927, "grad_norm": 0.0, - "learning_rate": 7.0643187390018184e-09, - "loss": 0.7218, + "learning_rate": 8.833299576495036e-09, + "loss": 0.817, "step": 34831 }, { - "epoch": 0.988422247446084, + "epoch": 0.9870497888860551, "grad_norm": 0.0, - "learning_rate": 7.029820666306641e-09, - "loss": 0.6681, + "learning_rate": 8.794773975444592e-09, + "loss": 0.8079, "step": 34832 }, { - "epoch": 0.9884506242905788, + "epoch": 0.9870781263283176, "grad_norm": 0.0, - "learning_rate": 6.9954070046796e-09, - "loss": 0.7828, + "learning_rate": 8.756332534025635e-09, + "loss": 0.7266, "step": 34833 }, { - "epoch": 0.9884790011350738, + "epoch": 0.9871064637705801, "grad_norm": 0.0, - "learning_rate": 6.961077754410461e-09, - "loss": 0.7428, + "learning_rate": 8.71797525256235e-09, + "loss": 0.8401, "step": 34834 }, { - "epoch": 0.9885073779795687, + "epoch": 0.9871348012128426, "grad_norm": 0.0, - "learning_rate": 6.926832915791215e-09, - "loss": 0.7875, + "learning_rate": 8.67970213137781e-09, + "loss": 0.8081, "step": 34835 }, { - "epoch": 0.9885357548240635, + "epoch": 0.987163138655105, "grad_norm": 0.0, - "learning_rate": 6.892672489110519e-09, - "loss": 0.8204, + "learning_rate": 8.64151317079398e-09, + "loss": 0.8759, "step": 34836 }, { - "epoch": 0.9885641316685585, + "epoch": 0.9871914760973675, "grad_norm": 0.0, - "learning_rate": 6.8585964746559206e-09, - "loss": 0.7514, + "learning_rate": 8.603408371132826e-09, + "loss": 0.7579, "step": 34837 }, { - "epoch": 0.9885925085130534, + "epoch": 0.98721981353963, "grad_norm": 0.0, - "learning_rate": 6.824604872717188e-09, - "loss": 0.8242, + "learning_rate": 8.565387732716312e-09, + "loss": 0.754, "step": 34838 }, { - "epoch": 0.9886208853575482, + "epoch": 0.9872481509818923, "grad_norm": 0.0, - "learning_rate": 6.79069768357854e-09, - "loss": 0.7869, + "learning_rate": 8.527451255863073e-09, + "loss": 0.7075, "step": 34839 }, { - "epoch": 0.9886492622020431, + "epoch": 0.9872764884241548, "grad_norm": 0.0, - "learning_rate": 6.756874907529743e-09, - "loss": 0.7916, + "learning_rate": 8.489598940892851e-09, + "loss": 0.7978, "step": 34840 }, { - "epoch": 0.988677639046538, + "epoch": 0.9873048258664173, "grad_norm": 0.0, - "learning_rate": 6.723136544853903e-09, - "loss": 0.7317, + "learning_rate": 8.451830788126503e-09, + "loss": 0.8689, "step": 34841 }, { - "epoch": 0.9887060158910329, + "epoch": 0.9873331633086797, "grad_norm": 0.0, - "learning_rate": 6.6894825958363495e-09, - "loss": 0.8352, + "learning_rate": 8.41414679787933e-09, + "loss": 0.8864, "step": 34842 }, { - "epoch": 0.9887343927355278, + "epoch": 0.9873615007509422, "grad_norm": 0.0, - "learning_rate": 6.6559130607635196e-09, - "loss": 0.7977, + "learning_rate": 8.376546970471077e-09, + "loss": 0.8345, "step": 34843 }, { - "epoch": 0.9887627695800227, + "epoch": 0.9873898381932047, "grad_norm": 0.0, - "learning_rate": 6.622427939916298e-09, - "loss": 0.7949, + "learning_rate": 8.33903130621594e-09, + "loss": 0.8782, "step": 34844 }, { - "epoch": 0.9887911464245176, + "epoch": 0.9874181756354672, "grad_norm": 0.0, - "learning_rate": 6.589027233578904e-09, - "loss": 0.601, + "learning_rate": 8.301599805432547e-09, + "loss": 0.8807, "step": 34845 }, { - "epoch": 0.9888195232690125, + "epoch": 0.9874465130777296, "grad_norm": 0.0, - "learning_rate": 6.555710942033333e-09, - "loss": 0.8125, + "learning_rate": 8.264252468435097e-09, + "loss": 0.8108, "step": 34846 }, { - "epoch": 0.9888479001135074, + "epoch": 0.9874748505199921, "grad_norm": 0.0, - "learning_rate": 6.522479065561582e-09, - "loss": 0.7974, + "learning_rate": 8.226989295537779e-09, + "loss": 0.8348, "step": 34847 }, { - "epoch": 0.9888762769580023, + "epoch": 0.9875031879622546, "grad_norm": 0.0, - "learning_rate": 6.489331604442317e-09, - "loss": 0.7568, + "learning_rate": 8.189810287055899e-09, + "loss": 0.8191, "step": 34848 }, { - "epoch": 0.9889046538024971, + "epoch": 0.9875315254045169, "grad_norm": 0.0, - "learning_rate": 6.456268558957534e-09, - "loss": 0.8465, + "learning_rate": 8.152715443300318e-09, + "loss": 0.7423, "step": 34849 }, { - "epoch": 0.988933030646992, + "epoch": 0.9875598628467794, "grad_norm": 0.0, - "learning_rate": 6.423289929385901e-09, - "loss": 0.6561, + "learning_rate": 8.11570476458523e-09, + "loss": 0.8158, "step": 34850 }, { - "epoch": 0.988961407491487, + "epoch": 0.9875882002890419, "grad_norm": 0.0, - "learning_rate": 6.390395716006081e-09, - "loss": 0.76, + "learning_rate": 8.078778251222608e-09, + "loss": 0.7128, "step": 34851 }, { - "epoch": 0.9889897843359818, + "epoch": 0.9876165377313044, "grad_norm": 0.0, - "learning_rate": 6.3575859190956325e-09, - "loss": 0.7922, + "learning_rate": 8.041935903522202e-09, + "loss": 0.8238, "step": 34852 }, { - "epoch": 0.9890181611804767, + "epoch": 0.9876448751735668, "grad_norm": 0.0, - "learning_rate": 6.32486053893211e-09, - "loss": 0.7789, + "learning_rate": 8.005177721794877e-09, + "loss": 0.8462, "step": 34853 }, { - "epoch": 0.9890465380249717, + "epoch": 0.9876732126158293, "grad_norm": 0.0, - "learning_rate": 6.292219575791958e-09, - "loss": 0.885, + "learning_rate": 7.968503706350384e-09, + "loss": 0.8082, "step": 34854 }, { - "epoch": 0.9890749148694665, + "epoch": 0.9877015500580918, "grad_norm": 0.0, - "learning_rate": 6.259663029950514e-09, - "loss": 0.8261, + "learning_rate": 7.931913857498474e-09, + "loss": 0.8212, "step": 34855 }, { - "epoch": 0.9891032917139614, + "epoch": 0.9877298875003542, "grad_norm": 0.0, - "learning_rate": 6.227190901683111e-09, - "loss": 0.8492, + "learning_rate": 7.895408175545571e-09, + "loss": 0.793, "step": 34856 }, { - "epoch": 0.9891316685584562, + "epoch": 0.9877582249426167, "grad_norm": 0.0, - "learning_rate": 6.194803191262866e-09, - "loss": 0.8294, + "learning_rate": 7.858986660800316e-09, + "loss": 0.7762, "step": 34857 }, { - "epoch": 0.9891600454029512, + "epoch": 0.9877865623848792, "grad_norm": 0.0, - "learning_rate": 6.162499898966223e-09, - "loss": 0.7909, + "learning_rate": 7.822649313569131e-09, + "loss": 0.7432, "step": 34858 }, { - "epoch": 0.9891884222474461, + "epoch": 0.9878148998271417, "grad_norm": 0.0, - "learning_rate": 6.1302810250629674e-09, - "loss": 0.7917, + "learning_rate": 7.786396134158437e-09, + "loss": 0.8998, "step": 34859 }, { - "epoch": 0.9892167990919409, + "epoch": 0.987843237269404, "grad_norm": 0.0, - "learning_rate": 6.098146569826213e-09, - "loss": 0.7218, + "learning_rate": 7.750227122873544e-09, + "loss": 0.8923, "step": 34860 }, { - "epoch": 0.9892451759364359, + "epoch": 0.9878715747116665, "grad_norm": 0.0, - "learning_rate": 6.066096533527965e-09, - "loss": 0.7416, + "learning_rate": 7.714142280019764e-09, + "loss": 0.987, "step": 34861 }, { - "epoch": 0.9892735527809308, + "epoch": 0.987899912153929, "grad_norm": 0.0, - "learning_rate": 6.034130916439118e-09, - "loss": 0.7796, + "learning_rate": 7.678141605899081e-09, + "loss": 0.8035, "step": 34862 }, { - "epoch": 0.9893019296254256, + "epoch": 0.9879282495961914, "grad_norm": 0.0, - "learning_rate": 6.0022497188283454e-09, - "loss": 0.7578, + "learning_rate": 7.642225100816802e-09, + "loss": 0.948, "step": 34863 }, { - "epoch": 0.9893303064699206, + "epoch": 0.9879565870384539, "grad_norm": 0.0, - "learning_rate": 5.970452940966542e-09, - "loss": 0.8033, + "learning_rate": 7.60639276507491e-09, + "loss": 0.8024, "step": 34864 }, { - "epoch": 0.9893586833144155, + "epoch": 0.9879849244807164, "grad_norm": 0.0, - "learning_rate": 5.9387405831212715e-09, - "loss": 0.8296, + "learning_rate": 7.570644598974274e-09, + "loss": 0.7637, "step": 34865 }, { - "epoch": 0.9893870601589103, + "epoch": 0.9880132619229788, "grad_norm": 0.0, - "learning_rate": 5.907112645561208e-09, - "loss": 0.8657, + "learning_rate": 7.534980602816877e-09, + "loss": 0.713, "step": 34866 }, { - "epoch": 0.9894154370034052, + "epoch": 0.9880415993652413, "grad_norm": 0.0, - "learning_rate": 5.875569128552805e-09, - "loss": 0.7682, + "learning_rate": 7.499400776902477e-09, + "loss": 0.7637, "step": 34867 }, { - "epoch": 0.9894438138479001, + "epoch": 0.9880699368075038, "grad_norm": 0.0, - "learning_rate": 5.8441100323625154e-09, - "loss": 0.9414, + "learning_rate": 7.463905121530834e-09, + "loss": 0.744, "step": 34868 }, { - "epoch": 0.989472190692395, + "epoch": 0.9880982742497663, "grad_norm": 0.0, - "learning_rate": 5.812735357255683e-09, - "loss": 0.7657, + "learning_rate": 7.42849363700282e-09, + "loss": 0.827, "step": 34869 }, { - "epoch": 0.9895005675368899, + "epoch": 0.9881266116920286, "grad_norm": 0.0, - "learning_rate": 5.781445103498762e-09, - "loss": 0.862, + "learning_rate": 7.393166323614865e-09, + "loss": 0.6825, "step": 34870 }, { - "epoch": 0.9895289443813848, + "epoch": 0.9881549491342911, "grad_norm": 0.0, - "learning_rate": 5.750239271355984e-09, - "loss": 0.7878, + "learning_rate": 7.357923181664506e-09, + "loss": 0.7527, "step": 34871 }, { - "epoch": 0.9895573212258797, + "epoch": 0.9881832865765536, "grad_norm": 0.0, - "learning_rate": 5.719117861088253e-09, - "loss": 0.6572, + "learning_rate": 7.3227642114492845e-09, + "loss": 0.7589, "step": 34872 }, { - "epoch": 0.9895856980703746, + "epoch": 0.988211624018816, "grad_norm": 0.0, - "learning_rate": 5.6880808729620205e-09, - "loss": 0.863, + "learning_rate": 7.28768941326452e-09, + "loss": 0.7403, "step": 34873 }, { - "epoch": 0.9896140749148694, + "epoch": 0.9882399614610785, "grad_norm": 0.0, - "learning_rate": 5.657128307237081e-09, - "loss": 0.726, + "learning_rate": 7.252698787406642e-09, + "loss": 0.7694, "step": 34874 }, { - "epoch": 0.9896424517593644, + "epoch": 0.988268298903341, "grad_norm": 0.0, - "learning_rate": 5.626260164176556e-09, - "loss": 0.7547, + "learning_rate": 7.21779233417097e-09, + "loss": 0.7758, "step": 34875 }, { - "epoch": 0.9896708286038592, + "epoch": 0.9882966363456035, "grad_norm": 0.0, - "learning_rate": 5.5954764440391275e-09, - "loss": 0.8437, + "learning_rate": 7.182970053849492e-09, + "loss": 0.7519, "step": 34876 }, { - "epoch": 0.9896992054483541, + "epoch": 0.9883249737878659, "grad_norm": 0.0, - "learning_rate": 5.564777147086809e-09, - "loss": 0.7551, + "learning_rate": 7.148231946736417e-09, + "loss": 0.7974, "step": 34877 }, { - "epoch": 0.9897275822928491, + "epoch": 0.9883533112301284, "grad_norm": 0.0, - "learning_rate": 5.534162273578281e-09, - "loss": 0.7852, + "learning_rate": 7.1135780131248445e-09, + "loss": 0.7612, "step": 34878 }, { - "epoch": 0.9897559591373439, + "epoch": 0.9883816486723909, "grad_norm": 0.0, - "learning_rate": 5.503631823771116e-09, - "loss": 0.7879, + "learning_rate": 7.079008253306763e-09, + "loss": 0.823, "step": 34879 }, { - "epoch": 0.9897843359818388, + "epoch": 0.9884099861146532, "grad_norm": 0.0, - "learning_rate": 5.473185797923997e-09, - "loss": 0.8202, + "learning_rate": 7.0445226675719404e-09, + "loss": 0.7587, "step": 34880 }, { - "epoch": 0.9898127128263338, + "epoch": 0.9884383235569157, "grad_norm": 0.0, - "learning_rate": 5.442824196294494e-09, - "loss": 0.8024, + "learning_rate": 7.010121256213476e-09, + "loss": 0.812, "step": 34881 }, { - "epoch": 0.9898410896708286, + "epoch": 0.9884666609991782, "grad_norm": 0.0, - "learning_rate": 5.41254701913907e-09, - "loss": 0.723, + "learning_rate": 6.975804019517807e-09, + "loss": 0.8431, "step": 34882 }, { - "epoch": 0.9898694665153235, + "epoch": 0.9884949984414407, "grad_norm": 0.0, - "learning_rate": 5.3823542667130745e-09, - "loss": 0.7791, + "learning_rate": 6.941570957776922e-09, + "loss": 0.8113, "step": 34883 }, { - "epoch": 0.9898978433598183, + "epoch": 0.9885233358837031, "grad_norm": 0.0, - "learning_rate": 5.352245939271861e-09, - "loss": 0.8589, + "learning_rate": 6.907422071278369e-09, + "loss": 0.7014, "step": 34884 }, { - "epoch": 0.9899262202043133, + "epoch": 0.9885516733259656, "grad_norm": 0.0, - "learning_rate": 5.322222037068558e-09, - "loss": 0.7241, + "learning_rate": 6.873357360308586e-09, + "loss": 0.8088, "step": 34885 }, { - "epoch": 0.9899545970488082, + "epoch": 0.9885800107682281, "grad_norm": 0.0, - "learning_rate": 5.292282560358519e-09, - "loss": 0.7873, + "learning_rate": 6.839376825155119e-09, + "loss": 0.8495, "step": 34886 }, { - "epoch": 0.989982973893303, + "epoch": 0.9886083482104905, "grad_norm": 0.0, - "learning_rate": 5.262427509393764e-09, - "loss": 0.7545, + "learning_rate": 6.805480466105519e-09, + "loss": 0.7792, "step": 34887 }, { - "epoch": 0.990011350737798, + "epoch": 0.988636685652753, "grad_norm": 0.0, - "learning_rate": 5.232656884426313e-09, - "loss": 0.811, + "learning_rate": 6.771668283442889e-09, + "loss": 0.8434, "step": 34888 }, { - "epoch": 0.9900397275822929, + "epoch": 0.9886650230950155, "grad_norm": 0.0, - "learning_rate": 5.202970685708186e-09, - "loss": 0.7871, + "learning_rate": 6.73794027745478e-09, + "loss": 0.677, "step": 34889 }, { - "epoch": 0.9900681044267877, + "epoch": 0.9886933605372779, "grad_norm": 0.0, - "learning_rate": 5.1733689134902954e-09, - "loss": 0.7478, + "learning_rate": 6.704296448423186e-09, + "loss": 0.781, "step": 34890 }, { - "epoch": 0.9900964812712826, + "epoch": 0.9887216979795403, "grad_norm": 0.0, - "learning_rate": 5.143851568021329e-09, - "loss": 0.8557, + "learning_rate": 6.670736796632326e-09, + "loss": 0.7632, "step": 34891 }, { - "epoch": 0.9901248581157776, + "epoch": 0.9887500354218028, "grad_norm": 0.0, - "learning_rate": 5.1144186495522e-09, - "loss": 0.8353, + "learning_rate": 6.637261322364197e-09, + "loss": 0.8031, "step": 34892 }, { - "epoch": 0.9901532349602724, + "epoch": 0.9887783728640653, "grad_norm": 0.0, - "learning_rate": 5.085070158330485e-09, - "loss": 0.828, + "learning_rate": 6.603870025901904e-09, + "loss": 0.7006, "step": 34893 }, { - "epoch": 0.9901816118047673, + "epoch": 0.9888067103063277, "grad_norm": 0.0, - "learning_rate": 5.055806094603766e-09, - "loss": 0.723, + "learning_rate": 6.570562907526335e-09, + "loss": 0.7057, "step": 34894 }, { - "epoch": 0.9902099886492622, + "epoch": 0.9888350477485902, "grad_norm": 0.0, - "learning_rate": 5.026626458620732e-09, - "loss": 0.7656, + "learning_rate": 6.537339967518374e-09, + "loss": 0.901, "step": 34895 }, { - "epoch": 0.9902383654937571, + "epoch": 0.9888633851908527, "grad_norm": 0.0, - "learning_rate": 4.997531250627851e-09, - "loss": 0.7159, + "learning_rate": 6.504201206156691e-09, + "loss": 0.8788, "step": 34896 }, { - "epoch": 0.990266742338252, + "epoch": 0.9888917226331151, "grad_norm": 0.0, - "learning_rate": 4.9685204708693755e-09, - "loss": 0.8667, + "learning_rate": 6.4711466237210585e-09, + "loss": 0.8224, "step": 34897 }, { - "epoch": 0.9902951191827469, + "epoch": 0.9889200600753776, "grad_norm": 0.0, - "learning_rate": 4.939594119590663e-09, - "loss": 0.7769, + "learning_rate": 6.438176220490144e-09, + "loss": 0.89, "step": 34898 }, { - "epoch": 0.9903234960272418, + "epoch": 0.9889483975176401, "grad_norm": 0.0, - "learning_rate": 4.910752197037072e-09, - "loss": 0.8093, + "learning_rate": 6.405289996741504e-09, + "loss": 0.8415, "step": 34899 }, { - "epoch": 0.9903518728717366, + "epoch": 0.9889767349599026, "grad_norm": 0.0, - "learning_rate": 4.881994703451742e-09, - "loss": 0.8086, + "learning_rate": 6.372487952751582e-09, + "loss": 0.8054, "step": 34900 }, { - "epoch": 0.9903802497162315, + "epoch": 0.989005072402165, "grad_norm": 0.0, - "learning_rate": 4.8533216390778125e-09, - "loss": 0.7197, + "learning_rate": 6.339770088797937e-09, + "loss": 0.7624, "step": 34901 }, { - "epoch": 0.9904086265607265, + "epoch": 0.9890334098444274, "grad_norm": 0.0, - "learning_rate": 4.8247330041562015e-09, - "loss": 0.8261, + "learning_rate": 6.307136405155901e-09, + "loss": 0.7081, "step": 34902 }, { - "epoch": 0.9904370034052213, + "epoch": 0.9890617472866899, "grad_norm": 0.0, - "learning_rate": 4.796228798930047e-09, - "loss": 0.7512, + "learning_rate": 6.274586902098589e-09, + "loss": 0.8156, "step": 34903 }, { - "epoch": 0.9904653802497162, + "epoch": 0.9890900847289523, "grad_norm": 0.0, - "learning_rate": 4.767809023639158e-09, - "loss": 0.7098, + "learning_rate": 6.242121579902449e-09, + "loss": 0.7976, "step": 34904 }, { - "epoch": 0.9904937570942112, + "epoch": 0.9891184221712148, "grad_norm": 0.0, - "learning_rate": 4.739473678524453e-09, - "loss": 0.7968, + "learning_rate": 6.209740438839485e-09, + "loss": 0.8035, "step": 34905 }, { - "epoch": 0.990522133938706, + "epoch": 0.9891467596134773, "grad_norm": 0.0, - "learning_rate": 4.71122276382463e-09, - "loss": 0.7863, + "learning_rate": 6.17744347918281e-09, + "loss": 0.7176, "step": 34906 }, { - "epoch": 0.9905505107832009, + "epoch": 0.9891750970557397, "grad_norm": 0.0, - "learning_rate": 4.6830562797783865e-09, - "loss": 0.7651, + "learning_rate": 6.14523070120554e-09, + "loss": 0.837, "step": 34907 }, { - "epoch": 0.9905788876276957, + "epoch": 0.9892034344980022, "grad_norm": 0.0, - "learning_rate": 4.65497422662331e-09, - "loss": 0.7807, + "learning_rate": 6.11310210517746e-09, + "loss": 0.7282, "step": 34908 }, { - "epoch": 0.9906072644721907, + "epoch": 0.9892317719402647, "grad_norm": 0.0, - "learning_rate": 4.626976604596989e-09, - "loss": 0.7161, + "learning_rate": 6.081057691370573e-09, + "loss": 0.8356, "step": 34909 }, { - "epoch": 0.9906356413166856, + "epoch": 0.9892601093825272, "grad_norm": 0.0, - "learning_rate": 4.5990634139359004e-09, - "loss": 0.852, + "learning_rate": 6.0490974600535546e-09, + "loss": 0.8617, "step": 34910 }, { - "epoch": 0.9906640181611804, + "epoch": 0.9892884468247896, "grad_norm": 0.0, - "learning_rate": 4.571234654876522e-09, - "loss": 0.8027, + "learning_rate": 6.017221411496188e-09, + "loss": 0.8458, "step": 34911 }, { - "epoch": 0.9906923950056754, + "epoch": 0.989316784267052, "grad_norm": 0.0, - "learning_rate": 4.543490327653111e-09, - "loss": 0.7137, + "learning_rate": 5.985429545967147e-09, + "loss": 0.7142, "step": 34912 }, { - "epoch": 0.9907207718501703, + "epoch": 0.9893451217093145, "grad_norm": 0.0, - "learning_rate": 4.515830432498813e-09, - "loss": 0.7933, + "learning_rate": 5.953721863732886e-09, + "loss": 0.8264, "step": 34913 }, { - "epoch": 0.9907491486946651, + "epoch": 0.9893734591515769, "grad_norm": 0.0, - "learning_rate": 4.488254969650108e-09, - "loss": 0.8314, + "learning_rate": 5.922098365063189e-09, + "loss": 0.8652, "step": 34914 }, { - "epoch": 0.9907775255391601, + "epoch": 0.9894017965938394, "grad_norm": 0.0, - "learning_rate": 4.46076393933792e-09, - "loss": 0.7267, + "learning_rate": 5.890559050222289e-09, + "loss": 0.8298, "step": 34915 }, { - "epoch": 0.990805902383655, + "epoch": 0.9894301340361019, "grad_norm": 0.0, - "learning_rate": 4.433357341795397e-09, - "loss": 0.7163, + "learning_rate": 5.859103919475528e-09, + "loss": 0.7132, "step": 34916 }, { - "epoch": 0.9908342792281498, + "epoch": 0.9894584714783644, "grad_norm": 0.0, - "learning_rate": 4.406035177253465e-09, - "loss": 0.8677, + "learning_rate": 5.8277329730904716e-09, + "loss": 0.7207, "step": 34917 }, { - "epoch": 0.9908626560726447, + "epoch": 0.9894868089206268, "grad_norm": 0.0, - "learning_rate": 4.37879744594305e-09, - "loss": 0.7383, + "learning_rate": 5.796446211328022e-09, + "loss": 0.7122, "step": 34918 }, { - "epoch": 0.9908910329171396, + "epoch": 0.9895151463628893, "grad_norm": 0.0, - "learning_rate": 4.35164414809397e-09, - "loss": 0.8012, + "learning_rate": 5.7652436344546315e-09, + "loss": 0.7733, "step": 34919 }, { - "epoch": 0.9909194097616345, + "epoch": 0.9895434838051518, "grad_norm": 0.0, - "learning_rate": 4.3245752839360385e-09, - "loss": 0.859, + "learning_rate": 5.734125242731203e-09, + "loss": 0.8879, "step": 34920 }, { - "epoch": 0.9909477866061294, + "epoch": 0.9895718212474142, "grad_norm": 0.0, - "learning_rate": 4.297590853697964e-09, - "loss": 0.8795, + "learning_rate": 5.70309103642086e-09, + "loss": 0.8004, "step": 34921 }, { - "epoch": 0.9909761634506243, + "epoch": 0.9896001586896767, "grad_norm": 0.0, - "learning_rate": 4.27069085760623e-09, - "loss": 0.6799, + "learning_rate": 5.672141015784505e-09, + "loss": 0.7832, "step": 34922 }, { - "epoch": 0.9910045402951192, + "epoch": 0.9896284961319392, "grad_norm": 0.0, - "learning_rate": 4.243875295890653e-09, - "loss": 0.7724, + "learning_rate": 5.641275181083039e-09, + "loss": 0.8536, "step": 34923 }, { - "epoch": 0.9910329171396141, + "epoch": 0.9896568335742016, "grad_norm": 0.0, - "learning_rate": 4.21714416877661e-09, - "loss": 0.7627, + "learning_rate": 5.610493532576256e-09, + "loss": 0.8652, "step": 34924 }, { - "epoch": 0.9910612939841089, + "epoch": 0.989685171016464, "grad_norm": 0.0, - "learning_rate": 4.190497476488364e-09, - "loss": 0.8177, + "learning_rate": 5.579796070523946e-09, + "loss": 0.8575, "step": 34925 }, { - "epoch": 0.9910896708286039, + "epoch": 0.9897135084587265, "grad_norm": 0.0, - "learning_rate": 4.163935219253512e-09, - "loss": 0.8088, + "learning_rate": 5.549182795183683e-09, + "loss": 0.9028, "step": 34926 }, { - "epoch": 0.9911180476730987, + "epoch": 0.989741845900989, "grad_norm": 0.0, - "learning_rate": 4.137457397295208e-09, - "loss": 0.7097, + "learning_rate": 5.518653706814148e-09, + "loss": 0.8187, "step": 34927 }, { - "epoch": 0.9911464245175936, + "epoch": 0.9897701833432514, "grad_norm": 0.0, - "learning_rate": 4.111064010836607e-09, - "loss": 0.8085, + "learning_rate": 5.488208805672912e-09, + "loss": 0.9962, "step": 34928 }, { - "epoch": 0.9911748013620886, + "epoch": 0.9897985207855139, "grad_norm": 0.0, - "learning_rate": 4.084755060101975e-09, - "loss": 0.8752, + "learning_rate": 5.457848092015328e-09, + "loss": 0.8578, "step": 34929 }, { - "epoch": 0.9912031782065834, + "epoch": 0.9898268582277764, "grad_norm": 0.0, - "learning_rate": 4.0585305453122444e-09, - "loss": 0.7422, + "learning_rate": 5.427571566097856e-09, + "loss": 0.7475, "step": 34930 }, { - "epoch": 0.9912315550510783, + "epoch": 0.9898551956700388, "grad_norm": 0.0, - "learning_rate": 4.032390466688352e-09, - "loss": 0.9296, + "learning_rate": 5.3973792281758475e-09, + "loss": 0.9372, "step": 34931 }, { - "epoch": 0.9912599318955733, + "epoch": 0.9898835331123013, "grad_norm": 0.0, - "learning_rate": 4.00633482445345e-09, - "loss": 0.8246, + "learning_rate": 5.367271078502434e-09, + "loss": 0.7552, "step": 34932 }, { - "epoch": 0.9912883087400681, + "epoch": 0.9899118705545638, "grad_norm": 0.0, - "learning_rate": 3.980363618826255e-09, - "loss": 0.9189, + "learning_rate": 5.337247117331856e-09, + "loss": 0.8374, "step": 34933 }, { - "epoch": 0.991316685584563, + "epoch": 0.9899402079968262, "grad_norm": 0.0, - "learning_rate": 3.95447685002659e-09, - "loss": 0.8947, + "learning_rate": 5.307307344918355e-09, + "loss": 0.7913, "step": 34934 }, { - "epoch": 0.9913450624290578, + "epoch": 0.9899685454390886, "grad_norm": 0.0, - "learning_rate": 3.928674518272058e-09, - "loss": 0.7395, + "learning_rate": 5.277451761511732e-09, + "loss": 0.7746, "step": 34935 }, { - "epoch": 0.9913734392735528, + "epoch": 0.9899968828813511, "grad_norm": 0.0, - "learning_rate": 3.902956623782484e-09, - "loss": 0.8556, + "learning_rate": 5.247680367364005e-09, + "loss": 0.7822, "step": 34936 }, { - "epoch": 0.9914018161180477, + "epoch": 0.9900252203236136, "grad_norm": 0.0, - "learning_rate": 3.87732316677325e-09, - "loss": 0.814, + "learning_rate": 5.217993162727197e-09, + "loss": 0.7133, "step": 34937 }, { - "epoch": 0.9914301929625425, + "epoch": 0.990053557765876, "grad_norm": 0.0, - "learning_rate": 3.851774147461962e-09, - "loss": 0.8525, + "learning_rate": 5.188390147851108e-09, + "loss": 0.7595, "step": 34938 }, { - "epoch": 0.9914585698070375, + "epoch": 0.9900818952081385, "grad_norm": 0.0, - "learning_rate": 3.826309566062891e-09, - "loss": 0.8669, + "learning_rate": 5.158871322984426e-09, + "loss": 0.8202, "step": 34939 }, { - "epoch": 0.9914869466515324, + "epoch": 0.990110232650401, "grad_norm": 0.0, - "learning_rate": 3.800929422793642e-09, - "loss": 0.8141, + "learning_rate": 5.129436688375844e-09, + "loss": 0.7881, "step": 34940 }, { - "epoch": 0.9915153234960272, + "epoch": 0.9901385700926635, "grad_norm": 0.0, - "learning_rate": 3.7756337178662675e-09, - "loss": 0.7963, + "learning_rate": 5.100086244274049e-09, + "loss": 0.8083, "step": 34941 }, { - "epoch": 0.9915437003405221, + "epoch": 0.9901669075349259, "grad_norm": 0.0, - "learning_rate": 3.750422451496149e-09, - "loss": 0.7304, + "learning_rate": 5.070819990925513e-09, + "loss": 0.7802, "step": 34942 }, { - "epoch": 0.991572077185017, + "epoch": 0.9901952449771884, "grad_norm": 0.0, - "learning_rate": 3.725295623896452e-09, - "loss": 0.8133, + "learning_rate": 5.041637928576704e-09, + "loss": 0.7243, "step": 34943 }, { - "epoch": 0.9916004540295119, + "epoch": 0.9902235824194509, "grad_norm": 0.0, - "learning_rate": 3.700253235277007e-09, - "loss": 0.842, + "learning_rate": 5.012540057474091e-09, + "loss": 0.82, "step": 34944 }, { - "epoch": 0.9916288308740068, + "epoch": 0.9902519198617132, "grad_norm": 0.0, - "learning_rate": 3.6752952858520873e-09, - "loss": 0.7472, + "learning_rate": 4.983526377861925e-09, + "loss": 0.8277, "step": 34945 }, { - "epoch": 0.9916572077185017, + "epoch": 0.9902802573039757, "grad_norm": 0.0, - "learning_rate": 3.650421775830415e-09, - "loss": 0.8621, + "learning_rate": 4.9545968899855635e-09, + "loss": 0.9065, "step": 34946 }, { - "epoch": 0.9916855845629966, + "epoch": 0.9903085947462382, "grad_norm": 0.0, - "learning_rate": 3.6256327054229325e-09, - "loss": 0.8674, + "learning_rate": 4.925751594087036e-09, + "loss": 0.8741, "step": 34947 }, { - "epoch": 0.9917139614074915, + "epoch": 0.9903369321885007, "grad_norm": 0.0, - "learning_rate": 3.6009280748394716e-09, - "loss": 0.8318, + "learning_rate": 4.896990490411701e-09, + "loss": 0.7281, "step": 34948 }, { - "epoch": 0.9917423382519864, + "epoch": 0.9903652696307631, "grad_norm": 0.0, - "learning_rate": 3.5763078842887546e-09, - "loss": 0.8132, + "learning_rate": 4.868313579200479e-09, + "loss": 0.7187, "step": 34949 }, { - "epoch": 0.9917707150964813, + "epoch": 0.9903936070730256, "grad_norm": 0.0, - "learning_rate": 3.5517721339783928e-09, - "loss": 0.8331, + "learning_rate": 4.839720860694286e-09, + "loss": 0.7421, "step": 34950 }, { - "epoch": 0.9917990919409762, + "epoch": 0.9904219445152881, "grad_norm": 0.0, - "learning_rate": 3.5273208241148882e-09, - "loss": 0.9177, + "learning_rate": 4.811212335136262e-09, + "loss": 0.7329, "step": 34951 }, { - "epoch": 0.991827468785471, + "epoch": 0.9904502819575505, "grad_norm": 0.0, - "learning_rate": 3.502953954905852e-09, - "loss": 0.7145, + "learning_rate": 4.782788002763994e-09, + "loss": 0.825, "step": 34952 }, { - "epoch": 0.991855845629966, + "epoch": 0.990478619399813, "grad_norm": 0.0, - "learning_rate": 3.478671526556676e-09, - "loss": 0.7349, + "learning_rate": 4.754447863817291e-09, + "loss": 0.7292, "step": 34953 }, { - "epoch": 0.9918842224744608, + "epoch": 0.9905069568420755, "grad_norm": 0.0, - "learning_rate": 3.454473539271641e-09, - "loss": 0.7116, + "learning_rate": 4.726191918537071e-09, + "loss": 0.8895, "step": 34954 }, { - "epoch": 0.9919125993189557, + "epoch": 0.9905352942843378, "grad_norm": 0.0, - "learning_rate": 3.4303599932572485e-09, - "loss": 0.7396, + "learning_rate": 4.698020167158701e-09, + "loss": 0.7859, "step": 34955 }, { - "epoch": 0.9919409761634507, + "epoch": 0.9905636317266003, "grad_norm": 0.0, - "learning_rate": 3.4063308887155587e-09, - "loss": 0.8266, + "learning_rate": 4.66993260992088e-09, + "loss": 0.7269, "step": 34956 }, { - "epoch": 0.9919693530079455, + "epoch": 0.9905919691688628, "grad_norm": 0.0, - "learning_rate": 3.382386225849743e-09, - "loss": 0.8088, + "learning_rate": 4.6419292470589735e-09, + "loss": 0.8282, "step": 34957 }, { - "epoch": 0.9919977298524404, + "epoch": 0.9906203066111253, "grad_norm": 0.0, - "learning_rate": 3.358526004862972e-09, - "loss": 0.8133, + "learning_rate": 4.6140100788105716e-09, + "loss": 0.7571, "step": 34958 }, { - "epoch": 0.9920261066969353, + "epoch": 0.9906486440533877, "grad_norm": 0.0, - "learning_rate": 3.334750225956196e-09, - "loss": 0.8374, + "learning_rate": 4.586175105411039e-09, + "loss": 0.7042, "step": 34959 }, { - "epoch": 0.9920544835414302, + "epoch": 0.9906769814956502, "grad_norm": 0.0, - "learning_rate": 3.311058889329255e-09, - "loss": 0.7934, + "learning_rate": 4.558424327092415e-09, + "loss": 0.8486, "step": 34960 }, { - "epoch": 0.9920828603859251, + "epoch": 0.9907053189379127, "grad_norm": 0.0, - "learning_rate": 3.2874519951830995e-09, - "loss": 0.8117, + "learning_rate": 4.530757744090064e-09, + "loss": 0.7835, "step": 34961 }, { - "epoch": 0.9921112372304199, + "epoch": 0.9907336563801751, "grad_norm": 0.0, - "learning_rate": 3.26392954371868e-09, - "loss": 0.8138, + "learning_rate": 4.5031753566382455e-09, + "loss": 0.8889, "step": 34962 }, { - "epoch": 0.9921396140749149, + "epoch": 0.9907619938224376, "grad_norm": 0.0, - "learning_rate": 3.2404915351313957e-09, - "loss": 0.693, + "learning_rate": 4.475677164966774e-09, + "loss": 0.7996, "step": 34963 }, { - "epoch": 0.9921679909194098, + "epoch": 0.9907903312647001, "grad_norm": 0.0, - "learning_rate": 3.217137969622197e-09, - "loss": 0.7782, + "learning_rate": 4.4482631693076874e-09, + "loss": 0.7957, "step": 34964 }, { - "epoch": 0.9921963677639046, + "epoch": 0.9908186687069626, "grad_norm": 0.0, - "learning_rate": 3.1938688473875935e-09, - "loss": 0.8707, + "learning_rate": 4.420933369894131e-09, + "loss": 0.8125, "step": 34965 }, { - "epoch": 0.9922247446083996, + "epoch": 0.9908470061492249, "grad_norm": 0.0, - "learning_rate": 3.170684168622984e-09, - "loss": 0.7916, + "learning_rate": 4.393687766953703e-09, + "loss": 0.7586, "step": 34966 }, { - "epoch": 0.9922531214528945, + "epoch": 0.9908753435914874, "grad_norm": 0.0, - "learning_rate": 3.1475839335248783e-09, - "loss": 0.7551, + "learning_rate": 4.3665263607184375e-09, + "loss": 0.8766, "step": 34967 }, { - "epoch": 0.9922814982973893, + "epoch": 0.9909036810337499, "grad_norm": 0.0, - "learning_rate": 3.1245681422886753e-09, - "loss": 0.7553, + "learning_rate": 4.3394491514137105e-09, + "loss": 0.7348, "step": 34968 }, { - "epoch": 0.9923098751418842, + "epoch": 0.9909320184760123, "grad_norm": 0.0, - "learning_rate": 3.1016367951086646e-09, - "loss": 0.964, + "learning_rate": 4.312456139271559e-09, + "loss": 0.7439, "step": 34969 }, { - "epoch": 0.9923382519863791, + "epoch": 0.9909603559182748, "grad_norm": 0.0, - "learning_rate": 3.078789892179135e-09, - "loss": 0.8957, + "learning_rate": 4.2855473245162486e-09, + "loss": 0.8453, "step": 34970 }, { - "epoch": 0.992366628830874, + "epoch": 0.9909886933605373, "grad_norm": 0.0, - "learning_rate": 3.0560274336921548e-09, - "loss": 0.7683, + "learning_rate": 4.2587227073753735e-09, + "loss": 0.8016, "step": 34971 }, { - "epoch": 0.9923950056753689, + "epoch": 0.9910170308027998, "grad_norm": 0.0, - "learning_rate": 3.033349419839793e-09, - "loss": 0.7446, + "learning_rate": 4.23198228807542e-09, + "loss": 0.86, "step": 34972 }, { - "epoch": 0.9924233825198638, + "epoch": 0.9910453682450622, "grad_norm": 0.0, - "learning_rate": 3.0107558508141177e-09, - "loss": 0.8171, + "learning_rate": 4.205326066841764e-09, + "loss": 0.8717, "step": 34973 }, { - "epoch": 0.9924517593643587, + "epoch": 0.9910737056873247, "grad_norm": 0.0, - "learning_rate": 2.9882467268060877e-09, - "loss": 0.7993, + "learning_rate": 4.178754043898669e-09, + "loss": 0.7102, "step": 34974 }, { - "epoch": 0.9924801362088536, + "epoch": 0.9911020431295872, "grad_norm": 0.0, - "learning_rate": 2.9658220480066612e-09, - "loss": 0.7856, + "learning_rate": 4.152266219469292e-09, + "loss": 0.8242, "step": 34975 }, { - "epoch": 0.9925085130533484, + "epoch": 0.9911303805718495, "grad_norm": 0.0, - "learning_rate": 2.943481814603466e-09, - "loss": 0.7793, + "learning_rate": 4.125862593776786e-09, + "loss": 0.8066, "step": 34976 }, { - "epoch": 0.9925368898978434, + "epoch": 0.991158718014112, "grad_norm": 0.0, - "learning_rate": 2.92122602678635e-09, - "loss": 0.8694, + "learning_rate": 4.099543167044307e-09, + "loss": 0.7433, "step": 34977 }, { - "epoch": 0.9925652667423382, + "epoch": 0.9911870554563745, "grad_norm": 0.0, - "learning_rate": 2.899054684742941e-09, - "loss": 0.7915, + "learning_rate": 4.073307939493898e-09, + "loss": 0.7806, "step": 34978 }, { - "epoch": 0.9925936435868331, + "epoch": 0.9912153928986369, "grad_norm": 0.0, - "learning_rate": 2.876967788659757e-09, - "loss": 0.7963, + "learning_rate": 4.0471569113453844e-09, + "loss": 0.7858, "step": 34979 }, { - "epoch": 0.9926220204313281, + "epoch": 0.9912437303408994, "grad_norm": 0.0, - "learning_rate": 2.8549653387255347e-09, - "loss": 0.9085, + "learning_rate": 4.021090082819701e-09, + "loss": 0.7102, "step": 34980 }, { - "epoch": 0.9926503972758229, + "epoch": 0.9912720677831619, "grad_norm": 0.0, - "learning_rate": 2.8330473351245722e-09, - "loss": 0.7368, + "learning_rate": 3.9951074541366706e-09, + "loss": 0.7407, "step": 34981 }, { - "epoch": 0.9926787741203178, + "epoch": 0.9913004052254244, "grad_norm": 0.0, - "learning_rate": 2.8112137780422766e-09, - "loss": 0.7806, + "learning_rate": 3.969209025513898e-09, + "loss": 0.7513, "step": 34982 }, { - "epoch": 0.9927071509648128, + "epoch": 0.9913287426676868, "grad_norm": 0.0, - "learning_rate": 2.7894646676629446e-09, - "loss": 0.7891, + "learning_rate": 3.943394797171207e-09, + "loss": 0.811, "step": 34983 }, { - "epoch": 0.9927355278093076, + "epoch": 0.9913570801099493, "grad_norm": 0.0, - "learning_rate": 2.7678000041697628e-09, - "loss": 0.7729, + "learning_rate": 3.917664769323981e-09, + "loss": 0.85, "step": 34984 }, { - "epoch": 0.9927639046538025, + "epoch": 0.9913854175522118, "grad_norm": 0.0, - "learning_rate": 2.7462197877470286e-09, - "loss": 0.7452, + "learning_rate": 3.892018942192044e-09, + "loss": 0.831, "step": 34985 }, { - "epoch": 0.9927922814982973, + "epoch": 0.9914137549944742, "grad_norm": 0.0, - "learning_rate": 2.7247240185768186e-09, - "loss": 0.7907, + "learning_rate": 3.866457315988559e-09, + "loss": 0.8124, "step": 34986 }, { - "epoch": 0.9928206583427923, + "epoch": 0.9914420924367366, "grad_norm": 0.0, - "learning_rate": 2.7033126968389887e-09, - "loss": 0.845, + "learning_rate": 3.840979890930019e-09, + "loss": 0.8204, "step": 34987 }, { - "epoch": 0.9928490351872872, + "epoch": 0.9914704298789991, "grad_norm": 0.0, - "learning_rate": 2.681985822716726e-09, - "loss": 0.902, + "learning_rate": 3.815586667230697e-09, + "loss": 0.7852, "step": 34988 }, { - "epoch": 0.992877412031782, + "epoch": 0.9914987673212616, "grad_norm": 0.0, - "learning_rate": 2.6607433963887763e-09, - "loss": 0.7955, + "learning_rate": 3.790277645104867e-09, + "loss": 0.8322, "step": 34989 }, { - "epoch": 0.992905788876277, + "epoch": 0.991527104763524, "grad_norm": 0.0, - "learning_rate": 2.6395854180349955e-09, - "loss": 0.7794, + "learning_rate": 3.765052824765691e-09, + "loss": 0.8677, "step": 34990 }, { - "epoch": 0.9929341657207719, + "epoch": 0.9915554422057865, "grad_norm": 0.0, - "learning_rate": 2.61851188783413e-09, - "loss": 0.8894, + "learning_rate": 3.739912206425222e-09, + "loss": 0.8729, "step": 34991 }, { - "epoch": 0.9929625425652667, + "epoch": 0.991583779648049, "grad_norm": 0.0, - "learning_rate": 2.5975228059638146e-09, - "loss": 0.9023, + "learning_rate": 3.714855790295513e-09, + "loss": 0.8112, "step": 34992 }, { - "epoch": 0.9929909194097616, + "epoch": 0.9916121170903114, "grad_norm": 0.0, - "learning_rate": 2.5766181726016857e-09, - "loss": 0.8015, + "learning_rate": 3.689883576587505e-09, + "loss": 0.7538, "step": 34993 }, { - "epoch": 0.9930192962542566, + "epoch": 0.9916404545325739, "grad_norm": 0.0, - "learning_rate": 2.555797987924269e-09, - "loss": 0.7615, + "learning_rate": 3.6649955655121415e-09, + "loss": 0.7566, "step": 34994 }, { - "epoch": 0.9930476730987514, + "epoch": 0.9916687919748364, "grad_norm": 0.0, - "learning_rate": 2.5350622521080893e-09, - "loss": 0.7041, + "learning_rate": 3.6401917572781444e-09, + "loss": 0.9157, "step": 34995 }, { - "epoch": 0.9930760499432463, + "epoch": 0.9916971294170989, "grad_norm": 0.0, - "learning_rate": 2.5144109653274517e-09, - "loss": 0.8312, + "learning_rate": 3.6154721520953453e-09, + "loss": 0.7151, "step": 34996 }, { - "epoch": 0.9931044267877412, + "epoch": 0.9917254668593612, "grad_norm": 0.0, - "learning_rate": 2.4938441277566615e-09, - "loss": 0.8272, + "learning_rate": 3.5908367501702455e-09, + "loss": 0.8303, "step": 34997 }, { - "epoch": 0.9931328036322361, + "epoch": 0.9917538043016237, "grad_norm": 0.0, - "learning_rate": 2.4733617395700238e-09, - "loss": 0.7759, + "learning_rate": 3.5662855517126782e-09, + "loss": 0.7597, "step": 34998 }, { - "epoch": 0.993161180476731, + "epoch": 0.9917821417438862, "grad_norm": 0.0, - "learning_rate": 2.4529638009396227e-09, - "loss": 0.8418, + "learning_rate": 3.541818556928034e-09, + "loss": 0.7604, "step": 34999 }, { - "epoch": 0.9931895573212258, + "epoch": 0.9918104791861486, "grad_norm": 0.0, - "learning_rate": 2.432650312039764e-09, - "loss": 0.8029, + "learning_rate": 3.517435766022814e-09, + "loss": 0.7609, "step": 35000 }, { - "epoch": 0.9932179341657208, + "epoch": 0.9918388166284111, "grad_norm": 0.0, - "learning_rate": 2.4124212730392004e-09, - "loss": 0.8529, + "learning_rate": 3.4931371792035207e-09, + "loss": 0.8181, "step": 35001 }, { - "epoch": 0.9932463110102157, + "epoch": 0.9918671540706736, "grad_norm": 0.0, - "learning_rate": 2.392276684111128e-09, - "loss": 0.8771, + "learning_rate": 3.468922796672214e-09, + "loss": 0.7349, "step": 35002 }, { - "epoch": 0.9932746878547105, + "epoch": 0.991895491512936, "grad_norm": 0.0, - "learning_rate": 2.3722165454254096e-09, - "loss": 0.7645, + "learning_rate": 3.4447926186342852e-09, + "loss": 0.7709, "step": 35003 }, { - "epoch": 0.9933030646992055, + "epoch": 0.9919238289551985, "grad_norm": 0.0, - "learning_rate": 2.3522408571508006e-09, - "loss": 0.7597, + "learning_rate": 3.420746645292905e-09, + "loss": 0.8859, "step": 35004 }, { - "epoch": 0.9933314415437003, + "epoch": 0.991952166397461, "grad_norm": 0.0, - "learning_rate": 2.3323496194549435e-09, - "loss": 0.8718, + "learning_rate": 3.3967848768512445e-09, + "loss": 0.8948, "step": 35005 }, { - "epoch": 0.9933598183881952, + "epoch": 0.9919805038397235, "grad_norm": 0.0, - "learning_rate": 2.3125428325088127e-09, - "loss": 0.7005, + "learning_rate": 3.3729073135113642e-09, + "loss": 0.8082, "step": 35006 }, { - "epoch": 0.9933881952326902, + "epoch": 0.9920088412819859, "grad_norm": 0.0, - "learning_rate": 2.2928204964778324e-09, - "loss": 0.9293, + "learning_rate": 3.3491139554719944e-09, + "loss": 0.7473, "step": 35007 }, { - "epoch": 0.993416572077185, + "epoch": 0.9920371787242483, "grad_norm": 0.0, - "learning_rate": 2.2731826115274246e-09, - "loss": 0.7342, + "learning_rate": 3.325404802936305e-09, + "loss": 0.9081, "step": 35008 }, { - "epoch": 0.9934449489216799, + "epoch": 0.9920655161665108, "grad_norm": 0.0, - "learning_rate": 2.253629177826344e-09, - "loss": 0.926, + "learning_rate": 3.3017798561030268e-09, + "loss": 0.8548, "step": 35009 }, { - "epoch": 0.9934733257661748, + "epoch": 0.9920938536087732, "grad_norm": 0.0, - "learning_rate": 2.2341601955377933e-09, - "loss": 0.6934, + "learning_rate": 3.278239115169779e-09, + "loss": 0.7362, "step": 35010 }, { - "epoch": 0.9935017026106697, + "epoch": 0.9921221910510357, "grad_norm": 0.0, - "learning_rate": 2.2147756648260854e-09, - "loss": 0.8057, + "learning_rate": 3.254782580337512e-09, + "loss": 0.7786, "step": 35011 }, { - "epoch": 0.9935300794551646, + "epoch": 0.9921505284932982, "grad_norm": 0.0, - "learning_rate": 2.1954755858566436e-09, - "loss": 0.7731, + "learning_rate": 3.2314102518016256e-09, + "loss": 0.6927, "step": 35012 }, { - "epoch": 0.9935584562996594, + "epoch": 0.9921788659355607, "grad_norm": 0.0, - "learning_rate": 2.17625995879156e-09, - "loss": 0.7119, + "learning_rate": 3.208122129759739e-09, + "loss": 0.9102, "step": 35013 }, { - "epoch": 0.9935868331441544, + "epoch": 0.9922072033778231, "grad_norm": 0.0, - "learning_rate": 2.1571287837918178e-09, - "loss": 0.8984, + "learning_rate": 3.1849182144083614e-09, + "loss": 0.7758, "step": 35014 }, { - "epoch": 0.9936152099886493, + "epoch": 0.9922355408200856, "grad_norm": 0.0, - "learning_rate": 2.138082061021729e-09, - "loss": 0.681, + "learning_rate": 3.1617985059428923e-09, + "loss": 0.693, "step": 35015 }, { - "epoch": 0.9936435868331441, + "epoch": 0.9922638782623481, "grad_norm": 0.0, - "learning_rate": 2.1191197906400563e-09, - "loss": 0.8674, + "learning_rate": 3.138763004557621e-09, + "loss": 0.7239, "step": 35016 }, { - "epoch": 0.993671963677639, + "epoch": 0.9922922157046105, "grad_norm": 0.0, - "learning_rate": 2.1002419728066714e-09, - "loss": 0.7074, + "learning_rate": 3.1158117104468365e-09, + "loss": 0.7237, "step": 35017 }, { - "epoch": 0.993700340522134, + "epoch": 0.992320553146873, "grad_norm": 0.0, - "learning_rate": 2.081448607682557e-09, - "loss": 0.7452, + "learning_rate": 3.0929446238037174e-09, + "loss": 0.6867, "step": 35018 }, { - "epoch": 0.9937287173666288, + "epoch": 0.9923488905891354, "grad_norm": 0.0, - "learning_rate": 2.0627396954264743e-09, - "loss": 0.7548, + "learning_rate": 3.0701617448203325e-09, + "loss": 0.8159, "step": 35019 }, { - "epoch": 0.9937570942111237, + "epoch": 0.9923772280313979, "grad_norm": 0.0, - "learning_rate": 2.044115236196076e-09, - "loss": 0.7609, + "learning_rate": 3.0474630736898604e-09, + "loss": 0.7585, "step": 35020 }, { - "epoch": 0.9937854710556187, + "epoch": 0.9924055654736603, "grad_norm": 0.0, - "learning_rate": 2.0255752301479028e-09, - "loss": 0.6651, + "learning_rate": 3.0248486106032593e-09, + "loss": 0.8255, "step": 35021 }, { - "epoch": 0.9938138479001135, + "epoch": 0.9924339029159228, "grad_norm": 0.0, - "learning_rate": 2.007119677438496e-09, - "loss": 0.8459, + "learning_rate": 3.0023183557503776e-09, + "loss": 0.8201, "step": 35022 }, { - "epoch": 0.9938422247446084, + "epoch": 0.9924622403581853, "grad_norm": 0.0, - "learning_rate": 1.988748578225508e-09, - "loss": 0.8812, + "learning_rate": 2.9798723093210635e-09, + "loss": 0.7787, "step": 35023 }, { - "epoch": 0.9938706015891033, + "epoch": 0.9924905778004477, "grad_norm": 0.0, - "learning_rate": 1.970461932662149e-09, - "loss": 0.808, + "learning_rate": 2.957510471504055e-09, + "loss": 0.8271, "step": 35024 }, { - "epoch": 0.9938989784335982, + "epoch": 0.9925189152427102, "grad_norm": 0.0, - "learning_rate": 1.95225974090385e-09, - "loss": 0.7877, + "learning_rate": 2.9352328424891997e-09, + "loss": 0.7574, "step": 35025 }, { - "epoch": 0.9939273552780931, + "epoch": 0.9925472526849727, "grad_norm": 0.0, - "learning_rate": 1.9341420031049328e-09, - "loss": 0.7062, + "learning_rate": 2.9130394224630155e-09, + "loss": 0.8404, "step": 35026 }, { - "epoch": 0.9939557321225879, + "epoch": 0.9925755901272351, "grad_norm": 0.0, - "learning_rate": 1.9161087194174978e-09, - "loss": 0.78, + "learning_rate": 2.89093021161202e-09, + "loss": 0.7969, "step": 35027 }, { - "epoch": 0.9939841089670829, + "epoch": 0.9926039275694976, "grad_norm": 0.0, - "learning_rate": 1.898159889993645e-09, - "loss": 0.8632, + "learning_rate": 2.8689052101238402e-09, + "loss": 0.7817, "step": 35028 }, { - "epoch": 0.9940124858115778, + "epoch": 0.99263226501176, "grad_norm": 0.0, - "learning_rate": 1.8802955149865854e-09, - "loss": 0.9055, + "learning_rate": 2.8469644181827736e-09, + "loss": 0.822, "step": 35029 }, { - "epoch": 0.9940408626560726, + "epoch": 0.9926606024540225, "grad_norm": 0.0, - "learning_rate": 1.862515594545089e-09, - "loss": 0.78, + "learning_rate": 2.825107835974228e-09, + "loss": 0.8692, "step": 35030 }, { - "epoch": 0.9940692395005676, + "epoch": 0.9926889398962849, "grad_norm": 0.0, - "learning_rate": 1.8448201288201462e-09, - "loss": 0.7588, + "learning_rate": 2.8033354636824993e-09, + "loss": 0.8297, "step": 35031 }, { - "epoch": 0.9940976163450624, + "epoch": 0.9927172773385474, "grad_norm": 0.0, - "learning_rate": 1.8272091179627472e-09, - "loss": 0.7926, + "learning_rate": 2.781647301489665e-09, + "loss": 0.8436, "step": 35032 }, { - "epoch": 0.9941259931895573, + "epoch": 0.9927456147808099, "grad_norm": 0.0, - "learning_rate": 1.8096825621194414e-09, - "loss": 0.8104, + "learning_rate": 2.7600433495800215e-09, + "loss": 0.7523, "step": 35033 }, { - "epoch": 0.9941543700340522, + "epoch": 0.9927739522230723, "grad_norm": 0.0, - "learning_rate": 1.7922404614389987e-09, - "loss": 0.8392, + "learning_rate": 2.738523608135646e-09, + "loss": 0.7762, "step": 35034 }, { - "epoch": 0.9941827468785471, + "epoch": 0.9928022896653348, "grad_norm": 0.0, - "learning_rate": 1.774882816069079e-09, - "loss": 0.8031, + "learning_rate": 2.717088077335284e-09, + "loss": 0.8175, "step": 35035 }, { - "epoch": 0.994211123723042, + "epoch": 0.9928306271075973, "grad_norm": 0.0, - "learning_rate": 1.7576096261562314e-09, - "loss": 0.8356, + "learning_rate": 2.695736757363232e-09, + "loss": 0.7872, "step": 35036 }, { - "epoch": 0.9942395005675368, + "epoch": 0.9928589645498598, "grad_norm": 0.0, - "learning_rate": 1.7404208918470055e-09, - "loss": 0.8394, + "learning_rate": 2.6744696483960166e-09, + "loss": 0.8695, "step": 35037 }, { - "epoch": 0.9942678774120318, + "epoch": 0.9928873019921222, "grad_norm": 0.0, - "learning_rate": 1.7233166132846203e-09, - "loss": 0.7634, + "learning_rate": 2.6532867506146033e-09, + "loss": 0.7232, "step": 35038 }, { - "epoch": 0.9942962542565267, + "epoch": 0.9929156394343847, "grad_norm": 0.0, - "learning_rate": 1.7062967906156248e-09, - "loss": 0.722, + "learning_rate": 2.632188064196628e-09, + "loss": 0.8693, "step": 35039 }, { - "epoch": 0.9943246311010215, + "epoch": 0.9929439768766471, "grad_norm": 0.0, - "learning_rate": 1.689361423983238e-09, - "loss": 0.8185, + "learning_rate": 2.6111735893208366e-09, + "loss": 0.8744, "step": 35040 }, { - "epoch": 0.9943530079455165, + "epoch": 0.9929723143189095, "grad_norm": 0.0, - "learning_rate": 1.6725105135295683e-09, - "loss": 0.773, + "learning_rate": 2.5902433261637548e-09, + "loss": 0.65, "step": 35041 }, { - "epoch": 0.9943813847900114, + "epoch": 0.993000651761172, "grad_norm": 0.0, - "learning_rate": 1.655744059398945e-09, - "loss": 0.7648, + "learning_rate": 2.5693972749007977e-09, + "loss": 0.7386, "step": 35042 }, { - "epoch": 0.9944097616345062, + "epoch": 0.9930289892034345, "grad_norm": 0.0, - "learning_rate": 1.6390620617301457e-09, - "loss": 0.7738, + "learning_rate": 2.548635435708491e-09, + "loss": 0.6682, "step": 35043 }, { - "epoch": 0.9944381384790011, + "epoch": 0.993057326645697, "grad_norm": 0.0, - "learning_rate": 1.6224645206663892e-09, - "loss": 0.777, + "learning_rate": 2.527957808761139e-09, + "loss": 0.7261, "step": 35044 }, { - "epoch": 0.9944665153234961, + "epoch": 0.9930856640879594, "grad_norm": 0.0, - "learning_rate": 1.6059514363475638e-09, - "loss": 0.8872, + "learning_rate": 2.5073643942341575e-09, + "loss": 0.8646, "step": 35045 }, { - "epoch": 0.9944948921679909, + "epoch": 0.9931140015302219, "grad_norm": 0.0, - "learning_rate": 1.589522808912447e-09, - "loss": 0.8029, + "learning_rate": 2.486855192299631e-09, + "loss": 0.7689, "step": 35046 }, { - "epoch": 0.9945232690124858, + "epoch": 0.9931423389724844, "grad_norm": 0.0, - "learning_rate": 1.573178638499817e-09, - "loss": 0.8106, + "learning_rate": 2.466430203130754e-09, + "loss": 0.8447, "step": 35047 }, { - "epoch": 0.9945516458569807, + "epoch": 0.9931706764147468, "grad_norm": 0.0, - "learning_rate": 1.5569189252473416e-09, - "loss": 0.7316, + "learning_rate": 2.4460894268996116e-09, + "loss": 0.8335, "step": 35048 }, { - "epoch": 0.9945800227014756, + "epoch": 0.9931990138570093, "grad_norm": 0.0, - "learning_rate": 1.5407436692937983e-09, - "loss": 0.6821, + "learning_rate": 2.4258328637771776e-09, + "loss": 0.7811, "step": 35049 }, { - "epoch": 0.9946083995459705, + "epoch": 0.9932273512992718, "grad_norm": 0.0, - "learning_rate": 1.5246528707757447e-09, - "loss": 0.7332, + "learning_rate": 2.405660513934427e-09, + "loss": 0.8089, "step": 35050 }, { - "epoch": 0.9946367763904653, + "epoch": 0.9932556887415341, "grad_norm": 0.0, - "learning_rate": 1.5086465298275178e-09, - "loss": 0.7907, + "learning_rate": 2.3855723775423334e-09, + "loss": 0.7501, "step": 35051 }, { - "epoch": 0.9946651532349603, + "epoch": 0.9932840261837966, "grad_norm": 0.0, - "learning_rate": 1.492724646585675e-09, - "loss": 0.8929, + "learning_rate": 2.3655684547685408e-09, + "loss": 0.8704, "step": 35052 }, { - "epoch": 0.9946935300794552, + "epoch": 0.9933123636260591, "grad_norm": 0.0, - "learning_rate": 1.4768872211834428e-09, - "loss": 0.8707, + "learning_rate": 2.3456487457818033e-09, + "loss": 0.7462, "step": 35053 }, { - "epoch": 0.99472190692395, + "epoch": 0.9933407010683216, "grad_norm": 0.0, - "learning_rate": 1.461134253756269e-09, - "loss": 0.7671, + "learning_rate": 2.3258132507508745e-09, + "loss": 0.7949, "step": 35054 }, { - "epoch": 0.994750283768445, + "epoch": 0.993369038510584, "grad_norm": 0.0, - "learning_rate": 1.4454657444351595e-09, - "loss": 0.8746, + "learning_rate": 2.306061969841178e-09, + "loss": 0.7234, "step": 35055 }, { - "epoch": 0.9947786606129398, + "epoch": 0.9933973759528465, "grad_norm": 0.0, - "learning_rate": 1.4298816933555615e-09, - "loss": 0.7047, + "learning_rate": 2.286394903220357e-09, + "loss": 0.7619, "step": 35056 }, { - "epoch": 0.9948070374574347, + "epoch": 0.993425713395109, "grad_norm": 0.0, - "learning_rate": 1.414382100646261e-09, - "loss": 0.9259, + "learning_rate": 2.266812051054945e-09, + "loss": 0.6895, "step": 35057 }, { - "epoch": 0.9948354143019297, + "epoch": 0.9934540508373714, "grad_norm": 0.0, - "learning_rate": 1.3989669664382644e-09, - "loss": 0.833, + "learning_rate": 2.247313413507035e-09, + "loss": 0.8017, "step": 35058 }, { - "epoch": 0.9948637911464245, + "epoch": 0.9934823882796339, "grad_norm": 0.0, - "learning_rate": 1.383636290863688e-09, - "loss": 0.6422, + "learning_rate": 2.2278989907442706e-09, + "loss": 0.7722, "step": 35059 }, { - "epoch": 0.9948921679909194, + "epoch": 0.9935107257218964, "grad_norm": 0.0, - "learning_rate": 1.3683900740513178e-09, - "loss": 0.8853, + "learning_rate": 2.2085687829276336e-09, + "loss": 0.7785, "step": 35060 }, { - "epoch": 0.9949205448354143, + "epoch": 0.9935390631641589, "grad_norm": 0.0, - "learning_rate": 1.3532283161288295e-09, - "loss": 0.7394, + "learning_rate": 2.1893227902203273e-09, + "loss": 0.7727, "step": 35061 }, { - "epoch": 0.9949489216799092, + "epoch": 0.9935674006064212, "grad_norm": 0.0, - "learning_rate": 1.338151017225009e-09, - "loss": 0.7888, + "learning_rate": 2.1701610127855543e-09, + "loss": 0.7915, "step": 35062 }, { - "epoch": 0.9949772985244041, + "epoch": 0.9935957380486837, "grad_norm": 0.0, - "learning_rate": 1.3231581774675317e-09, - "loss": 0.8035, + "learning_rate": 2.151083450784297e-09, + "loss": 0.7948, "step": 35063 }, { - "epoch": 0.995005675368899, + "epoch": 0.9936240754909462, "grad_norm": 0.0, - "learning_rate": 1.3082497969829632e-09, - "loss": 0.7887, + "learning_rate": 2.1320901043764276e-09, + "loss": 0.752, "step": 35064 }, { - "epoch": 0.9950340522133939, + "epoch": 0.9936524129332086, "grad_norm": 0.0, - "learning_rate": 1.2934258758967588e-09, - "loss": 0.7837, + "learning_rate": 2.113180973722928e-09, + "loss": 0.7859, "step": 35065 }, { - "epoch": 0.9950624290578888, + "epoch": 0.9936807503754711, "grad_norm": 0.0, - "learning_rate": 1.2786864143354837e-09, - "loss": 0.8036, + "learning_rate": 2.094356058982561e-09, + "loss": 0.8555, "step": 35066 }, { - "epoch": 0.9950908059023836, + "epoch": 0.9937090878177336, "grad_norm": 0.0, - "learning_rate": 1.2640314124212626e-09, - "loss": 0.7826, + "learning_rate": 2.075615360314087e-09, + "loss": 0.795, "step": 35067 }, { - "epoch": 0.9951191827468785, + "epoch": 0.9937374252599961, "grad_norm": 0.0, - "learning_rate": 1.249460870280661e-09, - "loss": 0.8407, + "learning_rate": 2.0569588778762695e-09, + "loss": 0.7401, "step": 35068 }, { - "epoch": 0.9951475595913735, + "epoch": 0.9937657627022585, "grad_norm": 0.0, - "learning_rate": 1.2349747880335828e-09, - "loss": 0.8467, + "learning_rate": 2.0383866118245388e-09, + "loss": 0.8882, "step": 35069 }, { - "epoch": 0.9951759364358683, + "epoch": 0.993794100144521, "grad_norm": 0.0, - "learning_rate": 1.220573165805483e-09, - "loss": 0.8135, + "learning_rate": 2.0198985623154368e-09, + "loss": 0.7973, "step": 35070 }, { - "epoch": 0.9952043132803632, + "epoch": 0.9938224375867835, "grad_norm": 0.0, - "learning_rate": 1.2062560037162664e-09, - "loss": 0.7649, + "learning_rate": 2.0014947295066145e-09, + "loss": 0.7991, "step": 35071 }, { - "epoch": 0.9952326901248582, + "epoch": 0.9938507750290458, "grad_norm": 0.0, - "learning_rate": 1.1920233018880567e-09, - "loss": 0.8169, + "learning_rate": 1.9831751135512833e-09, + "loss": 0.7389, "step": 35072 }, { - "epoch": 0.995261066969353, + "epoch": 0.9938791124713083, "grad_norm": 0.0, - "learning_rate": 1.177875060438538e-09, - "loss": 0.8151, + "learning_rate": 1.964939714603764e-09, + "loss": 0.7768, "step": 35073 }, { - "epoch": 0.9952894438138479, + "epoch": 0.9939074499135708, "grad_norm": 0.0, - "learning_rate": 1.163811279490945e-09, - "loss": 0.9357, + "learning_rate": 1.946788532819488e-09, + "loss": 0.8172, "step": 35074 }, { - "epoch": 0.9953178206583428, + "epoch": 0.9939357873558332, "grad_norm": 0.0, - "learning_rate": 1.1498319591607409e-09, - "loss": 0.7666, + "learning_rate": 1.928721568349445e-09, + "loss": 0.7416, "step": 35075 }, { - "epoch": 0.9953461975028377, + "epoch": 0.9939641247980957, "grad_norm": 0.0, - "learning_rate": 1.1359370995678298e-09, - "loss": 0.7866, + "learning_rate": 1.910738821346847e-09, + "loss": 0.8829, "step": 35076 }, { - "epoch": 0.9953745743473326, + "epoch": 0.9939924622403582, "grad_norm": 0.0, - "learning_rate": 1.122126700828785e-09, - "loss": 0.6739, + "learning_rate": 1.892840291961573e-09, + "loss": 0.7291, "step": 35077 }, { - "epoch": 0.9954029511918274, + "epoch": 0.9940207996826207, "grad_norm": 0.0, - "learning_rate": 1.1084007630612903e-09, - "loss": 0.8098, + "learning_rate": 1.875025980346834e-09, + "loss": 0.8473, "step": 35078 }, { - "epoch": 0.9954313280363224, + "epoch": 0.9940491371248831, "grad_norm": 0.0, - "learning_rate": 1.0947592863808087e-09, - "loss": 0.7374, + "learning_rate": 1.8572958866514e-09, + "loss": 0.8163, "step": 35079 }, { - "epoch": 0.9954597048808173, + "epoch": 0.9940774745671456, "grad_norm": 0.0, - "learning_rate": 1.0812022709016934e-09, - "loss": 0.906, + "learning_rate": 1.8396500110240411e-09, + "loss": 0.8247, "step": 35080 }, { - "epoch": 0.9954880817253121, + "epoch": 0.9941058120094081, "grad_norm": 0.0, - "learning_rate": 1.0677297167394075e-09, - "loss": 0.8035, + "learning_rate": 1.8220883536146372e-09, + "loss": 0.6967, "step": 35081 }, { - "epoch": 0.9955164585698071, + "epoch": 0.9941341494516704, "grad_norm": 0.0, - "learning_rate": 1.0543416240071934e-09, - "loss": 0.7707, + "learning_rate": 1.8046109145697377e-09, + "loss": 0.7395, "step": 35082 }, { - "epoch": 0.9955448354143019, + "epoch": 0.9941624868939329, "grad_norm": 0.0, - "learning_rate": 1.0410379928182946e-09, - "loss": 0.6799, + "learning_rate": 1.7872176940381125e-09, + "loss": 0.8199, "step": 35083 }, { - "epoch": 0.9955732122587968, + "epoch": 0.9941908243361954, "grad_norm": 0.0, - "learning_rate": 1.0278188232859533e-09, - "loss": 0.7347, + "learning_rate": 1.769908692165201e-09, + "loss": 0.8203, "step": 35084 }, { - "epoch": 0.9956015891032917, + "epoch": 0.9942191617784579, "grad_norm": 0.0, - "learning_rate": 1.0146841155200814e-09, - "loss": 0.7654, + "learning_rate": 1.7526839090975522e-09, + "loss": 0.8034, "step": 35085 }, { - "epoch": 0.9956299659477866, + "epoch": 0.9942474992207203, "grad_norm": 0.0, - "learning_rate": 1.0016338696339224e-09, - "loss": 0.7252, + "learning_rate": 1.7355433449794955e-09, + "loss": 0.8188, "step": 35086 }, { - "epoch": 0.9956583427922815, + "epoch": 0.9942758366629828, "grad_norm": 0.0, - "learning_rate": 9.886680857362773e-10, - "loss": 0.8273, + "learning_rate": 1.71848699995536e-09, + "loss": 0.821, "step": 35087 }, { - "epoch": 0.9956867196367764, + "epoch": 0.9943041741052453, "grad_norm": 0.0, - "learning_rate": 9.757867639359485e-10, - "loss": 0.6439, + "learning_rate": 1.7015148741694742e-09, + "loss": 0.7862, "step": 35088 }, { - "epoch": 0.9957150964812713, + "epoch": 0.9943325115475077, "grad_norm": 0.0, - "learning_rate": 9.629899043428481e-10, - "loss": 0.8338, + "learning_rate": 1.6846269677650574e-09, + "loss": 0.8395, "step": 35089 }, { - "epoch": 0.9957434733257662, + "epoch": 0.9943608489897702, "grad_norm": 0.0, - "learning_rate": 9.502775070657778e-10, - "loss": 0.806, + "learning_rate": 1.6678232808831074e-09, + "loss": 0.7662, "step": 35090 }, { - "epoch": 0.995771850170261, + "epoch": 0.9943891864320327, "grad_norm": 0.0, - "learning_rate": 9.376495722102085e-10, - "loss": 0.701, + "learning_rate": 1.6511038136657332e-09, + "loss": 0.8101, "step": 35091 }, { - "epoch": 0.995800227014756, + "epoch": 0.994417523874295, "grad_norm": 0.0, - "learning_rate": 9.251060998838323e-10, - "loss": 0.803, + "learning_rate": 1.6344685662539328e-09, + "loss": 0.8316, "step": 35092 }, { - "epoch": 0.9958286038592509, + "epoch": 0.9944458613165575, "grad_norm": 0.0, - "learning_rate": 9.126470901932305e-10, - "loss": 0.7848, + "learning_rate": 1.6179175387887048e-09, + "loss": 0.8119, "step": 35093 }, { - "epoch": 0.9958569807037457, + "epoch": 0.99447419875882, "grad_norm": 0.0, - "learning_rate": 9.002725432427639e-10, - "loss": 0.8373, + "learning_rate": 1.6014507314077165e-09, + "loss": 0.7771, "step": 35094 }, { - "epoch": 0.9958853575482406, + "epoch": 0.9945025362010825, "grad_norm": 0.0, - "learning_rate": 8.879824591367936e-10, - "loss": 0.8437, + "learning_rate": 1.5850681442508563e-09, + "loss": 0.8605, "step": 35095 }, { - "epoch": 0.9959137343927356, + "epoch": 0.9945308736433449, "grad_norm": 0.0, - "learning_rate": 8.757768379796805e-10, - "loss": 0.8076, + "learning_rate": 1.568769777455792e-09, + "loss": 0.901, "step": 35096 }, { - "epoch": 0.9959421112372304, + "epoch": 0.9945592110856074, "grad_norm": 0.0, - "learning_rate": 8.636556798746754e-10, - "loss": 0.8384, + "learning_rate": 1.5525556311590807e-09, + "loss": 0.9694, "step": 35097 }, { - "epoch": 0.9959704880817253, + "epoch": 0.9945875485278699, "grad_norm": 0.0, - "learning_rate": 8.516189849239187e-10, - "loss": 0.7421, + "learning_rate": 1.53642570549839e-09, + "loss": 0.7795, "step": 35098 }, { - "epoch": 0.9959988649262203, + "epoch": 0.9946158859701323, "grad_norm": 0.0, - "learning_rate": 8.396667532284408e-10, - "loss": 0.8002, + "learning_rate": 1.5203800006102776e-09, + "loss": 0.7675, "step": 35099 }, { - "epoch": 0.9960272417707151, + "epoch": 0.9946442234123948, "grad_norm": 0.0, - "learning_rate": 8.277989848903822e-10, - "loss": 0.7757, + "learning_rate": 1.5044185166279702e-09, + "loss": 0.8041, "step": 35100 }, { - "epoch": 0.99605561861521, + "epoch": 0.9946725608546573, "grad_norm": 0.0, - "learning_rate": 8.160156800085528e-10, - "loss": 0.815, + "learning_rate": 1.488541253686915e-09, + "loss": 0.726, "step": 35101 }, { - "epoch": 0.9960839954597048, + "epoch": 0.9947008982969198, "grad_norm": 0.0, - "learning_rate": 8.043168386839828e-10, - "loss": 0.773, + "learning_rate": 1.4727482119203385e-09, + "loss": 0.8014, "step": 35102 }, { - "epoch": 0.9961123723041998, + "epoch": 0.9947292357391821, "grad_norm": 0.0, - "learning_rate": 7.927024610154821e-10, - "loss": 0.8405, + "learning_rate": 1.4570393914614678e-09, + "loss": 0.7551, "step": 35103 }, { - "epoch": 0.9961407491486947, + "epoch": 0.9947575731814446, "grad_norm": 0.0, - "learning_rate": 7.811725470996401e-10, - "loss": 0.784, + "learning_rate": 1.4414147924435295e-09, + "loss": 0.7835, "step": 35104 }, { - "epoch": 0.9961691259931895, + "epoch": 0.9947859106237071, "grad_norm": 0.0, - "learning_rate": 7.697270970352666e-10, - "loss": 0.7354, + "learning_rate": 1.4258744149975301e-09, + "loss": 0.8169, "step": 35105 }, { - "epoch": 0.9961975028376845, + "epoch": 0.9948142480659695, "grad_norm": 0.0, - "learning_rate": 7.583661109178408e-10, - "loss": 0.8561, + "learning_rate": 1.4104182592544756e-09, + "loss": 0.7419, "step": 35106 }, { - "epoch": 0.9962258796821793, + "epoch": 0.994842585508232, "grad_norm": 0.0, - "learning_rate": 7.470895888439522e-10, - "loss": 0.8555, + "learning_rate": 1.3950463253431523e-09, + "loss": 0.7997, "step": 35107 }, { - "epoch": 0.9962542565266742, + "epoch": 0.9948709229504945, "grad_norm": 0.0, - "learning_rate": 7.358975309090799e-10, - "loss": 0.776, + "learning_rate": 1.3797586133956763e-09, + "loss": 0.7942, "step": 35108 }, { - "epoch": 0.9962826333711692, + "epoch": 0.994899260392757, "grad_norm": 0.0, - "learning_rate": 7.247899372087031e-10, - "loss": 0.7613, + "learning_rate": 1.3645551235386134e-09, + "loss": 0.8055, "step": 35109 }, { - "epoch": 0.996311010215664, + "epoch": 0.9949275978350194, "grad_norm": 0.0, - "learning_rate": 7.1376680783386e-10, - "loss": 0.8368, + "learning_rate": 1.3494358559007491e-09, + "loss": 0.755, "step": 35110 }, { - "epoch": 0.9963393870601589, + "epoch": 0.9949559352772819, "grad_norm": 0.0, - "learning_rate": 7.028281428800299e-10, - "loss": 0.898, + "learning_rate": 1.3344008106097594e-09, + "loss": 0.809, "step": 35111 }, { - "epoch": 0.9963677639046538, + "epoch": 0.9949842727195444, "grad_norm": 0.0, - "learning_rate": 6.919739424393612e-10, - "loss": 0.8897, + "learning_rate": 1.3194499877910994e-09, + "loss": 0.8432, "step": 35112 }, { - "epoch": 0.9963961407491487, + "epoch": 0.9950126101618068, "grad_norm": 0.0, - "learning_rate": 6.812042066028923e-10, - "loss": 0.7446, + "learning_rate": 1.3045833875724446e-09, + "loss": 0.7969, "step": 35113 }, { - "epoch": 0.9964245175936436, + "epoch": 0.9950409476040692, "grad_norm": 0.0, - "learning_rate": 6.705189354616615e-10, - "loss": 0.8486, + "learning_rate": 1.28980101007814e-09, + "loss": 0.7684, "step": 35114 }, { - "epoch": 0.9964528944381384, + "epoch": 0.9950692850463317, "grad_norm": 0.0, - "learning_rate": 6.599181291055967e-10, - "loss": 0.7751, + "learning_rate": 1.2751028554325307e-09, + "loss": 0.7612, "step": 35115 }, { - "epoch": 0.9964812712826334, + "epoch": 0.9950976224885941, "grad_norm": 0.0, - "learning_rate": 6.494017876246261e-10, - "loss": 0.7536, + "learning_rate": 1.2604889237599615e-09, + "loss": 0.8135, "step": 35116 }, { - "epoch": 0.9965096481271283, + "epoch": 0.9951259599308566, "grad_norm": 0.0, - "learning_rate": 6.389699111086778e-10, - "loss": 0.7509, + "learning_rate": 1.2459592151836674e-09, + "loss": 0.7066, "step": 35117 }, { - "epoch": 0.9965380249716231, + "epoch": 0.9951542973731191, "grad_norm": 0.0, - "learning_rate": 6.286224996443491e-10, - "loss": 0.6614, + "learning_rate": 1.2315137298246626e-09, + "loss": 0.8177, "step": 35118 }, { - "epoch": 0.996566401816118, + "epoch": 0.9951826348153816, "grad_norm": 0.0, - "learning_rate": 6.183595533193476e-10, - "loss": 0.8352, + "learning_rate": 1.217152467806182e-09, + "loss": 0.7033, "step": 35119 }, { - "epoch": 0.996594778660613, + "epoch": 0.995210972257644, "grad_norm": 0.0, - "learning_rate": 6.081810722202708e-10, - "loss": 0.8182, + "learning_rate": 1.2028754292492395e-09, + "loss": 0.8807, "step": 35120 }, { - "epoch": 0.9966231555051078, + "epoch": 0.9952393096999065, "grad_norm": 0.0, - "learning_rate": 5.98087056433716e-10, - "loss": 0.7501, + "learning_rate": 1.1886826142726293e-09, + "loss": 0.708, "step": 35121 }, { - "epoch": 0.9966515323496027, + "epoch": 0.995267647142169, "grad_norm": 0.0, - "learning_rate": 5.880775060451705e-10, - "loss": 0.7378, + "learning_rate": 1.1745740229962555e-09, + "loss": 0.8444, "step": 35122 }, { - "epoch": 0.9966799091940977, + "epoch": 0.9952959845844314, "grad_norm": 0.0, - "learning_rate": 5.781524211379008e-10, - "loss": 0.811, + "learning_rate": 1.1605496555400219e-09, + "loss": 0.7778, "step": 35123 }, { - "epoch": 0.9967082860385925, + "epoch": 0.9953243220266939, "grad_norm": 0.0, - "learning_rate": 5.683118017973943e-10, - "loss": 0.8931, + "learning_rate": 1.1466095120216126e-09, + "loss": 0.8148, "step": 35124 }, { - "epoch": 0.9967366628830874, + "epoch": 0.9953526594689563, "grad_norm": 0.0, - "learning_rate": 5.585556481046972e-10, - "loss": 0.6946, + "learning_rate": 1.1327535925576006e-09, + "loss": 0.8123, "step": 35125 }, { - "epoch": 0.9967650397275823, + "epoch": 0.9953809969112188, "grad_norm": 0.0, - "learning_rate": 5.488839601441864e-10, - "loss": 0.6792, + "learning_rate": 1.1189818972656697e-09, + "loss": 0.8466, "step": 35126 }, { - "epoch": 0.9967934165720772, + "epoch": 0.9954093343534812, "grad_norm": 0.0, - "learning_rate": 5.392967379969083e-10, - "loss": 0.8935, + "learning_rate": 1.1052944262623932e-09, + "loss": 0.7877, "step": 35127 }, { - "epoch": 0.9968217934165721, + "epoch": 0.9954376717957437, "grad_norm": 0.0, - "learning_rate": 5.297939817427989e-10, - "loss": 0.6401, + "learning_rate": 1.0916911796610142e-09, + "loss": 0.8417, "step": 35128 }, { - "epoch": 0.9968501702610669, + "epoch": 0.9954660092380062, "grad_norm": 0.0, - "learning_rate": 5.203756914640146e-10, - "loss": 0.8013, + "learning_rate": 1.0781721575781057e-09, + "loss": 0.8035, "step": 35129 }, { - "epoch": 0.9968785471055619, + "epoch": 0.9954943466802686, "grad_norm": 0.0, - "learning_rate": 5.110418672382711e-10, - "loss": 0.8108, + "learning_rate": 1.0647373601258003e-09, + "loss": 0.7345, "step": 35130 }, { - "epoch": 0.9969069239500568, + "epoch": 0.9955226841225311, "grad_norm": 0.0, - "learning_rate": 5.017925091455044e-10, - "loss": 0.8274, + "learning_rate": 1.0513867874195615e-09, + "loss": 0.7201, "step": 35131 }, { - "epoch": 0.9969353007945516, + "epoch": 0.9955510215647936, "grad_norm": 0.0, - "learning_rate": 4.926276172645405e-10, - "loss": 0.8002, + "learning_rate": 1.0381204395693011e-09, + "loss": 0.7713, "step": 35132 }, { - "epoch": 0.9969636776390466, + "epoch": 0.9955793590070561, "grad_norm": 0.0, - "learning_rate": 4.835471916708745e-10, - "loss": 0.7847, + "learning_rate": 1.0249383166893723e-09, + "loss": 0.7939, "step": 35133 }, { - "epoch": 0.9969920544835414, + "epoch": 0.9956076964493185, "grad_norm": 0.0, - "learning_rate": 4.745512324422219e-10, - "loss": 0.7194, + "learning_rate": 1.0118404188885767e-09, + "loss": 0.8499, "step": 35134 }, { - "epoch": 0.9970204313280363, + "epoch": 0.995636033891581, "grad_norm": 0.0, - "learning_rate": 4.656397396540779e-10, - "loss": 0.8192, + "learning_rate": 9.988267462779366e-10, + "loss": 0.8211, "step": 35135 }, { - "epoch": 0.9970488081725312, + "epoch": 0.9956643713338434, "grad_norm": 0.0, - "learning_rate": 4.56812713383048e-10, - "loss": 0.7574, + "learning_rate": 9.858972989673643e-10, + "loss": 0.7299, "step": 35136 }, { - "epoch": 0.9970771850170261, + "epoch": 0.9956927087761058, "grad_norm": 0.0, - "learning_rate": 4.480701537024068e-10, - "loss": 0.8261, + "learning_rate": 9.730520770656616e-10, + "loss": 0.8408, "step": 35137 }, { - "epoch": 0.997105561861521, + "epoch": 0.9957210462183683, "grad_norm": 0.0, - "learning_rate": 4.394120606876495e-10, - "loss": 0.8367, + "learning_rate": 9.602910806805199e-10, + "loss": 0.8357, "step": 35138 }, { - "epoch": 0.9971339387060159, + "epoch": 0.9957493836606308, "grad_norm": 0.0, - "learning_rate": 4.3083843440872017e-10, - "loss": 0.9738, + "learning_rate": 9.476143099207414e-10, + "loss": 0.8444, "step": 35139 }, { - "epoch": 0.9971623155505108, + "epoch": 0.9957777211028932, "grad_norm": 0.0, - "learning_rate": 4.22349274941114e-10, - "loss": 0.7959, + "learning_rate": 9.35021764891797e-10, + "loss": 0.7727, "step": 35140 }, { - "epoch": 0.9971906923950057, + "epoch": 0.9958060585451557, "grad_norm": 0.0, - "learning_rate": 4.139445823558852e-10, - "loss": 0.7112, + "learning_rate": 9.225134457002682e-10, + "loss": 0.7528, "step": 35141 }, { - "epoch": 0.9972190692395005, + "epoch": 0.9958343959874182, "grad_norm": 0.0, - "learning_rate": 4.056243567229778e-10, - "loss": 0.7633, + "learning_rate": 9.100893524505161e-10, + "loss": 0.7333, "step": 35142 }, { - "epoch": 0.9972474460839955, + "epoch": 0.9958627334296807, "grad_norm": 0.0, - "learning_rate": 3.973885981134462e-10, - "loss": 0.8474, + "learning_rate": 8.97749485249122e-10, + "loss": 0.8191, "step": 35143 }, { - "epoch": 0.9972758229284904, + "epoch": 0.9958910708719431, "grad_norm": 0.0, - "learning_rate": 3.892373065961241e-10, - "loss": 0.7817, + "learning_rate": 8.854938441993366e-10, + "loss": 0.8549, "step": 35144 }, { - "epoch": 0.9973041997729852, + "epoch": 0.9959194083142056, "grad_norm": 0.0, - "learning_rate": 3.8117048224095564e-10, - "loss": 0.8903, + "learning_rate": 8.733224294044107e-10, + "loss": 0.8115, "step": 35145 }, { - "epoch": 0.9973325766174801, + "epoch": 0.995947745756468, "grad_norm": 0.0, - "learning_rate": 3.7318812511566436e-10, - "loss": 0.7797, + "learning_rate": 8.612352409653746e-10, + "loss": 0.7174, "step": 35146 }, { - "epoch": 0.9973609534619751, + "epoch": 0.9959760831987304, "grad_norm": 0.0, - "learning_rate": 3.652902352868637e-10, - "loss": 0.8985, + "learning_rate": 8.492322789865892e-10, + "loss": 0.8028, "step": 35147 }, { - "epoch": 0.9973893303064699, + "epoch": 0.9960044206409929, "grad_norm": 0.0, - "learning_rate": 3.5747681282227717e-10, - "loss": 0.723, + "learning_rate": 8.373135435668645e-10, + "loss": 0.7653, "step": 35148 }, { - "epoch": 0.9974177071509648, + "epoch": 0.9960327580832554, "grad_norm": 0.0, - "learning_rate": 3.4974785778740807e-10, - "loss": 0.7831, + "learning_rate": 8.254790348072306e-10, + "loss": 0.7935, "step": 35149 }, { - "epoch": 0.9974460839954598, + "epoch": 0.9960610955255179, "grad_norm": 0.0, - "learning_rate": 3.4210337024886965e-10, - "loss": 0.7457, + "learning_rate": 8.137287528087179e-10, + "loss": 0.877, "step": 35150 }, { - "epoch": 0.9974744608399546, + "epoch": 0.9960894329677803, "grad_norm": 0.0, - "learning_rate": 3.345433502688344e-10, - "loss": 0.7769, + "learning_rate": 8.02062697669026e-10, + "loss": 0.7853, "step": 35151 }, { - "epoch": 0.9975028376844495, + "epoch": 0.9961177704100428, "grad_norm": 0.0, - "learning_rate": 3.270677979128056e-10, - "loss": 0.8118, + "learning_rate": 7.904808694858546e-10, + "loss": 0.8428, "step": 35152 }, { - "epoch": 0.9975312145289443, + "epoch": 0.9961461078523053, "grad_norm": 0.0, - "learning_rate": 3.1967671324406593e-10, - "loss": 0.8909, + "learning_rate": 7.789832683580134e-10, + "loss": 0.7437, "step": 35153 }, { - "epoch": 0.9975595913734393, + "epoch": 0.9961744452945677, "grad_norm": 0.0, - "learning_rate": 3.1237009632367756e-10, - "loss": 0.7785, + "learning_rate": 7.67569894382092e-10, + "loss": 0.6502, "step": 35154 }, { - "epoch": 0.9975879682179342, + "epoch": 0.9962027827368302, "grad_norm": 0.0, - "learning_rate": 3.0514794721492325e-10, - "loss": 0.7729, + "learning_rate": 7.562407476546796e-10, + "loss": 0.7653, "step": 35155 }, { - "epoch": 0.997616345062429, + "epoch": 0.9962311201790927, "grad_norm": 0.0, - "learning_rate": 2.9801026597775507e-10, - "loss": 0.8756, + "learning_rate": 7.449958282690351e-10, + "loss": 0.825, "step": 35156 }, { - "epoch": 0.997644721906924, + "epoch": 0.9962594576213551, "grad_norm": 0.0, - "learning_rate": 2.9095705267323524e-10, - "loss": 0.7903, + "learning_rate": 7.33835136322858e-10, + "loss": 0.8068, "step": 35157 }, { - "epoch": 0.9976730987514189, + "epoch": 0.9962877950636175, "grad_norm": 0.0, - "learning_rate": 2.839883073602057e-10, - "loss": 0.7757, + "learning_rate": 7.227586719082968e-10, + "loss": 0.8284, "step": 35158 }, { - "epoch": 0.9977014755959137, + "epoch": 0.99631613250588, "grad_norm": 0.0, - "learning_rate": 2.7710403009750807e-10, - "loss": 0.7815, + "learning_rate": 7.117664351186104e-10, + "loss": 0.7573, "step": 35159 }, { - "epoch": 0.9977298524404086, + "epoch": 0.9963444699481425, "grad_norm": 0.0, - "learning_rate": 2.703042209439843e-10, - "loss": 0.8407, + "learning_rate": 7.008584260470574e-10, + "loss": 0.7973, "step": 35160 }, { - "epoch": 0.9977582292849035, + "epoch": 0.9963728073904049, "grad_norm": 0.0, - "learning_rate": 2.6358887995625584e-10, - "loss": 0.8245, + "learning_rate": 6.900346447857864e-10, + "loss": 0.7907, "step": 35161 }, { - "epoch": 0.9977866061293984, + "epoch": 0.9964011448326674, "grad_norm": 0.0, - "learning_rate": 2.5695800719205413e-10, - "loss": 0.8151, + "learning_rate": 6.792950914247254e-10, + "loss": 0.6723, "step": 35162 }, { - "epoch": 0.9978149829738933, + "epoch": 0.9964294822749299, "grad_norm": 0.0, - "learning_rate": 2.5041160270689035e-10, - "loss": 0.7904, + "learning_rate": 6.686397660560229e-10, + "loss": 0.8655, "step": 35163 }, { - "epoch": 0.9978433598183882, + "epoch": 0.9964578197171923, "grad_norm": 0.0, - "learning_rate": 2.4394966655627573e-10, - "loss": 0.87, + "learning_rate": 6.580686687684968e-10, + "loss": 0.6448, "step": 35164 }, { - "epoch": 0.9978717366628831, + "epoch": 0.9964861571594548, "grad_norm": 0.0, - "learning_rate": 2.375721987935009e-10, - "loss": 0.8918, + "learning_rate": 6.475817996498546e-10, + "loss": 0.7661, "step": 35165 }, { - "epoch": 0.997900113507378, + "epoch": 0.9965144946017173, "grad_norm": 0.0, - "learning_rate": 2.3127919947407707e-10, - "loss": 0.7187, + "learning_rate": 6.371791587911347e-10, + "loss": 0.9068, "step": 35166 }, { - "epoch": 0.9979284903518729, + "epoch": 0.9965428320439798, "grad_norm": 0.0, - "learning_rate": 2.250706686512949e-10, - "loss": 0.8515, + "learning_rate": 6.268607462778242e-10, + "loss": 0.7516, "step": 35167 }, { - "epoch": 0.9979568671963678, + "epoch": 0.9965711694862421, "grad_norm": 0.0, - "learning_rate": 2.1894660637622467e-10, - "loss": 0.8345, + "learning_rate": 6.16626562198741e-10, + "loss": 0.8711, "step": 35168 }, { - "epoch": 0.9979852440408626, + "epoch": 0.9965995069285046, "grad_norm": 0.0, - "learning_rate": 2.1290701270104686e-10, - "loss": 0.9273, + "learning_rate": 6.064766066382622e-10, + "loss": 0.762, "step": 35169 }, { - "epoch": 0.9980136208853575, + "epoch": 0.9966278443707671, "grad_norm": 0.0, - "learning_rate": 2.0695188767683173e-10, - "loss": 0.8216, + "learning_rate": 5.964108796818746e-10, + "loss": 0.7618, "step": 35170 }, { - "epoch": 0.9980419977298525, + "epoch": 0.9966561818130295, "grad_norm": 0.0, - "learning_rate": 2.010812313546495e-10, - "loss": 0.7291, + "learning_rate": 5.864293814161758e-10, + "loss": 0.7488, "step": 35171 }, { - "epoch": 0.9980703745743473, + "epoch": 0.996684519255292, "grad_norm": 0.0, - "learning_rate": 1.9529504378223984e-10, - "loss": 0.8472, + "learning_rate": 5.765321119244327e-10, + "loss": 0.7982, "step": 35172 }, { - "epoch": 0.9980987514188422, + "epoch": 0.9967128566975545, "grad_norm": 0.0, - "learning_rate": 1.8959332501067294e-10, - "loss": 0.9176, + "learning_rate": 5.667190712888016e-10, + "loss": 0.8326, "step": 35173 }, { - "epoch": 0.9981271282633372, + "epoch": 0.996741194139817, "grad_norm": 0.0, - "learning_rate": 1.8397607508768844e-10, - "loss": 0.7882, + "learning_rate": 5.569902595936594e-10, + "loss": 0.7951, "step": 35174 }, { - "epoch": 0.998155505107832, + "epoch": 0.9967695315820794, "grad_norm": 0.0, - "learning_rate": 1.7844329405880544e-10, - "loss": 0.9381, + "learning_rate": 5.473456769200525e-10, + "loss": 0.8921, "step": 35175 }, { - "epoch": 0.9981838819523269, + "epoch": 0.9967978690243419, "grad_norm": 0.0, - "learning_rate": 1.7299498197287378e-10, - "loss": 0.9168, + "learning_rate": 5.377853233490271e-10, + "loss": 0.9009, "step": 35176 }, { - "epoch": 0.9982122587968217, + "epoch": 0.9968262064666044, "grad_norm": 0.0, - "learning_rate": 1.676311388754126e-10, - "loss": 0.7506, + "learning_rate": 5.283091989616296e-10, + "loss": 0.7972, "step": 35177 }, { - "epoch": 0.9982406356413167, + "epoch": 0.9968545439088667, "grad_norm": 0.0, - "learning_rate": 1.6235176481083082e-10, - "loss": 0.8104, + "learning_rate": 5.189173038366857e-10, + "loss": 0.8061, "step": 35178 }, { - "epoch": 0.9982690124858116, + "epoch": 0.9968828813511292, "grad_norm": 0.0, - "learning_rate": 1.5715685982464756e-10, - "loss": 0.8623, + "learning_rate": 5.096096380552417e-10, + "loss": 0.7886, "step": 35179 }, { - "epoch": 0.9982973893303064, + "epoch": 0.9969112187933917, "grad_norm": 0.0, - "learning_rate": 1.5204642396127178e-10, - "loss": 0.7165, + "learning_rate": 5.003862016939031e-10, + "loss": 0.8299, "step": 35180 }, { - "epoch": 0.9983257661748014, + "epoch": 0.9969395562356542, "grad_norm": 0.0, - "learning_rate": 1.4702045726178172e-10, - "loss": 0.8167, + "learning_rate": 4.912469948314957e-10, + "loss": 0.7855, "step": 35181 }, { - "epoch": 0.9983541430192963, + "epoch": 0.9969678936779166, "grad_norm": 0.0, - "learning_rate": 1.4207895977058627e-10, - "loss": 0.7799, + "learning_rate": 4.821920175446249e-10, + "loss": 0.8473, "step": 35182 }, { - "epoch": 0.9983825198637911, + "epoch": 0.9969962311201791, "grad_norm": 0.0, - "learning_rate": 1.3722193152876373e-10, - "loss": 0.7487, + "learning_rate": 4.732212699087857e-10, + "loss": 0.702, "step": 35183 }, { - "epoch": 0.9984108967082861, + "epoch": 0.9970245685624416, "grad_norm": 0.0, - "learning_rate": 1.3244937257739233e-10, - "loss": 0.7648, + "learning_rate": 4.643347520005836e-10, + "loss": 0.7187, "step": 35184 }, { - "epoch": 0.998439273552781, + "epoch": 0.997052906004704, "grad_norm": 0.0, - "learning_rate": 1.2776128295644008e-10, - "loss": 0.7661, + "learning_rate": 4.5553246389551386e-10, + "loss": 0.7459, "step": 35185 }, { - "epoch": 0.9984676503972758, + "epoch": 0.9970812434469665, "grad_norm": 0.0, - "learning_rate": 1.2315766270698527e-10, - "loss": 0.861, + "learning_rate": 4.4681440566574087e-10, + "loss": 0.8018, "step": 35186 }, { - "epoch": 0.9984960272417707, + "epoch": 0.997109580889229, "grad_norm": 0.0, - "learning_rate": 1.186385118656652e-10, - "loss": 0.7034, + "learning_rate": 4.381805773856496e-10, + "loss": 0.7782, "step": 35187 }, { - "epoch": 0.9985244040862656, + "epoch": 0.9971379183314913, "grad_norm": 0.0, - "learning_rate": 1.1420383047133777e-10, - "loss": 0.743, + "learning_rate": 4.296309791274045e-10, + "loss": 0.8193, "step": 35188 }, { - "epoch": 0.9985527809307605, + "epoch": 0.9971662557737538, "grad_norm": 0.0, - "learning_rate": 1.0985361856286069e-10, - "loss": 0.8079, + "learning_rate": 4.211656109642803e-10, + "loss": 0.7723, "step": 35189 }, { - "epoch": 0.9985811577752554, + "epoch": 0.9971945932160163, "grad_norm": 0.0, - "learning_rate": 1.0558787617576117e-10, - "loss": 0.8792, + "learning_rate": 4.1278447296733137e-10, + "loss": 0.8741, "step": 35190 }, { - "epoch": 0.9986095346197503, + "epoch": 0.9972229306582788, "grad_norm": 0.0, - "learning_rate": 1.0140660334556629e-10, - "loss": 0.8684, + "learning_rate": 4.044875652065017e-10, + "loss": 0.8181, "step": 35191 }, { - "epoch": 0.9986379114642452, + "epoch": 0.9972512681005412, "grad_norm": 0.0, - "learning_rate": 9.730980010891345e-11, - "loss": 0.7978, + "learning_rate": 3.962748877517353e-10, + "loss": 0.813, "step": 35192 }, { - "epoch": 0.99866628830874, + "epoch": 0.9972796055428037, "grad_norm": 0.0, - "learning_rate": 9.329746649910932e-11, - "loss": 0.7683, + "learning_rate": 3.881464406729762e-10, + "loss": 0.6311, "step": 35193 }, { - "epoch": 0.9986946651532349, + "epoch": 0.9973079429850662, "grad_norm": 0.0, - "learning_rate": 8.936960255168104e-11, - "loss": 0.7395, + "learning_rate": 3.8010222403794815e-10, + "loss": 0.7739, "step": 35194 }, { - "epoch": 0.9987230419977299, + "epoch": 0.9973362804273286, "grad_norm": 0.0, - "learning_rate": 8.552620829882508e-11, - "loss": 0.8605, + "learning_rate": 3.7214223791437464e-10, + "loss": 0.8345, "step": 35195 }, { - "epoch": 0.9987514188422247, + "epoch": 0.9973646178695911, "grad_norm": 0.0, - "learning_rate": 8.17672837727379e-11, - "loss": 0.7204, + "learning_rate": 3.642664823688691e-10, + "loss": 0.869, "step": 35196 }, { - "epoch": 0.9987797956867196, + "epoch": 0.9973929553118536, "grad_norm": 0.0, - "learning_rate": 7.809282900561598e-11, - "loss": 0.7542, + "learning_rate": 3.564749574691551e-10, + "loss": 0.7616, "step": 35197 }, { - "epoch": 0.9988081725312146, + "epoch": 0.9974212927541161, "grad_norm": 0.0, - "learning_rate": 7.450284402854558e-11, - "loss": 0.745, + "learning_rate": 3.4876766327962554e-10, + "loss": 0.7254, "step": 35198 }, { - "epoch": 0.9988365493757094, + "epoch": 0.9974496301963784, "grad_norm": 0.0, - "learning_rate": 7.099732887150268e-11, - "loss": 0.722, + "learning_rate": 3.4114459986689386e-10, + "loss": 0.6576, "step": 35199 }, { - "epoch": 0.9988649262202043, + "epoch": 0.9974779676386409, "grad_norm": 0.0, - "learning_rate": 6.757628356335311e-11, - "loss": 0.9218, + "learning_rate": 3.3360576729313253e-10, + "loss": 0.6778, "step": 35200 }, { - "epoch": 0.9988933030646993, + "epoch": 0.9975063050809034, "grad_norm": 0.0, - "learning_rate": 6.42397081351831e-11, - "loss": 0.7765, + "learning_rate": 3.261511656227345e-10, + "loss": 0.8592, "step": 35201 }, { - "epoch": 0.9989216799091941, + "epoch": 0.9975346425231658, "grad_norm": 0.0, - "learning_rate": 6.0987602613638e-11, - "loss": 0.6866, + "learning_rate": 3.1878079491787227e-10, + "loss": 0.7432, "step": 35202 }, { - "epoch": 0.998950056753689, + "epoch": 0.9975629799654283, "grad_norm": 0.0, - "learning_rate": 5.781996702647341e-11, - "loss": 0.7686, + "learning_rate": 3.1149465524182856e-10, + "loss": 0.841, "step": 35203 }, { - "epoch": 0.9989784335981838, + "epoch": 0.9975913174076908, "grad_norm": 0.0, - "learning_rate": 5.473680140033466e-11, - "loss": 0.8163, + "learning_rate": 3.0429274665566555e-10, + "loss": 0.8153, "step": 35204 }, { - "epoch": 0.9990068104426788, + "epoch": 0.9976196548499533, "grad_norm": 0.0, - "learning_rate": 5.1738105760756884e-11, - "loss": 0.7102, + "learning_rate": 2.971750692193354e-10, + "loss": 0.8078, "step": 35205 }, { - "epoch": 0.9990351872871737, + "epoch": 0.9976479922922157, "grad_norm": 0.0, - "learning_rate": 4.882388013438544e-11, - "loss": 0.8565, + "learning_rate": 2.9014162299279004e-10, + "loss": 0.8242, "step": 35206 }, { - "epoch": 0.9990635641316685, + "epoch": 0.9976763297344782, "grad_norm": 0.0, - "learning_rate": 4.5994124544535004e-11, - "loss": 0.773, + "learning_rate": 2.8319240803598156e-10, + "loss": 0.7331, "step": 35207 }, { - "epoch": 0.9990919409761635, + "epoch": 0.9977046671767407, "grad_norm": 0.0, - "learning_rate": 4.324883901674071e-11, - "loss": 0.7847, + "learning_rate": 2.7632742440775184e-10, + "loss": 0.7528, "step": 35208 }, { - "epoch": 0.9991203178206584, + "epoch": 0.997733004619003, "grad_norm": 0.0, - "learning_rate": 4.058802357209679e-11, - "loss": 0.7411, + "learning_rate": 2.6954667216472217e-10, + "loss": 0.7971, "step": 35209 }, { - "epoch": 0.9991486946651532, + "epoch": 0.9977613420612655, "grad_norm": 0.0, - "learning_rate": 3.801167823502816e-11, - "loss": 0.7918, + "learning_rate": 2.6285015136462423e-10, + "loss": 0.8376, "step": 35210 }, { - "epoch": 0.9991770715096481, + "epoch": 0.997789679503528, "grad_norm": 0.0, - "learning_rate": 3.5519803025518826e-11, - "loss": 0.8455, + "learning_rate": 2.562378620640793e-10, + "loss": 0.8191, "step": 35211 }, { - "epoch": 0.999205448354143, + "epoch": 0.9978180169457904, "grad_norm": 0.0, - "learning_rate": 3.311239796577326e-11, - "loss": 0.7619, + "learning_rate": 2.497098043185986e-10, + "loss": 0.792, "step": 35212 }, { - "epoch": 0.9992338251986379, + "epoch": 0.9978463543880529, "grad_norm": 0.0, - "learning_rate": 3.078946307577546e-11, - "loss": 0.86, + "learning_rate": 2.4326597818258303e-10, + "loss": 0.8222, "step": 35213 }, { - "epoch": 0.9992622020431328, + "epoch": 0.9978746918303154, "grad_norm": 0.0, - "learning_rate": 2.8550998375509452e-11, - "loss": 0.7734, + "learning_rate": 2.369063837115437e-10, + "loss": 0.8047, "step": 35214 }, { - "epoch": 0.9992905788876277, + "epoch": 0.9979030292725779, "grad_norm": 0.0, - "learning_rate": 2.6397003883849027e-11, - "loss": 0.7873, + "learning_rate": 2.3063102095877143e-10, + "loss": 0.6988, "step": 35215 }, { - "epoch": 0.9993189557321226, + "epoch": 0.9979313667148403, "grad_norm": 0.0, - "learning_rate": 2.432747961855775e-11, - "loss": 0.7891, + "learning_rate": 2.244398899753364e-10, + "loss": 0.7579, "step": 35216 }, { - "epoch": 0.9993473325766175, + "epoch": 0.9979597041571028, "grad_norm": 0.0, - "learning_rate": 2.2342425597399187e-11, - "loss": 0.8102, + "learning_rate": 2.1833299081563952e-10, + "loss": 0.8123, "step": 35217 }, { - "epoch": 0.9993757094211124, + "epoch": 0.9979880415993653, "grad_norm": 0.0, - "learning_rate": 2.0441841837026688e-11, - "loss": 0.814, + "learning_rate": 2.1231032352964088e-10, + "loss": 0.7574, "step": 35218 }, { - "epoch": 0.9994040862656073, + "epoch": 0.9980163790416277, "grad_norm": 0.0, - "learning_rate": 1.8625728352983375e-11, - "loss": 0.7773, + "learning_rate": 2.063718881695209e-10, + "loss": 0.7642, "step": 35219 }, { - "epoch": 0.9994324631101021, + "epoch": 0.9980447164838901, "grad_norm": 0.0, - "learning_rate": 1.689408516192259e-11, - "loss": 0.8777, + "learning_rate": 2.0051768478412948e-10, + "loss": 0.6528, "step": 35220 }, { - "epoch": 0.999460839954597, + "epoch": 0.9980730539261526, "grad_norm": 0.0, - "learning_rate": 1.5246912277167014e-11, - "loss": 0.811, + "learning_rate": 1.9474771342342659e-10, + "loss": 0.8525, "step": 35221 }, { - "epoch": 0.999489216799092, + "epoch": 0.9981013913684151, "grad_norm": 0.0, - "learning_rate": 1.3684209713149543e-11, - "loss": 0.7946, + "learning_rate": 1.890619741351518e-10, + "loss": 0.8062, "step": 35222 }, { - "epoch": 0.9995175936435868, + "epoch": 0.9981297288106775, "grad_norm": 0.0, - "learning_rate": 1.2205977483192855e-11, - "loss": 0.8295, + "learning_rate": 1.8346046696815502e-10, + "loss": 0.8298, "step": 35223 }, { - "epoch": 0.9995459704880817, + "epoch": 0.99815806625294, "grad_norm": 0.0, - "learning_rate": 1.0812215599509402e-11, - "loss": 0.8495, + "learning_rate": 1.779431919690655e-10, + "loss": 0.7853, "step": 35224 }, { - "epoch": 0.9995743473325767, + "epoch": 0.9981864036952025, "grad_norm": 0.0, - "learning_rate": 9.502924074311637e-12, - "loss": 0.7965, + "learning_rate": 1.725101491845127e-10, + "loss": 0.74, "step": 35225 }, { - "epoch": 0.9996027241770715, + "epoch": 0.9982147411374649, "grad_norm": 0.0, - "learning_rate": 8.278102917591569e-12, - "loss": 0.7718, + "learning_rate": 1.671613386600157e-10, + "loss": 0.9165, "step": 35226 }, { - "epoch": 0.9996311010215664, + "epoch": 0.9982430785797274, "grad_norm": 0.0, - "learning_rate": 7.137752141561649e-12, - "loss": 0.7643, + "learning_rate": 1.618967604410937e-10, + "loss": 0.8995, "step": 35227 }, { - "epoch": 0.9996594778660612, + "epoch": 0.9982714160219899, "grad_norm": 0.0, - "learning_rate": 6.08187175399344e-12, - "loss": 0.8705, + "learning_rate": 1.5671641457104536e-10, + "loss": 0.7775, "step": 35228 }, { - "epoch": 0.9996878547105562, + "epoch": 0.9982997534642524, "grad_norm": 0.0, - "learning_rate": 5.110461764878949e-12, - "loss": 0.7204, + "learning_rate": 1.5162030109538982e-10, + "loss": 0.8644, "step": 35229 }, { - "epoch": 0.9997162315550511, + "epoch": 0.9983280909065148, "grad_norm": 0.0, - "learning_rate": 4.223522181989736e-12, - "loss": 0.8001, + "learning_rate": 1.4660842005520538e-10, + "loss": 0.7067, "step": 35230 }, { - "epoch": 0.9997446083995459, + "epoch": 0.9983564283487772, "grad_norm": 0.0, - "learning_rate": 3.421053013097364e-12, - "loss": 0.7357, + "learning_rate": 1.4168077149379067e-10, + "loss": 0.7156, "step": 35231 }, { - "epoch": 0.9997729852440409, + "epoch": 0.9983847657910397, "grad_norm": 0.0, - "learning_rate": 2.7030542648631697e-12, - "loss": 0.862, + "learning_rate": 1.368373554533342e-10, + "loss": 0.8138, "step": 35232 }, { - "epoch": 0.9998013620885358, + "epoch": 0.9984131032333021, "grad_norm": 0.0, - "learning_rate": 2.0695259439484917e-12, - "loss": 0.9245, + "learning_rate": 1.320781719726938e-10, + "loss": 0.7981, "step": 35233 }, { - "epoch": 0.9998297389330306, + "epoch": 0.9984414406755646, "grad_norm": 0.0, - "learning_rate": 1.5204680536839989e-12, - "loss": 0.6937, + "learning_rate": 1.2740322109294766e-10, + "loss": 0.7976, "step": 35234 }, { - "epoch": 0.9998581157775256, + "epoch": 0.9984697781178271, "grad_norm": 0.0, - "learning_rate": 1.0558806018412526e-12, - "loss": 0.8098, + "learning_rate": 1.2281250285295364e-10, + "loss": 0.7288, "step": 35235 }, { - "epoch": 0.9998864926220205, + "epoch": 0.9984981155600895, "grad_norm": 0.0, - "learning_rate": 6.75763589530476e-13, - "loss": 0.8111, + "learning_rate": 1.1830601729267976e-10, + "loss": 0.7981, "step": 35236 }, { - "epoch": 0.9999148694665153, + "epoch": 0.998526453002352, "grad_norm": 0.0, - "learning_rate": 3.801170211925609e-13, - "loss": 0.782, + "learning_rate": 1.1388376444987359e-10, + "loss": 0.8715, "step": 35237 }, { - "epoch": 0.9999432463110102, + "epoch": 0.9985547904446145, "grad_norm": 0.0, - "learning_rate": 1.689408990479535e-13, - "loss": 0.7523, + "learning_rate": 1.0954574436006227e-10, + "loss": 0.7875, "step": 35238 }, { - "epoch": 0.9999716231555051, + "epoch": 0.998583127886877, "grad_norm": 0.0, - "learning_rate": 4.2235225317099895e-14, - "loss": 0.8682, + "learning_rate": 1.0529195706099338e-10, + "loss": 0.8472, "step": 35239 }, + { + "epoch": 0.9986114653291394, + "grad_norm": 0.0, + "learning_rate": 1.0112240258819406e-10, + "loss": 0.804, + "step": 35240 + }, + { + "epoch": 0.9986398027714019, + "grad_norm": 0.0, + "learning_rate": 9.703708097830167e-11, + "loss": 0.6952, + "step": 35241 + }, + { + "epoch": 0.9986681402136643, + "grad_norm": 0.0, + "learning_rate": 9.303599226351267e-11, + "loss": 0.7852, + "step": 35242 + }, + { + "epoch": 0.9986964776559267, + "grad_norm": 0.0, + "learning_rate": 8.9119136478244e-11, + "loss": 0.9467, + "step": 35243 + }, + { + "epoch": 0.9987248150981892, + "grad_norm": 0.0, + "learning_rate": 8.528651365580232e-11, + "loss": 0.7797, + "step": 35244 + }, + { + "epoch": 0.9987531525404517, + "grad_norm": 0.0, + "learning_rate": 8.153812382838411e-11, + "loss": 0.8124, + "step": 35245 + }, + { + "epoch": 0.9987814899827142, + "grad_norm": 0.0, + "learning_rate": 7.787396702818584e-11, + "loss": 0.8083, + "step": 35246 + }, + { + "epoch": 0.9988098274249766, + "grad_norm": 0.0, + "learning_rate": 7.429404328518353e-11, + "loss": 0.8019, + "step": 35247 + }, + { + "epoch": 0.9988381648672391, + "grad_norm": 0.0, + "learning_rate": 7.079835262935319e-11, + "loss": 0.8194, + "step": 35248 + }, + { + "epoch": 0.9988665023095016, + "grad_norm": 0.0, + "learning_rate": 6.738689509067087e-11, + "loss": 0.7687, + "step": 35249 + }, + { + "epoch": 0.998894839751764, + "grad_norm": 0.0, + "learning_rate": 6.405967069800234e-11, + "loss": 0.9105, + "step": 35250 + }, + { + "epoch": 0.9989231771940265, + "grad_norm": 0.0, + "learning_rate": 6.08166794791032e-11, + "loss": 0.7432, + "step": 35251 + }, + { + "epoch": 0.998951514636289, + "grad_norm": 0.0, + "learning_rate": 5.765792146172899e-11, + "loss": 0.8423, + "step": 35252 + }, + { + "epoch": 0.9989798520785514, + "grad_norm": 0.0, + "learning_rate": 5.458339667141488e-11, + "loss": 0.8492, + "step": 35253 + }, + { + "epoch": 0.9990081895208138, + "grad_norm": 0.0, + "learning_rate": 5.15931051348062e-11, + "loss": 0.8493, + "step": 35254 + }, + { + "epoch": 0.9990365269630763, + "grad_norm": 0.0, + "learning_rate": 4.868704687743808e-11, + "loss": 0.8394, + "step": 35255 + }, + { + "epoch": 0.9990648644053388, + "grad_norm": 0.0, + "learning_rate": 4.5865221922625216e-11, + "loss": 0.7642, + "step": 35256 + }, + { + "epoch": 0.9990932018476012, + "grad_norm": 0.0, + "learning_rate": 4.3127630295902725e-11, + "loss": 0.8694, + "step": 35257 + }, + { + "epoch": 0.9991215392898637, + "grad_norm": 0.0, + "learning_rate": 4.0474272018364845e-11, + "loss": 0.7968, + "step": 35258 + }, + { + "epoch": 0.9991498767321262, + "grad_norm": 0.0, + "learning_rate": 3.790514711332627e-11, + "loss": 0.8184, + "step": 35259 + }, + { + "epoch": 0.9991782141743886, + "grad_norm": 0.0, + "learning_rate": 3.542025560299145e-11, + "loss": 0.7538, + "step": 35260 + }, + { + "epoch": 0.9992065516166511, + "grad_norm": 0.0, + "learning_rate": 3.3019597507344404e-11, + "loss": 0.7043, + "step": 35261 + }, + { + "epoch": 0.9992348890589136, + "grad_norm": 0.0, + "learning_rate": 3.070317284747937e-11, + "loss": 0.7782, + "step": 35262 + }, + { + "epoch": 0.999263226501176, + "grad_norm": 0.0, + "learning_rate": 2.8470981642270135e-11, + "loss": 0.7616, + "step": 35263 + }, + { + "epoch": 0.9992915639434384, + "grad_norm": 0.0, + "learning_rate": 2.6323023910590494e-11, + "loss": 0.8292, + "step": 35264 + }, + { + "epoch": 0.9993199013857009, + "grad_norm": 0.0, + "learning_rate": 2.4259299670204016e-11, + "loss": 0.6999, + "step": 35265 + }, + { + "epoch": 0.9993482388279634, + "grad_norm": 0.0, + "learning_rate": 2.227980893887427e-11, + "loss": 0.7889, + "step": 35266 + }, + { + "epoch": 0.9993765762702258, + "grad_norm": 0.0, + "learning_rate": 2.0384551733254598e-11, + "loss": 0.7304, + "step": 35267 + }, + { + "epoch": 0.9994049137124883, + "grad_norm": 0.0, + "learning_rate": 1.8573528069998348e-11, + "loss": 0.8028, + "step": 35268 + }, + { + "epoch": 0.9994332511547508, + "grad_norm": 0.0, + "learning_rate": 1.6846737963538418e-11, + "loss": 0.774, + "step": 35269 + }, + { + "epoch": 0.9994615885970133, + "grad_norm": 0.0, + "learning_rate": 1.5204181428307707e-11, + "loss": 0.8024, + "step": 35270 + }, + { + "epoch": 0.9994899260392757, + "grad_norm": 0.0, + "learning_rate": 1.3645858478739115e-11, + "loss": 0.7325, + "step": 35271 + }, + { + "epoch": 0.9995182634815382, + "grad_norm": 0.0, + "learning_rate": 1.2171769127045097e-11, + "loss": 0.8617, + "step": 35272 + }, + { + "epoch": 0.9995466009238007, + "grad_norm": 0.0, + "learning_rate": 1.0781913386548326e-11, + "loss": 0.8023, + "step": 35273 + }, + { + "epoch": 0.999574938366063, + "grad_norm": 0.0, + "learning_rate": 9.476291268351035e-12, + "loss": 0.9635, + "step": 35274 + }, + { + "epoch": 0.9996032758083255, + "grad_norm": 0.0, + "learning_rate": 8.254902784665674e-12, + "loss": 0.8557, + "step": 35275 + }, + { + "epoch": 0.999631613250588, + "grad_norm": 0.0, + "learning_rate": 7.117747943263809e-12, + "loss": 0.6999, + "step": 35276 + }, + { + "epoch": 0.9996599506928505, + "grad_norm": 0.0, + "learning_rate": 6.0648267563578886e-12, + "loss": 0.7727, + "step": 35277 + }, + { + "epoch": 0.9996882881351129, + "grad_norm": 0.0, + "learning_rate": 5.096139231719477e-12, + "loss": 0.8799, + "step": 35278 + }, + { + "epoch": 0.9997166255773754, + "grad_norm": 0.0, + "learning_rate": 4.211685378230357e-12, + "loss": 0.8973, + "step": 35279 + }, + { + "epoch": 0.9997449630196379, + "grad_norm": 0.0, + "learning_rate": 3.411465201441644e-12, + "loss": 0.7955, + "step": 35280 + }, + { + "epoch": 0.9997733004619003, + "grad_norm": 0.0, + "learning_rate": 2.695478710235122e-12, + "loss": 0.9404, + "step": 35281 + }, + { + "epoch": 0.9998016379041628, + "grad_norm": 0.0, + "learning_rate": 2.0637259090516837e-12, + "loss": 0.7283, + "step": 35282 + }, + { + "epoch": 0.9998299753464253, + "grad_norm": 0.0, + "learning_rate": 1.5162068045526668e-12, + "loss": 0.8705, + "step": 35283 + }, + { + "epoch": 0.9998583127886876, + "grad_norm": 0.0, + "learning_rate": 1.0529214000687405e-12, + "loss": 0.7588, + "step": 35284 + }, + { + "epoch": 0.9998866502309501, + "grad_norm": 0.0, + "learning_rate": 6.738697000407968e-13, + "loss": 0.7999, + "step": 35285 + }, + { + "epoch": 0.9999149876732126, + "grad_norm": 0.0, + "learning_rate": 3.7905170779950483e-13, + "loss": 0.8688, + "step": 35286 + }, + { + "epoch": 0.9999433251154751, + "grad_norm": 0.0, + "learning_rate": 1.6846742667553374e-13, + "loss": 0.7845, + "step": 35287 + }, + { + "epoch": 0.9999716625577375, + "grad_norm": 0.0, + "learning_rate": 4.2116856668883434e-14, + "loss": 0.7521, + "step": 35288 + }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 0.0, - "loss": 0.7954, - "step": 35240 + "loss": 0.818, + "step": 35289 }, { "epoch": 1.0, - "step": 35240, - "total_flos": 8.012990692718543e+19, - "train_loss": 0.9087818804210755, - "train_runtime": 242499.5276, - "train_samples_per_second": 18.601, - "train_steps_per_second": 0.145 + "step": 35289, + "total_flos": 8.02060457534984e+19, + "train_loss": 0.915870410711829, + "train_runtime": 238307.0321, + "train_samples_per_second": 18.954, + "train_steps_per_second": 0.148 } ], "logging_steps": 1.0, - "max_steps": 35240, + "max_steps": 35289, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, @@ -246715,7 +247058,7 @@ "attributes": {} } }, - "total_flos": 8.012990692718543e+19, + "total_flos": 8.02060457534984e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null